5月份增加后台生成方式，并简化程序

0298d2cf · bruxellse_li · 34cbd322 · 0298d2cf · 0298d2cf · 0298d2cf
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
--- a/.idea/SCBG-PYTHON.iml
+++ b/.idea/SCBG-PYTHON.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="Flask">
+    <option name="enabled" value="true" />
+  </component>
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+  <component name="TemplatesService">
+    <option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
+  </component>
+</module>
\ No newline at end of file
--- a/.idea/deployment.xml
+++ b/.idea/deployment.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
+    <serverData>
+      <paths name="root@114.115.130.239:22">
+        <serverdata>
+          <mappings>
+            <mapping local="$PROJECT_DIR$" web="/" />
+          </mappings>
+        </serverdata>
+      </paths>
+      <paths name="root@114.115.141.81:22">
+        <serverdata>
+          <mappings>
+            <mapping local="$PROJECT_DIR$" web="/" />
+          </mappings>
+        </serverdata>
+      </paths>
+      <paths name="root@114.115.141.81:22 password">
+        <serverdata>
+          <mappings>
+            <mapping local="$PROJECT_DIR$" web="/" />
+          </mappings>
+        </serverdata>
+      </paths>
+      <paths name="root@114.116.9.59:22">
+        <serverdata>
+          <mappings>
+            <mapping local="$PROJECT_DIR$" web="/" />
+          </mappings>
+        </serverdata>
+      </paths>
+      <paths name="root@114.116.90.53:22">
+        <serverdata>
+          <mappings>
+            <mapping local="$PROJECT_DIR$" web="/" />
+          </mappings>
+        </serverdata>
+      </paths>
+      <paths name="root@114.116.90.53:22 password">
+        <serverdata>
+          <mappings>
+            <mapping local="$PROJECT_DIR$" web="/" />
+          </mappings>
+        </serverdata>
+      </paths>
+      <paths name="root@114.116.90.53:22 password (1)">
+        <serverdata>
+          <mappings>
+            <mapping local="$PROJECT_DIR$" web="/" />
+          </mappings>
+        </serverdata>
+      </paths>
+      <paths name="zzsn@192.168.1.149:22">
+        <serverdata>
+          <mappings>
+            <mapping local="$PROJECT_DIR$" web="/" />
+          </mappings>
+        </serverdata>
+      </paths>
+      <paths name="zzsn@192.168.1.149:22 password">
+        <serverdata>
+          <mappings>
+            <mapping local="$PROJECT_DIR$" web="/" />
+          </mappings>
+        </serverdata>
+      </paths>
+    </serverData>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="33">
+            <item index="0" class="java.lang.String" itemvalue="pandas" />
+            <item index="1" class="java.lang.String" itemvalue="tqdm" />
+            <item index="2" class="java.lang.String" itemvalue="transformers" />
+            <item index="3" class="java.lang.String" itemvalue="sentencepiece" />
+            <item index="4" class="java.lang.String" itemvalue="keras" />
+            <item index="5" class="java.lang.String" itemvalue="gevent" />
+            <item index="6" class="java.lang.String" itemvalue="torch" />
+            <item index="7" class="java.lang.String" itemvalue="numpy" />
+            <item index="8" class="java.lang.String" itemvalue="Flask" />
+            <item index="9" class="java.lang.String" itemvalue="thulac" />
+            <item index="10" class="java.lang.String" itemvalue="beautifulsoup4" />
+            <item index="11" class="java.lang.String" itemvalue="fdfs_client" />
+            <item index="12" class="java.lang.String" itemvalue="pymysql" />
+            <item index="13" class="java.lang.String" itemvalue="selenium" />
+            <item index="14" class="java.lang.String" itemvalue="matplotlib" />
+            <item index="15" class="java.lang.String" itemvalue="pyecharts" />
+            <item index="16" class="java.lang.String" itemvalue="requests" />
+            <item index="17" class="java.lang.String" itemvalue="docx" />
+            <item index="18" class="java.lang.String" itemvalue="flask_sqlalchemy" />
+            <item index="19" class="java.lang.String" itemvalue="scikit_learn" />
+            <item index="20" class="java.lang.String" itemvalue="gensim" />
+            <item index="21" class="java.lang.String" itemvalue="sentence_transformers" />
+            <item index="22" class="java.lang.String" itemvalue="elasticsearch" />
+            <item index="23" class="java.lang.String" itemvalue="nltk" />
+            <item index="24" class="java.lang.String" itemvalue="symspellpy" />
+            <item index="25" class="java.lang.String" itemvalue="wordcloud" />
+            <item index="26" class="java.lang.String" itemvalue="concurrent_log_handler" />
+            <item index="27" class="java.lang.String" itemvalue="setuptools" />
+            <item index="28" class="java.lang.String" itemvalue="gunicorn" />
+            <item index="29" class="java.lang.String" itemvalue="jieba" />
+            <item index="30" class="java.lang.String" itemvalue="flask" />
+            <item index="31" class="java.lang.String" itemvalue="flak_cors" />
+            <item index="32" class="java.lang.String" itemvalue="paddle" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
\ No newline at end of file
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/SCBG-PYTHON.iml" filepath="$PROJECT_DIR$/.idea/SCBG-PYTHON.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
--- a/Dockerfile
+++ b/Dockerfile
@@ -27,6 +27,7 @@ RUN sed -i s@/archive.ubuntu.com/@/repo.huaweicloud.com/@g /etc/apt/sources.list
    && dpkg -i /opt/SCBG-PYTHON/google-chrome-stable_current_amd64.deb \
    && apt-get install -f \
    && rm /opt/SCBG-PYTHON/google-chrome-stable_current_amd64.deb \
+#    && ln -sf /usr/local/bin/python /usr/bin/python \
    && /usr/local/bin/python -m pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn \
    && pip install -r /opt/SCBG-PYTHON/requirements.txt -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com --no-cache-dir --default-timeout=10000 \
    && apt-get remove -y build-essential && apt-get clean \
@@ -34,6 +35,12 @@ RUN sed -i s@/archive.ubuntu.com/@/repo.huaweicloud.com/@g /etc/apt/sources.list
    && chmod u+x /opt/SCBG-PYTHON/start.sh
+ENV HOME=/home/user
+#ENV LANG=en_US.utf8
+#ENV LC_ALL=en_US.utf8
+#ENV PATH="/usr/local/bin/python:${PATH}"
 EXPOSE 4000
 WORKDIR '/opt/SCBG-PYTHON'
@@ -41,6 +48,6 @@ WORKDIR '/opt/SCBG-PYTHON'
 # CMD ["./start.sh"]
 # c——告诉shell 运行后续命令， 此处是执行shell脚本，并将输出重定向到指定文件中
-CMD ["sh", "-c", "/bin/bash start.sh | tee /opt/SCBG-PYTHON/start.log"]
+CMD ["sh", "-c", "/bin/bash start.sh | tee /opt/SCBG-PYTHON/log/start.log"]
--- a/app_run.py
+++ b/app_run.py
--- a/clean_content.py
+++ b/clean_content.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# @File    : 文章内容检查.py
+# @Time    : 2022/12/9 16:05
+# @Author  : bruxelles_li
+# @Software: PyCharm
+from bs4 import BeautifulSoup
+import re
+punctuation = re.compile(r'[\n0-9a-zA-Z、！？｡。＂＃＄％＆＇（）＊＋，－／：；＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､'
+                         r'〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏.!"#$%&\'()*+,\-'
+                         r'./:;<=>?@\[\]\\^_`{|}~一二三四五六七八九十《]')
+# punctuation = re.compile(r'[0-9]')
def is_punctuation(text):
    """Tell whether ``text`` is made up only of characters matched by ``punctuation``.

    :param text: string to inspect.
    :return: ``True`` if nothing is left after deleting every match of the
        module-level ``punctuation`` pattern (an empty string therefore also
        yields ``True``), ``False`` otherwise.
    """
    # Delete every matched character; any leftover means real content exists.
    leftover = re.compile(punctuation).sub('', text)
    return not leftover
+# 程序返回处理函数
def clean_html_tag(content):
    """Strip markup and boilerplate from an article body and re-flow it into paragraphs.

    The input is parsed with BeautifulSoup (``lxml`` parser) and reduced to its
    text. Decorative symbols, "follow us" banners, picture captions and stray
    escapes are removed. A line that does not end with sentence punctuation is
    glued to the following line with ``——``. Lines that become 20 characters
    or shorter, or that mention URLs or script/markup keywords, are dropped.

    :param content: raw article body (HTML or plain text).
    :return: the cleaned text. Paragraphs are separated by a blank line when
        at least ten paragraphs survive, otherwise they are concatenated
        without any separator.
    """
    plain = BeautifulSoup(content, 'lxml').text.strip()

    # Pass 1: drop decorative symbols and the ideographic space. A raw string
    # is used so that ``\*`` reaches the regex engine as a proper escape.
    noise = re.compile(
        r'[＃＄＊$＜＝＞＠●▍［＼］△▲＾＿｀■▋｛｜｝～｟｠ф｢｣\u3000〈〉《》「」『』【】※〔〕〖〗〘〙〚〛〜〰〾〿\*〈〉]')
    cleaned = noise.sub('', plain)

    # Pass 2: ordered substitutions; the order matters (e.g. the doubled
    # "&mdash" must be collapsed before the single one is rewritten).
    substitutions = (
        (r"(阅读提示|点击 上方文字 关注我们 |点击 上方文字 关注我们|点击蓝字丨关注我们|点击蓝字 关注我们|- THE END - |◀——|-)", ""),
        (r"(?=\（图片[：:]).+(?<=\）)", ""),
        (r"&mdash&mdash", "&mdash"),
        (r"&mdash", "&"),
        (r"       ", ""),
        (r"(?=\（).*(?<=图\）)", ""),
        (r'。"', "。”"),
        (r"(。；|。，)", "。"),
        (r"(\\t|\\)", ""),
    )
    for regex, replacement in substitutions:
        cleaned = re.sub(regex, replacement, cleaned)

    # Pass 3: mark lines that do not end a sentence with two tabs so they can
    # be merged with the next line once everything is joined by single tabs.
    marked_lines = []
    for line in cleaned.split('\n'):
        if len(line) <= 2:
            continue
        line = line.strip()
        if not line.endswith(("。", "“", ".", "”")):
            line = line + "\t" + "\t"
            # Remove photo credits of the form "... 记者 ... 摄".
            line = re.sub(r".*(?<=记者).*(?<=摄)", "", line)
        marked_lines.append(line)

    joined = "\t".join(marked_lines)
    joined = re.sub('\t\t\t', '——', joined)  # unfinished line + separator
    joined = re.sub('\t\t', '', joined)      # marker left at the very end
    joined = re.sub(r"：——", "：", joined)
    joined = re.sub(r"。）", "）", joined)
    joined = re.sub(r"(。”|！”)", "”", joined)
    joined = re.sub("\t", "\n", joined).strip()

    # Pass 4: per-line filtering.
    kept = []
    for line in joined.split('\n'):
        # Remove literal "&nbsp;" entities at either end. ``str.strip`` with
        # that argument would strip the characters & n b s p ; individually
        # and eat real trailing letters.
        line = re.sub(r"^(?:&nbsp;)+|(?:&nbsp;)+$", "", line)
        # Drop the sentence that follows one of these keywords.
        line = re.sub("(微信|如需转载|免责声明|公告|jpeg|jpg|png|声明：|附件：|责任单位：|编辑：).*?(?<=。)", '', line)
        # Drop lines that carry an e-mail address or URL.
        line = re.sub(r".*(?=\.com|www\.).*", "", line)
        if len(line) > 20:
            kept.append(line)

    # Re-attach lines that start with a connective to the previous line.
    rejoined = "\t".join(kept)
    rejoined = re.sub("\t(?=而|但|对于|此外|因此|与此同时|这种|基于此|但是|然而)", "", rejoined)

    paragraphs = []
    for line in re.sub("\t", "\n", rejoined).split("\n"):
        # Indent, then drop paragraphs mentioning javascript/html keywords.
        line = re.sub(".*(function。|html|background|javascript|image).*", '', "  " + line)
        if line:
            paragraphs.append(line.strip("——"))

    separator = "\n\n" if len(paragraphs) >= 10 else ""
    return separator.join(paragraphs)
if __name__ == "__main__":
    # Manual smoke test: run the cleaner on a short sample and print the result.
    sample = """工业和信息化部◀——◀——◀——◀——◀—— 人力资源社会保障部 生态环境部 商务部 市场监管总局
 持续健全市场化运营体制机制，守好安全生产底线红线，推进绿色低碳科技研发应用，为实现碳达峰碳中和目标贡献力量。把坚持党的领导加强党的建设融入公司治理，凝聚各方面工作合力，努力开创公司改革发展新局面。（图片：孚能科技将绿色发展融入企业成长，并带动产业链协同提昇晉昇，提拔）
    """
    print(clean_html_tag(content=sample))
--- a/copy_content.py
+++ b/copy_content.py
@@ -21,8 +21,8 @@ from docx.table import _Cell, Table, _Row
 from docx.text.paragraph import Paragraph
 from docx.shared import Pt
 # 定义待复制内容的匹配模式
-start_pattern = re.compile(r'(?<=[0-9][\.．]会计报表重要项目的明细信息及说明)$')
+start_pattern = re.compile(r'(?<=[0-9][\.．]会计报表重要项目的明细信息及说明)$|(?<=[0-9][\.．]会计报表重要项目的明细信息及说明。)$')
-end_pattern = re.compile(r'(?<=[0-9][\.．]需要说明的其他事项)$|(?<=[0-9][\.．]需要说明的其他事项。)（略）$')
+end_pattern = re.compile(r'(?<=[0-9][\.．]需要说明的其他事项)$|(?<=[0-9][\.．]需要说明的其他事项。)（略）$|(?<=[0-9][\.．]需要说明的其他事项[。\.])$')
 def iter_block_items(parent):
@@ -129,10 +129,92 @@ def copy_content_main(doc_path: str, temp_path: str):
    return None
+# todo: 先复制内容到模板中，保存更新后的模板
def new_copy_content_main(doc_document, template_document):
    """Copy the section between the start and end headings of a report into the template.

    Body-level paragraphs and tables of ``doc_document`` located after the
    paragraph matching ``start_pattern`` and before the one matching
    ``end_pattern`` are collected (whitespace-only paragraphs are skipped,
    tables are deep-copied) and inserted, in their original order, right
    after the first paragraph of ``template_document`` that matches
    ``start_pattern``.

    :param doc_document: opened source document to copy from (not modified).
    :param template_document: opened template document; modified in place.
    :return: ``template_document`` with the copied content inserted.
    :raises ValueError: if the template has no paragraph matching
        ``start_pattern`` (previously this surfaced as an opaque ``TypeError``
        from indexing with ``None``).
    """
    doc = doc_document
    blank = re.compile(r'^\s*$')

    # Collected content: ``str`` for paragraph text, XML element for a table.
    copied = []
    inside_section = False
    for element in doc.element.body.xpath("w:p | w:tbl"):
        if isinstance(element, CT_P):
            text = Paragraph(element, doc).text
            if start_pattern.search(text):
                # The heading itself is not copied.
                inside_section = True
                continue
            if end_pattern.search(text):
                break
            if inside_section and not blank.match(text):
                copied.append(text)
        elif inside_section and isinstance(element, CT_Tbl):
            # Copy so the source document keeps its own table.
            copied.append(deepcopy(element))

    # Locate the insertion point in the template.
    source_doc = template_document
    anchor = None
    for para in source_doc.paragraphs:
        if start_pattern.search(para.text):
            anchor = para._element
            break
    if anchor is None:
        raise ValueError("template document has no paragraph matching start_pattern")

    # Insert back to front so each item lands directly after the anchor and
    # the final order matches the source.
    for item in reversed(copied):
        if isinstance(item, str):
            # The style may differ between documents, so it is set explicitly.
            new_para = source_doc.add_paragraph(item, style='Normal')
            font = new_para.runs[0].font
            font.name = "宋体"
            font.size = Pt(12)
            new_para.paragraph_format.space_before = Pt(12)
            new_para.paragraph_format.first_line_indent = Pt(25)
            # add_paragraph appended it at the end; move it after the anchor.
            anchor.addnext(new_para._element)
        else:
            anchor.addnext(item)
    return source_doc
 if __name__ == '__main__':
-    doc_path = "data/3月23测试半成品.docx"
+    doc_path = "data/2022年度德阳市旌阳区人民法院(1).docx"
    # doc_path = 'data/特殊教育学校(1).docx'
-    temp_path = "data/new_财务报告模板.docx"
+    temp_path = "data/财务报告模板(2).doc"
    copy_content_main(doc_path, temp_path)
    # docx_file = r'wKjIbGQeSb6AUq1aAAgAABcLaMw312.docx'
    # doc = Document(docx_file)

--- a/copy_table.py
+++ b/copy_table.py
@@ -110,8 +110,8 @@ def new_document():
    return para._p
-def generate_report(table_names_data, save_path, template_path, tables_dict):
+def generate_report(table_names_data, template_document, tables_dict):
-    document = Document(template_path)
+    document = template_document
    pattern = re.compile(r'(?<={{).*?(?=}})')
    # block 块对象主要包括标题、段落、图片、表、列表
    # run 内联对象为块对象的组成部分，块对象的所有内容都包含在内联对象中，一个块对象由一个或多个内联对象组成。修改字体、字号、文字颜色需要用到run
@@ -139,7 +139,41 @@ def generate_report(table_names_data, save_path, template_path, tables_dict):
                p = block._element
                p.getparent().remove(p)
                block._p = block._element = None
-    document.save(save_path)
+    # document.save(save_path)
+    return document
def new_generate_report(table_names_data, template_document, tables_dict):
    """Replace ``{{table…}}`` placeholder paragraphs of the template with table elements.

    :param table_names_data: mapping from a table title to an iterable of XML
        elements to insert.
    :param template_document: opened template document; modified in place.
    :param tables_dict: mapping from a placeholder name (the text between
        ``{{`` and ``}}``) to the table title used as key of
        ``table_names_data``.
    :return: ``template_document`` after substitution.
    """
    document = template_document
    placeholder = re.compile(r'(?<={{).*?(?=}})')

    # Only body paragraphs are scanned for placeholders.
    for paragraph in document.paragraphs:
        if not isinstance(paragraph, Paragraph):
            continue
        found = placeholder.findall(paragraph.text)
        if not found or "table" not in found[0]:
            continue
        table_name = found[0]
        for table_element in table_names_data[tables_dict[table_name]]:
            # XML-level insert: the element becomes the sibling immediately
            # following the placeholder paragraph.
            paragraph._p.addnext(table_element)
        # The placeholder paragraph itself is removed from the document.
        placeholder_xml = paragraph._element
        placeholder_xml.getparent().remove(placeholder_xml)
        paragraph._p = paragraph._element = None

    # Remove the "续表<digit>" (continued-table) markers defined in the template.
    continued = re.compile(r'(?<=续表)[0-9]')
    for block in iter_block_items(document):
        if isinstance(block, Paragraph) and continued.findall(block.text):
            marker_xml = block._element
            marker_xml.getparent().remove(marker_xml)
            block._p = block._element = None
    return document
 if __name__ == '__main__':
@@ -147,7 +181,6 @@ if __name__ == '__main__':
    start_time = datetime.datetime.now()
    # 参数：tables_dict、docx_file、save_path、template_path
    tables_dict = {
-                "table13": "以名义金额计量的资产名称、数量等情况，以及以名义金额计量理由的说明",
                "table5": "收入费用表（2）",
                "table4": "收入费用表（1）",
                "table3": "资产负债表续表2",
@@ -168,5 +201,5 @@ if __name__ == '__main__':
    document = Document(docx_file)
    data_result = get_choose_table(document, list(tables_dict.values()))
    print(data_result)
-    generate_report(data_result, save_path=r'data/报告文件.docx', template_path=r'data/new_财务报告模板.docx', tables_dict=tables_dict)
+    # generate_report(data_result, save_path=r'data/报告文件.docx', template_path=r'data/new_财务报告模板.docx', tables_dict=tables_dict)
--- a/detector_source.py
+++ b/detector_source.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# @File    : 资源检测程序.py
+# @Time    : 2022/9/30 10:39
+# @Author  : bruxelles_li
+# @Software: PyCharm
+import logging
+import os, time, re, subprocess
+# 获取CPU负载信息
def get_cpu():
    """Return the busy fraction of CPU time from the aggregate ``cpu`` line of /proc/stat.

    The first three counters after the ``cpu`` label are summed as work time
    and the fourth is taken as idle time (presumably user, nice, system and
    idle as described in proc(5)). The counters are compared against zero, so
    the value is an average over the whole period the counters cover, not an
    instantaneous load.

    :return: ``work / (work + idle)`` as a float, or ``0`` when the ``cpu``
        line is missing or reports no work time.
    :raises OSError: if /proc/stat cannot be opened.
    """
    fields = None
    # ``with`` closes the file even if parsing fails. Iterating the file also
    # terminates at EOF instead of looping forever when no "cpu " line exists.
    with open("/proc/stat", "r") as stat_file:
        for line in stat_file:
            if "cpu " in line:
                fields = line.split()
                break
    if fields is None:
        return 0

    worktime = int(fields[1]) + int(fields[2]) + int(fields[3])
    idletime = int(fields[4])
    if worktime == 0:
        # Also avoids dividing by zero when every counter is zero.
        return 0
    return float(worktime) / (idletime + worktime)
+# 获取内存负载信息
def get_mem_usage_percent():
    """Compute physical and swap memory usage percentages from /proc/meminfo.

    Physical usage is ``MemTotal - (MemFree + Buffers + Cached)`` relative to
    ``MemTotal``; swap usage is ``SwapTotal - SwapFree`` relative to
    ``SwapTotal`` and is reported as ``0`` when no swap is configured.

    :return: ``(physical_percent, virtual_percent)``, or ``None`` when the
        file cannot be read or parsed, or when one of the six fields above is
        missing.
    """
    wanted = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree')
    stats = {}
    try:
        # ``with`` guarantees the handle is closed even when parsing fails.
        with open('/proc/meminfo', 'r') as meminfo:
            for line in meminfo:
                name, colon, rest = line.partition(':')
                if colon and name in wanted:
                    stats[name] = int(rest.split()[0])
    except (OSError, ValueError, IndexError):
        return None
    # A missing field used to raise NameError outside the try block; treat it
    # like any other unreadable input.
    if any(name not in stats for name in wanted):
        return None

    mem_total = stats['MemTotal']
    mem_used = mem_total - (stats['MemFree'] + stats['Buffers'] + stats['Cached'])
    physical_percent = usage_percent(mem_used, mem_total)
    virtual_percent = 0
    if stats['SwapTotal'] > 0:
        virtual_percent = usage_percent(stats['SwapTotal'] - stats['SwapFree'], stats['SwapTotal'])
    return physical_percent, virtual_percent
def usage_percent(use, total):
    """Return ``use`` as a percentage of ``total``.

    :param use: amount in use.
    :param total: total amount available.
    :return: ``use / total * 100`` as a float.
    :raises Exception: with the message ``"ERROR - zero division error"``
        when ``total`` is zero.
    """
    try:
        ratio = float(use) / total
    except ZeroDivisionError:
        raise Exception("ERROR - zero division error")
    return ratio * 100
+# 获取磁盘根目录占用信息
def disk_info():
    """Return the usage of the filesystem mounted at ``/`` as a whole-number percentage.

    :return: the percentage, truncated to an integer and rendered as ``str``.
    """
    stats = os.statvfs('/')  # root filesystem; change the path if needed
    fragment_size = stats.f_frsize
    total_bytes = fragment_size * stats.f_blocks
    free_bytes = fragment_size * stats.f_bfree
    used_percent = (total_bytes - free_bytes) * 100.0 / total_bytes
    return str(int(used_percent))
+# 获取内存占用信息
def mem_info():
    """Return the physical memory usage as a whole-number percentage string.

    Uses the first element of the pair returned by ``get_mem_usage_percent``.
    NOTE(review): that helper can return ``None``, which makes this function
    raise ``TypeError`` — confirm whether callers expect that.
    """
    physical_percent = get_mem_usage_percent()[0]
    return str(int(physical_percent))
+# 获取CPU占用信息
def cpu_info():
    """Return the CPU busy ratio from ``get_cpu`` as a whole-number percentage string."""
    busy_percent = get_cpu() * 100
    return str(int(busy_percent))
+# 获取系统占用信息
def sys_info():
    """Return the number of values reported by ``os.getloadavg()``.

    NOTE(review): this returns the length of the load-average tuple, not the
    load itself; the original comment talks about flagging a load above 3, so
    the intended return value should be confirmed with the callers.
    """
    return len(os.getloadavg())
+# 获取计算机当前时间
def time_info():
    """Return a label followed by the local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return "主机的当前时间：%s" % time.strftime('%Y-%m-%d %H:%M:%S')
+# 获取计算机主机名称
def hostname_info():
    """Return a label followed by the output of the ``hostname`` command."""
    with os.popen("hostname") as pipe:
        host = pipe.read().strip()
    return "你的主机名是: %s" % host
+# 获取IP地址信息
def ip_info():
    """Return the ``inet`` address entries that ``ip a`` lists for interface ``ens192``.

    :return: the second column of each matching line, stripped of surrounding
        whitespace; an empty string when nothing matches.
    """
    with os.popen("ip a| grep ens192 | grep inet | awk '{print $2}'") as pipe:
        addresses = pipe.read()
    return addresses.strip()
+# 获取根的占用信息
def disk_info_root():
    """Report the ``df -h`` line of ``/dev/mapper/centos-root`` when it is more than 60% used.

    :return: a header row and the matching ``df`` row (device column renamed
        to ``centos-root``), tab-separated and joined by ``\\r\\n``; ``None``
        when the device is not listed or its usage is 60% or below.
    """
    # Decode the output as text: with bytes the string comparison below could
    # never match on Python 3. ``run`` also waits for the child to exit.
    completed = subprocess.run(["df", "-h"], stdout=subprocess.PIPE, universal_newlines=True)
    for row in completed.stdout.splitlines():
        fields = row.strip().split()
        # Only the CentOS root volume is of interest here.
        if '/dev/mapper/centos-root' not in fields:
            continue
        header = "\t".join([u'-文件系统-', u'--容量-', u'-已用-', u'-可用-', u'-已用-', u'-挂载点--'])
        # Fifth column is the use percentage (e.g. "61%"). Parse it with int()
        # instead of eval() so command output is never executed.
        percent_text = fields[4][0:-1] if len(fields) > 4 else ""
        if percent_text.isdigit() and int(percent_text) > 60:
            fields[0] = 'centos-root'
            return header + '\r\n' + '\t'.join(fields)
    return None
+# 测试程序
+# if __name__ == "__main__":
+#     disk_information = disk_info()
+#     disk_usage = [int(s) for s in re.findall(r'\b\d+\b', disk_information)]
+#     infomation = [hostname_info(), time_info(), disk_information]
+    # print(disk_usage)
+    # # 如果磁盘占用高于60%就发邮件告警
+    # if disk_usage[0] > 60:
+    #     print("当前磁盘占用率已超过60%，建议清除磁盘内存！")
+    #
+    # # print(hostname_info())
+    # # print(time_info())
+    # # print(ip_info())
+    # print(sys_info())
+    # print(cpu_info())
+    # print(mem_info())
+    # print(disk_info())
--- a/extract_factor.py
+++ b/extract_factor.py
@@ -3,7 +3,7 @@
 # @Author : ctt
 # @File : 文本内容提取
 # @Project : untitled1
-import re
+import re, os
 from docx import Document
 import pandas as pd
@@ -40,7 +40,7 @@ class Extract:
    # {“主要职能”：””, “机构情况”：””, “人员情况”：””, “当年取得的主要事业成效”}
    def __init__(self):
        # self.main_functions = re.compile(r'(?<=[0-9][\.．]主要职能[。\n])(.|\n)*?(?=[0-9][\.．]机构情况[。\n])')
-        self.main_functions = re.compile(r'(?<=[0-9][\.．]主要职能[。\n])(.|\n)*?(?=（[一二三四五六七八九十]）当年取得的主要事业成效[。\n])')
+        self.main_functions = re.compile(r'(?<=（[一二三四五六七八九十]）基本情况[。\n])(.|\n)*?(?=（[一二三四五六七八九十]）当年取得的主要事业成效[。\n])')
        # self.institutional_situation = re.compile(r'(?<=[0-9][\.．]机构情况[。\n])(.|\n)*?(?=[0-9][\.．]人员情况[。\n])')
        # self.personnel_situation = re.compile(r'(?<=[0-9][\.．]人员情况[。\n])(.|\n)*?(?=（[一二三四五六七八九十]）当年取得的主要事业成效[。\n])')
        self.business_results = re.compile(r'(?<=（[一二三四五六七八九十]）当年取得的主要事业成效[。\n])(.|\n)*?(?=[一二三四五六七八九十]、收入支出预算执行情况分析)')
@@ -74,12 +74,16 @@ def get_text_from_docx(filepath):
        contents = []
        for paragraph in document.paragraphs:
            if '<w:numPr>' in paragraph._element.xml:
-                contents.append('1.'+paragraph.text)
+                # print(paragraph.text)
+                contents.append('1.'+paragraph.text.replace("\xa0", ""))
                contents.append('\n')
            else:
-                contents.append(paragraph.text)
+                # print(paragraph.text)
+                contents.append(paragraph.text.replace("\xa0", ""))
                contents.append('\n')
-        return ''.join(contents)
+        str_contents = ''.join(contents)
+        # return ''.join(contents)
+        return str_contents
 def get_cover_content_from_docx(filepath):
@@ -116,26 +120,10 @@ def get_cover_content_from_docx(filepath):
 if __name__ == '__main__':
-    new_path = "data/2022年度安岳县元坝镇人民政府部门决算分析报告(1).docx"
+    # filepath = "data/wKjIbGRUpsaATABTAA5_0ejDDaQ144.docx"
-    document = get_text_from_docx(new_path)
-    data = Extract().extract_result(document)
-    print(data)
-    # fifth_area_pattern = re.compile(r'(?<=[0-9][\.．]会计报表重要项目的明细信息及说明[。\n])(.|\n)*?(?=[0-9][\.．]需要说明的其他事项[。\n])')
-    # filepath = "wKjIbGQeSb6AUq1aAAgAABcLaMw312.docx"
-    # document = Document(filepath)
-    # documents = get_text_from_docx(filepath)
-    #
-    # area_group = fifth_area_pattern.search(documents)
-    # if area_group:
-    #     area_text = area_group.group().strip("1.").strip()
-    # else:
-    #     area_text = ""
-    #
-    # print(area_text)
    # cover_contents, other_contents = get_cover_content_from_docx(filepath)
    # cover_pattern = re.compile(r"([0-9]{0,4}).*(?=(财务报告))")
-    #
+    # #
-    # # print(content)
    # cover_group = cover_pattern.search(cover_contents)
    # if cover_group:
    #     cover_text = cover_group.group().strip()
@@ -147,13 +135,13 @@ if __name__ == '__main__':
    # other_data["reportTitle"] = cover_text
    # print(other_data)
+    extract = Extract()
+    document = get_text_from_docx("data/2022年度安岳县元坝镇人民政府部门决算分析报告(1).docx")
+    data = Extract().extract_result(document)
+    print(data)
+    # path = r'D:\四川报告\相关代码\四川报告之文本内容提取\data'
-    # extract = Extract()
-    # # path = r'D:\四川报告\相关代码\四川报告之文本内容提取\data'
-    # path = "data/temp.docx"
-    # result = extract.extract_result(path)
-    # print(result)
    # for file in os.listdir(path):
    #     if file[-4:] == 'docx':
    #         filepath = os.path.join(path, file)

--- a/extract_table.py
+++ b/extract_table.py
@@ -12,6 +12,7 @@ from docx.oxml.text.paragraph import CT_P
 from docx.oxml.table import CT_Tbl
 from docx.table import _Cell, Table, _Row
 from docx.text.paragraph import Paragraph
+from docx.shared import Pt
 def iter_block_items(parent):
@@ -161,60 +162,137 @@ def get_other1_table(document, table_names: list):
    return table_names_data
def read_document(document, old, new):
    """Replace ``old`` with ``new`` in every run of every top-level table cell.

    :param document: opened document; modified in place.
    :param old: text to search for.
    :param new: replacement text.
    :return: the same ``document`` object.
    """
    # ``row.cells`` yields a merged cell once per grid position it spans.
    # Remember the cells already handled so a replacement whose ``new``
    # contains ``old`` is not applied repeatedly. The elements themselves are
    # stored (not their ids) so the references stay alive and unique.
    handled = set()
    for table in document.tables:
        for row in table.rows:
            for cell in row.cells:
                cell_element = cell._tc
                if cell_element in handled:
                    continue
                handled.add(cell_element)
                # Checked once per cell rather than once per run.
                if old not in cell.text:
                    continue
                # A cell holds paragraphs, which in turn hold the runs.
                for paragraph in cell.paragraphs:
                    for run in paragraph.runs:
                        run.text = run.text.replace(old, new)
    return document
def replace_document(document):
    """Expand ``&&&``-joined placeholder paragraphs into separate styled paragraphs.

    Two kinds of marker paragraph are handled:

    * a paragraph containing "主要职能" together with "机构情况" or ``&&&``;
    * any other paragraph containing ``&&&``.

    For each kind, the text of the last matching paragraph is split on
    ``&&&`` and every non-empty fragment is inserted as its own paragraph
    (宋体 12pt, first-line indent 25pt, line spacing 23.4pt) before each
    marker paragraph; the marker paragraphs are then removed. Paragraphs that
    still contain ``{{info.amountDescription}}`` are removed as well.

    :param document: opened document; modified in place.
    :return: the same ``document`` object.
    """

    def is_function_marker(text):
        return "主要职能" in text and ("机构情况" in text or "&&&" in text)

    def is_split_marker(text):
        return "&&&" in text

    def is_amount_placeholder(text):
        return "{{info.amountDescription}}" in text

    def insert_fragments(fragments, is_marker):
        # Insert every non-empty fragment before each current marker paragraph.
        for fragment in fragments:
            if not fragment:
                continue
            for para in document.paragraphs:
                if not is_marker(para.text):
                    continue
                # Insert relative to the matched paragraph itself. Looking it
                # up again by index would be off by one after an earlier
                # insertion in the same pass.
                new_para = para.insert_paragraph_before(fragment)
                font = new_para.runs[0].font
                font.name = "宋体"
                font.size = Pt(12)
                new_para.paragraph_format.first_line_indent = Pt(25)
                new_para.paragraph_format.line_spacing = Pt(23.4)

    def remove_matching(is_marker):
        # Delete every body paragraph whose text satisfies ``is_marker``.
        for para in document.paragraphs:
            if is_marker(para.text):
                document._element.body.remove(para._element)

    # Collect the fragments; the last matching paragraph of each kind wins.
    function_fragments, other_fragments = [], []
    for para in document.paragraphs:
        text = para.text
        if is_function_marker(text):
            function_fragments = text.split("&&&")
        elif is_split_marker(text):
            other_fragments = text.split("&&&")

    # First section: expand, then drop the original marker paragraph(s).
    insert_fragments(function_fragments, is_function_marker)
    remove_matching(is_function_marker)

    # Second section: same treatment for the remaining "&&&" paragraphs.
    insert_fragments(other_fragments, is_split_marker)
    remove_matching(is_split_marker)

    # Finally drop the unfilled amount-description placeholder.
    remove_matching(is_amount_placeholder)
    return document
 if __name__ == '__main__':
-    docx_file = r'data/3月23测试半成品.docx'
+    docx_file = r'data/20230512204902_5月10号财务报告测试.docx'
    document = Document(docx_file)
-    table_names = ['货币资金明细信息如下']
+    replace_document(document)
-    print(get_other1_table(document, table_names))
+    # 循环遍历所有段落
+    # temp_list_0, temp_list_1 = [], []
-    # import datetime
+    # for para in document.paragraphs:
-    # start_time = datetime.datetime.now()
+    #     # 使用replace()函数替换垂直制表符
-    # docx_file = r'data/四川报告模板.docx'
+    #     if "主要职能&&&" in para.text:
-    # document = Document(docx_file)
+    #         temp_list_0 = para.text.split("&&&")
-    # data = get_choose_table(document, ['资产负债表', '收入费用表（1）', '收入费用表（2）'])
+    #     elif "&&&" in para.text:
-    # # 处理资产负债表
+    #         temp_list_1 = para.text.split("&&&")
-    # temp_list = data["资产负债表"]
-    # temp_dict = {}
    #
-    # for temp in temp_list:
+    # for temp in temp_list_0:
-    #     temp_text = re.sub(":", "：", temp["项目"])
+    #     if temp:
-    #     if temp_text.endswith("："):
+    #         for i, p in enumerate(document.paragraphs):
-    #         temp_dict.update({"temp_key": temp_text})
+    #             if "主要职能&&&" in p.text:
-    #         continue
+    #                 target_para = document.paragraphs[i]
-    #     else:
+    #                 new_para = target_para.insert_paragraph_before(temp)
-    #         temp["上级项目"] = temp_dict["temp_key"].strip("：")
+    #                 # todo: 添加段落样式
+    #                 font = new_para.runs[0].font
+    #                 font.name = "宋体"
+    #                 font.size = Pt(12)
+    #                 # new_para.paragraph_format.space_before = Pt(12)
+    #                 new_para.paragraph_format.first_line_indent = Pt(25)
+    #                 new_para.paragraph_format.line_spacing = Pt(23.4)
    #
+    # for p in document.paragraphs:
+    #     if "主要职能&&&" in p.text:
+    #         p.text = p.text.replace(p.text, "")
+    #         document._element.body.remove(p._element)
    #
-    # # 处理收入费用表（1）
+    # for temp in temp_list_1:
-    # temp_list_0 = data["收入费用表（1）"]
+    #     if temp:
-    # temp_dict_0 = {"temp_key": "收入合计"}
+    #         for i, p in enumerate(document.paragraphs):
-    # # updata_list = ["收入合计", "本年盈余"]
+    #             if "&&&" in p.text:
-    # for temp_0 in temp_list_0:
+    #                 target_para = document.paragraphs[i]
-    #     if temp_0["项目"].strip() == "收入合计":
+    #                 new_para = target_para.insert_paragraph_before(temp)
-    #         temp_dict_0.update({"temp_key": "本年盈余"})
+    #                 # todo: 添加段落样式
-    #     else:
+    #                 font = new_para.runs[0].font
-    #         if temp_0["项目"].strip() == "本年盈余":
+    #                 font.name = "宋体"
-    #             continue
+    #                 font.size = Pt(12)
-    #         else:
+    #                 # new_para.paragraph_format.space_before = Pt(12)
-    #             temp_0["上级项目"] = temp_dict_0["temp_key"]
+    #                 new_para.paragraph_format.first_line_indent = Pt(25)
+    #                 new_para.paragraph_format.line_spacing = Pt(23.4)
    #
-    # # 处理收入费用表（2）
+    # for p in document.paragraphs:
-    # temp_list_1 = data["收入费用表（2）"]
+    #     if "&&&" in p.text:
-    # temp_dict_1 = {"temp_key": "收入合计"}
+    #         p.text = p.text.replace(p.text, "")
-    # # updata_list = ["收入合计", "本年盈余"]
+    #         document._element.body.remove(p._element)
-    # for temp_1 in temp_list_1:
+    #
-    #     if temp_1["项目"].strip() == "收入合计":
+    # for para in document.paragraphs:
-    #         temp_dict_1.update({"temp_key": "本年盈余"})
+    #     if "{{info.amountDescription}}" in para.text:
-    #     else:
+    #         para.text = para.text.replace(para.text, "")
-    #         if temp_1["项目"].strip() == "本年盈余":
+    #         document._element.body.remove(para._element)
-    #             continue
-    #         else:
+    document.save("data/test.docx")
-    #             temp_1["上级项目"] = temp_dict_1["temp_key"]
-    # print(data)
-    # end_time = datetime.datetime.now()
-    # print(start_time)
-    # print(end_time)
-    # print("耗时: {}秒".format(end_time - start_time))

--- a/generate/gen_user_report_auto_generated.py
+++ b/generate/gen_user_report_auto_generated.py
--- a/generate/pic_echarts.py
+++ b/generate/pic_echarts.py
@@ -11,14 +11,14 @@ from pyecharts.charts import Pie, Bar, Line, Grid
 from pyecharts.faker import Faker
 from pyecharts.render import make_snapshot  # 导入输出图片工具
 from snapshot_selenium import snapshot  # 使用snapshot-selenium 渲染图片
-from pyecharts.globals import CurrentConfig, ThemeType
+from pyecharts.globals import CurrentConfig
 from pathlib import Path
 # import time
 from unittest import mock
 from base.config.base_config import root_dir
 from utils.tools import timeit
-# root_dir = '..'
+import threading
 # 解决linux 下图片生成失败问题
@@ -32,16 +32,6 @@ def get_chrome_driver():
    return webdriver.Chrome(options=options)
-# def timeit(f):
-#     def timed(*args, **kw):
-#         ts = time.time()
-#         print('......begin     {0:8s}......'.format(f.__name__))
-#         result = f(*args, **kw)
-#         te = time.time()
-#         print('......finish    {0:8s}, took:{1:.4f} sec......'.format(f.__name__, te - ts))
-#         return result
-#
-#     return timed
 """
 关于: [图片生成的中文字体样式渲染问题]
@@ -57,10 +47,13 @@ CurrentConfig.ONLINE_HOST = 'http://39.105.62.235:8000/assets/'
 pic_echarts_dir = os.path.join(root_dir, 'generate/echarts')
 Path(pic_echarts_dir).mkdir(parents=True, exist_ok=True)
+lock = threading.RLock()
 @timeit
-def pic_echarts_pie(keys: list, values: list, title: str or None) -> str:
+def pic_echarts_pie(keys: list, values: list, title: str or None, pic_echarts_path: str) -> str:
-    pic_echarts_path = os.path.join(pic_echarts_dir, 'echarts_pie.png')
+    # pic_echarts_path = os.path.join(pic_echarts_dir, 'echarts_pie.png')
+    # pic_echarts_path = os.path.join(pic_echarts_dir, temp_file_name)
+    with lock:
        pie = (
            Pie().add(
                series_name='',
@@ -71,26 +64,23 @@ def pic_echarts_pie(keys: list, values: list, title: str or None) -> str:
                title_opts=opts.TitleOpts(title=title),
                legend_opts=opts.LegendOpts(type_='scroll', pos_left='80%', orient='vertical', textstyle_opts=opts.TextStyleOpts(font_size=20))
            ).set_series_opts(
-            # label_opts=opts.LabelOpts(formatter='{b}: {c}({d}%)')
+                label_opts=opts.LabelOpts(formatter="{b}: {d}%", font_size=18)
-            label_opts=opts.LabelOpts(formatter="{b}: {d}%", font_size=20)
            )
-        # # 设置标签字体大小
-        # .set_series_opts(label_opts=opts.LabelOpts(font_size=22))
        )
+        # print("当前处理的数据集key{}，和value{}".format(keys, values))
+    with lock:
        with mock.patch('snapshot_selenium.snapshot.get_chrome_driver', get_chrome_driver):
            make_snapshot(snapshot, pie.render(), pic_echarts_path)
    # make_snapshot(snapshot, pie.render(), pic_echarts_path)
    return pic_echarts_path
 @timeit
 def pic_echarts_bar(
-        keys: list, dict_values: dict, title=None,
+        keys: list, dict_values: dict, temp_file_name: str, title=None,
-        x_name=None, y_name=None
+        x_name=None, y_name=None,
 ) -> str:
-    pic_echarts_path = os.path.join(pic_echarts_dir, 'echarts_bar.png')
+    pic_echarts_path = os.path.join(pic_echarts_dir, temp_file_name)
    bar = (
        Bar().add_xaxis(
            xaxis_data=keys
@@ -113,10 +103,10 @@ def pic_echarts_bar(
 @timeit
 def pic_echarts_line(
-        keys: list, dict_values: dict, title=None,
+        keys: list, dict_values: dict, temp_file_name: str, title=None,
        x_name=None, y_name=None
 ) -> str:
-    pic_echarts_path = os.path.join(pic_echarts_dir, 'echarts_line.png')
+    pic_echarts_path = os.path.join(pic_echarts_dir, temp_file_name)
    line = (
        Line().add_xaxis(
            xaxis_data=keys
@@ -193,6 +183,7 @@ def pic_echarts_line_test() -> None:
 @timeit
 def pic_echarts_bar_line(
+        temp_file_name: str,
        keys=['2016年报', '2017年报', '2018年报', '2019年报', '2020年报', '2021年报'],
        dict_bar_values={
            '总资产': [1905.11, 1998.17, 2009.65, 2031.37, 1950.35, 1988.65],
@@ -205,7 +196,7 @@ def pic_echarts_bar_line(
        title='资产负债表(CNY)',
        x_name='年度', y_name_left='金额/(亿元)', y_name_right='负债率/(%)'
 ) -> str:
-    pic_echarts_path = os.path.join(pic_echarts_dir, 'echarts_bar_line.png')
+    pic_echarts_path = os.path.join(pic_echarts_dir, temp_file_name)
    bar = (
        Bar().add_xaxis(
            xaxis_data=keys
@@ -342,7 +333,8 @@ def pic_echarts_bar_line_test() -> str:
 if __name__ == '__main__':
-    pic_echarts_pie(keys=Faker.choose(), values=Faker.values(), title='Echarts Pie 标题1')
+    # pic_echarts_pie(keys=Faker.choose(), values=Faker.values(), title='Echarts Pie 标题1')
    # pic_echarts_bar(
    #     keys=Faker.choose(),
    #     dict_values={
@@ -363,4 +355,4 @@ if __name__ == '__main__':
    #     y_name='Y轴名称'
    # )
    # pic_echarts_line_test()
-    # pic_echarts_bar_line()
+    pic_echarts_bar_line()
--- a/generate_report.py
+++ b/generate_report.py
@@ -11,7 +11,7 @@ from flask import request
 from flask import Flask, send_file
 # from transform_doc_to_docx import doc2docx, closesoft
 import subprocess
-from generate.gen_user_report_auto_generated import main_process
+from generate.gen_user_report_auto_generated import new_main_process
 UPLOAD_FOLDER = r'data'       # 上传路径
 Path(UPLOAD_FOLDER).mkdir(parents=True, exist_ok=True)
 abs_path = os.path.split(os.path.realpath(__file__))[0]
@@ -64,7 +64,7 @@ def generate_report(template_path, document_path, report_name, object):
        half_work_path = os.path.join(UPLOAD_FOLDER, template_filename)
    else:
        return "上传文件格式有误，当前仅支持doc 和 docx 格式，请选择正确文件重新上传！"
-    main_process(half_work_path, tables_dict, template_path, report_name, data_object, save_path)
+    # main_process(half_work_path, tables_dict, template_path, report_name, data_object, save_path)
    # send_path = os.path.join(UPLOAD_FOLDER, report_name)
    # return send_file(send_path, as_attachment=True)

--- a/main_server.py
+++ b/main_server.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @File    : main_server.py
+# @Time    : 2023/5/15 15:39
+# @Author  : bruxelles_li
+# @Software: PyCharm
+import logging
+import requests
+import threading
+import sys
+import json
+from pathlib import Path
+# Make the parent directory importable.
+sys.path.append('../')
+# Close redundant connections.
+# NOTE(review): `s` is not referenced again in this file; the functions below call
+# requests.post directly, so confirm whether this session was meant to be used.
+s = requests.session()
+s.keep_alive = False
+from detector_source import sys_info, cpu_info, mem_info
+# File upload server definition (FastDFS client configured from data/fdfs_client.conf).
+from fdfs_client.client import *
+tracker_conf = get_tracker_conf("data/fdfs_client.conf")
+client = Fdfs_client(tracker_conf)
+# One shared format for every handler: time, level, process name, thread name, message.
+formatter = logging.Formatter("%(asctime)s [%(levelname)s] <%(processName)s> (%(threadName)s) %(message)s")
+# Create a logger and set its level.
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+# Create a handler that writes log records to the console, and set its level.
+ch = logging.StreamHandler()
+ch.setLevel(logging.INFO)
+ch.setFormatter(formatter)
+# Create file handlers under the "log" directory next to this file:
+# `fh` receives ERROR and above, `fh1` receives INFO and above.
+# NOTE(review): `os` is not imported explicitly in this file; presumably it arrives
+# through the star import above - confirm.
+_tmp_path = os.path.dirname(os.path.abspath(__file__))
+_tmp_path = os.path.join(_tmp_path, 'log')
+Path(_tmp_path).mkdir(parents=True, exist_ok=True)
+fh = logging.FileHandler(os.path.join(_tmp_path, "main_server_error.log"))
+fh1 = logging.FileHandler(os.path.join(_tmp_path, "main_server_info.log"))
+fh.setLevel(level=logging.ERROR)
+fh1.setLevel(level=logging.INFO)
+fh.setFormatter(formatter)
+fh1.setFormatter(formatter)
+# Send log records to the console and to both files.
+logger.addHandler(ch)
+logger.addHandler(fh)
+logger.addHandler(fh1)
+# TODO: list holding the started workers (system_start appends threading.Thread objects).
+all_thread = []
+# The bare string below records the test and the production callback addresses.
+"""
+测试地址：http://114.115.185.13:9988/datapull/aiReport/report/callbackStatus
+正式地址：http://192.168.1.70:9988/datapull/aiReport/report/callbackStatus
+"""
+# callback_url = "http://114.115.185.13:9988/datapull/aiReport/report/callbackStatus"
+# Endpoint that main_process POSTs the generation status to.
+callback_url = "http://192.168.1.70:9988/datapull/aiReport/report/callbackStatus"
+# todo: merge paragraphs and de-duplicate sentences
+def _post_status(dict_result):
+    """
+    POST a status dictionary as JSON to ``callback_url`` and log the response body.
+    :param dict_result: JSON-serialisable status dictionary
+    :return: None
+    """
+    payload = json.dumps(dict_result)
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    r1 = requests.post(url=callback_url,
+                       headers=headers, data=payload)
+    logger.info(r1.text)
+def main_process(half_document_path, tables_dict, template_path, data_object, report_id,
+                 output_report_path, report_name, template_id):
+    """
+    Generate one report, upload it, and report the outcome to ``callback_url``.
+    The half-finished document and the template are opened with python-docx and handed
+    to ``new_main_process``; the resulting file is post-processed, saved under ``data/``,
+    uploaded through the module-level FastDFS ``client``, and a status dictionary is
+    POSTed to the callback endpoint.
+    :param half_document_path: path of the half-finished document
+    :param tables_dict: passed through to ``new_main_process``
+    :param template_path: path of the template document
+    :param data_object: passed through to ``new_main_process``
+    :param report_id: report id, echoed back as ``id`` in the status dictionary
+    :param output_report_path: passed through to ``new_main_process``
+    :param report_name: used to build the final file name
+    :param template_id: echoed back as ``templeteId`` in the status dictionary
+    :return: the status dictionary (``status`` is "0" on success, "1" on failure)
+    """
+    try:
+        # Function-scope imports are kept where the original code had them.
+        from generate.gen_user_report_auto_generated import new_main_process
+        from docx import Document
+        half_document = Document(half_document_path)
+        template_document = Document(template_path)
+        report_processed_path = new_main_process(half_document=half_document,
+                                                 tables_dict=tables_dict,
+                                                 template_document=template_document,
+                                                 data_object=data_object,
+                                                 report_id=report_id,
+                                                 output_report_path=output_report_path)
+        report_processed = Document(report_processed_path)
+        from extract_table import read_document, replace_document
+        # Presumably replaces non-breaking spaces in the document with nothing
+        # (original note: remove the "circles"/spaces in tables) - confirm in extract_table.
+        report_processed = read_document(report_processed, "\u00A0", "")
+        # Presumably blanks out missing-value placeholders - confirm in extract_table.
+        report_processed = replace_document(report_processed)
+        final_report_name = '{}_{}.docx'.format(str(datetime.datetime.now().strftime('%Y%m%d%H%M%S')), report_name)
+        UPLOAD_FOLDER = r'data/'  # upload directory
+        Path(UPLOAD_FOLDER).mkdir(parents=True, exist_ok=True)
+        send_path = os.path.join(UPLOAD_FOLDER, final_report_name)
+        report_processed.save(send_path)
+        ret_upload = client.upload_by_filename(send_path)
+        remote_file_id = ret_upload["Remote file_id"]
+        logger.info(remote_file_id)
+        # e.g. b'group1/M00/00/0A/wKjIlGRjcHiAVnTuAAEo5wnGJLQ89.docx'
+        # Decode bytes properly instead of trimming the repr ("b'...'"): trimming would
+        # also eat a leading/trailing "b" or a quote from an id that is already a str.
+        if isinstance(remote_file_id, bytes):
+            remote_file_id = remote_file_id.decode()
+        filePath = str(remote_file_id).strip()
+        logger.info(filePath)
+        dict_result = {
+            "status": "0",  # processing status ("0" success, "1" failure)
+            "result": "处理成功",
+            "id": report_id,  # report id
+            "templeteId": template_id,  # report template
+            "filePath": filePath
+        }
+        logger.info(dict_result)
+        # Report the successful result to the Java status-update endpoint.
+        _post_status(dict_result)
+    except Exception as e:
+        # Record the traceback; without this the failure never reaches the error log.
+        logger.exception("report generation failed, report_id=%s", report_id)
+        dict_result = {
+            "status": "1",  # processing status ("0" success, "1" failure)
+            "result": "生成失败！+ {}".format(str(e)),
+            "id": report_id,  # report id
+            "templeteId": template_id,  # report template
+            "filePath": ""
+        }
+        # Report the failure to the Java status-update endpoint. A callback that cannot
+        # be delivered is logged rather than raised out of the worker thread.
+        try:
+            _post_status(dict_result)
+        except requests.exceptions.RequestException:
+            logger.exception("status callback failed, report_id=%s", report_id)
+    return dict_result
+def _usage_exceeds(usage, limit):
+    """
+    Tell whether a usage reading is numerically greater than ``limit``.
+    The reading may be a number or a numeric string (an optional trailing "%" is
+    ignored). A reading that cannot be parsed is logged and treated as not exceeding,
+    so that a malformed value cannot block task processing forever.
+    :param usage: usage reading as returned by the detector helpers
+    :param limit: numeric threshold
+    :return: True when the reading is above ``limit``
+    """
+    try:
+        return float(str(usage).strip().rstrip('%')) > limit
+    except ValueError:
+        logger.warning("unparsable resource usage value: %r", usage)
+        return False
+def env_eval():
+    """
+    Check whether the host has enough free resources to start another task.
+    Reads the system, CPU and memory usage from ``detector_source`` and compares them
+    numerically with the thresholds 20, 95 and 95. The CPU and memory readings used to
+    be compared with the string "95", which orders values lexicographically
+    (for example "100" sorts before "95").
+    :return: False when any threshold is exceeded, True otherwise
+    """
+    # disk_usage = disk_info()
+    sys_usage = sys_info()
+    cpu_usage = cpu_info()
+    men_usage = mem_info()
+    # Not enough resources: return False.
+    if sys_usage > 20 or _usage_exceeds(cpu_usage, 95) or _usage_exceeds(men_usage, 95):
+        return False
+    # Enough resources: return True.
+    return True
+def system_start():
+    """
+    Poll the local queue server forever and start one worker thread per queued task.
+    When the queue is empty the loop sleeps 6 seconds. Otherwise it consumes as many
+    tasks as the reported queue size, waits until ``env_eval`` reports enough free
+    resources, and runs ``main_process`` for each task in a daemon thread.
+    :return: never returns
+    """
+    # The same headers are used for every request, so build them once.
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    while True:
+        r1 = requests.post(url='http://localhost:4000/queue_size', headers=headers)
+        queue_left_number = r1.json()['queue_left_number']
+        logger.info("当前队列任务总数：" + str(queue_left_number))
+        if queue_left_number == 0:
+            time.sleep(6)
+            continue
+        # The queue is not empty: process the reports.
+        for _ in range(queue_left_number):
+            r2 = requests.post(url='http://localhost:4000/subject_consumer', headers=headers)
+            config_info = r2.json()['data']
+            logger.info(config_info)
+            report_id = config_info["report_id"]
+            template_id = config_info["template_id"]
+            half_document_path = config_info["half_document_path"]
+            tables_dict = config_info["tables_dict"]
+            template_path = config_info["template_path"]
+            data_object = config_info["data_object"]
+            output_report_path = config_info["output_report_path"]
+            report_name = config_info["report_name"]
+            logger.info('##########报告生成服务###############')
+            t = threading.Thread(target=main_process, args=(half_document_path, tables_dict, template_path,
+                                                            data_object, report_id, output_report_path,
+                                                            report_name, template_id),
+                                 daemon=True)
+            # Hold the task back until the host has enough free resources.
+            while not env_eval():
+                time.sleep(6)
+            # Start the worker.
+            t.start()
+            # Forget workers that have finished so the list does not grow without
+            # bound while the service keeps running (the list object itself is kept).
+            all_thread[:] = [worker for worker in all_thread if worker.is_alive()]
+            all_thread.append(t)
+def system_resume():
+    """
+    Empty the task queue of the local queue server before the service starts.
+    Asks the server for the queue size; when tasks are left over, keeps calling the
+    consumer endpoint until it reports zero remaining tasks, so that a restart does
+    not process stale tasks again.
+    :return: None
+    """
+    json_headers = {
+        'Content-Type': 'application/json'
+    }
+    # Clear the queue currently held by the server to avoid starting tasks twice.
+    size_reply = requests.post(url='http://localhost:4000/queue_size', headers=json_headers).json()
+    logger.info('当前队列数量：%d' % size_reply['queue_left_number'])
+    if not size_reply['queue_left_number'] > 0:
+        logger.info('队列为空！可放心进行数据去重入库服务 ...')
+        return
+    logger.info('正在消费队列，直到队列为空！')
+    remaining = None
+    # Each consumer call takes one task; stop once the server reports none left.
+    while remaining != 0:
+        consumed = requests.post(url='http://localhost:4000/subject_consumer', headers=json_headers)
+        remaining = consumed.json()['queue_left_number']
+    logger.info('队列消费完毕！可放心进行数据去重入库服务 ...')
+def start_up_check():
+    """
+    Pre-start check: make sure the local queue server answers.
+    Sends one request to the queue-size endpoint. When the connection cannot be
+    established, the error is logged and the process exits with status 123.
+    :return: None
+    """
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    try:
+        # Only reachability matters here; the response itself is not used.
+        requests.post(url='http://localhost:4000/queue_size', headers=headers)
+    except requests.exceptions.ConnectionError as e:
+        logger.error("Error: ConnectionError" + str(e))
+        logger.warning('服务未启动，请先启动server! 程序已退出。')
+        # sys.exit instead of the interactive helper exit(), which the site module
+        # provides for the interpreter shell and which may be absent in programs.
+        sys.exit(123)
+    # The request went through, so the server is up.
+    logger.info("server启动成功！报告生成服务已启动...")
+if __name__ == '__main__':
+    # Start the background processing service.
+    # start_up_check exits the process when the local queue server cannot be reached.
+    start_up_check()
+    logger.info('报告生成服务恢复中 ...')
+    # Consume whatever is still queued so that old tasks are not started again.
+    system_resume()
+    # Pause 30 seconds before announcing that recovery is complete.
+    time.sleep(30)
+    logger.info('报告生成服务恢复完成！')
+    logger.info('报告生成服务运行中 ...')
+    # Enter the polling loop; this call does not return.
+    system_start()
--- a/multiy_area_copy_content.py
+++ b/multiy_area_copy_content.py
--- a/scbg_app_config.py
+++ b/scbg_app_config.py
@@ -16,8 +16,8 @@ timeout = 300                           # 超时 -> 目前为迎合ZZSN_NLP平
 # worker_class = 'gevent'               # 使用gevent模式，还可以使用sync 模式，默认的是sync模式
 # workers = multiprocessing.cpu_count()   # 进程数 12
-workers = 3                           # 低资源 13G 服务器负载过大可调整此处为 1
+workers = 1                           # 低资源 13G 服务器负载过大可调整此处为 1
-threads = 50                            # 指定每个进程开启的线程数
+threads = 10                            # 指定每个进程开启的线程数
 loglevel = 'error'                      # 日志级别，这个日志级别指的是错误日志的级别，而访问日志的级别无法设置
 access_log_format = '%(t)s %(p)s %(h)s "%(r)s" %(s)s %(L)s %(b)s %(f)s" "%(a)s"'  # 设置gunicorn访问日志格式，错误日志无法设置

--- a/start.sh
+++ b/start.sh
@@ -2,14 +2,20 @@
 # description: auto_run
 # 四川报告生成监控
 # 检测脚本是否在运行，若已经在运行，则等待一段时间后再次检查，若未启动则进行启动
-function start_interface() {
+# echo "114.115.185.13 dfs" >>/etc/hosts
+#echo "192.168.200.148 dfs" >>/etc/hosts     # 测试环境
+echo "192.168.1.75 dfs" >>/etc/hosts       # 正式环境
+start_interface() {
  INTERFACE_IS_STRAT=`ps -ef | grep scbg_app_config.py | grep -v grep | wc -l`
-  if [ $INTERFACE_IS_STRAT -eq 4 ] ; then
+  if [ $INTERFACE_IS_STRAT -ne 0 ] ; then
-    usleep
+     sleep 30m
  else
    echo "=========Service Will Start=========="
 #    cd /data/lzc/scbg-python/SCBG-PYTHON && nohup gunicorn -c scbg_app_config.py app_run:app 2>&1 &
-    cd /opt/SCBG-PYTHON && exec nohup gunicorn -c scbg_app_config.py app_run:app 2>&1 &
+    cd /opt/SCBG-PYTHON
+    exec nohup gunicorn -c scbg_app_config.py app_run:app 2>&1 &
+    sleep 5m
+    exec nohup python -u main_server.py >main_server.log 2>&1 &
    echo "=========Service Start Completed!========"
  fi

--- a/transform_doc_to_docx.py
+++ b/transform_doc_to_docx.py
@@ -42,4 +42,4 @@ def doc2docx(path):
 if __name__ == '__main__':
    closesoft()
-    doc2docx(r'D:\四川报告\相关代码\从word中提取指定表格\data\特殊教育学校(1).doc')
+    doc2docx(r'D:/四川报告/相关代码/从word中提取指定表格/data/四川省化工作质量安全检测研究院2022年度部门决算分析报告(1).doc')
--- a/四川报告python服务部署.docx
+++ b/四川报告python服务部署.docx
--- a/四川报告模型conda环境部署文档.docx
+++ b/四川报告模型conda环境部署文档.docx