Commit 91d93313 Author: bruxellse_li

Platform model management

Parent 6291eec9
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="python@180.76.177.55:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.130.239:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22 password (1)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.9.59:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22 password (1)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="zzsn@192.168.1.149:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="zzsn@192.168.1.149:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
</component>
</project>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="49">
<item index="0" class="java.lang.String" itemvalue="pandas" />
<item index="1" class="java.lang.String" itemvalue="tqdm" />
<item index="2" class="java.lang.String" itemvalue="transformers" />
<item index="3" class="java.lang.String" itemvalue="sentencepiece" />
<item index="4" class="java.lang.String" itemvalue="keras" />
<item index="5" class="java.lang.String" itemvalue="gevent" />
<item index="6" class="java.lang.String" itemvalue="torch" />
<item index="7" class="java.lang.String" itemvalue="numpy" />
<item index="8" class="java.lang.String" itemvalue="Flask" />
<item index="9" class="java.lang.String" itemvalue="thulac" />
<item index="10" class="java.lang.String" itemvalue="beautifulsoup4" />
<item index="11" class="java.lang.String" itemvalue="fdfs_client" />
<item index="12" class="java.lang.String" itemvalue="pymysql" />
<item index="13" class="java.lang.String" itemvalue="selenium" />
<item index="14" class="java.lang.String" itemvalue="matplotlib" />
<item index="15" class="java.lang.String" itemvalue="pyecharts" />
<item index="16" class="java.lang.String" itemvalue="requests" />
<item index="17" class="java.lang.String" itemvalue="docx" />
<item index="18" class="java.lang.String" itemvalue="flask_sqlalchemy" />
<item index="19" class="java.lang.String" itemvalue="scikit_learn" />
<item index="20" class="java.lang.String" itemvalue="gensim" />
<item index="21" class="java.lang.String" itemvalue="sentence_transformers" />
<item index="22" class="java.lang.String" itemvalue="elasticsearch" />
<item index="23" class="java.lang.String" itemvalue="nltk" />
<item index="24" class="java.lang.String" itemvalue="symspellpy" />
<item index="25" class="java.lang.String" itemvalue="wordcloud" />
<item index="26" class="java.lang.String" itemvalue="concurrent_log_handler" />
<item index="27" class="java.lang.String" itemvalue="setuptools" />
<item index="28" class="java.lang.String" itemvalue="gunicorn" />
<item index="29" class="java.lang.String" itemvalue="jieba" />
<item index="30" class="java.lang.String" itemvalue="flask" />
<item index="31" class="java.lang.String" itemvalue="flak_cors" />
<item index="32" class="java.lang.String" itemvalue="paddle" />
<item index="33" class="java.lang.String" itemvalue="bert_serving" />
<item index="34" class="java.lang.String" itemvalue="certifi" />
<item index="35" class="java.lang.String" itemvalue="SQLAlchemy" />
<item index="36" class="java.lang.String" itemvalue="xlrd" />
<item index="37" class="java.lang.String" itemvalue="bert_serving_client" />
<item index="38" class="java.lang.String" itemvalue="pytime" />
<item index="39" class="java.lang.String" itemvalue="goose3" />
<item index="40" class="java.lang.String" itemvalue="Flask_Cors" />
<item index="41" class="java.lang.String" itemvalue="paddlepaddle" />
<item index="42" class="java.lang.String" itemvalue="trustai" />
<item index="43" class="java.lang.String" itemvalue="paddle_serving_client" />
<item index="44" class="java.lang.String" itemvalue="tritonclient" />
<item index="45" class="java.lang.String" itemvalue="paddle_serving_server" />
<item index="46" class="java.lang.String" itemvalue="paddlenlp" />
<item index="47" class="java.lang.String" itemvalue="openai" />
<item index="48" class="java.lang.String" itemvalue="feedparser" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Model-Management.iml" filepath="$PROJECT_DIR$/.idea/Model-Management.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Remote Python 3.9.5 (sftp://root@114.116.90.53:22/home/python/anaconda3/envs/JXYQ@py39/bin/python3.9)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
<component name="TemplatesService">
<option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" autoUpload="On explicit save action" serverName="FastText-Model" remoteFilesAllowedToDisappearOnAutoupload="false" autoUploadExternalChanges="true">
<serverData>
<paths name="FastText-Model">
<serverdata>
<mappings>
<mapping deploy="/" local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="python@180.76.177.55:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.130.239:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22 password (1)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.9.59:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22 password (1)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="zzsn@192.168.1.149:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="zzsn@192.168.1.149:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
<option name="myAutoUpload" value="ON_EXPLICIT_SAVE" />
</component>
</project>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="49">
<item index="0" class="java.lang.String" itemvalue="pandas" />
<item index="1" class="java.lang.String" itemvalue="tqdm" />
<item index="2" class="java.lang.String" itemvalue="transformers" />
<item index="3" class="java.lang.String" itemvalue="sentencepiece" />
<item index="4" class="java.lang.String" itemvalue="keras" />
<item index="5" class="java.lang.String" itemvalue="gevent" />
<item index="6" class="java.lang.String" itemvalue="torch" />
<item index="7" class="java.lang.String" itemvalue="numpy" />
<item index="8" class="java.lang.String" itemvalue="Flask" />
<item index="9" class="java.lang.String" itemvalue="thulac" />
<item index="10" class="java.lang.String" itemvalue="beautifulsoup4" />
<item index="11" class="java.lang.String" itemvalue="fdfs_client" />
<item index="12" class="java.lang.String" itemvalue="pymysql" />
<item index="13" class="java.lang.String" itemvalue="selenium" />
<item index="14" class="java.lang.String" itemvalue="matplotlib" />
<item index="15" class="java.lang.String" itemvalue="pyecharts" />
<item index="16" class="java.lang.String" itemvalue="requests" />
<item index="17" class="java.lang.String" itemvalue="docx" />
<item index="18" class="java.lang.String" itemvalue="flask_sqlalchemy" />
<item index="19" class="java.lang.String" itemvalue="scikit_learn" />
<item index="20" class="java.lang.String" itemvalue="gensim" />
<item index="21" class="java.lang.String" itemvalue="sentence_transformers" />
<item index="22" class="java.lang.String" itemvalue="elasticsearch" />
<item index="23" class="java.lang.String" itemvalue="nltk" />
<item index="24" class="java.lang.String" itemvalue="symspellpy" />
<item index="25" class="java.lang.String" itemvalue="wordcloud" />
<item index="26" class="java.lang.String" itemvalue="concurrent_log_handler" />
<item index="27" class="java.lang.String" itemvalue="setuptools" />
<item index="28" class="java.lang.String" itemvalue="gunicorn" />
<item index="29" class="java.lang.String" itemvalue="jieba" />
<item index="30" class="java.lang.String" itemvalue="flask" />
<item index="31" class="java.lang.String" itemvalue="flak_cors" />
<item index="32" class="java.lang.String" itemvalue="paddle" />
<item index="33" class="java.lang.String" itemvalue="bert_serving" />
<item index="34" class="java.lang.String" itemvalue="certifi" />
<item index="35" class="java.lang.String" itemvalue="SQLAlchemy" />
<item index="36" class="java.lang.String" itemvalue="xlrd" />
<item index="37" class="java.lang.String" itemvalue="bert_serving_client" />
<item index="38" class="java.lang.String" itemvalue="pytime" />
<item index="39" class="java.lang.String" itemvalue="goose3" />
<item index="40" class="java.lang.String" itemvalue="Flask_Cors" />
<item index="41" class="java.lang.String" itemvalue="paddlepaddle" />
<item index="42" class="java.lang.String" itemvalue="trustai" />
<item index="43" class="java.lang.String" itemvalue="paddle_serving_client" />
<item index="44" class="java.lang.String" itemvalue="tritonclient" />
<item index="45" class="java.lang.String" itemvalue="paddle_serving_server" />
<item index="46" class="java.lang.String" itemvalue="paddlenlp" />
<item index="47" class="java.lang.String" itemvalue="openai" />
<item index="48" class="java.lang.String" itemvalue="feedparser" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.9.5 (sftp://root@114.116.90.53:22/home/python/anaconda3/envs/JXYQ@py39/bin/python3.9)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/FastText-Model.iml" filepath="$PROJECT_DIR$/.idea/FastText-Model.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteMappingsManager">
<list>
<list>
<remote-mappings server-id="python@sftp://root@114.116.90.53:22/home/python/anaconda3/envs/JXYQ@py39/bin/python3.9">
<settings>
<list>
<mapping local-root="$PROJECT_DIR$" remote-root="/home/python/lzc/新平台模型管理/FastText-Model" />
</list>
</settings>
</remote-mappings>
</list>
</list>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="WebServers">
<option name="servers">
<webServer id="89b44d2f-6e3e-40a6-8aa0-e1bc3fbcfd0f" name="FastText-Model">
<fileTransfer rootFolder="/home/python/lzc/新平台模型管理/FastText-Model" accessType="SFTP" host="114.116.90.53" port="22" sshConfigId="c0166359-81ab-467c-838f-8c7ee48db0f2" sshConfig="root@114.116.90.53:22 password">
<advancedOptions>
<advancedOptions dataProtectionLevel="Private" passiveMode="true" shareSSLContext="true" />
</advancedOptions>
</fileTransfer>
</webServer>
</option>
</component>
</project>
\ No newline at end of file
# -*- coding: utf-8 -*-
# Intelligent extraction request
# 1. Considered: stop using the entity class when requesting intelligent extraction
#    a. Still in use: pass the HTML source file directly in the raw HTTP request body, and pass lang-code and link-text as query parameters
#    b. Reason: testing in Postman is inconvenient otherwise, since a pasted HTML source file cannot be used
# 2. Rejected: dropping the entity class; its advantages outweigh the drawbacks
#    a. An entity class makes it easy to extend parameter fields
#    b. It is easy to render API docs: calling json_parameter_utility.get_json_parameters can display the request entity class
class ExtractionRequest:
    # Language code
    # 1. Required when extracting non-Chinese articles
    lang_code = ""
    # Link text
    # 1. Used for title extraction; without it, title accuracy drops
    link_text = ""
    # Article page source file
    # 1. Used to extract the title, publish time, content, etc.
    article_html = ""

    @staticmethod
    def from_dict(dictionary: dict):
        extraction_request = ExtractionRequest()
        # Alternative approach:
        # 1. update the internal __dict__ with the dictionary
        # extraction_request.__dict__.update(dictionary)
        # Set each dictionary value on the current object
        for key in dictionary:
            setattr(extraction_request, key, dictionary[key])
        return extraction_request

    def to_dict(self):
        # Convert to a dict:
        # 1. this method is needed when serializing to JSON
        # 2. to get a JSON string: json.dumps(extraction_result, default=ExtractionResult.to_dict)
        data = {}
        # Use the internal __dict__ object
        # 1. merge the internal __dict__ into a new dict
        data.update(self.__dict__)
        return data


# Extraction result
class ExtractionResult:
    # Title
    title = ""
    # Publish date
    publish_date = ""
    # Body text (keeps all HTML tags, e.g. br, img)
    text = ""
    # URL
    url = ""
    # Abstract
    meta_description = ""
    # Clean body text (without HTML)
    cleaned_text = ""
    # Source (currently only supported when extracting the "source" field of Chinese sites)
    # source = ""
    # Top image (top_image: never yields anything, attribute no longer used)
    # top_image = ""

    def to_dict(self):
        # Convert to a dict:
        # 1. this method is needed when serializing to JSON
        # 2. to get a JSON string: json.dumps(extraction_result, default=ExtractionResult.to_dict)
        data = {}
        # Use the internal __dict__ object
        # 1. merge the internal __dict__ into a new dict
        data.update(self.__dict__)
        return data


class UrlPickingRequest:
    # Response URL of the list page
    # 1. used as the base URL to join extracted relative URLs
    # 2. the base URL must be the response URL
    # 3. example: in Python, after requests.get(url), use resp.url as the base URL
    list_page_resp_url = ""
    # List page source file
    # 1. used to extract article URLs
    list_page_html = ""

    @staticmethod
    def from_dict(dictionary: dict):
        url_picking_request = UrlPickingRequest()
        # Set each dictionary value on the current object
        for key in dictionary:
            setattr(url_picking_request, key, dictionary[key])
        return url_picking_request

    def to_dict(self):
        # Convert to a dict:
        # 1. this method is needed when serializing to JSON
        # 2. to get a JSON string: json.dumps(extraction_result, default=ExtractionResult.to_dict)
        data = {}
        # Use the internal __dict__ object
        # 1. merge the internal __dict__ into a new dict
        data.update(self.__dict__)
        return data
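# Usage sketch (added for illustration; the payload values are hypothetical,
# while the json.dumps(default=...) call follows the comments above):
# import json
# payload = {"lang_code": "en", "link_text": "Example", "article_html": "<html>...</html>"}
# extraction_request = ExtractionRequest.from_dict(payload)
# extraction_result = ExtractionResult()
# extraction_result.title = "Example"
# print(json.dumps(extraction_result, default=ExtractionResult.to_dict, ensure_ascii=False))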
# FastText-Model

#### Introduction
Model repository of the new-platform NLP algorithm group

#### Installation
1. Create a conda environment with the required Python version
2. Install requirement.txt
3. Alternatively, point to a runtime environment created on the host machine in advance

#### Usage
1. xxxx
2. xxxx
3. xxxx

#### Contributing
1. Fork this repository
2. Create a Feat_xxx branch
3. Commit your code
4. Open a Pull Request
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/7/31 10:21
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File    : app_config.py
# @Time    : 2023/4/1 10:31
# @Author  : bruxelles_li
# @Software: PyCharm
import os
import multiprocessing
from pathlib import Path

bind = '0.0.0.0:4005'  # bound IP and port
backlog = 512          # listen queue size
# chdir = '/home/zzsn/liuyan/bin'  # working directory gunicorn switches to
timeout = 300  # timeout -> extended for now to accommodate the ZZSN_NLP platform's Belt-and-Road element extraction (file) requirement
# worker_class = 'gevent'  # use gevent mode; sync mode is also available and is the default
# workers = multiprocessing.cpu_count()  # number of worker processes (12)
workers = 1    # low-resource 13G server: keep this at 1 if the machine is overloaded
threads = 50   # number of threads per worker process
loglevel = 'error'  # log level; this only applies to the error log, the access log level cannot be set
access_log_format = '%(t)s %(p)s %(h)s "%(r)s" %(s)s %(L)s %(b)s %(f)s" "%(a)s"'  # gunicorn access log format; the error log format cannot be set
"""
The meaning of each placeholder:
h  remote address
l  '-'
u  currently '-', may be user name in future releases
t  date of the request
r  status line (e.g. ``GET / HTTP/1.1``)
s  status
b  response length or '-'
f  referer
a  user agent
T  request time in seconds
D  request time in microseconds
L  request time in decimal seconds
p  process ID
"""
_tmp_path = os.path.dirname(os.path.abspath(__file__))
_tmp_path = os.path.join(_tmp_path, 'log')
Path(_tmp_path).mkdir(parents=True, exist_ok=True)
accesslog = os.path.join(_tmp_path, 'gunicorn_access.log')  # access log file
errorlog = os.path.join(_tmp_path, 'gunicorn_error.log')    # error log file
# gunicorn -c app_config.py app_run:app -D --daemon
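# Illustrative sketch only: the app_run module referenced by the launch command
# above is not part of this commit, so this minimal stand-in is an assumption
# about its shape, not the actual implementation.
# from flask import Flask
# app = Flask(__name__)
#
# @app.route('/health')
# def health():
#     return {'status': 'ok'}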
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File    : 资源检测程序.py
# @Time    : 2022/9/30 10:39
# @Author  : bruxelles_li
# @Software: PyCharm
import logging
import os, time, re, subprocess

# Previous CPU counters. These must persist between calls (module level) so
# that get_cpu() can compute a usage rate over the interval since the last
# call; as locals they were always 0, which was a bug.
last_worktime = 0
last_idletime = 0


# Get CPU load information
def get_cpu():
    global last_worktime, last_idletime
    f = open("/proc/stat", "r")
    line = ""
    while "cpu " not in line:
        line = f.readline()
    f.close()
    spl = line.split(" ")
    worktime = int(spl[2]) + int(spl[3]) + int(spl[4])
    idletime = int(spl[5])
    dworktime = (worktime - last_worktime)
    didletime = (idletime - last_idletime)
    rate = float(dworktime) / (didletime + dworktime)
    first_call = (last_worktime == 0)
    last_worktime = worktime
    last_idletime = idletime
    if first_call:
        return 0
    return rate


# Get memory load information
def get_mem_usage_percent():
    # Pre-initialize so a /proc/meminfo missing one of the keys cannot cause
    # an UnboundLocalError below.
    mem_total = mem_free = mem_buffer = mem_cache = 0
    vmem_total = vmem_free = 0
    try:
        f = open('/proc/meminfo', 'r')
        for line in f:
            if line.startswith('MemTotal:'):
                mem_total = int(line.split()[1])
            elif line.startswith('MemFree:'):
                mem_free = int(line.split()[1])
            elif line.startswith('Buffers:'):
                mem_buffer = int(line.split()[1])
            elif line.startswith('Cached:'):
                mem_cache = int(line.split()[1])
            elif line.startswith('SwapTotal:'):
                vmem_total = int(line.split()[1])
            elif line.startswith('SwapFree:'):
                vmem_free = int(line.split()[1])
            else:
                continue
        f.close()
    except OSError:
        return None
    physical_percent = usage_percent(mem_total - (mem_free + mem_buffer + mem_cache), mem_total)
    virtual_percent = 0
    if vmem_total > 0:
        virtual_percent = usage_percent((vmem_total - vmem_free), vmem_total)
    return physical_percent, virtual_percent


def usage_percent(use, total):
    try:
        ret = (float(use) / total) * 100
    except ZeroDivisionError:
        raise Exception("ERROR - zero division error")
    return ret


# Get usage of the root directory
def disk_info():
    statvfs = os.statvfs('/')  # root filesystem; adjust as needed
    total_disk_space = statvfs.f_frsize * statvfs.f_blocks
    free_disk_space = statvfs.f_frsize * statvfs.f_bfree
    disk_usage = (total_disk_space - free_disk_space) * 100.0 / total_disk_space
    disk_usage = int(disk_usage)
    # disk_tip = "Disk usage (max 100%): " + str(disk_usage) + "%"
    # print(str(disk_usage))
    return str(disk_usage)


# Get memory usage
def mem_info():
    mem_usage = get_mem_usage_percent()
    mem_usage = int(mem_usage[0])
    # mem_tip = "Physical memory usage (max 100%): " + str(mem_usage) + "%"
    # print(str(mem_usage))
    return str(mem_usage)


# Get CPU usage
def cpu_info():
    cpu_usage = int(get_cpu() * 100)
    # cpu_tip = "CPU usage (max 100%): " + str(cpu_usage) + "%"
    # print(str(cpu_usage))
    return str(cpu_usage)


# Get system load information
def sys_info():
    load_average = os.getloadavg()
    # load_tip = "System load (high if any of the three values exceeds 3): " + str(load_average)
    # Return the highest of the 1/5/15-minute load averages; the original
    # returned len(load_average), which is always 3 and made the check useless.
    return max(load_average)


# Get the current host time
def time_info():
    now_time = time.strftime('%Y-%m-%d %H:%M:%S')
    return "Current host time: %s" % now_time


# Get the hostname
def hostname_info():
    hostnames = os.popen("hostname").read().strip()
    return "Your hostname is: %s" % hostnames


# Get the IP address
def ip_info():
    ipadd = os.popen("ip a| grep ens192 | grep inet | awk '{print $2}'").read().strip()
    return ipadd


# Get usage of the root volume via df
def disk_info_root():
    child = subprocess.Popen(["df", "-h"], stdout=subprocess.PIPE)
    out = child.stdout.readlines()
    content = ''
    for item in out:
        # df output is bytes; decode before comparing against str
        line = item.decode().strip().split()
        # only the CentOS root volume is checked here
        if '/dev/mapper/centos-root' in line:
            title = [u'-Filesystem-', u'--Size-', u'-Used-', u'-Avail-', u'-Use%-', u'-Mount point--']
            content = "\t".join(title)
            if int(line[4][0:-1]) > 60:
                line[0] = 'centos-root'
                content += '\r\n' + '\t'.join(line)
    return content


# Test program
# if __name__ == "__main__":
#     disk_information = disk_info()
#     disk_usage = [int(s) for s in re.findall(r'\b\d+\b', disk_information)]
#     infomation = [hostname_info(), time_info(), disk_information]
#     print(disk_usage)
#     # Send an alert email if disk usage exceeds 60%
#     if disk_usage[0] > 60:
#         print("Disk usage is above 60%; consider freeing disk space!")
#
#     # print(hostname_info())
#     # print(time_info())
#     # print(ip_info())
#     print(sys_info())
#     print(cpu_info())
#     print(mem_info())
#     print(disk_info())
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File    : main_server.py
# @Time    : 2023/3/31 10:31
# @Author  : bruxelles_li
# @Software: PyCharm
import logging
import requests
import threading
import sys
import time, os
import json
import pandas as pd
import glob
from pathlib import Path

sys.path.append('../')

# Close redundant connections (disable keep-alive)
s = requests.session()
s.keep_alive = False

from classification.runner.runner_fast_text import FastTextRunner_train
from detector_source import sys_info, cpu_info, mem_info
from classification.data.data_process import pro_data

# Log output format
formatter = logging.Formatter("%(asctime)s [%(levelname)s] <%(processName)s> (%(threadName)s) %(message)s")
# Create a logger and set its level
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Handler that writes logs to the console, with its own level
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
# File handlers for the error and info logs, with their own levels
_tmp_path = os.path.dirname(os.path.abspath(__file__))
# print(_tmp_path)
_tmp_path = os.path.join(_tmp_path, 'log')
Path(_tmp_path).mkdir(parents=True, exist_ok=True)
fh = logging.FileHandler(os.path.join(_tmp_path, "main_server_error.log"))
fh1 = logging.FileHandler(os.path.join(_tmp_path, "main_server_info.log"))
fh.setLevel(level=logging.ERROR)
fh1.setLevel(level=logging.INFO)
fh.setFormatter(formatter)
fh1.setFormatter(formatter)
# Send logs to both the console and the files
logger.addHandler(ch)
logger.addHandler(fh)
logger.addHandler(fh1)

# Training config file
train_config_path = '../classification/config/fasttext_config_train.yml'
# todo: paths used for data preprocessing
root_path = r'../word2vec/doc_similarity/'
stop_words_path = os.path.join(root_path, 'stop_words.txt')
save_data_path = r'../datasets/classification/{}/{}/{}.txt'
file_types = ['xls', 'xlsx']
# Java callback endpoint
java_call_back_url = "http://192.168.1.82:9988/manage/algorithmModel/process/changeStatus"
# Port number
port = 4005
modelName = "FastText-Model"
# TODO: list that keeps the worker threads
all_thread = []
def merge_df(dataset_path):
    all_files = []
    for file_type in file_types:
        all_files.extend(glob.glob(os.path.join(dataset_path, f'*.{file_type}')))
    # Merge all files into a single DataFrame
    combined_df = pd.concat([pd.read_excel(f) for f in all_files], ignore_index=True)
    # Drop duplicate rows
    combined_df.drop_duplicates(keep='first', inplace=True)
    return combined_df
def train_model4FastText(data_path, model_path, modelProcessId, root_dataset):
    """
    train
    :return:
    """
    combined_df = merge_df(dataset_path=root_dataset)
    # Preprocess the data
    pro_data(dataFolderName=data_path, data_df=combined_df, stop_words_path=stop_words_path,
             save_data_path=save_data_path, modelName=modelName)
    logger.info("==== Data preprocessing succeeded, entering the training stage ===")
    # Start training
    runner_train = FastTextRunner_train(config_path=train_config_path, model_train=True)
    runner_train.train(data_path=data_path, model_path=model_path, auto_tune_duration=300)
    dict_result = runner_train.test(data_path=data_path, model_path=model_path)
    str_dict_result = json.dumps(dict_result, ensure_ascii=False)
    logger.info(str_dict_result)
    # todo: call the Java status-update endpoint with the training result
    payload = json.dumps({
        "id": modelProcessId,
        "result": str_dict_result
    })
    # todo: call the parameter-generation function to produce currentTime and appId
    headers = {
        'Content-Type': 'application/json'
    }
    r1 = requests.post(url=f"{java_call_back_url}",
                       headers=headers, data=payload)
    r1_json = json.loads(r1.text)
    # print(r1_json)
    logger.info(r1_json)
    return str_dict_result
def env_eval(modelProcessId):
    # todo: collect resource info (disk usage, system load, CPU usage, physical memory usage)
    # disk_usage = disk_info()
    sys_usage = sys_info()
    cpu_usage = cpu_info()
    men_usage = mem_info()
    # todo: return False when resources are insufficient
    # cpu_info()/mem_info() return strings, so compare numerically; the
    # original compared strings (e.g. cpu_usage > str(95)), which misorders
    # values such as "100". The load threshold follows the message below.
    if float(sys_usage) > 10 or int(cpu_usage) > 95 or int(men_usage) > 95:
        # todo: call the Java status-update endpoint to report excessive resource usage
        str_dict_result = {
            'handleMsg': 'failure',
            'isHandleSuccess': False,
            'logs': '模型训练失败!当前模型训练资源占用率过高,请检查系统占用信息【超过10个为高】、CPU占用率【超过85%为高】、物理内存占用率【超过85%为高】',
            'resultData': None
        }
        logger.info(str_dict_result)
        payload = json.dumps({
            "id": modelProcessId,
            "result": str_dict_result
        })
        headers = {
            'Content-Type': 'application/json'
        }
        r1 = requests.post(
            url=f"{java_call_back_url}",
            headers=headers, data=payload)
        r1_json = json.loads(r1.text)
        # print(r1_json)
        logger.info(r1_json)
        return False
    # todo: return True when resources are sufficient
    return True
def system_start():
    while True:
        # print("===== training service is running =====")
        headers = {
            'Content-Type': 'application/json'
        }
        r1 = requests.post(url=f'http://localhost:{int(port)}/queue_size', headers=headers)
        r1_json = json.loads(r1.text)
        # print(r1_json)
        queue_left_number = r1_json['queue_left_number']
        logger.info("Current number of queued tasks: " + str(queue_left_number))
        if queue_left_number == 0:
            # logger.warning("Queue is empty! Nothing to process.")
            time.sleep(30)
        else:
            for i in range(queue_left_number):
                r2 = requests.post(url=f'http://localhost:{int(port)}/subject_consumer', headers=headers)
                r2_json = json.loads(r2.text)
                config_info = r2_json['data']
                logger.info(config_info)
                modelProcessId = config_info["modelProcessId"]
                model_path = config_info["model_path"]
                data_path = config_info["data_path"]
                root_dataset = config_info["root_dataset"]
                logger.info('##########FastText-Model###############')
                t = threading.Thread(target=train_model4FastText,
                                     args=(data_path, model_path, modelProcessId, root_dataset),
                                     daemon=True)
                while True:
                    if env_eval(modelProcessId):
                        break
                    else:
                        time.sleep(600)
                # Start the worker thread
                t.start()
                all_thread.append(t)
def system_resume():
    """
    Restore the state of the model-training service
    :return:
    """
    headers = {
        'Content-Type': 'application/json'
    }
    # Drain the service queue to avoid starting the same training job twice
    r1 = requests.post(url=f'http://localhost:{int(port)}/queue_size', headers=headers)
    r1_json = r1.json()
    logger.info('Current queue size: %d' % r1_json['queue_left_number'])
    if r1_json['queue_left_number'] > 0:
        logger.info('Consuming the queue until it is empty!')
        while True:
            r2 = requests.post(url=f'http://localhost:{int(port)}/subject_consumer', headers=headers)
            r2_json = r2.json()
            if r2_json['queue_left_number'] == 0:
                logger.info('Queue drained! Safe to start model training ...')
                break
    else:
        logger.info('Queue is empty! Safe to start model training ...')
def start_up_check():
    """
    Pre-start check
    :return:
    """
    while True:
        try:
            headers = {
                'Content-Type': 'application/json'
            }
            r0 = requests.post(url=f'http://localhost:{int(port)}/queue_size', headers=headers)
            server_started = True
        except requests.exceptions.ConnectionError as e:
            server_started = False
            logger.error("Error: ConnectionError")
            logger.warning('Server not started; start the server first! Exiting.')
            exit(123)
            # logger.info('Trying to restart the server ...')
            # time.sleep(3)
        if server_started:
            logger.info("Server is up! The model-training service has started...")
            break
if __name__ == '__main__':
    # root_path = "../datasets/classification/zcjd_column_classify/zcjd_V0"
    # data_df = merge_df(root_path)
    # print(len(data_df))
    # print(data_df)
    # Start the model-training service
    start_up_check()
    logger.info('Restoring the model-training service ...')
    system_resume()
    time.sleep(30)
    logger.info('Model-training service restored!')
    logger.info('Model-training service running ...')
    system_start()
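# Illustrative sketch only: the /queue_size and /subject_consumer endpoints
# polled above belong to the Flask server process, which is not part of this
# file; the queue-backed shape below is an assumption, not the real server.
# import queue
# from flask import Flask, jsonify
#
# app = Flask(__name__)
# task_queue = queue.Queue()
#
# @app.route('/queue_size', methods=['POST'])
# def queue_size():
#     return jsonify({'queue_left_number': task_queue.qsize()})
#
# @app.route('/subject_consumer', methods=['POST'])
# def subject_consumer():
#     data = None if task_queue.empty() else task_queue.get()
#     return jsonify({'data': data, 'queue_left_number': task_queue.qsize()})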
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/7/31 10:21
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/21 9:30
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_app
# @Author : LiuYan
# @Time : 2021/4/21 9:30
import json
from flask import Flask, Blueprint, request
from utils.log import logger
app = Flask(__name__)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:03
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_config
# @Author : LiuYan
# @Time : 2021/4/16 18:06
import os
import pymysql
from abc import abstractmethod, ABC
# root_dir = '/data/lzc/zzsn_nlp_br'
# root_dir = '/data/lzc'
root_dir = '..' # deploy
db_config = {
    # os.environ.get already returns None when the variable is absent, so the
    # original "... if key in os.environ else None" guards were redundant
    'host': os.environ.get('brpa_tidb_host'),
    'port': int(os.environ['brpa_tidb_port']) if 'brpa_tidb_port' in os.environ else None,
    'user': os.environ.get('brpa_tidb_user'),
    'password': os.environ.get('brpa_tidb_password'),
    'database': os.environ.get('brpa_tidb_database'),
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor
}
class BaseConfig(ABC):
    @abstractmethod
    def __init__(self):
        super(BaseConfig, self).__init__()

    @abstractmethod
    def load_config(self):
        """
        Add the config you need.
        :return: config(YamlDict)
        """
        pass
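# Usage sketch (added for illustration; assumes the brpa_tidb_* environment
# variables are set, and uses only the db_config dict defined above):
# with pymysql.connect(**db_config) as conn:
#     with conn.cursor() as cursor:
#         cursor.execute('SELECT 1 AS ok')
#         print(cursor.fetchone())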
home:
  dir: '/data/lzc'

# Please set the GPU or CPU to be used for your model training in the LoadConfig object
device: "cuda:0"

# shared for multiple projects in this machine, raw data, read only
data:
  # base: '/data'
  base: 'd:/data'
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:03
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_data_loader
# @Author : LiuYan
# @Time : 2021/4/19 9:37
from abc import ABC, abstractmethod
class BaseDataLoader(ABC):
    @abstractmethod
    def __init__(self):
        super(BaseDataLoader, self).__init__()

    @abstractmethod
    def _load_data(self):
        """
        load raw data according to data config
        :return:
        """
        pass

    @abstractmethod
    def load_train(self):
        pass

    @abstractmethod
    def load_valid(self):
        pass

    @abstractmethod
    def load_test(self):
        pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_data_process
# @Author : LiuYan
# @Time : 2021/4/19 9:37
from abc import ABC, abstractmethod
class BaseDataProcess(ABC):
    """
    data processing
    """
    @abstractmethod
    def __init__(self):
        super(BaseDataProcess, self).__init__()

    @abstractmethod
    def process(self):
        pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_data_reader
# @Author : LiuYan
# @Time : 2021/4/19 9:37
from abc import ABC, abstractmethod
class BaseDataReader(ABC):
    @abstractmethod
    def __init__(self):
        super(BaseDataReader, self).__init__()

    @abstractmethod
    def reade(self):
        pass

    @abstractmethod
    def save(self):
        pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:04
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_evaluator
# @Author : LiuYan
# @Time : 2021/4/19 10:39
from abc import ABC, abstractmethod
class BaseEvaluator(ABC):
    @abstractmethod
    def __init__(self):
        super(BaseEvaluator, self).__init__()

    @abstractmethod
    def evaluate(self, dict_inputs: dict) -> tuple:
        pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:04
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_loss
# @Author : LiuYan
# @Time : 2021/4/19 10:41
from abc import abstractmethod
import torch.nn as nn
class BaseLoss(nn.Module):
    def __init__(self, loss_config):
        super(BaseLoss, self).__init__()
        self._config = loss_config

    @abstractmethod
    def forward(self, dict_outputs: dict) -> dict:
        pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:04
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_model
# @Author : LiuYan
# @Time : 2021/4/19 10:42
from abc import ABC, abstractmethod
import torch.nn as nn
class BaseModel(nn.Module, ABC):
    def __init__(self):
        super(BaseModel, self).__init__()

    @abstractmethod
    def forward(self, dict_inputs: dict) -> dict:
        pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:04
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_runner
# @Author : LiuYan
# @Time : 2021/4/19 10:42
from abc import ABC, abstractmethod
from utils.utils import timeit
class BaseRunner(ABC):
    """
    Abstract definition for runner
    """
    @abstractmethod
    def __init__(self):
        pass

    @timeit
    @abstractmethod
    def _build_config(self):
        pass

    @timeit
    @abstractmethod
    def _build_data(self):
        pass

    @timeit
    @abstractmethod
    def _build_model(self):
        pass

    @timeit
    @abstractmethod
    def _build_loss(self):
        pass

    @timeit
    @abstractmethod
    def _build_optimizer(self):
        pass

    @timeit
    @abstractmethod
    def _build_evaluator(self):
        pass

    @abstractmethod
    def train(self):
        pass

    @abstractmethod
    def _train_epoch(self, epoch: int):
        pass

    @abstractmethod
    def _valid(self, epoch: int):
        pass

    @abstractmethod
    def test(self):
        pass

    @abstractmethod
    def pred(self, title: str, content: str) -> str or dict:
        pass

    @abstractmethod
    def _display_result(self, dict_result: dict):
        pass

    @abstractmethod
    def _save_model(self):
        pass

    @abstractmethod
    def _load_model(self):
        pass


class train_BaseRunner(ABC):
    """
    Abstract definition for runner
    """
    @abstractmethod
    def __init__(self):
        pass

    @timeit
    @abstractmethod
    def _build_config(self):
        pass

    @timeit
    @abstractmethod
    def _build_data(self):
        pass

    @timeit
    @abstractmethod
    def _build_model(self):
        pass

    @timeit
    @abstractmethod
    def _build_loss(self):
        pass

    @timeit
    @abstractmethod
    def _build_optimizer(self):
        pass

    @timeit
    @abstractmethod
    def _build_evaluator(self):
        pass

    @abstractmethod
    def train(self):
        pass

    @abstractmethod
    def _train_epoch(self, epoch: int):
        pass

    @abstractmethod
    def _valid(self, data_path, model_path, epoch: int):
        pass

    @abstractmethod
    def test(self):
        pass

    @abstractmethod
    def pred(self, title: str, content: str) -> str or dict:
        pass

    @abstractmethod
    def _display_result(self, dict_result: dict):
        pass

    @abstractmethod
    def _save_model(self, model_path):
        pass

    @abstractmethod
    def _load_model(self):
        pass
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/7/31 17:24
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/21 9:59
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/15 10:31
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : fast_text_config
# @Author : LiuYan
# @Time : 2021/4/19 10:46
import dynamic_yaml
import torch
from base.config.base_config import BaseConfig
class FastTextConfig(BaseConfig):
    def __init__(self, config_path):
        super(FastTextConfig, self).__init__()
        self._config_path = config_path
        pass

    def load_config(self):
        with open(self._config_path, mode='r', encoding='UTF-8') as f:
            config = dynamic_yaml.load(f)
        config.device = torch.device(config.device if torch.cuda.is_available() else 'cpu')
        return config
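# Usage sketch (added for illustration; the path is the training config used
# by main_server in this commit, and dynamic_yaml resolves '{home.dir}/...'
# style references like those in the YAML files below on access):
# config = FastTextConfig('../classification/config/fasttext_config_train.yml').load_config()
# print(config.device, config.learn.dir.saved)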
home:
  # dir: '/home/zzsn/liuyan' # train or test
  dir: '../../..' # deploy

# Shared for multiple modules in the project
project:
  name: 'platform_project'
  dir:
    work: '{home.dir}/{project.name}'

# Please set the GPU or CPU to be used for your model training in the LoadConfig object
device: 'cpu'
status: 'pred' # pred / test / train

# shared for multiple projects in this machine, raw data, read only
data:
  dir: ''
  name: 'FastText-Model'
  num_vocab: ~
  num_tag: ~

model:
  name: 'Origin-Model'

loss:
  name: 'ft_loss'

learn:
  time: '2023_03_31-12_15_17'
  dir:
    work: '{home.dir}/model_saved/classification/{data.name}'
    logs: '{learn.dir.work}/log'
    saved: '{learn.dir.work}/{model.name}'
    result: '{learn.dir.work}/data/result'
    # save_model: '{learn.dir.saved}-{learn.time}/model.bin'
    load_model: '{learn.dir.saved}-{learn.time}/model.bin'
home:
  # dir: '/data/lzc' # train or test
  dir: '../../..' # deploy

# Shared for multiple modules in the project
project:
  name: 'platform_project'
  dir:
    work: '{home.dir}/{project.name}'

# Please set the GPU or CPU to be used for your model training in the LoadConfig object
# device: 'cpu'
device: 'cuda:0'
status: 'train' # pred / test / train

# shared for multiple projects in this machine, raw data, read only
data:
  dir: '../datasets/classification'
  name: 'FastText-Model'
  path0: '{data.dir}/{data.name}%s'
  train_path: '{data.dir}/{data.name}%s/train.txt'
  valid_path: '{data.dir}/{data.name}%s/valid.txt'
  test_path: '{data.dir}/{data.name}%s/valid.txt'
  batch_size: 4
  num_vocab: ~
  num_tag: ~

model:
  name: 'Origin-Model'

loss:
  name: 'ft_loss'

learn:
  time: '2023_03_31-12_15_17'
  dir:
    work: '{home.dir}/model_saved/classification'
    logs: '{learn.dir.work}/log'
    saved0: '{learn.dir.work}%s'
    saved: '{learn.dir.work}/{data.name}%s'
    result: '{learn.dir.work}/data/result'
    # save_model: '{learn.dir.saved}-{learn.time}/model.bin'
    load_model: '{learn.dir.saved}-{learn.time}/model.bin'
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/15 10:31
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File    : data_process
# @Author  : bruxellse_li
# @Time    : 2023/3/31 08:39
import os
import pandas as pd
import sys
from pathlib import Path
from pandas import DataFrame
from sklearn.model_selection import train_test_split

# Append the working path
sys.path.append('../../')
from classification.utils.utils import *
def process_txt(data_loader: DataFrame, train_file_path: str, valid_file_path: str, stop_words_path: str):
    articles = data_loader['article']
    labels = data_loader['label']
    article_list = []
    for article, label in zip(articles, labels):
        if type(article) is str:
            text = article.replace('\n', '').replace('\r', '').replace('\t', '')
        else:
            print('{} is not str!'.format(article))
            continue
        text = seg(text=text, sw=stop_words(path=stop_words_path))
        text = '__label__{} {}'.format(label, text)
        article_list.append(text)
    train_data, valid_data = train_test_split(
        article_list, train_size=0.8, random_state=2021, shuffle=True
    )
    with open(
            train_file_path, 'w', encoding='utf-8'
    ) as train_file, open(
        valid_file_path, 'w', encoding='utf-8'
    ) as valid_file:
        for train in train_data:
            train_file.write(train + '\n')
        for valid in valid_data:
            valid_file.write(valid + '\n')
    pass
def process(data_loader, train_file_path: str, valid_file_path: str, stop_words_path: str):
    # Create the corpus directory
    # Path(os.path.abspath(os.path.join(train_file_path, os.path.pardir))).mkdir(parents=True, exist_ok=True)
    # data_loader = pd.read_excel(path, keep_default_na=False).astype(str)
    data_loader['article'] = data_loader['title'] + '。' + data_loader['content']
    data_loader['article'] = data_loader.article.apply(clean_tag).apply(clean_txt)
    process_txt(
        data_loader=data_loader,
        train_file_path=train_file_path,
        valid_file_path=valid_file_path,
        stop_words_path=stop_words_path
    )
    return None
# Corpus-processing entry point
def pro_data(modelName, dataFolderName, data_df, stop_words_path, save_data_path):
    # save_data_path = '/home/python/lzc/datasets/classification/{}/{}/{}.txt'
    process(
        data_loader=data_df,
        train_file_path=save_data_path.format(modelName, dataFolderName, 'train'),
        valid_file_path=save_data_path.format(modelName, dataFolderName, 'valid'),
        stop_words_path=stop_words_path
    )
    return None
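# The files written above are in fastText supervised format, one
# "__label__<label> <segmented text>" example per line. A training sketch
# with the official fasttext package (assumed to be the consumer of these
# files; the input path is illustrative):
# import fasttext
# model = fasttext.train_supervised(input='../../datasets/classification/gzdt_dataset/gzdt_V1/train.txt')
# print(model.predict('预处理 后 的 文本'))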
if __name__ == '__main__':
    modelName, dataFolderName, data_path = "gzdt_dataset", "gzdt_V1", "../../datasets/Receive_File/测试数据.xlsx"
    save_data_path = r'../../datasets/classification/{}/{}/{}.txt'
    root_path = r'../../word2vec/doc_similarity/'
    stop_words_path = os.path.join(root_path, 'stop_words.txt')
    # pro_data expects a DataFrame, so read the Excel file first (the original
    # passed the path itself, which would fail inside process())
    data_df = pd.read_excel(data_path, keep_default_na=False).astype(str)
    pro_data(modelName, dataFolderName, data_df, stop_words_path, save_data_path)

    # date = '20230329'
    # path = '../datasets/{}_total_{}.xlsx'
    #
    # save_data_path = '/home/zzsn/liuyan/datasets/the_belt_and_road/classification/{}/{}_{}.txt'
    # # Machinery public opinion: current-affairs column classification
    # ssyw_name = 'ssyw_column_classify'
    # # Machinery public opinion: state-owned assets news column classification
    # gzdt_name = 'gzdt_column_classify'
    # # Machinery public opinion: upstream/downstream column classification
    # sxy_name = 'sxy_column_classify'
    # # Machinery public opinion: industry sentiment column classification
    # hyyq_name = 'hyyq_column_classify'
    # # Machinery public opinion: management news column classification
    # gldt_name = 'gldt_column_classify'
    # # Machinery public opinion: leading enterprises column classification
    # ltqy_name = 'ltqy_column_classify'
    # # Machinery public opinion: emerging fields column classification
    # xxly_name = 'xxly_column_classify'
    # # Machinery public opinion: comprehensive news column classification
    # zhzx_name = 'zhzx_column_classify'
    # # Machinery public opinion: negative sentiment column classification
    # fmyq_name = 'fmyq_column_classify'
    #
    # process(
    #     path=path.format(gzdt_name, date),
    #     train_file_path=save_data_path.format(gzdt_name, 'train', date),
    #     valid_file_path=save_data_path.format(gzdt_name, 'valid', date)
    # )
    # pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : data_stats
# @Author : LiuYan
# @Time : 2021/4/15 16:52
import pandas as pd
from collections import Counter
if __name__ == '__main__':
    pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/15 10:33
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : eval_classification
# @Author : LiuYan
# @Time : 2021/4/20 21:19
from collections import Counter
from sklearn.metrics import precision_score, recall_score, f1_score
from base.evaluation.base_evaluator import BaseEvaluator
class ClassifyEvaluator(BaseEvaluator):
    # def __init__(self, label_dict: dict):
    def __init__(self):
        super(ClassifyEvaluator, self).__init__()
        # self._label_dict = label_dict
        # self._count_dict = {'TP': 0}
        pass

    def evaluate(self, true_list: list, pred_list: list) -> tuple:
        dict_result = {}
        true_labels = Counter(true_list)
        pred_labels = Counter(pred_list)
        print(true_labels)
        print(pred_labels)
        for true_label in true_labels:
            # print(true_labels[true_label], pred_labels[true_label])
            dict_result[true_label] = {
                'precision': 0,
                'recall': 0,
                'f1-score': 0,
                'true_num': 0,
                'pred_num': pred_labels[true_label],
                'total_num': true_labels[true_label]
            }
        for true, pred in zip(true_list, pred_list):
            if true == pred:
                dict_result[true]['true_num'] += 1
        print('\n' + ''.join('-' for i in range(89)))
        print('label_type\t\t\tp\t\t\tr\t\t\tf1\t\t\ttrue_num\t\t\tpred_num\ttotal_num')
        string = '{0}{1:<12.4f}{2:<12.4f}{3:<12.4f}{4:<12}{5:<12}{6:<12}'
        true_nums, pred_nums, total_nums = 0, 0, 0
        for label_type in dict_result:
            true_nums += dict_result[label_type]['true_num']
            pred_nums += dict_result[label_type]['pred_num']
            total_nums += dict_result[label_type]['total_num']
            p = dict_result[label_type]['true_num'] / dict_result[label_type]['pred_num'] if dict_result[label_type]['pred_num'] != 0 else 0
            r = dict_result[label_type]['true_num'] / dict_result[label_type]['total_num'] if dict_result[label_type]['total_num'] != 0 else 0
            f1 = 2 * p * r / (p + r) if p + r != 0 else 0
            chunk_type_out = label_type + ''.join(
                ' ' for i in range(20 - (((len(label_type.encode('utf-8')) - len(label_type)) // 2) + len(label_type)))
            )
            print(string.format(chunk_type_out, p, r, f1, dict_result[label_type]['true_num'],
                                dict_result[label_type]['pred_num'], dict_result[label_type]['total_num']), chr(12288))
            dict_result[label_type]['precision'] = p
            dict_result[label_type]['recall'] = r
            dict_result[label_type]['f1-score'] = f1
        p = true_nums / pred_nums if pred_nums != 0 else 0
        r = true_nums / total_nums if total_nums != 0 else 0
        f1 = 2 * p * r / (p + r) if p + r != 0 else 0
        print(string.format('average{}'.format(''.join(' ' for i in range(13))), p, r, f1,
                            true_nums, pred_nums, total_nums), chr(12288))
        print(''.join('-' for i in range(89)) + '\n')
        dict_result['average'] = {
            'precision': p,
            'recall': r,
            'f1-score': f1,
            'true_num': true_nums,
            'pred_num': pred_nums,
            'total_num': total_nums
        }
        return p, r, f1, dict_result
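# Usage sketch with toy labels (added for illustration; the labels are made up):
# evaluator = ClassifyEvaluator()
# p, r, f1, dict_result = evaluator.evaluate(
#     true_list=['体育', '财经', '体育'],
#     pred_list=['体育', '财经', '财经']
# )
# print(p, r, f1)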
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/8/2 15:47
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/15 10:31
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File : __init__.py.py
# @Time : 2022/1/5 18:09
# @Author : Mr.Ygg
# @Software: PyCharm
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File : test_br_pro_risk_recognition.py
# @Time : 2022/1/5 18:09
# @Author : Mr.Ygg
# @Software: PyCharm
from base.app.base_app import *
from classification.runner.runner_fast_text import FastTextRunner
from classification.utils.utils import load_risk_keywords, is_include_compound_words

# Risk categories
risk_info = [
    '外部政治风险',
    '主权政治风险',
    '社会动荡风险',
    '对华关系风险',
    '资金风险',
    '财政风险',
    '汇率风险',  # a comma was missing here in the original, silently concatenating this item with the next
    '通货膨胀风险',
    '环保风险',
    '法律风险',
    '突发事件风险',
    '项目实施风险',
    '企业风险',
    '其他风险'
]
ft_config_path = '../config/config_br_pro_risk_recognition.yml'
runner = FastTextRunner(config_path=ft_config_path)
# Job-posting / stock filtering model
ft_config_path_rc_f_zp_gp = '../config/config_rc_f_zp_gp.yml'
runner_rc_f_zp_gp = FastTextRunner(config_path=ft_config_path_rc_f_zp_gp)
# Positive/negative sentiment analysis model for project news
ft_config_path_psa = '../config/config_br_pro_sentiment_analysis.yml'
runner_psa = FastTextRunner(config_path=ft_config_path_psa)
list_country = []
with open('../config/country.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
    for line in lines:
        list_country.append(line.strip().split('(')[0].split('(')[0])
# Risk types the model can recognize
risk_model_info = [
    '社会动荡风险',
    '突发事件风险'
]
# Risk-category keywords
dict_risk_keywords = load_risk_keywords('../config/risk_keywords.xlsx')
def pred(title: str, content: str) -> dict:
    dict_result = {
        '风险类别1': '',
        '风险类别2': '',
        '风险类别3': '',
        '风险类别4': ''
    }
    # Job-posting / stock filtering model
    result_rc_f_zp_gp = runner_rc_f_zp_gp.pred(title=title, content=content)
    # 0: neither job posting nor stock news  1: job posting  2: stock news
    bool_rc_f_zp_gp = False if result_rc_f_zp_gp == '1' else True
    logger.info('job/stock filter: {}'.format(result_rc_f_zp_gp))
    logger.info('job/stock filter: {}'.format(bool_rc_f_zp_gp))
    # Positive/negative sentiment filter
    result_psa = runner_psa.pred(title=title, content=content)
    bool_psa = True if result_psa == '项目负面资讯信息' else False
    logger.info('sentiment filter: {}'.format(result_psa))
    logger.info('sentiment filter: {}'.format(bool_psa))
    # Country-recognition filter
    bool_country = False
    text = title + '。' + content[: len(content) // 5]
    for country in list_country:
        if country in text:
            bool_country = True
            logger.info('country filter: {}'.format(country))
            break
    logger.info('country filter: {}'.format(bool_country))
    text = title + '。' + content
    if bool_country and bool_psa:
        """
        1. job/stock filter -> not a job posting or stock article
        2. country filter -> a Belt-and-Road-related country
        3. sentiment filter -> negative news
        """
        # Risk-recognition model
        result = runner.pred(
            title=title,
            content=content
        )
        dict_result['风险类别1'] = result
        dict_result['风险类别2'] = result
        dict_result['风险类别3'] = result
        dict_result['风险类别4'] = result
        logger.info('risk-recognition model: {}'.format(result))
        # Keyword-based filtering
        if type(result) is str and result in risk_model_info:
            # Categories in risk_model_info need keyword filtering to drop noisy hits
            bool_risk_keyword = False
            for risk_keyword in dict_risk_keywords[result]:
                compound_words = risk_keyword.split('+')
                if is_include_compound_words(text=text, compound_words=compound_words):
                    bool_risk_keyword = True
                    break
            result = result if bool_risk_keyword else '无风险'
            dict_result['风险类别3'] = result
            dict_result['风险类别4'] = result
            logger.info('keyword filter: {}'.format(bool_risk_keyword))
            if result == '无风险':
                dict_risk_keywords_num = {
                    risk_keywords_key: 0 for risk_keywords_key in dict_risk_keywords
                }
                bool_risk_keyword, risk_category = False, result
                for risk_keywords_key in dict_risk_keywords_num:
                    for risk_keyword in dict_risk_keywords[risk_keywords_key]:
                        compound_words = risk_keyword.split('+')
                        if is_include_compound_words(text=text, compound_words=compound_words):
                            bool_risk_keyword = True
                            dict_risk_keywords_num[risk_keywords_key] += 1
                if bool_risk_keyword:
                    risk_category = max(dict_risk_keywords_num, key=dict_risk_keywords_num.get)
                dict_result['风险类别3'] = risk_category
                logger.info('risk recalled by keywords after filtering: {}'.format(risk_category))
        elif type(result) is str and result == '无风险':
            # For articles the model marks as risk-free, recall useful risk info via keywords
            dict_risk_keywords_num = {
                risk_keywords_key: 0 for risk_keywords_key in dict_risk_keywords
            }
            # Do not recall the categories the model itself can recognize? √
            for risk_keywords_key in risk_model_info:
                dict_risk_keywords_num.pop(risk_keywords_key) if risk_keywords_key in dict_risk_keywords_num else None
            bool_risk_keyword, risk_category = False, result
            for risk_keywords_key in dict_risk_keywords_num:
                for risk_keyword in dict_risk_keywords[risk_keywords_key]:
                    compound_words = risk_keyword.split('+')
                    if is_include_compound_words(text=text, compound_words=compound_words):
                        bool_risk_keyword = True
                        dict_risk_keywords_num[risk_keywords_key] += 1
            if bool_risk_keyword:
                risk_category = max(dict_risk_keywords_num, key=dict_risk_keywords_num.get)
            dict_result['风险类别2'] = risk_category
            dict_result['风险类别3'] = risk_category
            dict_result['风险类别4'] = risk_category
            logger.info('risk recalled by keywords: {}'.format(risk_category))
        else:
            result = result if type(result) is str else 'error'
            dict_result['风险类别3'] = result
            dict_result['风险类别4'] = result
            logger.info('ELSE risk info: {}'.format(result))
    else:
        dict_result['风险类别1'] = '无风险'
        dict_result['风险类别2'] = '无风险'
        dict_result['风险类别3'] = '无风险'
        dict_result['风险类别4'] = '无风险'
        logger.info('job-stock/country filter: no risk')
    return dict_result
if __name__ == '__main__':
    import os
    import pandas

    root_dir = '../data/datasource/test'
    # file_name = 'br总资讯'
    file_name = '境外快讯_1.4'
    df = pandas.read_excel(os.path.join(root_dir, 'input_file/{}.xlsx'.format(file_name)))
    df.drop_duplicates(subset='标题', keep='first', inplace=True)
    list_title = df['标题']
    list_content = df['正文']
    dict_risk_result = {
        '风险类别1': [],
        '风险类别2': [],
        '风险类别3': [],
        '风险类别4': []
    }
    list_risk, list_risk_old = [], []
    for index, (title, content) in enumerate(zip(list_title, list_content)):
        dict_result = pred(title=title, content=content)
        for key in dict_risk_result:
            dict_risk_result[key].append(dict_result[key] if key in dict_result else 'error')
        result_old = runner.pred(title=title, content=content)
        list_risk_old.append(result_old)
        logger.info('{} / {}\n'.format(index + 1, len(list_title)))
    df['风险类别_old'] = list_risk_old
    for key in dict_risk_result:
        df[key] = dict_risk_result[key]
    df.to_excel(os.path.join(root_dir, 'output_file/{}_result_20220112_s.xlsx'.format(file_name)))
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : test_label.py
# @Time : 2022/1/7 18:28
# @Author : Mr.Ygg
# @Software: PyCharm
import os
import pandas as pd
from classification.utils.utils import load_risk_keywords, is_include_compound_words

root_dir = '../data/datasource/test'
# file_name = '项目风险模型数据集_总'
file_name = '去重_F_ZP_GP'
df = pd.read_excel(os.path.join(root_dir, 'input_file/{}.xlsx'.format(file_name)))
list_title = df['标题']
list_content = df['正文']
list_country = []
with open('../config/country.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
    for line in lines:
        list_country.append(line.strip().split('(')[0].split('(')[0])
# Risk-category keywords
dict_risk_keywords = load_risk_keywords('../config/risk_keywords.xlsx')
list_bool_yiqing = []
list_bool_country = []
list_risk_key_words_category = []
for title, content in zip(list_title, list_content):
    if type(title) is float:
        title = ''
    if type(content) is float:
        content = ''
    # Country-recognition filter
    bool_country = False
    text = title + '。' + content[: len(content) // 5]
    for country in list_country:
        if country in text:
            bool_country = True
            list_bool_country.append('是')
            break
    if not bool_country:
        list_bool_country.append('否')
    text = title + '。' + content
    # Keyword: 疫情 (epidemic)
    if '疫情' in text:
        list_bool_yiqing.append('是')
    else:
        list_bool_yiqing.append('否')
    # Risk keywords
    dict_risk_keywords_num = {
        risk_keywords_key: 0 for risk_keywords_key in dict_risk_keywords
    }
    bool_risk_keyword = False
    risk_category = '无风险'
    for risk_keywords_key in dict_risk_keywords_num:
        for risk_keyword in dict_risk_keywords[risk_keywords_key]:
            compound_words = risk_keyword.split('+')
            if is_include_compound_words(text=text, compound_words=compound_words):
                bool_risk_keyword = True
                dict_risk_keywords_num[risk_keywords_key] += 1
    if bool_risk_keyword:
        risk_category = max(dict_risk_keywords_num, key=dict_risk_keywords_num.get)
    list_risk_key_words_category.append(risk_category)
df['是否含"疫情"关键词'] = list_bool_yiqing
df['是否含一带一路相关国家'] = list_bool_country
df['关键词分类'] = list_risk_key_words_category
df.to_excel(os.path.join(root_dir, 'output_file/{}_result.xlsx'.format(file_name)))
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : test_label_merge.py
# @Time : 2022/1/10 10:32
# @Author : Mr.Ygg
# @Software: PyCharm
import os
import pandas as pd

root_dir = '../data/datasource/test'
df = pd.read_excel(os.path.join(root_dir, 'input_file/br风险模型数据集_总_20220110.xlsx'))
list_label_1 = df['风险类别'].to_list()
list_label_2 = df['修正风险类别'].to_list()
list_label_3 = df['雪珂终审'].to_list()
list_label = []
for label_1, label_2, label_3 in zip(
        list_label_1, list_label_2, list_label_3
):
    # later review columns take precedence over earlier ones
    label = ''
    if type(label_1) is str:
        label = label_1
    if type(label_2) is str:
        label = label_2
    if type(label_3) is str:
        label = label_3
    list_label.append(label)
df['label'] = list_label
df.to_excel(os.path.join(root_dir, 'output_file/br风险模型数据集_总_20220110.xlsx'))
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : test_merge.py
# @Time : 2022/1/7 17:23
# @Author : Mr.Ygg
# @Software: PyCharm
import os
import pandas as pd

dict_df = {
    '标题': [],
    '正文': [],
    '状态': [],
    '类型': []
}
root_dir = '../data/datasource/test/input_file'
list_file = os.listdir(root_dir)
for file_name in list_file:
    file_path = os.path.join(root_dir, file_name)
    print(file_path)
    df = pd.read_excel(file_path)
    list_title = df['标题'].to_list()
    list_content = df['正文'].to_list()
    list_status = df['审核状态'].to_list()
    list_type = df['资讯类型'].to_list()
    dict_df['标题'].extend(list_title)
    dict_df['正文'].extend(list_content)
    dict_df['状态'].extend(list_status)
    dict_df['类型'].extend(list_type)
df = pd.DataFrame(dict_df)
df.to_excel(os.path.join(root_dir, 'br总资讯.xlsx'))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 16:40
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : utils
# @Author : LiuYan
# @Time : 2021/4/16 16:40
import re
import jieba
import pandas
from bs4 import BeautifulSoup


def clean_tag(text):
    """
    Strip HTML tags
    :param text:
    :return:
    """
    bs = BeautifulSoup(str(text), 'html.parser')
    return bs.text


def clean_txt(raw):
    """
    Remove emoji characters
    :param raw:
    :return:
    """
    res = re.compile(u'[\U00010000-\U0010ffff\uD800-\uDBFF\uDC00-\uDFFF]')
    return res.sub('', raw)


def seg(text, sw):
    """
    Tokenize; NLPTokenizer would segment based on full named-entity recognition and POS tagging
    :param text:
    :param sw:
    :return:
    """
    # text = ' '.join([i.word for i in NLPTokenizer.segment(text) if i.word.strip() and i.word not in sw])
    text = ' '.join([i.strip() for i in jieba.cut(text) if i.strip() and i not in sw])
    return text


def stop_words(path: str) -> list:
    """
    Load the stop-word list
    :return:
    """
    with open(path, 'r', encoding='utf-8') as swf:
        return [line.strip() for line in swf]
def segment_para(text):
"""
:param text:
:return:
"""
split_pattern = re.compile(r'\n|。|?|!|\?|\!|\s')
global_sentences = split_pattern.split(text)
global_sentences = ''.join([str(i).strip() + '。' for i in global_sentences if len(i) >= 13])
return global_sentences
def cut_sent(para):
    """
    Split a paragraph into sentences on Chinese/English terminators, keeping
    closing quotes attached to the sentence they end.
    :param para: paragraph text
    :return: list of sentences
    """
    para = re.sub('([。!?\?])([^”’])', r"\1\n\2", para)  # single-character terminators
    para = re.sub('(\.{6})([^”’])', r"\1\n\2", para)  # English ellipsis
    para = re.sub('(\…{2})([^”’])', r"\1\n\2", para)  # Chinese ellipsis
    para = re.sub('([。!?\?][”’])([^,。!?\?])', r'\1\n\2', para)
    # A closing quote only ends a sentence when preceded by a terminator, so
    # the split newline goes after the quote; the rules above preserve quotes.
    para = para.rstrip()  # drop trailing newlines at the end of the paragraph
    return para.split("\n")
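# Usage sketch (illustrative sample text): quoted speech splits after the
# closing quote, per the rules above.
# >>> cut_sent('今天天气不错。我们出去走走吧!“好啊?”他说。')
# ['今天天气不错。', '我们出去走走吧!', '“好啊?”', '他说。']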
def transform_data(text, label):
    """
    Format one fastText training line: '__label__<label> <text>'.
    :param text: tokenized text
    :param label: class label
    :return: fastText-formatted line
    """
    fasttext_line = '__label__{} {}'.format(label, text)
    return fasttext_line
def load_risk_keywords(path: str) -> dict:
    """
    Load risk-category keywords from an Excel file (one column per category).
    :param path: xlsx path
    :return: {category: [keyword, ...]}
    """
df = pandas.read_excel(path)
dict_risk_keywords = dict()
for key in df:
list_risk_keywords = []
list_df = df[key].to_list()
for keyword in list_df:
if type(keyword) is str:
list_risk_keywords.append(keyword.strip())
dict_risk_keywords[key] = list_risk_keywords
return dict_risk_keywords
def is_include_compound_words(text: str, compound_words: list) -> bool:
    """
    Check whether the text contains every part of a compound word, in order:
    each part must occur after the end of the previous match.
    :param text: text to search
    :param compound_words: ordered list of word parts
    :return: True if all parts match in order, else False
    """
for compound_word in compound_words:
if compound_word not in text:
return False
else:
text = text[text.find(compound_word) + len(compound_word):]
return True
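# A minimal usage sketch (hypothetical sample text) showing the ordered
# matching described above:
if __name__ == '__main__':
    sample_text = '出口管制再度升级,多家企业被列入实体清单'
    print(is_include_compound_words(sample_text, '出口+实体清单'.split('+')))  # True: parts appear in order
    print(is_include_compound_words(sample_text, '实体清单+出口'.split('+')))  # False: order reversed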
{
"port": 4005,
"ip": "114.116.90.53",
"model_name": "FastText-Model",
"train_url": "/platform/classification/FastText-Model/model_train/",
"application_url": "/platform/classification/FastText-Model/pred/",
"show_file_url": "/platform/operation/process/show_file/",
"remove_file_url": "/platform/operation/process/remove_file/",
"upload_file_url": "/platform/operation/process/upload_file/",
"publish_version_url": "/platform/operation/process/publish_version/",
"model_test_url": "/platform/operation/process/model_test/",
"dataset_saved_path": "../datasets/classification/FastText-Model",
"model_saved_path": "../../../model_saved/classification/FastText-Model",
"java_call_back_url": "http://114.115.205.50:9988/manage/algorithmModel/process/changeStatus",
"train_info": {
"modelProcessId": {
"paramter_name": "训练日志Id",
"paramter_data": "",
"paramter_description": "模型训练日志id"
},
"task_id": {
"paramter_name": "模型训练任务id",
"paramter_data": "",
"paramter_description": "模型训练任务id"
},
"learning_rate": {
"paramter_name": "学习率",
"paramter_data": 0.03,
"paramter_description": "学习率"
},
"gpu": {
"paramter_name": "GPU",
"paramter_data": "",
"paramter_description": "是否使用GPU"
},
"data_path": {
"paramter_name": "语料版本",
"paramter_data": "",
"paramter_description": "模型训练时用户填入参数——语料版本"
},
"model_path": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "模型训练时用户填入参数——模型版本"
}
},
"application_info": {
"title": {
"paramter_name": "文章标题",
"paramter_data": "",
"paramter_description": "文章标题"
},
"content": {
"paramter_name": "文章内容",
"paramter_data": "",
"paramter_description": "文章内容"
},
"id": {
"paramter_name": "文章id",
"paramter_data": "",
"paramter_description": "文章id"
}
},
"show_file_info": {
"file_path": {
"paramter_name": "查询文件的相对路径",
"paramter_data": "",
"paramter_description": "要查询的文件目录,注意这里是相对地址,eg: 查询语料保存根目录dataset_saved_path的语料情况可传入../datasets/classification/"
}
},
"remove_file_info": {
"file_path": {
"paramter_name": "删除文件的相对路径",
"paramter_data": "",
"paramter_description": "要删除的文件,注意这里是相对地址,eg: 删除语料保存根目录dataset_saved_path下的ssyw_column_classify语料文件夹可传入../datasets/classification/ssyw_column_classify"
},
"flag": {
"paramter_name": "文件删除标识",
"paramter_data": "",
"paramter_description": "删除文件还是文件夹的标识,删除文件时flag=“/”,删除文件夹时flag为空字符串"
}
},
"upload_file_info": {
"request_url": {
"paramter_name": "语料下载地址",
"paramter_data": "",
"paramter_description": "待上传的语料文件下载地址,当前仅支持xlsx和xls文件,且文件内容需要包含title、content、label三个字段"
},
"task_id": {
"paramter_name": "模型训练任务id",
"paramter_data": "",
"paramter_description": "模型训练任务id"
}
},
"publish_version": {
"trainModelName": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "待发布的模型版本"
},
"task_id": {
"paramter_name": "模型训练任务id",
"paramter_data": "",
"paramter_description": "模型训练任务id"
}
},
"model_test_info": {
"task_id": {
"paramter_name": "模型训练任务id",
"paramter_data": "",
"paramter_description": "模型训练任务id"
},
"trainModelName": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "待测试的模型版本"
},
"data_type": {
"paramter_name": "测试方式",
"paramter_data": "",
"paramter_description": "可选项:url地址解析标题正文|file文件"
},
"request_url": {
"paramter_name": "测试文件下载地址",
"paramter_data": "",
"paramter_description": "待上传的测试文件下载地址,当前仅支持xlsx和xls文件,且文件内容需要包含title、content、label三个字段"
}
}
}
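#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# A hedged client sketch (not part of the original service) showing how the
# config above could drive a training request: the full endpoint is assumed
# to be http://ip:port + train_url, and the payload is built from the
# "paramter_data" fields of "train_info". The file name 'config.json' and
# the learning-rate value are illustrative assumptions.
import json
import requests

with open('config.json', 'r', encoding='utf-8') as f:
    cfg = json.load(f)
train_url = 'http://{}:{}{}'.format(cfg['ip'], cfg['port'], cfg['train_url'])
# One payload field per train_info entry, pre-filled with its paramter_data
payload = {name: info['paramter_data'] for name, info in cfg['train_info'].items()}
payload['learning_rate'] = 0.03  # illustrative override
response = requests.post(train_url, json=payload, timeout=30)
print(response.status_code, response.text)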
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import requests
import json
# Java callback endpoint
java_call_back_url = "http://192.168.1.82:9988/manage/algorithmModel/process/changeStatus"
# Log output format
formatter = logging.Formatter("%(asctime)s [%(levelname)s] <%(processName)s> (%(threadName)s) %(message)s")
# Create a logger and set its level
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
url = "http://192.168.1.82:9988/manage/algorithmModel/process/changeStatus"
payload = json.dumps({
"result": "{'code': 200, 'result': '模型训练成功!模型评测指标为: precision: 100% recall: 100% f1-score: 100%', 'model_path': '../../../model_saved/classification/FastText-Model/11111/V0-2023_06_11-15_33_15/model.bin'}",
"id": "1455372078906662913"
})
headers = {
'Content-Type': 'application/json'
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
# dict_result = {'code': 200, 'result': '模型训练成功!模型评测指标为: precision: 100% recall: 100% f1-score: 100%', 'model_path': '../../../model_saved/classification/FastText-Model/11111/V0-2023_06_11-15_33_15/model.bin'}
# modelProcessId = "1455372078906662913"
# str_dict_result = json.dumps(dict_result, ensure_ascii=False)
# print(str_dict_result)
# # todo: call the Java status-update endpoint with the post-training result
# payload = json.dumps({
# "id": modelProcessId,
# "result": str_dict_result
# })
# print(payload)
# # todo: call the runtime parameter-generation helper to produce currentTime, appId
# headers = {
# 'Content-Type': 'application/json'
# }
# r1 = requests.post(url="http://192.168.1.82:9988/manage/algorithmModel/process/changeStatus",
# headers=headers, data=payload)
#
# r1_json = json.loads(r1.text)
# # print(r1_json)
# print(r1_json)
# python3.9.5
gunicorn==20.1.0
beautifulsoup4==4.11.1
datasketch==1.5.3
dynamic_yaml==1.2.3
emoji==1.4.2
Flask==2.0.1
hanlp==2.1.0b3
jieba==0.42.1
jionlp_py39==1.3.45
keras_bert==0.88.0
matplotlib==3.3.4
numpy==1.19.5
pandas==1.1.5
psutil==5.8.0
PyMySQL==1.0.2
python_Levenshtein==0.20.5
pytorch_pretrained_bert==0.6.2
PyYAML==5.3.1
rarfile==4.0
requests==2.28.1
scikit_learn==1.1.2
seaborn==0.11.2
simhash==2.0.0
tensorflow==2.6.0
torch==1.9.0
tqdm==4.62.2
Werkzeug==2.2.2
xlrd==1.1.0
XlsxWriter==3.0.1
protobuf==3.19.5
Levenshtein==0.20.5
sklearn==0.0
fasttext==0.9.2
#!/bin/sh
# Start the gunicorn-managed app and the standalone main server in the
# background; 'exec' was dropped because it would replace the shell before
# the second command could start.
nohup gunicorn -c app/app_config.py app/app_run:app --timeout 1200 >service.log 2>&1 &
nohup python app/main_server.py --timeout 300 >>service.log 2>&1 &
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/7/31 17:36
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : build_word2vec_weights
# @Author : LiuYan
# @Time : 2020/6/24 14:46
from itertools import islice
import numpy as np
import torch
from utils.utils import timeit
@timeit
def load_word2vec(path=None, word_vocab=None, embedding_dim=None):
"""
loading word vector
:param path: None
:param word_vocab: None
:param embedding_dim: 768/100 bert/glove.6B.100d
:return: a vector corresponding to word_vocab.
"""
    word_vocab_dict = word_vocab.stoi
    vectors_vocab = load_vec(path, embedding_dim=embedding_dim)
    # Fall back to zero vectors when the file has no pad/unk entry (the
    # original if/elif chain left these names unbound in that case)
    pad = vectors_vocab.get('[PAD]', vectors_vocab.get('pad', [0.0] * embedding_dim))
    unk = vectors_vocab.get('[UNK]', vectors_vocab.get('unk', [0.0] * embedding_dim))
vocab_size = len(word_vocab)
embed_weights = torch.zeros(vocab_size, embedding_dim)
for word, index in word_vocab_dict.items(): # word and index
if word in vectors_vocab:
em = vectors_vocab[word]
elif word == '<pad>':
em = pad
else:
em = unk
embed_weights[index, :] = torch.from_numpy(np.array(em))
return embed_weights
@timeit
def load_vec(path=None, embedding_dim=None):
"""
loading word vector
:param path: None
:param embedding_dim: 768/100 bert/glove.6B.100d
:return: a dictionary of word vectors
"""
vectors_vocab = {}
with open(path, 'r', encoding='utf-8') as f:
for line in islice(f, 1, None): # skip the first row
items = line.split()
char, vectors = items[0], items[-embedding_dim:]
vectors = [float(vector) for vector in vectors]
vectors_vocab[char] = vectors
return vectors_vocab
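# A minimal usage sketch, assuming a GloVe-style vector file exists at the
# (hypothetical) path below: a header line, then '<token> v1 ... vN' rows.
# vectors = load_vec(path='../data/glove.6B.100d.txt', embedding_dim=100)
# print('loaded {} tokens'.format(len(vectors)))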
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : database_mysql
# @Author : LiuYan
# @Time : 2021/9/14 17:51
import time
import base64
import pymysql
from base.config.base_config import db_config
class DatabaseMySQL(object):
def __init__(self):
super(DatabaseMySQL, self).__init__()
self._conn = None
self._cursor = None
self._connect()
def _connect(self) -> None:
self._conn = pymysql.connect(**db_config)
self._cursor = self._conn.cursor()
def query(self, id_model_process: str) -> list:
        # Fetch the task record (note: id is interpolated directly into the
        # SQL, so callers must pass trusted values)
        sql_query = 'select * from brpa_algorithm_model_process where id={};'.format(id_model_process)
print('SQL: {}'.format(sql_query))
self._cursor.execute(sql_query)
list_result = self._cursor.fetchall()
return list_result
    def update(self, id_model_process: str, process_result: str, model_path: str or None, status: int,
               update_by="'yan'", update_time=None) -> None:
        # Replace single quotes inside process_result so it embeds safely in the SQL literal
        process_result = process_result.replace("'", '"')
        # Update (timestamp taken at call time; a strftime default in the
        # signature would be evaluated once at function definition)
        update_time = time.strftime('%Y-%m-%d %H:%M:%S')
sql_update = '''update brpa_algorithm_model_process
set process_result = '{}', model_path = '{}', status = {}, update_by = {}, update_time = '{}'
where id = {};'''.format(
process_result, model_path, status, update_by, update_time, id_model_process
) if model_path else '''update brpa_algorithm_model_process
set process_result = '{}', status = {}, update_by = {}, update_time = '{}'
where id = {};'''.format(
process_result, status, update_by, update_time, id_model_process
)
print('SQL: {}'.format(sql_update))
self._cursor.execute(sql_update)
self._conn.commit()
def close(self) -> None:
self._cursor.close()
self._conn.close()
if __name__ == '__main__':
import json
id_model_process = '1453295293008211969'
dict_result = {
'result': '训练成功!模型评测指标为: precision: {:.0f}% recall: {:.0f}% f1-score: {:.0f}%'.format(
0.91111111111111 * 100,
0.91111111111111 * 100,
0.91111111111111 * 100
)
}
dbm = DatabaseMySQL()
list_result = dbm.query(id_model_process=id_model_process)
model_path = '/home/zzsn/liuyan/zzsn_nlp_br/classification/model/model_saved/fast_text-pro_info_filter-2021_10_14-18_37_50/model.bin'
dbm.update(id_model_process=id_model_process, process_result=dict_result['result'], model_path=model_path, status=1)
dict_result = {
'result': '训练失败!'
}
dbm.update(id_model_process='1453536215885279233', process_result=dict_result['result'], model_path=None, status=2)
list_result = dbm.query(id_model_process=id_model_process)
dbm.close()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : log
# @Author : LiuYan
# @Time : 2020/6/21 21:08
import os
import logging
import logging.handlers
from pathlib import Path
__all__ = ['logger']
# User configuration section ↓
import tqdm
LEVEL_COLOR = {
'DEBUG': 'cyan',
'INFO': 'green',
'WARNING': 'yellow',
'ERROR': 'red',
'CRITICAL': 'red,bg_white',
}
STDOUT_LOG_FMT = '%(log_color)s[%(asctime)s] [%(levelname)s] [%(threadName)s] [%(filename)s:%(lineno)d] %(message)s'
STDOUT_DATE_FMT = '%Y-%m-%d %H:%M:%S'
FILE_LOG_FMT = '[%(asctime)s] [%(levelname)s] [%(threadName)s] [%(filename)s:%(lineno)d] %(message)s'
FILE_DATE_FMT = '%Y-%m-%d %H:%M:%S'
# User configuration section ↑
class ColoredFormatter(logging.Formatter):
COLOR_MAP = {
'black': '30',
'red': '31',
'green': '32',
'yellow': '33',
'blue': '34',
'magenta': '35',
'cyan': '36',
'white': '37',
'bg_black': '40',
'bg_red': '41',
'bg_green': '42',
'bg_yellow': '43',
'bg_blue': '44',
'bg_magenta': '45',
'bg_cyan': '46',
'bg_white': '47',
'light_black': '1;30',
'light_red': '1;31',
'light_green': '1;32',
'light_yellow': '1;33',
'light_blue': '1;34',
'light_magenta': '1;35',
'light_cyan': '1;36',
'light_white': '1;37',
'light_bg_black': '100',
'light_bg_red': '101',
'light_bg_green': '102',
'light_bg_yellow': '103',
'light_bg_blue': '104',
'light_bg_magenta': '105',
'light_bg_cyan': '106',
'light_bg_white': '107',
}
def __init__(self, fmt, datefmt):
super(ColoredFormatter, self).__init__(fmt, datefmt)
def parse_color(self, level_name):
color_name = LEVEL_COLOR.get(level_name, '')
if not color_name:
return ""
color_value = []
color_name = color_name.split(',')
for _cn in color_name:
color_code = self.COLOR_MAP.get(_cn, '')
if color_code:
color_value.append(color_code)
return '\033[' + ';'.join(color_value) + 'm'
def format(self, record):
record.log_color = self.parse_color(record.levelname)
message = super(ColoredFormatter, self).format(record) + '\033[0m'
return message
class TqdmLoggingHandler(logging.Handler):
def __init__(self, level=logging.NOTSET):
super().__init__(level)
def emit(self, record):
try:
msg = self.format(record)
tqdm.tqdm.write(msg)
self.flush()
except (KeyboardInterrupt, SystemExit):
raise
except:
self.handleError(record)
def _get_logger(log_to_file=True, log_filename='default.log', log_level='DEBUG'):
_logger = logging.getLogger(__name__)
stdout_handler = logging.StreamHandler()
stdout_handler.setFormatter(
ColoredFormatter(
fmt=STDOUT_LOG_FMT,
datefmt=STDOUT_DATE_FMT,
)
)
_logger.addHandler(stdout_handler)
# _logger.setLevel(logging.INFO)
# _logger.addHandler(TqdmLoggingHandler())
if log_to_file:
# _tmp_path = os.path.dirname(os.path.abspath(__file__))
# _tmp_path = os.path.join(_tmp_path, '../logs/{}'.format(log_filename))
_project_path = os.path.dirname(os.getcwd())
_tmp_path = os.path.join(_project_path, 'logs')
Path(_tmp_path).mkdir(parents=True, exist_ok=True)
_tmp_path = os.path.join(_tmp_path, log_filename)
file_handler = logging.handlers.TimedRotatingFileHandler(_tmp_path, when='midnight', backupCount=30)
file_formatter = logging.Formatter(
fmt=FILE_LOG_FMT,
datefmt=FILE_DATE_FMT,
)
file_handler.setFormatter(file_formatter)
_logger.addHandler(file_handler)
_logger.setLevel(log_level)
return _logger
logger = _get_logger(log_to_file=False)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : tool
# @Author : LiuYan
# @Time : 2021/6/21 11:22
import re
import json
def read_json(path: str) -> list:
    # Read a JSON-lines file: one JSON object per line
    examples = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            examples.append(json.loads(line))
    return examples
def clean_text(text: str) -> str:
return re.sub('\n+', '\n', text.strip().replace(' ', '').replace('\t', '').replace('\r', ''))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : utils
# @Author : LiuYan
# @Time : 2021/4/16 17:54
from __future__ import unicode_literals, print_function, division
import time
import xlsxwriter
def timeit(f):
def timed(*args, **kw):
ts = time.time()
print('......begin {0:8s}......'.format(f.__name__))
result = f(*args, **kw)
te = time.time()
print('......finish {0:8s}, took:{1:.4f} sec......'.format(f.__name__, te - ts))
return result
return timed
def list2xlsx(result_list: list, xlsx_path: str):
"""
:param result_list: [
{
'id': 1,
'title': 't',
...
}
...
]
:param xlsx_path: '/home/zzsn/liuyan/result/result.xlsx'
:return:
"""
workbook = xlsxwriter.Workbook(xlsx_path)
worksheet = workbook.add_worksheet('sheet1')
worksheet.write_row(row=0, col=0, data=list(result_list[0].keys()))
for row_index, result_dict in enumerate(result_list):
worksheet.write_row(row=row_index + 1, col=0, data=list(
';'.join(result) if type(result) in [list, set] else result for result in result_dict.values()
))
workbook.close()
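# Usage sketch (hypothetical output path): writes a header row from the first
# dict's keys and one row per dict, joining list/set values with ';'.
# list2xlsx([{'id': 1, 'title': 't', 'tags': ['a', 'b']}], '/tmp/result.xlsx')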
Get the corpus directory listing:
Actual show_file_url address: http://ip:port + operation_prefix + show_file_url
Actual upload_file_url address: http://ip:port + operation_prefix + upload_file_url
Actual publish_version_url address: http://ip:port + operation_prefix + publish_version_url
Actual model_test_url address: http://ip:port + operation_prefix + model_test_url
Actual train_url address: http://ip:port + train_url
Actual application_url address: http://ip:port + application_prefix + /pred/
# Actual remove_file_url address: http://ip:port + operation_prefix + remove_file_url
http://114.116.90.53:4004/new_task/
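A small Python sketch of the address rules above (the helper name and the
'config.json' file name are illustrative; field names come from the config
that follows):
import json

def full_url(cfg: dict, key: str, prefix_key: str = 'operation_prefix') -> str:
    # Compose http://ip:port + prefix + relative path
    return 'http://{}:{}{}{}'.format(cfg['ip'], cfg['port'], cfg.get(prefix_key, ''), cfg[key])

with open('config.json', encoding='utf-8') as f:
    cfg = json.load(f)
print(full_url(cfg, 'show_file_url'))  # http://114.116.90.53:4004/platform/operation/process/show_file/
print(full_url(cfg, 'application_url', 'application_prefix'))  # .../ssyw_column/classify/pred/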
{
"port": 4004,
"ip": "114.116.90.53",
"model_name": "ssyw_column_classify",
"operation_prefix": "/platform/operation/process",
"application_prefix": "/platform/classification/ssyw_column/classify",
"train_url": "/platform/classification/ssyw_column/classify/model_train/",
"application_url": "/pred/",
"show_file_url": "/show_file/",
"remove_file_url": "/remove_file/",
"upload_file_url": "/upload_file/",
"publish_version_url": "/publish_version/",
"model_test_url": "/model_test/",
"dataset_saved_path": "../datasets/classification",
"model_saved_path": "../../../model_saved/classification",
"java_call_back_url": "http://114.115.205.50:9988/manage/algorithmModel/process/changeStatus",
"train_info": {
"modelProcessId": {
"paramter_name": "模型任务Id",
"paramter_data": "",
"paramter_description": "模型训练任务id,关联哪个模型"
},
"learning_rate": {
"paramter_name": "学习率",
"paramter_data": 0.03,
"paramter_description": "学习率"
},
"epoch": {
"paramter_name": "训练轮数",
"paramter_data": 10,
"paramter_description": "训练轮数"
},
"gpu": {
"paramter_name": "GPU",
"paramter_data": "",
"paramter_description": "是否使用GPU"
},
"data_path": {
"paramter_name": "语料版本",
"paramter_data": "",
"paramter_description": "模型训练时用户填入参数——语料版本"
},
"model_path": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "模型训练时用户填入参数——模型版本"
}
},
"application_info": {
"title": {
"paramter_name": "文章标题",
"paramter_data": "",
"paramter_description": "文章标题"
},
"content": {
"paramter_name": "文章内容",
"paramter_data": "",
"paramter_description": "文章内容"
},
"id": {
"paramter_name": "文章id",
"paramter_data": "",
"paramter_description": "文章id"
}
},
"show_file_info": {
"file_path": {
"paramter_name": "查询文件的相对路径",
"paramter_data": "",
"paramter_description": "要查询的文件目录,注意这里是相对地址,eg: 查询语料保存根目录dataset_saved_path的语料情况可传入../datasets/classification/"
}
},
"remove_file_info": {
"file_path": {
"paramter_name": "删除文件的相对路径",
"paramter_data": "",
"paramter_description": "要删除的文件,注意这里是相对地址,eg: 删除语料保存根目录dataset_saved_path下的ssyw_column_classify语料文件夹可传入../datasets/classification/ssyw_column_classify"
},
"flag": {
"paramter_name": "文件删除标识",
"paramter_data": "",
"paramter_description": "删除文件还是文件夹的标识,删除文件时flag=“/”,删除文件夹时flag为空字符串"
}
},
"upload_file_info": {
"url_path": {
"paramter_name": "语料下载地址",
"paramter_data": "",
"paramter_description": "待上传的语料文件下载地址,当前仅支持xlsx和xls文件,且文件内容需要包含title、content、label三个字段"
},
"dataFolderName": {
"paramter_name": "语料版本名称",
"paramter_data": "",
"paramter_description": "待上传的语料版本名称,在训练的时候使用"
}
},
"publish_version": {
"trainModelName": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "待发布的模型版本"
},
"versionName": {
"paramter_name": "发布版本号",
"paramter_data": "",
"paramter_description": "待发布的版本号"
}
},
"model_test_info": {
"modelProcessId": {
"paramter_name": "模型任务Id",
"paramter_data": "",
"paramter_description": "模型训练任务id,关联哪个模型"
},
"trainModelName": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "待测试的模型版本"
},
"data_type": {
"paramter_name": "测试方式",
"paramter_data": "",
"paramter_description": "可选项:url地址解析|file文件"
},
"url_path": {
"paramter_name": "测试文件下载地址",
"paramter_data": "",
"paramter_description": "待上传的测试文件下载地址,当前仅支持xlsx和xls文件,且文件内容需要包含title、content、label三个字段"
},
"title": {
"paramter_name": "文章标题",
"paramter_data": "",
"paramter_description": "文章标题"
},
"content": {
"paramter_name": "文章内容",
"paramter_data": "",
"paramter_description": "文章内容"
}
}
}
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import socket
import os
import psutil
# Get CPU load (cumulative fraction of busy time, read from /proc/stat)
def get_cpu():
    with open("/proc/stat", "r") as f:
        line = ""
        while "cpu " not in line:
            line = f.readline()
    # The aggregate "cpu" line has a double space, so values start at index 2
    spl = line.split(" ")
    worktime = int(spl[2]) + int(spl[3]) + int(spl[4])  # user + nice + system
    idletime = int(spl[5])
    if worktime + idletime == 0:
        return 0
    return float(worktime) / (idletime + worktime)
def get_hostname():
return socket.gethostname()
def get_uptime():
with open('/proc/uptime', 'r') as f:
uptime_seconds = float(f.readline().split()[0])
uptime_minutes, uptime_seconds = divmod(uptime_seconds, 60)
uptime_hours, uptime_minutes = divmod(uptime_minutes, 60)
uptime_days, uptime_hours = divmod(uptime_hours, 24)
return f"{int(uptime_days)} days, {int(uptime_hours)} hours, {int(uptime_minutes)} minutes, {int(uptime_seconds)} seconds"
def get_kernel_version():
return os.uname().release
# Get CPU usage as a percentage string
def get_cpu_info():
    cpu_usage = int(get_cpu() * 100)
    # cpu_tip = "CPU usage (max 100%): " + str(cpu_usage) + "%"
    # print(str(cpu_usage))
    return str(cpu_usage)
def get_memory_info():
memory_info = psutil.virtual_memory()
return f"Total memory: {memory_info.total / 1024 / 1024:.2f} MB\nUsed memory: {memory_info.used / 1024 / 1024:.2f} MB\nFree memory: {memory_info.available / 1024 / 1024:.2f} MB"
def get_disk_usage():
partitions = psutil.disk_partitions()
disk_usage = ""
for partition in partitions:
usage = psutil.disk_usage(partition.mountpoint)
disk_usage += f"{partition.mountpoint} - Total: {usage.total / 1024 / 1024:.2f} MB, Used: {usage.used / 1024 / 1024:.2f} MB, Free: {usage.free / 1024 / 1024:.2f} MB\n"
return disk_usage
def get_network_interfaces():
interfaces = psutil.net_if_addrs()
network_interfaces = ""
for interface_name, interface_addresses in interfaces.items():
network_interfaces += f"{interface_name}\n"
for address in interface_addresses:
if address.family == socket.AF_INET:
network_interfaces += f" IP address: {address.address}\n"
network_interfaces += f" Netmask: {address.netmask}\n"
elif address.family == socket.AF_PACKET:
network_interfaces += f" MAC address: {address.address}\n"
return network_interfaces
def main_pro():
hostname = get_hostname()
UpTime = get_uptime()
KN_Version = get_kernel_version()
CPU_Info = get_cpu_info()
Memory_Info = get_memory_info()
Disk_Usage = get_disk_usage()
Network_Interfaces = get_network_interfaces()
dict_result = {
"HostName": hostname,
"UpTime": UpTime,
"KN_Version": KN_Version,
"CPU_Info": CPU_Info,
"Memory_Info": Memory_Info,
"Disk_Usage": Disk_Usage,
"Network_Interfaces": Network_Interfaces
}
return dict_result
if __name__ == "__main__":
print(f"Hostname: {get_hostname()}")
print(f"Uptime: {get_uptime()}")
print(f"Kernel version: {get_kernel_version()}")
print(f"CPU information:\n{get_cpu_info()}")
print(f"Memory information:\n{get_memory_info()}")
print(f"Disk usage:\n{get_disk_usage()}")
print(f"Network interfaces:\n{get_network_interfaces()}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
创建模型任务功能
http://114.116.90.53:4004/new_task/
"""
import os
import json
import logging
from flask import Flask, request
from main_model import main_info
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] <%(processName)s> (%(threadName)s) %('
'message)s')
logger = logging.getLogger(__name__)
app = Flask(__name__)
# todo: the model name is derived from its directory name under root_path
root_path = "../"
# Cross-origin (CORS) support
from flask_cors import CORS
CORS(app, supports_credentials=True)
@app.route('/', methods=['POST'])
def hello_world():
app.logger.info('请选择正确的方式上传!')
return '请选择正确的方式上传!'
@app.route(f'/get_server_info/', methods=['GET', 'POST'])
def get_server_info():
dict_result = main_info()
app.logger.info(dict_result)
return json.dumps(dict_result, ensure_ascii=False)
@app.route(f'/new_task/', methods=['POST'])
def build_task():
try:
params = json.loads(request.data.decode('utf-8'))
modelName = params["modelName"]
modelPath = os.path.join(root_path, modelName)
        if modelName:
            # Return the contents of the model directory's config.json
            config_path = os.path.join(modelPath, "config.json")
            with open(config_path, 'r', encoding='utf-8') as f:
                config_json = json.load(f)
dict_result = {
"code": 200,
'handleMsg': 'Success',
'logs': None,
"resultData": config_json
}
else:
dict_result = {
"code": 500,
'handleMsg': 'Failure',
'logs': None,
"resultData": "请选择模型管理中存在的模型来进行创建模型任务!"
}
except Exception as e:
dict_result = {
'code': 500,
'success': 'false',
'message': "操作失败" + str(e),
'result': None
}
app.logger.info(dict_result)
return json.dumps(dict_result, ensure_ascii=False)
if __name__ == '__main__':
app.config['JSON_AS_ASCII'] = False
app.config['JSONIFY_MIMETYPE'] = "application/json;charset=utf-8"
app.run(host='0.0.0.0', port=4004, debug=False)
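# A hedged client sketch for the /new_task/ endpoint above (the modelName
# value is illustrative and must name a directory under root_path that
# contains a config.json):
# import json, requests
# resp = requests.post(
#     'http://114.116.90.53:4004/new_task/',
#     headers={'Content-Type': 'application/json'},
#     data=json.dumps({'modelName': 'FastText-Model'}).encode('utf-8')
# )
# print(resp.json())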
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import socket
import os
import psutil
import platform
# Get CPU load (cumulative fraction of busy time, read from /proc/stat)
def get_cpu():
    with open("/proc/stat", "r") as f:
        line = ""
        while "cpu " not in line:
            line = f.readline()
    # The aggregate "cpu" line has a double space, so values start at index 2
    spl = line.split(" ")
    worktime = int(spl[2]) + int(spl[3]) + int(spl[4])  # user + nice + system
    idletime = int(spl[5])
    if worktime + idletime == 0:
        return 0
    return float(worktime) / (idletime + worktime)
def get_hostname():
return socket.gethostname()
def get_uptime():
with open('/proc/uptime', 'r') as f:
uptime_seconds = float(f.readline().split()[0])
uptime_minutes, uptime_seconds = divmod(uptime_seconds, 60)
uptime_hours, uptime_minutes = divmod(uptime_minutes, 60)
uptime_days, uptime_hours = divmod(uptime_hours, 24)
return f"{int(uptime_days)} days, {int(uptime_hours)} hours, {int(uptime_minutes)} minutes, {int(uptime_seconds)} seconds"
def get_kernel_version():
return os.uname().release
# Get CPU usage as a percentage string
def get_cpu_info():
    cpu_usage = int(get_cpu() * 100)
    # cpu_tip = "CPU usage (max 100%): " + str(cpu_usage) + "%"
    # print(str(cpu_usage))
    return str(cpu_usage)
def get_memory_info():
memory_info = psutil.virtual_memory()
return f"Total memory: {memory_info.total / 1024 / 1024:.2f} MB\nUsed memory: {memory_info.used / 1024 / 1024:.2f} MB\nFree memory: {memory_info.available / 1024 / 1024:.2f} MB"
def get_disk_usage():
partitions = psutil.disk_partitions()
disk_usage = ""
for partition in partitions:
usage = psutil.disk_usage(partition.mountpoint)
disk_usage += f"{partition.mountpoint} - Total: {usage.total / 1024 / 1024:.2f} MB, Used: {usage.used / 1024 / 1024:.2f} MB, Free: {usage.free / 1024 / 1024:.2f} MB\n"
return disk_usage
def get_network_interfaces():
interfaces = psutil.net_if_addrs()
network_interfaces = ""
for interface_name, interface_addresses in interfaces.items():
network_interfaces += f"{interface_name}\n"
for address in interface_addresses:
if address.family == socket.AF_INET:
network_interfaces += f" IP address: {address.address}\n"
network_interfaces += f" Netmask: {address.netmask}\n"
elif address.family == socket.AF_PACKET:
network_interfaces += f" MAC address: {address.address}\n"
return network_interfaces
def get_public_ip():
    """
    Get the host's outbound IP via a UDP socket trick. Note this yields the
    local (often private) interface address, not necessarily a public IP.
    """
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80))
ip = s.getsockname()[0]
s.close()
return ip
def main_pro():
hostname = get_hostname()
# UpTime = get_uptime()
KN_Version = get_kernel_version()
CPU_Info = get_cpu_info()
Memory_Info = get_memory_info()
Disk_Usage = get_disk_usage()
ip = get_public_ip()
dict_result = {
"HostName": hostname,
# "UpTime": UpTime,
"KN_Version": KN_Version,
"CPU_Info": CPU_Info,
"Memory_Info": Memory_Info,
"Disk_Usage": Disk_Usage,
"Network_Interfaces": ip
}
return dict_result
def main_info():
    # OS information
    os_info = platform.platform()
    # Processor information
    processor_info = platform.processor()
    # Available memory
    mem_info = psutil.virtual_memory()
    available_mem = round(mem_info.available / 1024 / 1024, 2)
    # Available disk space
    disk_info = psutil.disk_usage('/')
    available_disk = round(disk_info.free / 1024 / 1024, 2)
    # Local (private) IP
    ip = get_public_ip()
    # Print machine information
    print("操作系统:", os_info)
    print("处理器型号:", processor_info)
    print("可用内存大小:", available_mem, "MB")
    print("可用硬盘大小:", available_disk, "MB")
    print("ip地址:", ip)
    dict_result = {
        "操作系统:": os_info,
        "处理器型号:": processor_info,
        "可用内存大小:": available_mem,
        "可用硬盘大小:": available_disk,
        "ip地址:": ip  # was hardcoded to "114.116.90.53"; use the detected address
}
return dict_result
if __name__ == "__main__":
main_info()
# import requests
#
# response = requests.get('https://api.ipify.org')
# public_ip = response.text
#
# print(public_ip)
# dict_result = main_pro()
# print(dict_result)