Commit 91d93313 Author: bruxellse_li

Platform model management

Parent 6291eec9
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="python@180.76.177.55:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.130.239:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22 password (1)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.9.59:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22 password (1)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="zzsn@192.168.1.149:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="zzsn@192.168.1.149:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
</component>
</project>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="49">
<item index="0" class="java.lang.String" itemvalue="pandas" />
<item index="1" class="java.lang.String" itemvalue="tqdm" />
<item index="2" class="java.lang.String" itemvalue="transformers" />
<item index="3" class="java.lang.String" itemvalue="sentencepiece" />
<item index="4" class="java.lang.String" itemvalue="keras" />
<item index="5" class="java.lang.String" itemvalue="gevent" />
<item index="6" class="java.lang.String" itemvalue="torch" />
<item index="7" class="java.lang.String" itemvalue="numpy" />
<item index="8" class="java.lang.String" itemvalue="Flask" />
<item index="9" class="java.lang.String" itemvalue="thulac" />
<item index="10" class="java.lang.String" itemvalue="beautifulsoup4" />
<item index="11" class="java.lang.String" itemvalue="fdfs_client" />
<item index="12" class="java.lang.String" itemvalue="pymysql" />
<item index="13" class="java.lang.String" itemvalue="selenium" />
<item index="14" class="java.lang.String" itemvalue="matplotlib" />
<item index="15" class="java.lang.String" itemvalue="pyecharts" />
<item index="16" class="java.lang.String" itemvalue="requests" />
<item index="17" class="java.lang.String" itemvalue="docx" />
<item index="18" class="java.lang.String" itemvalue="flask_sqlalchemy" />
<item index="19" class="java.lang.String" itemvalue="scikit_learn" />
<item index="20" class="java.lang.String" itemvalue="gensim" />
<item index="21" class="java.lang.String" itemvalue="sentence_transformers" />
<item index="22" class="java.lang.String" itemvalue="elasticsearch" />
<item index="23" class="java.lang.String" itemvalue="nltk" />
<item index="24" class="java.lang.String" itemvalue="symspellpy" />
<item index="25" class="java.lang.String" itemvalue="wordcloud" />
<item index="26" class="java.lang.String" itemvalue="concurrent_log_handler" />
<item index="27" class="java.lang.String" itemvalue="setuptools" />
<item index="28" class="java.lang.String" itemvalue="gunicorn" />
<item index="29" class="java.lang.String" itemvalue="jieba" />
<item index="30" class="java.lang.String" itemvalue="flask" />
<item index="31" class="java.lang.String" itemvalue="flak_cors" />
<item index="32" class="java.lang.String" itemvalue="paddle" />
<item index="33" class="java.lang.String" itemvalue="bert_serving" />
<item index="34" class="java.lang.String" itemvalue="certifi" />
<item index="35" class="java.lang.String" itemvalue="SQLAlchemy" />
<item index="36" class="java.lang.String" itemvalue="xlrd" />
<item index="37" class="java.lang.String" itemvalue="bert_serving_client" />
<item index="38" class="java.lang.String" itemvalue="pytime" />
<item index="39" class="java.lang.String" itemvalue="goose3" />
<item index="40" class="java.lang.String" itemvalue="Flask_Cors" />
<item index="41" class="java.lang.String" itemvalue="paddlepaddle" />
<item index="42" class="java.lang.String" itemvalue="trustai" />
<item index="43" class="java.lang.String" itemvalue="paddle_serving_client" />
<item index="44" class="java.lang.String" itemvalue="tritonclient" />
<item index="45" class="java.lang.String" itemvalue="paddle_serving_server" />
<item index="46" class="java.lang.String" itemvalue="paddlenlp" />
<item index="47" class="java.lang.String" itemvalue="openai" />
<item index="48" class="java.lang.String" itemvalue="feedparser" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Model-Management.iml" filepath="$PROJECT_DIR$/.idea/Model-Management.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Remote Python 3.9.5 (sftp://root@114.116.90.53:22/home/python/anaconda3/envs/JXYQ@py39/bin/python3.9)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
<component name="TemplatesService">
<option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" autoUpload="On explicit save action" serverName="FastText-Model" remoteFilesAllowedToDisappearOnAutoupload="false" autoUploadExternalChanges="true">
<serverData>
<paths name="FastText-Model">
<serverdata>
<mappings>
<mapping deploy="/" local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="python@180.76.177.55:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.130.239:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.115.141.81:22 password (1)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.9.59:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@114.116.90.53:22 password (1)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="zzsn@192.168.1.149:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="zzsn@192.168.1.149:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
<option name="myAutoUpload" value="ON_EXPLICIT_SAVE" />
</component>
</project>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="49">
<item index="0" class="java.lang.String" itemvalue="pandas" />
<item index="1" class="java.lang.String" itemvalue="tqdm" />
<item index="2" class="java.lang.String" itemvalue="transformers" />
<item index="3" class="java.lang.String" itemvalue="sentencepiece" />
<item index="4" class="java.lang.String" itemvalue="keras" />
<item index="5" class="java.lang.String" itemvalue="gevent" />
<item index="6" class="java.lang.String" itemvalue="torch" />
<item index="7" class="java.lang.String" itemvalue="numpy" />
<item index="8" class="java.lang.String" itemvalue="Flask" />
<item index="9" class="java.lang.String" itemvalue="thulac" />
<item index="10" class="java.lang.String" itemvalue="beautifulsoup4" />
<item index="11" class="java.lang.String" itemvalue="fdfs_client" />
<item index="12" class="java.lang.String" itemvalue="pymysql" />
<item index="13" class="java.lang.String" itemvalue="selenium" />
<item index="14" class="java.lang.String" itemvalue="matplotlib" />
<item index="15" class="java.lang.String" itemvalue="pyecharts" />
<item index="16" class="java.lang.String" itemvalue="requests" />
<item index="17" class="java.lang.String" itemvalue="docx" />
<item index="18" class="java.lang.String" itemvalue="flask_sqlalchemy" />
<item index="19" class="java.lang.String" itemvalue="scikit_learn" />
<item index="20" class="java.lang.String" itemvalue="gensim" />
<item index="21" class="java.lang.String" itemvalue="sentence_transformers" />
<item index="22" class="java.lang.String" itemvalue="elasticsearch" />
<item index="23" class="java.lang.String" itemvalue="nltk" />
<item index="24" class="java.lang.String" itemvalue="symspellpy" />
<item index="25" class="java.lang.String" itemvalue="wordcloud" />
<item index="26" class="java.lang.String" itemvalue="concurrent_log_handler" />
<item index="27" class="java.lang.String" itemvalue="setuptools" />
<item index="28" class="java.lang.String" itemvalue="gunicorn" />
<item index="29" class="java.lang.String" itemvalue="jieba" />
<item index="30" class="java.lang.String" itemvalue="flask" />
<item index="31" class="java.lang.String" itemvalue="flak_cors" />
<item index="32" class="java.lang.String" itemvalue="paddle" />
<item index="33" class="java.lang.String" itemvalue="bert_serving" />
<item index="34" class="java.lang.String" itemvalue="certifi" />
<item index="35" class="java.lang.String" itemvalue="SQLAlchemy" />
<item index="36" class="java.lang.String" itemvalue="xlrd" />
<item index="37" class="java.lang.String" itemvalue="bert_serving_client" />
<item index="38" class="java.lang.String" itemvalue="pytime" />
<item index="39" class="java.lang.String" itemvalue="goose3" />
<item index="40" class="java.lang.String" itemvalue="Flask_Cors" />
<item index="41" class="java.lang.String" itemvalue="paddlepaddle" />
<item index="42" class="java.lang.String" itemvalue="trustai" />
<item index="43" class="java.lang.String" itemvalue="paddle_serving_client" />
<item index="44" class="java.lang.String" itemvalue="tritonclient" />
<item index="45" class="java.lang.String" itemvalue="paddle_serving_server" />
<item index="46" class="java.lang.String" itemvalue="paddlenlp" />
<item index="47" class="java.lang.String" itemvalue="openai" />
<item index="48" class="java.lang.String" itemvalue="feedparser" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.9.5 (sftp://root@114.116.90.53:22/home/python/anaconda3/envs/JXYQ@py39/bin/python3.9)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/FastText-Model.iml" filepath="$PROJECT_DIR$/.idea/FastText-Model.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteMappingsManager">
<list>
<list>
<remote-mappings server-id="python@sftp://root@114.116.90.53:22/home/python/anaconda3/envs/JXYQ@py39/bin/python3.9">
<settings>
<list>
<mapping local-root="$PROJECT_DIR$" remote-root="/home/python/lzc/新平台模型管理/FastText-Model" />
</list>
</settings>
</remote-mappings>
</list>
</list>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="WebServers">
<option name="servers">
<webServer id="89b44d2f-6e3e-40a6-8aa0-e1bc3fbcfd0f" name="FastText-Model">
<fileTransfer rootFolder="/home/python/lzc/新平台模型管理/FastText-Model" accessType="SFTP" host="114.116.90.53" port="22" sshConfigId="c0166359-81ab-467c-838f-8c7ee48db0f2" sshConfig="root@114.116.90.53:22 password">
<advancedOptions>
<advancedOptions dataProtectionLevel="Private" passiveMode="true" shareSSLContext="true" />
</advancedOptions>
</fileTransfer>
</webServer>
</option>
</component>
</project>
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : Operation.py
# @Time : 2023/3/21 08:25
# @Author : bruxelles_li
# @Software: PyCharm
import os
import subprocess
import sys
from flask import Flask, request, jsonify
from pathlib import Path
import shutil
from shutil import Error
from tqdm import tqdm
import requests
import socket
import datetime
import pandas as pd
import glob
# Append the working path
sys.path.append('../')
from base.app.base_app import *
from File_Operation.smart_extractor import extract_by_html_test

# Define the operation prefix
operation_prefix = "/platform/operation/process"  # upload, delete, test, publish
operation_file = Blueprint(f'{operation_prefix}', __name__)
UPLOAD_FOLDER = r'../datasets/classification/FastText-Model/'  # upload path
Path(UPLOAD_FOLDER).mkdir(parents=True, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
file_types = ['xls', 'xlsx']

# Copy the parent process's environment variables
env = os.environ.copy()
env['PYTHONPATH'] = "/home/python/anaconda3/envs/JXYQ@py39/lib/python3.9/site-packages"
# Find the previous corpus folder and copy its spreadsheet files into the new directory
def find_dir(dir_path, folder_path):
    list_dir = os.listdir(dir_path)
    # Folder names look like "folder-<timestamp>"; sort by the timestamp after the 7-char prefix, newest first
    sorted_list = sorted(list_dir, key=lambda x: x[7:], reverse=True)
    # Take the second entry: the newest is the folder just created for this upload, so the second is the previous version
    source_path = os.path.join(dir_path, sorted_list[1])
    for file_name in os.listdir(source_path):
        file_path = os.path.join(source_path, file_name)
        # If it is an xls/xlsx file, copy it into the timestamp-based directory
        if file_name.endswith('.xlsx') or file_name.endswith('.xls'):
            shutil.copy(file_path, folder_path)
    print("success")
def check_port(port):
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(('localhost', port))
print(f"Port {port} is available")
except socket.error as e:
print(f"Port {port} is already in use")
def get_available_port(start_port, end_port):
for port in range(start_port, end_port+1):
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
try:
s.bind(('localhost', port))
return port
except socket.error as e:
continue
raise Exception("No available ports in the specified range")
def get_random_available_port():
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(('localhost', 0))
return s.getsockname()[1]
def all_exist(avalue, bvalue):
if all(any(x in y for y in bvalue) for x in avalue):
return "True"
return "False"
def judge_len(file):
if len(file) == 2 or len(file) == 4:
return "True"
return "False"
def merge_df(dataset_path):
    all_files = []
    for file_type in file_types:
        all_files.extend(glob.glob(os.path.join(dataset_path, f'*.{file_type}')))
    # Merge all files into a single DataFrame
    combined_df = pd.concat([pd.read_excel(f) for f in all_files], ignore_index=True)
    # Drop duplicate rows
    combined_df.drop_duplicates(keep='first', inplace=True)
    return combined_df
@operation_file.route('/show_file/', methods=['POST'])
def show():
try:
data = json.loads(request.data.decode('utf-8'))
file_path = data["file_path"]
file_list = os.listdir(file_path)
logger.info(file_list)
if file_list:
new_file_list = []
for file_name in file_list:
if "Origin-Model" in file_name:
continue
else:
new_file_list.append(file_name)
result = {
'handleMsg': 'Success',
'code': 200,
'logs': '处理成功!',
"resultData": ";".join(new_file_list)
}
app.logger.info(result)
return jsonify(result)
else:
result = {
'handleMsg': 'Success',
'code': 200,
'logs': '处理成功!',
"resultData": "当前查询的文件路径下内容为空!"
}
app.logger.info(result)
return jsonify(result)
except Exception as e:
# print(e)
result = {
'handleMsg': 'Failure',
'code': 500,
'logs': '处理失败!当前查询的文件路径不存在,请选择正确的路径参数后重新操作' + str(e),
"resultData": ""
}
app.logger.info(result)
return jsonify(result)
@operation_file.route('/remove_file/', methods=['POST'])
def remove():
try:
data = json.loads(request.data.decode('utf-8'))
file_path = data["file_path"]
flag = data["flag"]
if flag == "/":
os.remove(file_path)
else:
shutil.rmtree(file_path)
result = {
"code": 200,
'handleMsg': 'Success',
'resultData': '文件删除成功!',
'logs': None
}
app.logger.info(result)
return jsonify(result)
except Exception as e:
result = {
"code": 500,
'handleMsg': 'Failure',
'resultData': None,
'logs': '删除失败,当前文件不存在,请选择正确的文件路径参数后重新删除!' + str(e)
}
app.logger.info(result)
return jsonify(result)
# todo: corpus upload comes first
@operation_file.route('/upload_file/', methods=['GET', 'POST'])
def upload_file():
    try:
        # todo: download the file with requests; the request carries a task id and a url
        data = json.loads(request.data.decode('utf-8'))
        request_url = data["request_url"]  # e.g. http://114.115.215.96/group1/M00/01/A3/wKjIbGSFKouAPnsHAApkU0_Y0Bg21.xlsx
        task_id = data["task_id"]
        # First check whether a corpus path already exists for this task id; if it does, merge the new corpus with the previous version
        root_path = app.config['UPLOAD_FOLDER'] + task_id
        if os.path.exists(root_path):
            # The corpus path exists: create a new timestamped directory to hold the previous files plus the file about to be downloaded
            folder_name = "folder" + "-" + str(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
            folder_path = app.config['UPLOAD_FOLDER'] + task_id + "/" + folder_name
            Path(folder_path).mkdir(parents=True, exist_ok=True)
            # Call find_dir to copy the previous version's files into the new directory
            find_dir(dir_path=root_path, folder_path=folder_path)
            # Then place the newly uploaded corpus into the same folder
            filename = request_url.split("/")[-1]
            save4path = os.path.join(folder_path, filename)
            # Download the file
            r = requests.get(request_url, stream=True)
            with open(save4path, "wb") as f:
                for chunk in r.iter_content(chunk_size=512):
                    f.write(chunk)
            # Merge the files with merge_df
            combined_df = merge_df(folder_path)
            print(f"Merged length: {len(combined_df)}")
            merge_filename = str(datetime.datetime.now().strftime('%Y%m%d%H%M%S')) + ".xlsx"
            combined_df.to_excel(os.path.join(folder_path, merge_filename), index=False)
            # Remove every other file
            for file_name in os.listdir(folder_path):
                if file_name != merge_filename:
                    os.remove(os.path.join(folder_path, file_name))
        else:
            # The corpus path does not exist: create the directory, then download the file
            folder_name = "folder" + "-" + str(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
            folder_path = app.config['UPLOAD_FOLDER'] + task_id + "/" + folder_name
            Path(folder_path).mkdir(parents=True, exist_ok=True)
            # Place the uploaded corpus into the folder
            filename = request_url.split("/")[-1]
            save4path = os.path.join(folder_path, filename)
            # Download the file
            r = requests.get(request_url, stream=True)
            with open(save4path, "wb") as f:
                for chunk in r.iter_content(chunk_size=512):
                    f.write(chunk)
result = {
"code": 200,
'handleMsg': 'Success',
'logs': '文件上传成功!',
"resultData": folder_name
}
app.logger.info(result)
        return jsonify(result)  # return the success message
except Error as e:
result = {
"code": 500,
'handleMsg': 'Failure',
'resultData': None,
'logs': '上传失败,当前上传的语料版本名称已经存在!' + str(e)
}
app.logger.info(result)
return jsonify(result)
except Exception as e1:
result = {
"code": 500,
'handleMsg': 'Failure',
'resultData': None,
'logs': '上传失败!' + str(e1)
}
app.logger.info(result)
return jsonify(result)
@operation_file.route('/publish_version/', methods=['POST'])
def publish():
    try:
        data = json.loads(request.data.decode('utf-8'))
        model_version = data['trainModelName']
        task_id = data["task_id"]
        classification = r'../../../model_saved/classification/FastText-Model/'
        model_path = classification + task_id + "/" + model_version
        micro_server_port = get_available_port(start_port=3000, end_port=3050)  # find a free port in the range 3000-3050
        print(micro_server_port)
        # subprocess.call(
        #     "python ../app/app_run.py -model_path {} -micro_server_port {}".format(model_path, int(micro_server_port)),
        #     shell=True)
        # subprocess.call(['python', '../app/app_run.py', '-model_path', model_path, '-micro_server_port', str(micro_server_port)],
        #                 env=env,
        #                 executable=sys.executable)
        cmd = ['python', '../app/app_run.py', '-model_path', model_path, '-micro_server_port', str(micro_server_port)]
        # Launch the child process in the background
        subprocess.Popen(cmd,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE,
                         shell=False,
                         close_fds=True,
                         preexec_fn=os.setsid,
                         env=env,
                         executable=sys.executable
                         )
        config_json = json.load(open('../config.json', 'r', encoding='utf-8'))
        publish_url = 'http://{}:{}/platform/classification/FastText-Model/model_pred/'.format(config_json['ip'], micro_server_port)
        result = {
            'handleMsg': 'Success',
            'code': 200,
            'logs': None,
            "resultData": publish_url
        }
except Exception as e:
# print(e)
app.logger.info(e)
result = {
'handleMsg': 'failure',
'code': 500,
'logs': '模型发布失败,请检查参数后重新发布!' + str(e),
"resultData": None
}
app.logger.info(result)
return jsonify(result)
@operation_file.route('/model_test/', methods=['POST'])
def model_test():
    try:
        data = json.loads(request.data.decode('utf-8'))
        task_id = data['task_id']
        model_version = data['trainModelName']
        data_type = data['data_type']
        request_url = data['request_url']
        # Root path of the saved models
        classification = r'../../../model_saved/classification/FastText-Model/'
        # Path holding the model files that currently serve the application
        model_path = classification + "Origin-Model-2023_03_31-12_15_17"
        # Backup path for the serving model files; back them up only if no backup exists yet
        model_copy = classification + "copy_model"
        if not os.path.exists(model_copy):
            # No backup yet: create the backup path first
            Path(model_copy).mkdir(parents=True, exist_ok=True)
            # Move the previous model files into the backup path, leaving the serving folder empty
            for dirpath, dirnames, filenames in os.walk(model_path):
                for f0 in tqdm(filenames):
                    src_file = os.path.join(dirpath, f0)
                    shutil.move(src_file, model_copy)
        else:
            # A backup already exists: just delete the previous model files, leaving the serving folder empty
            for dirpath, dirnames, filenames in os.walk(model_path):
                for f0 in tqdm(filenames):
                    src_file = os.path.join(dirpath, f0)
                    os.remove(src_file)
        # The serving folder is now empty; build the path of the files produced by the training run under test
        src_path = classification + task_id + "/" + model_version
        # If the trained model files exist and have the expected types, copy them into the serving path
        file_type_list = ['bin', 'json']
        if os.path.exists(src_path):
            for dirpath1, dirnames1, filenames1 in os.walk(src_path):
                for f1 in tqdm(filenames1):
                    file_type = f1.split('.')[1]
                    temp_file = os.path.join(dirpath1, f1)
                    if file_type in file_type_list:
                        shutil.copy(temp_file, model_path)
            # Choose the parsing method according to the test mode
            if data_type.strip() == "url":
                try:
                    # Test: extract the page content from HTML
                    dict_parse = extract_by_html_test(request_url)
                    # Call the model-test endpoint of the serving environment to handle the data
                    url = "http://localhost:4005/platform/classification/FastText-Model/model_test/"
                    MODEL_PATH = model_path + "/" + "model.bin"
                    payload = json.dumps({
                        "model_path": MODEL_PATH,
                        "title": dict_parse["title"],
                        "content": dict_parse["content"]
                    })
                    headers = {
                        'Content-Type': 'application/json'
                    }
                    response = requests.request("POST", url, headers=headers, data=payload)
                    text = response.text.encode('utf-8')
                    obj = json.loads(text)
                    label = obj["result"]["label"]
                    result = {
                        'handleMsg': 'success',
                        'code': 200,
                        'logs': None,
                        "resultData": {
                            "title": dict_parse["title"],
                            "content": dict_parse["content"],
                            "label": label
                        }
                    }
                    # Test finished: empty the serving folder, then restore the backup, mirroring the file branch below
                    for dirpath, dirnames, filenames in os.walk(model_path):
                        for f0 in tqdm(filenames):
                            os.remove(os.path.join(dirpath, f0))
                    for dirpath, dirnames, filenames in os.walk(model_copy):
                        for f0 in tqdm(filenames):
                            shutil.copy(os.path.join(dirpath, f0), model_path)
                    # All cleanup done; return the model test result
                    app.logger.info(result)
                    return jsonify(result)
                except Exception as e:
                    # Test failed: first empty the serving folder
                    for dirpath, dirnames, filenames in os.walk(model_path):
                        for f0 in tqdm(filenames):
                            src_file = os.path.join(dirpath, f0)
                            os.remove(src_file)
                    # Then copy the backed-up files back into the serving folder
                    for dirpath, dirnames, filenames in os.walk(model_copy):
                        for f0 in tqdm(filenames):
                            src_file = os.path.join(dirpath, f0)
                            shutil.copy(src_file, model_path)
                    # All cleanup done; return the failure result
                    result = {
                        'handleMsg': 'failure',
                        'code': 500,
                        'logs': "智能解析url 网页内容失败,请重新选择测试内容" + str(e),
                        "resultData": None
                    }
                    app.logger.info(result)
                    return jsonify(result)
            else:
                # Download the file first, then parse its contents
                filename = request_url.split("/")[-1]
                save4path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                # Download the file
                r = requests.get(request_url, stream=True)
                with open(save4path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=512):
                        f.write(chunk)
                data_df = pd.read_excel(save4path, keep_default_na=False).astype(str)
                list_one = []
                for idx, row in tqdm(data_df.iterrows()):
                    title = row["title"]
                    content = row["content"]
                    # Call the model-test endpoint of the serving environment to handle the data
                    url = "http://localhost:4005/platform/classification/FastText-Model/model_test/"
                    MODEL_PATH = model_path + "/" + "model.bin"
                    payload = json.dumps({
                        "model_path": MODEL_PATH,
                        "title": title,
                        "content": content
                    })
                    headers = {
                        'Content-Type': 'application/json'
                    }
                    try:
                        response = requests.request("POST", url, headers=headers, data=payload)
                        text = response.text.encode('utf-8')
                        obj = json.loads(text)
                        label = obj["result"]["label"]
                        list_one.append({
                            "title": title,
                            "content": content,
                            "label": label
                        })
                    except Exception:
                        continue
                result = {
                    'handleMsg': 'success',
                    'code': 200,
                    'logs': None,
                    "resultData": list_one
                }
                # Test finished: first empty the serving folder
                for dirpath, dirnames, filenames in os.walk(model_path):
                    for f0 in tqdm(filenames):
                        src_file = os.path.join(dirpath, f0)
                        os.remove(src_file)
                # Then copy the backed-up files back into the serving folder
                for dirpath, dirnames, filenames in os.walk(model_copy):
                    for f0 in tqdm(filenames):
                        src_file = os.path.join(dirpath, f0)
                        shutil.copy(src_file, model_path)
                # All cleanup done; return the model test result
                return jsonify(result)
else:
result = {
'handleMsg': 'failure',
'code': 500,
'logs': f'待测试模型版本-{model_version}不存在,请选择已有的模型版本进行测试!',
"resultData": None
}
app.logger.info(result)
return jsonify(result)
except Exception as e:
# print(e)
app.logger.info(e)
result = {
'handleMsg': 'failure',
'code': 500,
'logs': '模型测试失败,请检查参数后重新测试!' + str(e),
"resultData": None
}
app.logger.info(result)
return jsonify(result)
if __name__ == '__main__':
port = get_available_port(3000, 3050)
print(port)
# app.run(host=HOST, port=PORT, debug=DEBUG)
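# For reference, a minimal client sketch for the upload endpoint defined above.
# The host and port are assumptions (app_run.py listens on 4005 by default, and the
# blueprint is registered under /platform/operation/process in app_run.py); the
# task id and corpus URL below are hypothetical placeholders.
#
# import json
# import requests
#
# url = "http://localhost:4005/platform/operation/process/upload_file/"
# payload = json.dumps({"task_id": "demo-task",
#                       "request_url": "http://example.com/corpus.xlsx"})
# resp = requests.post(url, data=payload, headers={"Content-Type": "application/json"})
# print(resp.json())  # expected on success: {"code": 200, "handleMsg": "Success", ...}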
# -*- coding: utf-8 -*-
# Smart extraction request
# 1. Considered: when requesting smart extraction, stop using an entity class
#    a. Still in use: pass the HTML source directly in the raw HTTP request body, with lang-code and link-text as query parameters
#    b. Reason: this is awkward to test in postman; a pasted HTML source file cannot be used
# 2. Rejected: using an entity class, although the benefits outweigh the drawbacks
#    a. An entity class makes it easy to extend parameter fields
#    b. Easy to show in API docs: calling json_parameter_utility.get_json_parameters can display the request entity class
class ExtractionRequest:
    # Language code
    # 1. Needed when extracting non-Chinese articles
    lang_code = ""
    # Link text
    # 1. Used to extract the title; without it, title accuracy drops
    link_text = ""
    # Article page source
    # 1. Used to extract the title, publish date, content, etc.
    article_html = ""

    @staticmethod
    def from_dict(dictionary: dict):
        extraction_request = ExtractionRequest()
        # Possible approach:
        # 1. update the internal __dict__ with the dictionary
        # extraction_request.__dict__.update(dictionary)
        # Set each dictionary value on the current object
        for key in dictionary:
            setattr(extraction_request, key, dictionary[key])
        return extraction_request

    def to_dict(self):
        # Convert to a dictionary:
        # 1. Called when serializing to JSON
        # 2. To a JSON string: json.dumps(extraction_result, default=ExtractionResult.to_dict)
        data = {}
        # Use the internal __dict__ object
        # 1. Update the new dictionary with the internal __dict__
        data.update(self.__dict__)
        return data


# Extraction result
class ExtractionResult:
    # Title
    title = ""
    # Publish date
    publish_date = ""
    # Body text (keeps all HTML tags, e.g. br, img)
    text = ""
    # URL
    url = ""
    # Abstract
    meta_description = ""
    # Clean body text (no HTML)
    cleaned_text = ""
    # Source (currently only supported for the "source" field of Chinese sites)
    # source = ""
    # Top image (top_image: never extracts anything, no longer used)
    # top_image = ""

    def to_dict(self):
        # Convert to a dictionary:
        # 1. Called when serializing to JSON
        # 2. To a JSON string: json.dumps(extraction_result, default=ExtractionResult.to_dict)
        data = {}
        # Use the internal __dict__ object
        # 1. Update the new dictionary with the internal __dict__
        data.update(self.__dict__)
        return data


class UrlPickingRequest:
    # Response URL of the list page
    # 1. Used as the Base URL to join extracted relative URLs
    # 2. Base URL: must be the response URL
    # 3. Example: in Python, after requests.get(url), use resp.url as the Base URL
    list_page_resp_url = ""
    # List page source
    # 1. Used to extract article URLs
    list_page_html = ""

    @staticmethod
    def from_dict(dictionary: dict):
        url_picking_request = UrlPickingRequest()
        # Set each dictionary value on the current object
        for key in dictionary:
            setattr(url_picking_request, key, dictionary[key])
        return url_picking_request

    def to_dict(self):
        # Convert to a dictionary:
        # 1. Called when serializing to JSON
        # 2. To a JSON string: json.dumps(extraction_result, default=ExtractionResult.to_dict)
        data = {}
        # Use the internal __dict__ object
        # 1. Update the new dictionary with the internal __dict__
        data.update(self.__dict__)
        return data
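# A minimal round-trip sketch for the entity classes above (all values hypothetical):
#
# import json
#
# req = ExtractionRequest.from_dict({"lang_code": "cn", "link_text": "示例标题"})
# result = ExtractionResult()
# result.title = req.link_text
# # Serialize with the custom to_dict serializer, as noted in the comments above:
# print(json.dumps(result, default=ExtractionResult.to_dict, ensure_ascii=False))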
# -*- coding: utf-8 -*-
import requests, sys
from goose3 import Goose
from goose3.text import StopWordsChinese, StopWordsKorean, StopWordsArabic
# Append the working path
sys.path.append('../')
from File_Operation.entity import *
from File_Operation.smart_extractor_utility import SmartExtractorUtility
# goose3 bundles lxml; the IDE reports etree as missing here, but it still works
from lxml import etree
from lxml.html import HtmlElement
class SmartExtractor:
    @staticmethod
    def get_supported_lang_code_dict():
        """
        Supported languages:
        1. Need a tokenizer, passed explicitly (3 cases):
           a. Chinese, Korean, Arabic
        2. No tokenizer needed; pass the language code directly (16 languages)
           a. English and Russian are tested separately
        """
        supported_lang_code_dict = {
            'cn': '中文',  # Chinese
            'zh-cn': '简体中文',  # Simplified Chinese
            'ko': '韩语',  # Korean
            'ar': '阿拉伯语',  # Arabic
            'en': '英语',  # English
            'ru': '俄语',  # Russian
            'da': '丹麦语',  # Danish
            'de': '德语',  # German
            'es': '西班牙语',  # Spanish
            'fi': '芬兰语',  # Finnish
            'fr': '法语',  # French
            'hu': '匈牙利语',  # Hungarian
            'id': '印度尼西亚语',  # Indonesian
            'it': '意大利语',  # Italian
            'nb': '挪威语(伯克梅尔)',  # Norwegian (Bokmål)
            'nl': '荷兰语',  # Dutch
            'no': '挪威文(耐诺斯克)',  # Norwegian (Nynorsk)
            'pl': '波兰语',  # Polish
            'pt': '葡萄牙语',  # Portuguese
            'sv': '瑞典语',  # Swedish
        }
        return supported_lang_code_dict
    def __init__(self, lang_code='cn'):
        """
        Constructor: defaults to cn when lang_code is not specified
        """
        # Supported languages
        supported_lang_code_list = list(SmartExtractor.get_supported_lang_code_dict())
        # Initialize the goose object:
        # 1. Create the goose object according to the language code
        if lang_code is None or lang_code == 'cn' or lang_code == 'zh-cn':
            # Tokenizer needed: Chinese
            # 1. When lang_code is omitted or None, default to Chinese tokenization
            # 2. Flask web interface: when the GET parameter lang_code is missing, lang_code arrives as None
            self.goose = Goose({'stopwords_class': StopWordsChinese})
        elif lang_code == 'ko':
            # Tokenizer needed: Korean
            # 1. Tried passing only the language, without a tokenizer:
            # self.goose = Goose({'use_meta_language': False, 'target_language': 'ko'})  # test failed: empty body
            # self.goose = Goose()  # test failed: empty body
            # Korean tokenizer: test succeeded
            self.goose = Goose({'stopwords_class': StopWordsKorean})
        elif lang_code == 'ar':
            # Tokenizer needed: Arabic
            # self.goose = Goose({'use_meta_language': False, 'target_language': 'en'})  # test failed: empty body
            # self.goose = Goose()  # test succeeded
            # self.goose = Goose({'use_meta_language': False, 'target_language': lang_code})  # test succeeded: pass the language code directly
            self.goose = Goose({'stopwords_class': StopWordsArabic})
        elif lang_code == 'en':
            # Tested separately: English
            # self.goose = Goose({'use_meta_language': False, 'target_language': 'en'})
            # Test succeeded: a Goose object without a language defaults to English tokenization
            self.goose = Goose()
        elif lang_code == 'ru':
            # Tested separately: Russian
            # self.goose = Goose({'use_meta_language': False, 'target_language': 'en'})  # test failed: empty body
            self.goose = Goose({'use_meta_language': False, 'target_language': lang_code})  # test succeeded: pass the language code directly
        elif lang_code in supported_lang_code_list:
            # Other language codes are handled uniformly and not tested individually
            self.goose = Goose({'use_meta_language': False, 'target_language': lang_code})
        else:
            # Unrecognized language code
            raise Exception(f'智能采集时,无法识别语言代码:{lang_code}')
    def get_extraction_result(self, article, link_text=''):
        """
        Build the extraction result:
        1. Collect data from the article object and wrap it in an ExtractionResult
        """
        # Holds the extracted text
        extraction_result = ExtractionResult()
        # Title
        # extraction_result.title = article.title  # old approach: use the title goose extracted from the title tag
        extraction_result.title = SmartExtractorUtility.get_article_title(article, link_text)
        # Publish date
        extraction_result.publish_date = SmartExtractorUtility.get_publish_date(article)
        # Body text (keeps all HTML tags, e.g. br, img)
        extraction_result.text = SmartExtractorUtility.get_article_text(article)
        # URL
        extraction_result.url = article.final_url
        # Abstract
        extraction_result.meta_description = article.meta_description
        # Clean body text (no HTML)
        extraction_result.cleaned_text = article.cleaned_text
        # Source (currently only supported for the "source" field of Chinese sites)
        extraction_result.source = ''
        return extraction_result
    def extract_by_url(self, url, link_text=''):
        """
        Extract content by URL
        """
        # Extract the body: pass the url
        article = self.goose.extract(url=url)
        # article = goose.extract(raw_html=html)
        return self.get_extraction_result(article, link_text)

    def extract_by_html(self, html, link_text=''):
        """
        Extract content by HTML
        """
        # Extract the body: pass the html
        article = self.goose.extract(raw_html=html)
        return self.get_extraction_result(article, link_text)
def extract_by_url_test(url: str, lang_code: str):
    # Test: extract by URL
    # url_list = [
    #     # "http://www.news.cn/politics/2022-07/31/c_1128879636.htm",  # short text
    #     # "https://baijiahao.baidu.com/s?id=1741311527693101670",  # several images
    #     # "https://news.cctv.com/2022/08/16/ARTIERrXbbVtVUaQU0pMzQxf220816.shtml",  # several images plus a video (content XPath test failed)
    #     # "http://opinion.people.com.cn/n1/2022/0803/c1003-32492653.html",  # people.com.cn
    #     # Korean: JoongAng Ilbo - politics
    #     # "https://www.joongang.co.kr/article/25094974",
    #     # "https://www.joongang.co.kr/article/25094967",
    #     # English: Kathmandu Post - national-security
    #     # "https://kathmandupost.com/national-security/2020/01/17/police-s-intelligence-continues-to-fail-them-as-chand-party-claims-explosion",
    #     # "https://kathmandupost.com/national-security/2019/11/04/india-s-new-political-map-places-disputed-territory-of-kalapani-inside-its-own-borders",  # tests publish-date extraction
    #     # Russian: Belarus Today - world
    #     # "https://www.sb.by/articles/byvshiy-premer-ministr-italii-zayavil-chto-strane-sleduet-otkazatsya-ot-gaza-iz-rossii.html",
    #     # 'https://www.sb.by/articles/kryuchkov-predupredil-o-nepopravimykh-posledstviyakh-dlya-ukrainy-v-sluchae-udarov-po-krymu.html',
    #     # Arabic
    #     # "http://arabic.people.com.cn/n3/2022/0822/c31659-10137917.html",
    #     # "http://arabic.people.com.cn/n3/2022/0822/c31657-10137909.html",
    #     # Title-extraction tests
    #     # "http://www.sasac.gov.cn/n4470048/n16518962/n20928507/n20928570/c25819031/content.html",
    #     # "http://www.forestry.gov.cn/main/102/20220823/092407820617754.html",
    #     # "http://www.sasac.gov.cn/n2588025/n2588139/c25825832/content.html",  # title extracted as empty
    #     # 'http://www.crfeb.com.cn/1j/_124/2005409/index.html',  # content extraction failed
    #     # 'http://www.crfeb.com.cn/1j/_124/912248/index.html',  # content extraction failed
    #     # 'https://www.crcc.cn/art/2021/11/12/art_205_3413380.html',  # CRCC work updates (wrong date extracted)
    #     # 'http://ccecc.crcc.cn/art/2015/11/19/art_7608_1136312.html',  # CCECC, several sections (wrong date extracted)
    #     # 'http://v.people.cn/n1/2022/0901/c444662-32517559.html',  # people.cn video: the title must start with the element title; "contains" is not enough
    #     # 'https://www.chec.bj.cn/cn/xwzx/gsyw/2022/202207/t20220706_8128.html',  # CHEC company news (title extraction failed)
    #     # 'https://www.cscec.com/xwzx_new/gsyw_new/202208/3570377.html',  # CSCEC key news (title extraction failed)
    #     # 'https://www.crbc.com/site/crbc/276/info/2022/46884837.html',  # CRBC, several sections (title extraction failed)
    #     # 'http://www.cgcoc.com.cn/news/432.html',  # CGCOC news center (title and content extraction failed)
    #     # 'http://www.mcc.com.cn/mcc/_132154/_132572/308233/index.html'  # MCC (test: body extraction failed)
    #     # 'http://www.powerchina.cn/art/2015/5/27/art_7449_441845.html',  # PowerChina (test: title and body extraction failed)
    #     # PowerChina (title extraction failed): compared with the list link text and the title tag, the element title "秉承丝路精髓 抒写锦绣华章" has an extra space in the middle
    #     # 'http://world.people.com.cn/n1/2022/0624/c1002-32455607.html',  # title extraction failed: looks fine on inspection
    #     'https://www.cscec.com/xwzx_new/zqydt_new/202209/3578274.html',  # CSCEC company updates: wrong date extracted (today's date)
    # ]
    # Language codes
    # lang_code = 'cn'
    # lang_code = 'ko'
    # lang_code = 'en'
    # lang_code = 'ru'
    # lang_code = 'ar'
    print("-" * 100)
    print('Request URL:', url)
    extraction_result = SmartExtractor(lang_code).extract_by_url(url)
    # todo: return the content
    dict_parse = {
        "title": extraction_result.title,
        "publistDate": extraction_result.publish_date,
        "content": extraction_result.cleaned_text
    }
    return dict_parse
    # for url in url_list:
    #     print("-" * 100)
    #     print('Request URL:', url)
    #     extraction_result = SmartExtractor(lang_code).extract_by_url(url)
    #
    #     # Test JSON conversion
    #     # 1. Converting directly raises: TypeError: Object of type ExtractionResult is not JSON serializable
    #     # print(json.dumps(extraction_result))
    #     # print(json.dumps(extraction_result, default=ExtractionResult.to_dict))  # works: custom serializer
    #     # print(type(json.dumps(extraction_result.to_dict())))  # returns <class 'str'>; Chinese in the content gets escaped
    #     # print(str(extraction_result.to_dict()))  # converting straight to a string leaves Chinese unescaped
    #
    #     # Print the test result
    #     print_extraction_result(extraction_result)
def extract_by_html_test(url):
    # Test: extract from HTML
    html = '''
        <html>
        <head>
            <title>标题</title>
        </head>
        <body>
            <div>标题</div>
            <div>内容</div>
        </body>
        </html>
    '''
    # Test: fetch the full html by requesting the URL
    # url = "http://www.news.cn/politics/2022-07/31/c_1128879636.htm"  # test succeeded
    # url = "http://views.ce.cn/view/ent/202208/15/t20220815_37961634.shtml"  # 1. test failed: lxml.etree.ParserError: Document is empty
    # url = 'https://www.crcc.cn/art/2021/11/12/art_205_3413380.html'  # CRCC work updates (wrong date extracted)
    # url = 'http://ccecc.crcc.cn/art/2015/11/19/art_7608_1136312.html'  # CCECC, several sections (wrong date extracted)
    print()
    print("-" * 100)
    print('Request URL:', url)
    html = requests.get(url).text
    # Language code
    lang_code = 'cn'
    # Extract the content
    extraction_result = SmartExtractor(lang_code).extract_by_html(html)
    # todo: return the content
    dict_parse = {
        "title": extraction_result.title,
        # "publistDate": extraction_result.publish_date,
        "content": extraction_result.cleaned_text
    }
    # Print the test result
    # print_extraction_result(extraction_result)
    return dict_parse
def print_extraction_result(extraction_result):
    # Print the test result
    print("Title:", extraction_result.title)
    print("Publish date:", extraction_result.publish_date)
    print("Body:", extraction_result.text)
    print("URL:", extraction_result.url)
    print("Abstract:", extraction_result.meta_description)
    print("Clean body:", extraction_result.cleaned_text)
if __name__ == '__main__':
    try:
        # # Test: extract by URL
        # print(extract_by_url_test("http://www.gov.cn/zhengce/zhengceku/2008-03/28/content_6253.htm"))
        # Test: extract from HTML
        dict_parse = extract_by_html_test("https://www.msn.cn/zh-cn/news/other/%E6%9B%BE%E7%BB%8F%E4%B8%91%E5%88%B0%E7%94%B7%E4%B8%BB%E9%80%83%E8%B7%91-%E5%A6%82%E4%BB%8A%E9%80%86%E8%A2%AD%E6%88%90%E6%83%B9%E7%9C%BC%E8%BE%A3%E5%A6%B9-%E6%9C%80%E4%B8%91%E5%A5%B3%E5%9B%A2-cindy%E5%AE%8C%E6%88%90/ar-AA1cmel9?ocid=msedgntp&cvid=2952893909c64335846c8f7d0d608e48&ei=5")
        print(dict_parse)
    except Exception as e:
        print("Extraction failed:", e)
import re

from goose3.article import Article
from lxml import etree
from lxml.html import HtmlElement


class SmartExtractorUtility:
    # Minimum title length
    title_min_len = 6
    @staticmethod
    def extract_publish_date(html):
        pattern_list = [
            # 2010-10-1 8:00:00
            r"20\d{2}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}",
            # 2010-10-1 8:00
            r"20\d{2}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}",
            # 2010年10月1日 8:00:00
            r"20\d{2}年\d{1,2}月\d{1,2}日 \d{1,2}:\d{1,2}:\d{1,2}",
            # 2010年10月1日 8:00
            r"20\d{2}年\d{1,2}月\d{1,2}日 \d{1,2}:\d{1,2}",
            # 2010/10/1 8:00:00
            r"20\d{2}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}",
            # 2010/10/1 8:00
            r"20\d{2}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}",
            # 2010-10-1
            r"20\d{2}-\d{1,2}-\d{1,2}",
            # 2010年10月1日
            r"20\d{2}年\d{1,2}月\d{1,2}日",
            # 2010/10/1
            r"20\d{2}/\d{1,2}/\d{1,2}",
            # 2022.08.28
            r"20\d{2}\.\d{1,2}\.\d{1,2}",
            # 12-07-02 10:10
            r"\d{2}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}",
            # "1月前" (one month ago)
            r"\d+(&nbsp;| )*月前",
            # "12天前" (12 days ago)
            r"\d+(&nbsp;| )*天前",
            # "2小时前" (2 hours ago)
            r"\d+(&nbsp;| )*小时前",
            # "15分钟前" (15 minutes ago)
            r"\d+(&nbsp;| )*分钟前",
            # "昨天&nbsp;17:59" (yesterday 17:59)
            r"昨天(&nbsp;| )*\d{1,2}:\d{1,2}",
        ]
        # Try every pattern in turn
        for pattern in pattern_list:
            # Extract visible dates only:
            # 1. The date must be inside a tag's text, never in an HTML tag attribute
            # 2. Rule: it must sit between > and <, with no further > in between
            tag_pattern = f'>[^>]*(?P<date>{pattern})[^>]*<'
            # Search for the first match
            match = re.search(tag_pattern, html)
            # On a match, return the publish date
            if match:
                return match.group('date')
        # Every pattern failed; return an empty string
        return ""
    @staticmethod
    def add_html_br(cleaned_text):
        # Wrap with HTML line breaks
        # 1. Replace double newlines first: the cleaned_text goose produces always uses double newlines
        cleaned_text = cleaned_text.replace("\n\n", "<br>")
        cleaned_text = cleaned_text.replace("\n", "<br>")
        return cleaned_text
    @staticmethod
    def get_article_title(article: Article, link_text=''):
        # Prefer titles found in h1, div, span or td elements
        # 1. Test task: SASAC - news releases
        #    a. Original title tag: 中国能建:聚焦价值创造 打造国企改革发展“红色引擎”-国务院国有资产监督管理委员会
        #    b. Title in the div element: 中国能建:聚焦价值创造 打造国企改革发展“红色引擎”
        # 2. Test task: National Forestry and Grassland Administration - local updates
        #    a. Original title tag: 上海完成森林资源年度监测遥感解译图斑市级质量检查_地方动态_国家林业和草原局政府网
        #    b. Title in the span element: 上海完成森林资源年度监测遥感解译图斑市级质量检查
        #
        # When querying title elements by xpath:
        # 1. Tag priority: h1, special elements (id or class containing "title"), h2, h3, div, span, td
        title_element_list = [
            'h1',
            'h2',
            'h3',
            'div',
            'span',
            'td',
            'p',
        ]
        # Strip spaces before comparing titles (2022-09-21):
        # 1. Test task: Belt and Road / PowerChina (title extraction failed)
        # 2. Compared with the list link text and the title tag content, the element title "秉承丝路精髓 抒写锦绣华章" has an extra space in the middle
        link_text = link_text.replace(" ", "")
        tag_title = article.title.replace(" ", "")
        for title_element in title_element_list:
            element_list = article.raw_doc.getroottree().xpath(f'//{title_element}')
            # XPath query succeeded; walk every element
            for element in element_list:
                # Take the plain-text content, including child elements
                text = etree.tounicode(element, method='text').strip()
                text_no_space = text.replace(" ", "")
                # Judging the title:
                # 1. If the extracted title starts with the element's content, take the element's content
                # 2. On success, return text as the title; otherwise continue the loop
                # Why "starts with" rather than "contains":
                # 1. The title must start with the element title; "contains" is not enough
                # 2. Test URL: http://v.people.cn/n1/2022/0901/c444662-32517559.html
                # 3. Title tag: <title>亿缕阳光丨小生意,大格局--人民视频--人民网</title>
                #    a. Judging by "contains" would extract: 人民网
                #    b. Because of the element: <a href="http://www.people.com.cn/" class="clink">人民网</a>
                #    c. Judging by "starts with" extracts: 亿缕阳光丨小生意,大格局
                #    d. Title element: <h2>亿缕阳光丨小生意,大格局</h2>
                # New scheme:
                # 1. For the common elements, still judge by "starts with"
                # 2. Compare against the link text first, then the title element
                # 3. Require a minimum length: 6 characters
                # New scheme (2022-09-21):
                # 1. When comparing the link text and title element, allow matching at the end as well as the start
                # 2. Test task: Belt and Road / PowerChina (title extraction failed)
                #    a. Link text in the list: 【“一带一路”旗舰篇】秉承丝路精髓 抒写锦绣华章——河北电...
                #    b. Title tag content: <title>中国电力建设集团 公司要闻 【“一带一路”旗舰篇】秉承丝路精髓 抒写锦绣华章——河北电建一公司摘取“一带一路”上的“鲁班奖”桂冠</title>
                #    c. Element title: 【“一带一路”旗舰篇】秉承丝路精髓 抒写锦绣华章——河北电建一公司摘取“一带一路”上的“鲁班奖”桂冠
                if text_no_space is not None and text_no_space != '' and len(
                        text_no_space) >= SmartExtractorUtility.title_min_len:
                    # Check the 6-character minimum first, to rule out short text elements and ease debugging
                    if link_text.startswith(text_no_space) or link_text.endswith(text_no_space) or tag_title.startswith(
                            text_no_space) or tag_title.endswith(text_no_space):
                        # Return the title with its original spaces intact
                        return text
        # Lookup failed; the old fallback returned the extracted title attribute:
        # return article.title
        # New decision: return an empty string when title extraction fails
        # 1. Reason: article.title is unreliable; it merely copies the content of the title tag
        return ''
    @staticmethod
    def get_publish_date(article: Article):
        # Prefer the regex-based date extraction
        # 1. Test task: Kathmandu Post - national-security
        #    a. Extracting an English date via publish_datetime_utc went wrong
        #    b. Actual date: Friday, August 19, 2022, but it extracted 2015-02-05
        #    c. Cause: a JSON fragment in the page's JS: "datePublished": "2015-02-05T08:00:00+08:00"
        # 2. Note: Chinese sites must always use the regex
        publish_date = SmartExtractorUtility.extract_publish_date(article.raw_html)
        if publish_date != '':
            return publish_date
        else:
            if article.publish_datetime_utc:
                # Prefer the successfully extracted datetime
                return article.publish_datetime_utc.strftime('%Y-%m-%d')
            elif article.publish_date:
                # Then the extracted date string
                return article.publish_date
            else:
                # Everything failed; return an empty string
                return ''
    @staticmethod
    def get_article_text(article: Article):
        # First approach: add br tags on top of the plain text (cleaned_text)
        # 1. Drawback: loses images and the original p tags (only br can stand in)
        # text = SmartExtractor.add_html_br(article.cleaned_text)

        # Second approach: take the HTML of top_node directly
        # 1. Advantage: keeps the original p tags, etc.
        # 2. Drawback: loses images; img tags are not preserved
        # text = etree.tounicode(article.top_node, method='html')

        # Third approach: get the xpath of top_node, then query the original doc with it
        # 1. Works: querying the original doc yields the full HTML of the body
        # 2. Problem: the xpath obtained from top_node can be off by one element
        #    a. Test URL: https://news.cctv.com/2022/08/16/ARTIERrXbbVtVUaQU0pMzQxf220816.shtml
        #    b. Obtained xpath: /html/body/div/div[1]/div[2]/div[4]
        #    c. Actual xpath: /html/body/div/div[1]/div[2]/div[5]
        # 3. Fix:
        #    a. Query by id or class first; only fall back to the top_node xpath when both are absent
        xpath = None
        if type(article.top_node) is HtmlElement:
            if 'id' in article.top_node.attrib:
                xpath = "//*[@id='{}']".format(article.top_node.attrib['id'])
            elif 'class' in article.top_node.attrib:
                xpath = "//*[@class='{}']".format(article.top_node.attrib['class'])
            else:
                xpath = article.top_node.getroottree().getpath(article.top_node)
        else:
            # article.top_node is sometimes empty:
            # 1. Test URL: https://baijiahao.baidu.com/s?id=1741311527693101670
            # 2. Log output: article.top_node is not an HtmlElement: None
            print("SmartExtractor: article.top_node is {}, not an HtmlElement.".format(article.top_node))
            # When article.top_node is empty, fall back to cleaned_text:
            # 1. Add br tags on top of the plain text (cleaned_text)
            text = SmartExtractorUtility.add_html_br(article.cleaned_text)
            return text
        # Query elements by xpath
        element_list = article.raw_doc.getroottree().xpath(xpath)
        if element_list:
            # XPath query succeeded; take the first element's HTML
            text = etree.tounicode(element_list[0], method='html')
        else:
            # XPath query failed; return the original HTML of top_node
            # 1. Drawback: loses images; img tags are not preserved
            text = etree.tounicode(article.top_node, method='html')
        return text
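# A quick sanity check for the date patterns above (hypothetical snippet):
#
# if __name__ == '__main__':
#     sample = '<div class="info">发布时间:2023-03-21 08:25:00</div>'
#     print(SmartExtractorUtility.extract_publish_date(sample))  # -> 2023-03-21 08:25:00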
# FastText-Model

#### Introduction
Model repository for the new platform's NLP algorithm group

#### Installation
1. Pin the Python version of the conda environment
2. Install the packages in requirement.txt
3. Alternatively, use a runtime environment created on the host machine beforehand

#### Usage
1. xxxx
2. xxxx
3. xxxx
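A minimal, hypothetical smoke test against the prediction endpoint exposed by app_run.py (the host, port and the record below are placeholders):

```python
import json
import requests

url = "http://localhost:4005/platform/classification/FastText-Model/model_pred/"
payload = json.dumps([
    {"id": "1", "title": "示例标题", "content": "示例正文"}  # hypothetical record
])
resp = requests.post(url, data=payload, headers={"Content-Type": "application/json"})
print(resp.json())  # expected shape: {"code": 200, "message": "操作成功", "result": [{"id": "1", "labels": "..."}]}
```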
#### Contributing
1. Fork this repository
2. Create a Feat_xxx branch
3. Commit your code
4. Open a Pull Request
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/7/31 10:21
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : app_config.py
# @Time : 2023/4/1 10:31
# @Author : bruxelles_li
# @Software: PyCharm
import os
import multiprocessing
from pathlib import Path

bind = '0.0.0.0:4005'  # bound ip and port
backlog = 512  # listen queue
# chdir = '/home/zzsn/liuyan/bin'  # working directory gunicorn switches to
timeout = 300  # timeout -> temporarily relaxed to accommodate the ZZSN_NLP platform's Belt-and-Road element extraction (file) workload
# worker_class = 'gevent'  # use gevent mode; sync mode is also available and is the default
# workers = multiprocessing.cpu_count()  # number of worker processes, 12
workers = 1  # low-resource (13G) server: set this to 1 when the load is too high
threads = 50  # number of threads per worker
loglevel = 'error'  # log level; this only sets the error log level, the access log level cannot be set
access_log_format = '%(t)s %(p)s %(h)s "%(r)s" %(s)s %(L)s %(b)s %(f)s" "%(a)s"'  # gunicorn access log format; the error log format cannot be set
"""
其每个选项的含义如下:
h remote address
l '-'
u currently '-', may be user name in future releases
t date of the request
r status line (e.g. ``GET / HTTP/1.1``)
s status
b response length or '-'
f referer
a user agent
T request time in seconds
D request time in microseconds
L request time in decimal seconds
p process ID
"""
_tmp_path = os.path.dirname(os.path.abspath(__file__))
_tmp_path = os.path.join(_tmp_path, 'log')
Path(_tmp_path).mkdir(parents=True, exist_ok=True)
accesslog = os.path.join(_tmp_path, 'gunicorn_access.log')  # access log file
errorlog = os.path.join(_tmp_path, 'gunicorn_error.log')  # error log file
# Launch with: gunicorn -c app_config.py app_run:app -D --daemon
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : app_run.py
# @Time : 2023/3/31 10:31
# @Author : bruxelles_li
# @Software: PyCharm
import json
import os
import sys
import logging
import requests
import argparse
import queue
from pathlib import Path
from flask import Flask, jsonify, request
import re
# Model training service
sys.path.append('../')
# Close redundant connections
s = requests.session()
s.keep_alive = False
from classification.config.config_fast_text import FastTextConfig
from classification.runner.runner_fast_text import FastTextRunner

os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] <%(processName)s> (%(threadName)s) %('
                                               'message)s')
logger = logging.getLogger(__name__)

# queue.Queue is a basic FIFO queue: first in, first out
# maxsize caps the number of queued items; <= 0 means unbounded, otherwise puts block until items are consumed
q = queue.Queue(maxsize=0)

# Training config file
train_config_path = '../classification/config/fasttext_config_train.yml'
# Serving config file
pred_config_path = '../classification/config/fasttext_config_pred.yml'

# Close redundant connections
s = requests.session()
s.keep_alive = False

UPLOAD_FOLDER = r'../datasets/Receive_File'  # upload path
Path(UPLOAD_FOLDER).mkdir(parents=True, exist_ok=True)
TEMPFILE_FOLDER = UPLOAD_FOLDER + "/" + "Temp_file"
Path(TEMPFILE_FOLDER).mkdir(parents=True, exist_ok=True)
ALLOWED_EXTENSIONS = set(['xls', 'xlsx'])  # allowed upload file types

app = Flask(__name__)
# Temporary paths for uploaded files
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['TEMPFILE_FOLDER'] = TEMPFILE_FOLDER
from base.app.base_app import *
# Companion services for model training
from File_Operation.Operation import operation_file
# This part changes per deployment
# from classification.app.app_ssyw_column_classify import classification_ssyw_column_classify

# operation_file: companion services for model training
operation_prefix = "/platform/operation/process"  # upload, delete, test, publish
app.register_blueprint(operation_file, url_prefix="{}".format(operation_prefix))

# classification: training endpoint parameters
train_url = "/platform/classification/FastText-Model/model_train/"
model_name = "FastText-Model"
# Serving url
application_url = "/model_pred/"
# Testing url
model_test_url = "/model_test/"

# CORS support
from flask_cors import CORS
CORS(app, supports_credentials=True)
@app.route('/', methods=['POST'])
def hello_world():
app.logger.info('请选择正确的方式上传!')
return '请选择正确的方式上传!'
@app.route('/subject_consumer', methods=['GET', 'POST'])
def subject_consumer():
if not q.empty():
config_info = q.get()
return jsonify(message='当前队列数量:' + str(q.qsize()),
queue_left_number=str(q.qsize()),
data=config_info)
else:
return jsonify(message='队列为空!', queue_left_number=0)
@app.route('/queue_size', methods=['GET', 'POST'])
def queue_size():
return jsonify(queue_left_number=q.qsize())
@app.route("/platform/classification/FastText-Model/model_train/", methods=['POST'])
def model_train():
"""
{ 'reg_lambda': 1,
'scale_pos_weight': 1,
'reg_alpha': 1,
'modelProcessId': '1453295295008211969',
'learning_rate': '0.02',
'gpu': False,
'min_child_weight': 1,
'train': 'http://39.105.62.235:7000/br/classification/project_info/filter/train',
'data_path': '/datasets/the_belt_and_road/classification/pro_info_filter'
}
-> data:
:return:
"""
    try:
        data = json.loads(request.data.decode('utf-8'))
        modelProcessId = data['modelProcessId']
        task_id = data["task_id"]
        learning_rate = data['learning_rate'] if 'learning_rate' in data else 0.02
        epoch = data['epoch'] if "epoch" in data else 5
        gpu = data['gpu'] if 'gpu' in data else None
        # Names of the corpus version and the model version
        data_path_0 = data['data_path']
        model_path_0 = data['model_path']
        data_path_1 = "/" + task_id + "/" + data_path_0.strip('/')
        model_path_1 = '/' + task_id + "/" + model_path_0.strip('/')
        # Load the config file to get the corpus and model storage paths
        _config = FastTextConfig(config_path=train_config_path).load_config()
        # todo: check the dataset first; two cases to handle
        data_temp_path = _config.data.path0 % data_path_1
        app.logger.info(data_temp_path)
        # todo: then check the model path; if the model version already exists, report it
        temp_path = _config.learn.dir.saved % model_path_1
        app.logger.info(temp_path)
        if os.path.exists(data_temp_path):
            pass
        else:
            dict_result = {
                'code': 500,
                'isHandleSuccess': False,
                'logs': '模型训练失败!当前模型训练的语料文件不存在,请上传语料后再进行训练',
                'result': None
            }
            app.logger.info(dict_result)
            return json.dumps(dict_result, ensure_ascii=False)
        # todo: the model-version check is always needed; a worker thread handles the rest
        if not os.path.exists(temp_path):
            model_path = model_path_1
        else:
            dict_result = {
                'code': 500,
                'isHandleSuccess': False,
                'logs': '模型训练失败!当前模型版本已经存在,请更改模型版本号再重新进行训练',
                'result': None
            }
            app.logger.info(dict_result)
            return json.dumps(dict_result, ensure_ascii=False)
        # Model save version and dataset look fine; queue the training job
        VER = data_path_1
        root_dataset = data_temp_path
        app.logger.info(root_dataset)
        config_info = {
            "modelProcessId": modelProcessId,
            "data_path": VER,
            "model_path": model_path,
            'root_dataset': root_dataset
        }
        q.put(config_info)
        app.logger.info(config_info)
        dict_result = {
            'code': 200,
            'isHandleSuccess': True,
            'logs': '模型训练中 ...',
            'result': None
        }
except Exception as e:
dict_result = {
'code': 500,
'isHandleSuccess': False,
'logs': '训练失败!' + str(e),
'result': None
}
app.logger.info(dict_result)
return json.dumps(dict_result, ensure_ascii=False)
@app.route("/platform/classification/FastText-Model/model_test/", methods=['POST'])
def model_test():
"""
{
'threshold': 0.5,
'model_path': '/zzsn_nlp_br/classification/model/model_saved',
'url': 'http://39.105.62.235:7000/br/classification/project_info/filter/model_test'
}
-> data:
:return:
"""
try:
data = json.loads(request.data.decode('utf-8'))
title = data['title'] if 'title' in data else None
content = data['content'] if 'content' in data else None
model_path = data['model_path'] if 'model_path' in data else None
runner_test = FastTextRunner(config_path=pred_config_path, model_path=model_path)
dict_result = runner_test.test(title=title, content=content)
if dict_result['code'] != 200:
dict_result['logs'] = '模型测试失败!' + dict_result['logs']
except Exception as e:
dict_result = {
'handleMsg': 'failure',
'code': 500,
'logs': '模型测试失败!' + str(e),
'resultData': None
}
app.logger.info(dict_result)
return json.dumps(dict_result, ensure_ascii=False)
@app.route("/platform/classification/FastText-Model/model_pred/", methods=['POST'])
def model_pred():
try:
data_list = json.loads(request.data.decode('utf-8'))
result_one = []
for data in data_list:
title = data['title'] if 'title' in data else None
content = data['content'] if 'content' in data else None
infoId = data["id"] if "id" in data else None
level2 = ssyw_runner.pred(
title=title,
content=content
).strip()
result_one.append({
"id": infoId,
'labels': level2
})
dict_result = {
'code': 200,
'message': "操作成功",
'result': result_one
}
except Exception as e:
dict_result = {
'code': 500,
'success': 'false',
'message': "操作失败" + str(e),
'result': None
}
app.logger.info(dict_result)
return json.dumps(dict_result, ensure_ascii=False)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='')
parser.add_argument('-port', dest='port', help='', default=4005)
parser.add_argument('-host', dest='host', help='', default='0.0.0.0')
    # Microservice parameters
parser.add_argument('-model_path', dest='model_path', help='', default='')
parser.add_argument('-micro_server_port', dest='micro_server_port', help='', default=None)
args = parser.parse_args()
if args.model_path and args.micro_server_port:
model_path = os.path.join(args.model_path, "model.bin")
ssyw_runner = FastTextRunner(config_path=pred_config_path, model_path=model_path)
app.run(host=args.host,
port=int(args.micro_server_port)
)
else:
ssyw_runner = FastTextRunner(config_path=pred_config_path)
app.run(host=args.host,
port=int(args.port)
)
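# A minimal, hypothetical training request against the model_train endpoint above
# (ids and version names are placeholders; data_path / model_path select the corpus
# and model versions under the configured task directory):
#
# import json
# import requests
#
# url = "http://localhost:4005/platform/classification/FastText-Model/model_train/"
# payload = json.dumps({
#     "modelProcessId": "1453295295008211969",
#     "task_id": "demo-task",
#     "data_path": "corpus-v1",
#     "model_path": "model-v1"
# })
# print(requests.post(url, data=payload).json())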
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : 资源检测程序.py
# @Time : 2022/9/30 10:39
# @Author : bruxelles_li
# @Software: PyCharm
import logging
import os, time, re, subprocess


# Get the CPU load
def get_cpu():
    last_worktime = 0
    last_idletime = 0
    f = open("/proc/stat", "r")
    line = ""
    while not "cpu " in line:
        line = f.readline()
    f.close()
    spl = line.split(" ")
    worktime = int(spl[2]) + int(spl[3]) + int(spl[4])
    idletime = int(spl[5])
    dworktime = (worktime - last_worktime)
    didletime = (idletime - last_idletime)
    rate = float(dworktime) / (didletime + dworktime)
    last_worktime = worktime
    last_idletime = idletime
    if (last_worktime == 0): return 0
    return rate
# Get memory usage
def get_mem_usage_percent():
    try:
        f = open('/proc/meminfo', 'r')
        for line in f:
            if line.startswith('MemTotal:'):
                mem_total = int(line.split()[1])
            elif line.startswith('MemFree:'):
                mem_free = int(line.split()[1])
            elif line.startswith('Buffers:'):
                mem_buffer = int(line.split()[1])
            elif line.startswith('Cached:'):
                mem_cache = int(line.split()[1])
            elif line.startswith('SwapTotal:'):
                vmem_total = int(line.split()[1])
            elif line.startswith('SwapFree:'):
                vmem_free = int(line.split()[1])
            else:
                continue
        f.close()
    except Exception:
        return None
    physical_percent = usage_percent(mem_total - (mem_free + mem_buffer + mem_cache), mem_total)
    virtual_percent = 0
    if vmem_total > 0:
        virtual_percent = usage_percent((vmem_total - vmem_free), vmem_total)
    return physical_percent, virtual_percent
def usage_percent(use, total):
try:
ret = (float(use) / total) * 100
except ZeroDivisionError:
raise Exception("ERROR - zero division error")
return ret
# Disk usage of the root filesystem
def disk_info():
    statvfs = os.statvfs('/')  # root filesystem; change the path if needed
    total_disk_space = statvfs.f_frsize * statvfs.f_blocks
    free_disk_space = statvfs.f_frsize * statvfs.f_bfree
    disk_usage = (total_disk_space - free_disk_space) * 100.0 / total_disk_space
    disk_usage = int(disk_usage)
    return str(disk_usage)
# Physical memory usage (percent)
def mem_info():
    mem_usage = get_mem_usage_percent()
    mem_usage = int(mem_usage[0])
    return str(mem_usage)
# CPU usage (percent)
def cpu_info():
    cpu_usage = int(get_cpu() * 100)
    return str(cpu_usage)
# System load (any of the 1/5/15-minute averages above ~3 is considered high)
def sys_info():
    load_average = os.getloadavg()
    # The original returned len(load_average), which is always 3; return the
    # highest of the three averages so callers get an actual load figure.
    return max(load_average)
# Current host time
def time_info():
    now_time = time.strftime('%Y-%m-%d %H:%M:%S')
    return "主机的当前时间:%s" % now_time
# Hostname
def hostname_info():
    hostnames = os.popen("hostname").read().strip()
    return "你的主机名是: %s" % hostnames
# IP address (assumes interface ens192)
def ip_info():
    ipadd = os.popen("ip a| grep ens192 | grep inet | awk '{print $2}'").read().strip()
    return ipadd
# Root volume usage via `df -h`
def disk_info_root():
    child = subprocess.Popen(["df", "-h"], stdout=subprocess.PIPE)
    out = child.stdout.readlines()
    content = ''
    for item in out:
        # subprocess yields bytes in Python 3; decode before comparing with str
        # (the original compared a str against bytes fields, which never matched)
        line = item.decode('utf-8', errors='ignore').strip().split()
        # only the CentOS root volume is checked here
        if '/dev/mapper/centos-root' in line:
            title = [u'-文件系统-', u'--容量-', u'-已用-', u'-可用-', u'-已用-', u'-挂载点--']
            content = "\t".join(title)
            if int(line[4].rstrip('%')) > 60:
                line[0] = 'centos-root'
                content += '\r\n' + '\t'.join(line)
    return content
# Test routine (kept for reference)
# if __name__ == "__main__":
# disk_information = disk_info()
# disk_usage = [int(s) for s in re.findall(r'\b\d+\b', disk_information)]
# infomation = [hostname_info(), time_info(), disk_information]
# print(disk_usage)
# # alert by email if disk usage exceeds 60%
# if disk_usage[0] > 60:
# print("当前磁盘占用率已超过60%,建议清除磁盘内存!")
#
# # print(hostname_info())
# # print(time_info())
# # print(ip_info())
# print(sys_info())
# print(cpu_info())
# print(mem_info())
# print(disk_info())
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : main_server.py
# @Time : 2023/3/31 10:31
# @Author : bruxelles_li
# @Software: PyCharm
import logging
import requests
import threading
import sys
import time, os
import json
import pandas as pd
import glob
from pathlib import Path
sys.path.append('../')
# Close idle connections (disable HTTP keep-alive)
s = requests.session()
s.keep_alive = False
from classification.runner.runner_fast_text import FastTextRunner_train
from detector_source import sys_info, cpu_info, mem_info
from classification.data.data_process import pro_data
# Log output format
formatter = logging.Formatter("%(asctime)s [%(levelname)s] <%(processName)s> (%(threadName)s) %(message)s")
# Create a logger and set its level
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Console handler
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
# File handlers (error log and info log)
_tmp_path = os.path.dirname(os.path.abspath(__file__))
_tmp_path = os.path.join(_tmp_path, 'log')
Path(_tmp_path).mkdir(parents=True, exist_ok=True)
fh = logging.FileHandler(os.path.join(_tmp_path, "main_server_error.log"))
fh1 = logging.FileHandler(os.path.join(_tmp_path, "main_server_info.log"))
fh.setLevel(level=logging.ERROR)
fh1.setLevel(level=logging.INFO)
fh.setFormatter(formatter)
fh1.setFormatter(formatter)
# Emit to both console and files
logger.addHandler(ch)
logger.addHandler(fh)
logger.addHandler(fh1)
# Training config file
train_config_path = '../classification/config/fasttext_config_train.yml'
# TODO: paths used for data preprocessing
root_path = r'../word2vec/doc_similarity/'
stop_words_path = os.path.join(root_path, 'stop_words.txt')
save_data_path = r'../datasets/classification/{}/{}/{}.txt'
file_types = ['xls', 'xlsx']
# Java callback endpoint for status updates
java_call_back_url = "http://192.168.1.82:9988/manage/algorithmModel/process/changeStatus"
# Service port
port = 4005
modelName = "FastText-Model"
# TODO: list of started training threads
all_thread = []
def merge_df(dataset_path):
    all_files = []
    for file_type in file_types:
        all_files.extend(glob.glob(os.path.join(dataset_path, f'*.{file_type}')))
    # Merge all files into one DataFrame
    combined_df = pd.concat([pd.read_excel(f) for f in all_files], ignore_index=True)
    # Drop duplicate rows
    combined_df.drop_duplicates(keep='first', inplace=True)
    return combined_df
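def _example_merge_usage():
    """Hedged sketch (not called anywhere): merge a hypothetical dataset folder of
    .xls/.xlsx exports (title/content/label columns) into one de-duplicated frame."""
    combined = merge_df('../datasets/classification/FastText-Model/V0')  # hypothetical path
    print(len(combined), list(combined.columns))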
def train_model4FastText(data_path, model_path, modelProcessId, root_dataset):
"""
train
:return:
"""
combined_df = merge_df(dataset_path=root_dataset)
    # Preprocess the data
    pro_data(dataFolderName=data_path, data_df=combined_df, stop_words_path=stop_words_path,
             save_data_path=save_data_path, modelName=modelName)
    logger.info("====数据预处理成功,准备进入训练阶段===")
    # Train, then evaluate on the validation split
runner_train = FastTextRunner_train(config_path=train_config_path, model_train=True)
runner_train.train(data_path=data_path, model_path=model_path, auto_tune_duration=300)
dict_result = runner_train.test(data_path=data_path, model_path=model_path)
str_dict_result = json.dumps(dict_result, ensure_ascii=False)
logger.info(str_dict_result)
    # TODO: report the training result back through the Java status-update endpoint
    payload = json.dumps({
        "id": modelProcessId,
        "result": str_dict_result
    })
    # TODO: generate currentTime / appId through the parameter-generation endpoint
    headers = {
        'Content-Type': 'application/json'
    }
    r1 = requests.post(url=f"{java_call_back_url}",
                       headers=headers, data=payload)
    r1_json = json.loads(r1.text)
    logger.info(r1_json)
return str_dict_result
def env_eval(modelProcessId):
    # TODO gather resource info (system load, CPU usage, physical memory usage; disk optional)
    # disk_usage = disk_info()
    sys_usage = sys_info()
    cpu_usage = cpu_info()
    men_usage = mem_info()
    # TODO return False when resources are insufficient.
    # Note: the original compared the percentage strings lexicographically
    # (e.g. '100' > '95' is False) and tested len(loadavg) > 10000, which never
    # fired; compare numerically instead. The load threshold of 10 follows the
    # message text below and is an assumption.
    if sys_usage > 10 or int(cpu_usage) > 95 or int(men_usage) > 95:
        # TODO: report the over-utilisation through the Java status-update endpoint
        str_dict_result = {
            'handleMsg': 'failure',
            'isHandleSuccess': False,
            'logs': '模型训练失败!当前模型训练资源占用率过高,请检查系统占用信息【超过10个为高】、CPU占用率【超过85%为高】、物理内存占用率【超过85%为高】',
            'resultData': None
        }
        logger.info(str_dict_result)
        payload = json.dumps({
            "id": modelProcessId,
            "result": str_dict_result
        })
        headers = {
            'Content-Type': 'application/json'
        }
        r1 = requests.post(
            url=f"{java_call_back_url}",
            headers=headers, data=payload)
        r1_json = json.loads(r1.text)
        logger.info(r1_json)
        return False
    # TODO resources sufficient: return True
    return True
def system_start():
while True:
# print("=====正在进行训练服务=====")
headers = {
'Content-Type': 'application/json'
}
r1 = requests.post(url=f'http://localhost:{int(port)}/queue_size', headers=headers)
r1_json = json.loads(r1.text)
queue_left_number = r1_json['queue_left_number']
logger.info("当前队列任务总数:" + str(queue_left_number))
if queue_left_number == 0:
# logger.warning("队列为空!无可处理任务。")
time.sleep(30)
else:
for i in range(queue_left_number):
r2 = requests.post(url=f'http://localhost:{int(port)}/subject_consumer', headers=headers)
r2_json = json.loads(r2.text)
config_info = r2_json['data']
logger.info(config_info)
modelProcessId = config_info["modelProcessId"]
model_path = config_info["model_path"]
data_path = config_info["data_path"]
root_dataset = config_info["root_dataset"]
logger.info('##########FastText-Model###############')
t = threading.Thread(target=train_model4FastText,
args=(data_path, model_path, modelProcessId, root_dataset),
daemon=True)
while True:
if env_eval(modelProcessId):
break
else:
time.sleep(600)
                # start the training thread
t.start()
all_thread.append(t)
def system_resume():
"""
恢复模型训练服务状态
:return:
"""
headers = {
'Content-Type': 'application/json'
}
    # Drain the queue so the same training job is not started twice
r1 = requests.post(url=f'http://localhost:{int(port)}/queue_size', headers=headers)
r1_json = r1.json()
logger.info('当前队列数量:%d' % r1_json['queue_left_number'])
if r1_json['queue_left_number'] > 0:
logger.info('正在消费队列,直到队列为空!')
while True:
r2 = requests.post(url=f'http://localhost:{int(port)}/subject_consumer', headers=headers)
r2_json = r2.json()
if r2_json['queue_left_number'] == 0:
logger.info('队列消费完毕!可放心进行模型训练 ...')
break
else:
logger.info('队列为空!可放心进行模型训练 ...')
def start_up_check():
    """
    Pre-start check: make sure the queue server is reachable before training starts.
    (The original wrapped this in a while-loop whose branches always exit or break,
    so the loop was dead and is removed here.)
    :return:
    """
    headers = {
        'Content-Type': 'application/json'
    }
    try:
        requests.post(url=f'http://localhost:{int(port)}/queue_size', headers=headers)
    except requests.exceptions.ConnectionError:
        logger.error("Error: ConnectionError")
        logger.warning('服务未启动,请先启动server! 程序已退出。')
        exit(123)
    logger.info("server启动成功!模型训练服务已启动...")
if __name__ == '__main__':
# root_path = "../datasets/classification/zcjd_column_classify/zcjd_V0"
# data_df = merge_df(root_path)
# print(len(data_df))
# print(data_df)
# 开始启动模型训练服务
start_up_check()
logger.info('模型训练服务恢复中 ...')
system_resume()
time.sleep(30)
logger.info('模型训练服务恢复完成!')
logger.info('模型训练服务运行中 ...')
system_start()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/7/31 10:21
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/21 9:30
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_app
# @Author : LiuYan
# @Time : 2021/4/21 9:30
import json
from flask import Flask, Blueprint, request
from utils.log import logger
app = Flask(__name__)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:03
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_config
# @Author : LiuYan
# @Time : 2021/4/16 18:06
import os
import pymysql
from abc import abstractmethod, ABC
# root_dir = '/data/lzc/zzsn_nlp_br'
# root_dir = '/data/lzc'
root_dir = '..' # deploy
db_config = {
    'host': os.environ.get('brpa_tidb_host'),
    'port': int(os.environ['brpa_tidb_port']) if 'brpa_tidb_port' in os.environ else None,
    'user': os.environ.get('brpa_tidb_user'),
    'password': os.environ.get('brpa_tidb_password'),
    'database': os.environ.get('brpa_tidb_database'),
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor
}
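def _example_db_connect():
    """Hedged sketch (not called anywhere): open a connection with db_config.
    Assumes the brpa_tidb_* environment variables are set; pymysql raises otherwise."""
    conn = pymysql.connect(**db_config)
    try:
        with conn.cursor() as cursor:  # DictCursor: rows come back as dicts
            cursor.execute('SELECT 1 AS ok;')
            print(cursor.fetchone())
    finally:
        conn.close()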
class BaseConfig(ABC):
@abstractmethod
def __init__(self):
super(BaseConfig, self).__init__()
@abstractmethod
def load_config(self):
"""
Add the config you need.
:return: config(YamlDict)
"""
pass
home:
dir: '/data/lzc'
# Please set the GPU or CPU to be used for your model training in the LoadConfig object
device: "cuda:0"
# shared for multiple projects in this machine, raw data, read only
data:
# base: '/data'
base: 'd:/data'
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:03
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_data_loader
# @Author : LiuYan
# @Time : 2021/4/19 9:37
from abc import ABC, abstractmethod
class BaseDataLoader(ABC):
@abstractmethod
def __init__(self):
super(BaseDataLoader, self).__init__()
@abstractmethod
def _load_data(self):
"""
load raw data according to data config
:return:
"""
pass
@abstractmethod
def load_train(self):
pass
@abstractmethod
def load_valid(self):
pass
@abstractmethod
def load_test(self):
pass
pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_data_process
# @Author : LiuYan
# @Time : 2021/4/19 9:37
from abc import ABC, abstractmethod
class BaseDataProcess(ABC):
"""
data processing
"""
@abstractmethod
def __init__(self):
super(BaseDataProcess, self).__init__()
@abstractmethod
def process(self):
pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_data_reader
# @Author : LiuYan
# @Time : 2021/4/19 9:37
from abc import ABC, abstractmethod
class BaseDataReader(ABC):
@abstractmethod
def __init__(self):
super(BaseDataReader, self).__init__()
@abstractmethod
    def read(self):
pass
@abstractmethod
def save(self):
pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:04
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_evaluator
# @Author : LiuYan
# @Time : 2021/4/19 10:39
from abc import ABC, abstractmethod
class BaseEvaluator(ABC):
@abstractmethod
def __init__(self):
super(BaseEvaluator, self).__init__()
@abstractmethod
def evaluate(self, dict_inputs: dict) -> tuple:
pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:04
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_loss
# @Author : LiuYan
# @Time : 2021/4/19 10:41
from abc import abstractmethod
import torch.nn as nn
class BaseLoss(nn.Module):
def __init__(self, loss_config):
super(BaseLoss, self).__init__()
self._config = loss_config
@abstractmethod
def forward(self, dict_outputs: dict) -> dict:
pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:04
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_model
# @Author : LiuYan
# @Time : 2021/4/19 10:42
from abc import ABC, abstractmethod
import torch.nn as nn
class BaseModel(nn.Module, ABC):
def __init__(self):
super(BaseModel, self).__init__()
@abstractmethod
def forward(self, dict_inputs: dict) -> dict:
pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 18:04
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : base_runner
# @Author : LiuYan
# @Time : 2021/4/19 10:42
from abc import ABC, abstractmethod
from typing import Union

from utils.utils import timeit
class BaseRunner(ABC):
"""
Abstract definition for runner
"""
@abstractmethod
def __init__(self):
pass
@timeit
@abstractmethod
def _build_config(self):
pass
@timeit
@abstractmethod
def _build_data(self):
pass
@timeit
@abstractmethod
def _build_model(self):
pass
@timeit
@abstractmethod
def _build_loss(self):
pass
@timeit
@abstractmethod
def _build_optimizer(self):
pass
@timeit
@abstractmethod
def _build_evaluator(self):
pass
@abstractmethod
def train(self):
pass
@abstractmethod
def _train_epoch(self, epoch: int):
pass
@abstractmethod
def _valid(self, epoch: int):
pass
@abstractmethod
def test(self):
pass
@abstractmethod
    def pred(self, title: str, content: str) -> Union[str, dict]:
pass
@abstractmethod
def _display_result(self, dict_result: dict):
pass
@abstractmethod
def _save_model(self):
pass
@abstractmethod
def _load_model(self):
pass
class train_BaseRunner(ABC):
"""
Abstract definition for runner
"""
@abstractmethod
def __init__(self):
pass
@timeit
@abstractmethod
def _build_config(self):
pass
@timeit
@abstractmethod
def _build_data(self):
pass
@timeit
@abstractmethod
def _build_model(self):
pass
@timeit
@abstractmethod
def _build_loss(self):
pass
@timeit
@abstractmethod
def _build_optimizer(self):
pass
@timeit
@abstractmethod
def _build_evaluator(self):
pass
@abstractmethod
def train(self):
pass
@abstractmethod
def _train_epoch(self, epoch: int):
pass
@abstractmethod
def _valid(self, data_path, model_path, epoch: int):
pass
@abstractmethod
def test(self):
pass
@abstractmethod
    def pred(self, title: str, content: str) -> Union[str, dict]:
pass
@abstractmethod
def _display_result(self, dict_result: dict):
pass
@abstractmethod
def _save_model(self, model_path):
pass
@abstractmethod
def _load_model(self):
pass
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/7/31 17:24
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/21 9:59
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/15 10:31
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : fast_text_config
# @Author : LiuYan
# @Time : 2021/4/19 10:46
import dynamic_yaml
import torch
from base.config.base_config import BaseConfig
class FastTextConfig(BaseConfig):
def __init__(self, config_path):
super(FastTextConfig, self).__init__()
self._config_path = config_path
pass
def load_config(self):
with open(self._config_path, mode='r', encoding='UTF-8') as f:
config = dynamic_yaml.load(f)
config.device = torch.device(config.device if torch.cuda.is_available() else 'cpu')
return config
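def _example_load_config():
    """Hedged sketch (not called anywhere): load a config and read a few fields.
    The relative path below is an assumption; dynamic_yaml resolves '{...}'
    references such as learn.dir.load_model when the attribute is accessed."""
    config = FastTextConfig(config_path='../config/fasttext_config_train.yml').load_config()  # hypothetical path
    print(config.status)                # 'pred' / 'test' / 'train'
    print(config.learn.dir.load_model)  # interpolated model path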
home:
# dir: '/home/zzsn/liuyan' # train or test
dir: '../../..' # deploy
# Shared for multiple modules in the project
project:
name: 'platform_project'
dir:
work: '{home.dir}/{project.name}'
# Please set the GPU or CPU to be used for your model training in the LoadConfig object
device: 'cpu'
status: 'pred' # pred / test / train
# shared for multiple projects in this machine, raw data, read only
data:
dir: ''
name: 'FastText-Model'
num_vocab: ~
num_tag: ~
model:
name: 'Origin-Model'
loss:
name: 'ft_loss'
learn:
time: '2023_03_31-12_15_17'
dir:
work: '{home.dir}/model_saved/classification/{data.name}'
logs: '{learn.dir.work}/log'
saved: '{learn.dir.work}/{model.name}'
result: '{learn.dir.work}/data/result'
# save_model: '{learn.dir.saved}-{learn.time}/model.bin'
load_model: '{learn.dir.saved}-{learn.time}/model.bin'
home:
# dir: '/data/lzc' # train or test
dir: '../../..' # deploy
# Shared for multiple modules in the project
project:
name: 'platform_project'
dir:
work: '{home.dir}/{project.name}'
# Please set the GPU or CPU to be used for your model training in the LoadConfig object
#device: 'cpu'
device: 'cuda:0'
status: 'train' # pred / test / train
# shared for multiple projects in this machine, raw data, read only
data:
dir: '../datasets/classification'
name: 'FastText-Model'
path0: '{data.dir}/{data.name}%s'
train_path: '{data.dir}/{data.name}%s/train.txt'
valid_path: '{data.dir}/{data.name}%s/valid.txt'
test_path: '{data.dir}/{data.name}%s/valid.txt'
batch_size: 4
num_vocab: ~
num_tag: ~
model:
name: 'Origin-Model'
loss:
name: 'ft_loss'
learn:
time: '2023_03_31-12_15_17'
dir:
work: '{home.dir}/model_saved/classification'
logs: '{learn.dir.work}/log'
saved0: '{learn.dir.work}%s'
saved: '{learn.dir.work}/{data.name}%s'
result: '{learn.dir.work}/data/result'
# save_model: '{learn.dir.saved}-{learn.time}/model.bin'
load_model: '{learn.dir.saved}-{learn.time}/model.bin'
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/15 10:31
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : data_process
# @Author : bruxellse_li
# @Time : 2023/3/31 08:39
import os
import pandas as pd
import sys
from pathlib import Path
from pandas import DataFrame
from sklearn.model_selection import train_test_split
# Append the working path
sys.path.append('../../')
from classification.utils.utils import *
def process_txt(data_loader: DataFrame, train_file_path: str, valid_file_path: str, stop_words_path: str):
articles = data_loader['article']
labels = data_loader['label']
article_list = []
    sw = stop_words(path=stop_words_path)  # load the stop-word list once, not per article
    for article, label in zip(articles, labels):
        if type(article) is str:
            text = article.replace('\n', '').replace('\r', '').replace('\t', '')
        else:
            print('{} is not str!'.format(article))
            continue
        text = seg(text=text, sw=sw)
        text = '__label__{} {}'.format(label, text)
        article_list.append(text)
train_data, valid_data = train_test_split(
article_list, train_size=0.8, random_state=2021, shuffle=True
)
with open(
train_file_path, 'w', encoding='utf-8'
) as train_file, open(
valid_file_path, 'w', encoding='utf-8'
) as valid_file:
for train in train_data:
train_file.write(train + '\n')
for valid in valid_data:
valid_file.write(valid + '\n')
pass
def process(data_loader, train_file_path: str, valid_file_path: str, stop_words_path: str):
    # Create the corpus directory (kept for reference)
# Path(os.path.abspath(os.path.join(train_file_path, os.path.pardir))).mkdir(parents=True, exist_ok=True)
# data_loader = pd.read_excel(path, keep_default_na=False).astype(str)
data_loader['article'] = data_loader['title'] + '。' + data_loader['content']
data_loader['article'] = data_loader.article.apply(clean_tag).apply(clean_txt)
process_txt(
data_loader=data_loader,
train_file_path=train_file_path,
valid_file_path=valid_file_path,
stop_words_path=stop_words_path
)
return None
# Corpus-processing entry point
def pro_data(modelName, dataFolderName, data_df, stop_words_path, save_data_path):
# save_data_path = '/home/python/lzc/datasets/classification/{}/{}/{}.txt'
process(
data_loader=data_df,
train_file_path=save_data_path.format(modelName, dataFolderName, 'train'),
valid_file_path=save_data_path.format(modelName, dataFolderName, 'valid'),
stop_words_path=stop_words_path
)
return None
if __name__ == '__main__':
    modelName, dataFolderName, data_path = "gzdt_dataset", "gzdt_V1", "../../datasets/Receive_File/测试数据.xlsx"
    save_data_path = r'../../datasets/classification/{}/{}/{}.txt'
    root_path = r'../../word2vec/doc_similarity/'
    stop_words_path = os.path.join(root_path, 'stop_words.txt')
    # pro_data expects a DataFrame, so read the Excel file first
    # (the original passed the file path itself, which would fail inside process()).
    data_df = pd.read_excel(data_path, keep_default_na=False).astype(str)
    pro_data(modelName, dataFolderName, data_df, stop_words_path, save_data_path)
    # date = '20230329'
    # path = '../datasets/{}_total_{}.xlsx'
    #
    # save_data_path = '/home/zzsn/liuyan/datasets/the_belt_and_road/classification/{}/{}_{}.txt'
    # # Machinery sentiment: current-affairs (时事要闻) column classifier
    # ssyw_name = 'ssyw_column_classify'
    # # Machinery sentiment: SOE news (国资动态) column classifier
    # gzdt_name = 'gzdt_column_classify'
    # # Machinery sentiment: supply-chain (上下游) column classifier
    # sxy_name = 'sxy_column_classify'
    # # Machinery sentiment: industry sentiment (行业舆情) column classifier
    # hyyq_name = 'hyyq_column_classify'
    # # Machinery sentiment: management news (管理动态) column classifier
    # gldt_name = 'gldt_column_classify'
    # # Machinery sentiment: leading enterprises (龙头企业) column classifier
    # ltqy_name = 'ltqy_column_classify'
    # # Machinery sentiment: emerging fields (新兴领域) column classifier
    # xxly_name = 'xxly_column_classify'
    # # Machinery sentiment: general news (综合资讯) column classifier
    # zhzx_name = 'zhzx_column_classify'
    # # Machinery sentiment: negative sentiment (负面舆情) column classifier
    # fmyq_name = 'fmyq_column_classify'
    #
    # process(
    #     path=path.format(gzdt_name, date),
    #     train_file_path=save_data_path.format(gzdt_name, 'train', date),
    #     valid_file_path=save_data_path.format(gzdt_name, 'valid', date)
    # )
    # pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : data_stats
# @Author : LiuYan
# @Time : 2021/4/15 16:52
import pandas as pd
from collections import Counter
if __name__ == '__main__':
pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/15 10:33
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : eval_classification
# @Author : LiuYan
# @Time : 2021/4/20 21:19
from collections import Counter
from sklearn.metrics import precision_score, recall_score, f1_score
from base.evaluation.base_evaluator import BaseEvaluator
class ClassifyEvaluator(BaseEvaluator):
# def __init__(self, label_dict: dict):
def __init__(self):
super(ClassifyEvaluator, self).__init__()
# self._label_dict = label_dict
# self._count_dict = {'TP': 0}
pass
def evaluate(self, true_list: list, pred_list: list) -> tuple:
dict_result = {}
true_labels = Counter(true_list)
pred_labels = Counter(pred_list)
print(true_labels)
print(pred_labels)
for true_label in true_labels:
# print(true_labels[true_label], pred_labels[true_label])
dict_result[true_label] = {
'precision': 0,
'recall': 0,
'f1-score': 0,
'true_num': 0,
'pred_num': pred_labels[true_label],
'total_num': true_labels[true_label]
}
for true, pred in zip(true_list, pred_list):
if true == pred:
dict_result[true]['true_num'] += 1
print('\n' + ''.join('-' for i in range(89)))
print('label_type\t\t\tp\t\t\tr\t\t\tf1\t\t\ttrue_num\t\t\tpred_num\ttotal_num')
string = '{0}{1:<12.4f}{2:<12.4f}{3:<12.4f}{4:<12}{5:<12}{6:<12}'
true_nums, pred_nums, total_nums = 0, 0, 0
for label_type in dict_result:
true_nums += dict_result[label_type]['true_num']
pred_nums += dict_result[label_type]['pred_num']
total_nums += dict_result[label_type]['total_num']
p = dict_result[label_type]['true_num'] / dict_result[label_type]['pred_num'] if dict_result[label_type]['pred_num'] != 0 else 0
r = dict_result[label_type]['true_num'] / dict_result[label_type]['total_num'] if dict_result[label_type]['total_num'] != 0 else 0
f1 = 2 * p * r / (p + r) if p + r != 0 else 0
chunk_type_out = label_type + ''.join(
' ' for i in range(20 - (((len(label_type.encode('utf-8')) - len(label_type)) // 2) + len(label_type)))
)
print(string.format(chunk_type_out, p, r, f1, dict_result[label_type]['true_num'],
dict_result[label_type]['pred_num'], dict_result[label_type]['total_num']), chr(12288))
dict_result[label_type]['precision'] = p
dict_result[label_type]['recall'] = r
dict_result[label_type]['f1-score'] = f1
p = true_nums / pred_nums if pred_nums != 0 else 0
r = true_nums / total_nums if total_nums != 0 else 0
f1 = 2 * p * r / (p + r) if p + r != 0 else 0
print(string.format('average{}'.format(''.join(' ' for i in range(13))), p, r, f1,
true_nums, pred_nums, total_nums), chr(12288))
print(''.join('-' for i in range(89)) + '\n')
dict_result['average'] = {
'precision': p,
'recall': r,
'f1-score': f1,
'true_num': true_nums,
'pred_num': pred_nums,
'total_num': total_nums
}
return p, r, f1, dict_result
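def _example_evaluate():
    """Hedged sketch (not called anywhere): evaluate toy predictions; prints the
    per-label P/R/F1 table and returns the aggregate scores."""
    evaluator = ClassifyEvaluator()
    p, r, f1, detail = evaluator.evaluate(
        true_list=['A', 'A', 'B', 'B'],
        pred_list=['A', 'B', 'B', 'B']
    )
    return p, r, f1, detail['average']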
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/8/2 15:47
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/15 10:31
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : runner_fast_text
# @Author : LiuYan
# @Time : 2021/4/15 16:44
import os
import sys
import time
import json
import warnings
import fasttext
import pandas as pd
from pathlib import Path
from typing import Optional, Union
sys.path.append('../../')
from utils.utils import timeit
from base.runner.base_runner import BaseRunner, train_BaseRunner
from classification.config.config_fast_text import FastTextConfig
from classification.evaluation.classify_evaluator import ClassifyEvaluator
from classification.utils.utils import *
warnings.filterwarnings('ignore')
fasttext.FastText.eprint = lambda x: None
class FastTextRunner_train(train_BaseRunner):
def __init__(self, config_path: str, model_train=False, model_path=None):
super(FastTextRunner_train, self).__init__()
self._config_path = config_path
self._config = None
self._time = time.strftime('%Y_%m_%d-%H_%M_%S')
self._model_train = model_train
self._model_path = model_path
self._train_dataloader = None
self._valid_dataloader = None
self._test_dataloader = None
self._model = None
self._loss = None
self._optimizer = None
self._evaluator = None
self._build()
@timeit
def _build(self):
self._build_config()
# self._time = self._config.learn.time
self._build_data()
self._build_model()
self._build_loss()
self._build_optimizer()
self._build_evaluator()
pass
@timeit
def _build_config(self):
self._config = FastTextConfig(config_path=self._config_path).load_config()
pass
@timeit
def _build_data(self):
if self._config.status in ['train', 'test'] or self._model_train:
self._train_path = self._config.data.train_path
self._valid_path = self._config.data.valid_path
self._test_path = self._config.data.test_path
else:
self._stop_words = stop_words(
path=r'../../word2vec/f_zp_gp/stop_words.txt'
)
# self._stop_words = stop_words(
# path=os.path.join(self._config.home.dir, 'word2vec/f_zp_gp/stop_words.txt')
# )
pass
@timeit
def _build_model(self):
if self._model_path:
self._config.learn.dir.load_model = self._model_path
if self._config.status in ['test', 'pred'] and not self._model_train:
self._load_model()
pass
@timeit
def _build_loss(self):
pass
@timeit
def _build_optimizer(self):
pass
@timeit
def _build_evaluator(self):
self._evaluator = ClassifyEvaluator()
pass
@timeit
def train(self, data_path, model_path, auto_tune_duration=500, auto_tune_model_size='200M'):
self._model = fasttext.train_supervised(
input=self._train_path % data_path, autotuneValidationFile=self._test_path % data_path,
autotuneDuration=auto_tune_duration, autotuneModelSize=auto_tune_model_size
)
self._save_model(model_path)
pass
def _train_epoch(self, epoch: int):
pass
    def _valid(self, data_path, model_path, epoch: int) -> Optional[dict]:
with open(self._valid_path % data_path, encoding='utf-8') as file:
self._valid_dataloader = file.readlines()
labels = []
pre_labels = []
        for text in self._valid_dataloader:
            label = text.replace('__label__', '').split(' ')[0]
            labels.append(label)
            # Strip the '__label__<label> ' prefix and the trailing newline.
            # (The original sliced off one leading character, which only worked
            # for single-character labels.)
            text = text.replace('__label__', '').split(' ', 1)[-1].strip()
            pre_label = self._model.predict(text)[0][0].replace('__label__', '')
            pre_labels.append(pre_label)
p, r, f1, dict_result = self._evaluator.evaluate(true_list=labels, pred_list=pre_labels)
if self._config.status == 'train' or self._model_train:
json_result = json.dumps(dict_result)
with open(self._config.learn.dir.saved % model_path + '-{}/evaluation_metrics.json'.format(self._time),
'w', encoding='utf-8') as f:
f.write(json_result)
if self._model_train:
dict_result = {
'code': 200,
'result': '模型训练成功!模型评测指标为: precision: {:.0f}% recall: {:.0f}% f1-score: {:.0f}%'.format(
dict_result['average']['precision'] * 100,
dict_result['average']['recall'] * 100,
dict_result['average']['f1-score'] * 100
),
'model_path': self._config.learn.dir.saved % model_path + '-{}/model.bin'.format(self._time)
}
return dict_result
    def test(self, data_path=None, model_path=None, title=None, content=None) -> Optional[dict]:
if self._model_train:
return self._valid(data_path=data_path, model_path=model_path, epoch=100)
elif self._model_path:
with open(
os.path.join(os.path.split(self._model_path)[0], 'evaluation_metrics.json'),
'r', encoding='utf-8'
) as f:
json_result = json.load(f)
evaluation_metrics = {
'精确率(P)': '{:.0f}%'.format(json_result['average']['precision'] * 100),
'召回率(R)': '{:.0f}%'.format(json_result['average']['recall'] * 100),
'F1值(F1)': '{:.0f}%'.format(json_result['average']['f1-score'] * 100)
}
result = self.pred(title=title, content=content)
dict_result = {
'handleMsg': 'success',
'code': 200,
'logs': '模型测试成功!',
'result': {
'label': result,
'evaluation_metrics': evaluation_metrics
}
} if type(result) == str else result
return dict_result
else:
self._valid(data_path=data_path, model_path=model_path, epoch=100)
    def pred(self, title: str, content: str) -> Union[str, dict]:
        # Guard against None fields: the calling endpoints may omit title/content.
        title = title if type(title) is str else ''
        content = content if type(content) is str else ''
        text = (title + '。') * 2 + content
text = clean_txt(raw=clean_tag(text=text))
if type(text) is str:
text = text.replace('\n', '').replace('\r', '').replace('\t', '')
else:
return {
'handleMsg': 'failure',
'code': 300,
'logs': '{} is not str!'.format(text),
'result': {
'label': None
}
}
text = seg(text=text, sw=self._stop_words)
pre_label = self._model.predict(text)[0][0].replace('__label__', '')
return pre_label
    def pred_file(self, file_path: str, result_path: str) -> Optional[dict]:
data_loader = pd.read_excel(file_path)
titles, contents = data_loader['title'], data_loader['content']
labels = []
for title, content in zip(titles, contents):
pred_result = self.pred(title, content)
if type(pred_result) == str:
labels.append('是' if pred_result == '1' else '否')
else:
return pred_result
data_loader['label'] = labels
data_loader.to_excel(result_path)
def _display_result(self, dict_result: dict):
pass
def _save_model(self, model_path):
print(self._config.learn.dir.saved % model_path + '-{}/model.bin'.format(self._time))
Path(self._config.learn.dir.saved % model_path + '-{}'.format(self._time)).mkdir(parents=True, exist_ok=True)
self._model.save_model(self._config.learn.dir.saved % model_path + '-{}/model.bin'.format(self._time))
def _load_model(self):
self._model = fasttext.load_model(self._config.learn.dir.load_model)
class FastTextRunner(BaseRunner):
def __init__(self, config_path: str, model_train=False, model_path=None):
super(FastTextRunner, self).__init__()
self._config_path = config_path
self._config = None
self._time = time.strftime('%Y_%m_%d-%H_%M_%S')
self._model_train = model_train
self._model_path = model_path
self._train_dataloader = None
self._valid_dataloader = None
self._test_dataloader = None
self._model = None
self._loss = None
self._optimizer = None
self._evaluator = None
self._build()
@timeit
def _build(self):
self._build_config()
self._build_data()
self._build_model()
self._build_loss()
self._build_optimizer()
self._build_evaluator()
pass
@timeit
def _build_config(self):
self._config = FastTextConfig(config_path=self._config_path).load_config()
pass
@timeit
def _build_data(self):
if self._config.status in ['train', 'test'] or self._model_train:
self._train_path = self._config.data.train_path
self._valid_path = self._config.data.valid_path
self._test_path = self._config.data.test_path
else:
self._stop_words = stop_words(
path=os.path.join(self._config.data.dir, '../word2vec/f_zp_gp/stop_words.txt')
)
# self._stop_words = stop_words(
# path=os.path.join(self._config.home.dir, 'word2vec/f_zp_gp/stop_words.txt')
# )
pass
@timeit
def _build_model(self):
if self._model_path:
self._config.learn.dir.load_model = self._model_path
if self._config.status in ['test', 'pred'] and not self._model_train:
self._load_model()
pass
@timeit
def _build_loss(self):
pass
@timeit
def _build_optimizer(self):
pass
@timeit
def _build_evaluator(self):
self._evaluator = ClassifyEvaluator()
pass
@timeit
def train(self, auto_tune_duration=5000, auto_tune_model_size='200M'):
self._model = fasttext.train_supervised(
input=self._train_path, autotuneValidationFile=self._test_path,
autotuneDuration=auto_tune_duration, autotuneModelSize=auto_tune_model_size
)
self._save_model()
pass
def _train_epoch(self, epoch: int):
pass
    def _valid(self, epoch: int) -> Optional[dict]:
with open(self._valid_path, encoding='utf-8') as file:
self._valid_dataloader = file.readlines()
labels = []
pre_labels = []
        for text in self._valid_dataloader:
            label = text.replace('__label__', '').split(' ')[0]
            labels.append(label)
            # Strip the '__label__<label> ' prefix and the trailing newline.
            # (The original sliced off one leading character, which only worked
            # for single-character labels.)
            text = text.replace('__label__', '').split(' ', 1)[-1].strip()
            pre_label = self._model.predict(text)[0][0].replace('__label__', '')
            pre_labels.append(pre_label)
p, r, f1, dict_result = self._evaluator.evaluate(true_list=labels, pred_list=pre_labels)
if self._config.status == 'train' or self._model_train:
json_result = json.dumps(dict_result)
with open(self._config.learn.dir.saved + '-{}/evaluation_metrics.json'.format(self._time),
'w', encoding='utf-8') as f:
f.write(json_result)
if self._model_train:
dict_result = {
'code': 200,
'result': '模型训练成功!模型评测指标为: precision: {:.0f}% recall: {:.0f}% f1-score: {:.0f}%'.format(
dict_result['average']['precision'] * 100,
dict_result['average']['recall'] * 100,
dict_result['average']['f1-score'] * 100
),
'model_path': self._config.learn.dir.saved + '-{}/model.bin'.format(self._time)
}
return dict_result
    def test(self, title=None, content=None) -> Optional[dict]:
if self._model_train:
return self._valid(epoch=100)
elif self._model_path:
with open(
os.path.join(os.path.split(self._model_path)[0], 'evaluation_metrics.json'),
'r', encoding='utf-8'
) as f:
json_result = json.load(f)
evaluation_metrics = {
'精确率(P)': '{:.0f}%'.format(json_result['average']['precision'] * 100),
'召回率(R)': '{:.0f}%'.format(json_result['average']['recall'] * 100),
'F1值(F1)': '{:.0f}%'.format(json_result['average']['f1-score'] * 100)
}
result = self.pred(title=title, content=content)
dict_result = {
'handleMsg': 'success',
'code': 200,
'logs': '模型测试成功!',
'result': {
'label': result,
'evaluation_metrics': evaluation_metrics
}
} if type(result) == str else result
return dict_result
else:
self._valid(epoch=100)
    def pred(self, title: str, content: str) -> Union[str, dict]:
        # Guard against None fields: the calling endpoints may omit title/content.
        title = title if type(title) is str else ''
        content = content if type(content) is str else ''
        text = (title + '。') * 2 + content
text = clean_txt(raw=clean_tag(text=text))
if type(text) is str:
text = text.replace('\n', '').replace('\r', '').replace('\t', '')
else:
return {
'handleMsg': 'failure',
'code': 500,
'logs': '{} is not str!'.format(text),
'result': {
'label': None
}
}
text = seg(text=text, sw=self._stop_words)
pre_label = self._model.predict(text)[0][0].replace('__label__', '')
return pre_label
    def pred_file(self, file_path: str, result_path: str) -> Optional[dict]:
data_loader = pd.read_excel(file_path)
titles, contents = data_loader['title'], data_loader['content']
labels = []
for title, content in zip(titles, contents):
pred_result = self.pred(title, content)
if type(pred_result) == str:
labels.append('是' if pred_result == '1' else '否')
else:
return pred_result
data_loader['label'] = labels
data_loader.to_excel(result_path)
def _display_result(self, dict_result: dict):
pass
def _save_model(self):
Path(self._config.learn.dir.saved + '-{}'.format(self._time)).mkdir(parents=True, exist_ok=True)
self._model.save_model(self._config.learn.dir.saved + '-{}/model.bin'.format(self._time))
def _load_model(self):
self._model = fasttext.load_model(self._config.learn.dir.load_model)
if __name__ == '__main__':
    # Belt & Road: project-news recognition / filtering model
    ft_config_path = '../config/config_br_pro_info_filter.yml'
    # Belt & Road: project-info knowledge classification model
    # ft_config_path = '../config/config_br_pro_info_type.yml'
    # Belt & Road: project business-opportunity recognition model
    # ft_config_path = '../config/config_br_buss_op_recognition.yml'
    # Belt & Road: project risk recognition model
    # ft_config_path = '../config/config_br_pro_risk_recognition.yml'
    # Belt & Road: project-news sentiment (positive/negative) model
    # ft_config_path = '../config/config_br_pro_sentiment_analysis.yml'
    runner = FastTextRunner(config_path=ft_config_path)
    # runner.train(
    #     auto_tune_duration=15000
    # )
    runner.test()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File : __init__.py.py
# @Time : 2022/1/5 18:09
# @Author : Mr.Ygg
# @Software: PyCharm
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File : test_br_pro_risk_recognition.py
# @Time : 2022/1/5 18:09
# @Author : Mr.Ygg
# @Software: PyCharm
from base.app.base_app import *
from classification.runner.runner_fast_text import FastTextRunner
from classification.utils.utils import load_risk_keywords, is_include_compound_words
# Risk categories
risk_info = [
'外部政治风险',
'主权政治风险',
'社会动荡风险',
'对华关系风险',
'资金风险',
'财政风险',
    '汇率风险',
    '通货膨胀风险',
'环保风险',
'法律风险',
'突发事件风险',
'项目实施风险',
'企业风险',
'其他风险'
]
ft_config_path = '../config/config_br_pro_risk_recognition.yml'
runner = FastTextRunner(config_path=ft_config_path)
# Recruitment / stock-info filter model
ft_config_path_rc_f_zp_gp = '../config/config_rc_f_zp_gp.yml'
runner_rc_f_zp_gp = FastTextRunner(config_path=ft_config_path_rc_f_zp_gp)
# Project-news sentiment (positive/negative) analysis model
ft_config_path_psa = '../config/config_br_pro_sentiment_analysis.yml'
runner_psa = FastTextRunner(config_path=ft_config_path_psa)
list_country = []
with open('../config/country.txt', 'r', encoding='utf-8') as f:
lines = f.readlines()
for line in lines:
list_country.append(line.strip().split('(')[0].split('(')[0])
# Risk types the model itself can recognise
risk_model_info = [
    '社会动荡风险',
    '突发事件风险'
]
# Risk-category keywords
dict_risk_keywords = load_risk_keywords('../config/risk_keywords.xlsx')
def pred(title: str, content: str) -> dict:
dict_result = {
'风险类别1': '',
'风险类别2': '',
'风险类别3': '',
'风险类别4': ''
}
    # Recruitment / stock filter
    result_rc_f_zp_gp = runner_rc_f_zp_gp.pred(title=title, content=content)
    # 0: neither, 1: recruitment info, 2: stock info
    bool_rc_f_zp_gp = False if result_rc_f_zp_gp == '1' else True
    logger.info('招聘股票筛选模型: {}'.format(result_rc_f_zp_gp))
    logger.info('招聘股票筛选模型: {}'.format(bool_rc_f_zp_gp))
    # Positive/negative sentiment filter
    result_psa = runner_psa.pred(title=title, content=content)
    bool_psa = True if result_psa == '项目负面资讯信息' else False
    logger.info('正负面筛选模型: {}'.format(result_psa))
    logger.info('正负面筛选模型: {}'.format(bool_psa))
    # Country recognition filter
bool_country = False
text = title + '。' + content[: len(content) // 5]
for country in list_country:
if country in text:
bool_country = True
logger.info('国家识别筛选模型: {}'.format(country))
break
logger.info('国家识别筛选模型: {}'.format(bool_country))
text = title + '。' + content
if bool_country and bool_psa:
"""
1. 招聘股票筛选模型 -> 非招聘股票信息
2. 国家识别筛选模型 -> 一带一路相关国家
3. 正负面筛选模型 -> 负面信息
"""
# 风险识别筛选模型
result = runner.pred(
title=title,
content=content
)
dict_result['风险类别1'] = result
dict_result['风险类别2'] = result
dict_result['风险类别3'] = result
dict_result['风险类别4'] = result
logger.info('风险识别筛选模型: {}'.format(result))
        # Keyword-based filtering
        if type(result) is str and result in risk_model_info:
            # categories in risk_model_info are keyword-checked to drop noisy hits
bool_risk_keyword = False
for risk_keyword in dict_risk_keywords[result]:
compound_words = risk_keyword.split('+')
if is_include_compound_words(text=text, compound_words=compound_words):
bool_risk_keyword = True
break
result = result if bool_risk_keyword else '无风险'
dict_result['风险类别3'] = result
dict_result['风险类别4'] = result
logger.info('关键词筛选: {}'.format(bool_risk_keyword))
if result == '无风险':
dict_risk_keywords_num = {
risk_keywords_key: 0 for risk_keywords_key in dict_risk_keywords
}
bool_risk_keyword, risk_category = False, result
for risk_keywords_key in dict_risk_keywords_num:
for risk_keyword in dict_risk_keywords[risk_keywords_key]:
compound_words = risk_keyword.split('+')
if is_include_compound_words(text=text, compound_words=compound_words):
bool_risk_keyword = True
dict_risk_keywords_num[risk_keywords_key] += 1
if bool_risk_keyword:
risk_category = max(dict_risk_keywords_num, key=dict_risk_keywords_num.get)
dict_result['风险类别3'] = risk_category
logger.info('关键词筛选后召回风险信息: {}'.format(risk_category))
elif type(result) is str and result == '无风险':
            # for items the model marks risk-free, use keywords to recall useful risk info
dict_risk_keywords_num = {
risk_keywords_key: 0 for risk_keywords_key in dict_risk_keywords
}
            # do not recall categories the model itself can recognise
            for risk_keywords_key in risk_model_info:
                if risk_keywords_key in dict_risk_keywords_num:
                    dict_risk_keywords_num.pop(risk_keywords_key)
bool_risk_keyword, risk_category = False, result
for risk_keywords_key in dict_risk_keywords_num:
for risk_keyword in dict_risk_keywords[risk_keywords_key]:
compound_words = risk_keyword.split('+')
if is_include_compound_words(text=text, compound_words=compound_words):
bool_risk_keyword = True
dict_risk_keywords_num[risk_keywords_key] += 1
if bool_risk_keyword:
risk_category = max(dict_risk_keywords_num, key=dict_risk_keywords_num.get)
dict_result['风险类别2'] = risk_category
dict_result['风险类别3'] = risk_category
dict_result['风险类别4'] = risk_category
logger.info('关键词召回风险信息: {}'.format(risk_category))
else:
result = result if type(result) is str else 'error'
dict_result['风险类别3'] = result
dict_result['风险类别4'] = result
logger.info('ELSE 风险信息: {}'.format(result))
else:
dict_result['风险类别1'] = '无风险'
dict_result['风险类别2'] = '无风险'
dict_result['风险类别3'] = '无风险'
dict_result['风险类别4'] = '无风险'
logger.info('招聘股票|国家识别筛选: 无风险')
return dict_result
if __name__ == '__main__':
import os
import pandas
root_dir = '../data/datasource/test'
# file_name = 'br总资讯'
file_name = '境外快讯_1.4'
df = pandas.read_excel(os.path.join(root_dir, 'input_file/{}.xlsx'.format(file_name)))
df.drop_duplicates(subset='标题', keep='first', inplace=True)
list_title = df['标题']
list_content = df['正文']
dict_risk_result = {
'风险类别1': [],
'风险类别2': [],
'风险类别3': [],
'风险类别4': []
}
list_risk, list_risk_old = [], []
for index, (title, content) in enumerate(zip(list_title, list_content)):
dict_result = pred(title=title, content=content)
for key in dict_risk_result:
dict_risk_result[key].append(dict_result[key] if key in dict_result else 'error')
result_old = runner.pred(title=title, content=content)
list_risk_old.append(result_old)
logger.info('{} / {}\n'.format(index + 1, len(list_title)))
df['风险类别_old'] = list_risk_old
for key in dict_risk_result:
df[key] = dict_risk_result[key]
df.to_excel(os.path.join(root_dir, 'output_file/{}_result_20220112_s.xlsx'.format(file_name)))
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : test_label.py
# @Time : 2022/1/7 18:28
# @Author : Mr.Ygg
# @Software: PyCharm
import os
import pandas as pd
from classification.utils.utils import load_risk_keywords, is_include_compound_words
root_dir = '../data/datasource/test'
# file_name = '项目风险模型数据集_总'
file_name = '去重_F_ZP_GP'
df = pd.read_excel(os.path.join(root_dir, 'input_file/{}.xlsx'.format(file_name)))
list_title = df['标题']
list_content = df['正文']
list_country = []
with open('../config/country.txt', 'r', encoding='utf-8') as f:
lines = f.readlines()
for line in lines:
list_country.append(line.strip().split('(')[0].split('(')[0])
# Risk-category keywords
dict_risk_keywords = load_risk_keywords('../config/risk_keywords.xlsx')
list_bool_yiqing = []
list_bool_country = []
list_risk_key_words_category = []
for title, content in zip(list_title, list_content):
if type(title) is float:
title = ''
if type(content) is float:
content = ''
    # Country recognition filter
bool_country = False
text = title + '。' + content[: len(content) // 5]
for country in list_country:
if country in text:
bool_country = True
list_bool_country.append('是')
break
if not bool_country:
list_bool_country.append('否')
text = title + '。' + content
    # keyword: 疫情 (epidemic)
if '疫情' in text:
list_bool_yiqing.append('是')
else:
list_bool_yiqing.append('否')
    # Risk keywords
dict_risk_keywords_num = {
risk_keywords_key: 0 for risk_keywords_key in dict_risk_keywords
}
bool_risk_keyword = False
risk_category = '无风险'
for risk_keywords_key in dict_risk_keywords_num:
for risk_keyword in dict_risk_keywords[risk_keywords_key]:
compound_words = risk_keyword.split('+')
if is_include_compound_words(text=text, compound_words=compound_words):
bool_risk_keyword = True
dict_risk_keywords_num[risk_keywords_key] += 1
if bool_risk_keyword:
risk_category = max(dict_risk_keywords_num, key=dict_risk_keywords_num.get)
list_risk_key_words_category.append(risk_category)
df['是否含"疫情"关键词'] = list_bool_yiqing
df['是否含一带一路相关国家'] = list_bool_country
df['关键词分类'] = list_risk_key_words_category
df.to_excel(os.path.join(root_dir, 'output_file/{}_result.xlsx'.format(file_name)))
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : test_label_merge.py
# @Time : 2022/1/10 10:32
# @Author : Mr.Ygg
# @Software: PyCharm
import os
import pandas as pd
root_dir = '../data/datasource/test'
df = pd.read_excel(os.path.join(root_dir, 'input_file/br风险模型数据集_总_20220110.xlsx'))
list_label_1 = df['风险类别'].to_list()
list_label_2 = df['修正风险类别'].to_list()
list_label_3 = df['雪珂终审'].to_list()
list_label = []
for label_1, label_2, label_3 in zip(
list_label_1, list_label_2, list_label_3
):
label = ''
if type(label_1) is str:
label = label_1
if type(label_2) is str:
label = label_2
if type(label_3) is str:
label = label_3
list_label.append(label)
df['label'] = list_label
df.to_excel(os.path.join(root_dir, 'output_file/br风险模型数据集_总_20220110.xlsx'))
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : test_merge.py
# @Time : 2022/1/7 17:23
# @Author : Mr.Ygg
# @Software: PyCharm
import os
import pandas as pd
dict_df = {
'标题': [],
'正文': [],
'状态': [],
'类型': []
}
root_dir = '../data/datasource/test/input_file'
list_file = os.listdir(root_dir)
for file_name in list_file:
file_path = os.path.join(root_dir, file_name)
print(file_path)
df = pd.read_excel(file_path)
list_title = df['标题'].to_list()
list_content = df['正文'].to_list()
list_status = df['审核状态'].to_list()
list_type = df['资讯类型'].to_list()
dict_df['标题'].extend(list_title)
dict_df['正文'].extend(list_content)
dict_df['状态'].extend(list_status)
dict_df['类型'].extend(list_type)
df = pd.DataFrame(dict_df)
df.to_excel(os.path.join(root_dir, 'br总资讯.xlsx'))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/4/16 16:40
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : utils
# @Author : LiuYan
# @Time : 2021/4/16 16:40
import re
import jieba
import pandas
from bs4 import BeautifulSoup
def clean_tag(text):
    """
    Strip HTML tags.
    :param text:
    :return:
    """
    bs = BeautifulSoup(str(text), 'html.parser')
    return bs.text
def clean_txt(raw):
    """
    Remove emoji and other astral-plane characters.
    :param raw:
    :return:
    """
    res = re.compile(u'[\U00010000-\U0010ffff\uD800-\uDBFF\uDC00-\uDFFF]')
    return res.sub('', raw)
def seg(text, sw):
    """
    Tokenise with jieba and drop stop words. (An earlier variant used HanLP's
    NLPTokenizer, which segments with full NER and POS tagging; see below.)
    :param text:
    :param sw: stop-word list
    :return:
    """
    # text = ' '.join([i.word for i in NLPTokenizer.segment(text) if i.word.strip() and i.word not in sw])
    text = ' '.join([i.strip() for i in jieba.cut(text) if i.strip() and i not in sw])
    return text
def stop_words(path: str) -> list:
    """
    Load the stop-word list (one word per line).
    :return:
    """
    with open(path, 'r', encoding='utf-8') as swf:
        return [line.strip() for line in swf]
def segment_para(text):
"""
:param text:
:return:
"""
split_pattern = re.compile(r'\n|。|?|!|\?|\!|\s')
global_sentences = split_pattern.split(text)
global_sentences = ''.join([str(i).strip() + '。' for i in global_sentences if len(i) >= 13])
return global_sentences
def cut_sent(para):
    """
    Split a paragraph into sentences.
    :param para:
    :return:
    """
    para = re.sub('([。!?\?])([^”’])', r"\1\n\2", para)  # single-character terminators
    para = re.sub('(\.{6})([^”’])', r"\1\n\2", para)  # English ellipsis
    para = re.sub('(\…{2})([^”’])', r"\1\n\2", para)  # Chinese ellipsis
    para = re.sub('([。!?\?][”’])([^,。!?\?])', r'\1\n\2', para)
    # A closing quote ends the sentence only when preceded by a terminator, so the
    # newline goes after the quote; the rules above deliberately keep the quotes.
    para = para.rstrip()  # drop any trailing \n at the end of the paragraph
    return para.split("\n")
def transform_data(text, label):
"""
:param text:
:param label:
:return:
"""
fasttext_line = '__label__{} {}'.format(label, text)
return fasttext_line
def load_risk_keywords(path: str) -> dict:
    """
    Load risk-category keywords from an Excel sheet (one column per category).
    :param path:
    :return:
    """
    df = pandas.read_excel(path)
dict_risk_keywords = dict()
for key in df:
list_risk_keywords = []
list_df = df[key].to_list()
for keyword in list_df:
if type(keyword) is str:
list_risk_keywords.append(keyword.strip())
dict_risk_keywords[key] = list_risk_keywords
return dict_risk_keywords
def is_include_compound_words(text: str, compound_words: list) -> bool:
    """
    Whether text contains every compound word, in the given order:
    each word must appear after the previous match.
    :param text:
    :param compound_words:
    :return: True if all match, else False
    """
for compound_word in compound_words:
if compound_word not in text:
return False
else:
text = text[text.find(compound_word) + len(compound_word):]
return True
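def _example_compound_words():
    """Hedged sketch (not called anywhere): compound words must appear in order."""
    text = '项目因工人罢工而停工'
    print(is_include_compound_words(text, ['罢工', '停工']))  # True: '停工' follows '罢工'
    print(is_include_compound_words(text, ['停工', '罢工']))  # False: order matters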
{
"port": 4005,
"ip": "114.116.90.53",
"model_name": "FastText-Model",
"train_url": "/platform/classification/FastText-Model/model_train/",
"application_url": "/platform/classification/FastText-Model/pred/",
"show_file_url": "/platform/operation/process/show_file/",
"remove_file_url": "/platform/operation/process/remove_file/",
"upload_file_url": "/platform/operation/process/upload_file/",
"publish_version_url": "/platform/operation/process/publish_version/",
"model_test_url": "/platform/operation/process/model_test/",
"dataset_saved_path": "../datasets/classification/FastText-Model",
"model_saved_path": "../../../model_saved/classification/FastText-Model",
"java_call_back_url": "http://114.115.205.50:9988/manage/algorithmModel/process/changeStatus",
"train_info": {
"modelProcessId": {
"paramter_name": "训练日志Id",
"paramter_data": "",
"paramter_description": "模型训练日志id"
},
"task_id": {
"paramter_name": "模型训练任务id",
"paramter_data": "",
"paramter_description": "模型训练任务id"
},
"learning_rate": {
"paramter_name": "学习率",
"paramter_data": 0.03,
"paramter_description": "学习率"
},
"gpu": {
"paramter_name": "GPU",
"paramter_data": "",
"paramter_description": "是否使用GPU"
},
"data_path": {
"paramter_name": "语料版本",
"paramter_data": "",
"paramter_description": "模型训练时用户填入参数——语料版本"
},
"model_path": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "模型训练时用户填入参数——模型版本"
}
},
"application_info": {
"title": {
"paramter_name": "文章标题",
"paramter_data": "",
"paramter_description": "文章标题"
},
"content": {
"paramter_name": "文章内容",
"paramter_data": "",
"paramter_description": "文章内容"
},
"id": {
"paramter_name": "文章id",
"paramter_data": "",
"paramter_description": "文章id"
}
},
"show_file_info": {
"file_path": {
"paramter_name": "查询文件的相对路径",
"paramter_data": "",
"paramter_description": "要查询的文件目录,注意这里是相对地址,eg: 查询语料保存根目录dataset_saved_path的语料情况可传入../datasets/classification/"
}
},
"remove_file_info": {
"file_path": {
"paramter_name": "删除文件的相对路径",
"paramter_data": "",
"paramter_description": "要删除的文件,注意这里是相对地址,eg: 删除语料保存根目录dataset_saved_path下的ssyw_column_classify语料文件夹可传入../datasets/classification/ssyw_column_classify"
},
"flag": {
"paramter_name": "文件删除标识",
"paramter_data": "",
"paramter_description": "删除文件还是文件夹的标识,删除文件时flag=“/”,删除文件夹时flag为空字符串"
}
},
"upload_file_info": {
"request_url": {
"paramter_name": "语料下载地址",
"paramter_data": "",
"paramter_description": "待上传的语料文件下载地址,当前仅支持xlsx和xls文件,且文件内容需要包含title、content、label三个字段"
},
"task_id": {
"paramter_name": "模型训练任务id",
"paramter_data": "",
"paramter_description": "模型训练任务id"
}
},
"publish_version": {
"trainModelName": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "待发布的模型版本"
},
"task_id": {
"paramter_name": "模型训练任务id",
"paramter_data": "",
"paramter_description": "模型训练任务id"
}
},
"model_test_info": {
"task_id": {
"paramter_name": "模型训练任务id",
"paramter_data": "",
"paramter_description": "模型训练任务id"
},
"trainModelName": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "待测试的模型版本"
},
"data_type": {
"paramter_name": "测试方式",
"paramter_data": "",
"paramter_description": "可选项:url地址解析标题正文|file文件"
},
"request_url": {
"paramter_name": "测试文件下载地址",
"paramter_data": "",
"paramter_description": "待上传的测试文件下载地址,当前仅支持xlsx和xls文件,且文件内容需要包含title、content、label三个字段"
}
}
}
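#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Hedged client sketch (not part of the original repo): read the service-description
# JSON above and call its model_test_url. The file name 'service_config.json' and
# the payload values are assumptions based on the "model_test_info" section.
import json
import requests

with open('service_config.json', 'r', encoding='utf-8') as f:
    cfg = json.load(f)

resp = requests.post(
    url='http://{}:{}{}'.format(cfg['ip'], cfg['port'], cfg['model_test_url']),
    headers={'Content-Type': 'application/json'},
    data=json.dumps({
        'task_id': '11111',             # hypothetical task id
        'trainModelName': 'V0',         # hypothetical model version
        'data_type': 'url',
        'request_url': 'http://example.com/test.xlsx'  # hypothetical test file
    }, ensure_ascii=False).encode('utf-8')
)
print(resp.text)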
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import requests
import json
# Java callback endpoint
java_call_back_url = "http://192.168.1.82:9988/manage/algorithmModel/process/changeStatus"
# Log output format
formatter = logging.Formatter("%(asctime)s [%(levelname)s] <%(processName)s> (%(threadName)s) %(message)s")
# Create a logger and set its level
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
url = "http://192.168.1.82:9988/manage/algorithmModel/process/changeStatus"
payload = json.dumps({
"result": "{'code': 200, 'result': '模型训练成功!模型评测指标为: precision: 100% recall: 100% f1-score: 100%', 'model_path': '../../../model_saved/classification/FastText-Model/11111/V0-2023_06_11-15_33_15/model.bin'}",
"id": "1455372078906662913"
})
headers = {
'Content-Type': 'application/json'
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
# dict_result = {'code': 200, 'result': '模型训练成功!模型评测指标为: precision: 100% recall: 100% f1-score: 100%', 'model_path': '../../../model_saved/classification/FastText-Model/11111/V0-2023_06_11-15_33_15/model.bin'}
# modelProcessId = "1455372078906662913"
# str_dict_result = json.dumps(dict_result, ensure_ascii=False)
# print(str_dict_result)
# # TODO: report the training result back through the Java status-update endpoint
# payload = json.dumps({
# "id": modelProcessId,
# "result": str_dict_result
# })
# print(payload)
# # TODO: generate currentTime / appId through the parameter-generation endpoint
# headers = {
# 'Content-Type': 'application/json'
# }
# r1 = requests.post(url="http://192.168.1.82:9988/manage/algorithmModel/process/changeStatus",
# headers=headers, data=payload)
#
# r1_json = json.loads(r1.text)
# # print(r1_json)
# print(r1_json)
# python3.9.5
gunicorn==20.1.0
beautifulsoup4==4.11.1
datasketch==1.5.3
dynamic_yaml==1.2.3
emoji==1.4.2
Flask==2.0.1
hanlp==2.1.0b3
jieba==0.42.1
jionlp_py39==1.3.45
keras_bert==0.88.0
matplotlib==3.3.4
numpy==1.19.5
pandas==1.1.5
psutil==5.8.0
PyMySQL==1.0.2
python_Levenshtein==0.20.5
pytorch_pretrained_bert==0.6.2
PyYAML==5.3.1
rarfile==4.0
requests==2.28.1
scikit_learn==1.1.2
seaborn==0.11.2
simhash==2.0.0
tensorflow==2.6.0
torch==1.9.0
tqdm==4.62.2
Werkzeug==2.2.2
xlrd==1.1.0
XlsxWriter==3.0.1
protobuf==3.19.5
Levenshtein==0.20.5
fasttext==0.9.2
#!/bin/sh
# Run the API (gunicorn) and the training loop in the background; logs go to separate files.
nohup gunicorn -c app/app_config.py app/app_run:app --timeout 1200 >gunicorn.log 2>&1 &
nohup python app/main_server.py >service.log 2>&1 &
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : __init__.py
# @Author : LiuYan
# @Time : 2021/7/31 17:36
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : build_word2vec_weights
# @Author : LiuYan
# @Time : 2020/6/24 14:46
from itertools import islice
import numpy as np
import torch
from utils.utils import timeit
@timeit
def load_word2vec(path=None, word_vocab=None, embedding_dim=None):
    """
    loading word vector
    :param path: None
    :param word_vocab: None
    :param embedding_dim: 768/100 bert/glove.6B.100d
    :return: a vector corresponding to word_vocab.
    """
    word_vocab_dict = word_vocab.stoi
    vectors_vocab = load_vec(path, embedding_dim=embedding_dim)
    # Default pad/unk to zero vectors so they are always defined, even when the
    # vector file has no [PAD]/[UNK] entries (the original left them unbound).
    pad = [0.0] * embedding_dim
    unk = [0.0] * embedding_dim
    if '[PAD]' in vectors_vocab:
        pad = vectors_vocab['[PAD]']
    elif 'pad' in vectors_vocab:
        pad = vectors_vocab['pad']
    if '[UNK]' in vectors_vocab:
        unk = vectors_vocab['[UNK]']
    elif 'unk' in vectors_vocab:
        unk = vectors_vocab['unk']
vocab_size = len(word_vocab)
embed_weights = torch.zeros(vocab_size, embedding_dim)
for word, index in word_vocab_dict.items(): # word and index
if word in vectors_vocab:
em = vectors_vocab[word]
elif word == '<pad>':
em = pad
else:
em = unk
embed_weights[index, :] = torch.from_numpy(np.array(em))
return embed_weights
@timeit
def load_vec(path=None, embedding_dim=None):
"""
loading word vector
:param path: None
:param embedding_dim: 768/100 bert/glove.6B.100d
:return: a dictionary of word vectors
"""
vectors_vocab = {}
with open(path, 'r', encoding='utf-8') as f:
for line in islice(f, 1, None): # skip the first row
items = line.split()
char, vectors = items[0], items[-embedding_dim:]
vectors = [float(vector) for vector in vectors]
vectors_vocab[char] = vectors
return vectors_vocab
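def _example_load_vec():
    """Hedged sketch (not called anywhere): load_vec expects a word2vec-style text
    file, a header row that is skipped, then '<token> <v1> ... <vN>' per line.
    The file name is hypothetical."""
    vectors = load_vec(path='glove.6B.100d.txt', embedding_dim=100)
    print(len(vectors), len(next(iter(vectors.values()))))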
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : database_mysql
# @Author : LiuYan
# @Time : 2021/9/14 17:51
import time
import pymysql
from typing import Optional

from base.config.base_config import db_config
class DatabaseMySQL(object):
def __init__(self):
super(DatabaseMySQL, self).__init__()
self._conn = None
self._cursor = None
self._connect()
def _connect(self) -> None:
self._conn = pymysql.connect(**db_config)
self._cursor = self._conn.cursor()
def query(self, id_model_process: str) -> list:
        # Fetch the model-process record for the given id. Note: the id is
        # formatted directly into the SQL string; a parameterized sketch
        # follows this module.
sql_query = 'select * from brpa_algorithm_model_process where id={};'.format(id_model_process)
print('SQL: {}'.format(sql_query))
self._cursor.execute(sql_query)
list_result = self._cursor.fetchall()
return list_result
    def update(self, id_model_process: str, process_result: str, model_path: str or None, status: int,
               update_by="'yan'", update_time=None) -> None:
        # Replace single quotes inside process_result with double quotes so the
        # SQL string literal below stays well formed.
        process_result = process_result.replace("'", '"')
        # A default argument would be frozen at import time, so the timestamp
        # is computed per call.
        update_time = time.strftime('%Y-%m-%d %H:%M:%S')
sql_update = '''update brpa_algorithm_model_process
set process_result = '{}', model_path = '{}', status = {}, update_by = {}, update_time = '{}'
where id = {};'''.format(
process_result, model_path, status, update_by, update_time, id_model_process
) if model_path else '''update brpa_algorithm_model_process
set process_result = '{}', status = {}, update_by = {}, update_time = '{}'
where id = {};'''.format(
process_result, status, update_by, update_time, id_model_process
)
print('SQL: {}'.format(sql_update))
self._cursor.execute(sql_update)
self._conn.commit()
def close(self) -> None:
self._cursor.close()
self._conn.close()
if __name__ == '__main__':
import json
id_model_process = '1453295293008211969'
dict_result = {
'result': '训练成功!模型评测指标为: precision: {:.0f}% recall: {:.0f}% f1-score: {:.0f}%'.format(
0.91111111111111 * 100,
0.91111111111111 * 100,
0.91111111111111 * 100
)
}
dbm = DatabaseMySQL()
list_result = dbm.query(id_model_process=id_model_process)
model_path = '/home/zzsn/liuyan/zzsn_nlp_br/classification/model/model_saved/fast_text-pro_info_filter-2021_10_14-18_37_50/model.bin'
dbm.update(id_model_process=id_model_process, process_result=dict_result['result'], model_path=model_path, status=1)
dict_result = {
'result': '训练失败!'
}
dbm.update(id_model_process='1453536215885279233', process_result=dict_result['result'], model_path=None, status=2)
list_result = dbm.query(id_model_process=id_model_process)
dbm.close()
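# A parameterized variant of DatabaseMySQL.update, shown as a sketch: letting
# pymysql bind the values removes both the manual quote replacement and the
# SQL-injection risk of formatting values into the statement. The helper name
# update_safe is hypothetical.
import time
import pymysql

def update_safe(conn: pymysql.connections.Connection, id_model_process: str,
                process_result: str, model_path, status: int, update_by: str = 'yan') -> None:
    update_time = time.strftime('%Y-%m-%d %H:%M:%S')
    if model_path:
        sql = ('update brpa_algorithm_model_process '
               'set process_result=%s, model_path=%s, status=%s, update_by=%s, update_time=%s '
               'where id=%s;')
        args = (process_result, model_path, status, update_by, update_time, id_model_process)
    else:
        sql = ('update brpa_algorithm_model_process '
               'set process_result=%s, status=%s, update_by=%s, update_time=%s '
               'where id=%s;')
        args = (process_result, status, update_by, update_time, id_model_process)
    with conn.cursor() as cursor:
        cursor.execute(sql, args)  # every %s value is escaped by the driver
    conn.commit()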
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : log
# @Author : LiuYan
# @Time : 2020/6/21 21:08
import os
import logging
import logging.handlers
from pathlib import Path
__all__ = ['logger']
# User-configurable section ↓
import tqdm
LEVEL_COLOR = {
'DEBUG': 'cyan',
'INFO': 'green',
'WARNING': 'yellow',
'ERROR': 'red',
'CRITICAL': 'red,bg_white',
}
STDOUT_LOG_FMT = '%(log_color)s[%(asctime)s] [%(levelname)s] [%(threadName)s] [%(filename)s:%(lineno)d] %(message)s'
STDOUT_DATE_FMT = '%Y-%m-%d %H:%M:%S'
FILE_LOG_FMT = '[%(asctime)s] [%(levelname)s] [%(threadName)s] [%(filename)s:%(lineno)d] %(message)s'
FILE_DATE_FMT = '%Y-%m-%d %H:%M:%S'
# User-configurable section ↑
class ColoredFormatter(logging.Formatter):
COLOR_MAP = {
'black': '30',
'red': '31',
'green': '32',
'yellow': '33',
'blue': '34',
'magenta': '35',
'cyan': '36',
'white': '37',
'bg_black': '40',
'bg_red': '41',
'bg_green': '42',
'bg_yellow': '43',
'bg_blue': '44',
'bg_magenta': '45',
'bg_cyan': '46',
'bg_white': '47',
'light_black': '1;30',
'light_red': '1;31',
'light_green': '1;32',
'light_yellow': '1;33',
'light_blue': '1;34',
'light_magenta': '1;35',
'light_cyan': '1;36',
'light_white': '1;37',
'light_bg_black': '100',
'light_bg_red': '101',
'light_bg_green': '102',
'light_bg_yellow': '103',
'light_bg_blue': '104',
'light_bg_magenta': '105',
'light_bg_cyan': '106',
'light_bg_white': '107',
}
def __init__(self, fmt, datefmt):
super(ColoredFormatter, self).__init__(fmt, datefmt)
def parse_color(self, level_name):
color_name = LEVEL_COLOR.get(level_name, '')
if not color_name:
return ""
color_value = []
color_name = color_name.split(',')
for _cn in color_name:
color_code = self.COLOR_MAP.get(_cn, '')
if color_code:
color_value.append(color_code)
return '\033[' + ';'.join(color_value) + 'm'
def format(self, record):
record.log_color = self.parse_color(record.levelname)
message = super(ColoredFormatter, self).format(record) + '\033[0m'
return message
class TqdmLoggingHandler(logging.Handler):
def __init__(self, level=logging.NOTSET):
super().__init__(level)
def emit(self, record):
try:
msg = self.format(record)
tqdm.tqdm.write(msg)
self.flush()
except (KeyboardInterrupt, SystemExit):
raise
except:
self.handleError(record)
def _get_logger(log_to_file=True, log_filename='default.log', log_level='DEBUG'):
_logger = logging.getLogger(__name__)
stdout_handler = logging.StreamHandler()
stdout_handler.setFormatter(
ColoredFormatter(
fmt=STDOUT_LOG_FMT,
datefmt=STDOUT_DATE_FMT,
)
)
_logger.addHandler(stdout_handler)
# _logger.setLevel(logging.INFO)
# _logger.addHandler(TqdmLoggingHandler())
if log_to_file:
# _tmp_path = os.path.dirname(os.path.abspath(__file__))
# _tmp_path = os.path.join(_tmp_path, '../logs/{}'.format(log_filename))
_project_path = os.path.dirname(os.getcwd())
_tmp_path = os.path.join(_project_path, 'logs')
Path(_tmp_path).mkdir(parents=True, exist_ok=True)
_tmp_path = os.path.join(_tmp_path, log_filename)
file_handler = logging.handlers.TimedRotatingFileHandler(_tmp_path, when='midnight', backupCount=30)
file_formatter = logging.Formatter(
fmt=FILE_LOG_FMT,
datefmt=FILE_DATE_FMT,
)
file_handler.setFormatter(file_formatter)
_logger.addHandler(file_handler)
_logger.setLevel(log_level)
return _logger
logger = _get_logger(log_to_file=False)
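# Usage sketch (assuming this module is importable as utils.log, matching the
# utils.utils import used elsewhere in the project; file logging is opt-in via
# _get_logger(log_to_file=True)):
#   from utils.log import logger
#   logger.info('model loaded')
#   logger.warning('low disk space on %s', '/data')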
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : tool
# @Author : LiuYan
# @Time : 2021/6/21 11:22
import re
import json
def read_json(path: str) -> list:
    # Each line of the file is one JSON object (JSON Lines format).
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f]
def clean_text(text: str) -> str:
    # Remove spaces, tabs and carriage returns outright (harmless for Chinese
    # text, where ASCII whitespace carries no meaning) and collapse blank lines.
    return re.sub('\n+', '\n', text.strip().replace(' ', '').replace('\t', '').replace('\r', ''))
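# Quick illustration (not in the original file):
#   clean_text('a \n\n\n b\tc\r')  ->  'a\nbc'
# Spaces and tabs are removed outright and runs of newlines collapse to one.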
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : utils
# @Author : LiuYan
# @Time : 2021/4/16 17:54
from __future__ import unicode_literals, print_function, division
import time
import xlsxwriter
def timeit(f):
def timed(*args, **kw):
ts = time.time()
print('......begin {0:8s}......'.format(f.__name__))
result = f(*args, **kw)
te = time.time()
print('......finish {0:8s}, took:{1:.4f} sec......'.format(f.__name__, te - ts))
return result
return timed
def list2xlsx(result_list: list, xlsx_path: str):
    """
    Write a list of homogeneous dicts to an xlsx file, one dict per row.
    :param result_list: [
        {
            'id': 1,
            'title': 't',
            ...
        }
        ...
    ]
    :param xlsx_path: '/home/zzsn/liuyan/result/result.xlsx'
    :return:
    """
    workbook = xlsxwriter.Workbook(xlsx_path)
    worksheet = workbook.add_worksheet('sheet1')
    # The header row comes from the keys of the first record, so result_list
    # must be non-empty and all records should share one key set.
    worksheet.write_row(row=0, col=0, data=list(result_list[0].keys()))
    for row_index, result_dict in enumerate(result_list):
        worksheet.write_row(row=row_index + 1, col=0, data=[
            ';'.join(result) if isinstance(result, (list, set)) else result
            for result in result_dict.values()
        ])
    workbook.close()
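# Usage sketch ('result.xlsx' is a hypothetical output path):
#   list2xlsx(
#       result_list=[{'id': 1, 'title': 't', 'labels': ['a', 'b']}],
#       xlsx_path='result.xlsx',
#   )
# This writes a header row (id, title, labels) and one data row in which the
# list value is joined to 'a;b'.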
Corpus-directory listing and service URL composition (a sketch assembling
these URLs follows the config block below):
The real show_file_url is: http://ip:port + operation_prefix + show_file_url
The real upload_file_url is: http://ip:port + operation_prefix + upload_file_url
The real publish_version_url is: http://ip:port + operation_prefix + publish_version_url
The real model_test_url is: http://ip:port + operation_prefix + model_test_url
The real train_url is: http://ip:port + train_url
The real application_url is: http://ip:port + application_prefix + /pred/
# The real remove_file_url is: http://ip:port + operation_prefix + remove_file_url
http://114.116.90.53:4004/new_task/
{
"port": 4004,
"ip": "114.116.90.53",
"model_name": "ssyw_column_classify",
"operation_prefix": "/platform/operation/process",
"application_prefix": "/platform/classification/ssyw_column/classify",
"train_url": "/platform/classification/ssyw_column/classify/model_train/",
"application_url": "/pred/",
"show_file_url": "/show_file/",
"remove_file_url": "/remove_file/",
"upload_file_url": "/upload_file/",
"publish_version_url": "/publish_version/",
"model_test_url": "/model_test/",
"dataset_saved_path": "../datasets/classification",
"model_saved_path": "../../../model_saved/classification",
"java_call_back_url": "http://114.115.205.50:9988/manage/algorithmModel/process/changeStatus",
"train_info": {
"modelProcessId": {
"paramter_name": "模型任务Id",
"paramter_data": "",
"paramter_description": "模型训练任务id,关联哪个模型"
},
"learning_rate": {
"paramter_name": "学习率",
"paramter_data": 0.03,
"paramter_description": "学习率"
},
"epoch": {
"paramter_name": "训练轮数",
"paramter_data": 10,
"paramter_description": "训练轮数"
},
"gpu": {
"paramter_name": "GPU",
"paramter_data": "",
"paramter_description": "是否使用GPU"
},
"data_path": {
"paramter_name": "语料版本",
"paramter_data": "",
"paramter_description": "模型训练时用户填入参数——语料版本"
},
"model_path": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "模型训练时用户填入参数——模型版本"
}
},
"application_info": {
"title": {
"paramter_name": "文章标题",
"paramter_data": "",
"paramter_description": "文章标题"
},
"content": {
"paramter_name": "文章内容",
"paramter_data": "",
"paramter_description": "文章内容"
},
"id": {
"paramter_name": "文章id",
"paramter_data": "",
"paramter_description": "文章id"
}
},
"show_file_info": {
"file_path": {
"paramter_name": "查询文件的相对路径",
"paramter_data": "",
"paramter_description": "要查询的文件目录,注意这里是相对地址,eg: 查询语料保存根目录dataset_saved_path的语料情况可传入../datasets/classification/"
}
},
"remove_file_info": {
"file_path": {
"paramter_name": "删除文件的相对路径",
"paramter_data": "",
"paramter_description": "要删除的文件,注意这里是相对地址,eg: 删除语料保存根目录dataset_saved_path下的ssyw_column_classify语料文件夹可传入../datasets/classification/ssyw_column_classify"
},
"flag": {
"paramter_name": "文件删除标识",
"paramter_data": "",
"paramter_description": "删除文件还是文件夹的标识,删除文件时flag=“/”,删除文件夹时flag为空字符串"
}
},
"upload_file_info": {
"url_path": {
"paramter_name": "语料下载地址",
"paramter_data": "",
"paramter_description": "待上传的语料文件下载地址,当前仅支持xlsx和xls文件,且文件内容需要包含title、content、label三个字段"
},
"dataFolderName": {
"paramter_name": "语料版本名称",
"paramter_data": "",
"paramter_description": "待上传的语料版本名称,在训练的时候使用"
}
},
"publish_version": {
"trainModelName": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "待发布的模型版本"
},
"versionName": {
"paramter_name": "发布版本号",
"paramter_data": "",
"paramter_description": "待发布的版本号"
}
},
"model_test_info": {
"modelProcessId": {
"paramter_name": "模型任务Id",
"paramter_data": "",
"paramter_description": "模型训练任务id,关联哪个模型"
},
"trainModelName": {
"paramter_name": "模型版本",
"paramter_data": "",
"paramter_description": "待测试的模型版本"
},
"data_type": {
"paramter_name": "测试方式",
"paramter_data": "",
"paramter_description": "可选项:url地址解析|file文件"
},
"url_path": {
"paramter_name": "测试文件下载地址",
"paramter_data": "",
"paramter_description": "待上传的测试文件下载地址,当前仅支持xlsx和xls文件,且文件内容需要包含title、content、label三个字段"
},
"title": {
"paramter_name": "文章标题",
"paramter_data": "",
"paramter_description": "文章标题"
},
"content": {
"paramter_name": "文章内容",
"paramter_data": "",
"paramter_description": "文章内容"
}
}
}
\ No newline at end of file
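# Sketch: assembling the full operation URLs from the config above with plain
# string concatenation (values are taken verbatim from the JSON):
config = {
    "ip": "114.116.90.53",
    "port": 4004,
    "operation_prefix": "/platform/operation/process",
    "show_file_url": "/show_file/",
    "train_url": "/platform/classification/ssyw_column/classify/model_train/",
}
base = "http://{ip}:{port}".format(**config)
show_file = base + config["operation_prefix"] + config["show_file_url"]
train = base + config["train_url"]
# show_file -> http://114.116.90.53:4004/platform/operation/process/show_file/
# train     -> http://114.116.90.53:4004/platform/classification/ssyw_column/classify/model_train/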
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import socket
import os
import psutil
# CPU load, read from /proc/stat
def get_cpu():
    # The counters in /proc/stat are cumulative since boot, so a single sample
    # yields the average utilization since boot, not the instantaneous load.
    with open("/proc/stat", "r") as f:
        line = ""
        while "cpu " not in line:
            line = f.readline()
    spl = line.split()  # split on any whitespace; the "cpu" line is double-spaced
    worktime = int(spl[1]) + int(spl[2]) + int(spl[3])  # user + nice + system
    idletime = int(spl[4])
    if worktime + idletime == 0:
        return 0
    return float(worktime) / (idletime + worktime)
def get_hostname():
return socket.gethostname()
def get_uptime():
with open('/proc/uptime', 'r') as f:
uptime_seconds = float(f.readline().split()[0])
uptime_minutes, uptime_seconds = divmod(uptime_seconds, 60)
uptime_hours, uptime_minutes = divmod(uptime_minutes, 60)
uptime_days, uptime_hours = divmod(uptime_hours, 24)
return f"{int(uptime_days)} days, {int(uptime_hours)} hours, {int(uptime_minutes)} minutes, {int(uptime_seconds)} seconds"
def get_kernel_version():
return os.uname().release
# CPU usage as a percentage string
def get_cpu_info():
    cpu_usage = int(get_cpu() * 100)
    # cpu_tip = "CPU usage (max 100%): " + str(cpu_usage) + "%"
    return str(cpu_usage)
def get_memory_info():
memory_info = psutil.virtual_memory()
return f"Total memory: {memory_info.total / 1024 / 1024:.2f} MB\nUsed memory: {memory_info.used / 1024 / 1024:.2f} MB\nFree memory: {memory_info.available / 1024 / 1024:.2f} MB"
def get_disk_usage():
partitions = psutil.disk_partitions()
disk_usage = ""
for partition in partitions:
usage = psutil.disk_usage(partition.mountpoint)
disk_usage += f"{partition.mountpoint} - Total: {usage.total / 1024 / 1024:.2f} MB, Used: {usage.used / 1024 / 1024:.2f} MB, Free: {usage.free / 1024 / 1024:.2f} MB\n"
return disk_usage
def get_network_interfaces():
interfaces = psutil.net_if_addrs()
network_interfaces = ""
for interface_name, interface_addresses in interfaces.items():
network_interfaces += f"{interface_name}\n"
for address in interface_addresses:
if address.family == socket.AF_INET:
network_interfaces += f" IP address: {address.address}\n"
network_interfaces += f" Netmask: {address.netmask}\n"
elif address.family == socket.AF_PACKET:
network_interfaces += f" MAC address: {address.address}\n"
return network_interfaces
def main_pro():
hostname = get_hostname()
UpTime = get_uptime()
KN_Version = get_kernel_version()
CPU_Info = get_cpu_info()
Memory_Info = get_memory_info()
Disk_Usage = get_disk_usage()
Network_Interfaces = get_network_interfaces()
dict_result = {
"HostName": hostname,
"UpTime": UpTime,
"KN_Version": KN_Version,
"CPU_Info": CPU_Info,
"Memory_Info": Memory_Info,
"Disk_Usage": Disk_Usage,
"Network_Interfaces": Network_Interfaces
}
return dict_result
if __name__ == "__main__":
print(f"Hostname: {get_hostname()}")
print(f"Uptime: {get_uptime()}")
print(f"Kernel version: {get_kernel_version()}")
print(f"CPU information:\n{get_cpu_info()}")
print(f"Memory information:\n{get_memory_info()}")
print(f"Disk usage:\n{get_disk_usage()}")
print(f"Network interfaces:\n{get_network_interfaces()}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Model-task creation service
http://114.116.90.53:4004/new_task/
"""
import os
import sys, json
import logging
import requests
import argparse
import queue
from pathlib import Path
from flask import Flask, jsonify, request
from main_model import main_info
import re
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] <%(processName)s> (%(threadName)s) %('
'message)s')
logger = logging.getLogger(__name__)
app = Flask(__name__)
# TODO: the model name is represented by its directory name under root_path
root_path = "../"
# Cross-origin (CORS) support
from flask_cors import CORS
CORS(app, supports_credentials=True)
@app.route('/', methods=['POST'])
def hello_world():
app.logger.info('请选择正确的方式上传!')
return '请选择正确的方式上传!'
@app.route(f'/get_server_info/', methods=['GET', 'POST'])
def get_server_info():
dict_result = main_info()
app.logger.info(dict_result)
return json.dumps(dict_result, ensure_ascii=False)
@app.route(f'/new_task/', methods=['POST'])
def build_task():
try:
params = json.loads(request.data.decode('utf-8'))
modelName = params["modelName"]
modelPath = os.path.join(root_path, modelName)
if modelName:
            # Return the contents of config.json from the model's directory.
            config_path = os.path.join(modelPath, "config.json")
            with open(config_path, 'r', encoding='utf-8') as f:
                config_json = json.load(f)
dict_result = {
"code": 200,
'handleMsg': 'Success',
'logs': None,
"resultData": config_json
}
else:
dict_result = {
"code": 500,
'handleMsg': 'Failure',
'logs': None,
"resultData": "请选择模型管理中存在的模型来进行创建模型任务!"
}
except Exception as e:
dict_result = {
'code': 500,
'success': 'false',
'message': "操作失败" + str(e),
'result': None
}
app.logger.info(dict_result)
return json.dumps(dict_result, ensure_ascii=False)
if __name__ == '__main__':
app.config['JSON_AS_ASCII'] = False
app.config['JSONIFY_MIMETYPE'] = "application/json;charset=utf-8"
app.run(host='0.0.0.0', port=4004, debug=False)
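# Client-side sketch for the /new_task/ endpoint above ('ssyw_column_classify'
# is the example model name from the config shown earlier):
#   import json, requests
#   resp = requests.post('http://114.116.90.53:4004/new_task/',
#                        headers={'Content-Type': 'application/json'},
#                        data=json.dumps({'modelName': 'ssyw_column_classify'}))
#   print(resp.json())  # -> {'code': 200, 'handleMsg': 'Success', ...}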
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import socket
import os
import psutil
import platform
# CPU load, read from /proc/stat
def get_cpu():
    # The counters in /proc/stat are cumulative since boot, so a single sample
    # yields the average utilization since boot, not the instantaneous load.
    with open("/proc/stat", "r") as f:
        line = ""
        while "cpu " not in line:
            line = f.readline()
    spl = line.split()  # split on any whitespace; the "cpu" line is double-spaced
    worktime = int(spl[1]) + int(spl[2]) + int(spl[3])  # user + nice + system
    idletime = int(spl[4])
    if worktime + idletime == 0:
        return 0
    return float(worktime) / (idletime + worktime)
def get_hostname():
return socket.gethostname()
def get_uptime():
with open('/proc/uptime', 'r') as f:
uptime_seconds = float(f.readline().split()[0])
uptime_minutes, uptime_seconds = divmod(uptime_seconds, 60)
uptime_hours, uptime_minutes = divmod(uptime_minutes, 60)
uptime_days, uptime_hours = divmod(uptime_hours, 24)
return f"{int(uptime_days)} days, {int(uptime_hours)} hours, {int(uptime_minutes)} minutes, {int(uptime_seconds)} seconds"
def get_kernel_version():
return os.uname().release
# CPU usage as a percentage string
def get_cpu_info():
    cpu_usage = int(get_cpu() * 100)
    # cpu_tip = "CPU usage (max 100%): " + str(cpu_usage) + "%"
    return str(cpu_usage)
def get_memory_info():
memory_info = psutil.virtual_memory()
return f"Total memory: {memory_info.total / 1024 / 1024:.2f} MB\nUsed memory: {memory_info.used / 1024 / 1024:.2f} MB\nFree memory: {memory_info.available / 1024 / 1024:.2f} MB"
def get_disk_usage():
partitions = psutil.disk_partitions()
disk_usage = ""
for partition in partitions:
usage = psutil.disk_usage(partition.mountpoint)
disk_usage += f"{partition.mountpoint} - Total: {usage.total / 1024 / 1024:.2f} MB, Used: {usage.used / 1024 / 1024:.2f} MB, Free: {usage.free / 1024 / 1024:.2f} MB\n"
return disk_usage
def get_network_interfaces():
interfaces = psutil.net_if_addrs()
network_interfaces = ""
for interface_name, interface_addresses in interfaces.items():
network_interfaces += f"{interface_name}\n"
for address in interface_addresses:
if address.family == socket.AF_INET:
network_interfaces += f" IP address: {address.address}\n"
network_interfaces += f" Netmask: {address.netmask}\n"
elif address.family == socket.AF_PACKET:
network_interfaces += f" MAC address: {address.address}\n"
return network_interfaces
def get_public_ip():
"""
    Return the local IP address used for outbound traffic (the source address
    of the default route); despite the name, this is usually a private IP.
"""
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80))
ip = s.getsockname()[0]
s.close()
return ip
def main_pro():
hostname = get_hostname()
# UpTime = get_uptime()
KN_Version = get_kernel_version()
CPU_Info = get_cpu_info()
Memory_Info = get_memory_info()
Disk_Usage = get_disk_usage()
ip = get_public_ip()
dict_result = {
"HostName": hostname,
# "UpTime": UpTime,
"KN_Version": KN_Version,
"CPU_Info": CPU_Info,
"Memory_Info": Memory_Info,
"Disk_Usage": Disk_Usage,
"Network_Interfaces": ip
}
return dict_result
def main_info():
    # OS information
    os_info = platform.platform()
    # Processor information
    processor_info = platform.processor()
    # Available memory
    mem_info = psutil.virtual_memory()
    available_mem = round(mem_info.available / 1024 / 1024, 2)
    # Available disk space
    disk_info = psutil.disk_usage('/')
    available_disk = round(disk_info.free / 1024 / 1024, 2)
    # Local (private) IP: get_public_ip() returns the outbound source address
    ip = get_public_ip()
    # Print the machine information
    print("操作系统:", os_info)
    print("处理器型号:", processor_info)
    print("可用内存大小:", available_mem, "MB")
    print("可用硬盘大小:", available_disk, "MB")
    print("ip地址:", ip)
    dict_result = {
        "操作系统:": os_info,
        "处理器型号:": processor_info,
        "可用内存大小:": available_mem,
        "可用硬盘大小:": available_disk,
        # The known public address is hard-coded here because get_public_ip()
        # yields only the private/outbound address.
        "ip地址:": "114.116.90.53"
    }
return dict_result
if __name__ == "__main__":
main_info()
# import requests
#
# response = requests.get('https://api.ipify.org')
# public_ip = response.text
#
# print(public_ip)
# dict_result = main_pro()
# print(dict_result)