Commit c4e5365c (author: ctt)

Natural Language Platform, version V1.0

Start the Django development server in the background:
nohup python manage.py runserver --noreload 0.0.0.0:7004 >> app.log 2>&1 &
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class BaseConfig(AppConfig):
name = 'base'
from django.db import models
from datetime import datetime
# Create your models here.
class User(models.Model):
username = models.CharField(max_length=30, unique=True)
true_name = models.CharField(max_length=30)
sex = models.CharField(max_length=2)
mobile_number = models.CharField(max_length=20)
mail = models.CharField(max_length=20)
id_card = models.CharField(max_length=20)
password = models.CharField(max_length=40)
account_number = models.CharField(max_length=20)
def toDict(self):
return {'id':self.id,
'username':self.username,
'true_name':self.true_name,
'sex':self.sex,
'mobile_number':self.mobile_number,
'mail':self.mail,
'id_card':self.id_card,
'password':self.password,
'account_number':self.account_number,
# 'update_at':self.update_at.strftime('%Y-%m-%d %H:%M:%S')
}
class Meta:
db_table = 'user'
class ServiceManage(models.Model):
name = models.CharField(max_length=15)
username = models.CharField(max_length=30)
filenames = models.CharField(max_length=200)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=10)
path = models.CharField(max_length=20)
def toDict(self):
return {'name': self.name,
'username': self.username,
'filenames': self.filenames,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'path': self.path,
}
class Meta:
db_table = 'service_manage'
class SubjectManage(models.Model):
sid = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=30)
def toDict(self):
return {'sid': self.sid,
'name': self.name,
}
class Meta:
db_table = 'subject_manage'
class ModelManage(models.Model):
task_name = models.CharField(max_length=30)
function_type = models.CharField(max_length=20)
model_type = models.CharField(max_length=20)
version_num = models.IntegerField()
create_date = models.DateTimeField(default=None)
def toDict(self):
return {'id': self.id,
'task_name': self.task_name,
'function_type': self.function_type,
'model_type': self.model_type,
'version_num': self.version_num,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
}
class Meta:
db_table = 'model_manage'
class VersionManage(models.Model):
model = models.ForeignKey(ModelManage, related_name='version_model', on_delete=models.CASCADE)
version = models.CharField(max_length=20)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=20)
creator = models.CharField(max_length=30)
path = models.CharField(max_length=20, unique=True)
def toDict(self):
return {'id': self.id,
'version': self.version,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'creator': self.creator,
'path': self.path,
}
class Meta:
db_table = 'version_manage'
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from django.urls import path
from django.conf.urls import url
from base.views import views as base_views
urlpatterns = [
url(r'^register-account', base_views.register_account, name='register_account'),
url(r'^verify-username', base_views.verify_username, name='verify_username'),
url(r'^login', base_views.login, name='login'),
url(r'^reset-password', base_views.reset_password, name='reset_password'),
url(r'^show-config-file', base_views.show_config_file, name='show_config_file'),
url(r'^show-service-file', base_views.show_service_file, name='show_service_file'),
url(r'^delete-file-row-manage', base_views.delete_file_row_manage, name='delete_file_row_manage'),
url(r'^delete-file-row-service', base_views.delete_file_row_service, name='delete_file_row_service'),
url(r'^file-upload', base_views.file_upload, name='file_upload'),
url(r'^show-log-file', base_views.show_log_file, name='show_log_file'),
url(r'^validate-code', base_views.validate_code, name='validate_code'),
url(r'^download-zip', base_views.download_zip, name='download_zip'),
url(r'^download-xlsx', base_views.download_xlsx, name='download_xlsx'),
url(r'^query-manage', base_views.query_manage, name='query_manage'),
url(r'^forget-password', base_views.forget_password, name='forget_password'),
url(r'^train', base_views.run_train, name='train'),
url(r'^query-service-manage', base_views.query_service_manage, name='query_service_manage'),
url(r'^query-subject', base_views.query_subject, name='query_subject'),
url(r'^query-version', base_views.query_version, name='query_version'),
url(r'^query-task-name', base_views.query_task_name, name='query_task_name')
]
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 11:51
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 14:34
# @Author : 程婷婷
# @FileName: BaseConfig.py
# @Software: PyCharm
import yaml
class BaseConfig:
def __init__(self, config_path):
self._config_path = config_path
self._parsed_file = self.load_config()
def load_config(self):
print(self._config_path)
with open(self._config_path) as yaml_file:
parsed_file = yaml.load(yaml_file, Loader=yaml.FullLoader)
return parsed_file
# if __name__ == '__main__':
# bc = BaseConfig()
# print(bc._parsed_file)
# print(bc.load_config()['data_path'])
# print(bc.load_config()['embedding'])
# print(bc.load_config()['model'])
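# A hedged illustration of the config.yaml structure this class is expected to
# parse: the section and key names below are the ones read elsewhere in this
# repository, while the values are examples only.
example_parsed_config = {
    'data_loader': {'dataset_path': 'train.xlsx', 'stopwords_path': 'baidu_stopwords.txt'},
    'data_process': {'tokenizer': 'jieba', 'use_stopwords': True,
                     'train_size': 0.8, 'test_size': 0.5, 'random_state': 42},
    'embedding': {'embedding_path': 'embedding', 'norm': 'l2', 'use_idf': True,
                  'smooth_idf': True, 'with_feature_selection': False,
                  'size': 100, 'window': 5, 'min_count': 5, 'workers': 4,
                  'sg': 1, 'iter': 5, 'use_Tencent': False},
    'model': {'model_name': 'model.pkl', 'model_path': ''},
    'evaluate': {'average': 'binary'},
    'runner': {'save_fname': 'result.xlsx'},
}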
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 9:58
# @Author : 程婷婷
# @FileName: BaseDataLoader.py
# @Software: PyCharm
import pandas as pd
from base.views.config.BaseConfig import BaseConfig
class BaseDataLoader:
def __init__(self, config_path):
self.data_loader_config = BaseConfig(config_path)._parsed_file['data_loader']
def read_file(self):
symbol = self.data_loader_config['dataset_path'].split('.')[-1]
if (symbol == 'xlsx') or (symbol == 'xls'):
df = pd.read_excel(r''+self.data_loader_config['dataset_path'])
elif symbol == 'csv':
df = pd.read_csv(r''+self.data_loader_config['dataset_path'], sep='\t')
else:
print('数据类型错误')
return '数据类型错误'
df.drop_duplicates(subset='content', keep='first', inplace=True)
df.dropna(subset=['content', 'label'], inplace=True)
df = df.reset_index(drop=True)
print('=================执行正文去重和去空之后共有%d条数据=============' % len(df['content']))
return df
def read_stopwords(self):
# read in the stopword list
stopword_list = [k.strip() for k in open(self.data_loader_config['stopwords_path'], encoding='utf8').readlines() if
k.strip() != '']
return stopword_list
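# A hedged sketch of the dataset layout read_file() expects: an .xlsx/.xls file
# (or a tab-separated .csv) with at least 'content' and 'label' columns. The
# file name below is illustrative only.
if __name__ == '__main__':
    demo = pd.DataFrame({'content': ['今天股市大涨。', '公司发布年度财报。'],
                         'label': ['正面', '中性']})
    demo.to_excel('train.xlsx', index=False)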
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 15:28
# @Author : 程婷婷
# @FileName: BaseDataProcess.py
# @Software: PyCharm
import os
import re
import jieba
import pickle
import gensim
import logging
import numpy as np
import pandas as pd
from pyhanlp import *
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_classif, SelectPercentile
from base.views.config.BaseConfig import BaseConfig
from base.views.data.BaseDataLoader import BaseDataLoader
from platform_zzsn.settings import BASE_DIR
format = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=format, level=logging.INFO)
class BaseDataProcess:
def __init__(self, config_path):
self.embedding_config = BaseConfig(config_path)._parsed_file['embedding']
self.process_config = BaseConfig(config_path)._parsed_file['data_process']
PerceptronLexicalAnalyzer = JClass('com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer')
self.pla_segment = PerceptronLexicalAnalyzer()
self.bdl = BaseDataLoader(config_path)
def clean_content(self, content):
bs = BeautifulSoup(content, 'html.parser')
return bs.text
def remove_char(self, content):
# keep Chinese characters, Latin letters, digits, whitespace and basic punctuation
graph_filter = re.compile(r'[^\u4e00-\u9fa5a-zA-Z0-9\s,。\.,?\?!!;;]')
content = graph_filter.sub('', content)
return content
def jieba_tokenizer(self, content):
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in jieba.lcut(content) if word not in stopwords])
def pla_tokenizer(self, content):
words = list(self.pla_segment.analyze(content).toWordArray())
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in words if word not in stopwords])
def save(self, voc, path):
with open(path, 'wb') as voc_file:
pickle.dump(voc, voc_file)
def process(self, data, min_content=0):
processed_data = []
for record in data:
record = self.clean_content(str(record))
record = self.remove_char(record)
if len(record) > min_content:
methods = self.process_config['tokenizer']
if methods == 'PerceptronLexicalAnalyzer':
record = self.pla_tokenizer(record)
record = [row.strip() for row in record if row.strip() != '']
else:
record = self.jieba_tokenizer(record)
record = [row.strip() for row in record if row.strip() != '']
processed_data.append(' '.join(record))
else:
pass
return processed_data
def split_dataset(self, data, use_dev):
if use_dev:
train_data_set, test_dev_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
test_data_set, dev_data_set = train_test_split(test_dev_set,
test_size=self.process_config['test_size'],
random_state=self.process_config['random_state'],
shuffle=True)
print(len(train_data_set) + len(test_data_set) + len(dev_data_set))
return train_data_set, test_data_set, dev_data_set
else:
train_data_set, test_data_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
return train_data_set, test_data_set
def bag_of_words(self, data, label):
vectorizer = CountVectorizer(ngram_range=(1, 1), min_df=5)
x = vectorizer.fit_transform(data)
transformer = TfidfTransformer(norm=self.embedding_config['norm'], use_idf=self.embedding_config['use_idf'],
smooth_idf=self.embedding_config['smooth_idf'])
x = transformer.fit_transform(x).toarray()
if self.embedding_config['with_feature_selection']:
transformed_data = SelectPercentile(mutual_info_classif, percentile=20).fit_transform(x, label)
else:
transformed_data = x
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
self.save(voc=vectorizer.vocabulary_, path=os.path.join(self.embedding_config['embedding_path'], 'tfidf.pkl'))
return transformed_data, vectorizer.get_feature_names()
def word2vec(self, data, feature_words):
model = gensim.models.word2vec.Word2Vec(sentences=data,
size=self.embedding_config['size'],
window=self.embedding_config['window'],
min_count=self.embedding_config['min_count'],
workers=self.embedding_config['workers'],
sg=self.embedding_config['sg'],
iter=self.embedding_config['iter'])
vocabulary_w2v = model.wv.vocab.keys()
count = 0
if self.embedding_config['use_Tencent']:
model_tencent = gensim.models.KeyedVectors.load_word2vec_format(
os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
vocabulary_tencent = model_tencent.wv.vocab.keys()
vector_matrix = np.zeros((len(feature_words), int(self.embedding_config['size']) + 200))
for word in feature_words:
if word in vocabulary_tencent:
vector_tencent = model_tencent.wv.word_vec(word)
else:
vector_tencent = np.random.randn(200)
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector = np.concatenate((vector_tencent, vector_w2v))
vector_matrix[count] = vector
count += 1
else:
vector_matrix = np.zeros((len(feature_words), self.embedding_config['size']))
for word in feature_words:
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector_matrix[count] = vector_w2v
count += 1
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
model.save(os.path.join(self.embedding_config['embedding_path'], 'word2vec.model'))
return vector_matrix
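# A hedged end-to-end sketch of how the classes above combine (the config path
# is hypothetical, and it assumes process() drops no rows so labels stay aligned):
if __name__ == '__main__':
    processor = BaseDataProcess('config.yaml')
    df = processor.bdl.read_file()                # load and deduplicate the dataset
    texts = processor.process(df['content'])      # clean, tokenize and filter stopwords
    X, feature_words = processor.bag_of_words(texts, df['label'])  # TF-IDF feature matrix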
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:30
# @Author : 程婷婷
# @FileName: BaseEvaluator.py
# @Software: PyCharm
from sklearn.metrics import precision_score, f1_score, recall_score, classification_report
import logging
from base.views.config.BaseConfig import BaseConfig
formats = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=formats, level=logging.INFO)
class BaseEvaluator:
def __init__(self, config_path):
self.evaluate_config = BaseConfig(config_path)._parsed_file['evaluate']
def evaluate(self, y_true, y_pred, label_mapping, logger):
result = []
y_true = list(map(str, y_true))
y_pred = list(map(str, y_pred))
logger.info('模型评估结果如下:')
if not label_mapping:
result.append(classification_report(y_true, y_pred))
logger.info(classification_report(y_true, y_pred))
else:
for value in label_mapping.values():
print([k for k,v in label_mapping.items() if v == value])
p = precision_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
r = recall_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
f1 = f1_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
print({'value': value,'召回率为': r, '精确率为': p, 'F1': f1})
logger.info('标签为%s' % [k for k,v in label_mapping.items() if v == value][0])
logger.info('精确率为%.2f' %p)
logger.info('召回率为%.2f' %r)
logger.info('F1值为%.2f' % f1)
result.append(str({'label': value,'recall': r, 'precision': p, 'F1': f1}))
return ' '.join(result)
# y_true = [0, 1, 2, 0, 1, 2]
# y_pred = [0, 2, 1, 0, 0, 1]
# print(BaseEvaluator())
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
import os
import yaml
import random
import smtplib
from email.mime.text import MIMEText
from django.core.paginator import Paginator
from email.mime.multipart import MIMEMultipart
from PIL import Image,ImageFont,ImageDraw,ImageFilter
from base.models import ModelManage, ServiceManage, VersionManage
from platform_zzsn.settings import BASE_DIR
class Picture:
def __init__(self):
self.size = (240,60)
self.mode='RGB'
self.color='white'
self.font = ImageFont.truetype(os.path.join(BASE_DIR,
'static/common/font/arial.ttf'), 36) #设置字体大小
def randChar(self):
basic='23456789abcdefghijklmnpqrstwxyzABCDEFGHIJKLMNPQRSTWXYZ'
return basic[random.randint(0,len(basic)-1)] #随机字符
def randBdColor(self):
return (random.randint(64,255),random.randint(64,255),random.randint(64,255)) #背景
def randTextColor(self):
return (random.randint(32, 127), random.randint(32, 127), random.randint(32, 127)) #随机颜色
def proPicture(self):
new_image=Image.new(self.mode,self.size,self.color) #创建新图像有三个默认参数:尺寸,颜色,模式
drawObject=ImageDraw.Draw(new_image) #创建一个可以对image操作的对象
line_num = random.randint(4,6) # 干扰线条数
for i in range(line_num):
#size=(240,60)
begin = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
end = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
drawObject.line([begin, end], self.randTextColor())
for x in range(240):
for y in range(60):
tmp = random.randint(0,50)
if tmp>30: #调整干扰点数量
drawObject.point((x,y),self.randBdColor())
randchar=''
for i in range(5):
rand=self.randChar()
randchar+=rand
drawObject.text([50*i+10,10],rand,self.randTextColor(),font=self.font) #写入字符
new_image = new_image.filter(ImageFilter.SHARPEN) # 滤镜
return new_image,randchar
def update_config_file(config_path, config_file):
data = yaml.load(config_file, Loader=yaml.FullLoader)
data['data_loader'] = {}
model_path = data['model']['model_path']
model_name = data['model']['model_name']
if data['model']['model_path']:
data['model']['model_path'] = os.path.join(config_path, model_path)
else:
data['model']['model_path'] = os.path.join(config_path, model_name)
print(data['model']['model_path'])
embedding_path = data['embedding']['embedding_path']
if embedding_path:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['embedding_path'])
else:
if data['embedding']['name']:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['name'])
tokenizer_path = data['embedding']['tokenizer_path']
if tokenizer_path:
data['embedding']['tokenizer_path'] = os.path.join(config_path, data['embedding']['tokenizer_path'])
try:
test_file_path = data['data_process']['test_file_path']
train_file_path = data['data_process']['train_file_path']
except KeyError:
pass
else:
data['data_process']['test_file_path'] = os.path.join(config_path, test_file_path)
data['data_process']['train_file_path'] = os.path.join(config_path, train_file_path)
for file in os.listdir(config_path):
if ('.xls' == file[-4:]) or ('.xlsx' == file[-5:]):
xlsx_path = os.path.join(config_path, file)
data['data_loader']['dataset_path'] = xlsx_path
if 'save_fname' in data['runner'].keys():
data['runner']['save_fpath'] = os.path.join(config_path, data['runner']['save_fname'])
data['data_loader']['stopwords_path'] = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
file_path = os.path.join(config_path, 'config.yaml')
with open(file_path, 'w') as yaml_file:
yaml.safe_dump(data, yaml_file, default_flow_style=False)
return file_path
def select_manage(task_name, function_type, model_type, begin_cdate, end_cdate, page_size, current_page):
condition = {'task_name': task_name, 'function_type': function_type, 'model_type': model_type,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
managers = ModelManage.objects.filter(**condition).order_by('-create_date')
len_managers = len(managers)
page = Paginator(managers, page_size)
maxpages = page.num_pages # total number of pages
pIndex = int(current_page)
# clamp the requested page number to the valid range
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # records on the current page
return list(manager_list), len_managers
def select_version(model_id, begin_cdate, end_cdate, page_size, current_page):
condition = {'model_id': model_id,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
versions = VersionManage.objects.filter(**condition).order_by('-create_date')
len_versions = len(versions)
page = Paginator(versions, page_size)
maxpages = page.num_pages # total number of pages
pIndex = int(current_page)
# clamp the requested page number to the valid range
if pIndex > maxpages:
pIndex = maxpages
version_list = page.page(pIndex) # records on the current page
return list(version_list), len_versions
def select_service_manage(name, begin_cdate, end_cdate, state, username, page_size, current_page):
condition = {
'name': name,
'state': state,
'create_date__range': (begin_cdate, end_cdate),
'username': username,
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
print(condition)
service_managers = ServiceManage.objects.filter(**condition).order_by('-create_date')
len_service_managers = len(service_managers)
page = Paginator(service_managers, page_size)
maxpages = page.num_pages
pIndex = int(current_page)
# clamp the requested page number to the valid range
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # records on the current page
return list(manager_list), len_service_managers
def sendMail(user,pwd,sender,receiver,msg_title):
mail_host = "smtp.163.com" #163的SMTP服务器
message = MIMEMultipart('alternative')
#设置邮件的发送者
message["From"] = sender
#设置邮件的接收方
message["To"] = ",".join(receiver)
#4.设置邮件的标题
message["Subject"] = msg_title
# optionally attach a plain-text body
# message.attach(MIMEText('您好,\n'
# ' 您当前的密码为%s, 为了保证您的账号安全,请尽快登陆重置您的密码'%msg_content, 'plain', 'utf-8'))
# attach the HTML body
message.attach(MIMEText('<html>'
'<body>'
'<h1>Hello </h1><br> '
'<h3>To ensure the security of your account, please log in and reset your password as soon as possible.</h3>'
'<h2><a href="http://192.168.1.149:8020/reset_password/">点此重置</a></h2>'
'</body>'
'</html>', 'html', 'utf-8'))
# 1. connect to the SMTP server over SSL
smtpObj = smtplib.SMTP_SSL(mail_host,465)
# 2. log in to the mailbox
smtpObj.login(user,pwd)
# 3. send the mail
# arguments: sender, recipients, message body
smtpObj.sendmail(sender,receiver,message.as_string())
return True
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:29
# @Author : 程婷婷
# @FileName: BaseLoss.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:18
# @Author : 程婷婷
# @FileName: BaseModel.py
# @Software: PyCharm
from base.views.config.BaseConfig import BaseConfig
import os
import pickle
class BaseModel:
def __init__(self,config_path):
self.model_config = BaseConfig(config_path)._parsed_file['model']
def building_model(self, *params):
pass
def save(self, model):
dir = os.path.dirname(self.model_config['model_path'])
if not os.path.exists(dir):
os.makedirs(dir)
with open(self.model_config['model_path'], 'wb') as model_file:
pickle.dump(model, model_file)
def predict(self, model, X):
proba = model.predict_proba(X)
y_predict = model.predict(X)
return {'proba': proba, 'y_predict': y_predict}
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:36
# @Author : 程婷婷
# @FileName: BaseRunner.py
# @Software: PyCharm
from base.views.config.BaseConfig import BaseConfig
class BaseRunner:
def __init__(self,config_path):
self.runner_config = BaseConfig(config_path)._parsed_file['runner']
def train(self, logger):
pass
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 9:24
# @Author : 程婷婷
# @FileName: test.py
# @Software: PyCharm
import jieba
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectPercentile, chi2
X, y = load_digits(return_X_y=True)
print(X.shape)
print(X[:10], y[:100])
X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)
print(X_new.shape)
print(X_new[:10])
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/20 16:58
# @Author : 程婷婷
# @FileName: token_authorize.py
# @Software: PyCharm
import jwt
import time
import functools
from jwt import exceptions
from django.http import JsonResponse
from platform_zzsn.settings import *
# SECRET_KEY (imported from settings above) is used to sign and verify JWTs
def create_token(user):
'''Create a JWT token for the given user.'''
headers = {
"alg": "HS256",
"typ": "JWT"
}
exp = int(time.time() + 3*60*60)
payload = {
"id": user.id,
"name": user.username,
"exp": exp
}
token = jwt.encode(payload=payload, key=SECRET_KEY, algorithm='HS256', headers=headers).decode('utf-8')
return token
def login_required(view_func):
@functools.wraps(view_func)
def validate_token(request, *args, **kwargs):
'''Validate the request token; if it passes, execute the wrapped view.'''
payload = None
msg = None
try:
token = request.META.get("HTTP_AUTHORIZATION")
payload = jwt.decode(token, SECRET_KEY, algorithms=['HS256'])
print(payload)
return view_func(request, *args, **kwargs)
# JWT validity and legitimacy checks
except exceptions.ExpiredSignatureError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '登录已过期'
})
except jwt.DecodeError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '缺少参数token'
# token missing or could not be decoded
})
except jwt.InvalidTokenError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '非法的token'
# token is invalid
})
return validate_token
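# A hedged client-side sketch of how callers are expected to present the token
# (host, port and URL prefix are assumptions; the decorator above reads the raw
# token from the Authorization header, without a "Bearer " prefix).
if __name__ == '__main__':
    import requests
    login = requests.post('http://127.0.0.1:7004/base/login',
                          data={'username': 'demo', 'password': 'secret'})
    token = login.json()['token']
    requests.post('http://127.0.0.1:7004/base/query-subject',
                  data={'current_page': 1, 'page_size': 10},
                  headers={'Authorization': token})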
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/9 11:19
# @Author : 程婷婷
# @FileName: utils.py
# @Software: PyCharm
import os
import re
import jieba
import zipfile
import pandas as pd
from docx import Document
from platform_zzsn.settings import *
def read_txt(path):
with open(path, 'r', encoding='utf8') as file:
lines = file.readlines()
return lines
def read_docx(pending_file, user_file):
jieba.load_userdict(user_file)
document = Document(pending_file)
doc_text_list = []
for para in document.paragraphs:
para_text = re.sub(r'\s', '', para.text)
if para_text:
doc_text_list.append(para_text)
return doc_text_list
def read_excel(pending_file, user_file):
jieba.load_userdict(user_file)
doc_text_list = pd.read_excel(pending_file)['content']
doc_text_list.dropna(inplace=True)
return doc_text_list
def merge_para(paras):
new_paras = []
for i, para in enumerate(paras):
if not new_paras:
new_paras.append(para)
elif (len(new_paras[-1]) < 500):
new_paras[-1] += para
else:
new_paras.append(para)
return new_paras
def filter_stopwords(para):
path = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
stopword_list = [k.strip() for k in read_txt(path) if
k.strip() != '']
words = [word for word in jieba.lcut(para) if word not in stopword_list]
return words
# sort key: return the second element of a pair
def takeSecond(elem):
return elem[1]
def takeFirst_len(elem):
return len(elem[0])
def make_zip(file_dir: str, zip_path: str) -> None:
zip_f = zipfile.ZipFile(zip_path, 'w')
pre_len = len(os.path.dirname(file_dir))
for parent, dir_names, filenames in os.walk(file_dir):
for filename in filenames:
path_file = os.path.join(parent, filename)
arc_name = path_file[pre_len:].strip(os.path.sep)
zip_f.write(path_file, arc_name)
zip_f.close()
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/14 10:32
# @Author : 程婷婷
# @FileName: views.py
# @Software: PyCharm
import os
import base64
import shutil
import logging
import datetime
import tempfile
import zipfile
from io import BytesIO
from django.db import transaction
from wsgiref.util import FileWrapper
from django.core.paginator import Paginator
from werkzeug.utils import secure_filename
from django.forms.models import model_to_dict
from django.http import JsonResponse, HttpResponse
from django.core.files.storage import default_storage
from django.views.decorators.http import require_POST
from base.views import interaction, utils
from base.views.token_authorize import *
from base.models import User, ModelManage, ServiceManage, SubjectManage, VersionManage
from classify.views.textcnn_classify.TextcnnClassifyRunner import TextcnnClassifyRunner
from classify.views.xgboost_classify.XgboostClassifyRunner import XgboostClassifyRunner
from classify.views.logistic_classify.LogisticClassifyRunner import LogisticClassifyRunner
from classify.views.fasttext_classify.FastTextRunner import FastTextRunner
# from classify.flair_classify.FlairClassifyRunner import FlairClassifyRunner
from clustering.views.KMeans.KmeansRunner import KmeansRunner
from platform_zzsn.settings import BASE_DIR
print('-----------')
print(BASE_DIR)
UPLOAD_FOLDER = os.path.join(BASE_DIR, 'media/')
ALLOWED_EXTENSIONS = set(['yaml', 'xlsx', 'xls', 'doc', 'docx', 'txt'])
# Log in
@require_POST
def login(request):
username = request.POST['username']
password = request.POST['password']
try:
user = User.objects.filter(username=username)
if not user:
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '用户不存在!',
'resultData': False,
})
elif user[0].password == password:
token = create_token(user[0])
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '登陆成功!',
'resultData': 'customer',
})
else:
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '账号或密码不正确!',
'resultData': False,
})
except Exception as e:
print(e)
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '登陆失败!',
'resultData': False,
})
# Register an account
@require_POST
def register_account(request):
try:
username = request.POST['username']
true_name = request.POST['true_name']
sex = request.POST['sex']
mobile_number = request.POST['mobile_number']
mail = request.POST['mail']
id_card = request.POST['id_card']
password = request.POST['password']
account_number = username + '@zzsn.cn'
user = User.objects.create(
username=username,
true_name=true_name,
sex=sex,
mobile_number=mobile_number,
mail=mail,
id_card=id_card,
password=password,
account_number=account_number,
)
except Exception as e:
print(e)
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '注册失败!',
'resultData': False
})
else:
return JsonResponse({
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '注册成功!',
'resultData': True,
})
# Check whether the username is available
@require_POST
def verify_username(request):
try:
username = request.POST['username']
usernames = User.objects.values_list('username', flat=True)
if username in usernames:
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '该用户名已存在!',
'resultData': False,
})
else:
return JsonResponse({
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '此用户名可用!',
'resultData': True
})
except Exception as e:
print(e)
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '用户名对比失败!',
'resultData': False
})
# Reset password
@require_POST
def reset_password(request):
username = request.POST['username']
password = request.POST['password']
try:
user = User.objects.get(username=username)
user.password = password
user.save()
except Exception as e:
print(e)
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '重置密码失败!',
'resultData': False
})
else:
return JsonResponse({
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '重置密码成功!',
'resultData': True,
})
@require_POST
@login_required
def show_config_file(request):
token = request.META.get("HTTP_AUTHORIZATION")
model_type = request.POST['model_type']
try:
path = os.path.join(BASE_DIR, r'static/common/config_data/'+ model_type + '.yaml')
data = utils.read_txt(path)
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '配置文件加载失败!',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '配置文件加载成功!',
'resultData': ''.join(data),
})
@require_POST
@login_required
def show_service_file(request):
# token = request.META.get("HTTP_AUTHORIZATION")
service_type = request.POST['service_type']
service_name = request.POST['service_name']
example_dir = os.path.join(BASE_DIR, 'static/common/', service_type, service_name)
temp = tempfile.TemporaryFile()
archive = zipfile.ZipFile(temp, 'w', zipfile.ZIP_DEFLATED)
print(example_dir)
filenames = os.listdir(example_dir)
for filename in filenames:
archive.write(os.path.join(example_dir, filename), filename)
archive.close()
lenth = temp.tell()
temp.seek(0)
wrapper = FileWrapper(temp)
response = HttpResponse(wrapper, content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename=example.zip'
response['Content-Length'] = lenth
return response
@require_POST
@login_required
@transaction.atomic
def delete_file_row_manage(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
try:
path = os.path.join(UPLOAD_FOLDER, str(path_timestamp) + '/')
if os.path.exists(path):
shutil.rmtree(path)
version_manage = VersionManage.objects.get(path=path_timestamp)
if version_manage:
model_id = version_manage.model.id
print(model_id)
all_version = VersionManage.objects.filter(model_id=model_id)
if len(all_version) == 1:
version_manage.delete()
version_manage.model.delete()
elif version_manage.state == '训练成功':
model_manage = version_manage.model
version_manage.delete()
model_manage.version_num = max(0, model_manage.version_num - 1)
model_manage.save()
else:
version_manage.delete()
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '删除失败!',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '删除成功!',
'resultData': True,
})
@require_POST
@login_required
def file_upload(request):
token = request.META.get("HTTP_AUTHORIZATION")
files = request.FILES.getlist('files')
path_timestamp = request.POST['path_timestamp']
if not path_timestamp:
path_timestamp = int(round(time.time() * 1000000))
path = os.path.join(UPLOAD_FOLDER, str(path_timestamp))
try:
for file in files:
print('上传文件名称为%s' % file.name)
if file and (file.name.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS):
filename = secure_filename(file.name)
os.makedirs(path, exist_ok=True)
default_storage.save(os.path.join(path, filename), file)
else:
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '数据文件格式错误!',
'resultData': False,
})
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '文件上传失败!',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '文件上传成功!',
'resultData': str(path_timestamp),
})
@require_POST
@login_required
def download_zip(request):
"""
Working implementation: package the uploaded directory as a zip and return it for download.
:param request: POST request carrying path_timestamp
:return: HttpResponse containing the zip archive, or a JSON error response
"""
path_timestamp = request.POST['path_timestamp']
token = request.META.get("HTTP_AUTHORIZATION")
print(path_timestamp)
file_dir = os.path.join(UPLOAD_FOLDER, path_timestamp)
try:
if not os.path.exists(file_dir):
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '该文件夹不存在!',
'resultData': False
})
else:
temp = tempfile.TemporaryFile()
archive = zipfile.ZipFile(temp, 'w', zipfile.ZIP_DEFLATED)
num = 0
sub_dirs = []
for cur_dir, dirs, files in os.walk(file_dir):
if num == 0:
sub_dirs = dirs
num += 1
for file in files:
sub_dir = os.path.split(cur_dir)[-1]
if sub_dir in sub_dirs:
archive.write(os.path.join(cur_dir, file), os.path.join(sub_dir, file))
else:
archive.write(os.path.join(cur_dir, file), file)
archive.close()
lenth = temp.tell()
temp.seek(0)
wrapper = FileWrapper(temp)
response = HttpResponse(wrapper, content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename=archive.zip'
response['Content-Length'] = lenth
return response
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '下载失败!',
'resultData': False
})
@require_POST
def forget_password(request):
try:
username = request.POST['username']
mobile_number = request.POST['mobile_number']
mail = request.POST['mail']
user = User.objects.get(username=username)
if user.mobile_number == mobile_number:
if user.mail == mail:
mail_username = "15617380221@163.com"
mail_pwd = "2698641198cjh"
mail_sender = "15617380221@163.com"
mail_receiver = [mail]
email_title = "郑州数能AI算法小组"
interaction.sendMail(mail_username, mail_pwd, mail_sender, mail_receiver, email_title)
return JsonResponse({
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '个人信息验证成功,密码已发至邮箱!',
'resultData': True
})
else:
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '邮箱账号填写错误!',
'resultData': False
})
else:
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '手机号填写错误!',
'resultData': False
})
except Exception as e:
print(e)
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '信息对比失败!',
'resultData': False
})
@require_POST
@login_required
def show_log_file(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
try:
path = UPLOAD_FOLDER + path_timestamp
files = [filename for filename in os.listdir(path) if 'log' in filename]
log_path = os.path.join(path, files[0])
data = utils.read_txt(log_path)
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '日志文件加载失败!',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '日志文件加载成功!',
'resultData': ''.join(data),
})
@require_POST
def validate_code(request):
pic = interaction.Picture()
img, code = pic.proPicture()
output_buffer = BytesIO()
img.save(output_buffer, format='JPEG')
byte_data = output_buffer.getvalue()
base64_str = base64.b64encode(byte_data)
base64_str = 'data:image/jpg;base64,' + str(base64_str, 'utf-8')
data = {'img': base64_str, 'code': code}
return JsonResponse({
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '生成成功!',
'resultData': data,
})
@require_POST
@login_required
@transaction.atomic
def run_train(request):
token = request.META.get("HTTP_AUTHORIZATION")
task_name = request.POST['task_name']
function_type = request.POST['function_type']
model_type = request.POST['model_type']
path_timestamp = request.POST['path_timestamp']
config_file = request.POST['config_file']
version_num = request.POST['version_num']
model_id = request.POST['model_id']
creator = request.POST['creator']
create_time = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
config_path = os.path.join(UPLOAD_FOLDER, path_timestamp)
logger = logging.getLogger(path_timestamp)
logger.setLevel(logging.INFO)
fh = logging.FileHandler(os.path.join(config_path,'train.log'), encoding='utf8')
ch = logging.StreamHandler()
# formatter = logging.Formatter(
# '[%(asctime)s][%(thread)d][%(filename)s][line: %(lineno)d][%(levelname)s] ## %(message)s')
# fh.setFormatter(formatter)
# ch.setFormatter(formatter)
logger.addHandler(fh)
logger.addHandler(ch)
if not version_num:
version_num = 0
new_version = 1
else:
new_version = 0
if not model_id:
model_manage = ModelManage.objects.create(
task_name=task_name,
function_type=function_type,
model_type=model_type,
version_num=int(version_num),
create_date=create_time,
)
model_id = model_manage.id
else:
model_manage = ModelManage.objects.get(id=model_id)
if not new_version:
versions = VersionManage.objects.filter(model_id=model_id)
new_version = max([int(version.version.replace('V', '')) for version in versions])+1
version_manage = VersionManage.objects.create(model_id=model_id,
version='V'+str(new_version),
create_date=create_time,
state='正在训练',
creator=creator,
path=path_timestamp,
)
try:
config_path = interaction.update_config_file(config_path, config_file)
print(config_path)
train_dict = {'fasttext': FastTextRunner(config_path),
'xgboost': XgboostClassifyRunner(config_path),
'logistic': LogisticClassifyRunner(config_path),
# 'flair': FlairClassifyRunner(config_path),
'textcnn': TextcnnClassifyRunner(config_path),
'kmeans': KmeansRunner(config_path)}
train_dict[model_type].train(logger)
end_time = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
version_manage.end_date = end_time
version_manage.state = '训练成功'
version_manage.save()
model_manage.version_num = int(version_num) + 1
model_manage.save()
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '训练成功!',
'resultData': True,
})
except Exception as e:
print(e)
end_time = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
version_manage.end_date = end_time
version_manage.state = '训练失败'
version_manage.save()
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': str(e),
'resultData': False,
})
finally:
logging.Logger.manager.loggerDict.pop(path_timestamp)
logger.manager = None
logger.handlers = []
@require_POST
@login_required
def query_manage(request):
token = request.META.get("HTTP_AUTHORIZATION")
try:
task_name = request.POST['task_name']
function_type = request.POST['function_type']
model_type = request.POST['model_type']
begin_cdate = request.POST['begin_date']
end_cdate = request.POST['end_date']
page_size = request.POST['page_size']
current_page = request.POST['current_page']
manager_list, len_managers = interaction.select_manage(task_name, function_type, model_type, begin_cdate, end_cdate, page_size, current_page)
# manager_list = [model_to_dict(manager) for manager in manager_list]
manager_list = [ModelManage.toDict(manager) for manager in manager_list]
result = {'current_page': int(current_page), 'page_size': int(page_size), 'data': manager_list, 'total': len_managers}
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '查询失败',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '查询成功',
'resultData': result,
})
@require_POST
@login_required
def query_version(request):
token = request.META.get("HTTP_AUTHORIZATION")
try:
begin_cdate = request.POST['begin_date']
end_cdate = request.POST['end_date']
model_id = request.POST['model_id']
page_size = request.POST['page_size']
current_page = request.POST['current_page']
version_list, len_versions = interaction.select_version(model_id, begin_cdate, end_cdate, page_size, current_page)
# manager_list = [model_to_dict(manager) for manager in manager_list]
manager_list = [VersionManage.toDict(version) for version in version_list]
print(manager_list)
result = {'current_page': int(current_page), 'page_size': int(page_size), 'data': manager_list, 'total': len_versions}
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '查询失败',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '查询成功',
'resultData': result,
})
@require_POST
@login_required
def query_service_manage(request):
token = request.META.get("HTTP_AUTHORIZATION")
try:
name = request.POST['name']
begin_cdate = request.POST['begin_date']
end_cdate = request.POST['end_date']
state = request.POST['state']
page_size = request.POST['page_size']
current_page = request.POST['current_page']
username = request.POST['username']
manager_list, len_managers = interaction.select_service_manage(
name, begin_cdate, end_cdate, state,
username, page_size, current_page)
# manager_list = [model_to_dict(manager) for manager in manager_list]
manager_list = [ServiceManage.toDict(manager) for manager in manager_list]
result = {'current_page': int(current_page), 'page_size': int(page_size), 'data': manager_list, 'total': len_managers}
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '查询失败',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '查询成功',
'resultData': result,
})
@require_POST
@login_required
@transaction.atomic
def delete_file_row_service(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
try:
path = os.path.join(UPLOAD_FOLDER, str(path_timestamp))
if os.path.exists(path):
shutil.rmtree(path)
ServiceManage.objects.filter(path=path_timestamp).delete()
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '删除失败!',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '删除成功!',
'resultData': True,
})
@require_POST
@login_required
def download_xlsx(request):
path_timestamp = request.POST['path_timestamp']
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
xls_path = os.path.join(path, 'result.xlsx')
with open(xls_path, 'rb') as file:
data = file.readlines()
response = HttpResponse(data, content_type='application/vnd.ms-excel')
response['Content-Disposition'] = 'attachment; filename=result.xlsx'
return response
@require_POST
@login_required
def query_subject(request):
token = request.META.get("HTTP_AUTHORIZATION")
current_page = request.POST['current_page']
page_size = request.POST['page_size']
try:
subjects = SubjectManage.objects.all()
len_subjects = len(subjects)
page = Paginator(subjects, page_size)
maxpages = page.num_pages # total number of pages
pIndex = int(current_page)
# clamp the requested page number to the valid range
if pIndex > maxpages:
pIndex = maxpages
subject_list = [SubjectManage.toDict(subject) for subject in list(page.page(pIndex))] # records on the current page
result_data = {'current_page': int(current_page),
'page_size': int(page_size),
'data': subject_list,
'total': len_subjects}
print(result_data)
except Exception as e:
return JsonResponse({
'token': token,
'handleMsg': 'fail',
'isHandleSuccess': False,
'logs': str(e),
'resultData': None,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '查询成功!',
'resultData': result_data,
})
@require_POST
@login_required
def query_task_name(request):
token = request.META.get("HTTP_AUTHORIZATION")
task_name = request.POST['task_name']
try:
model_manages = ModelManage.objects.filter(task_name__contains=task_name)[:20]
task_names = [ModelManage.toDict(i)['task_name'] for i in model_manages]
except Exception as e:
return JsonResponse({
'token': token,
'handleMsg': 'fail',
'isHandleSuccess': False,
'logs': str(e),
'resultData': None,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '查询成功!',
'resultData': task_names,
})
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class BasicServiceConfig(AppConfig):
name = 'basic_service'
from django.db import models
# Create your models here.
#-*- coding:utf-8 -*-
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from django.urls import path
from django.conf.urls import url
from basic_service.views import views
urlpatterns = [
url(r'^ner_single', views.ner_single, name='ner_single'),
url(r'^doc-similarity-single', views.doc_similarity_single, name='doc_similarity_single'),
url(r'^associated-word-single', views.associated_word_single, name='associated_word_single'),
url(r'^word_cut', views.word_cut, name='word_cut'),
url(r'^word_pos', views.word_pos, name='word_pos'),
url(r'^new_word_find', views.new_word_find, name='new_word_find'),
url(r'^show_srl', views.show_srl, name='show_srl'),
url(r'^show_dep', views.show_dep, name='show_dep'),
url(r'^create_keywords', views.create_keywords, name='create_keywords'),
url(r'^get_summary', views.get_summary, name='get_summary'),
url(r'^word_co_occurrence', views.word_co_occurrence, name='word_co_occurrence')
]
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 10:02
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 19:54
# @Author : 程婷婷
# @FileName: basic.py
# @Software: PyCharm
import os
import jieba
import json
import requests
import jionlp as jio
from ltp import LTP
import jieba.analyse
import ahocorasick
import pandas as pd
from gensim.models.keyedvectors import KeyedVectors
from platform_zzsn.settings import BASE_DIR
from model.base.views import utils
General_dict = utils.read_txt(os.path.join(BASE_DIR, 'static/base/dict_sogou.txt'))
General_dict_ = ''
for key in General_dict:
General_dict_ += ' ' + str(key.strip())
def word_cut(text):
ltp = LTP()
sentences = ltp.sent_split([text])
segment, _ = ltp.seg(sentences)
return segment
def word_pos(text):
ltp = LTP()
sentences = ltp.sent_split([text])
segment, hidden = ltp.seg(sentences)
pos = ltp.pos(hidden)
return segment, pos
class AC_Unicode:
"""稍微封装一下,弄个支持unicode的AC自动机
"""
def __init__(self):
self.ac = ahocorasick.Automaton()
def add_word(self, k, v):
# k = k.encode('utf-8')
return self.ac.add_word(k, v)
def make_automaton(self):
return self.ac.make_automaton()
def iter(self, s):
# find all added words occurring in the text s
# s = s.encode('utf-8')
return self.ac.iter(s)
def new_words_find(text):
words = list(jieba.cut(text, HMM=True))
words_copy = words.copy()
ac = AC_Unicode()
sign = [0] * len(words_copy)
for word in words:
if len(word) >= 2:
ac.add_word(word, word)
ac.make_automaton()
result_ac = ac.iter(General_dict_)
for index, key in result_ac:
try:
words.remove(key)
except:
continue
for index, word in enumerate(words_copy):
if (len(word) >= 2) and (word in words):
sign[index] = 1
return words_copy, sign
def show_srl(text):
ltp = LTP()
sentences = ltp.sent_split([text])
sentences_srl_dict, sentences_seg_dict = {}, {}
for i, sentence in enumerate(sentences):
seg, hidden = ltp.seg([sentence])
srl = ltp.srl(hidden, keep_empty=False)
sentences_seg_dict['句子' + str(i+1)+':'+str(sentence)] = seg[0]
sentences_srl_dict['句子'+str(i+1)+':'+str(sentence)] = srl[0]
return sentences_seg_dict, sentences_srl_dict
def show_dep(text):
ltp = LTP()
sentences = ltp.sent_split([text])
sentences_dep_dict, sentences_seg_dict = {}, {}
for i, sentence in enumerate(sentences):
seg, hidden = ltp.seg([sentence])
dep = ltp.dep(hidden)
sentences_seg_dict['句子'+str(i+1)+':'+str(sentence)] = seg[0]
sentences_dep_dict['句子'+str(i+1)+':'+str(sentence)] = dep[0]
return sentences_seg_dict, sentences_dep_dict
def create_keywords(text:str, topK:int, with_weight:bool)->list:
print(type(topK))
keywords = jio.keyphrase.extract_keyphrase(text, top_k=topK, with_weight=with_weight)
print(keywords)
return keywords
def ner(text):
ltp = LTP()
seg, hidden = ltp.seg([text])
entity = ltp.ner(hidden)
return seg[0], entity[0]
def related_word_recommendation(words, word_num):
# print(model.wv.most_similar(words))
# print(words.split(','), word_num)
print(words)
result = model.most_similar_cosmul(words.split(','), topn=int(word_num)) # 余弦相似度
print(result)
return result
def post_similarity(url, text_1, text_2, sim_algorithm_name):
payload = {'text_1': text_1, 'text_2': text_2, 'sim_algorithm_name': sim_algorithm_name}
headers = {
'Content-Type': 'application/json'
}
response = requests.request('POST', url, headers=headers, data=json.dumps(payload))
data = json.loads(response.text)
return data
def summary(text, summary_length):
summaries = jio.summary.extract_summary(text, summary_length)
return summaries
# zh_nlp = stanza.Pipeline('zh-hans')
# en_nlp = stanza.Pipeline('en')
# nlp_dict = {'zh': zh_nlp, 'en': en_nlp}
#model = KeyedVectors.load_word2vec_format(os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
# if __name__ == '__main__':
# print(word_cut('汤姆生病了。他去了医院。'))
# print(word_pos('汤姆生病了。他去了医院。'))
# print(new_words_find('白月光,形容的是一种可望不可即的人或者事物,虽然一直在心上,却从不在身边。'))
# print(new_words_find('爷青回,表示爷的青春又回来了,爷表示的是自己,将自己的身份地位抬高一个档次,像我是你大爷一样,通常用来形容那些知名的人、经典的动画、影视、游戏剧等重新复出或者是回归。'))
# show_srl('他叫汤姆去拿外衣。')
# print(show_dep('他叫汤姆去拿外衣。'))
# -*- coding: utf-8 -*-
# @Time : 2021/10/13 17:07
# @Author : ctt
# @File : co
# @Project : platform_zzsn
from basic_service.views.basic import create_keywords
import pandas as pd
import numpy as np
def Get_file_keywords(filepath, topK):
data_array = [] # per-article keyword lists (stored as strings)
set_word = [] # collection of all keywords
df = pd.read_excel(filepath)
sentences = df['内容'].tolist()
for sentence in sentences:
words = create_keywords(sentence, topK=topK, with_weight=False)
data_array.append(str(words))
for word in words:
if word not in set_word:
set_word.append(str(word))
set_word = list(set(set_word)) # deduplicated set of all keywords
return data_array, set_word
# Initialise the co-occurrence matrix
def build_matirx(set_word):
edge = len(set_word) + 1 # matrix height and width = number of keywords + 1
# build the rows independently; `[[''] * edge] * edge` would alias every row onto the same list object
matrix = [[''] * edge for _ in range(edge)]
# print(matrix.shape)
print(matrix)
print(set_word)
matrix[0][1:] = np.array(set_word)
print(matrix)
matrix = list(map(list, zip(*matrix)))
print(set_word)
matrix[0][1:] = np.array(set_word) # fill the first row and the first column with the keywords
return matrix
# Count how often each pair of keywords co-occurs
def count_matrix(matrix, formated_data):
for row in range(1, len(matrix)):
# iterate over the header row, skipping index 0
for col in range(1, len(matrix)):
# iterate over the header column, skipping index 0
# (this skips matrix[0][0], which is empty and is not a keyword)
if matrix[0][row] == matrix[col][0]:
# if the row keyword equals the column keyword, the count is 0, i.e. the diagonal is 0
matrix[col][row] = str(0)
else:
counter = 0 # initialise the counter
for ech in formated_data:
# combine the row keyword with the column keyword and look the pair up in every formatted source record
if matrix[0][row] in ech and matrix[col][0] in ech:
counter += 1
else:
continue
matrix[col][row] = str(counter)
return matrix
def main(filepath, topK):
formated_data, set_word = Get_file_keywords(filepath, topK)
matrix = build_matirx(set_word)
matrix = count_matrix(matrix, formated_data)
# data = pd.DataFrame(matrix)
return matrix
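# A hedged usage sketch (file names are hypothetical; the input Excel file must
# contain a '内容' column, as read in Get_file_keywords above):
if __name__ == '__main__':
    matrix = main('articles.xlsx', topK=10)
    pd.DataFrame(matrix).to_excel('co_occurrence.xlsx', index=False, header=False)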
import re
import pandas as pd
from collections import defaultdict, Counter
import numpy as np
import ahocorasick
import math
def read_text(file_articles, encoding='utf8'):
texts = set()
with open(file_articles, encoding=encoding) as f:
for line in f.readlines():
line = re.split(u'[^\u4e00-\u9fa50-9a-zA-Z]+', line)
for s in line:
if len(s) > 1:
texts.add(s)
print('文章数(即文本行数):{}'.format(len(texts)))
return texts
def get_ngrams_counts(texts, n, min_count):
'''
Return the occurrence counts of all n-grams in the corpus.
:param n: maximum gram length
:param min_count: minimum occurrence count; grams occurring fewer times are discarded
:return: (ngram count dict, total number of single characters)
'''
ngrams = defaultdict(int)
for t in list(texts):
for i in range(len(t)):
for j in range(1, n+1):
if i+j <= len(t):
ngrams[t[i:i+j]] += 1
ngrams = {i:j for i,j in ngrams.items() if j >= min_count}
total = 1.*sum([j for i,j in ngrams.items() if len(i) == 1])
print('字数:{}'.format(total))
return ngrams, total
def filter_with_porba(s, min_proba, total, ngrams):
'''
Compute the internal cohesion of a candidate string and keep it only if the score exceeds the length-specific threshold.
:param s: candidate string
:param min_proba: dict mapping word length to the minimum cohesion threshold
:return: True if the candidate passes, False otherwise
'''
if len(s) >= 2:
score = min([total*ngrams[s]/(ngrams[s[:i+1]]*ngrams[s[i+1:]]) for i in range(len(s)-1)])
if score > min_proba[len(s)]:
return True
else:
return False
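# In formula form, the cohesion score computed above for a candidate s of
# length n, with N the total character count and c(.) the raw n-gram counts, is
#     score(s) = min over 1 <= i < n of  N * c(s) / (c(s[:i]) * c(s[i:]))
# i.e. the worst-case ratio between the observed frequency of s and the
# frequency expected if its two parts were independent; s is kept only when
# this minimum exceeds the length-specific threshold in min_proba.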
def cut(s, n, ngrams):
'''
Segment a text using the n-gram set, preferring to leave text unsplit rather than split it wrongly.
:param s: a piece of text
:param ngrams: the filtered set of grams
:return: list of segments
'''
# count, at each position, how many substrings of length >= 2 covering it appear in the gram set
r = np.array([0]*(len(s)-1)) # coverage counts for fragments of length >= 2
for i in range(len(s)-1):
for j in range(2, n+1):
if s[i:i+j] in ngrams:
r[i:i+j-1] += 1
# splitting rule: as long as some covering substring is in the gram set, do not split; split only at positions where the coverage count in r is 0.
w = [s[0]]
for i in range(1, len(s)):
if r[i-1] > 0:
w[-1] += s[i]
else:
w.append(s[i])
return w
def is_real(s, n, ngrams):
if len(s) >= 4:
for i in range(4, n+1):
for j in range(len(s)-i+1):
if s[j:j+i] not in ngrams:
return False
return True
else:
return True
def cal_entropy(dict_gram,key):
'''
Compute the boundary entropy of a gram, separately for its left and right boundaries.
:param dict_gram: dict with 'left' and 'right' lists of neighbouring characters
:param key: the gram itself
:return: the smaller of the two boundary entropies
'''
left = dict_gram['left']
if len(set(left)) ==1 and left[0] ==' ' :
entropy_left = -1 # if the left boundary is empty, mark it with -1
else:
list_left = list(Counter(left).values())
sum_left = sum(list_left)
entropy_left = sum([-(i / sum_left) * math.log(i / sum_left) for i in list_left])
right = dict_gram['right']
if len(set(right)) ==1 and right[0] ==' ' :
entropy_right = -1 # if the right boundary is empty, mark it with -1
else:
list_right = list(Counter(right).values())
sum_right = sum(list_right)
entropy_right = sum([ -(i/sum_right)*math.log(i/sum_right) for i in list_right])
if entropy_left==-1 and entropy_right==-1:
entropy = -2 # if both boundary entropies are empty, mark the word with -2
else:
entropy = min(entropy_left, entropy_right)
return entropy
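# In formula form: for each candidate word w, with L(w) and R(w) the multisets
# of characters seen immediately to its left and right,
#     H_left(w)  = - sum_c p(c | L(w)) * log p(c | L(w))
#     H_right(w) = - sum_c p(c | R(w)) * log p(c | R(w))
# and the function returns min(H_left, H_right), using -1 for an empty side and
# -2 when both sides are empty, exactly as in the branches above.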
class AC_Unicode:
"""稍微封装一下,弄个支持unicode的AC自动机
"""
def __init__(self):
self.ac = ahocorasick.Automaton()
def add_word(self, k, v):
# k = k.encode('utf-8')
return self.ac.add_word(k, v)
def make_automaton(self):
return self.ac.make_automaton()
def iter(self, s):
# find all added words occurring in the text s
# s = s.encode('utf-8')
return self.ac.iter(s)
def get_ngrams_neighbor_ac(texts, w):
'''
Collect the characters adjacent to each candidate word: concatenate all texts into one line and match every candidate in a single pass with the Aho-Corasick automaton,
then use the match positions to record each word's left/right neighbour characters, from which the boundary entropy is computed.
'''
neighbors = {}
text_line = ''
for line in texts:
text_line += ' '+ line
print('构建AC自动机...')
ac = AC_Unicode()
for gram in w.keys():
if len(gram)>1:
ac.add_word(gram, gram)
ac.make_automaton()
result_ac = ac.iter(text_line)
print('迭代匹配结果...')
for item in result_ac:
index, key = item
if key not in neighbors.keys():
neighbors[key] = {'left': [], 'right': []}
index_left = index - len(key) + 1
if index_left - 1 >= 0:
neighbors[key]['left'].append(text_line[index_left - 1: index_left])
index_right = index
if index_right + 1 < len(text_line):
neighbors[key]['right'].append(text_line[index_right + 1: index_right + 2])
print('计算边界熵...')
ngrams_entropy = defaultdict(int)
for key in neighbors.keys():
entropy = cal_entropy(neighbors[key], key)
ngrams_entropy[key] = entropy
return ngrams_entropy
def remove_general_words_ac(dict_general_words, ws):
'''
Remove common words using a common-word dictionary: the dictionary is concatenated into one long text,
candidate words are matched against it with the Aho-Corasick automaton, and any candidate found there is deleted.
:param dict_general_words: path to the common-word dictionary (CSV)
:param ws: dict mapping candidate words to their boundary entropy
:return: remaining candidates sorted by boundary entropy, descending
'''
print('移除常用词...')
ac = AC_Unicode()
for gram in ws.keys():
if len(gram)>1:
ac.add_word(gram, gram)
General_dict = pd.read_csv(dict_general_words)
General_dict = list(General_dict['0'].values)
General_dict_ = ''
for key in General_dict:
General_dict_ += ' ' + str(key)
ac.make_automaton()
result_ac = ac.iter(General_dict_)
for index, key in result_ac:
try:
del ws[key]
except: continue
final_w = sorted(ws.items(), key=lambda item: item[1],reverse=True)
return final_w
def get_new_words( file_in, file_dict, file_out, min_count, min_proba):
'''
Discover new words.
:param file_in: input document, one article per line, utf8-encoded
:param file_dict: dictionary of common words, one word per line
:param file_out: output file, one word per line together with its boundary entropy, sorted by entropy in descending order, written as utf-8
:param min_count: minimum ngram frequency
:param min_proba: dict of minimum cohesion thresholds per word length (lengths 2, 3 and 4 are enough)
:return:
'''
import time
import pandas as pd
start = time.time()
n = 4 # default maximum gram length
df = pd.read_excel(file_in)['摘要'] # load the data (the '摘要' / abstract column)
df.dropna(inplace=True)
texts = []
for text in df:
if len(str(text)) > 10:
print(text)
texts.append(''.join(text.split()))
ngrams, total = get_ngrams_counts(texts, n, min_count) # collect the ngrams
ngrams_filter = set(i for i, j in ngrams.items() if filter_with_porba(i, min_proba, total, ngrams)) # compute cohesion and filter the ngrams by threshold
# segment the texts with the filtered ngrams
words = defaultdict(int)
for t in texts:
for i in cut(t, n, ngrams_filter):
words[i] += 1
w = {i: j for i, j in words.items() if j >= min_count} # keep only words whose frequency reaches the threshold
# Note: words and ngrams_filter (the cohesion-filtered set) are not completely identical, because segmentation can produce words that are not in ngrams.
# w = {i: j for i, j in words.items() if is_real(i, n, ngrams_filter)}
print('凝固度筛选词的长度:{}'.format(len(w)))
ws = get_ngrams_neighbor_ac(texts, w) # compute the boundary entropy of each candidate (sorted later)
final_w = remove_general_words_ac(file_dict, ws)
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~“”?,!【】()、。:;’‘……¥·↓/"""
count_num = 0
with open(file_out, 'w', encoding='utf-8') as writer:
for value in final_w:
word = value[0]
sign = 0
for i in word:
if i in punctuation:
sign = 1
break
print(sign)
if (len(word) >= 2) and (sign==0):
writer.write('{},{}\n'.format(word, value[1]))
count_num += 1
end = time.time()
print('新词个数:{}'.format(count_num))
print('花费时间:{}分钟'.format(round((end - start) / 60, 2)))
if __name__ == '__main__':
min_count = 1
min_proba = {2: 500, 3: 1000, 4: 500}
file_in = r'D:\临时工作\临时工作代码\企业资讯八方面-附关键词\风险管理.xlsx' # utf8
file_dict = './dict_sogou_vec.txt' # utf8
file_out = './find_words_.csv' # utf8 (written with encoding='utf-8' above)
# import pdfplumber
#
# file_path = r'C:\xxxx\practice.PDF'
#
# with pdfplumber.open(file_path) as pdf:
# page = pdf.pages[11]
# print(page.extract_text())
get_new_words(file_in, file_dict, file_out, min_count, min_proba)
from tkinter import _flatten
from django.http import JsonResponse
from django.views.decorators.http import require_POST
from basic_service.views import basic, co_occurrence
from model.base.views.token_authorize import *
import shutil
UPLOAD_FOLDER = '/home/zzsn/ctt/platform_zzsn/media/'
# Create your views here.
@require_POST
@login_required
def doc_similarity_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
text_1 = request.POST['text_1']
text_2 = request.POST['text_2']
sim_algorithm_name = request.POST['sim_algorithm_name']
print(text_1)
print(text_2)
url = 'http://localhost:7005/doc_sim/calculate_similarity'
result = basic.post_similarity(url, text_1, text_2, sim_algorithm_name)
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def ner_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, entity = basic.ner(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'pos': entity},
})
@require_POST
@login_required
def associated_word_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
word_num = request.POST['word_num']
try:
related_words = basic.related_word_recommendation(text, word_num)
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': str(e),
'resultData': None,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': related_words,
})
@require_POST
@login_required
def word_cut(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words = basic.word_cut(text)
words = list(_flatten(words))
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': words,
})
@require_POST
@login_required
def word_pos(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, pos = basic.word_pos(text)
words = list(_flatten(words))
pos = list(_flatten(pos))
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'pos': pos},
})
@require_POST
@login_required
def new_word_find(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, sign = basic.new_words_find(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'sign': sign},
})
@require_POST
@login_required
def show_srl(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, srl = basic.show_srl(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'srl': srl},
})
@require_POST
@login_required
def show_dep(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, dep = basic.show_dep(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'dep': dep},
})
@require_POST
@login_required
def create_keywords(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
topK = int(request.POST['topK'])
with_weight = bool(request.POST['with_weight'])
key_words = basic.create_keywords(text=text, topK=topK, with_weight=with_weight)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'keywords': key_words},
})
@require_POST
@login_required
def get_summary(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
summary_length = request.POST['summary_length']
summaries = basic.summary(text, summary_length)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'summaries': summaries},
})
@require_POST
@login_required
def word_co_occurrence(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
pending_file = request.POST['pending_file']
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
filepath = os.path.join(path, pending_file)
topK = int(request.POST['topK'])
word_matric = co_occurrence.main(filepath, topK)
if os.path.exists(path):
shutil.rmtree(path)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'word_matric': word_matric},
})
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'platform_zzsn.settings')
try:
from django.core.management import execute_from_command_line
except ImportError as exc:
raise ImportError(
"Couldn't import Django. Are you sure it's installed and "
"available on your PYTHONPATH environment variable? Did you "
"forget to activate a virtual environment?"
) from exc
execute_from_command_line(sys.argv)
if __name__ == '__main__':
main()
from model.base.views.config import BaseConfig
from model.base.views.data import BaseDataLoader
from model.base.views.data import BaseDataProcess
from model.base.views.evaluator import BaseEvaluator
from model.base.views.loss import BaseLoss
from model.base.views.model import BaseModel
from model.base.views.runner import BaseRunner
\ No newline at end of file
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class BaseConfig(AppConfig):
name = 'base'
from django.db import models
from datetime import datetime
# Create your models here.
class User(models.Model):
username = models.CharField(max_length=30, unique=True)
true_name = models.CharField(max_length=30)
sex = models.CharField(max_length=2)
mobile_number = models.CharField(max_length=20)
mail = models.CharField(max_length=20)
id_card = models.CharField(max_length=20)
password = models.CharField(max_length=40)
account_number = models.CharField(max_length=20)
def toDict(self):
return {'id':self.id,
'username':self.username,
'true_name':self.true_name,
'sex':self.sex,
'mobile_number':self.mobile_number,
'mail':self.mail,
'id_card':self.id_card,
'password':self.password,
'account_number':self.account_number,
# 'update_at':self.update_at.strftime('%Y-%m-%d %H:%M:%S')
}
class Meta:
db_table = 'user'
class ServiceManage(models.Model):
name = models.CharField(max_length=15)
username = models.CharField(max_length=30)
filenames = models.CharField(max_length=200)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=10)
path = models.CharField(max_length=20)
def toDict(self):
return {'name': self.name,
'username': self.username,
'filenames': self.filenames,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'path': self.path,
}
class Meta:
db_table = 'service_manage'
class SubjectManage(models.Model):
sid = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=30)
def toDict(self):
return {'sid': self.sid,
'name': self.name,
}
class Meta:
db_table = 'subject_manage'
class ModelManage(models.Model):
task_name = models.CharField(max_length=30)
function_type = models.CharField(max_length=20)
model_type = models.CharField(max_length=20)
version_num = models.IntegerField()
create_date = models.DateTimeField(default=None)
def toDict(self):
return {'id': self.id,
'task_name': self.task_name,
'function_type': self.function_type,
'model_type': self.model_type,
'version_num': self.version_num,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
}
class Meta:
db_table = 'model_manage'
class VersionManage(models.Model):
model = models.ForeignKey(ModelManage, related_name='version_model', on_delete=models.CASCADE)
version = models.CharField(max_length=20)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=20)
creator = models.CharField(max_length=30)
path = models.CharField(max_length=20, unique=True)
def toDict(self):
return {'id': self.id,
'version': self.version,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'creator': self.creator,
'path': self.path,
}
class Meta:
db_table = 'version_manage'
\ No newline at end of file
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from model.base.views import views as base_views
from django.conf.urls import url
urlpatterns = [
url(r'^register-account', base_views.register_account, name='register_account'),
url(r'^verify-username', base_views.verify_username, name='verify_username'),
url(r'^login', base_views.login, name='login'),
url(r'^reset-password', base_views.reset_password, name='reset_password'),
url(r'^show-config-file', base_views.show_config_file, name='show_config_file'),
url(r'^show-service-file', base_views.show_service_file, name='show_service_file'),
url(r'^delete-file-row-manage', base_views.delete_file_row_manage, name='delete_file_row_manage'),
url(r'^delete-file-row-service', base_views.delete_file_row_service, name='delete_file_row_service'),
url(r'^file-upload', base_views.file_upload, name='file_upload'),
url(r'^show-log-file', base_views.show_log_file, name='show_log_file'),
url(r'^validate-code', base_views.validate_code, name='validate_code'),
url(r'^download-zip', base_views.download_zip, name='download_zip'),
url(r'^download-xlsx', base_views.download_xlsx, name='download_xlsx'),
url(r'^query-manage', base_views.query_manage, name='query_manage'),
url(r'^forget-password', base_views.forget_password, name='forget_password'),
url(r'^train', base_views.run_train, name='train'),
url(r'^query-service-manage', base_views.query_service_manage, name='query_service_manage'),
url(r'^query-subject', base_views.query_subject, name='query_subject'),
url(r'^query-version', base_views.query_version, name='query_version'),
url(r'^query-task-name', base_views.query_task_name, name='query_task_name')
]
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 11:51
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 14:34
# @Author : 程婷婷
# @FileName: BaseConfig.py
# @Software: PyCharm
import yaml
class BaseConfig:
def __init__(self, config_path):
self._config_path = config_path
self._parsed_file = self.load_config()
def load_config(self):
print(self._config_path)
with open(self._config_path) as yaml_file:
parsed_file = yaml.load(yaml_file, Loader=yaml.FullLoader)
return parsed_file
# if __name__ == '__main__':
# bc = BaseConfig()
# print(bc._parsed_file)
# print(bc.load_config()['data_path'])
# print(bc.load_config()['embedding'])
# print(bc.load_config()['model'])
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 9:58
# @Author : 程婷婷
# @FileName: BaseDataLoader.py
# @Software: PyCharm
import pandas as pd
from model.base.views.config.BaseConfig import BaseConfig
class BaseDataLoader:
def __init__(self, config_path):
self.data_loader_config = BaseConfig(config_path)._parsed_file['data_loader']
def read_file(self):
symbol = self.data_loader_config['dataset_path'].split('.')[-1]
if (symbol == 'xlsx') or (symbol == 'xls'):
df = pd.read_excel(r''+self.data_loader_config['dataset_path'])
elif symbol == 'csv':
df = pd.read_csv(r''+self.data_loader_config['dataset_path'], sep='\t')
else:
print('数据类型错误')
return '数据类型错误'
df.drop_duplicates(subset='content', keep='first', inplace=True)
df.dropna(subset=['content', 'title'], inplace=True)
df = df.reset_index(drop=True)
print('=================执行正文去重和去空之后共有%d条数据=============' % len(df['content']))
return df
def read_stopwords(self):
# read the stopword list
stopword_list = [k.strip() for k in open(self.data_loader_config['stopwords_path'], encoding='utf8').readlines() if
k.strip() != '']
return stopword_list
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 15:28
# @Author : 程婷婷
# @FileName: BaseDataProcess.py
# @Software: PyCharm
import re
import jieba
import pickle
import gensim
import logging
import numpy as np
from pyhanlp import *
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_classif, SelectPercentile
from model.base import BaseConfig
from model.base import BaseDataLoader
from platform_zzsn.settings import BASE_DIR
format = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=format, level=logging.INFO)
class BaseDataProcess:
def __init__(self, config_path):
self.embedding_config = BaseConfig.BaseConfig(config_path)._parsed_file['embedding']
self.process_config = BaseConfig.BaseConfig(config_path)._parsed_file['data_process']
PerceptronLexicalAnalyzer = JClass('com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer')
self.pla_segment = PerceptronLexicalAnalyzer()
self.bdl = BaseDataLoader.BaseDataLoader(config_path)
def clean_content(self, content):
bs = BeautifulSoup(content, 'html.parser')
return bs.text
def remove_char(self, content):
# keep Chinese characters, English letters, digits and basic punctuation
graph_filter = re.compile(r'[^\u4e00-\u9fa5a-zA-Z0-9\s,。\.,?\?!!;;]')
content = graph_filter.sub('', content)
return content
def jieba_tokenizer(self, content):
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in jieba.lcut(content) if word not in stopwords])
def pla_tokenizer(self, content):
words = list(self.pla_segment.analyze(content).toWordArray())
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in words if word not in stopwords])
def save(self, voc, path):
with open(path, 'wb') as voc_file:
pickle.dump(voc, voc_file)
def process(self, data, min_content=0):
processed_data = []
for record in data:
record = self.clean_content(str(record))
record = self.remove_char(record)
if len(record) > min_content:
methods = self.process_config['tokenizer']
if methods == 'PerceptronLexicalAnalyzer':
record = self.pla_tokenizer(record)
record = [w for w in record.split() if w.strip() != ''] # split the tokenizer output back into words rather than characters
else:
record = self.jieba_tokenizer(record)
record = [w for w in record.split() if w.strip() != '']
processed_data.append(' '.join(record))
else:
pass
return processed_data
def split_dataset(self, data, use_dev):
if use_dev:
train_data_set, test_dev_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
train_data_set, test_data_set, dev_data_set = train_test_split(test_dev_set,
test_size=self.process_config['test_size'],
random_state=self.process_config['random_state'],
shuffle=True)
print(len(train_data_set) + len(test_data_set) + len(dev_data_set))
return train_data_set, test_data_set, dev_data_set
else:
train_data_set, test_data_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
return train_data_set, test_data_set
def bag_of_words(self, data, label):
vectorizer = CountVectorizer(ngram_range=(1, 1), min_df=5)
x = vectorizer.fit_transform(data)
transformer = TfidfTransformer(norm=self.embedding_config['norm'], use_idf=self.embedding_config['use_idf'],
smooth_idf=self.embedding_config['smooth_idf'])
x = transformer.fit_transform(x).toarray()
if self.embedding_config['with_feature_selection']:
transformed_data = SelectPercentile(mutual_info_classif, percentile=20).fit_transform(x, label)
else:
transformed_data = x
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
self.save(voc=vectorizer.vocabulary_, path=os.path.join(self.embedding_config['embedding_path'], 'tfidf.pkl'))
return transformed_data, vectorizer.get_feature_names()
def word2vec(self, data, feature_words):
model = gensim.models.word2vec.Word2Vec(sentences=data,
size=self.embedding_config['size'],
window=self.embedding_config['window'],
min_count=self.embedding_config['min_count'],
workers=self.embedding_config['workers'],
sg=self.embedding_config['sg'],
iter=self.embedding_config['iter'])
vocabulary_w2v = model.wv.vocab.keys()
count = 0
if self.embedding_config['use_Tencent']:
model_tencent = gensim.models.KeyedVectors.load_word2vec_format(
os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
vocabulary_tencent = model_tencent.wv.vocab.keys()
vector_matrix = np.zeros((len(feature_words), int(self.embedding_config['size']) + 200))
for word in feature_words:
if word in vocabulary_tencent:
vector_tencent = model_tencent.wv.word_vec(word)
else:
vector_tencent = np.random.randn(200)
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector = np.concatenate((vector_tencent, vector_w2v))
vector_matrix[count] = vector
count += 1
else:
vector_matrix = np.zeros((len(feature_words), self.embedding_config['size']))
for word in feature_words:
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector_matrix[count] = vector_w2v
count += 1
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
model.save(os.path.join(self.embedding_config['embedding_path'], 'word2vec.model'))
return vector_matrix
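# --- Added illustration (hedged): bag_of_words above pickles the fitted vocabulary as
# tfidf.pkl; a downstream consumer can rebuild an equivalent CountVectorizer from that
# file to vectorise new, already-tokenised text consistently. The path below is a
# placeholder, not the project's real embedding directory.
def _demo_reuse_tfidf_vocabulary():
    import pickle
    from sklearn.feature_extraction.text import CountVectorizer
    with open('/path/to/embedding_dir/tfidf.pkl', 'rb') as f:
        vocab = pickle.load(f)
    vectorizer = CountVectorizer(ngram_range=(1, 1), vocabulary=vocab)
    counts = vectorizer.transform(['机器 学习 很 有趣'])
    print(counts.shape)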
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:30
# @Author : 程婷婷
# @FileName: BaseEvaluator.py
# @Software: PyCharm
from sklearn.metrics import precision_score, f1_score, recall_score, classification_report
import logging
from model.base.views.config.BaseConfig import BaseConfig
formats = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=formats, level=logging.INFO)
class BaseEvaluator:
def __init__(self, config_path):
self.evaluate_config = BaseConfig(config_path)._parsed_file['evaluate']
def evaluate(self, y_true, y_pred, label_mapping, logger):
result = []
y_true = list(map(str, y_true))
y_pred = list(map(str, y_pred))
logger.info('模型评估结果如下:')
if not label_mapping:
result.append(classification_report(y_true, y_pred))
logger.info(classification_report(y_true, y_pred))
else:
for value in label_mapping.values():
print([k for k,v in label_mapping.items() if v == value])
p = precision_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
r = recall_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
f1 = f1_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
print({'value': value,'召回率为': r, '精确率为': p, 'F1': f1})
logger.info('标签为%s' % [k for k,v in label_mapping.items() if v == value][0])
logger.info('精确率为%.2f' %p)
logger.info('召回率为%.2f' %r)
logger.info('F1为%.2f' %f1)
result.append(str({'label': value,'recall': r, 'precision': p, 'F1': f1}))
return ' '.join(result)
# y_true = [0, 1, 2, 0, 1, 2]
# y_pred = [0, 2, 1, 0, 0, 1]
# print(BaseEvaluator())
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
import os
import yaml
import random
import smtplib
from email.mime.text import MIMEText
from django.core.paginator import Paginator
from email.mime.multipart import MIMEMultipart
from PIL import Image,ImageFont,ImageDraw,ImageFilter
from model.base.models import ModelManage, ServiceManage, VersionManage
from platform_zzsn.settings import BASE_DIR
class Picture:
def __init__(self):
self.size = (240,60)
self.mode='RGB'
self.color='white'
self.font = ImageFont.truetype(os.path.join(BASE_DIR,
'static/common/font/arial.ttf'), 36) # font face and size
def randChar(self):
basic='23456789abcdefghijklmnpqrstwxyzABCDEFGHIJKLMNPQRSTWXYZ'
return basic[random.randint(0,len(basic)-1)] # pick one random character
def randBdColor(self):
return (random.randint(64,255),random.randint(64,255),random.randint(64,255)) # background noise colour
def randTextColor(self):
return (random.randint(32, 127), random.randint(32, 127), random.randint(32, 127)) # random text colour
def proPicture(self):
new_image=Image.new(self.mode,self.size,self.color) # new image from the default mode, size and colour
drawObject=ImageDraw.Draw(new_image) # drawing handle for the image
line_num = random.randint(4,6) # number of interference lines
for i in range(line_num):
#size=(240,60)
begin = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
end = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
drawObject.line([begin, end], self.randTextColor())
for x in range(240):
for y in range(60):
tmp = random.randint(0,50)
if tmp>30: # controls how many noise points are drawn
drawObject.point((x,y),self.randBdColor())
randchar=''
for i in range(5):
rand=self.randChar()
randchar+=rand
drawObject.text([50*i+10,10],rand,self.randTextColor(),font=self.font) # draw the character
new_image = new_image.filter(ImageFilter.SHARPEN) # sharpen filter
return new_image,randchar
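# --- Added illustration (hedged): generating one captcha image and its ground-truth
# string with the Picture class above. Assumes the bundled arial.ttf is available;
# the output path is only an example.
def _demo_captcha():
    pic = Picture()
    image, code = pic.proPicture()
    image.save('/tmp/captcha_demo.jpg')
    print(code)  # the five random characters drawn into the image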
def update_config_file(config_path, config_file):
data = yaml.load(config_file, Loader=yaml.FullLoader)
data['data_loader'] = {}
model_path = data['model']['model_path']
model_name = data['model']['model_name']
if data['model']['model_path']:
data['model']['model_path'] = os.path.join(config_path, model_path)
else:
data['model']['model_path'] = os.path.join(config_path, model_name)
print(data['model']['model_path'])
embedding_path = data['embedding']['embedding_path']
if embedding_path:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['embedding_path'])
else:
if data['embedding']['name']:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['name'])
tokenizer_path = data['embedding']['tokenizer_path']
if tokenizer_path:
data['embedding']['tokenizer_path'] = os.path.join(config_path, data['embedding']['tokenizer_path'])
try:
test_file_path = data['data_process']['test_file_path']
train_file_path = data['data_process']['train_file_path']
except KeyError:
pass
else:
data['data_process']['test_file_path'] = os.path.join(config_path, test_file_path)
data['data_process']['train_file_path'] = os.path.join(config_path, train_file_path)
for file in os.listdir(config_path):
if ('.xls' == file[-4:]) or ('.xlsx' == file[-5:]):
xlsx_path = os.path.join(config_path, file)
data['data_loader']['dataset_path'] = xlsx_path
if 'save_fname' in data['runner'].keys():
data['runner']['save_fpath'] = os.path.join(config_path, data['runner']['save_fname'])
data['data_loader']['stopwords_path'] = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
file_path = os.path.join(config_path, 'config.yaml')
with open(file_path, 'w') as yaml_file:
yaml.safe_dump(data, yaml_file, default_flow_style=False)
return file_path
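# --- Added illustration (hedged): a minimal config_file payload of the shape
# update_config_file expects (it reads the 'model', 'embedding' and 'runner' sections
# above). The field names mirror the keys accessed in the function; the values are
# illustrative assumptions, not the project's real configuration.
_EXAMPLE_CONFIG_YAML = """
model:
  model_name: model.pkl
  model_path: ''
embedding:
  name: tfidf
  embedding_path: ''
  tokenizer_path: ''
runner:
  save_fname: result.xlsx
"""
# update_config_file('/tmp/upload_dir', _EXAMPLE_CONFIG_YAML) would rewrite the relative
# names into absolute paths under /tmp/upload_dir and dump config.yaml there.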
def select_manage(task_name, function_type, model_type, begin_cdate, end_cdate, page_size, current_page):
condition = {'task_name': task_name, 'function_type': function_type, 'model_type': model_type,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
managers = ModelManage.objects.filter(**condition).order_by('-create_date')
len_managers = len(managers)
page = Paginator(managers, page_size)
maxpages = page.num_pages # total number of pages
pIndex = int(current_page)
# clamp the requested page to the valid range
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # data for the current page
return list(manager_list), len_managers
def select_version(model_id, begin_cdate, end_cdate, page_size, current_page):
condition = {'model_id': model_id,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
versions = VersionManage.objects.filter(**condition).order_by('-create_date')
len_versions = len(versions)
page = Paginator(versions, page_size)
maxpages = page.num_pages # total number of pages
pIndex = int(current_page)
# clamp the requested page to the valid range
if pIndex > maxpages:
pIndex = maxpages
version_list = page.page(pIndex) # data for the current page
return list(version_list), len_versions
def select_service_manage(name, begin_cdate, end_cdate, state, username, page_size, current_page):
condition = {
'name': name,
'state': state,
'create_date__range': (begin_cdate, end_cdate),
'username': username,
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
print(condition)
service_managers = ServiceManage.objects.filter(**condition).order_by('-create_date')
len_service_managers = len(service_managers)
page = Paginator(service_managers, page_size)
maxpages = page.num_pages
pIndex = int(current_page)
# clamp the requested page to the valid range
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # data for the current page
return list(manager_list), len_service_managers
def sendMail(user,pwd,sender,receiver,msg_title):
mail_host = "smtp.163.com" # the 163 SMTP server
message = MIMEMultipart('alternative')
# sender of the mail
message["From"] = sender
# recipients of the mail
message["To"] = ",".join(receiver)
# subject of the mail
message["Subject"] = msg_title
# plain-text body (disabled)
# message.attach(MIMEText('您好,\n'
# ' 您当前的密码为%s, 为了保证您的账号安全,请尽快登陆重置您的密码'%msg_content, 'plain', 'utf-8'))
# HTML body
message.attach(MIMEText('<html>'
'<body>'
'<h1>Hello </h1><br> '
'<h3>To ensure the security of your account, please log in and reset your password as soon as possible.</h3>'
'<h2><a href="http://192.168.1.149:8020/reset_password/">点此重置</a></h2>'
'</body>'
'</html>', 'html', 'utf-8'))
# connect to the SMTP server over SSL
smtpObj = smtplib.SMTP_SSL(mail_host,465)
# log in to authenticate
smtpObj.login(user,pwd)
# send the mail
# arguments: sender, recipients, message body
smtpObj.sendmail(sender,receiver,message.as_string())
return True
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:29
# @Author : 程婷婷
# @FileName: BaseLoss.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:18
# @Author : 程婷婷
# @FileName: BaseModel.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
import os
import pickle
class BaseModel:
def __init__(self,config_path):
self.model_config = BaseConfig(config_path)._parsed_file['model']
def building_model(self, *params):
pass
def save(self, model):
dir = os.path.dirname(self.model_config['model_path'])
if not os.path.exists(dir):
os.makedirs(dir)
with open(self.model_config['model_path'], 'wb') as model_file:
pickle.dump(model, model_file)
def predict(self, model, X):
proba = model.predict_proba(X)
y_predict = model.predict(X)
return {'proba': proba, 'y_predict': y_predict}
\ No newline at end of file
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:36
# @Author : 程婷婷
# @FileName: BaseRunner.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
class BaseRunner:
def __init__(self,config_path):
self.runner_config = BaseConfig(config_path)._parsed_file['runner']
def train(self, logger):
pass
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 9:24
# @Author : 程婷婷
# @FileName: test.py
# @Software: PyCharm
import jieba
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectPercentile, chi2
X, y = load_digits(return_X_y=True)
print(X.shape)
print(X[:10], y[:100])
X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)
print(X_new.shape)
print(X_new[:10])
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/20 16:58
# @Author : 程婷婷
# @FileName: token_authorize.py
# @Software: PyCharm
import jwt
import time
import functools
from jwt import exceptions
from django.http import JsonResponse
from platform_zzsn.settings import *
# SECRET_KEY (imported from the settings above) is the signing key used to verify that a JWT is valid and legitimate
def create_token(user):
'''Create a token based on JWT.'''
headers = {
"alg": "HS256",
"typ": "JWT"
}
exp = int(time.time() + 3*60*60)
payload = {
"id": user.id,
"name": user.username,
"exp": exp
}
token = jwt.encode(payload=payload, key=SECRET_KEY, algorithm='HS256', headers=headers).decode('utf-8')
return token
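# --- Added illustration (hedged): round-tripping a token produced by create_token.
# Assumes the PyJWT 1.x API used elsewhere in this module; the fake user object only
# needs the two attributes create_token reads (id and username).
def _demo_verify_token():
    class _FakeUser:
        id = 1
        username = 'demo'
    token = create_token(_FakeUser())
    payload = jwt.decode(token, SECRET_KEY, algorithms=['HS256'])
    print(payload['name'], payload['exp'])  # 'demo' plus the expiry timestamp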
def login_required(view_func):
@functools.wraps(view_func)
def validate_token(request, *args, **kwargs):
'''Validate the token; if it passes, the decoded payload is accepted and the wrapped view is called.'''
payload = None
msg = None
try:
token = request.META.get("HTTP_AUTHORIZATION")
payload = jwt.decode(token, SECRET_KEY, True, algorithms=['HS256'])
print(payload)
return view_func(request, *args, **kwargs)
# JWT validity / integrity checks
except exceptions.ExpiredSignatureError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '登录已过期'
})
except jwt.DecodeError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '缺少参数token'
# token failed verification
})
except jwt.InvalidTokenError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '缺少参数token'
# invalid token
})
return validate_token
\ No newline at end of file
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/9 11:19
# @Author : 程婷婷
# @FileName: utils.py
# @Software: PyCharm
import os
import re
import jieba
import zipfile
import pandas as pd
from docx import Document
from platform_zzsn.settings import *
def read_txt(path):
with open(path, 'r', encoding='utf8') as file:
lines = file.readlines()
return lines
def read_docx(pending_file, user_file):
jieba.load_userdict(user_file)
document = Document(pending_file)
doc_text_list = []
for para in document.paragraphs:
para_text = re.sub(r'\s', '', para.text)
if para_text:
doc_text_list.append(para_text)
return doc_text_list
def read_excel(pending_file, user_file):
jieba.load_userdict(user_file)
doc_text_list = pd.read_excel(pending_file)['content']
doc_text_list.dropna(inplace=True)
return doc_text_list
def merge_para(paras):
new_paras = []
for i, para in enumerate(paras):
if not new_paras:
new_paras.append(para)
elif (len(new_paras[-1]) < 500):
new_paras[-1] += para
else:
new_paras.append(para)
return new_paras
def filter_stopwords(para):
path = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
stopword_list = [k.strip() for k in read_txt(path) if
k.strip() != '']
words = [word for word in jieba.lcut(para) if word not in stopword_list]
return words
# sort key: take the second element of a pair
def takeSecond(elem):
return elem[1]
def takeFirst_len(elem):
return len(elem[0])
def make_zip(file_dir: str, zip_path: str) -> None:
zip_f = zipfile.ZipFile(zip_path, 'w')
pre_len = len(os.path.dirname(file_dir))
for parent, dir_names, filenames in os.walk(file_dir):
for filename in filenames:
path_file = os.path.join(parent, filename)
arc_name = path_file[pre_len:].strip(os.path.sep)
zip_f.write(path_file, arc_name)
zip_f.close()
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/14 10:32
# @Author : 程婷婷
# @FileName: views.py
# @Software: PyCharm
import base64
import shutil
import logging
import tempfile
import zipfile
from io import BytesIO
from django.db import transaction
from wsgiref.util import FileWrapper
from django.core.paginator import Paginator
from werkzeug.utils import secure_filename
from django.http import JsonResponse, HttpResponse
from django.core.files.storage import default_storage
from django.views.decorators.http import require_POST
from model.base.views import utils, interaction
from model.base.views.token_authorize import *
from model.base.models import User, ModelManage, ServiceManage, SubjectManage, VersionManage
from model.classify.views.textcnn_classify.TextcnnClassifyRunner import TextcnnClassifyRunner
from model.classify.views.xgboost_classify.XgboostClassifyRunner import XgboostClassifyRunner
from model.classify.views.logistic_classify.LogisticClassifyRunner import LogisticClassifyRunner
from model.classify.views.fasttext_classify.FastTextRunner import FastTextRunner
# from classify.flair_classify.FlairClassifyRunner import FlairClassifyRunner
from model.clustering.views.KMeans.KmeansRunner import KmeansRunner
from platform_zzsn.settings import MEDIA_ROOT
print('-----------')
print(MEDIA_ROOT)
UPLOAD_FOLDER = MEDIA_ROOT
ALLOWED_EXTENSIONS = set(['yaml', 'xlsx', 'xls', 'doc', 'docx', 'txt'])
# login
@require_POST
def login(request):
username = request.POST['username']
password = request.POST['password']
try:
user = User.objects.filter(username=username)
if not user:
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '用户不存在!',
'resultData': False,
})
elif user[0].password == password:
token = create_token(user[0])
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '登陆成功!',
'resultData': 'zzsn',
})
else:
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '账号或密码不正确!',
'resultData': False,
})
except Exception as e:
print(e)
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '登陆失败!',
'resultData': False,
})
# register a new account
@require_POST
def register_account(request):
try:
username = request.POST['username']
true_name = request.POST['true_name']
sex = request.POST['sex']
mobile_number = request.POST['mobile_number']
mail = request.POST['mail']
id_card = request.POST['id_card']
password = request.POST['password']
account_number = username + '@zzsn.cn'
user = User.objects.create(
username=username,
true_name=true_name,
sex=sex,
mobile_number=mobile_number,
mail=mail,
id_card=id_card,
password=password,
account_number=account_number,
)
except Exception as e:
print(e)
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '注册失败!',
'resultData': False
})
else:
return JsonResponse({
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '注册成功!',
'resultData': True,
})
# check whether the username is already taken
@require_POST
def verify_username(request):
try:
username = request.POST['username']
print(username)
usernames = User.objects.values_list('username', flat=True)
if username in usernames:
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '该用户名已存在!',
'resultData': False,
})
else:
return JsonResponse({
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '此用户名可用!',
'resultData': True
})
except Exception as e:
print(e)
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '用户名对比失败!',
'resultData': False
})
# reset the password
@require_POST
def reset_password(request):
username = request.POST['username']
password = request.POST['password']
try:
user = User.objects.get(username=username)
user.password = password
user.save()
except Exception as e:
print(e)
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '重置密码失败!',
'resultData': False
})
else:
return JsonResponse({
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '重置密码成功!',
'resultData': True,
})
@require_POST
@login_required
def show_config_file(request):
token = request.META.get("HTTP_AUTHORIZATION")
model_type = request.POST['model_type']
try:
path = os.path.join(BASE_DIR, r'static/common/config_data/'+ model_type + '.yaml')
data = utils.read_txt(path)
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '配置文件加载失败!',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '配置文件加载成功!',
'resultData': ''.join(data),
})
@require_POST
@login_required
def show_service_file(request):
# token = request.META.get("HTTP_AUTHORIZATION")
service_type = request.POST['service_type']
service_name = request.POST['service_name']
example_dir = os.path.join(BASE_DIR, 'static/common/', service_type, service_name)
temp = tempfile.TemporaryFile()
archive = zipfile.ZipFile(temp, 'w', zipfile.ZIP_DEFLATED)
print(example_dir)
filenames = os.listdir(example_dir)
for filename in filenames:
archive.write(os.path.join(example_dir, filename), filename)
archive.close()
length = temp.tell()
temp.seek(0)
wrapper = FileWrapper(temp)
response = HttpResponse(wrapper, content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename=example.zip'
response['Content-Length'] = length
return response
@require_POST
@login_required
@transaction.atomic
def delete_file_row_manage(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
try:
path = os.path.join(UPLOAD_FOLDER, str(path_timestamp) + '/')
if os.path.exists(path):
shutil.rmtree(path)
version_manage = VersionManage.objects.get(path=path_timestamp)
if version_manage:
model_id = version_manage.model.id
print(model_id)
all_version = VersionManage.objects.filter(model_id=model_id)
if len(all_version) == 1:
version_manage.delete()
version_manage.model.delete()
elif version_manage.state == '训练成功':
model_manage = version_manage.model
version_manage.delete()
model_manage.version_num = max(0, model_manage.version_num - 1)
model_manage.save()
else:
version_manage.delete()
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '删除失败!',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '删除成功!',
'resultData': True,
})
@require_POST
@login_required
def file_upload(request):
token = request.META.get("HTTP_AUTHORIZATION")
files = request.FILES.getlist('files')
path_timestamp = request.POST['path_timestamp']
if not path_timestamp:
path_timestamp = int(round(time.time() * 1000000))
path = os.path.join(UPLOAD_FOLDER, str(path_timestamp))
try:
for file in files:
print('上传文件名称为%s' % file.name)
if file and (file.name.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS):
filename = secure_filename(file.name)
os.makedirs(path, exist_ok=True)
#default_storage.save(os.path.join(path, filename), file)
with open(os.path.join(path, filename), 'wb') as f:
for chunk in file.chunks():
f.write(chunk)
else:
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '数据文件格式错误!',
'resultData': False,
})
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '文件上传失败!',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '文件上传成功!',
'resultData': str(path_timestamp),
})
@require_POST
@login_required
def download_zip(request):
"""
Final working version, implementing: zip packaging, download, deletion.
:param filename:
:return:
"""
path_timestamp = request.POST['path_timestamp']
token = request.META.get("HTTP_AUTHORIZATION")
print(path_timestamp)
file_dir = os.path.join(UPLOAD_FOLDER, path_timestamp)
try:
if not os.path.exists(file_dir):
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '该文件夹不存在!',
'resultData': False
})
else:
temp = tempfile.TemporaryFile()
archive = zipfile.ZipFile(temp, 'w', zipfile.ZIP_DEFLATED)
num = 0
sub_dirs = []
for cur_dir, dirs, files in os.walk(file_dir):
if num == 0:
sub_dirs = dirs
num += 1
for file in files:
sub_dir = os.path.split(cur_dir)[-1]
if sub_dir in sub_dirs:
archive.write(os.path.join(cur_dir, file), os.path.join(sub_dir, file))
else:
archive.write(os.path.join(cur_dir, file), file)
archive.close()
length = temp.tell()
temp.seek(0)
wrapper = FileWrapper(temp)
response = HttpResponse(wrapper, content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename=archive.zip'
response['Content-Length'] = length
return response
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '下载失败!',
'resultData': False
})
@require_POST
def forget_password(request):
try:
username = request.POST['username']
mobile_number = request.POST['mobile_number']
mail = request.POST['mail']
user = User.objects.get(username=username)
if user.mobile_number == mobile_number:
if user.mail == mail:
mail_username = "15617380221@163.com"
mail_pwd = "2698641198cjh"
mail_sender = "15617380221@163.com"
mail_receiver = [mail]
email_title = "郑州数能AI算法小组"
interaction.sendMail(mail_username, mail_pwd, mail_sender, mail_receiver, email_title)
return JsonResponse({
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '个人信息验证成功,密码已发至邮箱!',
'resultData': True
})
else:
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '邮箱账号填写错误!',
'resultData': False
})
else:
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '手机号填写错误!',
'resultData': False
})
except Exception as e:
print(e)
return JsonResponse({
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '信息对比失败!',
'resultData': False
})
@require_POST
@login_required
def show_log_file(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
try:
path = UPLOAD_FOLDER + path_timestamp
files = [filename for filename in os.listdir(path) if 'log' in filename]
log_path = os.path.join(path, files[0])
data = utils.read_txt(log_path)
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '日志文件加载失败!',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '日志文件加载成功!',
'resultData': ''.join(data),
})
@require_POST
def validate_code(request):
pic = interaction.Picture()
img, code = pic.proPicture()
output_buffer = BytesIO()
img.save(output_buffer, format='JPEG')
byte_data = output_buffer.getvalue()
base64_str = base64.b64encode(byte_data)
base64_str = 'data:image/jpg;base64,' + str(base64_str, 'utf-8')
data = {'img': base64_str, 'code': code}
return JsonResponse({
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '生成成功!',
'resultData': data,
})
@require_POST
@login_required
@transaction.atomic
def run_train(request):
token = request.META.get("HTTP_AUTHORIZATION")
task_name = request.POST['task_name']
function_type = request.POST['function_type']
model_type = request.POST['model_type']
path_timestamp = request.POST['path_timestamp']
config_file = request.POST['config_file']
version_num = request.POST['version_num']
model_id = request.POST['model_id']
creator = request.POST['creator']
create_time = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
config_path = os.path.join(UPLOAD_FOLDER, path_timestamp)
logger = logging.getLogger(path_timestamp)
logger.setLevel(logging.INFO)
fh = logging.FileHandler(os.path.join(config_path,'train.log'), encoding='utf8')
ch = logging.StreamHandler()
# formatter = logging.Formatter(
# '[%(asctime)s][%(thread)d][%(filename)s][line: %(lineno)d][%(levelname)s] ## %(message)s')
# fh.setFormatter(formatter)
# ch.setFormatter(formatter)
logger.addHandler(fh)
logger.addHandler(ch)
if not version_num:
version_num = 0
new_version = 1
else:
new_version = 0
if not model_id:
model_manage = ModelManage.objects.create(
task_name=task_name,
function_type=function_type,
model_type=model_type,
version_num=int(version_num),
create_date=create_time,
)
model_id = max(ModelManage.objects.values_list('id', flat=True))
else:
model_manage = ModelManage.objects.get(id=model_id)
if not new_version:
versions = VersionManage.objects.filter(model_id=model_id)
new_version = max([int(version.version.replace('V', '')) for version in versions])+1
version_manage = VersionManage.objects.create(model_id=model_id,
version='V'+str(new_version),
create_date=create_time,
state='正在训练',
creator=creator,
path=path_timestamp,
)
try:
config_path = interaction.update_config_file(config_path, config_file)
print(config_path)
train_dict = {
# 'fasttext': FastTextRunner(config_path),
# 'xgboost': XgboostClassifyRunner(config_path),
# 'logistic': LogisticClassifyRunner(config_path),
# 'flair': FlairClassifyRunner(config_path),
# 'textcnn': TextcnnClassifyRunner(config_path),
'kmeans': KmeansRunner(config_path)}
train_dict[model_type].train(logger)
end_time = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
version_manage.end_date = end_time
version_manage.state = '训练成功'
version_manage.save()
model_manage.version_num = int(version_num) + 1
model_manage.save()
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '训练成功!',
'resultData': True,
})
except Exception as e:
print(e)
end_time = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
version_manage.end_date = end_time
version_manage.state = '训练失败'
version_manage.save()
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': str(e),
'resultData': False,
})
finally:
logging.Logger.manager.loggerDict.pop(path_timestamp)
logger.manager = None
logger.handlers = []
@require_POST
@login_required
def query_manage(request):
token = request.META.get("HTTP_AUTHORIZATION")
try:
task_name = request.POST['task_name']
function_type = request.POST['function_type']
model_type = request.POST['model_type']
begin_cdate = request.POST['begin_date']
end_cdate = request.POST['end_date']
page_size = request.POST['page_size']
current_page = request.POST['current_page']
manager_list, len_managers = interaction.select_manage(task_name, function_type, model_type, begin_cdate, end_cdate, page_size, current_page)
# manager_list = [model_to_dict(manager) for manager in manager_list]
manager_list = [ModelManage.toDict(manager) for manager in manager_list]
result = {'current_page': int(current_page), 'page_size': int(page_size), 'data': manager_list, 'total': len_managers}
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '查询失败',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '查询成功',
'resultData': result,
})
@require_POST
@login_required
def query_version(request):
token = request.META.get("HTTP_AUTHORIZATION")
try:
begin_cdate = request.POST['begin_date']
end_cdate = request.POST['end_date']
model_id = request.POST['model_id']
page_size = request.POST['page_size']
current_page = request.POST['current_page']
version_list, len_versions = interaction.select_version(model_id, begin_cdate, end_cdate, page_size, current_page)
# manager_list = [model_to_dict(manager) for manager in manager_list]
manager_list = [VersionManage.toDict(version) for version in version_list]
print(manager_list)
result = {'current_page': int(current_page), 'page_size': int(page_size), 'data': manager_list, 'total': len_versions}
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '查询失败',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '查询成功',
'resultData': result,
})
@require_POST
@login_required
def query_service_manage(request):
token = request.META.get("HTTP_AUTHORIZATION")
try:
name = request.POST['name']
begin_cdate = request.POST['begin_date']
end_cdate = request.POST['end_date']
state = request.POST['state']
page_size = request.POST['page_size']
current_page = request.POST['current_page']
username = request.POST['username']
manager_list, len_managers = interaction.select_service_manage(
name, begin_cdate, end_cdate, state,
username, page_size, current_page)
# manager_list = [model_to_dict(manager) for manager in manager_list]
manager_list = [ServiceManage.toDict(manager) for manager in manager_list]
result = {'current_page': int(current_page), 'page_size': int(page_size), 'data': manager_list, 'total': len_managers}
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': e,
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '查询成功',
'resultData': result,
})
@require_POST
@login_required
@transaction.atomic
def delete_file_row_service(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
try:
path = os.path.join(UPLOAD_FOLDER, str(path_timestamp))
if os.path.exists(path):
shutil.rmtree(path)
ServiceManage.objects.filter(path=path_timestamp).delete()
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '删除失败!',
'resultData': False,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '删除成功!',
'resultData': True,
})
@require_POST
@login_required
def download_xlsx(request):
path_timestamp = request.POST['path_timestamp']
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
xls_path = os.path.join(path, 'result.xlsx')
with open(xls_path, 'rb') as file:
data = file.readlines()
response = HttpResponse(data, content_type='application/vnd.ms-excel')
response['Content-Disposition'] = 'attachment; filename=result.xlsx'
return response
@require_POST
@login_required
def query_subject(request):
token = request.META.get("HTTP_AUTHORIZATION")
current_page = request.POST['current_page']
page_size = request.POST['page_size']
try:
subjects = SubjectManage.objects.all()
len_subjects = len(subjects)
page = Paginator(subjects, page_size)
maxpages = page.num_pages # total number of pages
pIndex = int(current_page)
# clamp the requested page to the valid range
if pIndex > maxpages:
pIndex = maxpages
subject_list = [SubjectManage.toDict(subject) for subject in list(page.page(pIndex))] # data for the current page
result_data = {'current_page': int(current_page),
'page_size': int(page_size),
'data': subject_list,
'total': len_subjects}
print(result_data)
except Exception as e:
return JsonResponse({
'token': token,
'handleMsg': 'fail',
'isHandleSuccess': False,
'logs': str(e),
'resultData': None,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '查询成功!',
'resultData': result_data,
})
@require_POST
@login_required
def query_task_name(request):
token = request.META.get("HTTP_AUTHORIZATION")
task_name = request.POST['task_name']
try:
model_manages = ModelManage.objects.filter(task_name__contains=task_name)[:20]
task_names = [ModelManage.toDict(i)['task_name'] for i in model_manages]
except Exception as e:
return JsonResponse({
'token': token,
'handleMsg': 'fail',
'isHandleSuccess': False,
'logs': str(e),
'resultData': None,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '查询成功!',
'resultData': task_names,
})
from model.classify.views.fasttext_classify import FastTextConfig
from model.classify.views.fasttext_classify.data import FastTextDataLoader
from model.classify.views.fasttext_classify.data import FastTextProcess
from model.classify.views.fasttext_classify import FastTextModel
from model.classify.views.fasttext_classify import FastTextEvaluator
from model.classify.views.fasttext_classify import FastTextRunner
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class ClassifyConfig(AppConfig):
name = 'classify'
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from django.urls import path
from basic_service import views
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 11:24
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base import BaseConfig
class FastTextConfig(BaseConfig.BaseConfig):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base import BaseEvaluator
class FastTextEvaluator(BaseEvaluator.BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
\ No newline at end of file
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:18
# @Author : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
import fasttext
from model.base import BaseModel
class FastTextModel(BaseModel.BaseModel):
def __init__(self, config_path):
super().__init__(config_path)
def building_model(self, input, autotuneValidationFile):
model = fasttext.train_supervised(input=input,
autotuneValidationFile=autotuneValidationFile,
autotuneDuration=self.model_config['autotuneDuration'],
autotuneModelSize=self.model_config['autotuneModelSize'])
model.save_model(self.model_config['model_path'])
return model
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
from model.base import BaseRunner
from model.classify import FastTextProcess
from model.classify import FastTextModel
from model.classify import FastTextEvaluator
class FastTextRunner(BaseRunner.BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.ftp = FastTextProcess.FastTextProcess(config_path)
self.ftm = FastTextModel.FastTextModel(config_path)
self.fte = FastTextEvaluator.FastTextEvaluator(config_path)
def train(self, logger):
train_path, test_path = self.ftp.runner_process(logger)
model = self.ftm.building_model(input=train_path, autotuneValidationFile=test_path)
with open(test_path, encoding='utf8') as file:
test_data = file.readlines()
true_labels, predict_labels = [], []
for text in test_data:
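# NOTE: the parsing below assumes fastText-formatted lines of the form '__label__<digit> <text>';
# the [0] / [1:-1] slicing would truncate labels longer than one character.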
label = text.replace('__label__', '')[0]
text = text.replace('__label__', '')[1:-1]
true_labels.append(int(label))
predict_label = model.predict(text)[0][0].replace('__label__', '')
# print(pre_label)
predict_labels.append(int(predict_label))
evaluate_result = self.fte.evaluate(true_labels, predict_labels, label_mapping=None, logger=logger)
print(evaluate_result)
return 'success'
# if __name__ == '__main__':
# state = FastTextRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 10:28
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:19
# @Author : 程婷婷
# @FileName: FastTextDataLoader.py
# @Software: PyCharm
from model.base import BaseDataLoader
class FastTextDataLoader(BaseDataLoader.BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:14
# @Author : 程婷婷
# @FileName: XgboostClassifyProcess.py
# @Software: PyCharm
import re
import time
from model.base import BaseDataProcess
from model.classify import FastTextDataLoader
class FastTextProcess(BaseDataProcess.BaseDataProcess):
def __init__(self, config_path):
super().__init__(config_path)
self.ftdl = FastTextDataLoader.FastTextDataLoader(config_path)
def remove_char(self, content):
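# Strips the characters matched by graph_filter: astral-plane symbols (e.g. emoji), stray surrogate
# halves, lowercase ASCII letters, newlines and other whitespace.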
graph_filter = re.compile(u'[\U00010000-\U0010ffff\uD800-\uDBFF\uDC00-\uDFFFa-z\n\s]')
content = graph_filter.sub('', content)
return content
def process(self, data, min_content):
processed_data = []
i = 0
for record in data:
record = self.remove_char(record)
if len(record) > min_content:
methods = self.process_config['tokenizer']
if methods == 'PerceptronLexicalAnalyzer':
record = self.pla_tokenizer(record)
else:
record = self.jieba_tokenizer(record)
processed_data.append(record)
i += 1
else:
i += 1
pass
if (i+1)%100 == 0 or i+1 == len(data):
print(time.strftime('%Y-%m-%d %H:%M:%S'),'第',i+1,'条文本分词完毕')
return processed_data
def transform_data(self, data, labels):
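# Prepends fastText's label prefix to every tokenized document, one sample per line,
# e.g. transform_data(['经济 数据 发布'], [2]) -> ['__label__2 经济 数据 发布'].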
format_data = []
for i in range(len(data)):
fasttext_line = "__label__{} {}".format(labels[i], data[i])
format_data.append(fasttext_line)
return format_data
def runner_process(self, logger):
df = self.ftdl.read_file()
processed_data = self.process(df['content'], min_content=10)
# if self.process_config['label_encode']:
if type(df['label'][0]) == int:
labels = df['label']
else:
all_label = list(set(df['label']))
self.label_mapping = {v: k for k, v in dict(enumerate(all_label)).items()}
labels = df['label'].map(self.label_mapping)
print(labels)
format_data = self.transform_data(processed_data, labels)
if self.process_config['use_dev']:
train_data_set, test_data_set, dev_data_set = self.split_dataset(format_data, use_dev=self.process_config['use_dev'])
else:
train_data_set, test_data_set = self.split_dataset(format_data, use_dev=self.process_config['use_dev'])
with open(self.process_config['train_file_path'], 'w', encoding='utf-8') as trainf, \
open(self.process_config['test_file_path'], 'w', encoding='utf-8') as testf:
for train_row in train_data_set:
trainf.write(train_row + '\n')
for test_row in test_data_set:
testf.write(test_row + '\n')
logger.info('处理后的数据量为 %d 条' % len(format_data))
logger.info('训练集的数据量为 %d 条' % len(train_data_set))
logger.info('测试集的数据量为 %d 条' % len(test_data_set))
return self.process_config['train_file_path'], self.process_config['test_file_path']
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:18
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config import BaseConfig
class FastTextConfig(BaseConfig):
def __init__(self):
super().__init__()
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base.views.evaluator.BaseEvaluator import BaseEvaluator
class FlairClassifyEvaluator(BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:18
# @Author : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
from torch.optim import Adam
from torch.optim.lr_scheduler import OneCycleLR
from flair.models import TextClassifier
from flair.trainers import ModelTrainer
from model.base.views.model.BaseModel import BaseModel
class FlairClassifyModel(BaseModel):
def __init__(self, config_path):
super().__init__(config_path)
def building_model(self, corpus, document_embeddings, label_dict, loss_weights):
# downstream classifier
classifier = TextClassifier(
document_embeddings,
label_dictionary=label_dict,
loss_weights=loss_weights
)
# model trainer
trainer = ModelTrainer(classifier, corpus, optimizer=Adam)
model_save_path = self.model_config['model_path']
trainer.train(str(model_save_path),
learning_rate=3e-5, # use very small learning rate
mini_batch_size=16,
scheduler=OneCycleLR,
mini_batch_chunk_size=2, # optionally set this if transformer is too much for your machine
max_epochs=3, # terminate after X epochs
monitor_train=True,
monitor_test=True,
checkpoint=True
)
return classifier, trainer
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
import os
import numpy as np
import torch
import random
from model.base.views.runner.BaseRunner import BaseRunner
from model.classify.views.flair_classify.data.FlairClassifyProcess import FlairClassifyProcess
from model.classify.views.flair_classify.FlairClassifyModel import FlairClassifyModel
from model.classify.views.flair_classify.FlairClassifyEvaluator import FlairClassifyEvaluator
class FlairClassifyRunner(BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.fcp = FlairClassifyProcess(config_path)
self.fcm = FlairClassifyModel(config_path)
self.fce = FlairClassifyEvaluator(config_path)
@staticmethod
def reproducibility(seed):
'''
Fix the random seeds for reproducibility
:param seed:
:return:
'''
os.environ["PYTHONHASHSEED"] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
def train(self):
corpus, document_embeddings, label_dict, loss_weights = self.fcp.runner_process()
model = self.fcm.building_model(
corpus=corpus,
document_embeddings=document_embeddings,
label_dict=label_dict,
loss_weights=loss_weights
)
#self.fce.evaluate(true_labels, predict_labels)
return 'success'
# if __name__ == '__main__':
# state = FlairClassifyRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 10:28
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:46
# @Author : 程婷婷
# @FileName: FlairClassifyDataLoader.py
# @Software: PyCharm
from model.base.views.data.BaseDataLoader import BaseDataLoader
class FlairClassifyDataLoader(BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:14
# @Author : 程婷婷
# @FileName: XgboostClassifyProcess.py
# @Software: PyCharm
from flair.data import Sentence, Corpus
import re
from transformers import AutoTokenizer
from torch.utils.data import Dataset
from flair.embeddings import TransformerDocumentEmbeddings
from model.base.views.data.BaseDataProcess import BaseDataProcess
from model.classify.views.flair_classify.data.FlairClassifyDataLoader import FlairClassifyDataLoader
class DataSet(Dataset):
def __init__(
self, data_df, tokenizer,
):
df = data_df.copy()
sep_token = tokenizer.special_tokens_map['sep_token']
self.samples = df.content.apply(lambda s: re.sub("<sep>", sep_token, s)).values
self.labels = df.label.values
self.tokenizer = tokenizer
def __len__(self):
return len(self.samples)
def __getitem__(self, index):
sample, label = self.samples[index], self.labels[index]
sentence = Sentence(sample, use_tokenizer=self.tokenizer.tokenize)
if not len(sentence):
sentence = Sentence(self.tokenizer.unk_token, use_tokenizer=self.tokenizer.tokenize)
print(sample)
print(sentence)
sentence.add_label('class', str(label))
return sentence
class FlairClassifyProcess(BaseDataProcess):
def __init__(self, config_path):
super().__init__(config_path)
self.fcdl = FlairClassifyDataLoader(config_path)
@staticmethod
def add_sep_token(content):
return re.sub('。', '。<sep>', content)
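# The literal '<sep>' marker inserted here is replaced with the tokenizer's real sep_token in
# DataSet above, so each full stop becomes a segment boundary for the transformer encoder.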
def runner_process(self):
df = self.fcdl.read_file()
df = df[df.content.apply(lambda s: s.strip()).apply(len) > 10]
df = df.reset_index(drop=True)
df['content'] = df['content'].apply(lambda s: self.add_sep_token(str(s)))
pos = df.label.value_counts()
loss_weights = (pos.sum() - pos) / pos
self.loss_weights = loss_weights.to_dict()
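# Inverse-frequency class weights: weight(label) = (N_total - N_label) / N_label, so
# under-represented classes contribute more to the classification loss.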
if self.process_config['label_encode']:
all_label = list(set(df['label']))
self.label_mapping = {v: k for k, v in dict(enumerate(all_label)).items()}
labels = df['label'].map(self.label_mapping)
print(labels)
tokenizer = AutoTokenizer.from_pretrained(self.embedding_config['pretrained_name'])
if self.process_config['use_dev']:
train_data_set, test_data_set, dev_data_set = self.split_dataset(df, use_dev=self.process_config['use_dev'])
train_set = DataSet(train_data_set, tokenizer)
test_set = DataSet(test_data_set, tokenizer)
val_set = DataSet(dev_data_set, tokenizer)
corpus = Corpus(train=train_set, dev=val_set, test=test_set)
else:
train_data_set, test_data_set = self.split_dataset(df, use_dev=self.process_config['use_dev'])
train_set = DataSet(train_data_set, tokenizer)
test_set = DataSet(test_data_set, tokenizer)
corpus = Corpus(train=train_set, test=test_set)
label_dict = corpus.make_label_dictionary()
document_embeddings = TransformerDocumentEmbeddings(
self.embedding_config['pretrained_name'], fine_tune=True
)
return corpus, document_embeddings, label_dict, loss_weights
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:43
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config import BaseConfig
class LogisticClassifyConfig(BaseConfig):
def __init__(self):
super().__init__()
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base.views.evaluator.BaseEvaluator import BaseEvaluator
class LogisticClassifyEvaluator(BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:18
# @Author : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
import os
from sklearn import linear_model
import joblib
import heapq
import numpy as np
from sklearn import metrics
from model.base.views.model.BaseModel import BaseModel
class ensemble:
def __init__(self, name, r, data, labels, model_save_path):
self.Name = name
self.Data = data
self.Labels = labels
self.model_save_path = model_save_path
self.Num = len(labels)
self.Index = [i for i in range(self.Num)]
print(self.Name + ' | Train | Title | Number of Data | ' + str(self.Num))
self.Num_Positive = self.Labels.count(1)
self.Num_Negative = self.Labels.count(0)
print(self.Name + ' | Train | Title | Number of Positive | ' + str(self.Num_Positive))
print(self.Name + ' | Train | Title | Number of Negative | ' + str(self.Num_Negative))
print(self.Name + ' | Train | Title | Data Loaded' + '\n')
self.Ite = 1
self.Index_Retain_Train = [i for i in range(self.Num)]
self.Index_Retain_Predict = [i for i in range(self.Num)]
self.Index_Delete = {}
self.Recall = []
self.Precision = []
self.F1 = []
self.Threshold = {}
self.recall = r
self.config = True
def classifier(self, data, labels):
clf = linear_model.SGDClassifier(loss='log', penalty='l1', alpha=1e-3, class_weight='balanced',
learning_rate='optimal', eta0=0.0)
clf.fit(data, labels)
probabilities = []
probabilities_positive = []
probabilities_negative = []
tmp = clf.predict_proba(data)
for i in range(len(data)):
if labels[i] == 1:
probabilities.append(tmp[i][1])
probabilities_positive.append(tmp[i][1])
else:
probabilities.append(tmp[i][1])
probabilities_negative.append(tmp[i][1])
return clf, probabilities, probabilities_positive, probabilities_negative
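# unit() performs one filtering iteration: fit an SGD logistic model on the currently retained
# samples, set the threshold at roughly the 1st percentile of positive-class probabilities (so
# about 99% of positives stay above it), drop confidently negative samples from the retained
# pools, and record recall/precision/F1; train_title() keeps iterating until positive recall
# falls below the target self.recall.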
def unit(self):
data_train = [self.Data[idx] for idx in self.Index_Retain_Train]
labels_train = [self.Labels[idx] for idx in self.Index_Retain_Train]
num_positive = labels_train.count(1)
num_negative = labels_train.count(0)
print(self.Name + ' | Train | Title | iteration | ' + str(self.Ite) + ' | Logistic Regression ... ...')
clf_lr, probabilities_train, probabilities_positive_train, probabilities_negative_train = self.classifier(
data=data_train, labels=labels_train)
print(self.Name + ' | Train | Title | iteration | ' + str(self.Ite) + ' | Adjust Threshold ... ...')
print(heapq.nsmallest(max(int(0.01 * self.Num_Positive), 1), probabilities_positive_train))
threshold = heapq.nsmallest(max(int(0.01 * self.Num_Positive), 1), probabilities_positive_train)[-1]
Index_Retain_Train = []
for i in range(num_positive + num_negative):
if labels_train[i] == 1:
Index_Retain_Train.append(self.Index_Retain_Train[i])
elif probabilities_train[i] > threshold:
Index_Retain_Train.append(self.Index_Retain_Train[i])
self.Index_Retain_Train = Index_Retain_Train
data_predict = [self.Data[idx] for idx in self.Index_Retain_Predict]
tmp = clf_lr.predict_proba(data_predict).tolist()
probabilities_predict = list(map(list, zip(*tmp)))[1]
Predictions = [0 for i in range(self.Num)]
Index_Retain_Predict = []
self.Index_Delete[self.Ite] = []
for i in range(len(data_predict)):
if probabilities_predict[i] >= threshold:
Index_Retain_Predict.append(self.Index_Retain_Predict[i])
Predictions[self.Index_Retain_Predict[i]] = 1
else:
self.Index_Delete[self.Ite].append(self.Index_Retain_Predict[i])
self.Index_Retain_Predict = Index_Retain_Predict
recall = metrics.recall_score(self.Labels, Predictions, pos_label=1)
precision = metrics.precision_score(self.Labels, Predictions, pos_label=1)
f1 = metrics.f1_score(self.Labels, Predictions, pos_label=1)
if recall >= self.recall:
self.f1 = f1
print(self.Name + ' | Train | Title | iteration | ' + str(
self.Ite) + ' | Positive Recall | ' + '%.4f' % recall)
print(self.Name + ' | Train | Title | iteration | ' + str(
self.Ite) + ' | Positive Precision | ' + '%.4f' % precision)
print(self.Name + ' | Train | Title | iteration | ' + str(
self.Ite) + ' | Positive F1 | ' + '%.4f' % f1 + '\n')
self.Recall.append(recall)
self.Precision.append(precision)
self.F1.append(f1)
joblib.dump(clf_lr,os.path.join(
self.model_save_path ,self.Name + '_iteration_' + str(self.Ite) + '_train_title_classifier.m'))
self.Threshold[self.Ite] = threshold
self.Ite += 1
else:
print(self.Name + ' | Train | Title | iteration | ' + str(
self.Ite) + ' | Positive Recall Less Than Given Recall' + '\n')
self.Index_Retain_Predict += self.Index_Delete[self.Ite]
del self.Index_Delete[self.Ite]
self.config = False
def train_title(self):
while self.config == True:
self.unit()
return self.Threshold, self.Index_Retain_Predict, self.Index_Delete
def train_content(self, data, Index_Retain_Predict_Title, r, logger):
data_train = data
labels_train = [self.Labels[idx] for idx in Index_Retain_Predict_Title]
print(self.Name + ' | Train | Content | Number of Data | ' + str(len(labels_train)))
num_positive = labels_train.count(1)
num_negative = labels_train.count(0)
print(self.Name + ' | Train | Content | Number of Positive | ' + str(num_positive))
print(self.Name + ' | Train | Content | Number of Negative | ' + str(num_negative) + '\n')
clf_xg = linear_model.SGDClassifier(loss='log', penalty='l1', alpha=1e-3, class_weight='balanced',
learning_rate='optimal', eta0=0.0)
clf_xg.fit(data_train, labels_train)
joblib.dump(clf_xg, os.path.join(
self.model_save_path , self.Name + '_train_content_classifier.m'))
tmp = clf_xg.predict_proba(np.array(data_train)).tolist()
probabilities_predict = list(map(list, zip(*tmp)))[1]
Recall = []
Precision = []
F1 = []
Threshold = []
for t in [x / 1000 for x in range(1001)]:
Predictions = [0 for i in range(self.Num)]
for i in range(len(data_train)):
if probabilities_predict[i] >= t:
Predictions[Index_Retain_Predict_Title[i]] = 1
recall = metrics.recall_score(self.Labels, Predictions, pos_label=1)
precision = metrics.precision_score(self.Labels, Predictions, pos_label=1)
f1 = metrics.f1_score(self.Labels, Predictions, pos_label=1)
Recall.append(recall)
Precision.append(precision)
F1.append(f1)
Threshold.append(t)
if recall < r:
break
print(self.Name + ' | Train | Content | Finally | Threshold | ' + '%.4f' % Threshold[-1] + '\n')
print(self.Name + ' | Train | Content | Finally | Positive Recall | ' + '%.4f' % Recall[-1])
print(self.Name + ' | Train | Content | Finally | Positive Precision | ' + '%.4f' % Precision[-1])
print(self.Name + ' | Train | Content | Finally | Positive F1 | ' + '%.4f' % F1[-1] + '\n')
logger.info('模型评估结果如下:')
logger.info('精确率为%.2f' % Precision[-1])
logger.info('召回率为%.2f' % Recall[-1])
logger.info('F1值为%.2f' % F1[-1])
Index_Retain_Predict = []
Index_Delete = []
for i in range(len(data_train)):
if probabilities_predict[i] >= Threshold[-1]:
Index_Retain_Predict.append(Index_Retain_Predict_Title[i])
else:
Index_Delete.append(Index_Retain_Predict_Title[i])
return Threshold[-1], Index_Retain_Predict, Index_Delete
class LogisticClassifyModel(BaseModel):
def __init__(self, config_path):
super().__init__(config_path)
def building_model(self, tfidf_title=None, tfidf_content=None, labels=None, r=None, logger=None):
if not os.path.exists(self.model_config['model_path']):
os.makedirs(self.model_config['model_path'])
lr = ensemble(name=self.model_config['name'],
r=self.model_config['r'],
data=tfidf_title,
labels=labels,
model_save_path=self.model_config['model_path'])  # r is tunable: title-stage filtering stops once positive recall drops below r, then moves to the content stage.
if tfidf_title:
Threshold, self.Index_Retain_Predict_Title, Index_Delete_Title = lr.train_title()
joblib.dump(Threshold, os.path.join(self.model_config['model_path'], self.model_config['name'] + '_title_threshold.pkl'))
return Threshold, self.Index_Retain_Predict_Title, Index_Delete_Title
elif tfidf_content:
threshold, Index_Retain_Predict_Content, Index_Delete_Content = lr.train_content(
data=tfidf_content,
Index_Retain_Predict_Title=self.Index_Retain_Predict_Title,
r=0.8,
logger=logger)  # r is tunable: content-stage training finally stops once positive recall drops below r.
joblib.dump(threshold, os.path.join(self.model_config['model_path'],
self.model_config['name'] + '_content_threshold.pkl'))
return threshold, Index_Retain_Predict_Content, Index_Delete_Content
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
from model.base.views.runner.BaseRunner import BaseRunner
from model.classify.views.logistic_classify.data.LogisticClassifyProcess import LogisticClassifyProcess
from model.classify.views.logistic_classify.LogisticClassifyModel import LogisticClassifyModel
from model.classify.views.logistic_classify.LogisticClassifyEvaluator import LogisticClassifyEvaluator
class LogisticClassifyRunner(BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.lcp = LogisticClassifyProcess(config_path)
self.lcm = LogisticClassifyModel(config_path)
self.lce = LogisticClassifyEvaluator(config_path)
def train(self, logger):
tfidf_title, idf_title, labels = self.lcp.title_process(logger)
Threshold,Index_Retain_Predict_Title,Index_Delete_Title = self.lcm.building_model(
tfidf_title=tfidf_title,
labels=labels,
logger=logger
)
tfidf_content, idf_content = self.lcp.content_process(Index_Retain_Predict_Title)
threshold, Index_Retain_Predict_Content, Index_Delete_Content = self.lcm.building_model(
labels = labels,
tfidf_content=tfidf_content,
r=0.8,
logger=logger
)  # r is tunable: training finally stops once recall drops below r.
return 'success'
# if __name__ == '__main__':
# state = LogisticClassifyRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/17 9:08
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 14:53
# @Author : 程婷婷
# @FileName: LogisticClassifyDataLoader.py
# @Software: PyCharm
from model.base.views.data.BaseDataLoader import BaseDataLoader
class LogisticClassifyDataLoader(BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:14
# @Author : 程婷婷
# @FileName: XgboostClassifyProcess.py
# @Software: PyCharm
import re
import os
import jieba
import joblib
from sklearn.utils import shuffle
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from model.base.views.data.BaseDataProcess import BaseDataProcess
from model.classify.views.logistic_classify.data.LogisticClassifyDataLoader import LogisticClassifyDataLoader
class LogisticClassifyProcess(BaseDataProcess):
def __init__(self, config_path):
super().__init__(config_path)
self.lcdl = LogisticClassifyDataLoader(config_path)
def document2sentences(self, document, key_words):
symbols = frozenset(u",。!?\n:;“”|)\u3000")
out_sentences = ''
for symbol in symbols:
document = document.replace(symbol, '。')
document = document.replace('\t', '').replace('\n', '')
sentences = document.split('。')
for sentence in sentences:
for key in key_words:
weight = sentence.count(key)
sentence += '。'
out_sentences += sentence * weight
return out_sentences
def filtrate_words(self, words):
find_chinese = re.compile(u"[\u4e00-\u9fa5]+")
symbols = "[A-Za-z0-9\[\`\~\!\@\#\$\^\&\*\(\)\=\|\{\}\'\:\;\'\,\[\]\.\<\>\/\?\~\\@\#\\\&\*\%]"
stopwords = self.lcdl.read_stopwords()
filtrated_words = []
for j in range(len(words)):
if re.findall(find_chinese, words[j]) == []:
continue
elif re.sub(symbols, "", re.findall(find_chinese, words[j])[0]) == '':
continue
elif re.sub(symbols, "", re.findall(find_chinese, words[j])[0]) in stopwords:
continue
else:
filtrated_words.append(re.sub(symbols, "", re.findall(find_chinese, words[j])[0]))
return ' '.join(filtrated_words)
def get_chi(self, data, labels):
num = len(data)
length = len(data[0])
# print(type(labels[0]))
# print(labels[0])
print('================')
print(len(labels))
print(len(data))
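# Chi-square feature scoring over a 2x2 term/label contingency table:
#   a = positive docs containing the term, b = positive docs without it,
#   c = negative docs containing the term, d = negative docs without it,
#   chi2 = N * (a*d - b*c)^2 / ((a+b)(c+d)(a+c)(b+d)),
# where num_p = a + b and num_n = c + d in the code below.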
data_p = [data[i] for i in range(num) if int(labels[i]) == 1]
data_n = [data[i] for i in range(num) if int(labels[i]) == 0]
num_p = len(data_p)
num_n = len(data_n)
print('正样本为%s', str(num_p))
print('负样本为%s', str(num_n))
data_p_t = list(map(list, zip(*data_p)))
data_n_t = list(map(list, zip(*data_n)))
chi_square = []
for i in range(length):
b = data_p_t[i].count(0)
d = data_n_t[i].count(0)
a = num_p - b
c = num_n - d
if num_p * num_n * (a + c) * (b + d) == 0:
chi_square.append(0)
else:
chi_square.append((num * pow(a * d - b * c, 2)) / (num_p * num_n * (a + c) * (b + d)))
return chi_square
def get_vocabulary_title(self, titles_tokenized_filtered, contents_tokenized_filtered, labels):
data = [
self.embedding_config['title_weight'] * (titles_tokenized_filtered[i] + ' ') + contents_tokenized_filtered[i]
for i in range(len(labels))]
cv = CountVectorizer(ngram_range=(1, 3), min_df=2)
tf = cv.fit_transform(data)
vocabulary_list = cv.get_feature_names()
print(' | Train | Title | Vocabulary | Original Length | ' + str(len(vocabulary_list)))
num_key_words = int(len(vocabulary_list) * self.embedding_config['title_feature_ratio'])
print(' | Train | Title | Vocabulary | Length | ' + str(num_key_words))
print(tf.toarray())
tf_weights = tf.toarray().tolist()
chi_square = self.get_chi(tf_weights, labels)
print(' | Train | Title | Vocabulary | Complete by CHI ......')
original_vocabulary_chi_square = [(vocabulary_list[i], chi_square[i]) for i in range(len(vocabulary_list))]
sorted_original_vocabulary_chi_square = sorted(original_vocabulary_chi_square, key=lambda x: x[1], reverse=True)
vocabulary_list = [sorted_original_vocabulary_chi_square[i][0] for i in range(num_key_words)]
vocabulary_title = {}
k = 0
for word in vocabulary_list:
vocabulary_title[word] = k
k += 1
return vocabulary_title
def get_tfidf_title(self, titles_tokenized_filtered, contents_tokenized_filtered, vocabulary_title):
data = [
self.embedding_config['title_weight'] * (titles_tokenized_filtered[i] + ' ') + contents_tokenized_filtered[i]
for i in range(len(self.labels))]
cv = CountVectorizer(ngram_range=(1, 3), vocabulary=vocabulary_title)
train_tf = cv.fit_transform(data)
print(' | Train | Title | TF | Completed ......')
tfidf_transformer = TfidfTransformer(norm='l2', use_idf=True, smooth_idf=True)
train_tfidf = tfidf_transformer.fit_transform(train_tf)
train_tfidf_weights = train_tfidf.toarray().tolist()
print(' | Train | Title | TFIDF | Completed ......')
idf = tfidf_transformer.idf_.tolist()
return train_tfidf_weights, idf
def get_vocabulary_content(self, contents_tokenized_filtered, labels, index):
data = [contents_tokenized_filtered[idx] for idx in index]
labels = [labels[idx] for idx in index]
tf_transformer = CountVectorizer(ngram_range=(1, 3), min_df=2)
tf = tf_transformer.fit_transform(data)
vocabulary_list = tf_transformer.get_feature_names()
print(' | Train | Content | Vocabulary | Original Length | ' + str(len(vocabulary_list)))
num_key_words = int(len(vocabulary_list) * self.embedding_config['content_feature_ratio'])
print(' | Train | Content | Vocabulary | Length | ' + str(num_key_words))
tf_weights = tf.toarray().tolist()
chi_square = self.get_chi(tf_weights, labels)
print(' | Train | Content | Vocabulary | Complete by CHI ......')
original_vocabulary_chi_square = [(vocabulary_list[i], chi_square[i]) for i in range(len(vocabulary_list))]
sorted_original_vocabulary_chi_square = sorted(original_vocabulary_chi_square, key=lambda x: x[1], reverse=True)
vocabulary_list = [sorted_original_vocabulary_chi_square[i][0] for i in range(num_key_words)]
self.vocabulary_content = {}
k = 0
for word in vocabulary_list:
self.vocabulary_content[word] = k
k += 1
return self.vocabulary_content
def get_tfidf_content(self, contents_tokenized_filtered, vocabulary_content, index):
data = [contents_tokenized_filtered[idx] for idx in index]
tf_transformer = CountVectorizer(ngram_range=(1, 3), vocabulary=vocabulary_content)
train_tf = tf_transformer.fit_transform(data)
print(' | Train | Content | TF | Completed ......')
tfidf_transformer = TfidfTransformer(norm='l2', use_idf=True, smooth_idf=True)
train_tfidf = tfidf_transformer.fit_transform(train_tf)
train_tfidf_weights = train_tfidf.toarray().tolist()
print(' | Train | Content | TFIDF | Completed ......')
idf = tfidf_transformer.idf_.tolist()
return train_tfidf_weights, idf
def title_process(self, logger):
df = self.lcdl.read_file()
key_words = []
for word in list(set(df['key_words'])):
if str(word) and str(word) != 'nan':
key_words.append(word)
jieba.add_word(str(word))
df.dropna(subset=['content', 'label'], inplace=True)
df = shuffle(df)
df = df.reset_index(drop=True)
all_label = list(set(df['label']))
self.label_mapping = {v: k for k, v in dict(enumerate(all_label)).items()}
df['label'] = df['label'].map(self.label_mapping)
print('有用的数据共%d条' % len(df))
logger.info('处理后的数据量为 %d 条' %len(df))
train_set, test_set = self.split_dataset(df, use_dev=self.process_config['use_dev'])
logger.info('训练集的数据量为 %d 条' % len(train_set))
logger.info('测试集的数据量为 %d 条' % len(test_set))
train_set = train_set.reset_index(drop=True)
self.labels = train_set['label']
train_set['content'] = [self.document2sentences(content, key_words) for content in train_set['content']]
titles_tokenized = [jieba.lcut(sentences) for sentences in train_set['title']]
contents_tokenized = [jieba.lcut(sentences) for sentences in train_set['content']]
titles_tokenized_filtered = [self.filtrate_words(words) for words in titles_tokenized]
print(' | Train | Content | Filtered ......')
self.contents_tokenized_filtered = [self.filtrate_words(words) for words in contents_tokenized]
vocabulary_title = self.get_vocabulary_title(titles_tokenized_filtered,
self.contents_tokenized_filtered,
self.labels)
# joblib.dump(vocabulary_title, filename=)
tfidf_title, idf_title = self.get_tfidf_title(titles_tokenized_filtered,
self.contents_tokenized_filtered,
vocabulary_title)
labels = self.labels.tolist()
if not os.path.exists(self.embedding_config['embedding_path']):
os.makedirs(self.embedding_config['embedding_path'])
joblib.dump(vocabulary_title, filename=os.path.join(
self.embedding_config['embedding_path'] ,self.embedding_config['name']+'_vocabulary_title.pkl'))
joblib.dump(idf_title, filename=os.path.join(
self.embedding_config['embedding_path'] ,self.embedding_config['name']+'_idf_title.pkl'))
return tfidf_title, idf_title, labels
def content_process(self, Index_Retain_Predict_Title):
vocabulary_content = self.get_vocabulary_content(self.contents_tokenized_filtered,
self.labels,
Index_Retain_Predict_Title)  # feature_ratio is tunable: it caps the vocabulary size to avoid excessive runtime or memory use.
tfidf_content, idf_content = self.get_tfidf_content(self.contents_tokenized_filtered,
vocabulary_content,
Index_Retain_Predict_Title)
if not os.path.exists(self.embedding_config['embedding_path']):
os.makedirs(self.embedding_config['embedding_path'])
joblib.dump(vocabulary_content, os.path.join(
self.embedding_config['embedding_path'] ,self.embedding_config['name']+'_vocabulary_content.pkl'))
joblib.dump(idf_content, os.path.join(
self.embedding_config['embedding_path'], self.embedding_config['name'] + '_idf_content.pkl'))
return tfidf_content, idf_content
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 14:47
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config import BaseConfig
class TextcnnConfig(BaseConfig):
def __init__(self):
super().__init__()
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base.views.evaluator.BaseEvaluator import BaseEvaluator
class TextcnnClassifyEvaluator(BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:18
# @Author : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
from keras.layers.merge import concatenate
from keras.utils import np_utils
from tensorflow.python.keras.regularizers import l2
from keras.layers.embeddings import Embedding
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dropout, Dense, Input
from keras.models import Model
from model.base.views.model.BaseModel import BaseModel
class TextcnnClassifyModel(BaseModel):
def __init__(self, config_path):
super().__init__(config_path)
def building_model(self,
x_train_padded_seqs,
y_train,
x_test_padded_seqs,
y_test,
embedding_matrix,
classes_weight,
vocab):
# Build the TextCNN model
main_input = Input(shape=(self.model_config['input_shape'],), dtype='float32')
# Word embedding using the pre-trained vectors; 768 is the embedding dimension
embedder = Embedding(len(vocab) + 1, 768, input_length=self.model_config['input_shape'], weights=[embedding_matrix], trainable=False)
# embedder = Embedding(len(vocab) + 1, 300, input_length=50, trainable=False)
embed = embedder(main_input)
# kernel_size: an integer (or a list/tuple of a single integer), the window length of the convolution kernel
# padding='same' zero-pads the borders so the output length matches the input length
cnn1 = Conv1D(filters=256,
kernel_size=3,
padding='same',
strides=1,
activation=self.model_config['activation'],
kernel_regularizer=l2(0.05))(embed)
# pool_size: size of the pooling window
cnn1 = MaxPooling1D(pool_size=int(self.model_config['input_shape'])-2)(cnn1)
cnn2 = Conv1D(filters=256,
kernel_size=4,
padding='same',
strides=1,
activation=self.model_config['activation'],
kernel_regularizer=l2(0.05))(embed)
cnn2 = MaxPooling1D(pool_size=int(self.model_config['input_shape'])-3)(cnn2)
cnn3 = Conv1D(filters=256,
kernel_size=5,
padding='same',
strides=1,
activation=self.model_config['activation'],
kernel_regularizer=l2(0.05))(embed)
cnn3 = MaxPooling1D(pool_size=int(self.model_config['input_shape'])-4)(cnn3)
cnn = concatenate([cnn1, cnn2, cnn3], axis=-1)
flat = Flatten()(cnn)
drop = Dropout(0.2)(flat)
# units: the output dimension of this layer
main_output = Dense(units=2, activation='softmax')(drop)
model = Model(inputs=main_input, outputs=main_output)
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
one_hot_labels = np_utils.to_categorical(y_train, num_classes=2)
model.fit(x_train_padded_seqs,
one_hot_labels,
batch_size=self.model_config['batch_size'],
epochs=self.model_config['epochs'],
shuffle=self.model_config['shuffle'],
class_weight=classes_weight)
model.save(self.model_config['model_path'])
return model
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
import numpy as np
from model.base.views.runner.BaseRunner import BaseRunner
from model.classify.views.textcnn_classify.data.TextcnnClassifyProcess import TextcnnClassifyProcess
from model.classify.views.textcnn_classify.TextcnnClassifyModel import TextcnnClassifyModel
from model.classify.views.textcnn_classify.TextcnnClassifyEvaluator import TextcnnClassifyEvaluator
class TextcnnClassifyRunner(BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.tcp = TextcnnClassifyProcess(config_path)
self.tcm = TextcnnClassifyModel(config_path)
self.tce = TextcnnClassifyEvaluator(config_path)
def train(self, logger):
x_train_padded_seqs, train_label, x_test_padded_seqs, test_label = self.tcp.runner_process(logger)
classes_weight = self.tcp.class_weight(train_label)
print(classes_weight)
model = self.tcm.building_model(
x_train_padded_seqs=x_train_padded_seqs,
y_train=train_label,
x_test_padded_seqs=x_test_padded_seqs,
y_test=test_label,
embedding_matrix=self.tcp.embedding_matrix,
classes_weight=classes_weight,
vocab=self.tcp.vocab
)
result = model.predict(x_test_padded_seqs) # predicted probability of each class for every test sample
predict_label = np.argmax(result, axis=1) # take the label with the highest probability
self.tce.evaluate(test_label, predict_label, self.tcp.label_mapping, logger)
return 'success'
# if __name__ == '__main__':
# state = TextcnnClassifyRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/17 9:08
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 15:53
# @Author : 程婷婷
# @FileName: TextcnnClassifyDataLoader.py
# @Software: PyCharm
from model.base.views.data.BaseDataLoader import BaseDataLoader
class TextcnnClassifyDataLoader(BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:14
# @Author : 程婷婷
# @FileName: XgboostClassifyProcess.py
# @Software: PyCharm
import pandas as pd
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from bert_serving.client import BertClient
import joblib
from sklearn.utils import class_weight
from model.base.views.data.BaseDataProcess import BaseDataProcess
from model.classify.views.textcnn_classify.data.TextcnnClassifyDataLoader import TextcnnClassifyDataLoader
class TextcnnClassifyProcess(BaseDataProcess):
def __init__(self, config_path):
super().__init__(config_path)
self.tcdl = TextcnnClassifyDataLoader(config_path)
def tokenier(self, data, label):
tokenizer = Tokenizer() # create a Tokenizer object
tokenizer.fit_on_texts(data) # assign word indices, ordered by word frequency
self.vocab = tokenizer.word_index # the index assigned to each word
df = pd.DataFrame(columns=['content', 'label'])
df['content'] = data
df['label'] = label
train_set, test_set = self.split_dataset(df, use_dev=self.process_config['use_dev'])
x_train_word_ids = tokenizer.texts_to_sequences(train_set['content'])
x_test_word_ids = tokenizer.texts_to_sequences(test_set['content']) # convert texts to integer sequences
x_train_padded_seqs = pad_sequences(x_train_word_ids, maxlen=3500)
x_test_padded_seqs = pad_sequences(x_test_word_ids, maxlen=3500)
# with open(tokenizer_path, 'wb') as file:
# pickle.dump(tokenizer, file, protocol=pickle.HIGHEST_PROTOCOL)
joblib.dump(tokenizer, filename=self.embedding_config['tokenizer_path'])
return x_train_padded_seqs, train_set['label'], x_test_padded_seqs, test_set['label']
def get_embeddingMatrix(self, vocab):
# Initialize the big matrix that stores all word vectors; note the extra first row of zeros, reserved for padding.
embedding_matrix = np.zeros((len(vocab) + 1, 768))
bert_client = BertClient(port=5558, port_out=5559)
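# Assumes a bert-serving-server instance is already listening on ports 5558/5559; each vocabulary
# word is encoded to a 768-dimensional vector and written into its row of the matrix.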
for word, i in vocab.items():
try:
# print(word)
embedding_vector = bert_client.encode(word.split(' '))
if embedding_vector.shape == (1, 768):
embedding_vector = embedding_vector.mean(axis=0)
embedding_matrix[i] = embedding_vector
else:
print(embedding_vector.shape)
print('----------类型错误----------')
except KeyError:
continue
return embedding_matrix
def class_weight(self, y_train):
weight = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
classes_weight = dict(enumerate(weight))
return classes_weight
def runner_process(self, logger):
df = self.tcdl.read_file()
all_label = list(set(df['label']))
self.label_mapping = {v: k for k, v in dict(enumerate(all_label)).items()}
labels = df['label'].map(self.label_mapping)
processed_data = self.process(df['content'], min_content=self.process_config['min_content'])
print(processed_data)
x_train_padded_seqs, train_label, x_test_padded_seqs, test_label = self.tokenier(processed_data,
labels)
logger.info('处理后的数据量为 %d 条' % (len(train_label) + len(test_label)))
logger.info('训练集的数据量为 %d 条' % len(train_label))
logger.info('测试集的数据量为 %d 条' % len(test_label))
self.embedding_matrix = self.get_embeddingMatrix(self.vocab)
joblib.dump(self.embedding_matrix, filename=self.embedding_config['embedding_path'])
return x_train_padded_seqs, train_label, x_test_padded_seqs, test_label
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 15:52
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
from django.shortcuts import render
# Create your views here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config import BaseConfig
class XgboostClassifyConfig(BaseConfig):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base.views.evaluator.BaseEvaluator import BaseEvaluator
class XgboostClassifyEvaluator(BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:18
# @Author : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
import scipy.sparse.csr
import scipy.sparse.csc
import pickle
import numpy as np
from xgboost import XGBClassifier
from model.base.views.model.BaseModel import BaseModel
class XgboostClassify(object):
def __init__(self, label_dict, signature, lr=0.1, reg_alpha=0, reg_lambda=1, objective='binary:logitraw', \
with_sample_weight=True, subsample=1, min_child_weight=1, scale_pos_weight=1, thres=0.5):
self.lr = lr
self.label_dict = label_dict
self.signature = signature
self.reg_alpha = reg_alpha
self.reg_lambda = reg_lambda
self.objective = objective
self.with_sample_weight = with_sample_weight
self.min_child_weight = min_child_weight
self.scale_pos_weight = scale_pos_weight
self.thres = thres
self.clf = None
def set_signature(self, new_signature):
self.signature = new_signature
def train(self, X, Y, save_to=None):
print(len(self.label_dict))
assert len(self.label_dict) == 2, 'It should have exactly two classes.'
if isinstance(X, scipy.sparse.csr.csr_matrix):
data = X.tocsc()
elif isinstance(X, np.ndarray):
data = X
else:
data = np.array(X, copy=False)
if isinstance(Y, scipy.sparse.csr.csr_matrix):
label = Y.todense()
else:
label = np.array(Y, copy=False)
if len(np.unique(label)) == 1:
print('Only contains one label, training stopped.')
return
N_0 = np.sum(label == 0)
N_1 = np.sum(label == 1)
w_0 = (N_0 + N_1) / (2. * N_0)
w_1 = (N_0 + N_1) / (2. * N_1)
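# Balanced sample weights: w_c = N / (2 * N_c), so both classes contribute equally to the loss
# when with_sample_weight is enabled.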
self.clf = XGBClassifier(reg_alpha=self.reg_alpha, reg_lambda=self.reg_lambda, objective=self.objective, \
min_child_weight=self.min_child_weight, scale_pos_weight=self.scale_pos_weight,
learning_rate=self.lr)
if self.with_sample_weight:
self.clf.fit(data, label, sample_weight=[w_0 if l == 0 else w_1 for l in label])
else:
self.clf.fit(data, label)
# print('Finished.')
if save_to:
self.save(save_to)
def save(self, save_to):
file_name = save_to + ('-%s.xgb' % self.signature)
with open(file_name, 'wb') as f:
pickle.dump((self.clf, self.label_dict, self.signature), f)
@staticmethod
def load(file_path):
with open(file_path, 'rb') as f:
clf, label_dict, signature = pickle.load(f)
xgb = XgboostClassify(label_dict, signature)
xgb.clf = clf
return xgb
def predict(self, X, thres=0.5, return_real_label=False):
prob = self.predict_pro(X)
label = np.zeros((prob.shape[0],))
label[prob[:, 1] >= thres] = 1
if return_real_label:
return [self.label_dict[l] for l in label]
else:
return label.astype(np.int64)
def sigmoid(self, x):
return 1 / (1 + np.exp(-x))
def predict_pro(self, X):
if not (isinstance(X, scipy.sparse.csr.csr_matrix) or isinstance(X, np.ndarray) or isinstance(X,
scipy.sparse.csc.csc_matrix)):
X = np.array(X, copy=False)
if isinstance(X, scipy.sparse.csr.csr_matrix):
X = X.tocsc()
if self.clf and X.shape[0] > 0:
if len(X.shape) == 1:
X = [X]
prob = self.clf.predict_proba(X)
prob = np.array([self.sigmoid(i) for i in prob[:]])
return prob
else:
if not self.clf:
print('模型还没训练,请先训练模型')
else:
print('数据不能为空')
class XgboostClassifyModel(BaseModel):
def __init__(self, config_path):
super().__init__(config_path)
def building_model(self, label_dict, signature, X_train, Y_train):
xgb = XgboostClassify(label_dict,
signature,
lr=self.model_config['lr'],
reg_alpha=self.model_config['reg_alpha'],
reg_lambda=self.model_config['reg_lambda'],
objective=self.model_config['objective'],
with_sample_weight=self.model_config['with_sample_weight'],
subsample=self.model_config['subsample'],
thres=self.model_config['thres'],
min_child_weight=self.model_config['min_child_weight'],
scale_pos_weight=self.model_config['scale_pos_weight'])
clf_save_to = self.model_config['model_path']
print('开始训练')
xgb.train(X_train, Y_train, save_to=clf_save_to)
print('训练结束')
return xgb
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
import time
import numpy as np
from model.base.views.runner.BaseRunner import BaseRunner
from model.classify.views.xgboost_classify.data.XgboostClassifyProcess import XgboostClassifyProcess
from model.classify.views.xgboost_classify.XgboostClassifyModel import XgboostClassifyModel
from model.classify.views.xgboost_classify.XgboostClassifyEvaluator import XgboostClassifyEvaluator
class XgboostClassifyRunner(BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.signature = int(time.time())
self.xcp = XgboostClassifyProcess(config_path)
self.xcm = XgboostClassifyModel(config_path)
self.xce = XgboostClassifyEvaluator(config_path)
def train(self, logger):
train_set, test_set = self.xcp.runner_process(signature=self.signature)
print(self.xcp.label_mapping)
label_dict = self.xcp.label_mapping
X_train = np.delete(train_set, -1, axis=1)
Y_train = train_set[:, -1].astype(np.int64)
print(X_train.shape)
print(Y_train)
print(list(set(Y_train)))
logger.info('处理后的数据量为 %d 条' %(len(train_set)+len(test_set)))
logger.info('训练集的数据量为 %d 条'%len(train_set))
logger.info('测试集的数据量为 %d 条'%len(test_set))
print('==========训练集有%d条数据==========' %len(X_train))
model = self.xcm.building_model(
label_dict,
self.signature,
X_train,
Y_train
)
# xg = XgboostClassify(label_dict=self.xcp.label_mapping, signature=self.signature)
X_test = np.delete(test_set, -1, axis=1)
true_label = test_set[:, -1].astype(np.int64)
print(list(set(true_label)))
predict_label = model.predict(X_test, thres=self.runner_config['thres'])
predict_label = predict_label.tolist()
print(list(set(predict_label)))
self.xce.evaluate(true_label, predict_label, label_dict, logger)
return 'success'
# if __name__ == '__main__':
# state = XgboostClassifyRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/17 9:08
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:14
# @Author : 程婷婷
# @FileName: XgboostClassifyProcess.py
# @Software: PyCharm
import numpy as np
from sklearn.utils import class_weight
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_selection import mutual_info_classif, SelectPercentile
import scipy.linalg
import jieba
from sklearn.base import BaseEstimator, TransformerMixin
from model.base.views.data.BaseDataProcess import BaseDataProcess
from model.classify.views.xgboost_classify.data.XgoostClassifyDataLoader import XgboostClassifyDataLoader
class Vocabulary:
def __init__(self, signature, min_word_len=2, name='voc'):
self.signature = signature
self.min_word_len = min_word_len
self.name = name
self.voc = dict()
self.freq = dict()
self.doc_freq = dict()
self.oov = None
self.size = 0
self._fixed_voc = False
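# Vocabulary maps word -> integer id (self.voc) and tracks term frequency (self.freq) and document
# frequency (self.doc_freq) keyed by id; once fixed, it is passed to CountVectorizer via to_dict()
# in DataProcessor.transform().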
def set_state(self, fixed=False):
assert fixed in [True, False, 0, 1]
self._fixed_voc = fixed
def get_state(self):
state = 'Fixed' if self._fixed_voc else 'Not fixed'
return state
def shuffle(self):
self.check_state()
idx = np.random.permutation(self.size)
shuffled_voc = dict()
shuffled_freq = dict()
shuffled_doc_freq = dict()
for key, id in self.voc.items():
shuffled_voc[key] = idx[id]
shuffled_freq[idx[id]] = self.freq[id]
shuffled_doc_freq[idx[id]] = self.doc_freq[id]
del self.voc, self.freq, self.doc_freq
self.voc, self.freq, self.doc_freq = shuffled_voc, shuffled_freq, shuffled_doc_freq
def _is_useless(self, x):
if len(x) < self.min_word_len:
return True
if x.strip(
'''#&$_%^*-+=<>`~!@(())??/\\[]{}—"';::;,。,.‘’“”|…\n abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890''') == '':
return True
return False
def update(self, words):
if self._fixed_voc:
raise Exception('Fixed vocabulary does not support update.')
for word in words:
if not self._is_useless(word):
id = self.voc.get(word, None)
if id is None: # new word
self.voc[word] = self.size
self.freq[self.size] = 1
self.doc_freq[self.size] = 0 # create doc_freq item
self.size += 1
else:
self.freq[id] += 1
for word in set(words):
if not self._is_useless(word):
id = self.voc.get(word, None)
if id is not None:
self.doc_freq[id] += 1 # update doc_freq
def get(self, word):
return self.voc.get(word, self.oov)
def __getitem__(self, word):
return self.voc.get(word, self.oov)
def __contains__(self, word):
return self.voc.__contains__(word)
def __iter__(self):
return iter(self.voc)
def __sizeof__(self):
return self.voc.__sizeof__() + self.freq.__sizeof__() + self.signature.__sizeof__() + self.size.__sizeof__() + \
self.name.__sizeof__() + self._fixed_voc.__sizeof__() + self.oov.__sizeof__() + self.doc_freq.__sizeof__()
def __delitem__(self, word): # delete would destroy the inner representation
if self._fixed_voc:
raise Exception('Fixed vocabulary does not support deletion.')
else:
raise NotImplementedError
def get_size(self):
return self.size
def clear(self):
del self.voc, self.freq, self.doc_freq
self.voc = dict()
self.freq = dict()
self.doc_freq = dict()
self.size = 0
self._fixed_voc = False
def check_state(self):
return len(self.voc) == self.size and len(self.freq) == self.size and len(self.doc_freq) == self.size
def to_dict(self):
return self.voc
def set_signature(self, new_signature):
self.signature = new_signature
def save(self, file_name=None):
save_to = (file_name if file_name else self.name) + '-%s.voc' % self.signature
with open(save_to, 'wb') as f:
pickle.dump([self.voc,
self.freq,
self.doc_freq,
self.size,
self.min_word_len,
self.oov,
self._fixed_voc,
self.name,
self.signature], f)
@classmethod
def load(cls, file_name):
with open(file_name, 'rb') as f:
[voc, freq, doc_freq, size, min_word_len, oov, _fixed, name, signature] = pickle.load(f)
voc_from_file = cls(signature, name=name)
voc_from_file.voc = voc
voc_from_file.freq = freq
voc_from_file.doc_freq = doc_freq
voc_from_file.size = size
voc_from_file.min_word_len = min_word_len
voc_from_file.oov = oov
voc_from_file._fixed_voc = _fixed
voc_from_file.signature = signature
return voc_from_file
class DataProcessor:
def __init__(self, data, transformer='tf', transformer_norm='l2'):
self.data = data
transformer = transformer.lower()
assert transformer in ['tf', 'tfidf']
self.transformer_type = transformer
self.transformer_norm = transformer_norm
self.transformer = None
def reset(self):
self.transformer = None
self.cv = None
def preprocess(self, label_dict, _all=False, _emotion=False):
processed_data = {}
processed_label = {}
processed_label_dict = {}
# only_have_one_label_key = []
for key in self.data:
print(key)
if not _emotion: # _all=False, _emotion=False
processed_data[key] = [' '.join(jieba.lcut(str(record[0]))) for record in self.data[key]]
label = [record[1] for record in self.data[key]]
processed_label[key] = label
processed_label_dict[key] = label_dict
processed_data[key] = np.array(processed_data[key])
print(processed_label_dict)
return processed_data, processed_label, processed_label_dict
def update_vocab(self, vocab, processed_data):
if type(processed_data) == dict:
for key in processed_data:
for record in processed_data[key]:
vocab.update(record.split(' '))
else:
for record in processed_data:
vocab.update(record.split(' '))
assert vocab.check_state(), 'Something wrong with vocabulary.'
def transform(self, vocab, data, label, with_feature_selection=False, feature_selection_method='FDA', binary=False):
vocab.set_state(fixed=True)
assert feature_selection_method in ['FDA', 'SelectPercentile']
if not self.transformer:
self.cv = CountVectorizer(decode_error='replace', vocabulary=vocab.to_dict(), binary=binary)
if self.transformer_type == 'tf':
self.transformer = TfidfTransformer(norm=self.transformer_norm, use_idf=False)
else:
self.transformer = TfidfTransformer(norm=self.transformer_norm, use_idf=True)
if type(data) == dict:
transformed_data = {}
for key in data:
if with_feature_selection:
if feature_selection_method == 'FDA':
transformed_data[key] = FDA().fit_transform(
self.transformer.transform(self.cv.transform(data[key])), label[key]
)
else:
transformed_data[key] = SelectPercentile(mutual_info_classif, 20).fit_transform(
self.transformer.transform(self.cv.transform(data[key])), label[key]
)
else:
transformed_data[key] = self.transformer.transform(self.cv.transform(data[key]))
else:
if with_feature_selection:
if feature_selection_method == 'FDA':
transformed_data = FDA().fit_transform(
self.transformer.transform(self.cv.transform(data)), label
)
else:
transformed_data = SelectPercentile(mutual_info_classif, 20).fit_transform(
self.transformer.transform(self.cv.transform(data)), label
)
else:
transformed_data = self.transformer.transform(self.cv.transform(data))
return transformed_data
class FDA(BaseEstimator, TransformerMixin):
def __init__(self, alpha=1e-4):
'''Fisher discriminant analysis
Arguments:
----------
alpha : float
Regularization parameter
'''
self.alpha = alpha
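# fit() accumulates the within-class scatter matrix (regularized with alpha * I) and the
# between-class scatter matrix, then solves the generalized eigenproblem S_b v = lambda S_w v;
# transform() projects samples onto the resulting eigenvectors (self.components_).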
def fit(self, X, Y):
'''Fit the LDA model
Parameters
----------
X : array-like, shape [n_samples, n_features]
Training data
Y : array-like, shape [n_samples]
Training labels
Returns
-------
self : object
'''
n, d_orig = X.shape
classes = np.unique(Y)
assert (len(Y) == n)
if isinstance(X, scipy.sparse.csr.csr_matrix):
mean_global = X.mean(axis=0)
else:
mean_global = np.mean(X, axis=0, keepdims=True)
scatter_within = self.alpha * np.eye(d_orig)
scatter_between = np.zeros_like(scatter_within)
for c in classes:
n_c = np.sum(Y == c)
if n_c < 2:
continue
if isinstance(X, scipy.sparse.csr.csr_matrix):
mu_diff = X[Y == c].mean(axis=0) - mean_global
else:
mu_diff = np.mean(X[Y == c], axis=0, keepdims=True) - mean_global
scatter_between = scatter_between + n_c * np.dot(mu_diff.T, mu_diff)
if isinstance(X, scipy.sparse.csr.csr_matrix):
scatter_within = scatter_within + n_c * np.cov(X[Y == c].todense(), rowvar=0)
else:
scatter_within = scatter_within + n_c * np.cov(X[Y == c], rowvar=0)
e_vals, e_vecs = scipy.linalg.eig(scatter_between, scatter_within)
self.e_vals_ = e_vals
self.e_vecs_ = e_vecs
self.components_ = e_vecs.T
return self
def transform(self, X):
'''Transform data by FDA
Parameters
----------
X : array-like, shape [n_samples, n_features]
Data to be transformed
Returns
-------
X_new : array, shape (n_samples, n_atoms)
'''
return X.dot(self.components_.T)
def fit_transform(self, X, Y):
self.fit(X, Y)
return self.transform(X)
class XgboostClassifyProcess(BaseDataProcess):
def __init__(self, config_path):
super().__init__(config_path)
self.xcdl = XgboostClassifyDataLoader(config_path)
def class_weight(self, y_train):
weight = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
classes_weight = dict(enumerate(weight))
return classes_weight
def runner_process(self, signature):
df = self.xcdl.read_file()
all_label = list(set(df['label']))
self.label_mapping = {v: k for k, v in dict(enumerate(all_label)).items()}
labels = df['label'].map(self.label_mapping)
processed_data = df['content'].map(lambda x: ' '.join(jieba.lcut(x)))
dp = DataProcessor(processed_data,
transformer=self.embedding_config['transformer'],
transformer_norm=self.embedding_config['transformer_norm'])
dp.reset()
vocab = Vocabulary(signature=signature, name='vocab-%s' % self.embedding_config['name'], min_word_len=2)
dp.update_vocab(vocab, processed_data)
print('%s, after updating, %s' % (self.embedding_config['name'], vocab.get_size()))
transformed_data = dp.transform(vocab, processed_data, labels)
vocab_save_to = self.embedding_config['embedding_path']
print(vocab.to_dict())
vocab.save(vocab_save_to)
merged_data = np.append(transformed_data.toarray(), labels.values.reshape((-1, 1)), axis=1)
print(merged_data.shape)
train_set, test_set = self.split_dataset(merged_data, self.process_config['use_dev'])
return train_set, test_set
# import time
# signature = int(time.time())
# XgboostClassifyProcess().runner_process(signature)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 16:23
# @Author : 程婷婷
# @FileName: XgoostClassifyDataLoader.py
# @Software: PyCharm
from model.base.views.data.BaseDataLoader import BaseDataLoader
class XgboostClassifyDataLoader(BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 16:23
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class ClusteringConfig(AppConfig):
name = 'clustering'
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config import BaseConfig
class KMeansConfig(BaseConfig):
def __init__(self):
super().__init__()
# print(KmeansConfig()._parsed_file)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base.views.evaluator.BaseEvaluator import BaseEvaluator
from sklearn import metrics
class KmeansEvaluator(BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
def compute_silhouette(self, X, labels):
score = metrics.silhouette_score(X, labels, metric='euclidean')
return score
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:18
# @Author : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
from model.base.views.model.BaseModel import BaseModel
import numpy as np
from sklearn.cluster import KMeans
from sklearn import metrics
class KmeansModel(BaseModel):
def __init__(self, config_path):
super().__init__(config_path)
def chose_k(self, data):
silhouette_int = -1 # initial threshold for the mean silhouette score
for n_clusters in range(3, 20):
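# For each candidate K, fit K-Means and keep the model with the highest mean silhouette score.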
kmeans = KMeans(n_clusters=n_clusters,
init=self.model_config['init'],
n_init=self.model_config['n_init'],
max_iter=self.model_config['max_iter'])
cluster_labels_tmp = kmeans.fit_predict(data) # 训练聚类模型
silhouette_tmp = metrics.silhouette_score(data, cluster_labels_tmp) # 得到每个K下的平均轮廓系数
if silhouette_tmp > silhouette_int: # 如果平均轮廓系数更高
best_k = n_clusters # 将最好的K存储下来
silhouette_int = silhouette_tmp # 将最好的平均轮廓得分存储下来
best_kmeans = kmeans # 将最好的模型存储下来
print('=========已获得最优模型,共分为%d类========='%best_k)
return best_kmeans
def building_model(self, data):
if not self.model_config['n_clusters']:
model = self.chose_k(data)
else:
kmeans = KMeans(n_clusters=self.model_config['n_clusters'],
init=self.model_config['init'],
n_init=self.model_config['n_init'],
max_iter=self.model_config['max_iter'])
model = kmeans.fit(data)
print('=========共分为%s类=========' % str(self.model_config['n_clusters']))
classes = model.labels_
print(list(set(classes)))
data_cluster = [[] for i in range(max(classes)+1)]
result = [[] for j in range(max(classes)+1)]
for i in range(np.array(data).shape[0]):
for j in range(max(classes)+1):
if classes[i] == j:
result[j].append(i)
data_cluster[j].append(data[i])
self.save(model)
return model, data_cluster, result
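# 补充说明(仅作参考,数据为假设值):building_model中按簇归类样本的双重循环
# 也可以用numpy向量化写法表达,二者结果一致:
#   classes = np.array([0, 1, 0, 2, 1, 0])                       # 假设的聚类标签
#   data = np.arange(18).reshape(6, 3)                           # 假设的特征矩阵
#   result = [np.where(classes == j)[0].tolist() for j in range(classes.max() + 1)]
#   data_cluster = [data[idx].tolist() for idx in result]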
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: KmeansRunner.py
# @Software: PyCharm
import numpy as np
import pandas as pd
from model.base.views.runner.BaseRunner import BaseRunner
from model.clustering.views.KMeans.KmeansModel import KmeansModel
from model.clustering.views.KMeans.data.KMeansDataLoader import KMeansDataLoader
from model.clustering.views.KMeans.data.KmeansProcess import KmeansProcess
from model.clustering.views.KMeans.KmeansEvaluator import KmeansEvaluator
import logging
format = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=format, level=logging.INFO)
class KmeansRunner(BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.km = KmeansModel(config_path)
self.kdl = KMeansDataLoader(config_path)
self.kp = KmeansProcess(config_path)
self.ke = KmeansEvaluator(config_path)
def single_train(self, logger):
df = self.kdl.read_file()
if 'labels' not in df.columns:
df['labels'] = ''
logger.info('处理后的数据量为 %d 条' %(len(df)))
transformed_data = self.kp.runner_process(df['content'], df['labels'])
model, data_cluster, result = self.km.building_model(transformed_data)
centroids = model.cluster_centers_
self.labels_ = model.labels_
result_sorted = []
similarity = []
for j in range(max(self.labels_)+1):
distances = [(np.linalg.norm(centroids[j] - data_cluster[j][i]), result[j][i]) for i in
range(len(result[j]))]
distances_sorted = sorted(distances, key=lambda x: x[0])
result_sorted.append([value[1] for value in distances_sorted])
similarity.append([value[0] for value in distances_sorted])
score = self.ke.compute_silhouette(X=transformed_data, labels=self.labels_)
print('====================轮廓系数为%.4f====================' %score)
logger.info('轮廓系数为 %.4f ' %score)
return model, result_sorted, similarity, df
def train(self, logger):
model, result_sorted, similarity, df = self.single_train(logger)
columns = list(df.columns)
columns.append('distance')
writer = pd.ExcelWriter(self.runner_config['save_fpath'])
for j in range(max(self.labels_)+1):
df_out = pd.DataFrame(columns=columns)
if len(result_sorted[j]):
for i in range(len(result_sorted[j])):
row = list(df.iloc[result_sorted[j][i]])
row.append(float(similarity[j][i]))
df_out.loc[i] = row
print('第%s类有%d条数据' %(j, len(result_sorted[j])))
logger.info('第%s类有%d条数据' %(j, len(result_sorted[j])))
df_out.to_excel(writer, sheet_name='sheet' + str(j + 1), index=False)
writer.close()
return 'success'
#if __name__ == '__main__':
# state = KmeansRunner().write_file()
# print(state)
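# 补充说明(仅作参考,数据为假设值):single_train中按到质心距离升序排列簇内样本的逻辑:
#   centroid = np.array([0.0, 0.0])                              # 假设的簇质心
#   members = [np.array([1.0, 1.0]), np.array([0.1, 0.1])]       # 假设的簇内样本向量
#   member_ids = [10, 42]                                        # 样本在原DataFrame中的行号
#   distances = sorted(zip([np.linalg.norm(centroid - m) for m in members], member_ids))
#   print([idx for _, idx in distances])   # 距质心最近的样本排在最前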
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 10:28
# @Author : 程婷婷
# @FileName: __init__.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 17:39
# @Author : 程婷婷
# @FileName: KMeansDataLoader.py
# @Software: PyCharm
from model.base.views.data.BaseDataLoader import BaseDataLoader
class KMeansDataLoader(BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
\ No newline at end of file
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:14
# @Author : 程婷婷
# @FileName: KmeansProcess.py
# @Software: PyCharm
import numpy as np
import re
import time
from model.base.views.data.BaseDataProcess import BaseDataProcess
class KmeansProcess(BaseDataProcess):
def __init__(self, config_path):
super().__init__(config_path)
def remove_char(self, content):
# 仅保留中文字符和常见标点(逗号、句号、问号、感叹号、分号)
graph_filter = re.compile(r'[^\u4e00-\u9fa5,。\.,?\?!!;;]')
content = graph_filter.sub('', content)
return content
def process(self, data, min_content):
processed_data = []
i = 0
for record in data:
record = self.remove_char(record)
if len(record) > min_content:
methods = self.process_config['tokenizer']
if methods == 'PerceptronLexicalAnalyzer':
record = self.pla_tokenizer(record)
else:
record = self.jieba_tokenizer(record)
processed_data.append(record)
i += 1
else:
i += 1
pass
if (i+1)%100 == 0 or i+1 == len(data):
print(time.strftime('%Y-%m-%d %H:%M:%S'),'第',i+1,'条文本分词完毕')
return processed_data
def runner_process(self, data, labels):
# all_label = list(set(labels))
# label_mapping = {v: k for k, v in dict(enumerate(all_label)).items()}
processed_data = self.process(data, min_content=10)
transformed_data1, feature_words = self.bag_of_words(processed_data, labels)
processed_data2 = []
for i in processed_data:
record = i.split(' ')
processed_data2.append(record)
transformed_data2 = self.word2vec(processed_data2, feature_words=feature_words)
transformed_data = np.dot(transformed_data1, transformed_data2)
return transformed_data
# import pandas as pd
# df = pd.read_excel(r'E:\working\model_train\KMeans\data\test.xlsx')
# kp = KmeansProcess()
# kp.runner_process()
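# 补充示例(仅作参考):remove_char会过滤掉中文及常见标点之外的字符,例如:
#   kp.remove_char('今天天气真好,Hello 123!')   # 返回:'今天天气真好,!'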
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 17:38
# @Author : 程婷婷
# @FileName: __init__.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 11:23
# @Author : 程婷婷
# @FileName: __init__.py
# @Software: PyCharm
from django.shortcuts import render
# Create your views here.
## /基础服务之短文本相似度计算
```text
短文本相似度计算提供了两个短文本之间的语义相似度计算能力,支持多种计算方式,包括cos_sim、lev_sim、min_hash、sim_hash等。
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/basic/doc-similarity-single/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDE1MzQzNH0.lTaat1GCB1pffWu1pmTJrpPGW8O_KEsy8QvuefLs6Lo | Text | 是 | []
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
text_1 | 内蒙古没有下大雨了 | Text | 是 | 文本内容1
text_2 | 内蒙古下大雨了 | Text | 是 | 与文本内容1进行对比的文本内容2
sim_algorithm_name | sim_hash | Text | 是 | 计算相似度的方法
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
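#### 调用示例(参考)
下面给出一个调用该接口的参考示例(仅作示意,token取值为假设值,返回结构以实际接口为准):
```python
import requests

url = 'http://192.168.1.149:8020/basic/doc-similarity-single/'
headers = {'Authorization': '<登录后获取的JWT token>'}
data = {
    'text_1': '内蒙古没有下大雨了',
    'text_2': '内蒙古下大雨了',
    'sim_algorithm_name': 'sim_hash',
}
# 接口要求form-data方式提交,requests的data参数默认即为表单编码
response = requests.post(url, headers=headers, data=data)
print(response.json())
```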
## /场景化服务之关键词挖掘
```text
关键词挖掘功能
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/extraction-keywords/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
pending_file | 测试.docx | Text | 是 | 上传的docx文档或者xlsx文档的名称
user_file | user_dict.txt | Text | 是 | 用户上传的自定义词典txt
path_timestamp | 1629770804649253 | Text | 是 | 文件夹名称
username | ctt | Text | 是 | 登录的用户名称
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /场景化服务之文件上传
```text
文件上传功能,需注意的是path_timestamp。为保证同一页面内上传的文件在同一文件夹内,故第一次上传时path_timestamp为空,接口返回时会自动创建文件夹并返回path_timestamp的值;此后该页面上传时需将path_timestamp的具体值传入。
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/file-upload/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDMyMDQ2OX0.Q2VNzrTMU1G8VG3E3PMRcwlkJ5K0RqGCshzIz1htFgM | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
files | [] | File | 是 | 用户上传的文件
path_timestamp | 1629704627130134 | Text | 是 | 文件夹名称(同页面内第一次上传文件,此参数为空,返回值中会包含此参数的值;第二次以及更多次上传文件时需要带上第一次返回此参数的值)
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
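#### 调用示例(参考)
下面给出两次上传的参考示例,演示path_timestamp的传递方式(仅作示意;示例中假设返回的文件夹标识位于resultData字段,实际以接口返回为准):
```python
import requests

url = 'http://192.168.1.149:8020/base/file-upload/'
headers = {'Authorization': '<登录后获取的JWT token>'}

# 第一次上传:path_timestamp传空,接口自动创建文件夹并返回其标识
with open('测试.docx', 'rb') as f:
    first = requests.post(url, headers=headers,
                          files={'files': f},
                          data={'path_timestamp': ''}).json()
path_timestamp = first['resultData']  # 假设:文件夹标识位于resultData字段

# 同一页面的后续上传:带上第一次返回的path_timestamp
with open('user_dict.txt', 'rb') as f:
    requests.post(url, headers=headers,
                  files={'files': f},
                  data={'path_timestamp': path_timestamp})
```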
## /用户注册
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/register-account/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTg5NDE1NH0.wpTJ5W25A502WPKIDDQeC_NNlIV3Of56bTheLjbkABg | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
username | ly | Text | 是 | 用户名
true_name | ly | Text | 是 | 用户真实姓名
sex | 女 | Text | 是 | 用户性别
mobile_number | 15617380221 | Text | 是 | 用户电话号码
mail | 2698641198@qq.com | Text | 是 | 用户邮箱
id_card | 410527199811565698 | Text | 是 | 用户身份证号码
password | 123456 | Text | 是 | 用户设置的登录密码
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /核查用户名是否存在
```text
核查数据库中是否存在用户输入的用户名称。
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/verify-username/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTg5NDE1NH0.wpTJ5W25A502WPKIDDQeC_NNlIV3Of56bTheLjbkABg | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
username | ctt | Text | 是 | 用户注册时填入的用户名称
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
#### 成功响应示例
```javascript
{
"handleMsg": "success",
"isHandleSuccess": true,
"logs": "此用户名可用!",
"resultData": true
}
```
参数名 | 示例值 | 参数类型 | 参数描述
--- | --- | --- | ---
handleMsg | success | Text |
isHandleSuccess | true | Text |
logs | 此用户名可用! | Text |
resultData | true | Text |
#### 失败响应示例
```javascript
{
"handleMsg": "failure",
"isHandleSuccess": false,
"logs": "该用户名已存在!",
"resultData": false
}
```
参数名 | 示例值 | 参数类型 | 参数描述
--- | --- | --- | ---
handleMsg | failure | Text |
isHandleSuccess | false | Text |
logs | 该用户名已存在! | Text |
resultData | false | Text |
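#### 调用示例(参考)
下面给出一个调用该接口并根据返回值判断用户名是否可用的参考示例(仅作示意,token取值为假设值):
```python
import requests

url = 'http://192.168.1.149:8020/base/verify-username/'
headers = {'Authorization': '<登录后获取的JWT token>'}
result = requests.post(url, headers=headers, data={'username': 'ctt'}).json()

# resultData为true表示用户名可用,为false表示用户名已存在
if result['resultData']:
    print(result['logs'])   # 此用户名可用!
else:
    print(result['logs'])   # 该用户名已存在!
```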
## /重置密码
```text
重置密码功能,根据用户名和用户输入的密码,更新数据库中的密码
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/reset-password/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
username | ctt | Text | 是 | 用户名
password | 123456 | Text | 是 | 用户新密码
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
#### 成功响应示例
```javascript
{
"handleMsg": "success",
"isHandleSuccess": true,
"logs": "重置密码成功!",
"resultData": true
}
```
参数名 | 示例值 | 参数类型 | 参数描述
--- | --- | --- | ---
handleMsg | success | Text |
isHandleSuccess | true | Text |
logs | 重置密码成功! | Text |
resultData | true | Text |
#### 失败响应示例
```javascript
{
"handleMsg": "failure",
"isHandleSuccess": false,
"logs": "重置密码失败!",
"resultData": false
}
```
参数名 | 示例值 | 参数类型 | 参数描述
--- | --- | --- | ---
handleMsg | failure | Text |
isHandleSuccess | false | Text |
logs | 重置密码失败! | Text |
resultData | false | Text |
## /测试 展示配置文件/base/show-config-file/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/show-config-file/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTg5NjA3Nn0.KGp7HWhb61EP-1w6X0y1t9pIDuKmObWlj5muWNJbvIA | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
model_type | textcnn | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 删除文件夹和记录 /base/delete-file-row-manage/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/delete-file-row/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTg5NjA3Nn0.KGp7HWhb61EP-1w6X0y1t9pIDuKmObWlj5muWNJbvIA | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
path_timestamp | 1626320056405440 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 显示日志文件/base/show-log-file/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/show-log-file/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTg5NjA3Nn0.KGp7HWhb61EP-1w6X0y1t9pIDuKmObWlj5muWNJbvIA | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
id | 3 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 生成验证码/base/validate-code/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/validate-code/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 验证用户登录/base/login/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/login
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
username | ctt | Text | 否 |
password | 123456 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 下载zip /base/download-zip/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/download-zip/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
path_timestamp | 1628935910315627 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 查询manage /base/query-manage/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/query-manager/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTg5NjA3Nn0.KGp7HWhb61EP-1w6X0y1t9pIDuKmObWlj5muWNJbvIA | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
task_name | | Text | 否 |
function_type | | Text | 否 |
model_type | | Text | 否 |
begin_date | | Text | 否 |
page_size | 10 | Text | 否 |
end_date | | Text | 否 |
current_page | 1 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 发送邮件重新设置密码/base/forget-password/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/forget-password/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
username | ctt | Text | 否 |
mobile_number | 15617380221 | Text | 否 |
mail | 2698641198@qq.com | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 服务结果文件下载/base/download-xlsx/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/download-xls/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTcxNTMwN30.sJMIlptQoHhqBeGMdgxdJ7WN0PgbqhYRjPde39scj98 | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
pending_file | /home/zzsn/ctt/platform_temporary/test0810.xlsx | Text | 否 |
user_file | /home/zzsn/ctt/platform_temporary/user_dict.txt | Text | 否 |
path_timestamp | 1629770804649253 | Text | 否 |
username | ctt | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 查询service_manage /base/query-service-manage/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/query-service-manage/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDY2MzQ5Mn0.bEHfjDAKo5qoWa1dFSIqtz0fhFWmMRWdoqYPYCZ8Nd0 | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
name | 关键词挖掘 | Text | 否 |
begin_date | 2021-09-01 | Text | 否 |
end_date | 2021-09-12 | Text | 否 |
state | 已完成 | Text | 否 |
page_size | 10 | Text | 否 |
current_page | 1 | Text | 否 |
username | ctt | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 公司名称提取/scenario/extraction-company/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/extraction-company/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTg2NjI4NH0.iOxrDWPASgMoIholJybpZ7wQs92EyJ3c952HdKIlvcc | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
path_timestamp | 1629855525880127 | Text | 否 |
pending_file | 负面信息---350.xlsx | Text | 否 |
user_file | 监控企业信息1.xls | Text | 否 |
username | ctt | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 股票招聘识别/scenario/stock-recruitment-filter/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/stock-recruitment-filter/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTg2NjI4NH0.iOxrDWPASgMoIholJybpZ7wQs92EyJ3c952HdKIlvcc | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
path_timestamp | 110 | Text | 否 |
pending_file | test_file.xlsx | Text | 否 |
user_file | 监控企业信息1.xls | Text | 否 |
username | ctt | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 删除service_manage /base/delete-file-row-service/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/delete-file-row-service/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTg5NDE1NH0.wpTJ5W25A502WPKIDDQeC_NNlIV3Of56bTheLjbkABg | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
path_timestamp | 1629855525880127 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 关联词汇推荐/basic/associated-word-single/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/basic/associated-word-single/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzNjY5MDgxOX0.BkyvWgsS5iVK8rLAde01w8QJh1UbGD4f39FgtocSyc8 | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
text | 混改,数字化转型 | Text | 否 | 多个词以英文逗号隔开,
word_num | 5 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 下载样例文件/base/show-service-file/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/show-service-file/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTk2MTE2OH0.b8EXACgZZbqXCdyTehOLtRbfiyO1RZP_GlVbau_Gm9A | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
service_type | scenario_service | Text | 否 |
service_name | extraction_company_name | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 相似度-去重 /scenario/doc-similarity-duplicate-single/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/doc-similarity-duplicate-single/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDE1MjQzN30.PFqUOsYoRqQpLvtE5_xkbYgbpD72MZvRGh24L4xGONc | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
text_1 | 徐东华(机械工业经济管理研究院院长、书记) | Text | 否 |
text_2 | 天空没有下雨 | Text | 否 |
sim | 0.6 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 自动生成报告/scenario/report-generator-single/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/report-generator-single/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYyOTk4MTgzNn0.V_int4T1l-txK0-q5NGlV_-NhdDzjYvJI72hzmrc5gs | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
title | 千亿民营房企泰禾集团巨额债务违约 | Text | 否 |
sid | 4544 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 专家发言观点/scenario/extraction-speech-single/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/extraction-speech-single/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDMyMDQ2OX0.Q2VNzrTMU1G8VG3E3PMRcwlkJ5K0RqGCshzIz1htFgM | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
title | 华为自主研发 | Text | 否 |
content | 随着华为透露自主研发的操作系统“鸿蒙OS”后,网友对国产操作系统的热情与期待一天比一天旺盛。除了鸿蒙OS频频上头条以外,中兴公司研发的新支点OS也已经出货超两亿套了。不过目前消费者市场上,仍旧很少见到国产操作系统。
在桌面有Windows、移动有iOS、安卓、服务器有Linux系统的情况下,国产操作系统应该如何突围呢?对于这个问题,全球移动通信协会高级顾问、中国移动原董事长王建宙给出了他的看法。
中国移动原董事长王建宙在近日举办的中国“科”公司峰会上表示,5G时代要重视移动操作系统的创新。“目前的5G手机仅仅是加快了上网速度,但这远远不够,应该努力去开发新的功能。”
王建宙认为,现有的5G手机在功能上没有特别的创新,因为本身操作系统还是用的4G操作系统,没有什么变化。2G和3G不一样,2G的操作系统是塞班操作系统,是Windows操作系统,3G时完全不一样,是iOS操作系统,是安卓操作系统。
“现有的移动设备操作系统都是在桌面机操作系统基础上修改和延伸的,不断修改、不断延伸、不断增加功能,但操作系统的功能是管理硬件和软件资源”,王建宙表示“5G带来的是万物互联,这就使得移动设备所面临的功能和环境都发生了非常大的变化。这种变化不仅跟原来的桌面机完全不一样,而且跟原有的手机也不一样,所以我们迫切需要一个更加实时、更加适合于万物互联的移动设备的操作系统。”
王建宙认为,“新的5G操作系统在功能上一定要超过现有操作系统。如果只是跟现有操作系统差不多,也很好,但很难形成一个新的生态系统。所以新操作系统一定要高起点开发,要有新的操作系统,特别是在物联网和人工智能方面。 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 一带一路信息过滤/scenario/project-info-filter-single/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/project-info-filter-single/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDE0MDQ2OX0.XH-1-PmTI4ScKaTTPowXYvVG0WSz8xcQ8qrrx3tNmAQ | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
title | 华为自主研发 | Text | 否 |
content | 随着华为透露自主研发的操作系统“鸿蒙OS”后,网友对国产操作系统的热情与期待一天比一天旺盛。除了鸿蒙OS频频上头条以外,中兴公司研发的新支点OS也已经出货超两亿套了。不过目前消费者市场上,仍旧很少见到国产操作系统。
在桌面有Windows、移动有iOS、安卓、服务器有Linux系统的情况下,国产操作系统应该如何突围呢?对于这个问题,全球移动通信协会高级顾问、中国移动原董事长王建宙给出了他的看法。
中国移动原董事长王建宙在近日举办的中国“科”公司峰会上表示,5G时代要重视移动操作系统的创新。“目前的5G手机仅仅是加快了上网速度,但这远远不够,应该努力去开发新的功能。”
王建宙认为,现有的5G手机在功能上没有特别的创新,因为本身操作系统还是用的4G操作系统,没有什么变化。2G和3G不一样,2G的操作系统是塞班操作系统,是Windows操作系统,3G时完全不一样,是iOS操作系统,是安卓操作系统。
“现有的移动设备操作系统都是在桌面机操作系统基础上修改和延伸的,不断修改、不断延伸、不断增加功能,但操作系统的功能是管理硬件和软件资源”,王建宙表示“5G带来的是万物互联,这就使得移动设备所面临的功能和环境都发生了非常大的变化。这种变化不仅跟原来的桌面机完全不一样,而且跟原有的手机也不一样,所以我们迫切需要一个更加实时、更加适合于万物互联的移动设备的操作系统。”
王建宙认为,“新的5G操作系统在功能上一定要超过现有操作系统。如果只是跟现有操作系统差不多,也很好,但很难形成一个新的生态系统。所以新操作系统一定要高起点开发,要有新的操作系统,特别是在物联网和人工智能方面。 | Text | 否 |
content | 5月10日,山东电建三公司海外市场开发再传捷报!公司与ACWAPower签订沙特朱拜勒3A独立海水淡化项目EPC合同,海水淡化业务板块又添新业绩。沙特朱拜勒3A独立海水淡化项目,位于朱拜勒市达曼法赫德国王国际机场以北约65公里处,项目采用海水反渗透技术,日产水量达60万吨。项目建设成后,对于缓解沙特东部省沿海岸日益增长的用水需求具有重要意义。该项目是公司与ACWAPower签约的第三个海水淡化项目,充分体现了ACWAPower对公司综合实力,以及公司在中东非洲区域项目执行过程中所展现出的卓越管理水平和勇于担当的企业精神的高度认可。同时,该项目也是公司在沙特EPC总承包的第10个大型工程项目 | Text | 否 |
title | 山东电建三公司再传捷报 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 一带一路项目要素抽取/scenario/project-info-extraction-single/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/project-info-extraction-single/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDMxODQyNn0.w9NDZgWbk5oWfYHmdHWfmKrlAcWErBBbnzgx39PuCrs | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
title | 山东电建三公司再传捷报 | Text | 否 |
content | 5月10日,山东电建三公司海外市场开发再传捷报!公司与ACWAPower签订沙特朱拜勒3A独立海水淡化项目EPC合同,海水淡化业务板块又添新业绩。沙特朱拜勒3A独立海水淡化项目,位于朱拜勒市达曼法赫德国王国际机场以北约65公里处,项目采用海水反渗透技术,日产水量达60万吨。项目建设成后,对于缓解沙特东部省沿海岸日益增长的用水需求具有重要意义。该项目是公司与ACWAPower签约的第三个海水淡化项目,充分体现了ACWAPower对公司综合实力,以及公司在中东非洲区域项目执行过程中所展现出的卓越管理水平和勇于担当的企业精神的高度认可。同时,该项目也是公司在沙特EPC总承包的第10个大型工程项目 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 讲话提取/scenario/extraction-speech
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/extraction-speech/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDE0MDQ2OX0.XH-1-PmTI4ScKaTTPowXYvVG0WSz8xcQ8qrrx3tNmAQ | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
path_timestamp | 1630135538675714 | Text | 否 |
pending_file | 发言内容_案例.xlsx | Text | 否 |
username | ctt | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 股票招聘/scenario/stock_recruitment_filter_single
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/stock_recruitment_filter_single
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDE0MDQ2OX0.XH-1-PmTI4ScKaTTPowXYvVG0WSz8xcQ8qrrx3tNmAQ | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
title | 山东电建三公司再传捷报 | Text | 否 |
content | 5月10日,山东电建三公司海外市场开发再传捷报!公司与ACWAPower签订沙特朱拜勒3A独立海水淡化项目EPC合同,海水淡化业务板块又添新业绩。沙特朱拜勒3A独立海水淡化项目,位于朱拜勒市达曼法赫德国王国际机场以北约65公里处,项目采用海水反渗透技术,日产水量达60万吨。项目建设成后,对于缓解沙特东部省沿海岸日益增长的用水需求具有重要意义。该项目是公司与ACWAPower签约的第三个海水淡化项目,充分体现了ACWAPower对公司综合实力,以及公司在中东非洲区域项目执行过程中所展现出的卓越管理水平和勇于担当的企业精神的高度认可。同时,该项目也是公司在沙特EPC总承包的第10个大型工程项目 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 一带一路要素抽取/scenario/project-info-extraction/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/project-info-extraction/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDMyMDQ2OX0.Q2VNzrTMU1G8VG3E3PMRcwlkJ5K0RqGCshzIz1htFgM | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
title | 华为自主研发 | Text | 否 |
content | 随着华为透露自主研发的操作系统“鸿蒙OS”后,网友对国产操作系统的热情与期待一天比一天旺盛。除了鸿蒙OS频频上头条以外,中兴公司研发的新支点OS也已经出货超两亿套了。不过目前消费者市场上,仍旧很少见到国产操作系统。
在桌面有Windows、移动有iOS、安卓、服务器有Linux系统的情况下,国产操作系统应该如何突围呢?对于这个问题,全球移动通信协会高级顾问、中国移动原董事长王建宙给出了他的看法。
中国移动原董事长王建宙在近日举办的中国“科”公司峰会上表示,5G时代要重视移动操作系统的创新。“目前的5G手机仅仅是加快了上网速度,但这远远不够,应该努力去开发新的功能。”
王建宙认为,现有的5G手机在功能上没有特别的创新,因为本身操作系统还是用的4G操作系统,没有什么变化。2G和3G不一样,2G的操作系统是塞班操作系统,是Windows操作系统,3G时完全不一样,是iOS操作系统,是安卓操作系统。
“现有的移动设备操作系统都是在桌面机操作系统基础上修改和延伸的,不断修改、不断延伸、不断增加功能,但操作系统的功能是管理硬件和软件资源”,王建宙表示“5G带来的是万物互联,这就使得移动设备所面临的功能和环境都发生了非常大的变化。这种变化不仅跟原来的桌面机完全不一样,而且跟原有的手机也不一样,所以我们迫切需要一个更加实时、更加适合于万物互联的移动设备的操作系统。”
王建宙认为,“新的5G操作系统在功能上一定要超过现有操作系统。如果只是跟现有操作系统差不多,也很好,但很难形成一个新的生态系统。所以新操作系统一定要高起点开发,要有新的操作系统,特别是在物联网和人工智能方面。 | Text | 否 |
content | 5月10日,山东电建三公司海外市场开发再传捷报!公司与ACWAPower签订沙特朱拜勒3A独立海水淡化项目EPC合同,海水淡化业务板块又添新业绩。沙特朱拜勒3A独立海水淡化项目,位于朱拜勒市达曼法赫德国王国际机场以北约65公里处,项目采用海水反渗透技术,日产水量达60万吨。项目建设成后,对于缓解沙特东部省沿海岸日益增长的用水需求具有重要意义。该项目是公司与ACWAPower签约的第三个海水淡化项目,充分体现了ACWAPower对公司综合实力,以及公司在中东非洲区域项目执行过程中所展现出的卓越管理水平和勇于担当的企业精神的高度认可。同时,该项目也是公司在沙特EPC总承包的第10个大型工程项目 | Text | 否 |
title | 山东电建三公司再传捷报 | Text | 否 |
path_timestamp | 1630309688929082 | Text | 否 |
pending_file | 一带一路项目要素抽取_案例.xlsx | Text | 否 |
username | ctt | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 一带一路信息过滤/scenario/project-info-filter/
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/project-info-filter/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDE1MzQzNH0.lTaat1GCB1pffWu1pmTJrpPGW8O_KEsy8QvuefLs6Lo | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
title | 华为自主研发 | Text | 否 |
content | 随着华为透露自主研发的操作系统“鸿蒙OS”后,网友对国产操作系统的热情与期待一天比一天旺盛。除了鸿蒙OS频频上头条以外,中兴公司研发的新支点OS也已经出货超两亿套了。不过目前消费者市场上,仍旧很少见到国产操作系统。
在桌面有Windows、移动有iOS、安卓、服务器有Linux系统的情况下,国产操作系统应该如何突围呢?对于这个问题,全球移动通信协会高级顾问、中国移动原董事长王建宙给出了他的看法。
中国移动原董事长王建宙在近日举办的中国“科”公司峰会上表示,5G时代要重视移动操作系统的创新。“目前的5G手机仅仅是加快了上网速度,但这远远不够,应该努力去开发新的功能。”
王建宙认为,现有的5G手机在功能上没有特别的创新,因为本身操作系统还是用的4G操作系统,没有什么变化。2G和3G不一样,2G的操作系统是塞班操作系统,是Windows操作系统,3G时完全不一样,是iOS操作系统,是安卓操作系统。
“现有的移动设备操作系统都是在桌面机操作系统基础上修改和延伸的,不断修改、不断延伸、不断增加功能,但操作系统的功能是管理硬件和软件资源”,王建宙表示“5G带来的是万物互联,这就使得移动设备所面临的功能和环境都发生了非常大的变化。这种变化不仅跟原来的桌面机完全不一样,而且跟原有的手机也不一样,所以我们迫切需要一个更加实时、更加适合于万物互联的移动设备的操作系统。”
王建宙认为,“新的5G操作系统在功能上一定要超过现有操作系统。如果只是跟现有操作系统差不多,也很好,但很难形成一个新的生态系统。所以新操作系统一定要高起点开发,要有新的操作系统,特别是在物联网和人工智能方面。 | Text | 否 |
content | 5月10日,山东电建三公司海外市场开发再传捷报!公司与ACWAPower签订沙特朱拜勒3A独立海水淡化项目EPC合同,海水淡化业务板块又添新业绩。沙特朱拜勒3A独立海水淡化项目,位于朱拜勒市达曼法赫德国王国际机场以北约65公里处,项目采用海水反渗透技术,日产水量达60万吨。项目建设成后,对于缓解沙特东部省沿海岸日益增长的用水需求具有重要意义。该项目是公司与ACWAPower签约的第三个海水淡化项目,充分体现了ACWAPower对公司综合实力,以及公司在中东非洲区域项目执行过程中所展现出的卓越管理水平和勇于担当的企业精神的高度认可。同时,该项目也是公司在沙特EPC总承包的第10个大型工程项目 | Text | 否 |
title | 山东电建三公司再传捷报 | Text | 否 |
path_timestamp | 12011 | Text | 否 |
pending_file | 一带一路项目资讯筛选_样例.xlsx | Text | 否 |
username | ctt | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 报告生成-查询专题sid和专题名称
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/base/query-subject/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMDM4MzQ1OX0.wJS8vxk7yYRZuRyGWX3VfOBAvQkVYMHZjcvkAiUxM8A | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
current_page | 2 | Text | 否 |
page_size | 10 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 分词
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/basic/word_cut/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMzg2Njk0M30.oR78Gt5va302elpcZqUB2srPwm9wd1UmsSTszFO0p7o | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
text | 今天天气真好 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 词性
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/basic/word_pos/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMzg2Njk0M30.oR78Gt5va302elpcZqUB2srPwm9wd1UmsSTszFO0p7o | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
text | 今天天气真好 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 新词发现
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/basic/new_word_find/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMzg2Njk0M30.oR78Gt5va302elpcZqUB2srPwm9wd1UmsSTszFO0p7o | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
text | 白月光,形容的是一种可望不可即的人或者事物,虽然一直在心上,却从不在身边。 最早出自张爱玲小说《红玫瑰与白玫瑰》,后来变成网络流行语并被大家所熟知是源于一部热播电视剧《延禧攻略》,剧中秦岚饰演的富察皇后被剧迷们形容为乾隆皇帝心中的“白月光”, 生当复来归,死亦长相思。 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 语义角色标注
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/basic/show_srl/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzNzA2OTM0Nn0.rcyJ5aKlUIYoVnQA2YMhlOHjlJb9By1RZtMf5SLEKzE | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
text | 他叫汤姆去拿外衣。 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 依存分析
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/basic/show_dep/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzMzg2Njk0M30.oR78Gt5va302elpcZqUB2srPwm9wd1UmsSTszFO0p7o | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
text | 白月光,形容的是一种可望不可即的人或者事物,虽然一直在心上,却从不在身边。 最早出自张爱玲小说《红玫瑰与白玫瑰》,后来变成网络流行语并被大家所熟知是源于一部热播电视剧《延禧攻略》,剧中秦岚饰演的富察皇后被剧迷们形容为乾隆皇帝心中的“白月光”, 生当复来归,死亦长相思。 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /positive_negative_judgment
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/scenario/positive_negative_judgment/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzNjM3Mjk5MH0.Zd0au3XyIFJKgv1cAMhd9E5Vede-OyDgfcjiaYzoayg | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
username | ctt | Text | 否 |
industry_code | 36 | Text | 否 |
start_year | 2016 | Text | 否 |
stop_year | 2020 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 关键短语挖掘
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/basic/create_keywords/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzNzA3NDAyMX0.Rofx64XlQWXwYonhrle0hSAIlrACXT-oVsn5vB4HGrs | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
topK | 5 | Text | 否 | 选取多少个关键短语返回,默认为 5
with_weight | True | Text | 否 | 指定返回关键短语是否需要短语权重
text | 法国媒体最新披露,巴黎圣母院火灾当晚,第一次消防警报响起时,负责查验的保安找错了位置,因而可能贻误了救火的最佳时机。 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
## /测试 中文命名实体识别
```text
暂无描述
```
#### 接口状态
> 开发中
#### 接口URL
> http://192.168.1.149:8020/basic/ner_single/
#### 请求方式
> POST
#### Content-Type
> form-data
#### 请求Header参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
Authorization | eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwibmFtZSI6ImN0dCIsImV4cCI6MTYzNzEyMzE2OX0.uUmDv9XOfWcgLXg9IkoE5X6ISQK67gH4lumJBGR2X7E | Text | 否 |
#### 请求Body参数
参数名 | 示例值 | 参数类型 | 是否必填 | 参数描述
--- | --- | --- | --- | ---
text | 中国进出口银行与中国银行加强合作 | Text | 否 |
#### 预执行脚本
```javascript
暂无预执行脚本
```
#### 后执行脚本
```javascript
暂无后执行脚本
```
\ No newline at end of file
import pymysql
pymysql.install_as_MySQLdb()
"""
Django settings for platform_zzsn project.
Generated by 'django-admin startproject' using Django 2.2.
For more information on this file, see
https://docs.djangoproject.com/en/2.2/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/2.2/ref/settings/
"""
import os
import datetime
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))+'/'
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/2.2/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = '2kzp8f76mp1^-)ko+^ji%+m*@g#i)005v0^vq5*zy0g7bcbo0*'
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
APPEND_SLASH = False
ALLOWED_HOSTS = ['*']
# Application definition
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'model.base',
'basic_service',
'model.classify',
'model.clustering',
'scenario_service',
]
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
# 'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
ROOT_URLCONF = 'platform_zzsn.urls'
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
WSGI_APPLICATION = 'platform_zzsn.wsgi.application'
# Database
# https://docs.djangoproject.com/en/2.2/ref/settings/#databases
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql', # 数据库引擎
'NAME': 'platform_ctt', # 数据库名,先前创建的
'USER': 'root', # 用户名,可以自己创建用户
'PASSWORD': 'ydyl123456', # 密码
'HOST': '114.115.151.73', # mysql服务所在的主机ip
'PORT': '3306', # mysql服务端口
}
}
# Password validation
# https://docs.djangoproject.com/en/2.2/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
# Internationalization
# https://docs.djangoproject.com/en/2.2/topics/i18n/
LANGUAGE_CODE = 'zh-hans'
TIME_ZONE = 'Asia/Shanghai'
USE_I18N = True
USE_L10N = True
USE_TZ = False
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/2.2/howto/static-files/
STATIC_URL = '/static/'
MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
\ No newline at end of file
"""platform_zzsn URL Configuration
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/2.2/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.urls import include, path
import basic_service.urls
import scenario_service.urls
import model.base.urls
urlpatterns = [
# path('admin/', admin.site.urls),
path('basic/', include(basic_service.urls)),
# path('classify/', include(classify.urls)),
# path('clustering/', include(clustering.urls)),
path('base/', include(model.base.urls)),
path('scenario/', include(scenario_service.urls)),
]
"""
WSGI config for platform_zzsn project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/2.2/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'platform_zzsn.settings')
application = get_wsgi_application()
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class ScenarioServiceConfig(AppConfig):
name = 'scenario_service'
from django.db import models
# Create your models here.
from django.test import TestCase
import requests
import json
from requests.adapters import HTTPAdapter
def post_br_single_file(url, file_name):
payload = {'file_path': file_name}
# headers = {
# 'Content-Type': 'application/json'
# }
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36',
'Content-Type': 'application/json',
# 'Connection': 'close'
}
requests.adapters.DEFAULT_RETRIES = 3
response = requests.request('POST', url, headers=headers, data=json.dumps(payload),timeout=200)
data = json.loads(response.text)
return data
import time
start_time = time.time()
result = post_br_single_file('http://192.168.1.149:7000/br/doc_event/project_info/extraction/pred_file', '/home/zzsn/ctt/platform_zzsn/media/1201/0830.xlsx')
print(result)
end_time = time.time()
print(end_time-start_time)
\ No newline at end of file
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from django.urls import path
from django.conf.urls import url
from scenario_service.views import views
urlpatterns = [
url(r'^project-info-filter-single', views.project_info_filter_single, name='project_info_filter_single'),
url(r'^project-info-filter', views.project_info_filter, name='project_info_filter'),
url(r'^project-info-extraction-single', views.project_info_extraction_single, name='project_info_extraction_single'),
url(r'^project-info-extraction', views.project_info_extraction, name='project_info_extraction'),
url(r'^doc-similarity-duplicate-single', views.doc_similarity_duplicate_single, name='doc_similarity_duplicate_single'),
url(r'^report-generator-single', views.report_generator_single, name='report_generator_single'),
url(r'^extraction-speech-single', views.extraction_speech_single, name='extraction_speech_single'),
url(r'^extraction-speech', views.extraction_speech, name='extraction_speech'),
url(r'^extraction-keywords', views.extraction_keywords, name='extraction_keywords'),
# url(r'^download_xls', views.download_xls, name='download_xls'),
url(r'^extraction-company', views.extraction_company, name='extraction_company'),
url(r'^stock-recruitment-filter', views.stock_recruitment_filter, name='stock_recruitment_filter'),
url(r'^stock_recruitment_filter_single', views.stock_recruitment_filter_single, name='stock_recruitment_filter_single'),
url(r'^positive_negative_judgment', views.positive_negative_judgment, name='positive_negative_judgment')
]
\ No newline at end of file
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 10:08
# @Author : 程婷婷
# @FileName: __init__.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/6 15:44
# @Author : 程婷婷
# @FileName: cv_tfidf.py
# @Software: PyCharm
# coding:utf-8
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from model.base.views.utils import *
def cv_tfidf(corpus):
vectorizer = CountVectorizer() # 该类会将文本中的词语转换为词频矩阵,矩阵元素a[i][j] 表示j词在i类文本下的词频
transformer = TfidfTransformer() # 该类会统计每个词语的tf-idf权值
X = vectorizer.fit_transform(corpus) # 将文本转为词频矩阵
tfidf = transformer.fit_transform(X) # 计算tf-idf,
word = vectorizer.get_feature_names() # 获取词袋模型中的所有词语
weight = tfidf.toarray() # 将tf-idf矩阵抽取出来,元素a[i][j]表示j词在i类文本中的tf-idf权重
return word, weight
def get_word_tf_frequency(word, weight, word2count):
word_tf, keyword, word_weight = [], [], []
for i in range(len(weight)): # 遍历每类文本的tf-idf词语权重,取权重最高的词
temp = list(zip(word, weight[i]))
temp.sort(key=takeSecond, reverse=True)
result = temp[0: 3]
result.sort(key=takeFirst_len, reverse=True)
for index, data in enumerate(result):
if data[0] not in word2count:
continue
word_weight.append(word2count[data[0]])
keyword.append(data[0])
word_tf.append(data[1])
return word_tf, keyword, word_weight
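# 使用示例(仅作参考,语料与词频统计为假设值):
#   corpus = ['数字 转型 加速', '公司 营收 增长', '数字 业务 增长']
#   word2count = {'数字': 2, '转型': 1, '增长': 2, '营收': 1, '加速': 1, '业务': 1, '公司': 1}
#   word, weight = cv_tfidf(corpus)
#   word_tf, keyword, word_weight = get_word_tf_frequency(word, weight, word2count)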
"""
Author: Tao Zhang
Desc: 基于字典的情感极性分析,用于年报情感分析。
2021-10-29: 实现分行业的数字化转型正负面趋势分析。
2021-11-05: 实现echart折线分析图的嵌入,API开发完成。
"""
import cx_Oracle
import pandas as pd
import os
import re
from tqdm import tqdm
import jieba
import json
import zipfile
from flask import Flask, request, make_response
from urllib.parse import quote
import io
from platform_zzsn.settings import BASE_DIR
# 开始加载情感词典
print('开始加载情感词典 ...')
reverse_words = ['车道偏离'] # 屏蔽词
negdict = [] # 消极情感词典
posdict = [] # 积极情感词典
nodict = [] # 否定词词典
plusdict = [] # 程度副词词典
sentiment_base_dir = os.path.join(BASE_DIR, 'static/base/sentiment_dict')
sl = pd.read_csv(os.path.join(sentiment_base_dir, '中文金融词典/dict/formal_neg.txt'), header=None, encoding='utf-8')
for i in range(len(sl[0])):
negdict.append(sl[0][i])
# sl = pd.read_csv('情感极性词典/正面情绪词.txt', header=None, encoding='utf-8')
sl = pd.read_csv(os.path.join(sentiment_base_dir, '中文金融词典/dict/formal_pos.txt'), header=None, encoding='utf-8')
for i in range(len(sl[0])):
posdict.append(sl[0][i])
sl = pd.read_csv(os.path.join(sentiment_base_dir, '情感极性词典/否定词.txt'), header=None, encoding='utf-8')
for i in range(len(sl[0])):
nodict.append(sl[0][i])
sl = pd.read_csv(os.path.join(sentiment_base_dir, '情感极性词典/程度副词.txt'), header=None, encoding='utf-8')
for i in range(len(sl[0])):
plusdict.append(sl[0][i])
print('情感词典加载完成!')
# 加载情感词典结束
for w in ['非公开', '非流动', '车联网', '网联化', '智能网联化', '智能网联', '新能源', '共享化']:
jieba.add_word(w)
def clean_blank_lines(text):
"""
清理多余空行
:param text:
:return:
"""
text_ = re.sub('[\n]+', '\n', text.replace('\t', '').replace('\r', ''))
return text_
def repaire_table_of_content(content):
"""
修复目录格式
eg:
第一节重要提示、 第一节 重要提示、
第四节九、公司未 第四节 九、公司未
第一节重要提示、 第一节 重要提示、
第二节公司简介和 第二节 公司简介和
第三节公司业务概 第三节 公司业务概
"""
chapter = re.findall(r'第\S{1,2}节\S{,5}', content)
for i in chapter:
i_s = i.split('节')
new_cha = i_s[0] + '节' + ' ' + i_s[-1]
# print(i, new_cha)
content = content.replace(i, new_cha)
return content
def filter4sentences(filter_keywords, sentences):
"""
:param filter_keywords: 过滤词
:param sentences: 待过滤的句子
:return:
"""
sentences_success = []
total = 0
for i in tqdm(sentences):
total += len(i['句子'])
for sent in i['句子']:
for w in filter_keywords:
if w in sent:
# print('success +1')
sentences_success.append({'年报文件名称': i['年报文件名称'],
'年份': i['年份'],
'股票简称': i['股票简称'],
'句子': sent})
break
print('句子总数:' + str(total) + ',筛选出的句子数量:' + str(len(sentences_success)))
return sentences_success
# 预测方法
def predict(s, negdict, posdict, nodict, plusdict):
p = 0
for rw in reverse_words: # 去掉文本中的屏蔽词
if rw in s:
s = s.replace(rw, '')
sd = list(jieba.cut(s)) # 分词
temp = {'积极词': [], '消极词': [], '副词': [], '否定词': []}
for i in range(len(sd)):
if sd[i] in negdict:
if i > 0 and sd[i - 1] in nodict:
p = p + 1
temp['消极词'].append((i, sd[i]))
temp['否定词'].append((i - 1, sd[i - 1]))
elif i > 0 and sd[i - 1] in plusdict:
p = p - 2
temp['消极词'].append((i, sd[i]))
temp['副词'].append((i - 1, sd[i - 1]))
else:
p = p - 1
temp['消极词'].append((i, sd[i]))
elif sd[i] in posdict:
if i > 0 and sd[i - 1] in nodict:
p = p - 1
temp['积极词'].append((i, sd[i]))
temp['否定词'].append((i - 1, sd[i - 1]))
elif i > 0 and sd[i - 1] in plusdict:
p = p + 2
temp['积极词'].append((i, sd[i]))
temp['副词'].append((i - 1, sd[i - 1]))
elif i > 0 and sd[i - 1] in negdict:
p = p - 1
temp['积极词'].append((i, sd[i]))
temp['消极词'].append((i - 1, sd[i - 1]))
elif i < len(sd) - 1 and sd[i + 1] in negdict:
p = p - 1
temp['积极词'].append((i, sd[i]))
temp['消极词'].append((i + 1, sd[i + 1]))
else:
p = p + 1
temp['积极词'].append((i, sd[i]))
elif sd[i] in nodict:
p = p - 0.5
temp['否定词'].append((i, sd[i]))
temp_u = {}
for k, v in temp.items():
temp_u[k] = list(set(v))
return p, sd, temp_u
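# 使用示例(仅作参考):predict返回(得分, 分词结果, 命中的情感词信息),
# 得分大于0判定为正面,小于0判定为负面,等于0判定为无情感,例如:
#   score, words, hits = predict('公司营业收入大幅增长', negdict, posdict, nodict, plusdict)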
def get_echart_line_map(years_range_list, positive_count_list, negative_count_list, title='数字化转型正负面趋势分析'):
"""
生成折线图
:param years_range_list: 年份区间
:param positive_count_list: 正面数量
:param negative_count_list: 负面数量
:param title 标题
:return:
"""
with open('input_data/echart_line_template.html', 'r') as file:
html = file.read()
html = html.replace('years_range_list', str(years_range_list))
html = html.replace('positive_count_list', str(positive_count_list))
html = html.replace('negative_count_list', str(negative_count_list))
with open(os.path.join('outputs', title + '_' + 'index.html'), 'w') as file2:
file2.write(html)
def sentiment_analysis(years_range_list, sentences, path):
"""
:param years_range_list: 分析年份区间
:param sentences: 多个句子
:return:
"""
mydata = pd.DataFrame(data=sentences)
len1 = len(mydata)
mydata.drop_duplicates(subset=['句子'], inplace=True)
mydata.reset_index(drop=True, inplace=True)
print('去重数量为:' + str(len1 - len(mydata)) + ',剩余条数:' + str(len(mydata)))
tol = 0
# mydata['pred'] = 0
for i in tqdm(range(len(mydata))):
tol = tol + 1
score, sd, info = predict(mydata.loc[i, '句子'], negdict, posdict, nodict, plusdict)
mydata.loc[i, '分词'] = ' '.join(sd)
if score > 0:
mydata.loc[i, 'pred'] = 1 # 积极
mydata.loc[i, 'info'] = json.dumps(info, ensure_ascii=False)
elif score < 0:
mydata.loc[i, 'pred'] = 0 # 消极
mydata.loc[i, 'info'] = json.dumps(info, ensure_ascii=False)
else:
mydata.loc[i, 'pred'] = -2 # 无情感
mydata.loc[i, 'info'] = json.dumps(info, ensure_ascii=False)
print(mydata.head(10))
mydata.to_excel(os.path.join(path, '分析结果_test.xlsx'), index=False, columns=['年报文件名称', '年份', '股票简称',
'句子', '分词', 'pred', 'info'])
mydata_year_set = set(mydata['年份'].tolist())
positive_count = []
negative_count = []
for year in years_range_list:
if year in mydata_year_set:
# 取数据
df_current = mydata[mydata['年份'] == year]
positive_count.append(len(df_current[df_current['pred'] == 1]))
negative_count.append(len(df_current[df_current['pred'] == 0]))
else:
positive_count.append(0)
negative_count.append(0)
# get_echart_line_map(years_range_list, positive_count, negative_count)
return True
def process_v2(IndustryCode='36', start_year=2016, stop_year=2020, path='./'):
"""
从数据库里检索符合条件的内容
:param IndustryCode: 行业大类代码
:param start_year: 起始年份
:param stop_year: 终止年份
:return:
"""
year_range = [str(y) for y in range(int(start_year), int(stop_year) + 1)]
guanliceng_content = [] # 存储管理层文本
success = 0 # 统计成功提取数量
# data_root_path = '../../../东方财富网/' # .txt格式年报数据根路径
filter_keywords_path = os.path.join(sentiment_base_dir, '数字化转型_词库.xlsx')
types = {'行业大类代码': str, '上市公司代码': str}
df = pd.read_excel(os.path.join(sentiment_base_dir, '截至2021年2季度上市公司_4352家【From证监会】.xlsx'), dtype=types)
IndustryCode2info = {}
for idx, row in df.iterrows():
if row['行业大类代码'] not in IndustryCode2info:
IndustryCode2info[row['行业大类代码']] = dict(row)
print('行业大类数量:' + str(len(IndustryCode2info)))
# print('数据库中,行业大类数量:' + str(len(os.listdir(data_root_path))))
connect = cx_Oracle.connect('cis', 'cis_zzsn9988', '114.116.91.1:1521/ORCL')
cursor = connect.cursor()
if IndustryCode in df['行业大类代码'].tolist():
df_useful = df[df['行业大类代码'] == IndustryCode]
print(df_useful.head(10))
IndustryName = IndustryCode2info[IndustryCode]['行业大类名称']
print('正在分析:' + IndustryName + ' ...')
sql_str = "SELECT TITLE, COMPLETE_SENTENCES, YEAR, STOCK_NAME FROM COMPANY_ANNUAL_REPORT WHERE INDUSTRY_CODE='%s' AND YEAR BETWEEN '%s' AND '%s'" % (
str(IndustryCode), str(start_year), str(stop_year))
print('\n' + sql_str + '\n\n')
cursor.execute(sql_str)
data = cursor.fetchall()
cursor.execute('commit')
print(data[0: 10])
print('step2:完整句子提取【开始】 ...')
complete_sentences = []
for i in tqdm(data):
sentences_bytes = i[1].read()
sentences_str = sentences_bytes.decode('utf8') # 对字节解码,转为str类型
# content_bytes2 = content_str.encode('utf8') # 对str类型编码,转为字节类型
temp_dict = {'年报文件名称': i[0],
'年份': i[2],
'股票简称': i[3],
'句子': sentences_str.split('<sep>')}
complete_sentences.append(temp_dict)
# print(json.dumps(temp_dict, ensure_ascii=False, indent=2))
# json.dump(complete_sentences, open(save_path, 'w'),
# ensure_ascii=False, indent=2)
print('step2:完整句子提取【完成】')
print('step3:符合条件的句子筛选【开始】 ...')
keywords = []
for sheet_name in ['来源【政策库】', '来源【模型推荐】']:
df1 = pd.read_excel(filter_keywords_path, sheet_name=sheet_name)
if sheet_name == '来源【政策库】':
df1 = df1[df1['label'] == 1]
keywords.extend(df1['关键词'].to_list())
sentences_useful = filter4sentences(keywords, complete_sentences)
print(sentences_useful[0: 5])
# for item in sentences_useful:
# print(item)
print('step3:符合条件的句子筛选【完成】')
print('step4: 正负面预测开始 ...')
results = sentiment_analysis(year_range, sentences_useful, path)
print('step4: 正负面预测完成!')
# print('step5: 打包结果 ...')
# zip_directory('outputs', 'results_zip/分析结果.zip')
# print('step5: 结果打包完成')
return {"success": 1, "msg": "{} 行业分析完成!".format(IndustryCode)}
else:
print('Sorry! IndustryCode 不在“行业大类代码”中,请检查!')
return {"success": 0, "msg": "IndustryCode {} 不在行业大类代码中,请检查!".format(IndustryCode)}
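# 使用示例(仅作参考,参数取值为假设值):
#   process_v2(IndustryCode='36', start_year=2016, stop_year=2020, path='/tmp')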
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 9:26
# @Author : 程婷婷
# @FileName: scenario.py
# @Software: PyCharm
import os
import json
import requests
import pandas as pd
from collections import Counter
from requests.adapters import HTTPAdapter
from scenario_service.views import cv_tfidf
from model.base.views import utils
def post_project_info(url, title, content):
payload={'title':title,'content':content}
headers = {
'Content-Type': 'application/json'
}
response = requests.request('POST', url, headers=headers, data=json.dumps(payload))
data = json.loads(response.text)
print(data)
return data
def post_speech(url, title, content):
payload = {'title': title,
'content': content}
files = [
]
headers = {}
response = requests.request("POST", url, headers=headers, data=payload, files=files)
data = json.loads(response.text)
return data
def post_report(url):
response = requests.request("GET", url)
data = json.loads(response.text)
return data
def post_similarity_duplicate(url, text_1, text_2, sim):
payload = {'text_1': text_1, 'text_2': text_2, 'sim': sim}
headers = {
'Content-Type': 'application/json'
}
response = requests.request('POST', url, headers=headers, data=json.dumps(payload))
data = json.loads(response.text)
return data
def post_extraction_company(url, file_name, company_file_name):
payload = {'file_name': file_name, 'company_file_name': company_file_name}
files = [
]
headers = {
'token': '1',
}
response = requests.request('POST', url, headers=headers, data=payload, files=files)
data = json.loads(response.text)
return data
def post_br_single_file(url, file_name):
payload = {'file_path': file_name}
headers = {
'Content-Type': 'application/json'
}
s = requests.session()
s.mount('http://', HTTPAdapter(max_retries=3))
response = s.request('POST', url, headers=headers, data=json.dumps(payload), timeout=60*60*10)
data = json.loads(response.text)
return data
def post_stock_recruitment_predict(url, file_name):
payload = {'file_name': file_name}
files = [
]
headers = {
'token': '1',
}
response = requests.request('POST', url, headers=headers, data=payload, files=files)
data = json.loads(response.text)
return data
def cv_tfidf_keywords(download_path, pending_file, user_file):
file_type = pending_file.split('.')[-1]
if (file_type == 'docx') or (file_type == 'doc'):
doc_text_list = utils.read_docx(pending_file, user_file)
doc_text_list = utils.merge_para(doc_text_list)
else:
# print('运行xlsx文件')
doc_text_list = utils.read_excel(pending_file, user_file)
# print(doc_text_list)
corpus, all_words, = [], []
for para in doc_text_list:
words = utils.filter_stopwords(para)
all_words.extend(words)
corpus.append(' '.join(words))
print("len(corpus):" + str(len(corpus)))
word2count = Counter(all_words)
word, weight = cv_tfidf.cv_tfidf(corpus)
word_tf, keyword, word_weight = cv_tfidf.get_word_tf_frequency(word, weight, word2count)
out_df = pd.DataFrame(columns=['词', '词频', 'tfidf'])
out_df['词'] = keyword
out_df['词频'] = word_weight
out_df['tfidf'] = word_tf
out_df.drop_duplicates(subset=['词', '词频'], inplace=True)
out_df.to_excel(os.path.join(download_path, 'result.xlsx'), index=False, encoding='utf-8')
return out_df
import os
import time
import datetime
from django.http import JsonResponse
from django.views.decorators.http import require_POST
import pandas as pd
from scenario_service.views import scenario, positive_negative_judgment_base_emotion_words
from model.base.views.token_authorize import *
from model.base.models import ServiceManage
from platform_zzsn.settings import MEDIA_ROOT
UPLOAD_FOLDER = MEDIA_ROOT
# Create your views here.
@require_POST
@login_required
def project_info_filter_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
title = request.POST['title']
content = request.POST['content']
url = 'http://114.116.49.86:7000/br/classification/project_info/filter/pred'
result = scenario.post_project_info(url, title, content)
if result['resultData']['label']:
result['resultData']['label'] = '一带一路项目信息'
else:
result['resultData']['label'] = '非一带一路项目信息'
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def stock_recruitment_filter_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
title = request.POST['title']
content = request.POST['content']
url = 'http://localhost:7005/classification/rc/f_zp_gp/pred'
result = scenario.post_project_info(url, title, content)
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def project_info_extraction_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
title = request.POST['title']
content = request.POST['content']
url = 'http://114.116.49.86:7000/br/doc_event/project_info/extraction/pred'
result = scenario.post_project_info(url, title, content)
# map the English field names in the extraction result to their Chinese display names
field_name_map = {
'pro_name': '项目名称',
'pro_country': '项目国别',
'pro_build_unit': '承建单位',
'pro_related_unit': '相关方',
'pro_money': '项目投资',
'pro_start_date': '开工日期',
'pro_location': '项目地点',
'pro_state': '项目状态',
'pro_capacity': '项目产能',
'pro_background': '项目背景',
'pro_brief': '项目简介',
'pro_content': '项目内容',
'pro_significance': '项目意义',
'pro_city': '项目城市',
'pro_owner_unit': '业主单位',
'pro_run_mode': '执行方式',
'pro_time_limit': '建设周期',
'pro_end_date': '完工日期',
}
for en_key, cn_key in field_name_map.items():
result['resultData'][cn_key] = result['resultData'].pop(en_key)
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def doc_similarity_duplicate_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
text_1 = request.POST['text_1']
text_2 = request.POST['text_2']
sim = request.POST['sim']
url = 'http://localhost:7005/doc_sim/similarity'
result = scenario.post_similarity_duplicate(url, text_1, text_2, sim)
if result['resultData']['is_repetition']:
result['resultData']['is_repetition'] = '重复'
else:
result['resultData']['is_repetition'] = '不重复'
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def report_generator_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
title = request.POST['name']
sid = str(request.POST['sid'])
url = "http://114.116.99.6:1811/report_generator?title=%s&sid=%s" %(title, sid)
result = scenario.post_report(url)
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def extraction_speech_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
title = request.POST['title']
content = request.POST['content']
url = 'http://localhost:1812/speech/'
result = scenario.post_speech(url, title, content)
regular = []
for index,row in enumerate(result['resultData']['抽取内容']):
regular.append({'person_name': list(row.keys())[0], 'person_speech': list(row.values())[0]})
result['resultData']['抽取内容'] = regular
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def extraction_speech(request):
token = request.META.get("HTTP_AUTHORIZATION")
username = request.POST['username']
pending_file = request.POST['pending_file']
path_timestamp = request.POST['path_timestamp']
url = 'http://192.168.1.149:1812/speech/'
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
create_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
service_manager = ServiceManage.objects.create(
name='专家观点、领导讲话、组织发言提取',
username=username,
create_date=create_date,
end_date=None,
state='进行中',
filenames=pending_file + ';',
path=str(path_timestamp),
)
pending_file = os.path.join(path, pending_file)
df = pd.read_excel(pending_file)
result_type, result_speech = [], []
try:
for index in range(len(df['title'])):
speech = scenario.post_speech(url, df['title'][index], df['content'][index])
result_type.append(speech['resultData']['type'])
result_speech.append(speech['resultData']['抽取内容'])
df['讲话类型'] = result_type
df['讲话内容'] = result_speech
df.to_excel(os.path.join(path, 'result.xlsx'), index=False)
except Exception as e:
print(e)
service_manager.state = '失败'
end_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
service_manager.end_date = end_date
service_manager.save()
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': str(e),
'resultData': False,
})
else:
service_manager.state = '已完成'
end_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
service_manager.end_date = end_date
service_manager.save()
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': True,
})
@require_POST
@login_required
def extraction_keywords(request):
path_timestamp = request.POST['path_timestamp']
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
pending_file = request.POST['pending_file']
user_file = request.POST['user_file']
username = request.POST['username']
token = request.META.get("HTTP_AUTHORIZATION")
create_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
print(pending_file, user_file)
service_manager = ServiceManage.objects.create(
name='关键词挖掘',
username=username,
create_date=create_date,
end_date=None,
state='进行中',
filenames=pending_file+';'+user_file,
path=str(path_timestamp),
)
pending_file = os.path.join(path, pending_file)
user_file = os.path.join(path, user_file)
try:
scenario.cv_tfidf_keywords(path, pending_file, user_file)
except Exception as e:
print(e)
service_manager.state = '失败'
end_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
service_manager.end_date = end_date
service_manager.save()
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': str(e),
'resultData': False,
})
else:
service_manager.state = '已完成'
end_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
service_manager.end_date = end_date
service_manager.save()
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': True,
})
@require_POST
@login_required
def extraction_company(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
pending_file = request.POST['pending_file']
company_file_name = request.POST['user_file']
username = request.POST['username']
create_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
print(pending_file, company_file_name)
service_manager = ServiceManage.objects.create(
name='公司名称提取',
username=username,
create_date=create_date,
end_date=None,
state='进行中',
filenames=pending_file+';'+company_file_name,
path=str(path_timestamp),
)
pending_file = os.path.join(path, pending_file)
company_file_name = os.path.join(path, company_file_name)
url = 'http://localhost:7005/zzsn_platform/liyan/ex_company_name/test_file'
print(pending_file)
print(company_file_name)
result = scenario.post_extraction_company(url, file_name=pending_file, company_file_name=company_file_name)
end_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
print(result)
if result['isHandleSuccess']:
service_manager.end_date = end_date
service_manager.state = '已完成'
service_manager.save()
result['resultData'] = True
else:
service_manager.end_date = end_date
service_manager.state = '失败'
service_manager.save()
result['resultData'] = False
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def stock_recruitment_filter(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
pending_file = request.POST['pending_file']
username = request.POST['username']
create_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
print(pending_file)
service_manager = ServiceManage.objects.create(
name='股票招聘筛选',
username=username,
create_date=create_date,
end_date=None,
state='进行中',
filenames=pending_file+';',
path=str(path_timestamp),
)
pending_file = os.path.join(path, pending_file)
url = 'http://192.168.1.149:7001/classification/rc/f_zp_gp/test_file'
print(pending_file)
result = scenario.post_stock_recruitment_predict(url, file_name=pending_file)
end_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
print(result)
if result['isHandleSuccess']:
service_manager.end_date = end_date
service_manager.state = '已完成'
service_manager.save()
result['resultData'] = True
else:
service_manager.end_date = end_date
service_manager.state = '失败'
service_manager.save()
result['resultData'] = False
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def project_info_filter(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
pending_file = request.POST['pending_file']
username = request.POST['username']
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
create_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
print(pending_file)
service_manager = ServiceManage.objects.create(
name='一带一路项目信息筛选',
username=username,
create_date=create_date,
end_date=None,
state='进行中',
filenames=pending_file+';',
path=str(path_timestamp),
)
pending_file = os.path.join(path, pending_file)
url = 'http://114.116.49.86:7000/br/doc_event/project_info/extraction/pred_file'
print(pending_file)
result = scenario.post_br_single_file(url, file_name=pending_file)
end_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
print(result)
if result['isHandleSuccess']:
service_manager.end_date = end_date
service_manager.state = '已完成'
service_manager.save()
result['resultData'] = True
else:
service_manager.end_date = end_date
service_manager.state = '失败'
service_manager.save()
result['resultData'] = False
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def project_info_extraction(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
pending_file = request.POST['pending_file']
username = request.POST['username']
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
create_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
print(pending_file)
service_manager = ServiceManage.objects.create(
name='一带一路项目要素抽取',
username=username,
create_date=create_date,
end_date=None,
state='进行中',
filenames=pending_file+';',
path=str(path_timestamp),
)
pending_file = os.path.join(path, pending_file)
url = 'http://114.116.49.86:7000/br/doc_event/project_info/extraction/pred_file'
print(pending_file)
result = scenario.post_br_single_file(url, file_name=pending_file)
end_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
print(result)
if result['isHandleSuccess']:
service_manager.end_date = end_date
service_manager.state = '已完成'
service_manager.save()
result['resultData'] = True
else:
service_manager.end_date = end_date
service_manager.state = '失败'
service_manager.save()
result['resultData'] = False
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def positive_negative_judgment(request):
token = request.META.get('HTTP_AUTHORIZATION')
username = request.POST['username']
industry_code = str(request.POST['industry_code'])  # industry category code (required)
start_year = request.POST['start_year']  # start year
stop_year = request.POST['stop_year']  # end year
path_timestamp = int(round(time.time() * 1000000))
path = os.path.join(UPLOAD_FOLDER, str(path_timestamp))
if not os.path.exists(path):
os.mkdir(path)
create_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
service_manager = ServiceManage.objects.create(
name='基于情感词的正负面分析',
username=username,
create_date=create_date,
end_date=None,
state='进行中',
filenames=str(industry_code),
path=str(path_timestamp),
)
try:
result = positive_negative_judgment_base_emotion_words.process_v2(industry_code, start_year, stop_year, path)
end_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
service_manager.end_date = end_date
service_manager.state = '成功'
service_manager.save()
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': True,
})
except Exception as e:
print(e)
end_date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
service_manager.end_date = end_date
service_manager.state = '失败'
service_manager.save()
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': '处理失败',
'resultData': False,
})
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2022/6/15 12:10
# @Author : bruxellse_li
# @FileName: word_count.py
# @Software: PyCharm
import os
import pandas as pd
import re
import jieba
import jieba.posseg # 词性获取
import collections # 词频统计库
from base.views import utils
from platform_zzsn.settings import *
class WordCount:
def __init__(self):
# 英文词性转中文词性字典:简洁版
self.En2Cn = {
'zg': '状态词',
'a': '形容词',
'ad': '形容词',
'ag': '形容词',
'al': '形容词',
'an': '形容词',
'b': '区别词',
'bl': '区别词',
'c': '连词',
'cc': '连词',
'd': '副词',
'e': '叹词',
'eng': '英文',
'f': '方位词',
'g': '语素',
'h': '前缀',
'i': '成语',
'j': '简称略语',
'k': '后缀',
'l': '习用语',
'm': '数词',
'mq': '数量词',
'n': '名词',
'ng': '名词',
'nl': '名词',
'nr': '名词',
'nr1': '名词',
'nr2': '名词',
'nrf': '名词',
'nrfg': '名词',
'nrj': '名词',
'ns': '名词',
'nsf': '名词',
'nt': '名词',
'nz': '名词',
'o': '拟声词',
'p': '介词',
'pba': '介词',
'pbei': '介词',
'q': '量词',
'qt': '量词',
'qv': '量词',
'r': '代词',
'rg': '代词',
'rr': '代词',
'rz': '代词',
'rzs': '代词',
'rzt': '代词',
'rzv': '代词',
'ry': '代词',
'rys': '代词',
'ryt': '代词',
'ryv': '代词',
's': '处所词',
't': '时间词',
'tg': '时间词',
'u': '助词',
'ude1': '助词',
'ude2': '助词',
'ude3': '助词',
'udeng': '助词',
'udh': '助词',
'uguo': '助词',
'ule': '助词',
'ulian': '助词',
'uls': '助词',
'usuo': '助词',
'uyy': '助词',
'uzhe': '助词',
'uzhi': '助词',
'v': '动词',
'vd': '动词',
'vf': '动词',
'vg': '动词',
'vi': '动词',
'vl': '动词',
'vn': '动词',
'vshi': '动词',
'vx': '动词',
'vyou': '动词',
'w': '标点符号',
'wb': '标点符号',
'wd': '标点符号',
'wf': '标点符号',
'wj': '标点符号',
'wh': '标点符号',
'wkz': '标点符号',
'wky': '标点符号',
'wm': '标点符号',
'wn': '标点符号',
'wp': '标点符号',
'ws': '标点符号',
'wt': '标点符号',
'ww': '标点符号',
'wyz': '标点符号',
'wyy': '标点符号',
'x': '字符串',
'xu': '字符串',
'xx': '字符串',
'y': '语气词',
'z': '状态词',
'un': '未知词'
}
# 英文词性转中文词性字典:详细版
self.En2Cn_Pro = {
'a': '形容词',
'ad': '形容词-副形词',
'ag': '形容词-形容词性语素',
'al': '形容词-形容词性惯用语',
'an': '形容词-名形词',
'b': '区别词',
'bl': '区别词-区别词性惯用语',
'c': '连词',
'cc': '连词-并列连词',
'd': '副词',
'e': '叹词',
'eng': '英文',
'f': '方位词',
'g': '语素',
'h': '前缀',
'i': '成语',
'j': '简称略语',
'k': '后缀',
'l': '习用语',
'm': '数词',
'mq': '数量词',
'n': '名词',
'ng': '名词-名词性语素',
'nl': '名词-名词性惯用语',
'nr': '名词-人名',
'nr1': '名词-汉语姓氏',
'nr2': '名词-汉语名字',
'nrf': '名词-音译人名',
'nrfg': '名词-人名',
'nrj': '名词-日语人名',
'ns': '名词-地名',
'nsf': '名词-音译地名',
'nt': '名词-机构团体名',
'nz': '名词-其他专名',
'o': '拟声词',
'p': '介词',
'pba': '介词-“把”',
'pbei': '介词-“被”',
'q': '量词',
'qt': '量词-动量词',
'qv': '量词-时量词',
'r': '代词',
'rg': '代词-代词性语素',
'rr': '代词-人称代词',
'rz': '代词-指示代词',
'rzs': '代词-处所指示代词',
'rzt': '代词-时间指示代词',
'rzv': '代词-谓词性指示代词',
'ry': '代词-疑问代词',
'rys': '代词-处所疑问代词',
'ryt': '代词-时间疑问代词',
'ryv': '代词-谓词性疑问代词',
's': '处所词',
't': '时间词',
'tg': '时间词-时间词性语素',
'u': '助词',
'ude1': '助词-“的”“底”',
'ude2': '助词-“地”',
'ude3': '助词-“得”',
'udeng': '助词-“等”“等等”“云云”',
'udh': '助词-“的话”',
'uguo': '助词-“过”',
'ule': '助词-“了”“喽”',
'ulian': '助词-“连”',
'uls': '助词-“来讲”“来说”“而言”“说来”',
'usuo': '助词-“所”',
'uyy': '助词-“一样”“一般”“似的”“般”',
'uzhe': '助词-“着”',
'uzhi': '助词-“之”',
'v': '动词',
'vd': '动词-副动词',
'vf': '动词-趋向动词',
'vg': '动词-动词性语素',
'vi': '动词-不及物动词(内动词)',
'vl': '动词-动词性惯用语',
'vn': '动词-名动词',
'vshi': '动词-“是”',
'vx': '动词-形式动词',
'vyou': '动词-“有”',
'w': '标点符号',
'wb': '标点符号-百分号千分号,全角:% ‰ 半角:%',
'wd': '标点符号-逗号,全角:, 半角:,',
'wf': '标点符号-分号,全角:; 半角: ; ',
'wj': '标点符号-句号,全角:。',
'wh': '标点符号-单位符号,全角:¥ $ £ ° ℃ 半角 $',
'wkz': '标点符号-左括号,全角:( 〔 [ { 《 【 〖 〈 半角:( [ { <',
'wky': '标点符号-右括号,全角:) 〕 ] } 》 】 〗 〉 半角: ) ] { >',
'wm': '标点符号-冒号,全角:: 半角: :',
'wn': '标点符号-顿号,全角:、',
'wp': '标点符号-破折号,全角:—— -- ——- 半角:—',
'ws': '标点符号-省略号,全角:…… …',
'wt': '标点符号-叹号,全角:! 半角:!',
'ww': '标点符号-问号,全角:? 半角:?',
'wyz': '标点符号-左引号,全角:“ ‘ 『',
'wyy': '标点符号-右引号,全角:” ’ 』',
'x': '字符串',
'xu': '字符串-网址URL',
'xx': '字符串-非语素字',
'y': '语气词',
'z': '状态词',
'un': '未知词'
}
def add_customer_word(self, user_words: str):
pattern = re.compile('[,,]')  # split the user word list on Chinese or ASCII commas
user_words_list = pattern.split(user_words)
for word in user_words_list:
jieba.suggest_freq(word, tune=True)
# def combine_text(self, file_path):
# df = pd.read_excel(file_path)
# text = '。'.join(df['content'])
# return text
def analysis_data(self, string_data):
seg_list_exact = jieba.posseg.cut(string_data, HMM=True)  # exact-mode segmentation with POS tags (HMM enabled)
object_list = []
# load and apply the stopword list
stopwords_path = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
with open(stopwords_path, 'r', encoding='UTF-8') as meaninglessFile:
stopwords = set(meaninglessFile.read().split('\n'))
stopwords.add(' ')
for word in seg_list_exact:  # iterate over the segmented (word, flag) pairs
if word.word not in stopwords:  # compare the surface form, not the pair object, against the stopword set
object_list.append(word)  # keep the pair so the POS flag is still available below
number = 1000
word_counts = collections.Counter(object_list)  # count the frequency of each (word, flag) pair
word_counts_top = word_counts.most_common(number)  # keep the `number` most frequent items
index = 0
out_df = pd.DataFrame(columns=['词语', '词频', '词性'])
for top_word, frequency in word_counts_top:  # unpack each pair and its frequency
try:
out_df.loc[index] = [top_word.word, frequency, self.En2Cn_Pro[top_word.flag]]
index += 1
except KeyError:
pass
return out_df
if __name__ == '__main__':
pending_file = r'C:\Users\EDZ\Desktop\data1104.xlsx'
user_file = r'C:\Users\EDZ\Desktop\用户自定义词典_样例.txt'
doc_text_list = utils.read_excel(pending_file, user_file)
# print(doc_text_list)
text = '。'.join(doc_text_list)
print("len(corpus):" + str(len(text)))
wc = WordCount()
out_df = wc.analysis_data(string_data=text)
out_df.to_excel('结果.xlsx', index=False)
\ No newline at end of file
data_loader:
dataset_path: E:\working\model_train\KMeans\dataset\test.xlsx
stopwords_path: E:\working\model_train\base\dataset\stopwords.txt
data_process:
use_stopwords: True
tokenizer: PerceptronLexicalAnalyzer
use_dev: False
train_size: 0.8
test_size: 0.1
random_state: 2021
embedding:
size: 100
window: 5
min_count: 5
workers: 5
sg: 0
iter: 20
norm: l2
use_idf: False
smooth_idf: False
model:
model_save: E:\working\model_train\kmeans_model
n_clusters: 30
init: k-means++
n_init: 5
max_iter: 100
evaluate:
average: micro
runner:
\ No newline at end of file
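A minimal sketch of how the `model` block of this KMeans training config could be consumed, assuming PyYAML and scikit-learn are installed and the config is saved as `kmeans_config.yaml` (hypothetical file name; the project's actual training runner is not shown here):
```python
import yaml
from sklearn.cluster import KMeans

# Load the training configuration (hypothetical file name).
with open('kmeans_config.yaml', 'r', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

# Map the `model` section onto scikit-learn's KMeans estimator.
m = cfg['model']
kmeans = KMeans(
    n_clusters=m['n_clusters'],  # 30 in the config above
    init=m['init'],              # 'k-means++'
    n_init=m['n_init'],          # 5
    max_iter=m['max_iter'],      # 100
)
print(kmeans)
```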
"""
"""
年报正式用语词典
dict/formal_pos.txt 正式用语正面情绪词典
dict/formal_neg.txt 正式用语负面情绪词典
"""
from cnsenti import Sentiment
senti = Sentiment(pos='dict/formal_pos.txt',  # relative path of the positive-word txt dictionary
neg='dict/formal_neg.txt',  # relative path of the negative-word txt dictionary
merge=False,  # whether to merge cnsenti's built-in dictionary with the user-supplied one
encoding='utf-8')  # both txt files are UTF-8 encoded
# test_text = '这家公司是行业的引领者,是中流砥柱。今年的业绩非常好。'
text1 = '公司加强数字化制造顶层策划,助推核心制造能力升级,通过对内外部资源的充分调研、分析和论证,研究并制定了《航天晨光智能制造方案》,提出了公司智能信息化方案规划和智能制造产业发展路径,制定了公司智能制造及信息化建设工作计划,明确通过“三步走”的方式,实现设备资源内部优化和外部拓展 '
text2 = '1、转型升级风险:转型升级成效尚未呈现,主业市场需求持续下降公司虽大力推进转型升级工作,但成效尚未呈现,目前仍以传统装备制造业为主,产品以单件、元件为主,信息化、智能化程度不高,缺乏成套、成组、高端化、集成化的新产品'
result = senti.sentiment_count(text2)
print('sentiment_count', result)
The custom-dictionary feature of the cnsenti library can be used to measure the sentiment of annual-report text and of posts on financial social media.
> 姚加权,冯绪,王赞钧,纪荣嵘,张维. 语调、情绪及市场影响:基于金融情绪词典. 管理科学学报,2021. 24(5), 26-46.
The paper behind this project develops a Chinese financial sentiment lexicon. Existing Chinese financial sentiment dictionaries have the following shortcomings:
- most rely on general emotion adjectives, which transfer poorly to financial text
- some simply localize the English LM dictionary into a Chinese financial sentiment dictionary
- most are built largely by hand
Starting from two data sources, annual reports and social media, and using data-mining and deep-learning methods, the paper builds two families of sentiment dictionaries: one for formal language and one for informal language.
## Labeling approach
Dictionaries are usually built either by merging several existing dictionaries or by training on manually labeled data. The paper instead uses a labeling trick that achieves close to manually labeled quality without any human annotation.
### Labeling the formal-language dictionary
For the formal-language dictionary, each annual report is labeled as positive or negative according to the sign of the cumulative return over the three trading days after the report is published (a minimal sketch of this rule is shown below).
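A minimal sketch of the return-sign labeling rule, assuming a hypothetical DataFrame with a `cum_return_3d` column holding each report's 3-day post-publication cumulative return (the column and values below are illustrative, not from the paper):
```python
import pandas as pd

# Hypothetical input: one row per annual report, with its cumulative return over the
# three trading days after publication.
reports = pd.DataFrame({
    'report_id': ['A2020', 'B2020', 'C2020'],
    'cum_return_3d': [0.031, -0.012, 0.004],
})

# Positive cumulative return -> positive label (1), otherwise negative label (0).
reports['label'] = (reports['cum_return_3d'] > 0).astype(int)
print(reports)
```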
### Labeling the informal-language dictionary
The informal corpus consists of all posts about Chinese listed companies on the Xueqiu forum and the East Money stock bars, 81.3 million posts in total.
On online stock forums users often attach emoticons when posting an opinion, so many posts carry an explicit sentiment marker. Posts with such markers remove the need for manual sentiment annotation (a minimal sketch of the idea is shown below).
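A minimal sketch of emoticon-based labeling, with hypothetical marker sets (the actual emoticons used in the paper are not reproduced here):
```python
# Hypothetical emoticon marker sets; the idea is that the emoticons users attach to
# their posts act as sentiment labels, so no manual annotation is needed.
POSITIVE_MARKERS = {'[牛]', '[加油]', '[赞]'}
NEGATIVE_MARKERS = {'[跌]', '[哭]', '[亏]'}

def label_post(text):
    """Return 1 for a positive post, 0 for a negative post, None if no marker is found."""
    if any(m in text for m in POSITIVE_MARKERS):
        return 1
    if any(m in text for m in NEGATIVE_MARKERS):
        return 0
    return None

print(label_post('今天继续加仓 [牛]'))  # 1
print(label_post('又跌停了 [哭]'))      # 0
```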
<br>
For the detailed construction procedure, see the original paper. The authors have released the Chinese sentiment dictionary, which I have organized into four txt files:
- formal_pos.txt   **positive** sentiment words, formal language
- formal_neg.txt   **negative** sentiment words, formal language
- unformal_pos.txt **positive** sentiment words, informal language
- unformal_neg.txt **negative** sentiment words, informal language
<br>
## Using the Chinese financial dictionary
cnsenti supports custom dictionaries: loading different txt dictionary files lets you count sentiment words of different registers.
### Formal-language dictionary for annual reports
- dict/formal_pos.txt **positive** sentiment words, formal language
- dict/formal_neg.txt **negative** sentiment words, formal language
```python
from cnsenti import Sentiment
senti = Sentiment(pos='dict/formal_pos.txt',  # relative path of the positive-word txt dictionary
neg='dict/formal_neg.txt',  # relative path of the negative-word txt dictionary
merge=False,  # whether to merge cnsenti's built-in dictionary with the custom one
encoding='utf-8')  # both txt files are UTF-8 encoded
test_text = '这家公司是行业的引领者,是中流砥柱。今年的业绩非常好。'
result = senti.sentiment_count(test_text)
print('sentiment_count',result)
```
Run
```
sentiment_count {'words': 16, 'sentences': 2, 'pos': 3, 'neg': 0}
```
<br>
### Informal-language dictionary for financial social media
- dict/unformal_pos.txt **positive** sentiment words, informal language
- dict/unformal_neg.txt **negative** sentiment words, informal language
```python
from cnsenti import Sentiment
senti = Sentiment(pos='dict/unformal_pos.txt',  # relative path of the positive-word txt dictionary
neg='dict/unformal_neg.txt',  # relative path of the negative-word txt dictionary
merge=False,  # do not merge cnsenti's built-in dictionary with the custom one
encoding='utf-8')  # both txt files are UTF-8 encoded
test_text = '这个股票前期走势承压,现在阴跌,散户只能割肉离场,这股票真垃圾'
result = senti.sentiment_count(test_text)
print('sentiment_count',result)
```
Run
```
sentiment_count {'words': 18, 'sentences': 1, 'pos': 0, 'neg': 2}
```
<br>
## Notes
If you use the dictionaries in this project, please cite the following reference:
> 姚加权,冯绪,王赞钧,纪荣嵘,张维. 语调、情绪及市场影响:基于金融情绪词典. 管理科学学报,2021. 24(5), 26-46.
data_process:
label_encode: true
random_state: 2021
test_file_path: test.txt
test_size: 0.1
tokenizer: jieba
train_file_path: train.txt
train_size: 0.8
use_dev: false
use_stopwords: true
embedding:
name: null
embedding_path: null
tokenizer_path: null
evaluate:
average: binary
model:
autotuneDuration: 100
autotuneModelSize: 200M
model_name: fxxl_model.bin
model_path: null
runner:
thres: null
data_process:
use_dev: False
train_size: 0.7
test_size: 0.2
random_state: 2021
label_encode: False
embedding:
pretrained_name: bert-base-chinese
embedding_path: null
tokenizer_path: null
model:
model_name: fxxl_model
model_path: null
evaluate:
average: micro
runner:
thres: null
\ No newline at end of file
data_process:
use_stopwords: True
tokenizer: PerceptronLexicalAnalyzer
random_state: 2021
embedding:
use_Tencent: True
size: 100
window: 5
min_count: 5
workers: 5
sg: 0
iter: 20
norm: l2
use_idf: True
smooth_idf: True
with_feature_selection: False
embedding_path: voc/
tokenizer_path: null
model:
model_path: null
model_name: kmeans_model.pkl
n_clusters: False
init: k-means++
n_init: 5
max_iter: 100
evaluate:
runner:
save_fname: results2.xlsx
data_process:
use_stopwords: True
use_dev: False
train_size: 0.8
test_size: 0.1
random_state: 2021
embedding:
name: fxxl
title_weight: 5
title_feature_ratio: 0.1
content_feature_ratio: 0.2
tokenizer_path: null
embedding_path: vocab/
model:
name: fxxl
r: 0.95
model_name: null
model_path: model/
evaluate:
average: binary
runner:
thres: 0.55
\ No newline at end of file
data_process:
use_stopwords: True
tokenizer: jieba
use_dev: False
train_size: 0.8
test_size: 0.1
random_state: 2021
min_content: 50
embedding:
embedding_path: bert_CNNVectorize_data
tokenizer_path: tokenizer_data.pkl
model:
input_shape: 3500
batch_size: 32
epochs: 2
shuffle: True
activation: relu
model_name: fxxl.h5
model_path: null
evaluate:
average: binary
runner:
thres: null
\ No newline at end of file
data_process:
use_stopwords: True
tokenizer: jieba
use_dev: False
train_size: 0.8
test_size: 0.1
random_state: 2021
min_content: 50
embedding:
name: fxxl
transformer: tf
transformer_norm: l2
embedding_path: null
tokenizer_path: null
model:
lr: 0.1
reg_alpha: 0
reg_lambda: 1
objective: binary:logitraw
with_sample_weight: True
subsample: 1
thres: 0.55
min_child_weight: 1
scale_pos_weight: 1
model_name: fxxl
model_path: null
evaluate:
average: binary
runner:
thres: null
\ No newline at end of file
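The hyperparameters in the `model` block above (reg_alpha, reg_lambda, objective: binary:logitraw, scale_pos_weight) read like xgboost settings. A minimal illustrative sketch, assuming they are indeed meant for `xgboost.XGBClassifier` and that the config is saved as `fxxl_xgb.yaml` (hypothetical file name; the project's actual runner is not shown here):
```python
import yaml
from xgboost import XGBClassifier

# Load the config (hypothetical file name) and map the `model` section onto XGBClassifier.
with open('fxxl_xgb.yaml', 'r', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

m = cfg['model']
clf = XGBClassifier(
    learning_rate=m['lr'],                   # 0.1
    reg_alpha=m['reg_alpha'],                # L1 regularization, 0
    reg_lambda=m['reg_lambda'],              # L2 regularization, 1
    objective=m['objective'],                # 'binary:logitraw'
    subsample=m['subsample'],                # 1
    min_child_weight=m['min_child_weight'],  # 1
    scale_pos_weight=m['scale_pos_weight'],  # 1
)
# `model.thres` (0.55) would be applied afterwards as a decision threshold on the scores.
print(clf)
```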