Commit c4e5365c, author: ctt

Natural language platform, version V1.0

nohup python manage.py runserver --noreload 0.0.0.0:7004 >> app.log 2>&1 &
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class BaseConfig(AppConfig):
name = 'base'
from django.db import models
from datetime import datetime
# Create your models here.
class User(models.Model):
username = models.CharField(max_length=30, unique=True)
true_name = models.CharField(max_length=30)
sex = models.CharField(max_length=2)
mobile_number = models.CharField(max_length=20)
mail = models.CharField(max_length=20)
id_card = models.CharField(max_length=20)
password = models.CharField(max_length=40)
account_number = models.CharField(max_length=20)
def toDict(self):
return {'id':self.id,
'username':self.username,
'true_name':self.true_name,
'sex':self.sex,
'mobile_number':self.mobile_number,
'mail':self.mail,
'id_card':self.id_card,
'password':self.password,
'account_number':self.account_number,
# 'update_at':self.update_at.strftime('%Y-%m-%d %H:%M:%S')
}
class Meta:
db_table = 'user'
class ServiceManage(models.Model):
name = models.CharField(max_length=15)
username = models.CharField(max_length=30)
filenames = models.CharField(max_length=200)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=10)
path = models.CharField(max_length=20)
def toDict(self):
return {'name': self.name,
'username': self.username,
'filenames': self.filenames,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'path': self.path,
}
class Meta:
db_table = 'service_manage'
class SubjectManage(models.Model):
sid = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=30)
def toDict(self):
return {'sid': self.sid,
'name': self.name,
}
class Meta:
db_table = 'subject_manage'
class ModelManage(models.Model):
task_name = models.CharField(max_length=30)
function_type = models.CharField(max_length=20)
model_type = models.CharField(max_length=20)
version_num = models.IntegerField()
create_date = models.DateTimeField(default=None)
def toDict(self):
return {'id': self.id,
'task_name': self.task_name,
'function_type': self.function_type,
'model_type': self.model_type,
'version_num': self.version_num,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
}
class Meta:
db_table = 'model_manage'
class VersionManage(models.Model):
model = models.ForeignKey(ModelManage, related_name='version_model', on_delete=models.CASCADE)
version = models.CharField(max_length=20)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=20)
creator = models.CharField(max_length=30)
path = models.CharField(max_length=20, unique=True)
def toDict(self):
return {'id': self.id,
'version': self.version,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'creator': self.creator,
'path': self.path,
}
class Meta:
db_table = 'version_manage'
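# A minimal sketch of how the toDict() helpers above are typically consumed: serialise a queryset
# into a JSON response. Assumes a Django view module that can import base.models; the view name
# list_services is illustrative only.
from django.http import JsonResponse
from base.models import ServiceManage

def list_services(request):
    # each model instance serialises itself via toDict()
    rows = [m.toDict() for m in ServiceManage.objects.all()[:20]]
    return JsonResponse({'resultData': rows})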
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from django.urls import path
from base.views import views
from django.conf.urls import url
from base.views import views as base_views
urlpatterns = [
url(r'^register-account', base_views.register_account, name='register_account'),
url(r'^verify-username', base_views.verify_username, name='verify_username'),
url(r'^login', base_views.login, name='login'),
url(r'^reset-password', base_views.reset_password, name='reset_password'),
url(r'^show-config-file', base_views.show_config_file, name='show_config_file'),
url(r'^show-service-file', base_views.show_service_file, name='show_service_file'),
url(r'^delete-file-row-manage', base_views.delete_file_row_manage, name='delete_file_row_manage'),
url(r'^delete-file-row-service', base_views.delete_file_row_service, name='delete_file_row_service'),
url(r'^file-upload', base_views.file_upload, name='file_upload'),
url(r'^show-log-file', base_views.show_log_file, name='show_log_file'),
url(r'^validate-code', base_views.validate_code, name='validate_code'),
url(r'^download-zip', base_views.download_zip, name='download_zip'),
url(r'^download-xlsx', base_views.download_xlsx, name='download_xlsx'),
url(r'^query-manage', base_views.query_manage, name='query_manage'),
url(r'^forget-password', base_views.forget_password, name='forget_password'),
url(r'^train', base_views.run_train, name='train'),
url(r'^query-service-manage', base_views.query_service_manage, name='query_service_manage'),
url(r'^query-subject', base_views.query_subject, name='query_subject'),
url(r'^query-version', base_views.query_version, name='query_version'),
url(r'^query-task-name', base_views.query_task_name, name='query_task_name')
]
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 11:51
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 14:34
# @Author : 程婷婷
# @FileName: BaseConfig.py
# @Software: PyCharm
import yaml
class BaseConfig:
def __init__(self, config_path):
self._config_path = config_path
self._parsed_file = self.load_config()
def load_config(self):
print(self._config_path)
with open(self._config_path) as yaml_file:
parsed_file = yaml.load(yaml_file, Loader=yaml.FullLoader)
return parsed_file
# if __name__ == '__main__':
# bc = BaseConfig()
# print(bc._parsed_file)
# print(bc.load_config()['data_path'])
# print(bc.load_config()['embedding'])
# print(bc.load_config()['model'])
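# A minimal, self-contained sketch of BaseConfig in use: write a tiny YAML file and read it back,
# showing that _parsed_file is a plain nested dict. The keys below are illustrative, not the
# platform's real schema.
if __name__ == '__main__':
    import os
    import tempfile
    sample = 'model:\n  model_name: svm\nembedding:\n  size: 100\n'
    with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as f:
        f.write(sample)
    cfg = BaseConfig(f.name)
    print(cfg._parsed_file['embedding']['size'])   # -> 100
    os.remove(f.name)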
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 9:58
# @Author : 程婷婷
# @FileName: BaseDataLoader.py
# @Software: PyCharm
import pandas as pd
from base.views.config.BaseConfig import BaseConfig
class BaseDataLoader:
def __init__(self, config_path):
self.data_loader_config = BaseConfig(config_path)._parsed_file['data_loader']
def read_file(self):
symbol = self.data_loader_config['dataset_path'].split('.')[-1]
if (symbol == 'xlsx') or (symbol == 'xls'):
df = pd.read_excel(r''+self.data_loader_config['dataset_path'])
elif symbol == 'csv':  # split('.')[-1] yields 'csv' without the dot
df = pd.read_csv(r''+self.data_loader_config['dataset_path'], sep='\t')
else:
print('数据类型错误')
return '数据类型错误'
df.drop_duplicates(subset='content', keep='first', inplace=True)
df.dropna(subset=['content', 'label'], inplace=True)
df = df.reset_index(drop=True)
print('=================执行正文去重和去空之后共有%d条数据=============' % len(df['content']))
return df
def read_stopwords(self):
# 读取停顿词列表
stopword_list = [k.strip() for k in open(self.data_loader_config['stopwords_path'], encoding='utf8').readlines() if
k.strip() != '']
return stopword_list
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 15:28
# @Author : 程婷婷
# @FileName: BaseDataProcess.py
# @Software: PyCharm
import os
import re
import jieba
import pickle
import gensim
import logging
import numpy as np
import pandas as pd
from pyhanlp import *
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_classif, SelectPercentile
from base.views.config.BaseConfig import BaseConfig
from base.views.data.BaseDataLoader import BaseDataLoader
from platform_zzsn.settings import BASE_DIR
format = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=format, level=logging.INFO)
class BaseDataProcess:
def __init__(self, config_path):
self.embedding_config = BaseConfig(config_path)._parsed_file['embedding']
self.process_config = BaseConfig(config_path)._parsed_file['data_process']
PerceptronLexicalAnalyzer = JClass('com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer')
self.pla_segment = PerceptronLexicalAnalyzer()
self.bdl = BaseDataLoader(config_path)
def clean_content(self, content):
bs = BeautifulSoup(content, 'html.parser')
return bs.text
def remove_char(self, content):
# 保留中文、英语字母、数字和标点
graph_filter = re.compile(r'[^\u4e00-\u9fa5a-zA-Z0-9\s,。\.,?\?!!;;]')
content = graph_filter.sub('', content)
return content
def jieba_tokenizer(self, content):
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in jieba.lcut(content) if word not in stopwords])
def pla_tokenizer(self, content):
words = list(self.pla_segment.analyze(content).toWordArray())
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in words if word not in stopwords])
def save(self, voc, path):
with open(path, 'wb') as voc_file:
pickle.dump(voc, voc_file)
def process(self, data, min_content=0):
processed_data = []
for record in data:
record = self.clean_content(str(record))
record = self.remove_char(record)
if len(record) > min_content:
methods = self.process_config['tokenizer']
if methods == 'PerceptronLexicalAnalyzer':
record = self.pla_tokenizer(record)
record = [w for w in record.split() if w.strip() != '']  # split the space-joined tokens back into words (iterating the string itself yields single characters)
else:
record = self.jieba_tokenizer(record)
record = [w for w in record.split() if w.strip() != '']  # split the space-joined tokens back into words (iterating the string itself yields single characters)
processed_data.append(' '.join(record))
else:
pass
return processed_data
def split_dataset(self, data, use_dev):
if use_dev:
train_data_set, test_dev_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
test_data_set, dev_data_set = train_test_split(test_dev_set,  # the held-out part yields test and dev sets (a 3-way unpack here would raise ValueError)
test_size=self.process_config['test_size'],
random_state=self.process_config['random_state'],
shuffle=True)
print(len(train_data_set) + len(test_data_set) + len(dev_data_set))
return train_data_set, test_data_set, dev_data_set
else:
train_data_set, test_data_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
return train_data_set, test_data_set
def bag_of_words(self, data, label):
vectorizer = CountVectorizer(ngram_range=(1, 1), min_df=5)
x = vectorizer.fit_transform(data)
transformer = TfidfTransformer(norm=self.embedding_config['norm'], use_idf=self.embedding_config['use_idf'],
smooth_idf=self.embedding_config['smooth_idf'])
x = transformer.fit_transform(x).toarray()
if self.embedding_config['with_feature_selection']:
transformed_data = SelectPercentile(mutual_info_classif, percentile=20).fit_transform(x, label)
else:
transformed_data = x
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
self.save(voc=vectorizer.vocabulary_, path=os.path.join(self.embedding_config['embedding_path'], 'tfidf.pkl'))
return transformed_data, vectorizer.get_feature_names()
def word2vec(self, data, feature_words):
model = gensim.models.word2vec.Word2Vec(sentences=data,
size=self.embedding_config['size'],
window=self.embedding_config['window'],
min_count=self.embedding_config['min_count'],
workers=self.embedding_config['workers'],
sg=self.embedding_config['sg'],
iter=self.embedding_config['iter'])
vocabulary_w2v = model.wv.vocab.keys()
count = 0
if self.embedding_config['use_Tencent']:
model_tencent = gensim.models.KeyedVectors.load_word2vec_format(
os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
vocabulary_tencent = model_tencent.wv.vocab.keys()
vector_matrix = np.zeros((len(feature_words), int(self.embedding_config['size']) + 200))
for word in feature_words:
if word in vocabulary_tencent:
vector_tencent = model_tencent.wv.word_vec(word)
else:
vector_tencent = np.random.randn(200)
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector = np.concatenate((vector_tencent, vector_w2v))
vector_matrix[count] = vector
count += 1
else:
vector_matrix = np.zeros((len(feature_words), self.embedding_config['size']))
for word in feature_words:
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector_matrix[count] = vector_w2v
count += 1
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
model.save(os.path.join(self.embedding_config['embedding_path'], 'word2vec.model'))
return vector_matrix
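# A hedged end-to-end sketch of how the classes above fit together. It assumes a config.yaml whose
# data_loader / data_process / embedding sections use the keys referenced in this file; the path is
# a placeholder.
# if __name__ == '__main__':
#     loader = BaseDataLoader('/path/to/config.yaml')
#     df = loader.read_file()
#     processor = BaseDataProcess('/path/to/config.yaml')
#     texts = processor.process(df['content'])
#     x, feature_words = processor.bag_of_words(texts, df['label'])
#     vectors = processor.word2vec([t.split() for t in texts], feature_words)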
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:30
# @Author : 程婷婷
# @FileName: BaseEvaluator.py
# @Software: PyCharm
from sklearn.metrics import precision_score, f1_score, recall_score, classification_report
import logging
from base.views.config.BaseConfig import BaseConfig
formats = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=formats, level=logging.INFO)
class BaseEvaluator:
def __init__(self, config_path):
self.evaluate_config = BaseConfig(config_path)._parsed_file['evaluate']
def evaluate(self, y_true, y_pred, label_mapping, logger):
result = []
y_true = list(map(str, y_true))
y_pred = list(map(str, y_pred))
logger.info('模型评估结果如下:')
if not label_mapping:
result.append(classification_report(y_true, y_pred))
logger.info(classification_report(y_true, y_pred))
else:
for value in label_mapping.values():
print([k for k,v in label_mapping.items() if v == value])
p = precision_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
r = recall_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
f1 = f1_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
print({'value': value,'召回率为': r, '精确率为': p, 'F1': f1})
logger.info('标签为%s' % [k for k,v in label_mapping.items() if v == value][0])
logger.info('精确率为%.2f' %p)
logger.info('召回率为%.2f' %r)
logger.info('F1值为%.2f' % f1)
result.append(str({'label': value,'recall': r, 'precision': p, 'F1': f1}))
return ' '.join(result)
# y_true = [0, 1, 2, 0, 1, 2]
# y_pred = [0, 2, 1, 0, 0, 1]
# print(BaseEvaluator())
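# A hedged usage sketch for BaseEvaluator, assuming an `evaluate: {average: binary}` section in
# config.yaml; the labels, mapping and logger below are illustrative only.
# evaluator = BaseEvaluator('/path/to/config.yaml')
# report = evaluator.evaluate(y_true=[0, 1, 1, 0], y_pred=[0, 1, 0, 0],
#                             label_mapping={'负面': 0, '正面': 1},
#                             logger=logging.getLogger(__name__))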
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
import os
import yaml
import random
import smtplib
from email.mime.text import MIMEText
from django.core.paginator import Paginator
from email.mime.multipart import MIMEMultipart
from PIL import Image,ImageFont,ImageDraw,ImageFilter
from base.models import ModelManage, ServiceManage, VersionManage
from platform_zzsn.settings import BASE_DIR
class Picture:
def __init__(self):
self.size = (240,60)
self.mode='RGB'
self.color='white'
self.font = ImageFont.truetype(os.path.join(BASE_DIR,
'static/common/font/arial.ttf'), 36) #设置字体大小
def randChar(self):
basic='23456789abcdefghijklmnpqrstwxyzABCDEFGHIJKLMNPQRSTWXYZ'
return basic[random.randint(0,len(basic)-1)] #随机字符
def randBdColor(self):
return (random.randint(64,255),random.randint(64,255),random.randint(64,255)) #背景
def randTextColor(self):
return (random.randint(32, 127), random.randint(32, 127), random.randint(32, 127)) #随机颜色
def proPicture(self):
new_image=Image.new(self.mode,self.size,self.color) #创建新图像有三个默认参数:尺寸,颜色,模式
drawObject=ImageDraw.Draw(new_image) #创建一个可以对image操作的对象
line_num = random.randint(4,6) # 干扰线条数
for i in range(line_num):
#size=(240,60)
begin = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
end = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
drawObject.line([begin, end], self.randTextColor())
for x in range(240):
for y in range(60):
tmp = random.randint(0,50)
if tmp>30: #调整干扰点数量
drawObject.point((x,y),self.randBdColor())
randchar=''
for i in range(5):
rand=self.randChar()
randchar+=rand
drawObject.text([50*i+10,10],rand,self.randTextColor(),font=self.font) #写入字符
new_image = new_image.filter(ImageFilter.SHARPEN) # 滤镜
return new_image,randchar
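# A minimal sketch of serving the captcha above from a view: keep the expected text in the session
# and return the image as a PNG. The view name validate_code_view is illustrative; BytesIO /
# HttpResponse usage is standard Django/Pillow.
from io import BytesIO
from django.http import HttpResponse

def validate_code_view(request):
    image, code = Picture().proPicture()
    request.session['validate_code'] = code      # remember the text for later verification
    buffer = BytesIO()
    image.save(buffer, 'PNG')                    # PIL Image -> PNG bytes
    return HttpResponse(buffer.getvalue(), content_type='image/png')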
def update_config_file(config_path, config_file):
data = yaml.load(config_file, Loader=yaml.FullLoader)
data['data_loader'] = {}
model_path = data['model']['model_path']
model_name = data['model']['model_name']
if data['model']['model_path']:
data['model']['model_path'] = os.path.join(config_path, model_path)
else:
data['model']['model_path'] = os.path.join(config_path, model_name)
print(data['model']['model_path'])
embedding_path = data['embedding']['embedding_path']
if embedding_path:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['embedding_path'])
else:
if data['embedding']['name']:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['name'])
tokenizer_path = data['embedding']['tokenizer_path']
if tokenizer_path:
data['embedding']['tokenizer_path'] = os.path.join(config_path, data['embedding']['tokenizer_path'])
try:
test_file_path = data['data_process']['test_file_path']
train_file_path = data['data_process']['train_file_path']
except KeyError:
pass
else:
data['data_process']['test_file_path'] = os.path.join(config_path, test_file_path)
data['data_process']['train_file_path'] = os.path.join(config_path, train_file_path)
for file in os.listdir(config_path):
if ('.xls' == file[-4:]) or ('.xlsx' == file[-5:]):
xlsx_path = os.path.join(config_path, file)
data['data_loader']['dataset_path'] = xlsx_path
if 'save_fname' in data['runner'].keys():
data['runner']['save_fpath'] = os.path.join(config_path, data['runner']['save_fname'])
data['data_loader']['stopwords_path'] = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
file_path = os.path.join(config_path, 'config.yaml')
with open(file_path, 'w') as yaml_file:
yaml.safe_dump(data, yaml_file, default_flow_style=False)
return file_path
def select_manage(task_name, function_type, model_type, begin_cdate, end_cdate, page_size, current_page):
condition = {'task_name': task_name, 'function_type': function_type, 'model_type': model_type,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
managers = ModelManage.objects.filter(**condition).order_by('-create_date')
len_managers = len(managers)
page = Paginator(managers, page_size)
maxpages = page.num_pages # 最大页数
pIndex = int(current_page)
# 判断页数是否越界
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # 当前页数据
return list(manager_list), len_managers
def select_version(model_id, begin_cdate, end_cdate, page_size, current_page):
condition = {'model_id': model_id,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
versions = VersionManage.objects.filter(**condition).order_by('-create_date')
len_versions = len(versions)
page = Paginator(versions, page_size)
maxpages = page.num_pages # 最大页数
pIndex = int(current_page)
# 判断页数是否越界
if pIndex > maxpages:
pIndex = maxpages
version_list = page.page(pIndex) # 当前页数据
return list(version_list), len_versions
def select_service_manage(name, begin_cdate, end_cdate, state, username, page_size, current_page):
condition = {
'name': name,
'state': state,
'create_date__range': (begin_cdate, end_cdate),
'username': username,
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
print(condition)
service_managers = ServiceManage.objects.filter(**condition).order_by('-create_date')
len_service_managers = len(service_managers)
page = Paginator(service_managers, page_size)
maxpages = page.num_pages
pIndex = int(current_page)
# 判断页数是否越界
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # 当前页数据
return list(manager_list), len_service_managers
def sendMail(user,pwd,sender,receiver,msg_title):
mail_host = "smtp.163.com" #163的SMTP服务器
message = MIMEMultipart('alternative')
#设置邮件的发送者
message["From"] = sender
#设置邮件的接收方
message["To"] = ",".join(receiver)
#4.设置邮件的标题
message["Subject"] = msg_title
# 添加plain格式的文本
# message.attach(MIMEText('您好,\n'
# ' 您当前的密码为%s, 为了保证您的账号安全,请尽快登陆重置您的密码'%msg_content, 'plain', 'utf-8'))
# 添加html内容
message.attach(MIMEText('<html>'
'<body>'
'<h1>Hello </h1><br> '
'<h3>To ensure the security of your account, please log in and reset your password as soon as possible.</h3>'
'<h2><a href="http://192.168.1.149:8020/reset_password/">点此重置</a></h2>'
'</body>'
'</html>', 'html', 'utf-8'))
#1.启用服务器发送邮件
smtpObj = smtplib.SMTP_SSL(mail_host,465)
#2.登录邮箱进行验证
smtpObj.login(user,pwd)
#3.发送邮件
#参数:发送方,接收方,邮件信息
smtpObj.sendmail(sender,receiver,message.as_string())
return True
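# A hedged usage sketch for sendMail; every address and the SMTP authorisation code below are
# placeholders, not real credentials.
# sendMail(user='example@163.com', pwd='<smtp-auth-code>', sender='example@163.com',
#          receiver=['someone@example.com'], msg_title='密码重置')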
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:29
# @Author : 程婷婷
# @FileName: BaseLoss.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:18
# @Author : 程婷婷
# @FileName: BaseModel.py
# @Software: PyCharm
from base.views.config.BaseConfig import BaseConfig
import os
import pickle
class BaseModel:
def __init__(self,config_path):
self.model_config = BaseConfig(config_path)._parsed_file['model']
def building_model(self, *params):
pass
def save(self, model):
dir = os.path.dirname(self.model_config['model_path'])
if not os.path.exists(dir):
os.makedirs(dir)
with open(self.model_config['model_path'], 'wb') as model_file:
pickle.dump(model, model_file)
def predict(self, model, X):
proba = model.predict_proba(X)
y_predict = model.predict(X)
return {'proba': proba, 'y_predict': y_predict}
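# A hedged sketch of the intended extension point: a concrete subclass that builds an estimator
# from the 'model' section of config.yaml. LogisticRegression and the 'max_iter' key are
# illustrative choices, not something defined elsewhere in this codebase.
from sklearn.linear_model import LogisticRegression

class LogisticRegressionModel(BaseModel):
    def building_model(self):
        # read hyper-parameters from the parsed config, with a safe default
        return LogisticRegression(max_iter=int(self.model_config.get('max_iter', 1000)))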
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:36
# @Author : 程婷婷
# @FileName: BaseRunner.py
# @Software: PyCharm
from base.views.config.BaseConfig import BaseConfig
class BaseRunner:
def __init__(self,config_path):
self.runner_config = BaseConfig(config_path)._parsed_file['runner']
def train(self, logger):
pass
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 9:24
# @Author : 程婷婷
# @FileName: test.py
# @Software: PyCharm
import jieba
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectPercentile, chi2
X, y = load_digits(return_X_y=True)
print(X.shape)
print(X[:10], y[:100])
X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)
print(X_new.shape)
print(X_new[:10])
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/20 16:58
# @Author : 程婷婷
# @FileName: token_authorize.py
# @Software: PyCharm
import jwt
import time
import functools
from jwt import exceptions
from django.http import JsonResponse
from platform_zzsn.settings import *
# SECRET_KEY is imported from platform_zzsn.settings above and is used to sign and verify the JWTs
def create_token(user):
'''Create a JWT for the given user.'''
headers = {
"alg": "HS256",
"typ": "JWT"
}
exp = int(time.time() + 3*60*60)
payload = {
"id": user.id,
"name": user.username,
"exp": exp
}
token = jwt.encode(payload=payload, key=SECRET_KEY, algorithm='HS256', headers=headers).decode('utf-8')
return token
def login_required(view_func):
@functools.wraps(view_func)
def validate_token(request, *args, **kwargs):
'''Validate the token from the Authorization header; on success, call the wrapped view.'''
payload = None
msg = None
try:
token = request.META.get("HTTP_AUTHORIZATION")
payload = jwt.decode(token, SECRET_KEY, True, algorithms=['HS256'])
print(payload)
return view_func(request, *args, **kwargs)
# jwt有效、合法性校验
except exceptions.ExpiredSignatureError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '登录已过期'
})
except jwt.DecodeError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '缺少参数token'
# token认证失败
})
except jwt.InvalidTokenError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '非法的token'
# 非法的token
})
return validate_token
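# A hedged round-trip sketch: create_token() signs id/name/exp with SECRET_KEY, and the decorator
# above verifies the same token taken from the Authorization header. DummyUser is illustrative and
# the calls assume the PyJWT 1.x API used above.
# class DummyUser:
#     id, username = 1, 'alice'
# token = create_token(DummyUser())
# payload = jwt.decode(token, SECRET_KEY, True, algorithms=['HS256'])
# print(payload['name'])   # -> 'alice'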
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/9 11:19
# @Author : 程婷婷
# @FileName: utils.py
# @Software: PyCharm
import os
import re
import jieba
import zipfile
import pandas as pd
from docx import Document
from platform_zzsn.settings import *
def read_txt(path):
with open(path, 'r', encoding='utf8') as file:
lines = file.readlines()
return lines
def read_docx(pending_file, user_file):
jieba.load_userdict(user_file)
document = Document(pending_file)
doc_text_list = []
for para in document.paragraphs:
para_text = re.sub(r'\s', '', para.text)
if para_text:
doc_text_list.append(para_text)
return doc_text_list
def read_excel(pending_file, user_file):
jieba.load_userdict(user_file)
doc_text_list = pd.read_excel(pending_file)['content']
doc_text_list.dropna(inplace=True)
return doc_text_list
def merge_para(paras):
new_paras = []
for i, para in enumerate(paras):
if not new_paras:
new_paras.append(para)
elif (len(new_paras[-1]) < 500):
new_paras[-1] += para
else:
new_paras.append(para)
return new_paras
def filter_stopwords(para):
path = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
stopword_list = [k.strip() for k in read_txt(path) if
k.strip() != '']
words = [word for word in jieba.lcut(para) if word not in stopword_list]
return words
# 获取列表的第二个元素
def takeSecond(elem):
return elem[1]
def takeFirst_len(elem):
return len(elem[0])
def make_zip(file_dir: str, zip_path: str) -> None:
zip_f = zipfile.ZipFile(zip_path, 'w')
pre_len = len(os.path.dirname(file_dir))
for parent, dir_names, filenames in os.walk(file_dir):
for filename in filenames:
path_file = os.path.join(parent, filename)
arc_name = path_file[pre_len:].strip(os.path.sep)
zip_f.write(path_file, arc_name)
zip_f.close()
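# A hedged usage sketch for make_zip: archive a finished task directory before offering it for
# download. Both paths are placeholders.
# make_zip('/home/zzsn/output/task_001', '/home/zzsn/output/task_001.zip')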
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class BasicServiceConfig(AppConfig):
name = 'basic_service'
from django.db import models
# Create your models here.
#-*- coding:utf-8 -*-
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from django.urls import path
from django.conf.urls import url
from basic_service.views import views
urlpatterns = [
url(r'^ner_single', views.ner_single, name='ner_single'),
url(r'^doc-similarity-single', views.doc_similarity_single, name='doc_similarity_single'),
url(r'^associated-word-single', views.associated_word_single, name='associated_word_single'),
url(r'^word_cut', views.word_cut, name='word_cut'),
url(r'^word_pos', views.word_pos, name='word_pos'),
url(r'^new_word_find', views.new_word_find, name='new_word_find'),
url(r'^show_srl', views.show_srl, name='show_srl'),
url(r'^show_dep', views.show_dep, name='show_dep'),
url(r'^create_keywords', views.create_keywords, name='create_keywords'),
url(r'^get_summary', views.get_summary, name='get_summary'),
url(r'^word_co_occurrence', views.word_co_occurrence, name='word_co_occurrence')
]
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 10:02
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 19:54
# @Author : 程婷婷
# @FileName: basic.py
# @Software: PyCharm
import os
import jieba
import json
import requests
import jionlp as jio
from ltp import LTP
import jieba.analyse
import ahocorasick
import pandas as pd
from gensim.models.keyedvectors import KeyedVectors
from platform_zzsn.settings import BASE_DIR
from model.base.views import utils
General_dict = utils.read_txt(os.path.join(BASE_DIR, 'static/base/dict_sogou.txt'))
General_dict_ = ''
for key in General_dict:
General_dict_ += ' ' + str(key.strip())
def word_cut(text):
ltp = LTP()
sentences = ltp.sent_split([text])
segment, _ = ltp.seg(sentences)
return segment
def word_pos(text):
ltp = LTP()
sentences = ltp.sent_split([text])
segment, hidden = ltp.seg(sentences)
pos = ltp.pos(hidden)
return segment, pos
class AC_Unicode:
"""稍微封装一下,弄个支持unicode的AC自动机
"""
def __init__(self):
self.ac = ahocorasick.Automaton()
def add_word(self, k, v):
# k = k.encode('utf-8')
return self.ac.add_word(k, v)
def make_automaton(self):
return self.ac.make_automaton()
def iter(self, s):
# 搜索文本中存在的单词
# s = s.encode('utf-8')
return self.ac.iter(s)
def new_words_find(text):
words = list(jieba.cut(text, HMM=True))
words_copy = words.copy()
ac = AC_Unicode()
sign = [0] * len(words_copy)
for word in words:
if len(word) >= 2:
ac.add_word(word, word)
ac.make_automaton()
result_ac = ac.iter(General_dict_)
for index, key in result_ac:
try:
words.remove(key)
except:
continue
for index, word in enumerate(words_copy):
if (len(word) >= 2) and (word in words):
sign[index] = 1
return words_copy, sign
def show_srl(text):
ltp = LTP()
sentences = ltp.sent_split([text])
sentences_srl_dict, sentences_seg_dict = {}, {}
for i, sentence in enumerate(sentences):
seg, hidden = ltp.seg([sentence])
srl = ltp.srl(hidden, keep_empty=False)
sentences_seg_dict['句子' + str(i+1)+':'+str(sentence)] = seg[0]
sentences_srl_dict['句子'+str(i+1)+':'+str(sentence)] = srl[0]
return sentences_seg_dict, sentences_srl_dict
def show_dep(text):
ltp = LTP()
sentences = ltp.sent_split([text])
sentences_dep_dict, sentences_seg_dict = {}, {}
for i, sentence in enumerate(sentences):
seg, hidden = ltp.seg([sentence])
dep = ltp.dep(hidden)
sentences_seg_dict['句子'+str(i+1)+':'+str(sentence)] = seg[0]
sentences_dep_dict['句子'+str(i+1)+':'+str(sentence)] = dep[0]
return sentences_seg_dict, sentences_dep_dict
def create_keywords(text:str, topK:int, with_weight:bool)->list:
print(type(topK))
keywords = jio.keyphrase.extract_keyphrase(text, top_k=topK, with_weight=with_weight)
print(keywords)
return keywords
def ner(text):
ltp = LTP()
seg, hidden = ltp.seg([text])
entity = ltp.ner(hidden)
return seg[0], entity[0]
def related_word_recommendation(words, word_num):
# print(model.wv.most_similar(words))
# print(words.split(','), word_num)
print(words)
result = model.most_similar_cosmul(words.split(','), topn=int(word_num)) # 余弦相似度
print(result)
return result
def post_similarity(url, text_1, text_2, sim_algorithm_name):
payload = {'text_1': text_1, 'text_2': text_2, 'sim_algorithm_name': sim_algorithm_name}
headers = {
'Content-Type': 'application/json'
}
response = requests.request('POST', url, headers=headers, data=json.dumps(payload))
data = json.loads(response.text)
return data
def summary(text, summary_length):
summaries = jio.summary.extract_summary(text, summary_length)
return summaries
# zh_nlp = stanza.Pipeline('zh-hans')
# en_nlp = stanza.Pipeline('en')
# nlp_dict = {'zh': zh_nlp, 'en': en_nlp}
#model = KeyedVectors.load_word2vec_format(os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
# if __name__ == '__main__':
# print(word_cut('汤姆生病了。他去了医院。'))
# print(word_pos('汤姆生病了。他去了医院。'))
# print(new_words_find('白月光,形容的是一种可望不可即的人或者事物,虽然一直在心上,却从不在身边。'))
# print(new_words_find('爷青回,表示爷的青春又回来了,爷表示的是自己,将自己的身份地位抬高一个档次,像我是你大爷一样,通常用来形容那些知名的人、经典的动画、影视、游戏剧等重新复出或者是回归。'))
# show_srl('他叫汤姆去拿外衣。')
# print(show_dep('他叫汤姆去拿外衣。'))
# -*- coding: utf-8 -*-
# @Time : 2021/10/13 17:07
# @Author : ctt
# @File : co
# @Project : platform_zzsn
from basic_service.views.basic import create_keywords
import pandas as pd
import numpy as np
def Get_file_keywords(filepath, topK):
data_array = [] # 每篇文章关键词的二维数组
set_word = [] # 所有关键词的集合
df = pd.read_excel(filepath)
sentences = df['内容'].tolist()
for sentence in sentences:
words = create_keywords(sentence, topK=topK, with_weight=False)
data_array.append(str(words))
for word in words:
if word not in set_word:
set_word.append(str(word))
set_word = list(set(set_word)) # 所有关键词的集合
return data_array, set_word
# 初始化矩阵
def build_matirx(set_word):
edge = len(set_word) + 1 # 建立矩阵,矩阵的高度和宽度为关键词集合的长度+1
matrix = [[''] * edge] * edge # 初始化矩阵
# print(matrix.shape)
print(matrix)
print(set_word)
matrix[0][1:] = np.array(set_word)
print(matrix)
matrix = list(map(list, zip(*matrix)))
print(set_word)
matrix[0][1:] = np.array(set_word) # 赋值矩阵的第一行与第一列
return matrix
# 计算各个关键词的共现次数
def count_matrix(matrix, formated_data):
for row in range(1, len(matrix)):
# 遍历矩阵第一行,跳过下标为0的元素
for col in range(1, len(matrix)):
# 遍历矩阵第一列,跳过下标为0的元素
# 实际上就是为了跳过matrix中下标为[0][0]的元素,因为[0][0]为空,不为关键词
if matrix[0][row] == matrix[col][0]:
# 如果取出的行关键词和取出的列关键词相同,则其对应的共现次数为0,即矩阵对角线为0
matrix[col][row] = str(0)
else:
counter = 0 # 初始化计数器
for ech in formated_data:
# 遍历格式化后的原始数据,让取出的行关键词和取出的列关键词进行组合,
# 再放到每条原始数据中查询
if matrix[0][row] in ech and matrix[col][0] in ech:
counter += 1
else:
continue
matrix[col][row] = str(counter)
return matrix
def main(filepath, topK):
formated_data, set_word = Get_file_keywords(filepath, topK)
matrix = build_matirx(set_word)
matrix = count_matrix(matrix, formated_data)
# data = pd.DataFrame(matrix)
return matrix
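# A hedged toy example of the matrix layout produced above: row 0 and column 0 hold the keywords,
# interior cells hold co-occurrence counts as strings. The keywords and documents are illustrative,
# and Get_file_keywords is bypassed so no Excel file is needed.
# set_word = ['减税', '降费', '就业']
# docs = [str(['减税', '降费']), str(['降费', '就业'])]
# matrix = count_matrix(build_matirx(set_word), docs)
# # matrix[1][2] == '1'   (减税 and 降费 co-occur in one document)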
import re
import pandas as pd
from collections import defaultdict, Counter
import numpy as np
import ahocorasick
import math
def read_text(file_articles, encoding='utf8'):
texts = set()
with open(file_articles, encoding=encoding) as f:
for line in f.readlines():
line = re.split(u'[^\u4e00-\u9fa50-9a-zA-Z]+', line)
for s in line:
if len(s) > 1:
texts.add(s)
print('文章数(即文本行数):{}'.format(len(texts)))
return texts
def get_ngrams_counts(texts, n, min_count):
'''
Count the frequency of every n-gram in the texts.
:param n: maximum gram length
:param min_count: minimum frequency; grams below this value are discarded
:return: (ngrams dict, total number of single characters)
'''
ngrams = defaultdict(int)
for t in list(texts):
for i in range(len(t)):
for j in range(1, n+1):
if i+j <= len(t):
ngrams[t[i:i+j]] += 1
ngrams = {i:j for i,j in ngrams.items() if j >= min_count}
total = 1.*sum([j for i,j in ngrams.items() if len(i) == 1])
print('字数:{}'.format(total))
return ngrams, total
def filter_with_porba(s, min_proba, total, ngrams):
'''
Compute the cohesion (凝固度) of an n-gram and keep it only if it exceeds the threshold.
:param s: candidate n-gram
:param min_proba: dict of minimum cohesion thresholds keyed by gram length
:return: True if the gram passes the threshold, otherwise False
'''
if len(s) >= 2:
score = min([total*ngrams[s]/(ngrams[s[:i+1]]*ngrams[s[i+1:]]) for i in range(len(s)-1)])
if score > min_proba[len(s)]:
return True
else:
return False
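# The score above is a PMI-style cohesion ratio: for each split point,
#     score_i = total * count(s) / (count(prefix) * count(suffix)) = P(s) / (P(prefix) * P(suffix)),
# and the minimum over all splits must exceed the length-specific threshold in min_proba.
# A hedged toy example (the counts and thresholds are illustrative, far smaller than real ones):
# ngrams = {'机': 10, '器': 8, '学': 9, '习': 9, '机器': 6, '学习': 7,
#           '机器学': 5, '器学习': 5, '机器学习': 5}
# total = 36.0
# filter_with_porba('机器学习', {2: 1, 3: 2, 4: 2}, total, ngrams)   # min score 3.6 > 2 -> True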
def cut(s, n, ngrams):
'''
Segment a piece of text with the filtered ngrams, following the principle
"better not to cut at all than to cut wrongly".
:param s: a piece of text
:param ngrams: the filtered gram set
:return: list of segments
'''
# count, for every position, how many substrings of length >= 2 covering it appear in ngrams
r = np.array([0]*(len(s)-1))  # frequency statistics for fragments of length >= 2
for i in range(len(s)-1):
for j in range(2, n+1):
if s[i:i+j] in ngrams:
r[i:i+j-1] += 1
# Splitting rule: as long as some covering substring is in the gram set, do not split; split only where the count in r drops to 0.
w = [s[0]]
for i in range(1, len(s)):
if r[i-1] > 0:
w[-1] += s[i]
else:
w.append(s[i])
return w
def is_real(s, n, ngrams):
if len(s) >= 4:
for i in range(4, n+1):
for j in range(len(s)-i+1):
if s[j:j+i] not in ngrams:
return False
return True
else:
return True
def cal_entropy(dict_gram,key):
'''
Compute the boundary entropy of a gram, separately for its left and right neighbours.
:param dict_gram: {'left': [...], 'right': [...]} neighbouring characters
:param key: the gram itself
:return: min of the left and right boundary entropies (-1/-2 flag empty sides)
'''
left = dict_gram['left']
if len(set(left)) ==1 and left[0] ==' ' :
entropy_left = -1 # 如果左边界为空,则将其设置为-1
else:
list_left = list(Counter(left).values())
sum_left = sum(list_left)
entropy_left = sum([-(i / sum_left) * math.log(i / sum_left) for i in list_left])
right = dict_gram['right']
if len(set(right)) ==1 and right[0] ==' ' :
entropy_right = -1 # 如果右边界为空,则将其设置为-1
else:
list_right = list(Counter(right).values())
sum_right = sum(list_right)
entropy_right = sum([ -(i/sum_right)*math.log(i/sum_right) for i in list_right])
if entropy_left==-1 and entropy_right==-1:
entropy =-2 # 如果左右边界熵都为空,将其设置为-2
else:
entropy = min(entropy_left, entropy_right)
return entropy
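# A hedged toy example of the boundary entropy above: if '经济' is seen with left characters
# ['国', '国', '市'] and right characters ['发', '增', '发'], both sides give
# H = -(2/3)ln(2/3) - (1/3)ln(1/3) ≈ 0.64, so the returned entropy is about 0.64.
# cal_entropy({'left': ['国', '国', '市'], 'right': ['发', '增', '发']}, '经济')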
class AC_Unicode:
"""稍微封装一下,弄个支持unicode的AC自动机
"""
def __init__(self):
self.ac = ahocorasick.Automaton()
def add_word(self, k, v):
# k = k.encode('utf-8')
return self.ac.add_word(k, v)
def make_automaton(self):
return self.ac.make_automaton()
def iter(self, s):
# 搜索文本中存在的单词
# s = s.encode('utf-8')
return self.ac.iter(s)
def get_ngrams_neighbor_ac(texts, w):
'''
Collect the characters adjacent to every gram: all texts are joined into one line and an
Aho-Corasick automaton matches every gram in a single pass; the matched positions give the
left/right neighbouring characters from which the boundary entropy is computed.
'''
neighbors = {}
text_line = ''
for line in texts:
text_line += ' '+ line
print('构建AC自动机...')
ac = AC_Unicode()
for gram in w.keys():
if len(gram)>1:
ac.add_word(gram, gram)
ac.make_automaton()
result_ac = ac.iter(text_line)
print('迭代匹配结果...')
for item in result_ac:
    index, key = item
    if key not in neighbors:
        neighbors[key] = {'left': [], 'right': []}
    # record the neighbouring characters for every match, including the first occurrence
    index_left = index - len(key) + 1
    if index_left - 1 >= 0:
        neighbors[key]['left'].append(text_line[index_left-1 : index_left])
    index_right = index
    if index_right + 1 < len(text_line):
        neighbors[key]['right'].append(text_line[index_right+1 : index_right+2])
print('计算边界熵...')
ngrams_entropy = defaultdict(int)
for key in neighbors.keys():
entropy = cal_entropy(neighbors[key], key)
ngrams_entropy[key] = entropy
return ngrams_entropy
def remove_general_words_ac(dict_general_words, ws):
'''
Remove common words: the common-word dictionary is joined into one long text and an
Aho-Corasick automaton finds every candidate that occurs in it; those candidates are deleted.
:param dict_general_words: path to the common-word dictionary (CSV)
:param ws: dict of candidate words with their boundary entropy
:return: remaining candidates sorted by boundary entropy, descending
'''
print('移除常用词...')
ac = AC_Unicode()
for gram in ws.keys():
if len(gram)>1:
ac.add_word(gram, gram)
General_dict = pd.read_csv(dict_general_words)
General_dict = list(General_dict['0'].values)
General_dict_ = ''
for key in General_dict:
General_dict_ += ' ' + str(key)
ac.make_automaton()
result_ac = ac.iter(General_dict_)
for index, key in result_ac:
try:
del ws[key]
except: continue
final_w = sorted(ws.items(), key=lambda item: item[1],reverse=True)
return final_w
def get_new_words( file_in, file_dict, file_out, min_count, min_proba):
'''
Discover new words.
:param file_in: input document, one article per line, utf8 encoded
:param file_dict: common-word dictionary, one word per line
:param file_out: output file, one word per line with its boundary entropy, sorted from large to small
:param min_count: minimum frequency for an n-gram
:param min_proba: dict of minimum cohesion thresholds per word length; lengths 2, 3 and 4 are enough
:return:
'''
import time
import pandas as pd
start = time.time()
n = 4 # 默认ngrams中的n为4
df = pd.read_excel(file_in)['摘要'] # 读取数据
df.dropna(inplace=True)
texts = []
for text in df:
if len(str(text)) > 10:
print(text)
texts.append(''.join(text.split()))
ngrams, total = get_ngrams_counts(texts, n, min_count) # 获取ngrams
ngrams_filter = set(i for i, j in ngrams.items() if filter_with_porba(i, min_proba, total, ngrams)) # 计算凝固度,并根据阈值过滤ngrams
# 根据ngrams分词
words = defaultdict(int)
for t in texts:
for i in cut(t, n, ngrams_filter):
words[i] += 1
w = {i: j for i, j in words.items() if j >= min_count} # 根据阈值筛选出出现频率较高的词
# Note: words and ngrams_filter (the cohesion-filtered set) do not fully coincide here, because the cut step can produce words that are not in ngrams.
# w = {i: j for i, j in words.items() if is_real(i, n, ngrams_filter)}
print('凝固度筛选词的长度:{}'.format(len(w)))
ws = get_ngrams_neighbor_ac(texts, w) # 按边界熵大小排序
final_w = remove_general_words_ac(file_dict, ws)
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~“”?,!【】()、。:;’‘……¥·↓/"""
count_num = 0
with open(file_out, 'w', encoding='utf-8') as writer:
for value in final_w:
word = value[0]
sign = 0
for i in word:
if i in punctuation:
sign = 1
break
print(sign)
if (len(word) >= 2) and (sign==0):
writer.write('{},{}\n'.format(word, value[1]))
count_num += 1
end = time.time()
print('新词个数:{}'.format(count_num))
print('花费时间:{}分钟'.format(round((end - start) / 60, 2)))
if __name__ == '__main__':
min_count = 1
min_proba = {2: 500, 3: 1000, 4: 500}
file_in = r'D:\临时工作\临时工作代码\企业资讯八方面-附关键词\风险管理.xlsx' # utf8
file_dict = './dict_sogou_vec.txt' # utf8
file_out = './find_words_.csv' # gbk
# import pdfplumber
#
# file_path = r'C:\xxxx\practice.PDF'
#
# with pdfplumber.open(file_path) as pdf:
# page = pdf.pages[11]
# print(page.extract_text())
get_new_words(file_in, file_dict, file_out, min_count, min_proba)
from tkinter import _flatten
from django.http import JsonResponse
from django.views.decorators.http import require_POST
from basic_service.views import basic, co_occurrence
from model.base.views.token_authorize import *
import os
import shutil
UPLOAD_FOLDER = '/home/zzsn/ctt/platform_zzsn/media/'
# Create your views here.
@require_POST
@login_required
def doc_similarity_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
text_1 = request.POST['text_1']
text_2 = request.POST['text_2']
sim_algorithm_name = request.POST['sim_algorithm_name']
print(text_1)
print(text_2)
url = 'http://localhost:7005/doc_sim/calculate_similarity'
result = basic.post_similarity(url, text_1, text_2, sim_algorithm_name)
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def ner_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, entity = basic.ner(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'pos': entity},
})
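# A hedged client-side sketch for the endpoint above. The host/port follow the runserver command at
# the top of this commit; the '/basic_service/' URL prefix is an assumption, since the project-level
# urls.py is not included here, and `token` must come from a prior login.
# import requests
# resp = requests.post('http://localhost:7004/basic_service/ner_single',
#                      headers={'Authorization': token},
#                      data={'text': '小明在郑州大学读书。'})
# print(resp.json()['resultData'])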
@require_POST
@login_required
def associated_word_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
word_num = request.POST['word_num']
try:
related_words = basic.related_word_recommendation(text, word_num)
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': str(e),
'resultData': None,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': related_words,
})
@require_POST
@login_required
def word_cut(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words = basic.word_cut(text)
words = list(_flatten(words))
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': words,
})
@require_POST
@login_required
def word_pos(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, pos = basic.word_pos(text)
words = list(_flatten(words))
pos = list(_flatten(pos))
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'pos': pos},
})
@require_POST
@login_required
def new_word_find(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, sign = basic.new_words_find(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'sign': sign},
})
@require_POST
@login_required
def show_srl(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, srl = basic.show_srl(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'srl': srl},
})
@require_POST
@login_required
def show_dep(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, dep = basic.show_dep(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'dep': dep},
})
@require_POST
@login_required
def create_keywords(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
topK = int(request.POST['topK'])
with_weight = request.POST['with_weight'].lower() in ('true', '1', 'yes')  # bool() of any non-empty string, including 'false', is True
key_words = basic.create_keywords(text=text, topK=topK, with_weight=with_weight)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'keywords': key_words},
})
@require_POST
@login_required
def get_summary(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
summary_length = int(request.POST['summary_length'])
summaries = basic.summary(text, summary_length)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'summaries': summaries},
})
@require_POST
@login_required
def word_co_occurrence(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
pending_file = request.POST['pending_file']
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
filepath = os.path.join(path, pending_file)
topK = int(request.POST['topK'])
word_matric = co_occurrence.main(filepath, topK)
if os.path.exists(path):
    shutil.rmtree(path)  # remove the whole uploaded temp directory (the bare timestamp is not a valid path)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'word_matric': word_matric},
})
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'platform_zzsn.settings')
try:
from django.core.management import execute_from_command_line
except ImportError as exc:
raise ImportError(
"Couldn't import Django. Are you sure it's installed and "
"available on your PYTHONPATH environment variable? Did you "
"forget to activate a virtual environment?"
) from exc
execute_from_command_line(sys.argv)
if __name__ == '__main__':
main()
from model.base.views.config import BaseConfig
from model.base.views.data import BaseDataLoader
from model.base.views.data import BaseDataProcess
from model.base.views.evaluator import BaseEvaluator
from model.base.views.loss import BaseLoss
from model.base.views.model import BaseModel
from model.base.views.runner import BaseRunner
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class BaseConfig(AppConfig):
name = 'base'
from django.db import models
from datetime import datetime
# Create your models here.
class User(models.Model):
username = models.CharField(max_length=30, unique=True)
true_name = models.CharField(max_length=30)
sex = models.CharField(max_length=2)
mobile_number = models.CharField(max_length=20)
mail = models.CharField(max_length=20)
id_card = models.CharField(max_length=20)
password = models.CharField(max_length=40)
account_number = models.CharField(max_length=20)
def toDict(self):
return {'id':self.id,
'username':self.username,
'true_name':self.true_name,
'sex':self.sex,
'mobile_number':self.mobile_number,
'mail':self.mail,
'id_card':self.id_card,
'password':self.password,
'account_number':self.account_number,
# 'update_at':self.update_at.strftime('%Y-%m-%d %H:%M:%S')
}
class Meta:
db_table = 'user'
class ServiceManage(models.Model):
name = models.CharField(max_length=15)
username = models.CharField(max_length=30)
filenames = models.CharField(max_length=200)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=10)
path = models.CharField(max_length=20)
def toDict(self):
return {'name': self.name,
'username': self.username,
'filenames': self.filenames,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'path': self.path,
}
class Meta:
db_table = 'service_manage'
class SubjectManage(models.Model):
sid = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=30)
def toDict(self):
return {'sid': self.sid,
'name': self.name,
}
class Meta:
db_table = 'subject_manage'
class ModelManage(models.Model):
task_name = models.CharField(max_length=30)
function_type = models.CharField(max_length=20)
model_type = models.CharField(max_length=20)
version_num = models.IntegerField()
create_date = models.DateTimeField(default=None)
def toDict(self):
return {'id': self.id,
'task_name': self.task_name,
'function_type': self.function_type,
'model_type': self.model_type,
'version_num': self.version_num,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
}
class Meta:
db_table = 'model_manage'
class VersionManage(models.Model):
model = models.ForeignKey(ModelManage, related_name='version_model', on_delete=models.CASCADE)
version = models.CharField(max_length=20)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=20)
creator = models.CharField(max_length=30)
path = models.CharField(max_length=20, unique=True)
def toDict(self):
return {'id': self.id,
'version': self.version,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'creator': self.creator,
'path': self.path,
}
class Meta:
db_table = 'version_manage'
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from model.base.views import views as base_views
from django.conf.urls import url
urlpatterns = [
url(r'^register-account', base_views.register_account, name='register_account'),
url(r'^verify-username', base_views.verify_username, name='verify_username'),
url(r'^login', base_views.login, name='login'),
url(r'^reset-password', base_views.reset_password, name='reset_password'),
url(r'^show-config-file', base_views.show_config_file, name='show_config_file'),
url(r'^show-service-file', base_views.show_service_file, name='show_service_file'),
url(r'^delete-file-row-manage', base_views.delete_file_row_manage, name='delete_file_row_manage'),
url(r'^delete-file-row-service', base_views.delete_file_row_service, name='delete_file_row_service'),
url(r'^file-upload', base_views.file_upload, name='file_upload'),
url(r'^show-log-file', base_views.show_log_file, name='show_log_file'),
url(r'^validate-code', base_views.validate_code, name='validate_code'),
url(r'^download-zip', base_views.download_zip, name='download_zip'),
url(r'^download-xlsx', base_views.download_xlsx, name='download_xlsx'),
url(r'^query-manage', base_views.query_manage, name='query_manage'),
url(r'^forget-password', base_views.forget_password, name='forget_password'),
url(r'^train', base_views.run_train, name='train'),
url(r'^query-service-manage', base_views.query_service_manage, name='query_service_manage'),
url(r'^query-subject', base_views.query_subject, name='query_subject'),
url(r'^query-version', base_views.query_version, name='query_version'),
url(r'^query-task-name', base_views.query_task_name, name='query_task_name')
]
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 11:51
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 14:34
# @Author : 程婷婷
# @FileName: BaseConfig.py
# @Software: PyCharm
import yaml
class BaseConfig:
def __init__(self, config_path):
self._config_path = config_path
self._parsed_file = self.load_config()
def load_config(self):
print(self._config_path)
with open(self._config_path) as yaml_file:
parsed_file = yaml.load(yaml_file, Loader=yaml.FullLoader)
return parsed_file
# if __name__ == '__main__':
# bc = BaseConfig()
# print(bc._parsed_file)
# print(bc.load_config()['data_path'])
# print(bc.load_config()['embedding'])
# print(bc.load_config()['model'])
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 9:58
# @Author : 程婷婷
# @FileName: BaseDataLoader.py
# @Software: PyCharm
import pandas as pd
from model.base.views.config.BaseConfig import BaseConfig
class BaseDataLoader:
def __init__(self, config_path):
self.data_loader_config = BaseConfig(config_path)._parsed_file['data_loader']
def read_file(self):
symbol = self.data_loader_config['dataset_path'].split('.')[-1]
if (symbol == 'xlsx') or (symbol == 'xls'):
df = pd.read_excel(r''+self.data_loader_config['dataset_path'])
elif symbol == 'csv':  # split('.')[-1] yields 'csv' without the dot
df = pd.read_csv(r''+self.data_loader_config['dataset_path'], sep='\t')
else:
print('数据类型错误')
return '数据类型错误'
df.drop_duplicates(subset='content', keep='first', inplace=True)
df.dropna(subset=['content', 'title'], inplace=True)
df = df.reset_index(drop=True)
print('=================执行正文去重和去空之后共有%d条数据=============' % len(df['content']))
return df
def read_stopwords(self):
# 读取停顿词列表
stopword_list = [k.strip() for k in open(self.data_loader_config['stopwords_path'], encoding='utf8').readlines() if
k.strip() != '']
return stopword_list
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 15:28
# @Author : 程婷婷
# @FileName: BaseDataProcess.py
# @Software: PyCharm
import os
import re
import jieba
import pickle
import gensim
import logging
import numpy as np
from pyhanlp import *
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_classif, SelectPercentile
from model.base import BaseConfig
from model.base import BaseDataLoader
from platform_zzsn.settings import BASE_DIR
format = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=format, level=logging.INFO)
class BaseDataProcess:
def __init__(self, config_path):
self.embedding_config = BaseConfig.BaseConfig(config_path)._parsed_file['embedding']
self.process_config = BaseConfig.BaseConfig(config_path)._parsed_file['data_process']
PerceptronLexicalAnalyzer = JClass('com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer')
self.pla_segment = PerceptronLexicalAnalyzer()
self.bdl = BaseDataLoader.BaseDataLoader(config_path)
def clean_content(self, content):
bs = BeautifulSoup(content, 'html.parser')
return bs.text
def remove_char(self, content):
# 保留中文、英语字母、数字和标点
graph_filter = re.compile(r'[^\u4e00-\u9fa5a-zA-Z0-9\s,。\.,?\?!!;;]')
content = graph_filter.sub('', content)
return content
def jieba_tokenizer(self, content):
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in jieba.lcut(content) if word not in stopwords])
def pla_tokenizer(self, content):
words = list(self.pla_segment.analyze(content).toWordArray())
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in words if word not in stopwords])
def save(self, voc, path):
with open(path, 'wb') as voc_file:
pickle.dump(voc, voc_file)
def process(self, data, min_content=0):
processed_data = []
for record in data:
record = self.clean_content(str(record))
record = self.remove_char(record)
if len(record) > min_content:
methods = self.process_config['tokenizer']
if methods == 'PerceptronLexicalAnalyzer':
record = self.pla_tokenizer(record)
record = [w for w in record.split() if w.strip() != '']  # split the space-joined tokens back into words (iterating the string itself yields single characters)
else:
record = self.jieba_tokenizer(record)
record = [w for w in record.split() if w.strip() != '']  # split the space-joined tokens back into words (iterating the string itself yields single characters)
processed_data.append(' '.join(record))
else:
pass
return processed_data
def split_dataset(self, data, use_dev):
if use_dev:
train_data_set, test_dev_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
test_data_set, dev_data_set = train_test_split(test_dev_set,  # the held-out part yields test and dev sets (a 3-way unpack here would raise ValueError)
test_size=self.process_config['test_size'],
random_state=self.process_config['random_state'],
shuffle=True)
print(len(train_data_set) + len(test_data_set) + len(dev_data_set))
return train_data_set, test_data_set, dev_data_set
else:
train_data_set, test_data_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
return train_data_set, test_data_set
def bag_of_words(self, data, label):
vectorizer = CountVectorizer(ngram_range=(1, 1), min_df=5)
x = vectorizer.fit_transform(data)
transformer = TfidfTransformer(norm=self.embedding_config['norm'], use_idf=self.embedding_config['use_idf'],
smooth_idf=self.embedding_config['smooth_idf'])
x = transformer.fit_transform(x).toarray()
if self.embedding_config['with_feature_selection']:
transformed_data = SelectPercentile(mutual_info_classif, percentile=20).fit_transform(x, label)
else:
transformed_data = x
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
self.save(voc=vectorizer.vocabulary_, path=os.path.join(self.embedding_config['embedding_path'], 'tfidf.pkl'))
return transformed_data, vectorizer.get_feature_names()
def word2vec(self, data, feature_words):
model = gensim.models.word2vec.Word2Vec(sentences=data,
size=self.embedding_config['size'],
window=self.embedding_config['window'],
min_count=self.embedding_config['min_count'],
workers=self.embedding_config['workers'],
sg=self.embedding_config['sg'],
iter=self.embedding_config['iter'])
vocabulary_w2v = model.wv.vocab.keys()
count = 0
if self.embedding_config['use_Tencent']:
model_tencent = gensim.models.KeyedVectors.load_word2vec_format(
os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
vocabulary_tencent = model_tencent.wv.vocab.keys()
vector_matrix = np.zeros((len(feature_words), int(self.embedding_config['size']) + 200))
for word in feature_words:
if word in vocabulary_tencent:
vector_tencent = model_tencent.wv.word_vec(word)
else:
vector_tencent = np.random.randn(200)
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector = np.concatenate((vector_tencent, vector_w2v))
vector_matrix[count] = vector
count += 1
else:
vector_matrix = np.zeros((len(feature_words), self.embedding_config['size']))
for word in feature_words:
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector_matrix[count] = vector_w2v
count += 1
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
model.save(os.path.join(self.embedding_config['embedding_path'], 'word2vec.model'))
return vector_matrix
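# A minimal, self-contained sketch of the bag-of-words + TF-IDF step that
# bag_of_words() above performs, run on toy, pre-tokenized sentences instead of
# project data; toy_docs and the min_df=1 setting are assumptions for the demo.
if __name__ == '__main__':
    from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

    toy_docs = ['今天 天气 很好', '今天 股市 下跌', '明天 天气 下雨']
    counts = CountVectorizer(ngram_range=(1, 1), min_df=1).fit_transform(toy_docs)
    tfidf = TfidfTransformer(norm='l2', use_idf=True, smooth_idf=True).fit_transform(counts)
    print(tfidf.toarray().shape)  # (3, vocabulary size)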
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:30
# @Author : 程婷婷
# @FileName: BaseEvaluator.py
# @Software: PyCharm
from sklearn.metrics import precision_score, f1_score, recall_score, classification_report
import logging
from model.base.views.config.BaseConfig import BaseConfig
formats = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=formats, level=logging.INFO)
class BaseEvaluator:
def __init__(self, config_path):
self.evaluate_config = BaseConfig(config_path)._parsed_file['evaluate']
def evaluate(self, y_true, y_pred, label_mapping, logger):
result = []
y_true = list(map(str, y_true))
y_pred = list(map(str, y_pred))
logger.info('模型评估结果如下:')
if not label_mapping:
result.append(classification_report(y_true, y_pred))
logger.info(classification_report(y_true, y_pred))
else:
for value in label_mapping.values():
print([k for k,v in label_mapping.items() if v == value])
p = precision_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
r = recall_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
f1 = f1_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
print({'value': value,'召回率为': r, '精确率为': p, 'F1': f1})
logger.info('标签为%s' % [k for k,v in label_mapping.items() if v == value][0])
logger.info('精确率为%.2f' %p)
logger.info('召回率为%.2f' %r)
logger.info('F1值为%.2f' %f1)
result.append(str({'label': value,'recall': r, 'precision': p, 'F1': f1}))
return ' '.join(result)
# y_true = [0, 1, 2, 0, 1, 2]
# y_pred = [0, 2, 1, 0, 0, 1]
# print(BaseEvaluator())
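# A self-contained sketch of the per-label metrics loop in evaluate(), assuming a
# toy label_mapping and toy string labels; all values here are illustrative only.
if __name__ == '__main__':
    toy_mapping = {'正面': 0, '负面': 1}   # assumed label -> id mapping
    toy_true = ['0', '1', '1', '0', '1']
    toy_pred = ['0', '1', '0', '0', '1']
    for name, value in toy_mapping.items():
        p = precision_score(toy_true, toy_pred, average='binary', pos_label=str(value))
        r = recall_score(toy_true, toy_pred, average='binary', pos_label=str(value))
        f1 = f1_score(toy_true, toy_pred, average='binary', pos_label=str(value))
        print(name, {'precision': p, 'recall': r, 'F1': f1})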
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
import os
import yaml
import random
import smtplib
from email.mime.text import MIMEText
from django.core.paginator import Paginator
from email.mime.multipart import MIMEMultipart
from PIL import Image,ImageFont,ImageDraw,ImageFilter
from model.base.models import ModelManage, ServiceManage, VersionManage
from platform_zzsn.settings import BASE_DIR
class Picture:
def __init__(self):
self.size = (240,60)
self.mode='RGB'
self.color='white'
self.font = ImageFont.truetype(os.path.join(BASE_DIR,
'static/common/font/arial.ttf'), 36) # font file and size
def randChar(self):
basic='23456789abcdefghijklmnpqrstwxyzABCDEFGHIJKLMNPQRSTWXYZ'
return basic[random.randint(0,len(basic)-1)] # random character
def randBdColor(self):
return (random.randint(64,255),random.randint(64,255),random.randint(64,255)) # random background color
def randTextColor(self):
return (random.randint(32, 127), random.randint(32, 127), random.randint(32, 127)) # random text color
def proPicture(self):
new_image=Image.new(self.mode,self.size,self.color) # create a new image from mode, size and background color
drawObject=ImageDraw.Draw(new_image) # drawing object bound to the image
line_num = random.randint(4,6) # number of interference lines
for i in range(line_num):
#size=(240,60)
begin = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
end = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
drawObject.line([begin, end], self.randTextColor())
for x in range(240):
for y in range(60):
tmp = random.randint(0,50)
if tmp>30: # controls the amount of interference noise
drawObject.point((x,y),self.randBdColor())
randchar=''
for i in range(5):
rand=self.randChar()
randchar+=rand
drawObject.text([50*i+10,10],rand,self.randTextColor(),font=self.font) # draw the character
new_image = new_image.filter(ImageFilter.SHARPEN) # sharpen filter
return new_image,randchar
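# Hedged usage sketch: Picture() depends on the project font at
# static/common/font/arial.ttf, so the call stays commented out, following the
# convention of the other commented examples in this code base; 'captcha.png' is
# an assumed output path.
# captcha_image, captcha_text = Picture().proPicture()
# captcha_image.save('captcha.png')
# print(captcha_text)   # the 5 random characters drawn on the image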
def update_config_file(config_path, config_file):
data = yaml.load(config_file, Loader=yaml.FullLoader)
data['data_loader'] = {}
model_path = data['model']['model_path']
model_name = data['model']['model_name']
if data['model']['model_path']:
data['model']['model_path'] = os.path.join(config_path, model_path)
else:
data['model']['model_path'] = os.path.join(config_path, model_name)
print(data['model']['model_path'])
embedding_path = data['embedding']['embedding_path']
if embedding_path:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['embedding_path'])
else:
if data['embedding']['name']:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['name'])
tokenizer_path = data['embedding']['tokenizer_path']
if tokenizer_path:
data['embedding']['tokenizer_path'] = os.path.join(config_path, data['embedding']['tokenizer_path'])
try:
test_file_path = data['data_process']['test_file_path']
train_file_path = data['data_process']['train_file_path']
except KeyError:
pass
else:
data['data_process']['test_file_path'] = os.path.join(config_path, test_file_path)
data['data_process']['train_file_path'] = os.path.join(config_path, train_file_path)
for file in os.listdir(config_path):
if ('.xls' == file[-4:]) or ('.xlsx' == file[-5:]):
xlsx_path = os.path.join(config_path, file)
data['data_loader']['dataset_path'] = xlsx_path
if 'save_fname' in data['runner'].keys():
data['runner']['save_fpath'] = os.path.join(config_path, data['runner']['save_fname'])
data['data_loader']['stopwords_path'] = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
file_path = os.path.join(config_path, 'config.yaml')
with open(file_path, 'w') as yaml_file:
yaml.safe_dump(data, yaml_file, default_flow_style=False)
return file_path
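# Hedged sketch of the config layout update_config_file() appears to expect; the
# keys below are inferred from the lookups above and may be incomplete.
# example_config = {
#     'model':        {'model_name': 'clf', 'model_path': ''},
#     'embedding':    {'name': 'tfidf', 'embedding_path': '', 'tokenizer_path': ''},
#     'data_process': {'train_file_path': 'train.txt', 'test_file_path': 'test.txt'},   # optional
#     'runner':       {'save_fname': 'result.xlsx'},                                    # optional
#     'data_loader':  {},   # created and filled in by update_config_file()
# }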
def select_manage(task_name, function_type, model_type, begin_cdate, end_cdate, page_size, current_page):
condition = {'task_name': task_name, 'function_type': function_type, 'model_type': model_type,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
managers = ModelManage.objects.filter(**condition).order_by('-create_date')
len_managers = len(managers)
page = Paginator(managers, page_size)
maxpages = page.num_pages # total number of pages
pIndex = int(current_page)
# make sure the requested page index is not out of range
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # records on the current page
return list(manager_list), len_managers
def select_version(model_id, begin_cdate, end_cdate, page_size, current_page):
condition = {'model_id': model_id,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
versions = VersionManage.objects.filter(**condition).order_by('-create_date')
len_versions = len(versions)
page = Paginator(versions, page_size)
maxpages = page.num_pages # total number of pages
pIndex = int(current_page)
# make sure the requested page index is not out of range
if pIndex > maxpages:
pIndex = maxpages
version_list = page.page(pIndex) # records on the current page
return list(version_list), len_versions
def select_service_manage(name, begin_cdate, end_cdate, state, username, page_size, current_page):
condition = {
'name': name,
'state': state,
'create_date__range': (begin_cdate, end_cdate),
'username': username,
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
print(condition)
service_managers = ServiceManage.objects.filter(**condition).order_by('-create_date')
len_service_managers = len(service_managers)
page = Paginator(service_managers, page_size)
maxpages = page.num_pages
pIndex = int(current_page)
# make sure the requested page index is not out of range
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # records on the current page
return list(manager_list), len_service_managers
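# The three select_* helpers above share the same Paginator pattern; this
# self-contained toy (a plain list instead of a QuerySet) illustrates the
# page-bounds clamp they apply. The numbers are arbitrary demo values.
if __name__ == '__main__':
    toy_rows = list(range(1, 26))                     # 25 fake records
    toy_page = Paginator(toy_rows, 10)                # 10 records per page
    requested = 5                                     # deliberately out of range
    pIndex = min(requested, toy_page.num_pages)
    print(toy_page.num_pages, list(toy_page.page(pIndex)))   # 3 [21, 22, 23, 24, 25]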
def sendMail(user,pwd,sender,receiver,msg_title):
mail_host = "smtp.163.com" # SMTP server of 163.com
message = MIMEMultipart('alternative')
# set the sender of the mail
message["From"] = sender
# set the recipients of the mail
message["To"] = ",".join(receiver)
# set the subject of the mail
message["Subject"] = msg_title
# plain-text alternative (kept commented out for reference)
# message.attach(MIMEText('您好,\n'
# ' 您当前的密码为%s, 为了保证您的账号安全,请尽快登陆重置您的密码'%msg_content, 'plain', 'utf-8'))
# add the HTML body
message.attach(MIMEText('<html>'
'<body>'
'<h1>Hello </h1><br> '
'<h3>To ensure the security of your account, please log in and reset your password as soon as possible.</h3>'
'<h2><a href="http://192.168.1.149:8020/reset_password/">点此重置</a></h2>'
'</body>'
'</html>', 'html', 'utf-8'))
# 1. open an SSL connection to the SMTP server
smtpObj = smtplib.SMTP_SSL(mail_host,465)
# 2. log in to the mailbox for authentication
smtpObj.login(user,pwd)
# 3. send the mail
# arguments: sender, recipients, message content
smtpObj.sendmail(sender,receiver,message.as_string())
return True
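# Hedged usage sketch for sendMail(): it needs real 163.com SMTP credentials, so
# the call stays commented out; every value below is a placeholder.
# sendMail(user='example@163.com',
#          pwd='smtp-authorization-code',     # 163 SMTP authorization code, not the login password
#          sender='example@163.com',
#          receiver=['someone@example.com'],  # list of recipient addresses
#          msg_title='密码重置')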
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:29
# @Author : 程婷婷
# @FileName: BaseLoss.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:18
# @Author : 程婷婷
# @FileName: BaseModel.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
import os
import pickle
class BaseModel:
def __init__(self,config_path):
self.model_config = BaseConfig(config_path)._parsed_file['model']
def building_model(self, *params):
pass
def save(self, model):
dir = os.path.dirname(self.model_config['model_path'])
if not os.path.exists(dir):
os.makedirs(dir)
with open(self.model_config['model_path'], 'wb') as model_file:
pickle.dump(model, model_file)
def predict(self, model, X):
proba = model.predict_proba(X)
y_predict = model.predict(X)
return {'proba': proba, 'y_predict': y_predict}
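# BaseModel only wraps pickling plus predict/predict_proba, so any scikit-learn
# estimator with those methods fits the pattern; the toy below mirrors
# save()/predict() without needing a config file. All values are demo data.
if __name__ == '__main__':
    from sklearn.linear_model import LogisticRegression

    toy_X = [[0.0], [0.2], [0.8], [1.0]]
    toy_y = [0, 0, 1, 1]
    clf = LogisticRegression().fit(toy_X, toy_y)
    blob = pickle.dumps(clf)                  # what save() writes to model_path
    restored = pickle.loads(blob)
    print(restored.predict([[0.1], [0.9]]))
    print(restored.predict_proba([[0.1], [0.9]]))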
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:36
# @Author : 程婷婷
# @FileName: BaseRunner.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
class BaseRunner:
def __init__(self,config_path):
self.runner_config = BaseConfig(config_path)._parsed_file['runner']
def train(self, logger):
pass
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 9:24
# @Author : 程婷婷
# @FileName: test.py
# @Software: PyCharm
import jieba
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectPercentile, chi2
X, y = load_digits(return_X_y=True)
print(X.shape)
print(X[:10], y[:100])
X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)
print(X_new.shape)
print(X_new[:10])
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/20 16:58
# @Author : 程婷婷
# @FileName: token_authorize.py
# @Software: PyCharm
import jwt
import time
import functools
from jwt import exceptions
from django.http import JsonResponse
from platform_zzsn.settings import *
# SECRET_KEY (imported from settings) is the signing key used to check that a JWT is valid and legitimate
def create_token(user):
'''Create a JWT-based token for the given user'''
headers = {
"alg": "HS256",
"typ": "JWT"
}
exp = int(time.time() + 3*60*60)
payload = {
"id": user.id,
"name": user.username,
"exp": exp
}
token = jwt.encode(payload=payload, key=SECRET_KEY, algorithm='HS256', headers=headers).decode('utf-8')
return token
def login_required(view_func):
@functools.wraps(view_func)
def validate_token(request, *args, **kwargs):
'''Validate the JWT carried by the request; if it is valid, call the wrapped view'''
try:
token = request.META.get("HTTP_AUTHORIZATION")
payload = jwt.decode(token, SECRET_KEY, algorithms=['HS256'])
print(payload)
return view_func(request, *args, **kwargs)
# validity and legitimacy checks on the JWT
except exceptions.ExpiredSignatureError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '登录已过期'
})
except jwt.DecodeError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '缺少参数token'
# token authentication failed
})
except jwt.InvalidTokenError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '缺少参数token'
# invalid token
})
return validate_token
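# A self-contained round trip with a throw-away key, showing the payload shape
# create_token() emits and the decode call validate_token() performs. Note that
# create_token() calls .decode('utf-8') on the encode result, which assumes
# PyJWT 1.x (PyJWT 2.x already returns str); the sketch below avoids that call.
if __name__ == '__main__':
    toy_key = 'demo-secret'                   # placeholder, not the project SECRET_KEY
    toy_payload = {'id': 1, 'name': 'demo', 'exp': int(time.time()) + 60}
    toy_token = jwt.encode(toy_payload, toy_key, algorithm='HS256')
    print(jwt.decode(toy_token, toy_key, algorithms=['HS256']))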
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/9 11:19
# @Author : 程婷婷
# @FileName: utils.py
# @Software: PyCharm
import os
import re
import jieba
import zipfile
import pandas as pd
from docx import Document
from platform_zzsn.settings import *
def read_txt(path):
with open(path, 'r', encoding='utf8') as file:
lines = file.readlines()
return lines
def read_docx(pending_file, user_file):
jieba.load_userdict(user_file)
document = Document(pending_file)
doc_text_list = []
for para in document.paragraphs:
para_text = re.sub(r'\s', '', para.text)
if para_text:
doc_text_list.append(para_text)
return doc_text_list
def read_excel(pending_file, user_file):
jieba.load_userdict(user_file)
doc_text_list = pd.read_excel(pending_file)['content']
doc_text_list.dropna(inplace=True)
return doc_text_list
def merge_para(paras):
new_paras = []
for i, para in enumerate(paras):
if not new_paras:
new_paras.append(para)
elif (len(new_paras[-1]) < 500):
new_paras[-1] += para
else:
new_paras.append(para)
return new_paras
def filter_stopwords(para):
path = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
stopword_list = [k.strip() for k in read_txt(path) if
k.strip() != '']
words = [word for word in jieba.lcut(para) if word not in stopword_list]
return words
# get the second element of a list
def takeSecond(elem):
return elem[1]
# get the length of the first element
def takeFirst_len(elem):
return len(elem[0])
def make_zip(file_dir: str, zip_path: str) -> None:
zip_f = zipfile.ZipFile(zip_path, 'w')
pre_len = len(os.path.dirname(file_dir))
for parent, dir_names, filenames in os.walk(file_dir):
for filename in filenames:
path_file = os.path.join(parent, filename)
arc_name = path_file[pre_len:].strip(os.path.sep)
zip_f.write(path_file, arc_name)
zip_f.close()
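# Self-contained demo of make_zip(): builds a throw-away directory with one file
# and zips it next to that directory; all paths are temporary demo paths.
if __name__ == '__main__':
    import tempfile

    tmp_dir = tempfile.mkdtemp()
    src_dir = os.path.join(tmp_dir, 'result')
    os.makedirs(src_dir, exist_ok=True)
    with open(os.path.join(src_dir, 'demo.txt'), 'w', encoding='utf8') as f:
        f.write('demo')
    make_zip(src_dir, os.path.join(tmp_dir, 'result.zip'))
    print(os.path.exists(os.path.join(tmp_dir, 'result.zip')))   # True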
from model.classify.views.fasttext_classify import FastTextConfig
from model.classify.views.fasttext_classify.data import FastTextDataLoader
from model.classify.views.fasttext_classify.data import FastTextProcess
from model.classify.views.fasttext_classify import FastTextModel
from model.classify.views.fasttext_classify import FastTextEvaluator
from model.classify.views.fasttext_classify import FastTextRunner
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class ClassifyConfig(AppConfig):
name = 'classify'
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from django.urls import path
from basic_service import views
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 11:24
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base import BaseConfig
class FastTextConfig(BaseConfig.BaseConfig):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base import BaseEvaluator
class FastTextEvaluator(BaseEvaluator.BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:18
# @Author : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
import fasttext
from model.base import BaseModel
class FastTextModel(BaseModel.BaseModel):
def __init__(self, config_path):
super().__init__(config_path)
def building_model(self, input, autotuneValidationFile):
model = fasttext.train_supervised(input=input,
autotuneValidationFile=autotuneValidationFile,
autotuneDuration=self.model_config['autotuneDuration'],
autotuneModelSize=self.model_config['autotuneModelSize'])
model.save_model(self.model_config['model_path'])
return model
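# Hedged sketch of what building_model() drives underneath: fastText autotuning
# against a held-out file in the same "__label__X tokenized text" format as the
# training file; autotuneDuration is a budget in seconds and autotuneModelSize
# (e.g. '100M') caps the model size. train.txt / valid.txt are placeholders, so
# the call stays commented out.
# model = fasttext.train_supervised(input='train.txt',
#                                   autotuneValidationFile='valid.txt',
#                                   autotuneDuration=600,
#                                   autotuneModelSize='100M')
# print(model.predict('分词 后 的 待 预测 文本'))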
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
from model.base import BaseRunner
from model.classify import FastTextProcess
from model.classify import FastTextModel
from model.classify import FastTextEvaluator
class FastTextRunner(BaseRunner.BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.ftp = FastTextProcess.FastTextProcess(config_path)
self.ftm = FastTextModel.FastTextModel(config_path)
self.fte = FastTextEvaluator.FastTextEvaluator(config_path)
def train(self, logger):
train_path, test_path = self.ftp.runner_process(logger)
model = self.ftm.building_model(input=train_path, autotuneValidationFile=test_path)
with open(test_path, encoding='utf8') as file:
test_data = file.readlines()
true_labels, predict_labels = [], []
for text in test_data:
# each test line looks like "__label__X tokenized text"; X is assumed to be a single digit
label = text.replace('__label__', '')[0]
text = text.replace('__label__', '')[1:-1]
true_labels.append(int(label))
predict_label = model.predict(text)[0][0].replace('__label__', '')
# print(pre_label)
predict_labels.append(int(predict_label))
evaluate_result = self.fte.evaluate(true_labels, predict_labels, label_mapping=None, logger=logger)
print(evaluate_result)
return 'success'
# if __name__ == '__main__':
# state = FastTextRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 10:28
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:19
# @Author : 程婷婷
# @FileName: FastTextDataLoader.py
# @Software: PyCharm
from model.base import BaseDataLoader
class FastTextDataLoader(BaseDataLoader.BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:14
# @Author : 程婷婷
# @FileName: XgboostClassifyProcess.py
# @Software: PyCharm
import re
import time
from model.base import BaseDataProcess
from model.classify import FastTextDataLoader
class FastTextProcess(BaseDataProcess.BaseDataProcess):
def __init__(self, config_path):
super().__init__(config_path)
self.ftdl = FastTextDataLoader.FastTextDataLoader(config_path)
def remove_char(self, content):
graph_filter = re.compile(u'[\U00010000-\U0010ffff\uD800-\uDBFF\uDC00-\uDFFFa-z\n\s]')
content = graph_filter.sub('', content)
return content
def process(self, data, min_content):
processed_data = []
i = 0
for record in data:
record = self.remove_char(record)
if len(record) > min_content:
methods = self.process_config['tokenizer']
if methods == 'PerceptronLexicalAnalyzer':
record = self.pla_tokenizer(record)
else:
record = self.jieba_tokenizer(record)
processed_data.append(record)
i += 1
else:
i += 1
pass
if i % 100 == 0 or i == len(data):
print(time.strftime('%Y-%m-%d %H:%M:%S'),'第',i,'条文本分词完毕')
return processed_data
def transform_data(self, data, labels):
format_data = []
for i in range(len(data)):
fasttext_line = "__label__{} {}".format(labels[i], data[i])
format_data.append(fasttext_line)
return format_data
def runner_process(self, logger):
df = self.ftdl.read_file()
processed_data = self.process(df['content'], min_content=10)
# if self.process_config['label_encode']:
if type(df['label'][0]) == int:
labels = df['label']
else:
all_label = list(set(df['label']))
self.label_mapping = {v: k for k, v in dict(enumerate(all_label)).items()}
labels = df['label'].map(self.label_mapping)
print(labels)
format_data = self.transform_data(processed_data, labels)
if self.process_config['use_dev']:
train_data_set, test_data_set, dev_data_set = self.split_dataset(format_data, use_dev=self.process_config['use_dev'])
else:
train_data_set, test_data_set = self.split_dataset(format_data, use_dev=self.process_config['use_dev'])
with open(self.process_config['train_file_path'], 'w', encoding='utf-8') as trainf, \
open(self.process_config['test_file_path'], 'w', encoding='utf-8') as testf:
for train_row in train_data_set:
trainf.write(train_row + '\n')
for test_row in test_data_set:
testf.write(test_row + '\n')
logger.info('处理后的数据量为 %d 条' % len(format_data))
logger.info('训练集的数据量为 %d 条' % len(train_data_set))
logger.info('测试集的数据量为 %d 条' % len(test_data_set))
return self.process_config['train_file_path'], self.process_config['test_file_path']
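# For reference, transform_data() above emits the plain-text format fastText
# expects: one sample per line, an integer label prefixed with "__label__" and
# followed by the space-tokenized text, e.g.
# __label__0 今天 股市 大幅 下跌
# __label__1 本地 天气 晴朗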
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:18
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
class FastTextConfig(BaseConfig):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base.views.evaluator.BaseEvaluator import BaseEvaluator
class FlairClassifyEvaluator(BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:18
# @Author : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
from torch.optim import Adam
from torch.optim.lr_scheduler import OneCycleLR
from flair.models import TextClassifier
from flair.trainers import ModelTrainer
from model.base.views.model.BaseModel import BaseModel
class FlairClassifyModel(BaseModel):
def __init__(self, config_path):
super().__init__(config_path)
def building_model(self, corpus, document_embeddings, label_dict, loss_weights):
# downstream classifier
classifier = TextClassifier(
document_embeddings,
label_dictionary=label_dict,
loss_weights=loss_weights
)
# model trainer
trainer = ModelTrainer(classifier, corpus, optimizer=Adam)
model_save_path = self.model_config['model_path']
trainer.train(str(model_save_path),
learning_rate=3e-5, # use very small learning rate
mini_batch_size=16,
scheduler=OneCycleLR,
mini_batch_chunk_size=2, # optionally set this if transformer is too much for your machine
max_epochs=3, # terminate after X epochs
monitor_train=True,
monitor_test=True,
checkpoint=True
)
return classifier, trainer
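# Hedged usage sketch: ModelTrainer.train() writes its artefacts (final-model.pt
# and, when a dev split exists, best-model.pt) into model_save_path; a later
# prediction step would reload the classifier roughly as below. The path literal
# is a placeholder, so the lines stay commented out.
# from flair.data import Sentence
# classifier = TextClassifier.load('model_save_path/final-model.pt')
# sentence = Sentence('待 分类 文本')
# classifier.predict(sentence)
# print(sentence.labels)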
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
import os
import numpy as np
import torch
import random
from model.base.views.runner.BaseRunner import BaseRunner
from model.classify.views.flair_classify import FlairClassifyProcess
from model.classify.views.flair_classify import FlairClassifyModel
from model.classify.views.flair_classify import FlairClassifyEvaluator
class FlairClassifyRunner(BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.fcp = FlairClassifyProcess(config_path)
self.fcm = FlairClassifyModel(config_path)
self.fce = FlairClassifyEvaluator(config_path)
@staticmethod
def reproducibility(seed):
'''
Fix the random seeds so that training is reproducible
:param seed: seed applied to Python, NumPy and PyTorch
:return:
'''
os.environ["PYTHONHASHSEED"] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
def train(self):
corpus, document_embeddings, label_dict, loss_weights = self.fcp.runner_process()
model = self.fcm.building_model(
corpus=corpus,
document_embeddings=document_embeddings,
label_dict=label_dict,
loss_weights=loss_weights
)
#self.fce.evaluate(true_labels, predict_labels)
return 'success'
# if __name__ == '__main__':
# state = FlairClassifyRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 10:28
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:46
# @Author : 程婷婷
# @FileName: FlairClassifyDataLoader.py
# @Software: PyCharm
from model.base import BaseDataLoader
class FlairClassifyDataLoader(BaseDataLoader.BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:14
# @Author : 程婷婷
# @FileName: XgboostClassifyProcess.py
# @Software: PyCharm
from flair.data import Sentence, Corpus
import re
from transformers import AutoTokenizer
from torch.utils.data import Dataset
from flair.embeddings import TransformerDocumentEmbeddings
from model.base import BaseDataProcess
from model.classify import FlairClassifyDataLoader
class DataSet(Dataset):
def __init__(
self, data_df, tokenizer,
):
df = data_df.copy()
sep_token = tokenizer.special_tokens_map['sep_token']
self.samples = df.content.apply(lambda s: re.sub("<sep>", sep_token, s)).values
self.labels = df.label.values
self.tokenizer = tokenizer
def __len__(self):
return len(self.samples)
def __getitem__(self, index):
sample, label = self.samples[index], self.labels[index]
sentence = Sentence(sample, use_tokenizer=self.tokenizer.tokenize)
if not len(sentence):
sentence = Sentence(self.tokenizer.unk_token, use_tokenizer=self.tokenizer.tokenize)
print(sample)
print(sentence)
sentence.add_label('class', str(label))
return sentence
class FlairClassifyProcess(BaseDataProcess.BaseDataProcess):
def __init__(self, config_path):
super().__init__(config_path)
self.fcdl = FlairClassifyDataLoader.FlairClassifyDataLoader(config_path)
@staticmethod
def add_sep_token(content):
return re.sub('。', '。<sep>', content)
def runner_process(self):
df = self.fcdl.read_file()
df = df[df.content.apply(lambda s: s.strip()).apply(len) > 10]
df = df.reset_index(drop=True)
df['content'] = df['content'].apply(lambda s: self.add_sep_token(str(s)))
pos = df.label.value_counts()
loss_weights = (pos.sum() - pos) / pos
self.loss_weights = loss_weights.to_dict()
if self.process_config['label_encode']:
all_label = list(set(df['label']))
self.label_mapping = {v: k for k, v in dict(enumerate(all_label)).items()}
labels = df['label'].map(self.label_mapping)
print(labels)
tokenizer = AutoTokenizer.from_pretrained(self.embedding_config['pretrained_name'])
if self.process_config['use_dev']:
train_data_set, test_data_set, dev_data_set = self.split_dataset(df, use_dev=self.process_config['use_dev'])
train_set = DataSet(train_data_set, tokenizer)
test_set = DataSet(test_data_set, tokenizer)
val_set = DataSet(dev_data_set, tokenizer)
corpus = Corpus(train=train_set, dev=val_set, test=test_set)
else:
train_data_set, test_data_set = self.split_dataset(df, use_dev=self.process_config['use_dev'])
train_set = DataSet(train_data_set, tokenizer)
test_set = DataSet(test_data_set, tokenizer)
corpus = Corpus(train=train_set, test=test_set)
label_dict = corpus.make_label_dictionary()
document_embeddings = TransformerDocumentEmbeddings(
self.embedding_config['pretrained_name'], fine_tune=True
)
return corpus, document_embeddings, label_dict, loss_weights
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:43
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
class LogisticClassifyConfig(BaseConfig):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base.views.evaluator.BaseEvaluator import BaseEvaluator
class LogisticClassifyEvaluator(BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
from model.base.views.runner.BaseRunner import BaseRunner
from model.classify.views.logistic_classify.data.LogisticClassifyProcess import LogisticClassifyProcess
from model.classify.views.logistic_classify.LogisticClassifyModel import LogisticClassifyModel
from model.classify.views.logistic_classify.LogisticClassifyEvaluator import LogisticClassifyEvaluator
class LogisticClassifyRunner(BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.lcp = LogisticClassifyProcess(config_path)
self.lcm = LogisticClassifyModel(config_path)
self.lce = LogisticClassifyEvaluator(config_path)
def train(self, logger):
tfidf_title, idf_title, labels = self.lcp.title_process(logger)
Threshold,Index_Retain_Predict_Title,Index_Delete_Title = self.lcm.building_model(
tfidf_title=tfidf_title,
labels=labels,
logger=logger
)
tfidf_content, idf_content = self.lcp.content_process(Index_Retain_Predict_Title)
threshold, Index_Retain_Predict_Content, Index_Delete_Content = self.lcm.building_model(
labels = labels,
tfidf_content=tfidf_content,
r=0.8,
logger=logger
) # r is adjustable; training finally stops once recall drops below r.
return 'success'
# if __name__ == '__main__':
# state = LogisticClassifyRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/17 9:08
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 14:53
# @Author : 程婷婷
# @FileName: LogisticClassifyDataLoader.py
# @Software: PyCharm
from model.base.views.data.BaseDataLoader import BaseDataLoader
class LogisticClassifyDataLoader(BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 14:47
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
class TextcnnConfig(BaseConfig):
def __init__(self, config_path):
super().__init__(config_path)