Commit c4e5365c, author: ctt

Natural language platform, version V1.0

nohup python manage.py runserver --noreload 0.0.0.0:7004 >> app.log 2>&1 &
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class BaseConfig(AppConfig):
name = 'base'
from django.db import models
from datetime import datetime
# Create your models here.
class User(models.Model):
username = models.CharField(max_length=30, unique=True)
true_name = models.CharField(max_length=30)
sex = models.CharField(max_length=2)
mobile_number = models.CharField(max_length=20)
mail = models.CharField(max_length=20)
id_card = models.CharField(max_length=20)
password = models.CharField(max_length=40)
account_number = models.CharField(max_length=20)
def toDict(self):
return {'id':self.id,
'username':self.username,
'true_name':self.true_name,
'sex':self.sex,
'mobile_number':self.mobile_number,
'mail':self.mail,
'id_card':self.id_card,
'password':self.password,
'account_number':self.account_number,
# 'update_at':self.update_at.strftime('%Y-%m-%d %H:%M:%S')
}
class Meta:
db_table = 'user'
class ServiceManage(models.Model):
name = models.CharField(max_length=15)
username = models.CharField(max_length=30)
filenames = models.CharField(max_length=200)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=10)
path = models.CharField(max_length=20)
def toDict(self):
return {'name': self.name,
'username': self.username,
'filenames': self.filenames,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'path': self.path,
}
class Meta:
db_table = 'service_manage'
class SubjectManage(models.Model):
sid = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=30)
def toDict(self):
return {'sid': self.sid,
'name': self.name,
}
class Meta:
db_table = 'subject_manage'
class ModelManage(models.Model):
task_name = models.CharField(max_length=30)
function_type = models.CharField(max_length=20)
model_type = models.CharField(max_length=20)
version_num = models.IntegerField()
create_date = models.DateTimeField(default=None)
def toDict(self):
return {'id': self.id,
'task_name': self.task_name,
'function_type': self.function_type,
'model_type': self.model_type,
'version_num': self.version_num,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
}
class Meta:
db_table = 'model_manage'
class VersionManage(models.Model):
model = models.ForeignKey(ModelManage, related_name='version_model', on_delete=models.CASCADE)
version = models.CharField(max_length=20)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=20)
creator = models.CharField(max_length=30)
path = models.CharField(max_length=20, unique=True)
def toDict(self):
return {'id': self.id,
'version': self.version,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'creator': self.creator,
'path': self.path,
}
class Meta:
db_table = 'version_manage'
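# A minimal sketch of how the toDict() helpers above are typically consumed: serialise a queryset
# into a JSON response. Assumes a Django view module that can import base.models; the view name
# list_services is illustrative only.
from django.http import JsonResponse
from base.models import ServiceManage

def list_services(request):
    # each model instance serialises itself via toDict()
    rows = [m.toDict() for m in ServiceManage.objects.all()[:20]]
    return JsonResponse({'resultData': rows})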
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from django.urls import path
from base.views import views
from django.conf.urls import url
from base.views import views as base_views
urlpatterns = [
url(r'^register-account', base_views.register_account, name='register_account'),
url(r'^verify-username', base_views.verify_username, name='verify_username'),
url(r'^login', base_views.login, name='login'),
url(r'^reset-password', base_views.reset_password, name='reset_password'),
url(r'^show-config-file', base_views.show_config_file, name='show_config_file'),
url(r'^show-service-file', base_views.show_service_file, name='show_service_file'),
url(r'^delete-file-row-manage', base_views.delete_file_row_manage, name='delete_file_row_manage'),
url(r'^delete-file-row-service', base_views.delete_file_row_service, name='delete_file_row_service'),
url(r'^file-upload', base_views.file_upload, name='file_upload'),
url(r'^show-log-file', base_views.show_log_file, name='show_log_file'),
url(r'^validate-code', base_views.validate_code, name='validate_code'),
url(r'^download-zip', base_views.download_zip, name='download_zip'),
url(r'^download-xlsx', base_views.download_xlsx, name='download_xlsx'),
url(r'^query-manage', base_views.query_manage, name='query_manage'),
url(r'^forget-password', base_views.forget_password, name='forget_password'),
url(r'^train', base_views.run_train, name='train'),
url(r'^query-service-manage', base_views.query_service_manage, name='query_service_manage'),
url(r'^query-subject', base_views.query_subject, name='query_subject'),
url(r'^query-version', base_views.query_version, name='query_version'),
url(r'^query-task-name', base_views.query_task_name, name='query_task_name')
]
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 11:51
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 14:34
# @Author : 程婷婷
# @FileName: BaseConfig.py
# @Software: PyCharm
import yaml
class BaseConfig:
def __init__(self, config_path):
self._config_path = config_path
self._parsed_file = self.load_config()
def load_config(self):
print(self._config_path)
with open(self._config_path) as yaml_file:
parsed_file = yaml.load(yaml_file, Loader=yaml.FullLoader)
return parsed_file
# if __name__ == '__main__':
# bc = BaseConfig()
# print(bc._parsed_file)
# print(bc.load_config()['data_path'])
# print(bc.load_config()['embedding'])
# print(bc.load_config()['model'])
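# A minimal, self-contained sketch of BaseConfig in use: write a tiny YAML file and read it back,
# showing that _parsed_file is a plain nested dict. The keys below are illustrative, not the
# platform's real schema.
if __name__ == '__main__':
    import os
    import tempfile
    sample = 'model:\n  model_name: svm\nembedding:\n  size: 100\n'
    with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as f:
        f.write(sample)
    cfg = BaseConfig(f.name)
    print(cfg._parsed_file['embedding']['size'])   # -> 100
    os.remove(f.name)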
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 9:58
# @Author : 程婷婷
# @FileName: BaseDataLoader.py
# @Software: PyCharm
import pandas as pd
from base.views.config.BaseConfig import BaseConfig
class BaseDataLoader:
def __init__(self, config_path):
self.data_loader_config = BaseConfig(config_path)._parsed_file['data_loader']
def read_file(self):
symbol = self.data_loader_config['dataset_path'].split('.')[-1]
if (symbol == 'xlsx') or (symbol == 'xls'):
df = pd.read_excel(r''+self.data_loader_config['dataset_path'])
elif symbol == 'csv':  # split('.')[-1] yields 'csv' without the dot
df = pd.read_csv(r''+self.data_loader_config['dataset_path'], sep='\t')
else:
print('数据类型错误')
return '数据类型错误'
df.drop_duplicates(subset='content', keep='first', inplace=True)
df.dropna(subset=['content', 'label'], inplace=True)
df = df.reset_index(drop=True)
print('=================执行正文去重和去空之后共有%d条数据=============' % len(df['content']))
return df
def read_stopwords(self):
# 读取停顿词列表
stopword_list = [k.strip() for k in open(self.data_loader_config['stopwords_path'], encoding='utf8').readlines() if
k.strip() != '']
return stopword_list
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 15:28
# @Author : 程婷婷
# @FileName: BaseDataProcess.py
# @Software: PyCharm
import os
import re
import jieba
import pickle
import gensim
import logging
import numpy as np
import pandas as pd
from pyhanlp import *
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_classif, SelectPercentile
from base.views.config.BaseConfig import BaseConfig
from base.views.data.BaseDataLoader import BaseDataLoader
from platform_zzsn.settings import BASE_DIR
format = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=format, level=logging.INFO)
class BaseDataProcess:
def __init__(self, config_path):
self.embedding_config = BaseConfig(config_path)._parsed_file['embedding']
self.process_config = BaseConfig(config_path)._parsed_file['data_process']
PerceptronLexicalAnalyzer = JClass('com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer')
self.pla_segment = PerceptronLexicalAnalyzer()
self.bdl = BaseDataLoader(config_path)
def clean_content(self, content):
bs = BeautifulSoup(content, 'html.parser')
return bs.text
def remove_char(self, content):
# 保留中文、英语字母、数字和标点
graph_filter = re.compile(r'[^\u4e00-\u9fa5a-zA-Z0-9\s,。\.,?\?!!;;]')
content = graph_filter.sub('', content)
return content
def jieba_tokenizer(self, content):
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in jieba.lcut(content) if word not in stopwords])
def pla_tokenizer(self, content):
words = list(self.pla_segment.analyze(content).toWordArray())
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in words if word not in stopwords])
def save(self, voc, path):
with open(path, 'wb') as voc_file:
pickle.dump(voc, voc_file)
def process(self, data, min_content=0):
processed_data = []
for record in data:
record = self.clean_content(str(record))
record = self.remove_char(record)
if len(record) > min_content:
methods = self.process_config['tokenizer']
if methods == 'PerceptronLexicalAnalyzer':
record = self.pla_tokenizer(record)
record = [w for w in record.split() if w.strip() != '']  # split the space-joined tokens back into words (iterating the string itself yields single characters)
else:
record = self.jieba_tokenizer(record)
record = [w for w in record.split() if w.strip() != '']  # split the space-joined tokens back into words (iterating the string itself yields single characters)
processed_data.append(' '.join(record))
else:
pass
return processed_data
def split_dataset(self, data, use_dev):
if use_dev:
train_data_set, test_dev_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
test_data_set, dev_data_set = train_test_split(test_dev_set,  # the held-out part yields test and dev sets (a 3-way unpack here would raise ValueError)
test_size=self.process_config['test_size'],
random_state=self.process_config['random_state'],
shuffle=True)
print(len(train_data_set) + len(test_data_set) + len(dev_data_set))
return train_data_set, test_data_set, dev_data_set
else:
train_data_set, test_data_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
return train_data_set, test_data_set
def bag_of_words(self, data, label):
vectorizer = CountVectorizer(ngram_range=(1, 1), min_df=5)
x = vectorizer.fit_transform(data)
transformer = TfidfTransformer(norm=self.embedding_config['norm'], use_idf=self.embedding_config['use_idf'],
smooth_idf=self.embedding_config['smooth_idf'])
x = transformer.fit_transform(x).toarray()
if self.embedding_config['with_feature_selection']:
transformed_data = SelectPercentile(mutual_info_classif, percentile=20).fit_transform(x, label)
else:
transformed_data = x
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
self.save(voc=vectorizer.vocabulary_, path=os.path.join(self.embedding_config['embedding_path'], 'tfidf.pkl'))
return transformed_data, vectorizer.get_feature_names()
def word2vec(self, data, feature_words):
model = gensim.models.word2vec.Word2Vec(sentences=data,
size=self.embedding_config['size'],
window=self.embedding_config['window'],
min_count=self.embedding_config['min_count'],
workers=self.embedding_config['workers'],
sg=self.embedding_config['sg'],
iter=self.embedding_config['iter'])
vocabulary_w2v = model.wv.vocab.keys()
count = 0
if self.embedding_config['use_Tencent']:
model_tencent = gensim.models.KeyedVectors.load_word2vec_format(
os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
vocabulary_tencent = model_tencent.wv.vocab.keys()
vector_matrix = np.zeros((len(feature_words), int(self.embedding_config['size']) + 200))
for word in feature_words:
if word in vocabulary_tencent:
vector_tencent = model_tencent.wv.word_vec(word)
else:
vector_tencent = np.random.randn(200)
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector = np.concatenate((vector_tencent, vector_w2v))
vector_matrix[count] = vector
count += 1
else:
vector_matrix = np.zeros((len(feature_words), self.embedding_config['size']))
for word in feature_words:
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector_matrix[count] = vector_w2v
count += 1
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
model.save(os.path.join(self.embedding_config['embedding_path'], 'word2vec.model'))
return vector_matrix
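# A hedged end-to-end sketch of how the classes above fit together. It assumes a config.yaml whose
# data_loader / data_process / embedding sections use the keys referenced in this file; the path is
# a placeholder.
# if __name__ == '__main__':
#     loader = BaseDataLoader('/path/to/config.yaml')
#     df = loader.read_file()
#     processor = BaseDataProcess('/path/to/config.yaml')
#     texts = processor.process(df['content'])
#     x, feature_words = processor.bag_of_words(texts, df['label'])
#     vectors = processor.word2vec([t.split() for t in texts], feature_words)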
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:30
# @Author : 程婷婷
# @FileName: BaseEvaluator.py
# @Software: PyCharm
from sklearn.metrics import precision_score, f1_score, recall_score, classification_report
import logging
from base.views.config.BaseConfig import BaseConfig
formats = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=formats, level=logging.INFO)
class BaseEvaluator:
def __init__(self, config_path):
self.evaluate_config = BaseConfig(config_path)._parsed_file['evaluate']
def evaluate(self, y_true, y_pred, label_mapping, logger):
result = []
y_true = list(map(str, y_true))
y_pred = list(map(str, y_pred))
logger.info('模型评估结果如下:')
if not label_mapping:
result.append(classification_report(y_true, y_pred))
logger.info(classification_report(y_true, y_pred))
else:
for value in label_mapping.values():
print([k for k,v in label_mapping.items() if v == value])
p = precision_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
r = recall_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
f1 = f1_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
print({'value': value,'召回率为': r, '精确率为': p, 'F1': f1})
logger.info('标签为%s' % [k for k,v in label_mapping.items() if v == value][0])
logger.info('精确率为%.2f' %p)
logger.info('召回率为%.2f' %r)
logger.info('F1值为%.2f' % f1)
result.append(str({'label': value,'recall': r, 'precision': p, 'F1': f1}))
return ' '.join(result)
# y_true = [0, 1, 2, 0, 1, 2]
# y_pred = [0, 2, 1, 0, 0, 1]
# print(BaseEvaluator())
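# A hedged usage sketch for BaseEvaluator, assuming an `evaluate: {average: binary}` section in
# config.yaml; the labels, mapping and logger below are illustrative only.
# evaluator = BaseEvaluator('/path/to/config.yaml')
# report = evaluator.evaluate(y_true=[0, 1, 1, 0], y_pred=[0, 1, 0, 0],
#                             label_mapping={'负面': 0, '正面': 1},
#                             logger=logging.getLogger(__name__))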
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
import os
import yaml
import random
import smtplib
from email.mime.text import MIMEText
from django.core.paginator import Paginator
from email.mime.multipart import MIMEMultipart
from PIL import Image,ImageFont,ImageDraw,ImageFilter
from base.models import ModelManage, ServiceManage, VersionManage
from platform_zzsn.settings import BASE_DIR
class Picture:
def __init__(self):
self.size = (240,60)
self.mode='RGB'
self.color='white'
self.font = ImageFont.truetype(os.path.join(BASE_DIR,
'static/common/font/arial.ttf'), 36) #设置字体大小
def randChar(self):
basic='23456789abcdefghijklmnpqrstwxyzABCDEFGHIJKLMNPQRSTWXYZ'
return basic[random.randint(0,len(basic)-1)] #随机字符
def randBdColor(self):
return (random.randint(64,255),random.randint(64,255),random.randint(64,255)) #背景
def randTextColor(self):
return (random.randint(32, 127), random.randint(32, 127), random.randint(32, 127)) #随机颜色
def proPicture(self):
new_image=Image.new(self.mode,self.size,self.color) #创建新图像有三个默认参数:尺寸,颜色,模式
drawObject=ImageDraw.Draw(new_image) #创建一个可以对image操作的对象
line_num = random.randint(4,6) # 干扰线条数
for i in range(line_num):
#size=(240,60)
begin = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
end = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
drawObject.line([begin, end], self.randTextColor())
for x in range(240):
for y in range(60):
tmp = random.randint(0,50)
if tmp>30: #调整干扰点数量
drawObject.point((x,y),self.randBdColor())
randchar=''
for i in range(5):
rand=self.randChar()
randchar+=rand
drawObject.text([50*i+10,10],rand,self.randTextColor(),font=self.font) #写入字符
new_image = new_image.filter(ImageFilter.SHARPEN) # 滤镜
return new_image,randchar
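# A minimal sketch of serving the captcha above from a view: keep the expected text in the session
# and return the image as a PNG. The view name validate_code_view is illustrative; BytesIO /
# HttpResponse usage is standard Django/Pillow.
from io import BytesIO
from django.http import HttpResponse

def validate_code_view(request):
    image, code = Picture().proPicture()
    request.session['validate_code'] = code      # remember the text for later verification
    buffer = BytesIO()
    image.save(buffer, 'PNG')                    # PIL Image -> PNG bytes
    return HttpResponse(buffer.getvalue(), content_type='image/png')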
def update_config_file(config_path, config_file):
data = yaml.load(config_file, Loader=yaml.FullLoader)
data['data_loader'] = {}
model_path = data['model']['model_path']
model_name = data['model']['model_name']
if data['model']['model_path']:
data['model']['model_path'] = os.path.join(config_path, model_path)
else:
data['model']['model_path'] = os.path.join(config_path, model_name)
print(data['model']['model_path'])
embedding_path = data['embedding']['embedding_path']
if embedding_path:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['embedding_path'])
else:
if data['embedding']['name']:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['name'])
tokenizer_path = data['embedding']['tokenizer_path']
if tokenizer_path:
data['embedding']['tokenizer_path'] = os.path.join(config_path, data['embedding']['tokenizer_path'])
try:
test_file_path = data['data_process']['test_file_path']
train_file_path = data['data_process']['train_file_path']
except KeyError:
pass
else:
data['data_process']['test_file_path'] = os.path.join(config_path, test_file_path)
data['data_process']['train_file_path'] = os.path.join(config_path, train_file_path)
for file in os.listdir(config_path):
if ('.xls' == file[-4:]) or ('.xlsx' == file[-5:]):
xlsx_path = os.path.join(config_path, file)
data['data_loader']['dataset_path'] = xlsx_path
if 'save_fname' in data['runner'].keys():
data['runner']['save_fpath'] = os.path.join(config_path, data['runner']['save_fname'])
data['data_loader']['stopwords_path'] = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
file_path = os.path.join(config_path, 'config.yaml')
with open(file_path, 'w') as yaml_file:
yaml.safe_dump(data, yaml_file, default_flow_style=False)
return file_path
def select_manage(task_name, function_type, model_type, begin_cdate, end_cdate, page_size, current_page):
condition = {'task_name': task_name, 'function_type': function_type, 'model_type': model_type,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
managers = ModelManage.objects.filter(**condition).order_by('-create_date')
len_managers = len(managers)
page = Paginator(managers, page_size)
maxpages = page.num_pages # 最大页数
pIndex = int(current_page)
# 判断页数是否越界
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # 当前页数据
return list(manager_list), len_managers
def select_version(model_id, begin_cdate, end_cdate, page_size, current_page):
condition = {'model_id': model_id,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
versions = VersionManage.objects.filter(**condition).order_by('-create_date')
len_versions = len(versions)
page = Paginator(versions, page_size)
maxpages = page.num_pages # 最大页数
pIndex = int(current_page)
# 判断页数是否越界
if pIndex > maxpages:
pIndex = maxpages
version_list = page.page(pIndex) # 当前页数据
return list(version_list), len_versions
def select_service_manage(name, begin_cdate, end_cdate, state, username, page_size, current_page):
condition = {
'name': name,
'state': state,
'create_date__range': (begin_cdate, end_cdate),
'username': username,
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
print(condition)
service_managers = ServiceManage.objects.filter(**condition).order_by('-create_date')
len_service_managers = len(service_managers)
page = Paginator(service_managers, page_size)
maxpages = page.num_pages
pIndex = int(current_page)
# 判断页数是否越界
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # 当前页数据
return list(manager_list), len_service_managers
def sendMail(user,pwd,sender,receiver,msg_title):
mail_host = "smtp.163.com" #163的SMTP服务器
message = MIMEMultipart('alternative')
#设置邮件的发送者
message["From"] = sender
#设置邮件的接收方
message["To"] = ",".join(receiver)
#4.设置邮件的标题
message["Subject"] = msg_title
# 添加plain格式的文本
# message.attach(MIMEText('您好,\n'
# ' 您当前的密码为%s, 为了保证您的账号安全,请尽快登陆重置您的密码'%msg_content, 'plain', 'utf-8'))
# 添加html内容
message.attach(MIMEText('<html>'
'<body>'
'<h1>Hello </h1><br> '
'<h3>To ensure the security of your account, please log in and reset your password as soon as possible.</h3>'
'<h2><a href="http://192.168.1.149:8020/reset_password/">点此重置</a></h2>'
'</body>'
'</html>', 'html', 'utf-8'))
#1.启用服务器发送邮件
smtpObj = smtplib.SMTP_SSL(mail_host,465)
#2.登录邮箱进行验证
smtpObj.login(user,pwd)
#3.发送邮件
#参数:发送方,接收方,邮件信息
smtpObj.sendmail(sender,receiver,message.as_string())
return True
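# A hedged usage sketch for sendMail; every address and the SMTP authorisation code below are
# placeholders, not real credentials.
# sendMail(user='example@163.com', pwd='<smtp-auth-code>', sender='example@163.com',
#          receiver=['someone@example.com'], msg_title='密码重置')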
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:29
# @Author : 程婷婷
# @FileName: BaseLoss.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:18
# @Author : 程婷婷
# @FileName: BaseModel.py
# @Software: PyCharm
from base.views.config.BaseConfig import BaseConfig
import os
import pickle
class BaseModel:
def __init__(self,config_path):
self.model_config = BaseConfig(config_path)._parsed_file['model']
def building_model(self, *params):
pass
def save(self, model):
dir = os.path.dirname(self.model_config['model_path'])
if not os.path.exists(dir):
os.makedirs(dir)
with open(self.model_config['model_path'], 'wb') as model_file:
pickle.dump(model, model_file)
def predict(self, model, X):
proba = model.predict_proba(X)
y_predict = model.predict(X)
return {'proba': proba, 'y_predict': y_predict}
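# A hedged sketch of the intended extension point: a concrete subclass that builds an estimator
# from the 'model' section of config.yaml. LogisticRegression and the 'max_iter' key are
# illustrative choices, not something defined elsewhere in this codebase.
from sklearn.linear_model import LogisticRegression

class LogisticRegressionModel(BaseModel):
    def building_model(self):
        # read hyper-parameters from the parsed config, with a safe default
        return LogisticRegression(max_iter=int(self.model_config.get('max_iter', 1000)))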
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:36
# @Author : 程婷婷
# @FileName: BaseRunner.py
# @Software: PyCharm
from base.views.config.BaseConfig import BaseConfig
class BaseRunner:
def __init__(self,config_path):
self.runner_config = BaseConfig(config_path)._parsed_file['runner']
def train(self, logger):
pass
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 9:24
# @Author : 程婷婷
# @FileName: test.py
# @Software: PyCharm
import jieba
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectPercentile, chi2
X, y = load_digits(return_X_y=True)
print(X.shape)
print(X[:10], y[:100])
X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)
print(X_new.shape)
print(X_new[:10])
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/20 16:58
# @Author : 程婷婷
# @FileName: token_authorize.py
# @Software: PyCharm
import jwt
import time
import functools
from jwt import exceptions
from django.http import JsonResponse
from platform_zzsn.settings import *
# SECRET_KEY is imported from platform_zzsn.settings above and is used to sign and verify the JWTs
def create_token(user):
'''Create a JWT for the given user.'''
headers = {
"alg": "HS256",
"typ": "JWT"
}
exp = int(time.time() + 3*60*60)
payload = {
"id": user.id,
"name": user.username,
"exp": exp
}
token = jwt.encode(payload=payload, key=SECRET_KEY, algorithm='HS256', headers=headers).decode('utf-8')
return token
def login_required(view_func):
@functools.wraps(view_func)
def validate_token(request, *args, **kwargs):
'''Validate the token from the Authorization header; on success, call the wrapped view.'''
payload = None
msg = None
try:
token = request.META.get("HTTP_AUTHORIZATION")
payload = jwt.decode(token, SECRET_KEY, True, algorithms=['HS256'])
print(payload)
return view_func(request, *args, **kwargs)
# jwt有效、合法性校验
except exceptions.ExpiredSignatureError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '登录已过期'
})
except jwt.DecodeError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '缺少参数token'
# token认证失败
})
except jwt.InvalidTokenError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '非法的token'
# 非法的token
})
return validate_token
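# A hedged round-trip sketch: create_token() signs id/name/exp with SECRET_KEY, and the decorator
# above verifies the same token taken from the Authorization header. DummyUser is illustrative and
# the calls assume the PyJWT 1.x API used above.
# class DummyUser:
#     id, username = 1, 'alice'
# token = create_token(DummyUser())
# payload = jwt.decode(token, SECRET_KEY, True, algorithms=['HS256'])
# print(payload['name'])   # -> 'alice'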
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/9 11:19
# @Author : 程婷婷
# @FileName: utils.py
# @Software: PyCharm
import os
import re
import jieba
import zipfile
import pandas as pd
from docx import Document
from platform_zzsn.settings import *
def read_txt(path):
with open(path, 'r', encoding='utf8') as file:
lines = file.readlines()
return lines
def read_docx(pending_file, user_file):
jieba.load_userdict(user_file)
document = Document(pending_file)
doc_text_list = []
for para in document.paragraphs:
para_text = re.sub(r'\s', '', para.text)
if para_text:
doc_text_list.append(para_text)
return doc_text_list
def read_excel(pending_file, user_file):
jieba.load_userdict(user_file)
doc_text_list = pd.read_excel(pending_file)['content']
doc_text_list.dropna(inplace=True)
return doc_text_list
def merge_para(paras):
new_paras = []
for i, para in enumerate(paras):
if not new_paras:
new_paras.append(para)
elif (len(new_paras[-1]) < 500):
new_paras[-1] += para
else:
new_paras.append(para)
return new_paras
def filter_stopwords(para):
path = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
stopword_list = [k.strip() for k in read_txt(path) if
k.strip() != '']
words = [word for word in jieba.lcut(para) if word not in stopword_list]
return words
# 获取列表的第二个元素
def takeSecond(elem):
return elem[1]
def takeFirst_len(elem):
return len(elem[0])
def make_zip(file_dir: str, zip_path: str) -> None:
zip_f = zipfile.ZipFile(zip_path, 'w')
pre_len = len(os.path.dirname(file_dir))
for parent, dir_names, filenames in os.walk(file_dir):
for filename in filenames:
path_file = os.path.join(parent, filename)
arc_name = path_file[pre_len:].strip(os.path.sep)
zip_f.write(path_file, arc_name)
zip_f.close()
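# A hedged usage sketch for make_zip: archive a finished task directory before offering it for
# download. Both paths are placeholders.
# make_zip('/home/zzsn/output/task_001', '/home/zzsn/output/task_001.zip')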
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class BasicServiceConfig(AppConfig):
name = 'basic_service'
from django.db import models
# Create your models here.
#-*- coding:utf-8 -*-
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from django.urls import path
from django.conf.urls import url
from basic_service.views import views
urlpatterns = [
url(r'^ner_single', views.ner_single, name='ner_single'),
url(r'^doc-similarity-single', views.doc_similarity_single, name='doc_similarity_single'),
url(r'^associated-word-single', views.associated_word_single, name='associated_word_single'),
url(r'^word_cut', views.word_cut, name='word_cut'),
url(r'^word_pos', views.word_pos, name='word_pos'),
url(r'^new_word_find', views.new_word_find, name='new_word_find'),
url(r'^show_srl', views.show_srl, name='show_srl'),
url(r'^show_dep', views.show_dep, name='show_dep'),
url(r'^create_keywords', views.create_keywords, name='create_keywords'),
url(r'^get_summary', views.get_summary, name='get_summary'),
url(r'^word_co_occurrence', views.word_co_occurrence, name='word_co_occurrence')
]
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 10:02
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 19:54
# @Author : 程婷婷
# @FileName: basic.py
# @Software: PyCharm
import os
import jieba
import json
import requests
import jionlp as jio
from ltp import LTP
import jieba.analyse
import ahocorasick
import pandas as pd
from gensim.models.keyedvectors import KeyedVectors
from platform_zzsn.settings import BASE_DIR
from model.base.views import utils
General_dict = utils.read_txt(os.path.join(BASE_DIR, 'static/base/dict_sogou.txt'))
General_dict_ = ''
for key in General_dict:
General_dict_ += ' ' + str(key.strip())
def word_cut(text):
ltp = LTP()
sentences = ltp.sent_split([text])
segment, _ = ltp.seg(sentences)
return segment
def word_pos(text):
ltp = LTP()
sentences = ltp.sent_split([text])
segment, hidden = ltp.seg(sentences)
pos = ltp.pos(hidden)
return segment, pos
class AC_Unicode:
"""稍微封装一下,弄个支持unicode的AC自动机
"""
def __init__(self):
self.ac = ahocorasick.Automaton()
def add_word(self, k, v):
# k = k.encode('utf-8')
return self.ac.add_word(k, v)
def make_automaton(self):
return self.ac.make_automaton()
def iter(self, s):
# 搜索文本中存在的单词
# s = s.encode('utf-8')
return self.ac.iter(s)
def new_words_find(text):
words = list(jieba.cut(text, HMM=True))
words_copy = words.copy()
ac = AC_Unicode()
sign = [0] * len(words_copy)
for word in words:
if len(word) >= 2:
ac.add_word(word, word)
ac.make_automaton()
result_ac = ac.iter(General_dict_)
for index, key in result_ac:
try:
words.remove(key)
except:
continue
for index, word in enumerate(words_copy):
if (len(word) >= 2) and (word in words):
sign[index] = 1
return words_copy, sign
def show_srl(text):
ltp = LTP()
sentences = ltp.sent_split([text])
sentences_srl_dict, sentences_seg_dict = {}, {}
for i, sentence in enumerate(sentences):
seg, hidden = ltp.seg([sentence])
srl = ltp.srl(hidden, keep_empty=False)
sentences_seg_dict['句子' + str(i+1)+':'+str(sentence)] = seg[0]
sentences_srl_dict['句子'+str(i+1)+':'+str(sentence)] = srl[0]
return sentences_seg_dict, sentences_srl_dict
def show_dep(text):
ltp = LTP()
sentences = ltp.sent_split([text])
sentences_dep_dict, sentences_seg_dict = {}, {}
for i, sentence in enumerate(sentences):
seg, hidden = ltp.seg([sentence])
dep = ltp.dep(hidden)
sentences_seg_dict['句子'+str(i+1)+':'+str(sentence)] = seg[0]
sentences_dep_dict['句子'+str(i+1)+':'+str(sentence)] = dep[0]
return sentences_seg_dict, sentences_dep_dict
def create_keywords(text:str, topK:int, with_weight:bool)->list:
print(type(topK))
keywords = jio.keyphrase.extract_keyphrase(text, top_k=topK, with_weight=with_weight)
print(keywords)
return keywords
def ner(text):
ltp = LTP()
seg, hidden = ltp.seg([text])
entity = ltp.ner(hidden)
return seg[0], entity[0]
def related_word_recommendation(words, word_num):
# print(model.wv.most_similar(words))
# print(words.split(','), word_num)
print(words)
result = model.most_similar_cosmul(words.split(','), topn=int(word_num)) # 余弦相似度
print(result)
return result
def post_similarity(url, text_1, text_2, sim_algorithm_name):
payload = {'text_1': text_1, 'text_2': text_2, 'sim_algorithm_name': sim_algorithm_name}
headers = {
'Content-Type': 'application/json'
}
response = requests.request('POST', url, headers=headers, data=json.dumps(payload))
data = json.loads(response.text)
return data
def summary(text, summary_length):
summaries = jio.summary.extract_summary(text, summary_length)
return summaries
# zh_nlp = stanza.Pipeline('zh-hans')
# en_nlp = stanza.Pipeline('en')
# nlp_dict = {'zh': zh_nlp, 'en': en_nlp}
#model = KeyedVectors.load_word2vec_format(os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
# if __name__ == '__main__':
# print(word_cut('汤姆生病了。他去了医院。'))
# print(word_pos('汤姆生病了。他去了医院。'))
# print(new_words_find('白月光,形容的是一种可望不可即的人或者事物,虽然一直在心上,却从不在身边。'))
# print(new_words_find('爷青回,表示爷的青春又回来了,爷表示的是自己,将自己的身份地位抬高一个档次,像我是你大爷一样,通常用来形容那些知名的人、经典的动画、影视、游戏剧等重新复出或者是回归。'))
# show_srl('他叫汤姆去拿外衣。')
# print(show_dep('他叫汤姆去拿外衣。'))
# -*- coding: utf-8 -*-
# @Time : 2021/10/13 17:07
# @Author : ctt
# @File : co
# @Project : platform_zzsn
from basic_service.views.basic import create_keywords
import pandas as pd
import numpy as np
def Get_file_keywords(filepath, topK):
data_array = [] # 每篇文章关键词的二维数组
set_word = [] # 所有关键词的集合
df = pd.read_excel(filepath)
sentences = df['内容'].tolist()
for sentence in sentences:
words = create_keywords(sentence, topK=topK, with_weight=False)
data_array.append(str(words))
for word in words:
if word not in set_word:
set_word.append(str(word))
set_word = list(set(set_word)) # 所有关键词的集合
return data_array, set_word
# 初始化矩阵
def build_matirx(set_word):
edge = len(set_word) + 1 # 建立矩阵,矩阵的高度和宽度为关键词集合的长度+1
matrix = [[''] * edge] * edge # 初始化矩阵
# print(matrix.shape)
print(matrix)
print(set_word)
matrix[0][1:] = np.array(set_word)
print(matrix)
matrix = list(map(list, zip(*matrix)))
print(set_word)
matrix[0][1:] = np.array(set_word) # 赋值矩阵的第一行与第一列
return matrix
# 计算各个关键词的共现次数
def count_matrix(matrix, formated_data):
for row in range(1, len(matrix)):
# 遍历矩阵第一行,跳过下标为0的元素
for col in range(1, len(matrix)):
# 遍历矩阵第一列,跳过下标为0的元素
# 实际上就是为了跳过matrix中下标为[0][0]的元素,因为[0][0]为空,不为关键词
if matrix[0][row] == matrix[col][0]:
# 如果取出的行关键词和取出的列关键词相同,则其对应的共现次数为0,即矩阵对角线为0
matrix[col][row] = str(0)
else:
counter = 0 # 初始化计数器
for ech in formated_data:
# 遍历格式化后的原始数据,让取出的行关键词和取出的列关键词进行组合,
# 再放到每条原始数据中查询
if matrix[0][row] in ech and matrix[col][0] in ech:
counter += 1
else:
continue
matrix[col][row] = str(counter)
return matrix
def main(filepath, topK):
formated_data, set_word = Get_file_keywords(filepath, topK)
matrix = build_matirx(set_word)
matrix = count_matrix(matrix, formated_data)
# data = pd.DataFrame(matrix)
return matrix
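# A hedged toy example of the matrix layout produced above: row 0 and column 0 hold the keywords,
# interior cells hold co-occurrence counts as strings. The keywords and documents are illustrative,
# and Get_file_keywords is bypassed so no Excel file is needed.
# set_word = ['减税', '降费', '就业']
# docs = [str(['减税', '降费']), str(['降费', '就业'])]
# matrix = count_matrix(build_matirx(set_word), docs)
# # matrix[1][2] == '1'   (减税 and 降费 co-occur in one document)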
import re
import pandas as pd
from collections import defaultdict, Counter
import numpy as np
import ahocorasick
import math
def read_text(file_articles, encoding='utf8'):
texts = set()
with open(file_articles, encoding=encoding) as f:
for line in f.readlines():
line = re.split(u'[^\u4e00-\u9fa50-9a-zA-Z]+', line)
for s in line:
if len(s) > 1:
texts.add(s)
print('文章数(即文本行数):{}'.format(len(texts)))
return texts
def get_ngrams_counts(texts, n, min_count):
'''
Count the frequency of every n-gram in the texts.
:param n: maximum gram length
:param min_count: minimum frequency; grams below this value are discarded
:return: (ngrams dict, total number of single characters)
'''
ngrams = defaultdict(int)
for t in list(texts):
for i in range(len(t)):
for j in range(1, n+1):
if i+j <= len(t):
ngrams[t[i:i+j]] += 1
ngrams = {i:j for i,j in ngrams.items() if j >= min_count}
total = 1.*sum([j for i,j in ngrams.items() if len(i) == 1])
print('字数:{}'.format(total))
return ngrams, total
def filter_with_porba(s, min_proba, total, ngrams):
'''
Compute the cohesion (凝固度) of an n-gram and keep it only if it exceeds the threshold.
:param s: candidate n-gram
:param min_proba: dict of minimum cohesion thresholds keyed by gram length
:return: True if the gram passes the threshold, otherwise False
'''
if len(s) >= 2:
score = min([total*ngrams[s]/(ngrams[s[:i+1]]*ngrams[s[i+1:]]) for i in range(len(s)-1)])
if score > min_proba[len(s)]:
return True
else:
return False
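# The score above is a PMI-style cohesion ratio: for each split point,
#     score_i = total * count(s) / (count(prefix) * count(suffix)) = P(s) / (P(prefix) * P(suffix)),
# and the minimum over all splits must exceed the length-specific threshold in min_proba.
# A hedged toy example (the counts and thresholds are illustrative, far smaller than real ones):
# ngrams = {'机': 10, '器': 8, '学': 9, '习': 9, '机器': 6, '学习': 7,
#           '机器学': 5, '器学习': 5, '机器学习': 5}
# total = 36.0
# filter_with_porba('机器学习', {2: 1, 3: 2, 4: 2}, total, ngrams)   # min score 3.6 > 2 -> True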
def cut(s, n, ngrams):
'''
Segment a piece of text with the filtered ngrams, following the principle
"better not to cut at all than to cut wrongly".
:param s: a piece of text
:param ngrams: the filtered gram set
:return: list of segments
'''
# count, for every position, how many substrings of length >= 2 covering it appear in ngrams
r = np.array([0]*(len(s)-1))  # frequency statistics for fragments of length >= 2
for i in range(len(s)-1):
for j in range(2, n+1):
if s[i:i+j] in ngrams:
r[i:i+j-1] += 1
# Splitting rule: as long as some covering substring is in the gram set, do not split; split only where the count in r drops to 0.
w = [s[0]]
for i in range(1, len(s)):
if r[i-1] > 0:
w[-1] += s[i]
else:
w.append(s[i])
return w
def is_real(s, n, ngrams):
if len(s) >= 4:
for i in range(4, n+1):
for j in range(len(s)-i+1):
if s[j:j+i] not in ngrams:
return False
return True
else:
return True
def cal_entropy(dict_gram,key):
'''
Compute the boundary entropy of a gram, separately for its left and right neighbours.
:param dict_gram: {'left': [...], 'right': [...]} neighbouring characters
:param key: the gram itself
:return: min of the left and right boundary entropies (-1/-2 flag empty sides)
'''
left = dict_gram['left']
if len(set(left)) ==1 and left[0] ==' ' :
entropy_left = -1 # 如果左边界为空,则将其设置为-1
else:
list_left = list(Counter(left).values())
sum_left = sum(list_left)
entropy_left = sum([-(i / sum_left) * math.log(i / sum_left) for i in list_left])
right = dict_gram['right']
if len(set(right)) ==1 and right[0] ==' ' :
entropy_right = -1 # 如果右边界为空,则将其设置为-1
else:
list_right = list(Counter(right).values())
sum_right = sum(list_right)
entropy_right = sum([ -(i/sum_right)*math.log(i/sum_right) for i in list_right])
if entropy_left==-1 and entropy_right==-1:
entropy =-2 # 如果左右边界熵都为空,将其设置为-2
else:
entropy = min(entropy_left, entropy_right)
return entropy
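# A hedged toy example of the boundary entropy above: if '经济' is seen with left characters
# ['国', '国', '市'] and right characters ['发', '增', '发'], both sides give
# H = -(2/3)ln(2/3) - (1/3)ln(1/3) ≈ 0.64, so the returned entropy is about 0.64.
# cal_entropy({'left': ['国', '国', '市'], 'right': ['发', '增', '发']}, '经济')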
class AC_Unicode:
"""稍微封装一下,弄个支持unicode的AC自动机
"""
def __init__(self):
self.ac = ahocorasick.Automaton()
def add_word(self, k, v):
# k = k.encode('utf-8')
return self.ac.add_word(k, v)
def make_automaton(self):
return self.ac.make_automaton()
def iter(self, s):
# 搜索文本中存在的单词
# s = s.encode('utf-8')
return self.ac.iter(s)
def get_ngrams_neighbor_ac(texts, w):
'''
Collect the characters adjacent to every gram: all texts are joined into one line and an
Aho-Corasick automaton matches every gram in a single pass; the matched positions give the
left/right neighbouring characters from which the boundary entropy is computed.
'''
neighbors = {}
text_line = ''
for line in texts:
text_line += ' '+ line
print('构建AC自动机...')
ac = AC_Unicode()
for gram in w.keys():
if len(gram)>1:
ac.add_word(gram, gram)
ac.make_automaton()
result_ac = ac.iter(text_line)
print('迭代匹配结果...')
for item in result_ac:
    index, key = item
    if key not in neighbors:
        neighbors[key] = {'left': [], 'right': []}
    # record the neighbouring characters for every match, including the first occurrence
    index_left = index - len(key) + 1
    if index_left - 1 >= 0:
        neighbors[key]['left'].append(text_line[index_left-1 : index_left])
    index_right = index
    if index_right + 1 < len(text_line):
        neighbors[key]['right'].append(text_line[index_right+1 : index_right+2])
print('计算边界熵...')
ngrams_entropy = defaultdict(int)
for key in neighbors.keys():
entropy = cal_entropy(neighbors[key], key)
ngrams_entropy[key] = entropy
return ngrams_entropy
def remove_general_words_ac(dict_general_words, ws):
'''
Remove common words: the common-word dictionary is joined into one long text and an
Aho-Corasick automaton finds every candidate that occurs in it; those candidates are deleted.
:param dict_general_words: path to the common-word dictionary (CSV)
:param ws: dict of candidate words with their boundary entropy
:return: remaining candidates sorted by boundary entropy, descending
'''
print('移除常用词...')
ac = AC_Unicode()
for gram in ws.keys():
if len(gram)>1:
ac.add_word(gram, gram)
General_dict = pd.read_csv(dict_general_words)
General_dict = list(General_dict['0'].values)
General_dict_ = ''
for key in General_dict:
General_dict_ += ' ' + str(key)
ac.make_automaton()
result_ac = ac.iter(General_dict_)
for index, key in result_ac:
try:
del ws[key]
except: continue
final_w = sorted(ws.items(), key=lambda item: item[1],reverse=True)
return final_w
def get_new_words( file_in, file_dict, file_out, min_count, min_proba):
'''
Discover new words.
:param file_in: input document, one article per line, utf8 encoded
:param file_dict: common-word dictionary, one word per line
:param file_out: output file, one word per line with its boundary entropy, sorted from large to small
:param min_count: minimum frequency for an n-gram
:param min_proba: dict of minimum cohesion thresholds per word length; lengths 2, 3 and 4 are enough
:return:
'''
import time
import pandas as pd
start = time.time()
n = 4 # 默认ngrams中的n为4
df = pd.read_excel(file_in)['摘要'] # 读取数据
df.dropna(inplace=True)
texts = []
for text in df:
if len(str(text)) > 10:
print(text)
texts.append(''.join(text.split()))
ngrams, total = get_ngrams_counts(texts, n, min_count) # 获取ngrams
ngrams_filter = set(i for i, j in ngrams.items() if filter_with_porba(i, min_proba, total, ngrams)) # 计算凝固度,并根据阈值过滤ngrams
# 根据ngrams分词
words = defaultdict(int)
for t in texts:
for i in cut(t, n, ngrams_filter):
words[i] += 1
w = {i: j for i, j in words.items() if j >= min_count} # 根据阈值筛选出出现频率较高的词
# Note: words and ngrams_filter (the cohesion-filtered set) do not fully coincide here, because the cut step can produce words that are not in ngrams.
# w = {i: j for i, j in words.items() if is_real(i, n, ngrams_filter)}
print('凝固度筛选词的长度:{}'.format(len(w)))
ws = get_ngrams_neighbor_ac(texts, w) # 按边界熵大小排序
final_w = remove_general_words_ac(file_dict, ws)
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~“”?,!【】()、。:;’‘……¥·↓/"""
count_num = 0
with open(file_out, 'w', encoding='utf-8') as writer:
for value in final_w:
word = value[0]
sign = 0
for i in word:
if i in punctuation:
sign = 1
break
print(sign)
if (len(word) >= 2) and (sign==0):
writer.write('{},{}\n'.format(word, value[1]))
count_num += 1
end = time.time()
print('新词个数:{}'.format(count_num))
print('花费时间:{}分钟'.format(round((end - start) / 60, 2)))
if __name__ == '__main__':
min_count = 1
min_proba = {2: 500, 3: 1000, 4: 500}
file_in = r'D:\临时工作\临时工作代码\企业资讯八方面-附关键词\风险管理.xlsx' # utf8
file_dict = './dict_sogou_vec.txt' # utf8
file_out = './find_words_.csv' # gbk
# import pdfplumber
#
# file_path = r'C:\xxxx\practice.PDF'
#
# with pdfplumber.open(file_path) as pdf:
# page = pdf.pages[11]
# print(page.extract_text())
get_new_words(file_in, file_dict, file_out, min_count, min_proba)
from tkinter import _flatten
from django.http import JsonResponse
from django.views.decorators.http import require_POST
from basic_service.views import basic, co_occurrence
from model.base.views.token_authorize import *
import os
import shutil
UPLOAD_FOLDER = '/home/zzsn/ctt/platform_zzsn/media/'
# Create your views here.
@require_POST
@login_required
def doc_similarity_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
text_1 = request.POST['text_1']
text_2 = request.POST['text_2']
sim_algorithm_name = request.POST['sim_algorithm_name']
print(text_1)
print(text_2)
url = 'http://localhost:7005/doc_sim/calculate_similarity'
result = basic.post_similarity(url, text_1, text_2, sim_algorithm_name)
result['token'] = token
return JsonResponse(result)
@require_POST
@login_required
def ner_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, entity = basic.ner(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'pos': entity},
})
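# A hedged client-side sketch for the endpoint above. The host/port follow the runserver command at
# the top of this commit; the '/basic_service/' URL prefix is an assumption, since the project-level
# urls.py is not included here, and `token` must come from a prior login.
# import requests
# resp = requests.post('http://localhost:7004/basic_service/ner_single',
#                      headers={'Authorization': token},
#                      data={'text': '小明在郑州大学读书。'})
# print(resp.json()['resultData'])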
@require_POST
@login_required
def associated_word_single(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
word_num = request.POST['word_num']
try:
related_words = basic.related_word_recommendation(text, word_num)
except Exception as e:
print(e)
return JsonResponse({
'token': token,
'handleMsg': 'failure',
'isHandleSuccess': False,
'logs': str(e),
'resultData': None,
})
else:
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': related_words,
})
@require_POST
@login_required
def word_cut(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words = basic.word_cut(text)
words = list(_flatten(words))
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': words,
})
@require_POST
@login_required
def word_pos(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, pos = basic.word_pos(text)
words = list(_flatten(words))
pos = list(_flatten(pos))
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'pos': pos},
})
@require_POST
@login_required
def new_word_find(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, sign = basic.new_words_find(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'sign': sign},
})
@require_POST
@login_required
def show_srl(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, srl = basic.show_srl(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'srl': srl},
})
@require_POST
@login_required
def show_dep(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
words, dep = basic.show_dep(text)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'words': words, 'dep': dep},
})
@require_POST
@login_required
def create_keywords(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
topK = int(request.POST['topK'])
with_weight = request.POST['with_weight'].lower() in ('true', '1', 'yes')  # bool() of any non-empty string, including 'false', is True
key_words = basic.create_keywords(text=text, topK=topK, with_weight=with_weight)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'keywords': key_words},
})
@require_POST
@login_required
def get_summary(request):
token = request.META.get("HTTP_AUTHORIZATION")
text = request.POST['text']
summary_length = int(request.POST['summary_length'])
summaries = basic.summary(text, summary_length)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'summaries': summaries},
})
@require_POST
@login_required
def word_co_occurrence(request):
token = request.META.get("HTTP_AUTHORIZATION")
path_timestamp = request.POST['path_timestamp']
pending_file = request.POST['pending_file']
path = os.path.join(UPLOAD_FOLDER, path_timestamp)
filepath = os.path.join(path, pending_file)
topK = int(request.POST['topK'])
word_matric = co_occurrence.main(filepath, topK)
if os.path.exists(path):
    shutil.rmtree(path)  # remove the whole uploaded temp directory (the bare timestamp is not a valid path)
return JsonResponse({
'token': token,
'handleMsg': 'success',
'isHandleSuccess': True,
'logs': '处理成功',
'resultData': {'word_matric': word_matric},
})
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'platform_zzsn.settings')
try:
from django.core.management import execute_from_command_line
except ImportError as exc:
raise ImportError(
"Couldn't import Django. Are you sure it's installed and "
"available on your PYTHONPATH environment variable? Did you "
"forget to activate a virtual environment?"
) from exc
execute_from_command_line(sys.argv)
if __name__ == '__main__':
main()
from model.base.views.config import BaseConfig
from model.base.views.data import BaseDataLoader
from model.base.views.data import BaseDataProcess
from model.base.views.evaluator import BaseEvaluator
from model.base.views.loss import BaseLoss
from model.base.views.model import BaseModel
from model.base.views.runner import BaseRunner
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class BaseConfig(AppConfig):
name = 'base'
from django.db import models
from datetime import datetime
# Create your models here.
class User(models.Model):
username = models.CharField(max_length=30, unique=True)
true_name = models.CharField(max_length=30)
sex = models.CharField(max_length=2)
mobile_number = models.CharField(max_length=20)
mail = models.CharField(max_length=20)
id_card = models.CharField(max_length=20)
password = models.CharField(max_length=40)
account_number = models.CharField(max_length=20)
def toDict(self):
return {'id':self.id,
'username':self.username,
'true_name':self.true_name,
'sex':self.sex,
'mobile_number':self.mobile_number,
'mail':self.mail,
'id_card':self.id_card,
'password':self.password,
'account_number':self.account_number,
# 'update_at':self.update_at.strftime('%Y-%m-%d %H:%M:%S')
}
class Meta:
db_table = 'user'
class ServiceManage(models.Model):
name = models.CharField(max_length=15)
username = models.CharField(max_length=30)
filenames = models.CharField(max_length=200)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=10)
path = models.CharField(max_length=20)
def toDict(self):
return {'name': self.name,
'username': self.username,
'filenames': self.filenames,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'path': self.path,
}
class Meta:
db_table = 'service_manage'
class SubjectManage(models.Model):
sid = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=30)
def toDict(self):
return {'sid': self.sid,
'name': self.name,
}
class Meta:
db_table = 'subject_manage'
class ModelManage(models.Model):
task_name = models.CharField(max_length=30)
function_type = models.CharField(max_length=20)
model_type = models.CharField(max_length=20)
version_num = models.IntegerField()
create_date = models.DateTimeField(default=None)
def toDict(self):
return {'id': self.id,
'task_name': self.task_name,
'function_type': self.function_type,
'model_type': self.model_type,
'version_num': self.version_num,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
}
class Meta:
db_table = 'model_manage'
class VersionManage(models.Model):
model = models.ForeignKey(ModelManage, related_name='version_model', on_delete=models.CASCADE)
version = models.CharField(max_length=20)
create_date = models.DateTimeField(default=None)
end_date = models.DateTimeField(default=None)
state = models.CharField(max_length=20)
creator = models.CharField(max_length=30)
path = models.CharField(max_length=20, unique=True)
def toDict(self):
return {'id': self.id,
'version': self.version,
'create_date': self.create_date.strftime('%Y-%m-%d %H:%M:%S'),
'end_date': self.end_date.strftime('%Y-%m-%d %H:%M:%S'),
'state': self.state,
'creator': self.creator,
'path': self.path,
}
class Meta:
db_table = 'version_manage'
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from model.base.views import views as base_views
from django.conf.urls import url
urlpatterns = [
url(r'^register-account', base_views.register_account, name='register_account'),
url(r'^verify-username', base_views.verify_username, name='verify_username'),
url(r'^login', base_views.login, name='login'),
url(r'^reset-password', base_views.reset_password, name='reset_password'),
url(r'^show-config-file', base_views.show_config_file, name='show_config_file'),
url(r'^show-service-file', base_views.show_service_file, name='show_service_file'),
url(r'^delete-file-row-manage', base_views.delete_file_row_manage, name='delete_file_row_manage'),
url(r'^delete-file-row-service', base_views.delete_file_row_service, name='delete_file_row_service'),
url(r'^file-upload', base_views.file_upload, name='file_upload'),
url(r'^show-log-file', base_views.show_log_file, name='show_log_file'),
url(r'^validate-code', base_views.validate_code, name='validate_code'),
url(r'^download-zip', base_views.download_zip, name='download_zip'),
url(r'^download-xlsx', base_views.download_xlsx, name='download_xlsx'),
url(r'^query-manage', base_views.query_manage, name='query_manage'),
url(r'^forget-password', base_views.forget_password, name='forget_password'),
url(r'^train', base_views.run_train, name='train'),
url(r'^query-service-manage', base_views.query_service_manage, name='query_service_manage'),
url(r'^query-subject', base_views.query_subject, name='query_subject'),
url(r'^query-version', base_views.query_version, name='query_version'),
url(r'^query-task-name', base_views.query_task_name, name='query_task_name')
]
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 11:51
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 14:34
# @Author : 程婷婷
# @FileName: BaseConfig.py
# @Software: PyCharm
import yaml
class BaseConfig:
def __init__(self, config_path):
self._config_path = config_path
self._parsed_file = self.load_config()
def load_config(self):
print(self._config_path)
with open(self._config_path) as yaml_file:
parsed_file = yaml.load(yaml_file, Loader=yaml.FullLoader)
return parsed_file
# if __name__ == '__main__':
# bc = BaseConfig()
# print(bc._parsed_file)
# print(bc.load_config()['data_path'])
# print(bc.load_config()['embedding'])
# print(bc.load_config()['model'])
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 9:58
# @Author : 程婷婷
# @FileName: BaseDataLoader.py
# @Software: PyCharm
import pandas as pd
from model.base.views.config.BaseConfig import BaseConfig
class BaseDataLoader:
def __init__(self, config_path):
self.data_loader_config = BaseConfig(config_path)._parsed_file['data_loader']
def read_file(self):
symbol = self.data_loader_config['dataset_path'].split('.')[-1]
if (symbol == 'xlsx') or (symbol == 'xls'):
df = pd.read_excel(r''+self.data_loader_config['dataset_path'])
elif symbol == 'csv':  # split('.')[-1] yields 'csv' without the dot
df = pd.read_csv(r''+self.data_loader_config['dataset_path'], sep='\t')
else:
print('数据类型错误')
return '数据类型错误'
df.drop_duplicates(subset='content', keep='first', inplace=True)
df.dropna(subset=['content', 'title'], inplace=True)
df = df.reset_index(drop=True)
print('=================执行正文去重和去空之后共有%d条数据=============' % len(df['content']))
return df
def read_stopwords(self):
# 读取停顿词列表
stopword_list = [k.strip() for k in open(self.data_loader_config['stopwords_path'], encoding='utf8').readlines() if
k.strip() != '']
return stopword_list
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/10 15:28
# @Author : 程婷婷
# @FileName: BaseDataProcess.py
# @Software: PyCharm
import os
import re
import jieba
import pickle
import gensim
import logging
import numpy as np
from pyhanlp import *
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_classif, SelectPercentile
from model.base import BaseConfig
from model.base import BaseDataLoader
from platform_zzsn.settings import BASE_DIR
format = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=format, level=logging.INFO)
class BaseDataProcess:
def __init__(self, config_path):
self.embedding_config = BaseConfig.BaseConfig(config_path)._parsed_file['embedding']
self.process_config = BaseConfig.BaseConfig(config_path)._parsed_file['data_process']
PerceptronLexicalAnalyzer = JClass('com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer')
self.pla_segment = PerceptronLexicalAnalyzer()
self.bdl = BaseDataLoader.BaseDataLoader(config_path)
def clean_content(self, content):
bs = BeautifulSoup(content, 'html.parser')
return bs.text
def remove_char(self, content):
# 保留中文、英语字母、数字和标点
graph_filter = re.compile(r'[^\u4e00-\u9fa5a-zA-Z0-9\s,。\.,?\?!!;;]')
content = graph_filter.sub('', content)
return content
def jieba_tokenizer(self, content):
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in jieba.lcut(content) if word not in stopwords])
def pla_tokenizer(self, content):
words = list(self.pla_segment.analyze(content).toWordArray())
if self.process_config['use_stopwords']:
stopwords = self.bdl.read_stopwords()
else:
stopwords = []
return ' '.join([word for word in words if word not in stopwords])
def save(self, voc, path):
with open(path, 'wb') as voc_file:
pickle.dump(voc, voc_file)
def process(self, data, min_content=0):
processed_data = []
for record in data:
record = self.clean_content(str(record))
record = self.remove_char(record)
if len(record) > min_content:
methods = self.process_config['tokenizer']
if methods == 'PerceptronLexicalAnalyzer':
record = self.pla_tokenizer(record)
record = [w for w in record.split() if w.strip() != '']  # split the space-joined tokens back into words (iterating the string itself yields single characters)
else:
record = self.jieba_tokenizer(record)
record = [w for w in record.split() if w.strip() != '']  # split the space-joined tokens back into words (iterating the string itself yields single characters)
processed_data.append(' '.join(record))
else:
pass
return processed_data
def split_dataset(self, data, use_dev):
if use_dev:
train_data_set, test_dev_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
test_data_set, dev_data_set = train_test_split(test_dev_set,  # the held-out part yields test and dev sets (a 3-way unpack here would raise ValueError)
test_size=self.process_config['test_size'],
random_state=self.process_config['random_state'],
shuffle=True)
print(len(train_data_set) + len(test_data_set) + len(dev_data_set))
return train_data_set, test_data_set, dev_data_set
else:
train_data_set, test_data_set = train_test_split(data,
train_size=self.process_config['train_size'],
random_state=self.process_config['random_state'],
shuffle=True)
return train_data_set, test_data_set
def bag_of_words(self, data, label):
vectorizer = CountVectorizer(ngram_range=(1, 1), min_df=5)
x = vectorizer.fit_transform(data)
transformer = TfidfTransformer(norm=self.embedding_config['norm'], use_idf=self.embedding_config['use_idf'],
smooth_idf=self.embedding_config['smooth_idf'])
x = transformer.fit_transform(x).toarray()
if self.embedding_config['with_feature_selection']:
transformed_data = SelectPercentile(mutual_info_classif, percentile=20).fit_transform(x, label)
else:
transformed_data = x
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
self.save(voc=vectorizer.vocabulary_, path=os.path.join(self.embedding_config['embedding_path'], 'tfidf.pkl'))
return transformed_data, vectorizer.get_feature_names()
def word2vec(self, data, feature_words):
model = gensim.models.word2vec.Word2Vec(sentences=data,
size=self.embedding_config['size'],
window=self.embedding_config['window'],
min_count=self.embedding_config['min_count'],
workers=self.embedding_config['workers'],
sg=self.embedding_config['sg'],
iter=self.embedding_config['iter'])
vocabulary_w2v = model.wv.vocab.keys()
count = 0
if self.embedding_config['use_Tencent']:
model_tencent = gensim.models.KeyedVectors.load_word2vec_format(
os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
vocabulary_tencent = model_tencent.wv.vocab.keys()
vector_matrix = np.zeros((len(feature_words), int(self.embedding_config['size']) + 200))
for word in feature_words:
if word in vocabulary_tencent:
vector_tencent = model_tencent.wv.word_vec(word)
else:
vector_tencent = np.random.randn(200)
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector = np.concatenate((vector_tencent, vector_w2v))
vector_matrix[count] = vector
count += 1
else:
vector_matrix = np.zeros((len(feature_words), self.embedding_config['size']))
for word in feature_words:
if word in vocabulary_w2v:
vector_w2v = model.wv.word_vec(word)
else:
vector_w2v = np.random.randn(self.embedding_config['size'])
vector_matrix[count] = vector_w2v
count += 1
os.makedirs(self.embedding_config['embedding_path'], exist_ok=True)
model.save(os.path.join(self.embedding_config['embedding_path'], 'word2vec.model'))
return vector_matrix
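# A minimal, self-contained sketch of the bag-of-words + TF-IDF step that
# bag_of_words() above performs, run on toy, pre-tokenized sentences instead of
# project data; toy_docs and the min_df=1 setting are assumptions for the demo.
if __name__ == '__main__':
    from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

    toy_docs = ['今天 天气 很好', '今天 股市 下跌', '明天 天气 下雨']
    counts = CountVectorizer(ngram_range=(1, 1), min_df=1).fit_transform(toy_docs)
    tfidf = TfidfTransformer(norm='l2', use_idf=True, smooth_idf=True).fit_transform(counts)
    print(tfidf.toarray().shape)  # (3, vocabulary size)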
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:30
# @Author : 程婷婷
# @FileName: BaseEvaluator.py
# @Software: PyCharm
from sklearn.metrics import precision_score, f1_score, recall_score, classification_report
import logging
from model.base.views.config.BaseConfig import BaseConfig
formats = '%(asctime)s %(levelname)s %(pathname)s %(funcName)s %(message)s'
logging.basicConfig(format=formats, level=logging.INFO)
class BaseEvaluator:
def __init__(self, config_path):
self.evaluate_config = BaseConfig(config_path)._parsed_file['evaluate']
def evaluate(self, y_true, y_pred, label_mapping, logger):
result = []
y_true = list(map(str, y_true))
y_pred = list(map(str, y_pred))
logger.info('模型评估结果如下:')
if not label_mapping:
result.append(classification_report(y_true, y_pred))
logger.info(classification_report(y_true, y_pred))
else:
for value in label_mapping.values():
print([k for k,v in label_mapping.items() if v == value])
p = precision_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
r = recall_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
f1 = f1_score(y_true, y_pred, average=self.evaluate_config['average'], pos_label=str(value))
print({'value': value,'召回率为': r, '精确率为': p, 'F1': f1})
logger.info('标签为%s' % [k for k,v in label_mapping.items() if v == value][0])
logger.info('精确率为%.2f' %p)
logger.info('召回率为%.2f' %r)
logger.info('F1值为%.2f' %f1)
result.append(str({'label': value,'recall': r, 'precision': p, 'F1': f1}))
return ' '.join(result)
# y_true = [0, 1, 2, 0, 1, 2]
# y_pred = [0, 2, 1, 0, 0, 1]
# print(BaseEvaluator())
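# A self-contained sketch of the per-label metrics loop in evaluate(), assuming a
# toy label_mapping and toy string labels; all values here are illustrative only.
if __name__ == '__main__':
    toy_mapping = {'正面': 0, '负面': 1}   # assumed label -> id mapping
    toy_true = ['0', '1', '1', '0', '1']
    toy_pred = ['0', '1', '0', '0', '1']
    for name, value in toy_mapping.items():
        p = precision_score(toy_true, toy_pred, average='binary', pos_label=str(value))
        r = recall_score(toy_true, toy_pred, average='binary', pos_label=str(value))
        f1 = f1_score(toy_true, toy_pred, average='binary', pos_label=str(value))
        print(name, {'precision': p, 'recall': r, 'F1': f1})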
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
import os
import yaml
import random
import smtplib
from email.mime.text import MIMEText
from django.core.paginator import Paginator
from email.mime.multipart import MIMEMultipart
from PIL import Image,ImageFont,ImageDraw,ImageFilter
from model.base.models import ModelManage, ServiceManage, VersionManage
from platform_zzsn.settings import BASE_DIR
class Picture:
def __init__(self):
self.size = (240,60)
self.mode='RGB'
self.color='white'
self.font = ImageFont.truetype(os.path.join(BASE_DIR,
'static/common/font/arial.ttf'), 36) # font file and size
def randChar(self):
basic='23456789abcdefghijklmnpqrstwxyzABCDEFGHIJKLMNPQRSTWXYZ'
return basic[random.randint(0,len(basic)-1)] # random character
def randBdColor(self):
return (random.randint(64,255),random.randint(64,255),random.randint(64,255)) # random background color
def randTextColor(self):
return (random.randint(32, 127), random.randint(32, 127), random.randint(32, 127)) # random text color
def proPicture(self):
new_image=Image.new(self.mode,self.size,self.color) # create a new image from mode, size and background color
drawObject=ImageDraw.Draw(new_image) # drawing object bound to the image
line_num = random.randint(4,6) # number of interference lines
for i in range(line_num):
#size=(240,60)
begin = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
end = (random.randint(0, self.size[0]), random.randint(0, self.size[1]))
drawObject.line([begin, end], self.randTextColor())
for x in range(240):
for y in range(60):
tmp = random.randint(0,50)
if tmp>30: # controls the amount of interference noise
drawObject.point((x,y),self.randBdColor())
randchar=''
for i in range(5):
rand=self.randChar()
randchar+=rand
drawObject.text([50*i+10,10],rand,self.randTextColor(),font=self.font) # draw the character
new_image = new_image.filter(ImageFilter.SHARPEN) # sharpen filter
return new_image,randchar
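# Hedged usage sketch: Picture() depends on the project font at
# static/common/font/arial.ttf, so the call stays commented out, following the
# convention of the other commented examples in this code base; 'captcha.png' is
# an assumed output path.
# captcha_image, captcha_text = Picture().proPicture()
# captcha_image.save('captcha.png')
# print(captcha_text)   # the 5 random characters drawn on the image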
def update_config_file(config_path, config_file):
data = yaml.load(config_file, Loader=yaml.FullLoader)
data['data_loader'] = {}
model_path = data['model']['model_path']
model_name = data['model']['model_name']
if data['model']['model_path']:
data['model']['model_path'] = os.path.join(config_path, model_path)
else:
data['model']['model_path'] = os.path.join(config_path, model_name)
print(data['model']['model_path'])
embedding_path = data['embedding']['embedding_path']
if embedding_path:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['embedding_path'])
else:
if data['embedding']['name']:
data['embedding']['embedding_path'] = os.path.join(config_path, data['embedding']['name'])
tokenizer_path = data['embedding']['tokenizer_path']
if tokenizer_path:
data['embedding']['tokenizer_path'] = os.path.join(config_path, data['embedding']['tokenizer_path'])
try:
test_file_path = data['data_process']['test_file_path']
train_file_path = data['data_process']['train_file_path']
except KeyError:
pass
else:
data['data_process']['test_file_path'] = os.path.join(config_path, test_file_path)
data['data_process']['train_file_path'] = os.path.join(config_path, train_file_path)
for file in os.listdir(config_path):
if ('.xls' == file[-4:]) or ('.xlsx' == file[-5:]):
xlsx_path = os.path.join(config_path, file)
data['data_loader']['dataset_path'] = xlsx_path
if 'save_fname' in data['runner'].keys():
data['runner']['save_fpath'] = os.path.join(config_path, data['runner']['save_fname'])
data['data_loader']['stopwords_path'] = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
file_path = os.path.join(config_path, 'config.yaml')
with open(file_path, 'w') as yaml_file:
yaml.safe_dump(data, yaml_file, default_flow_style=False)
return file_path
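# Hedged sketch of the config layout update_config_file() appears to expect; the
# keys below are inferred from the lookups above and may be incomplete.
# example_config = {
#     'model':        {'model_name': 'clf', 'model_path': ''},
#     'embedding':    {'name': 'tfidf', 'embedding_path': '', 'tokenizer_path': ''},
#     'data_process': {'train_file_path': 'train.txt', 'test_file_path': 'test.txt'},   # optional
#     'runner':       {'save_fname': 'result.xlsx'},                                    # optional
#     'data_loader':  {},   # created and filled in by update_config_file()
# }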
def select_manage(task_name, function_type, model_type, begin_cdate, end_cdate, page_size, current_page):
condition = {'task_name': task_name, 'function_type': function_type, 'model_type': model_type,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
managers = ModelManage.objects.filter(**condition).order_by('-create_date')
len_managers = len(managers)
page = Paginator(managers, page_size)
maxpages = page.num_pages # total number of pages
pIndex = int(current_page)
# make sure the requested page index is not out of range
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # records on the current page
return list(manager_list), len_managers
def select_version(model_id, begin_cdate, end_cdate, page_size, current_page):
condition = {'model_id': model_id,
'create_date__range': (begin_cdate, end_cdate,)
}
del_keys = []
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
versions = VersionManage.objects.filter(**condition).order_by('-create_date')
len_versions = len(versions)
page = Paginator(versions, page_size)
maxpages = page.num_pages # total number of pages
pIndex = int(current_page)
# make sure the requested page index is not out of range
if pIndex > maxpages:
pIndex = maxpages
version_list = page.page(pIndex) # records on the current page
return list(version_list), len_versions
def select_service_manage(name, begin_cdate, end_cdate, state, username, page_size, current_page):
condition = {
'name': name,
'state': state,
'create_date__range': (begin_cdate, end_cdate),
'username': username,
}
del_keys = []
for key in condition.keys():
if not condition[key]:
del_keys.append(key)
if not condition['create_date__range'][0]:
del_keys.append('create_date__range')
for key in del_keys:
condition.pop(key)
print(condition)
service_managers = ServiceManage.objects.filter(**condition).order_by('-create_date')
len_service_managers = len(service_managers)
page = Paginator(service_managers, page_size)
maxpages = page.num_pages
pIndex = int(current_page)
# make sure the requested page index is not out of range
if pIndex > maxpages:
pIndex = maxpages
manager_list = page.page(pIndex) # records on the current page
return list(manager_list), len_service_managers
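# The three select_* helpers above share the same Paginator pattern; this
# self-contained toy (a plain list instead of a QuerySet) illustrates the
# page-bounds clamp they apply. The numbers are arbitrary demo values.
if __name__ == '__main__':
    toy_rows = list(range(1, 26))                     # 25 fake records
    toy_page = Paginator(toy_rows, 10)                # 10 records per page
    requested = 5                                     # deliberately out of range
    pIndex = min(requested, toy_page.num_pages)
    print(toy_page.num_pages, list(toy_page.page(pIndex)))   # 3 [21, 22, 23, 24, 25]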
def sendMail(user,pwd,sender,receiver,msg_title):
mail_host = "smtp.163.com" # SMTP server of 163.com
message = MIMEMultipart('alternative')
# set the sender of the mail
message["From"] = sender
# set the recipients of the mail
message["To"] = ",".join(receiver)
# set the subject of the mail
message["Subject"] = msg_title
# plain-text alternative (kept commented out for reference)
# message.attach(MIMEText('您好,\n'
# ' 您当前的密码为%s, 为了保证您的账号安全,请尽快登陆重置您的密码'%msg_content, 'plain', 'utf-8'))
# add the HTML body
message.attach(MIMEText('<html>'
'<body>'
'<h1>Hello </h1><br> '
'<h3>To ensure the security of your account, please log in and reset your password as soon as possible.</h3>'
'<h2><a href="http://192.168.1.149:8020/reset_password/">点此重置</a></h2>'
'</body>'
'</html>', 'html', 'utf-8'))
# 1. open an SSL connection to the SMTP server
smtpObj = smtplib.SMTP_SSL(mail_host,465)
# 2. log in to the mailbox for authentication
smtpObj.login(user,pwd)
# 3. send the mail
# arguments: sender, recipients, message content
smtpObj.sendmail(sender,receiver,message.as_string())
return True
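# Hedged usage sketch for sendMail(): it needs real 163.com SMTP credentials, so
# the call stays commented out; every value below is a placeholder.
# sendMail(user='example@163.com',
#          pwd='smtp-authorization-code',     # 163 SMTP authorization code, not the login password
#          sender='example@163.com',
#          receiver=['someone@example.com'],  # list of recipient addresses
#          msg_title='密码重置')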
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:29
# @Author : 程婷婷
# @FileName: BaseLoss.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:18
# @Author : 程婷婷
# @FileName: BaseModel.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
import os
import pickle
class BaseModel:
def __init__(self,config_path):
self.model_config = BaseConfig(config_path)._parsed_file['model']
def building_model(self, *params):
pass
def save(self, model):
dir = os.path.dirname(self.model_config['model_path'])
if not os.path.exists(dir):
os.makedirs(dir)
with open(self.model_config['model_path'], 'wb') as model_file:
pickle.dump(model, model_file)
def predict(self, model, X):
proba = model.predict_proba(X)
y_predict = model.predict(X)
return {'proba': proba, 'y_predict': y_predict}
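# BaseModel only wraps pickling plus predict/predict_proba, so any scikit-learn
# estimator with those methods fits the pattern; the toy below mirrors
# save()/predict() without needing a config file. All values are demo data.
if __name__ == '__main__':
    from sklearn.linear_model import LogisticRegression

    toy_X = [[0.0], [0.2], [0.8], [1.0]]
    toy_y = [0, 0, 1, 1]
    clf = LogisticRegression().fit(toy_X, toy_y)
    blob = pickle.dumps(clf)                  # what save() writes to model_path
    restored = pickle.loads(blob)
    print(restored.predict([[0.1], [0.9]]))
    print(restored.predict_proba([[0.1], [0.9]]))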
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 16:36
# @Author : 程婷婷
# @FileName: BaseRunner.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
class BaseRunner:
def __init__(self,config_path):
self.runner_config = BaseConfig(config_path)._parsed_file['runner']
def train(self, logger):
pass
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 17:04
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 9:24
# @Author : 程婷婷
# @FileName: test.py
# @Software: PyCharm
import jieba
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectPercentile, chi2
X, y = load_digits(return_X_y=True)
print(X.shape)
print(X[:10], y[:100])
X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)
print(X_new.shape)
print(X_new[:10])
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/20 16:58
# @Author : 程婷婷
# @FileName: token_authorize.py
# @Software: PyCharm
import jwt
import time
import functools
from jwt import exceptions
from django.http import JsonResponse
from platform_zzsn.settings import *
# SECRET_KEY (imported from settings) is the signing key used to check that a JWT is valid and legitimate
def create_token(user):
'''Create a JWT-based token for the given user'''
headers = {
"alg": "HS256",
"typ": "JWT"
}
exp = int(time.time() + 3*60*60)
payload = {
"id": user.id,
"name": user.username,
"exp": exp
}
token = jwt.encode(payload=payload, key=SECRET_KEY, algorithm='HS256', headers=headers).decode('utf-8')
return token
def login_required(view_func):
@functools.wraps(view_func)
def validate_token(request, *args, **kwargs):
'''Validate the JWT carried by the request; if it is valid, call the wrapped view'''
try:
token = request.META.get("HTTP_AUTHORIZATION")
payload = jwt.decode(token, SECRET_KEY, algorithms=['HS256'])
print(payload)
return view_func(request, *args, **kwargs)
# validity and legitimacy checks on the JWT
except exceptions.ExpiredSignatureError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '登录已过期'
})
except jwt.DecodeError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '缺少参数token'
# token authentication failed
})
except jwt.InvalidTokenError:
return JsonResponse({
'handle_msg': 'failure',
'is_handle_success': False,
'logs': '缺少参数token'
# invalid token
})
return validate_token
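# A self-contained round trip with a throw-away key, showing the payload shape
# create_token() emits and the decode call validate_token() performs. Note that
# create_token() calls .decode('utf-8') on the encode result, which assumes
# PyJWT 1.x (PyJWT 2.x already returns str); the sketch below avoids that call.
if __name__ == '__main__':
    toy_key = 'demo-secret'                   # placeholder, not the project SECRET_KEY
    toy_payload = {'id': 1, 'name': 'demo', 'exp': int(time.time()) + 60}
    toy_token = jwt.encode(toy_payload, toy_key, algorithm='HS256')
    print(jwt.decode(toy_token, toy_key, algorithms=['HS256']))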
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/9 11:19
# @Author : 程婷婷
# @FileName: utils.py
# @Software: PyCharm
import os
import re
import jieba
import zipfile
import pandas as pd
from docx import Document
from platform_zzsn.settings import *
def read_txt(path):
with open(path, 'r', encoding='utf8') as file:
lines = file.readlines()
return lines
def read_docx(pending_file, user_file):
jieba.load_userdict(user_file)
document = Document(pending_file)
doc_text_list = []
for para in document.paragraphs:
para_text = re.sub(r'\s', '', para.text)
if para_text:
doc_text_list.append(para_text)
return doc_text_list
def read_excel(pending_file, user_file):
jieba.load_userdict(user_file)
doc_text_list = pd.read_excel(pending_file)['content']
doc_text_list.dropna(inplace=True)
return doc_text_list
def merge_para(paras):
new_paras = []
for i, para in enumerate(paras):
if not new_paras:
new_paras.append(para)
elif (len(new_paras[-1]) < 500):
new_paras[-1] += para
else:
new_paras.append(para)
return new_paras
def filter_stopwords(para):
path = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
stopword_list = [k.strip() for k in read_txt(path) if
k.strip() != '']
words = [word for word in jieba.lcut(para) if word not in stopword_list]
return words
# get the second element of a list
def takeSecond(elem):
return elem[1]
# get the length of the first element
def takeFirst_len(elem):
return len(elem[0])
def make_zip(file_dir: str, zip_path: str) -> None:
zip_f = zipfile.ZipFile(zip_path, 'w')
pre_len = len(os.path.dirname(file_dir))
for parent, dir_names, filenames in os.walk(file_dir):
for filename in filenames:
path_file = os.path.join(parent, filename)
arc_name = path_file[pre_len:].strip(os.path.sep)
zip_f.write(path_file, arc_name)
zip_f.close()
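# Self-contained demo of make_zip(): builds a throw-away directory with one file
# and zips it next to that directory; all paths are temporary demo paths.
if __name__ == '__main__':
    import tempfile

    tmp_dir = tempfile.mkdtemp()
    src_dir = os.path.join(tmp_dir, 'result')
    os.makedirs(src_dir, exist_ok=True)
    with open(os.path.join(src_dir, 'demo.txt'), 'w', encoding='utf8') as f:
        f.write('demo')
    make_zip(src_dir, os.path.join(tmp_dir, 'result.zip'))
    print(os.path.exists(os.path.join(tmp_dir, 'result.zip')))   # True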
from model.classify.views.fasttext_classify import FastTextConfig
from model.classify.views.fasttext_classify.data import FastTextDataLoader
from model.classify.views.fasttext_classify.data import FastTextProcess
from model.classify.views.fasttext_classify import FastTextModel
from model.classify.views.fasttext_classify import FastTextEvaluator
from model.classify.views.fasttext_classify import FastTextRunner
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class ClassifyConfig(AppConfig):
name = 'classify'
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/12 18:05
# @Author : 程婷婷
# @FileName: urls.py
# @Software: PyCharm
from django.urls import path
from basic_service import views
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/8/13 11:24
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base import BaseConfig
class FastTextConfig(BaseConfig.BaseConfig):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base import BaseEvaluator
class FastTextEvaluator(BaseEvaluator.BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:18
# @Author : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
import fasttext
from model.base import BaseModel
class FastTextModel(BaseModel.BaseModel):
def __init__(self, config_path):
super().__init__(config_path)
def building_model(self, input, autotuneValidationFile):
model = fasttext.train_supervised(input=input,
autotuneValidationFile=autotuneValidationFile,
autotuneDuration=self.model_config['autotuneDuration'],
autotuneModelSize=self.model_config['autotuneModelSize'])
model.save_model(self.model_config['model_path'])
return model
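# Hedged sketch of what building_model() drives underneath: fastText autotuning
# against a held-out file in the same "__label__X tokenized text" format as the
# training file; autotuneDuration is a budget in seconds and autotuneModelSize
# (e.g. '100M') caps the model size. train.txt / valid.txt are placeholders, so
# the call stays commented out.
# model = fasttext.train_supervised(input='train.txt',
#                                   autotuneValidationFile='valid.txt',
#                                   autotuneDuration=600,
#                                   autotuneModelSize='100M')
# print(model.predict('分词 后 的 待 预测 文本'))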
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
from model.base import BaseRunner
from model.classify import FastTextProcess
from model.classify import FastTextModel
from model.classify import FastTextEvaluator
class FastTextRunner(BaseRunner.BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.ftp = FastTextProcess.FastTextProcess(config_path)
self.ftm = FastTextModel.FastTextModel(config_path)
self.fte = FastTextEvaluator.FastTextEvaluator(config_path)
def train(self, logger):
train_path, test_path = self.ftp.runner_process(logger)
model = self.ftm.building_model(input=train_path, autotuneValidationFile=test_path)
with open(test_path, encoding='utf8') as file:
test_data = file.readlines()
true_labels, predict_labels = [], []
for text in test_data:
# each test line looks like "__label__X tokenized text"; X is assumed to be a single digit
label = text.replace('__label__', '')[0]
text = text.replace('__label__', '')[1:-1]
true_labels.append(int(label))
predict_label = model.predict(text)[0][0].replace('__label__', '')
# print(pre_label)
predict_labels.append(int(predict_label))
evaluate_result = self.fte.evaluate(true_labels, predict_labels, label_mapping=None, logger=logger)
print(evaluate_result)
return 'success'
# if __name__ == '__main__':
# state = FastTextRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 10:28
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:19
# @Author : 程婷婷
# @FileName: FastTextDataLoader.py
# @Software: PyCharm
from model.base import BaseDataLoader
class FastTextDataLoader(BaseDataLoader.BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:14
# @Author : 程婷婷
# @FileName: XgboostClassifyProcess.py
# @Software: PyCharm
import re
import time
from model.base import BaseDataProcess
from model.classify import FastTextDataLoader
class FastTextProcess(BaseDataProcess.BaseDataProcess):
def __init__(self, config_path):
super().__init__(config_path)
self.ftdl = FastTextDataLoader.FastTextDataLoader(config_path)
def remove_char(self, content):
graph_filter = re.compile(u'[\U00010000-\U0010ffff\uD800-\uDBFF\uDC00-\uDFFFa-z\n\s]')
content = graph_filter.sub('', content)
return content
def process(self, data, min_content):
processed_data = []
i = 0
for record in data:
record = self.remove_char(record)
if len(record) > min_content:
methods = self.process_config['tokenizer']
if methods == 'PerceptronLexicalAnalyzer':
record = self.pla_tokenizer(record)
else:
record = self.jieba_tokenizer(record)
processed_data.append(record)
i += 1
else:
i += 1
pass
if i % 100 == 0 or i == len(data):
print(time.strftime('%Y-%m-%d %H:%M:%S'),'第',i,'条文本分词完毕')
return processed_data
def transform_data(self, data, labels):
format_data = []
for i in range(len(data)):
fasttext_line = "__label__{} {}".format(labels[i], data[i])
format_data.append(fasttext_line)
return format_data
def runner_process(self, logger):
df = self.ftdl.read_file()
processed_data = self.process(df['content'], min_content=10)
# if self.process_config['label_encode']:
if type(df['label'][0]) == int:
labels = df['label']
else:
all_label = list(set(df['label']))
self.label_mapping = {v: k for k, v in dict(enumerate(all_label)).items()}
labels = df['label'].map(self.label_mapping)
print(labels)
format_data = self.transform_data(processed_data, labels)
if self.process_config['use_dev']:
train_data_set, test_data_set, dev_data_set = self.split_dataset(format_data, use_dev=self.process_config['use_dev'])
else:
train_data_set, test_data_set = self.split_dataset(format_data, use_dev=self.process_config['use_dev'])
with open(self.process_config['train_file_path'], 'w', encoding='utf-8') as trainf, \
open(self.process_config['test_file_path'], 'w', encoding='utf-8') as testf:
for train_row in train_data_set:
trainf.write(train_row + '\n')
for test_row in test_data_set:
testf.write(test_row + '\n')
logger.info('处理后的数据量为 %d 条' % len(format_data))
logger.info('训练集的数据量为 %d 条' % len(train_data_set))
logger.info('测试集的数据量为 %d 条' % len(test_data_set))
return self.process_config['train_file_path'], self.process_config['test_file_path']
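# For reference, transform_data() above emits the plain-text format fastText
# expects: one sample per line, an integer label prefixed with "__label__" and
# followed by the space-tokenized text, e.g.
# __label__0 今天 股市 大幅 下跌
# __label__1 本地 天气 晴朗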
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:18
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
class FastTextConfig(BaseConfig):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base.views.evaluator.BaseEvaluator import BaseEvaluator
class FlairClassifyEvaluator(BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:18
# @Author : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
from torch.optim import Adam
from torch.optim.lr_scheduler import OneCycleLR
from flair.models import TextClassifier
from flair.trainers import ModelTrainer
from model.base.views.model.BaseModel import BaseModel
class FlairClassifyModel(BaseModel):
def __init__(self, config_path):
super().__init__(config_path)
def building_model(self, corpus, document_embeddings, label_dict, loss_weights):
# downstream classifier
classifier = TextClassifier(
document_embeddings,
label_dictionary=label_dict,
loss_weights=loss_weights
)
# model trainer
trainer = ModelTrainer(classifier, corpus, optimizer=Adam)
model_save_path = self.model_config['model_path']
trainer.train(str(model_save_path),
learning_rate=3e-5, # use very small learning rate
mini_batch_size=16,
scheduler=OneCycleLR,
mini_batch_chunk_size=2, # optionally set this if transformer is too much for your machine
max_epochs=3, # terminate after X epochs
monitor_train=True,
monitor_test=True,
checkpoint=True
)
return classifier, trainer
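# Hedged usage sketch: ModelTrainer.train() writes its artefacts (final-model.pt
# and, when a dev split exists, best-model.pt) into model_save_path; a later
# prediction step would reload the classifier roughly as below. The path literal
# is a placeholder, so the lines stay commented out.
# from flair.data import Sentence
# classifier = TextClassifier.load('model_save_path/final-model.pt')
# sentence = Sentence('待 分类 文本')
# classifier.predict(sentence)
# print(sentence.labels)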
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
import os
import numpy as np
import torch
import random
from model.base.views.runner.BaseRunner import BaseRunner
from model.classify.views.flair_classify import FlairClassifyProcess
from model.classify.views.flair_classify import FlairClassifyModel
from model.classify.views.flair_classify import FlairClassifyEvaluator
class FlairClassifyRunner(BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.fcp = FlairClassifyProcess(config_path)
self.fcm = FlairClassifyModel(config_path)
self.fce = FlairClassifyEvaluator(config_path)
@staticmethod
def reproducibility(seed):
'''
Fix the random seeds so that training is reproducible
:param seed: seed applied to Python, NumPy and PyTorch
:return:
'''
os.environ["PYTHONHASHSEED"] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
def train(self):
corpus, document_embeddings, label_dict, loss_weights = self.fcp.runner_process()
model = self.fcm.building_model(
corpus=corpus,
document_embeddings=document_embeddings,
label_dict=label_dict,
loss_weights=loss_weights
)
#self.fce.evaluate(true_labels, predict_labels)
return 'success'
# if __name__ == '__main__':
# state = FlairClassifyRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 10:28
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:46
# @Author : 程婷婷
# @FileName: FlairClassifyDataLoader.py
# @Software: PyCharm
from model.base import BaseDataLoader
class FlairClassifyDataLoader(BaseDataLoader.BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:14
# @Author : 程婷婷
# @FileName: XgboostClassifyProcess.py
# @Software: PyCharm
from flair.data import Sentence, Corpus
import re
from transformers import AutoTokenizer
from torch.utils.data import Dataset
from flair.embeddings import TransformerDocumentEmbeddings
from model.base import BaseDataProcess
from model.classify import FlairClassifyDataLoader
class DataSet(Dataset):
def __init__(
self, data_df, tokenizer,
):
df = data_df.copy()
sep_token = tokenizer.special_tokens_map['sep_token']
self.samples = df.content.apply(lambda s: re.sub("<sep>", sep_token, s)).values
self.labels = df.label.values
self.tokenizer = tokenizer
def __len__(self):
return len(self.samples)
def __getitem__(self, index):
sample, label = self.samples[index], self.labels[index]
sentence = Sentence(sample, use_tokenizer=self.tokenizer.tokenize)
if not len(sentence):
sentence = Sentence(self.tokenizer.unk_token, use_tokenizer=self.tokenizer.tokenize)
print(sample)
print(sentence)
sentence.add_label('class', str(label))
return sentence
class FlairClassifyProcess(BaseDataProcess.BaseDataProcess):
def __init__(self, config_path):
super().__init__(config_path)
self.fcdl = FlairClassifyDataLoader.FlairClassifyDataLoader(config_path)
@staticmethod
def add_sep_token(content):
return re.sub('。', '。<sep>', content)
def runner_process(self):
df = self.fcdl.read_file()
df = df[df.content.apply(lambda s: s.strip()).apply(len) > 10]
df = df.reset_index(drop=True)
df['content'] = df['content'].apply(lambda s: self.add_sep_token(str(s)))
pos = df.label.value_counts()
loss_weights = (pos.sum() - pos) / pos
self.loss_weights = loss_weights.to_dict()
if self.process_config['label_encode']:
all_label = list(set(df['label']))
self.label_mapping = {v: k for k, v in dict(enumerate(all_label)).items()}
labels = df['label'].map(self.label_mapping)
print(labels)
tokenizer = AutoTokenizer.from_pretrained(self.embedding_config['pretrained_name'])
if self.process_config['use_dev']:
train_data_set, test_data_set, dev_data_set = self.split_dataset(df, use_dev=self.process_config['use_dev'])
train_set = DataSet(train_data_set, tokenizer)
test_set = DataSet(test_data_set, tokenizer)
val_set = DataSet(dev_data_set, tokenizer)
corpus = Corpus(train=train_set, dev=val_set, test=test_set)
else:
train_data_set, test_data_set = self.split_dataset(df, use_dev=self.process_config['use_dev'])
train_set = DataSet(train_data_set, tokenizer)
test_set = DataSet(test_data_set, tokenizer)
corpus = Corpus(train=train_set, test=test_set)
label_dict = corpus.make_label_dictionary()
document_embeddings = TransformerDocumentEmbeddings(
self.embedding_config['pretrained_name'], fine_tune=True
)
return corpus, document_embeddings, label_dict, loss_weights
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 11:43
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
class LogisticClassifyConfig(BaseConfig):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/14 14:14
# @Author : 程婷婷
# @FileName: XgboostClassifyEvaluator.py
# @Software: PyCharm
from model.base.views.evaluator.BaseEvaluator import BaseEvaluator
class LogisticClassifyEvaluator(BaseEvaluator):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:33
# @Author : 程婷婷
# @FileName: XgboostClassifyRunner.py
# @Software: PyCharm
from model.base.views.runner.BaseRunner import BaseRunner
from model.classify.views.logistic_classify.data.LogisticClassifyProcess import LogisticClassifyProcess
from model.classify.views.logistic_classify.LogisticClassifyModel import LogisticClassifyModel
from model.classify.views.logistic_classify.LogisticClassifyEvaluator import LogisticClassifyEvaluator
class LogisticClassifyRunner(BaseRunner):
def __init__(self, config_path):
super().__init__(config_path)
self.lcp = LogisticClassifyProcess(config_path)
self.lcm = LogisticClassifyModel(config_path)
self.lce = LogisticClassifyEvaluator(config_path)
def train(self, logger):
tfidf_title, idf_title, labels = self.lcp.title_process(logger)
Threshold,Index_Retain_Predict_Title,Index_Delete_Title = self.lcm.building_model(
tfidf_title=tfidf_title,
labels=labels,
logger=logger
)
tfidf_content, idf_content = self.lcp.content_process(Index_Retain_Predict_Title)
threshold, Index_Retain_Predict_Content, Index_Delete_Content = self.lcm.building_model(
labels = labels,
tfidf_content=tfidf_content,
r=0.8,
logger=logger
) # r is adjustable; training finally stops once recall drops below r.
return 'success'
# if __name__ == '__main__':
# state = LogisticClassifyRunner().train()
# print(state)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/17 9:08
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 14:53
# @Author : 程婷婷
# @FileName: LogisticClassifyDataLoader.py
# @Software: PyCharm
from model.base.views.data.BaseDataLoader import BaseDataLoader
class LogisticClassifyDataLoader(BaseDataLoader):
def __init__(self, config_path):
super().__init__(config_path)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/6/1 14:47
# @Author : 程婷婷
# @FileName: __init__.py.py
# @Software: PyCharm
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/5/11 20:06
# @Author : 程婷婷
# @FileName: XgboostClassifyConfig.py
# @Software: PyCharm
from model.base.views.config.BaseConfig import BaseConfig
class TextcnnConfig(BaseConfig):
def __init__(self, config_path):
super().__init__(config_path)