提交 6af1aa0d 作者: 薛凌堃

天眼查企业动态

上级 18c4a5a7
......@@ -14,9 +14,9 @@ from base.smart import smart_extractor
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
jieba.cut("必须加载jieba")
# 初始化,设置中文分词
jieba.cut("必须加载jieba")
smart =smart_extractor.SmartExtractor('cn')
baseCore = BaseCore()
log = baseCore.getLogger()
cnx = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4')
......@@ -154,8 +154,8 @@ def beinWork(tyc_code, social_code,start_time):
time_format = baseCore.getNowTime(1)
try:
# 开始进行智能解析
lang = baseCore.detect_language(title)
smart = smart_extractor.SmartExtractor(lang)
# lang = baseCore.detect_language(title)
# smart = smart_extractor.SmartExtractor(lang)
contentText = smart.extract_by_url(link).text
# time.sleep(3)
except Exception as e:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论