提交 c702fb7b 作者: 薛凌堃

语言判断

上级 09af6b88
import os
import os
......@@ -192,7 +192,9 @@ def GetContent(pdf_url,info_url, pdf_name, social_code, year, pub_time, start_ti
else:
return False
content = retData['content']
lang = baseCore.detect_language(content)
if lang == 'cn':
lang = 'zh'
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
dic_news = {
'attachmentIds': att_id,
......@@ -203,7 +205,7 @@ def GetContent(pdf_url,info_url, pdf_name, social_code, year, pub_time, start_ti
'deleteFlag': '0',
'id': '',
'keyWords': '',
'lang': 'zh',
'lang': lang,
'origin': '东方财富网',
'publishDate': pub_time,
'sid': '1684032033495392257',
......
import os
import os
......@@ -278,7 +278,9 @@ def spider(browser, code, social_code, com_name):
att_id = ''
browser2.quit()
lang = baseCore.detect_language(content)
if lang == 'cn':
lang = 'zh'
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
dic_news = {
'attachmentIds': att_id,
......@@ -289,7 +291,7 @@ def spider(browser, code, social_code, com_name):
'deleteFlag': '0',
'id': '',
'keyWords': '',
'lang': 'zh',
'lang': lang,
'origin': '东方财富网',
'publishDate': publishDate,
'sid': '1684032033495392257',
......
......@@ -308,7 +308,9 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
else:
return False
content = retData['content']
lang = baseCore.detect_language(content)
if lang == 'cn':
lang = 'zh'
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
dic_news = {
'attachmentIds': att_id,
......@@ -319,7 +321,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
'deleteFlag': '0',
'id': '',
'keyWords': '',
'lang': 'zh',
'lang': lang,
'origin': origin,
'publishDate': pub_time,
'sid': '1684032033495392257',
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论