提交 9fc0f9da 作者: LiuLiYuan

中科软 01/15

上级 fd395ea2
# D:\Program Files\Python36
# D:\Program Files\Python36
# D:\Program Files\Python36
# -*- coding: utf-8 -*-
# @Time : 2022/2/19 14:20
import re
from pyquery import PyQuery as pq
import xlrd
from bson.objectid import ObjectId
......@@ -66,6 +68,10 @@ class Handler():
r = response.text+'<p/><br>译文来源:微软自动翻译<br></p>'
return r
def contains_chinese(self, text):
    """Report whether ``text`` contains at least one Chinese character.

    A character counts as Chinese when it falls in the range
    U+4E00..U+9FA5, which is the range the pattern below matches.

    Args:
        text: The string to inspect. ``None``, non-string values and the
            empty string are treated as containing no Chinese characters.

    Returns:
        bool: ``True`` if any character of ``text`` lies in the range,
        otherwise ``False``.
    """
    # Guard against missing or non-text values: re.search raises TypeError
    # on them, which would abort the caller instead of answering "no".
    if not isinstance(text, str) or not text:
        return False
    return re.search(r'[\u4e00-\u9fa5]', text) is not None
if __name__ == '__main__':
Translate = Translate()
......@@ -132,7 +138,7 @@ if __name__ == '__main__':
except:
richTextForeign = a_dict['richTextForeign']
continue
if title == titleForeign:
if title.strip() == titleForeign.strip() and Handler.contains_chinese(title):
richText = richTextForeign
content = pq(richText).text()
tag2 = Handler.getKeywordsTags2(content)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论