提交 33d124a4 作者: LiuLiYuan

翻译 01-12

上级 fca9d56e
#coding:utf-8 #coding:utf-8
...@@ -16,6 +16,7 @@ from selenium.webdriver.support.wait import WebDriverWait ...@@ -16,6 +16,7 @@ from selenium.webdriver.support.wait import WebDriverWait
# from selenium.webdriver.chrome.service import Service # from selenium.webdriver.chrome.service import Service
from selenium.webdriver.firefox.service import Service from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.proxy import Proxy, ProxyType
from base.BaseCore import BaseCore from base.BaseCore import BaseCore
baseCore = BaseCore() baseCore = BaseCore()
...@@ -42,17 +43,23 @@ class Translate(): ...@@ -42,17 +43,23 @@ class Translate():
# chrome_options.add_argument('--ignore-certificate-errors') # chrome_options.add_argument('--ignore-certificate-errors')
# chrome_options.add_argument("--disable-blink-features=AutomationControlled") # chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# chrome_options.add_argument("--start-maximized") # chrome_options.add_argument("--start-maximized")
# proxy = baseCore.get_proxy() proxy_ = baseCore.get_proxy()
# chrome_options.add_argument('--proxy-server=' + proxy['http'].split('://')[1]) # chrome_options.add_argument('--proxy-server=' + proxy['http'].split('://')[1])
# chrome_options.add_argument( # chrome_options.add_argument(
# 'user-agent=' + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36') # 'user-agent=' + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
# #
# browser = webdriver.Chrome(service=path, chrome_options=chrome_options) # browser = webdriver.Chrome(service=path, chrome_options=chrome_options)
proxy= Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = proxy_['http'].split('://')[1]
proxy.ssl_proxy = proxy_['http'].split('://')[1]
service = Service(r'F:\spider\firefox\geckodriver_1.exe') service = Service(r'F:\spider\firefox\geckodriver_1.exe')
options = Options() options = Options()
options.set_preference("general.useragent.override", options.set_preference("general.useragent.override",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3")
browser = webdriver.Firefox(options=options, service=service) capabilities = webdriver.DesiredCapabilities.FIREFOX
proxy.add_to_capabilities(capabilities)
browser = webdriver.Firefox(options=options, service=service,capabilities=capabilities)
return browser return browser
def translate(self, sentence, lang): def translate(self, sentence, lang):
...@@ -147,10 +154,13 @@ class Translate(): ...@@ -147,10 +154,13 @@ class Translate():
sentence = str(tag).strip() sentence = str(tag).strip()
tag_list.append(sentence) tag_list.append(sentence)
sentence = '' sentence = ''
num = 0
for tag in tag_list: for tag in tag_list:
if tag == '': if tag.strip() == '':
continue continue
sentence += f'{tag}😊' sentence += f'{tag}😊'
num += 1
#print(num)
# if len(sentence) == 1: # if len(sentence) == 1:
# continue # continue
# if sentence == '\n' or sentence == '\t' or sentence == ' ': # if sentence == '\n' or sentence == '\t' or sentence == ' ':
...@@ -193,23 +203,24 @@ class Translate(): ...@@ -193,23 +203,24 @@ class Translate():
break break
#print(result) #print(result)
sentences = result.split('😊') sentences = result.split('😊')
print(len(sentences)) #print(len(sentences))
num = 0 num = 0
for tag in html.find_all(text=True): for tag in html.find_all(text=True):
if tag == '': if tag.strip() == '':
continue continue
#print(num,tag)
sentence = sentences[num] sentence = sentences[num]
tag.replace_with(sentence) tag.replace_with(sentence)
num += 1 num += 1
return str(html.prettify()) + '<p/><br>译文来源:微软自动翻译<br></p>' return str(html.prettify()) + '<p/><br>译文来源:微软自动翻译<br></p>'
if __name__ == "__main__": # if __name__ == "__main__":
test = Translate() # test = Translate()
db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017/', username='admin', password='ZZsn@9988').中科软[ # db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017/', username='admin', password='ZZsn@9988').中科软[
'数据源_0504'] # '数据源_0504']
data = db_storage.find_one({'_id': ObjectId('656f14e84d6d77428c713271')}) # data = db_storage.find_one({'_id': ObjectId('656f14e84d6d77428c713271')})
a = data['richTextForeign'] # a = data['richTextForeign']
result = test.gethtml(a) # result = test.gethtml(a)
print(result) # print(result)
test.close() # test.close()
\ No newline at end of file \ No newline at end of file
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论