提交 14054899 作者: LiuLiYuan

百度翻译 01/15

上级 2a165047
#coding:utf-8 #coding:utf-8
...@@ -5,6 +5,7 @@ import string ...@@ -5,6 +5,7 @@ import string
import time import time
from urllib.parse import quote from urllib.parse import quote
import psutil
import pymongo import pymongo
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bson import ObjectId from bson import ObjectId
...@@ -17,6 +18,7 @@ from selenium.webdriver.support.wait import WebDriverWait ...@@ -17,6 +18,7 @@ from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.firefox.service import Service from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.proxy import Proxy, ProxyType from selenium.webdriver.common.proxy import Proxy, ProxyType
from func_timeout import func_set_timeout
from base.BaseCore import BaseCore from base.BaseCore import BaseCore
baseCore = BaseCore() baseCore = BaseCore()
...@@ -58,6 +60,14 @@ class Translate(): ...@@ -58,6 +60,14 @@ class Translate():
browser = webdriver.Firefox(firefox_profile=profile, service=service,options=options) browser = webdriver.Firefox(firefox_profile=profile, service=service,options=options)
return browser return browser
def kill_firefox(self):
    """Kill every running process whose name is exactly ``firefox.exe``.

    Iterates over all processes reported by ``psutil.process_iter()`` and
    calls ``kill()`` on each match. Processes that raise
    ``psutil.NoSuchProcess``, ``psutil.AccessDenied`` or
    ``psutil.ZombieProcess`` while being inspected or killed are skipped.

    NOTE(review): the comparison is an exact, case-sensitive match against
    "firefox.exe", so a process named "firefox" is not matched -- confirm
    this is intended for the deployment platform.
    """
    target_name = "firefox.exe"
    for process in psutil.process_iter():
        try:
            if process.name() != target_name:
                continue
            process.kill()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Best effort: the process vanished or cannot be touched.
            continue
def translate(self, sentence, lang): def translate(self, sentence, lang):
sentence_ = sentence sentence_ = sentence
wait = WebDriverWait(self.browser, 20) wait = WebDriverWait(self.browser, 20)
...@@ -91,6 +101,7 @@ class Translate(): ...@@ -91,6 +101,7 @@ class Translate():
result = self.translate(sentence_, lang) result = self.translate(sentence_, lang)
return result return result
@func_set_timeout(90)
def get_input_language_type(self, word, wait): def get_input_language_type(self, word, wait):
self.browser.get("https://fanyi.baidu.com/") self.browser.get("https://fanyi.baidu.com/")
wait.until(EC.presence_of_element_located((By.ID, "baidu_translate_input"))) wait.until(EC.presence_of_element_located((By.ID, "baidu_translate_input")))
...@@ -209,6 +220,8 @@ class Translate(): ...@@ -209,6 +220,8 @@ class Translate():
for tag in html.find_all(text=True): for tag in html.find_all(text=True):
if tag.strip() == '': if tag.strip() == '':
continue continue
if self.is_website_link(str(tag).strip()):
continue
#print(num,tag) #print(num,tag)
sentence = sentences[num] sentence = sentences[num]
tag.replace_with(sentence) tag.replace_with(sentence)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论