提交 2a165047 作者: LiuLiYuan

fanyi 01-12

上级 33d124a4
#coding:utf-8 #coding:utf-8
...@@ -35,31 +35,27 @@ class Translate(): ...@@ -35,31 +35,27 @@ class Translate():
def close(self): def close(self):
self.browser.quit() self.browser.quit()
def is_website_link(self, string):
    """Return True when *string* looks like a bare website link.

    This is a heuristic used to keep link-only fragments out of the text
    that is sent for translation. The whole string must match; a link
    embedded in a longer sentence is not detected.

    Accepted shape: an optional ``http``/``https`` scheme, an optional
    ``://`` separator, a dotted host name, an optional ``:port``, and an
    optional path / query / fragment made of RFC 3986 URL characters.

    Because the scheme is optional, any dotted ASCII token such as
    ``3.14`` is also reported as a link; this mirrors the previous
    behaviour and is kept for backward compatibility.

    :param string: candidate text, expected to be already stripped.
    :return: ``True`` if the text matches the link pattern, else ``False``.
    """
    pattern = (
        # Scheme and separator are independently optional (legacy behaviour).
        r"^(?:http|https)?(?:://)?"
        # Host: first label, a dot, then the remaining labels.
        r"[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+"
        # Optional explicit port, e.g. ":8080".
        r"(?::\d+)?"
        # Optional path/query/fragment; the previous pattern rejected
        # common URL characters such as '&', '%', '#' and '+'.
        r"(?:[/?#][A-Za-z0-9\-._~:/?#\[\]@!$&'()*+,;=%]*)?$"
    )
    return re.match(pattern, string) is not None
def createDriver(self): def createDriver(self):
# chrome_driver = r'F:\spider\117\chromedriver-win64\chromedriver.exe'
# path = Service(chrome_driver)
# chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--disable-gpu')
# chrome_options.add_argument('--ignore-certificate-errors')
# chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# chrome_options.add_argument("--start-maximized")
proxy_ = baseCore.get_proxy() proxy_ = baseCore.get_proxy()
# chrome_options.add_argument('--proxy-server=' + proxy['http'].split('://')[1]) profile = webdriver.FirefoxProfile()
# chrome_options.add_argument( profile.set_preference('network.proxy.type',1)
# 'user-agent=' + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36') profile.set_preference('network.proxy.http',proxy_['http'].split('://')[1].split(':')[0])
# profile.set_preference('network.proxy.http_port',int(proxy_['http'].split('://')[1].split(':')[1]))
# browser = webdriver.Chrome(service=path, chrome_options=chrome_options) profile.set_preference('network.proxy.ssl',proxy_['http'].split('://')[1].split(':')[0])
proxy= Proxy() profile.set_preference('network.proxy.ssl_port',int(proxy_['http'].split('://')[1].split(':')[1]))
proxy.proxy_type = ProxyType.MANUAL profile.update_preferences()
proxy.http_proxy = proxy_['http'].split('://')[1]
proxy.ssl_proxy = proxy_['http'].split('://')[1]
service = Service(r'F:\spider\firefox\geckodriver_1.exe') service = Service(r'F:\spider\firefox\geckodriver_1.exe')
options = Options() options = Options()
options.set_preference("general.useragent.override", options.set_preference("general.useragent.override",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3")
capabilities = webdriver.DesiredCapabilities.FIREFOX browser = webdriver.Firefox(firefox_profile=profile, service=service,options=options)
proxy.add_to_capabilities(capabilities)
browser = webdriver.Firefox(options=options, service=service,capabilities=capabilities)
return browser return browser
def translate(self, sentence, lang): def translate(self, sentence, lang):
...@@ -158,6 +154,8 @@ class Translate(): ...@@ -158,6 +154,8 @@ class Translate():
for tag in tag_list: for tag in tag_list:
if tag.strip() == '': if tag.strip() == '':
continue continue
if self.is_website_link(str(tag).strip()):
continue
sentence += f'{tag}😊' sentence += f'{tag}😊'
num += 1 num += 1
#print(num) #print(num)
...@@ -170,6 +168,9 @@ class Translate(): ...@@ -170,6 +168,9 @@ class Translate():
#print(sentence) #print(sentence)
result = '' result = ''
while True: while True:
if len(sentence.strip()) == 1 and self.is_punctuation(sentence.strip()):
result += sentence
break
if len(sentence) > 1000: if len(sentence) > 1000:
index_1000 = sentence[999] index_1000 = sentence[999]
# 判断该字符是不是逗号或句号 # 判断该字符是不是逗号或句号
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论