提交 adb1e05d 作者: 薛凌堃

1/11

上级 0dffe4be
#百度翻译 不登录翻译1000字 登录翻译5000字 #百度翻译 不登录翻译1000字 登录翻译5000字
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
import re import re
import string import string
import time import time
from urllib.parse import quote
import pymongo import pymongo
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from selenium import webdriver from selenium import webdriver
...@@ -23,6 +23,7 @@ class Translate(): ...@@ -23,6 +23,7 @@ class Translate():
# self._lang_list_original = ["中文", "英语", "韩语", "法语", "日语", "希腊语", "俄语"] # self._lang_list_original = ["中文", "英语", "韩语", "法语", "日语", "希腊语", "俄语"]
# self._num = len(self._lang_list) # self._num = len(self._lang_list)
self.url = "https://fanyi.baidu.com/#{}/{}/{}" self.url = "https://fanyi.baidu.com/#{}/{}/{}"
# self.url = "https://fanyi.baidu.com/#"
self.header = { self.header = {
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1"} "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1"}
...@@ -64,7 +65,8 @@ class Translate(): ...@@ -64,7 +65,8 @@ class Translate():
pass pass
else: else:
word_type = lang word_type = lang
url = self.url.format(word_type, 'zh', sentence_) url_ = self.url.format(word_type, 'zh', sentence_)
url = quote(url_, safe='/:#')
browser.set_page_load_timeout(10) browser.set_page_load_timeout(10)
try: try:
browser.get(url) browser.get(url)
...@@ -193,8 +195,8 @@ class Translate(): ...@@ -193,8 +195,8 @@ class Translate():
if lang == 'zh': if lang == 'zh':
return contentWithTag return contentWithTag
for tag in html.find_all(text=True): for tag in html.find_all(text=True):
sentence = str(tag) # sentence = str(tag)
# sentence = " 実際に働き手の数が8がけ(8割)になる16年後、介護のようなケアサービスを今のような形で受けることは困難になると予測される。" sentence = "95% say hello"
if sentence == '\n' or sentence == '\t' or sentence == ' ': if sentence == '\n' or sentence == '\t' or sentence == ' ':
continue continue
if self.is_punctuation(sentence): if self.is_punctuation(sentence):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论