Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
王景浩
zzsn_spider
Commits
fd395ea2
提交
fd395ea2
authored
1月 15, 2024
作者:
LiuLiYuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
中科软 01/15
上级
14054899
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
1265 行增加
和
0 行删除
+1265
-0
baidufanyi.py
zkr/baidufanyi.py
+222
-0
esToMongodb.py
zkr/esToMongodb.py
+554
-0
推送.py
zkr/推送.py
+318
-0
翻译.py
zkr/翻译.py
+171
-0
没有找到文件。
zkr/baidufanyi.py
0 → 100644
浏览文件 @
fd395ea2
#coding:utf-8
# 百度翻译 不登录翻译1000字 登录翻译5000字
import
re
import
string
import
time
from
urllib.parse
import
quote
import
psutil
import
pymongo
from
bs4
import
BeautifulSoup
from
bson
import
ObjectId
from
selenium
import
webdriver
from
selenium.webdriver.common.by
import
By
from
selenium.webdriver.support
import
expected_conditions
as
EC
from
selenium.webdriver.support.wait
import
WebDriverWait
# from selenium.webdriver.chrome.service import Service
from
selenium.webdriver.firefox.service
import
Service
from
selenium.webdriver.firefox.options
import
Options
from
selenium.webdriver.common.proxy
import
Proxy
,
ProxyType
from
func_timeout
import
func_set_timeout
from
base.BaseCore
import
BaseCore
# Module-wide instance of the project's BaseCore helper; the Translate class
# below calls its get_proxy() and detect_language() methods.
baseCore = BaseCore()
class Translate():
    """Translate text and HTML fragments by driving Baidu Translate in Firefox.

    A headless Firefox session (behind a proxy obtained from ``baseCore``) is
    created on construction and re-created whenever a translation attempt
    fails.  Call :meth:`close` when finished to shut the browser down.
    """

    # XPath of the element holding the translated text on the result page.
    _RESULT_XPATH = '//*[@id="main-outer"]/div/div/div[1]/div[2]/div[1]/div[2]/div/div/div[1]/p[2]'
    # XPath of the element whose ``data-lang`` attribute carries the detected
    # source language.
    _LANG_XPATH = '//*[@id="main-outer"]/div/div/div[1]/div[1]/div[1]/a[1]/span/span'
    # Maximum number of characters sent to the translator in one request.
    _CHUNK_LIMIT = 1000
    # Characters at which an over-long text may be cut.
    # NOTE(review): the original compared against ',' twice; one of the two was
    # presumably a fullwidth comma lost in transcoding - confirm against the
    # repository before adding it here.
    _BREAK_CHARS = ('.', '。', ',')

    def __init__(self):
        """Start the browser and open the MongoDB collection handle."""
        self.url = "https://fanyi.baidu.com/#"
        self.header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1"
        }
        self.browser = self.createDriver()
        # NOTE(review): host and credentials are hard-coded in source; they
        # should come from configuration or the environment.
        client = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='ZZsn@9988')
        self.db_storage = client.中科软['数据源_0106']

    def close(self):
        """Quit the Selenium browser session."""
        self.browser.quit()

    def is_website_link(self, string):
        """Return True when *string* as a whole looks like a bare URL or domain."""
        pattern = r"^(http|https)?(://)?[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+(/[a-zA-Z0-9-_.?=/]*)?$"
        return bool(re.match(pattern, string))

    def createDriver(self):
        """Build and return a headless, private-mode Firefox driver behind a proxy.

        The proxy is taken from ``baseCore.get_proxy()['http']`` and is applied
        to both HTTP and SSL traffic.
        """
        proxy_ = baseCore.get_proxy()
        # Parse "<scheme>://<host>:<port>" once instead of once per preference.
        address = proxy_['http'].split('://')[1].split(':')
        proxy_host = address[0]
        proxy_port = int(address[1])

        profile = webdriver.FirefoxProfile()
        profile.set_preference('network.proxy.type', 1)
        profile.set_preference('network.proxy.http', proxy_host)
        profile.set_preference('network.proxy.http_port', proxy_port)
        profile.set_preference('network.proxy.ssl', proxy_host)
        profile.set_preference('network.proxy.ssl_port', proxy_port)
        profile.update_preferences()

        service = Service(r'F:\spider\firefox\geckodriver_1.exe')
        options = Options()
        options.set_preference(
            "general.useragent.override",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        )
        for flag in ('--headless', '--disable-gpu', '--private'):
            options.add_argument(flag)
        return webdriver.Firefox(firefox_profile=profile, service=service, options=options)

    def kill_firefox(self):
        """Kill every running process named ``firefox.exe`` (best effort)."""
        for proc in psutil.process_iter():
            try:
                if proc.name() == "firefox.exe":
                    proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                # The process vanished or is not ours to kill; skip it.
                pass

    def translate(self, sentence, lang, max_retries=5):
        """Translate *sentence* and return the stripped result text.

        Each attempt types the text into the Baidu Translate page (via
        :meth:`get_input_language_type`), then waits for the result element.
        When an attempt fails, the browser is quit and re-created before the
        next try.

        Args:
            sentence: Text to translate.
            lang: Source-language code supplied by the caller; it overrides a
                differing auto-detected language.
            max_retries: Number of additional attempts after the first failure.
                The original implementation retried by unbounded recursion,
                launching a new browser each time until ``RecursionError``.

        Raises:
            RuntimeError: When every attempt failed; the last underlying error
                is chained as ``__cause__``.
        """
        last_error = None
        for _attempt in range(max_retries + 1):
            wait = WebDriverWait(self.browser, 20)
            try:
                word_type = self.get_input_language_type(sentence, wait)
                if word_type and word_type != lang:
                    word_type = lang
                # NOTE: self.url contains no "{}" placeholders, so format()
                # currently returns it unchanged; the call is kept so that a
                # templated URL keeps working.
                url = quote(self.url.format(word_type, 'zh', sentence), safe='/:#')
                self.browser.set_page_load_timeout(10)
                self.browser.get(url)
                wait.until(EC.presence_of_element_located((By.XPATH, self._RESULT_XPATH)))
                return self.browser.find_element(By.XPATH, self._RESULT_XPATH).text.strip()
            except (KeyboardInterrupt, SystemExit):
                raise
            except BaseException as exc:
                # BaseException rather than Exception: func_timeout's
                # FunctionTimedOut derives from BaseException, and the original
                # bare ``except`` retried on it as well.
                last_error = exc
                self.browser.quit()
                self.browser = self.createDriver()
        raise RuntimeError(
            'translation failed after %d attempts' % (max_retries + 1)
        ) from last_error

    @func_set_timeout(30)
    def get_input_language_type(self, word, wait):
        """Type *word* into the Baidu Translate input and return the detected language.

        Args:
            word: Text to enter into the input box.
            wait: ``WebDriverWait`` bound to the current browser.

        Returns:
            The ``data-lang`` attribute of the language indicator element.
        """
        self.browser.get("https://fanyi.baidu.com/")
        wait.until(EC.presence_of_element_located((By.ID, "baidu_translate_input")))
        input_box = self.browser.find_element(By.ID, "baidu_translate_input")
        input_box.send_keys(word)
        wait.until(EC.presence_of_element_located((By.XPATH, self._LANG_XPATH)))
        indicator = self.browser.find_element(By.XPATH, self._LANG_XPATH)
        return indicator.get_attribute("data-lang")

    def is_punctuation(self, char):
        """Return True when *char* occurs in the punctuation set.

        This is a substring test, so a multi-character argument matches only
        if it appears contiguously in the set, and ``''`` always matches.
        """
        # NOTE(review): '(', ')', ':', ';' and '!' already occur in
        # string.punctuation; these literals were presumably fullwidth
        # variants before transcoding - confirm against the repository.
        punctuation = string.punctuation + '、' + '(' + '…' + ')' + '《' + '》' + '“' + '”' + ':' + ';' + '!' + ' ' + '。'
        return char in punctuation

    def sentence_split_sentence(self, contentWithTag):
        """Split text into its non-empty lines.

        Returns:
            A list of ``(start, end, text)`` tuples, one per non-empty line.
            When no line matches and the input is at least 4 characters long,
            the whole input is returned as a single entry.
        """
        pattern = re.compile(r'[^\n]+(?=\n)|[^\n]+$')
        sentences = [
            (match.start(), match.end(), match.group())
            for match in pattern.finditer(contentWithTag)
        ]
        if not sentences and len(contentWithTag) >= 4:
            sentences.append((0, len(contentWithTag), contentWithTag))
        return sentences

    def jionstr(self, html):
        """Join the text nodes of *html* into chunks of at most 1000 characters.

        Whitespace-only nodes, punctuation-only nodes and nodes that start
        with a URL prefix are skipped.

        Args:
            html: Parsed document exposing ``find_all(text=True)``.

        Returns:
            List of stripped text chunks, including the final partial chunk
            (the original dropped it).
        """
        paragraphs = []
        current_sentence = ''
        for tag in html.find_all(text=True):
            sentence = str(tag)
            if sentence in ('\n', '\t', ' '):
                continue
            if self.is_punctuation(sentence):
                continue
            if sentence.startswith(('https://', 'http://', 'www.')):
                continue
            # Start a new chunk once adding this node would exceed the limit.
            if len(current_sentence) + len(sentence) <= 1000:
                current_sentence += sentence
            else:
                paragraphs.append(current_sentence.strip())
                current_sentence = sentence
        # Flush the text still being accumulated when the loop ends.
        if current_sentence.strip():
            paragraphs.append(current_sentence.strip())
        return paragraphs

    def _is_translatable(self, text):
        """Return True for a text node that is neither blank nor a bare link."""
        stripped = str(text).strip()
        return stripped != '' and not self.is_website_link(stripped)

    def _chunk_end(self, sentence):
        """Return the index at which to cut an over-long *sentence*.

        Scans backwards from the last character inside the limit for a break
        character and cuts just after it.  Index 0 is never examined.  Without
        any break character the text is cut at the limit itself.
        """
        for index in range(self._CHUNK_LIMIT - 1, 0, -1):
            if sentence[index] in self._BREAK_CHARS:
                return index + 1
        return self._CHUNK_LIMIT

    @func_set_timeout(300)
    def gethtml(self, contentWithTag):
        """Translate the text nodes of an HTML fragment in place.

        Text nodes are joined with a '😊' delimiter, translated in chunks of at
        most 1000 characters, split on the delimiter again and written back
        into the parsed document.

        Args:
            contentWithTag: HTML source as a string.

        Returns:
            *contentWithTag* unchanged when ``baseCore.detect_language``
            reports ``'zh'``; otherwise the prettified translated document
            followed by an attribution footer.

        Raises:
            IndexError: When the translated text contains fewer delimiters
                than there were text nodes.
        """
        html = BeautifulSoup(contentWithTag, 'html.parser')
        lang = baseCore.detect_language(html.text)
        if lang == 'zh':
            return contentWithTag

        stripped_texts = [str(tag).strip() for tag in html.find_all(text=True)]
        sentence = ''.join(
            f'{text}😊' for text in stripped_texts if self._is_translatable(text)
        )

        result = ''
        while True:
            remaining = sentence.strip()
            if len(remaining) == 1 and self.is_punctuation(remaining):
                # A lone punctuation mark is passed through untranslated.
                result += sentence
                break
            if len(sentence) <= self._CHUNK_LIMIT:
                # Final (or only) chunk.
                result += self.translate(sentence, lang)
                time.sleep(2)
                break
            cut = self._chunk_end(sentence)
            result += self.translate(sentence[:cut].strip(), lang)
            sentence = sentence[cut:]

        sentences = result.split('😊')
        num = 0
        for tag in html.find_all(text=True):
            if not self._is_translatable(tag):
                continue
            tag.replace_with(sentences[num])
            num += 1
        return str(html.prettify()) + '<p/><br>译文来源:微软自动翻译<br></p>'
zkr/esToMongodb.py
0 → 100644
浏览文件 @
fd395ea2
import
datetime
import
time
from
urllib.parse
import
urlparse
import
numpy
as
np
import
pandas
as
pd
import
pymongo
from
elasticsearch
import
Elasticsearch
,
helpers
from
base
import
BaseCore
from
langdetect
import
detect
# Project helper instance and the logger it provides.
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
# Elasticsearch client used by this module, created with a 600 timeout.
# NOTE(review): host, port and credentials are hard-coded in source; they
# should be loaded from configuration or the environment.
es_client = Elasticsearch([{'host': '114.115.215.250', 'port': '9700'}], http_auth=('elastic', 'zzsn9988'), timeout=600)
FLGS
=
[[
'1534520509510537217'
,
'289118'
,
'http://www.boeing.cn/presscenter/'
,
'波音公司中文官网'
,
'波音公司中文官网'
],
[
'1534521744938586113'
,
'287555'
,
'https://www.bosch.com.cn/'
,
'博世中文官网'
,
'博世中文官网'
],
[
'1534522156257202178'
,
'289118'
,
'http://www.boeing.cn/presscenter/'
,
'波音公司中文官网'
,
'波音公司中文官网'
],
[
'1534575498018770946'
,
'289461'
,
'https://www.bbc.com/news/world'
,
'BBC-世界'
,
'BBC-世界'
],
[
'1534575502137577474'
,
'289458'
,
'https://www.bbc.com/news/uk'
,
'BBC-英国'
,
'BBC-英国'
],
[
'1534599844112191489'
,
'289026'
,
'https://media.ford.com/content/fordmedia/fna/us/en/news.html'
,
'福特汽车公司英文官网'
,
'福特汽车公司英文官网'
],
[
'1534677550518587394'
,
'289469'
,
'https://www.telegraph.co.uk/news/uk/'
,
'telegraph-英国'
,
'telegraph-英国'
],
[
'1534681654112374786'
,
'289055'
,
'https://news.panasonic.com/global/all/all.html'
,
'松下有限公司英文官网'
,
'松下有限公司英文官网'
],
[
'1534681764263186434'
,
'289116'
,
'https://www.bayer.com.cn/zh-hans/allnews'
,
'拜耳集团中文官网'
,
'拜耳集团中文官网'
],
[
'1534682368133910529'
,
'289578'
,
'https://www.shell.com/media/news-and-media-releases.html'
,
'壳牌石油-全球'
,
'壳牌石油-全球'
],
[
'1534682381371133953'
,
'289052'
,
'https://www.citigroup.com/citi/news/news_list_view.html'
,
'花旗集团英文官网'
,
'花旗集团英文官网'
],
[
'1534682486119682049'
,
'289581'
,
'https://www.axa.com/en/press/press-releases'
,
'安盛AXA-全球'
,
'安盛AXA-全球'
],
[
'1534682558727278593'
,
'289028'
,
'https://www.kddi.com/corporate/newsrelease/2023/'
,
'KDDI株式会社英文官网'
,
'KDDI株式会社英文官网'
],
[
'1534682749761048578'
,
'289072'
,
'https://www.merck.com/media/news/'
,
'默沙东英文官网'
,
'默沙东英文官网'
],
[
'1534682800935751682'
,
'289067'
,
'https://www.nestle.com/media/news'
,
'雀巢集团英文官网'
,
'雀巢集团英文官网'
],
[
'1534682931789647873'
,
'289239'
,
'https://www.jnj.com/media-center/press-releases#See
%20
All
%20
Press
%20
Releases'
,
'强生公司英文官网'
,
'强生公司英文官网'
],
[
'1534683017357643778'
,
'284674'
,
'https://china.newsroom.ibm.com/'
,
'国际商业机器中文官网'
,
'国际商业机器中文官网'
],
[
'1534683106398523393'
,
'289053'
,
'https://www.ubs.com/global/en/investor-relations/press-releases.html'
,
'瑞银集团股份公司英文官网'
,
'瑞银集团股份公司英文官网'
],
[
'1534683108550201345'
,
'289041'
,
'https://press.hp.com/us/en/press-releases.html'
,
'惠普公司英文官网'
,
'惠普公司英文官网'
],
[
'1534683330609238018'
,
'289415'
,
'https://www.morganstanley.com/about-us-newsroom#-536583991-tab'
,
'摩根士丹利英文官网'
,
'摩根士丹利英文官网'
],
[
'1534683387307839489'
,
'289121'
,
'https://www.cisco.com/c/zh_cn/about/press.html'
,
'思科公司中文官网'
,
'思科公司中文官网'
],
[
'1534683411685134337'
,
'289050'
,
'https://www.bayer.com/media/en-us/#/search'
,
'拜耳集团英文官网'
,
'拜耳集团英文官网'
],
[
'1534685529548607489'
,
'289980'
,
'https://auto.gasgoo.com/'
,
'盖世汽车'
,
'盖世汽车'
],
[
'1534715893386137602'
,
'284655'
,
'https://www.chooseparisregion.org/zh-hans/
%
E6
%
B6
%88%
E6
%81%
AF'
,
'巴黎大区投资促进局(中文)'
,
'巴黎大区投资促进局(中文)'
],
[
'1534715895542009857'
,
'284656'
,
'https://www.toyota.com.cn/mediacenter/index.php#a188
%
E6
%81%
AF'
,
'丰田汽车公司中文官网'
,
'丰田汽车公司中文官网'
],
[
'1534715898025037825'
,
'284657'
,
'https://www.apple.com.cn/newsroom/'
,
'苹果公司中文官网'
,
'苹果公司中文官网'
],
[
'1534715902261284866'
,
'284659'
,
'https://career.daimler.com.cn/events.html'
,
'戴姆勒中文官网'
,
'戴姆勒中文官网'
],
[
'1534715904366825473'
,
'284660'
,
'https://www.swissre.com/china/news-insights/press-release.html'
,
'瑞士再保险股份有限公司中文官网'
,
'瑞士再保险股份有限公司中文官网'
],
[
'1534715908590489602'
,
'284662'
,
'https://www.novartis.com.cn/news/news-archive'
,
'诺华公司中文官网'
,
'诺华公司中文官网'
],
[
'1534715910817665025'
,
'284663'
,
'https://www.aviva-cofco.com.cn/website/xxzx/xxzx_zxzx/list-1.shtml'
,
'英杰华集团中文官网'
,
'英杰华集团中文官网'
],
[
'1534715919160135681'
,
'284666'
,
'https://www.hyundaimotorgroup.com.cn/news/'
,
'现代汽车中文官网'
,
'现代汽车中文官网'
],
[
'1534715924050694146'
,
'284668'
,
'http://newsroom.ge.com.cn/'
,
'通用电气公司中文官网'
,
'通用电气公司中文官网'
],
[
'1534715927200616449'
,
'284669'
,
'https://www.schneider-electric.cn/zh/about-us/press/'
,
'施耐德电气中文官网'
,
'施耐德电气中文官网'
],
[
'1534715930719637506'
,
'284670'
,
'https://www.walmart.cn/newsroom/'
,
'沃尔玛公司中文官网'
,
'沃尔玛公司中文官网'
],
[
'1534715933722759169'
,
'284671'
,
'https://www.volkswagengroupchina.com.cn/zh-cn/news'
,
'大众汽车中文官网'
,
'大众汽车中文官网'
],
[
'1534715937363415041'
,
'284672'
,
'https://new.abb.com/cn/news-center'
,
'ABB中文官网'
,
'阿西布朗勃法瑞公司 ABB中文官网'
],
[
'1534715941301866498'
,
'284673'
,
'https://www.qualcomm.cn/news'
,
'高通中文官网'
,
'高通中文官网'
],
[
'1534715954132242433'
,
'284695'
,
'https://presse.paris.fr/pages?active_tab=0&item_id=0'
,
'巴黎市长活动'
,
'巴黎市长活动'
],
[
'1534715956460081154'
,
'284696'
,
'https://www.chooseparisregion.org/fr/actualites'
,
'巴黎大区投资促进局'
,
'巴黎大区投资促进局'
],
[
'1534715958485929985'
,
'284715'
,
'https://www.apc-paris.com/espace-presse'
,
'巴黎的气候局'
,
'巴黎的气候局'
],
[
'1534715960956375042'
,
'284716'
,
'https://www.institutparisregion.fr/international-1/'
,
'巴黎大区规划与城市规划研究所'
,
'巴黎大区规划与城市规划研究所'
],
[
'1534715963300990978'
,
'284717'
,
'https://www.institutparisregion.fr/economie.html#'
,
'巴黎官网1'
,
'巴黎官网1'
],
[
'1534715965926625281'
,
'284718'
,
'https://www.institutparisregion.fr/gouvernance/'
,
'巴黎官网2'
,
'巴黎官网2'
],
[
'1534715968141217794'
,
'284719'
,
'https://www.institutparisregion.fr/planification/'
,
'巴黎官网3'
,
'巴黎官网3'
],
[
'1534715970309672962'
,
'284720'
,
'https://www.institutparisregion.fr/mobilite-et-transports/'
,
'巴黎官网4'
,
'巴黎官网4'
],
[
'1534715975145705474'
,
'284722'
,
'https://www.institutparisregion.fr/amenagement-et-territoires/'
,
'巴黎官网5'
,
'巴黎官网5'
],
[
'1534715977754562561'
,
'284723'
,
'https://www.institutparisregion.fr/environnement/'
,
'巴黎官网6'
,
'巴黎官网6'
],
[
'1534715984725495810'
,
'284726'
,
'https://www.institutparisregion.fr/prevention-securite/'
,
'巴黎官网8'
,
'巴黎官网8'
],
[
'1534716024063873026'
,
'284795'
,
'https://www.apple.com/newsroom'
,
'苹果公司英文官网'
,
'苹果公司英文官网'
],
[
'1534716026974720002'
,
'284796'
,
'https://www.tesla.com/blog'
,
'特斯拉英文官网'
,
'特斯拉英文官网'
],
[
'1534716032674779137'
,
'284835'
,
'https://www1.nyc.gov/office-of-the-mayor/news.page'
,
'纽约市官网-市长'
,
'纽约市官网-市长'
],
[
'1534716035220721666'
,
'284836'
,
'https://council.nyc.gov/press'
,
'纽约市议会-新闻'
,
'纽约市议会-新闻'
],
[
'1534716040534904833'
,
'284855'
,
'https://www1.nyc.gov/assets/cto/#/press'
,
'纽约市市长首席技术官网站-新闻'
,
'纽约市市长首席技术官网站-新闻'
],
[
'1534716045064753153'
,
'284857'
,
'https://business.nycgo.com/press-and-media/press-releases/'
,
'纽约市及公司'
,
'纽约市及公司'
],
[
'1534716047480672257'
,
'284858'
,
'https://www.london.gov.uk/media-centre/mayors-press-releases?order=DESC'
,
'伦敦市长新闻稿'
,
'伦敦市长新闻稿'
],
[
'1534716050160832513'
,
'284859'
,
'https://www.london.gov.uk/about-us/london-assembly/london-assembly-press-releases?order=DESC'
,
'伦敦议会新闻'
,
'伦敦议会新闻'
],
[
'1534716052442533890'
,
'284860'
,
'https://www.london.gov.uk/business-and-economy-publications?order=DESC'
,
'商业和经济出版物'
,
'商业和经济出版物'
],
[
'1534716054862647298'
,
'284861'
,
'https://media.londonandpartners.com/news'
,
'伦敦发展促进署'
,
'伦敦发展促进署'
],
[
'1534716058121621505'
,
'284862'
,
'https://www.koho.metro.tokyo.lg.jp/diary/report/new.html'
,
'东京最新消息'
,
'东京最新消息'
],
[
'1534716065717506049'
,
'284865'
,
'https://www.koho.metro.tokyo.lg.jp/diary/report/2021/01/index.html'
,
'东京最新消息'
,
'东京最新消息'
],
[
'1534716068053733377'
,
'284866'
,
'https://www.metro.tokyo.lg.jp/tosei/hodohappyo/ichiran.html'
,
'东京政府新闻'
,
'东京政府新闻'
],
[
'1534716075184050177'
,
'284869'
,
'https://www.metro.tokyo.lg.jp/english/topics/latest.html'
,
'东京政府新闻(英文)'
,
'东京政府新闻(英文)'
],
[
'1534716077616746497'
,
'284870'
,
'https://shintosei.metro.tokyo.lg.jp/'
,
'市政府的体制改革(东京)'
,
'市政府的体制改革(东京)'
],
[
'1534716082763157506'
,
'284872'
,
'https://note.com/kouzoukaikaku'
,
'市政府的体制改革'
,
'市政府的体制改革'
],
[
'1534716121208147969'
,
'284936'
,
'https://www.swissre.com/media/news-releases.html'
,
'瑞士再保险股份有限公司英文官网'
,
'瑞士再保险股份有限公司英文官网'
],
[
'1534716124160937985'
,
'284937'
,
'https://www.allianz.com/en/press/news.html'
,
'安联保险集团英文官网'
,
'安联保险集团英文官网'
],
[
'1534716129097633793'
,
'284939'
,
'https://www.aviva.com/search/#sort=date
%20
descending'
,
'英杰华集团英文官网'
,
'英杰华集团英文官网'
],
[
'1534716131362557953'
,
'284940'
,
'https://www.mizuhogroup.com/news'
,
'日本瑞穗金融集团英文官网'
,
'日本瑞穗金融集团英文官网'
],
[
'1534716133916889090'
,
'284955'
,
'https://www.sanofi.com/en/media-room/press-releases'
,
'赛诺菲英文官网'
,
'赛诺菲英文官网'
],
[
'1534716153592369153'
,
'284981'
,
'https://www.hyundai.com/worldwide/en/company/newsroom.release.corporate'
,
'现代汽车英文官网'
,
'现代汽车英文官网'
],
[
'1534716195082424321'
,
'285015'
,
'https://www.goldmansachs.com/media-relations/press-releases-and-comments/current/index.html'
,
'高盛英文官网'
,
'高盛英文官网'
],
[
'1534716198475616257'
,
'285016'
,
'https://www.ge.com/news/press-releases'
,
'通用电气公司英文官网'
,
'通用电气公司英文官网'
],
[
'1534716200962838530'
,
'285017'
,
'https://www.se.com/ww/en/about-us/newsroom/news/'
,
'施耐德电气英文官网'
,
'施耐德电气英文官网'
],
[
'1534716209645047810'
,
'285019'
,
'https://global.abb/group/en/media/releases/trade'
,
'ABB英文官网'
,
'阿西布朗勃法瑞公司 ABB英文官网'
],
[
'1534716214791458817'
,
'285035'
,
'https://www.blackrock.com/corporate/newsroom'
,
'贝莱德集团英文官网'
,
'贝莱德集团英文官网'
],
[
'1534716217614225410'
,
'285036'
,
'https://newsroom.ibm.com/announcements'
,
'国际商业机器英文官网'
,
'国际商业机器英文官网'
],
[
'1534716219954647042'
,
'285037'
,
'https://press.siemens.com/global/en/press-search?f
%5
B0
%5
D=content_type
%3
Ac2_ct_press_release'
,
'西门子英文官网'
,
'门子英文官网'
],
[
'1534716222060187650'
,
'285055'
,
'https://www.nydailynews.com/new-york/bronx/'
,
'nydailynews-布朗克斯'
,
'nydailynews-布朗克斯'
],
[
'1534716224908120066'
,
'285056'
,
'https://www.zaobao.com/realtime/world'
,
'zaobao-国际'
,
'zaobao-国际'
],
[
'1534716226929774594'
,
'285057'
,
'https://www.nydailynews.com/new-york/brooklyn/'
,
'nydailynews-布鲁克林'
,
'nydailynews'
],
[
'1534716229400219649'
,
'285058'
,
'https://www.nydailynews.com/new-york/queens/'
,
'nydailynews-皇后区'
,
'nydailynews'
],
[
'1534716231690309633'
,
'285059'
,
'https://www.zaobao.com/realtime/singapore'
,
'zaobao-新加坡'
,
'zaobao'
],
[
'1534716234424995841'
,
'285060'
,
'https://www.asahi.com/national/list/'
,
'朝日社会'
,
'朝日新闻'
],
[
'1534716239730790402'
,
'285062'
,
'https://www.tokyo-np.co.jp/tokyo/metropolitan'
,
'tokyo-首都圈新闻'
,
'tokyo-首都圈新闻'
],
[
'1534716242738106370'
,
'285063'
,
'https://www.nydailynews.com/news/politics/nyc-elections-2021/'
,
'nydailynews-2021 年纽约市选举'
,
'nydailynews'
],
[
'1534716246252933121'
,
'285064'
,
'https://www.standard.co.uk/tech/huawei'
,
'standard-华为'
,
'standard'
],
[
'1534716249650319361'
,
'285066'
,
'https://www.rfi.fr/fr/europe/'
,
'rfi-欧洲'
,
'rfi'
],
[
'1534716252443725826'
,
'285067'
,
'https://www.zaobao.com.sg/news/world'
,
'zaobao-国际2'
,
'zaobao'
],
[
'1534716254813507586'
,
'285068'
,
'https://www.rfi.fr/fr/asie-pacifique/'
,
'rfi-亚洲和太平洋'
,
'rfi'
],
[
'1534716256881299458'
,
'285069'
,
'https://www.standard.co.uk/optimist/sustainable'
,
'standard-可持续标准'
,
'standard'
],
[
'1534716259041366018'
,
'285070'
,
'https://www.zaobao.com.sg/news/singapore'
,
'zaobao-新加坡2'
,
'zaobao'
],
[
'1534716261650223105'
,
'285071'
,
'https://www.rfi.fr/fr/afrique/'
,
'rfi-非洲'
,
'rfi'
],
[
'1534716267060875266'
,
'285073'
,
'https://www.rfi.fr/fr/moyen-orient/'
,
'rfi-中东'
,
'rfi'
],
[
'1534716269598429185'
,
'285074'
,
'https://www.standard.co.uk/optimist/vaccine-world'
,
'standard-世界疫苗'
,
'standard'
],
[
'1534716272077262850'
,
'285075'
,
'https://www.zaobao.com.sg/forum/editorial'
,
'zaobao-社论'
,
'zaobao'
],
[
'1534716277240451074'
,
'285076'
,
'https://www.rfi.fr/fr/notre-s
%
C3
%
A9lection/'
,
'rfi-我们的选择'
,
'rfi'
],
[
'1534716280059023362'
,
'285077'
,
'https://www.rfi.fr/fr/tag/revue-de-presse/'
,
'rfi-报纸'
,
'rfi'
],
[
'1534716282533662722'
,
'285078'
,
'https://www.zaobao.com.sg/finance/singapore'
,
'zaobao-狮城财经'
,
'zaobao'
],
[
'1534716285415149569'
,
'285079'
,
'https://www.prnasia.com/releases/all/listpage-recent-all-all-all-all-spec-1.shtml'
,
'prnasia-最新新闻稿'
,
'prnasia'
],
[
'1534716288091115521'
,
'285080'
,
'https://www.standard.co.uk/optimist/the-sustainables'
,
'standard-可持续发展'
,
'standard'
],
[
'1534716290297319426'
,
'285081'
,
'https://www.zaobao.com.sg/finance/world'
,
'zaobao-全球财经'
,
'zaobao'
],
[
'1534716292650323970'
,
'285082'
,
'https://www.prnasia.com/lightnews/listpage-102-all-1.shtml'
,
'prnasia-美通社头条'
,
'prnasia'
],
[
'1534716295011717122'
,
'285083'
,
'https://www.zaobao.com.sg/special/sg-cn'
,
'zaobao-新中交流'
,
'zaobao'
],
[
'1534716298002255873'
,
'285084'
,
'https://www.prnasia.com/releases/all/listpage-pc-all-all-all-all-spec-1.shtml'
,
'prnasia-上市公司新闻'
,
'prnasia'
],
[
'1534716301194121217'
,
'285085'
,
'https://en.prnasia.com/all_releases/recent.shtml'
,
'prnasia-英文NewsLatest News'
,
'prnasia'
],
[
'1534716304520204290'
,
'285087'
,
'https://www.rfi.fr/fr/tech/'
,
'rfi-科技'
,
'rfi'
],
[
'1534716307007426561'
,
'285088'
,
'https://en.prnasia.com/all_releases/pc.shtml'
,
'prnasia-Public Company News'
,
'prnasia'
],
[
'1534716309930856449'
,
'285089'
,
'https://hk.prnasia.com/story/industry/n-2-0.shtml'
,
'prnasia-所有新闻稿'
,
'prnasia'
],
[
'1534716313126916097'
,
'285090'
,
'https://www.tokyo-np.co.jp/tags/coronavirus?ref=gnb_pc_lv1'
,
'tokyo-东京新闻新冠'
,
'tokyo'
],
[
'1534716316138426369'
,
'285091'
,
'https://www.rfi.fr/fr/sciences/'
,
'rfi-科学'
,
'rfi'
],
[
'1534716319334486017'
,
'285092'
,
'https://jp.prnasia.com/releases/recent.shtml'
,
'prnasia-最新的'
,
'prnasia'
],
[
'1534716322014646274'
,
'285093'
,
'https://www.rfi.fr/fr/
%
C3
%
A9conomie/'
,
'rfi-经济'
,
'rfi'
],
[
'1534716324367650818'
,
'285094'
,
'https://www.rfi.fr/fr/environnement/'
,
'rfi-环境'
,
'rfi'
],
[
'1534716327895060481'
,
'285096'
,
'https://www.asahi.com/national/list/calamity.html?iref=pc_gnavi'
,
'朝日灾害/交通信息'
,
'朝日新闻'
],
[
'1534716330579415042'
,
'285097'
,
'https://www.rfi.fr/fr/france/'
,
'rfi-法国'
,
'rfi'
],
[
'1534716333523816450'
,
'285098'
,
'https://www.asahi.com/business/list/industry.html?iref=pc_gnavi'
,
'朝日行业/产品'
,
'朝日新闻'
],
[
'1534716336757624833'
,
'285100'
,
'https://www.rfi.fr/fr/am
%
C3
%
A9riques/'
,
'rfi-美洲'
,
'rfi'
],
[
'1534716339555225601'
,
'285101'
,
'https://www.tokyo-np.co.jp/tokyo/t_news?ref=gnb_pc_lv1'
,
'tokyo-东京'
,
'tokyo'
],
[
'1534716342415740929'
,
'285102'
,
'https://www.asahi.com/business/list/finance.html?iref=pc_gnavi'
,
'朝日金融・財政'
,
'朝日新闻'
],
[
'1534716345938956290'
,
'285105'
,
'https://www.nydailynews.com/new-york/'
,
'nydailynews-纽约'
,
'nydailynews'
],
[
'1534716348468121601'
,
'285106'
,
'https://www.tokyo-np.co.jp/n/national?ref=gnb_pc_lv1'
,
'tokyo-社会'
,
'tokyo'
],
[
'1534716352821809154'
,
'285109'
,
'https://www.nydailynews.com/new-york/manhattan/'
,
'nydailynews-曼哈顿'
,
'nydailynews'
],
[
'1534716355816542210'
,
'285110'
,
'https://www.tokyo-np.co.jp/n/politics?ref=gnb_pc_lv1'
,
'tokyo-政治'
,
'tokyo'
],
[
'1534716358962270210'
,
'285112'
,
'https://www.tokyo-np.co.jp/n/economics?ref=gnb_pc_lv1'
,
'tokyo-经济'
,
'tokyo'
],
[
'1534716361256554497'
,
'285113'
,
'https://www.tokyo-np.co.jp/f/life/education'
,
'tokyo-教育'
,
'tokyo'
],
[
'1534716365434081281'
,
'285116'
,
'https://www.tokyo-np.co.jp/n/column/editorial'
,
'tokyo-社论'
,
'tokyo'
],
[
'1534716370223976449'
,
'285119'
,
'https://www.standard.co.uk/news/london'
,
'standard-伦敦'
,
'standard'
],
[
'1534716373084491778'
,
'285120'
,
'https://www.asahi.com/politics/list/economicpolicy.html?iref=pc_gnavi'
,
'朝日経済政策'
,
'朝日新闻'
],
[
'1534716375827566594'
,
'285121'
,
'https://www.asahi.com/business/list/work.html?iref=pc_gnavi'
,
'朝日労働・雇用'
,
'朝日新闻'
],
[
'1534716378243485697'
,
'285122'
,
'https://www.standard.co.uk/news/mayor'
,
'standard-伦敦市长'
,
'standard'
],
[
'1534716383641554945'
,
'285124'
,
'https://www.standard.co.uk/news/transport'
,
'standard-运输'
,
'standard'
],
[
'1534716386908917761'
,
'285125'
,
'https://www.asahi.com/business/list/statistics.html?iref=pc_gnavi'
,
'朝日市況・統計'
,
'朝日新闻'
],
[
'1534716389073178625'
,
'285126'
,
'https://www.standard.co.uk/news/health'
,
'standard-健康'
,
'standard'
],
[
'1534716391375851522'
,
'285127'
,
'https://www.standard.co.uk/news/education'
,
'standard-教育'
,
'standard'
],
[
'1534716396031528961'
,
'285128'
,
'https://www.standard.co.uk/topic/coronavirus'
,
'standard-新冠病毒'
,
'standard'
],
[
'1534716398585860097'
,
'285129'
,
'https://www.standard.co.uk/business/business-news'
,
'standard-商业'
,
'standard'
],
[
'1534716495604305921'
,
'285020'
,
'https://global.abb/group/en/media/releases/group'
,
'ABB英文官网'
,
'阿西布朗勃法瑞公司 ABB英文官网'
],
[
'1534716498745839617'
,
'285256'
,
'https://www.nydailynews.com/coronavirus/'
,
'nydailynews-纽约每日新闻新冠病毒'
,
'nydailynews'
],
[
'1534716501501497345'
,
'285257'
,
'https://www.nydailynews.com/new-york/education/'
,
'nydailynews-教育'
,
'nydailynews'
],
[
'1534716504110354433'
,
'285258'
,
'https://www.nydailynews.com/news/politics/new-york-elections-government/'
,
'nydailynews-纽约政治'
,
'nydailynews'
],
[
'1534716653960253441'
,
'285373'
,
'https://www.lemonde.fr/paris/'
,
'lemonde-世界报巴黎'
,
'lemonde'
],
[
'1534716656707522562'
,
'285376'
,
'https://www.enterprisesg.gov.sg/media-centre/news'
,
'新加坡企业发展局'
,
'新加坡企业发展局'
],
[
'1534716661023461377'
,
'285377'
,
'https://www.enterprisesg.gov.sg/media-centre/media-releases'
,
'新加坡企业发展局'
,
'新加坡企业发展局'
],
[
'1534716671593107458'
,
'285398'
,
'https://www.mti.gov.sg/Newsroom/Press-Releases'
,
'新加坡贸工部'
,
'新加坡贸工部'
],
[
'1534716683362324482'
,
'285439'
,
'http://www.cankaoxiaoxi.com/world/omxw/'
,
'参考消息-欧美'
,
'参考消息'
],
[
'1534716685702746114'
,
'285440'
,
'http://www.cankaoxiaoxi.com/world/ytxw/'
,
'参考消息-亚太'
,
'参考消息'
],
[
'1534716687980253185'
,
'285441'
,
'http://www.cankaoxiaoxi.com/world/qtdq/'
,
'参考消息-其他'
,
'参考消息'
],
[
'1534716706913341441'
,
'285477'
,
'https://search.globaltimes.cn/QuickSearchCtrl'
,
'环球时报英文版-Tokyo'
,
'环球时报英文版'
],
[
'1534716711342526465'
,
'285495'
,
'https://www.edb.gov.sg/en/business-insights/insights.html'
,
'新加坡经济发展局'
,
'新加坡经济发展局'
],
[
'1534716730451775489'
,
'284667'
,
'https://www.goldmansachs.com/worldwide/greater-china/media-relations/media-relations.html'
,
'高盛中文官网'
,
'高盛中文官网'
],
[
'1534716734952263682'
,
'285576'
,
'https://edc.nyc/press'
,
'纽约市经济发展公司'
,
'纽约市经济发展公司'
],
[
'1534716734952263682'
,
'285577'
,
'https://edc.nyc/press'
,
'纽约市经济发展公司'
,
'纽约市经济发展公司'
],
[
'1534716734952263682'
,
'285578'
,
'https://edc.nyc/press'
,
'纽约市经济发展公司'
,
'纽约市经济发展公司'
],
[
'1534716734952263682'
,
'284856'
,
'https://edc.nyc/press'
,
'纽约市经济发展公司'
,
'纽约市经济发展公司'
],
[
'1534716747816194049'
,
'284725'
,
'https://www.institutparisregion.fr/societe-et-habitat/'
,
'巴黎官网7'
,
'巴黎官网7'
],
[
'1534716747816194049'
,
'285598'
,
'https://www.institutparisregion.fr/societe-et-habitat/'
,
'巴黎官网7'
,
'巴黎官网7'
],
[
'1534716747816194049'
,
'285596'
,
'https://www.institutparisregion.fr/societe-et-habitat/'
,
'巴黎官网7'
,
'巴黎官网7'
],
[
'1534716747816194049'
,
'285597'
,
'https://www.institutparisregion.fr/societe-et-habitat/'
,
'巴黎官网7'
,
'巴黎官网7'
],
[
'1534716807706660866'
,
'285021'
,
'https://www.qualcomm.com/news'
,
'高通英文官网'
,
'高通英文官网'
],
[
'1534717369286217730'
,
'286486'
,
'https://www.amazonaws.cn/newsroom/?tile=editorial-3&sc_icampaign=acts-pr&sc_ichannel=ha&sc_iplace=editorial&trk=d91f25e9-f315-417b-b6f0-e22eba7e81ee'
,
'亚马逊中文企业官网'
,
'亚马逊中文企业官网'
],
[
'1534717372754907138'
,
'286487'
,
'https://press.aboutamazon.com/press-releases'
,
'亚马逊英文企业官网'
,
'亚马逊英文企业官网'
],
[
'1534717394909220865'
,
'286596'
,
'https://corporate.comcast.com/press'
,
'康卡斯特英文官网'
,
'康卡斯特英文官网'
],
[
'1534717781259145217'
,
'287115'
,
'https://www1.nyc.gov/site/planning/about/press-releases.page'
,
'纽约市城市规划司网站-住房和经济栏目'
,
'纽约市城市规划司网站-住房和经济栏目'
],
[
'1534717781259145217'
,
'284837'
,
'https://www1.nyc.gov/site/planning/about/press-releases.page'
,
'纽约市城市规划司网站-住房和经济栏目'
,
'纽约市城市规划司网站-住房和经济栏目'
],
[
'1534717810371809281'
,
'287156'
,
'https://www.bosch-presse.de/pressportal/de/en/news/'
,
'博世英文企业官网'
,
'博世英文企业官网'
],
[
'1534717813395902466'
,
'287157'
,
'https://www.asml.com/en/news/press-releases'
,
'荷兰阿斯麦英文企业官网'
,
'荷兰阿斯麦英文企业官网'
],
[
'1534717816323526657'
,
'287175'
,
'https://www.inditex.cn/en/web/guest/news-releases'
,
'西班牙爱特思英文企业官网'
,
'西班牙爱特思英文企业官网'
],
[
'1534717818953355265'
,
'287176'
,
'https://www.varian.com/zh-hans/about-varian/newsroom/press-releases'
,
'美国瓦里安中文企业官网'
,
'美国瓦里安中文企业官网'
],
[
'1534717822203940866'
,
'287177'
,
'https://www.bosch-home.cn/press'
,
'博世中文企业官网'
,
'博世中文企业官网'
],
[
'1534718107949289473'
,
'287415'
,
'https://search.newsroom.toyota.co.jp/en/corporate/search.x?_ga=2.67304533.573543556.1630913758-1874225170.1630487643'
,
'丰田汽车公司英文官网'
,
'丰田汽车公司英文官网'
],
[
'1534718107949289473'
,
'284728'
,
'https://search.newsroom.toyota.co.jp/en/corporate/search.x?_ga=2.67304533.573543556.1630913758-1874225170.1630487643'
,
'丰田汽车公司英文官网'
,
'丰田汽车公司英文官网'
],
[
'1534718167068004353'
,
'287555'
,
'https://www.bosch.com.cn/'
,
'博世中文官网'
,
'博世中文官网'
],
[
'1534718185908817922'
,
'287575'
,
'https://www.eni.com/en-IT/search.html?tab=press-release&question=*&onlytab=true'
,
'埃尼集团英文官网'
,
'埃尼集团英文官网'
],
[
'1534718188853219329'
,
'287576'
,
'https://www.mitsubishicorp.com/jp/en/pr/'
,
'三菱商事英文官网'
,
'三菱商事英文官网'
],
[
'1534718192250605569'
,
'287577'
,
'https://www.mitsubishicorp.com/jp/zh/pr/'
,
'三菱商事中文官网'
,
'三菱商事中文官网'
],
[
'1534718250928918530'
,
'287686'
,
'https://www.tokyoupdates.metro.tokyo.lg.jp/category/environment/'
,
'tokyoupdates环境'
,
'tokyoupdates环境'
],
[
'1534718443392946177'
,
'287956'
,
'https://www.tokyoupdates.metro.tokyo.lg.jp/category/society/'
,
'tokyoupdates社会'
,
'tokyoupdates社会'
],
[
'1534718451483758594'
,
'287958'
,
'https://www.tokyoupdates.metro.tokyo.lg.jp/category/business/'
,
'tokyoupdates商业'
,
'tokyoupdates商业'
],
[
'1534718455002779650'
,
'287959'
,
'https://www.tokyoupdates.metro.tokyo.lg.jp/category/tokyo/'
,
'tokyoupdates东京魅力'
,
'tokyoupdates东京魅力'
],
[
'1566690173991768065'
,
'289039'
,
'https://www.lg.com/cn/about-lg/press-media'
,
'LG电子株式会社中文官网'
,
'LG电子株式会社中文官网'
],
[
'1567074573317304322'
,
'289030'
,
'https://corporate.bestbuy.com/archive/'
,
'百思买集团英文官网'
,
'百思买集团英文官网'
],
[
'1567465016056008705'
,
'289035'
,
'https://www.db.com/media/news'
,
'德意志银行公司英文官网'
,
'德意志银行公司英文官网'
],
[
'1570357517939060738'
,
'289031'
,
'https://news.pg.com/news-releases/default.aspx'
,
'宝洁公司英文官网'
,
'宝洁公司英文官网'
],
[
'1570968738543349761'
,
'289054'
,
'https://www.oracle.com/news/'
,
'甲骨文公司英文官网'
,
'甲骨文公司英文官网'
],
[
'1600029295246245889'
,
'289776'
,
'https://apnews.com/hub/us-news?utm_source=apnewsnav&utm_medium=navigation'
,
'美联社-美国新闻'
,
'美联社-美国新闻'
],
[
'1615605928371847169'
,
'289466'
,
'https://www.theguardian.com/world'
,
'theguardian-世界'
,
'theguardian-世界'
],
[
'1615607015430586370'
,
'289464'
,
'https://www.theguardian.com/uk-news'
,
'theguardian-英国'
,
'theguardian-英国'
],
[
'1648552318727004161'
,
'289064'
,
'https://panasonic.cn/about/news/'
,
'松下有限公司中文官网'
,
'松下有限公司中文官网'
],
[
'1661879421190971393'
,
'289065'
,
'https://www.jpmorgan.com/news'
,
'摩根大通公司英文官网'
,
'摩根大通公司'
],
[
'1679701673101942786'
,
'289051'
,
'https://www.7andi.com/company/news/2023.html'
,
'Seven&I控股公司英文官网'
,
'Seven&I控股公司英文官网'
],
[
'1680126972063047681'
,
'289038'
,
'https://www.gsk.com/en-gb/media/press-releases/'
,
'葛兰素史克公司英文官网'
,
'葛兰素史克公司英文官网'
],
[
'1685845692253081601'
,
'289959'
,
'https://about.fb.com/news/'
,
'脸书'
,
'脸书'
],
[
'1685850553195565058'
,
'289061'
,
'https://news.microsoft.com/zh-cn/'
,
'微软公司中文官网'
,
'微软公司中文官网'
],
[
'1685860897158729729'
,
'289034'
,
'https://www.intel.cn/content/www/cn/zh/newsroom/home.html'
,
'英特尔公司中文官网'
,
'英特尔公司中文官网'
],
[
'1685868618520072194'
,
'289238'
,
'https://www.jnj.com.cn/news/press-releases'
,
'强生公司中文官网'
,
'强生公司中文官网'
],
[
'1685872467758682114'
,
'289071'
,
'https://www.pfizer.com/news/media-resources'
,
'辉瑞制药有限公司英文官网'
,
'辉瑞制药有限公司英文官网'
],
[
'1685974438113054722'
,
'289066'
,
'https://www.ford.com.cn/newsroom/'
,
'福特汽车公司中文官网'
,
'福特汽车公司中文官网'
],
[
'1686333807677890562'
,
'289032'
,
'https://www.nestle.com.cn/media/pressreleases'
,
'雀巢集团中文官网'
,
'雀巢集团中文官网'
],
[
'1692102158291283970'
,
'289581'
,
'https://www.axa.com/en/press/press-releases'
,
'安盛AXA-全球'
,
'安盛AXA-全球'
],
[
'1695389560841687041'
,
'289237'
,
'https://www.sc.com/en/media/press-releases/'
,
'渣打银行英文官网'
,
'渣打银行英文官网'
],
[
'1742444673277730817'
,
'289758'
,
'https://chicago.urbanize.city/'
,
'芝加哥城市化'
,
'芝加哥城市化'
],
[
'1742444680290607105'
,
'289706'
,
'https://www.nitori.co.jp/news/'
,
'宜得利外文官网'
,
'宜得利外文官网'
],
[
'1742444687311872002'
,
'289705'
,
'https://www.nitorichina.com/news'
,
'宜得利中文官网'
,
'宜得利中文官网'
],
[
'1742444694047924225'
,
'289695'
,
'https://kansai-sanpo.com/category/area/
%
e9
%96%
a2
%
e8
%
a5
%
bf
%
ef
%
bc
%88%
e5
%
a4
%
a7
%
e9
%98%
aa
%
ef
%
bc
%89
/osaka/'
,
'大阪府城市再开发信息'
,
'大阪府城市再开发信息'
],
[
'1742444702386200577'
,
'289696'
,
'https://www.boston.gov/news'
,
'波士顿官网新闻'
,
'波士顿官网新闻'
],
[
'1742444729091334146'
,
'289899'
,
'https://maron-trachte.brussels/nl/nieuws/'
,
'布鲁塞尔大区部长网站新闻'
,
'布鲁塞尔大区部长网站新闻'
],
[
'1742444738322997249'
,
'289900'
,
'https://bisa.brussels/nieuws'
,
'布鲁塞尔统计分析新闻'
,
'布鲁塞尔统计分析新闻'
],
[
'1742444746908737538'
,
'289901'
,
'https://www.bruzz.be/gemeente/brussel'
,
'布鲁塞尔大区城市新闻'
,
'布鲁塞尔大区城市新闻'
],
[
'1742444754869526529'
,
'289735'
,
'https://www.austintexas.gov/department/communications/news'
,
'奥斯汀市新闻'
,
'奥斯汀市新闻'
],
[
'1742444762939367426'
,
'289736'
,
'https://www.kut.org/austin'
,
'奥斯汀城市新闻'
,
'奥斯汀城市新闻'
],
[
'1742444775413227521'
,
'289737'
,
'https://www.austinchronicle.com/daily/news/'
,
'奥斯汀纪事报'
,
'奥斯汀纪事报'
],
[
'1742444816672595970'
,
'290023'
,
'https://houstondaily.com/stories/tag/9-business'
,
'休斯顿日报-经济新闻'
,
'休斯顿日报-经济新闻'
],
[
'1742444827737169922'
,
'290024'
,
'https://communityimpact.com/development/houston/'
,
'休斯顿社区发展新闻'
,
'休斯顿社区发展新闻'
],
[
'1742444847572033538'
,
'289981'
,
'https://www.inabr.com/'
,
'汽车商业评论'
,
'汽车商业评论'
],
[
'1742444858330423298'
,
'289979'
,
'https://www.chinaev100.com/'
,
'中国电动汽车百人会'
,
'中国电动汽车百人会'
],
[
'1742444867897630722'
,
'289795'
,
'https://www.comune.roma.it/web/it/notizie.page'
,
'罗马市议会官网'
,
'罗马市议会官网'
],
[
'1742444881885634561'
,
'289796'
,
'https://www.architettiroma.it/notizie/'
,
'罗马建筑师协会新闻'
,
'罗马建筑师协会新闻'
],
[
'1742444893667434498'
,
'289797'
,
'https://www.romatoday.it/dossier/ambiente/'
,
'今日罗马环境新闻'
,
'今日罗马环境新闻'
],
[
'1742444906481033217'
,
'289798'
,
'https://www.romatoday.it/dossier/economia/'
,
'今日罗马经济新闻'
,
'今日罗马经济新闻'
],
[
'1742444916002103297'
,
'289799'
,
'https://www.romatoday.it/dossier/potere/'
,
'今日罗马政治新闻'
,
'今日罗马政治新闻'
],
[
'1742444928115253249'
,
'289800'
,
'https://argomenti.ilsole24ore.com/tag/comune-di-roma'
,
'24小时报罗马新闻'
,
'24小时报罗马新闻'
],
[
'1742444939058192385'
,
'289738'
,
'https://austin.urbanize.city/'
,
'奥斯汀城市化'
,
'奥斯汀城市化'
],
[
'1742444971329167362'
,
'289879'
,
'https://www.hel.fi/fi/uutiset'
,
'赫尔辛基市官网新闻'
,
'赫尔辛基市官网新闻'
],
[
'1742444981487771650'
,
'289880'
,
'https://digi.hel.fi/ajankohtaista/'
,
'数字赫尔辛基新闻'
,
'数字赫尔辛基新闻'
],
[
'1742444992812392450'
,
'289881'
,
'https://www.helsinginuutiset.fi/paikalliset/'
,
'赫尔辛基本地新闻'
,
'赫尔辛基本地新闻'
],
[
'1742445002102775810'
,
'289882'
,
'https://www.brussel.be/nieuws'
,
'布鲁塞尔新闻'
,
'布鲁塞尔新闻'
],
[
'1742445009522499585'
,
'289883'
,
'https://brulocalis.brussels/fr/actualites'
,
'布鲁塞尔市政协会新闻'
,
'布鲁塞尔市政协会新闻'
],
[
'1742445018569613314'
,
'289884'
,
'https://rudivervoort.brussels/news/?lang=nl'
,
'布鲁塞尔大区首席大臣网站新闻'
,
'布鲁塞尔大区首席大臣网站新闻'
],
[
'1742445029118287873'
,
'289755'
,
'https://www.chicago.gov/city/en/depts/mayor/press_room/press_releases.html'
,
'芝加哥市官网'
,
'芝加哥市官网'
],
[
'1742445039524356098'
,
'289756'
,
'https://www.chicagoconstructionnews.com/author/carolyngruske/'
,
'芝加哥建筑新闻特刊'
,
'芝加哥建筑新闻特刊'
],
[
'1742445049200615425'
,
'289757'
,
'https://chicago.suntimes.com/city-hall'
,
'芝加哥太阳时报-市政厅新闻'
,
'芝加哥太阳时报-市政厅新闻'
],
[
'1742445056565813250'
,
'289622'
,
'http://chinese.joins.com/news/articleList.html?sc_section_code=S1N4&view_type=sm'
,
'中央日报-政治社会'
,
'中央日报-政治社会'
],
[
'1742445063578689538'
,
'289619'
,
'https://news.seoul.go.kr/?doing_wp_cron=1664163365.5353860855102539062500'
,
'首尔官网'
,
'首尔官网'
],
[
'1742445070847418370'
,
'289655'
,
'https://www.merckgroup.com/en/news-stories.html'
,
'默克英文官网'
,
'默克英文官网'
],
[
'1742445079869366273'
,
'289675'
,
'https://www.pref.osaka.lg.jp/hodo/index.php?site=fumin'
,
'大阪府新闻'
,
'大阪府新闻'
],
[
'1742445090116050946'
,
'289623'
,
'https://cn.yna.co.kr/politics/index'
,
'韩联社-政治'
,
'韩联社-政治'
],
[
'1742445099733590017'
,
'289620'
,
'https://www.donga.com/cn/List?c=01'
,
'东亚日报-政治'
,
'东亚日报-政治'
],
[
'1742445107463692290'
,
'289621'
,
'https://www.donga.com/cn/List?c=08'
,
'东亚日报-文化'
,
'东亚日报-文化'
],
[
'1742445115256709121'
,
'289677'
,
'https://www.asahi.com/area/osaka/list.html'
,
'朝日新闻_大阪府'
,
'朝日新闻_大阪府'
],
[
'1742445122793873409'
,
'289676'
,
'https://www.constnews.com/?cat=13837'
,
'建设新闻'
,
'建设新闻'
],
[
'1742445138585427970'
,
'289698'
,
'https://content.boston.gov/departments/press-office'
,
'波士顿市长新闻办公室'
,
'波士顿市长新闻办公室'
],
[
'1742445152762175490'
,
'289699'
,
'http://www.bostonplans.org/news-calendar/news-updates'
,
'波士顿发展局新闻'
,
'波士顿发展局新闻'
],
[
'1742445163453456386'
,
'289700'
,
'https://www.nbcboston.com/news/local/'
,
'NBC波士顿新闻'
,
'NBC波士顿新闻'
],
[
'1742445175453360130'
,
'289701'
,
'https://boston.curbed.com/boston-development'
,
'波士顿发展新闻'
,
'波士顿发展新闻'
],
[
'1742445204352114690'
,
'289702'
,
'https://bostoday.6amcity.com/civic'
,
'今日波士顿'
,
'今日波士顿'
],
[
'1742445262153818113'
,
'289835'
,
'https://www.kk.dk/nyheder'
,
'哥本哈根市官网'
,
'哥本哈根市官网'
],
[
'1742445272681521154'
,
'284935'
,
'https://media.mercedes-benz.com/'
,
'戴姆勒英文官网'
,
'戴姆勒英文官网'
],
[
'1742445298006728706'
,
'284938'
,
'https://www.novartis.com/news/news-archive?type=key-releases'
,
'诺华公司英文官网'
,
'诺华公司英文官网'
],
[
'1742445327832424449'
,
'285018'
,
'https://www.volkswagenag.com/en/media/news-archive.html'
,
'大众汽车英文官网'
,
'大众汽车英文官网'
],
[
'1742447177587523586'
,
'285103'
,
'https://www.mylondon.news/all-about/politics'
,
'mylondon-政治'
,
'mylondon'
],
[
'1742447228720283650'
,
'287155'
,
'https://www.varian.com/about-varian/newsroom/press-releases?n=1'
,
'美国瓦里安英文企业官网'
,
'美国瓦里安英文企业官网'
],
[
'1742447414062383106'
,
'289836'
,
'https://www.kbh.dk/nyheder'
,
'KBH新闻'
,
'KBH新闻'
],
[
'1742447621940477954'
,
'285114'
,
'https://www.mylondon.news/news/zone-1-news/'
,
'mylondon-伦敦市中心'
,
'mylondon'
],
[
'1742447732376502273'
,
'285117'
,
'https://www.mylondon.news/news/south-london-news/'
,
'mylondon-南伦敦'
,
'mylondon'
],
[
'1742447759983411201'
,
'285107'
,
'https://www.mylondon.news/news/news-opinion/'
,
'mylondon-新闻观点'
,
'mylondon'
],
[
'1742447892271759362'
,
'285108'
,
'https://www.mylondon.news/news/nostalgia/'
,
'mylondon-怀旧之情'
,
'mylondon'
],
[
'1742447906922463234'
,
'285111'
,
'https://www.business-live.co.uk/all-about/london'
,
'mylondon-伦敦商业'
,
'mylondon'
],
[
'1742447933875060738'
,
'285115'
,
'https://www.mylondon.news/news/west-london-news/'
,
'mylondon-西伦敦'
,
'mylondon'
],
[
'1742447965856628738'
,
'285255'
,
'https://global.abb/group/en/media/releases/abb-ability'
,
'ABB英文官网'
,
'阿西布朗勃法瑞公司 ABB英文官网'
],
[
'1742448002405793794'
,
'285259'
,
'https://www.nydailynews.com/opinion/'
,
'nydailynews-观点'
,
'nydailynews'
],
[
'1742448244954005506'
,
'285065'
,
'https://www.mylondon.news/all-about/traffic-and-travel'
,
'mylondon-交通出行'
,
'mylondon'
],
[
'1742448367792586754'
,
'284658'
,
'https://www.tesla.cn/blog/'
,
'特斯拉中文官网'
,
'特斯拉中文官网'
],
[
'1742448386805366786'
,
'284661'
,
'https://www.jdallianz.com/news-list'
,
'安联保险集团中文官网'
,
'安联保险集团中文官网'
],
[
'1742448488219443201'
,
'285099'
,
'https://www.mylondon.news/all-about/education'
,
'mylondon-教育'
,
'mylondon'
],
[
'1742448499481149442'
,
'285118'
,
'https://www.mylondon.news/news/north-london-news/'
,
'mylondon-北伦敦'
,
'mylondon'
],
[
'1742448522944086017'
,
'284677'
,
'http://w1.siemens.com.cn/news/news_articles/default.aspx/'
,
'西门子中文官网'
,
'西门子中文官网'
],
[
'1742448562278268930'
,
'289062'
,
'https://cn.kddi.com/zh_cn/company/news/'
,
'KDDI株式会社中文官网'
,
'KDDI株式会社中文官网'
],
[
'1742448571593818114'
,
'289063'
,
'https://www.citi.com.cn/html/cn/Press_room/Press_release.html'
,
'花旗集团中文官网'
,
'花旗集团中文官网'
],
[
'1742448588589137922'
,
'289074'
,
'https://www.oracle.com/cn/news/'
,
'甲骨文公司中文官网'
,
'甲骨文公司中文官网'
],
[
'1742448599594991617'
,
'289119'
,
'https://www.sony.com.cn/content/sonyportal/zh-cn/cms/newscenter.html'
,
'索尼集团中文官网'
,
'索尼集团中文官网'
],
[
'1742448610177220609'
,
'289120'
,
'https://www.pg.com.cn/'
,
'宝洁公司中文官网'
,
'宝洁公司中文官网'
],
[
'1742448623611576321'
,
'289199'
,
'https://planning.lacity.org/resources/latest-news'
,
'洛杉矶城市规划局'
,
'洛杉矶城市规划局'
],
[
'1742448633841483777'
,
'289200'
,
'https://planning.lacity.org/resources/publications'
,
'洛杉矶城市规划局月度报告'
,
'洛杉矶城市规划局月度报告'
],
[
'1742448644696342529'
,
'289201'
,
'https://www.cbsnews.com/losangeles/local-news/los-angeles/'
,
'CBS新闻'
,
'CBS新闻'
],
[
'1742448654003503105'
,
'289203'
,
'https://www.citywatchla.com/index.php/cw/los-angeles'
,
'洛杉矶城市观察'
,
'洛杉矶城市观察'
],
[
'1742448671749603329'
,
'289204'
,
'https://sf.gov/news/all'
,
'旧金山官网新闻'
,
'旧金山官网新闻'
],
[
'1742448685632749570'
,
'289205'
,
'https://sfplanning.org/news?page=0'
,
'旧金山城市规划局新闻'
,
'旧金山城市规划局新闻'
],
[
'1742448700237316097'
,
'289209'
,
'https://www.mos.ru/search?category=newsfeed&hostApplied=false&page=1&q=&sort=date_desc&types=news_en'
,
'莫斯科城市新闻'
,
'莫斯科城市新闻'
],
[
'1742448713428402178'
,
'289115'
,
'https://www.bmw.com.cn/zh/topics/experience/bmw-news/news-2023-5.html'
,
'宝马集团中文官网'
,
'宝马集团中文官网'
],
[
'1742448726195863553'
,
'289117'
,
'http://www.sbcvc.com/a/gongsixinwen/gongsixinwen/'
,
'软银集团中文官网'
,
'软银集团中文官网'
],
[
'1742448741270192130'
,
'289214'
,
'https://www.themoscowtimes.com/ru/search/moscow'
,
'莫斯科时报'
,
'莫斯科时报'
],
[
'1742448748496977922'
,
'289218'
,
'https://urbantoronto.ca/'
,
'多伦多城市新闻'
,
'多伦多城市新闻'
],
[
'1742448761277022210'
,
'289037'
,
'https://news.gm.com.cn/zh/home/newsroom.html'
,
'通用汽车公司中文官网'
,
'通用汽车公司中文官网'
],
[
'1742448778209427458'
,
'289042'
,
'https://www.pfizer.com.cn/news/pfizer_press_releases_cn.html'
,
'辉瑞制药有限公司中文官网'
,
'辉瑞制药有限公司中文官网'
],
[
'1742448813986840578'
,
'289056'
,
'https://www.softbank.jp/corp/news/'
,
'软银集团英文官网'
,
'软银集团英文官网'
],
[
'1742448845565755393'
,
'289068'
,
'https://www.dell.com/zh-cn/dt/corporate/newsroom.htm#/filter-on/Country:zh-cn'
,
'戴尔科技公司中文官网'
,
'戴尔科技公司中文官网'
],
[
'1742448855938269185'
,
'289069'
,
'https://www.intel.com/content/www/us/en/newsroom/home.html'
,
'英特尔公司英文官网'
,
'英特尔公司英文官网'
],
[
'1742448869007720450'
,
'289070'
,
'https://news.gm.com/newsroom.html'
,
'通用汽车公司英文官网'
,
'通用汽车公司英文官网'
],
[
'1742448941640482818'
,
'289284'
,
'https://www.mos.ru/en/news/maintheme/211287/'
,
'莫斯科城市新闻'
,
'莫斯科城市新闻'
],
[
'1742448950159114241'
,
'289285'
,
'https://www.mos.ru/en/news/maintheme/18287/'
,
'莫斯科城市新闻'
,
'莫斯科城市新闻'
],
[
'1742448960229638145'
,
'289286'
,
'https://www.mos.ru/en/news/maintheme/172287/'
,
'莫斯科城市新闻'
,
'莫斯科城市新闻'
],
[
'1742448970803478530'
,
'289287'
,
'https://www.mos.ru/en/news/maintheme/68287/'
,
'莫斯科城市新闻'
,
'莫斯科城市新闻'
],
[
'1742448978734907394'
,
'289288'
,
'https://www.hitachi.com.cn/about/press/index.html'
,
'日立公司中文官网'
,
'日立公司中文官网'
],
[
'1742448990613176322'
,
'289289'
,
'https://www.euronews.com/tag/moscow'
,
'欧洲新闻网'
,
'欧洲新闻网'
],
[
'1742449001652584450'
,
'289315'
,
'https://www.berlin.de/sen/uvk/presse/pressemitteilungen/2023/'
,
'柏林市新闻'
,
'柏林市新闻'
],
[
'1742449011681165314'
,
'289316'
,
'https://www.berlin.de/sen/uvk/presse/pressemitteilungen/2021/'
,
'柏林市新闻(2021年)'
,
'柏林市新闻(2021年)'
],
[
'1742449018966671362'
,
'289317'
,
'https://www.berlin.de/aktuelles/'
,
'柏林新闻'
,
'柏林新闻'
],
[
'1742449030857523201'
,
'289318'
,
'https://smart-city-berlin.de/en/news-list?tx_news_pi1
%5
B
%40
widget_0
%5
D
%5
BcurrentPage
%5
D=2&cHash=dc8f1670d4f3218a6d899cd6df645d5a'
,
'柏林智慧城市'
,
'柏林智慧城市'
],
[
'1742449042198921218'
,
'289319'
,
'https://www.berlin-partner.de/aktuelles'
,
'柏林伙伴'
,
'柏林伙伴'
],
[
'1742449067452825602'
,
'289236'
,
'https://www.philips.com.cn/a-w/about/news/home'
,
'荷兰飞利浦公司中文官网'
,
'荷兰飞利浦公司中文官网'
],
[
'1742449104253648898'
,
'289330'
,
'https://www.20minutos.es/minuteca/comunidad-de-madrid/'
,
'马德里市20分钟报'
,
'马德里市20分钟报'
],
[
'1742449117734141953'
,
'289331'
,
'https://www.madridesnoticia.es/secciones/municipios/municipios-madrid/'
,
'马德里新闻'
,
'马德里新闻'
],
[
'1742449137917132801'
,
'289033'
,
'https://www.dell.com/en-us/dt/corporate/newsroom/announcements.htm#country=en-us'
,
'戴尔科技公司英文官网'
,
'戴尔科技公司英文官网'
],
[
'1742449152165183490'
,
'289036'
,
'https://boeing.mediaroom.com/'
,
'波音公司英文官网'
,
'波音公司英文官网'
],
[
'1742449161015164929'
,
'289196'
,
'https://www.lamayor.org/media/press_releases'
,
'洛杉矶市长新闻'
,
'洛杉矶市长新闻'
],
[
'1742449182729076738'
,
'289379'
,
'https://www.morgenpost.de/berlin/'
,
'柏林晨报-柏林新闻'
,
'柏林晨报-柏林新闻'
],
[
'1742449204946305025'
,
'289460'
,
'https://www.bbc.com/news/england/london'
,
'BBC-伦敦'
,
'BBC-伦敦'
],
[
'1742449219089498113'
,
'289459'
,
'https://www.bbc.com/news/england'
,
'BBC-英格兰'
,
'BBC-英格兰'
],
[
'1742449231739518978'
,
'289462'
,
'https://www.bbc.com/news/world/europe'
,
'BBC-欧洲'
,
'BBC-欧洲'
],
[
'1742449245278732290'
,
'289463'
,
'https://www.theguardian.com/international'
,
'theguardian-国际'
,
'theguardian-国际'
],
[
'1742449257706455041'
,
'289465'
,
'https://www.theguardian.com/politics'
,
'theguardian-英国政治'
,
'theguardian-英国政治'
],
[
'1742449267659538433'
,
'289467'
,
'https://www.theguardian.com/world/europe-news'
,
'theguardian-欧洲'
,
'theguardian-欧洲'
],
[
'1742449277201580034'
,
'289468'
,
'https://www.telegraph.co.uk/news/'
,
'telegraph-英国新闻'
,
'telegraph-英国新闻'
],
[
'1742449292762447874'
,
'289470'
,
'https://metro.co.uk/news/'
,
'metro-新闻'
,
'metro-新闻'
],
[
'1742449301679538177'
,
'289471'
,
'https://metro.co.uk/tag/london/'
,
'metro-伦敦'
,
'metro-伦敦'
],
[
'1742449310575656962'
,
'289472'
,
'https://metro.co.uk/news/uk/'
,
'metro-英国'
,
'metro-英国'
],
[
'1742449389386629122'
,
'289043'
,
'https://newsroom.cisco.com/c/r/newsroom/en/us/all-news.html'
,
'思科公司英文官网'
,
'思科公司英文官网'
],
[
'1742449401050988545'
,
'289044'
,
'https://www.merckgroup.com.cn/cn-zh/company/press-news/news.html'
,
'默克中文官网'
,
'默克中文官网'
],
[
'1742449438606786562'
,
'289029'
,
'https://www.seoul.go.kr/realmnews/in/list.do'
,
'首尔官方网站'
,
'首尔官方网站'
],
[
'1742449446768902145'
,
'289280'
,
'https://www.metro.tokyo.lg.jp'
,
'市长活动'
,
'东京'
],
[
'1742449460861763585'
,
'289777'
,
'https://abc.com/collection/news'
,
'美国广播公司新闻'
,
'美国广播公司新闻'
],
[
'1742449471901171713'
,
'289775'
,
'https://www.sfgate.com/news/?IPID=SFGate-HP-Header'
,
'旧金山纪事报'
,
'旧金山纪事报'
],
[
'1742449487755640834'
,
'289040'
,
'https://www.blog.google/around-the-globe/'
,
'谷歌公司英文官网'
,
'谷歌公司英文官网'
],
[
'1742449500497936386'
,
'289555'
,
'https://www.amsterdam.nl/en/news/'
,
'Amsterdam-新闻'
,
'Amsterdam-新闻'
],
[
'1742449514569826306'
,
'289556'
,
'https://ground.news/'
,
'Gound-新闻'
,
'Gound-新闻'
],
[
'1742449523654688769'
,
'289557'
,
'https://www.gov.kz/'
,
'gov-新闻'
,
'gov-新闻'
],
[
'1742449530621427713'
,
'289558'
,
'https://www.gov.kz/memleket/entities/astana/'
,
'gov-阿斯塔纳'
,
'gov-阿斯塔纳'
],
[
'1742449700029366273'
,
'289576'
,
'https://www.philips.com/a-w/about/news/all-news'
,
'飞利浦-全球'
,
'飞利浦-全球'
],
[
'1742449710414462978'
,
'289577'
,
'https://www.philips.com.cn/a-w/about/news/home.html'
,
'飞利浦-中国'
,
'飞利浦-中国'
],
[
'1742449731784441857'
,
'289049'
,
'http://english.chosun.com/svc/list_in/search.html?query=Seoul&sort=0&catid=2'
,
'朝鲜日报'
,
'朝鲜日报'
],
[
'1742449742047903745'
,
'289048'
,
'https://mayor.seoul.go.kr/app/oh/seoul/newsList.do'
,
'首尔市长新闻'
,
'首尔市长新闻'
],
[
'1742449762537078785'
,
'289277'
,
'https://www.philips.com/a-w/about/news/home'
,
'荷兰飞利浦公司英文官网'
,
'荷兰飞利浦公司英文官网'
],
[
'1742449775791079425'
,
'289024'
,
'https://www.jpmorganchina.com.cn/zh/news'
,
'摩根大通公司中文官网'
,
'摩根大通公司'
],
[
'1742449789942661122'
,
'289025'
,
'https://news.microsoft.com/'
,
'微软公司英文官网'
,
'微软公司英文官网'
],
[
'1742449808561176577'
,
'289027'
,
'https://www.bmwgroup.com/en/news.html'
,
'宝马集团英文官网'
,
'宝马集团英文官网'
],
[
'1742449817079808002'
,
'289579'
,
'https://www.shell.com.cn/zh_cn/media/press-releases/2022-media-releases.html'
,
'壳牌石油-中国'
,
'壳牌石油-中国'
],
[
'1742449825803960322'
,
'289580'
,
'https://www.ing.com/Newsroom.htm'
,
'ING-全球'
,
'ING-全球'
],
[
'1742449833206906881'
,
'289335'
,
'https://www.gsk-china.com/zh-cn/media/press-releases/'
,
'葛兰素史克公司中文官网'
,
'葛兰素史克公司中文官网'
],
[
'1742449842887360514'
,
'289375'
,
'https://www.morgenpost.de/berlin-aktuell/startups/'
,
'柏林晨报-初创企业新闻'
,
'柏林晨报-初创企业新闻'
],
[
'1742449858590834689'
,
'289535'
,
'https://madrid.fundacionlaboral.org/actualidad/noticias/territorial/'
,
'建筑劳工基金会新闻'
,
'建筑劳工基金会新闻'
],
[
'1742449870854979586'
,
'289536'
,
'https://okdiario.com/madrid/'
,
'马德里自治区-好日报'
,
'马德里自治区-好日报'
],
[
'1742449879893704705'
,
'289537'
,
'https://sur-madrid.com/online/category/comunidad-de-madrid/'
,
'SurMadrid-马德里自治区'
,
'SurMadrid-马德里自治区'
],
[
'1742449888945012738'
,
'289538'
,
'https://www.elindependiente.com/espana/madrid/'
,
'SurMadrid-马德里'
,
'SurMadrid-马德里'
],
[
'1742449973590261761'
,
'289584'
,
'https://www.cbsnews.com/newyork/'
,
'CBS纽约'
,
'CBS纽约'
],
[
'1742450026417520642'
,
'289476'
,
'https://www.hel.fi/helsinki/en'
,
'赫尔辛基新闻'
,
'赫尔辛基新闻'
],
[
'1742450035896647681'
,
'289477'
,
'https://www.themayor.eu/en/finland/'
,
'themayor-芬兰'
,
'themayor-芬兰'
],
[
'1742450048064323586'
,
'289478'
,
'https://www.themayor.eu/en/finland/helsinki'
,
'themayor-赫尔辛基'
,
'themayor-赫尔辛基'
],
[
'1742450060336857090'
,
'289479'
,
'https://www.themayor.eu/en/denmark/'
,
'themayor-丹麦'
,
'themayor-丹麦'
],
[
'1742450069358804993'
,
'289480'
,
'https://www.themayor.eu/en/denmark/copenhagen'
,
'themayor-哥本哈根'
,
'themayor-哥本哈根'
],
[
'1742450077890019329'
,
'289481'
,
'https://www.brusselstimes.com/'
,
'Brussels-新闻'
,
'Brussels-新闻'
],
[
'1742450087155236866'
,
'289482'
,
'https://www.brusselstimes.com/brussels'
,
'Brussels-布鲁塞尔'
,
'Brussels-布鲁塞尔'
],
[
'1742450095661285377'
,
'289483'
,
'https://www.vrt.be/vrtnws/en/'
,
'VRTNWS-新闻'
,
'VRTNWS-新闻'
],
[
'1742450103974395905'
,
'289484'
,
'https://www.vrt.be/vrtnws/en/categories/brussels/'
,
'VRTNWS-布鲁塞尔'
,
'VRTNWS-布鲁塞尔'
],
[
'1742450187705286657'
,
'289206'
,
'https://abc7news.com/place/san-francisco-city-hall/'
,
'旧金山市政厅新闻'
,
'旧金山市政厅新闻'
],
[
'1742450198165880834'
,
'289207'
,
'https://www.mos.ru/search?category=newsfeed&page=1&q=&sort=date_desc&types=event_en'
,
'莫斯科市长活动'
,
'莫斯科市长活动'
],
[
'1742450205895983105'
,
'289210'
,
'https://abc7news.com/san-francisco/'
,
'旧金山新闻'
,
'旧金山新闻'
],
[
'1742450214750158849'
,
'289211'
,
'https://investmoscow.ru/press-center/news-and-events'
,
'莫斯科投资'
,
'莫斯科投资'
],
[
'1742450222492844034'
,
'289212'
,
'https://sfist.com/politics/'
,
'旧金山政治新闻'
,
'旧金山政治新闻'
],
[
'1742450229153398786'
,
'289216'
,
'https://sfist.com/business-tech/'
,
'旧金山商业科技新闻'
,
'旧金山商业科技新闻'
],
[
'1742450242285764609'
,
'289217'
,
'https://www.toronto.ca/news/'
,
'多伦多官网新闻'
,
'多伦多官网新闻'
],
[
'1742450257993433090'
,
'289485'
,
'https://www.morganstanleychina.com/about-us/newsroom#813106141-tab'
,
'摩根士丹利中文官网'
,
'摩根士丹利中文官网'
],
[
'1742450287244509186'
,
'289327'
,
'https://www.madrid.es/portales/munimadrid/es/Inicio/El-Ayuntamiento/Todas-las-noticias/?vgnextfmt=default&vgnextchannel=e40362215c483510VgnVCM2000001f4a900aRCRD'
,
'马德里市议会新闻'
,
'马德里市议会新闻'
],
[
'1742450296765579265'
,
'289328'
,
'https://diario.madrid.es/blog/notas-de-prensa/'
,
'马德里报'
,
'马德里报'
],
[
'1742450305577811969'
,
'289329'
,
'https://www.comunidad.madrid/hemeroteca'
,
'马德里自治区新闻'
,
'马德里自治区新闻'
],
[
'1742450577096081410'
,
'289235'
,
'https://www.lg.co.kr/media/release'
,
'LG电子株式会社英文官网'
,
'LG电子株式会社英文官网'
]]
# MongoDB collection (database '中科软', collection '数据源_0504') that doJob
# queries by 'url' to detect articles that were already stored.
# NOTE(review): host and credentials are hard-coded in source.
datacol = pymongo.MongoClient(
    'mongodb://114.115.221.202:27017/',
    username='admin',
    password='ZZsn@9988',
)['中科软']['数据源_0504']
def get_sid_data(sid, startTime, endTime):
    """Collect every document of one source from the ``basedata`` index.

    Issues a scrolling search through ``helpers.scan`` for documents whose
    ``sid`` matches *sid* and whose ``createDate`` lies between *startTime*
    and *endTime* (both bounds inclusive).

    Returns:
        list: the ``_source`` payload of every hit, in the order scanned.
    """
    sid_clause = {"match": {"sid": f"{sid}"}}
    window_clause = {
        "range": {
            "createDate": {
                "gte": f"{startTime}",
                "lte": f"{endTime}",
            }
        }
    }
    search_body = {"query": {"bool": {"must": [sid_clause, window_clause]}}}
    hits = helpers.scan(
        client=es_client,
        query=search_body,
        scroll="10m",
        index="basedata",
        size=1000,
    )
    return [hit["_source"] for hit in hits]
def getKeywordsTags(text):
    """Request key-phrase tags for *text* from the ``get_phrase`` service.

    POSTs the text (asking for the top 20 phrases) and returns the
    ``resultData.data`` member of the JSON reply.

    Returns:
        ``r['resultData']['data']`` from the reply, or ``''`` when the reply
        does not have that shape.

    Raises:
        Whatever ``requests`` raises when the HTTP call fails or times out,
        or when the response body is not valid JSON; callers are expected to
        handle that (doJob skips the article).
    """
    import requests

    # Alternative deployments of the same service:
    #   http://192.168.1.149:7005/get_phrase/
    #   http://39.105.62.235:7009/get_phrase/
    url = "http://39.105.62.235:7006/get_phrase/"
    payload = {'topK': '20', 'text': text, 'name': 'phrase'}
    response = requests.request("POST", url, data=payload, timeout=200)
    log.info(response.text)
    r = response.json()
    log.info(f"result:{type(r)}==={r}")
    try:
        tag = r['resultData']['data']
    except Exception as e:
        # The exception used to be passed as a %-format argument to a message
        # without a placeholder, which makes the logging call itself fail to
        # format; embed it in the message instead.
        log.error(f"===特征词标签错误==={e}")
        tag = ''
    return tag
def doJob():
    """Sync newly created articles into staging records, once per hour.

    Loops forever. Each pass covers ``createDate`` from the end of the
    previous pass (``2024-01-07T00:00:00`` on the first pass) up to "now".
    For every row of ``FLGS`` -- ``[clbSid, SID, columnUrl, columns, name]`` --
    it fetches the matching documents with ``get_sid_data``, skips URLs that
    ``datacol`` already contains and documents without ``publishDate``, and
    builds the record for the article:

    * text detected as ``zh*`` (or ``no``) keeps its own title/content, gets
      key-phrase tags from ``getKeywordsTags`` and ``postCode`` ``'1'``;
    * any other language is kept as foreign text, with placeholder values
      for the Chinese fields and ``postCode`` ``'2'``.
    """
    pending = '待审核,待更新'  # placeholder for fields awaiting translation
    start_time = 0  # 0 marks the first pass, before any window has completed
    while True:
        end_time = datetime.datetime.now()
        end_timeStr = end_time.strftime('%Y-%m-%dT%H:%M:%S')
        if start_time == 0:
            # First pass: fixed backfill start instead of "today 00:00".
            start_timeStr = '2024-01-07T00:00:00'
        else:
            start_timeStr = start_time.strftime('%Y-%m-%dT%H:%M:%S')
        log.info(f'同步时间==={start_timeStr}==={end_timeStr}')
        for FLG in FLGS:
            clbSid, SID, columnUrl, columns, name = FLG[:5]
            datas = get_sid_data(clbSid, start_timeStr, end_timeStr)
            log.info(f'{columns}===需入库{len(datas)}条数据')
            for data in datas:
                SOURCEADDRESS = data['sourceAddress']
                if datacol.find_one({'url': SOURCEADDRESS}):
                    log.info(f'{SOURCEADDRESS}===已入库')
                    continue
                clbId = data['id']
                TITLE = data['title']
                CONTENT = data['contentWithTag']
                content_no_tag = data['content']
                LANG = detect(content_no_tag)
                CREATE_DATE = data['createDate'].replace('T', ' ')
                try:
                    PUBLISH_DATE = data['publishDate'].replace('T', ' ')
                except Exception:
                    log.error(f'{TITLE}===没有发布时时间')
                    continue
                if str(LANG).startswith('zh') or str(LANG) == 'no':
                    # Chinese article: the original text is the final text.
                    titleForeign = ''
                    title = TITLE
                    richTextForeign = ''
                    richText = CONTENT
                    contentForeign = ''
                    content = content_no_tag
                    tag1 = ''
                    try:
                        tag2 = ';'.join(getKeywordsTags(content))
                    except Exception as e:
                        # Previously skipped silently; keep skipping but say why.
                        log.error(f'{TITLE}===特征词标签获取失败==={e}')
                        continue
                    postCode = '1'
                else:
                    # Foreign article: keep the original, translation comes later.
                    titleForeign = TITLE
                    title = pending
                    richTextForeign = CONTENT
                    richText = pending
                    contentForeign = content_no_tag
                    content = pending
                    tag1 = pending
                    tag2 = pending
                    postCode = '2'
                # Site root of the article URL. The Chinese branch used to
                # format this as '{}//{}', dropping the ':' after the scheme.
                parseUrl = urlparse(SOURCEADDRESS)
                source = '{}://{}'.format(parseUrl[0], parseUrl[1])
                write_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                item = {
                    'clbId': str(clbId),
                    'sid': str(SID),
                    'LANG': LANG,
                    'TITLE': data['title'],
                    'url': SOURCEADDRESS,
                    'CONTENT': CONTENT,
                    'columns': columns,
                    'columnUrl': columnUrl,
                    'titleForeign': titleForeign,
                    'title': title,
                    'richTextForeign': richTextForeign,
                    'richText': richText,
                    'contentForeign': contentForeign,
                    'content': content,
                    'tag1': tag1,
                    'tag2': tag2,
                    'name': name,
                    'source': source,
                    'newsTime': PUBLISH_DATE,
                    'CREATE_DATE': CREATE_DATE,
                    'INSERT_DATE': write_time,
                    'postCode': postCode,
                    'postTime': write_time,
                }
                try:
                    # NOTE(review): the insert is disabled in the committed
                    # code, so the success message below is logged without
                    # anything being written. Confirm before re-enabling.
                    # datacol.insert_one(item)
                    log.info(f'{columns}==={TITLE}===入库成功')
                except Exception as e:
                    log.error(f'{columns}==={TITLE}===入库失败==={e}')
        # The next pass starts where this one ended.
        start_time = end_time
        time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        log.info(f"当前时间:{time_now}")
        log.info('等待一小时----')
        time.sleep(3600)
if __name__ == '__main__':
    try:
        doJob()
    finally:
        # doJob loops forever, so the only way past it is an exception or an
        # interrupt; `finally` makes sure baseCore is still closed then.
        baseCore.close()
zkr/推送.py
0 → 100644
浏览文件 @
fd395ea2
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
# @Author: MENG
# @Time : 2022-2-25
import
time
import
pymongo
import
datetime
import
requests
import
json
import
re
import
base64
from
kafka
import
KafkaProducer
from
requests.packages
import
urllib3
from
gridfs
import
GridFS
import
os
from
base
import
BaseCore
# Shared project helper object and the logger it provides.
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
# The push endpoints are called with verify=False; silence the TLS warnings.
urllib3.disable_warnings()

# One MongoDB client shared by everything below (previously two separate
# clients were opened against the same server).
# NOTE(review): host and credentials are hard-coded in source.
client = pymongo.MongoClient(
    host='114.115.221.202',
    port=27017,
    username='admin',
    password='ZZsn@9988',
)
# Collection holding the articles that are waiting to be pushed.
db_storage = client.中科软['数据源_0106']
db = client['ZZSN']
# GridFS handle on the ZZSN database, used to read stored PDF files.
fs = GridFS(db)
# Push one article record
def post_data(data):
    """POST *data* as JSON to the ``news_info`` endpoint.

    Returns:
        ``(code, msg)`` taken from the JSON reply when both keys are present;
        otherwise the reply's ``status`` value on its own.
        NOTE(review): the fallback is a single value, while
        pan_dun_and_tui_song unpacks two, so that caller ends up in its error
        path whenever the fallback is taken.

    Raises:
        Whatever ``requests`` raises for a failed request or a non-JSON body;
        ``KeyError``/``TypeError`` if the reply has no ``status`` either.
    """
    url = "https://103.83.45.34/overseasdata/news_info"
    payload = json.dumps(data)
    headers = {'Content-Type': 'application/json'}
    response = requests.request("POST", url, headers=headers, data=payload, verify=False)
    r = response.json()
    log.info(f'''{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}===推送数据==={r}''')
    try:
        return r['code'], r['msg']
    except (KeyError, TypeError):
        # Reply without code/msg: fall back to its 'status' member.
        return r['status']
# Push one image
def post_img(data):
    """POST *data* as JSON to the ``save_news_pic`` endpoint.

    Returns:
        The ``code`` member of the JSON reply, or its ``status`` member when
        ``code`` is absent.

    Raises:
        Whatever ``requests`` raises for a failed request or a non-JSON body;
        ``KeyError``/``TypeError`` if the reply has neither member.
    """
    url = "https://103.83.45.34/overseasdata/news_info/save_news_pic"
    payload = json.dumps(data)
    headers = {'Content-Type': 'application/json'}
    response = requests.request("POST", url, headers=headers, data=payload, verify=False)
    r = response.json()
    log.info(f'''{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}==推送图片==={r}''')
    try:
        return r['code']
    except (KeyError, TypeError):
        return r['status']
# Push the PDF file of one article
def post_filedata(data):
    """Upload the PDF named by *data* to the ``upload_pdf`` endpoint.

    The file is exported from GridFS to ``<ids>.pdf`` in the working
    directory by ``get_pdf_from_mongodb``, sent as a multipart upload, and
    the temporary file is removed afterwards, whether or not the upload
    succeeded.

    Args:
        data: dict with at least ``'pdfFilename'`` (GridFS file name) and
            ``'ids'`` (record id, used for the query string and file name).

    Returns:
        str: the response body text.

    Raises:
        Whatever the GridFS read or ``requests`` raises.
    """
    url = "https://103.83.45.34/overseasdata/news_info/upload_pdf"  # production
    payload = {}
    headers = {}
    filename = data['pdfFilename']
    ids = data['ids']
    # Same path get_pdf_from_mongodb writes to.
    file_path = ids + '.pdf'
    files = []
    try:
        files = get_pdf_from_mongodb(filename, ids)
        # NOTE(review): unlike post_data/post_img this request does not pass
        # verify=False -- confirm that is intended.
        response = requests.request("POST", url + '?ids=' + ids, headers=headers,
                                    data=payload, files=files)
        text = response.text
    finally:
        # Close the upload handle(s) before deleting: they were never closed
        # before, and an open handle blocks removal on some platforms.
        for _field, file_tuple in files:
            file_tuple[1].close()
        if os.path.exists(file_path):
            os.remove(file_path)
            log.info(f"文件 '{file_path}' 已成功删除")
        else:
            # Was `log.eroor`, which raised AttributeError on this path.
            log.error(f"文件 '{file_path}' 不存在")
    return text
# Read a file out of MongoDB (GridFS)
def get_pdf_from_mongodb(filename, output_path):
    """Export a GridFS file to ``<output_path>.pdf`` and prepare it for upload.

    Args:
        filename: name of the file inside GridFS.
        output_path: path of the local copy, without the ``.pdf`` suffix.

    Returns:
        list: a one-element ``files`` list for ``requests`` --
        ``[('file', (path, open_binary_handle, 'application/pdf'))]``.
        The handle is left open for the caller.
    """
    pdf_path = output_path + '.pdf'
    # The local file is opened before GridFS is queried, as before.
    with open(pdf_path, 'wb') as sink:
        grid_file = fs.get_version(filename=filename)
        sink.write(grid_file.read())
    upload_handle = open(pdf_path, 'rb')
    return [('file', (pdf_path, upload_handle, 'application/pdf'))]
# Screen the stored articles and push the ones that pass
_VW_MEDIA_ROOT = 'https://www.volkswagengroupchina.com.cn/MediaFile//Sync/'


def _stripped(record, key):
    """Return record[key] with surrounding whitespace removed when it can be stripped."""
    value = record[key]
    try:
        return value.strip()
    except Exception:
        return value


def _set_post_code(record, code):
    """Store *code* as the postCode of *record* in db_storage."""
    db_storage.update_one({'_id': record['_id']}, {'$set': {'postCode': code}})


def _vw_image_url(img_url):
    """Rebuild a backslash-separated Volkswagen image path as a URL under _VW_MEDIA_ROOT."""
    parts = img_url.split('\\')
    if len(parts) < 2:
        # No backslash to split on: indexing parts[-2] used to raise
        # IndexError and abort the whole batch. Leave the URL as it is.
        return img_url
    return _VW_MEDIA_ROOT + parts[-2] + '/' + parts[-1]


def _push_images(record):
    """Download every <img> found in record['CONTENT'] and push it with post_img."""
    raw_html = record['CONTENT']
    try:
        img_tags = re.findall('<img(.*?)>', raw_html)
    except Exception:
        return
    img_headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    }
    for img_tag in img_tags:
        img_url = img_tag.split('src="')[-1].split('"')[0]
        if 'volkswagengroupchina' in img_url:
            img_url = _vw_image_url(img_url)
            log.info(f'转换后的链接==={img_url}')
        try:
            img_resp = requests.get(img_url, headers=img_headers, timeout=10).content
        except Exception:
            continue
        if len(img_resp) < 1:
            continue
        img_dict = {
            'isd': str(record["_id"]),
            'img_href': img_url,
            'img_content': str(base64.b64encode(img_resp), encoding='utf-8'),
        }
        try:
            post_img(img_dict)
        except Exception:
            log.error('图片推送error')


def pan_dun_and_tui_song():
    """Screen recent articles in db_storage and push the acceptable ones.

    Selects records whose ``postCode`` is ``'1'`` or ``'10'`` and whose
    ``newsTime`` is not older than two days. Each record is then either

    * parked with ``postCode`` ``'403'`` (empty title/content/richText,
      question-mark garbage in the content, title equal to the foreign
      title, or publish time equal to crawl time for most sources),
    * sent back with ``postCode`` ``'2'`` (translation missing or still a
      placeholder, no ``tag2``), or
    * pushed with ``post_data``. On reply code ``0`` the PDF (if any) and the
      images are pushed too and the record's ``postCode``/``postTime`` are
      updated; on ``101`` the function waits a minute; anything else is
      logged as a failure.
    """
    date_yes = (datetime.datetime.now() + datetime.timedelta(days=-2)).strftime('%Y-%m-%d')
    # Read the whole result set first; records are updated while iterating.
    db_dict_list = list(db_storage.find({
        'postCode': {'$in': ['1', '10']},
        'newsTime': {'$gte': date_yes},
    }))
    log.info(f"{date_yes}===需要推送{len(db_dict_list)}条数据")
    placeholder = '待审核,待更新'
    for db_dict in db_dict_list:
        columns = _stripped(db_dict, 'columns')
        name = _stripped(db_dict, 'name')
        titleForeign = _stripped(db_dict, 'titleForeign')
        title = _stripped(db_dict, 'title')
        richTextForeign = _stripped(db_dict, 'richTextForeign')
        contentForeign = _stripped(db_dict, 'contentForeign')
        try:
            pdfurl = db_dict['pdfurl'].strip()
        except Exception:
            # Missing or non-text pdfurl means "no PDF".
            pdfurl = ''
        newsTime = db_dict['newsTime'].strip()
        CREATE_DATE = db_dict['CREATE_DATE'].strip()
        tag1 = _stripped(db_dict, 'tag1')
        if len(newsTime) == 10:
            # Date-only value: pad to a full timestamp for the comparison below.
            newsTime += ' 00:00:00'
        if db_dict['postCode'] == '1':
            content = _stripped(db_dict, 'content')
            richText = _stripped(db_dict, 'richText')
            tag2 = _stripped(db_dict, 'tag2')
        elif db_dict['postCode'] == '10':
            content = db_dict['content']
            richText = db_dict['richText']
            tag2 = db_dict['tag2']

        # Unusable records are parked under '403'.
        if (
            title == '' or content == '' or richText == ''
            or '? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?' in content
            or '???????????????????????' in content
            or titleForeign == title
            or (
                columns != '戴姆勒中文官网'
                and name != '巴黎市长活动'
                and name != 'mylondon'
                # Equal up to the last two characters of the timestamps.
                and str(newsTime)[:-2] == str(CREATE_DATE)[:-2]
            )
        ):
            _set_post_code(db_dict, '403')
            continue
        # Records whose translation is missing or unfinished go back to '2'.
        if (
            content == '译文来源:微软自动翻译'
            or tag2 == ''
            or richTextForeign == richText
            or contentForeign == content
            or placeholder in (title, richText, content, tag1, tag2)
        ):
            _set_post_code(db_dict, '2')
            continue

        if db_dict['name'] == '人民网':
            columns = '人民网'
        if str(db_dict['sid'].strip()) == '284671':
            # This source embeds backslash-separated image paths; rewrite
            # them inside the rich text before pushing.
            try:
                img_tags = re.findall('<img(.*?)>', richText)
            except Exception:
                continue
            for img_tag in img_tags:
                img_url = img_tag.split('src="')[-1].split('"')[0]
                if 'volkswagengroupchina' in img_url:
                    img_tem = _vw_image_url(img_url)
                    log.info(f'替换内容链接==={img_tem}')
                    richText = str(richText).replace(img_url, img_tem)
            db_dict['richText'] = richText
        try:
            pst_data = {
                "ids": str(db_dict["_id"]),
                "columns": columns,
                "titleForeign": clean(db_dict['titleForeign']),
                "title": clean(db_dict['title']),
                "contentForeign": clean(db_dict['contentForeign']),
                "content": clean(db_dict['content']),
                "newsTime": db_dict['newsTime'],
                "richTextForeign": clean(db_dict['richTextForeign']),
                "richText": clean(db_dict['richText'].replace("\\xF0\\x9F\\xA4\\xAD", '')),
                "tag1": db_dict['tag1'],
                "tag2": db_dict['tag2'],
                "columnUrl": db_dict['columnUrl'],
                "url": db_dict['url'],
                "name": db_dict['name'],
                "source": db_dict['source'],
            }
        except Exception as e:
            log.error(e)
            continue
        log.info(title)
        try:
            postCode, msg = post_data(pst_data)
        except Exception as e:
            log.error(e)
            log.error('推送接口出错!')
            time.sleep(60)
            continue

        if str(postCode) == '0':
            if '该新闻已存在,不需要二次更新' in msg:
                continue
            # A non-empty pdfurl means there is a PDF to upload as well.
            if pdfurl.strip():
                try:
                    pdf_data = {
                        "ids": str(db_dict["_id"]),
                        "columns": columns,
                        "title": clean(db_dict['title']),
                        "newsTime": db_dict['newsTime'],
                        "url": db_dict['url'],
                        "pdfurl": db_dict['pdfurl'],
                        "pdfFilename": db_dict['pdfFilename'],
                    }
                    # NOTE(review): this overwrites postCode with the upload
                    # response text, which is then stored as the record's
                    # postCode below -- confirm that is intended.
                    postCode = post_filedata(pdf_data)
                except Exception as e:
                    # Was swallowed with `pass`; the article itself is already
                    # pushed, so carry on, but record the failure.
                    log.error(f"pdf推送失败==={db_dict['_id']}==={e}")
            log.info(f"推送==={db_dict['_id']}==={db_dict['columns']}==={newsTime}==={postCode}")
            now_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            db_storage.update_one(
                {'_id': db_dict['_id']},
                {'$set': {'postCode': str(postCode), 'postTime': now_time}},
            )
            _push_images(db_dict)
        elif str(postCode) == '101':
            log.info('中科软处理数据中!')
            time.sleep(60)
        else:
            log.error(f"推送失败==={db_dict['_id']}==={db_dict['columns']}==={newsTime}==={postCode}")
# Every code point outside the Basic Multilingual Plane (U+10000..U+10FFFF),
# which is where emoji live. Compiled once at import time instead of on each
# call. Python 3 strings are always full-range, so the surrogate-pair fallback
# the old code kept for narrow builds could never be reached.
_ASTRAL_CHARS = re.compile('[\U00010000-\U0010ffff]')


def clean(desstr, restr=''):
    """Replace emoji and other non-BMP characters in *desstr*.

    Args:
        desstr: Text to filter. Must be a ``str``; any other type raises
            ``TypeError`` from ``re`` exactly as before.
        restr: Replacement for each matched character (default: remove it).

    Returns:
        A copy of *desstr* in which every character in the range
        U+10000..U+10FFFF has been replaced by *restr*.
    """
    return _ASTRAL_CHARS.sub(restr, desstr)
if __name__ == '__main__':
    # Run the push job forever: one pass, then a ten-minute pause. Any
    # exception raised during the pass (or the pause) is logged and the next
    # pass starts straight away, without waiting.
    pause_seconds = 600
    while True:
        try:
            pan_dun_and_tui_song()
            log.info('等待十分钟')
            time.sleep(pause_seconds)
        except Exception as exc:
            log.error(exc)
zkr/翻译.py
0 → 100644
浏览文件 @
fd395ea2
# D:\Program Files\Python36
# D:\Program Files\Python36
# -*- coding: utf-8 -*-
# @Time : 2022/2/19 14:20
from
pyquery
import
PyQuery
as
pq
import
xlrd
from
bson.objectid
import
ObjectId
import
json
import
time
import
requests
import
datetime
import
pymongo
import
pymysql
import
warnings
import
random
from
hashlib
import
md5
import
redis
from
baidufanyi
import
Translate
from
base
import
BaseCore
# Project helper object; in the code visible in this file it is only used to
# obtain the module-wide logger.
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
# Title blacklist. The main loop tests each entry with `entry in title`
# (plain substring match) against the translated title; on the first hit the
# record is written back with postCode '10' and blanked body fields.
# Duplicate entries were removed (first occurrence kept): they only repeated
# the same substring test and never changed the outcome.
# NOTE(review): '儿童、孩子们' is a single entry containing an ideographic comma,
# so it only matches that exact phrase -- possibly meant to be two entries;
# left as-is pending confirmation.
Filtrations = [
    '纽约州', '折扣推荐', '股指上涨', '涨跌不一', '二手', '免费发帖', '超市特价', '爱尔兰移民',
    '藏独', '疆独', '台独', '英语广播', '高通胀', '苹果官网', '人才招聘', '工作机会',
    'GAZOO', '尸体', '美通社日历', '苹果日报', 'HPV', '皇家马德里', '安阳', '柏林区',
    '傅首尔', '???', '? ? ?', '火灾', '偷盗', '盗抢', '死亡', '性侵',
    '毒品', '犯罪', '报名', '美剧', '男子', '女子', '嫌犯', '摘要',
    '要闻', 'LGBT', '跨性别', '马拉松', '球员', '着火', '梅西', '餐馆',
    '新冠疫情', '银行', '篮球', '疫苗', '三人篮球', '新冠', '三人女篮', '谋杀',
    '欧罗巴联赛', '肇事逃逸', '新冠病例', '赛车公司', '人权捍卫', '内马尔', '吸毒', '失踪者',
    '超级明星', '遗体', '球星', '致命袭击', '遇袭', '巨额报酬', '群殴', '链接',
    '持刀袭击', '大卖场', '纵火', '白人', '开枪打死', '泽连斯基', '新冠变异株', '特朗普',
    '大火', '利率', '嫌疑人', '起火', '脸书', '世界杯', '枪击', '马德里大师赛',
    '四强', '抄袭', '中国网球', '八强', '虐待', '无人机袭击', '音乐厅', '患者',
    '加冕典礼', '楼市', '性丑闻', '加冕仪式', '股票', '监狱', '枪杀', '股市',
    '牢房', '枪手', '警察', '百元店', '票房', '警方', '橄榄球联盟', '娱乐圈',
    'WSL', '超级碗', '唱片', '超级联赛', '美妆', '化妆', '暴击', '韩妆',
    '验尸官', '拐卖', '大麻', '买卖人口', '儿童、孩子们', '足球裁判', '持刀', '死于',
    '毒贩', '车祸', '入狱', '华裔', '抢劫', '种族歧视', '法庭', '辱骂',
    '婚礼', '价格', '车辆着火', '板球运动员', '招聘', '重返赛场', '明星', '赛场',
    '醉汉', '乌克兰战争', '持枪', '纳粹', '皇家空军', '纳粹分子', '强迫', '被枪杀',
    '残疾', '命案', '读博', '洋基队', '航班', '意甲冠军', '高尔夫球', '国际米兰',
    '商店', '陪审团', '运动员', '妻子', '新型冠状病毒', '年薪', '房市', '房地产',
    '地产', '星级主厨', '骗局', '老年人', '餐厅', '球队', '诈骗', '维权',
    '耐力锦标赛', '拉力赛', 'WRC', 'WTRC', '耐力赛', 'TCR', '河南', '排位赛',
    '色情', '阴道', '凯尔特人', '天安门',
]
# City relevance rules as [source-name fragment, required city] pairs.
# The main loop applies them as: if the fragment is a substring of the
# record's `columns` value and the city is NOT a substring of the translated
# title, the record is written back with postCode '10' and blanked body fields.
# NOTE(review): ['xxxx-sssss', '北京'] looks like a placeholder/test row -- confirm.
Sifts = [
    ['lejournaldugrandparis-成功的故事', '巴黎'],
    ['lejournaldugrandparis-大巴黎', '巴黎'],
    ['lejournaldugrandparis-大巴黎报革新', '巴黎'],
    ['lejournaldugrandparis-地点', '巴黎'],
    ['lejournaldugrandparis-公共市场', '巴黎'],
    ['lejournaldugrandparis-规划', '巴黎'],
    ['lejournaldugrandparis-国际的', '巴黎'],
    ['lejournaldugrandparis-机构', '巴黎'],
    ['lejournaldugrandparis-基础设施', '巴黎'],
    ['lejournaldugrandparis-社区', '巴黎'],
    ['lejournaldugrandparis-生长', '巴黎'],
    ['lejournaldugrandparis-托儿所', '巴黎'],
    ['lejournaldugrandparis-吸引力', '巴黎'],
    ['lemonde-世界报巴黎', '巴黎'],
    ['mylondon-北伦敦', '伦敦'],
    ['mylondon-东伦敦', '伦敦'],
    ['mylondon-怀旧之情', '伦敦'],
    ['mylondon-驾驶', '伦敦'],
    ['mylondon-交通出行', '伦敦'],
    ['mylondon-教育', '伦敦'],
    ['mylondon-伦敦商业', '伦敦'],
    ['mylondon-伦敦市中心', '伦敦'],
    ['mylondon-南伦敦', '伦敦'],
    ['mylondon-西伦敦', '伦敦'],
    ['mylondon-新闻观点', '伦敦'],
    ['mylondon-政治', '伦敦'],
    ['nydailynews-2021 年纽约市选举', '纽约'],
    ['nydailynews-布朗克斯', '纽约'],
    ['nydailynews-布鲁克林', '纽约'],
    ['nydailynews-观点', '纽约'],
    ['nydailynews-皇后区', '纽约'],
    ['nydailynews-教育', '纽约'],
    ['nydailynews-曼哈顿', '纽约'],
    ['nydailynews-纽约', '纽约'],
    ['nydailynews-纽约每日新闻新冠病毒', '纽约'],
    ['nydailynews-纽约政治', '纽约'],
    ['OTTAWAZINE', '渥太华'],
    ['ouest-france-法兰西西部报法兰西岛', '巴黎'],
    ['standard-华为', '伦敦'],
    ['standard-健康', '伦敦'],
    ['standard-教育', '伦敦'],
    ['standard-可持续标准', '伦敦'],
    ['standard-可持续发展', '伦敦'],
    ['standard-伦敦', '伦敦'],
    ['standard-伦敦市长', '伦敦'],
    ['standard-商业', '伦敦'],
    ['standard-世界疫苗', '伦敦'],
    ['standard-新冠病毒', '伦敦'],
    ['standard-运输', '伦敦'],
    ['straitstimes-工作', '新加坡'],
    ['straitstimes-公司与市场', '新加坡'],
    ['straitstimes-环境', '新加坡'],
    ['straitstimes-健康', '新加坡'],
    ['straitstimes-经济', '新加坡'],
    ['straitstimes-科技新闻', '新加坡'],
    ['straitstimes-社区', '新加坡'],
    ['straitstimes-消费者', '新加坡'],
    ['straitstimes-育儿与教育', '新加坡'],
    ['straitstimes-运输', '新加坡'],
    ['straitstimes-政治', '新加坡'],
    ['straitstimes-住房', '新加坡'],
    ['tokyo-东京', '东京'],
    ['tokyo-东京新闻新冠', '东京'],
    ['tokyo-教育', '东京'],
    ['tokyo-经济', '东京'],
    ['tokyo-社会', '东京'],
    ['tokyo-社论', '东京'],
    ['tokyo-首都圈新闻', '东京'],
    ['tokyo-政治', '东京'],
    ['xxxx-sssss', '北京'],
    ['爱岛文化', '都柏林'],
    ['爱尔兰吧', '都柏林'],
    ['柏林伙伴', '柏林'],
    ['朝日东京', '东京'],
    ['朝日世論調査', '东京'],
    ['朝日小池都政', '东京'],
    ['城市知道温哥华', '温哥华'],
    ['大华府华人资讯网', '温哥华'],
    ['德国华人街', '柏林'],
    ['德国热线', '柏林'],
    ['德中网', '柏林'],
    ['东京在线', '东京'],
    ['俄罗斯龙报', '莫斯科'],
    ['法国巴黎大区', '巴黎'],
    ['法国中文网', '巴黎'],
    ['华人志', '都柏林'],
    ['加都人', '渥太华'],
    ['伦敦发展促进署(公众号)', '伦敦'],
    ['每日俄罗斯在线', '莫斯科'],
    ['纽约华人资讯网', '纽约'],
    ['纽约时间', '纽约'],
    ['企航新加坡', '新加坡'],
    ['瞧纽约', '纽约'],
    ['温哥华头条', '温哥华'],
    ['渥太华CFC', '渥太华'],
    ['新加坡红蚂蚁', '新加坡'],
    ['新加坡华人圈', '新加坡'],
    ['新加坡圈', '新加坡'],
    ['新加坡鱼尾文', '新加坡'],
    ['英伦投资客', '伦敦'],
    ['遇见纽约', '纽约'],
    ['中国驻英国大使馆', '伦敦'],
    ['英国驻华大使馆', '伦敦'],
    ['法国驻华大使馆', '巴黎'],
    ['法国旅游发展署', '巴黎'],
    ['德国印象', '柏林'],
    ['德国联邦外贸与投资署', '柏林'],
    ['赴德研究', '柏林'],
    ['投资德国北威州', '科隆'],
    ['中国驻杜塞尔多夫总领馆', '科隆'],
    ['凤凰欧洲', '罗马'],
    ['中国驻欧盟使团', '罗马'],
    ['马德里投资代表处', '马德里'],
    ['中国驻罗马尼亚大使馆', '布加勒斯特'],
    ['以色列经济与产业部', '特拉维夫'],
    ['朝鲜日报', '首尔'],
    ['首尔日报', '首尔'],
    ['首尔中国文化中心', '首尔'],
    ['中国驻日本大使馆', '东京'],
    ['中国驻新加坡大使馆', '新加坡'],
    ['中国驻纽约总领馆', '纽约'],
    ['中国驻法国大使馆网', '巴黎'],
    ['CBS新闻', '洛杉矶'],
    ['洛杉矶城市观察', '洛杉矶'],
    ['今日洛杉矶', '洛杉矶'],
    ['旧金山湾区华人资讯', '旧金山'],
    ['旧金山市政厅新闻', '旧金山'],
    ['旧金山新闻', '旧金山'],
    ['旧金山政治新闻', '旧金山'],
    ['旧金山商业科技新闻', '旧金山'],
    ['多伦多城市新闻', '多伦多'],
    ['莫斯科时报', '莫斯科'],
    ['goToronto', '多伦多'],
    ['多伦多时间', '多伦多'],
    ['西闻', '马德里'],
    ['马德里市20分钟报', '马德里'],
    ['马德里新闻', '马德里'],
    ['柏林晨报-柏林新闻', '柏林'],
    ['柏林晨报-初创企业新闻', '柏林'],
    ['东京观光指南 GOTOKYO', '东京'],
    ['BBC-英格兰', '伦敦'],
    ['BBC-伦敦', '伦敦'],
    ['metro-伦敦', '伦敦'],
    ['metro-英国', '伦敦'],
    ['马德里自治区-好日报', '马德里自治区'],
    ['SurMadrid-马德里自治区', '马德里自治区'],
    ['SurMadrid-马德里', '马德里自治区'],
    ['欧华集团', '马德里自治区'],
    ['西班牙华人街', '马德里自治区'],
    ['ABC纽约', '纽约'],
    ['FOX5纽约', '纽约'],
    ['CBS纽约', '纽约'],
    ['东亚日报-文化', '首尔'],
    ['东亚日报-政治', '首尔'],
    ['韩联社-政治', '首尔'],
    ['建设新闻', '大阪府'],
    ['朝日新闻_大阪府', '大阪府'],
    ['客观日本', '大阪府'],
    ['波士顿中文网', '波士顿'],
    ['波士顿留学生网', '波士顿'],
    ['波士顿发展新闻', '波士顿'],
    ['今日波士顿', '波士顿'],
    ['韩国亚洲经济', '首尔'],
    ['亚洲日报', '首尔'],
    ['奥斯汀纪事报', '奥斯汀'],
    ['奥斯汀城市化', '奥斯汀'],
    ['休斯顿星空网', '奥斯汀'],
    ['芝加哥城市化', '芝加哥'],
    ['芝加哥建筑新闻特刊', '芝加哥'],
    ['芝加哥一手资讯', '芝加哥'],
    ['旧金山纪事报', '旧金山'],
    ['中国驻匈牙利大使馆', '布达佩斯'],
    ['中国驻德国大使馆', '柏林'],
    ['中国驻比利时大使馆', '布鲁塞尔'],
    ['中国驻荷兰王国大使馆', '阿姆斯特丹'],
    ['丹麦投资促进局', '哥本哈根'],
    ['丹麦研究中心', '哥本哈根'],
    ['丹麦科技创业中心', '哥本哈根'],
    ['今日芬兰', '赫尔辛基'],
    ['芬兰创新商业资讯', '赫尔辛基'],
    ['赫尔辛基Helsinki Times', '赫尔辛基'],
    ['赫尔辛基华人生活', '赫尔辛基'],
    ['首尔新闻', '首尔'],
    ['NBC波士顿新闻', '波士顿'],
    ['中国驻法国大使馆', '巴黎'],
    ['哥本哈根中国文化中心', '哥本哈根'],
    ['罗马建筑师协会新闻', '罗马'],
    ['今日罗马环境新闻', '罗马'],
    ['今日罗马经济新闻', '罗马'],
    ['今日罗马政治新闻', '罗马'],
    ['24小时报罗马新闻', '罗马'],
    ['KBH新闻', '哥本哈根'],
    ['数字赫尔辛基新闻', '赫尔辛基'],
    ['赫尔辛基本地新闻', '赫尔辛基'],
    ['布鲁塞尔新闻', '布鲁塞尔'],
    ['布鲁塞尔大区城市新闻', '布鲁塞尔'],
    ['布鲁塞尔统计分析新闻', '布鲁塞尔'],
    ['休斯顿社区发展新闻', '休斯顿'],
    ['休斯顿日报-经济新闻', '休斯顿'],
    ['今日悉尼', '悉尼'],
    ['悉尼印象', '悉尼'],
    ['商务投资布鲁塞尔', '布鲁塞尔']]
# Suppress every DeprecationWarning for the rest of the process.
warnings.filterwarnings("ignore", category=DeprecationWarning)
class Handler():
    """Client for the HTTP tagging and translation services used by this script."""

    def __init__(self):
        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from configuration or the environment instead.
        self.clientLocal = pymongo.MongoClient('mongodb://localhost:27017/', username='admin', password='ZZsn@9988')

    def getcityTag1(self, text):
        """Return the city tags the ``get_city`` service extracts from *text*.

        The value is ``resultData.data`` from the JSON reply (stored by the
        main loop as ``tag1``). Any failure -- network error, timeout,
        non-JSON body, unexpected JSON shape -- yields ``''``.
        """
        url = "http://39.105.62.235:7006/get_city/"
        payload = {'text': text}
        try:
            response = requests.request("POST", url, data=payload, timeout=300)
            return response.json()['resultData']['data']
        except Exception:
            # Best-effort lookup: a missing tag must not abort the record.
            return ''

    def getKeywordsTags2(self, text):
        """Return the key phrases the ``get_phrase`` service extracts from *text*.

        Requests the top 20 phrases and returns ``resultData.data`` from the
        JSON reply (stored by the main loop as ``tag2``). Any failure yields
        ``''``.
        """
        url = "http://39.105.62.235:7006/get_phrase/"
        payload = {'topK': '20', 'text': text, 'name': 'phrase'}
        try:
            # Same 300 s limit as getcityTag1; without a timeout a stalled
            # server would block the worker loop indefinitely.
            response = requests.request("POST", url, data=payload, timeout=300)
            return response.json()['resultData']['data']
        except Exception:
            return ''

    def postTrans(self, html):
        """Translate *html* through the translation HTTP API.

        The original comment labelled this "Baidu HTML translation"; the
        footer appended to the result credits Microsoft auto-translation.

        Returns:
            The response body with the attribution footer appended.

        Raises:
            requests.RequestException: on connection failure, timeout, or a
                4xx/5xx reply (so an error page is never returned as if it
                were a translation). Callers in this file already catch
                exceptions from this method and log a translation failure.
        """
        url = "http://114.115.155.139:8008/api/translate/getTranslateInfoWithTagContent"
        payload = {'co': html}
        files = []
        headers = {}
        response = requests.request("POST", url, headers=headers, data=payload, files=files, timeout=600)
        response.raise_for_status()
        return response.text + '<p/><br>译文来源:微软自动翻译<br></p>'
if __name__ == '__main__':
    # Translation worker: pops record ids from a Redis set, translates the
    # title and rich-text body of the matching MongoDB document, filters
    # unwanted records, and writes the result back with a postCode.
    # NOTE(review): this block was rebuilt from a flattened listing; the
    # nesting below is the most plausible reading -- confirm against the file.

    # These two assignments rebind the imported/defined class names to instances.
    Translate = Translate()
    Handler = Handler()
    # NOTE(review): hosts and credentials are hard-coded in source.
    db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017/', username='admin', password='ZZsn@9988').中科软['数据源_0106']
    r = redis.StrictRedis(host='114.115.221.202', port=6379, db=1, decode_responses=True, password='clbzzsn')
    while True:
        # db_id = r.spop('db_id2')
        # SPOP removes the id from the set, so any record skipped via
        # `continue` below is not retried by this loop.
        db_id = r.spop('db_sid2')
        if db_id is None:
            # Queue empty: wait ten minutes before polling again.
            print('暂无翻译数据, 等待十分钟')
            time.sleep(600)
            continue
        a_dict = db_storage.find_one({'_id': ObjectId(db_id)})
        log.info(f'{db_id}==={datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
        t1 = int(time.time())
        try:
            # Also skips the record when find_one returned None or a field is
            # missing / not a string. LANG is not used further in this block.
            titleForeign = a_dict['titleForeign'].strip()
            LANG = a_dict['LANG'].strip()
        except Exception as e:
            continue
        try:
            columns = a_dict['columns'].strip()
        except:
            # Falls back to the raw value when it has no .strip().
            # NOTE(review): if the 'columns' key is missing this lookup raises
            # again, uncaught, and terminates the worker.
            columns = a_dict['columns']
        try:
            try:
                # First choice: browser-driven translation.
                title = Translate.gethtml(titleForeign)
            except:
                # Fallback: restart the browser and use the HTTP API, then
                # strip markup and the attribution footer postTrans appends.
                Translate.kill_firefox()
                title = Handler.postTrans(titleForeign)
                title = pq(title).text().replace('译文来源:微软自动翻译', '').strip()
        except:
            log.error('翻译失败')
            continue
        # --- Filter 1: blacklist words in the translated title ---------------
        flg = False
        for Filtration in Filtrations:
            if Filtration in title:
                richText = ' '
                content = ' '
                tag1 = ''
                tag2 = ' '
                db_storage.update_one({'_id': ObjectId(db_id)}, {'$set': {'postCode': '10', 'title': title, 'richText': richText, 'content': content, 'tag1': tag1, 'tag2': tag2}})
                log.info(f'{title}===包含过滤词')
                flg = True
                break
        if flg:
            continue
        # --- Filter 2: source requires its city name in the title ------------
        for Sift in Sifts:
            if Sift[0] in columns:
                if Sift[1] not in title:
                    richText = ' '
                    content = ' '
                    tag1 = ''
                    tag2 = ' '
                    db_storage.update_one({'_id': ObjectId(db_id)}, {'$set': {'postCode': '10', 'title': title, 'richText': richText, 'content': content, 'tag1': tag1, 'tag2': tag2}})
                    log.info(f'{title}===缺少城市信息')
                    flg = True
                    break
        if flg:
            continue
        try:
            richTextForeign = a_dict['richTextForeign'].strip()
        except:
            # The record is skipped either way; the assignment has no effect
            # on later iterations (and raises if the key is missing).
            richTextForeign = a_dict['richTextForeign']
            continue
        if title == titleForeign:
            # Translation returned the title unchanged: the record is treated
            # as already Chinese. The foreign fields are cleared, LANG is set
            # to 'zh-cn', and the original body is stored as the final text.
            richText = richTextForeign
            content = pq(richText).text()
            tag2 = Handler.getKeywordsTags2(content)
            tag2 = ';'.join(tag2)
            db_storage.update_one({'_id': ObjectId(db_id)}, {'$set': {'postCode': '1', 'LANG': 'zh-cn', 'titleForeign': '', 'richTextForeign': '', 'contentForeign': '', 'TITLE': title, 'title': title, 'tag1': '', 'tag2': tag2, 'content': content, 'richText': richText}})
            log.info(f'{title}===语言识别错误已修改')
            continue
        log.info(title)
        try:
            try:
                richText = Translate.gethtml(richTextForeign)
                log.info('浏览器翻译成功')
            except:
                # Same fallback as for the title, but the footer is kept.
                Translate.kill_firefox()
                richText = Handler.postTrans(richTextForeign)
                log.info('接口翻译成功')
        except Exception as e:
            log.error('翻译失败')
            continue
        # Plain-text version of the translated HTML, used for tagging.
        content = pq(richText).text()
        try:
            tag1 = Handler.getcityTag1(content)
        except:
            tag1 = ''
        # presumably the service returns a list of strings; joining a plain
        # string would put ';' between its characters -- TODO confirm.
        tag1 = ';'.join(tag1)
        try:
            tag2 = Handler.getKeywordsTags2(content)
            tag2 = ';'.join(tag2)
        except:
            continue
        db_storage.update_one({'_id': a_dict['_id']}, {'$set': {'title': title, 'richText': richText, 'content': content, 'tag1': tag1, 'tag2': tag2, 'postCode': '1'}})
        t2 = int(time.time())
        log.info(f"{db_id}===翻译用时==={int(t2 - t1)}")
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论