Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
王景浩
zzsn_spider
Commits
bcf9dc87
提交
bcf9dc87
authored
9月 09, 2024
作者:
XveLingKun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
0909-更新mysql和es的ip和mongo的ip
上级
9e335d9a
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
112 个修改的文件
包含
131 行增加
和
131 行删除
+131
-131
BaseCore.py
REITs_policyData/BaseCore.py
+1
-1
edge_pyautogui.py
Translate/edge_pyautogui.py
+1
-1
Twbaseinfo.py
comData/BaseInfo_TW/Twbaseinfo.py
+1
-1
base_info.py
comData/BaseInfo_qcc/base_info.py
+1
-1
base_info_0815.py
comData/BaseInfo_qcc/base_info_0815.py
+1
-1
baseinfo0123.py
comData/BaseInfo_qcc/baseinfo0123.py
+1
-1
baseinfo1113.py
comData/BaseInfo_qcc/baseinfo1113.py
+1
-1
baseinfo1122.py
comData/BaseInfo_qcc/baseinfo1122.py
+1
-1
dujs_1020_baseinfo.py
comData/BaseInfo_qcc/dujs_1020_baseinfo.py
+1
-1
gudongxinxi.py
comData/BaseInfo_qcc/gudongxinxi.py
+1
-1
base_info_us.py
comData/SEC_US/base_info_us.py
+1
-1
baseinfo0130_tyc.py
comData/Tyc/baseinfo0130_tyc.py
+2
-2
baseinfo0227_tyc.py
comData/Tyc/baseinfo0227_tyc.py
+2
-2
baseinfotyc_update.py
comData/Tyc/baseinfotyc_update.py
+2
-2
baseinfouptime_tyc.py
comData/Tyc/baseinfouptime_tyc.py
+2
-2
classtool.py
comData/Tyc/classtool.py
+2
-2
fbs_tyc_qydt.py
comData/Tyc/fbs_tyc_qydt.py
+1
-1
get_tyc_cookies.py
comData/Tyc/get_tyc_cookies.py
+1
-1
newsbucai.py
comData/Tyc/newsbucai.py
+1
-1
newsbucai1.py
comData/Tyc/newsbucai1.py
+1
-1
tyc_qydt_add.py
comData/Tyc/tyc_qydt_add.py
+1
-1
resentYanbao.py
comData/YanBao/resentYanbao.py
+2
-2
fbs_annualreport.py
comData/annualReport/fbs_annualreport.py
+1
-1
fbs_annualreport_1.py
comData/annualReport/fbs_annualreport_1.py
+1
-1
证监会-年报.py
comData/annualReport/证监会-年报.py
+2
-2
雪球网-年报.py
comData/annualReport/雪球网-年报.py
+2
-2
report.py
comData/annualReport1014/report.py
+1
-1
report1.py
comData/annualReport1014/report1.py
+1
-1
report_url.py
comData/annualReport1014/report_url.py
+1
-1
report_url_om.py
comData/annualReport1014/report_url_om.py
+1
-1
test.py
comData/annualReport1023/test.py
+1
-1
uptoes.py
comData/annualReport1023/uptoes.py
+1
-1
annualreportUS.py
comData/annualReport_US/annualreportUS.py
+1
-1
config.ini
comData/caiwushuju/config.ini
+1
-1
bmfw.py
comData/dingzhi/bmfw.py
+1
-1
dfsm_sasac.py
comData/dingzhi/dfsm_sasac.py
+1
-1
europa.py
comData/dingzhi/europa.py
+1
-1
gdelt.py
comData/dingzhi/gdelt.py
+1
-1
gzyw_sasac.py
comData/dingzhi/gzyw_sasac.py
+1
-1
miit.py
comData/dingzhi/miit.py
+1
-1
see_measures.py
comData/dingzhi/see_measures.py
+1
-1
wsj_detail.py
comData/dingzhi/wsj_detail.py
+1
-1
zzcx.py
comData/dingzhi/zzcx.py
+1
-1
zyjjhy.py
comData/important_meeting/zyjjhy.py
+1
-1
zyqmshggldxzhy19.py
comData/important_meeting/zyqmshggldxzhy19.py
+1
-1
leadership.py
comData/leadership/leadership.py
+1
-1
nasdaq_news.py
comData/nasdaq/nasdaq_news.py
+1
-1
ccpg.py
comData/negative_news/ccpg.py
+1
-1
creditchina.py
comData/negative_news/creditchina.py
+1
-1
baseinfo_champion.py
comData/newlist/champion/baseinfo_champion.py
+1
-1
china100.py
comData/newlist/china100/china100.py
+1
-1
global100.py
comData/newlist/global100/global100.py
+1
-1
baseinfo_hundred.py
comData/newlist/hundred/baseinfo_hundred.py
+1
-1
baseinfo_tech.py
comData/newlist/technological/baseinfo_tech.py
+1
-1
fbs_notice.py
comData/noticeReport/fbs_notice.py
+1
-1
东方财富网-公告.py
comData/noticeReport/东方财富网-公告.py
+2
-2
东方财富网-港股公告-2.py
comData/noticeReport/东方财富网-港股公告-2.py
+2
-2
东方财富网-港股公告.py
comData/noticeReport/东方财富网-港股公告.py
+2
-2
证监会-公告.py
comData/noticeReport/证监会-公告.py
+2
-2
ClassTool.py
comData/policylaw/ClassTool.py
+1
-1
gyw.py
comData/policylaw/gyw/gyw.py
+1
-1
tingtype.py
comData/policylaw/tingtype.py
+1
-1
nyse_news.py
comData/sinafinance_news/nyse_news.py
+1
-1
nyse_news_gn.py
comData/sinafinance_news/nyse_news_gn.py
+1
-1
nyse_news_xg.py
comData/sinafinance_news/nyse_news_xg.py
+1
-1
nyse_notice_gn.py
comData/sinafinance_news/nyse_notice_gn.py
+1
-1
qgzypt.py
comData/tender/qgzypt.py
+1
-1
zhongguocg.py
comData/tender/zhongguocg.py
+1
-1
zhongyangcg.py
comData/tender/zhongyangcg.py
+1
-1
tradviewNew.py
comData/tradingview/tradviewNew.py
+1
-1
tradviewNewBak.py
comData/tradingview/tradviewNewBak.py
+1
-1
newsbucaitest.py
comData/tyctest/newsbucaitest.py
+1
-1
tycdt.py
comData/tyctest/tycdt.py
+1
-1
config.ini
comData/weixin_solo/config.ini
+1
-1
get_tokenCookies.py
comData/weixin_solo/get_tokenCookies.py
+2
-2
oneWeixin.py
comData/weixin_solo/oneWeixin.py
+3
-3
oneWeixin2.py
comData/weixin_solo/oneWeixin2.py
+2
-2
oneWeixin_test.py
comData/weixin_solo/oneWeixin_test.py
+2
-2
oneWeixin2.py
comData/weixin_solo/机器人大会/oneWeixin2.py
+4
-4
yahoo_baseInfo.py
comData/yhcj/yahoo_baseInfo.py
+1
-1
雅虎财经_企业动态.py
comData/yhcj/雅虎财经_企业动态.py
+2
-2
weibo.py
comData/微博采集/weibo.py
+4
-4
get_employees.py
comData/福布斯榜单/get_employees.py
+2
-2
get_employees_2022.py
comData/福布斯榜单/get_employees_2022.py
+2
-2
裁判文书网列表正文.py
cpws/裁判文书网列表正文.py
+2
-2
裁判文书网列表正文_redis.py
cpws/裁判文书网列表正文_redis.py
+2
-2
classtool.py
enterprise_tyc/classtool.py
+1
-1
丝路商机剩余数据查询.py
es拉取全球企业资讯/丝路商机剩余数据查询.py
+2
-2
拉取不保留数据.py
es拉取全球企业资讯/拉取不保留数据.py
+2
-2
拉取保留数据.py
es拉取全球企业资讯/拉取保留数据.py
+2
-2
config.ini
google_comm/config.ini
+1
-1
europa.py
gwzk/europa.py
+1
-1
lhg.py
gwzk/lhg.py
+1
-1
ozll.py
gwzk/ozll.py
+1
-1
wtoorg.py
gwzk/wtoorg.py
+1
-1
config.ini
jrtt1news_comm/config.ini
+1
-1
qiushi_leaderspeech.py
qiushi_leaderspeech.py
+1
-1
config.ini
qqnews_comm/config.ini
+1
-1
config.ini
sougou_comm/config.ini
+1
-1
config.ini
souhunews_comm/config.ini
+1
-1
test.py
test.py
+0
-0
classtool.py
test/classtool.py
+0
-0
test.py
test/test.py
+0
-0
config.ini
toutiaonews_comm/config.ini
+0
-0
esToMongodb.py
zkr/esToMongodb.py
+0
-0
翻译.py
zkr/翻译.py
+0
-0
spider_main.py
中国外汇交易中心/spider_main.py
+0
-0
1.py
习近平讲话/1.py
+0
-0
get_html.py
国务院问答对处理/get_html.py
+0
-0
baidufanyi.py
百度翻译/baidufanyi.py
+0
-0
fanyi_test.py
百度翻译/fanyi_test.py
+0
-0
config.ini
百度采集/baidu_comm/config.ini
+0
-0
没有找到文件。
REITs_policyData/BaseCore.py
浏览文件 @
bcf9dc87
...
...
@@ -550,7 +550,7 @@ class BaseCore:
def
sendkafka
(
self
,
post_data
,
topic
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
topic
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
...
...
Translate/edge_pyautogui.py
浏览文件 @
bcf9dc87
...
...
@@ -35,7 +35,7 @@ thread_local = threading.local()
from
tempfile
import
TemporaryFile
r
=
redis
.
StrictRedis
(
host
=
'114.115.221.202'
,
port
=
6379
,
db
=
1
,
decode_responses
=
True
,
password
=
'clbzzsn'
)
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
中科软
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
中科软
[
'数据源_0504'
]
# path = r'D:\soft\msedgedriver.exe'
...
...
comData/BaseInfo_TW/Twbaseinfo.py
浏览文件 @
bcf9dc87
...
...
@@ -61,7 +61,7 @@ aa_dict = {
'status'
:
0
,
# 状态
}
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
aa_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/BaseInfo_qcc/base_info.py
浏览文件 @
bcf9dc87
...
...
@@ -419,7 +419,7 @@ if __name__ == '__main__':
baseCore
.
writerToExcel
(
name_list
,
file_name
)
log
.
info
(
f
'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}'
)
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/BaseInfo_qcc/base_info_0815.py
浏览文件 @
bcf9dc87
...
...
@@ -44,7 +44,7 @@ aa_dict = {
}
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
aa_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/BaseInfo_qcc/baseinfo0123.py
浏览文件 @
bcf9dc87
...
...
@@ -44,7 +44,7 @@ def create_driver():
# 发送数据
def
sendkafka
(
post_data
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"enterpriseInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/BaseInfo_qcc/baseinfo1113.py
浏览文件 @
bcf9dc87
...
...
@@ -26,7 +26,7 @@ tag = Tag()
# 发送数据
def
sendkafka
(
post_data
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"enterpriseInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/BaseInfo_qcc/baseinfo1122.py
浏览文件 @
bcf9dc87
...
...
@@ -26,7 +26,7 @@ tag = Tag()
# 发送数据
def
sendkafka
(
post_data
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"enterpriseInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/BaseInfo_qcc/dujs_1020_baseinfo.py
浏览文件 @
bcf9dc87
...
...
@@ -405,7 +405,7 @@ if __name__ == '__main__':
log
.
info
(
f
'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time, time.time())}'
)
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/BaseInfo_qcc/gudongxinxi.py
浏览文件 @
bcf9dc87
...
...
@@ -94,7 +94,7 @@ def login():
import
pymongo
# 连接数据库
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'股东信息'
]
# 从数据库存储
...
...
comData/SEC_US/base_info_us.py
浏览文件 @
bcf9dc87
...
...
@@ -121,7 +121,7 @@ if __name__=='__main__':
}
print
(
com_dict
)
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
com_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
log
.
info
(
kafka_result
.
get
(
timeout
=
10
))
log
.
info
(
f
'{cik}---{cname}---基本信息采集成功'
)
...
...
comData/Tyc/baseinfo0130_tyc.py
浏览文件 @
bcf9dc87
...
...
@@ -11,7 +11,7 @@ from kafka import KafkaProducer
import
urllib3
from
selenium.webdriver.support.wait
import
WebDriverWait
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'天眼查登录信息'
]
urllib3
.
disable_warnings
(
urllib3
.
exceptions
.
InsecureRequestWarning
)
from
dateutil.relativedelta
import
relativedelta
...
...
@@ -47,7 +47,7 @@ def create_driver():
# 发送数据
def
sendkafka
(
post_data
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"enterpriseInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/Tyc/baseinfo0227_tyc.py
浏览文件 @
bcf9dc87
...
...
@@ -12,7 +12,7 @@ from selenium.webdriver.edge.service import Service
import
urllib3
from
selenium.webdriver.support.wait
import
WebDriverWait
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'天眼查登录信息'
]
urllib3
.
disable_warnings
(
urllib3
.
exceptions
.
InsecureRequestWarning
)
from
dateutil.relativedelta
import
relativedelta
...
...
@@ -54,7 +54,7 @@ def create_driver():
# 发送数据
def
sendkafka
(
post_data
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"enterpriseInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/Tyc/baseinfotyc_update.py
浏览文件 @
bcf9dc87
...
...
@@ -13,7 +13,7 @@ from kafka import KafkaProducer
import
urllib3
from
retry
import
retry
from
selenium.webdriver.support.wait
import
WebDriverWait
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'天眼查登录信息'
]
urllib3
.
disable_warnings
(
urllib3
.
exceptions
.
InsecureRequestWarning
)
...
...
@@ -52,7 +52,7 @@ def create_driver():
# 发送数据
def
sendkafka
(
post_data
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"enterpriseInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/Tyc/baseinfouptime_tyc.py
浏览文件 @
bcf9dc87
...
...
@@ -11,7 +11,7 @@ from kafka import KafkaProducer
import
urllib3
from
selenium.webdriver.support.wait
import
WebDriverWait
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'天眼查登录信息'
]
urllib3
.
disable_warnings
(
urllib3
.
exceptions
.
InsecureRequestWarning
)
...
...
@@ -50,7 +50,7 @@ def create_driver():
# 发送数据
def
sendkafka
(
post_data
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"enterpriseInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/Tyc/classtool.py
浏览文件 @
bcf9dc87
...
...
@@ -14,9 +14,9 @@ baseCore = BaseCore.BaseCore()
log
=
baseCore
.
getLogger
()
cnx
=
baseCore
.
cnx
cursor
=
baseCore
.
cursor
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'天眼查登录信息'
]
db_storage2
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage2
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'最大股东信息0902'
]
...
...
comData/Tyc/fbs_tyc_qydt.py
浏览文件 @
bcf9dc87
...
...
@@ -225,7 +225,7 @@ def beinWork(tyc_code, social_code):
baseCore
.
recordLog
(
social_code
,
taskType
,
state
,
takeTime
,
link
,
e
)
continue
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportDynamicTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/Tyc/get_tyc_cookies.py
浏览文件 @
bcf9dc87
...
...
@@ -5,7 +5,7 @@ from selenium import webdriver
import
pymongo
from
selenium.webdriver.common.by
import
By
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'天眼查登录信息'
]
url
=
'https://www.tianyancha.com/'
...
...
comData/Tyc/newsbucai.py
浏览文件 @
bcf9dc87
...
...
@@ -255,7 +255,7 @@ def beinWork(tyc_code, social_code,start_time):
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportDynamicTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/Tyc/newsbucai1.py
浏览文件 @
bcf9dc87
...
...
@@ -265,7 +265,7 @@ def beinWork(tyc_code, social_code, start_time):
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportDynamicTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/Tyc/tyc_qydt_add.py
浏览文件 @
bcf9dc87
...
...
@@ -246,7 +246,7 @@ def beinWork(tyc_code, social_code,start_time):
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportDynamicTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/YanBao/resentYanbao.py
浏览文件 @
bcf9dc87
...
...
@@ -241,7 +241,7 @@ def download(data, order_by,header):
'type'
:
'0'
}
# 将相应字段通过kafka传输保存
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
try
:
kafka_result
=
producer
.
send
(
"researchReportStudyTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
log
.
info
(
kafka_result
.
get
(
timeout
=
10
))
...
...
@@ -321,7 +321,7 @@ def download(data, order_by,header):
# log.info(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportStudyTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
log
.
info
(
kafka_result
.
get
(
timeout
=
10
))
...
...
comData/annualReport/fbs_annualreport.py
浏览文件 @
bcf9dc87
...
...
@@ -18,7 +18,7 @@ cursor = baseCore.cursor
def
sendKafka
(
dic_news
):
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/annualReport/fbs_annualreport_1.py
浏览文件 @
bcf9dc87
...
...
@@ -18,7 +18,7 @@ cursor = baseCore.cursor
def
sendKafka
(
dic_news
):
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/annualReport/证监会-年报.py
浏览文件 @
bcf9dc87
impor
t
json
impor
t
json
...
...
@@ -196,7 +196,7 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/annualReport/雪球网-年报.py
浏览文件 @
bcf9dc87
# -*-
coding: utf-8 -*-
# -*-
coding: utf-8 -*-
...
...
@@ -225,7 +225,7 @@ def spider_annual_report(dict_info,num):
}
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/annualReport1014/report.py
浏览文件 @
bcf9dc87
...
...
@@ -35,7 +35,7 @@ log.info(f'=============当前pid为{baseCore.getPID()}==============')
def
sendKafka
(
dic_news
):
start_time
=
time
.
time
()
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/annualReport1014/report1.py
浏览文件 @
bcf9dc87
...
...
@@ -40,7 +40,7 @@ log.info(f'=============当前pid为{baseCore.getPID()}==============')
def
sendKafka
(
dic_news
):
start_time
=
time
.
time
()
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/annualReport1014/report_url.py
浏览文件 @
bcf9dc87
...
...
@@ -35,7 +35,7 @@ taskType = '企业年报'
def
sendKafka
(
dic_news
):
start_time
=
time
.
time
()
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/annualReport1014/report_url_om.py
浏览文件 @
bcf9dc87
...
...
@@ -35,7 +35,7 @@ file_path = 'D:/kkwork/zzsn_spider/data/1_福布斯2000_PDF_50_郑'
def
sendKafka
(
dic_news
):
start_time
=
time
.
time
()
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/annualReport1023/test.py
浏览文件 @
bcf9dc87
...
...
@@ -44,7 +44,7 @@ taskType = '企业年报'
def
sendKafka
(
dic_news
,
xydm
):
start_time
=
time
.
time
()
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/annualReport1023/uptoes.py
浏览文件 @
bcf9dc87
...
...
@@ -30,7 +30,7 @@ pathType = 'QYYearReport/'
def
sendKafka
(
dic_news
):
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/annualReport_US/annualreportUS.py
浏览文件 @
bcf9dc87
...
...
@@ -197,7 +197,7 @@ def spider(com_name,cik,up_okCount):
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
compression_type
=
'gzip'
,
batch_size
=
1638400
,
linger_ms
=
1
,
buffer_memory
=
33445532
*
2
,
max_request_size
=
1024
*
1024
*
50
)
#,batch_size=20480000,buffer_memory=64000000)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
compression_type
=
'gzip'
,
batch_size
=
1638400
,
linger_ms
=
1
,
buffer_memory
=
33445532
*
2
,
max_request_size
=
1024
*
1024
*
50
)
#,batch_size=20480000,buffer_memory=64000000)
kafka_result
=
producer
.
send
(
"researchReportYearTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/caiwushuju/config.ini
浏览文件 @
bcf9dc87
...
...
@@ -11,7 +11,7 @@ database=caiji
url
=
jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
[kafka]
bootstrap_servers
=
1
14.115.159.144
:9092
bootstrap_servers
=
1
.95.3.121
:9092
topic
=
keyWordsInfo
groupId
=
python_baidu
...
...
comData/dingzhi/bmfw.py
浏览文件 @
bcf9dc87
...
...
@@ -167,7 +167,7 @@ def sendTokafka(ddata):
'type'
:
''
}
log
.
info
(
aa_dict
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
try
:
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
aa_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
r
.
sadd
(
info_code
+
'-test'
,
sourceAddress
)
...
...
comData/dingzhi/dfsm_sasac.py
浏览文件 @
bcf9dc87
...
...
@@ -127,7 +127,7 @@ def two_dfsm_mtgc():
}
log
.
info
(
f
'{page}--{title}--{href}'
)
# info_list.append(result_dict)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
try
:
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
result_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/dingzhi/europa.py
浏览文件 @
bcf9dc87
...
...
@@ -21,7 +21,7 @@ db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='ad
@retry
(
tries
=
2
,
delay
=
5
)
def
sendKafka
(
dic
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"research_center_fourth"
,
json
.
dumps
(
dic
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
log
.
info
(
f
'{dic["sourceAddress"]}传输成功'
)
...
...
comData/dingzhi/gdelt.py
浏览文件 @
bcf9dc87
...
...
@@ -128,7 +128,7 @@ def spider_work(href):
'labels'
:
[],
}
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
aa_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
#print(aa_dict)
...
...
comData/dingzhi/gzyw_sasac.py
浏览文件 @
bcf9dc87
...
...
@@ -138,7 +138,7 @@ def gzyw():
}
log
.
info
(
f
'{page}--{title}--{href}'
)
# info_list.append(result_dict)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
try
:
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
result_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/dingzhi/miit.py
浏览文件 @
bcf9dc87
...
...
@@ -142,7 +142,7 @@ def sendTokafka(ddata):
'source'
:
'python定制采集'
,
'type'
:
''
}
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
try
:
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
aa_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
r
.
sadd
(
info_code
+
'-test'
,
sourceAddress
)
...
...
comData/dingzhi/see_measures.py
浏览文件 @
bcf9dc87
...
...
@@ -188,7 +188,7 @@ if __name__ == "__main__":
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/dingzhi/wsj_detail.py
浏览文件 @
bcf9dc87
...
...
@@ -149,7 +149,7 @@ def getData(key):
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/dingzhi/zzcx.py
浏览文件 @
bcf9dc87
...
...
@@ -230,7 +230,7 @@ def zzcx():
print
(
result_dict
)
# break
# break
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
try
:
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
result_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/important_meeting/zyjjhy.py
浏览文件 @
bcf9dc87
...
...
@@ -49,7 +49,7 @@ def is_member_containing_string(key, string):
def
sendKafka
(
dic_info
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
try
:
kafka_result
=
producer
.
send
(
"research_center_fourth"
,
json
.
dumps
(
dic_info
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/important_meeting/zyqmshggldxzhy19.py
浏览文件 @
bcf9dc87
...
...
@@ -150,7 +150,7 @@ if __name__ == "__main__":
}
r
.
sadd
(
info_code
,
newsUrl
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
try
:
kafka_result
=
producer
.
send
(
"research_center_fourth"
,
json
.
dumps
(
dic_info
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/leadership/leadership.py
浏览文件 @
bcf9dc87
...
...
@@ -16,7 +16,7 @@ headers = {
def
sendKafka
(
dic_news
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"leadership"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf-8'
))
...
...
comData/nasdaq/nasdaq_news.py
浏览文件 @
bcf9dc87
...
...
@@ -186,7 +186,7 @@ def getDicB(data, soup):
# 数据发送至Kafka
@retry
(
tries
=
3
,
delay
=
1
)
def
sendKafka
(
dic_news
,
start_time
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/negative_news/ccpg.py
浏览文件 @
bcf9dc87
...
...
@@ -57,7 +57,7 @@ def getNowDate():
def
sendkafka
(
processitem
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
"1
14.115.159.144
:9092"
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
"1
.95.3.121
:9092"
])
content
=
processitem
[
'content'
]
publishDate
=
str
(
processitem
[
'publishDate'
])
title
=
processitem
[
'title'
]
...
...
comData/negative_news/creditchina.py
浏览文件 @
bcf9dc87
...
...
@@ -44,7 +44,7 @@ def getNowDate():
def
sendKafka
(
dic_news
):
start_time
=
time
.
time
()
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/newlist/champion/baseinfo_champion.py
浏览文件 @
bcf9dc87
...
...
@@ -408,7 +408,7 @@ if __name__ == '__main__':
log
.
info
(
f
'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}'
)
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/newlist/china100/china100.py
浏览文件 @
bcf9dc87
...
...
@@ -416,7 +416,7 @@ if __name__ == '__main__':
baseCore
.
writerToExcel
(
name_list
,
'中国100强企业.xlsx'
)
log
.
info
(
f
'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}'
)
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/newlist/global100/global100.py
浏览文件 @
bcf9dc87
...
...
@@ -416,7 +416,7 @@ if __name__ == '__main__':
baseCore
.
writerToExcel
(
name_list
,
'跨国公司100大.xlsx'
)
log
.
info
(
f
'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}'
)
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/newlist/hundred/baseinfo_hundred.py
浏览文件 @
bcf9dc87
...
...
@@ -408,7 +408,7 @@ if __name__ == '__main__':
log
.
info
(
f
'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}'
)
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/newlist/technological/baseinfo_tech.py
浏览文件 @
bcf9dc87
...
...
@@ -408,7 +408,7 @@ if __name__ == '__main__':
log
.
info
(
f
'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}'
)
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
post_data
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
except
:
...
...
comData/noticeReport/fbs_notice.py
浏览文件 @
bcf9dc87
...
...
@@ -217,7 +217,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time):
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
...
...
comData/noticeReport/东方财富网-公告.py
浏览文件 @
bcf9dc87
impor
t
os
impor
t
os
...
...
@@ -250,7 +250,7 @@ def GetContent(pdf_url,info_url, pdf_name, social_code, year, pub_time, start_ti
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportNoticeTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/noticeReport/东方财富网-港股公告-2.py
浏览文件 @
bcf9dc87
"""
"""
...
...
@@ -263,7 +263,7 @@ def GetContent(pdf_url,info_url, pdf_name, social_code, year, pub_time, start_ti
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportNoticeTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/noticeReport/东方财富网-港股公告.py
浏览文件 @
bcf9dc87
impor
t
os
impor
t
os
...
...
@@ -218,7 +218,7 @@ def ifInstert(short_name, social_code, pdf_url):
def
sendKafka
(
social_code
,
newsUrl
,
dic_news
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportNoticeTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/noticeReport/证监会-公告.py
浏览文件 @
bcf9dc87
...
...
@@ -352,7 +352,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"researchReportNoticeTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
...
...
comData/policylaw/ClassTool.py
浏览文件 @
bcf9dc87
...
...
@@ -93,7 +93,7 @@ class ClassTool():
def
sendKafka
(
self
,
dic_news
):
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"policy"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/policylaw/gyw/gyw.py
浏览文件 @
bcf9dc87
...
...
@@ -208,7 +208,7 @@ def getDic(driver):
@retry
(
tries
=
2
,
delay
=
5
)
def
sendKafka
(
dic
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"research_center_fourth"
,
json
.
dumps
(
dic
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
log
.
info
(
f
'{dic["sourceAddress"]}传输成功'
)
...
...
comData/policylaw/tingtype.py
浏览文件 @
bcf9dc87
...
...
@@ -64,7 +64,7 @@ def save_data(dic_news):
def
sendKafka
(
dic_news
):
start_time
=
time
.
time
()
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"policy"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/sinafinance_news/nyse_news.py
浏览文件 @
bcf9dc87
...
...
@@ -118,7 +118,7 @@ def getDic(social_code, li):
# 数据发送至Kafka
@retry
(
tries
=
3
,
delay
=
1
)
def
sendKafka
(
dic_news
,
start_time
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/sinafinance_news/nyse_news_gn.py
浏览文件 @
bcf9dc87
...
...
@@ -141,7 +141,7 @@ def getDic(social_code, title, href, pub_time):
# 数据发送至Kafka
@retry
(
tries
=
3
,
delay
=
1
)
def
sendKafka
(
dic_news
,
start_time
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/sinafinance_news/nyse_news_xg.py
浏览文件 @
bcf9dc87
...
...
@@ -149,7 +149,7 @@ def getDic(social_code, title, href, pub_time):
# 数据发送至Kafka
@retry
(
tries
=
3
,
delay
=
1
)
def
sendKafka
(
dic_news
,
start_time
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/sinafinance_news/nyse_notice_gn.py
浏览文件 @
bcf9dc87
...
...
@@ -37,7 +37,7 @@ def ifInstert(social_code, pdf_url):
@retry
(
tries
=
3
,
delay
=
1
)
def
sendKafka
(
dic_news
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
print
(
kafka_result
.
get
(
timeout
=
10
))
...
...
comData/tender/qgzypt.py
浏览文件 @
bcf9dc87
...
...
@@ -37,7 +37,7 @@ session.mount('http://', HTTPAdapter(max_retries=3))
# 发送kafka
def
sendKafka
(
dic_news
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"tenderClusterData"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/tender/zhongguocg.py
浏览文件 @
bcf9dc87
...
...
@@ -20,7 +20,7 @@ URL = 'http://www.ccgp.gov.cn/'
def
sendKafka
(
dic_news
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"tenderClusterData"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/tender/zhongyangcg.py
浏览文件 @
bcf9dc87
...
...
@@ -37,7 +37,7 @@ session.get(URL, headers=headers)
def
sendKafka
(
dic_news
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"tenderClusterData"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/tradingview/tradviewNew.py
浏览文件 @
bcf9dc87
...
...
@@ -260,7 +260,7 @@ def sendToKafka(detailmsg):
'socialCreditCode'
:
detailmsg
[
'socialCreditCode'
],
'year'
:
detailmsg
[
'year'
]
}
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
try
:
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/tradingview/tradviewNewBak.py
浏览文件 @
bcf9dc87
...
...
@@ -276,7 +276,7 @@ def sendToKafka(detailmsg):
'socialCreditCode'
:
detailmsg
[
'socialCreditCode'
],
'year'
:
detailmsg
[
'year'
]
}
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
try
:
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/tyctest/newsbucaitest.py
浏览文件 @
bcf9dc87
...
...
@@ -241,7 +241,7 @@ def beinWork(tyc_code, social_code,start_time):
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/tyctest/tycdt.py
浏览文件 @
bcf9dc87
...
...
@@ -325,7 +325,7 @@ class Tycdt(object):
# print(dic_news)
# 将相应字段通过kafka传输保存
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/weixin_solo/config.ini
浏览文件 @
bcf9dc87
...
...
@@ -11,7 +11,7 @@ database=caiji
url
=
jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
[kafka]
bootstrap_servers
=
1
14.115.159.144
:9092
bootstrap_servers
=
1
.95.3.121
:9092
topic
=
keyWordsInfo
groupId
=
python_baidu
...
...
comData/weixin_solo/get_tokenCookies.py
浏览文件 @
bcf9dc87
...
...
@@ -74,8 +74,8 @@ if __name__ == "__main__":
# loadinfo = [token,cookies]
# 保存到数据库中
#
insert = f"insert into weixin_tokenCookies_person (token,cookies,create_time,fenghao_time,user_name,update_time) values ('{token}','{escape_string(cookies)}',now(),DATE_SUB(NOW(), INTERVAL 1 DAY),'{user_name}',now())"
insert
=
f
"insert into weixin_tokenCookies (token,cookies,create_time,fenghao_time,user_name,update_time) values ('{token}','{escape_string(cookies)}',now(),DATE_SUB(NOW(), INTERVAL 1 DAY),'{user_name}',now())"
insert
=
f
"insert into weixin_tokenCookies_person (token,cookies,create_time,fenghao_time,user_name,update_time) values ('{token}','{escape_string(cookies)}',now(),DATE_SUB(NOW(), INTERVAL 1 DAY),'{user_name}',now())"
#
insert = f"insert into weixin_tokenCookies (token,cookies,create_time,fenghao_time,user_name,update_time) values ('{token}','{escape_string(cookies)}',now(),DATE_SUB(NOW(), INTERVAL 1 DAY),'{user_name}',now())"
cursor_
.
execute
(
insert
)
cnx_
.
commit
()
browser
.
close
()
...
...
comData/weixin_solo/oneWeixin.py
浏览文件 @
bcf9dc87
...
...
@@ -177,7 +177,7 @@ def get_info(sid,json_search,origin,url_,info_source_code,page):
'createDate'
:
time_now
}
# for nnn in range(0, 3):
# producer = KafkaProducer(bootstrap_servers=['1
14.115.159.144
:9092'])
# producer = KafkaProducer(bootstrap_servers=['1
.95.3.121
:9092'])
# try:
# kafka_result = producer.send("crawlerInfo", json.dumps(dic_info, ensure_ascii=False).encode('utf8'))
# kafka_time_out = kafka_result.get(timeout=10)
...
...
@@ -202,9 +202,9 @@ def get_info(sid,json_search,origin,url_,info_source_code,page):
# 'source': '1',
# }
# for nnn2 in range(0, 3):
# producer2 = KafkaProducer(bootstrap_servers=['1
14.115.159.144
:9092'])
# producer2 = KafkaProducer(bootstrap_servers=['1
.95.3.121
:9092'])
# try:
# # producer2 = KafkaProducer(bootstrap_servers=['1
14.115.159.144
:9092'])
# # producer2 = KafkaProducer(bootstrap_servers=['1
.95.3.121
:9092'])
# kafka_result2 = producer2.send("collectionAndDispatcherInfo",
# json.dumps(dic_info2, ensure_ascii=False).encode('utf8'))
# break
...
...
comData/weixin_solo/oneWeixin2.py
浏览文件 @
bcf9dc87
...
...
@@ -243,7 +243,7 @@ def get_info(dict_json, linkid):
}
for
nnn
in
range
(
0
,
3
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
7
,
0
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
7
,
0
))
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
dic_info
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
kafka_time_out
=
kafka_result
.
get
(
timeout
=
10
)
# add_url(sid, url_news)
...
...
@@ -267,7 +267,7 @@ def get_info(dict_json, linkid):
}
for
nnn2
in
range
(
0
,
3
):
try
:
producer2
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
7
,
0
))
producer2
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
7
,
0
))
kafka_result2
=
producer2
.
send
(
"collectionAndDispatcherInfo"
,
json
.
dumps
(
dic_info2
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
break
...
...
comData/weixin_solo/oneWeixin_test.py
浏览文件 @
bcf9dc87
...
...
@@ -246,7 +246,7 @@ def get_info(dict_json, linkid):
log
.
info
(
dic_info
)
# for nnn in range(0, 3):
# try:
# producer = KafkaProducer(bootstrap_servers=['1
14.115.159.144
:9092'], api_version=(2, 7, 0))
# producer = KafkaProducer(bootstrap_servers=['1
.95.3.121
:9092'], api_version=(2, 7, 0))
# kafka_result = producer.send("crawlerInfo_test", json.dumps(dic_info, ensure_ascii=False).encode('utf8'))
# kafka_time_out = kafka_result.get(timeout=10)
# # add_url(sid, url_news)
...
...
@@ -270,7 +270,7 @@ def get_info(dict_json, linkid):
# }
# for nnn2 in range(0, 3):
# try:
# producer2 = KafkaProducer(bootstrap_servers=['1
14.115.159.144
:9092'], api_version=(2,7,0))
# producer2 = KafkaProducer(bootstrap_servers=['1
.95.3.121
:9092'], api_version=(2,7,0))
# kafka_result2 = producer2.send("collectionAndDispatcherInfo",
# json.dumps(dic_info2, ensure_ascii=False).encode('utf8'))
# break
...
...
comData/weixin_solo/机器人大会/oneWeixin2.py
浏览文件 @
bcf9dc87
# -*-
coding: utf-8 -*-
# -*-
coding: utf-8 -*-
...
...
@@ -19,7 +19,7 @@ sys.path.append('D:\\zzsn_spider\\base')
import
BaseCore
# todo 连接mongo
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'机器人分会'
]
...
...
@@ -37,7 +37,7 @@ rMonitor = redis.Redis(host='114.116.90.53', port=6380, password='clbzzsn', db=1
@retry
(
tries
=
2
,
delay
=
5
)
def
sendMonitorKafka
(
dic_news
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
7
,
0
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
7
,
0
))
kafka_result
=
producer
.
send
(
"data_lifecycle_log_data_crawler"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf-8'
))
log
.
info
(
'数据监控Kafka发送成功'
)
...
...
@@ -45,7 +45,7 @@ def sendMonitorKafka(dic_news):
@retry
(
tries
=
3
,
delay
=
5
)
def
sendKafka
(
result_dict
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
7
,
0
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
7
,
0
))
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
result_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
log
.
info
(
f
"{result_dict['title']}===发送kafka成功!"
)
...
...
comData/yhcj/yahoo_baseInfo.py
浏览文件 @
bcf9dc87
...
...
@@ -40,7 +40,7 @@ headers = {
# 发送kafka
@retry
(
delay
=
5
)
def
sendKafka
(
company_dict
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
api_version
=
(
2
,
0
,
2
))
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
api_version
=
(
2
,
0
,
2
))
# kafka_result = producer.send("regionInfo", json.dumps(company_dict, ensure_ascii=False).encode('utf8'))
kafka_result
=
producer
.
send
(
"enterpriseInfo"
,
json
.
dumps
(
company_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
kafka_result
.
get
(
timeout
=
10
)
...
...
comData/yhcj/雅虎财经_企业动态.py
浏览文件 @
bcf9dc87
# 雅虎财
经企业动态获取
# 雅虎财
经企业动态获取
...
...
@@ -30,7 +30,7 @@ last_url = ''
# 发送kafka
@retry
(
tries
=
3
,
delay
=
5
)
def
sendKafka
(
dic_news
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
kafka_result
=
producer
.
send
(
"researchReportTopic"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
comData/微博采集/weibo.py
浏览文件 @
bcf9dc87
from
kafka
import
KafkaConsumer
from
kafka
import
KafkaConsumer
...
...
@@ -270,7 +270,7 @@ def get_content_by_user_uid(url, sid):
# for one_news_info in list_all_info: # 将每一个文章数据转换为json格式,把json文件用kafka发送出去
# for num_pro in range(0, 3):
# try:
# producer = KafkaProducer(bootstrap_servers=['1
14.115.159.144
:9092'])
# producer = KafkaProducer(bootstrap_servers=['1
.95.3.121
:9092'])
# kafka_result = producer.send("crawlerInfo",
# json.dumps(one_news_info, ensure_ascii=False).encode('utf8'))
# print(kafka_result.get(timeout=10))
...
...
@@ -288,8 +288,8 @@ def get_content_by_user_uid(url, sid):
def
consume
():
"""auto_commit_enable=True, auto_commit_interval_ms=3000"""
consumer
=
KafkaConsumer
(
"weiBoCrawl"
,
auto_offset_reset
=
'earliest'
,
group_id
=
"python_weibo"
,
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
])
# consumer = KafkaConsumer("pythonInfo", auto_offset_reset='earliest', bootstrap_servers=['1
14.115.159.144
:9092'])
bootstrap_servers
=
[
'1
.95.3.121
:9092'
])
# consumer = KafkaConsumer("pythonInfo", auto_offset_reset='earliest', bootstrap_servers=['1
.95.3.121
:9092'])
for
message
in
consumer
:
mes_dict
=
json
.
loads
(
message
.
value
.
decode
(
'utf-8'
))
# print(message.value.decode('utf-8'))
...
...
comData/福布斯榜单/get_employees.py
浏览文件 @
bcf9dc87
impor
t
json
impor
t
json
...
...
@@ -6,7 +6,7 @@ import pymongo
# url = "https://web.archive.org/web/20230702131549/https://www.forbes.com/lists/global2000/"
url
=
"https://web.archive.org/web/20220929184024/https://www.forbes.com/lists/global2000/"
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'福布斯企业人数'
]
headers
=
{
'Accept'
:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
,
...
...
comData/福布斯榜单/get_employees_2022.py
浏览文件 @
bcf9dc87
impor
t
json
impor
t
json
...
...
@@ -6,7 +6,7 @@ import requests
from
bs4
import
BeautifulSoup
from
retry
import
retry
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'2022年福布斯企业人数'
]
url
=
'https://web.archive.org/web/20220929184024/https://www.forbes.com/lists/global2000/'
headers
=
{
...
...
cpws/裁判文书网列表正文.py
浏览文件 @
bcf9dc87
impor
t
base64
impor
t
base64
...
...
@@ -228,7 +228,7 @@ def getProcessitem(bdetail):
def
sendkafka
(
processitem
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
"1
14.115.159.144
:9092"
)
producer
=
KafkaProducer
(
bootstrap_servers
=
"1
.95.3.121
:9092"
)
content
=
processitem
[
'content'
]
publishDate
=
str
(
processitem
[
'publishDate'
])
title
=
processitem
[
'title'
]
...
...
cpws/裁判文书网列表正文_redis.py
浏览文件 @
bcf9dc87
#codi
ng=utf-8
#codi
ng=utf-8
...
...
@@ -257,7 +257,7 @@ def getProcessitem(bdetail):
def
sendkafka
(
processitem
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
"1
14.115.159.144
:9092"
)
producer
=
KafkaProducer
(
bootstrap_servers
=
"1
.95.3.121
:9092"
)
content
=
processitem
[
'content'
]
publishDate
=
str
(
processitem
[
'publishDate'
])
title
=
processitem
[
'title'
]
...
...
enterprise_tyc/classtool.py
浏览文件 @
bcf9dc87
...
...
@@ -17,7 +17,7 @@ baseCore = BaseCore()
log
=
baseCore
.
getLogger
()
cnx
=
baseCore
.
cnx
cursor
=
baseCore
.
cursor
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'天眼查登录信息'
]
...
...
es拉取全球企业资讯/丝路商机剩余数据查询.py
浏览文件 @
bcf9dc87
impor
t
json
impor
t
json
...
...
@@ -18,7 +18,7 @@ from urllib.parse import unquote
baseCore
=
BaseCore
.
BaseCore
(
sqlFlg
=
False
)
log
=
baseCore
.
getLogger
()
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'新华丝路-丝路商机100+'
]
lock
=
threading
.
Lock
()
...
...
es拉取全球企业资讯/拉取不保留数据.py
浏览文件 @
bcf9dc87
impor
t
json
impor
t
json
...
...
@@ -17,7 +17,7 @@ from urllib.parse import unquote
baseCore
=
BaseCore
.
BaseCore
(
sqlFlg
=
False
)
log
=
baseCore
.
getLogger
()
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'7-17全球企业资讯删除数据'
]
lock
=
threading
.
Lock
()
...
...
es拉取全球企业资讯/拉取保留数据.py
浏览文件 @
bcf9dc87
impor
t
json
impor
t
json
...
...
@@ -17,7 +17,7 @@ from urllib.parse import unquote
baseCore
=
BaseCore
.
BaseCore
(
sqlFlg
=
False
)
log
=
baseCore
.
getLogger
()
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017/'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'新华丝路-丝路商机'
]
lock
=
threading
.
Lock
()
...
...
google_comm/config.ini
浏览文件 @
bcf9dc87
...
...
@@ -11,7 +11,7 @@ database=caiji
url
=
jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
[kafka]
bootstrap_servers
=
1
14.115.159.144
:9092
bootstrap_servers
=
1
.95.3.121
:9092
topic
=
keyWordsInfo
groupId
=
python_google
...
...
gwzk/europa.py
浏览文件 @
bcf9dc87
...
...
@@ -21,7 +21,7 @@ db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='ad
@retry
(
tries
=
2
,
delay
=
5
)
def
sendKafka
(
dic
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"research_center_fourth"
,
json
.
dumps
(
dic
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
log
.
info
(
f
'{dic["sourceAddress"]}传输成功'
)
...
...
gwzk/lhg.py
浏览文件 @
bcf9dc87
...
...
@@ -40,7 +40,7 @@ headers = {
@retry
(
tries
=
2
,
delay
=
5
)
def
sendKafka
(
dic
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"research_center_fourth"
,
json
.
dumps
(
dic
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
log
.
info
(
f
'{dic["sourceAddress"]}传输成功'
)
...
...
gwzk/ozll.py
浏览文件 @
bcf9dc87
...
...
@@ -37,7 +37,7 @@ headers = {
@retry
(
tries
=
2
,
delay
=
5
)
def
sendKafka
(
dic
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"research_center_fourth"
,
json
.
dumps
(
dic
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
log
.
info
(
f
'{dic["sourceAddress"]}传输成功'
)
...
...
gwzk/wtoorg.py
浏览文件 @
bcf9dc87
...
...
@@ -45,7 +45,7 @@ headers = {
@retry
(
tries
=
2
,
delay
=
5
)
def
sendKafka
(
dic
):
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"research_center_fourth"
,
json
.
dumps
(
dic
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
log
.
info
(
f
'{dic["sourceAddress"]}传输成功'
)
...
...
jrtt1news_comm/config.ini
浏览文件 @
bcf9dc87
...
...
@@ -11,7 +11,7 @@ database=caiji
url
=
jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
[kafka]
bootstrap_servers
=
1
14.115.159.144
:9092
bootstrap_servers
=
1
.95.3.121
:9092
topic
=
keyWordsInfo
groupId
=
python_sougou
...
...
qiushi_leaderspeech.py
浏览文件 @
bcf9dc87
...
...
@@ -16,7 +16,7 @@ log = baseCore.getLogger()
r
=
redis
.
Redis
(
host
=
"114.115.236.206"
,
port
=
6379
,
password
=
'clbzzsn'
,
db
=
0
)
def
sendKafka
(
dic_news
):
try
:
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
14.115.159.144
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1
.95.3.121
:9092'
],
max_request_size
=
1024
*
1024
*
20
)
kafka_result
=
producer
.
send
(
"crawlerInfo"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
qqnews_comm/config.ini
浏览文件 @
bcf9dc87
...
...
@@ -11,7 +11,7 @@ database=caiji
url
=
jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
[kafka]
bootstrap_servers
=
1
14.115.159.144
:9092
bootstrap_servers
=
1
.95.3.121
:9092
topic
=
keyWordsInfo
groupId
=
python_sougou
...
...
sougou_comm/config.ini
浏览文件 @
bcf9dc87
...
...
@@ -11,7 +11,7 @@ database=caiji
url
=
jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
[kafka]
bootstrap_servers
=
1
14.115.159.144
:9092
bootstrap_servers
=
1
.95.3.121
:9092
topic
=
keyWordsInfo
groupId
=
python_sougou
...
...
souhunews_comm/config.ini
浏览文件 @
bcf9dc87
...
...
@@ -11,7 +11,7 @@ database=caiji
url
=
jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
[kafka]
bootstrap_servers
=
1
14.115.159.144
:9092
bootstrap_servers
=
1
.95.3.121
:9092
topic
=
keyWordsInfo
groupId
=
python_sougou
...
...
test.py
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
test/classtool.py
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
test/test.py
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
toutiaonews_comm/config.ini
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
zkr/esToMongodb.py
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
zkr/翻译.py
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
中国外汇交易中心/spider_main.py
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
习近平讲话/1.py
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
国务院问答对处理/get_html.py
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
百度翻译/baidufanyi.py
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
百度翻译/fanyi_test.py
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
百度采集/baidu_comm/config.ini
浏览文件 @
bcf9dc87
差异被折叠。
点击展开。
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论