Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
1
合并请求
1
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
丁双波
zzsn_spider
Commits
21aafa31
提交
21aafa31
authored
10月 11, 2024
作者:
XveLingKun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
2024-10-11
上级
c20792ca
全部展开
显示空白字符变更
内嵌
并排
正在显示
9 个修改的文件
包含
34 行增加
和
29 行删除
+34
-29
612test.py
612test.py
+0
-0
RedisPPData.py
base/RedisPPData.py
+1
-1
CorePerson.py
comData/Tyc/CorePerson.py
+7
-7
CorePerson_Update.py
comData/Tyc/CorePerson_Update.py
+2
-2
europa.py
comData/dingzhi/europa.py
+3
-2
东方财富网-公告.py
comData/noticeReport/东方财富网-公告.py
+13
-9
ClassTool.py
comData/policylaw/ClassTool.py
+2
-2
fenzhijigou.py
enterprise_tyc/fenzhijigou.py
+3
-3
1.py
习近平讲话/1.py
+3
-3
没有找到文件。
612test.py
浏览文件 @
21aafa31
差异被折叠。
点击展开。
base/RedisPPData.py
浏览文件 @
21aafa31
...
...
@@ -164,7 +164,7 @@ def NoticeDF():
continue
# 获取全部美股企业
mg_query
=
"select
*
from sys_base_enterprise_ipo where category=7 and securities_code is not null and priority =1"
mg_query
=
"select
social_credit_code
from sys_base_enterprise_ipo where category=7 and securities_code is not null and priority =1"
cursor_
.
execute
(
mg_query
)
cnx_
.
commit
()
mg_result
=
cursor_
.
fetchall
()
...
...
comData/Tyc/CorePerson.py
浏览文件 @
21aafa31
...
...
@@ -63,8 +63,8 @@ def get_html(tycid, driver, headers):
@retry
(
tries
=
5
,
delay
=
2
)
def
get_page
(
url
,
s
,
headers
):
ip
=
baseCore
.
get_proxy
()
res
=
s
.
get
(
url
=
url
,
headers
=
headers
,
proxies
=
ip
,
timeout
=
(
5
,
10
))
#
res = s.get(url=url, headers=headers, verify=False)
#
res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
res
=
s
.
get
(
url
=
url
,
headers
=
headers
,
verify
=
False
)
if
res
.
status_code
!=
200
:
raise
data_page
=
res
.
json
()
...
...
@@ -141,9 +141,9 @@ def doJob():
else
:
continue
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
#
item = baseCore.redicPullData('CorPersonEnterprise:gnqy_socialCode')
item
=
baseCore
.
redicPullData
(
'CorPersonEnterprise:gnqy_socialCode'
)
# 判断 如果Redis中已经没有数据,则等待
item
=
'9133000070471161XA'
#
item = '9133000070471161XA'
if
item
==
None
:
time
.
sleep
(
30
*
60
)
continue
...
...
@@ -499,10 +499,10 @@ def doJob():
continue
else
:
pass
response
=
requests
.
post
(
'http://114.115.236.206:8088/sync/executive'
,
data
=
json_updata
,
timeout
=
300
,
verify
=
False
)
response_
=
requests
.
post
(
'http://114.116.116.241:9098/userserver/sync/executive'
,
data
=
json_updata
,
timeout
=
300
,
response
=
requests
.
post
(
'http://1.95.72.34:8088/sync/executive'
,
data
=
json_updata
,
timeout
=
300
,
verify
=
False
)
response_
=
requests
.
post
(
'http://114.116.116.241:9098/userserver/sync/executive'
,
data
=
json_updata
,
timeout
=
300
,
verify
=
False
)
print
(
response
.
text
)
print
(
response_
.
text
)
log
.
info
(
'=========成功======'
)
...
...
comData/Tyc/CorePerson_Update.py
浏览文件 @
21aafa31
...
...
@@ -64,8 +64,8 @@ def get_html(tycid, driver, headers):
@retry
(
tries
=
5
,
delay
=
3
)
def
get_page
(
url
,
s
,
headers
):
ip
=
baseCore
.
get_proxy
()
res
=
s
.
get
(
url
=
url
,
headers
=
headers
,
proxies
=
ip
,
timeout
=
(
5
,
10
))
#
res = s.get(url=url, headers=headers, verify=False)
#
res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
res
=
s
.
get
(
url
=
url
,
headers
=
headers
,
verify
=
False
)
if
res
.
status_code
!=
200
:
raise
data_page
=
res
.
json
()
...
...
comData/dingzhi/europa.py
浏览文件 @
21aafa31
...
...
@@ -16,7 +16,7 @@ from base import BaseCore
baseCore
=
BaseCore
.
BaseCore
()
log
=
baseCore
.
getLogger
()
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'国外智库'
]
@retry
(
tries
=
2
,
delay
=
5
)
...
...
@@ -105,7 +105,8 @@ def doJob():
'Accept-Encoding'
:
'gzip, deflate, br'
,
'Accept-Language'
:
'zh-CN,zh;q=0.9'
,
'Cache-Control'
:
'max-age=0'
,
'Cookie'
:
'JSESSIONID=BHezogPwi8NJVECsKXCXqijdQ00-yMJHw_gR8wiC.ip-10-240-5-121; __cf_bm=c2byUypnSjXPS_UFDM7BMRGDxN6AQEkNVUjzw9HuSq8-1707054653-1-AbbI7JWWkfWKVGi8SKI06f0jGEjPdk5kvHAIRRpBHSSSnmxj1IcvGUT8+/O6R0U2RLZJECZdUzZIXAwFuEz5lPo=; _gcl_au=1.1.201344533.1707054655; _gid=GA1.2.557164000.1707054655; cb-enabled=enabled; cf_clearance=6tK6.WKHJbXXoV4NTgbyHRhetRxMdWPZofwlv01F65Y-1707054656-1-AfrYlWnLLZFC1sKxeFVQintPrZnjvjoJSZwRRhAYwqRHGdWbU5IFZQDJZJM21l20Tj6gk4JxNobWT0wGzp1Dgjw=; _ce.irv=new; cebs=1; _ce.clock_event=1; _ce.clock_data=72
%2
C123.149.3.159
%2
C1
%2
C9c1ce27f08b16479d2e17743062b28ed; custom_cookie_AB=1; AWSALB=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; AWSALBCORS=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; _gat_UA-1887794-2=1; _dc_gtm_UA-136634323-1=1; _ga_F5XZ540Q4V=GS1.1.1707054655.1.1.1707055119.7.0.0; _ga=GA1.1.1014316406.1707054655; _ga_F7KSNTXTRX=GS1.1.1707054655.1.1.1707055119.0.0.0; cebsp_=5; _ce.s=v~212f033193b9432855ae8335d6d3969cc1f8b751~lcw~1707055134688~lva~1707054658247~vpv~0~v11.fhb~1707054659602~v11.lhb~1707055126493~v11.cs~325107~v11.s~6d7ba630-c364-11ee-aba8-136dbbf9a447~v11.sla~1707055134688~v11.send~1707055135439~lcw~1707055135439'
,
# 'Cookie': 'JSESSIONID=BHezogPwi8NJVECsKXCXqijdQ00-yMJHw_gR8wiC.ip-10-240-5-121; __cf_bm=c2byUypnSjXPS_UFDM7BMRGDxN6AQEkNVUjzw9HuSq8-1707054653-1-AbbI7JWWkfWKVGi8SKI06f0jGEjPdk5kvHAIRRpBHSSSnmxj1IcvGUT8+/O6R0U2RLZJECZdUzZIXAwFuEz5lPo=; _gcl_au=1.1.201344533.1707054655; _gid=GA1.2.557164000.1707054655; cb-enabled=enabled; cf_clearance=6tK6.WKHJbXXoV4NTgbyHRhetRxMdWPZofwlv01F65Y-1707054656-1-AfrYlWnLLZFC1sKxeFVQintPrZnjvjoJSZwRRhAYwqRHGdWbU5IFZQDJZJM21l20Tj6gk4JxNobWT0wGzp1Dgjw=; _ce.irv=new; cebs=1; _ce.clock_event=1; _ce.clock_data=72%2C123.149.3.159%2C1%2C9c1ce27f08b16479d2e17743062b28ed; custom_cookie_AB=1; AWSALB=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; AWSALBCORS=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; _gat_UA-1887794-2=1; _dc_gtm_UA-136634323-1=1; _ga_F5XZ540Q4V=GS1.1.1707054655.1.1.1707055119.7.0.0; _ga=GA1.1.1014316406.1707054655; _ga_F7KSNTXTRX=GS1.1.1707054655.1.1.1707055119.0.0.0; cebsp_=5; _ce.s=v~212f033193b9432855ae8335d6d3969cc1f8b751~lcw~1707055134688~lva~1707054658247~vpv~0~v11.fhb~1707054659602~v11.lhb~1707055126493~v11.cs~325107~v11.s~6d7ba630-c364-11ee-aba8-136dbbf9a447~v11.sla~1707055134688~v11.send~1707055135439~lcw~1707055135439',
'Cookie'
:
'__cf_bm=d9hIc2bALTgTBZ64CyxHwuWXuAZmsBuh5CakctSWeP0-1728549696-1.0.1.1-6XP3FXhlXvLTp0Bgcnhh00_7UcjUmV9KlVd6Zr5jbUVcZiwH4qM9suuA_1f181EMaZ2drTFJVLBGwS27V98VGg; JSESSIONID=MiizoFg43W81UlgdhIa0nXXsTwxJoYreIo6ZAgc1.ip-10-240-5-72; _gcl_au=1.1.472576737.1728549714; _ga_F5XZ540Q4V=GS1.1.1728549714.1.0.1728549714.60.0.0; _gid=GA1.2.2100045182.1728549715; _ga_F7KSNTXTRX=GS1.1.1728549714.1.0.1728549714.0.0.0; _ga=GA1.1.507449157.1728549715; cb-enabled=enabled; AWSALB=ycwWtvr4GmtlXD8YwugYRc9pfc68sD/lVUvwOaJNcSBtyoLxAQMeqeEz5IuiDVbsKzd6pKkCIhBEE1UcGoORiD70DwsEPrEAQgDZ3OuZodS0BG0g1OYO1vvhUClf; AWSALBCORS=ycwWtvr4GmtlXD8YwugYRc9pfc68sD/lVUvwOaJNcSBtyoLxAQMeqeEz5IuiDVbsKzd6pKkCIhBEE1UcGoORiD70DwsEPrEAQgDZ3OuZodS0BG0g1OYO1vvhUClf; custom_cookie_AB=2; cf_clearance=q8LbD2iMK8HvCfC8EC0ZxocKplGL_1yI7mPEDZx5FM0-1728549715-1.2.1.1-n7xc_Cop0OGUMyp2wTnL.YPlw71QrDsks_alvUMeYQXTShaMFiSIcJyDLbrGj.Bb_TbpRMaSE999joCsuSDRXsCPmIuEFZGhR3qaJt5a1EKeWRLQbf4IcRKLCwQTj3O3eITWLIPJmisWYQxNp0Rm6gLNDEM6zOvFkhkuwfaJP7Taj9JQ4eeAajhmhiGpijEiE3PCc499D1f_PAP09Y8uMRqj_YZGD99IBxoG1gBlRylFlUa9riPqWs7vaD6IFFwFEyeuc3XzC3Nl0TiYDj9sBiG8Us9jeZgWwXm2J7E.MJQKeQBM1utSKAEsVW3Lhs8emraPJYaDLzSW7MtMZ8C1m93z0dRSOtjEtzL7Cz5IL7wm3qSY2MqAug.Y2f3eXKb23Oxe6ebcO1vJRK5YHYyQdOV1by5J0t5oKQ5iRrVq1XBUiYYMX.e8lWy6rD1WO.qP'
,
'Referer'
:
'https://www.oecd-ilibrary.org/economics/oecd-policy-responses-on-the-impacts-of-the-war-in-ukraine_dc825602-en?page=2'
,
'Sec-Ch-Ua'
:
'"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'
,
'Sec-Ch-Ua-Mobile'
:
'?0'
,
...
...
comData/noticeReport/东方财富网-公告.py
浏览文件 @
21aafa31
impor
t
os
impor
t
os
...
...
@@ -399,7 +399,7 @@ if __name__ =='__main__':
start_time
=
time
.
time
()
# 获取企业信息
# social_code = baseCore.redicPullData('NoticeEnterprise:mgqy_socialCode_add')
social_code
=
'Z
ZSN22080900000046
'
social_code
=
'Z
D0CN0012309068194
'
if
not
social_code
:
time
.
sleep
(
20
)
continue
...
...
@@ -417,8 +417,8 @@ if __name__ =='__main__':
else
:
# log.info(f'数据库中无该企业{social_code}')
sql
=
f
"SELECT * FROM sys_base_enterprise_ipo WHERE social_credit_code = '{social_code}' and category=7 and securities_code is not null and priority=1"
cursor
.
execute
(
sql
)
data
=
cursor
.
fetchall
()
cursor
_
.
execute
(
sql
)
data
=
cursor
_
.
fetchone
()
if
data
:
pass
else
:
...
...
@@ -430,17 +430,21 @@ if __name__ =='__main__':
Category
=
data
[
6
]
Exchange
=
data
[
7
]
sql_baseinfo
=
f
"SELECT * FROM sys_base_enterprise WHERE social_credit_code = '{social_code}'"
cursor
.
execute
(
sql_baseinfo
)
data_baseinfo
=
cursor
.
fetchone
()
cursor
_
.
execute
(
sql_baseinfo
)
data_baseinfo
=
cursor
_
.
fetchone
()
if
data_baseinfo
:
pass
CompanyName
=
data_baseinfo
[
3
]
EnglishName
=
data_baseinfo
[
34
]
countryName
=
data_baseinfo
[
39
]
if
CompanyName
:
pass
else
:
continue
# 写入数据库
insert
=
"INSERT INTO EnterpriseInfo(CompanyName, SocialCode, SecuritiesCode, SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, countryName) VALUES (
%
s,
%
s)"
cursor
_
.
execute
(
insert
,
(
CompanyName
,
social_code
,
SecuritiesCode
,
SecuritiesShortName
,
EnglishName
,
SecuritiesType
,
Category
,
Exchange
,
countryName
))
cnx
_
.
commit
()
insert
=
"INSERT INTO EnterpriseInfo(CompanyName, SocialCode, SecuritiesCode, SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, countryName) VALUES (
%
s,
%
s
,
%
s,
%
s,
%
s,
%
s,
%
s,
%
s,
%
s
)"
cursor
.
execute
(
insert
,
(
CompanyName
,
social_code
,
SecuritiesCode
,
SecuritiesShortName
,
EnglishName
,
SecuritiesType
,
Category
,
Exchange
,
countryName
))
cnx
.
commit
()
com_name
=
CompanyName
code
=
SecuritiesCode
...
...
comData/policylaw/ClassTool.py
浏览文件 @
21aafa31
...
...
@@ -24,7 +24,7 @@ class ClassTool():
def
__init__
(
self
):
self
.
taskType
=
'政策法规'
self
.
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
self
.
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'国务院_国资委_copy1'
]
self
.
driver_path
=
r'D:\cmd100\chromedriver.exe'
...
...
@@ -93,7 +93,7 @@ class ClassTool():
def
sendKafka
(
self
,
dic_news
):
try
:
# 114.116.116.241
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1.95.
3.121:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1.95.
78.131:9092'
],
max_request_size
=
1024
*
1024
*
20
,
api_version
=
(
2
,
5
,
0
)
)
kafka_result
=
producer
.
send
(
"policy"
,
json
.
dumps
(
dic_news
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
enterprise_tyc/fenzhijigou.py
浏览文件 @
21aafa31
...
...
@@ -30,8 +30,8 @@ def getJson(url, headers, s):
ip
=
baseCore
.
get_proxy
()
log
.
info
(
f
'当前使用的ip是{ip}'
)
# req = requests.get(url, headers=headers, timeout=20)
req
=
s
.
get
(
url
,
headers
=
headers
,
proxies
=
ip
,
timeout
=
(
5
,
10
))
#
req = s.get(url, headers=headers, timeout=(5, 10))
#
req = s.get(url, headers=headers, proxies=ip, timeout=(5, 10))
req
=
s
.
get
(
url
,
headers
=
headers
,
timeout
=
(
5
,
10
))
dataJson
=
req
.
json
()
if
dataJson
[
'errorCode'
]
!=
0
:
raise
...
...
@@ -116,7 +116,7 @@ def doJob():
dics
.
append
(
dic
)
log
.
info
(
f
'{socialCreditCode}==={tycId}===共采集{len(dics)}条记录'
)
if
dics
:
req
=
sendData
(
'http://1
14.115.236.206
:8088/sync/branch'
,
dics
)
req
=
sendData
(
'http://1
.95.72.34
:8088/sync/branch'
,
dics
)
log
.
info
(
f
'{socialCreditCode}==={req.text}'
)
takeTime
=
baseCore
.
getTimeCost
(
start
,
time
.
time
())
log
.
info
(
f
'{socialCreditCode}==={req.text}===耗时{takeTime}'
)
...
...
习近平讲话/1.py
浏览文件 @
21aafa31
# -*-
coding: utf-8 -*-
# -*-
coding: utf-8 -*-
...
...
@@ -12,7 +12,7 @@ from kafka import KafkaProducer
from
requests.packages
import
urllib3
from
datetime
import
datetime
,
timedelta
urllib3
.
disable_warnings
()
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
14.115.221.202
:27017'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'人民网-习讲话数据库_copy'
]
db_storage
=
pymongo
.
MongoClient
(
'mongodb://1
.95.69.135
:27017'
,
username
=
'admin'
,
password
=
'ZZsn@9988'
)
.
ZZSN
[
'人民网-习讲话数据库_copy'
]
def
newsdata
(
art_content_dict
,
art_type_dict
,
dic_lables
):
for
key
,
value
in
art_content_dict
.
items
():
...
...
@@ -61,7 +61,7 @@ def newsdata(art_content_dict,art_type_dict,dic_lables):
del
post_dict
[
'tags'
]
del
post_dict
[
'title_pd'
]
# 发送kafka
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1.95.
3.121:9092'
],
max_request_size
=
1024
*
1024
*
20
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'1.95.
78.131:9092'
],
max_request_size
=
1024
*
1024
*
20
,
api_version
=
(
2
,
5
,
0
)
)
kafka_result
=
producer
.
send
(
"research_center_fourth"
,
json
.
dumps
(
post_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论