Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
王景浩
zzsn_spider
Commits
b52e4502
提交
b52e4502
authored
2月 26, 2024
作者:
薛凌堃
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
2/26
上级
ca40e9aa
隐藏空白字符变更
内嵌
并排
正在显示
6 个修改的文件
包含
252 行增加
和
45 行删除
+252
-45
研究中心需更新企业.py
base/研究中心需更新企业.py
+62
-0
test.py
comData/BaseInfo_qcc/test.py
+15
-17
test_1.py
comData/BaseInfo_qcc/test_1.py
+36
-0
zyqmshggldxzhy19.py
comData/important_meeting/zyqmshggldxzhy19.py
+69
-24
get_tokenCookies.py
comData/weixin_solo/get_tokenCookies.py
+1
-1
test.py
test.py
+69
-3
没有找到文件。
base/研究中心需更新企业.py
0 → 100644
浏览文件 @
b52e4502
from
apscheduler.schedulers.blocking
import
BlockingScheduler
from
apscheduler.schedulers.blocking
import
BlockingScheduler
import
pandas
as
pd
import
redis
def putCom():
    """Queue companies from the Excel workbook into Redis for a base-info update.

    Every row of the workbook is read as a (social_code, name) pair. A row is
    skipped when its code is in the exclusion set below, when the code
    contains 'ZZSN' or 'ZD', or when either cell is empty. Every remaining
    row is pushed to the Redis list 'UpdateBasdeInfo:SocialCode_CompanyName'
    as the string '<social_code>|<name>'.
    """
    # Credit codes that must not be queued; a set gives O(1) membership tests.
    com_list = {
        '91210000558190456G', '914200001000115161', '911100007109310534',
        '9111000071093123XX', '91110000100017643K', '91110000100018267J',
        '91110000MA01P657XY', '91230100127057741M', '91440300190346175T',
        'ZZSN22083000000003', '91110000400000720M', '911100001055722912',
        '91110000100005220B', '911100001000094165', '91310000132200821H',
        '911100001000128855', '91110000710924910P', '91110000710924929L',
        '911100007109225442', '9111000071092649XU', '91310000MA1FL70B67',
        '911100007109311097', '912201011239989159', '911100007178306183',
        '91310000MA7ALG04XG', '91110000100017707H', '91110000710929498G',
        '91110000100010249W', '9151000062160427XG', '91310000MA1FL4B24G',
        '91110000400001889L', '9144030010001694XX', '91110000100000825Q',
        '91110000100006194G', '91110000717828315T', '91110000100001043E',
        '91110000MA005UCQ5P', '91110000710935732K', '91110000710930392Y',
        '91110000710930296M', '911100007109303176', '91110000710925243K',
        '91110000100014071Q', '91110000100009563N', '9111000071093107XN',
        '9111000010001002XD', '91110000100001852R', '91110000100001625L',
        '911100001000080343', '91110000400008060U', '91110000101699383Q',
        '91110000100000489L', '9111000071092868XL', '91110000100001035K',
        '911100004000011410', '91110000710933809D', '91110000100010310K',
        '91133100MABRLCFR5Q', '91110000MA001HYK9X', '911100001000016682',
        '911100007109279199', '12100000400010275N', '91110000710935636A',
        '91110000100024800K', '9144000076384341X8', '91440000100005896P',
        '91110000MA01W8B394', '91110000717830650E', '91110000100003057A',
        'ZZSN22061600000001', '91310000MA1FL0LX06', '9111000010169286X1',
        '91110000100010433L', '91110000100010660R', '91110000102016548J',
        '91110000100001676W', '9111000071092200XY', '91133100MA0G9YKT8B',
        '9111000010000093XR', '91110000100006485K', '91360702MA7FK4MR44',
        '91420100MA4L0GG411', '91110000101625149Q', '12100000400006022G',
        '912302001285125661', '91110000100005888C', '911100007109250324',
        '91110000100024915R', '9111000040000094XW', '91310000MA1FL1MMXL',
        '91110000100015058K', '91110000710929930X', '91133100MA0GBL5F38',
        '9111000010000085X6', '91110000101100414N',
    }
    # Read every cell as text: all-digit credit codes would otherwise be
    # parsed as numbers, which breaks both the exclusion lookup and the
    # substring checks below.
    df = pd.read_excel('D:\\企业数据\\数据组提供\\国内企业.xlsx', dtype=str)
    # Connect to the Redis database.
    # NOTE(review): host and password are hard-coded; consider moving them to configuration.
    r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
    for social_code, com_name in zip(df['social_code'], df['name']):
        # Empty cells are NaN even with dtype=str; they cannot be queued.
        if pd.isna(social_code) or pd.isna(com_name):
            continue
        if social_code in com_list:
            continue
        if 'ZZSN' in social_code or 'ZD' in social_code:
            continue
        item = social_code + '|' + com_name
        # The key spelling ("Basde") is kept as-is: it is a runtime identifier.
        r.rpush('UpdateBasdeInfo:SocialCode_CompanyName', item)
def putCom_task():
    """Schedule putCom with a cron trigger (day=1, hour=0, minute=0) and start the scheduler.

    Any Exception raised while the scheduler runs is printed rather than
    propagated.
    """
    # Create the scheduler instance.
    monthly_scheduler = BlockingScheduler()
    # Run once a month.
    monthly_scheduler.add_job(putCom, trigger='cron', day=1, hour=0, minute=0)
    try:
        monthly_scheduler.start()
    except Exception as err:
        print('定时采集异常', err)
# Start the scheduled task only when this file is executed as a script.
if __name__ == '__main__':
    putCom_task()
\ No newline at end of file
comData/BaseInfo_qcc/test.py
浏览文件 @
b52e4502
import
pandas
as
pd
# from pandas import DataFrame as df
import
pymysql
import
redis
# Connect to the MySQL database `clb_project`.
# NOTE(review): host, user and password are hard-coded — consider loading them from configuration.
cnx = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='clb_project',
                      charset='utf8mb4')
# Connect to Redis (database 6).
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
import urllib3

# Silence urllib3's InsecureRequestWarning for this process.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Load every (relationName, relationId) pair from klb_company.
with cnx.cursor() as cursor:
    select = """select relationName, relationId from klb_company"""
    cursor.execute(select)
    results = cursor.fetchall()

# Queue each company on the Redis list as "<name>|<id>".
for result in results:
    name = result[0]
    xydm = result[1]
    item = f'{name}|{xydm}'
    # Push the value built above; `cell_value` is not defined anywhere in
    # this script, so pushing it would raise NameError on the first row.
    r.rpush('SousuoBaidu:companyname', item)
# Name of the Redis list to de-duplicate.
list_name = 'BaseInfoEnterpriseMz:gnqy_socialCode'
# Fetch every element currently in the list.
elements = r.lrange(list_name, 0, -1)
# Count how many times each element occurs.
occurrences = {}
for element in elements:
    occurrences[element] = occurrences.get(element, 0) + 1
# Remove only the surplus copies. LREM with count=0 deletes *every* match
# (which would empty the list), so a negative count is used instead: it
# removes |count| matches scanning from the tail, keeping the first
# occurrence in its original position.
for element, count in occurrences.items():
    if count > 1:
        r.lrem(list_name, -(count - 1), element)
# Print the list after de-duplication.
print(r.lrange(list_name, 0, -1))
comData/BaseInfo_qcc/test_1.py
0 → 100644
浏览文件 @
b52e4502
import
pandas
as
pd
# from pandas import DataFrame as df
import
pymysql
# Connect to the MySQL database `clb_project`.
# NOTE(review): host, user and password are hard-coded — consider loading them from configuration.
cnx = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='clb_project',
                      charset='utf8mb4')
import urllib3

# Silence urllib3's InsecureRequestWarning for this process.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# df_all = pd.read_excel('D:\\企业数据\\数据组提供\\国内企业.xlsx', dtype=str)
# Read every (relationName, relationId) pair from klb_company.
with cnx.cursor() as cursor:
    select = """select relationName, relationId from klb_company"""
    cursor.execute(select)
    results = cursor.fetchall()

# For each pair, set the name on sys_base_enterprise rows that have the same
# credit code and no name yet; commit after every row and report progress.
update = '''update sys_base_enterprise set name = %s where name is null and social_credit_code = %s'''
for name, xydm in results:
    with cnx.cursor() as cursor:
        cursor.execute(update, (name, xydm))
        cnx.commit()
    print(f'{name}==={xydm}更新完成')
# list_xydm = ['91110000102017145R','911100001021096991','9111000010285973X7','91110000108283057Y','911100003180821571','91110000322283429E','91110000336431162N','911100005620857121','911100006000036940','911100006000107204','911100006000348885','911100006004827014','911100006337095702','91110000633713369X','911100006337942853','9111000063379674X4','9111000066990444XF','91110000672354637K','91110000700004889C','91110000700006921H','91110000700049024C','91110000700084217T','911100007001499141','9111000070038501XJ','91110000710923360K','91110000710924945A','911100007109255774','911100007177242684','91110000723951109B','91110000726360320G','91110000726362190T','911100007263731643','911100007423131451','91110000754166859U','91110000767525590U','91110000771589298U','911100007776681570','91110000783967006U','91110000802062000J','91110101101100895C','91110101335453570K','91110101355304193A','91110101579007657G','91110101783962889A','911101020592352188','911101021011011341','91110102634381829U','91110102674290067J','91110102685772854R','9111010278170742XX','91110102MA01FFJ36J','91110105051390889B','91110105101756720B','91110105306495333L','91110105306737662D','911101053179472352','91110105318247193G','91110105335500066Q','9111010535131161X2','91110105357967759L','91110105397625067T','91110105400614650L','91110105562128137P','91110105575219505U','911101055790551576','91110105585848161G','911101055938354164','911101055977289680','91110105600015572M','91110105625911031F','911101056336607540','91110105664618436J','91110105669928206J','91110105672840619D','91110105679620184F','911101056876404680','91110105690843864U','91110105726334827M','91110105756025873C','91110105756700197H','9111010576143898XE','91110105764202737L','91110105777670900X','91110105783991313X','911101057877635020','91110105790696320H','91110105801719541B','9111010580171955X3','9111010580177089XM','91110105802095822J','91110105MA002Q6M79','91110105MA003RD50R','91110105MA004C0H06','91110105MA00AGXN3L','91110105MA
00FJHN72','91110105MA01AEWR5C','91110105MA01L9PH51','911101060695678147','911101060741434189','91110106101133080K','91110106306572212M','91110106351301243L','91110106567475437Y','911101065768942978','91110106585840012D','91110106587714554K','91110106593832696G','91110106633760720H','91110106633764772R','91110106663111019U','9111010667059416X2','91110106675098771D','911101066932508023','91110106749395454K','911101067533312850','9111010676504112XW','911101067855339571','911101068022066683','91110106MA0056B19T','91110106MA005DBW1G','91110106MA01P1RE3Y','91110107102288949G','91110107587683145R','91110107MA009GQ72T','911101080513793057','91110108061322142F','911101080627636876','91110108062782191G','911101080628016980','911101080649193741','91110108067265302X','91110108074122078Y','911101080765656577','91110108078505359A','91110108078545633M','911101080785732550','91110108089647010H','911101080918560737','91110108093369842B','91110108096441731D','911101080984827059','91110108099067984A','91110108099442801R','9111010810110401X3','911101081011420915','91110108101609659C','91110108101880422A','911101081020223907','9111010810202736X2','91110108102094378J','911101083065093288','91110108306623614J','91110108318056936P','91110108318058456U','9111010831813798XE','91110108327142377N','911101083271749266','911101083272391527','91110108335481926M','91110108335562435H','911101083363962058','911101083398292057','91110108344290793F','91110108344314759F','911101083443180558','911101083443783306','91110108344403743F','91110108355313321X','91110108400001643B','91110108551427625G','91110108554837179A','91110108554890762H','91110108560358422K','91110108560385447N','91110108560432856H','91110108562135265P','91110108563622495U','91110108565780884D','9111010856749593XU','911101085694855326','911101085694925139','91110108569524423F','911101085712035817','9111010857128414X8','911101085712845102','91110108576914390R','91110108576914817K','911101085790313156','911101085844819439','911101085858619
72A','9111010858587583XQ','91110108587665983J','911101085890746187','91110108590662476F','911101085923662400','9111010859963405XW','91110108599644434U','91110108599663854W','911101086000694820','911101086003726929','91110108600404359L','91110108633708906M','91110108660513776K','91110108661550528Q','911101086615579497','91110108662151975E','91110108662170324C','911101086621777295','911101086631036849','911101086631154075','91110108663124944W','91110108663136638D','91110108664619674E','911101086656289355','91110108666258040N','911101086684483666','91110108671727577D','91110108672826657J','911101086738170589','9111010867662354XX','911101086766404898','91110108679604408D','91110108679611421U','911101086796241695','911101086804563776','91110108682894987G','911101086835621402','91110108686919328W','911101086883662373','91110108690011590J','91110108693213091F','911101086950387332','9111010869504894XN','91110108696323261L','91110108699627252X','91110108700235062K','91110108718777804Y','91110108718785556J','91110108722617934K','9111010872357215XK','91110108723952478G','911101087263410239','91110108733464566A','911101087355893625','9111010873559070X0','91110108735591489G','91110108737656338N','911101087376669155','91110108740421820F','91110108746113570P','91110108746729965F','91110108752161931Y','91110108753327825C','9111010875333972X7','9111010875467591XH','91110108754681201W','91110108758242935T','911101087582455976','911101087601419302','9111010876142254XU','91110108762181186P','911101087629781362','911101087635308194','91110108766287121Q','911101087684682847','91110108768471723F','91110108769354705D','91110108769356188B','91110108769900489W','911101087704233332','91110108770425654N','91110108771981556U','91110108771986242H','91110108773361465H','91110108773369432Y','911101087740615606','91110108774714285P','91110108775491714G','91110108776352708P','91110108777650264L','911101087776681301','911101087795289672','9111010878020592XF','91110108780217285R','91110108780238166U',
'91110108780955384Y','91110108780964686N','91110108781703664R','91110108782543551R','91110108783218849X','911101087839528242','911101087861701904','91110108790650445R','911101087921006070','911101087934019542','91110108794082078E','911101087951375794','91110108797552733T','91110108798525948B','911101087990254941','911101088011707638','91110108802021110U','911101088020333577','91110108802041787A','91110108802045657E','91110108802068007C','911101088020726207','91110108802109673L','91110108MA001N718J','91110108MA0021P69M','91110108MA002XL790','91110108MA003LNY5D','91110108MA003TAB64','91110108MA003YWP4D','91110108MA0043KP9E','91110108MA004F704R','91110108MA004LW69T','91110108MA004RAE05','91110108MA0068GY1F','91110108MA006K8Y3P','91110108MA0071CR55','91110108MA007H3P5K','91110108MA0086HR6G','91110108MA008DA429','91110108MA008HB66A','91110108MA008P9657','91110108MA008PK575','91110108MA0092QT4X','91110108MA00AGM13W','91110108MA00AU927M','91110108MA00DCJ01Y','91110108MA00DE1B2B','91110108MA00FA7E5C','91110108MA00GUD41A','91110108MA0188DW84','91110108MA018J4L08','91110108MA018MCC6M','91110108MA01BBB16K','91110108MA01BP1P7B','91110108MA01C8JR79','91110108MA01DMU77F','91110108MA01DNC75B','91110108MA01EGPQX2','91110108MA01RCWH0M','91110108MA01RWUG4Y','91110108MA01WQE10K','91110109330285061E','91110109567452606A','91110109590674493W','9111011159606037XJ','911101116812383633','91110111700001063P','91110111MA003JG31Y','91110111MA01L2H65N','91110112551358631R','91110112700216160K','911101127177330338','911101127400501696','911101127415832828','9111011276218407XN','91110112766758720D','911101127889851669','9111011279904576XL','91110113080516727E','91110113306541555R','91110113576855941L','911101136812208172','91110113696302276M','91110113741581703F','91110113752642938G','91110113756000350K','911101137577358263','91110113762992739Q','91110113MA001GWR0M','9111011408549335X2','9111011455135477XA','91110114582515556F','91110114589114325P','91110114590663348R','91110114600067778R','9111
01146796092682','911101146804798353','91110114682851688K','91110114685107782U','911101146900106275','911101147226688971','911101147426127944','91110114744716255J','9111011475010452XE','91110114750144214X','911101147667528632','911101147770556682','91110114MA001D4X3K','91110114MA01AEDF61','911101151016193470','911101151029162045','91110115576904205N','91110115733451490U','91110115736468984G','91110115746112690C','91110115MA0048EL1E','91110115MA017K5L4X','91110116064905925Y','911101165996396434','91110116767502874D','91110116MA005B3L58','91110116MA01C0AY5K','91110117330386452K','911102283272479535','911102283512805187','91110228582505681F','9111022867876096X3','911102287177842959','91110228754175237Y','91110228MA006GMF6R','9111030205136463XD','91110302053604529E','91110302057391444C','9111030210221806X9','911103021022784175','91110302306784047Q','911103025604366893','91110302565797010A','91110302565820110R','911103026003405002','91110302677444199R','911103026787533566','911103026857985287','91110302735090430Y','911103027493533932','911103027493534308','9111030276350109XG','9111030278250283XW','91110302801786752A','91110302MA0048YP1U','91110302MA005FFW29','91110302MA0066E64R','91110302MA008RUM5Y','91110302MA00AR3F76','91110302MA00B9G54G','91110302MA00B9MQ4G','91110302MA00BJ6B78','91110302MA00G8EH41','91110302MA00GQGB73','91110302MA00GRMLX4','91110302MA01AAXW1T','91110302MA01HEH15A','91110400MA029M4P80','91120000058736889L','91120000103069967Y','91120000103870914U','91120000741366579H','91120000761253280R','911200007676306733','91120000MA06F32U06','911201046630720486','91120104789385824Y','911201048034181441','91120110083028075A','91120110300659413H','91120111103789059M','911201116847488286','91120111697419046H','91120111722991870E','91120111741361313C','911201117925370324','91120112064042488E','91120112093771153W','911201127803488406','91120112MA05WM7M02','91120113079635948K','91120113660321205C','911201137303863474','91120113783335092P','911201160587336021','911201160
612051730','91120116086586515N','91120116103481433E','91120116239661863L','91120116239663439U','91120116300452033U','911201163409833307','9112011655651308XJ','91120116562678278A','91120116586419887T','91120116592916759Q','91120116600910892X','911201166630834172','91120116671457175N','91120116675967105W','91120116697408240K','91120116712934952M','91120116718278597H','911201167244641345','91120116730357968N','911201167328190464','91120116735474530F','91120116746652267N','911201167491124502','91120116758137027D','91120116764348197P','911201167803339648','911201167833047124','91120116794980409G','911201167972829995','91120116MA05PQB5XT','91120116MA069EXE4T','91120116MA06DRM4XY','91120116MA0705BL96','911201186670532667','91120118735488182M','91120118762158867F','91120118MA05JQUK0G','91120118MA05QFTE3C','91120118MA05T81X8A','91120118MA0697LP9T','91120118MA06T62187','911202216877459052','91120222566105610W','91120222575108434H','911202227706300842','91120222MA05KHKY2P','91120222MA05UAG55H','91120223600894351U','91120223761280668D','91120224300621490X','911202245661215811','91120224681877747F','91120224700557176T','91120224MA07871882','9113000023565800XC','91130000752446136W','911301001044060055','91130100107744755W','91130100601090291K','91130100689298985P','91130100732910720N','91130100732914772Y','91130100745411306F','911301007468556979','91130100754027891A','91130100776179546U','91130100787019708G','91130101678512755X','91130101789833818T','91130104784084838J','911301257898318475','91130132MA0A7AYE2H','91130181791385313K','91130181791386236G','91130182685711699G','91130182791357005D','91130183575506723L','911301836870224839','911301837713256634','911301847216647980','911301856652827511','91130185669060689W','911301857233544863','9113018576519998X9','91130193074894510E','911301931078905417','911302005661986189','911302006799397935','9113020068276818X4','91130204347873513P','91130224666556267M','91130225MA07U3734B','91130229721600380L','91130281MA07KE3A17','91130282554499
915A','91130282750290545E','91130282MA07P2E981','91130283052683448M','91130283601019508G','91130293096112137N','911302936746855014','91130293774420041F','91130293796568127H','91130294308381129A','91130300601108025E','91130301329656355R','91130301601147147J','91130301678536714D','9113030168136727XL','91130301MA08XEAB6Q','911303035673840924','91130303673240113T','91130392601151496U','91130400730275049G','91130400MA08CCBX29','91130405748493781M','91130407MA0CUE7R5R','91130408MA08XFQJ61','911304246843413669','91130424757510432X','91130426699207653P','91130429679913817F','91130434MA07R66J0A','91130435564863776A','911304816690569897','91130481721643479D','91130500769806003D','91130501693478268W','91130525721609633G','911305287343786273','91130528737368715C','91130528743430458K','91130532723397101T','91130582095633598M','911306007006711044','911306055728149239','91130605596834603J','91130605601201668M','911306056746516436','91130606550419199D','91130637752422695J','9113070556195375XQ','91130705769821035L','911307317870236272','91130800757548430L','91130802728832010D','91130803MA0CMRHN8L','91130824771327626D','9113090010971869X1','91130900670338967F','91130900700660368J','91130900765171063F','911309007681306540','91130900779198582P','91130900791398851A','911309033200553935','91130922687004365L','91130923732923871M','9113092510971914XX','91130927789824567A','911309313081379192','91130931329627183Q','91130931557675726N','91130931MA07MEM874','91130981596828756D','91130981763428435T','911309825809745213','911309826760246784','911309827401835863','91130983MA07N7T53G','91130984567358986H','911310001057748114','9113100060134890XT','911310007634343680','91131022336194910M','9113102258690708X9','91131025731429118G','9113102630827362XB','91131028MA07KQYW1M','91131081335912618X','9113108267322544XE','91131082755457551W','91131100109804512G','91131100700865494B','91131100746851872J','91131101236298229N','911311017913820594','91131101MA08EWC63Q','911311220826746736','91131125780842443M'
,'91131127560486483D','911311816882161913','91131181700712973X','91131182093289869R','91131182109874836Y','91131182779189192D','91140000056278968H','91140000160963703Y','91140000330566883Q','911400005973987278','91140000701000732H','91140000715931861P','9114000074855218XX','91140100110047117B','91140100556560310M','91140100568462347Q','91140100578457859T','91140100586171535D','91140100713634804H','91140100713674988T','91140100715946502L','91140100754093899G','91140100792241864R','91140108731935643H','91140121757294792H','91140200770127753X','91140300694291892Q','91140300748578443L','91140311666645518N','91140322110721968E','911404001107700495','911405003257661198','91140500586185996N','91140522MA0JRG8Q99','911406006024604424','91140700065564755Y','91140700719819164X','91140800556559520F','91140800733994655W','911408007540500477','911408227011988570','9114082370110438XN','91140900798276152L','91140930794219089L','91140932729686916F','911500001141618816','91150000733284733B','91150100573268485R','911502047971536367','91150291594612345H','91150291701423911F','91150291787086089U','91150800701444800H','911525007116525588','91210000118887313L','91210000242666665H','91210000686609602P','91210000738792171J','912101000016232858','91210100117812926M','91210100243490227Y','912101006046149869','912101006625215774','91210100769563590L','91210100798474220Q','912101047845707057','91210105057192314D','91210105564689755B','91210105798464057N','912101063132548617','912101066671654449','912101066874643611','912101067386643481','91210106755504303X','91210106760090619H','91210106769599542Y','91210112675348347K','9121011271579529XH','91210112738671871J','91210112769598654A','91210112MA0P432U8R','91210112MA0P44NRXH','91210113088956102C','91210113578366586N','912101137555387734','91210113760060444N','91210114340680807E','91210181MA0XQF19XH','9121020011831278X6','91210200118561313C','91210200241297917U','912102006048648626','91210200677529168F','91210200716992578X','91210200723495318L','912
102007409045158','91210200744362020N','91210200751579797A','91210200MA0TR2P80G','91210204MA0QDTY23G','91210211MA0Y19KN3J','91210212732749973K','91210213604838795D','912102137327794199','912102137920497177','91210231736407196M','91210231756073509F','91210242118382526E','91210242728848952B','91210244559824828B','91210283696011524C','91210300MA0TT2DH9R','91210381241525115T','91210381567557686J','91210381603655081B','9121060008113718XY','91210600120109772C','91210600242814525N','912106005909453539','91210682781643139U','912107000721599341','912107002420322837','91210700577233300Y','91210700736737822M','91210700749779175E','91210700768337030B','912108006768912029','912108006926672350','91210900584194995N','912109007016848390','91210921MA0XX7NM2T','9121100059093999XJ','91211000726845918Y','9121100474278967XK','91211021590945396H','91211103MA102UN249','91211200561382299U','912112006737775195','9121122106407122X3','91211300791572581J','9121140055815624XQ','91211400747119974B','91220000664275090B','9122000070222720XH','91220000786819498L','91220101050518975F','91220101081849654U','912201012449758167','91220101310012867G','91220101697761845P','91220101727117306C','91220101730777372U','912201017484274776','912201017561541220','912201017671658636','912201017671930129','912201017710567829','91220101794404583W','91220101794442483P','91220101MA14TY564H','91220104675648489W','912202011239483018','912202016051690282','91220201682611844F','91220201786812798U','91220300565092475E','91220422MA0Y3F777J','91220501126870028U','91220501244575134M','91220501244583871H','91220501723101462L','91220501791105350N','91220521MA173E261W','91220523660141001R','9122082155977797XR','912224037742347248','912301001275921118','91230100607168790X','91230108127420096N','91230108MA1B0JHY73','912301107631541551','912301993011658539','91230199301195470J','91230199301211856H','9123019958512805X3','91230199696825683P','91230199775036754Q','91230199MA18XA396K','91230300130721906W','91230600560617893B','91230600
MA1BF4U29A','912306056926467095','912312815838229881','91233001569893325G','91310000051240362X','9131000005124956XX','9131000005304658XH','91310000057656705A','913100000625940784','91310000076492259A','91310000080013687R','913100001321244277','913100001321644452','913100001322131129','91310000132231361P','91310000132653687M','91310000132660318J','9131000013297865X2','9131000013413459XC','91310000300253536H','91310000301354857P','91310000312519282U','91310000324299264L','913100003244893596','91310000342056098N','913100003508461023','91310000351008055W','913100005515491712','91310000552962929G','91310000557430243L','913100005665114915','91310000568072146K','913100005708082124','91310000572698184Q','91310000582138631D','91310000582139781F','913100005867988561','91310000590384058P','91310000590397350D','913100006072612077','91310000607272280Q','91310000607286404W','913100006072944121','9131000060729499X9','91310000607311067X','91310000607339123C','913100006073622866','91310000607370331G','913100006073785958','91310000607403041J','91310000607404087G','91310000607422576R','913100006074261470','91310000607431720X','9131000060751688XT','913100006075916282','91310000607601064L','9131000063021103X7','91310000630453442X','91310000630483465G','91310000630948912G','91310000630965915K','91310000631137409B','91310000631191552K','913100006314149553','913100006314627462','91310000631521822M','91310000631534594F','913100006317557680','9131000066240918XU','91310000667780236Q','91310000669359189D','91310000669363292T','91310000669421384T','913100006711091037','91310000674575425N','913100006746031318','91310000680976508E','91310000680999558Q','91310000682254509X','91310000682263886E','913100006873885738','91310000690125272H','91310000692998798F','9131000069420172XB','9131000069578172XC','91310000695810746C','91310000697295223K','913100006988365624','913100007030116706','913100007030557379','913100007030973396','91310000703147746G','91310000703340159B','913100007294735903','9131000072949
3479N','91310000733344636F','91310000733365971U','91310000734057153P','91310000734081815D','91310000735408592G','913100007381411253','91310000738505304H','913100007385256042','913100007421053624','91310000747273971D','91310000748756174J','913100007487913409','91310000751468181F','91310000751863771N','913100007518999777','91310000756110429R','9131000075842961XY','9131000075855850XT','91310000759040681R','91310000761199691M','913100007653010244','91310000765583375Y','91310000765596096G','91310000768354199F','913100007694197083','91310000770201458T','913100007714584745','91310000772115131G','91310000772864810L','913100007728924912','91310000773282177G','913100007743059833','91310000774323671U','9131000077478390X5','91310000775216587B','91310000775238065L','913100007757838991','91310000778930516R','913100007824379352','91310000784298270U','91310000784783241W','91310000787230976G','91310000787878254Q','91310000792703993P','91310000792783700P','91310000797050338W','91310000MA1FL74J78','91310000MA1G8BHPXX','91310000MA1H38T58K','91310000MA1H3GDC5H','91310000MA1J37FN5Q','91310000MA1K2Q6J2X','91310000MA1K35P57Y','91310000MA7CJ9P40C','913101040693974723','91310104301579458U','91310104342172646H','913101043423482187','9131010455298989X1','913101045758452582','9131010458529260X8','91310104669392966T','91310104674626798A','913101046855187256','91310104692921256Y','91310104742657562G','91310104776270040D','91310104MA1FR0P33B','91310104MA1FR9PL54','91310105074824416U','91310105090037252C','91310105312284129D','91310105779753697E','91310106066020397Q','91310106550090004W','91310106630236093C','91310106MA1FY9LT3N','91310107051295590B','913101076076323035','91310107781531233F','91310109312143131N','91310110054590464F','91310110078155571L','913101100861724784','91310110342313605X','913101103507613521','91310110351027504X','91310110351154941K','91310110591673062R','91310110757926286X','91310110787862412B','9131011205506145X2','913101121326732580','91310112301708379M','91310112350881637E
','91310112350889276J','91310112351114237B','91310112557480662J','913101125868251134','91310112607425988Y','91310112607671054B','91310112630792962D','91310112767225977D','913101127785041388','913101127851867808','91310112789576698P','91310112MA1GB5HL74','91310112MA1GB63D5E','91310112MA1GBCU74Q','91310112MA1GBEPY9P','91310112MA1GBWLUXN','91310112MA1GC28U0L','91310112MA1GC78A07','91310112MA1GCHQP57','91310113086201072B','913101131345344112','91310113342290888U','91310113550058717X','91310113631482720W','91310113754764752Y','913101140693041410','91310114074811922G','91310114320742767K','9131011434217342X2','9131011455159938XA','913101145529068046','913101145619308064','91310114570796872F','91310114577469866W','9131011458207544XY','91310114588740092M','91310114591692730A','91310114630211689Y','913101146305896733','913101146308058904','913101146315357223','913101146319344919','91310114695793034W','913101147030104249','91310114754758651R','913101147557198576','91310114760573215T','913101147728614257','91310114781898318F','91310114MA1GT4926T','91310114MA1GURM19N','91310114MA1GW61HX2','91310114MA1GWJL62M','91310115051251125K','913101150512565326','913101150608727672','913101150609007219','91310115080028627C','9131011508201988XX','91310115084100518T','913101150938266958','913101151321295193','913101151339870722','913101153124932461','9131011532075221XC','91310115324253960J','91310115324284513E','9131011533262045X2','91310115332642560W','913101153986795507','91310115555949711X','91310115568057640Y','91310115569630816D','91310115570750452T','91310115572703801L','91310115599770596C','913101156822157531','91310115690170444F','913101156972022424','91310115703497359F','91310115767236430H','913101157732980993','91310115779776581R','91310115792736664G','91310115795654795D','91310115MA1H70PK5R','91310115MA1H727E7E','91310115MA1H7RLE45','91310115MA1H7W8439','91310115MA1H7W8514','91310115MA1H9HD02E','91310115MA1H9K3FX7','91310115MA1HATB40R','91310115MA1K39C71R','91310115MA1K3B1R09','91
310115MA1K3BQK2U','91310115MA1K3CM30B','91310115MA1K3F6C05','91310115MA1K3K2N9H','91310115MA1K3KJW0N','91310115MA1K3MP458','91310115MA1K41R2X3','91310115MA1K493TXQ','91310115MA1K4CLB55','91310115MA1K4MF39X','913101160637712405','913101161321521531','91310116555985835J','913101165601545691','91310116563135240C','91310116566563515F','913101166607195719','91310116662495241T','91310116671156516L','91310116676273009U','91310116687330646Y','91310116759882926H','913101175559503333','9131011756018678XG','91310117574182309H','91310117598194355D','913101176311798956','91310117632167028T','91310117662458598U','91310117662473499L','91310117662485385P','91310117669377619D','913200001347587142','91320000583783720B','913200006082630012','91320000710929340E','91320000743141824Y','91320000751254554N','91320100093975981A','91320100134974572K','91320100135847161T','913201025628951334','91320191134955910F','91320191726079387X','913201921349556628','913201922497944756','91320200135890776N','91320200135914870B','913204002508323014','91320500741304044W','91320509138285715E','91320509796141166A','91320582134789270G','913207007322513070','91320982571427139M','91321291703974741U','913300001429120051','9133000014293866XE','91330000142941287T','91330000710924531U','91330100253930310D','91330110MA2CGBC056','913302001440685655','91330200704800698F','91340000148941616G','91370112MABYCTU036']
# for xydm in list_xydm:
# for num_df in range(len(df_all)):
# social_code = str(df_all['social_code'][num_df])
# if social_code == xydm:
# com_name = str(df_all['name'][num_df])
# with cnx.cursor() as cursor:
# update = '''update sys_base_enterprise set name = %s where social_credit_code = %s'''
# cursor.execute(update, (com_name, xydm))
# cnx.commit()
# print(f'{xydm}===更新成功{com_name}')
# break
\ No newline at end of file
comData/important_meeting/zyqmshggldxzhy19.py
浏览文件 @
b52e4502
# 中央全面深化改革委员会会议
import
json
import
sys
import
time
import
redis
import
requests
from
bs4
import
BeautifulSoup
from
datetime
import
datetime
from
kafka
import
KafkaProducer
headers
=
{
# Extend the module search path, presumably so the BaseCore import below resolves.
# NOTE(review): this is an absolute, machine-specific Windows path — confirm it exists where the script runs.
sys.path.append('D:\\kkwork\\zzsn_spider\\base')
import BaseCore

# Shared BaseCore helper and the logger obtained from it.
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
header
=
{
'Accept'
:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
,
'Accept-Encoding'
:
'gzip, deflate, br'
,
'Accept-Language'
:
'zh-CN,zh;q=0.9'
,
...
...
@@ -26,22 +32,50 @@ headers = {
'sec-ch-ua-mobile'
:
'?0'
,
'sec-ch-ua-platform'
:
'"Windows"'
}
headers
=
{
'Accept'
:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
,
'Accept-Encoding'
:
'gzip, deflate, br'
,
'Accept-Language'
:
'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6'
,
'Connection'
:
'keep-alive'
,
'Cookie'
:
'cna=HcAKHtgXUG4CAQHBO1G6ZJYK'
,
'Host'
:
'news.12371.cn'
,
'Sec-Fetch-Dest'
:
'document'
,
'Sec-Fetch-Mode'
:
'navigate'
,
'Sec-Fetch-Site'
:
'none'
,
'Sec-Fetch-User'
:
'?1'
,
'Upgrade-Insecure-Requests'
:
'1'
,
'User-Agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'
,
'sec-ch-ua'
:
'"Not A(Brand";v="99", "Microsoft Edge";v="121", "Chromium";v="121"'
,
'sec-ch-ua-mobile'
:
'?0'
,
'sec-ch-ua-platform'
:
'"Windows"'
}
if
__name__
==
"__main__"
:
# 中央全面深化改革委员会会议
r
=
redis
.
Redis
(
host
=
'114.115.236.206'
,
port
=
6379
,
password
=
'clbzzsn'
,
db
=
5
)
# 中央全面深化改革领导小组会议
# url_list = ['https://www.12371.cn/special/zyqmshggldxzhy19/', 'https://www.12371.cn/special/zyqmshggldxzhy19/']
url_list
=
[
'https://www.12371.cn/special/zyqmshggldxzhy19/'
]
for
url
in
url_list
:
request
=
requests
.
get
(
url
=
url
,
headers
=
headers
)
soup
=
BeautifulSoup
(
request
.
content
,
'html.parser'
)
request
.
encoding
=
request
.
apparent_encoding
# print(soup)
info_html
=
soup
.
find
(
'div'
,
id
=
'SUBD1663831285709121'
)
.
find
(
'ul'
,
class_
=
'ul_list'
)
ul_list
=
info_html
.
find_all
(
'li'
)
for
ul
in
ul_list
:
url
=
'https://www.12371.cn/special/zyqmshggldxzhy19/'
request
=
requests
.
get
(
url
=
url
,
headers
=
header
)
soup
=
BeautifulSoup
(
request
.
content
,
'html.parser'
)
# print(soup)
request
.
encoding
=
request
.
apparent_encoding
# print(soup)
# info_html = soup.find('div', id='SUBD1663831285709121').find('ul', class_='ul_list')
info_html_list
=
soup
.
find_all
(
'div'
,
class_
=
'dyw1023_right_list01 hyty'
)
flag
=
1
for
info_html
in
info_html_list
:
if
flag
==
1
:
info_code
=
'IN-20230816-0004'
sid
=
'1691633319715676162'
else
:
sid
=
'1691633869186277378'
info_code
=
'IN-20230816-0005'
ul_list
=
info_html
.
find
(
'ul'
,
class_
=
'ul_list'
)
.
find_all
(
'li'
)
for
ul
in
ul_list
[::
-
1
]:
publishDate_
=
str
(
ul
.
find
(
'span'
)
.
text
)
date_obj
=
datetime
.
strptime
(
publishDate_
,
"
%
Y年
%
m月
%
d日"
)
publishDate
=
date_obj
.
strftime
(
'
%
Y-
%
m-
%
d'
)
...
...
@@ -51,18 +85,27 @@ if __name__ == "__main__":
newsUrl
=
ul
.
find
(
'a'
)[
'href'
]
summary
=
ul
.
find
(
'a'
)
.
text
# todo: 链接判重
news_request
=
requests
.
get
(
url
=
newsUrl
,
headers
=
headers
)
try
:
flag
=
r
.
sismember
(
info_code
,
newsUrl
)
if
flag
:
log
.
info
(
'信息已采集入库过'
)
continue
except
Exception
as
e
:
continue
news_request
=
requests
.
get
(
url
=
newsUrl
,
headers
=
headers
,
allow_redirects
=
False
)
news_soup
=
BeautifulSoup
(
news_request
.
content
,
'html.parser'
)
print
(
news_soup
)
title
=
news_soup
.
find
(
'h1'
,
class_
=
'big_title'
)
.
text
source
=
news_soup
.
find
(
'div'
,
class_
=
'title_bottom'
)
.
find
(
'i'
)
.
text
contentwithTag
=
news_soup
.
find
(
'div'
,
class_
=
'word'
)
content
=
contentwithTag
.
text
if
url
==
'https://www.12371.cn/special/zyqmshggldxzhy19/'
:
sid
=
'1691633319715676162'
else
:
sid
=
'1691633869186277378'
# print(news_soup)
try
:
title
=
news_soup
.
find
(
'h1'
,
class_
=
'big_title'
)
.
text
source
=
news_soup
.
find
(
'div'
,
class_
=
'title_bottom'
)
.
find
(
'i'
)
.
text
contentwithTag
=
news_soup
.
find
(
'div'
,
class_
=
'word'
)
content
=
contentwithTag
.
text
except
Exception
as
e
:
log
.
error
(
f
'解析网页出错{newsUrl}'
)
continue
time_now
=
time
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
,
time
.
localtime
())
dic_info
=
{
'id'
:
'1681549361661489154'
+
str
(
int
(
time
.
time
()
*
1000
)),
'title'
:
title
,
...
...
@@ -79,6 +122,7 @@ if __name__ == "__main__":
'createDate'
:
time_now
,
}
r
.
sadd
(
info_code
,
newsUrl
)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'114.115.159.144:9092'
])
try
:
kafka_result
=
producer
.
send
(
"research_center_fourth"
,
...
...
@@ -89,4 +133,5 @@ if __name__ == "__main__":
print
(
e
)
print
(
'发送kafka异常!'
)
finally
:
producer
.
close
()
\ No newline at end of file
producer
.
close
()
flag
+=
1
\ No newline at end of file
comData/weixin_solo/get_tokenCookies.py
浏览文件 @
b52e4502
...
...
@@ -56,7 +56,7 @@ if __name__=="__main__":
url
=
"https://mp.weixin.qq.com/"
browser
.
get
(
url
)
# 可改动
time
.
sleep
(
2
0
)
time
.
sleep
(
8
0
)
s
=
requests
.
session
()
#获取到token和cookies
...
...
test.py
浏览文件 @
b52e4502
...
...
@@ -170,5 +170,71 @@ for data in datas:
# f.write(dic_info_)
# break
# req = requests.post('http://192.168.1.236:5000/translate',data=dic_info_,headers=headers)
req
=
requests
.
post
(
'http://117.78.23.14:5001/translate'
,
data
=
dic_info_
,
headers
=
headers
)
log
.
info
(
req
.
text
)
\ No newline at end of file
req
=
requests
.
post
(
'http://117.78.23.14:5000/translate'
,
data
=
dic_info_
,
headers
=
headers
)
log
.
info
(
req
.
text
)
# import re, datetime
#
#
# def paserTime(publishtime):
# timeType = ['年前', '月前', '周前', '前天', '昨天', '天前', '今天', '小时前', '分钟前']
# current_datetime = datetime.datetime.now()
# publishtime = publishtime.strip()
# print(publishtime)
#
# try:
# if '年前' in publishtime:
# numbers = re.findall(r'\d+', publishtime)
# day = int(numbers[0])
# delta = datetime.timedelta(days=365 * day)
# publishtime = current_datetime - delta
# elif '月前' in publishtime:
# numbers = re.findall(r'\d+', publishtime)
# day = int(numbers[0])
# delta = datetime.timedelta(months=day)
# publishtime = current_datetime - delta
# elif '周前' in publishtime:
# numbers = re.findall(r'\d+', publishtime)
# day = int(numbers[0])
# delta = datetime.timedelta(weeks=day)
# publishtime = current_datetime - delta
# elif '天前' in publishtime:
# numbers = re.findall(r'\d+', publishtime)
# day = int(numbers[0])
# delta = datetime.timedelta(days=day)
# publishtime = current_datetime - delta
# elif '前天' in publishtime:
# delta = datetime.timedelta(days=2)
# publishtime = current_datetime - delta
# elif '昨天' in publishtime:
# current_datetime = datetime.datetime.now()
# delta = datetime.timedelta(days=1)
# publishtime = current_datetime - delta
# elif '今天' in publishtime or '小时前' in publishtime or '分钟前' in publishtime:
# if '小时' in publishtime:
# hour = publishtime.split("小时")[0]
# else:
# hour = 0
# if hour != 0:
# min = publishtime.split("小时")[1].split("分钟")[0]
# else:
# min = publishtime.split("分钟")[0]
#
# delta = datetime.timedelta(hours=int(hour), minutes=int(min))
# publishtime = current_datetime - delta
# elif '年' in publishtime and '月' in publishtime:
# time_format = '%Y年%m月%d日'
# publishtime = datetime.datetime.strptime(publishtime, time_format)
# elif '月' in publishtime and '日' in publishtime:
# current_year = current_datetime.year
# time_format = '%Y年%m月%d日'
# publishtime = str(current_year) + '年' + publishtime
# publishtime = datetime.datetime.strptime(publishtime, time_format)
# except Exception as e:
# print('时间解析异常!!')
# return publishtime
#
# if __name__ == "__main__":
# publishtime_ = '1小时17分钟前'
# publish_time = paserTime(publishtime_).strftime("%Y-%m-%d")
# print(publish_time)
\ No newline at end of file
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论