提交 b52e4502 作者: 薛凌堃

2/26

上级 ca40e9aa
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.schedulers.blocking import BlockingScheduler
import pandas as pd
import redis
def putCom():
    """Queue companies from the domestic-enterprise spreadsheet in Redis.

    Reads ``social_code`` and ``name`` from the Excel workbook and right-pushes
    one ``"<social_code>|<name>"`` string per company onto the Redis list
    ``UpdateBasdeInfo:SocialCode_CompanyName``.

    A row is skipped when:
      * either cell is empty,
      * its social code appears in the exclusion set below, or
      * its social code contains ``ZZSN`` or ``ZD``.
    """
    # Social codes that must never be queued. A set gives O(1) membership
    # tests instead of scanning a list once per spreadsheet row.
    excluded_codes = {
        '91210000558190456G', '914200001000115161', '911100007109310534', '9111000071093123XX',
        '91110000100017643K', '91110000100018267J', '91110000MA01P657XY', '91230100127057741M',
        '91440300190346175T', 'ZZSN22083000000003', '91110000400000720M', '911100001055722912',
        '91110000100005220B', '911100001000094165', '91310000132200821H', '911100001000128855',
        '91110000710924910P', '91110000710924929L', '911100007109225442', '9111000071092649XU',
        '91310000MA1FL70B67', '911100007109311097', '912201011239989159', '911100007178306183',
        '91310000MA7ALG04XG', '91110000100017707H', '91110000710929498G', '91110000100010249W',
        '9151000062160427XG', '91310000MA1FL4B24G', '91110000400001889L', '9144030010001694XX',
        '91110000100000825Q', '91110000100006194G', '91110000717828315T', '91110000100001043E',
        '91110000MA005UCQ5P', '91110000710935732K', '91110000710930392Y', '91110000710930296M',
        '911100007109303176', '91110000710925243K', '91110000100014071Q', '91110000100009563N',
        '9111000071093107XN', '9111000010001002XD', '91110000100001852R', '91110000100001625L',
        '911100001000080343', '91110000400008060U', '91110000101699383Q', '91110000100000489L',
        '9111000071092868XL', '91110000100001035K', '911100004000011410', '91110000710933809D',
        '91110000100010310K', '91133100MABRLCFR5Q', '91110000MA001HYK9X', '911100001000016682',
        '911100007109279199', '12100000400010275N', '91110000710935636A', '91110000100024800K',
        '9144000076384341X8', '91440000100005896P', '91110000MA01W8B394', '91110000717830650E',
        '91110000100003057A', 'ZZSN22061600000001', '91310000MA1FL0LX06', '9111000010169286X1',
        '91110000100010433L', '91110000100010660R', '91110000102016548J', '91110000100001676W',
        '9111000071092200XY', '91133100MA0G9YKT8B', '9111000010000093XR', '91110000100006485K',
        '91360702MA7FK4MR44', '91420100MA4L0GG411', '91110000101625149Q', '12100000400006022G',
        '912302001285125661', '91110000100005888C', '911100007109250324', '91110000100024915R',
        '9111000040000094XW', '91310000MA1FL1MMXL', '91110000100015058K', '91110000710929930X',
        '91133100MA0GBL5F38', '9111000010000085X6', '91110000101100414N',
    }

    # dtype=str keeps every cell as text, so an all-digit social code is never
    # turned into a number (which would break the substring checks below).
    df = pd.read_excel('D:\\企业数据\\数据组提供\\国内企业.xlsx', dtype=str)

    # Connect to the Redis database.
    # NOTE(review): host and password are hard-coded; consider configuration.
    r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)

    for social_code, com_name in zip(df['social_code'], df['name']):
        # Empty cells come back as NaN; they cannot form a valid queue item.
        if pd.isna(social_code) or pd.isna(com_name):
            continue
        if social_code in excluded_codes:
            continue
        if 'ZZSN' in social_code or 'ZD' in social_code:
            continue
        r.rpush('UpdateBasdeInfo:SocialCode_CompanyName', f'{social_code}|{com_name}')
def putCom_task():
    """Schedule ``putCom`` with a monthly cron trigger and start the scheduler.

    Any ``Exception`` raised while the scheduler runs is printed rather than
    propagated.
    """
    # Create the scheduler instance.
    monthly_scheduler = BlockingScheduler()
    # Cron fields: day 1 of the month at 00:00, i.e. once per month.
    monthly_scheduler.add_job(putCom, 'cron', day=1, hour=0, minute=0)
    try:
        # An immediate run before the first scheduled one is deliberately
        # left disabled:
        # putCom()
        monthly_scheduler.start()
    except Exception as e:
        print('定时采集异常', e)
# Script entry point: set up and start the monthly push job.
if __name__ == '__main__':
    putCom_task()
\ No newline at end of file
import pandas as pd
# from pandas import DataFrame as df
import pymysql
import redis
# Connect to the MySQL source database and to Redis.
# NOTE(review): hosts and passwords are hard-coded; consider configuration.
cnx = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4')
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Fetch every (relationName, relationId) pair, then release the connection:
# nothing below needs MySQL any more.
try:
    with cnx.cursor() as cursor:
        select = """select relationName, relationId from klb_company"""
        cursor.execute(select)
        results = cursor.fetchall()
finally:
    cnx.close()

# Push one "<name>|<id>" entry per company onto the Redis list.
for result in results:
    name = result[0]
    xydm = result[1]
    item = f'{name}|{xydm}'
    # The original pushed the undefined name `cell_value` (NameError);
    # `item` is the value that was built for this push.
    r.rpush('SousuoBaidu:companyname', item)
from collections import Counter

# Name of the Redis list to de-duplicate.
list_name = 'BaseInfoEnterpriseMz:gnqy_socialCode'
# Snapshot of every element currently in the list.
elements = r.lrange(list_name, 0, -1)
# Count occurrences locally. The original called LREM with count=0 to "count"
# an element, but that removes *every* occurrence, so the list ended up empty.
for element, count in Counter(elements).items():
    # More than one occurrence means there are duplicates to trim.
    if count > 1:
        # A negative count makes LREM delete from tail to head, so the first
        # (oldest) occurrence is the one that survives.
        r.lrem(list_name, -(count - 1), element)
# Print the list after processing.
print(r.lrange(list_name, 0, -1))
import pandas as pd
# from pandas import DataFrame as df
import pymysql
# Connect to the MySQL database that holds both the source table
# (klb_company) and the target table (sys_base_enterprise).
# NOTE(review): host and password are hard-coded; consider configuration.
cnx = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4')
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# df_all = pd.read_excel('D:\\企业数据\\数据组提供\\国内企业.xlsx', dtype=str)
try:
    # One cursor serves both the read and the per-row updates; the original
    # opened a fresh cursor per row that shadowed the outer one.
    with cnx.cursor() as cursor:
        select = """select relationName, relationId from klb_company"""
        cursor.execute(select)
        results = cursor.fetchall()
        # print(results)
        # Only rows whose name is still NULL are filled in.
        update = '''update sys_base_enterprise set name = %s where name is null and social_credit_code = %s'''
        for result in results:
            name = result[0]
            xydm = result[1]
            cursor.execute(update, (name, xydm))
            # Commit per row so finished updates survive a later failure.
            cnx.commit()
            print(f'{name}==={xydm}更新完成')
finally:
    # Always release the connection, even if a query fails.
    cnx.close()
# list_xydm = ['91110000102017145R','911100001021096991','9111000010285973X7','91110000108283057Y','911100003180821571','91110000322283429E','91110000336431162N','911100005620857121','911100006000036940','911100006000107204','911100006000348885','911100006004827014','911100006337095702','91110000633713369X','911100006337942853','9111000063379674X4','9111000066990444XF','91110000672354637K','91110000700004889C','91110000700006921H','91110000700049024C','91110000700084217T','911100007001499141','9111000070038501XJ','91110000710923360K','91110000710924945A','911100007109255774','911100007177242684','91110000723951109B','91110000726360320G','91110000726362190T','911100007263731643','911100007423131451','91110000754166859U','91110000767525590U','91110000771589298U','911100007776681570','91110000783967006U','91110000802062000J','91110101101100895C','91110101335453570K','91110101355304193A','91110101579007657G','91110101783962889A','911101020592352188','911101021011011341','91110102634381829U','91110102674290067J','91110102685772854R','9111010278170742XX','91110102MA01FFJ36J','91110105051390889B','91110105101756720B','91110105306495333L','91110105306737662D','911101053179472352','91110105318247193G','91110105335500066Q','9111010535131161X2','91110105357967759L','91110105397625067T','91110105400614650L','91110105562128137P','91110105575219505U','911101055790551576','91110105585848161G','911101055938354164','911101055977289680','91110105600015572M','91110105625911031F','911101056336607540','91110105664618436J','91110105669928206J','91110105672840619D','91110105679620184F','911101056876404680','91110105690843864U','91110105726334827M','91110105756025873C','91110105756700197H','9111010576143898XE','91110105764202737L','91110105777670900X','91110105783991313X','911101057877635020','91110105790696320H','91110105801719541B','9111010580171955X3','9111010580177089XM','91110105802095822J','91110105MA002Q6M79','91110105MA003RD50R','91110105MA004C0H06','91110105MA00AGXN3L','91110105MA
00FJHN72','91110105MA01AEWR5C','91110105MA01L9PH51','911101060695678147','911101060741434189','91110106101133080K','91110106306572212M','91110106351301243L','91110106567475437Y','911101065768942978','91110106585840012D','91110106587714554K','91110106593832696G','91110106633760720H','91110106633764772R','91110106663111019U','9111010667059416X2','91110106675098771D','911101066932508023','91110106749395454K','911101067533312850','9111010676504112XW','911101067855339571','911101068022066683','91110106MA0056B19T','91110106MA005DBW1G','91110106MA01P1RE3Y','91110107102288949G','91110107587683145R','91110107MA009GQ72T','911101080513793057','91110108061322142F','911101080627636876','91110108062782191G','911101080628016980','911101080649193741','91110108067265302X','91110108074122078Y','911101080765656577','91110108078505359A','91110108078545633M','911101080785732550','91110108089647010H','911101080918560737','91110108093369842B','91110108096441731D','911101080984827059','91110108099067984A','91110108099442801R','9111010810110401X3','911101081011420915','91110108101609659C','91110108101880422A','911101081020223907','9111010810202736X2','91110108102094378J','911101083065093288','91110108306623614J','91110108318056936P','91110108318058456U','9111010831813798XE','91110108327142377N','911101083271749266','911101083272391527','91110108335481926M','91110108335562435H','911101083363962058','911101083398292057','91110108344290793F','91110108344314759F','911101083443180558','911101083443783306','91110108344403743F','91110108355313321X','91110108400001643B','91110108551427625G','91110108554837179A','91110108554890762H','91110108560358422K','91110108560385447N','91110108560432856H','91110108562135265P','91110108563622495U','91110108565780884D','9111010856749593XU','911101085694855326','911101085694925139','91110108569524423F','911101085712035817','9111010857128414X8','911101085712845102','91110108576914390R','91110108576914817K','911101085790313156','911101085844819439','911101085858619
72A','9111010858587583XQ','91110108587665983J','911101085890746187','91110108590662476F','911101085923662400','9111010859963405XW','91110108599644434U','91110108599663854W','911101086000694820','911101086003726929','91110108600404359L','91110108633708906M','91110108660513776K','91110108661550528Q','911101086615579497','91110108662151975E','91110108662170324C','911101086621777295','911101086631036849','911101086631154075','91110108663124944W','91110108663136638D','91110108664619674E','911101086656289355','91110108666258040N','911101086684483666','91110108671727577D','91110108672826657J','911101086738170589','9111010867662354XX','911101086766404898','91110108679604408D','91110108679611421U','911101086796241695','911101086804563776','91110108682894987G','911101086835621402','91110108686919328W','911101086883662373','91110108690011590J','91110108693213091F','911101086950387332','9111010869504894XN','91110108696323261L','91110108699627252X','91110108700235062K','91110108718777804Y','91110108718785556J','91110108722617934K','9111010872357215XK','91110108723952478G','911101087263410239','91110108733464566A','911101087355893625','9111010873559070X0','91110108735591489G','91110108737656338N','911101087376669155','91110108740421820F','91110108746113570P','91110108746729965F','91110108752161931Y','91110108753327825C','9111010875333972X7','9111010875467591XH','91110108754681201W','91110108758242935T','911101087582455976','911101087601419302','9111010876142254XU','91110108762181186P','911101087629781362','911101087635308194','91110108766287121Q','911101087684682847','91110108768471723F','91110108769354705D','91110108769356188B','91110108769900489W','911101087704233332','91110108770425654N','91110108771981556U','91110108771986242H','91110108773361465H','91110108773369432Y','911101087740615606','91110108774714285P','91110108775491714G','91110108776352708P','91110108777650264L','911101087776681301','911101087795289672','9111010878020592XF','91110108780217285R','91110108780238166U',
'91110108780955384Y','91110108780964686N','91110108781703664R','91110108782543551R','91110108783218849X','911101087839528242','911101087861701904','91110108790650445R','911101087921006070','911101087934019542','91110108794082078E','911101087951375794','91110108797552733T','91110108798525948B','911101087990254941','911101088011707638','91110108802021110U','911101088020333577','91110108802041787A','91110108802045657E','91110108802068007C','911101088020726207','91110108802109673L','91110108MA001N718J','91110108MA0021P69M','91110108MA002XL790','91110108MA003LNY5D','91110108MA003TAB64','91110108MA003YWP4D','91110108MA0043KP9E','91110108MA004F704R','91110108MA004LW69T','91110108MA004RAE05','91110108MA0068GY1F','91110108MA006K8Y3P','91110108MA0071CR55','91110108MA007H3P5K','91110108MA0086HR6G','91110108MA008DA429','91110108MA008HB66A','91110108MA008P9657','91110108MA008PK575','91110108MA0092QT4X','91110108MA00AGM13W','91110108MA00AU927M','91110108MA00DCJ01Y','91110108MA00DE1B2B','91110108MA00FA7E5C','91110108MA00GUD41A','91110108MA0188DW84','91110108MA018J4L08','91110108MA018MCC6M','91110108MA01BBB16K','91110108MA01BP1P7B','91110108MA01C8JR79','91110108MA01DMU77F','91110108MA01DNC75B','91110108MA01EGPQX2','91110108MA01RCWH0M','91110108MA01RWUG4Y','91110108MA01WQE10K','91110109330285061E','91110109567452606A','91110109590674493W','9111011159606037XJ','911101116812383633','91110111700001063P','91110111MA003JG31Y','91110111MA01L2H65N','91110112551358631R','91110112700216160K','911101127177330338','911101127400501696','911101127415832828','9111011276218407XN','91110112766758720D','911101127889851669','9111011279904576XL','91110113080516727E','91110113306541555R','91110113576855941L','911101136812208172','91110113696302276M','91110113741581703F','91110113752642938G','91110113756000350K','911101137577358263','91110113762992739Q','91110113MA001GWR0M','9111011408549335X2','9111011455135477XA','91110114582515556F','91110114589114325P','91110114590663348R','91110114600067778R','9111
01146796092682','911101146804798353','91110114682851688K','91110114685107782U','911101146900106275','911101147226688971','911101147426127944','91110114744716255J','9111011475010452XE','91110114750144214X','911101147667528632','911101147770556682','91110114MA001D4X3K','91110114MA01AEDF61','911101151016193470','911101151029162045','91110115576904205N','91110115733451490U','91110115736468984G','91110115746112690C','91110115MA0048EL1E','91110115MA017K5L4X','91110116064905925Y','911101165996396434','91110116767502874D','91110116MA005B3L58','91110116MA01C0AY5K','91110117330386452K','911102283272479535','911102283512805187','91110228582505681F','9111022867876096X3','911102287177842959','91110228754175237Y','91110228MA006GMF6R','9111030205136463XD','91110302053604529E','91110302057391444C','9111030210221806X9','911103021022784175','91110302306784047Q','911103025604366893','91110302565797010A','91110302565820110R','911103026003405002','91110302677444199R','911103026787533566','911103026857985287','91110302735090430Y','911103027493533932','911103027493534308','9111030276350109XG','9111030278250283XW','91110302801786752A','91110302MA0048YP1U','91110302MA005FFW29','91110302MA0066E64R','91110302MA008RUM5Y','91110302MA00AR3F76','91110302MA00B9G54G','91110302MA00B9MQ4G','91110302MA00BJ6B78','91110302MA00G8EH41','91110302MA00GQGB73','91110302MA00GRMLX4','91110302MA01AAXW1T','91110302MA01HEH15A','91110400MA029M4P80','91120000058736889L','91120000103069967Y','91120000103870914U','91120000741366579H','91120000761253280R','911200007676306733','91120000MA06F32U06','911201046630720486','91120104789385824Y','911201048034181441','91120110083028075A','91120110300659413H','91120111103789059M','911201116847488286','91120111697419046H','91120111722991870E','91120111741361313C','911201117925370324','91120112064042488E','91120112093771153W','911201127803488406','91120112MA05WM7M02','91120113079635948K','91120113660321205C','911201137303863474','91120113783335092P','911201160587336021','911201160
612051730','91120116086586515N','91120116103481433E','91120116239661863L','91120116239663439U','91120116300452033U','911201163409833307','9112011655651308XJ','91120116562678278A','91120116586419887T','91120116592916759Q','91120116600910892X','911201166630834172','91120116671457175N','91120116675967105W','91120116697408240K','91120116712934952M','91120116718278597H','911201167244641345','91120116730357968N','911201167328190464','91120116735474530F','91120116746652267N','911201167491124502','91120116758137027D','91120116764348197P','911201167803339648','911201167833047124','91120116794980409G','911201167972829995','91120116MA05PQB5XT','91120116MA069EXE4T','91120116MA06DRM4XY','91120116MA0705BL96','911201186670532667','91120118735488182M','91120118762158867F','91120118MA05JQUK0G','91120118MA05QFTE3C','91120118MA05T81X8A','91120118MA0697LP9T','91120118MA06T62187','911202216877459052','91120222566105610W','91120222575108434H','911202227706300842','91120222MA05KHKY2P','91120222MA05UAG55H','91120223600894351U','91120223761280668D','91120224300621490X','911202245661215811','91120224681877747F','91120224700557176T','91120224MA07871882','9113000023565800XC','91130000752446136W','911301001044060055','91130100107744755W','91130100601090291K','91130100689298985P','91130100732910720N','91130100732914772Y','91130100745411306F','911301007468556979','91130100754027891A','91130100776179546U','91130100787019708G','91130101678512755X','91130101789833818T','91130104784084838J','911301257898318475','91130132MA0A7AYE2H','91130181791385313K','91130181791386236G','91130182685711699G','91130182791357005D','91130183575506723L','911301836870224839','911301837713256634','911301847216647980','911301856652827511','91130185669060689W','911301857233544863','9113018576519998X9','91130193074894510E','911301931078905417','911302005661986189','911302006799397935','9113020068276818X4','91130204347873513P','91130224666556267M','91130225MA07U3734B','91130229721600380L','91130281MA07KE3A17','91130282554499
915A','91130282750290545E','91130282MA07P2E981','91130283052683448M','91130283601019508G','91130293096112137N','911302936746855014','91130293774420041F','91130293796568127H','91130294308381129A','91130300601108025E','91130301329656355R','91130301601147147J','91130301678536714D','9113030168136727XL','91130301MA08XEAB6Q','911303035673840924','91130303673240113T','91130392601151496U','91130400730275049G','91130400MA08CCBX29','91130405748493781M','91130407MA0CUE7R5R','91130408MA08XFQJ61','911304246843413669','91130424757510432X','91130426699207653P','91130429679913817F','91130434MA07R66J0A','91130435564863776A','911304816690569897','91130481721643479D','91130500769806003D','91130501693478268W','91130525721609633G','911305287343786273','91130528737368715C','91130528743430458K','91130532723397101T','91130582095633598M','911306007006711044','911306055728149239','91130605596834603J','91130605601201668M','911306056746516436','91130606550419199D','91130637752422695J','9113070556195375XQ','91130705769821035L','911307317870236272','91130800757548430L','91130802728832010D','91130803MA0CMRHN8L','91130824771327626D','9113090010971869X1','91130900670338967F','91130900700660368J','91130900765171063F','911309007681306540','91130900779198582P','91130900791398851A','911309033200553935','91130922687004365L','91130923732923871M','9113092510971914XX','91130927789824567A','911309313081379192','91130931329627183Q','91130931557675726N','91130931MA07MEM874','91130981596828756D','91130981763428435T','911309825809745213','911309826760246784','911309827401835863','91130983MA07N7T53G','91130984567358986H','911310001057748114','9113100060134890XT','911310007634343680','91131022336194910M','9113102258690708X9','91131025731429118G','9113102630827362XB','91131028MA07KQYW1M','91131081335912618X','9113108267322544XE','91131082755457551W','91131100109804512G','91131100700865494B','91131100746851872J','91131101236298229N','911311017913820594','91131101MA08EWC63Q','911311220826746736','91131125780842443M'
,'91131127560486483D','911311816882161913','91131181700712973X','91131182093289869R','91131182109874836Y','91131182779189192D','91140000056278968H','91140000160963703Y','91140000330566883Q','911400005973987278','91140000701000732H','91140000715931861P','9114000074855218XX','91140100110047117B','91140100556560310M','91140100568462347Q','91140100578457859T','91140100586171535D','91140100713634804H','91140100713674988T','91140100715946502L','91140100754093899G','91140100792241864R','91140108731935643H','91140121757294792H','91140200770127753X','91140300694291892Q','91140300748578443L','91140311666645518N','91140322110721968E','911404001107700495','911405003257661198','91140500586185996N','91140522MA0JRG8Q99','911406006024604424','91140700065564755Y','91140700719819164X','91140800556559520F','91140800733994655W','911408007540500477','911408227011988570','9114082370110438XN','91140900798276152L','91140930794219089L','91140932729686916F','911500001141618816','91150000733284733B','91150100573268485R','911502047971536367','91150291594612345H','91150291701423911F','91150291787086089U','91150800701444800H','911525007116525588','91210000118887313L','91210000242666665H','91210000686609602P','91210000738792171J','912101000016232858','91210100117812926M','91210100243490227Y','912101006046149869','912101006625215774','91210100769563590L','91210100798474220Q','912101047845707057','91210105057192314D','91210105564689755B','91210105798464057N','912101063132548617','912101066671654449','912101066874643611','912101067386643481','91210106755504303X','91210106760090619H','91210106769599542Y','91210112675348347K','9121011271579529XH','91210112738671871J','91210112769598654A','91210112MA0P432U8R','91210112MA0P44NRXH','91210113088956102C','91210113578366586N','912101137555387734','91210113760060444N','91210114340680807E','91210181MA0XQF19XH','9121020011831278X6','91210200118561313C','91210200241297917U','912102006048648626','91210200677529168F','91210200716992578X','91210200723495318L','912
102007409045158','91210200744362020N','91210200751579797A','91210200MA0TR2P80G','91210204MA0QDTY23G','91210211MA0Y19KN3J','91210212732749973K','91210213604838795D','912102137327794199','912102137920497177','91210231736407196M','91210231756073509F','91210242118382526E','91210242728848952B','91210244559824828B','91210283696011524C','91210300MA0TT2DH9R','91210381241525115T','91210381567557686J','91210381603655081B','9121060008113718XY','91210600120109772C','91210600242814525N','912106005909453539','91210682781643139U','912107000721599341','912107002420322837','91210700577233300Y','91210700736737822M','91210700749779175E','91210700768337030B','912108006768912029','912108006926672350','91210900584194995N','912109007016848390','91210921MA0XX7NM2T','9121100059093999XJ','91211000726845918Y','9121100474278967XK','91211021590945396H','91211103MA102UN249','91211200561382299U','912112006737775195','9121122106407122X3','91211300791572581J','9121140055815624XQ','91211400747119974B','91220000664275090B','9122000070222720XH','91220000786819498L','91220101050518975F','91220101081849654U','912201012449758167','91220101310012867G','91220101697761845P','91220101727117306C','91220101730777372U','912201017484274776','912201017561541220','912201017671658636','912201017671930129','912201017710567829','91220101794404583W','91220101794442483P','91220101MA14TY564H','91220104675648489W','912202011239483018','912202016051690282','91220201682611844F','91220201786812798U','91220300565092475E','91220422MA0Y3F777J','91220501126870028U','91220501244575134M','91220501244583871H','91220501723101462L','91220501791105350N','91220521MA173E261W','91220523660141001R','9122082155977797XR','912224037742347248','912301001275921118','91230100607168790X','91230108127420096N','91230108MA1B0JHY73','912301107631541551','912301993011658539','91230199301195470J','91230199301211856H','9123019958512805X3','91230199696825683P','91230199775036754Q','91230199MA18XA396K','91230300130721906W','91230600560617893B','91230600
MA1BF4U29A','912306056926467095','912312815838229881','91233001569893325G','91310000051240362X','9131000005124956XX','9131000005304658XH','91310000057656705A','913100000625940784','91310000076492259A','91310000080013687R','913100001321244277','913100001321644452','913100001322131129','91310000132231361P','91310000132653687M','91310000132660318J','9131000013297865X2','9131000013413459XC','91310000300253536H','91310000301354857P','91310000312519282U','91310000324299264L','913100003244893596','91310000342056098N','913100003508461023','91310000351008055W','913100005515491712','91310000552962929G','91310000557430243L','913100005665114915','91310000568072146K','913100005708082124','91310000572698184Q','91310000582138631D','91310000582139781F','913100005867988561','91310000590384058P','91310000590397350D','913100006072612077','91310000607272280Q','91310000607286404W','913100006072944121','9131000060729499X9','91310000607311067X','91310000607339123C','913100006073622866','91310000607370331G','913100006073785958','91310000607403041J','91310000607404087G','91310000607422576R','913100006074261470','91310000607431720X','9131000060751688XT','913100006075916282','91310000607601064L','9131000063021103X7','91310000630453442X','91310000630483465G','91310000630948912G','91310000630965915K','91310000631137409B','91310000631191552K','913100006314149553','913100006314627462','91310000631521822M','91310000631534594F','913100006317557680','9131000066240918XU','91310000667780236Q','91310000669359189D','91310000669363292T','91310000669421384T','913100006711091037','91310000674575425N','913100006746031318','91310000680976508E','91310000680999558Q','91310000682254509X','91310000682263886E','913100006873885738','91310000690125272H','91310000692998798F','9131000069420172XB','9131000069578172XC','91310000695810746C','91310000697295223K','913100006988365624','913100007030116706','913100007030557379','913100007030973396','91310000703147746G','91310000703340159B','913100007294735903','9131000072949
3479N','91310000733344636F','91310000733365971U','91310000734057153P','91310000734081815D','91310000735408592G','913100007381411253','91310000738505304H','913100007385256042','913100007421053624','91310000747273971D','91310000748756174J','913100007487913409','91310000751468181F','91310000751863771N','913100007518999777','91310000756110429R','9131000075842961XY','9131000075855850XT','91310000759040681R','91310000761199691M','913100007653010244','91310000765583375Y','91310000765596096G','91310000768354199F','913100007694197083','91310000770201458T','913100007714584745','91310000772115131G','91310000772864810L','913100007728924912','91310000773282177G','913100007743059833','91310000774323671U','9131000077478390X5','91310000775216587B','91310000775238065L','913100007757838991','91310000778930516R','913100007824379352','91310000784298270U','91310000784783241W','91310000787230976G','91310000787878254Q','91310000792703993P','91310000792783700P','91310000797050338W','91310000MA1FL74J78','91310000MA1G8BHPXX','91310000MA1H38T58K','91310000MA1H3GDC5H','91310000MA1J37FN5Q','91310000MA1K2Q6J2X','91310000MA1K35P57Y','91310000MA7CJ9P40C','913101040693974723','91310104301579458U','91310104342172646H','913101043423482187','9131010455298989X1','913101045758452582','9131010458529260X8','91310104669392966T','91310104674626798A','913101046855187256','91310104692921256Y','91310104742657562G','91310104776270040D','91310104MA1FR0P33B','91310104MA1FR9PL54','91310105074824416U','91310105090037252C','91310105312284129D','91310105779753697E','91310106066020397Q','91310106550090004W','91310106630236093C','91310106MA1FY9LT3N','91310107051295590B','913101076076323035','91310107781531233F','91310109312143131N','91310110054590464F','91310110078155571L','913101100861724784','91310110342313605X','913101103507613521','91310110351027504X','91310110351154941K','91310110591673062R','91310110757926286X','91310110787862412B','9131011205506145X2','913101121326732580','91310112301708379M','91310112350881637E
','91310112350889276J','91310112351114237B','91310112557480662J','913101125868251134','91310112607425988Y','91310112607671054B','91310112630792962D','91310112767225977D','913101127785041388','913101127851867808','91310112789576698P','91310112MA1GB5HL74','91310112MA1GB63D5E','91310112MA1GBCU74Q','91310112MA1GBEPY9P','91310112MA1GBWLUXN','91310112MA1GC28U0L','91310112MA1GC78A07','91310112MA1GCHQP57','91310113086201072B','913101131345344112','91310113342290888U','91310113550058717X','91310113631482720W','91310113754764752Y','913101140693041410','91310114074811922G','91310114320742767K','9131011434217342X2','9131011455159938XA','913101145529068046','913101145619308064','91310114570796872F','91310114577469866W','9131011458207544XY','91310114588740092M','91310114591692730A','91310114630211689Y','913101146305896733','913101146308058904','913101146315357223','913101146319344919','91310114695793034W','913101147030104249','91310114754758651R','913101147557198576','91310114760573215T','913101147728614257','91310114781898318F','91310114MA1GT4926T','91310114MA1GURM19N','91310114MA1GW61HX2','91310114MA1GWJL62M','91310115051251125K','913101150512565326','913101150608727672','913101150609007219','91310115080028627C','9131011508201988XX','91310115084100518T','913101150938266958','913101151321295193','913101151339870722','913101153124932461','9131011532075221XC','91310115324253960J','91310115324284513E','9131011533262045X2','91310115332642560W','913101153986795507','91310115555949711X','91310115568057640Y','91310115569630816D','91310115570750452T','91310115572703801L','91310115599770596C','913101156822157531','91310115690170444F','913101156972022424','91310115703497359F','91310115767236430H','913101157732980993','91310115779776581R','91310115792736664G','91310115795654795D','91310115MA1H70PK5R','91310115MA1H727E7E','91310115MA1H7RLE45','91310115MA1H7W8439','91310115MA1H7W8514','91310115MA1H9HD02E','91310115MA1H9K3FX7','91310115MA1HATB40R','91310115MA1K39C71R','91310115MA1K3B1R09','91
310115MA1K3BQK2U','91310115MA1K3CM30B','91310115MA1K3F6C05','91310115MA1K3K2N9H','91310115MA1K3KJW0N','91310115MA1K3MP458','91310115MA1K41R2X3','91310115MA1K493TXQ','91310115MA1K4CLB55','91310115MA1K4MF39X','913101160637712405','913101161321521531','91310116555985835J','913101165601545691','91310116563135240C','91310116566563515F','913101166607195719','91310116662495241T','91310116671156516L','91310116676273009U','91310116687330646Y','91310116759882926H','913101175559503333','9131011756018678XG','91310117574182309H','91310117598194355D','913101176311798956','91310117632167028T','91310117662458598U','91310117662473499L','91310117662485385P','91310117669377619D','913200001347587142','91320000583783720B','913200006082630012','91320000710929340E','91320000743141824Y','91320000751254554N','91320100093975981A','91320100134974572K','91320100135847161T','913201025628951334','91320191134955910F','91320191726079387X','913201921349556628','913201922497944756','91320200135890776N','91320200135914870B','913204002508323014','91320500741304044W','91320509138285715E','91320509796141166A','91320582134789270G','913207007322513070','91320982571427139M','91321291703974741U','913300001429120051','9133000014293866XE','91330000142941287T','91330000710924531U','91330100253930310D','91330110MA2CGBC056','913302001440685655','91330200704800698F','91340000148941616G','91370112MABYCTU036']
# for xydm in list_xydm:
# for num_df in range(len(df_all)):
# social_code = str(df_all['social_code'][num_df])
# if social_code == xydm:
# com_name = str(df_all['name'][num_df])
# with cnx.cursor() as cursor:
# update = '''update sys_base_enterprise set name = %s where social_credit_code = %s'''
# cursor.execute(update, (com_name, xydm))
# cnx.commit()
# print(f'{xydm}===更新成功{com_name}')
# break
\ No newline at end of file
# 中央全面深化改革委员会会议
import json
import sys
import time
import redis
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from kafka import KafkaProducer
headers = {
# Extend the module search path so the project-local BaseCore module can be
# imported. The directory is a hard-coded Windows path, so the import only
# resolves on a machine that has this exact layout.
sys.path.append('D:\\kkwork\\zzsn_spider\\base')
import BaseCore
# Build the shared BaseCore helper and take its logger; `log` is what the
# script calls for info/error messages.
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
header = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
......@@ -26,22 +32,50 @@ headers = {
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
}
# Browser-like HTTP request headers (Edge 121 on Windows) with the Host pinned
# to news.12371.cn. Long values are split across adjacent string literals for
# readability only; the resulting strings are unchanged.
headers = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.7'
    ),
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Connection': 'keep-alive',
    'Cookie': 'cna=HcAKHtgXUG4CAQHBO1G6ZJYK',
    'Host': 'news.12371.cn',
    # Fetch-metadata headers describing a top-level, user-initiated navigation.
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'
    ),
    # Client-hint headers matching the User-Agent above.
    'sec-ch-ua': '"Not A(Brand";v="99", "Microsoft Edge";v="121", "Chromium";v="121"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}
# NOTE(review): this span interleaves lines from two revisions of the script
# (see the '......@@' hunk markers and the duplicated requests.get /
# BeautifulSoup statements) and its indentation has been lost. The comments
# below describe individual statements only; nesting must be confirmed against
# the real file.
if __name__ == "__main__":
# Meetings of the Central Commission for Comprehensively Deepening Reform
# NOTE(review): the Redis host and password are hard-coded in this call.
r = redis.Redis(host='114.115.236.206', port=6379, password='clbzzsn', db=5)
# Meetings of the Central Leading Group for Comprehensively Deepening Reform
# url_list = ['https://www.12371.cn/special/zyqmshggldxzhy19/', 'https://www.12371.cn/special/zyqmshggldxzhy19/']
url_list = ['https://www.12371.cn/special/zyqmshggldxzhy19/']
for url in url_list:
request = requests.get(url=url, headers=headers)
soup = BeautifulSoup(request.content, 'html.parser')
request.encoding = request.apparent_encoding
# print(soup)
info_html = soup.find('div', id='SUBD1663831285709121').find('ul', class_='ul_list')
ul_list = info_html.find_all('li')
for ul in ul_list:
# Fetch the special-topic index page using the `header` dict and parse the raw
# response bytes.
url = 'https://www.12371.cn/special/zyqmshggldxzhy19/'
request = requests.get(url=url, headers=header)
soup = BeautifulSoup(request.content, 'html.parser')
# print(soup)
# NOTE(review): the encoding is assigned after `soup` was already built from
# request.content, so it does not influence the parse above.
request.encoding = request.apparent_encoding
# print(soup)
# info_html = soup.find('div', id='SUBD1663831285709121').find('ul', class_='ul_list')
# Each matching <div> is one section of the page.
info_html_list = soup.find_all('div', class_='dyw1023_right_list01 hyty')
# `flag` starts at 1 and selects the info_code/sid pair: the value 1 picks the
# first pair, any other value picks the second.
flag = 1
for info_html in info_html_list:
if flag == 1:
info_code = 'IN-20230816-0004'
sid = '1691633319715676162'
else:
sid = '1691633869186277378'
info_code = 'IN-20230816-0005'
ul_list = info_html.find('ul', class_='ul_list').find_all('li')
# Walk the <li> items in reverse of their order on the page.
for ul in ul_list[::-1]:
# The <span> text is parsed as a "YYYY年MM月DD日" date and re-emitted as
# YYYY-MM-DD.
publishDate_ = str(ul.find('span').text)
date_obj= datetime.strptime(publishDate_, "%Y年%m月%d日")
publishDate = date_obj.strftime('%Y-%m-%d')
......@@ -51,18 +85,27 @@ if __name__ == "__main__":
newsUrl = ul.find('a')['href']
summary = ul.find('a').text
# todo: de-duplicate links
news_request = requests.get(url=newsUrl, headers=headers)
# Skip URLs already present in the Redis set named by info_code. Any exception
# from the lookup also skips the item, without logging.
# NOTE(review): `flag` is rebound here to the membership result, overwriting
# the section selector assigned above; confirm this reuse is intended, since
# `flag += 1` and `if flag == 1` later operate on this value.
try:
flag = r.sismember(info_code, newsUrl)
if flag:
log.info('信息已采集入库过')
continue
except Exception as e:
continue
# Fetch the article page without following redirects.
news_request = requests.get(url=newsUrl, headers=headers, allow_redirects=False)
news_soup = BeautifulSoup(news_request.content, 'html.parser')
print(news_soup)
title = news_soup.find('h1', class_='big_title').text
source = news_soup.find('div', class_='title_bottom').find('i').text
contentwithTag = news_soup.find('div', class_='word')
content = contentwithTag.text
if url == 'https://www.12371.cn/special/zyqmshggldxzhy19/':
sid = '1691633319715676162'
else:
sid = '1691633869186277378'
# print(news_soup)
# Extract title, source and body; if any expected element is missing the
# failure is logged with the URL and the item is skipped.
try:
title = news_soup.find('h1', class_='big_title').text
source = news_soup.find('div', class_='title_bottom').find('i').text
contentwithTag = news_soup.find('div', class_='word')
content = contentwithTag.text
except Exception as e:
log.error(f'解析网页出错{newsUrl}')
continue
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# The record id is a fixed prefix followed by the current time in milliseconds.
dic_info ={
'id': '1681549361661489154' + str(int(time.time()*1000)),
'title': title,
......@@ -79,6 +122,7 @@ if __name__ == "__main__":
'createDate': time_now,
}
# NOTE(review): the URL is added to the Redis set before the Kafka send below
# is attempted; confirm a failed send should still count as collected.
r.sadd(info_code, newsUrl)
# A new KafkaProducer is created here and closed in the `finally` below.
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
try:
kafka_result = producer.send("research_center_fourth",
......@@ -89,4 +133,5 @@ if __name__ == "__main__":
print(e)
print('发送kafka异常!')
finally:
producer.close()
\ No newline at end of file
producer.close()
flag += 1
\ No newline at end of file
......@@ -56,7 +56,7 @@ if __name__=="__main__":
url = "https://mp.weixin.qq.com/"
browser.get(url)
# 可改动
time.sleep(20)
time.sleep(80)
s = requests.session()
#获取到token和cookies
......
......@@ -170,5 +170,71 @@ for data in datas:
# f.write(dic_info_)
# break
# req = requests.post('http://192.168.1.236:5000/translate',data=dic_info_,headers=headers)
req = requests.post('http://117.78.23.14:5001/translate',data=dic_info_,headers=headers)
log.info(req.text)
\ No newline at end of file
req = requests.post('http://117.78.23.14:5000/translate',data=dic_info_,headers=headers)
log.info(req.text)
# import re, datetime
#
#
# def paserTime(publishtime):
# timeType = ['年前', '月前', '周前', '前天', '昨天', '天前', '今天', '小时前', '分钟前']
# current_datetime = datetime.datetime.now()
# publishtime = publishtime.strip()
# print(publishtime)
#
# try:
# if '年前' in publishtime:
# numbers = re.findall(r'\d+', publishtime)
# day = int(numbers[0])
# delta = datetime.timedelta(days=365 * day)
# publishtime = current_datetime - delta
# elif '月前' in publishtime:
# numbers = re.findall(r'\d+', publishtime)
# day = int(numbers[0])
# delta = datetime.timedelta(months=day)
# publishtime = current_datetime - delta
# elif '周前' in publishtime:
# numbers = re.findall(r'\d+', publishtime)
# day = int(numbers[0])
# delta = datetime.timedelta(weeks=day)
# publishtime = current_datetime - delta
# elif '天前' in publishtime:
# numbers = re.findall(r'\d+', publishtime)
# day = int(numbers[0])
# delta = datetime.timedelta(days=day)
# publishtime = current_datetime - delta
# elif '前天' in publishtime:
# delta = datetime.timedelta(days=2)
# publishtime = current_datetime - delta
# elif '昨天' in publishtime:
# current_datetime = datetime.datetime.now()
# delta = datetime.timedelta(days=1)
# publishtime = current_datetime - delta
# elif '今天' in publishtime or '小时前' in publishtime or '分钟前' in publishtime:
# if '小时' in publishtime:
# hour = publishtime.split("小时")[0]
# else:
# hour = 0
# if hour != 0:
# min = publishtime.split("小时")[1].split("分钟")[0]
# else:
# min = publishtime.split("分钟")[0]
#
# delta = datetime.timedelta(hours=int(hour), minutes=int(min))
# publishtime = current_datetime - delta
# elif '年' in publishtime and '月' in publishtime:
# time_format = '%Y年%m月%d日'
# publishtime = datetime.datetime.strptime(publishtime, time_format)
# elif '月' in publishtime and '日' in publishtime:
# current_year = current_datetime.year
# time_format = '%Y年%m月%d日'
# publishtime = str(current_year) + '年' + publishtime
# publishtime = datetime.datetime.strptime(publishtime, time_format)
# except Exception as e:
# print('时间解析异常!!')
# return publishtime
#
# if __name__ == "__main__":
# publishtime_ = '1小时17分钟前'
# publish_time = paserTime(publishtime_).strftime("%Y-%m-%d")
# print(publish_time)
\ No newline at end of file
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论