提交 caa8a15c 作者: 薛凌堃

更新企业基本信息采集

上级 6500f5f7
......@@ -559,6 +559,12 @@ class BaseCore:
token = token_list[random.randint(0, len(token_list) - 1)][0]
return token
# 删除失效的token
def delete_token(self, token):
    """Delete a token that is no longer valid from the ``QCC_token`` table.

    Args:
        token: The token string whose row(s) should be removed.

    The statement is executed on ``self.cursor`` and committed on
    ``self.cnx``.
    """
    # Bind the token as a query parameter instead of interpolating it into
    # the SQL text, so quotes or other special characters in the token can
    # neither break the statement nor inject SQL.
    # NOTE(review): the ``%s`` placeholder assumes a DB-API driver using the
    # "format" paramstyle (e.g. PyMySQL / mysql-connector) — confirm against
    # how self.cursor is created.
    delete_sql = "delete from QCC_token where token=%s"
    self.cursor.execute(delete_sql, (token,))
    self.cnx.commit()
# 获取天眼查token
def GetTYCToken(self):
query = 'select token from TYC_token'
......
......@@ -323,20 +323,28 @@ if __name__ == '__main__':
'Accept-Encoding': 'gzip, deflate, br,'
}
name_list = []
#从redis里拿数据
while True:
# TODO:需要隔两个小时左右抓包修改,token从数据库中获得
token = baseCore.GetToken()
list_weicha = []
list_all_info = []
name_list = []
if token:
pass
else:
log.info('==========已无token==========')
time.sleep(30)
continue
start_time = time.time()
# 获取企业信息
# social_code = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
social_code = '91330000734530895W'
if social_code == '':
social_code = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
# social_code = '91330000734530895W'
if social_code == '' or social_code is None:
time.sleep(20)
continue
if '搜索不到' in social_code:
continue
else:
pass
dic_info = baseCore.getInfomation(social_code)
log.info(f'----当前企业{social_code}--开始处理---')
count = dic_info[14]
......@@ -350,10 +358,16 @@ if __name__ == '__main__':
company_id = find_id_by_name(start_time,token,social_code)
else:
company_id = find_id_by_name(start_time,token,com_name)
if company_id == 'null':
log.info('=====搜索不到该企业====')
#todo:搜不到的企业没有信用代码 传输不过去 生成一个信用代码
baseCore.rePutIntoR('global100:baseinfo', social_code + ':搜索不到')
continue
if not company_id:
log.info(com_name + ":企业ID获取失败===重新放入redis")
list_weicha.append(com_name + ":企业ID获取失败")
baseCore.rePutIntoR('BaseInfoEnterprise:gnqy_socialCode',social_code)
baseCore.delete_token(token)
log.info('=====已重新放入redis,失效token已删除======')
time.sleep(20)
continue
else:
......@@ -367,27 +381,32 @@ if __name__ == '__main__':
except:
log.info(f'====={social_code}=====获取基本信息失败,重新放入redis=====')
baseCore.rePutIntoR('BaseInfoEnterprise:gnqy_social_code', social_code)
baseCore.delete_token(token)
log.info('=====已重新放入redis,失效token已删除======')
continue
if post_data_list:
pass
else:
log.info(f'======{social_code}====企查查token失效====')
time.sleep(20)
continue
for post_data in post_data_list:
list_all_info.append(post_data)
if post_data is None:
print(com_name + ":企业信息获取失败")
list_weicha.append(com_name + ":企业信息获取失败")
log.info(com_name + ":企业信息获取失败")
continue
get_name = post_data['name']
get_socialcode = post_data['socialCreditCode']
# todo:将信用代码或企业名称更新到表中
# updatesocialcode = f"update Global100 set SocialCode = '{get_socialcode}' where CompanyName = '{com_name}'"
# cursor_.execute(updatesocialcode)
# cnx_.commit()
name_compile = {
'yuan_name':com_name,
'get_name':get_name
}
name_list.append(name_compile)
nowtime = baseCore.getNowTime(1).replace('-', '_')[:10]
baseCore.writerToExcel(name_list, f'企业名称对比_{nowtime}.xlsx')
log.info(f'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}')
try:
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'], api_version=(2, 0, 2))
......@@ -403,12 +422,6 @@ if __name__ == '__main__':
runType = 'BaseInfoRunCount'
count += 1
baseCore.updateRun(social_code, runType, count)
nowtime = baseCore.getNowTime(1).replace('-','_')[:10]
companyName = pd.DataFrame(name_list)
companyName.to_excel(f'./data/企业名称对比_{nowtime}.xlsx',index=False)
false_com = pd.DataFrame(list_weicha)
false_com.to_excel(f'./data/采集失败企业名单_{nowtime}.xlsx',index=False)
......
......@@ -46,15 +46,19 @@ def find_id_by_name(start,token,name):
KeyNo = False
log.info(f'=======您的账号访问超频,请升级小程序版本=====时间{baseCore.getTimeCost(start, time.time())}')
return KeyNo
if resp_dict['status']==40102:
KeyNo = False
log.info(f'=======无效的session=====时间{baseCore.getTimeCost(start, time.time())}')
return KeyNo
try:
if resp_dict['result']['Result']:
result_dict = resp_dict['result']['Result'][0]
KeyNo = result_dict['KeyNo']
Name = result_dict['Name'].replace('<em>', '').replace('</em>', '').strip()
if Name == '':
KeyNo = ''
KeyNo = 'null'
else:
KeyNo = ''
KeyNo = 'null'
except:
KeyNo = False
log.info(f'====token失效====时间{baseCore.getTimeCost(start,time.time())}')
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论