提交 1f595f59 作者: 薛凌堃

天眼查脚本维护

上级 472a45d5
...@@ -47,11 +47,11 @@ cnx_ = baseCore.cnx ...@@ -47,11 +47,11 @@ cnx_ = baseCore.cnx
cursor_ = baseCore.cursor cursor_ = baseCore.cursor
taskType = '天眼查企业id/天眼查' taskType = '天眼查企业id/天眼查'
#根据信用代码获取天眼查id 企业名字等信息 #根据信用代码获取天眼查id 企业名字等信息
def getTycIdByXYDM(xydm): def getTycIdByXYDM(com_name):
retData={'state':False,'tycData':None,'reput':True} retData={'state':False,'tycData':None,'reput':True}
url=f"https://capi.tianyancha.com/cloud-tempest/search/suggest/v3?_={baseCore.getNowTime(3)}" url=f"https://capi.tianyancha.com/cloud-tempest/search/suggest/v3?_={baseCore.getNowTime(3)}"
ip = baseCore.get_proxy() ip = baseCore.get_proxy()
paramJsonData = {'keyword':xydm} paramJsonData = {'keyword':com_name}
try: try:
# headers['User-Agent'] = baseCore.getRandomUserAgent() # headers['User-Agent'] = baseCore.getRandomUserAgent()
# headers['X-AUTH-TOKEN'] = baseCore.GetTYCToken() # headers['X-AUTH-TOKEN'] = baseCore.GetTYCToken()
...@@ -62,21 +62,21 @@ def getTycIdByXYDM(xydm): ...@@ -62,21 +62,21 @@ def getTycIdByXYDM(xydm):
if retJsonData['data'] and retJsonData['state']== 'ok': if retJsonData['data'] and retJsonData['state']== 'ok':
pass pass
else: else:
log.error(f"---{xydm}-未查询到该企业---") log.error(f"---{com_name}-未查询到该企业---")
retData['reput'] = False retData['reput'] = False
return retData return retData
matchType=retJsonData['data'][0]['matchType'] matchType=retJsonData['data'][0]['matchType']
if matchType=='信用代码匹配': if matchType =='公司名称匹配':
retData['state'] = True retData['state'] = True
retData['tycData'] = retJsonData['data'][0] retData['tycData'] = retJsonData['data'][0]
response.close() response.close()
return retData return retData
else: else:
log.error(f"{xydm}------{retJsonData}") log.error(f"{com_name}------{retJsonData}")
response.close() response.close()
return retData return retData
except Exception as e: except Exception as e:
log.error(f"---{xydm}--{e}---") log.error(f"---{com_name}--{e}---")
return retData return retData
......
...@@ -52,7 +52,7 @@ headers = { ...@@ -52,7 +52,7 @@ headers = {
cnx_ = baseCore.cnx cnx_ = baseCore.cnx
cursor_ = baseCore.cursor cursor_ = baseCore.cursor
taskType = '企业动态/天眼查/补采专精特新' taskType = '企业动态/天眼查/'
def reqDetailmsg(url,headers): def reqDetailmsg(url,headers):
...@@ -81,14 +81,14 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -81,14 +81,14 @@ def beinWork(tyc_code, social_code,start_time):
for m in range(0,3): for m in range(0,3):
ip = baseCore.get_proxy() ip = baseCore.get_proxy()
headers['User-Agent'] = baseCore.getRandomUserAgent() headers['User-Agent'] = baseCore.getRandomUserAgent()
response = requests.get(url=url, headers=headers, proxies=ip, verify=False) response = requests.get(url=url, headers=headers, verify=False)
time.sleep(random.randint(3, 5)) time.sleep(random.randint(3, 5))
break break
if (response.status_code == 200): if (response.status_code == 200):
pass pass
except Exception as e: except Exception as e:
#todo:重新放入redis中 #todo:重新放入redis中
baseCore.rePutIntoR('NoticeEnterprise:gnqy_socialCode',social_code) baseCore.rePutIntoR('NewsResend:newsInfo',social_code)
log.error(f"{tyc_code}-----获取总数接口失败") log.error(f"{tyc_code}-----获取总数接口失败")
error = '获取总数接口失败' error = '获取总数接口失败'
state = 0 state = 0
...@@ -125,7 +125,7 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -125,7 +125,7 @@ def beinWork(tyc_code, social_code,start_time):
up_okCount = 0 up_okCount = 0
up_errorCount = 0 up_errorCount = 0
up_repetCount = 0 up_repetCount = 0
for num in range(1, totalPage + 1): for num in range(1, 10):
time.sleep(3) time.sleep(3)
log.info(f"获取分页数据--{tyc_code}----分页{num}----开始") log.info(f"获取分页数据--{tyc_code}----分页{num}----开始")
start_page = time.time() start_page = time.time()
...@@ -134,7 +134,7 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -134,7 +134,7 @@ def beinWork(tyc_code, social_code,start_time):
try: try:
ip = baseCore.get_proxy() ip = baseCore.get_proxy()
headers['User-Agent'] = baseCore.getRandomUserAgent() headers['User-Agent'] = baseCore.getRandomUserAgent()
response_page = requests.get(url=url_page, headers=headers, proxies=ip, verify=False) response_page = requests.get(url=url_page, headers=headers, verify=False)
# time.sleep(3) # time.sleep(3)
break break
except: except:
...@@ -168,24 +168,15 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -168,24 +168,15 @@ def beinWork(tyc_code, social_code,start_time):
source = info_page['website'] source = info_page['website']
link = info_page['uri'] link = info_page['uri']
try: try:
sel_sql = '''select social_credit_code from brpa_source_article_news where source_address = %s and social_credit_code=%s and type='2' ''' time_struct = time.localtime(int(info_page['rtm'] / 1000)) # 首先把时间戳转换为结构化时间
cursor_.execute(sel_sql, (link, social_code)) time_format = time.strftime("%Y-%m-%d %H:%M:%S", time_struct) # 把结构化时间转换为格式化时间
except Exception as e: if time_format > '2023-12-10 00:00':
print(e) pass
selects = cursor_.fetchone() else:
if selects:
log.info(f'{tyc_code}-----{social_code}----{link}:已经存在')
# todo:如果该条数据存在则说明该条数据之后的都已经采集完成,就可以跳出函数,执行下一个企业
retData['up_okCount'] = up_okCount retData['up_okCount'] = up_okCount
retData['up_errorCount'] = up_errorCount retData['up_errorCount'] = up_errorCount
retData['up_repetCount'] = up_repetCount retData['up_repetCount'] = up_repetCount
# return retData return retData
continue
try:
time_struct = time.localtime(int(info_page['rtm'] / 1000)) # 首先把时间戳转换为结构化时间
time_format = time.strftime("%Y-%m-%d %H:%M:%S", time_struct) # 把结构化时间转换为格式化时间
except: except:
time_format = baseCore.getNowTime(1) time_format = baseCore.getNowTime(1)
try: try:
...@@ -303,8 +294,8 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -303,8 +294,8 @@ def beinWork(tyc_code, social_code,start_time):
def doJob(): def doJob():
while True: while True:
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息 # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
social_code = baseCore.redicPullData('NewsEnterprise:gnqybc_socialCode') social_code = baseCore.redicPullData('NewsResend:newsInfo')
# social_code = '912301001275921118' # social_code = '91320000733334390E'
# 判断 如果Redis中已经没有数据,则等待 # 判断 如果Redis中已经没有数据,则等待
if social_code == None: if social_code == None:
time.sleep(20) time.sleep(20)
...@@ -316,14 +307,15 @@ def doJob(): ...@@ -316,14 +307,15 @@ def doJob():
pass pass
else: else:
#数据重新塞入redis #数据重新塞入redis
baseCore.rePutIntoR('NewsEnterprise:gnqybc_socialCode',social_code) baseCore.rePutIntoR('NewsResend:newsInfo',social_code)
continue continue
id = data[0] id = data[0]
com_name= data[1]
xydm = data[2] xydm = data[2]
tycid = data[11] tycid = data[11]
if tycid == None or tycid == '': if tycid == None or tycid == '':
try: try:
retData = getTycIdByXYDM(xydm) retData = getTycIdByXYDM(com_name)
if retData['tycData'] and retData['reput']: if retData['tycData'] and retData['reput']:
tycid = retData['tycData']['id'] tycid = retData['tycData']['id']
# todo:写入数据库 # todo:写入数据库
...@@ -335,7 +327,7 @@ def doJob(): ...@@ -335,7 +327,7 @@ def doJob():
takeTime = baseCore.getTimeCost(start, time.time()) takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败') baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
log.info(f'======={social_code}====重新放入redis====') log.info(f'======={social_code}====重新放入redis====')
baseCore.rePutIntoR('NewsEnterprise:gnqybc_socialCode', social_code) baseCore.rePutIntoR('NewsResend:newsInfo', social_code)
continue continue
elif not retData['reput'] and not retData['tycData']: elif not retData['reput'] and not retData['tycData']:
continue continue
...@@ -343,7 +335,7 @@ def doJob(): ...@@ -343,7 +335,7 @@ def doJob():
state = 0 state = 0
takeTime = baseCore.getTimeCost(start, time.time()) takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败') baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
baseCore.rePutIntoR('NewsEnterprise:gnqybc_socialCode', social_code) baseCore.rePutIntoR('NewsResend:newsInfo', social_code)
continue continue
count = data[17] count = data[17]
log.info(f"{id}---{xydm}----{tycid}----开始处理") log.info(f"{id}---{xydm}----{tycid}----开始处理")
...@@ -363,7 +355,7 @@ def doJob(): ...@@ -363,7 +355,7 @@ def doJob():
except Exception as e: except Exception as e:
log.info(f'==={social_code}=====获取企业信息失败====') log.info(f'==={social_code}=====获取企业信息失败====')
#重新塞入redis #重新塞入redis
baseCore.rePutIntoR('NewsEnterprise:gnqybc_socialCode',social_code) baseCore.rePutIntoR('NewsResend:newsInfo',social_code)
state = 0 state = 0
takeTime = baseCore.getTimeCost(start, time.time()) takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}') baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论