提交 1f595f59 作者: 薛凌堃

天眼查脚本维护

上级 472a45d5
......@@ -47,11 +47,11 @@ cnx_ = baseCore.cnx
cursor_ = baseCore.cursor
taskType = '天眼查企业id/天眼查'
#根据信用代码获取天眼查id 企业名字等信息
def getTycIdByXYDM(xydm):
def getTycIdByXYDM(com_name):
retData={'state':False,'tycData':None,'reput':True}
url=f"https://capi.tianyancha.com/cloud-tempest/search/suggest/v3?_={baseCore.getNowTime(3)}"
ip = baseCore.get_proxy()
paramJsonData = {'keyword':xydm}
paramJsonData = {'keyword':com_name}
try:
# headers['User-Agent'] = baseCore.getRandomUserAgent()
# headers['X-AUTH-TOKEN'] = baseCore.GetTYCToken()
......@@ -62,21 +62,21 @@ def getTycIdByXYDM(xydm):
if retJsonData['data'] and retJsonData['state']== 'ok':
pass
else:
log.error(f"---{xydm}-未查询到该企业---")
log.error(f"---{com_name}-未查询到该企业---")
retData['reput'] = False
return retData
matchType=retJsonData['data'][0]['matchType']
if matchType=='信用代码匹配':
if matchType =='公司名称匹配':
retData['state'] = True
retData['tycData'] = retJsonData['data'][0]
response.close()
return retData
else:
log.error(f"{xydm}------{retJsonData}")
log.error(f"{com_name}------{retJsonData}")
response.close()
return retData
except Exception as e:
log.error(f"---{xydm}--{e}---")
log.error(f"---{com_name}--{e}---")
return retData
......
......@@ -52,7 +52,7 @@ headers = {
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor
taskType = '企业动态/天眼查/补采专精特新'
taskType = '企业动态/天眼查/'
def reqDetailmsg(url,headers):
......@@ -81,14 +81,14 @@ def beinWork(tyc_code, social_code,start_time):
for m in range(0,3):
ip = baseCore.get_proxy()
headers['User-Agent'] = baseCore.getRandomUserAgent()
response = requests.get(url=url, headers=headers, proxies=ip, verify=False)
response = requests.get(url=url, headers=headers, verify=False)
time.sleep(random.randint(3, 5))
break
if (response.status_code == 200):
pass
except Exception as e:
#todo:重新放入redis中
baseCore.rePutIntoR('NoticeEnterprise:gnqy_socialCode',social_code)
baseCore.rePutIntoR('NewsResend:newsInfo',social_code)
log.error(f"{tyc_code}-----获取总数接口失败")
error = '获取总数接口失败'
state = 0
......@@ -125,7 +125,7 @@ def beinWork(tyc_code, social_code,start_time):
up_okCount = 0
up_errorCount = 0
up_repetCount = 0
for num in range(1, totalPage + 1):
for num in range(1, 10):
time.sleep(3)
log.info(f"获取分页数据--{tyc_code}----分页{num}----开始")
start_page = time.time()
......@@ -134,7 +134,7 @@ def beinWork(tyc_code, social_code,start_time):
try:
ip = baseCore.get_proxy()
headers['User-Agent'] = baseCore.getRandomUserAgent()
response_page = requests.get(url=url_page, headers=headers, proxies=ip, verify=False)
response_page = requests.get(url=url_page, headers=headers, verify=False)
# time.sleep(3)
break
except:
......@@ -168,24 +168,15 @@ def beinWork(tyc_code, social_code,start_time):
source = info_page['website']
link = info_page['uri']
try:
sel_sql = '''select social_credit_code from brpa_source_article_news where source_address = %s and social_credit_code=%s and type='2' '''
cursor_.execute(sel_sql, (link, social_code))
except Exception as e:
print(e)
selects = cursor_.fetchone()
if selects:
log.info(f'{tyc_code}-----{social_code}----{link}:已经存在')
# todo:如果该条数据存在则说明该条数据之后的都已经采集完成,就可以跳出函数,执行下一个企业
retData['up_okCount'] = up_okCount
retData['up_errorCount'] = up_errorCount
retData['up_repetCount'] = up_repetCount
# return retData
continue
try:
time_struct = time.localtime(int(info_page['rtm'] / 1000)) # 首先把时间戳转换为结构化时间
time_format = time.strftime("%Y-%m-%d %H:%M:%S", time_struct) # 把结构化时间转换为格式化时间
if time_format > '2023-12-10 00:00':
pass
else:
retData['up_okCount'] = up_okCount
retData['up_errorCount'] = up_errorCount
retData['up_repetCount'] = up_repetCount
return retData
except:
time_format = baseCore.getNowTime(1)
try:
......@@ -303,8 +294,8 @@ def beinWork(tyc_code, social_code,start_time):
def doJob():
while True:
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
social_code = baseCore.redicPullData('NewsEnterprise:gnqybc_socialCode')
# social_code = '912301001275921118'
social_code = baseCore.redicPullData('NewsResend:newsInfo')
# social_code = '91320000733334390E'
# 判断 如果Redis中已经没有数据,则等待
if social_code == None:
time.sleep(20)
......@@ -316,14 +307,15 @@ def doJob():
pass
else:
#数据重新塞入redis
baseCore.rePutIntoR('NewsEnterprise:gnqybc_socialCode',social_code)
baseCore.rePutIntoR('NewsResend:newsInfo',social_code)
continue
id = data[0]
com_name= data[1]
xydm = data[2]
tycid = data[11]
if tycid == None or tycid == '':
try:
retData = getTycIdByXYDM(xydm)
retData = getTycIdByXYDM(com_name)
if retData['tycData'] and retData['reput']:
tycid = retData['tycData']['id']
# todo:写入数据库
......@@ -335,7 +327,7 @@ def doJob():
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
log.info(f'======={social_code}====重新放入redis====')
baseCore.rePutIntoR('NewsEnterprise:gnqybc_socialCode', social_code)
baseCore.rePutIntoR('NewsResend:newsInfo', social_code)
continue
elif not retData['reput'] and not retData['tycData']:
continue
......@@ -343,7 +335,7 @@ def doJob():
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
baseCore.rePutIntoR('NewsEnterprise:gnqybc_socialCode', social_code)
baseCore.rePutIntoR('NewsResend:newsInfo', social_code)
continue
count = data[17]
log.info(f"{id}---{xydm}----{tycid}----开始处理")
......@@ -363,7 +355,7 @@ def doJob():
except Exception as e:
log.info(f'==={social_code}=====获取企业信息失败====')
#重新塞入redis
baseCore.rePutIntoR('NewsEnterprise:gnqybc_socialCode',social_code)
baseCore.rePutIntoR('NewsResend:newsInfo',social_code)
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论