Commit 09af6b88  Author: 薛凌堃

Collect executives without using a proxy IP

Parent 5940b41f
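Every hunk in this commit makes the same change: the Tianyancha requests drop the `proxies=ip` argument so the executive-collection requests are sent directly. A minimal sketch of the resulting request pattern, assuming the `headers` dict defined elsewhere in the script (not shown in this diff); the helper name `fetch_executive_page` is illustrative and not part of the repository:

import time
import requests

def fetch_executive_page(tycid, page, headers):
    # Illustrative sketch only: the URL format, headers usage and 1-second
    # pause are copied from the diff below; the function itself is not part
    # of the commit. The request is issued without proxies=ip, which is the
    # whole point of this change.
    t = int(time.time() * 1000)
    url = (f'https://capi.tianyancha.com/cloud-listed-company/listed/'
           f'noRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum={page}')
    res = requests.get(url, headers=headers)  # ,verify=False stays disabled, as in the diff
    time.sleep(1)
    return res.json()['data']['dataList']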
@@ -27,7 +27,7 @@ def doJob():
         # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
         social_code = baseCore.redicPullData('CorPersonEnterprise:gnqy_socialCode')
         # 判断 如果Redis中已经没有数据,则等待
-        # social_code = '91110108778635402E'
+        # social_code = '9135020056842712XB'
         if social_code == None:
             time.sleep(20)
             continue
@@ -48,10 +48,10 @@ def doJob():
            retData = getTycIdByXYDM(xydm)
            if retData['state']:
                tycid = retData['tycData']['id']
-                # todo:写入数据库
-                updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
-                cursor_.execute(updateSql)
-                cnx_.commit()
+                # # todo:写入数据库
+                # updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
+                # cursor_.execute(updateSql)
+                # cnx_.commit()
            else:
                state = 0
                takeTime = baseCore.getTimeCost(start, time.time())
@@ -74,7 +74,8 @@ def doJob():
            #https://capi.tianyancha.com/cloud-listed-company/listed/getHkNoRepeatSeniorExecutive?_=1692929256462&gid=209370942&pageSize=20&pageNum=1
            url = f'https://capi.tianyancha.com/cloud-listed-company/listed/noRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum={page}'
            ip = baseCore.get_proxy()
-            res = requests.get(url,headers=headers,proxies=ip)  # ,verify=False
+            # res = requests.get(url,headers=headers,proxies=ip)  # ,verify=False
+            res = requests.get(url,headers=headers)  # ,verify=False
            time.sleep(1)
            list_all = res.json()['data']['dataList']
            if list_all:
@@ -137,7 +138,8 @@ def doJob():
                t = int(time.time() * 1000)
                url = f'https://capi.tianyancha.com/cloud-listed-company/listed/getHkNoRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum={page}'
                ip = baseCore.get_proxy()
-                res = requests.get(url, headers=headers, proxies=ip)  # ,verify=False
+                # res = requests.get(url, headers=headers, proxies=ip)  # ,verify=False
+                res = requests.get(url, headers=headers)  # ,verify=False
                time.sleep(1)
                list_all = res.json()['data']['dataList']
                if list_all:
@@ -173,7 +175,8 @@ def doJob():
                t = int(time.time() * 1000)
                url = f'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={t}&gid={tycid}&pageSize=20&pageNum={page}'
                ip = baseCore.get_proxy()
-                res = requests.get(url, headers=headers, proxies=ip)  # ,verify=False
+                # res = requests.get(url, headers=headers, proxies=ip)  # ,verify=False
+                res = requests.get(url, headers=headers)  # ,verify=False
                time.sleep(1)
                list_all = res.json()['data']['result']
                # todo:增加一种情况
@@ -211,7 +214,8 @@ def doJob():
                        currentTerm = ''
                    person_id = one_info['id']
                    person_url = f'https://www.tianyancha.com/human/{person_id}-c{tycid}'
-                    person_res = requests.get(person_url, headers=headers, proxies=ip)
+                    # person_res = requests.get(person_url, headers=headers, proxies=ip)
+                    person_res = requests.get(person_url, headers=headers)
                    person_soup = BeautifulSoup(person_res.content, 'html.parser')
                    try:
                        personInfo = person_soup.find('span', {'class': '_56d0a'}).text.strip()
@@ -265,6 +269,7 @@ def doJob():
            log.info('=========成功======')
        except Exception as e:
            log.info(f'==={social_code}=====企业核心人员采集失败===重新放入redis====')
+            log.info(e)
            # 重新塞入redis
            baseCore.rePutIntoR('CorPersonEnterprise:gnqy_socialCode', social_code)
            state = 0
...
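Because the same edit (comment out the proxied call, repeat the request without `proxies`) appears in four places, one possible follow-up would be to centralize the request in a single helper with a proxy switch. A hedged sketch, assuming `baseCore.get_proxy()` returns a requests-compatible proxies dict as the original code implies; the helper name `get_with_optional_proxy` is hypothetical and not part of this commit:

import time
import requests

def get_with_optional_proxy(url, headers, base_core=None, use_proxy=False, pause=1):
    # Hypothetical helper: use_proxy=False reproduces this commit's direct
    # requests, use_proxy=True restores the previous proxied behaviour.
    proxies = base_core.get_proxy() if (use_proxy and base_core) else None
    res = requests.get(url, headers=headers, proxies=proxies)
    time.sleep(pause)  # keep the original one-second pacing between requests
    return res

With a helper like this, each of the four call sites would shrink to a single line, e.g. `res = get_with_optional_proxy(url, headers, baseCore)`.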