Commit 09af6b88 Author: 薛凌堃

Collect executive data without using a proxy IP

Parent 5940b41f
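This commit drops the proxy from each Tianyancha request in the executive-collection job: the senior-executive list endpoints, the staff endpoint, and the person detail page are now fetched directly with `requests.get(url, headers=headers)`. A minimal sketch of the resulting request pattern for one page of the executive list, assuming a plain `headers` dict (the real script defines its own) and network access to the endpoint; the millisecond timestamp `_`, the company id `gid`, and the `pageSize`/`pageNum` parameters match the URLs in the diff below:

```python
import time
import requests

# Hypothetical header set; the real script defines its own `headers` dict.
headers = {"User-Agent": "Mozilla/5.0"}

def fetch_senior_executives(tycid, page=1, page_size=20):
    """Fetch one page of senior executives for a company, without a proxy.

    Mirrors the pattern in the diff: millisecond timestamp `_` parameter,
    company id `gid`, and pagination via pageSize/pageNum.
    """
    t = int(time.time() * 1000)
    url = (
        "https://capi.tianyancha.com/cloud-listed-company/listed/noRepeatSeniorExecutive"
        f"?_={t}&gid={tycid}&pageSize={page_size}&pageNum={page}"
    )
    # Proxy removed: the request goes out directly, as in this commit.
    res = requests.get(url, headers=headers, timeout=30)
    time.sleep(1)  # small pause between requests, matching the original script
    return res.json()["data"]["dataList"]
```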
@@ -27,7 +27,7 @@ def doJob():
 # Use the social credit code pulled from Redis to look up the corresponding basic info in the database
 social_code = baseCore.redicPullData('CorPersonEnterprise:gnqy_socialCode')
 # If there is no more data in Redis, wait
-# social_code = '91110108778635402E'
+# social_code = '9135020056842712XB'
 if social_code == None:
 time.sleep(20)
 continue
@@ -48,10 +48,10 @@ def doJob():
 retData = getTycIdByXYDM(xydm)
 if retData['state']:
 tycid = retData['tycData']['id']
-# todo: write back to the database
-updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
-cursor_.execute(updateSql)
-cnx_.commit()
+# # todo: write back to the database
+# updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
+# cursor_.execute(updateSql)
+# cnx_.commit()
 else:
 state = 0
 takeTime = baseCore.getTimeCost(start, time.time())
@@ -74,7 +74,8 @@ def doJob():
 #https://capi.tianyancha.com/cloud-listed-company/listed/getHkNoRepeatSeniorExecutive?_=1692929256462&gid=209370942&pageSize=20&pageNum=1
 url = f'https://capi.tianyancha.com/cloud-listed-company/listed/noRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum={page}'
 ip = baseCore.get_proxy()
-res = requests.get(url,headers=headers,proxies=ip) # ,verify=False
+# res = requests.get(url,headers=headers,proxies=ip) # ,verify=False
+res = requests.get(url,headers=headers) # ,verify=False
 time.sleep(1)
 list_all = res.json()['data']['dataList']
 if list_all:
@@ -137,7 +138,8 @@ def doJob():
 t = int(time.time() * 1000)
 url = f'https://capi.tianyancha.com/cloud-listed-company/listed/getHkNoRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum={page}'
 ip = baseCore.get_proxy()
-res = requests.get(url, headers=headers, proxies=ip) # ,verify=False
+# res = requests.get(url, headers=headers, proxies=ip) # ,verify=False
+res = requests.get(url, headers=headers) # ,verify=False
 time.sleep(1)
 list_all = res.json()['data']['dataList']
 if list_all:
@@ -173,7 +175,8 @@ def doJob():
 t = int(time.time() * 1000)
 url = f'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={t}&gid={tycid}&pageSize=20&pageNum={page}'
 ip = baseCore.get_proxy()
-res = requests.get(url, headers=headers, proxies=ip) # ,verify=False
+# res = requests.get(url, headers=headers, proxies=ip) # ,verify=False
+res = requests.get(url, headers=headers) # ,verify=False
 time.sleep(1)
 list_all = res.json()['data']['result']
 # todo: handle an additional case
@@ -211,7 +214,8 @@ def doJob():
 currentTerm = ''
 person_id = one_info['id']
 person_url = f'https://www.tianyancha.com/human/{person_id}-c{tycid}'
-person_res = requests.get(person_url, headers=headers, proxies=ip)
+# person_res = requests.get(person_url, headers=headers, proxies=ip)
+person_res = requests.get(person_url, headers=headers)
 person_soup = BeautifulSoup(person_res.content, 'html.parser')
 try:
 personInfo = person_soup.find('span', {'class': '_56d0a'}).text.strip()
@@ -265,6 +269,7 @@ def doJob():
 log.info('========= success ======')
 except Exception as e:
 log.info(f'==={social_code}===== company core personnel collection failed === re-queueing into Redis ====')
+log.info(e)
 # Re-queue into Redis
 baseCore.rePutIntoR('CorPersonEnterprise:gnqy_socialCode', social_code)
 state = 0
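The same edit (comment out the proxied call and issue the request directly) is repeated at four call sites: the two senior-executive list endpoints, the staff endpoint, and the person detail page. A sketch of how that choice could sit behind a single flag instead of per-line comments; `USE_PROXY` and `fetch` are hypothetical names introduced here, and `baseCore.get_proxy()` is assumed to return a requests-style proxies dict, as the original script implies:

```python
import requests

USE_PROXY = False  # hypothetical switch; this commit effectively hard-codes "no proxy"

def fetch(url, headers, base_core, timeout=30):
    """Issue a GET request, optionally through a proxy from baseCore.get_proxy().

    A sketch only: it assumes get_proxy() returns a dict accepted by
    requests' `proxies` argument, as in the surrounding script.
    """
    proxies = base_core.get_proxy() if USE_PROXY else None
    return requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
```

With a wrapper like this, turning the proxy back on later is a one-line change rather than re-editing each call site.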