提交 49cbda1c 作者: XveLingKun

企业核心人员更新

上级 3092eef7
......@@ -70,7 +70,7 @@ def get_page(url, s, headers):
total_page_ = data_page['data']['total']
except:
raise
return total_page_
return total_page_, data_page
from selenium import webdriver
......@@ -198,6 +198,7 @@ def doJob():
log.info(f"{id}---{xydm}----{tycid}----开始采集核心人员")
list_one_info = []
num = 1
data_page = {}
try:
charge = get_html(tycid, driver, headers)
# 页面请求三次都失败
......@@ -224,7 +225,7 @@ def doJob():
log.info(f"{id}---{xydm}----{tycid}----没有最新公示")
url1 = f'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={t}&gid={tycid}&pageSize=20&pageNum=1'
try:
total_page1 = get_page(url1, s, headers)
total_page1, data_page = get_page(url1, s, headers)
except:
total_page1 = 0
url = 'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={}&gid={}&pageSize=20&pageNum={}'
......@@ -235,12 +236,12 @@ def doJob():
url2 = f'https://capi.tianyancha.com/cloud-listed-company/listed/noRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum=1'
url3 = f'https://capi.tianyancha.com/cloud-listed-company/listed/getHkNoRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum=1'
try:
total_page2 = get_page(url2, s, headers)
total_page2, data_page = get_page(url2, s, headers)
except:
total_page2 = 0
time.sleep(1)
try:
total_page3 = get_page(url3, s, headers)
total_page3, data_page = get_page(url3, s, headers)
except:
total_page3 = 0
if total_page2 == charge:
......@@ -274,6 +275,9 @@ def doJob():
else:
maxpage = int((total_page/20) + 1) +1
for page in range(1, maxpage):
if page == 1:
errorCode = data_page['errorCode']
else:
res = None
for c in range(3):
try:
......@@ -284,7 +288,9 @@ def doJob():
res = s.get(url_, headers=headers, proxies=ip) # ,verify=False
# res = requests.get(url_, headers=headers, verify=False) # ,verify=False
time.sleep(randint(2, 4))
if res.json()['errorCode'] != 0:
data_page = res.json()
errorCode = res.json()['errorCode']
if errorCode != 0:
continue
else:
break
......@@ -292,26 +298,26 @@ def doJob():
except:
continue
if res.json()['errorCode'] == 0:
if errorCode == 0:
pass
else:
# token.updateTokeen(id_cookie, 2)
# 重新塞入redis
# baseCore.rePutIntoR('UpdateCoreperson:SocialCode_CompanyName', item)
log.info(f'{id}---{xydm}----{tycid}--{res.json()}--高管信息请求失败')
log.info(f'{id}---{xydm}----{tycid}--{data_page}--高管信息请求失败')
continue
# todo:test测试
log.info(f'{id}---{xydm}----{tycid}----{res.json()}')
log.info(f'{id}---{xydm}----{tycid}----{data_page}')
try:
list_all = res.json()['data']['dataList']
list_all = data_page['data']['dataList']
except:
list_all = res.json()['data']['result']
list_all = data_page['data']['result']
if list_all:
pass
else:
log.info(f'{id}---{xydm}----{tycid}----没有高管信息')
# todo: 关闭连接
res.close()
# res.close()
if flag == 1:
for one_info in list_all:
name = one_info['name']
......@@ -349,22 +355,22 @@ def doJob():
"personInfo": personInfo,
"sort": str(num)
}
dic_json_img = {
"socialCreditCode": social_code,
"name": name,
"sex": sex,
"education": education,
"position": position,
"salary": Salary,
"birthYear": birthYear,
"shareNum": StockKeepings,
"shareRatio": '',
"benefitShare": '',
"currentTerm": currentTerm,
"personInfo": personInfo,
"头像": person_img,
"sort": str(num)
}
# dic_json_img = {
# "socialCreditCode": social_code,
# "name": name,
# "sex": sex,
# "education": education,
# "position": position,
# "salary": Salary,
# "birthYear": birthYear,
# "shareNum": StockKeepings,
# "shareRatio": '',
# "benefitShare": '',
# "currentTerm": currentTerm,
# "personInfo": personInfo,
# "头像": person_img,
# "sort": str(num)
# }
num = num + 1
list_one_info.append(dic_json)
# list_all_2.append(dic_json_img)
......@@ -441,22 +447,22 @@ def doJob():
"personInfo": personInfo,
"sort": str(num)
}
dic_json_img = {
"socialCreditCode": social_code,
"name": name,
"sex": '',
"education": '',
"position": position,
"salary": '',
"birthYear": '',
"shareNum": '',
"shareRatio": '',
"benefitShare": '',
"currentTerm": '',
"personInfo": personInfo,
"头像": person_img,
"sort": str(num)
}
# dic_json_img = {
# "socialCreditCode": social_code,
# "name": name,
# "sex": '',
# "education": '',
# "position": position,
# "salary": '',
# "birthYear": '',
# "shareNum": '',
# "shareRatio": '',
# "benefitShare": '',
# "currentTerm": '',
# "personInfo": personInfo,
# "头像": person_img,
# "sort": str(num)
# }
num = num + 1
list_one_info.append(dic_json)
# print(list_one_info)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论