提交 49cbda1c 作者: XveLingKun

企业核心人员更新

上级 3092eef7
...@@ -70,7 +70,7 @@ def get_page(url, s, headers): ...@@ -70,7 +70,7 @@ def get_page(url, s, headers):
total_page_ = data_page['data']['total'] total_page_ = data_page['data']['total']
except: except:
raise raise
return total_page_ return total_page_, data_page
from selenium import webdriver from selenium import webdriver
...@@ -198,6 +198,7 @@ def doJob(): ...@@ -198,6 +198,7 @@ def doJob():
log.info(f"{id}---{xydm}----{tycid}----开始采集核心人员") log.info(f"{id}---{xydm}----{tycid}----开始采集核心人员")
list_one_info = [] list_one_info = []
num = 1 num = 1
data_page = {}
try: try:
charge = get_html(tycid, driver, headers) charge = get_html(tycid, driver, headers)
# 页面请求三次都失败 # 页面请求三次都失败
...@@ -224,7 +225,7 @@ def doJob(): ...@@ -224,7 +225,7 @@ def doJob():
log.info(f"{id}---{xydm}----{tycid}----没有最新公示") log.info(f"{id}---{xydm}----{tycid}----没有最新公示")
url1 = f'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={t}&gid={tycid}&pageSize=20&pageNum=1' url1 = f'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={t}&gid={tycid}&pageSize=20&pageNum=1'
try: try:
total_page1 = get_page(url1, s, headers) total_page1, data_page = get_page(url1, s, headers)
except: except:
total_page1 = 0 total_page1 = 0
url = 'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={}&gid={}&pageSize=20&pageNum={}' url = 'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={}&gid={}&pageSize=20&pageNum={}'
...@@ -235,12 +236,12 @@ def doJob(): ...@@ -235,12 +236,12 @@ def doJob():
url2 = f'https://capi.tianyancha.com/cloud-listed-company/listed/noRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum=1' url2 = f'https://capi.tianyancha.com/cloud-listed-company/listed/noRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum=1'
url3 = f'https://capi.tianyancha.com/cloud-listed-company/listed/getHkNoRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum=1' url3 = f'https://capi.tianyancha.com/cloud-listed-company/listed/getHkNoRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum=1'
try: try:
total_page2 = get_page(url2, s, headers) total_page2, data_page = get_page(url2, s, headers)
except: except:
total_page2 = 0 total_page2 = 0
time.sleep(1) time.sleep(1)
try: try:
total_page3 = get_page(url3, s, headers) total_page3, data_page = get_page(url3, s, headers)
except: except:
total_page3 = 0 total_page3 = 0
if total_page2 == charge: if total_page2 == charge:
...@@ -274,44 +275,49 @@ def doJob(): ...@@ -274,44 +275,49 @@ def doJob():
else: else:
maxpage = int((total_page/20) + 1) +1 maxpage = int((total_page/20) + 1) +1
for page in range(1, maxpage): for page in range(1, maxpage):
res = None if page == 1:
for c in range(3): errorCode = data_page['errorCode']
try: else:
for d in range(3): res = None
ip = baseCore.get_proxy() for c in range(3):
url_ = url.format(t, tycid, page) try:
# url_ = 'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_=1706765329671&gid=8715844&pageSize=20&pageNum=1' for d in range(3):
res = s.get(url_, headers=headers, proxies=ip) # ,verify=False ip = baseCore.get_proxy()
# res = requests.get(url_, headers=headers, verify=False) # ,verify=False url_ = url.format(t, tycid, page)
time.sleep(randint(2, 4)) # url_ = 'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_=1706765329671&gid=8715844&pageSize=20&pageNum=1'
if res.json()['errorCode'] != 0: res = s.get(url_, headers=headers, proxies=ip) # ,verify=False
continue # res = requests.get(url_, headers=headers, verify=False) # ,verify=False
else: time.sleep(randint(2, 4))
break data_page = res.json()
break errorCode = res.json()['errorCode']
except: if errorCode != 0:
continue continue
else:
break
break
except:
continue
if res.json()['errorCode'] == 0: if errorCode == 0:
pass pass
else: else:
# token.updateTokeen(id_cookie, 2) # token.updateTokeen(id_cookie, 2)
# 重新塞入redis # 重新塞入redis
# baseCore.rePutIntoR('UpdateCoreperson:SocialCode_CompanyName', item) # baseCore.rePutIntoR('UpdateCoreperson:SocialCode_CompanyName', item)
log.info(f'{id}---{xydm}----{tycid}--{res.json()}--高管信息请求失败') log.info(f'{id}---{xydm}----{tycid}--{data_page}--高管信息请求失败')
continue continue
# todo:test测试 # todo:test测试
log.info(f'{id}---{xydm}----{tycid}----{res.json()}') log.info(f'{id}---{xydm}----{tycid}----{data_page}')
try: try:
list_all = res.json()['data']['dataList'] list_all = data_page['data']['dataList']
except: except:
list_all = res.json()['data']['result'] list_all = data_page['data']['result']
if list_all: if list_all:
pass pass
else: else:
log.info(f'{id}---{xydm}----{tycid}----没有高管信息') log.info(f'{id}---{xydm}----{tycid}----没有高管信息')
# todo: 关闭连接 # todo: 关闭连接
res.close() # res.close()
if flag == 1: if flag == 1:
for one_info in list_all: for one_info in list_all:
name = one_info['name'] name = one_info['name']
...@@ -349,22 +355,22 @@ def doJob(): ...@@ -349,22 +355,22 @@ def doJob():
"personInfo": personInfo, "personInfo": personInfo,
"sort": str(num) "sort": str(num)
} }
dic_json_img = { # dic_json_img = {
"socialCreditCode": social_code, # "socialCreditCode": social_code,
"name": name, # "name": name,
"sex": sex, # "sex": sex,
"education": education, # "education": education,
"position": position, # "position": position,
"salary": Salary, # "salary": Salary,
"birthYear": birthYear, # "birthYear": birthYear,
"shareNum": StockKeepings, # "shareNum": StockKeepings,
"shareRatio": '', # "shareRatio": '',
"benefitShare": '', # "benefitShare": '',
"currentTerm": currentTerm, # "currentTerm": currentTerm,
"personInfo": personInfo, # "personInfo": personInfo,
"头像": person_img, # "头像": person_img,
"sort": str(num) # "sort": str(num)
} # }
num = num + 1 num = num + 1
list_one_info.append(dic_json) list_one_info.append(dic_json)
# list_all_2.append(dic_json_img) # list_all_2.append(dic_json_img)
...@@ -441,22 +447,22 @@ def doJob(): ...@@ -441,22 +447,22 @@ def doJob():
"personInfo": personInfo, "personInfo": personInfo,
"sort": str(num) "sort": str(num)
} }
dic_json_img = { # dic_json_img = {
"socialCreditCode": social_code, # "socialCreditCode": social_code,
"name": name, # "name": name,
"sex": '', # "sex": '',
"education": '', # "education": '',
"position": position, # "position": position,
"salary": '', # "salary": '',
"birthYear": '', # "birthYear": '',
"shareNum": '', # "shareNum": '',
"shareRatio": '', # "shareRatio": '',
"benefitShare": '', # "benefitShare": '',
"currentTerm": '', # "currentTerm": '',
"personInfo": personInfo, # "personInfo": personInfo,
"头像": person_img, # "头像": person_img,
"sort": str(num) # "sort": str(num)
} # }
num = num + 1 num = num + 1
list_one_info.append(dic_json) list_one_info.append(dic_json)
# print(list_one_info) # print(list_one_info)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论