提交 5e97ace2 作者: 薛凌堃

核心人员bug处理

上级 253a2372
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
""" """
import datetime import datetime
import json import json
from random import randint
import requests, time import requests, time
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
...@@ -30,16 +31,14 @@ from classtool import Token, File, Tag ...@@ -30,16 +31,14 @@ from classtool import Token, File, Tag
token = Token() token = Token()
@retry(tries=3, delay=1) @retry(tries=3, delay=1)
def get_html(tycid, s, headers): def get_html(tycid, driver, headers):
url = f"https://www.tianyancha.com/company/{tycid}" url = f"https://www.tianyancha.com/company/{tycid}"
# ip = baseCore.get_proxy() # ip = baseCore.get_proxy()
response = s.get(url=url, headers=headers) #, proxies=ip) driver.get(url=url) #, proxies=ip)
if response.status_code == 200: time.sleep(5)
pass page_source = driver.page_source
else:
raise
# return -1 # return -1
soup = BeautifulSoup(response.content, 'html.parser') soup = BeautifulSoup(page_source, 'html.parser')
try: try:
div_part = soup.find('div', attrs={'data-dim': 'staff'}) div_part = soup.find('div', attrs={'data-dim': 'staff'})
# div_part.find('div', class_='dimHeader_root__XTCLe') # div_part.find('div', class_='dimHeader_root__XTCLe')
...@@ -74,8 +73,49 @@ def get_page(url, s, headers): ...@@ -74,8 +73,49 @@ def get_page(url, s, headers):
return total_page_ return total_page_
from selenium import webdriver
def create_driver():
    """Start a Microsoft Edge WebDriver session and return it.

    The msedgedriver binary is taken from a fixed local path, and the
    capabilities passed to the driver carry a ``--start-maximized`` argument
    under ``ms:edgeOptions``.
    """
    edge_options = {
        "extensions": [],
        # Argument asking the browser to run with a maximized window.
        "args": ["--start-maximized"],
    }
    capabilities = {
        "browserName": "MicrosoftEdge",
        "ms:edgeOptions": edge_options,
    }
    return webdriver.Edge(
        executable_path=r'D:\soft\msedgedriver.exe',
        capabilities=capabilities,
    )
def login(driver):
    """Load one account's cookies into the browser and into a requests session.

    Args:
        driver: Selenium WebDriver to receive the cookies. The visible caller
            opens the site before calling this; presumably the driver must
            already be on the target domain for ``add_cookie`` to succeed
            (TODO confirm).

    Returns:
        A ``(driver, id_cookie, session)`` tuple. When ``token.get_cookies()``
        yields no cookies, the function logs, sleeps for 30 minutes and
        returns ``(driver, '', '')`` so the caller can test ``id_cookie`` and
        retry later.
    """
    cookies_list, id_cookie, user_name = token.get_cookies()
    if not cookies_list:
        log.info("没有账号了,等待30分钟")
        time.sleep(30 * 60)
        # Return the live driver rather than '': the caller rebinds its
        # driver from this tuple and passes it back in on the next attempt,
        # so an empty string here would break the following add_cookie call.
        return driver, '', ''

    log.info(f'=====当前使用的是{user_name}的cookie======')
    for cookie in cookies_list:
        driver.add_cookie(cookie)
    # Short pause, then reload the page now that the cookies are in place.
    time.sleep(3)
    driver.refresh()
    time.sleep(5)

    # Mirror the same cookies (name -> value) into a requests session.
    s = requests.Session()
    s.cookies.update(
        {cookie['name']: cookie['value'] for cookie in cookies_list}
    )
    return driver, id_cookie, s
def doJob(): def doJob():
# for social_code in social_code_list: # for social_code in social_code_list:
driver = create_driver()
url = 'https://www.tianyancha.com/'
driver.get(url)
driver.maximize_window()
while True: while True:
# todo:设置cookies的使用 # todo:设置cookies的使用
headers = { headers = {
...@@ -87,21 +127,15 @@ def doJob(): ...@@ -87,21 +127,15 @@ def doJob():
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'version': 'TYC-Web' 'version': 'TYC-Web'
} }
cookies_list, id_cookie, user_name = token.get_cookies() driver, id_cookie, s = login(driver)
if cookies_list: if id_cookie:
pass pass
else: else:
log.info("没有账号了,等待30分钟")
time.sleep(30*60)
continue continue
log.info(f'=====当前使用的是{user_name}的cookie======')
cookies = {}
for cookie in cookies_list:
cookies[cookie['name']] = cookie['value']
s = requests.Session()
s.cookies.update(cookies)
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息 # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
item = baseCore.redicPullData('UpdateCoreperson:SocialCode_CompanyName') # item = baseCore.redicPullData('UpdateCoreperson:SocialCode_CompanyName')
item = '913600007969593637|江西国泰集团股份有限公司'
# 判断 如果Redis中已经没有数据,则等待 # 判断 如果Redis中已经没有数据,则等待
# social_code = '91110108780992804C' # social_code = '91110108780992804C'
if item == None: if item == None:
...@@ -165,7 +199,7 @@ def doJob(): ...@@ -165,7 +199,7 @@ def doJob():
list_one_info = [] list_one_info = []
num = 1 num = 1
try: try:
charge = get_html(tycid, s, headers) charge = get_html(tycid, driver, headers)
# 页面请求三次都失败 # 页面请求三次都失败
except: except:
charge = -1 charge = -1
...@@ -180,7 +214,10 @@ def doJob(): ...@@ -180,7 +214,10 @@ def doJob():
continue continue
elif charge == -2: elif charge == -2:
# 该企业没有人员信息 # 该企业没有人员信息
log.info(f"{id}---{xydm}----{tycid}----没有核心人员") token.updateTokeen(id_cookie, 2)
baseCore.rePutIntoR('UpdateCoreperson:SocialCode_CompanyName', item)
log.info(f"{id}---{xydm}----{tycid}----没有核心人员或需要滑动验证----重新放入redis")
# log.info(f"{id}---{xydm}----{tycid}----没有核心人员")
continue continue
elif charge == 0: elif charge == 0:
...@@ -222,7 +259,7 @@ def doJob(): ...@@ -222,7 +259,7 @@ def doJob():
log.info(f'{id}---{xydm}----{tycid}----页面和接口数据不对应---{charge}---{total_page2}---{total_page3}') log.info(f'{id}---{xydm}----{tycid}----页面和接口数据不对应---{charge}---{total_page2}---{total_page3}')
continue continue
if total_page == 0: if total_page == 0:
token.updateTokeen(id_cookie, 2) # token.updateTokeen(id_cookie, 2)
# 重新塞入redis # 重新塞入redis
baseCore.rePutIntoR('UpdateCoreperson:SocialCode_CompanyName', item) baseCore.rePutIntoR('UpdateCoreperson:SocialCode_CompanyName', item)
log.info(f'==={social_code}=====总数请求失败===重新放入redis====') log.info(f'==={social_code}=====总数请求失败===重新放入redis====')
...@@ -232,27 +269,36 @@ def doJob(): ...@@ -232,27 +269,36 @@ def doJob():
# flag = 2 # flag = 2
# todo: 测试程序是否执行到这一步 # todo: 测试程序是否执行到这一步
log.info(f'总数为{total_page}') log.info(f'总数为{total_page}')
for page in range(1, int((total_page / 20) + 1) + 1): if int(total_page % 20) == 0:
maxpage = int((total_page / 20) + 1)
else:
maxpage = int((total_page/20) + 1) +1
for page in range(1, maxpage):
res = None res = None
for c in range(3): for c in range(3):
ip = baseCore.get_proxy() try:
url_ = url.format(t, tycid, page) for d in range(3):
# url_ = 'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_=1706765329671&gid=8715844&pageSize=20&pageNum=1' ip = baseCore.get_proxy()
res = requests.get(url_, headers=headers, proxies=ip, verify=False) # ,verify=False url_ = url.format(t, tycid, page)
time.sleep(1) # url_ = 'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_=1706765329671&gid=8715844&pageSize=20&pageNum=1'
if res.status_code == 200: res = s.get(url_, headers=headers, proxies=ip) # ,verify=False
# res = requests.get(url_, headers=headers, verify=False) # ,verify=False
time.sleep(randint(2, 4))
if res.json()['errorCode'] != 0:
continue
else:
break
break break
else: except:
if c == 2:
break
continue continue
if res:
if res.json()['errorCode'] == 0:
pass pass
else: else:
token.updateTokeen(id_cookie, 2) # token.updateTokeen(id_cookie, 2)
# 重新塞入redis # 重新塞入redis
baseCore.rePutIntoR('UpdateCoreperson:SocialCode_CompanyName', item) # baseCore.rePutIntoR('UpdateCoreperson:SocialCode_CompanyName', item)
log.info(f'{id}---{xydm}----{tycid}----高管信息请求失败') log.info(f'{id}---{xydm}----{tycid}--{res.json()}--高管信息请求失败')
continue continue
# todo:test测试 # todo:test测试
log.info(f'{id}---{xydm}----{tycid}----{res.json()}') log.info(f'{id}---{xydm}----{tycid}----{res.json()}')
...@@ -419,9 +465,9 @@ def doJob(): ...@@ -419,9 +465,9 @@ def doJob():
continue continue
else: else:
pass pass
# response = requests.post('http://114.115.236.206:8088/sync/executive', data=json_updata, timeout=300, response = requests.post('http://114.115.236.206:8088/sync/executive', data=json_updata, timeout=300,
# verify=False) verify=False)
# print(response.text) print(response.text)
log.info('=========成功======') log.info('=========成功======')
token.updateTokeen(id_cookie, 3) token.updateTokeen(id_cookie, 3)
time.sleep(10) time.sleep(10)
......
...@@ -39,8 +39,8 @@ def getTycIdByXYDM(com_name, s): ...@@ -39,8 +39,8 @@ def getTycIdByXYDM(com_name, s):
try: try:
# headers['User-Agent'] = baseCore.getRandomUserAgent() # headers['User-Agent'] = baseCore.getRandomUserAgent()
# headers['X-AUTH-TOKEN'] = baseCore.GetTYCToken() # headers['X-AUTH-TOKEN'] = baseCore.GetTYCToken()
# response = requests.post(url,json=paramJsonData,headers=headers,verify=False, proxies=ip) response = s.post(url,json=paramJsonData,headers=headers,verify=False, proxies=ip)
response = s.post(url, json=paramJsonData, headers=headers) # response = s.post(url, json=paramJsonData, headers=headers)
time.sleep(random.randint(3, 5)) time.sleep(random.randint(3, 5))
retJsonData =json.loads(response.content.decode('utf-8')) retJsonData =json.loads(response.content.decode('utf-8'))
if retJsonData['data'] and retJsonData['state'] == 'ok': if retJsonData['data'] and retJsonData['state'] == 'ok':
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论