提交 060ce7c4 作者: 薛凌堃

2023/8/12

上级 aedff657
......@@ -92,11 +92,18 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
short_name = dic_info[4]
soup = RequestUrl(url, payload, item_id, start_time)
if soup == '':
return
return False
# 先获取页数
page = 0
try:
page = soup.find('div', class_='pages').find('ul', class_='g-ul').text
except:
e = f"该企业没有{dic_parms['Catagory2']}数据"
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, dic_parms['url'], f'{e}')
return False
if page != 0:
total = re.findall(r'\d+', page)[0]
r_page = int(total) % 15
if r_page == 0:
......@@ -220,7 +227,7 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
continue
else:
continue
return True
def getUrl(code, url_parms, Catagory2_parms):
# 深市
......
......@@ -261,14 +261,16 @@ def SpiderByZJH(url, payload, dic_info, start_time): # dic_info 数据库中获
if soup == '':
return False
# 先获取页数
page = 0
try:
page = soup.find('div', class_='pages').find('ul', class_='g-ul').text
except:
e = f"该企业没有{dic_parms['Catagory2']}数据"
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, dic_parms['url'], 'Kafka操作失败')
baseCore.recordLog(social_code, taskType, state, takeTime, dic_parms['url'], f'{e}')
return False
if page != 0:
total = re.findall(r'\d+', page)[0]
r_page = int(total) % 15
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论