提交 98ca1672 作者: 薛凌堃

Merge remote-tracking branch 'origin/master'

......@@ -49,6 +49,7 @@ def RequestUrl(url, payload, item_id, start_time):
if response.status_code == 200:
# 请求成功,处理响应数据
# print(response.text)
soup = BeautifulSoup(response.text, 'html.parser')
pass
else:
# 请求失败,输出错误信息
......@@ -56,9 +57,7 @@ def RequestUrl(url, payload, item_id, start_time):
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(item_id, taskType, state, takeTime, url, '请求失败')
soup = BeautifulSoup(response.text, 'html.parser')
soup = ''
return soup
......@@ -89,6 +88,8 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
# years = dic_info['call_year']
short_name = dic_info[4]
soup = RequestUrl(url, payload, item_id, start_time)
if soup == '':
return
# 先获取页数
page = soup.find('div', class_='pages').find('ul', class_='g-ul').text
......@@ -106,8 +107,9 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
else:
# http://eid.csrc.gov.cn/101811/index_3_f.html
href = url.split('index')[0] + f'index_{i}_f.html'
soup = RequestUrl(href, payload, item_id, start_time)
if soup == '':
continue
tr_list = soup.find('div', id='txt').find_all('tr')
for tr in tr_list[1:]:
td_list = tr.find_all('td')
......@@ -210,7 +212,7 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
baseCore.recordLog(item_id, taskType, state, takeTime, pdf_url, e)
continue
else:
continue
continue
def getUrl(code, url_parms, Catagory2_parms):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论