提交 6cdfccca 作者: LiuLiYuan

国研网增量修改 02/04

上级 9dbcd59d
...@@ -248,23 +248,15 @@ def doJob(): ...@@ -248,23 +248,15 @@ def doJob():
source = data['source'] source = data['source']
summary = data['summary'] summary = data['summary']
publishdate = data['deliveddate'] publishdate = data['deliveddate']
if publishdate < '2023-05-25':
pageFlg = False
break
chnid = data['chnid'] chnid = data['chnid']
leafid = data['leafid'] leafid = data['leafid']
code = data['code'].split('_')[1] code = data['code'].split('_')[1]
href = f'https://d.drcnet.com.cn/eDRCnet.common.web/DocDetail.aspx?chnid={chnid}&leafid={leafid}&docid={code}&uid=8007&version=YDYL' href = f'https://d.drcnet.com.cn/eDRCnet.common.web/DocDetail.aspx?chnid={chnid}&leafid={leafid}&docid={code}&uid=8007&version=YDYL'
is_href = db_storage.find_one({'网址': href}) is_href = db_storage.find_one({'网址': href})
if is_href: if is_href:
continue pageFlg = False
break
driver.get(href) driver.get(href)
# js = "return action=document.body.scrollHeight"
# new_height = driver.execute_script(js)
# for i in range(0, new_height, 300):
# driver.execute_script(js)
# driver.execute_script('window.scrollTo(0, %s)' % (i))
# time.sleep(1)
try: try:
WebDriverWait(driver, 5).until( WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.ID, 'AttachmentDownload')) EC.presence_of_element_located((By.ID, 'AttachmentDownload'))
...@@ -339,7 +331,6 @@ def doJob(): ...@@ -339,7 +331,6 @@ def doJob():
log.error(f'{href}===传输失败') log.error(f'{href}===传输失败')
num += 1 num += 1
time.sleep(5) time.sleep(5)
pageFlg = False
else: else:
break break
log.info(f'{country}===共采集{num}条') log.info(f'{country}===共采集{num}条')
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论