提交 422c5516 作者: LiuLiYuan

审计法规 02/06

上级 e976fee1
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup
from retry import retry
......@@ -69,11 +72,13 @@ def getHref(Keywords):
href = 'https://sclx.pkulaw.com' + tag.get('url')
except:
href = ''
time.sleep(1)
return href
@retry(tries=2, delay=5)
def getData(href):
data = []
ip = baseCore.get_proxy()
req = requests.get(href, headers=headers, proxies=ip, verify=False)
req.encoding = req.apparent_encoding
......@@ -89,15 +94,18 @@ def getData(href):
relevance = li.find('div', class_='relevance').text.strip()
except:
relevance = ''
log.info(f'{publishDate}==={theme}==={relevance}')
data.append([publishDate,theme,relevance])
time.sleep(1)
return data
def doJob():
    """Scrape the rows for one hard-coded law title and print them.

    The keyword is resolved to a URL with ``getHref``. When a non-empty URL
    comes back, the rows returned by ``getData`` are collected; otherwise the
    row list stays empty. The rows are then wrapped in a pandas DataFrame and
    printed.
    """
    data = []
    Keywords = '中华人民共和国公司法(2023修订)'
    href = getHref(Keywords)
    if href:
        # Fetch exactly once. A bare ``getData(href)`` call used to sit in
        # front of this line; its result was thrown away, so it only repeated
        # the HTTP request and the per-row sleeps.
        data += getData(href)
    df = pd.DataFrame(data)
    print(df)
if __name__ == '__main__':
    # Run the scrape only when this file is executed as a script, so that
    # importing the module does not trigger network requests.
    doJob()
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论