提交 2a7b3088 作者: 薛凌堃

江西省人民政府

上级 687679ca
import time
@@ -66,6 +66,7 @@ def getDataJson():
 def getContent(url, num, publishDate):
     id_list = []
     soup = getSoup(url)
+    policy.paserUrl(soup, url)
     contentWithTag = soup.find('div', attrs={'id': 'zoom'})
     img_list = contentWithTag.find_all('img')
     num_ = 1
@@ -108,6 +109,7 @@ def doJob():
     for data_ in data_json:
         data_ = data_.replace('\\', '')
         soup = BeautifulSoup(data_, 'lxml')
         title = soup.select('body > div > div:nth-of-type(1) > span:nth-of-type(2) > a')[0].text.lstrip().strip()
         pub_hao = soup.find('table', class_='jcse-service-table').find_all('tr')[0].find_all('td')[
             -1].text.lstrip().strip()
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论