提交 8bec5313 作者: 薛凌堃

领导人物

上级 d471d4f7
......@@ -174,6 +174,76 @@ def zyzsjg():
# sendKafka(data)
print(data)
def _leader_and_position(td_list):
    """Pick the (name cell, position cell) pair out of one table row.

    Rows with three cells carry the name in the second cell and the position
    in the third; rows with two cells carry the name first and the position
    second. Any other layout is not a leader row and yields ``None``.
    """
    if len(td_list) == 3:
        return td_list[1], td_list[2]
    if len(td_list) == 2:
        return td_list[0], td_list[1]
    return None


def dfrwk():
    """Scrape the local-leader tables linked from the district.ce.cn index.

    The index page lists one link per place inside ``div.left1``. Each linked
    page is fetched and every ``div.ren2`` table inside a ``div.left2`` is
    walked row by row to collect a leader name and position.

    Returns:
        list[dict]: one record per leader row, with ``name`` and ``work``
        filled in, ``type`` set to 3 and every other field left empty.

    Raises:
        requests.RequestException: if the index page itself cannot be fetched.
        Failures on an individual place page are logged and skipped.
    """
    # Local import: the file's top-level import block is not visible here.
    from urllib.parse import urljoin

    url_df = 'http://district.ce.cn/zt/rwk/'
    timeout = 30  # seconds; without it a stalled server blocks the scrape forever

    req = requests.get(url=url_df, headers=headers, timeout=timeout)
    soup = BeautifulSoup(req.content, 'html.parser')
    index_part = soup.find('div', class_='left1')
    if index_part is None:
        # Page layout changed or an error page was served: nothing to walk.
        log.error(f'left1 container not found: {url_df}')
        return []

    datas_df = []
    for df in index_part.find_all('div'):
        link = df.find('a')
        df_href = link.get('href') if link is not None else ''
        if df_href:
            df_place = df.text.replace('\n', '')
            # Resolve relative links against the index URL; absolute links
            # pass through unchanged.
            datas_df.append([df_place, urljoin(url_df, df_href)])
    print(datas_df)

    peoples = []
    for place, href in datas_df:
        try:
            req_df = requests.get(url=href, headers=headers, timeout=timeout)
        except requests.RequestException:
            # One unreachable place page must not abort the remaining places.
            log.error(f'{place}===={href}')
            continue
        soup_df = BeautifulSoup(req_df.content, 'html.parser')
        for df in soup_df.find_all('div', class_='left2'):
            rwpart = df.find('div', class_='ren2')
            if rwpart is None:
                continue
            for tr in rwpart.find_all('tr'):
                cells = _leader_and_position(tr.find_all('td'))
                if cells is None:
                    # Header/spacer rows: skip instead of re-emitting the
                    # previous row's leader.
                    continue
                leader = cells[0].text
                position = cells[1].text
                print(place, leader, position)
                peoples.append({
                    'name': leader,  # name
                    'sex': '',  # gender
                    'work': position,  # position held
                    'birthplace': '',  # place of birth
                    'birthday': '',  # date of birth
                    'company': '',  # former employers
                    'city': '',  # associated city
                    'school': '',  # school graduated from
                    'province': '',  # province or municipality
                    'type': 3,  # category (1: ministry figures, 2: central directly-affiliated figures, 3: local figures)
                    'department': '',  # department
                    'headSculpture': '',  # photo link
                })
    return peoples
def gwybw_task():
# 实例化一个调度器
......@@ -200,11 +270,12 @@ def zyzsjg_task():
# Script entry point.
# NOTE(review): this span looks like a rendered diff hunk whose +/- markers
# and indentation were lost. Presumably the live try/except calls below are
# the removed (pre-commit) lines and the commented-out copy followed by
# dfrwk() is the added (post-commit) version — confirm against the commit
# before treating both halves as present in the file.
if __name__ == "__main__":
try:
gwybw_task()
except:
log.error('部委人物采集出错')
try:
zyzsjg_task()
except:
log.error('中直人物采集出错')
# try:
# gwybw_task()
# except:
# log.error('部委人物采集出错')
# try:
# zyzsjg_task()
# except:
# log.error('中直人物采集出错')
# Run the dfrwk() scrape directly.
dfrwk()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论