提交 c04f26fb 作者: 薛凌堃

企业年报脚本维护

上级 310e8f15
import json
import json
......@@ -76,8 +76,19 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
soup = RequestUrl(url, payload, item_id, start_time)
if soup == '':
return
# 先获取页数
# 判断查找内容是否存在
try:
is_exist = soup.find('div', class_='con').text
if is_exist == '没有查询到数据':
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, url, '没有查询到数据')
return
except:
pass
# 先获取页数
page = soup.find('div', class_='pages').find('ul', class_='g-ul').text
total = re.findall(r'\d+', page)[0]
......@@ -135,7 +146,7 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
year = str(year)
# page_size = 0
name_pdf = f'{short_name}:{year}年年度报告.pdf'
name_pdf = f'{short_name}{year}年年度报告.pdf'
sel_sql = '''select item_id,year from clb_sys_attachment where item_id = %s and year = %s and type_id=1'''
cursor_.execute(sel_sql, (item_id, year))
selects = cursor_.fetchone()
......
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
......@@ -12,7 +12,7 @@ from datetime import datetime
from kafka import KafkaProducer
import sys
sys.path.append('D:\\KK\\zzsn_spider\\base')
sys.path.append('D:\\kkwork\\zzsn_spider\\base')
import BaseCore
baseCore = BaseCore.BaseCore()
import requests, re, time, pymysql, fitz
......@@ -79,6 +79,8 @@ def spider_annual_report(dict_info,num):
except:
log.info(f'{social_code}.........年度报告列表为空')
exception = '年度报告列表为空'
# 证监会企业年报
baseCore.r.rpush('AnnualEnterprise:zjh_socialCode', social_code)
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', exception)
......@@ -310,6 +312,7 @@ if __name__ == '__main__':
count += 1
runType = 'AnnualReportCount'
baseCore.updateRun(social_code, runType, count)
baseCore.r.rpush('AnnualEnterprise:zjh_socialCode',social_code)
# break
# cursor.close()
cnx_.close()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论