Commit 6809e271 by LiJunMing

Maintain the CSRC announcement collection script

Parent 93d43561
""" import json
"""
证监会公告采集,只能按照搜索企业来采,从上市库里拿企业数据,sys_enterprise_ipo_copy1
craw_state:已采集过表示为True,未采集表示为0,拿取数据表示为ing,解析失败表示为400
update_state:为1 表示需要更新,用来增量循环
如何统计出来该报告采到了没有,dt_error库统计失败的信息
"""
import json import json
import re import re
import time import time
import fitz
import pymysql
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from kafka import KafkaProducer from kafka import KafkaProducer
from datetime import datetime
from base import BaseCore from base import BaseCore
# from fdfs_client.client import get_tracker_conf, Fdfs_client
baseCore = BaseCore.BaseCore() baseCore = BaseCore.BaseCore()
log = baseCore.getLogger() log = baseCore.getLogger()
@@ -25,8 +14,6 @@
cursor = baseCore.cursor
cnx_ = baseCore.cnx_
cursor_ = baseCore.cursor_
# tracker_conf = get_tracker_conf('./client.conf')
# client = Fdfs_client(tracker_conf)
taskType = '企业公告/证监会'
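
The docstring above describes a small state machine on sys_enterprise_ipo_copy1 (craw_state moving through 0 -> ing -> True or 400, plus update_state = 1 for incremental re-crawls). The sketch below shows one way that bookkeeping could look with pymysql; it is illustrative only, and the connection parameters, column names other than the two state columns, and helper names are assumptions, not taken from this repository.

# Hedged sketch of the craw_state / update_state bookkeeping described in the docstring.
# Everything except the table name and the two state columns is an assumption.
import pymysql

cnx = pymysql.connect(host='localhost', user='user', password='***',
                      db='caiji', charset='utf8mb4')  # placeholder credentials
cursor = cnx.cursor()

def fetch_next_company():
    # Take one un-crawled (craw_state = '0') or to-be-updated (update_state = 1) company
    # and mark it 'ing' so other workers skip it.
    cursor.execute(
        "SELECT id, securities_code FROM sys_enterprise_ipo_copy1 "
        "WHERE craw_state = '0' OR update_state = 1 LIMIT 1")
    row = cursor.fetchone()
    if row is None:
        return None
    cursor.execute("UPDATE sys_enterprise_ipo_copy1 SET craw_state = 'ing' WHERE id = %s", (row[0],))
    cnx.commit()
    return row

def mark_result(record_id, ok):
    # Mark the company as crawled ('True') or as a parse failure ('400') and clear update_state.
    state = 'True' if ok else '400'
    cursor.execute(
        "UPDATE sys_enterprise_ipo_copy1 SET craw_state = %s, update_state = 0 WHERE id = %s",
        (state, record_id))
    cnx.commit()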
@@ -431,7 +418,7 @@ if __name__ == '__main__':
        com_name = dic_info[1]
        dic_parms = getUrl(code, url_parms, Catagory2_parms)
        dic_parms_ls = getUrl(code, url_parms_ls, Catagory2_parms_ls)
-       if len(dic_parms) > 0:
+       if dic_parms:
            start_time_cj = time.time()
            SpiderByZJH(dic_parms["url"], dic_parms["payload"], dic_info, start_time, num)
            log.info(f'{code}==========={short_name},{com_name},发行公告,耗时{baseCore.getTimeCost(start_time_cj, time.time())}')
...
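
The one functional change in this hunk swaps `len(dic_parms) > 0` for the idiomatic truthiness check `if dic_parms:`, which behaves identically for the dict returned by getUrl. Judging from the imports (requests, fitz, KafkaProducer), SpiderByZJH presumably downloads each announcement PDF, extracts its text, and publishes it downstream; the sketch below illustrates that flow under that assumption. The broker address, topic name, and message fields are placeholders, not taken from this script.

# Hedged sketch: fetch an announcement PDF, extract text with PyMuPDF (fitz), push it to Kafka.
# Broker address, topic and message fields are assumptions for illustration only.
import json
import fitz
import requests
from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],   # placeholder broker
    value_serializer=lambda m: json.dumps(m, ensure_ascii=False).encode('utf-8'))

def push_announcement(pdf_url, title, com_name):
    resp = requests.get(pdf_url, timeout=30)                 # download the announcement PDF
    with fitz.open(stream=resp.content, filetype='pdf') as doc:
        content = ''.join(page.get_text() for page in doc)   # plain-text extraction per page
    message = {'title': title, 'company': com_name, 'content': content}
    producer.send('zjh_announcement', message)               # hypothetical topic name
    producer.flush()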