提交 6809e271 作者: LiJunMing

证监会公告脚本维护

上级 93d43561
"""
import json
"""
证监会公告采集:只能按企业逐个搜索的方式采集,企业数据从上市库(sys_enterprise_ipo_copy1)中获取
craw_state:已采集过表示为True,未采集表示为0,拿取数据表示为ing,解析失败表示为400
update_state:为1 表示需要更新,用来增量循环
如何统计某份报告是否已采集到:通过 dt_error 库统计采集失败的信息
"""
import json
import re
import time
import fitz
import pymysql
import requests
from bs4 import BeautifulSoup
from kafka import KafkaProducer
from datetime import datetime
from base import BaseCore
# from fdfs_client.client import get_tracker_conf, Fdfs_client
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
......@@ -25,8 +14,6 @@ cursor = baseCore.cursor
cnx_ = baseCore.cnx_
cursor_ = baseCore.cursor_
# tracker_conf = get_tracker_conf('./client.conf')
# client = Fdfs_client(tracker_conf)
taskType = '企业公告/证监会'
......@@ -431,7 +418,7 @@ if __name__ == '__main__':
com_name = dic_info[1]
dic_parms = getUrl(code, url_parms, Catagory2_parms)
dic_parms_ls = getUrl(code, url_parms_ls, Catagory2_parms_ls)
if len(dic_parms) > 0:
if dic_parms:
start_time_cj = time.time()
SpiderByZJH(dic_parms["url"], dic_parms["payload"], dic_info, start_time,num)
log.info(f'{code}==========={short_name},{com_name},发行公告,耗时{baseCore.getTimeCost(start_time_cj, time.time())}')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论