提交 d667977d 作者: 薛凌堃

证监会公告维护

上级 d85f3006
""" """
...@@ -26,7 +26,50 @@ cursor = baseCore.cursor ...@@ -26,7 +26,50 @@ cursor = baseCore.cursor
# tracker_conf = get_tracker_conf('./client.conf') # tracker_conf = get_tracker_conf('./client.conf')
# client = Fdfs_client(tracker_conf) # client = Fdfs_client(tracker_conf)
taskType = '企业公告/证监会/福布斯' taskType = '企业公告/证监会'
def secrchATT(item_id, name, type_id):
    """Look up an existing attachment row in ``clb_sys_attachment``.

    Args:
        item_id: Value matched against the ``item_id`` column.
        name: Value matched against the ``name`` column.
        type_id: Value matched against the ``type_id`` column.

    Returns:
        Whatever ``cursor.fetchone()`` yields for the query: the first
        matching row (its single selected column is ``id``), or ``None``
        when the DB-API cursor has no row to return.
    """
    sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s '''
    cursor.execute(sel_sql, (item_id, name, type_id))
    # The result set lives on the cursor that executed the statement; DB-API
    # connection objects do not provide fetchone(), so fetch from the cursor.
    selects = cursor.fetchone()
    return selects
# Insert a row into the attachment table and return the attachment id.
def tableUpdate(retData, com_name, year, pdf_name, num):
    """Return the attachment id for ``pdf_name``, inserting the row if needed.

    Args:
        retData: Mapping that must provide the keys ``item_id``, ``type_id``,
            ``group_name``, ``path``, ``full_path``, ``category``,
            ``file_size``, ``status``, ``create_by``, ``page_size`` and
            ``create_time``.
        com_name: Only used in the "already exists" log message.
        year: Stored in the ``year`` column.
        pdf_name: Stored in the ``name`` column and used for the lookup.
        num: Stored in the ``order_by`` column.

    Returns:
        The first column (``id``) of the row found by ``secrchATT`` for
        ``(item_id, pdf_name, type_id)``.
    """
    # Read every required key up front, in a fixed order, so a missing key
    # fails before any database access takes place.
    needed_keys = (
        'item_id', 'type_id', 'group_name', 'path', 'full_path', 'category',
        'file_size', 'status', 'create_by', 'page_size', 'create_time',
    )
    fields = {key: retData[key] for key in needed_keys}
    item_id = fields['item_id']
    type_id = fields['type_id']

    existing_row = secrchATT(item_id, pdf_name, type_id)
    if existing_row:
        log.info(f'com_name:{com_name}已存在')
        return existing_row[0]

    Upsql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,page_size) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
    # Tuple order must match the column list in the insert statement above.
    values = (
        year,
        pdf_name,
        type_id,
        item_id,
        fields['group_name'],
        fields['path'],
        fields['full_path'],
        fields['category'],
        fields['file_size'],
        num,
        fields['status'],
        fields['create_by'],
        fields['create_time'],
        fields['page_size'],
    )
    cursor.execute(Upsql, values)
    cnx.commit()
    log.info("更新完成:{}".format(Upsql))

    # Query again to obtain the id of the row that was just inserted.
    inserted_row = secrchATT(item_id, pdf_name, type_id)
    return inserted_row[0]
def RequestUrl(url, payload, social_code,start_time): def RequestUrl(url, payload, social_code,start_time):
# ip = get_proxy()[random.randint(0, 3)] # ip = get_proxy()[random.randint(0, 3)]
...@@ -132,7 +175,7 @@ def getUrl(code, url_parms, Catagory2_parms): ...@@ -132,7 +175,7 @@ def getUrl(code, url_parms, Catagory2_parms):
return dic_parms return dic_parms
def InsterInto(short_name, social_code, name_pdf, pub_time, pdf_url, report_type): def InsterInto(short_name, social_code, pdf_url):
inster = False inster = False
sel_sql = '''select social_credit_code,source_address from brpa_source_article where social_credit_code = %s and source_address = %s and origin='证监会' and type='1' ''' sel_sql = '''select social_credit_code,source_address from brpa_source_article where social_credit_code = %s and source_address = %s and origin='证监会' and type='1' '''
...@@ -174,7 +217,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na ...@@ -174,7 +217,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
log.info(f'====pdf解析失败====') log.info(f'====pdf解析失败====')
return False return False
num = num + 1 num = num + 1
att_id = baseCore.tableUpdate(retData,com_name,year,pdf_name,num) att_id = tableUpdate(retData,com_name,year,pdf_name,num)
content = retData['content'] content = retData['content']
if retData['state']: if retData['state']:
pass pass
...@@ -292,7 +335,7 @@ def SpiderByZJH(url, payload, dic_info, start_time,num): # dic_info 数据库 ...@@ -292,7 +335,7 @@ def SpiderByZJH(url, payload, dic_info, start_time,num): # dic_info 数据库
report_type = td_list[4].text.strip() report_type = td_list[4].text.strip()
# 信息插入数据库 # 信息插入数据库
insert = InsterInto(short_name, social_code, name_pdf, pub_time, pdf_url, report_type) insert = InsterInto(short_name, social_code, name_pdf)
if insert: if insert:
# # 公告信息列表 # # 公告信息列表
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论