提交 ccc727e5 作者: 薛凌堃

修改证监会name加上.pdf

上级 a354950d
import json import json
...@@ -53,7 +53,7 @@ def uptoOBS(pdf_url,pdf_name,type_id,social_code): ...@@ -53,7 +53,7 @@ def uptoOBS(pdf_url,pdf_name,type_id,social_code):
page_size = 0 page_size = 0
for i in range(0, 3): for i in range(0, 3):
try: try:
name = pdf_name + '.pdf' name = pdf_name
now_time = time.strftime("%Y-%m") now_time = time.strftime("%Y-%m")
result = obsClient.putContent('zzsn', 'QYNotice/'+name, content=response.content) result = obsClient.putContent('zzsn', 'QYNotice/'+name, content=response.content)
with fitz.open(stream=response.content, filetype='pdf') as doc: with fitz.open(stream=response.content, filetype='pdf') as doc:
...@@ -89,7 +89,8 @@ def uptoOBS(pdf_url,pdf_name,type_id,social_code): ...@@ -89,7 +89,8 @@ def uptoOBS(pdf_url,pdf_name,type_id,social_code):
def secrchATT(item_id, name, type_id,order_by): def secrchATT(item_id, name, type_id,order_by):
sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s and order_by=%s ''' sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s and order_by=%s '''
cursor_.execute(sel_sql, (item_id, name, type_id,order_by)) cursor_.execute(sel_sql, (item_id, name, type_id,order_by))
selects = cursor_.fetchone() select = cursor_.fetchall()
selects = select[-1]
return selects return selects
# 插入到att表 返回附件id # 插入到att表 返回附件id
...@@ -124,7 +125,7 @@ def tableUpdate(retData, com_name, year, pdf_name, num): ...@@ -124,7 +125,7 @@ def tableUpdate(retData, com_name, year, pdf_name, num):
cnx_.commit() # 提交 cnx_.commit() # 提交
except Exception as e: except Exception as e:
print(e) print(e)
log.info("更新完成:{}".format(Upsql)) log.info(f"更新完成:{item_id}===={pdf_name}")
selects = secrchATT(item_id, pdf_name, type_id,order_by) selects = secrchATT(item_id, pdf_name, type_id,order_by)
id = selects[0] id = selects[0]
return id return id
...@@ -282,7 +283,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na ...@@ -282,7 +283,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
#判断文件是否已经存在obs服务器中 #判断文件是否已经存在obs服务器中
# file_path = 'QYNotice//浙江国祥股份有限公司首次公开发行股票并在主板上市暂缓发行公告' # file_path = 'QYNotice//浙江国祥股份有限公司首次公开发行股票并在主板上市暂缓发行公告'
now_time = time.strftime("%Y-%m") now_time = time.strftime("%Y-%m")
file_path = 'QYNotice/'+pdf_name+'.pdf' file_path = 'QYNotice/'+pdf_name
response = obsClient.getObjectMetadata('zzsn', file_path) response = obsClient.getObjectMetadata('zzsn', file_path)
if response.status >= 300: if response.status >= 300:
log.info('=====文件不存在obs=====') log.info('=====文件不存在obs=====')
...@@ -410,7 +411,7 @@ def SpiderByZJH(url, payload, dic_info, start_time,num): # dic_info 数据库 ...@@ -410,7 +411,7 @@ def SpiderByZJH(url, payload, dic_info, start_time,num): # dic_info 数据库
pdf_url_info = td_list[2] pdf_url_info = td_list[2]
# print(pdf_url) # print(pdf_url)
pdf_url = pdf_url_info['onclick'].strip('downloadPdf1(').split(',')[0].strip('\'') pdf_url = pdf_url_info['onclick'].strip('downloadPdf1(').split(',')[0].strip('\'')
name_pdf = pdf_url_info['onclick'].strip('downloadPdf1(').split('\',')[1].strip('\'') name_pdf = pdf_url_info['onclick'].strip('downloadPdf1(').split('\',')[1].strip('\'') + '.pdf'
pub_time = pdf_url_info['onclick'].strip('downloadPdf1(').split('\',')[2].strip('\'') pub_time = pdf_url_info['onclick'].strip('downloadPdf1(').split('\',')[2].strip('\'')
#todo:判断发布日期是否是日期格式 #todo:判断发布日期是否是日期格式
...@@ -489,7 +490,7 @@ if __name__ == '__main__': ...@@ -489,7 +490,7 @@ if __name__ == '__main__':
start_time = time.time() start_time = time.time()
# 获取企业信息 # 获取企业信息
# social_code = baseCore.redicPullData('NoticeEnterprise:gnqy_socialCode') # social_code = baseCore.redicPullData('NoticeEnterprise:gnqy_socialCode')
social_code = '91440500617540496Q' social_code = '91110108740053589U'
# 判断 如果Redis中已经没有数据,则等待 # 判断 如果Redis中已经没有数据,则等待
if social_code == None: if social_code == None:
time.sleep(20) time.sleep(20)
...@@ -531,6 +532,7 @@ if __name__ == '__main__': ...@@ -531,6 +532,7 @@ if __name__ == '__main__':
count += 1 count += 1
runType = 'NoticeReportCount' runType = 'NoticeReportCount'
baseCore.updateRun(social_code, runType, count) baseCore.updateRun(social_code, runType, count)
break
cursor.close() cursor.close()
cnx.close() cnx.close()
baseCore.close() baseCore.close()
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论