提交 e87f42c5 作者: 薛凌堃

年报

上级 637feb1a
...@@ -58,22 +58,22 @@ if __name__ == '__main__': ...@@ -58,22 +58,22 @@ if __name__ == '__main__':
'Accept-Encoding': 'gzip, deflate, br', 'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6', 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
} }
query = "SELECT * FROM clb_sys_attachment WHERE type_id=1 AND source='证监会' AND id = '18703822757'" query = "SELECT * FROM clb_sys_attachment WHERE type_id=1 AND create_time >= '2023-10-27' AND create_time <= '2023-10-28' AND source like '%官网' "
cursor_.execute(query) cursor_.execute(query)
results = cursor_.fetchall() results = cursor_.fetchall()
for result in results: for result in results:
att_id = result[0] att_id = result[0]
name = result[1] name = result[1]
source = result[18]
social_code = result[3] social_code = result[3]
selectcom = f"select * from EnterpriseInfo where SocialCode = '{social_code}'" # selectcom = f"select * from EnterpriseInfo where SocialCode = '{social_code}'"
cursor.execute(selectcom) # cursor.execute(selectcom)
selects = cursor.fetchone() # selects = cursor.fetchone()
com_name = selects[4] # com_name = selects[4]
if com_name: # if com_name:
pass # pass
else: # else:
com_name = selects[1] # com_name = selects[1]
full_path = 'http://zzsn.luyuen.com/' + result[19] full_path = 'http://zzsn.luyuen.com/' + result[19]
year = result[9] year = result[9]
create_time = result[13] create_time = result[13]
...@@ -103,7 +103,7 @@ if __name__ == '__main__': ...@@ -103,7 +103,7 @@ if __name__ == '__main__':
'id': '', 'id': '',
'keyWords': '', 'keyWords': '',
'lang': detect_language, 'lang': detect_language,
'origin': '证监会', 'origin': source,
# 'origin': '雪球网', # 'origin': '雪球网',
'publishDate': publish, 'publishDate': publish,
'sid': '1684032033495392257', 'sid': '1684032033495392257',
......
...@@ -29,7 +29,7 @@ type_id = 1 ...@@ -29,7 +29,7 @@ type_id = 1
create_by = 'XueLingKun' create_by = 'XueLingKun'
taskType = '企业年报' taskType = '企业年报'
file_path = 'D:\\年报\\欧盟记分牌2500_年报补充_718_20231018' file_path = 'D:\\年报\\福布斯2000年报PDF下载-207'
log.info(f'=============当前pid为{baseCore.getPID()}==============') log.info(f'=============当前pid为{baseCore.getPID()}==============')
def sendKafka(dic_news): def sendKafka(dic_news):
...@@ -146,9 +146,9 @@ if __name__=='__main__': ...@@ -146,9 +146,9 @@ if __name__=='__main__':
social_code = data[1] social_code = data[1]
ename = data[2] ename = data[2]
cname = data[3] cname = data[3]
file_name = ename + ':' + file_year + '年年度报告' + '.pdf' file_name = cname + ':' + file_year + '年年度报告' + '.pdf'
content = '' content = ''
origin = ename + '官网' origin = cname + '官网'
#解析文件页数和内容 #解析文件页数和内容
log.info(f"-----------正在处理{file_name}--------------") log.info(f"-----------正在处理{file_name}--------------")
with open(pdf_path, 'rb') as file: with open(pdf_path, 'rb') as file:
...@@ -178,7 +178,7 @@ if __name__=='__main__': ...@@ -178,7 +178,7 @@ if __name__=='__main__':
retData_f = uptoOBS(retData, pathType, taskType, start_time,file_name,pdf_path) retData_f = uptoOBS(retData, pathType, taskType, start_time,file_name,pdf_path)
if retData_f['state']: if retData_f['state']:
#retData, com_name, year, pdf_name, num, pub_time #retData, com_name, year, pdf_name, num, pub_time
att_id= baseCore.tableUpdate(retData_f, cname,file_year,file_name, num,file_year+'-12-31',origin) att_id= baseCore.tableUpdate(retData_f,file_year,file_name, num,file_year+'-12-31',origin)
if att_id: if att_id:
detect_language = baseCore.detect_language(content) detect_language = baseCore.detect_language(content)
dic_news = { dic_news = {
......
...@@ -211,7 +211,7 @@ if __name__=='__main__': ...@@ -211,7 +211,7 @@ if __name__=='__main__':
'sid': '1684032033495392257', 'sid': '1684032033495392257',
'sourceAddress': '', # 原文链接 'sourceAddress': '', # 原文链接
'summary': '', 'summary': '',
'title': file_name, 'title': file_name.replace('.pdf',''),
'type': 1, 'type': 1,
'socialCreditCode': social_code, 'socialCreditCode': social_code,
'year': file_year 'year': file_year
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论