提交 e87f42c5 作者: 薛凌堃

年报

上级 637feb1a
......@@ -58,22 +58,22 @@ if __name__ == '__main__':
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
}
query = "SELECT * FROM clb_sys_attachment WHERE type_id=1 AND source='证监会' AND id = '18703822757'"
query = "SELECT * FROM clb_sys_attachment WHERE type_id=1 AND create_time >= '2023-10-27' AND create_time <= '2023-10-28' AND source like '%官网' "
cursor_.execute(query)
results = cursor_.fetchall()
for result in results:
att_id = result[0]
name = result[1]
source = result[18]
social_code = result[3]
selectcom = f"select * from EnterpriseInfo where SocialCode = '{social_code}'"
cursor.execute(selectcom)
selects = cursor.fetchone()
com_name = selects[4]
if com_name:
pass
else:
com_name = selects[1]
# selectcom = f"select * from EnterpriseInfo where SocialCode = '{social_code}'"
# cursor.execute(selectcom)
# selects = cursor.fetchone()
# com_name = selects[4]
# if com_name:
# pass
# else:
# com_name = selects[1]
full_path = 'http://zzsn.luyuen.com/' + result[19]
year = result[9]
create_time = result[13]
......@@ -103,7 +103,7 @@ if __name__ == '__main__':
'id': '',
'keyWords': '',
'lang': detect_language,
'origin': '证监会',
'origin': source,
# 'origin': '雪球网',
'publishDate': publish,
'sid': '1684032033495392257',
......
......@@ -29,7 +29,7 @@ type_id = 1
create_by = 'XueLingKun'
taskType = '企业年报'
file_path = 'D:\\年报\\欧盟记分牌2500_年报补充_718_20231018'
file_path = 'D:\\年报\\福布斯2000年报PDF下载-207'
log.info(f'=============当前pid为{baseCore.getPID()}==============')
def sendKafka(dic_news):
......@@ -146,9 +146,9 @@ if __name__=='__main__':
social_code = data[1]
ename = data[2]
cname = data[3]
file_name = ename + ':' + file_year + '年年度报告' + '.pdf'
file_name = cname + ':' + file_year + '年年度报告' + '.pdf'
content = ''
origin = ename + '官网'
origin = cname + '官网'
#解析文件页数和内容
log.info(f"-----------正在处理{file_name}--------------")
with open(pdf_path, 'rb') as file:
......@@ -178,7 +178,7 @@ if __name__=='__main__':
retData_f = uptoOBS(retData, pathType, taskType, start_time,file_name,pdf_path)
if retData_f['state']:
#retData, com_name, year, pdf_name, num, pub_time
att_id= baseCore.tableUpdate(retData_f, cname,file_year,file_name, num,file_year+'-12-31',origin)
att_id= baseCore.tableUpdate(retData_f,file_year,file_name, num,file_year+'-12-31',origin)
if att_id:
detect_language = baseCore.detect_language(content)
dic_news = {
......
......@@ -211,7 +211,7 @@ if __name__=='__main__':
'sid': '1684032033495392257',
'sourceAddress': '', # 原文链接
'summary': '',
'title': file_name,
'title': file_name.replace('.pdf',''),
'type': 1,
'socialCreditCode': social_code,
'year': file_year
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论