提交 9dc7843b 作者: 薛凌堃

美国证券交易委员会kafka大小调整

上级 a74f108a
......@@ -520,7 +520,7 @@ def fbspdfurlinfo():
if __name__ == "__main__":
start = time.time()
fbspdfurlinfo()
# fbspdfurlinfo()
# danxiangguanjun()
# kegaishifan()
# shuangbaiqiye()
......@@ -536,7 +536,7 @@ if __name__ == "__main__":
# FBS()
# MengZhi()
# NQEnterprise()
# SEC_CIK()
SEC_CIK()
# dujioashou()
# omeng()
# AnnualEnterpriseUS()
......
......@@ -29,7 +29,7 @@ type_id = 1
create_by = 'XueLingKun'
taskType = '企业年报'
#付俊雪的需要改为巨潮资讯网1_福布斯2000_PDF_60_付
file_path = 'D:\\BaiduNetdiskDownload\\1_福布斯2000_PDF_55_邵'
file_path = 'D:\\BaiduNetdiskDownload\\1_福布斯2000_PDF_60_付'
log.info(f'=============当前pid为{baseCore.getPID()}==============')
def sendKafka(dic_news):
......@@ -180,7 +180,7 @@ if __name__=='__main__':
'id': '',
'keyWords': '',
'lang': 'zh',
'origin': '企业官网',
'origin': '巨潮资讯网',
'publishDate': file_year + '-12-31',
'sid': '1684032033495392257',
'sourceAddress': '', # 原文链接
......
......@@ -190,7 +190,7 @@ def spider(com_name,cik,up_okCount):
# print(dic_news)
# 将相应字段通过kafka传输保存
try:
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'],compression_type='gzip',batch_size=1638400,linger_ms=1,buffer_memory=33445532*2,max_request_size=8388608) #,batch_size=20480000,buffer_memory=64000000)
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'],compression_type='gzip',batch_size=1638400,linger_ms=1,buffer_memory=33445532*2,max_request_size=1024*1024*50) #,batch_size=20480000,buffer_memory=64000000)
kafka_result = producer.send("researchReportTopic",
json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
......@@ -203,7 +203,8 @@ def spider(com_name,cik,up_okCount):
}
log.info(dic_result)
try:
insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,create_time) values(%s,%s,%s,%s,now())'''
insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,publish_time,title,content,create_time) values(%s,%s,%s,%s,%s,%s,%s,now())'''
# 动态信息列表
up_okCount = up_okCount + 1
list_info = [
......@@ -211,6 +212,9 @@ def spider(com_name,cik,up_okCount):
news_url,
'SEC',
'1',
filingDate,
title,
content[:500]
]
cursor_.execute(insert_sql, tuple(list_info))
cnx_.commit()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论