企业公告脚本测试

afb6cfd1 · 薛凌堃 · 3e7c3b28 · afb6cfd1 · afb6cfd1
--- a/comData/noticeReport/东方财富网-公告.py
+++ b/comData/noticeReport/东方财富网-公告.py
--- a/comData/noticeReport/证监会-公告.py
+++ b/comData/noticeReport/证监会-公告.py
-import json
+import json
@@ -120,7 +120,7 @@ def tableUpdate(retData, com_name, year, pdf_name, num):
        values = (
            year, pdf_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by,
            status, create_by,
-            create_time, page_size,path,'zzsn')
+            create_time, page_size,full_path.split('https://zzsn.obs.cn-north-1.myhuaweicloud.com/')[1],'zzsn')
        cursor_.execute(Upsql, values)  # 插入
        cnx_.commit()  # 提交
    except Exception as e:
@@ -283,14 +283,14 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
    #判断文件是否已经存在obs服务器中
    # file_path = 'QYNotice//浙江国祥股份有限公司首次公开发行股票并在主板上市暂缓发行公告'
    now_time = time.strftime("%Y-%m")
-    file_path = 'QYNotice/'+pdf_name
-    response = obsClient.getObjectMetadata('zzsn', file_path)
-    if response.status >= 300:
-        log.info('=====文件不存在obs=====')
-        pass
-    else:
-        log.info(f'=====文件存在obs========{file_path}')
-        return False
+    # file_path = 'QYNotice/'+pdf_name
+    # response = obsClient.getObjectMetadata('zzsn', file_path)
+    # if response.status >= 300:
+    #     log.info('=====文件不存在obs=====')
+    #     pass
+    # else:
+    #     log.info(f'=====文件存在obs========{file_path}')
+    #     return False
    #上传至华为云服务器
    retData = uptoOBS(pdf_url,pdf_name,8,social_code)
    #附件插入att数据库
@@ -323,7 +323,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
        'sid': '1684032033495392257',
        'sourceAddress': pdf_url,  # 原文链接
        'summary': '',
-        'title': pdf_name,
+        'title': pdf_name.replace('.pdf',''),
        'type': 3,
        'socialCreditCode': social_code,
        'year': year
@@ -332,7 +332,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
    # 将相应字段通过kafka传输保存
    try:
        producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
-        kafka_result = producer.send("researchReportTopic", json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
+        kafka_result = producer.send("researchReportTopicaaaas", json.dumps(dic_news, ensure_ascii=False).encode('utf8'))

        print(kafka_result.get(timeout=10))

@@ -430,6 +430,7 @@ def SpiderByZJH(url, payload, dic_info, start_time,num):  # dic_info 数据库
            # 判断数据库中是否有该条资讯
            ifexist = ifInstert(short_name, social_code, pdf_url)
            #如果不存在 ifexist = True
+            # ifexist = True
            if ifexist:
                # 解析PDF内容，先获取PDF链接 下载 解析成功，解析失败 ，传输成功，传输失败
                result = GetContent(pdf_url, name_pdf, social_code, year, pub_time, start_time,com_name,num)