国资委 9/11

f93b78fc · LiuLiYuan · 329c83aa · f93b78fc
--- a/comData/policylaw/2.py
+++ b/comData/policylaw/2.py
@@ -142,7 +142,6 @@ def remove_dup():
    pass
 # 国务院文件
 def get_content1():
    def getPageConunt(a_list, url, headers, s):
        data = {"code": "18122f54c5c", "thirdPartyCode": "thirdparty_code_107", "thirdPartyTableId": 30,
@@ -293,7 +292,7 @@ def get_content1():
                            'publishDate': pub_time1,                     #发布时间
                            'writtenDate': pub_time2,                     #成文时间
                            'sid': '1697458829758697473',                 #信息源id
-                            'sourceAddress': href[0],                     #原文链接
+                            'sourceAddress': href,                     #原文链接
                            'summary': '',                                #摘要
                            'title': title                                #标题
                        }
@@ -309,7 +308,7 @@ def get_content1():
            log.error(f'{pcodeJiguan}...获取总数失败')
            continue
        end_time = time.time()
-        print(f'共抓取{num}条数据，共耗时{start_time - end_time}')
+        log.info(f'共抓取国务院文件{num}条数据，共耗时{end_time - start_time}')  # elapsed = end - start (was reversed, logging a negative duration)
 # 国务院部门文件
@@ -423,7 +422,7 @@ def get_content2():
                                    #todo:将返回的地址更新到soup
                                    file['href'] = 'http://114.115.215.96/' + full_path
                        except:
-                            print(f'{title}...{href}获取内容失败')
+                            log.error(f'{title}...{href}获取内容失败')
                            continue
                        time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                        #todo:传kafka字段
@@ -453,13 +452,13 @@ def get_content2():
                            save_data(dic_news)
                        num += 1
                except:
-                    print(f'{bmfl}...第{pageNo}页获取信息列表失败')
+                    log.error(f'{bmfl}...第{pageNo}页获取信息列表失败')
                    continue
        except:
-            print(f'{bmfl}...获取页数失败')
+            log.error(f'{bmfl}...获取页数失败')
            continue
    end_time = time.time()
-    print(f'共抓取{num}条数据，耗时{end_time - start_time}')
+    log.info(f'共抓取国务院部门文件{num}条数据，耗时{end_time - start_time}')
 # 国务院国有资产监督管理委员会-政策发布
@@ -548,7 +547,7 @@ def get_content3():
            'summary': '',                                #摘要
            'title': title                                #标题
        }
-        # print(dic_news)
+        # print(title)
        flag = sendKafka(dic_news)
        if flag:
            save_data(dic_news)
@@ -576,7 +575,7 @@ def get_content3():
                sendContent(href, headers,title,pub_time,num)
                num += 1
        end_time = time.time()
-        print(f'共抓取{num}条数据，耗时{end_time - start_time}')
+        log.info(f'共抓取国资委文件{num}条数据，耗时{end_time - start_time}')
    def partOne():
        start_time = time.time()
@@ -610,7 +609,7 @@ def get_content3():
        except:
            pass
        end_time = time.time()
-        print(f'共抓取{num}条数据，耗时{end_time - start_time}')
+        log.info(f'共抓取国资委文件{num}条数据，耗时{end_time - start_time}')
    partOne()
    partTwo()