REITs专题数据

ce4c997a · 薛凌堃 · 43034e09 · ce4c997a · ce4c997a
--- a/REITs专题数据/BaseCore.py
+++ b/REITs专题数据/BaseCore.py
-# 核心工具包
+# 核心工具包
@@ -423,17 +423,26 @@ class BaseCore:
            return 'cn'
        return result[0]

+    #检查excel文件是否存在（本方法只做检查，不创建文件）
+    def check_excel_file(self,file_path):
+        if os.path.isfile(file_path):
+            self.getLogger().info("Excel文件已存在")
+            return True
+        else:
+            self.getLogger().info("Excel文件不存在，正在创建...")
+            return False
+
    #追加接入excel
-    def writerToExcel(self,detailList,filename):
+    def writerToExcel(self, detailList, filename, sheet_name):
        # filename='baidu搜索.xlsx'
        # 读取已存在的xlsx文件
-        existing_data = pd.read_excel(filename,engine='openpyxl',dtype=str)
+        existing_data = pd.read_excel(filename, sheet_name=sheet_name, engine='openpyxl', dtype=str)
        # 创建新的数据
        new_data = pd.DataFrame(data=detailList)
        # 将新数据添加到现有数据的末尾
        combined_data = existing_data.append(new_data, ignore_index=True)
        # 将结果写入到xlsx文件
-        combined_data.to_excel(filename, index=False)
+        combined_data.to_excel(filename, sheet_name=sheet_name, index=False)
        # return combined_data

    #解析word文件页数

--- a/REITs专题数据/reits.py
+++ b/REITs专题数据/reits.py
-import os
+import os
 import os

+import openpyxl
 import requests
 from bs4 import BeautifulSoup
 from datetime import datetime
@@ -11,7 +12,7 @@ from urllib.parse import urljoin

 import BaseCore
 baseCore = BaseCore.BaseCore()
-
+log = baseCore.getLogger()
 filepath = "data/"

 class Policy():
@@ -96,6 +97,7 @@ class Policy():
        pass


+
 policy = Policy()
 #国家发展和改革委员会 https://www.ndrc.gov.cn/xxgk/wjk/index.html?tab=all&qt=
 def reform():
@@ -118,6 +120,7 @@ def reform():
    url = 'https://fwfx.ndrc.gov.cn/api/query?qt=REITs&tab=all&page=1&pageSize=20&siteCode=bm04000fgk&key=CAB549A94CF659904A7D6B0E8FC8A7E9&startDateStr=&endDateStr=&timeOption=0&sort=dateDesc'
    result = policy.getrequest_json(headers, url)
    data_list = result['data']['resultList']
+    DataList = []
    num = 0
    for info in data_list:
        num += 1
@@ -174,18 +177,32 @@ def reform():
            except:
                pass
            dic_info = {
-                'title': title,
-                'summary':summary,
-                'publishDate': publishDate,
-                'source': source,
-                'pub_hao': pubHao,
-                'contentWithTag': contentWithTag,
-                'content': content
+                '序号':num,
+                '标题': title,
+                '时间': publishDate,
+                '来源': source,
+                '原文链接':newsUrl,
+                '发文字号': pubHao,
+                '摘要':summary,
+                '正文': content,
+                '附件名称':'',
+                '附件链接':'',
            }
-            print(dic_info)
+            DataList.append(dic_info)
+            file_name = f'../data/REITs专题数据.xlsx'
+            sheet_name = "国家发展和改革委员会"
+            file_exist = baseCore.check_excel_file(file_name)
+            if file_exist:
+                pass
+            else:
+                wb = openpyxl.Workbook()
+                wb.save(file_name)
+                log.info("Excel文件已创建")
+            baseCore.writerToExcel(DataList, file_name, sheet_name)

        except:
-            print(newsUrl)
+            log.info(f"error！！！{newsUrl}")
+    log.info(f'=============处理结束，以采集{num}条数据=================')

 #证券期货 https://neris.csrc.gov.cn/falvfagui/multipleFindController/indexJsp
 def zhengquanqihuo():
@@ -450,7 +467,7 @@ def beijing():



+if __name__=="__main__":

-
-# reform()
+    reform()
 # zhengquanqihuo()
\ No newline at end of file