提交 ce4c997a 作者: 薛凌堃

reits专题数据

上级 43034e09
# 核心工具包 # 核心工具包
...@@ -423,17 +423,26 @@ class BaseCore: ...@@ -423,17 +423,26 @@ class BaseCore:
return 'cn' return 'cn'
return result[0] return result[0]
# Check whether the Excel file already exists (this method does not create it).
def check_excel_file(self, file_path: str) -> bool:
    """Report whether ``file_path`` refers to an existing regular file.

    The result is also written to the logger returned by ``self.getLogger()``.
    This method only checks and logs; creating the workbook when the result
    is ``False`` is left to the caller.

    Args:
        file_path: Path of the Excel workbook to look for.

    Returns:
        ``True`` when ``os.path.isfile(file_path)`` is true, otherwise ``False``.
    """
    exists = os.path.isfile(file_path)
    if exists:
        self.getLogger().info("Excel文件已存在")
    else:
        self.getLogger().info("Excel文件不存在,正在创建...")
    return exists
# Append rows to one sheet of an existing xlsx workbook.
def writerToExcel(self, detailList, filename, sheet_name):
    """Append ``detailList`` to the sheet ``sheet_name`` of ``filename``.

    The workbook must already exist. Rows already present in the target
    sheet are read back as strings and the new rows are placed after them.
    Other sheets in the workbook are left untouched.

    Args:
        detailList: Data accepted by ``pd.DataFrame(data=...)``; the caller
            visible in this change set passes a list of dicts, one per row.
        filename: Path of the existing ``.xlsx`` workbook.
        sheet_name: Name of the sheet to extend; it is created when the
            workbook does not contain it yet.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    new_data = pd.DataFrame(data=detailList)

    # A freshly created workbook does not contain the target sheet yet, and
    # read_excel raises for an unknown sheet name, so probe the names first.
    existing_data = None
    with pd.ExcelFile(filename, engine='openpyxl') as workbook:
        if sheet_name in workbook.sheet_names:
            existing_data = pd.read_excel(workbook, sheet_name=sheet_name, dtype=str)

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    # Empty frames are skipped so they do not influence the resulting dtypes.
    frames = [
        frame for frame in (existing_data, new_data)
        if frame is not None and not frame.empty
    ]
    if frames:
        combined_data = pd.concat(frames, ignore_index=True)
    elif existing_data is not None:
        # Nothing to add: keep the existing (possibly header-only) sheet.
        combined_data = existing_data
    else:
        combined_data = new_data

    # Passing a path straight to to_excel rewrites the whole workbook and
    # drops every other sheet; append mode replaces only the target sheet.
    with pd.ExcelWriter(
        filename,
        engine='openpyxl',
        mode='a',
        if_sheet_exists='replace',
    ) as writer:
        combined_data.to_excel(writer, sheet_name=sheet_name, index=False)
#解析word文件页数 #解析word文件页数
......
import os import os
import os import os
import openpyxl
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from datetime import datetime from datetime import datetime
...@@ -11,7 +12,7 @@ from urllib.parse import urljoin ...@@ -11,7 +12,7 @@ from urllib.parse import urljoin
import BaseCore import BaseCore
baseCore = BaseCore.BaseCore() baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
filepath = "data/" filepath = "data/"
class Policy(): class Policy():
...@@ -96,6 +97,7 @@ class Policy(): ...@@ -96,6 +97,7 @@ class Policy():
pass pass
policy = Policy() policy = Policy()
#国家发展和改革委员会 https://www.ndrc.gov.cn/xxgk/wjk/index.html?tab=all&qt= #国家发展和改革委员会 https://www.ndrc.gov.cn/xxgk/wjk/index.html?tab=all&qt=
def reform(): def reform():
...@@ -118,6 +120,7 @@ def reform(): ...@@ -118,6 +120,7 @@ def reform():
url = 'https://fwfx.ndrc.gov.cn/api/query?qt=REITs&tab=all&page=1&pageSize=20&siteCode=bm04000fgk&key=CAB549A94CF659904A7D6B0E8FC8A7E9&startDateStr=&endDateStr=&timeOption=0&sort=dateDesc' url = 'https://fwfx.ndrc.gov.cn/api/query?qt=REITs&tab=all&page=1&pageSize=20&siteCode=bm04000fgk&key=CAB549A94CF659904A7D6B0E8FC8A7E9&startDateStr=&endDateStr=&timeOption=0&sort=dateDesc'
result = policy.getrequest_json(headers, url) result = policy.getrequest_json(headers, url)
data_list = result['data']['resultList'] data_list = result['data']['resultList']
DataList = []
num = 0 num = 0
for info in data_list: for info in data_list:
num += 1 num += 1
...@@ -174,18 +177,32 @@ def reform(): ...@@ -174,18 +177,32 @@ def reform():
except: except:
pass pass
dic_info = { dic_info = {
'title': title, '序号':num,
'summary':summary, '标题': title,
'publishDate': publishDate, '时间': publishDate,
'source': source, '来源': source,
'pub_hao': pubHao, '原文链接':newsUrl,
'contentWithTag': contentWithTag, '发文字号': pubHao,
'content': content '摘要':summary,
'正文': content,
'附件名称':'',
'附件链接':'',
} }
print(dic_info) DataList.append(dic_info)
file_name = f'../data/REITs专题数据.xlsx'
sheet_name = "国家发展和改革委员会"
file_exist = baseCore.check_excel_file(file_name)
if file_exist:
pass
else:
wb = openpyxl.Workbook()
wb.save(file_name)
log.info("Excel文件已创建")
baseCore.writerToExcel(DataList, file_name, sheet_name)
except: except:
print(newsUrl) log.info(f"error!!!{newsUrl}")
log.info(f'=============处理结束,以采集{num}条数据=================')
#证券期货 https://neris.csrc.gov.cn/falvfagui/multipleFindController/indexJsp #证券期货 https://neris.csrc.gov.cn/falvfagui/multipleFindController/indexJsp
def zhengquanqihuo(): def zhengquanqihuo():
...@@ -450,7 +467,7 @@ def beijing(): ...@@ -450,7 +467,7 @@ def beijing():
if __name__=="__main__":
reform()
# reform()
# zhengquanqihuo() # zhengquanqihuo()
\ No newline at end of file
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论