提交 ce4c997a 作者: 薛凌堃

reits专题数据

上级 43034e09
# 核心工具包
# 核心工具包
......@@ -423,17 +423,26 @@ class BaseCore:
return 'cn'
return result[0]
# Check whether the Excel file already exists (creating it is left to the caller)
def check_excel_file(self,file_path):
    """Report whether ``file_path`` refers to an existing regular file.

    The outcome is logged through ``self.getLogger()``. Nothing is created
    here, even though the "missing" log message mentions creation.

    Returns:
        True when the file exists, False otherwise.
    """
    found = os.path.isfile(file_path)
    message = "Excel文件已存在" if found else "Excel文件不存在,正在创建..."
    self.getLogger().info(message)
    return found
# Append rows to a worksheet of an existing Excel workbook
def writerToExcel(self, detailList, filename, sheet_name):
    """Append rows to one worksheet of an existing xlsx workbook.

    Existing rows of ``sheet_name`` are read back as strings, the new rows
    are added after them, and the worksheet is rewritten. Other worksheets
    in the workbook are left in place.

    Args:
        detailList: Row data accepted by ``pd.DataFrame(data=...)``, e.g. a
            list of dicts keyed by column name.
        filename: Path of an existing ``.xlsx`` workbook.
        sheet_name: Worksheet to extend; it is created when the workbook
            does not contain it yet.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    new_data = pd.DataFrame(data=detailList)

    # The worksheet may not exist yet (e.g. in a newly created workbook);
    # reading a missing sheet by name would raise, so check the names first.
    with pd.ExcelFile(filename, engine='openpyxl') as workbook:
        if sheet_name in workbook.sheet_names:
            existing_data = pd.read_excel(workbook, sheet_name=sheet_name, dtype=str)
        else:
            existing_data = pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concat is its replacement.
    combined_data = pd.concat([existing_data, new_data], ignore_index=True)

    # Append mode keeps the other worksheets; to_excel(filename, ...) would
    # rewrite the whole workbook with only this sheet.
    # NOTE(review): if_sheet_exists needs pandas >= 1.3 - confirm the
    # deployed version.
    with pd.ExcelWriter(filename, engine='openpyxl', mode='a',
                        if_sheet_exists='replace') as writer:
        combined_data.to_excel(writer, sheet_name=sheet_name, index=False)
#解析word文件页数
......
import os
import os
import os
import openpyxl
import requests
from bs4 import BeautifulSoup
from datetime import datetime
......@@ -11,7 +12,7 @@ from urllib.parse import urljoin
import BaseCore
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
filepath = "data/"
class Policy():
......@@ -96,6 +97,7 @@ class Policy():
pass
policy = Policy()
#国家发展和改革委员会 https://www.ndrc.gov.cn/xxgk/wjk/index.html?tab=all&qt=
def reform():
......@@ -118,6 +120,7 @@ def reform():
url = 'https://fwfx.ndrc.gov.cn/api/query?qt=REITs&tab=all&page=1&pageSize=20&siteCode=bm04000fgk&key=CAB549A94CF659904A7D6B0E8FC8A7E9&startDateStr=&endDateStr=&timeOption=0&sort=dateDesc'
result = policy.getrequest_json(headers, url)
data_list = result['data']['resultList']
DataList = []
num = 0
for info in data_list:
num += 1
......@@ -174,18 +177,32 @@ def reform():
except:
pass
dic_info = {
'title': title,
'summary':summary,
'publishDate': publishDate,
'source': source,
'pub_hao': pubHao,
'contentWithTag': contentWithTag,
'content': content
'序号':num,
'标题': title,
'时间': publishDate,
'来源': source,
'原文链接':newsUrl,
'发文字号': pubHao,
'摘要':summary,
'正文': content,
'附件名称':'',
'附件链接':'',
}
print(dic_info)
DataList.append(dic_info)
file_name = f'../data/REITs专题数据.xlsx'
sheet_name = "国家发展和改革委员会"
file_exist = baseCore.check_excel_file(file_name)
if file_exist:
pass
else:
wb = openpyxl.Workbook()
wb.save(file_name)
log.info("Excel文件已创建")
baseCore.writerToExcel(DataList, file_name, sheet_name)
except:
print(newsUrl)
log.info(f"error!!!{newsUrl}")
log.info(f'=============处理结束,以采集{num}条数据=================')
#证券期货 https://neris.csrc.gov.cn/falvfagui/multipleFindController/indexJsp
def zhengquanqihuo():
......@@ -450,7 +467,7 @@ def beijing():
if __name__ == "__main__":
    # Script entry point: only reform() is run; the zhengquanqihuo()
    # call is currently disabled.
    reform()
    # zhengquanqihuo()
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论