提交 e3ee9068 作者: LiJunMing

新三板财务数据脚本维护

上级 6809e271
......@@ -84,8 +84,19 @@ def getdetail(reportInfodata,name_map,listinfo,url_name):
listinfo.append(dic_info)
return listinfo
def getinfo(com_code,social_code):
dic_info = {}
def getinfo(info_date,com_code,social_code):
for nnn in range(0, 3):
try:
panduan = check_date(com_code, info_date)
except:
time.sleep(1)
if panduan:
log.info(f'{info_date}----已采集过')
return
else:
pass
for nnn in range(0, 3):
try:
ynFirst = check_code(com_code)
......@@ -107,51 +118,83 @@ def getinfo(com_code,social_code):
a_infoData = getrequests(url_lrb)
b_infoData = getrequests(url_zcfzb)
c_infoData = getrequests(url_xjllb)
#对报告期做循环
for i in range(0,5):
listLrb = []
listZcfzb = []
listXjllb = []
for i in range(len(a_infoData)):
report_date_a = a_infoData[i]['report_date']
report_date_a = getFormatedate(int(report_date_a / 1000))
if info_date == report_date_a:
log.info(f'======正在采集利润表:{com_code}---{info_date}=======')
# 利润表
reportLrbdata = a_infoData[i]
report_date = a_infoData[i]['report_date']
#时间戳转化为日期
report_date = getFormatedate(int(report_date / 1000))
# 检查报告期是否已经存在
for nnn in range(0, 3):
try:
panduan = check_date(com_code, report_date)
except:
time.sleep(1)
if panduan:
log.info(f'{report_date}----已采集过')
listLrb = getdetail(reportLrbdata, lrb_name_map, listLrb, lrb_name)
log.info(f'利润表数据:{len(listLrb)}个')
break
else:
continue
for j in range(len(b_infoData)):
report_date_b = b_infoData[j]['report_date']
report_date_b = getFormatedate(int(report_date_b / 1000))
if info_date == report_date_b:
log.info(f'======正在采集资产负债表:{com_code}---{info_date}=======')
reportZcfzbdata = b_infoData[j]
listZcfzb = getdetail(reportZcfzbdata, zcfzb_name_map, listZcfzb, zcfzb_name)
log.info(f'资产负债表数据:{len(listZcfzb)}个')
break
else:
pass
log.info(f'======正在采集:{com_code}---{report_date}=======')
#利润表
list_Lrb = getdetail(reportLrbdata,lrb_name_map,listLrb,lrb_name)
log.info(f'利润表数据:{len(list_Lrb)}个')
# print(list_Lrb)
#资产负债表
reportZcfzbdata = b_infoData[i]
list_Zcfzb = getdetail(reportZcfzbdata,zcfzb_name_map,listZcfzb,zcfzb_name)
#现金流量表
reportXjllbdata = c_infoData[i]
list_Xjllb = getdetail(reportXjllbdata,xjllb_name_map,listXjllb,xjllb_name)
continue
for k in range(len(c_infoData)):
report_date_c = c_infoData[k]['report_date']
report_date_c = getFormatedate(int(report_date_c / 1000))
if info_date == report_date_c:
log.info(f'======正在采集现金流量表:{com_code}---{info_date}=======')
reportXjllbdata = c_infoData[k]
listXjllb = getdetail(reportXjllbdata, xjllb_name_map, listXjllb, xjllb_name)
log.info(f'现金流量表数据:{len(listXjllb)}个')
break
else:
continue
# reportLrbname = a_infoData[i]['report_name']
# reporZCFZbname = b_infoData[i]['report_name']
# reportXJLLBname = c_infoData[i]['report_name']
#时间戳转化为日期
# report_date = getFormatedate(int(report_date / 1000))
# 检查报告期是否已经存在
# log.info(f'======正在采集:{com_code}---{info_date}=======')
# #利润表
# reportLrbdata = a_infoData[i]
# list_Lrb = getdetail(reportLrbdata,lrb_name_map,listLrb,lrb_name)
# log.info(f'利润表数据:{len(list_Lrb)}个')
#
# # print(list_Lrb)
# #资产负债表
# try:
# reportZcfzbdata = b_infoData[j]
# list_Zcfzb = getdetail(reportZcfzbdata,zcfzb_name_map,listZcfzb,zcfzb_name)
#
# except:
# list_Zcfzb = []
# #现金流量表
# reportXjllbdata = c_infoData[k]
# list_Xjllb = getdetail(reportXjllbdata,xjllb_name_map,listXjllb,xjllb_name)
dic_info = {
"socialCreditCode": social_code,
"securitiesCode": com_code[2:],
"date": report_date,
"debt": list_Zcfzb,
"profit": list_Lrb,
"cash": list_Xjllb,
"date": info_date,
"debt": listZcfzb,
"profit": listLrb,
"cash": listXjllb,
"ynFirst": ynFirst,
}
# print(dic_info)
#一个报告期结束
log.info(f'----{com_code}--{report_date}----结束')
log.info(f'----{com_code}--{info_date}----结束')
if dic_info:
# 调凯歌接口存储数据
data = json.dumps(dic_info)
......@@ -163,24 +206,26 @@ def getinfo(com_code,social_code):
break
except:
time.sleep(1)
log.info(f'----{com_code}--{report_date}--------数据发送接口完毕------------')
log.info(f'----{com_code}--{info_date}--------数据发送接口完毕------------')
for nnn in range(0, 3):
try:
add_date(com_code, report_date)
add_date(com_code, info_date)
break
except:
time.sleep(1)
else:
log.error(f'---{com_code}--{report_date}--')
log.error(f'---{com_code}--{info_date}--')
if __name__ == '__main__':
info_date_list = []
# try:
# chromedriver = "D:/chrome/chromedriver.exe"
# browser = webdriver.Chrome(chromedriver)
# except Exception as e:
# print(e)
list_date = ['2023-06-30']
list_month = ['-12-31', '-06-30']
for year in range(2022, 2020, -1):
for month in list_month:
date = str(year) + month
list_date.append(date)
opt = webdriver.ChromeOptions()
opt.add_argument(
'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')
......@@ -191,7 +236,6 @@ if __name__ == '__main__':
opt.add_experimental_option('excludeSwitches', ['enable-logging'])
opt.add_experimental_option('useAutomationExtension', False)
opt.binary_location = r'D:/Google/Chrome/Application/chrome.exe'
# chromedriver = r'C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe'
chromedriver = r'D:/cmd100/chromedriver.exe'
browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)
headers = {
......@@ -381,10 +425,10 @@ if __name__ == '__main__':
'加:期初现金及现金等价物余额':'final_balance_of_cce',
'期末现金及现金等价物余额':'final_balance_of_cce'
}
table_type = ['income','balance']
while True:
social_code = baseCore.redicPullData('NQEnterprise:nq_finance')
# social_code = baseCore.redicPullData('NQEnterprise:nq_finance_test')
# social_code = baseCore.redicPullData('NQEnterprise:nq_finance')
social_code = '9144030067312725XJ'
if social_code is None:
log.info('已没有数据----------等待')
time.sleep(20)
......@@ -395,8 +439,9 @@ if __name__ == '__main__':
com_code = data[3]
start = time.time()
com_code = 'NQ' + com_code
dic_info = getinfo(com_code,social_code)
break
for info_date in list_date:
dic_info = getinfo(info_date,com_code,social_code)
......
......@@ -17,3 +17,11 @@ element.getparent() #获取给定元素的父元素
# print(data)
# aa = pd.read_csv(StringIO(data),escapechar='\r')
# print(aa)
import pandas as pd
# Read the tab-delimited txt file into a DataFrame
data = pd.read_csv('D:\\美国证券交易委员会\\2023q2\\pre.txt', delimiter='\t') # pick the delimiter that matches the actual file
# Save the data as a csv file
data.to_csv('D:\\美国证券交易委员会\\2023q2\\pre.csv', index=False) # index=False means the row index is not written
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论