提交 e3ee9068 作者: LiJunMing

新三板财务数据脚本维护

上级 6809e271
...@@ -84,8 +84,19 @@ def getdetail(reportInfodata,name_map,listinfo,url_name): ...@@ -84,8 +84,19 @@ def getdetail(reportInfodata,name_map,listinfo,url_name):
listinfo.append(dic_info) listinfo.append(dic_info)
return listinfo return listinfo
def getinfo(com_code,social_code): def getinfo(info_date,com_code,social_code):
dic_info = {}
for nnn in range(0, 3):
try:
panduan = check_date(com_code, info_date)
except:
time.sleep(1)
if panduan:
log.info(f'{info_date}----已采集过')
return
else:
pass
for nnn in range(0, 3): for nnn in range(0, 3):
try: try:
ynFirst = check_code(com_code) ynFirst = check_code(com_code)
...@@ -107,80 +118,114 @@ def getinfo(com_code,social_code): ...@@ -107,80 +118,114 @@ def getinfo(com_code,social_code):
a_infoData = getrequests(url_lrb) a_infoData = getrequests(url_lrb)
b_infoData = getrequests(url_zcfzb) b_infoData = getrequests(url_zcfzb)
c_infoData = getrequests(url_xjllb) c_infoData = getrequests(url_xjllb)
#对报告期做循环
for i in range(0,5): listLrb = []
listLrb = [] listZcfzb = []
listZcfzb = [] listXjllb = []
listXjllb = [] for i in range(len(a_infoData)):
reportLrbdata = a_infoData[i] report_date_a = a_infoData[i]['report_date']
report_date = a_infoData[i]['report_date'] report_date_a = getFormatedate(int(report_date_a / 1000))
#时间戳转化为日期 if info_date == report_date_a:
report_date = getFormatedate(int(report_date / 1000)) log.info(f'======正在采集利润表:{com_code}---{info_date}=======')
# 检查报告期是否已经存在 # 利润表
for nnn in range(0, 3): reportLrbdata = a_infoData[i]
try: listLrb = getdetail(reportLrbdata, lrb_name_map, listLrb, lrb_name)
panduan = check_date(com_code, report_date) log.info(f'利润表数据:{len(listLrb)}个')
except: break
time.sleep(1) else:
if panduan: continue
log.info(f'{report_date}----已采集过')
for j in range(len(b_infoData)):
report_date_b = b_infoData[j]['report_date']
report_date_b = getFormatedate(int(report_date_b / 1000))
if info_date == report_date_b:
log.info(f'======正在采集资产负债表:{com_code}---{info_date}=======')
reportZcfzbdata = b_infoData[j]
listZcfzb = getdetail(reportZcfzbdata, zcfzb_name_map, listZcfzb, zcfzb_name)
log.info(f'资产负债表数据:{len(listZcfzb)}个')
break
else:
continue continue
for k in range(len(c_infoData)):
report_date_c = c_infoData[k]['report_date']
report_date_c = getFormatedate(int(report_date_c / 1000))
if info_date == report_date_c:
log.info(f'======正在采集现金流量表:{com_code}---{info_date}=======')
reportXjllbdata = c_infoData[k]
listXjllb = getdetail(reportXjllbdata, xjllb_name_map, listXjllb, xjllb_name)
log.info(f'现金流量表数据:{len(listXjllb)}个')
break
else: else:
pass continue
log.info(f'======正在采集:{com_code}---{report_date}=======') # reportLrbname = a_infoData[i]['report_name']
#利润表 # reporZCFZbname = b_infoData[i]['report_name']
list_Lrb = getdetail(reportLrbdata,lrb_name_map,listLrb,lrb_name) # reportXJLLBname = c_infoData[i]['report_name']
log.info(f'利润表数据:{len(list_Lrb)}个')
# print(list_Lrb)
#资产负债表
reportZcfzbdata = b_infoData[i]
list_Zcfzb = getdetail(reportZcfzbdata,zcfzb_name_map,listZcfzb,zcfzb_name)
#现金流量表
reportXjllbdata = c_infoData[i]
list_Xjllb = getdetail(reportXjllbdata,xjllb_name_map,listXjllb,xjllb_name)
#时间戳转化为日期
# report_date = getFormatedate(int(report_date / 1000))
# 检查报告期是否已经存在
dic_info = { # log.info(f'======正在采集:{com_code}---{info_date}=======')
"socialCreditCode": social_code, # #利润表
"securitiesCode": com_code[2:], # reportLrbdata = a_infoData[i]
"date": report_date, # list_Lrb = getdetail(reportLrbdata,lrb_name_map,listLrb,lrb_name)
"debt": list_Zcfzb, # log.info(f'利润表数据:{len(list_Lrb)}个')
"profit": list_Lrb, #
"cash": list_Xjllb, # # print(list_Lrb)
"ynFirst": ynFirst, # #资产负债表
} # try:
# print(dic_info) # reportZcfzbdata = b_infoData[j]
#一个报告期结束 # list_Zcfzb = getdetail(reportZcfzbdata,zcfzb_name_map,listZcfzb,zcfzb_name)
log.info(f'----{com_code}--{report_date}----结束') #
if dic_info: # except:
# 调凯歌接口存储数据 # list_Zcfzb = []
data = json.dumps(dic_info) # #现金流量表
# print(data) # reportXjllbdata = c_infoData[k]
url_baocun = 'http://114.115.236.206:8088/sync/finance/xq' # list_Xjllb = getdetail(reportXjllbdata,xjllb_name_map,listXjllb,xjllb_name)
for nnn in range(0, 3):
try: dic_info = {
res_baocun = requests.post(url_baocun, data=data) "socialCreditCode": social_code,
break "securitiesCode": com_code[2:],
except: "date": info_date,
time.sleep(1) "debt": listZcfzb,
log.info(f'----{com_code}--{report_date}--------数据发送接口完毕------------') "profit": listLrb,
for nnn in range(0, 3): "cash": listXjllb,
try: "ynFirst": ynFirst,
add_date(com_code, report_date) }
break # print(dic_info)
except: #一个报告期结束
time.sleep(1) log.info(f'----{com_code}--{info_date}----结束')
else: if dic_info:
log.error(f'---{com_code}--{report_date}--') # 调凯歌接口存储数据
data = json.dumps(dic_info)
# print(data)
url_baocun = 'http://114.115.236.206:8088/sync/finance/xq'
for nnn in range(0, 3):
try:
res_baocun = requests.post(url_baocun, data=data)
break
except:
time.sleep(1)
log.info(f'----{com_code}--{info_date}--------数据发送接口完毕------------')
for nnn in range(0, 3):
try:
add_date(com_code, info_date)
break
except:
time.sleep(1)
else:
log.error(f'---{com_code}--{info_date}--')
if __name__ == '__main__': if __name__ == '__main__':
info_date_list = [] info_date_list = []
# try: list_date = ['2023-06-30']
# chromedriver = "D:/chrome/chromedriver.exe" list_month = ['-12-31', '-06-30']
# browser = webdriver.Chrome(chromedriver) for year in range(2022, 2020, -1):
# except Exception as e: for month in list_month:
# print(e) date = str(year) + month
list_date.append(date)
opt = webdriver.ChromeOptions() opt = webdriver.ChromeOptions()
opt.add_argument( opt.add_argument(
'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36') 'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')
...@@ -191,7 +236,6 @@ if __name__ == '__main__': ...@@ -191,7 +236,6 @@ if __name__ == '__main__':
opt.add_experimental_option('excludeSwitches', ['enable-logging']) opt.add_experimental_option('excludeSwitches', ['enable-logging'])
opt.add_experimental_option('useAutomationExtension', False) opt.add_experimental_option('useAutomationExtension', False)
opt.binary_location = r'D:/Google/Chrome/Application/chrome.exe' opt.binary_location = r'D:/Google/Chrome/Application/chrome.exe'
# chromedriver = r'C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe'
chromedriver = r'D:/cmd100/chromedriver.exe' chromedriver = r'D:/cmd100/chromedriver.exe'
browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver) browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)
headers = { headers = {
...@@ -381,10 +425,10 @@ if __name__ == '__main__': ...@@ -381,10 +425,10 @@ if __name__ == '__main__':
'加:期初现金及现金等价物余额':'final_balance_of_cce', '加:期初现金及现金等价物余额':'final_balance_of_cce',
'期末现金及现金等价物余额':'final_balance_of_cce' '期末现金及现金等价物余额':'final_balance_of_cce'
} }
table_type = ['income','balance']
while True: while True:
social_code = baseCore.redicPullData('NQEnterprise:nq_finance') # social_code = baseCore.redicPullData('NQEnterprise:nq_finance')
# social_code = baseCore.redicPullData('NQEnterprise:nq_finance_test') social_code = '9144030067312725XJ'
if social_code is None: if social_code is None:
log.info('已没有数据----------等待') log.info('已没有数据----------等待')
time.sleep(20) time.sleep(20)
...@@ -395,8 +439,9 @@ if __name__ == '__main__': ...@@ -395,8 +439,9 @@ if __name__ == '__main__':
com_code = data[3] com_code = data[3]
start = time.time() start = time.time()
com_code = 'NQ' + com_code com_code = 'NQ' + com_code
dic_info = getinfo(com_code,social_code) for info_date in list_date:
break dic_info = getinfo(info_date,com_code,social_code)
......
...@@ -16,4 +16,12 @@ element.getparent() #获取给定元素的父元素 ...@@ -16,4 +16,12 @@ element.getparent() #获取给定元素的父元素
# data = '"1234","456\r7","897"' # data = '"1234","456\r7","897"'
# print(data) # print(data)
# aa = pd.read_csv(StringIO(data),escapechar='\r') # aa = pd.read_csv(StringIO(data),escapechar='\r')
# print(aa) # print(aa)
\ No newline at end of file
import pandas as pd

# Source: the tab-separated "pre.txt" export; destination: the CSV written next to it.
src_path = 'D:\\美国证券交易委员会\\2023q2\\pre.txt'
dst_path = 'D:\\美国证券交易委员会\\2023q2\\pre.csv'

# Load the text file; change the separator here if the input uses a different one.
data = pd.read_csv(src_path, sep='\t')

# index=False keeps the DataFrame's row index out of the CSV output.
data.to_csv(dst_path, index=False)
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论