Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
王景浩
zzsn_spider
Commits
e3ee9068
提交
e3ee9068
authored
9月 21, 2023
作者:
LiJunMing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
新三板财务数据脚本维护
上级
6809e271
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
128 行增加
和
76 行删除
+128
-76
finance_xq.py
comData/dfcfwGpdm/NQenterprise/finance_xq.py
+119
-74
test.py
test.py
+9
-2
没有找到文件。
comData/dfcfwGpdm/NQenterprise/finance_xq.py
浏览文件 @
e3ee9068
...
...
@@ -84,8 +84,19 @@ def getdetail(reportInfodata,name_map,listinfo,url_name):
listinfo
.
append
(
dic_info
)
return
listinfo
def
getinfo
(
com_code
,
social_code
):
dic_info
=
{}
def
getinfo
(
info_date
,
com_code
,
social_code
):
for
nnn
in
range
(
0
,
3
):
try
:
panduan
=
check_date
(
com_code
,
info_date
)
except
:
time
.
sleep
(
1
)
if
panduan
:
log
.
info
(
f
'{info_date}----已采集过'
)
return
else
:
pass
for
nnn
in
range
(
0
,
3
):
try
:
ynFirst
=
check_code
(
com_code
)
...
...
@@ -107,80 +118,114 @@ def getinfo(com_code,social_code):
a_infoData
=
getrequests
(
url_lrb
)
b_infoData
=
getrequests
(
url_zcfzb
)
c_infoData
=
getrequests
(
url_xjllb
)
#对报告期做循环
for
i
in
range
(
0
,
5
):
listLrb
=
[]
listZcfzb
=
[]
listXjllb
=
[]
reportLrbdata
=
a_infoData
[
i
]
report_date
=
a_infoData
[
i
][
'report_date'
]
#时间戳转化为日期
report_date
=
getFormatedate
(
int
(
report_date
/
1000
))
# 检查报告期是否已经存在
for
nnn
in
range
(
0
,
3
):
try
:
panduan
=
check_date
(
com_code
,
report_date
)
except
:
time
.
sleep
(
1
)
if
panduan
:
log
.
info
(
f
'{report_date}----已采集过'
)
listLrb
=
[]
listZcfzb
=
[]
listXjllb
=
[]
for
i
in
range
(
len
(
a_infoData
)):
report_date_a
=
a_infoData
[
i
][
'report_date'
]
report_date_a
=
getFormatedate
(
int
(
report_date_a
/
1000
))
if
info_date
==
report_date_a
:
log
.
info
(
f
'======正在采集利润表:{com_code}---{info_date}======='
)
# 利润表
reportLrbdata
=
a_infoData
[
i
]
listLrb
=
getdetail
(
reportLrbdata
,
lrb_name_map
,
listLrb
,
lrb_name
)
log
.
info
(
f
'利润表数据:{len(listLrb)}个'
)
break
else
:
continue
for
j
in
range
(
len
(
b_infoData
)):
report_date_b
=
b_infoData
[
j
][
'report_date'
]
report_date_b
=
getFormatedate
(
int
(
report_date_b
/
1000
))
if
info_date
==
report_date_b
:
log
.
info
(
f
'======正在采集资产负债表:{com_code}---{info_date}======='
)
reportZcfzbdata
=
b_infoData
[
j
]
listZcfzb
=
getdetail
(
reportZcfzbdata
,
zcfzb_name_map
,
listZcfzb
,
zcfzb_name
)
log
.
info
(
f
'资产负债表数据:{len(listZcfzb)}个'
)
break
else
:
continue
for
k
in
range
(
len
(
c_infoData
)):
report_date_c
=
c_infoData
[
k
][
'report_date'
]
report_date_c
=
getFormatedate
(
int
(
report_date_c
/
1000
))
if
info_date
==
report_date_c
:
log
.
info
(
f
'======正在采集现金流量表:{com_code}---{info_date}======='
)
reportXjllbdata
=
c_infoData
[
k
]
listXjllb
=
getdetail
(
reportXjllbdata
,
xjllb_name_map
,
listXjllb
,
xjllb_name
)
log
.
info
(
f
'现金流量表数据:{len(listXjllb)}个'
)
break
else
:
pass
log
.
info
(
f
'======正在采集:{com_code}---{report_date}======='
)
#利润表
list_Lrb
=
getdetail
(
reportLrbdata
,
lrb_name_map
,
listLrb
,
lrb_name
)
log
.
info
(
f
'利润表数据:{len(list_Lrb)}个'
)
# print(list_Lrb)
#资产负债表
reportZcfzbdata
=
b_infoData
[
i
]
list_Zcfzb
=
getdetail
(
reportZcfzbdata
,
zcfzb_name_map
,
listZcfzb
,
zcfzb_name
)
#现金流量表
reportXjllbdata
=
c_infoData
[
i
]
list_Xjllb
=
getdetail
(
reportXjllbdata
,
xjllb_name_map
,
listXjllb
,
xjllb_name
)
continue
# reportLrbname = a_infoData[i]['report_name']
# reporZCFZbname = b_infoData[i]['report_name']
# reportXJLLBname = c_infoData[i]['report_name']
#时间戳转化为日期
# report_date = getFormatedate(int(report_date / 1000))
# 检查报告期是否已经存在
dic_info
=
{
"socialCreditCode"
:
social_code
,
"securitiesCode"
:
com_code
[
2
:],
"date"
:
report_date
,
"debt"
:
list_Zcfzb
,
"profit"
:
list_Lrb
,
"cash"
:
list_Xjllb
,
"ynFirst"
:
ynFirst
,
}
# print(dic_info)
#一个报告期结束
log
.
info
(
f
'----{com_code}--{report_date}----结束'
)
if
dic_info
:
# 调凯歌接口存储数据
data
=
json
.
dumps
(
dic_info
)
# print(data)
url_baocun
=
'http://114.115.236.206:8088/sync/finance/xq'
for
nnn
in
range
(
0
,
3
):
try
:
res_baocun
=
requests
.
post
(
url_baocun
,
data
=
data
)
break
except
:
time
.
sleep
(
1
)
log
.
info
(
f
'----{com_code}--{report_date}--------数据发送接口完毕------------'
)
for
nnn
in
range
(
0
,
3
):
try
:
add_date
(
com_code
,
report_date
)
break
except
:
time
.
sleep
(
1
)
else
:
log
.
error
(
f
'---{com_code}--{report_date}--'
)
# log.info(f'======正在采集:{com_code}---{info_date}=======')
# #利润表
# reportLrbdata = a_infoData[i]
# list_Lrb = getdetail(reportLrbdata,lrb_name_map,listLrb,lrb_name)
# log.info(f'利润表数据:{len(list_Lrb)}个')
#
# # print(list_Lrb)
# #资产负债表
# try:
# reportZcfzbdata = b_infoData[j]
# list_Zcfzb = getdetail(reportZcfzbdata,zcfzb_name_map,listZcfzb,zcfzb_name)
#
# except:
# list_Zcfzb = []
# #现金流量表
# reportXjllbdata = c_infoData[k]
# list_Xjllb = getdetail(reportXjllbdata,xjllb_name_map,listXjllb,xjllb_name)
dic_info
=
{
"socialCreditCode"
:
social_code
,
"securitiesCode"
:
com_code
[
2
:],
"date"
:
info_date
,
"debt"
:
listZcfzb
,
"profit"
:
listLrb
,
"cash"
:
listXjllb
,
"ynFirst"
:
ynFirst
,
}
# print(dic_info)
#一个报告期结束
log
.
info
(
f
'----{com_code}--{info_date}----结束'
)
if
dic_info
:
# 调凯歌接口存储数据
data
=
json
.
dumps
(
dic_info
)
# print(data)
url_baocun
=
'http://114.115.236.206:8088/sync/finance/xq'
for
nnn
in
range
(
0
,
3
):
try
:
res_baocun
=
requests
.
post
(
url_baocun
,
data
=
data
)
break
except
:
time
.
sleep
(
1
)
log
.
info
(
f
'----{com_code}--{info_date}--------数据发送接口完毕------------'
)
for
nnn
in
range
(
0
,
3
):
try
:
add_date
(
com_code
,
info_date
)
break
except
:
time
.
sleep
(
1
)
else
:
log
.
error
(
f
'---{com_code}--{info_date}--'
)
if
__name__
==
'__main__'
:
info_date_list
=
[]
# try:
# chromedriver = "D:/chrome/chromedriver.exe"
# browser = webdriver.Chrome(chromedriver)
# except Exception as e:
# print(e)
list_date
=
[
'2023-06-30'
]
list_month
=
[
'-12-31'
,
'-06-30'
]
for
year
in
range
(
2022
,
2020
,
-
1
):
for
month
in
list_month
:
date
=
str
(
year
)
+
month
list_date
.
append
(
date
)
opt
=
webdriver
.
ChromeOptions
()
opt
.
add_argument
(
'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
)
...
...
@@ -191,7 +236,6 @@ if __name__ == '__main__':
opt
.
add_experimental_option
(
'excludeSwitches'
,
[
'enable-logging'
])
opt
.
add_experimental_option
(
'useAutomationExtension'
,
False
)
opt
.
binary_location
=
r'D:/Google/Chrome/Application/chrome.exe'
# chromedriver = r'C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe'
chromedriver
=
r'D:/cmd100/chromedriver.exe'
browser
=
webdriver
.
Chrome
(
chrome_options
=
opt
,
executable_path
=
chromedriver
)
headers
=
{
...
...
@@ -381,10 +425,10 @@ if __name__ == '__main__':
'加:期初现金及现金等价物余额'
:
'final_balance_of_cce'
,
'期末现金及现金等价物余额'
:
'final_balance_of_cce'
}
table_type
=
[
'income'
,
'balance'
]
while
True
:
social_code
=
baseCore
.
redicPullData
(
'NQEnterprise:nq_finance'
)
# social_code = baseCore.redicPullData('NQEnterprise:nq_finance_test')
#
social_code = baseCore.redicPullData('NQEnterprise:nq_finance')
social_code
=
'9144030067312725XJ'
if
social_code
is
None
:
log
.
info
(
'已没有数据----------等待'
)
time
.
sleep
(
20
)
...
...
@@ -395,8 +439,9 @@ if __name__ == '__main__':
com_code
=
data
[
3
]
start
=
time
.
time
()
com_code
=
'NQ'
+
com_code
dic_info
=
getinfo
(
com_code
,
social_code
)
break
for
info_date
in
list_date
:
dic_info
=
getinfo
(
info_date
,
com_code
,
social_code
)
...
...
test.py
浏览文件 @
e3ee9068
...
...
@@ -16,4 +16,12 @@ element.getparent() #获取给定元素的父元素
# data = '"1234","456\r7","897"'
# print(data)
# aa = pd.read_csv(StringIO(data),escapechar='\r')
# print(aa)
\ No newline at end of file
# print(aa)
import
pandas
as
pd
# 读取txt文件
data
=
pd
.
read_csv
(
'D:
\\
美国证券交易委员会
\\
2023q2
\\
pre.txt'
,
delimiter
=
'
\t
'
)
# 根据实际情况选择正确的分隔符
# 将数据保存为csv文件
data
.
to_csv
(
'D:
\\
美国证券交易委员会
\\
2023q2
\\
pre.csv'
,
index
=
False
)
# index=False表示不保存行索引
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论