Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
王景浩
zzsn_spider
Commits
22b065ed
提交
22b065ed
authored
10月 07, 2023
作者:
LiuLiYuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
纳斯达克财务数据 10/07
上级
afe226ba
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
59 行增加
和
33 行删除
+59
-33
nasdaq_caiwu.py
comData/caiwushuju/nasdaq_caiwu.py
+59
-33
没有找到文件。
comData/caiwushuju/nasdaq_caiwu.py
浏览文件 @
22b065ed
...
...
@@ -11,7 +11,6 @@ from bs4 import BeautifulSoup
from
requests.adapters
import
HTTPAdapter
from
requests.packages
import
urllib3
from
retry
import
retry
from
base
import
BaseCore
urllib3
.
disable_warnings
()
...
...
@@ -20,6 +19,7 @@ log = baseCore.getLogger()
cnx
=
pymysql
.
connect
(
host
=
'114.115.159.144'
,
user
=
'caiji'
,
password
=
'zzsn9988'
,
db
=
'caiji'
,
charset
=
'utf8mb4'
)
cursor
=
cnx
.
cursor
()
r
=
baseCore
.
r
URL
=
'https://www.nasdaq.com/'
session
=
requests
.
session
()
session
.
mount
(
'https://'
,
HTTPAdapter
(
pool_connections
=
20
,
pool_maxsize
=
100
))
...
...
@@ -65,6 +65,7 @@ def add_date(com_code, date_list):
# 数据发送端口
def
sendData
(
start_time
,
social_code
,
gpdm
,
dic_info
):
data
=
json
.
dumps
(
dic_info
)
# print(data)
url_baocun
=
'http://114.115.236.206:8088/sync/finance/nsdk'
for
nnn
in
range
(
0
,
3
):
try
:
...
...
@@ -86,7 +87,7 @@ def getUnit(gpdm):
req
.
encoding
=
req
.
apparent_encoding
soup
=
BeautifulSoup
(
req
.
text
,
'lxml'
)
unit
=
soup
.
find
(
'div'
,
class_
=
'financials__note'
)
.
text
.
split
(
' '
)[
1
]
.
lstrip
()
.
strip
()
unit
=
f
'
(千){unit}
'
unit
=
f
'
{unit}(千)
'
req
.
close
()
return
unit
...
...
@@ -104,9 +105,11 @@ def getlist(table, tableName):
value
=
re
.
sub
(
r"[^\d+-]"
,
""
,
value
)
else
:
value
=
'-'
date
=
years
[
f
'value{i}'
]
.
split
(
'/'
)[
2
]
+
'-'
+
years
[
f
'value{i}'
]
.
split
(
'/'
)[
0
]
+
'-'
+
\
years
[
f
'value{i}'
]
.
split
(
'/'
)[
1
]
list
.
append
({
f
'{tableName}'
:
name
,
'value'
:
value
,
'date'
:
date
,
})
date_
=
years
[
f
'value{i}'
]
if
date_
:
date
=
date_
.
split
(
'/'
)[
2
]
+
'-'
+
date_
.
split
(
'/'
)[
0
]
+
'-'
+
\
date_
.
split
(
'/'
)[
1
]
list
.
append
({
f
'{tableName}'
:
name
,
'value'
:
value
,
'date'
:
date
,
})
return
list
...
...
@@ -162,6 +165,7 @@ def getYear(start_time, session, social_code, gpdm):
# 判断该报告期是否已采过
panduan
=
check_date
(
social_code
,
date
+
'-year'
)
if
panduan
:
log
.
info
(
f
'{social_code}=={gpdm}=={date}年度数据采集过'
)
continue
xjll_list_f
=
reviseData
([
item
for
item
in
final_list
if
'xjll'
in
item
],
unit
,
'xjll'
)
zcfz_list_f
=
reviseData
([
item
for
item
in
final_list
if
'zcfz'
in
item
],
unit
,
'zcfz'
)
...
...
@@ -177,6 +181,7 @@ def getYear(start_time, session, social_code, gpdm):
"ynFirst"
:
ynFirst
,
}
sendData
(
start_time
,
social_code
,
gpdm
,
dic_info
)
log
.
info
(
f
'{social_code}=={gpdm}=={date}年度财务数据采集成功'
)
date_list
.
append
(
date
+
'-year'
)
else
:
log
.
error
(
f
'找不到{social_code}=={gpdm}年度财务数据'
)
...
...
@@ -184,6 +189,7 @@ def getYear(start_time, session, social_code, gpdm):
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
social_code
,
taskType
,
state
,
takeTime
,
url
,
f
'{social_code}===无年度财务数据'
)
except
:
log
.
error
(
f
'{social_code}===年度财务数据访问失败'
)
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
social_code
,
taskType
,
state
,
takeTime
,
url
,
f
'{social_code}===年度财务数据访问失败'
)
...
...
@@ -217,6 +223,7 @@ def getQuarter(start_time, session, social_code, gpdm):
# 判断该报告期是否已采过
panduan
=
check_date
(
social_code
,
date
+
'-quarter'
)
if
panduan
:
log
.
info
(
f
'{social_code}=={gpdm}=={date}季度数据采集过'
)
continue
xjll_list_f
=
reviseData
([
item
for
item
in
final_list
if
'xjll'
in
item
],
unit
,
'xjll'
)
zcfz_list_f
=
reviseData
([
item
for
item
in
final_list
if
'zcfz'
in
item
],
unit
,
'zcfz'
)
...
...
@@ -236,13 +243,15 @@ def getQuarter(start_time, session, social_code, gpdm):
if
panduan_flag
:
dic_info
[
'dateFlag'
]
=
'year'
sendData
(
start_time
,
social_code
,
gpdm
,
dic_info
)
log
.
info
(
f
'{social_code}=={gpdm}=={date}季度财务数据采集成功'
)
date_list
.
append
(
date
+
'-quarter'
)
else
:
log
.
error
(
f
'{social_code}=={gpdm}无季度财务数据'
)
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
social_code
,
taskType
,
state
,
takeTime
,
url
,
f
'{social_code}===无季度财务数据'
)
except
:
log
.
error
(
f
'{social_code}===季度财务数据访问失败'
)
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
social_code
,
taskType
,
state
,
takeTime
,
url
,
f
'{social_code}===季度财务数据访问失败'
)
...
...
@@ -250,36 +259,52 @@ def getQuarter(start_time, session, social_code, gpdm):
return
date_list
def FinanceFromNasdaq():
    """Queue the credit codes of the Nasdaq-listed companies for collection.

    Selects ``xydm`` from every ``mgzqyjwyh_list`` row with ``state=2`` and
    ``exchange='Nasdaq'`` through the module-level ``cursor`` and appends
    each value, in query order, to the list stored under
    ``FinanceFromNasdaq:nasdaqfinance_socialCode`` via ``r.rpush``.
    """
    queue_key = 'FinanceFromNasdaq:nasdaqfinance_socialCode'
    query = "select xydm from mgzqyjwyh_list where state=2 and exchange='Nasdaq';"

    cursor.execute(query)
    # Each fetched row is a one-column sequence; keep only the code itself.
    codes = [row[0] for row in cursor.fetchall()]

    for code in codes:
        r.rpush(queue_key, code)

    # NOTE(review): assumed to report once after the whole batch is pushed;
    # the original indentation was not recoverable -- confirm.
    print('redis放入成功')
def getInfomation(social_code):
    """Fetch the enterprise row that belongs to one social credit code.

    Queries ``mgzqyjwyh_list`` through the module-level ``cursor`` for a row
    with ``state=2`` whose ``xydm`` column equals ``social_code``.

    :param social_code: value compared against the ``xydm`` column.
    :return: whatever ``cursor.fetchone()`` yields for the query, i.e. the
        first matching row, or ``None`` when nothing matches.
    """
    # Bind the value through the driver instead of formatting it into the
    # SQL text, so a quote inside the code can neither break nor alter the
    # statement.
    sql = "select * from mgzqyjwyh_list where state=2 and xydm=%s;"
    cursor.execute(sql, (social_code,))
    return cursor.fetchone()
def
doJob
():
# while True:
# social_code = baseCore.redicPullData('')
# datas_enterprise = baseCore.getInfomation(social_code)
session
.
get
(
URL
,
headers
=
headers
)
# sql = "select * from mgzqyjwyh_list where state=2 and exchange='Nasdaq';"
# cursor.execute(sql)
# datas_enterprise = cursor.fetchall()
# for data_enterprise in datas_enterprise:
start_time
=
time
.
time
()
# gpdm = data_enterprise[3]
# social_code = data_enterprise[6]
social_code
=
'ZD0CN0012309000172'
gpdm
=
'NTES'
# 采集年度数据
date_list_year
=
getYear
(
start_time
,
session
,
social_code
,
gpdm
)
# 保存年度数据到redis
add_date
(
social_code
,
date_list_year
)
# 采集季度数据
date_list_quarter
=
getQuarter
(
start_time
,
session
,
social_code
,
gpdm
)
# 保存季度数据到redis
add_date
(
social_code
,
date_list_quarter
)
timeCost
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
state
=
1
baseCore
.
recordLog
(
social_code
,
taskType
,
state
,
timeCost
,
''
,
''
)
log
.
info
(
f
'{social_code}=={gpdm}==耗时{timeCost}'
)
# break
cursor
.
close
()
cnx
.
close
()
while
True
:
social_code
=
baseCore
.
redicPullData
(
'FinanceFromNasdaq:nasdaqfinance_socialCode'
)
if
not
social_code
or
social_code
==
None
:
log
.
info
(
'============已没有数据============等待==============='
)
time
.
sleep
(
600
)
continue
data_enterprise
=
getInfomation
(
social_code
)
start_time
=
time
.
time
()
gpdm
=
data_enterprise
[
3
]
social_code
=
data_enterprise
[
6
]
# print(gpdm,social_code)
# 采集年度数据
date_list_year
=
getYear
(
start_time
,
session
,
social_code
,
gpdm
)
# 保存年度数据到redis
add_date
(
social_code
,
date_list_year
)
# 采集季度数据
date_list_quarter
=
getQuarter
(
start_time
,
session
,
social_code
,
gpdm
)
# 保存季度数据到redis
add_date
(
social_code
,
date_list_quarter
)
timeCost
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
state
=
1
baseCore
.
recordLog
(
social_code
,
taskType
,
state
,
timeCost
,
''
,
''
)
log
.
info
(
f
'{social_code}=={gpdm}==耗时{timeCost}'
)
if __name__ == '__main__':
    # Script entry point: run the collector and always release the
    # module-level database handles, even when doJob() is interrupted or
    # raises (previously the close calls were skipped in that case).
    try:
        # Collect the financial data
        doJob()
        # Push the company stock codes into redis
        # FinanceFromNasdaq()
    finally:
        cursor.close()
        cnx.close()
\ No newline at end of file
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论