Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
1
合并请求
1
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
丁双波
zzsn_spider
Commits
41a25e25
提交
41a25e25
authored
4月 12, 2024
作者:
XveLingKun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
中英对照标签调整
上级
bf7737d6
显示空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
122 行增加
和
42 行删除
+122
-42
get_tyc_cookies.py
comData/Tyc/get_tyc_cookies.py
+1
-1
东方财富网财务数据.py
comData/caiwushuju/东方财富网财务数据.py
+78
-11
trading_economics.py
comData/dingzhi/trading_economics.py
+13
-0
wxList.py
comData/weixin_solo/wxList.py
+30
-30
没有找到文件。
comData/Tyc/get_tyc_cookies.py
浏览文件 @
41a25e25
...
@@ -26,7 +26,7 @@ if __name__ == "__main__":
...
@@ -26,7 +26,7 @@ if __name__ == "__main__":
name
=
input
(
'所属用户:'
)
name
=
input
(
'所属用户:'
)
driver
=
create_driver
()
driver
=
create_driver
()
driver
.
get
(
url
)
driver
.
get
(
url
)
time
.
sleep
(
6
0
)
time
.
sleep
(
8
0
)
cookies
=
driver
.
get_cookies
()
cookies
=
driver
.
get_cookies
()
# print(driver.get_cookies())
# print(driver.get_cookies())
...
...
comData/caiwushuju/东方财富网财务数据.py
浏览文件 @
41a25e25
"""
"""
...
@@ -15,7 +15,7 @@ log = baseCore.getLogger()
...
@@ -15,7 +15,7 @@ log = baseCore.getLogger()
# 判断股票代码是否存在
# 判断股票代码是否存在
def
check_code
(
com_code
):
def
check_code
(
com_code
):
r
=
redis
.
Redis
(
host
=
"114.11
5.236.206"
,
port
=
6379
,
password
=
'clbzzsn'
,
db
=
3
)
r
=
redis
.
Redis
(
host
=
"114.11
6.90.53"
,
port
=
6380
,
password
=
'clbzzsn'
,
db
=
3
)
res
=
r
.
exists
(
'com_caiwushuju_code::'
+
com_code
)
res
=
r
.
exists
(
'com_caiwushuju_code::'
+
com_code
)
#如果key存在 则不是第一次采集该企业, res = 1
#如果key存在 则不是第一次采集该企业, res = 1
if
res
:
if
res
:
...
@@ -24,7 +24,7 @@ def check_code(com_code):
...
@@ -24,7 +24,7 @@ def check_code(com_code):
return
True
#表示是第一次采集
return
True
#表示是第一次采集
def
check_date
(
com_code
,
info_date
):
def
check_date
(
com_code
,
info_date
):
r
=
redis
.
Redis
(
host
=
"114.11
5.236.206"
,
port
=
6379
,
password
=
'clbzzsn'
,
db
=
3
)
r
=
redis
.
Redis
(
host
=
"114.11
6.90.53"
,
port
=
6380
,
password
=
'clbzzsn'
,
db
=
3
)
res
=
r
.
sismember
(
'com_caiwushuju_code::'
+
com_code
,
info_date
)
# 注意是 保存set的方式
res
=
r
.
sismember
(
'com_caiwushuju_code::'
+
com_code
,
info_date
)
# 注意是 保存set的方式
if
res
:
if
res
:
return
True
return
True
...
@@ -33,7 +33,7 @@ def check_date(com_code,info_date):
...
@@ -33,7 +33,7 @@ def check_date(com_code,info_date):
# 将采集后的股票代码对应的报告期保存进redis
# 将采集后的股票代码对应的报告期保存进redis
def
add_date
(
com_code
,
date_list
):
def
add_date
(
com_code
,
date_list
):
r
=
redis
.
Redis
(
host
=
"114.11
5.236.206"
,
port
=
6379
,
password
=
'clbzzsn'
,
db
=
3
)
r
=
redis
.
Redis
(
host
=
"114.11
6.90.53"
,
port
=
6380
,
password
=
'clbzzsn'
,
db
=
3
)
#遍历date_list 放入redis
#遍历date_list 放入redis
for
date
in
date_list
:
for
date
in
date_list
:
res
=
r
.
sadd
(
'com_caiwushuju_code::'
+
com_code
,
date
)
res
=
r
.
sadd
(
'com_caiwushuju_code::'
+
com_code
,
date
)
...
@@ -158,7 +158,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
...
@@ -158,7 +158,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
for
one_info
in
soup_zcfzb
.
find_all
(
'tr'
)[
2
:]:
for
one_info
in
soup_zcfzb
.
find_all
(
'tr'
)[
2
:]:
if
'value.'
not
in
one_info
.
text
:
if
'value.'
not
in
one_info
.
text
:
continue
continue
info_name
=
one_info
.
find
(
'span'
)
.
text
# todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list
=
one_info
.
find_all
(
'span'
)
info_name
=
''
for
info_tag
in
info_tag_list
:
if
'display:none'
in
info_tag
.
get
(
'style'
):
continue
else
:
info_name
=
info_tag
.
text
break
if
info_name
:
pass
else
:
continue
# info_name = one_info.find('span').text
if
'审计意见'
in
info_name
:
if
'审计意见'
in
info_name
:
continue
continue
info_name_en
=
re
.
findall
(
'value
\
.(.*?)
\
)}}'
,
one_info
.
text
)[
0
]
info_name_en
=
re
.
findall
(
'value
\
.(.*?)
\
)}}'
,
one_info
.
text
)[
0
]
...
@@ -212,7 +225,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
...
@@ -212,7 +225,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
break
break
if
'value.'
not
in
one_info
.
text
:
if
'value.'
not
in
one_info
.
text
:
continue
continue
info_name
=
one_info
.
find
(
'span'
)
.
text
# todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list
=
one_info
.
find_all
(
'span'
)
info_name
=
''
for
info_tag
in
info_tag_list
:
if
'display:none'
in
info_tag
.
get
(
'style'
):
continue
else
:
info_name
=
info_tag
.
text
break
if
info_name
:
pass
else
:
continue
# info_name = one_info.find('span').text
if
'审计意见'
in
info_name
:
if
'审计意见'
in
info_name
:
continue
continue
info_name_en
=
re
.
findall
(
'value
\
.(.*?)
\
)}}'
,
one_info
.
text
)[
0
]
info_name_en
=
re
.
findall
(
'value
\
.(.*?)
\
)}}'
,
one_info
.
text
)[
0
]
...
@@ -273,7 +299,7 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
...
@@ -273,7 +299,7 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
soup_name
=
BeautifulSoup
(
res_name
.
content
,
'html.parser'
)
soup_name
=
BeautifulSoup
(
res_name
.
content
,
'html.parser'
)
# 第一个表
# 第一个表
try
:
try
:
script_zcfzb
=
soup_name
.
find
(
'script'
,
{
'id'
:
'
zcfzb
_qy'
})
script_zcfzb
=
soup_name
.
find
(
'script'
,
{
'id'
:
'
cccccccccc
_qy'
})
if
script_zcfzb
:
if
script_zcfzb
:
soup_zcfzb
=
BeautifulSoup
(
script_zcfzb
.
text
.
strip
(),
'lxml'
)
soup_zcfzb
=
BeautifulSoup
(
script_zcfzb
.
text
.
strip
(),
'lxml'
)
else
:
else
:
...
@@ -341,7 +367,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
...
@@ -341,7 +367,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
for
one_info
in
soup_zcfzb
.
find_all
(
'tr'
)[
2
:]:
for
one_info
in
soup_zcfzb
.
find_all
(
'tr'
)[
2
:]:
if
'value.'
not
in
one_info
.
text
:
if
'value.'
not
in
one_info
.
text
:
continue
continue
info_name
=
one_info
.
find
(
'span'
)
.
text
# todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list
=
one_info
.
find_all
(
'span'
)
info_name
=
''
for
info_tag
in
info_tag_list
:
if
'display:none'
in
info_tag
.
get
(
'style'
):
continue
else
:
info_name
=
info_tag
.
text
break
if
info_name
:
pass
else
:
continue
# info_name = one_info.find('span').text
if
'审计意见'
in
info_name
:
if
'审计意见'
in
info_name
:
continue
continue
info_name_en
=
re
.
findall
(
'value
\
.(.*?)
\
)}}'
,
one_info
.
text
)[
0
]
info_name_en
=
re
.
findall
(
'value
\
.(.*?)
\
)}}'
,
one_info
.
text
)[
0
]
...
@@ -368,7 +407,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
...
@@ -368,7 +407,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
for
one_info
in
soup_lrb
.
find_all
(
'tr'
)[
2
:]:
for
one_info
in
soup_lrb
.
find_all
(
'tr'
)[
2
:]:
if
'value.'
not
in
one_info
.
text
:
if
'value.'
not
in
one_info
.
text
:
continue
continue
info_name
=
one_info
.
find
(
'span'
)
.
text
# todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list
=
one_info
.
find_all
(
'span'
)
info_name
=
''
for
info_tag
in
info_tag_list
:
if
'display:none'
in
info_tag
.
get
(
'style'
):
continue
else
:
info_name
=
info_tag
.
text
break
if
info_name
:
pass
else
:
continue
# info_name = one_info.find('span').text
if
'审计意见'
in
info_name
:
if
'审计意见'
in
info_name
:
continue
continue
info_name_en
=
re
.
findall
(
'value
\
.(.*?)
\
)}}'
,
one_info
.
text
)[
0
]
info_name_en
=
re
.
findall
(
'value
\
.(.*?)
\
)}}'
,
one_info
.
text
)[
0
]
...
@@ -395,7 +447,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
...
@@ -395,7 +447,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
break
break
if
'value.'
not
in
one_info
.
text
:
if
'value.'
not
in
one_info
.
text
:
continue
continue
info_name
=
one_info
.
find
(
'span'
)
.
text
# todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list
=
one_info
.
find_all
(
'span'
)
info_name
=
''
for
info_tag
in
info_tag_list
:
if
'display:none'
in
info_tag
.
get
(
'style'
):
continue
else
:
info_name
=
info_tag
.
text
break
if
info_name
:
pass
else
:
continue
# info_name = one_info.find('span').text
if
'审计意见'
in
info_name
:
if
'审计意见'
in
info_name
:
continue
continue
info_name_en
=
re
.
findall
(
'value
\
.(.*?)
\
)}}'
,
one_info
.
text
)[
0
]
info_name_en
=
re
.
findall
(
'value
\
.(.*?)
\
)}}'
,
one_info
.
text
)[
0
]
...
@@ -455,7 +520,7 @@ def getReportTime():
...
@@ -455,7 +520,7 @@ def getReportTime():
list_month
=
[
'-12-31'
,
'-09-30'
,
'-06-30'
,
'-03-31'
]
list_month
=
[
'-12-31'
,
'-09-30'
,
'-06-30'
,
'-03-31'
]
for
year
in
range
(
year
,
2018
,
-
1
):
for
year
in
range
(
year
,
2018
,
-
1
):
for
month
in
list_month
:
for
month
in
list_month
[::
-
1
]
:
date
=
str
(
year
)
+
month
date
=
str
(
year
)
+
month
#todo:判断拼接的报告期是否大于当前日期,如果大于当前日期,怎说明还没到这个时间,跳过
#todo:判断拼接的报告期是否大于当前日期,如果大于当前日期,怎说明还没到这个时间,跳过
current_date
=
current_date_
.
strftime
(
'
%
Y-
%
m-
%
d'
)
current_date
=
current_date_
.
strftime
(
'
%
Y-
%
m-
%
d'
)
...
@@ -513,6 +578,8 @@ def job(taskType,cnx,cursor):
...
@@ -513,6 +578,8 @@ def job(taskType,cnx,cursor):
# securities_code = code
# securities_code = code
# else:
# else:
# continue
# continue
# 测试:
# securities_code = '601179'
if
exchange
==
1
:
if
exchange
==
1
:
com_code
=
'bj'
+
securities_code
com_code
=
'bj'
+
securities_code
if
exchange
==
2
:
if
exchange
==
2
:
...
...
comData/dingzhi/trading_economics.py
0 → 100644
浏览文件 @
41a25e25
import
requests
headers
=
{
'User-Agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'
}
if
__name__
==
"__main__"
:
url
=
'https://tradingeconomics.com/united-states/indicators'
requests
.
get
(
url
,
headers
=
headers
)
if
requests
.
status_codes
==
200
:
pass
pass
\ No newline at end of file
comData/weixin_solo/wxList.py
浏览文件 @
41a25e25
...
@@ -311,38 +311,38 @@ def getnumber_redis():
...
@@ -311,38 +311,38 @@ def getnumber_redis():
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
# getFromSql()
getFromSql
()
numbers
=
getnumber_redis
()
log
.
info
(
"当前批次采集公众号个数{}"
.
format
(
numbers
))
time
.
sleep
(
3
)
dic_user_count
=
{}
# dic_user_count = {
# 'name': '',
# 'use_count': 0,
# 'gzh_count': 0
# }
start
=
time
.
time
()
log
.
info
(
f
"开始时间{baseCore.getNowTime(1)}"
)
while
True
:
infoSourceCode
=
baseCore
.
redicPullData
(
'WeiXinGZH:infoSourceCode'
)
# infoSourceCode = 'IN-20220609-57899'
if
infoSourceCode
==
'None'
or
infoSourceCode
==
None
:
log
.
info
(
"redis已经没有数据了,重新放置数据"
)
log
.
info
(
f
"采集完一轮公众号耗时{baseCore.getTimeCost(start, time.time())}"
)
# time.sleep(60)
# numbers = getnumber_redis()
# numbers = getnumber_redis()
# log.info("当前批次采集公众号个数{}".format(numbers))
# log.info("当前批次采集公众号个数{}".format(numbers))
break
# time.sleep(3)
# dic_user_count = {}
# # dic_user_count = {
# # 'name': '',
# # 'use_count': 0,
# # 'gzh_count': 0
# # }
# start = time.time()
# log.info(f"开始时间{baseCore.getNowTime(1)}")
# while True:
#
# infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
# infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
# continue
# # infoSourceCode = 'IN-20220609-57899'
# if infoSourceCode == 'None' or infoSourceCode == None:
getWxList
(
infoSourceCode
,
dic_user_count
)
# log.info("redis已经没有数据了,重新放置数据")
if
dic_user_count
:
# log.info(f"采集完一轮公众号耗时{baseCore.getTimeCost(start, time.time())}")
for
key
,
value
in
dic_user_count
.
items
():
#
log
.
info
(
f
"====账号{key},使用次数{value}"
)
# # time.sleep(60)
# # numbers = getnumber_redis()
# # log.info("当前批次采集公众号个数{}".format(numbers))
# break
# break
# infoSourceCode = 'IN-20220917-0159'
# # infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
# getWxList(infoSourceCode)
# # continue
#
# getWxList(infoSourceCode, dic_user_count)
# if dic_user_count:
# for key, value in dic_user_count.items():
# log.info(f"====账号{key},使用次数{value}")
# # break
# # infoSourceCode = 'IN-20220917-0159'
# # getWxList(infoSourceCode)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论