Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
王景浩
zzsn_spider
Commits
54d3083a
提交
54d3083a
authored
4月 07, 2024
作者:
XveLingKun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
微信公众号采集
上级
f434a907
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
101 行增加
和
33 行删除
+101
-33
oneWeixin2.py
comData/weixin_solo/oneWeixin2.py
+26
-15
test.py
comData/weixin_solo/test.py
+46
-2
wxList.py
comData/weixin_solo/wxList.py
+29
-16
没有找到文件。
comData/weixin_solo/oneWeixin2.py
浏览文件 @
54d3083a
...
@@ -24,7 +24,7 @@ r = baseCore.r
...
@@ -24,7 +24,7 @@ r = baseCore.r
urllib3
.
disable_warnings
()
urllib3
.
disable_warnings
()
def
rePutIntoR
(
item
):
def
rePutIntoR
(
item
):
r
.
rpush
(
'WeiXinGZH:
infoSourceCode
'
,
item
)
r
.
rpush
(
'WeiXinGZH:
linkid
'
,
item
)
def
updatewxLink
(
link
,
info_source_code
,
state
):
def
updatewxLink
(
link
,
info_source_code
,
state
):
updateSuccess
=
f
"update wx_link set state= {state} where link='{link}' and info_source_code='{info_source_code}' "
updateSuccess
=
f
"update wx_link set state= {state} where link='{link}' and info_source_code='{info_source_code}' "
...
@@ -39,7 +39,7 @@ def getjsonInfo():
...
@@ -39,7 +39,7 @@ def getjsonInfo():
pass
pass
else
:
else
:
log
.
info
(
'-----没有数据了-----'
)
log
.
info
(
'-----没有数据了-----'
)
return
False
return
False
,
False
#从数据库中获取信息 一条
#从数据库中获取信息 一条
select_sql
=
f
"select * from wx_link where state=0 and id= '{linkid}'"
select_sql
=
f
"select * from wx_link where state=0 and id= '{linkid}'"
cursor_
.
execute
(
select_sql
)
cursor_
.
execute
(
select_sql
)
...
@@ -49,7 +49,7 @@ def getjsonInfo():
...
@@ -49,7 +49,7 @@ def getjsonInfo():
pass
pass
else
:
else
:
log
.
info
(
'-----没有数据了-----'
)
log
.
info
(
'-----没有数据了-----'
)
return
False
return
False
,
False
dict_json
=
{
dict_json
=
{
'sid'
:
row
[
1
],
'sid'
:
row
[
1
],
'site_uri'
:
row
[
2
],
'site_uri'
:
row
[
2
],
...
@@ -63,7 +63,7 @@ def getjsonInfo():
...
@@ -63,7 +63,7 @@ def getjsonInfo():
update_sql
=
f
"update wx_link set state=1 where link='{row[7]}' and info_source_code='{row[4]}' "
update_sql
=
f
"update wx_link set state=1 where link='{row[7]}' and info_source_code='{row[4]}' "
cursor_
.
execute
(
update_sql
)
cursor_
.
execute
(
update_sql
)
cnx_
.
commit
()
cnx_
.
commit
()
return
dict_json
return
dict_json
,
linkid
@retry
(
tries
=
3
,
delay
=
2
)
@retry
(
tries
=
3
,
delay
=
2
)
def
getrequest
(
url_news
):
def
getrequest
(
url_news
):
...
@@ -72,7 +72,7 @@ def getrequest(url_news):
...
@@ -72,7 +72,7 @@ def getrequest(url_news):
res_news
=
requests
.
get
(
url_news
,
proxies
=
ip
,
timeout
=
20
)
res_news
=
requests
.
get
(
url_news
,
proxies
=
ip
,
timeout
=
20
)
if
res_news
.
status_code
!=
200
:
if
res_news
.
status_code
!=
200
:
raise
raise
return
res_news
def
get_info
(
dict_json
):
def
get_info
(
dict_json
):
# list_all_info = []
# list_all_info = []
...
@@ -102,6 +102,7 @@ def get_info(dict_json):
...
@@ -102,6 +102,7 @@ def get_info(dict_json):
# 修改请求方法,retry 3次
# 修改请求方法,retry 3次
try
:
try
:
res_news
=
getrequest
(
url_news
)
res_news
=
getrequest
(
url_news
)
# print(res_news)
except
:
except
:
try
:
try
:
res_news
=
requests
.
get
(
url_news
,
timeout
=
20
)
res_news
=
requests
.
get
(
url_news
,
timeout
=
20
)
...
@@ -118,7 +119,7 @@ def get_info(dict_json):
...
@@ -118,7 +119,7 @@ def get_info(dict_json):
news_html
=
rm_style_attr
(
news_html
)
news_html
=
rm_style_attr
(
news_html
)
del
news_html
[
'id'
]
del
news_html
[
'id'
]
del
news_html
[
'class'
]
del
news_html
[
'class'
]
except
:
except
Exception
as
e
:
log
.
error
(
f
'{url_news}-----{info_source_code}'
)
log
.
error
(
f
'{url_news}-----{info_source_code}'
)
return
False
return
False
try
:
try
:
...
@@ -255,9 +256,15 @@ def rm_style_attr(soup):
...
@@ -255,9 +256,15 @@ def rm_style_attr(soup):
except
:
except
:
continue
continue
# first_div = soup.select('div[id="js_content"]')
# # 设置style属性
# first_div['style'] = 'width: 814px ; margin: 0 auto;'
first_div
=
soup
.
select
(
'div[id="js_content"]'
)
first_div
=
soup
.
select
(
'div[id="js_content"]'
)
# 设置style属性
if
first_div
:
first_div
[
'style'
]
=
'width: 814px ; margin: 0 auto;'
first_div
=
first_div
[
0
]
# 获取第一个匹配的元素
first_div
[
'style'
]
=
'width: 814px ; margin: 0 auto;'
# 设置style属性
return
soup
return
soup
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
@@ -267,12 +274,15 @@ if __name__=="__main__":
...
@@ -267,12 +274,15 @@ if __name__=="__main__":
#一次拿取一篇文章
#一次拿取一篇文章
# todo: 从redis拿数据 更新mysql状态
# todo: 从redis拿数据 更新mysql状态
dict_json
=
getjsonInfo
()
dict_json
,
linkid
=
getjsonInfo
()
if
dict_json
:
try
:
if
get_info
(
dict_json
):
if
dict_json
:
num_caiji
=
num_caiji
+
1
if
get_info
(
dict_json
):
log
.
info
(
f
'-----已采集{num_caiji}篇文章---来源{dict_json["site_name"]}----'
)
num_caiji
=
num_caiji
+
1
else
:
log
.
info
(
f
'-----已采集{num_caiji}篇文章---来源{dict_json["site_name"]}----'
)
continue
else
:
continue
except
:
rePutIntoR
(
linkid
)
baseCore
.
close
()
baseCore
.
close
()
\ No newline at end of file
comData/weixin_solo/test.py
浏览文件 @
54d3083a
# -*- coding: utf-8 -*-
import
re
import
time
import
time
import
pandas
as
pd
import
pandas
as
pd
...
@@ -40,6 +43,7 @@ import pandas as pd
...
@@ -40,6 +43,7 @@ import pandas as pd
# else:
# else:
# pass
# pass
import
redis
import
redis
from
bs4
import
BeautifulSoup
def
check_url
():
def
check_url
():
...
@@ -61,11 +65,51 @@ def test(dic_user_count):
...
@@ -61,11 +65,51 @@ def test(dic_user_count):
return
dic_user_count
return
dic_user_count
def
test1
():
def
test1
():
dic_user_count
=
{
"A"
:
0
}
dic_user_count
=
{}
for
i
in
range
(
3
):
for
i
in
range
(
3
):
dic_user_count
=
test
(
dic_user_count
)
dic_user_count
=
test
(
dic_user_count
)
print
(
dic_user_count
)
print
(
dic_user_count
)
def rm_style_attr(soup):
    """Strip selected inline CSS properties and fix the article container width.

    For every tag that carries a ``style`` attribute, the ``visibility``,
    ``font-family``, ``color`` and ``font-size`` declarations (each up to and
    including its terminating ``;``) are removed, in that order. Afterwards the
    first ``div`` with ``id="js_content"``, if any, gets a fixed-width,
    centred style.

    Args:
        soup: A parsed document exposing ``find_all(style=True)`` and
            ``select(css_selector)`` (presumably a ``BeautifulSoup`` object --
            confirm against callers).

    Returns:
        The same ``soup`` object, modified in place.
    """
    # DOTALL is passed through ``flags`` instead of an inline ``(?s)`` placed in
    # the middle of the pattern: Python 3.11+ rejects mid-pattern global flags
    # with ``re.error``, and the previous bare ``except`` swallowed that error,
    # so no property was ever removed.
    stripped_props = ('visibility', 'font-family', 'color', 'font-size')
    patterns = [re.compile(prop + r':.{1,}?;', re.S) for prop in stripped_props]

    # Walk every tag that has a style attribute.
    for style_tag in soup.find_all(style=True):
        try:
            styleattr = style_tag['style']
            # NOTE(review): the ``color:`` pattern is unanchored, so it also
            # eats the tail of ``background-color: ...;`` / ``border-color: ...;``
            # -- confirm whether that is intended.
            for pattern in patterns:
                styleattr = pattern.sub('', styleattr)
            style_tag['style'] = styleattr
        except (KeyError, TypeError):
            # Best effort: skip tags whose style value cannot be processed.
            continue

    first_div = soup.select('div[id="js_content"]')
    if first_div:
        # ``select`` returns a list; style only the first matching element.
        first_div = first_div[0]
        first_div['style'] = 'width: 814px ; margin: 0 auto;'
    return soup
def aaa(dic_user_count):
    """Increment the ``"A"`` counter of *dic_user_count* three times, in place.

    A missing ``"A"`` key is treated as a count of zero before the first
    increment. Nothing is returned; the caller observes the change through the
    dictionary it passed in.
    """
    for _ in range(3):
        dic_user_count["A"] = dic_user_count.get("A", 0) + 1
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
test1
()
# html = """<div class="rich_media_content js_underline_content autoTypeSetting24psection" id="js_content" style="width: 814px ; margin: 0 auto;"><section style="white-space: normal;text-indent: 2em;margin-top: 16px;margin-bottom: 16px;line-height: 1.75em;"><span style="outline: 0px;color: rgb(51, 51, 51);letter-spacing: 0.544px;text-indent: 2em;">3月31日,中国十七冶建安分公司承建的黄石EOD项目大泉路生态廊道工程马鞍山路高架桥顺利通车。<br/></span></section><section style="white-space: normal;margin-top: 16px;margin-bottom: 16px;line-height: 1.75em;text-indent: 0em;"><img class="rich_pages wxw-img" data-backh="312" data-backw="578" data-imgfileid="100024978" data-ratio="0.539568345323741" data-src="https://mmbiz.qpic.cn/mmbiz_png/ibEX3YMicPu80BnkeVDWh45k2S5saQQqDfvKJfBiblZO6OjbyWrYSJ3c2fib2eQReXQSLMONFicRD0fT2OdFY4da0og/640?wx_fmt=png&from=appmsg" data-type="png" data-w="695" style="border: none;width: 100%;height: auto;" title=""/></section><section style="white-space: normal;text-indent: 2em;margin-top: 16px;margin-bottom: 16px;line-height: 1.75em;"><span style="outline: 0px;color: rgb(51, 51, 51);letter-spacing: 0.544px;text-indent: 2em;">马鞍山路高架桥全长315米,设计速度为60km/h,采用双向四车道。该桥横跨马鞍山路,建成后将缓解马鞍山路和大泉路交叉口的交通压力,对减轻市民们的出行焦虑有着重要意义,同时将进一步加强大冶、阳新与黄石城区的联系,为黄石更好融入“武鄂黄黄”都市圈奠定基础。</span></section><section style="white-space: normal;text-indent: 2em;margin-top: 16px;margin-bottom: 16px;line-height: 1.75em;"><span style="outline: 0px;color: rgb(51, 51, 51);letter-spacing: 0.544px;text-indent: 2em;">为全力推动马鞍山路高架桥建成,项目管理团队锚定节点目标不动摇,倒排工期,通过精心组织,优化施工方案,主动出击,联合行政主管部门多次召开推进会,克服施工过程中的管线迁改难题,为马鞍山路高架桥顺利通车提供坚实组织保障。</span></section><section style="white-space: normal;margin-top: 16px;margin-bottom: 16px;line-height: 1.75em;text-indent: 0em;"><img class="rich_pages wxw-img" data-backh="374" data-backw="578" data-imgfileid="100024979" data-ratio="0.6474820143884892" data-src="https://mmbiz.qpic.cn/mmbiz_png/ibEX3YMicPu80BnkeVDWh45k2S5saQQqDfZBWgkUaG20wMwKDxZ4SjjFwZg0bvicuAcLPUia6ECktibM9KPj5NIrmicg/640?wx_fmt=png&from=appmsg" 
data-type="png" data-w="695" style="border: none;width: 100%;height: auto;" title=""/></section><section style="white-space: normal;text-indent: 2em;margin-top: 16px;margin-bottom: 16px;line-height: 1.75em;"><span style="outline: 0px;color: rgb(51, 51, 51);letter-spacing: 0.544px;text-indent: 2em;">项目部将牢牢把握“十七冶发展要义”,深入践行“24小时工作法”,全力打造精品工程、安全工程、民生工程,为黄石打造武汉都市圈贡献十七冶力量。</span></section><section style="white-space: normal;text-indent: 2em;margin-top: 16px;margin-bottom: 16px;line-height: 1.75em;"><span style="outline: 0px;color: rgb(51, 51, 51);letter-spacing: 0.544px;text-indent: 2em;"></span></section><section style="white-space: normal;text-indent: 2em;margin-top: 16px;margin-bottom: 16px;line-height: 1.75em;"><span style="outline: 0px;color: rgb(51, 51, 51);letter-spacing: 0.544px;text-indent: 2em;"></span></section><section style="margin-top: 16px;margin-bottom: 16px;white-space: normal;text-indent: 2em;line-height: 1.75em;"><section data-id="103115" data-role="splitline" data-tools="135编辑器" style="outline: 0px;font-variant-ligatures: normal;letter-spacing: 0.544px;orphans: 2;widows: 2;background-color: rgb(255, 255, 255);color: rgb(34, 34, 34);font-size: 16px;font-family: 微软雅黑;text-indent: 32px;overflow-wrap: break-word !important;"><section style="margin: 16px auto;outline: 0px;text-align: center;text-indent: 2em;line-height: 1.75em;overflow-wrap: break-word !important;"><section style="outline: 0px;display: flex;justify-content: center;align-items: center;overflow-wrap: break-word !important;"><section style="margin-left: 2px;outline: 0px;height: 6px;width: 6px;border-radius: 100%;border-width: 1px;border-style: solid;border-color: rgb(35, 35, 35);overflow: hidden;overflow-wrap: break-word !important;"><br data-filtered="filtered" style="outline: 0px;overflow-wrap: break-word !important;"/></section><section style="margin-left: 2px;outline: 0px;height: 6px;width: 6px;border-radius: 100%;border-width: 1px;border-style: solid;border-color: rgb(35, 35, 
35);overflow: hidden;overflow-wrap: break-word !important;"> <br data-filtered="filtered" style="outline: 0px;overflow-wrap: break-word !important;"/></section><section style="margin-left: 2px;outline: 0px;height: 6px;width: 6px;border-radius: 100%;border-width: 1px;border-style: solid;border-color: rgb(35, 35, 35);overflow: hidden;overflow-wrap: break-word !important;"><br style="outline: 0px;overflow-wrap: break-word !important;"/></section><section style="margin-left: 4px;outline: 0px;height: 1px;background-color: rgb(35, 35, 35);flex: 1 1 0%;overflow: hidden;overflow-wrap: break-word !important;"><br data-filtered="filtered" style="outline: 0px;overflow-wrap: break-word !important;"/></section></section></section></section><section style="margin: 16px 8px;outline: 0px;font-variant-ligatures: normal;letter-spacing: 0.544px;orphans: 2;text-indent: 0em;widows: 2;background-color: rgb(255, 255, 255);color: rgb(34, 34, 34);font-family: 微软雅黑;text-align: left;line-height: 1.75em;overflow-wrap: break-word !important;"><span style="outline: 0px;font-size: 14px;color: rgb(136, 136, 136);overflow-wrap: break-word !important;">来源:中国企业网</span><img class="rich_pages wxw-img __bg_gif" data-backh="192" data-backw="320" data-fileid="100002618" data-galleryid="" data-imgfileid="100024527" data-ratio="0.6" data-src="https://mmbiz.qpic.cn/mmbiz_gif/ibEX3YMicPu82TZf4RScpazSD7OuViaH4cEUx9rCibPavn2cJXiagJrmVuVTpOJgibBV8368H2RYxxYp3Fhn1a7SU20Q/640?wx_fmt=gif" data-type="gif" data-w="1000" style="letter-spacing: 0.544px;text-indent: 0em;outline: 0px;color: rgb(136, 136, 136);font-size: 15px;text-align: center;width: 562px;visibility: visible !important;height: auto !important;" width="320px"/></section></section><p style="display: none;"><mp-style-type data-value="3"></mp-style-type></p></div>"""
# soup = BeautifulSoup(html, 'html.parser')
# soup = rm_style_attr(soup)
# print(soup)
dic_user_count
=
{}
aaa
(
dic_user_count
)
if
dic_user_count
:
for
key
,
value
in
dic_user_count
.
items
():
print
(
f
"====账号{key},采集公众号个数{value}"
)
comData/weixin_solo/wxList.py
浏览文件 @
54d3083a
...
@@ -10,8 +10,8 @@ import urllib3
...
@@ -10,8 +10,8 @@ import urllib3
from
pymysql.converters
import
escape_string
from
pymysql.converters
import
escape_string
import
sys
import
sys
sys
.
path
.
append
(
'D:
\\
zzsn_spider
\\
base'
)
# sys.path.append('D:\\zzsn
\\base')
import
BaseCore
from
base
import
BaseCore
urllib3
.
disable_warnings
(
urllib3
.
exceptions
.
InsecureRequestWarning
)
urllib3
.
disable_warnings
(
urllib3
.
exceptions
.
InsecureRequestWarning
)
baseCore
=
BaseCore
.
BaseCore
()
baseCore
=
BaseCore
.
BaseCore
()
...
@@ -184,7 +184,7 @@ def getToken():
...
@@ -184,7 +184,7 @@ def getToken():
return
row
[
0
]
return
row
[
0
]
# 获取列表数据
# 获取列表数据
每一页换一次公众号
def
getPageData
(
dic_url
,
page
,
dic_user_count
):
def
getPageData
(
dic_url
,
page
,
dic_user_count
):
url_
=
dic_url
[
'url_'
]
url_
=
dic_url
[
'url_'
]
origin
=
dic_url
[
'name'
]
origin
=
dic_url
[
'name'
]
...
@@ -206,7 +206,6 @@ def getPageData(dic_url, page, dic_user_count):
...
@@ -206,7 +206,6 @@ def getPageData(dic_url, page, dic_user_count):
user_name
=
tokenAndCookie
[
2
]
user_name
=
tokenAndCookie
[
2
]
token
=
tokenAndCookie
[
0
]
token
=
tokenAndCookie
[
0
]
log
.
info
(
f
"获取token到----{token}----{user_name}"
)
log
.
info
(
f
"获取token到----{token}----{user_name}"
)
dic_user_count
[
user_name
]
=
0
cookies
=
json
.
loads
(
tokenAndCookie
[
1
])
cookies
=
json
.
loads
(
tokenAndCookie
[
1
])
# s.cookies.update(cookies)
# s.cookies.update(cookies)
...
@@ -223,18 +222,22 @@ def getPageData(dic_url, page, dic_user_count):
...
@@ -223,18 +222,22 @@ def getPageData(dic_url, page, dic_user_count):
str_t
=
json
.
dumps
(
json_search
)
str_t
=
json
.
dumps
(
json_search
)
ret
=
json_search
[
'base_resp'
][
'ret'
]
ret
=
json_search
[
'base_resp'
][
'ret'
]
if
ret
==
0
:
if
ret
==
0
:
dic_user_count
[
user_name
]
+=
1
# 使用一次就记录一次
pass
if
user_name
in
dic_user_count
:
dic_user_count
[
user_name
]
+=
1
else
:
dic_user_count
[
user_name
]
=
1
elif
ret
==
200013
:
elif
ret
==
200013
:
log
.
info
(
f
'======{origin}-----{biz}----{user_name}账号被封======='
)
log
.
info
(
f
'======{origin}-----{biz}----{user_name}账号被封======='
)
# 封号修改token
# 封号修改token
updateTokeen
(
token
,
1
)
updateTokeen
(
token
,
1
)
return
getPageData
(
dic_url
,
page
,
dic_user_count
)
return
getPageData
(
dic_url
,
page
,
dic_user_count
)
,
dic_user_count
elif
ret
==
200002
:
elif
ret
==
200002
:
log
.
info
(
f
'======{origin}-----{biz}----该公众号号biz错误,请检查======='
)
log
.
info
(
f
'======{origin}-----{biz}----该公众号号biz错误,请检查======='
)
error
=
[
origin
,
url_
,
info_source_code
,
str_t
,
'无效biz参数'
]
error
=
[
origin
,
url_
,
info_source_code
,
str_t
,
'无效biz参数'
]
insertBadSql
(
error
)
insertBadSql
(
error
)
return
True
return
True
,
dic_user_count
elif
ret
==
200003
:
elif
ret
==
200003
:
log
.
info
(
f
'======{origin}-----{biz}----{user_name}账号无效session======='
)
log
.
info
(
f
'======{origin}-----{biz}----{user_name}账号无效session======='
)
# session失效修改token
# session失效修改token
...
@@ -255,7 +258,7 @@ def getPageData(dic_url, page, dic_user_count):
...
@@ -255,7 +258,7 @@ def getPageData(dic_url, page, dic_user_count):
error
=
[
origin
,
url_
,
info_source_code
,
str_t
,
'其他错误'
]
error
=
[
origin
,
url_
,
info_source_code
,
str_t
,
'其他错误'
]
insertBadSql
(
error
)
insertBadSql
(
error
)
updateTokeen
(
token
,
2
)
updateTokeen
(
token
,
2
)
return
True
return
True
,
dic_user_count
# 修改token使用时间
# 修改token使用时间
updateTokeen
(
token
,
3
)
updateTokeen
(
token
,
3
)
# 保存数据到数据库
# 保存数据到数据库
...
@@ -263,7 +266,7 @@ def getPageData(dic_url, page, dic_user_count):
...
@@ -263,7 +266,7 @@ def getPageData(dic_url, page, dic_user_count):
# 获取微信公众号数据
# 获取微信公众号数据
def
getWxList
(
infoSourceCode
):
def
getWxList
(
infoSourceCode
,
dic_user_count
):
dic_url
=
getSourceInfo
(
infoSourceCode
)
dic_url
=
getSourceInfo
(
infoSourceCode
)
log
.
info
(
f
"======{infoSourceCode}----开始采集======="
)
log
.
info
(
f
"======{infoSourceCode}----开始采集======="
)
...
@@ -276,7 +279,7 @@ def getWxList(infoSourceCode):
...
@@ -276,7 +279,7 @@ def getWxList(infoSourceCode):
return
return
origin
=
dic_url
[
'name'
]
origin
=
dic_url
[
'name'
]
biz
=
dic_url
[
'biz'
]
biz
=
dic_url
[
'biz'
]
dic_user_count
=
{}
for
page
in
range
(
1
,
6
):
for
page
in
range
(
1
,
6
):
retFlag
,
dic_user_count
=
getPageData
(
dic_url
,
page
,
dic_user_count
)
retFlag
,
dic_user_count
=
getPageData
(
dic_url
,
page
,
dic_user_count
)
time
.
sleep
(
random
.
randint
(
60
,
181
))
time
.
sleep
(
random
.
randint
(
60
,
181
))
...
@@ -286,8 +289,6 @@ def getWxList(infoSourceCode):
...
@@ -286,8 +289,6 @@ def getWxList(infoSourceCode):
else
:
else
:
# 没有结束
# 没有结束
pass
pass
for
key
,
value
in
dic_user_count
.
items
():
log
.
info
(
f
"====账号{key},采集公众号个数{value}"
)
log
.
info
(
f
"======{origin}-----{biz}----结束采集======="
)
log
.
info
(
f
"======{origin}-----{biz}----结束采集======="
)
...
@@ -310,17 +311,26 @@ def getnumber_redis():
...
@@ -310,17 +311,26 @@ def getnumber_redis():
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
# getFromSql()
numbers
=
getnumber_redis
()
numbers
=
getnumber_redis
()
log
.
info
(
"当前批次采集公众号个数{}"
.
format
(
numbers
))
log
.
info
(
"当前批次采集公众号个数{}"
.
format
(
numbers
))
time
.
sleep
(
3
)
time
.
sleep
(
3
)
dic_user_count
=
{}
# dic_user_count = {
# 'name': '',
# 'use_count': 0,
# 'gzh_count': 0
# }
while
True
:
while
True
:
start
=
time
.
time
()
start
=
time
.
time
()
log
.
info
(
f
"开始时间{baseCore.getNowTime(1)}"
)
log
.
info
(
f
"开始时间{baseCore.getNowTime(1)}"
)
infoSourceCode
=
baseCore
.
redicPullData
(
'WeiXinGZH:infoSourceCode'
)
infoSourceCode
=
baseCore
.
redicPullData
(
'WeiXinGZH:infoSourceCode'
)
# infoSourceCode = 'IN-20220609-57899'
if
infoSourceCode
==
'None'
or
infoSourceCode
==
None
:
if
infoSourceCode
==
'None'
or
infoSourceCode
==
None
:
log
.
info
(
"redis已经没有数据了,重新放置数据"
)
log
.
info
(
"redis已经没有数据了,重新放置数据"
)
log
.
info
(
f
"采集完一轮公众号耗时{baseCore.getTimeCost(start, time.time())}"
)
log
.
info
(
f
"采集完一轮公众号耗时{baseCore.getTimeCost(start, time.time())}"
)
# getFromSql()
# time.sleep(60)
# time.sleep(60)
# numbers = getnumber_redis()
# numbers = getnumber_redis()
# log.info("当前批次采集公众号个数{}".format(numbers))
# log.info("当前批次采集公众号个数{}".format(numbers))
...
@@ -328,7 +338,10 @@ if __name__ == "__main__":
...
@@ -328,7 +338,10 @@ if __name__ == "__main__":
# infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
# infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
continue
continue
getWxList
(
infoSourceCode
)
getWxList
(
infoSourceCode
,
dic_user_count
)
if
dic_user_count
:
for
key
,
value
in
dic_user_count
.
items
():
log
.
info
(
f
"====账号{key},使用次数{value}"
)
# break
# infoSourceCode = 'IN-20220917-0159'
# infoSourceCode = 'IN-20220917-0159'
# getWxList(infoSourceCode)
# getWxList(infoSourceCode)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论