Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
1
合并请求
1
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
丁双波
zzsn_spider
Commits
be47bb58
提交
be47bb58
authored
12月 19, 2023
作者:
薛凌堃
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
雪球网新三板企业
上级
71a0e996
显示空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
1249 行增加
和
0 行删除
+1249
-0
BaseCore.py
comData/NQenterprise/BaseCore.py
+717
-0
finance_xq.py
comData/NQenterprise/finance_xq.py
+467
-0
getQccId.py
comData/NQenterprise/getQccId.py
+65
-0
没有找到文件。
comData/NQenterprise/BaseCore.py
0 → 100644
浏览文件 @
be47bb58
# 核心工具包
import
os
import
random
import
socket
import
sys
import
time
import
fitz
import
logbook
import
logbook.more
import
pandas
as
pd
import
requests
import
zhconv
import
pymysql
import
redis
from
selenium
import
webdriver
from
selenium.webdriver.chrome.service
import
Service
from
openpyxl
import
Workbook
import
langid
#创建连接池
import
pymysql
from
pymysql
import
connections
from
DBUtils.PooledDB
import
PooledDB
# import sys
# sys.path.append('D://zzsn_spider//base//fdfs_client')
from
fdfs_client.client
import
get_tracker_conf
,
Fdfs_client
# Build the shared FastDFS client once, at import time, from the tracker
# configuration file.
# NOTE(review): the configuration path is an absolute Windows path; confirm it
# exists on every machine that imports this module.
tracker_conf = get_tracker_conf(
    'D:\\kkwork\\zzsn_spider\\base\\client.conf'
)
client = Fdfs_client(tracker_conf)
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源
class
BaseCore
:
# 序列号
__seq
=
0
# 代理池 数据库连接
# __cnx_proxy =None
# __cursor_proxy = None
cnx
=
None
cursor
=
None
cnx_
=
None
cursor_
=
None
r
=
None
# agent 池
__USER_AGENT_LIST
=
[
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.29 Safari/525.13'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/531.4 (KHTML, like Gecko) Chrome/3.0.194.0 Safari/531.4'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.50 Safari/525.19'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0'
,
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; Lunascape 5.0 alpha2)'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.7 Safari/532.2'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.10 Safari/532.0'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Maxthon;'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.1 (KHTML, like Gecko) Chrome/2.0.169.0 Safari/530.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP; rv:1.7) Gecko/20040614 Firefox/0.9'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.810.0 Safari/535.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.500.0 Safari/534.6'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; TencentTraveler)'
,
'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.4 (KHTML, like Gecko) Chrome/6.0.481.0 Safari/534.4'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.370.0 Safari/533.4'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.31 Safari/525.19'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.17) Gecko/20110123 (like Firefox/3.x) SeaMonkey/2.0.12'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB) AppleWebKit/534.1 (KHTML, like Gecko) Chrome/6.0.428.0 Safari/534.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.540.0 Safari/534.10'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE) Chrome/4.0.223.3 Safari/532.2'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/12.0.702.0 Safari/534.24'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.42 Safari/525.19'
,
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.3 (KHTML, like Gecko) Chrome/4.0.227.0 Safari/532.3'
,
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.8 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.8'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.460.0 Safari/534.3'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.463.0 Safari/534.3'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.9 (KHTML, like Gecko) Chrome/2.0.157.0 Safari/528.9'
,
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.794.0 Safari/535.1'
,
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.694.0 Safari/534.24'
,
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5'
,
'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.4 Safari/532.2'
,
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11'
,
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.682.0 Safari/534.21'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.0 (KHTML, like Gecko) Chrome/2.0.182.0 Safari/531.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.9 (KHTML, like Gecko) Chrome/7.0.531.0 Safari/534.9'
,
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)'
,
'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.811.0 Safari/535.1'
,
'ozilla/5.0 (Windows; U; Windows NT 5.0; de-DE; rv:1.7.5) Gecko/20041108 Firefox/1.0'
,
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'
,
'Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.127 Safari/533.4'
,
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E) QQBrowser/6.9.11079.201'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10'
,
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; zh-cn) Opera 8.50'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/7.0.0 Safari/700.13'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.4 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20041107 Firefox/0.9.2 StumbleUpon/1.994'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.7.5) Gecko/20041110 Firefox/1.0'
,
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; en) Opera 8.0'
,
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.6 Safari/530.5'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.0.3705)'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0'
,
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.1 (KHTML, like Gecko) Chrome/2.0.168.0 Safari/530.1'
,
'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20040913 Firefox/0.10'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.8 (KHTML, like Gecko) Chrome/2.0.177.1 Safari/530.8'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8'
,
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.40 Safari/530.5'
,
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.24 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.10 (KHTML, like Gecko) Chrome/2.0.157.2 Safari/528.10'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.2 Safari/532.2'
,
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; T312461)'
,
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.461.0 Safari/534.3'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.0; rv:1.7.3) Gecko/20041001 Firefox/0.10.1'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)'
,
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; de-DE) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.2 Safari/532.0'
,
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/531.3 (KHTML, like Gecko) Chrome/3.0.193.2 Safari/531.3'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1'
,
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
,
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.864.0 Safari/535.2'
,
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0'
,
'Mozilla/5.0 (Windows NT 5.1; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1'
,
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.801.0 Safari/535.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7'
,
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.697.0 Safari/534.24'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.548.0 Safari/534.10'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.652.0 Safari/534.17'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10 ChromePlus/1.5.2.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.0 Safari/532.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.2 Safari/533.2'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.4 Safari/532.1'
,
'Mozilla/5.0 (Windows NT 6.0; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.0 Safari/525.19'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; sv-SE; rv:1.7.5) Gecko/20041108 Firefox/1.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.462.0 Safari/534.3'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE; rv:1.7.5) Gecko/20041122 Firefox/1.0'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; uZardWeb/1.0; Server_JP)'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; HCI0449; .NET CLR 1.0.3705)'
,
'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98; DigExt); Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1);'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.23 Safari/530.5'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0'
,
'Mozilla/5.0 (Windows NT 6.0; rv:14.0) Gecko/20100101 Firefox/14.0.1'
,
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36'
,
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.7 (KHTML, like Gecko) Chrome/2.0.176.0 Safari/530.7'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0'
,
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)'
,
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; InfoPath.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.55 Safari/525.19'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0a1) Gecko/20110623 Firefox/7.0a1 Fennec/7.0a1'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.724.100 Safari/534.30'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.33 Safari/534.3 SE 2.X MetaSr 1.0'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; WOW64; SV1; uZardWeb/1.0; Server_HK)'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1'
,
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'
,
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)'
,
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3'
,
'Mozilla/5.0 (Windows NT 6.0) yi; AppleWebKit/345667.12221 (KHTML, like Gecko) Chrome/23.0.1271.26 Safari/453667.1221'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.2 (KHTML, like Gecko) Chrome/3.0.191.3 Safari/531.2'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.39 Safari/530.5'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.38 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8b) Gecko/20050118 Firefox/1.0+'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP; rv:1.7) Gecko/20040707 Firefox/0.9.2'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0'
,
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.4 (KHTML, like Gecko) Chrome/2.0.171.0 Safari/530.4'
,
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)'
,
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; nl-NL; rv:1.7.5) Gecko/20041202 Firefox/1.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.204.0 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.6 Safari/532.2'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/528.8 (KHTML, like Gecko) Chrome/1.0.156.0 Safari/528.8'
,
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/6.0)'
,
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 1.0.3705; .NET CLR 2.0.50727; .NET CLR 1.1.4322)'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.43 Safari/534.7'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13'
,
'Mozilla/5.0 (ipad Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.498.0 Safari/534.6'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.43 Safari/530.5'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0'
,
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.19 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.19'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-CA) AppleWebKit/534.13 (KHTML like Gecko) Chrome/9.0.597.98 Safari/534.13'
,
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0'
,
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.6 (KHTML, like Gecko) Chrome/2.0.174.0 Safari/530.6'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.3.154.6 Safari/525.19'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.599.0 Safari/534.13'
,
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.8 (KHTML, like Gecko) Chrome/7.0.521.0 Safari/534.8'
,
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1b2pre) Gecko/20081015 Fennec/1.0a1'
,
'Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5'
]
#Android agent池
__USER_PHONE_AGENT_LIST
=
[
'Mozilla/5.0 (Linux; Android 7.1.1; OPPO R9sk) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36'
]
def __init__(self):
    """Open the MySQL connections, the Redis client and both connection pools.

    Two direct connections (with one cursor each) are kept on the instance
    next to two pooled connection factories that target the same databases.

    NOTE(review): hosts and credentials are hard-coded here; consider loading
    them from configuration instead of source control.
    """
    # Settings shared by the direct connection and the pool of each database.
    caiji_server = {
        'host': '114.115.159.144',
        'user': 'caiji',
        'password': 'zzsn9988',
        'charset': 'utf8mb4',
    }
    project_server = {
        'host': '114.116.44.11',
        'user': 'caiji',
        'password': 'f7s0&7qqtK',
        'charset': 'utf8mb4',
    }
    pool_settings = {
        'creator': pymysql,
        'maxconnections': 5,
        'mincached': 2,
        'maxcached': 5,
        'blocking': True,
        'port': 3306,
    }

    # Direct connection to the "caiji" database.
    self.cnx = pymysql.connect(db='caiji', **caiji_server)
    self.cursor = self.cnx.cursor()

    # Direct connection to the "clb_project" database (the ".11" server).
    self.cnx_ = pymysql.connect(db='clb_project', **project_server)
    self.cursor_ = self.cnx_.cursor()

    # Redis client.
    self.r = redis.Redis(
        host="114.115.236.206", port=6379, password='clbzzsn', db=6
    )

    # Connection pools for the same two databases.
    self.pool_caiji = PooledDB(
        database='caiji', **pool_settings, **caiji_server
    )
    self.pool_11 = PooledDB(
        database='clb_project', **pool_settings, **project_server
    )
def close(self):
    """Release every resource opened by ``__init__`` on a best-effort basis.

    Each cursor, connection, pool and the Redis client is closed
    independently, so a failure while closing one of them no longer prevents
    the others from being released. Attributes that are missing or ``None``
    are skipped. Errors raised by an individual ``close()`` call are ignored
    because this method is meant to be called during shutdown.
    """
    resource_names = (
        'cursor', 'cnx',        # direct "caiji" connection
        'cursor_', 'cnx_',      # direct "clb_project" connection
        'pool_caiji', 'pool_11',
        'r',
    )
    for name in resource_names:
        resource = getattr(self, name, None)
        if resource is None:
            continue
        try:
            resource.close()
        except Exception:
            # Best effort: keep releasing the remaining resources.
            pass
# 计算耗时
def getTimeCost(self, start, end):
    """Describe the elapsed time between two timestamps as Chinese text.

    :param start: start time in seconds (e.g. a ``time.time()`` value)
    :param end: end time in seconds
    :return: hours/minutes/seconds text when at least one whole second has
        elapsed, otherwise the elapsed time in milliseconds
    """
    elapsed = end - start
    seconds = int(elapsed)
    hours, remainder = divmod(seconds, 3600)
    minutes, secs = divmod(remainder, 60)

    if hours > 0:
        return "%d小时%d分钟%d秒" % (hours, minutes, secs)
    if minutes > 0:
        return "%d分钟%d秒" % (minutes, secs)
    if seconds > 0:
        return "%d秒" % (secs)
    # Less than one whole second: report milliseconds instead.
    return "%d毫秒" % (int(elapsed * 1000))
# 当前时间格式化
# 1 : 2001-01-01 12:00:00 %Y-%m-%d %H:%M:%S
# 2 : 010101120000 %y%m%d%H%M%S
# 3 : 1690179526555  Unix timestamp in milliseconds
def getNowTime(self, type):
    """Return the current time in one of three representations.

    :param type: 1 for ``"%Y-%m-%d %H:%M:%S"`` text, 2 for compact
        ``"%y%m%d%H%M%S"`` text, 3 for an integer timestamp in milliseconds
    :return: the formatted value, or an empty string for any other ``type``
    """
    if type == 1:
        return time.strftime("%Y-%m-%d %H:%M:%S")
    if type == 2:
        return time.strftime("%y%m%d%H%M%S")
    if type == 3:
        return int(time.time() * 1000)
    # Unknown selector: the caller gets an empty string.
    return ""
# 获取流水号
def getNextSeq(self):
    """Return the next serial number: compact timestamp plus a 3-digit counter.

    The counter is incremented on every call and wraps back to 0 after 999,
    so the suffix is always exactly three digits. (Previously the counter
    was allowed to reach 1000, which produced a four-digit suffix.)

    :return: ``getNowTime(2)`` followed by the zero-padded counter
    """
    self.__seq += 1
    if self.__seq > 999:
        # Wrap before the counter needs a fourth digit.
        self.__seq = 0
    return self.getNowTime(2) + str(self.__seq).zfill(3)
# 获取信用代码
def getNextXydm(self):
    """Return a generated credit code: ``"ZZSN"``, timestamp and 3-digit counter.

    The counter is incremented on every call and wraps back to 0 after 999,
    so the suffix is always exactly three digits. (Previously the counter
    was allowed to reach 1000, which produced a four-digit suffix.)

    :return: ``"ZZSN"`` + ``getNowTime(2)`` + the zero-padded counter
    """
    self.__seq += 1
    if self.__seq > 999:
        # Wrap before the counter needs a fourth digit.
        self.__seq = 0
    return "ZZSN" + self.getNowTime(2) + str(self.__seq).zfill(3)
# 日志格式
def logFormate(self, record, handler):
    """Render one log record as a single bracketed line.

    :param record: log record exposing ``time``, ``level_name``, ``filename``,
        ``func_name``, ``lineno`` and ``message``
    :param handler: the handler requesting the formatting (unused)
    :return: ``[time] [level] [file name] [function] [line] message``
    """
    template = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}"
    fields = {
        'date': record.time,
        'level': record.level_name,
        # Only the file name is shown, not the full path.
        'filename': os.path.split(record.filename)[-1],
        'func_name': record.func_name,
        'lineno': record.lineno,
        'msg': record.message,
    }
    return template.format(**fields)
# 获取logger
def getLogger(self, fileLogFlag=True, stdOutFlag=True):
    """Create a logbook logger named after the running script.

    Log files are written to a ``logs`` directory next to the script given
    by ``sys.argv[0]``; the log name is the script name with ``.py`` removed
    and ``.log`` appended.

    :param fileLogFlag: attach a daily-rotating file handler when true
    :param stdOutFlag: attach a colorized stderr handler when true
    :return: the configured ``logbook.Logger``
    """
    script_dir, script_name = os.path.split(os.path.abspath(sys.argv[0]))
    log_dir = os.path.join(script_dir, "logs")
    log_name = script_name.replace(".py", "") + ".log"

    if not os.path.exists(log_dir):
        # exist_ok avoids a crash when another process creates the directory
        # between the existence check and this call.
        os.makedirs(log_dir, exist_ok=True)

    logbook.set_datetime_format('local')
    logger = logbook.Logger(log_name)
    logger.handlers = []

    if fileLogFlag:
        # Write log records to a date-rotated file.
        file_handler = logbook.TimedRotatingFileHandler(
            os.path.join(log_dir, log_name),
            date_format='%Y-%m-%d',
            bubble=True,
            encoding='utf-8',
        )
        file_handler.formatter = self.logFormate
        logger.handlers.append(file_handler)

    if stdOutFlag:
        # Echo log records to the console.
        console_handler = logbook.more.ColorizedStderrHandler(bubble=True)
        console_handler.formatter = self.logFormate
        logger.handlers.append(console_handler)

    return logger
# 获取随机的userAgent
def getRandomUserAgent(self):
    """Pick one User-Agent string at random from the class-level agent list."""
    agents = self.__USER_AGENT_LIST
    return random.choice(agents)
# 获取代理
def get_proxy(self):
    """Return one randomly chosen proxy mapping from the ``clb_proxy`` table.

    Every row is expected to hold a ``"host-port"`` string in its first
    column. The choice is made over all rows; the previous implementation
    indexed with ``random.randint(0, 3)``, which failed when fewer than four
    proxies were stored and never used any beyond the fourth.

    :return: ``{"HTTP": url, "HTTPS": url}`` with ``url`` of the form
        ``http://host:port``
    :raises IndexError: if the table is empty or a row has no ``-`` separator
    """
    self.cursor.execute("select proxy from clb_proxy")
    rows = self.cursor.fetchall()

    proxy_list = []
    for row in rows:
        parts = str(row[0]).split('-')
        proxy_url = "http://%(host)s:%(port)s" % {
            "host": parts[0],
            "port": parts[1],
        }
        # NOTE(review): requests appears to look proxies up by lower-case
        # scheme ("http"/"https"); the upper-case keys are kept because
        # callers may read them directly -- confirm before changing.
        proxy_list.append({"HTTP": proxy_url, "HTTPS": proxy_url})

    return random.choice(proxy_list)
#字符串截取
def getSubStr(self, str, beginStr, endStr):
    """Cut a string down to the part between two markers.

    The text is first trimmed to start at the last occurrence of
    ``beginStr`` (left unchanged if it is not found), then trimmed to end
    one character past the position where the last ``endStr`` begins (left
    unchanged if it is not found). An empty marker disables that step.

    :param str: text to cut
    :param beginStr: start marker, or ``''`` to keep the beginning
    :param endStr: end marker, or ``''`` to keep the end
    :return: the trimmed text
    """
    text = str
    if beginStr != '':
        # rfind gives -1 when the marker is missing; start from 0 then.
        start = max(text.rfind(beginStr), 0)
        text = text[start:]
    if endStr != '':
        stop = text.rfind(endStr)
        if stop != -1:
            text = text[:stop + 1]
    return text
# 繁体字转简体字
def hant_2_hans(self, hant_str: str):
    """Convert ``hant_str`` from Traditional to Simplified Chinese.

    :param hant_str: text to convert
    :return: the result of ``zhconv.convert`` with the ``'zh-hans'`` target
    """
    simplified = zhconv.convert(hant_str, 'zh-hans')
    return simplified
# 判断字符串里是否含数字
def str_have_num(self, str_num):
    """Tell whether the given string contains at least one digit character.

    :param str_num: text to inspect
    :return: ``True`` if any character is a digit, otherwise ``False``
    """
    return any(ch.isdigit() for ch in str_num)
# # 从Redis的List中获取并移除一个元素
# def redicPullData(self,type,key):
# #1 表示国内 2 表示国外
# if type == 1:
# gn_item = self.r.lpop(key)
# return gn_item.decode() if gn_item else None
# if type == 2:
# gw_item = self.r.lpop(key)
# return gw_item.decode() if gw_item else None
# 从Redis的List中获取并移除一个元素
def redicPullData(self, key):
    """Pop the first element of a Redis list and return it decoded.

    :param key: name of the Redis list
    :return: the decoded element, or ``None`` when nothing (or an empty
        value) was popped
    """
    raw = self.r.lpop(key)
    if not raw:
        return None
    return raw.decode()
# 获得脚本进程PID
def getPID(self):
    """Return the process id of the running script."""
    return os.getpid()
# 获取本机IP
def getIP(self):
    """Return the IP address that the local host name resolves to."""
    hostname = socket.gethostname()
    return socket.gethostbyname(hostname)
def mkPath(self, path):
    """Create the directory ``path`` (including parents) if nothing exists there.

    ``exist_ok=True`` keeps the call from failing when another process
    creates the same directory between the existence check and the creation.

    :param path: directory to create
    """
    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)
# 生成google模拟浏览器 必须传入值为googledriver位置信息
# headless用于决定是否为无头浏览器,初始默认为无头浏览器
# 正常浏览器可用于开始对页面解析使用或一些网站无头时无法正常采集
# 无头浏览器用于后续对信息采集时不会有浏览器一直弹出,
def buildDriver(self, path, headless=True):
    """Build a Chrome WebDriver with automation switches disabled.

    :param path: location of the chromedriver executable
    :param headless: when true, start Chrome with ``--headless`` and
        ``--disable-gpu``
    :return: the started ``webdriver.Chrome`` instance
    """
    service = Service(path)
    chrome_options = webdriver.ChromeOptions()

    startup_flags = ['--headless', '--disable-gpu'] if headless else []
    for flag in startup_flags:
        chrome_options.add_argument(flag)

    experimental_options = (
        ("excludeSwitches", ["enable-automation"]),
        ('useAutomationExtension', False),
    )
    for option_name, option_value in experimental_options:
        chrome_options.add_experimental_option(option_name, option_value)

    chrome_options.add_argument('lang=zh-CN,zh,zh-TW,en-US,en')
    # A random User-Agent from the class pool is used for every new driver.
    user_agent = self.getRandomUserAgent()
    chrome_options.add_argument('user-agent=' + user_agent)

    return webdriver.Chrome(options=chrome_options, service=service)
# 根据社会信用代码获取企业信息
def getInfomation(self, social_code):
    """Fetch one listed NEEQ ("新三板") enterprise row by social credit code.

    The query runs on a connection from ``pool_11``. The credit code is
    passed as a bound parameter instead of being formatted into the SQL
    text, and the cursor and connection are always released, including when
    the query fails or matches nothing.

    :param social_code: social credit code to look up
    :return: the row as a list; ``None`` when no row matches; an empty list
        when the database operation fails
    """
    sql = (
        "SELECT * FROM sys_base_enterprise_ipo "
        "WHERE social_credit_code = %s"
        "and securities_type='新三板' and listed='1' "
    )
    data = []
    conn = None
    cursor = None
    try:
        conn = self.pool_11.connection()
        cursor = conn.cursor()
        cursor.execute(sql, (social_code,))
        row = cursor.fetchone()
        # Keep the historical contract: a missing row yields None.
        data = list(row) if row is not None else None
    except Exception:
        log = self.getLogger()
        log.info('=========数据库操作失败========')
    finally:
        # Release the cursor and connection even on failure.
        for resource in (cursor, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception:
                pass
    return data
# 更新企业采集次数
def updateRun(self, social_code, runType, count):
    """Store a collection counter for one enterprise in ``EnterpriseInfo``.

    The update runs on a connection from ``pool_caiji``. ``count`` and
    ``social_code`` are passed as bound parameters; the cursor and
    connection are always released, including when the update fails.
    Failures are logged and not re-raised.

    :param social_code: value matched against the ``SocialCode`` column
    :param runType: name of the column to set
    :param count: value to store in that column
    """
    # A column name cannot be a bound parameter, so runType is still
    # formatted into the statement.
    # NOTE(review): runType must only ever come from trusted code.
    sql_update = (
        f"UPDATE EnterpriseInfo SET {runType} = %s WHERE SocialCode = %s"
    )
    conn = None
    cursor = None
    try:
        conn = self.pool_caiji.connection()
        cursor = conn.cursor()
        cursor.execute(sql_update, (count, social_code))
        conn.commit()
    except Exception:
        log = self.getLogger()
        log.info('======更新数据库失败======')
    finally:
        # Release the cursor and connection even on failure.
        for resource in (cursor, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception:
                pass
# 保存日志入库
def recordLog(self, xydm, taskType, state, takeTime, url, e):
    """Insert one task log row into ``LogTable``.

    The creation time, local IP and process id are filled in from
    ``getNowTime(1)``, ``getIP()`` and ``getPID()``. The insert runs on a
    connection from ``pool_caiji``; the cursor and connection are always
    released, including when the insert fails. Failures are logged and not
    re-raised.

    :param xydm: value stored in the ``SocialCode`` column
    :param taskType: value stored in the ``TaskType`` column
    :param state: value stored in the ``state`` column
    :param takeTime: value stored in the ``TakeTime`` column
    :param url: value stored in the ``url`` column
    :param e: value stored in the ``Exception`` column
    """
    sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    cnn = None
    cursor = None
    try:
        createTime = self.getNowTime(1)
        ip = self.getIP()
        pid = self.getPID()
        values = [xydm, taskType, state, takeTime, url, createTime, ip, pid, e]
        cnn = self.pool_caiji.connection()
        cursor = cnn.cursor()
        cursor.execute(sql, values)
        cnn.commit()
    except Exception:
        log = self.getLogger()
        log.info('======保存日志失败=====')
    finally:
        # Release the cursor and connection even on failure.
        for resource in (cursor, cnn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception:
                pass
#获取企查查token
def GetToken(self):
    """Read the Qichacha (QCC) token from the ``QCC_token`` table.

    :return: the first column of the first row returned by the query
    """
    db_cursor = self.cursor
    db_cursor.execute("select token from QCC_token ")
    first_row = db_cursor.fetchone()
    return first_row[0]
#获取天眼查token
def GetTYCToken(self):
    """Read the Tianyancha (TYC) token from the ``TYC_token`` table.

    :return: the first column of the first row returned by the query
    """
    db_cursor = self.cursor
    db_cursor.execute('select token from TYC_token')
    first_row = db_cursor.fetchone()
    return first_row[0]
#检测语言
def detect_language(self, text):
    """Classify the language of ``text`` with ``langid``.

    :param text: text to classify
    :return: the first element of the ``langid.classify`` result, or
        ``'cn'`` when the result or its first element is an empty string
    """
    result = langid.classify(text)
    if result == '' or result[0] == '':
        return 'cn'
    return result[0]
#追加接入excel
def writerToExcel(self, detailList, filename):
    """Append rows to an existing ``.xlsx`` file.

    The workbook is read with every column as text, the new rows are added
    after the existing ones, and the combined table is written back to the
    same file without the index column.

    ``pd.concat`` is used for the append because ``DataFrame.append`` was
    removed in pandas 2.0.

    :param detailList: data accepted by ``pd.DataFrame(data=...)``
    :param filename: path of the existing ``.xlsx`` file to extend
    """
    existing_data = pd.read_excel(filename, engine='openpyxl', dtype=str)
    new_data = pd.DataFrame(data=detailList)
    combined_data = pd.concat([existing_data, new_data], ignore_index=True)
    combined_data.to_excel(filename, index=False)
#对失败或者断掉的企业 重新放入redis
def rePutIntoR(self, key, item):
    """Push ``item`` back onto the tail of the Redis list ``key`` (for failed or interrupted companies)."""
    redis_conn = self.r
    redis_conn.rpush(key, item)
#增加计数器的值并返回增加后的值
def incrSet(self, key):
    """Increment the Redis counter ``key``, print the new value and return it."""
    updated = self.r.incr(key)
    print("增加后的值:", updated)
    return updated
#获取key剩余的过期时间
def getttl(self, key):
    """Print the remaining TTL of ``key`` and re-arm the key when the TTL is negative.

    When Redis reports a negative TTL the key is reset to ``0``, given a
    3600-second expiry, and the call then sleeps for two seconds.
    """
    remaining = self.r.ttl(key)
    print("剩余过期时间:", remaining)
    if remaining >= 0:
        return
    # Negative TTL: treat the key as expired and start a fresh one-hour window.
    self.r.set(key, 0)
    self.r.expire(key, 3600)
    time.sleep(2)
#上传至文件服务器,并解析pdf的内容和页数
def upLoadToServe(self, pdf_url, type_id, social_code):
    """Download a PDF, upload it to the file server and extract its text.

    Args:
        pdf_url: URL the PDF is downloaded from.
        type_id: Stored unchanged under ``'type_id'`` in the result.
        social_code: Stored unchanged under ``'item_id'`` in the result.

    Returns:
        dict: Attachment metadata. ``'state'`` is ``True`` only when the PDF
        was opened and reports at least one page; in that case ``'path'``,
        ``'full_path'``, ``'file_size'``, ``'create_time'``, ``'page_size'``
        and ``'content'`` are filled in. Otherwise the dict keeps its initial
        empty values and ``'state'`` stays ``False``.
    """
    retData = {'state': False, 'type_id': type_id, 'item_id': social_code, 'group_name': 'group1', 'path': '',
               'full_path': '',
               'category': 'pdf', 'file_size': '', 'status': 1, 'create_by': 'XueLingKun',
               'create_time': '', 'page_size': '', 'content': ''}
    headers = {'User-Agent': self.getRandomUserAgent()}

    # Step 1: download, up to three attempts.
    resp_content = None
    for _ in range(3):
        try:
            resp_content = requests.get(pdf_url, headers=headers, verify=False, timeout=20).content
            break
        except Exception:
            time.sleep(3)
    if resp_content is None:
        # Nothing was downloaded, so there is nothing to upload or parse.
        # Report the same failure as before without the three doomed retries.
        print('======pdf解析失败=====')
        return retData

    # Step 2: upload once, then parse; up to three attempts in total.
    page_size = 0
    result = None
    for _ in range(3):
        try:
            if result is None:
                # Do not upload again when only the parsing step is retried.
                result = client.upload_by_buffer(resp_content, file_ext_name='pdf')
            # Start from a clean slate so a retry cannot duplicate page text.
            retData['content'] = ''
            with fitz.open(stream=resp_content, filetype='pdf') as doc:
                page_size = doc.page_count
                for page in doc.pages():
                    retData['content'] += page.get_text()
            break
        except Exception:
            time.sleep(3)

    if page_size < 1:
        # PDF could not be parsed.
        print('======pdf解析失败=====')
        return retData

    remote_id = bytes.decode(result['Remote file_id'])
    retData['state'] = True
    retData['path'] = remote_id.replace('group1', '')
    retData['full_path'] = remote_id
    retData['file_size'] = result['Uploaded size']
    retData['create_time'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    retData['page_size'] = page_size
    return retData
def secrchATT(self, item_id, year, type_id):
    """Look up an attachment row by ``item_id``, ``year`` and ``type_id``.

    Returns:
        The first matching row from ``clb_sys_attachment`` (only the ``id``
        column is selected), or whatever ``fetchone()`` yields when there is
        no match.
    """
    sel_sql = '''select id from clb_sys_attachment where item_id = %s and year = %s and type_id=%s '''
    lookup_args = (item_id, year, type_id)
    self.cursor_.execute(sel_sql, lookup_args)
    return self.cursor_.fetchone()
#插入到att表 返回附件id
def tableUpdate(self, retData, com_name, year, pdf_name, num):
    """Insert an attachment row unless one already exists, and return its id.

    Args:
        retData: Attachment metadata dict; the keys read are listed in
            ``wanted`` below.
        com_name: Company name, used only in the "already exists" log line.
        year: Value for the ``year`` column; also part of the lookup key.
        pdf_name: Value for the ``name`` column.
        num: Value for the ``order_by`` column.

    Returns:
        The ``id`` of the existing or newly inserted ``clb_sys_attachment`` row.
    """
    wanted = ('item_id', 'type_id', 'group_name', 'path', 'full_path', 'category',
              'file_size', 'status', 'create_by', 'page_size', 'create_time')
    att = {field: retData[field] for field in wanted}

    found = self.secrchATT(att['item_id'], year, att['type_id'])
    if found:
        self.getLogger().info(f'com_name:{com_name}已存在')
        return found[0]

    Upsql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,page_size) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
    row = (
        year, pdf_name, att['type_id'], att['item_id'], att['group_name'], att['path'],
        att['full_path'], att['category'], att['file_size'], num, att['status'],
        att['create_by'], att['create_time'], att['page_size'],
    )
    self.cursor_.execute(Upsql, row)  # insert
    self.cnx_.commit()  # commit
    self.getLogger().info("更新完成:{}".format(Upsql))
    # Query again to obtain the id of the row that was just inserted.
    inserted = self.secrchATT(att['item_id'], year, att['type_id'])
    return inserted[0]
# 更新企业的CIK
def updateCIK(self, social_code, cik):
    """Set the ``CIK`` column of one company in ``EnterpriseInfo``.

    Args:
        social_code: ``SocialCode`` of the row to update.
        cik: New ``CIK`` value.

    Returns:
        None. Failures are logged and not propagated.
    """
    # Bound parameters instead of string interpolation: values containing
    # quotes no longer break the statement or allow SQL injection.
    sql = "UPDATE EnterpriseInfo SET CIK = %s WHERE SocialCode = %s"
    cnn = None
    cursor = None
    try:
        cnn = self.pool_caiji.connection()
        cursor = cnn.cursor()
        cursor.execute(sql, (cik, social_code))
        cnn.commit()
    except Exception:
        self.getLogger().info('======保存企业CIK失败=====')
    finally:
        # Release the cursor and return the pooled connection on every path.
        for handle in (cursor, cnn):
            if handle is None:
                continue
            try:
                handle.close()
            except Exception:
                self.getLogger().info('======保存企业CIK失败=====')
comData/NQenterprise/finance_xq.py
0 → 100644
浏览文件 @
be47bb58
"""
雪球网财务数据 根据接口
"""
import
json
import
time
import
redis
import
requests
from
bs4
import
BeautifulSoup
import
datetime
from
selenium
import
webdriver
from
BaseCore
import
BaseCore
# Module-level helpers created at import time: a BaseCore instance and the
# logger it hands out. The functions below log through `log`.
baseCore
=
BaseCore
()
log
=
baseCore
.
getLogger
()
def getFormatedate(timestamp):
    """Convert a Unix timestamp in seconds to a local-time ``YYYY-MM-DD`` string."""
    return datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')
def check_code(com_code):
    """Return ``True`` when Redis has no ``com_xqcwsj_code::<com_code>`` key yet, else ``False``."""
    conn = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
    return not conn.exists('com_xqcwsj_code::' + com_code)
def check_date(com_code, info_date):
    """Return ``True`` when ``info_date`` is already a member of the company's Redis set."""
    conn = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
    # Collected report periods are stored as a Redis set per stock code.
    already_stored = conn.sismember('com_xqcwsj_code::' + com_code, info_date)
    return True if already_stored else False
# 将采集后的股票代码对应的报告期保存进redis
def add_date(com_code, report_date):
    """Add ``report_date`` to the Redis set of report periods collected for ``com_code``."""
    conn = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
    conn.sadd('com_xqcwsj_code::' + com_code, report_date)
def getrequests(url):
    """GET ``url`` with the module-level ``headers`` and return ``data.list`` from the JSON reply."""
    payload = requests.get(url=url, headers=headers).json()
    return payload['data']['list']
def getdriver(url_name):
    """Open ``url_name`` in the shared ``browser``, wait three seconds, and return the page as soup."""
    # Load the page through the simulated browser.
    browser.get(url_name)
    time.sleep(3)
    return BeautifulSoup(browser.page_source, 'html.parser')
def getdetail(reportInfodata, name_map, listinfo, url_name):
    """Pair each row label of the rendered statement page with its API value.

    The page at ``url_name`` is loaded through ``getdriver``; every table row
    after the header contributes one entry built from the row's label cell.

    Args:
        reportInfodata: One report-period record from the JSON API; values
            are indexed with ``[0]``.
        name_map: Mapping from the Chinese row label to the API field name.
        listinfo: List the resulting entries are appended to (mutated).
        url_name: URL of the statement page to open in the browser.

    Returns:
        ``listinfo`` with one ``{"name", "enName", "value", "unit"}`` dict
        appended per table row. Rows that cannot be resolved get ``'--'`` as
        both ``enName`` and ``value``.
    """
    # Load the page with the simulated browser.
    soup = getdriver(url_name)
    time.sleep(2)
    table = soup.find('div', class_='tab-table-responsive')
    for tr in table.find_all('tr')[1:]:
        # Row label, e.g. "营业总收入".
        info_name = tr.find('td', colspan='2').text
        try:
            info_enname = name_map[info_name]
            info_data = reportInfodata[info_enname][0]
            if info_data is None:
                info_data = '--'
        except (KeyError, IndexError, TypeError):
            # Unknown label, unmapped/missing API field, or a value that is
            # not indexable: emit placeholders instead of failing the page.
            info_enname = '--'
            info_data = '--'
        listinfo.append({
            "name": info_name,
            'enName': info_enname,
            "value": info_data,
            "unit": "元"
        })
    return listinfo
def _call_with_retry(func, *args, attempts=3, delay=1):
    """Call ``func(*args)`` up to ``attempts`` times, sleeping ``delay`` seconds after each failure.

    Returns ``(True, result)`` on the first success, or ``(False, None)`` when
    every attempt raised.
    """
    for _ in range(attempts):
        try:
            return True, func(*args)
        except Exception:
            time.sleep(delay)
    return False, None


def _collect_report(rows, info_date, com_code, name_map, page_url, label):
    """Return the detail list for the row of ``rows`` whose report date equals ``info_date``, or ``[]``."""
    for row in rows:
        # report_date is divided by 1000 before being formatted as a date.
        if getFormatedate(int(row['report_date'] / 1000)) != info_date:
            continue
        log.info(f'======正在采集{label}:{com_code}---{info_date}=======')
        items = getdetail(row, name_map, [], page_url)
        log.info(f'{label}数据:{len(items)}个')
        return items
    return []


def getinfo(info_date, com_code, social_code):
    """Collect the three financial statements of one report period and push them to the sync API.

    Args:
        info_date: Report period as ``YYYY-MM-DD``.
        com_code: Stock code as used in the URLs below (the caller prefixes it with ``NQ``).
        social_code: Social credit code sent along as ``socialCreditCode``.

    Returns:
        None. The period is recorded in Redis only after the POST succeeded.
    """
    # Skip periods that were already collected. When Redis cannot be reached
    # the period is skipped instead of failing on an undefined variable.
    ok, panduan = _call_with_retry(check_date, com_code, info_date)
    if not ok:
        log.error(f'---{com_code}--{info_date}--redis检查报告期失败,跳过')
        return
    if panduan:
        log.info(f'{info_date}----已采集过')
        return

    ok, ynFirst = _call_with_retry(check_code, com_code)
    if not ok:
        log.error(f'---{com_code}--{info_date}--redis检查股票代码失败,跳过')
        return

    # JSON endpoints: income statement, balance sheet, cash-flow statement.
    url_lrb = f'https://stock.xueqiu.com/v5/stock/finance/cn/income.json?symbol={com_code}&type=all&is_detail=true&count=5&timestamp=1694414063178'
    url_zcfzb = f'https://stock.xueqiu.com/v5/stock/finance/cn/balance.json?symbol={com_code}&type=all&is_detail=true&count=5&timestamp=1694508688637'
    url_xjllb = f'https://stock.xueqiu.com/v5/stock/finance/cn/cash_flow.json?symbol={com_code}&type=all&is_detail=true&count=5&timestamp=1694512695956'
    # Rendered pages whose table rows give the item labels.
    lrb_name = f'https://xueqiu.com/snowman/S/{com_code}/detail#/GSLRB'
    zcfzb_name = f'https://xueqiu.com/snowman/S/{com_code}/detail#/ZCFZB'
    xjllb_name = f'https://xueqiu.com/snowman/S/{com_code}/detail#/XJLLB'

    a_infoData = getrequests(url_lrb)
    b_infoData = getrequests(url_zcfzb)
    c_infoData = getrequests(url_xjllb)

    listLrb = _collect_report(a_infoData, info_date, com_code, lrb_name_map, lrb_name, '利润表')
    listZcfzb = _collect_report(b_infoData, info_date, com_code, zcfzb_name_map, zcfzb_name, '资产负债表')
    listXjllb = _collect_report(c_infoData, info_date, com_code, xjllb_name_map, xjllb_name, '现金流量表')

    # NOTE(review): when no statement has a row for info_date, three empty
    # lists are still posted and the period is marked as collected, as
    # before. Confirm that this is intended.
    dic_info = {
        "socialCreditCode": social_code,
        "securitiesCode": com_code[2:],
        "date": info_date,
        "debt": listZcfzb,
        "profit": listLrb,
        "cash": listXjllb,
        "ynFirst": ynFirst,
    }
    # One report period finished.
    log.info(f'----{com_code}--{info_date}----结束')

    # Store the data through the sync interface.
    data = json.dumps(dic_info)
    url_baocun = 'http://114.115.236.206:8088/sync/finance/xq'
    sent, _ = _call_with_retry(lambda: requests.post(url_baocun, data=data))
    if not sent:
        # Do not mark the period as collected, so a later run retries it.
        log.error(f'---{com_code}--{info_date}--数据发送接口失败')
        return
    log.info(f'----{com_code}--{info_date}--------数据发送接口完毕------------')

    stored, _ = _call_with_retry(add_date, com_code, info_date)
    if not stored:
        log.error(f'---{com_code}--{info_date}--报告期写入redis失败')
if __name__ == '__main__':
    # Script entry point: build the list of report periods, start Chrome,
    # define the request headers and label->field maps used by the functions
    # above, then collect every period for the configured company.
    info_date_list = []
    list_date = []

    # Quarter-end dates from the current year back to 2021, newest first,
    # skipping quarter ends that lie in the future.
    current_date_ = datetime.datetime.now()
    current_date = current_date_.strftime('%Y-%m-%d')
    year = int(current_date_.strftime('%Y'))
    list_month = ['-12-31', '-09-30', '-06-30', '-03-31']
    for year in range(year, 2020, -1):
        for month in list_month:
            date = str(year) + month
            # ISO dates compare correctly as plain strings.
            if current_date < date:
                continue
            list_date.append(date)

    opt = webdriver.ChromeOptions()
    opt.add_argument(
        'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')
    opt.add_argument("--ignore-certificate-errors")
    opt.add_argument("--ignore-ssl-errors")
    # Both switches go into ONE call: a second add_experimental_option with
    # the same name replaces the first, which dropped "enable-automation".
    opt.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    opt.add_experimental_option('useAutomationExtension', False)
    opt.binary_location = r'D:/Google/Chrome/Application/chrome.exe'
    chromedriver = r'D:/cmd100/chromedriver.exe'
    # NOTE(review): chrome_options= / executable_path= are legacy keyword
    # arguments; confirm they are accepted by the installed Selenium version.
    browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)

    headers = {
        'authority': 'stock.xueqiu.com',
        'method': 'GET',
        'path': '/v5/stock/finance/cn/income.json?symbol=NQ873286&type=all&is_detail=true&count=5&timestamp=1694414063178',
        'scheme': 'https',
        'Accept': 'application/json, text/plain, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Cookie': 'cookiesu=181695108116155; device_id=46ec772dfdf04c691b8e58a2b65c3c1a; s=cb12c0lym8; xq_a_token=a97fa15a5bb947c53ed434a6c0364dd03f36962c; xqat=a97fa15a5bb947c53ed434a6c0364dd03f36962c; xq_r_token=457987e3f3df9d22b53ad50b975087ff84ee9a79; xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOi0xLCJpc3MiOiJ1YyIsImV4cCI6MTcwNDU4NzkwNCwiY3RtIjoxNzAyOTY4MTA0NTAyLCJjaWQiOiJkOWQwbjRBWnVwIn0.MWTvu9PB8MH3bfoemYe4oSuRNy9TkVk4E3OgNPiaSPTIuIESgx5guP7RTonL1GdqCt-H16mpNeQMAj52797u_n4EcIoCryZpmUPxjUakjOAJRpUR5H0ioQGDJ4HeJiy8on3O44F3-IXWd0ukaQadr2yyqSoaBWbKN8an4YF-_2a9Cx5sicFBzwGfL3BsdmICy4wJ9hSUjVpwyH2BIeJnDeghC4C9vvkfQ0ewN5Dfh3hN6Z-9onoqcQq6zRNdcj5HNxlHBPPmiPTrm3PQcybMsn-JYymWIf1cLlNw3of3OYmLyYA9ImccO8KZdU7nIKpXJGQqz3hUHY2flQ5y9gydGA; u=181695108116155; Hm_lvt_1db88642e346389874251b5a1eded6e3=1701745994,1702968150; is_overseas=0; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1702968249',
        'Origin': 'https://xueqiu.com',
        'Pragma': 'no-cache',
        'Referer': 'https://xueqiu.com/snowman/S/NQ873286/detail',
        'Sec-Ch-Ua': '"Not/A)Brand";v="99", "Google Chrome";v="115", "Chromium";v="115"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
    }

    # Chinese row label -> API field name. An empty string means no field is mapped.
    # Income statement.
    lrb_name_map = {
        '营业总收入': 'total_revenue',
        '其中:营业收入': 'revenue',
        '营业总成本': 'operating_costs',
        '其中:营业成本': 'operating_cost',
        '营业税金及附加': 'operating_taxes_and_surcharge',
        '销售费用': 'sales_fee',
        '管理费用': 'manage_fee',
        '研发费用': 'rad_cost',
        '财务费用': 'financing_expenses',
        '其中:利息费用': 'finance_cost_interest_fee',
        '利息收入': 'finance_cost_interest_income',
        '资产减值损失': 'asset_impairment_loss',
        '信用减值损失': 'credit_impairment_loss',
        '加:公允价值变动收益': '',
        '投资收益': 'invest_income',
        '其中:对联营企业和合营企业的投资收益': '',
        '资产处置收益': 'asset_disposal_income',
        '其他收益': 'other_income',
        '营业利润': 'op',
        '加:营业外收入': 'non_operating_income',
        '其中:非流动资产处置利得': '',
        '减:营业外支出': 'non_operating_payout',
        '其中:非流动资产处置损失': '',
        '利润总额': 'profit_total_amt',
        '减:所得税费用': 'income_tax_expenses',
        '净利润差额(合计平衡项目)': '',
        '净利润': 'net_profit',
        '(一)持续经营净利润': 'continous_operating_np',
        '归属于母公司股东的净利润': 'net_profit_atsopc',
        '少数股东损益': 'minority_gal',
        '扣除非经常性损益后的净利润': 'net_profit_after_nrgal_atsolc',
        '基本每股收益': 'basic_eps',
        '稀释每股收益': 'dlt_earnings_per_share',
        '其他综合收益': 'othr_compre_income',
        '归属母公司所有者的其他综合收益': '',
        '综合收益总额': 'total_compre_income',
        # NOTE(review): same field as '归属于母公司股东的净利润' above - possible copy-paste, confirm.
        '归属于母公司股东的综合收益总额': 'net_profit_atsopc',
        '归属于少数股东的综合收益总额': 'total_compre_income_atms'
    }
    # Balance sheet.
    zcfzb_name_map = {
        '货币资金': 'currency_funds',
        '交易性金融资产': '',
        '应收票据及应收账款': 'ar_and_br',
        '其中:应收票据': 'bills_receivable',
        '应收账款': 'account_receivable',
        '预付款项': 'pre_payment',
        '应收利息': '',
        '应收股利': '',
        '其他应收款': 'othr_receivables',
        '存货': 'inventory',
        '合同资产': '',
        '划分为持有待售的资产': '',
        '一年内到期的非流动资产': 'nca_due_within_one_year',
        # NOTE(review): same field as '无形资产' below - possible copy-paste, confirm.
        '其他流动资产': 'intangible_assets',
        '流动资产合计': 'total_current_assets',
        '可供出售金融资产': '',
        '持有至到期投资': '',
        '长期应收款': '',
        '长期股权投资': '',
        '其他权益工具投资': '',
        '其他非流动金融资产': '',
        '投资性房地产': '',
        '固定资产合计': 'fixed_asset_sum',
        '其中:固定资产': 'fixed_asset',
        '固定资产清理': '',
        '在建工程合计': 'construction_in_process_sum',
        '其中:在建工程': 'construction_in_process',
        '工程物资': '',
        '生产性生物资产': '',
        '油气资产': '',
        '无形资产': 'intangible_assets',
        '开发支出': 'dev_expenditure',
        '商誉': '',
        '长期待摊费用': 'lt_deferred_expense',
        '递延所得税资产': 'dt_assets',
        '其他非流动资产': 'othr_noncurrent_assets',
        '非流动资产合计': 'total_noncurrent_assets',
        '资产合计': 'total_assets',
        '短期借款': 'st_loan',
        '交易性金融负债': '',
        '衍生金融负债': '',
        '应付票据及应付账款': 'accounts_payable',
        '应付票据': '',
        '应付账款': '',
        '预收款项': '',
        '合同负债': 'contract_liabilities',
        '应付职工薪酬': 'payroll_payable',
        '应交税费': 'tax_payable',
        '应付利息': '',
        '应付股利': '',
        '其他应付款': 'othr_payables',
        '划分为持有待售的负债': '',
        '一年内到期的非流动负债': 'noncurrent_liab_due_in1y',
        '其他流动负债': 'othr_current_liab',
        '流动负债合计': 'total_current_liab',
        '长期借款': 'lt_loan',
        '应付债券': '',
        '长期应付款合计': 'lt_payable_sum',
        '长期应付款': 'lt_payable',
        '专项应付款': '',
        '预计负债': '',
        '递延所得税负债': 'dt_liab',
        '递延收益-非流动负债': '',
        '其他非流动负债': '',
        '非流动负债合计': 'total_noncurrent_liab',
        '负债合计': 'total_liab',
        '实收资本(或股本)': 'shares',
        '其他权益工具': '',
        '永续债': '',
        '资本公积': 'capital_reserve',
        '减:库存股': '',
        '其他综合收益': '',
        '专项储备': 'special_reserve',
        '盈余公积': 'earned_surplus',
        '未分配利润': 'undstrbtd_profit',
        '一般风险准备': '',
        '外币报表折算差额': '',
        '归属于母公司股东权益合计': 'total_quity_atsopc',
        '少数股东权益': 'minority_equity',
        '股东权益合计': 'total_holders_equity',
        # NOTE(review): same field as '资产合计' above - confirm.
        '负债和股东权益总计': 'total_assets'
    }
    # Cash-flow statement.
    xjllb_name_map = {
        '销售商品、提供劳务收到的现金': 'cash_received_of_sales_service',
        '收到的税费返还': 'refund_of_tax_and_levies',
        '收到其他与经营活动有关的现金': 'cash_received_of_othr_oa',
        '经营活动现金流入小计': 'sub_total_of_ci_from_oa',
        '购买商品、接受劳务支付的现金': 'goods_buy_and_service_cash_pay',
        '支付给职工以及为职工支付的现金': 'cash_paid_to_employee_etc',
        '支付的各项税费': 'payments_of_all_taxes',
        '支付其他与经营活动有关的现金': 'othrcash_paid_relating_to_oa',
        '经营活动现金流出小计': 'sub_total_of_cos_from_oa',
        '经营活动产生的现金流量净额': 'ncf_from_oa',
        '收回投资收到的现金': 'cash_received_of_dspsl_invest',
        # NOTE(review): same field as '支付其他与筹资活动有关的现金' below - possible copy-paste, confirm.
        '取得投资收益收到的现金': "othrcash_paid_relating_to_fa",
        '处置固定资产、无形资产和其他长期资产收回的现金净额': 'net_cash_of_disposal_assets',
        '处置子公司及其他营业单位收到的现金净额': '',
        '收到其他与投资活动有关的现金': '',
        '投资活动现金流入小计': 'sub_total_of_ci_from_ia',
        '购建固定资产、无形资产和其他长期资产支付的现金': 'cash_paid_for_assets',
        '投资支付的现金': 'invest_paid_cash',
        '取得子公司及其他营业单位支付的现金净额': '',
        '支付其他与投资活动有关的现金': '',
        '投资活动现金流出小计': 'sub_total_of_cos_from_ia',
        '投资活动产生的现金流量净额': 'ncf_from_ia',
        '筹资活动产生的现金流量': '',
        '吸收投资收到的现金': 'cash_received_of_absorb_invest',
        '其中:子公司吸收少数股东投资收到的现金': '',
        '取得借款收到的现金': 'cash_received_of_borrowing',
        '发行债券收到的现金': '',
        '收到其他与筹资活动有关的现金': 'cash_received_of_othr_fa',
        '筹资活动现金流入小计': 'sub_total_of_ci_from_fa',
        '偿还债务支付的现金': 'cash_pay_for_debt',
        '分配股利、利润或偿付利息支付的现金': 'cash_paid_of_distribution',
        '其中:子公司支付给少数股东的股利': '',
        '支付其他与筹资活动有关的现金': 'othrcash_paid_relating_to_fa',
        '筹资活动现金流出小计': 'sub_total_of_cos_from_fa',
        '筹资活动产生的现金流量净额': 'ncf_from_fa',
        '汇率变动对现金及现金等价物的影响': '',
        '现金及现金等价物净增加额': 'net_increase_in_cce',
        # NOTE(review): opening balance mapped to the same field as the closing balance below - confirm.
        '加:期初现金及现金等价物余额': 'final_balance_of_cce',
        '期末现金及现金等价物余额': 'final_balance_of_cce'
    }

    while True:
        # NOTE(review): the Redis pull is commented out and one company is
        # hard-coded, so this loop re-processes the same company forever and
        # the None branch below cannot trigger - confirm this is debug-only.
        # social_code = baseCore.redicPullData('NQEnterprise:nq_finance')
        social_code = '91520115697528509T'
        if social_code is None:
            log.info('已没有数据----------等待')
            time.sleep(20)
            continue
        log.info(f'========正在采集{social_code}===========')
        data = baseCore.getInfomation(social_code)
        # Index 3 of the returned record is used as the stock code.
        com_code = data[3]
        start = time.time()
        com_code = 'NQ' + com_code
        for info_date in list_date:
            dic_info = getinfo(info_date, com_code, social_code)
comData/NQenterprise/getQccId.py
0 → 100644
浏览文件 @
be47bb58
# -*- coding: utf-8 -*-
import
time
from
urllib.parse
import
quote
import
requests
import
urllib3
from
base.BaseCore
import
BaseCore
# Module-level helpers created at import time: a BaseCore instance and the
# logger it hands out.
baseCore
=
BaseCore
()
log
=
baseCore
.
getLogger
()
# Request headers sent to xcx.qcc.com. 'Qcc-Timestamp' starts empty and is
# filled in by find_id_by_name before each request.
# NOTE(review): the 'Accept-Encoding' value ends with a trailing comma - confirm it is intended.
headers
=
{
'Host'
:
'xcx.qcc.com'
,
'Connection'
:
'keep-alive'
,
'Qcc-Platform'
:
'mp-weixin'
,
'Qcc-Timestamp'
:
''
,
'Qcc-Version'
:
'1.0.0'
,
'User-Agent'
:
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat'
,
'content-type'
:
'application/json'
,
'Referer'
:
'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html'
,
'Accept-Encoding'
:
'gzip, deflate, br,'
}
# 通过企业名称或信用代码获取企查查id
def find_id_by_name(start, token, name):
    """Look up a company's Qichacha id (``KeyNo``) by name or credit code.

    Args:
        start: Start time passed to ``baseCore.getTimeCost`` for the elapsed-time log lines.
        token: Session token placed in the request URL.
        name: Search key (company name or credit code).

    Returns:
        The ``KeyNo`` string of the first search hit; ``''`` when there is no
        hit or the hit has an empty name; ``False`` when the request failed,
        the token was rejected (status 40101), the account was rate limited
        (status 401), or the response could not be parsed.
    """
    urllib3.disable_warnings()
    qcc_key = name
    t = str(int(time.time()) * 1000)
    headers['Qcc-Timestamp'] = t
    url = f"https://xcx.qcc.com/mp-weixin/forwardApp/v3/base/advancedSearch?token={token}&t={t}&pageIndex=1&needGroup=yes&insuredCntStart=&insuredCntEnd=&startDateBegin=&startDateEnd=&registCapiBegin=&registCapiEnd=&countyCode=&province=&sortField=&isSortAsc=&searchKey={quote(qcc_key)}&searchIndex=default&industryV3="

    resp_dict = None
    for lll in range(1, 6):
        try:
            resp_dict = requests.get(url=url, headers=headers, verify=False).json()
            break
        except Exception:
            print('重试')
            time.sleep(5)
    if resp_dict is None:
        # All five attempts failed: report failure with the same sentinel the
        # other error paths use instead of failing on an undefined variable.
        log.info(f'====请求失败====时间{baseCore.getTimeCost(start, time.time())}')
        return False
    time.sleep(2)

    # Known error replies:
    #   {'status': 40101, 'message': '无效的sessionToken!'}
    #   {'status': 401, 'message': '您的账号访问超频,请升级小程序版本'}
    status = resp_dict.get('status') if isinstance(resp_dict, dict) else None
    if status == 40101:
        log.info(f'====token失效====时间{baseCore.getTimeCost(start, time.time())}')
        return False
    if status == 401:
        log.info(f'=======您的账号访问超频,请升级小程序版本=====时间{baseCore.getTimeCost(start, time.time())}')
        return False

    try:
        hits = resp_dict['result']['Result']
        if hits:
            result_dict = hits[0]
            KeyNo = result_dict['KeyNo']
            # The name comes back with <em> highlight tags around the match.
            Name = result_dict['Name'].replace('<em>', '').replace('</em>', '').strip()
            if Name == '':
                KeyNo = ''
        else:
            KeyNo = ''
    except Exception:
        log.info(f'====token失效====时间{baseCore.getTimeCost(start,time.time())}')
        return False
    print("{},企业代码为:{}".format(qcc_key, KeyNo))
    return KeyNo
\ No newline at end of file
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论