Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
王景浩
zzsn_spider
Commits
a86fe277
提交
a86fe277
authored
2月 01, 2024
作者:
薛凌堃
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
天眼查基本信息
上级
f751b7bb
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
51 行增加
和
51 行删除
+51
-51
baseinfo0130_tyc.py
comData/Tyc/baseinfo0130_tyc.py
+47
-47
classtool.py
comData/Tyc/classtool.py
+3
-3
get_tyc_cookies.py
comData/Tyc/get_tyc_cookies.py
+1
-1
没有找到文件。
comData/Tyc/baseinfo0130_tyc.py
浏览文件 @
a86fe277
...
...
@@ -97,7 +97,7 @@ def baseinfo(com_soup):
span_list
=
briefTag
.
find_all
(
'span'
)
for
span
in
span_list
:
if
len
(
span
.
attrs
)
==
0
:
data
[
'简介'
]
=
span
.
text
data
[
'简介'
]
=
span
.
text
.
split
(
'通过天眼查大数据分析'
)[
0
]
break
return
data
...
...
@@ -106,107 +106,107 @@ def dic_handle(result_dic):
try
:
company_name
=
result_dic
[
'企业名称'
]
except
:
company_name
=
''
company_name
=
None
try
:
CreditCode
=
result_dic
[
'统一社会信用代码'
]
except
:
CreditCode
=
''
CreditCode
=
None
try
:
OperName
=
result_dic
[
'法定代表人'
]
except
:
OperName
=
''
OperName
=
None
try
:
PhoneNumber
=
result_dic
[
'电话'
]
except
:
PhoneNumber
=
''
PhoneNumber
=
None
try
:
WebSite
=
result_dic
[
'网址'
]
except
:
WebSite
=
''
WebSite
=
None
try
:
Email
=
result_dic
[
'邮箱'
]
except
:
Email
=
''
Email
=
None
try
:
Desc
=
result_dic
[
'简介'
]
except
:
Desc
=
''
Desc
=
None
try
:
Status
=
result_dic
[
'经营状态'
]
except
:
Status
=
''
Status
=
None
try
:
StartDate
=
result_dic
[
'成立日期'
]
except
:
StartDate
=
''
StartDate
=
None
try
:
RecCap
=
result_dic
[
'实缴资本'
]
except
:
RecCap
=
''
RecCap
=
None
try
:
RegistCapi
=
result_dic
[
'注册资本'
]
except
:
RegistCapi
=
''
RegistCapi
=
None
try
:
CheckDate
=
result_dic
[
'核准日期'
]
except
:
CheckDate
=
''
CheckDate
=
None
try
:
OrgNo
=
result_dic
[
'组织机构代码'
]
except
:
OrgNo
=
''
OrgNo
=
None
try
:
No
=
result_dic
[
'工商注册号'
]
except
:
No
=
''
No
=
None
try
:
taxpayerNo
=
result_dic
[
'纳税人识别号'
]
except
:
taxpayerNo
=
''
taxpayerNo
=
None
try
:
EconKind
=
result_dic
[
'企业类型'
]
except
:
EconKind
=
''
EconKind
=
None
try
:
TermStart
=
result_dic
[
'营业期限'
]
.
split
(
'至'
)[
0
]
except
:
TermStart
=
''
TermStart
=
None
try
:
TeamEnd
=
result_dic
[
'营业期限'
]
.
split
(
'至'
)[
1
]
except
:
TeamEnd
=
''
TeamEnd
=
None
try
:
TaxpayerType
=
result_dic
[
'纳税人资质'
]
except
:
TaxpayerType
=
''
TaxpayerType
=
None
try
:
SubIndustry
=
result_dic
[
'国标行业'
]
except
:
SubIndustry
=
''
#
try:
#
SubIndustry = result_dic['国标行业']
#
except:
#
SubIndustry = ''
try
:
region
=
result_dic
[
'所属地区'
]
except
:
region
=
''
region
=
None
try
:
pattern
=
r'^(.*?省|.*?自治区)?(.*?市|.*?自治州)?(.*?区|.*?县|.*?自治县|.*?市辖区)?(.*?区|.*?县|.*?自治县|.*?市辖区)?$'
matches
=
re
.
match
(
pattern
,
region
)
...
...
@@ -220,53 +220,53 @@ def dic_handle(result_dic):
break
except
:
Province
=
''
City
=
''
County
=
''
Province
=
None
City
=
None
County
=
None
try
:
BelongOrg
=
result_dic
[
'登记机关'
]
except
:
BelongOrg
=
''
BelongOrg
=
None
try
:
Info
=
result_dic
[
'人员规模'
]
except
:
Info
=
''
Info
=
None
try
:
can_bao
=
result_dic
[
'参保人数'
]
except
:
can_bao
=
''
can_bao
=
None
try
:
OriginalName
=
result_dic
[
'曾用名'
]
except
:
OriginalName
=
''
OriginalName
=
None
try
:
EnglishName
=
result_dic
[
'英文名称'
]
except
:
EnglishName
=
''
EnglishName
=
None
try
:
IxCode
=
result_dic
[
'进出口企业代码'
]
except
:
IxCode
=
''
IxCode
=
None
try
:
Address
=
result_dic
[
'地址'
]
except
:
Address
=
''
Address
=
None
try
:
Scope
=
result_dic
[
'经营范围'
]
except
:
Scope
=
''
Scope
=
None
aa_dict
=
{
'name'
:
company_name
,
# 企业名称
'shortName'
:
''
,
# 企业简称
'shortName'
:
None
,
# 企业简称
'socialCreditCode'
:
CreditCode
,
# 统一社会信用代码
'legalPerson'
:
OperName
,
# 法定代表人
'officialPhone'
:
PhoneNumber
,
# 电话
...
...
@@ -285,7 +285,7 @@ def dic_handle(result_dic):
'businessStartDate'
:
TermStart
,
# 营业期限自
'businessEndDate'
:
TeamEnd
,
# 营业期限至
'taxpayerQualification'
:
TaxpayerType
,
# 纳税人资质
'industry'
:
SubIndustry
,
# 所属行业
'industry'
:
None
,
# 所属行业
'region'
:
region
,
'province'
:
Province
,
# 所属省
'city'
:
City
,
# 所属市
...
...
@@ -565,7 +565,7 @@ if __name__ == '__main__':
start_time
=
time
.
time
()
# 获取企业信息
# company_field = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
company_field
=
'911
30000738711917Q
||'
company_field
=
'911
10000710925016E
||'
if
company_field
==
'end'
:
# 本轮处理完毕,需要发送邮件,并且进入下一轮
baseCore
.
sendEmail
(
file_name
)
...
...
@@ -604,14 +604,14 @@ if __name__ == '__main__':
# category = company_field.split('|')[19]
# exchange = company_field.split('|')[20]
# listType = company_field.split('|')[21]
ynDomestic
=
'1'
countryName
=
''
securitiesCode
=
''
securitiesShortName
=
''
listingDate
=
''
category
=
''
exchange
=
''
listType
=
''
ynDomestic
=
None
countryName
=
None
securitiesCode
=
None
securitiesShortName
=
None
listingDate
=
None
category
=
None
exchange
=
None
listType
=
None
count
=
redaytowork
(
com_name
,
social_code
,
securitiesCode
,
securitiesShortName
,
listingDate
,
category
,
exchange
,
listType
,
ynDomestic
,
countryName
,
file_name
)
...
...
comData/Tyc/classtool.py
浏览文件 @
a86fe277
...
...
@@ -56,7 +56,7 @@ class Token():
query
=
{
'fenghaoTime'
:
{
'$lt'
:
'updateTime'
},
# 封号时间小于更新时间
}
result
=
db_storage
.
find_one
(
query
,
sort
=
[(
'updateTime'
,
-
1
)])
result
=
db_storage
.
find_one
(
query
,
sort
=
[(
'updateTime'
,
1
)])
cookies
=
result
[
'cookies'
]
id_token
=
result
[
'_id'
]
return
cookies
,
id_token
...
...
@@ -77,7 +77,7 @@ class Token():
filter
=
{
'_id'
:
ObjectId
(
id_token
)}
# 更新操作
update
=
{
'$set'
:
{
'fenghaoTime'
:
datetime
.
datetime
.
now
()}}
update
=
{
'$set'
:
{
'fenghaoTime'
:
datetime
.
datetime
.
now
()
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
)
}}
# 执行更新操作
db_storage
.
update_one
(
filter
,
update
)
...
...
@@ -86,7 +86,7 @@ class Token():
filter
=
{
'_id'
:
ObjectId
(
id_token
)}
# 更新操作
update
=
{
'$set'
:
{
'updateT
time'
:
datetime
.
datetime
.
now
(
)}}
update
=
{
'$set'
:
{
'updateT
ime'
:
datetime
.
datetime
.
now
()
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
)}}
# 执行更新操作
db_storage
.
update_one
(
filter
,
update
)
...
...
comData/Tyc/get_tyc_cookies.py
浏览文件 @
a86fe277
...
...
@@ -26,7 +26,7 @@ if __name__ == "__main__":
name
=
input
(
'所属用户:'
)
driver
=
create_driver
()
driver
.
get
(
url
)
time
.
sleep
(
10
0
)
time
.
sleep
(
6
0
)
cookies
=
driver
.
get_cookies
()
# print(driver.get_cookies())
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论