Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
王景浩
zzsn_spider
Commits
785f3d85
提交
785f3d85
authored
2月 04, 2024
作者:
LiuLiYuan
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/master'
上级
6cdfccca
c14c8e55
显示空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
128 行增加
和
140 行删除
+128
-140
CorePerson.py
comData/Tyc/CorePerson.py
+123
-119
getTycId.py
comData/Tyc/getTycId.py
+5
-21
没有找到文件。
comData/Tyc/CorePerson.py
浏览文件 @
785f3d85
#补充剩余核心人员信息
#先采集天眼查id,再通过id采集核心人员信息
"""
天眼查人员信息
问题1:页面和接口数据不一致 目前方法 单独处理
问题2:页面人员总数拿的不够准确 目前方法 修改获取父标签逻辑
"""
import
datetime
import
json
import
os
import
subprocess
import
sys
import
requests
,
time
,
random
import
pandas
as
pd
import
requests
,
time
from
bs4
import
BeautifulSoup
import
urllib3
from
retry
import
retry
...
...
@@ -17,10 +16,6 @@ from getTycId import getTycIdByXYDM
baseCore
=
BaseCore
()
urllib3
.
disable_warnings
(
urllib3
.
exceptions
.
InsecureRequestWarning
)
log
=
baseCore
.
getLogger
()
headers
=
{
'Cookie'
:
'HWWAFSESID=38a70202d86311cd90f; HWWAFSESTIME=1706662296323; jsid=SEO-BING-ALL-SY-000001; TYCID=e35f3910bfd211eeac66555a29ade465; ssuid=6800091776; sajssdk_2015_cross_new_user=1; csrfToken=e85dxv9-DXNUkQ7yuzIgZrbs; bannerFlag=true; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1706662300; _ga=GA1.2.1071312772.1706662301; _gid=GA1.2.1602571847.1706662301; tyc-user-info={
%22
state
%22
:
%220%22%2
C
%22
vipManager
%22
:
%220%22%2
C
%22
mobile
%22
:
%2217103126138%22%2
C
%22
userId
%22
:
%22304029617%22
}; tyc-user-info-save-time=1706662339304; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNzEwMzEyNjEzOCIsImlhdCI6MTcwNjY2MjMzOCwiZXhwIjoxNzA5MjU0MzM4fQ.z9cOzr0YWyU_rxTZNn8ojsxfMAdre4NbQLzwgKAGdI-CCcfPvuBBrL4tFP5HmR5pDv204e4P4k4Ll4kKPhBQTg; tyc-user-phone=
%255
B
%252217103126138%2522%255
D; searchSessionId=1706667106.29658260; sensorsdata2015jssdkcross=
%7
B
%22
distinct_id
%22%3
A
%22304029617%22%2
C
%22
first_id
%22%3
A
%2218
d5d0009e8153-01c79a4d65a09f9-4c657b58-921600-18d5d0009e914e
%22%2
C
%22
props
%22%3
A
%7
B
%22%24
latest_traffic_source_type
%22%3
A
%22%
E7
%9
B
%
B4
%
E6
%8
E
%
A5
%
E6
%
B5
%81%
E9
%87%8
F
%22%2
C
%22%24
latest_search_keyword
%22%3
A
%22%
E6
%9
C
%
AA
%
E5
%8
F
%96%
E5
%88%
B0
%
E5
%80%
BC_
%
E7
%9
B
%
B4
%
E6
%8
E
%
A5
%
E6
%89%93%
E5
%
BC
%80%22%2
C
%22%24
latest_referrer
%22%3
A
%22%22%7
D
%2
C
%22
identities
%22%3
A
%22
eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMThkNWQwMDA5ZTgxNTMtMDFjNzlhNGQ2NWEwOWY5LTRjNjU3YjU4LTkyMTYwMC0xOGQ1ZDAwMDllOTE0ZSIsIiRpZGVudGl0eV9sb2dpbl9pZCI6IjMwNDAyOTYxNyJ9
%22%2
C
%22
history_login_id
%22%3
A
%7
B
%22
name
%22%3
A
%22%24
identity_login_id
%22%2
C
%22
value
%22%3
A
%22304029617%22%7
D
%2
C
%22%24
device_id
%22%3
A
%2218
d5d0009e8153-01c79a4d65a09f9-4c657b58-921600-18d5d0009e914e
%22%7
D; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1706667529'
,
'User-Agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
,
}
cnx_
=
baseCore
.
cnx
cursor_
=
baseCore
.
cursor
...
...
@@ -30,71 +25,72 @@ cursor = baseCore.cursor_
list_all_1
=
[]
list_all_2
=
[]
taskType
=
'天眼查/核心人员'
ip_num
=
0
def
get_proxy
(
ip_num
):
sql
=
"select proxy from clb_proxy"
cursor_
.
execute
(
sql
)
proxy_lists
=
cursor_
.
fetchall
()
cnx_
.
commit
()
ip_list
=
[]
for
proxy_
in
proxy_lists
:
ip_list
.
append
(
str
(
proxy_
)
.
replace
(
"('"
,
''
)
.
replace
(
"',)"
,
''
))
proxy_list
=
[]
for
str_ip
in
ip_list
:
str_ip_list
=
str_ip
.
split
(
'-'
)
proxyMeta
=
"http://
%(host)
s:
%(port)
s"
%
{
"host"
:
str_ip_list
[
0
],
"port"
:
str_ip_list
[
1
],
}
proxy
=
{
"http"
:
proxyMeta
,
"https"
:
proxyMeta
}
proxy_list
.
append
(
proxy
)
return
proxy_list
[
ip_num
]
from
lxml
import
etree
from
classtool
import
Token
,
File
,
Tag
token
=
Token
()
@retry
(
tries
=
3
,
delay
=
1
)
def
get_html
(
tycid
,
ip_num
):
def
get_html
(
tycid
,
s
,
headers
):
url
=
f
"https://www.tianyancha.com/company/{tycid}"
ip
=
get_proxy
(
ip_num
)
response
=
requests
.
get
(
url
=
url
,
headers
=
headers
,
proxies
=
ip
)
# ip = baseCore.get_proxy(
)
response
=
s
.
get
(
url
=
url
,
headers
=
headers
)
if
response
.
status_code
==
200
:
pass
else
:
ip_num
+=
1
raise
# return -1
soup
=
BeautifulSoup
(
response
.
content
,
'html.parser'
)
try
:
tmp_field
=
soup
.
find
(
'div'
,
class_
=
'dim-tab-root'
)
.
find
(
'span'
)
.
text
div_part
=
soup
.
find
(
'div'
,
attrs
=
{
'data-dim'
:
'staff'
})
# div_part.find('div', class_='dimHeader_root__XTCLe')
except
:
return
-
1
try
:
tmp_field
=
div_part
.
find
(
'div'
,
class_
=
'dim-tab-root'
)
.
find
(
'span'
)
.
text
if
'最新公示'
in
tmp_field
:
total
=
soup
.
find
(
'div'
,
class_
=
'dim-tab-root'
)
.
find
(
'span'
)
.
get_text
()
.
split
(
'最新公示'
)[
1
]
.
replace
(
' '
,
''
)
total
=
div_part
.
find
(
'div'
,
class_
=
'dim-tab-root'
)
.
find
(
'span'
)
.
get_text
()
.
split
(
'最新公示'
)[
1
]
.
replace
(
' '
,
''
)
return
int
(
total
)
else
:
return
0
return
-
1
except
:
return
0
@retry
(
tries
=
3
,
delay
=
1
)
def
get_page
(
url
,
ip_num
):
ip
=
get_proxy
(
ip_num
)
res
=
requests
.
get
(
url
=
url
,
headers
=
headers
,
proxies
=
ip
)
if
res
.
status_code
==
200
:
pass
else
:
ip_num
+=
1
raise
def
get_page
(
url
,
s
,
headers
):
ip
=
baseCore
.
get_proxy
()
res
=
s
.
get
(
url
=
url
,
headers
=
headers
,
proxies
=
ip
)
time
.
sleep
(
1
)
total_page_
=
res
.
json
()[
'data'
][
'total'
]
if
res
.
status_code
!=
200
:
raise
data_page
=
res
.
json
()
total_page_
=
data_page
[
'data'
][
'total'
]
return
total_page_
def
doJob
():
# for social_code in social_code_list:
while
True
:
# todo:设置cookies的使用
headers
=
{
'Accept'
:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
,
'Accept-Encoding'
:
'gzip, deflate, br'
,
'Accept-Language'
:
'zh-CN,zh;q=0.9'
,
'Cache-Control'
:
'max-age=0'
,
'Connection'
:
'keep-alive'
,
'User-Agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
,
'version'
:
'TYC-Web'
}
cookies_list
,
id_cookie
=
token
.
get_cookies
()
cookies
=
{}
for
cookie
in
cookies_list
:
cookies
[
cookie
[
'name'
]]
=
cookie
[
'value'
]
s
=
requests
.
Session
()
s
.
cookies
.
update
(
cookies
)
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
# social_code = baseCore.redicPullData('CorPersonEnterprise:gnqy_socialCode')
# 判断 如果Redis中已经没有数据,则等待
social_code
=
'91
320691550279691N
'
social_code
=
'91
1101067916069050
'
if
social_code
==
None
:
time
.
sleep
(
20
)
continue
...
...
@@ -108,26 +104,28 @@ def doJob():
tycid
=
data
[
11
]
count
=
data
[
17
]
else
:
#数据重新塞入redis
#
数据重新塞入redis
# log.info(f'数据库中无该企业{social_code}')
sql
=
f
"SELECT * FROM sys_base_enterprise WHERE social_credit_code = '{social_code}'"
cursor
.
execute
(
sql
)
data
=
cursor
.
fetchone
()
if
data
:
pass
else
:
#数据库中并没有该企业 需要新增
pass
id
=
data
[
0
]
com_name
=
data
[
3
]
xydm
=
data
[
1
]
conut
=
0
# 写入数据库
insert
=
"INSERT INTO EnterpriseInfo(
com_name, xydm, social_credit_code) VALUES (
%
s,
%
s,
%
s)"
cursor_
.
execute
(
insert
,
(
com_name
,
xydm
,
social_code
))
insert
=
"INSERT INTO EnterpriseInfo(
CompanyName, SocialCode) VALUES (
%
s,
%
s)"
cursor_
.
execute
(
insert
,
(
com_name
,
xydm
))
cnx_
.
commit
()
tycid
=
''
# baseCore.rePutIntoR('CorPersonEnterpriseNone:gnqy_socialCode', social_code)
# continue
if
tycid
==
None
or
tycid
==
''
:
try
:
retData
=
getTycIdByXYDM
(
com_name
)
retData
=
getTycIdByXYDM
(
com_name
,
s
)
if
retData
[
'state'
]:
tycid
=
retData
[
'tycData'
][
'id'
]
# # todo:写入数据库
...
...
@@ -147,35 +145,29 @@ def doJob():
baseCore
.
recordLog
(
social_code
,
taskType
,
state
,
takeTime
,
''
,
'获取天眼查id失败'
)
baseCore
.
rePutIntoR
(
'CorPersonEnterpriseError:gnqy_socialCode'
,
social_code
)
continue
count
=
data
[
17
]
log
.
info
(
f
"{id}---{xydm}----{tycid}----开始采集核心人员"
)
list_one_info
=
[]
num
=
1
#todo:先确定接口走哪个
try
:
charge
=
get_html
(
tycid
,
ip_num
)
except
Exception
as
e
:
charge
=
get_html
(
tycid
,
s
,
headers
)
# 页面请求三次都失败
except
:
charge
=
-
1
log
.
info
(
e
)
total_page
=
0
t
=
int
(
time
.
time
()
*
1000
)
if
charge
==
-
1
:
token
.
updateTokeen
(
id_cookie
,
2
)
# 重新塞入redis
baseCore
.
rePutIntoR
(
'CorPersonEnterpriseError:gnqy_socialCode'
,
social_code
)
log
.
info
(
f
'==={social_code}=====页面请求失败===重新放入redis===='
)
log
.
info
(
f
"{id}---{xydm}----{tycid}----请求失败"
)
# 获取当前进程pid
current_pid
=
baseCore
.
getPID
()
# todo: 重新启动新进程,杀死当前进程
subprocess
.
Popen
([
sys
.
executable
]
+
sys
.
argv
)
os
.
kill
(
current_pid
,
9
)
baseCore
.
rePutIntoR
(
'CorPersonEnterprise:gnqy_socialCode'
,
social_code
)
log
.
info
(
f
"{id}---{xydm}----{tycid}----请求失败----重新放入redis"
)
time
.
sleep
(
2
)
continue
elif
charge
==
0
:
log
.
info
(
f
"{id}---{xydm}----{tycid}----没有最新公示"
)
url1
=
f
'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={t}&gid={tycid}&pageSize=20&pageNum=1'
try
:
total_page1
=
get_page
(
url1
,
ip_num
)
total_page1
=
get_page
(
url1
,
s
,
headers
)
except
:
total_page1
=
0
url
=
'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={}&gid={}&pageSize=20&pageNum={}'
...
...
@@ -186,12 +178,12 @@ def doJob():
url2
=
f
'https://capi.tianyancha.com/cloud-listed-company/listed/noRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum=1'
url3
=
f
'https://capi.tianyancha.com/cloud-listed-company/listed/getHkNoRepeatSeniorExecutive?_={t}&gid={tycid}&pageSize=20&pageNum=1'
try
:
total_page2
=
get_page
(
url2
,
ip_num
)
total_page2
=
get_page
(
url2
,
s
,
headers
)
except
:
total_page2
=
0
time
.
sleep
(
2
)
time
.
sleep
(
1
)
try
:
total_page3
=
get_page
(
url3
,
ip_num
)
total_page3
=
get_page
(
url3
,
s
,
headers
)
except
:
total_page3
=
0
if
total_page2
==
charge
:
...
...
@@ -206,33 +198,38 @@ def doJob():
else
:
total_page
=
0
flag
=
0
log
.
info
(
f
'{id}---{xydm}----{tycid}----没有高管信息'
)
baseCore
.
rePutIntoR
(
'CorPersonEnterpriseMap:gnqy_socialCode'
,
social_code
)
log
.
info
(
f
'{id}---{xydm}----{tycid}----页面和接口数据不对应'
)
continue
if
total_page
==
0
:
token
.
updateTokeen
(
id_cookie
,
2
)
# 重新塞入redis
baseCore
.
rePutIntoR
(
'CorPersonEnterprise
Error
:gnqy_socialCode'
,
social_code
)
baseCore
.
rePutIntoR
(
'CorPersonEnterprise:gnqy_socialCode'
,
social_code
)
log
.
info
(
f
'==={social_code}=====总数请求失败===重新放入redis===='
)
continue
#todo:获取页数
time
.
sleep
(
2
)
for
page
in
range
(
1
,
int
((
total_page
/
20
)
+
1
)
+
1
):
# # todo:获取页数
# total_page = 34
# flag = 2
for
page
in
range
(
1
,
int
((
total_page
/
20
)
+
1
)
+
1
):
res
=
None
for
c
in
range
(
3
):
ip
=
baseCore
.
get_proxy
()
url_
=
url
.
format
(
t
,
tycid
,
page
)
res
=
requests
.
get
(
url_
,
headers
=
headers
,
proxies
=
ip
)
# ,verify=False
# url_ = 'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_=1706765329671&gid=8715844&pageSize=20&pageNum=1'
res
=
requests
.
get
(
url_
,
headers
=
headers
,
proxies
=
ip
,
verify
=
False
)
# ,verify=False
time
.
sleep
(
1
)
if
res
.
status_code
==
200
:
break
else
:
if
c
==
2
:
res
=
''
break
continue
if
res
:
pass
else
:
token
.
updateTokeen
(
id_cookie
,
2
)
# 重新塞入redis
baseCore
.
rePutIntoR
(
'CorPersonEnterprise
Error
:gnqy_socialCode'
,
social_code
)
baseCore
.
rePutIntoR
(
'CorPersonEnterprise:gnqy_socialCode'
,
social_code
)
log
.
info
(
f
'{id}---{xydm}----{tycid}----高管信息请求失败'
)
continue
try
:
...
...
@@ -250,7 +247,7 @@ def doJob():
education
=
one_info
[
'education'
]
position
=
one_info
[
'position'
]
Salary
=
one_info
[
'salary'
]
#todo:获取当前年份
#
todo:获取当前年份
now
=
datetime
.
datetime
.
now
()
year
=
now
.
year
try
:
...
...
@@ -266,37 +263,37 @@ def doJob():
except
:
person_img
=
'--'
dic_json
=
{
"socialCreditCode"
:
social_code
,
"name"
:
name
,
"sex"
:
sex
,
"education"
:
education
,
"position"
:
position
,
"salary"
:
Salary
,
"birthYear"
:
birthYear
,
"shareNum"
:
StockKeepings
,
"shareRatio"
:
''
,
"benefitShare"
:
''
,
"currentTerm"
:
currentTerm
,
"personInfo"
:
personInfo
,
"sort"
:
str
(
num
)
"socialCreditCode"
:
social_code
,
"name"
:
name
,
"sex"
:
sex
,
"education"
:
education
,
"position"
:
position
,
"salary"
:
Salary
,
"birthYear"
:
birthYear
,
"shareNum"
:
StockKeepings
,
"shareRatio"
:
''
,
"benefitShare"
:
''
,
"currentTerm"
:
currentTerm
,
"personInfo"
:
personInfo
,
"sort"
:
str
(
num
)
}
dic_json_img
=
{
"socialCreditCode"
:
social_code
,
"name"
:
name
,
"sex"
:
sex
,
"education"
:
education
,
"position"
:
position
,
"salary"
:
Salary
,
"birthYear"
:
birthYear
,
"shareNum"
:
StockKeepings
,
"shareRatio"
:
''
,
"benefitShare"
:
''
,
"currentTerm"
:
currentTerm
,
"personInfo"
:
personInfo
,
"头像"
:
person_img
,
"sort"
:
str
(
num
)
"socialCreditCode"
:
social_code
,
"name"
:
name
,
"sex"
:
sex
,
"education"
:
education
,
"position"
:
position
,
"salary"
:
Salary
,
"birthYear"
:
birthYear
,
"shareNum"
:
StockKeepings
,
"shareRatio"
:
''
,
"benefitShare"
:
''
,
"currentTerm"
:
currentTerm
,
"personInfo"
:
personInfo
,
"头像"
:
person_img
,
"sort"
:
str
(
num
)
}
num
=
num
+
1
num
=
num
+
1
list_one_info
.
append
(
dic_json
)
# list_all_2.append(dic_json_img)
elif
flag
==
3
:
...
...
@@ -314,8 +311,11 @@ def doJob():
except
:
birthYear
=
''
personInfo
=
one_info
[
'resume_cn'
]
timestamp
=
int
(
int
(
one_info
[
'employ_date'
])
/
10000
)
try
:
timestamp
=
int
(
one_info
[
'employ_date'
])
/
1000
currentTerm
=
time
.
strftime
(
"
%
Y-
%
m-
%
d"
,
time
.
localtime
(
timestamp
))
except
:
currentTerm
=
''
dic_json
=
{
"socialCreditCode"
:
social_code
,
"name"
:
name
,
...
...
@@ -327,7 +327,7 @@ def doJob():
"shareNum"
:
''
,
"shareRatio"
:
''
,
"benefitShare"
:
''
,
"currentTerm"
:
currentTerm
+
'至-'
,
"currentTerm"
:
currentTerm
+
'至-'
,
"personInfo"
:
personInfo
,
"sort"
:
str
(
num
)
}
...
...
@@ -393,19 +393,23 @@ def doJob():
continue
else
:
pass
response
=
requests
.
post
(
'http://114.115.236.206:8088/sync/executive'
,
data
=
json_updata
,
timeout
=
300
,
verify
=
False
)
response
=
requests
.
post
(
'http://114.115.236.206:8088/sync/executive'
,
data
=
json_updata
,
timeout
=
300
,
verify
=
False
)
print
(
response
.
text
)
log
.
info
(
'=========成功======'
)
token
.
updateTokeen
(
id_cookie
,
3
)
time
.
sleep
(
10
)
except
Exception
as
e
:
log
.
info
(
f
'==={social_code}=====企业核心人员采集失败===重新放入redis===='
)
log
.
info
(
e
)
# 重新塞入redis
baseCore
.
rePutIntoR
(
'CorPersonEnterprise
Error
:gnqy_socialCode'
,
social_code
)
baseCore
.
rePutIntoR
(
'CorPersonEnterprise:gnqy_socialCode'
,
social_code
)
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start
,
time
.
time
())
baseCore
.
recordLog
(
social_code
,
taskType
,
state
,
takeTime
,
''
,
f
'获取企业信息失败--{e}'
)
time
.
sleep
(
5
)
# break
break
# df_img = pd.DataFrame(list_all_2)
# df_img.to_excel('企业主要人员-头像.xlsx',index=False)
if
__name__
==
"__main__"
:
...
...
comData/Tyc/getTycId.py
浏览文件 @
785f3d85
...
...
@@ -21,45 +21,29 @@ headers = {
'Connection'
:
'keep-alive'
,
'Content-Length'
:
'32'
,
'Content-Type'
:
'application/json'
,
'Host'
:
'capi.tianyancha.com'
,
'Origin'
:
'https://www.tianyancha.com'
,
'Referer'
:
'https://www.tianyancha.com/'
,
'Sec-Fetch-Dest'
:
'empty'
,
'Sec-Fetch-Mode'
:
'cors'
,
'Sec-Fetch-Site'
:
'same-site'
,
'User-Agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
,
'X-AUTH-TOKEN'
:
'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxODcwMzc1MjYwMCIsImlhdCI6MTcwMjcxMjg4MywiZXhwIjoxNzA1MzA0ODgzfQ.mVTR6Wz7W_IBjf4rLYhKacG9CRxGTzIGKmlqrR9jN-_t0Z4vUYVYwOTMzo7vT9IClJELruhl4d31KBHX0bZ1NQ'
,
'X-TYCID'
:
'6f6298905d3011ee96146793e725899d'
,
'sec-ch-ua'
:
'"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"'
,
'sec-ch-ua-mobile'
:
'?0'
,
'sec-ch-ua-platform'
:
'"Windows"'
,
'version'
:
'TYC-Web'
}
# headers = {
# 'X-TYCID':'30c1289042f511ee9182cd1e1bcaa517',
# # 'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzU5MjQ4MTgzOSIsImlhdCI6MTY5MjkzMzIxMiwiZXhwIjoxNjk1NTI1MjEyfQ.BKxDem8fpgeDHrIgm3qCoF76ueHtQSG1DggiTl4FAaoNKt4gem6NTX1XYndPXqVj9TXfl-8yp2kKE3jY66dyig',
# 'version':'TYC-Web',
# 'Content-Type':'application/json;charset=UTF-8'
# }
# cnx = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4')
# cursor= cnx.cursor()
cnx_
=
baseCore
.
cnx
cursor_
=
baseCore
.
cursor
taskType
=
'天眼查企业id/天眼查'
#根据信用代码获取天眼查id 企业名字等信息
def
getTycIdByXYDM
(
com_name
):
def
getTycIdByXYDM
(
com_name
,
s
):
retData
=
{
'state'
:
False
,
'tycData'
:
None
,
'reput'
:
True
}
url
=
f
"https://capi.tianyancha.com/cloud-tempest/search/suggest/v3?_={baseCore.getNowTime(3)}"
ip
=
baseCore
.
get_proxy
()
paramJsonData
=
{
'keyword'
:
com_name
}
paramJsonData
=
{
'keyword'
:
com_name
}
try
:
# headers['User-Agent'] = baseCore.getRandomUserAgent()
# headers['X-AUTH-TOKEN'] = baseCore.GetTYCToken()
# response = requests.post(url,json=paramJsonData,headers=headers,verify=False, proxies=ip)
response
=
requests
.
post
(
url
,
json
=
paramJsonData
,
headers
=
headers
,
verify
=
False
)
response
=
s
.
post
(
url
,
json
=
paramJsonData
,
headers
=
headers
)
time
.
sleep
(
random
.
randint
(
3
,
5
))
retJsonData
=
json
.
loads
(
response
.
content
.
decode
(
'utf-8'
))
if
retJsonData
[
'data'
]
and
retJsonData
[
'state'
]
==
'ok'
:
if
retJsonData
[
'data'
]
and
retJsonData
[
'state'
]
==
'ok'
:
pass
else
:
log
.
error
(
f
"---{com_name}-未查询到该企业---"
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论