王景浩 / zzsn_spider · Commits

Commit f4a32ade, authored Feb 27, 2024 by LiuLiYuan

    Tianyancha basic info 2/27

Parent: 2ea9c487

Showing 1 changed file with 467 additions and 0 deletions (+467 −0)

comData/Tyc/baseinfo0227_tyc.py (new file, mode 100644)
# -*- coding: utf-8 -*-
import datetime
import json
import re
import time

import pymongo
import requests
from bs4 import BeautifulSoup
from kafka import KafkaProducer
import urllib3
from selenium.webdriver.support.wait import WebDriverWait

db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017/', username='admin',
                                 password='ZZsn@9988').ZZSN['天眼查登录信息']
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from dateutil.relativedelta import relativedelta
import sys

# sys.path.append('D:\\KK\\zzsn_spider\\base')
# sys.path.append('D:\\kkwork\\zzsn_spider\\base')
# import BaseCore
from base import BaseCore

baseCore = BaseCore.BaseCore()
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor
log = baseCore.getLogger()
from classtool import Token, File, Tag

token = Token()
file = File()
tag = Tag()

from selenium import webdriver
from selenium.webdriver.common.by import By
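# Note: `db_storage` opens its MongoDB connection at import time but is not
# referenced again in this file; `requests`, `WebDriverWait`, `relativedelta`,
# `By` and `tag` are likewise imported or instantiated without being used below.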
def create_driver():
    path = r'D:\soft\msedgedriver.exe'
    # options = webdriver.EdgeOptions()
    options = {
        "browserName": "MicrosoftEdge",
        "ms:edgeOptions": {
            "extensions": [],
            "args": ["--start-maximized"]  # launch the browser window maximized
        }
    }
    session = webdriver.Edge(executable_path=path, capabilities=options)
    return session
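# Note: `executable_path`/`capabilities` are the Selenium 3-style keyword
# arguments. A rough Selenium 4+ equivalent (a sketch, not part of the
# original code) would be:
#     from selenium.webdriver.edge.options import Options
#     from selenium.webdriver.edge.service import Service
#     opts = Options()
#     opts.add_argument('--start-maximized')
#     session = webdriver.Edge(service=Service(path), options=opts)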
# Send data
def sendkafka(post_data):
    try:
        producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'], api_version=(2, 0, 2))
        kafka_result = producer.send("enterpriseInfo", json.dumps(post_data, ensure_ascii=False).encode('utf8'))
        print(kafka_result.get(timeout=10))
    except:
        exception = 'kafka传输失败'
        state = 0
        takeTime = baseCore.getTimeCost(start_time, time.time())
        baseCore.recordLog(social_code, taskType, state, takeTime, '', exception)
        log.info(f"{com_name}--{social_code}--kafka传输失败")
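# Note: sendkafka() relies on the module-level names `start_time`,
# `social_code`, `taskType` and `com_name` assigned in the __main__ loop
# below, and its bare `except` swallows every send error after logging it.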
def Lreputredis(company_field):
    # todo: put the record back into redis
    baseCore.r.lrem('BaseInfoEnterprise:gnqy_socialCode', 0, 'end')
    baseCore.r.rpush('BaseInfoEnterprise:gnqy_socialCode', company_field)
    baseCore.r.rpush('BaseInfoEnterprise:gnqy_socialCode', 'end')
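# Requeue discipline: remove the 'end' sentinel, append the failed record,
# then re-append 'end', so the sentinel stays at the tail of the list and a
# consumer draining the queue still sees it last.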
# Check login status
def checklogin(key):
    t = int(time.time())
    # url = 'https://www.tianyancha.com/search?key=%E4%B8%AD%E5%9B%BD%E7%9F%B3%E6%B2%B9%E5%8C%96%E5%B7%A5%E9%9B%86%E5%9B%A2%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8&sessionNo=1706594186.22975563'
    url = f'https://www.tianyancha.com/search?key={key}&sessionNo={t}'
    driver.get(url)
    time.sleep(2)
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')
    # todo: detect the logged-out state
    # if soup.find('title').text == '会员登录 - 企查查':
    #     log.info('状态---未登录')
    #     soup = ''
    #     return soup
    return soup
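# Note: with the logged-out check above still commented out, checklogin()
# always returns a truthy soup, so the `if not soup` fallbacks in the callers
# below can currently never fire.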
# Prepare for collection
def redaytowork(com_name, social_code, securitiesCode, securitiesShortName, listingDate, category, exchange,
                listType, ynDomestic, countryName, file_name):
    log.info(f'----当前企业{social_code}-{com_name}--开始处理---')
    count = 0
    # Search by the credit code when a real one exists; otherwise fall back to the company name
    if social_code and 'ZZSN' not in social_code and 'ZD' not in social_code:
        soup = checklogin(social_code)
    else:
        soup = checklogin(com_name)
    if not soup:
        log.info("登录失效===重新放入redis")
        # baseCore.r.lpush('BaseInfoEnterprise:gnqy_socialCode', company_field)
        Lreputredis(company_field)
        token.updateTokeen(id_cookie, 2)
        # log.info('=====已重新放入redis,失效cookies已删除======')
        time.sleep(20)
        return count
    else:
        try:
            searchinfo = soup.find('div', class_='index_content-tool-title__K1Z6C').find('span', class_='index_title-count__lDSjB').text
        except:
            try:
                # todo: the company may simply not be found by the search
                errormessage = soup.find('div', class_='index_no-data-reason-title__V3gFY').text
                if '抱歉' in errormessage:
                    log.info('=====搜索不到该企业====')
                    data = [com_name, social_code]
                    # todo: companies that cannot be found are written back to a spreadsheet
                    file.appenddata(file_name, '需处理企业', data)
                    return count
            except:
                log.info("登录失效===重新放入redis")
                # baseCore.r.lpush('UpdateBasdeInfo:SocialCode_CompanyName', company_field)
                Lreputredis(company_field)
                token.updateTokeen(id_cookie, 2)
                # log.info('=====已重新放入redis,cookies已封号======')
                time.sleep(20)
                return count
        else:
            # Start collecting
            try:
                if spiderwork(soup, com_name, securitiesCode, securitiesShortName, listingDate, category, exchange,
                              listType, ynDomestic, countryName, file_name):
                    count += 1
                    log.info(f'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time, time.time())}')
                    token.updateTokeen(id_cookie, 3)
                    return count
                else:
                    return count
            except Exception as e:
                log.info(f'====={social_code}=====获取基本信息失败,重新放入redis=====')
                # baseCore.r.lpush('BaseInfoEnterprise:gnqy_socialCode', company_field)
                Lreputredis(company_field)
                token.updateTokeen(id_cookie, 2)
                log.info('=====已重新放入redis,cookies已封号======')
                return count
def ifbeforename(company_url):
    driver.get(company_url)
    time.sleep(2)
    com_soup = BeautifulSoup(driver.page_source, 'html.parser')
    try:
        businessinfo = com_soup.find('table', {'class': 'index_tableBox__ZadJW'})
    except:
        businessinfo = ''
    if businessinfo:
        try:
            name = businessinfo.find('span', class_='index_history-gray-tags__o8mkl').text
            value = businessinfo.find('span', class_='index_copy-text__ri7W6').text \
                .replace('展开', '').replace(' ', '').replace('…', '').replace('\n', '').replace('复制', '').split('(')[0]
        except:
            name = '曾用名'
            value = ''
        return value
    else:
        return ''
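# Note: selectors such as 'index_tableBox__ZadJW' and 'index_copy-text__ri7W6'
# are hashed CSS-module class names produced by the site's build; they are
# likely to change whenever Tianyancha redeploys, which would silently break
# these lookups.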
# Collect basic info and business-registration info
def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, listType,
               ynDomestic, countryName, file_name):
    qccid = company_url.split('company/')[1]
    log.info(f'====={qccid}=====')
    driver.get(company_url)
    page_source_detail = driver.page_source
    com_soup = BeautifulSoup(page_source_detail, 'html.parser')
    script = com_soup.find('script', attrs={'id': '__NEXT_DATA__'}).text
    script = json.loads(script)
    script = script['props']['pageProps']['dehydratedState']['queries'][0]['state']['data']['data']
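    # The detail page is a Next.js app: the company record is embedded as JSON
    # in the <script id="__NEXT_DATA__"> tag, so the fields below are read from
    # that payload rather than scraped out of the rendered HTML.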
    companyName = script['name']
    updateTime = int(script['updateTimes'])
    updateTime = datetime.datetime.fromtimestamp(updateTime / 1000).strftime('%Y-%m-%d %H:%M:%S')
    creditCode = script['creditCode']
    operName = script['legalPersonName']
    phoneNumber = script['phoneNumber']
    webSite = script['websiteList']
    try:
        email = script['emailList'][0]
    except:
        email = None
    desc = script['baseInfo']
    status = script['regStatus']
    startDate = int(script['estiblishTime'])
    startDate = datetime.datetime.fromtimestamp(startDate / 1000).strftime('%Y-%m-%d %H:%M:%S')
    registCapi = script['regCapital']
    recCap = script['actualCapital']
    checkDate = int(script['approvedTime'])
    checkDate = datetime.datetime.fromtimestamp(checkDate / 1000).strftime('%Y-%m-%d %H:%M:%S')
    orgNo = script['orgNumber']
    No = script['regNumber']
    taxpayerNo = script['taxNumber']
    econKind = script['companyOrgType']
    termStart = int(script['fromTime'])
    termStart = datetime.datetime.fromtimestamp(termStart / 1000).strftime('%Y-%m-%d %H:%M:%S')
    termEnd = script['toTime']
    termEnd = datetime.datetime.fromtimestamp(termEnd / 1000).strftime('%Y-%m-%d %H:%M:%S')
    taxpayerType = script['taxQualification']
    subIndustry = script['industryInfo']['nameLevel3']
    belogOrg = script['regInstitute']
    info = script['staffNumRange']
    canbao = script['socialStaffNum']
    try:
        originalName = script['historyNames']
        originalName = originalName.split('\n')[0]
    except:
        originalName = None
    englishName = script['property3']
    address = script['taxAddress']
    scope = script['businessScope']
    aa_dic = {
        'name': companyName,  # company name
        'shortName': None,  # short name
        'socialCreditCode': creditCode,  # unified social credit code
        'legalPerson': operName,  # legal representative
        'officialPhone': phoneNumber,  # phone
        'officialUrl': webSite,  # official website
        'officialEmail': email,  # email
        'briefInfo': desc,  # brief introduction
        'registerStatus': status,  # registration status
        'incorporationDate': startDate,  # date of incorporation
        'capital': registCapi,  # registered capital
        'paidCapital': recCap,  # paid-in capital
        'approvalDate': checkDate,  # approval date
        'organizationCode': orgNo,  # organization code
        'registerNo': No,  # business registration number
        'taxpayerNo': taxpayerNo,  # taxpayer identification number
        'type': econKind,  # company type
        'businessStartDate': termStart,  # business term from
        'businessEndDate': termEnd,  # business term to
        'taxpayerQualification': taxpayerType,  # taxpayer qualification
        'industry': subIndustry,  # industry
        'region': None,
        'province': None,  # province
        'city': None,  # city
        'county': None,  # county
        'registerDepartment': belogOrg,  # registration authority
        'scale': info,  # staff size
        'insured': canbao,  # number of insured employees
        'beforeName': originalName,  # former name
        'englishName': englishName,  # English name
        'importExportEnterpriseCode': None,  # import/export enterprise code
        'address': address,  # address
        'businessRange': scope,  # business scope
        'status': 0,  # status
        'sourceUpdateTime': updateTime,  # source update time
        'qccId': qccid,
        'ynDomestic': ynDomestic,
        'countryName': countryName,
        'securitiesCode': securitiesCode,
        'securitiesShortName': securitiesShortName,
        'listingDate': listingDate,
        'category': category,
        'exchange': exchange,
        'listingType': listType,
    }
    for key, value in aa_dic.items():
        if value == 'None':
            aa_dic[key] = None
    # send to kafka
    # sendkafka(aa_dic)
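# The millisecond-epoch fields above (updateTimes, estiblishTime, approvedTime,
# fromTime, toTime) all repeat the same conversion; a small helper (a sketch,
# not part of the original code) could factor it out:
#     def ms_to_str(ms):
#         return datetime.datetime.fromtimestamp(int(ms) / 1000).strftime('%Y-%m-%d %H:%M:%S')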
def remove_parentheses(text):
    # Strip full-width (Chinese) parentheses
    text = re.sub(r'（|）', '', text)
    # Strip ASCII parentheses
    text = re.sub(r'\(|\)', '', text)
    return text.replace(' ', '')
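# For example (assuming the full-width pattern above):
#     remove_parentheses('小米通讯技术（武汉）有限公司')  ->  '小米通讯技术武汉有限公司'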
# Check whether the names match
def spiderwork(soup, receptname, securitiesCode, securitiesShortName, listingDate, category, exchange, listType,
               ynDomestic, countryName, file_name):
    company_url = ''
    try:
        company_list = soup.find_all('div', class_='index_search-box__7YVh6')
    except:
        log.info(f'====={social_code}=====获取基本信息失败,重新放入redis=====')
        # baseCore.r.lpush('BaseInfoEnterprise:gnqy_socialCode', company_field)
        Lreputredis(company_field)
        token.updateTokeen(id_cookie, 2)
        log.info('=====已重新放入redis,cookies已封号======')
        return False
    # receptname = '小米通讯技术有限公司'
    for company in company_list:
        info_t = company.find('div', class_='index_name__qEdWi')
        getname = info_t.find('span').text
        log.info(f'接收到的企业名称--{receptname}---采到的企业名称--{getname}')
        if receptname and getname == receptname:
            company_url = info_t.find('a')['href']
            break
        elif not receptname:
            company_url = info_t.find('a')['href']
            break
        else:
            jian_name = remove_parentheses(baseCore.hant_2_hans(getname))
            if remove_parentheses(receptname) == jian_name:
                log.info(f'接收到的企业名称--{receptname}---转化成简体字的企业名称--{jian_name}')
                company_url = info_t.find('a')['href']
                break
            else:
                continue
    if company_url:
        # company_url = 'https://www.qcc.com/firm/80af5085726bb6b9c7770f1e4d0580f4.html'
        # company_url = 'https://www.qcc.com/firm/50f75e8a8859e609ec37976f8abe827d.html'
        # Collect basic info and business-registration info
        spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, listType,
                   ynDomestic, countryName, file_name)
    else:
        # Check whether the hit is a former name
        getname = ''
        for child in company_list[0].find_all():
            if child.has_attr('class'):
                print(child['class'])
                if 'index_name' in child['class'][0]:
                    getname = child.text
                    company_url = child.find('a')['href']
                    break
        # tr = company_list[:1][0]
        # info_t = tr.find('div', class_='index_name__qEdWi')
        # getname = info_t.find('span').text
        if getname:
            log.info(f'------可能是曾用名------接收到的企业名称--{receptname}---采到的企业名称--{getname}')
            beforename = ifbeforename(company_url)
            if beforename == receptname:
                spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange,
                           listType, ynDomestic, countryName, file_name)
            else:
                # No matching company name was found
                data = [com_name, social_code]
                file.appenddata(file_name, '需处理企业', data)
                time.sleep(2)
                return False
        else:
            # No matching company name was found
            data = [com_name, social_code]
            file.appenddata(file_name, '需处理企业', data)
            time.sleep(2)
            return False
    return True
def login():
    # time.sleep(10)
    cookies_list, id_cookie, user_name = token.get_cookies()
    log.info(f'=====当前使用的是{user_name}的cookie======')
    for cookie in cookies_list:
        driver.add_cookie(cookie)
    time.sleep(5)
    driver.refresh()
    # url_test = 'https://www.qcc.com/firm/a5f5bb3776867b3e273cd034d6fb4baa.html'
    # driver.get(url_test)
    # # driver.get('https://www.qcc.com/')
    time.sleep(5)
    return driver, id_cookie
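# Note: WebDriver.add_cookie() only applies to the domain of the currently
# loaded page, so login() depends on __main__ having already navigated the
# driver to https://www.tianyancha.com/ before it is called.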
if __name__ == '__main__':
    taskType = '基本信息/天眼查'
    # driver, id_cookie = login()
    driver = create_driver()
    url = 'https://www.tianyancha.com/'
    driver.get(url)
    driver.maximize_window()
    while True:
        driver, id_cookie = login()
        nowtime = baseCore.getNowTime(1).replace('-', '')[:8]
        file_name = f'./data/国内企业基本信息采集情况.xlsx'
        file.createFile(file_name)
        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'max-age=0',
            'Connection': 'keep-alive',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Cookie': 'TYCID=6f6298905d3011ee96146793e725899d; ssuid=3467188160; _ga=GA1.2.1049062268.1697190322; HWWAFSESID=2eb035742bde209aa60; HWWAFSESTIME=1706586308439; csrfToken=bT_looAjInHGeAnvjjl12L9v; bannerFlag=true; jsid=SEO-BAIDU-ALL-SY-000001; bdHomeCount=0; tyc-user-phone=%255B%252216603863075%2522%252C%2522152%25203756%25200528%2522%252C%2522159%25200367%25203315%2522%255D; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22310689501%22%2C%22first_id%22%3A%2218ad696a2ef680-0ae5cd9293a1538-26031f51-921600-18ad696a2f0dc5%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMThhZDY5NmEyZWY2ODAtMGFlNWNkOTI5M2ExNTM4LTI2MDMxZjUxLTkyMTYwMC0xOGFkNjk2YTJmMGRjNSIsIiRpZGVudGl0eV9sb2dpbl9pZCI6IjMxMDY4OTUwMSJ9%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%22310689501%22%7D%2C%22%24device_id%22%3A%2218ad696a2ef680-0ae5cd9293a1538-26031f51-921600-18ad696a2f0dc5%22%7D; tyc-user-info=%7B%22state%22%3A%220%22%2C%22vipManager%22%3A%220%22%2C%22mobile%22%3A%2218703752600%22%2C%22userId%22%3A%22310689501%22%7D; tyc-user-info-save-time=1707008605562; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxODcwMzc1MjYwMCIsImlhdCI6MTcwNzAwODYwNSwiZXhwIjoxNzA5NjAwNjA1fQ.i8WEUrXjG2X__SnGGlnjwNXyOEdXlslrnvzvKZ_xlVA0rdjdsYHdaieAzkmIjoKbuv6Lc4Eqpb70hWIlq2zeoQ; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1705286979,1706586312; searchSessionId=1707118324.99879267;'
        }
        # cookies_list, id_cookie = token.get_cookies()
        # cookies = {}
        # for cookie in cookies_list:
        #     cookies[cookie['name']] = cookie['value']
        # s = requests.Session()
        # s.cookies.update(cookies)
        start_time = time.time()
        # Fetch one company record
        # company_field = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
        company_field = '|北京华信瑞德信息技术有限公司|北京华信瑞德信息技术有限公司|||||||||||||1|中国内地|||||||'
        if company_field == 'end':
            # This round is finished: send the report email and move on to the next round
            baseCore.sendEmail(file_name)
            time.sleep(20)
            file.deleteFile(file_name)
            continue
        if company_field == '' or company_field is None:
            # No new companies to collect after this round
            file.deleteFile(file_name)
            flag = True
            while flag:
                log.info('--------已没有数据---------')
                time.sleep(30)
                if not baseCore.check_mysql_conn(cnx_):
                    # the 144 database
                    cnx_ = baseCore.cnx
                    cursor_ = cnx_.cursor()
                    log.info('===11数据库重新连接成功===')
                company_field = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
                if company_field:
                    flag = False
                    log.info("-----已添加数据------")
                    # baseCore.r.lpush('BaseInfoEnterprise:gnqy_socialCode', company_field)
                    Lreputredis(company_field)
                    continue
            continue
        # company_field_ = f'|{company_field}'
        social_code = company_field.split('|')[0]
        com_name = company_field.split('|')[2].replace(' ', '')
        ynDomestic = company_field.split('|')[15]
        countryName = company_field.split('|')[16]
        securitiesCode = company_field.split('|')[17]
        securitiesShortName = company_field.split('|')[18]
        listingDate = company_field.split('|')[21]
        category = company_field.split('|')[19]
        exchange = company_field.split('|')[20]
        listType = company_field.split('|')[21]
        # ynDomestic = None
        # countryName = None
        # securitiesCode = None
        # securitiesShortName = None
        # listingDate = None
        # category = None
        # exchange = None
        # listType = None
        count = redaytowork(com_name, social_code, securitiesCode, securitiesShortName, listingDate, category,
                            exchange, listType, ynDomestic, countryName, file_name)
        time.sleep(10)
        break
    # baseCore.r.close()
    # baseCore.sendEmail(file_name)
    # After collection finishes, update the company's run count
    # runType = 'BaseInfoRunCount'
    # baseCore.updateRun(social_code, runType, count)
    # break
    baseCore.close()