提交 a86fe277 作者: 薛凌堃

天眼查基本信息

上级 f751b7bb
......@@ -97,7 +97,7 @@ def baseinfo(com_soup):
span_list = briefTag.find_all('span')
for span in span_list:
if len(span.attrs) == 0:
data['简介'] = span.text
data['简介'] = span.text.split('通过天眼查大数据分析')[0]
break
return data
......@@ -106,107 +106,107 @@ def dic_handle(result_dic):
try:
company_name = result_dic['企业名称']
except:
company_name = ''
company_name = None
try:
CreditCode = result_dic['统一社会信用代码']
except:
CreditCode = ''
CreditCode = None
try:
OperName = result_dic['法定代表人']
except:
OperName = ''
OperName = None
try:
PhoneNumber = result_dic['电话']
except:
PhoneNumber = ''
PhoneNumber = None
try:
WebSite = result_dic['网址']
except:
WebSite = ''
WebSite = None
try:
Email = result_dic['邮箱']
except:
Email = ''
Email = None
try:
Desc = result_dic['简介']
except:
Desc = ''
Desc = None
try:
Status = result_dic['经营状态']
except:
Status = ''
Status = None
try:
StartDate = result_dic['成立日期']
except:
StartDate = ''
StartDate = None
try:
RecCap = result_dic['实缴资本']
except:
RecCap = ''
RecCap = None
try:
RegistCapi = result_dic['注册资本']
except:
RegistCapi = ''
RegistCapi = None
try:
CheckDate = result_dic['核准日期']
except:
CheckDate = ''
CheckDate = None
try:
OrgNo = result_dic['组织机构代码']
except:
OrgNo = ''
OrgNo = None
try:
No = result_dic['工商注册号']
except:
No = ''
No = None
try:
taxpayerNo = result_dic['纳税人识别号']
except:
taxpayerNo = ''
taxpayerNo = None
try:
EconKind = result_dic['企业类型']
except:
EconKind = ''
EconKind = None
try:
TermStart = result_dic['营业期限'].split('至')[0]
except:
TermStart = ''
TermStart = None
try:
TeamEnd = result_dic['营业期限'].split('至')[1]
except:
TeamEnd = ''
TeamEnd = None
try:
TaxpayerType = result_dic['纳税人资质']
except:
TaxpayerType = ''
TaxpayerType = None
try:
SubIndustry = result_dic['国标行业']
except:
SubIndustry = ''
# try:
# SubIndustry = result_dic['国标行业']
# except:
# SubIndustry = ''
try:
region = result_dic['所属地区']
except:
region = ''
region = None
try:
pattern = r'^(.*?省|.*?自治区)?(.*?市|.*?自治州)?(.*?区|.*?县|.*?自治县|.*?市辖区)?(.*?区|.*?县|.*?自治县|.*?市辖区)?$'
matches = re.match(pattern, region)
......@@ -220,53 +220,53 @@ def dic_handle(result_dic):
break
except:
Province = ''
City = ''
County = ''
Province = None
City = None
County = None
try:
BelongOrg = result_dic['登记机关']
except:
BelongOrg = ''
BelongOrg = None
try:
Info = result_dic['人员规模']
except:
Info = ''
Info = None
try:
can_bao = result_dic['参保人数']
except:
can_bao = ''
can_bao = None
try:
OriginalName = result_dic['曾用名']
except:
OriginalName = ''
OriginalName = None
try:
EnglishName = result_dic['英文名称']
except:
EnglishName = ''
EnglishName = None
try:
IxCode = result_dic['进出口企业代码']
except:
IxCode = ''
IxCode = None
try:
Address = result_dic['地址']
except:
Address = ''
Address = None
try:
Scope = result_dic['经营范围']
except:
Scope = ''
Scope = None
aa_dict = {
'name': company_name, # 企业名称
'shortName': '', # 企业简称
'shortName': None, # 企业简称
'socialCreditCode': CreditCode, # 统一社会信用代码
'legalPerson': OperName, # 法定代表人
'officialPhone': PhoneNumber, # 电话
......@@ -285,7 +285,7 @@ def dic_handle(result_dic):
'businessStartDate': TermStart, # 营业期限自
'businessEndDate': TeamEnd, # 营业期限至
'taxpayerQualification': TaxpayerType, # 纳税人资质
'industry': SubIndustry, # 所属行业
'industry': None, # 所属行业
'region': region,
'province': Province, # 所属省
'city': City, # 所属市
......@@ -565,7 +565,7 @@ if __name__ == '__main__':
start_time = time.time()
# 获取企业信息
# company_field = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
company_field = '91130000738711917Q||'
company_field = '91110000710925016E||'
if company_field == 'end':
# 本轮处理完毕,需要发送邮件,并且进入下一轮
baseCore.sendEmail(file_name)
......@@ -604,14 +604,14 @@ if __name__ == '__main__':
# category = company_field.split('|')[19]
# exchange = company_field.split('|')[20]
# listType = company_field.split('|')[21]
ynDomestic = '1'
countryName = ''
securitiesCode = ''
securitiesShortName = ''
listingDate = ''
category = ''
exchange = ''
listType = ''
ynDomestic = None
countryName = None
securitiesCode = None
securitiesShortName = None
listingDate = None
category = None
exchange = None
listType = None
count = redaytowork(com_name, social_code, securitiesCode, securitiesShortName, listingDate, category, exchange,
listType, ynDomestic, countryName, file_name)
......
......@@ -56,7 +56,7 @@ class Token():
query = {
'fenghaoTime': {'$lt': 'updateTime'}, # 封号时间小于更新时间
}
result = db_storage.find_one(query, sort=[('updateTime', -1)])
result = db_storage.find_one(query, sort=[('updateTime', 1)])
cookies = result['cookies']
id_token = result['_id']
return cookies, id_token
......@@ -77,7 +77,7 @@ class Token():
filter = {'_id': ObjectId(id_token)}
# 更新操作
update = {'$set': {'fenghaoTime': datetime.datetime.now()}}
update = {'$set': {'fenghaoTime': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}}
# 执行更新操作
db_storage.update_one(filter, update)
......@@ -86,7 +86,7 @@ class Token():
filter = {'_id': ObjectId(id_token)}
# 更新操作
update = {'$set': {'updateTtime': datetime.datetime.now()}}
update = {'$set': {'updateTime': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}}
# 执行更新操作
db_storage.update_one(filter, update)
......
......@@ -26,7 +26,7 @@ if __name__ == "__main__":
name = input('所属用户:')
driver = create_driver()
driver.get(url)
time.sleep(100)
time.sleep(60)
cookies = driver.get_cookies()
# print(driver.get_cookies())
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论