提交 a86fe277 作者: 薛凌堃

天眼查基本信息

上级 f751b7bb
...@@ -97,7 +97,7 @@ def baseinfo(com_soup): ...@@ -97,7 +97,7 @@ def baseinfo(com_soup):
span_list = briefTag.find_all('span') span_list = briefTag.find_all('span')
for span in span_list: for span in span_list:
if len(span.attrs) == 0: if len(span.attrs) == 0:
data['简介'] = span.text data['简介'] = span.text.split('通过天眼查大数据分析')[0]
break break
return data return data
...@@ -106,107 +106,107 @@ def dic_handle(result_dic): ...@@ -106,107 +106,107 @@ def dic_handle(result_dic):
try: try:
company_name = result_dic['企业名称'] company_name = result_dic['企业名称']
except: except:
company_name = '' company_name = None
try: try:
CreditCode = result_dic['统一社会信用代码'] CreditCode = result_dic['统一社会信用代码']
except: except:
CreditCode = '' CreditCode = None
try: try:
OperName = result_dic['法定代表人'] OperName = result_dic['法定代表人']
except: except:
OperName = '' OperName = None
try: try:
PhoneNumber = result_dic['电话'] PhoneNumber = result_dic['电话']
except: except:
PhoneNumber = '' PhoneNumber = None
try: try:
WebSite = result_dic['网址'] WebSite = result_dic['网址']
except: except:
WebSite = '' WebSite = None
try: try:
Email = result_dic['邮箱'] Email = result_dic['邮箱']
except: except:
Email = '' Email = None
try: try:
Desc = result_dic['简介'] Desc = result_dic['简介']
except: except:
Desc = '' Desc = None
try: try:
Status = result_dic['经营状态'] Status = result_dic['经营状态']
except: except:
Status = '' Status = None
try: try:
StartDate = result_dic['成立日期'] StartDate = result_dic['成立日期']
except: except:
StartDate = '' StartDate = None
try: try:
RecCap = result_dic['实缴资本'] RecCap = result_dic['实缴资本']
except: except:
RecCap = '' RecCap = None
try: try:
RegistCapi = result_dic['注册资本'] RegistCapi = result_dic['注册资本']
except: except:
RegistCapi = '' RegistCapi = None
try: try:
CheckDate = result_dic['核准日期'] CheckDate = result_dic['核准日期']
except: except:
CheckDate = '' CheckDate = None
try: try:
OrgNo = result_dic['组织机构代码'] OrgNo = result_dic['组织机构代码']
except: except:
OrgNo = '' OrgNo = None
try: try:
No = result_dic['工商注册号'] No = result_dic['工商注册号']
except: except:
No = '' No = None
try: try:
taxpayerNo = result_dic['纳税人识别号'] taxpayerNo = result_dic['纳税人识别号']
except: except:
taxpayerNo = '' taxpayerNo = None
try: try:
EconKind = result_dic['企业类型'] EconKind = result_dic['企业类型']
except: except:
EconKind = '' EconKind = None
try: try:
TermStart = result_dic['营业期限'].split('至')[0] TermStart = result_dic['营业期限'].split('至')[0]
except: except:
TermStart = '' TermStart = None
try: try:
TeamEnd = result_dic['营业期限'].split('至')[1] TeamEnd = result_dic['营业期限'].split('至')[1]
except: except:
TeamEnd = '' TeamEnd = None
try: try:
TaxpayerType = result_dic['纳税人资质'] TaxpayerType = result_dic['纳税人资质']
except: except:
TaxpayerType = '' TaxpayerType = None
try: # try:
SubIndustry = result_dic['国标行业'] # SubIndustry = result_dic['国标行业']
except: # except:
SubIndustry = '' # SubIndustry = ''
try: try:
region = result_dic['所属地区'] region = result_dic['所属地区']
except: except:
region = '' region = None
try: try:
pattern = r'^(.*?省|.*?自治区)?(.*?市|.*?自治州)?(.*?区|.*?县|.*?自治县|.*?市辖区)?(.*?区|.*?县|.*?自治县|.*?市辖区)?$' pattern = r'^(.*?省|.*?自治区)?(.*?市|.*?自治州)?(.*?区|.*?县|.*?自治县|.*?市辖区)?(.*?区|.*?县|.*?自治县|.*?市辖区)?$'
matches = re.match(pattern, region) matches = re.match(pattern, region)
...@@ -220,53 +220,53 @@ def dic_handle(result_dic): ...@@ -220,53 +220,53 @@ def dic_handle(result_dic):
break break
except: except:
Province = '' Province = None
City = '' City = None
County = '' County = None
try: try:
BelongOrg = result_dic['登记机关'] BelongOrg = result_dic['登记机关']
except: except:
BelongOrg = '' BelongOrg = None
try: try:
Info = result_dic['人员规模'] Info = result_dic['人员规模']
except: except:
Info = '' Info = None
try: try:
can_bao = result_dic['参保人数'] can_bao = result_dic['参保人数']
except: except:
can_bao = '' can_bao = None
try: try:
OriginalName = result_dic['曾用名'] OriginalName = result_dic['曾用名']
except: except:
OriginalName = '' OriginalName = None
try: try:
EnglishName = result_dic['英文名称'] EnglishName = result_dic['英文名称']
except: except:
EnglishName = '' EnglishName = None
try: try:
IxCode = result_dic['进出口企业代码'] IxCode = result_dic['进出口企业代码']
except: except:
IxCode = '' IxCode = None
try: try:
Address = result_dic['地址'] Address = result_dic['地址']
except: except:
Address = '' Address = None
try: try:
Scope = result_dic['经营范围'] Scope = result_dic['经营范围']
except: except:
Scope = '' Scope = None
aa_dict = { aa_dict = {
'name': company_name, # 企业名称 'name': company_name, # 企业名称
'shortName': '', # 企业简称 'shortName': None, # 企业简称
'socialCreditCode': CreditCode, # 统一社会信用代码 'socialCreditCode': CreditCode, # 统一社会信用代码
'legalPerson': OperName, # 法定代表人 'legalPerson': OperName, # 法定代表人
'officialPhone': PhoneNumber, # 电话 'officialPhone': PhoneNumber, # 电话
...@@ -285,7 +285,7 @@ def dic_handle(result_dic): ...@@ -285,7 +285,7 @@ def dic_handle(result_dic):
'businessStartDate': TermStart, # 营业期限自 'businessStartDate': TermStart, # 营业期限自
'businessEndDate': TeamEnd, # 营业期限至 'businessEndDate': TeamEnd, # 营业期限至
'taxpayerQualification': TaxpayerType, # 纳税人资质 'taxpayerQualification': TaxpayerType, # 纳税人资质
'industry': SubIndustry, # 所属行业 'industry': None, # 所属行业
'region': region, 'region': region,
'province': Province, # 所属省 'province': Province, # 所属省
'city': City, # 所属市 'city': City, # 所属市
...@@ -565,7 +565,7 @@ if __name__ == '__main__': ...@@ -565,7 +565,7 @@ if __name__ == '__main__':
start_time = time.time() start_time = time.time()
# 获取企业信息 # 获取企业信息
# company_field = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode') # company_field = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
company_field = '91130000738711917Q||' company_field = '91110000710925016E||'
if company_field == 'end': if company_field == 'end':
# 本轮处理完毕,需要发送邮件,并且进入下一轮 # 本轮处理完毕,需要发送邮件,并且进入下一轮
baseCore.sendEmail(file_name) baseCore.sendEmail(file_name)
...@@ -604,14 +604,14 @@ if __name__ == '__main__': ...@@ -604,14 +604,14 @@ if __name__ == '__main__':
# category = company_field.split('|')[19] # category = company_field.split('|')[19]
# exchange = company_field.split('|')[20] # exchange = company_field.split('|')[20]
# listType = company_field.split('|')[21] # listType = company_field.split('|')[21]
ynDomestic = '1' ynDomestic = None
countryName = '' countryName = None
securitiesCode = '' securitiesCode = None
securitiesShortName = '' securitiesShortName = None
listingDate = '' listingDate = None
category = '' category = None
exchange = '' exchange = None
listType = '' listType = None
count = redaytowork(com_name, social_code, securitiesCode, securitiesShortName, listingDate, category, exchange, count = redaytowork(com_name, social_code, securitiesCode, securitiesShortName, listingDate, category, exchange,
listType, ynDomestic, countryName, file_name) listType, ynDomestic, countryName, file_name)
......
...@@ -56,7 +56,7 @@ class Token(): ...@@ -56,7 +56,7 @@ class Token():
query = { query = {
'fenghaoTime': {'$lt': 'updateTime'}, # 封号时间小于更新时间 'fenghaoTime': {'$lt': 'updateTime'}, # 封号时间小于更新时间
} }
result = db_storage.find_one(query, sort=[('updateTime', -1)]) result = db_storage.find_one(query, sort=[('updateTime', 1)])
cookies = result['cookies'] cookies = result['cookies']
id_token = result['_id'] id_token = result['_id']
return cookies, id_token return cookies, id_token
...@@ -77,7 +77,7 @@ class Token(): ...@@ -77,7 +77,7 @@ class Token():
filter = {'_id': ObjectId(id_token)} filter = {'_id': ObjectId(id_token)}
# 更新操作 # 更新操作
update = {'$set': {'fenghaoTime': datetime.datetime.now()}} update = {'$set': {'fenghaoTime': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}}
# 执行更新操作 # 执行更新操作
db_storage.update_one(filter, update) db_storage.update_one(filter, update)
...@@ -86,7 +86,7 @@ class Token(): ...@@ -86,7 +86,7 @@ class Token():
filter = {'_id': ObjectId(id_token)} filter = {'_id': ObjectId(id_token)}
# 更新操作 # 更新操作
update = {'$set': {'updateTtime': datetime.datetime.now()}} update = {'$set': {'updateTime': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}}
# 执行更新操作 # 执行更新操作
db_storage.update_one(filter, update) db_storage.update_one(filter, update)
......
...@@ -26,7 +26,7 @@ if __name__ == "__main__": ...@@ -26,7 +26,7 @@ if __name__ == "__main__":
name = input('所属用户:') name = input('所属用户:')
driver = create_driver() driver = create_driver()
driver.get(url) driver.get(url)
time.sleep(100) time.sleep(60)
cookies = driver.get_cookies() cookies = driver.get_cookies()
# print(driver.get_cookies()) # print(driver.get_cookies())
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论