提交 e6179927 作者: 薛凌堃

天眼查企业基本信息脚本调整

上级 c14c8e55
......@@ -411,6 +411,7 @@ def ifbeforename(company_url):
# 采集基本信息和工商信息
def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name):
qccid = company_url.split('company/')[1]
log.info(f'====={qccid}=====')
req_ = s.get(headers=headers, url=company_url)
com_soup = BeautifulSoup(req_.content, 'html.parser')
......@@ -494,7 +495,7 @@ def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, ca
aa_dic['category'] = category
aa_dic['exchange'] = exchange
aa_dic['listingType'] = listType
# print(aa_dic)
print(aa_dic)
sendkafka(aa_dic)
# print(aa_dic)
......@@ -533,6 +534,7 @@ def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, ca
aa_dic['exchange'] = exchange
aa_dic['listingType'] = listType
sendkafka(aa_dic)
print(aa_dic)
def remove_parentheses(text):
# 清除中文小括号
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论