提交 36713e81 作者: 丁双波

Merge remote-tracking branch 'origin/master'

...@@ -475,6 +475,7 @@ class BaseCore: ...@@ -475,6 +475,7 @@ class BaseCore:
except: except:
log = self.getLogger() log = self.getLogger()
log.info('=========数据库操作失败========') log.info('=========数据库操作失败========')
return data return data
# 更新企业采集次数 # 更新企业采集次数
...@@ -527,6 +528,13 @@ class BaseCore: ...@@ -527,6 +528,13 @@ class BaseCore:
token = self.cursor.fetchone()[0] token = self.cursor.fetchone()[0]
return token return token
# Fetch the Tianyancha (TYC) token
def GetTYCToken(self):
    """Return the token stored in the ``TYC_token`` table.

    Executes ``select token from TYC_token`` on ``self.cursor`` and returns
    the first column of the first row fetched.

    Returns:
        The value of the ``token`` column of the first row.

    Raises:
        LookupError: if the query yields no row, i.e. ``fetchone()``
            returned ``None``. Without this guard the failure surfaced as
            an opaque "'NoneType' object is not subscriptable" TypeError.
    """
    query = 'select token from TYC_token'
    self.cursor.execute(query)
    row = self.cursor.fetchone()
    # fetchone() gives None when the result set is empty; fail with a
    # message that names the real cause instead of indexing into None.
    if row is None:
        raise LookupError('no token row found in TYC_token')
    return row[0]
#检测语言 #检测语言
def detect_language(self, text): def detect_language(self, text):
# 使用langid.py判断文本的语言 # 使用langid.py判断文本的语言
......
...@@ -315,12 +315,13 @@ def FBS(): ...@@ -315,12 +315,13 @@ def FBS():
for item in gw_social_list: for item in gw_social_list:
r.rpush('NewsEnterpriseFbs:gwqy_socialCode', item) r.rpush('NewsEnterpriseFbs:gwqy_socialCode', item)
r.rpush('BaseInfoEnterpriseFbs:gwqy_social_code',item) # r.rpush('BaseInfoEnterpriseFbs:gwqy_social_code',item)
for item in gn_social_list: # for item in gn_social_list:
if not r.exists(item): # if not r.exists(item):
r.rpush('NewsEnterpriseFbs:gnqy_socialCode', item) # r.rpush('NewsEnterpriseFbs:gnqy_socialCode', item)
r.rpush('NoticeEnterpriseFbs:gnqy_socialCode',item) # r.rpush('CorPersonEnterpriseFbs:gnqy_socialCode', item)
r.rpush('BaseInfoEnterpriseFbs:gnqy_social_code',item) # r.rpush('NoticeEnterpriseFbs:gnqy_socialCode',item)
# r.rpush('BaseInfoEnterpriseFbs:gnqy_social_code',item)
closeSql(cnx,cursor) closeSql(cnx,cursor)
#将IPO的国外股票代码放到redis中 #将IPO的国外股票代码放到redis中
......
...@@ -37,6 +37,11 @@ def find_id_by_name(start,token,name): ...@@ -37,6 +37,11 @@ def find_id_by_name(start,token,name):
time.sleep(5) time.sleep(5)
continue continue
time.sleep(2) time.sleep(2)
#{'status': 40101, 'message': '无效的sessionToken!'}
if resp_dict['status']==40101:
KeyNo = False
log.info(f'====token失效====时间{baseCore.getTimeCost(start, time.time())}')
return KeyNo
try: try:
if resp_dict['result']['Result']: if resp_dict['result']['Result']:
result_dict = resp_dict['result']['Result'][0] result_dict = resp_dict['result']['Result'][0]
......
...@@ -19,7 +19,7 @@ jieba.cut("必须加载jieba") ...@@ -19,7 +19,7 @@ jieba.cut("必须加载jieba")
smart =smart_extractor.SmartExtractor('cn') smart =smart_extractor.SmartExtractor('cn')
baseCore = BaseCore() baseCore = BaseCore()
log = baseCore.getLogger() log = baseCore.getLogger()
cnx = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4') cnx = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4')
cursor= cnx.cursor() cursor= cnx.cursor()
cnx_ = baseCore.cnx cnx_ = baseCore.cnx
...@@ -37,7 +37,7 @@ headers = { ...@@ -37,7 +37,7 @@ headers = {
'Referer': 'https://www.tianyancha.com/', 'Referer': 'https://www.tianyancha.com/',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51' 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51'
} }
taskType = '企业动态/天眼查/福布斯' taskType = '企业动态/天眼查'
def beinWork(tyc_code, social_code): def beinWork(tyc_code, social_code):
start_time = time.time() start_time = time.time()
time.sleep(3) time.sleep(3)
...@@ -154,11 +154,14 @@ def beinWork(tyc_code, social_code): ...@@ -154,11 +154,14 @@ def beinWork(tyc_code, social_code):
# 开始进行智能解析 # 开始进行智能解析
# lang = baseCore.detect_language(title) # lang = baseCore.detect_language(title)
# smart = smart_extractor.SmartExtractor(lang) # smart = smart_extractor.SmartExtractor(lang)
contentText = smart.extract_by_url(link).text #带标签正文
contentWithTag = smart.extract_by_url(link).text
#不带标签正文
content = smart.extract_by_url(link).cleaned_text
# time.sleep(3) # time.sleep(3)
except Exception as e: except Exception as e:
contentText = '' contentWithTag = ''
if contentText == '': if contentWithTag == '':
log.error(f'获取正文失败:--------{tyc_code}--------{num}--------{link}') log.error(f'获取正文失败:--------{tyc_code}--------{num}--------{link}')
e = '获取正文失败' e = '获取正文失败'
state = 0 state = 0
...@@ -174,7 +177,7 @@ def beinWork(tyc_code, social_code): ...@@ -174,7 +177,7 @@ def beinWork(tyc_code, social_code):
continue continue
try: try:
#todo:更换插入的库 #todo:更换插入的库
insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,author,type) values(%s,%s,%s,%s,%s)''' insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,create_time) values(%s,%s,%s,%s,now())'''
# 动态信息列表 # 动态信息列表
up_okCount = up_okCount + 1 up_okCount = up_okCount + 1
...@@ -182,14 +185,73 @@ def beinWork(tyc_code, social_code): ...@@ -182,14 +185,73 @@ def beinWork(tyc_code, social_code):
social_code, social_code,
link, link,
'天眼查', '天眼查',
source,
'2', '2',
] ]
cursor_.execute(insert_sql, tuple(list_info)) cursor_.execute(insert_sql, tuple(list_info))
cnx_.commit() cnx_.commit()
# 采集一条资讯记录一条,记录该企业采到了多少的资讯 # 采集一条资讯记录一条,记录该企业采到了多少的资讯
log.info(f'{social_code}----{link}:新增一条') log.info(f'{social_code}----{link}:新增一条')
# 采集一条资讯记录一条,记录该企业采到了多少的资讯
log.info(f'{social_code}----{link}:新增一条')
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:插入一条数据,并传入kafka
dic_news = {
'attachmentIds': '',
'author': '',
'content': content,
'contentWithTag': contentWithTag,
'createDate': time_now,
'deleteFlag': '0',
'id': '',
'keyWords': '',
'lang': 'zh',
'origin': '天眼查',
'publishDate': time_format,
'sid': '1684032033495392257',
'sourceAddress': link, # 原文链接
'summary': info_page['abstracts'],
'title': title,
'type': 2,
'socialCreditCode': social_code,
'year': time_format[:4]
}
except Exception as e:
log.info(f'传输失败:{social_code}----{link}')
# e = '数据库传输失败'
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, link, e)
continue
try:
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
kafka_result = producer.send("researchReportTopic",
json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
print(kafka_result.get(timeout=10))
dic_result = {
'success': 'ture',
'message': '操作成功',
'code': '200',
}
log.info(dic_result)
# 传输成功,写入日志中
state = 1
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, link, '')
# return True
except Exception as e: except Exception as e:
dic_result = {
'success': 'false',
'message': '操作失败',
'code': '204',
'e': e
}
log.error(dic_result)
e = 'Kafka操作失败'
state = 0 state = 0
takeTime = baseCore.getTimeCost(start_time, time.time()) takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, link, e) baseCore.recordLog(social_code, taskType, state, takeTime, link, e)
...@@ -205,8 +267,9 @@ def doJob(): ...@@ -205,8 +267,9 @@ def doJob():
while True: while True:
start = time.time() start = time.time()
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息 # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息 天眼查ID19276488
social_code = baseCore.redicPullData('NewsEnterpriseFbs:gnqy_socialCode') # social_code = baseCore.redicPullData('NewsEnterpriseFbs:gnqy_socialCode')
social_code = '912301001275921118'
if social_code == None: if social_code == None:
time.sleep(20) time.sleep(20)
continue continue
...@@ -222,19 +285,25 @@ def doJob(): ...@@ -222,19 +285,25 @@ def doJob():
id = data[0] id = data[0]
xydm = data[2] xydm = data[2]
tycid = data[11] tycid = data[11]
if tycid == None: if tycid == None or tycid== '':
try: try:
retData = getTycIdByXYDM(xydm) retData = getTycIdByXYDM(xydm)
tycid = retData['tycData']['id'] if retData:
tycid = retData['id']
# todo:写入数据库 # todo:写入数据库
updateSql = f"update Enterprise set TYCID = '{tycid}' where SocialCode = '{xydm}'" updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
cursor_.execute(updateSql) cursor_.execute(updateSql)
cnx_.commit() cnx_.commit()
else:
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
baseCore.rePutIntoR('NewsEnterpriseFbs:gnqy_socialCode', social_code)
except: except:
state = 0 state = 0
takeTime = baseCore.getTimeCost(start, time.time()) takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败') baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode', social_code) baseCore.rePutIntoR('NewsEnterpriseFbs:gnqy_socialCode', social_code)
continue continue
count = data[17] count = data[17]
log.info(f"{id}---{xydm}----{tycid}----开始处理") log.info(f"{id}---{xydm}----{tycid}----开始处理")
...@@ -242,8 +311,10 @@ def doJob(): ...@@ -242,8 +311,10 @@ def doJob():
# 开始采集企业动态 # 开始采集企业动态
retData = beinWork(tycid, xydm) retData = beinWork(tycid, xydm)
# 信息采集完成后将该企业的采集次数更新
# baseCore.updateRun(xydm, runType, count) runType = 'NewsRunCount'
count += 1
baseCore.updateRun(xydm, runType, count)
total = retData['total'] total = retData['total']
up_okCount = retData['up_okCount'] up_okCount = retData['up_okCount']
up_errorCount = retData['up_errorCount'] up_errorCount = retData['up_errorCount']
...@@ -257,7 +328,7 @@ def doJob(): ...@@ -257,7 +328,7 @@ def doJob():
takeTime = baseCore.getTimeCost(start, time.time()) takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}') baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
time.sleep(5) time.sleep(5)
# break
cursor.close() cursor.close()
cnx.close() cnx.close()
# 释放资源 # 释放资源
......
...@@ -10,9 +10,15 @@ from base.BaseCore import BaseCore ...@@ -10,9 +10,15 @@ from base.BaseCore import BaseCore
requests.adapters.DEFAULT_RETRIES = 5 requests.adapters.DEFAULT_RETRIES = 5
baseCore = BaseCore() baseCore = BaseCore()
log = baseCore.getLogger() log = baseCore.getLogger()
headers={ # headers={
'X-AUTH-TOKEN':'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzY4MzgxNjk4NCIsImlhdCI6MTY5MDE3ODYyOCwiZXhwIjoxNjkyNzcwNjI4fQ.VV3Zoa4RM5nVN8UXBc0-81KMGqLzTOme6rButeETGfFQi7p5h4ydg8CFrEsizr_iFwB3_BVaKR2o2xR-M4ipbQ', # 'X-AUTH-TOKEN':'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzY4MzgxNjk4NCIsImlhdCI6MTY5MDE3ODYyOCwiZXhwIjoxNjkyNzcwNjI4fQ.VV3Zoa4RM5nVN8UXBc0-81KMGqLzTOme6rButeETGfFQi7p5h4ydg8CFrEsizr_iFwB3_BVaKR2o2xR-M4ipbQ',
'X-TYCID':'77e997401d5f11ee9e91d5a0fd3c0b83', # 'X-TYCID':'77e997401d5f11ee9e91d5a0fd3c0b83',
# 'version':'TYC-Web',
# 'Content-Type':'application/json;charset=UTF-8'
# }
headers = {
'X-TYCID':'30c1289042f511ee9182cd1e1bcaa517',
# 'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzU5MjQ4MTgzOSIsImlhdCI6MTY5MjkzMzIxMiwiZXhwIjoxNjk1NTI1MjEyfQ.BKxDem8fpgeDHrIgm3qCoF76ueHtQSG1DggiTl4FAaoNKt4gem6NTX1XYndPXqVj9TXfl-8yp2kKE3jY66dyig',
'version':'TYC-Web', 'version':'TYC-Web',
'Content-Type':'application/json;charset=UTF-8' 'Content-Type':'application/json;charset=UTF-8'
} }
...@@ -27,6 +33,7 @@ def getTycIdByXYDM(xydm): ...@@ -27,6 +33,7 @@ def getTycIdByXYDM(xydm):
paramJsonData = {'keyword':xydm} paramJsonData = {'keyword':xydm}
try: try:
headers['User-Agent'] = baseCore.getRandomUserAgent() headers['User-Agent'] = baseCore.getRandomUserAgent()
headers['X-AUTH-TOKEN'] = baseCore.GetTYCToken()
response = requests.post(url,json=paramJsonData,headers=headers,verify=False, proxies=ip) response = requests.post(url,json=paramJsonData,headers=headers,verify=False, proxies=ip)
time.sleep(random.randint(3, 5)) time.sleep(random.randint(3, 5))
retJsonData =json.loads(response.content.decode('utf-8')) retJsonData =json.loads(response.content.decode('utf-8'))
...@@ -35,14 +42,14 @@ def getTycIdByXYDM(xydm): ...@@ -35,14 +42,14 @@ def getTycIdByXYDM(xydm):
retData['state'] = True retData['state'] = True
retData['tycData'] = retJsonData['data'][0] retData['tycData'] = retJsonData['data'][0]
response.close() response.close()
return retData return retData['tycData']
else: else:
log.error(f"{xydm}------{retJsonData}") log.error(f"{xydm}------{retJsonData}")
response.close() response.close()
return retData return retData['tycData']
except Exception as e: except:
log.error(f"{xydm}---exception---{e}") log.error(f"---{xydm}--天眼查token失效---")
return retData return retData['tycData']
# 更新天眼查企业基本信息 # 更新天眼查企业基本信息
......
...@@ -3,7 +3,6 @@ import json ...@@ -3,7 +3,6 @@ import json
import requests, time, pymysql import requests, time, pymysql
import jieba import jieba
import sys import sys
from kafka import KafkaProducer from kafka import KafkaProducer
from getTycId import getTycIdByXYDM from getTycId import getTycIdByXYDM
from base.BaseCore import BaseCore from base.BaseCore import BaseCore
...@@ -12,15 +11,15 @@ from base.smart import smart_extractor ...@@ -12,15 +11,15 @@ from base.smart import smart_extractor
# import BaseCore # import BaseCore
# from smart import smart_extractor # from smart import smart_extractor
import urllib3 import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# 初始化,设置中文分词 # 初始化,设置中文分词
jieba.cut("必须加载jieba") jieba.cut("必须加载jieba")
smart =smart_extractor.SmartExtractor('cn') smart =smart_extractor.SmartExtractor('cn')
baseCore = BaseCore() baseCore = BaseCore()
log = baseCore.getLogger() log = baseCore.getLogger()
cnx = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4') cnx = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4')
cursor = cnx.cursor() cursor = cnx.cursor()
pageSize = 10 pageSize = 10
headers = { headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
...@@ -134,10 +133,10 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -134,10 +133,10 @@ def beinWork(tyc_code, social_code,start_time):
link = info_page['uri'] link = info_page['uri']
try: try:
sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s and social_credit_code=%s and type='2' ''' sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s and social_credit_code=%s and type='2' '''
cursor.execute(sel_sql, (link, social_code)) cursor_.execute(sel_sql, (link, social_code))
except Exception as e: except Exception as e:
print(e) print(e)
selects = cursor.fetchone() selects = cursor_.fetchone()
if selects: if selects:
log.info(f'{tyc_code}-----{social_code}----{link}:已经存在') log.info(f'{tyc_code}-----{social_code}----{link}:已经存在')
...@@ -156,7 +155,10 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -156,7 +155,10 @@ def beinWork(tyc_code, social_code,start_time):
# 开始进行智能解析 # 开始进行智能解析
# lang = baseCore.detect_language(title) # lang = baseCore.detect_language(title)
# smart = smart_extractor.SmartExtractor(lang) # smart = smart_extractor.SmartExtractor(lang)
#带标签正文
contentText = smart.extract_by_url(link).text contentText = smart.extract_by_url(link).text
#不带标签正文
content = smart.extract_by_url(link).cleaned_text
# time.sleep(3) # time.sleep(3)
except Exception as e: except Exception as e:
contentText = '' contentText = ''
...@@ -175,36 +177,25 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -175,36 +177,25 @@ def beinWork(tyc_code, social_code,start_time):
pass pass
continue continue
try: try:
insert_sql = '''insert into brpa_source_article(social_credit_code,title,summary,content,publish_date,source_address,origin,author,type,lang) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''' insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,create_time) values(%s,%s,%s,%s,now())'''
# 动态信息列表 # 动态信息列表
up_okCount = up_okCount + 1 up_okCount = up_okCount + 1
list_info = [ list_info = [
social_code, social_code,
title,
info_page['abstracts'], # 摘要
contentText, # 正文
time_format, # 发布时间
link, link,
'天眼查', '天眼查',
source,
'2', '2',
'zh'
] ]
cursor.execute(insert_sql, tuple(list_info)) cursor_.execute(insert_sql, tuple(list_info))
cnx.commit() cnx_.commit()
# 采集一条资讯记录一条,记录该企业采到了多少的资讯 # 采集一条资讯记录一条,记录该企业采到了多少的资讯
log.info(f'{social_code}----{link}:新增一条') log.info(f'{social_code}----{link}:新增一条')
sel_sql = "select article_id from brpa_source_article where source_address = %s and social_credit_code = %s"
cursor.execute(sel_sql, (link, social_code))
row = cursor.fetchone()
id = row[0]
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:插入一条数据,并传入kafka # todo:插入一条数据,并传入kafka
dic_news = { dic_news = {
'attachmentIds': id, 'attachmentIds': '',
'author': '', 'author': '',
'content': contentText, 'content': content,
'contentWithTag': contentText, 'contentWithTag': contentText,
'createDate': time_now, 'createDate': time_now,
'deleteFlag': '0', 'deleteFlag': '0',
...@@ -222,7 +213,6 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -222,7 +213,6 @@ def beinWork(tyc_code, social_code,start_time):
'year': time_format[:4] 'year': time_format[:4]
} }
except Exception as e: except Exception as e:
log.info(f'传输失败:{social_code}----{link}') log.info(f'传输失败:{social_code}----{link}')
e = '数据库传输失败' e = '数据库传输失败'
state = 0 state = 0
...@@ -237,7 +227,6 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -237,7 +227,6 @@ def beinWork(tyc_code, social_code,start_time):
json.dumps(dic_news, ensure_ascii=False).encode('utf8')) json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
print(kafka_result.get(timeout=10)) print(kafka_result.get(timeout=10))
dic_result = { dic_result = {
'success': 'ture', 'success': 'ture',
'message': '操作成功', 'message': '操作成功',
...@@ -250,7 +239,6 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -250,7 +239,6 @@ def beinWork(tyc_code, social_code,start_time):
baseCore.recordLog(social_code, taskType, state, takeTime, link, '') baseCore.recordLog(social_code, taskType, state, takeTime, link, '')
# return True # return True
except Exception as e: except Exception as e:
dic_result = { dic_result = {
'success': 'false', 'success': 'false',
'message': '操作失败', 'message': '操作失败',
...@@ -269,12 +257,12 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -269,12 +257,12 @@ def beinWork(tyc_code, social_code,start_time):
retData['up_repetCount'] = up_repetCount retData['up_repetCount'] = up_repetCount
return retData return retData
# 日志信息保存至现已创建好数据库中,因此并没有再对此前保存日志信息数据库进行保存 # 日志信息保存至现已创建好数据库中,因此并没有再对此前保存日志信息数据库进行保存
def doJob(): def doJob():
while True: while True:
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息 # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
social_code = baseCore.redicPullData('NewsEnterprise:gnqy_socialCode') # social_code = baseCore.redicPullData('NewsEnterprise:gnqy_socialCode')
social_code = '912301001275921118'
# 判断 如果Redis中已经没有数据,则等待 # 判断 如果Redis中已经没有数据,则等待
if social_code == None: if social_code == None:
time.sleep(20) time.sleep(20)
...@@ -291,28 +279,31 @@ def doJob(): ...@@ -291,28 +279,31 @@ def doJob():
id = data[0] id = data[0]
xydm = data[2] xydm = data[2]
tycid = data[11] tycid = data[11]
if tycid == None: if tycid == None or tycid == '':
try: try:
retData = getTycIdByXYDM(xydm) retData = getTycIdByXYDM(xydm)
tycid = retData['tycData']['id'] if retData:
#todo:写入数据库 tycid = retData['id']
updateSql = f"update Enterprise set TYCID = '{tycid}' where SocialCode = '{xydm}'" # todo:写入数据库
updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
cursor_.execute(updateSql) cursor_.execute(updateSql)
cnx_.commit() cnx_.commit()
else:
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
log.info(f'======={social_code}====重新放入redis====')
baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode', social_code)
continue
except: except:
state = 0 state = 0
takeTime = baseCore.getTimeCost(start, time.time()) takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败') baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode',social_code) baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode', social_code)
continue continue
count = data[17] count = data[17]
log.info(f"{id}---{xydm}----{tycid}----开始处理") log.info(f"{id}---{xydm}----{tycid}----开始处理")
start_time = time.time() start_time = time.time()
# updateBeginSql = f"update ssqy_tyc set update_state=2,date_time=now() where id={id}"
# cursor.execute(updateBeginSql)
# cnx.commit()
# 开始采集企业动态 # 开始采集企业动态
retData = beinWork(tycid, xydm,start_time) retData = beinWork(tycid, xydm,start_time)
# 信息采集完成后将该企业的采集次数更新 # 信息采集完成后将该企业的采集次数更新
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论