提交 91b061ca 作者: 薛凌堃

动态解决过数据问题

上级 ebda8083
......@@ -56,6 +56,7 @@ def NewsEnterprise():
cnx,cursor = connectSql()
# #获取国内企业
gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
# gn_query = "SELECT a.SocialCode From EnterpriseInfo a ,EnterpriseType b WHERE a.SocialCode = b.SocialCode AND b.type=13 AND a.Place=1"
cursor.execute(gn_query)
gn_result = cursor.fetchall()
#获取国外企业
......@@ -105,7 +106,7 @@ def NoticeEnterprise():
gn_social_list = [item[0] for item in gn_result]
print('=======')
for item in gn_social_list:
r.rpush('NoticeEnterprise:gnqy_socialCode', item)
r.rpush('NoticeEnterprise:gnqy_socialCode_add', item)
closeSql(cnx,cursor)
#企业公告定时任务
......@@ -153,7 +154,7 @@ def BaseInfoEnterprise_task():
def CorPerson():
cnx, cursor = connectSql()
# gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
gn_query = "SELECT a.SocialCode From EnterpriseInfo a ,EnterpriseType b WHERE a.SocialCode = b.SocialCode AND b.type=8 AND a.Place=1"
gn_query = "SELECT a.SocialCode From EnterpriseInfo a ,EnterpriseType b WHERE a.SocialCode = b.SocialCode AND b.type=13 AND a.Place=1"
cursor.execute(gn_query)
gn_result = cursor.fetchall()
cnx.commit()
......@@ -563,12 +564,13 @@ if __name__ == "__main__":
# kegaishifan()
# shuangbaiqiye()
# zhuangjingtexind()
NoticeEnterprise()
# NoticeEnterprise()
# AnnualEnterpriseIPO()
# AnnualEnterprise()
# BaseInfoEnterprise()
# BaseInfoEnterpriseAbroad()
# NewsEnterprise_task()
# NewsEnterprise()
NewsEnterprise()
# CorPerson()
# china100()
# global100()
......@@ -585,7 +587,6 @@ if __name__ == "__main__":
# AnnualEnterpriseUS()
# NoticeEnterprise_task()
# AnnualEnterprise_task()
# NoticeEnterprise()
# FinanceFromEast()
log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===')
import json
import os
import random
import subprocess
import requests, time, pymysql
import jieba
......@@ -35,8 +37,9 @@ headers = {
'Referer': 'https://www.tianyancha.com/',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51'
}
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor
#144数据库连接,使用连接池
cnx_ = baseCore.pool_caiji.connection()
cursor_ = cnx_.cursor()
taskType = '企业动态/天眼查'
......@@ -48,7 +51,7 @@ def beinWork(tyc_code, social_code,start_time):
t = time.time()
url = f'https://capi.tianyancha.com/cloud-yq-news/company/detail/publicmsg/news/webSimple?_={t}&id={tyc_code}&ps={pageSize}&pn=1&emotion=-100&event=-100'
try:
for m in range(0, 3):
for m in range(0,3):
ip = baseCore.get_proxy()
headers['User-Agent'] = baseCore.getRandomUserAgent()
response = requests.get(url=url, headers=headers, proxies=ip, verify=False)
......@@ -57,11 +60,18 @@ def beinWork(tyc_code, social_code,start_time):
if (response.status_code == 200):
pass
except Exception as e:
#todo:重新放入redis中
baseCore.rePutIntoR('NoticeEnterprise:gnqy_socialCode',social_code)
log.error(f"{tyc_code}-----获取总数接口失败")
error = '获取总数接口失败'
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{error}----{e}')
#获取当前进程pid
current_pid = baseCore.getPID()
#todo: 重新启动新进程,杀死当前进程
subprocess.Popen([sys.executable] + sys.argv)
os.kill(current_pid,9)
return retData
try:
json_1 = json.loads(response.content.decode('utf-8'))
......@@ -337,6 +347,6 @@ def doJob():
baseCore.close()
if __name__ == '__main__':
log.info(f'当前进程id为{baseCore.getPID()}')
doJob()
天眼查企业动态
天眼查企业动态
采集脚本调整 11.06 tyc_qydt_add.py:
原因:总是出现“获取总接口数失败”的错误,但是重新启动又不会报错,导致无法debug判断问题所在,
目前解决方法: 1.重新启动一个新的进程,杀死当前进程
`import os
import sys
import subprocess
sys.argv.append("新进程已启动")
# 重新启动新进程
subprocess.Popen([sys.executable] + sys.argv + ["当前进程"])
os._exit(0)
#获取当前进程pid
current_pid = baseCore.getPID()
#todo: 重新启动新进程,杀死当前进程
subprocess.Popen([sys.executable] + sys.argv)
os.kill(current_pid,9)`
2.使用数据库连接池避免数据库长时间未连接导致连接关闭
`cnx_ = baseCore.pool_caiji.connection()
cursor_ = cnx_.cursor()`
![img.png](img.png)
import os
import sys
import subprocess
import socket
# 获取本机IP
import time
# 获得脚本进程PID
def getPID():
PID = os.getpid()
return PID
# 重新启动一个新的进程并添加标识
def restart():
# 在新进程中打印标识
print("新进程已启动")
# 检查是否是新进程
def check_if_new_process():
# 检查是否有标识
if "新进程已启动" in sys.argv:
return True
return False
# 检查是否是当前进程
def check_if_current_process():
# 检查是否有标识
if "当前进程" in sys.argv:
return True
return False
while True:
if check_if_new_process():
# 在新进程中进行一些操作
print("这是新进程")
pid = getPID()
print(f'新前进程的ip为:{pid}')
if check_if_current_process():
print('新进程为当前进程')
sys.argv.remove('新进程已启动')
# 延迟一段时间,以便观察新进程运行
time.sleep(5)
else:
# 在当前进程中杀死自己并重新启动新进程
pid_ = getPID()
print(f"这是当前进程,准备重新启动,pid为{pid_}")
# 添加标识以标记为新进程
sys.argv.append("新进程已启动")
# 重新启动新进程
subprocess.Popen([sys.executable] + sys.argv + ["当前进程"])
os._exit(0)
# import os
# import sys
# import subprocess
#
# # 重新启动一个新的进程
# subprocess.Popen([sys.executable] + sys.argv)
#
# # 杀死当前进程
# os._exit(0)
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论