提交 91b061ca 作者: 薛凌堃

动态解决过数据问题

上级 ebda8083
...@@ -56,6 +56,7 @@ def NewsEnterprise(): ...@@ -56,6 +56,7 @@ def NewsEnterprise():
cnx,cursor = connectSql() cnx,cursor = connectSql()
# #获取国内企业 # #获取国内企业
gn_query = "select SocialCode from EnterpriseInfo where Place = '1'" gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
# gn_query = "SELECT a.SocialCode From EnterpriseInfo a ,EnterpriseType b WHERE a.SocialCode = b.SocialCode AND b.type=13 AND a.Place=1"
cursor.execute(gn_query) cursor.execute(gn_query)
gn_result = cursor.fetchall() gn_result = cursor.fetchall()
#获取国外企业 #获取国外企业
...@@ -105,7 +106,7 @@ def NoticeEnterprise(): ...@@ -105,7 +106,7 @@ def NoticeEnterprise():
gn_social_list = [item[0] for item in gn_result] gn_social_list = [item[0] for item in gn_result]
print('=======') print('=======')
for item in gn_social_list: for item in gn_social_list:
r.rpush('NoticeEnterprise:gnqy_socialCode', item) r.rpush('NoticeEnterprise:gnqy_socialCode_add', item)
closeSql(cnx,cursor) closeSql(cnx,cursor)
#企业公告定时任务 #企业公告定时任务
...@@ -153,7 +154,7 @@ def BaseInfoEnterprise_task(): ...@@ -153,7 +154,7 @@ def BaseInfoEnterprise_task():
def CorPerson(): def CorPerson():
cnx, cursor = connectSql() cnx, cursor = connectSql()
# gn_query = "select SocialCode from EnterpriseInfo where Place = '1'" # gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
gn_query = "SELECT a.SocialCode From EnterpriseInfo a ,EnterpriseType b WHERE a.SocialCode = b.SocialCode AND b.type=8 AND a.Place=1" gn_query = "SELECT a.SocialCode From EnterpriseInfo a ,EnterpriseType b WHERE a.SocialCode = b.SocialCode AND b.type=13 AND a.Place=1"
cursor.execute(gn_query) cursor.execute(gn_query)
gn_result = cursor.fetchall() gn_result = cursor.fetchall()
cnx.commit() cnx.commit()
...@@ -563,12 +564,13 @@ if __name__ == "__main__": ...@@ -563,12 +564,13 @@ if __name__ == "__main__":
# kegaishifan() # kegaishifan()
# shuangbaiqiye() # shuangbaiqiye()
# zhuangjingtexind() # zhuangjingtexind()
NoticeEnterprise() # NoticeEnterprise()
# AnnualEnterpriseIPO() # AnnualEnterpriseIPO()
# AnnualEnterprise() # AnnualEnterprise()
# BaseInfoEnterprise()
# BaseInfoEnterpriseAbroad() # BaseInfoEnterpriseAbroad()
# NewsEnterprise_task() # NewsEnterprise_task()
# NewsEnterprise() NewsEnterprise()
# CorPerson() # CorPerson()
# china100() # china100()
# global100() # global100()
...@@ -585,7 +587,6 @@ if __name__ == "__main__": ...@@ -585,7 +587,6 @@ if __name__ == "__main__":
# AnnualEnterpriseUS() # AnnualEnterpriseUS()
# NoticeEnterprise_task() # NoticeEnterprise_task()
# AnnualEnterprise_task() # AnnualEnterprise_task()
# NoticeEnterprise()
# FinanceFromEast() # FinanceFromEast()
log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===') log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===')
import json import json
import os
import random import random
import subprocess
import requests, time, pymysql import requests, time, pymysql
import jieba import jieba
...@@ -35,8 +37,9 @@ headers = { ...@@ -35,8 +37,9 @@ headers = {
'Referer': 'https://www.tianyancha.com/', 'Referer': 'https://www.tianyancha.com/',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51' 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51'
} }
cnx_ = baseCore.cnx #144数据库连接,使用连接池
cursor_ = baseCore.cursor cnx_ = baseCore.pool_caiji.connection()
cursor_ = cnx_.cursor()
taskType = '企业动态/天眼查' taskType = '企业动态/天眼查'
...@@ -48,7 +51,7 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -48,7 +51,7 @@ def beinWork(tyc_code, social_code,start_time):
t = time.time() t = time.time()
url = f'https://capi.tianyancha.com/cloud-yq-news/company/detail/publicmsg/news/webSimple?_={t}&id={tyc_code}&ps={pageSize}&pn=1&emotion=-100&event=-100' url = f'https://capi.tianyancha.com/cloud-yq-news/company/detail/publicmsg/news/webSimple?_={t}&id={tyc_code}&ps={pageSize}&pn=1&emotion=-100&event=-100'
try: try:
for m in range(0, 3): for m in range(0,3):
ip = baseCore.get_proxy() ip = baseCore.get_proxy()
headers['User-Agent'] = baseCore.getRandomUserAgent() headers['User-Agent'] = baseCore.getRandomUserAgent()
response = requests.get(url=url, headers=headers, proxies=ip, verify=False) response = requests.get(url=url, headers=headers, proxies=ip, verify=False)
...@@ -57,11 +60,18 @@ def beinWork(tyc_code, social_code,start_time): ...@@ -57,11 +60,18 @@ def beinWork(tyc_code, social_code,start_time):
if (response.status_code == 200): if (response.status_code == 200):
pass pass
except Exception as e: except Exception as e:
#todo:重新放入redis中
baseCore.rePutIntoR('NoticeEnterprise:gnqy_socialCode',social_code)
log.error(f"{tyc_code}-----获取总数接口失败") log.error(f"{tyc_code}-----获取总数接口失败")
error = '获取总数接口失败' error = '获取总数接口失败'
state = 0 state = 0
takeTime = baseCore.getTimeCost(start_time, time.time()) takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{error}----{e}') baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{error}----{e}')
#获取当前进程pid
current_pid = baseCore.getPID()
#todo: 重新启动新进程,杀死当前进程
subprocess.Popen([sys.executable] + sys.argv)
os.kill(current_pid,9)
return retData return retData
try: try:
json_1 = json.loads(response.content.decode('utf-8')) json_1 = json.loads(response.content.decode('utf-8'))
...@@ -337,6 +347,6 @@ def doJob(): ...@@ -337,6 +347,6 @@ def doJob():
baseCore.close() baseCore.close()
if __name__ == '__main__': if __name__ == '__main__':
log.info(f'当前进程id为{baseCore.getPID()}')
doJob() doJob()
天眼查企业动态
天眼查企业动态
采集脚本调整 11.06 tyc_qydt_add.py:
原因:总是出现“获取总接口数失败”的错误,但是重新启动又不会报错,导致无法debug判断问题所在,
目前解决方法: 1.重新启动一个新的进程,杀死当前进程
`import os
import sys
import subprocess
sys.argv.append("新进程已启动")
# 重新启动新进程
subprocess.Popen([sys.executable] + sys.argv + ["当前进程"])
os._exit(0)
#获取当前进程pid
current_pid = baseCore.getPID()
#todo: 重新启动新进程,杀死当前进程
subprocess.Popen([sys.executable] + sys.argv)
os.kill(current_pid,9)`
2.使用数据库连接池避免数据库长时间未连接导致连接关闭
`cnx_ = baseCore.pool_caiji.connection()
cursor_ = cnx_.cursor()`
![img.png](img.png)
import os
import sys
import subprocess
import socket
# 获取本机IP
import time
# 获得脚本进程PID
def getPID():
PID = os.getpid()
return PID
# 重新启动一个新的进程并添加标识
def restart():
# 在新进程中打印标识
print("新进程已启动")
# 检查是否是新进程
def check_if_new_process():
# 检查是否有标识
if "新进程已启动" in sys.argv:
return True
return False
# 检查是否是当前进程
def check_if_current_process():
# 检查是否有标识
if "当前进程" in sys.argv:
return True
return False
while True:
if check_if_new_process():
# 在新进程中进行一些操作
print("这是新进程")
pid = getPID()
print(f'新前进程的ip为:{pid}')
if check_if_current_process():
print('新进程为当前进程')
sys.argv.remove('新进程已启动')
# 延迟一段时间,以便观察新进程运行
time.sleep(5)
else:
# 在当前进程中杀死自己并重新启动新进程
pid_ = getPID()
print(f"这是当前进程,准备重新启动,pid为{pid_}")
# 添加标识以标记为新进程
sys.argv.append("新进程已启动")
# 重新启动新进程
subprocess.Popen([sys.executable] + sys.argv + ["当前进程"])
os._exit(0)
# import os
# import sys
# import subprocess
#
# # 重新启动一个新的进程
# subprocess.Popen([sys.executable] + sys.argv)
#
# # 杀死当前进程
# os._exit(0)
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论