提交 70f8ebff 作者: 刘伟刚

Merge remote-tracking branch 'origin/master'

...@@ -17,14 +17,15 @@ import langid ...@@ -17,14 +17,15 @@ import langid
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源 # 注意 程序退出前 调用BaseCore.close() 关闭相关资源
class BaseCore: class BaseCore:
# 序列号 # 序列号
__seq = 0 __seq = 0
# 代理池 数据库连接 # 代理池 数据库连接
__cnx_proxy =None __cnx_proxy =None
__cursor_proxy = None __cursor_proxy = None
cnx = None
cursor = None
r = None
# agent 池 # agent 池
__USER_AGENT_LIST = [ __USER_AGENT_LIST = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
...@@ -392,7 +393,7 @@ class BaseCore: ...@@ -392,7 +393,7 @@ class BaseCore:
# 从Redis的List中获取并移除一个元素 # 从Redis的List中获取并移除一个元素
def redicPullData(self,key): def redicPullData(self,key):
item = self.r.rpop(key) item = self.r.lpop(key)
return item.decode() if item else None return item.decode() if item else None
# 获得脚本进程PID # 获得脚本进程PID
...@@ -480,7 +481,7 @@ class BaseCore: ...@@ -480,7 +481,7 @@ class BaseCore:
def writerToExcel(self,detailList,filename): def writerToExcel(self,detailList,filename):
# filename='baidu搜索.xlsx' # filename='baidu搜索.xlsx'
# 读取已存在的xlsx文件 # 读取已存在的xlsx文件
existing_data = pd.read_excel(filename,engine='openpyxl') existing_data = pd.read_excel(filename,engine='openpyxl',dtype=str)
# 创建新的数据 # 创建新的数据
new_data = pd.DataFrame(data=detailList) new_data = pd.DataFrame(data=detailList)
# 将新数据添加到现有数据的末尾 # 将新数据添加到现有数据的末尾
......
...@@ -20,7 +20,9 @@ headers={ ...@@ -20,7 +20,9 @@ headers={
cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',charset='utf8mb4') cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',charset='utf8mb4')
cursor= cnx.cursor() cursor= cnx.cursor()
def getTotal(pageSize): taskType = '股票代码/东方财富网'
def getTotal(pageSize,start):
total=0 total=0
for num in range(3): for num in range(3):
try: try:
...@@ -34,10 +36,15 @@ def getTotal(pageSize): ...@@ -34,10 +36,15 @@ def getTotal(pageSize):
content = baseCore.getSubStr(content, '{', '}') content = baseCore.getSubStr(content, '{', '}')
retJson = json.loads(content) retJson = json.loads(content)
total = retJson['data']['total'] total = retJson['data']['total']
response.close()
break break
except Exception as e: except Exception as e:
log.info(f"------第{num}次出错---{e}") log.info(f"------第{num}次出错---{e}")
continue continue
exception = '链接失败'
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog('', taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception)
return total return total
...@@ -67,21 +74,30 @@ def getPageDta(pageIndex,pageSize,totalPage): ...@@ -67,21 +74,30 @@ def getPageDta(pageIndex,pageSize,totalPage):
continue continue
else: else:
log.info(f"{gpdm}-------{name}---新增") log.info(f"{gpdm}-------{name}---新增")
insertSql= f"insert into gpdm(gpdm,name,state,create_date) values ('{gpdm}','{name}',1,now())"
cursor.execute(insertSql)
cnx.commit()
insertSql= f"insert into gpdm(gpdm,name,state,create_date) values ('{gpdm}','{name}',1,now())" response.close()
cursor.execute(insertSql)
cnx.commit()
log.info(f"【{pageIndex}/{totalPage}】-----------end,耗时{baseCore.getTimeCost(start, time.time())}") log.info(f"【{pageIndex}/{totalPage}】-----------end,耗时{baseCore.getTimeCost(start, time.time())}")
break break
except Exception as e: except Exception as e:
log.info(f"------第{num}次出错---{e}") log.info(f"------第{num}次出错---{e}")
continue continue
exception = f'第{pageIndex}页链接失败'
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog('', taskType, state, takeTime, '', exception)
def doJob(): def doJob():
pageSize=20 pageSize=20
total=getTotal(pageSize) start_time = time.time()
total=getTotal(pageSize,start_time)
if total==0: if total==0:
exception = '股票代码总数为零'
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog('', taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception)
log.info(f"股票代码总数-----------{total},请检查") log.info(f"股票代码总数-----------{total},请检查")
return return
log.info(f"股票代码总数-----------{total}") log.info(f"股票代码总数-----------{total}")
...@@ -91,6 +107,9 @@ def doJob(): ...@@ -91,6 +107,9 @@ def doJob():
totalPage = total // pageSize + 1 totalPage = total // pageSize + 1
for pageIndex in range(1, totalPage + 1): for pageIndex in range(1, totalPage + 1):
getPageDta(pageIndex,pageSize,totalPage) getPageDta(pageIndex,pageSize,totalPage)
state = 1
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog('', taskType, state, takeTime, '', '')
# 释放资源 # 释放资源
cursor.close() cursor.close()
cnx.close() cnx.close()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论