提交 70f8ebff 作者: 刘伟刚

Merge remote-tracking branch 'origin/master'

......@@ -17,14 +17,15 @@ import langid
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源
class BaseCore:
# 序列号
__seq = 0
# 代理池 数据库连接
__cnx_proxy =None
__cursor_proxy = None
cnx = None
cursor = None
r = None
# agent 池
__USER_AGENT_LIST = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
......@@ -392,7 +393,7 @@ class BaseCore:
# Pop (fetch and remove) one element from the Redis list stored at `key`.
def redicPullData(self,key):
# NOTE(review): the next two assignments look like the before/after pair of
# this diff hunk (rpop replaced by lpop); the +/- markers are not visible
# here, so confirm which one the committed file actually keeps.
item = self.r.rpop(key)
item = self.r.lpop(key)
# Decode the popped value; return None when `item` is falsy (nothing popped).
return item.decode() if item else None
# 获得脚本进程PID
......@@ -480,7 +481,7 @@ class BaseCore:
def writerToExcel(self,detailList,filename):
# filename='baidu搜索.xlsx'
# 读取已存在的xlsx文件
existing_data = pd.read_excel(filename,engine='openpyxl')
existing_data = pd.read_excel(filename,engine='openpyxl',dtype=str)
# 创建新的数据
new_data = pd.DataFrame(data=detailList)
# 将新数据添加到现有数据的末尾
......
......@@ -20,7 +20,9 @@ headers={
# Module-level pymysql connection and cursor shared by the functions below.
# NOTE(review): host, user and password are hard-coded in source; consider
# loading them from configuration or environment variables instead.
cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',charset='utf8mb4')
cursor= cnx.cursor()
def getTotal(pageSize):
taskType = '股票代码/东方财富网'
def getTotal(pageSize,start):
total=0
for num in range(3):
try:
......@@ -34,10 +36,15 @@ def getTotal(pageSize):
content = baseCore.getSubStr(content, '{', '}')
retJson = json.loads(content)
total = retJson['data']['total']
response.close()
break
except Exception as e:
log.info(f"------第{num}次出错---{e}")
continue
exception = '链接失败'
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog('', taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception)
return total
......@@ -67,21 +74,30 @@ def getPageDta(pageIndex,pageSize,totalPage):
continue
else:
log.info(f"{gpdm}-------{name}---新增")
insertSql= f"insert into gpdm(gpdm,name,state,create_date) values ('{gpdm}','{name}',1,now())"
cursor.execute(insertSql)
cnx.commit()
insertSql= f"insert into gpdm(gpdm,name,state,create_date) values ('{gpdm}','{name}',1,now())"
cursor.execute(insertSql)
cnx.commit()
response.close()
log.info(f"【{pageIndex}/{totalPage}】-----------end,耗时{baseCore.getTimeCost(start, time.time())}")
break
except Exception as e:
log.info(f"------第{num}次出错---{e}")
continue
exception = f'第{pageIndex}页链接失败'
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog('', taskType, state, takeTime, '', exception)
def doJob():
pageSize=20
total=getTotal(pageSize)
start_time = time.time()
total=getTotal(pageSize,start_time)
if total==0:
exception = '股票代码总数为零'
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog('', taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception)
log.info(f"股票代码总数-----------{total},请检查")
return
log.info(f"股票代码总数-----------{total}")
......@@ -91,6 +107,9 @@ def doJob():
totalPage = total // pageSize + 1
for pageIndex in range(1, totalPage + 1):
getPageDta(pageIndex,pageSize,totalPage)
state = 1
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog('', taskType, state, takeTime, '', '')
# 释放资源
cursor.close()
cnx.close()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论