提交 7bada3ac 作者: LiuLiYuan

Merge remote-tracking branch 'origin/master'

......@@ -17,14 +17,15 @@ import langid
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源
class BaseCore:
# 序列号
__seq = 0
# 代理池 数据库连接
__cnx_proxy =None
__cursor_proxy = None
cnx = None
cursor = None
r = None
# agent 池
__USER_AGENT_LIST = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
......@@ -392,7 +393,7 @@ class BaseCore:
# 从Redis的List中获取并移除一个元素
def redicPullData(self,key):
    """Remove and return one element from the head of the Redis list ``key``.

    Exactly one element is popped per call. The producers visible alongside
    this method append with ``rpush``, so popping from the left hands the
    elements back in the order they were queued (FIFO).

    Args:
        key: Name of the Redis list to pop from.

    Returns:
        The popped element decoded to ``str``, or ``None`` when the list is
        empty (or the popped value is falsy, e.g. an empty byte string).
    """
    # A single lpop only: popping from the tail as well would silently
    # discard a second queued element on every call.
    item = self.r.lpop(key)
    return item.decode() if item else None
# 获得脚本进程PID
......@@ -480,7 +481,7 @@ class BaseCore:
def writerToExcel(self,detailList,filename):
# filename='baidu搜索.xlsx'
# 读取已存在的xlsx文件
existing_data = pd.read_excel(filename,engine='openpyxl')
existing_data = pd.read_excel(filename,engine='openpyxl',dtype=str)
# 创建新的数据
new_data = pd.DataFrame(data=detailList)
# 将新数据添加到现有数据的末尾
......
import time
import pymysql
from base import BaseCore
from apscheduler.schedulers.blocking import BlockingScheduler
# Shared helper instance; its logger, database handles and `r` client are reused below.
basecore = BaseCore.BaseCore()
log = basecore.getLogger()
# "144" database: connection and cursor are taken from BaseCore
# (presumably the host ending in .144 -- confirm in BaseCore).
cnx = basecore.cnx
cursor = basecore.cursor
# Client exposed by BaseCore as `r` (presumably the Redis connection -- confirm in BaseCore).
r = basecore.r
# "11" database: direct connection to the clb_project schema on 114.116.44.11.
# NOTE(review): the credentials are hard-coded in source and the connection is
# opened at import time; consider loading them from configuration or the environment.
cnx_ = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4')
cursor_ = cnx_.cursor()
# # Connect to Redis (earlier direct connection, left commented out)
# r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
#
......@@ -32,9 +39,9 @@ r = basecore.r
#企业动态
def NewsEnterprise():
# #获取国内企业
# gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
# cursor.execute(gn_query)
# gn_result = cursor.fetchall()
gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
cursor.execute(gn_query)
gn_result = cursor.fetchall()
#获取国外企业
gw_query = "select SocialCode from EnterpriseInfo where Place = '2'"
cursor.execute(gw_query)
......@@ -42,20 +49,20 @@ def NewsEnterprise():
gw_social_list = [item[0] for item in gw_result]
#todo:打印长度
print(len(gw_social_list))
# gn_social_list = [item[0] for item in gn_result]
# print(len(gw_social_list))
gn_social_list = [item[0] for item in gn_result]
print('=======')
#将数据插入到redis中
# for item in gn_social_list:
# r.rpush('NewsEnterprise:gnqy_socialCode', item)
count = 0
# count = 0
for item in gw_social_list:
r.rpush('NewsEnterprise:gwqy_socialCode', item)
count+=1
print(item)
print(count)
# count+=1
# print(item)
# print(count)
#企业动态定时任务
def NewsEnterprise_task():
# 实例化一个调度器
......@@ -140,6 +147,29 @@ def BaseInfoEnterprise_task():
print('定时采集异常', e)
pass
#东方财富网财务数据
def FinanceFromEast():
    """Queue social credit codes from the listed-company table in Redis.

    Runs a fixed query (``category = '1'``, at most 10 rows) against
    ``sys_base_enterprise_ipo`` through the module-level ``cursor_`` and
    appends every returned ``social_credit_code`` to the Redis list
    ``FinanceFromEast:finance_socialCode`` via the module-level ``r``.
    """
    redis_key = 'FinanceFromEast:finance_socialCode'
    query = '''select social_credit_code from sys_base_enterprise_ipo where category = '1' limit 10 '''
    cursor_.execute(query)
    # The query selects a single column, so the code is the first field of each row.
    social_codes = [row[0] for row in cursor_.fetchall()]
    print('=======')
    for social_code in social_codes:
        r.rpush(redis_key, social_code)
def FinanceFromEase_task():
    """Run ``FinanceFromEast`` on a quarterly cron schedule.

    Creates a ``BlockingScheduler``, registers ``FinanceFromEast`` with a cron
    trigger (day 1 at 00:00 of every third month starting from January) and
    starts the scheduler. Any ``Exception`` raised by ``start()`` is printed
    and otherwise ignored.
    """
    quarterly_scheduler = BlockingScheduler()
    # month='1-12/3' selects every third month of the year: once per quarter.
    quarterly_scheduler.add_job(
        FinanceFromEast,
        trigger='cron',
        month='1-12/3',
        day='1',
        hour=0,
        minute=0,
    )
    try:
        # The job is not invoked here before the scheduler starts.
        quarterly_scheduler.start()
    except Exception as exc:
        print('定时采集异常', exc)
#微信公众号
def WeiXingetFromSql():
selectSql = "SELECT info_source_code from info_source where site_uri like '%mp.weixin.qq.com%'"
......@@ -207,9 +237,6 @@ def FBS():
r.rpush('NewsEnterpriseFbs:gnqy_socialCode', item)
if __name__ == "__main__":
start = time.time()
# NoticeEnterprise()
......@@ -222,8 +249,9 @@ if __name__ == "__main__":
# FBS()
# NoticeEnterprise_task()
# AnnualEnterprise_task()
NoticeEnterprise()
# NoticeEnterprise()
FinanceFromEast()
log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===')
# cnx.close()
# cursor.close()
cnx_.close()
cursor_.close()
# basecore.close()
#coding=utf-8
#coding=utf-8
......@@ -266,13 +266,13 @@ class BaiduSpider(object):
break
for detail in lists:
publishTag=detail['publishTag']
if publishTag:
pubtime = datetime.datetime.strptime(publishTag, "%Y-%m-%d %H:%M:%S")
needDate='2022-01-01 00:00:00'
needTime = datetime.datetime.strptime(needDate, "%Y-%m-%d %H:%M:%S")
if pubtime < needTime:
timeFlag = True
break
# if publishTag:
# pubtime = datetime.datetime.strptime(publishTag, "%Y-%m-%d %H:%M:%S")
# needDate='2022-01-01 00:00:00'
# needTime = datetime.datetime.strptime(needDate, "%Y-%m-%d %H:%M:%S")
# if pubtime < needTime:
# timeFlag = True
# break
is_member = self.r.sismember('pybaidu_baidu_'+self.wordsCode, durl)
if is_member:
continue
......@@ -398,7 +398,7 @@ class BaiduSpider(object):
processitem=self.getProcessitem(bdetail)
try:
self.sendkafka(processitem)
self.r.sadd('pybaidu_test_'+self.wordsCode, processitem['sourceAddress'])
self.r.sadd('pybaidu_baidu_'+self.wordsCode, processitem['sourceAddress'])
except Exception as e:
self.logger.info("放入kafka失败!")
#插入数据库
......
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
......@@ -190,29 +190,7 @@ if __name__ == '__main__':
while True:
try:
codeList=[]
codeList.append('KW-20221114-0007')
codeList.append('KW-20221114-0006')
codeList.append('KW-20221114-0005')
codeList.append('KW-20221114-0009')
codeList.append('KW-20221114-0011')
codeList.append('KW-20221114-0012')
codeList.append('KW-20221114-0013')
codeList.append('KW-20221114-0014')
codeList.append('KW-20221114-0018')
codeList.append('KW-20221213-0006')
codeList.append('KW-20221114-0008')
codeList.append('KW-20221114-0015')
codeList.append('KW-20221114-0016')
codeList.append('KW-20221114-0017')
codeList.append('KW-20221114-0019')
codeList.append('KW-20221114-0022')
codeList.append('KW-20221114-0023')
codeList.append('KW-20221114-0024')
codeList.append('KW-20221114-0025')
codeList.append('KW-20221114-0026')
codeList.append('KW-20221114-0027')
codeList.append('KW-20221114-0020')
codeList.append('KW-20221114-0021')
codeList.append('KW-20230818-0003')
for codeid in codeList:
try:
# keymsg=baiduTaskJob.getkafka()
......
......@@ -12,6 +12,9 @@ pip install tqdm -i https://pypi.douban.com/simple
pip install goose3 -i https://mirrors.aliyun.com/pypi/simple
pip install Beautifulsoup4 -i https://mirrors.aliyun.com/pypi/simple
pip install langid -i https://mirrors.aliyun.com/pypi/simple/
pip install jieba -i https://mirrors.aliyun.com/pypi/simple
selenium==3.141.0
selenium-wire==5.1.0
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论