Merge remote-tracking branch 'origin/master'

7bada3ac · LiuLiYuan · a237c960 · f0246e72 · 7bada3ac · 7bada3ac
--- a/base/BaseCore.py
+++ b/base/BaseCore.py
@@ -17,14 +17,15 @@ import langid
 # 注意 程序退出前 调用BaseCore.close() 关闭相关资源
 class BaseCore:
    # 序列号
    __seq = 0
    # 代理池 数据库连接
    __cnx_proxy =None
    __cursor_proxy = None
+    cnx = None
+    cursor = None
+    r = None
    # agent 池
    __USER_AGENT_LIST = [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
@@ -392,7 +393,7 @@ class BaseCore:
    # 从Redis的List中获取并移除一个元素
    def redicPullData(self,key):
-        item = self.r.rpop(key)
+        item = self.r.lpop(key)
        return item.decode() if item else None
    # 获得脚本进程PID
@@ -480,7 +481,7 @@ class BaseCore:
    def writerToExcel(self,detailList,filename):
        # filename='baidu搜索.xlsx'
        # 读取已存在的xlsx文件
-        existing_data = pd.read_excel(filename,engine='openpyxl')
+        existing_data = pd.read_excel(filename,engine='openpyxl',dtype=str)
        # 创建新的数据
        new_data = pd.DataFrame(data=detailList)
        # 将新数据添加到现有数据的末尾

--- a/base/RedisPPData.py
+++ b/base/RedisPPData.py
 import time
+import pymysql
 from base import BaseCore
 from apscheduler.schedulers.blocking import BlockingScheduler
 basecore = BaseCore.BaseCore()
 log = basecore.getLogger()
+#144数据库
 cnx = basecore.cnx
 cursor = basecore.cursor
 r = basecore.r
+#11数据库
+cnx_ = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4')
+cursor_ = cnx_.cursor()
 # # 连接到Redis
 # r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
 #
@@ -32,9 +39,9 @@ r = basecore.r
 #企业动态
 def NewsEnterprise():
    # #获取国内企业
-    # gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
+    gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
-    # cursor.execute(gn_query)
+    cursor.execute(gn_query)
-    # gn_result = cursor.fetchall()
+    gn_result = cursor.fetchall()
    #获取国外企业
    gw_query = "select SocialCode from EnterpriseInfo where Place = '2'"
    cursor.execute(gw_query)
@@ -42,20 +49,20 @@ def NewsEnterprise():
    gw_social_list = [item[0] for item in gw_result]
    #todo:打印长度
-    print(len(gw_social_list))
+    # print(len(gw_social_list))
-    # gn_social_list = [item[0] for item in gn_result]
+    gn_social_list = [item[0] for item in gn_result]
    print('=======')
    #将数据插入到redis中
    # for item in gn_social_list:
    #     r.rpush('NewsEnterprise:gnqy_socialCode', item)
-    count = 0
+    # count = 0
    for item in gw_social_list:
        r.rpush('NewsEnterprise:gwqy_socialCode', item)
-        count+=1
+    #     count+=1
-        print(item)
+    #     print(item)
-    print(count)
+    # print(count)
 #企业动态定时任务
 def NewsEnterprise_task():
    # 实例化一个调度器
@@ -140,6 +147,29 @@ def BaseInfoEnterprise_task():
        print('定时采集异常', e)
        pass
+#东方财富网财务数据
+def FinanceFromEast():
+    #从上市企业库中读取数据
+    sql_sel = '''select social_credit_code from sys_base_enterprise_ipo where category = '1' limit 10 '''
+    cursor_.execute(sql_sel)
+    finance = cursor_.fetchall()
+    finance_list = [item[0] for item in finance]
+    print('=======')
+    for item in finance_list:
+        r.rpush('FinanceFromEast:finance_socialCode', item)
+def FinanceFromEase_task():
+    # 实例化一个调度器
+    scheduler = BlockingScheduler()
+    # 每个季度执行一次
+    scheduler.add_job(FinanceFromEast, 'cron', month='1-12/3', day='1',hour=0, minute=0)
+    try:
+        # redisPushData  # 定时开始前执行一次
+        scheduler.start()
+    except Exception as e:
+        print('定时采集异常', e)
+        pass
 #微信公众号
 def WeiXingetFromSql():
    selectSql = "SELECT info_source_code from info_source where site_uri like '%mp.weixin.qq.com%'"
@@ -207,9 +237,6 @@ def FBS():
            r.rpush('NewsEnterpriseFbs:gnqy_socialCode', item)
 if __name__ == "__main__":
    start = time.time()
    # NoticeEnterprise()
@@ -222,8 +249,9 @@ if __name__ == "__main__":
    # FBS()
    # NoticeEnterprise_task()
    # AnnualEnterprise_task()
-    NoticeEnterprise()
+    # NoticeEnterprise()
+    FinanceFromEast()
    log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时：{basecore.getTimeCost(start,time.time())}===')
-    # cnx.close()
+    cnx_.close()
-    # cursor.close()
+    cursor_.close()
    # basecore.close()
--- a/comData/weixin_solo/oneWeixin.py
+++ b/comData/weixin_solo/oneWeixin.py
--- a/百度采集/baidu_comm/baiduSpider.py
+++ b/百度采集/baidu_comm/baiduSpider.py
 #coding=utf-8
@@ -266,13 +266,13 @@ class BaiduSpider(object):
                    break
                for detail in lists:
                    publishTag=detail['publishTag']
-                    if publishTag:
+                    # if publishTag:
-                        pubtime = datetime.datetime.strptime(publishTag, "%Y-%m-%d %H:%M:%S")
+                    #     pubtime = datetime.datetime.strptime(publishTag, "%Y-%m-%d %H:%M:%S")
-                        needDate='2022-01-01 00:00:00'
+                    #     needDate='2022-01-01 00:00:00'
-                        needTime = datetime.datetime.strptime(needDate, "%Y-%m-%d %H:%M:%S")
+                    #     needTime = datetime.datetime.strptime(needDate, "%Y-%m-%d %H:%M:%S")
-                        if pubtime < needTime:
+                    #     if pubtime < needTime:
-                            timeFlag = True
+                    #         timeFlag = True
-                            break
+                    #         break
                    is_member = self.r.sismember('pybaidu_baidu_'+self.wordsCode, durl)
                    if is_member:
                        continue
@@ -398,7 +398,7 @@ class BaiduSpider(object):
                    processitem=self.getProcessitem(bdetail)
                    try:
                        self.sendkafka(processitem)
-                        self.r.sadd('pybaidu_test_'+self.wordsCode, processitem['sourceAddress'])
+                        self.r.sadd('pybaidu_baidu_'+self.wordsCode, processitem['sourceAddress'])
                    except Exception as e:
                        self.logger.info("放入kafka失败！")
                    #插入数据库

--- a/百度采集/baidu_comm/baidutaskJob_loc.py
+++ b/百度采集/baidu_comm/baidutaskJob_loc.py
 # -*- coding: utf-8 -*-
@@ -190,29 +190,7 @@ if __name__ == '__main__':
    while True:
        try:
            codeList=[]
-            codeList.append('KW-20221114-0007')
+            codeList.append('KW-20230818-0003')
-            codeList.append('KW-20221114-0006')
-            codeList.append('KW-20221114-0005')
-            codeList.append('KW-20221114-0009')
-            codeList.append('KW-20221114-0011')
-            codeList.append('KW-20221114-0012')
-            codeList.append('KW-20221114-0013')
-            codeList.append('KW-20221114-0014')
-            codeList.append('KW-20221114-0018')
-            codeList.append('KW-20221213-0006')
-            codeList.append('KW-20221114-0008')
-            codeList.append('KW-20221114-0015')
-            codeList.append('KW-20221114-0016')
-            codeList.append('KW-20221114-0017')
-            codeList.append('KW-20221114-0019')
-            codeList.append('KW-20221114-0022')
-            codeList.append('KW-20221114-0023')
-            codeList.append('KW-20221114-0024')
-            codeList.append('KW-20221114-0025')
-            codeList.append('KW-20221114-0026')
-            codeList.append('KW-20221114-0027')
-            codeList.append('KW-20221114-0020')
-            codeList.append('KW-20221114-0021')
            for codeid in codeList:
                try:
                    # keymsg=baiduTaskJob.getkafka()

--- a/百度采集/baidu_comm/requirements.txt
+++ b/百度采集/baidu_comm/requirements.txt
@@ -12,6 +12,9 @@ pip install tqdm  -i https://pypi.douban.com/simple
 pip install goose3 -i https://mirrors.aliyun.com/pypi/simple
 pip install Beautifulsoup4 -i https://mirrors.aliyun.com/pypi/simple
 pip install langid -i https://mirrors.aliyun.com/pypi/simple/
+pip install jieba -i https://mirrors.aliyun.com/pypi/simple
 selenium==3.141.0
 selenium-wire==5.1.0