Merge remote-tracking branch 'origin/master'

610b0b53 · LiuLiYuan · bf4b15ac · e96f6a29 · 610b0b53 · 610b0b53
--- a/base/BaseCore.py
+++ b/base/BaseCore.py
@@ -7,21 +7,17 @@ import logbook
 import logbook.more
 # 核心工具包
 import pymysql
+import redis
 from selenium import webdriver
-# 注意 程序退出前 调用BaseCore.close() 关闭相关资源
 from selenium.webdriver.chrome.service import Service
+# 注意 程序退出前 调用BaseCore.close() 关闭相关资源
 class BaseCore:
    # 序列号
    __seq = 0
    # 代理池 数据库连接
-    __cnx_proxy = None
+    __cnx_proxy =None
    __cursor_proxy = None
-    # 基本信息 数据库连接
-    __cnx_infomation = None
-    __cursor_infomation = None
    # agent 池
    __USER_AGENT_LIST = [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
@@ -215,26 +211,29 @@ class BaseCore:
        'Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5'
    ]
+    # 连接到Redis
+    r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
    def close(self):
        try:
            self.__cursor_proxy.close()
            self.__cnx_proxy.close()
-            self.__cursor_infomation.close()
+            self.cursor.close()
-            self.__cnx_infomation.close()
+            self.cnx.close()
-        except:
+        except :
            pass
    def __init__(self):
        self.__cnx_proxy = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='clb_project',
                                           charset='utf8mb4')
        self.__cursor_proxy = self.__cnx_proxy.cursor()
-        self.__cnx_infomation = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',
+        self.cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',
-                                                charset='utf8mb4')
+                                   charset='utf8mb4')
-        self.__cursor_infomation = self.__cnx_infomation.cursor()
+        self.cursor = self.cnx.cursor()
        pass
    # 计算耗时
-    def getTimeCost(self, start, end):
+    def getTimeCost(self,start, end):
        seconds = int(end - start)
        m, s = divmod(seconds, 60)
        h, m = divmod(m, 60)
@@ -247,7 +246,6 @@ class BaseCore:
        else:
            ms = int((end - start) * 1000)
            return "%d毫秒" % (ms)
    # 当前时间格式化
    # 1 : 2001-01-01 12:00:00 %Y-%m-%d %H:%M:%S
    # 2 : 010101120000 %y%m%d%H%M%S
@@ -277,7 +275,7 @@ class BaseCore:
        return "ZZSN" + self.getNowTime(2) + str(self.__seq).zfill(3)
    # 日志格式
-    def logFormate(self, record, handler):
+    def logFormate(self,record, handler):
        formate = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}".format(
            date=record.time,  # 日志时间
            level=record.level_name,  # 日志等级
@@ -287,9 +285,8 @@ class BaseCore:
            msg=record.message  # 日志内容
        )
        return formate
    # 获取logger
-    def getLogger(self, fileLogFlag=True, stdOutFlag=True):
+    def getLogger(self,fileLogFlag=True, stdOutFlag=True):
        dirname, filename = os.path.split(os.path.abspath(sys.argv[0]))
        dirname = os.path.join(dirname, "logs")
        filename = filename.replace(".py", "") + ".log"
@@ -338,25 +335,48 @@ class BaseCore:
            proxy_list.append(proxy)
        return proxy_list[random.randint(0, 3)]
-    # 字符串截取
+    #字符串截取
-    def getSubStr(self, str, beginStr, endStr):
+    def getSubStr(self,str,beginStr,endStr):
-        if beginStr == '':
+        if beginStr=='':
            pass
        else:
-            begin = str.find(beginStr)
+            begin=str.find(beginStr)
-            if begin == -1:
+            if begin==-1:
-                begin = 0
+                begin=0
-            str = str[begin:]
+            str=str[begin:]
-        if endStr == '':
+        if endStr=='':
            pass
        else:
-            end = str.rfind(endStr)
+            end=str.rfind(endStr)
-            if end == -1:
+            if end==-1:
                pass
            else:
-                str = str[0:end + 1]
+                str = str[0:end+1]
        return str
+    # def pullDateFromSql(self):
+    #     query = "select SocialCode from EnterpriseInfo "
+    #     self.cursor.execute(query)
+    #     result  = self.cursor.fetchall()
+    #     social_list = list(result)
+    #     return social_list
+    #
+    # def redisPushData(self,social_list):
+    #
+    #     #将数据插入到redis中
+    #     for item in social_list:
+    #         self.r.rpush('qy_socialCode', item)
+    # 从Redis的List中获取并移除一个元素
+    def redicPullData(self,type):
+        """Pop one social credit code from the head of a Redis list.
+
+        Args:
+            type: 1 selects the 'gnqy_socialCode' list, 2 selects the
+                'gwqy_socialCode' list.
+
+        Returns:
+            The popped value decoded to ``str``; ``None`` when the popped
+            value is missing/empty or ``type`` is neither 1 nor 2.
+        """
+        if type == 1:
+            queue_key = 'gnqy_socialCode'
+        elif type == 2:
+            queue_key = 'gwqy_socialCode'
+        else:
+            # Unknown selector: nothing is popped.
+            return None
+        raw_code = self.r.lpop(queue_key)
+        if not raw_code:
+            return None
+        return raw_code.decode()
    # 获得脚本进程PID
    def getPID(self):
        PID = os.getpid()
@@ -384,7 +404,7 @@ class BaseCore:
        chrome_options.add_argument(
            'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
        driver = webdriver.Chrome(chrome_options=chrome_options, service=service)
-        with open('./stealth.min.js') as f:
+        with open('../../base/stealth.min.js') as f:
            js = f.read()
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
@@ -395,15 +415,15 @@ class BaseCore:
    # 根据社会信用代码获取企业信息
    def getInfomation(self, social_code):
        sql = f"SELECT * FROM EnterpriseInfo WHERE SocialCode = '{social_code}'"
-        self.__cursor_infomation.execute(sql)
+        self.cursor.execute(sql)
-        data = self.__cursor_infomation.fetchone()
+        data = self.cursor.fetchone()
        return data
    # 更新企业采集次数
    def updateRun(self, social_code, runType, count):
        sql_update = f"UPDATE EnterpriseInfo SET {runType} = {count} WHERE SocialCode = '{social_code}'"
-        self.__cursor_infomation.excute(sql_update)
+        self.cursor.execute(sql_update)
-        self.__cnx_infomation.commit()
+        self.cnx.commit()
    # 保存日志入库
    def recordLog(self, xydm, taskType, state, takeTime, url, e):
@@ -412,5 +432,10 @@ class BaseCore:
        pid = self.getPID()
        sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        values = [xydm, taskType, state, takeTime, url, createTime, ip, pid, e]
-        self.__cursor_infomation.excute(sql, values)
+        try:
-        self.__cnx_infomation.commit()
+            self.cursor.execute(sql, values)
+        except Exception as e:
+            print(e)
+        self.cnx.commit()
--- a/base/RedisPPData.py
+++ b/base/RedisPPData.py
+import time
+import pymysql
+import redis
+from base import BaseCore
+from apscheduler.schedulers.blocking import BlockingScheduler
+# Shared BaseCore helper and the logger obtained from it.
+basecore = BaseCore.BaseCore()
+log = basecore.getLogger()
+# Connect to Redis.
+# NOTE(review): host and password are hard-coded in source — consider loading them from configuration.
+r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
+# Module-level MySQL connection and cursor, created at import time and used by pullDateFromSql.
+# NOTE(review): credentials are hard-coded here as well.
+cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',
+                           charset='utf8mb4')
+cursor = cnx.cursor()
+def pullDateFromSql():
+    """Read SocialCode values from EnterpriseInfo for Place '1' and Place '2'.
+
+    Each query is limited to a single row (``limit 1``).
+
+    Returns:
+        A ``(gn_social_list, gw_social_list)`` tuple: the codes for
+        Place = '1' followed by the codes for Place = '2'.
+    """
+    def _first_column(sql):
+        # Run one query on the module-level cursor and keep column 0 of every row.
+        cursor.execute(sql)
+        return [row[0] for row in cursor.fetchall()]
+
+    place_one_codes = _first_column("select SocialCode from EnterpriseInfo where Place = '1' limit 1 ")
+    place_two_codes = _first_column("select SocialCode from EnterpriseInfo where Place = '2' limit 1 ")
+    return place_one_codes, place_two_codes
+def redisPushData():
+    """Load codes via pullDateFromSql and append them to the two Redis lists.
+
+    Codes from the first returned list go to 'gnqy_socialCode', codes from
+    the second to 'gwqy_socialCode', one RPUSH per code.
+    """
+    print('=======')
+    first_codes, second_codes = pullDateFromSql()
+    # The 'gnqy' list is filled completely before the 'gwqy' list.
+    batches = (
+        ('gnqy_socialCode', first_codes),
+        ('gwqy_socialCode', second_codes),
+    )
+    for list_key, codes in batches:
+        for code in codes:
+            r.rpush(list_key, code)
+# 从Redis的List中获取并移除一个元素
+def redicPullData(type):
+    """Pop and return one social credit code from the queue selected by ``type``.
+
+    Args:
+        type: 1 for the domestic queue ('gnqy_socialCode'), 2 for the
+            foreign queue ('gwqy_socialCode').
+
+    Returns:
+        The popped value decoded to ``str``, or ``None`` when the queue is
+        empty or ``type`` is neither 1 nor 2.
+    """
+    # Use the same list keys that redisPushData writes to, and pop only the
+    # requested queue so an entry from the other queue is never discarded.
+    if type == 1:
+        queue_key = 'gnqy_socialCode'
+    elif type == 2:
+        queue_key = 'gwqy_socialCode'
+    else:
+        return None
+    item = r.lpop(queue_key)
+    return item.decode() if item else None
+def task(task_time):
+    """Schedule redisPushData as a cron job and run the scheduler.
+
+    Args:
+        task_time: value passed as the cron trigger's ``second`` field.
+
+    Any exception raised while the scheduler runs is printed, not re-raised.
+    """
+    cron_scheduler = BlockingScheduler()
+    # Cron trigger options forwarded unchanged to add_job.
+    job_options = {'second': task_time, 'max_instances': 3}
+    cron_scheduler.add_job(redisPushData, 'cron', **job_options)
+    try:
+        cron_scheduler.start()
+    except Exception as err:
+        print('定时采集异常', err)
+# Script entry point: start the periodic MySQL -> Redis push.
+if __name__ == "__main__":
+    start = time.time()
+    # Cron ``second`` expression handed to task(); '*/10' fires every 10 seconds.
+    task_time = '*/10'
+    # task() starts a BlockingScheduler, whose start() blocks, so the log
+    # line below is only reached after the scheduler returns.
+    task(task_time)
+    log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时：{basecore.getTimeCost(start,time.time())}===')
\ No newline at end of file
--- a/base/smart/smart_extractor.py
+++ b/base/smart/smart_extractor.py
@@ -52,11 +52,6 @@ class SmartExtractor:
        # 支持语言
        self.goose = Goose({'stopwords_class': StopWordsChinese})
    def get_extraction_result(self, article, link_text=''):
        """
        获取采集结果：

--- a/comData/tcyQydt/tyc_qydt_add.py
+++ b/comData/tcyQydt/tyc_qydt_add.py
@@ -68,6 +68,9 @@ def beinWork(tyc_code, social_code):
            break
        except Exception as e:
            log.error(f"request请求异常----{m}-----{e}")
+            state = 0
+            takeTime = baseCore.getTimeCost(start_time, time.time())
+            baseCore.recordLog(social_code, taskType, state, takeTime, url, e)
            pass
    if (response.status_code == 200):
@@ -154,10 +157,9 @@ def beinWork(tyc_code, social_code):
            selects = cursor.fetchone()
            if selects:
                log.info(f'{tyc_code}-----{social_code}----{link}:已经存在')
-                # up_repetCount = up_repetCount + 1
-                # continue
                # todo:如果该条数据存在则说明该条数据之后的都已经采集完成，就可以跳出函数，执行下一个企业
-                # retData['up_state'] = True
                retData['up_okCount'] = up_okCount
                retData['up_errorCount'] = up_errorCount
                retData['up_repetCount'] = up_repetCount
@@ -274,7 +276,7 @@ def beinWork(tyc_code, social_code):
                baseCore.recordLog(social_code, taskType, state, takeTime, link, e)
        log.info(f"获取分页数据--{tyc_code}----分页{num}，耗时{baseCore.getTimeCost(start_page, time.time())}")
-    # retData['up_state'] = True
    retData['up_okCount'] = up_okCount
    retData['up_errorCount'] = up_errorCount
    retData['up_repetCount'] = up_repetCount
@@ -286,9 +288,9 @@ def beinWork(tyc_code, social_code):
 def doJob():
    while True:
        # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
-        social_code = ''
+        social_code = baseCore.redicPullData(1)
        # 判断 如果Redis中已经没有数据，则等待
-        if social_code == '':
+        if social_code == 'None':
            time.sleep(20)
            continue
        data = baseCore.getInfomation(social_code)
@@ -309,30 +311,10 @@ def doJob():
        runType = 'NewsRunCount'
        count += 1
        baseCore.updateRun(social_code, runType, count)
-        # up_state = retData['up_state']
        total = retData['total']
        up_okCount = retData['up_okCount']
        up_errorCount = retData['up_errorCount']
        up_repetCount = retData['up_repetCount']
-        # if up_state:
-        #     stateNum = 1
-        # else:
-        #     stateNum = 4
-        #
-        # # 取出数据库中okCount errorCount repetCount 并更新
-        # selectOrginSql = f"select okCount,errorCount,repetCount,total from ssqy_tyc where id={id}"
-        # cursor.execute(selectOrginSql)
-        # count_info = cursor.fetchone()
-        # okCount = count_info[0]
-        # errorCount = count_info[1]
-        # repetCount = count_info[2]
-        #
-        # updateEndSql = f"update ssqy_tyc set update_state={stateNum},up_okCount={up_okCount},up_errorCount={up_errorCount},up_repetCount={up_repetCount} ,date_time=now(),okCount={okCount+up_okCount},errorCount={errorCount+up_errorCount},repetCount={repetCount+up_repetCount},total={total} where id={id}"
-        # cursor.execute(updateEndSql)
-        # cnx.commit()
        log.info(
            f"{id}---{xydm}----{tycid}----结束处理，耗时{baseCore.getTimeCost(start_time, time.time())}---总数:{total}---成功数:{up_okCount}----失败数:{up_errorCount}--重复数:{up_repetCount}")
@@ -341,19 +323,7 @@ def doJob():
    # 释放资源
    baseCore.close()
 # Press the green button in the gutter to run the script.
 if __name__ == '__main__':
    doJob()
-    # link = 'https://m.thepaper.cn/newsDetail_forward_24049067'
-    # social_code = '915101006653023886'
-    # try:
-    #     sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s and social_credit_code = %s and type='2' '''
-    #     print(sel_sql)
-    #     cursor.execute(sel_sql, (link,social_code))
-    #     aa = cursor.fetchone()
-    #     print(aa)
-    # except Exception as e:
-    #     print(e)
--- a/comData/yhcj/雅虎财经_企业动态.py
+++ b/comData/yhcj/雅虎财经_企业动态.py
 # 雅虎财经企业动态获取
 # 雅虎财经企业动态获取
+import json
 import time
-import pandas as pd
 import pymysql
-import requests
+from kafka import KafkaProducer
-from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
-from selenium import webdriver
 from base.BaseCore import BaseCore
 baseCore = BaseCore()
-log= BaseCore.getLogger()
+log= baseCore.getLogger()
 #获取资讯详情
-def getZx(xydm,url,title,cnx):
+def getZx(xydm,url,title,cnx,path):
    start_time_content= time.time()
    try:
-        path = r'E:\chromedriver_win32\chromedriver.exe'
        driverContent = baseCore.buildDriver(path)
        driverContent.get(url)
        try:
@@ -53,29 +48,86 @@ def getZx(xydm,url,title,cnx):
            '2',
            'zh'
        ]
-        with cnx.cursor() as cursor:
-            try:
-                insert_sql = '''insert into brpa_source_article(social_credit_code,title,summary,content,publish_date,source_address,origin,author,type,lang) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
-                cursor.execute(insert_sql, tuple(list_info))
-                cnx.commit()
-            except Exception as e1:
+        try:
-                log.error("保存数据库失败")
+            insert_sql = '''insert into brpa_source_article(social_credit_code,title,summary,content,publish_date,source_address,origin,author,type,lang) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
-                e1 = str(e1) + '.........保存数据库失败'
+            cursor.execute(insert_sql, tuple(list_info))
-                return e1
+            cnx.commit()
-        log.info(f"文章耗时，耗时{baseCore.getTimeCost(start_time_content,time.time())}")
+        except Exception as e1:
-    except Exception  as e:
+            log.error("保存数据库失败")
+            e1 = str(e1) + '.........保存数据库失败'
+            return e1
+        log.info(f"文章耗时，耗时{baseCore.getTimeCost(start_time_content, time.time())}")
+        try:
+            sel_sql = "select article_id from brpa_source_article where source_address = %s and social_credit_code = %s"
+            cursor.execute(sel_sql, (url, social_code))
+            row = cursor.fetchone()
+            id = row[0]
+            time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+            # todo:插入一条数据，并传入kafka
+            dic_news = {
+                'attachmentIds': id,
+                'author': '',
+                'content': content,
+                'contentWithTag': content,
+                'createDate': time_now,
+                'deleteFlag': '0',
+                'id': '',
+                'keyWords': '',
+                'lang': 'zh',
+                'origin': '天眼查',
+                'publishDate': pub_time,
+                'sid': '1684032033495392257',
+                'sourceAddress': url,  # 原文链接
+                'summary': '',
+                'title': title,
+                'type': 2,
+                'socialCreditCode': social_code,
+                'year': pub_time[:4]
+            }
+            # print(dic_news)
+            # 将相应字段通过kafka传输保存
+            try:
+                producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
+                kafka_result = producer.send("researchReportTopic",
+                                             json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
+                print(kafka_result.get(timeout=10))
+                dic_result = {
+                    'success': 'ture',
+                    'message': '操作成功',
+                    'code': '200',
+                }
+                log.info(dic_result)
+                # 传输成功,写入日志中
+                state = 1
+                takeTime = baseCore.getTimeCost(start_time, time.time())
+                baseCore.recordLog(social_code, taskType, state, takeTime, url, '')
+                # return True
+            except Exception as e:
+                dic_result = {
+                    'success': 'false',
+                    'message': '操作失败',
+                    'code': '204',
+                    'e': e
+                }
+                log.error(dic_result)
+                e = str(e) + '操作失败'
+                state = 0
+                takeTime = baseCore.getTimeCost(start_time, time.time())
+                baseCore.recordLog(social_code, taskType, state, takeTime, url, e)
+        except Exception as e:
+            log.info(f'传输失败:{social_code}----{url}')
+            e = '传输失败'
+            state = 0
+            takeTime = baseCore.getTimeCost(start_time, time.time())
+            baseCore.recordLog(social_code, taskType, state, takeTime, url, e)
+    except Exception as e:
        log.error("获取正文失败")
-        e = str(e)+'.........获取正文失败'
+        e = str(e) + '.........获取正文失败'
        return e
-    return ''
-path = r'E:\chromedriver_win32\chromedriver.exe'
-driver = baseCore.buildDriver(path)
-cnx = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4')
 # 拖拽30次获取企业新闻
 def scroll(driver):
@@ -85,63 +137,65 @@ def scroll(driver):
        driver.execute_script(js)
        time.sleep(0.1)
+if __name__ == "__main__":
+    path = r'D:\chrome\chromedriver.exe'
+    driver = baseCore.buildDriver(path)
-while True:
+    cnx = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4')
-    # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
+    cursor = cnx.cursor()
-    social_code = ''
+    while True:
-    # 判断 如果Redis中已经没有数据，则等待
+        # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
-    if social_code == '':
+        social_code= baseCore.redicPullData(2)
-        time.sleep(20)
-        continue
+        # 判断 如果Redis中已经没有数据，则等待
-    data = baseCore.getInfomation(social_code)
+        if social_code == 'None':
-    name = data[1]
+            time.sleep(20)
-    enname = data[5]
+            continue
-    gpdm = data[3]
+        data = baseCore.getInfomation(social_code)
-    xydm = data[2]
+        name = data[1]
-    taskType = '企业动态'
+        enname = data[5]
-    # 获取该企业对应项目的采集次数
+        gpdm = data[3]
-    count = data[17]
+        xydm = data[2]
-    start_time = time.time()
+        taskType = '企业动态'
-    if(gpdm==''):
+        # 获取该企业对应项目的采集次数
-        log.error(f"{name}--股票代码为空 跳过")
+        count = data[17]
-        e = '.......股票代码为空 跳过'
+        start_time = time.time()
-        state = 0
+        if(gpdm==''):
-        takeTime = baseCore.getTimeCost(start_time,time.time())
+            log.error(f"{name}--股票代码为空 跳过")
-        baseCore.recordLog(xydm,taskType,state,takeTime,'',e)
+            e = '.......股票代码为空 跳过'
-        continue
+            state = 0
-    url=f"https://finance.yahoo.com/quote/{gpdm}/press-releases?p={gpdm}"
+            takeTime = baseCore.getTimeCost(start_time,time.time())
-    driver.get(url)
+            baseCore.recordLog(xydm,taskType,state,takeTime,'',e)
-    scroll(driver)
+            continue
-    try:
+        url=f"https://finance.yahoo.com/quote/{gpdm}/press-releases?p={gpdm}"
-        news_div = driver.find_element(By.ID, 'summaryPressStream-0-Stream')
+        driver.get(url)
-    except Exception as e:
+        scroll(driver)
-        log.error(f"{name}--{gpdm}--没找到新闻元素")
-        e = str(e) + '.......没找到新闻元素'
-        state = 0
-        takeTime = baseCore.getTimeCost(start_time,time.time())
-        baseCore.recordLog(xydm,taskType,state,takeTime,url,e)
-        continue
-    news_lis =  news_div.find_elements(By.XPATH,"./ul/li")
-    log.info(f"{name}--{gpdm}--{len(news_lis)}条信息")
-    for i in range(0,len(news_lis)):
        try:
-            a_ele= news_lis[i].find_element(By.XPATH,"./div[1]/div[1]/div[2]/h3[1]/a")
+            news_div = driver.find_element(By.ID, 'summaryPressStream-0-Stream')
        except Exception as e:
-            log.error(f"{name}--{gpdm}--{i}----a标签没找到")
+            log.error(f"{name}--{gpdm}--没找到新闻元素")
-            e = str(e) + '.......a标签没找到'
+            e = str(e) + '.......没找到新闻元素'
            state = 0
            takeTime = baseCore.getTimeCost(start_time,time.time())
            baseCore.recordLog(xydm,taskType,state,takeTime,url,e)
            continue
-        news_url = a_ele.get_attribute("href").lstrip().strip().replace("'","''")
+        news_lis =  news_div.find_elements(By.XPATH,"./ul/li")
-        if(news_url.startswith("https://finance.yahoo.com")):
+        log.info(f"{name}--{gpdm}--{len(news_lis)}条信息")
-            pass
+        for i in range(0,len(news_lis)):
-        else:
+            try:
-            continue
+                a_ele= news_lis[i].find_element(By.XPATH,"./div[1]/div[1]/div[2]/h3[1]/a")
-        #判断url是否已经存在
+            except Exception as e:
-        with cnx.cursor() as cursor:
+                log.error(f"{name}--{gpdm}--{i}----a标签没找到")
+                e = str(e) + '.......a标签没找到'
+                state = 0
+                takeTime = baseCore.getTimeCost(start_time,time.time())
+                baseCore.recordLog(xydm,taskType,state,takeTime,url,e)
+                continue
+            news_url = a_ele.get_attribute("href").lstrip().strip().replace("'","''")
+            if(news_url.startswith("https://finance.yahoo.com")):
+                pass
+            else:
+                continue
+            #判断url是否已经存在
            sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s and social_credit_code=%s '''
            cursor.execute(sel_sql, (news_url,xydm))
            selects = cursor.fetchall()
@@ -152,22 +206,24 @@ while True:
                takeTime = baseCore.getTimeCost(start_time,time.time())
                baseCore.recordLog(xydm,taskType,state,takeTime,news_url,e)
                continue
-        title = a_ele.text.lstrip().strip().replace("'","''")
+            title = a_ele.text.lstrip().strip().replace("'","''")
-        e = getZx(xydm,news_url,title,cnx)
+            e = getZx(xydm,news_url,title,cnx,path)
-        if e == '':
+            if e == '':
-            state = 1
+                state = 1
-        else:
+            else:
-            state = 0
+                state = 0
-        takeTime = baseCore.getTimeCost(start_time,time.time())
+            takeTime = baseCore.getTimeCost(start_time,time.time())
-        baseCore.recordLog(xydm,taskType,state,takeTime,news_url,e)
+            baseCore.recordLog(xydm,taskType,state,takeTime,news_url,e)
-        log.info(f"{name}--{gpdm}--{i}----{news_url}----------{news_url}")
+            log.info(f"{name}--{gpdm}--{i}----{news_url}")
-    log.info(f"{name}--{gpdm}--企业整体，耗时{baseCore.getTimeCost(start_time,time.time())}")
+        log.info(f"{name}--{gpdm}--企业整体，耗时{baseCore.getTimeCost(start_time,time.time())}")
-    # 信息采集完成后将该企业的采集次数更新
+        # 信息采集完成后将该企业的采集次数更新
-    runType = 'NewsRunCount'
+        runType = 'NewsRunCount'
-    count += 1
+        count += 1
-    baseCore.updateRun(social_code,runType,count)
+        baseCore.updateRun(social_code,runType,count)
-#释放资源
+    cursor.close()
-baseCore.close()
+    cnx.close()
\ No newline at end of file
+    #释放资源
+    baseCore.close()
\ No newline at end of file