Commit 7715292a  Author: 刘伟刚

Dynamic data collection for TradingView

Parent d82da41e
# -*- coding: utf-8 -*-
# @Author: MENG
# @Time : 2022-4-9
import datetime
import xlrd
from selenium.webdriver.support.wait import WebDriverWait
from tqdm import tqdm
import pymongo
import pymysql
import time
import requests
from pyquery import PyQuery as pq
from selenium import webdriver
import json
from requests.packages import urllib3
urllib3.disable_warnings()
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import re
from BaseCore import BaseCore
import configparser
import redis
"""
将请求链接分成三个
https://finance.yahoo.com/quote/WMT/financials?p=WMT
https://finance.yahoo.com/quote/WMT/balance-sheet?p=WMT
https://finance.yahoo.com/quote/WMT/cash-flow?p=WMT
雅虎财务数据流程修改
1.从sys_base_enterprise_ipo获取到国外上市和台湾企业的股票代码 category 5,6
2.从雅虎财经上请求获取财务数据和币种单位。
3.将数据插入更新到表config_finance_data_sync
信息更新的字段
social_credit_code
name
stock_code
content
level_relation
unit
create_time
4.将采集结果通知接口进行拉取数据处理
"""
class YahooCaiwu(object):
def __init__(self):
self.config = configparser.ConfigParser()
# 读取配置文件
self.config.read('config.ini')
baseCore=BaseCore()
self.logger=baseCore.getLogger()
self.r = redis.Redis(host=self.config.get('redis', 'host'),
port=self.config.get('redis', 'port'),
password=self.config.get('redis', 'pass'), db=6)
self.driver=self.get_webdriver()
# 雅虎财经处理表格
def deal_table(self,doc_resp):
all_dict = {}
resp1_table = doc_resp('#Col1-1-Financials-Proxy section div:nth-child(3)>div>div').children()
catalogue_title = pq(resp1_table[0]).text().split('\n')
doc_items = pq(resp1_table[1]).children()
if len(doc_items)<1:
resp1_table = doc_resp('#Col1-1-Financials-Proxy section div:nth-child(4)>div>div').children()
catalogue_title = pq(resp1_table[0]).text().split('\n')
doc_items = pq(resp1_table[1]).children()
catalogue_dict = {}
content_dict = {}
for doc_item in doc_items:
if pq(doc_item).text() == '':
continue
a = pq(pq(doc_item).children()[0]).text().split('\n')[0]
a_list = pq(pq(doc_item).children()[0]).text().split('\n')[1:]
content_dict[a] = a_list
b_dict = {}
for doc_item1 in pq(doc_item).children()[1]:
b = pq(pq(doc_item1).children()[0]).text().split('\n')[0]
if not b:
continue
b_list = pq(pq(doc_item1).children()[0]).text().split('\n')[1:]
content_dict[b] = b_list
c_dict = {}
for doc_item2 in pq(doc_item1).children()[1]:
c = pq(pq(doc_item2).children()[0]).text().split('\n')[0]
if not c:
continue
c_list = pq(pq(doc_item2).children()[0]).text().split('\n')[1:]
content_dict[c] = c_list
d_dict = {}
for doc_item3 in pq(doc_item2).children()[1]:
d = pq(pq(doc_item3).children()[0]).text().split('\n')[0]
if not d:
continue
d_list = pq(pq(doc_item3).children()[0]).text().split('\n')[1:]
content_dict[d] = d_list
e_dict = {}
for doc_item4 in pq(doc_item3).children()[1]:
e = pq(pq(doc_item4).children()[0]).text().split('\n')[0]
if not e:
continue
e_list = pq(pq(doc_item4).children()[0]).text().split('\n')[1:]
content_dict[e] = e_list
f_dict = {}
for doc_item5 in pq(doc_item4).children()[1]:
f = pq(pq(doc_item5).children()[0]).text().split('\n')[0]
if not f:
continue
f_list = pq(pq(doc_item5).children()[0]).text().split('\n')[1:]
content_dict[f] = f_list
g_dict = {}
for doc_item6 in pq(doc_item5).children()[1]:
g = pq(pq(doc_item6).children()[0]).text().split('\n')[0]
if not g:
continue
g_list = pq(pq(doc_item6).children()[0]).text().split('\n')[1:]
content_dict[g] = g_list
g_dict[g] = {}
f_dict[f] = g_dict
e_dict[e] = f_dict
d_dict[d] = e_dict
c_dict[c] = d_dict
b_dict[b] = c_dict
catalogue_dict[a] = b_dict
all_dict['表头'] = catalogue_title
all_dict['目录'] = catalogue_dict
all_dict['内容'] = content_dict
return all_dict
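# Illustrative shape of the dict returned by deal_table above (row names are assumed
# examples; the real keys depend on the statement rendered on the Yahoo Finance page):
# {
#     '表头': ['Breakdown', 'TTM', '12/31/2022', ...],
#     '目录': {'Total Revenue': {'Operating Revenue': {}, ...}, ...},
#     '内容': {'Total Revenue': ['611,289,000', ...], 'Operating Revenue': [...], ...}
# }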
def get_webdriver(self):
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--start-maximized")
# chrome_options.add_argument('--headless')
chrome_options.binary_location = self.config.get('selenium', 'binary_location')
executable_path =self.config.get('selenium', 'chrome_driver')
driver = webdriver.Chrome(options=chrome_options, executable_path=executable_path)
return driver
def conn11(self):
conn = pymysql.Connect(host='114.116.44.11', port=3306, user='caiji', passwd='f7s0&7qqtK', db='clb_project',
charset='utf8')
cursor = conn.cursor()
return conn,cursor
def getCodeFromRedis(self):
securitiescode=self.r.lpop('NoticeEnterprise:securities_code')
# lpop returns None when the queue is empty; decode only when a value was actually popped
securitiescode = securitiescode.decode('utf-8') if securitiescode else None
return securitiescode
# 雅虎财经
def get_content2(self):
conn,cursor=self.conn11()
try:
sql1 = """select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where exchange='8' """ # and stock_code = "SYNH"
# sql1 = f"select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where securities_code='{securitiescode}' " # and stock_code = "SYNH"
cursor.execute(sql1)
result_data = cursor.fetchall()
except Exception as e:
self.logger.info("数据查询异常!")
for data in result_data:
try:
data_list = list(data)
print(data_list)
social_credit_code = data_list[0]
stock = data_list[1]
securities_short_name = data_list[2] if data_list[2] is not None else ""
# content_sql = ''
self.logger.info(f"需要采集的股票代码{securities_short_name}")
stock2=str(stock)
if stock2.upper().endswith("HK") and stock2.upper().startswith("0") :
stock2=stock2[1:]
url = f'https://finance.yahoo.com/quote/{stock2}/financials?p={stock2}'
try:
print(f'正在采集:{url}')
self.driver.get(url)
# 等待页面加载完成
wait = WebDriverWait(self.driver, 300)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
time.sleep(2)
try:
# driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
self.driver.find_element(By.XPATH,'//div[@id="Col1-1-Financials-Proxy"]/section/div[2]/button/div/span[text()="Expand All"]').click()
wait = WebDriverWait(self.driver, 60)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
time.sleep(2)
except Exception as e:
print(e)
pass
doc_resp1 = pq(self.driver.page_source)
unit=self.get_unit(doc_resp1)
financials1 = self.deal_table(doc_resp1)
self.driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click()
time.sleep(5)
try:
self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
time.sleep(5)
except Exception as e:
print(e)
pass
doc_resp2 = pq(self.driver.page_source)
financials2 = self.deal_table(doc_resp2)
self.driver.find_element(By.XPATH,'//div/span[text()="Balance Sheet"]').click()
time.sleep(5)
try: #//*[@id="Col1-1-Financials-Proxy"]/section/div[2]/button/div/span
self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
time.sleep(5)
except Exception as e:
print(e)
pass
doc_resp3 = pq(self.driver.page_source)
financials3 = self.deal_table(doc_resp3)
self.driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click()
time.sleep(5)
try:
self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
time.sleep(5)
except Exception as e:
print(e)
pass
doc_resp4 = pq(self.driver.page_source)
financials4 = self.deal_table(doc_resp4)
self.driver.find_element(By.XPATH,'//div/span[text()="Cash Flow"]').click()
time.sleep(5)
try:
self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
time.sleep(5)
except Exception as e:
print(e)
pass
doc_resp5 = pq(self.driver.page_source)
financials5 = self.deal_table(doc_resp5)
self.driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click()
time.sleep(5)
try:
self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
time.sleep(5)
except Exception as e:
print(e)
pass
doc_resp6 = pq(self.driver.page_source)
financials6 = self.deal_table(doc_resp6)
financials_dict = {
'表1': financials1,
'表2': financials2,
'表3': financials3,
'表4': financials4,
'表5': financials5,
'表6': financials6,
}
mu_lus = ''
for i in range(1, 7):
mu_lu = financials_dict[f'表{i}']['目录']
mu_lu = json.dumps(mu_lu, ensure_ascii=False, indent=4)
mu_lus += mu_lu + '&&&&'
level_relation = mu_lus[:-4]
financials = ''
for i in range(1, 7):
a_list = financials_dict[f'表{i}']['表头']
for a in a_list:
financials += a + '\n'
b_dict = financials_dict[f'表{i}']['内容']
for key, values in b_dict.items():
financials += key + '\n'
for b in values:
financials += b + '\n'
financials += '&&&&' + '\n'
financials = financials.strip()
content = financials[:-4].strip().replace('\n&&&&\n', '&&&&')
# if content[:100] in str(content_sql).replace("\\n","\n"):
# print(f"{orc_id}:无最新数据")
# continue
# sql = "UPDATE config_finance_data_sync SET level_relation=%s, content=%s WHERE ID = %s"
# val = (level_relation, content, orc_id)
# cursor.execute(sql, val)
# conn.commit()
ipo_data={
'social_credit_code':social_credit_code,
'stock':stock,
'securities_short_name':securities_short_name,
'content':content,
'level_relation':level_relation,
'unit':unit,
'origin_type':1
}
flag=self.dataToSql(conn,cursor,ipo_data)
if flag:
# get_url = f'http://192.168.1.49:8088/sync/finance/yh?securitiesCode={stock}'
get_url = f'http://114.115.236.206:8088/sync/finance/yh?securitiesCode={stock}'
try:
resp = requests.get(get_url)
print(resp.text)
print('调用接口成功!!')
except:
with open('雅虎财经-财务数据_发送错误ID.txt', 'a', encoding='utf8')as f:
f.write(stock + '\n')
except Exception as e:
print(e)
print(f'采集:{url}失败')
self.driver.quit()
time.sleep(10)
self.driver=self.get_webdriver()
except:
# time.sleep(60 * 10)
self.driver.quit()
time.sleep(10)
self.driver=self.get_webdriver()
print('出错,重试中!')
continue
# self.driver.close()
def dataToSql(self,conn,cursor,ipo_data):
try:
social_credit_code=ipo_data['social_credit_code']
stock=ipo_data['stock']
securities_short_name=ipo_data['securities_short_name']
content=ipo_data['content']
level_relation=ipo_data['level_relation']
unit=ipo_data['unit']
origin_type=ipo_data['origin_type']
if len(unit) == 0:
return False
if len(content) == 0:
return False
if len(level_relation) == 0:
return False
# 检查记录是否存在
select_sql=f"SELECT * FROM config_finance_data_sync WHERE stock_code='{stock}'"
cursor.execute(select_sql)
existing_record = cursor.fetchone()
# 获取当前时间
current_time = datetime.datetime.now()
# 将时间转换为字符串
currentdate = current_time.strftime("%Y-%m-%d %H:%M:%S")
if existing_record:
# 记录已存在,执行更新操作
update_param=(social_credit_code,content,level_relation,origin_type,currentdate,stock)
update_sql=f"UPDATE config_finance_data_sync SET social_credit_code=%s , content=%s , level_relation=%s,origin_type=%s ,create_time=%s WHERE stock_code=%s "
cursor.execute(update_sql,update_param)
print('更新成功')
else:
insert_param=(social_credit_code,content,level_relation,unit,stock,origin_type,currentdate)
insert_sql=f"INSERT INTO config_finance_data_sync (social_credit_code, content,level_relation,unit,stock_code,origin_type,create_time) VALUES ( %s,%s, %s, %s, %s, %s, %s)"
# 记录不存在,执行插入操作
cursor.execute(insert_sql,insert_param)
print('插入成功')
# 提交事务
conn.commit()
except Exception as e:
return False
return True
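# The method above behaves as an upsert keyed on stock_code: an existing row has its
# content / level_relation / origin_type / create_time refreshed, otherwise a new row
# (including the unit column) is inserted; empty unit/content/level_relation is rejected.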
def get_unit(self,doc_resp):
try:
resp1_table = doc_resp('#quote-header-info >div:nth-child(2)>div:nth-child(1)>div:nth-child(2)>span')
currency = pq(resp1_table[0]).text()
if 'Currency in' in currency:
result = re.findall(r'(?<=Currency in\s).*', currency)
currency=result[0]
if '(' in currency:
currency=currency.split('(')[0]
currency=str(currency).upper()+'(千)'
except Exception as e:
currency=''
return currency
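# Illustrative example for get_unit (assumed header text): a quote header such as
# "NasdaqGS - NasdaqGS Real Time Price. Currency in USD" yields "USD", which is
# returned as "USD(千)", i.e. thousands of the detected currency.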
#对比指标计算
def calculateIndexReq(self):
get_url = 'http://114.115.236.206:8088/sync/calculateIndex'
try:
params={
'type':2
}
resp = requests.get(get_url,params=params)
print(resp.text)
text=json.loads(resp.text)
codee=text['code']
while codee==-200:
time.sleep(600)
# keep the same query parameters on the retry as on the first request
resp = requests.get(get_url,params=params)
print(resp.text)
text=json.loads(resp.text)
codee=text['code']
if codee==-200:
break
print('调用接口成功!!')
except:
print('调用失败!')
if __name__ == '__main__':
# parse_excel()
#get_content1()
yahoo=YahooCaiwu()
yahoo.get_content2()
yahoo.calculateIndexReq()
import pymysql
import requests
import json
import time
import datetime
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from BaseCore import BaseCore
baseCore=BaseCore()
logger=baseCore.getLogger()
def reqmsg(offset,operand):
header={
'Connection':'keep-alive',
'Content-Length':'268',
'sec-ch-ua':'"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-platform':'"Windows"',
'sec-ch-ua-mobile':'?0',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'Content-Type':'application/json',
'Accept':'*/*',
'Origin':'https://finance.yahoo.com',
'Sec-Fetch-Site':'same-site',
'Sec-Fetch-Mode':'cors',
'Sec-Fetch-Dest':'empty',
'Referer':'https://finance.yahoo.com/screener/unsaved/ff7f1d36-5088-4986-b7ed-4c7ee4f7af57?count=100&offset=0',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.9',
'Cookie':'tbla_id=24af1dd7-fb87-4d22-a0cf-7c313202694c-tuct86184e4; gpp=DBAA; gpp_sid=-1; OTH=v=2&s=2&d=eyJraWQiOiIwMTY0MGY5MDNhMjRlMWMxZjA5N2ViZGEyZDA5YjE5NmM5ZGUzZWQ5IiwiYWxnIjoiUlMyNTYifQ.eyJjdSI6eyJndWlkIjoiMkZJTDdIQjVZVFcyN1NLWVFIQjNOTFU0RDQiLCJwZXJzaXN0ZW50Ijp0cnVlLCJzaWQiOiJZbXE5VW82MkRsd0QifX0.sVtMHG-HjplldMiy1GXA1thZlnwJusoLr5vAjuHOGIppAWCgZgTz4HuUAB4weZAfICtLge3MZbDnfnIDuHSm620aQ-8lc9RbpQ0_YtWbn50lbi13EgxHuDs7IDvozIqZ7Wji4DldEHGMezxWOqwzG6HeiWdu51gngtC0wYXtKGM; T=af=JnRzPTE2OTY3Mjc4NDAmcHM9THVnWDRYeE11MUNhSXkxcHFOb0pjUS0t&d=bnMBeWFob28BZwEyRklMN0hCNVlUVzI3U0tZUUhCM05MVTRENAFhYwFBRWVLWnVsbQFhbAF3ZWlnYW5nbGl1MTFAZ21haWwuY29tAXNjAW1icl9yZWdpc3RyYXRpb24BZnMBeGI5ODZlUmxJZ01nAXp6AWdNZ0lsQkE3RQFhAVFBRQFsYXQBZ01nSWxCAW51ATA-&kt=EAApyOTr6JKep3_MVia32x9bA--~I&ku=FAAfpFFRxC0lNQCwURloTuGtPI.ZMaMkip9vcBGgEYFfK9jscSEGovt9tf6JPudIpJ1LGwPF8XPDrQGyLdzpK0WeyodXshfKU_VWmF7zaHgEKwVTP6eyxJagSsjv_f.k4KH4UemJUDrEv6AlrYlxgrVtqn8oRdc0E6dse7_A.dyKxk-~E; F=d=GTRbFBs9vIsFBPqpFYbEBjWkFoKZ3VYPcOZjc86puK_qeukJy9prU1z2; PH=l=en-US; Y=v=1&n=4e4ri71j7l6on&l=lj3pdsegobxk43l22ihvwfahc77wvm4u4aao9056/o&p=n2svvhk00000000&r=1ce&intl=us; axids=gam=y-dy_7BtBG2uKuxS_spt7cbrFy7QoSkfWRrfb.CnwY_FJGvFqjAA---A&dv360=eS1Sci5ZRTQ5RTJ1RXBvZkRrM1JMM1pzM1Z0LlBqSkhYdEZnRUY1cmVqRVZWdGo4Z3hjM0NiMnVnTE5YVGFibjYzcFROS35B; gam_id=y-dy_7BtBG2uKuxS_spt7cbrFy7QoSkfWRrfb.CnwY_FJGvFqjAA---A; GUC=AQEACAJlI0llUkIdBwRd&s=AQAAAIsCEds8&g=ZSIDMg; A1=d=AQABBFnbm2QCEJD73Orb0UPzH5ts62DMAwYFEgEACAJJI2VSZdyia3sB_eMBAAcIKP9nYQIMhisIDy8f80k8wbm5XkoLgG-PKwkBBwoBCg&S=AQAAAo9aGFBw2wIEVJoc_Tspjbw; A3=d=AQABBFnbm2QCEJD73Orb0UPzH5ts62DMAwYFEgEACAJJI2VSZdyia3sB_eMBAAcIKP9nYQIMhisIDy8f80k8wbm5XkoLgG-PKwkBBwoBCg&S=AQAAAo9aGFBw2wIEVJoc_Tspjbw; cmp=t=1696898974&j=0&u=1YNN; PRF=t%3DLMT%252BAAPL%252BAAPL.BA%252BTM.BA%252B%255EIXIC%252BISP%252B032830.KS%252BABG.JO%252BWCC%252BAHT.L%252BCPB%252BVMUK.L%252BRAJESHEXPO.NS%252B8128.HK%252B5019.T%26newChartbetateaser%3D1; __gpi=UID=00000c5bc07ce289:T=1696900616:RT=1696900616:S=ALNI_MZVy68LYVM9slK8cg6vB3OeE3-uvw; A1S=d=AQABBFnbm2QCEJD73Orb0UPzH5ts62DMAwYFEgEACAJJI2VSZdyia3sB_eMBAAcIKP9nYQIMhisIDy8f80k8wbm5XkoLgG-PKwkBBwoBCg&S=AQAAAo9aGFBw2wIEVJoc_Tspjbw',
}
data={
"sortType": "DESC",
"sortField": "intradaymarketcap",
"quoteType": "EQUITY",
"offset": offset,
"query": {
"operator": "and",
"operands": [
{
"operator": "or",
"operands": [operand]
}
]
},
"size": 100,
"userId": "2FIL7HB5YTW27SKYQHB3NLU4D4",
"userIdType": "guid"
}
proxy = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
url='https://query2.finance.yahoo.com/v1/finance/screener?crumb=sZDWS3KsXAl&lang=en-US&region=US&formatted=true&corsDomain=finance.yahoo.com'
for i in range(0,3):
try:
response=requests.post(url=url,data=json.dumps(data),headers=header,verify=False,timeout=20,proxies=proxy)
stockmsg=response.json()
except Exception as e:
stockmsg=''
logger.info(f"第offset={offset}页请求失败{e}")
if stockmsg:
logger.info(f"第offset={offset}页请求成功")
break
return stockmsg
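# Assumed shape of the screener response consumed by listPage/getStock below:
# {
#   "finance": {
#     "result": [
#       {
#         "total": 1234,
#         "quotes": [
#           {"symbol": "AAPL", "longName": "...", "exchange": "...",
#            "fullExchangeName": "...", "financialCurrency": "...",
#            "market": "...", "shortName": "..."},
#           ...
#         ]
#       }
#     ]
#   }
# }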
def listPage():
operands=[
{
"operator": "EQ",
"operands": [
"region",
"ar"
]
},
{
"operator": "EQ",
"operands": [
"region",
"au"
]
},
{
"operator": "EQ",
"operands": [
"region",
"ch"
]
},
{
"operator": "EQ",
"operands": [
"region",
"cn"
]
},
{
"operator": "EQ",
"operands": [
"region",
"de"
]
},
{
"operator": "EQ",
"operands": [
"region",
"ee"
]
},
{
"operator": "EQ",
"operands": [
"region",
"at"
]
},
{
"operator": "EQ",
"operands": [
"region",
"ca"
]
},
{
"operator": "EQ",
"operands": [
"region",
"be"
]
},
{
"operator": "EQ",
"operands": [
"region",
"cl"
]
},
{
"operator": "EQ",
"operands": [
"region",
"cz"
]
},
{
"operator": "EQ",
"operands": [
"region",
"dk"
]
},
{
"operator": "EQ",
"operands": [
"region",
"eg"
]
},
{
"operator": "EQ",
"operands": [
"region",
"fi"
]
},
{
"operator": "EQ",
"operands": [
"region",
"br"
]
},
{
"operator": "EQ",
"operands": [
"region",
"es"
]
},
{
"operator": "EQ",
"operands": [
"region",
"fr"
]
},
{
"operator": "EQ",
"operands": [
"region",
"gb"
]
},
{
"operator": "EQ",
"operands": [
"region",
"hk"
]
},
{
"operator": "EQ",
"operands": [
"region",
"id"
]
},
{
"operator": "EQ",
"operands": [
"region",
"gr"
]
},
{
"operator": "EQ",
"operands": [
"region",
"hu"
]
},
{
"operator": "EQ",
"operands": [
"region",
"il"
]
},
{
"operator": "EQ",
"operands": [
"region",
"ie"
]
},
{
"operator": "EQ",
"operands": [
"region",
"it"
]
},
{
"operator": "EQ",
"operands": [
"region",
"in"
]
},
{
"operator": "EQ",
"operands": [
"region",
"kr"
]
},
{
"operator": "EQ",
"operands": [
"region",
"is"
]
},
{
"operator": "EQ",
"operands": [
"region",
"jp"
]
},
{
"operator": "EQ",
"operands": [
"region",
"kw"
]
},
{
"operator": "EQ",
"operands": [
"region",
"lt"
]
},
{
"operator": "EQ",
"operands": [
"region",
"lk"
]
},
{
"operator": "EQ",
"operands": [
"region",
"lv"
]
},
{
"operator": "EQ",
"operands": [
"region",
"mx"
]
},
{
"operator": "EQ",
"operands": [
"region",
"nl"
]
},
{
"operator": "EQ",
"operands": [
"region",
"nz"
]
},
{
"operator": "EQ",
"operands": [
"region",
"my"
]
},
{
"operator": "EQ",
"operands": [
"region",
"no"
]
},
{
"operator": "EQ",
"operands": [
"region",
"ph"
]
},
{
"operator": "EQ",
"operands": [
"region",
"pe"
]
},
{
"operator": "EQ",
"operands": [
"region",
"vn"
]
},
{
"operator": "EQ",
"operands": [
"region",
"us"
]
},
{
"operator": "EQ",
"operands": [
"region",
"tr"
]
},
{
"operator": "EQ",
"operands": [
"region",
"sr"
]
},
{
"operator": "EQ",
"operands": [
"region",
"za"
]
},
{
"operator": "EQ",
"operands": [
"region",
"ve"
]
},
{
"operator": "EQ",
"operands": [
"region",
"th"
]
},
{
"operator": "EQ",
"operands": [
"region",
"sg"
]
},
{
"operator": "EQ",
"operands": [
"region",
"tw"
]
},
{
"operator": "EQ",
"operands": [
"region",
"sa"
]
},
{
"operator": "EQ",
"operands": [
"region",
"qa"
]
},
{
"operator": "EQ",
"operands": [
"region",
"pl"
]
},
{
"operator": "EQ",
"operands": [
"region",
"se"
]
},
{
"operator": "EQ",
"operands": [
"region",
"ru"
]
},
{
"operator": "EQ",
"operands": [
"region",
"pt"
]
},
{
"operator": "EQ",
"operands": [
"region",
"pk"
]
}
]
for operand in operands:
logger.info(f'采集地域股票信息{operand}')
#第一次请求获取地域总共有的股票代码数量
try:
stockmsg=reqmsg(0,operand)
total=stockmsg['finance']['result'][0]['total']
except Exception as e:
logger.info(f'region该地域没有股票信息{operand}')
continue
for i in range(0,total,100):
logger.info(f"offset的值{i}")
stockmsg=reqmsg(i,operand)
if stockmsg:
try:
getStock(stockmsg)
except Exception as e:
logger.info(f"解析失败{e}")
time.sleep(3)
def getStock(stockmsg):
quotes=stockmsg['finance']['result'][0]['quotes']
for quote in quotes:
symbol=quote['symbol']
try:
longName=quote['longName']
except:
longName=''
try:
exchange=quote['exchange']
except:
exchange=''
try:
fullExchangeName=quote['fullExchangeName']
except:
fullExchangeName=''
try:
financialCurrency=quote['financialCurrency']
except:
financialCurrency=''
try:
market=quote['market']
except:
market=''
try:
shortName=quote['shortName']
except:
shortName=''
quotmsg={
'symbol':symbol,
'longName':longName,
'exchange':exchange,
'fullExchangeName':fullExchangeName,
'financialCurrency':financialCurrency,
'market':market,
'shortName':shortName
}
dataToSql(quotmsg)
def conn144():
conn = pymysql.Connect(host='114.115.159.144', port=3306, user='caiji', passwd='zzsn9988', db='caiji',
charset='utf8')
cursor = conn.cursor()
return conn,cursor
def dataToSql(quotmsg):
conn,cursor=conn144()
try:
symbol=quotmsg['symbol']
longName=quotmsg['longName']
exchange=quotmsg['exchange']
fullExchangeName=quotmsg['fullExchangeName']
financialCurrency=quotmsg['financialCurrency']
market=quotmsg['market']
shortName=quotmsg['shortName']
# 检查记录是否存在
select_sql=f"SELECT * FROM yahoostock WHERE symbol='{symbol}'"
cursor.execute(select_sql)
existing_record = cursor.fetchone()
# 获取当前时间
current_time = datetime.datetime.now()
# 将时间转换为字符串
currentdate = current_time.strftime("%Y-%m-%d %H:%M:%S")
if existing_record:
# 记录已存在,执行更新操作
# update_param=(symbol,longName,exchange,fullExchangeName,financialCurrency,market,shortName,currentdate)
# update_sql=f"UPDATE yahoostock SET social_credit_code=%s , content=%s , level_relation=%s,origin_type=%s ,create_time=%s WHERE stock_code=%s "
# cursor.execute(update_sql,update_param)
logger.info(f"股票代码已采集入库过{symbol}")
else:
insert_param=(symbol,longName,exchange,fullExchangeName,financialCurrency,market,shortName,currentdate)
insert_sql=f"INSERT INTO yahoostock (symbol, longName,exchange,fullExchangeName,financialCurrency,market,shortName,currentdate) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s)"
# 记录不存在,执行插入操作
cursor.execute(insert_sql,insert_param)
logger.info(f"{symbol}股票入库添加成功")
# 提交事务
conn.commit()
except Exception as e:
return False
return True
if __name__ == '__main__':
logger.info(f"采集开始")
try:
listPage()
except Exception as e:
logger.info(f"程序异常退出{e}")
logger.info(f"采集结束")
# -*- coding: utf-8 -*-
import os
import random
import sys
import time
import logbook
import logbook.more
# 核心工具包
import pymysql
import redis
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源
class BaseCore:
# 序列号
__seq = 0
# 代理池 数据库连接
__cnx_proxy =None
__cursor_proxy = None
# agent 池
__USER_AGENT_LIST = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.29 Safari/525.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/531.4 (KHTML, like Gecko) Chrome/3.0.194.0 Safari/531.4',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.50 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; Lunascape 5.0 alpha2)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.7 Safari/532.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.10 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Maxthon;',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.1 (KHTML, like Gecko) Chrome/2.0.169.0 Safari/530.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP; rv:1.7) Gecko/20040614 Firefox/0.9',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.810.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.500.0 Safari/534.6',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; TencentTraveler)',
'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.4 (KHTML, like Gecko) Chrome/6.0.481.0 Safari/534.4',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.370.0 Safari/533.4',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.31 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.17) Gecko/20110123 (like Firefox/3.x) SeaMonkey/2.0.12',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB) AppleWebKit/534.1 (KHTML, like Gecko) Chrome/6.0.428.0 Safari/534.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.540.0 Safari/534.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE) Chrome/4.0.223.3 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/12.0.702.0 Safari/534.24',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.42 Safari/525.19',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.3 (KHTML, like Gecko) Chrome/4.0.227.0 Safari/532.3',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.8 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.8',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.460.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.463.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.9 (KHTML, like Gecko) Chrome/2.0.157.0 Safari/528.9',
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.794.0 Safari/535.1',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.694.0 Safari/534.24',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5',
'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1',
'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.4 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.682.0 Safari/534.21',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.0 (KHTML, like Gecko) Chrome/2.0.182.0 Safari/531.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.9 (KHTML, like Gecko) Chrome/7.0.531.0 Safari/534.9',
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)',
'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.811.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.0; de-DE; rv:1.7.5) Gecko/20041108 Firefox/1.0',
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
'Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.127 Safari/533.4',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E) QQBrowser/6.9.11079.201',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; zh-cn) Opera 8.50',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/7.0.0 Safari/700.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.4 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20041107 Firefox/0.9.2 StumbleUpon/1.994',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.7.5) Gecko/20041110 Firefox/1.0',
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; en) Opera 8.0',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.6 Safari/530.5',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.0.3705)',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.1 (KHTML, like Gecko) Chrome/2.0.168.0 Safari/530.1',
'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20040913 Firefox/0.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.8 (KHTML, like Gecko) Chrome/2.0.177.1 Safari/530.8',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.40 Safari/530.5',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.24 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.10 (KHTML, like Gecko) Chrome/2.0.157.2 Safari/528.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.2 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; T312461)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.461.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.0; rv:1.7.3) Gecko/20041001 Firefox/0.10.1',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; de-DE) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.2 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/531.3 (KHTML, like Gecko) Chrome/3.0.193.2 Safari/531.3',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.864.0 Safari/535.2',
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0',
'Mozilla/5.0 (Windows NT 5.1; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.801.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.697.0 Safari/534.24',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.548.0 Safari/534.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.652.0 Safari/534.17',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10 ChromePlus/1.5.2.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.0 Safari/532.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.2 Safari/533.2',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.4 Safari/532.1',
'Mozilla/5.0 (Windows NT 6.0; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.0 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; sv-SE; rv:1.7.5) Gecko/20041108 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.462.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE; rv:1.7.5) Gecko/20041122 Firefox/1.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; uZardWeb/1.0; Server_JP)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; HCI0449; .NET CLR 1.0.3705)',
'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98; DigExt); Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1);',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.23 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.0; rv:14.0) Gecko/20100101 Firefox/14.0.1',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.7 (KHTML, like Gecko) Chrome/2.0.176.0 Safari/530.7',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; InfoPath.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.55 Safari/525.19',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0a1) Gecko/20110623 Firefox/7.0a1 Fennec/7.0a1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.724.100 Safari/534.30',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.33 Safari/534.3 SE 2.X MetaSr 1.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; WOW64; SV1; uZardWeb/1.0; Server_HK)',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3',
'Mozilla/5.0 (Windows NT 6.0) yi; AppleWebKit/345667.12221 (KHTML, like Gecko) Chrome/23.0.1271.26 Safari/453667.1221',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.2 (KHTML, like Gecko) Chrome/3.0.191.3 Safari/531.2',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.39 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.38 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8b) Gecko/20050118 Firefox/1.0+',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP; rv:1.7) Gecko/20040707 Firefox/0.9.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.4 (KHTML, like Gecko) Chrome/2.0.171.0 Safari/530.4',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; nl-NL; rv:1.7.5) Gecko/20041202 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.204.0 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.6 Safari/532.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/528.8 (KHTML, like Gecko) Chrome/1.0.156.0 Safari/528.8',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/6.0)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 1.0.3705; .NET CLR 2.0.50727; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.43 Safari/534.7',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13',
'Mozilla/5.0 (ipad Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.498.0 Safari/534.6',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.43 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.19 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-CA) AppleWebKit/534.13 (KHTML like Gecko) Chrome/9.0.597.98 Safari/534.13',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.6 (KHTML, like Gecko) Chrome/2.0.174.0 Safari/530.6',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.3.154.6 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.599.0 Safari/534.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.8 (KHTML, like Gecko) Chrome/7.0.521.0 Safari/534.8',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1b2pre) Gecko/20081015 Fennec/1.0a1',
'Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5'
]
def close(self):
try:
self.__cursor_proxy.close()
self.__cnx_proxy.close()
except :
pass
def __init__(self):
self.__cnx_proxy = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='clb_project',
charset='utf8mb4')
self.__cursor_proxy= self.__cnx_proxy.cursor()
# 连接到Redis
self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
# 计算耗时
def getTimeCost(self,start, end):
seconds = int(end - start)
m, s = divmod(seconds, 60)
h, m = divmod(m, 60)
if (h > 0):
return "%d小时%d分钟%d秒" % (h, m, s)
elif (m > 0):
return "%d分钟%d秒" % (m, s)
elif (seconds > 0):
return "%d秒" % (s)
else:
ms = int((end - start) * 1000)
return "%d毫秒" % (ms)
# 当前时间格式化
# 1 : 2001-01-01 12:00:00 %Y-%m-%d %H:%M:%S
# 2 : 010101120000 %y%m%d%H%M%S
# 时间戳 3:1690179526555 精确到秒
def getNowTime(self, type):
now_time = ""
if type == 1:
now_time = time.strftime("%Y-%m-%d %H:%M:%S")
if type == 2:
now_time = time.strftime("%y%m%d%H%M%S")
if type == 3:
now_time = int(time.time() * 1000)
return now_time
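# Illustrative return values of getNowTime:
# getNowTime(1) -> "2001-01-01 12:00:00"-style string
# getNowTime(2) -> "010101120000"-style string
# getNowTime(3) -> millisecond epoch timestamp as an int, e.g. 1690179526555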
# 日志格式
def logFormate(self,record, handler):
formate = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}".format(
date=record.time, # 日志时间
level=record.level_name, # 日志等级
filename=os.path.split(record.filename)[-1], # 文件名
func_name=record.func_name, # 函数名
lineno=record.lineno, # 行号
msg=record.message # 日志内容
)
return formate
# 获取logger
def getLogger(self,fileLogFlag=True, stdOutFlag=True):
dirname, filename = os.path.split(os.path.abspath(sys.argv[0]))
dirname = os.path.join(dirname, "logs")
filename = filename.replace(".py", "") + ".log"
if not os.path.exists(dirname):
os.mkdir(dirname)
logbook.set_datetime_format('local')
logger = logbook.Logger(filename)
logger.handlers = []
if fileLogFlag: # 日志输出到文件
logFile = logbook.TimedRotatingFileHandler(os.path.join(dirname, filename), date_format='%Y-%m-%d',
bubble=True, encoding='utf-8')
logFile.formatter = self.logFormate
logger.handlers.append(logFile)
if stdOutFlag: # 日志打印到屏幕
logStd = logbook.more.ColorizedStderrHandler(bubble=True)
logStd.formatter = self.logFormate
logger.handlers.append(logStd)
return logger
# 获取随机的userAgent
def getRandomUserAgent(self):
return random.choice(self.__USER_AGENT_LIST)
# 获取代理
def get_proxy(self):
sql = "select proxy from clb_proxy"
self.__cursor_proxy.execute(sql)
proxy_lists = self.__cursor_proxy.fetchall()
ip_list = []
for proxy_ in proxy_lists:
ip_list.append(str(proxy_).replace("('", '').replace("',)", ''))
proxy_list = []
for str_ip in ip_list:
str_ip_list = str_ip.split('-')
proxyMeta = "http://%(host)s:%(port)s" % {
"host": str_ip_list[0],
"port": str_ip_list[1],
}
proxy = {
"HTTP": proxyMeta,
"HTTPS": proxyMeta
}
proxy_list.append(proxy)
# pick a random proxy from however many are configured
return random.choice(proxy_list)
# def get_proxy(self):
# ip_list = []
# with self.__cursor_proxy as cursor:
# sql_str = '''select PROXY from clb_proxy where id={} '''.format(random.randint(1, 12))
# print(sql_str)
# cursor.execute(sql_str)
# rows = cursor.fetchall()
# for row in tqdm(rows):
# str_ip = row[0]
# str_ip_list = str_ip.split('-')
# proxyMeta = "http://%(host)s:%(port)s" % {
# "host": str_ip_list[0],
# "port": str_ip_list[1],
# }
# proxy = {
# "HTTP": proxyMeta,
# "HTTPS": proxyMeta
# }
# ip_list.append(proxy)
#
# return ip_list
# def get_proxyIPPort(self):
# ip_list = []
# with self.__cursor_proxy as cursor:
# sql_str = '''select PROXY from clb_proxy where id={} '''.format(random.randint(1, 12))
# print(sql_str)
# cursor.execute(sql_str)
# rows = cursor.fetchall()
# for row in tqdm(rows):
# str_ip = row[0]
# str_ip_list = str_ip.split('-')
# proxy = {
# "host": str_ip_list[0],
# "port": str_ip_list[1],
# }
#
# ip_list.append(proxy)
#
# return ip_list
#
# -*- coding: utf-8 -*-
# 智能采集请求
# 1、考虑:请求智能采集时,不再使用实体类
# a. 仍使用:通过HTTP的 raw 请求体,直接传递HTML源文件,通过query参数传递 lang-code、link-text 参数
# b. 原因:在 postman 中,不方便进行测试,无法使用粘贴后的HTML源文件
# 2、不考虑:使用实体类,利大于弊
# a. 使用实体类,方便扩展参数字段
# b. 方便展示接口文档:调用 json_parameter_utility.get_json_parameters 函数,可显示请求实体类
class ExtractionRequest:
# 语言代码
# 1、采集“非中文”的文章时,需要用到语言代码
lang_code = ""
# 链接文本
# 1、用于采集标题,如果不提供,标题的准确度会下降
link_text = ""
# 文章页面源文件
# 1、用于采集标题、发布时间、内容等
article_html = ""
@staticmethod
def from_dict(dictionary: dict):
extraction_request = ExtractionRequest()
# 尝试方法:
# 1、将字典,更新到内部的 __dict__ 对象
# extraction_request.__dict__.update(dictionary)
# 将字典值,设置到当前对象
for key in dictionary:
setattr(extraction_request, key, dictionary[key])
return extraction_request
def to_dict(self):
# 转换为字典对象:
# 1、序列化为JSON时,需要调用此方法
# 2、转换为JSON字符串:json.dumps(extraction_result, default=ExtractionResult.to_dict)
data = {}
# 借助内部的 __dict__ 对象
# 1、将内部的 __dict__ 对象,更新到新的字典对象中
data.update(self.__dict__)
return data
# 采集结果
class ExtractionResult:
# 标题
title = ""
# 发布日期
publish_date = ""
# 正文(保留所有HTML标记,如:br、img)
text = ""
# URL
url = ""
# 摘要
meta_description = ""
# 干净正文(不带HTML)
cleaned_text = ""
# 来源(目前只支持采集中文网站中的“来源”)
# source = ""
# 顶部图片(top_image:采集不到任何内容,不再使用此属性)
# top_image = ""
def to_dict(self):
# 转换为字典对象:
# 1、序列化为JSON时,需要调用此方法
# 2、转换为JSON字符串:json.dumps(extraction_result, default=ExtractionResult.to_dict)
data = {}
# 借助内部的 __dict__ 对象
# 1、将内部的 __dict__ 对象,更新到新的字典对象中
data.update(self.__dict__)
return data
class UrlPickingRequest:
# 列表页面的响应URL
# 1、作为Base URL,用于拼接提取到的相对URL
# 2、Base URL:必须使用响应URL
# 3、示例:在 Python中,通过 requests.get(url) 请求URL后,需要使用 resp.url 作为 Base URL
list_page_resp_url = ""
# 列表页面源文件
# 1、用于提取文章网址
list_page_html = ""
@staticmethod
def from_dict(dictionary: dict):
url_picking_request = UrlPickingRequest()
# 将字典值,设置到当前对象
for key in dictionary:
setattr(url_picking_request, key, dictionary[key])
return url_picking_request
def to_dict(self):
# 转换为字典对象:
# 1、序列化为JSON时,需要调用此方法
# 2、转换为JSON字符串:json.dumps(extraction_result, default=ExtractionResult.to_dict)
data = {}
# 借助内部的 __dict__ 对象
# 1、将内部的 __dict__ 对象,更新到新的字典对象中
data.update(self.__dict__)
return data
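# A minimal usage sketch of the entities above (field values are placeholders); it only
# runs when this module is executed directly and mirrors the
# json.dumps(..., default=to_dict) pattern described in the comments.
if __name__ == '__main__':
    import json
    request = ExtractionRequest.from_dict({
        'lang_code': 'en',
        'link_text': 'Example headline',
        'article_html': '<html><body><h1>Example headline</h1></body></html>',
    })
    # serialize the request by passing its to_dict method as the JSON serializer
    print(json.dumps(request, default=ExtractionRequest.to_dict, ensure_ascii=False))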
# -*- coding: utf-8 -*-
import requests
from goose3 import Goose
from goose3.text import StopWordsChinese, StopWordsKorean, StopWordsArabic
from entity import *
from smart_extractor_utility import SmartExtractorUtility
# goose3自带的lxml,提示找不到etree,但仍可使用
from lxml import etree
from lxml.html import HtmlElement
class SmartExtractor:
@staticmethod
def get_supported_lang_code_dict():
"""
支持语言:
1、需要分词,传递分词器(3种):
a. 中文、韩语、阿拉伯语
2、不需要分词,直接传递语言编码(16种)
a. 其中英语、俄语,单独测试
"""
supported_lang_code_dict = {
'cn': '中文', # 中文
'zh-cn': '简体中文', # 简体中文
'zh': '简体中文', # 简体中文
'ko': '韩语', # 韩语
'ar': '阿拉伯语', # 阿拉伯语
'en': '英语', # 英语
'ru': '俄语', # 俄语
'da': '丹麦语', # 丹麦语
'de': '德语', # 德语
'es': '西班牙语', # 西班牙语
'fi': '芬兰语', # 芬兰语
'fr': '法语', # 法语
'hu': '匈牙利语', # 匈牙利语
'id': '印度尼西亚语', # 印度尼西亚语
'it': '意大利语', # 意大利语
'nb': '挪威语(伯克梅尔)', # 挪威语(伯克梅尔)
'nl': '荷兰语', # 荷兰语
'no': '挪威文(耐诺斯克)', # 挪威文(耐诺斯克)
'pl': '波兰语', # 波兰语
'pt': '葡萄牙语', # 葡萄牙语
'sv': '瑞典语', # 瑞典语
}
return supported_lang_code_dict
def __init__(self, lang_code='cn'):
"""
构造器:未指定 lang_code 参数时,默认为 cn
"""
# 支持语言
supported_lang_code_list = list(SmartExtractor.get_supported_lang_code_dict())
# 初始化 goose 对象:
# 1、根据语言代码,创建 goose 对象
if lang_code is None or lang_code == 'cn' or lang_code == 'zh-cn' or lang_code == 'zh':
# 需要分词:中文
# 1、不指定lang_code参数,或不指定lang_code为 None 时,默认为中文分词
# 2、Flask Web接口:未指定get参数 lang_code 时,lang_code 会接收为 None
self.goose = Goose({'stopwords_class': StopWordsChinese})
elif lang_code == 'ko':
# 需要分词:韩语
# 1、测试:只传递语言,不传递分词器
# self.goose = Goose({'use_meta_language': False, 'target_language': 'ko'}) # 测试失败:正文采集为空
# self.goose = Goose() # 测试失败:正文采集为空
# 韩语分词:测试成功
self.goose = Goose({'stopwords_class': StopWordsKorean})
elif lang_code == 'ar':
# 需要分词:阿拉伯语
# self.goose = Goose({'use_meta_language': False, 'target_language': 'en'}) # 测试失败:正文采集为空
# self.goose = Goose() # 测试成功
# self.goose = Goose({'use_meta_language': False, 'target_language': lang_code}) # 测试成功:直接传递语言编码
self.goose = Goose({'stopwords_class': StopWordsArabic})
elif lang_code == 'en':
# 单独测试:英文
# self.goose = Goose({'use_meta_language': False, 'target_language': 'en'})
# 测试成功:创建Goose对象时,不指定语言默认为英文分词
self.goose = Goose()
elif lang_code == 'ru':
# 单独测试:俄语
# self.goose = Goose({'use_meta_language': False, 'target_language': 'en'}) # 测试失败:正文采集为空
self.goose = Goose({'use_meta_language': False, 'target_language': lang_code}) # 测试成功:直接传递语言编码
elif lang_code in supported_lang_code_list:
# 其它语言编码,统一处理,不再单独测试
self.goose = Goose({'use_meta_language': False, 'target_language': lang_code})
else:
# 未识别的语言代码
raise Exception(f'智能采集时,无法识别语言代码:{lang_code}')
def get_extraction_result(self, article, link_text=''):
"""
获取采集结果:
1、从 artcile 对象中,采集数据并封装到 ExtractionResult
"""
# 用于保存:采集后的文本
extraction_result = ExtractionResult()
# 标题
# extraction_result.title = article.title # 原办法:使用 goose 采集到的 title 中的标题
extraction_result.title = SmartExtractorUtility.get_article_title(article, link_text)
# 发布日期
extraction_result.publish_date = SmartExtractorUtility.get_publish_date(article)
# 正文(保留所有HTML标记,如:br、img)
extraction_result.text = SmartExtractorUtility.get_article_text(article)
# URL
extraction_result.url = article.final_url
# 摘要
extraction_result.meta_description = article.meta_description
# 干净正文(不带HTML)
extraction_result.cleaned_text = article.cleaned_text
# 来源(目前只支持采集中文网站中的“来源”)
extraction_result.source = ''
return extraction_result
def extract_by_url(self, url, link_text=''):
"""
按URL采集内容
"""
# 采集正文:传入url
article = self.goose.extract(url=url)
# article = goose.extract(raw_html=html)
return self.get_extraction_result(article, link_text)
def extract_by_html(self, html, link_text=''):
"""
按HTML采集内容
"""
# 采集正文:传入html
article = self.goose.extract(raw_html=html)
return self.get_extraction_result(article, link_text)
def extract_by_url_test():
# 测试:按URL采集
url_list = [
# "http://www.news.cn/politics/2022-07/31/c_1128879636.htm", # 短文本
# "https://baijiahao.baidu.com/s?id=1741311527693101670", # 带多张图片
# "https://news.cctv.com/2022/08/16/ARTIERrXbbVtVUaQU0pMzQxf220816.shtml", # 带多张图片,及一个视频(测试内容XPath失败)
# "http://opinion.people.com.cn/n1/2022/0803/c1003-32492653.html", # 人民网
# 韩文:中央日报-politics
# "https://www.joongang.co.kr/article/25094974",
# "https://www.joongang.co.kr/article/25094967",
# 英文:加德满都邮报-national-security
# "https://kathmandupost.com/national-security/2020/01/17/police-s-intelligence-continues-to-fail-them-as-chand-party-claims-explosion",
# "https://kathmandupost.com/national-security/2019/11/04/india-s-new-political-map-places-disputed-territory-of-kalapani-inside-its-own-borders", # 测试采集:发布时间
# 俄语:今日白俄罗斯报-word
# "https://www.sb.by/articles/byvshiy-premer-ministr-italii-zayavil-chto-strane-sleduet-otkazatsya-ot-gaza-iz-rossii.html",
# 'https://www.sb.by/articles/kryuchkov-predupredil-o-nepopravimykh-posledstviyakh-dlya-ukrainy-v-sluchae-udarov-po-krymu.html',
# 阿语
# "http://arabic.people.com.cn/n3/2022/0822/c31659-10137917.html",
# "http://arabic.people.com.cn/n3/2022/0822/c31657-10137909.html",
# 测试提取标题
# "http://www.sasac.gov.cn/n4470048/n16518962/n20928507/n20928570/c25819031/content.html",
# "http://www.forestry.gov.cn/main/102/20220823/092407820617754.html",
# "http://www.sasac.gov.cn/n2588025/n2588139/c25825832/content.html", # 标题采集为空
# 'http://www.crfeb.com.cn/1j/_124/2005409/index.html', # 内容采集失败
# 'http://www.crfeb.com.cn/1j/_124/912248/index.html', # 内容采集失败
# 'https://www.crcc.cn/art/2021/11/12/art_205_3413380.html', # 中国铁建股份有限公司-工作动态(日期采集错误)
# 'http://ccecc.crcc.cn/art/2015/11/19/art_7608_1136312.html', # 中国土木工程集团有限公司-多个栏目(日期采集错误)
# 'http://v.people.cn/n1/2022/0901/c444662-32517559.html', # 人民网视频:title必须以“元素中的标题”开始,不能判断“包含”
# 'https://www.chec.bj.cn/cn/xwzx/gsyw/2022/202207/t20220706_8128.html', # 中国港湾工程有限责任公司-公司要闻(标题采集失败)
# 'https://www.cscec.com/xwzx_new/gsyw_new/202208/3570377.html', # 中国建筑集团有限公司-中建要闻(标题采集失败)
# 'https://www.crbc.com/site/crbc/276/info/2022/46884837.html', # 中国路桥工程有限责任公司-多个栏目(标题采集失败)
# 'http://www.cgcoc.com.cn/news/432.html', # 中地海外集团有限公司-新闻中心(标题和内容采集失败)
# 'http://www.mcc.com.cn/mcc/_132154/_132572/308233/index.html' # 中国五矿(测试:正文采集失败)
# 'http://www.powerchina.cn/art/2015/5/27/art_7449_441845.html', # 中国电力建设集团(测试:标题、正文采集失败)
# 中国电力建设集团(测试:标题采集失败),相比列表中的链接文本、title标签中的内容,元素中的标题,“秉承丝路精髓 抒写锦绣华章”中间多出一个空格
# 'http://world.people.com.cn/n1/2022/0624/c1002-32455607.html', # 标题采集失败:看着没有问题
# 'https://www.cscec.com/xwzx_new/zqydt_new/202209/3578274.html', # 中国建筑股份有限公司-企业动态:日期采集错误,采集到当天日期
# 'https://3g.k.sohu.com/t/n705260979' #天眼查--企业公告'
# 'https://baijiahao.baidu.com/s?id=1769415116218226935'
# 'https://m.gelonghui.com/community/post/1678728#ocr'
'http://epaper.zqrb.cn/html/2023-05/27/content_950333.htm'
]
# 语言编码
lang_code = 'cn'
# lang_code = 'ko'
# lang_code = 'en'
# lang_code = 'ru'
# lang_code = 'ar'
for url in url_list:
print()
print("-" * 100)
print('请求URL:', url)
extraction_result = SmartExtractor(lang_code).extract_by_url(url)
# 测试转换为JSON
# 1、直接转换时,会抛异常:TypeError: Object of type ExtractionResult is not JSON serializable
# print(json.dumps(extraction_result))
# print(json.dumps(extraction_result, default=ExtractionResult.to_dict)) # 转换成功:指定序列化器
# print(type(json.dumps(extraction_result.to_dict()))) # 返回类型:<class 'str'>,内容中的中文会被转义
# print(str(extraction_result.to_dict())) # 如果直接转换为字符串,中文不会被转义
# 打印测试结果
print_extraction_result(extraction_result)
def extract_by_html_test():
# 测试:按HTML采集
html = '''
<html>
<head>
<title>标题</title>
</head>
<body>
<div>标题</div>
<div>内容</div>
</body>
</html>
'''
# 测试:通过请求URL,获取完整的html
# url = "http://www.news.cn/politics/2022-07/31/c_1128879636.htm" # 测试成功
# url = "http://views.ce.cn/view/ent/202208/15/t20220815_37961634.shtml" # 1、测试失败:lxml.etree.ParserError: Document is empty
url = 'https://www.crcc.cn/art/2021/11/12/art_205_3413380.html' # 中国铁建股份有限公司-工作动态(日期采集错误)
# url = 'http://ccecc.crcc.cn/art/2015/11/19/art_7608_1136312.html' # 中国土木工程集团有限公司-多个栏目(日期采集错误)
print()
print("-" * 100)
print('请求URL:', url)
html = requests.get(url).text
# 语言编码
lang_code = 'cn'
# 采集内容
extraction_result = SmartExtractor(lang_code).extract_by_html(html)
# 打印测试结果
print_extraction_result(extraction_result)
def print_extraction_result(extraction_result):
# 打印测试结果
print("标题:", extraction_result.title) # 标题
print("发布时间:", extraction_result.publish_date) # 发布时间
print("正文:", extraction_result.text) # 正文
print("URL:", extraction_result.url) # URL
print("摘要:", extraction_result.meta_description) # 摘要
print("干净正文:", extraction_result.cleaned_text) # 干净正文
if __name__ == '__main__':
try:
# 测试:按URL采集
extract_by_url_test()
# 测试:按HTML采集
# extract_by_html_test()
except Exception as e:
print("采集失败:", e)
# -*- coding: utf-8 -*-
import re
from goose3.article import Article
from lxml import etree
from lxml.html import HtmlElement
class SmartExtractorUtility:
# 标题最小长度
title_min_len = 6
@staticmethod
def extract_publish_date(html):
pattern_list = [
# 2010-10-1 8:00:00
r"20\d{2}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}",
# 2010-10-1 8:00
r"20\d{2}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}",
# 2010年10月1日 8:00:00
r"20\d{2}年\d{1,2}月\d{1,2}日 \d{1,2}:\d{1,2}:\d{1,2}",
# 2010年10月1日 8:00
r"20\d{2}年\d{1,2}月\d{1,2}日 \d{1,2}:\d{1,2}",
# 2010/10/1 8:00:00
r"20\d{2}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}",
# 2010/10/1 8:00
r"20\d{2}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}",
# 2010-10-1
r"20\d{2}-\d{1,2}-\d{1,2}",
# 2010年10月1日
r"20\d{2}年\d{1,2}月\d{1,2}日",
# 2010/10/1
r"20\d{2}/\d{1,2}/\d{1,2}",
# 2022.08.28
r"20\d{2}\.\d{1,2}\.\d{1,2}"
# 12-07-02 10:10
r"\d{2}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}",
# 1月前
r"\d+(&nbsp;| )*月前",
# 12天前
r"\d+(&nbsp;| )*天前",
# 2小时前
r"\d+(&nbsp;| )*小时前",
# 15分钟前
r"\d+(&nbsp;| )*分钟前",
# 昨天&nbsp;17:59
r"昨天(&nbsp;| )*\d{1,2}:\d{1,2}",
]
# 尝试匹配所有正则式
for pattern in pattern_list:
# 提取可见日期:
# 1、必须在标签内部,不能提取HTML标签属性中的日期
# 2、提取规则:必须在 > 和 < 之间,且中间不能再有 >
tag_pattern = f'>[^>]*(?P<date>{pattern})[^>]*<'
# 搜索第一个匹配项
match = re.search(tag_pattern, html)
# 如果匹配成功,返回正确的发布时间
if match:
return match.group('date')
# 所有正则式匹配失败,返回空字符串
return ""
@staticmethod
def add_html_br(cleaned_text):
# 包装HTML标记:换行
# 1、优先替换双换行:使用goose提取到的cleaned_text,都是双换行
cleaned_text = cleaned_text.replace("\n\n", "<br>")
cleaned_text = cleaned_text.replace("\n", "<br>")
return cleaned_text
@staticmethod
def get_article_title(article: Article, link_text=''):
#
# 优先提取h1、div、span、td元素中的标题
# 1、测试任务:2.智能采集\1.测试任务\国资委-新闻发布
# a. 原title标题:中国能建:聚焦价值创造 打造国企改革发展“红色引擎”-国务院国有资产监督管理委员会
# b. div元素中的标题:中国能建:聚焦价值创造 打造国企改革发展“红色引擎”
# 2、测试任务:2.智能采集\1.测试任务\国家林业和草原局-地方动态
# a. 原title标题:上海完成森林资源年度监测遥感解译图斑市级质量检查_地方动态_国家林业和草原局政府网
# b. span元素中的标题:上海完成森林资源年度监测遥感解译图斑市级质量检查
#
# 根据xpath,查询标题元素时:
# 1、标签优先级:h1、特殊元素(id或class包含title)、h2、h3、div、span、td
#
title_element_list = [
'h1',
'h2',
'h3',
'div',
'span',
'td',
'p',
]
# 对比标题前,统一将空格剔除(2022-09-21):
# 1、测试任务:3.马荣:一带一路,配置不成功\中国电力建设集团(测试:标题采集失败)
# 2、相比列表中的链接文本、title标签中的内容,元素中的标题,“秉承丝路精髓 抒写锦绣华章”中间多出一个空格
link_text = link_text.replace(" ", "")
tag_title = article.title.replace(" ", "")
title = None
for title_element in title_element_list:
element_list = article.raw_doc.getroottree().xpath(f'//{title_element}')
# 查询XPath成功,遍历所有元素
for element in element_list:
# 取纯文本内容,包括子元素
text = etree.tounicode(element, method='text').strip()
text_no_space = text.replace(" ", "")
# 判断标题:
# 1、如果智能采集的原title标题,以“元素内容”开头,则取元素内容
# 2、查找成功后,返回text作为标题,否则继续下一个循环
# 判断是否以“元素中的标题”开始:
# 1、title必须以“元素中的标题”开始,不能判断“包含”
# 2、测试URL:http://v.people.cn/n1/2022/0901/c444662-32517559.html
# 3、title标签:<title>亿缕阳光丨小生意,大格局--人民视频--人民网</title>
# a. 如果判断“包含”,会采集到:人民网
# b. 因为存在元素:<a href="http://www.people.com.cn/" class="clink">人民网</a>
# c. 如果判断以“元素中的标题”开始,采集到:亿缕阳光丨小生意,大格局
# d. 标题元素:<h2>亿缕阳光丨小生意,大格局</h2>
# 新方案:
# 1、对比常用元素:仍判断是否以“元素中的标题”开始
# 2、优先对比“链接文本”,其次对比“title元素”
# 3、满足最少字数:6个字
# 新方案(2022-09-21):
# 1、对比“链接文本”、“title元素”时,除了判断开始,同时允许结尾
# 2、测试任务:3.马荣:一带一路,配置不成功\中国电力建设集团(测试:标题采集失败)
# a. 列表中的链接文本:【“一带一路”旗舰篇】秉承丝路精髓 抒写锦绣华章——河北电...
# b. title标签中的内容:<title>中国电力建设集团 公司要闻 【“一带一路”旗舰篇】秉承丝路精髓 抒写锦绣华章——河北电建一公司摘取“一带一路”上的“鲁班奖”桂冠</title>
# c. 元素中的标题:【“一带一路”旗舰篇】秉承丝路精髓 抒写锦绣华章——河北电建一公司摘取“一带一路”上的“鲁班奖”桂冠
if text_no_space is not None and text_no_space != '' and len(
text_no_space) >= SmartExtractorUtility.title_min_len:
# 优先判断6个字,以方便调试:排除短文本元素
if link_text.startswith(text_no_space) or link_text.endswith(text_no_space) or tag_title.startswith(
text_no_space) or tag_title.endswith(text_no_space):
# 返回时,仍返回未剔除空格后的标题
return text
if title:
# 查找成功,返回元素中的标题
return title
else:
# 查找失败,返回提取到的title属性
# return article.title
# 新考虑:标题采集失败后,返回空值
# 1、原因:article.title 不可靠,只是提取了 title 标签中的内容
return ''
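# A minimal sketch of the acceptance rule applied above: spaces are stripped, the
# candidate must be at least title_min_len characters, and it must be a prefix or a
# suffix of the link text or of the <title> content ("contains" is not enough).
# The strings reuse the people.cn example from the comments; illustrative only.
@staticmethod
def title_rule_demo():
    tag_title = '亿缕阳光丨小生意,大格局--人民视频--人民网'.replace(' ', '')
    def accept(candidate):
        c = candidate.replace(' ', '')
        return len(c) >= SmartExtractorUtility.title_min_len and (
            tag_title.startswith(c) or tag_title.endswith(c))
    print(accept('亿缕阳光丨小生意,大格局'))  # True: the <title> starts with this element text
    print(accept('人民网'))  # False: it is a suffix, but shorter than the 6-character minimum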
@staticmethod
def get_publish_date(article: Article):
# 优先使用正则式提取日期
# 1、测试任务:加德满都邮报-national-security
# a. 使用 publish_datetime_utc 提取英文日期后,提取错误
# b. 实际日期:Friday, August 19, 2022,但提取到了:2015-02-05
# c. 原因:在下方JS中,有一段JSON文本: "datePublished": "2015-02-05T08:00:00+08:00"
# 2、注意:中文网站,都必须使用正则式
publish_date = SmartExtractorUtility.extract_publish_date(article.raw_html)
if publish_date != '':
return publish_date
else:
if article.publish_datetime_utc:
# 优先使用提取成功的 datetime
return article.publish_datetime_utc.strftime('%Y-%m-%d')
elif article.publish_date:
# 其次使用提取成功的 date 字符串
return article.publish_date
else:
# 全部提取失败,返回字符串
return ''
@staticmethod
def get_article_text(article: Article):
# 第一种方法:在纯文本(cleaned_text)基础上,添加br标签
# 1、缺点:无法获取图片,同时会丢掉原有的p标签(只能用br替补)
# text = SmartExtractor.add_html_br(article.cleaned_text)
# 第二种方法:直接获取 top_node 的HTML内容
# 1、优点:可保留原有的p标签等
# 2、缺点:无法获取图片,img标签未被保留
# text = etree.tounicode(article.top_node, method='html')
# 测试抛出异常
# raise Exception("测试抛出异常")
# 第三种方法:获取到 top_node 的xpath,再通过xpath查询原始doc
# 1、可行:通过查询原始doc,可以获取“正文”的所有HTML内容
# 2、遇到问题:获取到 top_node 的xpath不准确,与原位置偏移一个元素
# a. 测试URL:https://news.cctv.com/2022/08/16/ARTIERrXbbVtVUaQU0pMzQxf220816.shtml
# b. 获取到的xpath:/html/body/div/div[1]/div[2]/div[4]
# c. 实际xpath:/html/body/div/div[1]/div[2]/div[5]
# 3、解决办法:
# a. 优先使用id、class查询,如果没有id、class,再查询 top_node 的xpath
xpath = None
if type(article.top_node) is HtmlElement:
if 'id' in article.top_node.attrib:
xpath = "//*[@id='{}']".format(article.top_node.attrib['id'])
elif 'class' in article.top_node.attrib:
xpath = "//*[@class='{}']".format(article.top_node.attrib['class'])
else:
xpath = article.top_node.getroottree().getpath(article.top_node)
else:
# article.top_node 有时为空:
# 1、测试URL:https://baijiahao.baidu.com/s?id=1741311527693101670
# 2、输出日志:article.top_node 不是 HtmlElement 对象:None
print("SmartExtractor:article.top_node 为 {},不是 HtmlElement 对象。".format(article.top_node))
# article.top_node 为空时,直接输出 cleaned_text:
# 1、在纯文本(cleaned_text)基础上,添加br标签
text = SmartExtractorUtility.add_html_br(article.cleaned_text)
return text
# 根据xpath,查询元素
element_list = article.raw_doc.getroottree().xpath(xpath)
if element_list:
# 查询XPath成功,获取第一个元素的HTML
text = etree.tounicode(element_list[0], method='html')
else:
# 查询XPath失败,返回 top_node 原有的HTML
# 1、缺点:无法获取图片,img标签未被保留
text = etree.tounicode(article.top_node, method='html')
return text
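# A minimal sketch of the id/class-first lookup described in get_article_text: when the
# main node carries an id or class, the original document is queried by that attribute,
# and the positional XPath from getpath() is only used as a last resort. Illustrative
# only; the HTML snippet is made up.
def xpath_fallback_demo():
    from lxml import etree, html as lxml_html
    doc = lxml_html.fromstring(
        '<html><body><div class="article"><p>正文<img src="a.png"></p></div></body></html>')
    node = doc.xpath('//p')[0]
    if 'id' in node.attrib:
        xpath = "//*[@id='{}']".format(node.attrib['id'])
    elif 'class' in node.attrib:
        xpath = "//*[@class='{}']".format(node.attrib['class'])
    else:
        # the <p> has no id or class, fall back to the positional XPath
        xpath = node.getroottree().getpath(node)
    print(xpath)  # /html/body/div/p
    print(etree.tounicode(doc.xpath(xpath)[0], method='html'))  # keeps <img> and other original tags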
#coding=utf-8
import datetime
import json
import time
import pymysql
import requests
from kafka import KafkaProducer
from smart_extractor import SmartExtractor
from bs4 import BeautifulSoup
from langid import langid
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from baseCore import BaseCore
basecore = BaseCore()
log = basecore.getLogger()
r = basecore.r
def reqmsg(url):
header={
'Connection':'keep-alive',
#'sec-ch-ua':'"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile':'?0',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'sec-ch-ua-platform':'"Windows"',
'Accept':'*/*',
'Origin':'https://cn.tradingview.com',
'Sec-Fetch-Site':'same-site',
'Sec-Fetch-Mode':'cors',
'Sec-Fetch-Dest':'empty',
'Referer':'https://cn.tradingview.com/',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.9'
}
proxy = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
for i in range(0,3):
try:
response=requests.get(url=url,headers=header,timeout=10,proxies=proxy,verify=False)
searchmsg=response.json()
except Exception as e:
searchmsg=''
log.info(f'{url}---请求失败--{e}')
if searchmsg:
log.info(f'{url}---请求成功')
break
return searchmsg
def reqDetailmsg(url):
header={
'Host':'cn.tradingview.com',
'Connection':'keep-alive',
'sec-ch-ua':'"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile':'?0',
'sec-ch-ua-platform':'"Windows"',
'Upgrade-Insecure-Requests':'1',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Sec-Fetch-Site':'none',
'Sec-Fetch-Mode':'navigate',
'Sec-Fetch-User':'?1',
'Sec-Fetch-Dest':'document',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.9',
'Cookie':'cookiePrivacyPreferenceBannerProduction=notApplicable; cookiesSettings={"analytics":true,"advertising":true}; _ga=GA1.1.153931157.1696599356; will_start_trial=1; device_t=MzBfV0F3OjA.5HeDqPHu8F5Ux85y2Bi3xCC-liNchYNYW1zUgqB5E4s; sessionid=rcy2dho7lh83k6tasjy4jjatig31tbdf; sessionid_sign=v1:K9a7nKtEZ3MWrJqUgqr9ZaVHrjlepGyPAoGrDmq2DiM=; _gcl_au=1.1.557075741.1696651024; png=f403f4d2-d955-4385-b59c-f2d74f7ec679; etg=f403f4d2-d955-4385-b59c-f2d74f7ec679; cachec=f403f4d2-d955-4385-b59c-f2d74f7ec679; tv_ecuid=f403f4d2-d955-4385-b59c-f2d74f7ec679; _ga_YVVRYGL0E0=deleted; __gads=ID=b0fa0efe8c0ccdc3:T=1696647286:RT=1696916773:S=ALNI_MaPEozJ_doJikuSMJ0r5yFDU3j_Mw; __gpi=UID=00000c59f5923a81:T=1696647286:RT=1696916773:S=ALNI_Ma-WnwGckO3mzIStdpHv1jmEDMMvA; _sp_ses.cf1a=*; _sp_id.cf1a=8a315f91-7829-4ad7-bf4b-151a217809dd.1696599355.14.1696924687.1696916773.00da5df6-3641-4999-a8cf-e2d01afa79e7; _ga_YVVRYGL0E0=GS1.1.1696924315.18.1.1696924691.38.0.0',
}
proxy = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
for i in range(0,3):
try:
response=requests.get(url=url,headers=header,timeout=10,proxies=proxy,verify=False)
htmltext=response.text
except Exception as e:
htmltext=''
log.info(f'{url}---详情请求失败--{e}')
if htmltext:
log.info(f'{url}---详情请求成功')
break
return htmltext
def paserList(searchmsg,social_code):
items=searchmsg['items']
for item in items:
try:
id=item['id']
title=item['title']
storyPath='https://cn.tradingview.com'+item['storyPath']
published=item['published']
published=getFormatedate(published)
#是否重复判断
flag=selectLinkMsg(storyPath,social_code)
if flag:
log.info(f'{social_code}---{storyPath}---数据已采集过')
continue
except Exception as e:
log.info(f'列表解析失败----{e}')
continue
try:
source=item['source']
except Exception as e:
source=''
try:
link=item['link']
except Exception as e:
link=''
try:
symbol=item['relatedSymbols'][0]['symbol']
except Exception as e:
symbol=''
try:
# if link:
# sourceAddress=link
# else:
# sourceAddress=storyPath
sourceAddress=storyPath
content,contentWithTag=extractorMsg(sourceAddress,title)
if content:
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
detailmsg={
'content': content,
'contentWithTag': contentWithTag,
'createDate': time_now,
'publishDate': published,
'sourceAddress': sourceAddress, # 原文链接
'summary': '',
'title': title,
'socialCreditCode': social_code,
'year': published[:4]
}
sendToKafka(detailmsg)
saveLinkMsg(sourceAddress,social_code)
log.info(f'信息发送kafka成功----{sourceAddress}')
else:
log.info(f'内容抽取失败----{sourceAddress}')
except Exception as e:
log.info(f'{social_code}____{sourceAddress}详情采集异常{e}')
def getFormatedate(timestamp):
date = datetime.datetime.fromtimestamp(timestamp)
formatted_date = date.strftime('%Y-%m-%d')
return formatted_date
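# A minimal sketch of the conversion above: "published" from the headlines API is a Unix
# timestamp in seconds, and datetime.fromtimestamp interprets it in the machine's local
# timezone, so the resulting date can shift by one day depending on where the crawler runs.
def getFormatedate_demo():
    print(getFormatedate(1696896000))  # '2023-10-10' on a UTC+8 machine (2023-10-10 00:00 UTC)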
def createDriver():
chrome_driver =r'C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe'
path = Service(chrome_driver)
chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = r'D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe'
# 设置代理
# proxy = "127.0.0.1:8080" # 代理地址和端口
# chrome_options.add_argument('--proxy-server=http://' + proxy)
driver = webdriver.Chrome(service=path, options=chrome_options)  # Selenium 4 中 chrome_options 参数已弃用,改用 options
return driver
def extractorMsg(url,title):
content=''
contentWithTag=''
lang=detect_language(title)
sm=SmartExtractor(lang)
raw_html=reqDetailmsg(url)
if raw_html:
try:
soup=BeautifulSoup(raw_html,'html.parser')
tdoc=soup.select('div[class="body-KX2tCBZq body-pIO_GYwT content-pIO_GYwT"]')[0]
content=tdoc.text
contentWithTag=str(tdoc)
except Exception as e:
log.info(f'抽取失败!!{e}')
if content:
log.info(f'抽取成功')
else:
try:
article=sm.extract_by_html(raw_html)
content=article.cleaned_text
contentWithTag=article.text
except Exception as e:
log.info(f'抽取失败!!{e}')
if content:
log.info(f'抽取成功')
else:
driver=createDriver()
driver.get(url)
time.sleep(3)
raw_html=driver.page_source
try:
article=sm.extract_by_html(raw_html)
content=article.cleaned_text
contentWithTag=article.text
except Exception as e:
log.info(f'抽取失败!!{e}')
return content,contentWithTag
def detect_language(html):
soup = BeautifulSoup(html, "html.parser")
text = soup.get_text()
# 使用langid.py判断文本的语言
lang, confidence = langid.classify(text)
return lang
def conn144():
conn = pymysql.Connect(host='114.115.159.144', port=3306, user='caiji', passwd='zzsn9988', db='caiji',
charset='utf8')
cursor = conn.cursor()
return conn,cursor
def getStockFromSql():
conn,cursor=conn144()
# 检查记录是否存在
select_sql=f"SELECT ticker,exchange,xydm FROM mgzqyjwyh_list "
cursor.execute(select_sql)
gn_result = cursor.fetchall()
conn.commit()
itemList=[]
for item in gn_result:
try:
ticker=item[0]
exchange=item[1]
xydm=item[2]
exchange=str(exchange).upper()
param=exchange+':'+ticker+'_'+xydm
r.rpush('tradview_ticker', param)
itemList.append(param)
except Exception as e:
print(e)
cursor.close()
conn.close()
return itemList
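# A minimal sketch (hypothetical values) of the queue item format produced above and how
# the main loop below takes it apart: the part before the underscore is the TradingView
# symbol "EXCHANGE:TICKER", the part after it is the credit code (xydm).
def ticker_param_demo():
    param = 'NASDAQ:AAPL_913100000000000000'  # hypothetical ticker and credit code
    ticker_param, social_code = param.split('_', 1)
    print(ticker_param)  # NASDAQ:AAPL -> goes into the headlines URL
    print(social_code)   # 913100000000000000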
def sendToKafka(detailmsg):
dic_news = {
'attachmentIds': '',
'author': '',
'content': detailmsg['content'],
'contentWithTag': detailmsg['contentWithTag'],
'createDate': detailmsg['createDate'],
'deleteFlag': '0',
'id': '',
'keyWords': '',
'lang': 'zh',
'origin': 'Tradingview',
'publishDate': detailmsg['publishDate'],
'sid': '1711619846545776641',
'sourceAddress': detailmsg['sourceAddress'], # 原文链接
'summary': '',
'title': detailmsg['title'],
'type': 2,
'socialCreditCode': detailmsg['socialCreditCode'],
'year': detailmsg['year']
}
producer=KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
try:
kafka_result = producer.send("researchReportTopic",
json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
# log.info(kafka_result.get(timeout=10))
log.info('发送kafka成功!')
except Exception as e:
log.info(f"发生kafka失败{e}")
finally:
producer.close()
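# A minimal verification sketch (not part of the crawler): reading back the messages
# pushed above with kafka-python's KafkaConsumer against the same broker and topic.
def consume_research_report_demo():
    import json
    from kafka import KafkaConsumer
    consumer = KafkaConsumer('researchReportTopic',
                             bootstrap_servers=['114.115.159.144:9092'],
                             auto_offset_reset='earliest',
                             consumer_timeout_ms=5000,
                             value_deserializer=lambda v: json.loads(v.decode('utf8')))
    for msg in consumer:
        print(msg.value.get('title'), msg.value.get('sourceAddress'))
    consumer.close()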
#将连接保存到数据库
def saveLinkMsg(link,social_code):
conn,cursor=conn144()
try:
insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,publish_time,create_time) values(%s,%s,%s,%s,%s,now())'''
# 动态信息列表
time_format = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
list_info = [
social_code,
link,
'Tradingview',
'2',
time_format
]
cursor.execute(insert_sql, tuple(list_info))
except Exception as e:
log.info(f'{link}插入库中失败{e}')
finally:
conn.commit()
cursor.close()
conn.close()
#查询是否存在
def selectLinkMsg(link,social_code):
flag=False
conn,cursor=conn144()
try:
sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s and social_credit_code=%s and type='2' '''
cursor.execute(sel_sql, (link, social_code))
selects = cursor.fetchone()
if selects:
log.info(f'-----{social_code}----{link}:已经存在')
flag=True
except Exception as e:
log.info(f'查询数据是否在库中失败{e}')
finally:
conn.commit()
cursor.close()
conn.close()
return flag
if __name__ == '__main__':
# url='https://news-headlines.tradingview.com/v2/headlines?client=web&lang=zh-Hans&symbol=NASDAQ%3AAAPL'
# searchmsg=reqmsg(url)
# print(searchmsg)
# getStockFromSql()
while True:
try:
tradview_ticker=r.lpop('tradview_ticker')
if tradview_ticker:
tradviewticker = tradview_ticker.decode(errors='ignore')
ticker_param=str(tradviewticker).split('_')[0]
social_code=str(tradviewticker).split('_')[1]
url=f'https://news-headlines.tradingview.com/v2/headlines?client=web&lang=zh-Hans&symbol={ticker_param}'
searchmsg=reqmsg(url)
paserList(searchmsg,social_code)
except Exception as e:
log.info(f'redis中获取企业信息为空{e}')
break
#coding=utf-8
import datetime
import json
import time
import pymysql
import requests
from kafka import KafkaProducer
from smart_extractor import SmartExtractor
from bs4 import BeautifulSoup
from gne import GeneralNewsExtractor
from langid import langid
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from base import BaseCore
basecore = BaseCore.BaseCore()
log = basecore.getLogger()
r = basecore.r
def reqmsg(url):
header={
'Connection':'keep-alive',
#'sec-ch-ua':'"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile':'?0',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'sec-ch-ua-platform':'"Windows"',
'Accept':'*/*',
'Origin':'https://cn.tradingview.com',
'Sec-Fetch-Site':'same-site',
'Sec-Fetch-Mode':'cors',
'Sec-Fetch-Dest':'empty',
'Referer':'https://cn.tradingview.com/',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.9'
}
proxy = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
for i in range(0,3):
try:
response=requests.get(url=url,headers=header,timeout=10,proxies=proxy,verify=False)
searchmsg=response.json()
except Exception as e:
searchmsg=''
log.info(f'{url}---请求失败--{e}')
if searchmsg:
log.info(f'{url}---请求成功')
break
return searchmsg
def reqDetailmsg(url):
header={
'Host':'cn.tradingview.com',
'Connection':'keep-alive',
'sec-ch-ua':'"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile':'?0',
'sec-ch-ua-platform':'"Windows"',
'Upgrade-Insecure-Requests':'1',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Sec-Fetch-Site':'none',
'Sec-Fetch-Mode':'navigate',
'Sec-Fetch-User':'?1',
'Sec-Fetch-Dest':'document',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.9',
'Cookie':'cookiePrivacyPreferenceBannerProduction=notApplicable; cookiesSettings={"analytics":true,"advertising":true}; _ga=GA1.1.153931157.1696599356; will_start_trial=1; device_t=MzBfV0F3OjA.5HeDqPHu8F5Ux85y2Bi3xCC-liNchYNYW1zUgqB5E4s; sessionid=rcy2dho7lh83k6tasjy4jjatig31tbdf; sessionid_sign=v1:K9a7nKtEZ3MWrJqUgqr9ZaVHrjlepGyPAoGrDmq2DiM=; _gcl_au=1.1.557075741.1696651024; png=f403f4d2-d955-4385-b59c-f2d74f7ec679; etg=f403f4d2-d955-4385-b59c-f2d74f7ec679; cachec=f403f4d2-d955-4385-b59c-f2d74f7ec679; tv_ecuid=f403f4d2-d955-4385-b59c-f2d74f7ec679; _ga_YVVRYGL0E0=deleted; __gads=ID=b0fa0efe8c0ccdc3:T=1696647286:RT=1696916773:S=ALNI_MaPEozJ_doJikuSMJ0r5yFDU3j_Mw; __gpi=UID=00000c59f5923a81:T=1696647286:RT=1696916773:S=ALNI_Ma-WnwGckO3mzIStdpHv1jmEDMMvA; _sp_ses.cf1a=*; _sp_id.cf1a=8a315f91-7829-4ad7-bf4b-151a217809dd.1696599355.14.1696924687.1696916773.00da5df6-3641-4999-a8cf-e2d01afa79e7; _ga_YVVRYGL0E0=GS1.1.1696924315.18.1.1696924691.38.0.0',
}
proxy = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
for i in range(0,3):
try:
response=requests.get(url=url,headers=header,timeout=10,proxies=proxy,verify=False)
htmltext=response.text
except Exception as e:
htmltext=''
log.info(f'{url}---详情请求失败--{e}')
if htmltext:
log.info(f'{url}---详情请求成功')
break
return htmltext
def paserList(searchmsg,social_code):
items=searchmsg['items']
for item in items:
try:
id=item['id']
title=item['title']
storyPath='https://cn.tradingview.com'+item['storyPath']
published=item['published']
published=getFormatedate(published)
#是否重复判断
flag=selectLinkMsg(storyPath,social_code)
if flag:
log.info(f'{social_code}---{storyPath}---数据已采集过')
continue
except Exception as e:
log.info(f'列表解析失败----{e}')
continue
try:
source=item['source']
except Exception as e:
source=''
try:
link=item['link']
except Exception as e:
link=''
try:
symbol=item['relatedSymbols'][0]['symbol']
except Exception as e:
symbol=''
try:
# if link:
# sourceAddress=link
# else:
# sourceAddress=storyPath
sourceAddress=storyPath
content,contentWithTag=extractorMsg(sourceAddress,title)
if content:
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
detailmsg={
'content': content,
'contentWithTag': contentWithTag,
'createDate': time_now,
'publishDate': published,
'sourceAddress': sourceAddress, # 原文链接
'summary': '',
'title': title,
'socialCreditCode': social_code,
'year': published[:4]
}
sendToKafka(detailmsg)
saveLinkMsg(sourceAddress,social_code)
log.info(f'信息发送kafka成功----{sourceAddress}')
else:
log.info(f'内容抽取失败----{sourceAddress}')
except Exception as e:
log.info(f'{social_code}____{sourceAddress}详情采集异常{e}')
def getFormatedate(timestamp):
date = datetime.datetime.fromtimestamp(timestamp)
formatted_date = date.strftime('%Y-%m-%d')
return formatted_date
def createDriver():
chrome_driver =r'C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe'
path = Service(chrome_driver)
chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = r'D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe'
# 设置代理
# proxy = "127.0.0.1:8080" # 代理地址和端口
# chrome_options.add_argument('--proxy-server=http://' + proxy)
driver = webdriver.Chrome(service=path, options=chrome_options)  # Selenium 4 中 chrome_options 参数已弃用,改用 options
return driver
def extractorMsg(url,title):
content=''
contentWithTag=''
lang=detect_language(title)
sm=SmartExtractor(lang)
raw_html=reqDetailmsg(url)
if raw_html:
try:
soup=BeautifulSoup(raw_html,'html.parser')
tdoc=soup.select('div[class="body-KX2tCBZq body-pIO_GYwT content-pIO_GYwT"]')[0]
content=tdoc.text
contentWithTag=str(tdoc)
except Exception as e:
log.info(f'抽取失败!!{e}')
if content:
log.info(f'抽取成功')
else:
try:
article=sm.extract_by_html(raw_html)
content=article.cleaned_text
contentWithTag=article.text
except Exception as e:
log.info(f'抽取失败!!{e}')
if content:
log.info(f'抽取成功')
else:
try:
article_content=paserDetail(raw_html,url)
content=article_content['content']
contentWithTag=article_content['body_html']
except Exception as e:
log.info(f'抽取失败!!{e}')
else:
driver=createDriver()
driver.get(url)
time.sleep(3)
raw_html=driver.page_source
try:
article=sm.extract_by_html(raw_html)
content=article.cleaned_text
contentWithTag=article.text
except Exception as e:
log.info(f'抽取失败!!{e}')
if content:
log.info(f'抽取成功')
else:
try:
article_content=paserDetail(raw_html,url)
content=article_content['content']
contentWithTag=article_content['body_html']
except Exception as e:
log.info(f'抽取失败!!{e}')
return content,contentWithTag
#智能抽取
def paserDetail(detailhtml,detailurl):
try:
extractor = GeneralNewsExtractor()
article_content = extractor.extract(detailhtml,host=detailurl,with_body_html=True)
except:
article_content={}
return article_content
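# A minimal sketch of the gne fallback above: GeneralNewsExtractor.extract returns a dict,
# and with with_body_html=True it also carries 'body_html', which is where contentWithTag
# is taken from. The HTML snippet and URL are made up for illustration.
def paserDetail_demo():
    html = '<html><body><h1>标题</h1><div><p>第一段正文。</p><p>第二段正文。</p></div></body></html>'
    result = paserDetail(html, 'https://example.com/demo')
    print(result.get('content'))    # plain-text body
    print(result.get('body_html'))  # body with tags kept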
def detect_language(html):
soup = BeautifulSoup(html, "html.parser")
text = soup.get_text()
# 使用langid.py判断文本的语言
lang, confidence = langid.classify(text)
return lang
def conn144():
conn = pymysql.Connect(host='114.115.159.144', port=3306, user='caiji', passwd='zzsn9988', db='caiji',
charset='utf8')
cursor = conn.cursor()
return conn,cursor
def getStockFromSql():
conn,cursor=conn144()
# 检查记录是否存在
select_sql=f"SELECT ticker,exchange,xydm FROM mgzqyjwyh_list "
cursor.execute(select_sql)
gn_result = cursor.fetchall()
conn.commit()
itemList=[]
for item in gn_result:
try:
ticker=item[0]
exchange=item[1]
xydm=item[2]
exchange=str(exchange).upper()
param=exchange+':'+ticker+'_'+xydm
r.rpush('tradview_ticker', param)
itemList.append(param)
except Exception as e:
print(e)
cursor.close()
conn.close()
return itemList
def sendToKafka(detailmsg):
dic_news = {
'attachmentIds': '',
'author': '',
'content': detailmsg['content'],
'contentWithTag': detailmsg['contentWithTag'],
'createDate': detailmsg['createDate'],
'deleteFlag': '0',
'id': '',
'keyWords': '',
'lang': 'zh',
'origin': 'Tradingview',
'publishDate': detailmsg['publishDate'],
'sid': '1711619846545776641',
'sourceAddress': detailmsg['sourceAddress'], # 原文链接
'summary': '',
'title': detailmsg['title'],
'type': 2,
'socialCreditCode': detailmsg['socialCreditCode'],
'year': detailmsg['year']
}
producer=KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
try:
kafka_result = producer.send("researchReportTopic",
json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
log.info(kafka_result.get(timeout=10))
except Exception as e:
log.info(f"发生kafka失败{e}")
finally:
producer.close()
#将连接保存到数据库
def saveLinkMsg(link,social_code):
conn,cursor=conn144()
try:
insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,publish_time,create_time) values(%s,%s,%s,%s,%s,now())'''
# 动态信息列表
time_format = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
list_info = [
social_code,
link,
'Tradingview',
'2',
time_format
]
cursor.execute(insert_sql, tuple(list_info))
except Exception as e:
log.info(f'{link}插入库中失败{e}')
finally:
conn.commit()
cursor.close()
conn.close()
#查询是否存在
def selectLinkMsg(link,social_code):
flag=False
conn,cursor=conn144()
try:
sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s and social_credit_code=%s and type='2' '''
cursor.execute(sel_sql, (link, social_code))
selects = cursor.fetchone()
if selects:
log.info(f'-----{social_code}----{link}:已经存在')
flag=True
except Exception as e:
log.info(f'查询数据是否在库中失败{e}')
finally:
conn.commit()
cursor.close()
conn.close()
return flag
if __name__ == '__main__':
# url='https://news-headlines.tradingview.com/v2/headlines?client=web&lang=zh-Hans&symbol=NASDAQ%3AAAPL'
# searchmsg=reqmsg(url)
# print(searchmsg)
# getStockFromSql()
while True:
try:
tradview_ticker=r.lpop('tradview_ticker')
if tradview_ticker:
tradviewticker = tradview_ticker.decode(errors='ignore')
ticker_param=str(tradviewticker).split('_')[0]
social_code=str(tradviewticker).split('_')[1]
url=f'https://news-headlines.tradingview.com/v2/headlines?client=web&lang=zh-Hans&symbol={ticker_param}'
searchmsg=reqmsg(url)
paserList(searchmsg,social_code)
except Exception as e:
log.info(f'redis中获取企业信息为空{e}')
break