import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import configparser
import redis
import concurrent.futures
from pyquery import PyQuery as pq
class hgCookie(object):
    """Collect cookies and the latest selectable month from the customs query page.

    Connection settings for Redis and the Chrome/chromedriver paths are read
    from ``config.ini`` in the current working directory. Harvested cookie
    strings are added to the Redis set ``hgcookie``; the currently selected
    end month of the query page is stored under the Redis key ``newMonth``.
    """

    # Query page that is opened for every cookie/month lookup.
    QUERY_URL = 'http://stats.customs.gov.cn/queryData/queryDataByWhere'
    # Maximum number of seconds to wait for the page's search button to appear.
    PAGE_LOAD_TIMEOUT = 10

    def __init__(self):
        """Load ``config.ini`` and create the Redis client.

        Raises:
            configparser.Error: if a required section/option is missing
                (``ConfigParser.read`` silently ignores a missing file, so
                the failure surfaces on the first ``get``).
            ValueError: if the configured Redis port is not an integer.
        """
        self.config = configparser.ConfigParser()
        self.config.read('config.ini')
        self.r = redis.Redis(
            host=self.config.get('redis', 'host'),
            # getint makes a malformed port fail here rather than at first use.
            port=self.config.getint('redis', 'port'),
            password=self.config.get('redis', 'pass'),
            db=0,
        )
        self.bin_path = self.config.get('selenium', 'binary_location')
        self.driver_path = self.config.get('selenium', 'chrome_driver')

    def get_webdriver(self):
        """Create and return a new Chrome WebDriver using the configured paths.

        The caller owns the returned driver and must call ``quit()`` on it.
        """
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--ignore-certificate-errors')
        # The next two options hide the usual "controlled by automation" markers.
        chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        chrome_options.add_argument("--disable-blink-features=AutomationControlled")
        chrome_options.add_argument("--start-maximized")
        chrome_options.binary_location = self.bin_path
        # NOTE(review): newer Selenium 4 releases dropped the `executable_path`
        # keyword in favour of `service=Service(path)`; confirm against the
        # installed Selenium version before upgrading.
        return webdriver.Chrome(options=chrome_options, executable_path=self.driver_path)

    def _load_query_page(self, driver):
        """Open the query page in *driver* and wait until its search button exists.

        Raises whatever ``WebDriverWait.until`` raises when the element with
        id ``doSearch`` does not appear within ``PAGE_LOAD_TIMEOUT`` seconds.
        """
        driver.get(self.QUERY_URL)
        wait = WebDriverWait(driver, self.PAGE_LOAD_TIMEOUT)
        wait.until(EC.presence_of_element_located((By.ID, "doSearch")))

    def reqGetCookie(self):
        """Load the query page in a fresh browser and store its cookies in Redis.

        All cookies of the session are joined as ``name=value; name=value`` and
        added to the Redis set ``hgcookie``. The browser is always closed, even
        when loading the page or talking to Redis fails.
        """
        driver = self.get_webdriver()
        try:
            self._load_query_page(driver)
            cookies = driver.get_cookies()
            cookie_str = '; '.join(
                f"{cookie['name']}={cookie['value']}" for cookie in cookies
            )
            # An empty string is useless as a cookie; keep it out of the pool.
            if cookie_str:
                self.r.sadd('hgcookie', cookie_str)
        finally:
            # Without this, a timeout above would leak a Chrome process.
            driver.quit()

    def runSpider(self, i):
        """Thread-pool task: print the task number *i* and harvest one cookie."""
        print(i)
        self.reqGetCookie()

    def getnewMonth(self):
        """Read the selected ``endMonth`` option of the query page into Redis.

        The text of the selected option is printed and stored under the Redis
        key ``newMonth``. The browser is always closed, even on failure.
        """
        driver = self.get_webdriver()
        try:
            self._load_query_page(driver)
            doc = pq(driver.page_source)
            endMonth = doc('select[id="endMonth"]>option[selected="selected"]').text()
            print(f'海关页面的月份{endMonth}')
            self.r.set('newMonth', endMonth)
        finally:
            driver.quit()

if __name__ == '__main__':
    # Script entry point: record the latest month once, then keep refilling
    # the Redis cookie pool forever.
    # The instance gets its own name so the class `hgCookie` is not shadowed.
    cookie_pool = hgCookie()
    cookie_pool.getnewMonth()

    while True:
        size = cookie_pool.r.scard('hgcookie')

        print(f'海关的cookie数量：{size}')
        if size > 100:
            # Pool is full: pause and re-check instead of falling through and
            # launching another 100 browser sessions anyway.
            time.sleep(60)
            continue

        # Harvest 100 cookies per round, running at most 2 browsers at a time.
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            futures = [
                executor.submit(cookie_pool.runSpider, task_no)
                for task_no in range(1, 101)
            ]
            for future in concurrent.futures.as_completed(futures):
                try:
                    result = future.result()
                    print(f"任务执行结束: {result}")
                except Exception as e:
                    # Top-level boundary: report the failed task and keep going
                    # so one bad page load does not stop the whole round.
                    print(f"任务执行exception: {e}")