# -*- coding: utf-8 -*-
"""
任务集成测试
1、连接redis做取出
2、连接kafka做信息的获取，与存储

"""
import time

import redis
from kafka import KafkaProducer
from kafka import KafkaConsumer
import json
import itertools
from baiduSpider import BaiduSpider

import concurrent.futures
from baseCore import BaseCore
from queue import Queue
import configparser

class BaiduTaskJob(object):
    def __init__(self):
        # 创建ConfigParser对象
        self.config = configparser.ConfigParser()
        # 读取配置文件
        self.config.read('config.ini')
        self.r = redis.Redis(host=self.config.get('redis', 'host'),
                             port=self.config.get('redis', 'port'),
                             password=self.config.get('redis', 'pass'), db=0)

    def getkafka(self):
        # Kafka集群的地址
        bootstrap_servers = self.config.get('kafka', 'bootstrap_servers')
        # 要订阅的主题
        topic = self.config.get('kafka', 'topic')
        groupId=self.config.get('kafka', 'groupId')
        consumer = KafkaConsumer(topic, group_id=groupId,
                                 bootstrap_servers=[bootstrap_servers],
                                 value_deserializer=lambda m: json.loads(m.decode('utf-8')))
        try:
            for record in consumer:
                try:
                    logger.info("value:",record.value)
                    keymsg=record.value
                    if keymsg:
                        break
                    else:
                        continue
                #print("%s:%d:%d: key=%s value=%s" % (msg.topic, msg.partition, msg.offset, msg.key, msg.value))
                except Exception as e:
                    logger.info("msg.value error:",e)
        except KeyboardInterrupt as e:
            keymsg={}
        finally:
            consumer.close()

        return keymsg


    def getkeyFromredis(self,codeid):

        kvalue=self.r.get('KEY_WORDS_TO_REDIS::'+codeid)
        kvalue=kvalue.decode('utf-8')
        kvalue=json.loads(kvalue)
        return kvalue

    def getkeywords(self,keywords):
        kwList=[]
        if ')+(' in keywords:
            k1List=keywords.split('+')
            kk2=[]
            for k2 in k1List:
                k2=k2.strip("()")
                k2List=k2.split('|')
                kk2.append(k2List)
            if len(kk2)==2:
                result = list(itertools.product(kk2[0], kk2[1]))
            elif len(kk2)==3:
                result = list(itertools.product(kk2[0], kk2[1],kk2[2]))
            elif len(kk2)==4:
                result = list(itertools.product(kk2[0], kk2[1],kk2[2],kk2[3]))


            for res in result:
                kwstr=''
                for kw in res:
                    kwstr+=kw+"+"
                kwList.append(kwstr.strip('+'))
        elif '+(' in keywords:
            k1List=keywords.split('+')
            kk2=[]
            for k2 in k1List:
                k2=k2.strip("()")
                k2List=k2.split('|')
                kk2.append(k2List)
            if len(kk2)==2:
                result = list(itertools.product(kk2[0], kk2[1]))

            for res in result:
                kwstr=''
                for kw in res:
                    kwstr+=kw+"+"
                kwList.append(kwstr.strip('+'))
        else:
            k3=keywords.split("|")
            kwList=k3
        return kwList


    def paserKeyMsg(self,keymsg):
        logger.info('----------')
        wordsCode=keymsg['wordsCode']
        id=keymsg['id']
        try:
            searchEngines=keymsg['searchEngines']
        except Exception as e:
            searchEngines=[]
        kwList=[]
        if searchEngines:
            if '3' in searchEngines:
                keyword=keymsg['keyWord']
                keymsglist=self.getkeywords(keyword)
                for kw in keymsglist:
                    kwmsg={
                        'kw':kw,
                        'wordsCode':wordsCode,
                        'sid':id
                    }
                    kwList.append(kwmsg)
            else:
                pass
                # logger.info('+++++')
                # keyword=keymsg['keyWord']
                # keymsglist=self.getkeywords(keyword)
                # for kw in keymsglist:
                #     kwmsg={
                #         'kw':kw,
                #         'wordsCode':wordsCode,
                #         'sid':id
                #     }
                #     kwList.append(kwmsg)

        return kwList

    # def runSpider(self,kwmsg):
    #     try:
    #         searchkw=kwmsg['kw']
    #         wordsCode=kwmsg['wordsCode']
    #         sid=kwmsg['sid']
    #
    #         baiduSpider=BaiduSpider(searchkw,wordsCode,sid)
    #         baiduSpider.get_page_html()
    #         baiduSpider.get_detail_html()
    #     except Exception as e:
    #         logger.info('百度搜索异常'+searchkw)
    #     finally:
    #         baiduSpider.driver.quit()
    #     logger.info("关键词采集结束！"+searchkw)
    def runSpider(self,kwmsg):

        searchkw=kwmsg['kw']
        wordsCode=kwmsg['wordsCode']
        sid=kwmsg['sid']
        baiduSpider=BaiduSpider(searchkw,wordsCode,sid)

        try:
            baiduSpider.get_page_html()
        except Exception as e:
            logger.info('百度搜索异常'+searchkw)
        finally:
            baiduSpider.driver.quit()
        if baiduSpider.detailList.qsize() != 0:
            try:
                baiduSpider.get_detail_html()
            except Exception as e:
                logger.info('详情解析异常'+searchkw)
            finally:
                baiduSpider.driver.quit()
        logger.info("关键词采集结束！"+searchkw)
if __name__ == '__main__':
    # ss='道地西洋参+(销售市场|交易市场|直播带货|借助大会平台|网店|微商|电商|农民博主|推介宣传|高品质定位|西洋参产品经营者加盟|引进龙头企业|西洋参冷风库|建设农旅中心|农产品展销中心|精品民宿|温泉)'
    # keymsglist=getkeywords(ss)
    # print(keymsglist)
    # 创建Redis连接

    baiduTaskJob=BaiduTaskJob()
    baseCore=BaseCore()
    logger=baseCore.getLogger()
    print('---------------')
    while True:
        try:
            try:
                keymsg=baiduTaskJob.getkafka()
                kwList=baiduTaskJob.paserKeyMsg(keymsg)
            except Exception as e:
                logger.info("从kafka拿取信息失败！")
                time.sleep(5)
                continue
            if kwList:
                # 创建一个线程池，指定线程数量为4
                with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
                    # 提交任务给线程池，每个任务处理一个数据
                    results = [executor.submit(baiduTaskJob.runSpider, data) for data in kwList]
                    # 获取任务的执行结果
                    for future in concurrent.futures.as_completed(results):
                        try:
                            result = future.result()
                            # 处理任务的执行结果
                            logger.info(f"任务执行结束: {result}")
                        except Exception as e:
                            # 处理任务执行过程中的异常
                            logger.info(f"任务执行exception: {e}")
        except Exception as e:
            logger.info('采集异常')

