import functools
import random
import threading
import traceback

import pymysql
import requests,time
from base import BaseCore
import concurrent.futures

from comData.Tyc.getTycId import getTycIdByXYDM
# Shared project helper; the code below uses it for logging, the Redis queue,
# company lookups and run-log records.
baseCore = BaseCore.BaseCore()
# The shared connection/cursor on baseCore are not used here; each database
# operation opens its own connection through connectSql() instead.
# cnx = baseCore.cnx
# cursor = baseCore.cursor
log = baseCore.getLogger()
# Task label passed to baseCore.recordLog when a run fails.
taskType = '天眼查专利/国内榜单'
# TODO: needs adjusting so codes go into the separate foreign and domestic Redis queues.
# Global flag intended to control thread exit. runSpider sets it to True when
# the Redis queue is empty; no live code in this file reads it.
should_exit = False
def connectSql():
    """Open a fresh connection to the ``caiji`` MySQL database.

    Returns:
        tuple: ``(connection, cursor)``. The caller owns both objects and is
        expected to release them (see ``closeSql``).
    """
    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration.
    connection = pymysql.connect(
        host='114.115.159.144',
        user='caiji',
        password='zzsn9988',
        db='caiji',
        charset='utf8mb4',
    )
    return connection, connection.cursor()

# Close the database cursor and connection
def closeSql(cnx,cursor):
    """Release a cursor and the connection it belongs to.

    The cursor is closed first, while its connection is still open, and the
    connection is closed afterwards even if closing the cursor raises.

    Args:
        cnx: Connection object exposing ``close()``.
        cursor: Cursor object exposing ``close()``.
    """
    try:
        cursor.close()
    finally:
        cnx.close()

# Fetch a proxy
def get_proxy():
    """Return one randomly chosen proxy from the ``clb_proxy`` table.

    Each row's ``proxy`` value is split on ``-`` and the first two parts are
    used as host and port. Rows that do not contain both parts are skipped.

    Returns:
        dict: ``{"http": url, "https": url}`` where ``url`` is
        ``http://host:port``, in the form accepted by ``requests``'
        ``proxies`` argument.

    Raises:
        IndexError: If the table yields no usable proxy.
    """
    cnx, cursor = connectSql()
    try:
        cursor.execute("select proxy from clb_proxy")
        rows = cursor.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        closeSql(cnx, cursor)

    proxy_list = []
    for row in rows:
        # The query selects a single column, so the value is row[0].
        parts = str(row[0]).split('-')
        if len(parts) < 2:
            continue
        proxy_meta = f"http://{parts[0]}:{parts[1]}"
        proxy_list.append({"http": proxy_meta, "https": proxy_meta})

    # Pick among all available proxies rather than assuming at least five
    # exist; random.choice raises IndexError on an empty list.
    return random.choice(proxy_list)

def _fetch_patent_page(url, header, timeout=30):
    """Fetch one result page as parsed JSON: once via a proxy, then up to 3 times directly.

    Raises:
        RuntimeError: If the proxied attempt and all direct attempts fail.
    """
    try:
        ip = get_proxy()
    except Exception:
        # A single retry after a short pause; a second failure propagates.
        time.sleep(2)
        ip = get_proxy()

    try:
        return requests.get(url=url, headers=header, proxies=ip, verify=False, timeout=timeout).json()
    except Exception as proxy_error:
        last_error = proxy_error
        log.info(f'request via proxy failed, retrying without proxy: {proxy_error}')

    for _ in range(3):
        try:
            # Return on the first success instead of repeating the request.
            return requests.get(url=url, headers=header, verify=False, timeout=timeout).json()
        except Exception as direct_error:
            last_error = direct_error
            time.sleep(2)

    raise RuntimeError(f'failed to fetch patent page after proxy and direct attempts: {url}') from last_error


def _insert_if_absent(key_column, key_value, columns, values):
    """Insert a row into ``zhuanli_sh_tyc`` unless one with ``key_column == key_value`` exists.

    ``key_column`` and ``columns`` are interpolated into the SQL text and must
    be trusted constants; ``key_value`` and ``values`` are bound as parameters.

    Returns:
        bool: True if a row was inserted, False if a matching row already existed.
    """
    cnx, cursor = connectSql()
    try:
        cursor.execute(f"select count(1) from zhuanli_sh_tyc where {key_column}=%s", (key_value,))
        if cursor.fetchone()[0] > 0:
            return False
        placeholders = ','.join(['%s'] * len(columns))
        cursor.execute(
            f"insert into zhuanli_sh_tyc({','.join(columns)}) values ({placeholders})",
            tuple(values),
        )
        cnx.commit()
        return True
    finally:
        closeSql(cnx, cursor)


def spider_zhuanli(com_name, social_code, tycid):
    """Crawl every patent page of one company and store new patents in ``zhuanli_sh_tyc``.

    Pages of 100 items are requested until a page has an empty item list. Each
    patent is inserted unless a row with the same ``shenqing_code`` exists.
    When the response has no ``data.items`` entry, a placeholder row holding
    only the company name and social code is stored (unless a row for that
    social code already exists) and crawling stops.

    Args:
        com_name: Company name stored with every row.
        social_code: Unified social credit code stored with every row.
        tycid: Company id placed in the request URL's ``id`` parameter.

    Raises:
        RuntimeError: If a page cannot be fetched through the proxy or
            directly, so the caller can re-queue the company instead of it
            being recorded as having no patents.
        KeyError: If a patent item has no ``title`` key.
    """
    page = 1
    start_time = time.time()

    t = int(time.time() * 1000)
    # NOTE(review): the auth token below is hard-coded and its payload carries
    # an expiry; consider moving it to configuration.
    header = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Host': 'capi.tianyancha.com',
        'Origin': 'https://www.tianyancha.com',
        'Referer': 'https://www.tianyancha.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
        'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzI3MzczNzEzMSIsImlhdCI6MTcwMzE1MjEzMSwiZXhwIjoxNzA1NzQ0MTMxfQ.3tF-UFhorC_mS4h2UIBOZamApfcaJEfjBbr8K11d2yHhELBM1pEvjd6yccxhLzVKRoyFdTn-1Cz6__ZpzgjnGg',
        'X-TYCID': '6f6298905d3011ee96146793e725899d',
        'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'version': 'TYC-Web'
    }
    # Optional item keys, in the order their columns appear after ``title``;
    # a missing key is stored as an empty string.
    optional_fields = (
        'applicationTime', 'patentNum', 'patentType', 'lprs', 'pubDate',
        'pubnumber', 'inventor', 'applicantName', 'cat', 'uuid',
    )
    patent_columns = (
        'com_name', 'social_code', 'title', 'shenqingri', 'shenqing_code',
        'leixing', 'status', 'gongkairi', 'gongkai_code', 'famingren',
        'shenqingren', 'gongneng', 'uuid', 'year',
    )

    while True:
        log.info(f'===正在处理第{page}页===')
        url = f'https://capi.tianyancha.com/cloud-intellectual-property/patent/patentListV6?_={t}&id={tycid}&pageSize=100&pageNum={page}&type=-100&lprs=-100&applyYear=-100&pubYear=-100&fullSearchText=&sortField=&sortType=-100'
        res_j = _fetch_patent_page(url, header)

        try:
            list_all = res_j['data']['items']
        except (KeyError, TypeError):
            # The response carries no item list: record the company once with
            # only its name and code, then stop.
            # NOTE(review): an error payload (e.g. rejected token) presumably
            # lands here too — confirm against the API.
            inserted = _insert_if_absent(
                'social_code', social_code,
                ('com_name', 'social_code'), (com_name, social_code),
            )
            if inserted:
                log.info(f"{com_name}---{social_code}---新增---无专利")
            else:
                log.info(f"{com_name}---{social_code}---已经存在---无专利")
            log.info(f"---{social_code}----{tycid}--共{page-1}页--结束处理")
            break

        if not list_all:
            log.info(f"---{social_code}----{tycid}--共{page}页--结束处理")
            break

        for one_zhuanli in list_all:
            title = one_zhuanli['title']
            (shenqingri, shenqing_code, leixing, status, gongkairi, gongkai_code,
             famingren, shenqingren, gongneng, uuid) = (
                one_zhuanli.get(field, '') for field in optional_fields
            )
            # Only slice a real string; a null or non-string date yields no year.
            year = shenqingri[:4] if isinstance(shenqingri, str) else ''
            values = (
                com_name, social_code, title, shenqingri, shenqing_code, leixing,
                status, gongkairi, gongkai_code, famingren, shenqingren, gongneng,
                uuid, year,
            )
            if _insert_if_absent('shenqing_code', shenqing_code, patent_columns, values):
                log.info(f"{com_name}-------{shenqing_code}---新增")
            else:
                log.info(f"{com_name}-------{shenqing_code}---已经存在")
            time.sleep(2)

        log.info(f"【{page}】-----------end,耗时{baseCore.getTimeCost(start_time, time.time())}")
        page += 1

def runSpider():
    """Pull one social credit code from Redis and crawl that company's patents.

    The code is popped from ``ZhuanLi:gnshSocial_code`` and looked up with
    ``baseCore.getInfomation``. It is then handled as follows:

    * no company data: the code is pushed back and the run is skipped;
    * ``place`` (field 6) is not 1: the code is moved to ``Zhuanli:gwSocial_code``;
    * no TYC id (field 11): one is resolved with ``getTycIdByXYDM`` and
      written to ``EnterpriseInfo``;
    * otherwise the company is passed to ``spider_zhuanli``.

    Any exception while processing is logged, recorded with
    ``baseCore.recordLog``, and the code is pushed back onto the queue.

    Returns:
        False when the code was skipped or re-queued before crawling (or the
        queue was empty); None after a crawl attempt.
    """
    global should_exit
    queue_key = 'ZhuanLi:gnshSocial_code'

    # Pull the next social credit code from Redis.
    social_code = baseCore.redicPullData(queue_key)
    if social_code is None:
        # Queue is empty: flag it and wait instead of processing ``None``
        # (which would end up pushing ``None`` back into Redis). The pause
        # keeps the endless outer loop from hammering Redis.
        should_exit = True
        time.sleep(20)
        return False

    start = time.time()
    try:
        data = baseCore.getInfomation(social_code)
        if len(data) == 0:
            # No company record yet: put the code back into Redis.
            baseCore.rePutIntoR(queue_key, social_code)
            return False
        company_id = data[0]
        com_name = data[1]
        tycid = data[11]
        place = data[6]
        if place != 1:
            # NOTE(review): this key is spelled 'Zhuanli' while the source
            # queue is 'ZhuanLi' — confirm which casing the consumer reads.
            baseCore.rePutIntoR('Zhuanli:gwSocial_code', social_code)
            return False
        if tycid is None or tycid == '':
            try:
                retData = getTycIdByXYDM(social_code)
                if retData['tycData'] and retData['reput']:
                    tycid = retData['tycData']['id']
                    # Persist the resolved id so later runs can skip the lookup.
                    cnx, cursor = connectSql()
                    try:
                        cursor.execute(
                            "update EnterpriseInfo set TYCID = %s where SocialCode = %s",
                            (tycid, social_code),
                        )
                        cnx.commit()
                    finally:
                        closeSql(cnx, cursor)
                elif not retData['tycData'] and retData['reput']:
                    state = 0
                    takeTime = baseCore.getTimeCost(start, time.time())
                    baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
                    log.info(f'======={social_code}====重新放入redis====')
                    baseCore.rePutIntoR(queue_key, social_code)
                    return False
                elif not retData['reput'] and not retData['tycData']:
                    return False
                # NOTE(review): tycData present with reput falsy matches no
                # branch, so crawling proceeds with an empty tycid — confirm
                # whether that case can occur.
            except Exception:
                state = 0
                takeTime = baseCore.getTimeCost(start, time.time())
                baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
                baseCore.rePutIntoR(queue_key, social_code)
                return False

        log.info(f"{company_id}---{social_code}----{tycid}----开始处理")

        spider_zhuanli(com_name, social_code, tycid)

    except Exception as e:
        traceback.print_exc()
        log.info(f'==={social_code}=====获取企业信息失败==={e}=')
        # Put the code back into Redis so it is retried later.
        baseCore.rePutIntoR(queue_key, social_code)
        state = 0
        takeTime = baseCore.getTimeCost(start, time.time())
        baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
        time.sleep(5)

# if __name__ == "__main__":
#     while True:
#         # 创建一个线程池，指定线程数量为4
#         with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
#             results = []
#             while True:
#                 # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
#                 social_code = baseCore.redicPullData('ZhuanLi:gnshSocial_code')
#                 # social_code = '91350700856994874M'
#                 # 判断 如果Redis中已经没有数据，则等待
#                 if social_code == None:
#                     # time.sleep(20)
#                     break
#
#                 future = executor.submit(runSpider, social_code)
#                 results.append(future)
#                 # 获取任务的执行结果
#             for future in concurrent.futures.as_completed(results):
#                 try:
#                     result = future.result()
#                     # 处理任务的执行结果
#                     print(f"任务执行结束: {result}")
#                 except Exception as e:
#                     # 处理任务执行过程中的异常
#                     # print(f"任务执行exception: {e}")
#                     traceback.print_exc()

def run_threads(num_threads):
    """Run ``runSpider`` on ``num_threads`` threads and wait for all of them.

    Args:
        num_threads: Number of worker threads to start.
    """
    workers = [threading.Thread(target=runSpider) for _ in range(num_threads)]
    for worker in workers:
        worker.start()
    # Block until every worker has finished its single runSpider call.
    for worker in workers:
        worker.join()

if __name__ == '__main__':
    # Each batch starts this many workers; every worker handles one company.
    num_threads = 5
    # Run batches back to back, logging the wall-clock time of each.
    while True:
        batch_started = time.time()
        run_threads(num_threads)
        elapsed = time.time() - batch_started
        log.info(f'5线程 总耗时{elapsed}秒')
