import requests,time,re,random
from base import BaseCore
import pandas as pd
from bs4 import BeautifulSoup as bs
from comData.Tyc.getTycId import getTycIdByXYDM
# Shared project helper; the rest of this file uses it for proxies, the Redis
# queue, run logging and the database handles below.
baseCore = BaseCore.BaseCore()
# Database connection and cursor taken from the shared helper.
cnx = baseCore.cnx
cursor = baseCore.cursor
# Logger used for all progress messages in this script.
log = baseCore.getLogger()
# Task label passed to baseCore.recordLog when processing a company fails.
taskType = '天眼查专利/国内上市'


# Seconds to wait on the Tianyancha API before a request counts as failed.
_REQUEST_TIMEOUT = 30
# Number of direct (proxy-less) attempts made after the proxied attempt fails.
_DIRECT_RETRIES = 3
# Column order used for a full patent row in zhuanli_sh_tyc.
_PATENT_COLUMNS = (
    'com_name', 'social_code', 'title', 'shenqingri', 'shenqing_code',
    'leixing', 'status', 'gongkairi', 'gongkai_code', 'famingren',
    'shenqingren', 'gongneng', 'uuid', 'year',
)
_INSERT_PATENT_SQL = (
    "insert into zhuanli_sh_tyc(" + ",".join(_PATENT_COLUMNS) + ") values ("
    + ",".join(["%s"] * len(_PATENT_COLUMNS)) + ")"
)


def _fetch_patent_page(tycid, page):
    """Request one page of the patent list and return the decoded JSON.

    The first attempt goes through a proxy obtained from
    ``baseCore.get_proxy()``; if it fails, up to ``_DIRECT_RETRIES`` attempts
    are made without a proxy.  Returns ``None`` when every attempt fails, so
    the caller can tell a transport failure apart from an empty result.
    """
    t = int(time.time() * 1000)
    # NOTE(review): X-AUTH-TOKEN / X-TYCID are hard-coded credentials; they
    # should come from configuration rather than source control.
    header = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Host': 'capi.tianyancha.com',
        'Origin': 'https://www.tianyancha.com',
        'Referer': 'https://www.tianyancha.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
        'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzI3MzczNzEzMSIsImlhdCI6MTcwMzE1MjEzMSwiZXhwIjoxNzA1NzQ0MTMxfQ.3tF-UFhorC_mS4h2UIBOZamApfcaJEfjBbr8K11d2yHhELBM1pEvjd6yccxhLzVKRoyFdTn-1Cz6__ZpzgjnGg',
        'X-TYCID': '6f6298905d3011ee96146793e725899d',
        'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'version': 'TYC-Web'
    }
    url = f'https://capi.tianyancha.com/cloud-intellectual-property/patent/patentListV6?_={t}&id={tycid}&pageSize=100&pageNum={page}&type=-100&lprs=-100&applyYear=-100&pubYear=-100&fullSearchText=&sortField=&sortType=-100'

    try:
        ip = baseCore.get_proxy()
    except Exception:
        # One delayed second try; a second failure propagates to the caller.
        time.sleep(2)
        ip = baseCore.get_proxy()

    # Proxied attempt first, then direct attempts (proxies=None).
    attempts = [ip] + [None] * _DIRECT_RETRIES
    for attempt, proxies in enumerate(attempts, start=1):
        try:
            # Returning here stops retrying as soon as one attempt succeeds.
            return requests.get(url=url, headers=header, proxies=proxies,
                                verify=False, timeout=_REQUEST_TIMEOUT).json()
        except Exception as e:
            log.info(f'{tycid}---第{page}页请求失败(第{attempt}次)---{e}')
            time.sleep(2)
    return None


def _record_no_patent(com_name, social_code):
    """Insert a name/code-only placeholder row unless the company already has rows."""
    cursor.execute("select count(1) from zhuanli_sh_tyc where social_code=%s", (social_code,))
    if cursor.fetchone()[0] > 0:
        log.info(f"{com_name}---{social_code}---已经存在---无专利")
        return
    cursor.execute("insert into zhuanli_sh_tyc(com_name,social_code) values (%s,%s)",
                   (com_name, social_code))
    cnx.commit()
    log.info(f"{com_name}---{social_code}---新增---无专利")


def _patent_row(com_name, social_code, item):
    """Map one API patent item to a dict keyed by zhuanli_sh_tyc column name.

    ``title`` is required (``KeyError`` if absent); every other field falls
    back to ``''`` when the key is missing.
    """
    shenqingri = item.get('applicationTime', '')
    return {
        'com_name': com_name,
        'social_code': social_code,
        'title': item['title'],
        'shenqingri': shenqingri,
        'shenqing_code': item.get('patentNum', ''),
        'leixing': item.get('patentType', ''),
        'status': item.get('lprs', ''),
        'gongkairi': item.get('pubDate', ''),
        'gongkai_code': item.get('pubnumber', ''),
        'famingren': item.get('inventor', ''),
        'shenqingren': item.get('applicantName', ''),
        'gongneng': item.get('cat', ''),
        'uuid': item.get('uuid', ''),
        # The year is the first four characters of the application date; a
        # null/non-string date yields '' instead of raising on the slice.
        'year': shenqingri[:4] if isinstance(shenqingri, str) else '',
    }


def spider_zhuanli(com_name, social_code, tycid, page, list_all_info):
    """Fetch one page of a company's patents and store the new ones.

    Args:
        com_name: Company name written to each stored row.
        social_code: Unified social credit code written to each stored row.
        tycid: Tianyancha company id used in the API request.
        page: 1-based page number to fetch (100 items per page).
        list_all_info: Unused; kept so existing callers keep working.

    Returns:
        ``page`` when the page contained at least one patent, so the caller
        can request the next page; ``0`` when paging should stop (request
        failed, response had no ``data.items``, or the page was empty).
    """
    start_time = time.time()
    log.info(f'===正在处理第{page}页===')

    res_j = _fetch_patent_page(tycid, page)
    if res_j is None:
        # A transport failure is not evidence that the company has no patents,
        # so do not write the placeholder row here.
        log.info(f"{com_name}---{social_code}---第{page}页请求失败---跳过")
        return 0

    try:
        list_all = res_j['data']['items']
    except (KeyError, TypeError):
        # The response carried no item list: record the company as patent-less.
        _record_no_patent(com_name, social_code)
        return 0
    if not list_all:
        return 0

    for one_zhuanli in list_all:
        row = _patent_row(com_name, social_code, one_zhuanli)
        shenqing_code = row['shenqing_code']
        # Rows are de-duplicated on the application number.
        cursor.execute("select count(1) from zhuanli_sh_tyc where shenqing_code=%s",
                       (shenqing_code,))
        if cursor.fetchone()[0] > 0:
            log.info(f"{com_name}-------{shenqing_code}---已经存在")
            continue
        cursor.execute(_INSERT_PATENT_SQL, tuple(row[col] for col in _PATENT_COLUMNS))
        cnx.commit()
        log.info(f"{com_name}-------{shenqing_code}---新增")
        # Pause after each insert, as before; presumably pacing - confirm
        # whether it is still needed.
        time.sleep(2)

    log.info(f"【{page}】-----------end,耗时{baseCore.getTimeCost(start_time, time.time())}")
    return page

if __name__ == "__main__":
    # Redis list this task consumes. Codes that fail are pushed back onto the
    # same list so that this task (not an unrelated one) retries them.
    redis_key = 'ZhuanLi:gnshSocial_code_zg500'
    while True:
        list_all_info = []
        # Pull the next social credit code from Redis and look up the
        # company's base record for it.
        social_code = baseCore.redicPullData(redis_key)
        # Stop once the queue is drained.
        if social_code is None:
            break
        start = time.time()
        try:
            data = baseCore.getInfomation(social_code)
            if len(data) == 0:
                # No base record yet: put the code back into Redis.
                baseCore.rePutIntoR(redis_key, social_code)
                continue
            company_id = data[0]
            com_name = data[1]
            xydm = data[2]
            tycid = data[11]
            if tycid is None or tycid == '':
                # The Tianyancha id is not stored yet: resolve it from the
                # credit code and persist it.
                try:
                    retData = getTycIdByXYDM(xydm)
                    if retData['tycData'] and retData['reput']:
                        tycid = retData['tycData']['id']
                        cursor.execute(
                            "update EnterpriseInfo set TYCID = %s where SocialCode = %s",
                            (tycid, xydm))
                        cnx.commit()
                    elif not retData['tycData'] and retData['reput']:
                        state = 0
                        takeTime = baseCore.getTimeCost(start, time.time())
                        baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
                        log.info(f'======={social_code}====重新放入redis====')
                        # Previously re-queued under 'NewsEnterprise:gnqy_socialCode',
                        # a list this task never reads, so the code was lost to it.
                        baseCore.rePutIntoR(redis_key, social_code)
                        continue
                    elif not retData['reput'] and not retData['tycData']:
                        continue
                except Exception:
                    state = 0
                    takeTime = baseCore.getTimeCost(start, time.time())
                    baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
                    baseCore.rePutIntoR(redis_key, social_code)
                    continue
            log.info(f"{company_id}---{xydm}----{tycid}----开始处理")
            # Walk the pages until spider_zhuanli returns 0 (nothing more to fetch).
            page = 1
            while True:
                page = spider_zhuanli(com_name, xydm, tycid, page, list_all_info)
                if page == 0:
                    log.info(f"{company_id}---{xydm}----{tycid}----结束处理")
                    break
                page += 1
        except Exception as e:
            log.info(f'==={social_code}=====获取企业信息失败==={e}=')
            # Put the code back into Redis so it is retried later.
            baseCore.rePutIntoR(redis_key, social_code)
            state = 0
            takeTime = baseCore.getTimeCost(start, time.time())
            baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
            time.sleep(5)
