
"""
采集企业信用代码和企业标签
"""
import json
import time

import requests
import urllib3
from bs4 import BeautifulSoup
from retry import retry

from getTycId import getTycIdByXYDM
from base import BaseCore

from selenium import webdriver
from classtool import Token, Info

# Project helper exposing get_cookies() (used by login()) and updateTokeen()
# (called by the main loop after each lookup attempt).
token = Token()
# Project helper whose update_info() is used by the main loop to store results.
info = Info()
# Shared project helper: provides the queue accessors (redicPullData /
# rePutIntoR) and the logger used throughout this script.
baseCore = BaseCore.BaseCore()
# Silence urllib3's InsecureRequestWarning.
# NOTE(review): presumably because requests elsewhere are made without
# certificate verification -- confirm.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
log = baseCore.getLogger()


def create_driver(path=r'D:\soft\msedgedriver.exe'):
    """Start a Microsoft Edge WebDriver session.

    Args:
        path: Filesystem path of the ``msedgedriver`` executable. Defaults to
            the location this script has always used, so existing callers
            that pass no argument are unaffected.

    Returns:
        The ``webdriver.Edge`` instance that was created.
    """
    # Capabilities are passed as a raw dict rather than an EdgeOptions object.
    options = {
        "browserName": "MicrosoftEdge",
        "ms:edgeOptions": {
            "extensions": [],
            "args": ["--start-maximized"],  # start with the window maximized
        },
    }

    # NOTE(review): the ``executable_path`` / ``capabilities`` keywords are the
    # Selenium 3 style of constructing a driver -- confirm the installed
    # Selenium version still accepts them before upgrading.
    session = webdriver.Edge(executable_path=path, capabilities=options)
    return session


def login(driver):
    """Install a set of account cookies into *driver* and a requests session.

    Cookies are fetched from ``token.get_cookies()``. When none are returned
    the function logs the fact, sleeps for 30 minutes and reports failure.

    Args:
        driver: A live WebDriver. NOTE(review): Selenium only accepts cookies
            for the domain of the currently loaded page, so the caller is
            expected to have opened the target site already -- confirm.

    Returns:
        A 3-tuple ``(driver, id_cookie, session)``.

        * On success ``id_cookie`` is the value returned by
          ``token.get_cookies()`` and ``session`` is a ``requests.Session``
          carrying the same cookies as name/value pairs.
        * On failure ``id_cookie`` and ``session`` are empty strings. The
          *driver* is still returned unchanged, so that a caller doing
          ``driver, id_cookie, s = login(driver)`` does not lose its browser
          handle and can simply retry.
    """
    cookies_list, id_cookie, user_name = token.get_cookies()
    if not cookies_list:
        log.info("没有账号了,等待30分钟")
        time.sleep(30 * 60)
        # Previously '' was returned in the driver slot, which replaced the
        # caller's WebDriver with a string and broke the next login attempt.
        return driver, '', ''

    log.info(f'=====当前使用的是{user_name}的cookie======')
    for cookie in cookies_list:
        driver.add_cookie(cookie)
    time.sleep(3)
    # Reload so the page is requested again with the new cookies attached.
    driver.refresh()

    # Mirror the browser cookies into a plain HTTP session for API calls.
    s = requests.Session()
    s.cookies.update({cookie['name']: cookie['value'] for cookie in cookies_list})
    time.sleep(3)
    return driver, id_cookie, s


@retry(tries=3, delay=1)
def get_html(tycid, driver, dic_info):
    """Open a company page and store its tag titles in *dic_info*.

    The page is loaded through *driver*, the JSON embedded in the
    ``<script id="__NEXT_DATA__">`` element is parsed, and the titles found in
    its ``tagListV2`` list are collected. A tag is skipped when its title is
    in the exclusion list below or when its colour is ``#FF463C``.

    Any exception raised here (missing script element, unexpected JSON shape,
    WebDriver errors) propagates to the ``retry`` decorator, which is
    configured for 3 tries with a 1 second delay.

    Args:
        tycid: Company identifier used to build the page URL.
        driver: WebDriver used to load the page.
        dic_info: Dict that receives the result under the key ``股东企业标签``;
            it is modified in place.

    Returns:
        The same *dic_info* object that was passed in.
    """
    driver.get(url=f"https://www.tianyancha.com/company/{tycid}")
    time.sleep(3)  # presumably lets the page finish rendering before reading it

    document = BeautifulSoup(driver.page_source, 'html.parser')
    raw_json = document.find('script', attrs={'id': '__NEXT_DATA__'}).text
    payload = json.loads(raw_json)
    first_query = payload['props']['pageProps']['dehydratedState']['queries'][0]
    tags = first_query['state']['data']['data']['tagListV2']

    # Titles that are never reported as company tags.
    excluded_titles = ('存续', '曾用名', '竞争风险', '司法案件', '合作风险', '股权出质', '仍注册')
    # The title test comes first so 'color' is only read for non-excluded tags.
    dic_info['股东企业标签'] = [
        tag['title']
        for tag in tags
        if tag['title'] not in excluded_titles and tag['color'] != '#FF463C'
    ]
    return dic_info

def main():
    """Run the collection loop: pull a company from the queue, look it up, store it.

    Each queue item has the form ``"<no>|<company name>"``. For every item the
    loop logs in with a fresh cookie set, resolves the company through
    ``getTycIdByXYDM``, reads its tags with ``get_html`` and stores the result
    with ``info.update_info(no, ...)``. Items whose lookup fails are put back
    into a queue for later processing.

    The loop runs until an unhandled exception or interrupt occurs; the
    browser is always shut down on the way out.
    """
    source_queue = 'shareHolderInfo'
    # NOTE(review): failed lookups are re-put under a key that differs from the
    # one items are pulled from. This looks like a typo of the source key, in
    # which case re-queued items are never picked up again -- confirm and
    # align. Kept as-is here because it may be a deliberate separate queue.
    retry_queue = 'shareHorder'
    empty_queue_wait = 20  # seconds to idle when there is nothing to process

    driver = create_driver()
    try:
        # Open the site first so login() can attach cookies to this domain.
        driver.get('https://www.tianyancha.com/')
        driver.maximize_window()

        while True:
            item = baseCore.redicPullData(source_queue)
            if not item:
                # Nothing queued: wait instead of crashing on item.split().
                log.info(f'队列中没有数据,等待{empty_queue_wait}秒')
                time.sleep(empty_queue_wait)
                continue

            parts = item.split('|')
            if len(parts) < 2:
                log.info(f'数据格式错误,跳过: {item}')
                continue
            no, com_name = parts[0], parts[1]

            # Only id_cookie and the session are taken from login(): on failure
            # its first element may be '' and must not replace the live driver.
            _, id_cookie, s = login(driver)
            if not id_cookie:
                # Login failure is not the item's fault; give it back to the
                # queue it came from so it is not silently dropped.
                log.info(f'{com_name} 重新放入redis')
                baseCore.rePutIntoR(source_queue, item)
                continue

            try:
                retData = getTycIdByXYDM(com_name, s)
            except Exception as exc:
                # Exception (not bare except) so Ctrl-C / SystemExit still stop
                # the script instead of being logged as a lookup failure.
                retData = {}
                log.info(f'获取天眼查ID失败: {exc!r}')

            if retData:
                log.info(f'retData: {retData}')
            if not retData or not retData['state']:
                token.updateTokeen(id_cookie, 3)
                log.info(f'{com_name} 重新放入redis')
                baseCore.rePutIntoR(retry_queue, item)
                continue

            tycid = retData['tycData']['id']
            xydm = retData['tycData']['taxCode']
            if not xydm:
                log.info('未找到该企业,或该企业没有信用代码')

            dic_info = {'股东企业信用代码': xydm}
            dic_result = get_html(tycid, driver, dic_info)
            # Persist the collected fields for this record.
            info.update_info(no, dic_result)
            token.updateTokeen(id_cookie, 3)
            log.info(f'{xydm}---{com_name}---更新完成')
            time.sleep(1)
    finally:
        # Always release the browser and its driver process.
        driver.quit()


if __name__ == "__main__":
    main()