import json
import time

import numpy as np
import pandas as pd
import pymysql
import requests
from bs4 import BeautifulSoup
from kafka import KafkaProducer
from NewsYahoo import news

from base.BaseCore import BaseCore
import urllib3

# HTTP calls in this module pass verify=False; silence the
# InsecureRequestWarning that urllib3 would otherwise emit for each of them.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Task label passed to baseCore.recordLog for every log record written here.
taskType = '企业基本信息/雅虎财经'

# Shared project helper object; `r` is used below via rpush (presumably a
# Redis client -- confirm against BaseCore) and `log` is the module logger.
baseCore = BaseCore()
r = baseCore.r
log = baseCore.getLogger()
# Browser-like request headers sent with the Yahoo Finance profile request.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    'cache-control': 'max-age=0',
    # 'cookie': 'maex=%7B%22v2%22%3A%7B%7D%7D; GUC=AQEBBwFjY49jkEIa8gQo&s=AQAAABw20C7P&g=Y2JIFQ; A1=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A3=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A1S=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc&j=WORLD; PRF=t%3D6954.T%252BTEL%252BSOLB.BR%252BSTM%252BEMR%252BGT%252BAMD%252BSYM.DE%252BPEMEX%252BSGO.PA%252BLRLCF%252BSYNH%252B001040.KS; cmp=t=1669714927&j=0&u=1---',
    'sec-ch-ua': '"Chromium";v="106", "Google Chrome";v="106", "Not;A=Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': "Windows",
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
}


# Fetch company base info and executive info by stock code.

# Exceptions raised when an expected element is missing from the profile page
# (find() returned None, an index is out of range, ...).
_PROFILE_PARSE_ERRORS = (AttributeError, IndexError, TypeError)


def _yahoo_symbol(gpdm):
    """Return the ticker to place in the Yahoo Finance URL for stock code *gpdm*.

    A code containing 'HK' whose part before the first '.' is five characters
    long has its first character dropped. Every other code is returned
    unchanged.
    """
    code = str(gpdm)
    if 'HK' in code and len(code.split('.')[0]) == 5:
        return code[1:]
    return gpdm


def _pick(getter):
    """Return ``getter()``, or '' when the element it reaches for is missing."""
    try:
        return getter()
    except _PROFILE_PARSE_ERRORS:
        return ''


def getInfo(name,enname,gpdm, xydm, start):
    """Scrape the Yahoo Finance profile page for one company.

    Args:
        name: Company name, copied into the result records.
        enname: English company name, copied into the result records.
        gpdm: Stock code used to build the Yahoo Finance profile URL.
        xydm: Credit code, copied into the result records and used as the
            key for ``baseCore.recordLog``.
        start: Start timestamp handed to ``baseCore.getTimeCost``.

    Returns:
        ``[state, retData]``. ``state`` is 1 when the page was fetched with
        HTTP 200 and 0 otherwise. ``retData`` is a dict with ``'base_info'``
        (dict of company fields) and ``'people_info'`` (list of per-executive
        dicts). Fields that cannot be found on the page are empty strings.
    """
    gpdm_ = _yahoo_symbol(gpdm)
    retData = {
        'base_info': {
            '公司名称': name,
            '英文名': enname,
            '信用代码': xydm,
            '股票代码': gpdm,
            '地址': '',
            '电话': '',
            '公司网站': '',
            '部门': '',
            '行业': '',
            '员工人数': '',
            '公司简介': ''
        },
        'people_info': [],
    }
    # e.g. https://finance.yahoo.com/quote/VOW3.DE/profile?p=VOW3.DE
    url = f'https://finance.yahoo.com/quote/{gpdm_}/profile?p={gpdm_}'

    time.sleep(3)
    # Stays None when every attempt raises, so the failure branch below can
    # still run instead of hitting an unbound name.
    response = None
    for i in range(0, 3):
        try:
            # A timeout keeps a stalled connection from blocking the worker.
            response = requests.get(url, headers=headers, verify=False, timeout=30)
        except requests.RequestException as e:
            log.error(f"{gpdm}---第{i}次---获取基本信息接口请求异常：{e}")
            continue
        time.sleep(1)
        if response.status_code == 200:
            break
        log.error(f"{gpdm}---第{i}次---获取基本信息接口返回失败：{response.status_code}")

    if response is None or response.status_code != 200:
        status_code = None if response is None else response.status_code
        log.error(f"{gpdm}------获取基本信息接口重试后依然失败失败：{status_code}")
        exception = '获取基本信息接口返回失败'
        state = 0
        takeTime = baseCore.getTimeCost(start, time.time())
        baseCore.recordLog(xydm, taskType, state, takeTime, url, exception)
        # NOTE(review): this pushes an empty string (not the company code)
        # back onto the queue; confirm whether re-queuing xydm was intended.
        rePutIntoR('')
        if response is not None:
            response.close()
        return [state, retData]

    state = 1
    soup = BeautifulSoup(response.content, 'html.parser')
    response.close()
    page = soup.find('div', {'id': 'Col1-0-Profile-Proxy'})

    # Company block: first <p> holds address/phone/website, second <p> holds
    # the highlighted sector / industry / employee-count spans.
    com_info = _pick(lambda: page.find('div', {'class': 'Mb(25px)'})) or None
    paragraphs = _pick(lambda: com_info.find_all('p')) or []
    contact = paragraphs[0] if paragraphs else None
    com_phone = _pick(lambda: contact.find('a').text)
    com_url = _pick(lambda: contact.find('a', {'target': '_blank'}).text)
    # The address is whatever remains of the first paragraph once the phone
    # number and website text are stripped out.
    com_address = _pick(lambda: contact.text.replace(com_phone, '').replace(com_url, ''))
    facts = _pick(lambda: paragraphs[1].find_all('span', {'class': 'Fw(600)'})) or []
    com_bumen = _pick(lambda: facts[0].text)
    com_hangye = _pick(lambda: facts[1].text)
    com_people = _pick(lambda: facts[2].text)
    com_jianjie = _pick(lambda: page.find('p', {'class': 'Mt(15px) Lh(1.6)'}).text)

    retData['base_info'] = {
        '公司名称': name,
        '英文名': enname,
        '信用代码': xydm,
        '股票代码': gpdm,
        '地址': com_address,
        '电话': com_phone,
        '公司网站': com_url,
        '部门': com_bumen,
        '行业': com_hangye,
        '员工人数': com_people,
        '公司简介': com_jianjie
    }

    # Executive table: skip the header row, one <tr> per person.
    rows = _pick(lambda: page.find('table', {'class': 'W(100%)'}).find_all('tr')[1:]) or []
    retPeople = []
    for row in rows:
        cells = [td.text for td in row.find_all('td')]
        if not cells:
            # A row without a name cell carries no person; skip it.
            continue
        # Pad short rows so the missing columns read as ''.
        cells += [''] * (5 - len(cells))
        # "N/A" placeholders are normalised to '' for every column but the name.
        p_zhiwu, p_money, p_xingshi, p_year = (
            '' if cell == 'N/A' else cell for cell in cells[1:5]
        )
        retPeople.append({
            '公司名称': name,
            '股票代码': gpdm,
            '信用代码': xydm,
            '姓名': cells[0],
            '职务': p_zhiwu,
            '薪资': p_money,
            '行使': p_xingshi,
            '出生年份': p_year
        })
    retData['people_info'] = retPeople
    log.info(f"获取基本信息--{gpdm}，耗时{baseCore.getTimeCost(start, time.time())}")
    return [state, retData]


# Save company base info.
def saveBaseInfo(info,start):
    """Send the company base info in *info* to the "regionInfo" Kafka topic.

    Args:
        info: Dict with a ``'base_info'`` mapping as produced by ``getInfo``.
        start: Start timestamp handed to ``baseCore.getTimeCost`` for the
            elapsed-time log lines.

    Raises:
        Whatever the Kafka client raises when the producer cannot be created
        or the send is not acknowledged within 10 seconds.
    """
    base = info['base_info']
    company_dict = {
        'name': base['公司名称'],  # company name
        'shortName': '',  # company short name
        'socialCreditCode': base['信用代码'],  # unified social credit code
        'officialPhone': base['电话'],  # phone
        'officialUrl': base['公司网站'],  # official website
        'briefInfo': base['公司简介'],  # description
        'industry': base['行业'],  # industry
        'englishName': base['英文名'],  # English name
        'address': base['地址'],  # address
        'status': 0,  # status
    }
    producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'], api_version=(2, 0, 2))
    try:
        payload = json.dumps(company_dict, ensure_ascii=False).encode('utf8')
        kafka_result = producer.send("regionInfo", payload)
        # Block until the broker acknowledges, so a failed send raises here.
        kafka_result.get(timeout=10)
    finally:
        # A producer is created per call; close it so its sockets and
        # background thread are not leaked, even when the send fails.
        producer.close(timeout=10)
    log.info(f"保存基本信息--{info['base_info']['信用代码']}，耗时{baseCore.getTimeCost(start, time.time())}")
    log.info(f"保存基本信息--{company_dict['name']}，耗时{baseCore.getTimeCost(start, time.time())}")


# Save executive info.
def savePeopleInfo(info,start):
    """POST the executives in *info* to the executive sync endpoint.

    Args:
        info: Dict with ``'people_info'`` (list of per-executive dicts) and
            ``'base_info'`` as produced by ``getInfo``.
        start: Start timestamp handed to ``baseCore.getTimeCost``.

    Returns:
        1 when there was nothing to send or the endpoint reported success,
        0 when all three attempts failed (a failure log record is written).
    """
    list_one_info = [
        {
            "socialCreditCode": person['信用代码'],
            "name": person['姓名'],
            "sex": '',
            "education": '',
            "position": person['职务'],
            "salary": person['薪资'],
            "birthYear": person['出生年份'],
            "shareNum": '',
            "shareRatio": '',
            "benefitShare": '',
            "currentTerm": '',
            "personInfo": '',
            "sort": str(position)
        }
        for position, person in enumerate(info['people_info'], 1)
    ]

    if list_one_info:
        json_updata = json.dumps(list_one_info)
        saved = False
        failure = None  # detail of the most recent failed attempt, for the log
        for _ in range(3):
            try:
                response = requests.post('http://114.115.236.206:9988/datapull/sync/executive', data=json_updata,
                                         timeout=300, verify=False)
            except requests.RequestException as e:
                # A network error consumes one attempt instead of aborting
                # the whole retry loop.
                failure = e
                continue
            if response.status_code != 200:
                failure = response.status_code
                continue
            retJson = json.loads(response.content.decode('utf-8'))
            # Any truthy 'success' value counts as success.
            # NOTE(review): a string "false" would also pass -- confirm the
            # endpoint returns a JSON boolean.
            if retJson['success']:
                saved = True
                break
            failure = retJson

        if not saved:
            log.error(f"保存高管接口失败---{failure}")
            exception = '保存高管接口失败'
            state = 0
            takeTime = baseCore.getTimeCost(start, time.time())
            baseCore.recordLog(list_one_info[-1]['socialCreditCode'], taskType, state, takeTime, '', exception)
            return state

    state = 1
    log.info(f"保存高管信息--{info['base_info']['信用代码']}，耗时{baseCore.getTimeCost(start, time.time())}")
    return state


def rePutIntoR(item):
    """Push *item* onto the 'BaseInfoEnterprise:gwqy_socialCode' list via ``r.rpush``."""
    queue_key = 'BaseInfoEnterprise:gwqy_socialCode'
    r.rpush(queue_key, item)


# def getInfomation(social_code):
#     sql = f"SELECT * FROM EnterpriseInfo WHERE SocialCode = '{social_code}'"
#     cursor.execute(sql)
#     data = cursor.fetchone()
#     return data



# Collection worker.
def beginWork():
    """Run the collection loop until the process is interrupted.

    Each iteration pulls one code from 'BaseInfoEnterprise:gwqy_socialCode',
    loads the company row through ``baseCore.getInfomation``, scrapes it with
    ``getInfo``, stores the result with ``saveBaseInfo`` and
    ``savePeopleInfo``, writes a log record, and bumps the company's
    'BaseInfoRunCount'. ``baseCore.close()`` is called when the loop exits
    for any reason.
    """
    try:
        while True:
            social_code = baseCore.redicPullData('BaseInfoEnterprise:gwqy_socialCode')
            # Nothing usable in the queue: wait before polling again.
            if not social_code or social_code == 'None':
                time.sleep(20)
                continue
            # Load the company's base row from the database.
            data = baseCore.getInfomation(social_code)
            if not data:
                # Presumably no row matched this code; indexing None would
                # raise outside the try below and stop the worker.
                log.error(f"{social_code}--数据库中未查询到企业信息 跳过")
                continue
            name = data[1]
            enname = data[5]
            gpdm = data[3]
            xydm = data[2]

            # Number of collection runs recorded so far for this company.
            count = data[13]
            start_time = time.time()
            # Skip companies without a stock code.
            if gpdm is None:
                log.error(f"{name}--股票代码为空 跳过")
                exception = '股票代码为空'
                state = 0
                takeTime = baseCore.getTimeCost(start_time, time.time())
                baseCore.recordLog(xydm, taskType, state, takeTime, '', exception)
                continue
            try:
                fetch_state, info = getInfo(name,enname,gpdm, xydm, start_time)
                # Only persist when the profile page was fetched successfully.
                if fetch_state == 1:
                    base_saved = True
                    try:
                        saveBaseInfo(info,start_time)
                    except Exception as e:
                        base_saved = False
                        log.error(f'{name}....企业基本信息Kafka操作失败---{e}')
                        exception = 'Kafka操作失败'
                        state = 0
                        takeTime = baseCore.getTimeCost(start_time, time.time())
                        baseCore.recordLog(xydm, taskType, state, takeTime, '', exception)
                    # Executives are still stored even if the Kafka send failed.
                    state = savePeopleInfo(info,start_time)
                    # The company counts as collected only when BOTH the base
                    # info and the executive info were saved.
                    if base_saved and state == 1:
                        takeTime = baseCore.getTimeCost(start_time, time.time())
                        baseCore.recordLog(xydm, taskType, state, takeTime, '', '')
            except Exception as e:
                # Unexpected error: record the message and the failing line.
                ee = e.__traceback__.tb_lineno
                log.error(f'{name}...{xydm}...{gpdm}.....数据采集失败,原因:{ee}行 {e}')
                state = 0
                takeTime = baseCore.getTimeCost(start_time, time.time())
                baseCore.recordLog(xydm, taskType, state, takeTime, '', f'数据采集失败,原因:{ee}行 {e}')

            # Collection attempt finished: increment the run counter.
            # A missing (None) counter is treated as 0.
            count = (count or 0) + 1
            runType = 'BaseInfoRunCount'
            baseCore.updateRun(social_code,runType,count)
    finally:
        # Release resources on any exit (exception, Ctrl-C); a statement placed
        # after the infinite loop would never run.
        baseCore.close()


if __name__ == '__main__':
    # NOTE(review): database credentials are hard-coded here; consider loading
    # them from configuration or the environment.
    cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',charset='utf8mb4')
    try:
        cursor = cnx.cursor()
        try:
            beginWork()
        finally:
            # beginWork only stops via an exception or interrupt, so cleanup
            # must live in finally to ever run.
            cursor.close()
    finally:
        cnx.close()
