# -*- coding: utf-8 -*-
# @Author: MENG
# @Time  : 2022-4-9
import datetime

import xlrd
from selenium.webdriver.support.wait import WebDriverWait
from tqdm import tqdm
import pymongo
import pymysql
import time
import requests
from pyquery import PyQuery as pq
from selenium import webdriver
import json
from requests.packages import urllib3
urllib3.disable_warnings()
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import re
from BaseCore import BaseCore
import configparser
import redis
"""
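The request is split across three page URLs:
https://finance.yahoo.com/quote/WMT/financials?p=WMT
https://finance.yahoo.com/quote/WMT/balance-sheet?p=WMT
https://finance.yahoo.com/quote/WMT/cash-flow?p=WMT

Revised workflow for Yahoo Finance financial data
1. Get the stock codes of overseas-listed and Taiwanese companies from sys_base_enterprise_ipo (category 5, 6).
2. Request the financial data and the currency unit from Yahoo Finance.
3. Insert or update the data in table config_finance_data_sync.
Fields written:
social_credit_code
name
stock_code
content
level_relation
unit
create_time
4. Notify the sync interface of the collection result so that it pulls and processes the data.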

"""

class YahooCaiwu(object):

    def __init__(self):
        """Load ``config.ini`` and set up the logger, Redis client and Chrome driver.

        Attributes set:
            config: parsed ``config.ini``.
            logger: logger obtained from ``BaseCore``.
            r: Redis client on db 6, configured from the ``[redis]`` section.
            driver: Chrome WebDriver created by :meth:`get_webdriver`.
        """
        parser = configparser.ConfigParser()
        # Read the configuration file from the current working directory.
        parser.read('config.ini')
        self.config = parser

        self.logger = BaseCore().getLogger()

        redis_settings = {
            'host': parser.get('redis', 'host'),
            'port': parser.get('redis', 'port'),
            'password': parser.get('redis', 'pass'),
        }
        self.r = redis.Redis(db=6, **redis_settings)

        # The browser is started last because get_webdriver() reads self.config.
        self.driver = self.get_webdriver()

    # Parse one Yahoo Finance statement grid
    def deal_table(self, doc_resp, max_depth=7):
        """Convert the statement grid of a rendered Yahoo Finance page into dicts.

        Each row element is expected to have two children: the first holds the
        row's cells (its text is ``"label\\nvalue1\\nvalue2..."``) and the second
        holds the nested child rows.

        Args:
            doc_resp: PyQuery document built from the page source.
            max_depth: Deepest row level that is recorded. Rows at exactly this
                level are stored as leaves and anything below them is ignored.
                The default of 7 matches the depth that used to be hard-coded.

        Returns:
            dict with three keys:
                '表头': list of header cell texts,
                '目录': nested dict of row labels mirroring the row hierarchy,
                '内容': flat dict mapping every row label to its list of cell
                    texts (a label that occurs twice keeps the last values).

        Raises:
            IndexError: when the expected containers are missing from the page.
                This is deliberately not caught here; ``get_content2`` handles
                any exception as a failed scrape.
        """
        def load_grid(nth):
            # The grid sits in the 3rd or the 4th <div> of the section,
            # so the same lookup is needed for both positions.
            cells = doc_resp(
                f'#Col1-1-Financials-Proxy section div:nth-child({nth})>div>div'
            ).children()
            return pq(cells[0]).text().split('\n'), pq(cells[1]).children()

        catalogue_title, doc_items = load_grid(3)
        if len(doc_items) < 1:
            catalogue_title, doc_items = load_grid(4)

        content_dict = {}

        def label_and_values(row):
            # First child of a row carries the label followed by the cell values.
            lines = pq(pq(row).children()[0]).text().split('\n')
            return lines[0], lines[1:]

        def walk(parent_row, parent_level):
            # Build the label tree for the rows nested under `parent_row`.
            subtree = {}
            for row in pq(parent_row).children()[1]:
                label, values = label_and_values(row)
                if not label:
                    continue
                # Record values before descending so '内容' stays in page order.
                content_dict[label] = values
                if parent_level + 1 < max_depth:
                    subtree[label] = walk(row, parent_level + 1)
                else:
                    subtree[label] = {}
            return subtree

        catalogue_dict = {}
        for doc_item in doc_items:
            # Top-level rows are skipped only when the whole row is blank.
            if pq(doc_item).text() == '':
                continue
            label, values = label_and_values(doc_item)
            content_dict[label] = values
            catalogue_dict[label] = walk(doc_item, 1) if max_depth > 1 else {}

        return {
            '表头': catalogue_title,
            '目录': catalogue_dict,
            '内容': content_dict,
        }


    def get_webdriver(self):
        """Create and return a Chrome WebDriver configured from ``config.ini``.

        The browser binary and the chromedriver path are read from the
        ``[selenium]`` section (``binary_location`` and ``chrome_driver``).
        """
        opts = webdriver.ChromeOptions()
        for flag in ('--disable-gpu', '--ignore-certificate-errors'):
            opts.add_argument(flag)
        # Drop the "enable-automation" switch and the AutomationControlled
        # blink feature before the window is maximised.
        opts.add_experimental_option('excludeSwitches', ['enable-automation'])
        for flag in ("--disable-blink-features=AutomationControlled", "--start-maximized"):
            opts.add_argument(flag)
        # Headless mode is intentionally left off (was: '--headless').
        opts.binary_location = self.config.get('selenium', 'binary_location')
        driver_path = self.config.get('selenium', 'chrome_driver')
        # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of
        # a Service object - confirm against the pinned Selenium version.
        return webdriver.Chrome(options=opts, executable_path=driver_path)

    def conn11(self):
        """Open a new MySQL connection to ``clb_project``.

        Returns:
            tuple: ``(connection, cursor)``; the caller owns both objects.
        """
        # NOTE(review): host and credentials are hard-coded in source; consider
        # moving them into config.ini next to the redis/selenium settings.
        connection = pymysql.Connect(
            host='114.116.44.11',
            port=3306,
            user='caiji',
            passwd='f7s0&7qqtK',
            db='clb_project',
            charset='utf8',
        )
        return connection, connection.cursor()

    def getCodeFromRedis(self):
        """Pop the next stock code from the Redis work list and return it as ``str``.

        The value is taken from the head of ``NoticeEnterprise:securities_code``
        and decoded as UTF-8. If the pop yields ``None`` (presumably an empty
        list), ``.decode`` raises ``AttributeError``; the script's main loop
        catches that and pauses.
        """
        raw_code = self.r.lpop('NoticeEnterprise:securities_code')
        return raw_code.decode('utf-8')
    # 雅虎财经
    def get_content2(self,securitiescode):
        self.logger.info(f"需要采集的股票代码{securitiescode}")
        conn,cursor=self.conn11()
        try:
            # sql1 = """select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where category in ('4')  """  # and stock_code = "SYNH"
            sql1 = f"select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where securities_code='{securitiescode}'  "  # and stock_code = "SYNH"
            cursor.execute(sql1)
            result_data = cursor.fetchall()
        except Exception as e:
            self.logger.info("数据查询异常！")

        for data in result_data:
            try:
                data_list = list(data)
                print(data_list)
                social_credit_code = data_list[0]
                stock = data_list[1]
                securities_short_name = data_list[2] if data_list[2] is not None else ""
                # content_sql = ''
                stock2=str(stock)
                if stock2.upper().endswith("HK") and stock2.upper().startswith("0") :
                    stock2=stock2[1:]
                url = f'https://finance.yahoo.com/quote/{stock2}/financials?p={stock2}'
                try:
                    print(f'正在采集：{url}')
                    self.driver.get(url)
                    # 等待页面加载完成
                    wait = WebDriverWait(self.driver, 300)
                    wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
                    time.sleep(2)
                    try:
                        # driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
                        self.driver.find_element(By.XPATH,'//div[@id="Col1-1-Financials-Proxy"]/section/div[2]/button/div/span[text()="Expand All"]').click()
                        wait = WebDriverWait(self.driver, 60)
                        wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
                        time.sleep(2)
                    except Exception as e:
                        print(e)
                        pass
                    doc_resp1 = pq(self.driver.page_source)
                    unit=self.get_unit(doc_resp1)
                    financials1 = self.deal_table(doc_resp1)
                    self.driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click()
                    time.sleep(5)
                    try:
                        self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
                        time.sleep(5)
                    except Exception as e:
                        print(e)
                        pass
                    doc_resp2 = pq(self.driver.page_source)
                    financials2 = self.deal_table(doc_resp2)
                    self.driver.find_element(By.XPATH,'//div/span[text()="Balance Sheet"]').click()
                    time.sleep(5)
                    try: #//*[@id="Col1-1-Financials-Proxy"]/section/div[2]/button/div/span
                        self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
                        time.sleep(5)
                    except Exception as e:
                        print(e)
                        pass
                    doc_resp3 = pq(self.driver.page_source)
                    financials3 = self.deal_table(doc_resp3)
                    self.driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click()
                    time.sleep(5)
                    try:
                        self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
                        time.sleep(5)
                    except Exception as e:
                        print(e)
                        pass
                    doc_resp4 = pq(self.driver.page_source)
                    financials4 = self.deal_table(doc_resp4)
                    self.driver.find_element(By.XPATH,'//div/span[text()="Cash Flow"]').click()
                    time.sleep(5)
                    try:
                        self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
                        time.sleep(5)
                    except Exception as e:
                        print(e)
                        pass
                    doc_resp5 = pq(self.driver.page_source)
                    financials5 = self.deal_table(doc_resp5)
                    self.driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click()
                    time.sleep(5)
                    try:
                        self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
                        time.sleep(5)
                    except Exception as e:
                        print(e)
                        pass
                    doc_resp6 = pq(self.driver.page_source)
                    financials6 = self.deal_table(doc_resp6)
                    financials_dict = {
                        '表1': financials1,
                        '表2': financials2,
                        '表3': financials3,
                        '表4': financials4,
                        '表5': financials5,
                        '表6': financials6,
                    }
                    mu_lus = ''
                    for i in range(1, 7):
                        mu_lu = financials_dict[f'表{i}']['目录']
                        mu_lu = json.dumps(mu_lu, ensure_ascii=False, indent=4)
                        mu_lus += mu_lu + '&&&&'
                    level_relation = mu_lus[:-4]
                    financials = ''
                    for i in range(1, 7):
                        a_list = financials_dict[f'表{i}']['表头']
                        for a in a_list:
                            financials += a + '\n'
                        b_dict = financials_dict[f'表{i}']['内容']
                        for key, values in b_dict.items():
                            financials += key + '\n'
                            for b in values:
                                financials += b + '\n'
                        financials += '&&&&' + '\n'
                    financials = financials.strip()
                    content = financials[:-4].strip().replace('\n&&&&\n', '&&&&')

                    # if content[:100] in str(content_sql).replace("\\n","\n"):
                    #     print(f"{orc_id}:无最新数据")
                    #     continue

                    # sql = "UPDATE config_finance_data_sync SET level_relation=%s, content=%s  WHERE ID = %s"
                    # val = (level_relation, content, orc_id)
                    # cursor.execute(sql, val)
                    # conn.commit()
                    ipo_data={
                        'social_credit_code':social_credit_code,
                        'stock':stock,
                        'securities_short_name':securities_short_name,
                        'content':content,
                        'level_relation':level_relation,
                        'unit':unit,
                        'origin_type':1
                    }
                    flag=self.dataToSql(conn,cursor,ipo_data)
                    if flag:
                        # get_url = f'http://192.168.1.49:8088/sync/finance/yh?securitiesCode={stock}'
                        get_url = f'http://114.115.236.206:8088/sync/finance/yh?securitiesCode={stock}'
                        try:
                            resp = requests.get(get_url)
                            print(resp.text)
                            print('调用接口成功！！')
                        except:
                            with open('雅虎财经-财务数据_发送错误ID.txt', 'a', encoding='utf8')as f:
                                f.write(stock + '\n')
                except Exception as e:
                    print(e)
                    print(f'采集：{url}失败')
                    self.driver.quit()
                    time.sleep(10)
                    self.driver=self.get_webdriver()
            except:
                # time.sleep(60 * 10)
                self.driver.quit()
                time.sleep(10)
                self.driver=self.get_webdriver()
                print('出错，重试中！')
                continue
        # self.driver.close()


    def dataToSql(self, conn, cursor, ipo_data):
        """Insert or update one company's scraped data in ``config_finance_data_sync``.

        Args:
            conn: open DB connection; committed on success, rolled back on failure.
            cursor: cursor belonging to ``conn``.
            ipo_data: dict with keys ``social_credit_code``, ``stock``,
                ``securities_short_name``, ``content``, ``level_relation``,
                ``unit`` and ``origin_type``.

        Returns:
            bool: True when the row was written and committed. False when
            ``unit``, ``content`` or ``level_relation`` is empty, or when any
            error occurred.
        """
        try:
            social_credit_code = ipo_data['social_credit_code']
            stock = ipo_data['stock']
            content = ipo_data['content']
            level_relation = ipo_data['level_relation']
            unit = ipo_data['unit']
            origin_type = ipo_data['origin_type']

            # Incomplete scrapes are not stored.
            if not unit or not content or not level_relation:
                return False

            # Existence check: parameterised, and without pulling the large
            # content column back from the server.
            cursor.execute(
                "SELECT 1 FROM config_finance_data_sync WHERE stock_code=%s LIMIT 1",
                (stock,),
            )
            existing_record = cursor.fetchone()

            currentdate = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            if existing_record:
                # NOTE(review): unlike the INSERT below, this UPDATE leaves the
                # stored `unit` untouched - confirm that is intended.
                update_sql = ("UPDATE config_finance_data_sync SET social_credit_code=%s , content=%s , "
                              "level_relation=%s,origin_type=%s ,create_time=%s WHERE stock_code=%s ")
                update_param = (social_credit_code, content, level_relation, origin_type, currentdate, stock)
                cursor.execute(update_sql, update_param)
                print('更新成功')
            else:
                insert_sql = ("INSERT INTO config_finance_data_sync (social_credit_code, content,level_relation,"
                              "unit,stock_code,origin_type,create_time) VALUES ( %s,%s, %s, %s, %s, %s, %s)")
                insert_param = (social_credit_code, content, level_relation, unit, stock, origin_type, currentdate)
                cursor.execute(insert_sql, insert_param)
                print('插入成功')

            conn.commit()
        except Exception as e:
            print(f'数据入库失败：{e}')
            try:
                conn.rollback()
            except Exception as rollback_error:
                # The connection itself may be gone; nothing more can be undone.
                print(rollback_error)
            return False
        return True

    def get_unit(self, doc_resp):
        """Return the reporting currency shown in the quote header as ``'<CODE>(千)'``.

        The header span is expected to contain ``"Currency in <code>"``. The
        code is upper-cased, anything from a ``(`` onwards is dropped, and the
        fixed suffix ``(千)`` is appended.

        Args:
            doc_resp: PyQuery document of the rendered page.

        Returns:
            str: for example ``'USD(千)'``. An empty string is returned when the
            header or the "Currency in" marker cannot be found, so the caller
            never stores unrelated header text as a unit.
        """
        try:
            header_spans = doc_resp(
                '#quote-header-info >div:nth-child(2)>div:nth-child(1)>div:nth-child(2)>span')
            header_text = header_spans.eq(0).text() or ''
        except Exception:
            return ''

        found = re.search(r'Currency in\s(.*)', header_text)
        if found is None:
            return ''
        # Strip so "USD (…)" and "USD" both produce the same unit string.
        code = found.group(1).split('(')[0].strip()
        if not code:
            return ''
        return code.upper() + '(千)'

    # Comparison-index calculation
    def calculateIndexReq(self):
        """Ask the sync service to run the comparison-index calculation (``type=2``).

        If the service answers with ``code == -200`` the request is repeated
        once after a ten-minute wait, with the same parameters. Every failure is
        reported on stdout and swallowed; nothing is returned.
        """
        get_url = 'http://114.115.236.206:8088/sync/calculateIndex'
        params = {
            'type': 2
        }

        def request_code():
            # One round trip: print the raw reply and return its "code" field.
            resp = requests.get(get_url, params=params)
            print(resp.text)
            return json.loads(resp.text)['code']

        try:
            # NOTE(review): -200 presumably means "busy / try later" - confirm
            # with the service owner.
            if request_code() == -200:
                time.sleep(600)
                request_code()
            print('调用接口成功！！')
        except Exception as e:
            print('调用失败！')
            print(e)
if __name__ == '__main__':
    # Worker loop: take a stock code from Redis, scrape it, and repeat forever.
    crawler = YahooCaiwu()
    queue_key = 'NoticeEnterprise:securities_code'
    while True:
        code = ''
        try:
            code = crawler.getCodeFromRedis()
            crawler.get_content2(code)
        except Exception:
            print('没有数据暂停5分钟')
            crawler.calculateIndexReq()
            if not code:
                # Nothing was popped from the list: pause before polling again.
                time.sleep(300)
                print('没有数据暂停5分钟')
            else:
                # A code was taken but processing raised: put it back on the
                # tail of the list so it is tried again later.
                crawler.r.rpush(queue_key, code)

