import configparser
import re
import time
from urllib.parse import quote, unquote

import pymysql
import redis
import requests
import json
from pyquery import PyQuery as pq
from bs4 import BeautifulSoup
import difflib
import urllib3
from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from base.BaseCore import BaseCore
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from operator import itemgetter
from itertools import groupby
import datetime
from decimal import Decimal

class SinaUsstock(object):

    def __init__(self):
        """Set up the logger, configuration, Redis client and browser driver.

        Redis connection settings and the Chrome / chromedriver paths are
        read from ``config.ini`` (resolved relative to the working directory).
        """
        self.logger = BaseCore().getLogger()
        # Load the configuration file.
        parser = self.config = configparser.ConfigParser()
        parser.read('config.ini')
        redis_settings = {
            'host': parser.get('redis', 'host'),
            'port': parser.get('redis', 'port'),
            'password': parser.get('redis', 'pass'),
            'db': 6,
        }
        self.r = redis.Redis(**redis_settings)
        self.driver = self.get_webdriver()

    def conn11(self):
        """Open a new MySQL connection to the ``clb_project`` database.

        :return: ``(connection, cursor)`` tuple; the caller owns both objects.
        """
        # NOTE(review): host and credentials are hard-coded here; consider
        # moving them into config.ini next to the Redis settings.
        connection = pymysql.Connect(
            host='114.116.44.11',
            port=3306,
            user='caiji',
            passwd='f7s0&7qqtK',
            db='clb_project',
            charset='utf8',
        )
        return connection, connection.cursor()

    def deal_table(self,doc_resp):
        """Flatten the second ``data_tbl os_tbl`` table of a page into records.

        The first table row supplies the unit (the ``th`` text after ``:``)
        and the period labels (the ``td`` texts, with ``至`` removed). Every
        following row supplies an indicator name (``th``) and one value per
        period (``td``). Values containing ``亿`` or ``万`` are multiplied out
        to plain numbers (``亿`` = 10^8, ``万`` = 10^4).

        :param doc_resp: HTML source of the page.
        :return: list of dicts with the keys ``name``, ``value`` (string),
            ``ddte`` (period label) and ``uint`` (unit text).
        :raises IndexError: if the page has fewer than two matching tables or
            a row has fewer values than there are period labels.
        """
        tables = BeautifulSoup(doc_resp,'html.parser').select(
            'div[class="tbl_wrap"]>table[class="data_tbl os_tbl"]')
        table_doc = pq(str(tables[1]))
        uint = table_doc.find('tbody>tr:nth-child(1)>th').text().split(':')[1]
        periods = table_doc.find('tbody>tr:nth-child(1)>td').text().replace('至','').split(' ')

        # Chinese magnitude markers and the factor each one stands for.
        # Checked in this order; only the first marker found is applied.
        unit_multipliers = (
            ('亿', Decimal('100000000')),
            ('万', Decimal('10000')),
        )

        records = []
        for row in table_doc.find('tbody>tr:gt(0)'):
            row_doc = pq(row)
            indicator = row_doc.find('th').text().replace('+','').replace('-','')
            cells = row_doc.find('td').text().split(' ')
            for idx, period in enumerate(periods):
                amount = cells[idx]
                for marker, multiplier in unit_multipliers:
                    if marker not in amount:
                        continue
                    try:
                        amount = amount.replace(marker, "").replace(",", "")
                        amount = Decimal(amount) * multiplier
                    except Exception as e:
                        # Conversion failed: keep the stripped text as the value.
                        print(e)
                        print(amount)
                    break
                records.append({
                    'name':indicator,
                    'value':str(amount).replace(",", ""),
                    'ddte':period,
                    'uint':uint,
                })

        return records

    # Check whether this stock code has been collected before.
    def check_code(self,com_code):
        """Tell whether this stock code is being collected for the first time.

        :param com_code: stock code used as the suffix of the Redis key.
        :return: ``True`` when the key ``com_sinacaiwushuju_code::<com_code>``
            does not exist yet (first collection), ``False`` otherwise.
        """
        client = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3)
        # An existing key means the company has been collected before.
        return not client.exists('com_sinacaiwushuju_code::'+com_code)

    def check_date(self,com_code,info_date):
        """Tell whether a report period is already recorded for a stock code.

        :param com_code: stock code used as the suffix of the Redis key.
        :param info_date: period value to look up in the Redis set.
        :return: ``True`` if ``info_date`` is a member of the set stored at
            ``com_sinacaiwushuju_code::<com_code>``, else ``False``.
        """
        client = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
        # The periods are stored as members of a Redis set.
        return bool(client.sismember('com_sinacaiwushuju_code::'+com_code, info_date))

    # Save the report periods collected for a stock code into Redis.
    def add_date(self,com_code,date_list):
        """Record collected report periods for a stock code in Redis.

        Each period is added to the Redis set stored at
        ``com_sinacaiwushuju_code::<com_code>``.

        :param com_code: stock code used as the suffix of the Redis key.
        :param date_list: iterable of period values; a single ``str`` or
            ``bytes`` value is accepted too and is stored as one period.
        :return: None
        """
        # A bare string is itself iterable: without this guard every single
        # character of the date would be stored as a separate set member.
        if isinstance(date_list, (str, bytes)):
            date_list = [date_list]
        r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3)
        key = 'com_sinacaiwushuju_code::'+com_code
        for date in date_list:
            r.sadd(key, date)

    def getCodeFromRedis(self):
        securitiescode=self.r.lpop('sina_usstock:securities_code')
        securitiescode = securitiescode.decode('utf-8')
        return securitiescode

    def get_webdriver(self):
        """Create the Chrome driver used for scraping.

        The Chrome binary and chromedriver paths come from the ``selenium``
        section of the loaded configuration.

        :return: a new ``webdriver.Chrome`` instance.
        """
        options = webdriver.ChromeOptions()
        for flag in ('--disable-gpu', '--ignore-certificate-errors'):
            options.add_argument(flag)
        # Drop the automation switch and the blink automation feature flag.
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        for flag in ("--disable-blink-features=AutomationControlled", "--start-maximized"):
            options.add_argument(flag)
        # Headless mode is not enabled ('--headless' is not passed).
        options.binary_location = self.config.get('selenium', 'binary_location')
        driver_path = self.config.get('selenium', 'chrome_driver')
        return webdriver.Chrome(options=options, executable_path=driver_path)

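    # Build the Sina Finance URL from the stock code, then locate the cash-flow,
    # balance-sheet and income-statement data on the page.
    # If an error occurs while scraping, the stock code is pushed back to Redis.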
    def get_content2(self,securitiescode):
        """Collect the annual and quarterly statements of one stock and upload them.

        Looks the securities code up in ``sys_base_enterprise_ipo``. For every
        matching row it opens the Sina US-stock finance page and scrapes the
        cash-flow, balance-sheet and income-statement tables (annual view,
        then quarterly view, for each statement). The figures are grouped by
        period and handed to ``sendToFinance``.

        If scraping or uploading fails, the securities code is pushed back
        onto the ``sina_usstock:securities_code`` Redis list and the method
        returns. A failure outside the per-statement scraping also restarts
        the browser driver first.

        :param securitiescode: securities code to collect, e.g. ``'AAPL'``.
        :return: None
        """
        self.logger.info(f"需要采集的股票代码{securitiescode}")
        conn,cursor=self.conn11()
        try:
            # Let the driver bind the value instead of formatting it into the
            # SQL text, so a malformed code cannot alter the statement.
            sql1 = ("select social_credit_code,securities_code,securities_short_name "
                    "from sys_base_enterprise_ipo where securities_code=%s")
            cursor.execute(sql1, (securitiescode,))
            result_data = cursor.fetchall()
        except Exception as e:
            self.logger.info("数据查询异常！")
            self.logger.info(e)
            # NOTE(review): the code is not pushed back to Redis on a failed
            # query (unchanged from before) - confirm this is intended.
            return
        finally:
            # The rows are already fetched (or the query failed); the
            # connection is not used again, so release it here.
            for resource in (cursor, conn):
                try:
                    resource.close()
                except Exception as close_err:
                    self.logger.info(close_err)

        # (tab link text, key used for the statement in the uploaded record):
        # cash-flow statement, balance sheet, income statement.
        statements = (
            ('现金流量表', 'cash'),
            ('资产负债表', 'debt'),
            ('利润表', 'profit'),
        )
        for data in result_data:
            try:
                social_credit_code = data[0]
                stock = data[1]
                url=f'http://quotes.sina.com.cn/usstock/hq/balance.php?s={stock}'
                self.driver.get(url)
                self._wait_for_page()

                annualzb = []
                quarterzb = []
                try:
                    for tab_label, zbtype in statements:
                        # Open the statement tab and read its annual view,
                        # then switch the same tab to the quarterly view.
                        annualzb += self._scrape_statement('年度数据', zbtype, tab_label)
                        quarterzb += self._scrape_statement('季度数据', zbtype)
                except Exception as e:
                    self.logger.info(e)
                    self.r.rpush('sina_usstock:securities_code',securitiescode)
                    return

                # Convert to the upload format and send it to the interface.
                annualzb=self.groupZbData(annualzb,stock,social_credit_code,'year')
                self.sendToFinance(annualzb)
                quarterzb=self.groupZbData(quarterzb,stock,social_credit_code,'quarter')
                self.sendToFinance(quarterzb)
                self.logger.info(f'++++++++++股票：{stock}采集结束')

            except Exception as e:
                # Start over with a fresh browser before giving the code back.
                self.driver.quit()
                time.sleep(3)
                self.driver=self.get_webdriver()
                self.logger.info(e)
                self.r.rpush('sina_usstock:securities_code',securitiescode)
                return

    def _wait_for_page(self):
        """Wait up to 10 seconds for an element with class ``grey`` to be present."""
        wait = WebDriverWait(self.driver, 10)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, "grey")))

    def _scrape_statement(self, period_label, zbtype, tab_label=None):
        """Click a period link (after an optional statement tab) and parse the table.

        :param period_label: text of the period link to click
            (``'年度数据'`` for annual, ``'季度数据'`` for quarterly).
        :param zbtype: key under which ``groupdata`` stores the rows.
        :param tab_label: text of the statement tab to open first; ``None``
            keeps the currently open statement.
        :return: the list produced by ``groupdata`` for the parsed table.
        """
        if tab_label is not None:
            self.driver.find_element(
                By.XPATH, f'//div[@class="title"]//a[text()="{tab_label}"]').click()
            self._wait_for_page()
        self.driver.find_element(
            By.XPATH,
            f'//div[@class="tbl_wrap"]/div[@align="right"]/a[text()="{period_label}"]').click()
        self._wait_for_page()
        series = self.deal_table(self.driver.page_source)
        return self.groupdata(series, zbtype)



    def sendToFinance(self,zbmsg):
        """POST every grouped record to the finance sync endpoint.

        For each record, ``ynFirst`` is set to ``True`` when ``check_code``
        reports that the stock has not been collected before. The record is
        then sent as JSON, with up to three attempts. After a successful
        request the record's period is stored through ``add_date``.

        :param zbmsg: iterable of dicts as produced by ``groupZbData``; each
            must contain ``securitiesCode`` and ``date``.
        :return: None
        """
        url_baocun = 'http://114.115.236.206:8088/sync/finance/sina'
        max_attempts = 3
        for zbb in zbmsg:
            com_code=zbb['securitiesCode']
            com_date=zbb['date']
            # Flag the record when this stock code has no stored periods yet.
            if self.check_code(com_code):
                zbb['ynFirst']=True

            data = json.dumps(zbb)
            for attempt in range(1, max_attempts + 1):
                try:
                    # A timeout keeps an unresponsive endpoint from blocking
                    # the crawler forever.
                    res_baocun = requests.post(url_baocun, data=data, timeout=60)
                    # Remember the period as collected. add_date expects a
                    # list of periods, not the bare date string.
                    self.add_date(com_code,[com_date])

                    self.logger.info(res_baocun.text)
                    break
                except Exception as e:
                    self.logger.info(
                        f'upload of {com_code} {com_date} failed '
                        f'(attempt {attempt}/{max_attempts}): {e}')
                    time.sleep(1)
            else:
                self.logger.info(
                    f'upload of {com_code} {com_date} abandoned after {max_attempts} attempts')

    # Merge the per-statement records into one record per period.
    # Inputs: zbList, stock, social_credit_code, dateFlag.
    def groupZbData(self,zbList,stock,social_credit_code,dateFlag):
        self.logger.info('数据根据日期进行组合')
        # 根据 date对数据分组
        # 根据时间属性对列表内容进行分类
        zbList.sort(key=itemgetter('date'))  # 先按照age属性进行排序
        zbgroups = groupby(zbList, key=itemgetter('date'))  # 根据age属性进行分组
        # 遍历每个分组，并打印分类结果
        zbList=[]
        for date, group in zbgroups:
            result={}
            for item in group:
                for key, value in item.items():
                    if key == "date":
                        continue
                    if key not in result:
                        result[key] = []
                    result[key].extend(value)
            result["date"] = date
            result["securitiesCode"] = stock
            result["socialCreditCode"] = social_credit_code
            result["dateFlag"] = dateFlag
            result["ynFirst"] = False
            # "securitiesCode": "2342",
            # "socialCreditCode": "12314",
            # "date": "2023-06-31",
            # "dateFlag": "quarter",
            # "ynFirst": false
            zbList.append(result)
        return zbList

    # Group parsed table rows by period under the given financial-statement key.
    def groupdata(self,ssMsg,zbtype):
        self.logger.info('对数据进行指标数据进分组')
        # 根据时间属性对列表内容进行分类
        ssMsg.sort(key=itemgetter('ddte'))  # 先按照age属性进行排序
        groups = groupby(ssMsg, key=itemgetter('ddte'))  # 根据age属性进行分组
        # 遍历每个分组，并打印分类结果
        zbList=[]
        for ddte, group in groups:
            # print(f"ddte: {ddte}")
            ssList=[]
            for item in group:
                ii={
                    "enName": "",
                    "name": item['name'],
                    "value": item['value'],
                    "uint": item['uint']
                }
                ssList.append(ii)
            ddrit={
                "date":ddte,
                zbtype:ssList
            }
            zbList.append(ddrit)
        return zbList

    def getFormatedate(self,timestamp):
        date = datetime.datetime.fromtimestamp(timestamp)
        formatted_date = date.strftime('%Y-%m-%d')
        return formatted_date



if __name__ == '__main__':
    # Script entry point: collect one stock, and give the code back to the
    # Redis work list if the run fails.
    sinaUsstock=SinaUsstock()
    # The code is fixed for this run; it could instead be taken from the
    # Redis work list with sinaUsstock.getCodeFromRedis().
    securitiescode='AAPL'
    try:
        sinaUsstock.get_content2(securitiescode)
    except Exception as e:
        # Log the failure instead of dropping it silently, then re-queue.
        sinaUsstock.logger.info(e)
        sinaUsstock.r.rpush('sina_usstock:securities_code',securitiescode)
    finally:
        # Always shut the browser down so no Chrome/chromedriver process is
        # left behind when the script ends.
        sinaUsstock.driver.quit()













