import os
import re
import time
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import pymongo
import requests
from retry import retry

from base import BaseCore

# Shared project helper; this file uses it for logging (getLogger),
# proxies (get_proxy) and final cleanup (close).
baseCore = BaseCore.BaseCore()

# MongoDB connection settings. Each value can be overridden through an
# environment variable; the literals are only the historical defaults so
# existing deployments keep working when the variables are unset.
# TODO(security): credentials should not live in source control - rotate them
# and drop the defaults once every deployment sets the variables.
_mongo_client = pymongo.MongoClient(
    os.environ.get('REITS_MONGO_URI', 'mongodb://114.115.221.202:27017'),
    username=os.environ.get('REITS_MONGO_USER', 'admin'),
    password=os.environ.get('REITS_MONGO_PASSWORD', 'zzsn@9988'),
)
# Collection that receives the daily quote documents written by getDataList.
db_storage = _mongo_client.研究中心['REITs基金行情-深圳']
log = baseCore.getLogger()

# Request headers shared by every HTTP call in this file. They describe an
# AJAX request issued from the szse.cn product-list page.
headers = dict(
    [
        # Content negotiation
        ('Accept', 'application/json, text/javascript, */*; q=0.01'),
        ('Accept-Encoding', 'gzip, deflate'),
        ('Accept-Language', 'zh-CN,zh-TW;q=0.9,zh;q=0.8'),
        # Caching / connection handling
        ('Cache-Control', 'no-cache'),
        ('Connection', 'keep-alive'),
        ('Content-Type', 'application/json'),
        ('Host', 'www.szse.cn'),
        ('Pragma', 'no-cache'),
        # Browser identity
        ('Referer', 'http://www.szse.cn/market/product/list/all/index.html'),
        (
            'User-Agent',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
        ),
        # AJAX markers
        ('X-Request-Type', 'ajax'),
        ('X-Requested-With', 'XMLHttpRequest'),
    ]
)


# Fetch each fund's short name, code and listing date
@retry(tries=3, delay=3)
def getData():
    """Download the REITs fund list and return its key columns.

    The request goes through a proxy obtained from ``baseCore.get_proxy()``.
    The ``retry`` decorator re-runs the whole function (3 tries, 3 s delay)
    when any exception is raised.

    Returns:
        list: one ``[short_name, fund_code, listing_date]`` list per fund,
        each value stripped of surrounding whitespace. ``doJob`` parses the
        listing date with the ``%Y-%m-%d`` format.

    Raises:
        Exception: whatever the last attempt raised (network error, timeout,
            unexpected JSON layout, or ``IndexError`` when ``jjjcurl`` holds
            no ``<u>...</u>`` element).
    """
    # NOTE(review): only PAGENO=1 with PAGESIZE=10 is requested, so presumably
    # at most ten funds come back - confirm whether paging is required.
    url = 'https://reits.szse.cn/api/report/ShowReport/data?SHOWTYPE=JSON&CATALOGID=reits_fund_list&PAGENO=1&PAGESIZE=10'
    proxies = baseCore.get_proxy()
    # NOTE(review): the shared headers carry "Host: www.szse.cn" while this URL
    # targets reits.szse.cn - confirm that this is intended.
    # The timeout keeps a stalled proxy from hanging the job forever; the
    # context manager releases the connection even when decoding fails.
    with requests.get(url, headers=headers, proxies=proxies, timeout=30) as req:
        req.encoding = req.apparent_encoding
        rows = req.json()[0]['data']
    return [
        [
            # The short name arrives wrapped in markup; keep the <u> text only.
            re.findall('<u>(.*?)</u>', row['jjjcurl'])[0].strip(),
            row['sys_key'].strip(),
            row['ssrq'].strip(),
        ]
        for row in rows
    ]


# Fetch the basic information of every fund
@retry(delay=5)
def getInfoList():
    """Download the basic-information table for infrastructure funds.

    The request goes through a proxy obtained from ``baseCore.get_proxy()``.
    The ``retry`` decorator is given no ``tries`` value, so a failing call is
    re-run every 5 s for as long as the decorator's default allows.

    Returns:
        list: one list per fund, in this column order:
        fund code, fund short name, fund category, investment category,
        listing date, current size (10,000 units), fund manager,
        latest fund net value.
    """
    url = 'http://www.szse.cn/api/report/ShowReport/data?SHOWTYPE=JSON&CATALOGID=1105&TABKEY=tab1&selectJjlb=%E5%9F%BA%E7%A1%80%E8%AE%BE%E6%96%BD%E5%9F%BA%E9%87%91'
    proxies = baseCore.get_proxy()
    # The timeout keeps a stalled proxy from hanging forever; the context
    # manager releases the connection even when decoding fails.
    with requests.get(url, headers=headers, proxies=proxies, timeout=30) as req:
        req.encoding = req.apparent_encoding
        rows = req.json()[0]['data']
    return [
        [
            # Code and short name arrive wrapped in markup; keep the <u> text.
            re.findall('<u>(.*?)</u>', row['sys_key'])[0],
            re.findall('<u>(.*?)</u>', row['jjjcurl'])[0],
            row['jjlb'],
            row['tzlb'],
            row['ssrq'],
            row['dqgm'],
            row['glrmc'],
            row['cxjqhq'],
        ]
        for row in rows
    ]


# Fetch a fund's trading data and store it
@retry(tries=5, delay=20)
def getDataList(code, start_date, end_date):
    """Download one fund's daily quotes for a date window and store new rows.

    Every row that is not yet present in ``db_storage`` is inserted as one
    document; rows already stored are skipped. The ``retry`` decorator re-runs
    the whole function (5 tries, 20 s delay) when any exception is raised;
    rows inserted by an earlier attempt are then skipped by the duplicate
    check.

    Args:
        code: value sent as the ``txtDMorJC`` query parameter.
        start_date: window start; the first 10 characters of ``str()`` are
            sent as ``txtBeginDate``.
        end_date: window end; the first 10 characters of ``str()`` are sent
            as ``txtEndDate``.

    Raises:
        Exception: whatever the last attempt raised (network error, timeout,
            unexpected JSON layout, non-numeric price field, database error).
    """
    proxies = baseCore.get_proxy()
    url = f'http://www.szse.cn/api/report/ShowReport/data?SHOWTYPE=JSON&CATALOGID=1815_stock_snapshot&TABKEY=tab2&txtDMorJC={code}&txtBeginDate={str(start_date)[:10]}&txtEndDate={str(end_date)[:10]}&archiveDate=2021-11-01'
    # The timeout keeps a stalled proxy from hanging forever; the context
    # manager releases the connection even when decoding fails.
    with requests.get(url, headers=headers, proxies=proxies, timeout=30) as req:
        req.encoding = req.apparent_encoding
        # Process the rows in the reverse of the order the API returned them.
        rows = req.json()[0]['data'][::-1]
    for row in rows:
        trade_date = row['jyrq']
        fund_code = row['zqdm']
        # Look up by the same code value that is stored below; otherwise the
        # check can never match and every run inserts duplicates.
        already_stored = db_storage.find_one(
            {'code': fund_code, 'date': trade_date, 'exchange': '深圳证券交易所'})
        if already_stored:
            log.info(f'{code}==={trade_date}===已采集')
            continue
        dic_info = {
            # Code and short name are identifiers, not numbers: keep them as
            # text (float() fails on a non-numeric short name).
            'code': fund_code,  # security code
            'shortName': row['zqjc'],  # short name
            'opening': float(row['ks']),  # opening price
            'max': float(row['zg']),  # highest price
            'min': float(row['zd']),  # lowest price
            'closed': float(row['ss']),  # closing price
            'beforeClosed': float(row['qss']),  # previous closing price
            'volume': row['cjgs'],  # traded volume, stored as received
            'amount': row['cjje'],  # traded amount, stored as received
            'date': trade_date,  # trading date
            'country': '中国',  # country
            'exchange': '深圳证券交易所'  # exchange
        }
        db_storage.insert_one(dic_info)
        log.info(f'{code}==={trade_date}===采集成功')
        # Pause between inserts, as the original implementation did.
        time.sleep(1)


def doJob():
    """Collect the daily quotes of every fund from its listing date until now.

    The fund list comes from ``getData()``, whose rows are
    ``[short_name, fund_code, listing_date]``. For each fund the period from
    the listing date to the current time is walked in consecutive windows of
    at most five days, and each window is handed to ``getDataList``. A window
    that still fails after ``getDataList``'s own retries is logged and
    skipped so the remaining windows and funds are still processed.
    """
    fund_list = getData()
    log.info('开始采集')
    window_span = timedelta(days=5)
    one_day = timedelta(days=1)
    # Unpack in the order getData() builds its rows: name first, then code.
    for name, code, listing_date in fund_list:
        log.info(f'{code}==={name}===开始采集')
        start_date = datetime.strptime(listing_date, "%Y-%m-%d")
        current_date = datetime.now()
        # Loop on the window start so the final, shorter window that ends
        # "now" is fetched as well instead of being dropped.
        while start_date <= current_date:
            end_date = min(start_date + window_span, current_date)
            time.sleep(1)
            try:
                getDataList(code, start_date, end_date)
            except Exception:
                # Exception (not a bare except) so Ctrl-C / SystemExit still
                # stop the job.
                log.error(f'{code}==={start_date}-{end_date}===采集失败')
            start_date = end_date + one_day


if __name__ == '__main__':
    # Always release baseCore's resources, even when the job aborts with an
    # exception.
    try:
        doJob()
    finally:
        baseCore.close()
