import json
import re
import time
import calendar

import pymongo
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from apscheduler.schedulers.blocking import BlockingScheduler
from retry import retry

import BaseCore

# MongoDB collection that receives every record written by this module
# (database ``RESCenter``, collection ``REITsTxnStat``).
# NOTE(review): the host, user name and password are hard-coded in source;
# consider loading them from configuration or the environment instead.
db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='ZZsn@9988').RESCenter[
    'REITsTxnStat']
# Shared project helper object; it supplies the logger used below and is
# closed by the script entry point.
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
# HTTP headers sent with every request issued by getJson(); Host and Referer
# are set to the query.sse.com.cn / www.sse.com.cn values.
headers = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Host': 'query.sse.com.cn',
    'Pragma': 'no-cache',
    'Referer': 'http://www.sse.com.cn/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
}


@retry(tries=5, delay=20)
def getJson(url, timeout=30):
    """Fetch a JSONP response and return the decoded JSON payload.

    The response body is expected to look like ``callback({...})``; the text
    between the first ``(`` and the last ``)`` is parsed as JSON.

    The ``retry`` decorator re-invokes this function (up to 5 attempts,
    20 seconds apart) whenever it raises.

    :param url: Full request URL, including the JSONP callback parameter.
    :param timeout: Seconds to wait for the server before giving up, so a
        stalled connection cannot block the caller forever.
    :return: The decoded JSON object.
    :raises IndexError: If the body contains no parenthesised payload.
    :raises json.JSONDecodeError: If the payload is not valid JSON.
    :raises requests.RequestException: On network errors or timeout.
    """
    # ip = baseCore.get_proxy()
    req = requests.get(url, headers=headers, timeout=timeout)
    try:
        req.encoding = req.apparent_encoding
        # Raw string avoids the invalid "\(" escape; re.S lets the greedy
        # match span line breaks inside the payload.
        payload = re.findall(r'\((.*)\)', req.text, re.S)[0]
        return json.loads(payload)
    finally:
        # Release the connection even when extraction or decoding fails.
        req.close()


# 2021-06-26
# Daily overview
def getDayData():
    """Collect daily REITs statistics for the previous five days.

    For each day from five days ago through yesterday, the function skips
    days already stored for the Shanghai Stock Exchange, requests that day's
    summary from query.sse.com.cn, and inserts one document into
    ``db_storage``. Request failures, empty results and malformed records are
    logged (where applicable) and skipped so the remaining days are still
    processed.

    NOTE(review): the stored ``date`` value carries the time of day at which
    the job ran, not midnight — confirm whether consumers rely on that.
    """
    # start_date = datetime(2021, 6, 21)
    today = datetime.today()
    date_range = [today - timedelta(days=back) for back in range(5, 0, -1)]
    for date in date_range:
        date_ = date.strftime('%Y-%m-%d')
        # Check for an existing record first so that days which are already
        # stored do not cost an HTTP request.
        is_insert = db_storage.find_one({'strDate': date_, 'exchange': '上海证券交易所'})
        if is_insert:
            log.info(f'{date}===已采集')
            continue
        url = f'http://query.sse.com.cn/commonQuery.do?jsonCallBack=jsonpCallback89728&sqlId=COMMON_SSE_REITS_HQXX_CJTJ_DAY_L&TRADE_DATE={date_}&FUND_TYPE=01&_={int(time.time())}'
        try:
            result = getJson(url)['result']
        except Exception as e:
            log.error(f'{date}===连接失败==={e}')
            time.sleep(3)
            continue
        if not result:
            # The API returned no rows for this date; nothing to store.
            continue
        data_json = result[0]
        try:
            # The x10000 scaling is kept from the original code; presumably
            # the API reports these values in units of ten thousand —
            # TODO confirm.
            dic_info = {
                'number': int(data_json['LIST_NUM']),  # number of listed products
                'volume': float(data_json['TRADE_VOL']) * 10000,  # trading volume
                'amount': float(data_json['TRADE_AMT']) * 10000,  # trading amount
                'totalValue': float(data_json['TOTAL_VALUE']) * 10000,  # total market value
                'negoValue': float(data_json['NEGO_VALUE']) * 10000,  # negotiable market value
                'toRate': float(data_json['TO_RATE']),  # turnover rate
                'date': date,
                'strDate': date_,
                'country': '中国',
                'exchange': '上海证券交易所',
                'currency': 'CNY',  # currency
            }
        except (KeyError, TypeError, ValueError) as e:
            # A missing or non-numeric field must not abort the other days.
            log.error(f'{date}===数据解析失败==={e}')
            time.sleep(3)
            continue
        try:
            db_storage.insert_one(dic_info)
            log.info(f'{date}===采集成功')
        except Exception as e:
            log.error(f'{date}===数据存储失败==={e}')
        time.sleep(3)


# Weekly overview
def getWeekData(writer):
    """Collect weekly REITs statistics and export them to an Excel sheet.

    Walks consecutive 7-day windows from 2021-06-21 up to today, requests the
    weekly summary for each window, inserts every returned row into
    ``db_storage`` and finally writes all rows to the sheet ``每周概况``.

    NOTE(review): rows are inserted without a duplicate check, so repeated
    runs store the same weeks again — confirm that this is intended.

    :param writer: Target passed straight to ``DataFrame.to_excel`` (a path
        or a ``pandas.ExcelWriter``).
    """
    columns = ['挂牌数', '成交量(亿份)', '成交金额(亿元)', '市价总额(亿元)', '流通市值(亿元)', '换手率(%)', '日期']
    data_list = []
    start_date = datetime(2021, 6, 21)
    end_date = datetime.today()
    for offset in range(0, (end_date - start_date).days + 1, 7):
        week_start = start_date + timedelta(days=offset)
        date_1 = week_start.strftime('%Y-%m-%d')
        date_2 = (week_start + timedelta(days=6)).strftime('%Y-%m-%d')
        url = f'http://query.sse.com.cn/commonQuery.do?jsonCallBack=jsonpCallback65413&sqlId=COMMON_SSE_REITS_HQXX_CJTJ_WEEK_L&START_DATE={date_1}&END_DATE={date_2}&FUND_TYPE=01&_={int(time.time())}'
        # Treat a missing/null result like an empty one.
        data_json = getJson(url)['result'] or []
        for data_ in data_json:
            dic_info = {
                '挂牌数': data_['LIST_NUM'],
                '成交量(亿份)': data_['TRADE_VOL'],
                '成交金额(亿元)': data_['TRADE_AMT'],
                '市价总额(亿元)': data_['TOTAL_VALUE'],
                '流通市值(亿元)': data_['NEGO_VALUE'],
                '换手率(%)': data_['TO_RATE'],
                '日期': f'{date_1}至{date_2}',
                '类别': '每周概况'
            }
            # Build the spreadsheet row before inserting: insert_one adds an
            # ``_id`` key to the dict, which must not reach the sheet.
            data_list.append([dic_info[column] for column in columns])
            db_storage.insert_one(dic_info)
            log.info(f'{date_1}至{date_2}===采集完成')
        time.sleep(1)
    # Passing ``columns`` to the constructor keeps the header even when no
    # rows were collected; assigning df.columns afterwards fails on an empty
    # frame. Building from the list also avoids numpy's string coercion.
    df = pd.DataFrame(data_list, columns=columns)
    df.to_excel(writer, sheet_name='每周概况', index=False)


# Monthly overview
def getMonthData(writer):
    """Collect monthly REITs statistics and export them to an Excel sheet.

    Iterates month by month from 2021-06 through the current month, requests
    the monthly summary for each, inserts every returned row into
    ``db_storage`` and finally writes all rows to the sheet ``每月概况``.

    NOTE(review): rows are inserted without a duplicate check, so repeated
    runs store the same months again — confirm that this is intended.

    :param writer: Target passed straight to ``DataFrame.to_excel`` (a path
        or a ``pandas.ExcelWriter``).
    """
    columns = ['挂牌数', '成交量(亿份)', '成交金额(亿元)', '市价总额(亿元)', '流通市值(亿元)', '换手率(%)', '日期']
    data_list = []
    start_date = datetime.strptime('2021-06-01', '%Y-%m-%d')
    current_date = datetime.now()
    while start_date <= current_date:
        date = start_date.strftime('%Y-%m')
        url = f'http://query.sse.com.cn/commonQuery.do?jsonCallBack=jsonpCallback76435&sqlId=COMMON_SSE_REITS_HQXX_CJTJ_MONTH_L&TRADE_DATE={date}&FUND_TYPE=01&_={int(time.time())}'
        # Treat a missing/null result like an empty one.
        data_json = getJson(url)['result'] or []
        for data_ in data_json:
            dic_info = {
                '挂牌数': data_['LIST_NUM'],
                '成交量(亿份)': data_['TRADE_VOL'],
                '成交金额(亿元)': data_['TRADE_AMT'],
                '市价总额(亿元)': data_['TOTAL_VALUE'],
                '流通市值(亿元)': data_['NEGO_VALUE'],
                '换手率(%)': data_['TO_RATE'],
                '日期': date,
                '类别': '月度概况'
            }
            # Build the spreadsheet row before inserting: insert_one adds an
            # ``_id`` key to the dict, which must not reach the sheet.
            data_list.append([dic_info[column] for column in columns])
            db_storage.insert_one(dic_info)
            log.info(f'{date}===采集完成')
        # Advance to the first day of the next month (day is always 1, so
        # replace() cannot produce an invalid date).
        if start_date.month == 12:
            start_date = start_date.replace(year=start_date.year + 1, month=1)
        else:
            start_date = start_date.replace(month=start_date.month + 1)
        time.sleep(1)
    # Passing ``columns`` to the constructor keeps the header even when no
    # rows were collected; assigning df.columns afterwards fails on an empty
    # frame. Building from the list also avoids numpy's string coercion.
    df = pd.DataFrame(data_list, columns=columns)
    df.to_excel(writer, sheet_name='每月概况', index=False)


def task():
    """Schedule ``getDayData`` to run daily at 08:00 and block in the scheduler.

    ``BlockingScheduler.start()`` does not return while the scheduler is
    running. Any ``Exception`` it raises is logged rather than propagated.
    """
    # Create the scheduler.
    scheduler = BlockingScheduler()
    # Run once a day (cron trigger: hour 8, minute 0).
    scheduler.add_job(getDayData, 'cron', hour='8', minute=0, max_instances=2)
    try:
        scheduler.start()
    except Exception as e:
        # Embed the exception in the message: passing it as an extra
        # positional argument without a placeholder loses it.
        log.error(f'定时采集异常==={e}')


if __name__ == '__main__':
    try:
        task()
    finally:
        # Always close the shared helper, including when the blocking
        # scheduler is stopped by KeyboardInterrupt/SystemExit.
        baseCore.close()