import requests, re, time, pymysql
from bs4 import BeautifulSoup as bs
from fdfs_client.client import get_tracker_conf, Fdfs_client
from base import BaseCore

# Shared project helper; this file uses it for the logger, proxy lookup,
# queue pulls, company info lookup and task logging.
baseCore = BaseCore.BaseCore()
# NOTE(review): intended to raise the default retry count of requests; the
# adapter may bind its default at definition time, in which case this
# assignment has no effect — confirm, or mount an HTTPAdapter(max_retries=3).
requests.adapters.DEFAULT_RETRIES = 3
log = baseCore.getLogger()

# Module-level MySQL connection and cursor shared by tableUpdate().
# NOTE(review): host and credentials are hard-coded in source — consider
# moving them to configuration or environment variables.
cnx = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4')
cursor = cnx.cursor()
# FastDFS client built from the local ./client.conf; used to upload the PDFs.
tracker_conf = get_tracker_conf('./client.conf')
client = Fdfs_client(tracker_conf)
# Task label passed to baseCore.recordLog for every log entry of this script.
# NOTE(review): the label names 雪球网 (Xueqiu) while getContent() requests
# vip.stock.finance.sina.com.cn — confirm the label is intentional.
taskType = '企业年报/雪球网'


def tableUpdate(year, com_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by, status,
                create_by, create_time):
    """Insert one attachment row into ``clb_sys_attachment`` if it is new.

    A row counts as already present when a record with the same ``item_id``
    and ``year`` exists; in that case only a notice is printed. Otherwise
    the row is inserted through the module-level cursor and committed on the
    module-level connection. ``com_name`` is stored in the ``name`` column.
    """
    lookup_sql = '''select item_id from clb_sys_attachment where item_id = %s and year = %s'''
    cursor.execute(lookup_sql, (item_id, year))
    if cursor.fetchone():
        # Same company and year already registered: nothing to insert.
        print(f'{com_name},{year}已存在')
        return

    insert_sql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
    row = (
        year, com_name, type_id, item_id, group_name, path, full_path,
        category, file_size, order_by, status, create_by, create_time,
    )
    cursor.execute(insert_sql, row)  # insert
    cnx.commit()  # commit
    print("更新完成:{}".format(insert_sql))


def _recordResult(social_code, start_time, state, url, exception):
    """Write one task-log entry, with elapsed time measured from ``start_time``."""
    takeTime = baseCore.getTimeCost(start_time, time.time())
    baseCore.recordLog(social_code, taskType, state, takeTime, url, exception)


def _fetchSoup(url):
    """Fetch ``url`` through a freshly obtained proxy and parse it as GB2312 HTML."""
    ip = baseCore.get_proxy()
    # A timeout keeps a dead proxy from blocking the worker indefinitely.
    res = requests.get(url, proxies=ip, timeout=20)
    return bs(res.content, 'html.parser', from_encoding='gb2312')


def _downloadPdf(pdf_url, headers, attempts=3):
    """Download ``pdf_url`` with retries; return the bytes, or None if every attempt fails."""
    for _ in range(attempts):
        try:
            # NOTE(review): certificate verification is disabled, as in the original code.
            return requests.get(pdf_url, headers=headers, verify=False, timeout=20).content
        except Exception:
            time.sleep(3)
    return None


def _uploadPdf(social_code, content, attempts=3):
    """Upload ``content`` to FastDFS with retries; return the upload result, or None on failure."""
    for _ in range(attempts):
        try:
            return client.upload_by_buffer(content, file_ext_name='pdf')
        except Exception as e:
            log.error(f'{social_code}...年报上传服务器出错:{e}')
            time.sleep(3)
    return None


def getContent(social_code, com_name, code, start_time):
    """Collect the annual-report PDFs of one company and register them.

    For every link on the company's annual-report list page: open the detail
    page, locate the PDF link, download the PDF, upload it to FastDFS and
    insert an attachment row via ``tableUpdate``. Each outcome (success or
    failure) is written to the task log through ``baseCore.recordLog``.

    Args:
        social_code: company identifier; stored as ``item_id`` and used in logs.
        com_name: company name, used to build the stored file name.
        code: stock code interpolated into the list-page URL.
        start_time: ``time.time()`` value the elapsed time is measured from.

    Returns:
        None. A failure for one report is logged and the loop moves on to the
        next report; a failure to obtain the list ends the call.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
    }
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    comp = re.compile(r'-?[1-9]\d*')
    # NOTE(review): num is never incremented, so every row is stored with
    # order_by == 1 — confirm whether a per-report counter was intended.
    num = 1
    url_1 = f'https://vip.stock.finance.sina.com.cn/corp/go.php/vCB_Bulletin/stockid/{code}/page_type/ndbg.phtml'

    try:
        soup = _fetchSoup(url_1)
    except requests.exceptions.RequestException as e:
        log.error(f'{social_code}....{url_1}....年度报告列表请求失败:{e}')
        _recordResult(social_code, start_time, 0, url_1, '年度报告列表请求失败')
        return

    # Collect the annual-report links; a missing container and an empty
    # container are both reported as an empty list.
    datelist = soup.find('div', {'class': 'datelist'})
    list_all = datelist.find_all('a') if datelist is not None else []
    if not list_all:
        log.info(f'{social_code}.........年度报告列表为空')
        _recordResult(social_code, start_time, 0, '', '年度报告列表为空')
        return

    # Process each annual report's detail page.
    for href in list_all:
        link = href.get('href')
        if not link:
            # Anchor without a target: there is nothing to follow.
            continue
        year_url = 'https://vip.stock.finance.sina.com.cn' + link
        year_name = href.text

        try:
            soup_2 = _fetchSoup(year_url)
        except requests.exceptions.RequestException as e:
            log.error(f'{social_code}....{year_url}....年报详情页请求失败:{e}')
            _recordResult(social_code, start_time, 0, year_url, '年报详情页请求失败')
            continue

        try:
            pdf_url = soup_2.find('th', {'style': 'text-align:center'}).find('a').get('href')
        except AttributeError:
            # One of the find() calls returned None: the page has no download cell.
            pdf_url = None
        if not pdf_url:
            log.error(f'{social_code}....{year_url}....无下载链接')
            _recordResult(social_code, start_time, 0, year_url, '无下载链接')
            continue

        # Resolve the year before downloading so entries that would be
        # skipped anyway do not cost a PDF download.
        years = comp.findall(year_name)
        if not years:
            continue
        year = years[0]

        # A failed download must not fall through: otherwise the bytes of the
        # previous report (or an unbound name) would be uploaded for this year.
        resp_content = _downloadPdf(pdf_url, headers)
        if resp_content is None:
            log.error(f'{social_code}....{pdf_url}....年报下载失败')
            _recordResult(social_code, start_time, 0, year_url, '年报下载失败')
            continue

        name_pdf = f"{com_name}：{year}年年报.pdf".replace('*', '')

        result = _uploadPdf(social_code, resp_content)
        if result is None:
            _recordResult(social_code, start_time, 0, year_url, '上传服务器失败')
            continue

        time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        type_id = '1'
        item_id = social_code
        group_name = 'group1'
        full_path = bytes.decode(result['Remote file_id'])
        # Stored path is the remote file id with the group name stripped.
        path = full_path.replace('group1', '')
        category = 'pdf'
        file_size = result['Uploaded size']
        order_by = num
        status = 1
        create_by = 'XueLingKun'
        create_time = time_now
        try:
            tableUpdate(year, name_pdf, type_id, item_id, group_name, path, full_path, category, file_size,
                        order_by, status, create_by, create_time)
        except Exception as e:
            log.error(f'{social_code}....{year_url}....数据库传输失败:{e}')
            _recordResult(social_code, start_time, 0, year_url, '数据库传输失败')
        else:
            _recordResult(social_code, start_time, 1, year_url, '')


def begin():
    """Run the worker loop: pull company codes and process them one by one.

    Each iteration pulls a social code from the
    ``AnnualEnterprise:gnshqy_socialCode`` queue. When nothing usable is
    returned the loop sleeps 20 seconds and polls again. Companies without a
    stock code are logged as failed and skipped; otherwise ``getContent`` is
    called and the company's run counter is incremented via
    ``baseCore.updateRun``. The loop never terminates on its own.
    """
    queue_key = 'AnnualEnterprise:gnshqy_socialCode'
    run_type = 'AnnualReportCount'

    while True:
        start_time = time.time()
        # Fetch the next company to process.
        social_code = baseCore.redicPullData(queue_key)
        # Falsy values (including '') and the literal string 'None' all mean
        # "nothing to do right now".
        if not social_code or social_code == 'None':
            time.sleep(20)
            continue

        dic_info = baseCore.getInfomation(social_code)
        count, code, com_name = dic_info[15], dic_info[3], dic_info[4]

        if code is None:
            elapsed = baseCore.getTimeCost(start_time, time.time())
            baseCore.recordLog(social_code, taskType, 0, elapsed, '', '股票代码为空')
            continue

        getContent(social_code, com_name, code, start_time)

        baseCore.updateRun(social_code, run_type, count + 1)


if __name__ == '__main__':
    # begin() loops forever, so it only ends by raising (for example on
    # KeyboardInterrupt). try/finally makes sure the database cursor, the
    # connection and the shared helper are still released in that case.
    try:
        begin()
    finally:
        cursor.close()
        cnx.close()
        baseCore.close()
