import requests, pymysql, re, time, json, sys
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from concurrent.futures.thread import ThreadPoolExecutor

from base.BaseCore import BaseCore
# Shared project helper; this file uses it for logging, the DB handles below,
# redis pulls (redicPullData), company lookup (getInfomation) and task logging.
baseCore = BaseCore()
log = baseCore.getLogger()

# Module-wide database connection and cursor taken from the helper.
# Functions in this file read these as globals (cursor.execute / cnx.commit).
cnx = baseCore.cnx
cursor = baseCore.cursor


def InsterInto(short_name, social_code, pdf_url):
    """Insert one announcement source row unless it is already stored.

    Looks up ``brpa_source_article`` for a row with the same
    ``social_credit_code`` and ``source_address``; if none exists, inserts a
    new row with origin '东方财富网' and type '1'.

    Args:
        short_name: Company short name; only used in the "already exists"
            console message.
        social_code: Value written to / matched against ``social_credit_code``.
        pdf_url: Value written to / matched against ``source_address``.

    Returns:
        True when a new row was inserted and committed, False when the row
        already existed or the insert failed.

    Note:
        The failure path reads the module-level names ``start_time`` and
        ``taskType``, which are assigned in the ``__main__`` section.
    """
    inserted = False

    sel_sql = '''select social_credit_code,source_address from brpa_source_article where social_credit_code = %s and source_address = %s'''
    cursor.execute(sel_sql, (social_code, pdf_url))
    if cursor.fetchone():
        print(f'com_name:{short_name}、{pdf_url}已存在')
        return inserted

    # Write the record to the database.
    insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,create_time) values(%s,%s,%s,%s,now())'''
    row = (
        social_code,
        pdf_url,
        '东方财富网',
        '1',
    )
    try:
        cursor.execute(insert_sql, row)
        cnx.commit()
        inserted = True
    except Exception:
        # Best-effort insert: record the failure and report False.  The
        # original returned an unbound name here, which raised
        # UnboundLocalError instead of signalling failure to the caller.
        state = 0
        takeTime = baseCore.getTimeCost(start_time, time.time())
        baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, '数据库传输失败')
    return inserted

def _market_prefix(padded_code):
    """Return the exchange prefix for a zero-padded stock code, or None if unknown."""
    lead = padded_code[0]
    if lead in ('0', '3', '2'):
        return 'SZ'
    if lead in ('6', '9'):
        return 'SH'
    if lead in ('8', '4'):
        return 'BJ'
    return None


def _fetch_json(url, attempts=3, delay=5):
    """GET *url* and return the decoded JSON body, retrying on request/decode errors.

    After the last failed attempt the process exits with status 0, matching
    the script's existing behaviour on unrecoverable request errors.
    """
    for attempt in range(attempts):
        try:
            return requests.get(url, verify=False).json()
        except (requests.RequestException, ValueError):
            if attempt == attempts - 1:
                sys.exit(0)
            time.sleep(delay)


def gonggao_info(dic_info):
    """Collect announcements for one company and store new ones in MySQL.

    Fetches the first page (50 entries) of the announcement listing for the
    company's stock code, then for each entry fetches the announcement detail
    and inserts a row into ``brpa_source_article``.

    Args:
        dic_info: Indexable company record; index 3 is read as the stock code
            that is sent as ``stock_list`` in the listing request.

    Returns:
        None.  The function returns early (without appending to ``list_c``)
        when the code contains 'HK', when the code's leading digit maps to no
        known exchange prefix, or when an announcement URL is already stored
        with type '1'.

    Note:
        Relies on module-level ``cursor``, ``cnx``, ``social_code`` and
        ``list_c``.
        NOTE(review): the original read ``dic_info[2]`` into an unused
        ``social__code`` local while inserting the global ``social_code``;
        presumably both hold the same value - confirm before switching.
    """
    code = dic_info[3]
    if 'HK' in code:
        return

    # Left-pad to six characters so the leading digit identifies the exchange.
    code1 = str(code).rjust(6, '0')
    market = _market_prefix(code1)
    if market is None:
        # Previously com_code stayed unbound here and the function crashed
        # later while building the detail URL; skip such codes up front.
        print(f'{code}:未识别的交易所前缀,已跳过')
        return
    com_code = market + code1

    stop_paging = False
    for page1 in range(1, 2):
        if stop_paging:
            break
        url = f'https://np-anotice-stock.eastmoney.com/api/security/ann?sr=-1&page_size=50&page_index={page1}&ann_type=A&client_source=web&stock_list={code1}&f_node=0&s_node=0'
        res_json = _fetch_json(url)

        # A null/missing "data" object is treated like an empty listing.
        list_all = (res_json.get('data') or {}).get('list')
        if not list_all:
            break

        for one_info in list_all:
            title = one_info['title']
            info_date = one_info['notice_date']
            if page1 > 1 and '2022' in info_date:
                stop_paging = True
                break
            if '2021' in info_date:  # only collect data from 2022 onwards
                stop_paging = True
                break

            art_code = one_info['art_code']
            info_url = 'https://data.eastmoney.com/notices/detail/' + com_code + '/' + art_code + '.html'
            t = int(time.time() * 1000)
            json_url = f'https://np-cnotice-stock.eastmoney.com/api/content/ann?art_code={art_code}&client_source=web&page_index=1&_={t}'
            json_2 = _fetch_json(json_url)

            try:
                pdf_url = json_2['data']['attach_url']
            except (KeyError, TypeError):
                pdf_url = ''

            # Look the announcement URL up in the database: if it is already
            # stored with type '1', stop processing this company entirely.
            sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s and type='1' '''
            cursor.execute(sel_sql, (info_url,))
            if cursor.fetchall():
                return

            try:
                # "or ''" guards against a null notice_content, which would
                # otherwise break the slice below.
                info_content = json_2['data']['notice_content'] or ''
            except (KeyError, TypeError):
                info_content = ''

            list_info = [
                social_code,
                title,
                info_content[:2000],
                info_date,
                info_url,
                pdf_url,
                '东方财富网',
                '1',
                'zh'
            ]
            # Use a distinct name for the short-lived cursor.  Binding it to
            # ``cursor`` made that name local to the whole function, so the
            # module-level cursor used above raised UnboundLocalError.
            with cnx.cursor() as cur:
                sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s '''
                cur.execute(sel_sql, (info_url,))
                if cur.fetchall():
                    break
                # TODO: cancel the insert-into-database operation
                insert_sql = '''insert into brpa_source_article(social_credit_code,title,summary,publish_date,source_address,pdf_address,origin,type,lang) values(%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
                cur.execute(insert_sql, tuple(list_info))
                cnx.commit()

    print(f'{code}:传输完成')
    list_c.append(code)


if __name__ == '__main__':
    # Worker loop: social credit codes are pulled from redis one at a time.

    list_c = []
    list_all_info_1 = []
    num = 0
    taskType = '企业公告/东方财富网'
    while True:
        start_time = time.time()
        # Fetch the next company to process.
        social_code = baseCore.redicPullData('NoticeEnterpriseEasteFinance:gnshqy_socialCode')
        # Nothing usable in the queue (falsy value or the literal string
        # 'None'): wait 20 seconds and poll again.
        if not social_code or social_code == 'None':
            time.sleep(20)
            continue
        dic_info = baseCore.getInfomation(social_code)
        count, code, com_name = dic_info[15], dic_info[3], dic_info[4]
        gonggao_info(dic_info)



