import json

from kafka import KafkaProducer

import requests, time,  fitz
import urllib3
from base import BaseCore

# Silence urllib3's InsecureRequestWarning for the whole process.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Shared project helper; it supplies the database handles, the logger and
# language detection used by the rest of this script.
baseCore = BaseCore.BaseCore()

# Connection exposed as ``cnx_`` plus a cursor opened on it here;
# secrchATT() runs its attachment query through ``cursor_``.
cnx_ = baseCore.cnx_
cursor_ = cnx_.cursor()

# Connection/cursor pair exposed directly by baseCore. In the visible code
# ``cnx`` is only closed at the end of the script and ``cursor`` is unused.
# NOTE(review): which database each connection targets is not visible here.
cnx = baseCore.cnx
cursor = baseCore.cursor

def sendKafka(dic_news):
    """Publish one record to the ``researchReportYearTopic`` Kafka topic.

    The record is serialised as UTF-8 JSON (non-ASCII characters are kept
    as-is) and the call blocks for up to 10 seconds waiting for the broker
    acknowledgement.

    Args:
        dic_news: JSON-serialisable dict describing the record to publish.

    Returns:
        True when the broker acknowledged the message, False when anything
        went wrong while connecting, serialising or sending.

    Note:
        Logging goes through the module-level name ``log``, which this file
        only assigns inside its ``__main__`` section; calling this function
        before that assignment raises NameError.
    """
    producer = None
    try:  # 114.116.116.241
        # max_request_size is raised to 20 MiB because the payload carries
        # the full extracted document text.
        producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'],
                                 max_request_size=1024 * 1024 * 20)
        kafka_result = producer.send("researchReportYearTopic",
                                     json.dumps(dic_news, ensure_ascii=False).encode('utf8'))

        # .get() blocks until the send is acknowledged (or raises on failure).
        print(kafka_result.get(timeout=10))

        dic_result = {
            'success': 'true',
            'message': '操作成功',
            'code': '200',
        }
        log.info(dic_result)
        return True
    except Exception as e:
        dic_result = {
            'success': 'false',
            'message': '操作失败',
            'code': '204',
            'e': e
        }
        log.info(dic_result)
        return False
    finally:
        # A producer is created per call, so release its sockets and
        # background sender thread instead of leaking them.
        if producer is not None:
            producer.close(timeout=10)
#state1

def secrchATT(type_id, xydm, att_id):
    """Look up rows of ``clb_sys_attachment`` for one attachment.

    Args:
        type_id: Value matched against the ``type_id`` column.
        xydm: Value matched against the ``item_id`` column.
        att_id: Value matched against the ``id`` column.

    Returns:
        Every matching row, as returned by ``cursor_.fetchall()``.
    """
    query = '''select * from clb_sys_attachment where item_id=%s and type_id=%s and id=%s'''
    # Placeholder order follows the SQL text: item_id, type_id, id.
    params = (xydm, type_id, att_id)
    cursor_.execute(query, params)
    return cursor_.fetchall()


if __name__ == '__main__':
    # Script entry point: for every work item, look up the attachment row,
    # download the PDF, extract its text and publish the record to Kafka.

    # Browser-like headers sent with every attachment download.
    header = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'sec-ch-ua': '"Chromium";v="112", "Microsoft Edge";v="112", "Not:A-Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.64',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-User': '?1',
        'Sec-Fetch-Dest': 'document',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    }
    # Work list; each entry is "<item id / social credit code>|<attachment id>|<year>".
    info_list = [
        # 'ZZSN230824151202408|1724390910695550977|2017',
        'ZZSN230912210754179|18703841781|2022'
    ]

    # Bound once, at module scope and before any work starts, because
    # sendKafka() logs through this same global name.
    log = baseCore.getLogger()

    try:
        for info in info_list:
            xydm, att_id, year = info.split('|')[:3]

            selects = secrchATT('1', xydm, att_id)
            if len(selects) != 1:
                # Only an unambiguous single match is processed.
                log.info(f'skip {xydm}/{att_id}: expected 1 attachment row, found {len(selects)}')
                continue

            # Column positions of the `select *` row.
            # NOTE(review): meanings are inferred from the variable names
            # only - confirm against the clb_sys_attachment schema.
            select = selects[0]
            file_name = select[1]
            origin = select[18]
            publishDate = select[21]
            if year == '2023':
                # Items for 2023 get a fixed publish date.
                publishDate = '2023-08-31'
            full_path = 'http://zzsn.luyuen.com/' + str(select[19])
            create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

            # Download the PDF: up to 3 attempts, 3 seconds apart.
            response = None
            for _ in range(3):
                try:
                    response = requests.get(url=full_path, headers=header, timeout=30)
                    # Treat HTTP error statuses as failures so that an error
                    # page is never handed to the PDF parser.
                    response.raise_for_status()
                    break
                except requests.RequestException as e:
                    log.info(f'download attempt failed for {full_path}: {e}')
                    response = None
                    time.sleep(3)
            if response is None:
                # Every attempt failed; skip this item instead of crashing
                # on an unbound response.
                log.info(f'skip {xydm}/{att_id}: could not download {full_path}')
                continue

            # Extract the plain text of every page.
            with fitz.open(stream=response.content, filetype='pdf') as doc:
                page_size = doc.page_count
                log.info(f'当前页码----{page_size}')
                content = ''.join(page.get_text() for page in doc.pages())

            detect_language = baseCore.detect_language(content)
            dic_news = {
                'attachmentIds': att_id,
                'author': '',
                'content': content,
                'contentWithTag': '',
                'createDate': create_time,
                'deleteFlag': '0',
                'id': '',
                'keyWords': '',
                'lang': detect_language,
                'origin': origin,
                # NOTE(review): a hard-coded origin ('雪球网', i.e. Xueqiu) was
                # commented out here in favour of the database value.
                'publishDate': publishDate,
                'sid': '1684032033495392257',
                'sourceAddress': '',  # link to the original article
                'summary': '',
                'title': file_name.replace('.pdf', '').replace('.PDF', ''),
                'type': 1,
                'socialCreditCode': xydm,
                'year': year
            }
            if sendKafka(dic_news):
                log.info(f'==========={xydm}成功============')
    finally:
        # Release the database resources even if an item failed part-way.
        cnx.close()
        cursor_.close()
        baseCore.close()



