import json
import os
import time

from kafka import KafkaProducer
from obs import ObsClient
import fitz
from urllib.parse import unquote

from retry import retry

import requests

import BaseCore
# One-off maintenance script: for every annual-report PDF found in `file_path`,
# recompute the on-disk file size and write it back to the matching row of
# `clb_sys_attachment`.
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
# BaseCore exposes two connection/cursor pairs. In this script the first pair
# reads `rankandcode`; the second pair updates `clb_sys_attachment`.
cnx = baseCore.cnx
cursor = baseCore.cursor
cnx_ = baseCore.cnx_
cursor_ = baseCore.cursor_
pathType = 'QYYearReport/'
type_id = 1
create_by = 'XueLingKun'
taskType = '企业年报'
file_path = 'D:/kkwork/zzsn_spider/data/1_福布斯2000_PDF_28_白'
file_list = os.listdir(file_path)

for file in file_list:
    num = 1
    start_time = time.time()
    pdf_path = file_path + '/' + file

    # Expected file name layout: "<rank>-<year>-<company name>.pdf".
    # Anything else is skipped instead of aborting the whole batch.
    name_parts = file.split('-')
    try:
        file_rank = int(name_parts[0])
        file_year = name_parts[1]
        company_part = name_parts[2]
    except (ValueError, IndexError):
        print(f'文件名格式不正确,跳过:{file}')
        continue

    # NOTE(review): str.strip('.pdf') removes any leading/trailing '.', 'p',
    # 'd', 'f' characters, not the literal suffix, so a company name ending in
    # one of those letters is shortened. It is kept as-is because the result
    # must equal the `name` already stored in clb_sys_attachment; confirm how
    # the uploader built that name before switching to a suffix-only removal.
    file_name = company_part.strip('.pdf') + file_year + '年年度报告' + '.pdf'

    # Map the rank taken from the file name to the company record
    # (columns 1..3 are read as social credit code, English name, Chinese name).
    # Placeholders assume the driver's paramstyle is '%s' (PyMySQL-style).
    cursor.execute("select * from rankandcode where id = %s", (file_rank,))
    data = cursor.fetchone()
    cnx.commit()
    if data is None:
        # No company registered for this rank: nothing to update.
        print(f'未找到对应企业,跳过:{file}')
        continue
    social_code = data[1]
    ename = data[2]
    cname = data[3]

    # Read the PDF fully, then parse it from memory. The parse acts as a
    # corruption check: files that cannot be opened or read are skipped.
    content = ''
    with open(pdf_path, 'rb') as pdf_file:
        byte_stream = pdf_file.read()
    try:
        with fitz.open(stream=byte_stream, filetype='pdf') as doc:
            page_size = doc.page_count
            content = ''.join(page.get_text() for page in doc.pages())
    except Exception:
        # Any failure while opening or extracting text is treated as a
        # damaged file, matching the original best-effort behaviour.
        print(f'文件已损坏:{cname}')
        continue

    # Size of the file on disk, formatted by BaseCore.convert_size.
    file_size = os.path.getsize(pdf_path)
    file_size = baseCore.convert_size(file_size)

    # (A previously disabled pre-filter restricted the update to rows whose
    # stored size was '28.00 KB'; it is intentionally not applied here.)
    # Parameterized so that names containing quotes cannot break the statement.
    updatesql = (
        "update clb_sys_attachment set file_size=%s "
        "where name=%s and item_id = %s and create_time>='2023-10-14' "
    )
    cursor_.execute(updatesql, (file_size, file_name, social_code))
    cnx_.commit()
    print(f'更新成功---{file_size}----{file_name}---{social_code}')