import os
import time
from urllib.parse import urlparse

import requests
from retry import retry

from base.BaseCore import BaseCore
# Project helper instance; this script uses it for logging (getLogger) and
# for pulling queue items (redicPullData).
baseCore = BaseCore()
log = baseCore.getLogger()

# HTTP headers sent with every download request issued by getContent().
headers = {
    # The User-Agent is split across adjacent literals for readability only;
    # the pieces concatenate to a single header value.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/107.0.0.0 Safari/537.36 '
        'MicroMessenger/7.0.20.1781(0x6700143B) '
        'NetType/WIFI MiniProgramEnv/Windows '
        'WindowsWechat/WMPF WindowsWechat(0x6309080f)XWEB/8461'
    ),
    'Content-Type': 'application/octet-stream',
}


@retry(tries=3, delay=5)
def getContent(url):
    """Download ``url`` and return the raw response body.

    The request is sent with the module-level ``headers`` and a 120 second
    timeout. Any exception raised here propagates to the ``retry``
    decorator, which is configured with ``tries=3, delay=5``.

    Args:
        url: Address of the resource to download.

    Returns:
        bytes: The undecoded response body.

    Raises:
        requests.HTTPError: If the response status code is not 200.
        requests.RequestException: On connection errors or timeouts raised
            by ``requests.get``.
    """
    response = requests.get(url, headers=headers, timeout=120)
    if response.status_code != 200:
        # Raise an explicit, descriptive error. A bare ``raise`` with no
        # active exception only fails indirectly with
        # "RuntimeError: No active exception to reraise".
        raise requests.HTTPError(
            f'unexpected status code {response.status_code} for {url}',
            response=response,
        )
    # Only the raw bytes are returned, so no text encoding needs to be
    # detected or set on the response.
    return response.content


if __name__ == '__main__':
    # Download loop.
    #
    # Items pulled from 'Download:gwshrfe' are of two kinds:
    #   * an item without 'http' is a file name; it becomes the name used for
    #     the URL items that follow it;
    #   * an item containing 'http' is a URL to download and store under
    #     ./文件1/<file_name>/<file_name><extension>.
    # NOTE(review): presumably redicPullData pops one item from a Redis
    # list -- confirm against BaseCore.

    idle_wait = 5        # seconds to wait before polling an empty queue again
    seen_urls = set()    # URLs already written (or attempted to be written)
    name_counts = {}     # file name -> number of times it has been received
    file_name = None     # most recent file name; None until one arrives

    try:
        while True:
            item = baseCore.redicPullData('Download:gwshrfe')
            if not item or item == 'None':
                log.info('已没有数据')
                # Back off instead of spinning on the empty queue.
                time.sleep(idle_wait)
                continue

            if 'http' not in item:
                # The item is a file name. A repeated name gets a numeric
                # suffix (_2, _3, ...) so it does not reuse the first one.
                occurrence = name_counts.get(item, 0) + 1
                name_counts[item] = occurrence
                file_name = item if occurrence == 1 else f'{item}_{occurrence}'
                continue

            # Otherwise the item is a link.
            url = item

            if file_name is None:
                # A URL arrived before any file name, so there is nothing to
                # name the output after; skip it instead of crashing.
                log.error(f'{url}===缺少文件名,已跳过')
                continue

            if url in seen_urls:
                log.info(f'{url}该链接已处理过')
                continue
            log.info(f'{file_name}==={url}===开始采集')

            try:
                content = getContent(url)
            except Exception:
                # The failed URL is not pushed back to the queue.
                log.error(f'{file_name}==={url}===解析失败')
                time.sleep(2)
                continue

            # The saved file needs an extension. Take it from the URL path so
            # that a query string or fragment does not leak into the file
            # name; if the path has none, fall back to the whole URL.
            category = (os.path.splitext(urlparse(url).path)[1]
                        or os.path.splitext(url)[1])
            path = f'./文件1/{file_name}'
            file = f'{path}/{file_name}{category}'

            try:
                os.makedirs(path, exist_ok=True)
                with open(file, 'wb') as f:
                    f.write(content)
            except OSError:
                log.error(f'{url}===下载失败')
            else:
                log.info(f'{url}===下载成功')
            seen_urls.add(url)
            time.sleep(2)
    finally:
        # The loop never exits normally; release the helper's resources when
        # the script is interrupted or fails.
        baseCore.close()