# https://www.henan.gov.cn/
import json
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import xlsxwriter
import openpyxl
import BaseCore
# Module-wide BaseCore instance; reqGetHtml and reqPostHtml call its
# get_proxy() method to obtain the proxies used for each request attempt.
baseCore=BaseCore.BaseCore()

def reqGetHtml(url,header):
    """Fetch ``url`` with an HTTP GET and return the decoded body.

    At most three attempts are made. Every attempt asks
    ``baseCore.get_proxy()`` for a proxy, sends the request with certificate
    verification switched off and a 10 second timeout, and decodes the body
    using the encoding that requests detects from the content.

    :param url: address to fetch.
    :param header: HTTP headers sent with the request.
    :return: the first non-empty body text, or ``''`` if every attempt
        raised or produced an empty body.
    """
    page = ''
    remaining = 3
    while remaining:
        remaining -= 1
        try:
            current_proxy = baseCore.get_proxy()
            reply = requests.get(
                url=url,
                headers=header,
                proxies=current_proxy,
                verify=False,
                timeout=10,
            )
            # Trust the detected charset rather than the declared one.
            reply.encoding = reply.apparent_encoding
            page = reply.text
        except Exception:
            # Best effort: a failed attempt counts as an empty result.
            page = ''
            continue
        if page:
            return page
    return page

def reqPostHtml(url,header,data):
    """POST ``data`` to ``url`` and return the response body as text.

    Up to three attempts are made, each through a proxy obtained from
    ``baseCore.get_proxy()``, with certificate verification switched off and
    a 10 second timeout. A ``str`` payload is sent unchanged; any other
    payload is serialised with ``json.dumps`` first.

    :param url: address to post to.
    :param header: HTTP headers sent with the request.
    :param data: request body, either a ready-made string or an object that
        ``json.dumps`` can serialise.
    :return: the first non-empty response text, or ``''`` if every attempt
        raised or produced an empty body.
    """
    hcont = ''
    for _ in range(3):
        try:
            proxy = baseCore.get_proxy()
            payload = data if isinstance(data, str) else json.dumps(data)
            # Route both payload kinds through the proxy so string and JSON
            # posts leave from the same kind of address.
            res = requests.post(
                url=url,
                data=payload,
                headers=header,
                proxies=proxy,
                verify=False,
                timeout=10,
            )
            hcont = res.text
            if hcont:
                break
        except Exception:
            # Best effort: a failed attempt counts as an empty result.
            hcont = ''

    return hcont

def reqPostStrHtml(url,header,data):
    """POST ``data`` as-is to ``url`` (no proxy) and return the body text.

    At most three attempts are made, each with certificate verification
    switched off and a 10 second timeout.

    :param url: address to post to.
    :param header: HTTP headers sent with the request.
    :param data: request body, passed to ``requests.post`` unchanged.
    :return: the first non-empty response text, or ``''`` if every attempt
        raised or produced an empty body.
    """
    body = ''
    attempts_left = 3
    while attempts_left > 0:
        attempts_left -= 1
        try:
            body = requests.post(
                url=url,
                data=data,
                headers=header,
                verify=False,
                timeout=10,
            ).text
        except Exception:
            # Best effort: a failed attempt counts as an empty result.
            body = ''
            continue
        if body:
            return body
    return body

def createDriver():
    """Start a Chrome session through Selenium and return the driver.

    The chromedriver executable and the Chrome browser binary are taken from
    fixed local paths.

    :return: the ``webdriver.Chrome`` instance that was started.
    """
    # Service expects the chromedriver executable, whereas binary_location
    # is the Chrome browser itself; the two paths must not be swapped.
    chromedriver_path = r'D:\chrome\chromedriver.exe'
    chrome_binary = r'D:\Google\Chrome\Application\chrome.exe'
    service = Service(chromedriver_path)
    chrome_options = webdriver.ChromeOptions()
    chrome_options.binary_location = chrome_binary
    # Set a proxy
    # proxy = "127.0.0.1:8080"  # proxy address and port
    # chrome_options.add_argument('--proxy-server=http://' + proxy)
    # `options=` is the supported keyword alongside `service=`; the older
    # `chrome_options=` keyword is deprecated and rejected by newer Selenium.
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver
# Convert the relative addresses found in the html into absolute addresses
def paserUrl(html, listurl):
    """Rewrite ``href``/``src`` of every ``<a>`` and ``<img>`` to absolute URLs.

    :param html: markup as a string (parsed here with ``html.parser``) or an
        already parsed object exposing ``find_all``; a parsed object is
        modified in place.
    :param listurl: URL of the page the markup came from, used as the base
        when resolving relative addresses.
    :return: the parsed document with its links rewritten.
    """
    if isinstance(html, str):
        html = BeautifulSoup(html, 'html.parser')

    # Collect every <a> and <img> tag, then resolve each address they carry.
    for link in html.find_all(['a', 'img']):
        # The attributes are checked independently so that a tag carrying
        # both href and src gets both of them resolved.
        for attr in ('href', 'src'):
            if attr in link.attrs:
                link[attr] = urljoin(listurl, link[attr])
    return html

def pdwriterXLS(dlist,siteName):
    """Write ``dlist`` to ``<siteName>.xlsx`` using the xlsxwriter engine.

    :param dlist: data handed to ``pd.DataFrame`` to build the table.
    :param siteName: output file name without the ``.xlsx`` extension.
    """
    # The DataFrame index is left out of the sheet.
    pd.DataFrame(data=dlist).to_excel(
        siteName + '.xlsx',
        engine='xlsxwriter',
        index=False,
    )












