package com.zzsn.download;

import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.HttpClient;
import org.apache.http.conn.params.ConnRouteParams;
import org.apache.http.impl.client.DefaultHttpClient;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

@Slf4j
public class PageDownload {
//            106.111.73.25-35946-hys_81310170_41c8-12345678
//            60.184.197.64-34013-hys_81310170_41c8-12345678
//            114.99.221.245-39604-hys_81310170_41c8-12345678
//            49.82.130.253-52312-hys_81310170_41c8-12345678
    /**默认代理地址*/
    public static String PROXY_ADDR = "114.99.221.245";
    /**默认代理接口*/
    public static int PROXY_PORT = 39604;
    public String  getProxyIp(){
        List<String> proxyList=new ArrayList<>();
        proxyList.add("106.111.73.25-35946-hys_81310170_41c8-12345678");
        proxyList.add("60.184.197.64-34013-hys_81310170_41c8-12345678");
        proxyList.add("114.99.221.245-39604-hys_81310170_41c8-12345678");
        proxyList.add("49.82.130.253-52312-hys_81310170_41c8-12345678");
        Random random = new Random();
        int n = random.nextInt(proxyList.size());
       return proxyList.get(n);

    }
    public static HttpClient getHttpClient(String proxyip) {

        String[] proxys=proxyip.split("-");
        DefaultHttpClient httpClient = new DefaultHttpClient();
        String proxyHost = proxys[0];
        int proxyPort = Integer.parseInt(proxys[1]);
        String userName = proxys[2];
        String password = proxys[3];
        httpClient.getCredentialsProvider().setCredentials(
                new AuthScope(proxyHost, proxyPort),
                new UsernamePasswordCredentials(userName, password));
        HttpHost proxy = new HttpHost(proxyHost,proxyPort);
        httpClient.getParams().setParameter(ConnRouteParams.DEFAULT_PROXY, proxy);
        return httpClient;
    }
    /**
     * 用模拟浏览器的方法，下载页面
     * @param urlstr 链接的参数
     * @return
     * @throws FailingHttpStatusCodeException
     * @throws MalformedURLException
     * @throws IOException
     */
    public  String downloadByWebClientProxy(String urlstr,String charset) throws FailingHttpStatusCodeException, MalformedURLException, IOException {

        WebClient webClient = new WebClient(BrowserVersion.CHROME);
        webClient.getOptions().setJavaScriptEnabled(true);
        webClient.getOptions().setActiveXNative(false);
        webClient.getOptions().setCssEnabled(false);  //启用css
        webClient.getOptions().setRedirectEnabled(true);//百度阅读暂改
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());//很重要，设置支持AJAX
        webClient.getOptions().setTimeout(20000);//设置“浏览器”的请求超时时间
        webClient.setJavaScriptTimeout(30000);//设置JS执行的超时时间

        String proxyIp = getProxyIp();
        String[] proxys=proxyIp.split("-");
        String proxyHost = proxys[0];
        int proxyPort = Integer.parseInt(proxys[1]);

        ProxyConfig proxyConfig = new ProxyConfig();
        webClient.getOptions().setProxyConfig(proxyConfig);
        String pageStr = null;
        try {
            Page page = webClient.getPage(urlstr);
            if (page instanceof HtmlPage) {
                HtmlPage htmlPage = (HtmlPage) page;
                webClient.waitForBackgroundJavaScript(5000); //阻塞线程
                pageStr = htmlPage.asXml();
            }
        } catch (Exception e) {
            log.error(String.format("PAMSG:DownloadByWebClient Error, Connection reset? read timeout? connection timeout? EXCEPTION: %s ", e.getMessage()));
            return pageStr;
        }
        webClient.close();
        return pageStr;
    }
    /**
     * 用模拟浏览器的方法，下载页面
     * @param urlstr 链接的参数
     * @return
     * @throws FailingHttpStatusCodeException
     * @throws MalformedURLException
     * @throws IOException
     */
    public  String downloadByWebClient(String urlstr,String charset) throws FailingHttpStatusCodeException, MalformedURLException, IOException {

        WebClient webClient = new WebClient(BrowserVersion.CHROME);
        webClient.getOptions().setJavaScriptEnabled(true);
        webClient.getOptions().setActiveXNative(false);
        webClient.getOptions().setCssEnabled(false);  //启用css
        webClient.getOptions().setRedirectEnabled(true);//百度阅读暂改
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());//很重要，设置支持AJAX
        webClient.getOptions().setTimeout(20000);//设置“浏览器”的请求超时时间
        webClient.setJavaScriptTimeout(30000);//设置JS执行的超时时间
//        webClient.addRequestHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");

        String pageStr = null;
        try {
            Page page = webClient.getPage(urlstr);
            if (page instanceof HtmlPage) {
                HtmlPage htmlPage = (HtmlPage) page;
                webClient.waitForBackgroundJavaScript(5000); //阻塞线程
                pageStr = htmlPage.asXml();
            }
        } catch (Exception e) {
            log.error(String.format("PAMSG:DownloadByWebClient Error, Connection reset? read timeout? connection timeout? EXCEPTION: %s ", e.getMessage()));
            pageStr=downloadByWebClientProxy(urlstr,charset);
        }
        webClient.close();
        return pageStr;
    }


}
