package com.zzsn.util;


import com.zzsn.common.SpringUtil;
import com.zzsn.common.cache.MemcachedUtils;
import com.zzsn.download.PageDownload;
import com.zzsn.entity.*;
import com.zzsn.service.PaserErroreMsgService;
import com.zzsn.service.ProcessitemService;
import com.zzsn.service.SiteService;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.params.ConnRouteParams;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustStrategy;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.util.EntityUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;

import javax.net.ssl.SSLContext;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

@Data
@Slf4j
public class VerifySiteUtil {

    // NOTE(review): threadId, isSuccess, statisticStr and siteTemplateList are not
    // referenced by any method in this file; they are only reachable through the
    // accessors generated by Lombok's @Data. Confirm external usage before removing.
    private int threadId = 0;
    private boolean isSuccess = false;
    private String statisticStr = null;
    private List<SiteTemplate> siteTemplateList;
    // Public and mutable; not read by any method in this file.
    public  Integer pageNum=1;
    // Shared downloader used by CatchWebOfBaiduByProxy to fetch search result pages.
    public static PageDownload pageDownload=new PageDownload();
    // Not read by any method in this file (methods take their own url arguments).
    String url="https://www.baidu.com/";

    // Services are looked up from the Spring context when the instance is created,
    // not injected. processitemService is only referenced from commented-out code.
    ProcessitemService processitemService= SpringUtil.getBean(ProcessitemService.class) ;
    // Used by saveErrorSite to record pages that could not be parsed.
    PaserErroreMsgService paserErroreMsgService= SpringUtil.getBean(PaserErroreMsgService.class) ;
    // Used by getSiteTemp (template lookup) and saveErrorSite (site registration).
    SiteService siteService= SpringUtil.getBean(SiteService.class) ;

//    获取模板
    /**
     * Builds the extraction template for the site that hosts {@code infourl}.
     *
     * <p>qq.com addresses that are not already on new.qq.com are first rewritten
     * with {@link #transqqURl(String)}. The host of the resulting URL is matched
     * against the {@code domain_uri} column and the selectors of the first
     * matching site are copied into a fresh template. The matched site is also
     * cached under the key {@code "domainUri_" + domainUri}.
     *
     * @param infourl address of the page to be parsed
     * @return {@code null} when {@code infourl} is null or not a valid URL;
     *         otherwise a template, which has no selectors copied into it when
     *         no site matches or the lookup fails
     */
    public SiteTemplate getSiteTemp(String infourl){
        String domain;
        try {
            if(infourl.contains("qq.com") && !infourl.contains("://new.qq.com")){
                infourl= transqqURl(infourl);
            }
            domain = new URL(infourl).getHost();
        }catch (Exception e){
            // Null or malformed address: there is no host to look up.
            return null;
        }

        SiteTemplate sTemplate=new SiteTemplate();
        QueryWrapper<Site> queryWrapper=new QueryWrapper<>();
        queryWrapper.eq("domain_uri",domain);
        try {
            List<Site> sitelist = siteService.list(queryWrapper);
            if (sitelist != null && !sitelist.isEmpty()) {
                Site site = sitelist.get(0);
                if (null != site.getMatchTitle()) {
                    sTemplate.setMatchTitle(site.getMatchTitle());
                }
                if (null != site.getMatchSummary()) {
                    sTemplate.setMatchSummary(site.getMatchSummary());
                }
                if (null != site.getMatchContent()) {
                    sTemplate.setMatchContent(site.getMatchContent());
                }
                // The author selector used to be guarded by the title selector's
                // null check, so it was dropped or overwritten based on the wrong field.
                if (null != site.getMatchAuthor()) {
                    sTemplate.setMatchAuthor(site.getMatchAuthor());
                }
                if (null != site.getMatchOrigin()) {
                    sTemplate.setMatchOrigin(site.getMatchOrigin());
                }
                if (null != site.getMatchPublishDate()) {
                    sTemplate.setMatchPublishDate(site.getMatchPublishDate());
                }
                MemcachedUtils.put("domainUri_"+site.getDomainUri(),site);
            }
        }catch (Exception e){
            // Best effort: the caller still receives the (possibly empty) template.
            log.error("模板信息获取报错", e);
        }
        return sTemplate;
    }
//
    /**
     * Downloads the page referenced by {@code cwbm.getSourceaddress()} and returns
     * its body as text.
     *
     * <p>qq.com addresses that are not already on new.qq.com are rewritten with
     * {@link #transqqURl(String)} first. The request is sent with the trust-all
     * client; if that fails it is retried once with the TLSv1.2 client. The
     * charset comes from the response entity when declared, otherwise from
     * {@code LocateCharSet}. On success the detected charset and the
     * {@code Last-Modified} header (or {@code ""}) are stored on {@code cwbm}.
     *
     * @param cwbm search hit whose source address is downloaded
     * @return the page text; {@code ""} when the request fails or the body is
     *         empty or unreadable; {@code null} when the address is null, blank,
     *         contains ".pdf", ".PDF" or "download", or no charset could be
     *         determined
     */
    public String getContent( CatchWebByMetaSearch cwbm ){
        String infourl = cwbm.getSourceaddress();
        // Validate before dereferencing: the null check used to run after
        // infourl.contains(...) and therefore could never be reached with null.
        if (infourl == null || infourl.trim().length()==0) {
            return null;
        }
        if(infourl.contains("qq.com") && !infourl.contains("://new.qq.com")){
            infourl= transqqURl(infourl);
        }
        System.out.println(cwbm.getTitle()+"=="+infourl);
        if (infourl.contains(".pdf") || infourl.contains(".PDF") || infourl.contains("download")) {
            return null;
        }

        HttpGet httpgeturl = new HttpGet(infourl);
        httpgeturl.getParams().setIntParameter(
                CoreConnectionPNames.CONNECTION_TIMEOUT, 60000);
        httpgeturl.getParams().setParameter(
                "http.socket.timeout", 60000);
        // Present the request as coming from a browser.
        httpgeturl.setHeader("Content-Type",
                "application/x-www-form-urlencoded;charset=utf-8");
        httpgeturl.setHeader("User-Agent", "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US);");
        httpgeturl.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");

        CloseableHttpClient httpClient = createSSLClientDefault();
        CloseableHttpClient fallbackClient = null;
        try {
            HttpResponse httprespse;
            try {
                httprespse = httpClient.execute(httpgeturl);
            } catch (Exception e1) {
                System.out.println("请求失败。。更换协议");
                fallbackClient = createSSLClientDefaulttsl12();
                try {
                    httprespse = fallbackClient.execute(httpgeturl);
                } catch (Exception e2) {
                    return "";
                }
            }

            HttpEntity entitydata = httprespse.getEntity();
            if (entitydata == null) {
                // Responses without a body would otherwise fail inside EntityUtils.
                return "";
            }
            Header lastModify = httprespse.getFirstHeader("Last-Modified");
            if (lastModify == null) {
                lastModify = httprespse.getLastHeader("Last-Modified");
            }

            String charstype = EntityUtils.getContentCharSet(entitydata);
            String charset = charstype != null ? charstype : LocateCharSet(infourl);
            charset = Utility.charsetcheck(charset);

            String infodata;
            try {
                infodata = EntityUtils.toString(entitydata, charset);
            } catch (Exception e) {
                return "";
            }
            if (StringUtils.isEmpty(infodata)) {
                // Nothing downloaded: the caller moves on to the next item.
                return "";
            }

            String contentCharset = Utility.getWebEncodingByStr(infodata);
            if (charset == null || contentCharset == null) {
                return null;
            }
            cwbm.setCharset(charset);
            cwbm.setLastModify(lastModify == null ? "" : lastModify.getValue());
            return infodata;
        } finally {
            // Release the connection and both clients on every exit path; they used
            // to be leaked on each call and on every early return.
            httpgeturl.releaseConnection();
            for (CloseableHttpClient client : new CloseableHttpClient[] {httpClient, fallbackClient}) {
                if (client != null) {
                    try {
                        client.close();
                    } catch (IOException ignored) {
                        // Closing is best effort; the page content is already read.
                    }
                }
            }
        }
    }

    /**
     * Fills {@code docInfo} from {@code htmlContent} using the CSS selectors of
     * {@code siteTemplate}.
     *
     * <p>Each selector is applied only when it is non-empty, and a field is only
     * overwritten when the selector yields non-empty text. If the content
     * selector is set but produces no content, the method returns immediately
     * and the author, publish date, summary and origin selectors are not applied.
     *
     * @param htmlContent raw HTML of the page; {@code Jsoup.parse} rejects null
     * @param docInfo document to update in place
     * @param siteTemplate selectors for title, content, author, date, summary, origin
     * @return the same {@code docInfo} instance
     */
    public DocInfo doPaserByTag(String htmlContent, DocInfo docInfo, SiteTemplate siteTemplate){
        Document doc =  Jsoup.parse(htmlContent);

        if (StringUtils.isNotEmpty(siteTemplate.getMatchTitle())) {
            String title = paseElementByCSS(doc, siteTemplate.getMatchTitle());
            if (StringUtils.isNotEmpty(title)) {
                docInfo.setTitle(title.replace("...", ""));
            }
        }

        if (StringUtils.isNotEmpty(siteTemplate.getMatchContent())) {
            Elements elementsByTag = doc.select(siteTemplate.getMatchContent());
            String contentWithTag = Utility.RemoveUselessHTMLTagX(elementsByTag.html());
            if (contentWithTag == null || contentWithTag.trim().length() == 0) {
                // No body text matched: stop here, remaining selectors are skipped.
                return docInfo;
            }
            docInfo.setContentWithTag(contentWithTag);
            docInfo.setContentNoTag(Utility.TransferHTML2Text(contentWithTag).replaceAll("\\n",""));
        }

        if (StringUtils.isNotEmpty(siteTemplate.getMatchAuthor())) {
            String author = paseElementByCSS(doc, siteTemplate.getMatchAuthor());
            if (StringUtils.isNotEmpty(author)) {
                docInfo.setAuthor(author);
            }
        }

        if (StringUtils.isNotEmpty(siteTemplate.getMatchPublishDate())) {
            String publishDate = paseElementByCSS(doc, siteTemplate.getMatchPublishDate());
            if (StringUtils.isNotEmpty(publishDate)) {
                docInfo.setPublishDate(DateUtil.getPublishDate(publishDate));
            }
        }

        if (StringUtils.isNotEmpty(siteTemplate.getMatchSummary())) {
            String summary = paseElementByCSS(doc, siteTemplate.getMatchSummary());
            if (StringUtils.isNotEmpty(summary)) {
                docInfo.setSummary(summary);
            }
        }

        if (StringUtils.isNotEmpty(siteTemplate.getMatchOrigin())) {
            String origin = paseElementByCSS(doc, siteTemplate.getMatchOrigin());
            if (StringUtils.isNotEmpty(origin)) {
                docInfo.setOrigin(origin);
            }
        }
        return docInfo;
    }
    /**
     * Returns the trimmed text of the first element matching a CSS selector.
     *
     * @param doc parsed document to search
     * @param tag CSS selector
     * @return the trimmed text of the first match, or {@code ""} when nothing
     *         matches or the selector cannot be evaluated
     */
    public String paseElementByCSS(Document doc,String tag){
        String msg="";
        try {
            Elements elements = doc.select(tag);
            if (elements.size() > 0) {
                msg = elements.get(0).text().trim();
            }
        }catch (Exception e){
            // Invalid selector or null input: report it and fall back to "".
            e.printStackTrace();
        }
        // Returned after the try block rather than from a finally clause, which
        // silently discarded every Throwable (including Errors).
        return msg;
    }
//  保存有问题的站点
    /**
     * Records a page that could not be parsed and registers its site if unknown.
     *
     * <p>A parse-error row is saved unless the page's host already contains the
     * domain of an existing error row; likewise a site row is saved unless the
     * host contains the {@code domainUri} of an existing site. Failures are
     * reported and otherwise ignored, so this method never throws.
     *
     * @param cwbm search hit that failed; supplies address, title and source site
     * @param keywords search keywords stored with the error row
     */
    public void saveErrorSite( CatchWebByMetaSearch cwbm,String keywords){
        try {
            String infourl = cwbm.getSourceaddress();
            String domainurl = new URL(infourl).getHost();

            List<PaserErroreMsg> paserErroreMsgs = paserErroreMsgService.list();
            boolean errorAlreadyRecorded = paserErroreMsgs != null && paserErroreMsgs.stream()
                    .anyMatch(item -> item != null && hostContainsDomain(domainurl, item.getDomainUrl()));
            if (!errorAlreadyRecorded) {
                PaserErroreMsg paserErroreMsg = new PaserErroreMsg();
                paserErroreMsg.setKeywords(keywords);
                paserErroreMsg.setDomainUrl(domainurl);
                paserErroreMsg.setInfoUrl(infourl);
                paserErroreMsg.setContTitle(cwbm.getTitle());
                paserErroreMsgService.save(paserErroreMsg);
            }

            List<Site> siteList = siteService.list();
            boolean siteAlreadyKnown = siteList != null && siteList.stream()
                    .anyMatch(item -> item != null && hostContainsDomain(domainurl, item.getDomainUri()));
            if (!siteAlreadyKnown) {
                Site site = new Site();
                site.setDomainUri(domainurl);
                site.setUri(infourl);
                site.setName(cwbm.getSourcesite());
                siteService.save(site);
            }
        }catch (Exception e){
            System.out.println("保存失败");
            // Keep the cause visible instead of dropping it.
            e.printStackTrace();
        }
    }

    /**
     * Tells whether {@code host} contains a stored domain. Null and empty stored
     * domains never match: a null one used to raise an exception that aborted
     * both saves, and an empty one matched every host.
     */
    private static boolean hostContainsDomain(String host, String storedDomain) {
        return storedDomain != null && !storedDomain.isEmpty() && host.contains(storedDomain);
    }
    // 抓取新闻内容
    /**
     * Downloads and parses every search hit in {@code catchWebList}.
     *
     * <p>For each hit the page is fetched with {@code getContent}, a
     * {@code DocInfo} is pre-filled from the hit's metadata, and the site's
     * template (when it has a title selector longer than one character) is
     * applied with {@code doPaserByTag}. Hits without a usable template, or
     * whose parsing throws, are recorded through {@code saveErrorSite} but are
     * still returned with the pre-filled metadata. Hits whose download throws
     * are recorded and left out of the result.
     *
     * <p>The document id is the hit's 1-based position in the input list.
     * Persisting results as process items is not done here.
     *
     * @param catchWebList search hits to process; null yields an empty result
     * @param keywords keywords stored on each document and on error records
     * @return the documents that were produced, never {@code null}
     */
    public List<DocInfo> CatchWebNews(List<CatchWebByMetaSearch> catchWebList,String keywords) {
        List<DocInfo> docInfoList=new ArrayList<>();
        if (catchWebList == null) {
            return docInfoList;
        }
        for (int i = 0; i < catchWebList.size(); i++) {
            CatchWebByMetaSearch cwbm = catchWebList.get(i);
            if (cwbm == null) {
                // A null entry used to abort the whole batch from inside the catch block.
                continue;
            }
            try {
                // Download the page content.
                String content = getContent(cwbm);
                DocInfo docInfo = new DocInfo();
                try {
                    String infourl = cwbm.getSourceaddress();
                    SiteTemplate siteTemp = getSiteTemp(infourl);
                    docInfo.setContentType("HTML");
                    docInfo.setOrgId(cwbm.getOrgId());
                    docInfo.setSid(1111L);
                    docInfo.setSourceType("News");
                    docInfo.setLastModified(cwbm.getLastModify());
                    docInfo.setCharset("utf-8");
                    docInfo.setSourceaddress(cwbm.getSourceaddress());
                    // A hit without a title must not abort template parsing.
                    String title = cwbm.getTitle();
                    docInfo.setTitle(title == null ? null : title.replace("...", ""));
                    docInfo.setAuthor(cwbm.getAuthor());
                    docInfo.setPublishDate(cwbm.getPublishDate());
                    docInfo.setKeywords(keywords);
                    docInfo.setSummary(cwbm.getSummary());
                    // Overlay the fields extracted with the site's template.
                    if (null!=siteTemp&&null!=siteTemp.getMatchTitle()&& siteTemp.getMatchTitle().length()>1) {
                        try {
                            docInfo = doPaserByTag(content, docInfo, siteTemp);
                        }catch (Exception e){
                            saveErrorSite(cwbm,keywords);
                        }
                    } else {
                        saveErrorSite(cwbm,keywords);
                        log.info(cwbm.getTitle()+"+++++"+infourl);
                    }
                } catch (Exception e1) {
                    log.error("Failed to build document for " + cwbm.getSourceaddress(), e1);
                }
                // Id is the 1-based position of the hit in the input list.
                docInfo.setId((i + 1)+"");
                docInfoList.add(docInfo);
            } catch (Exception e) {
                log.error("访问出错！", e);
                saveErrorSite(cwbm,keywords);
                log.info(cwbm.getTitle()+"+++++");
            }
        }
        // Report the documents actually produced; the old counter was incremented
        // before each attempt and therefore also counted failures.
        log.info("本次成功件数：" + docInfoList.size());
        return docInfoList;
    }
    // 提取百度新闻列表URL
    /**
     * Downloads Baidu news result pages and extracts one search hit per
     * {@code div.result-op} element.
     *
     * <p>Each URL is fetched through the shared {@code pageDownload} after an
     * 8.5 second pause. A URL whose download or parsing fails is retried, but
     * at most three times in total; it used to be retried without limit, which
     * never terminated on a persistent failure. Malformed URLs are skipped
     * without retry. If the thread is interrupted while waiting, the interrupt
     * flag is restored and the hits collected so far are returned. The page
     * content is no longer dumped to the fixed debug file
     * {@code D:\output333111.txt}.
     *
     * @param urlList result page URLs; null yields an empty list
     * @param charset charset passed to the downloader
     * @param orgId organisation id stored on every hit
     * @param tid task id stored on every hit
     * @return the extracted hits in page order, never {@code null}
     */
    @SuppressWarnings("deprecation")
    public static List<CatchWebByMetaSearch> CatchWebOfBaiduByProxy(
            List<String> urlList, String charset, Long orgId, Long tid) {
        List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();
        if (urlList == null) {
            return catchWebByMetaSearchList;
        }
        final int maxAttempts = 3;
        for (int i = 0; i < urlList.size(); i++) {
            String urlx = urlList.get(i);
            for (int attempt = 1; attempt <= maxAttempts; attempt++) {
                try {
                    // Fail fast on malformed input; retrying cannot fix it.
                    new URL(urlx);
                    // Pause between requests to the search engine.
                    Thread.sleep(8500L);
                    String rtnStr = pageDownload.downloadByWebClient(urlx,charset);
                    Document doc = Jsoup.parse(rtnStr);
                    System.out.println("----百度搜索----" + urlx);
                    catchWebByMetaSearchList.addAll(parseBaiduResults(doc, orgId, tid));
                    break;
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return catchWebByMetaSearchList;
                } catch (java.net.MalformedURLException e) {
                    e.printStackTrace();
                    break;
                } catch (Exception e) {
                    // Download or parse failure: try again until attempts run out.
                    e.printStackTrace();
                }
            }
        }
        return catchWebByMetaSearchList;
    }

    /** Converts every {@code div.result-op} element of a result page into a search hit. */
    private static List<CatchWebByMetaSearch> parseBaiduResults(Document doc, Long orgId, Long tid) {
        List<CatchWebByMetaSearch> metaSearchList = new ArrayList<CatchWebByMetaSearch>();
        Elements firstElementsLink = doc.select("div.result-op");
        for (int m=0;m<firstElementsLink.size();m++) {
            CatchWebByMetaSearch catchWebByMetaSearch = new CatchWebByMetaSearch();

            Elements orainAndDate = firstElementsLink.get(m).select("span");
            if (orainAndDate.size()>0) {
                // The publish date is parsed from the combined text of all spans.
                String orainAndDatestr = orainAndDate.text();
                catchWebByMetaSearch.setPublishDate(DateUtil.getPublishDate(orainAndDatestr));
                // The first span is taken as the source site name.
                catchWebByMetaSearch.setSourcesite(orainAndDate.get(0).text());
            }

            Elements titleAndUrl = firstElementsLink.get(m).select("a[data-click]");
            if (titleAndUrl.size()>0) {
                catchWebByMetaSearch.setTitle(titleAndUrl.get(0).text().trim());
                String addressurl= titleAndUrl.attr("href");
                catchWebByMetaSearch.setSourceaddress(addressurl);
                System.out.println(addressurl);
            }

            catchWebByMetaSearch.setOrgId(orgId);
            catchWebByMetaSearch.setTid(tid);
            metaSearchList.add(catchWebByMetaSearch);
        }
        return metaSearchList;
    }
    /**
     * Rewrites the file (image) references found in an article body.
     *
     * <p>{@code ContentFileFinder.getContentFileTag} supplies a map from the
     * reference as it appears in the content to its {@code FileTag}. Every
     * occurrence of a reference is replaced by the tag's save tag, and the tags
     * are returned keyed by their absolute path. The "absolute" content of the
     * result is the unmodified input.
     *
     * <p>Created by Li Dongliang, 2016-5-10.
     *
     * @param contentWithTag article HTML
     * @param sourceaddress address of the page the content came from
     * @return converted content, original content and the file map
     * @throws Exception if the file references cannot be collected
     */
    private ContentFileResult getContentFile(String contentWithTag,String sourceaddress)throws Exception{
        String contentImgCvtTag = contentWithTag;
        Map<String, FileTag> imgDataMap = ContentFileFinder.getContentFileTag(contentWithTag,sourceaddress);
        // Keyed by the crawled (absolute) path of each file.
        Map<String, FileTag> imgMap = new HashMap<String, FileTag>();
        for (Map.Entry<String, FileTag> entry : imgDataMap.entrySet()) {
            String key = entry.getKey();
            FileTag fileTag = entry.getValue();
            // String.replace already substitutes every occurrence. The former
            // while(contains) loop never terminated when the save tag itself
            // contained the key, or when the key was empty.
            if (key != null && !key.isEmpty()) {
                contentImgCvtTag = contentImgCvtTag.replace(key, fileTag.getSaveTag());
            }
            imgMap.put(fileTag.getAbsolutePath(), fileTag);
        }

        ContentFileResult cis = new ContentFileResult();
        cis.setContentAbsoulute(contentWithTag);
        cis.setContentImgCvtTag(contentImgCvtTag);
        cis.setFileMap(imgMap);
        return cis;
    }
    /**
     * Downloads the resource at {@code dataUrl} and returns its bytes as a stream.
     *
     * <p>The body is read fully into memory so that the HTTP response and client
     * can be closed before returning; previously both were leaked and the
     * returned stream was tied to an open connection. The returned stream needs
     * no network access and may be closed or discarded freely.
     *
     * @param dataUrl address of the resource
     * @return an in-memory stream over the response body, or {@code null} when
     *         the response has no body or the download fails
     */
    public InputStream getImg(String dataUrl){
        try (CloseableHttpClient httpClient = createSSLClientDefault()) {
            HttpGet get = new HttpGet();
            get.setURI(new URI(dataUrl));
            try (CloseableHttpResponse response = httpClient.execute(get)) {
                HttpEntity entity = response.getEntity();
                if (entity == null) {
                    return null;
                }
                try (InputStream body = entity.getContent()) {
                    if (body == null) {
                        return null;
                    }
                    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                    byte[] chunk = new byte[8192];
                    int read;
                    while ((read = body.read(chunk)) != -1) {
                        buffer.write(chunk, 0, read);
                    }
                    return new ByteArrayInputStream(buffer.toByteArray());
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Picks one proxy descriptor at random.
     *
     * <p>Each descriptor is four dash-separated fields; {@code getHttpClient}
     * reads them as host, port, user name and password.
     * NOTE(review): the credentials are embedded in source; consider moving
     * them to configuration.
     *
     * @return one of the configured descriptors
     */
    public static String  getProxyIp(){
        List<String> candidates = Arrays.asList(
                "106.111.73.25-35946-hys_81310170_41c8-12345678",
                "60.184.197.64-34013-hys_81310170_41c8-12345678",
                "114.99.221.245-39604-hys_81310170_41c8-12345678",
                "49.82.130.253-52312-hys_81310170_41c8-12345678");
        int pick = new Random().nextInt(candidates.size());
        return candidates.get(pick);
    }

    /**
     * Creates an HTTP client that routes through a randomly chosen proxy.
     *
     * <p>The descriptor from {@code getProxyIp()} is split on {@code '-'} into
     * host, port, user name and password; the credentials are registered for
     * that host and port and the proxy is set as the client's default route.
     *
     * @return a new client configured with the proxy and its credentials
     */
    public static HttpClient getHttpClient() {
        String[] fields = getProxyIp().split("-");
        DefaultHttpClient client = new DefaultHttpClient();

        String host = fields[0];
        int port = Integer.parseInt(fields[1]);
        UsernamePasswordCredentials login = new UsernamePasswordCredentials(fields[2], fields[3]);

        client.getCredentialsProvider().setCredentials(new AuthScope(host, port), login);
        client.getParams().setParameter(ConnRouteParams.DEFAULT_PROXY, new HttpHost(host, port));
        return client;
    }
    //转换qq新闻链接
    /**
     * Rewrites a qq.com article address into the new.qq.com form
     * {@code https://new.qq.com/omn/<date>/<name>.html}.
     *
     * <p>{@code <name>} is everything after the last {@code '/'} of the input and
     * {@code <date>} is the first run of digits inside that name (empty when
     * the name has no digits).
     *
     * @param oldurl original article address
     * @return the rewritten address
     */
    public static String transqqURl(String oldurl){
        int lastSlash = oldurl.lastIndexOf("/");
        String articleName = oldurl.substring(lastSlash + 1);
        String leadingDigits = getNumbers(articleName);
        return "https://new.qq.com/omn/[date]/[pamars].html"
                .replace("[date]", leadingDigits)
                .replace("[pamars]", articleName);
    }

    /**
     * Returns the first run of consecutive digits in {@code content}.
     *
     * @param content text to scan
     * @return the first digit sequence, or {@code ""} when there is none
     */
    public static String getNumbers(String content) {
        Matcher digits = Pattern.compile("\\d+").matcher(content);
        return digits.find() ? digits.group(0) : "";
    }
  // 抓取新闻内容
  /**
   * Downloads a single page and parses it with the template stored for its site.
   *
   * <p>The returned document is pre-filled with fixed metadata and, when the
   * site's stored template has a title selector longer than one character,
   * overlaid with the fields extracted by {@code doPaserByTag}. Any failure
   * is reported and the document built so far is returned.
   *
   * <p>NOTE(review): {@code siteTemplate} is not used; the template is always
   * looked up with {@code getSiteTemp(url)}. Confirm whether callers expect
   * the supplied template to be applied instead.
   *
   * @param siteTemplate currently ignored
   * @param url address of the page to download and parse
   * @return the parsed document, never {@code null}
   */
  public DocInfo catchWebNews(SiteTemplate siteTemplate,String url) {
        DocInfo docInfo = new DocInfo();
        try {
            CatchWebByMetaSearch cwbm = new CatchWebByMetaSearch();
            cwbm.setSourceaddress(url);
            // Fetch the page content.
            String content = getContent(cwbm);

            SiteTemplate siteTemp = getSiteTemp(cwbm.getSourceaddress());
            docInfo.setContentType("HTML");
            docInfo.setOrgId(222L);
            docInfo.setSid(1111L);
            docInfo.setSourceType("News");
            docInfo.setCharset("utf-8");
            docInfo.setTitle("");
            docInfo.setKeywords("");
            if (null!=siteTemp&&null!=siteTemp.getMatchTitle()&& siteTemp.getMatchTitle().length()>1) {
                docInfo = doPaserByTag(content, docInfo, siteTemp);
            }
        } catch (Exception e) {
            // Every failure ends the same way: report it and hand back what was built.
            e.printStackTrace();
        }
        return docInfo;
  }

  /**
   * Builds an HTTP client whose SSL context accepts every certificate chain.
   *
   * <p>NOTE(review): certificate validation is disabled on purpose here;
   * responses fetched with this client are not authenticated.
   *
   * @return the trust-all client, or a default client when the SSL context
   *         cannot be built
   */
  private  CloseableHttpClient createSSLClientDefault(){
    // Trust every certificate chain.
    TrustStrategy acceptAll = (chain, authType) -> true;
    try {
      SSLContext context = new SSLContextBuilder().loadTrustMaterial(null, acceptAll).build();
      SSLConnectionSocketFactory socketFactory = new SSLConnectionSocketFactory(context);
      return HttpClients.custom().setSSLSocketFactory(socketFactory).build();
    } catch (KeyManagementException | NoSuchAlgorithmException | KeyStoreException e) {
      e.printStackTrace();
    }
    return  HttpClients.createDefault();
  }
  /**
   * Builds an HTTP client like {@code createSSLClientDefault()} but with the
   * SSL context protocol set to {@code "TLSv1.2"}.
   *
   * <p>NOTE(review): every certificate chain is accepted; responses fetched
   * with this client are not authenticated.
   *
   * @return the trust-all TLSv1.2 client, or a default client when the SSL
   *         context cannot be built
   */
  private  CloseableHttpClient createSSLClientDefaulttsl12(){
    // Trust every certificate chain.
    TrustStrategy acceptAll = (chain, authType) -> true;
    try {
      SSLContext context = new SSLContextBuilder()
              .useProtocol("TLSv1.2")
              .loadTrustMaterial(null, acceptAll)
              .build();
      SSLConnectionSocketFactory socketFactory = new SSLConnectionSocketFactory(context);
      return HttpClients.custom().setSSLSocketFactory(socketFactory).build();
    } catch (KeyManagementException | NoSuchAlgorithmException | KeyStoreException e) {
      e.printStackTrace();
    }
    return  HttpClients.createDefault();
  }
  // 获取所要抓取网页的编码方式
  /**
   * Detects the charset declared in the {@code <meta>} tags of a page.
   *
   * <p>The page is fetched with jsoup and its meta tags are scanned in order.
   * The first tag containing {@code charset} decides the result: the text
   * after {@code charset=} is used, and when that is empty (the value was
   * quoted) the quoted form {@code charset="..."} is tried.
   *
   * @param url address of the page to inspect
   * @return the declared charset, or {@code "gbk"} when none is found, the
   *         declaration is empty, or the page cannot be fetched
   */
  private String LocateCharSet(String url) {
    final String defaultEncoding = "gbk";
    try {
      Connection conn = Jsoup.connect(url);
      // Present the request as coming from a browser.
      conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36)");
      Document doc = conn.ignoreContentType(true).timeout(10000).get();

      // Compiled once per call instead of once per meta tag.
      Pattern metaTag = Pattern.compile("<meta[^>]*>",
              Pattern.CASE_INSENSITIVE);
      Pattern bareCharset = Pattern.compile("charset[^\\s||\"||;||'||>]*");
      Pattern quotedCharset = Pattern.compile("charset=\"[^\\s||\"||;||>]*");

      Matcher m1 = metaTag.matcher(doc.toString());
      while (m1.find()) {
        String tag = m1.group();
        Matcher m2 = bareCharset.matcher(tag);
        if (!m2.find()) {
          continue;
        }
        String declaration = m2.group();
        // "charset=" is 8 characters. A bare "charset" (7 characters, e.g.
        // "charset = x") used to make substring(8) throw
        // StringIndexOutOfBoundsException, which nothing caught.
        String encoding = declaration.length() >= 8 ? declaration.substring(8) : "";
        if (encoding.trim().length() == 0) {
          Matcher m3 = quotedCharset.matcher(tag);
          if (m3.find()) {
            // Skip the 9 characters of charset=" to reach the value.
            encoding = m3.group().substring(9);
          }
          if (encoding.trim().length() == 0) {
            encoding = defaultEncoding;
          }
        }
        return encoding;
      }
    } catch (IOException e) {
      log.error("获取出错编码方式", e);
      return defaultEncoding;
    }
    return defaultEncoding;
  }

  /**
   * Loads {@code constants.properties} through Spring's default resource loader.
   *
   * <p>A load failure is logged and whatever was read so far (possibly
   * nothing) is returned; a failure while closing the stream is ignored.
   * NOTE(review): the loaded properties are written to the debug log in full;
   * confirm the file holds nothing sensitive.
   *
   * @return the loaded properties, never {@code null}
   */
  public static Properties getConfig() {
    final String location = "constants.properties";
    final Properties loaded = new Properties();
    InputStream stream = null;
    try {
      stream = new DefaultResourceLoader().getResource(location).getInputStream();
      loaded.load(stream);
      log.debug("jdbc config: {}", loaded.toString());
    } catch (IOException ex) {
      log.error("Could not load property file:" + location, ex);
    } finally {
      if (stream != null) {
        try {
          stream.close();
        } catch (IOException ignored) {
          // Nothing useful can be done when closing fails.
        }
      }
    }
    return loaded;
  }
}
