刘伟刚 / meta_crawler · Commits

Commit cc9aa52f, authored Jul 20, 2022 by 张文库
Parent: f314a48b

Showing 22 changed files with 256 additions and 170 deletions (+256 -170)
SiteInfoVerify.java  comm_crawler/src/main/java/com/zzsn/api/SiteInfoVerify.java  (+9 -1)
DynaminSiteThread.java  ...ler/src/main/java/com/zzsn/crawler/DynaminSiteThread.java  (+9 -4)
PaserSiteDownload.java  ...ler/src/main/java/com/zzsn/crawler/PaserSiteDownload.java  (+23 -43)
SiteThread.java  comm_crawler/src/main/java/com/zzsn/crawler/SiteThread.java  (+9 -3)
PaserCommDownload.java  ...c/main/java/com/zzsn/crawler/paser/PaserCommDownload.java  (+1 -1)
WebContentPaserByCss.java  ...ain/java/com/zzsn/crawler/paser/WebContentPaserByCss.java  (+10 -12)
WebContentPaserByJsonXpath.java  ...va/com/zzsn/crawler/paser/WebContentPaserByJsonXpath.java  (+39 -14)
WebContentPaserByRegular.java  ...java/com/zzsn/crawler/paser/WebContentPaserByRegular.java  (+0 -0)
WebContentPaserByXpath.java  ...n/java/com/zzsn/crawler/paser/WebContentPaserByXpath.java  (+29 -7)
SeleniumTime.java  ...rc/main/java/com/zzsn/crawler/uriparser/SeleniumTime.java  (+8 -12)
WebPageScreenShot.java  ...in/java/com/zzsn/crawler/uriparser/WebPageScreenShot.java  (+3 -3)
ArticleCrawlerThread.java  ...main/java/com/zzsn/crawlerOther/ArticleCrawlerThread.java  (+1 -1)
PaserCommDownload.java  ...n/java/com/zzsn/crawlerOther/paser/PaserCommDownload.java  (+1 -1)
WebContentPaserByJsonXpath.java  ...m/zzsn/crawlerOther/paser/WebContentPaserByJsonXpath.java  (+3 -3)
WebContentPaserByRegular.java  ...com/zzsn/crawlerOther/paser/WebContentPaserByRegular.java  (+2 -2)
WebContentPaserByXpath.java  ...a/com/zzsn/crawlerOther/paser/WebContentPaserByXpath.java  (+3 -3)
PageConnectioner.java  ...ler/src/main/java/com/zzsn/download/PageConnectioner.java  (+62 -26)
PageDownloader.java  ...awler/src/main/java/com/zzsn/download/PageDownloader.java  (+19 -14)
ClbAnsProcessitem.java  ...wler/src/main/java/com/zzsn/entity/ClbAnsProcessitem.java  (+1 -1)
KafkaConsumerJob.java  ..._crawler/src/main/java/com/zzsn/job/KafkaConsumerJob.java  (+16 -11)
constants.properties  comm_crawler/src/main/resources/constants.properties  (+6 -6)
ThreadExecutorConfig.java  ...rch/src/main/java/com/zzsn/conf/ThreadExecutorConfig.java  (+2 -2)
comm_crawler/src/main/java/com/zzsn/api/SiteInfoVerify.java

@@ -29,7 +29,11 @@ public class SiteInfoVerify{
     List<String> urlList = getPageListUrl(siteMsgTemple);
     String charset = "utf-8";
+    if (siteMsgTemple.getYnDynamicCrawl() != 1) {
+        try {
+            charset = paserSiteDownload.getCharSet(urlList.get(0));
+        } catch (IOException e) {
+            //
+        }
+    }

@@ -82,7 +86,11 @@ public class SiteInfoVerify{
         PaserSiteDownload paserSiteDownload = new PaserSiteDownload();
         charset = paserSiteDownload.locateCharSet(urlList.get(0));
     } catch (Exception e) {
+        try {
+            charset = paserSiteDownload.getCharSet(urlList.get(0));
+        } catch (IOException ex) {
+            //
+        }
     }
     //判断解析表达式类型
     if (siteMsgTemple.getListExpressionType().equals("3")) { //css表达式

@@ -165,7 +173,7 @@ public class SiteInfoVerify{
     clbAnsProcessitem.setSid(docInfo.getSid() + "");
     clbAnsProcessitem.setTitle(docInfo.getTitle());
     clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-    clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+    clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
     clbAnsProcessitem.setSummary(docInfo.getSummary());
     clbAnsProcessitem.setAuthor(docInfo.getAuthor());
     clbAnsProcessitem.setOrigin(docInfo.getOrigin());
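Note on the pattern above: getCharSet now declares IOException, so each call site wraps the lookup and keeps a default encoding when it fails. A minimal, self-contained sketch of that fallback, where probeCharset is a hypothetical stand-in for PaserSiteDownload.getCharSet:

    import java.io.IOException;

    public class CharsetFallbackSketch {
        // Stand-in for an HTTP fetch that can fail; always failing keeps the demo offline.
        static String probeCharset(String url) throws IOException {
            throw new IOException("unreachable: " + url);
        }

        public static void main(String[] args) {
            String charset = "utf-8"; // default, as in SiteInfoVerify
            try {
                charset = probeCharset("http://example.com/list");
            } catch (IOException e) {
                // swallow and keep the default, mirroring the empty catch in the diff
            }
            System.out.println("charset = " + charset); // prints utf-8
        }
    }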
comm_crawler/src/main/java/com/zzsn/crawler/DynaminSiteThread.java

@@ -18,6 +18,7 @@ import org.springframework.kafka.core.KafkaTemplate;
 import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Component;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;

@@ -36,7 +37,7 @@ public class DynaminSiteThread implements Runnable{
         crawler();
     }
-    @Async("asyncexecutorService")
+    // @Async("asyncexecutorService")
     public void crawler() {
         //获取栏目链接以及翻页的链接

@@ -62,8 +63,12 @@ public class DynaminSiteThread implements Runnable{
     String charset = "";
     try {
         charset = paserSiteDownload.locateCharSet(urlList.get(0));
-    } catch (Exception e) {
-    }
+    } catch (Exception e) {
+        try {
+            charset = paserSiteDownload.getCharSet(urlList.get(0));
+        } catch (IOException ex) {
+            //
+        }
+    }
     //获取列表url等信息通过匹配url过滤
     List<CatchWebByMetaSearch> metaSearchList = new ArrayList<>();

@@ -90,8 +95,8 @@ public class DynaminSiteThread implements Runnable{
     WebContentPaserByRegular webContentPaserByRegular = new WebContentPaserByRegular();
     metaSearchList = webContentPaserByRegular.catchWebOfStaticmsgByRegular(urlList, charset, siteMsgTemple);
     }
     // log.info("本次获取列表url: "+metaSearchList.size()+"个");
     //资讯类容抽取
     siteMsgTemple.setDetailExpressionType(siteMsgTemple.getDetailExpressionType() == null ? "0" : siteMsgTemple.getDetailExpressionType());
     //判断解析详情表达式类型
     if (siteMsgTemple.getDetailExpressionType().equals("3")) { //css表达式

@@ -145,7 +150,7 @@ public class DynaminSiteThread implements Runnable{
     clbAnsProcessitem.setSid(docInfo.getSid() + "");
     clbAnsProcessitem.setTitle(docInfo.getTitle());
     clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-    clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+    clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
     clbAnsProcessitem.setSummary(docInfo.getSummary());
     clbAnsProcessitem.setAuthor(docInfo.getAuthor());
     clbAnsProcessitem.setOrigin(docInfo.getOrigin());
comm_crawler/src/main/java/com/zzsn/crawler/PaserSiteDownload.java

@@ -406,11 +406,14 @@ public class PaserSiteDownload {
     return HttpClients.createDefault();
 }
-public static String getCharSet(String url) {
-    String html = "";
+public static String getCharSet(String url) throws IOException {
+    String html = "";
+    HttpResponse httprespse = null;
+    HttpEntity entitydata = null;
     CloseableHttpClient httpClient = CreateSSLClientDefault.createSSLClientDefault();
     try {
         // Thread.sleep(500L);
         HttpGet httpgeturl = new HttpGet(url); // Get请求
         httpgeturl.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 60000);
         httpgeturl.getParams().setParameter(

@@ -422,31 +425,22 @@ public class PaserSiteDownload {
     httpgeturl.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");
     //httpgeturl.setHeader("Accept-Language", "en");
     //httpgeturl.setHeader("Accept-Encoding", "gzip, deflate");
-    HttpResponse httprespse = null;
     try {
         Thread.sleep(500L);
         httprespse = httpClient.execute(httpgeturl);
+        entitydata = httprespse.getEntity(); // 获取返回数据
+        httpgeturl.releaseConnection();
     } catch (Exception e2) {
         // TODO Auto-generated catch block
         // e2.printStackTrace();
         log.info("请求访问失败!");
         return "utf-8";
-    }
-    // 发送请求
-    HttpEntity entitydata = httprespse.getEntity(); // 获取返回数据
-    Header lastModify = httprespse.getFirstHeader("Last-Modified");
+    } finally {
+        httpClient.close();
+    }
     String charset = "utf-8";
     String infodata = "";
     try {
         Thread.sleep(500L);
         infodata = EntityUtils.toString(entitydata, charset);
     } catch (Exception e1) {
         // TODO Auto-generated catch block
         e1.printStackTrace();
     }
     httpgeturl.releaseConnection();
     Pattern p1 = Pattern.compile("<meta[^>]*>", Pattern.CASE_INSENSITIVE);

@@ -465,27 +459,24 @@ public class PaserSiteDownload {
             charset = m3.group().substring(9);
         }
         if (charset.trim().length() == 0) {
             // encoding = DetectCharSet.detectCharSet(fileName);
             // if(encoding == null){
             charset = "gbk";
             // }
         }
     }
     return charset;
 }
 }
 return charset;
 }
 public static String getHtml(String url, String charset) {
     String html = "";
     CloseableHttpClient httpClient = CreateSSLClientDefault.createSSLClientDefault();
     HttpGet httpgeturl = new HttpGet(url); // Get请求
-    httpgeturl.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 60000);
-    httpgeturl.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, 60000);
+    httpgeturl.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 20000);
+    httpgeturl.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, 20000);
     // 伪装成浏览器
     httpgeturl.setHeader("Content-Type", "application/x-www-form-urlencoded;charset=utf-8");

@@ -499,16 +490,14 @@ public class PaserSiteDownload {
     httprespse = httpClient.execute(httpgeturl);
 } catch (Exception e2) {
     httpgeturl.releaseConnection();
     // TODO Auto-generated catch block
     // e2.printStackTrace();
     return "";
 }
 // 发送请求
 HttpEntity entitydata = httprespse.getEntity(); // 获取返回数据
-Header lastModify = httprespse.getFirstHeader("Last-Modified");
-if (lastModify == null) {
-    lastModify = httprespse.getLastHeader("Last-Modified");
-}
+// Header lastModify = httprespse
+//         .getFirstHeader("Last-Modified");
+// if (lastModify == null) {
+//     lastModify = httprespse.getLastHeader("Last-Modified");
+// }
 if (charset == null) {
     String charstype = EntityUtils.getContentCharSet(entitydata);

@@ -524,15 +513,13 @@ public class PaserSiteDownload {
 try {
     Thread.sleep(500L);
     infodata = EntityUtils.toString(entitydata, charset);
-    httpgeturl.releaseConnection();
-    httpClient.close();
 } catch (Exception e1) {
     // TODO Auto-generated catch block
     // e1.printStackTrace();
     log.info("内容解析异常");
+} finally {
+    httpgeturl.releaseConnection();
 }
 return infodata;
 }
 // 获取所要抓取网页的编码方式

@@ -542,7 +529,7 @@ public class PaserSiteDownload {
 Connection conn = Jsoup.connect(url);
 conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36)"); // 伪装成浏览器
-Document doc = conn.ignoreContentType(true).timeout(10000).get();
+Document doc = conn.ignoreContentType(true).timeout(5000).get();
 Pattern p1 = Pattern.compile("<meta[^>]*>", Pattern.CASE_INSENSITIVE);

@@ -561,23 +548,16 @@ public class PaserSiteDownload {
             encoding = m3.group().substring(9);
         }
         if (encoding.trim().length() == 0) {
             // encoding = DetectCharSet.detectCharSet(fileName);
             // if(encoding == null){
             encoding = "gbk";
             // }
         }
     }
     return encoding;
 }
 }
 } catch (IOException e) {
     // e.printStackTrace();
     log.error("获取编码方式出错");
     System.out.println("获取编码方式出错");
     return encoding;
 }
 return encoding;
 }

@@ -608,7 +588,7 @@ public class PaserSiteDownload {
 clbAnsProcessitem.setSid(docInfo.getSid() + "");
 clbAnsProcessitem.setTitle(docInfo.getTitle());
 clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
 clbAnsProcessitem.setSummary(docInfo.getSummary());
 clbAnsProcessitem.setAuthor(docInfo.getAuthor());
 clbAnsProcessitem.setOrigin(docInfo.getOrigin());
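Both getCharSet and locateCharSet decide the encoding by scanning <meta ...> tags with a regex and defaulting to gbk when nothing usable is found. A runnable sketch of that sniffing step, using the same <meta[^>]*> pattern visible in the diff (the HTML sample and the exact charset= regex are illustrative):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class MetaCharsetSketch {
        public static void main(String[] args) {
            String html = "<html><head><meta http-equiv=\"Content-Type\""
                    + " content=\"text/html; charset=gb2312\"></head></html>";
            String charset = "";
            // First pass: pull out whole <meta ...> tags, as the diff does.
            Pattern metaTag = Pattern.compile("<meta[^>]*>", Pattern.CASE_INSENSITIVE);
            Matcher m1 = metaTag.matcher(html);
            while (m1.find()) {
                // Second pass: look for a charset attribute inside the tag.
                Pattern charsetAttr = Pattern.compile("charset=[^\"'>; ]+", Pattern.CASE_INSENSITIVE);
                Matcher m3 = charsetAttr.matcher(m1.group());
                if (m3.find()) {
                    charset = m3.group().substring("charset=".length());
                }
            }
            if (charset.trim().length() == 0) {
                charset = "gbk"; // same default as the diff
            }
            System.out.println(charset); // gb2312
        }
    }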
comm_crawler/src/main/java/com/zzsn/crawler/SiteThread.java

@@ -19,6 +19,7 @@ import org.springframework.kafka.core.KafkaTemplate;
 import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Component;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;

@@ -56,7 +57,12 @@ public class SiteThread implements Runnable{
     urlList.addAll(hisUrlList);
 }
 //获取编码
-String charset = paserSiteDownload.getCharSet(urlList.get(0));
+String charset = null;
+try {
+    charset = paserSiteDownload.getCharSet(urlList.get(0));
+} catch (IOException e) {
+    //
+}
 //获取列表url等信息通过匹配url过滤
 List<CatchWebByMetaSearch> metaSearchList = new ArrayList<>();

@@ -85,8 +91,8 @@ public class SiteThread implements Runnable{
 WebContentPaserByRegular webContentPaserByRegular = new WebContentPaserByRegular();
 metaSearchList = webContentPaserByRegular.catchWebOfStaticmsgByRegular(urlList, charset, siteMsgTemple);
 }
 // log.info("本次获取列表url: "+metaSearchList.size()+"个");
 //获取文章详情
 siteMsgTemple.setDetailExpressionType(siteMsgTemple.getDetailExpressionType() == null ? "0" : siteMsgTemple.getDetailExpressionType());
 //判断解析详情表达式类型
 if (siteMsgTemple.getDetailExpressionType().equals("3")) { //css表达式

@@ -138,7 +144,7 @@ public class SiteThread implements Runnable{
 clbAnsProcessitem.setSid(docInfo.getSid() + "");
 clbAnsProcessitem.setTitle(docInfo.getTitle());
 clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
 clbAnsProcessitem.setSummary(docInfo.getSummary());
 clbAnsProcessitem.setAuthor(docInfo.getAuthor());
 clbAnsProcessitem.setOrigin(docInfo.getOrigin());
comm_crawler/src/main/java/com/zzsn/crawler/paser/PaserCommDownload.java

@@ -356,7 +356,7 @@ public class PaserCommDownload {
 clbAnsProcessitem.setSid(docInfo.getSid() + "");
 clbAnsProcessitem.setTitle(docInfo.getTitle());
 clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
 clbAnsProcessitem.setSummary(docInfo.getSummary());
 clbAnsProcessitem.setAuthor(docInfo.getAuthor());
 clbAnsProcessitem.setOrigin(docInfo.getOrigin());
comm_crawler/src/main/java/com/zzsn/crawler/paser/WebContentPaserByCss.java

@@ -78,11 +78,9 @@ public class WebContentPaserByCss {
     TimeUnit.SECONDS.sleep(2);
 }
-if (StringUtils.isEmpty(body) && siteMsgTemple.getYnDynamicCrawl() == 1) { //当body为空和动态时调用
-    sentBadSiteMsg(siteMsgTemple, "动态请求异常", "0");
-} else {
-    sentBadSiteMsg(siteMsgTemple, "静态网络请求异常", "0");
-}
+// if (StringUtils.isEmpty(body)) {
+//     sentBadSiteMsg(siteMsgTemple, "请求异常", "1");
+// }
 if (StringUtils.isNotEmpty(body)) {
     Document doc = Jsoup.parse(body);
     //抽取资讯url

@@ -94,9 +92,9 @@ public class WebContentPaserByCss {
     // catchWebByMetaSearches = parserCrawlerSiteListByCss(siteMsgTemple, doc);
     // catchWebByMetaSearchList.addAll(catchWebByMetaSearches);
     // }
-    if (catchWebByMetaSearches.size() < 1 && siteMsgTemple.getYnDynamicCrawl() == 1) { //提取不到信息时再次调用
-        sentBadSiteMsg(siteMsgTemple, "列表解析配置异常", "1");
-    }
+    // if (catchWebByMetaSearches.size() < 1 && siteMsgTemple.getYnDynamicCrawl() == 1) {//提取不到信息时再次调用
+    //     sentBadSiteMsg(siteMsgTemple, "列表解析配置异常", "1");
+    // }
 }
 if (StringUtils.isNotEmpty(siteMsgTemple.getIsScreenshot()) && siteMsgTemple.getIsScreenshot().contains("1")) {
     String imagUrl = "";

@@ -315,11 +313,11 @@ public class WebContentPaserByCss {
 if (StringUtils.isNotEmpty(content)) {
     docInfo = doPaserByCssTag(content, docInfo, siteMsgTemple);
 } else {
-    sentBadSiteMsg(siteMsgTemple, "解析配置异常", "1");
+    // sentBadSiteMsg(siteMsgTemple,"解析配置异常","1");
     log.info("栏目名称:" + siteMsgTemple.getSiteName() + " 链接请求:" + cwbm.getSourceaddress() + " 内容为空:" + content);
 }
 } catch (Exception e) {
-    sentBadSiteMsg(siteMsgTemple, "解析配置异常", "1");
+    // sentBadSiteMsg(siteMsgTemple,"解析配置异常","1");
     log.info("详情内容解析出现异常:" + cwbm.getSourceaddress());
 }

@@ -329,9 +327,9 @@ public class WebContentPaserByCss {
 docInfo.setId(count + "");
 ClbAnsProcessitem processitem = paserSiteDownload.docInfoTrans2Processitem(docInfo);
 if (siteMsgTemple.getYnDynamicCrawl() == 1) {
-    processitem.setSource("动态爬取");
+    processitem.setSource("2");
 } else {
-    processitem.setSource("静态爬取");
+    processitem.setSource("1");
 }
 String docjson = mapper.writeValueAsString(processitem);
 // kafkaTemplate.send(Constants.KAFKA_PRODUCT_TOPIC, "key", docjson);
comm_crawler/src/main/java/com/zzsn/crawler/paser/WebContentPaserByJsonXpath.java

@@ -8,10 +8,7 @@ import com.zzsn.crawler.PaserSiteDownload;
 import com.zzsn.crawler.uriparser.HtmlPageParser;
 import com.zzsn.download.PageBuilderParser;
 import com.zzsn.download.PageDownloader;
-import com.zzsn.entity.CatchWebByMetaSearch;
-import com.zzsn.entity.ClbAnsProcessitem;
-import com.zzsn.entity.DocInfo;
-import com.zzsn.entity.SiteMsgTemple;
+import com.zzsn.entity.*;
 import com.zzsn.generation.Constants;
 import com.zzsn.job.JedisUtil;
 import com.zzsn.util.ContentUtility;

@@ -84,10 +81,12 @@ public class WebContentPaserByJsonXpath {
         }
     }
 }
 if (StringUtils.isNotEmpty(body)) {
     //抽取资讯url
     List<CatchWebByMetaSearch> catchWebByMetaSearches = parserCrawlerSiteListByJsonpath(siteMsgTemple, body);
     catchWebByMetaSearchList.addAll(catchWebByMetaSearches);
+} else {
+    // sentBadSiteMsg(siteMsgTemple,"网络访问请求异常","1");
 }
 } catch (Exception e) {
     log.info("列表下载异常 对应的链接:" + uri_code);

@@ -239,18 +238,18 @@ public class WebContentPaserByJsonXpath {
 try {
     ClbAnsProcessitem processitem = docInfoTrans2Processitem(docInfo);
     if (siteMsgTemple.getYnDynamicCrawl() == 1) {
-        processitem.setSource("动态爬取");
+        processitem.setSource("2");
     } else {
-        processitem.setSource("静态爬取");
+        processitem.setSource("1");
     }
     String docjson = mapper.writeValueAsString(processitem);
     // kafkaTemplate.send(Constants.KAFKA_PRODUCT_TOPIC, "key", docjson);
-    int partition = 0;
-    try {
-        partition = Integer.parseInt(Constants.KAFKA_PRODUCT_PARTITION);
-    } catch (Exception e) {
-        log.info("分区配置异常:" + Constants.KAFKA_PRODUCT_PARTITION);
-    }
+    // int partition=0;
+    // try {
+    //     partition = Integer.parseInt(Constants.KAFKA_PRODUCT_PARTITION);
+    // }catch (Exception e){
+    //     log.info("分区配置异常:"+Constants.KAFKA_PRODUCT_PARTITION);
+    // }
     kafkaTemplate.send(Constants.KAFKA_PRODUCT_TOPIC, docjson);
     docInfoList.add(docInfo);
     log.info("发送到kafka成功。");

@@ -343,7 +342,7 @@ public class WebContentPaserByJsonXpath {
 clbAnsProcessitem.setSid(docInfo.getSid() + "");
 clbAnsProcessitem.setTitle(docInfo.getTitle());
 clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
 clbAnsProcessitem.setSummary(docInfo.getSummary());
 clbAnsProcessitem.setAuthor(docInfo.getAuthor());
 clbAnsProcessitem.setOrigin(docInfo.getOrigin());

@@ -580,4 +579,30 @@ public class WebContentPaserByJsonXpath {
     return encoding;
 }
+/**
+ * @param siteMsgTemple
+ * @param msg 异常信息
+ * @param problemType 问题类型(1:信息源异常 2:爬取类别设置异常
+ */
+public void sentBadSiteMsg(SiteMsgTemple siteMsgTemple, String msg, String problemType) {
+    try {
+        BadSiteMsg badSiteMsg = new BadSiteMsg();
+        badSiteMsg.setId(siteMsgTemple.getId());
+        badSiteMsg.setInfoSourceCode(siteMsgTemple.getInfoSourceCode());
+        badSiteMsg.setWebSiteName(siteMsgTemple.getWebSiteName());
+        badSiteMsg.setSiteName(siteMsgTemple.getSiteName());
+        badSiteMsg.setSiteUri(siteMsgTemple.getSiteUri());
+        badSiteMsg.setErrorType(msg);
+        badSiteMsg.setProblemType(problemType);
+        String crawlerType = siteMsgTemple.getYnDynamicCrawl() != 1 ? "0" : siteMsgTemple.getYnDynamicCrawl() + "";
+        badSiteMsg.setCrawlerType(crawlerType);
+        ObjectMapper mapper = new ObjectMapper();
+        String docjson = mapper.writeValueAsString(badSiteMsg);
+        kafkaTemplate.send("badSiteTopic", docjson);
+        log.info("信息源问题:" + msg);
+    } catch (Exception e) {
+    }
+}
+}
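The new sentBadSiteMsg builds a BadSiteMsg, serializes it with Jackson, and publishes the JSON to the badSiteTopic topic. A cut-down sketch of the serialization half, where BadSiteStub is a stand-in for the real BadSiteMsg entity:

    import com.fasterxml.jackson.databind.ObjectMapper;

    public class BadSiteJsonSketch {
        public static class BadSiteStub {
            private String siteName = "示例栏目";
            private String errorType = "列表解析配置异常";
            private String problemType = "1";
            public String getSiteName() { return siteName; }
            public String getErrorType() { return errorType; }
            public String getProblemType() { return problemType; }
        }

        public static void main(String[] args) throws Exception {
            ObjectMapper mapper = new ObjectMapper();
            // This string is what kafkaTemplate.send("badSiteTopic", docjson) would carry.
            String docjson = mapper.writeValueAsString(new BadSiteStub());
            System.out.println(docjson);
            // {"siteName":"示例栏目","errorType":"列表解析配置异常","problemType":"1"}
        }
    }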
comm_crawler/src/main/java/com/zzsn/crawler/paser/WebContentPaserByRegular.java

(Diff collapsed in the original view; +0 -0.)
comm_crawler/src/main/java/com/zzsn/crawler/paser/WebContentPaserByXpath.java

@@ -9,10 +9,7 @@ import com.zzsn.crawler.uriparser.SeleniumTime;
 import com.zzsn.crawler.uriparser.WebPageScreenShot;
 import com.zzsn.download.PageBuilderParser;
 import com.zzsn.download.PageDownloader;
-import com.zzsn.entity.CatchWebByMetaSearch;
-import com.zzsn.entity.ClbAnsProcessitem;
-import com.zzsn.entity.DocInfo;
-import com.zzsn.entity.SiteMsgTemple;
+import com.zzsn.entity.*;
 import com.zzsn.generation.Constants;
 import com.zzsn.job.JedisUtil;
 import com.zzsn.util.*;

@@ -105,6 +102,9 @@ public class WebContentPaserByXpath {
         body = SeleniumTime.getScopehtml(uri_code);
     }
 }
+// if(StringUtils.isEmpty(body)){
+//     sentBadSiteMsg(siteMsgTemple,"网络访问请求异常","1");
+// }
 //抽取资讯url
 List<CatchWebByMetaSearch> catchWebByMetaSearches = parserCrawlerSiteListByXpath(siteMsgTemple, body);
 catchWebByMetaSearchList.addAll(catchWebByMetaSearches);

@@ -131,6 +131,28 @@ public class WebContentPaserByXpath {
     return catchWebByMetaSearchList;
 }
+public void sentBadSiteMsg(SiteMsgTemple siteMsgTemple, String msg, String problemType) {
+    try {
+        BadSiteMsg badSiteMsg = new BadSiteMsg();
+        badSiteMsg.setId(siteMsgTemple.getId());
+        badSiteMsg.setInfoSourceCode(siteMsgTemple.getInfoSourceCode());
+        badSiteMsg.setWebSiteName(siteMsgTemple.getWebSiteName());
+        badSiteMsg.setSiteName(siteMsgTemple.getSiteName());
+        badSiteMsg.setSiteUri(siteMsgTemple.getSiteUri());
+        badSiteMsg.setErrorType(msg);
+        badSiteMsg.setProblemType(problemType);
+        String crawlerType = siteMsgTemple.getYnDynamicCrawl() != 1 ? "0" : siteMsgTemple.getYnDynamicCrawl() + "";
+        badSiteMsg.setCrawlerType(crawlerType);
+        ObjectMapper mapper = new ObjectMapper();
+        String docjson = mapper.writeValueAsString(badSiteMsg);
+        kafkaTemplate.send("badSiteTopic", docjson);
+        log.info("信息源问题:" + msg);
+    } catch (Exception e) {
+    }
+}
 //提取列表信息
 public List<CatchWebByMetaSearch> parserCrawlerSiteListByXpath(SiteMsgTemple siteMsgTemple, String body) throws Exception {
     List<CatchWebByMetaSearch> metaSearchList = new ArrayList<CatchWebByMetaSearch>();

@@ -361,9 +383,9 @@ public class WebContentPaserByXpath {
 try {
     ClbAnsProcessitem processitem = docInfoTrans2Processitem(docInfo);
     if (siteMsgTemple.getYnDynamicCrawl() == 1) {
-        processitem.setSource("动态爬取");
+        processitem.setSource("2");
     } else {
-        processitem.setSource("静态爬取");
+        processitem.setSource("1");
     }
     String docjson = mapper.writeValueAsString(processitem);
     // kafkaTemplate.send(Constants.KAFKA_PRODUCT_TOPIC, "key", docjson);

@@ -489,7 +511,7 @@ public class WebContentPaserByXpath {
 clbAnsProcessitem.setSid(docInfo.getSid() + "");
 clbAnsProcessitem.setTitle(docInfo.getTitle());
 clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
 clbAnsProcessitem.setSummary(docInfo.getSummary());
 clbAnsProcessitem.setAuthor(docInfo.getAuthor());
 clbAnsProcessitem.setOrigin(docInfo.getOrigin());
comm_crawler/src/main/java/com/zzsn/crawler/uriparser/SeleniumTime.java

@@ -70,42 +70,38 @@ public class SeleniumTime {
 ChromeDriverService service = new ChromeDriverService.Builder().usingDriverExecutable(new File(Constants.CHROMEDRIVE)).usingAnyFreePort().build();
 try {
     System.setProperty("webdriver.chrome.driver", Constants.CHROMEDRIVE);
     service.start();
-    if (!System.getProperty("os.name").toUpperCase().contains("WINDOWS")) {
-        chromeOptions.addArguments("--disable-gpu", "--window-size=1290,1080");
-        chromeOptions.addArguments("headless"); //无界面参数
-        chromeOptions.addArguments("no-sandbox"); //禁用沙盒 就是被这个参数搞了一天
-    }
+    // chromeOptions.addArguments("--disable-gpu", "--window-size=1290,1080");
+    // chromeOptions.addArguments("headless");//无界面参数
+    // chromeOptions.addArguments("no-sandbox");//禁用沙盒 就是被这个参数搞了一天
     driver = new ChromeDriver(chromeOptions); //生成实例
     try {
-        Duration duration = Duration.of(60, ChronoUnit.SECONDS);
+        Duration duration = Duration.of(100, ChronoUnit.SECONDS);
         driver.manage().timeouts().pageLoadTimeout(duration);
         driver.get(url);
-        Thread.sleep(1000l);
+        Thread.sleep(10002);
        try {
            WebElement webElement = driver.findElement(By.xpath("/html"));
            html = webElement.getAttribute("outerHTML");
            System.out.println("browser will be close");
        } catch (Exception e) {
            log.info("chromedriver 出现异常:" + e.getMessage());
        } finally {
            driver.quit();
        }
     } catch (Exception e) {
         log.info("chromedriver 出现异常:" + e.getMessage());
     } finally {
         try {
             driver.quit();
             service.stop();
             Thread.sleep(3000l);
         } catch (InterruptedException e) {
         }
     }
 } catch (Exception e) {
-    return "";
+    log.info("chromedriver 驱动访问出现异常:" + e.getMessage());
 } finally {
     service.stop();
 }
 return html;
 }
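Condensed, the fetch path in getScopehtml is: start chromedriver, apply a page-load timeout, read the outerHTML of the root element, and quit the driver in a finally block. A sketch under the assumption of the Selenium 4 style Duration-based timeout API; the driver path and URL are placeholders:

    import java.time.Duration;
    import org.openqa.selenium.By;
    import org.openqa.selenium.WebDriver;
    import org.openqa.selenium.chrome.ChromeDriver;
    import org.openqa.selenium.chrome.ChromeOptions;

    public class HeadlessFetchSketch {
        public static void main(String[] args) {
            System.setProperty("webdriver.chrome.driver", "/path/to/chromedriver");
            // headless/no-sandbox arguments are commented out in the diff, so none here either
            ChromeOptions options = new ChromeOptions();
            WebDriver driver = new ChromeDriver(options);
            String html = "";
            try {
                driver.manage().timeouts().pageLoadTimeout(Duration.ofSeconds(100));
                driver.get("https://example.com/");
                html = driver.findElement(By.xpath("/html")).getAttribute("outerHTML");
            } finally {
                driver.quit(); // always release the browser, as the new finally blocks do
            }
            System.out.println(html.length());
        }
    }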
comm_crawler/src/main/java/com/zzsn/crawler/uriparser/WebPageScreenShot.java

@@ -41,8 +41,8 @@ public class WebPageScreenShot {
 // driver.manage().window().maximize();
 String js1 = "return document.body.clientHeight.toString()";
-String js1_result = ((JavascriptExecutor) driver).executeScript(js1) + "";
-int height = Integer.parseInt(js1_result);
+// String js1_result = ((JavascriptExecutor) driver).executeScript(js1) + "";
+// int height = Integer.parseInt(js1_result);
 List<String> files = new ArrayList<String>();
 int last_t = 0;
 // for (int i = 0; i < 20; ) {

@@ -80,7 +80,7 @@ public class WebPageScreenShot {
 CustomScreenshot customScreenshot = new CustomScreenshot();
 files.add(customScreenshot.fullScreenshotLong(driver).getAbsolutePath());
 driver.quit(); //退出浏览器
-boolean flag = merge(files.toArray(new String[]{}), type, resultPath);
+// boolean flag = merge(files.toArray(new String[]{}), type, resultPath);
 // if(flag){
 //     InputStream inputStream =new BufferedInputStream(new FileInputStream(resultPath));
 //     HashMap map = ObsUpload.uploadShotInputStream(inputStream, "png");
comm_crawler/src/main/java/com/zzsn/crawlerOther/ArticleCrawlerThread.java

@@ -133,7 +133,7 @@ public class ArticleCrawlerThread {
 clbAnsProcessitem.setSid(docInfo.getSid() + "");
 clbAnsProcessitem.setTitle(docInfo.getTitle());
 clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
 clbAnsProcessitem.setSummary(docInfo.getSummary());
 clbAnsProcessitem.setAuthor(docInfo.getAuthor());
 clbAnsProcessitem.setOrigin(docInfo.getOrigin());
comm_crawler/src/main/java/com/zzsn/crawlerOther/paser/PaserCommDownload.java

@@ -361,7 +361,7 @@ public class PaserCommDownload {
 clbAnsProcessitem.setSid(docInfo.getSid() + "");
 clbAnsProcessitem.setTitle(docInfo.getTitle());
 clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
 clbAnsProcessitem.setSummary(docInfo.getSummary());
 clbAnsProcessitem.setAuthor(docInfo.getAuthor());
 clbAnsProcessitem.setOrigin(docInfo.getOrigin());
comm_crawler/src/main/java/com/zzsn/crawlerOther/paser/WebContentPaserByJsonXpath.java

@@ -237,9 +237,9 @@ public class WebContentPaserByJsonXpath {
 try {
     ClbAnsProcessitem processitem = docInfoTrans2Processitem(docInfo);
     if (siteMsgTemple.getYnDynamicCrawl() == 1) {
-        processitem.setSource("动态爬取");
+        processitem.setSource("2");
     } else {
-        processitem.setSource("静态爬取");
+        processitem.setSource("1");
     }
     String docjson = mapper.writeValueAsString(processitem);
     kafkaTemplate.send(Constants.KAFKA_PRODUCT_TOPIC, "key", docjson);

@@ -332,7 +332,7 @@ public class WebContentPaserByJsonXpath {
 clbAnsProcessitem.setSid(docInfo.getSid() + "");
 clbAnsProcessitem.setTitle(docInfo.getTitle());
 clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
 clbAnsProcessitem.setSummary(docInfo.getSummary());
 clbAnsProcessitem.setAuthor(docInfo.getAuthor());
 clbAnsProcessitem.setOrigin(docInfo.getOrigin());
comm_crawler/src/main/java/com/zzsn/crawlerOther/paser/WebContentPaserByRegular.java

@@ -321,9 +321,9 @@ public class WebContentPaserByRegular {
 try {
     ClbAnsProcessitem processitem = paserSiteDownload.docInfoTrans2Processitem(docInfo);
     if (siteMsgTemple.getYnDynamicCrawl() == 1) {
-        processitem.setSource("动态爬取");
+        processitem.setSource("2");
     } else {
-        processitem.setSource("静态爬取");
+        processitem.setSource("1");
     }
     if (StringUtils.isEmpty(processitem.getTitle()) || StringUtils.isEmpty(processitem.getContent())
             || StringUtils.isEmpty(processitem.getPublishDate())) {
comm_crawler/src/main/java/com/zzsn/crawlerOther/paser/WebContentPaserByXpath.java

@@ -364,9 +364,9 @@ public class WebContentPaserByXpath {
 try {
     ClbAnsProcessitem processitem = docInfoTrans2Processitem(docInfo);
     if (siteMsgTemple.getYnDynamicCrawl() == 1) {
-        processitem.setSource("动态爬取");
+        processitem.setSource("2");
     } else {
-        processitem.setSource("静态爬取");
+        processitem.setSource("1");
     }
     String docjson = mapper.writeValueAsString(processitem);
     kafkaTemplate.send(Constants.KAFKA_PRODUCT_TOPIC, "key", docjson);

@@ -483,7 +483,7 @@ public class WebContentPaserByXpath {
 clbAnsProcessitem.setSid(docInfo.getSid() + "");
 clbAnsProcessitem.setTitle(docInfo.getTitle());
 clbAnsProcessitem.setContent(docInfo.getContentNoTag());
-clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
+clbAnsProcessitem.setContentWithTag(docInfo.getContentWithTag());
 clbAnsProcessitem.setSummary(docInfo.getSummary());
 clbAnsProcessitem.setAuthor(docInfo.getAuthor());
 clbAnsProcessitem.setOrigin(docInfo.getOrigin());
comm_crawler/src/main/java/com/zzsn/download/PageConnectioner.java

@@ -86,9 +86,8 @@ public class PageConnectioner {
     //参数类型是json字符串用到
     connection.setRequestProperty("Content-Type", "application/json");
 } catch (Exception e) {
     //
 }
 return connection;
 }

@@ -157,6 +156,7 @@ public class PageConnectioner {
 URL url = null;
 Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_ADDR, PROXY_PORT));
+HttpsURLConnection connection = null;
 try {
     trustAllHttpsCertificates();
     HostnameVerifier hv = new HostnameVerifier() {
         @Override

@@ -166,8 +166,6 @@ public class PageConnectioner {
 };
 HttpsURLConnection.setDefaultHostnameVerifier(hv);
 try {
     url = new URL(urlstr);
     if (false) {
         connection = (HttpsURLConnection) url.openConnection(proxy);

@@ -180,14 +178,12 @@ public class PageConnectioner {
     connection.setRequestProperty("connection", "Keep-Alive");
     connection.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.8");
     connection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36");
-} catch (Exception e) {
-}
+} catch (Exception e) {
+    //
+}
 return connection;
 }
 /**构造下载使用的{@link HttpsURLConnection}
  * @param urlstr 下载url
  * @return

@@ -252,9 +248,9 @@ public class PageConnectioner {
     break;
 } catch (Exception e1) {
     try {
-        Thread.sleep(10000);
+        Thread.sleep(2000);
     } catch (InterruptedException e2) {
         // logUtil.getLogger().error(String.format("ORMSG: The site server access denied, EXCEPTION: %s",ExceptionUtil.getExceptionStr(e2)));
         //
     }
 }

@@ -313,10 +309,18 @@ public class PageConnectioner {
 long startDownTime = System.currentTimeMillis();
 PageGet pg = null;
 String docBody = null;
+HttpURLConnection connection = null;
 try {
-    pg = new PageGet(url, encoding, this.connection(url, headerParams));
-} catch (Exception e3) {
+    connection = this.connection(url, headerParams);
+    pg = new PageGet(url, encoding, connection);
+} catch (Exception e1) {
+    assert connection != null;
+    connection.disconnect();
+    return docBody;
+} finally {
+    assert connection != null;
+    connection.disconnect();
 }
 try {

@@ -356,12 +360,18 @@ public class PageConnectioner {
 long startDownTime = System.currentTimeMillis();
 PageGet pg = null;
 String docBody = null;
+HttpURLConnection connection = null;
 try {
-    pg = new PageGet(url, encoding, this.connection(url));
+    connection = this.connection(url);
+    pg = new PageGet(url, encoding, connection);
 } catch (Exception e3) {
+    assert connection != null;
+    connection.disconnect();
+    return docBody;
+} finally {
+    assert connection != null;
+    connection.disconnect();
 }
 try {
     pg.urlConnectionGet();
     docBody = pg.getPageStr();

@@ -393,12 +403,18 @@ public class PageConnectioner {
  * @return
  */
 protected String staticHttpsConnectByGet(String url, String encoding, boolean bFrame) {
-    long exitTimeDis = 3000;
+    long exitTimeDis = 10000;
     long startDownTime = System.currentTimeMillis();
     PageGet pg = null;
+    HttpsURLConnection connection = null;
     try {
-        pg = new PageGet(url, encoding, this.httpsconnection(url));
+        connection = this.httpsconnection(url);
+        pg = new PageGet(url, encoding, connection);
     } catch (Exception e3) {
         //
+    } finally {
+        assert connection != null;
+        connection.disconnect();
     }
     String docBody = null;
     try {

@@ -542,15 +558,23 @@ public class PageConnectioner {
 long startDownTime = System.currentTimeMillis();
 PagePost pp = null;
 String docBody = null;
+HttpURLConnection connection = null;
 try {
     if (postParam != null && postParam.contains("[Content-type]")) {
         // 仅用于 鹏云课堂
         String param = postParam.replace("[Content-type]", "");
-        pp = new PagePost(url, encoding, this.connection(url, param), param);
+        connection = this.connection(url, param);
+        pp = new PagePost(url, encoding, connection, param);
     } else {
-        pp = new PagePost(url, encoding, this.connection(url), postParam);
+        connection = this.connection(url);
+        pp = new PagePost(url, encoding, connection, postParam);
     }
 } catch (Exception e3) {
+    assert connection != null;
+    connection.disconnect();
+    return docBody;
+} finally {
+    assert connection != null;
+    connection.disconnect();
 }
 try {

@@ -589,15 +613,23 @@ public class PageConnectioner {
 long startDownTime = System.currentTimeMillis();
 PagePost pp = null;
 String docBody = null;
+HttpURLConnection connection = null;
 try {
     if (postParam != null && postParam.contains("{") && postParam.contains(":")) {
         // 仅用于 鹏云课堂
         String param = postParam.replace("[Content-type]", "");
-        pp = new PagePost(url, encoding, this.connection(url, param), param);
+        connection = this.connection(url, param);
+        pp = new PagePost(url, encoding, connection, param);
     } else {
-        pp = new PagePost(url, encoding, this.connection(url), postParam);
+        connection = this.connection(url);
+        pp = new PagePost(url, encoding, connection, postParam);
     }
 } catch (Exception e3) {
+    assert connection != null;
+    connection.disconnect();
+    return docBody;
+} finally {
+    assert connection != null;
+    connection.disconnect();
 }
 try {

@@ -634,13 +666,18 @@ public class PageConnectioner {
 long exitTimeDis = 30000;
 long startDownTime = System.currentTimeMillis();
+HttpsURLConnection connection = null;
 PagePost pp = null;
 try {
-    pp = new PagePost(url, encoding, this.httpsconnection(url), param);
+    connection = this.httpsconnection(url);
+    pp = new PagePost(url, encoding, connection, param);
 } catch (Exception e3) {
     // TODO Auto-generated catch block
-    e3.printStackTrace();
+    //
+} finally {
+    assert connection != null;
+    connection.disconnect();
 }
 String docBody = null;
 try {
     pp.urlHttpsConnectionPost();

@@ -693,7 +730,7 @@ public class PageConnectioner {
 String pageStr = "";
 try {
     HtmlPage htmlPage = webClient.getPage(urlstr);
-    webClient.waitForBackgroundJavaScript(600000);
+    webClient.waitForBackgroundJavaScript(300000);
     pageStr = htmlPage.asXml();
 } catch (Exception e) {

@@ -740,7 +777,6 @@ public class PageConnectioner {
 // JavaScriptPage scriptPage = (JavaScriptPage) page;
 //     pageStr = scriptPage.getContent();
 // }
 } catch (Exception e) {
 } finally {
     webClient.close();
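Several hunks in this file share one retry shape: break out of the loop on success, sleep on failure, then try again. A self-contained sketch of that loop with the shortened 2000 ms back-off; attemptDownload is a hypothetical stand-in for the real connection call:

    public class RetrySleepSketch {
        static int calls = 0;

        // Simulated flaky download: fails twice, then succeeds.
        static String attemptDownload() throws Exception {
            if (++calls < 3) throw new Exception("transient failure " + calls);
            return "<html>ok</html>";
        }

        public static void main(String[] args) {
            String body = null;
            for (int i = 0; i < 5; i++) {
                try {
                    body = attemptDownload();
                    break; // success, stop retrying
                } catch (Exception e1) {
                    try {
                        Thread.sleep(2000); // the commit shortens this back-off from 10 s to 2 s
                    } catch (InterruptedException e2) {
                        Thread.currentThread().interrupt();
                    }
                }
            }
            System.out.println(body); // "<html>ok</html>" on the third attempt
        }
    }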
comm_crawler/src/main/java/com/zzsn/download/PageDownloader.java

@@ -49,6 +49,8 @@ public class PageDownloader {
 // 如果页面编码格式未知,则从页面中获取该页面编码格式
 public String getEncodingFromHtmlFile(String urlstr, HttpURLConnection connection) throws IOException {
+    String encoding = null;
+    try {
     connection.setRequestMethod("GET");
     connection.setRequestProperty("User-Agent", "Mozilla/5.0 " +
             "(Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) " +
             "Gecko/20080404 Firefox/2.0.0.14");

@@ -56,7 +58,6 @@ public class PageDownloader {
     connection.setRequestProperty("Cookie", "auth=token");
     String contentType = connection.getHeaderField("Content-Type");
-    String encoding = null;
     if (contentType != null) {
         String temp = "charset=";
         int m = contentType.indexOf(temp);

@@ -65,17 +66,23 @@ public class PageDownloader {
         }
     }
     if (encoding == null) {
+        InputStream is = null;
         try {
-            InputStream is = connection.getInputStream();
+            is = connection.getInputStream();
             BufferedInputStream bufferedInputStream = new BufferedInputStream(is);
             encoding = EncodeDetector.getEncoding(bufferedInputStream);
-            is.close();
         } catch (Exception e) {
             //
+        } finally {
+            assert is != null;
+            is.close();
         }
     }
+    } catch (Exception e) {
+        //
+    } finally {
+        connection.disconnect();
+    }
     return encoding;
 }

@@ -159,25 +166,19 @@ public class PageDownloader {
 if (interval > 0 && lastDownloadTime > 0 && dis < interval) {
     new PageDownloader(dis + 2000);
 }
 long startDtime = System.currentTimeMillis();
 PageConnectioner pConn = new PageConnectioner();
 HttpURLConnection connection = null;
 try {
     connection = pConn.connection(url);
     if (encoding == null || encoding.isEmpty()) { //获取网站编码
         // encoding = getEncodingFromHtmlFile(url, connection);
         PaserSiteDownload paserSiteDownload = new PaserSiteDownload();
         encoding = paserSiteDownload.locateCharSet(url);
     }
 } catch (Exception e1) {
     // e1.printStackTrace();
     log.info("获取编码失败");
 }
 String docBody = null;
 if (bDynamic) {
     docBody = pConn.dynamicConnectByGet(url, encoding);
 } else {
     // this.bDownloadUseFrame=true;
     if (bFrame && this.bDownloadUseFrame) {
         String body = null;
         try {

@@ -196,12 +197,11 @@ public class PageDownloader {
 }
 if (url.contains("https:")) {
     try {
-        connection = pConn.httpsconnection(url);
         if (encoding == null || encoding.isEmpty()) {
             encoding = "utf-8";
         }
     } catch (Exception e1) {
         // e1.printStackTrace();
         //
     }
     docBody = pConn.staticHttpsConnectByGet(url, encoding, false);
 } else {

@@ -237,6 +237,9 @@ public class PageDownloader {
 } catch (Exception e1) {
     // e1.printStackTrace();
+} finally {
+    assert connection != null;
+    connection.disconnect();
 }
 String docBody = null;
 if (bDynamic) {

@@ -264,7 +267,7 @@ public class PageDownloader {
 }
 if (url.contains("https:")) {
     try {
-        connection = pConn.httpsconnection(url);
+        // connection = pConn.httpsconnection(url);
         if (encoding == null || encoding.isEmpty()) {
             encoding = "utf-8";
         }

@@ -368,6 +371,9 @@ public class PageDownloader {
 } catch (Exception e1) {
     // e1.printStackTrace();
+} finally {
+    assert connection != null;
+    connection.disconnect();
 }
 String docBody = null;
 if (bDynamic) {

@@ -493,7 +499,6 @@ public class PageDownloader {
     return true;
 }
 } catch (Exception e) {
     // TODO Auto-generated catch block
     return true;
 }
 return false;
comm_crawler/src/main/java/com/zzsn/entity/ClbAnsProcessitem.java

@@ -21,7 +21,7 @@ public class ClbAnsProcessitem {
 /**正文*/
 private String content;
-private String contentWithtag;
+private String contentWithTag;
 /**未知*/
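The one-character rename matters downstream: Jackson derives JSON keys from bean property names, so messages produced from ClbAnsProcessitem now carry contentWithTag instead of contentWithtag. A sketch of that effect, where ItemStub stands in for the real entity:

    import com.fasterxml.jackson.databind.ObjectMapper;

    public class RenameEffectSketch {
        public static class ItemStub {
            private String contentWithTag = "<p>正文</p>";
            public String getContentWithTag() { return contentWithTag; }
            public void setContentWithTag(String v) { contentWithTag = v; }
        }

        public static void main(String[] args) throws Exception {
            // The getter name determines the JSON key consumers see on the Kafka topic.
            String json = new ObjectMapper().writeValueAsString(new ItemStub());
            System.out.println(json); // {"contentWithTag":"<p>正文</p>"} (previously contentWithtag)
        }
    }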
comm_crawler/src/main/java/com/zzsn/job/KafkaConsumerJob.java

@@ -50,7 +50,7 @@ public class KafkaConsumerJob {
 // latest earliest
 //时间间隔设置为1h
 // properties.put("max.poll.interval.ms", 60*60*1000);
-properties.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, 60 * 60 * 1000);
+properties.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, 2 * 60 * 60 * 1000);
 properties.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, 25000);
 properties.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 30000);
 properties.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 1);

@@ -62,11 +62,11 @@ public class KafkaConsumerJob {
 // , Constants.THREAD_SIZE , 0, TimeUnit.SECONDS, new ArrayBlockingQueue<>(1));
-@Scheduled(cron = "0 0/5 * * * ?")
-@Async("asyncTaskExecutor")
+@Scheduled(cron = "0 0/2 * * * ?")
+// @Async("asyncTaskExecutor")
 public void consumer() {
-    ExecutorService threadPool = Executors.newFixedThreadPool(Constants.THREAD_SIZE);
-    log.info("进入定时获取mq消息");
+    // ExecutorService threadPool = Executors.newFixedThreadPool(Constants.THREAD_SIZE);
+    log.info("进入定时获取topic消息");
     //1.创建消费者
     KafkaConsumer<String, String> consumer = createConsumer();
     // 消费某个主题的某个分区数据

@@ -83,7 +83,6 @@ public class KafkaConsumerJob {
 //在0ms内等待Kafka的broker返回数据.超时参数指定poll在多久之后可以返回,不管有没有可用的数据都要返回
 ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(3000));
 //手动提交已消费数据的offset
 // consumer.commitAsync();
 consumer.commitSync();
 if (records != null && records.count() > 0) {
     for (ConsumerRecord record : records) {

@@ -98,13 +97,19 @@ public class KafkaConsumerJob {
 }
 }
 }
 }
 } catch (Exception e) {
     // consumer.commitSync();
     log.info(e.getMessage());
-    // consumer = createConsumer();
-    // consumer.subscribe(Arrays.asList(Constants.KAFKA_CONSUMER_TOPIC));
+    //退出应用程序前使用close方法关闭消费者,网络连接和socket也会随之关闭,并立即触发一次再均衡
+    consumer.close();
+    System.out.println("error!!!!!!!!!!!");
+    consumer = createConsumer();
+    // 消费某个主题的某个分区数据
+    kafkaConsumerPartition = Constants.KAFKA_CONSUMER_PARTITION;
+    String[] partitions1 = kafkaConsumerPartition.split(",");
+    for (int i = 0; i < partitions1.length; i++) {
+        topicPartitions.add(new TopicPartition(Constants.KAFKA_CONSUMER_TOPIC, Integer.parseInt(partitions1[i])));
+    }
+    consumer.assign(topicPartitions);
 }
 }
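The enlarged catch block rebuilds the consumer rather than letting the scheduled job die: close the broken instance, create a fresh one, re-assign the configured partitions, and resume. A sketch of that recovery flow with the post-commit settings (the broker address is a placeholder):

    import java.time.Duration;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Properties;
    import org.apache.kafka.clients.consumer.ConsumerConfig;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    import org.apache.kafka.common.TopicPartition;

    public class ConsumerRecoverySketch {
        static KafkaConsumer<String, String> createConsumer() {
            Properties p = new Properties();
            p.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
            p.put(ConsumerConfig.GROUP_ID_CONFIG, "test-zs1");
            p.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
                    "org.apache.kafka.common.serialization.StringDeserializer");
            p.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
                    "org.apache.kafka.common.serialization.StringDeserializer");
            p.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, 2 * 60 * 60 * 1000);
            return new KafkaConsumer<>(p);
        }

        public static void main(String[] args) {
            KafkaConsumer<String, String> consumer = createConsumer();
            // Manual assignment of the configured partitions ("0,1,2,3" after this commit).
            List<TopicPartition> topicPartitions = new ArrayList<>();
            for (String part : "0,1,2,3".split(",")) {
                topicPartitions.add(new TopicPartition("clb-infosource-handler-dynamin",
                        Integer.parseInt(part)));
            }
            consumer.assign(topicPartitions); // no group rebalancing with assign()
            try {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(3000));
                consumer.commitSync(); // commit offsets for whatever was fetched
                System.out.println("fetched " + records.count() + " records");
            } catch (Exception e) {
                consumer.close();            // tear down the broken consumer and its connections
                consumer = createConsumer(); // then rebuild and re-assign, as the diff does
                consumer.assign(topicPartitions);
            }
        }
    }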
comm_crawler/src/main/resources/constants.properties

@@ -35,8 +35,8 @@ PROXYID=1
 #线程池大小
 THREAD_SIZE=1
 #
-CHROMEDRIVE=E:\\chrome\\chromedriver.exe
-CHROMEBIN=C:\\Users\\WIN10\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe
+CHROMEDRIVE=D:\\chrome\\chromedriver.exe
+CHROMEBIN=C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe
 USER_DATA_DIR=C:\\Users\\WIN10\\AppData\\Local\\Google\\Chrome\\User Data\\Default
 #mysql connection

@@ -52,7 +52,7 @@ KAFKA_CONSUMER_SERVERS=114.115.159.144:9092
 #KAFKA_CONSUMER_TOPIC = staticCrawlTopic
 KAFKA_CONSUMER_TOPIC=clb-infosource-handler-dynamin
 #
-KAFKA_CONSUMER_GROUP_ID=dynamin-sync
+KAFKA_CONSUMER_GROUP_ID=test-zs1
 #KAFKA_CONSUMER_AUTO_OFFSET_RESET=latest
 KAFKA_CONSUMER_AUTO_OFFSET_RESET=earliest
 KAFKA_PRODUCT_TOPIC=crawlerInfo

@@ -62,16 +62,16 @@ KAFKA_COLLECT_TOPIC=collectionAndDispatcherInfo
 META_SEARCH_URL=https://www.google.com/search?hl=en&lr=lang_en&tbm=nws&sa=X&q=
 #META_SEARCH_URL=https://www.baidu.com/s?rtt=1&bsst=1&cl=2&tn=news&ie=utf-8&word=
 #指定分区使用逗号分割
-KAFKA_CONSUMER_PARTITION=0
+KAFKA_CONSUMER_PARTITION=0,1,2,3
 #KAFKA_CONSUMER_PARTITION=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 KAFKA_PRODUCT_PARTITION=0
 # Redis settings
-redis.host=127.0.0.1
+redis.host=114.116.26.150
 redis.port=6379
-redis.pass=xxxxxx
+redis.pass=zzsn9988
 #redis.host=8.130.30.33
 #redis.port=9010
 #redis.pass=wxadS&jklim
sina_search/src/main/java/com/zzsn/conf/ThreadExecutorConfig.java

@@ -17,8 +17,8 @@ public class ThreadExecutorConfig {
 @Bean(value = "asyncTaskExecutor")
 public Executor executor() {
     ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
-    executor.setCorePoolSize(1); //线程池维护线程的最少数量
-    executor.setMaxPoolSize(1); //线程池维护线程的最大数量
+    executor.setCorePoolSize(2); //线程池维护线程的最少数量
+    executor.setMaxPoolSize(5); //线程池维护线程的最大数量
     executor.setQueueCapacity(5000); //缓存队列
     executor.setThreadNamePrefix("ssmsExecutor-");
     /**
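For reference, the executor this bean now builds: two core threads, growing to five only once the 5000-slot queue fills. A standalone sketch; initialize() is needed when constructing it outside a Spring container:

    import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

    public class ExecutorConfigSketch {
        public static ThreadPoolTaskExecutor executor() {
            ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
            executor.setCorePoolSize(2);      // minimum threads kept alive
            executor.setMaxPoolSize(5);       // upper bound, used only after the queue is full
            executor.setQueueCapacity(5000);  // tasks buffered before the pool grows past core size
            executor.setThreadNamePrefix("ssmsExecutor-");
            executor.initialize();
            return executor;
        }

        public static void main(String[] args) {
            ThreadPoolTaskExecutor ex = executor();
            ex.execute(() -> System.out.println(Thread.currentThread().getName())); // ssmsExecutor-1
            ex.shutdown();
        }
    }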