提交 3582bfa7 作者: liuweigang

通用采集代码更新3

上级 17d44434
......@@ -3,20 +3,30 @@ package com.zzsn.download;
import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.zzsn.crawler.oracledb.OracleDBManager;
import com.zzsn.crawler.oracledb.OracleDataTable;
import com.zzsn.generation.Constants;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.HttpClient;
import org.apache.http.conn.params.ConnRouteParams;
import org.apache.http.impl.client.DefaultHttpClient;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSession;
import java.io.IOException;
import java.net.*;
import java.sql.SQLException;
public class PageConnectioner {
/** Default proxy address. */
public static String PROXY_ADDR = "proxy.zj.chinamobile.com";
// private static final String PROXY_ADDR = "114.249.113.226";
// public static String PROXY_ADDR = "proxy.zj.chinamobile.com";
// NOTE(review): PROXY_ADDR is declared both above and below in this view; this looks like the
// old and new lines of a diff rendered together — confirm which declaration the file keeps.
private static final String PROXY_ADDR = "114.249.113.226";
/** Default proxy port. */
public static int PROXY_PORT = 8080;
// private static final int PROXY_PORT = 9000;
// public static int PROXY_PORT = 8080;
// NOTE(review): PROXY_PORT is likewise declared twice in this view — confirm which one is current.
private static final int PROXY_PORT = 9000;
/** Pause applied after a failed download (presumably milliseconds — TODO confirm at the use site). */
private static final long SLEEP_TIME = 5000;
......@@ -28,11 +38,23 @@ public class PageConnectioner {
HttpURLConnection connection = null;
try {
url = new URL(urlstr);
// if (false) {
// Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_ADDR, PROXY_PORT));
// connection = (HttpURLConnection) url.openConnection(proxy);
// }
connection = (HttpURLConnection) url.openConnection();
if (Constants.PROXYID==1) {
String proxyIP = getProxyIP();
String[] proxys=proxyIP.split("-");
String proxyHost = proxys[0];
int proxyPort = Integer.parseInt(proxys[1]);
String userName = proxys[2];
String password = proxys[3];
//创建代理服务器
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyHost, proxyPort));
//设置代理的用户名密码
Authenticator.setDefault(new MyAuth(userName, password));
// 设定连接的相关参数
connection = (HttpURLConnection) url.openConnection(proxy);
}else {
connection = (HttpURLConnection) url.openConnection();
}
connection.setConnectTimeout(5000);
connection.setReadTimeout(5000);
connection.setRequestProperty("accept", "*/*");
......@@ -47,9 +69,75 @@ public class PageConnectioner {
} catch (Exception e) {
}
return connection;
}
/**
 * {@link Authenticator} that always answers with one fixed user name / password pair.
 *
 * <p>Within this class it is installed through {@code Authenticator.setDefault(...)} to supply
 * the credentials of the authenticated HTTP proxy.
 */
static class MyAuth extends Authenticator {
    /** User name handed back on every authentication request. */
    private final String user;
    /** Password handed back on every authentication request; never {@code null}. */
    private final String pass;

    /**
     * @param user user name to present
     * @param pass password to present; {@code null} is treated as an empty password so that the
     *             failure does not surface later as a NullPointerException inside the JDK's
     *             authentication callback
     */
    public MyAuth(String user, String pass) {
        this.user = user;
        this.pass = (pass == null) ? "" : pass;
    }

    /**
     * @return a fresh {@link PasswordAuthentication} carrying the configured credentials
     */
    @Override
    protected PasswordAuthentication getPasswordAuthentication() {
        return new PasswordAuthentication(user, pass.toCharArray());
    }
}
/**
 * Reads the proxy descriptor stored in table {@code CIS_sys_Proxy} (row {@code ID = 1}).
 *
 * <p>Every cell of the query result is inspected and the last non-null cell longer than five
 * characters wins. Callers in this class split the returned value on {@code '-'} and read the
 * parts as host, port, user name and password.
 *
 * @return the descriptor, or an empty string when the query returned no rows, no cell qualified,
 *         or the query raised an {@link SQLException}
 */
public static String getProxyIP() {
    String searchSql = "select proxy from CIS_sys_Proxy where ID = 1";
    String proxy = "";
    OracleDBManager dm = new OracleDBManager();
    // No explicit column names / column types are passed to the query helper.
    String[] coulmn = null;
    int[] type = null;
    try {
        OracleDataTable dt = dm.getResultData(coulmn, type, searchSql);
        if (dt != null && dt.getRowCount() > 0) {
            for (int i = 0; i < dt.getRowCount(); i++) {
                for (int j = 0; j < dt.getColCoun(); j++) {
                    String cell = dt.getRow()[i][j];
                    // A NULL column value must be skipped, not dereferenced: the NPE it used to
                    // raise was not covered by the SQLException handler below.
                    if (cell != null && cell.length() > 5) {
                        proxy = cell;
                    }
                }
            }
        } else {
            System.out.println("查询失败");
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
    return proxy;
}
/**
 * Builds an {@link HttpClient} that routes requests through the authenticated proxy described
 * by {@link #getProxyIP()}.
 *
 * @return a new client with the proxy credentials and default proxy route configured
 * @throws IllegalStateException if the stored descriptor has fewer than four
 *         {@code '-'}-separated fields (for example when the lookup returned an empty string)
 * @throws NumberFormatException if the port field is not an integer
 */
public static HttpClient getHttpClient() {
    return newProxiedHttpClient();
}

/**
 * Builds an {@link HttpClient} configured exactly like {@link #getHttpClient()}.
 *
 * <p>NOTE(review): despite its name this method has always configured the proxy; that behaviour
 * is preserved here. Confirm with the callers whether a plain, proxy-less client was intended.
 *
 * @return a new client with the proxy credentials and default proxy route configured
 * @throws IllegalStateException if the stored descriptor has fewer than four
 *         {@code '-'}-separated fields
 * @throws NumberFormatException if the port field is not an integer
 */
public static HttpClient getNoProxyHttpClient() {
    return newProxiedHttpClient();
}

/** Creates a client wired to the proxy host, port and credentials read from {@link #getProxyIP()}. */
private static DefaultHttpClient newProxiedHttpClient() {
    // Descriptor layout: host-port-user-password.
    // NOTE(review): a host or password that itself contains '-' cannot be represented in this format.
    String[] proxys = getProxyIP().split("-");
    if (proxys.length < 4) {
        // Report only the field count: the descriptor itself contains the proxy password.
        throw new IllegalStateException(
                "Proxy configuration must be host-port-user-password but has "
                        + proxys.length + " field(s)");
    }
    String proxyHost = proxys[0];
    int proxyPort = Integer.parseInt(proxys[1]);
    String userName = proxys[2];
    String password = proxys[3];

    DefaultHttpClient httpClient = new DefaultHttpClient();
    httpClient.getCredentialsProvider().setCredentials(
            new AuthScope(proxyHost, proxyPort),
            new UsernamePasswordCredentials(userName, password));
    HttpHost proxy = new HttpHost(proxyHost, proxyPort);
    httpClient.getParams().setParameter(ConnRouteParams.DEFAULT_PROXY, proxy);
    return httpClient;
}
/**构造下载使用的{@link HttpURLConnection}
* @param urlstr 下载url (当参数类型是json字符串时调用)
......@@ -154,7 +242,7 @@ public class PageConnectioner {
*/
protected HttpsURLConnection httpsconnection(String urlstr) throws Exception {
URL url = null;
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_ADDR, PROXY_PORT));
// Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_ADDR, PROXY_PORT));
HttpsURLConnection connection = null;
try {
trustAllHttpsCertificates();
......@@ -167,11 +255,27 @@ public class PageConnectioner {
};
HttpsURLConnection.setDefaultHostnameVerifier(hv);
url = new URL(urlstr);
if (false) {
if (Constants.PROXYID==1) {
String proxyIP = getProxyIP();
String[] proxys=proxyIP.split("-");
String proxyHost = proxys[0];
int proxyPort = Integer.parseInt(proxys[1]);
String userName = proxys[2];
String password = proxys[3];
//创建代理服务器
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyHost, proxyPort));
//设置代理的用户名密码
Authenticator.setDefault(new MyAuth(userName, password));
// 设定连接的相关参数
connection = (HttpsURLConnection) url.openConnection(proxy);
} else {
}else {
connection = (HttpsURLConnection) url.openConnection();
}
// if (false) {
// connection = (HttpsURLConnection) url.openConnection(proxy);
// } else {
// connection = (HttpsURLConnection) url.openConnection();
// }
connection.setConnectTimeout(5000);
connection.setReadTimeout(5000);
connection.setRequestProperty("accept", "*/*");
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论