Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
M
meta_crawler
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
刘伟刚
meta_crawler
Commits
3582bfa7
提交
3582bfa7
authored
7月 27, 2022
作者:
liuweigang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
通用采集代码更新3
上级
17d44434
显示空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
116 行增加
和
12 行删除
+116
-12
PageConnectioner.java
...ler/src/main/java/com/zzsn/download/PageConnectioner.java
+116
-12
没有找到文件。
comm_crawler/src/main/java/com/zzsn/download/PageConnectioner.java
浏览文件 @
3582bfa7
...
...
@@ -3,20 +3,30 @@ package com.zzsn.download;
import
com.gargoylesoftware.htmlunit.*
;
import
com.gargoylesoftware.htmlunit.html.HtmlPage
;
import
com.zzsn.crawler.oracledb.OracleDBManager
;
import
com.zzsn.crawler.oracledb.OracleDataTable
;
import
com.zzsn.generation.Constants
;
import
org.apache.http.HttpHost
;
import
org.apache.http.auth.AuthScope
;
import
org.apache.http.auth.UsernamePasswordCredentials
;
import
org.apache.http.client.HttpClient
;
import
org.apache.http.conn.params.ConnRouteParams
;
import
org.apache.http.impl.client.DefaultHttpClient
;
import
javax.net.ssl.HostnameVerifier
;
import
javax.net.ssl.HttpsURLConnection
;
import
javax.net.ssl.SSLSession
;
import
java.io.IOException
;
import
java.net.*
;
import
java.sql.SQLException
;
public
class
PageConnectioner
{
/**默认代理地址*/
public
static
String
PROXY_ADDR
=
"proxy.zj.chinamobile.com"
;
//
private static final String PROXY_ADDR = "114.249.113.226";
//
public static String PROXY_ADDR = "proxy.zj.chinamobile.com";
private
static
final
String
PROXY_ADDR
=
"114.249.113.226"
;
/**默认代理接口*/
public
static
int
PROXY_PORT
=
8080
;
//
private static final int PROXY_PORT = 9000;
//
public static int PROXY_PORT = 8080;
private
static
final
int
PROXY_PORT
=
9000
;
/**下载失败后的暂停时间*/
private
static
final
long
SLEEP_TIME
=
5000
;
...
...
@@ -28,11 +38,23 @@ public class PageConnectioner {
HttpURLConnection
connection
=
null
;
try
{
url
=
new
URL
(
urlstr
);
// if (false) {
// Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_ADDR, PROXY_PORT));
// connection = (HttpURLConnection) url.openConnection(proxy);
// }
if
(
Constants
.
PROXYID
==
1
)
{
String
proxyIP
=
getProxyIP
();
String
[]
proxys
=
proxyIP
.
split
(
"-"
);
String
proxyHost
=
proxys
[
0
];
int
proxyPort
=
Integer
.
parseInt
(
proxys
[
1
]);
String
userName
=
proxys
[
2
];
String
password
=
proxys
[
3
];
//创建代理服务器
Proxy
proxy
=
new
Proxy
(
Proxy
.
Type
.
HTTP
,
new
InetSocketAddress
(
proxyHost
,
proxyPort
));
//设置代理的用户名密码
Authenticator
.
setDefault
(
new
MyAuth
(
userName
,
password
));
// 设定连接的相关参数
connection
=
(
HttpURLConnection
)
url
.
openConnection
(
proxy
);
}
else
{
connection
=
(
HttpURLConnection
)
url
.
openConnection
();
}
connection
.
setConnectTimeout
(
5000
);
connection
.
setReadTimeout
(
5000
);
connection
.
setRequestProperty
(
"accept"
,
"*/*"
);
...
...
@@ -47,9 +69,75 @@ public class PageConnectioner {
}
catch
(
Exception
e
)
{
}
return
connection
;
}
/**
 * {@link Authenticator} that answers every authentication request with one fixed
 * user name / password pair.
 *
 * <p>The connection builders in this class install an instance through
 * {@code Authenticator.setDefault(...)} right after creating the {@link Proxy}, passing the
 * user name and password taken from the proxy descriptor.
 */
static class MyAuth extends Authenticator {
    /** User name returned for every request. */
    private final String account;
    /** Plain-text password returned for every request. */
    private final String secret;

    /**
     * @param user user name to present
     * @param pass password to present; it is dereferenced in
     *             {@link #getPasswordAuthentication()}, so {@code null} fails there
     */
    public MyAuth(String user, String pass) {
        account = user;
        secret = pass;
    }

    /**
     * Supplies the stored credentials.
     *
     * @return a new {@link PasswordAuthentication} carrying the stored user name and a
     *         fresh {@code char[]} copy of the password
     */
    @Override
    protected PasswordAuthentication getPasswordAuthentication() {
        char[] secretChars = secret.toCharArray();
        return new PasswordAuthentication(account, secretChars);
    }
}
/**
 * Reads the proxy descriptor from the database.
 *
 * <p>Runs {@code select proxy from CIS_sys_Proxy where ID = 1} and scans every cell of the
 * result table. A cell longer than 5 characters is taken as the descriptor; if several
 * cells qualify, the last one scanned wins. The callers in this class split the value on
 * {@code "-"} and read host, port, user name and password from the first four parts.
 *
 * @return the descriptor, or an empty string when the query returns no rows, no cell
 *         qualifies, or a {@link SQLException} is raised (the exception is printed, not
 *         rethrown)
 */
public static String getProxyIP() {
    String searchSql = "select proxy from CIS_sys_Proxy where ID = 1";
    String proxy = "";
    OracleDBManager dm = new OracleDBManager();
    // No column names or column types are handed to the query helper.
    String[] column = null;
    int[] type = null;
    try {
        OracleDataTable dt = dm.getResultData(column, type, searchSql);
        if (dt != null && dt.getRowCount() > 0) {
            for (int i = 0; i < dt.getRowCount(); i++) {
                for (int j = 0; j < dt.getColCoun(); j++) {
                    String cell = dt.getRow()[i][j];
                    // Skip null cells instead of letting length() throw an uncaught
                    // NullPointerException (presumably a SQL NULL surfaces as null here;
                    // verify against OracleDataTable).
                    if (cell != null && cell.length() > 5) {
                        proxy = cell;
                    }
                }
            }
        } else {
            System.out.println("查询失败");
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
    return proxy;
}
/**
 * Builds an Apache {@link HttpClient} that sends its requests through the proxy described
 * by {@link #getProxyIP()}.
 *
 * <p>The descriptor is split on {@code "-"}; parts 0..3 are read as host, port, user name
 * and password. The user name and password are registered for that host and port, and the
 * host and port are set as the client's default proxy.
 *
 * <p>No validation is done: a descriptor with fewer than four parts raises
 * {@link ArrayIndexOutOfBoundsException}, and a non-numeric port raises
 * {@link NumberFormatException}.
 *
 * @return a new client configured with the proxy and its credentials
 */
public static HttpClient getHttpClient() {
    String[] parts = getProxyIP().split("-");
    String host = parts[0];
    int port = Integer.parseInt(parts[1]);
    String account = parts[2];
    String secret = parts[3];

    DefaultHttpClient client = new DefaultHttpClient();

    // Credentials are scoped to the proxy endpoint only.
    AuthScope proxyScope = new AuthScope(host, port);
    UsernamePasswordCredentials proxyLogin = new UsernamePasswordCredentials(account, secret);
    client.getCredentialsProvider().setCredentials(proxyScope, proxyLogin);

    // Route every request of this client through the proxy.
    client.getParams().setParameter(ConnRouteParams.DEFAULT_PROXY, new HttpHost(host, port));
    return client;
}
/**
 * Builds an Apache {@link HttpClient} from the descriptor returned by
 * {@link #getProxyIP()}.
 *
 * <p>NOTE(review): despite the name, this method configures the same authenticated proxy
 * route as {@code getHttpClient()}; confirm whether a direct (proxy-less) client was
 * intended.
 *
 * <p>The descriptor is split on {@code "-"}; parts 0..3 are read as host, port, user name
 * and password. A descriptor with fewer than four parts raises
 * {@link ArrayIndexOutOfBoundsException}, and a non-numeric port raises
 * {@link NumberFormatException}.
 *
 * @return a new client with proxy credentials and default proxy set
 */
public static HttpClient getNoProxyHttpClient() {
    String descriptor = getProxyIP();
    String[] fields = descriptor.split("-");
    DefaultHttpClient client = new DefaultHttpClient();

    String host = fields[0];
    int port = Integer.parseInt(fields[1]);

    // Register the proxy account for the proxy endpoint.
    UsernamePasswordCredentials login = new UsernamePasswordCredentials(fields[2], fields[3]);
    client.getCredentialsProvider().setCredentials(new AuthScope(host, port), login);

    // Make the proxy the default route for this client.
    HttpHost proxyEndpoint = new HttpHost(host, port);
    client.getParams().setParameter(ConnRouteParams.DEFAULT_PROXY, proxyEndpoint);
    return client;
}
/**构造下载使用的{@link HttpURLConnection}
* @param urlstr 下载url (当参数类型是json字符串时调用)
...
...
@@ -154,7 +242,7 @@ public class PageConnectioner {
*/
protected
HttpsURLConnection
httpsconnection
(
String
urlstr
)
throws
Exception
{
URL
url
=
null
;
Proxy
proxy
=
new
Proxy
(
Proxy
.
Type
.
HTTP
,
new
InetSocketAddress
(
PROXY_ADDR
,
PROXY_PORT
));
//
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_ADDR, PROXY_PORT));
HttpsURLConnection
connection
=
null
;
try
{
trustAllHttpsCertificates
();
...
...
@@ -167,11 +255,27 @@ public class PageConnectioner {
};
HttpsURLConnection
.
setDefaultHostnameVerifier
(
hv
);
url
=
new
URL
(
urlstr
);
if
(
false
)
{
if
(
Constants
.
PROXYID
==
1
)
{
String
proxyIP
=
getProxyIP
();
String
[]
proxys
=
proxyIP
.
split
(
"-"
);
String
proxyHost
=
proxys
[
0
];
int
proxyPort
=
Integer
.
parseInt
(
proxys
[
1
]);
String
userName
=
proxys
[
2
];
String
password
=
proxys
[
3
];
//创建代理服务器
Proxy
proxy
=
new
Proxy
(
Proxy
.
Type
.
HTTP
,
new
InetSocketAddress
(
proxyHost
,
proxyPort
));
//设置代理的用户名密码
Authenticator
.
setDefault
(
new
MyAuth
(
userName
,
password
));
// 设定连接的相关参数
connection
=
(
HttpsURLConnection
)
url
.
openConnection
(
proxy
);
}
else
{
}
else
{
connection
=
(
HttpsURLConnection
)
url
.
openConnection
();
}
// if (false) {
// connection = (HttpsURLConnection) url.openConnection(proxy);
// } else {
// connection = (HttpsURLConnection) url.openConnection();
// }
connection
.
setConnectTimeout
(
5000
);
connection
.
setReadTimeout
(
5000
);
connection
.
setRequestProperty
(
"accept"
,
"*/*"
);
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论