Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
M
meta_crawler
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
刘伟刚
meta_crawler
Commits
e7ad390c
提交
e7ad390c
authored
7月 25, 2022
作者:
张文库
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
谷歌浏览器驱动复用
上级
a1b331fb
显示空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
126 行增加
和
86 行删除
+126
-86
ChromeExecutor.java
...rawler/src/main/java/com/zzsn/crawler/ChromeExecutor.java
+14
-12
ReuseWebDriver.java
...rawler/src/main/java/com/zzsn/crawler/ReuseWebDriver.java
+42
-47
ChromeTest.java
comm_crawler/src/main/java/com/zzsn/test/ChromeTest.java
+24
-25
DriverUtil.java
comm_crawler/src/main/java/com/zzsn/util/DriverUtil.java
+46
-2
没有找到文件。
comm_crawler/src/main/java/com/zzsn/crawler/ChromeExecutor.java
浏览文件 @
e7ad390c
package
com
.
zzsn
.
crawler
;
import
org.openqa.selenium.NoSuchSessionException
;
import
org.openqa.selenium.SessionNotCreatedException
;
import
lombok.extern.slf4j.Slf4j
;
import
org.openqa.selenium.UnsupportedCommandException
;
import
org.openqa.selenium.WebDriverException
;
import
org.openqa.selenium.remote.*
;
import
org.openqa.selenium.remote.codec.w3c.W3CHttpCommandCodec
;
import
org.openqa.selenium.remote.codec.w3c.W3CHttpResponseCodec
;
import
org.openqa.selenium.remote.http.*
;
import
org.openqa.selenium.remote.http.HttpClient
;
import
org.openqa.selenium.remote.http.HttpRequest
;
import
org.openqa.selenium.remote.http.HttpResponse
;
import
java.io.IOException
;
import
java.net.URL
;
...
...
@@ -19,6 +19,7 @@ import static org.openqa.selenium.remote.DriverCommand.*;
* @author kethy
* @date 2022-07-23 18:33:50
*/
@Slf4j
public
class
ChromeExecutor
extends
HttpCommandExecutor
{
private
CommandCodec
<
HttpRequest
>
commandCodec
;
private
ResponseCodec
<
HttpResponse
>
responseCodec
;
...
...
@@ -39,17 +40,19 @@ public class ChromeExecutor extends HttpCommandExecutor {
public
Response
execute
(
Command
command
)
throws
IOException
{
if
(
command
.
getSessionId
()
==
null
)
{
if
(
QUIT
.
equals
(
command
.
getName
()))
{
return
n
ew
Response
()
;
return
n
ull
;
}
if
(!
GET_ALL_SESSIONS
.
equals
(
command
.
getName
())
&&
!
NEW_SESSION
.
equals
(
command
.
getName
()))
{
throw
new
NoSuchSessionException
(
"会话ID为空,请调用quit()退出后再使用驱动"
);
log
.
error
(
"会话ID为空,请调用退出后再使用驱动..."
);
return
null
;
}
}
if
(
NEW_SESSION
.
equals
(
command
.
getName
()))
{
if
(
commandCodec
!=
null
)
{
throw
new
SessionNotCreatedException
(
"Session 已存在.."
);
log
.
error
(
"Session 未创建..."
);
return
null
;
}
ProtocolHandshake
handshake
=
new
ProtocolHandshake
();
...
...
@@ -62,7 +65,8 @@ public class ChromeExecutor extends HttpCommandExecutor {
}
if
(
commandCodec
==
null
||
responseCodec
==
null
)
{
throw
new
WebDriverException
(
"未定义命令或解码器。无法继续.."
);
log
.
error
(
"未定义命令或解码器。无法继续..."
);
return
null
;
}
HttpRequest
httpRequest
=
commandCodec
.
encode
(
command
);
...
...
@@ -81,10 +85,9 @@ public class ChromeExecutor extends HttpCommandExecutor {
return
response
;
}
catch
(
UnsupportedCommandException
e
)
{
if
(
e
.
getMessage
()
==
null
||
""
.
equals
(
e
.
getMessage
()))
{
throw
new
UnsupportedOperationException
(
"未接收到服务器信息。命令:"
+
command
.
getName
(),
e
.
getCaus
e
());
log
.
error
(
"未接收到服务器返回信息。命令:"
+
command
.
getNam
e
());
}
throw
e
;
return
null
;
}
}
}
\ No newline at end of file
comm_crawler/src/main/java/com/zzsn/crawler/ReuseWebDriver.java
浏览文件 @
e7ad390c
package
com
.
zzsn
.
crawler
;
import
com.google.common.collect.ImmutableMap
;
import
lombok.extern.slf4j.Slf4j
;
import
org.openqa.selenium.Capabilities
;
import
org.openqa.selenium.MutableCapabilities
;
import
org.openqa.selenium.WebDriverException
;
import
org.openqa.selenium.net.UrlChecker
;
import
org.openqa.selenium.remote.Command
;
import
org.openqa.selenium.remote.DriverCommand
;
import
org.openqa.selenium.remote.RemoteWebDriver
;
import
org.openqa.selenium.remote.Response
;
import
org.openqa.selenium.remote.internal.WebElementToJsonConverter
;
import
java.io.IOException
;
...
...
@@ -19,44 +20,30 @@ import java.util.concurrent.TimeUnit;
import
java.util.stream.Collectors
;
import
java.util.stream.Stream
;
import
static
org
.
openqa
.
selenium
.
remote
.
CapabilityType
.
SUPPORTS_JAVASCRIPT
;
/***
* 驱动复用
* @date 2022/7/23 18:
18
* @date 2022/7/23 18:
01
* @author andylau
*/
@Slf4j
public
class
ReuseWebDriver
extends
RemoteWebDriver
{
private
Capabilities
capabilities
;
private
String
serverUrl
;
public
ReuseWebDriver
(
String
serverUrl
,
String
sessionId
)
throws
IOException
{
super
(
new
ChromeExecutor
(
new
URL
(
serverUrl
)),
null
);
this
(
serverUrl
);
super
.
setSessionId
(
sessionId
);
this
.
serverUrl
=
serverUrl
;
connectTest
(
sessionId
);
}
@Override
protected
void
startSession
(
Capabilities
capabilities
)
{
private
ReuseWebDriver
(
String
serverUrl
)
throws
MalformedURLException
{
super
(
new
ChromeExecutor
(
new
URL
(
serverUrl
)),
new
MutableCapabilities
());
}
@Override
public
Capabilities
getCapabilities
()
{
return
capabilities
;
}
protected
void
startSession
(
Capabilities
capabilities
)
{
/**
* 判断浏览器是否支持运行JS脚本
*
* @author andylau
* @date 2022/7/24 10:08
*/
private
boolean
isJavascriptDisabled
()
{
return
!
capabilities
.
is
(
SUPPORTS_JAVASCRIPT
);
}
/***
...
...
@@ -66,9 +53,6 @@ public class ReuseWebDriver extends RemoteWebDriver {
*/
@Override
public
Object
executeScript
(
String
script
,
Object
...
args
)
{
if
(
isJavascriptDisabled
())
{
throw
new
UnsupportedOperationException
(
"必须使用支持执行javascript的WebDriver的底层实例.."
);
}
// 替换引号
script
=
script
.
replaceAll
(
"\""
,
"\\\""
);
List
<
Object
>
convertedArgs
=
Stream
.
of
(
args
).
map
(
new
WebElementToJsonConverter
()).
collect
(
Collectors
.
toList
());
...
...
@@ -79,13 +63,10 @@ public class ReuseWebDriver extends RemoteWebDriver {
/***
* 在打开的浏览器异步执行JS脚本
* @author andylau
* @date 2022/7/2
4 10
:06
* @date 2022/7/2
3 18
:06
*/
@Override
public
Object
executeAsyncScript
(
String
script
,
Object
...
args
)
{
if
(
isJavascriptDisabled
())
{
throw
new
UnsupportedOperationException
(
"必须使用支持执行javascript的WebDriver的底层实例.."
);
}
script
=
script
.
replaceAll
(
"\""
,
"\\\""
);
List
<
Object
>
convertedArgs
=
Stream
.
of
(
args
).
map
(
new
WebElementToJsonConverter
()).
collect
(
Collectors
.
toList
());
Map
<
String
,
?>
params
=
ImmutableMap
.
of
(
"script"
,
script
,
"args"
,
convertedArgs
);
...
...
@@ -93,37 +74,51 @@ public class ReuseWebDriver extends RemoteWebDriver {
}
/***
* 连接测试
*
session
连接测试
* @author andylau
* @date 2022/7/2
4 10
:03
* @date 2022/7/2
3 18
:03
*/
private
void
connectTest
(
String
sessionId
)
throws
IOException
{
if
(!
sessionId
.
isEmpty
())
{
super
.
setSessionId
(
sessionId
);
/**
 * Checks whether the current session still answers a STATUS command.
 *
 * <p>The command is built with this driver's session id and sent through the
 * driver's command executor. Any exception raised while executing it is
 * logged and reported as a failed connection.
 *
 * @return {@code true} when a non-null response with status {@code 0} comes
 *         back; {@code false} otherwise, including when execution throws
 */
private boolean connectTest() {
    Command statusCommand = new Command(super.getSessionId(), DriverCommand.STATUS);
    boolean connected = false;
    try {
        Response statusResponse = getCommandExecutor().execute(statusCommand);
        connected = statusResponse != null && 0 == statusResponse.getStatus();
    } catch (Exception e) {
        log.error("Session 连接失败...");
    }
    return connected;
}
Command
command
=
new
Command
(
super
.
getSessionId
(),
DriverCommand
.
STATUS
);
getCommandExecutor
().
execute
(
command
);
this
.
capabilities
=
new
MutableCapabilities
();
/**
 * Runs the session connection test and quits the driver when the test fails.
 *
 * <p>Quitting is best-effort: an exception thrown by {@code quit()} is logged
 * and does not propagate, so the caller always receives the test outcome.
 *
 * @return {@code true} when the connection test failed (a quit has then been
 *         attempted); {@code false} when the session responded successfully
 * @author andylau
 * @date 2022/7/25 14:25
 */
public boolean connectTestFail() {
    boolean failed = !this.connectTest();
    // Shut the service down on failure.
    if (failed) {
        try {
            this.quit();
        } catch (Exception e) {
            // Cleanup must not mask the failed-connection result, but the
            // cause is recorded instead of being dropped silently.
            log.warn("连接测试失败后退出驱动异常...", e);
        }
    }
    return failed;
}
@Override
public
void
quit
()
{
super
.
quit
();
// 关闭DriverService,避免无法关闭DriverService,导致出现过多Driver进程。
WebDriverException
throwe
=
null
;
try
{
URL
killUrl
=
new
URL
(
serverUrl
+
"/shutdown"
);
new
UrlChecker
().
waitUntilUnavailable
(
3
,
TimeUnit
.
SECONDS
,
killUrl
);
}
catch
(
MalformedURLException
e
)
{
throwe
=
new
WebDriverException
(
e
);
}
catch
(
UrlChecker
.
TimeoutException
e
)
{
throwe
=
new
WebDriverException
(
"等待驱动程序服务关闭时超时.."
,
e
);
}
if
(
throwe
!=
null
)
{
throw
throwe
;
}
catch
(
Exception
e
)
{
log
.
error
(
"驱动退出异常..."
);
}
}
}
comm_crawler/src/main/java/com/zzsn/test/ChromeTest.java
浏览文件 @
e7ad390c
package
com
.
zzsn
.
test
;
import
com.alibaba.fastjson.JSON
;
import
com.zzsn.crawler.ChromeDriverPool
;
import
com.zzsn.crawler.ReuseWebDriver
;
import
com.zzsn.generation.Constants
;
import
com.zzsn.job.JedisUtil
;
import
com.zzsn.util.DriverUtil
;
import
lombok.extern.slf4j.Slf4j
;
import
org.openqa.selenium.chrome.ChromeDriver
;
import
org.openqa.selenium.chrome.ChromeDriverService
;
import
org.openqa.selenium.remote.HttpCommandExecutor
;
import
org.openqa.selenium.remote.SessionId
;
import
java.io.IOException
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.concurrent.CountDownLatch
;
import
java.util.concurrent.ExecutorService
;
import
java.util.concurrent.Executors
;
@Slf4j
public
class
ChromeTest
{
// public static void main(String[] args) throws Exception {
// //模拟并发
...
...
@@ -87,33 +96,17 @@ public class ChromeTest {
// latch.countDown();//开炮
// }
public
static
void
main
(
String
[]
args
)
{
// 初始化一个chrome浏览器实例
ReuseWebDriver
driver
=
null
;
try
{
driver
=
new
ReuseWebDriver
(
"http://localhost:63714"
,
"fdaa2ca38a077a604dffdca6ecc5df1d"
);
}
catch
(
Exception
e
)
{
System
.
out
.
println
(
"Session连接失败,重新打开浏览器驱动..."
);
ChromeDriver
chromeDriver
=
DriverUtil
.
reconnectDriver
();
String
serverUrl
=
DriverUtil
.
getServerUrl
(
chromeDriver
);
String
sessionId
=
chromeDriver
.
getSessionId
().
toString
();
try
{
driver
=
new
ReuseWebDriver
(
serverUrl
,
sessionId
);
}
catch
(
IOException
e1
)
{
System
.
out
.
println
(
"获取驱动连接失败!!!"
);
// 可复用驱动使用Demo
public
static
void
main
(
String
[]
args
)
throws
Exception
{
ReuseWebDriver
driver
=
DriverUtil
.
getChromeDriver
();
if
(
driver
==
null
)
{
// 从缓存取出的SessionId为空时,驱动会返回null,可参考工具类重新设置缓存
log
.
error
(
"获取浏览器驱动失败,请检查SessionId缓存是否存在..."
);
return
;
}
}
// 最大化窗口
// driver.manage().window().maximize();
// 设置隐性等待时间
// driver.manage().timeouts().implicitlyWait(3, TimeUnit.SECONDS);
// get()打开一个站点
try
{
// 测试打开bing
driver
.
get
(
"https://www.bing.com"
);
// getTitle()获取当前页面title的值
System
.
out
.
println
(
"当前打开页面的标题是: "
+
driver
.
getTitle
());
...
...
@@ -123,8 +116,14 @@ public class ChromeTest {
System
.
out
.
println
(((
HttpCommandExecutor
)
driver
.
getCommandExecutor
()).
getAddressOfRemoteServer
());
driver
.
executeScript
(
"alert(\"hello,this is an alert!\")"
);
// 关闭并退出浏览器
// driver.quit();
}
catch
(
Exception
e
)
{
// 驱动突然崩溃(可手动关闭驱动触发该场景), response在短时间内会返回正常请求码从而导致
// driver.get("https://www.bing.com") 获取页面信息异常
// 这里直接退出,不再重新获取,下次获取链接时再重新启动驱动服务
log
.
error
(
"获取浏览器驱动异常,驱动重启中..."
);
driver
.
quit
();
}
}
}
comm_crawler/src/main/java/com/zzsn/util/DriverUtil.java
浏览文件 @
e7ad390c
package
com
.
zzsn
.
util
;
import
com.alibaba.fastjson.JSON
;
import
com.zzsn.crawler.ReuseWebDriver
;
import
com.zzsn.generation.Constants
;
import
com.zzsn.job.JedisUtil
;
import
lombok.extern.slf4j.Slf4j
;
import
org.openqa.selenium.chrome.ChromeDriver
;
import
org.openqa.selenium.chrome.ChromeDriverService
;
import
org.openqa.selenium.remote.HttpCommandExecutor
;
import
java.net.URL
;
import
java.util.HashMap
;
import
java.util.Map
;
/**
* @author andylau
* @version 1.0
* @date 2022/7/23 17:14
**/
@Slf4j
public
class
DriverUtil
{
/***
...
...
@@ -19,7 +26,7 @@ public class DriverUtil {
* @author andylau
* @date 2022/7/23 17:15
*/
p
ublic
static
ChromeDriver
reconnectDriver
()
{
p
rivate
static
ChromeDriver
reconnectDriver
()
{
System
.
setProperty
(
"webdriver.chrome.driver"
,
Constants
.
CHROMEDRIVE
);
ChromeDriverService
service
=
ChromeDriverService
.
createDefaultService
();
// 重新初始化一个chrome浏览器实例
...
...
@@ -31,9 +38,46 @@ public class DriverUtil {
* @author andylau
* @date 2022/7/23 17:15
*/
p
ublic
static
String
getServerUrl
(
ChromeDriver
driver
)
{
p
rivate
static
String
getServerUrl
(
ChromeDriver
driver
)
{
HttpCommandExecutor
commandExecutor
=
(
HttpCommandExecutor
)
driver
.
getCommandExecutor
();
URL
server
=
commandExecutor
.
getAddressOfRemoteServer
();
return
server
.
toString
();
}
/**
 * Attaches to an existing driver session, falling back to a freshly started
 * driver when that session fails its connection test.
 *
 * <p>On fallback, the new session id and server URL are written to the
 * {@code SELENIUM_DRIVER_CACHE} key before a driver bound to them is returned.
 *
 * @param sessionId session id of the driver session to reuse
 * @param serverUrl address of the driver service hosting that session
 * @return a driver bound to the given or the newly created session, or
 *         {@code null} when either argument is null or empty
 * @throws Exception if creating a driver or updating the cache fails
 */
@SuppressWarnings("all")
public static ReuseWebDriver connectChrome(String sessionId, String serverUrl) throws Exception {
    boolean serverUrlMissing = serverUrl == null || "".equals(serverUrl);
    boolean sessionIdMissing = sessionId == null || "".equals(sessionId);
    if (serverUrlMissing || sessionIdMissing) {
        log.error("未获取到驱动服务地址、sessionId");
        return null;
    }

    ReuseWebDriver reused = new ReuseWebDriver(serverUrl, sessionId);
    if (!reused.connectTestFail()) {
        return reused;
    }

    // The cached session did not respond: start a new driver service and
    // take its address and session id as the new connection details.
    ChromeDriver freshDriver = DriverUtil.reconnectDriver();
    String freshServerUrl = DriverUtil.getServerUrl(freshDriver);
    String freshSessionId = freshDriver.getSessionId().toString();

    Map<String, String> cacheEntry = new HashMap<>(2);
    cacheEntry.put("sessionId", freshSessionId);
    cacheEntry.put("serverUrl", freshServerUrl);
    // Cache the browser driver details.
    // NOTE(review): -1 is presumably "no expiry" - confirm against JedisUtil.
    JedisUtil.setString("SELENIUM_DRIVER_CACHE", JSON.toJSONString(cacheEntry), -1);

    return new ReuseWebDriver(freshServerUrl, freshSessionId);
}
/**
 * Returns a driver for the Chrome session recorded in the driver cache.
 *
 * <p>The {@code SELENIUM_DRIVER_CACHE} entry is read through {@code JedisUtil},
 * parsed as a JSON map, and its {@code sessionId} and {@code serverUrl} values
 * are handed to {@link #connectChrome(String, String)}.
 *
 * @return the driver produced by {@code connectChrome}, or {@code null} when
 *         the cache holds no parsable entry or the entry lacks either value
 * @throws Exception if reading the cache or connecting to the driver fails
 * @author andylau
 * @date 2022/7/25 15:07
 */
@SuppressWarnings("all")
public static ReuseWebDriver getChromeDriver() throws Exception {
    String cacheInfo = JedisUtil.getString("SELENIUM_DRIVER_CACHE");
    Map<String, String> map = JSON.parseObject(cacheInfo, Map.class);
    if (map == null) {
        // A missing or empty cache entry parses to null; report it and return
        // null rather than failing with a NullPointerException below.
        log.error("未获取到浏览器驱动缓存信息");
        return null;
    }
    String sessionId = map.get("sessionId");
    String serverUrl = map.get("serverUrl");
    return connectChrome(sessionId, serverUrl);
}
}
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论