Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
M
meta_crawler
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
刘伟刚
meta_crawler
Commits
a1b331fb
提交
a1b331fb
authored
7月 24, 2022
作者:
张文库
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
谷歌浏览器驱动复用
上级
cc9aa52f
显示空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
339 行增加
和
36 行删除
+339
-36
ChromeExecutor.java
...rawler/src/main/java/com/zzsn/crawler/ChromeExecutor.java
+91
-0
ReuseWebDriver.java
...rawler/src/main/java/com/zzsn/crawler/ReuseWebDriver.java
+129
-0
ChromeTest.java
comm_crawler/src/main/java/com/zzsn/test/ChromeTest.java
+80
-36
DriverUtil.java
comm_crawler/src/main/java/com/zzsn/util/DriverUtil.java
+39
-0
没有找到文件。
comm_crawler/src/main/java/com/zzsn/crawler/ChromeExecutor.java
0 → 100644
浏览文件 @
a1b331fb
package
com
.
zzsn
.
crawler
;
import
org.openqa.selenium.NoSuchSessionException
;
import
org.openqa.selenium.SessionNotCreatedException
;
import
org.openqa.selenium.UnsupportedCommandException
;
import
org.openqa.selenium.WebDriverException
;
import
org.openqa.selenium.remote.*
;
import
org.openqa.selenium.remote.codec.w3c.W3CHttpCommandCodec
;
import
org.openqa.selenium.remote.codec.w3c.W3CHttpResponseCodec
;
import
org.openqa.selenium.remote.http.*
;
import
java.io.IOException
;
import
java.net.URL
;
import
static
org
.
openqa
.
selenium
.
remote
.
DriverCommand
.*;
/**
 * Command executor used to drive an already running browser session.
 *
 * <p>The command/response codecs are fixed to the W3C implementations in the constructor
 * and a dedicated {@link HttpClient} is created for the given server address. Because the
 * codecs are always set, {@link #execute(Command)} rejects every {@code NEW_SESSION}
 * command with a {@link SessionNotCreatedException}.
 *
 * @author kethy
 * @date 2022-07-23 18:33:50
 */
public class ChromeExecutor extends HttpCommandExecutor {

    private CommandCodec<HttpRequest> commandCodec;
    private ResponseCodec<HttpResponse> responseCodec;
    private final HttpClient client;

    /**
     * Creates an executor bound to the given driver server.
     *
     * @param addressOfRemoteServer URL of the running driver server
     */
    ChromeExecutor(URL addressOfRemoteServer) {
        super(addressOfRemoteServer);
        initCodec();
        this.client = HttpClient.Factory.createDefault().createClient(addressOfRemoteServer);
    }

    /** Installs the W3C command and response codecs. */
    private void initCodec() {
        commandCodec = new W3CHttpCommandCodec();
        responseCodec = new W3CHttpResponseCodec();
    }

    /**
     * Encodes the command, sends it to the driver server and decodes the answer.
     *
     * @param command command to run
     * @return decoded response; an empty {@link Response} for {@code QUIT} without a session
     * @throws IOException if the HTTP exchange fails
     * @throws NoSuchSessionException if the command has no session id and is not
     *         {@code QUIT}, {@code GET_ALL_SESSIONS} or {@code NEW_SESSION}
     * @throws SessionNotCreatedException if {@code NEW_SESSION} is requested while codecs
     *         are already set
     * @throws UnsupportedOperationException if the server rejects the command as unsupported
     *         without providing a message
     */
    @Override
    public Response execute(Command command) throws IOException {
        if (command.getSessionId() == null) {
            if (QUIT.equals(command.getName())) {
                // Nothing to quit when there is no session.
                return new Response();
            }
            if (!GET_ALL_SESSIONS.equals(command.getName())
                    && !NEW_SESSION.equals(command.getName())) {
                throw new NoSuchSessionException("会话ID为空,请调用quit()退出后再使用驱动");
            }
        }

        if (NEW_SESSION.equals(command.getName())) {
            if (commandCodec != null) {
                throw new SessionNotCreatedException("Session 已存在..");
            }
            ProtocolHandshake handshake = new ProtocolHandshake();
            ProtocolHandshake.Result result = handshake.createSession(client, command);
            Dialect dialect = result.getDialect();
            commandCodec = dialect.getCommandCodec();
            responseCodec = dialect.getResponseCodec();
            return result.createResponse();
        }

        if (commandCodec == null || responseCodec == null) {
            throw new WebDriverException("未定义命令或解码器。无法继续..");
        }

        HttpRequest httpRequest = commandCodec.encode(command);
        try {
            HttpResponse httpResponse = client.execute(httpRequest);
            Response response = responseCodec.decode(httpResponse);
            if (response.getSessionId() == null) {
                String sessionId = resolveSessionId(httpResponse, command);
                // Session-less commands (e.g. GET_ALL_SESSIONS) may legitimately have no id.
                if (sessionId != null) {
                    response.setSessionId(sessionId);
                }
            }
            return response;
        } catch (UnsupportedCommandException e) {
            if (e.getMessage() == null || "".equals(e.getMessage())) {
                throw new UnsupportedOperationException(
                        "未接收到服务器信息。命令:" + command.getName(), e.getCause());
            }
            throw e;
        }
    }

    /**
     * Determines the session id for a response that did not carry one: first from the
     * response's target host, then from the command itself.
     *
     * @return the session id, or {@code null} when neither source provides one
     */
    private static String resolveSessionId(HttpResponse httpResponse, Command command) {
        String targetHost = httpResponse.getTargetHost();
        if (targetHost != null) {
            // NOTE(review): depending on the Selenium version this returns either a String
            // or an Optional<String>; unwrap so that "Optional[...]" is never stored as the id.
            Object fromHost = HttpSessionId.getSessionId(targetHost);
            if (fromHost instanceof java.util.Optional) {
                fromHost = ((java.util.Optional<?>) fromHost).orElse(null);
            }
            if (fromHost != null) {
                return fromHost.toString();
            }
        }
        // The command's session id is null for session-less commands; avoid the NPE.
        return command.getSessionId() == null ? null : command.getSessionId().toString();
    }
}
\ No newline at end of file
comm_crawler/src/main/java/com/zzsn/crawler/ReuseWebDriver.java
0 → 100644
浏览文件 @
a1b331fb
package
com
.
zzsn
.
crawler
;
import
com.google.common.collect.ImmutableMap
;
import
org.openqa.selenium.Capabilities
;
import
org.openqa.selenium.MutableCapabilities
;
import
org.openqa.selenium.WebDriverException
;
import
org.openqa.selenium.net.UrlChecker
;
import
org.openqa.selenium.remote.Command
;
import
org.openqa.selenium.remote.DriverCommand
;
import
org.openqa.selenium.remote.RemoteWebDriver
;
import
org.openqa.selenium.remote.internal.WebElementToJsonConverter
;
import
java.io.IOException
;
import
java.net.MalformedURLException
;
import
java.net.URL
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.concurrent.TimeUnit
;
import
java.util.stream.Collectors
;
import
java.util.stream.Stream
;
import
static
org
.
openqa
.
selenium
.
remote
.
CapabilityType
.
SUPPORTS_JAVASCRIPT
;
/**
 * Web driver that attaches to an existing browser session (driver reuse) instead of
 * starting a new one.
 *
 * <p>{@link #startSession(Capabilities)} is overridden to do nothing; the session id passed
 * to the constructor is installed directly and verified with a {@code STATUS} command.
 *
 * @date 2022/7/23 18:18
 * @author andylau
 */
public class ReuseWebDriver extends RemoteWebDriver {

    private Capabilities capabilities;
    private final String serverUrl;

    /**
     * Attaches to the session {@code sessionId} on the driver server at {@code serverUrl}.
     *
     * @param serverUrl address of the running driver server
     * @param sessionId id of the session to reuse
     * @throws IOException if the URL is malformed or the connection test fails on I/O
     */
    public ReuseWebDriver(String serverUrl, String sessionId) throws IOException {
        super(new ChromeExecutor(new URL(serverUrl)), null);
        this.serverUrl = serverUrl;
        connectTest(sessionId);
    }

    /** Intentionally empty: no new session is created when reusing an existing one. */
    @Override
    protected void startSession(Capabilities capabilities) {
    }

    @Override
    public Capabilities getCapabilities() {
        return capabilities;
    }

    /**
     * Tells whether the browser is unable to run JavaScript.
     *
     * <p>NOTE(review): the capabilities object created in {@code connectTest} is empty, so
     * the result depends on how {@code Capabilities.is} treats an unset
     * {@code SUPPORTS_JAVASCRIPT} in the Selenium version in use — confirm.
     *
     * @author andylau
     * @date 2022/7/24 10:08
     */
    private boolean isJavascriptDisabled() {
        return !capabilities.is(SUPPORTS_JAVASCRIPT);
    }

    /**
     * Runs a script synchronously in the reused browser.
     *
     * @author andylau
     * @date 2022/7/24 10:06
     */
    @Override
    public Object executeScript(String script, Object... args) {
        return runScript(DriverCommand.EXECUTE_SCRIPT, script, args);
    }

    /**
     * Runs a script asynchronously in the reused browser.
     *
     * @author andylau
     * @date 2022/7/24 10:06
     */
    @Override
    public Object executeAsyncScript(String script, Object... args) {
        return runScript(DriverCommand.EXECUTE_ASYNC_SCRIPT, script, args);
    }

    /**
     * Shared implementation of the synchronous and asynchronous script commands.
     *
     * <p>The script is sent unchanged. The previous {@code replaceAll("\"", "\\\"")} call was
     * a no-op: in a regex replacement string the backslash only escapes the quote, so each
     * quote was replaced by itself.
     *
     * @throws UnsupportedOperationException if JavaScript is reported as disabled
     */
    private Object runScript(String driverCommand, String script, Object[] args) {
        if (isJavascriptDisabled()) {
            throw new UnsupportedOperationException("必须使用支持执行javascript的WebDriver的底层实例..");
        }
        List<Object> convertedArgs = Stream.of(args)
                .map(new WebElementToJsonConverter())
                .collect(Collectors.toList());
        Map<String, ?> params = ImmutableMap.of("script", script, "args", convertedArgs);
        return execute(driverCommand, params).getValue();
    }

    /**
     * Installs the session id (when one is given) and sends a {@code STATUS} command through
     * the command executor to check the connection.
     *
     * @param sessionId session to attach to; {@code null} or empty leaves the id unset
     * @author andylau
     * @date 2022/7/24 10:03
     */
    private void connectTest(String sessionId) throws IOException {
        // Treat null like empty instead of failing with a NullPointerException.
        if (sessionId != null && !sessionId.isEmpty()) {
            super.setSessionId(sessionId);
        }
        Command command = new Command(super.getSessionId(), DriverCommand.STATUS);
        getCommandExecutor().execute(command);
        this.capabilities = new MutableCapabilities();
    }

    /**
     * Quits the browser session and then shuts down the driver service so that driver
     * processes do not pile up. The shutdown is attempted even when quitting the session
     * fails; the first failure is rethrown with any later one attached as suppressed.
     */
    @Override
    public void quit() {
        RuntimeException failure = null;
        try {
            super.quit();
        } catch (RuntimeException e) {
            failure = e;
        }
        try {
            shutdownDriverService();
        } catch (WebDriverException e) {
            if (failure == null) {
                failure = e;
            } else {
                failure.addSuppressed(e);
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Waits up to 3 seconds for {@code serverUrl + "/shutdown"} to become unavailable.
     *
     * <p>NOTE(review): presumably polling this URL is what makes the driver service exit —
     * confirm against the driver in use.
     *
     * @throws WebDriverException if the URL is malformed or the wait times out
     */
    private void shutdownDriverService() {
        try {
            URL killUrl = new URL(serverUrl + "/shutdown");
            new UrlChecker().waitUntilUnavailable(3, TimeUnit.SECONDS, killUrl);
        } catch (MalformedURLException e) {
            throw new WebDriverException(e);
        } catch (UrlChecker.TimeoutException e) {
            throw new WebDriverException("等待驱动程序服务关闭时超时..", e);
        }
    }
}
comm_crawler/src/main/java/com/zzsn/test/ChromeTest.java
浏览文件 @
a1b331fb
package
com
.
zzsn
.
test
;
import
com.zzsn.crawler.ChromeDriverPool
;
import
com.zzsn.crawler.ReuseWebDriver
;
import
com.zzsn.util.DriverUtil
;
import
org.openqa.selenium.chrome.ChromeDriver
;
import
org.openqa.selenium.remote.HttpCommandExecutor
;
import
java.io.IOException
;
import
java.util.concurrent.CountDownLatch
;
import
java.util.concurrent.ExecutorService
;
import
java.util.concurrent.Executors
;
public
class
ChromeTest
{
/**
 * Simulates concurrent use of the driver pool: starts {@code parallelSize} tasks that are
 * all released at once, each polling the pool until it gets a worker, using it for ten
 * seconds and shutting it down.
 */
public static void main(String[] args) throws Exception {
    // Simulate concurrency.
    int parallelSize = 5;
    final CountDownLatch latch = new CountDownLatch(1);
    ExecutorService pool = Executors.newFixedThreadPool(parallelSize);
    for (int i = 0; i < parallelSize; i++) {
        pool.execute(new Runnable() {
            @Override
            public void run() {
                ChromeDriverPool.Worker work = null;
                try {
                    // Hold every thread until the latch is released.
                    latch.await();
                    System.out.println(Thread.currentThread().getName());
                    while (true) {
                        // Business requirement: keep waiting until a driver is available.
                        work = ChromeDriverPool.getPool();
                        if (work != null) {
                            break;
                        }
                        Thread.sleep(1000);
                    }
                    work.start();
                    ChromeDriver driver = work.getDriver();
                    System.out.println(Thread.currentThread().getName() + ":" + driver);
                    Thread.sleep(1000 * 10);
                } catch (InterruptedException e) {
                    // Keep the interrupt visible to the executor.
                    Thread.currentThread().interrupt();
                    System.out.println(e.getMessage());
                } catch (Exception e) {
                    System.out.println(e.getMessage());
                } finally {
                    // No worker was obtained if the task failed before or while polling.
                    if (work != null) {
                        work.shutdown();
                    }
                }
            }
        });
    }
    // Release all threads at once.
    latch.countDown();
    // Let submitted tasks finish, then allow the JVM to exit.
    pool.shutdown();
}
//
public static void main(String[] args) throws Exception {
//
//模拟并发
//
int parallelSize = 5;
//
final CountDownLatch latch = new CountDownLatch(1);
//
ExecutorService pool = Executors.newFixedThreadPool(parallelSize);
//
//
for (int i = 0; i < parallelSize; i++) {
//
pool.execute(new Runnable() {
//
@Override
//
public void run() {
//
//
ChromeDriverPool.Worker work = null;
//
try {
//
latch.await();//阻塞所有线程
//
System.out.println(Thread.currentThread().getName());
//
while (true) {//业务需求,一直等待直到获取到驱动
//
work = ChromeDriverPool.getPool();
//
if (work != null) {
//
break;
//
}
//
Thread.sleep(1000);
//
}
//
work.start();
//
ChromeDriver driver = work.getDriver();
//
System.out.println(Thread.currentThread().getName()+":"+driver);
//
Thread.sleep(1000 * 10);
//
} catch (Exception e) {
//
System.out.println(e.getMessage());
//
}finally {
//
work.shutdown();
//
}
//
}
//
});
//
}
//
latch.countDown();//开炮
//
}
// public static void main(String[] args) throws Exception {
// //模拟并发
...
...
@@ -83,4 +87,44 @@ public class ChromeTest {
// latch.countDown();//开炮
// }
/**
 * Manual check of driver reuse: tries to attach to a fixed server/session, falls back to
 * starting a fresh Chrome driver and attaching to that, then opens a page, prints some
 * session details and shows an alert. The browser is left open on purpose.
 */
public static void main(String[] args) {
    // Attach to an existing chrome browser instance if possible.
    ReuseWebDriver driver;
    try {
        driver = new ReuseWebDriver("http://localhost:63714", "fdaa2ca38a077a604dffdca6ecc5df1d");
    } catch (Exception e) {
        System.out.println("Session连接失败,重新打开浏览器驱动...");
        ChromeDriver freshDriver = DriverUtil.reconnectDriver();
        String freshServerUrl = DriverUtil.getServerUrl(freshDriver);
        String freshSessionId = freshDriver.getSessionId().toString();
        try {
            driver = new ReuseWebDriver(freshServerUrl, freshSessionId);
        } catch (IOException e1) {
            System.out.println("获取驱动连接失败!!!");
            return;
        }
    }

    // Maximize the window
    // driver.manage().window().maximize();
    // Set the implicit wait time
    // driver.manage().timeouts().implicitlyWait(3, TimeUnit.SECONDS);

    // Open a site.
    driver.get("https://www.bing.com");

    // Print the title of the current page, then session details.
    String title = driver.getTitle();
    System.out.println("当前打开页面的标题是: " + title);
    System.out.println(driver.getSessionId());
    System.out.println(driver.getCapabilities());
    HttpCommandExecutor executor = (HttpCommandExecutor) driver.getCommandExecutor();
    System.out.println(executor.getAddressOfRemoteServer());

    driver.executeScript("alert(\"hello,this is an alert!\")");

    // Close and quit the browser
    // driver.quit();
}
}
comm_crawler/src/main/java/com/zzsn/util/DriverUtil.java
0 → 100644
浏览文件 @
a1b331fb
package
com
.
zzsn
.
util
;
import
com.zzsn.generation.Constants
;
import
org.openqa.selenium.chrome.ChromeDriver
;
import
org.openqa.selenium.chrome.ChromeDriverService
;
import
org.openqa.selenium.remote.HttpCommandExecutor
;
import
java.net.URL
;
/**
 * Helpers for starting a Chrome driver and finding its server address.
 *
 * @author andylau
 * @version 1.0
 * @date 2022/7/23 17:14
 **/
public class DriverUtil {

    /**
     * Starts a new Chrome driver.
     *
     * <p>Sets the {@code webdriver.chrome.driver} system property to
     * {@code Constants.CHROMEDRIVE} and creates the driver on a default
     * {@link ChromeDriverService}.
     *
     * @return the newly created driver
     * @author andylau
     * @date 2022/7/23 17:15
     */
    public static ChromeDriver reconnectDriver() {
        System.setProperty("webdriver.chrome.driver", Constants.CHROMEDRIVE);
        // Initialize a fresh chrome browser instance on a default service.
        return new ChromeDriver(ChromeDriverService.createDefaultService());
    }

    /**
     * Returns the address of the driver server the given driver talks to.
     *
     * @param driver driver whose command executor is an {@link HttpCommandExecutor}
     * @return the server URL as a string
     * @author andylau
     * @date 2022/7/23 17:15
     */
    public static String getServerUrl(ChromeDriver driver) {
        HttpCommandExecutor executor = (HttpCommandExecutor) driver.getCommandExecutor();
        return executor.getAddressOfRemoteServer().toString();
    }
}
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论