提交 e7ad390c 作者: 张文库

谷歌浏览器驱动复用

上级 a1b331fb
package com.zzsn.crawler; package com.zzsn.crawler;
import org.openqa.selenium.NoSuchSessionException; import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.SessionNotCreatedException;
import org.openqa.selenium.UnsupportedCommandException; import org.openqa.selenium.UnsupportedCommandException;
import org.openqa.selenium.WebDriverException;
import org.openqa.selenium.remote.*; import org.openqa.selenium.remote.*;
import org.openqa.selenium.remote.codec.w3c.W3CHttpCommandCodec; import org.openqa.selenium.remote.codec.w3c.W3CHttpCommandCodec;
import org.openqa.selenium.remote.codec.w3c.W3CHttpResponseCodec; import org.openqa.selenium.remote.codec.w3c.W3CHttpResponseCodec;
import org.openqa.selenium.remote.http.*; import org.openqa.selenium.remote.http.HttpClient;
import org.openqa.selenium.remote.http.HttpRequest;
import org.openqa.selenium.remote.http.HttpResponse;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
...@@ -19,6 +19,7 @@ import static org.openqa.selenium.remote.DriverCommand.*; ...@@ -19,6 +19,7 @@ import static org.openqa.selenium.remote.DriverCommand.*;
* @author kethy * @author kethy
* @date 2022-07-23 18:33:50 * @date 2022-07-23 18:33:50
*/ */
@Slf4j
public class ChromeExecutor extends HttpCommandExecutor { public class ChromeExecutor extends HttpCommandExecutor {
private CommandCodec<HttpRequest> commandCodec; private CommandCodec<HttpRequest> commandCodec;
private ResponseCodec<HttpResponse> responseCodec; private ResponseCodec<HttpResponse> responseCodec;
...@@ -39,17 +40,19 @@ public class ChromeExecutor extends HttpCommandExecutor { ...@@ -39,17 +40,19 @@ public class ChromeExecutor extends HttpCommandExecutor {
public Response execute(Command command) throws IOException { public Response execute(Command command) throws IOException {
if (command.getSessionId() == null) { if (command.getSessionId() == null) {
if (QUIT.equals(command.getName())) { if (QUIT.equals(command.getName())) {
return new Response(); return null;
} }
if (!GET_ALL_SESSIONS.equals(command.getName()) && !NEW_SESSION.equals(command.getName())) { if (!GET_ALL_SESSIONS.equals(command.getName()) && !NEW_SESSION.equals(command.getName())) {
throw new NoSuchSessionException("会话ID为空,请调用quit()退出后再使用驱动"); log.error("会话ID为空,请调用退出后再使用驱动...");
return null;
} }
} }
if (NEW_SESSION.equals(command.getName())) { if (NEW_SESSION.equals(command.getName())) {
if (commandCodec != null) { if (commandCodec != null) {
throw new SessionNotCreatedException("Session 已存在.."); log.error("Session 未创建...");
return null;
} }
ProtocolHandshake handshake = new ProtocolHandshake(); ProtocolHandshake handshake = new ProtocolHandshake();
...@@ -62,7 +65,8 @@ public class ChromeExecutor extends HttpCommandExecutor { ...@@ -62,7 +65,8 @@ public class ChromeExecutor extends HttpCommandExecutor {
} }
if (commandCodec == null || responseCodec == null) { if (commandCodec == null || responseCodec == null) {
throw new WebDriverException("未定义命令或解码器。无法继续.."); log.error("未定义命令或解码器。无法继续...");
return null;
} }
HttpRequest httpRequest = commandCodec.encode(command); HttpRequest httpRequest = commandCodec.encode(command);
...@@ -81,10 +85,9 @@ public class ChromeExecutor extends HttpCommandExecutor { ...@@ -81,10 +85,9 @@ public class ChromeExecutor extends HttpCommandExecutor {
return response; return response;
} catch (UnsupportedCommandException e) { } catch (UnsupportedCommandException e) {
if (e.getMessage() == null || "".equals(e.getMessage())) { if (e.getMessage() == null || "".equals(e.getMessage())) {
throw new UnsupportedOperationException("未接收到服务器信息。命令:" + command.getName(), e.getCause()); log.error("未接收到服务器返回信息。命令:" + command.getName());
} }
throw e; return null;
} }
} }
} }
\ No newline at end of file
package com.zzsn.crawler; package com.zzsn.crawler;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.Capabilities; import org.openqa.selenium.Capabilities;
import org.openqa.selenium.MutableCapabilities; import org.openqa.selenium.MutableCapabilities;
import org.openqa.selenium.WebDriverException;
import org.openqa.selenium.net.UrlChecker; import org.openqa.selenium.net.UrlChecker;
import org.openqa.selenium.remote.Command; import org.openqa.selenium.remote.Command;
import org.openqa.selenium.remote.DriverCommand; import org.openqa.selenium.remote.DriverCommand;
import org.openqa.selenium.remote.RemoteWebDriver; import org.openqa.selenium.remote.RemoteWebDriver;
import org.openqa.selenium.remote.Response;
import org.openqa.selenium.remote.internal.WebElementToJsonConverter; import org.openqa.selenium.remote.internal.WebElementToJsonConverter;
import java.io.IOException; import java.io.IOException;
...@@ -19,44 +20,30 @@ import java.util.concurrent.TimeUnit; ...@@ -19,44 +20,30 @@ import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import static org.openqa.selenium.remote.CapabilityType.SUPPORTS_JAVASCRIPT;
/*** /***
* 驱动复用 * 驱动复用
* @date 2022/7/23 18:18 * @date 2022/7/23 18:01
* @author andylau * @author andylau
*/ */
@Slf4j
public class ReuseWebDriver extends RemoteWebDriver { public class ReuseWebDriver extends RemoteWebDriver {
private Capabilities capabilities;
private String serverUrl; private String serverUrl;
public ReuseWebDriver(String serverUrl, String sessionId) throws IOException { public ReuseWebDriver(String serverUrl, String sessionId) throws IOException {
super(new ChromeExecutor(new URL(serverUrl)), null); this(serverUrl);
super.setSessionId(sessionId);
this.serverUrl = serverUrl; this.serverUrl = serverUrl;
connectTest(sessionId);
} }
@Override private ReuseWebDriver(String serverUrl) throws MalformedURLException {
protected void startSession(Capabilities capabilities) { super(new ChromeExecutor(new URL(serverUrl)), new MutableCapabilities());
} }
@Override @Override
public Capabilities getCapabilities() { protected void startSession(Capabilities capabilities) {
return capabilities;
}
/**
* 判断浏览器是否支持运行JS脚本
*
* @author andylau
* @date 2022/7/24 10:08
*/
private boolean isJavascriptDisabled() {
return !capabilities.is(SUPPORTS_JAVASCRIPT);
} }
/*** /***
...@@ -66,9 +53,6 @@ public class ReuseWebDriver extends RemoteWebDriver { ...@@ -66,9 +53,6 @@ public class ReuseWebDriver extends RemoteWebDriver {
*/ */
@Override @Override
public Object executeScript(String script, Object... args) { public Object executeScript(String script, Object... args) {
if (isJavascriptDisabled()) {
throw new UnsupportedOperationException("必须使用支持执行javascript的WebDriver的底层实例..");
}
// 替换引号 // 替换引号
script = script.replaceAll("\"", "\\\""); script = script.replaceAll("\"", "\\\"");
List<Object> convertedArgs = Stream.of(args).map(new WebElementToJsonConverter()).collect(Collectors.toList()); List<Object> convertedArgs = Stream.of(args).map(new WebElementToJsonConverter()).collect(Collectors.toList());
...@@ -79,13 +63,10 @@ public class ReuseWebDriver extends RemoteWebDriver { ...@@ -79,13 +63,10 @@ public class ReuseWebDriver extends RemoteWebDriver {
/*** /***
* 再打开的浏览器异步执行JS脚本 * 再打开的浏览器异步执行JS脚本
* @author andylau * @author andylau
* @date 2022/7/24 10:06 * @date 2022/7/23 18:06
*/ */
@Override @Override
public Object executeAsyncScript(String script, Object... args) { public Object executeAsyncScript(String script, Object... args) {
if (isJavascriptDisabled()) {
throw new UnsupportedOperationException("必须使用支持执行javascript的WebDriver的底层实例..");
}
script = script.replaceAll("\"", "\\\""); script = script.replaceAll("\"", "\\\"");
List<Object> convertedArgs = Stream.of(args).map(new WebElementToJsonConverter()).collect(Collectors.toList()); List<Object> convertedArgs = Stream.of(args).map(new WebElementToJsonConverter()).collect(Collectors.toList());
Map<String, ?> params = ImmutableMap.of("script", script, "args", convertedArgs); Map<String, ?> params = ImmutableMap.of("script", script, "args", convertedArgs);
...@@ -93,37 +74,51 @@ public class ReuseWebDriver extends RemoteWebDriver { ...@@ -93,37 +74,51 @@ public class ReuseWebDriver extends RemoteWebDriver {
} }
/*** /***
* 连接测试 * session 连接测试
* @author andylau * @author andylau
* @date 2022/7/24 10:03 * @date 2022/7/23 18:03
*/ */
private void connectTest(String sessionId) throws IOException { private boolean connectTest() {
if (!sessionId.isEmpty()) { Command command = new Command(super.getSessionId(), DriverCommand.STATUS);
super.setSessionId(sessionId); try {
Response response = getCommandExecutor().execute(command);
if (response != null && 0 == response.getStatus()) {
return true;
}
} catch (Exception e) {
log.error("Session 连接失败...");
}
return false;
} }
Command command = new Command(super.getSessionId(), DriverCommand.STATUS); /***
getCommandExecutor().execute(command); * 判断是否连接失败并退出
this.capabilities = new MutableCapabilities(); * @author andylau
* @date 2022/7/25 14:25
*/
public boolean connectTestFail() {
boolean flag = !this.connectTest();
// 失败时退出服务
if (flag) {
try {
this.quit();
} catch (Exception e) {
// do nothing
}
} }
return flag;
}
@Override @Override
public void quit() { public void quit() {
super.quit(); super.quit();
// 关闭DriverService,避免无法关闭DriverService,导致出现过多Driver进程。 // 关闭DriverService,避免无法关闭DriverService,导致出现过多Driver进程。
WebDriverException throwe = null;
try { try {
URL killUrl = new URL(serverUrl + "/shutdown"); URL killUrl = new URL(serverUrl + "/shutdown");
new UrlChecker().waitUntilUnavailable(3, TimeUnit.SECONDS, killUrl); new UrlChecker().waitUntilUnavailable(3, TimeUnit.SECONDS, killUrl);
} catch (MalformedURLException e) { } catch (Exception e) {
throwe = new WebDriverException(e); log.error("驱动退出异常...");
} catch (UrlChecker.TimeoutException e) {
throwe = new WebDriverException("等待驱动程序服务关闭时超时..", e);
}
if (throwe != null) {
throw throwe;
} }
} }
} }
package com.zzsn.test; package com.zzsn.test;
import com.alibaba.fastjson.JSON;
import com.zzsn.crawler.ChromeDriverPool; import com.zzsn.crawler.ChromeDriverPool;
import com.zzsn.crawler.ReuseWebDriver; import com.zzsn.crawler.ReuseWebDriver;
import com.zzsn.generation.Constants;
import com.zzsn.job.JedisUtil;
import com.zzsn.util.DriverUtil; import com.zzsn.util.DriverUtil;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeDriverService;
import org.openqa.selenium.remote.HttpCommandExecutor; import org.openqa.selenium.remote.HttpCommandExecutor;
import org.openqa.selenium.remote.SessionId;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
@Slf4j
public class ChromeTest { public class ChromeTest {
// public static void main(String[] args) throws Exception { // public static void main(String[] args) throws Exception {
// //模拟并发 // //模拟并发
...@@ -87,33 +96,17 @@ public class ChromeTest { ...@@ -87,33 +96,17 @@ public class ChromeTest {
// latch.countDown();//开炮 // latch.countDown();//开炮
// } // }
// 可复用驱动使用Demo
public static void main(String[] args) { public static void main(String[] args) throws Exception {
// 初始化一个chrome浏览器实例 ReuseWebDriver driver = DriverUtil.getChromeDriver();
ReuseWebDriver driver = null; if (driver == null) {
try { // 从缓存取出SessionId为空才时,驱动会返回null,可参考工具类重新设置缓存
driver = new ReuseWebDriver("http://localhost:63714", "fdaa2ca38a077a604dffdca6ecc5df1d"); log.error("获取浏览器驱动失败,请检查SessionId缓存是否存在...");
} catch (Exception e) {
System.out.println("Session连接失败,重新打开浏览器驱动...");
ChromeDriver chromeDriver = DriverUtil.reconnectDriver();
String serverUrl = DriverUtil.getServerUrl(chromeDriver);
String sessionId = chromeDriver.getSessionId().toString();
try {
driver = new ReuseWebDriver(serverUrl, sessionId);
} catch (IOException e1) {
System.out.println("获取驱动连接失败!!!");
return; return;
} }
}
// 最大化窗口 try {
// driver.manage().window().maximize(); // 测试打开bing
// 设置隐性等待时间
// driver.manage().timeouts().implicitlyWait(3, TimeUnit.SECONDS);
// get()打开一个站点
driver.get("https://www.bing.com"); driver.get("https://www.bing.com");
// getTitle()获取当前页面title的值 // getTitle()获取当前页面title的值
System.out.println("当前打开页面的标题是: " + driver.getTitle()); System.out.println("当前打开页面的标题是: " + driver.getTitle());
...@@ -123,8 +116,14 @@ public class ChromeTest { ...@@ -123,8 +116,14 @@ public class ChromeTest {
System.out.println(((HttpCommandExecutor) driver.getCommandExecutor()).getAddressOfRemoteServer()); System.out.println(((HttpCommandExecutor) driver.getCommandExecutor()).getAddressOfRemoteServer());
driver.executeScript("alert(\"hello,this is an alert!\")"); driver.executeScript("alert(\"hello,this is an alert!\")");
// 关闭并退出浏览器 // 关闭并退出浏览器
// driver.quit(); // driver.quit();
} catch (Exception e) {
// 驱动突然崩溃(可手动关闭驱动触发该场景), response在短时间内会返回正常请求码从而导致
// driver.get("https://www.bing.com") 获取页面信息异常
// 这里直接退出,不再重新获取,下次获取链接时再重新启动驱动服务
log.error("获取浏览器驱动异常,驱动重启中...");
driver.quit();
}
} }
} }
package com.zzsn.util; package com.zzsn.util;
import com.alibaba.fastjson.JSON;
import com.zzsn.crawler.ReuseWebDriver;
import com.zzsn.generation.Constants; import com.zzsn.generation.Constants;
import com.zzsn.job.JedisUtil;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeDriverService; import org.openqa.selenium.chrome.ChromeDriverService;
import org.openqa.selenium.remote.HttpCommandExecutor; import org.openqa.selenium.remote.HttpCommandExecutor;
import java.net.URL; import java.net.URL;
import java.util.HashMap;
import java.util.Map;
/** /**
* @author andylau * @author andylau
* @version 1.0 * @version 1.0
* @date 2022/7/23 17:14 * @date 2022/7/23 17:14
**/ **/
@Slf4j
public class DriverUtil { public class DriverUtil {
/*** /***
...@@ -19,7 +26,7 @@ public class DriverUtil { ...@@ -19,7 +26,7 @@ public class DriverUtil {
* @author andylau * @author andylau
* @date 2022/7/23 17:15 * @date 2022/7/23 17:15
*/ */
public static ChromeDriver reconnectDriver() { private static ChromeDriver reconnectDriver() {
System.setProperty("webdriver.chrome.driver", Constants.CHROMEDRIVE); System.setProperty("webdriver.chrome.driver", Constants.CHROMEDRIVE);
ChromeDriverService service = ChromeDriverService.createDefaultService(); ChromeDriverService service = ChromeDriverService.createDefaultService();
// 重新初始化一个chrome浏览器实例 // 重新初始化一个chrome浏览器实例
...@@ -31,9 +38,46 @@ public class DriverUtil { ...@@ -31,9 +38,46 @@ public class DriverUtil {
* @author andylau * @author andylau
* @date 2022/7/23 17:15 * @date 2022/7/23 17:15
*/ */
public static String getServerUrl(ChromeDriver driver) { private static String getServerUrl(ChromeDriver driver) {
HttpCommandExecutor commandExecutor = (HttpCommandExecutor) driver.getCommandExecutor(); HttpCommandExecutor commandExecutor = (HttpCommandExecutor) driver.getCommandExecutor();
URL server = commandExecutor.getAddressOfRemoteServer(); URL server = commandExecutor.getAddressOfRemoteServer();
return server.toString(); return server.toString();
} }
/**
 * Attaches to an existing Chrome driver session and falls back to a brand-new driver when
 * that session fails its connection test.
 *
 * <p>On fallback, the new driver's {@code sessionId} and {@code serverUrl} are serialized to
 * JSON and stored under {@code SELENIUM_DRIVER_CACHE} via {@code JedisUtil.setString} with
 * {@code -1} as the third argument (presumably "no expiry" — confirm against JedisUtil).
 *
 * @param sessionId id of the browser session to attach to
 * @param serverUrl address of the driver service hosting that session
 * @return a driver bound to the given session, or to a freshly started one when the
 *         connection test failed; {@code null} when either argument is {@code null} or empty
 * @throws Exception propagated from driver construction or the cache write
 */
@SuppressWarnings("all")
public static ReuseWebDriver connectChrome(String sessionId, String serverUrl) throws Exception {
    boolean urlMissing = serverUrl == null || "".equals(serverUrl);
    boolean sessionMissing = sessionId == null || "".equals(sessionId);
    if (urlMissing || sessionMissing) {
        log.error("未获取到驱动服务地址、sessionId");
        return null;
    }

    ReuseWebDriver reused = new ReuseWebDriver(serverUrl, sessionId);
    // connectTestFail() also quits the stale driver when the test fails.
    if (!reused.connectTestFail()) {
        return reused;
    }

    // The old session is unusable: start a new driver service and remember it in the cache.
    ChromeDriver freshDriver = DriverUtil.reconnectDriver();
    String freshUrl = DriverUtil.getServerUrl(freshDriver);
    String freshSession = freshDriver.getSessionId().toString();

    Map<String, String> cacheEntry = new HashMap<>(2);
    cacheEntry.put("sessionId", freshSession);
    cacheEntry.put("serverUrl", freshUrl);
    JedisUtil.setString("SELENIUM_DRIVER_CACHE", JSON.toJSONString(cacheEntry), -1);

    return new ReuseWebDriver(freshUrl, freshSession);
}
/**
 * Returns a driver for the Chrome session recorded in the {@code SELENIUM_DRIVER_CACHE}
 * cache entry.
 *
 * <p>The entry is read through {@code JedisUtil.getString} and parsed as a JSON object whose
 * {@code sessionId} and {@code serverUrl} values are passed to
 * {@link #connectChrome(String, String)}.
 *
 * @return the driver produced by {@code connectChrome}; {@code null} when the cache entry is
 *         missing, blank, or does not parse to a JSON object (previously this case raised a
 *         {@link NullPointerException})
 * @throws Exception propagated from the cache read, JSON parsing, or {@code connectChrome}
 * @author andylau
 * @date 2022/7/25 15:07
 */
@SuppressWarnings("unchecked")
public static ReuseWebDriver getChromeDriver() throws Exception {
    String cacheInfo = JedisUtil.getString("SELENIUM_DRIVER_CACHE");
    if (cacheInfo == null || cacheInfo.trim().isEmpty()) {
        // Nothing cached yet: report it and let the caller decide how to re-create the cache.
        log.error("未获取到浏览器驱动缓存信息: SELENIUM_DRIVER_CACHE");
        return null;
    }

    // Raw Map.class is what the parser accepts here; the cache is written with String values.
    Map<String, String> map = JSON.parseObject(cacheInfo, Map.class);
    if (map == null) {
        // e.g. the cached text is the JSON literal "null".
        log.error("浏览器驱动缓存信息解析为空: SELENIUM_DRIVER_CACHE");
        return null;
    }

    // connectChrome itself rejects null/empty sessionId or serverUrl by returning null.
    return connectChrome(map.get("sessionId"), map.get("serverUrl"));
}
} }
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论