Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
M
meta_crawler
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
刘伟刚
meta_crawler
Commits
649ac47c
提交
649ac47c
authored
7月 26, 2022
作者:
张文库
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
谷歌浏览器驱动复用
上级
89f24aab
显示空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
17 行增加
和
165 行删除
+17
-165
SeleniumTime.java
...rc/main/java/com/zzsn/crawler/uriparser/SeleniumTime.java
+12
-163
Constants.java
..._crawler/src/main/java/com/zzsn/generation/Constants.java
+3
-0
DriverUtil.java
comm_crawler/src/main/java/com/zzsn/util/DriverUtil.java
+2
-2
没有找到文件。
comm_crawler/src/main/java/com/zzsn/crawler/uriparser/SeleniumTime.java
浏览文件 @
649ac47c
...
@@ -8,7 +8,9 @@ import java.time.Duration;
...
@@ -8,7 +8,9 @@ import java.time.Duration;
import
java.time.temporal.ChronoUnit
;
import
java.time.temporal.ChronoUnit
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
com.zzsn.crawler.ReuseWebDriver
;
import
com.zzsn.generation.Constants
;
import
com.zzsn.generation.Constants
;
import
com.zzsn.util.DriverUtil
;
import
lombok.extern.slf4j.Slf4j
;
import
lombok.extern.slf4j.Slf4j
;
import
org.openqa.selenium.*
;
import
org.openqa.selenium.*
;
import
org.openqa.selenium.chrome.ChromeDriver
;
import
org.openqa.selenium.chrome.ChromeDriver
;
...
@@ -19,65 +21,18 @@ import org.openqa.selenium.interactions.Actions;
...
@@ -19,65 +21,18 @@ import org.openqa.selenium.interactions.Actions;
@Slf4j
@Slf4j
public
class
SeleniumTime
{
public
class
SeleniumTime
{
// public static ChromeOptions chromeOptions =new ChromeOptions() ;
// public static ChromeDriver driver;
// public static ChromeDriverService service;
// static {
// service = new ChromeDriverService.Builder().
// usingDriverExecutable(new File(Constants.CHROMEDRIVE)).usingAnyFreePort().build();
// try {
// service.wait(30000,30000);
// service.start();
// } catch (Exception e) {
// e.printStackTrace();
// }
// driver = new ChromeDriver(service, chromeOptions);//生成实例
// }
public
SeleniumTime
(){
public
SeleniumTime
(){
// System.setProperty("webdriver.chrome.driver", Constants.CHROMEDRIVE);
// System.setProperty("webdriver.chrome.bin", Constants.CHROMEBIN);
// System.setProperty("sun.net.client.defaultConnectTimeout", "95000");
// System.setProperty("sun.net.client.defaultReadTimeout", "95000");
//
//// DesiredCapabilities capabilities = new DesiredCapabilities("chrome", "", Platform.ANY);
//// WebDriver driver = new RemoteWebDriver(new URL("http://127.0.0.1:4444/wd/hub/"), capabilities);
//// chromeOptions.addArguments("blink-settings=imagesEnabled=false");
//// chromeOptions.addArguments("user-data-dir=C:\\Users\\WIN10\\AppData\\Local\\Google\\Chrome\\User Data\\Default");
// driver = new ChromeDriver(chromeOptions);
// service = new ChromeDriverService.Builder().
// usingDriverExecutable(new File(Constants.CHROMEDRIVE)).usingAnyFreePort().build();
// try {
// service.wait(30000,30000);
// service.start();
// } catch (Exception e) {
// e.printStackTrace();
// }
// driver = new ChromeDriver(service, chromeOptions);//生成实例
}
}
/**
/**
* 根据网址获取网页html信息
* 调用驱动获取html信息
* @param url
* @param url 网页地址
* @return
*/
*/
// @Async("asyncTaskExecutorSelenium")
// @Async("asyncTaskExecutorSelenium")
public
static
String
getScopehtml
(
String
url
){
public
static
String
getScopehtml
(
String
url
)
{
String
html
=
""
;
String
html
=
""
;
ChromeOptions
chromeOptions
=
new
ChromeOptions
();
ChromeDriver
driver
;
ChromeDriverService
service
=
new
ChromeDriverService
.
Builder
().
usingDriverExecutable
(
new
File
(
Constants
.
CHROMEDRIVE
)).
usingAnyFreePort
().
build
();
try
{
try
{
System
.
setProperty
(
"webdriver.chrome.driver"
,
Constants
.
CHROMEDRIVE
);
ReuseWebDriver
driver
=
DriverUtil
.
getChromeDriver
();
service
.
start
();
if
(!
System
.
getProperty
(
"os.name"
).
toUpperCase
().
contains
(
"WINDOWS"
))
{
chromeOptions
.
addArguments
(
"--disable-gpu"
,
"--window-size=1290,1080"
);
chromeOptions
.
addArguments
(
"headless"
);
//无界面参数
chromeOptions
.
addArguments
(
"no-sandbox"
);
//禁用沙盒 就是被这个参数搞了一天
}
driver
=
new
ChromeDriver
(
chromeOptions
);
//生成实例
try
{
try
{
Duration
duration
=
Duration
.
of
(
100
,
ChronoUnit
.
SECONDS
);
Duration
duration
=
Duration
.
of
(
100
,
ChronoUnit
.
SECONDS
);
driver
.
manage
().
timeouts
().
pageLoadTimeout
(
duration
);
driver
.
manage
().
timeouts
().
pageLoadTimeout
(
duration
);
...
@@ -86,89 +41,19 @@ public class SeleniumTime {
...
@@ -86,89 +41,19 @@ public class SeleniumTime {
try
{
try
{
WebElement
webElement
=
driver
.
findElement
(
By
.
xpath
(
"/html"
));
WebElement
webElement
=
driver
.
findElement
(
By
.
xpath
(
"/html"
));
html
=
webElement
.
getAttribute
(
"outerHTML"
);
html
=
webElement
.
getAttribute
(
"outerHTML"
);
System
.
out
.
println
(
"browser will be close"
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
info
(
"chromedriver 出现异常:"
+
e
.
getMessage
());
log
.
info
(
"获取页面内容异常:"
+
e
.
getMessage
());
}
finally
{
driver
.
quit
();
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
info
(
"chromedriver 出现异常:"
+
e
.
getMessage
());
// 若驱动Session连接异常,则直接退出驱动并在下次访问得的时候重新打开驱动
}
finally
{
log
.
info
(
"驱动打开URL异常:"
+
e
.
getMessage
());
driver
.
quit
();
service
.
stop
();
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
info
(
"chromedriver 驱动访问出现异常:"
+
e
.
getMessage
());
log
.
info
(
"驱动访问页面出现出现异常:"
+
e
.
getMessage
());
}
finally
{
service
.
stop
();
}
}
return
html
;
return
html
;
}
}
// public static String getScopehtml(String url){
//
// ChromeOptions chromeOptions = new ChromeOptions();
// ChromeDriver driver;
// ChromeDriverService service;
// service = new ChromeDriverService.Builder().
// usingDriverExecutable(new File(Constants.CHROMEDRIVE)).usingAnyFreePort().build();
// try {
// service.start();
// } catch (Exception e) {
// service.stop();
// return "";
//// e.printStackTrace();
// }
// if (!System.getProperty("os.name").toUpperCase().contains("WINDOWS")) {
// chromeOptions.addArguments("--disable-gpu", "--window-size=1290,1080");
// chromeOptions.addArguments("headless");//无界面参数
// chromeOptions.addArguments("no-sandbox");//禁用沙盒 就是被这个参数搞了一天
// }
// chromeOptions.addArguments("--disable-gpu", "--window-size=1290,1080");
// chromeOptions.addArguments("headless");//无界面参数
// chromeOptions.addArguments("no-sandbox");//禁用沙盒 就是被这个参数搞了一天
// driver = new ChromeDriver(service, chromeOptions);//生成实例
// String html = "";
// try {
// driver.manage().timeouts().pageLoadTimeout(60, TimeUnit.SECONDS);
// driver.get(url);
// Thread.sleep(1000l);
// try {
//// byte[] screenshotAs = driver.getScreenshotAs(OutputType.BYTES);
//// File src = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
//// SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss"); //转换时间格式
//// String time = dateFormat.format(Calendar.getInstance().getTime()); //获取当前时间
//// FileUtils.copyFile(src, new File("Screenshots", time + ".png"));// 拷贝截图文件到我们项目./Screenshots
//
// System.out.println("browser will be close");
// WebElement webElement = driver.findElement(By.xpath("/html"));
// html = webElement.getAttribute("outerHTML");
// } catch (Exception e) {
// log.info("chromedriver 出现异常:" + e.getMessage());
// try {
// Thread.sleep(1000l);
// driver.quit();
// service.stop();
// Thread.sleep(1000l);
// } catch (InterruptedException e2) {
// service.stop();
// }
// }
// } catch (Exception e) {
// log.info("chromedriver 出现异常:" + e.getMessage());
// } finally {
// try {
// Thread.sleep(1000l);
// driver.quit();
// service.stop();
// Thread.sleep(1000l);
// } catch (InterruptedException e) {
//
// }
// }
//
// return html;
// }
public
static
InputStream
getScreenshot
(
String
url
){
public
static
InputStream
getScreenshot
(
String
url
){
ChromeOptions
chromeOptions
=
new
ChromeOptions
()
;
ChromeOptions
chromeOptions
=
new
ChromeOptions
()
;
ChromeDriver
driver
;
ChromeDriver
driver
;
...
@@ -217,43 +102,7 @@ public class SeleniumTime {
...
@@ -217,43 +102,7 @@ public class SeleniumTime {
return
inStream
;
return
inStream
;
}
}
// @Async("asyncTaskExecutorSelenium")
// public static String getScopehtml(String url){
// System.setProperty("webdriver.chrome.driver", Constants.CHROMEDRIVE); //chromederiver存放位置
// System.setProperty("webdriver.chrome.bin", Constants.CHROMEBIN); //chrome安装位置
// ChromeOptions options = new ChromeOptions();
// if(System.getProperty("os.name").toUpperCase().indexOf("WINDOWS")>0){
//
// }else{
// options.addArguments("--disable-gpu","--window-size=1290,1080");
// options.addArguments("headless");//无界面参数
// options.addArguments("no-sandbox");//禁用沙盒 就是被这个参数搞了一天
// }
// WebDriver driver ;
//
// driver = new ChromeDriver(options);
// String html="";
// if(StringUtils.isEmpty(url)){
// return html;
// }
// try{
// driver.get(url);
// Thread.sleep(3000l);
// WebElement webElement = driver.findElement(By.xpath("/html"));
// html = webElement.getAttribute("outerHTML");
//// System.out.println(html);
// }catch(Exception e){
// log.info("chromedriver 出现异常:"+e.getMessage());
// }finally {
// driver.quit();
// try {
// Thread.sleep(3000l);
// } catch (InterruptedException e) {
// }
// }
//
// return html;
// }
public
void
close
(){
public
void
close
(){
// driver.close();
// driver.close();
...
...
comm_crawler/src/main/java/com/zzsn/generation/Constants.java
浏览文件 @
649ac47c
...
@@ -202,6 +202,9 @@ public class Constants {
...
@@ -202,6 +202,9 @@ public class Constants {
public
static
final
String
REDIS_MAXWAITMILLIS
=
prop
.
getProperty
(
"redis.maxWaitMillis"
);
public
static
final
String
REDIS_MAXWAITMILLIS
=
prop
.
getProperty
(
"redis.maxWaitMillis"
);
public
static
final
String
REDIS_TESTONBORROW
=
prop
.
getProperty
(
"redis.testOnBorrow"
);
public
static
final
String
REDIS_TESTONBORROW
=
prop
.
getProperty
(
"redis.testOnBorrow"
);
public
static
final
String
SELENIUM_DRIVER_CACHE
=
prop
.
getProperty
(
"selenium.driver.cache"
);
public
static
final
String
HUAWEICLOUD_END_POINT
=
prop
.
getProperty
(
"HUAWEICLOUD_END_POINT"
);
public
static
final
String
HUAWEICLOUD_END_POINT
=
prop
.
getProperty
(
"HUAWEICLOUD_END_POINT"
);
public
static
final
String
HUAWEICLOUD_BUCKET_NAME
=
prop
.
getProperty
(
"HUAWEICLOUD_BUCKET_NAME"
);
public
static
final
String
HUAWEICLOUD_BUCKET_NAME
=
prop
.
getProperty
(
"HUAWEICLOUD_BUCKET_NAME"
);
public
static
final
String
HUAWEICLOUD_AK
=
prop
.
getProperty
(
"HUAWEICLOUD_AK"
);
public
static
final
String
HUAWEICLOUD_AK
=
prop
.
getProperty
(
"HUAWEICLOUD_AK"
);
...
...
comm_crawler/src/main/java/com/zzsn/util/DriverUtil.java
浏览文件 @
649ac47c
...
@@ -77,7 +77,7 @@ public class DriverUtil {
...
@@ -77,7 +77,7 @@ public class DriverUtil {
map
.
put
(
"sessionId"
,
sessionId
);
map
.
put
(
"sessionId"
,
sessionId
);
map
.
put
(
"serverUrl"
,
serverUrl
);
map
.
put
(
"serverUrl"
,
serverUrl
);
// 缓存浏览器驱动信息
// 缓存浏览器驱动信息
JedisUtil
.
setString
(
"SELENIUM_DRIVER_CACHE"
,
JSON
.
toJSONString
(
map
),
-
1
);
JedisUtil
.
setString
(
Constants
.
SELENIUM_DRIVER_CACHE
,
JSON
.
toJSONString
(
map
),
-
1
);
driver
=
new
ReuseWebDriver
(
serverUrl
,
sessionId
);
driver
=
new
ReuseWebDriver
(
serverUrl
,
sessionId
);
}
}
return
driver
;
return
driver
;
...
@@ -89,7 +89,7 @@ public class DriverUtil {
...
@@ -89,7 +89,7 @@ public class DriverUtil {
* @date 2022/7/25 15:07
* @date 2022/7/25 15:07
*/
*/
public
static
ReuseWebDriver
getChromeDriver
()
throws
Exception
{
public
static
ReuseWebDriver
getChromeDriver
()
throws
Exception
{
String
cacheInfo
=
JedisUtil
.
getString
(
"SELENIUM_DRIVER_CACHE"
);
String
cacheInfo
=
JedisUtil
.
getString
(
Constants
.
SELENIUM_DRIVER_CACHE
);
Map
<
String
,
String
>
map
=
JSON
.
parseObject
(
cacheInfo
,
Map
.
class
);
Map
<
String
,
String
>
map
=
JSON
.
parseObject
(
cacheInfo
,
Map
.
class
);
String
sessionId
=
map
.
get
(
"sessionId"
);
String
sessionId
=
map
.
get
(
"sessionId"
);
String
serverUrl
=
map
.
get
(
"serverUrl"
);
String
serverUrl
=
map
.
get
(
"serverUrl"
);
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论