提交 07f2048a 作者: liuweigang

添加百度搜索

上级 5880af84
Belarus+restrict+The Boston Consulting Group
Belarus+restrict+Aspen Human Institute
Belarus+restrict+East-West Center
Belarus+restrict+Center for Defense Information
Belarus+restrict+Battelle
Belarus+restrict+The Population Council
Belarus+restrict+Institute for the Future
Belarus+restrict+U.S. Asian Cultural Academy
Belarus+restrict+Atlantic Council
Belarus+restrict+Centre for European Policy Studies
Belarus+restrict+Royal Institute of Intemational Affairs
Belarus+restrict+The Fabian Society
Belarus+restrict+The Foreign Policy Centre
Belarus+restrict+Institut français des relations internationales
Belarus+restrict+Irish Examiner
Belarus+restrict+timesofmalta
Belarus+restrict+texasnewstoday
Belarus+restrict+californianewstimes
Belarus+restrict+foxbusiness
Belarus+restrict+IAI
Belarus+restrict+DGAP
Belarus+stop+Center for a New America Security
Belarus+stop+CNAS
Belarus+stop+WestExec Advisor
Belarus+stop+Carnegie Endowment for International Peace
Belarus+stop+CEIP
Belarus+stop+Brookings Institution
Belarus+stop+National Endowment for Democracy
Belarus+stop+NED
Belarus+stop+Council on Foreign Relations
Belarus+stop+CFR
Belarus+stop+Foreign Policy for American
Belarus+stop+fp4America
Belarus+stop+Center for American Progress
Belarus+stop+CAP
Belarus+stop+Meridian International
Belarus+stop+Asia Society
Belarus+stop+The Asia Group LLG
Belarus+stop+Penn Biden Center for Diplomacy and Global Engagement
Belarus+stop+Rand
Belarus+stop+Foreign Affairs
Belarus+stop+Center for Strategic and International Studies
Belarus+stop+Hoover Institution on War
Belarus+stop+revolution and Peace
Belarus+stop+American Enterprise Institute for Public Policy Research
Belarus+stop+Hudson Institute
Belarus+stop+Urban Institute
Belarus+stop+International Institute for Applied Systems Analysis
Belarus+stop+Heritage Foundation
Belarus+stop+CATO Institute
Belarus+stop+Carter Center
Belarus+stop+The Boston Consulting Group
Belarus+stop+Aspen Human Institute
Belarus+stop+East-West Center
Belarus+stop+Center for Defense Information
Belarus+stop+Battelle
Belarus+stop+The Population Council
Belarus+stop+Institute for the Future
Belarus+stop+U.S. Asian Cultural Academy
Belarus+stop+Atlantic Council
Belarus+stop+Centre for European Policy Studies
Belarus+stop+Royal Institute of Intemational Affairs
Belarus+stop+The Fabian Society
Belarus+stop+The Foreign Policy Centre
Belarus+stop+Institut français des relations internationales
Belarus+stop+Irish Examiner
Belarus+stop+timesofmalta
Belarus+stop+texasnewstoday
Belarus+stop+californianewstimes
Belarus+stop+foxbusiness
Belarus+stop+IAI
Belarus+stop+DGAP
Belarus+Punishment+Center for a New America Security
Belarus+Punishment+CNAS
Belarus+Punishment+WestExec Advisor
Belarus+Punishment+Carnegie Endowment for International Peace
Belarus+Punishment+CEIP
Belarus+Punishment+Brookings Institution
Belarus+Punishment+National Endowment for Democracy
Belarus+Punishment+NED
Belarus+Punishment+Council on Foreign Relations
Belarus+Punishment+CFR
Belarus+Punishment+Foreign Policy for American
Belarus+Punishment+fp4America
Belarus+Punishment+Center for American Progress
Belarus+Punishment+CAP
Belarus+Punishment+Meridian International
Belarus+Punishment+Asia Society
Belarus+Punishment+The Asia Group LLG
Belarus+Punishment+Penn Biden Center for Diplomacy and Global Engagement
Belarus+Punishment+Rand
Belarus+Punishment+Foreign Affairs
Belarus+Punishment+Center for Strategic and International Studies
Belarus+Punishment+Hoover Institution on War
Belarus+Punishment+revolution and Peace
Belarus+Punishment+American Enterprise Institute for Public Policy Research
Belarus+Punishment+Hudson Institute
Belarus+Punishment+Urban Institute
Belarus+Punishment+International Institute for Applied Systems Analysis
Belarus+Punishment+Heritage Foundation
Belarus+Punishment+CATO Institute
Belarus+Punishment+Carter Center
Belarus+Punishment+The Boston Consulting Group
Belarus+Punishment+Aspen Human Institute
Belarus+Punishment+East-West Center
Belarus+Punishment+Center for Defense Information
Belarus+Punishment+Battelle
Belarus+Punishment+The Population Council
Belarus+Punishment+Institute for the Future
Belarus+Punishment+U.S. Asian Cultural Academy
Belarus+Punishment+Atlantic Council
Belarus+Punishment+Centre for European Policy Studies
Belarus+Punishment+Royal Institute of Intemational Affairs
Belarus+Punishment+The Fabian Society
Belarus+Punishment+The Foreign Policy Centre
Belarus+Punishment+Institut français des relations internationales
Belarus+Punishment+Irish Examiner
Belarus+Punishment+timesofmalta
Belarus+Punishment+texasnewstoday
Belarus+Punishment+californianewstimes
Belarus+Punishment+foxbusiness
Belarus+Punishment+IAI
Belarus+Punishment+DGAP
Belarus+punishment+Center for a New America Security
Belarus+punishment+CNAS
Belarus+punishment+WestExec Advisor
Belarus+punishment+Carnegie Endowment for International Peace
Belarus+punishment+CEIP
Belarus+punishment+Brookings Institution
Belarus+punishment+National Endowment for Democracy
Belarus+punishment+NED
Belarus+punishment+Council on Foreign Relations
Belarus+punishment+CFR
Belarus+punishment+Foreign Policy for American
Belarus+punishment+fp4America
Belarus+punishment+Center for American Progress
Belarus+punishment+CAP
Belarus+punishment+Meridian International
Belarus+punishment+Asia Society
Belarus+punishment+The Asia Group LLG
Belarus+punishment+Penn Biden Center for Diplomacy and Global Engagement
Belarus+punishment+Rand
Belarus+punishment+Foreign Affairs
Belarus+punishment+Center for Strategic and International Studies
Belarus+punishment+Hoover Institution on War
Belarus+punishment+revolution and Peace
Belarus+punishment+American Enterprise Institute for Public Policy Research
Belarus+punishment+Hudson Institute
Belarus+punishment+Urban Institute
Belarus+punishment+International Institute for Applied Systems Analysis
Belarus+punishment+Heritage Foundation
Belarus+punishment+CATO Institute
Belarus+punishment+Carter Center
Belarus+punishment+The Boston Consulting Group
Belarus+punishment+Aspen Human Institute
Belarus+punishment+East-West Center
Belarus+punishment+Center for Defense Information
Belarus+punishment+Battelle
Belarus+punishment+The Population Council
Belarus+punishment+Institute for the Future
Belarus+punishment+U.S. Asian Cultural Academy
Belarus+punishment+Atlantic Council
Belarus+punishment+Centre for European Policy Studies
Belarus+punishment+Royal Institute of Intemational Affairs
Belarus+punishment+The Fabian Society
Belarus+punishment+The Foreign Policy Centre
Belarus+punishment+Institut français des relations internationales
Belarus+punishment+Irish Examiner
Belarus+punishment+timesofmalta
Belarus+punishment+texasnewstoday
Belarus+punishment+californianewstimes
Belarus+punishment+foxbusiness
Belarus+punishment+IAI
Belarus+punishment+DGAP
\ No newline at end of file
Russia+stop+CNAS
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
package com.zzsn;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.web.servlet.ServletComponentScan;
@SpringBootApplication
@ServletComponentScan
public class CrawlerStaticApplication {
public static void main(String[] args) {
SpringApplication.run(CrawlerStaticApplication.class, args);
}
}
package com.zzsn.cache;
import com.alibaba.druid.util.StringUtils;
import com.zzsn.utility.index.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import java.util.Set;
public class JedisFactory {
private static final Logger Log = LoggerFactory.getLogger(JedisFactory.class);
private static final Logger logger = LoggerFactory.getLogger(JedisUtil.class);
private static JedisPool jedisPool = null;
public static void init(){
String host = Constants.REDIS_LOCALHOST;
String port = Constants.REDIS_PORT;
String pass = Constants.REDIS_PASS;
String timeout = Constants.REDIS_TIMEOUT;
String maxIdle = Constants.REDIS_MAXIDLE;
String maxTotal = Constants.REDIS_MAXIDLE;
String maxWaitMillis = Constants.REDIS_MAXWAITMILLIS;
String testOnBorrow = Constants.REDIS_TESTONBORROW;
JedisPoolConfig config = new JedisPoolConfig();
//控制一个pool可分配多少个jedis实例,通过pool.getResource()来获取;
//如果赋值为-1,则表示不限制;如果pool已经分配了maxActive个jedis实例,则此时pool的状态为exhausted(耗尽)。
config.setMaxTotal(Integer.parseInt(maxTotal));
//控制一个pool最多有多少个状态为idle(空闲的)的jedis实例。
config.setMaxIdle(Integer.parseInt(maxIdle));
//表示当borrow(引入)一个jedis实例时,最大的等待时间,如果超过等待时间,则直接抛出JedisConnectionException;
config.setMaxWaitMillis(Long.parseLong(maxWaitMillis));
//在borrow一个jedis实例时,是否提前进行validate操作;如果为true,则得到的jedis实例均是可用的;
config.setTestOnBorrow(Boolean.valueOf(testOnBorrow));
jedisPool = new JedisPool(config, host, Integer.parseInt(port), Integer.parseInt(timeout));
}
public static String getKeyStr(String key) {
Jedis jedis=getJedis();
try {
if (StringUtils.isEmpty(key)) {
return null;
}
return jedis.get( key);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
if (jedis != null) {
// 如果使用 JedisPool , close 操作不是关闭连接,代表归还连接池
jedis.close();
}
}
return null;
}
private static Jedis getJedis() {
return jedisPool.getResource();
}
/**
* 设置缓存 永不过期,(一个月后会自动过期)
* @param key
* @return
*/
public static boolean setKeyStr(String key, String value) {
Jedis jedis=getJedis();
boolean result = false;
try {
if (StringUtils.isEmpty(key)) {
return false;
}
jedis.set(key, value);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
if (jedis != null) {
// 如果使用 JedisPool , close 操作不是关闭连接,代表归还连接池
jedis.close();
}
/* if (!client.isShutdown()) {
try {
client.shutdown();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}*/
}
return result;
}
public static boolean setKeyStrtime(String key, String value,Integer time) {
Jedis jedis=getJedis();
boolean result = false;
try {
if (StringUtils.isEmpty(key)) {
return false;
}
jedis.set(key, value);
if (time > 0) {
/**
* 如果设置了 expireTime, 那么这个 finalKey会在expireTime秒后过期,那么该键会被自动删除
* 这一功能配合出色的性能让Redis可以作为缓存系统来使用,成为了缓存系统Memcached的有力竞争者
*/
jedis.expire(key, time);
}
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
if (jedis != null) {
// 如果使用 JedisPool , close 操作不是关闭连接,代表归还连接池
jedis.close();
}
}
return result;
}
public static Set<String> getKeySet(String key) {
Jedis jedis=getJedis();
try {
Set<String> obj = jedis.smembers(key);
if("null".equals(obj)){
return null;
}
return obj;
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
if (jedis != null) {
// 如果使用 JedisPool , close 操作不是关闭连接,代表归还连接池
jedis.close();
}
}
return null;
}
/**
* 设置缓存 永不过期,(一个月后会自动过期)
* @param key
* @return
*/
public static boolean setKeySet(String key, Set<String> value) {
Jedis jedis=getJedis();
boolean result = false;
try {
if(value==null){
return false;
}
jedis.sadd(key, value.toArray(new String[value.size()]));
} catch (Exception e){
e.printStackTrace();
} finally {
if (jedis != null) {
// 如果使用 JedisPool , close 操作不是关闭连接,代表归还连接池
jedis.close();
}
}
return result;
}
public static void del(String key) {
Jedis jedis=getJedis();
try {
if (StringUtils.isEmpty(key)) {
logger.error("key is null");
return;
}
jedis.del(key);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
if (jedis != null) {
// 如果使用 JedisPool , close 操作不是关闭连接,代表归还连接池
jedis.close();
}
}
}
}
package com.zzsn.cache;
import com.alibaba.druid.util.StringUtils;
import com.zzsn.utility.index.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import java.util.List;
import java.util.Set;
public class JedisUtil {
private static final String PREFIX = "ll_idea";
private static final Logger logger = LoggerFactory.getLogger(JedisUtil.class);
private static JedisPool jedisPool = null;
private JedisUtil() {
}
//写成静态代码块形式,只加载一次,节省资源
static {
}
/**
* 从jedis连接池中获取获取jedis对象
*
* @return
*/
private static void init(){
// Properties properties = PropertyUtil.loadProperties("redis.properties");
String host = Constants.REDIS_LOCALHOST;
String port = Constants.REDIS_PORT;
String pass = Constants.REDIS_PASS;
String timeout = Constants.REDIS_TIMEOUT;
String maxIdle = Constants.REDIS_MAXIDLE;
String maxTotal = Constants.REDIS_MAXTOTAL;
String maxWaitMillis = Constants.REDIS_MAXWAITMILLIS;
String testOnBorrow = Constants.REDIS_TESTONBORROW;
JedisPoolConfig config = new JedisPoolConfig();
//控制一个pool可分配多少个jedis实例,通过pool.getResource()来获取;
//如果赋值为-1,则表示不限制;如果pool已经分配了maxActive个jedis实例,则此时pool的状态为exhausted(耗尽)。
config.setMaxTotal(Integer.parseInt(maxTotal));
//控制一个pool最多有多少个状态为idle(空闲的)的jedis实例。
config.setMaxIdle(Integer.parseInt(maxIdle));
//表示当borrow(引入)一个jedis实例时,最大的等待时间,如果超过等待时间,则直接抛出JedisConnectionException;
config.setMaxWaitMillis(Long.parseLong(maxWaitMillis));
//在borrow一个jedis实例时,是否提前进行validate操作;如果为true,则得到的jedis实例均是可用的;
config.setTestOnBorrow(Boolean.valueOf(testOnBorrow));
jedisPool = new JedisPool(config, host, Integer.parseInt(port), Integer.parseInt(timeout));
}
private static Jedis getJedis() {
init();
return jedisPool.getResource();
}
private static final JedisUtil jedisUtil = new JedisUtil();
/**
* 获取JedisUtil实例
*
* @return
*/
public static JedisUtil getInstance() {
return jedisUtil;
}
public static void returnResource(final Jedis jedis) {
if (jedis != null && jedisPool != null) {
jedis.close();
}
}
public static Jedis getDefaultJedis() {
// return getJedis(HOST_IP, HOST_PORT);//简装版
return getJedis();
}
/**
* 根据 pattern 获取 redis 中的键
*/
public static Set<String> getKeysByPattern(String pattern) {
return getDefaultJedis().keys(pattern);
}
public static boolean exists(String key) throws Exception {
if (StringUtils.isEmpty(key)) {
throw new Exception("key is null");
}
return getDefaultJedis().exists(PREFIX + key);
}
public static void del(String key) throws Exception {
if (StringUtils.isEmpty(key)) {
logger.error("key is null");
throw new Exception("key is null");
}
getDefaultJedis().del(PREFIX + key);
}
public static void setString(String key, String value, int expireTime) throws Exception {
if (StringUtils.isEmpty(key)) {
logger.error("key is null");
throw new Exception("key is null");
}
String finalKey = PREFIX + key;
getDefaultJedis().set(finalKey, value);
if (expireTime > 0) {
/**
* 如果设置了 expireTime, 那么这个 finalKey会在expireTime秒后过期,那么该键会被自动删除
* 这一功能配合出色的性能让Redis可以作为缓存系统来使用,成为了缓存系统Memcached的有力竞争者
*/
getDefaultJedis().expire(finalKey, expireTime);
}
}
public static String getString(String key) throws Exception {
if (StringUtils.isEmpty(key)) {
logger.error("key is null");
throw new Exception("key is null");
}
return getDefaultJedis().get(PREFIX + key);
}
public static long setnx(String key, String value) throws Exception {
if (StringUtils.isEmpty(key)) {
logger.error("key is null");
throw new Exception("key is null");
}
return getDefaultJedis().setnx(PREFIX + key, value);
}
public static long expire(String key, int seconds) throws Exception {
if (StringUtils.isEmpty(key)) {
logger.error("key is null");
throw new Exception("key is null");
}
return getDefaultJedis().expire(PREFIX + key, seconds);
}
public static void pushList(String key, String value, String flag) throws Exception {
if (StringUtils.isEmpty(key) || StringUtils.isEmpty(flag)) {
logger.error("key or flag is null");
throw new Exception("key or flag is null");
}
/**
* key代表的是链表的名字 List是一个双端链表,lpush是往链表的头部插入一条数据,rpush是往尾部插入一条数据
*/
if (flag.equalsIgnoreCase("L")) {
getDefaultJedis().lpush(PREFIX + key, value);
} else if (flag.equalsIgnoreCase("R")) {
getDefaultJedis().rpush(PREFIX + key, value);
} else {
logger.error("unknown flag");
throw new Exception("unknown flag");
}
}
public static String popList(String key, String flag) throws Exception {
if (StringUtils.isEmpty(key) || StringUtils.isEmpty(flag)) {
logger.error("key or flag is null");
throw new Exception("key or flag is null");
}
if (flag.equalsIgnoreCase("L")) {
return getDefaultJedis().lpop(PREFIX + key);
} else if (flag.equalsIgnoreCase("R")) {
return getDefaultJedis().rpop(PREFIX + key);
} else {
logger.error("unknown flag");
throw new Exception("unknown flag");
}
}
/**
* 获取 List 中指定区间上的元素
*/
public static List<String> getAppointedList(String key, long start, long end) throws Exception {
if (StringUtils.isEmpty(key)) {
logger.error("key is null");
throw new Exception("key is null");
}
return getDefaultJedis().lrange(PREFIX + key, start, end);
}
public static List<String> getList(String key) throws Exception {
if (StringUtils.isEmpty(key)) {
logger.error("key is null");
throw new Exception("key is null");
}
return getDefaultJedis().lrange(PREFIX + key, 0, -1);
}
public static void sadd(String key,String value)throws Exception{
if (StringUtils.isEmpty(key)) {
logger.error("key is null");
throw new Exception("key is null");
}
getDefaultJedis().sadd(key,value);
}
public static boolean sismember(String key,String value)throws Exception{
if (StringUtils.isEmpty(key)) {
logger.error("key is null");
throw new Exception("key is null");
}
return getDefaultJedis().sismember(key,value);
}
}
package com.zzsn.cache;
import com.google.code.yanf4j.core.impl.StandardSocketOption;
import net.rubyeye.xmemcached.MemcachedClient;
import net.rubyeye.xmemcached.XMemcachedClientBuilder;
import net.rubyeye.xmemcached.command.BinaryCommandFactory;
import net.rubyeye.xmemcached.transcoders.SerializingTranscoder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
/**
* Memcached客户端
*/
public class Memcached {
private static Logger logger = LoggerFactory.getLogger(Memcached.class);
private static MemcachedClient client = null;
static {
logger.debug("memcached initialize...");
Properties prop = getConfig();
String server = prop.getProperty("memcached.server");
if (server == null || server.isEmpty()) {
throw new IllegalArgumentException("The property 'memcached.server' is not found in memcached.properties file!");
}
String[] servers = server.split(",");
int[] weights = new int[servers.length];
List<InetSocketAddress> addressList = new ArrayList<>(servers.length);
for (int i = 0; i < servers.length; i++) {
String[] addr = servers[i].split(":");
addressList.add(new InetSocketAddress(addr[0], Integer.parseInt(addr[1])));
String weight = prop.getProperty("memcached.server"+(i+1)+".weight");
if (weight == null || weight.isEmpty()) {
weights[i] = 1;
} else {
weights[i] = Integer.parseInt(weight);
}
}
XMemcachedClientBuilder builder = new XMemcachedClientBuilder(addressList, weights);
String poolSize = prop.getProperty("memcached.connectionPoolSize");
if (poolSize != null && !poolSize.isEmpty()) {
builder.setConnectionPoolSize(Integer.parseInt(poolSize));
}
String failureMode = prop.getProperty("memcached.failureMode");
if (failureMode != null && !failureMode.isEmpty()) {
builder.setFailureMode(Boolean.parseBoolean(failureMode));
}
String connTimeout = prop.getProperty("memcached.connectTimeout");
if (connTimeout != null && !connTimeout.isEmpty()) {
builder.setConnectTimeout(Integer.parseInt(connTimeout));
}
String opTimeout = prop.getProperty("memcached.opTimeout");
if (opTimeout != null && !opTimeout.isEmpty()) {
builder.setOpTimeout(Integer.parseInt(opTimeout));
}
String enableHealSession = prop.getProperty("memcached.enableHealSession");
if (enableHealSession != null && !enableHealSession.isEmpty()) {
builder.setEnableHealSession(Boolean.parseBoolean(enableHealSession));//启用或者禁止连接修复
}
String statistics = prop.getProperty("memcached.statistics");
if (statistics != null && !statistics.isEmpty()) {
builder.getConfiguration().setStatisticsServer(Boolean.parseBoolean(statistics));
}
String binary = prop.getProperty("memcached.binaryCommand");
if (binary != null && "true".equals(binary)) {
builder.setCommandFactory(new BinaryCommandFactory());
}
builder.setTranscoder(new SerializingTranscoder());
builder.setSocketOption(StandardSocketOption.SO_RCVBUF, 32* 1024);// 设置接收缓存区为32K,默认16K
builder.setSocketOption(StandardSocketOption.SO_SNDBUF,16 *1024); // 设置发送缓冲区为16K,默认为8K
builder.setSocketOption(StandardSocketOption.TCP_NODELAY,true); // 启用nagle算法,提高吞吐量,默认关闭
String sessionIdleTimeout = prop.getProperty("memcahced.sessionIdleTimeout");
if (sessionIdleTimeout != null && !sessionIdleTimeout.isEmpty()) {
builder.getConfiguration().setSessionIdleTimeout(Integer.parseInt(sessionIdleTimeout)*1000); // 如果连接超过x秒没有任何IO操作发生即认为空闲并发起心跳检测
}
try {
client = builder.build();
String optimizeMergeBuffer = prop.getProperty("memcached.optimizeMergeBuffer");
if (optimizeMergeBuffer != null && !optimizeMergeBuffer.isEmpty()) {
client.setOptimizeMergeBuffer(Boolean.parseBoolean(optimizeMergeBuffer));
}
String mergeFactor = prop.getProperty("memcached.mergeFactor");
if (mergeFactor != null && !optimizeMergeBuffer.isEmpty()) {
client.setMergeFactor(Integer.parseInt(mergeFactor));
}
logger.debug("memcached initialize completed!");
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private Memcached() {
}
public static MemcachedClient getClient() {
return client;
}
public static void shutdown(MemcachedClient client) {
if (client != null) {
try {
client.shutdown();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public static Properties getConfig() {
Properties properties = new Properties();
InputStream is = null;
String location = "memcached.properties";
try {
Resource resource = new DefaultResourceLoader().getResource(location);
is = resource.getInputStream();
properties.load(is);
logger.debug("memcached config: {}", properties.toString());
} catch (IOException ex) {
logger.error("Could not load property file:" + location, ex);
} finally {
try {
if (is != null) {
is.close();
}
} catch (IOException ioe) {
// ignore
}
}
return properties;
}
}
package com.zzsn.cache;
import net.rubyeye.xmemcached.KeyIterator;
import net.rubyeye.xmemcached.MemcachedClient;
import net.rubyeye.xmemcached.XMemcachedClientBuilder;
import net.rubyeye.xmemcached.exception.MemcachedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;
/**
* @Author Sugar
* @Version 2018/6/8 13:22
*/
public class MemcachedAdmin {
private static Logger logger = LoggerFactory.getLogger(MemcachedAdmin.class);
private static MemcachedClient client = null;
private MemcachedAdmin() {
}
private static MemcachedClient getClient() {
if (client != null) {
return client;
}
Properties prop = Memcached.getConfig();
String server = prop.getProperty("memcached.server");
if (server == null || server.isEmpty()) {
throw new IllegalArgumentException("The property 'memcached.server' is not found in memcached.properties file!");
}
String[] servers = server.split(",");
List<InetSocketAddress> addressList = new ArrayList<>(servers.length);
for (int i = 0; i < servers.length; i++) {
String[] addr = servers[i].split(":");
addressList.add(new InetSocketAddress(addr[0], Integer.parseInt(addr[1])));
}
XMemcachedClientBuilder builder = new XMemcachedClientBuilder(addressList);
try {
client = builder.build();
} catch (IOException e) {
e.printStackTrace();
}
return client;
}
@Deprecated
public static long deleteAll(String keyPrefix) {
AtomicLong count = new AtomicLong();
MemcachedClient client = getClient();
client.getAvailableServers().forEach(inet -> {
try {
KeyIterator iterator = client.getKeyIterator(inet);
while (iterator.hasNext()) {
String key = iterator.next();
if (key.startsWith(keyPrefix)) {
boolean result = client.delete(key);
long i = count.incrementAndGet();
if (logger.isDebugEnabled()) {
logger.debug("[{}] Delete key[{}]: {}={}", inet, i, key, result);
}
}
}
} catch (MemcachedException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
} catch (TimeoutException e) {
e.printStackTrace();
}
});
logger.info("Delete a total of {} keys starting with {}", count.get(), keyPrefix);
return count.get();
}
@Deprecated
public static List<String> getAllKey(String keyPrefix) {
MemcachedClient client = getClient();
List<String> keys = new ArrayList<>();
client.getAvailableServers().forEach(inet -> {
try {
KeyIterator iterator = client.getKeyIterator(inet);
while (iterator.hasNext()) {
String key = iterator.next();
if (key.startsWith(keyPrefix)) {
keys.add(key);
}
}
} catch (MemcachedException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
} catch (TimeoutException e) {
e.printStackTrace();
}
});
return keys;
}
/**
* 动态添加一台服务
* @param host
* @param port
* @return
*/
public static boolean addServer(String host, int port) {
return addServer(host, port, 1);
}
/**
* 动态添加一台服务
* @param host
* @param port
* @return
*/
public static boolean addServer(String host, int port, int weight) {
try {
Memcached.getClient().addServer(host, port, weight);
return true;
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
}
package com.zzsn.cache;
import io.protostuff.*;
import io.protostuff.runtime.RuntimeSchema;
import org.springframework.objenesis.Objenesis;
import org.springframework.objenesis.ObjenesisStd;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* @Author Sugar
* @Version 2018/3/16 13:32
*/
public class ProtostuffUtil {
public static final Objenesis objenesis = new ObjenesisStd(true);
private static final String ERR_TRUNCATED_MESSAGE =
"While parsing a protocol message, the input ended unexpectedly " +
"in the middle of a field. This could mean either than the " +
"input has been truncated or that an embedded message " +
"misreported its own length.";
/**
* 序列化
*
* @param obj
* @return
*/
public static <T> byte[] serializer(T obj) {
LinkedBuffer buffer = LinkedBuffer.allocate(LinkedBuffer.DEFAULT_BUFFER_SIZE);
try {
Schema<T> schema = RuntimeSchema.getSchema((Class<T>) obj.getClass());
return ProtostuffIOUtil.toByteArray(obj, schema, buffer);
} catch (Exception e) {
throw new IllegalStateException("序列化对象失败:" + obj, e);
} finally {
buffer.clear();
}
}
/**
* 反序列化
*
* @param data
* @param clazz
* @return
*/
public static <T> T deserializer(byte[] data, Class<T> clazz) {
T obj = null;
try {
Schema<T> schema = RuntimeSchema.getSchema(clazz);
// obj = schema.newMessage();
obj = objenesis.newInstance(clazz);
ProtostuffIOUtil.mergeFrom(data, obj, schema);
} catch (Exception e) {
throw new IllegalStateException("反序列化对象失败:class=" + clazz + ", data=" + new String(data), e);
}
return obj;
}
public static <T> byte[] serializeList(List<T> list) {
@SuppressWarnings("unchecked")
Schema<T> schema = (Schema<T>) RuntimeSchema.getSchema(list.get(0).getClass());
LinkedBuffer buffer = LinkedBuffer.allocate(1024 * 1024);
ByteArrayOutputStream bos = null;
try {
bos = new ByteArrayOutputStream();
ProtostuffIOUtil.writeListTo(bos, list, schema, buffer);
return bos.toByteArray();
} catch (Exception e) {
throw new IllegalStateException("序列化对象列表失败:" + list, e);
} finally {
buffer.clear();
try {
if (bos != null) {
bos.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
* 反序列化对象列表
*
* @param data
* @param clazz
* @param <T>
* @return
*/
public static <T> List<T> deserializeList(byte[] data, Class<T> clazz) {
Schema<T> schema = RuntimeSchema.getSchema(clazz);
List<T> result = null;
try {
result = parseListFrom(new ByteArrayInputStream(data), schema, clazz);
} catch (IOException e) {
throw new IllegalStateException("反序列化对象列表失败:class=" + clazz + ", data=" + new String(data), e);
}
return result;
}
private static <T> List<T> parseListFrom(final InputStream in, final Schema<T> schema, Class<T> clazz)
throws IOException {
int size = in.read();
if (size == -1) {
return Collections.emptyList();
}
if (size > 0x7f) {
size = readRawVarint32(in, size);
}
final ArrayList<T> list = new ArrayList<T>(size);
final CodedInput input = new CodedInput(in, true);
for (int i = 0; i < size; i++) {
// final T message = schema.newMessage();
final T message = objenesis.newInstance(clazz);//使用objensis代替newInstance()
list.add(message);
schema.mergeFrom(input, message);
input.checkLastTagWas(0);
}
assert in.read() == -1;
return list;
}
/**
* Reads a varint from the input one byte at a time, so that it does not read any bytes after the end of the varint.
* If you simply wrapped the stream in a CodedInput and used readRawVarint32(InputStream) then you would
* probably end up reading past the end of the varint since CodedInput buffers its input.
*/
private static int readRawVarint32(final InputStream input, final int firstByte) throws IOException {
int result = firstByte & 0x7f;
int offset = 7;
for (; offset < 32; offset += 7) {
final int b = input.read();
if (b == -1) {
throw new ProtobufException(ERR_TRUNCATED_MESSAGE);
}
result |= (b & 0x7f) << offset;
if ((b & 0x80) == 0) {
return result;
}
}
// Keep reading up to 64 bits.
for (; offset < 64; offset += 7) {
final int b = input.read();
if (b == -1) {
throw new ProtobufException(ERR_TRUNCATED_MESSAGE);
}
if ((b & 0x80) == 0) {
return result;
}
}
throw new ProtobufException(
"CodedInput encountered a malformed varint.");
}
}
package com.zzsn.conf;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.web.servlet.MultipartAutoConfiguration;
import org.springframework.boot.web.servlet.MultipartConfigFactory;
import org.springframework.context.annotation.Bean;
import javax.servlet.MultipartConfigElement;
import java.io.File;
@SpringBootApplication(exclude = {MultipartAutoConfiguration.class})
public class SpringBootFileuploadApplication {
@Bean
MultipartConfigElement multipartConfigElement() {
MultipartConfigFactory factory = new MultipartConfigFactory();
String location = System.getProperty("user.dir") + "/data/tmp";
File tmpFile = new File(location);
if (!tmpFile.exists()) {
tmpFile.mkdirs();
}
factory.setLocation(location);
return factory.createMultipartConfig();
}
}
package com.zzsn.docinfo;
import java.io.Serializable;
import java.util.Map;
/**
* 数据接口文档
* 创建人:李东亮
* 创建时间:2016-4-6 下午3:44:17
* 公司 :郑州数能软件科技有限公司
* @version 1.0
*
*/
public class DocInfo implements Serializable{
private String contentType;
private Long orgId;
private Long sid;
//News:新闻,BBS:论坛,Blog:博客,MicroBlog:微博,WeChat:微信,Video:视频,Other:其他
private String sourceType;
private String lastModified;
private String charset;
private String sourceaddress;
private String lang;
private String title;
private String author;
private String publishDate;
private String origin;
private String keywords;
private String summary;
private String contentWithTag;
private String contentNoTag;
private String contentImgCvtTag;
private String fileDownLoadPath;
private Map<String,String> otherParams;
public Long getOrgId() {
return orgId;
}
public void setOrgId(Long orgId) {
this.orgId = orgId;
}
public String getContentType() {
return contentType;
}
public void setContentType(String contentType) {
this.contentType = contentType;
}
public Long getSid() {
return sid;
}
public void setSid(Long sid) {
this.sid = sid;
}
public String getSourceType() {
return sourceType;
}
public void setSourceType(String sourceType) {
this.sourceType = sourceType;
}
public String getLastModified() {
return lastModified;
}
public void setLastModified(String lastModified) {
this.lastModified = lastModified;
}
public String getCharset() {
return charset;
}
public void setCharset(String charset) {
this.charset = charset;
}
public String getSourceaddress() {
return sourceaddress;
}
public void setSourceaddress(String sourceaddress) {
this.sourceaddress = sourceaddress;
}
public String getLang() {
return lang;
}
public void setLang(String lang) {
this.lang = lang;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}
public String getPublishDate() {
return publishDate;
}
public void setPublishDate(String publishDate) {
this.publishDate = publishDate;
}
public String getOrigin() {
return origin;
}
public void setOrigin(String origin) {
this.origin = origin;
}
public String getKeywords() {
return keywords;
}
public void setKeywords(String keywords) {
this.keywords = keywords;
}
public String getSummary() {
return summary;
}
public void setSummary(String summary) {
this.summary = summary;
}
public String getContentWithTag() {
return contentWithTag;
}
public void setContentWithTag(String contentWithTag) {
this.contentWithTag = contentWithTag;
}
public String getContentNoTag() {
return contentNoTag;
}
public void setContentNoTag(String contentNoTag) {
this.contentNoTag = contentNoTag;
}
public Map<String, String> getOtherParams() {
return otherParams;
}
public void setOtherParams(Map<String, String> otherParams) {
this.otherParams = otherParams;
}
public String getFileDownLoadPath() {
return fileDownLoadPath;
}
public void setFileDownLoadPath(String fileDownLoadPath) {
this.fileDownLoadPath = fileDownLoadPath;
}
public String getContentImgCvtTag() {
return contentImgCvtTag;
}
public void setContentImgCvtTag(String contentImgCvtTag) {
this.contentImgCvtTag = contentImgCvtTag;
}
}
package com.zzsn.entity;
import java.io.Serializable;
public class SiteTemplate implements Serializable {
private static final long serialVersionUID = 1L;
String sourceId;
String siteName;
String siteHost;
String siteLang;
String matchUrl;
String matchTitle;
String matchSummary;
String matchContent;
String matchPublishDate;
String matchOrigin;
String matchAuthor;
String excloume1;
String excloume2;
public SiteTemplate() {
super();
}
public SiteTemplate(String siteName, String siteHost, String siteLang, String matchUrl, String matchTitle,
String matchSummary, String matchContent, String matchPublishDate, String matchOrigin, String matchAuthor,
String excloume1, String excloume2) {
super();
this.siteName = siteName;
this.siteHost = siteHost;
this.siteLang = siteLang;
this.matchUrl = matchUrl;
this.matchTitle = matchTitle;
this.matchSummary = matchSummary;
this.matchContent = matchContent;
this.matchPublishDate = matchPublishDate;
this.matchOrigin = matchOrigin;
this.matchAuthor = matchAuthor;
this.excloume1 = excloume1;
this.excloume2 = excloume2;
}
public String getSourceId() {
return sourceId;
}
public void setSourceId(String sourceId) {
this.sourceId = sourceId;
}
public String getSiteName() {
return siteName;
}
public void setSiteName(String siteName) {
this.siteName = siteName;
}
public String getSiteHost() {
return siteHost;
}
public void setSiteHost(String siteHost) {
this.siteHost = siteHost;
}
public String getSiteLang() {
return siteLang;
}
public void setSiteLang(String siteLang) {
this.siteLang = siteLang;
}
public String getMatchUrl() {
return matchUrl;
}
public void setMatchUrl(String matchUrl) {
this.matchUrl = matchUrl;
}
public String getMatchTitle() {
return matchTitle;
}
public void setMatchTitle(String matchTitle) {
this.matchTitle = matchTitle;
}
public String getMatchSummary() {
return matchSummary;
}
public void setMatchSummary(String matchSummary) {
this.matchSummary = matchSummary;
}
public String getMatchContent() {
return matchContent;
}
public void setMatchContent(String matchContent) {
this.matchContent = matchContent;
}
public String getMatchPublishDate() {
return matchPublishDate;
}
public void setMatchPublishDate(String matchPublishDate) {
this.matchPublishDate = matchPublishDate;
}
public String getMatchOrigin() {
return matchOrigin;
}
public void setMatchOrigin(String matchOrigin) {
this.matchOrigin = matchOrigin;
}
public String getMatchAuthor() {
return matchAuthor;
}
public void setMatchAuthor(String matchAuthor) {
this.matchAuthor = matchAuthor;
}
@Override
public String toString() {
return "SiteTemplate [sourceId=" + sourceId + ", siteName=" + siteName + ", siteHost=" + siteHost
+ ", siteLang=" + siteLang + ", matchUrl=" + matchUrl + ", matchTitle=" + matchTitle + ", matchSummary="
+ matchSummary + ", matchContent=" + matchContent + ", matchPublishDate=" + matchPublishDate
+ ", matchOrigin=" + matchOrigin + ", matchAuthor=" + matchAuthor + ", excloume1=" + excloume1
+ ", excloume2=" + excloume2 + "]";
}
}
package com.zzsn.job;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
/**
* 阻塞线程池
* 线程池的线程数到达最大线程数阻塞等待
* 可用于多线程获取MQ消息任务
* 因为会阻塞,就不用考虑拒绝策略这一块的重写
*/
public class BlockThreadPoolExecute extends ThreadPoolExecutor {
private ReentrantLock lock = new ReentrantLock();
private Condition condition = this.lock.newCondition();
public BlockThreadPoolExecute(int corePoolSize, int maximumPoolSize, long keepAliveTime, TimeUnit unit, BlockingQueue<Runnable> workQueue) {
super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue);
}
@Override
public void execute(Runnable command) {
//进行同步锁定
this.lock.lock();
super.execute(command);
try {
//如果线程池的数量已经达到最大线程池的数量,则进行挂起操作
if (getPoolSize() == getMaximumPoolSize()) {
this.condition.await();
}
} catch (InterruptedException e) {
e.printStackTrace();
} finally {
this.lock.unlock();
}
}
@Override
protected void afterExecute(Runnable r, Throwable t) {
try{
lock.lock();
this.condition.signal();
}finally {
this.lock.unlock();
}
}
}
\ No newline at end of file
package com.zzsn.job;
import com.google.gson.Gson;
import com.zzsn.search.MetaSearchThread;
import com.zzsn.search.entity.KeywordMsg;
import com.zzsn.utility.index.Constants;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.util.Arrays;
import java.util.Properties;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
@Component
@EnableScheduling
@Slf4j
public class KafkaConsumerJob {
private static KafkaConsumer<String, String> createConsumer() {
Properties properties = new Properties();
System.out.println(Constants.KAFKA_CONSUMER_SERVERS);
properties.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, Constants.KAFKA_CONSUMER_SERVERS);
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
properties.put(ConsumerConfig.GROUP_ID_CONFIG, Constants.KAFKA_CONSUMER_GROUP_ID);
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
//kafka数据的读取方式
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,Constants.KAFKA_CONSUMER_AUTO_OFFSET_RESET);
// latest earliest
//时间间隔设置为1h
properties.put("max.poll.interval.ms", 60*60*1000);
properties.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 1);
return new KafkaConsumer<>(properties);
}
public static final ExecutorService poolExecuter = new BlockThreadPoolExecute(3
, 6 , 0, TimeUnit.SECONDS, new ArrayBlockingQueue<>(1));
@Scheduled(cron = "0 0/2 * * * ?")
public void consumer (){
log.info("定时获取mq消息");
//1.创建消费者
KafkaConsumer<String, String> consumer = createConsumer();
consumer.subscribe(Arrays.asList(Constants.KAFKA_CONSUMER_TOPIC));
try{
while(true){
//消费者是一个长期运行的程序,通过持续轮询向Kafka请求数据。在其他线程中调用consumer.wakeup()可以退出循环
//在0ms内等待Kafka的broker返回数据.超时参数指定poll在多久之后可以返回,不管有没有可用的数据都要返回
ConsumerRecords<String, String> records = consumer.poll(0);
consumer.commitSync();
for(ConsumerRecord record : records){
KeywordMsg keywordMsg = new Gson().fromJson(record.value().toString(), KeywordMsg.class);
MetaSearchThread metaSearchThread=new MetaSearchThread();
metaSearchThread.keywordMsg=keywordMsg;
//创建使用固定线程数的线程池
// metaSearchThread.start();
// TimeUnit.SECONDS.sleep(120);
poolExecuter.execute(metaSearchThread);
}
}
}catch (Exception e){
consumer = createConsumer();
consumer.subscribe(Arrays.asList(Constants.KAFKA_CONSUMER_TOPIC));
}
}
// @Scheduled(cron = "0 0/2 * * * ?")
// public void consumer (){
// log.info("定时获取mq消息");
// //1.创建消费者
// KafkaConsumer<String, String> consumer = createConsumer();
// consumer.subscribe(Arrays.asList(Constants.KAFKA_CONSUMER_TOPIC));
//
// try{
// while(true){
// //消费者是一个长期运行的程序,通过持续轮询向Kafka请求数据。在其他线程中调用consumer.wakeup()可以退出循环
// //在0ms内等待Kafka的broker返回数据.超时参数指定poll在多久之后可以返回,不管有没有可用的数据都要返回
// ConsumerRecords<String, String> records = consumer.poll(0);
// consumer.commitSync();
// for(ConsumerRecord record : records){
// KeywordMsg keywordMsg = new Gson().fromJson(record.value().toString(), KeywordMsg.class);
// MetaSearchThread metaSearchThread=new MetaSearchThread();
// metaSearchThread.keywordMsg=keywordMsg;
// //创建使用固定线程数的线程池
//// metaSearchThread.start();
// poolExecuter.execute(()->{
// String threadName= Thread.currentThread().getName();
// log.info(threadName+"开始执行");
// try {
// metaSearchThread.start();
// TimeUnit.SECONDS.sleep(120);
// } catch (InterruptedException e) {
// e.printStackTrace();
// }
// log.info(threadName+"执行结束");
// });
// }
// }
// }catch (Exception e){
// consumer = createConsumer();
// consumer.subscribe(Arrays.asList(Constants.KAFKA_CONSUMER_TOPIC));
// }
//
// }
// private static KafkaConsumer<String, String> create2Consumer() {
// Properties props = new Properties();
// // 必须设置的属性
// props.put("bootstrap.servers", "114.115.159.144:9092");
// props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
// props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
// props.put("group.id", "group1");
// // 可选设置属性
// props.put("enable.auto.commit", "true");
// // 自动提交offset,每1s提交一次
// props.put("auto.commit.interval.ms", "1000");
// props.put("auto.offset.reset","earliest ");
// props.put("client.id", "es-sync");
// return new KafkaConsumer<>(props);
// }
}
package com.zzsn.job;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.StringSerializer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.core.DefaultKafkaProducerFactory;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.kafka.core.ProducerFactory;
import java.util.HashMap;
import java.util.Map;
/**
* kafka生产者配置
* @author Java小白
*
*/
@Configuration
@EnableKafka
public class KafkaProducerConfig {
@Value("${kafka.producer.servers}")
private String servers;
@Value("${kafka.producer.retries}")
private int retries;
@Value("${kafka.producer.batch.size}")
private int batchSize;
@Value("${kafka.producer.linger}")
private int linger;
@Value("${kafka.producer.buffer.memory}")
private int bufferMemory;
/**
* 配置生产者信息(消费提供者信息)
* @return
*/
public Map<String, Object> producerConfigs() {
Map<String, Object> props = new HashMap<>();
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, servers);
props.put(ProducerConfig.RETRIES_CONFIG, retries);
props.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize);
props.put(ProducerConfig.LINGER_MS_CONFIG, linger);
props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, bufferMemory);
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
return props;
}
/**
* 消费工厂
* @return
*/
public ProducerFactory<String, String> producerFactory() {
return new DefaultKafkaProducerFactory<>(producerConfigs());
}
/**
* 消息发送工具类
* @return
*/
@Bean
public KafkaTemplate<String, String> kafkaTemplate() {
//需要指定消费工厂
return new KafkaTemplate<String, String>(producerFactory());
}
}
\ No newline at end of file
package com.zzsn.job;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
public class PropertyUtil {
//加载property文件到io流里面
public static Properties loadProperties(String propertyFile) {
Properties properties = new Properties();
try {
InputStream is = PropertyUtil.class.getClassLoader().getResourceAsStream(propertyFile);
if(is == null){
is = PropertyUtil.class.getClassLoader().getResourceAsStream("properties/" + propertyFile);
}
properties.load(is);
} catch (IOException e) {
e.printStackTrace();
}
return properties;
}
/**
* 根据key值取得对应的value值
*
* @param key
* @return
*/
public static String getValue(String propertyFile, String key) {
Properties properties = loadProperties(propertyFile);
return properties.getProperty(key);
}
}
package com.zzsn.paser;
import com.zzsn.utility.index.Constants;
import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
public class SeleniumTime {
public ChromeOptions chromeOptions =new ChromeOptions() ;
public ChromeDriver driver;
public SeleniumTime(){
// System.setProperty("webdriver.chrome.driver", "E:\\cmd\\chromedriver.exe");
// System.setProperty("webdriver.chrome.driver", "D:\\cmdvip\\chromedriver.exe");
// System.setProperty("webdriver.chrome.driver", "E:\\chrome\\chromedriver.exe");
System.setProperty("webdriver.chrome.driver", Constants.CHROMEDRIVE);
chromeOptions.addArguments("blink-settings=imagesEnabled=false");
// chromeOptions.addArguments("--headless");
driver = new ChromeDriver(chromeOptions);
}
/**
* 根据网址获取网页html信息
* @param url
* @return
*/
public String getScopehtml(String url){
//=====================================================================================================
// ChromeOptions chromeOptions =new ChromeOptions();
//// System.setProperty("webdriver.chrome.driver", Constants.CHROMEDRIVE);
// System.setProperty("webdriver.chrome.driver", "D:\\project\\cmd\\chromedriver.exe");
// //System.setProperty("webdriver.chrome.bin", "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe");
// //chromeOptions.setBinary("C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe");
// //C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chrome.exe
// //C:\Program Files (x86)\Google\Chrome\Application\chrome.exe
// //chromeOptions.addArguments("--headless");
// ChromeDriver driver = new ChromeDriver(chromeOptions);
//=====================================================================================================
try{
driver.get(url);
WebElement webElement = driver.findElement(By.xpath("/html"));
try{
Thread.sleep(3000l);
String html = webElement.getAttribute("outerHTML");
Thread.sleep(5000l);
driver.quit();
// System.out.println(html);
if(url.contains("http://www.flw.ph")){
String a = "<div class=\"attach_nopermission attach_tips\">";
String b = "<span class=\"atips_close\" onclick=\"this.parentNode.style.display='none'\">x</span>";
if(html.contains(a)&&html.contains(b)){
String[] split = html.split(a);
String sa = split[0];
String[] split2 = split[1].split(b);
String sb = split2[1];
String substring = sb.substring(7);
String sab = sa + substring ;
return sab;
}
}
return html;
}catch(Exception e){
System.out.println("动态爬取方式一出现+"+"org.openqa.selenium.StaleElementReferenceException异常"
+"可能原因为过快的执行没有找到指定的页面元素");
System.out.println("=============执行方法二==============");
Thread.sleep(3000l);
String html = driver.getPageSource();
Thread.sleep(5000l);
driver.quit();
if(url.contains("http://www.flw.ph")){
String a = "<div class=\"attach_nopermission attach_tips\">";
String b = "<span class=\"atips_close\" onclick=\"this.parentNode.style.display='none'\">x</span>";
if(html.contains(a)&&html.contains(b)){
String[] split = html.split(a);
String sa = split[0];
String[] split2 = split[1].split(b);
String sb = split2[1];
String substring = sb.substring(7);
String sab = sa + substring ;
return sab;
}
}
return html;
}
// Thread.sleep(3000l);
// String source = driver.getPageSource();
// //if(source.length()!=0){
// driver.quit();
// return source;
//}
// String html = webElement.getAttribute("outerHTML");
// //System.out.println(html);
// driver.quit();
// return html;
//==========================================================================
// driver.get(url);
// // 休眠1s,为了让js执行完
// Thread.sleep(1000l);
// // 网页源码
// String source = driver.getPageSource();
// System.out.println("进入SeleniumTime中的getScopehtml方法获取相应的html");
// driver.quit();
// return source;
}catch(Exception e){
try {
Thread.sleep(5000l);
} catch (InterruptedException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
driver.quit();
e.printStackTrace();
}
try {
Thread.sleep(5000l);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
driver.quit();
return null;
}
public static void main(String[] args) {
//去除html中的相关标签
/**
* 网上大多是说明直接使用正则表达式不能很好的适用于html
* 经过尝试我无法删除先关div中内容,只能自己通过字符串切割的形式获取
*/
SeleniumTime s = new SeleniumTime();
String scopehtml = s.getScopehtml("http://www.flw.ph/thread-869016-1-1.html");
String a = "<div class=\"attach_nopermission attach_tips\">";
String b = "<span class=\"atips_close\" onclick=\"this.parentNode.style.display='none'\">x</span>";
System.out.println("开始");
if(scopehtml.contains(a)){
System.out.println("包含a");
}
if(scopehtml.contains(a)){
System.out.println("包含b");
}
System.out.println("结束");
String[] split = scopehtml.split(a);
String sa = split[0];
System.out.println("首次截取的长度"+split.length);
String[] split2 = split[1].split(b);
String sb = split2[1];
String substring = sb.substring(7);
System.out.println("再次截取的长度"+split2.length);
String sab = sa + substring ;
// //解决方式 正则匹配删除标签
// // *.div[class="t_fsz"]
// String regex = "<div class=\"attach_nopermission attach_tips\">(.*?)</div>";
// //String regex = "<div.*?>(.*?)</div>";
// //String regex = "*.div[class="+"attach_nopermission attach_tips"+"]";
//
//// boolean isMatch = regex.matches(scopehtml);
//// System.out.println("字符串中是否包含了 'runoob' 子字符串? " + isMatch);
////
// // 创建 Pattern 对象
// Pattern r = Pattern.compile(regex);
//
// // 现在创建 matcher 对象
// Matcher m = r.matcher(scopehtml);
// if (m.find( )) {
// System.out.println("Found value: " + m.group(0) );
// System.out.println("Found value: " + m.group(1) );
// System.out.println("Found value: " + m.group(2) );
// System.out.println("Found value: " + m.group(3) );
// } else {
// System.out.println("NO MATCH");
// }
//
//
File file = new File("D:/123.txt");
try {
PrintStream ps = new PrintStream(new FileOutputStream(file));
ps.println(sab);
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
package com.zzsn.paser;
import com.zzsn.cache.MemcachedUtils;
import com.zzsn.docinfo.DocInfo;
import com.zzsn.entity.Site;
import com.zzsn.entity.SiteTemplate;
import com.zzsn.search.extractor.ContentFileFinder;
import com.zzsn.search.extractor.DefaultMsg;
import com.zzsn.utility.model.CatchWebByMetaSearch;
import com.zzsn.utility.util.DateUtil;
import com.zzsn.utility.util.Utility;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
public class SourceTemplateByTag {
// 从缓存中获取对应的模板
public SiteTemplate getSiteTemp(String infourl){
SiteTemplate sTemplate=new SiteTemplate();
try {
String domain=new URL(infourl).getHost();
Site site=(Site) MemcachedUtils.get("domainUri_"+domain);
if (null != site.getMatchTitle()) {
sTemplate.setMatchTitle(site.getMatchTitle());
}
if (null != site.getMatchSummary()) {
sTemplate.setMatchSummary(site.getMatchSummary());
}
if (null != site.getMatchContent()) {
sTemplate.setMatchContent(site.getMatchContent());
}
if (null != site.getMatchTitle()) {
sTemplate.setMatchAuthor(site.getMatchAuthor());
}
if (null != site.getMatchOrigin()) {
sTemplate.setMatchOrigin(site.getMatchOrigin());
}
if (null != site.getMatchPublishDate()) {
sTemplate.setMatchPublishDate(site.getMatchPublishDate());
}
}catch (Exception e){
return null;
}
return sTemplate;
}
static List<Site> sList=new ArrayList<Site>();
//保存有问题的站点
public static void saveNoTempSite( CatchWebByMetaSearch cwbm ){
try {
String infourl = cwbm.getSourceaddress();
String domainurl = new URL(infourl).getHost();
Site site = new Site();
site.setDomainUri(domainurl);
site.setUri(infourl);
site.setName(cwbm.getSourcesite());
Object cacheObj=MemcachedUtils.get("tempSite");
if (null==cacheObj ) {
List<Site> s2List=new ArrayList<Site>();
sList=s2List;
}
sList.add(site);
MemcachedUtils.set("tempSite", sList,60*60*24);
System.out.println("保存成功!!");
}catch (Exception e){
e.printStackTrace();
System.out.println("保存失败");
}
}
public static DocInfo doPaserByTag(String htmlContent, DocInfo docInfo, SiteTemplate siteTemplate){
DefaultMsg dm = new DefaultMsg();
Document doc = Jsoup.parse(htmlContent);
System.out.println("===========doPaserByTag");
if(null!=siteTemplate.getMatchTitle()&&siteTemplate.getMatchTitle().length()>0) {
//标题
String title =paseElementByCSS(doc,siteTemplate.getMatchTitle());
if (StringUtils.isNotEmpty(title)) {
docInfo.setTitle(title.replace("...", ""));
}
}
if(null!=siteTemplate.getMatchContent()&&siteTemplate.getMatchContent().length()>0) {
Elements elementsByTag = doc.select(siteTemplate.getMatchContent());
String contentWithTag = Utility.RemoveUselessHTMLTagX(elementsByTag.html());
System.out.println("==========="+elementsByTag);
// String contentWithTag =paseElementByCSS(doc,siteTemplate.getMatchContent());
if (contentWithTag == null || contentWithTag.trim().length() == 0) {
return docInfo;
}
docInfo.setContentWithTag(ContentFileFinder.rmHtmlImgOrAtag(contentWithTag));
docInfo.setContentNoTag(Utility.TransferHTML2Text(contentWithTag).replaceAll("\\n",""));
}
if(null!=siteTemplate.getMatchAuthor()&&siteTemplate.getMatchAuthor().length()>0) {
String author=paseElementByCSS(doc,siteTemplate.getMatchAuthor());
if(author.length()>0) {
docInfo.setAuthor(author);
}
}
if(null!=siteTemplate.getMatchPublishDate()&&siteTemplate.getMatchPublishDate().length()>0) {
String publishDate=paseElementByCSS(doc,siteTemplate.getMatchPublishDate());
if(publishDate.length()>0) {
docInfo.setPublishDate(DateUtil.getPublishDate(publishDate));
}
}
if(null!=siteTemplate.getMatchSummary()&&siteTemplate.getMatchSummary().length()>0) {
String summary=paseElementByCSS(doc,siteTemplate.getMatchSummary());
if(summary.length()>0) {
docInfo.setSummary(summary);
}
}
if(null!=siteTemplate.getMatchOrigin()&&siteTemplate.getMatchOrigin().length()>0) {
String origin=paseElementByCSS(doc,siteTemplate.getMatchOrigin());
if(origin.length()>0) {
docInfo.setOrigin(origin);
}
}
// this.buildProcessItem(docInfo);
return docInfo;
}
public static String paseElementByCSS(Document doc,String tag){
String msg="";
try {
Elements elements = doc.select(tag);
if (elements.size() > 0) {
msg = elements.get(0).text().trim();
}
}catch (Exception e){
e.printStackTrace();
}finally {
return msg;
}
// return msg;
}
}
package com.zzsn.search;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
* 百度搜索
* 1.根据关键词请求进入页面,
* 2.将抓取到的内容信息保存到本地数据库表
*/
public class BaiduSearch {
public static void main(String[] args) throws IOException {
// String filepath=args[0];
String filepath="E:\\crawlerWorkspace\\metaSearch\\data\\project.txt";
File f = new File(filepath);
List<String> allLines = FileUtil.getFileLines(f, "utf-8");
BaiduSearchThread baiduSearchThread = new BaiduSearchThread();
paser(allLines);
}
public static void paser(List<String> keywords){
List<List<String>> splitList = splitList(keywords,5000);
ExecutorService threadPool = Executors.newFixedThreadPool(1);
Vector<BaiduSearchThread> workers = new Vector<BaiduSearchThread>();
int index = 0;
try {
for (List<String> keywordList : splitList) {
// BaiduSearchThread worker = new BaiduSearchThread();
BaiduSearchThread baiduSearchThread = new BaiduSearchThread();
baiduSearchThread.setThreadId(index++);
baiduSearchThread.setKeywords(keywordList);
workers.add(baiduSearchThread);
threadPool.execute(baiduSearchThread);
Thread.sleep(1000);
}
}catch (Exception e){
System.out.println(e.getMessage());
}
threadPool.shutdown();
while (true) {
boolean isfinished = threadPool.isTerminated();
if (isfinished)
break;
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
System.out.println(e.getMessage());
}
}
}
// 对list进行分割
public static <T> List<List<T>> splitList(List<T> list, int len) {
if (list == null || list.size() == 0 || len < 1) {
return null;
}
List<List<T>> result = new ArrayList<List<T>>();
int size = list.size();
int count = (size + len - 1) / len;
for (int i = 0; i < count; i++) {
List<T> subList = list.subList(i * len, ((i + 1) * len > size ? size : len * (i + 1)));
result.add(subList);
}
return result;
}
}
package com.zzsn.search;
import java.io.*;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
public class FileUtil {
public static List<String> getFileLines(File file, String encoding)
throws IOException
{
List<String> lines = new ArrayList<String>();
if (encoding == null)
{
encoding = "utf-8";
}
FileInputStream stream = new FileInputStream(file);
InputStreamReader reader = new InputStreamReader(stream, encoding);
BufferedReader bufferedReader = new BufferedReader(reader);
String line = null;
while ((line = bufferedReader.readLine()) != null)
{
lines.add(line);
}
bufferedReader.close();
reader.close();
stream.close();
return lines;
}
//导出到csv文件
public static void array2CSV(ArrayList<ArrayList<String>> data, String path)
{
try {
BufferedWriter out =new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path),"UTF-8"));
for (int i = 0; i < data.size(); i++)
{
ArrayList<String> onerow=data.get(i);
for (int j = 0; j < onerow.size(); j++)
{
out.write(DelQuota(onerow.get(j)));
out.write("|");
}
out.newLine();
}
out.flush();
out.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public static String DelQuota(String str)
{
String result = str;
// String[] strQuota = { "~", "!", "@", "#", "$", "%", "^", "&", "*", "(", ")", "`", ";", "'", ",", ".", "/", ":", "/,", "<", ">", "?" };
String[] strQuota = { "|" };
for (int i = 0; i < strQuota.length; i++)
{
if (result.indexOf(strQuota[i]) > -1)
result = result.replace(strQuota[i], "");
}
return result;
}
public static void outMsg(String fileName, String content) {
File files = new File(fileName);
FileWriter writer = null;
try {
if (!files.exists()) {
files.createNewFile();
}
// 打开一个写文件器,构造函数中的第二个参数true表示以追加形式写文件
writer = new FileWriter(files, true);
writer.write(content);
writer.write("\n");
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if(writer != null){
writer.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
public static String getFileBody(File file, String encoding)
{
StringBuffer buffer = new StringBuffer();
try {
List<String> lines = getFileLines(file, encoding);
for (String line : lines)
{
buffer.append(line);
buffer.append("\n");
}
} catch (IOException e) {
e.printStackTrace();
}
return buffer.toString();
}
public static String getFileBody(String path, String encoding)
{
File file = new File(path);
if (!file.exists())
{
return null;
}
return getFileBody(file, encoding);
}
public static void mergeFile(String dir, int depth,
String outPath, String encoding)
{
File dirFile = new File(dir);
List<File> filesForMerge = collectFiles(dirFile, depth, -1);
StringBuffer buffer = new StringBuffer();
for (File file : filesForMerge)
{
String temp = getFileBody(file, encoding);
buffer.append(temp);
buffer.append("\n");
}
try {
PrintWriter pw = new PrintWriter(outPath);
pw.write(buffer.toString());
pw.close();
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static List<File> collectFiles(File dirFile, int depth, int sampleNum)
{
List<File> filesForMerge = new ArrayList<File>();
if (1 == depth)
{
File[] subFiles = dirFile.listFiles();
for (File file : subFiles)
{
if (file.isDirectory())
{
continue;
}
filesForMerge.add(file);
}
filesForMerge = sampleFiles(filesForMerge, sampleNum);
}
else
{
File[] subFiles = dirFile.listFiles();
for (File file : subFiles)
{
if (!file.isDirectory())
{
continue;
}
List<File> files = collectFiles(file, depth-1, sampleNum);
if (files.size() > 0)
{
filesForMerge.addAll(files);
}
}
}
return filesForMerge;
}
private static List<File> sampleFiles(List<File> origFiles, int sampleNum)
{
List<File> sampleFiles = new ArrayList<File>();
int totalSize = origFiles.size();
if (sampleNum >= 0 && totalSize > sampleNum)
{
if (sampleNum < totalSize / 10)
{
sampleNum = totalSize / 10;
}
Random random = new Random();
int count = 0;
while (count < sampleNum)
{
int pos = random.nextInt(totalSize);
File nextFile = origFiles.get(pos);
if (sampleFiles.contains(nextFile))
{
continue;
}
sampleFiles.add(nextFile);
count ++;
}
return sampleFiles;
}
return origFiles;
}
/**
* eg: cmread-books\dushi\16
* */
public static String getDifferPath(File subFile, File ancestorFile)
{
File parentFile = subFile.getParentFile();
String dirpath = "";
while (!parentFile.getPath().equalsIgnoreCase(ancestorFile.getPath()))
{
dirpath = String.format("%s\\%s", parentFile.getName(), dirpath);
parentFile = parentFile.getParentFile();
}
return dirpath;
}
public static void makeDir(String path)
{
File dirFile = new File(path);
if (!dirFile.exists())
{
dirFile.mkdirs();
}
}
/**
* find file/dir toFindFileName from ancestors of searchDirName
* */
public static File findDecestorFile(File searchDir, String toFindFileName)
{
if (searchDir.getName().equalsIgnoreCase(toFindFileName))
{
return searchDir;
}
if (searchDir.isDirectory())
{
File[] subFiles = searchDir.listFiles();
for (File subFile : subFiles)
{
File temp = findDecestorFile(subFile, toFindFileName);
if (temp != null)
{
return temp;
}
}
}
return null;
}
public static File findFile(String fileName, File parentFile, int compType)
{
File[] files = parentFile.listFiles();
switch (compType) {
case 0: //equal
for (File file : files)
{
if (file.getName().equalsIgnoreCase(fileName))
{
return file;
}
}
break;
case 1: //start with
for (File file : files)
{
if (file.getName().toLowerCase().startsWith(fileName.toLowerCase()))
{
return file;
}
}
break;
case 2: //contains
for (File file : files)
{
if (file.getName().toLowerCase().indexOf(fileName.toLowerCase()) != -1)
{
return file;
}
}
break;
default:
break;
}
return null;
}
private static final char BOM = '\uFEFF';
public static ArrayList<String> readTxtFile(String fileName,String encoding) {
ArrayList<String> rtn = new ArrayList<String>();
BufferedReader reader = null;
try {
reader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), encoding));
String line = null;
boolean isFirstLine = true;
while ((line = reader.readLine()) != null) {
if (isFirstLine && line.length() > 0 && line.charAt(0) == BOM) {
line = line.substring(1);
isFirstLine = false;
}
rtn.add(line);
}
} catch (Exception e) {
String errMsg = String.format("Exception while reading file: %s", fileName, e.getMessage());
e.printStackTrace();
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e) {
String errMsg = String.format("Exception while closing file [%s]: %s", fileName, e.getMessage());
e.printStackTrace();
}
}
}
return rtn;
}
public static void copyFile(File source, File dest) throws IOException {
FileChannel inputChannel = null;
FileChannel outputChannel = null;
try {
inputChannel = new FileInputStream(source).getChannel();
outputChannel = new FileOutputStream(dest).getChannel();
outputChannel.transferFrom(inputChannel, 0, inputChannel.size());
} finally {
inputChannel.close();
outputChannel.close();
}
}
public static List<String> findFiles(String ph) {
List<String> filesList = new ArrayList<String>();
File file = new File(ph);
if (!file.exists()) {
return null;
}
File tempFile;
File[] files = file.listFiles();
for (int i = 0; i < files.length; i++) {
tempFile = files[i];
if (tempFile.isFile()) {
filesList.add(tempFile.getName().toString());
}
}
return filesList;
}
}
package com.zzsn.search.db;
import java.sql.Connection;
import java.sql.SQLException;
/**
* 数据库连接层MYSQL
* @author Administrator
*
*/
public class DBConnection {
/**
* 连接数据库
* @return
*/
public static Connection getDBConnection()
{
// 1. 注册驱动
try {
Class.forName("com.mysql.jdbc.Driver");
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// 获取数据库的连接
try {
Connection conn = java.sql.DriverManager.getConnection("jdbc:mysql://localhost:3306/clb_project?useUnicode=true&characterEncoding=utf8", "root", "root");
return conn;
} catch (SQLException e1) {
e1.printStackTrace();
}
return null;
}
}
\ No newline at end of file
package com.zzsn.search.db;
import java.sql.*;
import java.util.ArrayList;
import java.util.HashMap;
/**
* MYSQL数据库底层封装
* @author Administrator
*
*/
public class DBManager {
private PreparedStatement pstmt;
private Connection conn;
private ResultSet rs;
/**
* 打开数据库
*/
public DBManager() {
conn = DBConnection.getDBConnection();
}
/**
* 执行修改添加操作
* @param coulmn
* @param type
* @param sql
* @return
* @throws SQLException
*/
public boolean updateOrAdd(String[] coulmn, int[] type, String sql) throws SQLException
{
if(!setPstmtParam(coulmn, type, sql))
return false;
boolean flag = pstmt.executeUpdate()>0?true:false;
closeDB();
return flag;
}
/**
* 获取查询结果集
* @param coulmn
* @param type
* @param sql
* @throws SQLException
*/
public DataTable getResultData(String[] coulmn, int[] type, String sql) throws SQLException
{
DataTable dt = new DataTable();
ArrayList<HashMap<String, String>>list = new ArrayList<HashMap<String, String>>();
if(!setPstmtParam(coulmn, type, sql))
return null;
rs = pstmt.executeQuery();
ResultSetMetaData rsmd = rs.getMetaData();//取数据库的列名
int numberOfColumns = rsmd.getColumnCount();
while(rs.next())
{
HashMap<String, String> rsTree = new HashMap<String, String>();
for(int r=1;r<numberOfColumns+1;r++)
{
rsTree.put(rsmd.getColumnName(r),rs.getObject(r).toString());
}
list.add(rsTree);
}
closeDB();
dt.setDataTable(list);
return dt;
}
/**
* 参数设置
* @param coulmn
* @param type
* @throws SQLException
* @throws NumberFormatException
*/
private boolean setPstmtParam(String[] coulmn, int[] type, String sql) throws NumberFormatException, SQLException
{
if(sql== null) return false;
pstmt = conn.prepareStatement(sql);
if(coulmn != null && type != null && coulmn.length !=0 && type.length !=0 )
{
for (int i = 0; i<type.length; i++) {
switch (type[i]) {
case Types.INTEGER:
pstmt.setInt(i+1, Integer.parseInt(coulmn[i]));
break;
case Types.BOOLEAN:
pstmt.setBoolean(i+1, Boolean.parseBoolean(coulmn[i]));
break;
case Types.CHAR:
pstmt.setString(i+1, coulmn[i]);
break;
case Types.DOUBLE:
pstmt.setDouble(i+1, Double.parseDouble(coulmn[i]));
break;
case Types.FLOAT:
pstmt.setFloat(i+1, Float.parseFloat(coulmn[i]));
break;
default:
break;
}
}
}
return true;
}
/**
* 关闭数据库
* @throws SQLException
*/
private void closeDB() throws SQLException
{
if(rs != null)
{
rs.close();
}
if(pstmt != null)
{
pstmt.close();
}
if(conn != null)
{
conn.close();
}
}
}
\ No newline at end of file
package com.zzsn.search.db;
import java.util.*;
/**
* 数据集封装
* @author Administrator
*/
public class DataTable {
public String[] column;//列字段
public String[][] row; //行值
public int rowCount = 0;//行数
public int colCoun = 0;//列数
public DataTable() {
super();
}
public DataTable(String[] column, String[][] row, int rowCount, int colCoun) {
super();
this.column = column;
this.row = row;
this.rowCount = rowCount;
this.colCoun = colCoun;
}
public void setDataTable(ArrayList<HashMap<String, String>> list) {
rowCount = list.size();
colCoun = list.get(0).size();
column = new String[colCoun];
row = new String[rowCount][colCoun];
for (int i = 0; i < rowCount; i++) {
Set<Map.Entry<String, String>> set = list.get(i).entrySet();
int j = 0;
for (Iterator<Map.Entry<String, String>> it = set.iterator(); it
.hasNext();) {
Map.Entry<String, String> entry = (Map.Entry<String, String>) it
.next();
row[i][j] = entry.getValue();
if (i == rowCount - 1) {
column[j] = entry.getKey();
}
j++;
}
}
}
public String[] getColumn() {
return column;
}
public void setColumn(String[] column) {
this.column = column;
}
public String[][] getRow() {
return row;
}
public void setRow(String[][] row) {
this.row = row;
}
public int getRowCount() {
return rowCount;
}
public void setRowCount(int rowCount) {
this.rowCount = rowCount;
}
public int getColCoun() {
return colCoun;
}
public void setColCoun(int colCoun) {
this.colCoun = colCoun;
}
}
\ No newline at end of file
package com.zzsn.search.db;
import lombok.extern.slf4j.Slf4j;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
/**
* @author xub
* @Description: 雪花算法
* @date 2019/8/14 下午8:22
*/
@Slf4j
public class SnowIdUtils {
/**
* 私有的 静态内部类
*/
private static class SnowFlake {
/**
* 内部类对象(单例模式)
*/
private static final SnowFlake SNOW_FLAKE = new SnowFlake();
/**
* 起始的时间戳
*/
private final long START_TIMESTAMP = 1557489395327L;
/**
* 序列号占用位数
*/
private final long SEQUENCE_BIT = 12;
/**
* 机器标识占用位数
*/
private final long MACHINE_BIT = 10;
/**
* 时间戳位移位数
*/
private final long TIMESTAMP_LEFT = SEQUENCE_BIT + MACHINE_BIT;
/**
* 最大序列号 (4095)
*/
private final long MAX_SEQUENCE = ~(-1L << SEQUENCE_BIT);
/**
* 最大机器编号 (1023)
*/
private final long MAX_MACHINE_ID = ~(-1L << MACHINE_BIT);
/**
* 生成id机器标识部分
*/
private long machineIdPart;
/**
* 序列号
*/
private long sequence = 0L;
/**
* 上一次时间戳
*/
private long lastStamp = -1L;
/**
* 构造函数初始化机器编码
*/
private SnowFlake() {
//模拟这里获得本机机器编码
long localIp = 4321;
//localIp & MAX_MACHINE_ID最大不会超过1023,在左位移12位
machineIdPart = (localIp & MAX_MACHINE_ID) << SEQUENCE_BIT;
}
/**
* 获取雪花ID
*/
public synchronized long nextId() {
long currentStamp = timeGen();
//避免机器时钟回拨
while (currentStamp < lastStamp) {
// //服务器时钟被调整了,ID生成器停止服务.
throw new RuntimeException(String.format("时钟已经回拨. Refusing to generate id for %d milliseconds", lastStamp - currentStamp));
}
if (currentStamp == lastStamp) {
// 每次+1
sequence = (sequence + 1) & MAX_SEQUENCE;
// 毫秒内序列溢出
if (sequence == 0) {
// 阻塞到下一个毫秒,获得新的时间戳
currentStamp = getNextMill();
}
} else {
//不同毫秒内,序列号置0
sequence = 0L;
}
lastStamp = currentStamp;
//时间戳部分+机器标识部分+序列号部分
return (currentStamp - START_TIMESTAMP) << TIMESTAMP_LEFT | machineIdPart | sequence;
}
/**
* 阻塞到下一个毫秒,直到获得新的时间戳
*/
private long getNextMill() {
long mill = timeGen();
//
while (mill <= lastStamp) {
mill = timeGen();
}
return mill;
}
/**
* 返回以毫秒为单位的当前时间
*/
protected long timeGen() {
return System.currentTimeMillis();
}
}
/**
* 获取long类型雪花ID
*/
public static long uniqueLong() {
return SnowFlake.SNOW_FLAKE.nextId();
}
/**
* 获取String类型雪花ID
*/
public static String uniqueLongHex() {
return String.format("%016x", uniqueLong());
}
/**
* 测试
*/
public static void main(String[] args) throws InterruptedException {
//计时开始时间
long start = System.currentTimeMillis();
//让100个线程同时进行
final CountDownLatch latch = new CountDownLatch(100);
//判断生成的20万条记录是否有重复记录
final Map<Long, Integer> map = new ConcurrentHashMap();
for (int i = 0; i < 100; i++) {
//创建100个线程
new Thread(() -> {
for (int s = 0; s < 2000; s++) {
long snowID = SnowIdUtils.uniqueLong();
log.info("生成雪花ID={}",snowID);
Integer put = map.put(snowID, 1);
if (put != null) {
throw new RuntimeException("主键重复");
}
}
latch.countDown();
}).start();
}
//让上面100个线程执行结束后,在走下面输出信息
latch.await();
log.info("生成20万条雪花ID总用时={}", System.currentTimeMillis() - start);
}
}
package com.zzsn.search.db;
import java.sql.SQLException;
import java.sql.Types;
/**
* 测试Demo
* @author Administrator
*/
public class TestBusIness{
static String searchSql = "SELECT sid,title,summary,keywords,content,sourceaddress,publish_date,origin from cis_ans_processitem where sid='2022021506' limit 10 ";
static String insertSql = "insert into score(name, age, score)values(?,?,?)";
static String deleteSql = "delete from score where id = ?";
static String updateSql = "update score set name = ? where id = ?";
static String searchSql2="select count(*) form from cis_ans_processitem where sid='202202150000' and ";
public static void main(String[] args) {
// intsertData();
searchData();
}
private static void intsertData()
{
DBManager dm = new DBManager();
String[] coulmn = new String[]{"wyf2", "23", "89.5"};
int[] type = new int[]{Types.CHAR, Types.INTEGER, Types.DOUBLE};
try {
boolean flag = dm.updateOrAdd(coulmn, type, insertSql);
if(flag)
System.out.println("插入成功");
} catch (SQLException e) {
e.printStackTrace();
}
}
private static void searchData()
{
DBManager dm = new DBManager();
String[] coulmn = null;
int[] type = null;
try {
DataTable dt = dm.getResultData(coulmn, type, searchSql);
if(dt != null && dt.getRowCount()> 0){
for(int i = 0; i<dt.getRowCount(); i++)
{
for(int j = 0; j<dt.getColCoun(); j++)
System.out.printf(dt.getRow()[i][j]+"\t");
System.out.println();
}
}
else
System.out.println("查询失败");
} catch (SQLException e) {
e.printStackTrace();
}
}
}
\ No newline at end of file
package com.zzsn.search.entity;
import lombok.Data;
import java.util.List;
@Data
public class ClbAnsProcessitem {
/**主键*/
private String id;
/**信息源id*/
private String sid;
/**团队id*/
private String tid;
/**标题*/
private String title;
/**摘要*/
private String summary;
/**关键词*/
private String keyWords;
/**正文*/
private String content;
private String contentWithtag;
/**未知*/
private String hash;
/**作者*/
private String author;
/**来源*/
private String sourceSite;
/**地址*/
private String sourceAddress;
/**未知*/
private String currentProcess;
/**类别*/
private String type;
/**未知*/
private String withTagFile;
/**发布时间*/
private String publishDate;
/**创建人*/
private String createBy;
/**创建时间*/
private String createDate;
/**编码*/
private String charset;
/**未知*/
private Integer processResult;
/**最新更新时间*/
private String lastModified;
/**组织id*/
private String orgId;
/**词*/
private String words;
/**来源*/
private String origin;
/**未知*/
private String orientation;
/**来源*/
private String fromWhere;
/**来源id*/
private String fromId;
/**来源类别*/
private String sourceType;
/**未知*/
private String featureWords;
/**下载地址*/
private String fileDownloadPath;
private String contentImgCvtTag;
/**关联地址*/
private String relatePlaces;
/**关联人*/
private String relatePerson;
/**关联组织*/
private String relateOrg;
/**事件*/
private String relateEvent;
/**时间*/
private String relateDate;
/**未知*/
private Integer relevance1;
/**未知*/
private String relevance;
/**语言*/
private String lang;
/**组织*/
private String orgs;
/**(临时处理)关联的专题id*/
private List<String> subjectIds;
}
\ No newline at end of file
package com.zzsn.search.entity;
import lombok.Data;
@Data
public class KeywordMsg {
/**
* id
*/
public String id;
/**
* 关键词组
*/
public String keyWord;
/**
* 消息时间
*/
public String startTime;
/**
* 状态
*/
public String status;
/**
* 专题编码
*/
public String wordsCode;
/**
* 专题名称
*/
public String wordsName;
}
package com.zzsn.search.extractor;
import com.zzsn.utility.model.FileTag;
import com.zzsn.utility.util.DateUtil;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 获取正文中的图片或者文件
* 创建人:李东亮
* 创建时间:2016-8-30 下午5:25:04
* 公司 :郑州数能软件科技有限公司
* @version 1.0
*
*/
public class ContentFileFinder {
/**
* 获取父路径
* 创建人: 李东亮
* 创建时间: 2015-7-6 下午3:17:44
* @version 1.0
* @param path
* @return
* @throws IOException
*/
public static String getDirPath(String path) {
path = path.substring(0, path.lastIndexOf("/")) ;
return path;
}
/**
* 去除路径中的./
* 创建人: 李东亮
* 创建时间: 2015-7-6 下午3:43:00
* @version 1.0
* @param path
* @return
* @throws IOException
*/
public static String formatPath(String currentPageURL,String imgPath) {
String start="";
if(currentPageURL.indexOf("http://")!=-1){
start = "http://";
}else if(currentPageURL.indexOf("https://")!=-1){
start = "https://";
}
//绝对路径
if(imgPath.startsWith("/")){
currentPageURL = currentPageURL.replace(start, "");
int subIndex = currentPageURL.indexOf("/");
if(subIndex==-1){
subIndex = currentPageURL.length();
}
String domain = currentPageURL.substring(0, subIndex);
return start+domain+imgPath;
}
//相对路径
String path = currentPageURL+"/"+imgPath;
path = path.replaceAll(start, "D:/");
File f = new File(path);
String filePath="";
try {
filePath = f.getCanonicalPath();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
String result = filePath.replaceAll("D:\\\\", start);
result = result.replaceAll("\\\\", "/");
return result;
}
/**
* 生成图片文件保存路径
* 创建人: 李东亮
* 创建时间: 2016-3-23 下午2:50:33
* @version 1.0
* @return
*/
private static String genImgFileName(String suffix){
String dir = DateUtil.format(new Date(), "yyyy-MM-dd");
String uuid = UUID.randomUUID().toString();
return dir+"/"+uuid+suffix;
}
/**
* 确保有src属性并且src属性指向正确的图片地址
* 创建人: 李东亮
* 创建时间: 2016-6-6 下午1:46:03
* @version 1.0
* @param rawTag
* @return
*/
public static Element ensureSrc(Element imgTag){
// Document doc = Jsoup.parseBodyFragment(rawTag);
String firstSrcAtt=null;
if(imgTag.hasAttr("original")){
firstSrcAtt = "original";
}else if(imgTag.hasAttr("data-src")){
firstSrcAtt = "data-src";
}else if(imgTag.hasAttr("_src")){
firstSrcAtt = "_src";
}else if(imgTag.hasAttr("src")){
firstSrcAtt ="src";
}
if(firstSrcAtt==null){
return null;
}
imgTag.attr("src", imgTag.attr(firstSrcAtt));
return imgTag;
}
/**
* 获取图片的绝对路径
* 创建人: 李东亮
* 创建时间: 2016-6-6 下午2:05:02
* @version 1.0
* @param element
* @param uri
* @return
*/
public static String getAbsolutePath(Element element,String uri,String linkAtt){
String absolutePath = element.attr(linkAtt);
if(absolutePath.startsWith("data:image")){
return null;
}
if(absolutePath.startsWith("file:")){
return null;
}
if (absolutePath.matches("(?i)^javascript.*|#")) {
return null;
}
if(absolutePath!=null && absolutePath.trim().length()>0 && !absolutePath.startsWith("http://")&&!absolutePath.startsWith("https://")&&uri!=null){
String puriDir = getDirPath(uri);
absolutePath = formatPath(puriDir,absolutePath);
}
return absolutePath;
}
/**
* 获取后缀名
* 创建人: 李东亮
* 创建时间: 2016-8-30 下午5:00:39
* @version 1.0
* @param uri
* @return
*/
public static String getSuffix(String uri){
uri = uri.replaceAll("http://|https://", "");
Pattern p = Pattern.compile("/.+(\\.\\w{1,4})$");
Matcher m = p.matcher(uri);
if(m.find()){
return m.group(1);
}
return "";
}
/**
* 获取正文中的文件标签,包含正文中的图片和附件
* 创建人: 李东亮
* 创建时间: 2016-9-8 下午3:01:09
* @version 1.0
* @param content
* @param sourceaddress
* @return
*/
public static Map<String,FileTag> getContentFileTag(String content,String sourceaddress){
Map<String,FileTag> imgMap = new HashMap<String,FileTag>();
if(content==null||content.length()==0){
return imgMap;
}
String rawTag;
String absolutePath;
FileTag fileTag;
String savePath;
Document doc = Jsoup.parse(content);
Elements imgTags = doc.select("img");
Element imgTag;
String suffix = "";
String filePathAttr;
String preFixPath;
for (Iterator<Element> iterator = imgTags.iterator(); iterator.hasNext();) {
fileTag = new FileTag();
imgTag = iterator.next();
rawTag = imgTag.outerHtml().replaceAll("\"", "'").replaceAll("'>", "' \\/>");
if(imgTag.tagName().toLowerCase().equals("img")){
filePathAttr = "src";
//使src指向正确的图片显示路径
imgTag = ensureSrc(imgTag);
preFixPath="IMG_SERVER/";
}else
{
filePathAttr="href";
fileTag.setFileName(imgTag.text());
preFixPath="FILE_SERVER/";
}
//获取图片的绝对路径,并且使src指向图片的绝对路径
absolutePath = getAbsolutePath(imgTag,sourceaddress,filePathAttr);
if(absolutePath==null){
continue;
}
imgTag.attr(filePathAttr,absolutePath);
fileTag.setAbsolutePath(absolutePath);
fileTag.setAbsoluteTag(imgTag.outerHtml());
//图片保存路径
suffix = ContentFileFinder.getSuffix(absolutePath);
savePath = genImgFileName(suffix);
fileTag.setSavePath(savePath);
//图片保存标签
imgTag.attr(filePathAttr,preFixPath+fileTag.getSavePath());
fileTag.setSaveTag(imgTag.outerHtml());
//key为图片完整路径
imgMap.put(rawTag, fileTag);
}
return imgMap;
}
/**
* 得到网页中图片的地址
* @param sets html字符串
*/
public static Set<String> getImgStr(String htmlStr) {
Set<String> pics = new HashSet<String>();
String img = "";
Pattern p_image;
Matcher m_image;
String regEx_img = "<img.*src\\s*=\\s*(.*?)[^>]*?>";
p_image = Pattern.compile(regEx_img, Pattern.CASE_INSENSITIVE);
m_image = p_image.matcher(htmlStr);
while (m_image.find()) {
// 得到<img />数据
img = m_image.group();
System.out.println(img);
// 匹配<img>中的src数据
Matcher m = Pattern.compile("src\\s*=\\s*\"?(.*?)(\"|>|\\s+)").matcher(img);
while (m.find()) {
pics.add(m.group(1));
}
}
return pics;
}
public static String rmHtmlImgOrAtag(String htmlStr) {
//匹配img标签的正则表达式
String regxpForImgTag = "<img\\s[^>]+/>|<img.*>(.*?)</img>|<a.*>(.*?)</a>|<a.*/>";
Pattern pattern = Pattern.compile(regxpForImgTag);
Matcher matcher = pattern.matcher(htmlStr);
while (matcher.find()) {
String temp = matcher.group();
htmlStr = htmlStr.replace(temp, " ");
}
return htmlStr;
}
public static void main(String[] args) {
String str = "<span></span><img data-src=\"http://static.tianyaui.com/img/static/2011/imgloading.gif\" title=\"点击图片查看幻灯模式\" original2=\"http://img3.laibafile.cn/p/l/246500759.jpg\" /> "
+ "<div>hello<div/><img data-src=\"http://static.tianyaui.com/img/static/2011/imgloading.gif\" title=\\\"点击图片查看幻灯模式\\\" original2=\\\"http://img3.laibafile.cn/p/l/246500759.jpg\\\" >qew</img>";
// System.out.println(ContentFileFinder.getSuffix("http://www.baidu.com//a.xls"));
// getContentFileTag(str,"");
str=rmHtmlImgOrAtag(str);
System.out.println(str);
String aString=str+"<div class=\"_3L5YSq\" title=\"vue兼容ie11的解决方法 2021年最新解决方案\">"
+ "<a class=\"_1-HJSV _1OhGeD\" href=\"/p/4718213d9373\" target=\"_blank\" rel=\"noopener noreferrer\">vue兼容ie11的解决方法 2021年最新解决方案</a>"
+ "<span>qwqe<a href='qwe'/></span</div>";
aString=rmHtmlImgOrAtag(aString);
System.out.println(aString);
}
}
package com.zzsn.search.extractor;
/**
* 默认返回对象设置
* 创建人:李东亮
* 创建时间:2015-5-13 上午10:23:02
* 公司 :郑州数能软件科技有限公司
* @version 1.0
*
*/
public class DefaultMsg {
private int success = 1;
private String msg;
public DefaultMsg()
{
}
public DefaultMsg(int success)
{
this.success = success;
}
public int getSuccess() {
return success;
}
public void setSuccess(int success) {
this.success = success;
}
public String getMsg() {
return msg;
}
public void setMsg(String msg) {
this.msg = msg;
}
}
package com.zzsn.search.extractor;
import net.rubyeye.xmemcached.MemcachedClient;
import net.rubyeye.xmemcached.exception.MemcachedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import java.io.UnsupportedEncodingException;
import java.util.concurrent.TimeoutException;
public class MemcachedFactory {
private static final Logger Log = LoggerFactory.getLogger(MemcachedFactory.class);
private static ApplicationContext factory;
protected static MemcachedClient client;
/**
* 加载spring容器
* 创建人: 李东亮 MemcachedFactory
* 创建时间: 2015-5-30 上午11:39:31
* @version 1.0
*/
public static void init(){
factory = new ClassPathXmlApplicationContext("conf/spring-memcached.xml");
client = (MemcachedClient )factory.getBean("memcachedClient");
}
/**
* 获取Key值
* @param key
* @return
*/
public static Object getKey(String key) {
try {
Object obj = client.get(key);
if("null".equals(obj)){
return null;
}
return obj;
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
/* if (!client.isShutdown()) {
try {
client.shutdown();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}*/
}
return null;
}
/**
* 设置缓存 10天
* @param key
* @return
*/
public static boolean setExpireskey(String key, Object value) {
boolean result = false;
try {
if(value==null){
value="null";
}
if (client.get(key) != null ) {
result = client.replace(key, 3600*24*10, value);
} else {
result = client.set(key, 3600*24*10, value);
}
} catch (TimeoutException | InterruptedException | MemcachedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
/* if (!client.isShutdown()) {
try {
client.shutdown();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}*/
}
return result;
}
/**
* 设置缓存 永不过期,(一个月后会自动过期)
* @param key
* @return
*/
public static boolean setKey(String key, Object value) {
boolean result = false;
try {
if(value==null){
value="null";
}
if (client.get(key) != null ) {
result = client.replace(key, 0, value);
} else {
result = client.set(key, 0, value);
}
} catch (TimeoutException | InterruptedException | MemcachedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
/* if (!client.isShutdown()) {
try {
client.shutdown();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}*/
}
return result;
}
public static void setLset(String key,String value){
// client
}
public static void main(String[] args) throws UnsupportedEncodingException {
MemcachedFactory.init();
// DBServiceFactory.init();
Object object=MemcachedFactory.getKey("domainUri_www.ceconline.com");
System.out.println(object);
// List<Site> sites = DBServiceFactory.getCrawlerService().getSnSites();
/* //memcached清空缓存
Map<String,String> map = new HashMap<String,String>();
for(int i=0;i<1000000;i++){
map.put(i+"","adadadadadadadadadadadadadadader");
}
MemcachedFactory.setKey("aa", map);
Map<String,String> m = (Map<String, String>) MemcachedFactory.getKey("aa");
System.out.println(m.size());*/
//MemcachedFactory.flushAll();
}
}
package com.zzsn.search.google;
import com.zzsn.search.util.PublishDateUtil;
import com.zzsn.utility.model.CatchWebByMetaSearch;
import com.zzsn.utility.util.ChromeUtil;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class GoogleRecorderUtil {
public static String getRealUrlFromBaiduUrl(String url) {
Connection.Response res = null;
int itimeout = 60000;
try {
res = Jsoup.connect(url).timeout(itimeout).method(Connection.Method.GET).followRedirects(false).execute();
return res.header("Location");
} catch (IOException e) {
e.printStackTrace();
}
return "";
}
@SuppressWarnings("deprecation")
public static List<CatchWebByMetaSearch> CatchWebOfGoogle(
List<String> urlList, String charset, Long orgId, Long tid) {
try {
List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();
for (int i = 0; i < urlList.size(); i++) {
Thread.sleep(1000*5);
CatchWebByMetaSearch catchWebByMetaSearch = new CatchWebByMetaSearch();
List<CatchWebByMetaSearch> metaSearchList = new ArrayList<CatchWebByMetaSearch>();
Document doc = null;
String docstr= ChromeUtil.getChromeDoc(urlList.get(i),((i)*20+180)+"");
if(docstr==null){
continue;
}
doc=Jsoup.parse(docstr);
Elements firstElementsLink = doc.select("div[class=g]");
//若果没有结果则不循环
if(firstElementsLink.size()==0){
break;
}
String info = doc.toString();
for (int j = 0; j < firstElementsLink.size(); j++) {
catchWebByMetaSearch= new CatchWebByMetaSearch();
Elements e=firstElementsLink.get(j).select("h3");
Elements a=firstElementsLink.get(j).select("a");
Elements timespan=firstElementsLink.get(j).select("span[class=f]");
System.out.println(e.get(0).text());
System.out.println(a.get(0).attr("href"));
catchWebByMetaSearch.setTid(tid);
catchWebByMetaSearch.setSummary(urlList.get(i));
catchWebByMetaSearch.setOrgId(orgId);
catchWebByMetaSearch.setSourceaddress(a.get(0).attr("href"));
catchWebByMetaSearch.setTitle(e.get(0).text());
if(timespan.size()>0){
System.out.println(timespan.get(0).text());
if(timespan.get(0).text().contains("hours")) {
catchWebByMetaSearch.setPublishDate(DatePaserUtil.getCreateDate());
}else if(timespan.get(0).text().contains("day")) {
int day=DatePaserUtil.getIntstr(timespan.get(0).text());
catchWebByMetaSearch.setPublishDate(DatePaserUtil.getDateBeforesomdat(day));
}
else if(timespan.get(0).text().contains("ago")) {
catchWebByMetaSearch.setPublishDate(DatePaserUtil.getCreateDate());
}
else{
String date= PublishDateUtil.getPublishDate(timespan.get(0).text());
System.out.println(date);
catchWebByMetaSearch.setPublishDate(date);
}
System.out.println(catchWebByMetaSearch.getPublishDate());
}
metaSearchList.add(catchWebByMetaSearch);
}
catchWebByMetaSearchList.addAll(metaSearchList);
}
return catchWebByMetaSearchList;
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
@SuppressWarnings("deprecation")
public static List<CatchWebByMetaSearch> CatchWebOfGoogle1(
List<String> urlList, String charset, Long orgId, Long tid) {
try {
List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();
for (int i = 0; i < urlList.size(); i++) {
Thread.sleep(1000*5);
CatchWebByMetaSearch catchWebByMetaSearch = new CatchWebByMetaSearch();
List<CatchWebByMetaSearch> metaSearchList = new ArrayList<CatchWebByMetaSearch>();
Document doc = null;
System.out.println(urlList.get(i));
String docstr=ChromeUtil.getChromeDocnews(urlList.get(i),((i)*20+0)+"");
if(docstr==null){
continue;
}
doc=Jsoup.parse(docstr);
Elements firstElementsLink = doc.select("g-card[class=ftSUBd]");
//若果没有结果则不循环
if(firstElementsLink.size()==0){
break;
}
String info = doc.toString();
for (int j = 0; j < firstElementsLink.size(); j++) {
catchWebByMetaSearch= new CatchWebByMetaSearch();
// System.out.println(firstElementsLink.get(j).toString());
//标题
// Elements e=firstElementsLink.get(j).select("div[class=mCBkyc tNxQIb y355M JIFdL JQe2Ld nDgy9d]");
Elements e=firstElementsLink.get(j).select("div[class=\"mCBkyc y355M JQe2Ld nDgy9d\"]");
//链接
Elements a=firstElementsLink.get(j).select("a");
//Elements timespan=firstElementsLink.get(j).select("span[class=WG9SHc]");
System.out.println(e.get(0).text());
System.out.println(a.get(0).attr("href"));
catchWebByMetaSearch.setTid(tid);
catchWebByMetaSearch.setSummary(urlList.get(i));
//catchWebByMetaSearch.setOrgId(orgId);
catchWebByMetaSearch.setSourceaddress(a.get(0).attr("href"));
catchWebByMetaSearch.setTitle(e.get(0).text());
//来源
String origin=firstElementsLink.get(j).select("div[class=\"CEMjEf NUnG9d\"]").text();
catchWebByMetaSearch.setSourcesite(origin);
metaSearchList.add(catchWebByMetaSearch);
}
catchWebByMetaSearchList.addAll(metaSearchList);
}
return catchWebByMetaSearchList;
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
}
package com.zzsn.search.google;
import com.zzsn.search.FileUtil;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
* 百度搜索
* 1.根据关键词请求进入页面,
* 2.将抓取到的内容信息保存到本地数据库表
*/
public class GoogleSearch {
public static void main(String[] args) throws IOException {
// String filepath=args[0];
String filepath="E:\\crawlerWorkspace\\metaSearch\\data\\googlekeyword.txt";
File f = new File(filepath);
List<String> allLines = FileUtil.getFileLines(f, "utf-8");
GoogleSearchThread baiduSearchThread = new GoogleSearchThread();
paser(allLines);
}
public static void paser(List<String> keywords){
List<List<String>> splitList = splitList(keywords,5000);
ExecutorService threadPool = Executors.newFixedThreadPool(1);
Vector<GoogleSearchThread> workers = new Vector<GoogleSearchThread>();
int index = 0;
try {
for (List<String> keywordList : splitList) {
// BaiduSearchThread worker = new BaiduSearchThread();
GoogleSearchThread baiduSearchThread = new GoogleSearchThread();
baiduSearchThread.setThreadId(index++);
baiduSearchThread.setKeywords(keywordList);
workers.add(baiduSearchThread);
threadPool.execute(baiduSearchThread);
Thread.sleep(1000);
}
}catch (Exception e){
System.out.println(e.getMessage());
}
threadPool.shutdown();
while (true) {
boolean isfinished = threadPool.isTerminated();
if (isfinished)
break;
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
System.out.println(e.getMessage());
}
}
}
// 对list进行分割
public static <T> List<List<T>> splitList(List<T> list, int len) {
if (list == null || list.size() == 0 || len < 1) {
return null;
}
List<List<T>> result = new ArrayList<List<T>>();
int size = list.size();
int count = (size + len - 1) / len;
for (int i = 0; i < count; i++) {
List<T> subList = list.subList(i * len, ((i + 1) * len > size ? size : len * (i + 1)));
result.add(subList);
}
return result;
}
}
package com.zzsn.search.util;
import org.springframework.util.CollectionUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class SplitKeyword {
//关键词处理
public static List<String> transForm(String keyWords) {
if (keyWords == null) {
return null;
}
//(*备注:或:用“,”号表示,与:用“+”号表示,非:用“-”号表示)
keyWords = keyWords.replaceAll("[,,;;]", "|");
keyWords = keyWords.replace("(", "(");
keyWords = keyWords.replace(")", ")");
List<String> keyWordsList = new ArrayList<>();
if (!keyWords.contains("+")) { //不含有+ 形式:A|B|C
keyWords = keyWords.replace("(", "");
keyWords = keyWords.replace(")", "");
keyWordsList = Arrays.asList(keyWords.split("\\|"));
} else if (keyWords.contains(")+(")) { //含有+ 形式:(A|B)+(C|D)+(E|F)
String[] cronArray = keyWords.split("\\+");
List<List<String>> list = new ArrayList<>();
for (int i = 0; i < cronArray.length; i++) {
cronArray[i] = cronArray[i].replace("(", "");
cronArray[i] = cronArray[i].replace(")", "");
cronArray[i] = cronArray[i].replace("|", ",");
list.add(Arrays.asList(cronArray[i].split(",")));
}
List<String> stringList = getPermutations(list);
for (String str : stringList) {
keyWordsList.add(str.substring(0, str.length() - 1));
}
}
return keyWordsList;
}
/**
* 多个数组排列组合
* @param list 原始list
* @param <T> 数据类型
* @return
*/
private static <T> List<List<T>> getDescartes(List<List<T>> list) {
List<List<T>> returnList = new ArrayList<>();
descartesRecursive(list, 0, returnList, new ArrayList<T>());
return returnList;
}
/**
* 递归实现
* 原理:从原始list的0开始依次遍历到最后
*
* @param originalList 原始list
* @param position 当前递归在原始list的position
* @param returnList 返回结果
* @param cacheList 临时保存的list
*/
private static <T> void descartesRecursive(List<List<T>> originalList, int position, List<List<T>> returnList, List<T> cacheList) {
List<T> originalItemList = originalList.get(position);
for (int i = 0; i < originalItemList.size(); i++) {
//最后一个复用cacheList,节省内存
List<T> childCacheList = (i == originalItemList.size() - 1) ? cacheList : new ArrayList<>(cacheList);
childCacheList.add(originalItemList.get(i));
if (position == originalList.size() - 1) {//遍历到最后退出递归
returnList.add(childCacheList);
continue;
}
descartesRecursive(originalList, position + 1, returnList, childCacheList);
}
}
/**
* 多个数组排列组合
* @param list 原始list
* @return
*/
private static List<String> getPermutations(List<List<String>> list) {
List<String> resultList = new ArrayList<>();
List<List<String>> list1 = getDescartes(list);
if(!CollectionUtils.isEmpty(list1)){
list1.forEach(temp->{
String str = listToStr(temp);
if (!org.springframework.util.StringUtils.isEmpty(str)) {
resultList.add(listToStr(temp));
}
});
}
return resultList;
}
private static String listToStr(List<String> list){
StringBuffer str = new StringBuffer();
if(!CollectionUtils.isEmpty(list)){
list.forEach(temp->{
str.append(temp).append("+");
});
}
return str.toString();
}
public static void main(String[] args) {
String kwords="Russia+(sanction|Cancel|revoke|restrict|stop|Punishment|punishment)+(Center for a New America Security|CNAS|WestExec Advisor|Carnegie Endowment for International Peace|CEIP|Brookings Institution|National Endowment for Democracy|NED|Council on Foreign Relations|CFR|Foreign Policy for American|fp4America|Center for American Progress|CAP|Meridian International|Asia Society|The Asia Group LLG|Penn Biden Center for Diplomacy and Global Engagement|Rand|Foreign Affairs|Center for Strategic and International Studies|Hoover Institution on War|revolution and Peace|American Enterprise Institute for Public Policy Research|Hudson Institute|Urban Institute|International Institute for Applied Systems Analysis|Heritage Foundation|CATO Institute|Carter Center|The Boston Consulting Group|Aspen Human Institute|East-West Center|Center for Defense Information|Battelle|The Population Council|Institute for the Future|U.S. Asian Cultural Academy|Atlantic Council|Centre for European Policy Studies|Royal Institute of Intemational Affairs|The Fabian Society|The Foreign Policy Centre|Institut français des relations internationales|Irish Examiner|timesofmalta|texasnewstoday|californianewstimes|foxbusiness|IAI|DGAP)";
List<String> strings = transForm(kwords);
for (String key :strings) {
System.out.println(key);
}
}
}
package com.zzsn.search.util;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.NoSuchBeanDefinitionException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;
import java.util.Map;
/**
* 获取Spring的ApplicationContext对象工具,可以用静态方法的方式获取spring容器中的bean
* @author https://blog.csdn.net/chen_2890
* @date 2019/6/26 16:20
*/
@Component
public class SpringContextUtil implements ApplicationContextAware {
private static ApplicationContext applicationContext;
@Override
public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
SpringContextUtil.applicationContext = applicationContext;
}
/**
* 获取applicationContext
*/
public static ApplicationContext getApplicationContext() {
return applicationContext;
}
/**
* 通过name获取 Bean.
*/
public static Object getBean(String name) {
Object o = null;
try {
o = getApplicationContext().getBean(name);
} catch (NoSuchBeanDefinitionException e) {
// e.printStackTrace();
}
return o;
}
/**
* 通过class获取Bean.
*/
public static <T> T getBean(Class<T> clazz) {
return getApplicationContext().getBean(clazz);
}
/**
* 通过name,以及Clazz返回指定的Bean
*/
public static <T> T getBean(String name, Class<T> clazz) {
return getApplicationContext().getBean(name, clazz);
}
/**
* 通过name获取 Bean.
*/
public static <T> Map<String, T> getBeansOfType(Class<T> clazz) {
return getApplicationContext().getBeansOfType(clazz);
}
/**
* 获取配置文件配置项的值
*
* @param key 配置项key
*/
public static String getEnvironmentProperty(String key) {
return getApplicationContext().getEnvironment().getProperty(key);
}
/**
* 获取spring.profiles.active
*/
public static String getActiveProfile() {
return getApplicationContext().getEnvironment().getActiveProfiles()[0];
}
}
package com.zzsn.utility.index;
import lombok.extern.slf4j.Slf4j;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
/**
* 系统变量
* 创建人:李东亮
* 创建时间:2015-5-12 上午11:52:07
* 公司 :郑州数能软件科技有限公司
* @version 1.0
*
*/
@Slf4j
public class Constants {
private static Properties prop = getConfig();
// private static Properties prop = new Properties();
// static {
// try {
// prop.load(Constants.class.getResourceAsStream("constants.properties"));
// } catch (IOException e) {
// // TODO Auto-generated catch block
// log.error("错误", e);
// e.printStackTrace();
// }
// }
public static Properties getConfig() {
Properties properties = new Properties();
InputStream is = null;
String location = "constants.properties";
try {
Resource resource = new DefaultResourceLoader().getResource(location);
is = resource.getInputStream();
properties.load(is);
log.debug("constants config: {}", properties.toString());
} catch (IOException ex) {
log.error("Could not load property file:" + location, ex);
} finally {
try {
if (is != null) {
is.close();
}
} catch (IOException ioe) {
// ignore
}
}
return properties;
}
//爬取到的网页类型
public static final String TYPE_HTML = "HTML";
public static final String TYPE_EXCEL = "EXCEL";
public static final String TYPE_WORD = "WORD";
public static final String TYPE_PPT = "PPT";
public static final String TYPE_PDF = "PDF";
public static final String TYPE_IMG = "IMG";
public static final String DEFAULT_LANG="cn";
public static final String WORK_URL = prop.getProperty("WORK_URL");
public static final String LOCAL_URL = prop.getProperty("LOCAL_URL");
public static final String ORGS = prop.getProperty("ORGS");
public static final String TID = prop.getProperty("TID");
public static final Integer PROXYFLAG = Integer.parseInt(prop.getProperty("PROXY"));
public static final long PROXYID = Long.parseLong(prop.getProperty("PROXYID"));
public static final String CHROMEDRIVE = prop.getProperty("CHROMEDRIVE");
public static final String CHROMEBIN = prop.getProperty("CHROMEBIN");
public static final String KAFKA_CONSUMER_SERVERS= prop.getProperty("KAFKA_CONSUMER_SERVERS");
public static final String KAFKA_CONSUMER_TOPIC= prop.getProperty("KAFKA_CONSUMER_TOPIC");
public static final String KAFKA_CONSUMER_GROUP_ID= prop.getProperty("KAFKA_CONSUMER_GROUP_ID");
public static final String KAFKA_CONSUMER_AUTO_OFFSET_RESET= prop.getProperty("KAFKA_CONSUMER_AUTO_OFFSET_RESET");
public static final String PROXY= prop.getProperty("PROXY");
public static final String KAFKA_PRODUCT_TOPIC= prop.getProperty("KAFKA_PRODUCT_TOPIC");
public static final String SOURCEADDRESS="SOURCEADDRESS_";
public static final String META_SEARCH_URL= prop.getProperty("META_SEARCH_URL");
// redis
public static final String REDIS_LOCALHOST= prop.getProperty("redis.host");
public static final String REDIS_PORT= prop.getProperty("redis.port");
public static final String REDIS_PASS= prop.getProperty("redis.pass");
public static final String REDIS_TIMEOUT= prop.getProperty("redis.timeout");
public static final String REDIS_MAXIDLE= prop.getProperty("redis.maxIdle");
public static final String REDIS_MAXTOTAL= prop.getProperty("redis.maxTotal");
public static final String REDIS_MAXWAITMILLIS= prop.getProperty("redis.maxWaitMillis");
public static final String REDIS_TESTONBORROW= prop.getProperty("redis.testOnBorrow");
}
package com.zzsn.utility.model;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Vector;
/**
* 情报信息表
* 创建人:李东亮
* 创建时间:2015-6-16 下午4:58:51
* 公司 :郑州数能软件科技有限公司
* @version 1.0
*
*/
public class Basedata implements Serializable {
private static final Logger Log = LoggerFactory.getLogger(Basedata.class);
private Long id;
//processitem id
//system_type id
//site id
private Long sid;
private String title;
private String summary;
private String keywords;
private String author;
private String sourcesite;
private String sourceaddress;
private String type;
private String withtagfile;
private String publishDate;
private String createBy;
private String createDate;
private String content;
private String contentNoTag;
private Integer isdelete;
private Long orgId;
private String origin;
private String lang;
private Vector<Double> featVector = null; // 文档特征向量
private boolean isNormalized = false; //是否归一化
private String words;
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getSummary() {
return summary;
}
public void setSummary(String summary) {
this.summary = summary;
}
public String getKeywords() {
return keywords;
}
public void setKeywords(String keywords) {
this.keywords = keywords;
}
public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}
public String getSourcesite() {
return sourcesite;
}
public void setSourcesite(String sourcesite) {
this.sourcesite = sourcesite;
}
public String getSourceaddress() {
return sourceaddress;
}
public void setSourceaddress(String sourceaddress) {
this.sourceaddress = sourceaddress;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getWithtagfile() {
return withtagfile;
}
public void setWithtagfile(String withtagfile) {
this.withtagfile = withtagfile;
}
public String getPublishDate() {
return publishDate;
}
public void setPublishDate(String publishDate) {
this.publishDate = publishDate;
}
public String getCreateBy() {
return createBy;
}
public void setCreateBy(String createBy) {
this.createBy = createBy;
}
public String getCreateDate() {
return createDate;
}
public void setCreateDate(String createDate) {
this.createDate = createDate;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public Vector<Double> getFeatVector() {
return featVector;
}
public void setFeatVector(Vector<Double> featVector) {
this.featVector = featVector;
}
public boolean isNormalized() {
return isNormalized;
}
public void setNormalized(boolean isNormalized) {
this.isNormalized = isNormalized;
}
public double similarity(Basedata other) {
// TODO: maybe do some caching here too?
double dotProduct = 0;
if(other.isNormalized()==false)
{
other.normalize();
}
if(isNormalized == false)
{
this.normalize();
}
for (int i = 0; i < featVector.size(); ++i) {
dotProduct += featVector.get(i) * other.getFeatVector().get(i);
// magnitude += featVector.get(i) * featVector.get(i);
// magnitudeOther += other.getFeatVector().get(i)
// * other.getFeatVector().get(i);
}
return dotProduct; // / Math.sqrt(magnitude * magnitudeOther);
}
public boolean normalize()
{
try
{
if(featVector == null)
{
isNormalized = false;
return false;
}
double magnitude = 0;
for (int i = 0; i < featVector.size(); ++i) {
magnitude += featVector.get(i) * featVector.get(i);
}
magnitude = Math.sqrt(magnitude);
for (int i = 0; i < featVector.size(); ++i) {
featVector.set(i,featVector.get(i)/magnitude);
}
isNormalized = true;
return true;
}
catch(Exception e)
{
Log.error("异常", e);
isNormalized = false;
return false;
}
}
public boolean merge(Basedata other)
{
try
{
for (int i = 0; i < featVector.size(); ++i) {
featVector.set(i, (featVector.get(i) + other.getFeatVector().get(i)) / 2);
}
normalize();
return true;
}
catch(Exception e)
{
return false;
}
}
public Long getSid() {
return sid;
}
public void setSid(Long sid) {
this.sid = sid;
}
public Integer getIsdelete() {
return isdelete;
}
public void setIsdelete(Integer isdelete) {
this.isdelete = isdelete;
}
public Long getOrgId() {
return orgId;
}
public void setOrgId(Long orgId) {
this.orgId = orgId;
}
public String getWords() {
return words;
}
public void setWords(String words) {
this.words = words;
}
public String getOrigin() {
return origin;
}
public void setOrigin(String origin) {
this.origin = origin;
}
public String getContentNoTag() {
return contentNoTag;
}
public void setContentNoTag(String contentNoTag) {
this.contentNoTag = contentNoTag;
}
public String getLang() {
return lang;
}
public void setLang(String lang) {
this.lang = lang;
}
}
package com.zzsn.utility.model;
import java.io.Serializable;
import java.util.Map;
public class CatchWebByHeritrix implements Serializable{
//columns START
private String contentType;
private Long contentLength;
private String charset;
private String sourceaddress;
private String lastModified;
private String content;
private Long orgId;
private Long sid;
private ImgData imgData;
private String sourceType;
private Map<String, Object> data;
public String getContentType() {
return contentType;
}
public void setContentType(String contentType) {
this.contentType = contentType;
}
public Long getContentLength() {
return contentLength;
}
public void setContentLength(Long contentLength) {
this.contentLength = contentLength;
}
public String getCharset() {
return charset;
}
public void setCharset(String charset) {
this.charset = charset;
}
public String getSourceaddress() {
return sourceaddress;
}
public void setSourceaddress(String sourceaddress) {
this.sourceaddress = sourceaddress;
}
public String getLastModified() {
return lastModified;
}
public void setLastModified(String lastModified) {
this.lastModified = lastModified;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public Long getOrgId() {
return orgId;
}
public void setOrgId(Long orgId) {
this.orgId = orgId;
}
public Long getSid() {
return sid;
}
public void setSid(Long sid) {
this.sid = sid;
}
public ImgData getImgData() {
return imgData;
}
public void setImgData(ImgData imgData) {
this.imgData = imgData;
}
public Map<String, Object> getData() {
return data;
}
public void setData(Map<String, Object> data) {
this.data = data;
}
public String getSourceType() {
return sourceType;
}
public void setSourceType(String sourceType) {
this.sourceType = sourceType;
}
}
package com.zzsn.utility.model;
import java.io.Serializable;
public class CatchWebByMetaSearch implements Serializable{
//columns START
private String charset;
private String lastModify;
private Long sid;
private String title;
private String summary;
private String content;
private String author;
private String sourcesite;
private String sourceaddress;
private String type;
private String publishDate;
private Long orgId;
private Long tid;
//columns END
public String getCharset() {
return charset;
}
public void setCharset(String charset) {
this.charset = charset;
}
public String getLastModify() {
return lastModify;
}
public void setLastModify(String lastModify) {
this.lastModify = lastModify;
}
public Long getSid() {
return sid;
}
public void setSid(Long sid) {
this.sid = sid;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getSummary() {
return summary;
}
public void setSummary(String summary) {
this.summary = summary;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}
public String getSourcesite() {
return sourcesite;
}
public void setSourcesite(String sourcesite) {
this.sourcesite = sourcesite;
}
public String getSourceaddress() {
return sourceaddress;
}
public void setSourceaddress(String sourceaddress) {
this.sourceaddress = sourceaddress;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getPublishDate() {
return publishDate;
}
public void setPublishDate(String publishDate) {
this.publishDate = publishDate;
}
public Long getOrgId() {
return orgId;
}
public void setOrgId(Long orgId) {
this.orgId = orgId;
}
public Long getTid() {
return tid;
}
public void setTid(Long tid) {
this.tid = tid;
}
}
package com.zzsn.utility.model;
import java.util.Map;
/**
* 爬取图片对象
* 创建人:李东亮
* 创建时间:2016-8-18 下午7:56:22
* 公司 :郑州数能软件科技有限公司
* @version 1.0
*
*/
public class ContentFileResult {
String contentAbsoulute;
String contentImgCvtTag;
private Map<String,FileTag > fileMap;
public String getContentAbsoulute() {
return contentAbsoulute;
}
public void setContentAbsoulute(String contentAbsoulute) {
this.contentAbsoulute = contentAbsoulute;
}
public Map<String, FileTag> getFileMap() {
return fileMap;
}
public void setFileMap(Map<String, FileTag> fileMap) {
this.fileMap = fileMap;
}
public String getContentImgCvtTag() {
return contentImgCvtTag;
}
public void setContentImgCvtTag(String contentImgCvtTag) {
this.contentImgCvtTag = contentImgCvtTag;
}
}
package com.zzsn.utility.model;
/**
* img传输对象
* 创建人:李东亮
* 创建时间:2015-7-6 下午4:51:51
* 公司 :郑州数能软件科技有限公司
* @version 1.0
*
*/
public class FileTag {
//src路径如果为相对路径,则转换为绝对路径
private String absolutePath;
//src路径转换为绝对路径之后的标签
private String absoluteTag;
//用于JSP替换
private String saveTag;
//图片保存路径
private String savePath;
//下载文件名
private String fileName;
public String getAbsolutePath() {
return absolutePath;
}
public void setAbsolutePath(String absolutePath) {
this.absolutePath = absolutePath;
}
public String getAbsoluteTag() {
return absoluteTag;
}
public void setAbsoluteTag(String absoluteTag) {
this.absoluteTag = absoluteTag;
}
public String getSaveTag() {
return saveTag;
}
public void setSaveTag(String saveTag) {
this.saveTag = saveTag;
}
public String getSavePath() {
return savePath;
}
public void setSavePath(String savePath) {
this.savePath = savePath;
}
public String getFileName() {
return fileName;
}
public void setFileName(String fileName) {
this.fileName = fileName;
}
}
package com.zzsn.utility.model;
/**
* img传输对象
* 创建人:李东亮
* 创建时间:2015-7-6 下午4:51:51
* 公司 :郑州数能软件科技有限公司
* @version 1.0
*
*/
public class ImgData {
//用于ImgRegexDecideRule判定
private String formatTag;
//用于JSP替换
private String replaceTag;
//图片保存路径
private String localImgPath;
public String getFormatTag() {
return formatTag;
}
public void setFormatTag(String formatTag) {
this.formatTag = formatTag;
}
public String getReplaceTag() {
return replaceTag;
}
public void setReplaceTag(String replaceTag) {
this.replaceTag = replaceTag;
}
public String getLocalImgPath() {
return localImgPath;
}
public void setLocalImgPath(String localImgPath) {
this.localImgPath = localImgPath;
}
}
package com.zzsn.utility.model;
/**
* 情报课题Model
* @author 刘小鹏
* @date 2015-12-23 14:13
* @company 郑州数能软件科技有限公司
*/
public class Infotask {
//columns START
/**
* id db_column: ID
*/
private Long id;
/**
* orgId db_column: ORG_ID
*/
private Long orgId;
/**
* name db_column: NAME
*/
private String name;
/**
* survey db_column: SURVEY
*/
private String survey;
/**
* sponsor db_column: SPONSOR
*/
private String sponsor;
/**
* keywords db_column: KEYWORDS
*/
private String keywords;
/**
* excludeWords db_column: EXCLUDE_WORDS
*/
private String excludeWords;
/**
* startDate db_column: START_DATE
*/
private String startDate;
/**
* endDate db_column: END_DATE
*/
private String endDate;
/**
* createBy db_column: CREATE_BY
*/
private String createBy;
/**
* createDate db_column: CREATE_DATE
*/
private String createDate;
private String taskStartDate;
private String taskEndDate;
//columns END
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public Long getOrgId() {
return orgId;
}
public void setOrgId(Long orgId) {
this.orgId = orgId;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getSurvey() {
return survey;
}
public void setSurvey(String survey) {
this.survey = survey;
}
public String getSponsor() {
return sponsor;
}
public void setSponsor(String sponsor) {
this.sponsor = sponsor;
}
public String getKeywords() {
return keywords;
}
public void setKeywords(String keywords) {
this.keywords = keywords;
}
public String getExcludeWords() {
return excludeWords;
}
public void setExcludeWords(String excludeWords) {
this.excludeWords = excludeWords;
}
public String getStartDate() {
return startDate;
}
public void setStartDate(String startDate) {
this.startDate = startDate;
}
public String getEndDate() {
return endDate;
}
public void setEndDate(String endDate) {
this.endDate = endDate;
}
public String getCreateBy() {
return createBy;
}
public void setCreateBy(String createBy) {
this.createBy = createBy;
}
public String getCreateDate() {
return createDate;
}
public void setCreateDate(String createDate) {
this.createDate = createDate;
}
public String getTaskStartDate() {
return taskStartDate;
}
public void setTaskStartDate(String taskStartDate) {
this.taskStartDate = taskStartDate;
}
public String getTaskEndDate() {
return taskEndDate;
}
public void setTaskEndDate(String taskEndDate) {
this.taskEndDate = taskEndDate;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((id == null) ? 0 : id.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
Infotask other = (Infotask) obj;
if (id == null) {
if (other.id != null)
return false;
} else if (!id.equals(other.id))
return false;
return true;
}
}
package com.zzsn.utility.model;
import java.util.List;
/**
* 元搜索关键词Model
* @author 刘小鹏
* @date 2015-11-02 14:13
* @company 郑州数能软件科技有限公司
*/
public class MetaSearch {
//columns START
/**
* id db_column: ID
*/
private Long id;
/**
* keyword db_column: KEYWORD
*/
private String keyword;
/**
* tid db_column: TID
*/
private Long tid;
/**
* orgId db_column: ORG_ID
*/
private Long orgId;
/**
* creatby db_column: CREATBY
*/
private String creatby;
/**
* createdate db_column: CREATEDATE
*/
private String createdate;
private List<Long> orgs;
private String tids;
//columns END
public String getTids() {
return tids;
}
public void setTids(String tids) {
this.tids = tids;
}
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public String getKeyword() {
return keyword;
}
public void setKeyword(String keyword) {
this.keyword = keyword;
}
public Long getTid() {
return tid;
}
public void setTid(Long tid) {
this.tid = tid;
}
public Long getOrgId() {
return orgId;
}
public void setOrgId(Long orgId) {
this.orgId = orgId;
}
public String getCreatby() {
return creatby;
}
public void setCreatby(String creatby) {
this.creatby = creatby;
}
public String getCreatedate() {
return createdate;
}
public void setCreatedate(String createdate) {
this.createdate = createdate;
}
public List<Long> getOrgs() {
return orgs;
}
public void setOrgs(List<Long> orgs) {
this.orgs = orgs;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((id == null) ? 0 : id.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
MetaSearch other = (MetaSearch) obj;
if (id == null) {
if (other.id != null)
return false;
} else if (!id.equals(other.id))
return false;
return true;
}
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论