Commit b8689932 by XveLingKun

WeChat official account: add logging

Parent 976e2fb4
@@ -11,6 +11,7 @@ from bs4 import BeautifulSoup
 from obs import ObsClient
 from kafka import KafkaProducer
+from retry import retry
 from base.BaseCore import BaseCore
 baseCore = BaseCore()
@@ -64,6 +65,15 @@ def getjsonInfo():
     cnx_.commit()
     return dict_json
 
+@retry(tries=3, delay=2)
+def getrequest(url_news):
+    # fetch through a proxy; @retry re-runs this up to 3 times, waiting 2 seconds between attempts
+    ip = baseCore.get_proxy()
+    res_news = requests.get(url_news, proxies=ip, timeout=20)
+    if res_news.status_code != 200:
+        # raise an explicit error so the retry decorator triggers another attempt
+        raise requests.RequestException(f'unexpected status {res_news.status_code}')
+    return res_news
 def get_info(dict_json):
     # list_all_info = []
     # num_caiji = 0
@@ -80,23 +90,32 @@ def get_info(dict_json):
     origin = dict_json['site_name']
     url_news = dict_json['link']
     info_source_code = dict_json['info_source_code']
     # url_ft = check_url(sid, url_news)
     # if url_ft:
     #     return list_all_info, num_caiji
-    try:
-        ip = baseCore.get_proxy()
-        res_news = requests.get(url_news, proxies=ip, timeout=20)
-    except:
-        # 400: the request failed
-        updatewxLink(url_news, info_source_code, 400)
-        return False
+    # try:
+    #     ip = baseCore.get_proxy()
+    #     res_news = requests.get(url_news, proxies=ip, timeout=20)
+    # except:
+    #     # 400: the request failed
+    #     updatewxLink(url_news, info_source_code, 400)
+    #     return False
+    # Changed the request method: retry up to 3 times via getrequest()
+    try:
+        res_news = getrequest(url_news)
+    except:
+        # fall back to a direct request without the proxy
+        try:
+            res_news = requests.get(url_news, timeout=20)
+        except:
+            # 400: the request failed, mark the link and give up
+            updatewxLink(url_news, info_source_code, 400)
+            return False
     soup_news = BeautifulSoup(res_news.content, 'html.parser')
     try:
         news_html = soup_news.find('div', {'id': 'js_content'})
         news_html['style'] = 'width: 814px ; margin: 0 auto;'
         # del news_html['style']
-        news_html=rm_style_attr(news_html)
+        news_html = rm_style_attr(news_html)
         del news_html['id']
         del news_html['class']
     except:
...
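Note on the change above: the `retry` package's decorator re-invokes the wrapped function whenever it raises, up to `tries` attempts with `delay` seconds between them. A minimal standalone sketch of that behaviour, not part of this commit (the URL and status check below are illustrative only):

from retry import retry
import requests

@retry(tries=3, delay=2)
def fetch(url):
    # any exception raised here makes @retry sleep 2 seconds and call fetch() again,
    # re-raising only after the third failed attempt
    resp = requests.get(url, timeout=20)
    if resp.status_code != 200:
        raise requests.RequestException(f'unexpected status {resp.status_code}')
    return resp

if __name__ == '__main__':
    page = fetch('https://example.com')  # hypothetical URL, for illustration only
    print(len(page.content))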
@@ -41,32 +41,6 @@ import pandas as pd
 #     pass
 import redis
-from base.BaseCore import BaseCore
-baseCore = BaseCore()
-r = baseCore.r
-key = 'counter'
-expiration_time = 10  # set the expiration time (60 seconds)
-# # increment the counter
-# r.incr(key)
-# # increment and set an expiration time
-# while True:
-#     # increment the counter
-#     r.incr(key)
-#     value = int(r.get(key).decode())
-#
-#     if value > 10:
-#         print(value)
-#         # set the expiration time
-#         r.expire(key, expiration_time)
-#         time.sleep(20)
-#         print('------------------')
-#         continue
-#     # print(value)
-#     time.sleep(5)
-#     print(value)
-#     print("==========")
 
 def check_url():
     r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
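The block removed above was an experiment with a Redis counter that gets an expiry once it passes a threshold. A minimal sketch of that incr/expire pattern, assuming a reachable Redis instance (the localhost connection below is illustrative, not the production server):

import redis

r = redis.Redis(host='localhost', port=6379, db=0)  # illustrative connection settings
key = 'counter'
expiration_time = 10  # seconds

value = r.incr(key)  # atomic increment; creates the key at 1 if it does not exist
if value > 10:
    # once the counter passes the threshold, let it expire and start over
    r.expire(key, expiration_time)
print(value)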
@@ -76,7 +50,22 @@ def check_url():
         print('True')
     else:
         print('False')
-check_url()
+# check_url()
+
+def test(dic_user_count):
+    dic_user_count["A"] += 1
+    # print(dic_user.items())
+    for key, value in dic_user_count.items():
+        print(key, value)
+    return dic_user_count
+
+def test1():
+    dic_user_count = {"A": 0}
+    for i in range(3):
+        dic_user_count = test(dic_user_count)
+    print(dic_user_count)
+
+if __name__ == "__main__":
+    test1()
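The `test`/`test1` pair added above verifies that a dict passed into a function is mutated in place, so re-assigning the return value on each loop iteration is optional. A minimal sketch of the same point (names are illustrative):

def bump(counts):
    # mutates the caller's dict through the shared reference; no return needed
    counts["A"] += 1

counts = {"A": 0}
for _ in range(3):
    bump(counts)
print(counts)  # {'A': 3}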