提交 ce6193f3 作者: 14238

231008

上级 58db05cc
# created by virtualenv automatically
*
import gc
from flask import Flask, render_template, request, current_app
import configparser
from controller.Main import Main # 导入全部蓝图变量
import datetime
from apscheduler.schedulers.blocking import BlockingScheduler
from datetime import datetime
from dao.Conn import ConnMySql
import sys
import io
# Clear login state (intended to be run as a scheduled job).
def clearLoginStateIn24H():
    """Ask the DAO to clear login state, then print a timestamped notice.

    The actual clearing rule lives in ``ConnMySql.userClearLoginStateIn24H``,
    which is not visible here; the "24H" in the name presumably refers to
    logins older than 24 hours -- confirm against the DAO.
    """
    ConnMySql().userClearLoginStateIn24H()
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"清除登录状态-{stamp}")
# Flask application object; every blueprint exported by controller.Main is
# registered on it.
app = Flask(__name__)
app.register_blueprint(Main)

# Uploaded files are limited to 16 MiB.
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024

# Copy static/conf/sys.ini into app.config. Each entry is stored under
# "<section>.<key>", e.g. db.port=3306.
cfg = configparser.ConfigParser()
cfg.optionxform = str  # keep the key case exactly as written in the file
cfg.read("static/conf/sys.ini", encoding='utf-8')
sections = cfg.sections()
for section in sections:
    for key, val in cfg.items(section):
        app.config[f"{section}.{key}"] = val

# A few values need a non-string type.
app.config['db.port'] = int(app.config['db.port'])
# Only the literal "0" disables the proxy; any other value enables it.
app.config['sys.useProxy'] = app.config['sys.useProxy'] != "0"
app.config['sys.proxyid'] = 0  # id of the proxy currently in use
app.config['sys.userid'] = 0  # id of the account currently in use

# Re-wrap stdout so printed text is always encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

if __name__ == '__main__':
    # webbrowser.open("0.0.0.0:5000")
    # NOTE(review): debug=True together with host 0.0.0.0 exposes the Werkzeug
    # interactive debugger on every network interface -- confirm this entry
    # point is only ever used for local development.
    app.run(host='0.0.0.0', port=5201, debug=True)  # start-up entry point
    # Start the scheduled job that clears abnormal login state every half hour.
    # sched = BlockingScheduler()
    # sched.add_job(clearLoginStateIn24H, 'interval', seconds=1800, id='task-clearLoginStateIn24H')
    # sched.start()
import gc
from flask import Blueprint, request, current_app, make_response, send_file # 导入蓝图
import datetime
import re
import os
import logging
import sys
import io
import tempfile
import openpyxl
import string
import json
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium import webdriver
from selenium.webdriver.common.by import By
from util import UtilDate
from service.Service02 import Service02
Main = Blueprint('Main', __name__)  # a blueprint, not a Flask application object


# Accept a crawl request: read the JSON parameters from the request body and
# start fetching accordingly.
# {"from": "1900-01-01", "last": <most recent N days>, "orgs": ["full name of org 1", "full name of org 2", ...]}
@Main.route('/Main/getData', methods=["POST"])
def getData():
    """Handle ``POST /Main/getData`` and delegate the crawl to ``Service02``.

    Request body (JSON object):
        from: start date as ``yyyy-mm-dd``. When empty, missing or null the
            start date is derived from ``last``.
        last: number of most recent days to cover, counting today. Empty,
            missing or null means "today only". Values below 1 are treated
            as 1 so the derived start date is never in the future.
        orgs: list of organisation full names; defaults to an empty list.

    Returns:
        Whatever ``Service02.getData(dateFrom, orgs)`` returns, or a plain
        text message with HTTP status 400 when the body is not a JSON object
        or ``last`` is not an integer.
    """
    print("POST /Main/getData")
    paras = request.get_json(force=True)
    if not isinstance(paras, dict):
        return "request body must be a JSON object", 400

    dateFrom = paras.get('from') or ""
    lastDays = paras.get('last')
    orgs = paras.get('orgs') or []

    if dateFrom == "":
        try:
            days = 1 if lastDays in (None, "") else int(lastDays)
        except (TypeError, ValueError):
            return '"last" must be an integer number of days', 400
        # "last" includes today, so N days means stepping back N - 1 days.
        # Clamp at zero so a value of 0 or less cannot produce a future date.
        dateFrom = UtilDate.dateAdd("", "d", -max(days - 1, 0))

    service02 = Service02()
    # Target site: "https://wenshu.court.gov.cn/website/wenshu/181029CR4M5A62CH/index.html"
    return service02.getData(dateFrom, orgs)
class ProxyDao():
    """Placeholder class with no behaviour yet.

    The name suggests a data-access object for proxy records -- nothing in
    this file confirms that, so treat it as a stub.
    """

    def t(self):
        """Do nothing and return ``None`` (stub method)."""
        return None
# 基本信息
from util import UtilDate
from util import UtilNumber
class BaseInfo:
    """Basic information of one scraped record.

    Every field defaults to an empty string so that ``isAfter`` and
    ``toString`` work on a record that has only been partially filled in,
    instead of raising ``AttributeError`` for fields never assigned.
    """

    info_title: str = ""  # title
    key_word: str = ""  # keyword
    info_bianhao: str = ""  # case number
    info_address: str = ""  # court of jurisdiction
    info_time: str = ""  # publish date, formatted yyyy-mm-dd
    info_id: str = ""  # case ID
    info_yuanyou: str = ""  # grounds of the judgment
    info_content: str = ""  # full text

    # Tell whether this record's date is on or after the given date.
    def isAfter(self, sDate: str) -> bool:
        """Return True when ``info_time`` is on or after ``sDate``.

        The comparison is a plain string comparison, so both values must be
        zero-padded ``yyyy-mm-dd`` strings for the result to be meaningful.
        An empty ``sDate`` always yields False.
        """
        return sDate != "" and self.info_time >= sDate

    def toString(self):
        """Return title, keyword, case number, court, date and ID joined by tabs.

        ``info_yuanyou`` and ``info_content`` are not part of the output.
        """
        return "\t".join([
            self.info_title,
            self.key_word,
            self.info_bianhao,
            self.info_address,
            self.info_time,
            self.info_id,
        ])
home = C:\Program Files\Python
implementation = CPython
version_info = 3.8.0.final.0
virtualenv = 20.13.0
include-system-site-packages = true
base-prefix = C:\Program Files\Python
base-exec-prefix = C:\Program Files\Python
base-executable = C:\Program Files\Python\python.exe
#系统配置
[sys]
#文字识别Url,用于识别裁判文书网的验证码
ocrUrl=http://114.116.49.86:8013/wzsb_app?withCrLf=false
#登录模式,0-无需登录,1-账号登录(需要口令、短信、验证码相应的选择器不能为空),2-cookie登录
loginMode=1
#是否使用代理,0-不用,1-使用,需登录的一般不适用代理
useProxy=1
#验证码识别,0-不识别,1-识别,暂采用固定的方法识别验证码,后续扩展为不同的识别模式
verifiCode=0
#登录Url ?open=login
loginUrl=https://wenshu.court.gov.cn/website/wenshu/181010CARHS5BS3C/index.html
#正常Url,登录后可能会自动跳转到正常Url
mainUrl=https://wenshu.court.gov.cn
#登录-用户
loginUser=#root > div > form > div > div:nth-child(1) > div > div > div > input
#登录-口令
loginPasswd=#root > div > form > div > div:nth-child(2) > div > div > div > input
#裁判文书网的图形验证码在单独的页面,输入正确后返回到登录界面
#登录-图形验证码输入框,不为空时则需要识别验证码
loginCaptchaInput=body > div > div.card-body > div > form > div.captcha > input
#登录-图形验证码图片
loginCaptchaImage=#Image1
#登录-图形验证码确认按钮
loginCaptchaButton=body > div > div.card-body > div > form > div.warnbtn > input
#登录-短信验证码,可能和图形验证码同时需要,暂未处理
loginSMSCode=
#主界面登录按钮
loginButton=#loginLi > a
#登录界面确认登录按钮
loginOk=#root > div > form > div > div.login-button-container > span
#数据库配置
[db]
host=114.115.159.144
port=3306
user=caiji
passwd=zzsn9988
db=caiji
charset=utf8
#css选择器配置
[css]
#搜索-文本框
searchInput=#_view_1540966814000 > div > div.search-wrapper.clearfix > div.search-middle > input
#搜索-按钮
searchButton=#_view_1540966814000 > div > div.search-wrapper.clearfix > div.search-rightBtn.search-click
#列表-日期倒排按钮
listDateSort=#_view_1545184311000 > div.LM_tool.clearfix > div:nth-child(2) > a
#列表-案件数量
listCount=#_view_1545184311000 > div.LM_con.clearfix > div.fr.con_right > span
#列表-案件名称
listTitle=#_view_1545184311000 > div:nth-child(?) > div.list_title.clearfix > h4 > a
#列表-编号
listBianhao=#_view_1545184311000 > div:nth-child(?) > div.list_subtitle > span.ah
#列表-法院
listAddress=#_view_1545184311000 > div:nth-child(?) > div.list_subtitle > span.slfyName
#列表-审结日期
listTime=#_view_1545184311000 > div:nth-child(?) > div.list_subtitle > span.cprq
#列表-案由
listYuanyou=#_view_1545184311000 > div:nth-child(?) > div.list_reason > p
#下一页按钮
listNextPage=#_view_1545184311000 > div.left_7_3 > a:last-child
#正文-链接,一般和title相同
contLink=#_view_1545184311000 > div:nth-child(?) > div.list_title.clearfix > h4 > a
#正文-正文
contContent=#_view_1541573883000 > div > div.PDF_box > div.PDF_pox
# 验证码识别,暂只处理裁判文书网的验证码
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.webdriver import WebDriver
import requests
from flask import current_app
from pathlib import Path
import tempfile
import uuid
import hashlib
import os
import json
# selecter: CSS selector of the captcha image
def getCaptchaMode1(browser: WebDriver, selecter: str, timeout: float = 30.0):
    """Screenshot a captcha element and have the OCR service read it.

    The element matched by ``selecter`` is saved as a PNG under
    ``./Temp_file``, posted to the URL in ``current_app.config['sys.ocrUrl']``
    and the ``resultData`` field of the JSON reply is returned.

    Args:
        browser: Selenium driver showing the page that contains the captcha.
        selecter: CSS selector of the captcha image element.
        timeout: Seconds to wait for the OCR service before giving up, so a
            stalled service cannot block the caller indefinitely.

    Returns:
        The recognised text, or ``""`` when any step fails. Failures are
        printed, not raised.
    """
    ret = ""
    out_path = "./Temp_file"
    path_name = ""
    try:
        Path(out_path).mkdir(parents=True, exist_ok=True)
        # A random file name keeps concurrent calls from overwriting each other.
        path_name = os.path.join(out_path, str(uuid.uuid4())) + ".png"
        print(path_name)
        # Save the captcha image as rendered in the browser.
        img = browser.find_element(By.CSS_SELECTOR, selecter)
        img.screenshot(path_name)
        ocrUrl = current_app.config['sys.ocrUrl']
        # Call the text recognition service; the handle is closed even if the
        # request raises.
        with open(path_name, "rb") as file:
            response = requests.post(ocrUrl, files={"multiRequest": file}, timeout=timeout)
        # Expected reply: {"code":200,"logs":null,"message":"success","resultData":"2rVK"}
        oRet = json.loads(response.text)
        ret = oRet["resultData"]
        print(ret)
    except Exception as err:
        # Best effort: report the problem and fall through to return "".
        print('getCaptchaMode1 error:', err)
    finally:
        # Remove the temporary screenshot on both the success and failure path.
        if path_name:
            try:
                os.remove(path_name)
            except OSError:
                pass  # never written, or already gone
    return ret
from datetime import datetime,timedelta
from dateutil.relativedelta import relativedelta
# Convert a date written as "yyyy年m月d日" into "yyyy-mm-dd".
def convertDate(sDate: str):
    """Normalise a Chinese-style or dash-separated date to ``yyyy-mm-dd``.

    Accepts ``yyyy年m月d日`` as well as ``yyyy-m-d``; month and day may be one
    or two digits. All whitespace is removed first, because text taken from
    a web page often carries leading, trailing or embedded blanks that would
    otherwise make parsing fail.

    Raises:
        ValueError: if the cleaned text is not a valid ``yyyy-m-d`` date.
    """
    compact = "".join(sDate.split())
    compact = compact.replace("年", "-").replace("月", "-").replace("日", "")
    return datetime.strptime(compact, '%Y-%m-%d').strftime('%Y-%m-%d')
# Shift a date by an offset; ymd selects the unit: y=year, m=month, d=day.
def dateAdd(sDate: str, ymd: str = "d", diff: int = 1):
    """Return ``sDate`` shifted by ``diff`` units, formatted ``yyyy-mm-dd``.

    Args:
        sDate: Start date as ``yyyy-mm-dd``; an empty string means today.
        ymd: Unit of the shift: ``"y"`` years, ``"m"`` months, ``"d"`` days.
            Any other value leaves the date unchanged.
        diff: Signed number of units; negative values move backwards.
    """
    if sDate == "":
        sDate = datetime.now().strftime('%Y-%m-%d')
    moment = datetime.strptime(sDate, '%Y-%m-%d')

    # Adding a signed relativedelta gives the same result as subtracting the
    # absolute value, so no separate branch per sign is needed.
    if ymd == "y":
        moment = moment + relativedelta(years=diff)
    elif ymd == "m":
        moment = moment + relativedelta(months=diff)
    elif ymd == "d":
        moment = moment + timedelta(days=diff)

    return moment.strftime('%Y-%m-%d')
\ No newline at end of file
#数值处理类
# Convert an amount given as text into a number; the text may carry units
# such as 万, 亿, 元 or the prefix 人民币.
def convertMoney(sMoney: str):
    """Parse a Chinese money string into a float expressed in yuan.

    The magnitude units are applied rather than discarded: ``万`` multiplies
    by 10,000 and ``亿`` by 100,000,000, so "5万元" is 50000.0 and not 5.0.
    Combined forms such as "1亿2000万元" are summed. The markers ``人民币``
    and ``元``, thousands separators and whitespace are ignored.

    Raises:
        ValueError: if no numeric amount can be read from the text.
    """
    text = "".join(sMoney.split())
    for noise in ("人民币", "元", ",", ","):
        text = text.replace(noise, "")

    total = 0.0
    matched_unit = False
    # Largest unit first, so "1亿2000万" is read left to right.
    for unit, factor in (("亿", 100000000), ("万", 10000)):
        head, sep, tail = text.partition(unit)
        if sep:
            total += float(head) * factor
            text = tail
            matched_unit = True

    # Whatever remains is a plain yuan figure. With no unit and no digits,
    # float("") raises ValueError, matching the original behaviour.
    if text or not matched_unit:
        total += float(text)
    return total
from selenium.webdriver.chrome.webdriver import WebDriver
from seleniumwire import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
# IP proxy pool
class UtilProxy:
    """Holds the connection details of one proxy (declarations only)."""

    id: int
    ip: str
    port: str
    name: str
    password: str

    # Switch to another proxy IP.
    def alterIP(self, browser: WebDriver):
        """Switch the proxy IP for ``browser``; not implemented, returns ``None``."""
        return None
# 账号信息
from util import UtilDate
from util import UtilNumber
class LoginInfo:
    """Account information: typed field declarations only, no defaults.

    The fields are annotations without values, so an attribute exists on an
    instance only after it has been assigned.
    """

    # NOTE(review): the original comment here read "标题" (title), which looks
    # copy-pasted from another class; presumably this is the record id -- confirm.
    id: int
    user_group: str  # presumably the group the account belongs to -- verify against the DAO
    user_name: str  # login user name
    user_passwd: str  # login password
# 代理IP信息
from util import UtilDate
from util import UtilNumber
class ProxyInfo:
    """Proxy IP information: typed field declarations only, no defaults.

    The fields are annotations without values, so an attribute exists on an
    instance only after it has been assigned.
    """

    # NOTE(review): the original comment here read "标题" (title), which looks
    # copy-pasted from another class; presumably this is the record id -- confirm.
    id: int
    ip: str  # proxy IP address
    port: str  # proxy port, kept as text
    user_name: str  # presumably the proxy authentication user -- verify against the caller
    user_passwd: str  # presumably the proxy authentication password -- verify against the caller
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论