提交 6a073342 作者: LiuLiYuan

政策法规 10/17

上级 bcb04605
...@@ -20,9 +20,6 @@ from DBUtils.PooledDB import PooledDB ...@@ -20,9 +20,6 @@ from DBUtils.PooledDB import PooledDB
# import sys # import sys
# sys.path.append('D://zzsn_spider//base//fdfs_client') # sys.path.append('D://zzsn_spider//base//fdfs_client')
from fdfs_client.client import get_tracker_conf, Fdfs_client
tracker_conf = get_tracker_conf('D:\\kkwork\\zzsn_spider\\comData\\policylaw\\client.conf')
client = Fdfs_client(tracker_conf)
from obs import ObsClient from obs import ObsClient
import fitz import fitz
...@@ -444,72 +441,8 @@ class BaseCore: ...@@ -444,72 +441,8 @@ class BaseCore:
# def doc_page(self,file_path): # def doc_page(self,file_path):
# doc = Document(file_path) # doc = Document(file_path)
# return len(doc.sections) # return len(doc.sections)
def pdf_content(self, resp_content):
    """Extract the plain text of an in-memory PDF.

    Every attempt first uploads the raw bytes through the module-level
    ``client`` and then parses them with ``fitz``. Up to three attempts are
    made, sleeping 3 seconds after each failure.

    :param resp_content: raw bytes of the PDF document.
    :return: the text of all pages concatenated in page order, or ``''``
        when every attempt failed.
    """
    for _ in range(3):
        try:
            # NOTE(review): the upload result is not used in this method; the
            # call is kept only to preserve the existing side effect. Confirm
            # whether parsing really needs to upload the file as well.
            client.upload_by_buffer(resp_content, file_ext_name='pdf')
            with fitz.open(stream=resp_content, filetype='pdf') as doc:
                # Build the text per attempt so that a parse which fails
                # half-way never leaks partial pages into a later retry.
                return ''.join(page.get_text() for page in doc.pages())
        except Exception:
            # Best-effort retry; KeyboardInterrupt/SystemExit still propagate.
            time.sleep(3)
    return ''
# 替换为绝对路径之后,解析出来a.href
def uploadToserver(self, file_href, item_id):
    """Download a file and upload it to the file server.

    The download and the upload are each attempted up to three times, with a
    3 second pause after a failed attempt.

    :param file_href: absolute URL of the file to fetch; its extension is
        used as the attachment category and as the upload extension.
    :param item_id: identifier stored in the returned record as ``item_id``.
    :return: a dict describing the attachment. ``state`` is ``True`` and
        ``path``/``full_path``/``file_size``/``create_time`` are filled in
        only when both the download and the upload succeeded; otherwise the
        record is returned with ``state`` set to ``False``.
    """
    category = os.path.splitext(file_href)[1]
    retData = {'state': False, 'type_id': 7, 'item_id': item_id, 'group_name': 'group1', 'path': '',
               'full_path': '',
               'category': category, 'file_size': '', 'status': 1, 'create_by': 'XueLingKun',
               'create_time': '', 'page_size': '', 'content': ''}
    headers = {'User-Agent': self.getRandomUserAgent()}

    # Step 1: fetch the file body.
    resp_content = ''
    for _ in range(3):
        try:
            resp_content = requests.get(file_href, headers=headers, verify=False, timeout=20).content
            break
        except Exception:
            time.sleep(3)
    if not resp_content:
        return retData

    # Step 2: push the bytes to the file server.
    result = None
    for _ in range(3):
        try:
            result = client.upload_by_buffer(resp_content, file_ext_name=category.replace('.', ''))
            self.getLogger().info('-------文件上传成功------')
            break
        except Exception:
            time.sleep(3)
    if result is None:
        # Every upload attempt failed: report failure the same way as a failed
        # download instead of crashing on an unbound ``result`` below.
        return retData

    remote_id = result['Remote file_id'].decode()
    retData['state'] = True
    retData['path'] = remote_id.replace('group1', '')
    retData['full_path'] = remote_id
    retData['file_size'] = result['Uploaded size']
    retData['create_time'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    return retData
def secrchATT(self,item_id,file_name,type_id,order_by): def secrchATT(self,item_id,file_name,type_id,order_by):
sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s and order_by=%s ''' sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s and order_by=%s '''
...@@ -518,7 +451,7 @@ class BaseCore: ...@@ -518,7 +451,7 @@ class BaseCore:
return selects return selects
#插入到att表 返回附件id #插入到att表 返回附件id
def tableUpdate(self,retData,com_name,file_name,num): def tableUpdate(self,retData,com_name,file_name,num,pub_time):
item_id = retData['item_id'] item_id = retData['item_id']
type_id = retData['type_id'] type_id = retData['type_id']
group_name = retData['group_name'] group_name = retData['group_name']
...@@ -533,12 +466,12 @@ class BaseCore: ...@@ -533,12 +466,12 @@ class BaseCore:
order_by = num order_by = num
Upsql = '''insert into clb_sys_attachment(name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,object_key,bucket_name) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''' Upsql = '''insert into clb_sys_attachment(name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,object_key,bucket_name,publish_time) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
values = ( values = (
file_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by, file_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by,
status, create_by, status, create_by,
create_time,path,'zzsn') create_time,path,'zzsn',pub_time)
self.cursor_.execute(Upsql, values) # 插入 self.cursor_.execute(Upsql, values) # 插入
self.cnx_.commit() # 提交 self.cnx_.commit() # 提交
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论