提交 1da1551d 作者: 薛凌堃

工具包注释fdfs方法

上级 54dd8a54
...@@ -25,8 +25,8 @@ from DBUtils.PooledDB import PooledDB ...@@ -25,8 +25,8 @@ from DBUtils.PooledDB import PooledDB
from fdfs_client.client import get_tracker_conf, Fdfs_client from fdfs_client.client import get_tracker_conf, Fdfs_client
import uuid import uuid
tracker_conf = get_tracker_conf('D:\\kkwork\\zzsn_spider\\base\\client.conf') # tracker_conf = get_tracker_conf('D:\\kkwork\\zzsn_spider\\base\\client.conf')
client = Fdfs_client(tracker_conf) # client = Fdfs_client(tracker_conf)
from obs import ObsClient from obs import ObsClient
import fitz import fitz
...@@ -613,48 +613,48 @@ class BaseCore: ...@@ -613,48 +613,48 @@ class BaseCore:
self.r.expire(key, 3600) self.r.expire(key, 3600)
time.sleep(2) time.sleep(2)
# 上传至文件服务器,并解析pdf的内容和页数 # # 上传至文件服务器,并解析pdf的内容和页数
def upLoadToServe(self, pdf_url, type_id, social_code): # def upLoadToServe(self, pdf_url, type_id, social_code):
headers = {} # headers = {}
retData = {'state': False, 'type_id': type_id, 'item_id': social_code, 'group_name': '', 'path': '', # retData = {'state': False, 'type_id': type_id, 'item_id': social_code, 'group_name': '', 'path': '',
'full_path': '', # 'full_path': '',
'category': 'pdf', 'file_size': '', 'status': 1, 'create_by': 'XueLingKun', # 'category': 'pdf', 'file_size': '', 'status': 1, 'create_by': 'XueLingKun',
'create_time': '', 'page_size': '', 'content': ''} # 'create_time': '', 'page_size': '', 'content': ''}
headers['User-Agent'] = self.getRandomUserAgent() # headers['User-Agent'] = self.getRandomUserAgent()
for i in range(0, 3): # for i in range(0, 3):
try: # try:
resp_content = requests.get(pdf_url, headers=headers, verify=False, timeout=20).content # resp_content = requests.get(pdf_url, headers=headers, verify=False, timeout=20).content
break # break
except: # except:
time.sleep(3) # time.sleep(3)
continue # continue
page_size = 0 # page_size = 0
#
for i in range(0, 3): # for i in range(0, 3):
try: # try:
result = client.upload_by_buffer(resp_content, file_ext_name='pdf') # result = client.upload_by_buffer(resp_content, file_ext_name='pdf')
with fitz.open(stream=resp_content, filetype='pdf') as doc: # with fitz.open(stream=resp_content, filetype='pdf') as doc:
page_size = doc.page_count # page_size = doc.page_count
for page in doc.pages(): # for page in doc.pages():
retData['content'] += page.get_text() # retData['content'] += page.get_text()
break # break
except: # except:
time.sleep(3) # time.sleep(3)
continue # continue
if page_size < 1: # if page_size < 1:
# pdf解析失败 # # pdf解析失败
print(f'======pdf解析失败=====') # print(f'======pdf解析失败=====')
return retData # return retData
else: # else:
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) # time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
retData['state'] = True # retData['state'] = True
retData['path'] = bytes.decode(result['Remote file_id']).replace('group1', '') # retData['path'] = bytes.decode(result['Remote file_id']).replace('group1', '')
retData['full_path'] = bytes.decode(result['Remote file_id']) # retData['full_path'] = bytes.decode(result['Remote file_id'])
retData['file_size'] = result['Uploaded size'] # retData['file_size'] = result['Uploaded size']
retData['create_time'] = time_now # retData['create_time'] = time_now
retData['page_size'] = page_size # retData['page_size'] = page_size
#
return retData # return retData
def secrchATT(self, item_id, year, type_id): def secrchATT(self, item_id, year, type_id):
sel_sql = '''select id from clb_sys_attachment where item_id = %s and year = %s and type_id=%s ''' sel_sql = '''select id from clb_sys_attachment where item_id = %s and year = %s and type_id=%s '''
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论