提交 b376f641 作者: 刘伟刚

Merge remote-tracking branch 'origin/master'

...@@ -650,7 +650,7 @@ class BaseCore: ...@@ -650,7 +650,7 @@ class BaseCore:
return selects return selects
#插入到att表 返回附件id #插入到att表 返回附件id
def tableUpdate(self,retData,com_name,year,pdf_name,num): def tableUpdate(self,retData,com_name,year,pdf_name,num,pub_time):
item_id = retData['item_id'] item_id = retData['item_id']
type_id = retData['type_id'] type_id = retData['type_id']
group_name = retData['group_name'] group_name = retData['group_name']
...@@ -670,12 +670,12 @@ class BaseCore: ...@@ -670,12 +670,12 @@ class BaseCore:
id = '' id = ''
return id return id
else: else:
Upsql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,page_size) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''' Upsql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,page_size,publish_time) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
values = ( values = (
year, pdf_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by, year, pdf_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by,
status, create_by, status, create_by,
create_time, page_size) create_time, page_size,pub_time)
self.cursor_.execute(Upsql, values) # 插入 self.cursor_.execute(Upsql, values) # 插入
self.cnx_.commit() # 提交 self.cnx_.commit() # 提交
...@@ -759,7 +759,7 @@ class BaseCore: ...@@ -759,7 +759,7 @@ class BaseCore:
try: try:
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
retData['state'] = True retData['state'] = True
retData['path'] = result['body']['objectUrl'].split('.com')[1] retData['path'] = unquote(result['body']['objectUrl'].split('.com')[1])
retData['full_path'] = unquote(result['body']['objectUrl']) retData['full_path'] = unquote(result['body']['objectUrl'])
retData['file_size'] = self.convert_size(file_size) retData['file_size'] = self.convert_size(file_size)
retData['create_time'] = time_now retData['create_time'] = time_now
......
import json import json
...@@ -133,7 +133,7 @@ def SpiderByZJH(url, payload, dic_info, num, start_time): ...@@ -133,7 +133,7 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
return False return False
#插入数据库获取att_id #插入数据库获取att_id
num = num + 1 num = num + 1
att_id = baseCore.tableUpdate(retData, short_name, year, name_pdf, num) att_id = baseCore.tableUpdate(retData, short_name, year, name_pdf, num,pub_time)
if att_id: if att_id:
pass pass
else: else:
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
...@@ -164,7 +164,7 @@ def spider_annual_report(dict_info,num): ...@@ -164,7 +164,7 @@ def spider_annual_report(dict_info,num):
return False return False
num = num + 1 num = num + 1
try: try:
att_id = baseCore.tableUpdate(retData,com_name,year,name_pdf,num) att_id = baseCore.tableUpdate(retData,com_name,year,name_pdf,num,pub_time)
content = retData['content'] content = retData['content']
state = 1 state = 1
takeTime = baseCore.getTimeCost(start_time, time.time()) takeTime = baseCore.getTimeCost(start_time, time.time())
......
...@@ -40,7 +40,8 @@ def save_data(dic_news): ...@@ -40,7 +40,8 @@ def save_data(dic_news):
'网址':dic_news['sourceAddress'], '网址':dic_news['sourceAddress'],
'tid':dic_news['labels'][0]['relationId'], 'tid':dic_news['labels'][0]['relationId'],
'来源':dic_news['labels'][0]['relationName'], '来源':dic_news['labels'][0]['relationName'],
'创建时间':dic_news['createDate'] '创建时间':dic_news['createDate'],
'带标签内容': dic_news['contentWithTag'][:100]
} }
db_storage.insert_one(aaa_dic) db_storage.insert_one(aaa_dic)
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论