政策法规 10/17

6a073342 · LiuLiYuan · bcb04605 · 6a073342 · 6a073342
--- a/comData/policylaw/BaseCore.py
+++ b/comData/policylaw/BaseCore.py
@@ -20,9 +20,6 @@ from DBUtils.PooledDB import PooledDB
 # import sys
 # sys.path.append('D://zzsn_spider//base//fdfs_client')

-from fdfs_client.client import get_tracker_conf, Fdfs_client
-tracker_conf = get_tracker_conf('D:\\kkwork\\zzsn_spider\\comData\\policylaw\\client.conf')
-client = Fdfs_client(tracker_conf)

 from obs import ObsClient
 import fitz
@@ -444,72 +441,8 @@ class BaseCore:
    # def doc_page(self,file_path):
    #     doc = Document(file_path)
    #     return len(doc.sections)
-    def pdf_content(self,resp_content):
-        # 解析pdf文件内容
-        content = ''
-        for i in range(0, 3):
-            try:
-                result = client.upload_by_buffer(resp_content, file_ext_name='pdf')
-                with fitz.open(stream=resp_content, filetype='pdf') as doc:
-                    # page_size = doc.page_count
-                    for page in doc.pages():
-                        content += page.get_text()
-                break
-            except:
-                time.sleep(3)
-                continue
-        return content

-    # 替换为绝对路径之后，解析出来a.href
-    def uploadToserver(self,file_href,item_id):
-        category = os.path.splitext(file_href)[1]
-        # 上传至文件服务器
-        headers = {}
-        retData = {'state': False, 'type_id': 7, 'item_id': item_id, 'group_name': 'group1', 'path': '',
-                   'full_path': '',
-                   'category': category, 'file_size': '', 'status': 1, 'create_by': 'XueLingKun',
-                   'create_time': '', 'page_size': '', 'content': ''}
-        headers['User-Agent'] = self.getRandomUserAgent()

-        resp_content = ''
-        for i in range(0, 3):
-            try:
-                resp_content = requests.get(file_href, headers=headers, verify=False, timeout=20).content
-                break
-            except:
-                time.sleep(3)
-                continue
-        if resp_content:
-            pass
-        else:
-            return retData
-        # page_size = 0
-        # if category == '.doc' or category == '.docx':
-        #     # page_size = self.doc_page(file_href)
-        #     return retData
-        # if category == '.pdf' or category == '.PDF':
-        #     page_size = self.pdf_page(resp_content)
-        for i in range(0, 3):
-            try:
-                result = client.upload_by_buffer(resp_content,file_ext_name=category.replace('.',''))
-                self.getLogger().info('-------文件上传成功------')
-                break
-            except:
-                time.sleep(3)
-                continue
-        # if page_size>0:
-        #     pass
-        # else:
-        #     self.getLogger().info(f'======解析失败=====')
-        #     return retData
-        time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-        retData['state'] = True
-        retData['path'] = bytes.decode(result['Remote file_id']).replace('group1', '')
-        retData['full_path'] = bytes.decode(result['Remote file_id'])
-        retData['file_size'] = result['Uploaded size']
-        retData['create_time'] = time_now
-        # retData['page_size'] = page_size
-        return retData

    def secrchATT(self,item_id,file_name,type_id,order_by):
        sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s and order_by=%s '''
@@ -518,7 +451,7 @@ class BaseCore:
        return selects

    #插入到att表 返回附件id
-    def tableUpdate(self,retData,com_name,file_name,num):
+    def tableUpdate(self,retData,com_name,file_name,num,pub_time):
            item_id = retData['item_id']
            type_id = retData['type_id']
            group_name = retData['group_name']
@@ -533,12 +466,12 @@ class BaseCore:
            order_by = num


-            Upsql = '''insert into clb_sys_attachment(name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,object_key,bucket_name) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
+            Upsql = '''insert into clb_sys_attachment(name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,object_key,bucket_name,publish_time) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''

            values = (
                file_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by,
                status, create_by,
-                create_time,path,'zzsn')
+                create_time,path,'zzsn',pub_time)

            self.cursor_.execute(Upsql, values)  # 插入
            self.cnx_.commit()  # 提交

--- a/comData/policylaw/policy.py
+++ b/comData/policylaw/policy.py
@@ -3,6 +3,7 @@
 """数据全量跑一遍，不做判重逻辑"""
 import datetime
 import json
+import os
 import re
 import time

@@ -131,6 +132,7 @@ def redefid(idList):
 def remove_dup():
    pass

+
 # 国务院文件
 def get_content1():
    pathType = 'policy/gwywj/'
@@ -252,12 +254,15 @@ def get_content1():
                                        or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                        or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                    file_name = file.text.strip()
+                                    category = os.path.splitext(file_href)[1]
+                                    if category not in file_name:
+                                        file_name = file_name + category
                                    retData = baseCore.uptoOBS(file_href,'1766',file_name)
                                    if retData['state']:
                                        pass
                                    else:
                                        continue
-                                    att_id,full_path = baseCore.tableUpdate(retData,'国务院文件',file_name,num)
+                                    att_id,full_path = baseCore.tableUpdate(retData,'国务院文件',file_name,num,pub_time1)
                                    id_list.append(att_id)

                                    #todo:将返回的地址更新到soup
@@ -408,12 +413,15 @@ def get_content2():
                                        or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                        or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                    file_name = file.text.strip()
+                                    category = os.path.splitext(file_href)[1]
+                                    if category not in file_name:
+                                        file_name = file_name + category
                                    retData = baseCore.uptoOBS(file_href,'1699',file_name)
                                    if retData['state']:
                                        pass
                                    else:
                                        continue
-                                    att_id,full_path = baseCore.tableUpdate(retData,'国务院文件',file_name,num)
+                                    att_id,full_path = baseCore.tableUpdate(retData,'国务院文件',file_name,num,pub_time1)
                                    id_list.append(att_id)

                                    #todo:将返回的地址更新到soup
@@ -516,12 +524,15 @@ def get_content3():
                    or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                    or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                file_name = file.text.strip()
+                category = os.path.splitext(file_href)[1]
+                if category not in file_name:
+                    file_name = file_name + category
                retData = baseCore.uptoOBS(file_href,'1642',file_name)
                if retData['state']:
                    pass
                else:
                    continue
-                att_id,full_path = baseCore.tableUpdate(retData,'国务院国资委',file_name,num)
+                att_id,full_path = baseCore.tableUpdate(retData,'国务院国资委',file_name,num,pub_time)
                id_list.append(att_id)

                #todo:将返回的地址更新到soup
@@ -624,7 +635,7 @@ def get_content3():

 # 北京
 def bei_jing():
-
+    num = 0
    start_time = time.time()
    pathType = 'policy/beijing/'
    # 有反爬需要使用selenium
@@ -637,12 +648,12 @@ def bei_jing():
        "excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument('lang=zh-CN,zh,zh-TW,en-US,en')
-
+    chrome_options.add_argument('log-level=3')
    chrome_options.add_argument(
        'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
    # bro = webdriver.Chrome(chrome_options=chrome_options, executable_path=r'D:/chrome/103/chromedriver.exe')
-    chrome_options.binary_location = r'D:/fbs_spider/Google/Chrome/Application/chrome.exe'
-    chromedriver = r'D:/fbs_spider/cmd100/chromedriver.exe'
+    chrome_options.binary_location = r'D:\Google\Chrome\Application\chrome.exe'
+    chromedriver = r'D:\cmd100\chromedriver.exe'
    bro = webdriver.Chrome(chrome_options=chrome_options, executable_path=chromedriver)
    with open('../../base/stealth.min.js') as f:
        js = f.read()
@@ -732,12 +743,15 @@ def bei_jing():
                        or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                        or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                    file_name = file.text.strip()
-                    retData = baseCore.uptoOBS(file_href, '1667',file_name)
+                    category = os.path.splitext(file_href)[1]
+                    if category not in file_name:
+                        file_name = file_name + category
+                    retData = baseCore.uptoOBS(file_href, '1667',pathType,file_name)
                    if retData['state']:
                        pass
                    else:
                        continue
-                    att_id, full_path = baseCore.tableUpdate(retData, '北京市国资委', file_name, num)
+                    att_id, full_path = baseCore.tableUpdate(retData, '北京市国资委', file_name, num,pub_time)
                    id_list.append(att_id)

                    # todo:将返回的地址更新到soup
@@ -855,14 +869,17 @@ def nei_meng_gu():

                            fu_jian_re = str(real_href).split('/t')[0] + '/' + str(fu_jian_re).split('./')[1]
                            fu_jian_href = fu_jian_re
+                            category = os.path.splitext(fu_jian_href)[1]
+                            if category not in title:
+                                file_name = title + category
                            # print(fu_jian_href)
                            # todo:附件上传至文件服务器
-                            retData = baseCore.uptoOBS(fu_jian_href, '1669',pathType,title)
+                            retData = baseCore.uptoOBS(fu_jian_href, '1669',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '内蒙古自治区国资委', title, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '内蒙古自治区国资委', file_name, num,pub_time)
                            id_list.append(att_id)

                log.info(title)
@@ -1002,13 +1019,16 @@ def ji_lin():
                                    or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                    or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
                                file_name = fu_jian_href.text.strip()
+                                category = os.path.splitext(fu_jian_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
                                # print(fu_jian_href)
                                retData = baseCore.uptoOBS(fu_jian_href, '1670',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '吉林市国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '吉林市国资委', file_name, num,pub_time)
                                id_list.append(att_id)
                                #
                                # # todo:将返回的地址更新到soup
@@ -1044,12 +1064,15 @@ def ji_lin():
                                or '.rar' in fj_href or '.ppt' in fj_href or '.PDF' in fj_href or '.DOC' in fj_href \
                                or '.XLS' in fj_href or '.ZIP' in fj_href or '.RAR' in fj_href:
                            # print(fj_href)
+                            category = os.path.splitext(fj_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
                            retData = baseCore.uptoOBS(fj_href, '1670',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '吉林省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '吉林省国资委', file_name, num,pub_time)
                            id_list.append(att_id)
                            #
                            # # todo:将返回的地址更新到soup
@@ -1106,7 +1129,6 @@ def ji_lin():
    print('共', count, '条', '...........', '共耗时', end - start, '秒')

 # 上海
-
 def shang_hai():
    start = time.time()
    pathType = 'policy/shanghai/'
@@ -1196,12 +1218,15 @@ def shang_hai():
                        if '.doc' in fu_jian_href or '.docx' in fu_jian_href or '.pdf' in fu_jian_href or '.xls' in fu_jian_href or '.zip' in fu_jian_href \
                                or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
+                            category = os.path.splitext(fu_jian_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
                            retData = baseCore.uptoOBS(fu_jian_href, '1671',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '上海市国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '上海市国资委', file_name, num,pub_time)
                            id_list.append(att_id)

                            # todo:将返回的地址更新到soup
@@ -1434,13 +1459,16 @@ def fu_jian():
                        # 解析出pdf内容
                        content = baseCore.pdf_content(resp_content)
                        contentwithtag = ''
+                        category = os.path.splitext(real_href)[1]
+                        if category not in title:
+                            file_name = title + category
                        # 文件上传至服务器
-                        retData = baseCore.uptoOBS(real_href, '1673',pathType,title)
+                        retData = baseCore.uptoOBS(real_href, '1673',pathType,file_name)
                        if retData['state']:
                            pass
                        else:
                            continue
-                        att_id, full_path = baseCore.tableUpdate(retData, '福建省国资委', title, num)
+                        att_id, full_path = baseCore.tableUpdate(retData, '福建省国资委', file_name, num,'')
                        id_list.append(att_id)
                        pub_hao = ''
                        pub_time = ''
@@ -1454,6 +1482,15 @@ def fu_jian():
                            i_soup = BeautifulSoup(i_html, 'html.parser')
                            # 相对路径转化为绝对路径
                            i_soup = paserUrl(i_soup, real_href)
+                            source_ = str(i_soup.find('div', attrs={'class': 'xl_tit2_l'}).text)
+                            pub_source = source_.split('来源：')[1].split('发布时间：')[0].strip().lstrip()
+                            pub_time = source_.split('发布时间：')[1].split('浏览量：')[0].strip().lstrip()
+                            contentwithtag = i_soup.find('div', attrs={'class': 'xl_con1'})
+                            content = i_soup.find('div', attrs={'class': 'xl_con1'}).text
+                            if content == '' or content == None:
+                                log.info(f'-----{href}----{title}----内容为空-----')
+                                continue
+                            pub_hao = ''
                            # print(real_href)
                            # todo:获取附件地址
                            try:
@@ -1470,6 +1507,9 @@ def fu_jian():
                                if '.doc' in fj_href or '.docx' in fj_href or '.xlsx' in fj_href or '.pdf' in fj_href or '.xls' in fj_href or '.zip' in fj_href \
                                        or '.rar' in fj_href or '.ppt' in fj_href or '.PDF' in fj_href or '.DOC' in fj_href \
                                        or '.XLS' in fj_href or '.ZIP' in fj_href or '.RAR' in fj_href:
+                                    category = os.path.splitext(fj_href)[1]
+                                    if category not in file_name:
+                                        file_name = file_name + category
                                    print(fj_href)
                                    # 找到附件后 上传至文件服务器
                                    retData = baseCore.uptoOBS(fj_href, '1673',pathType,file_name)
@@ -1477,21 +1517,11 @@ def fu_jian():
                                        pass
                                    else:
                                        continue
-                                    att_id, full_path = baseCore.tableUpdate(retData, '福建省国资委', file_name, num)
+                                    att_id, full_path = baseCore.tableUpdate(retData, '福建省国资委', file_name, num,pub_time)
                                    id_list.append(att_id)
                                    # 将文件服务器的链接替换
                                    fu_jian['href'] = full_path

-                            source_ = str(i_soup.find('div', attrs={'class': 'xl_tit2_l'}).text)
-                            pub_source = source_.split('来源：')[1].split('发布时间：')[0].strip().lstrip()
-                            pub_time = source_.split('发布时间：')[1].split('浏览量：')[0].strip().lstrip()
-                            contentwithtag = i_soup.find('div', attrs={'class': 'xl_con1'})
-                            content = i_soup.find('div', attrs={'class': 'xl_con1'}).text
-                            if content == '' or content == None:
-                                log.info(f'-----{href}----{title}----内容为空-----')
-                                continue
-                            pub_hao = ''
-
                        except:
                            pub_source = ''
                            pub_time = ''
@@ -1714,13 +1744,16 @@ def guang_dong():
                        if '.doc' in fj_href or '.docx' in fj_href or '.pdf' in fj_href or '.xls' in fj_href or '.zip' in fj_href \
                                or '.rar' in fj_href or '.ppt' in fj_href or '.PDF' in fj_href or '.DOC' in fj_href \
                                or '.xlsx' in fj_href or '.ZIP' in fj_href or '.RAR' in fj_href:
+                            category = os.path.splitext(fj_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
                            # 附件上传至文件服务器
                            retData = baseCore.uptoOBS(fj_href, '1676',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '广东省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '广东省国资委', file_name, num,pub_time)
                            id_list.append(att_id)
                            # 将文件服务器的链接替换
                            fu_jian['href'] = full_path
@@ -1835,13 +1868,16 @@ def hai_nan():
                                if '.doc' in fu_jian_href or '.pdf' in fu_jian_href or '.docx' in fu_jian_href or '.xlsx' in fu_jian_href or '.xls' in fu_jian_href or '.zip' in fu_jian_href \
                                        or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                        or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
+                                    category = os.path.splitext(fu_jian_href)[1]
+                                    if category not in file_name:
+                                        file_name = file_name + category
                                    # 上传至文件服务器
                                    retData = baseCore.uptoOBS(fu_jian_href, '1677',pathType,file_name)
                                    if retData['state']:
                                        pass
                                    else:
                                        continue
-                                    att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num)
+                                    att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num,pub_time)
                                    id_list.append(att_id)
                                    # 将文件服务器的链接替换
                                    fu_jian['href'] = full_path
@@ -1879,6 +1915,9 @@ def hai_nan():
                                        if '.doc' in fu_jian_href or '.pdf' in fu_jian_href or '.xls' in fu_jian_href or '.zip' in fu_jian_href \
                                                or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                                or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
+                                            category = os.path.splitext(fu_jian_href)[1]
+                                            if category not in file_name:
+                                                file_name = file_name + category
                                            # print(f'----附件：{fu_jian_href}-----filename:{file_name}')
                                            # 附件上传至文件服务器
                                            retData = baseCore.uptoOBS(fu_jian_href, '1677',pathType,file_name)
@@ -1887,7 +1926,7 @@ def hai_nan():
                                            else:
                                                continue
                                            # 更新到数据库
-                                            att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num)
+                                            att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num,pub_time)
                                            id_list.append(att_id)
                                            fu_jian['href'] = full_path
                                except:
@@ -2103,13 +2142,16 @@ def hai_nan():
                        if '.doc' in fu_jian_href or '.docx' in fu_jian_href or '.pdf' in fu_jian_href or '.xls' in fu_jian_href or '.xlsx' in fu_jian_href or '.zip' in fu_jian_href \
                                or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
+                            category = os.path.splitext(fu_jian_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
                            # 上传至文件服务器
                            retData = baseCore.uptoOBS(fu_jian_href, '1677',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num,pub_time)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            fu_jian['href'] = full_path
@@ -2216,13 +2258,16 @@ def hai_nan():
                            if '.doc' in fu_jian_href or '.docx' in fu_jian_href or '.xlsx' in fu_jian_href or '.pdf' in fu_jian_href or '.xls' in fu_jian_href or '.zip' in fu_jian_href \
                                    or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                    or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
+                                category = os.path.splitext(fu_jian_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
                                # 上传至文件服务器
                                retData = baseCore.uptoOBS(fu_jian_href, '1677',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num,pub_time)
                                id_list.append(att_id)
                                fu_jian['href'] = full_path
                                # print(f'----附件：{fu_jian_href}')
@@ -2492,13 +2537,16 @@ def si_chuan():
                        if '.doc' in fu_jian_href or '.docx' in fu_jian_href or '.pdf' in fu_jian_href or '.xls' in fu_jian_href or '.xlsx' in fu_jian_href or '.zip' in fu_jian_href \
                                or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
+                            category = os.path.splitext(fu_jian_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
                            # 对附件上传至文件服务器
                            retData = baseCore.uptoOBS(fu_jian_href, '1678',pathType,file_name)
                            if retData['stste']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '四川省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '四川省国资委', file_name, num,pub_time)
                            id_list.append(att_id)
                            fu_jian['href'] = full_path

@@ -2622,6 +2670,9 @@ def guang_xi():
                            if '.docx' in fu_jian_href or '.pdf' in fu_jian_href or '.xlsx' in fu_jian_href or '.zip' in fu_jian_href \
                                    or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                    or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
+                                category = os.path.splitext(fu_jian_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
                                # 附件上传至文件服务器
                                retData = baseCore.uptoOBS(fu_jian_href, '1692',pathType,file_name)
                                if retData['state']:
@@ -2629,7 +2680,7 @@ def guang_xi():
                                else:
                                    continue

-                                att_id, full_path = baseCore.tableUpdate(retData, '广西壮族自治区国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '广西壮族自治区国资委', file_name, num,pub_time)
                                id_list.append(att_id)
                                # 将附件链接替换
                                fu_jian['href'] = full_path
@@ -2736,6 +2787,9 @@ def gui_zhou():
                        if '.doc' in fu_jian_href or '.pdf' in fu_jian_href or '.xls' in fu_jian_href or '.zip' in fu_jian_href \
                                or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
+                            category = os.path.splitext(fu_jian_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
                            # 附件上传至文件服务器
                            retData = baseCore.uptoOBS(fu_jian_href, '1694',pathType,file_name)
                            if retData['state']:
@@ -2743,7 +2797,7 @@ def gui_zhou():
                            else:
                                continue

-                            att_id, full_path = baseCore.tableUpdate(retData, '贵州省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '贵州省国资委', file_name, num,pub_time)
                            id_list.append(att_id)
                            # 将附件链接替换
                            fu_jian['href'] = full_path
@@ -2846,6 +2900,9 @@ def yun_nan():
                                        or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                        or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
                                    try:
+                                        category = os.path.splitext(fu_jian_href)[1]
+                                        if category not in file_name:
+                                            file_name = file_name + category
                                        # 附件上传至文件服务器
                                        retData = baseCore.uptoOBS(fu_jian_href, '1679',pathType,file_name)
                                        if retData['state']:
@@ -2853,7 +2910,7 @@ def yun_nan():
                                        else:
                                            continue

-                                        att_id, full_path = baseCore.tableUpdate(retData, '云南省国资委', file_name, num)
+                                        att_id, full_path = baseCore.tableUpdate(retData, '云南省国资委', file_name, num,'')
                                        id_list.append(att_id)
                                        # 将附件链接替换
                                        fu_jian['href'] = full_path
@@ -2964,6 +3021,9 @@ def yun_nan():
                                        or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
                                    # print(fu_jian_href)
                                    try:
+                                        category = os.path.splitext(fu_jian_href)[1]
+                                        if category not in file_name:
+                                            file_name = file_name + category
                                        # 附件上传至文件服务器
                                        retData = baseCore.uptoOBS(fu_jian_href, '1679',pathType,file_name)
                                        if retData['state']:
@@ -2971,7 +3031,7 @@ def yun_nan():
                                        else:
                                            continue

-                                        att_id, full_path = baseCore.tableUpdate(retData, '云南省国资委', file_name, num)
+                                        att_id, full_path = baseCore.tableUpdate(retData, '云南省国资委', file_name, num,pub_time)
                                        id_list.append(att_id)
                                        # 将附件链接替换
                                        fu_jian['href'] = full_path
@@ -3108,6 +3168,9 @@ def chong_qing():
                                    or '.rar' in fu_jian_href or '.ppt' in fu_jian_href or '.PDF' in fu_jian_href or '.DOC' in fu_jian_href \
                                    or '.XLS' in fu_jian_href or '.ZIP' in fu_jian_href or '.RAR' in fu_jian_href:
                                try:
+                                    category = os.path.splitext(fu_jian_href)[1]
+                                    if category not in file_name:
+                                        file_name = file_name + category
                                    # 附件上传至文件服务器
                                    retData = baseCore.uptoOBS(fu_jian_href, '1693',pathType,file_name)
                                    if retData['state']:
@@ -3115,7 +3178,7 @@ def chong_qing():
                                    else:
                                        continue

-                                    att_id, full_path = baseCore.tableUpdate(retData, '重庆市国资委', file_name, num)
+                                    att_id, full_path = baseCore.tableUpdate(retData, '重庆市国资委', file_name, num,pub_time)
                                    id_list.append(att_id)
                                    # 将附件链接替换
                                    fu_jian['href'] = full_path
@@ -3234,12 +3297,15 @@ def tian_jin():
                                    or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                    or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                file_name = file.text.strip()
-                                retData = baseCore.uptoOBS(file_href, '1683',file_name)
+                                category = os.path.splitext(file_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
+                                retData = baseCore.uptoOBS(file_href, '1683',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num,publishDate)
                                id_list.append(att_id)
                                # todo:将返回的地址更新到soup
                                file['href'] = full_path
@@ -3362,12 +3428,15 @@ def tian_jin():
                                    or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                    or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                file_name = file.text.strip()
-                                retData = baseCore.uptoOBS(file_href, '1683',file_name)
+                                category = os.path.splitext(file_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
+                                retData = baseCore.uptoOBS(file_href, '1683',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num,publishDate)
                                id_list.append(att_id)
                                # todo:将返回的地址更新到soup
                                file['href'] = full_path
@@ -3494,12 +3563,15 @@ def tian_jin():
                                    or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                    or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                file_name = file.text.strip()
-                                retData = baseCore.uptoOBS(file_href, '1683',file_name)
+                                category = os.path.splitext(file_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
+                                retData = baseCore.uptoOBS(file_href, '1683',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num,publishDate)
                                id_list.append(att_id)
                                # todo:将返回的地址更新到soup
                                file['href'] = full_path
@@ -3600,12 +3672,15 @@ def xin_jiang():
                                    or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                    or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                file_name = file.text.strip()
-                                retData = baseCore.uptoOBS(file_href, '1682',file_name)
+                                category = os.path.splitext(file_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
+                                retData = baseCore.uptoOBS(file_href, '1682',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '新疆维吾尔自治区国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '新疆维吾尔自治区国资委', file_name, num,publishDate)
                                id_list.append(att_id)
                                # todo:将返回的地址更新到soup
                                file['href'] = full_path
@@ -3704,12 +3779,15 @@ def xin_jiang():
                                    or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                    or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                file_name = file.text.strip()
-                                retData = baseCore.uptoOBS(file_href, '1682',file_name)
+                                category = os.path.splitext(file_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
+                                retData = baseCore.uptoOBS(file_href, '1682',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '新疆维吾尔自治区国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '新疆维吾尔自治区国资委', file_name, num,publishDate)
                                id_list.append(att_id)
                                # todo:将返回的地址更新到soup
                                file['href'] = full_path
@@ -3829,12 +3907,15 @@ def shan_xi():
                                or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                            file_name = file.text.strip()
-                            retData = baseCore.uptoOBS(file_href, '1684',file_name)
+                            category = os.path.splitext(file_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
+                            retData = baseCore.uptoOBS(file_href, '1684',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '山西省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '山西省国资委', file_name, num,publishDate)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            file['href'] = full_path
@@ -3946,12 +4027,15 @@ def liao_ning():
                                or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                            file_name = file.text.strip()
-                            retData = baseCore.uptoOBS(file_href, '1685',file_name)
+                            category = os.path.splitext(file_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
+                            retData = baseCore.uptoOBS(file_href, '1685',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '辽宁省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '辽宁省国资委', file_name, num,publishDate)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            file['href'] = full_path
@@ -4056,12 +4140,15 @@ def hei_long_jiang():
                                    or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                    or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                file_name = file.text.strip()
-                                retData = baseCore.uptoOBS(file_href, '1687',file_name)
+                                category = os.path.splitext(file_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
+                                retData = baseCore.uptoOBS(file_href, '1687',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '江苏省国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '江苏省国资委', file_name, num,publishDate)
                                id_list.append(att_id)
                                # todo:将返回的地址更新到soup
                                file['href'] = full_path
@@ -4169,12 +4256,15 @@ def jiang_su():
                                or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                            file_name = file.text.strip()
-                            retData = baseCore.uptoOBS(file_href, '1687',file_name)
+                            category = os.path.splitext(file_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
+                            retData = baseCore.uptoOBS(file_href, '1687',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '江苏省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '江苏省国资委', file_name, num,publishDate)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            file['href'] = full_path
@@ -4277,12 +4367,15 @@ def an_hui():
                                    or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                    or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                file_name = file.text.strip()
-                                retData = baseCore.uptoOBS(file_href, '1688',file_name)
+                                category = os.path.splitext(file_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
+                                retData = baseCore.uptoOBS(file_href, '1688',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '安徽省国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '安徽省国资委', file_name, num,publishDate)
                                id_list.append(att_id)
                                # todo:将返回的地址更新到soup
                                file['href'] = full_path
@@ -4378,12 +4471,15 @@ def an_hui():
                                    or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                    or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                file_name = file.text.strip()
-                                retData = baseCore.uptoOBS(file_href, '1688',file_name)
+                                category = os.path.splitext(file_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
+                                retData = baseCore.uptoOBS(file_href, '1688',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '安徽省国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '安徽省国资委', file_name, num,publishDate)
                                id_list.append(att_id)
                                # todo:将返回的地址更新到soup
                                file['href'] = full_path
@@ -4510,12 +4606,15 @@ def jiang_xi():
                                or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                            file_name = file.text.strip()
-                            retData = baseCore.uptoOBS(file_href, '1689',file_name)
+                            category = os.path.splitext(file_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
+                            retData = baseCore.uptoOBS(file_href, '1689',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '江西省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '江西省国资委', file_name, num,writtenDate)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            file['href'] = full_path
@@ -4611,12 +4710,15 @@ def he_nan():
                            or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                            or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                        file_name = file.text.strip()
-                        retData = baseCore.uptoOBS(file_href, '1690',file_name)
+                        category = os.path.splitext(file_href)[1]
+                        if category not in file_name:
+                            file_name = file_name + category
+                        retData = baseCore.uptoOBS(file_href, '1690',pathType,file_name)
                        if retData['state']:
                            pass
                        else:
                            continue
-                        att_id, full_path = baseCore.tableUpdate(retData, '河南省国资委', file_name, num)
+                        att_id, full_path = baseCore.tableUpdate(retData, '河南省国资委', file_name, num,publishDate)
                        id_list.append(att_id)
                        # todo:将返回的地址更新到soup
                        file['href'] =  full_path
@@ -4725,12 +4827,15 @@ def hu_nan():
                                or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                            file_name = file.text.strip()
-                            retData = baseCore.uptoOBS(file_href, '1691',file_name)
+                            category = os.path.splitext(file_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
+                            retData = baseCore.uptoOBS(file_href, '1691',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '湖南省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '湖南省国资委', file_name, num,publishDate)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            file['href'] = full_path
@@ -4857,23 +4962,26 @@ def gan_su():
                                or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                            file_name = file.text.strip()
+                            category = os.path.splitext(file_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
                            retData = baseCore.uptoOBS(file_href, '1696',file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num,publishDate)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            file['href'] =  full_path
-                    id_ = redefid(id_list)
+                    # id_ = redefid(id_list)
                    contentWithTag = str(soup.prettify())
                    content = soup.text
                    if content == '' or content == None:
                        log.info(f'-----{href}----{title}----内容为空-----')
                        continue
-                    t = time.strptime(publishDate, "%Y年%m月%d日")
-                    publishDate = time.strftime("%Y-%m-%d %H:%M:%S", t)
+                    # t = time.strptime(publishDate, "%Y年%m月%d日")
+                    # publishDate = time.strftime("%Y-%m-%d %H:%M:%S", t)
                    time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    # todo:传kafka字段
                    dic_news = {
@@ -5010,13 +5118,16 @@ def gan_su():
                                or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                            file_name = file.text.strip()
+                            category = os.path.splitext(file_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
                            log.info(f'{file_name}---{href}--')
                            retData = baseCore.uptoOBS(file_href, '1696',file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num,publishDate)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            file['href'] = full_path
@@ -5028,8 +5139,8 @@ def gan_su():
                        continue
                    if len(content) < 2:
                        continue
-                    t = time.strptime(publishDate, "%Y年%m月%d日")
-                    publishDate = time.strftime("%Y-%m-%d %H:%M:%S", t)
+                    # t = time.strptime(publishDate, "%Y年%m月%d日")
+                    # publishDate = time.strftime("%Y-%m-%d %H:%M:%S", t)
                    time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    # todo:传kafka字段
                    dic_news = {
@@ -5176,12 +5287,15 @@ def gan_su():
                                or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                            file_name = file.text.strip()
+                            category = os.path.splitext(file_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
                            retData = baseCore.uptoOBS(file_href, '1696',file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num,publishDate)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            file['href'] = full_path
@@ -5286,12 +5400,15 @@ def ning_xia():
                                or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                            file_name = file.text.strip()
-                            retData = baseCore.uptoOBS(file_href, '1697',file_name)
+                            category = os.path.splitext(file_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
+                            retData = baseCore.uptoOBS(file_href, '1697',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '宁夏回族自治区国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '宁夏回族自治区国资委', file_name, num,publishDate)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            file['href'] = full_path
@@ -5393,12 +5510,15 @@ def shanxi():
                            or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                            or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                        file_name = file.text.strip()
-                        retData = baseCore.uptoOBS(file_href, '1680',file_name)
+                        category = os.path.splitext(file_href)[1]
+                        if category not in file_name:
+                            file_name = file_name + category
+                        retData = baseCore.uptoOBS(file_href, '1680',pathType,file_name)
                        if retData['state']:
                            pass
                        else:
                            continue
-                        att_id, full_path = baseCore.tableUpdate(retData, '陕西省国资委', file_name, num)
+                        att_id, full_path = baseCore.tableUpdate(retData, '陕西省国资委', file_name, num,publishDate)
                        id_list.append(att_id)
                        # todo:将返回的地址更新到soup
                        file['href'] = full_path
@@ -5496,12 +5616,15 @@ def xi_zang():
                                or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                            file_name = file.text.strip()
-                            retData = baseCore.uptoOBS(file_href, '1695',file_name)
+                            category = os.path.splitext(file_href)[1]
+                            if category not in file_name:
+                                file_name = file_name + category
+                            retData = baseCore.uptoOBS(file_href, '1695',pathType,file_name)
                            if retData['state']:
                                pass
                            else:
                                continue
-                            att_id, full_path = baseCore.tableUpdate(retData, '西藏自治区国资委', file_name, num)
+                            att_id, full_path = baseCore.tableUpdate(retData, '西藏自治区国资委', file_name, num,publishDate)
                            id_list.append(att_id)
                            # todo:将返回的地址更新到soup
                            file['href'] = full_path
@@ -5598,12 +5721,15 @@ def qing_hai():
                                    or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                    or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                file_name = file.text.strip()
-                                retData = baseCore.uptoOBS(file_href, '1681',file_name)
+                                category = os.path.splitext(file_href)[1]
+                                if category not in file_name:
+                                    file_name = file_name + category
+                                retData = baseCore.uptoOBS(file_href, '1681',pathType,file_name)
                                if retData['state']:
                                    pass
                                else:
                                    continue
-                                att_id, full_path = baseCore.tableUpdate(retData, '青海省国资委', file_name, num)
+                                att_id, full_path = baseCore.tableUpdate(retData, '青海省国资委', file_name, num,publishDate)
                                id_list.append(att_id)
                                # todo:将返回的地址更新到soup
                                file['href'] = full_path
@@ -5722,12 +5848,15 @@ def qing_hai():
                                            or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                                            or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                                        file_name = file.text.strip()
-                                        retData = baseCore.uptoOBS(file_href, '1681',file_name)
+                                        category = os.path.splitext(file_href)[1]
+                                        if category not in file_name:
+                                            file_name = file_name + category
+                                        retData = baseCore.uptoOBS(file_href, '1681',pathType,file_name)
                                        if retData['state']:
                                            pass
                                        else:
                                            continue
-                                        att_id, full_path = baseCore.tableUpdate(retData, '青海省国资委', file_name, num)
+                                        att_id, full_path = baseCore.tableUpdate(retData, '青海省国资委', file_name, num,publishDate)
                                        id_list.append(att_id)
                                        # todo:将返回的地址更新到soup
                                        file['href'] = full_path
@@ -5813,12 +5942,15 @@ def he_bei():
                        or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                        or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                    file_name = file.text.strip()
-                    retData = baseCore.uptoOBS(file_href, '1668',file_name)
+                    category = os.path.splitext(file_href)[1]
+                    if category not in file_name:
+                        file_name = file_name + category
+                    retData = baseCore.uptoOBS(file_href, '1668',pathType,file_name)
                    if retData['state']:
                        pass
                    else:
                        continue
-                    att_id, full_path = baseCore.tableUpdate(retData, '河北省国资委', file_name, num)
+                    att_id, full_path = baseCore.tableUpdate(retData, '河北省国资委', file_name, num,publishDate)
                    id_list.append(att_id)
                    # todo:将返回的地址更新到soup
                    file['href'] = full_path
@@ -5935,12 +6067,15 @@ def hu_bei():
                        or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
                        or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
                    file_name = file.text.strip()
-                    retData = baseCore.uptoOBS(file_href, '1675',file_name)
+                    category = os.path.splitext(file_href)[1]
+                    if category not in file_name:
+                        file_name = file_name + category
+                    retData = baseCore.uptoOBS(file_href, '1675',pathType,file_name)
                    if retData['state']:
                        pass
                    else:
                        continue
-                    att_id, full_path = baseCore.tableUpdate(retData, '湖北省国资委', file_name, num)
+                    att_id, full_path = baseCore.tableUpdate(retData, '湖北省国资委', file_name, num,publishDate)
                    id_list.append(att_id)
                    # todo:将返回的地址更新到soup
                    file['href'] = full_path