提交 89859afd 作者: 薛凌堃

政策法规脚本调整

上级 59beffab
...@@ -38,8 +38,8 @@ taskType = '政策法规' ...@@ -38,8 +38,8 @@ taskType = '政策法规'
db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='zzsn@9988').caiji[ db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='zzsn@9988').caiji[
'国务院_国资委_copy1'] '国务院_国资委_copy1']
driver_path = r'D:\fbs_spider\cmd100\chromedriver.exe' driver_path = r'D:\cmd100\chromedriver.exe'
chromr_bin = r'D:\fbs_spider\Google\Chrome\Application\chrome.exe' chromr_bin = r'D:\Google\Chrome\Application\chrome.exe'
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
...@@ -258,7 +258,7 @@ def get_content1(): ...@@ -258,7 +258,7 @@ def get_content1():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href,'1766',pathType,file_name) retData = baseCore.uptoOBS(file_href,'1766',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -414,7 +414,7 @@ def get_content2(): ...@@ -414,7 +414,7 @@ def get_content2():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href,'1699',pathType,file_name) retData = baseCore.uptoOBS(file_href,'1699',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -522,7 +522,7 @@ def get_content3(): ...@@ -522,7 +522,7 @@ def get_content3():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href,'1642',pathType,file_name) retData = baseCore.uptoOBS(file_href,'1642',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -738,7 +738,7 @@ def bei_jing(): ...@@ -738,7 +738,7 @@ def bei_jing():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1667',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1667',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -3240,7 +3240,7 @@ def tian_jin(): ...@@ -3240,7 +3240,7 @@ def tian_jin():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1683',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1683',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -3368,7 +3368,7 @@ def tian_jin(): ...@@ -3368,7 +3368,7 @@ def tian_jin():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1683',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1683',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -3500,7 +3500,7 @@ def tian_jin(): ...@@ -3500,7 +3500,7 @@ def tian_jin():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1683',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1683',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -3606,7 +3606,7 @@ def xin_jiang(): ...@@ -3606,7 +3606,7 @@ def xin_jiang():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1682',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1682',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -3710,7 +3710,7 @@ def xin_jiang(): ...@@ -3710,7 +3710,7 @@ def xin_jiang():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1682',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1682',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -3835,7 +3835,7 @@ def shan_xi(): ...@@ -3835,7 +3835,7 @@ def shan_xi():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1684',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1684',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -3952,7 +3952,7 @@ def liao_ning(): ...@@ -3952,7 +3952,7 @@ def liao_ning():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1685',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1685',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -4062,7 +4062,7 @@ def hei_long_jiang(): ...@@ -4062,7 +4062,7 @@ def hei_long_jiang():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1687',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1687',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -4175,7 +4175,7 @@ def jiang_su(): ...@@ -4175,7 +4175,7 @@ def jiang_su():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1687',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1687',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -4283,7 +4283,7 @@ def an_hui(): ...@@ -4283,7 +4283,7 @@ def an_hui():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1688',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1688',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -4384,7 +4384,7 @@ def an_hui(): ...@@ -4384,7 +4384,7 @@ def an_hui():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1688',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1688',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -4516,7 +4516,7 @@ def jiang_xi(): ...@@ -4516,7 +4516,7 @@ def jiang_xi():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1689',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1689',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -4617,7 +4617,7 @@ def he_nan(): ...@@ -4617,7 +4617,7 @@ def he_nan():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1690',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1690',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -4731,7 +4731,7 @@ def hu_nan(): ...@@ -4731,7 +4731,7 @@ def hu_nan():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1691',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1691',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -4857,7 +4857,7 @@ def gan_su(): ...@@ -4857,7 +4857,7 @@ def gan_su():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1696',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1696',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -4940,9 +4940,9 @@ def gan_su(): ...@@ -4940,9 +4940,9 @@ def gan_su():
href = dd['href'] href = dd['href']
publishDate = dd['publishDate'] publishDate = dd['publishDate']
is_href = db_storage.find_one({'网址': href}) is_href = db_storage.find_one({'网址': href})
if is_href: # if is_href:
num+=1 # num+=1
continue # continue
bro.get(href) bro.get(href)
try: try:
alls = bro.find_element(By.CLASS_NAME, 'alls').text alls = bro.find_element(By.CLASS_NAME, 'alls').text
...@@ -4997,15 +4997,16 @@ def gan_su(): ...@@ -4997,15 +4997,16 @@ def gan_su():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1696',pathType,file_name) log.info(f'{file_name}---{href}--')
if retData['state']: # retData = baseCore.uptoOBS(file_href, '1696',file_name)
pass # if retData['state']:
else: # pass
continue # else:
att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num) # continue
id_list.append(att_id) # att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num)
# todo:将返回的地址更新到soup # id_list.append(att_id)
file['href'] = full_path # # todo:将返回的地址更新到soup
# file['href'] = full_path
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
...@@ -5018,30 +5019,30 @@ def gan_su(): ...@@ -5018,30 +5019,30 @@ def gan_su():
# publishDate = time.strftime("%Y-%m-%d %H:%M:%S", t) # publishDate = time.strftime("%Y-%m-%d %H:%M:%S", t)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段 # todo:传kafka字段
dic_news = { # dic_news = {
'attachmentIds': id_list, # 'attachmentIds': id_list,
'author': '', # 'author': '',
'content': str(content), # 'content': str(content),
'contentWithTag': str(contentWithTag), # 'contentWithTag': str(contentWithTag),
'createDate': time_now, # 'createDate': time_now,
'deleteFlag': 0, # 'deleteFlag': 0,
'id': '', # 'id': '',
'labels': [{'relationId': "1696", 'relationName': "甘肃省国资委", 'labelMark': "policy"}], # 'labels': [{'relationId': "1696", 'relationName': "甘肃省国资委", 'labelMark': "policy"}],
'origin': origin, # 'origin': origin,
'organ': organ, # 'organ': organ,
'topicClassification': topicClassification, # 'topicClassification': topicClassification,
'issuedNumber': pub_hao, # 'issuedNumber': pub_hao,
'publishDate': publishDate, # 'publishDate': publishDate,
'writtenDate': writtenDate, # 'writtenDate': writtenDate,
'sid': '1697458829758697473', # 'sid': '1697458829758697473',
'sourceAddress': href, # 'sourceAddress': href,
'summary': '', # 'summary': '',
'title': title # 'title': title
} # }
# print(dic_news) # # print(dic_news)
flag = sendKafka(dic_news) # flag = sendKafka(dic_news)
if flag: # if flag:
save_data(dic_news) # save_data(dic_news)
num += 1 num += 1
count += 1 count += 1
except Exception as e: except Exception as e:
...@@ -5160,7 +5161,7 @@ def gan_su(): ...@@ -5160,7 +5161,7 @@ def gan_su():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1696',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1696',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -5215,9 +5216,9 @@ def gan_su(): ...@@ -5215,9 +5216,9 @@ def gan_su():
end_time = time.time() end_time = time.time()
print(f'共抓取{count}条数据,共耗时{end_time - start_time}') print(f'共抓取{count}条数据,共耗时{end_time - start_time}')
gan_su1() # gan_su1()
gan_su2() gan_su2()
gan_su3() # gan_su3()
# 宁夏 # 宁夏
def ning_xia(): def ning_xia():
...@@ -5268,7 +5269,7 @@ def ning_xia(): ...@@ -5268,7 +5269,7 @@ def ning_xia():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1697',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1697',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -5375,7 +5376,7 @@ def shanxi(): ...@@ -5375,7 +5376,7 @@ def shanxi():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1680',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1680',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -5478,7 +5479,7 @@ def xi_zang(): ...@@ -5478,7 +5479,7 @@ def xi_zang():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1695',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1695',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -5580,7 +5581,7 @@ def qing_hai(): ...@@ -5580,7 +5581,7 @@ def qing_hai():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1681',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1681',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -5704,7 +5705,7 @@ def qing_hai(): ...@@ -5704,7 +5705,7 @@ def qing_hai():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1681',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1681',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -5795,7 +5796,7 @@ def he_bei(): ...@@ -5795,7 +5796,7 @@ def he_bei():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1668',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1668',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -5917,7 +5918,7 @@ def hu_bei(): ...@@ -5917,7 +5918,7 @@ def hu_bei():
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \ or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href: or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href:
file_name = file.text.strip() file_name = file.text.strip()
retData = baseCore.uptoOBS(file_href, '1675',pathType,file_name) retData = baseCore.uptoOBS(file_href, '1675',file_name)
if retData['state']: if retData['state']:
pass pass
else: else:
...@@ -5970,41 +5971,41 @@ def hu_bei(): ...@@ -5970,41 +5971,41 @@ def hu_bei():
print(f'共抓取{count}条数据,共耗时{end_time - start_time}') print(f'共抓取{count}条数据,共耗时{end_time - start_time}')
if __name__ == '__main__': if __name__ == '__main__':
get_content1() # get_content1()
get_content2() # get_content2()
get_content3() # get_content3()
bei_jing() # bei_jing()
nei_meng_gu() # nei_meng_gu()
ji_lin() # ji_lin()
shang_hai() # shang_hai()
zhe_jiang() # zhe_jiang()
fu_jian() # fu_jian()
shan_dong() # shan_dong()
guang_dong() # guang_dong()
hai_nan() # hai_nan()
si_chuan() # si_chuan()
guang_xi() # guang_xi()
gui_zhou() # gui_zhou()
yun_nan() # yun_nan()
chong_qing() # chong_qing()
tian_jin() # tian_jin()
xin_jiang() # xin_jiang()
shan_xi() # shan_xi()
liao_ning() # liao_ning()
hei_long_jiang() # hei_long_jiang()
jiang_su() # jiang_su()
an_hui() # an_hui()
jiang_xi() # jiang_xi()
he_nan() # he_nan()
hu_nan() # hu_nan()
gan_su() gan_su()
ning_xia() # ning_xia()
xi_zang() # xi_zang()
shanxi() # shanxi()
qing_hai() # qing_hai()
he_bei() # he_bei()
qing_hai() # qing_hai()
current_time = datetime.datetime.now() # current_time = datetime.datetime.now()
midnight_time = current_time.replace(hour=0, minute=0, second=0, microsecond=0) + datetime.timedelta(days=1) # midnight_time = current_time.replace(hour=0, minute=0, second=0, microsecond=0) + datetime.timedelta(days=1)
sleep_seconds = (midnight_time - current_time).total_seconds() # sleep_seconds = (midnight_time - current_time).total_seconds()
time.sleep(sleep_seconds) # time.sleep(sleep_seconds)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论