提交 6c7e4c57 作者: XveLingKun

政策法规--域名修改

上级 6ce462aa
......@@ -133,7 +133,7 @@ def bei_jing():
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
......
......@@ -111,7 +111,7 @@ def chong_qing():
pub_time)
id_list.append(att_id)
# 将附件链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except:
continue
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
......
......@@ -136,7 +136,7 @@ def fu_jian():
pub_time)
id_list.append(att_id)
# 将文件服务器的链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except:
pub_source = ''
......
......@@ -108,7 +108,7 @@ def gan_su():
att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
content = soup.text
......@@ -265,7 +265,7 @@ def gan_su():
att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify())
content = soup.text
......@@ -436,7 +436,7 @@ def gan_su():
att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify())
content = soup.text
......
......@@ -83,7 +83,7 @@ def guang_dong():
att_id, full_path = baseCore.tableUpdate(retData, '广东省国资委', file_name, num, pub_time)
id_list.append(att_id)
# 将文件服务器的链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段
......
......@@ -109,7 +109,7 @@ def guang_xi():
pub_time)
id_list.append(att_id)
# 将附件链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段
......
......@@ -92,7 +92,7 @@ def gui_zhou():
att_id, full_path = baseCore.tableUpdate(retData, '贵州省国资委', file_name, num, pub_time)
id_list.append(att_id)
# 将附件链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段
......
......@@ -152,7 +152,7 @@ def get_content1():
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except:
log.error(f'{title}...{href}...获取内容失败')
continue
......
......@@ -135,7 +135,7 @@ def get_content2():
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except:
log.error(f'{title}...{href}获取内容失败')
continue
......
......@@ -90,7 +90,7 @@ def get_content3():
continue
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段
dic_news = {
......
......@@ -108,7 +108,7 @@ def hai_nan():
pub_time)
id_list.append(att_id)
# 将文件服务器的链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except:
try:
# print(href)
......@@ -157,7 +157,7 @@ def hai_nan():
att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num,
pub_time)
id_list.append(att_id)
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except:
continue
......@@ -397,7 +397,7 @@ def hai_nan():
att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num, pub_time)
id_list.append(att_id)
# todo:将返回的地址更新到soup
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# print(f'附件:{fu_jian_href}')
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段
......@@ -519,7 +519,7 @@ def hai_nan():
continue
att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num, pub_time)
id_list.append(att_id)
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# print(f'----附件:{fu_jian_href}')
else:
pass
......
......@@ -61,7 +61,7 @@ def he_bei():
att_id, full_path = baseCore.tableUpdate(retData, '河北省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1:
......
......@@ -67,7 +67,7 @@ def he_nan():
att_id, full_path = baseCore.tableUpdate(retData, '河南省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify())
content = soup.text
......
......@@ -74,7 +74,7 @@ def hei_long_jiang():
att_id, full_path = baseCore.tableUpdate(retData, '江苏省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify())
content = soup.text
......
......@@ -87,7 +87,7 @@ def hu_bei(chromr_bin=None):
att_id, full_path = baseCore.tableUpdate(retData, '湖北省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1:
......
......@@ -82,7 +82,7 @@ def hu_nan():
att_id, full_path = baseCore.tableUpdate(retData, '湖南省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify())
content = soup.text
......
......@@ -124,7 +124,7 @@ def ji_lin():
id_list.append(att_id)
#
# # todo:将返回的地址更新到soup
li.find('a')['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
li.find('a')['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
else:
continue
else:
......@@ -168,7 +168,7 @@ def ji_lin():
# id_list.append(att_id)
# #
# # # todo:将返回的地址更新到soup
# fu_jian_href['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
# fu_jian_href['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# else:
# continue
......
......@@ -85,7 +85,7 @@ def jiang_su():
att_id, full_path = baseCore.tableUpdate(retData, '江苏省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify())
content = soup.text
......
......@@ -101,7 +101,7 @@ def jiang_xi():
att_id, full_path = baseCore.tableUpdate(retData, '江西省国资委', file_name, num, writtenDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify())
content = soup.text
......
......@@ -80,7 +80,7 @@ def liao_ning():
att_id, full_path = baseCore.tableUpdate(retData, '辽宁省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1:
......
......@@ -73,7 +73,7 @@ def ning_xia():
att_id, full_path = baseCore.tableUpdate(retData, '宁夏回族自治区国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
content = soup.text
......
......@@ -78,7 +78,7 @@ def qing_hai():
att_id, full_path = baseCore.tableUpdate(retData, '青海省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
# todo:替换完成之后,将附件上传至文件服务器
......@@ -205,7 +205,7 @@ def qing_hai():
publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
# todo:替换完成之后,将附件上传至文件服务器
......
......@@ -88,7 +88,7 @@ def shan_xi():
att_id, full_path = baseCore.tableUpdate(retData, '山西省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1:
......
......@@ -114,7 +114,7 @@ def shang_hai():
id_list.append(att_id)
# todo:将返回的地址更新到soup
a['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
a['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
else:
continue
......
......@@ -78,7 +78,7 @@ def shanxi():
att_id, full_path = baseCore.tableUpdate(retData, '陕西省国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
content = soup.text
......
......@@ -79,7 +79,7 @@ def si_chuan():
continue
att_id, full_path = baseCore.tableUpdate(retData, '四川省国资委', file_name, num, pub_time)
id_list.append(att_id)
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# fu_jian_href_list.append(fu_jian_href)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
......
......@@ -106,7 +106,7 @@ def tian_jin():
att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
......@@ -241,7 +241,7 @@ def tian_jin():
att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
if id_list:
pass
......@@ -390,7 +390,7 @@ def tian_jin():
att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1:
......
......@@ -75,7 +75,7 @@ def xi_zang():
att_id, full_path = baseCore.tableUpdate(retData, '西藏自治区国资委', file_name, num, publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
# todo:替换完成之后,将附件上传至文件服务器
......
......@@ -75,7 +75,7 @@ def xin_jiang():
publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1:
......@@ -183,7 +183,7 @@ def xin_jiang():
publishDate)
id_list.append(att_id)
# todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list)
contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1:
......
......@@ -89,7 +89,7 @@ def yun_nan():
att_id, full_path = baseCore.tableUpdate(retData, '云南省国资委', file_name, num, '')
id_list.append(att_id)
# 将附件链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except:
continue
href_resp.close()
......@@ -211,7 +211,7 @@ def yun_nan():
pub_time)
id_list.append(att_id)
# 将附件链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path)
fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except:
continue
res_.close()
......
......@@ -305,8 +305,7 @@ class GoogleSpider(object):
self.driver.find_element('xpath', '//div[@id="hdtb-tls"]').click()
time.sleep(2)
# self.driver.find_element('xpath', '//div[@class="hdtb-mn-hd"]/div[text()="按相关性排序"]').click()
self.driver.find_element('xpath',
'//*[@id="tn_1"]/span[3]/g-popup/div[1]/div/div/div[text()="按相关性排序"]').click()
self.driver.find_element('xpath', '//*[@id="tn_1"]/span[3]/g-popup/div[1]/div/div/div[text()="按相关性排序"]').click()
time.sleep(2)
# self.driver.find_element('xpath', '//div[@class="YpcDnf OSrXXb HG1dvd"]/a[text()="按日期排序"]').click()
self.driver.find_element('xpath', '//*[@id="lb"]/div/g-menu/g-menu-item[2]/div/a[text()="按日期排序"]').click()
......
......@@ -145,7 +145,7 @@ def translate(title, contentWithTag):
'contentWithTag': contentWithTag
}
dic_info = json.dumps(dic_info)
req = requests.post('http://117.78.23.14:5000/translate', data=dic_info, headers=headers)
req = requests.post('http://117.78.23.14:5001/translate', data=dic_info, headers=headers)
if req.status_code == '200':
pass
else:
......@@ -180,10 +180,10 @@ def doJob():
publishDate = publishDate.strftime("%Y-%m-%d %H:%M:%S")
if publishDate < '2023-01-20':
continue
is_href = db_storage.find_one({'网址': href})
if is_href:
log.info(f'{href}===已采集')
continue
# is_href = db_storage.find_one({'网址': href})
# if is_href:
# log.info(f'{href}===已采集')
# continue
div.find_all('div')[0].extract()
div.find('span', class_='Z3988').extract()
contentWithTag = div
......
......@@ -452,7 +452,7 @@ def aaaaa(final_output):
print(finall_list)
if __name__ == '__main__':
same_list = ['让我们从一次时光旅行', '开启植物天堂的故事', '地球的午夜', '是在火山喷发中度过的', '到了凌晨三四点', '在海洋深处有了生命的迹象', '清晨6点多', '更加壮丽的生命乐章开始了', '更加壮丽的生命乐草开始了', '更加壮丽的生命乐章开始了', '更加壮丽的生命乐草开始了', '更加壮丽的生命乐章开始了', '种蓝藻细菌', '一种蓝藻细菌', '学会利用二氧化碳水和阳光', '制造生命所需能量', '同时释放出了氧气', '这个被称为光合作用的过程', '为植物世界打开了大门', '此时', '中国的陆地', '也逐渐从海洋露出形成岛屿', '但在相当长的时间里', '陆地十分荒凉没有生机', '这些岩石坚硬', '无法储存水分', '是当时陆地环境的写照', '直到晚上九点多', '也就是四亿年前左右', '些矮小的生命', '开始征服陆地', '她们用一种近似于根的构造', '固定在岩石上', '苔藓', '是陆地最早的拓荒者之', '小', '她们死后的身体', '形成了肥沃的土壤', '让更多的植物可以在这里生存', '从此', '绿色成为植物天堂的底色']
# same_list = ['让我们从一次时光旅行', '开启植物天堂的故事', '地球的午夜', '是在火山喷发中度过的', '到了凌晨三四点', '在海洋深处有了生命的迹象', '清晨6点多', '更加壮丽的生命乐章开始了', '更加壮丽的生命乐草开始了', '更加壮丽的生命乐章开始了', '更加壮丽的生命乐草开始了', '更加壮丽的生命乐章开始了', '种蓝藻细菌', '一种蓝藻细菌', '学会利用二氧化碳水和阳光', '制造生命所需能量', '同时释放出了氧气', '这个被称为光合作用的过程', '为植物世界打开了大门', '此时', '中国的陆地', '也逐渐从海洋露出形成岛屿', '但在相当长的时间里', '陆地十分荒凉没有生机', '这些岩石坚硬', '无法储存水分', '是当时陆地环境的写照', '直到晚上九点多', '也就是四亿年前左右', '些矮小的生命', '开始征服陆地', '她们用一种近似于根的构造', '固定在岩石上', '苔藓', '是陆地最早的拓荒者之', '小', '她们死后的身体', '形成了肥沃的土壤', '让更多的植物可以在这里生存', '从此', '绿色成为植物天堂的底色']
# aaa = aaaaa(same_list)
......@@ -461,4 +461,12 @@ if __name__ == '__main__':
# for i in range(len(same_list)):
# print(i, same_list[i])
#
isHandleSuccess, handleMsg = True, "success"
for i in range(3):
if i <= 3:
HandleSuccess, handleMsg = True, "success"
else:
HandleSuccess, handleMsg = False, "error"
print(i, HandleSuccess, handleMsg)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论