提交 6c7e4c57 作者: XveLingKun

政策法规--域名修改

上级 6ce462aa
...@@ -133,7 +133,7 @@ def bei_jing(): ...@@ -133,7 +133,7 @@ def bei_jing():
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
......
...@@ -111,7 +111,7 @@ def chong_qing(): ...@@ -111,7 +111,7 @@ def chong_qing():
pub_time) pub_time)
id_list.append(att_id) id_list.append(att_id)
# 将附件链接替换 # 将附件链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except: except:
continue continue
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
......
...@@ -136,7 +136,7 @@ def fu_jian(): ...@@ -136,7 +136,7 @@ def fu_jian():
pub_time) pub_time)
id_list.append(att_id) id_list.append(att_id)
# 将文件服务器的链接替换 # 将文件服务器的链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except: except:
pub_source = '' pub_source = ''
......
...@@ -108,7 +108,7 @@ def gan_su(): ...@@ -108,7 +108,7 @@ def gan_su():
att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
...@@ -265,7 +265,7 @@ def gan_su(): ...@@ -265,7 +265,7 @@ def gan_su():
att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
...@@ -436,7 +436,7 @@ def gan_su(): ...@@ -436,7 +436,7 @@ def gan_su():
att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '甘肃省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
......
...@@ -83,7 +83,7 @@ def guang_dong(): ...@@ -83,7 +83,7 @@ def guang_dong():
att_id, full_path = baseCore.tableUpdate(retData, '广东省国资委', file_name, num, pub_time) att_id, full_path = baseCore.tableUpdate(retData, '广东省国资委', file_name, num, pub_time)
id_list.append(att_id) id_list.append(att_id)
# 将文件服务器的链接替换 # 将文件服务器的链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段 # todo:传kafka字段
......
...@@ -109,7 +109,7 @@ def guang_xi(): ...@@ -109,7 +109,7 @@ def guang_xi():
pub_time) pub_time)
id_list.append(att_id) id_list.append(att_id)
# 将附件链接替换 # 将附件链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段 # todo:传kafka字段
......
...@@ -92,7 +92,7 @@ def gui_zhou(): ...@@ -92,7 +92,7 @@ def gui_zhou():
att_id, full_path = baseCore.tableUpdate(retData, '贵州省国资委', file_name, num, pub_time) att_id, full_path = baseCore.tableUpdate(retData, '贵州省国资委', file_name, num, pub_time)
id_list.append(att_id) id_list.append(att_id)
# 将附件链接替换 # 将附件链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段 # todo:传kafka字段
......
...@@ -152,7 +152,7 @@ def get_content1(): ...@@ -152,7 +152,7 @@ def get_content1():
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except: except:
log.error(f'{title}...{href}...获取内容失败') log.error(f'{title}...{href}...获取内容失败')
continue continue
......
...@@ -135,7 +135,7 @@ def get_content2(): ...@@ -135,7 +135,7 @@ def get_content2():
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except: except:
log.error(f'{title}...{href}获取内容失败') log.error(f'{title}...{href}获取内容失败')
continue continue
......
...@@ -90,7 +90,7 @@ def get_content3(): ...@@ -90,7 +90,7 @@ def get_content3():
continue continue
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段 # todo:传kafka字段
dic_news = { dic_news = {
......
...@@ -108,7 +108,7 @@ def hai_nan(): ...@@ -108,7 +108,7 @@ def hai_nan():
pub_time) pub_time)
id_list.append(att_id) id_list.append(att_id)
# 将文件服务器的链接替换 # 将文件服务器的链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except: except:
try: try:
# print(href) # print(href)
...@@ -157,7 +157,7 @@ def hai_nan(): ...@@ -157,7 +157,7 @@ def hai_nan():
att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num, att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num,
pub_time) pub_time)
id_list.append(att_id) id_list.append(att_id)
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except: except:
continue continue
...@@ -397,7 +397,7 @@ def hai_nan(): ...@@ -397,7 +397,7 @@ def hai_nan():
att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num, pub_time) att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num, pub_time)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# print(f'附件:{fu_jian_href}') # print(f'附件:{fu_jian_href}')
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# todo:传kafka字段 # todo:传kafka字段
...@@ -519,7 +519,7 @@ def hai_nan(): ...@@ -519,7 +519,7 @@ def hai_nan():
continue continue
att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num, pub_time) att_id, full_path = baseCore.tableUpdate(retData, '海南省国资委', file_name, num, pub_time)
id_list.append(att_id) id_list.append(att_id)
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# print(f'----附件:{fu_jian_href}') # print(f'----附件:{fu_jian_href}')
else: else:
pass pass
......
...@@ -61,7 +61,7 @@ def he_bei(): ...@@ -61,7 +61,7 @@ def he_bei():
att_id, full_path = baseCore.tableUpdate(retData, '河北省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '河北省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1: if len(contentWithTag) < 1:
......
...@@ -67,7 +67,7 @@ def he_nan(): ...@@ -67,7 +67,7 @@ def he_nan():
att_id, full_path = baseCore.tableUpdate(retData, '河南省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '河南省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
......
...@@ -74,7 +74,7 @@ def hei_long_jiang(): ...@@ -74,7 +74,7 @@ def hei_long_jiang():
att_id, full_path = baseCore.tableUpdate(retData, '江苏省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '江苏省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
......
...@@ -87,7 +87,7 @@ def hu_bei(chromr_bin=None): ...@@ -87,7 +87,7 @@ def hu_bei(chromr_bin=None):
att_id, full_path = baseCore.tableUpdate(retData, '湖北省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '湖北省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1: if len(contentWithTag) < 1:
......
...@@ -82,7 +82,7 @@ def hu_nan(): ...@@ -82,7 +82,7 @@ def hu_nan():
att_id, full_path = baseCore.tableUpdate(retData, '湖南省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '湖南省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
......
...@@ -124,7 +124,7 @@ def ji_lin(): ...@@ -124,7 +124,7 @@ def ji_lin():
id_list.append(att_id) id_list.append(att_id)
# #
# # todo:将返回的地址更新到soup # # todo:将返回的地址更新到soup
li.find('a')['href'] = 'http:zzsn.luyuen.com/' + str(full_path) li.find('a')['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
else: else:
continue continue
else: else:
...@@ -168,7 +168,7 @@ def ji_lin(): ...@@ -168,7 +168,7 @@ def ji_lin():
# id_list.append(att_id) # id_list.append(att_id)
# # # #
# # # todo:将返回的地址更新到soup # # # todo:将返回的地址更新到soup
# fu_jian_href['href'] = 'http:zzsn.luyuen.com/' + str(full_path) # fu_jian_href['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# else: # else:
# continue # continue
......
...@@ -85,7 +85,7 @@ def jiang_su(): ...@@ -85,7 +85,7 @@ def jiang_su():
att_id, full_path = baseCore.tableUpdate(retData, '江苏省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '江苏省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
......
...@@ -101,7 +101,7 @@ def jiang_xi(): ...@@ -101,7 +101,7 @@ def jiang_xi():
att_id, full_path = baseCore.tableUpdate(retData, '江西省国资委', file_name, num, writtenDate) att_id, full_path = baseCore.tableUpdate(retData, '江西省国资委', file_name, num, writtenDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
......
...@@ -80,7 +80,7 @@ def liao_ning(): ...@@ -80,7 +80,7 @@ def liao_ning():
att_id, full_path = baseCore.tableUpdate(retData, '辽宁省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '辽宁省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1: if len(contentWithTag) < 1:
......
...@@ -73,7 +73,7 @@ def ning_xia(): ...@@ -73,7 +73,7 @@ def ning_xia():
att_id, full_path = baseCore.tableUpdate(retData, '宁夏回族自治区国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '宁夏回族自治区国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
......
...@@ -78,7 +78,7 @@ def qing_hai(): ...@@ -78,7 +78,7 @@ def qing_hai():
att_id, full_path = baseCore.tableUpdate(retData, '青海省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '青海省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
# todo:替换完成之后,将附件上传至文件服务器 # todo:替换完成之后,将附件上传至文件服务器
...@@ -205,7 +205,7 @@ def qing_hai(): ...@@ -205,7 +205,7 @@ def qing_hai():
publishDate) publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
# todo:替换完成之后,将附件上传至文件服务器 # todo:替换完成之后,将附件上传至文件服务器
......
...@@ -88,7 +88,7 @@ def shan_xi(): ...@@ -88,7 +88,7 @@ def shan_xi():
att_id, full_path = baseCore.tableUpdate(retData, '山西省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '山西省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1: if len(contentWithTag) < 1:
......
...@@ -114,7 +114,7 @@ def shang_hai(): ...@@ -114,7 +114,7 @@ def shang_hai():
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
a['href'] = 'http:zzsn.luyuen.com/' + str(full_path) a['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
else: else:
continue continue
......
...@@ -78,7 +78,7 @@ def shanxi(): ...@@ -78,7 +78,7 @@ def shanxi():
att_id, full_path = baseCore.tableUpdate(retData, '陕西省国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '陕西省国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
content = soup.text content = soup.text
......
...@@ -79,7 +79,7 @@ def si_chuan(): ...@@ -79,7 +79,7 @@ def si_chuan():
continue continue
att_id, full_path = baseCore.tableUpdate(retData, '四川省国资委', file_name, num, pub_time) att_id, full_path = baseCore.tableUpdate(retData, '四川省国资委', file_name, num, pub_time)
id_list.append(att_id) id_list.append(att_id)
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# fu_jian_href_list.append(fu_jian_href) # fu_jian_href_list.append(fu_jian_href)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
......
...@@ -106,7 +106,7 @@ def tian_jin(): ...@@ -106,7 +106,7 @@ def tian_jin():
att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
...@@ -241,7 +241,7 @@ def tian_jin(): ...@@ -241,7 +241,7 @@ def tian_jin():
att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
if id_list: if id_list:
pass pass
...@@ -390,7 +390,7 @@ def tian_jin(): ...@@ -390,7 +390,7 @@ def tian_jin():
att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '天津市国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1: if len(contentWithTag) < 1:
......
...@@ -75,7 +75,7 @@ def xi_zang(): ...@@ -75,7 +75,7 @@ def xi_zang():
att_id, full_path = baseCore.tableUpdate(retData, '西藏自治区国资委', file_name, num, publishDate) att_id, full_path = baseCore.tableUpdate(retData, '西藏自治区国资委', file_name, num, publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
# todo:替换完成之后,将附件上传至文件服务器 # todo:替换完成之后,将附件上传至文件服务器
......
...@@ -75,7 +75,7 @@ def xin_jiang(): ...@@ -75,7 +75,7 @@ def xin_jiang():
publishDate) publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1: if len(contentWithTag) < 1:
...@@ -183,7 +183,7 @@ def xin_jiang(): ...@@ -183,7 +183,7 @@ def xin_jiang():
publishDate) publishDate)
id_list.append(att_id) id_list.append(att_id)
# todo:将返回的地址更新到soup # todo:将返回的地址更新到soup
file['href'] = 'http:zzsn.luyuen.com/' + str(full_path) file['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
# id_ = redefid(id_list) # id_ = redefid(id_list)
contentWithTag = str(soup.prettify()) contentWithTag = str(soup.prettify())
if len(contentWithTag) < 1: if len(contentWithTag) < 1:
......
...@@ -89,7 +89,7 @@ def yun_nan(): ...@@ -89,7 +89,7 @@ def yun_nan():
att_id, full_path = baseCore.tableUpdate(retData, '云南省国资委', file_name, num, '') att_id, full_path = baseCore.tableUpdate(retData, '云南省国资委', file_name, num, '')
id_list.append(att_id) id_list.append(att_id)
# 将附件链接替换 # 将附件链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except: except:
continue continue
href_resp.close() href_resp.close()
...@@ -211,7 +211,7 @@ def yun_nan(): ...@@ -211,7 +211,7 @@ def yun_nan():
pub_time) pub_time)
id_list.append(att_id) id_list.append(att_id)
# 将附件链接替换 # 将附件链接替换
fu_jian['href'] = 'http:zzsn.luyuen.com/' + str(full_path) fu_jian['href'] = 'http:obs.ciglobal.cn/' + str(full_path)
except: except:
continue continue
res_.close() res_.close()
......
...@@ -305,8 +305,7 @@ class GoogleSpider(object): ...@@ -305,8 +305,7 @@ class GoogleSpider(object):
self.driver.find_element('xpath', '//div[@id="hdtb-tls"]').click() self.driver.find_element('xpath', '//div[@id="hdtb-tls"]').click()
time.sleep(2) time.sleep(2)
# self.driver.find_element('xpath', '//div[@class="hdtb-mn-hd"]/div[text()="按相关性排序"]').click() # self.driver.find_element('xpath', '//div[@class="hdtb-mn-hd"]/div[text()="按相关性排序"]').click()
self.driver.find_element('xpath', self.driver.find_element('xpath', '//*[@id="tn_1"]/span[3]/g-popup/div[1]/div/div/div[text()="按相关性排序"]').click()
'//*[@id="tn_1"]/span[3]/g-popup/div[1]/div/div/div[text()="按相关性排序"]').click()
time.sleep(2) time.sleep(2)
# self.driver.find_element('xpath', '//div[@class="YpcDnf OSrXXb HG1dvd"]/a[text()="按日期排序"]').click() # self.driver.find_element('xpath', '//div[@class="YpcDnf OSrXXb HG1dvd"]/a[text()="按日期排序"]').click()
self.driver.find_element('xpath', '//*[@id="lb"]/div/g-menu/g-menu-item[2]/div/a[text()="按日期排序"]').click() self.driver.find_element('xpath', '//*[@id="lb"]/div/g-menu/g-menu-item[2]/div/a[text()="按日期排序"]').click()
......
...@@ -145,7 +145,7 @@ def translate(title, contentWithTag): ...@@ -145,7 +145,7 @@ def translate(title, contentWithTag):
'contentWithTag': contentWithTag 'contentWithTag': contentWithTag
} }
dic_info = json.dumps(dic_info) dic_info = json.dumps(dic_info)
req = requests.post('http://117.78.23.14:5000/translate', data=dic_info, headers=headers) req = requests.post('http://117.78.23.14:5001/translate', data=dic_info, headers=headers)
if req.status_code == '200': if req.status_code == '200':
pass pass
else: else:
...@@ -180,10 +180,10 @@ def doJob(): ...@@ -180,10 +180,10 @@ def doJob():
publishDate = publishDate.strftime("%Y-%m-%d %H:%M:%S") publishDate = publishDate.strftime("%Y-%m-%d %H:%M:%S")
if publishDate < '2023-01-20': if publishDate < '2023-01-20':
continue continue
is_href = db_storage.find_one({'网址': href}) # is_href = db_storage.find_one({'网址': href})
if is_href: # if is_href:
log.info(f'{href}===已采集') # log.info(f'{href}===已采集')
continue # continue
div.find_all('div')[0].extract() div.find_all('div')[0].extract()
div.find('span', class_='Z3988').extract() div.find('span', class_='Z3988').extract()
contentWithTag = div contentWithTag = div
......
...@@ -452,7 +452,7 @@ def aaaaa(final_output): ...@@ -452,7 +452,7 @@ def aaaaa(final_output):
print(finall_list) print(finall_list)
if __name__ == '__main__': if __name__ == '__main__':
same_list = ['让我们从一次时光旅行', '开启植物天堂的故事', '地球的午夜', '是在火山喷发中度过的', '到了凌晨三四点', '在海洋深处有了生命的迹象', '清晨6点多', '更加壮丽的生命乐章开始了', '更加壮丽的生命乐草开始了', '更加壮丽的生命乐章开始了', '更加壮丽的生命乐草开始了', '更加壮丽的生命乐章开始了', '种蓝藻细菌', '一种蓝藻细菌', '学会利用二氧化碳水和阳光', '制造生命所需能量', '同时释放出了氧气', '这个被称为光合作用的过程', '为植物世界打开了大门', '此时', '中国的陆地', '也逐渐从海洋露出形成岛屿', '但在相当长的时间里', '陆地十分荒凉没有生机', '这些岩石坚硬', '无法储存水分', '是当时陆地环境的写照', '直到晚上九点多', '也就是四亿年前左右', '些矮小的生命', '开始征服陆地', '她们用一种近似于根的构造', '固定在岩石上', '苔藓', '是陆地最早的拓荒者之', '小', '她们死后的身体', '形成了肥沃的土壤', '让更多的植物可以在这里生存', '从此', '绿色成为植物天堂的底色'] # same_list = ['让我们从一次时光旅行', '开启植物天堂的故事', '地球的午夜', '是在火山喷发中度过的', '到了凌晨三四点', '在海洋深处有了生命的迹象', '清晨6点多', '更加壮丽的生命乐章开始了', '更加壮丽的生命乐草开始了', '更加壮丽的生命乐章开始了', '更加壮丽的生命乐草开始了', '更加壮丽的生命乐章开始了', '种蓝藻细菌', '一种蓝藻细菌', '学会利用二氧化碳水和阳光', '制造生命所需能量', '同时释放出了氧气', '这个被称为光合作用的过程', '为植物世界打开了大门', '此时', '中国的陆地', '也逐渐从海洋露出形成岛屿', '但在相当长的时间里', '陆地十分荒凉没有生机', '这些岩石坚硬', '无法储存水分', '是当时陆地环境的写照', '直到晚上九点多', '也就是四亿年前左右', '些矮小的生命', '开始征服陆地', '她们用一种近似于根的构造', '固定在岩石上', '苔藓', '是陆地最早的拓荒者之', '小', '她们死后的身体', '形成了肥沃的土壤', '让更多的植物可以在这里生存', '从此', '绿色成为植物天堂的底色']
# aaa = aaaaa(same_list) # aaa = aaaaa(same_list)
...@@ -461,4 +461,12 @@ if __name__ == '__main__': ...@@ -461,4 +461,12 @@ if __name__ == '__main__':
# for i in range(len(same_list)): # for i in range(len(same_list)):
# print(i, same_list[i]) # print(i, same_list[i])
# #
# Scratch check of a (success flag, message) pair that is re-evaluated on each
# of three iterations and printed every time.
# The flag is kept under the single name `isHandleSuccess`, so the value
# initialised here is the one the loop updates and prints.
isHandleSuccess, handleMsg = True, "success"
for i in range(3):
    # NOTE(review): i only takes the values 0, 1, 2 here, so `i <= 3` always
    # holds and the "error" branch never runs — confirm the intended threshold.
    if i <= 3:
        isHandleSuccess, handleMsg = True, "success"
    else:
        isHandleSuccess, handleMsg = False, "error"
    print(i, isHandleSuccess, handleMsg)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论