Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
王景浩
zzsn_spider
Commits
6c7e4c57
提交
6c7e4c57
authored
5月 21, 2024
作者:
XveLingKun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
政策法规--域名修改
上级
6ce462aa
显示空白字符变更
内嵌
并排
正在显示
33 个修改的文件
包含
56 行增加
和
49 行删除
+56
-49
bei_jing.py
comData/policylaw/bei_jing.py
+1
-1
chong_qing.py
comData/policylaw/chong_qing.py
+1
-1
fu_jian.py
comData/policylaw/fu_jian.py
+1
-1
gan_su.py
comData/policylaw/gan_su.py
+3
-3
guang_dong.py
comData/policylaw/guang_dong.py
+1
-1
guang_xi.py
comData/policylaw/guang_xi.py
+1
-1
gui_zhou.py
comData/policylaw/gui_zhou.py
+1
-1
gwyfile.py
comData/policylaw/gwyfile.py
+1
-1
gwyparts.py
comData/policylaw/gwyparts.py
+1
-1
gwysasac.py
comData/policylaw/gwysasac.py
+1
-1
hai_nan.py
comData/policylaw/hai_nan.py
+4
-4
he_bei.py
comData/policylaw/he_bei.py
+1
-1
he_nan.py
comData/policylaw/he_nan.py
+1
-1
hei_long_jiang.py
comData/policylaw/hei_long_jiang.py
+1
-1
hu_bei.py
comData/policylaw/hu_bei.py
+1
-1
hu_nan.py
comData/policylaw/hu_nan.py
+1
-1
ji_lin.py
comData/policylaw/ji_lin.py
+2
-2
jiang_su.py
comData/policylaw/jiang_su.py
+1
-1
jiang_xi.py
comData/policylaw/jiang_xi.py
+1
-1
liao_ning.py
comData/policylaw/liao_ning.py
+1
-1
ning_xia.py
comData/policylaw/ning_xia.py
+1
-1
qing_hai.py
comData/policylaw/qing_hai.py
+2
-2
shan_xi.py
comData/policylaw/shan_xi.py
+1
-1
shang_hai.py
comData/policylaw/shang_hai.py
+1
-1
shanxi.py
comData/policylaw/shanxi.py
+1
-1
si_chuan.py
comData/policylaw/si_chuan.py
+1
-1
tian_jin.py
comData/policylaw/tian_jin.py
+3
-3
xi_zang.py
comData/policylaw/xi_zang.py
+1
-1
xin_jiang.py
comData/policylaw/xin_jiang.py
+2
-2
yun_nan.py
comData/policylaw/yun_nan.py
+2
-2
googleSpider.py
google_comm/googleSpider.py
+1
-2
lhg.py
gwzk/lhg.py
+5
-5
test.py
test.py
+9
-1
没有找到文件。
comData/policylaw/bei_jing.py
浏览文件 @
6c7e4c57
...
...
@@ -133,7 +133,7 @@ def bei_jing():
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
time_now
=
time
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
,
time
.
localtime
())
...
...
comData/policylaw/chong_qing.py
浏览文件 @
6c7e4c57
...
...
@@ -111,7 +111,7 @@ def chong_qing():
pub_time
)
id_list
.
append
(
att_id
)
# 将附件链接替换
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
except
:
continue
time_now
=
time
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
,
time
.
localtime
())
...
...
comData/policylaw/fu_jian.py
浏览文件 @
6c7e4c57
...
...
@@ -136,7 +136,7 @@ def fu_jian():
pub_time
)
id_list
.
append
(
att_id
)
# 将文件服务器的链接替换
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
except
:
pub_source
=
''
...
...
comData/policylaw/gan_su.py
浏览文件 @
6c7e4c57
...
...
@@ -108,7 +108,7 @@ def gan_su():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'甘肃省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
content
=
soup
.
text
...
...
@@ -265,7 +265,7 @@ def gan_su():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'甘肃省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
contentWithTag
=
str
(
soup
.
prettify
())
content
=
soup
.
text
...
...
@@ -436,7 +436,7 @@ def gan_su():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'甘肃省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
contentWithTag
=
str
(
soup
.
prettify
())
content
=
soup
.
text
...
...
comData/policylaw/guang_dong.py
浏览文件 @
6c7e4c57
...
...
@@ -83,7 +83,7 @@ def guang_dong():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'广东省国资委'
,
file_name
,
num
,
pub_time
)
id_list
.
append
(
att_id
)
# 将文件服务器的链接替换
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
time_now
=
time
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
,
time
.
localtime
())
# todo:传kafka字段
...
...
comData/policylaw/guang_xi.py
浏览文件 @
6c7e4c57
...
...
@@ -109,7 +109,7 @@ def guang_xi():
pub_time
)
id_list
.
append
(
att_id
)
# 将附件链接替换
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
time_now
=
time
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
,
time
.
localtime
())
# todo:传kafka字段
...
...
comData/policylaw/gui_zhou.py
浏览文件 @
6c7e4c57
...
...
@@ -92,7 +92,7 @@ def gui_zhou():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'贵州省国资委'
,
file_name
,
num
,
pub_time
)
id_list
.
append
(
att_id
)
# 将附件链接替换
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
time_now
=
time
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
,
time
.
localtime
())
# todo:传kafka字段
...
...
comData/policylaw/gwyfile.py
浏览文件 @
6c7e4c57
...
...
@@ -152,7 +152,7 @@ def get_content1():
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
except
:
log
.
error
(
f
'{title}...{href}...获取内容失败'
)
continue
...
...
comData/policylaw/gwyparts.py
浏览文件 @
6c7e4c57
...
...
@@ -135,7 +135,7 @@ def get_content2():
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
except
:
log
.
error
(
f
'{title}...{href}获取内容失败'
)
continue
...
...
comData/policylaw/gwysasac.py
浏览文件 @
6c7e4c57
...
...
@@ -90,7 +90,7 @@ def get_content3():
continue
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
time_now
=
time
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
,
time
.
localtime
())
# todo:传kafka字段
dic_news
=
{
...
...
comData/policylaw/hai_nan.py
浏览文件 @
6c7e4c57
...
...
@@ -108,7 +108,7 @@ def hai_nan():
pub_time
)
id_list
.
append
(
att_id
)
# 将文件服务器的链接替换
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
except
:
try
:
# print(href)
...
...
@@ -157,7 +157,7 @@ def hai_nan():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'海南省国资委'
,
file_name
,
num
,
pub_time
)
id_list
.
append
(
att_id
)
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
except
:
continue
...
...
@@ -397,7 +397,7 @@ def hai_nan():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'海南省国资委'
,
file_name
,
num
,
pub_time
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# print(f'附件:{fu_jian_href}')
time_now
=
time
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
,
time
.
localtime
())
# todo:传kafka字段
...
...
@@ -519,7 +519,7 @@ def hai_nan():
continue
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'海南省国资委'
,
file_name
,
num
,
pub_time
)
id_list
.
append
(
att_id
)
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# print(f'----附件:{fu_jian_href}')
else
:
pass
...
...
comData/policylaw/he_bei.py
浏览文件 @
6c7e4c57
...
...
@@ -61,7 +61,7 @@ def he_bei():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'河北省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
if
len
(
contentWithTag
)
<
1
:
...
...
comData/policylaw/he_nan.py
浏览文件 @
6c7e4c57
...
...
@@ -67,7 +67,7 @@ def he_nan():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'河南省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
contentWithTag
=
str
(
soup
.
prettify
())
content
=
soup
.
text
...
...
comData/policylaw/hei_long_jiang.py
浏览文件 @
6c7e4c57
...
...
@@ -74,7 +74,7 @@ def hei_long_jiang():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'江苏省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
contentWithTag
=
str
(
soup
.
prettify
())
content
=
soup
.
text
...
...
comData/policylaw/hu_bei.py
浏览文件 @
6c7e4c57
...
...
@@ -87,7 +87,7 @@ def hu_bei(chromr_bin=None):
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'湖北省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
if
len
(
contentWithTag
)
<
1
:
...
...
comData/policylaw/hu_nan.py
浏览文件 @
6c7e4c57
...
...
@@ -82,7 +82,7 @@ def hu_nan():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'湖南省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
contentWithTag
=
str
(
soup
.
prettify
())
content
=
soup
.
text
...
...
comData/policylaw/ji_lin.py
浏览文件 @
6c7e4c57
...
...
@@ -124,7 +124,7 @@ def ji_lin():
id_list
.
append
(
att_id
)
#
# # todo:将返回的地址更新到soup
li
.
find
(
'a'
)[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
li
.
find
(
'a'
)[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
else
:
continue
else
:
...
...
@@ -168,7 +168,7 @@ def ji_lin():
# id_list.append(att_id)
# #
# # # todo:将返回的地址更新到soup
# fu_jian_href['href'] = 'http:
zzsn.luyuen.com
/' + str(full_path)
# fu_jian_href['href'] = 'http:
obs.ciglobal.cn
/' + str(full_path)
# else:
# continue
...
...
comData/policylaw/jiang_su.py
浏览文件 @
6c7e4c57
...
...
@@ -85,7 +85,7 @@ def jiang_su():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'江苏省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
contentWithTag
=
str
(
soup
.
prettify
())
content
=
soup
.
text
...
...
comData/policylaw/jiang_xi.py
浏览文件 @
6c7e4c57
...
...
@@ -101,7 +101,7 @@ def jiang_xi():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'江西省国资委'
,
file_name
,
num
,
writtenDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
contentWithTag
=
str
(
soup
.
prettify
())
content
=
soup
.
text
...
...
comData/policylaw/liao_ning.py
浏览文件 @
6c7e4c57
...
...
@@ -80,7 +80,7 @@ def liao_ning():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'辽宁省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
if
len
(
contentWithTag
)
<
1
:
...
...
comData/policylaw/ning_xia.py
浏览文件 @
6c7e4c57
...
...
@@ -73,7 +73,7 @@ def ning_xia():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'宁夏回族自治区国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
content
=
soup
.
text
...
...
comData/policylaw/qing_hai.py
浏览文件 @
6c7e4c57
...
...
@@ -78,7 +78,7 @@ def qing_hai():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'青海省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
# todo:替换完成之后,将附件上传至文件服务器
...
...
@@ -205,7 +205,7 @@ def qing_hai():
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
# todo:替换完成之后,将附件上传至文件服务器
...
...
comData/policylaw/shan_xi.py
浏览文件 @
6c7e4c57
...
...
@@ -88,7 +88,7 @@ def shan_xi():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'山西省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
if
len
(
contentWithTag
)
<
1
:
...
...
comData/policylaw/shang_hai.py
浏览文件 @
6c7e4c57
...
...
@@ -114,7 +114,7 @@ def shang_hai():
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
a
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
a
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
else
:
continue
...
...
comData/policylaw/shanxi.py
浏览文件 @
6c7e4c57
...
...
@@ -78,7 +78,7 @@ def shanxi():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'陕西省国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
content
=
soup
.
text
...
...
comData/policylaw/si_chuan.py
浏览文件 @
6c7e4c57
...
...
@@ -79,7 +79,7 @@ def si_chuan():
continue
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'四川省国资委'
,
file_name
,
num
,
pub_time
)
id_list
.
append
(
att_id
)
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# fu_jian_href_list.append(fu_jian_href)
time_now
=
time
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
,
time
.
localtime
())
...
...
comData/policylaw/tian_jin.py
浏览文件 @
6c7e4c57
...
...
@@ -106,7 +106,7 @@ def tian_jin():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'天津市国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
...
...
@@ -241,7 +241,7 @@ def tian_jin():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'天津市国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
if
id_list
:
pass
...
...
@@ -390,7 +390,7 @@ def tian_jin():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'天津市国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
if
len
(
contentWithTag
)
<
1
:
...
...
comData/policylaw/xi_zang.py
浏览文件 @
6c7e4c57
...
...
@@ -75,7 +75,7 @@ def xi_zang():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'西藏自治区国资委'
,
file_name
,
num
,
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
# todo:替换完成之后,将附件上传至文件服务器
...
...
comData/policylaw/xin_jiang.py
浏览文件 @
6c7e4c57
...
...
@@ -75,7 +75,7 @@ def xin_jiang():
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
if
len
(
contentWithTag
)
<
1
:
...
...
@@ -183,7 +183,7 @@ def xin_jiang():
publishDate
)
id_list
.
append
(
att_id
)
# todo:将返回的地址更新到soup
file
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
file
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
# id_ = redefid(id_list)
contentWithTag
=
str
(
soup
.
prettify
())
if
len
(
contentWithTag
)
<
1
:
...
...
comData/policylaw/yun_nan.py
浏览文件 @
6c7e4c57
...
...
@@ -89,7 +89,7 @@ def yun_nan():
att_id
,
full_path
=
baseCore
.
tableUpdate
(
retData
,
'云南省国资委'
,
file_name
,
num
,
''
)
id_list
.
append
(
att_id
)
# 将附件链接替换
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
except
:
continue
href_resp
.
close
()
...
...
@@ -211,7 +211,7 @@ def yun_nan():
pub_time
)
id_list
.
append
(
att_id
)
# 将附件链接替换
fu_jian
[
'href'
]
=
'http:
zzsn.luyuen.com
/'
+
str
(
full_path
)
fu_jian
[
'href'
]
=
'http:
obs.ciglobal.cn
/'
+
str
(
full_path
)
except
:
continue
res_
.
close
()
...
...
google_comm/googleSpider.py
浏览文件 @
6c7e4c57
...
...
@@ -305,8 +305,7 @@ class GoogleSpider(object):
self
.
driver
.
find_element
(
'xpath'
,
'//div[@id="hdtb-tls"]'
)
.
click
()
time
.
sleep
(
2
)
# self.driver.find_element('xpath', '//div[@class="hdtb-mn-hd"]/div[text()="按相关性排序"]').click()
self
.
driver
.
find_element
(
'xpath'
,
'//*[@id="tn_1"]/span[3]/g-popup/div[1]/div/div/div[text()="按相关性排序"]'
)
.
click
()
self
.
driver
.
find_element
(
'xpath'
,
'//*[@id="tn_1"]/span[3]/g-popup/div[1]/div/div/div[text()="按相关性排序"]'
)
.
click
()
time
.
sleep
(
2
)
# self.driver.find_element('xpath', '//div[@class="YpcDnf OSrXXb HG1dvd"]/a[text()="按日期排序"]').click()
self
.
driver
.
find_element
(
'xpath'
,
'//*[@id="lb"]/div/g-menu/g-menu-item[2]/div/a[text()="按日期排序"]'
)
.
click
()
...
...
gwzk/lhg.py
浏览文件 @
6c7e4c57
...
...
@@ -145,7 +145,7 @@ def translate(title, contentWithTag):
'contentWithTag'
:
contentWithTag
}
dic_info
=
json
.
dumps
(
dic_info
)
req
=
requests
.
post
(
'http://117.78.23.14:500
0
/translate'
,
data
=
dic_info
,
headers
=
headers
)
req
=
requests
.
post
(
'http://117.78.23.14:500
1
/translate'
,
data
=
dic_info
,
headers
=
headers
)
if
req
.
status_code
==
'200'
:
pass
else
:
...
...
@@ -180,10 +180,10 @@ def doJob():
publishDate
=
publishDate
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
if
publishDate
<
'2023-01-20'
:
continue
is_href
=
db_storage
.
find_one
({
'网址'
:
href
})
if
is_href
:
log
.
info
(
f
'{href}===已采集'
)
continue
#
is_href = db_storage.find_one({'网址': href})
#
if is_href:
#
log.info(f'{href}===已采集')
#
continue
div
.
find_all
(
'div'
)[
0
]
.
extract
()
div
.
find
(
'span'
,
class_
=
'Z3988'
)
.
extract
()
contentWithTag
=
div
...
...
test.py
浏览文件 @
6c7e4c57
...
...
@@ -452,7 +452,7 @@ def aaaaa(final_output):
print
(
finall_list
)
if
__name__
==
'__main__'
:
same_list
=
[
'让我们从一次时光旅行'
,
'开启植物天堂的故事'
,
'地球的午夜'
,
'是在火山喷发中度过的'
,
'到了凌晨三四点'
,
'在海洋深处有了生命的迹象'
,
'清晨6点多'
,
'更加壮丽的生命乐章开始了'
,
'更加壮丽的生命乐草开始了'
,
'更加壮丽的生命乐章开始了'
,
'更加壮丽的生命乐草开始了'
,
'更加壮丽的生命乐章开始了'
,
'种蓝藻细菌'
,
'一种蓝藻细菌'
,
'学会利用二氧化碳水和阳光'
,
'制造生命所需能量'
,
'同时释放出了氧气'
,
'这个被称为光合作用的过程'
,
'为植物世界打开了大门'
,
'此时'
,
'中国的陆地'
,
'也逐渐从海洋露出形成岛屿'
,
'但在相当长的时间里'
,
'陆地十分荒凉没有生机'
,
'这些岩石坚硬'
,
'无法储存水分'
,
'是当时陆地环境的写照'
,
'直到晚上九点多'
,
'也就是四亿年前左右'
,
'些矮小的生命'
,
'开始征服陆地'
,
'她们用一种近似于根的构造'
,
'固定在岩石上'
,
'苔藓'
,
'是陆地最早的拓荒者之'
,
'小'
,
'她们死后的身体'
,
'形成了肥沃的土壤'
,
'让更多的植物可以在这里生存'
,
'从此'
,
'绿色成为植物天堂的底色'
]
#
same_list = ['让我们从一次时光旅行', '开启植物天堂的故事', '地球的午夜', '是在火山喷发中度过的', '到了凌晨三四点', '在海洋深处有了生命的迹象', '清晨6点多', '更加壮丽的生命乐章开始了', '更加壮丽的生命乐草开始了', '更加壮丽的生命乐章开始了', '更加壮丽的生命乐草开始了', '更加壮丽的生命乐章开始了', '种蓝藻细菌', '一种蓝藻细菌', '学会利用二氧化碳水和阳光', '制造生命所需能量', '同时释放出了氧气', '这个被称为光合作用的过程', '为植物世界打开了大门', '此时', '中国的陆地', '也逐渐从海洋露出形成岛屿', '但在相当长的时间里', '陆地十分荒凉没有生机', '这些岩石坚硬', '无法储存水分', '是当时陆地环境的写照', '直到晚上九点多', '也就是四亿年前左右', '些矮小的生命', '开始征服陆地', '她们用一种近似于根的构造', '固定在岩石上', '苔藓', '是陆地最早的拓荒者之', '小', '她们死后的身体', '形成了肥沃的土壤', '让更多的植物可以在这里生存', '从此', '绿色成为植物天堂的底色']
# aaa = aaaaa(same_list)
...
...
@@ -461,4 +461,12 @@ if __name__ == '__main__':
# for i in range(len(same_list)):
# print(i, same_list[i])
#
isHandleSuccess
,
handleMsg
=
True
,
"success"
for
i
in
range
(
3
):
if
i
<=
3
:
HandleSuccess
,
handleMsg
=
True
,
"success"
else
:
HandleSuccess
,
handleMsg
=
False
,
"error"
print
(
i
,
HandleSuccess
,
handleMsg
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论