Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
platform_zzsn
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
婷婷
platform_zzsn
Commits
3d19bc8f
提交
3d19bc8f
authored
3月 16, 2023
作者:
ctt
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
修改文件路径
上级
36594c06
显示空白字符变更
内嵌
并排
正在显示
6 个修改的文件
包含
7 行增加
和
7 行删除
+7
-7
basic.py
basic_service/views/basic.py
+1
-1
BaseDataProcess.py
model/base/views/data/BaseDataProcess.py
+1
-1
FewMultiClassRunner.py
model/classify/views/few_multi_class/FewMultiClassRunner.py
+2
-2
base_utils.py
platform_base/views/base_utils.py
+1
-1
interaction.py
platform_base/views/interaction.py
+1
-1
word_count.py
scenario_service/views/word_count.py
+1
-1
没有找到文件。
basic_service/views/basic.py
浏览文件 @
3d19bc8f
...
...
@@ -127,7 +127,7 @@ def summary(text, summary_length):
 # zh_nlp = stanza.Pipeline('zh-hans')
 # en_nlp = stanza.Pipeline('en')
 # nlp_dict = {'zh': zh_nlp, 'en': en_nlp}
-#model = KeyedVectors.load_word2vec_format(os.path.join(BASE_DIR, 'static/platform_base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
+#model = KeyedVectors.load_word2vec_format(os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
 # if __name__ == '__main__':
 # print(word_cut('汤姆生病了。他去了医院。'))
 # print(word_pos('汤姆生病了。他去了医院。'))
...
...
model/base/views/data/BaseDataProcess.py
浏览文件 @
3d19bc8f
...
...
@@ -124,7 +124,7 @@ class BaseDataProcess:
         count = 0
         if self.embedding_config['use_Tencent']:
             model_tencent = gensim.models.KeyedVectors.load_word2vec_format(
-                os.path.join(BASE_DIR, 'static/platform_base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
+                os.path.join(BASE_DIR, 'static/base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
             vocabulary_tencent = model_tencent.wv.vocab.keys()
             vector_matrix = np.zeros((len(feature_words), int(self.embedding_config['size']) + 200))
             for word in feature_words:
...
...
model/classify/views/few_multi_class/FewMultiClassRunner.py
浏览文件 @
3d19bc8f
...
...
@@ -27,7 +27,7 @@ class DataArguments:
 @dataclass
 class ModelArguments:
-    model_name_or_path: str = field(default="ernie-3.0-platform_base-zh", metadata={"help": "Build-in pretrained model name or the path to local model."})
+    model_name_or_path: str = field(default="ernie-3.0-base-zh", metadata={"help": "Build-in pretrained model name or the path to local model."})
     export_type: str = field(default='paddle', metadata={"help": "The type to export. Support `paddle` and `onnx`."})
...
...
@@ -47,7 +47,7 @@ class FewMultiRunner(BaseRunner.BaseRunner):
         self.config_path = config_path
         self.config = FewMultiConfig(self.config_path)
-    def train(self, logger):
+    def train(self, logger2):
         py_path = os.path.abspath(__file__)
         sys.argv = [py_path]
         print(self.config)
...
...
platform_base/views/base_utils.py
浏览文件 @
3d19bc8f
...
...
@@ -46,7 +46,7 @@ def merge_para(paras):
     return new_paras
 def filter_stopwords(para):
-    path = os.path.join(BASE_DIR, 'static/platform_base/baidu_stopwords.txt')
+    path = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
     stopword_list = [k.strip() for k in read_txt(path) if k.strip() != '']
     words = [word for word in jieba.lcut(para) if word not in stopword_list]
...
...
platform_base/views/interaction.py
浏览文件 @
3d19bc8f
...
...
@@ -88,7 +88,7 @@ def update_config_file(config_path, config_file):
         data['data_loader']['dataset_path'] = xlsx_path
     if 'save_fname' in data['runner'].keys():
         data['runner']['save_fpath'] = os.path.join(config_path, data['runner']['save_fname'])
-    data['data_loader']['stopwords_path'] = os.path.join(BASE_DIR, 'static/platform_base/baidu_stopwords.txt')
+    data['data_loader']['stopwords_path'] = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
     file_path = os.path.join(config_path, 'config.yaml')
     with open(file_path, 'w') as yaml_file:
...
...
scenario_service/views/word_count.py
浏览文件 @
3d19bc8f
...
...
@@ -241,7 +241,7 @@ class WordCount:
         seg_list_exact = jieba.posseg.cut(string_data, HMM=True)  # 精确模式分词+HMM
         object_list = []
         # 去除停用词
-        stopwords_path = os.path.join(BASE_DIR, 'static/platform_base/baidu_stopwords.txt')
+        stopwords_path = os.path.join(BASE_DIR, 'static/base/baidu_stopwords.txt')
         with open(stopwords_path, 'r', encoding='UTF-8') as meaninglessFile:
             stopwords = set(meaninglessFile.read().split('\n'))
         stopwords.add(' ')
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论