-
Notifications
You must be signed in to change notification settings - Fork 949
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #39 from Huanshere/whisperapi
v0.3.0
- Loading branch information
Showing
26 changed files
with
471 additions
and
386 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -158,9 +158,5 @@ _model_cache/ | |
# large files | ||
/ffmpeg.exe | ||
/ffmpeg | ||
*.mp4 | ||
*.webm | ||
*.mp3 | ||
.DS_Store | ||
runtime/ | ||
dev/ | ||
_config.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,62 @@ | ||
import os,sys | ||
import os,sys,json | ||
import spacy | ||
from spacy.cli import download | ||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) | ||
from config import SPACY_NLP_MODEL | ||
from core.step2_whisperapi import get_whisper_language | ||
|
||
def get_spacy_model(language: str):
    """Return the spaCy pipeline name to use for ``language``.

    The lookup is case-insensitive: ``"English"`` and ``"english"`` resolve
    to the same pipeline. When the language has no entry in the table, a
    notice is printed and the English pipeline ``en_core_web_sm`` is returned
    as a fallback.

    Args:
        language: Language name (e.g. ``"english"``, ``"Chinese"``).

    Returns:
        The spaCy pipeline package name as a string.
    """
    # NOTE(review): some of these package names (e.g. arabic, hindi,
    # vietnamese, turkish, thai, hungarian) may not exist as published spaCy
    # pipelines -- verify against the spaCy models directory.
    language_map = {
        "english": "en_core_web_sm",
        "chinese": "zh_core_web_sm",
        "spanish": "es_core_news_sm",
        "french": "fr_core_news_sm",
        "german": "de_core_news_sm",
        "italian": "it_core_news_sm",
        "japanese": "ja_core_news_sm",
        "portuguese": "pt_core_news_sm",
        "dutch": "nl_core_news_sm",
        "greek": "el_core_news_sm",
        "russian": "ru_core_news_sm",
        "arabic": "ar_core_news_sm",
        "hindi": "hi_core_news_sm",
        "korean": "ko_core_news_sm",
        "polish": "pl_core_news_sm",
        "ukrainian": "uk_core_news_sm",
        "vietnamese": "vi_core_news_sm",
        "turkish": "tr_core_news_sm",
        "thai": "th_core_news_sm",
        "romanian": "ro_core_news_sm",
        "danish": "da_core_news_sm",
        "finnish": "fi_core_news_sm",
        "hungarian": "hu_core_news_sm",
        "norwegian": "nb_core_news_sm",
        "swedish": "sv_core_news_sm",
    }

    # Normalize once and use the same key for both the membership test and
    # the lookup, so a supported language spelled with capitals does not
    # trigger a spurious "unsupported" warning.
    key = language.lower()
    if key not in language_map:
        print(f"Spacy 模型不支持'{language}',使用 en_core_web_sm 模型作为后备选项...")
        return "en_core_web_sm"
    return language_map[key]
|
||
def _load_or_download(model):
    """Load the spaCy pipeline ``model``, downloading it first if loading fails."""
    try:
        return spacy.load(model)
    except Exception:
        print(f"正在下载 {model} 模型...")
        download(model)
        return spacy.load(model)


def init_nlp():
    """Load and return the spaCy pipeline for the detected language.

    The language comes from ``get_whisper_language()`` and is mapped to a
    pipeline name by ``get_spacy_model()``. If any step of that path fails
    (language lookup, loading, or downloading), the English pipeline
    ``en_core_web_sm`` is loaded instead, downloading it when necessary.

    Returns:
        The loaded spaCy pipeline object.
    """
    try:
        language = get_whisper_language()
        model = get_spacy_model(language)
        print(f"⏳ 正在加载 NLP Spacy 模型: <{model}> ...")
        nlp = _load_or_download(model)
    except (Exception, SystemExit):
        # SystemExit is caught on purpose: the spaCy CLI download appears to
        # terminate via sys.exit() when a package cannot be found or
        # installed (TODO confirm), and the fallback must still run then.
        # KeyboardInterrupt is deliberately NOT caught so Ctrl+C still aborts.
        print(f"未检测到语言,使用 en_core_web_sm 模型作为后备选项...")
        nlp = _load_or_download("en_core_web_sm")
    print(f"✅ NLP Spacy 模型加载成功!")
    return nlp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.