Files
server-configs/whisper_example.py
2026-02-13 22:24:27 +08:00

86 lines
2.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Speech-to-text example using faster-whisper.
"""
from faster_whisper import WhisperModel
import os
# Default configuration; both values can be overridden per call to
# transcribe() or via the CLI flags parsed in main().
MODEL_SIZE = "base"  # faster-whisper model size (tiny/base/small/medium/large)
LANGUAGE = "zh"  # default language code for transcription
def transcribe(audio_path, model_size=None, language=None):
    """Transcribe an audio file to text with faster-whisper.

    Args:
        audio_path: Path to the audio file to transcribe.
        model_size: Whisper model size (e.g. "tiny", "base");
            falls back to the module default MODEL_SIZE when falsy.
        language: Language code (e.g. "zh", "en");
            falls back to the module default LANGUAGE when falsy.

    Returns:
        The full transcript as a single string, stripped of
        leading/trailing whitespace.
    """
    model_size = model_size or MODEL_SIZE
    language = language or LANGUAGE

    print("\n🎤 开始识别...")
    print(f" 模型: {model_size}")
    print(f" 语言: {language}")
    print(f" 文件: {audio_path}")

    # Load the model on CPU with int8 quantization (low memory footprint).
    model = WhisperModel(model_size, device="cpu", compute_type="int8")

    # NOTE: segments is a lazy generator — it is consumed exactly once
    # in the loop below.
    segments, info = model.transcribe(
        audio_path,
        language=language,
        beam_size=5,
    )

    print("\n📝 识别结果:")
    print(f" 语言: {info.language} (置信度: {info.language_probability:.2f})")
    print("-" * 50)

    # Collect pieces and join once at the end instead of repeated string
    # concatenation (which is quadratic in the worst case).
    pieces = []
    for segment in segments:
        pieces.append(segment.text)
        print(f" {segment.text}")
    return "".join(pieces).strip()
def main():
    """CLI entry point: parse command-line arguments and run transcription.

    Usage: whisper_example.py <audio_file> [--model/-m SIZE] [--lang/-l CODE]
    Prints usage and returns when no audio file is given; returns early
    (with a message) when the audio file does not exist.
    """
    import sys

    print(f"\n{'='*60}")
    print("🎤 语音识别 (faster-whisper)")
    print(f"{'='*60}")

    if len(sys.argv) < 2:
        print("\n用法:")
        print(" python whisper_example.py <音频文件>")
        print(" python whisper_example.py audio.mp3 --model base --lang zh")
        print("\n选项:")
        print(" --model, -m 模型大小: tiny, base, small, medium, large")
        print(" --lang, -l 语言: zh, en, ja, ko 等")
        return

    audio_file = sys.argv[1]
    # Use the module-level defaults instead of duplicating the literals.
    model = MODEL_SIZE
    lang = LANGUAGE

    # Parse trailing "--flag value" / "-f value" pairs.
    i = 2
    while i < len(sys.argv):
        # BUG FIX: the original used replace("--", ""), which leaves a
        # single leading dash intact, so the short options "-m"/"-l"
        # advertised in the help text were silently ignored.
        key = sys.argv[i].lstrip("-")
        value = sys.argv[i + 1] if i + 1 < len(sys.argv) else ""
        if key in ("model", "m"):
            model = value
        elif key in ("lang", "l"):
            lang = value
        i += 2

    # Validate the input path before loading the (slow) model.
    if not os.path.exists(audio_file):
        print(f"❌ 文件不存在: {audio_file}")
        return

    text = transcribe(audio_file, model, lang)
    print(f"\n✅ 识别完成: {len(text)} 字符")


if __name__ == "__main__":
    main()