#!/usr/bin/env python3
"""Speech-to-text example using faster-whisper."""

from faster_whisper import WhisperModel
import os
import sys

# Default configuration
MODEL_SIZE = "base"  # default Whisper model size
LANGUAGE = "zh"      # default transcription language code


def transcribe(audio_path, model_size=None, language=None):
    """Transcribe speech in an audio file to text.

    Args:
        audio_path: Path to the audio file to transcribe.
        model_size: Whisper model size ("tiny", "base", "small", "medium",
            "large"); falls back to MODEL_SIZE when falsy.
        language: Language code (e.g. "zh", "en"); falls back to LANGUAGE
            when falsy.

    Returns:
        The concatenated transcription text, stripped of surrounding
        whitespace.
    """
    model_size = model_size or MODEL_SIZE
    language = language or LANGUAGE

    print("\n🎤 开始识别...")
    print(f" 模型: {model_size}")
    print(f" 语言: {language}")
    print(f" 文件: {audio_path}")

    # Load the model (CPU + int8 keeps memory usage modest).
    model = WhisperModel(model_size, device="cpu", compute_type="int8")

    # Run recognition; segments is a lazy generator, consumed below.
    segments, info = model.transcribe(
        audio_path,
        language=language,
        beam_size=5
    )

    print("\n📝 识别结果:")
    print(f" 语言: {info.language} (置信度: {info.language_probability:.2f})")
    print("-" * 50)

    # Collect segment texts; printing as we go streams progress to the user.
    parts = []
    for segment in segments:
        parts.append(segment.text)
        print(f" {segment.text}")

    return "".join(parts).strip()


def main():
    """CLI entry point: parse argv, validate the input file, transcribe."""
    print(f"\n{'='*60}")
    print("🎤 语音识别 (faster-whisper)")
    print(f"{'='*60}")

    if len(sys.argv) < 2:
        print("\n用法:")
        print(" python whisper_example.py <音频文件>")
        print(" python whisper_example.py audio.mp3 --model base --lang zh")
        print("\n选项:")
        print(" --model, -m 模型大小: tiny, base, small, medium, large")
        print(" --lang, -l 语言: zh, en, ja, ko 等")
        return

    audio_file = sys.argv[1]
    model = "base"
    lang = "zh"

    # Parse optional flags in "--key value" / "-k value" pairs.
    # BUGFIX: the original used replace("--", ""), which left short options
    # like "-m" untouched, so the advertised -m/-l forms never matched.
    i = 2
    while i < len(sys.argv):
        key = sys.argv[i].lstrip("-")
        value = sys.argv[i + 1] if i + 1 < len(sys.argv) else ""
        if key in ("model", "m"):
            model = value
        elif key in ("lang", "l"):
            lang = value
        i += 2

    if not os.path.exists(audio_file):
        print(f"❌ 文件不存在: {audio_file}")
        return

    text = transcribe(audio_file, model, lang)
    print(f"\n✅ 识别完成: {len(text)} 字符")


if __name__ == "__main__":
    main()