robot_arm/whisper_main.py
2026-02-10 23:31:14 +08:00

31 lines
No EOL
1.1 KiB
Python

# 文件名: whisper_main.py
import sounddevice as sd
import numpy as np
import scipy.io.wavfile as wav
from faster_whisper import WhisperModel
class RobotEar:
def __init__(self, model_size="base"):
self.model = WhisperModel(model_size, device="cuda", compute_type="float16")
self.fs = 16000
self.recording_buffer = []
def start_recording(self):
self.recording_buffer = []
# 开始长录音
sd.start_stream(samplerate=self.fs, channels=1)
print(">>> [耳朵] 录音中...")
def record_callback(self, indata, frames, time, status):
self.recording_buffer.append(indata.copy())
def get_text(self, audio_data):
"""将传入的音频数组转为文字"""
temp_file = "temp_voice.wav"
# 归一化音频数据
audio_np = np.concatenate(audio_data, axis=0)
wav.write(temp_file, self.fs, (audio_np * 32767).astype(np.int16))
segments, info = self.model.transcribe(temp_file, beam_size=5, language="zh")
text = "".join([s.text for s in segments])
return text.strip()