mirror of
https://github.com/m1ngsama/robot_arm.git
synced 2026-03-25 19:53:49 +00:00
merge: fix whisper broken start_recording (closes #2)
This commit is contained in:
commit
f631157887
2 changed files with 28 additions and 30 deletions
|
|
@@ -1,17 +1,20 @@
|
||||||
import cv2
|
import json
|
||||||
import numpy as np
|
|
||||||
import time
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import json
|
import time
|
||||||
import torch
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import scipy.io.wavfile as wav
|
||||||
import sounddevice as sd
|
import sounddevice as sd
|
||||||
|
import torch
|
||||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||||
from ultralytics import YOLO
|
from ultralytics import YOLO
|
||||||
|
|
||||||
from arm_main import RobotArmUltimate
|
from arm_main import RobotArmUltimate
|
||||||
from whisper_main import RobotEar
|
from whisper_main import RobotEar
|
||||||
|
|
||||||
# 禁用代理
|
# Disable proxy for local serial/model communication
|
||||||
os.environ["no_proxy"] = "localhost,127.0.0.1"
|
os.environ["no_proxy"] = "localhost,127.0.0.1"
|
||||||
|
|
||||||
# =========================================================
|
# =========================================================
|
||||||
|
|
@@ -686,7 +689,6 @@ class RobotApp:
|
||||||
print(f">>> [语音] 音频太长({duration:.1f}s),截断到15秒")
|
print(f">>> [语音] 音频太长({duration:.1f}s),截断到15秒")
|
||||||
audio_trimmed = audio_trimmed[:16000 * 15]
|
audio_trimmed = audio_trimmed[:16000 * 15]
|
||||||
|
|
||||||
import scipy.io.wavfile as wav
|
|
||||||
temp_file = "temp_voice.wav"
|
temp_file = "temp_voice.wav"
|
||||||
wav.write(temp_file, 16000, (audio_trimmed * 32767).astype(np.int16))
|
wav.write(temp_file, 16000, (audio_trimmed * 32767).astype(np.int16))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -1,31 +1,27 @@
|
||||||
# 文件名: whisper_main.py
|
|
||||||
import sounddevice as sd
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.io.wavfile as wav
|
import scipy.io.wavfile as wav
|
||||||
|
import sounddevice as sd
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
|
|
||||||
class RobotEar:
|
class RobotEar:
|
||||||
|
"""Speech recognition module backed by faster-whisper."""
|
||||||
|
|
||||||
def __init__(self, model_size="base"):
|
def __init__(self, model_size="base"):
|
||||||
self.model = WhisperModel(model_size, device="cuda", compute_type="float16")
|
self.model = WhisperModel(model_size, device="cuda", compute_type="float16")
|
||||||
self.fs = 16000
|
self.fs = 16000
|
||||||
self.recording_buffer = []
|
|
||||||
|
|
||||||
def start_recording(self):
|
|
||||||
self.recording_buffer = []
|
|
||||||
# 开始长录音
|
|
||||||
sd.start_stream(samplerate=self.fs, channels=1)
|
|
||||||
print(">>> [耳朵] 录音中...")
|
|
||||||
|
|
||||||
def record_callback(self, indata, frames, time, status):
|
|
||||||
self.recording_buffer.append(indata.copy())
|
|
||||||
|
|
||||||
def get_text(self, audio_data):
|
def get_text(self, audio_data):
|
||||||
"""将传入的音频数组转为文字"""
|
"""Transcribe audio frames to text.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
audio_data: list of numpy arrays captured from sounddevice InputStream.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Transcribed string (stripped).
|
||||||
|
"""
|
||||||
temp_file = "temp_voice.wav"
|
temp_file = "temp_voice.wav"
|
||||||
# 归一化音频数据
|
|
||||||
audio_np = np.concatenate(audio_data, axis=0)
|
audio_np = np.concatenate(audio_data, axis=0)
|
||||||
wav.write(temp_file, self.fs, (audio_np * 32767).astype(np.int16))
|
wav.write(temp_file, self.fs, (audio_np * 32767).astype(np.int16))
|
||||||
|
segments, _ = self.model.transcribe(temp_file, beam_size=5, language="zh")
|
||||||
segments, info = self.model.transcribe(temp_file, beam_size=5, language="zh")
|
return "".join(s.text for s in segments).strip()
|
||||||
text = "".join([s.text for s in segments])
|
|
||||||
return text.strip()
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue