From a02a134ae62fc9aca47c4e97af25cac881843e18 Mon Sep 17 00:00:00 2001 From: m1ngsama Date: Fri, 20 Feb 2026 21:56:48 +0800 Subject: [PATCH] docs: replace ASCII architecture diagram with Mermaid flowchart --- README.md | 41 ++++++++++------------------------------- README_EN.md | 42 ++++++++++-------------------------------- 2 files changed, 20 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index 9580f8a..71785a1 100644 --- a/README.md +++ b/README.md @@ -21,37 +21,16 @@ ## 系统架构 -``` -麦克风 - │ - ▼ -┌──────────────────┐ -│ Faster-Whisper │ 中文语音 → 文本 -└────────┬─────────┘ - │ "把削笔刀抬起5厘米" - ▼ -┌──────────────────┐ -│ 规则解析引擎 │ 简单指令直接匹配(松开 / 复位 / 方向移动) -│ (Regex engine) │ 命中 → 直接生成 JSON,跳过 LLM -└────────┬─────────┘ - │ 未命中(含物体名的复杂指令) - ▼ -┌──────────────────┐ -│ DeepSeek-R1-1.5B │ QLoRA 微调推理 -│ (QLoRA, FP16) │ 自然语言 → 结构化 JSON 指令 -└────────┬─────────┘ - │ [{"action": "lift", "target": "part", "height": 50}] - ▼ -┌──────────────────┐ -│ YOLOv8s │ 实时检测目标物体 -│ + Homography │ 像素坐标 → 机械臂工作坐标 (mm) -└────────┬─────────┘ - │ (rx=170, ry=3) - ▼ -┌──────────────────┐ -│ 运动控制引擎 │ D-H 逆运动学 + S-Curve 插值 -│ arm_main.py │ 平滑轨迹 → 串口 → ESP32 → 舵机 -└──────────────────┘ +```mermaid +flowchart TD + MIC["🎤 麦克风"] --> STT["Faster-Whisper<br/>中文语音识别"] + STT --> RULE{"规则解析引擎<br/>简单指令匹配"} + RULE -- "命中<br/>松开 / 复位 / 方向移动" --> ACT["JSON 动作指令"] + RULE -- "未命中<br/>含物体名的复杂指令" --> LLM["DeepSeek-R1-1.5B<br/>QLoRA FP16<br/>自然语言 → JSON"] + LLM --> ACT + ACT --> VIS["YOLOv8s + Homography<br/>目标检测 · 手眼标定<br/>像素坐标 → 机械臂坐标 mm"] + VIS --> MOT["arm_main.py<br/>D-H 逆运动学 + S-Curve"] + MOT --> ESP["ESP32 PWM → 舵机"] ``` --- diff --git a/README_EN.md b/README_EN.md index 9253088..67a1e62 100644 --- a/README_EN.md +++ b/README_EN.md @@ -21,38 +21,16 @@ Total hardware cost **¥317 (~$45 USD)**. 
Requires an NVIDIA GPU for LLM inferen ## Architecture -``` -Microphone - │ - ▼ -┌──────────────────┐ -│ Faster-Whisper │ Chinese speech → text -└────────┬─────────┘ - │ "lift the pencil sharpener 5cm" - ▼ -┌──────────────────┐ -│ Regex engine │ Simple commands matched directly -│ │ (release / reset / directional moves) -│ │ Hit → emit JSON, skip LLM -└────────┬─────────┘ - │ Miss (complex commands with object names) - ▼ -┌──────────────────┐ -│ DeepSeek-R1-1.5B │ QLoRA fine-tuned inference -│ (QLoRA, FP16) │ Natural language → structured JSON -└────────┬─────────┘ - │ [{"action": "lift", "target": "part", "height": 50}] - ▼ -┌──────────────────┐ -│ YOLOv8s │ Real-time object detection -│ + Homography │ Pixel coords → robot workspace coords (mm) -└────────┬─────────┘ - │ (rx=170, ry=3) - ▼ -┌──────────────────┐ -│ Motion engine │ D-H IK + S-Curve interpolation -│ arm_main.py │ Smooth trajectory → serial → ESP32 → servos -└──────────────────┘ +```mermaid +flowchart TD + MIC["🎤 Microphone"] --> STT["Faster-Whisper<br/>Chinese speech recognition"] + STT --> RULE{"Regex engine<br/>Simple command match"} + RULE -- "Hit<br/>release / reset / directional" --> ACT["JSON action"] + RULE -- "Miss<br/>complex command with object name" --> LLM["DeepSeek-R1-1.5B<br/>QLoRA FP16<br/>Natural language → JSON"] + LLM --> ACT + ACT --> VIS["YOLOv8s + Homography<br/>Object detection · hand-eye calibration<br/>Pixel coords → robot coords mm"] + VIS --> MOT["arm_main.py<br/>D-H IK + S-Curve trajectory"] + MOT --> ESP["ESP32 PWM → Servos"] ``` ---