mirror of
https://github.com/m1ngsama/TNT.git
synced 2026-06-26 05:44:38 +08:00
Add slow-client backpressure regression
This commit is contained in:
parent
e603a55cb3
commit
13b671cc9f
8 changed files with 259 additions and 5 deletions
6
Makefile
6
Makefile
|
|
@ -34,7 +34,7 @@ MANDIR ?= $(PREFIX)/share/man
|
|||
SYSTEMD_UNIT_DIR ?= $(PREFIX)/lib/systemd/system
|
||||
CI_TEST_PORT ?= $(if $(PORT),$(PORT),2222)
|
||||
|
||||
.PHONY: all clean install install-systemd uninstall uninstall-systemd debug release release-check release-check-strict asan valgrind check test test-advisory ci-test unit-test integration-test anonymous-access-test connection-limit-test security-test stress-test soak-test user-lifecycle-test info
|
||||
.PHONY: all clean install install-systemd uninstall uninstall-systemd debug release release-check release-check-strict asan valgrind check test test-advisory ci-test unit-test integration-test anonymous-access-test connection-limit-test security-test stress-test soak-test slow-client-test user-lifecycle-test info
|
||||
|
||||
all: $(TARGETS)
|
||||
|
||||
|
|
@ -148,6 +148,10 @@ soak-test: all
|
|||
@echo "Running soak tests..."
|
||||
@cd tests && PORT=$${PORT:-2222} ./test_soak.sh $${DURATION:-8} $${RECONNECTS:-5}
|
||||
|
||||
# slow-client-test: build everything (prerequisite "all"), then run
# tests/test_slow_client.sh from inside the tests directory.
# PORT, DURATION and BURST_CHARS are read from the recipe shell's environment
# and default to 2222, 8 and 1600; DURATION and BURST_CHARS are passed to the
# script as its first and second positional arguments.
slow-client-test: all
|
||||
@echo "Running slow-client tests..."
|
||||
@cd tests && PORT=$${PORT:-2222} ./test_slow_client.sh $${DURATION:-8} $${BURST_CHARS:-1600}
|
||||
|
||||
user-lifecycle-test: all
|
||||
@echo "Running user lifecycle tests..."
|
||||
@cd tests && PORT=$${PORT:-2222} ./test_user_lifecycle.sh
|
||||
|
|
|
|||
10
README.md
10
README.md
|
|
@ -238,6 +238,7 @@ make connection-limit-test # verify per-IP concurrency and rate limits
|
|||
make security-test # run security feature checks
|
||||
make stress-test # run configurable concurrent-client stress test
|
||||
make soak-test # run idle/reconnect/control-plane soak test
|
||||
make slow-client-test # run slow interactive-client backpressure test
|
||||
make user-lifecycle-test # run a two-user TUI lifecycle test
|
||||
make ci-test # run the same checks as GitHub Actions
|
||||
|
||||
|
|
@ -249,6 +250,7 @@ cd tests
|
|||
./test_connection_limits.sh # per-IP concurrency and rate limits
|
||||
./test_stress.sh # stress test
|
||||
./test_soak.sh # soak test
|
||||
./test_slow_client.sh # slow-client backpressure
|
||||
./test_user_lifecycle.sh # two-user TUI lifecycle
|
||||
```
|
||||
|
||||
|
|
@ -257,6 +259,8 @@ cd tests
|
|||
- Anonymous access: 2 tests
|
||||
- Security features: 12 tests
|
||||
- Stress test: configurable concurrent clients (`CLIENTS=20 DURATION=60 make stress-test`)
|
||||
- Slow-client test: an unread interactive SSH client cannot block health,
|
||||
stats, post, tail, or server survival checks
|
||||
|
||||
### Dependencies
|
||||
|
||||
|
|
@ -361,6 +365,12 @@ Before preparing a release locally:
|
|||
make release-check
|
||||
```
|
||||
|
||||
Longer local preflight can opt into runtime soak and slow-client coverage:
|
||||
|
||||
```sh
|
||||
RUN_SOAK=1 RUN_SLOW_CLIENT=1 make release-check
|
||||
```
|
||||
|
||||
Before publishing package recipes, replace placeholder checksums and run:
|
||||
|
||||
```sh
|
||||
|
|
|
|||
|
|
@ -19,6 +19,9 @@
|
|||
- Added a VHS tape draft for recording the core TNT terminal-chat experience.
|
||||
- Added live `:inbox` refresh behavior: `r` refreshes the inbox manually, and
|
||||
an open inbox refreshes when a new private message arrives.
|
||||
- Added `make slow-client-test`, an opt-in regression for an unread
|
||||
interactive SSH client under backpressure while health, stats, post, tail,
|
||||
and server survival stay responsive.
|
||||
|
||||
### Changed
|
||||
- `make install-systemd` now rewrites the installed unit's `ExecStart` to match
|
||||
|
|
@ -51,6 +54,8 @@
|
|||
direct slow-reader blocking path.
|
||||
- `make release-check` can now run the soak test with `RUN_SOAK=1`, keeping
|
||||
longer runtime checks opt-in for local release validation.
|
||||
- `make release-check` can also run the slow-client backpressure test with
|
||||
`RUN_SLOW_CLIENT=1`.
|
||||
- Room capacity and mention notification bookkeeping now follow
|
||||
`TNT_MAX_CONNECTIONS` instead of a hidden fixed 64-client array limit.
|
||||
- Updated the roadmap to reflect completed `tntctl`, stable exec contract, and
|
||||
|
|
|
|||
|
|
@ -41,6 +41,9 @@ Release policy:
|
|||
2. Run the local preflight:
|
||||
make release-check
|
||||
|
||||
For a longer local runtime gate before publishing or production rollout:
|
||||
RUN_SOAK=1 RUN_SLOW_CLIENT=1 make release-check
|
||||
|
||||
3. Commit the release changes and create a local tag. Do not push the tag
|
||||
until strict checks pass:
|
||||
git tag v1.0.1
|
||||
|
|
|
|||
|
|
@ -15,6 +15,9 @@ TEST
|
|||
make connection-limit-test per-IP concurrency/rate-limit checks
|
||||
make security-test security feature checks
|
||||
make stress-test concurrent-client stress test
|
||||
make soak-test idle/reconnect/control-plane soak test
|
||||
make slow-client-test slow interactive-client backpressure test
|
||||
make user-lifecycle-test two-user TUI lifecycle test
|
||||
make ci-test same checks as GitHub Actions
|
||||
|
||||
DEBUG
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ Goal: make regressions harder to introduce.
|
|||
- add sanitizer jobs and targeted fuzzing for UTF-8, log parsing, and command parsing
|
||||
- ✅ add a configurable soak test for idle sessions, reconnects, and control
|
||||
interface availability
|
||||
- add deeper slow-client soak coverage with a deliberately backpressured SSH
|
||||
- ✅ add deeper slow-client coverage with a deliberately backpressured SSH
|
||||
client
|
||||
- keep deployment and test docs aligned with actual runtime behavior
|
||||
- require every user-visible interface change to update docs and tests in the same change set
|
||||
|
|
@ -106,7 +106,5 @@ These are the next changes that should happen before new feature work expands th
|
|||
1. Decide the daemon naming path: keep `tnt` as the server binary for 1.x, or
|
||||
introduce `tntd` later with a compatibility plan.
|
||||
2. Finish untangling client-state ownership into a clearer release path.
|
||||
3. Add deeper slow-client soak coverage with a deliberately backpressured SSH
|
||||
client.
|
||||
4. Replace remaining release placeholders with real maintainer metadata and
|
||||
3. Replace remaining release placeholders with real maintainer metadata and
|
||||
source-archive checksums when cutting a public package release.
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ Default checks:
|
|||
Environment:
|
||||
RUN_INTEGRATION=1 also run full make test
|
||||
RUN_SOAK=1 also run the configurable soak test
|
||||
RUN_SLOW_CLIENT=1 also run the slow-client backpressure test
|
||||
PORT=12720 base port for integration tests
|
||||
|
||||
Strict checks additionally require a clean tree, a vX.Y.Z tag at HEAD, a
|
||||
|
|
@ -123,6 +124,13 @@ if [ "${RUN_SOAK:-0}" = "1" ]; then
|
|||
DURATION="${SOAK_DURATION:-8}" RECONNECTS="${SOAK_RECONNECTS:-5}"
|
||||
fi
|
||||
|
||||
if [ "${RUN_SLOW_CLIENT:-0}" = "1" ]; then
|
||||
step "running slow-client test"
|
||||
make slow-client-test PORT="$((${PORT:-12720} + 40))" \
|
||||
DURATION="${SLOW_CLIENT_DURATION:-8}" \
|
||||
BURST_CHARS="${SLOW_CLIENT_BURST_CHARS:-1600}"
|
||||
fi
|
||||
|
||||
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/tnt-release-check.XXXXXX")
|
||||
cleanup() {
|
||||
rm -rf "$tmpdir"
|
||||
|
|
|
|||
223
tests/test_slow_client.sh
Executable file
223
tests/test_slow_client.sh
Executable file
|
|
@ -0,0 +1,223 @@
|
|||
#!/bin/sh
|
||||
# Slow interactive-client regression test for TNT.
|
||||
# Usage: ./test_slow_client.sh [hold_seconds] [burst_chars]
|
||||
|
||||
# Runtime configuration. PORT comes from the environment; the hold time and
# burst size come from the first and second positional arguments.
PORT=${PORT:-2222}
HOLD_SECONDS=${1:-8}
BURST_CHARS=${2:-1600}
BIN="../tnt"

# Pass/fail counters updated by the individual checks.
PASS=0
FAIL=0

# Scratch directory for server state, logs, probe output and the FIFO.
# Abort if it cannot be created: with an empty STATE_DIR every later
# "$STATE_DIR/..." path would resolve against the filesystem root.
STATE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/tnt-slow-client-test.XXXXXX") || {
    echo "Error: could not create temporary state directory" >&2
    exit 1
}

# PIDs of the background server and slow ssh client. They stay empty until the
# processes are started so cleanup can tell whether there is anything to kill.
SERVER_PID=""
SLOW_PID=""
|
||||
|
||||
# cleanup: stop the slow client and the server, close the FIFO descriptor and
# delete the scratch directory. It is installed as the EXIT trap, so it must
# tolerate running before either process has been started.
cleanup() {
    if [ -n "$SLOW_PID" ]; then
        kill "$SLOW_PID" 2>/dev/null || true
        wait "$SLOW_PID" 2>/dev/null || true
    fi

    # Close fd 3 (opened on the FIFO later in the script). No "2>/dev/null"
    # here: redirections on a bare exec apply to the shell itself, so that
    # would silence stderr for everything that runs afterwards.
    exec 3>&-

    if [ -n "$SERVER_PID" ]; then
        kill "$SERVER_PID" 2>/dev/null || true
        wait "$SERVER_PID" 2>/dev/null || true
    fi

    if [ -n "$STATE_DIR" ]; then
        rm -rf "$STATE_DIR"
    fi
}

trap cleanup EXIT
# Not every POSIX shell runs the EXIT trap when it is killed by a signal
# (NOTE(review): dash is believed to skip it - confirm). Turn the usual
# termination signals into a normal exit so cleanup always runs and the
# background server is not left behind.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM
|
||||
|
||||
# Reject anything that is not a non-empty string of decimal digits.
# hold_seconds is checked first, then burst_chars.
for arg_spec in "hold_seconds:$HOLD_SECONDS" "burst_chars:$BURST_CHARS"; do
    arg_name=${arg_spec%%:*}
    arg_value=${arg_spec#*:}
    case "$arg_value" in
        ''|*[!0-9]*)
            echo "Error: $arg_name must be a positive integer"
            exit 2
            ;;
    esac
done

# All-digit values can still be zero; both must be at least 1.
if [ "$HOLD_SECONDS" -lt 1 ] || [ "$BURST_CHARS" -lt 1 ]; then
    echo "Error: hold_seconds and burst_chars must be positive"
    exit 2
fi

# The server binary must already exist.
[ -f "$BIN" ] || {
    echo "Error: Binary $BIN not found. Run make first."
    exit 1
}
|
||||
|
||||
# Option list for the non-interactive probe commands (health, stats, users,
# post, tail). It is expanded unquoted by its users, so it must remain a plain
# space-separated string. Host-key checking is disabled and known hosts go to
# /dev/null; BatchMode and -n keep ssh from prompting or reading stdin.
SSH_EXEC_OPTS="-n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o BatchMode=yes -o LogLevel=ERROR -o ConnectTimeout=5 -p $PORT"
|
||||
# Option list for the interactive slow client: -tt forces a TTY even though
# stdin is a pipe, and "-e none" disables the escape character.
SSH_TTY_OPTS="-e none -tt -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ConnectTimeout=5 -p $PORT"
|
||||
|
||||
# run_ssh_timeout SECONDS OUTFILE SSH_ARGS...
#
# Run "ssh $SSH_EXEC_OPTS SSH_ARGS..." in the background with stdout and
# stderr captured in OUTFILE, polling once per second for at most SECONDS.
#
# Returns ssh's own exit status when it finishes in time. Otherwise the
# process is killed and 124 is returned.
#
# A command that exits during the final polling interval is reported with its
# real exit status instead of being misreported as a timeout.
run_ssh_timeout() {
    seconds=$1
    outfile=$2
    shift 2

    # SSH_EXEC_OPTS is intentionally unquoted: it is a space-separated list.
    ssh $SSH_EXEC_OPTS "$@" >"$outfile" 2>&1 &
    cmd_pid=$!
    elapsed=0

    while kill -0 "$cmd_pid" 2>/dev/null; do
        if [ "$elapsed" -ge "$seconds" ]; then
            # Still running after the full budget: give up on it.
            kill "$cmd_pid" 2>/dev/null || true
            wait "$cmd_pid" 2>/dev/null || true
            return 124
        fi
        sleep 1
        elapsed=$((elapsed + 1))
    done

    # The process is gone; collect and return its exit status.
    wait "$cmd_pid"
}
|
||||
|
||||
# wait_for_health: poll the server's "health" command until it prints "ok".
#
# Makes up to 15 attempts, one second apart, and gives up immediately if
# SERVER_PID is set but the process is no longer alive.
# Returns 0 once a probe output contains the line "ok", 1 otherwise.
#
# Each probe goes through run_ssh_timeout so it is bounded to 5 seconds.
# ssh's ConnectTimeout only covers connection setup, so a server that accepts
# the session and then stalls would otherwise hang this loop forever.
# The probe's exit status is ignored; only its output decides.
wait_for_health() {
    attempt=0
    while [ "$attempt" -lt 15 ]; do
        if [ -n "$SERVER_PID" ] && ! kill -0 "$SERVER_PID" 2>/dev/null; then
            return 1
        fi

        run_ssh_timeout 5 "$STATE_DIR/wait-health.out" localhost health || true
        if grep -qx 'ok' "$STATE_DIR/wait-health.out" 2>/dev/null; then
            return 0
        fi

        sleep 1
        attempt=$((attempt + 1))
    done
    return 1
}
|
||||
|
||||
# wait_for_slow_user: poll "users --json" until the output mentions "slow"
# (including the double quotes), i.e. the slow client shows up as a user.
#
# Makes up to 15 attempts, one second apart, and gives up immediately if
# SERVER_PID is set but the process is no longer alive.
# Returns 0 once the user is listed, 1 otherwise.
#
# Each probe goes through run_ssh_timeout so it is bounded to 5 seconds.
# This loop runs while the slow client is already connected, so a server that
# blocks on that client must produce a failed check rather than a hung test.
# The probe's exit status is ignored; only its output decides.
wait_for_slow_user() {
    attempt=0
    while [ "$attempt" -lt 15 ]; do
        if [ -n "$SERVER_PID" ] && ! kill -0 "$SERVER_PID" 2>/dev/null; then
            return 1
        fi

        run_ssh_timeout 5 "$STATE_DIR/wait-users.out" localhost users --json || true
        if grep -q '"slow"' "$STATE_DIR/wait-users.out" 2>/dev/null; then
            return 0
        fi

        sleep 1
        attempt=$((attempt + 1))
    done
    return 1
}
|
||||
|
||||
# Banner with the effective parameters.
printf '%s\n' "=== TNT Slow Client Test ==="
printf '%s\n' "hold=${HOLD_SECONDS}s burst_chars=$BURST_CHARS port=$PORT"

# Start the server in the background on the loopback address, keeping its
# state in the scratch directory and its output in server.log.
TNT_LANG=en "$BIN" \
    --bind 127.0.0.1 \
    --public-host slow.local \
    --max-connections 32 \
    --max-conn-per-ip 32 \
    --max-conn-rate-per-ip 64 \
    --rate-limit 0 \
    --idle-timeout 0 \
    --ssh-log-level 1 \
    -p "$PORT" \
    -d "$STATE_DIR" >"$STATE_DIR/server.log" 2>&1 &
SERVER_PID=$!

# Nothing else can run without a healthy server: dump the log and stop.
if ! wait_for_health; then
    echo "✗ server failed to start"
    sed -n '1,160p' "$STATE_DIR/server.log"
    exit 1
fi

echo "✓ server started"
PASS=$((PASS + 1))
|
||||
|
||||
# The slow client's output goes into a FIFO that nothing reads. Opening it
# read-write on fd 3 lets the writer open it without a separate reader.
SLOW_FIFO="$STATE_DIR/slow.out"
mkfifo "$SLOW_FIFO"
exec 3<>"$SLOW_FIFO"

# Input script for the interactive session: the line "slow", a short pause,
# BURST_CHARS single-character writes, then hold stdin open for HOLD_SECONDS.
(
    printf 'slow\n'
    sleep 2
    sent=0
    while [ "$sent" -lt "$BURST_CHARS" ]; do
        printf 'x'
        sent=$((sent + 1))
    done
    sleep "$HOLD_SECONDS"
) | ssh $SSH_TTY_OPTS slow@127.0.0.1 >"$SLOW_FIFO" 2>"$STATE_DIR/slow.err" &
SLOW_PID=$!

if ! wait_for_slow_user; then
    echo "✗ slow client did not reach chat"
    sed -n '1,120p' "$STATE_DIR/slow.err"
    FAIL=$((FAIL + 1))
else
    echo "✓ deliberately unread interactive client reached chat"
    PASS=$((PASS + 1))
fi
|
||||
|
||||
# NOTE(review): presumably this pause lets pressure build before probing - confirm.
sleep 3

# check_passed MESSAGE: print a success line and count it.
check_passed() {
    echo "✓ $1"
    PASS=$((PASS + 1))
}

# check_failed MESSAGE OUTFILE: print a failure line, show whatever the probe
# captured (if anything) and count the failure.
check_failed() {
    echo "✗ $1"
    cat "$2" 2>/dev/null || true
    FAIL=$((FAIL + 1))
}

# health must answer "ok" within 5 seconds.
health_out="$STATE_DIR/health.out"
if run_ssh_timeout 5 "$health_out" localhost health &&
    grep -qx 'ok' "$health_out"; then
    check_passed "health stayed responsive while slow client was pressured"
else
    check_failed "health blocked or returned unexpected output" "$health_out"
fi

# stats must report an ok status within 5 seconds.
stats_out="$STATE_DIR/stats.out"
if run_ssh_timeout 5 "$stats_out" localhost stats --json &&
    grep -q '"status":"ok"' "$stats_out"; then
    check_passed "stats stayed responsive while slow client was pressured"
else
    check_failed "stats blocked or returned unexpected output" "$stats_out"
fi

# Eight large posts (each padded with 900 zeros) from a second user; stop at
# the first one that times out or is not acknowledged with "posted".
FLOOD_FAIL=0
for i in 1 2 3 4 5 6 7 8; do
    msg=$(printf 'slow-client responsive post %02d %0900d' "$i" 0)
    post_out="$STATE_DIR/post-$i.out"
    if run_ssh_timeout 5 "$post_out" probe@localhost post "$msg" &&
        grep -qx 'posted' "$post_out"; then
        continue
    fi
    check_failed "post blocked or failed during slow-client pressure at $i/8" "$post_out"
    FLOOD_FAIL=1
    break
done

if [ "$FLOOD_FAIL" -eq 0 ]; then
    check_passed "post path stayed responsive during slow-client pressure"
fi

# The last post must be visible through tail.
tail_out="$STATE_DIR/tail.out"
if run_ssh_timeout 5 "$tail_out" localhost "tail -n 5" &&
    grep -q 'slow-client responsive post 08' "$tail_out"; then
    check_passed "tail sees messages posted during slow-client pressure"
else
    check_failed "tail missing slow-client pressure messages" "$tail_out"
fi

# Finally, the server process itself must still be alive.
if ! kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "✗ server exited during slow-client pressure"
    sed -n '1,160p' "$STATE_DIR/server.log"
    FAIL=$((FAIL + 1))
else
    check_passed "server survived slow-client pressure"
fi
|
||||
|
||||
# Summary: print the counters and a verdict; the exit status is the failure count.
printf '\n'
printf 'PASSED: %s\n' "$PASS"
printf 'FAILED: %s\n' "$FAIL"

if [ "$FAIL" -eq 0 ]; then
    echo "All tests passed"
else
    echo "Some tests failed"
fi

exit "$FAIL"
|
||||
Loading…
Reference in a new issue