From 86e1ec8e32e1656cdb57bb13cf26e52bb2cc1fe7 Mon Sep 17 00:00:00 2001
From: m1ngsama
Date: Sun, 24 May 2026 08:58:51 +0800
Subject: [PATCH] i18n: tolerate whitespace in language parsing

---
 docs/CHANGELOG.md      |  2 ++
 src/i18n.c             | 22 +++++++++++++++++++++-
 tests/unit/test_i18n.c | 15 +++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 9c9218f..b1d6683 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -64,6 +64,8 @@
   concurrent-session and connection-rate scenarios.
 - CI memory-leak smoke checks now use an isolated state directory, wait for real
   SSH readiness, and clean up the exact server PID instead of `pkill`.
+- Language parsing now tolerates surrounding whitespace and accepts the
+  `english` alias, improving `TNT_LANG` and `:lang` ergonomics.
 - Refreshed README and quick-reference module maps to match the current
   `cli_text`, `help_text`, `support_text`, i18n, exec, and rate-limit modules.
 - NORMAL mode now opens at the latest visible messages instead of the oldest
diff --git a/src/i18n.c b/src/i18n.c
index c721a51..844053c 100644
--- a/src/i18n.c
+++ b/src/i18n.c
@@ -2,9 +2,28 @@
 
 #include <ctype.h>
 
+static const char *skip_space(const char *value) {
+    while (value && *value &&
+           isspace((unsigned char)*value)) {
+        value++;
+    }
+    return value;
+}
+
+static bool is_lang_boundary(const char *value) {
+    if (*value == '\0' || *value == '_' || *value == '-' || *value == '.') {
+        return true;
+    }
+    if (!isspace((unsigned char)*value)) {
+        return false;
+    }
+    return *skip_space(value) == '\0';
+}
+
 static bool starts_with_lang(const char *value, const char *prefix) {
     if (!value || !prefix) return false;
 
+    value = skip_space(value);
     while (*prefix) {
         if (tolower((unsigned char)*value) !=
             tolower((unsigned char)*prefix)) {
@@ -14,7 +33,7 @@ static bool starts_with_lang(const char *value, const char *prefix) {
         prefix++;
     }
 
-    return *value == '\0' || *value == '_' || *value == '-' || *value == '.';
+    return is_lang_boundary(value);
 }
 
 bool i18n_try_parse_lang(const char *value, help_lang_t *lang) {
@@ -30,6 +49,7 @@ bool i18n_try_parse_lang(const char *value, help_lang_t *lang) {
     }
 
     if (starts_with_lang(value, "en") ||
+        starts_with_lang(value, "english") ||
         starts_with_lang(value, "c") ||
         starts_with_lang(value, "posix")) {
         if (lang) *lang = LANG_EN;
diff --git a/tests/unit/test_i18n.c b/tests/unit/test_i18n.c
index ef98c60..d1ab38f 100644
--- a/tests/unit/test_i18n.c
+++ b/tests/unit/test_i18n.c
@@ -38,6 +38,20 @@ TEST(parse_unknown_uses_fallback) {
     assert(i18n_parse_lang("fr_FR.UTF-8", LANG_ZH) == LANG_ZH);
 }
 
+TEST(parse_ignores_surrounding_whitespace) {
+    help_lang_t lang;
+
+    assert(i18n_try_parse_lang(" zh ", &lang) == true);
+    assert(lang == LANG_ZH);
+    assert(i18n_parse_lang("\ten_US.UTF-8\n", LANG_ZH) == LANG_EN);
+    assert(i18n_parse_lang(" english ", LANG_ZH) == LANG_EN);
+    assert(i18n_try_parse_lang("zh CN", &lang) == false);
+
+    setenv("TNT_LANG", " zh ", 1);
+    setenv("LC_ALL", "en_US.UTF-8", 1);
+    assert(i18n_default_lang() == LANG_ZH);
+}
+
 TEST(default_prefers_tnt_lang) {
     setenv("TNT_LANG", "zh_CN.UTF-8", 1);
     setenv("LC_ALL", "en_US.UTF-8", 1);
@@ -123,6 +137,7 @@ int main(void) {
 
     RUN_TEST(parse_explicit_languages);
     RUN_TEST(parse_unknown_uses_fallback);
+    RUN_TEST(parse_ignores_surrounding_whitespace);
     RUN_TEST(default_prefers_tnt_lang);
     RUN_TEST(default_uses_locale_when_no_tnt_lang);
     RUN_TEST(text_lookup_matches_language);