mirror of
https://github.com/m1ngsama/TNT.git
synced 2026-02-08 08:54:05 +00:00
Add 31 unit tests covering core functionality: - UTF-8 byte length detection - UTF-8 character decoding (1-4 byte sequences) - Character width calculation (ASCII, CJK, Hangul, Hiragana, Katakana) - String width calculation - Character/word removal functions - UTF-8 validation - Message formatting and edge cases Test results: 31/31 passed ✓ Files: - tests/unit/test_utf8.c (20 tests) - tests/unit/test_message.c (11 tests) - tests/unit/Makefile (build configuration)
239 lines
6.7 KiB
C
239 lines
6.7 KiB
C
/* Unit tests for UTF-8 functions */
|
||
#include "../../include/utf8.h"
|
||
#include <stdio.h>
|
||
#include <string.h>
|
||
#include <assert.h>
|
||
|
||
/*
 * Minimal assert-based test harness.
 *
 * TEST(name)      opens the definition of a file-local function named
 *                 test_<name>; the braces that follow form its body.
 * RUN_TEST(name)  prints the test name, calls test_<name>(), prints a check
 *                 mark and increments tests_passed.
 *
 * The test name is flushed before the test body runs: the line has no
 * trailing newline, so without the flush it could still sit in the stdio
 * buffer (and be lost) when a failing assert() terminates the program.
 *
 * NOTE: assert() expands to nothing when NDEBUG is defined, so the tests
 * only check anything when built without NDEBUG.
 */
#define TEST(name) static void test_##name(void)

#define RUN_TEST(name) do { \
    printf("Running %s... ", #name); \
    fflush(stdout); \
    test_##name(); \
    printf("✓\n"); \
    tests_passed++; \
} while(0)

/* Number of tests that have run to completion via RUN_TEST. */
static int tests_passed = 0;
|
||
|
||
/* Test UTF-8 byte length detection */
|
||
/* Each ASCII sample byte must be reported as a 1-byte sequence. */
TEST(utf8_byte_length_ascii) {
    static const unsigned char ascii_samples[] = { 'A', 'z', '0' };
    const size_t sample_count = sizeof ascii_samples / sizeof ascii_samples[0];
    size_t i;

    for (i = 0; i < sample_count; i++) {
        assert(utf8_byte_length(ascii_samples[i]) == 1);
    }
}
|
||
|
||
/* Lead bytes of 2-, 3- and 4-byte sequences must report the full length. */
TEST(utf8_byte_length_multibyte) {
    static const struct {
        unsigned char lead;
        int length;
    } leads[] = {
        { 0xC3, 2 },  /* é first byte */
        { 0xE4, 3 },  /* 中 first byte */
        { 0xF0, 4 }   /* 𝕏 first byte */
    };
    size_t i;

    for (i = 0; i < sizeof leads / sizeof leads[0]; i++) {
        assert(utf8_byte_length(leads[i].lead) == leads[i].length);
    }
}
|
||
|
||
/* Bytes that cannot start a sequence are expected to yield length 1. */
TEST(utf8_byte_length_invalid) {
    const unsigned char invalid_byte = 0xFF;       /* Invalid UTF-8 */
    const unsigned char continuation_byte = 0x80;  /* Continuation byte */

    assert(utf8_byte_length(invalid_byte) == 1);
    assert(utf8_byte_length(continuation_byte) == 1);
}
|
||
|
||
/* Test UTF-8 decoding */
|
||
/* Decoding "A" must yield the code point 'A' and report 1 byte consumed. */
TEST(utf8_decode_ascii) {
    /*
     * Sentinel value: if utf8_decode() never stores a length, the check
     * below fails deterministically instead of reading an indeterminate int.
     */
    int bytes_read = -1;

    assert(utf8_decode("A", &bytes_read) == 'A');
    assert(bytes_read == 1);
}
|
||
|
||
/* Decoding the 2-byte sequence for é must yield U+00E9 and consume 2 bytes. */
TEST(utf8_decode_2byte) {
    /*
     * Sentinel value: if utf8_decode() never stores a length, the check
     * below fails deterministically instead of reading an indeterminate int.
     */
    int bytes_read = -1;
    /* é = U+00E9 = 0xC3 0xA9 */
    const char *e_acute = "\xC3\xA9";
    uint32_t codepoint = utf8_decode(e_acute, &bytes_read);

    assert(codepoint == 0x00E9);
    assert(bytes_read == 2);
}
|
||
|
||
/* Decoding the 3-byte sequence for 中 must yield U+4E2D and consume 3 bytes. */
TEST(utf8_decode_3byte) {
    /*
     * Sentinel value: if utf8_decode() never stores a length, the check
     * below fails deterministically instead of reading an indeterminate int.
     */
    int bytes_read = -1;
    /* 中 = U+4E2D = 0xE4 0xB8 0xAD */
    const char *zhong = "\xE4\xB8\xAD";
    uint32_t codepoint = utf8_decode(zhong, &bytes_read);

    assert(codepoint == 0x4E2D);
    assert(bytes_read == 3);
}
|
||
|
||
/* Decoding the 4-byte sequence for 𝕏 must yield U+1D54F and consume 4 bytes. */
TEST(utf8_decode_4byte) {
    /*
     * Sentinel value: if utf8_decode() never stores a length, the check
     * below fails deterministically instead of reading an indeterminate int.
     */
    int bytes_read = -1;
    /* 𝕏 = U+1D54F = 0xF0 0x9D 0x95 0x8F */
    const char *math_x = "\xF0\x9D\x95\x8F";
    uint32_t codepoint = utf8_decode(math_x, &bytes_read);

    assert(codepoint == 0x1D54F);
    assert(bytes_read == 4);
}
|
||
|
||
/* Test character width calculation */
|
||
/* A letter, a space and a digit are each expected to be one column wide. */
TEST(utf8_char_width_ascii) {
    static const uint32_t narrow[] = { 'A', ' ', '0' };
    size_t i;

    for (i = 0; i < sizeof narrow / sizeof narrow[0]; i++) {
        assert(utf8_char_width(narrow[i]) == 1);
    }
}
|
||
|
||
/* The sampled CJK ideographs are each expected to be two columns wide. */
TEST(utf8_char_width_cjk) {
    static const uint32_t ideographs[] = {
        0x4E2D,  /* 中 */
        0x6587,  /* 文 */
        0x5B57   /* 字 */
    };
    size_t i;

    for (i = 0; i < sizeof ideographs / sizeof ideographs[0]; i++) {
        assert(utf8_char_width(ideographs[i]) == 2);
    }
}
|
||
|
||
/* The two sampled Hangul syllables are expected to be two columns wide. */
TEST(utf8_char_width_hangul) {
    const uint32_t hangul_ga = 0xAC00;   /* 가 */
    const uint32_t hangul_hih = 0xD7A3;  /* 힣 */

    assert(utf8_char_width(hangul_ga) == 2);
    assert(utf8_char_width(hangul_hih) == 2);
}
|
||
|
||
/* The two sampled Hiragana characters are expected to be two columns wide. */
TEST(utf8_char_width_hiragana) {
    const uint32_t hiragana_a = 0x3042;  /* あ */
    const uint32_t hiragana_n = 0x3093;  /* ん */

    assert(utf8_char_width(hiragana_a) == 2);
    assert(utf8_char_width(hiragana_n) == 2);
}
|
||
|
||
/* The two sampled Katakana characters are expected to be two columns wide. */
TEST(utf8_char_width_katakana) {
    const uint32_t katakana_a = 0x30A2;  /* ア */
    const uint32_t katakana_n = 0x30F3;  /* ン */

    assert(utf8_char_width(katakana_a) == 2);
    assert(utf8_char_width(katakana_n) == 2);
}
|
||
|
||
/* Test string width calculation */
|
||
/* For ASCII-only input (including ""), the width must equal the char count. */
TEST(utf8_string_width_ascii) {
    static const struct {
        const char *text;
        int columns;
    } cases[] = {
        { "Hello",   5 },
        { "",        0 },
        { "Test123", 7 }
    };
    size_t i;

    for (i = 0; i < sizeof cases / sizeof cases[0]; i++) {
        assert(utf8_string_width(cases[i].text) == cases[i].columns);
    }
}
|
||
|
||
/* Strings mixing ASCII and CJK: ASCII counts 1 column each, CJK 2 each. */
TEST(utf8_string_width_mixed) {
    const char *ascii_then_cjk = "Hello世界";  /* 5 ASCII + 2*2 CJK = 9 */
    const char *cjk_then_ascii = "测试Test";   /* 2*2 CJK + 4 ASCII = 8 */

    assert(utf8_string_width(ascii_then_cjk) == 9);
    assert(utf8_string_width(cjk_then_ascii) == 8);
}
|
||
|
||
/* A string of four CJK characters is expected to be 4 * 2 = 8 columns wide. */
TEST(utf8_string_width_cjk_only) {
    const char *four_ideographs = "中文字符";

    assert(utf8_string_width(four_ideographs) == 8);
}
|
||
|
||
/* Test backspace handling */
|
||
TEST(utf8_remove_last_char) {
|
||
char buffer[256];
|
||
|
||
/* Test ASCII */
|
||
strcpy(buffer, "Hello");
|
||
utf8_remove_last_char(buffer);
|
||
assert(strcmp(buffer, "Hell") == 0);
|
||
|
||
/* Test empty string */
|
||
strcpy(buffer, "");
|
||
utf8_remove_last_char(buffer);
|
||
assert(strcmp(buffer, "") == 0);
|
||
|
||
/* Test single char */
|
||
strcpy(buffer, "A");
|
||
utf8_remove_last_char(buffer);
|
||
assert(strcmp(buffer, "") == 0);
|
||
}
|
||
|
||
/*
 * Removing the last character must drop a whole multi-byte character,
 * leaving the preceding text intact.
 */
TEST(utf8_remove_last_char_multibyte) {
    static const struct {
        const char *before;
        const char *after;
    } cases[] = {
        { "café", "caf" },  /* 2-byte UTF-8 */
        { "你好", "你"  }   /* 3-byte UTF-8 (CJK) */
    };
    char buffer[256];
    size_t i;

    for (i = 0; i < sizeof cases / sizeof cases[0]; i++) {
        strcpy(buffer, cases[i].before);
        utf8_remove_last_char(buffer);
        assert(strcmp(buffer, cases[i].after) == 0);
    }
}
|
||
|
||
/* Test word removal (Ctrl+W) */
|
||
TEST(utf8_remove_last_word) {
|
||
char buffer[256];
|
||
|
||
/* Test simple case */
|
||
strcpy(buffer, "hello world");
|
||
utf8_remove_last_word(buffer);
|
||
assert(strcmp(buffer, "hello ") == 0);
|
||
|
||
/* Test multiple words */
|
||
strcpy(buffer, "one two three");
|
||
utf8_remove_last_word(buffer);
|
||
assert(strcmp(buffer, "one two ") == 0);
|
||
|
||
/* Test trailing spaces */
|
||
strcpy(buffer, "hello ");
|
||
utf8_remove_last_word(buffer);
|
||
assert(strcmp(buffer, "") == 0);
|
||
|
||
/* Test single word */
|
||
strcpy(buffer, "word");
|
||
utf8_remove_last_word(buffer);
|
||
assert(strcmp(buffer, "") == 0);
|
||
|
||
/* Test empty string */
|
||
strcpy(buffer, "");
|
||
utf8_remove_last_word(buffer);
|
||
assert(strcmp(buffer, "") == 0);
|
||
}
|
||
|
||
/* Test input validation */
|
||
TEST(utf8_is_valid_sequence) {
|
||
/* Valid sequences */
|
||
assert(utf8_is_valid_sequence("A", 1) == true);
|
||
assert(utf8_is_valid_sequence("\xC3\xA9", 2) == true); /* é */
|
||
assert(utf8_is_valid_sequence("\xE4\xB8\xAD", 3) == true); /* 中 */
|
||
|
||
/* Invalid sequences */
|
||
assert(utf8_is_valid_sequence("\xFF", 1) == false); /* Invalid start */
|
||
assert(utf8_is_valid_sequence("\xC3\xFF", 2) == false); /* Invalid continuation */
|
||
|
||
/* Invalid lengths */
|
||
assert(utf8_is_valid_sequence("", 0) == false);
|
||
assert(utf8_is_valid_sequence("ABCDE", 5) == false); /* Too long */
|
||
assert(utf8_is_valid_sequence(NULL, 1) == false);
|
||
}
|
||
|
||
/* Test boundary cases */
|
||
/*
 * Width at range boundaries: the top of the code space, the top of the BMP,
 * and the code points on either side of the U+4E00..U+9FFF range.
 */
TEST(utf8_boundary_cases) {
    static const struct {
        uint32_t codepoint;
        int width;
    } edges[] = {
        { 0x10FFFF, 1 },  /* Max Unicode codepoint */
        { 0xFFFF,   1 },  /* BMP boundary */
        { 0x4DFF,   1 },  /* Last code point before U+4E00 */
        { 0x4E00,   2 },  /* Start of CJK Unified */
        { 0x9FFF,   2 },  /* End of CJK Unified */
        { 0xA000,   1 }   /* Just after CJK Unified */
    };
    size_t i;

    for (i = 0; i < sizeof edges / sizeof edges[0]; i++) {
        assert(utf8_char_width(edges[i].codepoint) == edges[i].width);
    }
}
|
||
|
||
int main(void) {
|
||
printf("Running UTF-8 unit tests...\n\n");
|
||
|
||
RUN_TEST(utf8_byte_length_ascii);
|
||
RUN_TEST(utf8_byte_length_multibyte);
|
||
RUN_TEST(utf8_byte_length_invalid);
|
||
RUN_TEST(utf8_decode_ascii);
|
||
RUN_TEST(utf8_decode_2byte);
|
||
RUN_TEST(utf8_decode_3byte);
|
||
RUN_TEST(utf8_decode_4byte);
|
||
RUN_TEST(utf8_char_width_ascii);
|
||
RUN_TEST(utf8_char_width_cjk);
|
||
RUN_TEST(utf8_char_width_hangul);
|
||
RUN_TEST(utf8_char_width_hiragana);
|
||
RUN_TEST(utf8_char_width_katakana);
|
||
RUN_TEST(utf8_string_width_ascii);
|
||
RUN_TEST(utf8_string_width_mixed);
|
||
RUN_TEST(utf8_string_width_cjk_only);
|
||
RUN_TEST(utf8_remove_last_char);
|
||
RUN_TEST(utf8_remove_last_char_multibyte);
|
||
RUN_TEST(utf8_remove_last_word);
|
||
RUN_TEST(utf8_is_valid_sequence);
|
||
RUN_TEST(utf8_boundary_cases);
|
||
|
||
printf("\n✓ All %d tests passed!\n", tests_passed);
|
||
return 0;
|
||
}
|