Harden message log replay parsing

This commit is contained in:
m1ngsama 2026-05-27 09:18:23 +08:00
parent ec507965b2
commit ceffe59234
5 changed files with 187 additions and 75 deletions

View file

@ -51,6 +51,9 @@
- Session callback refs are now owned and released through `client.c`, so
bootstrap and interactive cleanup no longer need to manually mirror the
main-ref / callback-ref release sequence.
- Message-log replay and search now share one strict record parser and skip
malformed, invalid UTF-8, extra-separator, oversized, or unterminated
records instead of accepting partial replay data.
- The two-user lifecycle test now covers opening `:inbox` before a private
message arrives, matching the way users often leave an inbox page open.
- Private-message inbox access now uses its own mutex instead of sharing the

View file

@ -296,6 +296,11 @@ void room_broadcast(chat_room_t *room, const message_t *msg) {
2024-01-13T10:30:45Z|username|message content
```
Log replay and search use the same strict parser. A record is accepted only
when it has exactly three fields, a strict UTC RFC3339 timestamp, valid UTF-8
username/content, bounded field lengths, and a trailing newline. Unterminated
last lines are treated as partial writes and skipped.
**Optimized loading** (backward scan):
```c
/* Scan backwards from file end */

View file

@ -56,10 +56,10 @@ Goal: make stored history durable, inspectable, and recoverable.
- formalize the message log format and version it
- keep timestamps in a timezone-safe format throughout write and replay
- validate persisted UTF-8 and record structure before replay
- validate persisted UTF-8 and record structure before replay/search
- add log rotation and compaction tooling
- provide an offline inspection/export command
- define recovery behavior for truncated or partially corrupted logs
- define broader recovery behavior for truncated or partially corrupted logs
## Stage 4: Interactive UX

View file

@ -26,6 +26,82 @@ static time_t parse_rfc3339_utc(const char *timestamp_str) {
return timegm(&tm);
}
static void discard_line_remainder(FILE *fp) {
int c;
while ((c = fgetc(fp)) != '\n' && c != EOF) {
}
}
static bool parse_log_record(const char *line, message_t *out,
time_t now) {
char line_copy[2048];
char *first_sep;
char *second_sep;
char *timestamp_str;
char *username;
char *content;
time_t msg_time;
size_t line_len;
if (!line || !out) {
return false;
}
line_len = strlen(line);
if (line_len == 0 || line[line_len - 1] != '\n') {
return false;
}
if (line_len >= sizeof(line_copy)) {
return false;
}
memcpy(line_copy, line, line_len + 1);
line_copy[line_len - 1] = '\0';
first_sep = strchr(line_copy, '|');
if (!first_sep) {
return false;
}
second_sep = strchr(first_sep + 1, '|');
if (!second_sep || strchr(second_sep + 1, '|')) {
return false;
}
*first_sep = '\0';
*second_sep = '\0';
timestamp_str = line_copy;
username = first_sep + 1;
content = second_sep + 1;
if (timestamp_str[0] == '\0' || username[0] == '\0' ||
content[0] == '\0') {
return false;
}
if (strlen(username) >= MAX_USERNAME_LEN ||
strlen(content) >= MAX_MESSAGE_LEN) {
return false;
}
if (!utf8_is_valid_string(username) || !utf8_is_valid_string(content)) {
return false;
}
msg_time = parse_rfc3339_utc(timestamp_str);
if (msg_time == (time_t)-1) {
return false;
}
if (msg_time > now + 86400 || msg_time < now - 31536000 * 10) {
return false;
}
out->timestamp = msg_time;
strncpy(out->username, username, MAX_USERNAME_LEN - 1);
out->username[MAX_USERNAME_LEN - 1] = '\0';
strncpy(out->content, content, MAX_MESSAGE_LEN - 1);
out->content[MAX_MESSAGE_LEN - 1] = '\0';
return true;
}
/* Initialize message subsystem */
void message_init(void) {
/* Nothing to initialize for now */
@ -120,65 +196,23 @@ int message_load(message_t **messages, int max_messages) {
read_messages:;
char line[2048];
int count = 0;
time_t now = time(NULL);
/* Now read forward */
while (fgets(line, sizeof(line), fp) && count < max_messages) {
/* Check for oversized lines */
size_t line_len = strlen(line);
if (line_len >= sizeof(line) - 1) {
/* Skip remainder of line */
int c;
while ((c = fgetc(fp)) != '\n' && c != EOF);
if (line_len >= sizeof(line) - 1 && line[line_len - 1] != '\n') {
discard_line_remainder(fp);
continue;
}
/* Format: RFC3339_timestamp|username|content */
char line_copy[2048];
strncpy(line_copy, line, sizeof(line_copy) - 1);
line_copy[sizeof(line_copy) - 1] = '\0';
char *timestamp_str = strtok(line_copy, "|");
char *username = strtok(NULL, "|");
char *content = strtok(NULL, "\n");
/* Validate all fields exist and are non-empty */
if (!timestamp_str || !username || !content) {
continue;
}
if (username[0] == '\0') {
message_t parsed;
if (!parse_log_record(line, &parsed, now)) {
continue;
}
/* Validate field lengths */
if (strlen(username) >= MAX_USERNAME_LEN) {
continue;
}
if (strlen(content) >= MAX_MESSAGE_LEN) {
continue;
}
if (!utf8_is_valid_string(username) || !utf8_is_valid_string(content)) {
continue;
}
/* Parse strict UTC RFC3339 timestamp */
time_t msg_time = parse_rfc3339_utc(timestamp_str);
if (msg_time == (time_t)-1) {
continue;
}
/* Validate timestamp is reasonable (not in far future or past) */
time_t now = time(NULL);
if (msg_time > now + 86400 || msg_time < now - 31536000 * 10) {
continue;
}
msg_array[count].timestamp = msg_time;
strncpy(msg_array[count].username, username, MAX_USERNAME_LEN - 1);
msg_array[count].username[MAX_USERNAME_LEN - 1] = '\0';
strncpy(msg_array[count].content, content, MAX_MESSAGE_LEN - 1);
msg_array[count].content[MAX_MESSAGE_LEN - 1] = '\0';
count++;
msg_array[count++] = parsed;
}
fclose(fp);
@ -276,38 +310,19 @@ int message_search(const char *query, message_t **results, int max_results) {
char line[2048];
int count = 0;
time_t now = time(NULL);
while (fgets(line, sizeof(line), fp)) {
size_t line_len = strlen(line);
if (line_len >= sizeof(line) - 1) {
int c;
while ((c = fgetc(fp)) != '\n' && c != EOF);
if (line_len >= sizeof(line) - 1 && line[line_len - 1] != '\n') {
discard_line_remainder(fp);
continue;
}
char line_copy[2048];
strncpy(line_copy, line, sizeof(line_copy) - 1);
line_copy[sizeof(line_copy) - 1] = '\0';
char *timestamp_str = strtok(line_copy, "|");
char *username = strtok(NULL, "|");
char *content = strtok(NULL, "\n");
if (!timestamp_str || !username || !content || username[0] == '\0') continue;
if (strlen(username) >= MAX_USERNAME_LEN || strlen(content) >= MAX_MESSAGE_LEN) continue;
if (!utf8_is_valid_string(username) || !utf8_is_valid_string(content)) continue;
if (strcasestr(username, query) == NULL && strcasestr(content, query) == NULL) continue;
time_t msg_time = parse_rfc3339_utc(timestamp_str);
if (msg_time == (time_t)-1) continue;
message_t m;
m.timestamp = msg_time;
strncpy(m.username, username, MAX_USERNAME_LEN - 1);
m.username[MAX_USERNAME_LEN - 1] = '\0';
strncpy(m.content, content, MAX_MESSAGE_LEN - 1);
m.content[MAX_MESSAGE_LEN - 1] = '\0';
if (!parse_log_record(line, &m, now)) continue;
if (strcasestr(m.username, query) == NULL &&
strcasestr(m.content, query) == NULL) continue;
if (count < max_results) {
res[count++] = m;

View file

@ -3,8 +3,10 @@
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>
#include <limits.h>
#define TEST(name) static void test_##name()
#define RUN_TEST(name) do { \
@ -16,12 +18,45 @@
static int tests_passed = 0;
static const char *test_log = "test_messages.log";
static char test_state_dir[PATH_MAX];
/* Helper: Clean up test log file */
static void cleanup_test_log(void) {
unlink(test_log);
}
static void cleanup_state_dir(void) {
if (test_state_dir[0] != '\0') {
char log_path[PATH_MAX];
snprintf(log_path, sizeof(log_path), "%s/messages.log", test_state_dir);
unlink(log_path);
rmdir(test_state_dir);
test_state_dir[0] = '\0';
}
unsetenv("TNT_STATE_DIR");
}
static void setup_state_dir(void) {
const char *tmp = getenv("TMPDIR");
cleanup_state_dir();
if (!tmp || tmp[0] == '\0') {
tmp = "/tmp";
}
snprintf(test_state_dir, sizeof(test_state_dir),
"%s/tnt-message-test.XXXXXX", tmp);
assert(mkdtemp(test_state_dir) != NULL);
assert(setenv("TNT_STATE_DIR", test_state_dir, 1) == 0);
}
static void format_rfc3339_now(char *buffer, size_t buf_size) {
time_t now = time(NULL);
struct tm tm_info;
gmtime_r(&now, &tm_info);
strftime(buffer, buf_size, "%Y-%m-%dT%H:%M:%SZ", &tm_info);
}
/* Test message initialization */
TEST(message_init) {
message_init();
@ -122,6 +157,57 @@ TEST(message_save_basic) {
cleanup_test_log();
}
TEST(message_load_skips_malformed_records) {
char ts[64];
char log_path[PATH_MAX];
message_t *messages = NULL;
setup_state_dir();
format_rfc3339_now(ts, sizeof(ts));
snprintf(log_path, sizeof(log_path), "%s/messages.log", test_state_dir);
FILE *fp = fopen(log_path, "wb");
assert(fp != NULL);
fprintf(fp, "%s|alice|valid one\n", ts);
fprintf(fp, "not-a-date|bob|bad date\n");
fprintf(fp, "%s||empty user\n", ts);
fprintf(fp, "%s|mallory|extra|pipe\n", ts);
fprintf(fp, "%s|badutf|bad \xC3\x28\n", ts);
fprintf(fp, "%s|partial|truncated record", ts);
fclose(fp);
int count = message_load(&messages, 10);
assert(count == 1);
assert(strcmp(messages[0].username, "alice") == 0);
assert(strcmp(messages[0].content, "valid one") == 0);
free(messages);
cleanup_state_dir();
}
TEST(message_search_skips_malformed_records) {
char ts[64];
char log_path[PATH_MAX];
message_t *results = NULL;
setup_state_dir();
format_rfc3339_now(ts, sizeof(ts));
snprintf(log_path, sizeof(log_path), "%s/messages.log", test_state_dir);
FILE *fp = fopen(log_path, "wb");
assert(fp != NULL);
fprintf(fp, "%s|alice|needle valid\n", ts);
fprintf(fp, "%s|mallory|needle extra|pipe\n", ts);
fprintf(fp, "%s|partial|needle truncated", ts);
fclose(fp);
int count = message_search("needle", &results, 10);
assert(count == 1);
assert(strcmp(results[0].username, "alice") == 0);
assert(strcmp(results[0].content, "needle valid") == 0);
free(results);
cleanup_state_dir();
}
/* Test edge cases */
TEST(message_edge_cases) {
message_t msg;
@ -215,12 +301,15 @@ int main(void) {
RUN_TEST(message_format_unicode);
RUN_TEST(message_format_width_limits);
RUN_TEST(message_save_basic);
RUN_TEST(message_load_skips_malformed_records);
RUN_TEST(message_search_skips_malformed_records);
RUN_TEST(message_edge_cases);
RUN_TEST(message_special_characters);
RUN_TEST(message_buffer_safety);
RUN_TEST(message_timestamp_formats);
cleanup_test_log();
cleanup_state_dir();
printf("\n✓ All %d tests passed!\n", tests_passed);
return 0;