mirror of
https://oauth2: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@github.com/m1ngsama/TNT.git
synced 2026-06-26 04:34:38 +08:00
Harden message log replay parsing
This commit is contained in:
parent
ec507965b2
commit
ceffe59234
5 changed files with 187 additions and 75 deletions
|
|
@ -51,6 +51,9 @@
|
|||
- Session callback refs are now owned and released through `client.c`, so
|
||||
bootstrap and interactive cleanup no longer need to manually mirror the
|
||||
main-ref / callback-ref release sequence.
|
||||
- Message-log replay and search now share one strict record parser and skip
|
||||
malformed, invalid UTF-8, extra-separator, oversized, or unterminated
|
||||
records instead of accepting partial replay data.
|
||||
- The two-user lifecycle test now covers opening `:inbox` before a private
|
||||
message arrives, matching the way users often leave an inbox page open.
|
||||
- Private-message inbox access now uses its own mutex instead of sharing the
|
||||
|
|
|
|||
|
|
@ -296,6 +296,11 @@ void room_broadcast(chat_room_t *room, const message_t *msg) {
|
|||
2024-01-13T10:30:45Z|username|message content
|
||||
```
|
||||
|
||||
Log replay and search use the same strict parser. A record is accepted only
|
||||
when it has exactly three fields, a strict UTC RFC3339 timestamp, valid UTF-8
|
||||
username/content, bounded field lengths, and a trailing newline. Unterminated
|
||||
last lines are treated as partial writes and skipped.
|
||||
|
||||
**Optimized loading** (backward scan):
|
||||
```c
|
||||
/* Scan backwards from file end */
|
||||
|
|
|
|||
|
|
@ -56,10 +56,10 @@ Goal: make stored history durable, inspectable, and recoverable.
|
|||
|
||||
- formalize the message log format and version it
|
||||
- keep timestamps in a timezone-safe format throughout write and replay
|
||||
- validate persisted UTF-8 and record structure before replay
|
||||
- ✅ validate persisted UTF-8 and record structure before replay/search
|
||||
- add log rotation and compaction tooling
|
||||
- provide an offline inspection/export command
|
||||
- define recovery behavior for truncated or partially corrupted logs
|
||||
- define broader recovery behavior for truncated or partially corrupted logs
|
||||
|
||||
## Stage 4: Interactive UX
|
||||
|
||||
|
|
|
|||
161
src/message.c
161
src/message.c
|
|
@ -26,6 +26,82 @@ static time_t parse_rfc3339_utc(const char *timestamp_str) {
|
|||
return timegm(&tm);
|
||||
}
|
||||
|
||||
static void discard_line_remainder(FILE *fp) {
|
||||
int c;
|
||||
|
||||
while ((c = fgetc(fp)) != '\n' && c != EOF) {
|
||||
}
|
||||
}
|
||||
|
||||
static bool parse_log_record(const char *line, message_t *out,
|
||||
time_t now) {
|
||||
char line_copy[2048];
|
||||
char *first_sep;
|
||||
char *second_sep;
|
||||
char *timestamp_str;
|
||||
char *username;
|
||||
char *content;
|
||||
time_t msg_time;
|
||||
size_t line_len;
|
||||
|
||||
if (!line || !out) {
|
||||
return false;
|
||||
}
|
||||
|
||||
line_len = strlen(line);
|
||||
if (line_len == 0 || line[line_len - 1] != '\n') {
|
||||
return false;
|
||||
}
|
||||
if (line_len >= sizeof(line_copy)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(line_copy, line, line_len + 1);
|
||||
line_copy[line_len - 1] = '\0';
|
||||
|
||||
first_sep = strchr(line_copy, '|');
|
||||
if (!first_sep) {
|
||||
return false;
|
||||
}
|
||||
second_sep = strchr(first_sep + 1, '|');
|
||||
if (!second_sep || strchr(second_sep + 1, '|')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
*first_sep = '\0';
|
||||
*second_sep = '\0';
|
||||
timestamp_str = line_copy;
|
||||
username = first_sep + 1;
|
||||
content = second_sep + 1;
|
||||
|
||||
if (timestamp_str[0] == '\0' || username[0] == '\0' ||
|
||||
content[0] == '\0') {
|
||||
return false;
|
||||
}
|
||||
if (strlen(username) >= MAX_USERNAME_LEN ||
|
||||
strlen(content) >= MAX_MESSAGE_LEN) {
|
||||
return false;
|
||||
}
|
||||
if (!utf8_is_valid_string(username) || !utf8_is_valid_string(content)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
msg_time = parse_rfc3339_utc(timestamp_str);
|
||||
if (msg_time == (time_t)-1) {
|
||||
return false;
|
||||
}
|
||||
if (msg_time > now + 86400 || msg_time < now - 31536000 * 10) {
|
||||
return false;
|
||||
}
|
||||
|
||||
out->timestamp = msg_time;
|
||||
strncpy(out->username, username, MAX_USERNAME_LEN - 1);
|
||||
out->username[MAX_USERNAME_LEN - 1] = '\0';
|
||||
strncpy(out->content, content, MAX_MESSAGE_LEN - 1);
|
||||
out->content[MAX_MESSAGE_LEN - 1] = '\0';
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Initialize message subsystem */
|
||||
void message_init(void) {
|
||||
/* Nothing to initialize for now */
|
||||
|
|
@ -120,65 +196,23 @@ int message_load(message_t **messages, int max_messages) {
|
|||
read_messages:;
|
||||
char line[2048];
|
||||
int count = 0;
|
||||
time_t now = time(NULL);
|
||||
|
||||
/* Now read forward */
|
||||
while (fgets(line, sizeof(line), fp) && count < max_messages) {
|
||||
/* Check for oversized lines */
|
||||
size_t line_len = strlen(line);
|
||||
if (line_len >= sizeof(line) - 1) {
|
||||
/* Skip remainder of line */
|
||||
int c;
|
||||
while ((c = fgetc(fp)) != '\n' && c != EOF);
|
||||
if (line_len >= sizeof(line) - 1 && line[line_len - 1] != '\n') {
|
||||
discard_line_remainder(fp);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Format: RFC3339_timestamp|username|content */
|
||||
char line_copy[2048];
|
||||
strncpy(line_copy, line, sizeof(line_copy) - 1);
|
||||
line_copy[sizeof(line_copy) - 1] = '\0';
|
||||
|
||||
char *timestamp_str = strtok(line_copy, "|");
|
||||
char *username = strtok(NULL, "|");
|
||||
char *content = strtok(NULL, "\n");
|
||||
|
||||
/* Validate all fields exist and are non-empty */
|
||||
if (!timestamp_str || !username || !content) {
|
||||
continue;
|
||||
}
|
||||
if (username[0] == '\0') {
|
||||
message_t parsed;
|
||||
if (!parse_log_record(line, &parsed, now)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Validate field lengths */
|
||||
if (strlen(username) >= MAX_USERNAME_LEN) {
|
||||
continue;
|
||||
}
|
||||
if (strlen(content) >= MAX_MESSAGE_LEN) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!utf8_is_valid_string(username) || !utf8_is_valid_string(content)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Parse strict UTC RFC3339 timestamp */
|
||||
time_t msg_time = parse_rfc3339_utc(timestamp_str);
|
||||
if (msg_time == (time_t)-1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Validate timestamp is reasonable (not in far future or past) */
|
||||
time_t now = time(NULL);
|
||||
if (msg_time > now + 86400 || msg_time < now - 31536000 * 10) {
|
||||
continue;
|
||||
}
|
||||
|
||||
msg_array[count].timestamp = msg_time;
|
||||
strncpy(msg_array[count].username, username, MAX_USERNAME_LEN - 1);
|
||||
msg_array[count].username[MAX_USERNAME_LEN - 1] = '\0';
|
||||
strncpy(msg_array[count].content, content, MAX_MESSAGE_LEN - 1);
|
||||
msg_array[count].content[MAX_MESSAGE_LEN - 1] = '\0';
|
||||
count++;
|
||||
msg_array[count++] = parsed;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
|
@ -276,38 +310,19 @@ int message_search(const char *query, message_t **results, int max_results) {
|
|||
|
||||
char line[2048];
|
||||
int count = 0;
|
||||
time_t now = time(NULL);
|
||||
|
||||
while (fgets(line, sizeof(line), fp)) {
|
||||
size_t line_len = strlen(line);
|
||||
if (line_len >= sizeof(line) - 1) {
|
||||
int c;
|
||||
while ((c = fgetc(fp)) != '\n' && c != EOF);
|
||||
if (line_len >= sizeof(line) - 1 && line[line_len - 1] != '\n') {
|
||||
discard_line_remainder(fp);
|
||||
continue;
|
||||
}
|
||||
|
||||
char line_copy[2048];
|
||||
strncpy(line_copy, line, sizeof(line_copy) - 1);
|
||||
line_copy[sizeof(line_copy) - 1] = '\0';
|
||||
|
||||
char *timestamp_str = strtok(line_copy, "|");
|
||||
char *username = strtok(NULL, "|");
|
||||
char *content = strtok(NULL, "\n");
|
||||
|
||||
if (!timestamp_str || !username || !content || username[0] == '\0') continue;
|
||||
if (strlen(username) >= MAX_USERNAME_LEN || strlen(content) >= MAX_MESSAGE_LEN) continue;
|
||||
if (!utf8_is_valid_string(username) || !utf8_is_valid_string(content)) continue;
|
||||
|
||||
if (strcasestr(username, query) == NULL && strcasestr(content, query) == NULL) continue;
|
||||
|
||||
time_t msg_time = parse_rfc3339_utc(timestamp_str);
|
||||
if (msg_time == (time_t)-1) continue;
|
||||
|
||||
message_t m;
|
||||
m.timestamp = msg_time;
|
||||
strncpy(m.username, username, MAX_USERNAME_LEN - 1);
|
||||
m.username[MAX_USERNAME_LEN - 1] = '\0';
|
||||
strncpy(m.content, content, MAX_MESSAGE_LEN - 1);
|
||||
m.content[MAX_MESSAGE_LEN - 1] = '\0';
|
||||
if (!parse_log_record(line, &m, now)) continue;
|
||||
if (strcasestr(m.username, query) == NULL &&
|
||||
strcasestr(m.content, query) == NULL) continue;
|
||||
|
||||
if (count < max_results) {
|
||||
res[count++] = m;
|
||||
|
|
|
|||
|
|
@ -3,8 +3,10 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define TEST(name) static void test_##name()
|
||||
#define RUN_TEST(name) do { \
|
||||
|
|
@ -16,12 +18,45 @@
|
|||
|
||||
static int tests_passed = 0;
|
||||
static const char *test_log = "test_messages.log";
|
||||
static char test_state_dir[PATH_MAX];
|
||||
|
||||
/* Helper: Clean up test log file */
|
||||
static void cleanup_test_log(void) {
|
||||
unlink(test_log);
|
||||
}
|
||||
|
||||
static void cleanup_state_dir(void) {
|
||||
if (test_state_dir[0] != '\0') {
|
||||
char log_path[PATH_MAX];
|
||||
snprintf(log_path, sizeof(log_path), "%s/messages.log", test_state_dir);
|
||||
unlink(log_path);
|
||||
rmdir(test_state_dir);
|
||||
test_state_dir[0] = '\0';
|
||||
}
|
||||
unsetenv("TNT_STATE_DIR");
|
||||
}
|
||||
|
||||
static void setup_state_dir(void) {
|
||||
const char *tmp = getenv("TMPDIR");
|
||||
|
||||
cleanup_state_dir();
|
||||
if (!tmp || tmp[0] == '\0') {
|
||||
tmp = "/tmp";
|
||||
}
|
||||
snprintf(test_state_dir, sizeof(test_state_dir),
|
||||
"%s/tnt-message-test.XXXXXX", tmp);
|
||||
assert(mkdtemp(test_state_dir) != NULL);
|
||||
assert(setenv("TNT_STATE_DIR", test_state_dir, 1) == 0);
|
||||
}
|
||||
|
||||
static void format_rfc3339_now(char *buffer, size_t buf_size) {
|
||||
time_t now = time(NULL);
|
||||
struct tm tm_info;
|
||||
|
||||
gmtime_r(&now, &tm_info);
|
||||
strftime(buffer, buf_size, "%Y-%m-%dT%H:%M:%SZ", &tm_info);
|
||||
}
|
||||
|
||||
/* Test message initialization */
|
||||
TEST(message_init) {
|
||||
message_init();
|
||||
|
|
@ -122,6 +157,57 @@ TEST(message_save_basic) {
|
|||
cleanup_test_log();
|
||||
}
|
||||
|
||||
TEST(message_load_skips_malformed_records) {
|
||||
char ts[64];
|
||||
char log_path[PATH_MAX];
|
||||
message_t *messages = NULL;
|
||||
|
||||
setup_state_dir();
|
||||
format_rfc3339_now(ts, sizeof(ts));
|
||||
snprintf(log_path, sizeof(log_path), "%s/messages.log", test_state_dir);
|
||||
|
||||
FILE *fp = fopen(log_path, "wb");
|
||||
assert(fp != NULL);
|
||||
fprintf(fp, "%s|alice|valid one\n", ts);
|
||||
fprintf(fp, "not-a-date|bob|bad date\n");
|
||||
fprintf(fp, "%s||empty user\n", ts);
|
||||
fprintf(fp, "%s|mallory|extra|pipe\n", ts);
|
||||
fprintf(fp, "%s|badutf|bad \xC3\x28\n", ts);
|
||||
fprintf(fp, "%s|partial|truncated record", ts);
|
||||
fclose(fp);
|
||||
|
||||
int count = message_load(&messages, 10);
|
||||
assert(count == 1);
|
||||
assert(strcmp(messages[0].username, "alice") == 0);
|
||||
assert(strcmp(messages[0].content, "valid one") == 0);
|
||||
free(messages);
|
||||
cleanup_state_dir();
|
||||
}
|
||||
|
||||
TEST(message_search_skips_malformed_records) {
|
||||
char ts[64];
|
||||
char log_path[PATH_MAX];
|
||||
message_t *results = NULL;
|
||||
|
||||
setup_state_dir();
|
||||
format_rfc3339_now(ts, sizeof(ts));
|
||||
snprintf(log_path, sizeof(log_path), "%s/messages.log", test_state_dir);
|
||||
|
||||
FILE *fp = fopen(log_path, "wb");
|
||||
assert(fp != NULL);
|
||||
fprintf(fp, "%s|alice|needle valid\n", ts);
|
||||
fprintf(fp, "%s|mallory|needle extra|pipe\n", ts);
|
||||
fprintf(fp, "%s|partial|needle truncated", ts);
|
||||
fclose(fp);
|
||||
|
||||
int count = message_search("needle", &results, 10);
|
||||
assert(count == 1);
|
||||
assert(strcmp(results[0].username, "alice") == 0);
|
||||
assert(strcmp(results[0].content, "needle valid") == 0);
|
||||
free(results);
|
||||
cleanup_state_dir();
|
||||
}
|
||||
|
||||
/* Test edge cases */
|
||||
TEST(message_edge_cases) {
|
||||
message_t msg;
|
||||
|
|
@ -215,12 +301,15 @@ int main(void) {
|
|||
RUN_TEST(message_format_unicode);
|
||||
RUN_TEST(message_format_width_limits);
|
||||
RUN_TEST(message_save_basic);
|
||||
RUN_TEST(message_load_skips_malformed_records);
|
||||
RUN_TEST(message_search_skips_malformed_records);
|
||||
RUN_TEST(message_edge_cases);
|
||||
RUN_TEST(message_special_characters);
|
||||
RUN_TEST(message_buffer_safety);
|
||||
RUN_TEST(message_timestamp_formats);
|
||||
|
||||
cleanup_test_log();
|
||||
cleanup_state_dir();
|
||||
|
||||
printf("\n✓ All %d tests passed!\n", tests_passed);
|
||||
return 0;
|
||||
|
|
|
|||
Loading…
Reference in a new issue