/*
 * Helper introduced by commit 5bfbde81 (Ben Winston, 2024-06-06):
 * "bugfix: matching $ must count characters from end, not bytes".
 *
 * In that commit match_string() switches its tail-match ('$') branch from
 * byte arithmetic on text.ptr to
 *     characters_from_end(text.ptr, text.len, pattern_length - 1)
 * so that the pattern is compared against the last N *characters* of the
 * text rather than its last N bytes.
 */
#include <assert.h>

/*
 * Return a pointer to the first byte of the `number_of_chars`-th UTF-8
 * encoded character counted from the end of `str`.
 *
 *   str              start of the text (need not be NUL-terminated)
 *   len              length of the text in bytes
 *   number_of_chars  how many characters to step back from the end
 *
 * Returns str + len when number_of_chars <= 0.  If the text holds fewer
 * characters than requested, the scan stops at `str` and `str` is
 * returned; it never reads before the start of the buffer.
 */
char* characters_from_end(char* str, int len, int number_of_chars) {
    /* A character occupies at least one byte, so this is a cheap sanity
     * check; it cannot guarantee that enough characters exist, which is
     * why the loop below is also bounded by `str`. */
    assert(len >= number_of_chars);

    char* pos = str + len;
    while (number_of_chars > 0 && pos > str) {
        pos--;
        /* UTF-8 continuation bytes have the form 10xxxxxx.  Every other
         * byte (ASCII, or a lead byte 0xC2..0xF4) begins a character. */
        if (((unsigned char) *pos & 0xC0) != 0x80)
            number_of_chars--;
    }
    return pos;
}