aboutsummaryrefslogtreecommitdiff
path: root/booki.c
diff options
context:
space:
mode:
authorBen Winston2024-06-03 22:36:11 -0400
committerBen Winston2024-06-03 22:36:11 -0400
commit84078b5791036f0aaa3979019638aebe129a9bfd (patch)
tree4cb5365356bf37fcd66ce83eabc63ce3eade89b0 /booki.c
parent770f194677027c1961bbfd77e536bd4d7e97b40e (diff)
use hand-rolled strncasecmp, ignore accents
Diffstat (limited to 'booki.c')
-rw-r--r--booki.c96
1 files changed, 92 insertions, 4 deletions
diff --git a/booki.c b/booki.c
index b73dc3d..a530f76 100644
--- a/booki.c
+++ b/booki.c
@@ -13,6 +13,94 @@
#define MAX_SEARCH_OPTS 5
/*** helpers ***/
+bool comparable(const char* pattern, const char* candidate, int len) { // case- and accent-insensitive compare; true when all len pattern bytes match candidate
+ unsigned char p = *pattern; // current pattern byte, read as unsigned so the 0x80+ comparisons below do not depend on char signedness
+ unsigned char c = *candidate; // current candidate byte
+ while (len > 0) { // one iteration per pattern byte; the candidate may advance by one or two bytes
+ // fold ASCII lowercase in the pattern to uppercase; any other pattern byte is left as-is
+ if (p >= 'a' && p <= 'z')
+ p -= 0x20;
+
+ // candidate byte is plain ASCII: fold its case the same way and compare directly
+ if (c < 0x80) {
+ if (c >= 'a' && c <= 'z')
+ c -= 0x20;
+ if (p != c)
+ return false;
+ }
+ // looking at Latin-1 Supplement: assumes UTF-8 text, where 0xC3 xx encodes U+00C0..U+00FF
+ else if (c == 0xC3) {
+ // NOTE(review): a pattern byte of 0xC3 can never match in this branch, because p is only compared with ASCII letters or the second byte
+ // go to next candidate byte (the second byte of the pair); this extra step is not counted against len
+ candidate++;
+ c = *candidate;
+
+ // second bytes 0xA0..0xBF (lowercase) sit 0x20 above 0x80..0x9F (uppercase), similar to ascii
+ if (c >= 0xA0)
+ c -= 0x20;
+
+ // TODO "AE" ligature (0x86) only matches a pattern 'A', not "AE"
+ // TODO latin small y with diaeresis (0xBF) folds to 0x9F, not the Y slot 0x9D, so it never matches 'Y'
+ // a-ish: 0x80..0x86 (A with grave through A with ring, plus the AE ligature)
+ if (c >= 0x80 && c <= 0x86) {
+ if (p != 'A')
+ return false;
+ }
+ // c-ish: 0x87 (C with cedilla)
+ else if (c == 0x87) {
+ if (p != 'C')
+ return false;
+ }
+ // e-ish: 0x88..0x8B
+ else if (c >= 0x88 && c <= 0x8B) {
+ if (p != 'E')
+ return false;
+ }
+ // i-ish: 0x8C..0x8F
+ else if (c >= 0x8C && c <= 0x8F) {
+ if (p != 'I')
+ return false;
+ }
+ // n-ish: 0x91 (N with tilde); 0x90 (eth) is not in any range and ends up in the fallthrough
+ else if (c == 0x91) {
+ if (p != 'N')
+ return false;
+ }
+ // o-ish: 0x92..0x96 plus 0x98 (O with stroke); 0x97 is the multiplication sign and is skipped
+ else if (((c >= 0x92 && c <= 0x96) || c == 0x98)) {
+ if (p != 'O')
+ return false;
+ }
+ // u-ish: 0x99..0x9C
+ else if (c >= 0x99 && c <= 0x9C) {
+ if (p != 'U')
+ return false;
+ }
+ // y-ish: 0x9D (Y with acute)
+ else if (c == 0x9D) {
+ if (p != 'Y')
+ return false;
+ }
+ // fallthrough: any other second byte must equal the pattern byte exactly
+ else if (p != c) {
+ return false;
+ }
+ }
+ // TODO latin-1 extended (presumably Latin Extended-A, i.e. other lead bytes such as 0xC4/0xC5 - confirm)
+ else {
+ // don't know how to compare these: any other candidate byte >= 0x80 is treated as a mismatch
+ return false;
+ }
+ // NOTE(review): accented candidates consume two bytes per pattern byte, so candidate can be read beyond len bytes - confirm callers' buffers allow it
+ pattern++;
+ p = *pattern;
+ candidate++;
+ c = *candidate;
+ len--; // len counts pattern bytes consumed, not candidate bytes
+ }
+ return true; // every pattern byte matched (also the result when len <= 0)
+}
+
char* load_file(char* filename) {
// open the file
FILE* fp = fopen(filename, "r");
@@ -436,19 +524,19 @@ bool match_string(const char* pattern, const struct es text) {
bool valid = false;
if (head_match && tail_match)
// text must be identical to pattern (minus ^ and $)
- valid = text.len == (pattern_length - 2) && strncasecmp(pattern + 1, text.ptr, pattern_length - 2) == 0;
+ valid = text.len == (pattern_length - 2) && comparable(pattern + 1, text.ptr, pattern_length - 2);
else if (head_match)
// text must match the pattern starting from pattern + 1
- valid = strncasecmp(pattern + 1, text.ptr, pattern_length - 1) == 0;
+ valid = comparable(pattern + 1, text.ptr, pattern_length - 1);
else if (tail_match) {
// text starting from (pattern + 1) from the end must match pattern (without $)
- valid = strncasecmp(pattern, text.ptr + (text.len - pattern_length + 1), pattern_length - 1) == 0;
+ valid = comparable(pattern, text.ptr + (text.len - pattern_length + 1), pattern_length - 1);
}
// we only need to compare while remaining text is
// as long or longer than pattern
for (int i = 0; i <= (text.len - pattern_length); i++) {
- if (strncasecmp(pattern, text.ptr + i, pattern_length) == 0) {
+ if (comparable(pattern, text.ptr + i, pattern_length)) {
valid = true;
break;
}