diff options
| -rw-r--r-- | booki.c | 96 |
1 files changed, 92 insertions, 4 deletions
/* Uppercase an ASCII letter; every other byte is returned unchanged. */
static unsigned char cmp_ascii_upper(unsigned char b) {
    if (b >= 'a' && b <= 'z')
        b -= 0x20;
    return b;
}

/*
 * Case-fold the trailing byte of a two-byte UTF-8 sequence whose lead byte
 * is 0xC3 (U+00C0..U+00FF). Lowercase letters sit 0x20 above their
 * uppercase forms, exactly as in ASCII. Two bytes are left alone because
 * they have no uppercase partner in this range:
 *   0xB7 (division sign) would otherwise fold onto 0x97 (multiplication sign)
 *   0xBF (y with diaeresis) would otherwise fold onto 0x9F (sharp s)
 */
static unsigned char cmp_latin1_fold(unsigned char trail) {
    if (trail >= 0xA0 && trail <= 0xBE && trail != 0xB7)
        trail -= 0x20;
    return trail;
}

/*
 * Map a case-folded 0xC3 trailing byte to the uppercase ASCII letter it is
 * a variant of, or 0 when it has no base letter known to this function.
 */
static unsigned char cmp_latin1_base(unsigned char trail) {
    // TODO "AE" (0x86) only matches A
    // TODO latin small y with diaeresis is not covered, as it has no uppercase!
    if (trail >= 0x80 && trail <= 0x86)
        return 'A';
    if (trail == 0x87)
        return 'C';
    if (trail >= 0x88 && trail <= 0x8B)
        return 'E';
    if (trail >= 0x8C && trail <= 0x8F)
        return 'I';
    if (trail == 0x91)
        return 'N';
    if ((trail >= 0x92 && trail <= 0x96) || trail == 0x98)
        return 'O';
    if (trail >= 0x99 && trail <= 0x9C)
        return 'U';
    if (trail == 0x9D)
        return 'Y';
    return 0;
}

/*
 * Case- and accent-insensitive comparison of `len` pattern bytes against
 * UTF-8 text.
 *
 *   pattern    bytes to look for; only the first `len` bytes are read
 *   candidate  UTF-8 text to compare against
 *   len        number of PATTERN bytes to compare; <= 0 always matches
 *
 * Returns true when every pattern character matches:
 *   - ASCII letters match regardless of case;
 *   - an ASCII pattern letter also matches the accented Latin-1 Supplement
 *     variants of that letter in the candidate ("cafe" matches "café");
 *   - an accented Latin-1 pattern character matches the same character in
 *     either case, but not other accents and not the bare ASCII letter;
 *   - any other non-ASCII byte must be byte-identical.
 *
 * NOTE: a Latin-1 character in the candidate occupies two bytes while it may
 * consume only one pattern byte, so up to 2 * len candidate bytes can be
 * read. The caller must make sure that many bytes are readable, or that the
 * candidate is NUL-terminated (a NUL never matches a non-NUL pattern byte, so
 * the comparison stops there).
 */
bool comparable(const char* pattern, const char* candidate, int len) {
    const unsigned char* pat = (const unsigned char*)pattern;
    const unsigned char* cand = (const unsigned char*)candidate;

    // bytes are fetched inside the loop so nothing at or beyond
    // pattern[len] is ever touched
    while (len > 0) {
        unsigned char p = *pat;
        unsigned char c = *cand;

        if (c < 0x80) {
            // plain ascii: compare ignoring case
            if (cmp_ascii_upper(p) != cmp_ascii_upper(c))
                return false;
            pat++;
            cand++;
            len--;
        }
        else if (c == 0xC3) {
            // Latin-1 Supplement: the next candidate byte selects the letter
            unsigned char trail = cand[1];

            // not a continuation byte: malformed text, never a match
            if (trail < 0x80 || trail > 0xBF)
                return false;
            trail = cmp_latin1_fold(trail);

            if (p == 0xC3) {
                // the pattern holds an accented character as well:
                // it has to be the same character, ignoring case
                if (len < 2 || cmp_latin1_fold(pat[1]) != trail)
                    return false;
                pat += 2;
                len -= 2;
            }
            else {
                // ascii pattern letter against an accented candidate letter
                unsigned char base = cmp_latin1_base(trail);
                if (base == 0 || cmp_ascii_upper(p) != base)
                    return false;
                pat++;
                len--;
            }
            cand += 2;
        }
        else {
            // TODO latin-1 extended
            // no folding rules for these: only identical bytes match
            if (p != c)
                return false;
            pat++;
            cand++;
            len--;
        }
    }
    return true;
}
strncasecmp(pattern + 1, text.ptr, pattern_length - 2) == 0; + valid = text.len == (pattern_length - 2) && comparable(pattern + 1, text.ptr, pattern_length - 2); else if (head_match) // text must match the pattern starting from pattern + 1 - valid = strncasecmp(pattern + 1, text.ptr, pattern_length - 1) == 0; + valid = comparable(pattern + 1, text.ptr, pattern_length - 1); else if (tail_match) { // text starting from (pattern + 1) from the end must match pattern (without $) - valid = strncasecmp(pattern, text.ptr + (text.len - pattern_length + 1), pattern_length - 1) == 0; + valid = comparable(pattern, text.ptr + (text.len - pattern_length + 1), pattern_length - 1); } // we only need to compare while remaining text is // as long or longer than pattern for (int i = 0; i <= (text.len - pattern_length); i++) { - if (strncasecmp(pattern, text.ptr + i, pattern_length) == 0) { + if (comparable(pattern, text.ptr + i, pattern_length)) { valid = true; break; } |
