#include <assert.h>
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define EDIT_FILE "/tmp/booki-edit.toml"
#define FIXED_FILE "/tmp/booki-fixed.toml"
#define MAX_SEARCH_OPTS 5

#define ARRAY_COUNT(a) (sizeof(a) / sizeof((a)[0]))

enum DataType { booki_string, booki_number, booki_raw };

typedef struct DataField DataField;
struct DataField {
    char* name;          // key as it appears in the TOML file and on the command line
    enum DataType type;  // how the value is parsed, printed and matched
    bool show;           // whether the field appears in listings and in the "add" template
};

// Order matters: print_book() skips the first two entries (title, author)
// because it prints them on the headline.
static const DataField BOOK_FIELDS[] = {
    { "title", booki_string, true },
    { "author", booki_string, true },
    { "isbn", booki_string, true },
    { "pages", booki_number, true },
    { "published", booki_number, true },
    { "language", booki_string, true },
    { "translator", booki_string, true },
    { "on", booki_string, true },
    { "id", booki_number, false },
    { "_other", booki_raw, false }
};
#define BOOK_FIELDS_COUNT (sizeof(BOOK_FIELDS) / sizeof(BOOK_FIELDS[0]))

// Look up a field description by its exact name; NULL when there is none.
static const DataField* find_field(const char* name) {
    for (size_t i = 0; i < BOOK_FIELDS_COUNT; i++) {
        if (strcmp(name, BOOK_FIELDS[i].name) == 0) return &BOOK_FIELDS[i];
    }
    return NULL;
}

/*** helpers ***/

/*
 * Return a pointer to the start of the last `number_of_chars` UTF-8
 * characters of `str`, which is `len_in_bytes` bytes long.
 *
 * The caller must guarantee len_in_bytes >= number_of_chars (asserted).
 * If the string holds fewer characters than requested, `str` is returned.
 */
char* characters_from_end(char* str, int len_in_bytes, int number_of_chars) {
    assert(len_in_bytes >= number_of_chars);

    // let's not zoom past the beginning of the string
    if (number_of_chars == 0 || len_in_bytes == 0) return str;

    // walk backwards from the end, counting character starts
    do {
        len_in_bytes--;
        unsigned char ch = (unsigned char) str[len_in_bytes];
        // Every byte that is not a continuation byte (10xxxxxx) begins a
        // character. This includes lead byte 0xC2 (U+0080..U+00BF).
        if ((ch & 0xC0) != 0x80) number_of_chars--;
    } while (number_of_chars > 0 && len_in_bytes > 0);

    return str + len_in_bytes;
}

// One run of second bytes (after a given UTF-8 lead byte) that all fold to
// the same unaccented uppercase ASCII letter.
struct fold_range {
    unsigned char first;
    unsigned char last;
    unsigned char letter;
};

// Latin-1 Supplement (lead byte 0xC3). Lowercase second bytes (>= 0xA0) are
// shifted down by 0x20 before the lookup, as upper/lower are spaced like ASCII.
// TODO "AE" only matches A
// TODO latin small y with diaeresis is not covered, as it has no uppercase!
static const struct fold_range FOLD_C3[] = {
    { 0x80, 0x86, 'A' }, { 0x87, 0x87, 'C' }, { 0x88, 0x8B, 'E' },
    { 0x8C, 0x8F, 'I' }, { 0x91, 0x91, 'N' }, { 0x92, 0x96, 'O' },
    { 0x98, 0x98, 'O' }, { 0x99, 0x9C, 'U' }, { 0x9D, 0x9D, 'Y' }
};

// Latin Extended-A, first half (lead byte 0xC4).
static const struct fold_range FOLD_C4[] = {
    { 0x80, 0x85, 'A' }, { 0x86, 0x8D, 'C' }, { 0x8E, 0x91, 'D' },
    { 0x92, 0x9B, 'E' }, { 0x9C, 0xA3, 'G' }, { 0xA4, 0xA7, 'H' },
    { 0xA8, 0xB3, 'I' }, { 0xB4, 0xB5, 'J' }, { 0xB6, 0xB8, 'K' },
    { 0xB9, 0xBF, 'L' }
};

// Latin Extended-A, second half (lead byte 0xC5).
static const struct fold_range FOLD_C5[] = {
    { 0x80, 0x82, 'L' }, { 0x83, 0x8B, 'N' }, { 0x8C, 0x93, 'O' },
    { 0x94, 0x99, 'R' }, { 0x9A, 0xA1, 'S' }, { 0xA2, 0xA7, 'T' },
    { 0xA8, 0xB3, 'U' }, { 0xB4, 0xB5, 'W' }, { 0xB6, 0xB8, 'Y' },
    { 0xB9, 0xBE, 'Z' }
};

// Return the base letter for second byte `c`, or `c` itself when no range
// covers it (so a comparison against an ASCII pattern byte fails).
static unsigned char fold_letter(const struct fold_range* table, size_t count, unsigned char c) {
    for (size_t i = 0; i < count; i++) {
        if (c >= table[i].first && c <= table[i].last) return table[i].letter;
    }
    return c;
}

/*
 * Match the first `len` bytes of the ASCII `pattern` against the start of the
 * UTF-8 `candidate`, ignoring case and the accents listed in the fold tables.
 *
 * Returns the number of candidate bytes consumed (including combining marks
 * that directly follow the match), or -1 on mismatch. `candidate` must be
 * NUL-terminated somewhere; the scan never reads past that NUL.
 */
static int match_prefix(const char* pattern, const char* candidate, int len) {
    const char* start = candidate;

    while (len > 0) {
        unsigned char p = (unsigned char) *pattern;
        unsigned char c = (unsigned char) *candidate;

        // make pattern uppercase, if applicable
        if (p >= 'a' && p <= 'z') p -= 0x20;

        if (c < 0x80) {
            // plain ascii, compare as normal
            if (c >= 'a' && c <= 'z') c -= 0x20;
            if (p != c) return -1;
        } else if (c == 0xC3) {
            c = (unsigned char) *++candidate;
            if (c >= 0xA0) c -= 0x20;
            if (p != fold_letter(FOLD_C3, ARRAY_COUNT(FOLD_C3), c)) return -1;
        } else if (c == 0xC4) {
            c = (unsigned char) *++candidate;
            if (p != fold_letter(FOLD_C4, ARRAY_COUNT(FOLD_C4), c)) return -1;
        } else if (c == 0xC5) {
            c = (unsigned char) *++candidate;
            if (p != fold_letter(FOLD_C5, ARRAY_COUNT(FOLD_C5), c)) return -1;
        } else if (c == 0xCC || c == 0xCD) {
            // combining diacritic: it follows the letter it modifies, so skip
            // both of its bytes and retry the same pattern byte
            if (candidate[1] == '\0') return -1;
            candidate += 2;
            continue;
        } else {
            // don't know how to compare these
            return -1;
        }

        pattern++;
        candidate++;
        len--;
    }

    // combining marks right after the match belong to the last matched letter
    while (((unsigned char) *candidate == 0xCC || (unsigned char) *candidate == 0xCD)
           && candidate[1] != '\0') {
        candidate += 2;
    }

    return (int) (candidate - start);
}

/*
 * True when the first `len` bytes of `pattern` match the start of
 * `candidate`, case- and accent-insensitively (see match_prefix).
 */
bool comparable(const char* pattern, const char* candidate, int len) {
    return match_prefix(pattern, candidate, len) >= 0;
}

/*
 * Read a whole file into a freshly malloc'd, NUL-terminated buffer.
 * Returns NULL (after printing a message) on any failure; the caller frees
 * the returned buffer.
 */
char* load_file(char* filename) {
    char* data = NULL;
    long size = 0;
    size_t read_size = 0;

    FILE* fp = fopen(filename, "r");
    if (!fp) {
        printf("bad file\n");
        return NULL;
    }

    // seek to the end to learn the size
    if (fseek(fp, 0, SEEK_END) != 0 || (size = ftell(fp)) < 0) {
        printf("couldn't determine file size\n");
        goto out;
    }
    rewind(fp);

    data = malloc((size_t) size + 1);
    if (!data) {
        printf("couldn't malloc\n");
        goto out;
    }

    read_size = fread(data, 1, (size_t) size, fp);
    if (read_size != (size_t) size) {
        printf("didn't read everything -- %zu read of %ld\n", read_size, size);
        free(data);
        data = NULL;
        goto out;
    }
    data[size] = '\0';

out:
    fclose(fp);
    return data;
}

/*
 * Open `filepath` in $EDITOR (falling back to nano) and wait for it to exit.
 */
void open_with_editor(char* filepath) {
    char* editor = getenv("EDITOR");
    if (!editor) editor = "nano";

    // don't let the child inherit (and later re-emit) buffered output
    fflush(NULL);

    pid_t pid = fork();
    if (pid == -1) {
        printf("fork has failed!\n");
        return;
    }

    if (pid == 0) {
        execlp(editor, editor, filepath, (char*) NULL);
        // only reached when exec failed; the child must not fall back into
        // the parent's code path
        printf("child failed :(\n");
        fflush(stdout);
        _exit(127);
    }

    // wait for the child to finish
    int status;
    waitpid(pid, &status, 0);
}

/*
 * Copy `src` to `dest` with /bin/cp.
 * Returns true only when cp ran and exited with status 0.
 */
bool copy(char* src, char* dest) {
    fflush(NULL);

    pid_t pid = fork();
    if (pid == -1) {
        printf("fork failed\n");
        return false;
    }

    if (pid == 0) {
        execl("/bin/cp", "/bin/cp", src, dest, (char*) NULL);
        printf("copy failed\n");
        fflush(stdout);
        _exit(127);
    }

    // wait for the child to finish and report how it went
    int status;
    if (waitpid(pid, &status, 0) == -1) return false;
    return WIFEXITED(status) && WEXITSTATUS(status) == 0;
}

/*** strings ***/

/*
 * A string slice (not NUL-terminated) pointing into the loaded file buffer,
 * optionally chained into a list. The head of a list is stored by value;
 * every following node is malloc'd (see free_es).
 */
typedef struct es ES;
struct es {
    int len;
    char* ptr;
    ES* next;
};

ES default_es = { 0, NULL, NULL };

// Text of a slice, never NULL (an unset slice yields "").
static const char* es_text(const ES* str) {
    return str->ptr ? str->ptr : "";
}

/*
 * Format a list as "a, b, c" into `buf` and return the number of bytes
 * written (excluding the NUL). The write is unbounded: the caller must size
 * `buf` for the whole list.
 */
int concat_es_print(ES* str, char* buf) {
    int size = sprintf(buf, "%.*s", str->len, es_text(str));
    for (ES* cur = str->next; cur != NULL; cur = cur->next) {
        size += sprintf(buf + size, ", %.*s", cur->len, es_text(cur));
    }
    return size;
}

/*
 * Format a list as a TOML value followed by a newline: a quoted string for a
 * single entry, a bracketed array otherwise. Returns the number of bytes
 * written. The write is unbounded: the caller must size `buf` for the list.
 */
int concat_es_toml(ES* str, char* buf) {
    if (str->next == NULL) {
        return sprintf(buf, "\"%.*s\"\n", str->len, es_text(str));
    }

    int size = sprintf(buf, "[ \"%.*s\"", str->len, es_text(str));
    for (ES* cur = str->next; cur != NULL; cur = cur->next) {
        size += sprintf(buf + size, ", \"%.*s\"", cur->len, es_text(cur));
    }
    return size + sprintf(buf + size, " ]\n");
}

// Write every entry of a list to `out`, wrapped in `quote` and separated by
// ", ". Used instead of the concat_* helpers so no fixed buffer is needed.
static void fput_es_list(FILE* out, const ES* str, const char* quote) {
    for (const ES* cur = str; cur != NULL; cur = cur->next) {
        if (cur != str) fputs(", ", out);
        fputs(quote, out);
        if (cur->ptr && cur->len > 0) fwrite(cur->ptr, 1, (size_t) cur->len, out);
        fputs(quote, out);
    }
}

// Free a chain of malloc'd nodes. Pass `head.next`, never the by-value head.
void free_es(ES* str) {
    while (str != NULL) {
        ES* next = str->next;
        free(str);
        str = next;
    }
}

/*** books ***/

typedef struct Book BOOK;
struct Book {
    int id;
    ES title;
    ES author;
    int pages;
    ES isbn;
    ES language;
    ES translator;
    ES on;
    int published;

    // catch-all: lines we don't understand, kept verbatim
    ES _other;
};

// Reset every field of `book` to "not set".
void init_book(BOOK* book) {
    book->id = 0;
    book->title = default_es;
    book->author = default_es;
    book->pages = 0;
    book->isbn = default_es;
    book->language = default_es;
    book->translator = default_es;
    book->on = default_es;
    book->published = 0;
    book->_other = default_es;
}

// True when `cand` starts with `attr` (prefix match).
#define ATTR_MATCH(cand, attr) (strncmp(cand, attr, strlen(attr)) == 0)

/*
 * Return the string field of `book` whose name `name` starts with, or a
 * pointer to the shared default_es when nothing matches.
 */
ES* get_string_field(BOOK* book, char* name) {
    if (ATTR_MATCH(name, "title")) return &(book->title);
    else if (ATTR_MATCH(name, "author")) return &(book->author);
    else if (ATTR_MATCH(name, "isbn")) return &(book->isbn);
    else if (ATTR_MATCH(name, "language")) return &(book->language);
    else if (ATTR_MATCH(name, "translator")) return &(book->translator);
    else if (ATTR_MATCH(name, "on")) return &(book->on);
    else if (ATTR_MATCH(name, "_other")) return &(book->_other);
    else return &default_es;
}

/*
 * Return the numeric field of `book` whose name `name` starts with, or NULL
 * when nothing matches.
 */
int* get_number_field(BOOK* book, char* name) {
    if (ATTR_MATCH(name, "pages")) return &(book->pages);
    else if (ATTR_MATCH(name, "published")) return &(book->published);
    else if (ATTR_MATCH(name, "id")) return &(book->id);
    else return NULL;
}

/*
 * Print "<title> by <authors>" to stdout and, when `all_fields` is set, one
 * " - name: value" line for every other shown field that has a value.
 */
void print_book(BOOK book, bool all_fields) {
    fput_es_list(stdout, &book.title, "");
    fputs(" by ", stdout);
    fput_es_list(stdout, &book.author, "");
    fputc('\n', stdout);

    if (!all_fields) return;

    // loop through BOOK_FIELDS, skipping title and author
    for (size_t i = 2; i < BOOK_FIELDS_COUNT; i++) {
        DataField datafield = BOOK_FIELDS[i];
        if (!datafield.show) continue;

        if (datafield.type == booki_string) {
            ES* field = get_string_field(&book, datafield.name);
            // if we don't have anything in this field, don't print it
            if (!field->ptr) continue;
            printf(" - %s: ", datafield.name);
            fput_es_list(stdout, field, "");
            fputc('\n', stdout);
        } else if (datafield.type == booki_number) {
            int* field = get_number_field(&book, datafield.name);
            if (!field || !*field) continue;
            printf(" - %s: %d\n", datafield.name, *field);
        }
    }
}

/*
 * Write `book` to `output` as a "[[books]]" TOML table followed by a blank
 * line. Unset fields are omitted; `_other` lines are written verbatim.
 */
void write_book(BOOK book, FILE *output) {
    fputs("[[books]]\n", output);

    for (size_t i = 0; i < BOOK_FIELDS_COUNT; i++) {
        DataField datafield = BOOK_FIELDS[i];

        if (datafield.type == booki_string) {
            ES* field = get_string_field(&book, datafield.name);
            if (!field->ptr) continue;
            fprintf(output, "%s = ", datafield.name);
            if (field->next == NULL) {
                fput_es_list(output, field, "\"");
            } else {
                fputs("[ ", output);
                fput_es_list(output, field, "\"");
                fputs(" ]", output);
            }
            fputc('\n', output);
        } else if (datafield.type == booki_number) {
            int* field = get_number_field(&book, datafield.name);
            if (!field || !*field) continue;
            fprintf(output, "%s = %d\n", datafield.name, *field);
        } else if (datafield.type == booki_raw) {
            ES* field = get_string_field(&book, datafield.name);
            if (!field->ptr) continue;
            for (const ES* line = field; line != NULL; line = line->next) {
                if (line->ptr && line->len > 0) fwrite(line->ptr, 1, (size_t) line->len, output);
                fputc('\n', output);
            }
        }
    }

    fputc('\n', output); // trailing newline between books
}

// Release the malloc'd list nodes hanging off every string field of `book`.
void free_book(BOOK book) {
    // any string can be a list of strings
    for (size_t i = 0; i < BOOK_FIELDS_COUNT; i++) {
        DataField datafield = BOOK_FIELDS[i];
        if (datafield.type == booki_string || datafield.type == booki_raw) {
            free_es(get_string_field(&book, datafield.name)->next);
        }
    }
}

/*** parse toml-ish ***/

/*
 * Return the text after the last space in `str`, or NULL when there is no
 * space or the only space is the first character.
 */
const char* get_last_word(const char* str) {
    const char* last_space = strrchr(str, ' ');
    if (last_space == NULL || last_space == str) return NULL;
    return last_space + 1;
}

/*
 * Parse a base-10 integer at `current_pos` and store the first unparsed
 * position in `*new_pos`. Returns 0 when the number is followed by anything
 * other than a space, newline, ']', ',' or the end of the buffer.
 */
long parse_int(char* current_pos, char** new_pos) {
    // strtol can handle leading spaces
    // will put the first non-digit into endptr
    char* endptr;
    long ret = strtol(current_pos, &endptr, 10);

    bool valid;
    switch (*endptr) {
        case ' ':
        case '\n':
        case ']':
        case ',':
        case '\0':
            valid = true;
            break;
        default:
            valid = false;
            break;
    }

    *new_pos = endptr;
    return valid ? ret : 0;
}

// All seek helpers stop at the NUL terminator so a malformed or truncated
// file can never walk the pointer off the end of the buffer.
#define SEEK_UNTIL(ptr, ch) \
    do { while (*(ptr) != (ch) && *(ptr) != '\0') (ptr)++; } while (0)
#define SEEK_UNTIL_EITHER(ptr, ch1, ch2) \
    do { while (*(ptr) != (ch1) && *(ptr) != (ch2) && *(ptr) != '\0') (ptr)++; } while (0)
#define SEEK_WHILE(ptr, ch) \
    do { while (*(ptr) == (ch)) (ptr)++; } while (0)

/*
 * Parse one double-quoted string on the current line, starting the search
 * for the opening quote at `current_pos`.
 *
 * On success the slice covers the text between the quotes and `*new_pos` is
 * just past the closing quote. When the line has no opening quote the result
 * is unset (ptr == NULL) and `*new_pos` is at the end of the line. When the
 * closing quote is missing, the rest of the line is taken as the value.
 */
ES parse_string(char* current_pos, char** new_pos) {
    ES output = default_es;

    // leading spaces (or anything else) up to the opening quote
    SEEK_UNTIL_EITHER(current_pos, '"', '\n');
    if (*current_pos != '"') {
        *new_pos = current_pos;
        return output;
    }

    // go past the quote and remember where the value starts
    current_pos++;
    char* value = current_pos;

    // until the next quote
    SEEK_UNTIL_EITHER(current_pos, '"', '\n');
    output.len = (int) (current_pos - value);
    output.ptr = value;

    // go past the closing quote, if there is one
    if (*current_pos == '"') current_pos++;

    *new_pos = current_pos;
    return output;
}

/*
 * Parse either a single quoted string or a bracketed, comma-separated array
 * of quoted strings. The head of the list is returned by value; further
 * entries are malloc'd nodes (release them with free_es on `.next`).
 *
 * On anything else an error with some context is printed and an unset slice
 * is returned. The context dump reads 10 bytes before `current_pos`; this
 * assumes the position is at least that far into the buffer, which holds for
 * values parsed by parse_book.
 */
ES parse_strings(char* current_pos, char** new_pos) {
    // loop until we know what we have
    SEEK_WHILE(current_pos, ' ');

    if (*current_pos == '"') {
        return parse_string(current_pos, new_pos);
    }

    if (*current_pos == '[') {
        // get the first one
        char* my_new_pos;
        ES head = parse_string(current_pos, &my_new_pos);
        current_pos = my_new_pos;

        ES* prev = &head;
        for (;;) {
            SEEK_WHILE(current_pos, ' ');
            if (*current_pos != ',') break;

            ES str = parse_string(current_pos, &my_new_pos);
            current_pos = my_new_pos;
            // trailing comma or garbage: nothing more to collect
            if (!str.ptr) break;

            ES* node = malloc(sizeof *node);
            if (!node) {
                printf("couldn't malloc\n");
                break;
            }
            *node = str;
            prev->next = node;
            prev = node;
        }

        SEEK_UNTIL_EITHER(current_pos, ']', '\n');
        if (*current_pos == ']') current_pos++;
        *new_pos = current_pos;
        return head;
    }

    printf("error! +/- 10 context:\n---\n%.21s\n---\n", current_pos - 10);
    SEEK_UNTIL(current_pos, '\n');
    *new_pos = current_pos;
    return default_es;
}

// Append one verbatim line (`len` bytes at `ptr`) to the book's catch-all list.
void add_to_other(BOOK* book, char* ptr, int len) {
    if (book->_other.ptr == NULL) {
        book->_other.len = len;
        book->_other.ptr = ptr;
        book->_other.next = NULL;
        return;
    }

    ES* node = malloc(sizeof *node);
    if (!node) {
        printf("couldn't malloc\n");
        return;
    }
    node->len = len;
    node->ptr = ptr;
    node->next = NULL;

    ES* last = &(book->_other);
    while (last->next != NULL) last = last->next;
    last->next = node;
}

// True when the line at `attr` starts with exactly the key `name`, i.e. the
// name is followed by a space, a tab or the equals sign.
static bool attr_is(const char* attr, const char* name) {
    size_t n = strlen(name);
    if (strncmp(attr, name, n) != 0) return false;
    return attr[n] == ' ' || attr[n] == '\t' || attr[n] == '=';
}

// Replace a string field, releasing list nodes left by an earlier duplicate key.
static void set_string_field(ES* field, ES value) {
    free_es(field->next);
    *field = value;
}

/*
 * Parse the "key = value" lines of one book starting at `current_pos` (the
 * first line after the "[[books]]" header) into `book`.
 *
 * Parsing stops at a blank line, at the next "[[" table header, or at the
 * end of the buffer. Lines that are not a known key are kept verbatim in
 * `book->_other`. The slices stored in `book` point into the buffer.
 */
void parse_book(char* current_pos, BOOK* book) {
    char* new_pos;

    while (*current_pos != '\n' && *current_pos != '\0') {
        // a new table header ends this book even without a blank line
        if (strncmp(current_pos, "[[", 2) == 0) break;

        // we start at the beginning of a line
        char* attr = current_pos;
        SEEK_UNTIL_EITHER(current_pos, '=', '\n');

        // if we reached the end of the line, we never hit an equals sign
        // so capture the line as-is and continue
        if (*current_pos != '=') {
            add_to_other(book, attr, (int) (current_pos - attr));
            if (*current_pos == '\n') current_pos++;
            continue;
        }

        // go past the equals sign
        current_pos++;

        if (attr_is(attr, "title")) {
            set_string_field(&book->title, parse_string(current_pos, &new_pos));
            current_pos = new_pos;
        } else if (attr_is(attr, "author")) {
            set_string_field(&book->author, parse_strings(current_pos, &new_pos));
            current_pos = new_pos;
        } else if (attr_is(attr, "language")) {
            set_string_field(&book->language, parse_strings(current_pos, &new_pos));
            current_pos = new_pos;
        } else if (attr_is(attr, "isbn")) {
            set_string_field(&book->isbn, parse_string(current_pos, &new_pos));
            current_pos = new_pos;
        } else if (attr_is(attr, "translator")) {
            set_string_field(&book->translator, parse_strings(current_pos, &new_pos));
            current_pos = new_pos;
        } else if (attr_is(attr, "pages")) {
            book->pages = (int) parse_int(current_pos, &new_pos);
            current_pos = new_pos;
        } else if (attr_is(attr, "published")) {
            book->published = (int) parse_int(current_pos, &new_pos);
            current_pos = new_pos;
        } else if (attr_is(attr, "id")) {
            book->id = (int) parse_int(current_pos, &new_pos);
            current_pos = new_pos;
        } else if (attr_is(attr, "on")) {
            set_string_field(&book->on, parse_strings(current_pos, &new_pos));
            current_pos = new_pos;
        } else {
            // anything we didn't match should be captured as-is
            SEEK_UNTIL(current_pos, '\n');
            add_to_other(book, attr, (int) (current_pos - attr));
        }

        // go to (and then past) the newline
        SEEK_UNTIL(current_pos, '\n');
        if (*current_pos == '\n') current_pos++;
    }
}

/*
 * Find the next "[[books]]" header at or after `current_pos` and return the
 * start of the line following it, or NULL when there is no further header
 * (or nothing after it).
 */
char* next_book(char* current_pos) {
    static const char header[] = "[[books]]";

    char* found = strstr(current_pos, header);
    if (!found) return NULL;

    // pass the header and the rest of its line
    char* eol = strchr(found + (sizeof header - 1), '\n');
    if (!eol) return NULL;
    return eol + 1;
}

/*** search ***/

// Match a pattern body of `n` bytes against one slice. With `head` the match
// must start at the first byte; with `tail` it must end at the last one.
static bool match_slice(const char* body, int n, bool head, bool tail, const ES* text) {
    // a field that is not set behaves like an empty string
    const char* s = text->ptr ? text->ptr : "";
    int len = text->ptr ? text->len : 0;

    // every pattern byte consumes at least one text byte, so later starts
    // cannot fit the pattern
    int last_start = head ? 0 : len - n;

    for (int i = 0; i <= last_start && i <= len - n; i++) {
        int consumed = match_prefix(body, s + i, n);
        if (consumed < 0 || i + consumed > len) continue;
        if (tail && i + consumed != len) continue;
        return true;
    }
    return false;
}

/*
 * True when `pattern` matches any entry of the list `text`.
 *
 * The comparison ignores case and common Latin accents. A leading '^'
 * anchors the match at the start of an entry and a trailing '$' at its end;
 * without anchors the pattern may match anywhere. The empty pattern matches
 * everything.
 */
bool match_string(const char* pattern, const ES text) {
    // empty pattern matches everything
    if (!*pattern) return true;

    int pattern_length = (int) strlen(pattern);
    bool head_match = pattern[0] == '^';
    bool tail_match = pattern[pattern_length - 1] == '$';

    // strip the anchors from what we actually compare
    const char* body = pattern + (head_match ? 1 : 0);
    int body_length = pattern_length - (head_match ? 1 : 0) - (tail_match ? 1 : 0);

    for (const ES* cur = &text; cur != NULL; cur = cur->next) {
        if (match_slice(body, body_length, head_match, tail_match, cur)) return true;
    }
    return false;
}

/*
 * True when `candidate` satisfies the numeric `pattern`: "N" means equal,
 * "+N" greater than N and ",N" less than N. A zero candidate (field not set)
 * only ever matches the pattern "0".
 */
bool match_int(char* pattern, int candidate) {
    // we don't want to match the zero value, unless the pattern is also 0
    if (candidate == 0 && strcmp(pattern, "0") != 0) return false;

    // check for leading signs
    // (',' rather than '-' or '<': an operand starting with '-' is rejected
    // by parse_search_options)
    char* current_pos = pattern;
    bool lt = false;
    bool gt = false;
    if (*current_pos == '+') {
        gt = true;
        current_pos++;
    } else if (*current_pos == ',') {
        lt = true;
        current_pos++;
    }

    // parse the string to an int; it must be all digits and not empty
    char* endptr;
    long ret = strtol(current_pos, &endptr, 10);
    if (endptr == current_pos || *endptr != '\0') {
        printf("couldn't parse pattern as int: '%s'\n", pattern);
        return false;
    }

    // do compares
    if (lt) return candidate < ret;
    else if (gt) return candidate > ret;
    else return ret == candidate;
}

static struct option search_options[] = {
    {"show", no_argument, 0, 's'},
    {"edit", no_argument, 0, 'e'},
    {0, 0, 0, 0} // marks the end of the array
};

struct search_opt {
    int show;   // print every field of each match
    int edit;   // open the matches in the editor
    int count;  // number of field/pattern pairs, or -1 on a parse error
    char* opts[MAX_SEARCH_OPTS];  // field names, without dashes
    char* args[MAX_SEARCH_OPTS];  // the pattern for each field
};

/*
 * Parse the arguments of the "search" subcommand: the flags --show/--edit
 * plus up to MAX_SEARCH_OPTS "--<field> <pattern>" pairs, where <field> is a
 * name from BOOK_FIELDS. On error a message is printed and `count` is -1.
 */
struct search_opt parse_search_options(int argc, char* argv[]) {
    struct search_opt opt_out = { 0 };

    int opt;
    int opt_idx = 0;
    opterr = 0; // turn off getopt error messages

    // Field names are not registered with getopt, so each one comes back as
    // an unknown option ('?') and is handled by hand below.
    while ((opt = getopt_long_only(argc, argv, "", search_options, &opt_idx)) != -1) {
        switch (opt) {
            case 's':
                opt_out.show = true;
                break;
            case 'e':
                opt_out.edit = true;
                break;
            case '?': {
                // optind points at the argument (one past the option)
                char* flag = argv[optind - 1];
                char* operand = argv[optind];

                // detect if we didn't get an operand
                if (!operand || operand[0] == '-') {
                    printf("%s requires an operand\n", flag);
                    opt_out.count = -1;
                    return opt_out;
                }

                // detect if we got a field we don't recognize
                char* no_dashes = flag + strspn(flag, "-"); // '--example' -> 'example'
                if (!find_field(no_dashes)) {
                    printf("%s is unrecognized\n", flag);
                    opt_out.count = -1;
                    return opt_out;
                }

                if (opt_out.count >= MAX_SEARCH_OPTS) {
                    printf("too many search fields (at most %d)\n", MAX_SEARCH_OPTS);
                    opt_out.count = -1;
                    return opt_out;
                }

                opt_out.opts[opt_out.count] = no_dashes;
                opt_out.args[opt_out.count] = operand;
                opt_out.count++;
                break;
            }
            default:
                printf("something went wrong with parsing!\n");
                break;
        }
    }

    return opt_out;
}

// True when `book` satisfies every field/pattern pair in `opts`.
static bool book_matches(BOOK* book, const struct search_opt* opts) {
    for (int i = 0; i < opts->count; i++) {
        char* field = opts->opts[i];
        const DataField* datafield = find_field(field);

        if (datafield && datafield->type == booki_string) {
            if (!match_string(opts->args[i], *get_string_field(book, field))) return false;
        } else if (datafield && datafield->type == booki_number) {
            int* value = get_number_field(book, field);
            if (!value || !match_int(opts->args[i], *value)) return false;
        } else {
            printf("unsupported field: %s\n", field);
            return false;
        }
    }
    return true;
}

/*
 * The "search" subcommand. Prints every book in `booki_file` that matches
 * all given field patterns.
 *
 * With --edit, the matches are written to EDIT_FILE and opened in the
 * editor; afterwards the untouched books plus the edited ones are written to
 * FIXED_FILE, which then replaces `booki_file`. If that final copy fails,
 * FIXED_FILE is left in place.
 */
void search(int argc, char* argv[], char* booki_file) {
    struct search_opt search_opts = parse_search_options(argc, argv);
    if (search_opts.count == -1) {
        return;
    }

    // get the books array
    char* data = load_file(booki_file);
    if (!data) {
        printf("couldn't load data from %s\n", booki_file);
        return;
    }

    FILE* edit_file = NULL;
    FILE* fixed_file = NULL;
    if (search_opts.edit) {
        edit_file = fopen(EDIT_FILE, "w");
        fixed_file = fopen(FIXED_FILE, "w");
        if (!edit_file || !fixed_file) {
            printf("couldn't open temporary files for editing\n");
            if (edit_file) fclose(edit_file);
            if (fixed_file) fclose(fixed_file);
            free(data);
            return;
        }
    }

    // book loop
    BOOK book;
    char* cur_data = data;
    while ((cur_data = next_book(cur_data)) != NULL) {
        init_book(&book);
        parse_book(cur_data, &book);

        if (book_matches(&book, &search_opts)) {
            if (search_opts.edit) write_book(book, edit_file);
            else print_book(book, search_opts.show);
        } else if (search_opts.edit) {
            write_book(book, fixed_file);
        }

        free_book(book);
    }
    free(data);

    if (!search_opts.edit) return;

    // first, close and let the user edit
    fclose(edit_file);
    open_with_editor(EDIT_FILE);

    // after they've edited, read it in and add it to fixed_file
    data = load_file(EDIT_FILE);
    if (!data) {
        printf("can't open edit file\n");
        fclose(fixed_file);
        return;
    }

    cur_data = data;
    while ((cur_data = next_book(cur_data)) != NULL) {
        init_book(&book);
        parse_book(cur_data, &book);
        print_book(book, true);
        write_book(book, fixed_file);
        free_book(book);
    }
    free(data);

    // only replace the booki file once everything is safely on disk
    bool success = fclose(fixed_file) == 0 && copy(FIXED_FILE, booki_file);
    if (!success) {
        printf("failed! fixed file is here: %s\n", FIXED_FILE);
    } else {
        unlink(EDIT_FILE);
        unlink(FIXED_FILE);
    }
}

/*** add new books ***/

/*
 * The "add" subcommand: open a blank book template in the editor and append
 * whatever books the user saved to `booki_file`.
 */
void add(char *booki_file) {
    FILE* edit_file = fopen(EDIT_FILE, "w");
    if (!edit_file) {
        printf("couldn't open %s\n", EDIT_FILE);
        return;
    }

    // load an empty book to the edit file
    fputs("[[books]]\n", edit_file);
    for (size_t i = 0; i < BOOK_FIELDS_COUNT; i++) {
        DataField datafield = BOOK_FIELDS[i];

        // skip anything we don't show
        if (!datafield.show) continue;

        // otherwise, always write the name first
        fputs(datafield.name, edit_file);
        if (datafield.type == booki_string) fputs(" = \"\"\n", edit_file);
        else if (datafield.type == booki_number) fputs(" = 0\n", edit_file);
        else fputs(" = ?\n", edit_file);
    }
    fputs("\n", edit_file);

    // open the file for editing
    fclose(edit_file);
    open_with_editor(EDIT_FILE);

    // read in file, add to booki file
    char* data = load_file(EDIT_FILE);
    if (!data) {
        printf("can't open edit file\n");
        return;
    }

    FILE* output = fopen(booki_file, "a");
    if (!output) {
        printf("couldn't open %s\n", booki_file);
        free(data);
        return;
    }

    char* cur_data = data;
    BOOK book;
    while ((cur_data = next_book(cur_data)) != NULL) {
        init_book(&book);
        parse_book(cur_data, &book);
        print_book(book, true);
        write_book(book, output);
        free_book(book);
    }

    free(data);
    fclose(output);
    unlink(EDIT_FILE);
}

// Print the usage banner; `err` adds a note that the invocation was wrong.
void help(bool err) {
    printf("booki! it's a thing\n");
    if (err) {
        printf("you did something wrong\n");
    }
}

/*** the main event ***/

// Define BOOKI_NO_MAIN to build the functions above without the entry point,
// e.g. to link them into a test program.
#ifndef BOOKI_NO_MAIN
int main(int argc, char* argv[]) {
    char* booki_file = getenv("BOOKI_FILE");
    if (!booki_file) {
        printf("expecting BOOKI_FILE variable\n");
        return 1;
    }

    if (argc == 1) {
        help(false);
        return 0;
    } else if (strcmp(argv[1], "open") == 0) {
        open_with_editor(booki_file);
    } else if (strcmp(argv[1], "search") == 0) {
        search(argc - 1, argv + 1, booki_file);
    } else if (strcmp(argv[1], "add") == 0) {
        add(booki_file);
    } else {
        printf("unknown subcommand: '%s'\n", argv[1]);
        return 1;
    }

    return 0;
}
#endif /* BOOKI_NO_MAIN */