#include #include #include #include #include #include #include #include /* Simple binary blobs and helper functions */ typedef struct blob { unsigned char *data; int length; int alloclength; } blob; #define BLOBDEFAULTLENGTH 50 void blobinit(blob *b) { b->length = 0; b->alloclength = BLOBDEFAULTLENGTH; b->data = malloc(b->alloclength); } blob *blobnew(void) { blob *b = malloc(sizeof(blob)); blobinit(b); return b; } #define BLOBEXTRALENGTH 50 void blobexpand(blob *b, int newlength) { if(b->length + newlength <= b->alloclength) return; b->alloclength += newlength + BLOBEXTRALENGTH; b->data = realloc(b->data, b->alloclength); } void blobfree(blob *b) { free(b->data); free(b); } void blobaddbytes(blob *b, const unsigned char *bytes, int len) { blobexpand(b, len); memcpy(b->data + b->length, bytes, len); b->length += len; } /* Snappy decoder can pull from earlier-copied data, so can't do it all in one go. */ void blobslowlyaddbytes(blob *b, const unsigned char *bytes, int len) { int i; blobexpand(b, len); for(i = 0; i < len; i++) *(b->data + b->length + i) = *(bytes + i); b->length += len; } void blobaddblob(blob *out, blob *in) { blobaddbytes(out, in->data, in->length); } void blobaddstring(blob *out, char *in) { blobaddbytes(out, (unsigned char *)in, strlen(in)); } blob *blobfromsqlite3(sqlite3_stmt *qs, int col) { blob *b = blobnew(); blobaddbytes(b, sqlite3_column_blob(qs, col), sqlite3_column_bytes(qs, col)); return b; } void blobdump(blob *b) { int i; fprintf(stderr, "Length %d, data: ", b->length); for(i = 0; i < b->length; i++) { if(b->data[i] >= ' ' && b->data[i] <= '~') fprintf(stderr, "%c", b->data[i]); else fprintf(stderr, "_"); } fprintf(stderr, "\n"); } void blobdumpraw(blob *b) { int i; fprintf(stderr, "Length %d, data: ", b->length); for(i = 0; i < b->length; i++) { fprintf(stderr, "%c", b->data[i]); } fprintf(stderr, "\n"); } #define HEXDUMPSIZE 32 void blobdumphex(blob *b, int lines) { int i = 0, j; fprintf(stderr, "Length %d, data:\n", b->length); 
if(lines < 0) i = b->length - (-lines) * HEXDUMPSIZE; if(i < 0) i = 0; for(; i < b->length && (lines <= 0 || i < (lines * HEXDUMPSIZE)); i += HEXDUMPSIZE) { fprintf(stderr, "%06X: ", i); for(j = 0; j < HEXDUMPSIZE && i+j < b->length; j++) { fprintf(stderr, "%02X ", b->data[i+j]); } fprintf(stderr, " "); for(j = 0; j < HEXDUMPSIZE && i+j < b->length; j++) { if(b->data[i+j] >= ' ' && b->data[i+j] <= '~') fprintf(stderr, "%c", b->data[i+j]); else fprintf(stderr, "_"); } fprintf(stderr, "\n"); } } char *blobtostring(blob *b) { int i, j = 0; char *r; r = malloc((b->length * 4) + 1); /* leave room for escapes, as shittily as possible */ for(i = 0 ; i < b->length; i++) { if(b->data[i] < ' ' || b->data[i] > '~') { if(0 && b->data[i] == '\x0') { r[j] = '_'; j++; } else { sprintf(r+j, "\\x%02X", b->data[i]); j += 4; } } else { r[j] = b->data[i]; j++; } } r[j] = '\x0'; return r; } int blobfind(blob *body, char *match, int startpos) { int i, matchlen = strlen(match); for(i = startpos; i < body->length - matchlen; i++) { if(!strncmp((char *)body->data + i, match, matchlen)) { return i; } } return -1; } /* make use above? 
*/ void blobsearchreplace(blob *body, char *match, blob *replacement) { int i, matchlen = strlen(match); blob *tmp; for(i = 0; i < body->length - matchlen; i++) { if(!strncmp((char *)body->data + i, match, matchlen)) { /* fprintf(stderr, "match!\n"); */ tmp = blobnew(); /* save the end of the blob */ blobaddbytes(tmp, body->data + i + matchlen, body->length - i - matchlen); /* truncate the blob */ body->length = i; /* append the replacement */ blobaddblob(body, replacement); /* and append the original end of the blob */ blobaddblob(body, tmp); /* skip over the replacement so we can't accidentally recurse */ i += replacement->length; blobfree(tmp); } } } void blobsearchreplacestr(blob *body, char *match, char *replacement) { blob *r = blobnew(); blobaddstring(r, replacement); blobsearchreplace(body, match, r); blobfree(r); } int blobwrite(blob *b, FILE *f) { return fwrite(b->data, b->length, 1, f); } /* I borrowed these from something I wrote long ago, may be even shittier than the rest of this code. */ /* error log, as given in docs because I'm lazy */ void errorLogCallback(void *pArg, int iErrCode, const char *zMsg) { fprintf(stderr, "(%d) %s\n", iErrCode, zMsg); } /* prepare only */ sqlite3_stmt *db_rqp(sqlite3 **db, char *q) { sqlite3_stmt *qs; int r; /* fprintf(stderr, "%s\n", q); */ while((r = sqlite3_prepare_v2(*db, q, -1, &qs, NULL)) == SQLITE_BUSY) { fprintf(stderr, "DB locked? Sleeping and trying again.\n"); usleep(50000); } if(r != SQLITE_OK) { fprintf(stderr, "Error preparing '%s': %s\n", q, sqlite3_errmsg(*db)); exit(4); } return qs; } /* step only */ int db_rqs(sqlite3_stmt *qs) { int r; while((r = sqlite3_step(qs)) == SQLITE_BUSY) { fprintf(stderr, "DB Busy? 
Sleeping and trying again.\n"); usleep(50000); } if(r != SQLITE_DONE && r != SQLITE_ROW) { fprintf(stderr, "Error from sqlite3_step: %d\n", r); exit(4); } return r; } /* finalize only */ void db_rqf(sqlite3_stmt *qs) { if(sqlite3_finalize(qs) != SQLITE_OK) { fprintf(stderr, "Error finalizing statement\n"); exit(4); } } /* quick no-results query */ void db_qq(sqlite3 **db, char *q) { sqlite3_stmt *qs; qs = db_rqp(db, q); db_rqs(qs); db_rqf(qs); } /* quick integer return query */ int db_iq(sqlite3 **db, char *q) { sqlite3_stmt *qs; int r; qs = db_rqp(db, q); if(db_rqs(qs) == SQLITE_ROW) { r = sqlite3_column_int(qs, 0); } else { fprintf(stderr, "Erorr or no rows returned from query!\n"); exit(1); } if(db_rqs(qs) == SQLITE_ROW) { fprintf(stderr, "Multiple rows returned from what should be a single-row query!\n"); } db_rqf(qs); return r; } /* Gets parameter id for a given variable name. Note that the leading : is part of the variable name, which it would be nice if the sqlite3 docs mentioned... */ int db_bindindex(sqlite3_stmt *qs, char *name) { int idx; /* fprintf(stderr, "DEBUG: Statement has %d variable(s).\n", sqlite3_bind_parameter_count(qs)); */ idx = sqlite3_bind_parameter_index(qs, name); if(idx <= 0) { fprintf(stderr, "Error: db_bindindex: variable '%s' not found.\n", name); exit(4); } /* fprintf(stderr, "DEBUG: Parameter index: %d.\n", idx); */ return idx; } /* Bind a string to a variable by name. */ void db_bindstr(sqlite3_stmt *qs, char *name, char *value) { int r, idx; /* fprintf(stderr, "DEBUG: Binding variable '%s' with value '%s'.\n", name, value); */ idx = db_bindindex(qs, name); r = sqlite3_bind_text(qs, idx, value, -1, SQLITE_TRANSIENT); if(r != SQLITE_OK) { fprintf(stderr, "Error: db_bindstr: error binding value: %d\n", r); exit(4); } } /* Bind an integer to a variable by name. 
*/ void db_bindint(sqlite3_stmt *qs, char *name, int value) { int r, idx; /* fprintf(stderr, "DEBUG: Binding variable '%s' with value '%d'.\n", name, value); */ idx = db_bindindex(qs, name); r = sqlite3_bind_int(qs, idx, value); if(r != SQLITE_OK) { fprintf(stderr, "Error: db_bindint: error binding value: %d\n", r); exit(4); } } /* Bind a blob to a variable by name. */ void db_bindblob(sqlite3_stmt *qs, char *name, blob *value) { int r, idx; /* fprintf(stderr, "DEBUG: Binding variable '%s' with blob of length '%d'.\n", name, value->length); */ idx = db_bindindex(qs, name); r = sqlite3_bind_blob(qs, idx, value->data, value->length, SQLITE_TRANSIENT); if(r != SQLITE_OK) { fprintf(stderr, "Error: db_bindint: error binding value: %d\n", r); exit(4); } } /* Dealing with firefox's idb */ /* Only handles basic ASCII. I have no idea why mozilla made this so complicated... */ /* based on https://dxr.mozilla.org/mozilla-central/rev/3bc0d683a41cb63c83cb115d1b6a85d50013d59e/dom/indexedDB/Key.cpp */ char *idbkeytostring(blob *b) { char *key; int i; if(b->length <= 1) { fprintf(stderr, "Error: idbkeytostring: no data in blob or null key.\n"); return NULL; } if(b->data[0] == 0x30) { key = malloc(b->length); for(i = 0 ; i < b->length - 1; i++) { if(b->data[i+1] > 0x7F) { fprintf(stderr, "Error: idbkeytostring: key contains a character outside of 0x00-0x7E. Decoding will be wrong.\n"); } key[i] = b->data[i+1] - 1; } key[i] = '\x0'; } else { fprintf(stderr, "Error: idbkeytostring: key is not a string.\n"); return NULL; } return key; } blob *idbkeyfromstring(char *in) { int i, len = strlen(in); blob *b = blobnew(); blobexpand(b, len + 1); /* indicates key is a string. */ b->data[0] = '\x30'; for(i = 0; i < len; i++) { if(in[i] > 0x7E) { fprintf(stderr, "Error: idbkeyfromstring: key contains a character outside of 0x00-0x7E. 
Encoding will be wrong.\n"); } b->data[i+1] = in[i] + 1; } b->length = len + 1; return b; } /* gets the object id number given an object name */ int idbgetidbyname(sqlite3 **db, char *name) { int id = -1; sqlite3_stmt *qs = db_rqp(db, "SELECT id FROM object_store WHERE name = :name;"); db_bindstr(qs, ":name", name); if(db_rqs(qs) == SQLITE_ROW) { id = sqlite3_column_int(qs, 0); } else { fprintf(stderr, "Erorr: idbgetidbyname: name not found.\n"); } if(db_rqs(qs) == SQLITE_ROW) { fprintf(stderr, "Erorr: idbgetidbyname: multiple ids for name found.\n"); } db_rqf(qs); return id; } /* returns a list of keys, in blob form, for a given id. result must be freed even if no rows. */ blob **idbgetblobkeysbyid(sqlite3 **db, int id) { int numkeys = 0; blob **keys = malloc(sizeof(blob **)); sqlite3_stmt *qs; keys[0] = NULL; if(id < 0) return keys; qs = db_rqp(db, "SELECT key FROM object_data WHERE object_store_id = :id;"); db_bindint(qs, ":id", id); while(db_rqs(qs) == SQLITE_ROW) { keys = realloc(keys, sizeof(blob *)*(numkeys+2)); keys[numkeys] = blobfromsqlite3(qs, 0); numkeys++; } keys[numkeys] = NULL; /* fprintf(stderr, "DEBUG: Found %d key(s) for id %d.\n", numkeys, id); */ db_rqf(qs); return keys; } blob **idbgetblobkeysbyname(sqlite3 **db, char *name) { return idbgetblobkeysbyid(db, idbgetidbyname(db, name)); } /* get the raw data for a given id and blob key */ blob *idbrawdatabykeyblob(sqlite3 **db, int id, blob *key) { blob *bdata = NULL; sqlite3_stmt *qs; qs = db_rqp(db, "SELECT data FROM object_data WHERE object_store_id = :id AND key = :key;"); db_bindint(qs, ":id", id); db_bindblob(qs, ":key", key); if(db_rqs(qs) == SQLITE_ROW) bdata = blobfromsqlite3(qs, 0); else fprintf(stderr, "Erorr: idbrawdatabykeyblob: key not found.\n"); if(db_rqs(qs) == SQLITE_ROW) fprintf(stderr, "Erorr: idbrawdatabykeyblob: multiple rows returned for what should be a unique key.\n"); db_rqf(qs); return bdata; } blob *idbrawdatabykeyblobname(sqlite3 **db, char *name, blob *key) { return 
idbrawdatabykeyblob(db, idbgetidbyname(db, name), key); } /* Decompress snappy-compressed data. This probably has bugs. Probably should have found a library to do it for me. Oh well! */ /* Based on details at https://github.com/google/snappy/blob/master/format_description.txt */ blob *desnappy(blob *in) { blob *out = blobnew(); int i = 0, j; unsigned length, offset; unsigned char b, d; int outlength = 0, outshift = 0; int debug = 0; /* in->data[0] = 0xFE; in->data[1] = 0xFF; in->data[2] = 0x7F; */ /* #define TESTSTRING "\xca\x02\xf0\x42\x57\x69\x6b\x69\x70\x65\x64\x69\x61\x20\x69\x73\x20\x61\x20\x66\x72\x65\x65\x2c\x20\x77\x65\x62\x2d\x62\x61\x73\x65\x64\x2c\x20\x63\x6f\x6c\x6c\x61\x62\x6f\x72\x61\x74\x69\x76\x65\x2c\x20\x6d\x75\x6c\x74\x69\x6c\x69\x6e\x67\x75\x61\x6c\x20\x65\x6e\x63\x79\x63\x6c\x6f\x09\x3f\xf0\x81\x70\x72\x6f\x6a\x65\x63\x74\x2e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" memcpy(in->data, TESTSTRING, strlen(TESTSTRING)); */ /* while(i < in->length && (in->data[i] & 0x80)) { outlength += (in->data[i] & 0x7f) << outshift; outshift += 7; i++; } if(i < in->length) { outlength += (in->data[i] & 0x7f) << outshift; i++; } */ while(i < in->length) { outlength += (in->data[i] & 0x7f) << outshift; outshift += 7; i++; if(!(in->data[i-1] & 0x80)) break; } if(debug) fprintf(stderr, "DEBUG: Length was %d bytes long, decoded to %d.\n", i, outlength); while(i < in->length) { if(0 && debug) fprintf(stderr, "%d, %d: %02x %02x %02x %02x: ", i, out->length, in->data[i+0], in->data[i+1], in->data[i+2], in->data[i+3]); if(debug) { fprintf(stderr, "%d, %d: ", i, out->length); for(j = 0; j < 16; j++) fprintf(stderr, "%02X ", in->data[i+j]); } b = in->data[i]; d = b >> 2; switch(b & 0x03) { case 0x00: if(d < 60) { length = d + 1; i += 1; } if(d == 60) { length = in->data[i+1] + 1; i += 2; } if(d == 61) { length = in->data[i+1] + (in->data[i+2] << 8) + 1; i += 3; } if(d == 62) { length = in->data[i+1] + (in->data[i+2] << 8) + (in->data[i+3] << 16) + 1; i += 4; } 
if(d == 63) { length = in->data[i+1] + (in->data[i+2] << 8) + (in->data[i+3] << 16) + (in->data[i+4] << 24) + 1; i += 5; } if(debug) fprintf(stderr, "DEBUG: 0x00 %d: length %d, new pos %d.\n", d, length, i); if(length > in->length - i) { fprintf(stderr, "Error: desnappy: Excessive literal length.\n"); return out; } blobaddbytes(out, in->data + i, length); i += length; /* blobdump(out); */ if(debug) blobdumphex(out, -2); continue; case 0x01: length = (d & 0x07) + 4; offset = ((unsigned)(d >> 3) << 8) + in->data[i+1]; i += 2; if(debug) fprintf(stderr, "DEBUG: 0x01 %d: length %d offset %d.\n", d, length, offset); break; case 0x02: length = d + 1; offset = in->data[i+1] + (in->data[i+2] << 8); i += 3; if(debug) fprintf(stderr, "DEBUG: 0x02 %d: length %d offset %d.\n", d, length, offset); break; case 0x03: length = d + 1; offset = in->data[i+1] + (in->data[i+2] << 8) + (in->data[i+3] << 16) + (in->data[i+4] << 24); i += 5; if(debug) fprintf(stderr, "DEBUG: 0x03 %d: length %d offset %d.\n", d, length, offset); break; } if(debug) fprintf(stderr, "DEBUG: Copying %d bytes starting from %d offset.\n", length, offset); if(length > in->length) { fprintf(stderr, "Error: desnappy: Unreasonable (but possibly legal) copy length.\n"); return out; } if(offset > out->length) { fprintf(stderr, "Error: desnappy: Illegal copy offset.\n"); return out; } if(length > offset) { if(debug) fprintf(stderr, "DEBUG: Using slow copy due to range including not-yet-copied data.\n"); /* make sure we allocate the storage first, otherwise the copy might realloc it, and the input pointer will no longer point to valid data! */ blobexpand(out, length); blobslowlyaddbytes(out, out->data + out->length - offset, length); } else { blobexpand(out, length); blobaddbytes(out, out->data + out->length - offset, length); } /* blobdump(out); */ if(debug) blobdumphex(out, -2); } if(outlength != out->length) { fprintf(stderr, "Error: desnappy: Header said length was %d bytes, but expanded to %d bytes. 
Partial or corrupted data, or decompression error.\n", outlength, out->length); } return out; } /* From https://gitlab.com/ntninja/moz-idb-edit/-/blob/master/mozserial.py , converted to C, which it probably was originally. Oh well. */ /* Todo: gut this to only contain relevant things. */ typedef enum jstype { JSTYPENOTSET = 0, /* Special values */ JSFLOAT_MAX = 0xFFF00000, JSHEADER = 0xFFF10000, /* Basic JavaScript types */ JSNULL = 0xFFFF0000, JSUNDEFINED = 0xFFFF0001, JSBOOLEAN = 0xFFFF0002, JSINT32 = 0xFFFF0003, JSSTRING = 0xFFFF0004, /* Extended JavaScript types */ JSDATE_OBJECT = 0xFFFF0005, JSREGEXP_OBJECT = 0xFFFF0006, JSARRAY_OBJECT = 0xFFFF0007, JSOBJECT_OBJECT = 0xFFFF0008, JSARRAY_BUFFER_OBJECT = 0xFFFF0009, JSBOOLEAN_OBJECT = 0xFFFF000A, JSSTRING_OBJECT = 0xFFFF000B, JSNUMBER_OBJECT = 0xFFFF000C, JSBACK_REFERENCE_OBJECT = 0xFFFF000D, /* DO_NOT_USE_1 */ /* DO_NOT_USE_2 */ JSTYPED_ARRAY_OBJECT = 0xFFFF0010, JSMAP_OBJECT = 0xFFFF0011, JSSET_OBJECT = 0xFFFF0012, JSEND_OF_KEYS = 0xFFFF0013, /* DO_NOT_USE_3 */ JSDATA_VIEW_OBJECT = 0xFFFF0015, JSSAVED_FRAME_OBJECT = 0xFFFF0016, /* ? */ /* Principals ? */ JSJSPRINCIPALS = 0xFFFF0017, JSNULL_JSPRINCIPALS = 0xFFFF0018, JSRECONSTRUCTED_SAVED_FRAME_PRINCIPALS_IS_SYSTEM = 0xFFFF0019, JSRECONSTRUCTED_SAVED_FRAME_PRINCIPALS_IS_NOT_SYSTEM = 0xFFFF001A, /* ? 
*/ JSSHARED_ARRAY_BUFFER_OBJECT = 0xFFFF001B, JSSHARED_WASM_MEMORY_OBJECT = 0xFFFF001C, /* Arbitrarily sized integers */ JSBIGINT = 0xFFFF001D, JSBIGINT_OBJECT = 0xFFFF001E, /* Older typed arrays */ JSTYPED_ARRAY_V1_MIN = 0xFFFF0100, JSTYPED_ARRAY_V1_INT8 = JSTYPED_ARRAY_V1_MIN + 0, JSTYPED_ARRAY_V1_UINT8 = JSTYPED_ARRAY_V1_MIN + 1, JSTYPED_ARRAY_V1_INT16 = JSTYPED_ARRAY_V1_MIN + 2, JSTYPED_ARRAY_V1_UINT16 = JSTYPED_ARRAY_V1_MIN + 3, JSTYPED_ARRAY_V1_INT32 = JSTYPED_ARRAY_V1_MIN + 4, JSTYPED_ARRAY_V1_UINT32 = JSTYPED_ARRAY_V1_MIN + 5, JSTYPED_ARRAY_V1_FLOAT32 = JSTYPED_ARRAY_V1_MIN + 6, JSTYPED_ARRAY_V1_FLOAT64 = JSTYPED_ARRAY_V1_MIN + 7, JSTYPED_ARRAY_V1_UINT8_CLAMPED = JSTYPED_ARRAY_V1_MIN + 8, JSTYPED_ARRAY_V1_MAX = JSTYPED_ARRAY_V1_UINT8_CLAMPED, /* Transfer-only tags (not used for persistent data) */ JSTRANSFER_MAP_HEADER = 0xFFFF0200, JSTRANSFER_MAP_PENDING_ENTRY = 0xFFFF0201, JSTRANSFER_MAP_ARRAY_BUFFER = 0xFFFF0202, JSTRANSFER_MAP_STORED_ARRAY_BUFFER = 0xFFFF0203 } jstype; typedef struct jstree { jstype type; void *data; struct jstree *tkey; struct jstree *tdata; struct jstree *next; } jstree; jstree *jstreenew() { jstree *r; r = malloc(sizeof(jstree)); r -> type = 0; r -> data = NULL; r -> next = NULL; r -> tkey = NULL; r -> tdata = NULL; return r; } void jstreedumpsubsub(jstree *t) { char *str; if(t == NULL) { fprintf(stderr, "NULL tree pointer"); return; } switch(t->type) { case 0: fprintf(stderr, "****Zero type****"); break; case JSHEADER: fprintf(stderr, "Header"); break; case JSSTRING: str = blobtostring((blob *)t->data); fprintf(stderr, "'%s'", str); free(str); break; case JSINT32: fprintf(stderr, "%d", *((int *)t->data)); break; case JSOBJECT_OBJECT: fprintf(stderr, "Object"); break; case JSARRAY_OBJECT: fprintf(stderr, "Array"); break; case JSNULL: fprintf(stderr, "NULL"); break; default: fprintf(stderr, "Type %08X", t->type); break; } } void jstreedumpsub(jstree *t, int l) { int i; while(t != NULL) { for(i = 0; i < l * 4; i++) fprintf(stderr, " 
"); if(t->type == 0) { jstreedumpsubsub(t->tkey); fprintf(stderr, " "); jstreedumpsubsub(t->tdata); fprintf(stderr, "\n"); } else { /* fprintf(stderr, "Tree node unexpectedly contains data.\n"); */ jstreedumpsubsub(t); fprintf(stderr, "\n"); if(t->data != NULL && (t->type == JSOBJECT_OBJECT || t->type == JSARRAY_OBJECT)) { jstreedumpsub((jstree *)t->data, l + 1); } } if(t->tdata != NULL && (t->tdata->type == JSOBJECT_OBJECT || t->tdata->type == JSARRAY_OBJECT)) { jstreedumpsub((jstree *)t->tdata->data, l + 1); } t = t->next; } } void jstreedump(jstree *t) { jstreedumpsub(t, 0); } #define JSTREEDEBUG 0 /* This draws heavily on https://gitlab.com/ntninja/moz-idb-edit/-/blob/master/mozserial.py */ jstree *jsparsesub(blob *in, int *pos) { jstype type; int info; int i, doh, breakloop = 0, noendkeys = 0, iskey = 1; int length, is16; char *str; jstree *r = NULL, **rnext = &r, *leaf = NULL, *tkeytmp = NULL, *tdatatmp = NULL; /* so I'm using i for everything. I'm lazy. */ i = *pos; while(i < in->length && !breakloop) { /* It seems everything is padding to 8 byte boundries. If the last thing we read didn't leave us on one, skip to the next one. */ doh = 0; while(i % 8) { if(in->data[i]) doh = 1; i++; } if(doh) { fprintf(stderr, "ERROR: jsparsesub: Parse error, what we thought was padding contained data.\n"); } /* I probably should use ntohl and stuff for these, or at least a helper... */ info = in->data[i+0] + (in->data[i+1] << 8) + (in->data[i+2] << 16) + (in->data[i+3] << 24); type = in->data[i+4] + (in->data[i+5] << 8) + (in->data[i+6] << 16) + (in->data[i+7] << 24); i += 8; if(JSTREEDEBUG) fprintf(stderr, "type: %08X, info: %08X.\n", type, info); switch(type) { case JSHEADER: if(JSTREEDEBUG) fprintf(stderr, "Header found. Info: %d\n", info); /* I have no idea what the header info is, but the header seems to also serve as a key. 
*/ leaf = jstreenew(); leaf->type = JSHEADER; noendkeys = 1; break; case JSOBJECT_OBJECT: case JSARRAY_OBJECT: if(JSTREEDEBUG) fprintf(stderr, "Found object or array, recursing.\n"); leaf = jstreenew(); leaf->type = type; leaf->data = (void *)jsparsesub(in, &i); /* on return, i should be at the end of the object/array */ break; case JSEND_OF_KEYS: if(JSTREEDEBUG) fprintf(stderr, "Found end of object or array, returning.\n"); breakloop = 1; break; case JSSTRING: is16 = !(info & 0x80000000); /* high bit is encoding width */ length = info & 0x7FFFFFFF; /* rest is string length */ if(is16) { /* fprintf(stderr, "Multibyte strings not fully implemented!\n"); */ length *= 2; } if(i + length > in->length) { fprintf(stderr, "ERROR: jsparsesub: String length extends past end of input data.\n"); return NULL; } else { leaf = jstreenew(); leaf->type = JSSTRING; /* leaf->data = malloc(length + 1); memcpy(leaf->data, in->data + i, length); ((char *)leaf->data)[length] = '\x0'; */ leaf->data = (void *)blobnew(); blobaddbytes((blob *)leaf->data, in->data + i, length); if(JSTREEDEBUG) {str = blobtostring((blob *)leaf->data); fprintf(stderr, "String found. Length %d, encoding %s: '%s'\n", length, is16 ? 
"utf-16le / 2-byte" : "latin-1 / 1-byte", str); free(str); } } i += length; /* we'll let the main padding remover absorb the padding */ break; case JSINT32: leaf = jstreenew(); leaf->type = JSINT32; leaf->data = malloc(sizeof(int)); *((int *)leaf->data) = info; if(JSTREEDEBUG) fprintf(stderr, "32-bit integer found: %d\n", *((int *)leaf->data)); break; case JSBOOLEAN: leaf = jstreenew(); leaf->type = JSINT32; /* Not going to bother with a seperate boolean type */ leaf->data = malloc(sizeof(int)); *((int *)leaf->data) = info; if(JSTREEDEBUG) fprintf(stderr, "Boolean found: %d\n", *((int *)leaf->data)); break; case JSNULL: case JSUNDEFINED: leaf = jstreenew(); leaf->type = JSNULL; if(JSTREEDEBUG) fprintf(stderr, "Null or undefined value found.\n"); break; default: fprintf(stderr, "ERROR: jsparsesub: Type %08X with info %08X not implemented, or parse error.\n", type, info); /* Stick an empty leaf in as a placeholder, to hopefully minimize desync and further parse errors. */ leaf = jstreenew(); } if(leaf != NULL) { if(iskey) { tkeytmp = leaf; iskey = 0; leaf = NULL; } else { tdatatmp = leaf; iskey = 1; if(*rnext == NULL) { *rnext = jstreenew(); } (*rnext)->tkey = tkeytmp; (*rnext)->tdata = tdatatmp; rnext = &((*rnext)->next); leaf = NULL; } } } if(!breakloop && !noendkeys) { fprintf(stderr, "ERROR: jsparsesub: Reached end of input without finding end of keys marker.\n"); } *pos = i; return r; } jstree *jsparse(blob *in) { int pos = 0; return jsparsesub(in, &pos); } /* Holy fuck, how many different formats does this program have to deal with?! 
*/ jstree *jsparse2sub(blob *in, int *pos) { char c; int i = *pos, iskey = 1, noendkeys = 0, breakloop = 0, length; jstree *r = NULL, **rnext = &r, *leaf = NULL, *tkeytmp = NULL, *tdatatmp = NULL; /* invent a header to make the tree look like the other function's trees */ if(i == 0) { leaf = jstreenew(); leaf->type = JSHEADER; noendkeys = 1; tkeytmp = leaf; leaf = NULL; iskey = 0; } while(i < in->length && !breakloop) { c = in->data[i]; i++; switch(c) { case ':': case ',': case ' ': /* Probably could do some error checking with these, but.... */ break; case '{': leaf = jstreenew(); leaf->type = JSOBJECT_OBJECT; /* fprintf(stderr, "{\n"); */ leaf->data = (void *)jsparse2sub(in, &i); /* on return, i should be at the end of the object/array */ break; case '}': /* fprintf(stderr, "}\n"); */ breakloop = 1; break; case '"': /* this needs to handle how quotes are escaped, but I don't actually see a single quote mark in my sample data to figure it out, so is BROKEN. */ length = 0; while(i + length < in->length && in->data[i + length] != '"') length++; leaf = jstreenew(); leaf->type = JSSTRING; leaf->data = (void *)blobnew(); blobaddbytes((blob *)leaf->data, in->data + i, length); /* fprintf(stderr, "Found string of length %d.\n", length); */ i += length + 1; break; case '0': /* ... 9. Maybe? There's no bare numbers in my sample data, so I don't know if they exist or not. */ break; case 'n': case 'N': /* I _could_ check if it actually has "ull", but.... 
*/ leaf = jstreenew(); leaf->type = JSNULL; /* fprintf(stderr, "Found null\n"); */ i += 3; break; default: fprintf(stderr, "jsparse2sub: Unexpected character '%c'.\n", c); break; } if(leaf != NULL) { if(iskey) { tkeytmp = leaf; iskey = 0; leaf = NULL; } else { tdatatmp = leaf; iskey = 1; if(*rnext == NULL) { *rnext = jstreenew(); } (*rnext)->tkey = tkeytmp; (*rnext)->tdata = tdatatmp; rnext = &((*rnext)->next); leaf = NULL; } } } if(!breakloop && !noendkeys) { fprintf(stderr, "ERROR: jsparse2sub: Reached end of input without finding closing bracket.\n"); } *pos = i; return r; } jstree *jsparse2(blob *in) { int pos = 0; return jsparse2sub(in, &pos); } jstree *jsfindnodesub(jstree *t, char *path) { char *p, *k; int found; if(!path) return t; /* fprintf(stderr, "Path: '%s'.\n", path); */ p = strtok(path, "."); if(!p || !strlen(p)) { return t; } /* fprintf(stderr, "Looking for '%s'.\n", p); */ while(t != NULL) { if(t->type) return t; if(!t->tkey) return NULL; found = 0; if(t->tkey->type == JSINT32) { if(*((int *)t->tkey->data) == atoi(p)) { found = 1; } } if(t->tkey->type == JSSTRING) { k = blobtostring((blob *)t->tkey->data); if(!strcmp(k, p)) { found = 1; } free(k); } if(found) { /* jstreedump(t->tdata); */ if(t->tdata->type == JSOBJECT_OBJECT || t->tdata->type == JSARRAY_OBJECT) { return jsfindnodesub((jstree *)t->tdata->data, path + strlen(p) + 1); } else { return t->tdata; } } t = t->next; } return NULL; } jstree *jsfindnode(jstree *t, char *path) { jstree *r; char *p; /* if(!t || !t->tkey || t->tkey->type != JSHEADER || !t->tdata || t->tdata->type != JSOBJECT_OBJECT) { fprintf(stderr, "ERROR: jsfindnode: no tree, no header, or no object.\n"); return NULL; } */ if(!t || !t->tkey || !t->tdata) { fprintf(stderr, "ERROR: jsfindnode: no tree.\n"); return NULL; } if(t->tkey && t->tkey->type == JSHEADER) { t = (jstree *)t->tdata->data; } p = malloc(strlen(path) + 2); strcpy(p, path); p[strlen(path) + 1] = '\x0'; r = jsfindnodesub(t, p); free(p); return r; } char 
*jsfindstring(jstree *t, char *path) { jstree *node; node = jsfindnode(t, path); if(!node) return NULL; if(node->type == JSNULL) return NULL; if(node->type != JSSTRING || !node->data) { fprintf(stderr, "ERROR: jsfindstring: not a string.\n"); } return blobtostring((blob *)node->data); } /* the above should call this, but I'm lazy. */ blob *jsfindblob(jstree *t, char *path) { jstree *node; node = jsfindnode(t, path); if(!node) return NULL; if(node->type == JSNULL) return NULL; if(node->type != JSSTRING || !node->data) { fprintf(stderr, "ERROR: jsfindstring: not a string.\n"); } return (blob *)node->data; } int jsfindint(jstree *t, char *path) { jstree *node; node = jsfindnode(t, path); /* these probably should do something other than return 0. */ /* if(!node) return 0; if(node->type == JSNULL) return 0; */ if(!node || node->type != JSINT32 || !node->data) { fprintf(stderr, "ERROR: jsfindstring: not an int.\n"); return 0; } return *((int *)node->data); } void jstreefree(jstree *t) { if(!t) return; if(t->tkey) jstreefree(t->tkey); if(t->tdata) jstreefree(t->tdata); if(t->next) jstreefree(t->next); if(t->type == JSOBJECT_OBJECT || t->type == JSARRAY_OBJECT) jstreefree((jstree *)t->data); if(t->type == JSSTRING) blobfree((blob *)t->data); if(t->type == JSINT32) free(t->data); free(t); } void azaddtodict(char ***dict, int *n, char *str, int len) { int l; char *s, **dp; for(l = 2; l <= len; l++) { s = malloc(l + 1); memcpy(s, str, l); s[l] = '\x0'; /* fprintf(stderr, "%d: %s\n", *n, s); */ /* O(n^2)! 
*/ dp = *dict; while(dp && *dp) { if(strlen(s) == strlen(*dp) && !strcmp(s, *dp)) break; dp++; } if(dp && *dp) { /* fprintf(stderr, "Duplicate, skipping.\n"); */ continue; } *dict = realloc(*dict, sizeof(char *) * (*n+2)); (*dict)[*n] = s; (*n)++; (*dict)[*n] = NULL; } } char **azbuilddict(jstree *cpr) { char *s, **dict = NULL; int n = 0, num; while(cpr) { azaddtodict(&dict, &n, (char *)((blob *)cpr->tdata->data)->data, ((blob *)cpr->tdata->data)->length); cpr = cpr->next; } for(num = 100; num < 1000; num++) { s = malloc(4); sprintf(s, "%d", num); azaddtodict(&dict, &n, s, strlen(s)); free(s); } /* fprintf(stderr, "Added %d dictionary entries.\n", n); */ return dict; } void azfreedict(char **dict) { char **d = dict; while(d && *d) { free(*d); d++; } if(dict) free(dict); } blob *asciiblobtoutf16(blob *in) { blob *out = blobnew(); int i; for(i = 0; i < in->length; i++) { blobaddbytes(out, in->data + i, 1); blobaddbytes(out, (unsigned char *)"\x0", 1); } return out; } blob *asciistringtoutf16(char *in) { blob *tmp = blobnew(), *out; blobaddbytes(tmp, (unsigned char *)in, strlen(in)); out = asciiblobtoutf16(tmp); blobfree(tmp); return out; } /* based on https://github.com/d10r/kindle-fetch/blob/master/main.js */ #define AZMAGIC 9983 #define AZDICT 102 blob *azstring(blob *in, char **dict) { int i, c, c2; int dlen = 0, dent, len; blob *out = blobnew(), *utf, *out8 = blobnew(); char uni[10]; while(dict && dict[dlen]) dlen++; for(i = 0; i + 1 < in->length; i += 2) { c = in->data[i] + (in->data[i+1] << 8); /* if the utf-16 character is less than this magic number, add the input to the output as-is */ if(c < AZMAGIC) { blobaddbytes(out, in->data + i, 2); } /* if it's less than 100ish over that magic number, it specifies a literal with the length being the amount over the magic number */ else if(c < AZMAGIC + AZDICT) { len = c - AZMAGIC; fprintf(stderr, "NOT IMPLEMENTED: Copy with length %d.\n", len); /* There's no examples of this in my test source, so I'm not writing it 
yet, with no way to test it. */ } /* If it's neither of those, it's a dictionary lookup */ else { dent = c - AZMAGIC - AZDICT; if(dent < 0 || dent >= dlen) { fprintf(stderr, "ERROR: azstring: invalid dictionary reference.\n"); } else { /* the dictionary is given in latin-1, but the input is in utf-16. grrrrrrrrrrrr. */ utf = asciistringtoutf16(dict[dent]); blobaddblob(out, utf); blobfree(utf); } } /*if(c >= ' ' && c <= '~') fprintf(stderr, "%c", c); else fprintf(stderr, "(%d, %d, %s)", c, c - 9983, ((c-9983-102)=0)?dict[c-9983-102]:"");*/ /*if(c >= ' ' && c <= '~') fprintf(stderr, "%c", c); else fprintf(stderr, "%s", ((c-9983-102)=0)?dict[c-9983-102]:"****");*/ } /* Convert utf-16 to utf-8. Mostly untested. Probably doesn't work for all characters. */ for(i = 0; i+1 < out->length; i += 2) { /* Why oh why can't utf-16 just die? */ c = out->data[i] + (out->data[i+1] << 8); if((c & 0xFC00) == 0xD800) { /* leading unit of pair */ if(i+3 < out->length) { c2 = out->data[i+2] + (out->data[i+3] << 8); if((c2 & 0xFC00) == 0xDC00) { c = ((c & 0x0300) << 10) | (c2 & 0x0300); i += 2; } else { fprintf(stderr, "ERROR: Second half of utf-16 pair not found.\n"); } } else { fprintf(stderr, "ERROR: utf-16 pair goes past end of string.\n"); } } /* output it as utf-8 */ if(c <= 0x7F) { uni[0] = c; blobaddbytes(out8, (unsigned char *)uni, 1); } else if(c <= 0x7FF) { uni[0] = 0xC0 | ((c >> 6) & 0x1F); uni[1] = 0x80 | ((c >> 0) & 0x3F); blobaddbytes(out8, (unsigned char *)uni, 2); } else if(c <= 0xFFFF) { uni[0] = 0xE0 | ((c >> 12) & 0x0F); uni[1] = 0x80 | ((c >> 6) & 0x3F); uni[2] = 0x80 | ((c >> 0) & 0x3F); blobaddbytes(out8, (unsigned char *)uni, 3); } else if(c <= 0x10FFFF) { uni[0] = 0xF0 | ((c >> 18) & 0x07); uni[1] = 0x80 | ((c >> 12) & 0x3F); uni[2] = 0x80 | ((c >> 6) & 0x3F); uni[3] = 0x80 | ((c >> 0) & 0x3F); blobaddbytes(out8, (unsigned char *)uni, 4); } else { fprintf(stderr, "ERROR: Excessive unicode character.\n"); } /* if(c > 0x7F) fprintf(stderr, "(%06X)", c); */ } /* 
blobdumpraw(out8); */ return out8; } /* Build a jstree from one stored record: fetch the raw bytes with idbrawdatabykeyblob(db, id, bkey), pass them through desnappy(), and hand the result to jsparse(). Both intermediate blobs are freed here; the parsed tree is returned to the caller. NOTE(review): none of the three helper results is checked before use -- confirm whether they can return NULL. */ jstree *jstreefromkeyblob(sqlite3 **db, int id, blob *bkey) { blob *bdata = idbrawdatabykeyblob(db, id, bkey); blob *sdata = desnappy(bdata); jstree *t = jsparse(sdata); blobfree(bdata); blobfree(sdata); return t; } /* Convenience wrapper: resolves name to an id with idbgetidbyname() and forwards to jstreefromkeyblob(). */ jstree *jstreefromkeyblobname(sqlite3 **db, char *name, blob *bkey) { return jstreefromkeyblob(db, idbgetidbyname(db, name), bkey); } /* Text written at the top of every output file; main() replaces the ___TITLE___ placeholder with the book title. */ char *htmlheader = "\n" "\n" "\n" "\n" "___TITLE___\n" "\n" "\n"; /* Text written at the end of every output file. */ char *htmlfooter = "\n" "\n"; /* Character placed between the asin and the 8-digit zero-padded fId when main() builds a fragment key. */ #define FIELDSEP '\x1F' /* Size of the fixed-length char buffers declared in main(). */ #define FOOLEN 4096 /* Entry point. Opens the hard-coded sqlite file (exits with status 4 on failure), then for every key returned by idbgetblobkeysbyname() for bookinfo: parses the bookinfo tree, creates /tmp/<title>.html, writes the header, builds the dictionary from metadata.cpr, loops over the entries starting at fragmap.fragmentArray decoding each fragment piece with azstring(), swaps image data in for the dataUrl attributes, rewrites links, and finally writes the footer and closes the file. NOTE(review): no sqlite3_close() call is visible anywhere in this function. */ int main(void) { char *dbfile = "/home/randyg/.mozilla/firefox/7i52vnrz.default/storage/default/https+++read.amazon.com/idb/1544113634Ks4kWobo.sqlite"; sqlite3 *db; char **dict, *title, *asin, fragkey[FOOLEN], *imagename, imagematch[FOOLEN], outfilename[FOOLEN], link[FOOLEN], newlink[FOOLEN]; jstree *bookinfo, *cpr, *fragmentarray, *fragmententry, *fragmententryobject, *fragment, *other, *imagedata, *imageentry; int kn, fid, pos, startpos, endpos, linkpos, linkendpos, linklen; blob *bfragkey, *piece, *bkey, **keys, *image, *imagereplacement, *html, *tmp; FILE *outfile; sqlite3_config(SQLITE_CONFIG_LOG, errorLogCallback, NULL); if(sqlite3_open(dbfile, &db) != SQLITE_OK) { fprintf(stderr, "Error opening database: %s\n", sqlite3_errmsg(db)); exit(4); } /* default busy handler that keeps trying for 1000ms. */ sqlite3_busy_timeout(db, 1000); /* get the bookinfo objects. One per book, and the key is the asin. 
*/ keys = idbgetblobkeysbyname(&db, "bookinfo"); kn = 0; /* the loop stops at the first NULL entry of keys */ while((bkey = keys[kn])) { asin = idbkeytostring(bkey); bookinfo = jstreefromkeyblobname(&db, "bookinfo", bkey); if(0) jstreedump(bookinfo); title = jsfindstring(bookinfo, "metadata.title"); if(title) fprintf(stderr, "Converting asin '%s', '%s'.\n", asin, title); /* NOTE(review): title is only NULL-checked for the log line above; it is used unconditionally below for the file name and the header replacement. */ snprintf(outfilename, FOOLEN, "/tmp/%s.html", title); /* open the output file; NOTE(review): the fopen() result is not checked before it is passed to blobwrite() */ outfile = fopen(outfilename, "w"); /* add header to it */ html = blobnew(); blobaddstring(html, htmlheader); blobsearchreplacestr(html, "___TITLE___", title); blobwrite(html, outfile); blobfree(html); /* build the compression dictionary */ cpr = jsfindnode(bookinfo, "metadata.cpr"); if(0) jstreedump(cpr); dict = azbuilddict(cpr); /* get the fragment map and parse it for the fragment IDs */ fragmentarray = jsfindnode(bookinfo, "fragmap.fragmentArray"); if(0) jstreedump(fragmentarray); fragmententry = fragmentarray; while(fragmententry) { if(0) jstreedump(fragmententry); fragmententryobject = (jstree *)fragmententry->tdata->data; if(0) jstreedump(fragmententryobject); fid = jsfindint(fragmententryobject, "fId"); if(0) fprintf(stderr, "fId %d\n", fid); snprintf(fragkey, FOOLEN, "%s%c%08d", asin, FIELDSEP, fid); if(0) fprintf(stderr, "Fragment key: '%s'\n", fragkey); bfragkey = idbkeyfromstring(fragkey); if(0) blobdump(bfragkey); /* get the fragment */ fragment = jstreefromkeyblobname(&db, "fragments", bfragkey); if(0) jstreedump(fragment); /* get and decompress the piece */ piece = azstring(jsfindblob(fragment, "piece"), dict); if(0) blobdumpraw(piece); /* this is more complicated than I thought, skip it for now. 
*/ if(0 && jsfindint(fragment, "size") != piece->length) { fprintf(stderr, "Decoded fragment size (%d) does not match fragment size in bookinfo (%d).\n", piece->length, jsfindint(fragment, "size")); } /* there's a nested text-based object as a string inside the fragment object, parse it into its own tree */ other = jsparse2(jsfindblob(fragment, "other")); if(0) jstreedump(other); /* and find the image data */ imagedata = jsfindnode(other, "imageData"); if(0) jstreedump(imagedata); /* loop through all images in this fragment */ imageentry = imagedata; while(imageentry && imageentry->tkey && imageentry->tdata) { /* do some search and replace to change dataUrl="imagename" into src="imagedata" */ /* this is where you'd decode the images and write them to disk if not embedded in the format */ imagename = blobtostring((blob *)imageentry->tkey->data); image = (blob *)imageentry->tdata->data; if(0) fprintf(stderr, "Image '%s' length %d.\n", imagename, image->length); snprintf(imagematch, FOOLEN, "dataUrl=\"%s\"", imagename); imagereplacement = blobnew(); blobaddstring(imagereplacement, "src=\"___IMAGE___\""); blobsearchreplace(imagereplacement, "___IMAGE___", image); blobsearchreplace(piece, imagematch, imagereplacement); blobfree(imagereplacement); if(0) blobdumpraw(piece); free(imagename); imageentry = imageentry->next; } /* replace links with non-javascript versions */ /* */ /* */ pos = 0; while(1) { /* Would be easy if I just added blobregex()... */ linklen = 0; startpos = blobfind(piece, " FOOLEN - 1) linklen = FOOLEN - 1; strncpy(link, (char *)piece->data + linkpos + 1, linklen); link[linklen] = '\x0'; endpos = blobfind(piece, "\"", linkendpos); if(endpos < 0) break; tmp = blobnew(); blobaddbytes(tmp, (unsigned char *)piece->data + endpos + 1, piece->length - endpos - 1); piece->length = startpos; snprintf(newlink, FOOLEN, "next; } /* add footer to output */ html = blobnew(); blobaddstring(html, htmlfooter); blobwrite(html, outfile); blobfree(html); /* yay! 
*/ fclose(outfile); jstreefree(bookinfo); /* azfreedict(dict); */ free(asin); free(title); kn++; } free(keys); } /* Some of this is based on info at: https://stackoverflow.com/questions/54920939/parsing-fb-puritys-firefox-idb-indexed-database-api-object-data-blob-from-lin https://gitlab.com/ntninja/moz-idb-edit/-/blob/master/mozserial.py https://github.com/google/snappy/blob/master/format_description.txt https://dxr.mozilla.org/mozilla-central/rev/3bc0d683a41cb63c83cb115d1b6a85d50013d59e/dom/indexedDB/Key.cpp#32 https://dxr.mozilla.org/mozilla-central/source/js/src/vm/StructuredClone.cpp#2055 https://github.com/d10r/kindle-fetch/blob/master/main.js */