// Skip JSON "insignificant whitespace" (per RFC 4627: space, tab, LF, CR).
//
// s: NUL-terminated input text.
// Returns a pointer to the first character that is not whitespace
// (possibly the terminating NUL).
static const char* skip_spaces(const char* s)
{
   for (;;) {
      const char c = *s;
      if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
         return s;
      s++;
   }
}

// Convert one hexadecimal digit to its numeric value.
//
// Returns 0..15, or -1 when c is not a hex digit (this includes NUL).
static int hexToInt(char c)
{
   if (c >= '0' && c <= '9')
      return c - '0';
   if (c >= 'a' && c <= 'f')
      return c - 'a' + 10;
   if (c >= 'A' && c <= 'F')
      return c - 'A' + 10;
   return -1;
}

// Parse exactly four hex digits (the "XXXX" of a JSON "\uXXXX" escape).
//
// s:    points at the first hex digit.
// sout: on success receives the position after the 4th digit, on failure
//       the position of the offending character.
// Returns the value 0..0xFFFF, or -1 on a non-hex character (or early NUL).
static int xparse_unicode(const char* s, const char** sout)
{
   int unicode = 0;
   for (int i = 0; i < 4; i++, s++) {
      const int digit = hexToInt(*s);
      if (digit < 0) {
         *sout = s;
         return -1;
      }
      unicode = unicode*16 + digit;
   }
   *sout = s;
   return unicode;
}

// Encode one Unicode code point as UTF-8, see http://en.wikipedia.org/wiki/UTF-8
//
// unicode: code point, 0..0x10FFFF.
// error:   set to true (never cleared) if the code point is out of range.
// Returns the UTF-8 bytes; "" on error. Code point 0 also yields "" because
// the result is built from a NUL-terminated buffer.
static std::string xoutput_unicode(int unicode, bool* error)
{
   char buf[5];

   if (unicode >= 0 && unicode <= 0x7F) {
      // 7 bits
      buf[0] = static_cast<char>(unicode & 0x7F);
      buf[1] = 0;
      return buf;
   }

   if (unicode >= 0x80 && unicode <= 0x7FF) {
      // 11 bits: 110xxxxx 10xxxxxx
      buf[0] = static_cast<char>(0x80|0x40|((unicode>>6)&0x1F)); // 5 bits
      buf[1] = static_cast<char>(0x80|((unicode>>0)&0x3F)); // 6 bits
      buf[2] = 0;
      return buf;
   }

   if (unicode >= 0x800 && unicode <= 0xFFFF) {
      // 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
      buf[0] = static_cast<char>(0x80|0x40|0x20|((unicode>>12)&0xF)); // 4 bits
      buf[1] = static_cast<char>(0x80|((unicode>>6)&0x3F)); // 6 bits
      buf[2] = static_cast<char>(0x80|((unicode>>0)&0x3F)); // 6 bits
      buf[3] = 0;
      return buf;
   }

   if (unicode >= 0x10000 && unicode <= 0x10FFFF) {
      // 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      buf[0] = static_cast<char>(0x80|0x40|0x20|0x10|((unicode>>18)&0x7)); // 3 bits
      buf[1] = static_cast<char>(0x80|((unicode>>12)&0x3F)); // 6 bits
      buf[2] = static_cast<char>(0x80|((unicode>>6)&0x3F)); // 6 bits
      buf[3] = static_cast<char>(0x80|((unicode>>0)&0x3F)); // 6 bits
      buf[4] = 0;
      return buf;
   }

   *error = true;
   return "";
}

// Parse the body of a JSON string and decode its escape sequences.
//
// s:     points just after the opening double quote.
// sout:  receives the position after the closing quote on success, or the
//        position where parsing stopped on error.
// error: set to true on error (never cleared here).
// Returns the decoded string as UTF-8; on error returns "" for an
// unterminated string, otherwise whatever was decoded so far.
//
// A "\uD800".."\uDBFF" escape immediately followed by a "\uDC00".."\uDFFF"
// escape is a UTF-16 surrogate pair and is combined into one code point
// above U+FFFF. An unpaired surrogate is still accepted and encoded as a
// 3-byte sequence, as before.
static std::string xparse_string(const char* s, const char** sout, bool *error)
{
   std::string v;

   while (1) {
      if (*s == 0) {
         // error - string is not terminated
         *sout = s;
         *error = true;
         return "";
      }

      if (*s == '\"') {
         // end of string
         *sout = s+1;
         return v;
      }

      if (*s != '\\') {
         // ordinary character
         v += *s;
         s++;
         continue;
      }

      // escape sequence
      s++;

      switch (*s) {
      case '\"': v += '\"'; s++; break;
      case '\\': v += '\\'; s++; break;
      case '/':  v += '/';  s++; break;
      case 'b':  v += '\b'; s++; break;
      case 'f':  v += '\f'; s++; break;
      case 'n':  v += '\n'; s++; break;
      case 'r':  v += '\r'; s++; break;
      case 't':  v += '\t'; s++; break;
      case 'u': {
         s++;
         int unicode = xparse_unicode(s, sout);
         if (unicode < 0) {
            // error - bad unicode, point at the start of the hex digits
            *sout = s;
            *error = true;
            return v;
         }

         const char* next = *sout;

         if (unicode >= 0xD800 && unicode <= 0xDBFF && next[0] == '\\' && next[1] == 'u') {
            // high surrogate: look ahead for the matching low surrogate
            const char* after = next;
            const int low = xparse_unicode(next+2, &after);
            if (low >= 0xDC00 && low <= 0xDFFF) {
               unicode = 0x10000 + ((unicode - 0xD800) << 10) + (low - 0xDC00);
               next = after;
            }
            // otherwise leave the following escape for the next iteration
         }

         *sout = next;

         v += xoutput_unicode(unicode, error);
         if (*error) {
            // error - bad unicode, sout stays pointing after the escape
            return v;
         }

         s = next;
         break;
      }
      default:
         // error - unknown escape or unexpected end of string (NUL)
         *sout = s;
         *error = true;
         return v;
      }
   }
   // NOT REACHED
}
static MJsonNode* parse_something(const char* sin, const char* s, const char** sout);

// Parse the remainder of a JSON array.
//
// sin:  start of the whole JSON text, used only for error reporting.
// s:    points just after the opening '['.
// sout: receives the position after the closing ']' on success, or the
//       position of the error.
// Returns a new MJSON_ARRAY node, or an MJSON_ERROR node that has the
// partially built array attached as a subnode.
static MJsonNode* parse_array(const char* sin, const char* s, const char** sout)
{
   MJsonNode *n = MJsonNode::MakeArray();

   s = skip_spaces(s);

   if (*s == ']') {
      // empty array
      *sout = s+1;
      return n;
   }

   while (1) {
      s = skip_spaces(s);

      if (*s == 0) {
         *sout = s;
         return MJsonNode::MakeError(n, "unexpected end of string while parsing array", sin, s);
      }

      MJsonNode *p = parse_something(sin, s, sout);

      if (p == NULL) {
         // sout set by parse_something()
         return MJsonNode::MakeError(n, "cannot parse array element", sin, *sout);
      }

      if (p->GetType() == MJSON_ERROR) {
         // sout set by parse_something()
         // the element's error node is not attached anywhere, free it to avoid a leak
         delete p;
         return MJsonNode::MakeError(n, "error parsing array element", sin, *sout);
      }

      n->AddToArray(p);

      s = skip_spaces(*sout);

      if (*s == ']') {
         // end of array
         *sout = s+1;
         return n;
      }

      if (*s == ',') {
         s++;
         continue;
      }

      *sout = s;
      return MJsonNode::MakeError(n, "unexpected char after array element, should be \',\' or \']\'", sin, s);
   }
   // NOT REACHED
}

// Parse the remainder of a JSON object.
//
// sin:  start of the whole JSON text, used only for error reporting.
// s:    points just after the opening '{'.
// sout: receives the position after the closing '}' on success, or the
//       position of the error.
// Returns a new MJSON_OBJECT node, or an MJSON_ERROR node that has the
// partially built object attached as a subnode. Empty element names ("")
// are rejected as an error.
static MJsonNode* parse_object(const char* sin, const char* s, const char** sout)
{
   MJsonNode *n = MJsonNode::MakeObject();

   s = skip_spaces(s);

   if (*s == '}') {
      // empty object
      *sout = s+1;
      return n;
   }

   while (1) {
      s = skip_spaces(s);

      if (*s == 0) {
         *sout = s;
         return MJsonNode::MakeError(n, "unexpected end of string while parsing object", sin, s);
      } else if (*s != '\"') {
         *sout = s;
         return MJsonNode::MakeError(n, "unexpected char while parsing object, should be \"\"\"", sin, s);
      }

      bool error = false;
      std::string name = xparse_string(s+1, sout, &error);

      if (error || name.length() < 1) {
         // sout set by xparse_string()
         return MJsonNode::MakeError(n, "cannot parse name of object element", sin, *sout);
      }

      s = skip_spaces(*sout);

      if (*s == 0) {
         *sout = s;
         return MJsonNode::MakeError(n, "unexpected end of string after name of object element", sin, s);
      } else if (*s != ':') {
         *sout = s;
         return MJsonNode::MakeError(n, "unexpected char after name of object element, should be \":\"", sin, s);
      }

      MJsonNode *p = parse_something(sin, s+1, sout);

      if (p == NULL) {
         // sout set by parse_something()
         return MJsonNode::MakeError(n, "cannot parse object element", sin, *sout);
      }

      if (p->GetType() == MJSON_ERROR) {
         // sout set by parse_something()
         // the element's error node is not attached anywhere, free it to avoid a leak
         delete p;
         return MJsonNode::MakeError(n, "error parsing object element", sin, *sout);
      }

      n->AddToObject(name.c_str(), p);

      s = skip_spaces(*sout);

      if (*s == '}') {
         // end of object
         *sout = s+1;
         return n;
      }

      if (*s == ',') {
         s++;
         continue;
      }

      // error
      *sout = s;
      return MJsonNode::MakeError(n, "unexpected char after object element, should be \"}\" or \",\"", sin, s);
   }
   // NOT REACHED
}

// Parse the remainder of a JSON string value.
//
// sin:  start of the whole JSON text, used only for error reporting.
// s:    points just after the opening double quote.
// sout: set by xparse_string().
// Returns a new MJSON_STRING node, or an MJSON_ERROR node.
static MJsonNode* parse_string(const char* sin, const char* s, const char** sout)
{
   bool error = false;
   std::string v = xparse_string(s, sout, &error);
   if (error)
      return MJsonNode::MakeError(NULL, "cannot parse string", sin, *sout);
   return MJsonNode::MakeString(v.c_str());
}
// Collect a run of ASCII decimal digits.
//
// s:    NUL-terminated input.
// sout: receives the position of the first character that is not a digit.
// Returns the digits (possibly an empty string).
static std::string parse_digits(const char* s, const char** sout)
{
   std::string v;
   v.reserve(32); // allocate space for a longish number

   while (*s >= '0' && *s <= '9') {
      v += *s;
      s++;
   }

   *sout = s;
   return v;
}

// Convert a decimal string to long long and report overflow.
//
// str:    NUL-terminated text, parsed by strtoll() in base 10.
// number: always written with the strtoll() result: 0 if nothing could be
//         converted, LLONG_MAX or LLONG_MIN if the value is out of range.
// Returns false on overflow/underflow or when no characters were converted,
// true otherwise. Trailing non-digit characters are not an error.
bool atoll_with_overflow(const char *str, long long& number)
{
   char *end = nullptr;
   errno = 0;
   number = std::strtoll(str, &end, 10);

   // iff strtoll() sets errno to ERANGE, an over- or underflow occurred
   if ((number == LLONG_MAX || number == LLONG_MIN) && errno == ERANGE)
      return false;

   // if no characters were converted, the input was invalid
   if (end == str)
      return false;

   return true;
}

// Self-test helper: check one atoll_with_overflow() conversion.
//
// s:    input text.
// v:    expected value, only compared when flag is true.
// flag: expected return value of atoll_with_overflow().
// Prints a diagnostic and aborts the program on mismatch.
static void test_atoll_with_overflow_value(const char*s, long long v, bool flag)
{
   long long vv = 0;
   const bool ff = atoll_with_overflow(s, vv);

   // the success flag must always agree; the value only matters for
   // successful conversions (on failure it is LLONG_MAX, LLONG_MIN or 0)
   if (ff == flag && (!flag || vv == v))
      return;

   printf("atoll test failed: [%s] -> %lld (0x%llx) != %lld (0x%llx)\n", s, vv, (unsigned long long)vv, v, (unsigned long long)v);
   assert(!"mjson self test: my atoll() is broken, bye!");
   abort();
   // DOES NOT RETURN
}

// Self-test of atoll_with_overflow() around the limits of 64-bit integers.
static void test_atoll_with_overflow()
{
   // test positive values
   test_atoll_with_overflow_value("0", 0, true);
   test_atoll_with_overflow_value("1", 1, true);
   test_atoll_with_overflow_value("12", 12, true);
   test_atoll_with_overflow_value("1234", 1234, true);

   // check overflow of 64-bit integers
   test_atoll_with_overflow_value("9223372036854775806", 9223372036854775806, true);
   test_atoll_with_overflow_value("9223372036854775807", 9223372036854775807, true);
   test_atoll_with_overflow_value("9223372036854775808", 0, false);
   test_atoll_with_overflow_value("9223372036854775809", 0, false);
   test_atoll_with_overflow_value("999999999999999999999999999999999999999999999999999999", 0, false);

   // test negative
   test_atoll_with_overflow_value("-0", 0, true);
   test_atoll_with_overflow_value("-1", -1, true);
   test_atoll_with_overflow_value("-12", -12, true);
   test_atoll_with_overflow_value("-1234", -1234, true);
   test_atoll_with_overflow_value("-9223372036854775807", LLONG_MIN+1, true);
   test_atoll_with_overflow_value("-9223372036854775808", LLONG_MIN, true);
   test_atoll_with_overflow_value("-9223372036854775809", 0, false);
   test_atoll_with_overflow_value("-9223372036854775810", 0, false);
   test_atoll_with_overflow_value("-999999999999999999999999999999999999999999999999999999", 0, false);
}

// Runs the atoll_with_overflow() self-test from the constructor of a static
// object, i.e. during static initialization, instead of lazily from
// parse_number() (the lazy "run once" flag upset the thread sanitizer).
class TestAtollWithOverflow
{
public:
   TestAtollWithOverflow() // ctor
   {
      test_atoll_with_overflow();
   }
};

static TestAtollWithOverflow runme;
// digit1-9 = %x31-39 ; 1-9 // e = %x65 / %x45 ; e E // exp = e [ minus / plus ] 1*DIGIT // frac = decimal-point 1*DIGIT // int = zero / ( digit1-9 *DIGIT ) // minus = %x2D ; - // plus = %x2B ; + // zero = %x30 ; 0 int sign = 1; std::string sint; std::string sfrac; int expsign = 1; std::string sexp; if (*s == '-') { sign = -1; s++; } if (*s == '0') { sint += *s; s++; } else { sint = parse_digits(s, sout); s = *sout; } if (*s == '.') { s++; sfrac = parse_digits(s, sout); s = *sout; } if (*s == 'e' || *s == 'E') { s++; if (*s == '-') { expsign = -1; s++; } if (*s == '+') { expsign = +1; s++; } sexp = parse_digits(s, sout); s = *sout; } //printf("number: sign %d, sint [%s], sfrac [%s], expsign %d, sexp [%s]\n", sign, sint.c_str(), sfrac.c_str(), expsign, sexp.c_str()); // check for floating point bool flag; long long e; if (expsign < 0 || sfrac.length() > 0) { // definitely floating point number double v1 = atof(sint.c_str()); double v2 = 0; double vm = 0.1; const char* p = sfrac.c_str(); for ( ; *p != 0; p++, vm/=10.0) { v2 += (*p-'0')*vm; } flag = atoll_with_overflow(sexp.c_str(), e); if (flag && (e < 0 || e > 400)) { // overflow or exponent will not fit into IEEE754 double precision number // convert to 0 or +/- infinity printf("overflow!\n"); if (expsign > 0) { *sout = s; double one = 1; double zero = 0; double inf = one/zero; // IEEE-754 1.0/0.0 is "+infinity", +infinity*(-1) => -infinity return MJsonNode::MakeNumber(sign*inf); } else { *sout = s; return MJsonNode::MakeNumber(sign*0.0); } } double ee = 1.0; if (e != 0) ee = pow(10, (double)(expsign*e)); double v = sign*(v1+v2)*ee; //printf("v1: %f, v2: %f, e: %d, ee: %g, v: %g\n", v1, v2, e, ee, v); *sout = s; return MJsonNode::MakeNumber(v); } else { // no sfrac, expsign is positive, so this is an integer, unless it overflows flag = atoll_with_overflow(sexp.c_str(), e); // may overflow if (flag && (e < 0 || e > 400)) { // overflow or exponent will not fit into IEEE754 double precision number // convert to +/- 
infinity //printf("overflow!\n"); *sout = s; double one = 1; double zero = 0; double inf = one/zero; // IEEE-754 1.0/0.0 is "+infinity", +infinity*(-1) => -infinity return MJsonNode::MakeNumber(sign*inf); } // this is stupid but quicker than calling pow(). Unless they feed us stupid exponents that are not really integers anyway for (int ee=0; ee= '0' && *s <= '9') { return parse_number(sin, s, sout); } else if (*s == 'n') { return parse_null(sin, s, sout); } else if (*s == 't') { return parse_true(sin, s, sout); } else if (*s == 'f') { return parse_false(sin, s, sout); } *sout = s; return MJsonNode::MakeError(NULL, "unexpected char at top level", sin, s); } MJsonNode* MJsonNode::Parse(const char* jsonstring) { const char*sout; return parse_something(jsonstring, jsonstring, &sout); } MJsonNode::~MJsonNode() // dtor { for (unsigned i=0; i 0) { assert(arraybuffer_ptr != NULL); free(arraybuffer_ptr); arraybuffer_size = 0; arraybuffer_ptr = NULL; } // poison deleted nodes type = MJSON_NONE; } static char toHexChar(int c) { assert(c>=0); assert(c<=15); if (c <= 9) return '0' + c; else return 'A' + c - 10; } std::string MJsonNode::Encode(const char* s) { std::string v; while (*s) { switch (*s) { case '\"': v += "\\\""; s++; break; case '\\': v += "\\\\"; s++; break; //case '/': v += "\\/"; s++; break; case '\b': v += "\\b"; s++; break; case '\f': v += "\\f"; s++; break; case '\n': v += "\\n"; s++; break; case '\r': v += "\\r"; s++; break; case '\t': v += "\\t"; s++; break; default: { if (iscntrl(*s)) { v += "\\u"; v += "0"; v += "0"; v += toHexChar(((*s)>>4) & 0xF); v += toHexChar(((*s)>>0) & 0xF); s++; break; } else { v += *s; s++; break; } } } } return v; } std::string MJsonNode::EncodeLL(long long value) { char buf[256]; snprintf(buf, sizeof(buf), "%lld", value); return buf; } std::string MJsonNode::EncodeDouble(double numbervalue) { if (isfinite(numbervalue)) { char buf[256]; snprintf(buf, sizeof(buf), "%.16e", numbervalue); return buf; } else if (isnan(numbervalue)) 
{ return "\"NaN\""; } else if (isinf(numbervalue)) { if (numbervalue > 0) return "\"Infinity\""; else return "\"-Infinity\""; } else { assert(!"this cannot happen!"); } return ""; } std::string MJsonNode::Stringify(int flags) const { switch (type) { case MJSON_ARRAY: { std::string v; v += "["; for (size_t i=0; i 0) v += ","; v += subnodes[i]->Stringify(flags); } v += "]"; return v; } case MJSON_OBJECT: { std::string v; v += "{"; for (size_t i=0; i 0) v += ","; v += std::string("\"") + Encode(object_names[i].c_str()) + "\""; v += ":"; v += subnodes[i]->Stringify(flags); } v += "}"; return v; } case MJSON_STRING: { return std::string("\"") + Encode(string_value.c_str()) + "\""; } case MJSON_INT: { return EncodeLL(ll_value); } case MJSON_NUMBER: { return EncodeDouble(double_value); } case MJSON_BOOL: if (ll_value) return "true"; else return "false"; case MJSON_NULL: return "null"; case MJSON_JSON: return string_value; case MJSON_ARRAYBUFFER: return "arraybuffer"; case MJSON_ERROR: return std::string("json parse error: ") + string_value; default: assert(!"should not come here"); return ""; // NOT REACHED } } MJsonNode* MJsonNode::MakeError(MJsonNode* errornode, const char* errormessage, const char* sin, const char* serror) { MJsonNode* n = new MJsonNode(MJSON_ERROR); if (errornode) n->subnodes.push_back(errornode); n->string_value = errormessage; if (sin && serror) { char msg[256]; char sample[32]; strncpy(sample, serror, 31); sample[31] = 0; int offset = serror-sin; int lineno = 1; int lineoff = 0; for (const char* s = sin; s != serror; s++) { if (*s == 0) break; if (*s == '\n') { lineno++; lineoff=0; } else { lineoff++; } } snprintf(msg, sizeof(msg), " at char \"%c\" file offset %d, line %d position %d, around text \"%s\"", *serror, offset, lineno, lineoff, sample); n->string_value += msg; } return n; } MJsonNode* MJsonNode::MakeArray() { return new MJsonNode(MJSON_ARRAY); } MJsonNode* MJsonNode::MakeObject() { return new MJsonNode(MJSON_OBJECT); } MJsonNode* 
MJsonNode::MakeString(const char* value) { MJsonNode* n = new MJsonNode(MJSON_STRING); n->string_value = value; return n; } MJsonNode* MJsonNode::MakeInt(long long value) { MJsonNode* n = new MJsonNode(MJSON_INT); n->ll_value = value; return n; } MJsonNode* MJsonNode::MakeNumber(double value) { MJsonNode* n = new MJsonNode(MJSON_NUMBER); n->double_value = value; return n; } MJsonNode* MJsonNode::MakeBool(bool value) { MJsonNode* n = new MJsonNode(MJSON_BOOL); if (value) n->ll_value = 1; else n->ll_value = 0; return n; } MJsonNode* MJsonNode::MakeNull() { return new MJsonNode(MJSON_NULL); } MJsonNode* MJsonNode::MakeJSON(const char* json) { MJsonNode* n = new MJsonNode(MJSON_JSON); n->string_value = json; return n; } MJsonNode* MJsonNode::MakeArrayBuffer(char* ptr, size_t size) { MJsonNode* n = new MJsonNode(MJSON_ARRAYBUFFER); n->arraybuffer_ptr = ptr; n->arraybuffer_size = size; return n; } void MJsonNode::AddToArray(MJsonNode* node) { if (type == MJSON_ARRAY) { subnodes.push_back(node); return; } assert(!"not an array"); } void MJsonNode::AddToObject(const char* name, MJsonNode* node) /// add node to an object { if (type == MJSON_OBJECT) { object_names.push_back(name); subnodes.push_back(node); //objectvalue[name] = node; return; } assert(!"not an object"); } int MJsonNode::GetType() const /// get node type: MJSON_xxx { return type; } const MJsonNodeVector* MJsonNode::GetArray() const { if (type == MJSON_ARRAY || type == MJSON_NULL) return &subnodes; else return NULL; } const MJsonStringVector* MJsonNode::GetObjectNames() const { if (type == MJSON_OBJECT || type == MJSON_NULL) return &object_names; else return NULL; } const MJsonNodeVector* MJsonNode::GetObjectNodes() const { if (type == MJSON_OBJECT || type == MJSON_NULL) return &subnodes; else return NULL; } const MJsonNode* MJsonNode::FindObjectNode(const char* name) const { if (type != MJSON_OBJECT) return NULL; for (unsigned i=0; iDump(nest+1); } break; case MJSON_OBJECT: printf("\n"); for (size_t i=0; 
iDump(nest+1); } break; case MJSON_ERROR: printf(": %s\n", string_value.c_str()); for (size_t i=0; iDump(nest+1); } break; } } MJsonNode* MJsonNode::Copy() const { MJsonNode* n = new MJsonNode(*this); assert(n->object_names.size() == object_names.size()); assert(n->subnodes.size() == subnodes.size()); for (size_t i=0; isubnodes.size(); i++) { n->subnodes[i] = subnodes[i]->Copy(); } if (arraybuffer_size > 0) { n->arraybuffer_size = arraybuffer_size; n->arraybuffer_ptr = (char*)malloc(arraybuffer_size); assert(n->arraybuffer_ptr != NULL); memcpy(n->arraybuffer_ptr, arraybuffer_ptr, arraybuffer_size); } else { n->arraybuffer_size = 0; n->arraybuffer_ptr = NULL; } return n; } /* emacs * Local Variables: * tab-width: 8 * c-basic-offset: 3 * indent-tabs-mode: nil * End: */