/**************************************************************************** ** ** Copyright (C) 2018 Intel Corporation ** ** Permission is hereby granted, free of charge, to any person obtaining a copy ** of this software and associated documentation files (the "Software"), to deal ** in the Software without restriction, including without limitation the rights ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ** copies of the Software, and to permit persons to whom the Software is ** furnished to do so, subject to the following conditions: ** ** The above copyright notice and this permission notice shall be included in ** all copies or substantial portions of the Software. ** ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ** THE SOFTWARE. ** ****************************************************************************/ #define _BSD_SOURCE 1 #define _DEFAULT_SOURCE 1 #ifndef __STDC_LIMIT_MACROS # define __STDC_LIMIT_MACROS 1 #endif #include "cbor.h" #include "cborinternal_p.h" #include "compilersupport_p.h" #include "utf8_p.h" #include #include /** * \defgroup CborPretty Converting CBOR to text * \brief Group of functions used to convert CBOR to text form. * * This group contains two functions that can be used to convert a \ref * CborValue object to a text representation. This module attempts to follow * the recommendations from RFC 7049 section 6 "Diagnostic Notation", though it * has a few differences. They are noted below. * * TinyCBOR does not provide a way to convert from the text representation back * to encoded form. 
To produce a text form meant to be parsed, CborToJson is * recommended instead. * * Either of the functions in this section will attempt to convert exactly one * CborValue object to text. Those functions may return any error documented * for the functions for CborParsing. In addition, if the C standard library * stream functions return with error, the text conversion will return with * error CborErrorIO. * * These functions also perform UTF-8 validation in CBOR text strings. If they * encounter a sequence of bytes that is not permitted in UTF-8, they will return * CborErrorInvalidUtf8TextString. That includes encoding of surrogate points * in UTF-8. * * \warning The output type produced by these functions is not guaranteed to * remain stable. A future update of TinyCBOR may produce different output for * the same input and parsers may be unable to handle it. * * \sa CborParsing, CborToJson, cbor_parser_init() */ /** * \addtogroup CborPretty * @{ *

<h1 class="groupheader">Text format</h1>

*
 * As described in RFC 7049 section 6 "Diagnostic Notation", the format is
 * largely borrowed from JSON, but modified to suit CBOR's different data
 * types. TinyCBOR makes further modifications to distinguish different, but
 * similar values.
 *
 * CBOR values are currently encoded as follows:
 * \par Integrals (unsigned and negative)
 *      Base-10 (decimal) text representation of the value
 * \par Byte strings:
 *      <tt>"h'"</tt> followed by the Base16 (hex) representation of the binary data, followed by an ending quote (')
 * \par Text strings:
 *      C-style escaped string in quotes, with C11/C++11 escaping of Unicode codepoints above U+007F.
 * \par Tags:
 *      Tag value, with the tagged value in parentheses. No special encoding of the tagged value is performed.
 * \par Simple types:
 *      <tt>"simple(nn)"</tt> where \c nn is the simple value
 * \par Null:
 *      \c null
 * \par Undefined:
 *      \c undefined
 * \par Booleans:
 *      \c true or \c false
 * \par Floating point:
 *      If NaN or infinite, the value is formatted with the \c %g conversion like any other
 *      non-integral number, so a printf-compatible stream function typically prints \c nan or \c inf.
 *      Otherwise, the decimal representation with as many digits as necessary to ensure no loss of information.
 *      By default, float values are suffixed by "f" and half-float values suffixed by "f16" (doubles have no suffix).
 *      If the CborPrettyNumericEncodingIndicators flag is active, the values instead are encoded following the
 *      Section 6 recommended encoding indicators: float values are suffixed with "_2" and half-float with "_1".
 *      A decimal point is always present.
 * \par Arrays:
 *      Comma-separated list of elements, enclosed in square brackets ("[" and "]").
 * \par Maps:
 *      Comma-separated list of key-value pairs, with the key and value separated
 *      by a colon (":"), enclosed in curly braces ("{" and "}").
* * The CborPrettyFlags enumerator contains flags to control some aspects of the * encoding: * \par String fragmentation * When the CborPrettyShowStringFragments option is active, text and byte * strings that are transmitted in fragments are shown instead inside * parentheses ("(" and ")") with no preceding number and each fragment is * displayed individually. If a tag precedes the string, then the output * will contain a double set of parentheses. If the option is not active, * the fragments are merged together and the display will not show any * difference from a string transmitted with determinate length. * \par Encoding indicators * Numbers and lengths in CBOR can be encoded in multiple representations. * If the CborPrettyIndicateOverlongNumbers option is active, numbers * and lengths that are transmitted in a longer encoding than necessary * will be indicated, by appending an underscore ("_") to either the * number or the opening bracket or brace, followed by a number * indicating the CBOR additional information: 0 for 1 byte, 1 for 2 * bytes, 2 for 4 bytes and 3 for 8 bytes. * If the CborPrettyIndicateIndeterminateLength option is active, maps, * arrays and strings encoded with indeterminate length will be marked by * an underscore after the opening bracket or brace or the string (if not * showing fragments), without a number after it. */ /** * \enum CborPrettyFlags * The CborPrettyFlags enum contains flags that control the conversion of CBOR to text format. * * \value CborPrettyNumericEncodingIndicators Use numeric encoding indicators instead of textual for float and half-float. * \value CborPrettyTextualEncodingIndicators Use textual encoding indicators for float ("f") and half-float ("f16"). * \value CborPrettyIndicateIndeterminateLength (default) Indicate when a map or array has indeterminate length. * \value CborPrettyIndicateOverlongNumbers Indicate when a number or length was encoded with more bytes than needed. 
* \value CborPrettyShowStringFragments If the byte or text string is transmitted in chunks, show each individually. * \value CborPrettyMergeStringFragment Merge all chunked byte or text strings and display them in a single entry. * \value CborPrettyDefaultFlags Default conversion flags. */ #ifndef CBOR_NO_FLOATING_POINT static inline bool convertToUint64(double v, uint64_t *absolute) { double supremum; v = fabs(v); /* C11 standard section 6.3.1.4 "Real floating and integer" says: * * 1 When a finite value of real floating type is converted to an integer * type other than _Bool, the fractional part is discarded (i.e., the * value is truncated toward zero). If the value of the integral part * cannot be represented by the integer type, the behavior is undefined. * * So we must perform a range check that v <= UINT64_MAX, but we can't use * UINT64_MAX + 1.0 because the standard continues: * * 2 When a value of integer type is converted to a real floating type, if * the value being converted can be represented exactly in the new type, * it is unchanged. If the value being converted is in the range of * values that can be represented but cannot be represented exactly, the * result is either the nearest higher or nearest lower representable * value, chosen in an implementation-defined manner. */ supremum = -2.0 * INT64_MIN; /* -2 * (- 2^63) == 2^64 */ if (v >= supremum) return false; /* Now we can convert, these two conversions cannot be UB */ *absolute = v; return *absolute == v; } #endif static void printRecursionLimit(CborStreamFunction stream, void *out) { stream(out, ""); } static CborError hexDump(CborStreamFunction stream, void *out, const void *ptr, size_t n) { const uint8_t *buffer = (const uint8_t *)ptr; CborError err = CborNoError; while (n-- && !err) err = stream(out, "%02" PRIx8, *buffer++); return err; } /* This function decodes buffer as UTF-8 and prints as escaped UTF-16. 
* On UTF-8 decoding error, it returns CborErrorInvalidUtf8TextString */ static CborError utf8EscapedDump(CborStreamFunction stream, void *out, const void *ptr, size_t n) { const uint8_t *buffer = (const uint8_t *)ptr; const uint8_t *const end = buffer + n; CborError err = CborNoError; while (buffer < end && !err) { uint32_t uc = get_utf8(&buffer, end); if (uc == ~0U) return CborErrorInvalidUtf8TextString; if (uc < 0x80) { /* single-byte UTF-8 */ unsigned char escaped = (unsigned char)uc; if (uc < 0x7f && uc >= 0x20 && uc != '\\' && uc != '"') { err = stream(out, "%c", (char)uc); continue; } /* print as an escape sequence */ switch (uc) { case '"': case '\\': break; case '\b': escaped = 'b'; break; case '\f': escaped = 'f'; break; case '\n': escaped = 'n'; break; case '\r': escaped = 'r'; break; case '\t': escaped = 't'; break; default: goto print_utf16; } err = stream(out, "\\%c", escaped); continue; } /* now print the sequence */ if (uc > 0xffffU) { /* needs surrogate pairs */ err = stream(out, "\\u%04" PRIX32 "\\u%04" PRIX32, (uc >> 10) + 0xd7c0, /* high surrogate */ (uc % 0x0400) + 0xdc00); } else { print_utf16: /* no surrogate pair needed */ err = stream(out, "\\u%04" PRIX32, uc); } } return err; } static const char *resolve_indicator(const uint8_t *ptr, const uint8_t *end, int flags) { static const char indicators[8][3] = { "_0", "_1", "_2", "_3", "", "", "", /* these are not possible */ "_" }; const char *no_indicator = indicators[5]; /* empty string */ uint8_t additional_information; uint8_t expected_information; uint64_t value; CborError err; if (ptr == end) return NULL; /* CborErrorUnexpectedEOF */ additional_information = (*ptr & SmallValueMask); if (additional_information < Value8Bit) return no_indicator; /* determine whether to show anything */ if ((flags & CborPrettyIndicateIndeterminateLength) && additional_information == IndefiniteLength) return indicators[IndefiniteLength - Value8Bit]; if ((flags & CborPrettyIndicateOverlongNumbers) == 0) return 
no_indicator; err = _cbor_value_extract_number(&ptr, end, &value); if (err) return NULL; /* CborErrorUnexpectedEOF */ expected_information = Value8Bit - 1; if (value >= Value8Bit) ++expected_information; if (value > 0xffU) ++expected_information; if (value > 0xffffU) ++expected_information; if (value > 0xffffffffU) ++expected_information; return expected_information == additional_information ? no_indicator : indicators[additional_information - Value8Bit]; } static const char *get_indicator(const CborValue *it, int flags) { return resolve_indicator(it->ptr, it->parser->end, flags); } static CborError value_to_pretty(CborStreamFunction stream, void *out, CborValue *it, int flags, int recursionsLeft); static CborError container_to_pretty(CborStreamFunction stream, void *out, CborValue *it, CborType containerType, int flags, int recursionsLeft) { const char *comma = ""; CborError err = CborNoError; if (!recursionsLeft) { printRecursionLimit(stream, out); return err; /* do allow the dumping to continue */ } while (!cbor_value_at_end(it) && !err) { err = stream(out, "%s", comma); comma = ", "; if (!err) err = value_to_pretty(stream, out, it, flags, recursionsLeft); if (containerType == CborArrayType) continue; /* map: that was the key, so get the value */ if (!err) err = stream(out, ": "); if (!err) err = value_to_pretty(stream, out, it, flags, recursionsLeft); } return err; } static CborError value_to_pretty(CborStreamFunction stream, void *out, CborValue *it, int flags, int recursionsLeft) { CborError err = CborNoError; CborType type = cbor_value_get_type(it); switch (type) { case CborArrayType: case CborMapType: { /* recursive type */ CborValue recursed; const char *indicator = get_indicator(it, flags); const char *space = *indicator ? " " : indicator; err = stream(out, "%c%s%s", type == CborArrayType ? 
'[' : '{', indicator, space); if (err) return err; err = cbor_value_enter_container(it, &recursed); if (err) { it->ptr = recursed.ptr; return err; /* parse error */ } err = container_to_pretty(stream, out, &recursed, type, flags, recursionsLeft - 1); if (err) { it->ptr = recursed.ptr; return err; /* parse error */ } err = cbor_value_leave_container(it, &recursed); if (err) return err; /* parse error */ return stream(out, type == CborArrayType ? "]" : "}"); } case CborIntegerType: { uint64_t val; cbor_value_get_raw_integer(it, &val); /* can't fail */ if (cbor_value_is_unsigned_integer(it)) { err = stream(out, "%" PRIu64, val); } else { /* CBOR stores the negative number X as -1 - X * (that is, -1 is stored as 0, -2 as 1 and so forth) */ if (++val) { /* unsigned overflow may happen */ err = stream(out, "-%" PRIu64, val); } else { /* overflown * 0xffff`ffff`ffff`ffff + 1 = * 0x1`0000`0000`0000`0000 = 18446744073709551616 (2^64) */ err = stream(out, "-18446744073709551616"); } } if (!err) err = stream(out, "%s", get_indicator(it, flags)); break; } case CborByteStringType: case CborTextStringType: { size_t n = 0; const void *ptr; bool showingFragments = (flags & CborPrettyShowStringFragments) && !cbor_value_is_length_known(it); const char *separator = ""; char close = '\''; char open[3] = "h'"; const char *indicator = NULL; if (type == CborTextStringType) { close = open[0] = '"'; open[1] = '\0'; } if (showingFragments) { err = stream(out, "(_ "); if (!err) err = _cbor_value_prepare_string_iteration(it); } else { err = stream(out, "%s", open); } while (!err) { if (showingFragments || indicator == NULL) { /* any iteration, except the second for a non-chunked string */ indicator = resolve_indicator(it->ptr, it->parser->end, flags); } err = _cbor_value_get_string_chunk(it, &ptr, &n, it); if (!ptr) break; if (!err && showingFragments) err = stream(out, "%s%s", separator, open); if (!err) err = (type == CborByteStringType ? 
hexDump(stream, out, ptr, n) : utf8EscapedDump(stream, out, ptr, n)); if (!err && showingFragments) { err = stream(out, "%c%s", close, indicator); separator = ", "; } } if (!err) { if (showingFragments) err = stream(out, ")"); else err = stream(out, "%c%s", close, indicator); } return err; } case CborTagType: { CborTag tag; cbor_value_get_tag(it, &tag); /* can't fail */ err = stream(out, "%" PRIu64 "%s(", tag, get_indicator(it, flags)); if (!err) err = cbor_value_advance_fixed(it); if (!err && recursionsLeft) err = value_to_pretty(stream, out, it, flags, recursionsLeft - 1); else if (!err) printRecursionLimit(stream, out); if (!err) err = stream(out, ")"); return err; } case CborSimpleType: { /* simple types can't fail and can't have overlong encoding */ uint8_t simple_type; cbor_value_get_simple_type(it, &simple_type); err = stream(out, "simple(%" PRIu8 ")", simple_type); break; } case CborNullType: err = stream(out, "null"); break; case CborUndefinedType: err = stream(out, "undefined"); break; case CborBooleanType: { bool val; cbor_value_get_boolean(it, &val); /* can't fail */ err = stream(out, val ? "true" : "false"); break; } #ifndef CBOR_NO_FLOATING_POINT case CborDoubleType: { const char *suffix; double val; int r; uint64_t ival; if (false) { float f; case CborFloatType: cbor_value_get_float(it, &f); val = f; suffix = (flags & CborPrettyNumericEncodingIndicators) ? "_2" : "f"; } else if (false) { uint16_t f16; case CborHalfFloatType: #ifndef CBOR_NO_HALF_FLOAT_TYPE cbor_value_get_half_float(it, &f16); val = decode_half(f16); suffix = (flags & CborPrettyNumericEncodingIndicators) ? 
"_1" : "f16"; #else (void)f16; err = CborErrorUnsupportedType; break; #endif } else { cbor_value_get_double(it, &val); suffix = ""; } if ((flags & CborPrettyNumericEncodingIndicators) == 0) { r = fpclassify(val); if (r == FP_NAN || r == FP_INFINITE) suffix = ""; } if (convertToUint64(val, &ival)) { /* this double value fits in a 64-bit integer, so show it as such * (followed by a floating point suffix, to disambiguate) */ err = stream(out, "%s%" PRIu64 ".%s", val < 0 ? "-" : "", ival, suffix); } else { /* this number is definitely not a 64-bit integer */ err = stream(out, "%." DBL_DECIMAL_DIG_STR "g%s", val, suffix); } break; } #else case CborDoubleType: case CborFloatType: case CborHalfFloatType: err = CborErrorUnsupportedType; break; #endif /* !CBOR_NO_FLOATING_POINT */ case CborInvalidType: err = stream(out, "invalid"); if (err) return err; return CborErrorUnknownType; } if (!err) err = cbor_value_advance_fixed(it); return err; } /** * Converts the current CBOR type pointed by \a value to its textual * representation and writes it to the stream by calling the \a streamFunction. * If an error occurs, this function returns an error code similar to * \ref CborParsing. * * The textual representation can be controlled by the \a flags parameter (see * \ref CborPrettyFlags for more information). * * If no error ocurred, this function advances \a value to the next element. * Often, concatenating the text representation of multiple elements can be * done by appending a comma to the output stream in between calls to this * function. * * The \a streamFunction function will be called with the \a token value as the * first parameter and a printf-style format string as the second, with a variable * number of further parameters. 
* * \sa cbor_value_to_pretty(), cbor_value_to_json_advance() */ CborError cbor_value_to_pretty_stream(CborStreamFunction streamFunction, void *token, CborValue *value, int flags) { return value_to_pretty(streamFunction, token, value, flags, CBOR_PARSER_MAX_RECURSIONS); } /** @} */