/****************************************************************************
**
** Copyright (C) 2017 Intel Corporation
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and associated documentation files (the "Software"), to deal
** in the Software without restriction, including without limitation the rights
** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
** copies of the Software, and to permit persons to whom the Software is
** furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Software.
**
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
** THE SOFTWARE.
**
****************************************************************************/

#define _BSD_SOURCE 1
#define _DEFAULT_SOURCE 1
#ifndef __STDC_LIMIT_MACROS
#  define __STDC_LIMIT_MACROS 1
#endif

#include "cbor.h"
#include "cborinternal_p.h"
#include "compilersupport_p.h"
#include "utf8_p.h"

#include <string.h>

#ifndef CBOR_NO_FLOATING_POINT
#  include <float.h>
#  include <math.h>
#endif


#ifndef CBOR_PARSER_MAX_RECURSIONS
#  define CBOR_PARSER_MAX_RECURSIONS 1024
#endif

/**
 * \addtogroup CborParsing
 * @{
 */

/**
 * \enum CborValidationFlags
 * The CborValidationFlags enum contains flags that control the validation of a
 * CBOR stream.
 *
 * \value CborValidateBasic         Validates only the syntactic correctedness of the stream.
 * \value CborValidateCanonical     Validates that the stream is in canonical format, according to
 *                                  RFC 7049 section 3.9.
 * \value CborValidateStrictMode    Performs strict validation, according to RFC 7049 section 3.10.
 * \value CborValidateStrictest     Attempt to perform the strictest validation we know of.
 *
 * \value CborValidateShortestIntegrals     (Canonical) Validate that integral numbers and lengths are
 *                                          enconded in their shortest form possible.
 * \value CborValidateShortestFloatingPoint (Canonical) Validate that floating-point numbers are encoded
 *                                          in their shortest form possible.
 * \value CborValidateShortestNumbers       (Canonical) Validate both integral and floating-point numbers
 *                                          are in their shortest form possible.
 * \value CborValidateNoIndeterminateLength (Canonical) Validate that no string, array or map uses
 *                                          indeterminate length encoding.
 * \value CborValidateMapIsSorted           (Canonical & Strict mode) Validate that map keys appear in
 *                                          sorted order.
 * \value CborValidateMapKeysAreUnique      (Strict mode) Validate that map keys are unique.
 * \value CborValidateTagUse                (Strict mode) Validate that known tags are used with the
 *                                          correct types. This does not validate that the content of
 *                                          those types is syntactically correct. For example, this
 *                                          option validates that tag 1 (DateTimeString) is used with
 *                                          a Text String, but it does not validate that the string is
 *                                          a valid date/time representation.
 * \value CborValidateUtf8                  (Strict mode) Validate that text strings are appropriately
 *                                          encoded in UTF-8.
 * \value CborValidateMapKeysAreString      Validate that all map keys are text strings.
 * \value CborValidateNoUndefined           Validate that no elements of type "undefined" are present.
 * \value CborValidateNoTags                Validate that no tags are used.
 * \value CborValidateFiniteFloatingPoint   Validate that all floating point numbers are finite (no NaN or
 *                                          infinities are allowed).
 * \value CborValidateCompleteData          Validate that the stream is complete and there is no more data
 *                                          in the buffer.
 * \value CborValidateNoUnknownSimpleTypesSA Validate that all Standards Action simple types are registered
 *                                          with IANA.
 * \value CborValidateNoUnknownSimpleTypes  Validate that all simple types used are registered with IANA.
 * \value CborValidateNoUnknownTagsSA       Validate that all Standard Actions tags are registered with IANA.
 * \value CborValidateNoUnknownTagsSR       Validate that all Standard Actions and Specification Required tags
 *                                          are registered with IANA (see below for limitations).
 * \value CborValidateNoUnkonwnTags         Validate that all tags are registered with IANA
 *                                          (see below for limitations).
 *
 * \par Simple type registry
 * The CBOR specification requires that registration for use of the first 19
 * simple types must be done by way of Standards Action. The rest of the simple
 * types only require a specification. The official list can be obtained from
 *  https://www.iana.org/assignments/cbor-simple-values/cbor-simple-values.xhtml.
 *
 * \par
 * There are no registered simple types recognized by this release of TinyCBOR
 * (beyond those defined by RFC 7049).
 *
 * \par Tag registry
 * The CBOR specification requires that registration for use of the first 23
 * tags must be done by way of Standards Action. The next up to tag 255 only
 * require a specification. Finally, all other tags can be registered on a
 * first-come-first-serve basis. The official list can be ontained from
 *  https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml.
 *
 * \par
 * Given the variability of this list, TinyCBOR cannot recognize all tags
 * registered with IANA. Instead, the implementation only recognizes tags
 * that are backed by an RFC.
 *
 * \par
 * These are the tags known to the current TinyCBOR release:
<table>
  <tr>
    <th>Tag</th>
    <th>Data Item</th>
    <th>Semantics</th>
  </tr>
  <tr>
    <td>0</td>
    <td>UTF-8 text string</td>
    <td>Standard date/time string</td>
  </td>
  <tr>
    <td>1</td>
    <td>integer</td>
    <td>Epoch-based date/time</td>
  </td>
  <tr>
    <td>2</td>
    <td>byte string</td>
    <td>Positive bignum</td>
  </td>
  <tr>
    <td>3</td>
    <td>byte string</td>
    <td>Negative bignum</td>
  </td>
  <tr>
    <td>4</td>
    <td>array</td>
    <td>Decimal fraction</td>
  </td>
  <tr>
    <td>5</td>
    <td>array</td>
    <td>Bigfloat</td>
  </td>
  <tr>
    <td>16</td>
    <td>array</td>
    <td>COSE Single Recipient Encrypted Data Object (RFC 8152)</td>
  </td>
  <tr>
    <td>17</td>
    <td>array</td>
    <td>COSE Mac w/o Recipients Object (RFC 8152)</td>
  </td>
  <tr>
    <td>18</td>
    <td>array</td>
    <td>COSE Single Signer Data Object (RFC 8162)</td>
  </td>
  <tr>
    <td>21</td>
    <td>byte string, array, map</td>
    <td>Expected conversion to base64url encoding</td>
  </td>
  <tr>
    <td>22</td>
    <td>byte string, array, map</td>
    <td>Expected conversion to base64 encoding</td>
  </td>
  <tr>
    <td>23</td>
    <td>byte string, array, map</td>
    <td>Expected conversion to base16 encoding</td>
  </td>
  <tr>
    <td>24</td>
    <td>byte string</td>
    <td>Encoded CBOR data item</td>
  </td>
  <tr>
    <td>32</td>
    <td>UTF-8 text string</td>
    <td>URI</td>
  </td>
  <tr>
    <td>33</td>
    <td>UTF-8 text string</td>
    <td>base64url</td>
  </td>
  <tr>
    <td>34</td>
    <td>UTF-8 text string</td>
    <td>base64</td>
  </td>
  <tr>
    <td>35</td>
    <td>UTF-8 text string</td>
    <td>Regular expression</td>
  </td>
  <tr>
    <td>36</td>
    <td>UTF-8 text string</td>
    <td>MIME message</td>
  </td>
  <tr>
    <td>96</td>
    <td>array</td>
    <td>COSE Encrypted Data Object (RFC 8152)</td>
  </td>
  <tr>
    <td>97</td>
    <td>array</td>
    <td>COSE MACed Data Object (RFC 8152)</td>
  </td>
  <tr>
    <td>98</td>
    <td>array</td>
    <td>COSE Signed Data Object (RFC 8152)</td>
  </td>
  <tr>
    <td>55799</td>
    <td>any</td>
    <td>Self-describe CBOR</td>
  </td>
</table>
 */

struct KnownTagData { uint32_t tag; uint32_t types; };
static const struct KnownTagData knownTagData[] = {
    { 0, (uint32_t)CborTextStringType },
    { 1, (uint32_t)(CborIntegerType + 1) },
    { 2, (uint32_t)CborByteStringType },
    { 3, (uint32_t)CborByteStringType },
    { 4, (uint32_t)CborArrayType },
    { 5, (uint32_t)CborArrayType },
    { 16, (uint32_t)CborArrayType },
    { 17, (uint32_t)CborArrayType },
    { 18, (uint32_t)CborArrayType },
    { 21, (uint32_t)CborByteStringType | ((uint32_t)CborArrayType << 8) | ((uint32_t)CborMapType << 16) },
    { 22, (uint32_t)CborByteStringType | ((uint32_t)CborArrayType << 8) | ((uint32_t)CborMapType << 16) },
    { 23, (uint32_t)CborByteStringType | ((uint32_t)CborArrayType << 8) | ((uint32_t)CborMapType << 16) },
    { 24, (uint32_t)CborByteStringType },
    { 32, (uint32_t)CborTextStringType },
    { 33, (uint32_t)CborTextStringType },
    { 34, (uint32_t)CborTextStringType },
    { 35, (uint32_t)CborTextStringType },
    { 36, (uint32_t)CborTextStringType },
    { 96, (uint32_t)CborArrayType },
    { 97, (uint32_t)CborArrayType },
    { 98, (uint32_t)CborArrayType },
    { 55799, 0U }
};

static CborError validate_value(CborValue *it, uint32_t flags, int recursionLeft);

static inline CborError validate_utf8_string(const void *ptr, size_t n) {
    const uint8_t *buffer = (const uint8_t *)ptr;
    const uint8_t *const end = buffer + n;
    while (buffer < end) {
        uint32_t uc = get_utf8(&buffer, end);
        if (uc == ~0U)
            return CborErrorInvalidUtf8TextString;
    }
    return CborNoError;
}

static inline CborError validate_simple_type(uint8_t simple_type, uint32_t flags) {
    /* At current time, all known simple types are those from RFC 7049,
     * which are parsed by the parser into different CBOR types.
     * That means that if we've got here, the type is unknown */
    if (simple_type < 32)
        return (flags & CborValidateNoUnknownSimpleTypesSA) ? CborErrorUnknownSimpleType : CborNoError;
    return (flags & CborValidateNoUnknownSimpleTypes) == CborValidateNoUnknownSimpleTypes ?
           CborErrorUnknownSimpleType : CborNoError;
}

static inline CborError validate_number(const CborValue *it, CborType type, uint32_t flags) {
    CborError err = CborNoError;
    const uint8_t *ptr = it->ptr;
    size_t bytesUsed, bytesNeeded;
    uint64_t value;

    if ((flags & CborValidateShortestIntegrals) == 0)
        return err;
    if (type >= CborHalfFloatType && type <= CborDoubleType)
        return err;     /* checked elsewhere */

    err = _cbor_value_extract_number(&ptr, it->parser->end, &value);
    if (err)
        return err;

    bytesUsed = (size_t)(ptr - it->ptr - 1);
    bytesNeeded = 0;
    if (value >= Value8Bit)
        ++bytesNeeded;
    if (value > 0xffU)
        ++bytesNeeded;
    if (value > 0xffffU)
        bytesNeeded += 2;
    if (value > 0xffffffffU)
        bytesNeeded += 4;
    if (bytesNeeded < bytesUsed)
        return CborErrorOverlongEncoding;
    return CborNoError;
}

static inline CborError validate_tag(CborValue *it, CborTag tag, uint32_t flags, int recursionLeft) {
    CborType type = cbor_value_get_type(it);
    const size_t knownTagCount = sizeof(knownTagData) / sizeof(knownTagData[0]);
    const struct KnownTagData *tagData = knownTagData;
    const struct KnownTagData *const knownTagDataEnd = knownTagData + knownTagCount;

    if (!recursionLeft)
        return CborErrorNestingTooDeep;
    if (flags & CborValidateNoTags)
        return CborErrorExcludedType;

    /* find the tag data, if any */
    for (; tagData != knownTagDataEnd; ++tagData) {
        if (tagData->tag < tag)
            continue;
        if (tagData->tag > tag)
            tagData = NULL;
        break;
    }
    if (tagData == knownTagDataEnd)
        tagData = NULL;

    if (flags & CborValidateNoUnknownTags && !tagData) {
        /* tag not found */
        if (flags & CborValidateNoUnknownTagsSA && tag < 24)
            return CborErrorUnknownTag;
        if ((flags & CborValidateNoUnknownTagsSR) == CborValidateNoUnknownTagsSR && tag < 256)
            return CborErrorUnknownTag;
        if ((flags & CborValidateNoUnknownTags) == CborValidateNoUnknownTags)
            return CborErrorUnknownTag;
    }

    if (flags & CborValidateTagUse && tagData && tagData->types) {
        uint32_t allowedTypes = tagData->types;

        /* correct Integer so it's not zero */
        if (type == CborIntegerType)
            type = (CborType)(type + 1);

        while (allowedTypes) {
            if ((uint8_t)(allowedTypes & 0xff) == type)
                break;
            allowedTypes >>= 8;
        }
        if (!allowedTypes)
            return CborErrorInappropriateTagForType;
    }

    return validate_value(it, flags, recursionLeft);
}

#ifndef CBOR_NO_FLOATING_POINT
static inline CborError validate_floating_point(CborValue *it, CborType type, uint32_t flags) {
    CborError err;
    int r;
    double val;
    float valf;
    uint16_t valf16;

    if (type != CborDoubleType) {
        if (type == CborFloatType) {
            err = cbor_value_get_float(it, &valf);
            val = valf;
        } else {
#  ifdef CBOR_NO_HALF_FLOAT_TYPE
            (void)valf16;
            return CborErrorUnsupportedType;
#  else
            err = cbor_value_get_half_float(it, &valf16);
            val = decode_half(valf16);
#  endif
        }
    } else {
        err = cbor_value_get_double(it, &val);
    }
    cbor_assert(err == CborNoError);     /* can't fail */

    r = fpclassify(val);
    if (r == FP_NAN || r == FP_INFINITE) {
        if (flags & CborValidateFiniteFloatingPoint)
            return CborErrorExcludedValue;
        if (flags & CborValidateShortestFloatingPoint) {
            if (type == CborDoubleType)
                return CborErrorOverlongEncoding;
#  ifndef CBOR_NO_HALF_FLOAT_TYPE
            if (type == CborFloatType)
                return CborErrorOverlongEncoding;
            if (r == FP_NAN && valf16 != 0x7e00)
                return CborErrorImproperValue;
            if (r == FP_INFINITE && valf16 != 0x7c00 && valf16 != 0xfc00)
                return CborErrorImproperValue;
#  endif
        }
    }

    if (flags & CborValidateShortestFloatingPoint && type > CborHalfFloatType) {
        if (type == CborDoubleType) {
            valf = (float)val;
            if ((double)valf == val)
                return CborErrorOverlongEncoding;
        }
#  ifndef CBOR_NO_HALF_FLOAT_TYPE
        if (type == CborFloatType) {
            valf16 = encode_half(valf);
            if (valf == decode_half(valf16))
                return CborErrorOverlongEncoding;
        }
#  endif
    }

    return CborNoError;
}
#endif

static CborError validate_container(CborValue *it, int containerType, uint32_t flags, int recursionLeft) {
    CborError err;
    const uint8_t *previous = NULL;
    const uint8_t *previous_end = NULL;

    if (!recursionLeft)
        return CborErrorNestingTooDeep;

    while (!cbor_value_at_end(it)) {
        const uint8_t *current = cbor_value_get_next_byte(it);

        if (containerType == CborMapType) {
            if (flags & CborValidateMapKeysAreString) {
                CborType type = cbor_value_get_type(it);
                if (type == CborTagType) {
                    /* skip the tags */
                    CborValue copy = *it;
                    err = cbor_value_skip_tag(&copy);
                    if (err)
                        return err;
                    type = cbor_value_get_type(&copy);
                }
                if (type != CborTextStringType)
                    return CborErrorMapKeyNotString;
            }
        }

        err = validate_value(it, flags, recursionLeft);
        if (err)
            return err;

        if (containerType != CborMapType)
            continue;

        if (flags & CborValidateMapIsSorted) {
            if (previous) {
                uint64_t len1, len2;
                const uint8_t *ptr;

                /* extract the two lengths */
                ptr = previous;
                _cbor_value_extract_number(&ptr, it->parser->end, &len1);
                ptr = current;
                _cbor_value_extract_number(&ptr, it->parser->end, &len2);

                if (len1 > len2)
                    return CborErrorMapNotSorted;
                if (len1 == len2) {
                    size_t bytelen1 = (size_t)(previous_end - previous);
                    size_t bytelen2 = (size_t)(it->ptr - current);
                    int r = memcmp(previous, current, bytelen1 <= bytelen2 ? bytelen1 : bytelen2);

                    if (r == 0 && bytelen1 != bytelen2)
                        r = bytelen1 < bytelen2 ? -1 : +1;
                    if (r > 0)
                        return CborErrorMapNotSorted;
                    if (r == 0 && (flags & CborValidateMapKeysAreUnique) == CborValidateMapKeysAreUnique)
                        return CborErrorMapKeysNotUnique;
                }
            }

            previous = current;
            previous_end = it->ptr;
        }

        /* map: that was the key, so get the value */
        err = validate_value(it, flags, recursionLeft);
        if (err)
            return err;
    }
    return CborNoError;
}

static CborError validate_value(CborValue *it, uint32_t flags, int recursionLeft) {
    CborError err;
    CborType type = cbor_value_get_type(it);

    if (cbor_value_is_length_known(it)) {
        err = validate_number(it, type, flags);
        if (err)
            return err;
    } else {
        if (flags & CborValidateNoIndeterminateLength)
            return CborErrorUnknownLength;
    }

    switch (type) {
        case CborArrayType:
        case CborMapType: {
            /* recursive type */
            CborValue recursed;
            err = cbor_value_enter_container(it, &recursed);
            if (!err)
                err = validate_container(&recursed, type, flags, recursionLeft - 1);
            if (err) {
                it->ptr = recursed.ptr;
                return err;
            }
            err = cbor_value_leave_container(it, &recursed);
            if (err)
                return err;
            return CborNoError;
        }

        case CborIntegerType: {
            uint64_t val;
            err = cbor_value_get_raw_integer(it, &val);
            cbor_assert(err == CborNoError);         /* can't fail */

            break;
        }

        case CborByteStringType:
        case CborTextStringType: {
            size_t n = 0;
            const void *ptr;

            err = _cbor_value_prepare_string_iteration(it);
            if (err)
                return err;

            while (1) {
                err = validate_number(it, type, flags);
                if (err)
                    return err;

                err = _cbor_value_get_string_chunk(it, &ptr, &n, it);
                if (err)
                    return err;
                if (!ptr)
                    break;

                if (type == CborTextStringType && flags & CborValidateUtf8) {
                    err = validate_utf8_string(ptr, n);
                    if (err)
                        return err;
                }
            }

            return CborNoError;
        }

        case CborTagType: {
            CborTag tag;
            err = cbor_value_get_tag(it, &tag);
            cbor_assert(err == CborNoError);     /* can't fail */

            err = cbor_value_advance_fixed(it);
            if (err)
                return err;
            err = validate_tag(it, tag, flags, recursionLeft - 1);
            if (err)
                return err;

            return CborNoError;
        }

        case CborSimpleType: {
            uint8_t simple_type;
            err = cbor_value_get_simple_type(it, &simple_type);
            cbor_assert(err == CborNoError);     /* can't fail */
            err = validate_simple_type(simple_type, flags);
            if (err)
                return err;
            break;
        }

        case CborNullType:
        case CborBooleanType:
            break;

        case CborUndefinedType:
            if (flags & CborValidateNoUndefined)
                return CborErrorExcludedType;
            break;

        case CborHalfFloatType:
        case CborFloatType:
        case CborDoubleType: {
#ifdef CBOR_NO_FLOATING_POINT
            return CborErrorUnsupportedType;
#else
            err = validate_floating_point(it, type, flags);
            if (err)
                return err;
            break;
#endif /* !CBOR_NO_FLOATING_POINT */
        }

        case CborInvalidType:
            return CborErrorUnknownType;
    }

    err = cbor_value_advance_fixed(it);
    return err;
}

/**
 * Performs a full validation, controlled by the \a flags options, of the CBOR
 * stream pointed by \a it and returns the error it found. If no error was
 * found, it returns CborNoError and the application can iterate over the items
 * with certainty that no errors will appear during parsing.
 *
 * If \a flags is CborValidateBasic, the result should be the same as
 * cbor_value_validate_basic().
 *
 * This function has the same timing and memory requirements as
 * cbor_value_advance() and cbor_value_validate_basic().
 *
 * \sa CborValidationFlags, cbor_value_validate_basic(), cbor_value_advance()
 */
CborError cbor_value_validate(const CborValue *it, uint32_t flags) {
    CborValue value = *it;
    CborError err = validate_value(&value, flags, CBOR_PARSER_MAX_RECURSIONS);
    if (err)
        return err;
    if (flags & CborValidateCompleteData && it->ptr != it->parser->end)
        return CborErrorGarbageAtEnd;
    return CborNoError;
}

/**
 * @}
 */