/****************************************************************************
**
** Copyright (C) 2017 Intel Corporation
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and associated documentation files (the "Software"), to deal
** in the Software without restriction, including without limitation the rights
** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
** copies of the Software, and to permit persons to whom the Software is
** furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Software.
**
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
** THE SOFTWARE.
**
****************************************************************************/

#ifndef CBOR_UTF8_H
#define CBOR_UTF8_H

#include "compilersupport_p.h"

#include <stdint.h>

/*
 * Decodes a single UTF-8 encoded code point from the byte range [*buffer, end).
 *
 * buffer: in/out cursor into the input. It is advanced past every byte this
 *         function reads, on success and on failure alike. It is left
 *         untouched only when the range is empty (or inverted), because in
 *         that case nothing is read.
 * end:    one past the last readable byte.
 *
 * Returns the decoded code point, or ~0U if the range is empty, the lead byte
 * is not a valid UTF-8 lead byte, the sequence is truncated, a continuation
 * byte is malformed, or the decoded value is an overlong encoding, a UTF-16
 * surrogate, or greater than 0x10FFFF.
 */
static inline uint32_t get_utf8(const uint8_t **buffer, const uint8_t *end) {
    int charsNeeded;
    int i;
    uint32_t uc, min_uc;
    ptrdiff_t n = end - *buffer;

    /* Nothing to read. Using <= also rejects an inverted range (end before
     * *buffer), which would otherwise fall through and read out of bounds. */
    if (n <= 0)
        return ~0U;

    uc = *(*buffer)++;
    if (uc < 0x80) {
        /* single-byte UTF-8 */
        return uc;
    }

    /* multi-byte UTF-8, decode it */

    /* 0x80..0xBF are stray continuation bytes; 0xC0 and 0xC1 could only start
     * an overlong two-byte encoding of a value below 0x80. */
    if (unlikely(uc <= 0xC1))
        return ~0U;
    if (uc < 0xE0) {
        /* two-byte UTF-8 */
        charsNeeded = 2;
        min_uc = 0x80;
        uc &= 0x1f;
    } else if (uc < 0xF0) {
        /* three-byte UTF-8 */
        charsNeeded = 3;
        min_uc = 0x800;
        uc &= 0x0f;
    } else if (uc < 0xF5) {
        /* four-byte UTF-8 */
        charsNeeded = 4;
        min_uc = 0x10000;
        uc &= 0x07;
    } else {
        return ~0U;
    }

    /* n counts the lead byte too, so it is compared against the full length */
    if (n < charsNeeded)
        return ~0U;

    /* continuation bytes: each must look like 10xxxxxx and adds 6 bits */
    for (i = 1; i < charsNeeded; ++i) {
        uint8_t b = *(*buffer)++;
        if ((b & 0xc0) != 0x80)
            return ~0U;
        uc = (uc << 6) | (b & 0x3f);
    }

    /* overlong sequence? surrogate pair? out of range?
     * (the unsigned subtraction wraps, so the middle test is true exactly
     * for 0xD800..0xDFFF) */
    if (uc < min_uc || uc - 0xd800U < 2048U || uc > 0x10ffff)
        return ~0U;

    return uc;
}

#endif /* CBOR_UTF8_H */