-
- // 4 byte sequence
- if(p >= text.size())
- throw std::runtime_error("Malformed utf-8 sequence");
- uint32_t c4 = (unsigned char) text[p++];
- if(c <= 0xF4) {
- return (c & 0x07) << 18 | (c2 & 0x3F) << 12 | (c3 & 0x3F) << 6
- | (c4 & 0x3F);
+ else if ((c1 & 0370) == 0360) {
+ // 1111.0xxx: 4 byte sequence; the three trailing bytes are read from p+1..p+3
+ if(p+3 >= text.size()) throw std::range_error("Malformed utf-8 sequence"); // p+3 is the highest index read below
+ uint32_t c2 = (unsigned char) text[p+1];
+ uint32_t c3 = (unsigned char) text[p+2];
+ uint32_t c4 = (unsigned char) text[p+3]; // was p+4: skipped the real last byte and could read past the checked range
+ if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
+ if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
+ if (!has_multibyte_mark(c4)) throw std::runtime_error("Malformed utf-8 sequence");
+ p+=4; // advance past all four bytes of the sequence
+ return (c1 & 0007) << 18 | (c2 & 0077) << 12 | (c3 & 0077) << 6 | (c4 & 0077); // 3 + 6 + 6 + 6 payload bits