draw_chars(chars, text, pos, drawing_effect, alpha);
}
namespace {

/**
 * Returns true if @a c matches the bit pattern 10xx.xxxx, i.e. it is a
 * continuation byte (the 2nd, 3rd or 4th byte) of a multibyte UTF-8 sequence.
 */
bool has_multibyte_mark(unsigned char c) {
  return ((c & 0xC0) == 0x80);
}

/**
 * Gets the unicode character at byte position @a p of the UTF-8 encoded
 * @a text, then advances @a p to the first byte of the next character.
 *
 * Besides the basic bit layout, this rejects overlong encodings, UTF-16
 * surrogates (U+D800..U+DFFF) and values above U+10FFFF.
 * See the UTF-8 tables (bit distribution and well-formed byte sequences)
 * in chapter 3 of the unicode standard for details.
 *
 * @param text UTF-8 encoded string to read from.
 * @param p    byte offset of the first byte of the character to decode;
 *             left unchanged if an exception is thrown.
 * @return the decoded code point.
 * @throws std::range_error (a std::runtime_error) if @a p is not inside
 *         @a text or the sequence is cut off by the end of @a text.
 * @throws std::runtime_error if the bytes do not form a valid sequence.
 */
uint32_t decode_utf8(const std::string& text, size_t& p)
{
  if (p >= text.size()) throw std::range_error("Malformed utf-8 sequence");

  const uint32_t c1 = static_cast<unsigned char>(text[p]);

  size_t length = 0;     // total number of bytes in this sequence
  uint32_t result = 0;   // payload bits collected so far
  uint32_t smallest = 0; // lowest code point that legitimately needs `length` bytes

  if ((c1 & 0x80) == 0x00) {
    // 0xxx.xxxx: 1 byte sequence
    p += 1;
    return c1;
  }
  else if ((c1 & 0xE0) == 0xC0) {
    // 110x.xxxx: 2 byte sequence
    length = 2;
    result = c1 & 0x1F;
    smallest = 0x80;
  }
  else if ((c1 & 0xF0) == 0xE0) {
    // 1110.xxxx: 3 byte sequence
    length = 3;
    result = c1 & 0x0F;
    smallest = 0x800;
  }
  else if ((c1 & 0xF8) == 0xF0) {
    // 1111.0xxx: 4 byte sequence
    length = 4;
    result = c1 & 0x07;
    smallest = 0x10000;
  }
  else {
    // 10xx.xxxx (continuation byte in lead position) or 1111.1xxx (never valid)
    throw std::runtime_error("Malformed utf-8 sequence");
  }

  // p < text.size() is guaranteed above, so the subtraction cannot wrap around.
  if (length > text.size() - p) throw std::range_error("Malformed utf-8 sequence");

  for (size_t i = 1; i < length; ++i) {
    const unsigned char cn = static_cast<unsigned char>(text[p + i]);
    if (!has_multibyte_mark(cn)) throw std::runtime_error("Malformed utf-8 sequence");
    result = (result << 6) | (cn & 0x3Fu);
  }

  // Overlong forms would let the same character hide behind several spellings.
  if (result < smallest) throw std::runtime_error("Malformed utf-8 sequence");
  // Surrogates are reserved for UTF-16 and are not valid scalar values.
  if (result >= 0xD800 && result <= 0xDFFF) throw std::runtime_error("Malformed utf-8 sequence");
  // Lead bytes 0xF4..0xF7 can encode values beyond the unicode range.
  if (result > 0x10FFFF) throw std::runtime_error("Malformed utf-8 sequence");

  p += length;
  return result;
}

} // namespace

void
Font::draw_chars(Surface* pchars, const std::string& text, const Vector& pos,
DrawingEffect drawing_effect, float alpha) const