From ecb9da86aa95e93b04085285d49352055f78b10d Mon Sep 17 00:00:00 2001
From: Noel Berry
Date: Fri, 8 Jan 2021 12:01:39 -0800
Subject: [PATCH] batcher and spritefont support utf8 strings

---
 include/blah/containers/str.h |  8 +++++
 src/containers/str.cpp        | 53 ++++++++++++++++++++++++++++
 src/drawing/batch.cpp         | 24 ++++++++-----
 src/drawing/spritefont.cpp    | 66 +++++++++++++++++++++++++----------
 4 files changed, 123 insertions(+), 28 deletions(-)

diff --git a/include/blah/containers/str.h b/include/blah/containers/str.h
index 2d45097..f372b24 100644
--- a/include/blah/containers/str.h
+++ b/include/blah/containers/str.h
@@ -75,6 +75,14 @@ namespace Blah
 
 		// ensures the string has the given capacity
 		void reserve(int capacity);
+
+		// Returns the unicode value at the given index.
+		// Assumes the index is a valid utf8 starting point.
+		uint32_t utf8_at(int index) const;
+
+		// Returns the byte-length of the utf8 character.
+		// Assumes the index is a valid utf8 starting point.
+		int utf8_length(int index) const;
 
 		// appends the given character
 		Str& append(char c);
diff --git a/src/containers/str.cpp b/src/containers/str.cpp
index c5ad1e9..0da9db1 100644
--- a/src/containers/str.cpp
+++ b/src/containers/str.cpp
@@ -79,6 +79,59 @@ void Str::set_length(int len)
 	m_length = len;
 }
 
+// Decodes the utf8 sequence starting at the given index and returns its
+// unicode value. Continuation bytes are not validated.
+uint32_t Str::utf8_at(int index) const
+{
+	uint32_t charcode = 0;
+
+	int t = (unsigned char)(this->operator[](index++));
+	if (t < 128)
+		return t;
+
+	int high_bit_mask = (1 << 6) - 1;
+	int high_bit_shift = 0;
+	int total_bits = 0;
+	int other_bits = 6;
+
+	// every pass consumes one continuation byte (its low 6 bits); the index
+	// check stops a truncated sequence from reading past the string's length
+	while ((t & 0xC0) == 0xC0 && index < m_length)
+	{
+		t <<= 1;
+		t &= 0xff;
+		total_bits += 6;
+		high_bit_mask >>= 1;
+		high_bit_shift++;
+		charcode <<= other_bits;
+		charcode |= ((unsigned char)(this->operator[](index++))) & ((1 << other_bits) - 1);
+	}
+
+	// the payload bits left in the lead byte are the most significant ones
+	charcode |= ((t >> high_bit_shift) & high_bit_mask) << total_bits;
+
+	return charcode;
+}
+
+// Returns the byte-length of the utf8 sequence starting at the given index,
+// judged from its first byte alone. Anything that is not a multi-byte lead
+// byte (ascii or a stray continuation byte) reports 1.
+int Str::utf8_length(int index) const
+{
+	auto c = this->operator[](index);
+	if ((c & 0xFE) == 0xFC)
+		return 6;
+	if ((c & 0xFC) == 0xF8)
+		return 5;
+	if ((c & 0xF8) == 0xF0)
+		return 4;
+	if ((c & 0xF0) == 0xE0)
+		return 3;
+	if ((c & 0xE0) == 0xC0)
+		return 2;
+	return 1;
+}
+
 Str& Str::append(char c)
 {
 	reserve(m_length + 1);
diff --git a/src/drawing/batch.cpp b/src/drawing/batch.cpp
index d67df25..6fd527f 100644
--- a/src/drawing/batch.cpp
+++ b/src/drawing/batch.cpp
@@ -1046,6 +1046,7 @@ void Batch::str(const SpriteFont& font, const String& text, const Vec2& pos, Tex
 	else
 		offset.y = (font.ascent + font.descent + font.height() - font.height_of(text)) * 0.5f;
 
+	uint32_t last = 0;
 	for (int i = 0, l = text.length(); i < l; i++)
 	{
 		if (text[i] == '\n')
@@ -1061,29 +1062,34 @@ void Batch::str(const SpriteFont& font, const String& text, const Vec2& pos, Tex
 			else
 				offset.x = -font.width_of_line(text, i + 1) * 0.5f;
 
+			last = 0;
 			continue;
 		}
 
-		// TODO:
-		// This doesn't parse Unicode!
-		// It will assume it's a 1-byte ASCII char which is incorrect
-		const auto& ch = font[text[i]];
+		// get the character
+		uint32_t next = text.utf8_at(i);
+		const auto& ch = font[next];
 
+		// draw it, if the subtexture exists
 		if (ch.subtexture.texture)
 		{
 			Vec2 at = offset + ch.offset;
 
 			if (i > 0 && text[i - 1] != '\n')
-			{
-				// TODO:
-				// This doesn't parse Unicode!
-				at.x += font.get_kerning(text[i - 1], text[i]);
-			}
+				at.x += font.get_kerning(last, next);
 
 			tex(ch.subtexture, at, color);
 		}
 
+		// move forward
 		offset.x += ch.advance;
+
+		// increment past current character
+		// (minus 1 since the for loop iterator increments as well)
+		i += text.utf8_length(i) - 1;
+
+		// keep last codepoint for next char for kerning
+		last = next;
 	}
 
 	pop_matrix();
diff --git a/src/drawing/spritefont.cpp b/src/drawing/spritefont.cpp
index cff0a63..86b6d0a 100644
--- a/src/drawing/spritefont.cpp
+++ b/src/drawing/spritefont.cpp
@@ -77,18 +77,33 @@ SpriteFont& SpriteFont::operator=(SpriteFont && src) noexcept
 float SpriteFont::width_of(const String& text) const
 {
 	float width = 0;
-	float lineWidth = 0;
-	for (auto it = text.begin(); it != text.end(); it++)
+	float line_width = 0;
+	// previous codepoint on the current line, only read for kerning
+	uint32_t last = 0;
+	for (int i = 0; i < text.length(); i ++)
 	{
-		if (*it == '\n')
-			lineWidth = 0;
+		if (text[i] == '\n')
+		{
+			line_width = 0;
+			continue;
+		}
 
-		// TODO: this doesn't account for Unicode values!
-		uint32_t codepoint = *it;
+		// get codepoint
+		auto next = text.utf8_at(i);
 
-		lineWidth += this->operator[](codepoint).advance;
-		if (lineWidth > width)
-			width = lineWidth;
+		// increment length
+		line_width += this->operator[](next).advance;
+
+		// add kerning, but never across a line break (same rule as Batch::str)
+		if (i > 0 && text[i - 1] != '\n')
+			line_width += get_kerning(last, next);
+
+		if (line_width > width)
+			width = line_width;
+
+		// move to the next utf8 character (the loop adds the final +1)
+		i += text.utf8_length(i) - 1;
+		last = next;
 	}
 
 	return width;
@@ -99,19 +114,31 @@ float SpriteFont::width_of_line(const String& text, int start) const
 	if (start < 0) return 0;
 	if (start >= text.length()) return 0;
 
-	float lineWidth = 0;
-	for (auto it = text.begin() + start; it != text.end(); it++)
+	float width = 0;
+	// previous codepoint on this line, only read for kerning
+	uint32_t last = 0;
+	for (int i = start; i < text.length(); i ++)
 	{
-		if (*it == '\n')
-			return lineWidth;
+		if (text[i] == '\n')
+			return width;
 
-		// TODO: this doesn't account for Unicode values!
-		uint32_t codepoint = *it;
+		// get codepoint
+		auto next = text.utf8_at(i);
 
-		lineWidth += this->operator[](codepoint).advance;
+		// increment length
+		width += this->operator[](next).advance;
+
+		// add kerning; the character at `start` has no predecessor on this line
+		if (i > start)
+			width += get_kerning(last, next);
+
+		// move to the next utf8 character (the loop adds the final +1)
+		i += text.utf8_length(i) - 1;
+
+		last = next;
 	}
 
-	return lineWidth;
+	return width;
 }
 
 float SpriteFont::height_of(const String& text) const
@@ -120,10 +147,11 @@ float SpriteFont::height_of(const String& text) const
 		return 0;
 
 	float height = line_height();
-	for (auto it = text.begin(); it != text.end(); it++)
+	for (int i = 0; i < text.length(); i ++)
 	{
-		if (*it == '\n')
+		if (text[i] == '\n')
 			height += line_height();
+		i += text.utf8_length(i) - 1;
 	}
 
 	return height - line_gap;