Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hebrew support #7512

Draft
wants to merge 11 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMake/Assets.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ if(NOT DEFINED DEVILUTIONX_ASSETS_OUTPUT_DIRECTORY)
set(DEVILUTIONX_ASSETS_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/assets")
endif()

set(devilutionx_langs bg cs da de el es fr hr hu it ja ko pl pt_BR ro ru uk sv tr zh_CN zh_TW)
set(devilutionx_langs bg cs da de el es fr hr hu it ja ko pl pt_BR ro ru uk sv tr he zh_CN zh_TW)
Copy link
Collaborator

@glebm glebm Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Estonian has been added a month ago so there is now conflict in this line that will require rebasing

if(USE_GETTEXT_FROM_VCPKG)
# vcpkg doesn't add its own tools directory to the search path
list(APPEND Gettext_ROOT ${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/tools/gettext/bin)
Expand Down
1 change: 1 addition & 0 deletions CMake/Definitions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ foreach(
UNPACKED_MPQS
UNPACKED_SAVES
DEVILUTIONX_WINDOWS_NO_WCHAR
USE_FRIBIDI
)
if(${def_name})
list(APPEND DEVILUTIONX_DEFINITIONS ${def_name})
Expand Down
8 changes: 8 additions & 0 deletions CMake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -284,3 +284,11 @@ if(GPERF)
find_package(Gperftools REQUIRED)
message("INFO: ${GPERFTOOLS_LIBRARIES}")
endif()

find_package(FRIBIDI QUIET)
if(FRIBIDI_FOUND)
message("-- Found FriBidi")
set(USE_FRIBIDI ON)
else()
message("-- Suitable system FriBidi package not found, all strings will be assumed LTR")
endif()
13 changes: 13 additions & 0 deletions CMake/finders/FindFRIBIDI.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
find_package(PkgConfig)
pkg_check_modules(FRIBIDI QUIET fribidi)

find_path(FRIBIDI_INCLUDE_DIR
NAMES fribidi/fribidi.h)

find_library(FRIBIDI_LIBRARY NAMES fribidi)

if (NOT FRIBIDI_FOUND)
message(STATUS "GNU FriBidi not found")
else()
message(STATUS "Found GNU FriBidi: ${FRIBIDI_LIBRARY}")
endif (NOT FRIBIDI_FOUND)
4 changes: 4 additions & 0 deletions Source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -384,11 +384,15 @@ target_link_libraries(libdevilutionx_parse_int PUBLIC
)

add_devilutionx_object_library(libdevilutionx_utf8
utils/unicode-bidi.cpp
utils/utf8.cpp
)
target_link_libraries(libdevilutionx_utf8 PRIVATE
hoehrmann_utf8
)
if(USE_FRIBIDI)
target_link_libraries(libdevilutionx_utf8 PRIVATE ${FRIBIDI_LIBRARY})
endif()

add_devilutionx_object_library(libdevilutionx_strings
utils/str_cat.cpp
Expand Down
138 changes: 93 additions & 45 deletions Source/engine/render/text_render.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "utils/display.h"
#include "utils/language.h"
#include "utils/sdl_compat.h"
#include "utils/unicode-bidi.hpp"
#include "utils/utf8.hpp"

namespace devilution {
Expand Down Expand Up @@ -167,7 +168,7 @@ OptionalClxSpriteList LoadFont(GameFontTables size, text_color color, uint16_t r
const std::string_view language_code = GetLanguageCode();
const std::string_view language_tag = language_code.substr(0, 2);
if (language_tag == "zh" || language_tag == "ja" || language_tag == "ko"
|| (language_tag == "tr" && row == 0)) {
|| (language_tag == "tr" && row == 0)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There seem to be some whitespace changes here that will fail clang-format
We use tabs for structural indentation but spaces for continuation indentation.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will revert this change

GetFontPath(language_code, size, row, ".clx", &path[0]);
font = LoadOptionalClx(path);
}
Expand Down Expand Up @@ -259,13 +260,13 @@ std::size_t CountNewlines(std::string_view fmt, const DrawStringFormatArg *args,
class FmtArgParser {
public:
FmtArgParser(std::string_view fmt,
DrawStringFormatArg *args,
size_t len,
size_t offset = 0)
: fmt_(fmt)
, args_(args)
, len_(len)
, next_(offset)
DrawStringFormatArg *args,
size_t len,
size_t offset = 0)
: fmt_(fmt)
, args_(args)
, len_(len)
, next_(offset)
{
}

Expand Down Expand Up @@ -374,8 +375,8 @@ Surface ClipSurface(const Surface &out, Rectangle rect)
return out.subregion(0, 0, std::min(rect.position.x + rect.size.width, out.w()), out.h());
}
return out.subregion(0, 0,
std::min(rect.position.x + rect.size.width, out.w()),
std::min(rect.position.y + rect.size.height, out.h()));
std::min(rect.position.x + rect.size.width, out.w()),
std::min(rect.position.y + rect.size.height, out.h()));
}

void MaybeWrap(Point &characterPosition, int characterWidth, int rightMargin, int initialX, int lineHeight)
Expand All @@ -396,17 +397,23 @@ int GetLineStartX(UiFlags flags, const Rectangle &rect, int lineWidth)
}

uint32_t DoDrawString(const Surface &out, std::string_view text, Rectangle rect, Point &characterPosition,
int lineWidth, int rightMargin, int bottomMargin, GameFontTables size, text_color color, bool outline,
TextRenderOptions &opts)
int lineWidth, int rightMargin, int bottomMargin, GameFontTables size, text_color color, bool outline,
TextRenderOptions &opts)
{
CurrentFont currentFont;

char32_t next;
std::string_view remaining = text;
size_t cpLen;
std::u32string text32 = ConvertUtf8ToUtf32(text);
Copy link
Collaborator

@glebm glebm Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We like to avoid allocations in code that runs on every frame, is it possible to implement this in a streaming fashion?

It looks like fribidi only supports UTF-32 but perhaps there is another library?
A quick search reveals https://github.com/Tehreer/SheenBidi, which supports UTF-8, and provides a number of handy functions for this. Looks like we can also replace our UTF-8 decoding implementations with calls to theirs.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, 2 notes regarding this (from my point of view, please feel free to suggest otherwise)

  1. Because I am trying to find and handle each line separately, working with UTF-32 actually greatly simplify the indexing of the string, allowing simpler logic to get each line, which I find harder doing on UTF-8.
  2. I was having trouble setting up additional dependency as I am not yet experienced with cmake. I was able to set up linking with freebidi binary on linux because it is quite popular. I am open to alternative but I would need guidance for setting them up.

Copy link
Collaborator

@glebm glebm Nov 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I am trying to find and handle each line separately, working with UTF-32 actually greatly simplify the indexing of the string, allowing simpler logic to get each line, which I find harder doing on UTF-8.

UTF-8 is a self-synchronizing encoding, which means explicit line breaks can be found simply via find('\n'), without the need for decoding. If you use UTF-8, you'd use code unit (byte) indices rather than code point indices throughout (which necessitates a BiDi library that supports UTF-8 natively).

I am open to alternative but I would need guidance for setting them up.

It's quite simple, add a file 3rdParty/SheenBidi/CMakeLists.txt with:

include(functions/FetchContent_MakeAvailableExcludeFromAll)

include(FetchContent)
FetchContent_Declare(SheenBidi
    URL https://github.com/glebm/SheenBidi/archive/3721411b5ec2862240f34aeeb3e8ba59283ec2ec.tar.gz
    URL_HASH MD5=7dc0138bda1e16b217cee66cfd95a6f9
)
FetchContent_MakeAvailableExcludeFromAll(SheenBidi)

The target to link to is SheenBidi::sheenbidi.

std::u32string_view remaining = text32;

std::string rem;
uint32_t bytesDrawn = 0;

size_t lineStart = 0;
size_t currPos = 0;
std::u32string line;

const auto maybeDrawCursor = [&]() {
if (opts.cursorPosition == static_cast<int>(text.size() - remaining.size())) {
if (opts.cursorPosition == static_cast<int>(text.size() - rem.size())) {
Point position = characterPosition;
MaybeWrap(position, 2, rightMargin, position.x, opts.lineHeight);
if (GetAnimationFrame(2, 500) != 0) {
Expand All @@ -420,9 +427,57 @@ uint32_t DoDrawString(const Surface &out, std::string_view text, Rectangle rect,
}
};

for (; !remaining.empty() && remaining[0] != '\0'
&& (next = DecodeFirstUtf8CodePoint(remaining, &cpLen)) != Utf8DecodeError;
remaining.remove_prefix(cpLen)) {
const auto drawSingleLine = [&]() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a little bit unclear at a glance what this lambda captures, consider extracting the contents of the lambda into a function.


line = ConvertLogicalToVisual(text32.substr(lineStart, currPos - lineStart));
AJenbo marked this conversation as resolved.
Show resolved Hide resolved

for (uint16_t j = lineStart; j < currPos; j++) {
remaining = text32.substr(j);
rem = ConvertUtf32ToUtf8(remaining);

char32_t cnext = line[j - lineStart];

if (cnext == ZWSP)
continue;

if (!currentFont.load(size, color, cnext)) {
cnext = U'?';
if (!currentFont.load(size, color, cnext)) {
app_fatal("Missing fonts");
}
}

const uint8_t frame = cnext & 0xFF;
const ClxSprite glyph = (*currentFont.sprite)[frame];
const int w = glyph.width();
const auto byteIndex = static_cast<int>(text.size() - rem.size());

// Draw highlight
if (byteIndex >= opts.highlightRange.begin && byteIndex < opts.highlightRange.end) {
size_t cpLen;
DecodeFirstUtf8CodePoint(rem, &cpLen);
const bool lastInRange = static_cast<int>(byteIndex + cpLen) == opts.highlightRange.end;
FillRect(out, characterPosition.x, characterPosition.y,
glyph.width() + (lastInRange ? 0 : opts.spacing), glyph.height(),
opts.highlightColor);
}

DrawFont(out, characterPosition, glyph, color, outline);
maybeDrawCursor();
characterPosition.x += w + opts.spacing;
}
remaining = text32.substr(currPos);
rem = ConvertUtf32ToUtf8(remaining);
maybeDrawCursor();
};

int16_t lineX = characterPosition.x;


for (currPos = 0; currPos < text32.size() && text32[currPos] != U'\0'; currPos++) {
char32_t next = text32[currPos];
bytesDrawn += ConvertUtf32ToUtf8(text32.substr(currPos, currPos + 1)).size();

if (next == ZWSP)
continue;

Expand All @@ -433,44 +488,37 @@ uint32_t DoDrawString(const Surface &out, std::string_view text, Rectangle rect,
}
}

const uint8_t frame = next & 0xFF;
const uint16_t width = (*currentFont.sprite)[frame].width();
const uint8_t cframe = next & 0xFF;
const uint16_t width = (*currentFont.sprite)[cframe].width();
if (next == U'\n' || characterPosition.x + width > rightMargin) {
if (next == '\n')
maybeDrawCursor();
drawSingleLine();

lineStart = currPos + 1;

const int nextLineY = characterPosition.y + opts.lineHeight;
if (nextLineY >= bottomMargin)
break;
characterPosition.y = nextLineY;

if (HasAnyOf(opts.flags, (UiFlags::AlignCenter | UiFlags::AlignRight))) {
lineWidth = width;
if (remaining.size() > cpLen)
lineWidth += opts.spacing + GetLineWidth(remaining.substr(cpLen), size, opts.spacing);
if (text32.substr(currPos).size() > 1)
lineWidth += opts.spacing + GetLineWidth(ConvertUtf32ToUtf8(text32.substr(currPos + 1)), size, opts.spacing);
}
characterPosition.x = GetLineStartX(opts.flags, rect, lineWidth);
lineX = characterPosition.x;

if (next == U'\n')
continue;
}

const ClxSprite glyph = (*currentFont.sprite)[frame];
const auto byteIndex = static_cast<int>(text.size() - remaining.size());

// Draw highlight
if (byteIndex >= opts.highlightRange.begin && byteIndex < opts.highlightRange.end) {
const bool lastInRange = static_cast<int>(byteIndex + cpLen) == opts.highlightRange.end;
FillRect(out, characterPosition.x, characterPosition.y,
glyph.width() + (lastInRange ? 0 : opts.spacing), glyph.height(),
opts.highlightColor);
}

DrawFont(out, characterPosition, glyph, color, outline);
maybeDrawCursor();
characterPosition.x += width + opts.spacing;
lineX += width + opts.spacing;
}
maybeDrawCursor();
return static_cast<uint32_t>(remaining.data() - text.data());

drawSingleLine();
return bytesDrawn;
}

} // namespace
Expand Down Expand Up @@ -605,7 +653,7 @@ std::string WordWrapString(std::string_view text, unsigned width, GameFontTables
CurrentFont currentFont;

char32_t codepoint = U'\0'; // the current codepoint
char32_t nextCodepoint; // the next codepoint
char32_t nextCodepoint; // the next codepoint
std::size_t nextCodepointLen;
std::string_view remaining = text;
nextCodepoint = DecodeFirstUtf8CodePoint(remaining, &nextCodepointLen);
Expand Down Expand Up @@ -716,7 +764,7 @@ uint32_t DrawString(const Surface &out, std::string_view text, const Rectangle &
}

const uint32_t bytesDrawn = DoDrawString(clippedOut, text, rect, characterPosition,
lineWidth, rightMargin, bottomMargin, size, color, outlined, opts);
lineWidth, rightMargin, bottomMargin, size, color, outlined, opts);

if (HasAnyOf(opts.flags, UiFlags::PentaCursor)) {
const ClxSprite sprite = (*pSPentSpn2Cels)[PentSpn2Spin()];
Expand Down Expand Up @@ -769,16 +817,16 @@ void DrawStringWithColors(const Surface &out, std::string_view fmt, DrawStringFo
FmtArgParser fmtArgParser { fmt, args, argsLen };
size_t cpLen;
for (; !remaining.empty() && remaining[0] != '\0'
&& (next = DecodeFirstUtf8CodePoint(remaining, &cpLen)) != Utf8DecodeError;
remaining.remove_prefix(cpLen), prev = next) {
&& (next = DecodeFirstUtf8CodePoint(remaining, &cpLen)) != Utf8DecodeError;
remaining.remove_prefix(cpLen), prev = next) {
if (((prev == U'{' || prev == U'}') && prev == next)
|| next == ZWSP)
|| next == ZWSP)
continue;

const std::optional<std::size_t> fmtArgPos = fmtArgParser(remaining);
if (fmtArgPos) {
DoDrawString(clippedOut, args[*fmtArgPos].GetFormatted(), rect, characterPosition, lineWidth, rightMargin, bottomMargin, size,
GetColorFromFlags(args[*fmtArgPos].GetFlags()), outlined, opts);
GetColorFromFlags(args[*fmtArgPos].GetFlags()), outlined, opts);
// `fmtArgParser` has already consumed `remaining`. Ensure the loop doesn't consume any more.
cpLen = 0;
// The loop assigns `prev = next`. We want `prev` to be `\0` after this.
Expand Down
1 change: 1 addition & 0 deletions Source/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,7 @@ void OptionEntryLanguageCode::CheckLanguagesAreInitialized() const
languages.emplace_back("sv", "Svenska");
languages.emplace_back("tr", "Türkçe");
languages.emplace_back("uk", "Українська");
languages.emplace_back("he", "עברית");

if (HaveExtraFonts()) {
languages.emplace_back("zh_CN", "汉语");
Expand Down
34 changes: 34 additions & 0 deletions Source/utils/unicode-bidi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include <string>
#include <string_view>

#ifdef USE_FRIBIDI
#include <fribidi/fribidi.h>
#endif

namespace devilution {

std::u32string ConvertLogicalToVisual(std::u32string_view input)
{
#ifndef USE_FRIBIDI
return std::u32string(input);
#else
FriBidiChar *logical = new FriBidiChar[input.size()];
FriBidiChar *visual = new FriBidiChar[input.size()];

for (size_t i = 0; i < input.size(); i++) {
logical[i] = input[i];
}

FriBidiParType baseDirection = FRIBIDI_PAR_ON;
fribidi_log2vis(logical, input.size(), &baseDirection, visual, nullptr, nullptr, nullptr);

std::u32string result(reinterpret_cast<const char32_t *>(visual), input.size());

delete[] logical;
delete[] visual;

return result;
#endif
}

} // namespace devilution
11 changes: 11 additions & 0 deletions Source/utils/unicode-bidi.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

#include <cstddef>
#include <string>
#include <string_view>

namespace devilution {

std::u32string ConvertLogicalToVisual(std::u32string_view input);

} // namespace devilution
26 changes: 26 additions & 0 deletions Source/utils/utf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,32 @@

namespace devilution {

std::u32string ConvertUtf8ToUtf32(std::string_view input)
{
std::u32string result;
std::size_t len = 0;

while (!input.empty()) {
char32_t codepoint = DecodeFirstUtf8CodePoint(input, &len);
if (codepoint == Utf8DecodeError) {
break;
}
result.push_back(codepoint);
input.remove_prefix(len);
}

return result;
}

std::string ConvertUtf32ToUtf8(std::u32string_view input)
{
std::string result;
for (char32_t codepoint : input) {
AppendUtf8(codepoint, result);
}
return result;
}

char32_t DecodeFirstUtf8CodePoint(std::string_view input, std::size_t *len)
{
uint32_t codepoint = 0;
Expand Down
Loading