Skip to content

Commit

Permalink
Merge pull request mixxxdj#13935 from Swiftb0y/review/christophehenry…
Browse files Browse the repository at this point in the history
…/gh13772-charset-encoding-salvage

Salvage mixxxdj#13772
  • Loading branch information
JoergAtGithub authored Dec 17, 2024
2 parents a4e61f9 + 3101eff commit fc59bff
Show file tree
Hide file tree
Showing 5 changed files with 269 additions and 2 deletions.
37 changes: 37 additions & 0 deletions res/controllers/engine-api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -304,4 +304,41 @@ declare namespace engine {
* SoftStart with low factors would take a while until sound is audible. [default = 1.0]
*/
function softStart(deck: number, activate: boolean, factor?: number): void;

/**
 * Character sets that can be passed as `targetCharset` to `convertCharset`.
 */
enum Charset {
ASCII, // American Standard Code for Information Interchange (7-Bit)
UTF_8, // Unicode Transformation Format (8-Bit)
UTF_16LE, // UTF-16 for Little-Endian devices (ARM, x86)
UTF_16BE, // UTF-16 for Big-Endian devices (MIPS, PPC)
UTF_32LE, // UTF-32 for Little-Endian devices (ARM, x86)
UTF_32BE, // UTF-32 for Big-Endian devices (MIPS, PPC)
CentralEurope, // Windows_1250 which includes all characters of ISO_8859_2
Cyrillic, // Windows_1251 which includes all characters of ISO_8859_5
Latin1, // Windows_1252 which includes all characters of ISO_8859_1
Greek, // Windows_1253 which includes all characters of ISO_8859_7
Turkish, // Windows_1254 which includes all characters of ISO_8859_9
Hebrew, // Windows_1255 which includes all characters of ISO_8859_8
Arabic, // Windows_1256 which includes all characters of ISO_8859_6
Baltic, // Windows_1257 which includes all characters of ISO_8859_13
Vietnamese, // Windows_1258. NOTE(review): this comment used to claim coverage of ISO_8859_14, but that appears to be the Celtic (Latin-8) set - confirm before relying on it
Latin9, // ISO_8859_15
Shift_JIS, // Japanese Industrial Standard (JIS X 0208)
EUC_JP, // Extended Unix Code for Japanese
EUC_KR, // Extended Unix Code for Korean
Big5_HKSCS, // Includes all characters of Big5 and the Hong Kong Supplementary Character Set (HKSCS)
KOI8_U, // Includes all characters of KOI8_R for Russian language and adds Ukrainian language characters
UCS2, // Universal Character Set (2-Byte) ISO_10646
SCSU, // Standard Compression Scheme for Unicode
BOCU_1, // Binary Ordered Compression for Unicode
CESU_8 // Compatibility Encoding Scheme for UTF-16 (8-Bit)
}

/**
 * Converts a string into another charset.
 *
 * @param targetCharset The charset to encode the string into.
 * @param value The string to encode.
 * @returns The converted string as an array of bytes. Will return an empty buffer on conversion error or unavailable charset.
 */
function convertCharset(targetCharset: Charset, value: string): ArrayBuffer;
}
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,10 @@ bool ControllerScriptEngineLegacy::initialize() {
ControllerScriptInterfaceLegacy* legacyScriptInterface =
new ControllerScriptInterfaceLegacy(this, m_logger);

engineGlobalObject.setProperty(
"engine", m_pJSEngine->newQObject(legacyScriptInterface));
auto engine = m_pJSEngine->newQObject(legacyScriptInterface);
auto meta = m_pJSEngine->newQMetaObject(&ControllerScriptInterfaceLegacy::staticMetaObject);
engine.setProperty("Charset", meta);
engineGlobalObject.setProperty("engine", m_pJSEngine->newQObject(legacyScriptInterface));

#ifdef MIXXX_USE_QML
if (m_bQmlMode) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "controllerscriptinterfacelegacy.h"

#include <QStringEncoder>
#include <gsl/pointers>

#include "control/controlobject.h"
Expand Down Expand Up @@ -1052,3 +1053,79 @@ void ControllerScriptInterfaceLegacy::softStart(int deck, bool activate, double
// activate the ramping in scratchProcess()
m_ramp[deck] = true;
}

/// Encodes `value` into the byte representation of `targetCharset`.
///
/// Every known enumerator is translated into the encoder name that is handed
/// to convertCharsetInternal(). If `targetCharset` matches none of the known
/// enumerators, the error is reported through logOrThrowError() and an empty
/// QByteArray is returned.
QByteArray ControllerScriptInterfaceLegacy::convertCharset(
        const ControllerScriptInterfaceLegacy::Charset targetCharset,
        const QString& value) {
    // Stays empty when the enumerator is not handled by the switch below.
    QString encoderName;
    switch (targetCharset) {
    case Charset::ASCII:
        encoderName = QStringLiteral("US-ASCII");
        break;
    case Charset::UTF_8:
        encoderName = QStringLiteral("UTF-8");
        break;
    case Charset::UTF_16LE:
        encoderName = QStringLiteral("UTF-16LE");
        break;
    case Charset::UTF_16BE:
        encoderName = QStringLiteral("UTF-16BE");
        break;
    case Charset::UTF_32LE:
        encoderName = QStringLiteral("UTF-32LE");
        break;
    case Charset::UTF_32BE:
        encoderName = QStringLiteral("UTF-32BE");
        break;
    case Charset::CentralEurope:
        encoderName = QStringLiteral("windows-1250");
        break;
    case Charset::Cyrillic:
        encoderName = QStringLiteral("windows-1251");
        break;
    case Charset::Latin1:
        encoderName = QStringLiteral("windows-1252");
        break;
    case Charset::Greek:
        encoderName = QStringLiteral("windows-1253");
        break;
    case Charset::Turkish:
        encoderName = QStringLiteral("windows-1254");
        break;
    case Charset::Hebrew:
        encoderName = QStringLiteral("windows-1255");
        break;
    case Charset::Arabic:
        encoderName = QStringLiteral("windows-1256");
        break;
    case Charset::Baltic:
        encoderName = QStringLiteral("windows-1257");
        break;
    case Charset::Vietnamese:
        encoderName = QStringLiteral("windows-1258");
        break;
    case Charset::Latin9:
        encoderName = QStringLiteral("ISO-8859-15");
        break;
    case Charset::Shift_JIS:
        encoderName = QStringLiteral("Shift_JIS");
        break;
    case Charset::EUC_JP:
        encoderName = QStringLiteral("EUC-JP");
        break;
    case Charset::EUC_KR:
        encoderName = QStringLiteral("EUC-KR");
        break;
    case Charset::Big5_HKSCS:
        encoderName = QStringLiteral("Big5-HKSCS");
        break;
    case Charset::KOI8_U:
        encoderName = QStringLiteral("KOI8-U");
        break;
    case Charset::UCS2:
        encoderName = QStringLiteral("ISO-10646-UCS-2");
        break;
    case Charset::SCSU:
        encoderName = QStringLiteral("SCSU");
        break;
    case Charset::BOCU_1:
        encoderName = QStringLiteral("BOCU-1");
        break;
    case Charset::CESU_8:
        encoderName = QStringLiteral("CESU-8");
        break;
    }

    if (encoderName.isEmpty()) {
        // The value handed in does not correspond to any enumerator.
        m_pScriptEngineLegacy->logOrThrowError(QStringLiteral("Unknown charset specified"));
        return QByteArray();
    }
    return convertCharsetInternal(encoderName, value);
}

/// Encodes `value` with the QStringEncoder created for the name `targetCharset`.
///
/// If no valid encoder can be created for that name, "Unable to open encoder"
/// is reported through logOrThrowError() and an empty QByteArray is returned.
QByteArray ControllerScriptInterfaceLegacy::convertCharsetInternal(
        const QString& targetCharset, const QString& value) {
#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
    // Qt 6.8 and newer: hand the encoder name over as a string view.
    const QAnyStringView codecName(targetCharset);
#else
    // Older Qt: hand the encoder name over as a C string. The UTF-8 buffer
    // is kept in a local so the pointer stays valid while it is used.
    const QByteArray codecNameUtf8 = targetCharset.toUtf8();
    const char* const codecName = codecNameUtf8.constData();
#endif
    QStringEncoder encoder(codecName);
    if (encoder.isValid()) {
        return encoder(value);
    }
    m_pScriptEngineLegacy->logOrThrowError(QStringLiteral("Unable to open encoder"));
    return QByteArray();
}
40 changes: 40 additions & 0 deletions src/controllers/scripting/legacy/controllerscriptinterfacelegacy.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,38 @@ class ConfigKey;
class ControllerScriptInterfaceLegacy : public QObject {
Q_OBJECT
public:
// Target character sets selectable by controller scripts when calling
// convertCharset(). The trailing comment on each enumerator is the encoder
// name that convertCharset() requests for it.
//
// NOTE: these enumerator names are exposed to the JS engine! Removal/Changing of
// any name is likely breaking. Only add more and only remove enumerators if
// they're broken to begin with.
enum class Charset {
ASCII, // "US-ASCII"
UTF_8, // "UTF-8"
UTF_16LE, // "UTF-16LE"
UTF_16BE, // "UTF-16BE"
UTF_32LE, // "UTF-32LE"
UTF_32BE, // "UTF-32BE"
CentralEurope, // "windows-1250"
Cyrillic, // "windows-1251"
Latin1, // "windows-1252"
Greek, // "windows-1253"
Turkish, // "windows-1254"
Hebrew, // "windows-1255"
Arabic, // "windows-1256"
Baltic, // "windows-1257"
Vietnamese, // "windows-1258"
Latin9, // "ISO-8859-15"
Shift_JIS, // "Shift_JIS"
EUC_JP, // "EUC-JP"
EUC_KR, // "EUC-KR"
Big5_HKSCS, // "Big5-HKSCS"
KOI8_U, // "KOI8-U"
UCS2, // "ISO-10646-UCS-2"
SCSU, // "SCSU"
BOCU_1, // "BOCU-1"
CESU_8 // "CESU-8"
};
// Registers the enum with Qt's meta-object system (the tests enumerate it via QMetaEnum).
Q_ENUM(Charset)

ControllerScriptInterfaceLegacy(ControllerScriptEngineLegacy* m_pEngine,
const RuntimeLoggingCategory& logger);

Expand Down Expand Up @@ -72,6 +104,11 @@ class ControllerScriptInterfaceLegacy : public QObject {
const double rate = -10.0);
Q_INVOKABLE void softStart(const int deck, bool activate, double factor = 1.0);

/// Encodes `value` into `targetCharset` and returns the resulting bytes.
/// Returns an empty QByteArray (after reporting an error) when the charset
/// is unknown or no encoder could be opened for it.
Q_INVOKABLE QByteArray convertCharset(
const ControllerScriptInterfaceLegacy::Charset
targetCharset,
const QString& value);

bool removeScriptConnection(const ScriptConnection& conn);
/// Execute a ScriptConnection's JS callback
void triggerScriptConnection(const ScriptConnection& conn);
Expand All @@ -84,6 +121,9 @@ class ControllerScriptInterfaceLegacy : public QObject {
const QString& name,
const QJSValue& callback,
bool skipSuperseded = false);

/// Encodes `value` with the encoder looked up by the name `targetCharset`;
/// returns an empty QByteArray if no valid encoder exists for that name.
QByteArray convertCharsetInternal(const QString& targetCharset, const QString& value);

QHash<ConfigKey, ControlObjectScript*> m_controlCache;
ControlObjectScript* getControlObjectScript(const QString& group, const QString& name);

Expand Down
111 changes: 111 additions & 0 deletions src/test/controllerscriptenginelegacy_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <QByteArrayView>
#include <QMetaEnum>
#include <QScopedPointer>
#include <QTemporaryFile>
#include <QThread>
Expand All @@ -12,6 +14,7 @@

#include "control/controlobject.h"
#include "control/controlpotmeter.h"
#include "controllers/scripting/legacy/controllerscriptinterfacelegacy.h"
#ifdef MIXXX_USE_QML
#include <QQuickItem>

Expand Down Expand Up @@ -658,6 +661,114 @@ TEST_F(ControllerScriptEngineLegacyTest, connectionExecutesWithCorrectThisObject
EXPECT_DOUBLE_EQ(1.0, pass->get());
}


// Plain ASCII text converted to Latin9 must yield exactly these bytes.
TEST_F(ControllerScriptEngineLegacyTest, convertCharsetCorrectValueStringCharset) {
    static constexpr char kExpectedBytes[] = {
            '\x48', '\x65', '\x6c', '\x6c', '\x6f', '\x21'};

    const QByteArray encoded = qjsvalue_cast<QByteArray>(
            evaluate("engine.convertCharset(engine.Charset.Latin9, 'Hello!')"));

    EXPECT_EQ(encoded, QByteArrayView::fromArray(kExpectedBytes));
}

// Characters that Latin9 cannot represent are expected to come back as the
// SUB character, while the space in the middle is kept.
TEST_F(ControllerScriptEngineLegacyTest, convertCharsetUnsupportedChars) {
    constexpr char kSub = '\x1A'; // ASCII/Latin9 SUB character
    static constexpr char kExpectedBytes[] = {
            kSub, kSub, kSub, kSub, '\x20', kSub, kSub, kSub, kSub};

    const QByteArray encoded = qjsvalue_cast<QByteArray>(
            evaluate("engine.convertCharset(engine.Charset.Latin9, 'مايأ نامز')"));

    EXPECT_EQ(encoded, QByteArrayView::fromArray(kExpectedBytes));
}

// Converting to UTF-16LE must emit two bytes per character, low byte first.
TEST_F(ControllerScriptEngineLegacyTest, convertCharsetMultiByteEncoding) {
    // One row per character of the input string.
    static constexpr char kExpectedBytes[] = {
            '\x45', '\x06',
            '\x27', '\x06',
            '\x4A', '\x06',
            '\x23', '\x06',
            '\x20', '\x00',
            '\x46', '\x06',
            '\x27', '\x06',
            '\x45', '\x06',
            '\x32', '\x06'};

    const QByteArray encoded = qjsvalue_cast<QByteArray>(
            evaluate("engine.convertCharset(engine.Charset.UTF_16LE, 'مايأ نامز')"));

    EXPECT_EQ(encoded, QByteArrayView::fromArray(kExpectedBytes));
}

// Test input mixing Latin, CJK, Hebrew, Japanese, Korean and emoji characters.
// It is a macro (not a constant) so it can be concatenated with adjacent
// string literals when building the script source in the test below.
#define COMPLICATEDSTRINGLITERAL "Hello, 世界! שלום! こんにちは! 안녕하세요! 😊"

/// Returns the number of bytes COMPLICATEDSTRINGLITERAL is expected to occupy
/// once it has been converted to `charset`.
static int convertedCharsetForString(ControllerScriptInterfaceLegacy::Charset charset) {
    using Charset = ControllerScriptInterfaceLegacy::Charset;
    switch (charset) {
    // Unicode transformation formats
    case Charset::UTF_8:
        return 63;
    case Charset::UTF_16LE:
    case Charset::UTF_16BE:
        return 66;
    case Charset::UTF_32LE:
    case Charset::UTF_32BE:
        return 128;
    // Charsets for which one byte per character is expected
    case Charset::ASCII:
    case Charset::CentralEurope:
    case Charset::Cyrillic:
    case Charset::Latin1:
    case Charset::Greek:
    case Charset::Turkish:
    case Charset::Hebrew:
    case Charset::Arabic:
    case Charset::Baltic:
    case Charset::Vietnamese:
    case Charset::Latin9:
    case Charset::KOI8_U:
        return 32;
    // East Asian multi-byte charsets
    case Charset::Shift_JIS:
    case Charset::EUC_JP:
    case Charset::EUC_KR:
    case Charset::Big5_HKSCS:
        return 49;
    // Remaining Unicode encodings
    case Charset::UCS2:
        return 68;
    case Charset::SCSU:
        return 51;
    case Charset::BOCU_1:
        return 53;
    case Charset::CESU_8:
        return 65;
    }
    // Not reachable for valid enumerators; gtest offers no way to assert
    // that here. A length of 0 will almost certainly fail the caller's
    // comparison as well.
    return 0;
}

// Runs the conversion once for every enumerator registered for Charset and
// compares the size of the result with the expected length for that charset.
TEST_F(ControllerScriptEngineLegacyTest, convertCharsetAllCharset) {
    const QMetaEnum charsets =
            QMetaEnum::fromType<ControllerScriptInterfaceLegacy::Charset>();
    const int charsetCount = charsets.keyCount();

    for (int index = 0; index < charsetCount; ++index) {
        const QString charsetName = charsets.key(index);
        const auto charset =
                static_cast<ControllerScriptInterfaceLegacy::Charset>(
                        charsets.value(index));

        // Build the script with the enumerator name substituted for %1.
        const QString script = QStringLiteral(
                "engine.convertCharset(engine.Charset.%1, "
                "'" COMPLICATEDSTRINGLITERAL "')")
                                       .arg(charsetName);

        const QByteArray encoded = qjsvalue_cast<QByteArray>(evaluate(script));
        EXPECT_EQ(encoded.size(), convertedCharsetForString(charset))
                << "Unexpected length of converted string for encoding: '"
                << charsetName.toStdString() << "'";
    }
}

#ifdef MIXXX_USE_QML
class MockScreenRender : public ControllerRenderingEngine {
public:
Expand Down

0 comments on commit fc59bff

Please sign in to comment.