From 967a91c7ec49a64bce7d3cc14002f385939239b9 Mon Sep 17 00:00:00 2001 From: Chi Tsai Date: Fri, 13 Dec 2024 20:34:56 -0800 Subject: [PATCH] Add createFromUtf16 JSI method (#48211) Summary: Adding the default implementation for `createFromUtf16` method for JSI String and PropNameId. Changelog: [Internal] Differential Revision: D67070206 --- .../ReactCommon/jsi/jsi/decorator.h | 16 +++++ .../react-native/ReactCommon/jsi/jsi/jsi.cpp | 71 +++++++++++++++++++ .../react-native/ReactCommon/jsi/jsi/jsi.h | 34 +++++++++ .../ReactCommon/jsi/jsi/test/testlib.cpp | 43 +++++++++++ 4 files changed, 164 insertions(+) diff --git a/packages/react-native/ReactCommon/jsi/jsi/decorator.h b/packages/react-native/ReactCommon/jsi/jsi/decorator.h index b16ecc21831642..1940c3de3d1dd5 100644 --- a/packages/react-native/ReactCommon/jsi/jsi/decorator.h +++ b/packages/react-native/ReactCommon/jsi/jsi/decorator.h @@ -182,6 +182,10 @@ class RuntimeDecorator : public Base, private jsi::Instrumentation { PropNameID createPropNameIDFromString(const String& str) override { return plain_.createPropNameIDFromString(str); }; + PropNameID createPropNameIDFromUtf16(const char16_t* utf16, size_t length) + override { + return plain_.createPropNameIDFromUtf16(utf16, length); + } PropNameID createPropNameIDFromSymbol(const Symbol& sym) override { return plain_.createPropNameIDFromSymbol(sym); }; @@ -221,6 +225,9 @@ class RuntimeDecorator : public Base, private jsi::Instrumentation { String createStringFromUtf8(const uint8_t* utf8, size_t length) override { return plain_.createStringFromUtf8(utf8, length); }; + String createStringFromUtf16(const char16_t* utf16, size_t length) override { + return plain_.createStringFromUtf16(utf16, length); + } std::string utf8(const String& s) override { return plain_.utf8(s); } @@ -649,6 +656,11 @@ class WithRuntimeDecorator : public RuntimeDecorator { Around around{with_}; return RD::createPropNameIDFromUtf8(utf8, length); }; + PropNameID createPropNameIDFromUtf16(const 
char16_t* utf16, size_t length) + override { + Around around{with_}; + return RD::createPropNameIDFromUtf16(utf16, length); + } PropNameID createPropNameIDFromString(const String& str) override { Around around{with_}; return RD::createPropNameIDFromString(str); @@ -704,6 +716,10 @@ class WithRuntimeDecorator : public RuntimeDecorator { Around around{with_}; return RD::createStringFromUtf8(utf8, length); }; + String createStringFromUtf16(const char16_t* utf16, size_t length) override { + Around around{with_}; + return RD::createStringFromUtf16(utf16, length); + } std::string utf8(const String& s) override { Around around{with_}; return RD::utf8(s); diff --git a/packages/react-native/ReactCommon/jsi/jsi/jsi.cpp b/packages/react-native/ReactCommon/jsi/jsi/jsi.cpp index e2e4a6faad45a3..29c5b9f51c2800 100644 --- a/packages/react-native/ReactCommon/jsi/jsi/jsi.cpp +++ b/packages/react-native/ReactCommon/jsi/jsi/jsi.cpp @@ -163,6 +163,49 @@ std::u16string convertUTF8ToUTF16(const std::string& utf8) { return ret; } +// Given an unsigned number, which is less than 16, return the hex character. +char hexDigit(unsigned int x) { + return x < 10 ? '0' + x : 'A' + (x - 10); +} + +// Given a sequence of UTF-16 code units, return true if all code units are +// ASCII characters +bool isAllASCII(const char16_t* utf16, size_t length) { + auto* curr = utf16; + auto* end = curr + length; + for (; curr < end; ++curr) { + if (*curr > 0x7F) + return false; + } + return true; +} + +// Given a sequence of UTF-16 code units, return a string that explicitly +// expresses the code units +std::string getUtf16CodeUnitString(const char16_t* utf16, size_t length) { + std::string s; + // Every character will need the character escape "\u" + 4 hex digits. Plus 2 + // characters for the opening and closing single quote. 
+ s.reserve(6 * length + 2); + s.append("'"); + + auto* curr = utf16; + auto* end = curr + length; + for (; curr < end; ++curr) { + s.append("\\u"); + char16_t ch = *curr; + char buffer[4]; + + buffer[0] = hexDigit((ch & 0xf000) >> 12); + buffer[1] = hexDigit((ch & 0x0f00) >> 8); + buffer[2] = hexDigit((ch & 0x00f0) >> 4); + buffer[3] = hexDigit(ch & 0x000f); + s.append(buffer, 4); + } + s.append("'"); + return s; +} + } // namespace Buffer::~Buffer() = default; @@ -248,6 +291,34 @@ Value Runtime::createValueFromJsonUtf8(const uint8_t* json, size_t length) { return parseJson.call(*this, String::createFromUtf8(*this, json, length)); } +String Runtime::createStringFromUtf16(const char16_t* utf16, size_t length) { + if (isAllASCII(utf16, length)) { + std::string buffer(utf16, utf16 + length); + return createStringFromAscii(buffer.data(), length); + } + auto s = getUtf16CodeUnitString(utf16, length); + return global() + .getPropertyAsFunction(*this, "eval") + .call(*this, s) + .getString(*this); +} + +PropNameID Runtime::createPropNameIDFromUtf16( + const char16_t* utf16, + size_t length) { + if (isAllASCII(utf16, length)) { + std::string buffer(utf16, utf16 + length); + auto jsString = createStringFromAscii(buffer.data(), length); + return createPropNameIDFromString(jsString); + } + auto s = getUtf16CodeUnitString(utf16, length); + auto jsString = global() + .getPropertyAsFunction(*this, "eval") + .call(*this, s) + .getString(*this); + return createPropNameIDFromString(jsString); +} + std::u16string Runtime::utf16(const PropNameID& sym) { auto utf8Str = utf8(sym); return convertUTF8ToUTF16(utf8Str); diff --git a/packages/react-native/ReactCommon/jsi/jsi/jsi.h b/packages/react-native/ReactCommon/jsi/jsi/jsi.h index 6b59a8945257cc..f60c368e0fcdb7 100644 --- a/packages/react-native/ReactCommon/jsi/jsi/jsi.h +++ b/packages/react-native/ReactCommon/jsi/jsi/jsi.h @@ -306,6 +306,9 @@ class JSI_EXPORT Runtime { virtual PropNameID createPropNameIDFromUtf8( const uint8_t* utf8, 
size_t length) = 0; + virtual PropNameID createPropNameIDFromUtf16( + const char16_t* utf16, + size_t length); virtual PropNameID createPropNameIDFromString(const String& str) = 0; virtual PropNameID createPropNameIDFromSymbol(const Symbol& sym) = 0; virtual std::string utf8(const PropNameID&) = 0; @@ -322,6 +325,7 @@ class JSI_EXPORT Runtime { virtual String createStringFromAscii(const char* str, size_t length) = 0; virtual String createStringFromUtf8(const uint8_t* utf8, size_t length) = 0; + virtual String createStringFromUtf16(const char16_t* utf16, size_t length); virtual std::string utf8(const String&) = 0; // \return a \c Value created from a utf8-encoded JSON string. The default @@ -515,6 +519,21 @@ class JSI_EXPORT PropNameID : public Pointer { reinterpret_cast(utf8.data()), utf8.size()); } + /// Given a series of UTF-16 encoded code units, create a PropNameId. The + /// input may contain unpaired surrogates, which will be interpreted as a code + /// point of the same value. + static PropNameID + forUtf16(Runtime& runtime, const char16_t* utf16, size_t length) { + return runtime.createPropNameIDFromUtf16(utf16, length); + } + + /// Given a series of UTF-16 encoded code units stored inside std::u16string, + /// create a PropNameId. The input may contain unpaired surrogates, which + /// will be interpreted as a code point of the same value. + static PropNameID forUtf16(Runtime& runtime, const std::u16string& str) { + return runtime.createPropNameIDFromUtf16(str.data(), str.size()); + } + /// Create a PropNameID from a JS string. static PropNameID forString(Runtime& runtime, const jsi::String& str) { return runtime.createPropNameIDFromString(str); @@ -699,6 +718,21 @@ class JSI_EXPORT String : public Pointer { reinterpret_cast(utf8.data()), utf8.length()); } + /// Given a series of UTF-16 encoded code units, create a JS String. The input + /// may contain unpaired surrogates, which will be interpreted as a code point + /// of the same value. 
+ static String + createFromUtf16(Runtime& runtime, const char16_t* utf16, size_t length) { + return runtime.createStringFromUtf16(utf16, length); + } + + /// Given a series of UTF-16 encoded code units stored inside std::u16string, + /// create a JS String. The input may contain unpaired surrogates, which will + /// be interpreted as a code point of the same value. + static String createFromUtf16(Runtime& runtime, const std::u16string& utf16) { + return runtime.createStringFromUtf16(utf16.data(), utf16.length()); + } + /// \return whether a and b contain the same characters. static bool strictEquals(Runtime& runtime, const String& a, const String& b) { return runtime.strictEquals(a, b); diff --git a/packages/react-native/ReactCommon/jsi/jsi/test/testlib.cpp b/packages/react-native/ReactCommon/jsi/jsi/test/testlib.cpp index f6d81f1021b084..91d69c03631341 100644 --- a/packages/react-native/ReactCommon/jsi/jsi/test/testlib.cpp +++ b/packages/react-native/ReactCommon/jsi/jsi/test/testlib.cpp @@ -1723,6 +1723,49 @@ TEST_P(JSITest, ObjectCreateWithPrototype) { EXPECT_TRUE(child.getPrototype(rd).isNull()); } +TEST_P(JSITest, CreateFromUtf16Test) { + // This Runtime Decorator is used to test the default createStringFromUtf16 + // and createPropNameIDFromUtf16 implementation for VMs that do not provide + // their own implementation + class RD : public RuntimeDecorator { + public: + RD(Runtime& rt) : RuntimeDecorator(rt) {} + + String createStringFromUtf16(const char16_t* utf16, size_t length) + override { + return Runtime::createStringFromUtf16(utf16, length); + } + + PropNameID createPropNameIDFromUtf16(const char16_t* utf16, size_t length) + override { + return Runtime::createPropNameIDFromUtf16(utf16, length); + } + }; + + RD rd = RD(rt); + std::u16string utf16 = u"foobar"; + + auto jsString = String::createFromUtf16(rd, utf16); + EXPECT_EQ(jsString.utf16(rd), utf16); + auto prop = PropNameID::forUtf16(rd, utf16); + EXPECT_EQ(prop.utf16(rd), utf16); + + utf16 = 
u"hello!👋"; + jsString = String::createFromUtf16(rd, utf16.data(), utf16.length()); + EXPECT_EQ(jsString.utf16(rd), utf16); + prop = PropNameID::forUtf16(rd, utf16); + EXPECT_EQ(prop.utf16(rd), utf16); + + utf16 = u"\xd83d"; + jsString = String::createFromUtf16(rd, utf16.data(), utf16.length()); + /// We need to use charCodeAt instead of UTF16 because the default + /// implementation of UTF16 converts to UTF8, then to UTF16, so we will lose + /// the lone surrogate value. + rd.global().setProperty(rd, "loneSurrogate", jsString); + auto cp = eval("loneSurrogate.charCodeAt(0)").getNumber(); + EXPECT_EQ(cp, 55357); // 0xD83D in decimal +} + INSTANTIATE_TEST_CASE_P( Runtimes, JSITest,