#pragma region CPL License /* Nuclex Native Framework Copyright (C) 2002-2023 Nuclex Development Labs This library is free software; you can redistribute it and/or modify it under the terms of the IBM Common Public License as published by the IBM Corporation; either version 1.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the IBM Common Public License for more details. You should have received a copy of the IBM Common Public License along with this library */ #pragma endregion // CPL License // If the library is compiled as a DLL, this ensures symbols are exported #define NUCLEX_SUPPORT_SOURCE 1 #include "Nuclex/Support/Text/StringConverter.h" #include namespace Nuclex { namespace Support { namespace Text { // ------------------------------------------------------------------------------------------- // TEST(StringConverterTest, CanConvertUtf8ToWide) { std::string text = u8"ฤƒัฃ๐” ีฎแปลฟฤฃศŸแŽฅ๐’‹วฉฤพแธฟ๊ž‘ศฏ๐˜ฑ๐‘ž๐—‹๐˜ดศถ๐ž„๐œˆฯˆ๐’™๐˜†๐šฃ"; EXPECT_EQ(StringConverter::WideFromUtf8(text), L"ฤƒัฃ๐” ีฎแปลฟฤฃศŸแŽฅ๐’‹วฉฤพแธฟ๊ž‘ศฏ๐˜ฑ๐‘ž๐—‹๐˜ดศถ๐ž„๐œˆฯˆ๐’™๐˜†๐šฃ"); text = u8"๐˜ˆแธ†๐–ข๐•ฏูคแธžิะว๐™…ฦ˜ิธโฒ˜๐™‰เงฆฮก๐—คษŒ๐“ขศšะฆ๐’ฑั ๐“งฦณศคังแ–ฏฤ‡๐—ฑแป…๐‘“๐™œแ‚น๐žฒ๐‘—๐’Œฤผแนƒล‰ะพ๐žŽ๐’’แตฒ๊œฑ๐™ฉแปซ๐—ลต๐’™๐’šลบ"; EXPECT_EQ( StringConverter::WideFromUtf8(text), L"๐˜ˆแธ†๐–ข๐•ฏูคแธžิะว๐™…ฦ˜ิธโฒ˜๐™‰เงฆฮก๐—คษŒ๐“ขศšะฆ๐’ฑั ๐“งฦณศคังแ–ฏฤ‡๐—ฑแป…๐‘“๐™œแ‚น๐žฒ๐‘—๐’Œฤผแนƒล‰ะพ๐žŽ๐’’แตฒ๊œฑ๐™ฉแปซ๐—ลต๐’™๐’šลบ" ); text = u8"ะแธ‚โฒค๐——๐–ค๐—™๊ž ๊“งศŠ๐‰๐œฅ๊“ก๐‘€๐‘ตวฌ๐™ฟ๐‘„ล–๐‘†๐’ฏ๐–ด๐˜๐˜ž๊“ซลธ๐œกแบฃ๐˜ขฦ€๐–ผแธ‹แบฟแตฎโ„Š๐™แŽฅ๐•›ะบฮนแนƒีคโฑบ๐“…๐˜ฒ๐•ฃ๐–˜ลง๐‘ขแนฝแบ‰๐˜…แƒงลพ"; EXPECT_EQ( StringConverter::WideFromUtf8(text), L"ะแธ‚โฒค๐——๐–ค๐—™๊ž ๊“งศŠ๐‰๐œฅ๊“ก๐‘€๐‘ตวฌ๐™ฟ๐‘„ล–๐‘†๐’ฏ๐–ด๐˜๐˜ž๊“ซลธ๐œกแบฃ๐˜ขฦ€๐–ผแธ‹แบฟแตฎโ„Š๐™แŽฅ๐•›ะบฮนแนƒีคโฑบ๐“…๐˜ฒ๐•ฃ๐–˜ลง๐‘ขแนฝแบ‰๐˜…แƒงลพ" ); text = u8"ัฆ๐™ฑฦ‡แ—žฮฃโ„ฑิาคูก๐”ะš๐“›๐“œฦศŽ๐šธ๐‘„แนš๐“ขแนฎแนบฦฒแ”๊“ซ๐šˆ๐šญ๐œถแรงแƒซ๐‘’๐–ฟ๐—€แธง๐—‚๐ฃาษญแธฟ๐•Ÿ๐จ๐”๐•ขแน›๐“ผั‚รบ๐”ณแบƒโคฌ๐ฒ๐—“"; EXPECT_EQ( StringConverter::WideFromUtf8(text), L"ัฆ๐™ฑฦ‡แ—žฮฃโ„ฑิาคูก๐”ะš๐“›๐“œฦศŽ๐šธ๐‘„แนš๐“ขแนฎแนบฦฒแ”๊“ซ๐šˆ๐šญ๐œถแรงแƒซ๐‘’๐–ฟ๐—€แธง๐—‚๐ฃาษญแธฟ๐•Ÿ๐จ๐”๐•ขแน›๐“ผั‚รบ๐”ณแบƒโคฌ๐ฒ๐—“" ); text = u8"๐– ฮ’๐’ž๐˜‹๐™ด๐“•ฤขศžแปˆ๐•ต๊“—สŸ๐™ผโ„•เงฆ๐šธ๐—คี€๊“ขแนฐว“โ…ค๐”šโฒฌ๐‘Œ๐™•๐˜ข๐•ค"; EXPECT_EQ( StringConverter::WideFromUtf8(text), L"๐– ฮ’๐’ž๐˜‹๐™ด๐“•ฤขศžแปˆ๐•ต๊“—สŸ๐™ผโ„•เงฆ๐šธ๐—คี€๊“ขแนฐว“โ…ค๐”šโฒฌ๐‘Œ๐™•๐˜ข๐•ค" ); } // ------------------------------------------------------------------------------------------- // TEST(StringConverterTest, CanConvertWideToUtf8) { std::wstring text = L"ฤƒัฃ๐” ีฎแปลฟฤฃศŸแŽฅ๐’‹วฉฤพแธฟ๊ž‘ศฏ๐˜ฑ๐‘ž๐—‹๐˜ดศถ๐ž„๐œˆฯˆ๐’™๐˜†๐šฃ"; EXPECT_EQ(StringConverter::Utf8FromWide(text), u8"ฤƒัฃ๐” ีฎแปลฟฤฃศŸแŽฅ๐’‹วฉฤพแธฟ๊ž‘ศฏ๐˜ฑ๐‘ž๐—‹๐˜ดศถ๐ž„๐œˆฯˆ๐’™๐˜†๐šฃ"); text = L"๐˜ˆแธ†๐–ข๐•ฏูคแธžิะว๐™…ฦ˜ิธโฒ˜๐™‰เงฆฮก๐—คษŒ๐“ขศšะฆ๐’ฑั ๐“งฦณศคังแ–ฏฤ‡๐—ฑแป…๐‘“๐™œแ‚น๐žฒ๐‘—๐’Œฤผแนƒล‰ะพ๐žŽ๐’’แตฒ๊œฑ๐™ฉแปซ๐—ลต๐’™๐’šลบ"; EXPECT_EQ( StringConverter::Utf8FromWide(text), u8"๐˜ˆแธ†๐–ข๐•ฏูคแธžิะว๐™…ฦ˜ิธโฒ˜๐™‰เงฆฮก๐—คษŒ๐“ขศšะฆ๐’ฑั ๐“งฦณศคังแ–ฏฤ‡๐—ฑแป…๐‘“๐™œแ‚น๐žฒ๐‘—๐’Œฤผแนƒล‰ะพ๐žŽ๐’’แตฒ๊œฑ๐™ฉแปซ๐—ลต๐’™๐’šลบ" ); text = L"ะแธ‚โฒค๐——๐–ค๐—™๊ž ๊“งศŠ๐‰๐œฅ๊“ก๐‘€๐‘ตวฌ๐™ฟ๐‘„ล–๐‘†๐’ฏ๐–ด๐˜๐˜ž๊“ซลธ๐œกแบฃ๐˜ขฦ€๐–ผแธ‹แบฟแตฎโ„Š๐™แŽฅ๐•›ะบฮนแนƒีคโฑบ๐“…๐˜ฒ๐•ฃ๐–˜ลง๐‘ขแนฝแบ‰๐˜…แƒงลพ"; EXPECT_EQ( StringConverter::Utf8FromWide(text), u8"ะแธ‚โฒค๐——๐–ค๐—™๊ž ๊“งศŠ๐‰๐œฅ๊“ก๐‘€๐‘ตวฌ๐™ฟ๐‘„ล–๐‘†๐’ฏ๐–ด๐˜๐˜ž๊“ซลธ๐œกแบฃ๐˜ขฦ€๐–ผแธ‹แบฟแตฎโ„Š๐™แŽฅ๐•›ะบฮนแนƒีคโฑบ๐“…๐˜ฒ๐•ฃ๐–˜ลง๐‘ขแนฝแบ‰๐˜…แƒงลพ" ); text = L"ัฆ๐™ฑฦ‡แ—žฮฃโ„ฑิาคูก๐”ะš๐“›๐“œฦศŽ๐šธ๐‘„แนš๐“ขแนฎแนบฦฒแ”๊“ซ๐šˆ๐šญ๐œถแรงแƒซ๐‘’๐–ฟ๐—€แธง๐—‚๐ฃาษญแธฟ๐•Ÿ๐จ๐”๐•ขแน›๐“ผั‚รบ๐”ณแบƒโคฌ๐ฒ๐—“"; EXPECT_EQ( StringConverter::Utf8FromWide(text), u8"ัฆ๐™ฑฦ‡แ—žฮฃโ„ฑิาคูก๐”ะš๐“›๐“œฦศŽ๐šธ๐‘„แนš๐“ขแนฎแนบฦฒแ”๊“ซ๐šˆ๐šญ๐œถแรงแƒซ๐‘’๐–ฟ๐—€แธง๐—‚๐ฃาษญแธฟ๐•Ÿ๐จ๐”๐•ขแน›๐“ผั‚รบ๐”ณแบƒโคฌ๐ฒ๐—“" ); text = L"๐– ฮ’๐’ž๐˜‹๐™ด๐“•ฤขศžแปˆ๐•ต๊“—สŸ๐™ผโ„•เงฆ๐šธ๐—คี€๊“ขแนฐว“โ…ค๐”šโฒฌ๐‘Œ๐™•๐˜ข๐•ค"; EXPECT_EQ( StringConverter::Utf8FromWide(text), u8"๐– ฮ’๐’ž๐˜‹๐™ด๐“•ฤขศžแปˆ๐•ต๊“—สŸ๐™ผโ„•เงฆ๐šธ๐—คี€๊“ขแนฐว“โ…ค๐”šโฒฌ๐‘Œ๐™•๐˜ข๐•ค" ); } // ------------------------------------------------------------------------------------------- // TEST(StringConverterTest, Utf8StringsCanBeCaseFolded) { std::string variant1 = u8"HeLlO wOrLd ร„ รถ รœ ฮป ฮฆ ฮด แบž"; std::string variant2 = u8"hElLo WoRlD รค ร– รผ ฮ› ฯ† ฮ” รŸ"; std::string wrong1 = u8"hElLo WoRlD A o U ฮป ฮฆ ฮ” B"; std::string wrong2 = u8"hElLo WoRlD รค รถ รผ ^ & โˆฉ b"; EXPECT_EQ( StringConverter::FoldedLowercaseFromUtf8(variant1), StringConverter::FoldedLowercaseFromUtf8(variant2) ); EXPECT_NE( StringConverter::FoldedLowercaseFromUtf8(variant1), StringConverter::FoldedLowercaseFromUtf8(wrong1) ); EXPECT_NE( StringConverter::FoldedLowercaseFromUtf8(variant2), StringConverter::FoldedLowercaseFromUtf8(wrong2) ); } // ------------------------------------------------------------------------------------------- // TEST(StringConverterTest, Utf8CharactersCanBeCounted) { // Symbols that require 4 bytes to represent in UTF-8. If your IDE doesn't display this, // that's fine. Just, maybe, don't save this file! // (it's a fly, a snake, an alligator and a fish symbol from egyptian hieroglyphs) std::string fourByteSymbols = u8"๐“†ฆ๐“†“๐“†Œ๐“†Ÿ"; EXPECT_EQ(StringConverter::CountUtf8Letters(fourByteSymbols), 4U); // Also try with some less exotic UTF-8 characters using 2 or 3 bytes. std::string otherSymbols = u8"๐– ฮ’๐’ž๐˜‹๐™ด๐“•ฤขศžแปˆ๐•ต๊“—สŸ๐™ผโ„•เงฆ๐šธ๐—คี€๊“ขแนฐว“โ…ค๐”šโฒฌ๐‘Œ๐™•๐˜ข๐•ค"; EXPECT_EQ(StringConverter::CountUtf8Letters(otherSymbols), 28U); } // ------------------------------------------------------------------------------------------- // }}} // namespace Nuclex::Support::Text