1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2024-12-05 01:24:14 +01:00
dxvk/src/util/util_string.cpp
Joshua Ashton 3fddc364ee [util] Fix UTF8 encodeTypedChar for 4 byte chars
Some flipped logic here...
2023-06-10 13:58:33 +02:00

235 lines
5.1 KiB
C++

#include "util_string.h"
namespace dxvk::str {
// Decodes one UTF-8 encoded character.
//
// begin must point to at least one readable byte, end points past
// the last readable byte. The decoded code point is written to ch
// and a pointer to the first unconsumed byte is returned.
//
// Malformed input never stops decoding: ch is set to '?' and the
// returned pointer refers to the first byte that was not recognized
// as part of the broken sequence, so that the next call can resume
// on a byte which may start a character. Only the structure of the
// sequence is checked (lead byte and continuation bytes); overlong
// forms and the resulting code point value are not validated.
const uint8_t* decodeTypedChar(
  const uint8_t*  begin,
  const uint8_t*  end,
        uint32_t& ch) {
  uint32_t first = begin[0];

  if (likely(first < 0x80)) {
    // Basic ASCII character
    ch = first;
    return begin + 1;
  }

  if (unlikely(first < 0xC0 || first >= 0xF8)) {
    // Either a stray continuation byte (10xxxxxx), or a byte with
    // five or more leading 1 bits which cannot start any sequence
    // handled below. Drop it together with the continuation bytes
    // trailing it rather than skipping a fixed number of bytes,
    // which could swallow valid characters following the bad byte.
    const uint8_t* next = begin + 1;

    while ((next < end) && (((*next) & 0xC0) == 0x80))
      next += 1;

    ch = uint32_t('?');
    return next;
  }

  // The lead byte determines the length of the sequence: 110xxxxx
  // starts two bytes, 1110xxxx three bytes, 11110xxx four bytes
  const size_t length = first < 0xE0 ? 2 : (first < 0xF0 ? 3 : 4);

  // Payload bits stored in the lead byte (5, 4 or 3 bits)
  uint32_t value = first & (0xFFu >> (length + 1));

  for (size_t i = 1; i < length; i++) {
    // Bail out if the input ends early or if the next byte is not
    // a continuation byte, and leave that byte for the next call.
    // All bytes before index i are known to be readable here, so
    // begin + i never points further than end.
    if (unlikely((begin + i >= end) || ((begin[i] & 0xC0) != 0x80))) {
      ch = uint32_t('?');
      return begin + i;
    }

    value = (value << 6) | (uint32_t(begin[i]) & 0x3F);
  }

  ch = value;
  return begin + length;
}
// Decodes one UTF-16 encoded character.
//
// begin must point to at least one readable code unit, end points
// past the last readable code unit. The decoded code point is
// written to ch and a pointer to the first unconsumed code unit
// is returned. Unpaired surrogates are reported as '?'.
const uint16_t* decodeTypedChar(
  const uint16_t* begin,
  const uint16_t* end,
        uint32_t& ch) {
  uint32_t first = begin[0];

  if (likely(first < 0xD800)) {
    // Code point stored in a single code unit
    ch = first;
    return begin + 1;
  } else if (first < 0xDC00) {
    // High surrogate, needs a second code unit. Compare the
    // remaining length instead of forming begin + 2, which
    // may point beyond the end of the buffer.
    if (unlikely(end - begin < 2)) {
      ch = uint32_t('?');
      return end;
    }

    uint32_t second = begin[1];

    if (unlikely((second & 0xFC00) != 0xDC00)) {
      // Not followed by a low surrogate. Only consume the stray
      // high surrogate so that the following code unit, which
      // may be a perfectly valid character, is decoded next.
      ch = uint32_t('?');
      return begin + 1;
    }

    ch = 0x10000
       + ((first  & 0x3FF) << 10)
       + ((second & 0x3FF));
    return begin + 2;
  } else if (unlikely(first < 0xE000)) {
    // Stray low surrogate
    ch = uint32_t('?');
    return begin + 1;
  } else {
    // Remaining code points stored in a single code unit
    ch = first;
    return begin + 1;
  }
}
// Decodes one UTF-32 character. Every code unit is a complete
// code point, so the value is copied to ch as-is and the pointer
// to the following code unit is returned. end is not inspected.
const uint32_t* decodeTypedChar(
  const uint32_t* begin,
  const uint32_t* end,
        uint32_t& ch) {
  const uint32_t* next = begin + 1;
  ch = *begin;
  return next;
}
// Encodes one code point as UTF-8.
//
// Returns the number of bytes the encoded character occupies. If
// begin is null, nothing is written and only that number is
// returned. Returns 0 if the code point does not fit into four
// bytes (0x200000 and above), or if a destination is given and
// the space between begin and end is too small.
size_t encodeTypedChar(
        uint8_t*  begin,
        uint8_t*  end,
        uint32_t  ch) {
  size_t count;

  // Work out the encoded size first
  if (likely(ch < 0x80))
    count = 1;
  else if (ch < 0x800)
    count = 2;
  else if (ch < 0x10000)
    count = 3;
  else if (ch < 0x200000)
    count = 4;
  else
    return 0; // Invalid code point for UTF-8

  // Size query only, no destination buffer
  if (!begin)
    return count;

  if (unlikely(begin + count > end))
    return 0;

  switch (count) {
    case 1:
      begin[0] = uint8_t(ch);
      break;

    case 2:
      begin[0] = uint8_t(0xC0 | (ch >> 6));
      begin[1] = uint8_t(0x80 | (ch & 0x3F));
      break;

    case 3:
      begin[0] = uint8_t(0xE0 | (ch >> 12));
      begin[1] = uint8_t(0x80 | ((ch >> 6) & 0x3F));
      begin[2] = uint8_t(0x80 | (ch & 0x3F));
      break;

    default:
      begin[0] = uint8_t(0xF0 | (ch >> 18));
      begin[1] = uint8_t(0x80 | ((ch >> 12) & 0x3F));
      begin[2] = uint8_t(0x80 | ((ch >> 6) & 0x3F));
      begin[3] = uint8_t(0x80 | (ch & 0x3F));
      break;
  }

  return count;
}
// Encodes one code point as UTF-16.
//
// Returns the number of code units the encoded character
// occupies. If begin is null, nothing is written and only that
// number is returned. Returns 0 for values in the surrogate range
// (0xD800 to 0xDFFF) and for values of 0x110000 and above, neither
// of which can be encoded, or if a destination is given and the
// space between begin and end is too small.
size_t encodeTypedChar(
        uint16_t* begin,
        uint16_t* end,
        uint32_t  ch) {
  const bool isSurrogate = (ch >= 0xD800) && (ch < 0xE000);

  if (unlikely(isSurrogate || ch >= 0x110000))
    return 0;

  // Everything below 0x10000 fits into a single
  // code unit, the rest needs a surrogate pair
  const size_t count = (ch < 0x10000) ? 1 : 2;

  // Size query only, no destination buffer
  if (!begin)
    return count;

  if (unlikely(begin + count > end))
    return 0;

  if (count == 1) {
    begin[0] = uint16_t(ch);
  } else {
    const uint32_t offset = ch - 0x10000;
    begin[0] = uint16_t(0xD800 + (offset >> 10));
    begin[1] = uint16_t(0xDC00 + (offset & 0x3FF));
  }

  return count;
}
// Encodes one code point as UTF-32, which always takes exactly
// one code unit. If begin is null, nothing is written and 1 is
// returned. Returns 0 if a destination is given but there is no
// room for one code unit between begin and end.
size_t encodeTypedChar(
        uint32_t* begin,
        uint32_t* end,
        uint32_t  ch) {
  // Size query only, no destination buffer
  if (!begin)
    return 1;

  if (unlikely(begin + 1 > end))
    return 0;

  *begin = ch;
  return 1;
}
// Converts a wide character string to a std::string by running
// it through transcodeString twice: once with a null destination,
// whose return value is used as the size of the result, and once
// more to fill the string allocated with that size.
std::string fromws(const WCHAR* ws) {
  const size_t inputLength = length(ws);
  const size_t outputLength = transcodeString<char>(
    nullptr, 0, ws, inputLength);

  std::string converted(outputLength, '\0');
  transcodeString(converted.data(),
    outputLength, ws, inputLength);
  return converted;
}
// Converts a narrow character string to a std::wstring by running
// it through transcodeString twice: once with a null destination,
// whose return value is used as the size of the result, and once
// more to fill the string allocated with that size.
std::wstring tows(const char* mbs) {
  const size_t inputLength = length(mbs);
  const size_t outputLength = transcodeString<wchar_t>(
    nullptr, 0, mbs, inputLength);

  std::wstring converted(outputLength, L'\0');
  transcodeString(converted.data(),
    outputLength, mbs, inputLength);
  return converted;
}
}