2018-07-21 12:51:50 +02:00
|
|
|
#include "util_string.h"
|
|
|
|
|
|
|
|
namespace dxvk::str {
|
2022-07-28 18:40:46 +02:00
|
|
|
|
|
|
|
const uint8_t* decodeTypedChar(
|
|
|
|
const uint8_t* begin,
|
|
|
|
const uint8_t* end,
|
|
|
|
uint32_t& ch) {
|
|
|
|
uint32_t first = begin[0];
|
|
|
|
|
|
|
|
if (likely(first < 0x80)) {
|
|
|
|
// Basic ASCII character
|
|
|
|
ch = uint32_t(first);
|
|
|
|
return begin + 1;
|
|
|
|
} else if (unlikely(first < 0xC0)) {
|
|
|
|
// Character starts with a continuation byte,
|
|
|
|
// just skip until we find the next valid prefix
|
|
|
|
while ((begin < end) && (((*begin) & 0xC0) == 0x80))
|
|
|
|
begin += 1;
|
|
|
|
|
|
|
|
ch = uint32_t('?');
|
|
|
|
return begin;
|
|
|
|
} else {
|
|
|
|
// The number of leading 1 bits in the first byte
|
|
|
|
// determines the length of this character
|
|
|
|
size_t length = bit::lzcnt((~first) << 24);
|
|
|
|
|
|
|
|
if (unlikely(begin + length > end)) {
|
|
|
|
ch = uint32_t('?');
|
|
|
|
return end;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (first < 0xE0) {
|
|
|
|
ch = ((uint32_t(begin[0]) & 0x1F) << 6)
|
|
|
|
| ((uint32_t(begin[1]) & 0x3F));
|
|
|
|
} else if (first < 0xF0) {
|
|
|
|
ch = ((uint32_t(begin[0]) & 0x0F) << 12)
|
|
|
|
| ((uint32_t(begin[1]) & 0x3F) << 6)
|
|
|
|
| ((uint32_t(begin[2]) & 0x3F));
|
|
|
|
} else if (first < 0xF8) {
|
|
|
|
ch = ((uint32_t(begin[0]) & 0x07) << 18)
|
|
|
|
| ((uint32_t(begin[1]) & 0x3F) << 12)
|
|
|
|
| ((uint32_t(begin[2]) & 0x3F) << 6)
|
|
|
|
| ((uint32_t(begin[3]) & 0x3F));
|
|
|
|
} else {
|
|
|
|
// Invalid prefix
|
|
|
|
ch = uint32_t('?');
|
|
|
|
}
|
|
|
|
|
|
|
|
return begin + length;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
const uint16_t* decodeTypedChar(
|
|
|
|
const uint16_t* begin,
|
|
|
|
const uint16_t* end,
|
|
|
|
uint32_t& ch) {
|
|
|
|
uint32_t first = begin[0];
|
|
|
|
|
|
|
|
if (likely(first < 0xD800)) {
|
|
|
|
ch = first;
|
|
|
|
return begin + 1;
|
|
|
|
} else if (first < 0xDC00) {
|
|
|
|
if (unlikely(begin + 2 > end)) {
|
|
|
|
ch = uint32_t('?');
|
|
|
|
return end;
|
|
|
|
}
|
|
|
|
|
|
|
|
ch = 0x10000
|
|
|
|
+ ((uint32_t(begin[0]) & 0x3FF) << 10)
|
|
|
|
+ ((uint32_t(begin[1]) & 0x3FF));
|
|
|
|
return begin + 2;
|
|
|
|
} else if (unlikely(first < 0xE000)) {
|
|
|
|
// Stray low surrogate
|
|
|
|
ch = uint32_t('?');
|
|
|
|
return begin + 1;
|
|
|
|
} else {
|
|
|
|
ch = first;
|
|
|
|
return begin + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Decodes a single code point from a UTF-32 sequence.
//
// Every code unit is a complete code point, so the first unit is
// copied to ch as-is and the returned pointer is one unit past
// begin. The end pointer is not looked at and no validation of
// the value takes place.
const uint32_t* decodeTypedChar(
  const uint32_t* begin,
  const uint32_t* end,
        uint32_t& ch) {
  const uint32_t* const next = begin + 1;

  ch = *begin;
  return next;
}
|
|
|
|
|
|
|
|
|
|
|
|
size_t encodeTypedChar(
|
|
|
|
uint8_t* begin,
|
|
|
|
uint8_t* end,
|
|
|
|
uint32_t ch) {
|
|
|
|
if (likely(ch < 0x80)) {
|
|
|
|
if (begin) {
|
|
|
|
if (unlikely(begin + 1 > end))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
begin[0] = uint8_t(ch);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
} else if (ch < 0x800) {
|
|
|
|
if (begin) {
|
|
|
|
if (unlikely(begin + 2 > end))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
begin[0] = uint8_t(0xC0 | (ch >> 6));
|
|
|
|
begin[1] = uint8_t(0x80 | (ch & 0x3F));
|
|
|
|
}
|
|
|
|
|
|
|
|
return 2;
|
|
|
|
} else if (ch < 0x10000) {
|
|
|
|
if (begin) {
|
|
|
|
if (unlikely(begin + 3 > end))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
begin[0] = uint8_t(0xE0 | ((ch >> 12)));
|
|
|
|
begin[1] = uint8_t(0x80 | ((ch >> 6) & 0x3F));
|
|
|
|
begin[2] = uint8_t(0x80 | ((ch >> 0) & 0x3F));
|
|
|
|
}
|
|
|
|
|
|
|
|
return 3;
|
|
|
|
} else if (ch < 0x200000) {
|
|
|
|
if (begin) {
|
|
|
|
if (unlikely(begin + 4 < end))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
begin[0] = uint8_t(0xF0 | ((ch >> 18)));
|
|
|
|
begin[1] = uint8_t(0x80 | ((ch >> 12) & 0x3F));
|
|
|
|
begin[2] = uint8_t(0x80 | ((ch >> 6) & 0x3F));
|
|
|
|
begin[3] = uint8_t(0x80 | ((ch >> 0) & 0x3F));
|
|
|
|
}
|
|
|
|
|
|
|
|
return 4;
|
|
|
|
} else {
|
|
|
|
// Invalid code point for UTF-8
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
size_t encodeTypedChar(
|
|
|
|
uint16_t* begin,
|
|
|
|
uint16_t* end,
|
|
|
|
uint32_t ch) {
|
|
|
|
if (likely(ch < 0xD800)) {
|
|
|
|
if (begin) {
|
|
|
|
if (unlikely(begin + 1 > end))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
begin[0] = ch;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
} else if (ch < 0xE000) {
|
|
|
|
// Private use code points,
|
|
|
|
// we can't encode these
|
|
|
|
return 0;
|
|
|
|
} else if (ch < 0x10000) {
|
|
|
|
if (begin) {
|
|
|
|
if (unlikely(begin + 1 > end))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
begin[0] = ch;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
} else if (ch < 0x110000) {
|
|
|
|
if (begin) {
|
|
|
|
if (unlikely(begin + 2 > end))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
ch -= 0x10000;
|
|
|
|
begin[0] = uint16_t(0xD800 + (ch >> 10));
|
|
|
|
begin[1] = uint16_t(0xDC00 + (ch & 0x3FF));
|
|
|
|
}
|
|
|
|
|
|
|
|
return 2;
|
|
|
|
} else {
|
|
|
|
// Invalid code point
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
size_t encodeTypedChar(
|
|
|
|
uint32_t* begin,
|
|
|
|
uint32_t* end,
|
|
|
|
uint32_t ch) {
|
|
|
|
if (begin) {
|
|
|
|
if (unlikely(begin + 1 > end))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
begin[0] = ch;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-07-28 19:50:16 +02:00
|
|
|
std::string fromws(const WCHAR* ws) {
|
|
|
|
size_t srcLen = length(ws);
|
|
|
|
size_t dstLen = transcodeString<char>(
|
|
|
|
nullptr, 0, ws, srcLen);
|
2018-07-21 12:51:50 +02:00
|
|
|
|
2022-07-28 19:50:16 +02:00
|
|
|
std::string result;
|
|
|
|
result.resize(dstLen);
|
2018-07-21 12:51:50 +02:00
|
|
|
|
2022-07-28 19:50:16 +02:00
|
|
|
transcodeString(result.data(),
|
|
|
|
dstLen, ws, srcLen);
|
2018-07-21 12:51:50 +02:00
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
2019-02-11 21:35:32 +01:00
|
|
|
|
|
|
|
|
2020-09-10 05:02:53 +02:00
|
|
|
std::wstring tows(const char* mbs) {
|
2022-07-28 19:50:16 +02:00
|
|
|
size_t srcLen = length(mbs);
|
|
|
|
size_t dstLen = transcodeString<wchar_t>(
|
|
|
|
nullptr, 0, mbs, srcLen);
|
2020-09-10 05:02:53 +02:00
|
|
|
|
|
|
|
std::wstring result;
|
2022-07-28 19:50:16 +02:00
|
|
|
result.resize(dstLen);
|
|
|
|
|
|
|
|
transcodeString(result.data(),
|
|
|
|
dstLen, mbs, srcLen);
|
|
|
|
|
2020-09-10 05:02:53 +02:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2018-07-21 12:51:50 +02:00
|
|
|
}
|