mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-02-20 19:54:19 +01:00
[util] Implement utility functions for string conversion
This commit is contained in:
parent
bad7d4690b
commit
200df73ba7
@ -1,6 +1,207 @@
|
||||
#include "util_string.h"
|
||||
|
||||
namespace dxvk::str {
|
||||
|
||||
const uint8_t* decodeTypedChar(
|
||||
const uint8_t* begin,
|
||||
const uint8_t* end,
|
||||
uint32_t& ch) {
|
||||
uint32_t first = begin[0];
|
||||
|
||||
if (likely(first < 0x80)) {
|
||||
// Basic ASCII character
|
||||
ch = uint32_t(first);
|
||||
return begin + 1;
|
||||
} else if (unlikely(first < 0xC0)) {
|
||||
// Character starts with a continuation byte,
|
||||
// just skip until we find the next valid prefix
|
||||
while ((begin < end) && (((*begin) & 0xC0) == 0x80))
|
||||
begin += 1;
|
||||
|
||||
ch = uint32_t('?');
|
||||
return begin;
|
||||
} else {
|
||||
// The number of leading 1 bits in the first byte
|
||||
// determines the length of this character
|
||||
size_t length = bit::lzcnt((~first) << 24);
|
||||
|
||||
if (unlikely(begin + length > end)) {
|
||||
ch = uint32_t('?');
|
||||
return end;
|
||||
}
|
||||
|
||||
if (first < 0xE0) {
|
||||
ch = ((uint32_t(begin[0]) & 0x1F) << 6)
|
||||
| ((uint32_t(begin[1]) & 0x3F));
|
||||
} else if (first < 0xF0) {
|
||||
ch = ((uint32_t(begin[0]) & 0x0F) << 12)
|
||||
| ((uint32_t(begin[1]) & 0x3F) << 6)
|
||||
| ((uint32_t(begin[2]) & 0x3F));
|
||||
} else if (first < 0xF8) {
|
||||
ch = ((uint32_t(begin[0]) & 0x07) << 18)
|
||||
| ((uint32_t(begin[1]) & 0x3F) << 12)
|
||||
| ((uint32_t(begin[2]) & 0x3F) << 6)
|
||||
| ((uint32_t(begin[3]) & 0x3F));
|
||||
} else {
|
||||
// Invalid prefix
|
||||
ch = uint32_t('?');
|
||||
}
|
||||
|
||||
return begin + length;
|
||||
}
|
||||
}
|
||||
|
||||
const uint16_t* decodeTypedChar(
|
||||
const uint16_t* begin,
|
||||
const uint16_t* end,
|
||||
uint32_t& ch) {
|
||||
uint32_t first = begin[0];
|
||||
|
||||
if (likely(first < 0xD800)) {
|
||||
ch = first;
|
||||
return begin + 1;
|
||||
} else if (first < 0xDC00) {
|
||||
if (unlikely(begin + 2 > end)) {
|
||||
ch = uint32_t('?');
|
||||
return end;
|
||||
}
|
||||
|
||||
ch = 0x10000
|
||||
+ ((uint32_t(begin[0]) & 0x3FF) << 10)
|
||||
+ ((uint32_t(begin[1]) & 0x3FF));
|
||||
return begin + 2;
|
||||
} else if (unlikely(first < 0xE000)) {
|
||||
// Stray low surrogate
|
||||
ch = uint32_t('?');
|
||||
return begin + 1;
|
||||
} else {
|
||||
ch = first;
|
||||
return begin + 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * \brief Decodes one character from UTF-32 input
 *
 * Each unit is a complete character code and is
 * passed through without any validation.
 * \param [in] begin Current read position. Must be dereferenceable.
 * \param [in] end End of the input (not used)
 * \param [out] ch Decoded character code
 * \returns Read position following the consumed unit
 */
const uint32_t* decodeTypedChar(
  const uint32_t* begin,
  const uint32_t* end,
        uint32_t& ch) {
  const uint32_t* next = begin + 1;
  ch = *begin;
  return next;
}
|
||||
|
||||
|
||||
size_t encodeTypedChar(
|
||||
uint8_t* begin,
|
||||
uint8_t* end,
|
||||
uint32_t ch) {
|
||||
if (likely(ch < 0x80)) {
|
||||
if (begin) {
|
||||
if (unlikely(begin + 1 > end))
|
||||
return 0;
|
||||
|
||||
begin[0] = uint8_t(ch);
|
||||
}
|
||||
|
||||
return 1;
|
||||
} else if (ch < 0x800) {
|
||||
if (begin) {
|
||||
if (unlikely(begin + 2 > end))
|
||||
return 0;
|
||||
|
||||
begin[0] = uint8_t(0xC0 | (ch >> 6));
|
||||
begin[1] = uint8_t(0x80 | (ch & 0x3F));
|
||||
}
|
||||
|
||||
return 2;
|
||||
} else if (ch < 0x10000) {
|
||||
if (begin) {
|
||||
if (unlikely(begin + 3 > end))
|
||||
return 0;
|
||||
|
||||
begin[0] = uint8_t(0xE0 | ((ch >> 12)));
|
||||
begin[1] = uint8_t(0x80 | ((ch >> 6) & 0x3F));
|
||||
begin[2] = uint8_t(0x80 | ((ch >> 0) & 0x3F));
|
||||
}
|
||||
|
||||
return 3;
|
||||
} else if (ch < 0x200000) {
|
||||
if (begin) {
|
||||
if (unlikely(begin + 4 < end))
|
||||
return 0;
|
||||
|
||||
begin[0] = uint8_t(0xF0 | ((ch >> 18)));
|
||||
begin[1] = uint8_t(0x80 | ((ch >> 12) & 0x3F));
|
||||
begin[2] = uint8_t(0x80 | ((ch >> 6) & 0x3F));
|
||||
begin[3] = uint8_t(0x80 | ((ch >> 0) & 0x3F));
|
||||
}
|
||||
|
||||
return 4;
|
||||
} else {
|
||||
// Invalid code point for UTF-8
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
size_t encodeTypedChar(
|
||||
uint16_t* begin,
|
||||
uint16_t* end,
|
||||
uint32_t ch) {
|
||||
if (likely(ch < 0xD800)) {
|
||||
if (begin) {
|
||||
if (unlikely(begin + 1 > end))
|
||||
return 0;
|
||||
|
||||
begin[0] = ch;
|
||||
}
|
||||
|
||||
return 1;
|
||||
} else if (ch < 0xE000) {
|
||||
// Private use code points,
|
||||
// we can't encode these
|
||||
return 0;
|
||||
} else if (ch < 0x10000) {
|
||||
if (begin) {
|
||||
if (unlikely(begin + 1 > end))
|
||||
return 0;
|
||||
|
||||
begin[0] = ch;
|
||||
}
|
||||
|
||||
return 1;
|
||||
} else if (ch < 0x110000) {
|
||||
if (begin) {
|
||||
if (unlikely(begin + 2 > end))
|
||||
return 0;
|
||||
|
||||
ch -= 0x10000;
|
||||
begin[0] = uint16_t(0xD800 + (ch >> 10));
|
||||
begin[1] = uint16_t(0xDC00 + (ch & 0x3FF));
|
||||
}
|
||||
|
||||
return 2;
|
||||
} else {
|
||||
// Invalid code point
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
size_t encodeTypedChar(
|
||||
uint32_t* begin,
|
||||
uint32_t* end,
|
||||
uint32_t ch) {
|
||||
if (begin) {
|
||||
if (unlikely(begin + 1 > end))
|
||||
return 0;
|
||||
|
||||
begin[0] = ch;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
std::string fromws(const WCHAR *ws) {
|
||||
size_t len = ::WideCharToMultiByte(CP_UTF8,
|
||||
0, ws, -1, nullptr, 0, nullptr, nullptr);
|
||||
|
@ -7,8 +7,160 @@
|
||||
|
||||
#include "./com/com_include.h"
|
||||
|
||||
#include "util_bit.h"
|
||||
#include "util_likely.h"
|
||||
|
||||
namespace dxvk::str {
|
||||
|
||||
|
||||
/**
 * \brief Maps a code unit size to an unsigned integer type
 *
 * Specialized for sizes of 1, 2 and 4 bytes. The primary template
 * has no \c type member, so any other size fails to compile.
 * \tparam S Size of one code unit, in bytes
 */
template<size_t S>
struct UnicodeChar { };

template<>
struct UnicodeChar<1> {
  using type = uint8_t;
};

template<>
struct UnicodeChar<2> {
  using type = uint16_t;
};

template<>
struct UnicodeChar<4> {
  using type = uint32_t;
};

/**
 * \brief Unsigned integer type of the same size as \c T
 * \tparam T Character type
 */
template<typename T>
using UnicodeCharType = typename UnicodeChar<sizeof(T)>::type;
|
||||
|
||||
/**
 * \brief Decodes one character from UTF-8, UTF-16 or UTF-32 input
 *
 * The overload is selected by the code unit type. \c begin
 * must be dereferenceable, i.e. it must not be equal to \c end.
 * \param [in] begin Current read position
 * \param [in] end End of the input
 * \param [out] ch Decoded character code
 * \returns Read position following the consumed units
 */
const uint8_t* decodeTypedChar(
  const uint8_t*  begin,
  const uint8_t*  end,
        uint32_t& ch);

const uint16_t* decodeTypedChar(
  const uint16_t* begin,
  const uint16_t* end,
        uint32_t& ch);

const uint32_t* decodeTypedChar(
  const uint32_t* begin,
  const uint32_t* end,
        uint32_t& ch);

/**
 * \brief Encodes one character as UTF-8, UTF-16 or UTF-32
 *
 * The overload is selected by the code unit type.
 * \param [in] begin Current write position, or \c nullptr
 *    to only query the encoded length
 * \param [in] end End of the output buffer
 * \param [in] ch Character to encode
 * \returns Number of units written, or the number of units
 *    required if \c begin is \c nullptr. May be \c 0.
 */
size_t encodeTypedChar(
        uint8_t*  begin,
        uint8_t*  end,
        uint32_t  ch);

size_t encodeTypedChar(
        uint16_t* begin,
        uint16_t* end,
        uint32_t  ch);

size_t encodeTypedChar(
        uint32_t* begin,
        uint32_t* end,
        uint32_t  ch);
|
||||
|
||||
/**
|
||||
* \brief Decodes a single character
|
||||
*
|
||||
* Note that \c begin and \c end must not be equal.
|
||||
* \param [in] begin Pointer to current position within the input string
|
||||
* \param [in] end Pointer to the end of the input string
|
||||
* \param [out] ch Pointer to the decoded character code
|
||||
* \returns Pointer to next character in the input string
|
||||
*/
|
||||
template<typename T>
|
||||
const T* decodeChar(
|
||||
const T* begin,
|
||||
const T* end,
|
||||
uint32_t& ch) {
|
||||
using CharType = UnicodeCharType<T>;
|
||||
|
||||
const CharType* result = decodeTypedChar(
|
||||
reinterpret_cast<const CharType*>(begin),
|
||||
reinterpret_cast<const CharType*>(end),
|
||||
ch);
|
||||
|
||||
return reinterpret_cast<const T*>(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Encodes a character
|
||||
*
|
||||
* Note that \c begin and \c end may be both be \c nullptr or equal, in
|
||||
* which case only the length of the encoded character will be returned.
|
||||
* \param [in] begin Pointer to current position within the output string
|
||||
* \param [in] end Pointer to the end of the output string
|
||||
* \param [in] ch Character to encode
|
||||
* \returns If begin is \c nullptr , the number of units required to encode
|
||||
* the character. Otherwise, the number of units written to the output.
|
||||
* This may return \c 0 for characters that cannot be written or encoded.
|
||||
*/
|
||||
template<typename T>
|
||||
size_t encodeChar(
|
||||
T* begin,
|
||||
T* end,
|
||||
uint32_t ch) {
|
||||
using CharType = UnicodeCharType<T>;
|
||||
|
||||
return encodeTypedChar(
|
||||
reinterpret_cast<CharType*>(begin),
|
||||
reinterpret_cast<CharType*>(end),
|
||||
ch);
|
||||
}
|
||||
|
||||
/**
 * \brief Computes length of a null-terminated string
 *
 * The result is a number of code units of type \c S ,
 * not a number of decoded characters.
 * \tparam S Character type
 * \param [in] string Start of input string
 * \returns Number of units in the input string,
 *    excluding the terminating null character
 */
template<typename S>
size_t length(const S* string) {
  const S* cursor = string;

  // Walk forward to the terminator
  while (*cursor)
    ++cursor;

  return size_t(cursor - string);
}
|
||||
|
||||
/**
|
||||
* \brief Converts string from one encoding to another
|
||||
*
|
||||
* The output string arguments may be \c nullptr. In that case, the
|
||||
* total length of the transcoded string will be returned, in units
|
||||
* of the output character type. The output string will only be
|
||||
* null-terminated if the input string is also null-terminated.
|
||||
* \tparam D Output character type
|
||||
* \tparam S Input character type
|
||||
* \param [in] dstBegin Start of output string
|
||||
* \param [in] dstLength Length of output string
|
||||
* \param [in] srcBegin Start of input string
|
||||
* \param [in] srcLength Length of input string
|
||||
* \returns If \c dstBegin is \c nullptr , the total number of output
|
||||
* characters required to store the output string. Otherwise, the
|
||||
* total number of characters written to the output string.
|
||||
*/
|
||||
template<typename D, typename S>
|
||||
size_t transcodeString(
|
||||
D* dstBegin,
|
||||
size_t dstLength,
|
||||
const S* srcBegin,
|
||||
size_t srcLength) {
|
||||
size_t totalLength = 0;
|
||||
|
||||
auto dstEnd = dstBegin + dstLength;
|
||||
auto srcEnd = srcBegin + srcLength;
|
||||
|
||||
while (srcBegin < srcEnd) {
|
||||
uint32_t ch;
|
||||
|
||||
srcBegin = decodeChar<S>(srcBegin, srcEnd, ch);
|
||||
|
||||
if (dstBegin)
|
||||
totalLength += encodeChar<D>(dstBegin + totalLength, dstEnd, ch);
|
||||
else
|
||||
totalLength += encodeChar<D>(nullptr, nullptr, ch);
|
||||
|
||||
if (!ch)
|
||||
break;
|
||||
}
|
||||
|
||||
return totalLength;
|
||||
}
|
||||
|
||||
std::string fromws(const WCHAR *ws);
|
||||
|
||||
void tows(const char* mbs, WCHAR* wcs, size_t wcsLen);
|
||||
|
Loading…
x
Reference in New Issue
Block a user