Merge pull request #1694 from DarkflameUniverse/latin1

fix: use encoding on latin1 strings from cdclient
This commit is contained in:
Gie "Max" Vanommeslaeghe 2024-12-25 00:27:00 +01:00 committed by GitHub
commit 6ed6efa921
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 39 additions and 10 deletions

View File

@ -2,16 +2,25 @@
#include <string>
//For reading null-terminated strings
std::string BinaryIO::ReadString(std::istream& instream) {
std::string toReturn;
char buffer;
// Reads elements of StringType::value_type from instream until a null
// terminator is consumed, and returns everything read before it.
//
// StringType is the string class to build (e.g. std::string, std::u8string);
// one value_type element is read per step.
//
// Reading also stops as soon as the stream is no longer in a good state, so a
// truncated or unterminated input yields the characters read so far instead of
// appending the last element forever.
// NOTE(review): assumes BinaryIO::BinaryRead reports a short read through the
// stream state (failbit/eofbit) - confirm against its definition.
template<typename StringType>
StringType ReadString(std::istream& instream) {
	StringType toReturn{};
	typename StringType::value_type buffer{};

	BinaryIO::BinaryRead(instream, buffer);
	// Check the stream first: after a failed read `buffer` still holds the
	// previous element and must not be appended again.
	while (instream && buffer != 0x00) {
		toReturn += buffer;
		// Exactly one read per iteration, so no element is skipped.
		BinaryIO::BinaryRead(instream, buffer);
	}

	return toReturn;
}
// Reads a null-terminated string of plain `char` elements from instream by
// delegating to the file-local ReadString template.
std::string BinaryIO::ReadString(std::istream& instream) {
	std::string narrowText = ::ReadString<std::string>(instream);
	return narrowText;
}
// Reads a null-terminated string of `char8_t` elements from instream by
// delegating to the file-local ReadString template.
std::u8string BinaryIO::ReadU8String(std::istream& instream) {
	std::u8string u8Text = ::ReadString<std::u8string>(instream);
	return u8Text;
}

View File

@ -65,6 +65,8 @@ namespace BinaryIO {
// Reads characters from instream up to and including the first null terminator
// and returns the characters before it as a std::string.
std::string ReadString(std::istream& instream);
// Same as ReadString, but collects the elements into a std::u8string.
std::u8string ReadU8String(std::istream& instream);
inline bool DoesFileExist(const std::string& name) {
std::ifstream f(name.c_str());
return f.good();

View File

@ -65,13 +65,14 @@ int64_t FdbToSqlite::Convert::ReadInt64(std::istream& cdClientBuffer) {
return value;
}
// Reads a null-terminated string from the cdclient buffer and returns it as UTF-8.
//
// cdclient is encoded in latin1, so the string is read element-by-element as a
// u8string and then converted with GeneralUtils::Latin1ToWTF8 rather than being
// returned as-is.
//
// The value returned by SeekPointer() is used to restore the get position once
// the string has been read.
// NOTE(review): presumably SeekPointer() jumps to the string's stored offset
// and returns the position to come back to - confirm against its definition.
std::string FdbToSqlite::Convert::ReadString(std::istream& cdClientBuffer) {
	const int32_t prevPosition = SeekPointer(cdClientBuffer);

	const auto readString = BinaryIO::ReadU8String(cdClientBuffer);

	// Put the stream back where the caller expects it before converting.
	cdClientBuffer.seekg(prevPosition);

	return GeneralUtils::Latin1ToWTF8(readString);
}
int32_t FdbToSqlite::Convert::SeekPointer(std::istream& cdClientBuffer) {

View File

@ -167,17 +167,19 @@ std::u16string GeneralUtils::ASCIIToUTF16(const std::string_view string, const s
return ret;
}
//! Converts a (potentially-ill-formed) UTF-16 string, or a Latin1 string, to UTF-8 (WTF-8)
//! See: <http://simonsapin.github.io/wtf-8/#decoding-ill-formed-utf-16>
std::string GeneralUtils::UTF16ToWTF8(const std::u16string_view string, const size_t size) {
template<typename StringType>
std::string ToWTF8(const StringType string, const size_t size) {
const size_t newSize = MinSize(size, string);
std::string ret;
ret.reserve(newSize);
for (size_t i = 0; i < newSize; ++i) {
const char16_t u = string[i];
const auto u = string[i];
if (IsLeadSurrogate(u) && (i + 1) < newSize) {
const char16_t next = string[i + 1];
const auto next = string[i + 1];
if (IsTrailSurrogate(next)) {
i += 1;
const char32_t cp = 0x10000
@ -194,6 +196,13 @@ std::string GeneralUtils::UTF16ToWTF8(const std::u16string_view string, const si
return ret;
}
std::string GeneralUtils::Latin1ToWTF8(const std::u8string_view string, const size_t size) {
return ToWTF8(string, size);
}
std::string GeneralUtils::UTF16ToWTF8(const std::u16string_view string, const size_t size) {
return ToWTF8(string, size);
}
bool GeneralUtils::CaseInsensitiveStringCompare(const std::string_view a, const std::string_view b) {
return std::equal(a.begin(), a.end(), b.begin(), b.end(), [](char a, char b) { return tolower(a) == tolower(b); });

View File

@ -51,6 +51,14 @@ namespace GeneralUtils {
bool _NextUTF8Char(std::string_view& slice, uint32_t& out);
}
//! Converts a Latin1 string to a UTF-8 string
/*!
\param string The Latin1-encoded string to convert
\param size A size to trim the string to. Default is SIZE_MAX (No trimming)
\return A UTF-8 representation of the string
*/
std::string Latin1ToWTF8(const std::u8string_view string, const size_t size = SIZE_MAX);
//! Converts a UTF-16 string to a UTF-8 string
/*!
\param string The string to convert