use encoding

use template function

Update GeneralUtils.cpp

consolidate duplicate code

Update GeneralUtils.cpp

Update BinaryIO.cpp

compilers
This commit is contained in:
David Markowitz 2024-12-24 14:09:39 -08:00
parent 2560bb00da
commit 18295017c1
6 changed files with 40 additions and 11 deletions

View File

@ -2,16 +2,25 @@
#include <string> #include <string>
// Reads a null-terminated string from a stream, one code unit at a time.
//
// StringType selects the string class to build (e.g. std::string or
// std::u8string); its value_type is the code unit read on each step.
// The terminating zero is consumed from the stream but is not stored in
// the result. If the stream fails or ends before a terminator is seen,
// whatever was read up to that point is returned.
template<typename StringType>
StringType ReadString(std::istream& instream) {
	StringType toReturn{};
	typename StringType::value_type buffer{};

	BinaryIO::BinaryRead(instream, buffer);

	// Check the stream as well as the terminator: a failed read is not
	// guaranteed to update `buffer`, so without this check a truncated
	// stream could keep appending the last code unit forever.
	while (instream && buffer != 0x00) {
		toReturn += buffer;
		BinaryIO::BinaryRead(instream, buffer);
	}

	return toReturn;
}
// Reads a null-terminated string of plain chars from instream by
// delegating to the file-local ReadString template.
std::string BinaryIO::ReadString(std::istream& instream) {
return ::ReadString<std::string>(instream);
}
// Reads a null-terminated string from instream into a std::u8string by
// delegating to the file-local ReadString template. This function itself
// performs no encoding conversion; the code units are returned as read.
std::u8string BinaryIO::ReadU8String(std::istream& instream) {
return ::ReadString<std::u8string>(instream);
}

View File

@ -65,6 +65,8 @@ namespace BinaryIO {
std::string ReadString(std::istream& instream); std::string ReadString(std::istream& instream);
// Like ReadString, but returns the null-terminated data as a std::u8string.
std::u8string ReadU8String(std::istream& instream);
inline bool DoesFileExist(const std::string& name) { inline bool DoesFileExist(const std::string& name) {
std::ifstream f(name.c_str()); std::ifstream f(name.c_str());
return f.good(); return f.good();

View File

@ -65,13 +65,14 @@ int64_t FdbToSqlite::Convert::ReadInt64(std::istream& cdClientBuffer) {
return value; return value;
} }
// cdclient is encoded in latin1, so the string is read as raw code units
// (BinaryIO::ReadU8String) and converted with GeneralUtils::Latin1ToWTF8
// before being returned. The stream is moved back to the position that
// SeekPointer returned once the string has been read.
// NOTE(review): SeekPointer is presumed to jump to the string's location
// and hand back the prior offset — confirm against its definition.
std::string FdbToSqlite::Convert::ReadString(std::istream& cdClientBuffer) { std::string FdbToSqlite::Convert::ReadString(std::istream& cdClientBuffer) {
int32_t prevPosition = SeekPointer(cdClientBuffer); int32_t prevPosition = SeekPointer(cdClientBuffer);
auto readString = BinaryIO::ReadString(cdClientBuffer); const auto readString = BinaryIO::ReadU8String(cdClientBuffer);
cdClientBuffer.seekg(prevPosition); cdClientBuffer.seekg(prevPosition);
return readString; return GeneralUtils::Latin1ToWTF8(readString);
} }
int32_t FdbToSqlite::Convert::SeekPointer(std::istream& cdClientBuffer) { int32_t FdbToSqlite::Convert::SeekPointer(std::istream& cdClientBuffer) {

View File

@ -167,17 +167,19 @@ std::u16string GeneralUtils::ASCIIToUTF16(const std::string_view string, const s
return ret; return ret;
} }
//! Converts a (potentially-ill-formed) UTF-16 string to UTF-8
//! Converts a (potentially-ill-formed) Latin1 string to UTF-8
//! See: <http://simonsapin.github.io/wtf-8/#decoding-ill-formed-utf-16> //! See: <http://simonsapin.github.io/wtf-8/#decoding-ill-formed-utf-16>
std::string GeneralUtils::UTF16ToWTF8(const std::u16string_view string, const size_t size) { template<typename StringType>
std::string ToWTF8(const StringType string, const size_t size) {
const size_t newSize = MinSize(size, string); const size_t newSize = MinSize(size, string);
std::string ret; std::string ret;
ret.reserve(newSize); ret.reserve(newSize);
for (size_t i = 0; i < newSize; ++i) { for (size_t i = 0; i < newSize; ++i) {
const char16_t u = string[i]; const auto u = string[i];
if (IsLeadSurrogate(u) && (i + 1) < newSize) { if (IsLeadSurrogate(u) && (i + 1) < newSize) {
const char16_t next = string[i + 1]; const auto next = string[i + 1];
if (IsTrailSurrogate(next)) { if (IsTrailSurrogate(next)) {
i += 1; i += 1;
const char32_t cp = 0x10000 const char32_t cp = 0x10000
@ -194,6 +196,13 @@ std::string GeneralUtils::UTF16ToWTF8(const std::u16string_view string, const si
return ret; return ret;
} }
//! Converts a Latin1 string (held in a std::u8string_view) to UTF-8
//! by forwarding to the shared ToWTF8 template; `size` is passed through unchanged.
std::string GeneralUtils::Latin1ToWTF8(const std::u8string_view string, const size_t size) {
return ToWTF8(string, size);
}
//! Converts a (potentially-ill-formed) UTF-16 string to UTF-8
//! by forwarding to the shared ToWTF8 template; `size` is passed through unchanged.
std::string GeneralUtils::UTF16ToWTF8(const std::u16string_view string, const size_t size) {
return ToWTF8(string, size);
}
bool GeneralUtils::CaseInsensitiveStringCompare(const std::string_view a, const std::string_view b) { bool GeneralUtils::CaseInsensitiveStringCompare(const std::string_view a, const std::string_view b) {
return std::equal(a.begin(), a.end(), b.begin(), b.end(), [](char a, char b) { return tolower(a) == tolower(b); }); return std::equal(a.begin(), a.end(), b.begin(), b.end(), [](char a, char b) { return tolower(a) == tolower(b); });

View File

@ -51,6 +51,14 @@ namespace GeneralUtils {
bool _NextUTF8Char(std::string_view& slice, uint32_t& out); bool _NextUTF8Char(std::string_view& slice, uint32_t& out);
} }
//! Converts a Latin1 string to a UTF-8 string
/*!
\param string The Latin1 string to convert, passed as a view of char8_t code units
\param size The maximum number of code units to convert; the string is trimmed to this size. Default is SIZE_MAX (no trimming)
\return A UTF-8 representation of the string
*/
std::string Latin1ToWTF8(const std::u8string_view string, const size_t size = SIZE_MAX);
//! Converts a UTF-16 string to a UTF-8 string //! Converts a UTF-16 string to a UTF-8 string
/*! /*!
\param string The string to convert \param string The string to convert