mirror of
https://github.com/DarkflameUniverse/DarkflameServer.git
synced 2024-11-10 02:08:20 +00:00
427 lines
13 KiB
Python
427 lines
13 KiB
Python
"""
|
|
Module for sequential reading (ReadStream) and writing (WriteStream) from/to bytes.
|
|
Also includes objects for converting datatypes from/to bytes, similar to the standard library struct module.
|
|
"""
|
|
|
|
# https://github.com/lcdr/bitstream/blob/master/bitstream/__init__.py
|
|
|
|
import math
|
|
import struct
|
|
from abc import ABC, abstractmethod
|
|
from typing import AnyStr, ByteString, cast, Generic, overload, SupportsBytes, Type, TypeVar
|
|
|
|
# Type of the Python value a _Struct subclass packs and unpacks.
T = TypeVar('T')


class _Struct(Generic[T]):
	"""
	Base class for single fixed-size values packed with the standard library struct module.

	Subclasses assign a compiled struct.Struct to _struct.
	"Instantiating" a subclass does not create an object of that class: __new__ returns the packed bytes directly,
	so the result of calling a subclass is already the serialized value.
	"""
	# Only declared here; every concrete subclass has to assign its own struct.Struct.
	_struct: struct.Struct

	def __new__(cls, value: T) -> bytes:
		"""Pack value with the subclass's format and return the resulting bytes."""
		return cls._struct.pack(value)

	def __str__(self) -> str:
		"""Return a short description containing the struct format string."""
		# Resolve _struct through the instance so the concrete subclass's struct is used.
		# The base class never assigns _struct, so looking it up on _Struct itself raises AttributeError.
		return "<Struct %s>" % self._struct.format

	@classmethod
	def deserialize(cls, stream: "ReadStream") -> T:
		"""Read _struct.size bytes from stream and return the single unpacked value."""
		raw = stream.read(bytes, length=cls._struct.size)
		return cast(T, cls._struct.unpack(raw)[0])
|
|
|
|
class IntStruct(_Struct[int]):
	"""Common base of all integer struct types; adds no behavior of its own."""
|
|
|
|
class UnsignedIntStruct(IntStruct):
	"""Unsigned integer struct which can additionally be read in compressed form."""

	@classmethod
	def deserialize_compressed(cls, stream: "ReadStream") -> int:
		"""
		Read a compressed value of this type from stream.

		For every byte except the lowest, starting at the highest, one flag bit is read:
		a set bit means that byte is zero and was left out, a cleared bit means this byte and all lower ones follow verbatim.
		If all of those bytes were left out, a final flag bit tells whether only 4 bits of the lowest byte follow (set) or the whole byte (cleared).
		"""
		size = cls._struct.size

		for top_index in range(size - 1, 0, -1):
			if not stream.read(c_bit):
				# Bytes 0..top_index are present in the stream, everything above is zero
				present = stream.read(bytes, length=top_index + 1)
				zero_fill = bytes(size - top_index - 1)
				return cast(int, cls._struct.unpack(present + zero_fill)[0])

		# Only the lowest byte is left
		if stream.read(c_bit):
			lowest = bytes([stream.read_bits(4)])
		else:
			lowest = stream.read(bytes, length=1)
		return cast(int, cls._struct.unpack(lowest + bytes(size - 1))[0])
|
|
|
|
class SignedIntStruct(IntStruct):
	"""Base of the signed integer struct types; adds no behavior of its own."""
|
|
|
|
class c_bool(_Struct[bool]):
	"""Boolean, struct format "<?"."""
	_struct = struct.Struct("<?")
|
|
|
|
class c_float(_Struct[float]):
	"""Little-endian float, struct format "<f"."""
	_struct = struct.Struct("<f")
|
|
|
|
class c_double(_Struct[float]):
	"""Little-endian double, struct format "<d"."""
	_struct = struct.Struct("<d")
|
|
|
|
class c_int(SignedIntStruct):
	"""Little-endian signed integer, struct format "<i"."""
	_struct = struct.Struct("<i")
|
|
|
|
class c_uint(UnsignedIntStruct):
	"""Little-endian unsigned integer, struct format "<I"."""
	_struct = struct.Struct("<I")
|
|
|
|
|
|
class c_byte(SignedIntStruct):
	"""Signed byte, struct format "<b"."""
	_struct = struct.Struct("<b")
|
|
|
|
class c_ubyte(UnsignedIntStruct):
	"""Unsigned byte, struct format "<B"."""
	_struct = struct.Struct("<B")
|
|
|
|
class c_short(SignedIntStruct):
	"""Little-endian signed short, struct format "<h"."""
	_struct = struct.Struct("<h")
|
|
|
|
class c_ushort(UnsignedIntStruct):
	"""Little-endian unsigned short, struct format "<H"."""
	_struct = struct.Struct("<H")
|
|
|
|
class c_long(SignedIntStruct):
	"""Little-endian signed long, struct format "<l"."""
	_struct = struct.Struct("<l")
|
|
|
|
class c_ulong(UnsignedIntStruct):
	"""Little-endian unsigned long, struct format "<L"."""
	_struct = struct.Struct("<L")
|
|
|
|
class c_longlong(SignedIntStruct):
	"""Little-endian signed long long, struct format "<q"."""
	_struct = struct.Struct("<q")
|
|
|
|
class c_ulonglong(UnsignedIntStruct):
	"""Little-endian unsigned long long, struct format "<Q"."""
	_struct = struct.Struct("<Q")
|
|
|
|
# Aliases named after the bit width of the type they refer to.
# Signed
c_int8 = c_byte
c_int16 = c_short
c_int32 = c_long
c_int64 = c_longlong
# Unsigned
c_uint8 = c_ubyte
c_uint16 = c_ushort
c_uint32 = c_ulong
c_uint64 = c_ulonglong
|
|
|
|
class c_bit:
	"""Wraps a boolean so the streams can tell it is to be handled as a single bit."""

	def __init__(self, boolean: bool):
		"""Store boolean as .value."""
		self.value = boolean
|
|
|
|
class Serializable(ABC):
	"""
	Interface for user-defined types that the bitstream read/write functions accept.

	Subclasses have to implement both serialize and deserialize.
	"""

	@abstractmethod
	def serialize(self, stream: "WriteStream") -> None:
		"""Write this object to the bitstream."""

	@classmethod
	@abstractmethod
	def deserialize(cls, stream: "ReadStream") -> "Serializable":
		"""Create a new object from the bitstream."""


# Type variable for "some Serializable subclass".
S = TypeVar("S", bound=Serializable)
|
|
|
|
class ReadStream:
	"""
	Allows simple sequential reading from bytes.

	The read position is kept in bits, so values don't have to start on a byte boundary.
	Within a byte, bits are consumed starting at the most significant one.
	"""
	_data: bytes

	def __init__(self, data: bytes, unlocked: bool=False):
		"""
		data is the buffer to read from.
		unlocked determines whether the read_offset property can be accessed.
		"""
		self._data = data
		self._unlocked = unlocked
		self._read_offset = 0 # in bits

	@property
	def read_offset(self) -> int:
		"""The current read position in bits. Raises RuntimeError if the stream is not unlocked."""
		if not self._unlocked:
			raise RuntimeError("access to read offset on locked stream")
		return self._read_offset

	@read_offset.setter
	def read_offset(self, value: int) -> None:
		if not self._unlocked:
			raise RuntimeError("access to read offset on locked stream")
		self._read_offset = value

	def skip_read(self, byte_length: int) -> None:
		"""Skips reading byte_length number of bytes."""
		self._read_offset += byte_length * 8

	@overload
	def read(self, arg_type: Type[_Struct[T]]) -> T:
		...

	@overload
	def read(self, arg_type: Type[c_bit]) -> bool:
		...

	@overload
	def read(self, arg_type: Type[S]) -> S:
		...

	@overload
	def read(self, arg_type: Type[bytes], length: int) -> bytes:
		...

	@overload
	def read(self, arg_type: Type[bytes], allocated_length: int=None, length_type: Type[UnsignedIntStruct]=None) -> bytes:
		...

	@overload
	def read(self, arg_type: Type[str], allocated_length: int=None, length_type: Type[UnsignedIntStruct]=None) -> str:
		...

	def read(self, arg_type, length=None, allocated_length=None, length_type=None):
		"""
		Read a value of type arg_type from the bitstream.
		length is the number of bytes to read when arg_type is bytes and no string option is given.
		allocated_length is for fixed-length strings.
		length_type is for variable-length strings.
		Raises TypeError if arg_type is not supported, or if raw bytes are requested without a length.
		"""
		if issubclass(arg_type, _Struct):
			return arg_type.deserialize(self)
		if issubclass(arg_type, c_bit):
			return self._read_bit()
		if issubclass(arg_type, Serializable):
			return arg_type.deserialize(self)
		if allocated_length is not None or length_type is not None:
			return self._read_str(arg_type, allocated_length, length_type)
		if issubclass(arg_type, bytes):
			if length is None:
				# Would otherwise fail further down with an unhelpful operand TypeError
				raise TypeError("length is required when reading raw bytes")
			return self._read_bytes(length)
		raise TypeError(arg_type)

	def _read_str(self, arg_type: Type[AnyStr], allocated_length: int=None, length_type: Type[UnsignedIntStruct]=None) -> AnyStr:
		"""
		Read a string; str is decoded from UTF-16-LE (2 bytes per character unit), anything else is returned as bytes.

		With length_type, a length of that type is read first and that many character units follow.
		With allocated_length, that many character units are read and the result is cut at the first null character;
		RuntimeError is raised if there is none.
		Raises ValueError if neither is given.
		"""
		is_text = issubclass(arg_type, str)
		char_size = 2 if is_text else 1

		if length_type is not None:
			# Variable-length string
			length = self.read(length_type)
			value = self._read_bytes(length*char_size)
		elif allocated_length is not None:
			# Fixed-length string
			value = self._read_bytes(allocated_length*char_size)
			# find null terminator, looking only at character boundaries
			terminator = bytes(char_size)
			for start in range(0, len(value), char_size):
				if value[start:start+char_size] == terminator:
					value = value[:start]
					break
			else:
				raise RuntimeError("String doesn't have null terminator")
		else:
			raise ValueError

		if is_text:
			return value.decode("utf-16-le")
		return value

	def _read_bit(self) -> bool:
		"""Read the bit at the current offset and advance by one bit."""
		byte_index, bit_index = divmod(self._read_offset, 8)
		bit = self._data[byte_index] & (0x80 >> bit_index) != 0
		self._read_offset += 1
		return bit

	def read_bits(self, number_of_bits: int) -> int:
		"""Read less than a byte's worth of bits (1 to 7) and return them as an integer aligned to the right."""
		assert 0 < number_of_bits < 8

		byte_index, bit_index = divmod(self._read_offset, 8)
		output = (self._data[byte_index] << bit_index) & 0xff # First half
		if bit_index != 0 and number_of_bits > 8 - bit_index: # If we have a second half, we didn't read enough bits in the first half
			output |= self._data[byte_index + 1] >> (8 - bit_index) # Second half (overlaps byte boundary)
		output >>= 8 - number_of_bits
		self._read_offset += number_of_bits
		return output

	def _read_bytes(self, length: int) -> bytes:
		"""
		Read length bytes starting at the current (possibly unaligned) offset.
		Raises EOFError if the data does not contain enough bytes.
		"""
		byte_index, bit_index = divmod(self._read_offset, 8)
		aligned = bit_index == 0
		# An unaligned read touches one more byte of the underlying data
		num_bytes_read = length if aligned else length+1

		# check whether there is enough left to read
		remaining = len(self._data) - byte_index
		if remaining < num_bytes_read:
			raise EOFError("Trying to read %i bytes but only %i remain" % (num_bytes_read, remaining))

		if aligned:
			output = self._data[byte_index:byte_index + num_bytes_read]
		else:
			# data is shifted
			# clear the part before the struct
			firstbyte = self._data[byte_index] & ((1 << (8 - bit_index)) - 1)
			output = firstbyte.to_bytes(1, "big") + self._data[byte_index + 1:byte_index + num_bytes_read]
			# shift back
			output = (int.from_bytes(output, "big") >> (8 - bit_index)).to_bytes(length, "big")
		self._read_offset += length * 8
		return output

	def read_compressed(self, arg_type: Type[UnsignedIntStruct]) -> int:
		"""Read a value of arg_type that was written in compressed form."""
		return arg_type.deserialize_compressed(self)

	def read_remaining(self) -> bytes:
		"""Read as many bytes as there are left after the byte the read offset is currently in (or at)."""
		return self._read_bytes(len(self._data) - int(math.ceil(self._read_offset / 8)))

	def align_read(self) -> None:
		"""Advance the read offset to the next byte boundary if it isn't on one already."""
		if self._read_offset % 8 != 0:
			self._read_offset += 8 - self._read_offset % 8

	def all_read(self) -> bool:
		"""Return whether the read offset has reached the end of the data."""
		# This is not accurate to the bit, just to the byte
		return math.ceil(self._read_offset / 8) == len(self._data)
|
|
|
|
# Note: a ton of the logic here assumes that the write offset is never moved back, that is, that you never overwrite things
|
|
# Doing so may break everything
|
|
class WriteStream(SupportsBytes):
	"""
	Allows simple sequential writing to bytes.

	The write position is kept in bits. Within a byte, bits are filled starting at the most significant one.
	The result is obtained with bytes(stream), which is only allowed once per stream.
	"""
	_data: bytearray

	def __init__(self) -> None:
		self._data = bytearray()
		self._write_offset = 0 # in bits
		self._was_cast_to_bytes = False

	def __bytes__(self) -> bytes:
		"""Return everything written so far. Raises RuntimeError when called a second time."""
		if self._was_cast_to_bytes:
			raise RuntimeError("WriteStream can only be cast to bytes once")
		self._was_cast_to_bytes = True
		return bytes(self._data)

	@overload
	def write(self, arg: ByteString) -> None:
		...

	@overload
	def write(self, arg: _Struct) -> None:
		...

	@overload
	def write(self, arg: c_bit) -> None:
		...

	@overload
	def write(self, arg: Serializable) -> None:
		...

	@overload
	def write(self, arg: AnyStr, allocated_length: int=None, length_type: Type[UnsignedIntStruct]=None) -> None:
		...

	def write(self, arg, allocated_length=None, length_type=None):
		"""
		Write a value to the bitstream.
		allocated_length is for fixed-length strings.
		length_type is for variable-length strings.
		Raises TypeError if arg is not supported.
		"""
		if isinstance(arg, c_bit):
			self._write_bit(arg.value)
			return
		if isinstance(arg, Serializable):
			arg.serialize(self)
			return
		if allocated_length is not None or length_type is not None:
			self._write_str(arg, allocated_length, length_type)
			return
		if isinstance(arg, (bytes, bytearray)):
			self._write_bytes(arg)
			return

		raise TypeError(arg)

	def _write_str(self, str_: AnyStr, allocated_length: int=None, length_type: Type[UnsignedIntStruct]=None) -> None:
		"""
		Write a string; str is encoded as UTF-16-LE, bytes-like values are written as they are.

		With length_type, the length is written first as that type.
		With allocated_length, the string is padded with null bytes to that many character units;
		ValueError is raised if there is no room for at least one null terminator.
		"""
		# possibly include default encoded length for non-variable-length strings (seems to be 33)
		if isinstance(str_, str):
			encoded_str = str_.encode("utf-16-le")
			char_size = 2
		else:
			encoded_str = str_
			char_size = 1

		# Lengths are counted in encoded character units (2 bytes each for UTF-16), not in Python characters.
		# A character outside the BMP takes two units, and the reading side consumes length*char_size bytes.
		unit_count = len(encoded_str) // char_size

		if length_type is not None:
			# Variable-length string
			self.write(length_type(unit_count))
		elif allocated_length is not None:
			# Fixed-length string
			# null terminator
			if unit_count+1 > allocated_length:
				raise ValueError("String too long!")
			# Build a new object instead of using +=, which would extend a bytearray passed in by the caller
			encoded_str = encoded_str + bytes(allocated_length*char_size-len(encoded_str))
		self._write_bytes(encoded_str)

	def _write_bit(self, bit: bool) -> None:
		"""Write a single bit at the current offset and advance by one bit."""
		self._alloc_bits(1)
		if bit: # we don't actually have to do anything if the bit is 0
			byte_index, bit_index = divmod(self._write_offset, 8)
			self._data[byte_index] |= 0x80 >> bit_index

		self._write_offset += 1

	def write_bits(self, value: int, number_of_bits: int) -> None:
		"""Write the lowest number_of_bits (1 to 7) bits of value."""
		assert 0 < number_of_bits < 8
		self._alloc_bits(number_of_bits)

		if number_of_bits < 8: # In the case of a partial byte, the bits are aligned from the right (bit 0) rather than the left (as in the normal internal representation)
			value = (value << (8 - number_of_bits)) & 0xff # Shift left to get the bits on the left, as in our internal representation
		byte_index, bit_index = divmod(self._write_offset, 8)
		if bit_index == 0:
			self._data[byte_index] = value
		else:
			self._data[byte_index] |= value >> bit_index # First half
			if 8 - bit_index < number_of_bits: # If we didn't write it all out in the first half (8 - bit_index is the number we wrote in the first half)
				self._data[byte_index + 1] = (value << (8 - bit_index)) & 0xff # Second half (overlaps byte boundary)

		self._write_offset += number_of_bits

	def _write_bytes(self, byte_arg: bytes) -> None:
		"""Write byte_arg at the current (possibly unaligned) offset."""
		byte_index, bit_index = divmod(self._write_offset, 8)
		count = len(byte_arg)
		if bit_index == 0:
			self._data[byte_index:byte_index+count] = byte_arg
		else:
			# shift new input to current shift
			new = (int.from_bytes(byte_arg, "big") << (8 - bit_index)).to_bytes(count+1, "big")
			# update current byte
			self._data[byte_index] |= new[0]
			# add rest
			self._data[byte_index+1:byte_index+1+count] = new[1:]
		self._write_offset += count*8

	@overload
	def write_compressed(self, byte_arg: UnsignedIntStruct) -> None:
		...

	@overload
	def write_compressed(self, byte_arg: bytes) -> None:
		...

	def write_compressed(self, byte_arg) -> None:
		"""
		Write byte_arg (the packed bytes of an unsigned integer) in compressed form.

		Zero bytes at the upper end are replaced by a single set bit each.
		"""
		# Write upper bytes with a single 1
		# From high byte to low byte, if high byte is 0 then write 1. Otherwise write 0 and the remaining bytes
		for current_byte in range(len(byte_arg) - 1, 0, -1):
			is_zero = byte_arg[current_byte] == 0
			self._write_bit(is_zero)
			if not is_zero:
				# Write the remainder of the data
				self._write_bytes(byte_arg[:current_byte + 1])
				return

		# If the upper half of the last byte is 0 then write 1 and the remaining 4 bits. Otherwise write 0 and the 8 bits.
		is_zero = byte_arg[0] & 0xF0 == 0x00
		self._write_bit(is_zero)
		if is_zero:
			self.write_bits(byte_arg[0], 4)
		else:
			self._write_bytes(byte_arg[:1])

	def align_write(self) -> None:
		"""Align the write offset to the byte boundary."""
		misalignment = self._write_offset % 8
		if misalignment != 0:
			self._alloc_bits(8 - misalignment)
			self._write_offset += 8 - misalignment

	def _alloc_bits(self, number_of_bits: int) -> None:
		"""Append zero bytes as needed so that number_of_bits more bits fit at the current offset."""
		bytes_to_allocate: int = math.ceil((self._write_offset + number_of_bits) / 8) - len(self._data)
		if bytes_to_allocate > 0:
			self._data += bytes(bytes_to_allocate)