# -*- coding: utf-8 -*-
# cython: language_level=3, always_allow_keywords=True
## Copyright 2011-2024 by LivingLogic AG, Bayreuth/Germany
## Copyright 2011-2024 by Walter Dörwald
##
## All Rights Reserved
##
## See ll/xist/__init__.py for the license
r'''
This module provides functions for encoding and decoding a lightweight
text-based format for serializing the object types supported by UL4.
It is extensible to allow encoding/decoding arbitrary instances (i.e. it is
basically a reimplementation of :mod:`pickle`, but with string input/output
instead of bytes and with an eye towards cross-platform support).
There are implementations for Python (this module), Java_ and Javascript_
(as part of the UL4 packages for those languages).
.. _Java: https://github.com/LivingLogic/LivingLogic.Java.ul4
.. _Javascript: https://github.com/LivingLogic/LivingLogic.Javascript.ul4
Furthermore there's an `Oracle package`_ that can be used for generating
UL4ON encoded data.
.. _Oracle package: https://github.com/LivingLogic/LivingLogic.Oracle.ul4
Basic usage follows the API design of :mod:`pickle`, :mod:`json`, etc. and
supports most builtin Python types::
>>> from ll import ul4on
>>> ul4on.dumps(None)
'n'
>>> ul4on.loads('n')
>>> ul4on.dumps(False)
'bF'
>>> ul4on.loads('bF')
False
>>> ul4on.dumps(42)
'i42'
>>> ul4on.loads('i42')
42
>>> ul4on.dumps(42.5)
'f42.5'
>>> ul4on.loads('f42.5')
42.5
>>> ul4on.dumps('foo')
"S'foo'"
>>> ul4on.loads("S'foo'")
'foo'
:class:`~datetime.date`, :class:`~datetime.datetime` and
:class:`~datetime.timedelta` objects are supported too::
>>> import datetime
>>> ul4on.dumps(datetime.date.today())
'X i2014 i11 i3'
>>> ul4on.dumps(datetime.datetime.now())
'Z i2014 i11 i3 i18 i16 i45 i314157'
>>> ul4on.loads('X i2014 i11 i3')
datetime.date(2014, 11, 3)
>>> ul4on.loads('Z i2014 i11 i3 i18 i16 i45 i314157')
datetime.datetime(2014, 11, 3, 18, 16, 45, 314157)
>>> ul4on.dumps(datetime.timedelta(days=1))
'T i1 i0 i0'
>>> ul4on.loads('T i1 i0 i0')
datetime.timedelta(1)
:mod:`!ll.ul4on` also supports :class:`~ll.color.Color` objects from
:mod:`ll.color`::
>>> from ll import color
>>> ul4on.dumps(color.red)
'C i255 i0 i0 i255'
>>> ul4on.loads('C i255 i0 i0 i255')
Color(0xff, 0x00, 0x00)
Lists, dictionaries and sets are also supported::
>>> ul4on.dumps([1, 2, 3])
'L i1 i2 i3 ]'
>>> ul4on.loads('L i1 i2 i3 ]')
[1, 2, 3]
>>> ul4on.dumps(dict(one=1, two=2))
"E S'one' i1 S'two' i2 }"
>>> ul4on.loads("E S'one' i1 S'two' i2 }")
{'one': 1, 'two': 2}
>>> ul4on.dumps({1, 2, 3})
'Y i1 i2 i3 }'
>>> ul4on.loads('Y i1 i2 i3 }')
{1, 2, 3}
Recursive data structures
-------------------------
:mod:`!ll.ul4on` can also handle recursive data structures::
>>> r = []
>>> r.append(r)
>>> ul4on.dumps(r)
'L ^0 ]'
>>> r2 = ul4on.loads('L ^0 ]')
>>> r2
[[...]]
>>> r2 is r2[0]
True
>>> r = {}
>>> r['recursive'] = r
>>> ul4on.dumps(r)
"E S'recursive' ^0 }"
>>> r2 = ul4on.loads("E S'recursive' ^0 }")
>>> r2
{'recursive': {...}}
>>> r2['recursive'] is r2
True
.. note::
The ``^0`` part in the dump is a so called "back reference", it tells the
decoder that in this spot an object is referenced that has already been part
of the dump (The ``0`` indicates where in the dump the object can be found).
Extensibility
-------------
UL4ON is extensible. It supports serializing arbitrary instances by registering
the class with the UL4ON serialization machinery::
from ll import ul4on
@ul4on.register("com.example.person")
class Person:
def __init__(self, firstname=None, lastname=None):
self.firstname = firstname
self.lastname = lastname
def __repr__(self):
return f"<Person firstname={self.firstname!r} lastname={self.lastname!r}>"
def ul4ondump(self, encoder):
encoder.dump(self.firstname)
encoder.dump(self.lastname)
def ul4onload(self, decoder):
self.firstname = decoder.load()
self.lastname = decoder.load()
jd = Person("John", "Doe")
output = ul4on.dumps(jd)
print("Dump:", output)
jd2 = ul4on.loads(output)
print("Loaded:", jd2)
This script outputs:
.. sourcecode:: output
Dump: O S'com.example.person' S'John' S'Doe' )
Loaded: <Person firstname='John' lastname='Doe'>
It is also possible to pass a custom registry to :func:`load` and
:func:`loads`::
from ll import ul4on
class Person:
ul4onname = "com.example.person"
def __init__(self, firstname=None, lastname=None):
self.firstname = firstname
self.lastname = lastname
def __repr__(self):
return f"<Person firstname={self.firstname!r} lastname={self.lastname!r}>"
def ul4ondump(self, encoder):
encoder.dump(self.firstname)
encoder.dump(self.lastname)
def ul4onload(self, decoder):
self.firstname = decoder.load()
self.lastname = decoder.load()
jd = Person("John", "Doe")
output = ul4on.dumps(jd)
print("Dump:", output)
jd2 = ul4on.loads(output, {"com.example.person": Person})
print("Loaded:", jd2)
Any type name not found in the registry dict passed in will be looked up in the
global registry.
.. note::
If a class isn't registered with the UL4ON serialization machinery, you have
to set the class attribute ``ul4onname`` yourself for serialization to work.
For deserialization the class **must** be registered either in the local
registry passed to the :class:`Decoder` or globally via :func:`register`.
Object content mismatch
-----------------------
In situations where an UL4ON API is updated frequently it is useful to be able
to update the writing side and the reading side independently. To support this,
:class:`Decoder` has a method :meth:`~Decoder.loadcontent` that is a generator
that reads the content items of an object from the input stream and yields those
items.
This allows to handle both situations:
* When the writing side outputs more items than the reading side expects,
exhausting the iterator returned by :meth:`~loadcontent` will read and
ignore the unrecognized items and leave the input stream in a consistent
state.
* When the writing side outputs fewer items than the reading side expects,
the remaining items can be initialized with default values.
For our example class it could be used like this::
from ll import ul4on
class Person:
ul4onname = "com.example.person"
def __init__(self, firstname=None, lastname=None):
self.firstname = firstname
self.lastname = lastname
def __repr__(self):
return f"<Person firstname={self.firstname!r} lastname={self.lastname!r}>"
def ul4ondump(self, encoder):
encoder.dump(self.firstname)
encoder.dump(self.lastname)
def ul4onload(self, decoder):
index = -1
for (index, item) in enumerate(decoder.loadcontent()):
if index == 0:
self.firstname = item
elif index == 1:
self.lastname = item
# Initialize attributes that were not loaded by ``loadcontent``
if index < 1:
self.lastname = None
if index < 0:
self.firstname = None
output = """o s'com.example.person' s'John' )"""
j = ul4on.loads(output, {"com.example.person": Person})
print("Loaded:", j)
This outputs:
.. sourcecode:: output
Loaded: <Person firstname='John' lastname=None>
Chunked UL4ON
-------------
:mod:`!ll.ul4on` also provides access to the classes that implement UL4ON
encoding and decoding. This can be used to create multiple UL4ON dumps using the
same encoding context, or recreate multiple objects from those multiple UL4ON
dumps (using the same decoding context).
An example for encoding::
encoder = ul4on.Encoder()
obj = "spam"
print(encoder.dumps(obj))
print(encoder.dumps(obj))
This prints:
.. sourcecode:: output
S'spam'
^0
The second call outputs a back reference, since the encoder remembers that the
string ``"spam"`` has already been output.
An example for decoding::
decoder = ul4on.Decoder()
print(decoder.loads("S'spam'"))
print(decoder.loads("^0"))
This prints:
.. sourcecode:: output
spam
spam
since the decoder remembers which object has been decoded as the first object
from the first dump.
One application of this is embedding multiple related UL4ON dumps as data
attributes in HTML and then deserializing those UL4ON chunks back into the
appropriate Javascript objects. For example::
from ll import ul4on
from ll.misc import xmlencode as xe
encoder = ul4on.Encoder()
counter = 0
def dump(obj):
global counter
counter += 1
return f"{counter} {encoder.dumps(obj)}"
data = ["gurk", "hurz", "hinz", "kunz"]
def f(s):
return f"<li data-ul4on='{xe(dump(s))}'>{xe(s.upper())}</li>"
items = "\n".join(f(s) for s in data)
html = f"<ul data-ul4on='{xe(dump(data))}'>\n{items}\n</ul>"
print(html)
This outputs:
.. sourcecode:: xml
<ul data-ul4on='5 L ^0 ^1 ^2 ^3 ]'>
<li data-ul4on='1 S'gurk''>GURK</li>
<li data-ul4on='2 S'hurz''>HURZ</li>
<li data-ul4on='3 S'hinz''>HINZ</li>
<li data-ul4on='4 S'kunz''>KUNZ</li>
</ul>
By iterating through the ``data-ul4on`` attributes in the correct order and
feeding each UL4ON chunk to the same decoder, all objects can be recreated and
attached to their appropriate HTML elements.
Incremental UL4ON and persistent objects
----------------------------------------
Objects that have an attribute ``ul4onid`` are considered "persistent" objects.
The combination of ``ul4onname`` and ``ul4onid`` uniquely identifies each
persistent object (even across multiple unrelated UL4ON dumps).
An :class:`Encoder` will dump those objects differently than other objects
without an ``ul4onid`` attribute.
A :class:`Decoder` will remember all persistent objects it has loaded
(under their ``ul4onname`` and ``ul4onid``). If the decoder encounters the
``ul4onname`` and ``ul4onid`` of an object it has remembered, it will not create
a new object, instead :meth:`ul4onload` will be called for the existing object.
If the decoder encounters a persistent object it hasn't remembered, it will
create a new object (passing the ``ul4onid`` as the only argument to the
constructor) and then call :meth:`ul4onload` on the new object.
This means that with this approach it's possible to use one :class:`Decoder`
object to load multiple unrelated UL4ON dumps "incrementally" one after the
other, but still merge the persistent objects in the subsequent dumps into
those created by previous dumps.
.. note::
For persistent objects :meth:`ul4onload` and :meth:`ul4ondump` don't have
to dump/load the ``ul4onid`` attribute, as this is done by the
:class:`Encoder`/:class:`Decoder`.
.. note::
If the value of the attribute ``ul4onid`` is :const:`None` the object will
be treated as an "ordinary" (i.e. non-persistent) object.
.. note::
For this approach, the method :meth:`~Decoder.reset` must be called between
calls to :meth:`~Decoder.load` or :meth:`~Decoder.loads` to reset the
information about back references.
Module documentation
--------------------
'''
from typing import *
from typing import TextIO
import datetime, collections, io
from collections import abc
__docformat__ = "reStructuredText"


# Global type registry: maps UL4ON type names to the classes registered via
# :func:`register`. Decoders fall back to it for names that are missing from
# their own local registry.
_registry = {}


def register(name : str):
    """
    This decorator can be used to register the decorated class with the
    :mod:`!ll.ul4on` serialization machinery.

    ``name`` must be a globally unique name for the class. To avoid
    name collisions Java's class naming system should be used (i.e. an
    inverted domain name like ``com.example.foo.bar``).

    ``name`` will be stored in the class attribute ``ul4onname``.

    Registering a second class under the same ``name`` replaces the first one
    in the global registry.
    """
    def registration(cls):
        # Store the name on the class (used when dumping) and the class under
        # the name (used when loading), then hand the class back unchanged.
        cls.ul4onname = name
        _registry[name] = cls
        return cls
    return registration
from ll import ul4c
class Encoder:
    """
    An :class:`Encoder` is used for serializing an object into an UL4ON dump.

    It manages the internal state required for handling backreferences: every
    recorded object is remembered together with its position, so that a later
    occurrence of the same object is written as ``^position`` instead of being
    dumped again. This state survives across calls to :meth:`dump` and
    :meth:`dumps`.
    """

    ul4_type = ul4c.InstantiableType("ul4on", "Encoder", "An Encoder is used for serializing an object into an UL4ON dump.")
    ul4_attrs = {"dumps"}

    def __init__(self, indent:Optional[str]=None):
        """
        Create an encoder for serializing objects.

        When ``indent`` is a non-empty string, it is used as an indentation
        string for pretty printing the output (one token per line). Otherwise
        tokens are separated by a single space.
        """
        self.stream = None # type: Optional[TextIO]
        self._level = 0
        self.indent = indent
        self._lastwaslf = False
        # Remember whether we have dumped something into the stream (so we have to write separator whitespace/indentation) or not
        self._first = True
        # Objects that are available for back references (using the position in the list)
        self._objects = [] # type: List[Any]
        # Maps object ids to their position in ``_objects``
        self._id2index = {} # type: Dict[int, int]

    def _record(self, obj:Any) -> None:
        # Record that we've written this object and in which position.
        # Keeping the object in ``_objects`` also keeps it alive, so its ``id()`` can't be reused.
        self._id2index[id(obj)] = len(self._objects)
        self._objects.append(obj)

    def _line(self, line:str, *items:Any) -> None:
        # Write the token ``line`` followed by the dumps of ``items``
        # (in pretty printing mode all of this ends up on one output line).
        if self.indent:
            self.stream.write(self.indent*self._level)
        elif not self._first:
            self.stream.write(" ")
        self._first = False
        self.stream.write(line)
        if items:
            # Switch off pretty printing while the items are dumped, so that
            # they are appended to the current line separated by spaces.
            oldindent = self.indent
            try:
                self.indent = ""
                for item in items:
                    self.dump(item)
            finally:
                self.indent = oldindent
        if self.indent:
            self.stream.write("\n")

    def _dumpitems(self, start:str, items:Iterable[Any], end:str) -> None:
        # Write a container: the opening token ``start``, each of ``items`` one nesting level deeper, and the closing token ``end``.
        self._line(start)
        self._level += 1
        for item in items:
            self.dump(item)
        self._level -= 1
        self._line(end)

    def dumps(self, obj:Any) -> str:
        """
        Serialize ``obj`` and return the resulting dump as a string.
        """
        stream = io.StringIO()
        self.dump(obj, stream)
        return stream.getvalue()

    def dump(self, obj:Any, stream:Optional[TextIO]=None) -> None:
        """
        Serialize ``obj`` into the stream ``stream`` as an UL4ON formatted dump.

        ``stream`` must provide a :meth:`!write` method.

        Passing :const:`None` for ``stream`` may only be done by objects that
        call :meth:`!dump` to implement UL4ON serialization in their
        own :meth:`ul4ondump` method.

        When ``stream`` is given, the encoder forgets it again when the call
        ends, even if serialization fails with an exception.
        """
        if stream is None:
            # Nested call: continue writing to the stream of the running dump
            self._dump(obj)
            return

        self.stream = stream
        self._level = 0
        self._lastwaslf = False
        self._first = True
        try:
            self._dump(obj)
        finally:
            # Don't keep a reference to the caller's stream around
            self.stream = None

    def _dump(self, obj:Any) -> None:
        # Write ``obj`` (or a backreference to it) to the current stream.

        # Have we written this object already?
        if id(obj) in self._id2index:
            # Yes: Store a backreference to the object
            index = self._id2index[id(obj)]
            self._line(f"^{index}")
            return

        # Imported here instead of at module level (presumably to avoid import cycles -- TODO confirm)
        from ll import color, misc

        # No: Write the object itself
        # We're not using backreferences if the object itself has a shorter dump
        if obj is None:
            self._line("n")
        elif isinstance(obj, bool):
            # Must be tested before ``int``, because ``bool`` is a subclass of ``int``
            self._line("bT" if obj else "bF")
        elif isinstance(obj, int):
            self._line(f"i{obj}")
        elif isinstance(obj, float):
            self._line(f"f{obj!r}")
        elif isinstance(obj, str):
            self._record(obj)
            dump = repr(obj).replace("<", "\\x3c") # Prevent XSS (when the value is embedded literally in a ``<script>`` tag)
            self._line(f"S{dump}")
        elif isinstance(obj, slice):
            self._record(obj)
            self._line("R", obj.start, obj.stop)
        elif isinstance(obj, color.Color):
            self._record(obj)
            self._line("C", obj.r(), obj.g(), obj.b(), obj.a())
        elif isinstance(obj, datetime.datetime):
            # Must be tested before ``date``, because ``datetime`` is a subclass of ``date``
            self._record(obj)
            self._line("Z", obj.year, obj.month, obj.day, obj.hour, obj.minute, obj.second, obj.microsecond)
        elif isinstance(obj, datetime.date):
            self._record(obj)
            self._line("X", obj.year, obj.month, obj.day)
        elif isinstance(obj, datetime.timedelta):
            self._record(obj)
            self._line("T", obj.days, obj.seconds, obj.microseconds)
        elif isinstance(obj, misc.monthdelta):
            self._record(obj)
            self._line("M", obj.months())
        elif isinstance(obj, abc.Sequence):
            self._record(obj)
            self._dumpitems("L", obj, "]")
        elif isinstance(obj, (dict, collections.OrderedDict)):
            self._record(obj)
            # Keys and values are written alternately
            self._dumpitems("E", (part for pair in obj.items() for part in pair), "}")
        elif isinstance(obj, abc.Mapping):
            self._record(obj)
            self._dumpitems("D", (part for pair in obj.items() for part in pair), "}")
        elif isinstance(obj, abc.Set):
            self._record(obj)
            self._dumpitems("Y", obj, "}")
        else:
            # Custom object: must provide ``ul4onname`` and ``ul4ondump``.
            # An ``ul4onid`` that is not ``None`` makes it a persistent object.
            ul4onid = getattr(obj, "ul4onid", None)
            self._record(obj)
            if ul4onid is not None:
                self._line("P", obj.ul4onname, ul4onid)
            else:
                self._line("O", obj.ul4onname)
            self._level += 1
            obj.ul4ondump(self)
            self._level -= 1
            self._line(")")
class Decoder:
    """
    A :class:`Decoder` is used for deserializing an UL4ON dump.

    It manages the internal state required for handling backreferences,
    persistent objects and other stuff. This state survives across calls to
    :meth:`load` and :meth:`loads` (see :meth:`reset`).
    """

    ul4_type = ul4c.InstantiableType("ul4on", "Decoder", "A Decoder is used for deserializing an UL4ON dump.")
    ul4_attrs = {"loads", "reset"}

    def __init__(self, registry:Optional[Dict[str, Callable[..., Any]]]=None):
        """
        Create a decoder for deserializing objects from an UL4ON dump.

        ``registry`` is used as a "custom type registry". It must map UL4ON
        type names to callables that create new empty instances of those types.
        Any type not found in ``registry`` will be looked up in the global
        registry (see :func:`register`).
        """
        self.stream = None # type: Optional[TextIO]
        # Next character to be read by :meth:`_nextchar`
        self._bufferedchar = None # type: Optional[str]
        # Objects that are available for back references (using the position in the list)
        self._objects = [] # type: List[Any]
        # Persistent objects loaded so far, keyed by ``(ul4onname, ul4onid)``
        self._persistent_objects = {} # type: Dict[Tuple[str, str], Any]
        # Used for "interning" dictionary keys
        self._keycache = {} # type: Dict[str, str]
        self.registry = registry
        # A stack of types that are currently in the process of being decoded (used in exception messages)
        self._stack = None # type: Optional[List[Any]]

    def loads(self, dump:str) -> Any:
        """
        Deserialize the object in the string ``dump`` and return it.
        """
        return self.load(io.StringIO(dump))

    def load(self, stream:Optional[TextIO]=None) -> Any:
        """
        Deserialize the next object from the stream ``stream`` and return it.

        ``stream`` must provide a :meth:`!read` method.

        Passing :const:`None` for ``stream`` may only be done by objects that
        call :meth:`!load` to implement UL4ON deserialization in their
        own :meth:`ul4onload` method.

        When ``stream`` is given, the decoder forgets it again when the call
        ends, even if deserialization fails with an exception.

        :exc:`EOFError` is raised when the stream ends prematurely,
        :exc:`ValueError` for malformed input and :exc:`TypeError` for
        objects whose type name isn't registered.
        """
        if stream is None:
            # Nested call: continue reading from the stream of the running load
            return self._load()

        self.stream = stream
        self._stack = []
        self._bufferedchar = None
        try:
            return self._load()
        finally:
            # Don't keep a reference to the caller's stream around
            self.stream = None
            self._stack = None

    def _load(self) -> Any:
        # Read the next typecode and the object that follows it from the current stream.
        # An uppercase typecode means the object gets recorded for backreferences, a lowercase one means it doesn't.
        typecode = self._nextchar()
        if typecode == "^":
            position = self._readint()
            value = self._objects[position]
        elif typecode in "nN":
            value = None
            if typecode == "N":
                self._loading(value)
        elif typecode in "bB":
            flag = self.stream.read(1)
            if flag == "T":
                value = True
            elif flag == "F":
                value = False
            else:
                raise ValueError(f"broken UL4ON stream at position {self.stream.tell():,}: expected 'T' or 'F' for bool; got {flag!r}")
            if typecode == "B":
                self._loading(value)
        elif typecode in "iI":
            value = self._readint()
            if typecode == "I":
                self._loading(value)
        elif typecode in "fF":
            value = float(self._readtoken())
            if typecode == "F":
                self._loading(value)
        elif typecode in "sS":
            value = self._readstring()
            if typecode == "S":
                self._loading(value)
        elif typecode in "cC":
            from ll import color
            value = self._loadfields(typecode, 4, color.Color)
        elif typecode in "zZ":
            value = self._loadfields(typecode, 7, datetime.datetime)
        elif typecode in "xX":
            value = self._loadfields(typecode, 3, datetime.date)
        elif typecode in "rR":
            value = self._loadfields(typecode, 2, slice)
        elif typecode in "tT":
            value = self._loadfields(typecode, 3, datetime.timedelta)
        elif typecode in "mM":
            from ll import misc
            value = self._loadfields(typecode, 1, misc.monthdelta)
        elif typecode in "lL":
            self._stack.append("list")
            value = []
            # Containers are recorded *before* their content is loaded, so that the content can reference the container
            if typecode == "L":
                self._loading(value)
            while not self._atend("]"):
                value.append(self.load())
            self._stack.pop()
        elif typecode in "dDeE":
            self._stack.append("dict" if typecode in "dD" else "odict")
            value = {} # Load all dicts as a standard Python 3.6 ordered dict
            if typecode in "DE":
                self._loading(value)
            while not self._atend("}"):
                key = self.load()
                if isinstance(key, str):
                    # "Intern" string keys: equal keys share one string object
                    key = self._keycache.setdefault(key, key)
                value[key] = self.load()
            self._stack.pop()
        elif typecode in "yY":
            self._stack.append("set")
            value = set()
            if typecode == "Y":
                self._loading(value)
            while not self._atend("}"):
                value.add(self.load())
            self._stack.pop()
        elif typecode in "oO":
            if typecode == "O":
                oldpos = self._beginfakeloading()
            name = self.load()
            self._stack.append(name)
            cls = self._lookupclass(name)
            if cls is None:
                raise TypeError(f"broken UL4ON stream at position {self.stream.tell():,} (path {self._path()}): can't decode object of type {name!r}")
            value = cls()
            if typecode == "O":
                self._endfakeloading(oldpos, value)
            value.ul4onload(self)
            self._endobject()
        elif typecode in "pP":
            if typecode == "P":
                oldpos = self._beginfakeloading()
            name = self.load()
            ul4onid = self.load()
            self._stack.append(f"{name}={ul4onid}")
            try:
                # Reuse the persistent object if we've already seen it
                value = self._persistent_objects[(name, ul4onid)]
            except KeyError:
                cls = self._lookupclass(name)
                if cls is None:
                    raise TypeError(f"broken UL4ON stream at position {self.stream.tell():,} (path {self._path()}): can't decode object of type {name!r} with id {ul4onid!r}") from None
                value = cls(ul4onid)
                self._persistent_objects[(name, ul4onid)] = value
            if typecode == "P":
                self._endfakeloading(oldpos, value)
            value.ul4onload(self)
            self._endobject()
        else:
            raise ValueError(f"broken UL4ON stream at position {self.stream.tell():,} (path {self._path()}): unknown typecode {typecode!r}")
        return value

    def loadcontent(self) -> Generator[Any, None, None]:
        """
        Load the content of an object until the "object terminator" is encountered.

        This is a generator and might produce fewer or more items than expected.
        The caller must be able to handle both cases (e.g. by ignoring additional
        items or initializing missing items with a default value).

        The iterator should always be exhausted when it is read, otherwise the
        stream will be in an undefined state.
        """
        while True:
            typecode = self._nextchar()
            # We always "push back" the typecode we've read so that :meth:`load`
            # can treat all cases (i.e. whether :meth:`ul4onload` uses :meth:`load`
            # :meth:`loadcontent` or :meth:`loadcontentitems`) the same way.
            self._bufferedchar = typecode
            if typecode == ")":
                break
            yield self.load()

    def loadcontentitems(self) -> Generator[Tuple[str, Any], None, None]:
        """
        Similar to :meth:`loadcontent`, but will load the content of an object as
        (key, value) pairs.

        For further info see :meth:`loadcontent`.
        """
        while True:
            typecode = self._nextchar()
            # We always "push back" the typecode we've read so that :meth:`load`
            # can treat all cases (i.e. whether :meth:`ul4onload` uses :meth:`load`
            # :meth:`loadcontent` or :meth:`loadcontentitems`) the same way.
            self._bufferedchar = typecode
            if typecode == ")":
                break
            key = self.load()
            value = self.load()
            yield (key, value)

    def reset(self) -> None:
        """
        Clear the internal cache for backreferences so that a new unrelated
        UL4ON dump can be loaded.

        However the cache for persistent objects will not be cleared.
        """
        self._objects.clear()

    def store_persistent_object(self, object) -> None:
        """
        Add a persistent object to the cache of persistent objects.

        ``object`` must have the attributes ``ul4onname`` and ``ul4onid``.
        """
        self._persistent_objects[(object.ul4onname, object.ul4onid)] = object

    def forget_persistent_object(self, object) -> None:
        """
        Remove a persistent object from the cache of persistent objects.

        Nothing happens if the object is not in the cache.
        """
        self._persistent_objects.pop((object.ul4onname, object.ul4onid), None)

    def persistent_object(self, name:str, id:str) -> Any:
        """
        Return the persistent object with the type ``name`` and the id ``id``,
        or :const:`None`, when the decoder hasn't encountered that object yet.
        """
        return self._persistent_objects.get((name, id), None)

    def persistent_objects(self) -> ValuesView[Any]:
        """
        Return a view of all persistent objects the decoder has encountered
        so far.
        """
        return self._persistent_objects.values()

    def _readtoken(self) -> str:
        # Read characters up to (and including) the next whitespace character or the end of the stream and return them without the whitespace.
        chars = []
        while True:
            c = self.stream.read(1)
            if not c or c.isspace():
                return "".join(chars)
            chars.append(c)

    def _readint(self) -> int:
        # Read an integer that is terminated by whitespace or the end of the stream.
        return int(self._readtoken())

    def _readstring(self) -> str:
        # Read a string literal: the first character is the delimiter, the content uses Python backslash escapes.
        delimiter = self.stream.read(1)
        if not delimiter:
            raise EOFError()
        buffer = io.StringIO()
        while True:
            c = self.stream.read(1)
            if not c:
                raise EOFError()
            if c == delimiter:
                # Non-ASCII characters are turned into escapes first, so that ``unicode_escape`` can decode everything
                return buffer.getvalue().encode("ascii", "backslashreplace").decode("unicode_escape")
            buffer.write(c)
            if c == "\\":
                # Copy the escaped character unchecked, so that an escaped delimiter doesn't end the string
                c2 = self.stream.read(1)
                if not c2:
                    raise EOFError()
                buffer.write(c2)

    def _loadfields(self, typecode:str, count:int, factory:Callable[..., Any]) -> Any:
        # Load ``count`` objects, pass them as positional arguments to ``factory`` and return the result (recording it for backreferences if ``typecode`` is uppercase).
        record = typecode.isupper()
        if record:
            oldpos = self._beginfakeloading()
        fields = [self.load() for _ in range(count)]
        value = factory(*fields)
        if record:
            self._endfakeloading(oldpos, value)
        return value

    def _atend(self, terminator:str) -> bool:
        # Consume the next non-whitespace character if it is ``terminator`` and return whether it was; any other character is pushed back for the next :meth:`load`.
        typecode = self._nextchar()
        if typecode == terminator:
            return True
        self._bufferedchar = typecode
        return False

    def _endobject(self) -> None:
        # Consume the object terminator ``)`` and leave the object on the path stack; anything else is an error.
        typecode = self._nextchar()
        if typecode != ")":
            raise ValueError(f"broken UL4ON stream at position {self.stream.tell():,} (path {self._path()}): object terminator ')' expected, got {typecode!r}")
        self._stack.pop()

    def _lookupclass(self, name:str) -> Optional[Callable[..., Any]]:
        # Look up the type ``name`` in the local registry (if there is one) and then in the global one; return ``None`` if both fail.
        cls = None
        if self.registry is not None:
            cls = self.registry.get(name)
        if cls is None:
            cls = _registry.get(name)
        return cls

    def _loading(self, obj) -> None:
        # Make ``obj`` available for backreferences
        self._objects.append(obj)

    def _nextchar(self) -> str:
        # Return the pushed back character (if there is one) or the next non-whitespace character from the stream.
        if self._bufferedchar is not None:
            result = self._bufferedchar
            self._bufferedchar = None
            return result
        while True:
            nextchar = self.stream.read(1)
            if not nextchar:
                raise EOFError()
            if not nextchar.isspace():
                return nextchar

    def _path(self) -> str:
        # Describe where in the object tree we currently are (for error messages)
        return "/".join(self._stack)

    def _beginfakeloading(self) -> int:
        # For loading custom object or immutable objects that have attributes we have a problem:
        # We have to record the object we're loading *now*, so that it is available for backreferences.
        # However until we've read the UL4ON name of the class (for custom object) or the attributes
        # of the object (for immutable objects with attributes), we can't create the object.
        # So we push :const:`None` to the backreference list for now and put the right object in this spot,
        # once we've created it (via :meth:`_endfakeloading`). This shouldn't lead to problems,
        # because during the time the backreference is wrong, only the class name is read,
        # so our object won't be referenced. For immutable objects the attributes normally
        # don't reference the object itself.
        oldpos = len(self._objects)
        self._loading(None)
        return oldpos

    def _endfakeloading(self, oldpos, value) -> None:
        # Fix backreference in object list
        self._objects[oldpos] = value
def dumps(obj:Any, /, indent:Optional[str]=None) -> str:
    """
    Serialize ``obj`` as an UL4ON formatted string.

    ``indent`` is passed on to the :class:`Encoder` that does the work: when
    it is a non-empty string, it is used as the indentation string for pretty
    printing the output.

    A fresh :class:`Encoder` is used for every call, so backreferences never
    span multiple calls.
    """
    stream = io.StringIO()
    Encoder(indent=indent).dump(obj, stream)
    return stream.getvalue()
def dump(obj:Any, /, stream:TextIO, indent:Optional[str]=None) -> None:
    """
    Serialize ``obj`` as an UL4ON formatted stream to ``stream``.

    ``stream`` must provide a :meth:`write` method.

    ``indent`` is passed on to the :class:`Encoder` that does the work: when
    it is a non-empty string, it is used as the indentation string for pretty
    printing the output.
    """
    Encoder(indent=indent).dump(obj, stream)
def load(stream:TextIO, /, registry:Optional[Dict[str, Callable[..., Any]]]=None) -> Any:
    """
    Deserialize ``stream`` (which must be file-like object with a :meth:`read`
    method containing an UL4ON formatted object) to a Python object.

    For the meaning of ``registry`` see :meth:`Decoder.__init__`.

    A fresh :class:`Decoder` is used for every call, so neither
    backreferences nor persistent objects are shared between calls.
    """
    return Decoder(registry).load(stream)
def loads(dump:str, /, registry:Optional[Dict[str, Callable[..., Any]]]=None) -> Any:
    """
    Deserialize ``dump`` (which must be a string containing an UL4ON
    formatted object) to a Python object.

    For the meaning of ``registry`` see :meth:`Decoder.__init__`.

    A fresh :class:`Decoder` is used for every call, so neither
    backreferences nor persistent objects are shared between calls.
    """
    return Decoder(registry).loads(dump)
def loadclob(clob, /, bufsize:int=1024*1024, registry:Optional[Dict[str, Callable[..., Any]]]=None) -> Any:
    """
    Deserialize ``clob`` (which must be an :mod:`cx_Oracle` ``CLOB`` variable
    containing an UL4ON formatted object) to a Python object.

    ``bufsize`` specifies the chunk size for reading the underlying ``CLOB``
    object.

    For the meaning of ``registry`` see :meth:`Decoder.__init__`.
    """
    # The decoder reads its input one character at a time, so ``clob`` is
    # wrapped in a buffer that fetches the data in chunks of ``bufsize``.
    return Decoder(registry).load(StreamBuffer(clob, bufsize))
class StreamBuffer:
    # Internal helper class that wraps a file-like object and provides buffering:
    # Data is fetched from the wrapped object in chunks of at least ``bufsize``
    # and handed out in the (usually much smaller) pieces that are requested.

    def __init__(self, stream, bufsize:int=1024*1024):
        # ``stream`` must have a ``read(size)`` method; ``bufsize`` is the minimum chunk size requested from it
        self.stream = stream
        self.bufsize = bufsize
        # Data that has been fetched from ``stream`` but not been handed out yet
        self.buffer = ""
        # Number of characters handed out so far (see :meth:`tell`)
        self._position = 0

    def read(self, size:int) -> str:
        # Return the next ``size`` characters (fewer, if the wrapped stream doesn't deliver enough data).
        havesize = len(self.buffer)
        if havesize >= size:
            # The buffer can satisfy the request on its own
            result = self.buffer[:size]
            self.buffer = self.buffer[size:]
        else:
            # Refill: fetch at least a whole chunk, hand out what's missing and keep the rest
            needsize = size-havesize
            newdata = self.stream.read(max(self.bufsize, needsize))
            result = self.buffer + newdata[:needsize]
            self.buffer = newdata[needsize:]
        self._position += len(result)
        return result

    def tell(self) -> int:
        # Return the number of characters handed out by :meth:`read` so far.
        # :class:`Decoder` calls ``stream.tell()`` when formatting its error messages;
        # without this method a broken dump would produce an :exc:`AttributeError` instead of the real error.
        return self._position