Source code for ll.xist.xsc

# -*- coding: utf-8 -*-
# cython: language_level=3, always_allow_keywords=True

## Copyright 1999-2024 by LivingLogic AG, Bayreuth/Germany
## Copyright 1999-2024 by Walter Dörwald
##
## All Rights Reserved
##
## See ll/xist/__init__.py for the license


"""
This module contains all the central XML tree classes, exception and warning
classes and a few helper classes and functions.
"""


__docformat__ = "reStructuredText"


import sys, random, copy, warnings, threading, weakref, types, codecs

import cssutils

from ll import misc, url as url_, xml_codec


xml_xmlns = "http://www.w3.org/XML/1998/namespace"


###
### helpers
###

[docs] def tonode(value): """ Convert ``value`` to an XIST :class:`Node`. If ``value`` is a tuple or list, it will be (recursively) converted to a :class:`Frag`. Integers, strings, etc. will be converted to a :class:`Text`. If ``value`` is a :class:`Node` already, it will be returned unchanged. In the case of :const:`None` the XIST Null (:data:`ll.xist.xsc.Null`) will be returned. If ``value`` is iterable, a :class:`Frag` will be generated from the items. Anything else will raise an :exc:`IllegalObjectError` exception. """ if isinstance(value, Node): if isinstance(value, Attrs): raise IllegalObjectError(value) # we don't have to turn an Attr into a Frag, because this will be done once the Attr is put back into the tree return value elif isinstance(value, (str, int, float)): return Text(value) elif value is None: return Null elif isinstance(value, (list, tuple)): return Frag(*value) elif isinstance(value, url_.URL): return Text(value) elif not isinstance(value, _Node_Meta): # avoid Node classes (whose __getitem__() returns an xfind selector) # Maybe it's an iterator/generator? try: value = tuple(value) except TypeError: pass else: return Frag(*value) raise IllegalObjectError(value) # none of the above => bail out
class ThreadLocalNodeHander(threading.local): handler = None threadlocalnodehandler = ThreadLocalNodeHander()
[docs] class build: """ A :class:`build` object can be used as a context handler to create a new XIST tree:: with xsc.build(): with html.ul() as e: +html.li("gurk") +html.li("hurz") """ def __init__(self): self.stack = [] def __enter__(self): self.prev = threadlocalnodehandler.handler threadlocalnodehandler.handler = self def __exit__(self, type, value, traceback): threadlocalnodehandler.handler = self.prev del self.prev def enter(self, node): if self.stack: self.stack[-1](node) self.stack.append(node) return node def exit(self): self.stack.pop() def add(self, *args, **kwargs): self.stack[-1](*args, **kwargs)
[docs] class addattr: """ An :class:`addattr` object can be used as a context handler to modify an attribute of an element:: with xsc.build(): with html.div() as e: with xsc.addattr("align"): +xsc.Text("right") """
[docs] def __init__(self, attrname): """ Create an :class:`addattr` object for adding to the attribute named ``attrname`` (which can be the Python name of an attribute or an attribute class). """ self.attr = threadlocalnodehandler.handler.stack[-1][attrname]
def __enter__(self): threadlocalnodehandler.handler.stack.append(self.attr) return self.attr def __exit__(self, type, value, traceback): threadlocalnodehandler.handler.stack.pop() def add(self, *args): self.attr(*args)
[docs] def add(*args, **kwargs): """ :func:`add` appends items in ``args`` and sets attributes in ``kwargs`` in the currently active node in the :keyword:`with` stack. """ threadlocalnodehandler.handler.add(*args, **kwargs)
### ### Conversion context ###
[docs] class Context: """ This is an empty class that can be used by the :meth:`convert` method to hold element or namespace specific data during the :meth:`convert` call. The method :meth:`Converter.__getitem__` will return a unique instance of this class. """
### ### Exceptions and warnings ###
[docs] class Error(Exception): """ Base class for all XIST exceptions """ pass
[docs] class Warning(UserWarning): """ Base class for all warning exceptions (i.e. those that won't result in a program termination.) """ pass
[docs] class IllegalAttrValueWarning(Warning): """ Warning that is issued when an attribute has an illegal value. """ def __init__(self, attr): self.attr = attr def __str__(self): return f"Attribute value {str(self.attr)!r} not allowed for {nsclark(self.attr)}"
[docs] class RequiredAttrMissingWarning(Warning): """ Warning that is issued when a required attribute is missing. """ def __init__(self, attrs, attr): self.attrs = attrs self.attr = attr def __str__(self): return f"Required attribute {nsclark(self.attr)} missing in {self.attrs!r}"
[docs] class UndeclaredAttrWarning(Warning): """ Warning that is issued when a local attribute is not declared. """ def __init__(self, attrs, attr): self.attrs = attrs self.attr = attr def __str__(self): return f"Attribute {nsclark(self.attr)} is undeclared in {self.attrs!r}"
[docs] class UndeclaredNodeWarning(Warning): """ Warning that is issued when a node (i.e. element, entity or processing instruction) is not declared. """ def __init__(self, obj): self.obj = obj def __str__(self): return f"{self.obj!r} is undeclared"
[docs] class IllegalPrefixError(Error, LookupError): """ Exception that is raised when a namespace prefix is undefined. """ def __init__(self, prefix): self.prefix = prefix def __str__(self): return f"namespace prefix {self.prefix!r} is undefined"
class MultipleRootsError(Error): def __str__(self): return "can't add namespace attributes: XML tree has multiple roots"
[docs] class FileNotFoundWarning(Warning): """ Warning that is issued when a file can't be found. """ def __init__(self, message, filename, exc): Warning.__init__(self, message, filename, exc) self.message = message self.filename = filename self.exc = exc def __str__(self): return f"{self.message}: {self.filename!r} not found ({self.exc})"
[docs] class IllegalObjectError(Error, TypeError): """ Exception that is raised when an XIST constructor gets passed an unconvertable object. """ def __init__(self, object): self.object = object def __str__(self): return f"can't convert object {self.object!r} of type {type(self.object).__name__} to an XIST node"
[docs] class IllegalCommentContentWarning(Warning): """ Warning that is issued when there is an illegal comment, i.e. one containing ``--`` or ending in ``-``. (This can only happen when the comment was created by code, not when parsed from an XML file.) """ def __init__(self, comment): self.comment = comment def __str__(self): return f"comment with content {self.comment.content!r} is illegal, as it contains '--' or ends in '-'"
[docs] class IllegalProcInstFormatError(Error): """ Exception that is raised when there is an illegal processing instruction, i.e. one containing ``?>``. (This can only happen when the processing instruction was created by code, not when parsed from an XML file.) """ def __init__(self, procinst): self.procinst = procinst def __str__(self): return f"processing instruction with content {self.procinst.content!r} is illegal, as it contains '?>'"
warnings.simplefilter("always", category=Warning) ### ### Context containing state during :meth:`convert` calls ### class ConverterState: def __init__(self, node, root, mode, stage, target, lang, makeaction, makeproject): self.node = node self.root = root self.mode = mode self.stage = stage if target is None: from ll.xist.ns import html target = html self.target = target self.lang = lang self.makeaction = makeaction self.makeproject = makeproject
[docs] class Converter: """ An instance of this class is passed around in calls to the :meth:`~Node.convert` method. A :class:`!Converter` object can be used when some element needs to keep state across a nested :meth:`~Node.convert` call. A typical example are nested chapter/subchapter elements with automatic numbering. For an example see the element :class:`ll.xist.ns.doc.section`. """
[docs] def __init__(self, node=None, root=None, mode=None, stage=None, target=None, lang=None, makeaction=None, makeproject=None): """ Create a :class:`Converter`. Arguments are used to initialize the :class:`Converter` properties of the same name. """ self.states = [ ConverterState(node=node, root=root, mode=mode, stage=stage, target=target, lang=lang, makeaction=makeaction, makeproject=makeproject) ] self.contexts = {}
class node(misc.propclass): """ The root node for which conversion has been called. This is automatically set by the :meth:`conv` method of :class:`Node` objects. """ def __get__(self): return self.states[-1].node def __set__(self, node): self.states[-1].node = node def __delete__(self): self.states[-1].node = None class root(misc.propclass): """ The root URL for the conversion. Resolving URLs during the conversion process should be done relative to :attr:`root`. """ def __get__(self): return self.states[-1].root def __set__(self, root): self.states[-1].root = root def __delete__(self): self.states[-1].root = None class mode(misc.propclass): """ The conversion mode. This corresponds directly to the mode in XSLT. The default is :const:`None`. """ def __get__(self): return self.states[-1].mode def __set__(self, mode): self.states[-1].mode = mode def __delete__(self): self.states[-1].mode = None class stage(misc.propclass): """ If your conversion is done in multiple steps or stages you can use this property to specify in which stage the conversion process currently is. The default is :const:`"deliver"`. """ def __get__(self): if self.states[-1].stage is None: return "deliver" else: return self.states[-1].stage def __set__(self, stage): self.states[-1].stage = stage def __delete__(self): self.states[-1].stage = None class target(misc.propclass): """ Specifies the conversion target. This must be a namespace module or similar object. """ def __get__(self): return self.states[-1].target def __set__(self, target): self.states[-1].target = target def __delete__(self): self.states[-1].target = None class lang(misc.propclass): """ The target language. The default is :const:`None`. """ def __get__(self): return self.states[-1].lang def __set__(self, lang): self.states[-1].lang = lang def __delete__(self): self.states[-1].lang = None class makeaction(misc.propclass): """ If an XIST conversion is done by an :class:`ll.make.XISTConvertAction` this property will hold the action object during that conversion. If you're not using the :mod:`ll.make` module you can simply ignore this property. The default is :const:`None`. """ def __get__(self): return self.states[-1].makeaction def __set__(self, makeaction): self.states[-1].makeaction = makeaction def __delete__(self): self.states[-1].makeaction = None class makeproject(misc.propclass): """ If an XIST conversion is done by an :class:`ll.make.XISTConvertAction` this property will hold the :class:`Project` object during that conversion. If you're not using the :mod:`ll.make` module you can simply ignore this property. """ def __get__(self): return self.states[-1].makeproject def __set__(self, makeproject): self.states[-1].makeproject = makeproject def __delete__(self): self.states[-1].makeproject = None def push(self, node=None, root=None, mode=None, stage=None, target=None, lang=None, makeaction=None, makeproject=None): self.lastnode = None if node is None: node = self.node if root is None: root = self.root if mode is None: mode = self.mode if stage is None: stage = self.stage if target is None: target = self.target if lang is None: lang = self.lang if makeaction is None: makeaction = self.makeaction if makeproject is None: makeproject = self.makeproject self.states.append(ConverterState(node=node, root=root, mode=mode, stage=stage, target=target, lang=lang, makeaction=makeaction, makeproject=makeproject)) def pop(self): if len(self.states) == 1: raise IndexError("can't pop last state") state = self.states.pop() self.lastnode = state.node return state
[docs] def __getitem__(self, key): """ Return a context object for ``key``. Two variants are supported: * ``key`` may be a string, in which case it should be a hierarchical dot-separated name similar to Java package names (e.g. ``"org.example.project.handler"``). This helps avoid name collisions. Context objects of this type must be explicitly created via :meth:`__setitem__`. * ``key`` may be a :class:`ll.xist.xsc.Node` instance or subclass. Each of these classes that defines its own :class:`Context` class gets a unique instance of this class. This instance will be created on the first access and the element can store information there that needs to be available across calls to :meth:`~Node.convert`. """ if isinstance(key, str): return self.contexts[key] else: contextclass = key.Context # don't use :meth:`setdefault`, as constructing the context object might involve some overhead try: return self.contexts[contextclass] except KeyError: context = contextclass() self.contexts[contextclass] = context return context
def __setitem__(self, key, value): self.contexts[key] = value
### ### Publisher for serializing XML trees to strings ###
[docs] class Publisher: """ A :class:`Publisher` object is used for serializing an XIST tree into a byte sequence. """
[docs] def __init__(self, encoding=None, xhtml=1, validate=False, prefixes={}, prefixdefault=False, hidexmlns=(), showxmlns=()): """ Create a publisher. Arguments have the following meaning: ``encoding`` : string or :const:`None` Specifies the encoding to be used for the byte sequence. If :const:`None` is used the encoding in the XML declaration will be used. If there is no XML declaration, UTF-8 will be used. ``xhtml`` : int With the parameter ``xhtml`` you can specify if you want HTML output: HTML (``xhtml==0``) Elements with a empty content model will be published as ``<foo>``. HTML browser compatible XML (``xhtml==1``) Elements with an empty content model will be published as ``<foo />`` and others that just happen to be empty as ``<foo></foo>``. This is the default. Pure XML (``xhtml==2``) All empty elements will be published as ``<foo/>``. ``validate`` : bool Specifies whether validation should be done before publishing. ``prefixes`` : mapping A dictionary that specifies which namespace prefixes should be used for publishing. Keys in the dictionary are either namespace names or objects that have an ``xmlns`` attribute which is the namespace name. Values can be: :const:`False` Treat elements in this namespace as if they are not in any namespace (if global attributes from this namespace are encountered, a non-empty prefix will be used nonetheless). :const:`None` Treat the namespace as the default namespaces (i.e. use unprefixed element names). Global attributes will again result in a non-empty prefix. :const:`True` The publisher uses a unique non-empty prefix for this namespace. A string Use this prefix for the namespace. ``prefixdefault`` : string or :const:`None` If an element or attribute is encountered whose namespace name is not in ``prefixes`` ``prefixdefault`` is used as the fallback. ``hidexmlns`` : list or set ``hidexmlns`` can be a list or set that contains namespace names for which no ``xmlns`` attributes should be published. (This can be used to hide the namespace declarations for e.g. Java taglibs.) ``showxmlns`` : list or set ``showxmlns`` can be a list or set that contains namespace names for which ``xmlns`` attributes *will* be published, even if there are no elements from this namespace in the tree. """ self.base = None self.allowschemerelurls = False self.encoding = encoding self.encoder = None self.xhtml = xhtml self.validate = validate self.prefixes = {nsname(xmlns): prefix for (xmlns, prefix) in prefixes.items()} self.prefixdefault = prefixdefault self.hidexmlns = {nsname(xmlns) for xmlns in hidexmlns} self.showxmlns = {nsname(xmlns) for xmlns in showxmlns} self._ns2prefix = {} self._prefix2ns = {}
[docs] def encode(self, text): """ Encode ``text`` with the encoding and error handling currently active and return the resulting byte string. """ return self.encoder.encode(text)
[docs] def encodetext(self, text): """ Encode ``test`` as text data. ``text`` must be a :class:`str` object. The publisher will apply the configured encoding, error handling and the current text filter (which escapes characters that can't appear in text data (like ``<`` etc.)) and returns the resulting :class:`str` object. """ self.encoder.errors = self.__errors[-1] result = self.encoder.encode(self.__textfilters[-1](text)) self.encoder.errors = "strict" return result
[docs] def pushtextfilter(self, filter): """ Pushes a new text filter function ontp the text filter stack. This function is responsible for escaping characters that can't appear in text data (like ``<``)). This is used to switch on escaping of ``"`` inside attribute values. """ self.__textfilters.append(filter)
[docs] def poptextfilter(self): """ Pops the current text filter function from the stack. """ self.__textfilters.pop()
[docs] def pusherrors(self, errors): """ Pushes a new error handling scheme onto the error handling stack. """ self.__errors.append(errors)
[docs] def poperrors(self): """ Pop the current error handling scheme from the error handling stack. """ self.__errors.pop()
def _newprefix(self): prefix = "ns" suffix = 2 while True: if prefix not in self._prefix2ns: return prefix prefix = f"ns{suffix}" suffix += 1
[docs] def getencoding(self): """ Return the encoding currently in effect. """ if self.encoding is not None: # The encoding has been prescribed, so this *will* be used. return self.encoding elif self.encoder is not None: # The encoding is determined by the XML declaration in the output, # so use that if it has been determined already. If the encoder hasn't # determined the encoding yet (e.g. because nothing has been output # yet) use utf-8 (which will be what the encoder eventually will decide # to use too). Note that this will not work if nothing has been output # yet, but later an XML declaration (using a different encoding) will # be output, but this shouldn't happen anyway. return self.encoder.encoding or "utf-8" return "utf-8"
[docs] def getnamespaceprefix(self, xmlns): """ Return (and register) a namespace prefix for the namespace name ``xmlns``. This honors the namespace configuration from ``self.prefixes`` and ``self.prefixdefault``. Furthermore the same prefix will be returned from now on (except when the empty prefix becomes invalid once global attributes are encountered) """ if xmlns is None: return None if xmlns == xml_xmlns: # We don't need a namespace mapping for the xml namespace prefix = "xml" else: try: prefix = self._ns2prefix[xmlns] except KeyError: # A namespace we haven't encountered yet prefix = self.prefixes.get(xmlns, self.prefixdefault) if prefix is True: prefix = self._newprefix() if prefix is not False: try: oldxmlns = self._prefix2ns[prefix] except KeyError: pass else: # If this prefix has already been used for another namespace, we need a new one if oldxmlns != xmlns: prefix = self._newprefix() self._ns2prefix[xmlns] = prefix self._prefix2ns[prefix] = xmlns return prefix
[docs] def getobjectprefix(self, obj): """ Get and register a namespace prefix for the namespace ``obj`` lives in (specified by the :attr:`xmlns` attribute of ``obj``). Similar to :meth:`getnamespaceprefix` this honors the namespace configuration from ``self.prefixes`` and ``self.prefixdefault`` (except when a global attribute requires a non-empty prefix). """ xmlns = getattr(obj, "xmlns") if xmlns is None: return None if xmlns == xml_xmlns: # We don't need a namespace mapping for the xml namespace prefix = "xml" else: emptyok = isinstance(obj, Element) # If it's e.g. a procinst assume we need a non-empty prefix try: prefix = self._ns2prefix[xmlns] except KeyError: # A namespace we haven't encountered yet prefix = self.prefixes.get(xmlns, self.prefixdefault) # global attributes always require prefixed names if prefix is True or ((prefix is None or prefix is False) and not emptyok): prefix = self._newprefix() if prefix is not False: try: oldxmlns = self._prefix2ns[prefix] except KeyError: pass else: # If this prefix has already been used for another namespace, we need a new one if oldxmlns != xmlns: prefix = self._newprefix() self._ns2prefix[xmlns] = prefix self._prefix2ns[prefix] = xmlns else: # We can't use the unprefixed names for global attributes if (prefix is None or prefix is False) and not emptyok: # Use a new one prefix = self._newprefix() self._ns2prefix[xmlns] = prefix self._prefix2ns[prefix] = xmlns return prefix
[docs] def iterbytes(self, node, base=None, allowschemerelurls=False): """ Output the node ``node``. This method is a generator that will yield the resulting XML byte sequence in fragments. URLs in ``node`` will be published relative to the base URL ``base``. Setting ``allowschemerelurls`` to true allow schema-relative URLs (e.g. ``//www.example.org/about.html``). """ if self.validate: for warning in node.validate(True, [node]): warnings.warn(warning) self._ns2prefix.clear() self._prefix2ns.clear() # iterate through every node in the tree for n in node.walknodes(Element, Attr, enterattrs=True): self.getobjectprefix(n) # Add the prefixes forced by ``self.showxmlns`` for xmlns in self.showxmlns: self.getnamespaceprefix(xmlns) # Do we have to publish xmlns attributes? self._publishxmlns = False if self._ns2prefix: # Determine if we have multiple roots if isinstance(node, Frag): count = 0 for child in node: if isinstance(child, Element) and child.xmlns not in self.hidexmlns: count += 1 if count > 1: raise MultipleRootsError() self._publishxmlns = True self.inattr = 0 self.__textfilters = [ misc.xmlescape_text ] self.__errors = [ "xmlcharrefreplace" ] self.base = url_.URL(base) self.allowschemerelurls = allowschemerelurls self.node = node self.encoder = codecs.getincrementalencoder("xml")(encoding=self.encoding) for part in self.node.publish(self): if part: yield part rest = self.encoder.encode("", True) # finish encoding and flush buffers if rest: yield rest self.inattr = 0 self.__textfilters = [ misc.xmlescape_text ] self.__errors = [ "xmlcharrefreplace" ] self._publishxmlns = False self._ns2prefix.clear() self._prefix2ns.clear() self.encoder = None
[docs] def bytes(self, node, base=None, allowschemerelurls=False): """ Return a :class:`bytes` object in XML format for the XIST node ``node``. """ return b"".join(self.iterbytes(node, base, allowschemerelurls))
[docs] def iterstring(self, node, base=None, allowschemerelurls=False): """ A generator that will produce a serialized string of ``node``. """ decoder = codecs.getincrementaldecoder("xml")(encoding=self.encoding) for part in self.iterbytes(node, base, allowschemerelurls): part = decoder.decode(part, False) if part: yield part part = decoder.decode(b"", True) if part: yield part
[docs] def string(self, node, base=None, allowschemerelurls=False): """ Return a string for ``node``. """ decoder = codecs.getdecoder("xml") result = self.bytes(node, base, allowschemerelurls) return decoder(result, encoding=self.encoding)[0]
[docs] def write(self, stream, node, base=None, allowschemerelurls=False): """ Write ``node`` to the file-like object ``stream`` (which must provide a :meth:`!write` method). """ for part in self.iterbytes(node, base, allowschemerelurls): stream.write(part)
### ### Cursor for the :meth:`Node.walk` method ###
[docs] class Cursor: """ A :class:`Cursor` object is used by the :meth:`~Node.walk` method during tree traversal. It contains information about the state of the traversal and can be used to influence which parts of the tree are traversed and in which order. Information about the state of the traversal is provided in the following attributes: .. attribute:: root The node where traversal has been started (i.e. the object for which the :meth:`~Node.walk` method has been called). .. attribute:: node The current node being traversed. .. attribute:: path A list of nodes that contains the path through the tree from the root to the current node (i.e. ``path[0] is root`` and ``path[-1] is node``). .. attribute:: index A path of indices (e.g. ``[0, 1]`` if the current node is the second child of the first child of the root). Inside attributes the index path will contain the name of the attribute (or a (attribute name, namespace name) tuple inside a global attribute). .. attribute:: event A string that specifies which event is currently handled. Possible values are: ``"enterelementnode"``, ``"leaveelementnode"``, ``"enterattrnode"``, ``"leaveattrnode"``, ``"textnode"``, ``"commentnode"``, ``"doctypenode"`` ``"procinstnode"``, ``"entitynode"`` and ``"nullnode"`` The following attributes specify which part of the tree should be traversed: .. attribute:: entercontent Should the content of an element be entered? .. attribute:: enterattrs Should the attributes of an element be entered? (Note that the attributes will always be entered before the content.) .. attribute:: enterattr Should the content of the attributes of an element be entered? (This is only relevant if :attr:`enterattrs` is true.) .. attribute:: enterelementnode Should the generator yield a ``"enterelementnode"`` event (i.e. return before entering the content or attributes of an element)? .. attribute:: leaveelementnode Should the generator yield an ``"leaveelementnode"`` event (i.e. return after entering the content or attributes of an element)? .. attribute:: enterattrnode Should the generator yield a ``"enterattrnode"`` event (i.e. return before entering the content of an attribute)? This is only relevant if :attr:`enterattrs` is true. .. attribute:: leaveattrnode Should the generator yield an ``"leaveattrnode"`` event (i.e. return after entering the content of an attribute)? This is only relevant if :attr:`enterattrs` is true. Furthermore if :attr:`enterattr` is false, the behaviour is essentially the same as for :attr:`enterattrnode`. Note that if any of these attributes is changed by the code consuming the generator, this new value will be used for the next traversal step once the generator is resumed and will be reset to its initial value (specified in the constructor) afterwards. """
[docs] def __init__(self, node, entercontent=True, enterattrs=False, enterattr=False, enterelementnode=True, leaveelementnode=False, enterattrnode=True, leaveattrnode=False): """ Create a new :class:`Cursor` object for a tree traversal rooted at the node ``node``. The arguments ``entercontent``, ``enterattrs``, ``enterattr``, ``enterelementnode``, ``leaveelementnode``, ``enterattrnode`` and ``leaveattrnode`` are used as the initial values for the attributes of the same name. (see the class docstring for info about their use). """ self.root = self.node = node self.path = [node] self.index = [] self.event = None self.entercontent = self._entercontent = entercontent self.enterattrs = self._enterattrs = enterattrs self.enterattr = self._enterattr = enterattr self.enterelementnode = self._enterelementnode = enterelementnode self.leaveelementnode = self._leaveelementnode = leaveelementnode self.enterattrnode = self._enterattrnode = enterattrnode self.leaveattrnode = self._leaveattrnode = leaveattrnode
[docs] def restore(self): """ Restore the attributes :attr:`entercontent`, :attr:`enterattrs`, :attr:`enterattr`, :attr:`enterelementnode`, :attr:`leaveelementnode`, :attr:`enterattrnode` and :attr:`leaveattrnode` to their initial value. """ self.entercontent = self._entercontent self.enterattrs = self._enterattrs self.enterattr = self._enterattr self.enterelementnode = self._enterelementnode self.leaveelementnode = self._leaveelementnode self.enterattrnode = self._enterattrnode self.leaveattrnode = self._leaveattrnode
### ### The DOM classes and their meta classes ### class _Node_Meta(type): def __new__(cls, name, bases, dict): if "register" not in dict: dict["register"] = True if "xmlname" not in dict: dict["xmlname"] = name return type.__new__(cls, name, bases, dict) def __repr__(self): return f"<class {self.__module__}:{self.__qualname__} at {id(self):#x}>" def _repr_pretty_(self, p, cycle): p.text(repr(self)) def __contains__(self, path): from ll.xist import xfind return path in xfind.IsInstanceSelector(self) def __truediv__(self, other): from ll.xist import xfind return xfind.IsInstanceSelector(self) / other def __floordiv__(self, other): from ll.xist import xfind return xfind.IsInstanceSelector(self) // other def __mul__(self, other): from ll.xist import xfind return xfind.IsInstanceSelector(self) * other def __pow__(self, other): from ll.xist import xfind return xfind.IsInstanceSelector(self) ** other def __and__(self, other): from ll.xist import xfind return xfind.IsInstanceSelector(self) & other if not hasattr(types, "UnionType"): def __or__(self, other): from ll.xist import xfind return xfind.IsInstanceSelector(self) | other def __getitem__(self, index): from ll.xist import xfind return xfind.IsInstanceSelector(self)[index] def __invert__(self): from ll.xist import xfind return ~xfind.IsInstanceSelector(self)
[docs] class Node(object, metaclass=_Node_Meta): """ Base class for nodes in the document tree. Derived classes may overwrite :meth:`convert` or :meth:`publish`. """ # location of this node in the XML file (will be hidden in derived classes, # but is specified here, so that no special tests are required. In derived # classes this will be set by the parser) startloc = None endloc = None # Subclasses relevant for parsing (i.e. Element, ProcInst and Entity) # have an additional class attribute named register. This attribute may have # two values: # :const:`False`: Don't register for parsing. # :const:`True`: Use for parsing. # If register is not set it defaults to :const:`True` Context = Context prettyindentbefore = 0 prettyindentafter = 0 def __repr__(self): return f"<{self.__module__}:{self.__qualname__} object at {id(self):#x}>" def __ne__(self, other): return not self == other xmlname = None xmlns = None def __pos__(self): threadlocalnodehandler.handler.add(self)
[docs] def __truediv__(self, other): """ Return a :class:`~ll.xist.xfind.ChildCombinator` with ``self`` as the left hand selector. """ from ll.xist import xfind return xfind.IsSelector(self) / other
[docs] def __floordiv__(self, other): """ Return a :class:`~ll.xist.xfind.DescendantCombinator` with ``self`` as the left hand selector. """ from ll.xist import xfind return xfind.IsSelector(self) // other
[docs] def __mul__(self, other): """ If ``other`` is an :class:`int`, return a :class:`Frag` with ``other`` times the node as an entry. Note that the node will not be copied, i.e. this is a "shallow :meth:`!__mul__`". If ``other`` is not an :class:`int`, treat this a CSS combinator that creates an :class:`~ll.xist.xfind.AdjacentSiblingCombinator` with ``self`` as the left hand selector. """ if isinstance(other, int): return Frag(*other*[self]) else: from ll.xist import xfind return xfind.IsSelector(self) * other
[docs] def __rmul__(self, other): """ Return a :class:`Frag` with ``other`` times the node as an entry. """ return Frag(*[self]*other)
[docs] def __pow__(self, other): """ Return a :class:`~ll.xist.xfind.GeneralSiblingCombinator` with ``self`` as the left hand selector. """ from ll.xist import xfind return xfind.IsSelector(self) ** other
[docs] def __and__(self, other): """ Return an :class:`~ll.xist.xfind.AndCombinator` with ``self`` as the left hand selector. """ from ll.xist import xfind return xfind.IsSelector(self) & other
[docs] def __or__(self, other): """ Return an :class:`~ll.xist.xfind.OrCombinator` with ``self`` as the left hand selector. """ from ll.xist import xfind return xfind.IsSelector(self) | other
[docs] def clone(self): """ Return a clone of ``self``. Compared to :meth:`deepcopy` :meth:`!clone` will create multiple instances of objects that can be found in the tree more than once. :meth:`!clone` can't clone trees that contain cycles. """ return self
[docs] def copy(self): """ Return a shallow copy of ``self``. """ return self.__copy__()
def __copy__(self): return self
[docs] def deepcopy(self): """ Return a deep copy of ``self``. """ return self.__deepcopy__()
def __deepcopy__(self, memo=None): return self
[docs] @misc.notimplemented def present(self, presenter): """ :meth:`!present` is used as a central dispatch method for the presenter classes. Normally it is not called by the user, but internally by the presenter. The user should use the appropriate presenter class directly. """
# Subclasses of :class:`Node` implement this method by calling the # appropriate ``present*`` method in the publisher (i.e. double dispatch)
[docs] def conv(self, converter=None, root=None, mode=None, stage=None, target=None, lang=None, function=None, makeaction=None, makeproject=None): """ Convenience method for calling :meth:`convert`. :meth:`!conv` will automatically set ``converter.node`` to ``self`` to remember the "document root node" for which :meth:`!conv` has been called. This means that you should not call :meth:`!conv` in any of the recursive calls, as you would loose this information. Call :meth:`convert` directly instead. """ if converter is None: converter = Converter(node=self, root=root, mode=mode, stage=stage, target=target, lang=lang, makeaction=makeaction, makeproject=makeproject) return self.convert(converter) else: converter.push(node=self, root=root, mode=mode, stage=stage, target=target, lang=lang, makeaction=makeaction, makeproject=makeproject) node = self.convert(converter) converter.pop() return node
[docs] @misc.notimplemented def convert(self, converter): """ Implementation of the conversion method. When you define your own element classes you have to overwrite this method and implement the desired conversion. This method must return an instance of :class:`Node`. It may *not* change ``self``. """
[docs] @misc.notimplemented def __str__(self): """ Return the character content of ``self`` as a string. This means that comments and processing instructions will be filtered out. For elements you'll get the element content. :meth:`!__str__` can be used everywhere where a plain string representation of the node is required. For example:: >>> from ll.xist.ns import html >>> e = html.html( ... html.head( ... html.title("The page") ... ), ... html.body( ... html.h1("The header"), ... html.p("The content", class_="content") ... ) ... ) >>> print(e) The pageThe headerThe content """ pass
[docs] def __int__(self): """ Convert the character content of ``self`` to an :class:`int`. """ return int(str(self))
[docs] def asFloat(self, decimal=".", ignore=""): """ Convert the character content of ``self`` to an :class:`float`. ``decimal`` specifies which decimal separator is used in the value (e.g. ``"."`` (the default) or ``","``). ``ignore`` specifies which characters will be ignored. """ s = str(self) for c in ignore: s = s.replace(c, "") if decimal != ".": s = s.replace(decimal, ".") return float(s)
[docs] def __float__(self): """ Convert the character content of ``self`` to an :class:`float`. """ return self.asFloat()
[docs] def __complex__(self): """ Convert the character content of ``self`` to an :class:`complex`. """ return complex(str(self))
[docs] def parsed(self, parser, event): """ This method will be called by the parser ``parser`` once after ``self`` is created by the parser (This is used e.g. by :class:`URLAttr` to incorporate the base URL into the attribute). ``event`` is the parser event that initiated the call. """
[docs] def validate(self, recursive=True, path=None): """ This method will be called when parsing or publishing to check whether ``self`` is valid. If ``self`` is found to be invalid a warning should be issued through the Python warning framework. """ yield from ()
[docs] @misc.notimplemented def publish(self, publisher): """ Generate unicode strings for the node. ``publisher`` must be an instance of :class:`Publisher`. The encoding and xhtml specification are taken from the ``publisher``. """
[docs] def iterbytes(self, base=None, allowschemerelurls=False, publisher=None, **publishargs): """ A generator that will produce this node as a serialized byte string. (i.e. it will output what the method :meth:`bytes` outputs, but incremetally). For the possible parameters see the :class:`Publisher` constructor and its :meth:`~Publisher.iterbytes` method. """ if publisher is None: publisher = Publisher(**publishargs) return publisher.iterbytes(self, base, allowschemerelurls) # return a generator-iterator
[docs] def bytes(self, base=None, allowschemerelurls=False, publisher=None, **publishargs): """ Return ``self`` as a serialized bytes object. For the possible parameters see the :class:`Publisher` constructor. For example:: >>> from ll.xist.ns import html >>> e = html.div( ... html.h1("The header"), ... html.p("The content", class_="content") ... ) >>> print(e.bytes()) b'<div><h1>The header</h1><p class="content">The content</p></div>' """ if publisher is None: publisher = Publisher(**publishargs) return publisher.bytes(self, base, allowschemerelurls)
[docs] def iterstring(self, base=None, allowschemerelurls=False, publisher=None, **publishargs): """ A generator that will produce a serialized string of ``self`` (i.e. it will output what the method :meth:`string` outputs, but incremetally). For the possible parameters see the :class:`Publisher` constructor. """ if publisher is None: publisher = Publisher(**publishargs) return publisher.iterstring(self, base, allowschemerelurls) # return a generator-iterator
[docs] def string(self, base=None, allowschemerelurls=False, publisher=None, **publishargs): """ Return a serialized (unicode) string for ``self``. For the possible parameters see the :class:`Publisher` constructor. For example:: >>> from ll.xist.ns import html >>> e = html.div( ... html.h1("The header"), ... html.p("The content", class_="content") ... ) >>> print(e.string()) <div><h1>The header</h1><p class="content">The content</p></div> """ if publisher is None: publisher = Publisher(**publishargs) return publisher.string(self, base, allowschemerelurls)
[docs] def write(self, stream, base=None, allowschemerelurls=False, publisher=None, **publishargs): """ Write ``self`` to the file-like object ``stream`` (which must provide a :meth:`!write` method). For the rest of the parameters see the :class:`Publisher` constructor. """ if publisher is None: publisher = Publisher(**publishargs) return publisher.write(stream, self, base, allowschemerelurls)
def _walk(self, cursor): yield cursor cursor.restore()
[docs] def walk(self, *selectors, entercontent=True, enterattrs=False, enterattr=False, enterelementnode=True, leaveelementnode=False, enterattrnode=True, leaveattrnode=False): """ Return an iterator for traversing the tree rooted at ``self``. Each item produced by the iterator is a :class:`Cursor` object. It contains information about the state of the traversal and can be used to influence which parts of the tree are traversed and in which order. ``selectors`` is used for filtering which nodes to return from the iterator. The arguments ``entercontent``, ``enterattrs``, ``enterattr``, ``enterelementnode``, ``leaveelementnode``, ``enterattrnode`` and ``leaveattrnode`` specify how the tree should be traversed. For more information see the :class:`Cursor` class. Note that the :class:`Cursor` object is reused by :meth:`!walk`, so you can't rely on any attributes remaining the same across calls to :func:`next`. The following example shows how to extract the text of an HTML ``label`` element for an input element with a specified HTML id:: from ll import misc from ll.xist import xsc, xfind from ll.xist.ns import html def label(doc, id): label = misc.first(doc.walk(xfind.attrhasvalue("for", id)), None) if label is None: return None texts = [] for c in label.node.walk(html.textarea, xsc.Text): if isinstance(c.node, html.textarea): c.entercontent = False else: texts.append(str(c.node)) return " ".join("".join(texts).split()).strip() doc = html.div( html.p( html.label( "Input your text here: ", html.textarea("Default value", rows=20, cols=80, id="foo"), " (just a test)", for_="foo", ) ) ) print(repr(label(doc, "foo"))) This will output:: 'Input your text here: (just a test)' """ cursor = Cursor(self, entercontent=entercontent, enterattrs=enterattrs, enterattr=enterattr, enterelementnode=enterelementnode, leaveelementnode=leaveelementnode, enterattrnode=enterattrnode, leaveattrnode=leaveattrnode) if selectors: from ll.xist import xfind return xfind.filter(self._walk(cursor), *selectors) else: return self._walk(cursor)
[docs] def walknodes(self, *selectors, entercontent=True, enterattrs=False, enterattr=False, enterelementnode=True, leaveelementnode=False, enterattrnode=True, leaveattrnode=False): """ Return an iterator for traversing the tree. The arguments have the same meaning as those for :meth:`walk`. The items produced by the iterator are the nodes themselves. """ cursor = Cursor(self, entercontent=entercontent, enterattrs=enterattrs, enterattr=enterattr, enterelementnode=enterelementnode, leaveelementnode=leaveelementnode, enterattrnode=enterattrnode, leaveattrnode=leaveattrnode) from ll.xist import xfind selector = xfind.selector(*selectors) return misc.Iterator(c.path[-1] for c in self._walk(cursor) if c.path in selector)
[docs] def walkpaths(self, *selectors, entercontent=True, enterattrs=False, enterattr=False, enterelementnode=True, leaveelementnode=False, enterattrnode=True, leaveattrnode=False): """ Return an iterator for traversing the tree. The arguments have the same meaning as those for :meth:`walk`. The items produced by the iterator are copies of the path. """ cursor = Cursor(self, entercontent=entercontent, enterattrs=enterattrs, enterattr=enterattr, enterelementnode=enterelementnode, leaveelementnode=leaveelementnode, enterattrnode=enterattrnode, leaveattrnode=leaveattrnode) from ll.xist import xfind selector = xfind.selector(*selectors) return misc.Iterator(c.path[:] for c in self._walk(cursor) if c.path in selector)
[docs] def compacted(self): """ Return a version of ``self``, where textnodes or character references that contain only linefeeds are removed, i.e. potentially useless whitespace is removed. """ return self
def _decoratenode(self, node): # Decorate the :class:`Node` object ``node`` with the same location # information as ``self``. node.startloc = self.startloc node.endloc = self.endloc return node
[docs] def mapped(self, function, converter=None, **converterargs): """ Return the node mapped through the function ``function``. This call works recursively (for :class:`Frag` and :class:`Element`). When you want an unmodified node you simply can return ``self``. :meth:`!mapped` will make a copy of it and fill the content recursively. Note that element attributes will not be mapped. When you return a different node from ``function`` this node will be incorporated into the result as-is. """ if converter is None: converter = Converter(**converterargs) node = function(self, converter) assert isinstance(node, Node), f"the mapped method returned the illegal object {node!r} (type {type(node)!r}) when mapping {self!r}" return node
[docs] def normalized(self): """ Return a normalized version of ``self``, which means that consecutive :class:`Text` nodes are merged. """ return self
[docs] def pretty(self, level=0, indent="\t"): """ Return a prettyfied version of ``self``, i.e. one with properly nested and indented tags (as far as possible). If an element has mixed content (i.e. :class:`Text` and non-:class:`Text` nodes) the content will be returned as is. Note that whitespace will prevent pretty printing too, so you might want to call :meth:`normalized` and :meth:`compacted` before calling :meth:`pretty` to remove whitespace. """ if level: return Frag(indent*level, self) else: return self
[docs] class CharacterData(Node): """ Base class for XML character data (:class:`Text`, :class:`ProcInst`, :class:`Comment` and :class:`DocType`). (Provides nearly the same functionality as :class:`UserString`, but omits a few methods.) """ __slots__ = ("_content",) def __init__(self, *content): self._content = "".join(str(x) for x in content) def __repr__(self): if self.startloc is not None: loc = f" location={str(self.startloc)!r}" else: loc = "" return f"<{self.__class__.__module__}.{self.__class__.__qualname__} content={self.content!r}{loc} at {id(self):#x}>" def _repr_pretty_(self, p, cycle): with p.group(4, f"<{self.__class__.__module__}.{self.__class__.__qualname__}", ">"): p.breakable() p.text(f"content={self.content!r}") if self.startloc is not None: p.breakable() p.text(f"location={str(self.startloc)!r}") p.breakable() p.text(f"at {id(self):#x}") def __getstate__(self): return self._content def __setstate__(self, content): self._content = content class content(misc.propclass): """ The text content of the node as a :class:`str` object. """ def __get__(self): return self._content def __hash__(self): return self._content.__hash__() def __eq__(self, other): if self.__class__ is other.__class__: return self._content == other._content return NotImplemented def __lt__(self, other): if not issubclass(self.__class__, other.__class__) and not issubclass(other.__class__, self.__class__): raise TypeError("unorderable types") return self._content < other._content def __le__(self, other): if not issubclass(self.__class__, other.__class__) and not issubclass(other.__class__, self.__class__): raise TypeError("unorderable types") return self._content <= other._content def __gt__(self, other): if not issubclass(self.__class__, other.__class__) and not issubclass(other.__class__, self.__class__): raise TypeError("unorderable types") return self._content > other._content def __ge__(self, other): if not issubclass(self.__class__, other.__class__) and not issubclass(other.__class__, self.__class__): raise TypeError("unorderable types") return self._content >= other._content def __len__(self): return self._content.__len__() def __getitem__(self, index): return self.__class__(self._content.__getitem__(index)) def __add__(self, other): return self.__class__(self._content + other) def __radd__(self, other): return self.__class__(str(other) + self._content) def __mul__(self, n): return self.__class__(n * self._content) def __rmul__(self, n): return self.__class__(n * self._content) def capitalize(self): return self.__class__(self._content.capitalize()) def center(self, width): return self.__class__(self._content.center(width)) def count(self, sub, start=0, end=sys.maxsize): return self._content.count(sub, start, end) def endswith(self, suffix, start=0, end=sys.maxsize): return self._content.endswith(suffix, start, end) def index(self, sub, start=0, end=sys.maxsize): return self._content.index(sub, start, end) def isalpha(self): return self._content.isalpha() def isalnum(self): return self._content.isalnum() def isdecimal(self): return self._content.isdecimal() def isdigit(self): return self._content.isdigit() def islower(self): return self._content.islower() def isnumeric(self): return self._content.isnumeric() def isspace(self): return self._content.isspace() def istitle(self): return self._content.istitle() def isupper(self): return self._content.isupper() def join(self, frag): return frag.withsep(self) def ljust(self, width, fill=" "): return self.__class__(self._content.ljust(width, fill)) def lower(self): return self.__class__(self._content.lower()) def lstrip(self, chars=None): return self.__class__(self._content.lstrip(chars)) def replace(self, old, new, maxsplit=-1): return self.__class__(self._content.replace(old, new, maxsplit)) def rjust(self, width, fill=" "): return self.__class__(self._content.rjust(width, fill)) def rstrip(self, chars=None): return self.__class__(self._content.rstrip(chars)) def rfind(self, sub, start=0, end=sys.maxsize): return self._content.rfind(sub, start, end) def rindex(self, sub, start=0, end=sys.maxsize): return self._content.rindex(sub, start, end) def split(self, sep=None, maxsplit=-1): return Frag(self._content.split(sep, maxsplit)) def splitlines(self, keepends=0): return Frag(self._content.splitlines(keepends)) def startswith(self, prefix, start=0, end=sys.maxsize): return self._content.startswith(prefix, start, end) def strip(self, chars=None): return self.__class__(self._content.strip(chars)) def swapcase(self): return self.__class__(self._content.swapcase()) def title(self): return self.__class__(self._content.title()) def translate(self, table): return self.__class__(self._content.translate(table)) def upper(self): return self.__class__(self._content.upper())
[docs] class Text(CharacterData): """ A text node. The characters ``<``, ``>``, ``&`` (and ``"`` inside attributes) will be "escaped" with the appropriate character entities when this node is published. """ def __str__(self): return self._content def _str(self): return "text" def convert(self, converter): return self def publish(self, publisher): yield publisher.encodetext(self._content) def present(self, presenter): return presenter.presentText(self) # return a generator-iterator def compacted(self): return Null if self.content.isspace() else self def pretty(self, level=0, indent="\t"): return self def _walk(self, cursor): cursor.event = "textnode" yield cursor cursor.restore()
[docs] class Frag(Node, list): """ A fragment contains a list of nodes and can be used for dynamically constructing content. The attribute :attr:`content` of an :class:`Element` is a :class:`Frag`. """ def __init__(self, *content): list.__init__(self) for child in content: child = tonode(child) if isinstance(child, Frag): list.extend(self, child) elif child is not Null: list.append(self, child) def __repr__(self): l = len(self) if l == 0: childcount = "no children" elif l == 1: childcount = "1 child" else: childcount = f"{l:,} children" loc = f" location={str(self.startloc)!r}" if self.startloc is not None else "" return f"<{self.__class__.__module__}.{self.__class__.__qualname__} object ({childcount}){loc} at {id(self):#x}>" def _repr_pretty_(self, p, cycle): with p.group(4, f"<{self.__class__.__module__}.{self.__class__.__qualname__}", ">"): if self.startloc is not None: p.breakable() p.text(f"location={str(self.startloc)!r}") if cycle: p.text("...") for child in self: p.breakable() p.pretty(child) p.breakable() p.text(f"at {id(self):#x}") def __str__(self): return "".join(str(child) for child in self) def _str(self): return "fragment" def __enter__(self): return threadlocalnodehandler.handler.enter(self) def __exit__(self, type, value, traceback): threadlocalnodehandler.handler.exit() def __call__(self, *content): self.extend(content) return self def _create(self): """ internal helper that is used to create an empty clone of ``self``. """ # This is overwritten by :class:`Attr` to insure that attributes don't # get initialized with the default value when used in various methods # that create new attributes. return self.__class__()
[docs] def clear(self): """ Make ``self`` empty. """ del self[:]
def convert(self, converter): node = self._create() for child in self: convertedchild = child.convert(converter) assert isinstance(convertedchild, Node), f"the convert method returned the illegal object {convertedchild!r} (type {type(convertedchild)!r}) when converting {self!r}" node.append(convertedchild) return self._decoratenode(node) def clone(self): node = self._create() list.extend(node, (child.clone() for child in self)) return self._decoratenode(node)
[docs] def __copy__(self): """ helper for the :mod:`copy` module. """ node = self._create() list.extend(node, self) return self._decoratenode(node)
[docs] def __deepcopy__(self, memo=None): """ helper for the :mod:`copy` module. """ node = self._create() if memo is None: memo = {} memo[id(self)] = node list.extend(node, (copy.deepcopy(child, memo) for child in self)) return self._decoratenode(node)
def present(self, presenter): return presenter.presentFrag(self) # return a generator-iterator def __eq__(self, other): if self.__class__ is other.__class__: return list.__eq__(self, other) return NotImplemented def validate(self, recursive=True, path=None): if path is None: path = [] path.append(None) for child in self: path[-1] = child yield from child.validate(recursive, path) path.pop() def publish(self, publisher): for child in self: yield from child.publish(publisher)
[docs] def __getitem__(self, index): """ Return the ``index``'th node of the content of the fragment. If ``index`` is a list :meth:`__getitem__` will work recursively. If ``index`` is an empty list, ``self`` will be returned. :meth:`!__getitem__` also supports selectors (i.e. :class:`xfind.Selector` objects). """ if isinstance(index, list): node = self for subindex in index: node = node[subindex] return node elif isinstance(index, int): return list.__getitem__(self, index) elif isinstance(index, slice): node = self._create() list.extend(node, list.__getitem__(self, index)) return node else: from ll.xist import xfind def iterate(selector): path = [self, None] for child in self: path[-1] = child if path in selector: yield child return misc.Iterator(iterate(xfind.selector(index)))
[docs] def __setitem__(self, index, value): """ Allows you to replace the ``index``'th content node of the fragment with the new value ``value`` (which will be converted to a node). If ``index`` is a list :meth:`__setitem__` will be applied to the innermost index after traversing the rest of ``index`` recursively. If ``index`` is an empty list, an exception will be raised. :meth:`!__setitem__` also supports selectors (i.e. :class:`xfind.Selector` objects). """ if isinstance(index, list): if not index: raise ValueError("can't replace self") node = self for subindex in index[:-1]: node = node[subindex] node[index[-1]] = value elif isinstance(index, int): value = Frag(value) if index == -1: l = len(self) list.__setitem__(self, slice(l-1, l), value) else: list.__setitem__(self, slice(index, index+1), value) elif isinstance(index, slice): list.__setitem__(self, index, Frag(value)) else: from ll.xist import xfind selector = xfind.selector(index) value = Frag(value) newcontent = [] path = [self, None] for child in self: path[-1] = child if path in selector: newcontent.extend(value) else: newcontent.append(child) list.__setitem__(self, slice(0, len(self)), newcontent)
[docs] def __delitem__(self, index): """ Remove the ``index``'th content node from the fragment. If ``index`` is a list, the innermost index will be deleted, after traversing the rest of ``index`` recursively. If ``index`` is an empty list, an exception will be raised. Anything except :class:`list`, :class:`int` and :class:`slice` objects will be turned into a selector (i.e. an :class:`xfind.Selector` objects) and any child node matching this selector will be deleted from ``self``. """ if isinstance(index, list): if not index: raise ValueError("can't delete self") node = self for subindex in index[:-1]: node = node[subindex] del node[index[-1]] elif isinstance(index, (int, slice)): list.__delitem__(self, index) else: from ll.xist import xfind selector = xfind.selector(index) list.__setitem__(self, slice(0, len(self)), [child for child in self if [self, child] not in selector])
[docs] def __mul__(self, factor): """ Return a :class:`Frag` with ``factor`` times the content of ``self``. Note that no copies of the content will be generated, so this is a "shallow :meth:`!__mul__`". """ node = self._create() list.extend(node, list.__mul__(self, factor)) return node
__rmul__ = __mul__ def __iadd__(self, other): self.extend(other) return self # no need to implement __len__ or __bool__
[docs] def append(self, *others): """ Append every item in ``others`` to ``self``. """ for other in others: other = tonode(other) if isinstance(other, Frag): list.extend(self, other) elif other is not Null: list.append(self, other)
[docs] def extend(self, items): """ Append all items from the sequence ``items`` to ``self``. """ self.append(items)
[docs] def insert(self, index, *others): """ Insert all items in ``others`` at the position ``index``. (this is the same as ``self[index:index] = others``) """ other = Frag(*others) list.__setitem__(self, slice(index, index), other)
def compacted(self): node = self._create() for child in self: compactedchild = child.compacted() assert isinstance(compactedchild, Node), f"the compact method returned the illegal object {compactedchild!r} (type {type(compactedchild)!r}) when compacting {child!r}" if compactedchild is not Null: list.append(node, compactedchild) return self._decoratenode(node)
[docs] def withsep(self, separator, clone=False): """ Return a version of ``self`` with a separator node between the nodes of ``self``. if ``clone`` is false, one node will be inserted several times, if ``clone`` is true, clones of this node will be used. """ node = self._create() newseparator = tonode(separator) for child in self: if len(node): node.append(newseparator) if clone: newseparator = newseparator.clone() node.append(child) return node
[docs] def reversed(self): """ Return a reversed version of the ``self``. """ node = list(self) node.reverse() return self.__class__(node)
[docs] def filtered(self, function): """ Return a filtered version of the ``self``, i.e. a copy of ``self``, where only content nodes for which ``function`` returns true will be copied. """ node = self._create() list.extend(node, (child for child in self if function(child))) return node
[docs] def shuffled(self): """ Return a shuffled version of ``self``, i.e. a copy of ``self`` where the content nodes are randomly reshuffled. """ content = list(self) node = self._create() while content: index = random.randrange(len(content)) list.append(node, content[index]) del content[index] return node
def mapped(self, function, converter=None, **converterargs): if converter is None: converter = Converter(**converterargs) node = function(self, converter) assert isinstance(node, Node), f"the mapped method returned the illegal object {node!r} (type {type(node)!r}) when mapping {self!r}" if node is self: node = self._create() for child in self: node.append(child.mapped(function, converter)) return node def normalized(self): node = self._create() lasttypeOK = False for child in self: normalizedchild = child.normalized() thistypeOK = isinstance(normalizedchild, Text) if thistypeOK and lasttypeOK: node[-1] += normalizedchild else: list.append(node, normalizedchild) lasttypeOK = thistypeOK return node def pretty(self, level=0, indent="\t"): node = self._create() for (i, child) in enumerate(self): if i: node.append("\n") level += child.prettyindentbefore node.append(child.pretty(level, indent)) level += child.prettyindentafter return node def _walk(self, cursor): # ``Frag``\s don't get tested cursor.path.append(None) cursor.index.append(-1) for child in self: cursor.path[-1] = cursor.node = child cursor.index[-1] += 1 yield from child._walk(cursor) cursor.path.pop() cursor.index.pop() cursor.node = cursor.path[-1]
[docs] class Comment(CharacterData): """ An XML comment. """ def __str__(self): return "" def _str(self): return "comment" def convert(self, converter): return self def present(self, presenter): return presenter.presentComment(self) # return a generator-iterator def publish(self, publisher): if not publisher.inattr: content = self.content if "--" in content or content.endswith("-"): warnings.warn(IllegalCommentContentWarning(self)) yield publisher.encode("<!--") yield publisher.encode(content) yield publisher.encode("-->") def _walk(self, cursor): cursor.event = "commentnode" yield cursor cursor.restore()
class _DocType_Meta(type(Node)): def __repr__(self): return f"<doctype class {self.__module__}:{self.__name__} at {id(self):#x}>"
[docs] class DocType(CharacterData, metaclass=_DocType_Meta): """ An XML document type declaration. """ def convert(self, converter): return self def __str__(self): return "" def present(self, presenter): return presenter.presentDocType(self) # return a generator-iterator def publish(self, publisher): if not publisher.inattr: yield publisher.encode("<!DOCTYPE ") yield publisher.encode(self.content) yield publisher.encode(">") def _walk(self, cursor): cursor.event = "doctypenode" yield cursor cursor.restore()
class _ProcInst_Meta(type(Node)): def __new__(cls, name, bases, dict): self = super(_ProcInst_Meta, cls).__new__(cls, name, bases, dict) if dict.get("register") is not None: # check here as the pool isn't defined yet threadlocalpool.pool.register(self) return self def __repr__(self): if self.xmlname != self.__name__: xmlname = f" xmlname={self.xmlname!r}" else: xmlname = "" return f"<procinst class {self.__module__}:{self.__name__}{xmlname} at {id(self):#x}>"
[docs] class ProcInst(CharacterData, metaclass=_ProcInst_Meta): """ Base class for processing instructions. Processing instructions for specific targets must be implemented as subclasses of :class:`ProcInst`. """ register = None def __repr__(self): if self.xmlname != self.__class__.__name__: xmlname = f" xmlname={self.xmlname!r}" else: xmlname = "" if self.startloc is not None: loc = f" location={str(self.startloc)!r}" else: loc = "" return f"<procinst {self.__class__.__module__}.{self.__class__.__qualname__}{xmlname} content={self.content!r}{loc} at {id(self):#x}>" def _repr_pretty_(self, p, cycle): with p.group(4, f"<procinst {self.__class__.__module__}.{self.__class__.__qualname__}", ">"): if self.xmlname != self.__class__.__name__: p.breakable() p.text(f"xmlname={self.xmlname!r}") p.breakable() p.text(f"content={self.content!r}") if self.startloc is not None: p.breakable() p.text(f"location={str(self.startloc)!r}") p.breakable() p.text(f"at {id(self):#x}") def __str__(self): return "" def _str(self): return f"processing instruction {self.xmlname}" def __eq__(self, other): if isinstance(other, ProcInst): return self.xmlname == other.xmlname and self._content == other._content return NotImplemented def validate(self, recursive=True, path=None): if self.__class__ is ProcInst: yield UndeclaredNodeWarning(self) def convert(self, converter): return self def present(self, presenter): return presenter.presentProcInst(self) # return a generator-iterator def publish(self, publisher): content = self.content if "?>" in content: raise IllegalProcInstFormatError(self) yield publisher.encode(f"<?{self.xmlname} {content}?>") def _walk(self, cursor): cursor.event = "procinstnode" yield cursor cursor.restore() def __mul__(self, n): return Node.__mul__(self, n) # don't inherit ``CharacterData.__mul__`` def __rmul__(self, n): return Node.__rmul__(self, n) # don't inherit ``CharacterData.__rmul__``
class Null(CharacterData): """ node that does not contain anything. """ def __repr__(self): return "ll.xist.xsc.Null" def _repr_pretty_(self, p, cycle): p.text(f"<{self.__class__.__module__}.{self.__class__.__qualname__} at {id(self):#x}>") def __str__(self): return "" def _str(self): return "null" def convert(self, converter): return self def publish(self, publisher): if False: yield "" def present(self, presenter): return presenter.presentNull(self) # return a generator-iterator def _walk(self, cursor): cursor.event = "nullnode" yield cursor cursor.restore() Null = Null() # Singleton, the Python way class _Attr_Meta(type(Frag)): def __new__(cls, name, bases, dict): # can be overwritten in subclasses, to specify that this attributes is required if "required" in dict: dict["required"] = bool(dict["required"]) # convert the default to a Frag if "default" in dict: dict["default"] = Frag(dict["default"]) # convert the entries in values to strings if "values" in dict: values = dict["values"] if values is not None: dict["values"] = tuple(str(entry) for entry in values) self = super(_Attr_Meta, cls).__new__(cls, name, bases, dict) if self.xmlns is not None: threadlocalpool.pool.register(self) return self def __repr__(self): if self.xmlname != self.__name__: xmlname = f" xmlname={self.xmlname!r}" else: xmlname = "" if self.xmlns is not None: isglobal = "global " xmlns = f" xmlns={self.xmlns!r}" else: isglobal = "" xmlns = "" return f"<{isglobal}attribute class {self.__module__}:{self.__qualname__}{xmlname}{xmlns} at {id(self):#x}>"
[docs] class Attr(Frag, metaclass=_Attr_Meta): """ Base class of all attribute classes. The content of an attribute may be any other XIST node. This is different from a normal DOM, where only text and character references are allowed. The reason for this is to allow dynamic content (implemented as elements or processing instructions) to be put into attributes. Of course, this dynamic content when finally converted to HTML should normally result in a fragment consisting only of text and character references. But note that it is allowed to have elements and processing instructions inside of attributes even when publishing. Processing instructions will be published as is and for elements their content will be published:: >>> from ll.xist.ns import html, php >>> node = html.img( ... src=php.php("echo 'eggs.gif'"), ... alt=html.abbr( ... "EGGS", ... title="Extensible Graphics Generation System", ... lang="en" ... ) ... ) >>> print(node.string()) <img alt="EGGS" src="<?php echo 'eggs.gif'?>" /> """ required = False default = None values = None def __repr__(self): if self.xmlname != self.__class__.__name__: xmlname = f" xmlname={self.xmlname!r}" else: xmlname = "" if self.xmlns is not None: isglobal = "global " xmlns = f" xmlns={self.xmlns!r}" else: isglobal = "" xmlns = "" l = len(self) if l == 0: childcount = "no children" elif l == 1: childcount = "1 child" else: childcount = f"{l:,} children" loc = f" location={str(self.startloc)!r}" if self.startloc is not None else "" return f"<{isglobal}attribute {self.__class__.__module__}.{self.__class__.__qualname__}{xmlns}{xmlname} ({childcount}){loc} at {id(self):#x}>" def _repr_pretty_(self, p, cycle): isglobal = "global " if self.xmlns is not None else "" with p.group(4, f"<{isglobal}attribute {self.__class__.__module__}.{self.__class__.__qualname__}", ">"): if self.xmlns is not None: p.breakable() p.text(f"xmlns={self.xmlns!r}") if self.xmlname != self.__class__.__name__: p.breakable() p.text(f"xmlname={self.xmlname!r}") if self.startloc is not None: p.breakable() p.text(f"location={str(self.startloc)!r}") if cycle: p.breakable() p.text("...") else: for child in self: p.breakable() p.pretty(child) p.breakable() p.text(f"at {id(self):#x}") def _str(self): if self.xmlns is not None: return f"attribute {{{self.xmlns}}}{self.xmlname}" else: return f"attribute {self.xmlname}" def _create(self): node = self.__class__() if self.__class__ is Attr: node.xmlname = self.xmlname node.xmlns = self.xmlns return node
[docs] def isfancy(self): """ Return whether ``self`` contains nodes other than :class:`Text`. """ for child in self: if not isinstance(child, Text): return True return False
def present(self, presenter): return presenter.presentAttr(self) # return a generator-iterator
[docs] def validate(self, recursive=True, path=None): """ Check whether ``self`` has an allowed value, i.e. one that is specified in the class attribute ``values``. If the value is not allowed a warning will be issued through the Python warning framework. If ``self`` is "fancy" (i.e. contains non-:class:`Text` nodes), no check will be done. """ if path is None: path = [] values = self.__class__.values if self and isinstance(values, tuple) and not self.isfancy(): value = str(self) if value not in values: yield IllegalAttrValueWarning(self) yield from Frag.validate(self, True, path)
def _publishname(self, publisher): if self.xmlns is not None: prefix = publisher._ns2prefix.get(self.xmlns) if self.xmlns != xml_xmlns else "xml" if prefix is not None: return f"{prefix}:{self.xmlname}" return self.xmlname def _publishattrvalue(self, publisher): # Internal helper that is used to publish the attribute value # (can be overwritten in subclass (done by e.g. :class:`StyleAttr` and # :class:`URLAttr`) return Frag.publish(self, publisher) def publish(self, publisher): if len(self) == 1 and isinstance(self[0], AttrElement): yield from self[0].publishattr(publisher, self) else: publisher.inattr += 1 yield publisher.encode(f' {self._publishname(publisher)}="') publisher.pushtextfilter(misc.xmlescape_attr) yield from self._publishattrvalue(publisher) publisher.poptextfilter() yield publisher.encode('"') publisher.inattr -= 1 def pretty(self, level=0, indent="\t"): return self.clone() def _walk(self, cursor): if cursor.enterattrnode: cursor.event = "enterattrnode" yield cursor # The user may have altered ``cursor`` attributes outside the generator enterattr = cursor.enterattr leaveattrnode = cursor.leaveattrnode cursor.restore() else: # These are the initial options enterattr = cursor.enterattr leaveattrnode = cursor.leaveattrnode if enterattr: yield from Frag._walk(self, cursor) if leaveattrnode: cursor.event = "leaveattrnode" yield cursor cursor.restore()
[docs] class TextAttr(Attr): """ Attribute class that is used for normal text attributes. """
[docs] class IDAttr(Attr): """ Attribute used for ids. """
[docs] class NumberAttr(Attr): """ Attribute class that is used for when the attribute value may be any kind of number. """
[docs] class IntAttr(NumberAttr): """ Attribute class that is used when the attribute value may be an integer. """
[docs] class FloatAttr(NumberAttr): """ Attribute class that is used when the attribute value may be a floating point value. """
[docs] class BoolAttr(Attr): """ Attribute class that is used for boolean attributes. When publishing the value will always be the attribute name, regardless of the real value. """ # We can't simply overwrite :meth:`_publishattrvalue`, because for ``xhtml==0`` we don't output a "proper" attribute def publish(self, publisher): if len(self) == 1 and isinstance(self[0], AttrElement): yield from self[0].publishboolattr(publisher, self) else: publisher.inattr += 1 name = self._publishname(publisher) yield publisher.encode(f" {name}") if publisher.xhtml > 0: yield publisher.encode('="') publisher.pushtextfilter(misc.xmlescape) yield publisher.encode(name) publisher.poptextfilter() yield publisher.encode('"') publisher.inattr -= 1
[docs] class ColorAttr(Attr): """ Attribute class that is used for a color attributes. """
[docs] class StyleAttr(Attr): """ Attribute class that is used for CSS style attributes. """ def _transform(self, replacer): from ll.xist import css stylesheet = cssutils.parseString(f"a{{{self}}}") css.replaceurls(stylesheet, replacer) return stylesheet.cssRules[0].style.getCssText(separator=" ")
[docs] def replaceurls(self, replacer): """ Replace each URL in the style. Each URL will be passed to the callable ``replacer`` and replaced with the returned value. """ self[:] = self._transform(replacer)
def parsed(self, parser, event): if event == "leaveattrns" and not self.isfancy() and parser.base is not None: def prependbase(u): return parser.base/u self.replaceurls(prependbase) def _publishattrvalue(self, publisher): if not self.isfancy() and publisher.base is not None: def reltobase(u): return u.relative(publisher.base, publisher.allowschemerelurls) yield from Frag(self._transform(reltobase)).publish(publisher) else: yield from super(StyleAttr, self)._publishattrvalue(publisher)
[docs] def urls(self, base=None): """ Return a list of all the URLs (as :class:`URL` objects) found in the style attribute. """ from ll.xist import css urls = [] def collect(u): urls.append(u) return u s = cssutils.parseString(f"a{{{self}}}") css.replaceurls(s, collect) return urls
[docs] class URLAttr(Attr): """ Attribute class that is used for URLs. See the module :mod:`ll.url` for more information about URL handling. """ def parsed(self, parser, event): if event == "leaveattrns" and not self.isfancy() and parser.base is not None: self[:] = (url_.URL(parser.base/str(self)),) def _publishattrvalue(self, publisher): if self.isfancy(): return Attr._publishattrvalue(self, publisher) else: new = Attr(url_.URL(str(self)).relative(publisher.base, publisher.allowschemerelurls)) return new._publishattrvalue(publisher)
[docs] def asURL(self): """ Return ``self`` as a :class:`URL` object (note that non-:class:`Text` content will be filtered out). """ return url_.URL(Attr.__str__(self))
[docs] def forInput(self, root=None): """ return a :class:`URL` pointing to the real location of the referenced resource. ``root`` must be the root URL relative to which ``self`` will be interpreted and usually comes from the ``root`` attribute of the ``converter`` argument in :meth:`convert`. """ u = self.asURL() if u.scheme == "root": u.scheme = None u = url_.URL(root)/u return u
[docs] def imagesize(self, root=None): """ Return the size of an image as a tuple. """ return self.openread(root).imagesize
[docs] def contentlength(self, root=None): """ Return the size of a file in bytes. """ return self.openread(root).contentlength
[docs] def lastmodified(self, root=None): """ returns the timestamp for the last modification to the file """ return self.openread(root).lastmodified
[docs] def openread(self, root=None): """ Return a :class:`Resource` for reading from the URL. """ return self.forInput(root).openread()
[docs] def openwrite(self, root=None): """ Return a :class:`Resource` for writing to the URL. """ return self.forInput(root).openwrite()
class _Attrs_Meta(type(Node)): def __new__(cls, name, bases, dict): self = super(_Attrs_Meta, cls).__new__(cls, name, bases, dict) self._byxmlname = weakref.WeakValueDictionary() # map XML name to attribute class self._bypyname = weakref.WeakValueDictionary() # map Python name to attribute class self._defaultattrs = weakref.WeakValueDictionary() # map XML name to attribute class with default value # go through the attributes and register them in the cache for key in dir(self): value = getattr(self, key) if isinstance(value, _Attr_Meta): self.add(value) return self def __repr__(self): l = len(self._bypyname) if l == 0: attrcount = "no attrs" elif l == 1: attrcount = "1 attr" else: attrcount = f"{l:,} attrs" return f"<attributes class {self.__module__}:{self.__qualname__} ({attrcount}) at {id(self):#x}>" def _attrinfo(self, name): if isinstance(name, str): if name.startswith("{"): name = name[1:].partition("}") if name[1] is not None: return (name[0], name[2], self._byxmlname.get((name[0], name[2]), Attr)) try: attrclass = self._byxmlname[(None, name)] except KeyError: return (None, name, Attr) else: return (attrclass.xmlns or self.xmlns, attrclass.xmlname, attrclass) elif isinstance(name, tuple): xmlns = nsname(name[0]) return (xmlns, name[1], self._byxmlname.get((xmlns, name[1]), Attr)) elif isinstance(name, _Attr_Meta): return (name.xmlns, name.xmlname, name) elif isinstance(name, Attr): return (name.xmlns, name.xmlname, name.__class__) else: raise TypeError(f"can't handle attribute name {name!r}") def __contains__(self, key): (attrxmlns, attrname, attrclass) = self._attrinfo(key) return (attrxmlns, attrname) in self._byxmlname
[docs] class Attrs(Node, dict, metaclass=_Attrs_Meta): """ An attribute map. Predefined attributes can be declared through nested subclasses of :class:`Attr`. """ def __init__(self, *args, **kwargs): dict.__init__(self) # set default attribute values for value in self._defaultattrs.values(): self[value] = value.default.clone() # update attributes, this might overwrite (or delete) default attributes self.update(*args, **kwargs) def __repr__(self): l = len(self) if l == 0: attrcount = "no attrs" elif l == 1: attrcount = "1 attr" else: attrcount = f"{l:,} attrs" if self.startloc is not None: loc = f" location={str(self.startloc)!r}" else: loc = "" return f"<attributes {self.__class__.__module__}.{self.__class__.__qualname__} ({attrcount}){loc} at {id(self):#x}>" def _repr_pretty_content_(self, p): for attr in self.values(): p.breakable() p.pretty(attr) def _repr_pretty_(self, p, cycle): with p.group(4, f"<attributes {self.__class__.__module__}.{self.__class__.__qualname__}", ">"): if self.startloc is not None: p.breakable() p.text(f"location={str(self.startloc)!r}") if cycle: p.breakable() p.text("...") else: self._repr_pretty_content_(p) p.breakable() p.text(f"at {id(self):#x}") def __str__(self): return "" def _str(self): return "attrs" def __eq__(self, other): if isinstance(other, Attrs): if len(self) != len(other): return False for (key, value) in self.items(): if other[key] != value: return False return True return NotImplemented @classmethod def add(cls, value): cls._byxmlname[(value.xmlns, value.xmlname)] = value cls._bypyname[(value.xmlns, value.__name__)] = value if value.default: cls._defaultattrs[(value.xmlns, value.xmlname)] = value def _create(self): node = self.__class__() # "virtual" constructor node.clear() return node def clone(self): node = self._create() for (key, value) in dict.items(self): dict.__setitem__(node, key, value.clone()) return self._decoratenode(node) def __copy__(self): node = self._create() for (key, value) in dict.items(self): dict.__setitem__(node, key, value) return self._decoratenode(node) def __deepcopy__(self, memo=None): node = self._create() if memo is None: memo = {} memo[id(self)] = node for (key, value) in dict.items(self): dict.__setitem__(node, key, copy.deepcopy(value, memo)) return self._decoratenode(node)
[docs] def __getitem__(self, name): """ Return the attribute with the name ``name``. ``name`` can be one of the following types: A string ``name`` will be treated as the XML name of a local attribute. A two-item tuple The first item is treated as the XML attribute name and the second item as the namespace name. If the namespace name is :const:`None` this refers to a local attributes, otherwise to a global attribute. An :class:`Attr` subclass """ if isinstance(name, list) and not isinstance(name, Node): node = self for subname in name: node = node[subname] return node (attrxmlns, attrname, attrclass) = self._attrinfo(name) try: return dict.__getitem__(self, (attrxmlns, attrname)) except KeyError: # if the attribute is not there generate a new empty one attrvalue = self._makeattr(attrxmlns, attrname, attrclass) dict.__setitem__(self, (attrxmlns, attrname), attrvalue) return attrvalue
[docs] def __setitem__(self, name, value): """ Set the attribute with the XML ``name`` to the value ``value``. ``name`` may be a string or an attribute class or instance. The newly set attribute object will be returned. """ if isinstance(name, list) and not isinstance(name, Node): if not name: raise ValueError("can't replace self") node = self for subname in name[:-1]: node = node[subname] node[name[-1]] = value (attrxmlns, attrname, attrclass) = self._attrinfo(name) attrvalue = self._makeattr(attrxmlns, attrname, attrclass, value) dict.__setitem__(self, (attrxmlns, attrname), attrvalue)
[docs] def __delitem__(self, name): """ """ if isinstance(name, list) and not isinstance(name, Node): if not name: raise ValueError("can't delete self") node = self for subname in name[:-1]: node = node[subname] del node[name[-1]] (attrxmlns, attrname, attrclass) = self._attrinfo(name) dict.__delitem__(self, (attrxmlns, attrname))
def __contains__(self, name): (attrxmlns, attrname, attrclass) = self._attrinfo(name) return dict.__contains__(self, (attrxmlns, attrname)) and bool(dict.__getitem__(self, (attrxmlns, attrname))) def convert(self, converter): node = self._create() for value in self.values(): newvalue = value.convert(converter) assert isinstance(newvalue, Node), f"the convert method returned the illegal object {newvalue!r} (type {type(newvalue)!r}) when converting the attribute {value.__class__.__qualname__} with the value {value!r}" node[value] = newvalue return node def compacted(self): node = self._create() for value in self.values(): newvalue = value.compacted() assert isinstance(newvalue, Node), f"the compacted method returned the illegal object {newvalue!r} (type {type(newvalue)!r}) when compacting the attribute {value.__class__.__qualname__} with the value {value!r}" node[value] = newvalue return node def normalized(self): node = self._create() for value in self.values(): newvalue = value.normalized() assert isinstance(newvalue, Node), f"the normalized method returned the illegal object {newvalue!r} (type {type(newvalue)!r}) when normalizing the attribute {value.__class__.__qualname__} with the value {value!r}" node[value] = newvalue return node def present(self, presenter): return presenter.presentAttrs(self) # return a generator-iterator def validate(self, recursive=True, path=None): if path is None: path = [] # collect required attributes attrs = {value for value in self.declaredattrs() if value.required} path.append(None) # Check each existing attribute and remove it from the list of required ones for value in self.values(): path[-1] = value yield from self.validateattr(path) yield from value.validate(recursive, path) try: attrs.remove(value.__class__) except KeyError: pass path.pop() # are there any required attributes remaining that haven't been specified? => issue warnings about it for attr in attrs: yield RequiredAttrMissingWarning(self.__class__, attr) def validateattr(self, path): node = path[-1] if node.xmlns is None and not self.isdeclared(node): yield UndeclaredAttrWarning(self.__class__, node) def publish(self, publisher): for value in self.values(): yield from value.publish(publisher) @classmethod def isdeclared(cls, name): (attrxmlns, attrname, attrclass) = cls._attrinfo(name) return (attrxmlns, attrname) in cls._byxmlname def __getattribute__(self, name): xmlns = super().__getattribute__("xmlns") _bypyname = super().__getattribute__("_bypyname") if (xmlns, name) in _bypyname: return self[_bypyname[(xmlns, name)]] else: return super().__getattribute__(name) def __setattr__(self, name, value): if (self.xmlns, name) in self._bypyname: self[self._pyname2xmlname(name)] = value else: super().__setattr__(name, value) def __delattr__(self, name): if (self.xmlns, name) in self._bypyname: del self[self._bypyname[(self.xmlns, name)]] else: super().__delattr__(name)
[docs] def get(self, name, default=None): """ Works like the dictionary method :meth:`~dict.get`, it returns the attribute with the XML name ``name``, or ``default`` if ``self`` has no such attribute. ``name`` may also be an attribute class (either from ``self.Attrs`` or a global attribute). """ attrvalue = self[name] if not attrvalue: (attrxmlns, attrname, attrclass) = self._attrinfo(name) attrvalue = self._makeattr(attrxmlns, attrname, attrclass, default) # pack the attribute into an attribute object return attrvalue
[docs] def setdefault(self, name, default): """ Works like the dictionary method :meth:`~dict.setdefault`, it returns the attribute with the Python name ``name``. If ``self`` has no such attribute, it will be set to ``default`` and ``default`` will be returned as the new attribute value. """ attrvalue = self[name] if not attrvalue: (attrname, attrclass) = self._attrinfo(name) attrvalue = self._makeattr(attrname, attrclass, default) # pack the attribute into an attribute object dict.__setitem__(self, attrname, attrvalue) return attrvalue
[docs] def update(self, *args, **kwargs): """ Copies attributes over from all mappings in ``args`` and from ``kwargs``. Keywords are treated as the Python names of attributes. """ for mapping in args: if mapping is not None: if isinstance(mapping, Attrs): # This makes sure that global attributes are copied properly for (key, value) in dict.items(mapping): # Iterate through all attributes, even the empty ones. dict.__setitem__(self, key, value) else: for (attrname, attrvalue) in mapping.items(): self[attrname] = attrvalue for (attrname, attrvalue) in kwargs.items(): self[self._pyname2xmlname(attrname)] = attrvalue
[docs] @classmethod def declaredattrs(cls): """ Return an iterator over all declared attribute classes. """ return cls._bypyname.values()
@classmethod def _attrinfo(cls, name): return cls.__class__._attrinfo(cls, name) @classmethod def _makeattr(cls, attrxmlns, attrname, attrclass, value=None): attrvalue = attrclass(value) if attrclass is Attr: attrvalue.xmlns = attrxmlns attrvalue.xmlname = attrname return attrvalue @classmethod def _pyname2xmlname(cls, name): # using ``cls.xmlns`` makes sure, that ``element(xml.Attrs(lang='de'))`` really creates a global attribute # (because ``xml.Attrs`` and ``xml.Attrs.lang`` have ``xmlns`` set appropriately) if (cls.xmlns, name) in cls._bypyname: return cls._bypyname[(cls.xmlns, name)] return name def __len__(self): return misc.count(self.values()) def keys(self): for (key, value) in dict.items(self): if value: yield key __iter__ = keys def values(self): for value in dict.values(self): if value: yield value def items(self): for (key, value) in dict.items(self): if value: yield (key, value)
[docs] def filtered(self, function): """ Return a filtered version of ``self``. """ node = self._create() for (name, value) in self.items(): if function(value): node[name] = value return node
def _fixnames(self, names): # Helper for :meth:`withnames` and :meth:`withoutnames` newnames = [] for name in names: (attrxmlns, attrname, attrclass) = self._attrinfo(name) newnames.append((attrxmlns, attrname)) return tuple(newnames)
[docs] def withnames(self, *names): """ Return a copy of ``self`` where only the attributes with XML names in ``names`` are kept, all others are removed. """ def isok(node): if node.xmlns is None: return (node.xmlns, node.xmlname) in names or node.xmlname in names else: return (node.xmlns, node.xmlname) in names names = self._fixnames(names) return self.filtered(isok)
[docs] def withoutnames(self, *names): """ Return a copy of ``self`` where all the attributes with XML names in ``names`` are removed. """ def isok(node): if node.xmlns is None: return (node.xmlns, node.xmlname) not in names and node.xmlname not in names else: return (node.xmlns, node.xmlname) not in names names = self._fixnames(names) return self.filtered(isok)
def _walk(self, cursor): cursor.path.append(None) cursor.index.append(None) for child in self.values(): cursor.path[-1] = cursor.node = child cursor.index[-1] = child.xmlname if child.xmlns is None else (child.xmlname, child.xmlns) yield from child._walk(cursor) cursor.path.pop() cursor.index.pop() cursor.node = cursor.path[-1]
class _Element_Meta(type(Node)): def __new__(cls, name, bases, dict): if "model" in dict and isinstance(dict["model"], bool): from ll.xist import sims dict["model"] = sims.Any() if dict["model"] else sims.Empty() self = super(_Element_Meta, cls).__new__(cls, name, bases, dict) if dict.get("register") is not None: threadlocalpool.pool.register(self) return self def __repr__(self): if self.xmlname != self.__name__: xmlname = f" xmlname={self.xmlname!r}" else: xmlname = "" if self.xmlns is not None: xmlns = f" xmlns={self.xmlns!r}" else: xmlns = "" return f"<element class {self.__module__}:{self.__qualname__}{xmlname}{xmlns} at {id(self):#x}>"
[docs] class Element(Node, metaclass=_Element_Meta): """ This class represents XML/XIST elements. All elements implemented by the user must be derived from this class. Elements support the following class variables: :attr:`model` : object with :meth:`validate` method This is an object that is used for validating the content of the element. See the module :mod:`ll.xist.sims` for more info. If :attr:`model` is :const:`None` validation will be skipped, otherwise it will be performed when parsing or publishing. :attr:`Attrs` : :class:`Element.Attrs` subclass This is a class derived from :class:`Element.Attrs` and must define all attributes as classes nested inside this :class:`Attrs` class. :attr:`xmlns` : string This is the name of the namespace this element belong to. :attr:`register` : bool If :attr:`register` is false the element will never be registered in a :class:`Pool`. The default is :const:`True`. :attr:`xmlname` : string If the class name has to be different from the XML name (e.g. because the XML name is not a valid Python identifier) :attr:`xmlname` can be used to specify the real XML name. Otherwise the XML name will be the Python name. """ model = None register = None Attrs = Attrs
[docs] def __init__(self, *content, **attrs): """ Create a new :class:`Element` instance. Positional arguments are treated as content nodes. Keyword arguments and dictionaries are treated as attributes. """ contentargs = [] attrargs = [] for child in content: if isinstance(child, dict): attrargs.append(child) else: contentargs.append(child) self.content = Frag(*contentargs) self.attrs = self.Attrs(*attrargs, **attrs)
def __repr__(self): if self.xmlns is not None: xmlns = f" xmlns={self.xmlns!r}" else: xmlns = "" if self.xmlname != self.__class__.__name__: xmlname = f" xmlname={self.xmlname!r}" else: xmlname = "" lc = len(self.content) if lc == 0: childcount = "no children" elif lc == 1: childcount = "1 child" else: childcount = f"{lc:,} children" la = len(self.attrs) if la == 0: attrcount = "no attrs" elif la == 1: attrcount = "1 attr" else: attrcount = f"{la:,} attrs" if self.startloc is not None: loc = f" location={str(self.startloc)!r}" else: loc = "" return f"<element {self.__class__.__module__}.{self.__class__.__qualname__}{xmlns}{xmlname} ({childcount}/{attrcount}){loc} at {id(self):#x}>" def _repr_pretty_(self, p, cycle): with p.group(4, f"<element {self.__class__.__module__}.{self.__class__.__qualname__}", ">"): if self.xmlns is not None: p.breakable() p.text(f"xmlns={self.xmlns!r}") if self.xmlname != self.__class__.__name__: p.breakable() p.text(f"xmlname={self.xmlname!r}") if self.startloc is not None: p.breakable() p.text(f"location={str(self.startloc)!r}") if cycle: p.breakable() p.text("...") else: self.attrs._repr_pretty_content_(p) for child in self.content: p.breakable() p.pretty(child) p.breakable() p.text(f"at {id(self):#x}") def __str__(self): return str(self.content) def _str(self): return f"element {{{self.xmlns}}}{self.xmlname}" def __getstate__(self): attrs = {key : (value.__class__.__module__, value.__class__.__qualname__, Frag(value)) for (key, value) in dict.items(self.attrs)} return (self.content, attrs) def __setstate__(self, data): import importlib (content, attrs) = data self.content = content self.attrs = self.Attrs() for (key, value) in attrs.items(): obj = importlib.import_module(value[0]) for name in value[1].split("."): obj = getattr(obj, name) value = obj(value[2]) dict.__setitem__(self.attrs, key, value)
[docs] def __enter__(self): """ :class:`Element` nodes can be used in :keyword:`with` blocks to build XIST trees. Inside a :keyword:`with` block ``+`` and :func:`add` can be used to append node to the currently active element in the :keyword:`with` block:: with xsc.build(): with html.ul() as node: +html.li("I hear and I forget.") +html.li("I see and I believe.") +html.li("I do and I understand.") xsc.add(class_="quote") print(node.bytes()) """ threadlocalnodehandler.handler.enter(self) return self
def __exit__(self, type, value, traceback): threadlocalnodehandler.handler.exit()
[docs] def __call__(self, *content, **attrs): """ Calling an element add items in ``content`` to the element content and set attributes from ``attrs``. The element itself will be returned. """ for child in content: if isinstance(child, dict): self.attrs.update(child) else: self.content.append(child) self.attrs.update({self.attrs._pyname2xmlname(key): value for (key, value) in attrs.items()}) return self
def __eq__(self, other): if isinstance(other, Element): return self.xmlname == other.xmlname and self.xmlns == other.xmlns and self.content == other.content and self.attrs == other.attrs return NotImplemented def validate(self, recursive=True, path=None): if path is None: path = [self] if self.__class__ is Element: yield UndeclaredNodeWarning(self) if self.model is not None: yield from self.model.validate(path) yield from self.attrs.validate(recursive, path) if recursive: yield from self.content.validate(recursive, path)
[docs] def append(self, *items): """ Append every item in ``items`` to the elements content. """ self.content.append(*items)
[docs] def extend(self, items): """ Append all items in ``items`` to the elements content. """ self.content.extend(items)
[docs] def insert(self, index, *items): """ Insert every item in ``items`` at the position ``index``. """ self.content.insert(index, *items)
def _create(self): node = self.__class__() # "virtual" constructor if self.__class__ is Element: node.xmlname = self.xmlname node.xmlns = self.xmlns return node def convert(self, converter): node = self._create() node.content = self.content.convert(converter) node.attrs = self.attrs.convert(converter) return self._decoratenode(node) def clone(self): node = self._create() node.content = self.content.clone() # this is faster than passing it in the constructor (no :func:`tonode` call) node.attrs = self.attrs.clone() return self._decoratenode(node) def __copy__(self): node = self._create() node.content = copy.copy(self.content) node.attrs = copy.copy(self.attrs) return self._decoratenode(node) def __deepcopy__(self, memo=None): node = self._create() if memo is None: memo = {} memo[id(self)] = node node.content = copy.deepcopy(self.content, memo) node.attrs = copy.deepcopy(self.attrs, memo) return self._decoratenode(node) def _addimagesizeattributes(self, url, widthattr=None, heightattr=None): """ Automatically set image width and height attributes. The size of the image with the URL ``url`` will be determined and the width of the image will be put into the attribute with the name ``widthattr`` if ``widthattr`` is not :const:`None` and the attribute is not set already. The same will happen for the height, which will be put into the attribute named ``heighattr``. """ try: size = url.imagesize() except IOError as exc: warnings.warn(FileNotFoundWarning("can't read image", url, exc)) else: for attr in (heightattr, widthattr): if attr is not None: # do something to the width/height if attr not in self.attrs: self[attr] = size[attr == heightattr] def present(self, presenter): return presenter.presentElement(self) # return a generator-iterator def _publishname(self, publisher): if self.xmlns is not None: prefix = publisher._ns2prefix.get(self.xmlns) if prefix is not None: return f"{prefix}:{self.xmlname}" return self.xmlname def _publishstarttag(self, publisher): name = self._publishname(publisher) yield publisher.encode("<") yield publisher.encode(name) # we're the first element to be published, so we have to create the xmlns attributes if publisher._publishxmlns: for (xmlns, prefix) in sorted(publisher._ns2prefix.items(), key=lambda item: item[1] or ""): if xmlns not in publisher.hidexmlns: yield publisher.encode(" xmlns") if prefix is not None: yield publisher.encode(":") yield publisher.encode(prefix) yield publisher.encode('="') yield publisher.encode(xmlns) yield publisher.encode('"') # reset the note, so the next element won't create the attributes again publisher._publishxmlns = False yield from self.attrs.publish(publisher) if len(self): yield publisher.encode(">") else: if publisher.xhtml in (0, 1): if self.model is not None and self.model.empty: if publisher.xhtml == 1: yield publisher.encode(" /") yield publisher.encode(">") else: yield publisher.encode("></") yield publisher.encode(name) yield publisher.encode(">") elif publisher.xhtml == 2: yield publisher.encode("/>") def _publishendtag(self, publisher): name = self._publishname(publisher) if len(self): yield publisher.encode("</") yield publisher.encode(name) yield publisher.encode(">") def _publishfull(self, publisher): """ Does the full publication of the element. If you need full elements inside attributes (e.g. for JSP tag libraries), you can overwrite :meth:`publish` and simply call this method. """ yield from self._publishstarttag(publisher) yield from self.content.publish(publisher) yield from self._publishendtag(publisher) def publish(self, publisher): if publisher.inattr: # publish the content only when we are inside an attribute. This works much like using the plain string value, # but even works with processing instructions, or what the abbreviation entities return return self.content.publish(publisher) # return a generator-iterator else: return self._publishfull(publisher) # return a generator-iterator
[docs] def __getitem__(self, index): """ If ``index`` is a string, return the attribute with this (Python) name. If ``index`` is an attribute class, return the attribute that is an instance of this class. If ``index`` is a number or slice return the appropriate content node. ``index`` may also be a list, in with case :meth:`!__getitem__` will be applied recusively. :meth:`!__getitem__` also supports walk filters. """ if isinstance(index, (str, _Attr_Meta)): return self.attrs[index] elif isinstance(index, int): return self.content[index] elif isinstance(index, list): if index: return self.content[index] else: return self elif isinstance(index, slice): result = self._create() result.content = self.content[index] result.attrs = self.attrs return result else: from ll.xist import xfind def iterate(selector): path = [self, None] for child in self: path[-1] = child if path in selector: yield child return misc.Iterator(iterate(xfind.selector(index)))
[docs] def __setitem__(self, index, value): """ Set an attribute or content node to the value ``value``. For possible types for ``index`` see :meth:`__getitem__`. """ if isinstance(index, (str, _Attr_Meta)): self.attrs[index] = value elif isinstance(index, (list, int, slice)): self.content[index] = value else: from ll.xist import xfind selector = xfind.selector(index) value = Frag(value) newcontent = [] path = [self, None] for child in self: path[-1] = child if path in selector: newcontent.extend(value) else: newcontent.append(child) self.content[:] = newcontent
[docs] def __delitem__(self, index): """ Remove an attribute or content node. For possible types for ``index`` see :meth:`__getitem__`. """ if isinstance(index, (str, _Attr_Meta)): del self.attrs[index] elif isinstance(index, (list, int, slice)): del self.content[index] else: from ll.xist import xfind selector = xfind.selector(index) self.content = Frag(child for child in self if [self, child] not in selector)
def __iadd__(self, other): self.extend(other) return self
[docs] def __len__(self): """ Return the number of children. """ return len(self.content)
def __iter__(self): return iter(self.content) def compacted(self): node = self._create() node.content = self.content.compacted() node.attrs = self.attrs.compacted() return self._decoratenode(node)
[docs] def withsep(self, separator, clone=False): """ Return a version of ``self`` with a separator node between the child nodes of ``self``. For more info see :meth:`Frag.withsep`. """ node = self._create() node.attrs = self.attrs.clone() node.content = self.content.withsep(separator, clone) return node
[docs] def reversed(self): """ Return a reversed version of ``self``. """ node = self._create() node.attrs = self.attrs.clone() node.content = self.content.reversed() return node
[docs] def filtered(self, function): """ Return a filtered version of the ``self``. """ node = self._create() node.attrs = self.attrs.clone() node.content = self.content.filtered(function) return node
[docs] def shuffled(self): """ Return a shuffled version of the ``self``. """ node = self._create() node.attrs = self.attrs.clone() node.content = self.content.shuffled() return node
def mapped(self, function, converter=None, **converterargs): if converter is None: converter = Converter(**converterargs) node = function(self, converter) assert isinstance(node, Node), f"the mapped method returned the illegal object {node!r} (type {type(node)!r}) when mapping {self!r}" if node is self: node = self._create() node.content = Frag(self.content.mapped(function, converter)) node.attrs = self.attrs.clone() return node def normalized(self): node = self._create() node.attrs = self.attrs.normalized() node.content = self.content.normalized() return node def pretty(self, level=0, indent="\t"): orglevel = level # Remember the original indent level, so that any misconfiguration inside the element doesn't mess with the indentation node = self._create() node.attrs.update(self.attrs) if len(self): # search for text content for child in self: if isinstance(child, Text): # leave content alone node.append(self.content.clone()) break else: level += 1 for child in self: level += child.prettyindentbefore node.append("\n", child.pretty(level, indent)) level += child.prettyindentafter node.append("\n", indent*orglevel) if orglevel > 0: node = Frag(indent*orglevel, node) return node def _walk(self, cursor): enterelementnode = cursor.enterelementnode if enterelementnode: cursor.event = "enterelementnode" yield cursor # The user may have altered ``cursor`` attributes outside the generator, so we refetch them entercontent = cursor.entercontent enterattrs = cursor.enterattrs leaveelementnode = cursor.leaveelementnode cursor.restore() else: # These are the initial options entercontent = cursor.entercontent enterattrs = cursor.enterattrs leaveelementnode = cursor.leaveelementnode if enterattrs: yield from self.attrs._walk(cursor) if entercontent: yield from self.content._walk(cursor) if leaveelementnode: cursor.event = "leaveelementnode" yield cursor cursor.restore()
[docs] class AttrElement(Element): """ Special subclass of :class:`Element`. When an :class:`AttrElement` node is the only node in an attribute, it takes over publishing of the attribute (via the methods :meth:`publishattr` and :meth:`publishboolattr`). In all other cases publishing is done in the normal way (and must be overwritten with the :meth:`publish` method). """ register = None
[docs] @misc.notimplemented def publish(self, publisher): """ Publish ``self`` to the publisher ``publisher`` (outside of any attribute) """
[docs] @misc.notimplemented def publishattr(self, publisher, attr): """ Publish the attribute ``attr`` to the publisher ``publisher``. """
[docs] @misc.notimplemented def publishboolattr(self, publisher, attr): """ Publish the boolean attribute ``attr`` to the publisher """
class _Entity_Meta(type(Node)): def __new__(cls, name, bases, dict): self = super(_Entity_Meta, cls).__new__(cls, name, bases, dict) if dict.get("register") is not None: threadlocalpool.pool.register(self) return self def __repr__(self): if self.xmlname != self.__name__: xmlname = f" xmlname={self.xmlname!r}" else: xmlname = "" return f"<entity class {self.__module__}:{self.__qualname__}{xmlname} at {id(self):#x}>"
[docs] class Entity(Node, metaclass=_Entity_Meta): """ Class for entities. Derive your own entities from it and overwrite :meth:`convert`. """ register = None def __repr__(self): if self.xmlname != self.__class__.__name__: xmlname = f" xmlname={self.xmlname!r}" else: xmlname = "" if self.startloc is not None: loc = f" location={str(self.startloc)!r}" else: loc = "" return f"<entity {self.__class__.__module__}.{self.__class__.__qualname__}{xmlname}{loc} at {id(self):#x}>" def _repr_pretty_(self, p, cycle): with p.group(4, f"<entity {self.__class__.__module__}.{self.__class__.__qualname__}", ">"): if self.xmlname != self.__class__.__name__: p.breakable() p.text(f"xmlname={self.xmlname!r}") if self.startloc is not None: p.breakable() p.text(f"location={str(self.startloc)!r}") p.breakable() p.text(f"at {id(self):#x}") def _str(self): return f"entity {self.xmlname}" def __eq__(self, other): if isinstance(other, Entity): return self.xmlname == other.xmlname return NotImplemented def validate(self, recursive=True, path=None): if self.__class__ is Entity: yield UndeclaredNodeWarning(self) def convert(self, converter): return self def compacted(self): return self def present(self, presenter): return presenter.presentEntity(self) # return a generator-iterator def publish(self, publisher): yield publisher.encode("&") yield publisher.encode(self.xmlname) yield publisher.encode(";") def _walk(self, cursor): cursor.event = "entitynode" yield cursor cursor.restore()
class _CharRef_Meta(type(Entity)): # don't subclass type(Text), as this is redundant def __repr__(self): if self.xmlname != self.__name__: xmlname = f" xmlname={self.xmlname!r}" else: xmlname = "" return f"<charref class {self.__module__}:{self.__qualname__}{xmlname} at {id(self):#x}>"
[docs] class CharRef(Text, Entity, metaclass=_CharRef_Meta): """ A simple named character reference, the code point is in the class attribute :attr:`codepoint`. """ register = None def __init__(self): Text.__init__(self, chr(self.codepoint)) Entity.__init__(self) def __repr__(self): if self.xmlname != self.__class__.__name__: xmlname = f" xmlname={self.xmlname!r}" else: xmlname = "" if self.startloc is not None: loc = f" location={str(self.startloc)!r}" else: loc = "" return f"<charref {self.__class__.__module__}.{self.__class__.__qualname__}{xmlname} content={self.content!r}{loc} at {id(self):#x}>" def _repr_pretty_(self, p, cycle): with p.group(4, f"<charref {self.__class__.__module__}.{self.__class__.__qualname__}", ">"): if self.xmlname != self.__class__.__name__: p.breakable() p.text(f"xmlname={self.xmlname!r}") if self.startloc is not None: p.breakable() p.text(f"location={str(self.startloc)!r}") p.breakable() p.text(f"codepoint={self.codepoint:#x}") p.breakable() p.text(f"at {id(self):#x}") def __getnewargs__(self): return () def present(self, presenter): return presenter.presentEntity(self) # return a generator-iterator # The rest is the same as for Text, but does not return CharRefs, but Texts def __getitem__(self, index): return Text(self.content.__getitem__(index)) def __add__(self, other): return Text(self.content + other) def __radd__(self, other): return Text(str(other) + self.content) def __mul__(self, n): return Text(n * self.content) def __rmul__(self, n): return Text(n * self.content) def capitalize(self): return Text(self.content.capitalize()) def center(self, width): return Text(self.content.center(width)) def ljust(self, width, fill=" "): return Text(self.content.ljust(width, fill)) def lower(self): return Text(self.content.lower()) def lstrip(self, chars=None): return Text(self.content.lstrip(chars)) def replace(self, old, new, maxsplit=-1): return Text(self.content.replace(old, new, maxsplit)) def rjust(self, width, fill=" "): return Text(self.content.rjust(width, fill)) def rstrip(self, chars=None): return Text(self.content.rstrip(chars)) def strip(self, chars=None): return Text(self.content.strip(chars)) def swapcase(self): return Text(self.content.swapcase()) def title(self): return Text(self.content.title()) def translate(self, table): return Text(self.content.translate(table)) def upper(self): return Text(self.content.upper())
### ### XML class pool ###
[docs] class Pool(misc.Pool): """ A :class:`Pool` stores a collection of XIST classes and can be passed to a parser. The parser will ask the pool which classes to use when elements, processing instructions etc. have to be instantiated. """
[docs] def __init__(self, *objects): """ Create a :class:`Pool` object. All items in ``objects`` will be registered in the pool. """ self._elementsbyname = {} self._procinstsbyname = {} self._entitiesbyname = {} self._charrefsbyname = {} self._charrefsbycodepoint = {} self._attrsbyname = {} misc.Pool.__init__(self, *objects)
[docs] def register(self, object): """ Register ``object`` in the pool. ``object`` can be: * a :class:`Element`, :class:`ProcInst` or :class:`Entity` class; * an :class:`Attr` class for a global attribute; * an :class:`Attrs` class containing global attributes; * a :class:`dict` (all values will be registered, this makes it possible to e.g. register all local variables by passing ``vars()``); * a module (all attributes in the module will be registered). """ # Note that the following is a complete reimplementation of :meth:`misc.Pool.register`, otherwise the interactions would be too complicated. if isinstance(object, type): if issubclass(object, Element): if object.register: self._elementsbyname[(object.xmlns, object.xmlname)] = object elif issubclass(object, ProcInst): if object.register: self._procinstsbyname[object.xmlname] = object elif issubclass(object, Entity): if object.register: self._entitiesbyname[object.xmlname] = object elif issubclass(object, Attr): if object.xmlns is not None and object.register: self._attrsbyname[(object.xmlns, object.xmlname)] = object elif issubclass(object, Attrs): for attr in object.declaredattrs(): self.register(attr) self._attrs[object.__name__] = object elif isinstance(object, types.ModuleType): self.register(object.__dict__) elif isinstance(object, dict): for (key, value) in object.items(): if key == "__bases__": for base in value: if not isinstance(base, Pool): base = self.__class__(base) self.bases.append(base) elif isinstance(value, type): self.register(value) elif not isinstance(value, (types.ModuleType, dict)): try: self._attrs[key] = value except TypeError: pass elif isinstance(object, Pool): self.bases.append(object)
def __enter__(self): self.prev = threadlocalpool.pool threadlocalpool.pool = self return self def __exit__(self, type, value, traceback): threadlocalpool.pool = self.prev del self.prev
[docs] def clear(self): """ Make ``self`` empty. """ self._elementsbyname.clear() self._procinstsbyname.clear() self._entitiesbyname.clear() self._attrsbyname.clear() misc.Pool.clear(self)
[docs] def clone(self): """ Return a copy of ``self``. """ copy = misc.Pool.clone(self) copy._elementsbyname = self._elementsbyname.copy() copy._procinstsbyname = self._procinstsbyname.copy() copy._entitiesbyname = self._entitiesbyname.copy() copy._attrsbyname = self._attrsbyname.copy() return copy
[docs] def elements(self): """ Return an iterator for all registered element classes. """ seen = set() for element in self._elementsbyname.values(): yield element seen.add((element.xmlns, element.xmlname)) for base in self.bases: for element in base.elements(): if (element.xmlns, element.xmlname) not in seen: yield element seen.add((element.xmlns, element.xmlname))
[docs] def elementclass(self, xmlns, name): """ Return the element class for the element with the XML name ``name`` and the namespace ``xmlns``. If the element can't be found an :class:`Element` will be returned. """ xmlns = nsname(xmlns) try: return self._elementsbyname[(xmlns, name)] except KeyError: for base in self.bases: result = base.elementclass(xmlns, name) if result is not Element: return result return Element
[docs] def element(self, xmlns, name): """ Return an element object for the element type with the XML name ``name`` and the namespace ``xmlns``. """ xmlns = nsname(xmlns) result = self.elementclass(xmlns, name)() if result.__class__ is Element: result.xmlns = xmlns result.xmlname = name return result
[docs] def haselement(self, xmlns, name): """ Is there a registered element class in ``self`` for the element type with the Python name ``name`` and the namespace ``xmlns``? """ return (nsname(xmlns), name) in self._elementsbyname or any(base.haselement(xmlns, name) for base in self.bases)
[docs] def procinsts(self): """ Return an iterator for all registered processing instruction classes. """ seen = set() for procinst in self._procinstsbyname.values(): yield procinst seen.add(procinst.xmlname) for base in self.bases: for procinst in base.procinsts(): if procinst.xmlname not in seen: yield procinst seen.add(procinst.xmlname)
[docs] def procinstclass(self, name): """ Return the processing instruction class for the PI with the target name ``name``. If the processing instruction can't be found an return :class:`ProcInst`. """ try: return self._procinstsbyname[name] except KeyError: for base in self.bases: result = base.procinstclass(name) if result is not ProcInst: return result return ProcInst
[docs] def procinst(self, name, content): """ Return a processing instruction object for the PI type with the target name ``name``. """ result = self.procinstclass(name)(content) if result.__class__ is ProcInst: result.xmlname = name return result
[docs] def hasprocinst(self, name): """ Is there a registered processing instruction class in ``self`` for the PI with the target name ``name``? """ return name in self._procinstsbyname or any(base.hasprocinst(name) for base in self.bases)
[docs] def entities(self): """ Return an iterator for all registered entity classes. """ seen = set() for entity in self._entitiesbyname.values(): yield entity seen.add(entity.xmlname) for base in self.bases: for entity in base.entities(): if entity.xmlname not in seen: yield entity seen.add(entity.xmlname)
[docs] def entityclass(self, name): """ Return the entity class for the entity with the XML name ``name``. If the entity can't be found return :class:`Entity`. """ try: return self._entitiesbyname[name] except KeyError: for base in self.bases: result = base.entityclass(name) if result is not Entity: return result return Entity
[docs] def entity(self, name): """ Return an entity object for the entity with the XML name ``name``. """ result = self.entityclass(name)() if result.__class__ is Entity: result.xmlname = name return result
[docs] def hasentity(self, name): """ Is there a registered entity class in ``self`` for the entity with the XML name ``name``? """ return name in self._entitiesbyname or any(base.hasentity(name) for base in self.bases)
[docs] def attrkey(self, xmlns, name): """ Return the key that can be used to set the attribute with the name ``name`` and the namespace ``xmlns``. If ``self`` (or one of the base pools) has any global attribute registered for that name/namespace, the attribute class will be returned. Otherwise the tuple ``(name, xmlns)`` (or ``name`` itself for a local attribute) will be returned. With this key :meth:`Attrs.__setitem__` will create the appropriate attribute class. """ if xmlns is None: return name xmlns = nsname(xmlns) try: return self._attrsbyname[(xmlns, name)] except KeyError: for base in self.bases: result = base.attrkey(name, xmlns) if isinstance(result, _Attr_Meta): return result return (xmlns, name)
[docs] def text(self, content): """ Create a text node with the content ``content``. """ return Text(content)
[docs] def comment(self, content): """ Create a comment node with the content ``content``. """ return Comment(content)
def __getattr__(self, key): try: return self._attrs[key] except KeyError: for base in self.bases: return getattr(base, key) raise AttributeError(key)
# Default pool (can be temporarily changed via ``with xsc.Pool() as pool:``) class ThreadLocalPool(threading.local): pool = Pool() threadlocalpool = ThreadLocalPool() ### ### Functions for namespace handling ###
[docs] def docpool(): """ Return a pool suitable for parsing XIST docstrings. """ from ll.xist.ns import html, chars, abbr, doc, specials return Pool(doc, specials, html, chars, abbr)
[docs] def nsname(xmlns): """ If ``xmlns`` is a module, return ``xmlns.xmlns``, else return ``xmlns`` unchanged. """ if xmlns is not None and not isinstance(xmlns, str): xmlns = xmlns.xmlns return xmlns
[docs] def nsclark(obj): """ Return a name in Clark notation. ``xmlns`` can be :const:`None`, a string or a module to return a namespace name, or a :class:`Node` instance to return a namespace name + node name combination:: >>> from ll.xist import xsc >>> from ll.xist.ns import html >>> xsc.nsclark(None) '{}' >>> xsc.nsclark(html) '{http://www.w3.org/1999/xhtml}' >>> xsc.nsclark(html.a) '{http://www.w3.org/1999/xhtml}a' >>> xsc.nsclark(html.a()) '{http://www.w3.org/1999/xhtml}a' """ if obj is None: return "{}" elif isinstance(obj, (Element, _Element_Meta)): return f"{{{obj.xmlns}}}{obj.xmlname}" elif isinstance(obj, (Attr, _Attr_Meta)): if obj.xmlns is None: return obj.xmlname else: return f"{{{obj.xmlns}}}{obj.xmlname}" elif isinstance(obj, (Node, _Node_Meta)): return obj.xmlname elif not isinstance(obj, str): return f"{{{obj.xmlns}}}" return f"{{{obj}}}"
# C0 Controls and Basic Latin
[docs] class quot(CharRef): "quotation mark = APL quote, U+0022 ISOnum"; codepoint = 34
[docs] class amp(CharRef): "ampersand, U+0026 ISOnum"; codepoint = 38
[docs] class lt(CharRef): "less-than sign, U+003C ISOnum"; codepoint = 60
[docs] class gt(CharRef): "greater-than sign, U+003E ISOnum"; codepoint = 62
[docs] class apos(CharRef): "apostrophe mark, U+0027 ISOnum"; codepoint = 39
### ### Functions for creating plain elements, entities and processing instructions ###
[docs] def element(xmlns, xmlname, *content, **attrs): """ Create a plain element object with the namespace name ``xmlns`` and the element name ``xmlname``. This object will be an instance of :class:`Element` (not an instance of a subclass). ``content`` and ``attrs`` will be used to initialize the content and attributes of the element. """ element = Element(*content, **attrs) element.xmlns = nsname(xmlns) element.xmlname = xmlname return element
[docs] def entity(xmlname): """ Create a plain entity object with the entity name ``xmlname``. This object will be an instance of :class:`Entity` (not an instance of a subclass). """ entity = Entity() entity.xmlname = xmlname return entity
[docs] def procinst(xmlname, *content): """ Create a plain processing instruction object with the target name ``xmlname``. This object will be an instance of :class:`ProcInst` (not an instance of a subclass). ``content`` will be used to initialize the content of the processing instruction. """ procinst = ProcInst(*content) procinst.xmlname = xmlname return procinst
### ### Location information ###
[docs] class Location: """ Represents a location in an XML entity. """ __slots__ = ("url", "line", "col")
[docs] def __init__(self, url=None, line=None, col=None): """ Create a new :class:`Location` object using the arguments passed in. ``url`` is the URL/filename. ``line`` is the line number and ``col`` is the column number (both starting at 0). """ self.url = url self.line = line self.col = col
[docs] def offset(self, offset): """ Return a location where the line number is incremented by offset (and the column number is reset to 0). """ if offset == 0: return self elif self.line is None: return Location(url=self.url, col=0) return Location(url=self.url, line=self.line+offset, col=0)
def __str__(self): url = str(self.url) if self.url is not None else "???" line = str(self.line) if self.line is not None else "?" col = str(self.col) if self.col is not None else "?" return f"{url}:{line}:{col}" def __repr__(self): attrs = ", ".join(f"{attr}={getattr(self, attr)!r}" for attr in ("url", "line", "col") if getattr(self, attr) is not None) return f"{self.__class__.__qualname__}({attrs})" def __eq__(self, other): if self.__class__ is other.__class__: return self.url == other.url and self.line == other.line and self.col == other.col return NotImplemented