Source code for xml4h.nodes

import six
import collections
import functools

import xml4h


ELEMENT_NODE = 1
ATTRIBUTE_NODE = 2
TEXT_NODE = 3
CDATA_NODE = 4
ENTITY_REFERENCE_NODE = 5
ENTITY_NODE = 6
PROCESSING_INSTRUCTION_NODE = 7
COMMENT_NODE = 8
DOCUMENT_NODE = 9
DOCUMENT_TYPE_NODE = 10
DOCUMENT_FRAGMENT_NODE = 11
NOTATION_NODE = 12


[docs]class Node(object): """ Base class for *xml4h* DOM nodes that represent and interact with a node in the underlying XML implementation. """ XMLNS_URI = 'http://www.w3.org/2000/xmlns/' """URI constant for XMLNS"""
[docs] def __init__(self, node, adapter): """ Construct an object that represents and wraps a DOM node in the underlying XML implementation. :param node: node object from the underlying XML implementation. :param adapter: the :class:`xml4h.impls.XmlImplAdapter` subclass implementation to mediate operations on the node in the underlying XML implementation. """ if node is None: raise xml4h.exceptions.IncorrectArgumentTypeException( node, [object]) if adapter is None: raise xml4h.exceptions.IncorrectArgumentTypeException( adapter, [object]) self._impl_node = node self._adapter = adapter
[docs] def __eq__(self, other): if self is other: return True elif not isinstance(other, Node): return False return (self.impl_document == other.impl_document and self.impl_node == other.impl_node)
[docs] def __repr__(self): return '<%s.%s>' % ( self.__class__.__module__, self.__class__.__name__)
@property def impl_node(self): """ :return: the node object from the underlying XML implementation that is represented by this *xml4h* node. """ return self._impl_node @property def impl_document(self): """ :return: the document object from the underlying XML implementation that contains the node represented by this *xml4h* node. """ return self.adapter.impl_document @property def adapter(self): """ :return: the :class:`xml4h.impls.XmlImplAdapter` subclass implementation that mediates operations on the node in the underlying XML implementation. """ return self._adapter @property def adapter_class(self): """ :return: the ``class`` of the :class:`xml4h.impls.XmlImplAdapter` subclass implementation that mediates operations on the node in the underlying XML implementation. """ return self._adapter.__class__
[docs] def has_feature(self, feature_name): """ :return: *True* if a named feature is supported by the adapter\ implementation underlying this node. """ return self.adapter.has_feature(feature_name)
@property def document(self): """ :return: the :class:`Document` node that contains this node, or ``self`` if this node is the document. """ if self.is_document: return self return self.adapter.wrap_document(self.adapter.impl_document) @property def root(self): """ :return: the root :class:`Element` node of the document that contains this node, or ``self`` if this node is the root element. """ if self.is_root: return self return self.adapter.wrap_node( self.adapter.impl_root_element, self.adapter.impl_document, self.adapter) @property def is_root(self): """:return: *True* if this node is the document's root element""" return self.impl_node == self.adapter.impl_root_element @property def node_type(self): """ :return: an int constant value that identifies the type of this node, such as :data:`ELEMENT_NODE` or :data:`TEXT_NODE`. """ return self._node_type
[docs] def is_type(self, node_type_constant): """ :return: *True* if this node's int type matches the given value. """ return self.node_type == node_type_constant
@property def is_element(self): """ :return: *True* if this is an :class:`Element` node. """ return self.is_type(ELEMENT_NODE) @property def is_attribute(self): """ :return: *True* if this is an :class:`Attribute` node. """ return self.is_type(ATTRIBUTE_NODE) @property def is_text(self): """ :return: *True* if this is a :class:`Text` node. """ return self.is_type(TEXT_NODE) @property def is_cdata(self): """ :return: *True* if this is a :class:`CDATA` node. """ return self.is_type(CDATA_NODE) @property def is_entity_reference(self): """ :return: *True* if this is an :class:`EntityReference` node. """ return self.is_type(ENTITY_REFERENCE_NODE) @property def is_entity(self): """ :return: *True* if this is an :class:`Entity` node. """ return self.is_type(ENTITY_NODE) @property def is_processing_instruction(self): """ :return: *True* if this is a :class:`ProcessingInstruction` node. """ return self.is_type(PROCESSING_INSTRUCTION_NODE) @property def is_comment(self): """ :return: *True* if this is a :class:`Comment` node. """ return self.is_type(COMMENT_NODE) @property def is_document(self): """ :return: *True* if this is a :class:`Document` node. """ return self.is_type(DOCUMENT_NODE) @property def is_document_type(self): """ :return: *True* if this is a :class:`DocumentType` node. """ return self.is_type(DOCUMENT_TYPE_NODE) @property def is_document_fragment(self): """ :return: *True* if this is a :class:`DocumentFragment` node. """ return self.is_type(DOCUMENT_FRAGMENT_NODE) @property def is_notation(self): """ :return: *True* if this is a :class:`Notation` node. """ return self.is_type(NOTATION_NODE)
[docs] def _convert_nodelist(self, impl_nodelist): """ Convert a list of underlying implementation nodes into a list of *xml4h* wrapper nodes. """ nodelist = [ self.adapter.wrap_node(n, self.adapter.impl_document, self.adapter) for n in impl_nodelist] return NodeList(nodelist)
@property def parent(self): """ :return: the parent of this node, or *None* of the node has no parent. """ parent_impl_node = self.adapter.get_node_parent(self.impl_node) return self.adapter.wrap_node( parent_impl_node, self.adapter.impl_document, self.adapter) @property def ancestors(self): """ :return: the ancestors of this node in a list ordered by proximity to this node, that is: parent, grandparent, great-grandparent etc. """ ancestors = [] p = self.parent while p: ancestors.append(p) p = p.parent return NodeList(ancestors) @property def children(self): """ :return: a :class:`NodeList` of this node's child nodes. """ impl_nodelist = self.adapter.get_node_children(self.impl_node) return self._convert_nodelist(impl_nodelist)
[docs] def child(self, local_name=None, name=None, ns_uri=None, node_type=None, filter_fn=None): """ :return: the first child node matching the given constraints, or \ *None* if there are no matching child nodes. Delegates to :meth:`NodeList.filter`. """ return self.children(name=name, local_name=local_name, ns_uri=ns_uri, node_type=node_type, filter_fn=filter_fn, first_only=True)
@property def attributes(self): return None @property def attribute_nodes(self): return None @property def siblings(self): """ :return: a list of this node's sibling nodes. :rtype: NodeList """ impl_nodelist = self.adapter.get_node_children(self.parent.impl_node) return self._convert_nodelist( [n for n in impl_nodelist if n != self.impl_node]) @property def siblings_before(self): """ :return: a list of this node's siblings that occur *before* this node in the DOM. """ impl_nodelist = self.adapter.get_node_children(self.parent.impl_node) before_nodelist = [] for n in impl_nodelist: if n == self.impl_node: break before_nodelist.append(n) return self._convert_nodelist(before_nodelist) @property def siblings_after(self): """ :return: a list of this node's siblings that occur *after* this node in the DOM. """ impl_nodelist = self.adapter.get_node_children(self.parent.impl_node) after_nodelist = [] is_after_myself = False for n in impl_nodelist: if is_after_myself: after_nodelist.append(n) elif n == self.impl_node: is_after_myself = True return self._convert_nodelist(after_nodelist) @property def namespace_uri(self): """ :return: this node's namespace URI or *None*. """ return self.adapter.get_node_namespace_uri(self.impl_node) ns_uri = namespace_uri # Alias """Alias for :meth:`namespace_uri`"""
[docs] def delete(self, destroy=True): """ Delete this node from the owning document. :param bool destroy: if True the child node will be destroyed in addition to being removed from the document. :returns: the removed child node, or *None* if the child was destroyed. """ removed_child = self.adapter.remove_node_child( self.adapter.get_node_parent(self.impl_node), self.impl_node, destroy_node=destroy) if removed_child is not None: return self.adapter.wrap_node(removed_child, None, self.adapter) else: return None
[docs] def clone_node(self, node): """ Clone a node from another document to become a child of this node, by copying the node's data into this document but leaving the node untouched in the source document. The node to be cloned can be a :class:`Node` based on the same underlying XML library implementation and adapter, or a "raw" node from that implementation. :param node: the node in another document to clone. :type node: xml4h or implementation node """ if isinstance(node, xml4h.nodes.Node): child_impl_node = node.impl_node else: child_impl_node = node # Assume it's a valid impl node self.adapter.import_node(self.impl_node, child_impl_node, clone=True)
[docs] def transplant_node(self, node): """ Transplant a node from another document to become a child of this node, removing it from the source document. The node to be transplanted can be a :class:`Node` based on the same underlying XML library implementation and adapter, or a "raw" node from that implementation. :param node: the node in another document to transplant. :type node: xml4h or implementation node """ if isinstance(node, xml4h.nodes.Node): child_impl_node = node.impl_node original_parent_impl_node = node.parent.impl_node else: child_impl_node = node # Assume it's a valid impl node original_parent_impl_node = self.adapter.get_node_parent(node) self.adapter.import_node(self.impl_node, child_impl_node, original_parent_impl_node, clone=False)
[docs] def find(self, name=None, ns_uri=None, first_only=False): """ Find :class:`Element` node descendants of this node, with optional constraints to limit the results. :param name: limit results to elements with this name. If *None* or ``'*'`` all element names are matched. :type name: string or None :param ns_uri: limit results to elements within this namespace URI. If *None* all elements are matched, regardless of namespace. :type ns_uri: string or None :param bool first_only: if *True* only return the first result node or *None* if there is no matching node. :returns: a list of :class:`Element` nodes matching any given constraints, or a single node if ``first_only=True``. """ if name is None: name = '*' # Match all element names if ns_uri is None: ns_uri = '*' # Match all namespaces impl_nodelist = self.adapter.find_node_elements( self.impl_node, name=name, ns_uri=ns_uri) if first_only: if impl_nodelist: return self.adapter.wrap_node( impl_nodelist[0], self.adapter.impl_document, self.adapter) else: return None return self._convert_nodelist(impl_nodelist)
[docs] def find_first(self, name=None, ns_uri=None): """ Find the first :class:`Element` node descendant of this node that matches any optional constraints, or None if there are no matching elements. Delegates to :meth:`find` with ``first_only=True``. """ return self.find(name=name, ns_uri=ns_uri, first_only=True)
[docs] def find_doc(self, name=None, ns_uri=None, first_only=False): """ Find :class:`Element` node descendants of the document containing this node, with optional constraints to limit the results. Delegates to :meth:`find` applied to this node's owning document. """ return self.document.find(name=name, ns_uri=ns_uri, first_only=first_only)
# Methods that operate on this Node implementation adapter
[docs] def write(self, writer, encoding='utf-8', indent=0, newline='', omit_declaration=False, node_depth=0, quote_char='"'): """ Serialize this node and its descendants to text, writing the output to the given *writer*. :param writer: a file or stream to which XML text is written. :type writer: a file, stream, etc :param string encoding: the character encoding for serialized text. :param indent: indentation prefix to apply to descendent nodes for pretty-printing. The value can take many forms: - *int*: the number of spaces to indent. 0 means no indent. - *string*: a literal prefix for indented nodes, such as ``\\t``. - *bool*: no indent if *False*, four spaces indent if *True*. - *None*: no indent :type indent: string, int, bool, or None :param newline: the string value used to separate lines of output. The value can take a number of forms: - *string*: the literal newline value, such as ``\\n`` or ``\\r``. An empty string means no newline. - *bool*: no newline if *False*, ``\\n`` newline if *True*. - *None*: no newline. :type newline: string, bool, or None :param boolean omit_declaration: if *True* the XML declaration header is omitted, otherwise it is included. Note that the declaration is only output when serializing an :class:`xml4h.nodes.Document` node. :param int node_depth: the indentation level to start at, such as 2 to indent output as if the given *node* has two ancestors. This parameter will only be useful if you need to output XML text fragments that can be assembled into a document. This parameter has no effect unless indentation is applied. :param string quote_char: the character that delimits quoted content. You should never need to mess with this. Delegates to :func:`xml4h.writer.write_node` applied to this node. """ xml4h.write_node(self, writer, encoding=encoding, indent=indent, newline=newline, omit_declaration=omit_declaration, node_depth=node_depth, quote_char=quote_char)
[docs] def write_doc(self, writer, *args, **kwargs): """ Serialize to text the document containing this node, writing the output to the given *writer*. :param writer: a file or stream to which XML text is written. :type writer: a file, stream, etc Delegates to :meth:`write` """ self.document.write(writer, *args, **kwargs)
[docs] def xml(self, encoding='utf-8', indent=4, **kwargs): """ :return: this node as an XML string. Delegates to :meth:`write` """ # Use string writer if `encoding` is unset, unusual but possible... if encoding is None: writer = six.StringIO() # ...otherwise and by default, use a bytes writer else: writer = six.BytesIO() self.write(writer, encoding=encoding, indent=indent, **kwargs) xml_bytes = writer.getvalue() if encoding: return xml_bytes.decode(encoding) else: return xml_bytes
[docs] def xml_doc(self, encoding='utf-8', **kwargs): """ :return: the document containing this node as an XML string. Delegates to :meth:`xml` """ return self.document.xml(encoding=encoding, **kwargs)
[docs]class NodeAttrAndChildElementLookupsMixin(object): """ Perform "magical" lookup of a node's attributes via dict-style keyword reference, and child elements via class attribute reference. """
[docs] def __getitem__(self, attr_name): """ Retrieve this node's attribute value by name using dict-style keyword lookup. :param string attr_name: name of the attribute. If the attribute has a namespace prefix that must be included, in other words the name must be a qname not local name. :raise: KeyError if the node has no such attribute. """ result = self.attributes[attr_name] if result is None: raise KeyError(attr_name) else: return result
[docs] def __getattr__(self, child_name): """ Retrieve this node's child element by tag name regardless of the elements namespace, assuming the name given doesn't match an existing attribute or method. :param string child_name: tag name of the child element to look up. To avoid name clashes with class attributes the child name may includes a trailing underscore (``_``) character, which is removed to get the real child tag name. The child name must not begin with underscore characters. :return: the type of the return value depends on how many child elements match the name: - a single :class:`Element` node if only one child element matches - a list of :class:`Element` nodes if there is more than 1 match. :raise: AttributeError if the node has no child element with the given name, or if the given name does not match the required pattern. """ if child_name.startswith('_'): # Never match names with underscore leaders, for safety pass else: # If name is munged with trailing underscore, remove it if child_name.endswith('_'): child_name = child_name[:-1] results = self.children(local_name=child_name, node_type=Element) if len(results) == 1: return results[0] elif len(results) > 1: return results raise AttributeError( "%s object has no attribute '%s'" % (self, child_name))
[docs]class XPathMixin(object): """ Provide :meth:`xpath` method to nodes that support XPath searching. """ def _maybe_wrap_node(self, node): # Don't try and wrap base types (e.g. attribute values or node text) if isinstance(node, (str, int, float)): return node else: return self.adapter.wrap_node( node, self.adapter.impl_document, self.adapter)
[docs] def xpath(self, xpath, **kwargs): """ Perform an XPath query on the current node. :param string xpath: XPath query. :param dict kwargs: Optional keyword arguments that are passed through to the underlying XML library implementation. :return: results of the query as a list of :class:`Node` objects, or a list of base type objects if the XPath query does not reference node objects. """ result = self.adapter.xpath_on_node(self.impl_node, xpath, **kwargs) if isinstance(result, (list, tuple)): return [self._maybe_wrap_node(r) for r in result] else: return self._maybe_wrap_node(result)
[docs]class Document(Node, NodeAttrAndChildElementLookupsMixin, XPathMixin): """ Node representing an entire XML document. """ _node_type = DOCUMENT_NODE
# TODO: doc_type, document_element
[docs]class DocumentType(Node): """ Node representing the type of an XML document. """ _node_type = DOCUMENT_TYPE_NODE
# TODO: name, entities, notations, public_id, system_id
[docs]class DocumentFragment(Node): """ Node representing an XML document fragment. """ _node_type = DOCUMENT_FRAGMENT_NODE
# TODO
[docs]class Notation(Node): """ Node representing a notation in an XML document. """ _node_type = NOTATION_NODE
# TODO: public_id, system_id
[docs]class Entity(Node): """ Node representing an entity in an XML document. """ _node_type = ENTITY_NODE # TODO: public_id, system_id @property def notation_name(self): return self.name
[docs]class EntityReference(Node): """ Node representing an entity reference in an XML document. """ _node_type = ENTITY_REFERENCE_NODE
# TODO
[docs]class NameValueNodeMixin(Node): """ Provide methods to access node name and value attributes, where the node name may also be composed of "prefix" and "local" components. """
[docs] def __repr__(self): return '<%s.%s: "%s">' % ( self.__class__.__module__, self.__class__.__name__, self.name)
def _tounicode(self, value): if value is None or isinstance(value, six.string_types): return value else: return six.text_type(value) @property def prefix(self): """ :return: the namespace prefix component of a node name, or None. """ return self._tounicode( self.adapter.get_node_name_prefix(self.impl_node)) @property def local_name(self): """ :return: the local component of a node name excluding any prefix. """ return self._tounicode( self.adapter.get_node_local_name(self.impl_node)) @property def name(self): """ Get the name of a node, possibly including prefix and local components. """ return self._tounicode( self.adapter.get_node_name(self.impl_node)) @property def value(self): """ Get or set the value of a node. """ return self._tounicode( self.adapter.get_node_value(self.impl_node)) @value.setter def value(self, value): self.adapter.set_node_value(self.impl_node, value)
[docs]class Text(NameValueNodeMixin): """ Node representing text content in an XML document. """ _node_type = TEXT_NODE
[docs]class CDATA(NameValueNodeMixin): """ Node representing character data in an XML document. """ _node_type = CDATA_NODE
[docs]class Comment(NameValueNodeMixin): """ Node representing a comment in an XML document. """ _node_type = COMMENT_NODE
[docs]class Attribute(NameValueNodeMixin): """ Node representing an attribute of a :class:`Document` or :class:`Element` node. """ _node_type = ATTRIBUTE_NODE
[docs]class ProcessingInstruction(NameValueNodeMixin): """ Node representing a processing instruction in an XML document. """ _node_type = PROCESSING_INSTRUCTION_NODE target = NameValueNodeMixin.name data = NameValueNodeMixin.value
[docs]class Element(NameValueNodeMixin, NodeAttrAndChildElementLookupsMixin, XPathMixin): """ Node representing an element in an XML document, with support for manipulating and adding content to the element. """ _node_type = ELEMENT_NODE @property def builder(self): """ :return: a :class:`~xml4h.builder.Builder` representing this element with convenience methods for adding XML content. """ return xml4h.Builder(self) @property def text(self): """ Get or set the text content of this element. """ return self.adapter.get_node_text(self.impl_node) @text.setter def text(self, text): self.adapter.set_node_text(self.impl_node, text) def _set_element_attributes(self, element, attr_obj=None, ns_uri=None, **attr_dict): if attr_obj is not None: if isinstance(attr_obj, dict): attr_dict.update(attr_obj) elif isinstance(attr_obj, (list, tuple)): for n, v in attr_obj: attr_dict[n] = v else: raise xml4h.exceptions.IncorrectArgumentTypeException( attr_obj, [dict, list, tuple]) # Always process 'xmlns' namespace definitions first, in case other # attributes belong to a newly-defined namespace # TODO Modern equivalent for this legacy `cmp` method re-implementation def cmp(a, b): # https://docs.python.org/3.0/whatsnew/3.0.html#ordering-comparisons return (a > b) - (a < b) def _xmlns_first(x, y): nx, ny = x[0], y[0] if nx.startswith('xmlns') and ny.startswith('xmlns'): return cmp(nx, ny) elif nx.startswith('xmlns'): return -1 elif ny.startswith('xmlns'): return 1 else: return cmp(nx, ny) # https://docs.python.org/3/library/functools.html#functools.cmp_to_key # TODO Modern equivalent for this custom sorting `cmp` function _xmlns_first = functools.cmp_to_key(_xmlns_first) attr_list = sorted(list(attr_dict.items()), key=_xmlns_first) # Add attributes for attr_name, v in attr_list: prefix, name, my_ns_uri = self.adapter.get_ns_info_from_node_name( attr_name, element) if ' ' in name: raise ValueError("Invalid attribute name value contains space") # If necessary, add an xmlns defn for new prefix-defined namespace if not prefix and '}' in attr_name: prefix = self.adapter.get_ns_prefix_for_uri( element, my_ns_uri, auto_generate_prefix=True) self.adapter.set_node_attribute_value(element, 'xmlns:%s' % prefix, my_ns_uri, ns_uri=self.XMLNS_URI) # Apply kw-specified namespace if not overridden by prefix name if my_ns_uri is None: my_ns_uri = ns_uri if ns_uri is not None: # Apply attribute namespace URI if different from owning elem if ns_uri == self.adapter.get_node_namespace_uri(element): my_ns_uri = None # Forcibly convert all data to unicode text if not isinstance(v, six.string_types): v = six.text_type(v) if prefix: qname = '%s:%s' % (prefix, name) else: qname = name self.adapter.set_node_attribute_value( element, qname, v, ns_uri=my_ns_uri)
[docs] def set_attributes(self, attr_obj=None, ns_uri=None, **attr_dict): """ Add or update this element's attributes, where attributes can be specified in a number of ways. :param attr_obj: a dictionary or list of attribute name/value pairs. :type attr_obj: dict, list, tuple, or None :param ns_uri: a URI defining a namespace for the new attributes. :type ns_uri: string or None :param dict attr_dict: attribute name and values specified as keyword arguments. """ self._set_element_attributes(self.impl_node, attr_obj=attr_obj, ns_uri=ns_uri, **attr_dict)
@property def attributes(self): """ Get or set this element's attributes as name/value pairs. .. note:: Setting element attributes via this accessor will **remove** any existing attributes, as opposed to the :meth:`set_attributes` method which only updates and replaces them. """ attr_impl_nodes = self.adapter.get_node_attributes(self.impl_node) return AttributeDict(attr_impl_nodes, self.impl_node, self.adapter) @attributes.setter def attributes(self, attr_obj): # Remove existing attributes, leaving namespace definitions until last # to avoid clobbering the namespace of other attributes for attr_name in [a for a in self.attributes if 'xmlns' not in a]: self.adapter.remove_node_attribute(self.impl_node, attr_name) for attr_name in self.attributes: self.adapter.remove_node_attribute(self.impl_node, attr_name) # Add new attributes self._set_element_attributes(self.impl_node, attr_obj=attr_obj) attrib = attributes # Alias """ Alias of :meth:`attributes` """ attrs = attributes # Alias """ Alias of :meth:`attributes` """ @property def attribute_nodes(self): """ :return: a list of this element's attributes as :class:`Attribute` nodes. """ impl_attr_nodes = self.adapter.get_node_attributes(self.impl_node) wrapped_attr_nodes = [ self.adapter.wrap_node(a, self.adapter.impl_document, self.adapter) for a in impl_attr_nodes] return sorted(wrapped_attr_nodes, key=lambda x: x.name)
[docs] def attribute_node(self, name, ns_uri=None): """ :param string name: the name of the attribute to return. :param ns_uri: a URI defining a namespace constraint on the attribute. :type ns_uri: string or None :return: this element's attributes that match ``ns_uri`` as :class:`Attribute` nodes. """ attr_impl_node = self.adapter.get_node_attribute_node( self.impl_node, name, ns_uri) return self.adapter.wrap_node( attr_impl_node, self.adapter.impl_document, self.adapter)
def _add_ns_prefix_attr(self, element, prefix, ns_uri): if prefix is None: ns_name = 'xmlns' self.adapter.set_node_namespace_uri(element, ns_uri) else: ns_name = 'xmlns:%s' % prefix self._set_element_attributes(element, {ns_name: ns_uri}, ns_uri=self.XMLNS_URI)
[docs] def set_ns_prefix(self, prefix, ns_uri): """ Define a namespace prefix that will serve as shorthand for the given namespace URI in element names. :param string prefix: prefix that will serve as an alias for a the namespace URI. :param string ns_uri: namespace URI that will be denoted by the prefix. """ self._add_ns_prefix_attr(self.impl_node, prefix, ns_uri)
[docs] def add_element(self, name, ns_uri=None, attributes=None, text=None, before_this_element=False): """ Add a new child element to this element, with an optional namespace definition. If no namespace is provided the child will be assigned to the default namespace. :param string name: a name for the child node. The name may be used to apply a namespace to the child by including: - a prefix component in the name of the form ``ns_prefix:element_name``, where the prefix has already been defined for a namespace URI (such as via :meth:`set_ns_prefix`). - a literal namespace URI value delimited by curly braces, of the form ``{ns_uri}element_name``. :param ns_uri: a URI specifying the new element's namespace. If the ``name`` parameter specifies a namespace this parameter is ignored. :type ns_uri: string or None :param attributes: collection of attributes to assign to the new child. :type attributes: dict, list, tuple, or None :param text: text value to assign to the new child. :type text: string or None :param bool before_this_element: if *True* the new element is added as a sibling preceding this element, instead of as a child. In other words, the new element will be a child of this element's parent node, and will immediately precent this element in the DOM. :return: the new child as a an :class:`Element` node. """ # Determine local name, namespace and prefix info from tag name prefix, local_name, node_ns_uri = \ self.adapter.get_ns_info_from_node_name(name, self.impl_node) if prefix: qname = '%s:%s' % (prefix, local_name) else: qname = local_name # If no name-derived namespace, apply an alternate namespace if node_ns_uri is None: if ns_uri is None: # Default document namespace node_ns_uri = self.adapter.get_ns_uri_for_prefix( self.impl_node, None) else: # keyword-parameter namespace node_ns_uri = ns_uri # Create element child_elem = self.adapter.new_impl_element( qname, node_ns_uri, parent=self.impl_node) # If element's default namespace was defined by literal uri prefix, # create corresponding xmlns attribute for element... if not prefix and '}' in name: self._set_element_attributes(child_elem, {'xmlns': node_ns_uri}, ns_uri=self.XMLNS_URI) # ...otherwise define keyword-defined namespace as the default, if any elif ns_uri is not None: self._set_element_attributes(child_elem, {'xmlns': ns_uri}, ns_uri=self.XMLNS_URI) # Create subordinate nodes if attributes is not None: self._set_element_attributes(child_elem, attr_obj=attributes) if text is not None: self._add_text(child_elem, text) # Add new element to its parent before a given node... if before_this_element: self.adapter.add_node_child( self.adapter.get_node_parent(self.impl_node), child_elem, before_sibling=self.impl_node) # ...or in the default position, appended after existing nodes else: self.adapter.add_node_child(self.impl_node, child_elem) return self.adapter.wrap_node( child_elem, self.adapter.impl_document, self.adapter)
def _add_text(self, element, text): text_node = self.adapter.new_impl_text(text) self.adapter.add_node_child(element, text_node)
[docs] def add_text(self, text): """ Add a text node to this element. Adding text with this method is subtly different from assigning a new text value with :meth:`text` accessor, because it "appends" to rather than replacing this element's set of text nodes. :param text: text content to add to this element. :param type: string or anything that can be coerced by :func:`unicode`. """ if not isinstance(text, six.string_types): text = six.text_type(text) self._add_text(self.impl_node, text)
def _add_comment(self, element, text): comment_node = self.adapter.new_impl_comment(text) self.adapter.add_node_child(element, comment_node)
[docs] def add_comment(self, text): """ Add a comment node to this element. :param string text: text content to add as a comment. """ self._add_comment(self.impl_node, text)
def _add_instruction(self, element, target, data): instruction_node = self.adapter.new_impl_instruction(target, data) self.adapter.add_node_child(element, instruction_node)
[docs] def add_instruction(self, target, data): """ Add an instruction node to this element. :param string text: text content to add as an instruction. """ self._add_instruction(self.impl_node, target, data)
def _add_cdata(self, element, data): cdata_node = self.adapter.new_impl_cdata(data) self.adapter.add_node_child(element, cdata_node)
[docs] def add_cdata(self, data): """ Add a character data node to this element. :param string data: text content to add as character data. """ self._add_cdata(self.impl_node, data)
[docs]class AttributeDict(object): """ Dictionary-like object of element attributes that always reflects the state of the underlying element node, and that allows for in-place modifications that will immediately affect the element. """
[docs] def __init__(self, attr_impl_nodes, impl_element, adapter): self.impl_element = impl_element self.adapter = adapter
def __len__(self): return len(self.impl_attributes) def __getitem__(self, attr_name): prefix, name, ns_uri = self.adapter.get_ns_info_from_node_name( attr_name, self.impl_element) return self.adapter.get_node_attribute_value( self.impl_element, name, ns_uri) def __setitem__(self, name, value): prefix, name, ns_uri = self.adapter.get_ns_info_from_node_name( name, self.impl_element) if not isinstance(value, str): value = str(value) self.adapter.set_node_attribute_value( self.impl_element, name, value, ns_uri) def __delitem__(self, name): prefix, name, ns_uri = self.adapter.get_ns_info_from_node_name( name, self.impl_element) self.adapter.remove_node_attribute(self.impl_element, name, ns_uri) def __iter__(self): for k in list(self.keys()): yield k iterkeys = __iter__ # Alias, per Python docs recommendation def __contains__(self, name): prefix, name, ns_uri = self.adapter.get_ns_info_from_node_name( name, self.impl_element) return self.adapter.has_node_attribute(self.impl_element, name, ns_uri)
[docs] def __repr__(self): return '<%s.%s: %s>' % ( self.__class__.__module__, self.__class__.__name__, list(self.to_dict.items()))
[docs] def keys(self): """ :return: a list of attribute name strings. """ return [self.adapter.get_node_name(a) for a in self.impl_attributes]
[docs] def values(self): """ :return: a list of attribute value strings. """ return [self.adapter.get_node_value(a) for a in self.impl_attributes]
[docs] def items(self): """ :return: a list of name/value attribute pairs sorted by attribute name. """ sorted_keys = sorted(self.keys()) return [(k, self[k]) for k in sorted_keys]
[docs] def namespace_uri(self, name): """ :param string name: the name of an attribute to look up. :return: the namespace URI associated with the named attribute, or None. """ a_node = self.adapter.get_node_attribute_node(self.impl_element, name) if a_node is None: return None return self.adapter.get_node_namespace_uri(a_node)
[docs] def prefix(self, name): """ :param string name: the name of an attribute to look up. :return: the prefix component of the named attribute's name, or None. """ a_node = self.adapter.get_node_attribute_node(self.impl_element, name) if a_node is None: return None return a_node.prefix
@property def to_dict(self): """ :return: an :class:`~collections.OrderedDict` of attribute name/value pairs. """ return collections.OrderedDict(list(self.items())) @property def element(self): """ :return: the :class:`Element` that contains these attributes. """ return self.adapter.wrap_node( self.impl_element, self.adapter.impl_document, self.adapter) @property def impl_attributes(self): """ :return: the attribute node objects from the underlying XML implementation. """ return self.adapter.get_node_attributes(self.impl_element)
[docs]class NodeList(list): """ Custom implementation for :class:`Node` lists that provides additional functionality, such as node filtering. """
[docs] def filter(self, local_name=None, name=None, ns_uri=None, node_type=None, filter_fn=None, first_only=False): """ Apply filters to the set of nodes in this list. :param local_name: a local name used to filter the nodes. :type local_name: string or None :param name: a name used to filter the nodes. :type name: string or None :param ns_uri: a namespace URI used to filter the nodes. If *None* all nodes are returned regardless of namespace. :type ns_uri: string or None :param node_type: a node type definition used to filter the nodes. :type node_type: int node type constant, class, or None :param filter_fn: an arbitrary function to filter nodes in this list. This function must accept a single :class:`Node` argument and return a bool indicating whether to include the node in the filtered results. .. note:: if ``filter_fn`` is provided all other filter arguments are ignore. :type filter_fn: function or None :return: the type of the return value depends on the value of the ``first_only`` parameter and how many nodes match the filter: - if ``first_only=False`` return a :class:`NodeList` of filtered nodes, which will be empty if there are no matching nodes. - if ``first_only=True`` and at least one node matches, return the first matching :class:`Node` - if ``first_only=True`` and there are no matching nodes, return *None* """ # Build our own filter function unless a custom function is provided if filter_fn is None: def filter_fn(n): # Test node type first in case other tests require this type if node_type is not None: # Node type can be specified as an integer constant (e.g. # ELEMENT_NODE) or a class. if isinstance(node_type, int): if not n.is_type(node_type): return False elif n.__class__ != node_type: return False if name is not None and n.name != name: return False if local_name is not None and n.local_name != local_name: return False if ns_uri is not None and n.ns_uri != ns_uri: return False return True # Filter nodes nodelist = list(filter(filter_fn, self)) # If requested, return just the first node (or None if no nodes) if first_only: return nodelist[0] if nodelist else None else: return NodeList(nodelist)
__call__ = filter # Alias """Alias for :meth:`filter`.""" @property def first(self): """ :return: the first of the available children nodes, or *None* if \ there are no children. """ if len(self) > 0: return self[0] else: return None