Source code for xml4h.writer

"""
Writer to serialize XML DOM documents or sections to text.
"""
# This implementation is adapted (heavily) from the standard library method
# xml.dom.minidom.writexml
import six

import codecs

from xml4h import exceptions


def write_node(node, writer, encoding='utf-8', indent=0, newline='',
        omit_declaration=False, node_depth=0, quote_char='"'):
    """
    Serialize an *xml4h* DOM node and its descendants to text, writing
    the output to the given *writer*.

    :param node: the DOM node whose content and descendants will be
        serialized.
    :type node: an :class:`xml4h.nodes.Node` or subclass
    :param writer: a file or stream to which XML text is written. If the
        object has neither an ``encoding`` nor an ``encode`` attribute it
        is treated as a binary stream and wrapped with a
        :mod:`codecs` stream writer for *encoding*.
    :type writer: a file, stream, etc
    :param string encoding: the character encoding for serialized text.
    :param indent: indentation prefix to apply to descendent nodes for
        pretty-printing. The value can take many forms:

        - *int*: the number of spaces to indent. 0 means no indent.
        - *string*: a literal prefix for indented nodes, such as ``\\t``.
        - *bool*: no indent if *False*, four spaces indent if *True*.
        - *None*: no indent.
    :type indent: string, int, bool, or None
    :param newline: the string value used to separate lines of output.
        The value can take a number of forms:

        - *string*: the literal newline value, such as ``\\n`` or ``\\r``.
          An empty string means no newline.
        - *bool*: no newline if *False*, ``\\n`` newline if *True*.
        - *None*: no newline.

        If an indent is in effect but no newline is given, ``\\n`` is used.
    :type newline: string, bool, or None
    :param boolean omit_declaration: if *True* the XML declaration header
        is omitted, otherwise it is included. Note that the declaration is
        only output when serializing an :class:`xml4h.nodes.Document` node.
    :param int node_depth: the indentation level to start at, such as 2 to
        indent output as if the given *node* has two ancestors. This
        parameter will only be useful if you need to output XML text
        fragments that can be assembled into a document. This parameter
        has no effect unless indentation is applied.
    :param string quote_char: the character that delimits quoted content.
        You should never need to mess with this.

    :raise ValueError: if a CDATA node's value contains ``]]>`` or a
        comment node's value contains ``--``.
    :raise xml4h.exceptions.Xml4hImplementationBug: if a node matches none
        of the node-type checks performed here.
    """
    # Sanitize whitespace parameters. ``None`` must be normalised here as
    # well: the implementation below evaluates ``indent * node_depth``,
    # which raises TypeError for ``None``.
    if indent is True:
        indent = ' ' * 4
    elif indent is None or indent is False:
        indent = ''
    elif isinstance(indent, int):
        indent = ' ' * indent
    # If indent but no newline set, always apply a newline (it makes sense)
    if indent and not newline:
        newline = True

    if newline is None or newline is False:
        newline = ''
    elif newline is True:
        newline = '\n'

    # If we have a target encoding and are writing to a binary IO stream,
    # wrap the writer with an encoding writer to produce the correct bytes.
    # We detect binary IO streams by:
    # - Python 3: the *absence* of the `encoding` attribute that is present
    #   on `io.TextIOBase`-derived objects
    # - Python 2: the *absence* of the `encode` attribute that is present
    #   on `StringIO` objects
    if (
        encoding
        and not hasattr(writer, 'encoding')
        and not hasattr(writer, 'encode')
    ):
        writer = codecs.getwriter(encoding)(writer)

    def _sanitize_write_value(value):
        """Return XML-encoded value (falsy values are returned as-is)."""
        if not value:
            return value
        # "&" must be replaced first so the entities added by the later
        # replacements are not themselves re-escaped.
        return (value
            .replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace("\"", "&quot;")
            .replace(">", "&gt;")
        )

    def _write_node_impl(node, node_depth):
        """
        Internal write implementation that does the real work while keeping
        track of node depth.
        """
        # Output document declaration if we're outputting the whole doc
        if node.is_document:
            if not omit_declaration:
                writer.write(
                    '<?xml version=%s1.0%s' % (quote_char, quote_char))
                if encoding:
                    writer.write(' encoding=%s%s%s'
                        % (quote_char, encoding, quote_char))
                writer.write('?>%s' % newline)
            for child in node.children:
                _write_node_impl(child,
                    node_depth)  # node_depth not incremented
                writer.write(newline)
        elif node.is_document_type:
            # External ID per the XML grammar:
            #   PUBLIC "public-id" "system-id"   or   SYSTEM "system-id"
            # (The previous single format string had four placeholders but
            # only three arguments, so this branch always raised TypeError.)
            writer.write("<!DOCTYPE %s" % node.name)
            if node.public_id:
                writer.write(" PUBLIC %s%s%s"
                    % (quote_char, node.public_id, quote_char))
                if node.system_id is not None:
                    writer.write(
                        " %s%s%s" % (quote_char, node.system_id, quote_char))
            elif node.system_id is not None:
                writer.write(" SYSTEM %s%s%s"
                    % (quote_char, node.system_id, quote_char))
            if node.children:
                writer.write("[")
                for child in node.children:
                    _write_node_impl(child, node_depth + 1)
                writer.write("]")
            writer.write(">")
        elif node.is_text:
            writer.write(_sanitize_write_value(node.value))
        elif node.is_cdata:
            # A CDATA section cannot contain its own terminator
            if ']]>' in node.value:
                raise ValueError("']]>' is not allowed in CDATA node value")
            writer.write("<![CDATA[%s]]>" % node.value)
        # TODO: entity reference nodes (node.is_entity_reference) are not
        # handled yet
        elif node.is_entity:
            writer.write(newline + indent * node_depth)
            writer.write("<!ENTITY ")
            # NOTE(review): attribute name is spelled "paremeter"; it is
            # defined outside this module so it is used as-is here.
            if node.is_paremeter_entity:
                # Plain string (no %-formatting applied), so a single
                # percent sign is what must be written.
                writer.write('% ')
            writer.write("%s %s%s%s>"
                % (node.name, quote_char, node.value, quote_char))
        elif node.is_processing_instruction:
            writer.write(newline + indent * node_depth)
            writer.write("<?%s %s?>" % (node.target, node.data))
        elif node.is_comment:
            if '--' in node.value:
                raise ValueError("'--' is not allowed in COMMENT node value")
            writer.write("<!--%s-->" % node.value)
        elif node.is_notation:
            writer.write(newline + indent * node_depth)
            writer.write("<!NOTATION %s" % node.name)
            # NOTE(review): both arms test ``is_system_identifier`` so the
            # second is unreachable; it was presumably meant to handle
            # public identifiers -- confirm against xml4h.nodes before
            # changing. When the flag is false no closing ">" is written.
            # The keyword is also emitted in lowercase here.
            if node.is_system_identifier:
                writer.write(" system %s%s%s>"
                    % (quote_char, node.external_id, quote_char))
            elif node.is_system_identifier:
                writer.write(" system %s%s%s %s%s%s>"
                    % (quote_char, node.external_id, quote_char,
                       quote_char, node.uri, quote_char))
        elif node.is_attribute:
            writer.write(" %s=%s" % (node.name, quote_char))
            writer.write(_sanitize_write_value(node.value))
            writer.write(quote_char)
        elif node.is_element:
            # Only need a preceding newline if we're in a sub-element
            if node_depth > 0:
                writer.write(newline)
            writer.write(indent * node_depth)
            writer.write("<" + node.name)

            for attr in node.attribute_nodes:
                _write_node_impl(attr, node_depth)
            if node.children:
                found_indented_child = False
                writer.write(">")
                for child in node.children:
                    _write_node_impl(child, node_depth + 1)
                    if not (child.is_text
                            or child.is_comment
                            or child.is_cdata):
                        found_indented_child = True
                # Text, comment and CDATA children are written inline, so
                # the closing tag only goes on its own indented line when
                # some other kind of child was written.
                if found_indented_child:
                    writer.write(newline + indent * node_depth)
                writer.write('</%s>' % node.name)
            else:
                writer.write('/>')
        else:
            raise exceptions.Xml4hImplementationBug(
                'Cannot write node with class: %s' % node.__class__)

    # Do the business...
    _write_node_impl(node, node_depth)