Luke Ross

lxmlmeld

6 releases · git clone https://lukeross.name/projects/lxmlmeld.git/

Meld-like templating using lxml.

Commit d21e8b4d63fce44d14fe5453e400a50b35eb8c04

Tests, fix write_xhtml

Committed 16 Mar 2017 by Luke Ross

lxmlmeld/__init__.py

@@ -50,9 +50,19 @@ class Element(etree.ElementBase):
             yield thing, data
             thing.addnext(next_thing)
             thing = next_thing
+        if tail:
+            if prev_thing is not None:
+                prev_thing.tail = tail
+            elif thing.getprevious() is not None:
+                prev = thing.getprevious()
+                prev.tail = (prev.tail or "") + tail
+            elif thing.getparent() is not None:
+                parent = thing.getparent()
+                if parent.text:
+                    parent.text += tail
+                else:
+                    parent.text = tail
         thing.getparent().remove(thing)
-        if tail and prev_thing is not None:
-            prev_thing.tail = tail
 
     def replace_child(self, old_element, new_element):
         super(Element, self).replace(old_element, new_element)
@@ -162,7 +172,8 @@ class Element(etree.ElementBase):
         return '<!DOCTYPE {} PUBLIC "{}" "{}">'.format(*doctype)
 
     def write_xml(self, file, encoding=None, doctype=None, fragment=False,
-                  declaration=True, pipeline=False, _kwargs={"method": "xml"}):
+                  declaration=True, pipeline=False, _kwargs={"method": "xml"},
+                  _doc=None):
         kwargs = {k: v for k, v in _kwargs.items()}
         kwargs.update(xml_declaration=declaration, encoding=encoding)
         if doctype:
@@ -170,7 +181,13 @@ class Element(etree.ElementBase):
         if fragment:
             kwargs.update(doctype=None, xml_declaration=False)
 
-        doc = self if pipeline else self._clone_without_own_ns()
+        if _doc is not None:
+            doc = _doc
+        elif pipeline:
+            doc = self
+        else:
+            doc = self._clone_without_own_ns()
+
         if file:
             # ElementTree.write() doesn't support doctype
             file.write(etree.tostring(doc, **kwargs))
@@ -182,7 +199,23 @@ class Element(etree.ElementBase):
         if not doctype[1].startswith("-//W3C//DTD XHTML"):
             # libxml handles xhtml by doctype-sniffing
             raise ValueError("Invalid doctype for XHTML")
-        return self.write_xml(file, encoding=encoding, doctype=doctype)
+
+        if pipeline:
+            return self.write_xml(
+                file, encoding=encoding, doctype=doctype, pipeline=True,
+                declaration=declaration
+            )
+        else:
+            # cleaning up namespaces upsets lxml, need to re-parse :-(
+            intermediate = self.write_xml(
+                file, encoding=encoding, doctype=doctype,
+                declaration=declaration
+            )
+            intermediate = etree.fromstring(intermediate)
+            return self.write_xml(
+                file, encoding=encoding, doctype=doctype, pipeline=True,
+                declaration=declaration, _doc=intermediate
+            )
 
     def write_html(self, file, encoding=None, doctype=_html_doctype,
                    fragment=False):


setup.py

@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-from distutils.core import setup
+from setuptools import setup
 
 setup(
     author_email="luke@lukeross.name",
@@ -9,9 +9,9 @@ setup(
     install_requires=["lxml"],
     license="BSD",
     name="lxmlmeld",
-    packages=["lxmlmeld"],
     url="https://github.com/lukeross/lxmlmeld",
-    version="0.2",
+    packages=["lxmlmeld"],
+    version="0.3",
     classifiers=[
         "Development Status :: 4 - Beta",
         "Environment :: Web Environment",


tests/test_calls.py

@@ -11,24 +11,24 @@ class ReplaceTests(TestCase):
             (  # Naked
                 "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
                 "<replaceme meld:id='r' /></foo>",
-                "<?xml version='1.0' encoding='ASCII'?>\n<foo>{}</foo>",
+                "<?xml version='1.0' encoding='ASCII'?>\n<foo>{}</foo>"
             ),
             (  # Surrounded by elements
                 "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
                 "<bar /><replaceme meld:id='r' /><baz /></foo>",
                 "<?xml version='1.0' encoding='ASCII'?>\n"
-                "<foo><bar/>{}<baz/></foo>",
+                "<foo><bar/>{}<baz/></foo>"
             ),
             (  # Surrounded by text
                 "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
                 "bar <replaceme meld:id='r' />baz</foo>",
-                "<?xml version='1.0' encoding='ASCII'?>\n<foo>bar {}baz</foo>",
+                "<?xml version='1.0' encoding='ASCII'?>\n<foo>bar {}baz</foo>"
             ),
             (  # Surrounded by mixed
                 "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
                 "<bar />bar <replaceme meld:id='r' />baz</foo>",
                 "<?xml version='1.0' encoding='ASCII'?>\n"
-                "<foo><bar/>bar {}baz</foo>",
+                "<foo><bar/>bar {}baz</foo>"
             ),
         )
         for ip, op in docs:
@@ -56,5 +56,109 @@ class ReplaceTests(TestCase):
         replacements = [E("so", {"completely": "yes"}), E("awesome")]
         replacements[0].tail = "-"
         replacements[1].tail = "!"
-        self.as_expected(replacements, '<so completely="yes"/>-<awesome/>!'
+        self.as_expected(replacements, '<so completely="yes"/>-<awesome/>!')
+
+
+class ContentTests(TestCase):
+    def as_expected(self, arg, expected_in_output, **kwargs):
+        docs = (
+            (  # Empty
+                "<foo xmlns:meld='http://www.plope.com/software/meld3'"
+                " meld:id='r' />",
+                "<?xml version='1.0' encoding='ASCII'?>\n<foo>{}</foo>"
+            ),
+            (  # Contains text
+                "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
+                "<bar meld:id='r'>placeholder</bar></foo>",
+                "<?xml version='1.0' encoding='ASCII'?>\n"
+                "<foo><bar>{}</bar></foo>"
+            ),
+            (  # Contains node
+                "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
+                "<bar meld:id='r'><gone /></bar></foo>",
+                "<?xml version='1.0' encoding='ASCII'?>\n"
+                "<foo><bar>{}</bar></foo>"
+            ),
+            (  # Contains mixed
+                "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
+                "<bar meld:id='r'>and it's <gone /></bar></foo>",
+                "<?xml version='1.0' encoding='ASCII'?>\n"
+                "<foo><bar>{}</bar></foo>"
+            ),
         )
+        for ip, op in docs:
+            doc = parse_xmlstring(ip)
+            doc.findmeld("r").content(deepcopy(arg), **kwargs)
+            self.assertEqual(
+                doc.write_xmlstring(),
+                op.format(expected_in_output).encode("ascii"),
+                op.format(expected_in_output)
+            )
+
+    def test_plain_text_content(self):
+        self.as_expected("<hello world!>", "&lt;hello world!&gt;")
+
+    def test_structured_content(self):
+        self.as_expected("<hello word='world' /><a />",
+                         '<hello word="world"/><a/>', structure=True)
+
+    def test_content_nodes(self):
+        replacement = E("awesome")
+        replacement.tail = "!"
+        self.as_expected(replacement, "<awesome/>!")
+
+    def test_content_nodelist(self):
+        replacements = [E("so", {"completely": "yes"}), E("awesome")]
+        replacements[0].tail = "-"
+        replacements[1].tail = "!"
+        self.as_expected(replacements, '<so completely="yes"/>-<awesome/>!')
+
+
+class RepeatTests(TestCase):
+    def as_expected(self, arg, expected_in_output):
+        docs = (
+            (  # Text before
+                "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
+                "yo<bar meld:id='r' /></foo>",
+                "<?xml version='1.0' encoding='ASCII'?>\n<foo>yo{}</foo>"
+            ),
+            (  # Text after
+                "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
+                "<bar meld:id='r' />yo</foo>",
+                "<?xml version='1.0' encoding='ASCII'?>\n<foo>{}yo</foo>"
+            ),
+            (  # Text both sides
+                "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
+                "oy<bar meld:id='r' />yo</foo>",
+                "<?xml version='1.0' encoding='ASCII'?>\n<foo>oy{}yo</foo>"
+            ),
+            (  # Elements both sides
+                "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
+                "<oy/><bar meld:id='r' /><yo/></foo>",
+                "<?xml version='1.0' encoding='ASCII'?>\n"
+                "<foo><oy/>{}<yo/></foo>"
+            ),
+            (  # Mixed
+                "<foo xmlns:meld='http://www.plope.com/software/meld3'>"
+                "<oy/><bar meld:id='r' />yo</foo>",
+                "<?xml version='1.0' encoding='ASCII'?>\n<foo><oy/>{}yo</foo>"
+            ),
+        )
+        for ip, op in docs:
+            doc = parse_xmlstring(ip)
+            for ele, data in doc.findmeld("r").repeat(deepcopy(arg), 'r'):
+                ele.set("a", data)
+            self.assertEqual(
+                doc.write_xmlstring(),
+                op.format(expected_in_output).encode("ascii"),
+                op.format(expected_in_output)
+            )
+
+    def test_repeat_zero(self):
+        self.as_expected([], '')
+
+    def test_repeat_one(self):
+        self.as_expected(['q'], '<bar a="q"/>')
+
+    def test_repeat_multi(self):
+        self.as_expected(['q', 'z'], '<bar a="q"/><bar a="z"/>')


tests/test_parse_serialise.py

@@ -0,0 +1,97 @@
+from io import StringIO
+from unittest import TestCase
+
+from lxmlmeld import parse_xml, parse_xmlstring, parse_html, parse_htmlstring
+
+
+class XMLTests(TestCase):
+    def as_expected(self, handler):
+        scenarios = (
+            (
+                "<xml />",
+                "<?xml version='1.0' encoding='ASCII'?>\n<xml/>"
+            ),
+            (
+                "<?xml version='1.0' ?><xml />",
+                "<?xml version='1.0' encoding='ASCII'?>\n<xml/>"
+            ),
+            (
+                "<xml><a /><b /></xml>",
+                "<?xml version='1.0' encoding='ASCII'?>\n<xml><a/><b/></xml>"
+            ),
+        )
+        for ip, op in scenarios:
+            doc = handler(ip)
+            self.assertEqual(doc.write_xmlstring(), op.encode("ascii"))
+
+    def test_parse_string(self):
+        self.as_expected(parse_xmlstring)
+
+    def test_parse_handle(self):
+        self.as_expected(lambda i: parse_xml(StringIO(i)))
+
+
+class HTMLTests(TestCase):
+    def as_expected(self, handler):
+        scenarios = (
+            (
+                "<html><body><br></body></html>",
+                "<html><body><br></body></html>"
+            ),
+            (
+                "<html><body><script></script></body></html>",
+                "<html><body><script></script></body></html>"
+            ),
+        )
+        for ip, op in scenarios:
+            doc = handler(ip)
+            self.assertEqual(
+                doc.write_htmlstring(doctype=None),
+                op.encode("ascii")
+            )
+
+    def test_parse_string(self):
+        self.as_expected(parse_htmlstring)
+
+    def test_parse_handle(self):
+        self.as_expected(lambda i: parse_html(StringIO(i)))
+
+    def test_find_meld(self):
+        doc = parse_htmlstring("<html><body meld:id='a'><br></body></html>")
+        ele = doc.findmeld('a')
+        self.assertEqual(ele.tag, 'body')
+
+
+class XHTMLTests(TestCase):
+    DT = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" " \
+        "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
+
+    def as_expected(self, handler):
+        scenarios = (
+            (
+                self.DT + "<html xmlns='http://www.w3.org/1999/xhtml' "
+                "xmlns:meld='http://www.plope.com/software/meld3'>"
+                "<body meld:id='b'><br/></body></html>",
+                self.DT + "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
+                "<body><br /></body></html>"
+            ),
+            (
+                self.DT + "<html xmlns='http://www.w3.org/1999/xhtml' "
+                "xmlns:meld='http://www.plope.com/software/meld3'>"
+                "<body meld:id='b'><p/></body></html>",
+                self.DT + "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
+                "<body><p></p></body></html>"
+            ),
+        )
+        for ip, op in scenarios:
+            doc = handler(ip)
+            self.assertEqual(
+                doc.write_xhtmlstring(),
+                op.encode("ascii")
+            )
+
+    def test_parse_string(self):
+        self.as_expected(parse_xmlstring)
+
+    def test_parse_handle(self):
+        self.as_expected(lambda i: parse_xml(StringIO(i)))


tests/test_parsing.py -> (removed)

@@ -1,31 +0,0 @@
-from io import StringIO
-from unittest import TestCase
-
-from lxmlmeld import parse_xml, parse_xmlstring
-
-
-class XMLTests(TestCase):
-    def as_expected(self, handler):
-        scenarios = (
-            (
-                "<xml />",
-                "<?xml version='1.0' encoding='ASCII'?>\n<xml/>"
-            ),
-            (
-                "<?xml version='1.0' ?><xml />",
-                "<?xml version='1.0' encoding='ASCII'?>\n<xml/>"
-            ),
-            (
-                "<xml><a /><b /></xml>",
-                "<?xml version='1.0' encoding='ASCII'?>\n<xml><a/><b/></xml>"
-            ),
-        )
-        for ip, op in scenarios:
-            doc = handler(ip)
-            self.assertEqual(doc.write_xmlstring(), op.encode("ascii"))
-
-    def test_parse_string(self):
-        self.as_expected(parse_xmlstring)
-
-    def test_parse_handle(self):
-        self.as_expected(lambda i: parse_xml(StringIO(i)))