Luke Ross

lxmlmeld

6 releases git clone https://lukeross.name/projects/lxmlmeld.git/

Meld-like templating using lxml.

Commit 6dea3d3b7cc30c16a94ab2950f1bcf1f8705ceaa

Tests and bug-fixes

Committed 17 Mar 2017 by Luke Ross & Luke Ross

lxmlmeld/__init__.py

@@ -1,3 +1,4 @@
+import re
 from copy import deepcopy
 from lxml import etree
 
@@ -142,7 +143,7 @@ class Element(etree.ElementBase):
                 ele.content(v)
             else:
                 missing.add(k)
-        return missing
+        return list(missing)
 
     def parentindex(self):
         parent = self.getparent()
@@ -168,6 +169,8 @@ class Element(etree.ElementBase):
 
     @staticmethod
     def _get_doctype(doctype):
+        if not isinstance(doctype, (list, tuple)):
+            return doctype
         name, public, system = doctype
         return '<!DOCTYPE {} PUBLIC "{}" "{}">'.format(*doctype)
 
@@ -195,28 +198,51 @@ class Element(etree.ElementBase):
             return etree.tostring(doc, **kwargs)
 
     def write_xhtml(self, file, encoding=None, doctype=_xhtml_doctype,
-                    declaration=False, pipeline=False):
-        if not doctype[1].startswith("-//W3C//DTD XHTML"):
+                    fragment=False, declaration=False, pipeline=False):
+        """Serialise this element as XHTML.
+
+        The doctype must be an XHTML one, otherwise ValueError is raised.
+        With fragment=True the XML declaration is suppressed and the
+        doctype is stripped from the serialised output.
+
+        The result is written to file when one is given, and returned
+        otherwise.
+        """
+        doctype = self._get_doctype(doctype)
+        if not(doctype and "-//W3C//DTD XHTML" in doctype):
             # libxml handles xhtml by doctype-sniffing
             raise ValueError("Invalid doctype for XHTML")
 
+        if fragment:
+            declaration = False
+
         if pipeline:
-            return self.write_xml(
-                file, encoding=encoding, doctype=doctype, pipeline=True,
+            ret = self.write_xml(
+                None, encoding=encoding, doctype=doctype, pipeline=True,
                 declaration=declaration
             )
         else:
             # cleaning up namespaces upsets lxml, need to re-parse :-(
             intermediate = self.write_xml(
-                file, encoding=encoding, doctype=doctype,
+                None, encoding=encoding, doctype=doctype,
                 declaration=declaration
             )
             intermediate = etree.fromstring(intermediate)
-            return self.write_xml(
-                file, encoding=encoding, doctype=doctype, pipeline=True,
+            ret = self.write_xml(
+                None, encoding=encoding, doctype=doctype, pipeline=True,
                 declaration=declaration, _doc=intermediate
             )
 
+        if fragment:
+            # flags must be passed by keyword: the fourth positional
+            # argument of re.sub() is count, not flags
+            ret = re.sub(rb'^.*?<!DOCTYPE.*?>\s+', b'', ret, flags=re.S)
+
+        if file:
+            file.write(ret)
+        else:
+            return ret
+
     def write_html(self, file, encoding=None, doctype=_html_doctype,
                    fragment=False):
         return self.write_xml(


tests/test_calls.py

@@ -58,6 +58,14 @@ class ReplaceTests(TestCase):
         replacements[1].tail = "!"
         self.as_expected(replacements, '<so completely="yes"/>-<awesome/>!')
 
+    def test_replace_no_parent(self):
+        doc = parse_xmlstring("<a/>")
+        doc.replace("nooo")
+        self.assertEqual(
+            doc.write_xmlstring(declaration=False),
+            b'<a/>'
+        )
+
 
 class ContentTests(TestCase):
     def as_expected(self, arg, expected_in_output, **kwargs):
@@ -162,3 +170,127 @@ class RepeatTests(TestCase):
 
     def test_repeat_multi(self):
         self.as_expected(['q', 'z'], '<bar a="q"/><bar a="z"/>')
+
+
+class MeldFindingTests(TestCase):
+    def test_findmeld_exists(self):
+        doc = parse_xmlstring(
+            "<a xmlns:meld='http://www.plope.com/software/meld3'>"
+            "<b meld:id='q'/></a>"
+        )
+        found = doc.findmeld('q')
+        self.assertIsNotNone(found)
+        self.assertEqual(found.tag, 'b')
+        doc = parse_xmlstring(
+            "<a xmlns:meld='http://www.plope.com/software/meld3'"
+            " meld:id='q' />"
+        )
+        found = doc.findmeld('q')
+        self.assertIsNotNone(found)
+        self.assertEqual(found.tag, 'a')
+
+    def test_findmeld_missing(self):
+        doc = parse_xmlstring(
+            "<a xmlns:meld='http://www.plope.com/software/meld3'>"
+            "<b meld:id='q'/></a>"
+        )
+        found = doc.findmeld('z')
+        self.assertIsNone(found)
+        found = doc.findmeld('z', '')
+        self.assertEqual(found, '')
+
+    def test_meldid(self):
+        doc = parse_xmlstring(
+            "<a xmlns:meld='http://www.plope.com/software/meld3'>"
+            "<b meld:id='q'/></a>"
+        )
+        found = doc.findmeld('q')
+        self.assertEqual(found.meldid(), 'q')
+
+    def test_findmelds(self):
+        doc = parse_xmlstring(
+            "<a xmlns:meld='http://www.plope.com/software/meld3' "
+            "meld:id='z'><b meld:id='q'/></a>"
+        )
+        found = list(doc.findmelds())
+        self.assertEqual(len(found), 2)
+        self.assertEqual(set(['q', 'z']), set([
+            e.meldid() for e in found
+        ]))
+
+
+class FillMeldsTests(TestCase):
+    def test_fill_melds(self):
+        doc = parse_xmlstring(
+            "<a xmlns:meld='http://www.plope.com/software/meld3'> "
+            "<b meld:id='z'/><b meld:id='q'/></a>"
+        )
+        ret = doc.fillmelds(z='foo', q='bar', a='ohno')
+        self.assertEqual(
+            doc.write_xmlstring(declaration=False),
+            b'<a> <b>foo</b><b>bar</b></a>'
+        )
+        self.assertEqual(ret, ['a'])
+
+
+class AttributesTests(TestCase):
+    def test_fill_attributes(self):
+        doc = parse_xmlstring("<a/>")
+        doc.attributes(foo='q', bar='z')
+        op = doc.write_xmlstring(declaration=False)
+        self.assertTrue(op.startswith(b"<a "))
+        self.assertTrue(op.endswith(b"/>"))
+        self.assertIn(b'foo="q"', op)
+        self.assertIn(b'bar="z"', op)
+
+
+class CloneTests(TestCase):
+    def test_no_parent(self):
+        doc = parse_xmlstring(
+            "<a xmlns:meld='http://www.plope.com/software/meld3'>"
+            "<b meld:id='z'/></a>"
+        )
+        new = doc.findmeld('z').clone()
+        new.attributes(foo='bar')
+        self.assertEqual(
+            new.write_xmlstring(declaration=False),
+            b'<b foo="bar"/>'
+        )
+        self.assertEqual(
+            doc.write_xmlstring(declaration=False),
+            b'<a><b/></a>'
+        )
+
+    def test_with_parent(self):
+        doc = parse_xmlstring(
+            "<a xmlns:meld='http://www.plope.com/software/meld3'>"
+            "<b meld:id='z'/><c meld:id='q'/></a>"
+        )
+        new = doc.findmeld('z').clone(doc.findmeld('q'))
+        new.attributes(foo='bar')
+        self.assertEqual(
+            new.write_xmlstring(declaration=False),
+            b'<b foo="bar"/>'
+        )
+        self.assertEqual(
+            doc.write_xmlstring(declaration=False),
+            b'<a><b/><c><b foo="bar"/></c></a>'
+        )
+
+
+class DeparentTests(TestCase):
+    def test_deparent(self):
+        doc = parse_xmlstring(
+            "<a xmlns:meld='http://www.plope.com/software/meld3'>"
+            "<b meld:id='z'/></a>"
+        )
+        doc.findmeld('z').deparent()
+        self.assertEqual(
+            doc.write_xmlstring(declaration=False),
+            b'<a/>'
+        )
+        doc.deparent()  # no-op
+        self.assertEqual(
+            doc.write_xmlstring(declaration=False),
+            b'<a/>'
+        )


tests/test_parse_serialise.py

@@ -1,4 +1,4 @@
-from io import StringIO
+from io import BytesIO, StringIO
 from unittest import TestCase
 
 from lxmlmeld import parse_xml, parse_xmlstring, parse_html, parse_htmlstring
@@ -30,6 +30,68 @@ class XMLTests(TestCase):
     def test_parse_handle(self):
         self.as_expected(lambda i: parse_xml(StringIO(i)))
 
+    def test_write_handle(self):
+        doc = parse_xmlstring("<a />")
+        io = BytesIO()
+        doc.write_xml(io, declaration=False)
+        self.assertEqual(io.getvalue(), b"<a/>")
+
+    def test_serialise_options(self):
+        tests = (
+            (
+                {'encoding': 'ASCII', 'declaration': True},
+                b"<?xml version='1.0' encoding='ASCII'?>",
+                {'encoding': 'ASCII', 'declaration': False},
+                b"<?xml",
+                True
+            ),
+            (
+                {'pipeline': True},
+                b"meld:id=",
+                {'pipeline': False},
+                b"meld:id=",
+                False
+            ),
+            (
+                {'doctype': '<!DOCTYPE note SYSTEM "Note.dtd">'},
+                b'<!DOCTYPE note SYSTEM "Note.dtd">',
+                {'doctype': None},
+                b"DOCTYPE",
+                False
+            ),
+            (
+                {'fragment': False, 'declaration': True},
+                b"<?xml version='1.0'",
+                {'fragment': True, 'declaration': True},
+                b"<?xml",
+                True
+            ),
+        )
+
+        doc = parse_xmlstring(
+            "<a xmlns:meld='http://www.plope.com/software/meld3' "
+            "meld:id='r' />"
+        )
+        for turn_on, match_on, turn_off, absent, default in tests:
+            serialised_on = doc.write_xmlstring(**turn_on)
+            self.assertIn(match_on, serialised_on)
+            serialised_off = doc.write_xmlstring(**turn_off)
+            self.assertNotIn(absent, serialised_off)
+            serialised_default = doc.write_xmlstring()
+            if default:
+                self.assertIn(match_on, serialised_default)
+            else:
+                self.assertNotIn(absent, serialised_default)
+            for txt in (serialised_on, serialised_off, serialised_default):
+                self.assertIn(b"<a", txt)
+
+    def test_duplicate_melds(self):
+        with self.assertRaises(ValueError):
+            parse_xmlstring(
+                "<body xmlns:meld='http://www.plope.com/software/meld3' "
+                "meld:id='a'><br meld:id='a'/></body>"
+            )
+
 
 class HTMLTests(TestCase):
     def as_expected(self, handler):
@@ -56,11 +118,57 @@ class HTMLTests(TestCase):
     def test_parse_handle(self):
         self.as_expected(lambda i: parse_html(StringIO(i)))
 
+    def test_write_handle(self):
+        doc = parse_xmlstring("<html><body/></html>")
+        io = BytesIO()
+        doc.write_html(io, doctype=None)
+        self.assertEqual(io.getvalue(), b"<html><body></body></html>")
+
+    def test_duplicate_melds(self):
+        with self.assertRaises(ValueError):
+            parse_htmlstring("<body meld:id='a'><br meld:id='a'/></body>")
+
     def test_find_meld(self):
         doc = parse_htmlstring("<html><body meld:id='a'><br></body></html>")
         ele = doc.findmeld('a')
         self.assertEqual(ele.tag, 'body')
 
+    def test_serialise_options(self):
+        html_dtd = '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">'
+
+        tests = (
+            (
+                {'doctype': html_dtd},
+                b'<!DOCTYPE HTML PUBLIC ',
+                {'doctype': None},
+                b"DOCTYPE",
+                True
+            ),
+            (
+                {'fragment': False, 'doctype': html_dtd},
+                b'<!DOCTYPE HTML PUBLIC ',
+                {'fragment': True, 'doctype': html_dtd},
+                b"DOCTYPE",
+                True
+            ),
+        )
+
+        doc = parse_htmlstring(
+            "<html><body><br><p></p></body></html>"
+        )
+        for turn_on, match_on, turn_off, absent, default in tests:
+            serialised_on = doc.write_htmlstring(**turn_on)
+            self.assertIn(match_on, serialised_on, repr(turn_on))
+            serialised_off = doc.write_htmlstring(**turn_off)
+            self.assertNotIn(absent, serialised_off, repr(turn_off))
+            serialised_default = doc.write_htmlstring()
+            if default:
+                self.assertIn(match_on, serialised_default)
+            else:
+                self.assertNotIn(absent, serialised_default)
+            for txt in (serialised_on, serialised_off, serialised_default):
+                self.assertIn(b"<html><body><br><p></p></body></html>", txt)
+
 
 class XHTMLTests(TestCase):
     DT = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" " \
@@ -95,3 +203,68 @@ class XHTMLTests(TestCase):
 
     def test_parse_handle(self):
         self.as_expected(lambda i: parse_xml(StringIO(i)))
+
+    def test_write_handle(self):
+        doc = parse_xmlstring("<a />")
+        io = BytesIO()
+        doc.write_xhtml(io, fragment=True)
+        self.assertEqual(io.getvalue(), b"<a></a>")
+
+    def test_serialise_options(self):
+        xhtml_strict_dt = '<!DOCTYPE html PUBLIC ' \
+            '"-//W3C//DTD XHTML 1.0 Strict//EN" ' \
+            '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
+
+        tests = (
+            (
+                {'encoding': 'ASCII', 'declaration': True},
+                b"<?xml version='1.0' encoding='ASCII'?>",
+                {'encoding': 'ASCII', 'declaration': False},
+                b"<?xml",
+                False
+            ),
+            (
+                {'pipeline': True},
+                b"meld:id=",
+                {'pipeline': False},
+                b"meld:id=",
+                False
+            ),
+            (
+                {'doctype': xhtml_strict_dt},
+                xhtml_strict_dt.encode("ascii"),
+                {},
+                b"Strict",
+                False
+            ),
+            (
+                {'fragment': False, 'declaration': True},
+                b"<?xml version='1.0'",
+                {'fragment': True, 'declaration': True},
+                b"<?xml",
+                False
+            ),
+        )
+
+        doc = parse_xmlstring(
+            self.DT + "<html xmlns='http://www.w3.org/1999/xhtml' "
+            "xmlns:meld='http://www.plope.com/software/meld3'>"
+            "<body meld:id='b'><br/><p/></body></html>",
+        )
+        for turn_on, match_on, turn_off, absent, default in tests:
+            serialised_on = doc.write_xhtmlstring(**turn_on)
+            self.assertIn(match_on, serialised_on, repr(turn_on))
+            serialised_off = doc.write_xhtmlstring(**turn_off)
+            self.assertNotIn(absent, serialised_off, repr(turn_off))
+            serialised_default = doc.write_xhtmlstring()
+            if default:
+                self.assertIn(match_on, serialised_default)
+            else:
+                self.assertNotIn(absent, serialised_default)
+            for txt in (serialised_on, serialised_off, serialised_default):
+                self.assertIn(b"<br /><p></p></body></html>", txt)
+
+        with self.assertRaises(ValueError):
+            doc.write_xhtmlstring(doctype='<!DOCTYPE note SYSTEM "Note.dtd">')
+        with self.assertRaises(ValueError):
+            doc.write_xhtmlstring(doctype=None)