Skip to content

Commit 154477b

Browse files
gh-50002: xml.dom.minidom now preserves whitespaces in attributes (GH-107947)
Also, double quotes (") are now escaped (as &quot;) only in attribute values; in text nodes they are written unchanged.
1 parent 29bc616 commit 154477b

File tree

4 files changed

+65
-7
lines changed

4 files changed

+65
-7
lines changed

Lib/test/test_minidom.py

+40
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,46 @@ def testWriteXML(self):
505505
dom.unlink()
506506
self.confirm(str == domstr)
507507

508+
def test_toxml_quote_text(self):
509+
dom = Document()
510+
elem = dom.appendChild(dom.createElement('elem'))
511+
elem.appendChild(dom.createTextNode('&<>"'))
512+
cr = elem.appendChild(dom.createElement('cr'))
513+
cr.appendChild(dom.createTextNode('\r'))
514+
crlf = elem.appendChild(dom.createElement('crlf'))
515+
crlf.appendChild(dom.createTextNode('\r\n'))
516+
lflf = elem.appendChild(dom.createElement('lflf'))
517+
lflf.appendChild(dom.createTextNode('\n\n'))
518+
ws = elem.appendChild(dom.createElement('ws'))
519+
ws.appendChild(dom.createTextNode('\t\n\r '))
520+
domstr = dom.toxml()
521+
dom.unlink()
522+
self.assertEqual(domstr, '<?xml version="1.0" ?>'
523+
'<elem>&amp;&lt;&gt;"'
524+
'<cr>\r</cr>'
525+
'<crlf>\r\n</crlf>'
526+
'<lflf>\n\n</lflf>'
527+
'<ws>\t\n\r </ws></elem>')
528+
529+
def test_toxml_quote_attrib(self):
530+
dom = Document()
531+
elem = dom.appendChild(dom.createElement('elem'))
532+
elem.setAttribute("a", '&<>"')
533+
elem.setAttribute("cr", "\r")
534+
elem.setAttribute("lf", "\n")
535+
elem.setAttribute("crlf", "\r\n")
536+
elem.setAttribute("lflf", "\n\n")
537+
elem.setAttribute("ws", "\t\n\r ")
538+
domstr = dom.toxml()
539+
dom.unlink()
540+
self.assertEqual(domstr, '<?xml version="1.0" ?>'
541+
'<elem a="&amp;&lt;&gt;&quot;" '
542+
'cr="&#13;" '
543+
'lf="&#10;" '
544+
'crlf="&#13;&#10;" '
545+
'lflf="&#10;&#10;" '
546+
'ws="&#9;&#10;&#13; "/>')
547+
508548
def testAltNewline(self):
509549
str = '<?xml version="1.0" ?>\n<a b="c"/>\n'
510550
dom = parseString(str)

Lib/xml/dom/minidom.py

+23-7
Original file line numberDiff line numberDiff line change
@@ -300,12 +300,28 @@ def _in_document(node):
300300
node = node.parentNode
301301
return False
302302

303-
def _write_data(writer, data):
303+
def _write_data(writer, text, attr):
304304
"Writes datachars to writer."
305-
if data:
306-
data = data.replace("&", "&amp;").replace("<", "&lt;"). \
307-
replace("\"", "&quot;").replace(">", "&gt;")
308-
writer.write(data)
305+
if not text:
306+
return
307+
# See the comments in ElementTree.py for behavior and
308+
# implementation details.
309+
if "&" in text:
310+
text = text.replace("&", "&amp;")
311+
if "<" in text:
312+
text = text.replace("<", "&lt;")
313+
if ">" in text:
314+
text = text.replace(">", "&gt;")
315+
if attr:
316+
if '"' in text:
317+
text = text.replace('"', "&quot;")
318+
if "\r" in text:
319+
text = text.replace("\r", "&#13;")
320+
if "\n" in text:
321+
text = text.replace("\n", "&#10;")
322+
if "\t" in text:
323+
text = text.replace("\t", "&#9;")
324+
writer.write(text)
309325

310326
def _get_elements_by_tagName_helper(parent, name, rc):
311327
for node in parent.childNodes:
@@ -883,7 +899,7 @@ def writexml(self, writer, indent="", addindent="", newl=""):
883899

884900
for a_name in attrs.keys():
885901
writer.write(" %s=\"" % a_name)
886-
_write_data(writer, attrs[a_name].value)
902+
_write_data(writer, attrs[a_name].value, True)
887903
writer.write("\"")
888904
if self.childNodes:
889905
writer.write(">")
@@ -1112,7 +1128,7 @@ def splitText(self, offset):
11121128
return newText
11131129

11141130
def writexml(self, writer, indent="", addindent="", newl=""):
1115-
_write_data(writer, "%s%s%s" % (indent, self.data, newl))
1131+
_write_data(writer, "%s%s%s" % (indent, self.data, newl), False)
11161132

11171133
# DOM Level 3 (WD 9 April 2002)
11181134

Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
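:mod:`xml.dom.minidom` now preserves whitespace characters (tab, line feed and carriage return) in attribute values by writing them as character references (``&#9;``, ``&#10;``, ``&#13;``).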
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
:mod:`xml.dom.minidom` now escapes ``"`` as ``&quot;`` only in attribute values; in text nodes it is written unchanged.

0 commit comments

Comments
 (0)