From f73b7cb9df0c4034bc80e6aadc58991edbea4dfc Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 2 Jun 2022 08:43:11 +0300 Subject: [PATCH 1/2] gh-91810: Fix regression with writing an XML declaration with encoding='unicode' Suppress writing an XML declaration in open files in ElementTree.write() with encoding='unicode' and xml_declaration=None. --- Lib/test/test_xml_etree.py | 8 ++------ Lib/xml/etree/ElementTree.py | 2 ++ .../Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst | 2 ++ 3 files changed, 6 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index aea77b192c1006..a7577b7339f899 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -3760,17 +3760,13 @@ def test_write_to_text_file(self): tree.write(f, encoding='unicode') self.assertFalse(f.closed) with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), convlinesep( - b'''<?xml version='1.0' encoding='ascii'?>\n''' - b'''<site>&#248;</site>''')) + self.assertEqual(f.read(), b'''<site>&#248;</site>''') with open(TESTFN, 'w', encoding='ISO-8859-1') as f: tree.write(f, encoding='unicode') self.assertFalse(f.closed) with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), convlinesep( - b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n''' - b'''<site>\xf8</site>''')) + self.assertEqual(f.read(), b'''<site>\xf8</site>''') def test_write_to_binary_file(self): self.addCleanup(os_helper.unlink, TESTFN) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index a5cc65e789c004..83fb662d7b5896 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -731,6 +731,8 @@ def write(self, file_or_filename, with _get_writer(file_or_filename, encoding) as (write, declared_encoding): if method == "xml" and (xml_declaration or (xml_declaration is None and + not (encoding.lower() == "unicode" and + hasattr(file_or_filename, "write")) and declared_encoding.lower() not in ("utf-8", "us-ascii"))): write("<?xml version='1.0' encoding='%s'?>\n" % ( declared_encoding,)) diff --git 
a/Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst b/Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst new file mode 100644 index 00000000000000..e40005886afc3e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst @@ -0,0 +1,2 @@ +Suppress writing an XML declaration in open files in ``ElementTree.write()`` +with ``encoding='unicode'`` and ``xml_declaration=None``. From 46ab6e0f636e8f1aa3dca4a32f0b2bdc1e5eaade Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 12 Jun 2022 15:57:24 +0300 Subject: [PATCH 2/2] Always open files in UTF-8 with encoding='unicode'. --- Lib/test/test_xml_etree.py | 8 +------- Lib/xml/etree/ElementTree.py | 14 +++++--------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index a7577b7339f899..afa4641e6906b7 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -3739,13 +3739,7 @@ def test_write_to_filename_as_unicode(self): tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) tree.write(TESTFN, encoding='unicode') with open(TESTFN, 'rb') as f: - data = f.read() - expected = "<site>\xf8</site>".encode(encoding, 'xmlcharrefreplace') - if encoding.lower() in ('utf-8', 'ascii'): - self.assertEqual(data, expected) - else: - self.assertIn(b"<?xml version='1.0' encoding=", data) - self.assertIn(expected, data) + self.assertEqual(f.read(), b"<site>\xc3\xb8</site>") def test_write_to_text_file(self): self.addCleanup(os_helper.unlink, TESTFN) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 83fb662d7b5896..1dc80351bf7ddd 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -731,8 +731,7 @@ def write(self, file_or_filename, with _get_writer(file_or_filename, encoding) as (write, declared_encoding): if method == "xml" and (xml_declaration or (xml_declaration is None and - not (encoding.lower() == "unicode" and - hasattr(file_or_filename, "write")) and + encoding.lower() != "unicode" and declared_encoding.lower() not in ("utf-8", "us-ascii"))): 
write("<?xml version='1.0' encoding='%s'?>\n" % ( declared_encoding,)) @@ -759,13 +758,10 @@ def _get_writer(file_or_filename, encoding): except AttributeError: # file_or_filename is a file name if encoding.lower() == "unicode": - file = open(file_or_filename, "w", - errors="xmlcharrefreplace") - else: - file = open(file_or_filename, "w", encoding=encoding, - errors="xmlcharrefreplace") - with file: - yield file.write, file.encoding + encoding="utf-8" + with open(file_or_filename, "w", encoding=encoding, + errors="xmlcharrefreplace") as file: + yield file.write, encoding else: # file_or_filename is a file-like object # encoding determines if it is a text or binary writer