From 861656d8c21d329366e50c5081500d5941ebf566 Mon Sep 17 00:00:00 2001 From: Eashwar Ranganathan Date: Tue, 18 Nov 2025 06:12:40 -0500 Subject: [PATCH] gh-141707: Skip TarInfo DIRTYPE normalization during GNU long name handling --- Lib/tarfile.py | 29 ++++++++++++++++--- Lib/test/test_tarfile.py | 19 ++++++++++++ Misc/ACKS | 1 + ...-11-18-06-35-53.gh-issue-141707.DBmQIy.rst | 2 ++ 4 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 7db3a40c9b33cf..5fc60f3dfb4c6c 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1277,6 +1277,20 @@ def _create_pax_generic_header(cls, pax_headers, type, encoding): @classmethod def frombuf(cls, buf, encoding, errors): """Construct a TarInfo object from a 512 byte bytes object. + + To support the old v7 tar format AREGTYPE headers are + transformed to DIRTYPE headers if their name ends in '/'. + """ + return cls._frombuf(buf, encoding, errors) + + @classmethod + def _frombuf(cls, buf, encoding, errors, *, dircheck=True): + """Construct a TarInfo object from a 512 byte bytes object. + + If ``dircheck`` is set to ``True`` then ``AREGTYPE`` headers will + be normalized to ``DIRTYPE`` if the name ends in a trailing slash. + ``dircheck`` must be set to ``False`` if this function is called + on a follow-up header such as ``GNUTYPE_LONGNAME``. """ if len(buf) == 0: raise EmptyHeaderError("empty header") @@ -1307,7 +1321,7 @@ def frombuf(cls, buf, encoding, errors): # Old V7 tar format represents a directory as a regular # file with a trailing slash. - if obj.type == AREGTYPE and obj.name.endswith("/"): + if dircheck and obj.type == AREGTYPE and obj.name.endswith("/"): obj.type = DIRTYPE # The old GNU sparse format occupies some of the unused @@ -1342,8 +1356,15 @@ def fromtarfile(cls, tarfile): """Return the next TarInfo object from TarFile object tarfile. """ + return cls._fromtarfile(tarfile) + + @classmethod + def _fromtarfile(cls, tarfile, *, dircheck=True): + """ + See dircheck documentation in _frombuf(). + """ buf = tarfile.fileobj.read(BLOCKSIZE) - obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors) + obj = cls._frombuf(buf, tarfile.encoding, tarfile.errors, dircheck=dircheck) obj.offset = tarfile.fileobj.tell() - BLOCKSIZE return obj._proc_member(tarfile) @@ -1401,7 +1422,7 @@ def _proc_gnulong(self, tarfile): # Fetch the next header and process it. try: - next = self.fromtarfile(tarfile) + next = self._fromtarfile(tarfile, dircheck=False) except HeaderError as e: raise SubsequentHeaderError(str(e)) from None @@ -1536,7 +1557,7 @@ def _proc_pax(self, tarfile): # Fetch the next header. try: - next = self.fromtarfile(tarfile) + next = self._fromtarfile(tarfile, dircheck=False) except HeaderError as e: raise SubsequentHeaderError(str(e)) from None diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 9892005787c8a6..2484556735ff76 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1234,6 +1234,25 @@ def test_longname_directory(self): self.assertIsNotNone(tar.getmember(longdir)) self.assertIsNotNone(tar.getmember(longdir.removesuffix('/'))) + def test_longname_file_not_directory(self): + # Test reading a longname file and ensure it is not handled as a directory + # Issue #141707 + buf = io.BytesIO() + with tarfile.open(mode='w', fileobj=buf, format=self.format) as tar: + ti = tarfile.TarInfo() + ti.type = tarfile.AREGTYPE + ti.name = ('a' * 99) + '/' + ('b' * 3) + tar.addfile(ti) + + expected = {t.name: t.type for t in tar.getmembers()} + + buf.seek(0) + with tarfile.open(mode='r', fileobj=buf) as tar: + actual = {t.name: t.type for t in tar.getmembers()} + + self.assertEqual(expected, actual) + + class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase): subdir = "gnu" diff --git a/Misc/ACKS b/Misc/ACKS index 49a8deb30fc83c..a5794a6eda52e4 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1544,6 +1544,7 @@ Ashwin Ramaswami Jeff Ramnani Grant Ramsay Bayard Randel +Eashwar Ranganathan Varpu Rantala Brodie Rao Rémi Rampin diff --git a/Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst b/Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst new file mode 100644 index 00000000000000..1f5b8ed90b8a90 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst @@ -0,0 +1,2 @@ +Don't change :class:`tarfile.TarInfo` type from ``AREGTYPE`` to ``DIRTYPE`` when parsing +GNU long name or link headers.