diff --git a/Lib/tarfile.py b/Lib/tarfile.py index d0e7dec5575047..1394a26f2096ff 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -830,16 +830,22 @@ def _get_filtered_attrs(member, dest_path, for_data=True): if member.islnk() or member.issym(): if os.path.isabs(member.linkname): raise AbsoluteLinkError(member) + # A link member that resolves to the destination directory itself + # would replace it with a (sym)link, redirecting the destination + # for all subsequent members. + if target_path == dest_path: + raise OutsideDestinationError(member, target_path) normalized = os.path.normpath(member.linkname) if normalized != member.linkname: new_attrs['linkname'] = normalized if member.issym(): - target_path = os.path.join(dest_path, - os.path.dirname(name), - member.linkname) + # The symlink is created at `name` with trailing separators + # stripped, so its target is relative to the directory + # containing that path. + link_dir = os.path.dirname(name.rstrip('/' + os.sep)) + target_path = os.path.join(dest_path, link_dir, normalized) else: - target_path = os.path.join(dest_path, - member.linkname) + target_path = os.path.join(dest_path, normalized) target_path = os.path.realpath(target_path, strict=os.path.ALLOW_MISSING) if os.path.commonpath([target_path, dest_path]) != dest_path: diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index e270cbb22e2d1a..192c948edc6056 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -3911,10 +3911,19 @@ def test_parent_symlink(self): + "which is outside the destination") with self.check_context(arc.open(), 'data'): - self.expect_exception( - tarfile.LinkOutsideDestinationError, - """'parent' would link to ['"].*outerdir['"], """ - + "which is outside the destination") + if self.dotdot_resolves_early: + # 'current/../..' normalises to '..', which is rejected. + self.expect_exception( + tarfile.LinkOutsideDestinationError, + """'parent' would link to ['"].*outerdir['"], """ + + "which is outside the destination") + else: + # 'current/..' normalises to '.'; the rewritten link is + # created and 'parent/evil' lands harmlessly inside the + # destination. + self.expect_file('current', symlink_to='.') + self.expect_file('parent', symlink_to='.') + self.expect_file('evil') else: # No symlink support. The symlinks are ignored. @@ -4174,6 +4183,76 @@ def test_sly_relative2(self): + """['"].*moo['"], which is outside the """ + "destination") + @symlink_test + @os_helper.skip_unless_symlink + def test_normpath_realpath_mismatch(self): + # The link-target check must validate the value that will actually + # be written to disk (the normalised linkname), not the original. + # Here 'a' is a symlink to a deep nonexistent path, so realpath() + # of 'a/../../...' stays inside the destination while normpath() + # collapses 'a/..' lexically and escapes. + depth = len(self.destdir.parts) + 5 + deep = '/'.join(f'p{i}' for i in range(depth)) + sneaky = 'a/' + '../' * depth + 'flag' + for kind in 'symlink_to', 'hardlink_to': + with self.subTest(kind): + with ArchiveMaker() as arc: + arc.add('a', symlink_to=deep) + arc.add('escape', **{kind: sneaky}) + with self.check_context(arc.open(), 'data'): + self.expect_exception( + tarfile.LinkOutsideDestinationError) + + @symlink_test + @os_helper.skip_unless_symlink + def test_symlink_trailing_slash(self): + # A trailing slash on a symlink member's name must not cause the + # link target to be resolved relative to the wrong directory. + with ArchiveMaker() as arc: + t = tarfile.TarInfo('x/') + t.type = tarfile.SYMTYPE + t.linkname = '..' + arc.tar_w.addfile(t) + arc.add('x/escaped', content='hi') + + with self.check_context(arc.open(), 'data'): + self.expect_exception(tarfile.LinkOutsideDestinationError) + + @symlink_test + @os_helper.skip_unless_symlink + def test_link_at_destination(self): + # A link member whose name resolves to the destination directory + # itself must be rejected: otherwise the destination is replaced + # by a symlink and later members can be redirected through it. + for name in '', '.', './': + with ArchiveMaker() as arc: + t = tarfile.TarInfo(name) + t.type = tarfile.SYMTYPE + t.linkname = '.' + arc.tar_w.addfile(t) + + with self.check_context(arc.open(), 'data'): + self.expect_exception(tarfile.OutsideDestinationError) + + @symlink_test + @os_helper.skip_unless_symlink + def test_empty_name_symlink_chain(self): + # Regression test for a chain of empty-named symlinks that + # incrementally redirects the destination outwards. + with ArchiveMaker() as arc: + for name, target in [('', ''), ('a/', '..'), + ('', 'dummy'), ('', 'a'), + ('b/', '..'), + ('', 'dummy'), ('', 'a/b')]: + t = tarfile.TarInfo(name) + t.type = tarfile.SYMTYPE + t.linkname = target + arc.tar_w.addfile(t) + arc.add('escaped', content='hi') + + with self.check_context(arc.open(), 'data'): + self.expect_exception(tarfile.FilterError) + @symlink_test def test_deep_symlink(self): # Test that symlinks and hardlinks inside a directory diff --git a/Misc/NEWS.d/next/Security/2026-05-03-21-00-00.gh-issue-149486.tarflt.rst b/Misc/NEWS.d/next/Security/2026-05-03-21-00-00.gh-issue-149486.tarflt.rst new file mode 100644 index 00000000000000..7c69edb683cf80 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-05-03-21-00-00.gh-issue-149486.tarflt.rst @@ -0,0 +1,5 @@ +:func:`tarfile.data_filter` now validates link targets using the same +normalised value that is written to disk, strips trailing separators from +the member name when resolving a symlink's directory, and rejects link +members that would replace the destination directory itself. This closes +several path-traversal bypasses of the ``data`` extraction filter.