Skip to content

Commit

Permalink
unpack_strategy/directory: use mv for nested unpack
Browse files Browse the repository at this point in the history
`mv` should preserve hardlinks and allow faster unpack on the same
filesystem. A secondary pass is done with `cp` to copy over attributes
onto any existing directories.

We only run this for nested unpacks as most direct Directory strategy
usage is for repositories where moving files breaks existing code.

There are also potential user issues that could be due to Apple's
`cp -l` on specific macOS versions. Can consider re-adding `cp -l` with
improved fallback handling if there is noticeable advantages.
  • Loading branch information
cho-m committed Oct 19, 2024
1 parent 6f17b06 commit d26dec1
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 47 deletions.
106 changes: 83 additions & 23 deletions Library/Homebrew/test/unpack_strategy/directory_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
require_relative "shared_examples"

RSpec.describe UnpackStrategy::Directory do
subject(:strategy) { described_class.new(path) }

let(:path) do
mktmpdir.tap do |path|
FileUtils.touch path/"file"
Expand All @@ -17,33 +15,95 @@

let(:unpack_dir) { mktmpdir }

it "does not follow symlinks" do
strategy.extract(to: unpack_dir)
expect(unpack_dir/"symlink").to be_a_symlink
end
shared_examples "extract directory" do |move:|
subject(:strategy) { described_class.new(path, move:) }

it "does not follow top level symlinks to directories" do
strategy.extract(to: unpack_dir)
expect(unpack_dir/"folderSymlink").to be_a_symlink
end
it "does not follow symlinks" do
strategy.extract(to: unpack_dir)
expect(unpack_dir/"symlink").to be_a_symlink
end

it "preserves hardlinks" do
strategy.extract(to: unpack_dir)
expect((unpack_dir/"file").stat.ino).to eq (unpack_dir/"hardlink").stat.ino
end
it "does not follow top level symlinks to directories" do
strategy.extract(to: unpack_dir)
expect(unpack_dir/"folderSymlink").to be_a_symlink
end

it "preserves permissions of contained files" do
FileUtils.chmod 0644, path/"file"

strategy.extract(to: unpack_dir)
expect((unpack_dir/"file").stat.mode & 0777).to eq 0644
end

it "preserves permissions of contained subdirectories" do
FileUtils.mkdir unpack_dir/"folder"
FileUtils.chmod 0755, unpack_dir/"folder"
FileUtils.chmod 0700, path/"folder"

strategy.extract(to: unpack_dir)
expect((unpack_dir/"folder").stat.mode & 0777).to eq 0700
end

it "preserves permissions of the destination directory" do
FileUtils.chmod 0700, path
FileUtils.chmod 0755, unpack_dir

strategy.extract(to: unpack_dir)
expect(unpack_dir.stat.mode & 0777).to eq 0755
end

it "preserves permissions of contained files" do
FileUtils.chmod 0644, path/"file"
it "preserves mtime of contained files and directories" do
FileUtils.mkdir unpack_dir/"folder"
FileUtils.touch path/"folder", mtime: Time.utc(2000, 1, 2, 3, 4, 5, 678999), nocreate: true
mtimes = path.children.to_h { |child| [child.basename, child.lstat.mtime] }

strategy.extract(to: unpack_dir)
expect((unpack_dir/"file").stat.mode & 0777).to eq 0644
strategy.extract(to: unpack_dir)
expect(unpack_dir.children.to_h { |child| [child.basename, child.lstat.mtime] }).to eq mtimes
end

it "preserves unrelated destination files and subdirectories" do
FileUtils.touch unpack_dir/"existing_file"
FileUtils.mkdir unpack_dir/"existing_folder"

strategy.extract(to: unpack_dir)
expect(unpack_dir/"existing_file").to be_a_file
expect(unpack_dir/"existing_folder").to be_a_directory
end

it "overwrites destination files and symlinks" do
FileUtils.touch unpack_dir/"existing_file"
FileUtils.ln_s unpack_dir/"existing_file", unpack_dir/"symlink"
(unpack_dir/"file").write "existing contents"

strategy.extract(to: unpack_dir)
expect((unpack_dir/"file").read).to be_empty
expect((unpack_dir/"symlink").readlink).to eq Pathname("file")
end

it "fails when overwriting a nested directory with a file" do
FileUtils.touch path/"folder/nested"
FileUtils.mkdir_p unpack_dir/"folder/nested"
expect { strategy.extract(to: unpack_dir) }.to raise_error(/Is a directory|cannot overwrite directory/i)
end
end

context "with `move: false`" do
include_examples "extract directory", move: false
end

it "preserves the permissions of the destination directory" do
FileUtils.chmod 0700, path
FileUtils.chmod 0755, unpack_dir
context "with `move: true`" do
include_examples "extract directory", move: true

strategy.extract(to: unpack_dir)
expect(unpack_dir.stat.mode & 0777).to eq 0755
it "preserves hardlinks" do
strategy.extract(to: unpack_dir)
expect((unpack_dir/"file").stat.ino).to eq (unpack_dir/"hardlink").stat.ino
end

# NOTE: We don't test `move: false` because system cp behaviour is inconsistent,
# e.g. Ventura cp does not error but Sequoia and Linux cp will error
it "fails when overwriting a file with a directory" do
FileUtils.touch unpack_dir/"folder"
expect { strategy.extract(to: unpack_dir) }.to raise_error(/cannot overwrite non-directory/i)
end
end
end
2 changes: 1 addition & 1 deletion Library/Homebrew/unpack_strategy.rb
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def extract_nestedly(to: nil, basename: nil, verbose: false, prioritize_extensio
FileUtils.chmod "u+w", path, verbose:
end

Directory.new(tmp_unpack_dir).extract(to:, verbose:)
Directory.new(tmp_unpack_dir, move: true).extract(to:, verbose:)
end
end

Expand Down
72 changes: 49 additions & 23 deletions Library/Homebrew/unpack_strategy/directory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,59 @@ def self.can_extract?(path)
path.directory?
end

sig {
params(
path: T.any(String, Pathname),
ref_type: T.nilable(Symbol),
ref: T.nilable(String),
merge_xattrs: T::Boolean,
move: T::Boolean,
).void
}
def initialize(path, ref_type: nil, ref: nil, merge_xattrs: false, move: false)
super(path, ref_type:, ref:, merge_xattrs:)
@move = move
end

private

sig { override.params(unpack_dir: Pathname, basename: Pathname, verbose: T::Boolean).void }
def extract_to_dir(unpack_dir, basename:, verbose:)
path_children = path.children
return if path_children.empty?

existing = unpack_dir.children

# We run a few cp attempts in the following order:
#
# 1. Start with `-al` to create hardlinks rather than copying files if the source and
# target are on the same filesystem. On macOS, this is the only cp option that can
# preserve hardlinks but it is only available since macOS 12.3 (file_cmds-353.100.22).
# 2. Try `-a` as GNU `cp -a` preserves hardlinks. macOS `cp -a` is identical to `cp -pR`.
# 3. Fall back on `-pR` to handle the case where GNU `cp -a` failed. This may happen if
# installing into a filesystem that doesn't support hardlinks like an exFAT USB drive.
cp_arg_attempts = ["-a", "-pR"]
cp_arg_attempts.unshift("-al") if path.stat.dev == unpack_dir.stat.dev

cp_arg_attempts.each do |arg|
args = [arg, *path_children, unpack_dir]
must_succeed = print_stderr = (arg == cp_arg_attempts.last)
result = system_command("cp", args:, verbose:, must_succeed:, print_stderr:)
break if result.success?

FileUtils.rm_r(unpack_dir.children - existing)
move_to_dir(unpack_dir, verbose:) if @move

path.each_child do |child|
system_command! "cp",
args: ["-pR", (child.directory? && !child.symlink?) ? "#{child}/." : child,
unpack_dir/child.basename],
verbose:
end
end

# Move files and non-conflicting directories from `path` to `unpack_dir`.
# Already existing subdirectories in `unpack_dir` are not modified.
#
# @raise [RuntimeError] on unsupported mv operation, e.g. overwriting a file with a directory
sig { params(unpack_dir: Pathname, verbose: T::Boolean).void }
def move_to_dir(unpack_dir, verbose:)
path.find(ignore_error: false) do |src|
next if src == path

dst = unpack_dir/src.relative_path_from(path)
if dst.exist?
dst_real_dir = dst.directory? && !dst.symlink?
src_real_dir = src.directory? && !src.symlink?
# Avoid moving a directory over an existing non-directory and vice versa.
# This outputs the same error message as GNU mv which is more user friendly than macOS mv.
raise "mv: cannot overwrite non-directory '#{dst}' with directory '#{src}'" if src_real_dir && !dst_real_dir
raise "mv: cannot overwrite directory '#{dst}' with non-directory '#{src}'" if !src_real_dir && dst_real_dir
# Defer writing over existing directories. Handle this later on to copy attributes
next if dst_real_dir

FileUtils.rm(dst, verbose:)
end

FileUtils.mv(src, dst, verbose:)
Find.prune
end
end
end
Expand Down

0 comments on commit d26dec1

Please sign in to comment.