From 5c63521f2478f650400859f4fbb100db25adfadc Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 19 Jan 2026 06:24:17 +0100 Subject: [PATCH 1/2] File.dirname: add a spec for Shift JIS handling While trying to speed up various `File.*` methods, I realized they were way slower and more complicated than they should be for no apparent reason. However, after I asked Nobu, he explained that Shift JIS encoded text can contain `0x5C` (ASCII backslash) as the second byte of a two-byte character sequence. Since on Windows `0x5C` is `File::ALT_SEPARATOR`, this can easily break naive path-related algorithms searching for directory separators. --- core/file/dirname_spec.rb | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/core/file/dirname_spec.rb b/core/file/dirname_spec.rb index 63436e19c..d215e3e03 100644 --- a/core/file/dirname_spec.rb +++ b/core/file/dirname_spec.rb @@ -78,6 +78,28 @@ def object.to_int; 2; end File.dirname("foo/../").should == "foo" end + it "rejects strings encoded with non ASCII-compatible encodings" do + Encoding.list.reject(&:ascii_compatible?).reject(&:dummy?).each do |enc| + path = "/foo/bar".encode(enc) + -> { + File.dirname(path) + }.should raise_error(Encoding::CompatibilityError) + end + end + + it "works with all ASCII-compatible encodings" do + Encoding.list.select(&:ascii_compatible?).each do |enc| + File.dirname("/foo/bar".encode(enc)).should == "/foo".encode(enc) + end + end + + it "handles Shift JIS 0x5C (\\) as second byte of a multi-byte sequence" do + # dir/fileソname.txt + path = "dir/file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) + path.valid_encoding?.should be_true + File.dirname(path).should == "dir" + end + platform_is_not :windows do it "ignores repeated leading / (edge cases on non-windows)" do File.dirname("/////foo/bar/").should == "/foo" @@ -98,6 +120,13 @@ def object.to_int; 2; end File.dirname("//foo//").should == "//foo" File.dirname('/////').should == '//' end + + it "handles Shift JIS
0x5C (\\) as second byte of a multi-byte sequence (windows)" do + # dir/fileソname.txt + path = "dir\\file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) + path.valid_encoding?.should be_true + File.dirname(path).should == "dir" + end end it "accepts an object that has a #to_path method" do From 1724265c7317dce4666a1fd39deffc08101f6fe7 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Tue, 20 Jan 2026 13:17:42 +0100 Subject: [PATCH 2/2] Update core/file/dirname_spec.rb --- core/file/dirname_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/file/dirname_spec.rb b/core/file/dirname_spec.rb index d215e3e03..1b006af78 100644 --- a/core/file/dirname_spec.rb +++ b/core/file/dirname_spec.rb @@ -122,7 +122,7 @@ def object.to_int; 2; end end it "handles Shift JIS 0x5C (\\) as second byte of a multi-byte sequence (windows)" do - # dir/fileソname.txt + # dir\fileソname.txt path = "dir\\file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) path.valid_encoding?.should be_true File.dirname(path).should == "dir"