aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorluislavena <luislavena@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-08-24 03:44:56 +0000
committerluislavena <luislavena@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-08-24 03:44:56 +0000
commit86df08dac6fc4642a6bac7bab6ccd5f44b7a293c (patch)
tree8629b0ba3bf876488b74145b455e85cbd030e469
parentd212a89c380683490dc735c770855f08391540a1 (diff)
downloadruby-86df08dac6fc4642a6bac7bab6ccd5f44b7a293c.tar.gz
Improve require/File.expand_path performance on Windows
* configure.in (mingw): add shlwapi to the list of dependency libs for Windows. * win32/Makefile.sub (EXTSOLIBS): ditto. * internal.h: declare internal functions rb_w32_init_file, rb_file_expand_path_internal and rb_file_expand_path_fast. * file.c (Init_File): invoke Windows initialization rb_w32_init_file * win32/file.c (rb_file_load_path_internal): new function. Windows-specific implementation that replaces file_expand_path. [Bug #6836][ruby-core:46996] * win32/file.c (rb_w32_init_file): new function. Initialize codepage cache for faster conversion encodings lookup. * file.c (file_expand_path): rename to rb_file_expand_path_internal. Conditionally exclude from Windows. * file.c (rb_file_expand_path_fast): new function. delegates to rb_file_expand_path_internal without performing a hit to the filesystem. * file.c (file_expand_path_1): use rb_file_expand_path_internal without path expansion (used by require). * file.c (rb_find_file_ext_safe): ditto. * file.c (rb_find_file_safe): ditto. * load.c (rb_get_expanded_load_path): use rb_file_expand_path_fast. * load.c (rb_feature_provided): ditto. * file.c (rb_file_expand_path): use rb_file_expand_path_internal with path expansion. * file.c (rb_file_absolute_path): ditto. * test/ruby/test_file_exhaustive.rb: new tests to exercise rb_file_expand_path_internal implementation and compliance with existing behaviors. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@36811 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog41
-rw-r--r--configure.in2
-rw-r--r--file.c31
-rw-r--r--internal.h7
-rw-r--r--load.c4
-rw-r--r--test/ruby/test_file_exhaustive.rb206
-rw-r--r--win32/.time0
-rw-r--r--win32/Makefile.sub2
-rw-r--r--win32/file.c629
9 files changed, 909 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index b710091d25..a9987cc284 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,44 @@
+Fri Aug 24 12:40:15 2012 Luis Lavena <luislavena@gmail.com>
+
+ * configure.in (mingw): add shlwapi to the list of dependency
+ libs for Windows.
+ * win32/Makefile.sub (EXTSOLIBS): ditto.
+
+ * internal.h: declare internal functions rb_w32_init_file,
+ rb_file_expand_path_internal and rb_file_expand_path_fast.
+
+ * file.c (Init_File): invoke Windows initialization rb_w32_init_file
+
+ * win32/file.c (rb_file_load_path_internal): new function.
+ Windows-specific implementation that replaces file_expand_path.
+ [Bug #6836][ruby-core:46996]
+
+ * win32/file.c (rb_w32_init_file): new function. Initialize codepage
+ cache for faster conversion encodings lookup.
+
+ * file.c (file_expand_path): rename to rb_file_expand_path_internal.
+ Conditionally exclude from Windows.
+
+ * file.c (rb_file_expand_path_fast): new function. delegates to
+ rb_file_expand_path_internal without performing a hit to the
+ filesystem.
+
+ * file.c (file_expand_path_1): use rb_file_expand_path_internal without
+ path expansion (used by require).
+ * file.c (rb_find_file_ext_safe): ditto.
+ * file.c (rb_find_file_safe): ditto.
+
+ * load.c (rb_get_expanded_load_path): use rb_file_expand_path_fast.
+ * load.c (rb_feature_provided): ditto.
+
+ * file.c (rb_file_expand_path): use rb_file_expand_path_internal with
+ path expansion.
+ * file.c (rb_file_absolute_path): ditto.
+
+ * test/ruby/test_file_exhaustive.rb: new tests to exercise
+ rb_file_expand_path_internal implementation and compliance with
+ existing behaviors.
+
Fri Aug 24 07:35:24 2012 Eric Hodel <drbrain@segment7.net>
* lib/net/http/backward.rb (class Net): Restored Net::HTTPSession to
diff --git a/configure.in b/configure.in
index 4a915456f0..b490c45219 100644
--- a/configure.in
+++ b/configure.in
@@ -1186,7 +1186,7 @@ main()
AC_CHECK_FUNCS(cygwin_conv_path)
AC_LIBOBJ([langinfo])
],
-[mingw*], [ LIBS="-lshell32 -lws2_32 -limagehlp $LIBS"
+[mingw*], [ LIBS="-lshell32 -lws2_32 -limagehlp -lshlwapi $LIBS"
ac_cv_header_a_out_h=no
ac_cv_header_pwd_h=no
ac_cv_header_utime_h=no
diff --git a/file.c b/file.c
index 24aa1115fe..8c63f56d01 100644
--- a/file.c
+++ b/file.c
@@ -2882,8 +2882,9 @@ append_fspath(VALUE result, VALUE fname, char *dir, rb_encoding **enc, rb_encodi
return buf + dirlen;
}
-static VALUE
-file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
+#ifndef _WIN32
+VALUE
+rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
{
const char *s, *b, *fend;
char *buf, *p, *pend, *root;
@@ -2945,7 +2946,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
/* specified drive, but not full path */
int same = 0;
if (!NIL_P(dname) && !not_same_drive(dname, s[0])) {
- file_expand_path(dname, Qnil, abs_mode, result);
+ rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result);
BUFINIT();
if (has_drive_letter(p) && TOLOWER(p[0]) == TOLOWER(s[0])) {
/* ok, same drive */
@@ -2969,7 +2970,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
#endif
else if (!rb_is_absolute_path(s)) {
if (!NIL_P(dname)) {
- file_expand_path(dname, Qnil, abs_mode, result);
+ rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result);
rb_enc_associate(result, rb_enc_check(result, fname));
BUFINIT();
p = pend;
@@ -3222,6 +3223,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
ENC_CODERANGE_CLEAR(result);
return result;
}
+#endif /* _WIN32 */
#define EXPAND_PATH_BUFFER() rb_usascii_str_new(0, MAXPATHLEN + 2)
@@ -3232,14 +3234,21 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
static VALUE
file_expand_path_1(VALUE fname)
{
- return file_expand_path(fname, Qnil, 0, EXPAND_PATH_BUFFER());
+ return rb_file_expand_path_internal(fname, Qnil, 0, 0, EXPAND_PATH_BUFFER());
}
VALUE
rb_file_expand_path(VALUE fname, VALUE dname)
{
check_expand_path_args(fname, dname);
- return file_expand_path(fname, dname, 0, EXPAND_PATH_BUFFER());
+ return rb_file_expand_path_internal(fname, dname, 0, 1, EXPAND_PATH_BUFFER());
+}
+
+VALUE
+rb_file_expand_path_fast(VALUE fname, VALUE dname)
+{
+ check_expand_path_args(fname, dname);
+ return rb_file_expand_path_internal(fname, dname, 0, 0, EXPAND_PATH_BUFFER());
}
/*
@@ -3276,7 +3285,7 @@ VALUE
rb_file_absolute_path(VALUE fname, VALUE dname)
{
check_expand_path_args(fname, dname);
- return file_expand_path(fname, dname, 1, EXPAND_PATH_BUFFER());
+ return rb_file_expand_path_internal(fname, dname, 1, 1, EXPAND_PATH_BUFFER());
}
/*
@@ -5251,7 +5260,7 @@ rb_find_file_ext_safe(VALUE *filep, const char *const *ext, int safe_level)
RB_GC_GUARD(str) = rb_get_path_check(str, safe_level);
if (RSTRING_LEN(str) == 0) continue;
- file_expand_path(fname, str, 0, tmp);
+ rb_file_expand_path_internal(fname, str, 0, 0, tmp);
if (rb_file_load_ok(RSTRING_PTR(tmp))) {
*filep = copy_path_class(tmp, *filep);
return (int)(j+1);
@@ -5311,7 +5320,7 @@ rb_find_file_safe(VALUE path, int safe_level)
VALUE str = RARRAY_PTR(load_path)[i];
RB_GC_GUARD(str) = rb_get_path_check(str, safe_level);
if (RSTRING_LEN(str) > 0) {
- file_expand_path(path, str, 0, tmp);
+ rb_file_expand_path_internal(path, str, 0, 0, tmp);
f = RSTRING_PTR(tmp);
if (rb_file_load_ok(f)) goto found;
}
@@ -5546,4 +5555,8 @@ Init_File(void)
rb_define_method(rb_cStat, "setuid?", rb_stat_suid, 0);
rb_define_method(rb_cStat, "setgid?", rb_stat_sgid, 0);
rb_define_method(rb_cStat, "sticky?", rb_stat_sticky, 0);
+
+#ifdef _WIN32
+ rb_w32_init_file();
+#endif
}
diff --git a/internal.h b/internal.h
index c091a1c49e..ed4864c1eb 100644
--- a/internal.h
+++ b/internal.h
@@ -105,8 +105,15 @@ VALUE rb_home_dir(const char *user, VALUE result);
VALUE rb_realpath_internal(VALUE basedir, VALUE path, int strict);
void rb_file_const(const char*, VALUE);
int rb_file_load_ok(const char *);
+VALUE rb_file_expand_path_fast(VALUE, VALUE);
+VALUE rb_file_expand_path_internal(VALUE, VALUE, int, int, VALUE);
void Init_File(void);
+#ifdef _WIN32
+/* file.c, win32/file.c */
+void rb_w32_init_file(void);
+#endif
+
/* gc.c */
void Init_heap(void);
void *ruby_mimmalloc(size_t size);
diff --git a/load.c b/load.c
index 7f1b3f65b8..b0e6f16e4f 100644
--- a/load.c
+++ b/load.c
@@ -43,7 +43,7 @@ rb_get_expanded_load_path(void)
ary = rb_ary_new2(RARRAY_LEN(load_path));
for (i = 0; i < RARRAY_LEN(load_path); ++i) {
- VALUE path = rb_file_expand_path(RARRAY_PTR(load_path)[i], Qnil);
+ VALUE path = rb_file_expand_path_fast(RARRAY_PTR(load_path)[i], Qnil);
rb_str_freeze(path);
rb_ary_push(ary, path);
}
@@ -233,7 +233,7 @@ rb_feature_provided(const char *feature, const char **loading)
if (*feature == '.' &&
(feature[1] == '/' || strncmp(feature+1, "./", 2) == 0)) {
- fullpath = rb_file_expand_path(rb_str_new2(feature), Qnil);
+ fullpath = rb_file_expand_path_fast(rb_str_new2(feature), Qnil);
feature = RSTRING_PTR(fullpath);
}
if (ext && !strchr(ext, '/')) {
diff --git a/test/ruby/test_file_exhaustive.rb b/test/ruby/test_file_exhaustive.rb
index 7467bffdfe..1523823bb7 100644
--- a/test/ruby/test_file_exhaustive.rb
+++ b/test/ruby/test_file_exhaustive.rb
@@ -14,6 +14,7 @@ class TestFileExhaustive < Test::Unit::TestCase
def setup
@dir = Dir.mktmpdir("rubytest-file")
+ @rootdir = "#{DRIVE}/"
File.chown(-1, Process.gid, @dir)
@file = make_tmp_filename("file")
@zerofile = make_tmp_filename("zerofile")
@@ -450,9 +451,24 @@ class TestFileExhaustive < Test::Unit::TestCase
assert_equal(expected.force_encoding(cp), File.expand_path(a.dup.force_encoding(cp)), cp)
end
+ path = "\u3042\u3044\u3046\u3048\u304a".encode("EUC-JP")
+ assert_equal("#{Dir.pwd}/#{path}".encode("CP932"), File.expand_path(path).encode("CP932"))
+
+ path = "\u3042\u3044\u3046\u3048\u304a".encode("CP51932")
+ assert_equal("#{Dir.pwd}/#{path}", File.expand_path(path))
+
assert_incompatible_encoding {|d| File.expand_path(d)}
end
+ def test_expand_path_encoding_filesystem
+ path = "~".encode("US-ASCII")
+ dir = "C:/".encode("IBM437")
+ fs = Encoding.find("filesystem")
+
+ assert_equal fs, File.expand_path(path).encoding
+ assert_equal fs, File.expand_path(path, dir).encoding
+ end
+
def test_expand_path_home
assert_kind_of(String, File.expand_path("~")) if ENV["HOME"]
assert_raise(ArgumentError) { File.expand_path("~foo_bar_baz_unknown_user_wahaha") }
@@ -460,7 +476,13 @@ class TestFileExhaustive < Test::Unit::TestCase
begin
bug3630 = '[ruby-core:31537]'
home = ENV["HOME"]
+ home_drive = ENV["HOMEDRIVE"]
+ home_path = ENV["HOMEPATH"]
+ user_profile = ENV["USERPROFILE"]
ENV["HOME"] = nil
+ ENV["HOMEDRIVE"] = nil
+ ENV["HOMEPATH"] = nil
+ ENV["USERPROFILE"] = nil
assert_raise(ArgumentError) { File.expand_path("~") }
ENV["HOME"] = "~"
assert_raise(ArgumentError, bug3630) { File.expand_path("~") }
@@ -468,9 +490,193 @@ class TestFileExhaustive < Test::Unit::TestCase
assert_raise(ArgumentError, bug3630) { File.expand_path("~") }
ensure
ENV["HOME"] = home
+ ENV["HOMEDRIVE"] = home_drive
+ ENV["HOMEPATH"] = home_path
+ ENV["USERPROFILE"] = user_profile
+ end
+ end
+
+ def test_expand_path_remove_trailing_alternative_data
+ assert_equal File.join(@rootdir, "aaa"), File.expand_path("#{@rootdir}/aaa::$DATA")
+ assert_equal File.join(@rootdir, "aa:a"), File.expand_path("#{@rootdir}/aa:a:$DATA")
+ assert_equal File.join(@rootdir, "aaa:$DATA"), File.expand_path("#{@rootdir}/aaa:$DATA")
+ end if DRIVE
+
+ def test_expand_path_resolve_empty_string_current_directory
+ assert_equal(Dir.pwd, File.expand_path(""))
+ end
+
+ def test_expand_path_resolve_dot_current_directory
+ assert_equal(Dir.pwd, File.expand_path("."))
+ end
+
+ def test_expand_path_resolve_file_name_relative_current_directory
+ assert_equal(File.join(Dir.pwd, "foo"), File.expand_path("foo"))
+ end
+
+ def test_ignore_nil_dir_string
+ assert_equal(File.join(Dir.pwd, "foo"), File.expand_path("foo", nil))
+ end
+
+ def test_expand_path_resolve_file_name_and_dir_string_relative
+ assert_equal(File.join(Dir.pwd, "bar", "foo"),
+ File.expand_path("foo", "bar"))
+ end
+
+ def test_expand_path_cleanup_dots_file_name
+ bug = "[ruby-talk:18512]"
+
+ assert_equal(File.join(Dir.pwd, ".a"), File.expand_path(".a"), bug)
+ assert_equal(File.join(Dir.pwd, "..a"), File.expand_path("..a"), bug)
+
+ if DRIVE
+ # cleanup dots only on Windows
+ assert_equal(File.join(Dir.pwd, "a"), File.expand_path("a."), bug)
+ skip "FIXME"
+ assert_equal(File.join(Dir.pwd, "a"), File.expand_path("a.."), bug)
+ else
+ assert_equal(File.join(Dir.pwd, "a."), File.expand_path("a."), bug)
+ assert_equal(File.join(Dir.pwd, "a.."), File.expand_path("a.."), bug)
+ end
+ end
+
+ def test_expand_path_converts_a_pathname_to_an_absolute_pathname_using_a_complete_path
+ assert_equal(@dir, File.expand_path("", "#{@dir}"))
+ assert_equal(File.join(@dir, "a"), File.expand_path("a", "#{@dir}"))
+ assert_equal(File.join(@dir, "a"), File.expand_path("../a", "#{@dir}/xxx"))
+ assert_equal(@rootdir, File.expand_path(".", "#{@rootdir}"))
+ end
+
+ def test_expand_path_ignores_supplied_dir_if_path_contains_a_drive_letter
+ assert_equal(@rootdir, File.expand_path(@rootdir, "D:/"))
+ end if DRIVE
+
+ def test_expand_path_removes_trailing_slashes_from_absolute_path
+ assert_equal(File.join(@rootdir, "foo"), File.expand_path("#{@rootdir}foo/"))
+ assert_equal(File.join(@rootdir, "foo.rb"), File.expand_path("#{@rootdir}foo.rb/"))
+ end
+
+ def test_expand_path_removes_trailing_spaces_from_absolute_path
+ assert_equal(File.join(@rootdir, "a"), File.expand_path("#{@rootdir}a "))
+ end if DRIVE
+
+ def test_expand_path_converts_a_pathname_which_starts_with_a_slash_using_dir_s_drive
+ assert_match(%r"\Az:/foo\z"i, File.expand_path('/foo', "z:/bar"))
+ end if DRIVE
+
+ def test_expand_path_converts_a_pathname_which_starts_with_a_slash_and_unc_pathname
+ assert_equal("//foo", File.expand_path('//foo', "//bar"))
+ assert_equal("//bar/foo", File.expand_path('/foo', "//bar"))
+ assert_equal("//foo", File.expand_path('//foo', "/bar"))
+ end if DRIVE
+
+ def test_expand_path_converts_a_dot_with_unc_dir
+ assert_equal("//", File.expand_path('.', "//"))
+ end
+
+ def test_expand_path_preserves_unc_path_root
+ assert_equal("//", File.expand_path("//"))
+ assert_equal("//", File.expand_path("//."))
+ assert_equal("//", File.expand_path("//.."))
+ end
+
+ def test_expand_path_converts_a_pathname_which_starts_with_a_slash_using_host_share
+ assert_match(%r"\A//host/share/foo\z"i, File.expand_path('/foo', "//host/share/bar"))
+ end if DRIVE
+
+ def test_expand_path_converts_a_pathname_which_starts_with_a_slash_using_a_current_drive
+ assert_match(%r"\A#{DRIVE}/foo\z"i, File.expand_path('/foo'))
+ end
+
+ def test_expand_path_returns_tainted_strings_or_not
+ assert_equal(true, File.expand_path('foo').tainted?)
+ assert_equal(true, File.expand_path('foo'.taint).tainted?)
+ assert_equal(true, File.expand_path('/foo'.taint).tainted?)
+ assert_equal(true, File.expand_path('foo', 'bar').tainted?)
+ assert_equal(true, File.expand_path('foo', '/bar'.taint).tainted?)
+ assert_equal(true, File.expand_path('foo'.taint, '/bar').tainted?)
+ assert_equal(true, File.expand_path('~').tainted?)
+
+ if DRIVE
+ assert_equal(true, File.expand_path('/foo').tainted?)
+ assert_equal(false, File.expand_path('//foo').tainted?)
+ assert_equal(true, File.expand_path('C:/foo'.taint).tainted?)
+ assert_equal(false, File.expand_path('C:/foo').tainted?)
+ assert_equal(true, File.expand_path('foo', '/bar').tainted?)
+ assert_equal(true, File.expand_path('foo', 'C:/bar'.taint).tainted?)
+ assert_equal(true, File.expand_path('foo'.taint, 'C:/bar').tainted?)
+ assert_equal(false, File.expand_path('foo', 'C:/bar').tainted?)
+ assert_equal(false, File.expand_path('C:/foo/../bar').tainted?)
+ assert_equal(false, File.expand_path('foo', '//bar').tainted?)
+ else
+ assert_equal(false, File.expand_path('/foo').tainted?)
+ assert_equal(false, File.expand_path('foo', '/bar').tainted?)
end
end
+ def test_expand_path_converts_a_pathname_to_an_absolute_pathname_using_home_as_base
+ old_home = ENV["HOME"]
+ home = ENV["HOME"] = "#{DRIVE}/UserHome"
+ assert_equal(home, File.expand_path("~"))
+ assert_equal(home, File.expand_path("~", "C:/FooBar"))
+ assert_equal(File.join(home, "a"), File.expand_path("~/a", "C:/FooBar"))
+ ensure
+ ENV["HOME"] = old_home
+ end
+
+ def test_expand_path_converts_a_pathname_to_an_absolute_pathname_using_unc_home
+ old_home = ENV["HOME"]
+ unc_home = ENV["HOME"] = "//UserHome"
+ assert_equal(unc_home, File.expand_path("~"))
+ ensure
+ ENV["HOME"] = old_home
+ end if DRIVE
+
+ def test_expand_path_does_not_modify_a_home_string_argument
+ old_home = ENV["HOME"]
+ home = ENV["HOME"] = "#{DRIVE}/UserHome"
+ str = "~/a"
+ assert_equal("#{home}/a", File.expand_path(str))
+ assert_equal("~/a", str)
+ ensure
+ ENV["HOME"] = old_home
+ end
+
+ def test_expand_path_raises_argument_error_for_any_supplied_username
+ bug = '[ruby-core:39597]'
+ assert_raise(ArgumentError, bug) { File.expand_path("~anything") }
+ end if DRIVE
+
+ def test_expand_path_raises_a_type_error_if_not_passed_a_string_type
+ assert_raise(TypeError) { File.expand_path(1) }
+ assert_raise(TypeError) { File.expand_path(nil) }
+ assert_raise(TypeError) { File.expand_path(true) }
+ end
+
+ def test_expand_path_expands_dot_dir
+ assert_equal("#{DRIVE}/dir", File.expand_path("#{DRIVE}/./dir"))
+ end
+
+ def test_expand_path_does_not_modify_the_string_argument
+ str = "./a/b/../c"
+ assert_equal("#{Dir.pwd}/a/c", File.expand_path(str, Dir.pwd))
+ assert_equal("./a/b/../c", str)
+ end
+
+ def test_expand_path_returns_a_string_when_passed_a_string_subclass
+ sub = Class.new(String)
+ str = sub.new "./a/b/../c"
+ path = File.expand_path(str, Dir.pwd)
+ assert_equal("#{Dir.pwd}/a/c", path)
+ assert_instance_of(String, path)
+ end
+
+ def test_expand_path_accepts_objects_that_have_a_to_path_method
+ klass = Class.new { def to_path; "a/b/c"; end }
+ obj = klass.new
+ assert_equal("#{Dir.pwd}/a/b/c", File.expand_path(obj))
+ end
+
def test_basename
assert_equal(File.basename(@file).sub(/\.test$/, ""), File.basename(@file, ".test"))
assert_equal("", s = File.basename(""))
diff --git a/win32/.time b/win32/.time
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/win32/.time
diff --git a/win32/Makefile.sub b/win32/Makefile.sub
index 13e6471f4b..fe33455ecf 100644
--- a/win32/Makefile.sub
+++ b/win32/Makefile.sub
@@ -226,7 +226,7 @@ EXTLIBS =
EXTSOLIBS =
!endif
!if !defined(LIBS)
-LIBS = oldnames.lib user32.lib advapi32.lib shell32.lib ws2_32.lib imagehlp.lib $(EXTLIBS)
+LIBS = oldnames.lib user32.lib advapi32.lib shell32.lib ws2_32.lib imagehlp.lib shlwapi.lib $(EXTLIBS)
!endif
!if !defined(MISSING)
MISSING = acosh.obj cbrt.obj crypt.obj erf.obj ffs.obj langinfo.obj lgamma_r.obj strlcat.obj strlcpy.obj tgamma.obj win32/win32.obj win32/file.obj setproctitle.obj
diff --git a/win32/file.c b/win32/file.c
index 955f91bc67..c8bb445c82 100644
--- a/win32/file.c
+++ b/win32/file.c
@@ -1,10 +1,630 @@
#include "ruby/ruby.h"
+#include "ruby/encoding.h"
#include <winbase.h>
+#include <wchar.h>
+#include <shlwapi.h>
#ifndef INVALID_FILE_ATTRIBUTES
# define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
#endif
+/* cache 'encoding name' => 'code page' into a hash */
+static VALUE rb_code_page;
+
+#define IS_DIR_SEPARATOR_P(c) (c == L'\\' || c == L'/')
+#define IS_DIR_UNC_P(c) (IS_DIR_SEPARATOR_P(c[0]) && IS_DIR_SEPARATOR_P(c[1]))
+
+/* MultiByteToWideChar() doesn't work with code page 51932 */
+#define INVALID_CODE_PAGE 51932
+#define PATH_BUFFER_SIZE MAX_PATH * 2
+
+#define insecure_obj_p(obj, level) ((level) >= 4 || ((level) > 0 && OBJ_TAINTED(obj)))
+
+static inline void
+replace_wchar(wchar_t *s, int find, int replace)
+{
+ while (*s != 0) {
+ if (*s == find)
+ *s = replace;
+ s++;
+ }
+}
+
+/* Convert str from multibyte char to wchar with specified code page */
+static inline void
+convert_mb_to_wchar(VALUE str, wchar_t **wstr, wchar_t **wstr_pos, size_t *wstr_len, UINT code_page)
+{
+ size_t len;
+
+ if (NIL_P(str))
+ return;
+
+ len = MultiByteToWideChar(code_page, 0, RSTRING_PTR(str), -1, NULL, 0) + 1;
+ *wstr = (wchar_t *)xmalloc(len * sizeof(wchar_t));
+ if (wstr_pos)
+ *wstr_pos = *wstr;
+
+ MultiByteToWideChar(code_page, 0, RSTRING_PTR(str), -1, *wstr, len);
+ *wstr_len = len - 2;
+}
+
+static inline void
+convert_wchar_to_mb(const wchar_t *wstr, char **str, size_t *str_len, UINT code_page)
+{
+ size_t len;
+
+ len = WideCharToMultiByte(code_page, 0, wstr, -1, NULL, 0, NULL, NULL);
+ *str = (char *)xmalloc(len * sizeof(char));
+ WideCharToMultiByte(code_page, 0, wstr, -1, *str, len, NULL, NULL);
+
+ /* do not count terminator as part of the string length */
+ *str_len = len - 1;
+}
+
+/*
+ Return user's home directory using environment variables combinations.
+ Memory allocated by this function should be manually freeded afterwards.
+
+ Try:
+ HOME, HOMEDRIVE + HOMEPATH and USERPROFILE environment variables
+ TODO: Special Folders - Profile and Personal
+*/
+static wchar_t *
+home_dir()
+{
+ wchar_t *buffer = NULL;
+ size_t buffer_len = 0, len = 0;
+ size_t home_env = 0;
+
+ /*
+ GetEnvironmentVariableW when used with NULL will return the required
+ buffer size and its terminating character.
+ http://msdn.microsoft.com/en-us/library/windows/desktop/ms683188(v=vs.85).aspx
+ */
+
+ if (len = GetEnvironmentVariableW(L"HOME", NULL, 0)) {
+ buffer_len = len;
+ home_env = 1;
+ }
+ else if (len = GetEnvironmentVariableW(L"HOMEDRIVE", NULL, 0)) {
+ buffer_len = len;
+ if (len = GetEnvironmentVariableW(L"HOMEPATH", NULL, 0)) {
+ buffer_len += len;
+ home_env = 2;
+ }
+ else {
+ buffer_len = 0;
+ }
+ }
+ else if (len = GetEnvironmentVariableW(L"USERPROFILE", NULL, 0)) {
+ buffer_len = len;
+ home_env = 3;
+ }
+
+ /* allocate buffer */
+ if (home_env)
+ buffer = (wchar_t *)xmalloc(buffer_len * sizeof(wchar_t));
+
+ switch (home_env) {
+ case 1:
+ /* HOME */
+ GetEnvironmentVariableW(L"HOME", buffer, buffer_len);
+ break;
+ case 2:
+ /* HOMEDRIVE + HOMEPATH */
+ len = GetEnvironmentVariableW(L"HOMEDRIVE", buffer, buffer_len);
+ GetEnvironmentVariableW(L"HOMEPATH", buffer + len, buffer_len - len);
+ break;
+ case 3:
+ /* USERPROFILE */
+ GetEnvironmentVariableW(L"USERPROFILE", buffer, buffer_len);
+ break;
+ default:
+ break;
+ }
+
+ if (home_env) {
+ /* sanitize backslashes with forwardslashes */
+ replace_wchar(buffer, L'\\', L'/');
+
+ return buffer;
+ }
+
+ return NULL;
+}
+
+/* Remove trailing invalid ':$DATA' of the path. */
+static inline size_t
+remove_invalid_alternative_data(wchar_t *wfullpath, size_t size) {
+ static const wchar_t prime[] = L":$DATA";
+ enum { prime_len = (sizeof(prime) / sizeof(wchar_t)) -1 };
+
+ if (size <= prime_len || _wcsnicmp(wfullpath + size - prime_len, prime, prime_len) != 0)
+ return size;
+
+ /* alias of stream */
+ /* get rid of a bug of x64 VC++ */
+ if (wfullpath[size - (prime_len + 1)] == ':') {
+ /* remove trailing '::$DATA' */
+ size -= prime_len + 1; /* prime */
+ wfullpath[size] = L'\0';
+ }
+ else {
+ /* remove trailing ':$DATA' of paths like '/aa:a:$DATA' */
+ wchar_t *pos = wfullpath + size - (prime_len + 1);
+ while (!IS_DIR_SEPARATOR_P(*pos) && pos != wfullpath) {
+ if (*pos == L':') {
+ size -= prime_len; /* alternative */
+ wfullpath[size] = L'\0';
+ break;
+ }
+ pos--;
+ }
+ }
+ return size;
+}
+
+/* Return system code page. */
+static inline UINT
+system_code_page() {
+ return AreFileApisANSI() ? CP_ACP : CP_OEMCP;
+}
+
+/*
+ Return code page number of the encoding.
+ Cache code page into a hash for performance since finding the code page in
+ Encoding#names is slow.
+*/
+static UINT
+code_page(rb_encoding *enc)
+{
+ VALUE code_page_value, name_key;
+ VALUE encoding, names_ary = Qundef, name;
+ char *enc_name;
+ struct RString fake_str;
+ ID names;
+ long i;
+
+ if (!enc)
+ return system_code_page();
+
+ enc_name = (char *)rb_enc_name(enc);
+
+ fake_str.basic.flags = T_STRING|RSTRING_NOEMBED;
+ fake_str.basic.klass = rb_cString;
+ fake_str.as.heap.len = strlen(enc_name);
+ fake_str.as.heap.ptr = enc_name;
+ fake_str.as.heap.aux.capa = fake_str.as.heap.len;
+ name_key = (VALUE)&fake_str;
+ ENCODING_CODERANGE_SET(name_key, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
+
+ code_page_value = rb_hash_lookup(rb_code_page, name_key);
+ if (code_page_value != Qnil)
+ return (UINT)FIX2INT(code_page_value);
+
+ name_key = rb_usascii_str_new2(enc_name);
+
+ encoding = rb_enc_from_encoding(enc);
+ if (!NIL_P(encoding)) {
+ CONST_ID(names, "names");
+ names_ary = rb_funcall(encoding, names, 0);
+ }
+
+ /* map US-ASCII and ASCII-8bit as code page 20127 (us-ascii) */
+ if (enc == rb_usascii_encoding() || enc == rb_ascii8bit_encoding()) {
+ UINT code_page = 20127;
+ rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page));
+ return code_page;
+ }
+
+ if (names_ary != Qundef) {
+ for (i = 0; i < RARRAY_LEN(names_ary); i++) {
+ name = RARRAY_PTR(names_ary)[i];
+ if (strncmp("CP", RSTRING_PTR(name), 2) == 0) {
+ int code_page = atoi(RSTRING_PTR(name) + 2);
+ if (code_page != 0) {
+ rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page));
+ return (UINT)code_page;
+ }
+ }
+ }
+ }
+
+ rb_hash_aset(rb_code_page, name_key, INT2FIX(INVALID_CODE_PAGE));
+ return INVALID_CODE_PAGE;
+}
+
+static inline VALUE
+fix_string_encoding(VALUE str, rb_encoding *encoding)
+{
+ VALUE result, tmp;
+
+ tmp = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), encoding);
+ result = rb_str_encode(tmp, rb_enc_from_encoding(rb_utf8_encoding()), 0, Qnil);
+
+ return result;
+}
+
+/*
+ Replace the last part of the path to long name.
+ We try to avoid to call FindFirstFileW() since it takes long time.
+*/
+static inline size_t
+replace_to_long_name(wchar_t **wfullpath, size_t size, int heap) {
+ WIN32_FIND_DATAW find_data;
+ HANDLE find_handle;
+
+ /*
+ Skip long name conversion if the path is already long name.
+ Short name is 8.3 format.
+ http://en.wikipedia.org/wiki/8.3_filename
+ This check can be skipped for directory components that have file
+ extensions longer than 3 characters, or total lengths longer than
+ 12 characters.
+ http://msdn.microsoft.com/en-us/library/windows/desktop/aa364980(v=vs.85).aspx
+ */
+ size_t const max_short_name_size = 8 + 1 + 3;
+ size_t const max_extension_size = 3;
+ size_t path_len = 1, extension_len = 0;
+ wchar_t *pos = *wfullpath;
+
+ if (size == 3 && pos[1] == L':' && pos[2] == L'\\' && pos[3] == L'\0') {
+ /* root path doesn't need short name expansion */
+ return size;
+ }
+
+ pos = *wfullpath + size - 1;
+ while (!IS_DIR_SEPARATOR_P(*pos) && pos != *wfullpath) {
+ if (!extension_len && *pos == L'.') {
+ extension_len = path_len - 1;
+ }
+ if (path_len > max_short_name_size || extension_len > max_extension_size) {
+ return size;
+ }
+ path_len++;
+ pos--;
+ }
+
+ find_handle = FindFirstFileW(*wfullpath, &find_data);
+ if (find_handle != INVALID_HANDLE_VALUE) {
+ size_t trail_pos = wcslen(*wfullpath);
+ size_t file_len = wcslen(find_data.cFileName);
+
+ FindClose(find_handle);
+ while (trail_pos > 0) {
+ if (IS_DIR_SEPARATOR_P((*wfullpath)[trail_pos]))
+ break;
+ trail_pos--;
+ }
+ size = trail_pos + 1 + file_len;
+ if ((size + 1) > sizeof(*wfullpath) / sizeof((*wfullpath)[0])) {
+ wchar_t *buf = (wchar_t *)xmalloc((size + 1) * sizeof(wchar_t));
+ wcsncpy(buf, *wfullpath, trail_pos + 1);
+ if (heap)
+ xfree(*wfullpath);
+ *wfullpath = buf;
+ }
+ wcsncpy(*wfullpath + trail_pos + 1, find_data.cFileName, file_len + 1);
+ }
+ return size;
+}
+
+VALUE
+rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
+{
+ size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0;
+ size_t buffer_len = 0;
+ char *fullpath = NULL;
+ wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL, *wdir = NULL;
+ wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
+ UINT path_cp, cp;
+ VALUE path = fname, dir = dname;
+ wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
+ wchar_t path_drive = L'\0', dir_drive = L'\0';
+ int ignore_dir = 0;
+ rb_encoding *path_encoding;
+ int tainted = 0;
+
+ /* tainted if path is tainted */
+ tainted = OBJ_TAINTED(path);
+
+ /* get path encoding */
+ if (NIL_P(dir)) {
+ path_encoding = rb_enc_get(path);
+ }
+ else {
+ path_encoding = rb_enc_check(path, dir);
+ }
+
+ cp = path_cp = code_page(path_encoding);
+
+ /* workaround invalid codepage */
+ if (path_cp == INVALID_CODE_PAGE) {
+ cp = CP_UTF8;
+ if (!NIL_P(path)) {
+ path = fix_string_encoding(path, path_encoding);
+ }
+ }
+
+ /* convert char * to wchar_t */
+ convert_mb_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp);
+
+ /* determine if we need the user's home directory */
+ /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */
+ if (abs_mode == 0 && ((wpath_len == 1 && wpath_pos[0] == L'~') ||
+ (wpath_len >= 2 && wpath_pos[0] == L'~' && IS_DIR_SEPARATOR_P(wpath_pos[1])))) {
+ /* tainted if expanding '~' */
+ tainted = 1;
+
+ whome = home_dir();
+ if (whome == NULL) {
+ xfree(wpath);
+ rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
+ }
+ whome_len = wcslen(whome);
+
+ if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
+ xfree(wpath);
+ rb_raise(rb_eArgError, "non-absolute home");
+ }
+
+ /* use filesystem encoding if expanding home dir */
+ path_encoding = rb_filesystem_encoding();
+ cp = path_cp = system_code_page();
+
+ /* ignores dir since we are expading home */
+ ignore_dir = 1;
+
+ /* exclude ~ from the result */
+ wpath_pos++;
+ wpath_len--;
+
+ /* exclude separator if present */
+ if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
+ wpath_pos++;
+ wpath_len--;
+ }
+ }
+ else if (wpath_len >= 2 && wpath_pos[1] == L':') {
+ if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
+ /* ignore dir since path contains a drive letter and a root slash */
+ ignore_dir = 1;
+ }
+ else {
+ /* determine if we ignore dir or not later */
+ path_drive = wpath_pos[0];
+ }
+ }
+ else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
+ wchar_t *wuser = wpath_pos + 1;
+ wchar_t *pos = wuser;
+ char *user;
+
+ /* tainted if expanding '~' */
+ tainted = 1;
+
+ while (!IS_DIR_SEPARATOR_P(*pos) && *pos != '\0')
+ pos++;
+
+ *pos = '\0';
+ convert_wchar_to_mb(wuser, &user, &size, cp);
+
+ /* convert to VALUE and set the path encoding */
+ if (path_cp == INVALID_CODE_PAGE) {
+ VALUE tmp = rb_enc_str_new(user, size, rb_utf8_encoding());
+ result = rb_str_encode(tmp, rb_enc_from_encoding(path_encoding), 0, Qnil);
+ rb_str_resize(tmp, 0);
+ }
+ else {
+ result = rb_enc_str_new(user, size, path_encoding);
+ }
+
+ xfree(wpath);
+ if (user)
+ xfree(user);
+
+ rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
+ }
+
+ /* convert dir */
+ if (!ignore_dir && !NIL_P(dir)) {
+ /* fix string encoding */
+ if (path_cp == INVALID_CODE_PAGE) {
+ dir = fix_string_encoding(dir, path_encoding);
+ }
+
+ /* convert char * to wchar_t */
+ convert_mb_to_wchar(dir, &wdir, NULL, &wdir_len, cp);
+
+ if (wdir_len >= 2 && wdir[1] == L':') {
+ dir_drive = wdir[0];
+ if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
+ wdir_len = 2;
+ }
+ }
+ else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
+ /* UNC path */
+ if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
+ /* cut the UNC path tail to '//host/share' */
+ size_t separators = 0;
+ size_t pos = 2;
+ while (pos < wdir_len && separators < 2) {
+ if (IS_DIR_SEPARATOR_P(wdir[pos])) {
+ separators++;
+ }
+ pos++;
+ }
+ if (separators == 2)
+ wdir_len = pos - 1;
+ }
+ }
+ }
+
+ /* determine if we ignore dir or not */
+ if (!ignore_dir && path_drive && dir_drive) {
+ if (towupper(path_drive) == towupper(dir_drive)) {
+ /* exclude path drive letter to use dir */
+ wpath_pos += 2;
+ wpath_len -= 2;
+ }
+ else {
+ /* ignore dir since path drive is different from dir drive */
+ ignore_dir = 1;
+ wdir_len = 0;
+ }
+ }
+
+ if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
+ /* ignore dir since path has UNC root */
+ ignore_dir = 1;
+ wdir_len = 0;
+ }
+ else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
+ !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
+ /* ignore dir since path has root slash and dir doesn't have drive or UNC root */
+ ignore_dir = 1;
+ wdir_len = 0;
+ }
+
+ buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;
+
+ buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t));
+
+ /* add home */
+ if (whome_len) {
+ wcsncpy(buffer_pos, whome, whome_len);
+ buffer_pos += whome_len;
+ }
+
+ /* Add separator if required */
+ if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
+ buffer_pos[0] = L'\\';
+ buffer_pos++;
+ }
+
+ if (wdir_len) {
+ /* tainted if dir is used and dir is tainted */
+ if (!tainted && OBJ_TAINTED(dir))
+ tainted = 1;
+
+ wcsncpy(buffer_pos, wdir, wdir_len);
+ buffer_pos += wdir_len;
+ }
+
+ /* add separator if required */
+ if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
+ buffer_pos[0] = L'\\';
+ buffer_pos++;
+ }
+
+ /* now deal with path */
+ if (wpath_len) {
+ wcsncpy(buffer_pos, wpath_pos, wpath_len);
+ buffer_pos += wpath_len;
+ }
+
+ /* GetFullPathNameW requires at least "." to determine current directory */
+ if (wpath_len == 0) {
+ buffer_pos[0] = L'.';
+ buffer_pos++;
+ }
+
+ /* Ensure buffer is NULL terminated */
+ buffer_pos[0] = L'\0';
+
+ /* tainted if path is relative */
+ if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer)))
+ tainted = 1;
+
+ /* FIXME: Make this more robust */
+ /* Determine require buffer size */
+ size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
+ if (size > PATH_BUFFER_SIZE) {
+ /* allocate more memory than alloted originally by PATH_BUFFER_SIZE */
+ wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t));
+ size = GetFullPathNameW(buffer, size, wfullpath, NULL);
+ }
+ else {
+ wfullpath = wfullpath_buffer;
+ }
+
+ /* Remove any trailing slashes */
+ if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
+ wfullpath[size - 2] != L':' &&
+ !(size == 2 && IS_DIR_UNC_P(wfullpath))) {
+ size -= 1;
+ wfullpath[size] = L'\0';
+ }
+
+ /* Remove any trailing dot */
+ if (wfullpath[size - 1] == L'.') {
+ size -= 1;
+ wfullpath[size] = L'\0';
+ }
+
+ /* removes trailing invalid ':$DATA' */
+ size = remove_invalid_alternative_data(wfullpath, size);
+
+ /* Replace the trailing path to long name */
+ if (long_name)
+ size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer));
+
+ /* sanitize backslashes with forwardslashes */
+ replace_wchar(wfullpath, L'\\', L'/');
+
+ /* convert to char * */
+ size = WideCharToMultiByte(cp, 0, wfullpath, size, NULL, 0, NULL, NULL);
+ if (size > (size_t)RSTRING_LEN(result)) {
+ rb_str_modify(result);
+ rb_str_resize(result, size);
+ }
+
+ WideCharToMultiByte(cp, 0, wfullpath, size, RSTRING_PTR(result), size, NULL, NULL);
+ rb_str_set_len(result, size);
+
+ /* convert to VALUE and set the path encoding */
+ if (path_cp == INVALID_CODE_PAGE) {
+ VALUE tmp;
+ size_t len;
+
+ rb_enc_associate(result, rb_utf8_encoding());
+ ENC_CODERANGE_CLEAR(result);
+ tmp = rb_str_encode(result, rb_enc_from_encoding(path_encoding), 0, Qnil);
+ len = RSTRING_LEN(tmp);
+ rb_str_modify(result);
+ rb_str_resize(result, len);
+ memcpy(RSTRING_PTR(result), RSTRING_PTR(tmp), len);
+ rb_str_resize(tmp, 0);
+ }
+ rb_enc_associate(result, path_encoding);
+ ENC_CODERANGE_CLEAR(result);
+
+ /* makes the result object tainted if expanding tainted strings or returning modified path */
+ if (tainted)
+ OBJ_TAINT(result);
+
+ /* TODO: better cleanup */
+ if (buffer)
+ xfree(buffer);
+
+ if (wpath)
+ xfree(wpath);
+
+ if (wdir)
+ xfree(wdir);
+
+ if (whome)
+ xfree(whome);
+
+ if (wfullpath && wfullpath != wfullpath_buffer)
+ xfree(wfullpath);
+
+ if (fullpath)
+ xfree(fullpath);
+
+ return result;
+}
+
int
rb_file_load_ok(const char *path)
{
@@ -27,3 +647,12 @@ rb_file_load_ok(const char *path)
}
return ret;
}
+
+void
+rb_w32_init_file(void)
+{
+ rb_code_page = rb_hash_new();
+
+ /* prevent GC removing rb_code_page */
+ rb_gc_register_mark_object(rb_code_page);
+}