]> git.proxmox.com Git - libgit2.git/commitdiff
win32: use NT-prefixed "\\?\" paths
authorEdward Thomson <ethomson@microsoft.com>
Mon, 1 Dec 2014 18:09:58 +0000 (13:09 -0500)
committerEdward Thomson <ethomson@microsoft.com>
Tue, 16 Dec 2014 16:08:43 +0000 (10:08 -0600)
When turning UTF-8 paths into UCS-2 paths for Windows, always use
the \\?\-prefixed paths.  Because this bypasses the system's
path canonicalization, handle the canonicalization functions ourselves.

We must:
 1. always use a backslash as a directory separator
 2. only use a single backslash between directories
 3. not rely on the system to translate "." and ".." in paths
 4. remove trailing backslashes, except at the drive root (C:\)

src/win32/findfile.c
src/win32/path_w32.c [new file with mode: 0644]
src/win32/path_w32.h [new file with mode: 0644]
src/win32/posix.h
src/win32/posix_w32.c
src/win32/utf-conv.c
src/win32/utf-conv.h
src/win32/w32_util.h
tests/core/link.c
tests/path/win32.c [new file with mode: 0644]

index 86d4ef5bd8c1af94644a967c21949c3b1f4cad7c..de27dd06062a826e10938662ec039424d5b2efdd 100644 (file)
@@ -5,6 +5,7 @@
  * a Linking Exception. For full terms see the included COPYING file.
  */
 
+#include "path_w32.h"
 #include "utf-conv.h"
 #include "path.h"
 #include "findfile.h"
diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c
new file mode 100644 (file)
index 0000000..f0eacaa
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+#include "common.h"
+#include "path.h"
+#include "path_w32.h"
+#include "utf-conv.h"
+
+/* The NT namespace prefix, as a wide string, and its length in wide
+ * characters.
+ */
+#define PATH__NT_NAMESPACE     L"\\\\?\\"
+#define PATH__NT_NAMESPACE_LEN 4
+
+/* Length of a drive root such as "C:/" (letter, colon, separator). */
+#define PATH__ABSOLUTE_LEN     3
+
+/* True if the single character `p` is a forward slash or a backslash. */
+#define path__is_dirsep(p) ((p) == '/' || (p) == '\\')
+
+/* True if the string `p` starts with a letter, a colon and a directory
+ * separator.  Note that `p` is expanded more than once.
+ */
+#define path__is_absolute(p) \
+       (git__isalpha((p)[0]) && (p)[1] == ':' && ((p)[2] == '\\' || (p)[2] == '/'))
+
+/* True if the string `p` starts with the NT namespace prefix, spelled
+ * either entirely with backslashes or entirely with forward slashes.
+ */
+#define path__is_nt_namespace(p) \
+       (((p)[0] == '\\' && (p)[1] == '\\' && (p)[2] == '?' && (p)[3] == '\\') || \
+        ((p)[0] == '/' && (p)[1] == '/' && (p)[2] == '?' && (p)[3] == '/'))
+
+/* True if the string `p` starts with two backslashes or two forward
+ * slashes.  An NT namespace prefix also satisfies this test, so callers
+ * check path__is_nt_namespace first.
+ */
+#define path__is_unc(p) \
+       (((p)[0] == '\\' && (p)[1] == '\\') || ((p)[0] == '/' && (p)[1] == '/'))
+
+/**
+ * Fetch the current working directory into `path`, with any leading NT
+ * namespace prefix removed.
+ *
+ * @param path The buffer to receive the directory.
+ * @param size The capacity of `path`, in wide characters.
+ * @return The length of the directory, in wide characters (not counting
+ *         the NUL terminator), or -1 with errno set on failure.
+ */
+GIT_INLINE(int) path__cwd(wchar_t *path, int size)
+{
+       int len;
+
+       if ((len = GetCurrentDirectoryW(size, path)) == 0) {
+               errno = GetLastError() == ERROR_ACCESS_DENIED ? EACCES : ENOENT;
+               return -1;
+       } else if (len > size) {
+               /* The buffer was too small to hold the directory */
+               errno = ENAMETOOLONG;
+               return -1;
+       }
+
+       /* The Win32 APIs may return the NT namespace prefix once you've
+        * used it first.  But it may not.  What a gloriously predictable API!
+        */
+       if (wcsncmp(path, PATH__NT_NAMESPACE, PATH__NT_NAMESPACE_LEN))
+               return len;
+
+       len -= PATH__NT_NAMESPACE_LEN;
+
+       /* Move len + 1 characters so the NUL terminator travels with the
+        * string; moving only `len` would leave the tail of the old, longer
+        * string following the result.
+        */
+       memmove(path, path + PATH__NT_NAMESPACE_LEN, sizeof(wchar_t) * (len + 1));
+       return len;
+}
+
+/* Step over the first component of `path`: returns a pointer to the
+ * character after the first directory separator, or to the terminating NUL
+ * when `path` contains no separator at all.
+ */
+static wchar_t *path__skip_server(wchar_t *path)
+{
+       wchar_t *cur = path;
+
+       while (*cur && !path__is_dirsep(*cur))
+               cur++;
+
+       return *cur ? cur + 1 : cur;
+}
+
+/* Return a pointer to the first character of `path` that follows its
+ * prefix: an NT namespace prefix (optionally followed by "UNC" plus a
+ * server name, or by a drive root), a bare drive root, or a UNC server
+ * name.  A path with none of these is returned unchanged.
+ */
+static wchar_t *path__skip_prefix(wchar_t *path)
+{
+       if (path__is_nt_namespace(path)) {
+               wchar_t *rest = path + PATH__NT_NAMESPACE_LEN;
+
+               if (wcsncmp(rest, L"UNC\\", 4) == 0)
+                       return path__skip_server(rest + 4);
+
+               if (path__is_absolute(rest))
+                       return rest + PATH__ABSOLUTE_LEN;
+
+               return rest;
+       }
+
+       if (path__is_absolute(path))
+               return path + PATH__ABSOLUTE_LEN;
+
+       if (path__is_unc(path))
+               return path__skip_server(path + 2);
+
+       return path;
+}
+
+/**
+ * Canonicalize a Win32 wide-character path in place.
+ *
+ * The prefix recognized by path__skip_prefix is kept, with its forward
+ * slashes turned into backslashes.  In the remainder, forward slashes become
+ * backslashes, runs of separators collapse to one, "." segments are dropped,
+ * ".." segments remove the preceding segment (or are discarded when nothing
+ * is left after the prefix), and trailing backslashes are stripped back to
+ * the end of the prefix.
+ *
+ * @param path The NUL-terminated buffer to rewrite.
+ * @return The length of the rewritten path, in wide characters (not
+ *         counting the NUL terminator).
+ */
+int git_win32_path_canonicalize(git_win32_path path)
+{
+       wchar_t *base, *from, *to, *next;
+       size_t len;
+
+       /* `base` marks the end of the prefix; `to` is the write position */
+       base = to = path__skip_prefix(path);
+
+       /* Unposixify the prefix */
+       for (from = path; from < to; from++) {
+               if (*from == L'/')
+                       *from = L'\\';
+       }
+
+       /* `from` is the read position; it now equals `to` */
+       while (*from) {
+               /* Find the end of the current segment, converting a forward
+                * slash that terminates it into a backslash
+                */
+               for (next = from; *next; ++next) {
+                       if (*next == L'/') {
+                               *next = L'\\';
+                               break;
+                       }
+
+                       if (*next == L'\\')
+                               break;
+               }
+
+               len = next - from;
+
+               if (len == 1 && from[0] == L'.')
+                       /* do nothing with singleton dot */;
+
+               else if (len == 2 && from[0] == L'.' && from[1] == L'.') {
+                       if (to == base) {
+                               /* no more path segments to strip, eat the "../" */
+                               if (*next == L'\\')
+                                       len++;
+
+                               /* `to` equals `base` here, so both stay put */
+                               base = to;
+                       } else {
+                               /* back up a path segment */
+                               while (to > base && to[-1] == L'\\') to--;
+                               while (to > base && to[-1] != L'\\') to--;
+                       }
+               } else {
+                       /* keep the segment, with its trailing separator if any */
+                       if (*next == L'\\' && *from != L'\\')
+                               len++;
+
+                       if (to != from)
+                               memmove(to, from, sizeof(wchar_t) * len);
+
+                       to += len;
+               }
+
+               from += len;
+
+               /* skip any run of backslashes before the next segment */
+               while (*from == L'\\') from++;
+       }
+
+       /* Strip trailing backslashes */
+       while (to > base && to[-1] == L'\\') to--;
+
+       *to = L'\0';
+
+       return (to - path);
+}
+
+/**
+ * Fetch the current working directory in the form that follows an NT
+ * namespace prefix: a UNC directory has its two leading backslashes replaced
+ * by "UNC" and one backslash; any other directory is returned as reported.
+ *
+ * Space is reserved for the caller to append a directory separator and a
+ * NUL terminator.  The result is never allowed to exceed MAX_PATH or the
+ * caller's buffer, whichever is smaller.
+ *
+ * @param out The buffer to receive the directory.
+ * @param len The capacity of `out`, in wide characters.
+ * @return The length of the directory, in wide characters, or -1 with errno
+ *         set on failure.
+ */
+int git_win32_path__cwd(wchar_t *out, size_t len)
+{
+       size_t limit = (len < MAX_PATH) ? len : MAX_PATH;
+       int cwd_len;
+
+       if ((cwd_len = path__cwd(out, (int)limit)) < 0)
+               return -1;
+
+       /* UNC paths */
+       if (wcsncmp(L"\\\\", out, 2) == 0) {
+               /* Our buffer must be at least 4 characters larger than the
+                * current working directory:  we swallow the two leading
+                * backslashes but add a "UNC" specifier and a separator (a net
+                * gain of 2), plus a trailing directory separator, plus a NUL.
+                */
+               if ((size_t)cwd_len + 4 > limit) {
+                       errno = ENAMETOOLONG;
+                       return -1;
+               }
+
+               /* Shift right by two, then overwrite the first three
+                * characters; the second original backslash stays in place
+                * as the separator after "UNC".
+                */
+               memmove(out+2, out, sizeof(wchar_t) * cwd_len);
+               out[0] = L'U';
+               out[1] = L'N';
+               out[2] = L'C';
+
+               cwd_len += 2;
+       }
+
+       /* Our buffer must be at least 2 characters larger than the current
+        * working directory.  (One character for the directory separator,
+        * one for the NUL.)
+        */
+       else if ((size_t)cwd_len + 2 > limit) {
+               errno = ENAMETOOLONG;
+               return -1;
+       }
+
+       return cwd_len;
+}
+
+/* Build a canonical, NT-namespaced wide path in `out` from the UTF-8 path
+ * `src`.  Returns the length reported by git_win32_path_canonicalize, or -1
+ * on failure.
+ */
+int git_win32_path_from_utf8(git_win32_path out, const char *src)
+{
+       /* Everything we produce goes after the NT namespace prefix */
+       wchar_t *tail = out + PATH__NT_NAMESPACE_LEN;
+       int converted;
+
+       memcpy(out, PATH__NT_NAMESPACE, sizeof(wchar_t) * PATH__NT_NAMESPACE_LEN);
+
+       if (path__is_absolute(src)) {
+               /* Drive-letter path: convert as-is */
+               converted = git__utf8_to_16(tail, MAX_PATH, src);
+       } else if (path__is_nt_namespace(src)) {
+               /* Already namespaced: drop the source's own prefix */
+               converted = git__utf8_to_16(tail, MAX_PATH, src + PATH__NT_NAMESPACE_LEN);
+       } else if (path__is_unc(src)) {
+               /* UNC path: emit the UNC specifier, then skip the two
+                * leading separators of the source
+                */
+               memcpy(tail, L"UNC\\", sizeof(wchar_t) * 4);
+               converted = git__utf8_to_16(tail + 4, MAX_PATH - 2, src + 2);
+       } else if (src[0] == '\\' || src[0] == '/') {
+               /* Rooted path without a drive: borrow the drive of the
+                * current directory
+                */
+               if (path__cwd(tail, MAX_PATH) < 0)
+                       return -1;
+
+               if (!path__is_absolute(tail)) {
+                       errno = ENOENT;
+                       return -1;
+               }
+
+               /* Write the source right after the drive letter and colon */
+               converted = git__utf8_to_16(tail + 2, MAX_PATH - 2, src);
+       } else {
+               /* Relative path: current directory, a separator, the source */
+               int cwd_len = git_win32_path__cwd(tail, MAX_PATH);
+
+               if (cwd_len < 0)
+                       return -1;
+
+               tail[cwd_len] = L'\\';
+               cwd_len++;
+
+               converted = git__utf8_to_16(tail + cwd_len, MAX_PATH - cwd_len, src);
+       }
+
+       if (converted < 0)
+               return -1;
+
+       return git_win32_path_canonicalize(out);
+}
+
+/* Convert the wide path `src` to a posix-style UTF-8 path in `dest`,
+ * dropping any NT namespace prefix and turning the namespaced UNC form back
+ * into a path that starts with two separators.  Returns the length of the
+ * whole string written to `dest`, in bytes (not counting the NUL
+ * terminator), or < 0 on failure.
+ */
+int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src)
+{
+       char *out = dest;
+       size_t prefix_len = 0;
+       int len;
+
+       /* Strip the NT namespace prefix */
+       if (path__is_nt_namespace(src)) {
+               src += PATH__NT_NAMESPACE_LEN;
+
+               /* Namespaced UNC path: replace "UNC" and its separator with
+                * the two leading separators of a plain UNC path
+                */
+               if (wcsncmp(src, L"UNC\\", 4) == 0) {
+                       src += 4;
+
+                       memcpy(dest, "\\\\", 2);
+                       prefix_len = 2;
+                       out = dest + prefix_len;
+               }
+       }
+
+       /* The bytes already written reduce the room left for the conversion */
+       if ((len = git__utf16_to_8(out, GIT_WIN_PATH_UTF8 - prefix_len, src)) < 0)
+               return len;
+
+       git_path_mkposix(dest);
+
+       /* Report the length of the whole string, prefix included */
+       return len + (int)prefix_len;
+}
diff --git a/src/win32/path_w32.h b/src/win32/path_w32.h
new file mode 100644 (file)
index 0000000..dc7a68e
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_git_path_w32_h__
+#define INCLUDE_git_path_w32_h__
+
+/*
+ * Provides a large enough buffer to support Windows paths:  MAX_PATH is
+ * 260, corresponding to a maximum path length of 259 characters plus a
+ * NUL terminator.  Prefixing with "\\?\" adds 4 characters, but if the
+ * original was a UNC path, then we turn "\\server\share" into
+ * "\\?\UNC\server\share".  So we replace the first two characters with
+ * 8 characters, a net gain of 6, so the maximum length is MAX_PATH+6.
+ *
+ * The expansion is parenthesized so that the macro can be used safely
+ * inside larger expressions (for example when multiplied by a size).
+ */
+#define GIT_WIN_PATH_UTF16             (MAX_PATH+6)
+
+/* Maximum size of a UTF-8 Win32 path.  We remove the "\\?\" or "\\?\UNC\"
+ * prefixes for presentation, bringing us back to 259 (non-NUL)
+ * characters.  UTF-8 does have 4-byte sequences, but they are encoded in
+ * UTF-16 using surrogate pairs, which takes up the space of two characters.
+ * Two characters in the range U+0800 -> U+FFFF take up more space in UTF-8
+ * (6 bytes) than one surrogate pair (4 bytes).
+ */
+#define GIT_WIN_PATH_UTF8              (259 * 3 + 1)
+
+/* Win32 path types */
+typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16];
+typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8];
+
+/**
+ * Create a Win32 path (in UCS-2 format) from a UTF-8 string.
+ *
+ * @param dest The buffer to receive the wide string.
+ * @param src The UTF-8 string to convert.
+ * @return The length of the wide string, in characters (not counting the NUL terminator), or < 0 for failure
+ */
+extern int git_win32_path_from_utf8(git_win32_path dest, const char *src);
+
+/**
+ * Canonicalize a Win32 UCS-2 path so that it is suitable for delivery to the
+ * Win32 APIs: remove multiple directory separators, squashing to a single one,
+ * strip trailing directory separators, ensure directory separators are all
+ * canonical (always backslashes, never forward slashes) and process any
+ * directory entries of '.' or '..'.
+ *
+ * This processes the buffer in place.
+ *
+ * @param path The buffer to process
+ * @return The new length of the buffer, in wchar_t's (not counting the NUL terminator)
+ */
+extern int git_win32_path_canonicalize(git_win32_path path);
+
+/**
+ * Create an internal format (posix-style) UTF-8 path from a Win32 UCS-2 path.
+ *
+ * @param dest The buffer to receive the UTF-8 string.
+ * @param src The wide string to convert.
+ * @return The length of the UTF-8 string, in bytes (not counting the NUL terminator), or < 0 for failure
+ */
+extern int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src);
+
+#endif
index e055a77d0bcdb26525e574d8abc575ef692dd8dd..104966edcaa7c56a5f2e493aa6a59cd046039987 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "common.h"
 #include "../posix.h"
+#include "path_w32.h"
 #include "utf-conv.h"
 #include "dir.h"
 
index 7b4555719ff5f82f299bfba9e65cfca0c923835d..e446ccab03da675630ea77bca83d30f60fb5a93e 100644 (file)
@@ -7,6 +7,7 @@
 #include "../posix.h"
 #include "../fileops.h"
 #include "path.h"
+#include "path_w32.h"
 #include "utf-conv.h"
 #include "repository.h"
 #include "reparse.h"
 /* GetFinalPathNameByHandleW signature */
 typedef DWORD(WINAPI *PFGetFinalPathNameByHandleW)(HANDLE, LPWSTR, DWORD, DWORD);
 
-/* Helper function which converts UTF-8 paths to UTF-16.
- * On failure, errno is set. */
-static int utf8_to_16_with_errno(git_win32_path dest, const char *src)
-{
-       int len = git_win32_path_from_utf8(dest, src);
-
-       if (len < 0) {
-               if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
-                       errno = ENAMETOOLONG;
-               else
-                       errno = EINVAL; /* Bad code point, presumably */
-       }
-
-       return len;
-}
-
 int p_ftruncate(int fd, long size)
 {
 #if defined(_MSC_VER) && _MSC_VER >= 1500
@@ -66,7 +51,7 @@ int p_mkdir(const char *path, mode_t mode)
 
        GIT_UNUSED(mode);
 
-       if (utf8_to_16_with_errno(buf, path) < 0)
+       if (git_win32_path_from_utf8(buf, path) < 0)
                return -1;
 
        return _wmkdir(buf);
@@ -85,7 +70,7 @@ int p_unlink(const char *path)
        git_win32_path buf;
        int error;
 
-       if (utf8_to_16_with_errno(buf, path) < 0)
+       if (git_win32_path_from_utf8(buf, path) < 0)
                return -1;
 
        error = _wunlink(buf);
@@ -292,7 +277,7 @@ static int do_lstat(const char *path, struct stat *buf, bool posixly_correct)
        git_win32_path path_w;
        int len;
 
-       if ((len = utf8_to_16_with_errno(path_w, path)) < 0)
+       if ((len = git_win32_path_from_utf8(path_w, path)) < 0)
                return -1;
 
        git_win32__path_trim_end(path_w, len);
@@ -323,7 +308,7 @@ int p_readlink(const char *path, char *buf, size_t bufsiz)
         * could occur in the middle of the encoding of a code point,
         * we need to buffer the result on the stack. */
 
-       if (utf8_to_16_with_errno(path_w, path) < 0 ||
+       if (git_win32_path_from_utf8(path_w, path) < 0 ||
                readlink_w(target_w, path_w) < 0 ||
                (len = git_win32_path_to_utf8(target, target_w)) < 0)
                return -1;
@@ -347,7 +332,7 @@ int p_open(const char *path, int flags, ...)
        git_win32_path buf;
        mode_t mode = 0;
 
-       if (utf8_to_16_with_errno(buf, path) < 0)
+       if (git_win32_path_from_utf8(buf, path) < 0)
                return -1;
 
        if (flags & O_CREAT) {
@@ -365,7 +350,7 @@ int p_creat(const char *path, mode_t mode)
 {
        git_win32_path buf;
 
-       if (utf8_to_16_with_errno(buf, path) < 0)
+       if (git_win32_path_from_utf8(buf, path) < 0)
                return -1;
 
        return _wopen(buf, _O_WRONLY | _O_CREAT | _O_TRUNC | STANDARD_OPEN_FLAGS, mode);
@@ -463,7 +448,7 @@ int p_stat(const char* path, struct stat* buf)
        git_win32_path path_w;
        int len;
 
-       if ((len = utf8_to_16_with_errno(path_w, path)) < 0)
+       if ((len = git_win32_path_from_utf8(path_w, path)) < 0)
                return -1;
 
        git_win32__path_trim_end(path_w, len);
@@ -483,7 +468,7 @@ int p_chdir(const char* path)
 {
        git_win32_path buf;
 
-       if (utf8_to_16_with_errno(buf, path) < 0)
+       if (git_win32_path_from_utf8(buf, path) < 0)
                return -1;
 
        return _wchdir(buf);
@@ -493,7 +478,7 @@ int p_chmod(const char* path, mode_t mode)
 {
        git_win32_path buf;
 
-       if (utf8_to_16_with_errno(buf, path) < 0)
+       if (git_win32_path_from_utf8(buf, path) < 0)
                return -1;
 
        return _wchmod(buf, mode);
@@ -504,7 +489,7 @@ int p_rmdir(const char* path)
        git_win32_path buf;
        int error;
 
-       if (utf8_to_16_with_errno(buf, path) < 0)
+       if (git_win32_path_from_utf8(buf, path) < 0)
                return -1;
 
        error = _wrmdir(buf);
@@ -533,7 +518,7 @@ char *p_realpath(const char *orig_path, char *buffer)
 {
        git_win32_path orig_path_w, buffer_w;
 
-       if (utf8_to_16_with_errno(orig_path_w, orig_path) < 0)
+       if (git_win32_path_from_utf8(orig_path_w, orig_path) < 0)
                return NULL;
 
        /* Note that if the path provided is a relative path, then the current directory
@@ -554,20 +539,17 @@ char *p_realpath(const char *orig_path, char *buffer)
                return NULL;
        }
 
-       /* Convert the path to UTF-8. */
-       if (buffer) {
-               /* If the caller provided a buffer, then it is assumed to be GIT_WIN_PATH_UTF8
-                * characters in size. If it isn't, then we may overflow. */
-               if (git__utf16_to_8(buffer, GIT_WIN_PATH_UTF8, buffer_w) < 0)
-                       return NULL;
-       } else {
-               /* If the caller did not provide a buffer, then we allocate one for the caller
-                * from the heap. */
-               if (git__utf16_to_8_alloc(&buffer, buffer_w) < 0)
-                       return NULL;
+       if (!buffer && !(buffer = git__malloc(GIT_WIN_PATH_UTF8))) {
+               errno = ENOMEM;
+               return NULL;
        }
 
-       /* Convert backslashes to forward slashes */
+       /* Convert the path to UTF-8. If the caller provided a buffer, then it
+        * is assumed to be GIT_WIN_PATH_UTF8 characters in size. If it isn't,
+        * then we may overflow. */
+       if (git_win32_path_to_utf8(buffer, buffer_w) < 0)
+               return NULL;
+
        git_path_mkposix(buffer);
 
        return buffer;
@@ -608,6 +590,7 @@ int p_snprintf(char *buffer, size_t count, const char *format, ...)
        return r;
 }
 
+/* TODO: wut? */
 int p_mkstemp(char *tmp_path)
 {
 #if defined(_MSC_VER) && _MSC_VER >= 1500
@@ -625,7 +608,7 @@ int p_access(const char* path, mode_t mode)
 {
        git_win32_path buf;
 
-       if (utf8_to_16_with_errno(buf, path) < 0)
+       if (git_win32_path_from_utf8(buf, path) < 0)
                return -1;
 
        return _waccess(buf, mode);
@@ -664,8 +647,8 @@ int p_rename(const char *from, const char *to)
        int rename_succeeded;
        int error;
 
-       if (utf8_to_16_with_errno(wfrom, from) < 0 ||
-               utf8_to_16_with_errno(wto, to) < 0)
+       if (git_win32_path_from_utf8(wfrom, from) < 0 ||
+               git_win32_path_from_utf8(wto, to) < 0)
                return -1;
 
        /* wait up to 50ms if file is locked by another thread or process */
index b9ccfb5e594ce680d536a91ade5e7d250f4ded37..b0205b01975ee9c21a7b42a0bc67e5cb00ab4fd7 100644 (file)
@@ -26,6 +26,14 @@ GIT_INLINE(DWORD) get_wc_flags(void)
        return flags;
 }
 
+/* Map the last Win32 error of a failed conversion onto errno:
+ * ERROR_INSUFFICIENT_BUFFER becomes ENAMETOOLONG, anything else EINVAL.
+ */
+GIT_INLINE(void) git__set_errno(void)
+{
+       errno = (GetLastError() == ERROR_INSUFFICIENT_BUFFER) ?
+               ENAMETOOLONG : EINVAL;
+}
+
 /**
  * Converts a UTF-8 string to wide characters.
  *
@@ -36,10 +44,15 @@ GIT_INLINE(DWORD) get_wc_flags(void)
  */
 int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)
 {
+       int len;
+
        /* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
        * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
        * length. MultiByteToWideChar never returns int's minvalue, so underflow is not possible */
-       return MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1;
+       if ((len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1) < 0)
+               git__set_errno();
+
+       return len;
 }
 
 /**
@@ -52,10 +65,15 @@ int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)
  */
 int git__utf16_to_8(char *dest, size_t dest_size, const wchar_t *src)
 {
+       int len;
+
        /* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
         * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
         * length. WideCharToMultiByte never returns int's minvalue, so underflow is not possible */
-       return WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1;
+       if ((len = WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1) < 0)
+               git__set_errno();
+
+       return len;
 }
 
 /**
@@ -76,17 +94,23 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src)
        /* Length of -1 indicates NULL termination of the input string */
        utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, NULL, 0);
 
-       if (!utf16_size)
+       if (!utf16_size) {
+               git__set_errno();
                return -1;
+       }
 
        *dest = git__malloc(utf16_size * sizeof(wchar_t));
 
-       if (!*dest)
+       if (!*dest) {
+               errno = ENOMEM;
                return -1;
+       }
 
        utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, *dest, utf16_size);
 
        if (!utf16_size) {
+               git__set_errno();
+
                git__free(*dest);
                *dest = NULL;
        }
@@ -116,17 +140,23 @@ int git__utf16_to_8_alloc(char **dest, const wchar_t *src)
        /* Length of -1 indicates NULL termination of the input string */
        utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, NULL, 0, NULL, NULL);
 
-       if (!utf8_size)
+       if (!utf8_size) {
+               git__set_errno();
                return -1;
+       }
 
        *dest = git__malloc(utf8_size);
 
-       if (!*dest)
+       if (!*dest) {
+               errno = ENOMEM;
                return -1;
+       }
 
        utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, *dest, utf8_size, NULL, NULL);
 
        if (!utf8_size) {
+               git__set_errno();
+
                git__free(*dest);
                *dest = NULL;
        }
index a480cd93ed730a9275d8589897942bcfb8710934..89cdb96dac2e1609cf428531ba038aa97d185a2a 100644 (file)
 #include <wchar.h>
 #include "common.h"
 
-/* Equal to the Win32 MAX_PATH constant. The maximum path length is 259
- * characters plus a NULL terminator. */
-#define GIT_WIN_PATH_UTF16             260
-
-/* Maximum size of a UTF-8 Win32 path. UTF-8 does have 4-byte sequences,
- * but they are encoded in UTF-16 using surrogate pairs, which takes up
- * the space of two characters. Two characters in the range U+0800 ->
- * U+FFFF take up more space in UTF-8 (6 bytes) than one surrogate pair
- * (4 bytes). */
-#define GIT_WIN_PATH_UTF8              (259 * 3 + 1)
-
-/* Win32 path types */
-typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16];
-typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8];
-
 /**
  * Converts a UTF-8 string to wide characters.
  *
@@ -67,28 +52,4 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src);
  */
 int git__utf16_to_8_alloc(char **dest, const wchar_t *src);
 
-/**
- * Converts a UTF-8 Win32 path to wide characters.
- *
- * @param dest The buffer to receive the wide string.
- * @param src The UTF-8 string to convert.
- * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
- */
-GIT_INLINE(int) git_win32_path_from_utf8(git_win32_path dest, const char *src)
-{
-       return git__utf8_to_16(dest, GIT_WIN_PATH_UTF16, src);
-}
-
-/**
- * Converts a wide Win32 path to UTF-8.
- *
- * @param dest The buffer to receive the UTF-8 string.
- * @param src The wide string to convert.
- * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
- */
-GIT_INLINE(int) git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src)
-{
-       return git__utf16_to_8(dest, GIT_WIN_PATH_UTF8, src);
-}
-
 #endif
index a1d388af53654b19b9ed80c60647c8f99e766c5f..9c1b943598987f3ae403db72fea80b48ee381f99 100644 (file)
@@ -9,6 +9,7 @@
 #define INCLUDE_w32_util_h__
 
 #include "utf-conv.h"
+#include "path_w32.h"
 
 GIT_INLINE(bool) git_win32__isalpha(wchar_t c)
 {
index 83999ebdfddbf74cb512170be786274e40e5de27..ec85ec4e0bc3ba2bf8e201059282e11dd8473837 100644 (file)
@@ -197,19 +197,6 @@ static void do_custom_reparse(const char *path)
 
 #endif
 
-git_buf *unslashify(git_buf *buf)
-{
-#ifdef GIT_WIN32
-       size_t i;
-
-       for (i = 0; i < buf->size; i++)
-               if (buf->ptr[i] == '/')
-                       buf->ptr[i] = '\\';
-#endif
-
-       return buf;
-}
-
 void test_core_link__stat_regular_file(void)
 {
        struct stat st;
@@ -584,7 +571,7 @@ void test_core_link__readlink_symlink(void)
 
        buf[len] = 0;
 
-       cl_assert_equal_s(git_buf_cstr(unslashify(&target_path)), buf);
+       cl_assert_equal_s(git_buf_cstr(&target_path), buf);
 
        git_buf_free(&target_path);
 }
@@ -607,7 +594,7 @@ void test_core_link__readlink_dangling(void)
 
        buf[len] = 0;
 
-       cl_assert_equal_s(git_buf_cstr(unslashify(&target_path)), buf);
+       cl_assert_equal_s(git_buf_cstr(&target_path), buf);
 
        git_buf_free(&target_path);
 }
@@ -636,7 +623,7 @@ void test_core_link__readlink_multiple(void)
 
        buf[len] = 0;
 
-       cl_assert_equal_s(git_buf_cstr(unslashify(&path2)), buf);
+       cl_assert_equal_s(git_buf_cstr(&path2), buf);
 
        git_buf_free(&path1);
        git_buf_free(&path2);
diff --git a/tests/path/win32.c b/tests/path/win32.c
new file mode 100644 (file)
index 0000000..ef0b5d2
--- /dev/null
@@ -0,0 +1,190 @@
+
+#include "clar_libgit2.h"
+#include "path.h"
+
+#ifdef GIT_WIN32
+#include "win32/path_w32.h"
+#endif
+
+/* Helper: convert `utf8_in` with git_win32_path_from_utf8 and assert that
+ * the conversion succeeds, that the result equals `utf16_expected`, and that
+ * the returned length equals wcslen(utf16_expected).  Does nothing when
+ * GIT_WIN32 is not defined.
+ */
+void test_utf8_to_utf16(const char *utf8_in, const wchar_t *utf16_expected)
+{
+#ifdef GIT_WIN32
+       git_win32_path path_utf16;
+       int path_utf16len;
+
+       cl_assert((path_utf16len = git_win32_path_from_utf8(path_utf16, utf8_in)) >= 0);
+       cl_assert_equal_wcs(utf16_expected, path_utf16);
+       cl_assert_equal_i(wcslen(utf16_expected), path_utf16len);
+#else
+       /* Nothing to test off Windows; silence unused-parameter warnings */
+       GIT_UNUSED(utf8_in);
+       GIT_UNUSED(utf16_expected);
+#endif
+}
+
+/* Drive roots gain the NT namespace prefix, keep the case of the drive
+ * letter, and always end in a backslash.
+ */
+void test_path_win32__utf8_to_utf16(void)
+{
+#ifdef GIT_WIN32
+       test_utf8_to_utf16("C:\\", L"\\\\?\\C:\\");
+       test_utf8_to_utf16("c:\\", L"\\\\?\\c:\\");
+       test_utf8_to_utf16("C:/", L"\\\\?\\C:\\");
+       test_utf8_to_utf16("c:/", L"\\\\?\\c:\\");
+#endif
+}
+
+/* One or more trailing separators of either kind are removed. */
+void test_path_win32__removes_trailing_slash(void)
+{
+#ifdef GIT_WIN32
+       test_utf8_to_utf16("C:\\Foo\\", L"\\\\?\\C:\\Foo");
+       test_utf8_to_utf16("C:\\Foo\\\\", L"\\\\?\\C:\\Foo");
+       /* NOTE(review): identical to the case above; possibly meant to use a
+        * longer run of backslashes - confirm
+        */
+       test_utf8_to_utf16("C:\\Foo\\\\", L"\\\\?\\C:\\Foo");
+       test_utf8_to_utf16("C:/Foo/", L"\\\\?\\C:\\Foo");
+       test_utf8_to_utf16("C:/Foo///", L"\\\\?\\C:\\Foo");
+#endif
+}
+
+/* Runs of separators inside a path collapse to a single backslash. */
+void test_path_win32__squashes_multiple_slashes(void)
+{
+#ifdef GIT_WIN32
+       test_utf8_to_utf16("C:\\\\Foo\\Bar\\\\Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar");
+       test_utf8_to_utf16("C://Foo/Bar///Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar");
+#endif
+}
+
+/* UNC paths, written with either kind of separator, are rewritten into the
+ * NT namespace with a UNC specifier in front of the server name.
+ */
+void test_path_win32__unc(void)
+{
+#ifdef GIT_WIN32
+       test_utf8_to_utf16("\\\\server\\c$\\unc\\path", L"\\\\?\\UNC\\server\\c$\\unc\\path");
+       test_utf8_to_utf16("//server/git/style/unc/path", L"\\\\?\\UNC\\server\\git\\style\\unc\\path");
+#endif
+}
+
+/* Paths of 259 characters (drive-letter and UNC alike) convert
+ * successfully; paths of 260 characters are rejected.
+ */
+void test_path_win32__honors_max_path(void)
+{
+#ifdef GIT_WIN32
+       git_win32_path path_utf16;
+
+       /* Longest accepted inputs */
+       test_utf8_to_utf16("C:\\This path is 259 chars and is the max length in windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij",
+               L"\\\\?\\C:\\This path is 259 chars and is the max length in windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij");
+       test_utf8_to_utf16("\\\\unc\\paths may also be 259 characters including the server\\123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij",
+               L"\\\\?\\UNC\\unc\\paths may also be 259 characters including the server\\123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij");
+
+       /* One character too many must fail */
+       cl_check_fail(git_win32_path_from_utf8(path_utf16, "C:\\This path is 260 chars and is sadly too long for windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij"));
+       cl_check_fail(git_win32_path_from_utf8(path_utf16, "\\\\unc\\paths are also bound by 260 character restrictions\\including the server name portion\\bcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij"));
+#endif
+}
+
+/* "." segments are dropped and ".." segments remove the preceding segment,
+ * without ever climbing above the drive root.
+ */
+void test_path_win32__dot_and_dotdot(void)
+{
+#ifdef GIT_WIN32
+       /* ".." handling */
+       test_utf8_to_utf16("C:\\Foo\\..\\Foobar", L"\\\\?\\C:\\Foobar");
+       test_utf8_to_utf16("C:\\Foo\\Bar\\..\\Foobar", L"\\\\?\\C:\\Foo\\Foobar");
+       test_utf8_to_utf16("C:\\Foo\\Bar\\..\\Foobar\\..", L"\\\\?\\C:\\Foo");
+       test_utf8_to_utf16("C:\\Foobar\\..", L"\\\\?\\C:\\");
+       test_utf8_to_utf16("C:/Foo/Bar/../Foobar", L"\\\\?\\C:\\Foo\\Foobar");
+       test_utf8_to_utf16("C:/Foo/Bar/../Foobar/../Asdf/", L"\\\\?\\C:\\Foo\\Asdf");
+       test_utf8_to_utf16("C:/Foo/Bar/../Foobar/..", L"\\\\?\\C:\\Foo");
+       test_utf8_to_utf16("C:/Foo/..", L"\\\\?\\C:\\");
+
+       /* "." handling, alone and mixed with ".." */
+       test_utf8_to_utf16("C:\\Foo\\Bar\\.\\Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar");
+       test_utf8_to_utf16("C:\\.\\Foo\\.\\Bar\\.\\Foobar\\.\\", L"\\\\?\\C:\\Foo\\Bar\\Foobar");
+       test_utf8_to_utf16("C:/Foo/Bar/./Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar");
+       test_utf8_to_utf16("C:/Foo/../Bar/./Foobar/../", L"\\\\?\\C:\\Bar");
+
+       /* More ".." segments than there are directories to remove */
+       test_utf8_to_utf16("C:\\Foo\\..\\..\\Bar", L"\\\\?\\C:\\Bar");
+#endif
+}
+
+/* Rooted paths without a drive letter take the drive of the current
+ * working directory.  The expectations name drive C:, so the working
+ * directory is moved there for the duration of the test and restored
+ * afterwards; otherwise the result would depend on where the test suite
+ * happens to be run from.
+ */
+void test_path_win32__absolute_from_no_drive_letter(void)
+{
+#ifdef GIT_WIN32
+       char cwd_backup[MAX_PATH];
+
+       cl_must_pass(p_getcwd(cwd_backup, MAX_PATH));
+       cl_must_pass(p_chdir("C:/"));
+
+       test_utf8_to_utf16("\\Foo", L"\\\\?\\C:\\Foo");
+       test_utf8_to_utf16("\\Foo\\Bar", L"\\\\?\\C:\\Foo\\Bar");
+       test_utf8_to_utf16("/Foo/Bar", L"\\\\?\\C:\\Foo\\Bar");
+
+       cl_must_pass(p_chdir(cwd_backup));
+#endif
+}
+
+/* Relative paths (including the empty path) are resolved against the
+ * current working directory, checked first at a drive root and then inside
+ * a subdirectory.  The original working directory is restored at the end.
+ */
+void test_path_win32__absolute_from_relative(void)
+{
+#ifdef GIT_WIN32
+       char cwd_backup[MAX_PATH];
+
+       cl_must_pass(p_getcwd(cwd_backup, MAX_PATH));
+       cl_must_pass(p_chdir("C:/"));
+
+       /* From the drive root: ".." cannot climb any higher */
+       test_utf8_to_utf16("Foo", L"\\\\?\\C:\\Foo");
+       test_utf8_to_utf16("..\\..\\Foo", L"\\\\?\\C:\\Foo");
+       test_utf8_to_utf16("Foo\\..", L"\\\\?\\C:\\");
+       test_utf8_to_utf16("Foo\\..\\..", L"\\\\?\\C:\\");
+       test_utf8_to_utf16("", L"\\\\?\\C:\\");
+
+       cl_must_pass(p_chdir("C:/Windows"));
+
+       /* From a subdirectory */
+       test_utf8_to_utf16("Foo", L"\\\\?\\C:\\Windows\\Foo");
+       test_utf8_to_utf16("Foo\\Bar", L"\\\\?\\C:\\Windows\\Foo\\Bar");
+       test_utf8_to_utf16("..\\Foo", L"\\\\?\\C:\\Foo");
+       test_utf8_to_utf16("Foo\\..\\Bar", L"\\\\?\\C:\\Windows\\Bar");
+       test_utf8_to_utf16("", L"\\\\?\\C:\\Windows");
+
+       cl_must_pass(p_chdir(cwd_backup));
+#endif
+}
+
+/* Helper: copy `in` into a git_win32_path buffer, canonicalize it in place
+ * with git_win32_path_canonicalize and assert that the call passes and the
+ * buffer equals `expected`.  Does nothing when GIT_WIN32 is not defined.
+ */
+void test_canonicalize(const wchar_t *in, const wchar_t *expected)
+{
+#ifdef GIT_WIN32
+       git_win32_path canonical;
+
+       /* Guard the copy below against overrunning the buffer */
+       cl_assert(wcslen(in) < MAX_PATH);
+       wcscpy(canonical, in);
+
+       cl_must_pass(git_win32_path_canonicalize(canonical));
+       cl_assert_equal_wcs(expected, canonical);
+#else
+       /* Nothing to test off Windows; silence unused-parameter warnings */
+       GIT_UNUSED(in);
+       GIT_UNUSED(expected);
+#endif
+}
+
+/* Exercises git_win32_path_canonicalize directly on wide paths of every
+ * supported shape: drive-letter paths, prefix-less relative paths,
+ * NT-namespaced paths, NT-namespaced UNC paths and plain UNC paths.
+ */
+void test_path_win32__canonicalize(void)
+{
+#ifdef GIT_WIN32
+       /* Drive-letter paths */
+       test_canonicalize(L"C:\\Foo\\Bar", L"C:\\Foo\\Bar");
+       test_canonicalize(L"C:\\Foo\\", L"C:\\Foo");
+       test_canonicalize(L"C:\\Foo\\\\", L"C:\\Foo");
+       test_canonicalize(L"C:\\Foo\\..\\Bar", L"C:\\Bar");
+       test_canonicalize(L"C:\\Foo\\..\\..\\Bar", L"C:\\Bar");
+       test_canonicalize(L"C:\\Foo\\..\\..\\..\\..\\", L"C:\\");
+       test_canonicalize(L"C:/Foo/Bar", L"C:\\Foo\\Bar");
+       test_canonicalize(L"C:/", L"C:\\");
+
+       /* Paths with no prefix at all */
+       test_canonicalize(L"Foo\\\\Bar\\\\Asdf\\\\", L"Foo\\Bar\\Asdf");
+       test_canonicalize(L"Foo\\\\Bar\\\\..\\\\Asdf\\", L"Foo\\Asdf");
+       test_canonicalize(L"Foo\\\\Bar\\\\.\\\\Asdf\\", L"Foo\\Bar\\Asdf");
+       test_canonicalize(L"Foo\\\\..\\Bar\\\\.\\\\Asdf\\", L"Bar\\Asdf");
+       test_canonicalize(L"\\", L"");
+       test_canonicalize(L"", L"");
+       test_canonicalize(L"Foo\\..\\..\\..\\..", L"");
+       test_canonicalize(L"..\\..\\..\\..", L"");
+       test_canonicalize(L"\\..\\..\\..\\..", L"");
+
+       /* NT-namespaced drive-letter paths */
+       test_canonicalize(L"\\\\?\\C:\\Foo\\Bar", L"\\\\?\\C:\\Foo\\Bar");
+       test_canonicalize(L"\\\\?\\C:\\Foo\\Bar\\", L"\\\\?\\C:\\Foo\\Bar");
+       test_canonicalize(L"\\\\?\\C:\\\\Foo\\.\\Bar\\\\..\\", L"\\\\?\\C:\\Foo");
+       test_canonicalize(L"\\\\?\\C:\\\\", L"\\\\?\\C:\\");
+       test_canonicalize(L"//?/C:/", L"\\\\?\\C:\\");
+       test_canonicalize(L"//?/C:/../../Foo/", L"\\\\?\\C:\\Foo");
+       test_canonicalize(L"//?/C:/Foo/../../", L"\\\\?\\C:\\");
+
+       /* NT-namespaced UNC paths: ".." never removes the server name */
+       test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder", L"\\\\?\\UNC\\server\\C$\\folder");
+       test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\", L"\\\\?\\UNC\\server\\C$\\folder");
+       /* NOTE(review): identical to the case above - confirm whether a
+        * different input was intended
+        */
+       test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\", L"\\\\?\\UNC\\server\\C$\\folder");
+       test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\..\\..\\..\\..\\share\\", L"\\\\?\\UNC\\server\\share");
+
+       /* Plain UNC paths */
+       test_canonicalize(L"\\\\server\\share", L"\\\\server\\share");
+       test_canonicalize(L"\\\\server\\share\\", L"\\\\server\\share");
+       test_canonicalize(L"\\\\server\\share\\\\foo\\\\bar", L"\\\\server\\share\\foo\\bar");
+       test_canonicalize(L"\\\\server\\\\share\\\\foo\\\\bar", L"\\\\server\\share\\foo\\bar");
+       test_canonicalize(L"\\\\server\\share\\..\\foo", L"\\\\server\\foo");
+       test_canonicalize(L"\\\\server\\..\\..\\share\\.\\foo", L"\\\\server\\share\\foo");
+#endif
+}