git.proxmox.com Git - libgit2.git/commitdiff
Pass unconverted data when iconv doesn't like it
author Russell Belfer <rb@github.com>
Thu, 8 May 2014 20:52:46 +0000 (13:52 -0700)
committer Russell Belfer <rb@github.com>
Thu, 8 May 2014 20:52:46 +0000 (13:52 -0700)
When using Iconv to convert unicode data and iconv doesn't like
the source data (because it thinks that it's not actual UTF-8),
instead of stopping the operation, just use the unconverted data.
This will generally do the right thing on the filesystem, since
that is the source of the non-UTF-8 path data anyhow.

This adds some tests for creating and looking up branches with
messy Unicode names.  Also, this takes the helper function that
was previously internal to `git_repository_init` and makes it
into `git_path_does_fs_decompose_unicode`, which is useful in
tests to understand what the expected results should be.

src/path.c
src/path.h
src/repository.c
tests/refs/branches/create.c

index 2690cd8e898c1a760c994d29cce5142afb893f8c..e0b00a086c165c1de30b884dd0e2382540614e08 100644 (file)
@@ -799,8 +799,11 @@ int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen)
                if (rv != (size_t)-1)
                        break;
 
+               /* if we cannot convert the data (probably because iconv thinks
+                * it is not valid UTF-8 source data), then use original data
+                */
                if (errno != E2BIG)
-                       goto fail;
+                       return 0;
 
                /* make space for 2x the remaining data to be converted
                 * (with per retry overhead to avoid infinite loops)
@@ -823,6 +826,64 @@ fail:
        return -1;
 }
 
+static const char *nfc_file = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D.XXXXXX"; /* precomposed (NFC) UTF-8 name, ending in the XXXXXX template handed to p_mkstemp */
+static const char *nfd_file = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D.XXXXXX"; /* same name in decomposed (NFD) form: base letters followed by combining marks */
+
+/* Check whether the filesystem under `root` decomposes unicode names for
+ * us; returns true if so.  We emulate core Git and prefer precomposed
+ * unicode internally on such platforms, composing it on the fly.
+ *
+ * This mainly happens on the Mac, where HFS+ stores filenames as
+ * decomposed unicode.  Even on VFAT and SAMBA file systems, the Mac will
+ * return decomposed unicode from readdir() even when the actual
+ * filesystem is storing precomposed unicode.
+ */
+bool git_path_does_fs_decompose_unicode(const char *root)
+{
+       git_buf path = GIT_BUF_INIT;
+       int fd;
+       bool found_decomposed = false;
+       char tmp[6];
+
+       /* Create a file using a precomposed path and then try to find it
+        * using the decomposed name.  If that lookup succeeds, the names are
+        * equivalent here; if the file can't be created, we report false.
+        */
+       if (git_buf_joinpath(&path, root, nfc_file) < 0 ||
+               (fd = p_mkstemp(path.ptr)) < 0)
+               goto done;
+       p_close(fd);
+
+       /* record the sizeof(tmp)-byte suffix that replaced the XXXXXX template */
+       memcpy(tmp, path.ptr + path.size - sizeof(tmp), sizeof(tmp));
+
+       /* rebuild the path in NFD form with that same suffix and look it up */
+       if (git_buf_joinpath(&path, root, nfd_file) < 0)
+               goto done;
+       memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp));
+
+       found_decomposed = git_path_exists(path.ptr);
+
+       /* remove temporary file (using original precomposed path); best effort */
+       if (git_buf_joinpath(&path, root, nfc_file) < 0)
+               goto done;
+       memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp));
+
+       (void)p_unlink(path.ptr);
+
+done:
+       git_buf_free(&path);
+       return found_decomposed;
+}
+
+#else
+
+bool git_path_does_fs_decompose_unicode(const char *root)
+{
+       GIT_UNUSED(root);
+       return false; /* #else build: nothing is probed, decomposition is never reported */
+}
+
 #endif
 
 #if defined(__sun) || defined(__GNU__)
index 2367d707b03aabd39fa62835145118865ba516db..3213c5104508474534101b11d6ce66b6275f8154 100644 (file)
@@ -436,4 +436,6 @@ extern int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen);
 
 #endif /* GIT_USE_ICONV */
 
+extern bool git_path_does_fs_decompose_unicode(const char *root); /* declared after the GIT_USE_ICONV #endif, so not limited to iconv builds */
+
 #endif
index ac7af769296d0e2d7b1dec6b324960801463a432..466f2d3416a55642f59664e660ad1ac089811360 100644 (file)
@@ -880,60 +880,6 @@ static bool are_symlinks_supported(const char *wd_path)
        return symlinks_supported;
 }
 
-#ifdef GIT_USE_ICONV
-
-static const char *nfc_file = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D.XXXXXX";
-static const char *nfd_file = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D.XXXXXX";
-
-/* Check if the platform is decomposing unicode data for us.  We will
- * emulate core Git and prefer to use precomposed unicode data internally
- * on these platforms, composing the decomposed unicode on the fly.
- *
- * This mainly happens on the Mac where HDFS stores filenames as
- * decomposed unicode.  Even on VFAT and SAMBA file systems, the Mac will
- * return decomposed unicode from readdir() even when the actual
- * filesystem is storing precomposed unicode.
- */
-static bool does_fs_decompose_unicode_paths(const char *wd_path)
-{
-       git_buf path = GIT_BUF_INIT;
-       int fd;
-       bool found_decomposed = false;
-       char tmp[6];
-
-       /* Create a file using a precomposed path and then try to find it
-        * using the decomposed name.  If the lookup fails, then we will mark
-        * that we should precompose unicode for this repository.
-        */
-       if (git_buf_joinpath(&path, wd_path, nfc_file) < 0 ||
-               (fd = p_mkstemp(path.ptr)) < 0)
-               goto done;
-       p_close(fd);
-
-       /* record trailing digits generated by mkstemp */
-       memcpy(tmp, path.ptr + path.size - sizeof(tmp), sizeof(tmp));
-
-       /* try to look up as NFD path */
-       if (git_buf_joinpath(&path, wd_path, nfd_file) < 0)
-               goto done;
-       memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp));
-
-       found_decomposed = git_path_exists(path.ptr);
-
-       /* remove temporary file (using original precomposed path) */
-       if (git_buf_joinpath(&path, wd_path, nfc_file) < 0)
-               goto done;
-       memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp));
-
-       (void)p_unlink(path.ptr);
-
-done:
-       git_buf_free(&path);
-       return found_decomposed;
-}
-
-#endif
-
 static int create_empty_file(const char *path, mode_t mode)
 {
        int fd;
@@ -1024,8 +970,9 @@ static int repo_init_fs_configs(
 #ifdef GIT_USE_ICONV
        if ((error = git_config_set_bool(
                        cfg, "core.precomposeunicode",
-                       does_fs_decompose_unicode_paths(work_dir))) < 0)
+                       git_path_does_fs_decompose_unicode(work_dir))) < 0)
                return error;
+       /* on non-iconv platforms, don't even set core.precomposeunicode */
 #endif
 
        return 0;
index 38af2f681915413eab1bd2fce379366c61fc35ff..518d4e93e676b53379dd9d4a7d35fe600a82b802 100644 (file)
@@ -1,5 +1,6 @@
 #include "clar_libgit2.h"
 #include "refs.h"
+#include "path.h"
 
 static git_repository *repo;
 static git_commit *target;
@@ -137,3 +138,58 @@ void test_refs_branches_create__default_reflog_message(void)
        git_reflog_free(log);
        git_signature_free(sig);
 }
+/* Assert local branch `lookup_as` resolves to refs/heads/<expected> at `target` */
+static void assert_branch_matches_name(
+       const char *expected, const char *lookup_as)
+{
+       git_reference *ref;
+       git_buf b = GIT_BUF_INIT;
+
+       cl_git_pass(git_branch_lookup(&ref, repo, lookup_as, GIT_BRANCH_LOCAL));
+
+       cl_git_pass(git_buf_sets(&b, "refs/heads/"));
+       cl_git_pass(git_buf_puts(&b, expected));
+       cl_assert_equal_s(b.ptr, git_reference_name(ref));
+
+       cl_git_pass(
+               git_oid_cmp(git_reference_target(ref), git_commit_id(target)));
+
+       git_reference_free(ref);
+       git_buf_free(&b);
+}
+/* Create, look up (also by the alternate normalization) and delete unicode-named branches */
+void test_refs_branches_create__can_create_branch_with_unicode(void)
+{
+       const char *nfc = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D";
+       const char *nfd = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D";
+       const char *emoji = "\xF0\x9F\x8D\xB7"; /* a single 4-byte UTF-8 sequence */
+       const char *names[] = { nfc, nfd, emoji };
+       const char *alt[] = { nfd, nfc, NULL }; /* second name to look up by; NULL = none */
+       const char *expected[] = { nfc, nfd, emoji }; /* ref name expected back; adjusted below */
+       unsigned int i;
+
+       retrieve_known_commit(&target, repo);
+
+       if (cl_repo_get_bool(repo, "core.precomposeunicode"))
+               expected[1] = nfc; /* the NFD-named branch is expected to come back as NFC */
+#ifdef __APPLE__
+       /* test decomp. because not all Mac filesystems decompose unicode */
+       else if (git_path_does_fs_decompose_unicode(git_repository_path(repo)))
+               expected[0] = nfd; /* the NFC-named branch is expected to come back as NFD */
+#endif
+
+       for (i = 0; i < ARRAY_SIZE(names); ++i) {
+               cl_git_pass(git_branch_create(
+                       &branch, repo, names[i], target, 0, NULL, NULL));
+               cl_git_pass(git_oid_cmp(
+                       git_reference_target(branch), git_commit_id(target)));
+
+               assert_branch_matches_name(expected[i], names[i]);
+               if (alt[i])
+                       assert_branch_matches_name(expected[i], alt[i]);
+
+               cl_git_pass(git_branch_delete(branch));
+               git_reference_free(branch);
+               branch = NULL;
+       }
+}