* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
-#include "hashsig.h"
-#include "fileops.h"
+
+#include "common.h"
+
+#include "git2/sys/hashsig.h"
+#include "futils.h"
#include "util.h"
typedef uint32_t hashsig_t;
struct git_hashsig {
hashsig_heap mins;
hashsig_heap maxs;
+ size_t lines;
git_hashsig_option_t opt;
- int considered;
};
#define HEAP_LCHILD_OF(I) (((I)<<1)+1)
{
int i;
- switch (sig->opt) {
- case GIT_HASHSIG_IGNORE_WHITESPACE:
+ /* no more than one can be set */
+ assert(!(sig->opt & GIT_HASHSIG_IGNORE_WHITESPACE) ||
+ !(sig->opt & GIT_HASHSIG_SMART_WHITESPACE));
+
+ if (sig->opt & GIT_HASHSIG_IGNORE_WHITESPACE) {
for (i = 0; i < 256; ++i)
prog->ignore_ch[i] = git__isspace_nonlf(i);
prog->use_ignores = 1;
- break;
- case GIT_HASHSIG_SMART_WHITESPACE:
+ } else if (sig->opt & GIT_HASHSIG_SMART_WHITESPACE) {
for (i = 0; i < 256; ++i)
prog->ignore_ch[i] = git__isspace(i);
prog->use_ignores = 1;
- break;
- default:
+ } else {
memset(prog, 0, sizeof(*prog));
- break;
}
}
-#define HASHSIG_IN_PROGRESS_INIT { 1 }
-
static int hashsig_add_hashes(
git_hashsig *sig,
const uint8_t *data,
if (use_ignores)
for (; scan < end && git__isspace_nonlf(ch); ch = *scan)
++scan;
- else if (sig->opt != GIT_HASHSIG_NORMAL)
+ else if (sig->opt &
+ (GIT_HASHSIG_IGNORE_WHITESPACE | GIT_HASHSIG_SMART_WHITESPACE))
for (; scan < end && ch == '\r'; ch = *scan)
++scan;
/* peek at next character to decide what to do next */
- if (sig->opt == GIT_HASHSIG_SMART_WHITESPACE)
+ if (sig->opt & GIT_HASHSIG_SMART_WHITESPACE)
use_ignores = (ch == '\n');
if (scan >= end)
++scan;
/* check run terminator */
- if (ch == '\n' || ch == '\0')
+ if (ch == '\n' || ch == '\0') {
+ sig->lines++;
break;
+ }
++len;
HASHSIG_HASH_MIX(state, ch);
hashsig_heap_insert(&sig->mins, (hashsig_t)state);
hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
- sig->considered++;
-
while (scan < end && (*scan == '\n' || !*scan))
++scan;
}
static int hashsig_finalize_hashes(git_hashsig *sig)
{
- if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE) {
- giterr_set(GITERR_INVALID,
- "File too small for similarity signature calculation");
+ if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE &&
+ !(sig->opt & GIT_HASHSIG_ALLOW_SMALL_FILES)) {
+ git_error_set(GIT_ERROR_INVALID,
+ "file too small for similarity signature calculation");
return GIT_EBUFS;
}
int error;
hashsig_in_progress prog;
git_hashsig *sig = hashsig_alloc(opts);
- GITERR_CHECK_ALLOC(sig);
+ GIT_ERROR_CHECK_ALLOC(sig);
hashsig_in_progress_init(&prog, sig);
int error = 0, fd;
hashsig_in_progress prog;
git_hashsig *sig = hashsig_alloc(opts);
- GITERR_CHECK_ALLOC(sig);
+ GIT_ERROR_CHECK_ALLOC(sig);
if ((fd = git_futils_open_ro(path)) < 0) {
git__free(sig);
while (!error) {
if ((buflen = p_read(fd, buf, sizeof(buf))) <= 0) {
if ((error = (int)buflen) < 0)
- giterr_set(GITERR_OS,
- "Read error on '%s' calculating similarity hashes", path);
+ git_error_set(GIT_ERROR_OS,
+ "read error on '%s' calculating similarity hashes", path);
break;
}
int git_hashsig_compare(const git_hashsig *a, const git_hashsig *b)
{
+ /* if we have no elements in either file then each file is either
+ * empty or blank. if we're ignoring whitespace then the files are
+ * similar, otherwise they're dissimilar.
+ */
+ if (a->mins.size == 0 && b->mins.size == 0) {
+ if ((!a->lines && !b->lines) ||
+ (a->opt & GIT_HASHSIG_IGNORE_WHITESPACE))
+ return HASHSIG_SCALE;
+ else
+ return 0;
+ }
+
/* if we have fewer than the maximum number of elements, then just use
* one array since the two arrays will be the same
*/