This contains a few bug fixes and some header and API cleanups.
The main API change is that filters should now use GIT_PASSTHROUGH
to indicate that they wish to skip processing a file instead of
GIT_ENOTFOUND.
The bug fixes include a possible out-of-range buffer access in
the ident filter, a filter ordering problem I introduced into the
custom filter tests on Windows, and a filter buf NUL termination
issue that was coming up on Linux.
* The filter lifecycle:
* - initialize - first use of filter
* - shutdown - filter removed/unregistered from system
- * - check - considering for file
- * - apply - applied to file
+ * - check - considering filter for file
+ * - apply - apply filter to file contents
* - cleanup - done with file
*/
/**
* Initialize callback on filter
+ *
+ * Specified as `filter.initialize`, this is an optional callback invoked
+ * before a filter is first used. It will be called once at most.
+ *
+ * If non-NULL, the filter's `initialize` callback will be invoked right
+ * before the first use of the filter, so you can defer expensive
+ * initialization operations (in case libgit2 is being used in a way that
+ * doesn't need the filter).
*/
typedef int (*git_filter_init_fn)(git_filter *self);
/**
* Shutdown callback on filter
+ *
+ * Specified as `filter.shutdown`, this is an optional callback invoked
+ * when the filter is unregistered or when libgit2 is shutting down. It
+ * will be called once at most and should release resources as needed.
+ *
+ * Typically this function will free the `git_filter` object itself.
*/
typedef void (*git_filter_shutdown_fn)(git_filter *self);
/**
* Callback to decide if a given source needs this filter
+ *
+ * Specified as `filter.check`, this is an optional callback that checks
+ * if filtering is needed for a given source.
+ *
+ * It should return 0 if the filter should be applied (i.e. success),
+ * GIT_PASSTHROUGH if the filter should not be applied, or an error code
+ * to fail out of the filter processing pipeline and return to the caller.
+ *
+ * The `attr_values` will be set to the values of any attributes given in
+ * the filter definition. See `git_filter` below for more detail.
+ *
+ * The `payload` will be a pointer to a reference payload for the filter.
+ * This will start as NULL, but `check` can assign to this pointer for
+ * later use by the `apply` callback. Note that the value should be heap
+ * allocated (not stack), so that it doesn't go away before the `apply`
+ * callback can use it. If a filter allocates and assigns a value to the
+ * `payload`, it will need a `cleanup` callback to free the payload.
*/
typedef int (*git_filter_check_fn)(
git_filter *self,
/**
* Callback to actually perform the data filtering
+ *
+ * Specified as `filter.apply`, this is the callback that actually filters
+ * data. If it successfully writes the output, it should return 0. Like
+ * `check`, it can return GIT_PASSTHROUGH to indicate that the filter
+ * doesn't want to run. Other error codes will stop filter processing and
+ * return to the caller.
+ *
+ * The `payload` value will refer to any payload that was set by the
+ * `check` callback. It may be read from or written to as needed.
*/
typedef int (*git_filter_apply_fn)(
git_filter *self,
/**
* Callback to clean up after filtering has been applied
+ *
+ * Specified as `filter.cleanup`, this is an optional callback invoked
+ * after the filter has been applied. If the `check` or `apply` callbacks
+ * allocated a `payload` to keep per-source filter state, use this
+ * callback to free that payload and release resources as required.
*/
typedef void (*git_filter_cleanup_fn)(
git_filter *self,
void *payload);
/**
- * Filter structure used to register a new filter.
+ * Filter structure used to register custom filters.
*
- * To associate extra data with a filter, simply allocate extra data
- * and put the `git_filter` struct at the start of your data buffer,
- * then cast the `self` pointer to your larger structure when your
- * callback is invoked.
+ * To associate extra data with a filter, allocate extra data and put the
+ * `git_filter` struct at the start of your data buffer, then cast the
+ * `self` pointer to your larger structure when your callback is invoked.
*
* `version` should be set to GIT_FILTER_VERSION
*
* a value (i.e. "name=value"), the attribute must match that value for
* the filter to be applied.
*
- * `initialize` is an optional callback invoked before a filter is first
- * used. It will be called once at most.
- *
- * `shutdown` is an optional callback invoked when the filter is
- * unregistered or when libgit2 is shutting down. It will be called once
- * at most and should free any memory as needed.
- *
- * `check` is an optional callback that checks if filtering is needed for
- * a given source. It should return 0 if the filter should be applied
- * (i.e. success), GIT_ENOTFOUND if the filter should not be applied, or
- * an other error code to fail out of the filter processing pipeline and
- * return to the caller.
- *
- * `apply` is the callback that actually filters data. If it successfully
- * writes the output, it should return 0. Like `check`, it can return
- * GIT_ENOTFOUND to indicate that the filter doesn't actually want to run.
- * Other error codes will stop filter processing and return to the caller.
- *
- * `cleanup` is an optional callback that is made after the filter has
- * been applied. Both the `check` and `apply` callbacks are able to
- * allocate a `payload` to keep per-source filter state, and this callback
- * is given that value and can clean up as needed.
+ * The `initialize`, `shutdown`, `check`, `apply`, and `cleanup` callbacks
+ * are all documented above with the respective function pointer typedefs.
*/
struct git_filter {
unsigned int version;
/**
* Register a filter under a given name with a given priority.
*
- * If non-NULL, the filter's initialize callback will be invoked before
- * the first use of the filter, so you can defer expensive operations (in
- * case libgit2 is being used in a way that doesn't need the filter).
+ * As mentioned elsewhere, the initialize callback will not be invoked
+ * immediately. It is deferred until the filter is used in some way.
*
* A filter's attribute checks and `check` and `apply` callbacks will be
* issued in order of `priority` on smudge (to workdir), and in reverse
* Currently the filter registry is not thread safe, so any registering or
* deregistering of filters must be done outside of any possible usage of
* the filters (i.e. during application setup or shutdown).
+ *
+ * @param name A name by which the filter can be referenced. Attempting
+ * to register with an in-use name will return GIT_EEXISTS.
+ * @param filter The filter definition. This pointer will be stored as is
+ * by libgit2 so it must be a durable allocation (either static
+ * or on the heap).
+ * @param priority The priority for filter application
+ * @return 0 on successful registry, error code <0 on failure
*/
GIT_EXTERN(int) git_filter_register(
const char *name, git_filter *filter, int priority);
/**
* Remove the filter with the given name
*
- * It is not allowed to remove the builtin libgit2 filters.
+ * Attempting to remove the builtin libgit2 filters is not permitted and
+ * will return an error.
*
* Currently the filter registry is not thread safe, so any registering or
* deregistering of filters must be done outside of any possible usage of
* the filters (i.e. during application setup or shutdown).
+ *
+ * @param name The name under which the filter was registered
+ * @return 0 on success, error code <0 on failure
*/
GIT_EXTERN(int) git_filter_unregister(const char *name);
#define git_array_alloc(a) \
((a).size >= (a).asize) ? \
git_array_grow(&(a), sizeof(*(a).ptr)) : \
- (a).ptr ? &(a).ptr[(a).size++] : NULL
+ ((a).ptr ? &(a).ptr[(a).size++] : NULL)
#define git_array_last(a) ((a).size ? &(a).ptr[(a).size - 1] : NULL)
* stuff?
*/
if (stats.cr != stats.crlf)
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
if (ca->crlf_action == GIT_CRLF_GUESS) {
/*
* This is the new safer autocrlf handling.
*/
if (has_cr_in_index(src))
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
}
if (!stats.cr)
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
}
/* Actually drop the carriage returns */
/* Don't filter binary files */
if (git_buf_text_is_binary(from))
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
/* Determine proper line ending */
workdir_ending = line_ending(ca);
if (!strcmp("\n", workdir_ending)) {
if (ca->crlf_action == GIT_CRLF_GUESS && ca->auto_crlf)
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
if (git_buf_find(from, '\r') < 0)
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
if (git_buf_text_crlf_to_lf(to, from) < 0)
return -1;
ca.crlf_action = crlf_input_action(&ca);
if (ca.crlf_action == GIT_CRLF_BINARY)
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
if (ca.crlf_action == GIT_CRLF_GUESS) {
error = git_repository__cvar(
return error;
if (ca.auto_crlf == GIT_AUTO_CRLF_FALSE)
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
}
*payload = git__malloc(sizeof(ca));
/* initialize payload in case `check` was bypassed */
if (!*payload) {
int error = crlf_check(self, payload, src, NULL);
- if (error < 0 && error != GIT_ENOTFOUND)
+ if (error < 0 && error != GIT_PASSTHROUGH)
return error;
}
if (!filter_registry_find(NULL, name)) {
giterr_set(
GITERR_FILTER, "Attempt to reregister existing filter '%s'", name);
- return -1;
+ return GIT_EEXISTS;
}
if (filter_def_scan_attrs(&attrs, &nattr, &nmatch, filter->attributes) < 0)
git_filter_def *fdef;
/* cannot unregister default filters */
- if (!strcmp(GIT_FILTER_CRLF, name)) {
+ if (!strcmp(GIT_FILTER_CRLF, name) || !strcmp(GIT_FILTER_IDENT, name)) {
giterr_set(GITERR_FILTER, "Cannot unregister filter '%s'", name);
return -1;
}
git__free((void *)values);
- if (error == GIT_ENOTFOUND)
+ if (error == GIT_PASSTHROUGH)
error = 0;
else if (error < 0)
break;
error = fe->filter->apply(
fe->filter, &fe->payload, dbuffer[di], dbuffer[si], &fl->source);
- if (error == GIT_ENOTFOUND)
+ if (error == GIT_PASSTHROUGH) {
+ /* PASSTHROUGH means filter decided not to process the buffer */
error = 0;
- else if (!error)
+ } else if (!error) {
+ git_buf_shorten(dbuffer[di], 0); /* force NUL termination */
si = di; /* swap buffers */
- else {
+ } else {
tgt->size = 0;
return error;
}
static int ident_find_id(
const char **id_start, const char **id_end, const char *start, size_t len)
{
- const char *found;
+ const char *end = start + len, *found = NULL;
- while (len > 0 && (found = memchr(start, '$', len)) != NULL) {
- size_t remaining = len - (size_t)(found - start);
+ while (len > 3 && (found = memchr(start, '$', len)) != NULL) {
+ size_t remaining = (size_t)(end - found) - 1;
if (remaining < 3)
return GIT_ENOTFOUND;
- if (found[1] == 'I' && found[2] == 'd')
- break;
+
start = found + 1;
- len = remaining - 1;
+ len = remaining;
+
+ if (start[0] == 'I' && start[1] == 'd')
+ break;
}
- if (!found || len < 3)
+ if (len < 3 || !found)
return GIT_ENOTFOUND;
*id_start = found;
- if ((found = memchr(found + 3, '$', len - 3)) == NULL)
+ if ((found = memchr(start + 2, '$', len - 2)) == NULL)
return GIT_ENOTFOUND;
*id_end = found + 1;
/* replace $Id$ with blob id */
if (!git_filter_source_id(src))
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
git_oid_tostr(oid, sizeof(oid), git_filter_source_id(src));
if (ident_find_id(&id_start, &id_end, from->ptr, from->size) < 0)
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
need_size = (size_t)(id_start - from->ptr) +
5 /* "$Id: " */ + GIT_OID_HEXSZ + 1 /* "$" */ +
size_t need_size;
if (ident_find_id(&id_start, &id_end, from->ptr, from->size) < 0)
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
need_size = (size_t)(id_start - from->ptr) +
4 /* "$Id$" */ + (size_t)(from_end - id_end);
/* Don't filter binary files */
if (git_buf_text_is_binary(from))
- return GIT_ENOTFOUND;
+ return GIT_PASSTHROUGH;
if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
return ident_insert_id(to, from, src);
*/
#include "pthread.h"
+#include "../global.h"
int pthread_create(
pthread_t *GIT_RESTRICT thread,
#include "git2/sys/filter.h"
#include "git2/sys/repository.h"
-/* picked these to be >= GIT_FILTER_DRIVER_PRIORITY */
-#define BITFLIP_FILTER_PRIORITY 200
-#define REVERSE_FILTER_PRIORITY 250
+/* going TO_WORKDIR, filters are executed low to high
+ * going TO_ODB, filters are executed high to low
+ */
+#define BITFLIP_FILTER_PRIORITY -1
+#define REVERSE_FILTER_PRIORITY -2
#define VERY_SECURE_ENCRYPTION(b) ((b) ^ 0xff)
git__free(f);
}
-static git_filter *create_reverse_filter(void)
+static git_filter *create_reverse_filter(const char *attrs)
{
git_filter *filter = git__calloc(1, sizeof(git_filter));
cl_assert(filter);
filter->version = GIT_FILTER_VERSION;
- filter->attributes = "+reverse";
+ filter->attributes = attrs;
filter->shutdown = reverse_filter_free;
filter->apply = reverse_filter_apply;
"bitflip", create_bitflip_filter(), BITFLIP_FILTER_PRIORITY));
cl_git_pass(git_filter_register(
- "reverse", create_reverse_filter(), REVERSE_FILTER_PRIORITY));
+ "reverse", create_reverse_filter("+reverse"),
+ REVERSE_FILTER_PRIORITY));
+
+ /* re-register reverse filter with standard filter=xyz priority */
+ cl_git_pass(git_filter_register(
+ "pre-reverse",
+ create_reverse_filter("+prereverse"),
+ GIT_FILTER_DRIVER_PRIORITY));
filters_registered = 1;
}
cl_git_mkfile(
"empty_standard_repo/.gitattributes",
- "hero.*.rev-ident text ident reverse eol=lf\n");
+ "hero.*.rev-ident text ident prereverse eol=lf\n");
cl_git_mkfile(
"empty_standard_repo/hero.1.rev-ident",
git_buf_free(&buf);
}
+
+void test_filter_custom__filter_registry_failure_cases(void)
+{
+ git_filter fake = { GIT_FILTER_VERSION, 0 };
+
+ cl_assert_equal_i(GIT_EEXISTS, git_filter_register("bitflip", &fake, 0));
+
+ cl_git_fail(git_filter_unregister(GIT_FILTER_CRLF));
+ cl_git_fail(git_filter_unregister(GIT_FILTER_IDENT));
+ cl_assert_equal_i(GIT_ENOTFOUND, git_filter_unregister("not-a-filter"));
+}