2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
7 #ifndef INCLUDE_git_diff_h__
8 #define INCLUDE_git_diff_h__
18 * @brief Git tree and file differencing routines.
23 * Calculating diffs is generally done in two phases: building a list of
24 * diffs then traversing it. This makes it easier to share logic across
25 * the various types of diffs (tree vs tree, workdir vs index, etc.), and
26 * also allows you to insert optional diff post-processing phases,
27 * such as rename detection, in between the steps. When you are done with
28 * a diff object, it must be freed.
33 * To understand the diff APIs, you should know the following terms:
35 * - A `diff` represents the cumulative list of differences between two
36 * snapshots of a repository (possibly filtered by a set of file name
37 * patterns). This is the `git_diff` object.
39 * - A `delta` is a file pair with an old and new revision. The old version
40 * may be absent if the file was just created and the new version may be
41 * absent if the file was deleted. A diff is mostly just a list of deltas.
43 * - A `binary` file / delta is a file (or pair) for which no text diffs
44 * should be generated. A diff can contain delta entries that are
45 * binary, but no diff content will be output for those files. There is
46 * a base heuristic for binary detection and you can further tune the
47 * behavior with git attributes or diff flags and option settings.
49 * - A `hunk` is a span of modified lines in a delta along with some stable
50 * surrounding context. You can configure the amount of context and other
51 * properties of how hunks are generated. Each hunk also comes with a
52 * header that describes where it starts and ends in both the old and new
53 * versions in the delta.
55 * - A `line` is a range of characters inside a hunk. It could be a context
56 * line (i.e. in both old and new versions), an added line (i.e. only in
57 * the new version), or a removed line (i.e. only in the old version).
58 * Unfortunately, we don't know anything about the encoding of data in the
59 * file being diffed, so we cannot tell you much about the line content.
60 * Line data will not be NUL-byte terminated, however, because it will be
61 * just a span of bytes inside the larger file.
69 * Flags for diff options. A combination of these flags can be passed
70 * in via the `flags` value in the `git_diff_options`.
73 /** Normal diff, the default */
77 * Options controlling which files will be in the diff
80 /** Reverse the sides of the diff */
81 GIT_DIFF_REVERSE
= (1u << 0),
83 /** Include ignored files in the diff */
84 GIT_DIFF_INCLUDE_IGNORED
= (1u << 1),
86 /** Even with GIT_DIFF_INCLUDE_IGNORED, an entire ignored directory
87 * will be marked with only a single entry in the diff; this flag
88 * adds all files under the directory as IGNORED entries, too.
90 GIT_DIFF_RECURSE_IGNORED_DIRS
= (1u << 2),
92 /** Include untracked files in the diff */
93 GIT_DIFF_INCLUDE_UNTRACKED
= (1u << 3),
95 /** Even with GIT_DIFF_INCLUDE_UNTRACKED, an entire untracked
96 * directory will be marked with only a single entry in the diff
97 * (a la what core Git does in `git status`); this flag adds *all*
98 * files under untracked directories as UNTRACKED entries, too.
100 GIT_DIFF_RECURSE_UNTRACKED_DIRS
= (1u << 4),
102 /** Include unmodified files in the diff */
103 GIT_DIFF_INCLUDE_UNMODIFIED
= (1u << 5),
105 /** Normally, a type change between files will be converted into a
106 * DELETED record for the old and an ADDED record for the new; this
107 * option enables the generation of TYPECHANGE delta records.
109 GIT_DIFF_INCLUDE_TYPECHANGE
= (1u << 6),
111 /** Even with GIT_DIFF_INCLUDE_TYPECHANGE, blob->tree changes still
112 * generally show as a DELETED blob. This flag tries to correctly
113 * label blob->tree transitions as TYPECHANGE records with new_file's
114 * mode set to tree. Note: the tree SHA will not be available.
116 GIT_DIFF_INCLUDE_TYPECHANGE_TREES
= (1u << 7),
118 /** Ignore file mode changes */
119 GIT_DIFF_IGNORE_FILEMODE
= (1u << 8),
121 /** Treat all submodules as unmodified */
122 GIT_DIFF_IGNORE_SUBMODULES
= (1u << 9),
124 /** Use case insensitive filename comparisons */
125 GIT_DIFF_IGNORE_CASE
= (1u << 10),
127 /** May be combined with `GIT_DIFF_IGNORE_CASE` to specify that a file
128 * that has changed case will be returned as an add/delete pair.
130 GIT_DIFF_INCLUDE_CASECHANGE
= (1u << 11),
132 /** If the pathspec is set in the diff options, this flag means to
133 * apply it as an exact match instead of as an fnmatch pattern.
135 GIT_DIFF_DISABLE_PATHSPEC_MATCH
= (1u << 12),
137 /** Disable updating of the `binary` flag in delta records. This is
138 * useful when iterating over a diff if you don't need hunk and data
139 * callbacks and want to avoid having to load the file completely.
141 GIT_DIFF_SKIP_BINARY_CHECK
= (1u << 13),
143 /** When diff finds an untracked directory, to match the behavior of
144 * core Git, it scans the contents for IGNORED and UNTRACKED files.
145 * If *all* contents are IGNORED, then the directory is IGNORED; if
146 * any contents are not IGNORED, then the directory is UNTRACKED.
147 * This is extra work that may not matter in many cases. This flag
148 * turns off that scan and immediately labels an untracked directory
149 * as UNTRACKED (changing the behavior to not match core Git).
151 GIT_DIFF_ENABLE_FAST_UNTRACKED_DIRS
= (1u << 14),
153 /** When diff finds a file in the working directory with stat
154 * information different from the index, but the OID ends up being the
155 * same, write the correct stat information into the index. Note:
156 * without this flag, diff will always leave the index untouched.
158 GIT_DIFF_UPDATE_INDEX
= (1u << 15),
160 /** Include unreadable files in the diff */
161 GIT_DIFF_INCLUDE_UNREADABLE
= (1u << 16),
163 /** Include unreadable files in the diff */
164 GIT_DIFF_INCLUDE_UNREADABLE_AS_UNTRACKED
= (1u << 17),
167 * Options controlling how output will be generated
170 /** Treat all files as text, disabling binary attributes & detection */
171 GIT_DIFF_FORCE_TEXT
= (1u << 20),
172 /** Treat all files as binary, disabling text diffs */
173 GIT_DIFF_FORCE_BINARY
= (1u << 21),
175 /** Ignore all whitespace */
176 GIT_DIFF_IGNORE_WHITESPACE
= (1u << 22),
177 /** Ignore changes in amount of whitespace */
178 GIT_DIFF_IGNORE_WHITESPACE_CHANGE
= (1u << 23),
179 /** Ignore whitespace at end of line */
180 GIT_DIFF_IGNORE_WHITESPACE_EOL
= (1u << 24),
182 /** When generating patch text, include the content of untracked
183 * files. This automatically turns on GIT_DIFF_INCLUDE_UNTRACKED but
184 * it does not turn on GIT_DIFF_RECURSE_UNTRACKED_DIRS. Add that
185 * flag if you want the content of every single UNTRACKED file.
187 GIT_DIFF_SHOW_UNTRACKED_CONTENT
= (1u << 25),
189 /** When generating output, include the names of unmodified files if
190 * they are included in the git_diff. Normally these are skipped in
191 * the formats that list files (e.g. name-only, name-status, raw).
192 * Even with this, these will not be included in patch format.
194 GIT_DIFF_SHOW_UNMODIFIED
= (1u << 26),
196 /** Use the "patience diff" algorithm */
197 GIT_DIFF_PATIENCE
= (1u << 28),
198 /** Take extra time to find minimal diff */
199 GIT_DIFF_MINIMAL
= (1u << 29),
201 /** Include the necessary deflate / delta information so that `git-apply`
202 * can apply given diff information to binary files.
204 GIT_DIFF_SHOW_BINARY
= (1u << 30),
208 * The diff object that contains all individual file deltas.
210 * This is an opaque structure which will be allocated by one of the diff
211 * generator functions below (such as `git_diff_tree_to_tree`). You are
212 * responsible for releasing the object memory when done, using the
213 * `git_diff_free()` function.
215 typedef struct git_diff git_diff
;
218 * Flags for the delta object and the file objects on each side.
220 * These flags are used for both the `flags` value of the `git_diff_delta`
221 * and the flags for the `git_diff_file` objects representing the old and
222 * new sides of the delta. Values outside of this public range should be
223 * considered reserved for internal or future use.
226 GIT_DIFF_FLAG_BINARY
= (1u << 0), /**< file(s) treated as binary data */
227 GIT_DIFF_FLAG_NOT_BINARY
= (1u << 1), /**< file(s) treated as text data */
228 GIT_DIFF_FLAG_VALID_ID
= (1u << 2), /**< `id` value is known correct */
229 GIT_DIFF_FLAG_EXISTS
= (1u << 3), /**< file exists at this side of the delta */
233 * What type of change is described by a git_diff_delta?
235 * `GIT_DELTA_RENAMED` and `GIT_DELTA_COPIED` will only show up if you run
236 * `git_diff_find_similar()` on the diff object.
238 * `GIT_DELTA_TYPECHANGE` only shows up given `GIT_DIFF_INCLUDE_TYPECHANGE`
239 * in the option flags (otherwise type changes will be split into ADDED /
243 GIT_DELTA_UNMODIFIED
= 0, /**< no changes */
244 GIT_DELTA_ADDED
= 1, /**< entry does not exist in old version */
245 GIT_DELTA_DELETED
= 2, /**< entry does not exist in new version */
246 GIT_DELTA_MODIFIED
= 3, /**< entry content changed between old and new */
247 GIT_DELTA_RENAMED
= 4, /**< entry was renamed between old and new */
248 GIT_DELTA_COPIED
= 5, /**< entry was copied from another old entry */
249 GIT_DELTA_IGNORED
= 6, /**< entry is ignored item in workdir */
250 GIT_DELTA_UNTRACKED
= 7, /**< entry is untracked item in workdir */
251 GIT_DELTA_TYPECHANGE
= 8, /**< type of entry changed between old and new */
252 GIT_DELTA_UNREADABLE
= 9, /**< entry is unreadable */
253 GIT_DELTA_CONFLICTED
= 10, /**< entry in the index is conflicted */
257 * Description of one side of a delta.
259 * Although this is called a "file", it could represent a file, a symbolic
260 * link, a submodule commit id, or even a tree (although that only if you
261 * are tracking type changes or ignored/untracked directories).
263 * The `oid` is the `git_oid` of the item. If the entry represents an
264 * absent side of a diff (e.g. the `old_file` of a `GIT_DELTA_ADDED` delta),
265 * then the oid will be zeroes.
267 * `path` is the NUL-terminated path to the entry relative to the working
268 * directory of the repository.
270 * `size` is the size of the entry in bytes.
272 * `flags` is a combination of the `git_diff_flag_t` types
274 * `mode` is, roughly, the stat() `st_mode` value for the item. This will
275 * be restricted to one of the `git_filemode_t` values.
286 * Description of changes to one entry.
288 * When iterating over a diff, this will be passed to most callbacks and
289 * you can use the contents to understand exactly what has changed.
291 * The `old_file` represents the "from" side of the diff and the `new_file`
292 * represents the "to" side of the diff. What those mean depends on the
293 * function that was used to generate the diff and will be documented below.
294 * You can also use the `GIT_DIFF_REVERSE` flag to flip it around.
296 * Although the two sides of the delta are named "old_file" and "new_file",
297 * they actually may correspond to entries that represent a file, a symbolic
298 * link, a submodule commit id, or even a tree (if you are tracking type
299 * changes or ignored/untracked directories).
301 * Under some circumstances, in the name of efficiency, not all fields will
302 * be filled in, but we generally try to fill in as much as possible. One
303 * example is that the "flags" field may not have either the `BINARY` or the
304 * `NOT_BINARY` flag set to avoid examining file contents if you do not pass
305 * in hunk and/or line callbacks to the diff foreach iteration function. It
306 * will just use the git attributes for those files.
308 * The similarity score is zero unless you call `git_diff_find_similar()`
309 * which does a similarity analysis of files in the diff. Use that
310 * function to do rename and copy detection, and to split heavily modified
311 * files into add/delete pairs. After that call, deltas with a status of
312 * GIT_DELTA_RENAMED or GIT_DELTA_COPIED will have a similarity score
313 * between 0 and 100 indicating how similar the old and new sides are.
315 * If you ask `git_diff_find_similar` to find heavily modified files to
316 * break, but to not *actually* break the records, then GIT_DELTA_MODIFIED
317 * records may have a non-zero similarity score if the self-similarity is
318 * below the split threshold. To display this value like core Git, invert
319 * the score (a la `printf("M%03d", 100 - delta->similarity)`).
323 uint32_t flags
; /**< git_diff_flag_t values */
324 uint16_t similarity
; /**< for RENAMED and COPIED, value 0-100 */
325 uint16_t nfiles
; /**< number of files in this delta */
326 git_diff_file old_file
;
327 git_diff_file new_file
;
331 * Diff notification callback function.
333 * The callback will be called for each file, just before the `git_diff_delta`
334 * gets inserted into the diff.
337 * - returns < 0, the diff process will be aborted.
338 * - returns > 0, the delta will not be inserted into the diff, but the
339 * diff process continues.
340 * - returns 0, the delta is inserted into the diff, and the diff process
343 typedef int (*git_diff_notify_cb
)(
344 const git_diff
*diff_so_far
,
345 const git_diff_delta
*delta_to_add
,
346 const char *matched_pathspec
,
350 * Structure describing options about how the diff should be executed.
352 * Setting all values of the structure to zero will yield the default
353 * values. Similarly, passing NULL for the options structure will
354 * give the defaults. The default values are marked below.
356 * - `flags` is a combination of the `git_diff_option_t` values above
357 * - `context_lines` is the number of unchanged lines that define the
358 * boundary of a hunk (and to display before and after)
359 * - `interhunk_lines` is the maximum number of unchanged lines between
360 * hunk boundaries before the hunks will be merged into one.
361 * - `old_prefix` is the virtual "directory" to prefix to old file names
362 * in hunk headers (default "a")
363 * - `new_prefix` is the virtual "directory" to prefix to new file names
364 * in hunk headers (default "b")
365 * - `pathspec` is an array of paths / fnmatch patterns to constrain diff
366 * - `max_size` is a file size (in bytes) above which a blob will be marked
367 * as binary automatically; pass a negative value to disable.
368 * - `notify_cb` is an optional callback function, notifying the consumer of
369 * which files are being examined as the diff is generated
370 * - `notify_payload` is the payload data to pass to the `notify_cb` function
371 * - `ignore_submodules` overrides the submodule ignore setting for all
372 * submodules in the diff.
375 unsigned int version
; /**< version for the struct */
376 uint32_t flags
; /**< defaults to GIT_DIFF_NORMAL */
378 /* options controlling which files are in the diff */
380 git_submodule_ignore_t ignore_submodules
; /**< submodule ignore rule */
381 git_strarray pathspec
; /**< defaults to include all paths */
382 git_diff_notify_cb notify_cb
;
383 void *notify_payload
;
385 /* options controlling how the diff text is generated */
387 uint32_t context_lines
; /**< defaults to 3 */
388 uint32_t interhunk_lines
; /**< defaults to 0 */
389 uint16_t id_abbrev
; /**< default 'core.abbrev' or 7 if unset */
390 git_off_t max_size
; /**< defaults to 512MB */
391 const char *old_prefix
; /**< defaults to "a" */
392 const char *new_prefix
; /**< defaults to "b" */
395 /* The current version of the diff options structure */
396 #define GIT_DIFF_OPTIONS_VERSION 1
398 /* Stack initializer for diff options. Alternatively use
399 * `git_diff_init_options` for programmatic initialization.
401 #define GIT_DIFF_OPTIONS_INIT \
402 {GIT_DIFF_OPTIONS_VERSION, 0, GIT_SUBMODULE_IGNORE_UNSPECIFIED, {NULL,0}, NULL, NULL, 3}
405 * Initializes a `git_diff_options` with default values. Equivalent to
406 * creating an instance with GIT_DIFF_OPTIONS_INIT.
408 * @param opts The `git_diff_options` struct to initialize
409 * @param version Version of struct; pass `GIT_DIFF_OPTIONS_VERSION`
410 * @return Zero on success; -1 on failure.
412 GIT_EXTERN(int) git_diff_init_options(
413 git_diff_options
*opts
,
414 unsigned int version
);
417 * When iterating over a diff, callback that will be made per file.
419 * @param delta A pointer to the delta data for the file
420 * @param progress Goes from 0 to 1 over the diff
421 * @param payload User-specified pointer from foreach function
423 typedef int (*git_diff_file_cb
)(
424 const git_diff_delta
*delta
,
429 * When producing a binary diff, the binary data returned will be
430 * either the deflated full ("literal") contents of the file, or
431 * the deflated binary delta between the two sides (whichever is
435 /** There is no binary delta. */
436 GIT_DIFF_BINARY_NONE
,
438 /** The binary data is the literal contents of the file. */
439 GIT_DIFF_BINARY_LITERAL
,
441 /** The binary data is the delta from one side to the other. */
442 GIT_DIFF_BINARY_DELTA
,
445 /** The contents of one of the files in a binary diff. */
447 /** The type of binary data for this file. */
448 git_diff_binary_t type
;
450 /** The binary data, deflated. */
453 /** The length of the binary data. */
456 /** The length of the binary data after inflation. */
458 } git_diff_binary_file
;
460 /** Structure describing the binary contents of a diff. */
462 git_diff_binary_file old_file
; /**< The contents of the old file. */
463 git_diff_binary_file new_file
; /**< The contents of the new file. */
467 * When iterating over a diff, callback that will be made for
468 * binary content within the diff.
470 typedef int(*git_diff_binary_cb
)(
471 const git_diff_delta
*delta
,
472 const git_diff_binary
*binary
,
476 * Structure describing a hunk of a diff.
479 int old_start
; /**< Starting line number in old_file */
480 int old_lines
; /**< Number of lines in old_file */
481 int new_start
; /**< Starting line number in new_file */
482 int new_lines
; /**< Number of lines in new_file */
483 size_t header_len
; /**< Number of bytes in header text */
484 char header
[128]; /**< Header text, NUL-byte terminated */
488 * When iterating over a diff, callback that will be made per hunk.
490 typedef int (*git_diff_hunk_cb
)(
491 const git_diff_delta
*delta
,
492 const git_diff_hunk
*hunk
,
496 * Line origin constants.
498 * These values describe where a line came from and will be passed to
499 * the git_diff_line_cb when iterating over a diff. There are some
500 * special origin constants at the end that are used for the text
501 * output callbacks to demarcate lines that are actually part of
502 * the file or hunk headers.
505 /* These values will be sent to `git_diff_line_cb` along with the line */
506 GIT_DIFF_LINE_CONTEXT
= ' ',
507 GIT_DIFF_LINE_ADDITION
= '+',
508 GIT_DIFF_LINE_DELETION
= '-',
510 GIT_DIFF_LINE_CONTEXT_EOFNL
= '=', /**< Both files have no LF at end */
511 GIT_DIFF_LINE_ADD_EOFNL
= '>', /**< Old has no LF at end, new does */
512 GIT_DIFF_LINE_DEL_EOFNL
= '<', /**< Old has LF at end, new does not */
514 /* The following values will only be sent to a `git_diff_line_cb` when
515 * the content of a diff is being formatted through `git_diff_print`.
517 GIT_DIFF_LINE_FILE_HDR
= 'F',
518 GIT_DIFF_LINE_HUNK_HDR
= 'H',
519 GIT_DIFF_LINE_BINARY
= 'B' /**< For "Binary files x and y differ" */
523 * Structure describing a line (or data span) of a diff.
526 char origin
; /**< A git_diff_line_t value */
527 int old_lineno
; /**< Line number in old file or -1 for added line */
528 int new_lineno
; /**< Line number in new file or -1 for deleted line */
529 int num_lines
; /**< Number of newline characters in content */
530 size_t content_len
; /**< Number of bytes of data */
531 git_off_t content_offset
; /**< Offset in the original file to the content */
532 const char *content
; /**< Pointer to diff text, not NUL-byte terminated */
536 * When iterating over a diff, callback that will be made per text diff
537 * line. In this context, the provided range will be NULL.
539 * When printing a diff, callback that will be made to output each line
540 * of text. This uses some extra GIT_DIFF_LINE_... constants for output
541 * of lines of file and hunk headers.
543 typedef int (*git_diff_line_cb
)(
544 const git_diff_delta
*delta
, /**< delta that contains this data */
545 const git_diff_hunk
*hunk
, /**< hunk containing this data */
546 const git_diff_line
*line
, /**< line data */
547 void *payload
); /**< user reference data */
550 * Flags to control the behavior of diff rename/copy detection.
553 /** Obey `diff.renames`. Overridden by any other GIT_DIFF_FIND_... flag. */
554 GIT_DIFF_FIND_BY_CONFIG
= 0,
556 /** Look for renames? (`--find-renames`) */
557 GIT_DIFF_FIND_RENAMES
= (1u << 0),
559 /** Consider old side of MODIFIED for renames? (`--break-rewrites=N`) */
560 GIT_DIFF_FIND_RENAMES_FROM_REWRITES
= (1u << 1),
562 /** Look for copies? (a la `--find-copies`). */
563 GIT_DIFF_FIND_COPIES
= (1u << 2),
565 /** Consider UNMODIFIED as copy sources? (`--find-copies-harder`).
567 * For this to work correctly, use GIT_DIFF_INCLUDE_UNMODIFIED when
568 * the initial `git_diff` is being generated.
570 GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED
= (1u << 3),
572 /** Mark significant rewrites for split (`--break-rewrites=/M`) */
573 GIT_DIFF_FIND_REWRITES
= (1u << 4),
574 /** Actually split large rewrites into delete/add pairs */
575 GIT_DIFF_BREAK_REWRITES
= (1u << 5),
576 /** Mark rewrites for split and break into delete/add pairs */
577 GIT_DIFF_FIND_AND_BREAK_REWRITES
=
578 (GIT_DIFF_FIND_REWRITES
| GIT_DIFF_BREAK_REWRITES
),
580 /** Find renames/copies for UNTRACKED items in working directory.
582 * For this to work correctly, use GIT_DIFF_INCLUDE_UNTRACKED when the
583 * initial `git_diff` is being generated (and obviously the diff must
584 * be against the working directory for this to make sense).
586 GIT_DIFF_FIND_FOR_UNTRACKED
= (1u << 6),
588 /** Turn on all finding features. */
589 GIT_DIFF_FIND_ALL
= (0x0ff),
591 /** Measure similarity ignoring leading whitespace (default) */
592 GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE
= 0,
593 /** Measure similarity ignoring all whitespace */
594 GIT_DIFF_FIND_IGNORE_WHITESPACE
= (1u << 12),
595 /** Measure similarity including all data */
596 GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE
= (1u << 13),
597 /** Measure similarity only by comparing SHAs (fast and cheap) */
598 GIT_DIFF_FIND_EXACT_MATCH_ONLY
= (1u << 14),
600 /** Do not break rewrites unless they contribute to a rename.
602 * Normally, GIT_DIFF_FIND_AND_BREAK_REWRITES will measure the self-
603 * similarity of modified files and split the ones that have changed a
604 * lot into a DELETE / ADD pair. Then the sides of that pair will be
605 * considered candidates for rename and copy detection.
607 * If you add this flag in and the split pair is *not* used for an
608 * actual rename or copy, then the modified record will be restored to
609 * a regular MODIFIED record instead of being split.
611 GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY
= (1u << 15),
613 /** Remove any UNMODIFIED deltas after find_similar is done.
615 * Using GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED to emulate the
616 * --find-copies-harder behavior requires building a diff with the
617 * GIT_DIFF_INCLUDE_UNMODIFIED flag. If you do not want UNMODIFIED
618 * records in the final result, pass this flag to have them removed.
620 GIT_DIFF_FIND_REMOVE_UNMODIFIED
= (1u << 16),
624 * Pluggable similarity metric
627 int (*file_signature
)(
628 void **out
, const git_diff_file
*file
,
629 const char *fullpath
, void *payload
);
630 int (*buffer_signature
)(
631 void **out
, const git_diff_file
*file
,
632 const char *buf
, size_t buflen
, void *payload
);
633 void (*free_signature
)(void *sig
, void *payload
);
634 int (*similarity
)(int *score
, void *siga
, void *sigb
, void *payload
);
636 } git_diff_similarity_metric
;
639 * Control behavior of rename and copy detection
641 * These options mostly mimic parameters that can be passed to git-diff.
643 * - `rename_threshold` is the same as the -M option with a value
644 * - `copy_threshold` is the same as the -C option with a value
645 * - `rename_from_rewrite_threshold` matches the top of the -B option
646 * - `break_rewrite_threshold` matches the bottom of the -B option
647 * - `rename_limit` is the maximum number of matches to consider for
648 * a particular file. This is a little different from the `-l` option
649 * to regular Git because we will still process up to this many matches
650 * before abandoning the search.
652 * The `metric` option allows you to plug in a custom similarity metric.
653 * Set it to NULL for the default internal metric which is based on sampling
654 * hashes of ranges of data in the file. The default metric is a pretty
655 * good similarity approximation that should work fairly well for both text
656 * and binary data, and is pretty fast with fixed memory overhead.
659 unsigned int version
;
662 * Combination of git_diff_find_t values (default GIT_DIFF_FIND_BY_CONFIG).
663 * NOTE: if you don't explicitly set this, `diff.renames` could be set
664 * to false, resulting in `git_diff_find_similar` doing nothing.
668 /** Similarity to consider a file renamed (default 50) */
669 uint16_t rename_threshold
;
670 /** Similarity of modified to be eligible rename source (default 50) */
671 uint16_t rename_from_rewrite_threshold
;
672 /** Similarity to consider a file a copy (default 50) */
673 uint16_t copy_threshold
;
674 /** Similarity to split modify into delete/add pair (default 60) */
675 uint16_t break_rewrite_threshold
;
677 /** Maximum similarity sources to examine for a file (somewhat like
678 * git-diff's `-l` option or `diff.renameLimit` config) (default 200)
682 /** Pluggable similarity metric; pass NULL to use internal metric */
683 git_diff_similarity_metric
*metric
;
684 } git_diff_find_options
;
686 #define GIT_DIFF_FIND_OPTIONS_VERSION 1
687 #define GIT_DIFF_FIND_OPTIONS_INIT {GIT_DIFF_FIND_OPTIONS_VERSION}
690 * Initializes a `git_diff_find_options` with default values. Equivalent to
691 * creating an instance with GIT_DIFF_FIND_OPTIONS_INIT.
693 * @param opts The `git_diff_find_options` struct to initialize
694 * @param version Version of struct; pass `GIT_DIFF_FIND_OPTIONS_VERSION`
695 * @return Zero on success; -1 on failure.
697 GIT_EXTERN(int) git_diff_find_init_options(
698 git_diff_find_options
*opts
,
699 unsigned int version
);
701 /** @name Diff Generator Functions
703 * These are the functions you would use to create (or destroy) a
704 * git_diff from various objects in a repository.
711 * @param diff The previously created diff; cannot be used after free.
713 GIT_EXTERN(void) git_diff_free(git_diff
*diff
);
716 * Create a diff with the difference between two tree objects.
718 * This is equivalent to `git diff <old-tree> <new-tree>`
720 * The first tree will be used for the "old_file" side of the delta and the
721 * second tree will be used for the "new_file" side of the delta. You can
722 * pass NULL to indicate an empty tree, although it is an error to pass
723 * NULL for both the `old_tree` and `new_tree`.
725 * @param diff Output pointer to a git_diff pointer to be allocated.
726 * @param repo The repository containing the trees.
727 * @param old_tree A git_tree object to diff from, or NULL for empty tree.
728 * @param new_tree A git_tree object to diff to, or NULL for empty tree.
729 * @param opts Structure with options to influence diff or NULL for defaults.
731 GIT_EXTERN(int) git_diff_tree_to_tree(
733 git_repository
*repo
,
736 const git_diff_options
*opts
); /**< can be NULL for defaults */
739 * Create a diff between a tree and repository index.
741 * This is equivalent to `git diff --cached <treeish>` or if you pass
742 * the HEAD tree, then like `git diff --cached`.
744 * The tree you pass will be used for the "old_file" side of the delta, and
745 * the index will be used for the "new_file" side of the delta.
747 * If you pass NULL for the index, then the existing index of the `repo`
748 * will be used. In this case, the index will be refreshed from disk
749 * (if it has changed) before the diff is generated.
751 * @param diff Output pointer to a git_diff pointer to be allocated.
752 * @param repo The repository containing the tree and index.
753 * @param old_tree A git_tree object to diff from, or NULL for empty tree.
754 * @param index The index to diff with; repo index used if NULL.
755 * @param opts Structure with options to influence diff or NULL for defaults.
757 GIT_EXTERN(int) git_diff_tree_to_index(
759 git_repository
*repo
,
762 const git_diff_options
*opts
); /**< can be NULL for defaults */
765 * Create a diff between the repository index and the workdir directory.
767 * This matches the `git diff` command. See the note below on
768 * `git_diff_tree_to_workdir` for a discussion of the difference between
769 * `git diff` and `git diff HEAD` and how to emulate a `git diff <treeish>`
772 * The index will be used for the "old_file" side of the delta, and the
773 * working directory will be used for the "new_file" side of the delta.
775 * If you pass NULL for the index, then the existing index of the `repo`
776 * will be used. In this case, the index will be refreshed from disk
777 * (if it has changed) before the diff is generated.
779 * @param diff Output pointer to a git_diff pointer to be allocated.
780 * @param repo The repository.
781 * @param index The index to diff from; repo index used if NULL.
782 * @param opts Structure with options to influence diff or NULL for defaults.
784 GIT_EXTERN(int) git_diff_index_to_workdir(
786 git_repository
*repo
,
788 const git_diff_options
*opts
); /**< can be NULL for defaults */
791 * Create a diff between a tree and the working directory.
793 * The tree you provide will be used for the "old_file" side of the delta,
794 * and the working directory will be used for the "new_file" side.
796 * This is not the same as `git diff <treeish>` or `git diff-index
797 * <treeish>`. Those commands use information from the index, whereas this
798 * function strictly returns the differences between the tree and the files
799 * in the working directory, regardless of the state of the index. Use
800 * `git_diff_tree_to_workdir_with_index` to emulate those commands.
802 * To see the difference between this and `git_diff_tree_to_workdir_with_index`,
803 * consider the example of a staged file deletion where the file has then
804 * been put back into the working dir and further modified. The
805 * tree-to-workdir diff for that file is 'modified', but `git diff` would
806 * show status 'deleted' since there is a staged delete.
808 * @param diff A pointer to a git_diff pointer that will be allocated.
809 * @param repo The repository containing the tree.
810 * @param old_tree A git_tree object to diff from, or NULL for empty tree.
811 * @param opts Structure with options to influence diff or NULL for defaults.
813 GIT_EXTERN(int) git_diff_tree_to_workdir(
815 git_repository
*repo
,
817 const git_diff_options
*opts
); /**< can be NULL for defaults */
820 * Create a diff between a tree and the working directory using index data
821 * to account for staged deletes, tracked files, etc.
823 * This emulates `git diff <tree>` by diffing the tree to the index and
824 * the index to the working directory and blending the results into a
825 * single diff that includes staged deletes, etc.
827 * @param diff A pointer to a git_diff pointer that will be allocated.
828 * @param repo The repository containing the tree.
829 * @param old_tree A git_tree object to diff from, or NULL for empty tree.
830 * @param opts Structure with options to influence diff or NULL for defaults.
832 GIT_EXTERN(int) git_diff_tree_to_workdir_with_index(
834 git_repository
*repo
,
836 const git_diff_options
*opts
); /**< can be NULL for defaults */
839 * Create a diff with the difference between two index objects.
841 * The first index will be used for the "old_file" side of the delta and the
842 * second index will be used for the "new_file" side of the delta.
844 * @param diff Output pointer to a git_diff pointer to be allocated.
845 * @param repo The repository containing the indexes.
846 * @param old_index A git_index object to diff from.
847 * @param new_index A git_index object to diff to.
848 * @param opts Structure with options to influence diff or NULL for defaults.
850 GIT_EXTERN(int) git_diff_index_to_index(
852 git_repository
*repo
,
853 git_index
*old_index
,
854 git_index
*new_index
,
855 const git_diff_options
*opts
); /**< can be NULL for defaults */
858 * Merge one diff into another.
860 * This merges items from the "from" list into the "onto" list. The
861 * resulting diff will have all items that appear in either list.
862 * If an item appears in both lists, then it will be "merged" to appear
863 * as if the old version was from the "onto" list and the new version
864 * is from the "from" list (with the exception that if the item has a
865 * pending DELETE in the middle, then it will show as deleted).
867 * @param onto Diff to merge into.
868 * @param from Diff to merge.
870 GIT_EXTERN(int) git_diff_merge(
872 const git_diff
*from
);
875 * Transform a diff marking file renames, copies, etc.
877 * This modifies a diff in place, replacing old entries that look
878 * like renames or copies with new entries reflecting those changes.
879 * This also will, if requested, break modified files into add/remove
880 * pairs if the amount of change is above a threshold.
882 * @param diff diff to run detection algorithms on
883 * @param options Control how detection should be run, NULL for defaults
884 * @return 0 on success, -1 on failure
886 GIT_EXTERN(int) git_diff_find_similar(
888 const git_diff_find_options
*options
);
893 /** @name Diff Processor Functions
895 * These are the functions you apply to a diff to process it
896 * or read it in some way.
901 * Query how many diff records there are in a diff.
903 * @param diff A git_diff generated by one of the above functions
904 * @return Count of number of deltas in the list
906 GIT_EXTERN(size_t) git_diff_num_deltas(const git_diff
*diff
);
909 * Query how many diff deltas there are in a diff, filtered by type.
911 * This works just like `git_diff_num_deltas()` with an extra parameter
912 * that is a `git_delta_t` and returns just the count of how many deltas
913 * match that particular type.
915 * @param diff A git_diff generated by one of the above functions
916 * @param type A git_delta_t value to filter the count
917 * @return Count of number of deltas matching delta_t type
919 GIT_EXTERN(size_t) git_diff_num_deltas_of_type(
920 const git_diff
*diff
, git_delta_t type
);
923 * Return the diff delta for an entry in the diff list.
925 * The `git_diff_delta` pointer points to internal data and you do not
926 * have to release it when you are done with it. It will go away when
927 * the `git_diff` (or any associated `git_patch`) goes away.
929 * Note that the flags on the delta related to whether it has binary
930 * content or not may not be set if there are no attributes set for the
931 * file and there has been no reason to load the file data at this point.
932 * For now, if you need those flags to be up to date, your only option is
933 * to either use `git_diff_foreach` or create a `git_patch`.
935 * @param diff Diff list object
936 * @param idx Index into diff list
937 * @return Pointer to git_diff_delta (or NULL if `idx` out of range)
939 GIT_EXTERN(const git_diff_delta
*) git_diff_get_delta(
940 const git_diff
*diff
, size_t idx
);
943 * Check if deltas are sorted case sensitively or insensitively.
945 * @param diff diff to check
946 * @return 0 if case sensitive, 1 if case is ignored
948 GIT_EXTERN(int) git_diff_is_sorted_icase(const git_diff
*diff
);
951 * Loop over all deltas in a diff issuing callbacks.
953 * This will iterate through all of the files described in a diff. You
954 * should provide a file callback to learn about each file.
956 * The "hunk" and "line" callbacks are optional, and the text diff of the
957 * files will only be calculated if they are not NULL. Of course, these
958 * callbacks will not be invoked for binary files on the diff or for
959 * files whose only change is a file mode change.
961 * Returning a non-zero value from any of the callbacks will terminate
962 * the iteration and return the value to the user.
964 * @param diff A git_diff generated by one of the above functions.
965 * @param file_cb Callback function to make per file in the diff.
966 * @param binary_cb Optional callback to make for binary files.
967 * @param hunk_cb Optional callback to make per hunk of text diff. This
968 * callback is called to describe a range of lines in the
969 * diff. It will not be issued for binary files.
970 * @param line_cb Optional callback to make per line of diff text. This
971 * same callback will be made for context lines, added, and
972 * removed lines, and even for a deleted trailing newline.
973 * @param payload Reference pointer that will be passed to your callbacks.
974 * @return 0 on success, non-zero callback return value, or error code
976 GIT_EXTERN(int) git_diff_foreach(
978 git_diff_file_cb file_cb
,
979 git_diff_binary_cb binary_cb
,
980 git_diff_hunk_cb hunk_cb
,
981 git_diff_line_cb line_cb
,
985 * Look up the single character abbreviation for a delta status code.
987 * When you run `git diff --name-status` it uses single letter codes in
988 * the output such as 'A' for added, 'D' for deleted, 'M' for modified,
989 * etc. This function converts a git_delta_t value into these letters for
990 * your own purposes. GIT_DELTA_UNTRACKED will return a space (i.e. ' ').
992 * @param status The git_delta_t value to look up
993 * @return The single character label for that code
995 GIT_EXTERN(char) git_diff_status_char(git_delta_t status
);
998 * Possible output formats for diff data
1001 GIT_DIFF_FORMAT_PATCH
= 1u, /**< full git diff */
1002 GIT_DIFF_FORMAT_PATCH_HEADER
= 2u, /**< just the file headers of patch */
1003 GIT_DIFF_FORMAT_RAW
= 3u, /**< like git diff --raw */
1004 GIT_DIFF_FORMAT_NAME_ONLY
= 4u, /**< like git diff --name-only */
1005 GIT_DIFF_FORMAT_NAME_STATUS
= 5u, /**< like git diff --name-status */
1006 } git_diff_format_t
;
1009 * Iterate over a diff generating formatted text output.
1011 * Returning a non-zero value from the callbacks will terminate the
1012 * iteration and return the non-zero value to the caller.
1014 * @param diff A git_diff generated by one of the above functions.
1015 * @param format A git_diff_format_t value to pick the text format.
1016 * @param print_cb Callback to make per line of diff text.
1017 * @param payload Reference pointer that will be passed to your callback.
1018 * @return 0 on success, non-zero callback return value, or error code
1020 GIT_EXTERN(int) git_diff_print(
1022 git_diff_format_t format
,
1023 git_diff_line_cb print_cb
,
1034 * Directly run a diff on two blobs.
1036 * Compared to a file, a blob lacks some contextual information. As such,
1037 * the `git_diff_file` given to the callback will have some fake data; i.e.
1038 * `mode` will be 0 and `path` will be NULL.
1040 * NULL is allowed for either `old_blob` or `new_blob` and will be treated
1041 * as an empty blob, with the `oid` set to NULL in the `git_diff_file` data.
1042 * Passing NULL for both blobs is a noop; no callbacks will be made at all.
1044 * We do run a binary content check on the blob content and if either blob
1045 * looks like binary data, the `git_diff_delta` binary attribute will be set
1046 * to 1 and no call to the hunk_cb nor line_cb will be made (unless you pass
1047 * `GIT_DIFF_FORCE_TEXT` of course).
1049 * @param old_blob Blob for old side of diff, or NULL for empty blob
1050 * @param old_as_path Treat old blob as if it had this filename; can be NULL
1051 * @param new_blob Blob for new side of diff, or NULL for empty blob
1052 * @param new_as_path Treat new blob as if it had this filename; can be NULL
1053 * @param options Options for diff, or NULL for default options
1054 * @param file_cb Callback for "file"; made once if there is a diff; can be NULL
1055 * @param binary_cb Callback for binary files; can be NULL
1056 * @param hunk_cb Callback for each hunk in diff; can be NULL
1057 * @param line_cb Callback for each line in diff; can be NULL
1058 * @param payload Payload passed to each callback function
1059 * @return 0 on success, non-zero callback return value, or error code
1061 GIT_EXTERN(int) git_diff_blobs(
1062 const git_blob
*old_blob
,
1063 const char *old_as_path
,
1064 const git_blob
*new_blob
,
1065 const char *new_as_path
,
1066 const git_diff_options
*options
,
1067 git_diff_file_cb file_cb
,
1068 git_diff_binary_cb binary_cb
,
1069 git_diff_hunk_cb hunk_cb
,
1070 git_diff_line_cb line_cb
,
1074 * Directly run a diff between a blob and a buffer.
1076 * As with `git_diff_blobs`, comparing a blob and buffer lacks some context,
1077 * so the `git_diff_file` parameters to the callbacks will be faked a la the
1078 * rules for `git_diff_blobs()`.
1080 * Passing NULL for `old_blob` will be treated as an empty blob (i.e. the
1081 * `file_cb` will be invoked with GIT_DELTA_ADDED and the diff will be the
1082 * entire content of the buffer added). Passing NULL to the buffer will do
1083 * the reverse, with GIT_DELTA_DELETED and blob content removed.
1085 * @param old_blob Blob for old side of diff, or NULL for empty blob
1086 * @param old_as_path Treat old blob as if it had this filename; can be NULL
1087 * @param buffer Raw data for new side of diff, or NULL for empty
1088 * @param buffer_len Length of raw data for new side of diff
1089 * @param buffer_as_path Treat buffer as if it had this filename; can be NULL
1090 * @param options Options for diff, or NULL for default options
1091 * @param file_cb Callback for "file"; made once if there is a diff; can be NULL
1092 * @param binary_cb Callback for binary files; can be NULL
1093 * @param hunk_cb Callback for each hunk in diff; can be NULL
1094 * @param line_cb Callback for each line in diff; can be NULL
1095 * @param payload Payload passed to each callback function
1096 * @return 0 on success, non-zero callback return value, or error code
1098 GIT_EXTERN(int) git_diff_blob_to_buffer(
1099 const git_blob
*old_blob
,
1100 const char *old_as_path
,
1103 const char *buffer_as_path
,
1104 const git_diff_options
*options
,
1105 git_diff_file_cb file_cb
,
1106 git_diff_binary_cb binary_cb
,
1107 git_diff_hunk_cb hunk_cb
,
1108 git_diff_line_cb line_cb
,
1112 * Directly run a diff between two buffers.
1114 * Even more than with `git_diff_blobs`, comparing two buffers lacks
1115 * context, so the `git_diff_file` parameters to the callbacks will be
1116 * faked a la the rules for `git_diff_blobs()`.
1118 * @param old_buffer Raw data for old side of diff, or NULL for empty
1119 * @param old_len Length of the raw data for old side of the diff
1120 * @param old_as_path Treat old buffer as if it had this filename; can be NULL
1121 * @param new_buffer Raw data for new side of diff, or NULL for empty
1122 * @param new_len Length of raw data for new side of diff
1123 * @param new_as_path Treat new buffer as if it had this filename; can be NULL
1124 * @param options Options for diff, or NULL for default options
1125 * @param file_cb Callback for "file"; made once if there is a diff; can be NULL
1126 * @param binary_cb Callback for binary files; can be NULL
1127 * @param hunk_cb Callback for each hunk in diff; can be NULL
1128 * @param line_cb Callback for each line in diff; can be NULL
1129 * @param payload Payload passed to each callback function
1130 * @return 0 on success, non-zero callback return value, or error code
1132 GIT_EXTERN(int) git_diff_buffers(
1133 const void *old_buffer
,
1135 const char *old_as_path
,
1136 const void *new_buffer
,
1138 const char *new_as_path
,
1139 const git_diff_options
*options
,
1140 git_diff_file_cb file_cb
,
1141 git_diff_binary_cb binary_cb
,
1142 git_diff_hunk_cb hunk_cb
,
1143 git_diff_line_cb line_cb
,
1147 * This is an opaque structure which is allocated by `git_diff_get_stats`.
1148 * You are responsible for releasing the object memory when done, using the
1149 * `git_diff_stats_free()` function.
1151 typedef struct git_diff_stats git_diff_stats
;
1154 * Formatting options for diff stats
1158 GIT_DIFF_STATS_NONE
= 0,
1160 /** Full statistics, equivalent of `--stat` */
1161 GIT_DIFF_STATS_FULL
= (1u << 0),
1163 /** Short statistics, equivalent of `--shortstat` */
1164 GIT_DIFF_STATS_SHORT
= (1u << 1),
1166 /** Number statistics, equivalent of `--numstat` */
1167 GIT_DIFF_STATS_NUMBER
= (1u << 2),
1169 /** Extended header information such as creations, renames and mode changes, equivalent of `--summary` */
1170 GIT_DIFF_STATS_INCLUDE_SUMMARY
= (1u << 3),
1171 } git_diff_stats_format_t
;
1174 * Accumulate diff statistics for all patches.
1176 * @param out Structure containing the diff statistics.
1177 * @param diff A git_diff generated by one of the above functions.
1178 * @return 0 on success; non-zero on error
1180 GIT_EXTERN(int) git_diff_get_stats(
1181 git_diff_stats
**out
,
1185 * Get the total number of files changed in a diff
1187 * @param stats A `git_diff_stats` generated by one of the above functions.
1188 * @return total number of files changed in the diff
1190 GIT_EXTERN(size_t) git_diff_stats_files_changed(
1191 const git_diff_stats
*stats
);
1194 * Get the total number of insertions in a diff
1196 * @param stats A `git_diff_stats` generated by one of the above functions.
1197 * @return total number of insertions in the diff
1199 GIT_EXTERN(size_t) git_diff_stats_insertions(
1200 const git_diff_stats
*stats
);
1203 * Get the total number of deletions in a diff
1205 * @param stats A `git_diff_stats` generated by one of the above functions.
1206 * @return total number of deletions in the diff
1208 GIT_EXTERN(size_t) git_diff_stats_deletions(
1209 const git_diff_stats
*stats
);
1212 * Print diff statistics to a `git_buf`.
1214 * @param out buffer to store the formatted diff statistics in.
1215 * @param stats A `git_diff_stats` generated by one of the above functions.
1216 * @param format Formatting option.
1217 * @param width Target width for output (only affects GIT_DIFF_STATS_FULL)
1218 * @return 0 on success; non-zero on error
1220 GIT_EXTERN(int) git_diff_stats_to_buf(
1222 const git_diff_stats
*stats
,
1223 git_diff_stats_format_t format
,
1227 * Deallocate a `git_diff_stats`.
1229 * @param stats The previously created statistics object;
1230 * cannot be used after free.
1232 GIT_EXTERN(void) git_diff_stats_free(git_diff_stats
*stats
);
1235 * Formatting options for diff e-mail generation
1238 /** Normal patch, the default */
1239 GIT_DIFF_FORMAT_EMAIL_NONE
= 0,
1241 /** Don't insert "[PATCH]" in the subject header */
1242 GIT_DIFF_FORMAT_EMAIL_EXCLUDE_SUBJECT_PATCH_MARKER
= (1 << 0),
1244 } git_diff_format_email_flags_t
;
1247 * Options for controlling the formatting of the generated e-mail.
1250 unsigned int version
;
1252 git_diff_format_email_flags_t flags
;
1254 /** This patch number */
1257 /** Total number of patches in this series */
1258 size_t total_patches
;
1260 /** id to use for the commit */
1263 /** Summary of the change */
1264 const char *summary
;
1266 /** Author of the change */
1267 const git_signature
*author
;
1268 } git_diff_format_email_options
;
/** Version number used as the first value of GIT_DIFF_FORMAT_EMAIL_OPTIONS_INIT. */
1270 #define GIT_DIFF_FORMAT_EMAIL_OPTIONS_VERSION 1
/**
 * Positional static initializer for `git_diff_format_email_options`.
 *
 * Supplies seven values: the options version, then 0, 1, 1, followed by
 * three NULLs.
 * NOTE(review): presumably these map, in declaration order, to version,
 * flags, patch number, total_patches, commit id, summary and author;
 * confirm against the struct definition before reordering any member.
 */
1271 #define GIT_DIFF_FORMAT_EMAIL_OPTIONS_INIT {GIT_DIFF_FORMAT_EMAIL_OPTIONS_VERSION, 0, 1, 1, NULL, NULL, NULL}
1274 * Create an e-mail ready patch from a diff.
1276 * @param out buffer to store the e-mail patch in
1277 * @param diff containing the commit
1278 * @param opts structure with options to influence content and formatting.
1279 * @return 0 or an error code
1281 GIT_EXTERN(int) git_diff_format_email(
1284 const git_diff_format_email_options
*opts
);
1287 * Create an e-mail ready patch for a commit.
1289 * Does not support creating patches for merge commits (yet).
1291 * @param out buffer to store the e-mail patch in
1292 * @param repo repository containing the commit
1293 * @param commit pointer to the commit
1294 * @param patch_no patch number of the commit
1295 * @param total_patches total number of patches in the patch set
1296 * @param flags determines the formatting of the e-mail
1297 * @param diff_opts structure with options to influence diff or NULL for defaults.
1298 * @return 0 or an error code
1300 GIT_EXTERN(int) git_diff_commit_as_email(
1302 git_repository
*repo
,
1305 size_t total_patches
,
1306 git_diff_format_email_flags_t flags
,
1307 const git_diff_options
*diff_opts
);
1310 * Initializes a `git_diff_format_email_options` with default values.
1312 * Equivalent to creating an instance with GIT_DIFF_FORMAT_EMAIL_OPTIONS_INIT.
1314 * @param opts The `git_diff_format_email_options` struct to initialize
1315 * @param version Version of struct; pass `GIT_DIFF_FORMAT_EMAIL_OPTIONS_VERSION`
1316 * @return Zero on success; -1 on failure.
1318 GIT_EXTERN(int) git_diff_format_email_init_options(
1319 git_diff_format_email_options
*opts
,
1320 unsigned int version
);