[libgit2.git] / src / sha1.c

/*
 * Copyright (C) 2009-2011 the libgit2 contributors
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

#include "common.h"
#include "sha1.h"

#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))

/*
 * Force usage of rol or ror by selecting the one with the smaller constant.
 * It _can_ generate slightly smaller code (a constant of 1 is special), but
 * perhaps more importantly it's possibly faster on any uarch that does a
 * rotate with a loop.
 *
 * SHA_ASM(op, x, n) emits the single instruction named by the string 'op'
 * and yields the rotated value. The "0" constraint places the input 'x' in
 * the same register as the result, and the "i" constraint requires 'n' to
 * be a compile-time constant. The ({ ... }) statement expression is a GNU
 * extension, hence the __GNUC__ guard above.
 */

#define SHA_ASM(op, x, n) ({ unsigned int __res; __asm__(op " %1,%0":"=r" (__res):"i" (n), "0" (x)); __res; })
#define SHA_ROL(x,n)	SHA_ASM("rol", x, n)
#define SHA_ROR(x,n)	SHA_ASM("ror", x, n)

#else

/*
 * Portable fallback: SHA_ROT(X, l, r) ORs X shifted left by 'l' with X
 * shifted right by 'r'; SHA_ROL and SHA_ROR pass counts with l + r == 32,
 * which makes this a 32-bit rotate. X is expanded twice, so it should be
 * free of side effects. The rotate counts used in this file are 1, 2 and 5.
 */
#define SHA_ROT(X,l,r)	(((X) << (l)) | ((X) >> (r)))
#define SHA_ROL(X,n)	SHA_ROT(X,n,32-(n))
#define SHA_ROR(X,n)	SHA_ROT(X,32-(n),n)

#endif

/*
 * If you have 32 registers or more, the compiler can (and should)
 * try to change the array[] accesses into registers. However, on
 * machines with less than ~25 registers, that won't really work,
 * and at least gcc will make an unholy mess of it.
 *
 * So to avoid that mess which just slows things down, we force
 * the stores to memory to actually happen (we might be better off
 * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
 * suggested by Artur Skawina - that will also make gcc unable to
 * try to do the silly "optimize away loads" part because it won't
 * see what the value will be).
 *
 * Ben Herrenschmidt reports that on PPC, the C version comes close
 * to the optimized asm with this (ie on PPC you don't want that
 * 'volatile', since there are lots of registers).
 *
 * On ARM we get the best code generation by forcing a full memory barrier
 * between each SHA_ROUND, otherwise gcc happily goes wild with spilling,
 * the stack frame size simply explodes, and performance goes down the drain.
 */

#if defined(__i386__) || defined(__x86_64__)
 /* x86: store through a volatile-qualified pointer so the write to W(x) is really emitted. */
 #define setW(x, val) (*(volatile unsigned int *)&W(x) = (val))
#elif defined(__GNUC__) && defined(__arm__)
 /*
  * ARM with GCC: plain store followed by an empty asm with a "memory"
  * clobber. Unlike the other two variants this one is a statement, not
  * an expression.
  */
 #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
#else
 /* Everything else: ordinary assignment, leaving the compiler free to keep W(x) in a register. */
 #define setW(x, val) (W(x) = (val))
#endif

/*
 * Performance might be improved if the CPU architecture is OK with
 * unaligned 32-bit loads and a fast ntohl() is available.
 * Otherwise fall back to byte loads and shifts which is portable,
 * and is faster on architectures with memory alignment issues.
 */

#if defined(__i386__) || defined(__x86_64__) || \
	defined(_M_IX86) || defined(_M_X64) || \
	defined(__ppc__) || defined(__ppc64__) || \
	defined(__powerpc__) || defined(__powerpc64__) || \
	defined(__s390__) || defined(__s390x__)

/*
 * Fast path: one (possibly unaligned) 32-bit access plus a byte swap.
 *   get_be32(p)    - value of the big-endian 32-bit word stored at 'p'
 *   put_be32(p, v) - store 'v' at 'p' as a big-endian 32-bit word
 */
#define get_be32(p)	ntohl(*(const unsigned int *)(p))
#define put_be32(p, v)	do { *(unsigned int *)(p) = htonl(v); } while (0)

#else

/*
 * Portable path: assemble / split the word one byte at a time.
 *
 * Each byte is widened to unsigned int _before_ it is shifted. Without the
 * cast the unsigned char would be promoted to (signed) int, and shifting a
 * byte >= 0x80 left by 24 would move a one into the sign bit, which is
 * undefined behaviour. The result of get_be32() is therefore unsigned.
 */
#define get_be32(p)	( \
	((unsigned int)*((const unsigned char *)(p) + 0) << 24) | \
	((unsigned int)*((const unsigned char *)(p) + 1) << 16) | \
	((unsigned int)*((const unsigned char *)(p) + 2) <<  8) | \
	((unsigned int)*((const unsigned char *)(p) + 3) <<  0) )
/* 'v' is evaluated exactly once; each store keeps only the low 8 bits. */
#define put_be32(p, v)	do { \
	unsigned int __v = (v); \
	*((unsigned char *)(p) + 0) = (unsigned char)(__v >> 24); \
	*((unsigned char *)(p) + 1) = (unsigned char)(__v >> 16); \
	*((unsigned char *)(p) + 2) = (unsigned char)(__v >>  8); \
	*((unsigned char *)(p) + 3) = (unsigned char)(__v >>  0); } while (0)

#endif

/* This "rolls" over the 512-bit array */
#define W(x) (array[(x)&15])

/*
 * Where do we get the source from? The first 16 iterations get it from
 * the input data, the next mix it from the 512-bit array.
 */
#define SHA_SRC(t) get_be32(data + t)
#define SHA_MIX(t) SHA_ROL(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)

#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
	unsigned int TEMP = input(t); setW(t, TEMP); \
	E += TEMP + SHA_ROL(A,5) + (fn) + (constant); \
	B = SHA_ROR(B, 2); } while (0)

#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )

/*
 * Run the 80 rounds over one input block and fold the result into ctx->H.
 *
 * 'data' supplies the 16 big-endian words read by rounds 0-15 (64 bytes);
 * later rounds derive their input from the local 16-word 'array'. The five
 * working variables start from ctx->H[0..4] and are added back into
 * ctx->H[0..4] at the end. Nothing else in 'ctx' is read or written here.
 *
 * The rounds are fully unrolled; each line rotates the roles of A..E by one
 * position, so after every five rounds the variables are back in their
 * original order.
 */
static void blk_SHA1_Block(blk_SHA_CTX *ctx, const unsigned int *data)
{
	unsigned int A,B,C,D,E;
	unsigned int array[16];	/* accessed through the W() macro */

	A = ctx->H[0];
	B = ctx->H[1];
	C = ctx->H[2];
	D = ctx->H[3];
	E = ctx->H[4];

	/* Round 1 - iterations 0-15 take their input from 'data' */
	T_0_15( 0, A, B, C, D, E);
	T_0_15( 1, E, A, B, C, D);
	T_0_15( 2, D, E, A, B, C);
	T_0_15( 3, C, D, E, A, B);
	T_0_15( 4, B, C, D, E, A);
	T_0_15( 5, A, B, C, D, E);
	T_0_15( 6, E, A, B, C, D);
	T_0_15( 7, D, E, A, B, C);
	T_0_15( 8, C, D, E, A, B);
	T_0_15( 9, B, C, D, E, A);
	T_0_15(10, A, B, C, D, E);
	T_0_15(11, E, A, B, C, D);
	T_0_15(12, D, E, A, B, C);
	T_0_15(13, C, D, E, A, B);
	T_0_15(14, B, C, D, E, A);
	T_0_15(15, A, B, C, D, E);

	/* Round 1 - tail. Input from 512-bit mixing array */
	T_16_19(16, E, A, B, C, D);
	T_16_19(17, D, E, A, B, C);
	T_16_19(18, C, D, E, A, B);
	T_16_19(19, B, C, D, E, A);

	/* Round 2 */
	T_20_39(20, A, B, C, D, E);
	T_20_39(21, E, A, B, C, D);
	T_20_39(22, D, E, A, B, C);
	T_20_39(23, C, D, E, A, B);
	T_20_39(24, B, C, D, E, A);
	T_20_39(25, A, B, C, D, E);
	T_20_39(26, E, A, B, C, D);
	T_20_39(27, D, E, A, B, C);
	T_20_39(28, C, D, E, A, B);
	T_20_39(29, B, C, D, E, A);
	T_20_39(30, A, B, C, D, E);
	T_20_39(31, E, A, B, C, D);
	T_20_39(32, D, E, A, B, C);
	T_20_39(33, C, D, E, A, B);
	T_20_39(34, B, C, D, E, A);
	T_20_39(35, A, B, C, D, E);
	T_20_39(36, E, A, B, C, D);
	T_20_39(37, D, E, A, B, C);
	T_20_39(38, C, D, E, A, B);
	T_20_39(39, B, C, D, E, A);

	/* Round 3 */
	T_40_59(40, A, B, C, D, E);
	T_40_59(41, E, A, B, C, D);
	T_40_59(42, D, E, A, B, C);
	T_40_59(43, C, D, E, A, B);
	T_40_59(44, B, C, D, E, A);
	T_40_59(45, A, B, C, D, E);
	T_40_59(46, E, A, B, C, D);
	T_40_59(47, D, E, A, B, C);
	T_40_59(48, C, D, E, A, B);
	T_40_59(49, B, C, D, E, A);
	T_40_59(50, A, B, C, D, E);
	T_40_59(51, E, A, B, C, D);
	T_40_59(52, D, E, A, B, C);
	T_40_59(53, C, D, E, A, B);
	T_40_59(54, B, C, D, E, A);
	T_40_59(55, A, B, C, D, E);
	T_40_59(56, E, A, B, C, D);
	T_40_59(57, D, E, A, B, C);
	T_40_59(58, C, D, E, A, B);
	T_40_59(59, B, C, D, E, A);

	/* Round 4 */
	T_60_79(60, A, B, C, D, E);
	T_60_79(61, E, A, B, C, D);
	T_60_79(62, D, E, A, B, C);
	T_60_79(63, C, D, E, A, B);
	T_60_79(64, B, C, D, E, A);
	T_60_79(65, A, B, C, D, E);
	T_60_79(66, E, A, B, C, D);
	T_60_79(67, D, E, A, B, C);
	T_60_79(68, C, D, E, A, B);
	T_60_79(69, B, C, D, E, A);
	T_60_79(70, A, B, C, D, E);
	T_60_79(71, E, A, B, C, D);
	T_60_79(72, D, E, A, B, C);
	T_60_79(73, C, D, E, A, B);
	T_60_79(74, B, C, D, E, A);
	T_60_79(75, A, B, C, D, E);
	T_60_79(76, E, A, B, C, D);
	T_60_79(77, D, E, A, B, C);
	T_60_79(78, C, D, E, A, B);
	T_60_79(79, B, C, D, E, A);

	/* Fold this block's result into the running hash state */
	ctx->H[0] += A;
	ctx->H[1] += B;
	ctx->H[2] += C;
	ctx->H[3] += D;
	ctx->H[4] += E;
}

/*
 * Put 'ctx' into its starting state: no bytes consumed yet and the five
 * hash words set to their initial values.
 */
void git__blk_SHA1_Init(blk_SHA_CTX *ctx)
{
	/* The magic starting constants (see FIPS180 for constants) */
	static const unsigned int initial_state[5] = {
		0x67452301,
		0xefcdab89,
		0x98badcfe,
		0x10325476,
		0xc3d2e1f0
	};
	int word;

	for (word = 0; word < 5; word++)
		ctx->H[word] = initial_state[word];

	ctx->size = 0;
}

/*
 * Feed 'len' bytes starting at 'data' into the running hash in 'ctx'.
 *
 * Input that does not fill a whole 64-byte block is parked in ctx->W; it is
 * hashed once later calls supply the rest of the block. ctx->size counts
 * every byte ever passed in, and its low six bits give the number of bytes
 * currently parked. A zero 'len' leaves 'ctx' untouched and never looks at
 * 'data'.
 */
void git__blk_SHA1_Update(blk_SHA_CTX *ctx, const void *data, unsigned long len)
{
	unsigned int lenW;

	/*
	 * Nothing to add. Returning here also keeps a possibly-NULL 'data' away
	 * from memcpy(), whose pointer arguments must be valid even when the
	 * byte count is zero.
	 */
	if (!len)
		return;

	lenW = ctx->size & 63;
	ctx->size += len;

	/* Read the data into W and process blocks as they get full */
	if (lenW) {
		unsigned int left = 64 - lenW;
		if (len < left)
			left = (unsigned int)len;
		memcpy(lenW + (char *)ctx->W, data, left);
		lenW = (lenW + left) & 63;
		len -= left;
		data = ((const char *)data + left);
		/* Still short of a full block: wait for more input */
		if (lenW)
			return;
		blk_SHA1_Block(ctx, ctx->W);
	}
	/* Hash whole blocks straight from the caller's buffer, without copying */
	while (len >= 64) {
		blk_SHA1_Block(ctx, data);
		data = ((const char *)data + 64);
		len -= 64;
	}
	/* Park the tail (fewer than 64 bytes) for the next call */
	if (len)
		memcpy(ctx->W, data, len);
}

/*
 * Finish the hash: append the padding and the message length to 'ctx',
 * then write the five hash words to 'hashout' as 20 big-endian bytes.
 */
void git__blk_SHA1_Final(unsigned char hashout[20], blk_SHA_CTX *ctx)
{
	/* A single 0x80 byte followed by zeroes */
	static const unsigned char pad[64] = { 0x80 };
	unsigned int bitlen_be[2];
	unsigned char *out = hashout;
	int used, word;

	/*
	 * Message length in bits, as two big-endian 32-bit halves. This is
	 * captured before the padding below is fed through Update, which
	 * itself advances ctx->size.
	 */
	bitlen_be[0] = htonl((uint32_t)(ctx->size >> 29));
	bitlen_be[1] = htonl((uint32_t)(ctx->size << 3));

	/*
	 * Pad so that exactly 8 bytes remain in the current block, then let
	 * the length fill them; the second Update completes the final block.
	 */
	used = ctx->size & 63;
	git__blk_SHA1_Update(ctx, pad, 1 + ((55 - used) & 63));
	git__blk_SHA1_Update(ctx, bitlen_be, 8);

	/* Output hash */
	for (word = 0; word < 5; word++) {
		put_be32(out, ctx->H[word]);
		out += 4;
	}
}
Commit	Line	Data
5dddf7c8	1	/*
bb742ede	2	* Copyright (C) 2009-2011 the libgit2 contributors
5dddf7c8	3	*
bb742ede VM	4	* This file is part of libgit2, distributed under the GNU GPL v2 with
bb742ede VM	5	* a Linking Exception. For full terms see the included COPYING file.
5dddf7c8 AE	6	*/
	7
	8	#include "common.h"
	9	#include "sha1.h"
	10
	11	#if defined(__GNUC__) && (defined(__i386__) \|\| defined(__x86_64__))
	12
	13	/*
	14	* Force usage of rol or ror by selecting the one with the smaller constant.
	15	* It _can_ generate slightly smaller code (a constant of 1 is special), but
	16	* perhaps more importantly it's possibly faster on any uarch that does a
	17	* rotate with a loop.
	18	*/
	19
	20	#define SHA_ASM(op, x, n) ({ unsigned int __res; __asm__(op " %1,%0":"=r" (__res):"i" (n), "0" (x)); __res; })
	21	#define SHA_ROL(x,n) SHA_ASM("rol", x, n)
	22	#define SHA_ROR(x,n) SHA_ASM("ror", x, n)
	23
	24	#else
	25
	26	#define SHA_ROT(X,l,r) (((X) << (l)) \| ((X) >> (r)))
	27	#define SHA_ROL(X,n) SHA_ROT(X,n,32-(n))
	28	#define SHA_ROR(X,n) SHA_ROT(X,32-(n),n)
	29
	30	#endif
	31
	32	/*
	33	* If you have 32 registers or more, the compiler can (and should)
	34	* try to change the array[] accesses into registers. However, on
	35	* machines with less than ~25 registers, that won't really work,
	36	* and at least gcc will make an unholy mess of it.
	37	*
	38	* So to avoid that mess which just slows things down, we force
	39	* the stores to memory to actually happen (we might be better off
	40	* with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
	41	* suggested by Artur Skawina - that will also make gcc unable to
	42	* try to do the silly "optimize away loads" part because it won't
	43	* see what the value will be).
	44	*
	45	* Ben Herrenschmidt reports that on PPC, the C version comes close
	46	* to the optimized asm with this (ie on PPC you don't want that
	47	* 'volatile', since there are lots of registers).
	48	*
	49	* On ARM we get the best code generation by forcing a full memory barrier
	50	* between each SHA_ROUND, otherwise gcc happily get wild with spilling and
	51	* the stack frame size simply explode and performance goes down the drain.
	52	*/
	53
	54	#if defined(__i386__) \|\| defined(__x86_64__)
87d9869f	55	#define setW(x, val) ((volatile unsigned int )&W(x) = (val))
5dddf7c8	56	#elif defined(__GNUC__) && defined(__arm__)
87d9869f	57	#define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
5dddf7c8	58	#else
87d9869f	59	#define setW(x, val) (W(x) = (val))
5dddf7c8 AE	60	#endif
	61
	62	/*
	63	* Performance might be improved if the CPU architecture is OK with
	64	* unaligned 32-bit loads and a fast ntohl() is available.
	65	* Otherwise fall back to byte loads and shifts which is portable,
	66	* and is faster on architectures with memory alignment issues.
	67	*/
	68
	69	#if defined(__i386__) \|\| defined(__x86_64__) \|\| \
87d9869f VM	70	defined(_M_IX86) \|\| defined(_M_X64) \|\| \
	71	defined(__ppc__) \|\| defined(__ppc64__) \|\| \
	72	defined(__powerpc__) \|\| defined(__powerpc64__) \|\| \
	73	defined(__s390__) \|\| defined(__s390x__)
5dddf7c8	74
4414b355	75	#define get_be32(p) ntohl((const unsigned int )(p))
5dddf7c8 AE	76	#define put_be32(p, v) do { (unsigned int )(p) = htonl(v); } while (0)
	77
	78	#else
	79
	80	#define get_be32(p) ( \
4414b355 KS	81	(((const unsigned char )(p) + 0) << 24) \| \
4414b355 KS	82	(((const unsigned char )(p) + 1) << 16) \| \
87d9869f VM	83	(((const unsigned char )(p) + 2) << 8) \| \
87d9869f VM	84	(((const unsigned char )(p) + 3) << 0) )
5dddf7c8 AE	85	#define put_be32(p, v) do { \
	86	unsigned int __v = (v); \
	87	((unsigned char )(p) + 0) = __v >> 24; \
	88	((unsigned char )(p) + 1) = __v >> 16; \
87d9869f VM	89	((unsigned char )(p) + 2) = __v >> 8; \
87d9869f VM	90	((unsigned char )(p) + 3) = __v >> 0; } while (0)
5dddf7c8 AE	91
	92	#endif
	93
	94	/* This "rolls" over the 512-bit array */
	95	#define W(x) (array[(x)&15])
	96
	97	/*
	98	* Where do we get the source from? The first 16 iterations get it from
	99	* the input data, the next mix it from the 512-bit array.
	100	*/
	101	#define SHA_SRC(t) get_be32(data + t)
	102	#define SHA_MIX(t) SHA_ROL(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)
	103
	104	#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
	105	unsigned int TEMP = input(t); setW(t, TEMP); \
	106	E += TEMP + SHA_ROL(A,5) + (fn) + (constant); \
	107	B = SHA_ROR(B, 2); } while (0)
	108
87d9869f	109	#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
5dddf7c8 AE	110	#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
	111	#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
	112	#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
87d9869f	113	#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )
5dddf7c8 AE	114
	115	static void blk_SHA1_Block(blk_SHA_CTX ctx, const unsigned int data)
	116	{
	117	unsigned int A,B,C,D,E;
	118	unsigned int array[16];
	119
	120	A = ctx->H[0];
	121	B = ctx->H[1];
	122	C = ctx->H[2];
	123	D = ctx->H[3];
	124	E = ctx->H[4];
	125
	126	/* Round 1 - iterations 0-16 take their input from 'data' */
	127	T_0_15( 0, A, B, C, D, E);
	128	T_0_15( 1, E, A, B, C, D);
	129	T_0_15( 2, D, E, A, B, C);
	130	T_0_15( 3, C, D, E, A, B);
	131	T_0_15( 4, B, C, D, E, A);
	132	T_0_15( 5, A, B, C, D, E);
	133	T_0_15( 6, E, A, B, C, D);
	134	T_0_15( 7, D, E, A, B, C);
	135	T_0_15( 8, C, D, E, A, B);
	136	T_0_15( 9, B, C, D, E, A);
	137	T_0_15(10, A, B, C, D, E);
	138	T_0_15(11, E, A, B, C, D);
	139	T_0_15(12, D, E, A, B, C);
	140	T_0_15(13, C, D, E, A, B);
	141	T_0_15(14, B, C, D, E, A);
	142	T_0_15(15, A, B, C, D, E);
	143
	144	/* Round 1 - tail. Input from 512-bit mixing array */
	145	T_16_19(16, E, A, B, C, D);
	146	T_16_19(17, D, E, A, B, C);
	147	T_16_19(18, C, D, E, A, B);
	148	T_16_19(19, B, C, D, E, A);
	149
	150	/* Round 2 */
	151	T_20_39(20, A, B, C, D, E);
	152	T_20_39(21, E, A, B, C, D);
	153	T_20_39(22, D, E, A, B, C);
	154	T_20_39(23, C, D, E, A, B);
	155	T_20_39(24, B, C, D, E, A);
	156	T_20_39(25, A, B, C, D, E);
	157	T_20_39(26, E, A, B, C, D);
	158	T_20_39(27, D, E, A, B, C);
	159	T_20_39(28, C, D, E, A, B);
	160	T_20_39(29, B, C, D, E, A);
	161	T_20_39(30, A, B, C, D, E);
	162	T_20_39(31, E, A, B, C, D);
	163	T_20_39(32, D, E, A, B, C);
	164	T_20_39(33, C, D, E, A, B);
	165	T_20_39(34, B, C, D, E, A);
	166	T_20_39(35, A, B, C, D, E);
	167	T_20_39(36, E, A, B, C, D);
	168	T_20_39(37, D, E, A, B, C);
	169	T_20_39(38, C, D, E, A, B);
	170	T_20_39(39, B, C, D, E, A);
	171
	172	/* Round 3 */
	173	T_40_59(40, A, B, C, D, E);
	174	T_40_59(41, E, A, B, C, D);
	175	T_40_59(42, D, E, A, B, C);
	176	T_40_59(43, C, D, E, A, B);
	177	T_40_59(44, B, C, D, E, A);
178	T_40_59(45, A, B, C, D, E);
179	T_40_59(46, E, A, B, C, D);
180	T_40_59(47, D, E, A, B, C);
181	T_40_59(48, C, D, E, A, B);
182	T_40_59(49, B, C, D, E, A);
183	T_40_59(50, A, B, C, D, E);
184	T_40_59(51, E, A, B, C, D);
185	T_40_59(52, D, E, A, B, C);
186	T_40_59(53, C, D, E, A, B);
187	T_40_59(54, B, C, D, E, A);
188	T_40_59(55, A, B, C, D, E);
189	T_40_59(56, E, A, B, C, D);
190	T_40_59(57, D, E, A, B, C);
191	T_40_59(58, C, D, E, A, B);
192	T_40_59(59, B, C, D, E, A);
193
194	/* Round 4 */
195	T_60_79(60, A, B, C, D, E);
196	T_60_79(61, E, A, B, C, D);
197	T_60_79(62, D, E, A, B, C);
198	T_60_79(63, C, D, E, A, B);
199	T_60_79(64, B, C, D, E, A);
200	T_60_79(65, A, B, C, D, E);
201	T_60_79(66, E, A, B, C, D);
202	T_60_79(67, D, E, A, B, C);
203	T_60_79(68, C, D, E, A, B);
204	T_60_79(69, B, C, D, E, A);
205	T_60_79(70, A, B, C, D, E);
206	T_60_79(71, E, A, B, C, D);
207	T_60_79(72, D, E, A, B, C);
208	T_60_79(73, C, D, E, A, B);
209	T_60_79(74, B, C, D, E, A);
210	T_60_79(75, A, B, C, D, E);
211	T_60_79(76, E, A, B, C, D);
212	T_60_79(77, D, E, A, B, C);
213	T_60_79(78, C, D, E, A, B);
214	T_60_79(79, B, C, D, E, A);
215
216	ctx->H[0] += A;
217	ctx->H[1] += B;
218	ctx->H[2] += C;
219	ctx->H[3] += D;
220	ctx->H[4] += E;
221	}
222
4386ee2a	223	void git__blk_SHA1_Init(blk_SHA_CTX *ctx)
5dddf7c8 AE	224	{
	225	ctx->size = 0;
	226
	227	/* Initialize H with the magic constants (see FIPS180 for constants) */
	228	ctx->H[0] = 0x67452301;
	229	ctx->H[1] = 0xefcdab89;
	230	ctx->H[2] = 0x98badcfe;
	231	ctx->H[3] = 0x10325476;
	232	ctx->H[4] = 0xc3d2e1f0;
	233	}
	234
4386ee2a	235	void git__blk_SHA1_Update(blk_SHA_CTX ctx, const void data, unsigned long len)
5dddf7c8	236	{
e272b103	237	unsigned int lenW = ctx->size & 63;
5dddf7c8 AE	238
	239	ctx->size += len;
	240
	241	/* Read the data into W and process blocks as they get full */
	242	if (lenW) {
e272b103	243	unsigned int left = 64 - lenW;
5dddf7c8 AE	244	if (len < left)
	245	left = len;
	246	memcpy(lenW + (char *)ctx->W, data, left);
	247	lenW = (lenW + left) & 63;
	248	len -= left;
	249	data = ((const char *)data + left);
	250	if (lenW)
	251	return;
	252	blk_SHA1_Block(ctx, ctx->W);
	253	}
	254	while (len >= 64) {
	255	blk_SHA1_Block(ctx, data);
	256	data = ((const char *)data + 64);
	257	len -= 64;
	258	}
	259	if (len)
	260	memcpy(ctx->W, data, len);
	261	}
	262
4386ee2a	263	void git__blk_SHA1_Final(unsigned char hashout[20], blk_SHA_CTX *ctx)
5dddf7c8 AE	264	{
	265	static const unsigned char pad[64] = { 0x80 };
	266	unsigned int padlen[2];
	267	int i;
	268
	269	/* Pad with a binary 1 (ie 0x80), then zeroes, then length */
e272b103 RJ	270	padlen[0] = htonl((uint32_t)(ctx->size >> 29));
e272b103 RJ	271	padlen[1] = htonl((uint32_t)(ctx->size << 3));
5dddf7c8 AE	272
5dddf7c8 AE	273	i = ctx->size & 63;
4386ee2a RJ	274	git__blk_SHA1_Update(ctx, pad, 1+ (63 & (55 - i)));
4386ee2a RJ	275	git__blk_SHA1_Update(ctx, padlen, 8);
5dddf7c8 AE	276
	277	/* Output hash */
	278	for (i = 0; i < 5; i++)
	279	put_be32(hashout + i*4, ctx->H[i]);
	280	}