]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * CDDL HEADER START | |
3 | * | |
4 | * This file and its contents are supplied under the terms of the | |
5 | * Common Development and Distribution License ("CDDL"), version 1.0. | |
6 | * You may only use this file in accordance with the terms of version | |
7 | * 1.0 of the CDDL. | |
8 | * | |
9 | * A full copy of the text of the CDDL should have accompanied this | |
10 | * source. A copy of the CDDL is also available via the Internet at | |
11 | * http://www.illumos.org/license/CDDL. | |
12 | * | |
13 | * CDDL HEADER END | |
14 | */ | |
15 | ||
16 | /* | |
17 | * Copyright (c) 2013, 2016 by Delphix. All rights reserved. | |
18 | */ | |
19 | ||
20 | #include <sys/zfs_context.h> | |
21 | #include <sys/zio.h> | |
22 | #include <sys/zio_compress.h> | |
23 | ||
24 | /* | |
25 | * Embedded-data Block Pointers | |
26 | * | |
27 | * Normally, block pointers point (via their DVAs) to a block which holds data. | |
28 | * If the data that we need to store is very small, this is an inefficient | |
29 | * use of space, because a block must be at minimum 1 sector (typically 512 | |
30 | * bytes or 4KB). Additionally, reading these small blocks tends to generate | |
31 | * more random reads. | |
32 | * | |
33 | * Embedded-data Block Pointers allow small pieces of data (the "payload", | |
34 | * up to 112 bytes) to be stored in the block pointer itself, instead of | |
35 | * being pointed to. The "Pointer" part of this name is a bit of a | |
36 | * misnomer, as nothing is pointed to. | |
37 | * | |
38 | * BP_EMBEDDED_TYPE_DATA block pointers allow highly-compressible data to | |
39 | * be embedded in the block pointer. The logic for this is handled in | |
40 | * the SPA, by the zio pipeline. Therefore most code outside the zio | |
41 | * pipeline doesn't need special cases to handle these block pointers. | |
42 | * | |
43 | * See spa.h for details on the exact layout of embedded block pointers. | |
44 | */ | |
45 | ||
46 | void | |
47 | encode_embedded_bp_compressed(blkptr_t *bp, void *data, | |
48 | enum zio_compress comp, int uncompressed_size, int compressed_size) | |
49 | { | |
50 | uint64_t *bp64 = (uint64_t *)bp; | |
51 | uint64_t w = 0; | |
52 | uint8_t *data8 = data; | |
53 | ||
54 | ASSERT3U(compressed_size, <=, BPE_PAYLOAD_SIZE); | |
55 | ASSERT(uncompressed_size == compressed_size || | |
56 | comp != ZIO_COMPRESS_OFF); | |
57 | ASSERT3U(comp, >=, ZIO_COMPRESS_OFF); | |
58 | ASSERT3U(comp, <, ZIO_COMPRESS_FUNCTIONS); | |
59 | ||
60 | bzero(bp, sizeof (*bp)); | |
61 | BP_SET_EMBEDDED(bp, B_TRUE); | |
62 | BP_SET_COMPRESS(bp, comp); | |
63 | BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); | |
64 | BPE_SET_LSIZE(bp, uncompressed_size); | |
65 | BPE_SET_PSIZE(bp, compressed_size); | |
66 | ||
67 | /* | |
68 | * Encode the byte array into the words of the block pointer. | |
69 | * First byte goes into low bits of first word (little endian). | |
70 | */ | |
71 | for (int i = 0; i < compressed_size; i++) { | |
72 | BF64_SET(w, (i % sizeof (w)) * NBBY, NBBY, data8[i]); | |
73 | if (i % sizeof (w) == sizeof (w) - 1) { | |
74 | /* we've reached the end of a word */ | |
75 | ASSERT3P(bp64, <, bp + 1); | |
76 | *bp64 = w; | |
77 | bp64++; | |
78 | if (!BPE_IS_PAYLOADWORD(bp, bp64)) | |
79 | bp64++; | |
80 | w = 0; | |
81 | } | |
82 | } | |
83 | /* write last partial word */ | |
84 | if (bp64 < (uint64_t *)(bp + 1)) | |
85 | *bp64 = w; | |
86 | } | |
87 | ||
88 | /* | |
89 | * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be | |
90 | * more than BPE_PAYLOAD_SIZE bytes). | |
91 | */ | |
92 | void | |
93 | decode_embedded_bp_compressed(const blkptr_t *bp, void *buf) | |
94 | { | |
95 | int psize; | |
96 | uint8_t *buf8 = buf; | |
97 | uint64_t w = 0; | |
98 | const uint64_t *bp64 = (const uint64_t *)bp; | |
99 | ||
100 | ASSERT(BP_IS_EMBEDDED(bp)); | |
101 | ||
102 | psize = BPE_GET_PSIZE(bp); | |
103 | ||
104 | /* | |
105 | * Decode the words of the block pointer into the byte array. | |
106 | * Low bits of first word are the first byte (little endian). | |
107 | */ | |
108 | for (int i = 0; i < psize; i++) { | |
109 | if (i % sizeof (w) == 0) { | |
110 | /* beginning of a word */ | |
111 | ASSERT3P(bp64, <, bp + 1); | |
112 | w = *bp64; | |
113 | bp64++; | |
114 | if (!BPE_IS_PAYLOADWORD(bp, bp64)) | |
115 | bp64++; | |
116 | } | |
117 | buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY); | |
118 | } | |
119 | } | |
120 | ||
121 | /* | |
122 | * Fill in the buffer with the (decompressed) payload of the embedded | |
123 | * blkptr_t. Takes into account compression and byteorder (the payload is | |
124 | * treated as a stream of bytes). | |
125 | * Return 0 on success, or ENOSPC if it won't fit in the buffer. | |
126 | */ | |
127 | int | |
128 | decode_embedded_bp(const blkptr_t *bp, void *buf, int buflen) | |
129 | { | |
130 | int lsize, psize; | |
131 | ||
132 | ASSERT(BP_IS_EMBEDDED(bp)); | |
133 | ||
134 | lsize = BPE_GET_LSIZE(bp); | |
135 | psize = BPE_GET_PSIZE(bp); | |
136 | ||
137 | if (lsize > buflen) | |
138 | return (SET_ERROR(ENOSPC)); | |
139 | ASSERT3U(lsize, ==, buflen); | |
140 | ||
141 | if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) { | |
142 | uint8_t dstbuf[BPE_PAYLOAD_SIZE]; | |
143 | decode_embedded_bp_compressed(bp, dstbuf); | |
144 | VERIFY0(zio_decompress_data_buf(BP_GET_COMPRESS(bp), | |
145 | dstbuf, buf, psize, buflen)); | |
146 | } else { | |
147 | ASSERT3U(lsize, ==, psize); | |
148 | decode_embedded_bp_compressed(bp, buf); | |
149 | } | |
150 | ||
151 | return (0); | |
152 | } |