]>
Commit | Line | Data |
---|---|---|
a42089dd JF |
1 | /****************************************************************************** |
2 | * blkif.h | |
3 | * | |
4 | * Unified block-device I/O interface for Xen guest OSes. | |
5 | * | |
6 | * Copyright (c) 2003-2004, Keir Fraser | |
7 | */ | |
8 | ||
9 | #ifndef __XEN_PUBLIC_IO_BLKIF_H__ | |
10 | #define __XEN_PUBLIC_IO_BLKIF_H__ | |
11 | ||
a1ce3928 DH |
12 | #include <xen/interface/io/ring.h> |
13 | #include <xen/interface/grant_table.h> | |
a42089dd JF |
14 | |
/*
 * Front->back notifications: When enqueuing a new request, sending a
 * notification can be made conditional on req_event (i.e., the generic
 * hold-off mechanism provided by the ring macros). Backends must set
 * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
 *
 * Back->front notifications: When enqueuing a new response, sending a
 * notification can be made conditional on rsp_event (i.e., the generic
 * hold-off mechanism provided by the ring macros). Frontends must set
 * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
 */

/* Handle identifying one virtual block device within a domain. */
typedef uint16_t blkif_vdev_t;
/* Sector index on a virtual block device (see the sector_number fields). */
typedef uint64_t blkif_sector_t;
29 | ||
eb5df87f BL |
30 | /* |
31 | * Multiple hardware queues/rings: | |
32 | * If supported, the backend will write the key "multi-queue-max-queues" to | |
33 | * the directory for that vbd, and set its value to the maximum supported | |
34 | * number of queues. | |
35 | * Frontends that are aware of this feature and wish to use it can write the | |
36 | * key "multi-queue-num-queues" with the number they wish to use, which must be | |
37 | * greater than zero, and no more than the value reported by the backend in | |
38 | * "multi-queue-max-queues". | |
39 | * | |
40 | * For frontends requesting just one queue, the usual event-channel and | |
41 | * ring-ref keys are written as before, simplifying the backend processing | |
42 | * to avoid distinguishing between a frontend that doesn't understand the | |
43 | * multi-queue feature, and one that does, but requested only one queue. | |
44 | * | |
45 | * Frontends requesting two or more queues must not write the toplevel | |
46 | * event-channel and ring-ref keys, instead writing those keys under sub-keys | |
47 | * having the name "queue-N" where N is the integer ID of the queue/ring for | |
48 | * which those keys belong. Queues are indexed from zero. | |
49 | * For example, a frontend with two queues must write the following set of | |
50 | * queue-related keys: | |
51 | * | |
52 | * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" | |
53 | * /local/domain/1/device/vbd/0/queue-0 = "" | |
54 | * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>" | |
55 | * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" | |
56 | * /local/domain/1/device/vbd/0/queue-1 = "" | |
57 | * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>" | |
58 | * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" | |
59 | * | |
60 | * It is also possible to use multiple queues/rings together with | |
61 | * feature multi-page ring buffer. | |
 * For example, a frontend that requests two queues/rings, where each ring
 * buffer is two pages long, must write the following set of related keys:
64 | * | |
65 | * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" | |
66 | * /local/domain/1/device/vbd/0/ring-page-order = "1" | |
67 | * /local/domain/1/device/vbd/0/queue-0 = "" | |
68 | * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>" | |
69 | * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>" | |
70 | * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" | |
71 | * /local/domain/1/device/vbd/0/queue-1 = "" | |
72 | * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>" | |
73 | * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>" | |
74 | * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" | |
75 | * | |
76 | */ | |
77 | ||
a42089dd JF |
78 | /* |
79 | * REQUEST CODES. | |
80 | */ | |
/*
 * REQUEST CODES.
 */
#define BLKIF_OP_READ 0
#define BLKIF_OP_WRITE 1
/*
 * Recognised only if "feature-barrier" is present in backend xenbus info.
 * The "feature-barrier" node contains a boolean indicating whether barrier
 * requests are likely to succeed or fail. Either way, a barrier request
 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
 * the underlying block-device hardware. The boolean simply indicates whether
 * or not it is worthwhile for the frontend to attempt barrier requests.
 * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
 * create the "feature-barrier" node!
 */
#define BLKIF_OP_WRITE_BARRIER 2
94 | ||
6dcfb751 KRW |
/*
 * Recognised if "feature-flush-cache" is present in backend xenbus
 * info. A flush will ask the underlying storage hardware to flush its
 * non-volatile caches as appropriate. The "feature-flush-cache" node
 * contains a boolean indicating whether flush requests are likely to
 * succeed or fail. Either way, a flush request may fail at any time
 * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
 * block-device hardware. The boolean simply indicates whether or not it
 * is worthwhile for the frontend to attempt flushes. If a backend does
 * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the
 * "feature-flush-cache" node!
 */
#define BLKIF_OP_FLUSH_DISKCACHE 3
32a8d26c LD |
108 | |
/*
 * Recognised only if "feature-discard" is present in backend xenbus info.
 * The "feature-discard" node contains a boolean indicating whether trim
 * (ATA) or unmap (SCSI) - conveniently called discard requests - are likely
 * to succeed or fail. Either way, a discard request
 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
 * the underlying block-device hardware. The boolean simply indicates whether
 * or not it is worthwhile for the frontend to attempt discard requests.
 * If a backend does not recognise BLKIF_OP_DISCARD, it should *not*
 * create the "feature-discard" node!
 *
 * Discard operation is a request for the underlying block device to mark
 * extents to be erased. However, discard does not guarantee that the blocks
 * will be erased from the device - it is just a hint to the device
 * controller that these blocks are no longer in use. What the device
 * controller does with that information is left to the controller.
 * Discard operations are passed with sector_number as the
 * sector index to begin discard operations at and nr_sectors as the number of
 * sectors to be discarded. The specified sectors should be discarded if the
 * underlying block device supports trim (ATA) or unmap (SCSI) operations,
 * or a BLKIF_RSP_EOPNOTSUPP should be returned.
 * More information about trim/unmap operations at:
 * http://t13.org/Documents/UploadedDocuments/docs2008/
 *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
 * http://www.seagate.com/staticfiles/support/disc/manuals/
 *     Interface%20manuals/100293068c.pdf
 * The backend can optionally provide three extra XenBus attributes to
 * further optimize the discard functionality:
 * 'discard-alignment' - Devices that support discard functionality may
 * internally allocate space in units that are bigger than the exported
 * logical block size. The discard-alignment parameter indicates how many bytes
 * the beginning of the partition is offset from the internal allocation unit's
 * natural alignment.
 * 'discard-granularity' - Devices that support discard functionality may
 * internally allocate space using units that are bigger than the logical block
 * size. The discard-granularity parameter indicates the size of the internal
 * allocation unit in bytes if reported by the device. Otherwise the
 * discard-granularity will be set to match the device's physical block size.
 * 'discard-secure' - All copies of the discarded sectors (potentially created
 * by garbage collection) must also be erased. To use this feature, the flag
 * BLKIF_DISCARD_SECURE must be set in the blkif_request_discard.
 */
#define BLKIF_OP_DISCARD 5
152 | ||
402b27f9 RPM |
/*
 * Recognized if "feature-max-indirect-segments" in present in the backend
 * xenbus info. The "feature-max-indirect-segments" node contains the maximum
 * number of segments allowed by the backend per request. If the node is
 * present, the frontend might use blkif_request_indirect structs in order to
 * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
 * maximum number of indirect segments is fixed by the backend, but the
 * frontend can issue requests with any number of indirect segments as long as
 * it's less than the number provided by the backend. The indirect_grefs field
 * in blkif_request_indirect should be filled by the frontend with the
 * grant references of the pages that are holding the indirect segments.
 * These pages are filled with an array of blkif_request_segment that hold the
 * information about the segments. The number of indirect pages to use is
 * determined by the number of segments an indirect request contains. Every
 * indirect page can contain a maximum of
 * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
 * calculate the number of indirect pages to use we have to do
 * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
 *
 * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
 * create the "feature-max-indirect-segments" node!
 */
#define BLKIF_OP_INDIRECT 6
176 | ||
a42089dd JF |
/*
 * Maximum scatter/gather segments per request.
 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
 * NB. This could be 12 if the ring indexes weren't stored in the same page.
 */
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11

/* Maximum number of indirect-segment pages in one BLKIF_OP_INDIRECT request. */
#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
185 | ||
80bfa2f6 RPM |
/* One scatter/gather element of a read/write (or indirect) request. */
struct blkif_request_segment {
	grant_ref_t gref;        /* reference to I/O buffer frame        */
	/* @first_sect: first sector in frame to transfer (inclusive).   */
	/* @last_sect: last sector in frame to transfer (inclusive).     */
	uint8_t     first_sect, last_sect;
};
402b27f9 | 192 | |
/*
 * Body of a BLKIF_OP_READ/BLKIF_OP_WRITE request.  Layout is shared ABI
 * with the backend (note the packing and the explicit 64-bit padding that
 * pins the offset of 'id'); do not reorder or resize fields.
 */
struct blkif_request_rw {
	uint8_t        nr_segments;  /* number of segments                   */
	blkif_vdev_t   handle;       /* only for read/write requests         */
#ifndef CONFIG_X86_32
	uint32_t       _pad1;        /* offsetof(blkif_request,u.rw.id) == 8 */
#endif
	uint64_t       id;           /* private guest value, echoed in resp  */
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));
a42089dd | 203 | |
/* Body of a BLKIF_OP_DISCARD request; pads keep 'id' at the same offset
 * as in blkif_request_rw. */
struct blkif_request_discard {
	uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero.        */
#define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0      */
	blkif_vdev_t   _pad1;        /* only for read/write requests         */
#ifndef CONFIG_X86_32
	uint32_t       _pad2;        /* offsetof(blkif_req..,u.discard.id)==8*/
#endif
	uint64_t       id;           /* private guest value, echoed in resp  */
	blkif_sector_t sector_number;/* first sector to discard              */
	uint64_t       nr_sectors;   /* number of sectors to discard         */
	uint8_t        _pad3;
} __attribute__((__packed__));
32a8d26c | 216 | |
0e367ae4 DV |
/* Catch-all request body: only 'id' is meaningful; the pads mirror the
 * other request layouts so 'id' lands at the common offset. */
struct blkif_request_other {
	uint8_t      _pad1;
	blkif_vdev_t _pad2;        /* only for read/write requests         */
#ifndef CONFIG_X86_32
	uint32_t     _pad3;        /* offsetof(blkif_req..,u.other.id)==8  */
#endif
	uint64_t     id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));
225 | ||
402b27f9 RPM |
/*
 * Body of a BLKIF_OP_INDIRECT request.  Segment descriptors are not inline;
 * they live in the granted pages listed in indirect_grefs (see the
 * BLKIF_OP_INDIRECT comment above).
 */
struct blkif_request_indirect {
	uint8_t        indirect_op;  /* BLKIF_OP_* actually being performed
	                              * (presumably read/write — confirm against
	                              * backend implementation)               */
	uint16_t       nr_segments;  /* total indirect segments               */
#ifndef CONFIG_X86_32
	uint32_t       _pad1;        /* offsetof(blkif_...,u.indirect.id) == 8 */
#endif
	uint64_t       id;           /* private guest value, echoed in resp   */
	blkif_sector_t sector_number;/* start sector idx on disk              */
	blkif_vdev_t   handle;
	uint16_t       _pad2;
	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
#ifndef CONFIG_X86_32
	uint32_t       _pad3;        /* make it 64 byte aligned */
#else
	uint64_t       _pad3;        /* make it 64 byte aligned */
#endif
} __attribute__((__packed__));
243 | ||
51de6952 OS |
/* Generic request: 'operation' (a BLKIF_OP_* code) selects which union
 * member is valid. */
struct blkif_request {
	uint8_t operation;           /* BLKIF_OP_???                         */
	union {
		struct blkif_request_rw rw;
		struct blkif_request_discard discard;
		struct blkif_request_other other;
		struct blkif_request_indirect indirect;
	} u;
} __attribute__((__packed__));
51de6952 | 253 | |
a42089dd JF |
/* Response placed on the ring by the backend for a completed request. */
struct blkif_response {
	uint64_t id;              /* copied from request */
	uint8_t  operation;       /* copied from request */
	int16_t  status;          /* BLKIF_RSP_??? */
};
259 | ||
/*
 * STATUS RETURN CODES.
 */
 /* Operation not supported (only happens on barrier writes). */
#define BLKIF_RSP_EOPNOTSUPP  -2
 /* Operation failed for some unspecified reason (-EIO). */
#define BLKIF_RSP_ERROR       -1
 /* Operation completed successfully. */
#define BLKIF_RSP_OKAY         0

/*
 * Generate blkif ring structures and types.
 */

DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);

/* "info" flags advertised for a virtual disk. */
#define VDISK_CDROM        0x1
#define VDISK_REMOVABLE    0x2
#define VDISK_READONLY     0x4
279 | ||
c80a4209 SS |
/* Xen-defined major numbers for virtual disks; they deliberately mirror
 * the traditional Linux IDE/SCSI disk majors, hence looking strangely
 * familiar. */
#define XEN_IDE0_MAJOR		3
#define XEN_IDE1_MAJOR		22
#define XEN_SCSI_DISK0_MAJOR	8
#define XEN_SCSI_DISK1_MAJOR	65
#define XEN_SCSI_DISK2_MAJOR	66
#define XEN_SCSI_DISK3_MAJOR	67
#define XEN_SCSI_DISK4_MAJOR	68
#define XEN_SCSI_DISK5_MAJOR	69
#define XEN_SCSI_DISK6_MAJOR	70
#define XEN_SCSI_DISK7_MAJOR	71
#define XEN_SCSI_DISK8_MAJOR	128
#define XEN_SCSI_DISK9_MAJOR	129
#define XEN_SCSI_DISK10_MAJOR	130
#define XEN_SCSI_DISK11_MAJOR	131
#define XEN_SCSI_DISK12_MAJOR	132
#define XEN_SCSI_DISK13_MAJOR	133
#define XEN_SCSI_DISK14_MAJOR	134
#define XEN_SCSI_DISK15_MAJOR	135
300 | ||
a42089dd | 301 | #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ |