]>
Commit | Line | Data |
---|---|---|
c93407d0 BH |
1 | /* |
2 | * pNFS Objects layout implementation over open-osd initiator library | |
3 | * | |
4 | * Copyright (C) 2009 Panasas Inc. [year of first publication] | |
5 | * All rights reserved. | |
6 | * | |
7 | * Benny Halevy <bhalevy@panasas.com> | |
8 | * Boaz Harrosh <bharrosh@panasas.com> | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of the GNU General Public License version 2 | |
12 | * See the file COPYING included with this distribution for more details. | |
13 | * | |
14 | * Redistribution and use in source and binary forms, with or without | |
15 | * modification, are permitted provided that the following conditions | |
16 | * are met: | |
17 | * | |
18 | * 1. Redistributions of source code must retain the above copyright | |
19 | * notice, this list of conditions and the following disclaimer. | |
20 | * 2. Redistributions in binary form must reproduce the above copyright | |
21 | * notice, this list of conditions and the following disclaimer in the | |
22 | * documentation and/or other materials provided with the distribution. | |
23 | * 3. Neither the name of the Panasas company nor the names of its | |
24 | * contributors may be used to endorse or promote products derived | |
25 | * from this software without specific prior written permission. | |
26 | * | |
27 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | |
28 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | |
29 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
30 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | |
34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
38 | */ | |
39 | ||
40 | #include <linux/module.h> | |
af4f5b54 | 41 | #include <scsi/osd_ore.h> |
09f5bf4e BH |
42 | |
43 | #include "objlayout.h" | |
44 | ||
45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | |
46 | ||
b6c05f16 BH |
47 | struct objio_dev_ent { |
48 | struct nfs4_deviceid_node id_node; | |
af4f5b54 | 49 | struct ore_dev od; |
b6c05f16 BH |
50 | }; |
51 | ||
52 | static void | |
53 | objio_free_deviceid_node(struct nfs4_deviceid_node *d) | |
54 | { | |
55 | struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); | |
56 | ||
af4f5b54 BH |
57 | dprintk("%s: free od=%p\n", __func__, de->od.od); |
58 | osduld_put_device(de->od.od); | |
b6c05f16 BH |
59 | kfree(de); |
60 | } | |
61 | ||
62 | static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss, | |
63 | const struct nfs4_deviceid *d_id) | |
64 | { | |
65 | struct nfs4_deviceid_node *d; | |
66 | struct objio_dev_ent *de; | |
67 | ||
68 | d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id); | |
69 | if (!d) | |
70 | return NULL; | |
71 | ||
72 | de = container_of(d, struct objio_dev_ent, id_node); | |
73 | return de; | |
74 | } | |
75 | ||
76 | static struct objio_dev_ent * | |
77 | _dev_list_add(const struct nfs_server *nfss, | |
78 | const struct nfs4_deviceid *d_id, struct osd_dev *od, | |
79 | gfp_t gfp_flags) | |
80 | { | |
81 | struct nfs4_deviceid_node *d; | |
82 | struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags); | |
83 | struct objio_dev_ent *n; | |
84 | ||
85 | if (!de) { | |
86 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); | |
87 | return NULL; | |
88 | } | |
89 | ||
90 | dprintk("%s: Adding od=%p\n", __func__, od); | |
91 | nfs4_init_deviceid_node(&de->id_node, | |
92 | nfss->pnfs_curr_ld, | |
93 | nfss->nfs_client, | |
94 | d_id); | |
af4f5b54 | 95 | de->od.od = od; |
b6c05f16 BH |
96 | |
97 | d = nfs4_insert_deviceid_node(&de->id_node); | |
98 | n = container_of(d, struct objio_dev_ent, id_node); | |
99 | if (n != de) { | |
af4f5b54 | 100 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od); |
b6c05f16 BH |
101 | objio_free_deviceid_node(&de->id_node); |
102 | de = n; | |
103 | } | |
104 | ||
b6c05f16 BH |
105 | return de; |
106 | } | |
107 | ||
09f5bf4e BH |
108 | struct objio_segment { |
109 | struct pnfs_layout_segment lseg; | |
110 | ||
af4f5b54 BH |
111 | struct ore_layout layout; |
112 | struct ore_components oc; | |
09f5bf4e BH |
113 | }; |
114 | ||
115 | static inline struct objio_segment * | |
116 | OBJIO_LSEG(struct pnfs_layout_segment *lseg) | |
117 | { | |
118 | return container_of(lseg, struct objio_segment, lseg); | |
119 | } | |
120 | ||
04f83450 BH |
121 | struct objio_state { |
122 | /* Generic layer */ | |
e2e04355 | 123 | struct objlayout_io_res oir; |
04f83450 | 124 | |
96218556 | 125 | bool sync; |
eecfc631 BH |
126 | /*FIXME: Support for extra_bytes at ore_get_rw_state() */ |
127 | struct ore_io_state *ios; | |
04f83450 BH |
128 | }; |
129 | ||
b6c05f16 BH |
130 | /* Send and wait for a get_device_info of devices in the layout, |
131 | then look them up with the osd_initiator library */ | |
af4f5b54 BH |
132 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, |
133 | struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id, | |
134 | gfp_t gfp_flags) | |
b6c05f16 BH |
135 | { |
136 | struct pnfs_osd_deviceaddr *deviceaddr; | |
b6c05f16 BH |
137 | struct objio_dev_ent *ode; |
138 | struct osd_dev *od; | |
139 | struct osd_dev_info odi; | |
18d98f6c | 140 | bool retry_flag = true; |
b6c05f16 BH |
141 | int err; |
142 | ||
b6c05f16 | 143 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); |
af4f5b54 BH |
144 | if (ode) { |
145 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ | |
146 | return 0; | |
147 | } | |
b6c05f16 BH |
148 | |
149 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); | |
150 | if (unlikely(err)) { | |
151 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", | |
152 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); | |
af4f5b54 | 153 | return err; |
b6c05f16 BH |
154 | } |
155 | ||
156 | odi.systemid_len = deviceaddr->oda_systemid.len; | |
157 | if (odi.systemid_len > sizeof(odi.systemid)) { | |
af4f5b54 BH |
158 | dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n", |
159 | __func__, sizeof(odi.systemid)); | |
b6c05f16 BH |
160 | err = -EINVAL; |
161 | goto out; | |
162 | } else if (odi.systemid_len) | |
163 | memcpy(odi.systemid, deviceaddr->oda_systemid.data, | |
164 | odi.systemid_len); | |
165 | odi.osdname_len = deviceaddr->oda_osdname.len; | |
166 | odi.osdname = (u8 *)deviceaddr->oda_osdname.data; | |
167 | ||
168 | if (!odi.osdname_len && !odi.systemid_len) { | |
169 | dprintk("%s: !odi.osdname_len && !odi.systemid_len\n", | |
170 | __func__); | |
171 | err = -ENODEV; | |
172 | goto out; | |
173 | } | |
174 | ||
18d98f6c | 175 | retry_lookup: |
b6c05f16 BH |
176 | od = osduld_info_lookup(&odi); |
177 | if (unlikely(IS_ERR(od))) { | |
178 | err = PTR_ERR(od); | |
179 | dprintk("%s: osduld_info_lookup => %d\n", __func__, err); | |
18d98f6c SB |
180 | if (err == -ENODEV && retry_flag) { |
181 | err = objlayout_autologin(deviceaddr); | |
182 | if (likely(!err)) { | |
183 | retry_flag = false; | |
184 | goto retry_lookup; | |
185 | } | |
186 | } | |
b6c05f16 BH |
187 | goto out; |
188 | } | |
189 | ||
190 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, | |
191 | gfp_flags); | |
af4f5b54 BH |
192 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ |
193 | dprintk("Adding new dev_id(%llx:%llx)\n", | |
194 | _DEVID_LO(d_id), _DEVID_HI(d_id)); | |
b6c05f16 | 195 | out: |
b6c05f16 | 196 | objlayout_put_deviceinfo(deviceaddr); |
b6c05f16 BH |
197 | return err; |
198 | } | |
199 | ||
af4f5b54 BH |
200 | static void copy_single_comp(struct ore_components *oc, unsigned c, |
201 | struct pnfs_osd_object_cred *src_comp) | |
09f5bf4e | 202 | { |
af4f5b54 | 203 | struct ore_comp *ocomp = &oc->comps[c]; |
09f5bf4e | 204 | |
af4f5b54 BH |
205 | WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */ |
206 | WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred)); | |
09f5bf4e | 207 | |
af4f5b54 BH |
208 | ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id; |
209 | ocomp->obj.id = src_comp->oc_object_id.oid_object_id; | |
09f5bf4e | 210 | |
af4f5b54 BH |
211 | memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); |
212 | } | |
213 | ||
1385b811 | 214 | static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, |
af4f5b54 BH |
215 | struct objio_segment **pseg) |
216 | { | |
5318a29c BH |
217 | /* This is the in memory structure of the objio_segment |
218 | * | |
219 | * struct __alloc_objio_segment { | |
220 | * struct objio_segment olseg; | |
221 | * struct ore_dev *ods[numdevs]; | |
222 | * struct ore_comp comps[numdevs]; | |
223 | * } *aolseg; | |
224 | * NOTE: The code as above compiles and runs perfectly. It is elegant, | |
225 | * type safe and compact. At some Past time Linus has decided he does not | |
226 | * like variable length arrays, For the sake of this principal we uglify | |
227 | * the code as below. | |
228 | */ | |
229 | struct objio_segment *lseg; | |
230 | size_t lseg_size = sizeof(*lseg) + | |
231 | numdevs * sizeof(lseg->oc.ods[0]) + | |
232 | numdevs * sizeof(*lseg->oc.comps); | |
233 | ||
234 | lseg = kzalloc(lseg_size, gfp_flags); | |
235 | if (unlikely(!lseg)) { | |
af4f5b54 | 236 | dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, |
5318a29c | 237 | numdevs, lseg_size); |
af4f5b54 BH |
238 | return -ENOMEM; |
239 | } | |
240 | ||
5318a29c BH |
241 | lseg->oc.numdevs = numdevs; |
242 | lseg->oc.single_comp = EC_MULTPLE_COMPS; | |
243 | lseg->oc.ods = (void *)(lseg + 1); | |
244 | lseg->oc.comps = (void *)(lseg->oc.ods + numdevs); | |
af4f5b54 | 245 | |
5318a29c | 246 | *pseg = lseg; |
af4f5b54 | 247 | return 0; |
09f5bf4e BH |
248 | } |
249 | ||
250 | int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |
251 | struct pnfs_layout_hdr *pnfslay, | |
252 | struct pnfs_layout_range *range, | |
253 | struct xdr_stream *xdr, | |
254 | gfp_t gfp_flags) | |
255 | { | |
256 | struct objio_segment *objio_seg; | |
257 | struct pnfs_osd_xdr_decode_layout_iter iter; | |
258 | struct pnfs_osd_layout layout; | |
af4f5b54 BH |
259 | struct pnfs_osd_object_cred src_comp; |
260 | unsigned cur_comp; | |
09f5bf4e BH |
261 | int err; |
262 | ||
263 | err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); | |
264 | if (unlikely(err)) | |
265 | return err; | |
266 | ||
af4f5b54 | 267 | err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg); |
09f5bf4e BH |
268 | if (unlikely(err)) |
269 | return err; | |
270 | ||
af4f5b54 BH |
271 | objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit; |
272 | objio_seg->layout.group_width = layout.olo_map.odm_group_width; | |
273 | objio_seg->layout.group_depth = layout.olo_map.odm_group_depth; | |
274 | objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; | |
275 | objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm; | |
09f5bf4e | 276 | |
af4f5b54 BH |
277 | err = ore_verify_layout(layout.olo_map.odm_num_comps, |
278 | &objio_seg->layout); | |
09f5bf4e BH |
279 | if (unlikely(err)) |
280 | goto err; | |
281 | ||
af4f5b54 BH |
282 | objio_seg->oc.first_dev = layout.olo_comps_index; |
283 | cur_comp = 0; | |
284 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { | |
285 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); | |
286 | err = objio_devices_lookup(pnfslay, objio_seg, cur_comp, | |
287 | &src_comp.oc_object_id.oid_device_id, | |
288 | gfp_flags); | |
289 | if (err) | |
290 | goto err; | |
291 | ++cur_comp; | |
09f5bf4e | 292 | } |
af4f5b54 BH |
293 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ |
294 | if (unlikely(err)) | |
295 | goto err; | |
93420770 | 296 | |
09f5bf4e BH |
297 | *outp = &objio_seg->lseg; |
298 | return 0; | |
299 | ||
300 | err: | |
301 | kfree(objio_seg); | |
302 | dprintk("%s: Error: return %d\n", __func__, err); | |
303 | *outp = NULL; | |
304 | return err; | |
305 | } | |
306 | ||
307 | void objio_free_lseg(struct pnfs_layout_segment *lseg) | |
308 | { | |
b6c05f16 | 309 | int i; |
09f5bf4e BH |
310 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); |
311 | ||
af4f5b54 BH |
312 | for (i = 0; i < objio_seg->oc.numdevs; i++) { |
313 | struct ore_dev *od = objio_seg->oc.ods[i]; | |
314 | struct objio_dev_ent *ode; | |
315 | ||
316 | if (!od) | |
b6c05f16 | 317 | break; |
af4f5b54 BH |
318 | ode = container_of(od, typeof(*ode), od); |
319 | nfs4_put_deviceid_node(&ode->id_node); | |
b6c05f16 | 320 | } |
09f5bf4e BH |
321 | kfree(objio_seg); |
322 | } | |
323 | ||
96218556 | 324 | static int |
eecfc631 | 325 | objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading, |
96218556 BH |
326 | struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase, |
327 | loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags, | |
328 | struct objio_state **outp) | |
04f83450 BH |
329 | { |
330 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | |
eecfc631 BH |
331 | struct ore_io_state *ios; |
332 | int ret; | |
96218556 BH |
333 | struct __alloc_objio_state { |
334 | struct objio_state objios; | |
af4f5b54 | 335 | struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs]; |
96218556 BH |
336 | } *aos; |
337 | ||
338 | aos = kzalloc(sizeof(*aos), gfp_flags); | |
339 | if (unlikely(!aos)) | |
04f83450 BH |
340 | return -ENOMEM; |
341 | ||
af4f5b54 | 342 | objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs, |
96218556 BH |
343 | aos->ioerrs, rpcdata, pnfs_layout_type); |
344 | ||
eecfc631 BH |
345 | ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading, |
346 | offset, count, &ios); | |
347 | if (unlikely(ret)) { | |
348 | kfree(aos); | |
349 | return ret; | |
350 | } | |
351 | ||
96218556 BH |
352 | ios->pages = pages; |
353 | ios->pgbase = pgbase; | |
eecfc631 | 354 | ios->private = aos; |
96218556 BH |
355 | BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); |
356 | ||
eecfc631 BH |
357 | aos->objios.sync = 0; |
358 | aos->objios.ios = ios; | |
359 | *outp = &aos->objios; | |
04f83450 BH |
360 | return 0; |
361 | } | |
362 | ||
e2e04355 | 363 | void objio_free_result(struct objlayout_io_res *oir) |
04f83450 | 364 | { |
eecfc631 | 365 | struct objio_state *objios = container_of(oir, struct objio_state, oir); |
04f83450 | 366 | |
eecfc631 BH |
367 | ore_put_io_state(objios->ios); |
368 | kfree(objios); | |
04f83450 BH |
369 | } |
370 | ||
adb58535 BH |
371 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) |
372 | { | |
373 | switch (oep) { | |
374 | case OSD_ERR_PRI_NO_ERROR: | |
375 | return (enum pnfs_osd_errno)0; | |
376 | ||
377 | case OSD_ERR_PRI_CLEAR_PAGES: | |
378 | BUG_ON(1); | |
379 | return 0; | |
380 | ||
381 | case OSD_ERR_PRI_RESOURCE: | |
382 | return PNFS_OSD_ERR_RESOURCE; | |
383 | case OSD_ERR_PRI_BAD_CRED: | |
384 | return PNFS_OSD_ERR_BAD_CRED; | |
385 | case OSD_ERR_PRI_NO_ACCESS: | |
386 | return PNFS_OSD_ERR_NO_ACCESS; | |
387 | case OSD_ERR_PRI_UNREACHABLE: | |
388 | return PNFS_OSD_ERR_UNREACHABLE; | |
389 | case OSD_ERR_PRI_NOT_FOUND: | |
390 | return PNFS_OSD_ERR_NOT_FOUND; | |
391 | case OSD_ERR_PRI_NO_SPACE: | |
392 | return PNFS_OSD_ERR_NO_SPACE; | |
393 | default: | |
394 | WARN_ON(1); | |
395 | /* fallthrough */ | |
396 | case OSD_ERR_PRI_EIO: | |
397 | return PNFS_OSD_ERR_EIO; | |
398 | } | |
399 | } | |
400 | ||
eecfc631 | 401 | static void __on_dev_error(struct ore_io_state *ios, |
af4f5b54 BH |
402 | struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep, |
403 | u64 dev_offset, u64 dev_len) | |
404 | { | |
405 | struct objio_state *objios = ios->private; | |
406 | struct pnfs_osd_objid pooid; | |
407 | struct objio_dev_ent *ode = container_of(od, typeof(*ode), od); | |
408 | /* FIXME: what to do with more-then-one-group layouts. We need to | |
409 | * translate from ore_io_state index to oc->comps index | |
410 | */ | |
411 | unsigned comp = dev_index; | |
412 | ||
413 | pooid.oid_device_id = ode->id_node.deviceid; | |
414 | pooid.oid_partition_id = ios->oc->comps[comp].obj.partition; | |
415 | pooid.oid_object_id = ios->oc->comps[comp].obj.id; | |
416 | ||
417 | objlayout_io_set_result(&objios->oir, comp, | |
418 | &pooid, osd_pri_2_pnfs_err(oep), | |
eecfc631 | 419 | dev_offset, dev_len, !ios->reading); |
af4f5b54 BH |
420 | } |
421 | ||
04f83450 BH |
422 | /* |
423 | * read | |
424 | */ | |
eecfc631 | 425 | static void _read_done(struct ore_io_state *ios, void *private) |
04f83450 | 426 | { |
eecfc631 | 427 | struct objio_state *objios = private; |
04f83450 | 428 | ssize_t status; |
eecfc631 | 429 | int ret = ore_check_io(ios, &__on_dev_error); |
04f83450 | 430 | |
eecfc631 | 431 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
04f83450 BH |
432 | |
433 | if (likely(!ret)) | |
434 | status = ios->length; | |
435 | else | |
436 | status = ret; | |
437 | ||
eecfc631 | 438 | objlayout_read_done(&objios->oir, status, objios->sync); |
04f83450 BH |
439 | } |
440 | ||
96218556 | 441 | int objio_read_pagelist(struct nfs_read_data *rdata) |
04f83450 | 442 | { |
cd841605 | 443 | struct nfs_pgio_header *hdr = rdata->header; |
eecfc631 | 444 | struct objio_state *objios; |
04f83450 BH |
445 | int ret; |
446 | ||
cd841605 FI |
447 | ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, |
448 | hdr->lseg, rdata->args.pages, rdata->args.pgbase, | |
96218556 | 449 | rdata->args.offset, rdata->args.count, rdata, |
eecfc631 | 450 | GFP_KERNEL, &objios); |
04f83450 BH |
451 | if (unlikely(ret)) |
452 | return ret; | |
453 | ||
eecfc631 BH |
454 | objios->ios->done = _read_done; |
455 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, | |
456 | rdata->args.offset, rdata->args.count); | |
9909d45a BH |
457 | ret = ore_read(objios->ios); |
458 | if (unlikely(ret)) | |
459 | objio_free_result(&objios->oir); | |
460 | return ret; | |
04f83450 BH |
461 | } |
462 | ||
463 | /* | |
464 | * write | |
465 | */ | |
eecfc631 | 466 | static void _write_done(struct ore_io_state *ios, void *private) |
04f83450 | 467 | { |
eecfc631 | 468 | struct objio_state *objios = private; |
04f83450 | 469 | ssize_t status; |
eecfc631 | 470 | int ret = ore_check_io(ios, &__on_dev_error); |
04f83450 | 471 | |
eecfc631 | 472 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
04f83450 BH |
473 | |
474 | if (likely(!ret)) { | |
475 | /* FIXME: should be based on the OSD's persistence model | |
476 | * See OSD2r05 Section 4.13 Data persistence model */ | |
eecfc631 | 477 | objios->oir.committed = NFS_FILE_SYNC; |
04f83450 BH |
478 | status = ios->length; |
479 | } else { | |
480 | status = ret; | |
481 | } | |
482 | ||
eecfc631 | 483 | objlayout_write_done(&objios->oir, status, objios->sync); |
04f83450 BH |
484 | } |
485 | ||
278c023a BH |
486 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) |
487 | { | |
488 | struct objio_state *objios = priv; | |
489 | struct nfs_write_data *wdata = objios->oir.rpcdata; | |
cd841605 | 490 | struct address_space *mapping = wdata->header->inode->i_mapping; |
278c023a | 491 | pgoff_t index = offset / PAGE_SIZE; |
c999ff68 BH |
492 | struct page *page; |
493 | loff_t i_size = i_size_read(wdata->header->inode); | |
278c023a | 494 | |
c999ff68 BH |
495 | if (offset >= i_size) { |
496 | *uptodate = true; | |
497 | dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); | |
498 | return ZERO_PAGE(0); | |
499 | } | |
500 | ||
501 | page = find_get_page(mapping, index); | |
278c023a | 502 | if (!page) { |
cd841605 | 503 | page = find_or_create_page(mapping, index, GFP_NOFS); |
278c023a BH |
504 | if (unlikely(!page)) { |
505 | dprintk("%s: grab_cache_page Failed index=0x%lx\n", | |
506 | __func__, index); | |
507 | return NULL; | |
508 | } | |
509 | unlock_page(page); | |
510 | } | |
511 | if (PageDirty(page) || PageWriteback(page)) | |
512 | *uptodate = true; | |
513 | else | |
514 | *uptodate = PageUptodate(page); | |
515 | dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate); | |
516 | return page; | |
517 | } | |
518 | ||
519 | static void __r4w_put_page(void *priv, struct page *page) | |
520 | { | |
c999ff68 BH |
521 | dprintk("%s: index=0x%lx\n", __func__, |
522 | (page == ZERO_PAGE(0)) ? -1UL : page->index); | |
523 | if (ZERO_PAGE(0) != page) | |
524 | page_cache_release(page); | |
278c023a BH |
525 | return; |
526 | } | |
527 | ||
528 | static const struct _ore_r4w_op _r4w_op = { | |
529 | .get_page = &__r4w_get_page, | |
530 | .put_page = &__r4w_put_page, | |
531 | }; | |
532 | ||
96218556 | 533 | int objio_write_pagelist(struct nfs_write_data *wdata, int how) |
04f83450 | 534 | { |
cd841605 | 535 | struct nfs_pgio_header *hdr = wdata->header; |
eecfc631 | 536 | struct objio_state *objios; |
04f83450 BH |
537 | int ret; |
538 | ||
cd841605 FI |
539 | ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, |
540 | hdr->lseg, wdata->args.pages, wdata->args.pgbase, | |
96218556 | 541 | wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, |
eecfc631 | 542 | &objios); |
96218556 BH |
543 | if (unlikely(ret)) |
544 | return ret; | |
545 | ||
eecfc631 | 546 | objios->sync = 0 != (how & FLUSH_SYNC); |
278c023a | 547 | objios->ios->r4w = &_r4w_op; |
96218556 | 548 | |
eecfc631 BH |
549 | if (!objios->sync) |
550 | objios->ios->done = _write_done; | |
551 | ||
552 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, | |
553 | wdata->args.offset, wdata->args.count); | |
554 | ret = ore_write(objios->ios); | |
9909d45a BH |
555 | if (unlikely(ret)) { |
556 | objio_free_result(&objios->oir); | |
04f83450 | 557 | return ret; |
9909d45a | 558 | } |
04f83450 | 559 | |
eecfc631 BH |
560 | if (objios->sync) |
561 | _write_done(objios->ios, objios); | |
562 | ||
563 | return 0; | |
04f83450 BH |
564 | } |
565 | ||
93420770 BH |
566 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, |
567 | struct nfs_page *prev, struct nfs_page *req) | |
568 | { | |
569 | if (!pnfs_generic_pg_test(pgio, prev, req)) | |
570 | return false; | |
571 | ||
572 | return pgio->pg_count + req->wb_bytes <= | |
7de6e284 BH |
573 | (unsigned long)pgio->pg_layout_private; |
574 | } | |
575 | ||
576 | void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | |
577 | { | |
578 | pnfs_generic_pg_init_read(pgio, req); | |
579 | if (unlikely(pgio->pg_lseg == NULL)) | |
580 | return; /* Not pNFS */ | |
581 | ||
582 | pgio->pg_layout_private = (void *) | |
583 | OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; | |
584 | } | |
585 | ||
586 | static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, | |
587 | unsigned long *stripe_end) | |
588 | { | |
589 | u32 stripe_off; | |
590 | unsigned stripe_size; | |
591 | ||
592 | if (layout->raid_algorithm == PNFS_OSD_RAID_0) | |
593 | return true; | |
594 | ||
595 | stripe_size = layout->stripe_unit * | |
596 | (layout->group_width - layout->parity); | |
597 | ||
598 | div_u64_rem(offset, stripe_size, &stripe_off); | |
599 | if (!stripe_off) | |
600 | return true; | |
601 | ||
602 | *stripe_end = stripe_size - stripe_off; | |
603 | return false; | |
604 | } | |
605 | ||
606 | void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | |
607 | { | |
608 | unsigned long stripe_end = 0; | |
609 | ||
610 | pnfs_generic_pg_init_write(pgio, req); | |
611 | if (unlikely(pgio->pg_lseg == NULL)) | |
612 | return; /* Not pNFS */ | |
613 | ||
614 | if (req->wb_offset || | |
615 | !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE, | |
616 | &OBJIO_LSEG(pgio->pg_lseg)->layout, | |
617 | &stripe_end)) { | |
618 | pgio->pg_layout_private = (void *)stripe_end; | |
619 | } else { | |
620 | pgio->pg_layout_private = (void *) | |
621 | OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; | |
622 | } | |
93420770 BH |
623 | } |
624 | ||
1751c363 | 625 | static const struct nfs_pageio_ops objio_pg_read_ops = { |
7de6e284 | 626 | .pg_init = objio_init_read, |
1751c363 | 627 | .pg_test = objio_pg_test, |
493292dd | 628 | .pg_doio = pnfs_generic_pg_readpages, |
1751c363 TM |
629 | }; |
630 | ||
631 | static const struct nfs_pageio_ops objio_pg_write_ops = { | |
7de6e284 | 632 | .pg_init = objio_init_write, |
1751c363 | 633 | .pg_test = objio_pg_test, |
dce81290 | 634 | .pg_doio = pnfs_generic_pg_writepages, |
1751c363 TM |
635 | }; |
636 | ||
c93407d0 BH |
637 | static struct pnfs_layoutdriver_type objlayout_type = { |
638 | .id = LAYOUT_OSD2_OBJECTS, | |
639 | .name = "LAYOUT_OSD2_OBJECTS", | |
fe0fe835 BH |
640 | .flags = PNFS_LAYOUTRET_ON_SETATTR | |
641 | PNFS_LAYOUTRET_ON_ERROR, | |
09f5bf4e | 642 | |
e51b841d BH |
643 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, |
644 | .free_layout_hdr = objlayout_free_layout_hdr, | |
645 | ||
09f5bf4e BH |
646 | .alloc_lseg = objlayout_alloc_lseg, |
647 | .free_lseg = objlayout_free_lseg, | |
b6c05f16 | 648 | |
04f83450 BH |
649 | .read_pagelist = objlayout_read_pagelist, |
650 | .write_pagelist = objlayout_write_pagelist, | |
1751c363 TM |
651 | .pg_read_ops = &objio_pg_read_ops, |
652 | .pg_write_ops = &objio_pg_write_ops, | |
04f83450 | 653 | |
b6c05f16 | 654 | .free_deviceid_node = objio_free_deviceid_node, |
adb58535 | 655 | |
a0fe8bf4 | 656 | .encode_layoutcommit = objlayout_encode_layoutcommit, |
adb58535 | 657 | .encode_layoutreturn = objlayout_encode_layoutreturn, |
c93407d0 BH |
658 | }; |
659 | ||
660 | MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); | |
661 | MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>"); | |
662 | MODULE_LICENSE("GPL"); | |
663 | ||
664 | static int __init | |
665 | objlayout_init(void) | |
666 | { | |
667 | int ret = pnfs_register_layoutdriver(&objlayout_type); | |
668 | ||
669 | if (ret) | |
670 | printk(KERN_INFO | |
a030889a | 671 | "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n", |
c93407d0 BH |
672 | __func__, ret); |
673 | else | |
a030889a | 674 | printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n", |
c93407d0 BH |
675 | __func__); |
676 | return ret; | |
677 | } | |
678 | ||
679 | static void __exit | |
680 | objlayout_exit(void) | |
681 | { | |
682 | pnfs_unregister_layoutdriver(&objlayout_type); | |
a030889a | 683 | printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n", |
c93407d0 BH |
684 | __func__); |
685 | } | |
686 | ||
f85ef69c BF |
687 | MODULE_ALIAS("nfs-layouttype4-2"); |
688 | ||
c93407d0 BH |
689 | module_init(objlayout_init); |
690 | module_exit(objlayout_exit); |