]>
Commit | Line | Data |
---|---|---|
a8c879a7 AG |
1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | |
3 | * | |
4 | * This software is available to you under a choice of one of two | |
5 | * licenses. You may choose to be licensed under the terms of the GNU | |
6 | * General Public License (GPL) Version 2, available from the file | |
7 | * COPYING in the main directory of this source tree, or the | |
8 | * OpenIB.org BSD license below: | |
9 | * | |
10 | * Redistribution and use in source and binary forms, with or | |
11 | * without modification, are permitted provided that the following | |
12 | * conditions are met: | |
13 | * | |
14 | * - Redistributions of source code must retain the above | |
15 | * copyright notice, this list of conditions and the following | |
16 | * disclaimer. | |
17 | * | |
18 | * - Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials | |
21 | * provided with the distribution. | |
22 | * | |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
30 | * SOFTWARE. | |
31 | * | |
32 | */ | |
33 | #include <linux/percpu.h> | |
34 | #include <linux/seq_file.h> | |
5a0e3ad6 | 35 | #include <linux/slab.h> |
a8c879a7 | 36 | #include <linux/proc_fs.h> |
bc3b2d7f | 37 | #include <linux/export.h> |
a8c879a7 AG |
38 | |
39 | #include "rds.h" | |
40 | ||
41 | /* | |
42 | * This file implements a getsockopt() call which copies a set of fixed | |
43 | * sized structs into a user-specified buffer as a means of providing | |
44 | * read-only information about RDS. | |
45 | * | |
46 | * For a given information source there are a given number of fixed sized | |
47 | * structs at a given time. The structs are only copied if the user-specified | |
48 | * buffer is big enough. The destination pages that make up the buffer | |
49 | * are pinned for the duration of the copy. | |
50 | * | |
51 | * This gives us the following benefits: | |
52 | * | |
53 | * - simple implementation, no copy "position" across multiple calls | |
54 | * - consistent snapshot of an info source | |
55 | * - atomic copy works well with whatever locking info source has | |
56 | * - one portable tool to get rds info across implementations | |
57 | * - long-lived tool can get info without allocating | |
58 | * | |
59 | * at the following costs: | |
60 | * | |
61 | * - info source copy must be pinned, may be "large" | |
62 | */ | |
63 | ||
/*
 * Write cursor handed to the info functions.  rds_info_copy() advances it
 * through the destination pages; rds_info_iter_unmap() drops any mapping it
 * is still holding.
 */
struct rds_info_iterator {
	struct page **pages;	/* page currently being written to */
	void *addr;		/* atomic kmap of *pages, NULL while unmapped */
	unsigned long offset;	/* byte offset of the next write in *pages */
};
69 | ||
70 | static DEFINE_SPINLOCK(rds_info_lock); | |
71 | static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; | |
72 | ||
73 | void rds_info_register_func(int optname, rds_info_func func) | |
74 | { | |
75 | int offset = optname - RDS_INFO_FIRST; | |
76 | ||
77 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | |
78 | ||
79 | spin_lock(&rds_info_lock); | |
8690bfa1 | 80 | BUG_ON(rds_info_funcs[offset]); |
a8c879a7 AG |
81 | rds_info_funcs[offset] = func; |
82 | spin_unlock(&rds_info_lock); | |
83 | } | |
616b757a | 84 | EXPORT_SYMBOL_GPL(rds_info_register_func); |
a8c879a7 AG |
85 | |
86 | void rds_info_deregister_func(int optname, rds_info_func func) | |
87 | { | |
88 | int offset = optname - RDS_INFO_FIRST; | |
89 | ||
90 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | |
91 | ||
92 | spin_lock(&rds_info_lock); | |
93 | BUG_ON(rds_info_funcs[offset] != func); | |
94 | rds_info_funcs[offset] = NULL; | |
95 | spin_unlock(&rds_info_lock); | |
96 | } | |
616b757a | 97 | EXPORT_SYMBOL_GPL(rds_info_deregister_func); |
a8c879a7 AG |
98 | |
99 | /* | |
100 | * Typically we hold an atomic kmap across multiple rds_info_copy() calls | |
101 | * because the kmap is so expensive. This must be called before using blocking | |
102 | * operations while holding the mapping and as the iterator is torn down. | |
103 | */ | |
104 | void rds_info_iter_unmap(struct rds_info_iterator *iter) | |
105 | { | |
8690bfa1 | 106 | if (iter->addr) { |
6114eab5 | 107 | kunmap_atomic(iter->addr); |
a8c879a7 AG |
108 | iter->addr = NULL; |
109 | } | |
110 | } | |
111 | ||
112 | /* | |
113 | * get_user_pages() called flush_dcache_page() on the pages for us. | |
114 | */ | |
115 | void rds_info_copy(struct rds_info_iterator *iter, void *data, | |
116 | unsigned long bytes) | |
117 | { | |
118 | unsigned long this; | |
119 | ||
120 | while (bytes) { | |
8690bfa1 | 121 | if (!iter->addr) |
6114eab5 | 122 | iter->addr = kmap_atomic(*iter->pages); |
a8c879a7 AG |
123 | |
124 | this = min(bytes, PAGE_SIZE - iter->offset); | |
125 | ||
126 | rdsdebug("page %p addr %p offset %lu this %lu data %p " | |
127 | "bytes %lu\n", *iter->pages, iter->addr, | |
128 | iter->offset, this, data, bytes); | |
129 | ||
130 | memcpy(iter->addr + iter->offset, data, this); | |
131 | ||
132 | data += this; | |
133 | bytes -= this; | |
134 | iter->offset += this; | |
135 | ||
136 | if (iter->offset == PAGE_SIZE) { | |
6114eab5 | 137 | kunmap_atomic(iter->addr); |
a8c879a7 AG |
138 | iter->addr = NULL; |
139 | iter->offset = 0; | |
140 | iter->pages++; | |
141 | } | |
142 | } | |
143 | } | |
616b757a | 144 | EXPORT_SYMBOL_GPL(rds_info_copy); |
a8c879a7 AG |
145 | |
146 | /* | |
147 | * @optval points to the userspace buffer that the information snapshot | |
148 | * will be copied into. | |
149 | * | |
150 | * @optlen on input is the size of the buffer in userspace. @optlen | |
151 | * on output is the size of the requested snapshot in bytes. | |
152 | * | |
153 | * This function returns -errno if there is a failure, particularly -ENOSPC | |
154 | * if the given userspace buffer was not large enough to fit the snapshot. | |
155 | * On success it returns the positive number of bytes of each array element | |
156 | * in the snapshot. | |
157 | */ | |
158 | int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, | |
159 | int __user *optlen) | |
160 | { | |
161 | struct rds_info_iterator iter; | |
162 | struct rds_info_lengths lens; | |
163 | unsigned long nr_pages = 0; | |
164 | unsigned long start; | |
165 | unsigned long i; | |
166 | rds_info_func func; | |
167 | struct page **pages = NULL; | |
168 | int ret; | |
169 | int len; | |
170 | int total; | |
171 | ||
172 | if (get_user(len, optlen)) { | |
173 | ret = -EFAULT; | |
174 | goto out; | |
175 | } | |
176 | ||
177 | /* check for all kinds of wrapping and the like */ | |
178 | start = (unsigned long)optval; | |
468b732b | 179 | if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) { |
a8c879a7 AG |
180 | ret = -EINVAL; |
181 | goto out; | |
182 | } | |
183 | ||
184 | /* a 0 len call is just trying to probe its length */ | |
185 | if (len == 0) | |
186 | goto call_func; | |
187 | ||
188 | nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) | |
189 | >> PAGE_SHIFT; | |
190 | ||
191 | pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); | |
8690bfa1 | 192 | if (!pages) { |
a8c879a7 AG |
193 | ret = -ENOMEM; |
194 | goto out; | |
195 | } | |
830eb7d5 | 196 | ret = get_user_pages_fast(start, nr_pages, 1, pages); |
a8c879a7 AG |
197 | if (ret != nr_pages) { |
198 | if (ret > 0) | |
199 | nr_pages = ret; | |
200 | else | |
201 | nr_pages = 0; | |
202 | ret = -EAGAIN; /* XXX ? */ | |
203 | goto out; | |
204 | } | |
205 | ||
206 | rdsdebug("len %d nr_pages %lu\n", len, nr_pages); | |
207 | ||
208 | call_func: | |
209 | func = rds_info_funcs[optname - RDS_INFO_FIRST]; | |
8690bfa1 | 210 | if (!func) { |
a8c879a7 AG |
211 | ret = -ENOPROTOOPT; |
212 | goto out; | |
213 | } | |
214 | ||
215 | iter.pages = pages; | |
216 | iter.addr = NULL; | |
217 | iter.offset = start & (PAGE_SIZE - 1); | |
218 | ||
219 | func(sock, len, &iter, &lens); | |
220 | BUG_ON(lens.each == 0); | |
221 | ||
222 | total = lens.nr * lens.each; | |
223 | ||
224 | rds_info_iter_unmap(&iter); | |
225 | ||
226 | if (total > len) { | |
227 | len = total; | |
228 | ret = -ENOSPC; | |
229 | } else { | |
230 | len = total; | |
231 | ret = lens.each; | |
232 | } | |
233 | ||
234 | if (put_user(len, optlen)) | |
235 | ret = -EFAULT; | |
236 | ||
237 | out: | |
8690bfa1 | 238 | for (i = 0; pages && i < nr_pages; i++) |
a8c879a7 AG |
239 | put_page(pages[i]); |
240 | kfree(pages); | |
241 | ||
242 | return ret; | |
243 | } |