]> git.proxmox.com Git - mirror_iproute2.git/blame - Patches/af_unix.dif
(Logical change 1.3)
[mirror_iproute2.git] / Patches / af_unix.dif
CommitLineData
aba5acdf
SH
1diff -ur ../vger3-011229/linux/net/unix/af_unix.c linux/net/unix/af_unix.c
2--- ../vger3-011229/linux/net/unix/af_unix.c Mon Dec 3 20:24:03 2001
3+++ linux/net/unix/af_unix.c Sat Jan 5 04:30:19 2002
4@@ -112,6 +112,7 @@
5 #include <asm/checksum.h>
6
7 int sysctl_unix_max_dgram_qlen = 10;
8+int sysctl_unix_stream_pages = MAX_SKB_FRAGS;
9
10 unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
11 rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;
12@@ -1123,9 +1124,6 @@
13 struct scm_cookie scm;
14 memset(&scm, 0, sizeof(scm));
15 unix_detach_fds(&scm, skb);
16-
17- /* Alas, it calls VFS */
18- /* So fscking what? fput() had been SMP-safe since the last Summer */
19 scm_destroy(&scm);
20 sock_wfree(skb);
21 }
22@@ -1140,6 +1138,67 @@
23 scm->fp = NULL;
24 }
25
26+int datagram_copy_fromiovec(struct iovec *iov, struct sk_buff *skb, int size)
27+{
28+ struct sock *sk;
29+ struct sk_buff **tail, *skb1;
30+ int copy = min_t(int, size, skb_tailroom(skb));
31+
32+ if (memcpy_fromiovec(skb_put(skb, copy), iov, copy))
33+ goto do_fault;
34+
35+ if ((size -= copy) == 0)
36+ return 0;
37+
38+ sk = skb->sk;
39+ skb1 = skb;
40+ tail = &skb_shinfo(skb)->frag_list;
41+
42+ do {
43+ struct page *page;
44+ int i = skb_shinfo(skb1)->nr_frags;
45+
46+ if (i == MAX_SKB_FRAGS) {
47+ skb1 = alloc_skb(0, sk->allocation);
48+ if (skb1 == NULL)
49+ goto do_oom;
50+ *tail = skb1;
51+ tail = &skb1->next;
52+ i = 0;
53+ skb->truesize += skb1->truesize;
54+ atomic_add(skb1->truesize, &sk->wmem_alloc);
55+ }
56+
57+ page = alloc_pages(sk->allocation, 0);
58+ if (page == NULL)
59+ goto do_oom;
60+
61+ copy = min_t(int, size, PAGE_SIZE);
62+ skb_shinfo(skb1)->nr_frags=i+1;
63+ skb_shinfo(skb1)->frags[i].page = page;
64+ skb_shinfo(skb1)->frags[i].page_offset = 0;
65+ skb_shinfo(skb1)->frags[i].size = copy;
66+
67+ skb1->len += copy;
68+ skb1->data_len += copy;
69+ if (skb != skb1) {
70+ skb->len += copy;
71+ skb->data_len += copy;
72+ }
73+ skb->truesize += PAGE_SIZE;
74+ atomic_add(PAGE_SIZE, &sk->wmem_alloc);
75+ if (memcpy_fromiovec(page_address(page), iov, copy))
76+ goto do_fault;
77+ } while ((size -= copy) > 0);
78+ return 0;
79+
80+do_oom:
81+ return -ENOMEM;
82+
83+do_fault:
84+ return -EFAULT;
85+}
86+
87 /*
88 * Send AF_UNIX data.
89 */
90@@ -1155,6 +1214,7 @@
91 unsigned hash;
92 struct sk_buff *skb;
93 long timeo;
94+ int alloc;
95
96 err = -EOPNOTSUPP;
97 if (msg->msg_flags&MSG_OOB)
98@@ -1178,10 +1238,14 @@
99 goto out;
100
101 err = -EMSGSIZE;
102- if ((unsigned)len > sk->sndbuf - 32)
103+ if ((unsigned)len > sk->sndbuf)
104 goto out;
105
106- skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
107+ alloc = len;
108+ if (alloc > SKB_MAX_HEAD(0))
109+ alloc = SKB_MAX_HEAD(0);
110+
111+ skb = sock_alloc_send_skb(sk, alloc, msg->msg_flags&MSG_DONTWAIT, &err);
112 if (skb==NULL)
113 goto out;
114
115@@ -1190,7 +1254,7 @@
116 unix_attach_fds(scm, skb);
117
118 skb->h.raw = skb->data;
119- err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
120+ err = datagram_copy_fromiovec(msg->msg_iov, skb, len);
121 if (err)
122 goto out_free;
123
124@@ -1275,74 +1339,57 @@
125 return err;
126 }
127
128-
129 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
130 struct scm_cookie *scm)
131 {
132 struct sock *sk = sock->sk;
133 unix_socket *other = NULL;
134- struct sockaddr_un *sunaddr=msg->msg_name;
135- int err,size;
136 struct sk_buff *skb;
137+ int err;
138 int sent=0;
139
140 err = -EOPNOTSUPP;
141 if (msg->msg_flags&MSG_OOB)
142 goto out_err;
143
144- if (msg->msg_namelen) {
145- err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP);
146+ err = -ENOTCONN;
147+ other = unix_peer_get(sk);
148+ if (!other)
149 goto out_err;
150- } else {
151- sunaddr = NULL;
152- err = -ENOTCONN;
153- other = unix_peer_get(sk);
154- if (!other)
155- goto out_err;
156- }
157
158 if (sk->shutdown&SEND_SHUTDOWN)
159 goto pipe_err;
160
161- while(sent < len)
162- {
163- /*
164- * Optimisation for the fact that under 0.01% of X messages typically
165- * need breaking up.
166- */
167+ while(sent < len) {
168+ int size, alloc;
169
170- size=len-sent;
171+ size = len-sent;
172
173 /* Keep two messages in the pipe so it schedules better */
174- if (size > sk->sndbuf/2 - 64)
175- size = sk->sndbuf/2 - 64;
176+ if (size > sk->sndbuf/2)
177+ size = sk->sndbuf/2;
178
179- if (size > SKB_MAX_ALLOC)
180- size = SKB_MAX_ALLOC;
181-
182 /*
183 * Grab a buffer
184 */
185-
186- skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
187+ alloc = size;
188+
189+ if (size > SKB_MAX_HEAD(0)) {
190+ alloc = SKB_MAX_HEAD(0);
191+ if (size > alloc + sysctl_unix_stream_pages*PAGE_SIZE)
192+ size = alloc + sysctl_unix_stream_pages*PAGE_SIZE;
193+ }
194+
195+ skb=sock_alloc_send_skb(sk,alloc,msg->msg_flags&MSG_DONTWAIT, &err);
196
197 if (skb==NULL)
198 goto out_err;
199
200- /*
201- * If you pass two values to the sock_alloc_send_skb
202- * it tries to grab the large buffer with GFP_NOFS
203- * (which can fail easily), and if it fails grab the
204- * fallback size buffer which is under a page and will
205- * succeed. [Alan]
206- */
207- size = min_t(int, size, skb_tailroom(skb));
208-
209 memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
210 if (scm->fp)
211 unix_attach_fds(scm, skb);
212
213- if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
214+ if ((err = datagram_copy_fromiovec(msg->msg_iov, skb, size)) != 0) {
215 kfree_skb(skb);
216 goto out_err;
217 }
218@@ -1418,13 +1465,10 @@
219
220 scm->creds = *UNIXCREDS(skb);
221
222- if (!(flags & MSG_PEEK))
223- {
224+ if (!(flags & MSG_PEEK)) {
225 if (UNIXCB(skb).fp)
226 unix_detach_fds(scm, skb);
227- }
228- else
229- {
230+ } else {
231 /* It is questionable: on PEEK we could:
232 - do not return fds - good, but too simple 8)
233 - return fds, and do not return them on read (old strategy,
234@@ -1483,13 +1527,10 @@
235 return timeo;
236 }
237
238-
239-
240 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size,
241 int flags, struct scm_cookie *scm)
242 {
243 struct sock *sk = sock->sk;
244- struct sockaddr_un *sunaddr=msg->msg_name;
245 int copied = 0;
246 int check_creds = 0;
247 int target;
248@@ -1515,21 +1556,18 @@
249
250 down(&sk->protinfo.af_unix.readsem);
251
252- do
253- {
254+ do {
255 int chunk;
256 struct sk_buff *skb;
257
258 skb=skb_dequeue(&sk->receive_queue);
259- if (skb==NULL)
260- {
261+ if (skb==NULL) {
262 if (copied >= target)
263 break;
264
265 /*
266 * POSIX 1003.1g mandates this order.
267 */
268-
269 if ((err = sock_error(sk)) != 0)
270 break;
271 if (sk->shutdown & RCV_SHUTDOWN)
272@@ -1551,60 +1589,44 @@
273
274 if (check_creds) {
275 /* Never glue messages from different writers */
276- if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) {
277- skb_queue_head(&sk->receive_queue, skb);
278- break;
279- }
280+ if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0)
281+ goto out_put_back;
282 } else {
283 /* Copy credentials */
284 scm->creds = *UNIXCREDS(skb);
285 check_creds = 1;
286 }
287
288- /* Copy address just once */
289- if (sunaddr)
290- {
291- unix_copy_addr(msg, skb->sk);
292- sunaddr = NULL;
293- }
294+ chunk = min_t(int, skb->len - sk->protinfo.af_unix.copied, size);
295+ err = skb_copy_datagram_iovec(skb, sk->protinfo.af_unix.copied, msg->msg_iov, chunk);
296+ if (err)
297+ goto out_put_back;
298
299- chunk = min_t(unsigned int, skb->len, size);
300- if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
301- skb_queue_head(&sk->receive_queue, skb);
302- if (copied == 0)
303- copied = -EFAULT;
304- break;
305- }
306 copied += chunk;
307 size -= chunk;
308
309 /* Mark read part of skb as used */
310- if (!(flags & MSG_PEEK))
311- {
312- skb_pull(skb, chunk);
313-
314+ if (!(flags & MSG_PEEK)) {
315 if (UNIXCB(skb).fp)
316 unix_detach_fds(scm, skb);
317
318 /* put the skb back if we didn't use it up.. */
319- if (skb->len)
320- {
321- skb_queue_head(&sk->receive_queue, skb);
322- break;
323- }
324+ if ((sk->protinfo.af_unix.copied += chunk) < skb->len)
325+ goto out_put_back;
326+
327+ sk->protinfo.af_unix.copied = 0;
328
329 kfree_skb(skb);
330
331 if (scm->fp)
332 break;
333- }
334- else
335- {
336+ } else {
337 /* It is questionable, see note in unix_dgram_recvmsg.
338 */
339 if (UNIXCB(skb).fp)
340 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
341
342+out_put_back:
343 /* put message back and return */
344 skb_queue_head(&sk->receive_queue, skb);
345 break;
346@@ -1676,10 +1698,12 @@
347 break;
348 }
349
350+ down(&sk->protinfo.af_unix.readsem);
351 spin_lock(&sk->receive_queue.lock);
352 if((skb=skb_peek(&sk->receive_queue))!=NULL)
353- amount=skb->len;
354+ amount=skb->len - sk->protinfo.af_unix.copied;
355 spin_unlock(&sk->receive_queue.lock);
356+ up(&sk->protinfo.af_unix.readsem);
357 err = put_user(amount, (int *)arg);
358 break;
359 }
360@@ -1734,7 +1758,7 @@
361 int i;
362 unix_socket *s;
363
364- len+= sprintf(buffer,"Num RefCount Protocol Flags Type St "
365+ len+= sprintf(buffer,"Peer RcvQueue WMem Flags Type St "
366 "Inode Path\n");
367
368 read_lock(&unix_table_lock);
369@@ -1742,10 +1766,10 @@
370 {
371 unix_state_rlock(s);
372
373- len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld",
374- s,
375- atomic_read(&s->refcnt),
376- 0,
377+ len+=sprintf(buffer+len,"%08lX: %08X %08X %08X %04X %02X %5ld",
378+ unix_peer(s) ? sock_i_ino(unix_peer(s)) : 0,
379+ skb_queue_len(&s->receive_queue),
380+ atomic_read(&s->wmem_alloc),
381 s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
382 s->type,
383 s->socket ?
384diff -ur ../vger3-011229/linux/net/unix/sysctl_net_unix.c linux/net/unix/sysctl_net_unix.c
385--- ../vger3-011229/linux/net/unix/sysctl_net_unix.c Tue Jan 30 21:20:16 2001
386+++ linux/net/unix/sysctl_net_unix.c Sat Jan 5 04:10:58 2002
387@@ -13,10 +13,14 @@
388 #include <linux/sysctl.h>
389
390 extern int sysctl_unix_max_dgram_qlen;
391+extern int sysctl_unix_stream_pages;
392
393 ctl_table unix_table[] = {
394 {NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen",
395 &sysctl_unix_max_dgram_qlen, sizeof(int), 0600, NULL,
396+ &proc_dointvec },
397+ {NET_UNIX_STREAM_PAGES, "stream_pages",
398+ &sysctl_unix_stream_pages, sizeof(int), 0600, NULL,
399 &proc_dointvec },
400 {0}
401 };