net/core/sock.c

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Generic socket support routines. Memory allocators, socket lock/release
   7  *              handler for protocols to use and generic option handler.
   8  *
   9  *
  10  * Version:     $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
  11  *
  12  * Authors:     Ross Biro
  13  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Alan Cox, <A.Cox@swansea.ac.uk>
  16  *
  17  * Fixes:
  18  *              Alan Cox        :       Numerous verify_area() problems
  19  *              Alan Cox        :       Connecting on a connecting socket
  20  *                                      now returns an error for tcp.
  21  *              Alan Cox        :       sock->protocol is set correctly.
  22  *                                      and is not sometimes left as 0.
  23  *              Alan Cox        :       connect handles icmp errors on a
  24  *                                      connect properly. Unfortunately there
  25  *                                      is a restart syscall nasty there. I
  26  *                                      can't match BSD without hacking the C
  27  *                                      library. Ideas urgently sought!
  28  *              Alan Cox        :       Disallow bind() to addresses that are
  29  *                                      not ours - especially broadcast ones!!
  30  *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
  31  *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
  32  *                                      instead they leave that for the DESTROY timer.
  33  *              Alan Cox        :       Clean up error flag in accept
  34  *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
  35  *                                      was buggy. Put a remove_sock() in the handler
  36  *                                      for memory when we hit 0. Also altered the timer
  37  *                                      code. The ACK stuff can wait and needs major
  38  *                                      TCP layer surgery.
  39  *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
  40  *                                      and fixed timer/inet_bh race.
  41  *              Alan Cox        :       Added zapped flag for TCP
  42  *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
  43  *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
  44  *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
  45  *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
  46  *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
  47  *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
  48  *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
  49  *      Pauline Middelink       :       identd support
  50  *              Alan Cox        :       Fixed connect() taking signals I think.
  51  *              Alan Cox        :       SO_LINGER supported
  52  *              Alan Cox        :       Error reporting fixes
  53  *              Anonymous       :       inet_create tidied up (sk->reuse setting)
  54  *              Alan Cox        :       inet sockets don't set sk->type!
  55  *              Alan Cox        :       Split socket option code
  56  *              Alan Cox        :       Callbacks
  57  *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
  58  *              Alex            :       Removed restriction on inet fioctl
  59  *              Alan Cox        :       Splitting INET from NET core
  60  *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
  61  *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
  62  *              Alan Cox        :       Split IP from generic code
  63  *              Alan Cox        :       New kfree_skbmem()
  64  *              Alan Cox        :       Make SO_DEBUG superuser only.
  65  *              Alan Cox        :       Allow anyone to clear SO_DEBUG
  66  *                                      (compatibility fix)
  67  *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
  68  *              Alan Cox        :       Allocator for a socket is settable.
  69  *              Alan Cox        :       SO_ERROR includes soft errors.
  70  *              Alan Cox        :       Allow NULL arguments on some SO_ opts
  71  *              Alan Cox        :       Generic socket allocation to make hooks
  72  *                                      easier (suggested by Craig Metz).
  73  *              Michael Pall    :       SO_ERROR returns positive errno again
  74  *              Steve Whitehouse:       Added default destructor to free
  75  *                                      protocol private data.
  76  *              Steve Whitehouse:       Added various other default routines
  77  *                                      common to several socket families.
  78  *              Chris Evans     :       Call suser() check last on F_SETOWN
  79  *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
  80  *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
  81  *              Andi Kleen      :       Fix write_space callback
  82  *              Chris Evans     :       Security fixes - signedness again
  83  *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
  84  *
  85  * To Fix:
  86  *
  87  *
  88  *              This program is free software; you can redistribute it and/or
  89  *              modify it under the terms of the GNU General Public License
  90  *              as published by the Free Software Foundation; either version
  91  *              2 of the License, or (at your option) any later version.
  92  */
  93
  94 #include <linux/capability.h>
  95 #include <linux/errno.h>
  96 #include <linux/types.h>
  97 #include <linux/socket.h>
  98 #include <linux/in.h>
  99 #include <linux/kernel.h>
 100 #include <linux/module.h>
 101 #include <linux/proc_fs.h>
 102 #include <linux/seq_file.h>
 103 #include <linux/sched.h>
 104 #include <linux/timer.h>
 105 #include <linux/string.h>
 106 #include <linux/sockios.h>
 107 #include <linux/net.h>
 108 #include <linux/mm.h>
 109 #include <linux/slab.h>
 110 #include <linux/interrupt.h>
 111 #include <linux/poll.h>
 112 #include <linux/tcp.h>
 113 #include <linux/init.h>
 114 #include <linux/highmem.h>
 115
 116 #include <asm/uaccess.h>
 117 #include <asm/system.h>
 118
 119 #include <linux/netdevice.h>
 120 #include <net/protocol.h>
 121 #include <linux/skbuff.h>
 122 #include <net/request_sock.h>
 123 #include <net/sock.h>
 124 #include <net/xfrm.h>
 125 #include <linux/ipsec.h>
 126
 127 #include <linux/filter.h>
 128
 129 #ifdef CONFIG_INET
 130 #include <net/tcp.h>
 131 #endif
 132
 133 /*
 134  * Each address family might have different locking rules, so we have
 135  * one slock key per address family:
 136  */
  /*
   * Two arrays of lock class keys, AF_MAX entries each.
   * NOTE(review): going by the names of the *_key_strings tables in this
   * file, af_family_keys is presumably for sk_lock and af_family_slock_keys
   * for the slock; the code that consumes them is not part of this excerpt.
   */
  137 static struct lock_class_key af_family_keys[AF_MAX];
  138 static struct lock_class_key af_family_slock_keys[AF_MAX];
 139
 140 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 141 /*
  142  * Make lock validator output more readable. (We pre-construct these
  143  * strings at build time, so that runtime initialization of socket
  144  * locks is fast):
 145  */
 146 static const char *af_family_key_strings[AF_MAX+1] = {
 147   "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
 148   "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
 149   "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
 150   "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
 151   "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
 152   "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
 153   "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
 154   "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
 155   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
 156   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
 157   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
 158   "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
 159 };
 160 static const char *af_family_slock_key_strings[AF_MAX+1] = {
 161   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
 162   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
 163   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
 164   "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
 165   "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
 166   "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
 167   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
 168   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
 169   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
 170   "slock-27"       , "slock-28"          , "slock-29"          ,
 171   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
 172   "slock-AF_RXRPC" , "slock-AF_MAX"
 173 };
 174 static const char *af_family_clock_key_strings[AF_MAX+1] = {
 175   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
 176   "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
 177   "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
 178   "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
 179   "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
 180   "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
 181   "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
 182   "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
 183   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
 184   "clock-27"       , "clock-28"          , "clock-29"          ,
 185   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_MAX"
 186 };
 187 #endif
 188
 189 /*
 190  * sk_callback_lock locking rules are per-address-family,
 191  * so split the lock classes by using a per-AF key:
 192  */
  /* AF_MAX lock class keys; NOTE(review): presumably indexed by address family number -- the use site is not in this excerpt. */
  193 static struct lock_class_key af_callback_keys[AF_MAX];
 194
 195 /* Take into consideration the size of the struct sk_buff overhead in the
 196  * determination of these values, since that is non-constant across
 197  * platforms.  This makes socket queueing behavior and performance
 198  * not depend upon such differences.
 199  */
  200 #define _SK_MEM_PACKETS         256
  /* Per-packet charge: the sk_buff header itself plus 256 bytes. */
  201 #define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
  /* Send and receive limits are identical: _SK_MEM_PACKETS packets of _SK_MEM_OVERHEAD bytes each. */
  202 #define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
  203 #define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
  204
  205 /* Run time adjustable parameters. */
  /*
   * sysctl_wmem_max and sysctl_rmem_max are the ceilings that
   * sock_setsockopt() applies to SO_SNDBUF and SO_RCVBUF requests (the
   * SO_*BUFFORCE variants skip the ceiling).
   */
  206 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
  207 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
  /* The defaults start out equal to the maxima; NOTE(review): where they are consumed is not visible in this excerpt. */
  208 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
  209 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
  210
  211 /* Maximal space eaten by iovec or ancillary data plus some space */
  212 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 213
 214 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 215 {
 216         struct timeval tv;
 217
 218         if (optlen < sizeof(tv))
 219                 return -EINVAL;
 220         if (copy_from_user(&tv, optval, sizeof(tv)))
 221                 return -EFAULT;
 222         if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
 223                 return -EDOM;
 224
 225         if (tv.tv_sec < 0) {
 226                 static int warned __read_mostly;
 227
 228                 *timeo_p = 0;
 229                 if (warned < 10 && net_ratelimit())
 230                         warned++;
 231                         printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
 232                                "tries to set negative timeout\n",
 233                                 current->comm, current->pid);
 234                 return 0;
 235         }
 236         *timeo_p = MAX_SCHEDULE_TIMEOUT;
 237         if (tv.tv_sec == 0 && tv.tv_usec == 0)
 238                 return 0;
 239         if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
 240                 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
 241         return 0;
 242 }
 243
 244 static void sock_warn_obsolete_bsdism(const char *name)
 245 {
 246         static int warned;
 247         static char warncomm[TASK_COMM_LEN];
 248         if (strcmp(warncomm, current->comm) && warned < 5) {
 249                 strcpy(warncomm,  current->comm);
 250                 printk(KERN_WARNING "process `%s' is using obsolete "
 251                        "%s SO_BSDCOMPAT\n", warncomm, name);
 252                 warned++;
 253         }
 254 }
 255
 256 static void sock_disable_timestamp(struct sock *sk)
 257 {
 258         if (sock_flag(sk, SOCK_TIMESTAMP)) {
 259                 sock_reset_flag(sk, SOCK_TIMESTAMP);
 260                 net_disable_timestamp();
 261         }
 262 }
 263
 264
 265 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 266 {
 267         int err = 0;
 268         int skb_len;
 269
 270         /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
 271            number of warnings when compiling with -W --ANK
 272          */
 273         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 274             (unsigned)sk->sk_rcvbuf) {
 275                 err = -ENOMEM;
 276                 goto out;
 277         }
 278
 279         err = sk_filter(sk, skb);
 280         if (err)
 281                 goto out;
 282
 283         skb->dev = NULL;
 284         skb_set_owner_r(skb, sk);
 285
 286         /* Cache the SKB length before we tack it onto the receive
 287          * queue.  Once it is added it no longer belongs to us and
 288          * may be freed by other threads of control pulling packets
 289          * from the queue.
 290          */
 291         skb_len = skb->len;
 292
 293         skb_queue_tail(&sk->sk_receive_queue, skb);
 294
 295         if (!sock_flag(sk, SOCK_DEAD))
 296                 sk->sk_data_ready(sk, skb_len);
 297 out:
 298         return err;
 299 }
 300 EXPORT_SYMBOL(sock_queue_rcv_skb);
 301
 302 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 303 {
 304         int rc = NET_RX_SUCCESS;
 305
 306         if (sk_filter(sk, skb))
 307                 goto discard_and_relse;
 308
 309         skb->dev = NULL;
 310
 311         if (nested)
 312                 bh_lock_sock_nested(sk);
 313         else
 314                 bh_lock_sock(sk);
 315         if (!sock_owned_by_user(sk)) {
 316                 /*
 317                  * trylock + unlock semantics:
 318                  */
 319                 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
 320
 321                 rc = sk->sk_backlog_rcv(sk, skb);
 322
 323                 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
 324         } else
 325                 sk_add_backlog(sk, skb);
 326         bh_unlock_sock(sk);
 327 out:
 328         sock_put(sk);
 329         return rc;
 330 discard_and_relse:
 331         kfree_skb(skb);
 332         goto out;
 333 }
 334 EXPORT_SYMBOL(sk_receive_skb);
 335
 336 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 337 {
 338         struct dst_entry *dst = sk->sk_dst_cache;
 339
 340         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 341                 sk->sk_dst_cache = NULL;
 342                 dst_release(dst);
 343                 return NULL;
 344         }
 345
 346         return dst;
 347 }
 348 EXPORT_SYMBOL(__sk_dst_check);
 349
 350 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
 351 {
 352         struct dst_entry *dst = sk_dst_get(sk);
 353
 354         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 355                 sk_dst_reset(sk);
 356                 dst_release(dst);
 357                 return NULL;
 358         }
 359
 360         return dst;
 361 }
 362 EXPORT_SYMBOL(sk_dst_check);
 363
 364 /*
 365  *      This is meant for all protocols to use and covers goings on
 366  *      at the socket level. Everything here is generic.
 367  */
 368
 369 int sock_setsockopt(struct socket *sock, int level, int optname,
 370                     char __user *optval, int optlen)
 371 {
 372         struct sock *sk=sock->sk;
 373         struct sk_filter *filter;
 374         int val;
 375         int valbool;
 376         struct linger ling;
 377         int ret = 0;
 378
 379         /*
 380          *      Options without arguments
 381          */
 382
 383 #ifdef SO_DONTLINGER            /* Compatibility item... */
 384         if (optname == SO_DONTLINGER) {
 385                 lock_sock(sk);
 386                 sock_reset_flag(sk, SOCK_LINGER);
 387                 release_sock(sk);
 388                 return 0;
 389         }
 390 #endif
 391
 392         if (optlen < sizeof(int))
 393                 return -EINVAL;
 394
 395         if (get_user(val, (int __user *)optval))
 396                 return -EFAULT;
 397
 398         valbool = val?1:0;
 399
 400         lock_sock(sk);
 401
 402         switch(optname) {
 403         case SO_DEBUG:
 404                 if (val && !capable(CAP_NET_ADMIN)) {
 405                         ret = -EACCES;
 406                 }
 407                 else if (valbool)
 408                         sock_set_flag(sk, SOCK_DBG);
 409                 else
 410                         sock_reset_flag(sk, SOCK_DBG);
 411                 break;
 412         case SO_REUSEADDR:
 413                 sk->sk_reuse = valbool;
 414                 break;
 415         case SO_TYPE:
 416         case SO_ERROR:
 417                 ret = -ENOPROTOOPT;
 418                 break;
 419         case SO_DONTROUTE:
 420                 if (valbool)
 421                         sock_set_flag(sk, SOCK_LOCALROUTE);
 422                 else
 423                         sock_reset_flag(sk, SOCK_LOCALROUTE);
 424                 break;
 425         case SO_BROADCAST:
 426                 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
 427                 break;
 428         case SO_SNDBUF:
 429                 /* Don't error on this BSD doesn't and if you think
 430                    about it this is right. Otherwise apps have to
 431                    play 'guess the biggest size' games. RCVBUF/SNDBUF
 432                    are treated in BSD as hints */
 433
 434                 if (val > sysctl_wmem_max)
 435                         val = sysctl_wmem_max;
 436 set_sndbuf:
 437                 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 438                 if ((val * 2) < SOCK_MIN_SNDBUF)
 439                         sk->sk_sndbuf = SOCK_MIN_SNDBUF;
 440                 else
 441                         sk->sk_sndbuf = val * 2;
 442
 443                 /*
 444                  *      Wake up sending tasks if we
 445                  *      upped the value.
 446                  */
 447                 sk->sk_write_space(sk);
 448                 break;
 449
 450         case SO_SNDBUFFORCE:
 451                 if (!capable(CAP_NET_ADMIN)) {
 452                         ret = -EPERM;
 453                         break;
 454                 }
 455                 goto set_sndbuf;
 456
 457         case SO_RCVBUF:
 458                 /* Don't error on this BSD doesn't and if you think
 459                    about it this is right. Otherwise apps have to
 460                    play 'guess the biggest size' games. RCVBUF/SNDBUF
 461                    are treated in BSD as hints */
 462
 463                 if (val > sysctl_rmem_max)
 464                         val = sysctl_rmem_max;
 465 set_rcvbuf:
 466                 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 467                 /*
 468                  * We double it on the way in to account for
 469                  * "struct sk_buff" etc. overhead.   Applications
 470                  * assume that the SO_RCVBUF setting they make will
 471                  * allow that much actual data to be received on that
 472                  * socket.
 473                  *
 474                  * Applications are unaware that "struct sk_buff" and
 475                  * other overheads allocate from the receive buffer
 476                  * during socket buffer allocation.
 477                  *
 478                  * And after considering the possible alternatives,
 479                  * returning the value we actually used in getsockopt
 480                  * is the most desirable behavior.
 481                  */
 482                 if ((val * 2) < SOCK_MIN_RCVBUF)
 483                         sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
 484                 else
 485                         sk->sk_rcvbuf = val * 2;
 486                 break;
 487
 488         case SO_RCVBUFFORCE:
 489                 if (!capable(CAP_NET_ADMIN)) {
 490                         ret = -EPERM;
 491                         break;
 492                 }
 493                 goto set_rcvbuf;
 494
 495         case SO_KEEPALIVE:
 496 #ifdef CONFIG_INET
 497                 if (sk->sk_protocol == IPPROTO_TCP)
 498                         tcp_set_keepalive(sk, valbool);
 499 #endif
 500                 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
 501                 break;
 502
 503         case SO_OOBINLINE:
 504                 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
 505                 break;
 506
 507         case SO_NO_CHECK:
 508                 sk->sk_no_check = valbool;
 509                 break;
 510
 511         case SO_PRIORITY:
 512                 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
 513                         sk->sk_priority = val;
 514                 else
 515                         ret = -EPERM;
 516                 break;
 517
 518         case SO_LINGER:
 519                 if (optlen < sizeof(ling)) {
 520                         ret = -EINVAL;  /* 1003.1g */
 521                         break;
 522                 }
 523                 if (copy_from_user(&ling,optval,sizeof(ling))) {
 524                         ret = -EFAULT;
 525                         break;
 526                 }
 527                 if (!ling.l_onoff)
 528                         sock_reset_flag(sk, SOCK_LINGER);
 529                 else {
 530 #if (BITS_PER_LONG == 32)
 531                         if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
 532                                 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
 533                         else
 534 #endif
 535                                 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
 536                         sock_set_flag(sk, SOCK_LINGER);
 537                 }
 538                 break;
 539
 540         case SO_BSDCOMPAT:
 541                 sock_warn_obsolete_bsdism("setsockopt");
 542                 break;
 543
 544         case SO_PASSCRED:
 545                 if (valbool)
 546                         set_bit(SOCK_PASSCRED, &sock->flags);
 547                 else
 548                         clear_bit(SOCK_PASSCRED, &sock->flags);
 549                 break;
 550
 551         case SO_TIMESTAMP:
 552         case SO_TIMESTAMPNS:
 553                 if (valbool)  {
 554                         if (optname == SO_TIMESTAMP)
 555                                 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
 556                         else
 557                                 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
 558                         sock_set_flag(sk, SOCK_RCVTSTAMP);
 559                         sock_enable_timestamp(sk);
 560                 } else {
 561                         sock_reset_flag(sk, SOCK_RCVTSTAMP);
 562                         sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
 563                 }
 564                 break;
 565
 566         case SO_RCVLOWAT:
 567                 if (val < 0)
 568                         val = INT_MAX;
 569                 sk->sk_rcvlowat = val ? : 1;
 570                 break;
 571
 572         case SO_RCVTIMEO:
 573                 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
 574                 break;
 575
 576         case SO_SNDTIMEO:
 577                 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
 578                 break;
 579
 580 #ifdef CONFIG_NETDEVICES
 581         case SO_BINDTODEVICE:
 582         {
 583                 char devname[IFNAMSIZ];
 584
 585                 /* Sorry... */
 586                 if (!capable(CAP_NET_RAW)) {
 587                         ret = -EPERM;
 588                         break;
 589                 }
 590
 591                 /* Bind this socket to a particular device like "eth0",
 592                  * as specified in the passed interface name. If the
 593                  * name is "" or the option length is zero the socket
 594                  * is not bound.
 595                  */
 596
 597                 if (!valbool) {
 598                         sk->sk_bound_dev_if = 0;
 599                 } else {
 600                         if (optlen > IFNAMSIZ - 1)
 601                                 optlen = IFNAMSIZ - 1;
 602                         memset(devname, 0, sizeof(devname));
 603                         if (copy_from_user(devname, optval, optlen)) {
 604                                 ret = -EFAULT;
 605                                 break;
 606                         }
 607
 608                         /* Remove any cached route for this socket. */
 609                         sk_dst_reset(sk);
 610
 611                         if (devname[0] == '\0') {
 612                                 sk->sk_bound_dev_if = 0;
 613                         } else {
 614                                 struct net_device *dev = dev_get_by_name(devname);
 615                                 if (!dev) {
 616                                         ret = -ENODEV;
 617                                         break;
 618                                 }
 619                                 sk->sk_bound_dev_if = dev->ifindex;
 620                                 dev_put(dev);
 621                         }
 622                 }
 623                 break;
 624         }
 625 #endif
 626
 627
 628         case SO_ATTACH_FILTER:
 629                 ret = -EINVAL;
 630                 if (optlen == sizeof(struct sock_fprog)) {
 631                         struct sock_fprog fprog;
 632
 633                         ret = -EFAULT;
 634                         if (copy_from_user(&fprog, optval, sizeof(fprog)))
 635                                 break;
 636
 637                         ret = sk_attach_filter(&fprog, sk);
 638                 }
 639                 break;
 640
 641         case SO_DETACH_FILTER:
 642                 rcu_read_lock_bh();
 643                 filter = rcu_dereference(sk->sk_filter);
 644                 if (filter) {
 645                         rcu_assign_pointer(sk->sk_filter, NULL);
 646                         sk_filter_release(sk, filter);
 647                         rcu_read_unlock_bh();
 648                         break;
 649                 }
 650                 rcu_read_unlock_bh();
 651                 ret = -ENONET;
 652                 break;
 653
 654         case SO_PASSSEC:
 655                 if (valbool)
 656                         set_bit(SOCK_PASSSEC, &sock->flags);
 657                 else
 658                         clear_bit(SOCK_PASSSEC, &sock->flags);
 659                 break;
 660
 661                 /* We implement the SO_SNDLOWAT etc to
 662                    not be settable (1003.1g 5.3) */
 663         default:
 664                 ret = -ENOPROTOOPT;
 665                 break;
 666         }
 667         release_sock(sk);
 668         return ret;
 669 }
 670
 671
 672 int sock_getsockopt(struct socket *sock, int level, int optname,
 673                     char __user *optval, int __user *optlen)
 674 {
 675         struct sock *sk = sock->sk;
 676
 677         union {
 678                 int val;
 679                 struct linger ling;
 680                 struct timeval tm;
 681         } v;
 682
 683         unsigned int lv = sizeof(int);
 684         int len;
 685
 686         if (get_user(len, optlen))
 687                 return -EFAULT;
 688         if (len < 0)
 689                 return -EINVAL;
 690
 691         switch(optname) {
 692         case SO_DEBUG:
 693                 v.val = sock_flag(sk, SOCK_DBG);
 694                 break;
 695
 696         case SO_DONTROUTE:
 697                 v.val = sock_flag(sk, SOCK_LOCALROUTE);
 698                 break;
 699
 700         case SO_BROADCAST:
 701                 v.val = !!sock_flag(sk, SOCK_BROADCAST);
 702                 break;
 703
 704         case SO_SNDBUF:
 705                 v.val = sk->sk_sndbuf;
 706                 break;
 707
 708         case SO_RCVBUF:
 709                 v.val = sk->sk_rcvbuf;
 710                 break;
 711
 712         case SO_REUSEADDR:
 713                 v.val = sk->sk_reuse;
 714                 break;
 715
 716         case SO_KEEPALIVE:
 717                 v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
 718                 break;
 719
 720         case SO_TYPE:
 721                 v.val = sk->sk_type;
 722                 break;
 723
 724         case SO_ERROR:
 725                 v.val = -sock_error(sk);
 726                 if (v.val==0)
 727                         v.val = xchg(&sk->sk_err_soft, 0);
 728                 break;
 729
 730         case SO_OOBINLINE:
 731                 v.val = !!sock_flag(sk, SOCK_URGINLINE);
 732                 break;
 733
 734         case SO_NO_CHECK:
 735                 v.val = sk->sk_no_check;
 736                 break;
 737
 738         case SO_PRIORITY:
 739                 v.val = sk->sk_priority;
 740                 break;
 741
 742         case SO_LINGER:
 743                 lv              = sizeof(v.ling);
 744                 v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
 745                 v.ling.l_linger = sk->sk_lingertime / HZ;
 746                 break;
 747
 748         case SO_BSDCOMPAT:
 749                 sock_warn_obsolete_bsdism("getsockopt");
 750                 break;
 751
 752         case SO_TIMESTAMP:
 753                 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
 754                                 !sock_flag(sk, SOCK_RCVTSTAMPNS);
 755                 break;
 756
 757         case SO_TIMESTAMPNS:
 758                 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
 759                 break;
 760
 761         case SO_RCVTIMEO:
 762                 lv=sizeof(struct timeval);
 763                 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
 764                         v.tm.tv_sec = 0;
 765                         v.tm.tv_usec = 0;
 766                 } else {
 767                         v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
 768                         v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
 769                 }
 770                 break;
 771
 772         case SO_SNDTIMEO:
 773                 lv=sizeof(struct timeval);
 774                 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
 775                         v.tm.tv_sec = 0;
 776                         v.tm.tv_usec = 0;
 777                 } else {
 778                         v.tm.tv_sec = sk->sk_sndtimeo / HZ;
 779                         v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
 780                 }
 781                 break;
 782
 783         case SO_RCVLOWAT:
 784                 v.val = sk->sk_rcvlowat;
 785                 break;
 786
 787         case SO_SNDLOWAT:
 788                 v.val=1;
 789                 break;
 790
 791         case SO_PASSCRED:
 792                 v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
 793                 break;
 794
 795         case SO_PEERCRED:
 796                 if (len > sizeof(sk->sk_peercred))
 797                         len = sizeof(sk->sk_peercred);
 798                 if (copy_to_user(optval, &sk->sk_peercred, len))
 799                         return -EFAULT;
 800                 goto lenout;
 801
 802         case SO_PEERNAME:
 803         {
 804                 char address[128];
 805
 806                 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
 807                         return -ENOTCONN;
 808                 if (lv < len)
 809                         return -EINVAL;
 810                 if (copy_to_user(optval, address, len))
 811                         return -EFAULT;
 812                 goto lenout;
 813         }
 814
 815         /* Dubious BSD thing... Probably nobody even uses it, but
 816          * the UNIX standard wants it for whatever reason... -DaveM
 817          */
 818         case SO_ACCEPTCONN:
 819                 v.val = sk->sk_state == TCP_LISTEN;
 820                 break;
 821
 822         case SO_PASSSEC:
 823                 v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
 824                 break;
 825
 826         case SO_PEERSEC:
 827                 return security_socket_getpeersec_stream(sock, optval, optlen, len);
 828
 829         default:
 830                 return -ENOPROTOOPT;
 831         }
 832
 833         if (len > lv)
 834                 len = lv;
 835         if (copy_to_user(optval, &v, len))
 836                 return -EFAULT;
 837 lenout:
 838         if (put_user(len, optlen))
 839                 return -EFAULT;
 840         return 0;
 841 }
 842
 843 /*
 844  * Initialize an sk_lock.
 845  *
 846  * (We also register the sk_lock with the lock validator.)
 847  */
 848 static inline void sock_lock_init(struct sock *sk)
 849 {
 850         sock_lock_init_class_and_name(sk,
 851                         af_family_slock_key_strings[sk->sk_family],
 852                         af_family_slock_keys + sk->sk_family,
 853                         af_family_key_strings[sk->sk_family],
 854                         af_family_keys + sk->sk_family);
 855 }
 856
 857 /**
 858  *      sk_alloc - All socket objects are allocated here
 859  *      @family: protocol family
 860  *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 861  *      @prot: struct proto associated with this new sock instance
 862  *      @zero_it: if we should zero the newly allocated sock
 863  */
 864 struct sock *sk_alloc(int family, gfp_t priority,
 865                       struct proto *prot, int zero_it)
 866 {
 867         struct sock *sk = NULL;
 868         struct kmem_cache *slab = prot->slab;
 869
 870         if (slab != NULL)
 871                 sk = kmem_cache_alloc(slab, priority);
 872         else
 873                 sk = kmalloc(prot->obj_size, priority);
 874
 875         if (sk) {
 876                 if (zero_it) {
 877                         memset(sk, 0, prot->obj_size);
 878                         sk->sk_family = family;
 879                         /*
 880                          * See comment in struct sock definition to understand
 881                          * why we need sk_prot_creator -acme
 882                          */
 883                         sk->sk_prot = sk->sk_prot_creator = prot;
 884                         sock_lock_init(sk);
 885                 }
 886
 887                 if (security_sk_alloc(sk, family, priority))
 888                         goto out_free;
 889
 890                 if (!try_module_get(prot->owner))
 891                         goto out_free;
 892         }
 893         return sk;
 894
 895 out_free:
 896         if (slab != NULL)
 897                 kmem_cache_free(slab, sk);
 898         else
 899                 kfree(sk);
 900         return NULL;
 901 }
 902
 903 void sk_free(struct sock *sk)
 904 {
 905         struct sk_filter *filter;
 906         struct module *owner = sk->sk_prot_creator->owner;
 907
 908         if (sk->sk_destruct)
 909                 sk->sk_destruct(sk);
 910
 911         filter = rcu_dereference(sk->sk_filter);
 912         if (filter) {
 913                 sk_filter_release(sk, filter);
 914                 rcu_assign_pointer(sk->sk_filter, NULL);
 915         }
 916
 917         sock_disable_timestamp(sk);
 918
 919         if (atomic_read(&sk->sk_omem_alloc))
 920                 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
 921                        __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
 922
 923         security_sk_free(sk);
 924         if (sk->sk_prot_creator->slab != NULL)
 925                 kmem_cache_free(sk->sk_prot_creator->slab, sk);
 926         else
 927                 kfree(sk);
 928         module_put(owner);
 929 }
 930
 931 struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 932 {
 933         struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
 934
 935         if (newsk != NULL) {
 936                 struct sk_filter *filter;
 937
 938                 sock_copy(newsk, sk);
 939
 940                 /* SANITY */
 941                 sk_node_init(&newsk->sk_node);
 942                 sock_lock_init(newsk);
 943                 bh_lock_sock(newsk);
 944                 newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
 945
 946                 atomic_set(&newsk->sk_rmem_alloc, 0);
 947                 atomic_set(&newsk->sk_wmem_alloc, 0);
 948                 atomic_set(&newsk->sk_omem_alloc, 0);
 949                 skb_queue_head_init(&newsk->sk_receive_queue);
 950                 skb_queue_head_init(&newsk->sk_write_queue);
 951 #ifdef CONFIG_NET_DMA
 952                 skb_queue_head_init(&newsk->sk_async_wait_queue);
 953 #endif
 954
 955                 rwlock_init(&newsk->sk_dst_lock);
 956                 rwlock_init(&newsk->sk_callback_lock);
 957                 lockdep_set_class_and_name(&newsk->sk_callback_lock,
 958                                 af_callback_keys + newsk->sk_family,
 959                                 af_family_clock_key_strings[newsk->sk_family]);
 960
 961                 newsk->sk_dst_cache     = NULL;
 962                 newsk->sk_wmem_queued   = 0;
 963                 newsk->sk_forward_alloc = 0;
 964                 newsk->sk_send_head     = NULL;
 965                 newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 966
 967                 sock_reset_flag(newsk, SOCK_DONE);
 968                 skb_queue_head_init(&newsk->sk_error_queue);
 969
 970                 filter = newsk->sk_filter;
 971                 if (filter != NULL)
 972                         sk_filter_charge(newsk, filter);
 973
 974                 if (unlikely(xfrm_sk_clone_policy(newsk))) {
 975                         /* It is still raw copy of parent, so invalidate
 976                          * destructor and make plain sk_free() */
 977                         newsk->sk_destruct = NULL;
 978                         sk_free(newsk);
 979                         newsk = NULL;
 980                         goto out;
 981                 }
 982
 983                 newsk->sk_err      = 0;
 984                 newsk->sk_priority = 0;
 985                 atomic_set(&newsk->sk_refcnt, 2);
 986
 987                 /*
 988                  * Increment the counter in the same struct proto as the master
 989                  * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
 990                  * is the same as sk->sk_prot->socks, as this field was copied
 991                  * with memcpy).
 992                  *
 993                  * This _changes_ the previous behaviour, where
 994                  * tcp_create_openreq_child always was incrementing the
 995                  * equivalent to tcp_prot->socks (inet_sock_nr), so this have
 996                  * to be taken into account in all callers. -acme
 997                  */
 998                 sk_refcnt_debug_inc(newsk);
 999                 newsk->sk_socket = NULL;
1000                 newsk->sk_sleep  = NULL;
1001
1002                 if (newsk->sk_prot->sockets_allocated)
1003                         atomic_inc(newsk->sk_prot->sockets_allocated);
1004         }
1005 out:
1006         return newsk;
1007 }
1008
1009 EXPORT_SYMBOL_GPL(sk_clone);
1010
1011 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1012 {
1013         __sk_dst_set(sk, dst);
1014         sk->sk_route_caps = dst->dev->features;
1015         if (sk->sk_route_caps & NETIF_F_GSO)
1016                 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1017         if (sk_can_gso(sk)) {
1018                 if (dst->header_len)
1019                         sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1020                 else
1021                         sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1022         }
1023 }
1024 EXPORT_SYMBOL_GPL(sk_setup_caps);
1025
1026 void __init sk_init(void)
1027 {
1028         if (num_physpages <= 4096) {
1029                 sysctl_wmem_max = 32767;
1030                 sysctl_rmem_max = 32767;
1031                 sysctl_wmem_default = 32767;
1032                 sysctl_rmem_default = 32767;
1033         } else if (num_physpages >= 131072) {
1034                 sysctl_wmem_max = 131071;
1035                 sysctl_rmem_max = 131071;
1036         }
1037 }
1038
1039 /*
1040  *      Simple resource managers for sockets.
1041  */
1042
1043
1044 /*
1045  * Write buffer destructor automatically called from kfree_skb.
1046  */
1047 void sock_wfree(struct sk_buff *skb)
1048 {
1049         struct sock *sk = skb->sk;
1050
1051         /* In case it might be waiting for more memory. */
1052         atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1053         if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
1054                 sk->sk_write_space(sk);
1055         sock_put(sk);
1056 }
1057
1058 /*
1059  * Read buffer destructor automatically called from kfree_skb.
1060  */
1061 void sock_rfree(struct sk_buff *skb)
1062 {
1063         struct sock *sk = skb->sk;
1064
1065         atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1066 }
1067
1068
1069 int sock_i_uid(struct sock *sk)
1070 {
1071         int uid;
1072
1073         read_lock(&sk->sk_callback_lock);
1074         uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1075         read_unlock(&sk->sk_callback_lock);
1076         return uid;
1077 }
1078
1079 unsigned long sock_i_ino(struct sock *sk)
1080 {
1081         unsigned long ino;
1082
1083         read_lock(&sk->sk_callback_lock);
1084         ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1085         read_unlock(&sk->sk_callback_lock);
1086         return ino;
1087 }
1088
1089 /*
1090  * Allocate a skb from the socket's send buffer.
1091  */
1092 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1093                              gfp_t priority)
1094 {
1095         if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1096                 struct sk_buff * skb = alloc_skb(size, priority);
1097                 if (skb) {
1098                         skb_set_owner_w(skb, sk);
1099                         return skb;
1100                 }
1101         }
1102         return NULL;
1103 }
1104
1105 /*
1106  * Allocate a skb from the socket's receive buffer.
1107  */
1108 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1109                              gfp_t priority)
1110 {
1111         if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1112                 struct sk_buff *skb = alloc_skb(size, priority);
1113                 if (skb) {
1114                         skb_set_owner_r(skb, sk);
1115                         return skb;
1116                 }
1117         }
1118         return NULL;
1119 }
1120
1121 /*
1122  * Allocate a memory block from the socket's option memory buffer.
1123  */
1124 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1125 {
1126         if ((unsigned)size <= sysctl_optmem_max &&
1127             atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1128                 void *mem;
1129                 /* First do the add, to avoid the race if kmalloc
1130                  * might sleep.
1131                  */
1132                 atomic_add(size, &sk->sk_omem_alloc);
1133                 mem = kmalloc(size, priority);
1134                 if (mem)
1135                         return mem;
1136                 atomic_sub(size, &sk->sk_omem_alloc);
1137         }
1138         return NULL;
1139 }
1140
1141 /*
1142  * Free an option memory block.
1143  */
1144 void sock_kfree_s(struct sock *sk, void *mem, int size)
1145 {
1146         kfree(mem);
1147         atomic_sub(size, &sk->sk_omem_alloc);
1148 }
1149
1150 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1151    I think, these locks should be removed for datagram sockets.
1152  */
1153 static long sock_wait_for_wmem(struct sock * sk, long timeo)
1154 {
1155         DEFINE_WAIT(wait);
1156
1157         clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1158         for (;;) {
1159                 if (!timeo)
1160                         break;
1161                 if (signal_pending(current))
1162                         break;
1163                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1164                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1165                 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1166                         break;
1167                 if (sk->sk_shutdown & SEND_SHUTDOWN)
1168                         break;
1169                 if (sk->sk_err)
1170                         break;
1171                 timeo = schedule_timeout(timeo);
1172         }
1173         finish_wait(sk->sk_sleep, &wait);
1174         return timeo;
1175 }
1176
1177
1178 /*
1179  *      Generic send/receive buffer handlers
1180  */
1181
1182 static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1183                                             unsigned long header_len,
1184                                             unsigned long data_len,
1185                                             int noblock, int *errcode)
1186 {
1187         struct sk_buff *skb;
1188         gfp_t gfp_mask;
1189         long timeo;
1190         int err;
1191
1192         gfp_mask = sk->sk_allocation;
1193         if (gfp_mask & __GFP_WAIT)
1194                 gfp_mask |= __GFP_REPEAT;
1195
1196         timeo = sock_sndtimeo(sk, noblock);
1197         while (1) {
1198                 err = sock_error(sk);
1199                 if (err != 0)
1200                         goto failure;
1201
1202                 err = -EPIPE;
1203                 if (sk->sk_shutdown & SEND_SHUTDOWN)
1204                         goto failure;
1205
1206                 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1207                         skb = alloc_skb(header_len, gfp_mask);
1208                         if (skb) {
1209                                 int npages;
1210                                 int i;
1211
1212                                 /* No pages, we're done... */
1213                                 if (!data_len)
1214                                         break;
1215
1216                                 npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1217                                 skb->truesize += data_len;
1218                                 skb_shinfo(skb)->nr_frags = npages;
1219                                 for (i = 0; i < npages; i++) {
1220                                         struct page *page;
1221                                         skb_frag_t *frag;
1222
1223                                         page = alloc_pages(sk->sk_allocation, 0);
1224                                         if (!page) {
1225                                                 err = -ENOBUFS;
1226                                                 skb_shinfo(skb)->nr_frags = i;
1227                                                 kfree_skb(skb);
1228                                                 goto failure;
1229                                         }
1230
1231                                         frag = &skb_shinfo(skb)->frags[i];
1232                                         frag->page = page;
1233                                         frag->page_offset = 0;
1234                                         frag->size = (data_len >= PAGE_SIZE ?
1235                                                       PAGE_SIZE :
1236                                                       data_len);
1237                                         data_len -= PAGE_SIZE;
1238                                 }
1239
1240                                 /* Full success... */
1241                                 break;
1242                         }
1243                         err = -ENOBUFS;
1244                         goto failure;
1245                 }
1246                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1247                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1248                 err = -EAGAIN;
1249                 if (!timeo)
1250                         goto failure;
1251                 if (signal_pending(current))
1252                         goto interrupted;
1253                 timeo = sock_wait_for_wmem(sk, timeo);
1254         }
1255
1256         skb_set_owner_w(skb, sk);
1257         return skb;
1258
1259 interrupted:
1260         err = sock_intr_errno(timeo);
1261 failure:
1262         *errcode = err;
1263         return NULL;
1264 }
1265
/*
 * Allocate a purely linear send skb of @size bytes: the paged variant
 * called with a data length of zero.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	const unsigned long no_paged_data = 0;

	return sock_alloc_send_pskb(sk, size, no_paged_data, noblock, errcode);
}
1271
1272 static void __lock_sock(struct sock *sk)
1273 {
1274         DEFINE_WAIT(wait);
1275
1276         for (;;) {
1277                 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1278                                         TASK_UNINTERRUPTIBLE);
1279                 spin_unlock_bh(&sk->sk_lock.slock);
1280                 schedule();
1281                 spin_lock_bh(&sk->sk_lock.slock);
1282                 if (!sock_owned_by_user(sk))
1283                         break;
1284         }
1285         finish_wait(&sk->sk_lock.wq, &wait);
1286 }
1287
1288 static void __release_sock(struct sock *sk)
1289 {
1290         struct sk_buff *skb = sk->sk_backlog.head;
1291
1292         do {
1293                 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1294                 bh_unlock_sock(sk);
1295
1296                 do {
1297                         struct sk_buff *next = skb->next;
1298
1299                         skb->next = NULL;
1300                         sk->sk_backlog_rcv(sk, skb);
1301
1302                         /*
1303                          * We are in process context here with softirqs
1304                          * disabled, use cond_resched_softirq() to preempt.
1305                          * This is safe to do because we've taken the backlog
1306                          * queue private:
1307                          */
1308                         cond_resched_softirq();
1309
1310                         skb = next;
1311                 } while (skb != NULL);
1312
1313                 bh_lock_sock(sk);
1314         } while ((skb = sk->sk_backlog.head) != NULL);
1315 }
1316
1317 /**
1318  * sk_wait_data - wait for data to arrive at sk_receive_queue
1319  * @sk:    sock to wait on
1320  * @timeo: for how long
1321  *
1322  * Now socket state including sk->sk_err is changed only under lock,
1323  * hence we may omit checks after joining wait queue.
1324  * We check receive queue before schedule() only as optimization;
1325  * it is very likely that release_sock() added new data.
1326  */
1327 int sk_wait_data(struct sock *sk, long *timeo)
1328 {
1329         int rc;
1330         DEFINE_WAIT(wait);
1331
1332         prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1333         set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1334         rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1335         clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1336         finish_wait(sk->sk_sleep, &wait);
1337         return rc;
1338 }
1339
1340 EXPORT_SYMBOL(sk_wait_data);
1341
1342 /*
1343  * Set of default routines for initialising struct proto_ops when
1344  * the protocol does not support a particular function. In certain
1345  * cases where it makes no sense for a protocol to have a "do nothing"
1346  * function, some default processing is provided.
1347  */
1348
1349 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1350 {
1351         return -EOPNOTSUPP;
1352 }
1353
1354 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1355                     int len, int flags)
1356 {
1357         return -EOPNOTSUPP;
1358 }
1359
1360 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1361 {
1362         return -EOPNOTSUPP;
1363 }
1364
1365 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1366 {
1367         return -EOPNOTSUPP;
1368 }
1369
1370 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1371                     int *len, int peer)
1372 {
1373         return -EOPNOTSUPP;
1374 }
1375
1376 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1377 {
1378         return 0;
1379 }
1380
1381 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1382 {
1383         return -EOPNOTSUPP;
1384 }
1385
1386 int sock_no_listen(struct socket *sock, int backlog)
1387 {
1388         return -EOPNOTSUPP;
1389 }
1390
1391 int sock_no_shutdown(struct socket *sock, int how)
1392 {
1393         return -EOPNOTSUPP;
1394 }
1395
1396 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1397                     char __user *optval, int optlen)
1398 {
1399         return -EOPNOTSUPP;
1400 }
1401
1402 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1403                     char __user *optval, int __user *optlen)
1404 {
1405         return -EOPNOTSUPP;
1406 }
1407
1408 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1409                     size_t len)
1410 {
1411         return -EOPNOTSUPP;
1412 }
1413
1414 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1415                     size_t len, int flags)
1416 {
1417         return -EOPNOTSUPP;
1418 }
1419
1420 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1421 {
1422         /* Mirror missing mmap method error code */
1423         return -ENODEV;
1424 }
1425
1426 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1427 {
1428         ssize_t res;
1429         struct msghdr msg = {.msg_flags = flags};
1430         struct kvec iov;
1431         char *kaddr = kmap(page);
1432         iov.iov_base = kaddr + offset;
1433         iov.iov_len = size;
1434         res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1435         kunmap(page);
1436         return res;
1437 }
1438
1439 /*
1440  *      Default Socket Callbacks
1441  */
1442
1443 static void sock_def_wakeup(struct sock *sk)
1444 {
1445         read_lock(&sk->sk_callback_lock);
1446         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1447                 wake_up_interruptible_all(sk->sk_sleep);
1448         read_unlock(&sk->sk_callback_lock);
1449 }
1450
1451 static void sock_def_error_report(struct sock *sk)
1452 {
1453         read_lock(&sk->sk_callback_lock);
1454         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1455                 wake_up_interruptible(sk->sk_sleep);
1456         sk_wake_async(sk,0,POLL_ERR);
1457         read_unlock(&sk->sk_callback_lock);
1458 }
1459
1460 static void sock_def_readable(struct sock *sk, int len)
1461 {
1462         read_lock(&sk->sk_callback_lock);
1463         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1464                 wake_up_interruptible(sk->sk_sleep);
1465         sk_wake_async(sk,1,POLL_IN);
1466         read_unlock(&sk->sk_callback_lock);
1467 }
1468
1469 static void sock_def_write_space(struct sock *sk)
1470 {
1471         read_lock(&sk->sk_callback_lock);
1472
1473         /* Do not wake up a writer until he can make "significant"
1474          * progress.  --DaveM
1475          */
1476         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1477                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1478                         wake_up_interruptible(sk->sk_sleep);
1479
1480                 /* Should agree with poll, otherwise some programs break */
1481                 if (sock_writeable(sk))
1482                         sk_wake_async(sk, 2, POLL_OUT);
1483         }
1484
1485         read_unlock(&sk->sk_callback_lock);
1486 }
1487
1488 static void sock_def_destruct(struct sock *sk)
1489 {
1490         kfree(sk->sk_protinfo);
1491 }
1492
1493 void sk_send_sigurg(struct sock *sk)
1494 {
1495         if (sk->sk_socket && sk->sk_socket->file)
1496                 if (send_sigurg(&sk->sk_socket->file->f_owner))
1497                         sk_wake_async(sk, 3, POLL_PRI);
1498 }
1499
/*
 * (Re)arm @timer to fire at @expires.  A reference on @sk is taken only
 * when mod_timer() returns 0.
 */
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (mod_timer(timer, expires))
		return;

	sock_hold(sk);
}
1506
1507 EXPORT_SYMBOL(sk_reset_timer);
1508
/*
 * Cancel @timer.  The sock reference is dropped with __sock_put() only
 * when the timer was pending and del_timer() returned non-zero.
 */
void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (!timer_pending(timer))
		return;

	if (del_timer(timer))
		__sock_put(sk);
}
1514
1515 EXPORT_SYMBOL(sk_stop_timer);
1516
1517 void sock_init_data(struct socket *sock, struct sock *sk)
1518 {
1519         skb_queue_head_init(&sk->sk_receive_queue);
1520         skb_queue_head_init(&sk->sk_write_queue);
1521         skb_queue_head_init(&sk->sk_error_queue);
1522 #ifdef CONFIG_NET_DMA
1523         skb_queue_head_init(&sk->sk_async_wait_queue);
1524 #endif
1525
1526         sk->sk_send_head        =       NULL;
1527
1528         init_timer(&sk->sk_timer);
1529
1530         sk->sk_allocation       =       GFP_KERNEL;
1531         sk->sk_rcvbuf           =       sysctl_rmem_default;
1532         sk->sk_sndbuf           =       sysctl_wmem_default;
1533         sk->sk_state            =       TCP_CLOSE;
1534         sk->sk_socket           =       sock;
1535
1536         sock_set_flag(sk, SOCK_ZAPPED);
1537
1538         if (sock) {
1539                 sk->sk_type     =       sock->type;
1540                 sk->sk_sleep    =       &sock->wait;
1541                 sock->sk        =       sk;
1542         } else
1543                 sk->sk_sleep    =       NULL;
1544
1545         rwlock_init(&sk->sk_dst_lock);
1546         rwlock_init(&sk->sk_callback_lock);
1547         lockdep_set_class_and_name(&sk->sk_callback_lock,
1548                         af_callback_keys + sk->sk_family,
1549                         af_family_clock_key_strings[sk->sk_family]);
1550
1551         sk->sk_state_change     =       sock_def_wakeup;
1552         sk->sk_data_ready       =       sock_def_readable;
1553         sk->sk_write_space      =       sock_def_write_space;
1554         sk->sk_error_report     =       sock_def_error_report;
1555         sk->sk_destruct         =       sock_def_destruct;
1556
1557         sk->sk_sndmsg_page      =       NULL;
1558         sk->sk_sndmsg_off       =       0;
1559
1560         sk->sk_peercred.pid     =       0;
1561         sk->sk_peercred.uid     =       -1;
1562         sk->sk_peercred.gid     =       -1;
1563         sk->sk_write_pending    =       0;
1564         sk->sk_rcvlowat         =       1;
1565         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
1566         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
1567
1568         sk->sk_stamp = ktime_set(-1L, -1L);
1569
1570         atomic_set(&sk->sk_refcnt, 1);
1571 }
1572
1573 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1574 {
1575         might_sleep();
1576         spin_lock_bh(&sk->sk_lock.slock);
1577         if (sk->sk_lock.owner)
1578                 __lock_sock(sk);
1579         sk->sk_lock.owner = (void *)1;
1580         spin_unlock(&sk->sk_lock.slock);
1581         /*
1582          * The sk_lock has mutex_lock() semantics here:
1583          */
1584         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1585         local_bh_enable();
1586 }
1587
1588 EXPORT_SYMBOL(lock_sock_nested);
1589
1590 void fastcall release_sock(struct sock *sk)
1591 {
1592         /*
1593          * The sk_lock has mutex_unlock() semantics:
1594          */
1595         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1596
1597         spin_lock_bh(&sk->sk_lock.slock);
1598         if (sk->sk_backlog.tail)
1599                 __release_sock(sk);
1600         sk->sk_lock.owner = NULL;
1601         if (waitqueue_active(&sk->sk_lock.wq))
1602                 wake_up(&sk->sk_lock.wq);
1603         spin_unlock_bh(&sk->sk_lock.slock);
1604 }
1605 EXPORT_SYMBOL(release_sock);
1606
1607 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1608 {
1609         struct timeval tv;
1610         if (!sock_flag(sk, SOCK_TIMESTAMP))
1611                 sock_enable_timestamp(sk);
1612         tv = ktime_to_timeval(sk->sk_stamp);
1613         if (tv.tv_sec == -1)
1614                 return -ENOENT;
1615         if (tv.tv_sec == 0) {
1616                 sk->sk_stamp = ktime_get_real();
1617                 tv = ktime_to_timeval(sk->sk_stamp);
1618         }
1619         return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1620 }
1621 EXPORT_SYMBOL(sock_get_timestamp);
1622
1623 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1624 {
1625         struct timespec ts;
1626         if (!sock_flag(sk, SOCK_TIMESTAMP))
1627                 sock_enable_timestamp(sk);
1628         ts = ktime_to_timespec(sk->sk_stamp);
1629         if (ts.tv_sec == -1)
1630                 return -ENOENT;
1631         if (ts.tv_sec == 0) {
1632                 sk->sk_stamp = ktime_get_real();
1633                 ts = ktime_to_timespec(sk->sk_stamp);
1634         }
1635         return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1636 }
1637 EXPORT_SYMBOL(sock_get_timestampns);
1638
1639 void sock_enable_timestamp(struct sock *sk)
1640 {
1641         if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1642                 sock_set_flag(sk, SOCK_TIMESTAMP);
1643                 net_enable_timestamp();
1644         }
1645 }
1646 EXPORT_SYMBOL(sock_enable_timestamp);
1647
/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
1655 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1656                            char __user *optval, int __user *optlen)
1657 {
1658         struct sock *sk = sock->sk;
1659
1660         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1661 }
1662
1663 EXPORT_SYMBOL(sock_common_getsockopt);
1664
#ifdef CONFIG_COMPAT
/*
 *	Compat getsockopt entry point: use the protocol's compat_getsockopt
 *	handler when it supplies one, otherwise its regular getsockopt.
 */
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct proto *prot = sk->sk_prot;

	if (prot->compat_getsockopt == NULL)
		return prot->getsockopt(sk, level, optname, optval, optlen);

	return prot->compat_getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif
1678
1679 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1680                         struct msghdr *msg, size_t size, int flags)
1681 {
1682         struct sock *sk = sock->sk;
1683         int addr_len = 0;
1684         int err;
1685
1686         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1687                                    flags & ~MSG_DONTWAIT, &addr_len);
1688         if (err >= 0)
1689                 msg->msg_namelen = addr_len;
1690         return err;
1691 }
1692
1693 EXPORT_SYMBOL(sock_common_recvmsg);
1694
1695 /*
1696  *      Set socket options on an inet socket.
1697  */
1698 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1699                            char __user *optval, int optlen)
1700 {
1701         struct sock *sk = sock->sk;
1702
1703         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1704 }
1705
1706 EXPORT_SYMBOL(sock_common_setsockopt);
1707
#ifdef CONFIG_COMPAT
/*
 *	Compat setsockopt entry point: use the protocol's compat_setsockopt
 *	handler when it supplies one, otherwise its regular setsockopt.
 */
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct proto *prot = sk->sk_prot;

	if (prot->compat_setsockopt == NULL)
		return prot->setsockopt(sk, level, optname, optval, optlen);

	return prot->compat_setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif
1721
1722 void sk_common_release(struct sock *sk)
1723 {
1724         if (sk->sk_prot->destroy)
1725                 sk->sk_prot->destroy(sk);
1726
1727         /*
1728          * Observation: when sock_common_release is called, processes have
1729          * no access to socket. But net still has.
1730          * Step one, detach it from networking:
1731          *
1732          * A. Remove from hash tables.
1733          */
1734
1735         sk->sk_prot->unhash(sk);
1736
1737         /*
1738          * In this point socket cannot receive new packets, but it is possible
1739          * that some packets are in flight because some CPU runs receiver and
1740          * did hash table lookup before we unhashed socket. They will achieve
1741          * receive queue and will be purged by socket destructor.
1742          *
1743          * Also we still have packets pending on receive queue and probably,
1744          * our own packets waiting in device queues. sock_destroy will drain
1745          * receive queue, but transmitted packets will delay socket destruction
1746          * until the last reference will be released.
1747          */
1748
1749         sock_orphan(sk);
1750
1751         xfrm_sk_free_policy(sk);
1752
1753         sk_refcnt_debug_release(sk);
1754         sock_put(sk);
1755 }
1756
1757 EXPORT_SYMBOL(sk_common_release);
1758
/*
 * List of registered protocols.  Writers (proto_register/proto_unregister)
 * and the /proc/net/protocols iterator take proto_list_lock around it.
 */
static LIST_HEAD(proto_list);
static DEFINE_RWLOCK(proto_list_lock);
1761
1762 int proto_register(struct proto *prot, int alloc_slab)
1763 {
1764         char *request_sock_slab_name = NULL;
1765         char *timewait_sock_slab_name;
1766         int rc = -ENOBUFS;
1767
1768         if (alloc_slab) {
1769                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1770                                                SLAB_HWCACHE_ALIGN, NULL);
1771
1772                 if (prot->slab == NULL) {
1773                         printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1774                                prot->name);
1775                         goto out;
1776                 }
1777
1778                 if (prot->rsk_prot != NULL) {
1779                         static const char mask[] = "request_sock_%s";
1780
1781                         request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1782                         if (request_sock_slab_name == NULL)
1783                                 goto out_free_sock_slab;
1784
1785                         sprintf(request_sock_slab_name, mask, prot->name);
1786                         prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1787                                                                  prot->rsk_prot->obj_size, 0,
1788                                                                  SLAB_HWCACHE_ALIGN, NULL);
1789
1790                         if (prot->rsk_prot->slab == NULL) {
1791                                 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1792                                        prot->name);
1793                                 goto out_free_request_sock_slab_name;
1794                         }
1795                 }
1796
1797                 if (prot->twsk_prot != NULL) {
1798                         static const char mask[] = "tw_sock_%s";
1799
1800                         timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1801
1802                         if (timewait_sock_slab_name == NULL)
1803                                 goto out_free_request_sock_slab;
1804
1805                         sprintf(timewait_sock_slab_name, mask, prot->name);
1806                         prot->twsk_prot->twsk_slab =
1807                                 kmem_cache_create(timewait_sock_slab_name,
1808                                                   prot->twsk_prot->twsk_obj_size,
1809                                                   0, SLAB_HWCACHE_ALIGN,
1810                                                   NULL);
1811                         if (prot->twsk_prot->twsk_slab == NULL)
1812                                 goto out_free_timewait_sock_slab_name;
1813                 }
1814         }
1815
1816         write_lock(&proto_list_lock);
1817         list_add(&prot->node, &proto_list);
1818         write_unlock(&proto_list_lock);
1819         rc = 0;
1820 out:
1821         return rc;
1822 out_free_timewait_sock_slab_name:
1823         kfree(timewait_sock_slab_name);
1824 out_free_request_sock_slab:
1825         if (prot->rsk_prot && prot->rsk_prot->slab) {
1826                 kmem_cache_destroy(prot->rsk_prot->slab);
1827                 prot->rsk_prot->slab = NULL;
1828         }
1829 out_free_request_sock_slab_name:
1830         kfree(request_sock_slab_name);
1831 out_free_sock_slab:
1832         kmem_cache_destroy(prot->slab);
1833         prot->slab = NULL;
1834         goto out;
1835 }
1836
1837 EXPORT_SYMBOL(proto_register);
1838
1839 void proto_unregister(struct proto *prot)
1840 {
1841         write_lock(&proto_list_lock);
1842         list_del(&prot->node);
1843         write_unlock(&proto_list_lock);
1844
1845         if (prot->slab != NULL) {
1846                 kmem_cache_destroy(prot->slab);
1847                 prot->slab = NULL;
1848         }
1849
1850         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1851                 const char *name = kmem_cache_name(prot->rsk_prot->slab);
1852
1853                 kmem_cache_destroy(prot->rsk_prot->slab);
1854                 kfree(name);
1855                 prot->rsk_prot->slab = NULL;
1856         }
1857
1858         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1859                 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1860
1861                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1862                 kfree(name);
1863                 prot->twsk_prot->twsk_slab = NULL;
1864         }
1865 }
1866
1867 EXPORT_SYMBOL(proto_unregister);
1868
1869 #ifdef CONFIG_PROC_FS
1870 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1871 {
1872         read_lock(&proto_list_lock);
1873         return seq_list_start_head(&proto_list, *pos);
1874 }
1875
1876 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1877 {
1878         return seq_list_next(v, &proto_list, pos);
1879 }
1880
1881 static void proto_seq_stop(struct seq_file *seq, void *v)
1882 {
1883         read_unlock(&proto_list_lock);
1884 }
1885
/* Map a method pointer to 'y' (non-NULL) or 'n' (NULL) for the proc table. */
static char proto_method_implemented(const void *method)
{
	if (method != NULL)
		return 'y';

	return 'n';
}
1890
1891 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1892 {
1893         seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1894                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1895                    proto->name,
1896                    proto->obj_size,
1897                    proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1898                    proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1899                    proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1900                    proto->max_header,
1901                    proto->slab == NULL ? "no" : "yes",
1902                    module_name(proto->owner),
1903                    proto_method_implemented(proto->close),
1904                    proto_method_implemented(proto->connect),
1905                    proto_method_implemented(proto->disconnect),
1906                    proto_method_implemented(proto->accept),
1907                    proto_method_implemented(proto->ioctl),
1908                    proto_method_implemented(proto->init),
1909                    proto_method_implemented(proto->destroy),
1910                    proto_method_implemented(proto->shutdown),
1911                    proto_method_implemented(proto->setsockopt),
1912                    proto_method_implemented(proto->getsockopt),
1913                    proto_method_implemented(proto->sendmsg),
1914                    proto_method_implemented(proto->recvmsg),
1915                    proto_method_implemented(proto->sendpage),
1916                    proto_method_implemented(proto->bind),
1917                    proto_method_implemented(proto->backlog_rcv),
1918                    proto_method_implemented(proto->hash),
1919                    proto_method_implemented(proto->unhash),
1920                    proto_method_implemented(proto->get_port),
1921                    proto_method_implemented(proto->enter_memory_pressure));
1922 }
1923
1924 static int proto_seq_show(struct seq_file *seq, void *v)
1925 {
1926         if (v == &proto_list)
1927                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1928                            "protocol",
1929                            "size",
1930                            "sockets",
1931                            "memory",
1932                            "press",
1933                            "maxhdr",
1934                            "slab",
1935                            "module",
1936                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1937         else
1938                 proto_seq_printf(seq, list_entry(v, struct proto, node));
1939         return 0;
1940 }
1941
1942 static const struct seq_operations proto_seq_ops = {
1943         .start  = proto_seq_start,
1944         .next   = proto_seq_next,
1945         .stop   = proto_seq_stop,
1946         .show   = proto_seq_show,
1947 };
1948
1949 static int proto_seq_open(struct inode *inode, struct file *file)
1950 {
1951         return seq_open(file, &proto_seq_ops);
1952 }
1953
1954 static const struct file_operations proto_seq_fops = {
1955         .owner          = THIS_MODULE,
1956         .open           = proto_seq_open,
1957         .read           = seq_read,
1958         .llseek         = seq_lseek,
1959         .release        = seq_release,
1960 };
1961
1962 static int __init proto_init(void)
1963 {
1964         /* register /proc/net/protocols */
1965         return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1966 }
1967
1968 subsys_initcall(proto_init);
1969
1970 #endif /* PROC_FS */
1971
/* Exported symbols, grouped by name prefix. */

/* sk_* */
EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);

/* sock_no_* */
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);

/* remaining sock_* */
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);

/* sysctl_* (rmem/wmem limits only exported with CONFIG_SYSCTL) */
EXPORT_SYMBOL(sysctl_optmem_max);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif