net/core/sock.c

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Generic socket support routines. Memory allocators, socket lock/release
   7  *              handler for protocols to use and generic option handler.
   8  *
   9  *
  10  * Version:     $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
  11  *
  12  * Authors:     Ross Biro
  13  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Alan Cox, <A.Cox@swansea.ac.uk>
  16  *
  17  * Fixes:
  18  *              Alan Cox        :       Numerous verify_area() problems
  19  *              Alan Cox        :       Connecting on a connecting socket
  20  *                                      now returns an error for tcp.
  21  *              Alan Cox        :       sock->protocol is set correctly.
  22  *                                      and is not sometimes left as 0.
  23  *              Alan Cox        :       connect handles icmp errors on a
  24  *                                      connect properly. Unfortunately there
  25  *                                      is a restart syscall nasty there. I
  26  *                                      can't match BSD without hacking the C
  27  *                                      library. Ideas urgently sought!
  28  *              Alan Cox        :       Disallow bind() to addresses that are
  29  *                                      not ours - especially broadcast ones!!
  30  *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
  31  *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
  32  *                                      instead they leave that for the DESTROY timer.
  33  *              Alan Cox        :       Clean up error flag in accept
  34  *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
  35  *                                      was buggy. Put a remove_sock() in the handler
  36  *                                      for memory when we hit 0. Also altered the timer
  37  *                                      code. The ACK stuff can wait and needs major
  38  *                                      TCP layer surgery.
  39  *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
  40  *                                      and fixed timer/inet_bh race.
  41  *              Alan Cox        :       Added zapped flag for TCP
  42  *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
  43  *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
  44  *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
  45  *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
  46  *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
  47  *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
  48  *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
  49  *      Pauline Middelink       :       identd support
  50  *              Alan Cox        :       Fixed connect() taking signals I think.
  51  *              Alan Cox        :       SO_LINGER supported
  52  *              Alan Cox        :       Error reporting fixes
  53  *              Anonymous       :       inet_create tidied up (sk->reuse setting)
  54  *              Alan Cox        :       inet sockets don't set sk->type!
  55  *              Alan Cox        :       Split socket option code
  56  *              Alan Cox        :       Callbacks
  57  *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
  58  *              Alex            :       Removed restriction on inet fioctl
  59  *              Alan Cox        :       Splitting INET from NET core
  60  *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
  61  *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
  62  *              Alan Cox        :       Split IP from generic code
  63  *              Alan Cox        :       New kfree_skbmem()
  64  *              Alan Cox        :       Make SO_DEBUG superuser only.
  65  *              Alan Cox        :       Allow anyone to clear SO_DEBUG
  66  *                                      (compatibility fix)
  67  *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
  68  *              Alan Cox        :       Allocator for a socket is settable.
  69  *              Alan Cox        :       SO_ERROR includes soft errors.
  70  *              Alan Cox        :       Allow NULL arguments on some SO_ opts
  71  *              Alan Cox        :       Generic socket allocation to make hooks
  72  *                                      easier (suggested by Craig Metz).
  73  *              Michael Pall    :       SO_ERROR returns positive errno again
  74  *              Steve Whitehouse:       Added default destructor to free
  75  *                                      protocol private data.
  76  *              Steve Whitehouse:       Added various other default routines
  77  *                                      common to several socket families.
  78  *              Chris Evans     :       Call suser() check last on F_SETOWN
  79  *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
  80  *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
  81  *              Andi Kleen      :       Fix write_space callback
  82  *              Chris Evans     :       Security fixes - signedness again
  83  *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
  84  *
  85  * To Fix:
  86  *
  87  *
  88  *              This program is free software; you can redistribute it and/or
  89  *              modify it under the terms of the GNU General Public License
  90  *              as published by the Free Software Foundation; either version
  91  *              2 of the License, or (at your option) any later version.
  92  */
  93
  94 #include <linux/capability.h>
  95 #include <linux/errno.h>
  96 #include <linux/types.h>
  97 #include <linux/socket.h>
  98 #include <linux/in.h>
  99 #include <linux/kernel.h>
 100 #include <linux/module.h>
 101 #include <linux/proc_fs.h>
 102 #include <linux/seq_file.h>
 103 #include <linux/sched.h>
 104 #include <linux/timer.h>
 105 #include <linux/string.h>
 106 #include <linux/sockios.h>
 107 #include <linux/net.h>
 108 #include <linux/mm.h>
 109 #include <linux/slab.h>
 110 #include <linux/interrupt.h>
 111 #include <linux/poll.h>
 112 #include <linux/tcp.h>
 113 #include <linux/init.h>
 114 #include <linux/highmem.h>
 115
 116 #include <asm/uaccess.h>
 117 #include <asm/system.h>
 118
 119 #include <linux/netdevice.h>
 120 #include <net/protocol.h>
 121 #include <linux/skbuff.h>
 122 #include <net/request_sock.h>
 123 #include <net/sock.h>
 124 #include <net/xfrm.h>
 125 #include <linux/ipsec.h>
 126
 127 #include <linux/filter.h>
 128
 129 #ifdef CONFIG_INET
 130 #include <net/tcp.h>
 131 #endif
 132
  133 /*
  134  * Each address family might have different locking rules, so we have
  135  * one sk_lock key and one slock key per address family:
  136  */
 137 static struct lock_class_key af_family_keys[AF_MAX];
 138 static struct lock_class_key af_family_slock_keys[AF_MAX];
 139
 140 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 141 /*
 142  * Make lock validator output more readable. (we pre-construct these
 143  * strings build-time, so that runtime initialization of socket
 144  * locks is fast):
 145  */
 146 static const char *af_family_key_strings[AF_MAX+1] = {
 147   "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
 148   "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
 149   "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
 150   "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
 151   "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
 152   "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
 153   "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
 154   "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
 155   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
 156   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
 157   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
 158 };
 159 static const char *af_family_slock_key_strings[AF_MAX+1] = {
 160   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
 161   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
 162   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
 163   "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
 164   "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
 165   "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
 166   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
 167   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
 168   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
 169   "slock-27"       , "slock-28"          , "slock-29"          ,
 170   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_MAX"
 171 };
 172 #endif
 173
 174 /*
 175  * sk_callback_lock locking rules are per-address-family,
 176  * so split the lock classes by using a per-AF key:
 177  */
 178 static struct lock_class_key af_callback_keys[AF_MAX];
 179
 180 /* Take into consideration the size of the struct sk_buff overhead in the
 181  * determination of these values, since that is non-constant across
 182  * platforms.  This makes socket queueing behavior and performance
 183  * not depend upon such differences.
 184  */
 185 #define _SK_MEM_PACKETS         256
 186 #define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
 187 #define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 188 #define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 189
 190 /* Run time adjustable parameters. */
 191 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
 192 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
 193 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
 194 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 195
  196 /* Maximal space eaten by iovec or ancillary data plus some space */
 197 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 198
 199 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 200 {
 201         struct timeval tv;
 202
 203         if (optlen < sizeof(tv))
 204                 return -EINVAL;
 205         if (copy_from_user(&tv, optval, sizeof(tv)))
 206                 return -EFAULT;
 207
 208         *timeo_p = MAX_SCHEDULE_TIMEOUT;
 209         if (tv.tv_sec == 0 && tv.tv_usec == 0)
 210                 return 0;
 211         if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
 212                 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
 213         return 0;
 214 }
 215
 216 static void sock_warn_obsolete_bsdism(const char *name)
 217 {
 218         static int warned;
 219         static char warncomm[TASK_COMM_LEN];
 220         if (strcmp(warncomm, current->comm) && warned < 5) {
 221                 strcpy(warncomm,  current->comm);
 222                 printk(KERN_WARNING "process `%s' is using obsolete "
 223                        "%s SO_BSDCOMPAT\n", warncomm, name);
 224                 warned++;
 225         }
 226 }
 227
 228 static void sock_disable_timestamp(struct sock *sk)
 229 {
 230         if (sock_flag(sk, SOCK_TIMESTAMP)) {
 231                 sock_reset_flag(sk, SOCK_TIMESTAMP);
 232                 net_disable_timestamp();
 233         }
 234 }
 235
 236
 237 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 238 {
 239         int err = 0;
 240         int skb_len;
 241
 242         /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
 243            number of warnings when compiling with -W --ANK
 244          */
 245         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 246             (unsigned)sk->sk_rcvbuf) {
 247                 err = -ENOMEM;
 248                 goto out;
 249         }
 250
 251         err = sk_filter(sk, skb);
 252         if (err)
 253                 goto out;
 254
 255         skb->dev = NULL;
 256         skb_set_owner_r(skb, sk);
 257
 258         /* Cache the SKB length before we tack it onto the receive
 259          * queue.  Once it is added it no longer belongs to us and
 260          * may be freed by other threads of control pulling packets
 261          * from the queue.
 262          */
 263         skb_len = skb->len;
 264
 265         skb_queue_tail(&sk->sk_receive_queue, skb);
 266
 267         if (!sock_flag(sk, SOCK_DEAD))
 268                 sk->sk_data_ready(sk, skb_len);
 269 out:
 270         return err;
 271 }
 272 EXPORT_SYMBOL(sock_queue_rcv_skb);
 273
 274 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 275 {
 276         int rc = NET_RX_SUCCESS;
 277
 278         if (sk_filter(sk, skb))
 279                 goto discard_and_relse;
 280
 281         skb->dev = NULL;
 282
 283         if (nested)
 284                 bh_lock_sock_nested(sk);
 285         else
 286                 bh_lock_sock(sk);
 287         if (!sock_owned_by_user(sk)) {
 288                 /*
 289                  * trylock + unlock semantics:
 290                  */
 291                 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
 292
 293                 rc = sk->sk_backlog_rcv(sk, skb);
 294
 295                 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
 296         } else
 297                 sk_add_backlog(sk, skb);
 298         bh_unlock_sock(sk);
 299 out:
 300         sock_put(sk);
 301         return rc;
 302 discard_and_relse:
 303         kfree_skb(skb);
 304         goto out;
 305 }
 306 EXPORT_SYMBOL(sk_receive_skb);
 307
 308 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 309 {
 310         struct dst_entry *dst = sk->sk_dst_cache;
 311
 312         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 313                 sk->sk_dst_cache = NULL;
 314                 dst_release(dst);
 315                 return NULL;
 316         }
 317
 318         return dst;
 319 }
 320 EXPORT_SYMBOL(__sk_dst_check);
 321
 322 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
 323 {
 324         struct dst_entry *dst = sk_dst_get(sk);
 325
 326         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 327                 sk_dst_reset(sk);
 328                 dst_release(dst);
 329                 return NULL;
 330         }
 331
 332         return dst;
 333 }
 334 EXPORT_SYMBOL(sk_dst_check);
 335
 336 /*
 337  *      This is meant for all protocols to use and covers goings on
 338  *      at the socket level. Everything here is generic.
 339  */
 340
 341 int sock_setsockopt(struct socket *sock, int level, int optname,
 342                     char __user *optval, int optlen)
 343 {
 344         struct sock *sk=sock->sk;
 345         struct sk_filter *filter;
 346         int val;
 347         int valbool;
 348         struct linger ling;
 349         int ret = 0;
 350
 351         /*
 352          *      Options without arguments
 353          */
 354
 355 #ifdef SO_DONTLINGER            /* Compatibility item... */
 356         if (optname == SO_DONTLINGER) {
 357                 lock_sock(sk);
 358                 sock_reset_flag(sk, SOCK_LINGER);
 359                 release_sock(sk);
 360                 return 0;
 361         }
 362 #endif
 363
 364         if (optlen < sizeof(int))
 365                 return -EINVAL;
 366
 367         if (get_user(val, (int __user *)optval))
 368                 return -EFAULT;
 369
 370         valbool = val?1:0;
 371
 372         lock_sock(sk);
 373
 374         switch(optname) {
 375         case SO_DEBUG:
 376                 if (val && !capable(CAP_NET_ADMIN)) {
 377                         ret = -EACCES;
 378                 }
 379                 else if (valbool)
 380                         sock_set_flag(sk, SOCK_DBG);
 381                 else
 382                         sock_reset_flag(sk, SOCK_DBG);
 383                 break;
 384         case SO_REUSEADDR:
 385                 sk->sk_reuse = valbool;
 386                 break;
 387         case SO_TYPE:
 388         case SO_ERROR:
 389                 ret = -ENOPROTOOPT;
 390                 break;
 391         case SO_DONTROUTE:
 392                 if (valbool)
 393                         sock_set_flag(sk, SOCK_LOCALROUTE);
 394                 else
 395                         sock_reset_flag(sk, SOCK_LOCALROUTE);
 396                 break;
 397         case SO_BROADCAST:
 398                 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
 399                 break;
 400         case SO_SNDBUF:
 401                 /* Don't error on this BSD doesn't and if you think
 402                    about it this is right. Otherwise apps have to
 403                    play 'guess the biggest size' games. RCVBUF/SNDBUF
 404                    are treated in BSD as hints */
 405
 406                 if (val > sysctl_wmem_max)
 407                         val = sysctl_wmem_max;
 408 set_sndbuf:
 409                 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 410                 if ((val * 2) < SOCK_MIN_SNDBUF)
 411                         sk->sk_sndbuf = SOCK_MIN_SNDBUF;
 412                 else
 413                         sk->sk_sndbuf = val * 2;
 414
 415                 /*
 416                  *      Wake up sending tasks if we
 417                  *      upped the value.
 418                  */
 419                 sk->sk_write_space(sk);
 420                 break;
 421
 422         case SO_SNDBUFFORCE:
 423                 if (!capable(CAP_NET_ADMIN)) {
 424                         ret = -EPERM;
 425                         break;
 426                 }
 427                 goto set_sndbuf;
 428
 429         case SO_RCVBUF:
 430                 /* Don't error on this BSD doesn't and if you think
 431                    about it this is right. Otherwise apps have to
 432                    play 'guess the biggest size' games. RCVBUF/SNDBUF
 433                    are treated in BSD as hints */
 434
 435                 if (val > sysctl_rmem_max)
 436                         val = sysctl_rmem_max;
 437 set_rcvbuf:
 438                 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 439                 /*
 440                  * We double it on the way in to account for
 441                  * "struct sk_buff" etc. overhead.   Applications
 442                  * assume that the SO_RCVBUF setting they make will
 443                  * allow that much actual data to be received on that
 444                  * socket.
 445                  *
 446                  * Applications are unaware that "struct sk_buff" and
 447                  * other overheads allocate from the receive buffer
 448                  * during socket buffer allocation.
 449                  *
 450                  * And after considering the possible alternatives,
 451                  * returning the value we actually used in getsockopt
 452                  * is the most desirable behavior.
 453                  */
 454                 if ((val * 2) < SOCK_MIN_RCVBUF)
 455                         sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
 456                 else
 457                         sk->sk_rcvbuf = val * 2;
 458                 break;
 459
 460         case SO_RCVBUFFORCE:
 461                 if (!capable(CAP_NET_ADMIN)) {
 462                         ret = -EPERM;
 463                         break;
 464                 }
 465                 goto set_rcvbuf;
 466
 467         case SO_KEEPALIVE:
 468 #ifdef CONFIG_INET
 469                 if (sk->sk_protocol == IPPROTO_TCP)
 470                         tcp_set_keepalive(sk, valbool);
 471 #endif
 472                 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
 473                 break;
 474
 475         case SO_OOBINLINE:
 476                 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
 477                 break;
 478
 479         case SO_NO_CHECK:
 480                 sk->sk_no_check = valbool;
 481                 break;
 482
 483         case SO_PRIORITY:
 484                 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
 485                         sk->sk_priority = val;
 486                 else
 487                         ret = -EPERM;
 488                 break;
 489
 490         case SO_LINGER:
 491                 if (optlen < sizeof(ling)) {
 492                         ret = -EINVAL;  /* 1003.1g */
 493                         break;
 494                 }
 495                 if (copy_from_user(&ling,optval,sizeof(ling))) {
 496                         ret = -EFAULT;
 497                         break;
 498                 }
 499                 if (!ling.l_onoff)
 500                         sock_reset_flag(sk, SOCK_LINGER);
 501                 else {
 502 #if (BITS_PER_LONG == 32)
 503                         if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
 504                                 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
 505                         else
 506 #endif
 507                                 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
 508                         sock_set_flag(sk, SOCK_LINGER);
 509                 }
 510                 break;
 511
 512         case SO_BSDCOMPAT:
 513                 sock_warn_obsolete_bsdism("setsockopt");
 514                 break;
 515
 516         case SO_PASSCRED:
 517                 if (valbool)
 518                         set_bit(SOCK_PASSCRED, &sock->flags);
 519                 else
 520                         clear_bit(SOCK_PASSCRED, &sock->flags);
 521                 break;
 522
 523         case SO_TIMESTAMP:
 524         case SO_TIMESTAMPNS:
 525                 if (valbool)  {
 526                         if (optname == SO_TIMESTAMP)
 527                                 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
 528                         else
 529                                 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
 530                         sock_set_flag(sk, SOCK_RCVTSTAMP);
 531                         sock_enable_timestamp(sk);
 532                 } else {
 533                         sock_reset_flag(sk, SOCK_RCVTSTAMP);
 534                         sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
 535                 }
 536                 break;
 537
 538         case SO_RCVLOWAT:
 539                 if (val < 0)
 540                         val = INT_MAX;
 541                 sk->sk_rcvlowat = val ? : 1;
 542                 break;
 543
 544         case SO_RCVTIMEO:
 545                 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
 546                 break;
 547
 548         case SO_SNDTIMEO:
 549                 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
 550                 break;
 551
 552 #ifdef CONFIG_NETDEVICES
 553         case SO_BINDTODEVICE:
 554         {
 555                 char devname[IFNAMSIZ];
 556
 557                 /* Sorry... */
 558                 if (!capable(CAP_NET_RAW)) {
 559                         ret = -EPERM;
 560                         break;
 561                 }
 562
 563                 /* Bind this socket to a particular device like "eth0",
 564                  * as specified in the passed interface name. If the
 565                  * name is "" or the option length is zero the socket
 566                  * is not bound.
 567                  */
 568
 569                 if (!valbool) {
 570                         sk->sk_bound_dev_if = 0;
 571                 } else {
 572                         if (optlen > IFNAMSIZ - 1)
 573                                 optlen = IFNAMSIZ - 1;
 574                         memset(devname, 0, sizeof(devname));
 575                         if (copy_from_user(devname, optval, optlen)) {
 576                                 ret = -EFAULT;
 577                                 break;
 578                         }
 579
 580                         /* Remove any cached route for this socket. */
 581                         sk_dst_reset(sk);
 582
 583                         if (devname[0] == '\0') {
 584                                 sk->sk_bound_dev_if = 0;
 585                         } else {
 586                                 struct net_device *dev = dev_get_by_name(devname);
 587                                 if (!dev) {
 588                                         ret = -ENODEV;
 589                                         break;
 590                                 }
 591                                 sk->sk_bound_dev_if = dev->ifindex;
 592                                 dev_put(dev);
 593                         }
 594                 }
 595                 break;
 596         }
 597 #endif
 598
 599
 600         case SO_ATTACH_FILTER:
 601                 ret = -EINVAL;
 602                 if (optlen == sizeof(struct sock_fprog)) {
 603                         struct sock_fprog fprog;
 604
 605                         ret = -EFAULT;
 606                         if (copy_from_user(&fprog, optval, sizeof(fprog)))
 607                                 break;
 608
 609                         ret = sk_attach_filter(&fprog, sk);
 610                 }
 611                 break;
 612
 613         case SO_DETACH_FILTER:
 614                 rcu_read_lock_bh();
 615                 filter = rcu_dereference(sk->sk_filter);
 616                 if (filter) {
 617                         rcu_assign_pointer(sk->sk_filter, NULL);
 618                         sk_filter_release(sk, filter);
 619                         rcu_read_unlock_bh();
 620                         break;
 621                 }
 622                 rcu_read_unlock_bh();
 623                 ret = -ENONET;
 624                 break;
 625
 626         case SO_PASSSEC:
 627                 if (valbool)
 628                         set_bit(SOCK_PASSSEC, &sock->flags);
 629                 else
 630                         clear_bit(SOCK_PASSSEC, &sock->flags);
 631                 break;
 632
 633                 /* We implement the SO_SNDLOWAT etc to
 634                    not be settable (1003.1g 5.3) */
 635         default:
 636                 ret = -ENOPROTOOPT;
 637                 break;
 638         }
 639         release_sock(sk);
 640         return ret;
 641 }
 642
 643
 644 int sock_getsockopt(struct socket *sock, int level, int optname,
 645                     char __user *optval, int __user *optlen)
 646 {
 647         struct sock *sk = sock->sk;
 648
 649         union {
 650                 int val;
 651                 struct linger ling;
 652                 struct timeval tm;
 653         } v;
 654
 655         unsigned int lv = sizeof(int);
 656         int len;
 657
 658         if (get_user(len, optlen))
 659                 return -EFAULT;
 660         if (len < 0)
 661                 return -EINVAL;
 662
 663         switch(optname) {
 664         case SO_DEBUG:
 665                 v.val = sock_flag(sk, SOCK_DBG);
 666                 break;
 667
 668         case SO_DONTROUTE:
 669                 v.val = sock_flag(sk, SOCK_LOCALROUTE);
 670                 break;
 671
 672         case SO_BROADCAST:
 673                 v.val = !!sock_flag(sk, SOCK_BROADCAST);
 674                 break;
 675
 676         case SO_SNDBUF:
 677                 v.val = sk->sk_sndbuf;
 678                 break;
 679
 680         case SO_RCVBUF:
 681                 v.val = sk->sk_rcvbuf;
 682                 break;
 683
 684         case SO_REUSEADDR:
 685                 v.val = sk->sk_reuse;
 686                 break;
 687
 688         case SO_KEEPALIVE:
 689                 v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
 690                 break;
 691
 692         case SO_TYPE:
 693                 v.val = sk->sk_type;
 694                 break;
 695
 696         case SO_ERROR:
 697                 v.val = -sock_error(sk);
 698                 if (v.val==0)
 699                         v.val = xchg(&sk->sk_err_soft, 0);
 700                 break;
 701
 702         case SO_OOBINLINE:
 703                 v.val = !!sock_flag(sk, SOCK_URGINLINE);
 704                 break;
 705
 706         case SO_NO_CHECK:
 707                 v.val = sk->sk_no_check;
 708                 break;
 709
 710         case SO_PRIORITY:
 711                 v.val = sk->sk_priority;
 712                 break;
 713
 714         case SO_LINGER:
 715                 lv              = sizeof(v.ling);
 716                 v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
 717                 v.ling.l_linger = sk->sk_lingertime / HZ;
 718                 break;
 719
 720         case SO_BSDCOMPAT:
 721                 sock_warn_obsolete_bsdism("getsockopt");
 722                 break;
 723
 724         case SO_TIMESTAMP:
 725                 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
 726                                 !sock_flag(sk, SOCK_RCVTSTAMPNS);
 727                 break;
 728
 729         case SO_TIMESTAMPNS:
 730                 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
 731                 break;
 732
 733         case SO_RCVTIMEO:
 734                 lv=sizeof(struct timeval);
 735                 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
 736                         v.tm.tv_sec = 0;
 737                         v.tm.tv_usec = 0;
 738                 } else {
 739                         v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
 740                         v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
 741                 }
 742                 break;
 743
 744         case SO_SNDTIMEO:
 745                 lv=sizeof(struct timeval);
 746                 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
 747                         v.tm.tv_sec = 0;
 748                         v.tm.tv_usec = 0;
 749                 } else {
 750                         v.tm.tv_sec = sk->sk_sndtimeo / HZ;
 751                         v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
 752                 }
 753                 break;
 754
 755         case SO_RCVLOWAT:
 756                 v.val = sk->sk_rcvlowat;
 757                 break;
 758
 759         case SO_SNDLOWAT:
 760                 v.val=1;
 761                 break;
 762
 763         case SO_PASSCRED:
 764                 v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
 765                 break;
 766
 767         case SO_PEERCRED:
 768                 if (len > sizeof(sk->sk_peercred))
 769                         len = sizeof(sk->sk_peercred);
 770                 if (copy_to_user(optval, &sk->sk_peercred, len))
 771                         return -EFAULT;
 772                 goto lenout;
 773
 774         case SO_PEERNAME:
 775         {
 776                 char address[128];
 777
 778                 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
 779                         return -ENOTCONN;
 780                 if (lv < len)
 781                         return -EINVAL;
 782                 if (copy_to_user(optval, address, len))
 783                         return -EFAULT;
 784                 goto lenout;
 785         }
 786
 787         /* Dubious BSD thing... Probably nobody even uses it, but
 788          * the UNIX standard wants it for whatever reason... -DaveM
 789          */
 790         case SO_ACCEPTCONN:
 791                 v.val = sk->sk_state == TCP_LISTEN;
 792                 break;
 793
 794         case SO_PASSSEC:
 795                 v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
 796                 break;
 797
 798         case SO_PEERSEC:
 799                 return security_socket_getpeersec_stream(sock, optval, optlen, len);
 800
 801         default:
 802                 return -ENOPROTOOPT;
 803         }
 804
 805         if (len > lv)
 806                 len = lv;
 807         if (copy_to_user(optval, &v, len))
 808                 return -EFAULT;
 809 lenout:
 810         if (put_user(len, optlen))
 811                 return -EFAULT;
 812         return 0;
 813 }
 814
 815 /*
 816  * Initialize an sk_lock.
 817  *
 818  * (We also register the sk_lock with the lock validator.)
 819  */
 820 static inline void sock_lock_init(struct sock *sk)
 821 {
 822         sock_lock_init_class_and_name(sk,
 823                         af_family_slock_key_strings[sk->sk_family],
 824                         af_family_slock_keys + sk->sk_family,
 825                         af_family_key_strings[sk->sk_family],
 826                         af_family_keys + sk->sk_family);
 827 }
 828
 829 /**
 830  *      sk_alloc - All socket objects are allocated here
 831  *      @family: protocol family
 832  *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 833  *      @prot: struct proto associated with this new sock instance
 834  *      @zero_it: if we should zero the newly allocated sock
 835  */
 836 struct sock *sk_alloc(int family, gfp_t priority,
 837                       struct proto *prot, int zero_it)
 838 {
 839         struct sock *sk = NULL;
 840         struct kmem_cache *slab = prot->slab;
 841
 842         if (slab != NULL)
 843                 sk = kmem_cache_alloc(slab, priority);
 844         else
 845                 sk = kmalloc(prot->obj_size, priority);
 846
 847         if (sk) {
 848                 if (zero_it) {
 849                         memset(sk, 0, prot->obj_size);
 850                         sk->sk_family = family;
 851                         /*
 852                          * See comment in struct sock definition to understand
 853                          * why we need sk_prot_creator -acme
 854                          */
 855                         sk->sk_prot = sk->sk_prot_creator = prot;
 856                         sock_lock_init(sk);
 857                 }
 858
 859                 if (security_sk_alloc(sk, family, priority))
 860                         goto out_free;
 861
 862                 if (!try_module_get(prot->owner))
 863                         goto out_free;
 864         }
 865         return sk;
 866
 867 out_free:
 868         if (slab != NULL)
 869                 kmem_cache_free(slab, sk);
 870         else
 871                 kfree(sk);
 872         return NULL;
 873 }
 874
 875 void sk_free(struct sock *sk)
 876 {
 877         struct sk_filter *filter;
 878         struct module *owner = sk->sk_prot_creator->owner;
 879
 880         if (sk->sk_destruct)
 881                 sk->sk_destruct(sk);
 882
 883         filter = rcu_dereference(sk->sk_filter);
 884         if (filter) {
 885                 sk_filter_release(sk, filter);
 886                 rcu_assign_pointer(sk->sk_filter, NULL);
 887         }
 888
 889         sock_disable_timestamp(sk);
 890
 891         if (atomic_read(&sk->sk_omem_alloc))
 892                 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
 893                        __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
 894
 895         security_sk_free(sk);
 896         if (sk->sk_prot_creator->slab != NULL)
 897                 kmem_cache_free(sk->sk_prot_creator->slab, sk);
 898         else
 899                 kfree(sk);
 900         module_put(owner);
 901 }
 902
 903 struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 904 {
 905         struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
 906
 907         if (newsk != NULL) {
 908                 struct sk_filter *filter;
 909
 910                 sock_copy(newsk, sk);
 911
 912                 /* SANITY */
 913                 sk_node_init(&newsk->sk_node);
 914                 sock_lock_init(newsk);
 915                 bh_lock_sock(newsk);
 916                 newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
 917
 918                 atomic_set(&newsk->sk_rmem_alloc, 0);
 919                 atomic_set(&newsk->sk_wmem_alloc, 0);
 920                 atomic_set(&newsk->sk_omem_alloc, 0);
 921                 skb_queue_head_init(&newsk->sk_receive_queue);
 922                 skb_queue_head_init(&newsk->sk_write_queue);
 923 #ifdef CONFIG_NET_DMA
 924                 skb_queue_head_init(&newsk->sk_async_wait_queue);
 925 #endif
 926
 927                 rwlock_init(&newsk->sk_dst_lock);
 928                 rwlock_init(&newsk->sk_callback_lock);
 929                 lockdep_set_class(&newsk->sk_callback_lock,
 930                                    af_callback_keys + newsk->sk_family);
 931
 932                 newsk->sk_dst_cache     = NULL;
 933                 newsk->sk_wmem_queued   = 0;
 934                 newsk->sk_forward_alloc = 0;
 935                 newsk->sk_send_head     = NULL;
 936                 newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 937
 938                 sock_reset_flag(newsk, SOCK_DONE);
 939                 skb_queue_head_init(&newsk->sk_error_queue);
 940
 941                 filter = newsk->sk_filter;
 942                 if (filter != NULL)
 943                         sk_filter_charge(newsk, filter);
 944
 945                 if (unlikely(xfrm_sk_clone_policy(newsk))) {
 946                         /* It is still raw copy of parent, so invalidate
 947                          * destructor and make plain sk_free() */
 948                         newsk->sk_destruct = NULL;
 949                         sk_free(newsk);
 950                         newsk = NULL;
 951                         goto out;
 952                 }
 953
 954                 newsk->sk_err      = 0;
 955                 newsk->sk_priority = 0;
 956                 atomic_set(&newsk->sk_refcnt, 2);
 957
 958                 /*
 959                  * Increment the counter in the same struct proto as the master
 960                  * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
 961                  * is the same as sk->sk_prot->socks, as this field was copied
 962                  * with memcpy).
 963                  *
 964                  * This _changes_ the previous behaviour, where
 965                  * tcp_create_openreq_child always was incrementing the
 966                  * equivalent to tcp_prot->socks (inet_sock_nr), so this have
 967                  * to be taken into account in all callers. -acme
 968                  */
 969                 sk_refcnt_debug_inc(newsk);
 970                 newsk->sk_socket = NULL;
 971                 newsk->sk_sleep  = NULL;
 972
 973                 if (newsk->sk_prot->sockets_allocated)
 974                         atomic_inc(newsk->sk_prot->sockets_allocated);
 975         }
 976 out:
 977         return newsk;
 978 }
 979
 980 EXPORT_SYMBOL_GPL(sk_clone);
 981
 982 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 983 {
 984         __sk_dst_set(sk, dst);
 985         sk->sk_route_caps = dst->dev->features;
 986         if (sk->sk_route_caps & NETIF_F_GSO)
 987                 sk->sk_route_caps |= NETIF_F_GSO_MASK;
 988         if (sk_can_gso(sk)) {
 989                 if (dst->header_len)
 990                         sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 991                 else
 992                         sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
 993         }
 994 }
 995 EXPORT_SYMBOL_GPL(sk_setup_caps);
 996
 997 void __init sk_init(void)
 998 {
 999         if (num_physpages <= 4096) {
1000                 sysctl_wmem_max = 32767;
1001                 sysctl_rmem_max = 32767;
1002                 sysctl_wmem_default = 32767;
1003                 sysctl_rmem_default = 32767;
1004         } else if (num_physpages >= 131072) {
1005                 sysctl_wmem_max = 131071;
1006                 sysctl_rmem_max = 131071;
1007         }
1008 }
1009
1010 /*
1011  *      Simple resource managers for sockets.
1012  */
1013
1014
1015 /*
1016  * Write buffer destructor automatically called from kfree_skb.
1017  */
1018 void sock_wfree(struct sk_buff *skb)
1019 {
1020         struct sock *sk = skb->sk;
1021
1022         /* In case it might be waiting for more memory. */
1023         atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1024         if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
1025                 sk->sk_write_space(sk);
1026         sock_put(sk);
1027 }
1028
1029 /*
1030  * Read buffer destructor automatically called from kfree_skb.
1031  */
1032 void sock_rfree(struct sk_buff *skb)
1033 {
1034         struct sock *sk = skb->sk;
1035
1036         atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1037 }
1038
1039
1040 int sock_i_uid(struct sock *sk)
1041 {
1042         int uid;
1043
1044         read_lock(&sk->sk_callback_lock);
1045         uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1046         read_unlock(&sk->sk_callback_lock);
1047         return uid;
1048 }
1049
1050 unsigned long sock_i_ino(struct sock *sk)
1051 {
1052         unsigned long ino;
1053
1054         read_lock(&sk->sk_callback_lock);
1055         ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1056         read_unlock(&sk->sk_callback_lock);
1057         return ino;
1058 }
1059
1060 /*
1061  * Allocate a skb from the socket's send buffer.
1062  */
1063 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1064                              gfp_t priority)
1065 {
1066         if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1067                 struct sk_buff * skb = alloc_skb(size, priority);
1068                 if (skb) {
1069                         skb_set_owner_w(skb, sk);
1070                         return skb;
1071                 }
1072         }
1073         return NULL;
1074 }
1075
1076 /*
1077  * Allocate a skb from the socket's receive buffer.
1078  */
1079 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1080                              gfp_t priority)
1081 {
1082         if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1083                 struct sk_buff *skb = alloc_skb(size, priority);
1084                 if (skb) {
1085                         skb_set_owner_r(skb, sk);
1086                         return skb;
1087                 }
1088         }
1089         return NULL;
1090 }
1091
1092 /*
1093  * Allocate a memory block from the socket's option memory buffer.
1094  */
1095 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1096 {
1097         if ((unsigned)size <= sysctl_optmem_max &&
1098             atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1099                 void *mem;
1100                 /* First do the add, to avoid the race if kmalloc
1101                  * might sleep.
1102                  */
1103                 atomic_add(size, &sk->sk_omem_alloc);
1104                 mem = kmalloc(size, priority);
1105                 if (mem)
1106                         return mem;
1107                 atomic_sub(size, &sk->sk_omem_alloc);
1108         }
1109         return NULL;
1110 }
1111
1112 /*
1113  * Free an option memory block.
1114  */
1115 void sock_kfree_s(struct sock *sk, void *mem, int size)
1116 {
1117         kfree(mem);
1118         atomic_sub(size, &sk->sk_omem_alloc);
1119 }
1120
1121 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1122    I think, these locks should be removed for datagram sockets.
1123  */
1124 static long sock_wait_for_wmem(struct sock * sk, long timeo)
1125 {
1126         DEFINE_WAIT(wait);
1127
1128         clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1129         for (;;) {
1130                 if (!timeo)
1131                         break;
1132                 if (signal_pending(current))
1133                         break;
1134                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1135                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1136                 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1137                         break;
1138                 if (sk->sk_shutdown & SEND_SHUTDOWN)
1139                         break;
1140                 if (sk->sk_err)
1141                         break;
1142                 timeo = schedule_timeout(timeo);
1143         }
1144         finish_wait(sk->sk_sleep, &wait);
1145         return timeo;
1146 }
1147
1148
1149 /*
1150  *      Generic send/receive buffer handlers
1151  */
1152
1153 static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1154                                             unsigned long header_len,
1155                                             unsigned long data_len,
1156                                             int noblock, int *errcode)
1157 {
1158         struct sk_buff *skb;
1159         gfp_t gfp_mask;
1160         long timeo;
1161         int err;
1162
1163         gfp_mask = sk->sk_allocation;
1164         if (gfp_mask & __GFP_WAIT)
1165                 gfp_mask |= __GFP_REPEAT;
1166
1167         timeo = sock_sndtimeo(sk, noblock);
1168         while (1) {
1169                 err = sock_error(sk);
1170                 if (err != 0)
1171                         goto failure;
1172
1173                 err = -EPIPE;
1174                 if (sk->sk_shutdown & SEND_SHUTDOWN)
1175                         goto failure;
1176
1177                 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1178                         skb = alloc_skb(header_len, gfp_mask);
1179                         if (skb) {
1180                                 int npages;
1181                                 int i;
1182
1183                                 /* No pages, we're done... */
1184                                 if (!data_len)
1185                                         break;
1186
1187                                 npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1188                                 skb->truesize += data_len;
1189                                 skb_shinfo(skb)->nr_frags = npages;
1190                                 for (i = 0; i < npages; i++) {
1191                                         struct page *page;
1192                                         skb_frag_t *frag;
1193
1194                                         page = alloc_pages(sk->sk_allocation, 0);
1195                                         if (!page) {
1196                                                 err = -ENOBUFS;
1197                                                 skb_shinfo(skb)->nr_frags = i;
1198                                                 kfree_skb(skb);
1199                                                 goto failure;
1200                                         }
1201
1202                                         frag = &skb_shinfo(skb)->frags[i];
1203                                         frag->page = page;
1204                                         frag->page_offset = 0;
1205                                         frag->size = (data_len >= PAGE_SIZE ?
1206                                                       PAGE_SIZE :
1207                                                       data_len);
1208                                         data_len -= PAGE_SIZE;
1209                                 }
1210
1211                                 /* Full success... */
1212                                 break;
1213                         }
1214                         err = -ENOBUFS;
1215                         goto failure;
1216                 }
1217                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1218                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1219                 err = -EAGAIN;
1220                 if (!timeo)
1221                         goto failure;
1222                 if (signal_pending(current))
1223                         goto interrupted;
1224                 timeo = sock_wait_for_wmem(sk, timeo);
1225         }
1226
1227         skb_set_owner_w(skb, sk);
1228         return skb;
1229
1230 interrupted:
1231         err = sock_intr_errno(timeo);
1232 failure:
1233         *errcode = err;
1234         return NULL;
1235 }
1236
/*
 * Allocate a purely linear send skb of @size bytes for @sk.
 *
 * This is sock_alloc_send_pskb() with no paged data; see that function
 * for the waiting behaviour and the values stored in *@errcode.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                    int noblock, int *errcode)
{
        const unsigned long no_paged_data = 0;

        return sock_alloc_send_pskb(sk, size, no_paged_data, noblock, errcode);
}
1242
1243 static void __lock_sock(struct sock *sk)
1244 {
1245         DEFINE_WAIT(wait);
1246
1247         for (;;) {
1248                 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1249                                         TASK_UNINTERRUPTIBLE);
1250                 spin_unlock_bh(&sk->sk_lock.slock);
1251                 schedule();
1252                 spin_lock_bh(&sk->sk_lock.slock);
1253                 if (!sock_owned_by_user(sk))
1254                         break;
1255         }
1256         finish_wait(&sk->sk_lock.wq, &wait);
1257 }
1258
1259 static void __release_sock(struct sock *sk)
1260 {
1261         struct sk_buff *skb = sk->sk_backlog.head;
1262
1263         do {
1264                 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1265                 bh_unlock_sock(sk);
1266
1267                 do {
1268                         struct sk_buff *next = skb->next;
1269
1270                         skb->next = NULL;
1271                         sk->sk_backlog_rcv(sk, skb);
1272
1273                         /*
1274                          * We are in process context here with softirqs
1275                          * disabled, use cond_resched_softirq() to preempt.
1276                          * This is safe to do because we've taken the backlog
1277                          * queue private:
1278                          */
1279                         cond_resched_softirq();
1280
1281                         skb = next;
1282                 } while (skb != NULL);
1283
1284                 bh_lock_sock(sk);
1285         } while ((skb = sk->sk_backlog.head) != NULL);
1286 }
1287
1288 /**
1289  * sk_wait_data - wait for data to arrive at sk_receive_queue
1290  * @sk:    sock to wait on
1291  * @timeo: for how long
1292  *
1293  * Now socket state including sk->sk_err is changed only under lock,
1294  * hence we may omit checks after joining wait queue.
1295  * We check receive queue before schedule() only as optimization;
1296  * it is very likely that release_sock() added new data.
1297  */
1298 int sk_wait_data(struct sock *sk, long *timeo)
1299 {
1300         int rc;
1301         DEFINE_WAIT(wait);
1302
1303         prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1304         set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1305         rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1306         clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1307         finish_wait(sk->sk_sleep, &wait);
1308         return rc;
1309 }
1310
1311 EXPORT_SYMBOL(sk_wait_data);
1312
1313 /*
1314  * Set of default routines for initialising struct proto_ops when
1315  * the protocol does not support a particular function. In certain
1316  * cases where it makes no sense for a protocol to have a "do nothing"
1317  * function, some default processing is provided.
1318  */
1319
/* bind() stub for protocols without bind support: always -EOPNOTSUPP. */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
        const int unsupported = -EOPNOTSUPP;

        return unsupported;
}
1324
/* connect() stub for protocols without connect support: always -EOPNOTSUPP. */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
                    int len, int flags)
{
        const int unsupported = -EOPNOTSUPP;

        return unsupported;
}
1330
/* socketpair() stub for protocols without support for it: always -EOPNOTSUPP. */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
        const int unsupported = -EOPNOTSUPP;

        return unsupported;
}
1335
/* accept() stub for protocols without accept support: always -EOPNOTSUPP. */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
        const int unsupported = -EOPNOTSUPP;

        return unsupported;
}
1340
/*
 * getname() stub for protocols that cannot report an address: always
 * -EOPNOTSUPP.  Neither @saddr nor @len is written.
 */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
                    int *len, int peer)
{
        const int unsupported = -EOPNOTSUPP;

        return unsupported;
}
1346
1347 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1348 {
1349         return 0;
1350 }
1351
/* ioctl() stub for protocols without ioctl support: always -EOPNOTSUPP. */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        const int unsupported = -EOPNOTSUPP;

        return unsupported;
}
1356
/* listen() stub for protocols without listen support: always -EOPNOTSUPP. */
int sock_no_listen(struct socket *sock, int backlog)
{
        const int unsupported = -EOPNOTSUPP;

        return unsupported;
}
1361
/* shutdown() stub for protocols without shutdown support: always -EOPNOTSUPP. */
int sock_no_shutdown(struct socket *sock, int how)
{
        const int unsupported = -EOPNOTSUPP;

        return unsupported;
}
1366
1367 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1368                     char __user *optval, int optlen)
1369 {
1370         return -EOPNOTSUPP;
1371 }
1372
1373 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1374                     char __user *optval, int __user *optlen)
1375 {
1376         return -EOPNOTSUPP;
1377 }
1378
/* sendmsg() stub for protocols that cannot send: always -EOPNOTSUPP. */
int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
                    size_t len)
{
        const int unsupported = -EOPNOTSUPP;

        return unsupported;
}
1384
/* recvmsg() stub for protocols that cannot receive: always -EOPNOTSUPP. */
int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
                    size_t len, int flags)
{
        const int unsupported = -EOPNOTSUPP;

        return unsupported;
}
1390
/*
 * mmap() stub for protocols without mmap support.  Returns -ENODEV rather
 * than -EOPNOTSUPP to mirror the error code of a missing mmap method.
 */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
        const int no_mmap_method = -ENODEV;

        return no_mmap_method;
}
1396
1397 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1398 {
1399         ssize_t res;
1400         struct msghdr msg = {.msg_flags = flags};
1401         struct kvec iov;
1402         char *kaddr = kmap(page);
1403         iov.iov_base = kaddr + offset;
1404         iov.iov_len = size;
1405         res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1406         kunmap(page);
1407         return res;
1408 }
1409
1410 /*
1411  *      Default Socket Callbacks
1412  */
1413
1414 static void sock_def_wakeup(struct sock *sk)
1415 {
1416         read_lock(&sk->sk_callback_lock);
1417         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1418                 wake_up_interruptible_all(sk->sk_sleep);
1419         read_unlock(&sk->sk_callback_lock);
1420 }
1421
1422 static void sock_def_error_report(struct sock *sk)
1423 {
1424         read_lock(&sk->sk_callback_lock);
1425         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1426                 wake_up_interruptible(sk->sk_sleep);
1427         sk_wake_async(sk,0,POLL_ERR);
1428         read_unlock(&sk->sk_callback_lock);
1429 }
1430
1431 static void sock_def_readable(struct sock *sk, int len)
1432 {
1433         read_lock(&sk->sk_callback_lock);
1434         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1435                 wake_up_interruptible(sk->sk_sleep);
1436         sk_wake_async(sk,1,POLL_IN);
1437         read_unlock(&sk->sk_callback_lock);
1438 }
1439
1440 static void sock_def_write_space(struct sock *sk)
1441 {
1442         read_lock(&sk->sk_callback_lock);
1443
1444         /* Do not wake up a writer until he can make "significant"
1445          * progress.  --DaveM
1446          */
1447         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1448                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1449                         wake_up_interruptible(sk->sk_sleep);
1450
1451                 /* Should agree with poll, otherwise some programs break */
1452                 if (sock_writeable(sk))
1453                         sk_wake_async(sk, 2, POLL_OUT);
1454         }
1455
1456         read_unlock(&sk->sk_callback_lock);
1457 }
1458
1459 static void sock_def_destruct(struct sock *sk)
1460 {
1461         kfree(sk->sk_protinfo);
1462 }
1463
1464 void sk_send_sigurg(struct sock *sk)
1465 {
1466         if (sk->sk_socket && sk->sk_socket->file)
1467                 if (send_sigurg(&sk->sk_socket->file->f_owner))
1468                         sk_wake_async(sk, 3, POLL_PRI);
1469 }
1470
/*
 * (Re)arm @timer to fire at @expires on behalf of @sk.
 *
 * A reference on the sock is taken when mod_timer() returns zero.
 * NOTE(review): presumably zero means the timer was not already pending,
 * so each pending timer holds exactly one reference - confirm against the
 * timer API.
 */
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
                    unsigned long expires)
{
        if (mod_timer(timer, expires) == 0)
                sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);
1479
/*
 * Cancel @timer if it is pending and, when del_timer() reports that it
 * removed the timer, drop a sock reference with __sock_put().
 */
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
        if (!timer_pending(timer))
                return;

        if (del_timer(timer))
                __sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);
1487
1488 void sock_init_data(struct socket *sock, struct sock *sk)
1489 {
1490         skb_queue_head_init(&sk->sk_receive_queue);
1491         skb_queue_head_init(&sk->sk_write_queue);
1492         skb_queue_head_init(&sk->sk_error_queue);
1493 #ifdef CONFIG_NET_DMA
1494         skb_queue_head_init(&sk->sk_async_wait_queue);
1495 #endif
1496
1497         sk->sk_send_head        =       NULL;
1498
1499         init_timer(&sk->sk_timer);
1500
1501         sk->sk_allocation       =       GFP_KERNEL;
1502         sk->sk_rcvbuf           =       sysctl_rmem_default;
1503         sk->sk_sndbuf           =       sysctl_wmem_default;
1504         sk->sk_state            =       TCP_CLOSE;
1505         sk->sk_socket           =       sock;
1506
1507         sock_set_flag(sk, SOCK_ZAPPED);
1508
1509         if (sock) {
1510                 sk->sk_type     =       sock->type;
1511                 sk->sk_sleep    =       &sock->wait;
1512                 sock->sk        =       sk;
1513         } else
1514                 sk->sk_sleep    =       NULL;
1515
1516         rwlock_init(&sk->sk_dst_lock);
1517         rwlock_init(&sk->sk_callback_lock);
1518         lockdep_set_class(&sk->sk_callback_lock,
1519                            af_callback_keys + sk->sk_family);
1520
1521         sk->sk_state_change     =       sock_def_wakeup;
1522         sk->sk_data_ready       =       sock_def_readable;
1523         sk->sk_write_space      =       sock_def_write_space;
1524         sk->sk_error_report     =       sock_def_error_report;
1525         sk->sk_destruct         =       sock_def_destruct;
1526
1527         sk->sk_sndmsg_page      =       NULL;
1528         sk->sk_sndmsg_off       =       0;
1529
1530         sk->sk_peercred.pid     =       0;
1531         sk->sk_peercred.uid     =       -1;
1532         sk->sk_peercred.gid     =       -1;
1533         sk->sk_write_pending    =       0;
1534         sk->sk_rcvlowat         =       1;
1535         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
1536         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
1537
1538         sk->sk_stamp = ktime_set(-1L, -1L);
1539
1540         atomic_set(&sk->sk_refcnt, 1);
1541 }
1542
1543 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1544 {
1545         might_sleep();
1546         spin_lock_bh(&sk->sk_lock.slock);
1547         if (sk->sk_lock.owner)
1548                 __lock_sock(sk);
1549         sk->sk_lock.owner = (void *)1;
1550         spin_unlock(&sk->sk_lock.slock);
1551         /*
1552          * The sk_lock has mutex_lock() semantics here:
1553          */
1554         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1555         local_bh_enable();
1556 }
1557
1558 EXPORT_SYMBOL(lock_sock_nested);
1559
1560 void fastcall release_sock(struct sock *sk)
1561 {
1562         /*
1563          * The sk_lock has mutex_unlock() semantics:
1564          */
1565         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1566
1567         spin_lock_bh(&sk->sk_lock.slock);
1568         if (sk->sk_backlog.tail)
1569                 __release_sock(sk);
1570         sk->sk_lock.owner = NULL;
1571         if (waitqueue_active(&sk->sk_lock.wq))
1572                 wake_up(&sk->sk_lock.wq);
1573         spin_unlock_bh(&sk->sk_lock.slock);
1574 }
1575 EXPORT_SYMBOL(release_sock);
1576
1577 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1578 {
1579         struct timeval tv;
1580         if (!sock_flag(sk, SOCK_TIMESTAMP))
1581                 sock_enable_timestamp(sk);
1582         tv = ktime_to_timeval(sk->sk_stamp);
1583         if (tv.tv_sec == -1)
1584                 return -ENOENT;
1585         if (tv.tv_sec == 0) {
1586                 sk->sk_stamp = ktime_get_real();
1587                 tv = ktime_to_timeval(sk->sk_stamp);
1588         }
1589         return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1590 }
1591 EXPORT_SYMBOL(sock_get_timestamp);
1592
1593 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1594 {
1595         struct timespec ts;
1596         if (!sock_flag(sk, SOCK_TIMESTAMP))
1597                 sock_enable_timestamp(sk);
1598         ts = ktime_to_timespec(sk->sk_stamp);
1599         if (ts.tv_sec == -1)
1600                 return -ENOENT;
1601         if (ts.tv_sec == 0) {
1602                 sk->sk_stamp = ktime_get_real();
1603                 ts = ktime_to_timespec(sk->sk_stamp);
1604         }
1605         return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1606 }
1607 EXPORT_SYMBOL(sock_get_timestampns);
1608
1609 void sock_enable_timestamp(struct sock *sk)
1610 {
1611         if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1612                 sock_set_flag(sk, SOCK_TIMESTAMP);
1613                 net_enable_timestamp();
1614         }
1615 }
1616 EXPORT_SYMBOL(sock_enable_timestamp);
1617
1618 /*
1619  *      Get a socket option on an socket.
1620  *
1621  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
1622  *      asynchronous errors should be reported by getsockopt. We assume
1623  *      this means if you specify SO_ERROR (otherwise whats the point of it).
1624  */
1625 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1626                            char __user *optval, int __user *optlen)
1627 {
1628         struct sock *sk = sock->sk;
1629
1630         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1631 }
1632
1633 EXPORT_SYMBOL(sock_common_getsockopt);
1634
#ifdef CONFIG_COMPAT
/*
 * Compat variant of sock_common_getsockopt(): use the protocol's
 * compat_getsockopt method when it has one, otherwise fall back to the
 * regular getsockopt method.
 */
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        if (sk->sk_prot->compat_getsockopt == NULL)
                return sk->sk_prot->getsockopt(sk, level, optname,
                                               optval, optlen);

        return sk->sk_prot->compat_getsockopt(sk, level, optname,
                                              optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif
1648
1649 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1650                         struct msghdr *msg, size_t size, int flags)
1651 {
1652         struct sock *sk = sock->sk;
1653         int addr_len = 0;
1654         int err;
1655
1656         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1657                                    flags & ~MSG_DONTWAIT, &addr_len);
1658         if (err >= 0)
1659                 msg->msg_namelen = addr_len;
1660         return err;
1661 }
1662
1663 EXPORT_SYMBOL(sock_common_recvmsg);
1664
1665 /*
1666  *      Set socket options on an inet socket.
1667  */
1668 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1669                            char __user *optval, int optlen)
1670 {
1671         struct sock *sk = sock->sk;
1672
1673         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1674 }
1675
1676 EXPORT_SYMBOL(sock_common_setsockopt);
1677
#ifdef CONFIG_COMPAT
/*
 *      Compat flavour of sock_common_setsockopt().
 *
 *      Uses the protocol's compat_setsockopt method when one is provided;
 *      a protocol without one is served by its regular setsockopt method.
 *      The chosen method's return value is returned unchanged.
 */
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;

        /* No compat handler: fall back to the native one. */
        if (sk->sk_prot->compat_setsockopt == NULL)
                return sk->sk_prot->setsockopt(sk, level, optname,
                                               optval, optlen);

        return sk->sk_prot->compat_setsockopt(sk, level, optname,
                                              optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif
1691
/*
 * Common release sequence for a struct sock. In order: call the
 * protocol's destroy method (if it has one), call its unhash method,
 * sock_orphan(), xfrm_sk_free_policy(), sk_refcnt_debug_release(), and
 * finally sock_put() to drop one reference.
 */
void sk_common_release(struct sock *sk)
{
        /* destroy is optional; unhash further down is called unconditionally. */
        if (sk->sk_prot->destroy)
                sk->sk_prot->destroy(sk);

        /*
         * Observation: when sock_common_release is called, processes have
         * no access to the socket any more, but the network stack still
         * has.
         * NOTE(review): the name above is kept from the original comment;
         * presumably it refers to this release path - confirm.
         *
         * Step one, detach it from networking:
         *
         * A. Remove from hash tables.
         */

        sk->sk_prot->unhash(sk);

        /*
         * At this point the socket cannot receive new packets, but it is
         * possible that some packets are in flight because some CPU runs
         * the receiver and did its hash table lookup before we unhashed
         * the socket. They will reach the receive queue and will be purged
         * by the socket destructor.
         *
         * Also we still have packets pending on the receive queue and,
         * probably, our own packets waiting in device queues. sock_destroy
         * will drain the receive queue, but transmitted packets will delay
         * socket destruction until the last reference is released.
         */

        sock_orphan(sk);

        xfrm_sk_free_policy(sk);

        sk_refcnt_debug_release(sk);
        /* Drop a reference; see the note above on when destruction happens. */
        sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);
1728
/*
 * List of registered protocols, linked through struct proto's node member.
 * proto_list_lock is write-locked around list_add()/list_del() in
 * proto_register()/proto_unregister() and read-locked for the duration of
 * a /proc seq_file walk (proto_seq_start() .. proto_seq_stop()).
 */
static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);
1731
1732 int proto_register(struct proto *prot, int alloc_slab)
1733 {
1734         char *request_sock_slab_name = NULL;
1735         char *timewait_sock_slab_name;
1736         int rc = -ENOBUFS;
1737
1738         if (alloc_slab) {
1739                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1740                                                SLAB_HWCACHE_ALIGN, NULL, NULL);
1741
1742                 if (prot->slab == NULL) {
1743                         printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1744                                prot->name);
1745                         goto out;
1746                 }
1747
1748                 if (prot->rsk_prot != NULL) {
1749                         static const char mask[] = "request_sock_%s";
1750
1751                         request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1752                         if (request_sock_slab_name == NULL)
1753                                 goto out_free_sock_slab;
1754
1755                         sprintf(request_sock_slab_name, mask, prot->name);
1756                         prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1757                                                                  prot->rsk_prot->obj_size, 0,
1758                                                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
1759
1760                         if (prot->rsk_prot->slab == NULL) {
1761                                 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1762                                        prot->name);
1763                                 goto out_free_request_sock_slab_name;
1764                         }
1765                 }
1766
1767                 if (prot->twsk_prot != NULL) {
1768                         static const char mask[] = "tw_sock_%s";
1769
1770                         timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1771
1772                         if (timewait_sock_slab_name == NULL)
1773                                 goto out_free_request_sock_slab;
1774
1775                         sprintf(timewait_sock_slab_name, mask, prot->name);
1776                         prot->twsk_prot->twsk_slab =
1777                                 kmem_cache_create(timewait_sock_slab_name,
1778                                                   prot->twsk_prot->twsk_obj_size,
1779                                                   0, SLAB_HWCACHE_ALIGN,
1780                                                   NULL, NULL);
1781                         if (prot->twsk_prot->twsk_slab == NULL)
1782                                 goto out_free_timewait_sock_slab_name;
1783                 }
1784         }
1785
1786         write_lock(&proto_list_lock);
1787         list_add(&prot->node, &proto_list);
1788         write_unlock(&proto_list_lock);
1789         rc = 0;
1790 out:
1791         return rc;
1792 out_free_timewait_sock_slab_name:
1793         kfree(timewait_sock_slab_name);
1794 out_free_request_sock_slab:
1795         if (prot->rsk_prot && prot->rsk_prot->slab) {
1796                 kmem_cache_destroy(prot->rsk_prot->slab);
1797                 prot->rsk_prot->slab = NULL;
1798         }
1799 out_free_request_sock_slab_name:
1800         kfree(request_sock_slab_name);
1801 out_free_sock_slab:
1802         kmem_cache_destroy(prot->slab);
1803         prot->slab = NULL;
1804         goto out;
1805 }
1806
1807 EXPORT_SYMBOL(proto_register);
1808
1809 void proto_unregister(struct proto *prot)
1810 {
1811         write_lock(&proto_list_lock);
1812         list_del(&prot->node);
1813         write_unlock(&proto_list_lock);
1814
1815         if (prot->slab != NULL) {
1816                 kmem_cache_destroy(prot->slab);
1817                 prot->slab = NULL;
1818         }
1819
1820         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1821                 const char *name = kmem_cache_name(prot->rsk_prot->slab);
1822
1823                 kmem_cache_destroy(prot->rsk_prot->slab);
1824                 kfree(name);
1825                 prot->rsk_prot->slab = NULL;
1826         }
1827
1828         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1829                 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1830
1831                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1832                 kfree(name);
1833                 prot->twsk_prot->twsk_slab = NULL;
1834         }
1835 }
1836
1837 EXPORT_SYMBOL(proto_unregister);
1838
1839 #ifdef CONFIG_PROC_FS
1840 static inline struct proto *__proto_head(void)
1841 {
1842         return list_entry(proto_list.next, struct proto, node);
1843 }
1844
1845 static inline struct proto *proto_head(void)
1846 {
1847         return list_empty(&proto_list) ? NULL : __proto_head();
1848 }
1849
1850 static inline struct proto *proto_next(struct proto *proto)
1851 {
1852         return proto->node.next == &proto_list ? NULL :
1853                 list_entry(proto->node.next, struct proto, node);
1854 }
1855
1856 static inline struct proto *proto_get_idx(loff_t pos)
1857 {
1858         struct proto *proto;
1859         loff_t i = 0;
1860
1861         list_for_each_entry(proto, &proto_list, node)
1862                 if (i++ == pos)
1863                         goto out;
1864
1865         proto = NULL;
1866 out:
1867         return proto;
1868 }
1869
1870 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1871 {
1872         read_lock(&proto_list_lock);
1873         return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
1874 }
1875
1876 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1877 {
1878         ++*pos;
1879         return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
1880 }
1881
/*
 * seq_file stop callback: release the read lock on proto_list_lock that
 * proto_seq_start() took. Neither argument is used.
 */
static void proto_seq_stop(struct seq_file *seq, void *v)
{
        read_unlock(&proto_list_lock);
}
1886
/* Map a method pointer to 'y' (non-NULL) or 'n' (NULL) for display. */
static char proto_method_implemented(const void *method)
{
        if (method != NULL)
                return 'y';

        return 'n';
}
1891
1892 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1893 {
1894         seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1895                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1896                    proto->name,
1897                    proto->obj_size,
1898                    proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1899                    proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1900                    proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1901                    proto->max_header,
1902                    proto->slab == NULL ? "no" : "yes",
1903                    module_name(proto->owner),
1904                    proto_method_implemented(proto->close),
1905                    proto_method_implemented(proto->connect),
1906                    proto_method_implemented(proto->disconnect),
1907                    proto_method_implemented(proto->accept),
1908                    proto_method_implemented(proto->ioctl),
1909                    proto_method_implemented(proto->init),
1910                    proto_method_implemented(proto->destroy),
1911                    proto_method_implemented(proto->shutdown),
1912                    proto_method_implemented(proto->setsockopt),
1913                    proto_method_implemented(proto->getsockopt),
1914                    proto_method_implemented(proto->sendmsg),
1915                    proto_method_implemented(proto->recvmsg),
1916                    proto_method_implemented(proto->sendpage),
1917                    proto_method_implemented(proto->bind),
1918                    proto_method_implemented(proto->backlog_rcv),
1919                    proto_method_implemented(proto->hash),
1920                    proto_method_implemented(proto->unhash),
1921                    proto_method_implemented(proto->get_port),
1922                    proto_method_implemented(proto->enter_memory_pressure));
1923 }
1924
1925 static int proto_seq_show(struct seq_file *seq, void *v)
1926 {
1927         if (v == SEQ_START_TOKEN)
1928                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1929                            "protocol",
1930                            "size",
1931                            "sockets",
1932                            "memory",
1933                            "press",
1934                            "maxhdr",
1935                            "slab",
1936                            "module",
1937                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1938         else
1939                 proto_seq_printf(seq, v);
1940         return 0;
1941 }
1942
/*
 * seq_file iterator for the protocol list; handed to seq_open() by
 * proto_seq_open(). start takes proto_list_lock and stop releases it.
 */
static const struct seq_operations proto_seq_ops = {
        .start  = proto_seq_start,
        .next   = proto_seq_next,
        .stop   = proto_seq_stop,
        .show   = proto_seq_show,
};
1949
1950 static int proto_seq_open(struct inode *inode, struct file *file)
1951 {
1952         return seq_open(file, &proto_seq_ops);
1953 }
1954
/*
 * File operations for the "protocols" proc entry registered by
 * proto_init(). Only open is local; read, llseek and release use the
 * generic seq_file helpers.
 */
static const struct file_operations proto_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = proto_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
1962
1963 static int __init proto_init(void)
1964 {
1965         /* register /proc/net/protocols */
1966         return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1967 }
1968
1969 subsys_initcall(proto_init);
1970
1971 #endif /* PROC_FS */
1972
/*
 * Remaining symbol exports for this file, grouped here rather than placed
 * next to their definitions. sysctl_rmem_max and sysctl_wmem_max are
 * exported only when CONFIG_SYSCTL is enabled.
 */
EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
EXPORT_SYMBOL(sysctl_optmem_max);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif