1 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
2 * All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for more
12 * details.
13 */
14
15 /* This driver lives in a spar partition, and registers to ethernet IO
16 * channels from the visorbus driver. It creates netdev devices and
17 * forwards transmits to the IO channel and accepts rcvs from the IO
18 * Partition via the IO channel.
19 */
20
21 #include <linux/debugfs.h>
22 #include <linux/etherdevice.h>
23 #include <linux/module.h>
24 #include <linux/netdevice.h>
25 #include <linux/kthread.h>
26 #include <linux/skbuff.h>
27 #include <linux/rtnetlink.h>
28
29 #include "visorbus.h"
30 #include "iochannel.h"
31
32 #define VISORNIC_INFINITE_RSP_WAIT 0
33
34 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
35 * = 163840 bytes
36 */
37 #define MAX_BUF 163840
38 #define NAPI_WEIGHT 64
39
40 /* GUIDs for the vnic channel type supported by this driver. */
41 /* {8cd5994d-c58e-11da-95a9-00e08161165f} */
42 #define VISOR_VNIC_CHANNEL_GUID \
43 GUID_INIT(0x8cd5994d, 0xc58e, 0x11da, \
44 0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
45 #define VISOR_VNIC_CHANNEL_GUID_STR \
46 "8cd5994d-c58e-11da-95a9-00e08161165f"
47
48 static struct visor_channeltype_descriptor visornic_channel_types[] = {
49 /* Note that the only channel type we expect to be reported by the
50 * bus driver is the VISOR_VNIC channel.
51 */
52 { VISOR_VNIC_CHANNEL_GUID, "ultravnic", sizeof(struct channel_header),
53 VISOR_VNIC_CHANNEL_VERSIONID },
54 {}
55 };
56 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
57 /* FIXME XXX: This next line of code must be fixed and removed before
58 * acceptance into the 'normal' part of the kernel. It is only here as a place
59 * holder to get module autoloading functionality working for visorbus. Code
60 * must be added to scripts/mod/file2alias.c, etc., to get this working
61 * properly.
62 */
63 MODULE_ALIAS("visorbus:" VISOR_VNIC_CHANNEL_GUID_STR);
64
65 struct chanstat {
66 unsigned long got_rcv;
67 unsigned long got_enbdisack;
68 unsigned long got_xmit_done;
69 unsigned long xmit_fail;
70 unsigned long sent_enbdis;
71 unsigned long sent_promisc;
72 unsigned long sent_post;
73 unsigned long sent_post_failed;
74 unsigned long sent_xmit;
75 unsigned long reject_count;
76 unsigned long extra_rcvbufs_sent;
77 };
78
79 /* struct visornic_devdata
80 * @enabled: 0 disabled 1 enabled to receive.
81 * @enab_dis_acked: NET_RCV_ENABLE/DISABLE acked by IOPART.
82 * @struct *dev:
83 * @struct *netdev:
84 * @struct net_stats:
85 * @interrupt_rcvd:
86 * @rsp_queue:
87 * @struct **rcvbuf:
88 * @incarnation_id: incarnation_id lets IOPART know about
89 * re-birth.
90 * @old_flags: flags as they were prior to
91 * set_multicast_list.
92 * @usage: count of users.
93 * @num_rcv_bufs: number of rcv buffers the vnic will post.
94 * @num_rcv_bufs_could_not_alloc:
95 * @num_rcvbuf_in_iovm:
96 * @alloc_failed_in_if_needed_cnt:
97 * @alloc_failed_in_repost_rtn_cnt:
98 * @max_outstanding_net_xmits: absolute max number of outstanding xmits
99 * - should never hit this.
100 * @upper_threshold_net_xmits: high water mark for calling
101 * netif_stop_queue().
102 * @lower_threshold_net_xmits: low water mark for calling
103 * netif_wake_queue().
104 * @struct xmitbufhead: xmitbufhead - head of the xmit buffer list
105 * sent to the IOPART end.
106 * @server_down_complete_func:
107 * @struct timeout_reset:
108 * @struct *cmdrsp_rcv: cmdrsp_rcv is used for posting/unposting rcv
109 * buffers.
110 * @struct *xmit_cmdrsp: xmit_cmdrsp - issues NET_XMIT - only one
111 * active xmit at a time.
112 * @server_down: IOPART is down.
113 * @server_change_state: Processing SERVER_CHANGESTATE msg.
114 * @going_away: device is being torn down.
115 * @struct *eth_debugfs_dir:
116 * @interrupts_rcvd:
117 * @interrupts_notme:
118 * @interrupts_disabled:
119 * @busy_cnt:
120 * @priv_lock: spinlock to access devdata structures.
121 * @flow_control_upper_hits:
122 * @flow_control_lower_hits:
123 * @n_rcv0: # rcvs of 0 buffers.
124 * @n_rcv1: # rcvs of 1 buffers.
125 * @n_rcv2: # rcvs of 2 buffers.
126 * @n_rcvx: # rcvs of >2 buffers.
127 * @found_repost_rcvbuf_cnt: # repost_rcvbuf_cnt.
128 * @repost_found_skb_cnt: # of times we found the skb.
129 * @n_repost_deficit: # of lost rcv buffers.
130 * @bad_rcv_buf: # of unknown rcv skb not freed.
131 * @n_rcv_packets_not_accepted: # bogus rcv packets.
132 * @queuefullmsg_logged:
133 * @struct chstat:
134 * @struct irq_poll_timer:
135 * @struct napi:
136 * @struct cmdrsp:
137 */
138 struct visornic_devdata {
139 unsigned short enabled;
140 unsigned short enab_dis_acked;
141
142 struct visor_device *dev;
143 struct net_device *netdev;
144 struct net_device_stats net_stats;
145 atomic_t interrupt_rcvd;
146 wait_queue_head_t rsp_queue;
147 struct sk_buff **rcvbuf;
148 u64 incarnation_id;
149 unsigned short old_flags;
150 atomic_t usage;
151
152 int num_rcv_bufs;
153 int num_rcv_bufs_could_not_alloc;
154 atomic_t num_rcvbuf_in_iovm;
155 unsigned long alloc_failed_in_if_needed_cnt;
156 unsigned long alloc_failed_in_repost_rtn_cnt;
157
158 unsigned long max_outstanding_net_xmits;
159 unsigned long upper_threshold_net_xmits;
160 unsigned long lower_threshold_net_xmits;
161 struct sk_buff_head xmitbufhead;
162
163 visorbus_state_complete_func server_down_complete_func;
164 struct work_struct timeout_reset;
165 struct uiscmdrsp *cmdrsp_rcv;
166 struct uiscmdrsp *xmit_cmdrsp;
167 bool server_down;
168 bool server_change_state;
169 bool going_away;
170 struct dentry *eth_debugfs_dir;
171 u64 interrupts_rcvd;
172 u64 interrupts_notme;
173 u64 interrupts_disabled;
174 u64 busy_cnt;
175 /* spinlock to access devdata structures. */
176 spinlock_t priv_lock;
177
178 /* flow control counter */
179 u64 flow_control_upper_hits;
180 u64 flow_control_lower_hits;
181
182 /* debug counters */
183 unsigned long n_rcv0;
184 unsigned long n_rcv1;
185 unsigned long n_rcv2;
186 unsigned long n_rcvx;
187 unsigned long found_repost_rcvbuf_cnt;
188 unsigned long repost_found_skb_cnt;
189 unsigned long n_repost_deficit;
190 unsigned long bad_rcv_buf;
191 unsigned long n_rcv_packets_not_accepted;
192
193 int queuefullmsg_logged;
194 struct chanstat chstat;
195 struct timer_list irq_poll_timer;
196 struct napi_struct napi;
197 struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
198 };
199
200 /* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
201 static u16 add_physinfo_entries(u64 inp_pfn, u16 inp_off, u16 inp_len,
202 u16 index, u16 max_pi_arr_entries,
203 struct phys_info pi_arr[])
204 {
205 u16 i, len, firstlen;
206
207 firstlen = PI_PAGE_SIZE - inp_off;
208 if (inp_len <= firstlen) {
209 /* The input entry spans only one page - add as is. */
210 if (index >= max_pi_arr_entries)
211 return 0;
212 pi_arr[index].pi_pfn = inp_pfn;
213 pi_arr[index].pi_off = (u16)inp_off;
214 pi_arr[index].pi_len = (u16)inp_len;
215 return index + 1;
216 }
217
218 /* This entry spans multiple pages. */
219 for (len = inp_len, i = 0; len;
220 len -= pi_arr[index + i].pi_len, i++) {
221 if (index + i >= max_pi_arr_entries)
222 return 0;
223 pi_arr[index + i].pi_pfn = inp_pfn + i;
224 if (i == 0) {
225 pi_arr[index].pi_off = inp_off;
226 pi_arr[index].pi_len = firstlen;
227 } else {
228 pi_arr[index + i].pi_off = 0;
229 pi_arr[index + i].pi_len = min_t(u16, len,
230 PI_PAGE_SIZE);
231 }
232 }
233 return index + i;
234 }
235
236 /* visor_copy_fragsinfo_from_skb - copy fragment list in the SKB to a phys_info
237 * array that the IOPART understands
238 * @skb: Skbuff that we are pulling the frags from.
239 * @firstfraglen: Length of first fragment in skb.
240 * @frags_max: Max len of frags array.
241 * @frags: Frags array filled in on output.
242 *
243 * Return: Positive integer indicating number of entries filled in frags on
244 * success, negative integer on error.
245 */
246 static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
247 unsigned int firstfraglen,
248 unsigned int frags_max,
249 struct phys_info frags[])
250 {
251 unsigned int count = 0, frag, size, offset = 0, numfrags;
252 unsigned int total_count;
253
254 numfrags = skb_shinfo(skb)->nr_frags;
255
256 /* Compute the number of fragments this skb has, and if it's more than
257 * the frag array can hold, linearize the skb
258 */
259 total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
260 if (firstfraglen % PI_PAGE_SIZE)
261 total_count++;
262
263 if (total_count > frags_max) {
264 if (skb_linearize(skb))
265 return -EINVAL;
266 numfrags = skb_shinfo(skb)->nr_frags;
267 firstfraglen = 0;
268 }
269
270 while (firstfraglen) {
271 if (count == frags_max)
272 return -EINVAL;
273
274 frags[count].pi_pfn =
275 page_to_pfn(virt_to_page(skb->data + offset));
276 frags[count].pi_off =
277 (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
278 size = min_t(unsigned int, firstfraglen,
279 PI_PAGE_SIZE - frags[count].pi_off);
280
281 /* can take smallest of firstfraglen (what's left) OR
282 * bytes left in the page
283 */
284 frags[count].pi_len = size;
285 firstfraglen -= size;
286 offset += size;
287 count++;
288 }
289 if (numfrags) {
290 if ((count + numfrags) > frags_max)
291 return -EINVAL;
292
293 for (frag = 0; frag < numfrags; frag++) {
294 count = add_physinfo_entries(page_to_pfn(
295 skb_frag_page(&skb_shinfo(skb)->frags[frag])),
296 skb_shinfo(skb)->frags[frag].page_offset,
297 skb_shinfo(skb)->frags[frag].size, count,
298 frags_max, frags);
299 /* add_physinfo_entries only returns
300 * zero if the frags array is out of room
301 * That should never happen because we
302 * fail above, if count+numfrags > frags_max.
303 */
304 if (!count)
305 return -EINVAL;
306 }
307 }
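/* Also walk any skbs chained off frag_list, recursing to flatten each one. */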
308 if (skb_shinfo(skb)->frag_list) {
309 struct sk_buff *skbinlist;
310 int c;
311
312 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
313 skbinlist = skbinlist->next) {
314 c = visor_copy_fragsinfo_from_skb(skbinlist,
315 skbinlist->len -
316 skbinlist->data_len,
317 frags_max - count,
318 &frags[count]);
319 if (c < 0)
320 return c;
321 count += c;
322 }
323 }
324 return count;
325 }
326
327 static ssize_t enable_ints_write(struct file *file,
328 const char __user *buffer,
329 size_t count, loff_t *ppos)
330 {
331 /* We don't want to break the ABI here by removing a debugfs
332 * file that used to exist and be writable, so
333 * let's just make this a vestigial, no-op function
334 */
335 return count;
336 }
337
338 static const struct file_operations debugfs_enable_ints_fops = {
339 .write = enable_ints_write,
340 };
341
342 /* visornic_serverdown_complete - pause device following IOPART going down
343 * @devdata: Device managed by IOPART.
344 *
345 * The IO partition has gone down, and we need to do some cleanup for when it
346 * comes back. Treat the IO partition as the link being down.
347 */
348 static void visornic_serverdown_complete(struct visornic_devdata *devdata)
349 {
350 struct net_device *netdev = devdata->netdev;
351
352 /* Stop polling for interrupts */
353 del_timer_sync(&devdata->irq_poll_timer);
354
355 rtnl_lock();
356 dev_close(netdev);
357 rtnl_unlock();
358
359 atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
360 devdata->chstat.sent_xmit = 0;
361 devdata->chstat.got_xmit_done = 0;
362
363 if (devdata->server_down_complete_func)
364 (*devdata->server_down_complete_func)(devdata->dev, 0);
365
366 devdata->server_down = true;
367 devdata->server_change_state = false;
368 devdata->server_down_complete_func = NULL;
369 }
370
371 /* visornic_serverdown - Command has notified us that IOPART is down
372 * @devdata: Device managed by IOPART.
373 * @complete_func: Function to call when finished.
374 *
375 * Schedule the work needed to handle the server down request. Make sure we
376 * haven't already handled the server change state event.
377 *
378 * Return: 0 if we scheduled the work, negative integer on error.
379 */
380 static int visornic_serverdown(struct visornic_devdata *devdata,
381 visorbus_state_complete_func complete_func)
382 {
383 unsigned long flags;
384 int err;
385
386 spin_lock_irqsave(&devdata->priv_lock, flags);
387 if (devdata->server_change_state) {
388 dev_dbg(&devdata->dev->device, "%s changing state\n",
389 __func__);
390 err = -EINVAL;
391 goto err_unlock;
392 }
393 if (devdata->server_down) {
394 dev_dbg(&devdata->dev->device, "%s already down\n",
395 __func__);
396 err = -EINVAL;
397 goto err_unlock;
398 }
399 if (devdata->going_away) {
400 dev_dbg(&devdata->dev->device,
401 "%s aborting because device removal pending\n",
402 __func__);
403 err = -ENODEV;
404 goto err_unlock;
405 }
406 devdata->server_change_state = true;
407 devdata->server_down_complete_func = complete_func;
408 spin_unlock_irqrestore(&devdata->priv_lock, flags);
409
410 visornic_serverdown_complete(devdata);
411 return 0;
412
413 err_unlock:
414 spin_unlock_irqrestore(&devdata->priv_lock, flags);
415 return err;
416 }
417
418 /* alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition
419 * @netdev: Network adapter the rcv bufs are attached to.
420 *
421 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
422 * so that it can write rcv data into our memory space.
423 *
424 * Return: Pointer to sk_buff.
425 */
426 static struct sk_buff *alloc_rcv_buf(struct net_device *netdev)
427 {
428 struct sk_buff *skb;
429
430 /* NOTE: the first fragment in each rcv buffer is pointed to by
431 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
432 * in length, so the first frag is large enough to hold 1514.
433 */
434 skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
435 if (!skb)
436 return NULL;
437 skb->dev = netdev;
438 /* current value of mtu doesn't come into play here; large
439 * packets will just end up using multiple rcv buffers all of
440 * the same size.
441 */
442 skb->len = RCVPOST_BUF_SIZE;
443 /* alloc_skb has already zeroed data_len; set it explicitly for clarity. */
444 skb->data_len = 0;
445 return skb;
446 }
447
448 /* post_skb - post a skb to the IO Partition
449 * @cmdrsp: Cmdrsp packet to be sent to the IO Partition.
450 * @devdata: visornic_devdata to post the skb to.
451 * @skb: Skb to give to the IO partition.
452 *
453 * Return: 0 on success, negative integer on error.
454 */
455 static int post_skb(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
456 struct sk_buff *skb)
457 {
458 int err;
459
460 cmdrsp->net.buf = skb;
461 cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
462 cmdrsp->net.rcvpost.frag.pi_off =
463 (unsigned long)skb->data & PI_PAGE_MASK;
464 cmdrsp->net.rcvpost.frag.pi_len = skb->len;
465 cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
466
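/* The posted rcv buffer must fit entirely within a single page. */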
467 if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
468 return -EINVAL;
469
470 cmdrsp->net.type = NET_RCV_POST;
471 cmdrsp->cmdtype = CMD_NET_TYPE;
472 err = visorchannel_signalinsert(devdata->dev->visorchannel,
473 IOCHAN_TO_IOPART,
474 cmdrsp);
475 if (err) {
476 devdata->chstat.sent_post_failed++;
477 return err;
478 }
479
480 atomic_inc(&devdata->num_rcvbuf_in_iovm);
481 devdata->chstat.sent_post++;
482 return 0;
483 }
484
485 /* send_enbdis - Send NET_RCV_ENBDIS to IO Partition
486 * @netdev: Netdevice we are enabling/disabling, used as context return value.
487 * @state: Enable = 1/disable = 0.
488 * @devdata: Visornic device we are enabling/disabling.
489 *
490 * Send the enable/disable message to the IO Partition.
491 *
492 * Return: 0 on success, negative integer on error.
493 */
494 static int send_enbdis(struct net_device *netdev, int state,
495 struct visornic_devdata *devdata)
496 {
497 int err;
498
499 devdata->cmdrsp_rcv->net.enbdis.enable = state;
500 devdata->cmdrsp_rcv->net.enbdis.context = netdev;
501 devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
502 devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
503 err = visorchannel_signalinsert(devdata->dev->visorchannel,
504 IOCHAN_TO_IOPART,
505 devdata->cmdrsp_rcv);
506 if (err)
507 return err;
508 devdata->chstat.sent_enbdis++;
509 return 0;
510 }
511
512 /* visornic_disable_with_timeout - disable network adapter
513 * @netdev: netdevice to disable.
514 * @timeout: Timeout to wait for disable.
515 *
516 * Disable the network adapter and inform the IO Partition that we are disabled.
517 * Reclaim memory from rcv bufs.
518 *
519 * Return: 0 on success, negative integer if the IO Partition fails to respond.
520 */
521 static int visornic_disable_with_timeout(struct net_device *netdev,
522 const int timeout)
523 {
524 struct visornic_devdata *devdata = netdev_priv(netdev);
525 int i;
526 unsigned long flags;
527 int wait = 0;
528 int err;
529
530 /* send a msg telling the other end we are stopping incoming pkts */
531 spin_lock_irqsave(&devdata->priv_lock, flags);
532 devdata->enabled = 0;
533 /* must wait for ack */
534 devdata->enab_dis_acked = 0;
535 spin_unlock_irqrestore(&devdata->priv_lock, flags);
536
537 /* send disable and wait for ack -- don't hold lock when sending
538 * disable because if the queue is full, insert might sleep.
539 * If an error occurs, don't wait for the timeout.
540 */
541 err = send_enbdis(netdev, 0, devdata);
542 if (err)
543 return err;
544
545 /* wait for ack to arrive before we try to free rcv buffers
546 * NOTE: the other end automatically unposts the rcv buffers when
547 * when it gets a disable.
548 */
549 spin_lock_irqsave(&devdata->priv_lock, flags);
550 while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
551 (wait < timeout)) {
552 if (devdata->enab_dis_acked)
553 break;
554 if (devdata->server_down || devdata->server_change_state) {
555 dev_dbg(&netdev->dev, "%s server went away\n",
556 __func__);
557 break;
558 }
559 set_current_state(TASK_INTERRUPTIBLE);
560 spin_unlock_irqrestore(&devdata->priv_lock, flags);
561 wait += schedule_timeout(msecs_to_jiffies(10));
562 spin_lock_irqsave(&devdata->priv_lock, flags);
563 }
564
565 /* Wait for usage to go to 1 (no other users) before freeing
566 * rcv buffers
567 */
568 if (atomic_read(&devdata->usage) > 1) {
569 while (1) {
570 set_current_state(TASK_INTERRUPTIBLE);
571 spin_unlock_irqrestore(&devdata->priv_lock, flags);
572 schedule_timeout(msecs_to_jiffies(10));
573 spin_lock_irqsave(&devdata->priv_lock, flags);
574 if (atomic_read(&devdata->usage) == 1)
575 break;
576 }
577 }
578 /* we've set enabled to 0, so we can give up the lock. */
579 spin_unlock_irqrestore(&devdata->priv_lock, flags);
580
581 /* stop the transmit queue so nothing more can be transmitted */
582 netif_stop_queue(netdev);
583
584 napi_disable(&devdata->napi);
585
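/* Drop any xmit skbs still queued to the IO Partition; no completions will arrive for them now. */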
586 skb_queue_purge(&devdata->xmitbufhead);
587
588 /* Free rcv buffers - the other end has automatically unposted them on
589 * disable
590 */
591 for (i = 0; i < devdata->num_rcv_bufs; i++) {
592 if (devdata->rcvbuf[i]) {
593 kfree_skb(devdata->rcvbuf[i]);
594 devdata->rcvbuf[i] = NULL;
595 }
596 }
597
598 return 0;
599 }
600
601 /* init_rcv_bufs - initialize receive buffs and send them to the IO Partition
602 * @netdev: struct netdevice.
603 * @devdata: visornic_devdata.
604 *
605 * Allocate rcv buffers and post them to the IO Partition.
606 *
607 * Return: 0 on success, negative integer on failure.
608 */
609 static int init_rcv_bufs(struct net_device *netdev,
610 struct visornic_devdata *devdata)
611 {
612 int i, j, count, err;
613
614 /* allocate a fixed number of receive buffers to post to uisnic;
615 * post the receive buffers after we've allocated the required amount
616 */
617 for (i = 0; i < devdata->num_rcv_bufs; i++) {
618 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
619 /* if we failed to allocate one let us stop */
620 if (!devdata->rcvbuf[i])
621 break;
622 }
623 /* couldn't even allocate one -- bail out */
624 if (i == 0)
625 return -ENOMEM;
626 count = i;
627
628 /* Ensure we can alloc 2/3rd of the requested number of buffers.
629 * 2/3 is an arbitrary choice; used also in ndis init.c
630 */
631 if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
632 /* free receive buffers we did alloc and then bail out */
633 for (i = 0; i < count; i++) {
634 kfree_skb(devdata->rcvbuf[i]);
635 devdata->rcvbuf[i] = NULL;
636 }
637 return -ENOMEM;
638 }
639
640 /* post receive buffers to receive incoming input - without holding
641 * lock - we've not enabled nor started the queue so there shouldn't
642 * be any rcv or xmit activity
643 */
644 for (i = 0; i < count; i++) {
645 err = post_skb(devdata->cmdrsp_rcv, devdata,
646 devdata->rcvbuf[i]);
647 if (!err)
648 continue;
649
650 /* Error handling -
651 * If we posted at least one skb, we should return success,
652 * but need to free the resources that we have not successfully
653 * posted.
654 */
655 for (j = i; j < count; j++) {
656 kfree_skb(devdata->rcvbuf[j]);
657 devdata->rcvbuf[j] = NULL;
658 }
659 if (i == 0)
660 return err;
661 break;
662 }
663
664 return 0;
665 }
666
667 /* visornic_enable_with_timeout - send enable to IO Partition
668 * @netdev: struct net_device.
669 * @timeout: Time to wait for the ACK from the enable.
670 *
671 * Sends enable to the IOVM, then inits and posts receive buffers to the IOVM. Timeout is
672 * defined in msecs (timeout of 0 specifies infinite wait).
673 *
674 * Return: 0 on success, negative integer on failure.
675 */
676 static int visornic_enable_with_timeout(struct net_device *netdev,
677 const int timeout)
678 {
679 int err = 0;
680 struct visornic_devdata *devdata = netdev_priv(netdev);
681 unsigned long flags;
682 int wait = 0;
683
684 napi_enable(&devdata->napi);
685
686 /* NOTE: the other end automatically unposts the rcv buffers when it
687 * gets a disable.
688 */
689 err = init_rcv_bufs(netdev, devdata);
690 if (err < 0) {
691 dev_err(&netdev->dev,
692 "%s failed to init rcv bufs\n", __func__);
693 return err;
694 }
695
696 spin_lock_irqsave(&devdata->priv_lock, flags);
697 devdata->enabled = 1;
698 devdata->enab_dis_acked = 0;
699
700 /* now we're ready, let's send an ENB to uisnic but until we get
701 * an ACK back from uisnic, we'll drop the packets
702 */
703 devdata->n_rcv_packets_not_accepted = 0;
704 spin_unlock_irqrestore(&devdata->priv_lock, flags);
705
706 /* send enable and wait for ack -- don't hold lock when sending enable
707 * because if the queue is full, insert might sleep. If an error
708 * occurs error out.
709 */
710 err = send_enbdis(netdev, 1, devdata);
711 if (err)
712 return err;
713
714 spin_lock_irqsave(&devdata->priv_lock, flags);
715 while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
716 (wait < timeout)) {
717 if (devdata->enab_dis_acked)
718 break;
719 if (devdata->server_down || devdata->server_change_state) {
720 dev_dbg(&netdev->dev, "%s server went away\n",
721 __func__);
722 break;
723 }
724 set_current_state(TASK_INTERRUPTIBLE);
725 spin_unlock_irqrestore(&devdata->priv_lock, flags);
726 wait += schedule_timeout(msecs_to_jiffies(10));
727 spin_lock_irqsave(&devdata->priv_lock, flags);
728 }
729
730 spin_unlock_irqrestore(&devdata->priv_lock, flags);
731
732 if (!devdata->enab_dis_acked) {
733 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
734 return -EIO;
735 }
736
737 netif_start_queue(netdev);
738 return 0;
739 }
740
741 /* visornic_timeout_reset - handle xmit timeout resets
742 * @work: Work item that scheduled the work.
743 *
744 * Transmit timeouts are typically handled by resetting the device for our
745 * virtual NIC; we will send a disable and enable to the IOVM. If it doesn't
746 * respond, we will trigger a serverdown.
747 */
748 static void visornic_timeout_reset(struct work_struct *work)
749 {
750 struct visornic_devdata *devdata;
751 struct net_device *netdev;
752 int response = 0;
753
754 devdata = container_of(work, struct visornic_devdata, timeout_reset);
755 netdev = devdata->netdev;
756
757 rtnl_lock();
758 if (!netif_running(netdev)) {
759 rtnl_unlock();
760 return;
761 }
762
763 response = visornic_disable_with_timeout(netdev,
764 VISORNIC_INFINITE_RSP_WAIT);
765 if (response)
766 goto call_serverdown;
767
768 response = visornic_enable_with_timeout(netdev,
769 VISORNIC_INFINITE_RSP_WAIT);
770 if (response)
771 goto call_serverdown;
772
773 rtnl_unlock();
774
775 return;
776
777 call_serverdown:
778 visornic_serverdown(devdata, NULL);
779 rtnl_unlock();
780 }
781
782 /* visornic_open - enable the visornic device and mark the queue started
783 * @netdev: netdevice to start.
784 *
785 * Enable the device and start the transmit queue.
786 *
787 * Return: 0 on success.
788 */
789 static int visornic_open(struct net_device *netdev)
790 {
791 visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
792 return 0;
793 }
794
795 /* visornic_close - disables the visornic device and stops the queues
796 * @netdev: netdevice to stop.
797 *
798 * Disable the device and stop the transmit queue.
799 *
800 * Return: 0 on success.
801 */
802 static int visornic_close(struct net_device *netdev)
803 {
804 visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
805 return 0;
806 }
807
808 /* devdata_xmits_outstanding - compute outstanding xmits
809 * @devdata: visornic_devdata for device
810 *
811 * Return: Long integer representing the number of outstanding xmits.
812 */
813 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
814 {
815 if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
816 return devdata->chstat.sent_xmit -
817 devdata->chstat.got_xmit_done;
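/* sent_xmit has wrapped past got_xmit_done; account for the wrap. */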
818 return (ULONG_MAX - devdata->chstat.got_xmit_done
819 + devdata->chstat.sent_xmit + 1);
820 }
821
822 /* vnic_hit_high_watermark
823 * @devdata: Indicates visornic device we are checking.
824 * @high_watermark: Max num of unacked xmits we will tolerate before we will
825 * start throttling.
826 *
827 * Return: True iff the number of unacked xmits sent to the IO Partition is >=
828 * high_watermark. False otherwise.
829 */
830 static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
831 ulong high_watermark)
832 {
833 return (devdata_xmits_outstanding(devdata) >= high_watermark);
834 }
835
836 /* vnic_hit_low_watermark
837 * @devdata: Indicates visornic device we are checking.
838 * @low_watermark: We will wait until the num of unacked xmits drops to this
839 * value or lower before we start transmitting again.
840 *
841 * Return: True iff the number of unacked xmits sent to the IO Partition is <=
842 * low_watermark.
843 */
844 static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
845 ulong low_watermark)
846 {
847 return (devdata_xmits_outstanding(devdata) <= low_watermark);
848 }
849
850 /* visornic_xmit - send a packet to the IO Partition
851 * @skb: Packet to be sent.
852 * @netdev: Net device the packet is being sent from.
853 *
854 * Convert the skb to a cmdrsp so the IO Partition can understand it, and send
855 * the XMIT command to the IO Partition for processing. This function is
856 * protected from concurrent calls by a spinlock xmit_lock in the net_device
857 * struct. As soon as the function returns, it can be called again.
858 *
859 * Return: NETDEV_TX_OK.
860 */
861 static int visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
862 {
863 struct visornic_devdata *devdata;
864 int len, firstfraglen, padlen;
865 struct uiscmdrsp *cmdrsp = NULL;
866 unsigned long flags;
867 int err;
868
869 devdata = netdev_priv(netdev);
870 spin_lock_irqsave(&devdata->priv_lock, flags);
871
872 if (netif_queue_stopped(netdev) || devdata->server_down ||
873 devdata->server_change_state) {
874 spin_unlock_irqrestore(&devdata->priv_lock, flags);
875 devdata->busy_cnt++;
876 dev_dbg(&netdev->dev,
877 "%s busy - queue stopped\n", __func__);
878 kfree_skb(skb);
879 return NETDEV_TX_OK;
880 }
881
882 /* sk_buff struct is used to host network data throughout all the
883 * linux network subsystems
884 */
885 len = skb->len;
886
887 /* skb->len is the FULL length of data (including fragmentary portion)
888 * skb->data_len is the length of the fragment portion in frags
889 * skb->len - skb->data_len is size of the 1st fragment in skb->data
890 * calculate the length of the first fragment that skb->data is
891 * pointing to
892 */
893 firstfraglen = skb->len - skb->data_len;
894 if (firstfraglen < ETH_HLEN) {
895 spin_unlock_irqrestore(&devdata->priv_lock, flags);
896 devdata->busy_cnt++;
897 dev_err(&netdev->dev,
898 "%s busy - first frag too small (%d)\n",
899 __func__, firstfraglen);
900 kfree_skb(skb);
901 return NETDEV_TX_OK;
902 }
903
904 if (len < ETH_MIN_PACKET_SIZE &&
905 ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
906 /* pad the packet out to minimum size */
907 padlen = ETH_MIN_PACKET_SIZE - len;
908 memset(&skb->data[len], 0, padlen);
909 skb->tail += padlen;
910 skb->len += padlen;
911 len += padlen;
912 firstfraglen += padlen;
913 }
914
915 cmdrsp = devdata->xmit_cmdrsp;
916 /* clear cmdrsp */
917 memset(cmdrsp, 0, SIZEOF_CMDRSP);
918 cmdrsp->net.type = NET_XMIT;
919 cmdrsp->cmdtype = CMD_NET_TYPE;
920
921 /* save the pointer to skb -- we'll need it for completion */
922 cmdrsp->net.buf = skb;
923
924 if (vnic_hit_high_watermark(devdata,
925 devdata->max_outstanding_net_xmits)) {
926 /* extra NET_XMITs queued over to IOVM - need to wait */
927 devdata->chstat.reject_count++;
928 if (!devdata->queuefullmsg_logged &&
929 ((devdata->chstat.reject_count & 0x3ff) == 1))
930 devdata->queuefullmsg_logged = 1;
931 netif_stop_queue(netdev);
932 spin_unlock_irqrestore(&devdata->priv_lock, flags);
933 devdata->busy_cnt++;
934 dev_dbg(&netdev->dev,
935 "%s busy - waiting for iovm to catch up\n",
936 __func__);
937 kfree_skb(skb);
938 return NETDEV_TX_OK;
939 }
940 if (devdata->queuefullmsg_logged)
941 devdata->queuefullmsg_logged = 0;
942
943 if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
944 cmdrsp->net.xmt.lincsum.valid = 1;
945 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
946 if (skb_transport_header(skb) > skb->data) {
947 cmdrsp->net.xmt.lincsum.hrawoff =
948 skb_transport_header(skb) - skb->data;
949 cmdrsp->net.xmt.lincsum.hrawoffv = 1;
950 }
951 if (skb_network_header(skb) > skb->data) {
952 cmdrsp->net.xmt.lincsum.nhrawoff =
953 skb_network_header(skb) - skb->data;
954 cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
955 }
956 cmdrsp->net.xmt.lincsum.csum = skb->csum;
957 } else {
958 cmdrsp->net.xmt.lincsum.valid = 0;
959 }
960
961 /* save off the length of the entire data packet */
962 cmdrsp->net.xmt.len = len;
963
964 /* copy ethernet header from first frag into cmdrsp
965 * - everything else will be passed in frags & DMA'ed
966 */
967 memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
968
969 /* copy frags info - from skb->data we need to only provide access
970 * beyond eth header
971 */
972 cmdrsp->net.xmt.num_frags =
973 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
974 MAX_PHYS_INFO,
975 cmdrsp->net.xmt.frags);
976 if (cmdrsp->net.xmt.num_frags < 0) {
977 spin_unlock_irqrestore(&devdata->priv_lock, flags);
978 devdata->busy_cnt++;
979 dev_err(&netdev->dev,
980 "%s busy - copy frags failed\n", __func__);
981 kfree_skb(skb);
982 return NETDEV_TX_OK;
983 }
984
985 err = visorchannel_signalinsert(devdata->dev->visorchannel,
986 IOCHAN_TO_IOPART, cmdrsp);
987 if (err) {
988 netif_stop_queue(netdev);
989 spin_unlock_irqrestore(&devdata->priv_lock, flags);
990 devdata->busy_cnt++;
991 dev_dbg(&netdev->dev,
992 "%s busy - signalinsert failed\n", __func__);
993 kfree_skb(skb);
994 return NETDEV_TX_OK;
995 }
996
997 /* Track the skbs that have been sent to the IOVM for XMIT */
998 skb_queue_head(&devdata->xmitbufhead, skb);
999
1000 /* update xmt stats */
1001 devdata->net_stats.tx_packets++;
1002 devdata->net_stats.tx_bytes += skb->len;
1003 devdata->chstat.sent_xmit++;
1004
1005 /* check if we have hit the high watermark for netif_stop_queue() */
1006 if (vnic_hit_high_watermark(devdata,
1007 devdata->upper_threshold_net_xmits)) {
1008 /* extra NET_XMITs queued over to IOVM - need to wait */
1009 /* stop queue - call netif_wake_queue() after lower threshold */
1010 netif_stop_queue(netdev);
1011 dev_dbg(&netdev->dev,
1012 "%s busy - invoking iovm flow control\n",
1013 __func__);
1014 devdata->flow_control_upper_hits++;
1015 }
1016 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1017
1018 /* skb will be freed when we get back NET_XMIT_DONE */
1019 return NETDEV_TX_OK;
1020 }
1021
1022 /* visornic_get_stats - returns net_stats of the visornic device
1023 * @netdev: netdevice.
1024 *
1025 * Return: Pointer to the net_device_stats struct for the device.
1026 */
1027 static struct net_device_stats *visornic_get_stats(struct net_device *netdev)
1028 {
1029 struct visornic_devdata *devdata = netdev_priv(netdev);
1030
1031 return &devdata->net_stats;
1032 }
1033
1034 /* visornic_change_mtu - changes mtu of device
1035 * @netdev: netdevice.
1036 * @new_mtu: Value of new mtu.
1037 *
1038 * The device's MTU cannot be changed by the system; it must be changed via a
1039 * CONTROLVM message. All vnics and pnics in a switch have to have the same MTU
1040 * for everything to work. Currently not supported.
1041 *
1042 * Return: -EINVAL.
1043 */
1044 static int visornic_change_mtu(struct net_device *netdev, int new_mtu)
1045 {
1046 return -EINVAL;
1047 }
1048
1049 /* visornic_set_multi - set visornic device flags
1050 * @netdev: netdevice.
1051 *
1052 * The only flag we currently support is IFF_PROMISC.
1053 */
1054 static void visornic_set_multi(struct net_device *netdev)
1055 {
1056 struct uiscmdrsp *cmdrsp;
1057 struct visornic_devdata *devdata = netdev_priv(netdev);
1058 int err = 0;
1059
1060 if (devdata->old_flags == netdev->flags)
1061 return;
1062
1063 if ((netdev->flags & IFF_PROMISC) ==
1064 (devdata->old_flags & IFF_PROMISC))
1065 goto out_save_flags;
1066
1067 cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1068 if (!cmdrsp)
1069 return;
1070 cmdrsp->cmdtype = CMD_NET_TYPE;
1071 cmdrsp->net.type = NET_RCV_PROMISC;
1072 cmdrsp->net.enbdis.context = netdev;
1073 cmdrsp->net.enbdis.enable =
1074 netdev->flags & IFF_PROMISC;
1075 err = visorchannel_signalinsert(devdata->dev->visorchannel,
1076 IOCHAN_TO_IOPART,
1077 cmdrsp);
1078 kfree(cmdrsp);
1079 if (err)
1080 return;
1081
1082 out_save_flags:
1083 devdata->old_flags = netdev->flags;
1084 }
1085
1086 /* visornic_xmit_timeout - request to timeout the xmit
1087 * @netdev: netdevice.
1088 *
1089 * Queue the work and return. Make sure we have not already been informed that
1090 * the IO Partition is gone; if so, we will have already timed-out the xmits.
1091 */
1092 static void visornic_xmit_timeout(struct net_device *netdev)
1093 {
1094 struct visornic_devdata *devdata = netdev_priv(netdev);
1095 unsigned long flags;
1096
1097 spin_lock_irqsave(&devdata->priv_lock, flags);
1098 if (devdata->going_away) {
1099 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1100 dev_dbg(&devdata->dev->device,
1101 "%s aborting because device removal pending\n",
1102 __func__);
1103 return;
1104 }
1105
1106 /* Ensure that a ServerDown message hasn't been received */
1107 if (!devdata->enabled ||
1108 (devdata->server_down && !devdata->server_change_state)) {
1109 dev_dbg(&netdev->dev, "%s no processing\n",
1110 __func__);
1111 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1112 return;
1113 }
1114 schedule_work(&devdata->timeout_reset);
1115 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1116 }
1117
1118 /* repost_return - repost rcv bufs that have come back
1119 * @cmdrsp: IO channel command struct to post.
1120 * @devdata: Visornic devdata for the device.
1121 * @skb: Socket buffer.
1122 * @netdev: netdevice.
1123 *
1124 * Repost rcv buffers that have been returned to us when we are finished
1125 * with them.
1126 *
1127 * Return: 0 for success, negative integer on error.
1128 */
1129 static int repost_return(struct uiscmdrsp *cmdrsp,
1130 struct visornic_devdata *devdata,
1131 struct sk_buff *skb, struct net_device *netdev)
1132 {
1133 struct net_pkt_rcv copy;
1134 int i = 0, cc, numreposted;
1135 int found_skb = 0;
1136 int status = 0;
1137
1138 copy = cmdrsp->net.rcv;
1139 switch (copy.numrcvbufs) {
1140 case 0:
1141 devdata->n_rcv0++;
1142 break;
1143 case 1:
1144 devdata->n_rcv1++;
1145 break;
1146 case 2:
1147 devdata->n_rcv2++;
1148 break;
1149 default:
1150 devdata->n_rcvx++;
1151 break;
1152 }
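/* For each rcv buffer returned in this response, find its slot in
 * rcvbuf[] and post a freshly allocated buffer in its place.
 */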
1153 for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1154 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1155 if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1156 continue;
1157
1158 if ((skb) && devdata->rcvbuf[i] == skb) {
1159 devdata->found_repost_rcvbuf_cnt++;
1160 found_skb = 1;
1161 devdata->repost_found_skb_cnt++;
1162 }
1163 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1164 if (!devdata->rcvbuf[i]) {
1165 devdata->num_rcv_bufs_could_not_alloc++;
1166 devdata->alloc_failed_in_repost_rtn_cnt++;
1167 status = -ENOMEM;
1168 break;
1169 }
1170 status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1171 if (status) {
1172 kfree_skb(devdata->rcvbuf[i]);
1173 devdata->rcvbuf[i] = NULL;
1174 break;
1175 }
1176 numreposted++;
1177 break;
1178 }
1179 }
1180 if (numreposted != copy.numrcvbufs) {
1181 devdata->n_repost_deficit++;
1182 status = -EINVAL;
1183 }
1184 if (skb) {
1185 if (found_skb) {
1186 kfree_skb(skb);
1187 } else {
1188 status = -EINVAL;
1189 devdata->bad_rcv_buf++;
1190 }
1191 }
1192 return status;
1193 }
1194
1195 /* visornic_rx - handle receive packets coming back from IO Partition
1196 * @cmdrsp: Receive packet returned from IO Partition.
1197 *
1198 * Got a receive packet back from the IO Partition; handle it and send it up
1199 * the stack.
1200 *
1201 * Return: 1 iff an skb was received, otherwise 0.
1202 */
1203 static int visornic_rx(struct uiscmdrsp *cmdrsp)
1204 {
1205 struct visornic_devdata *devdata;
1206 struct sk_buff *skb, *prev, *curr;
1207 struct net_device *netdev;
1208 int cc, currsize, off;
1209 struct ethhdr *eth;
1210 unsigned long flags;
1211
1212 /* post a new rcv buf to the other end using the cmdrsp we have at hand;
1213 * post it without holding the lock - but we'll use the signal lock to
1214 * synchronize the queue insert. The cmdrsp that contains the net.rcv
1215 * is the one we are using to repost, so copy the info we need from it.
1216 */
1217 skb = cmdrsp->net.buf;
1218 netdev = skb->dev;
1219
1220 devdata = netdev_priv(netdev);
1221
1222 spin_lock_irqsave(&devdata->priv_lock, flags);
1223 atomic_dec(&devdata->num_rcvbuf_in_iovm);
1224
1225 /* set length to how much was ACTUALLY received -
1226 * NOTE: rcv_done_len includes actual length of data rcvd
1227 * including ethhdr
1228 */
1229 skb->len = cmdrsp->net.rcv.rcv_done_len;
1230
1231 /* update rcv stats - call it with priv_lock held */
1232 devdata->net_stats.rx_packets++;
1233 devdata->net_stats.rx_bytes += skb->len;
1234
1235 /* test enabled while holding lock */
1236 if (!(devdata->enabled && devdata->enab_dis_acked)) {
1237 /* don't process it unless we're in enable mode and until
1238 * we've gotten an ACK saying the other end got our RCV enable
1239 */
1240 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1241 repost_return(cmdrsp, devdata, skb, netdev);
1242 return 0;
1243 }
1244
1245 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1246
1247 /* when the skb was allocated, skb->dev, skb->data, skb->len and
1248 * skb->data_len were set up. AND, the data has already been put into
1249 * the skb (both the first frag and the frags pages).
1250 * NOTE: firstfraglen is the amount of data in skb->data and that
1251 * which is not in nr_frags or frag_list. This is now simply
1252 * RCVPOST_BUF_SIZE. Bump tail to show how much data is in the
1253 * first frag & set data_len to show the rest; see if we have to
1254 * chain frag_list.
1255 */
1256 /* do PRECAUTIONARY check */
1257 if (skb->len > RCVPOST_BUF_SIZE) {
1258 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1259 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1260 dev_err(&devdata->netdev->dev,
1261 "repost_return failed");
1262 return 0;
1263 }
1264 /* length rcvd is greater than firstfrag in this skb rcv buf */
1265 /* amount in skb->data */
1266 skb->tail += RCVPOST_BUF_SIZE;
1267 /* amount that will be in frag_list */
1268 skb->data_len = skb->len - RCVPOST_BUF_SIZE;
1269 } else {
1270 /* data fits in this skb - no chaining - do
1271 * PRECAUTIONARY check
1272 */
1273 /* should be 1 */
1274 if (cmdrsp->net.rcv.numrcvbufs != 1) {
1275 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1276 dev_err(&devdata->netdev->dev,
1277 "repost_return failed");
1278 return 0;
1279 }
1280 skb->tail += skb->len;
1281 /* nothing rcvd in frag_list */
1282 skb->data_len = 0;
1283 }
1284 off = skb_tail_pointer(skb) - skb->data;
1285
1286 /* 'off' is the amount we bumped tail by in the head skb.
1287 * It is used to calculate the size of each chained skb below,
1288 * and also to index into bufline to continue the copy
1289 * (for chansocktwopc).
1290 * If necessary, chain the rcv skbs together.
1291 * NOTE: index 0 is the same as cmdrsp->net.rcv.skb; we need to
1292 * chain the rest to that one.
1293 * - do PRECAUTIONARY check
1294 */
1295 if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1296 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1297 dev_err(&devdata->netdev->dev, "repost_return failed");
1298 return 0;
1299 }
1300
1301 if (cmdrsp->net.rcv.numrcvbufs > 1) {
1302 /* chain the various rcv buffers into the skb's frag_list. */
1303 /* Note: off was initialized above */
1304 for (cc = 1, prev = NULL;
1305 cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1306 curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1307 curr->next = NULL;
1308 /* start of list- set head */
1309 if (!prev)
1310 skb_shinfo(skb)->frag_list = curr;
1311 else
1312 prev->next = curr;
1313 prev = curr;
1314
1315 /* should we set skb->len and skb->data_len for each
1316 * buffer being chained??? can't hurt!
1317 */
1318 currsize = min(skb->len - off,
1319 (unsigned int)RCVPOST_BUF_SIZE);
1320 curr->len = currsize;
1321 curr->tail += currsize;
1322 curr->data_len = 0;
1323 off += currsize;
1324 }
1325 /* assert skb->len == off */
1326 if (skb->len != off) {
1327 netdev_err(devdata->netdev,
1328 "something wrong; skb->len:%d != off:%d\n",
1329 skb->len, off);
1330 }
1331 }
1332
1333 /* set up packet's protocol type using ethernet header - this
1334 * sets up skb->pkt_type & it also PULLS out the eth header
1335 */
1336 skb->protocol = eth_type_trans(skb, netdev);
1337 eth = eth_hdr(skb);
1338 skb->csum = 0;
1339 skb->ip_summed = CHECKSUM_NONE;
1340
1341 do {
1342 /* accept all packets */
1343 if (netdev->flags & IFF_PROMISC)
1344 break;
1345 if (skb->pkt_type == PACKET_BROADCAST) {
1346 /* accept all broadcast packets */
1347 if (netdev->flags & IFF_BROADCAST)
1348 break;
1349 } else if (skb->pkt_type == PACKET_MULTICAST) {
1350 if ((netdev->flags & IFF_MULTICAST) &&
1351 (netdev_mc_count(netdev))) {
1352 struct netdev_hw_addr *ha;
1353 int found_mc = 0;
1354
1355 /* only accept multicast packets that we can
1356 * find in our multicast address list
1357 */
1358 netdev_for_each_mc_addr(ha, netdev) {
1359 if (ether_addr_equal(eth->h_dest,
1360 ha->addr)) {
1361 found_mc = 1;
1362 break;
1363 }
1364 }
1365 /* accept pkt, dest matches a multicast addr */
1366 if (found_mc)
1367 break;
1368 }
1369 /* accept packet, h_dest must match vnic mac address */
1370 } else if (skb->pkt_type == PACKET_HOST) {
1371 break;
1372 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1373 /* something is not right */
1374 dev_err(&devdata->netdev->dev,
1375 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1376 netdev->name, eth->h_dest, netdev->dev_addr);
1377 }
1378 /* drop packet - don't forward it up to OS */
1379 devdata->n_rcv_packets_not_accepted++;
1380 repost_return(cmdrsp, devdata, skb, netdev);
1381 return 0;
1382 } while (0);
1383
1384 netif_receive_skb(skb);
1385 /* netif_receive_skb returns various values, but in practice most
1386 * drivers ignore the return value
1387 */
1388
1389 skb = NULL;
1390 /* whether the packet got dropped or handled, the skb is freed by
1391 * kernel code, so we shouldn't free it. but we should repost a
1392 * new rcv buffer.
1393 */
1394 repost_return(cmdrsp, devdata, skb, netdev);
1395 return 1;
1396 }
1397
1398 /* devdata_initialize - initialize devdata structure
1399 * @devdata: visornic_devdata structure to initialize.
1400 * @dev: visorbus_device it belongs to.
1401 *
1402 * Setup initial values for the visornic, based on channel and default values.
1403 *
1404 * Return: A pointer to the devdata structure.
1405 */
1406 static struct visornic_devdata *devdata_initialize(
1407 struct visornic_devdata *devdata,
1408 struct visor_device *dev)
1409 {
1410 devdata->dev = dev;
1411 devdata->incarnation_id = get_jiffies_64();
1412 return devdata;
1413 }
1414
1415 /* devdata_release - free up references in devdata
1416 * @devdata: Struct to clean up.
1417 */
1418 static void devdata_release(struct visornic_devdata *devdata)
1419 {
1420 kfree(devdata->rcvbuf);
1421 kfree(devdata->cmdrsp_rcv);
1422 kfree(devdata->xmit_cmdrsp);
1423 }
1424
1425 static const struct net_device_ops visornic_dev_ops = {
1426 .ndo_open = visornic_open,
1427 .ndo_stop = visornic_close,
1428 .ndo_start_xmit = visornic_xmit,
1429 .ndo_get_stats = visornic_get_stats,
1430 .ndo_change_mtu = visornic_change_mtu,
1431 .ndo_tx_timeout = visornic_xmit_timeout,
1432 .ndo_set_rx_mode = visornic_set_multi,
1433 };
1434
1435 /* DebugFS code */
1436 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1437 size_t len, loff_t *offset)
1438 {
1439 ssize_t bytes_read = 0;
1440 int str_pos = 0;
1441 struct visornic_devdata *devdata;
1442 struct net_device *dev;
1443 char *vbuf;
1444
1445 if (len > MAX_BUF)
1446 len = MAX_BUF;
1447 vbuf = kzalloc(len, GFP_KERNEL);
1448 if (!vbuf)
1449 return -ENOMEM;
1450
1451 /* for each vnic channel dump out channel specific data */
1452 rcu_read_lock();
1453 for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1454 /* Only consider netdevs that are visornic, and are open */
1455 if (dev->netdev_ops != &visornic_dev_ops ||
1456 (!netif_queue_stopped(dev)))
1457 continue;
1458
1459 devdata = netdev_priv(dev);
1460 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1461 "netdev = %s (0x%p), MAC Addr %pM\n",
1462 dev->name,
1463 dev,
1464 dev->dev_addr);
1465 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1466 "VisorNic Dev Info = 0x%p\n", devdata);
1467 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1468 " num_rcv_bufs = %d\n",
1469 devdata->num_rcv_bufs);
1470 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1471 " max_outstanding_net_xmits = %lu\n",
1472 devdata->max_outstanding_net_xmits);
1473 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1474 " upper_threshold_net_xmits = %lu\n",
1475 devdata->upper_threshold_net_xmits);
1476 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1477 " lower_threshold_net_xmits = %lu\n",
1478 devdata->lower_threshold_net_xmits);
1479 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1480 " queuefullmsg_logged = %d\n",
1481 devdata->queuefullmsg_logged);
1482 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1483 " chstat.got_rcv = %lu\n",
1484 devdata->chstat.got_rcv);
1485 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1486 " chstat.got_enbdisack = %lu\n",
1487 devdata->chstat.got_enbdisack);
1488 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1489 " chstat.got_xmit_done = %lu\n",
1490 devdata->chstat.got_xmit_done);
1491 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1492 " chstat.xmit_fail = %lu\n",
1493 devdata->chstat.xmit_fail);
1494 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1495 " chstat.sent_enbdis = %lu\n",
1496 devdata->chstat.sent_enbdis);
1497 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1498 " chstat.sent_promisc = %lu\n",
1499 devdata->chstat.sent_promisc);
1500 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1501 " chstat.sent_post = %lu\n",
1502 devdata->chstat.sent_post);
1503 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1504 " chstat.sent_post_failed = %lu\n",
1505 devdata->chstat.sent_post_failed);
1506 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1507 " chstat.sent_xmit = %lu\n",
1508 devdata->chstat.sent_xmit);
1509 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1510 " chstat.reject_count = %lu\n",
1511 devdata->chstat.reject_count);
1512 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1513 " chstat.extra_rcvbufs_sent = %lu\n",
1514 devdata->chstat.extra_rcvbufs_sent);
1515 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1516 " n_rcv0 = %lu\n", devdata->n_rcv0);
1517 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1518 " n_rcv1 = %lu\n", devdata->n_rcv1);
1519 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1520 " n_rcv2 = %lu\n", devdata->n_rcv2);
1521 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1522 " n_rcvx = %lu\n", devdata->n_rcvx);
1523 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1524 " num_rcvbuf_in_iovm = %d\n",
1525 atomic_read(&devdata->num_rcvbuf_in_iovm));
1526 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1527 " alloc_failed_in_if_needed_cnt = %lu\n",
1528 devdata->alloc_failed_in_if_needed_cnt);
1529 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1530 " alloc_failed_in_repost_rtn_cnt = %lu\n",
1531 devdata->alloc_failed_in_repost_rtn_cnt);
1532 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1533 * " inner_loop_limit_reached_cnt = %lu\n",
1534 * devdata->inner_loop_limit_reached_cnt);
1535 */
1536 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1537 " found_repost_rcvbuf_cnt = %lu\n",
1538 devdata->found_repost_rcvbuf_cnt);
1539 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1540 " repost_found_skb_cnt = %lu\n",
1541 devdata->repost_found_skb_cnt);
1542 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1543 " n_repost_deficit = %lu\n",
1544 devdata->n_repost_deficit);
1545 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1546 " bad_rcv_buf = %lu\n",
1547 devdata->bad_rcv_buf);
1548 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1549 " n_rcv_packets_not_accepted = %lu\n",
1550 devdata->n_rcv_packets_not_accepted);
1551 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1552 " interrupts_rcvd = %llu\n",
1553 devdata->interrupts_rcvd);
1554 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1555 " interrupts_notme = %llu\n",
1556 devdata->interrupts_notme);
1557 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1558 " interrupts_disabled = %llu\n",
1559 devdata->interrupts_disabled);
1560 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1561 " busy_cnt = %llu\n",
1562 devdata->busy_cnt);
1563 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1564 " flow_control_upper_hits = %llu\n",
1565 devdata->flow_control_upper_hits);
1566 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1567 " flow_control_lower_hits = %llu\n",
1568 devdata->flow_control_lower_hits);
1569 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1570 " netif_queue = %s\n",
1571 netif_queue_stopped(devdata->netdev) ?
1572 "stopped" : "running");
1573 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1574 " xmits_outstanding = %lu\n",
1575 devdata_xmits_outstanding(devdata));
1576 }
1577 rcu_read_unlock();
1578 bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1579 kfree(vbuf);
1580 return bytes_read;
1581 }
1582
1583 static struct dentry *visornic_debugfs_dir;
1584 static const struct file_operations debugfs_info_fops = {
1585 .read = info_debugfs_read,
1586 };
1587
1588 /* send_rcv_posts_if_needed - send receive buffers to the IO Partition.
1589 * @devdata: Visornic device.
1590 */
1591 static void send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1592 {
1593 int i;
1594 struct net_device *netdev;
1595 struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1596 int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1597 int err;
1598
1599 /* don't do this until vnic is marked ready */
1600 if (!(devdata->enabled && devdata->enab_dis_acked))
1601 return;
1602
1603 netdev = devdata->netdev;
1604 rcv_bufs_allocated = 0;
1605 /* this code is trying to prevent getting stuck here forever,
1606 * but still retry it if you can't allocate them all this time.
1607 */
1608 cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1609 while (cur_num_rcv_bufs_to_alloc > 0) {
1610 cur_num_rcv_bufs_to_alloc--;
1611 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1612 if (devdata->rcvbuf[i])
1613 continue;
1614 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1615 if (!devdata->rcvbuf[i]) {
1616 devdata->alloc_failed_in_if_needed_cnt++;
1617 break;
1618 }
1619 rcv_bufs_allocated++;
1620 err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1621 if (err) {
1622 kfree_skb(devdata->rcvbuf[i]);
1623 devdata->rcvbuf[i] = NULL;
1624 break;
1625 }
1626 devdata->chstat.extra_rcvbufs_sent++;
1627 }
1628 }
1629 devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1630 }
1631
1632 /* drain_resp_queue - drains and ignores all messages from the resp queue
1633 * @cmdrsp: IO channel command response message.
1634 * @devdata: Visornic device to drain.
1635 */
1636 static void drain_resp_queue(struct uiscmdrsp *cmdrsp,
1637 struct visornic_devdata *devdata)
1638 {
1639 while (!visorchannel_signalremove(devdata->dev->visorchannel,
1640 IOCHAN_FROM_IOPART,
1641 cmdrsp))
1642 ;
1643 }
1644
1645 /* service_resp_queue - drain the response queue
1646 * @cmdrsp: IO channel command response message.
1647 * @devdata: Visornic device to drain.
1648 * @rx_work_done:
1649 * @budget:
1650 *
1651 * Drain the response queue of any responses from the IO Partition. Process the
1652 * responses as we get them.
1653 */
1654 static void service_resp_queue(struct uiscmdrsp *cmdrsp,
1655 struct visornic_devdata *devdata,
1656 int *rx_work_done, int budget)
1657 {
1658 unsigned long flags;
1659 struct net_device *netdev;
1660
1661 while (*rx_work_done < budget) {
1662 /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1663 * moment
1664 */
1665 /* queue empty */
1666 if (visorchannel_signalremove(devdata->dev->visorchannel,
1667 IOCHAN_FROM_IOPART,
1668 cmdrsp))
1669 break;
1670
1671 switch (cmdrsp->net.type) {
1672 case NET_RCV:
1673 devdata->chstat.got_rcv++;
1674 /* process incoming packet */
1675 *rx_work_done += visornic_rx(cmdrsp);
1676 break;
1677 case NET_XMIT_DONE:
1678 spin_lock_irqsave(&devdata->priv_lock, flags);
1679 devdata->chstat.got_xmit_done++;
1680 if (cmdrsp->net.xmtdone.xmt_done_result)
1681 devdata->chstat.xmit_fail++;
1682 /* only call queue wake if we stopped it */
1683 netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1684 /* ASSERT netdev == vnicinfo->netdev; */
1685 if (netdev == devdata->netdev &&
1686 netif_queue_stopped(netdev)) {
1687 /* check if we have crossed the lower watermark
1688 * for netif_wake_queue()
1689 */
1690 if (vnic_hit_low_watermark
1691 (devdata,
1692 devdata->lower_threshold_net_xmits)) {
1693 /* enough NET_XMITs completed
1694 * so can restart netif queue
1695 */
1696 netif_wake_queue(netdev);
1697 devdata->flow_control_lower_hits++;
1698 }
1699 }
1700 skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1701 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1702 kfree_skb(cmdrsp->net.buf);
1703 break;
1704 case NET_RCV_ENBDIS_ACK:
1705 devdata->chstat.got_enbdisack++;
1706 netdev = (struct net_device *)
1707 cmdrsp->net.enbdis.context;
1708 spin_lock_irqsave(&devdata->priv_lock, flags);
1709 devdata->enab_dis_acked = 1;
1710 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1711
1712 if (devdata->server_down &&
1713 devdata->server_change_state) {
1714 /* Inform Linux that the link is up */
1715 devdata->server_down = false;
1716 devdata->server_change_state = false;
1717 netif_wake_queue(netdev);
1718 netif_carrier_on(netdev);
1719 }
1720 break;
1721 case NET_CONNECT_STATUS:
1722 netdev = devdata->netdev;
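/* The IO Partition reported a link state change; reflect it in
 * the netdev carrier and queue state.
 */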
1723 if (cmdrsp->net.enbdis.enable == 1) {
1724 spin_lock_irqsave(&devdata->priv_lock, flags);
1725 devdata->enabled = cmdrsp->net.enbdis.enable;
1726 spin_unlock_irqrestore(&devdata->priv_lock,
1727 flags);
1728 netif_wake_queue(netdev);
1729 netif_carrier_on(netdev);
1730 } else {
1731 netif_stop_queue(netdev);
1732 netif_carrier_off(netdev);
1733 spin_lock_irqsave(&devdata->priv_lock, flags);
1734 devdata->enabled = cmdrsp->net.enbdis.enable;
1735 spin_unlock_irqrestore(&devdata->priv_lock,
1736 flags);
1737 }
1738 break;
1739 default:
1740 break;
1741 }
1742 /* cmdrsp is now available for reuse */
1743 }
1744 }
1745
1746 static int visornic_poll(struct napi_struct *napi, int budget)
1747 {
1748 struct visornic_devdata *devdata = container_of(napi,
1749 struct visornic_devdata,
1750 napi);
1751 int rx_count = 0;
1752
1753 send_rcv_posts_if_needed(devdata);
1754 service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1755
1756 /* If there aren't any more packets to receive, stop the poll. */
1757 if (rx_count < budget)
1758 napi_complete_done(napi, rx_count);
1759
1760 return rx_count;
1761 }
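/* NAPI contract note: calling napi_complete_done() when fewer than @budget
 * packets were processed ends this poll cycle; polling resumes only after
 * napi_schedule() is invoked again, which in this driver happens from the
 * irq_poll_timer callback below rather than from a hardware interrupt.
 */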
1762
1763 /* poll_for_irq - checks the status of the response queue
1764  * @v: Void pointer to the visornic devdata struct.
1765  *
1766  * Timer callback that periodically checks the response queue and schedules
1767  * the NAPI poll to drain it if needed.
1768  */
1769 static void poll_for_irq(unsigned long v)
1770 {
1771 struct visornic_devdata *devdata = (struct visornic_devdata *)v;
1772
1773 if (!visorchannel_signalempty(
1774 devdata->dev->visorchannel,
1775 IOCHAN_FROM_IOPART))
1776 napi_schedule(&devdata->napi);
1777
1778 atomic_set(&devdata->interrupt_rcvd, 0);
1779
1780 mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1781 }
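/* The IO channel is serviced by polling rather than by interrupts: this timer
 * callback stands in for an interrupt handler. It schedules the NAPI poll
 * whenever the response queue is non-empty and then re-arms itself, so
 * responses from the IO Partition are picked up within a few milliseconds.
 */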
1782
1783 /* visornic_probe - probe function for visornic devices
1784 * @dev: The visor device discovered.
1785 *
1786 * Called when visorbus discovers a visornic device on its bus. It creates a new
1787 * visornic ethernet adapter.
1788 *
1789 * Return: 0 on success, or negative integer on error.
1790 */
1791 static int visornic_probe(struct visor_device *dev)
1792 {
1793 struct visornic_devdata *devdata = NULL;
1794 struct net_device *netdev = NULL;
1795 int err;
1796 int channel_offset = 0;
1797 u64 features;
1798
1799 netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1800 if (!netdev) {
1801 dev_err(&dev->device,
1802 "%s alloc_etherdev failed\n", __func__);
1803 return -ENOMEM;
1804 }
1805
1806 netdev->netdev_ops = &visornic_dev_ops;
1807 netdev->watchdog_timeo = 5 * HZ;
1808 SET_NETDEV_DEV(netdev, &dev->device);
1809
1810 /* Get MAC address from channel and read it into the device. */
1811 netdev->addr_len = ETH_ALEN;
1812 channel_offset = offsetof(struct visor_io_channel, vnic.macaddr);
1813 err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1814 ETH_ALEN);
1815 if (err < 0) {
1816 dev_err(&dev->device,
1817 "%s failed to get mac addr from chan (%d)\n",
1818 __func__, err);
1819 goto cleanup_netdev;
1820 }
1821
1822 devdata = devdata_initialize(netdev_priv(netdev), dev);
1823 if (!devdata) {
1824 dev_err(&dev->device,
1825 "%s devdata_initialize failed\n", __func__);
1826 err = -ENOMEM;
1827 goto cleanup_netdev;
1828 }
1829 /* don't trust messages lying around in the channel */
1830 drain_resp_queue(devdata->cmdrsp, devdata);
1831
1832 devdata->netdev = netdev;
1833 dev_set_drvdata(&dev->device, devdata);
1834 init_waitqueue_head(&devdata->rsp_queue);
1835 spin_lock_init(&devdata->priv_lock);
1836 /* not yet enabled to receive */
1837 devdata->enabled = 0;
1838 atomic_set(&devdata->usage, 1);
1839
1840 /* Setup rcv bufs */
1841 channel_offset = offsetof(struct visor_io_channel, vnic.num_rcv_bufs);
1842 err = visorbus_read_channel(dev, channel_offset,
1843 &devdata->num_rcv_bufs, 4);
1844 if (err) {
1845 dev_err(&dev->device,
1846 "%s failed to get #rcv bufs from chan (%d)\n",
1847 __func__, err);
1848 goto cleanup_netdev;
1849 }
1850
1851 devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1852 sizeof(struct sk_buff *), GFP_KERNEL);
1853 if (!devdata->rcvbuf) {
1854 err = -ENOMEM;
1855 goto cleanup_netdev;
1856 }
1857
1858 /* Set the net_xmit outstanding threshold.
1859  * Always leave two slots open, but enforce a minimum of three outstanding
1860  * xmits. Note that max_outstanding_net_xmits must be > 0.
1861  */
1862 devdata->max_outstanding_net_xmits =
1863 max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1864 devdata->upper_threshold_net_xmits =
1865 max_t(unsigned long,
1866 2, (devdata->max_outstanding_net_xmits - 1));
1867 devdata->lower_threshold_net_xmits =
1868 max_t(unsigned long,
1869 1, (devdata->max_outstanding_net_xmits / 2));
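/* Worked example with illustrative numbers: if the IO Partition reports
 * num_rcv_bufs = 64, then max_outstanding_net_xmits = max(3, 64/3 - 2) = 19,
 * upper_threshold_net_xmits = max(2, 19 - 1) = 18, and
 * lower_threshold_net_xmits = max(1, 19 / 2) = 9. The transmit path stops the
 * netif queue once 18 xmits are outstanding, and service_resp_queue() wakes
 * it again once completions bring the count back down to 9.
 */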
1870
1871 skb_queue_head_init(&devdata->xmitbufhead);
1872
1873 /* create a cmdrsp we can use to post and unpost rcv buffers */
1874 devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1875 if (!devdata->cmdrsp_rcv) {
1876 err = -ENOMEM;
1877 goto cleanup_rcvbuf;
1878 }
1879 devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1880 if (!devdata->xmit_cmdrsp) {
1881 err = -ENOMEM;
1882 goto cleanup_cmdrsp_rcv;
1883 }
1884 INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1885 devdata->server_down = false;
1886 devdata->server_change_state = false;
1887
1888 /* set the default MTU */
1889 channel_offset = offsetof(struct visor_io_channel, vnic.mtu);
1890 err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1891 if (err) {
1892 dev_err(&dev->device,
1893 "%s failed to get mtu from chan (%d)\n",
1894 __func__, err);
1895 goto cleanup_xmit_cmdrsp;
1896 }
1897
1898 /* TODO: Setup Interrupt information */
1899 /* Register NAPI and start the poll timer to get responses */
1900 netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1901
1902 setup_timer(&devdata->irq_poll_timer, poll_for_irq,
1903 (unsigned long)devdata);
1904 /* Note: This timer has to be running before the device is enabled,
1905  * because the napi routine is responsible for setting
1906  * enab_dis_acked
1907  */
1908 mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1909
1910 channel_offset = offsetof(struct visor_io_channel,
1911 channel_header.features);
1912 err = visorbus_read_channel(dev, channel_offset, &features, 8);
1913 if (err) {
1914 dev_err(&dev->device,
1915 "%s failed to get features from chan (%d)\n",
1916 __func__, err);
1917 goto cleanup_napi_add;
1918 }
1919
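/* Advertise, via the channel-header feature bits, that this driver polls the
 * channel (VISOR_CHANNEL_IS_POLLING) and performs enhanced receive-buffer
 * checking (VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING).
 */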
1920 features |= VISOR_CHANNEL_IS_POLLING;
1921 features |= VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING;
1922 err = visorbus_write_channel(dev, channel_offset, &features, 8);
1923 if (err) {
1924 dev_err(&dev->device,
1925 "%s failed to set features in chan (%d)\n",
1926 __func__, err);
1927 goto cleanup_napi_add;
1928 }
1929
1930 /* Note: Interrupts have to be enabled before the device is enabled,
1931  * because the napi routine is responsible for setting
1932  * enab_dis_acked
1933  */
1934 visorbus_enable_channel_interrupts(dev);
1935
1936 err = register_netdev(netdev);
1937 if (err) {
1938 dev_err(&dev->device,
1939 "%s register_netdev failed (%d)\n", __func__, err);
1940 goto cleanup_napi_add;
1941 }
1942
1943 /* create debug/sysfs directories */
1944 devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1945 visornic_debugfs_dir);
1946 if (!devdata->eth_debugfs_dir) {
1947 dev_err(&dev->device,
1948 "%s debugfs_create_dir %s failed\n",
1949 __func__, netdev->name);
1950 err = -ENOMEM;
1951 goto cleanup_register_netdev;
1952 }
1953
1954 dev_info(&dev->device, "%s success netdev=%s\n",
1955 __func__, netdev->name);
1956 return 0;
1957
1958 cleanup_register_netdev:
1959 unregister_netdev(netdev);
1960
1961 cleanup_napi_add:
1962 del_timer_sync(&devdata->irq_poll_timer);
1963 netif_napi_del(&devdata->napi);
1964
1965 cleanup_xmit_cmdrsp:
1966 kfree(devdata->xmit_cmdrsp);
1967
1968 cleanup_cmdrsp_rcv:
1969 kfree(devdata->cmdrsp_rcv);
1970
1971 cleanup_rcvbuf:
1972 kfree(devdata->rcvbuf);
1973
1974 cleanup_netdev:
1975 free_netdev(netdev);
1976 return err;
1977 }
1978
1979 /* host_side_disappeared - IO Partition is gone
1980 * @devdata: Device object.
1981 *
1982 * IO partition servicing this device is gone; do cleanup.
1983 */
1984 static void host_side_disappeared(struct visornic_devdata *devdata)
1985 {
1986 unsigned long flags;
1987
1988 spin_lock_irqsave(&devdata->priv_lock, flags);
1989 /* indicate device destroyed */
1990 devdata->dev = NULL;
1991 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1992 }
1993
1994 /* visornic_remove - called when visornic dev goes away
1995 * @dev: Visornic device that is being removed.
1996 *
1997 * Called when DEVICE_DESTROY gets called to remove the device.
1998 */
1999 static void visornic_remove(struct visor_device *dev)
2000 {
2001 struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2002 struct net_device *netdev;
2003 unsigned long flags;
2004
2005 if (!devdata) {
2006 dev_err(&dev->device, "%s no devdata\n", __func__);
2007 return;
2008 }
2009 spin_lock_irqsave(&devdata->priv_lock, flags);
2010 if (devdata->going_away) {
2011 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2012 dev_err(&dev->device, "%s already being removed\n", __func__);
2013 return;
2014 }
2015 devdata->going_away = true;
2016 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2017 netdev = devdata->netdev;
2018 if (!netdev) {
2019 dev_err(&dev->device, "%s no net device\n", __func__);
2020 return;
2021 }
2022
2023 /* going_away prevents new items being added to the workqueues */
2024 cancel_work_sync(&devdata->timeout_reset);
2025
2026 debugfs_remove_recursive(devdata->eth_debugfs_dir);
2027 /* this will call visornic_close() */
2028 unregister_netdev(netdev);
2029
2030 del_timer_sync(&devdata->irq_poll_timer);
2031 netif_napi_del(&devdata->napi);
2032
2033 dev_set_drvdata(&dev->device, NULL);
2034 host_side_disappeared(devdata);
2035 devdata_release(devdata);
2036 free_netdev(netdev);
2037 }
2038
2039 /* visornic_pause - called when IO Part disappears
2040 * @dev: Visornic device that is being serviced.
2041 * @complete_func: Call when finished.
2042 *
2043 * Called when the IO Partition has gone down. Need to free up resources and
2044 * wait for IO partition to come back. Mark link as down and don't attempt any
2045 * DMA. When we have freed memory, call the complete_func so that Command knows
2046 * we are done. If we don't call complete_func, the IO Partition will never
2047 * come back.
2048 *
2049 * Return: 0 on success.
2050 */
2051 static int visornic_pause(struct visor_device *dev,
2052 visorbus_state_complete_func complete_func)
2053 {
2054 struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2055
2056 visornic_serverdown(devdata, complete_func);
2057 return 0;
2058 }
2059
2060 /* visornic_resume - called when IO Partition has recovered
2061 * @dev: Visornic device that is being serviced.
2062 * @complete_func: Call when finished.
2063 *
2064 * Called when the IO partition has recovered. Re-establish connection to the IO
2065 * Partition and set the link up. Okay to do DMA again.
2066 *
2067 * Return: 0 on success, negative integer on error.
2068 */
2069 static int visornic_resume(struct visor_device *dev,
2070 visorbus_state_complete_func complete_func)
2071 {
2072 struct visornic_devdata *devdata;
2073 struct net_device *netdev;
2074 unsigned long flags;
2075
2076 devdata = dev_get_drvdata(&dev->device);
2077 if (!devdata) {
2078 dev_err(&dev->device, "%s no devdata\n", __func__);
2079 return -EINVAL;
2080 }
2081
2082 netdev = devdata->netdev;
2083
2084 spin_lock_irqsave(&devdata->priv_lock, flags);
2085 if (devdata->server_change_state) {
2086 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2087 dev_err(&dev->device, "%s server already changing state\n",
2088 __func__);
2089 return -EINVAL;
2090 }
2091 if (!devdata->server_down) {
2092 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2093 dev_err(&dev->device, "%s server not down\n", __func__);
2094 complete_func(dev, 0);
2095 return 0;
2096 }
2097 devdata->server_change_state = true;
2098 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2099
2100 /* Must transition channel to ATTACHED state BEFORE
2101 * we can start using the device again.
2102 * TODO: State transitions
2103 */
2104 mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
2105
2106 rtnl_lock();
2107 dev_open(netdev);
2108 rtnl_unlock();
2109
2110 complete_func(dev, 0);
2111 return 0;
2112 }
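/* Resume mirrors a fresh open: restarting irq_poll_timer gets responses
 * flowing again, and dev_open() under rtnl_lock re-enables the interface
 * just as an administrative "ip link set <dev> up" would.
 */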
2113
2114 /* This is used to tell the visorbus driver which types of visor devices
2115 * we support, and what functions to call when a visor device that we support
2116 * is attached or removed.
2117 */
2118 static struct visor_driver visornic_driver = {
2119 .name = "visornic",
2120 .owner = THIS_MODULE,
2121 .channel_types = visornic_channel_types,
2122 .probe = visornic_probe,
2123 .remove = visornic_remove,
2124 .pause = visornic_pause,
2125 .resume = visornic_resume,
2126 .channel_interrupt = NULL,
2127 };
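/* channel_interrupt is left NULL: the channel is serviced by the
 * irq_poll_timer/NAPI polling path (see poll_for_irq() above) rather than by
 * a per-channel interrupt callback from visorbus.
 */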
2128
2129 /* visornic_init - init function
2130 *
2131 * Init function for the visornic driver. Do initial driver setup and wait
2132 * for devices.
2133 *
2134 * Return: 0 on success, negative integer on error.
2135 */
2136 static int visornic_init(void)
2137 {
2138 struct dentry *ret;
2139 int err = -ENOMEM;
2140
2141 visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2142 if (!visornic_debugfs_dir)
2143 return err;
2144
2145 ret = debugfs_create_file("info", 0400, visornic_debugfs_dir, NULL,
2146 &debugfs_info_fops);
2147 if (!ret)
2148 goto cleanup_debugfs;
2149 ret = debugfs_create_file("enable_ints", 0200, visornic_debugfs_dir,
2150 NULL, &debugfs_enable_ints_fops);
2151 if (!ret)
2152 goto cleanup_debugfs;
2153
2154 err = visorbus_register_visor_driver(&visornic_driver);
2155 if (err)
2156 goto cleanup_debugfs;
2157
2158 return 0;
2159
2160 cleanup_debugfs:
2161 debugfs_remove_recursive(visornic_debugfs_dir);
2162 return err;
2163 }
2164
2165 /* visornic_cleanup - driver exit routine
2166 *
2167 * Unregister driver from the bus and free up memory.
2168 */
2169 static void visornic_cleanup(void)
2170 {
2171 visorbus_unregister_visor_driver(&visornic_driver);
2172 debugfs_remove_recursive(visornic_debugfs_dir);
2173 }
2174
2175 module_init(visornic_init);
2176 module_exit(visornic_cleanup);
2177
2178 MODULE_AUTHOR("Unisys");
2179 MODULE_LICENSE("GPL");
2180 MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");