drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c

   1 /*
   2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
   3  * driver for Linux.
   4  *
   5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
   6  *
   7  * This software is available to you under a choice of one of two
   8  * licenses.  You may choose to be licensed under the terms of the GNU
   9  * General Public License (GPL) Version 2, available from the file
  10  * COPYING in the main directory of this source tree, or the
  11  * OpenIB.org BSD license below:
  12  *
  13  *     Redistribution and use in source and binary forms, with or
  14  *     without modification, are permitted provided that the following
  15  *     conditions are met:
  16  *
  17  *      - Redistributions of source code must retain the above
  18  *        copyright notice, this list of conditions and the following
  19  *        disclaimer.
  20  *
  21  *      - Redistributions in binary form must reproduce the above
  22  *        copyright notice, this list of conditions and the following
  23  *        disclaimer in the documentation and/or other materials
  24  *        provided with the distribution.
  25  *
  26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33  * SOFTWARE.
  34  */
  35
  36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  37
  38 #include <linux/module.h>
  39 #include <linux/moduleparam.h>
  40 #include <linux/init.h>
  41 #include <linux/pci.h>
  42 #include <linux/dma-mapping.h>
  43 #include <linux/netdevice.h>
  44 #include <linux/etherdevice.h>
  45 #include <linux/debugfs.h>
  46 #include <linux/ethtool.h>
  47 #include <linux/mdio.h>
  48
  49 #include "t4vf_common.h"
  50 #include "t4vf_defs.h"
  51
  52 #include "../cxgb4/t4_regs.h"
  53 #include "../cxgb4/t4_msg.h"
  54
/*
 * Generic information about the driver: its version string and a one-line
 * human-readable description.
 */
#define DRV_VERSION "2.0.0-ko"
#define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"
  60
  61 /*
  62  * Module Parameters.
  63  * ==================
  64  */
  65
/*
 * Default ethtool "message level" for adapters, expressed as a mask of
 * NETIF_MSG_* categories: driver, probe, link, timer, interface down/up and
 * RX/TX error messages.
 */
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
                         NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
                         NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
  72
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X then MSI.  This parameter determines which of these schemes the
 * driver may consider as follows:
 *
 *     msi = 2: choose from among MSI-X and MSI
 *     msi = 1: only consider MSI interrupts
 *
 * Note that unlike the Physical Function driver, this Virtual Function driver
 * does _not_ support legacy INTx interrupts (this limitation is mandated by
 * the PCI-E SR-IOV standard).
 *
 * The default (MSI_DEFAULT) is MSI_MSIX, i.e. both schemes may be considered.
 * The parameter is registered with mode 0644.
 * NOTE(review): the code which consumes this value is not in this part of the
 * file; presumably it is only consulted when an adapter is probed -- confirm.
 */
#define MSI_MSIX        2
#define MSI_MSI         1
#define MSI_DEFAULT     MSI_MSIX

static int msi = MSI_DEFAULT;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
  93
  94 /*
  95  * Fundamental constants.
  96  * ======================
  97  */
  98
enum {
        /*
         * Largest sizes defined for TX Queues, Response Queues and RX
         * buffers.
         */
        MAX_TXQ_ENTRIES         = 16384,
        MAX_RSPQ_ENTRIES        = 16384,
        MAX_RX_BUFFERS          = 16384,

        /*
         * Smallest sizes defined for TX Queues, Response Queues and Free
         * Lists.
         */
        MIN_TXQ_ENTRIES         = 32,
        MIN_RSPQ_ENTRIES        = 128,
        MIN_FL_ENTRIES          = 16,

        /*
         * For purposes of manipulating the Free List size we need to
         * recognize that Free Lists are actually Egress Queues (the host
         * produces free buffers which the hardware consumes), that Egress
         * Queue indices are all in units of Egress Context Units (EQ_UNIT
         * bytes), and that Free List entries are 64-bit PCI DMA addresses.
         * And since the state of the Producer Index == the Consumer Index
         * implies an EMPTY list, we always have at least one Egress Unit's
         * worth of Free List entries unused.  See sge.c for more details ...
         */
        EQ_UNIT = SGE_EQ_IDXSIZE,
        /* Number of 64-bit Free List entries which fit in one Egress Unit. */
        FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
        /* The one Egress Unit's worth of Free List entries left unused. */
        MIN_FL_RESID = FL_PER_EQ_UNIT,
};
 122
 123 /*
 124  * Global driver state.
 125  * ====================
 126  */
 127
/*
 * Driver-wide debugfs directory entry.
 * NOTE(review): where this is created and removed is not visible in this part
 * of the file.
 */
static struct dentry *cxgb4vf_debugfs_root;
 129
 130 /*
 131  * OS "Callback" functions.
 132  * ========================
 133  */
 134
 135 /*
 136  * The link status has changed on the indicated "port" (Virtual Interface).
 137  */
 138 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
 139 {
 140         struct net_device *dev = adapter->port[pidx];
 141
 142         /*
 143          * If the port is disabled or the current recorded "link up"
 144          * status matches the new status, just return.
 145          */
 146         if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
 147                 return;
 148
 149         /*
 150          * Tell the OS that the link status has changed and print a short
 151          * informative message on the console about the event.
 152          */
 153         if (link_ok) {
 154                 const char *s;
 155                 const char *fc;
 156                 const struct port_info *pi = netdev_priv(dev);
 157
 158                 netif_carrier_on(dev);
 159
 160                 switch (pi->link_cfg.speed) {
 161                 case 100:
 162                         s = "100Mbps";
 163                         break;
 164                 case 1000:
 165                         s = "1Gbps";
 166                         break;
 167                 case 10000:
 168                         s = "10Gbps";
 169                         break;
 170                 case 25000:
 171                         s = "25Gbps";
 172                         break;
 173                 case 40000:
 174                         s = "40Gbps";
 175                         break;
 176                 case 100000:
 177                         s = "100Gbps";
 178                         break;
 179
 180                 default:
 181                         s = "unknown";
 182                         break;
 183                 }
 184
 185                 switch ((int)pi->link_cfg.fc) {
 186                 case PAUSE_RX:
 187                         fc = "RX";
 188                         break;
 189
 190                 case PAUSE_TX:
 191                         fc = "TX";
 192                         break;
 193
 194                 case PAUSE_RX | PAUSE_TX:
 195                         fc = "RX/TX";
 196                         break;
 197
 198                 default:
 199                         fc = "no";
 200                         break;
 201                 }
 202
 203                 netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
 204         } else {
 205                 netif_carrier_off(dev);
 206                 netdev_info(dev, "link down\n");
 207         }
 208 }
 209
 210 /*
 211  * THe port module type has changed on the indicated "port" (Virtual
 212  * Interface).
 213  */
 214 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
 215 {
 216         static const char * const mod_str[] = {
 217                 NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
 218         };
 219         const struct net_device *dev = adapter->port[pidx];
 220         const struct port_info *pi = netdev_priv(dev);
 221
 222         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
 223                 dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
 224                          dev->name);
 225         else if (pi->mod_type < ARRAY_SIZE(mod_str))
 226                 dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
 227                          dev->name, mod_str[pi->mod_type]);
 228         else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
 229                 dev_info(adapter->pdev_dev, "%s: unsupported optical port "
 230                          "module inserted\n", dev->name);
 231         else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
 232                 dev_info(adapter->pdev_dev, "%s: unknown port module inserted,"
 233                          "forcing TWINAX\n", dev->name);
 234         else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
 235                 dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
 236                          dev->name);
 237         else
 238                 dev_info(adapter->pdev_dev, "%s: unknown module type %d "
 239                          "inserted\n", dev->name, pi->mod_type);
 240 }
 241
 242 /*
 243  * Net device operations.
 244  * ======================
 245  */
 246
 247
 248
 249
 250 /*
 251  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 252  * Interface).
 253  */
 254 static int link_start(struct net_device *dev)
 255 {
 256         int ret;
 257         struct port_info *pi = netdev_priv(dev);
 258
 259         /*
 260          * We do not set address filters and promiscuity here, the stack does
 261          * that step explicitly. Enable vlan accel.
 262          */
 263         ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
 264                               true);
 265         if (ret == 0) {
 266                 ret = t4vf_change_mac(pi->adapter, pi->viid,
 267                                       pi->xact_addr_filt, dev->dev_addr, true);
 268                 if (ret >= 0) {
 269                         pi->xact_addr_filt = ret;
 270                         ret = 0;
 271                 }
 272         }
 273
 274         /*
 275          * We don't need to actually "start the link" itself since the
 276          * firmware will do that for us when the first Virtual Interface
 277          * is enabled on a port.
 278          */
 279         if (ret == 0)
 280                 ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
 281         return ret;
 282 }
 283
 284 /*
 285  * Name the MSI-X interrupts.
 286  */
 287 static void name_msix_vecs(struct adapter *adapter)
 288 {
 289         int namelen = sizeof(adapter->msix_info[0].desc) - 1;
 290         int pidx;
 291
 292         /*
 293          * Firmware events.
 294          */
 295         snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
 296                  "%s-FWeventq", adapter->name);
 297         adapter->msix_info[MSIX_FW].desc[namelen] = 0;
 298
 299         /*
 300          * Ethernet queues.
 301          */
 302         for_each_port(adapter, pidx) {
 303                 struct net_device *dev = adapter->port[pidx];
 304                 const struct port_info *pi = netdev_priv(dev);
 305                 int qs, msi;
 306
 307                 for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
 308                         snprintf(adapter->msix_info[msi].desc, namelen,
 309                                  "%s-%d", dev->name, qs);
 310                         adapter->msix_info[msi].desc[namelen] = 0;
 311                 }
 312         }
 313 }
 314
 315 /*
 316  * Request all of our MSI-X resources.
 317  */
 318 static int request_msix_queue_irqs(struct adapter *adapter)
 319 {
 320         struct sge *s = &adapter->sge;
 321         int rxq, msi, err;
 322
 323         /*
 324          * Firmware events.
 325          */
 326         err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
 327                           0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
 328         if (err)
 329                 return err;
 330
 331         /*
 332          * Ethernet queues.
 333          */
 334         msi = MSIX_IQFLINT;
 335         for_each_ethrxq(s, rxq) {
 336                 err = request_irq(adapter->msix_info[msi].vec,
 337                                   t4vf_sge_intr_msix, 0,
 338                                   adapter->msix_info[msi].desc,
 339                                   &s->ethrxq[rxq].rspq);
 340                 if (err)
 341                         goto err_free_irqs;
 342                 msi++;
 343         }
 344         return 0;
 345
 346 err_free_irqs:
 347         while (--rxq >= 0)
 348                 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
 349         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 350         return err;
 351 }
 352
 353 /*
 354  * Free our MSI-X resources.
 355  */
 356 static void free_msix_queue_irqs(struct adapter *adapter)
 357 {
 358         struct sge *s = &adapter->sge;
 359         int rxq, msi;
 360
 361         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 362         msi = MSIX_IQFLINT;
 363         for_each_ethrxq(s, rxq)
 364                 free_irq(adapter->msix_info[msi++].vec,
 365                          &s->ethrxq[rxq].rspq);
 366 }
 367
 368 /*
 369  * Turn on NAPI and start up interrupts on a response queue.
 370  */
 371 static void qenable(struct sge_rspq *rspq)
 372 {
 373         napi_enable(&rspq->napi);
 374
 375         /*
 376          * 0-increment the Going To Sleep register to start the timer and
 377          * enable interrupts.
 378          */
 379         t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 380                      CIDXINC_V(0) |
 381                      SEINTARM_V(rspq->intr_params) |
 382                      INGRESSQID_V(rspq->cntxt_id));
 383 }
 384
 385 /*
 386  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 387  */
 388 static void enable_rx(struct adapter *adapter)
 389 {
 390         int rxq;
 391         struct sge *s = &adapter->sge;
 392
 393         for_each_ethrxq(s, rxq)
 394                 qenable(&s->ethrxq[rxq].rspq);
 395         qenable(&s->fw_evtq);
 396
 397         /*
 398          * The interrupt queue doesn't use NAPI so we do the 0-increment of
 399          * its Going To Sleep register here to get it started.
 400          */
 401         if (adapter->flags & USING_MSI)
 402                 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 403                              CIDXINC_V(0) |
 404                              SEINTARM_V(s->intrq.intr_params) |
 405                              INGRESSQID_V(s->intrq.cntxt_id));
 406
 407 }
 408
 409 /*
 410  * Wait until all NAPI handlers are descheduled.
 411  */
 412 static void quiesce_rx(struct adapter *adapter)
 413 {
 414         struct sge *s = &adapter->sge;
 415         int rxq;
 416
 417         for_each_ethrxq(s, rxq)
 418                 napi_disable(&s->ethrxq[rxq].rspq.napi);
 419         napi_disable(&s->fw_evtq.napi);
 420 }
 421
 422 /*
 423  * Response queue handler for the firmware event queue.
 424  */
 425 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
 426                           const struct pkt_gl *gl)
 427 {
 428         /*
 429          * Extract response opcode and get pointer to CPL message body.
 430          */
 431         struct adapter *adapter = rspq->adapter;
 432         u8 opcode = ((const struct rss_header *)rsp)->opcode;
 433         void *cpl = (void *)(rsp + 1);
 434
 435         switch (opcode) {
 436         case CPL_FW6_MSG: {
 437                 /*
 438                  * We've received an asynchronous message from the firmware.
 439                  */
 440                 const struct cpl_fw6_msg *fw_msg = cpl;
 441                 if (fw_msg->type == FW6_TYPE_CMD_RPL)
 442                         t4vf_handle_fw_rpl(adapter, fw_msg->data);
 443                 break;
 444         }
 445
 446         case CPL_FW4_MSG: {
 447                 /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
 448                  */
 449                 const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
 450                 opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
 451                 if (opcode != CPL_SGE_EGR_UPDATE) {
 452                         dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n"
 453                                 , opcode);
 454                         break;
 455                 }
 456                 cpl = (void *)p;
 457                 /*FALLTHROUGH*/
 458         }
 459
 460         case CPL_SGE_EGR_UPDATE: {
 461                 /*
 462                  * We've received an Egress Queue Status Update message.  We
 463                  * get these, if the SGE is configured to send these when the
 464                  * firmware passes certain points in processing our TX
 465                  * Ethernet Queue or if we make an explicit request for one.
 466                  * We use these updates to determine when we may need to
 467                  * restart a TX Ethernet Queue which was stopped for lack of
 468                  * free TX Queue Descriptors ...
 469                  */
 470                 const struct cpl_sge_egr_update *p = cpl;
 471                 unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
 472                 struct sge *s = &adapter->sge;
 473                 struct sge_txq *tq;
 474                 struct sge_eth_txq *txq;
 475                 unsigned int eq_idx;
 476
 477                 /*
 478                  * Perform sanity checking on the Queue ID to make sure it
 479                  * really refers to one of our TX Ethernet Egress Queues which
 480                  * is active and matches the queue's ID.  None of these error
 481                  * conditions should ever happen so we may want to either make
 482                  * them fatal and/or conditionalized under DEBUG.
 483                  */
 484                 eq_idx = EQ_IDX(s, qid);
 485                 if (unlikely(eq_idx >= MAX_EGRQ)) {
 486                         dev_err(adapter->pdev_dev,
 487                                 "Egress Update QID %d out of range\n", qid);
 488                         break;
 489                 }
 490                 tq = s->egr_map[eq_idx];
 491                 if (unlikely(tq == NULL)) {
 492                         dev_err(adapter->pdev_dev,
 493                                 "Egress Update QID %d TXQ=NULL\n", qid);
 494                         break;
 495                 }
 496                 txq = container_of(tq, struct sge_eth_txq, q);
 497                 if (unlikely(tq->abs_id != qid)) {
 498                         dev_err(adapter->pdev_dev,
 499                                 "Egress Update QID %d refers to TXQ %d\n",
 500                                 qid, tq->abs_id);
 501                         break;
 502                 }
 503
 504                 /*
 505                  * Restart a stopped TX Queue which has less than half of its
 506                  * TX ring in use ...
 507                  */
 508                 txq->q.restarts++;
 509                 netif_tx_wake_queue(txq->txq);
 510                 break;
 511         }
 512
 513         default:
 514                 dev_err(adapter->pdev_dev,
 515                         "unexpected CPL %#x on FW event queue\n", opcode);
 516         }
 517
 518         return 0;
 519 }
 520
 521 /*
 522  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
 523  * to use and initializes them.  We support multiple "Queue Sets" per port if
 524  * we have MSI-X, otherwise just one queue set per port.
 525  */
 526 static int setup_sge_queues(struct adapter *adapter)
 527 {
 528         struct sge *s = &adapter->sge;
 529         int err, pidx, msix;
 530
 531         /*
 532          * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
 533          * state.
 534          */
 535         bitmap_zero(s->starving_fl, MAX_EGRQ);
 536
 537         /*
 538          * If we're using MSI interrupt mode we need to set up a "forwarded
 539          * interrupt" queue which we'll set up with our MSI vector.  The rest
 540          * of the ingress queues will be set up to forward their interrupts to
 541          * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
 542          * the intrq's queue ID as the interrupt forwarding queue for the
 543          * subsequent calls ...
 544          */
 545         if (adapter->flags & USING_MSI) {
 546                 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
 547                                          adapter->port[0], 0, NULL, NULL);
 548                 if (err)
 549                         goto err_free_queues;
 550         }
 551
 552         /*
 553          * Allocate our ingress queue for asynchronous firmware messages.
 554          */
 555         err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
 556                                  MSIX_FW, NULL, fwevtq_handler);
 557         if (err)
 558                 goto err_free_queues;
 559
 560         /*
 561          * Allocate each "port"'s initial Queue Sets.  These can be changed
 562          * later on ... up to the point where any interface on the adapter is
 563          * brought up at which point lots of things get nailed down
 564          * permanently ...
 565          */
 566         msix = MSIX_IQFLINT;
 567         for_each_port(adapter, pidx) {
 568                 struct net_device *dev = adapter->port[pidx];
 569                 struct port_info *pi = netdev_priv(dev);
 570                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 571                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 572                 int qs;
 573
 574                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 575                         err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
 576                                                  dev, msix++,
 577                                                  &rxq->fl, t4vf_ethrx_handler);
 578                         if (err)
 579                                 goto err_free_queues;
 580
 581                         err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
 582                                              netdev_get_tx_queue(dev, qs),
 583                                              s->fw_evtq.cntxt_id);
 584                         if (err)
 585                                 goto err_free_queues;
 586
 587                         rxq->rspq.idx = qs;
 588                         memset(&rxq->stats, 0, sizeof(rxq->stats));
 589                 }
 590         }
 591
 592         /*
 593          * Create the reverse mappings for the queues.
 594          */
 595         s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
 596         s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
 597         IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
 598         for_each_port(adapter, pidx) {
 599                 struct net_device *dev = adapter->port[pidx];
 600                 struct port_info *pi = netdev_priv(dev);
 601                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 602                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 603                 int qs;
 604
 605                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 606                         IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
 607                         EQ_MAP(s, txq->q.abs_id) = &txq->q;
 608
 609                         /*
 610                          * The FW_IQ_CMD doesn't return the Absolute Queue IDs
 611                          * for Free Lists but since all of the Egress Queues
 612                          * (including Free Lists) have Relative Queue IDs
 613                          * which are computed as Absolute - Base Queue ID, we
 614                          * can synthesize the Absolute Queue IDs for the Free
 615                          * Lists.  This is useful for debugging purposes when
 616                          * we want to dump Queue Contexts via the PF Driver.
 617                          */
 618                         rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
 619                         EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
 620                 }
 621         }
 622         return 0;
 623
 624 err_free_queues:
 625         t4vf_free_sge_resources(adapter);
 626         return err;
 627 }
 628
 629 /*
 630  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
 631  * queues.  We configure the RSS CPU lookup table to distribute to the number
 632  * of HW receive queues, and the response queue lookup table to narrow that
 633  * down to the response queues actually configured for each "port" (Virtual
 634  * Interface).  We always configure the RSS mapping for all ports since the
 635  * mapping table has plenty of entries.
 636  */
 637 static int setup_rss(struct adapter *adapter)
 638 {
 639         int pidx;
 640
 641         for_each_port(adapter, pidx) {
 642                 struct port_info *pi = adap2pinfo(adapter, pidx);
 643                 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
 644                 u16 rss[MAX_PORT_QSETS];
 645                 int qs, err;
 646
 647                 for (qs = 0; qs < pi->nqsets; qs++)
 648                         rss[qs] = rxq[qs].rspq.abs_id;
 649
 650                 err = t4vf_config_rss_range(adapter, pi->viid,
 651                                             0, pi->rss_size, rss, pi->nqsets);
 652                 if (err)
 653                         return err;
 654
 655                 /*
 656                  * Perform Global RSS Mode-specific initialization.
 657                  */
 658                 switch (adapter->params.rss.mode) {
 659                 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
 660                         /*
 661                          * If Tunnel All Lookup isn't specified in the global
 662                          * RSS Configuration, then we need to specify a
 663                          * default Ingress Queue for any ingress packets which
 664                          * aren't hashed.  We'll use our first ingress queue
 665                          * ...
 666                          */
 667                         if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
 668                                 union rss_vi_config config;
 669                                 err = t4vf_read_rss_vi_config(adapter,
 670                                                               pi->viid,
 671                                                               &config);
 672                                 if (err)
 673                                         return err;
 674                                 config.basicvirtual.defaultq =
 675                                         rxq[0].rspq.abs_id;
 676                                 err = t4vf_write_rss_vi_config(adapter,
 677                                                                pi->viid,
 678                                                                &config);
 679                                 if (err)
 680                                         return err;
 681                         }
 682                         break;
 683                 }
 684         }
 685
 686         return 0;
 687 }
 688
 689 /*
 690  * Bring the adapter up.  Called whenever we go from no "ports" open to having
 691  * one open.  This function performs the actions necessary to make an adapter
 692  * operational, such as completing the initialization of HW modules, and
 693  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 694  * this is called "cxgb_up" in the PF Driver.)
 695  */
 696 static int adapter_up(struct adapter *adapter)
 697 {
 698         int err;
 699
 700         /*
 701          * If this is the first time we've been called, perform basic
 702          * adapter setup.  Once we've done this, many of our adapter
 703          * parameters can no longer be changed ...
 704          */
 705         if ((adapter->flags & FULL_INIT_DONE) == 0) {
 706                 err = setup_sge_queues(adapter);
 707                 if (err)
 708                         return err;
 709                 err = setup_rss(adapter);
 710                 if (err) {
 711                         t4vf_free_sge_resources(adapter);
 712                         return err;
 713                 }
 714
 715                 if (adapter->flags & USING_MSIX)
 716                         name_msix_vecs(adapter);
 717
 718                 /* Initialize hash mac addr list*/
 719                 INIT_LIST_HEAD(&adapter->mac_hlist);
 720
 721                 adapter->flags |= FULL_INIT_DONE;
 722         }
 723
 724         /*
 725          * Acquire our interrupt resources.  We only support MSI-X and MSI.
 726          */
 727         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
 728         if (adapter->flags & USING_MSIX)
 729                 err = request_msix_queue_irqs(adapter);
 730         else
 731                 err = request_irq(adapter->pdev->irq,
 732                                   t4vf_intr_handler(adapter), 0,
 733                                   adapter->name, adapter);
 734         if (err) {
 735                 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
 736                         err);
 737                 return err;
 738         }
 739
 740         /*
 741          * Enable NAPI ingress processing and return success.
 742          */
 743         enable_rx(adapter);
 744         t4vf_sge_start(adapter);
 745
 746         return 0;
 747 }
 748
 749 /*
 750  * Bring the adapter down.  Called whenever the last "port" (Virtual
 751  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
 752  * Driver.)
 753  */
 754 static void adapter_down(struct adapter *adapter)
 755 {
 756         /*
 757          * Free interrupt resources.
 758          */
 759         if (adapter->flags & USING_MSIX)
 760                 free_msix_queue_irqs(adapter);
 761         else
 762                 free_irq(adapter->pdev->irq, adapter);
 763
 764         /*
 765          * Wait for NAPI handlers to finish.
 766          */
 767         quiesce_rx(adapter);
 768 }
 769
 770 /*
 771  * Start up a net device.
 772  */
 773 static int cxgb4vf_open(struct net_device *dev)
 774 {
 775         int err;
 776         struct port_info *pi = netdev_priv(dev);
 777         struct adapter *adapter = pi->adapter;
 778
 779         /*
 780          * If this is the first interface that we're opening on the "adapter",
 781          * bring the "adapter" up now.
 782          */
 783         if (adapter->open_device_map == 0) {
 784                 err = adapter_up(adapter);
 785                 if (err)
 786                         return err;
 787         }
 788
 789         /*
 790          * Note that this interface is up and start everything up ...
 791          */
 792         err = link_start(dev);
 793         if (err)
 794                 goto err_unwind;
 795
 796         netif_tx_start_all_queues(dev);
 797         set_bit(pi->port_id, &adapter->open_device_map);
 798         return 0;
 799
 800 err_unwind:
 801         if (adapter->open_device_map == 0)
 802                 adapter_down(adapter);
 803         return err;
 804 }
 805
 806 /*
 807  * Shut down a net device.  This routine is called "cxgb_close" in the PF
 808  * Driver ...
 809  */
 810 static int cxgb4vf_stop(struct net_device *dev)
 811 {
 812         struct port_info *pi = netdev_priv(dev);
 813         struct adapter *adapter = pi->adapter;
 814
 815         netif_tx_stop_all_queues(dev);
 816         netif_carrier_off(dev);
 817         t4vf_enable_vi(adapter, pi->viid, false, false);
 818         pi->link_cfg.link_ok = 0;
 819
 820         clear_bit(pi->port_id, &adapter->open_device_map);
 821         if (adapter->open_device_map == 0)
 822                 adapter_down(adapter);
 823         return 0;
 824 }
 825
 826 /*
 827  * Translate our basic statistics into the standard "ifconfig" statistics.
 828  */
 829 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
 830 {
 831         struct t4vf_port_stats stats;
 832         struct port_info *pi = netdev2pinfo(dev);
 833         struct adapter *adapter = pi->adapter;
 834         struct net_device_stats *ns = &dev->stats;
 835         int err;
 836
 837         spin_lock(&adapter->stats_lock);
 838         err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
 839         spin_unlock(&adapter->stats_lock);
 840
 841         memset(ns, 0, sizeof(*ns));
 842         if (err)
 843                 return ns;
 844
 845         ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
 846                         stats.tx_ucast_bytes + stats.tx_offload_bytes);
 847         ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
 848                           stats.tx_ucast_frames + stats.tx_offload_frames);
 849         ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
 850                         stats.rx_ucast_bytes);
 851         ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
 852                           stats.rx_ucast_frames);
 853         ns->multicast = stats.rx_mcast_frames;
 854         ns->tx_errors = stats.tx_drop_frames;
 855         ns->rx_errors = stats.rx_err_frames;
 856
 857         return ns;
 858 }
 859
 860 static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
 861 {
 862         struct adapter *adapter = pi->adapter;
 863         u64 vec = 0;
 864         bool ucast = false;
 865         struct hash_mac_addr *entry;
 866
 867         /* Calculate the hash vector for the updated list and program it */
 868         list_for_each_entry(entry, &adapter->mac_hlist, list) {
 869                 ucast |= is_unicast_ether_addr(entry->addr);
 870                 vec |= (1ULL << hash_mac_addr(entry->addr));
 871         }
 872         return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
 873 }
 874
 875 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
 876 {
 877         struct port_info *pi = netdev_priv(netdev);
 878         struct adapter *adapter = pi->adapter;
 879         int ret;
 880         u64 mhash = 0;
 881         u64 uhash = 0;
 882         bool free = false;
 883         bool ucast = is_unicast_ether_addr(mac_addr);
 884         const u8 *maclist[1] = {mac_addr};
 885         struct hash_mac_addr *new_entry;
 886
 887         ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
 888                                   NULL, ucast ? &uhash : &mhash, false);
 889         if (ret < 0)
 890                 goto out;
 891         /* if hash != 0, then add the addr to hash addr list
 892          * so on the end we will calculate the hash for the
 893          * list and program it
 894          */
 895         if (uhash || mhash) {
 896                 new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
 897                 if (!new_entry)
 898                         return -ENOMEM;
 899                 ether_addr_copy(new_entry->addr, mac_addr);
 900                 list_add_tail(&new_entry->list, &adapter->mac_hlist);
 901                 ret = cxgb4vf_set_addr_hash(pi);
 902         }
 903 out:
 904         return ret < 0 ? ret : 0;
 905 }
 906
 907 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
 908 {
 909         struct port_info *pi = netdev_priv(netdev);
 910         struct adapter *adapter = pi->adapter;
 911         int ret;
 912         const u8 *maclist[1] = {mac_addr};
 913         struct hash_mac_addr *entry, *tmp;
 914
 915         /* If the MAC address to be removed is in the hash addr
 916          * list, delete it from the list and update hash vector
 917          */
 918         list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
 919                 if (ether_addr_equal(entry->addr, mac_addr)) {
 920                         list_del(&entry->list);
 921                         kfree(entry);
 922                         return cxgb4vf_set_addr_hash(pi);
 923                 }
 924         }
 925
 926         ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
 927         return ret < 0 ? -EINVAL : 0;
 928 }
 929
 930 /*
 931  * Set RX properties of a port, such as promiscruity, address filters, and MTU.
 932  * If @mtu is -1 it is left unchanged.
 933  */
 934 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
 935 {
 936         struct port_info *pi = netdev_priv(dev);
 937
 938         __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
 939         __dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
 940         return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
 941                                (dev->flags & IFF_PROMISC) != 0,
 942                                (dev->flags & IFF_ALLMULTI) != 0,
 943                                1, -1, sleep_ok);
 944 }
 945
 946 /*
 947  * Set the current receive modes on the device.
 948  */
 949 static void cxgb4vf_set_rxmode(struct net_device *dev)
 950 {
 951         /* unfortunately we can't return errors to the stack */
 952         set_rxmode(dev, -1, false);
 953 }
 954
 955 /*
 956  * Find the entry in the interrupt holdoff timer value array which comes
 957  * closest to the specified interrupt holdoff value.
 958  */
 959 static int closest_timer(const struct sge *s, int us)
 960 {
 961         int i, timer_idx = 0, min_delta = INT_MAX;
 962
 963         for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
 964                 int delta = us - s->timer_val[i];
 965                 if (delta < 0)
 966                         delta = -delta;
 967                 if (delta < min_delta) {
 968                         min_delta = delta;
 969                         timer_idx = i;
 970                 }
 971         }
 972         return timer_idx;
 973 }
 974
 975 static int closest_thres(const struct sge *s, int thres)
 976 {
 977         int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
 978
 979         for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
 980                 delta = thres - s->counter_val[i];
 981                 if (delta < 0)
 982                         delta = -delta;
 983                 if (delta < min_delta) {
 984                         min_delta = delta;
 985                         pktcnt_idx = i;
 986                 }
 987         }
 988         return pktcnt_idx;
 989 }
 990
 991 /*
 992  * Return a queue's interrupt hold-off time in us.  0 means no timer.
 993  */
 994 static unsigned int qtimer_val(const struct adapter *adapter,
 995                                const struct sge_rspq *rspq)
 996 {
 997         unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
 998
 999         return timer_idx < SGE_NTIMERS
1000                 ? adapter->sge.timer_val[timer_idx]
1001                 : 0;
1002 }
1003
1004 /**
1005  *      set_rxq_intr_params - set a queue's interrupt holdoff parameters
1006  *      @adapter: the adapter
1007  *      @rspq: the RX response queue
1008  *      @us: the hold-off time in us, or 0 to disable timer
1009  *      @cnt: the hold-off packet count, or 0 to disable counter
1010  *
1011  *      Sets an RX response queue's interrupt hold-off time and packet count.
1012  *      At least one of the two needs to be enabled for the queue to generate
1013  *      interrupts.
1014  */
1015 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1016                                unsigned int us, unsigned int cnt)
1017 {
1018         unsigned int timer_idx;
1019
1020         /*
1021          * If both the interrupt holdoff timer and count are specified as
1022          * zero, default to a holdoff count of 1 ...
1023          */
1024         if ((us | cnt) == 0)
1025                 cnt = 1;
1026
1027         /*
1028          * If an interrupt holdoff count has been specified, then find the
1029          * closest configured holdoff count and use that.  If the response
1030          * queue has already been created, then update its queue context
1031          * parameters ...
1032          */
1033         if (cnt) {
1034                 int err;
1035                 u32 v, pktcnt_idx;
1036
1037                 pktcnt_idx = closest_thres(&adapter->sge, cnt);
1038                 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1039                         v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1040                             FW_PARAMS_PARAM_X_V(
1041                                         FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1042                             FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1043                         err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1044                         if (err)
1045                                 return err;
1046                 }
1047                 rspq->pktcnt_idx = pktcnt_idx;
1048         }
1049
1050         /*
1051          * Compute the closest holdoff timer index from the supplied holdoff
1052          * timer value.
1053          */
1054         timer_idx = (us == 0
1055                      ? SGE_TIMER_RSTRT_CNTR
1056                      : closest_timer(&adapter->sge, us));
1057
1058         /*
1059          * Update the response queue's interrupt coalescing parameters and
1060          * return success.
1061          */
1062         rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
1063                              QINTR_CNT_EN_V(cnt > 0));
1064         return 0;
1065 }
1066
1067 /*
1068  * Return a version number to identify the type of adapter.  The scheme is:
1069  * - bits 0..9: chip version
1070  * - bits 10..15: chip revision
1071  */
1072 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1073 {
1074         /*
1075          * Chip version 4, revision 0x3f (cxgb4vf).
1076          */
1077         return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1078 }
1079
1080 /*
1081  * Execute the specified ioctl command.
1082  */
1083 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1084 {
1085         int ret = 0;
1086
1087         switch (cmd) {
1088             /*
1089              * The VF Driver doesn't have access to any of the other
1090              * common Ethernet device ioctl()'s (like reading/writing
1091              * PHY registers, etc.
1092              */
1093
1094         default:
1095                 ret = -EOPNOTSUPP;
1096                 break;
1097         }
1098         return ret;
1099 }
1100
1101 /*
1102  * Change the device's MTU.
1103  */
1104 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1105 {
1106         int ret;
1107         struct port_info *pi = netdev_priv(dev);
1108
1109         ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1110                               -1, -1, -1, -1, true);
1111         if (!ret)
1112                 dev->mtu = new_mtu;
1113         return ret;
1114 }
1115
1116 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1117         netdev_features_t features)
1118 {
1119         /*
1120          * Since there is no support for separate rx/tx vlan accel
1121          * enable/disable make sure tx flag is always in same state as rx.
1122          */
1123         if (features & NETIF_F_HW_VLAN_CTAG_RX)
1124                 features |= NETIF_F_HW_VLAN_CTAG_TX;
1125         else
1126                 features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1127
1128         return features;
1129 }
1130
1131 static int cxgb4vf_set_features(struct net_device *dev,
1132         netdev_features_t features)
1133 {
1134         struct port_info *pi = netdev_priv(dev);
1135         netdev_features_t changed = dev->features ^ features;
1136
1137         if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1138                 t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1139                                 features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1140
1141         return 0;
1142 }
1143
1144 /*
1145  * Change the devices MAC address.
1146  */
1147 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1148 {
1149         int ret;
1150         struct sockaddr *addr = _addr;
1151         struct port_info *pi = netdev_priv(dev);
1152
1153         if (!is_valid_ether_addr(addr->sa_data))
1154                 return -EADDRNOTAVAIL;
1155
1156         ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1157                               addr->sa_data, true);
1158         if (ret < 0)
1159                 return ret;
1160
1161         memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1162         pi->xact_addr_filt = ret;
1163         return 0;
1164 }
1165
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Poll all of the receive queues belonging to this port by invoking the
 * interrupt handlers directly.  This is called outside of normal interrupt
 * context.
 */
static void cxgb4vf_poll_controller(struct net_device *dev)
{
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
        struct sge_eth_rxq *rxq;
        int qs;

        /* Without MSI-X there is a single handler for the whole adapter. */
        if (!(adapter->flags & USING_MSIX)) {
                t4vf_intr_handler(adapter)(0, adapter);
                return;
        }

        /* With MSI-X, run the handler once per Queue Set of this port. */
        rxq = &adapter->sge.ethrxq[pi->first_qset];
        for (qs = 0; qs < pi->nqsets; qs++)
                t4vf_sge_intr_msix(0, &rxq[qs].rspq);
}
#endif
1189
1190 /*
1191  * Ethtool operations.
1192  * ===================
1193  *
1194  * Note that we don't support any ethtool operations which change the physical
1195  * state of the port to which we're linked.
1196  */
1197
1198 /**
1199  *      from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
1200  *      @port_type: Firmware Port Type
1201  *      @mod_type: Firmware Module Type
1202  *
1203  *      Translate Firmware Port/Module type to Ethtool Port Type.
1204  */
1205 static int from_fw_port_mod_type(enum fw_port_type port_type,
1206                                  enum fw_port_module_type mod_type)
1207 {
1208         if (port_type == FW_PORT_TYPE_BT_SGMII ||
1209             port_type == FW_PORT_TYPE_BT_XFI ||
1210             port_type == FW_PORT_TYPE_BT_XAUI) {
1211                 return PORT_TP;
1212         } else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
1213                    port_type == FW_PORT_TYPE_FIBER_XAUI) {
1214                 return PORT_FIBRE;
1215         } else if (port_type == FW_PORT_TYPE_SFP ||
1216                    port_type == FW_PORT_TYPE_QSFP_10G ||
1217                    port_type == FW_PORT_TYPE_QSA ||
1218                    port_type == FW_PORT_TYPE_QSFP ||
1219                    port_type == FW_PORT_TYPE_CR4_QSFP ||
1220                    port_type == FW_PORT_TYPE_CR_QSFP ||
1221                    port_type == FW_PORT_TYPE_CR2_QSFP ||
1222                    port_type == FW_PORT_TYPE_SFP28) {
1223                 if (mod_type == FW_PORT_MOD_TYPE_LR ||
1224                     mod_type == FW_PORT_MOD_TYPE_SR ||
1225                     mod_type == FW_PORT_MOD_TYPE_ER ||
1226                     mod_type == FW_PORT_MOD_TYPE_LRM)
1227                         return PORT_FIBRE;
1228                 else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1229                          mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1230                         return PORT_DA;
1231                 else
1232                         return PORT_OTHER;
1233         } else if (port_type == FW_PORT_TYPE_KR4_100G ||
1234                    port_type == FW_PORT_TYPE_KR_SFP28) {
1235                 return PORT_NONE;
1236         }
1237
1238         return PORT_OTHER;
1239 }
1240
1241 /**
1242  *      fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
1243  *      @port_type: Firmware Port Type
1244  *      @fw_caps: Firmware Port Capabilities
1245  *      @link_mode_mask: ethtool Link Mode Mask
1246  *
1247  *      Translate a Firmware Port Capabilities specification to an ethtool
1248  *      Link Mode Mask.
1249  */
1250 static void fw_caps_to_lmm(enum fw_port_type port_type,
1251                            unsigned int fw_caps,
1252                            unsigned long *link_mode_mask)
1253 {
1254         #define SET_LMM(__lmm_name) \
1255                 __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
1256                           link_mode_mask)
1257
1258         #define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
1259                 do { \
1260                         if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
1261                                 SET_LMM(__lmm_name); \
1262                 } while (0)
1263
1264         switch (port_type) {
1265         case FW_PORT_TYPE_BT_SGMII:
1266         case FW_PORT_TYPE_BT_XFI:
1267         case FW_PORT_TYPE_BT_XAUI:
1268                 SET_LMM(TP);
1269                 FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
1270                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1271                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1272                 break;
1273
1274         case FW_PORT_TYPE_KX4:
1275         case FW_PORT_TYPE_KX:
1276                 SET_LMM(Backplane);
1277                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1278                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1279                 break;
1280
1281         case FW_PORT_TYPE_KR:
1282                 SET_LMM(Backplane);
1283                 SET_LMM(10000baseKR_Full);
1284                 break;
1285
1286         case FW_PORT_TYPE_BP_AP:
1287                 SET_LMM(Backplane);
1288                 SET_LMM(10000baseR_FEC);
1289                 SET_LMM(10000baseKR_Full);
1290                 SET_LMM(1000baseKX_Full);
1291                 break;
1292
1293         case FW_PORT_TYPE_BP4_AP:
1294                 SET_LMM(Backplane);
1295                 SET_LMM(10000baseR_FEC);
1296                 SET_LMM(10000baseKR_Full);
1297                 SET_LMM(1000baseKX_Full);
1298                 SET_LMM(10000baseKX4_Full);
1299                 break;
1300
1301         case FW_PORT_TYPE_FIBER_XFI:
1302         case FW_PORT_TYPE_FIBER_XAUI:
1303         case FW_PORT_TYPE_SFP:
1304         case FW_PORT_TYPE_QSFP_10G:
1305         case FW_PORT_TYPE_QSA:
1306                 SET_LMM(FIBRE);
1307                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1308                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1309                 break;
1310
1311         case FW_PORT_TYPE_BP40_BA:
1312         case FW_PORT_TYPE_QSFP:
1313                 SET_LMM(FIBRE);
1314                 SET_LMM(40000baseSR4_Full);
1315                 break;
1316
1317         case FW_PORT_TYPE_CR_QSFP:
1318         case FW_PORT_TYPE_SFP28:
1319                 SET_LMM(FIBRE);
1320                 SET_LMM(25000baseCR_Full);
1321                 break;
1322
1323         case FW_PORT_TYPE_KR_SFP28:
1324                 SET_LMM(Backplane);
1325                 SET_LMM(25000baseKR_Full);
1326                 break;
1327
1328         case FW_PORT_TYPE_CR2_QSFP:
1329                 SET_LMM(FIBRE);
1330                 SET_LMM(50000baseSR2_Full);
1331                 break;
1332
1333         case FW_PORT_TYPE_KR4_100G:
1334         case FW_PORT_TYPE_CR4_QSFP:
1335                 SET_LMM(FIBRE);
1336                 SET_LMM(100000baseCR4_Full);
1337                 break;
1338
1339         default:
1340                 break;
1341         }
1342
1343         FW_CAPS_TO_LMM(ANEG, Autoneg);
1344         FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
1345         FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
1346
1347         #undef FW_CAPS_TO_LMM
1348         #undef SET_LMM
1349 }
1350
1351 static int cxgb4vf_get_link_ksettings(struct net_device *dev,
1352                                   struct ethtool_link_ksettings *link_ksettings)
1353 {
1354         struct port_info *pi = netdev_priv(dev);
1355         struct ethtool_link_settings *base = &link_ksettings->base;
1356
1357         /* For the nonce, the Firmware doesn't send up Port State changes
1358          * when the Virtual Interface attached to the Port is down.  So
1359          * if it's down, let's grab any changes.
1360          */
1361         if (!netif_running(dev))
1362                 (void)t4vf_update_port_info(pi);
1363
1364         ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
1365         ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
1366         ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
1367
1368         base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
1369
1370         if (pi->mdio_addr >= 0) {
1371                 base->phy_address = pi->mdio_addr;
1372                 base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
1373                                       ? ETH_MDIO_SUPPORTS_C22
1374                                       : ETH_MDIO_SUPPORTS_C45);
1375         } else {
1376                 base->phy_address = 255;
1377                 base->mdio_support = 0;
1378         }
1379
1380         fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps,
1381                        link_ksettings->link_modes.supported);
1382         fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps,
1383                        link_ksettings->link_modes.advertising);
1384         fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
1385                        link_ksettings->link_modes.lp_advertising);
1386
1387         if (netif_carrier_ok(dev)) {
1388                 base->speed = pi->link_cfg.speed;
1389                 base->duplex = DUPLEX_FULL;
1390         } else {
1391                 base->speed = SPEED_UNKNOWN;
1392                 base->duplex = DUPLEX_UNKNOWN;
1393         }
1394
1395         base->autoneg = pi->link_cfg.autoneg;
1396         if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
1397                 ethtool_link_ksettings_add_link_mode(link_ksettings,
1398                                                      supported, Autoneg);
1399         if (pi->link_cfg.autoneg)
1400                 ethtool_link_ksettings_add_link_mode(link_ksettings,
1401                                                      advertising, Autoneg);
1402
1403         return 0;
1404 }
1405
1406 /* Translate the Firmware FEC value into the ethtool value. */
1407 static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec)
1408 {
1409         unsigned int eth_fec = 0;
1410
1411         if (fw_fec & FW_PORT_CAP32_FEC_RS)
1412                 eth_fec |= ETHTOOL_FEC_RS;
1413         if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
1414                 eth_fec |= ETHTOOL_FEC_BASER;
1415
1416         /* if nothing is set, then FEC is off */
1417         if (!eth_fec)
1418                 eth_fec = ETHTOOL_FEC_OFF;
1419
1420         return eth_fec;
1421 }
1422
1423 /* Translate Common Code FEC value into ethtool value. */
1424 static inline unsigned int cc_to_eth_fec(unsigned int cc_fec)
1425 {
1426         unsigned int eth_fec = 0;
1427
1428         if (cc_fec & FEC_AUTO)
1429                 eth_fec |= ETHTOOL_FEC_AUTO;
1430         if (cc_fec & FEC_RS)
1431                 eth_fec |= ETHTOOL_FEC_RS;
1432         if (cc_fec & FEC_BASER_RS)
1433                 eth_fec |= ETHTOOL_FEC_BASER;
1434
1435         /* if nothing is set, then FEC is off */
1436         if (!eth_fec)
1437                 eth_fec = ETHTOOL_FEC_OFF;
1438
1439         return eth_fec;
1440 }
1441
1442 static int cxgb4vf_get_fecparam(struct net_device *dev,
1443                                 struct ethtool_fecparam *fec)
1444 {
1445         const struct port_info *pi = netdev_priv(dev);
1446         const struct link_config *lc = &pi->link_cfg;
1447
1448         /* Translate the Firmware FEC Support into the ethtool value.  We
1449          * always support IEEE 802.3 "automatic" selection of Link FEC type if
1450          * any FEC is supported.
1451          */
1452         fec->fec = fwcap_to_eth_fec(lc->pcaps);
1453         if (fec->fec != ETHTOOL_FEC_OFF)
1454                 fec->fec |= ETHTOOL_FEC_AUTO;
1455
1456         /* Translate the current internal FEC parameters into the
1457          * ethtool values.
1458          */
1459         fec->active_fec = cc_to_eth_fec(lc->fec);
1460         return 0;
1461 }
1462
1463 /*
1464  * Return our driver information.
1465  */
1466 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1467                                 struct ethtool_drvinfo *drvinfo)
1468 {
1469         struct adapter *adapter = netdev2adap(dev);
1470
1471         strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1472         strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1473         strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1474                 sizeof(drvinfo->bus_info));
1475         snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1476                  "%u.%u.%u.%u, TP %u.%u.%u.%u",
1477                  FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1478                  FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1479                  FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1480                  FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1481                  FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1482                  FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1483                  FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1484                  FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1485 }
1486
1487 /*
1488  * Return current adapter message level.
1489  */
1490 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1491 {
1492         return netdev2adap(dev)->msg_enable;
1493 }
1494
1495 /*
1496  * Set current adapter message level.
1497  */
1498 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1499 {
1500         netdev2adap(dev)->msg_enable = msglevel;
1501 }
1502
1503 /*
1504  * Return the device's current Queue Set ring size parameters along with the
1505  * allowed maximum values.  Since ethtool doesn't understand the concept of
1506  * multi-queue devices, we just return the current values associated with the
1507  * first Queue Set.
1508  */
1509 static void cxgb4vf_get_ringparam(struct net_device *dev,
1510                                   struct ethtool_ringparam *rp)
1511 {
1512         const struct port_info *pi = netdev_priv(dev);
1513         const struct sge *s = &pi->adapter->sge;
1514
1515         rp->rx_max_pending = MAX_RX_BUFFERS;
1516         rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1517         rp->rx_jumbo_max_pending = 0;
1518         rp->tx_max_pending = MAX_TXQ_ENTRIES;
1519
1520         rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1521         rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1522         rp->rx_jumbo_pending = 0;
1523         rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1524 }
1525
1526 /*
1527  * Set the Queue Set ring size parameters for the device.  Again, since
1528  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1529  * apply these new values across all of the Queue Sets associated with the
1530  * device -- after vetting them of course!
1531  */
1532 static int cxgb4vf_set_ringparam(struct net_device *dev,
1533                                  struct ethtool_ringparam *rp)
1534 {
1535         const struct port_info *pi = netdev_priv(dev);
1536         struct adapter *adapter = pi->adapter;
1537         struct sge *s = &adapter->sge;
1538         int qs;
1539
1540         if (rp->rx_pending > MAX_RX_BUFFERS ||
1541             rp->rx_jumbo_pending ||
1542             rp->tx_pending > MAX_TXQ_ENTRIES ||
1543             rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1544             rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1545             rp->rx_pending < MIN_FL_ENTRIES ||
1546             rp->tx_pending < MIN_TXQ_ENTRIES)
1547                 return -EINVAL;
1548
1549         if (adapter->flags & FULL_INIT_DONE)
1550                 return -EBUSY;
1551
1552         for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1553                 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1554                 s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1555                 s->ethtxq[qs].q.size = rp->tx_pending;
1556         }
1557         return 0;
1558 }
1559
1560 /*
1561  * Return the interrupt holdoff timer and count for the first Queue Set on the
1562  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1563  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1564  */
1565 static int cxgb4vf_get_coalesce(struct net_device *dev,
1566                                 struct ethtool_coalesce *coalesce)
1567 {
1568         const struct port_info *pi = netdev_priv(dev);
1569         const struct adapter *adapter = pi->adapter;
1570         const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1571
1572         coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1573         coalesce->rx_max_coalesced_frames =
1574                 ((rspq->intr_params & QINTR_CNT_EN_F)
1575                  ? adapter->sge.counter_val[rspq->pktcnt_idx]
1576                  : 0);
1577         return 0;
1578 }
1579
1580 /*
1581  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1582  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1583  * the interrupt holdoff timer on any of the device's Queue Sets.
1584  */
1585 static int cxgb4vf_set_coalesce(struct net_device *dev,
1586                                 struct ethtool_coalesce *coalesce)
1587 {
1588         const struct port_info *pi = netdev_priv(dev);
1589         struct adapter *adapter = pi->adapter;
1590
1591         return set_rxq_intr_params(adapter,
1592                                    &adapter->sge.ethrxq[pi->first_qset].rspq,
1593                                    coalesce->rx_coalesce_usecs,
1594                                    coalesce->rx_max_coalesced_frames);
1595 }
1596
1597 /*
1598  * Report current port link pause parameter settings.
1599  */
1600 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1601                                    struct ethtool_pauseparam *pauseparam)
1602 {
1603         struct port_info *pi = netdev_priv(dev);
1604
1605         pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1606         pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1607         pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1608 }
1609
1610 /*
1611  * Identify the port by blinking the port's LED.
1612  */
1613 static int cxgb4vf_phys_id(struct net_device *dev,
1614                            enum ethtool_phys_id_state state)
1615 {
1616         unsigned int val;
1617         struct port_info *pi = netdev_priv(dev);
1618
1619         if (state == ETHTOOL_ID_ACTIVE)
1620                 val = 0xffff;
1621         else if (state == ETHTOOL_ID_INACTIVE)
1622                 val = 0;
1623         else
1624                 return -EINVAL;
1625
1626         return t4vf_identify_port(pi->adapter, pi->viid, val);
1627 }
1628
/*
 * Port stats maintained per queue of the port.
 *
 * The order of the fields is significant: it must match the order of the
 * per-queue entries at the end of the stats_strings[] table below, whose
 * labels are quoted next to each field.
 */
struct queue_port_stats {
        u64 tso;                /* "TSO" */
        u64 tx_csum;            /* "TxCsumOffload" */
        u64 rx_csum;            /* "RxCsumGood" */
        u64 vlan_ex;            /* "VLANextractions" */
        u64 vlan_ins;           /* "VLANinsertions" */
        u64 lro_pkts;           /* "GROPackets" */
        u64 lro_merged;         /* "GROMerged" */
};
1641
/*
 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
 * these need to match the order of statistics returned by
 * t4vf_get_port_stats().
 *
 * cxgb4vf_get_sset_count() reports ARRAY_SIZE() of this table and
 * cxgb4vf_get_strings() copies it out verbatim, so every entry here must
 * have a corresponding u64 written by cxgb4vf_get_ethtool_stats(): first
 * the t4vf_port_stats values, then the queue_port_stats values.
 */
static const char stats_strings[][ETH_GSTRING_LEN] = {
        /*
         * These must match the layout of the t4vf_port_stats structure.
         */
        "TxBroadcastBytes  ",
        "TxBroadcastFrames ",
        "TxMulticastBytes  ",
        "TxMulticastFrames ",
        "TxUnicastBytes    ",
        "TxUnicastFrames   ",
        "TxDroppedFrames   ",
        "TxOffloadBytes    ",
        "TxOffloadFrames   ",
        "RxBroadcastBytes  ",
        "RxBroadcastFrames ",
        "RxMulticastBytes  ",
        "RxMulticastFrames ",
        "RxUnicastBytes    ",
        "RxUnicastFrames   ",
        "RxErrorFrames     ",

        /*
         * These are accumulated per-queue statistics and must match the
         * order of the fields in the queue_port_stats structure.
         */
        "TSO               ",
        "TxCsumOffload     ",
        "RxCsumGood        ",
        "VLANextractions   ",
        "VLANinsertions    ",
        "GROPackets        ",
        "GROMerged         ",
};
1680
1681 /*
1682  * Return the number of statistics in the specified statistics set.
1683  */
1684 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1685 {
1686         switch (sset) {
1687         case ETH_SS_STATS:
1688                 return ARRAY_SIZE(stats_strings);
1689         default:
1690                 return -EOPNOTSUPP;
1691         }
1692         /*NOTREACHED*/
1693 }
1694
1695 /*
1696  * Return the strings for the specified statistics set.
1697  */
1698 static void cxgb4vf_get_strings(struct net_device *dev,
1699                                 u32 sset,
1700                                 u8 *data)
1701 {
1702         switch (sset) {
1703         case ETH_SS_STATS:
1704                 memcpy(data, stats_strings, sizeof(stats_strings));
1705                 break;
1706         }
1707 }
1708
1709 /*
1710  * Small utility routine to accumulate queue statistics across the queues of
1711  * a "port".
1712  */
1713 static void collect_sge_port_stats(const struct adapter *adapter,
1714                                    const struct port_info *pi,
1715                                    struct queue_port_stats *stats)
1716 {
1717         const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1718         const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1719         int qs;
1720
1721         memset(stats, 0, sizeof(*stats));
1722         for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1723                 stats->tso += txq->tso;
1724                 stats->tx_csum += txq->tx_cso;
1725                 stats->rx_csum += rxq->stats.rx_cso;
1726                 stats->vlan_ex += rxq->stats.vlan_ex;
1727                 stats->vlan_ins += txq->vlan_ins;
1728                 stats->lro_pkts += rxq->stats.lro_pkts;
1729                 stats->lro_merged += rxq->stats.lro_merged;
1730         }
1731 }
1732
1733 /*
1734  * Return the ETH_SS_STATS statistics set.
1735  */
1736 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1737                                       struct ethtool_stats *stats,
1738                                       u64 *data)
1739 {
1740         struct port_info *pi = netdev2pinfo(dev);
1741         struct adapter *adapter = pi->adapter;
1742         int err = t4vf_get_port_stats(adapter, pi->pidx,
1743                                       (struct t4vf_port_stats *)data);
1744         if (err)
1745                 memset(data, 0, sizeof(struct t4vf_port_stats));
1746
1747         data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1748         collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1749 }
1750
1751 /*
1752  * Return the size of our register map.
1753  */
1754 static int cxgb4vf_get_regs_len(struct net_device *dev)
1755 {
1756         return T4VF_REGMAP_SIZE;
1757 }
1758
1759 /*
1760  * Dump a block of registers, start to end inclusive, into a buffer.
1761  */
1762 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1763                            unsigned int start, unsigned int end)
1764 {
1765         u32 *bp = regbuf + start - T4VF_REGMAP_START;
1766
1767         for ( ; start <= end; start += sizeof(u32)) {
1768                 /*
1769                  * Avoid reading the Mailbox Control register since that
1770                  * can trigger a Mailbox Ownership Arbitration cycle and
1771                  * interfere with communication with the firmware.
1772                  */
1773                 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1774                         *bp++ = 0xffff;
1775                 else
1776                         *bp++ = t4_read_reg(adapter, start);
1777         }
1778 }
1779
1780 /*
1781  * Copy our entire register map into the provided buffer.
1782  */
1783 static void cxgb4vf_get_regs(struct net_device *dev,
1784                              struct ethtool_regs *regs,
1785                              void *regbuf)
1786 {
1787         struct adapter *adapter = netdev2adap(dev);
1788
1789         regs->version = mk_adap_vers(adapter);
1790
1791         /*
1792          * Fill in register buffer with our register map.
1793          */
1794         memset(regbuf, 0, T4VF_REGMAP_SIZE);
1795
1796         reg_block_dump(adapter, regbuf,
1797                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1798                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1799         reg_block_dump(adapter, regbuf,
1800                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1801                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1802
1803         /* T5 adds new registers in the PL Register map.
1804          */
1805         reg_block_dump(adapter, regbuf,
1806                        T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1807                        T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1808                        ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1809         reg_block_dump(adapter, regbuf,
1810                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1811                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1812
1813         reg_block_dump(adapter, regbuf,
1814                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1815                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1816 }
1817
1818 /*
1819  * Report current Wake On LAN settings.
1820  */
1821 static void cxgb4vf_get_wol(struct net_device *dev,
1822                             struct ethtool_wolinfo *wol)
1823 {
1824         wol->supported = 0;
1825         wol->wolopts = 0;
1826         memset(&wol->sopass, 0, sizeof(wol->sopass));
1827 }
1828
1829 /*
1830  * TCP Segmentation Offload flags which we support.
1831  */
1832 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1833
1834 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1835         .get_link_ksettings     = cxgb4vf_get_link_ksettings,
1836         .get_fecparam           = cxgb4vf_get_fecparam,
1837         .get_drvinfo            = cxgb4vf_get_drvinfo,
1838         .get_msglevel           = cxgb4vf_get_msglevel,
1839         .set_msglevel           = cxgb4vf_set_msglevel,
1840         .get_ringparam          = cxgb4vf_get_ringparam,
1841         .set_ringparam          = cxgb4vf_set_ringparam,
1842         .get_coalesce           = cxgb4vf_get_coalesce,
1843         .set_coalesce           = cxgb4vf_set_coalesce,
1844         .get_pauseparam         = cxgb4vf_get_pauseparam,
1845         .get_link               = ethtool_op_get_link,
1846         .get_strings            = cxgb4vf_get_strings,
1847         .set_phys_id            = cxgb4vf_phys_id,
1848         .get_sset_count         = cxgb4vf_get_sset_count,
1849         .get_ethtool_stats      = cxgb4vf_get_ethtool_stats,
1850         .get_regs_len           = cxgb4vf_get_regs_len,
1851         .get_regs               = cxgb4vf_get_regs,
1852         .get_wol                = cxgb4vf_get_wol,
1853 };
1854
1855 /*
1856  * /sys/kernel/debug/cxgb4vf support code and data.
1857  * ================================================
1858  */
1859
1860 /*
1861  * Show Firmware Mailbox Command/Reply Log
1862  *
1863  * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1864  * it's possible that we can catch things during a log update and therefore
 * see partially corrupted log entries.  But it's probably Good Enough(tm).
1866  * If we ever decide that we want to make sure that we're dumping a coherent
1867  * log, we'd need to perform locking in the mailbox logging and in
1868  * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1869  * like we do for the Firmware Device Log.  But as stated above, meh ...
1870  */
1871 static int mboxlog_show(struct seq_file *seq, void *v)
1872 {
1873         struct adapter *adapter = seq->private;
1874         struct mbox_cmd_log *log = adapter->mbox_log;
1875         struct mbox_cmd *entry;
1876         int entry_idx, i;
1877
1878         if (v == SEQ_START_TOKEN) {
1879                 seq_printf(seq,
1880                            "%10s  %15s  %5s  %5s  %s\n",
1881                            "Seq#", "Tstamp", "Atime", "Etime",
1882                            "Command/Reply");
1883                 return 0;
1884         }
1885
1886         entry_idx = log->cursor + ((uintptr_t)v - 2);
1887         if (entry_idx >= log->size)
1888                 entry_idx -= log->size;
1889         entry = mbox_cmd_log_entry(log, entry_idx);
1890
1891         /* skip over unused entries */
1892         if (entry->timestamp == 0)
1893                 return 0;
1894
1895         seq_printf(seq, "%10u  %15llu  %5d  %5d",
1896                    entry->seqno, entry->timestamp,
1897                    entry->access, entry->execute);
1898         for (i = 0; i < MBOX_LEN / 8; i++) {
1899                 u64 flit = entry->cmd[i];
1900                 u32 hi = (u32)(flit >> 32);
1901                 u32 lo = (u32)flit;
1902
1903                 seq_printf(seq, "  %08x %08x", hi, lo);
1904         }
1905         seq_puts(seq, "\n");
1906         return 0;
1907 }
1908
1909 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
1910 {
1911         struct adapter *adapter = seq->private;
1912         struct mbox_cmd_log *log = adapter->mbox_log;
1913
1914         return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
1915 }
1916
1917 static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
1918 {
1919         return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
1920 }
1921
1922 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
1923 {
1924         ++*pos;
1925         return mboxlog_get_idx(seq, *pos);
1926 }
1927
1928 static void mboxlog_stop(struct seq_file *seq, void *v)
1929 {
1930 }
1931
1932 static const struct seq_operations mboxlog_seq_ops = {
1933         .start = mboxlog_start,
1934         .next  = mboxlog_next,
1935         .stop  = mboxlog_stop,
1936         .show  = mboxlog_show
1937 };
1938
1939 static int mboxlog_open(struct inode *inode, struct file *file)
1940 {
1941         int res = seq_open(file, &mboxlog_seq_ops);
1942
1943         if (!res) {
1944                 struct seq_file *seq = file->private_data;
1945
1946                 seq->private = inode->i_private;
1947         }
1948         return res;
1949 }
1950
1951 static const struct file_operations mboxlog_fops = {
1952         .owner   = THIS_MODULE,
1953         .open    = mboxlog_open,
1954         .read    = seq_read,
1955         .llseek  = seq_lseek,
1956         .release = seq_release,
1957 };
1958
/*
 * Show SGE Queue Set information.  We display QPL Queue Sets per line.
 */
#define QPL     4

/*
 * seq_file show callback for the SGE Queue information file.
 *
 * v is the iterator value produced by sge_queue_start()/sge_queue_next(),
 * i.e. the entry number plus one.  Entries 0 .. eth_entries-1 each cover up
 * to QPL Ethernet Queue Sets printed side by side; the entry after those
 * describes the Firmware Event Queue and the one after that the Interrupt
 * Queue.  Always returns 0.
 */
static int sge_qinfo_show(struct seq_file *seq, void *v)
{
        struct adapter *adapter = seq->private;
        int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
        int qs, r = (uintptr_t)v - 1;

        /* Separate every entry but the first from its predecessor. */
        if (r)
                seq_putc(seq, '\n');

        /*
         * Output helpers.  S3() prints the row label s followed by one
         * column per Queue Set; the expression v is re-evaluated for every
         * value of qs, and n, rxq and txq are taken from the scope in
         * which the macro is used.  S() prints string columns, while T()
         * and R() print an unsigned field of the TX queue and of the RX
         * queue respectively.
         */
        #define S3(fmt_spec, s, v) \
                do {\
                        seq_printf(seq, "%-12s", s); \
                        for (qs = 0; qs < n; ++qs) \
                                seq_printf(seq, " %16" fmt_spec, v); \
                        seq_putc(seq, '\n'); \
                } while (0)
        #define S(s, v)         S3("s", s, v)
        #define T(s, v)         S3("u", s, txq[qs].v)
        #define R(s, v)         S3("u", s, rxq[qs].v)

        if (r < eth_entries) {
                const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
                const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
                /* The last entry may hold fewer than QPL Queue Sets. */
                int n = min(QPL, adapter->sge.ethqsets - QPL * r);

                S("QType:", "Ethernet");
                S("Interface:",
                  (rxq[qs].rspq.netdev
                   ? rxq[qs].rspq.netdev->name
                   : "N/A"));
                S3("d", "Port:",
                   (rxq[qs].rspq.netdev
                    ? ((struct port_info *)
                       netdev_priv(rxq[qs].rspq.netdev))->port_id
                    : -1));
                T("TxQ ID:", q.abs_id);
                T("TxQ size:", q.size);
                T("TxQ inuse:", q.in_use);
                T("TxQ PIdx:", q.pidx);
                T("TxQ CIdx:", q.cidx);
                R("RspQ ID:", rspq.abs_id);
                R("RspQ size:", rspq.size);
                R("RspQE size:", rspq.iqe_len);
                S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
                S3("u", "Intr pktcnt:",
                   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
                R("RspQ CIdx:", rspq.cidx);
                R("RspQ Gen:", rspq.gen);
                R("FL ID:", fl.abs_id);
                R("FL size:", fl.size - MIN_FL_RESID);
                R("FL avail:", fl.avail);
                R("FL PIdx:", fl.pidx);
                R("FL CIdx:", fl.cidx);
                return 0;
        }

        /* Past the Ethernet entries: 0 is the FW event queue, 1 the intrq. */
        r -= eth_entries;
        if (r == 0) {
                const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

                seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
                           qtimer_val(adapter, evtq));
                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
                           adapter->sge.counter_val[evtq->pktcnt_idx]);
                seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
                seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
        } else if (r == 1) {
                const struct sge_rspq *intrq = &adapter->sge.intrq;

                seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
                           qtimer_val(adapter, intrq));
                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
                           adapter->sge.counter_val[intrq->pktcnt_idx]);
                seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
                seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
        }

        /* The helpers are local to this function; don't let them leak. */
        #undef R
        #undef T
        #undef S
        #undef S3

        return 0;
}
2052
2053 /*
2054  * Return the number of "entries" in our "file".  We group the multi-Queue
2055  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2056  *
2057  *     Ethernet RX/TX Queue Sets
2058  *     Firmware Event Queue
2059  *     Forwarded Interrupt Queue (if in MSI mode)
2060  */
2061 static int sge_queue_entries(const struct adapter *adapter)
2062 {
2063         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2064                 ((adapter->flags & USING_MSI) != 0);
2065 }
2066
2067 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
2068 {
2069         int entries = sge_queue_entries(seq->private);
2070
2071         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2072 }
2073
2074 static void sge_queue_stop(struct seq_file *seq, void *v)
2075 {
2076 }
2077
2078 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
2079 {
2080         int entries = sge_queue_entries(seq->private);
2081
2082         ++*pos;
2083         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2084 }
2085
2086 static const struct seq_operations sge_qinfo_seq_ops = {
2087         .start = sge_queue_start,
2088         .next  = sge_queue_next,
2089         .stop  = sge_queue_stop,
2090         .show  = sge_qinfo_show
2091 };
2092
2093 static int sge_qinfo_open(struct inode *inode, struct file *file)
2094 {
2095         int res = seq_open(file, &sge_qinfo_seq_ops);
2096
2097         if (!res) {
2098                 struct seq_file *seq = file->private_data;
2099                 seq->private = inode->i_private;
2100         }
2101         return res;
2102 }
2103
2104 static const struct file_operations sge_qinfo_debugfs_fops = {
2105         .owner   = THIS_MODULE,
2106         .open    = sge_qinfo_open,
2107         .read    = seq_read,
2108         .llseek  = seq_lseek,
2109         .release = seq_release,
2110 };
2111
/*
 * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
 */
#define QPL     4

/*
 * seq_file show callback for the SGE Queue statistics file.
 *
 * v is the iterator value produced by sge_qstats_start()/sge_qstats_next(),
 * i.e. the entry number plus one.  Entries 0 .. eth_entries-1 each cover up
 * to QPL Ethernet Queue Sets printed side by side; the entry after those
 * describes the Firmware Event Queue and the one after that the Interrupt
 * Queue.  Always returns 0.
 */
static int sge_qstats_show(struct seq_file *seq, void *v)
{
        struct adapter *adapter = seq->private;
        int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
        int qs, r = (uintptr_t)v - 1;

        /* Separate every entry but the first from its predecessor. */
        if (r)
                seq_putc(seq, '\n');

        /*
         * Output helpers.  S3() prints the row label s followed by one
         * column per Queue Set; the expression v is re-evaluated for every
         * value of qs, and n, rxq and txq are taken from the scope in
         * which the macro is used.  S() prints string columns.  T3()/R3()
         * print a TX/RX queue field with an explicit format, and T()/R()
         * are the same with the "lu" format.
         */
        #define S3(fmt, s, v) \
                do { \
                        seq_printf(seq, "%-16s", s); \
                        for (qs = 0; qs < n; ++qs) \
                                seq_printf(seq, " %8" fmt, v); \
                        seq_putc(seq, '\n'); \
                } while (0)
        #define S(s, v)         S3("s", s, v)

        #define T3(fmt, s, v)   S3(fmt, s, txq[qs].v)
        #define T(s, v)         T3("lu", s, v)

        #define R3(fmt, s, v)   S3(fmt, s, rxq[qs].v)
        #define R(s, v)         R3("lu", s, v)

        if (r < eth_entries) {
                const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
                const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
                /* The last entry may hold fewer than QPL Queue Sets. */
                int n = min(QPL, adapter->sge.ethqsets - QPL * r);

                S("QType:", "Ethernet");
                S("Interface:",
                  (rxq[qs].rspq.netdev
                   ? rxq[qs].rspq.netdev->name
                   : "N/A"));
                R3("u", "RspQNullInts:", rspq.unhandled_irqs);
                R("RxPackets:", stats.pkts);
                R("RxCSO:", stats.rx_cso);
                R("VLANxtract:", stats.vlan_ex);
                R("LROmerged:", stats.lro_merged);
                R("LROpackets:", stats.lro_pkts);
                R("RxDrops:", stats.rx_drops);
                T("TSO:", tso);
                T("TxCSO:", tx_cso);
                T("VLANins:", vlan_ins);
                T("TxQFull:", q.stops);
                T("TxQRestarts:", q.restarts);
                T("TxMapErr:", mapping_err);
                R("FLAllocErr:", fl.alloc_failed);
                R("FLLrgAlcErr:", fl.large_alloc_failed);
                R("FLStarving:", fl.starving);
                return 0;
        }

        /* Past the Ethernet entries: 0 is the FW event queue, 1 the intrq. */
        r -= eth_entries;
        if (r == 0) {
                const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

                seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
                seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
                           evtq->unhandled_irqs);
                seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
                seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
        } else if (r == 1) {
                const struct sge_rspq *intrq = &adapter->sge.intrq;

                seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
                seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
                           intrq->unhandled_irqs);
                seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
                seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
        }

        /* The helpers are local to this function; don't let them leak. */
        #undef R
        #undef T
        #undef S
        #undef R3
        #undef T3
        #undef S3

        return 0;
}
2198
2199 /*
2200  * Return the number of "entries" in our "file".  We group the multi-Queue
2201  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2202  *
2203  *     Ethernet RX/TX Queue Sets
2204  *     Firmware Event Queue
2205  *     Forwarded Interrupt Queue (if in MSI mode)
2206  */
2207 static int sge_qstats_entries(const struct adapter *adapter)
2208 {
2209         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2210                 ((adapter->flags & USING_MSI) != 0);
2211 }
2212
2213 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2214 {
2215         int entries = sge_qstats_entries(seq->private);
2216
2217         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2218 }
2219
2220 static void sge_qstats_stop(struct seq_file *seq, void *v)
2221 {
2222 }
2223
2224 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2225 {
2226         int entries = sge_qstats_entries(seq->private);
2227
2228         (*pos)++;
2229         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2230 }
2231
2232 static const struct seq_operations sge_qstats_seq_ops = {
2233         .start = sge_qstats_start,
2234         .next  = sge_qstats_next,
2235         .stop  = sge_qstats_stop,
2236         .show  = sge_qstats_show
2237 };
2238
2239 static int sge_qstats_open(struct inode *inode, struct file *file)
2240 {
2241         int res = seq_open(file, &sge_qstats_seq_ops);
2242
2243         if (res == 0) {
2244                 struct seq_file *seq = file->private_data;
2245                 seq->private = inode->i_private;
2246         }
2247         return res;
2248 }
2249
2250 static const struct file_operations sge_qstats_proc_fops = {
2251         .owner   = THIS_MODULE,
2252         .open    = sge_qstats_open,
2253         .read    = seq_read,
2254         .llseek  = seq_lseek,
2255         .release = seq_release,
2256 };
2257
2258 /*
2259  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2260  */
2261 static int resources_show(struct seq_file *seq, void *v)
2262 {
2263         struct adapter *adapter = seq->private;
2264         struct vf_resources *vfres = &adapter->params.vfres;
2265
2266         #define S(desc, fmt, var) \
2267                 seq_printf(seq, "%-60s " fmt "\n", \
2268                            desc " (" #var "):", vfres->var)
2269
2270         S("Virtual Interfaces", "%d", nvi);
2271         S("Egress Queues", "%d", neq);
2272         S("Ethernet Control", "%d", nethctrl);
2273         S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2274         S("Ingress Queues", "%d", niq);
2275         S("Traffic Class", "%d", tc);
2276         S("Port Access Rights Mask", "%#x", pmask);
2277         S("MAC Address Filters", "%d", nexactf);
2278         S("Firmware Command Read Capabilities", "%#x", r_caps);
2279         S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2280
2281         #undef S
2282
2283         return 0;
2284 }
2285
2286 static int resources_open(struct inode *inode, struct file *file)
2287 {
2288         return single_open(file, resources_show, inode->i_private);
2289 }
2290
2291 static const struct file_operations resources_proc_fops = {
2292         .owner   = THIS_MODULE,
2293         .open    = resources_open,
2294         .read    = seq_read,
2295         .llseek  = seq_lseek,
2296         .release = single_release,
2297 };
2298
2299 /*
2300  * Show Virtual Interfaces.
2301  */
2302 static int interfaces_show(struct seq_file *seq, void *v)
2303 {
2304         if (v == SEQ_START_TOKEN) {
2305                 seq_puts(seq, "Interface  Port   VIID\n");
2306         } else {
2307                 struct adapter *adapter = seq->private;
2308                 int pidx = (uintptr_t)v - 2;
2309                 struct net_device *dev = adapter->port[pidx];
2310                 struct port_info *pi = netdev_priv(dev);
2311
2312                 seq_printf(seq, "%9s  %4d  %#5x\n",
2313                            dev->name, pi->port_id, pi->viid);
2314         }
2315         return 0;
2316 }
2317
2318 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2319 {
2320         return pos <= adapter->params.nports
2321                 ? (void *)(uintptr_t)(pos + 1)
2322                 : NULL;
2323 }
2324
2325 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2326 {
2327         return *pos
2328                 ? interfaces_get_idx(seq->private, *pos)
2329                 : SEQ_START_TOKEN;
2330 }
2331
2332 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2333 {
2334         (*pos)++;
2335         return interfaces_get_idx(seq->private, *pos);
2336 }
2337
2338 static void interfaces_stop(struct seq_file *seq, void *v)
2339 {
2340 }
2341
2342 static const struct seq_operations interfaces_seq_ops = {
2343         .start = interfaces_start,
2344         .next  = interfaces_next,
2345         .stop  = interfaces_stop,
2346         .show  = interfaces_show
2347 };
2348
2349 static int interfaces_open(struct inode *inode, struct file *file)
2350 {
2351         int res = seq_open(file, &interfaces_seq_ops);
2352
2353         if (res == 0) {
2354                 struct seq_file *seq = file->private_data;
2355                 seq->private = inode->i_private;
2356         }
2357         return res;
2358 }
2359
2360 static const struct file_operations interfaces_proc_fops = {
2361         .owner   = THIS_MODULE,
2362         .open    = interfaces_open,
2363         .read    = seq_read,
2364         .llseek  = seq_lseek,
2365         .release = seq_release,
2366 };
2367
/*
 * /sys/kernel/debug/cxgb4vf/ files list.
 *
 * Each entry describes one debugfs node; setup_debugfs() creates all of
 * them under the adapter's debugfs_root.
 */
struct cxgb4vf_debugfs_entry {
        const char *name;               /* name of debugfs node */
        umode_t mode;                   /* file system mode */
        const struct file_operations *fops;     /* handlers for the node */
};

/* All nodes are created read-only (S_IRUGO). */
static struct cxgb4vf_debugfs_entry debugfs_files[] = {
        { "mboxlog",    S_IRUGO, &mboxlog_fops },
        { "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
        { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
        { "resources",  S_IRUGO, &resources_proc_fops },
        { "interfaces", S_IRUGO, &interfaces_proc_fops },
};
2384
2385 /*
2386  * Module and device initialization and cleanup code.
2387  * ==================================================
2388  */
2389
2390 /*
2391  * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2392  * directory (debugfs_root) has already been set up.
2393  */
2394 static int setup_debugfs(struct adapter *adapter)
2395 {
2396         int i;
2397
2398         BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2399
2400         /*
2401          * Debugfs support is best effort.
2402          */
2403         for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2404                 (void)debugfs_create_file(debugfs_files[i].name,
2405                                   debugfs_files[i].mode,
2406                                   adapter->debugfs_root,
2407                                   (void *)adapter,
2408                                   debugfs_files[i].fops);
2409
2410         return 0;
2411 }
2412
2413 /*
2414  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2415  * it to our caller to tear down the directory (debugfs_root).
2416  */
2417 static void cleanup_debugfs(struct adapter *adapter)
2418 {
2419         BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2420
2421         /*
2422          * Unlike our sister routine cleanup_proc(), we don't need to remove
2423          * individual entries because a call will be made to
2424          * debugfs_remove_recursive().  We just need to clean up any ancillary
2425          * persistent state.
2426          */
2427         /* nothing to do */
2428 }
2429
2430 /* Figure out how many Ports and Queue Sets we can support.  This depends on
2431  * knowing our Virtual Function Resources and may be called a second time if
2432  * we fall back from MSI-X to MSI Interrupt Mode.
2433  */
2434 static void size_nports_qsets(struct adapter *adapter)
2435 {
2436         struct vf_resources *vfres = &adapter->params.vfres;
2437         unsigned int ethqsets, pmask_nports;
2438
2439         /* The number of "ports" which we support is equal to the number of
2440          * Virtual Interfaces with which we've been provisioned.
2441          */
2442         adapter->params.nports = vfres->nvi;
2443         if (adapter->params.nports > MAX_NPORTS) {
2444                 dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
2445                          " allowed virtual interfaces\n", MAX_NPORTS,
2446                          adapter->params.nports);
2447                 adapter->params.nports = MAX_NPORTS;
2448         }
2449
2450         /* We may have been provisioned with more VIs than the number of
2451          * ports we're allowed to access (our Port Access Rights Mask).
2452          * This is obviously a configuration conflict but we don't want to
2453          * crash the kernel or anything silly just because of that.
2454          */
2455         pmask_nports = hweight32(adapter->params.vfres.pmask);
2456         if (pmask_nports < adapter->params.nports) {
2457                 dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2458                          " virtual interfaces; limited by Port Access Rights"
2459                          " mask %#x\n", pmask_nports, adapter->params.nports,
2460                          adapter->params.vfres.pmask);
2461                 adapter->params.nports = pmask_nports;
2462         }
2463
2464         /* We need to reserve an Ingress Queue for the Asynchronous Firmware
2465          * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2466          * reserve an Ingress Queue for a Forwarded Interrupts.
2467          *
2468          * The rest of the FL/Intr-capable ingress queues will be matched up
2469          * one-for-one with Ethernet/Control egress queues in order to form
2470          * "Queue Sets" which will be aportioned between the "ports".  For
2471          * each Queue Set, we'll need the ability to allocate two Egress
2472          * Contexts -- one for the Ingress Queue Free List and one for the TX
2473          * Ethernet Queue.
2474          *
2475          * Note that even if we're currently configured to use MSI-X
2476          * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2477          * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2478          * happens we'll need to adjust things later.
2479          */
2480         ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2481         if (vfres->nethctrl != ethqsets)
2482                 ethqsets = min(vfres->nethctrl, ethqsets);
2483         if (vfres->neq < ethqsets*2)
2484                 ethqsets = vfres->neq/2;
2485         if (ethqsets > MAX_ETH_QSETS)
2486                 ethqsets = MAX_ETH_QSETS;
2487         adapter->sge.max_ethqsets = ethqsets;
2488
2489         if (adapter->sge.max_ethqsets < adapter->params.nports) {
2490                 dev_warn(adapter->pdev_dev, "only using %d of %d available"
2491                          " virtual interfaces (too few Queue Sets)\n",
2492                          adapter->sge.max_ethqsets, adapter->params.nports);
2493                 adapter->params.nports = adapter->sge.max_ethqsets;
2494         }
2495 }
2496
2497 /*
2498  * Perform early "adapter" initialization.  This is where we discover what
2499  * adapter parameters we're going to be using and initialize basic adapter
2500  * hardware support.
2501  */
2502 static int adap_init0(struct adapter *adapter)
2503 {
2504         struct sge_params *sge_params = &adapter->params.sge;
2505         struct sge *s = &adapter->sge;
2506         int err;
2507         u32 param, val = 0;
2508
2509         /*
2510          * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2511          * 2.6.31 and later we can't call pci_reset_function() in order to
2512          * issue an FLR because of a self- deadlock on the device semaphore.
2513          * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2514          * cases where they're needed -- for instance, some versions of KVM
2515          * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2516          * use the firmware based reset in order to reset any per function
2517          * state.
2518          */
2519         err = t4vf_fw_reset(adapter);
2520         if (err < 0) {
2521                 dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2522                 return err;
2523         }
2524
2525         /*
2526          * Grab basic operational parameters.  These will predominantly have
2527          * been set up by the Physical Function Driver or will be hard coded
2528          * into the adapter.  We just have to live with them ...  Note that
2529          * we _must_ get our VPD parameters before our SGE parameters because
2530          * we need to know the adapter's core clock from the VPD in order to
2531          * properly decode the SGE Timer Values.
2532          */
2533         err = t4vf_get_dev_params(adapter);
2534         if (err) {
2535                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2536                         " device parameters: err=%d\n", err);
2537                 return err;
2538         }
2539         err = t4vf_get_vpd_params(adapter);
2540         if (err) {
2541                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2542                         " VPD parameters: err=%d\n", err);
2543                 return err;
2544         }
2545         err = t4vf_get_sge_params(adapter);
2546         if (err) {
2547                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2548                         " SGE parameters: err=%d\n", err);
2549                 return err;
2550         }
2551         err = t4vf_get_rss_glb_config(adapter);
2552         if (err) {
2553                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2554                         " RSS parameters: err=%d\n", err);
2555                 return err;
2556         }
2557         if (adapter->params.rss.mode !=
2558             FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2559                 dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2560                         " mode %d\n", adapter->params.rss.mode);
2561                 return -EINVAL;
2562         }
2563         err = t4vf_sge_init(adapter);
2564         if (err) {
2565                 dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2566                         " err=%d\n", err);
2567                 return err;
2568         }
2569
2570         /* If we're running on newer firmware, let it know that we're
2571          * prepared to deal with encapsulated CPL messages.  Older
2572          * firmware won't understand this and we'll just get
2573          * unencapsulated messages ...
2574          */
2575         param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2576                 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2577         val = 1;
2578         (void) t4vf_set_params(adapter, 1, &param, &val);
2579
2580         /*
2581          * Retrieve our RX interrupt holdoff timer values and counter
2582          * threshold values from the SGE parameters.
2583          */
2584         s->timer_val[0] = core_ticks_to_us(adapter,
2585                 TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2586         s->timer_val[1] = core_ticks_to_us(adapter,
2587                 TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2588         s->timer_val[2] = core_ticks_to_us(adapter,
2589                 TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2590         s->timer_val[3] = core_ticks_to_us(adapter,
2591                 TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2592         s->timer_val[4] = core_ticks_to_us(adapter,
2593                 TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2594         s->timer_val[5] = core_ticks_to_us(adapter,
2595                 TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2596
2597         s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2598         s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2599         s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2600         s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2601
2602         /*
2603          * Grab our Virtual Interface resource allocation, extract the
2604          * features that we're interested in and do a bit of sanity testing on
2605          * what we discover.
2606          */
2607         err = t4vf_get_vfres(adapter);
2608         if (err) {
2609                 dev_err(adapter->pdev_dev, "unable to get virtual interface"
2610                         " resources: err=%d\n", err);
2611                 return err;
2612         }
2613
2614         /* Check for various parameter sanity issues */
2615         if (adapter->params.vfres.pmask == 0) {
2616                 dev_err(adapter->pdev_dev, "no port access configured\n"
2617                         "usable!\n");
2618                 return -EINVAL;
2619         }
2620         if (adapter->params.vfres.nvi == 0) {
2621                 dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2622                         "usable!\n");
2623                 return -EINVAL;
2624         }
2625
2626         /* Initialize nports and max_ethqsets now that we have our Virtual
2627          * Function Resources.
2628          */
2629         size_nports_qsets(adapter);
2630
2631         return 0;
2632 }
2633
2634 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2635                              u8 pkt_cnt_idx, unsigned int size,
2636                              unsigned int iqe_size)
2637 {
2638         rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2639                              (pkt_cnt_idx < SGE_NCOUNTERS ?
2640                               QINTR_CNT_EN_F : 0));
2641         rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2642                             ? pkt_cnt_idx
2643                             : 0);
2644         rspq->iqe_len = iqe_size;
2645         rspq->size = size;
2646 }
2647
2648 /*
2649  * Perform default configuration of DMA queues depending on the number and
2650  * type of ports we found and the number of available CPUs.  Most settings can
2651  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2652  * being brought up for the first time.
2653  */
2654 static void cfg_queues(struct adapter *adapter)
2655 {
2656         struct sge *s = &adapter->sge;
2657         int q10g, n10g, qidx, pidx, qs;
2658         size_t iqe_size;
2659
2660         /*
2661          * We should not be called till we know how many Queue Sets we can
2662          * support.  In particular, this means that we need to know what kind
2663          * of interrupts we'll be using ...
2664          */
2665         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2666
2667         /*
2668          * Count the number of 10GbE Virtual Interfaces that we have.
2669          */
2670         n10g = 0;
2671         for_each_port(adapter, pidx)
2672                 n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2673
2674         /*
2675          * We default to 1 queue per non-10G port and up to # of cores queues
2676          * per 10G port.
2677          */
2678         if (n10g == 0)
2679                 q10g = 0;
2680         else {
2681                 int n1g = (adapter->params.nports - n10g);
2682                 q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2683                 if (q10g > num_online_cpus())
2684                         q10g = num_online_cpus();
2685         }
2686
2687         /*
2688          * Allocate the "Queue Sets" to the various Virtual Interfaces.
2689          * The layout will be established in setup_sge_queues() when the
2690          * adapter is brough up for the first time.
2691          */
2692         qidx = 0;
2693         for_each_port(adapter, pidx) {
2694                 struct port_info *pi = adap2pinfo(adapter, pidx);
2695
2696                 pi->first_qset = qidx;
2697                 pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2698                 qidx += pi->nqsets;
2699         }
2700         s->ethqsets = qidx;
2701
2702         /*
2703          * The Ingress Queue Entry Size for our various Response Queues needs
2704          * to be big enough to accommodate the largest message we can receive
2705          * from the chip/firmware; which is 64 bytes ...
2706          */
2707         iqe_size = 64;
2708
2709         /*
2710          * Set up default Queue Set parameters ...  Start off with the
2711          * shortest interrupt holdoff timer.
2712          */
2713         for (qs = 0; qs < s->max_ethqsets; qs++) {
2714                 struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2715                 struct sge_eth_txq *txq = &s->ethtxq[qs];
2716
2717                 init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2718                 rxq->fl.size = 72;
2719                 txq->q.size = 1024;
2720         }
2721
2722         /*
2723          * The firmware event queue is used for link state changes and
2724          * notifications of TX DMA completions.
2725          */
2726         init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2727
2728         /*
2729          * The forwarded interrupt queue is used when we're in MSI interrupt
2730          * mode.  In this mode all interrupts associated with RX queues will
2731          * be forwarded to a single queue which we'll associate with our MSI
2732          * interrupt vector.  The messages dropped in the forwarded interrupt
2733          * queue will indicate which ingress queue needs servicing ...  This
2734          * queue needs to be large enough to accommodate all of the ingress
2735          * queues which are forwarding their interrupt (+1 to prevent the PIDX
2736          * from equalling the CIDX if every ingress queue has an outstanding
2737          * interrupt).  The queue doesn't need to be any larger because no
2738          * ingress queue will ever have more than one outstanding interrupt at
2739          * any time ...
2740          */
2741         init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2742                   iqe_size);
2743 }
2744
2745 /*
2746  * Reduce the number of Ethernet queues across all ports to at most n.
2747  * n provides at least one queue per port.
2748  */
2749 static void reduce_ethqs(struct adapter *adapter, int n)
2750 {
2751         int i;
2752         struct port_info *pi;
2753
2754         /*
2755          * While we have too many active Ether Queue Sets, interate across the
2756          * "ports" and reduce their individual Queue Set allocations.
2757          */
2758         BUG_ON(n < adapter->params.nports);
2759         while (n < adapter->sge.ethqsets)
2760                 for_each_port(adapter, i) {
2761                         pi = adap2pinfo(adapter, i);
2762                         if (pi->nqsets > 1) {
2763                                 pi->nqsets--;
2764                                 adapter->sge.ethqsets--;
2765                                 if (adapter->sge.ethqsets <= n)
2766                                         break;
2767                         }
2768                 }
2769
2770         /*
2771          * Reassign the starting Queue Sets for each of the "ports" ...
2772          */
2773         n = 0;
2774         for_each_port(adapter, i) {
2775                 pi = adap2pinfo(adapter, i);
2776                 pi->first_qset = n;
2777                 n += pi->nqsets;
2778         }
2779 }
2780
2781 /*
2782  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2783  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2784  * need.  Minimally we need one for every Virtual Interface plus those needed
2785  * for our "extras".  Note that this process may lower the maximum number of
2786  * allowed Queue Sets ...
2787  */
2788 static int enable_msix(struct adapter *adapter)
2789 {
2790         int i, want, need, nqsets;
2791         struct msix_entry entries[MSIX_ENTRIES];
2792         struct sge *s = &adapter->sge;
2793
2794         for (i = 0; i < MSIX_ENTRIES; ++i)
2795                 entries[i].entry = i;
2796
2797         /*
2798          * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2799          * plus those needed for our "extras" (for example, the firmware
2800          * message queue).  We _need_ at least one "Queue Set" per Virtual
2801          * Interface plus those needed for our "extras".  So now we get to see
2802          * if the song is right ...
2803          */
2804         want = s->max_ethqsets + MSIX_EXTRAS;
2805         need = adapter->params.nports + MSIX_EXTRAS;
2806
2807         want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2808         if (want < 0)
2809                 return want;
2810
2811         nqsets = want - MSIX_EXTRAS;
2812         if (nqsets < s->max_ethqsets) {
2813                 dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2814                          " for %d Queue Sets\n", nqsets);
2815                 s->max_ethqsets = nqsets;
2816                 if (nqsets < s->ethqsets)
2817                         reduce_ethqs(adapter, nqsets);
2818         }
2819         for (i = 0; i < want; ++i)
2820                 adapter->msix_info[i].vec = entries[i].vector;
2821
2822         return 0;
2823 }
2824
2825 static const struct net_device_ops cxgb4vf_netdev_ops   = {
2826         .ndo_open               = cxgb4vf_open,
2827         .ndo_stop               = cxgb4vf_stop,
2828         .ndo_start_xmit         = t4vf_eth_xmit,
2829         .ndo_get_stats          = cxgb4vf_get_stats,
2830         .ndo_set_rx_mode        = cxgb4vf_set_rxmode,
2831         .ndo_set_mac_address    = cxgb4vf_set_mac_addr,
2832         .ndo_validate_addr      = eth_validate_addr,
2833         .ndo_do_ioctl           = cxgb4vf_do_ioctl,
2834         .ndo_change_mtu         = cxgb4vf_change_mtu,
2835         .ndo_fix_features       = cxgb4vf_fix_features,
2836         .ndo_set_features       = cxgb4vf_set_features,
2837 #ifdef CONFIG_NET_POLL_CONTROLLER
2838         .ndo_poll_controller    = cxgb4vf_poll_controller,
2839 #endif
2840 };
2841
2842 /*
2843  * "Probe" a device: initialize a device and construct all kernel and driver
2844  * state needed to manage the device.  This routine is called "init_one" in
2845  * the PF Driver ...
2846  */
2847 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2848                              const struct pci_device_id *ent)
2849 {
2850         int pci_using_dac;
2851         int err, pidx;
2852         unsigned int pmask;
2853         struct adapter *adapter;
2854         struct port_info *pi;
2855         struct net_device *netdev;
2856         unsigned int pf;
2857
2858         /*
2859          * Print our driver banner the first time we're called to initialize a
2860          * device.
2861          */
2862         pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2863
2864         /*
2865          * Initialize generic PCI device state.
2866          */
2867         err = pci_enable_device(pdev);
2868         if (err) {
2869                 dev_err(&pdev->dev, "cannot enable PCI device\n");
2870                 return err;
2871         }
2872
2873         /*
2874          * Reserve PCI resources for the device.  If we can't get them some
2875          * other driver may have already claimed the device ...
2876          */
2877         err = pci_request_regions(pdev, KBUILD_MODNAME);
2878         if (err) {
2879                 dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2880                 goto err_disable_device;
2881         }
2882
2883         /*
2884          * Set up our DMA mask: try for 64-bit address masking first and
2885          * fall back to 32-bit if we can't get 64 bits ...
2886          */
2887         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2888         if (err == 0) {
2889                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2890                 if (err) {
2891                         dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2892                                 " coherent allocations\n");
2893                         goto err_release_regions;
2894                 }
2895                 pci_using_dac = 1;
2896         } else {
2897                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2898                 if (err != 0) {
2899                         dev_err(&pdev->dev, "no usable DMA configuration\n");
2900                         goto err_release_regions;
2901                 }
2902                 pci_using_dac = 0;
2903         }
2904
2905         /*
2906          * Enable bus mastering for the device ...
2907          */
2908         pci_set_master(pdev);
2909
2910         /*
2911          * Allocate our adapter data structure and attach it to the device.
2912          */
2913         adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2914         if (!adapter) {
2915                 err = -ENOMEM;
2916                 goto err_release_regions;
2917         }
2918         pci_set_drvdata(pdev, adapter);
2919         adapter->pdev = pdev;
2920         adapter->pdev_dev = &pdev->dev;
2921
2922         adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
2923                                     (sizeof(struct mbox_cmd) *
2924                                      T4VF_OS_LOG_MBOX_CMDS),
2925                                     GFP_KERNEL);
2926         if (!adapter->mbox_log) {
2927                 err = -ENOMEM;
2928                 goto err_free_adapter;
2929         }
2930         adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
2931
2932         /*
2933          * Initialize SMP data synchronization resources.
2934          */
2935         spin_lock_init(&adapter->stats_lock);
2936         spin_lock_init(&adapter->mbox_lock);
2937         INIT_LIST_HEAD(&adapter->mlist.list);
2938
2939         /*
2940          * Map our I/O registers in BAR0.
2941          */
2942         adapter->regs = pci_ioremap_bar(pdev, 0);
2943         if (!adapter->regs) {
2944                 dev_err(&pdev->dev, "cannot map device registers\n");
2945                 err = -ENOMEM;
2946                 goto err_free_adapter;
2947         }
2948
2949         /* Wait for the device to become ready before proceeding ...
2950          */
2951         err = t4vf_prep_adapter(adapter);
2952         if (err) {
2953                 dev_err(adapter->pdev_dev, "device didn't become ready:"
2954                         " err=%d\n", err);
2955                 goto err_unmap_bar0;
2956         }
2957
2958         /* For T5 and later we want to use the new BAR-based User Doorbells,
2959          * so we need to map BAR2 here ...
2960          */
2961         if (!is_t4(adapter->params.chip)) {
2962                 adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2963                                            pci_resource_len(pdev, 2));
2964                 if (!adapter->bar2) {
2965                         dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2966                         err = -ENOMEM;
2967                         goto err_unmap_bar0;
2968                 }
2969         }
2970         /*
2971          * Initialize adapter level features.
2972          */
2973         adapter->name = pci_name(pdev);
2974         adapter->msg_enable = DFLT_MSG_ENABLE;
2975
2976         /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
2977          * Ingress Packet Data to Free List Buffers in order to allow for
2978          * chipset performance optimizations between the Root Complex and
2979          * Memory Controllers.  (Messages to the associated Ingress Queue
2980          * notifying new Packet Placement in the Free Lists Buffers will be
2981          * send without the Relaxed Ordering Attribute thus guaranteeing that
2982          * all preceding PCIe Transaction Layer Packets will be processed
2983          * first.)  But some Root Complexes have various issues with Upstream
2984          * Transaction Layer Packets with the Relaxed Ordering Attribute set.
2985          * The PCIe devices which under the Root Complexes will be cleared the
2986          * Relaxed Ordering bit in the configuration space, So we check our
2987          * PCIe configuration space to see if it's flagged with advice against
2988          * using Relaxed Ordering.
2989          */
2990         if (!pcie_relaxed_ordering_enabled(pdev))
2991                 adapter->flags |= ROOT_NO_RELAXED_ORDERING;
2992
2993         err = adap_init0(adapter);
2994         if (err)
2995                 goto err_unmap_bar;
2996
2997         /*
2998          * Allocate our "adapter ports" and stitch everything together.
2999          */
3000         pmask = adapter->params.vfres.pmask;
3001         pf = t4vf_get_pf_from_vf(adapter);
3002         for_each_port(adapter, pidx) {
3003                 int port_id, viid;
3004                 u8 mac[ETH_ALEN];
3005                 unsigned int naddr = 1;
3006
3007                 /*
3008                  * We simplistically allocate our virtual interfaces
3009                  * sequentially across the port numbers to which we have
3010                  * access rights.  This should be configurable in some manner
3011                  * ...
3012                  */
3013                 if (pmask == 0)
3014                         break;
3015                 port_id = ffs(pmask) - 1;
3016                 pmask &= ~(1 << port_id);
3017                 viid = t4vf_alloc_vi(adapter, port_id);
3018                 if (viid < 0) {
3019                         dev_err(&pdev->dev, "cannot allocate VI for port %d:"
3020                                 " err=%d\n", port_id, viid);
3021                         err = viid;
3022                         goto err_free_dev;
3023                 }
3024
3025                 /*
3026                  * Allocate our network device and stitch things together.
3027                  */
3028                 netdev = alloc_etherdev_mq(sizeof(struct port_info),
3029                                            MAX_PORT_QSETS);
3030                 if (netdev == NULL) {
3031                         t4vf_free_vi(adapter, viid);
3032                         err = -ENOMEM;
3033                         goto err_free_dev;
3034                 }
3035                 adapter->port[pidx] = netdev;
3036                 SET_NETDEV_DEV(netdev, &pdev->dev);
3037                 pi = netdev_priv(netdev);
3038                 pi->adapter = adapter;
3039                 pi->pidx = pidx;
3040                 pi->port_id = port_id;
3041                 pi->viid = viid;
3042
3043                 /*
3044                  * Initialize the starting state of our "port" and register
3045                  * it.
3046                  */
3047                 pi->xact_addr_filt = -1;
3048                 netif_carrier_off(netdev);
3049                 netdev->irq = pdev->irq;
3050
3051                 netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
3052                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3053                         NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
3054                 netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
3055                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3056                         NETIF_F_HIGHDMA;
3057                 netdev->features = netdev->hw_features |
3058                                    NETIF_F_HW_VLAN_CTAG_TX;
3059                 if (pci_using_dac)
3060                         netdev->features |= NETIF_F_HIGHDMA;
3061
3062                 netdev->priv_flags |= IFF_UNICAST_FLT;
3063                 netdev->min_mtu = 81;
3064                 netdev->max_mtu = ETH_MAX_MTU;
3065
3066                 netdev->netdev_ops = &cxgb4vf_netdev_ops;
3067                 netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
3068                 netdev->dev_port = pi->port_id;
3069
3070                 /*
3071                  * Initialize the hardware/software state for the port.
3072                  */
3073                 err = t4vf_port_init(adapter, pidx);
3074                 if (err) {
3075                         dev_err(&pdev->dev, "cannot initialize port %d\n",
3076                                 pidx);
3077                         goto err_free_dev;
3078                 }
3079
3080                 err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
3081                 if (err) {
3082                         dev_err(&pdev->dev,
3083                                 "unable to determine MAC ACL address, "
3084                                 "continuing anyway.. (status %d)\n", err);
3085                 } else if (naddr && adapter->params.vfres.nvi == 1) {
3086                         struct sockaddr addr;
3087
3088                         ether_addr_copy(addr.sa_data, mac);
3089                         err = cxgb4vf_set_mac_addr(netdev, &addr);
3090                         if (err) {
3091                                 dev_err(&pdev->dev,
3092                                         "unable to set MAC address %pM\n",
3093                                         mac);
3094                                 goto err_free_dev;
3095                         }
3096                         dev_info(&pdev->dev,
3097                                  "Using assigned MAC ACL: %pM\n", mac);
3098                 }
3099         }
3100
3101         /* See what interrupts we'll be using.  If we've been configured to
3102          * use MSI-X interrupts, try to enable them but fall back to using
3103          * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
3104          * get MSI interrupts we bail with the error.
3105          */
3106         if (msi == MSI_MSIX && enable_msix(adapter) == 0)
3107                 adapter->flags |= USING_MSIX;
3108         else {
3109                 if (msi == MSI_MSIX) {
3110                         dev_info(adapter->pdev_dev,
3111                                  "Unable to use MSI-X Interrupts; falling "
3112                                  "back to MSI Interrupts\n");
3113
3114                         /* We're going to need a Forwarded Interrupt Queue so
3115                          * that may cut into how many Queue Sets we can
3116                          * support.
3117                          */
3118                         msi = MSI_MSI;
3119                         size_nports_qsets(adapter);
3120                 }
3121                 err = pci_enable_msi(pdev);
3122                 if (err) {
3123                         dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
3124                                 " err=%d\n", err);
3125                         goto err_free_dev;
3126                 }
3127                 adapter->flags |= USING_MSI;
3128         }
3129
3130         /* Now that we know how many "ports" we have and what interrupt
3131          * mechanism we're going to use, we can configure our queue resources.
3132          */
3133         cfg_queues(adapter);
3134
3135         /*
3136          * The "card" is now ready to go.  If any errors occur during device
3137          * registration we do not fail the whole "card" but rather proceed
3138          * only with the ports we manage to register successfully.  However we
3139          * must register at least one net device.
3140          */
3141         for_each_port(adapter, pidx) {
3142                 struct port_info *pi = netdev_priv(adapter->port[pidx]);
3143                 netdev = adapter->port[pidx];
3144                 if (netdev == NULL)
3145                         continue;
3146
3147                 netif_set_real_num_tx_queues(netdev, pi->nqsets);
3148                 netif_set_real_num_rx_queues(netdev, pi->nqsets);
3149
3150                 err = register_netdev(netdev);
3151                 if (err) {
3152                         dev_warn(&pdev->dev, "cannot register net device %s,"
3153                                  " skipping\n", netdev->name);
3154                         continue;
3155                 }
3156
3157                 set_bit(pidx, &adapter->registered_device_map);
3158         }
3159         if (adapter->registered_device_map == 0) {
3160                 dev_err(&pdev->dev, "could not register any net devices\n");
3161                 goto err_disable_interrupts;
3162         }
3163
3164         /*
3165          * Set up our debugfs entries.
3166          */
3167         if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
3168                 adapter->debugfs_root =
3169                         debugfs_create_dir(pci_name(pdev),
3170                                            cxgb4vf_debugfs_root);
3171                 if (IS_ERR_OR_NULL(adapter->debugfs_root))
3172                         dev_warn(&pdev->dev, "could not create debugfs"
3173                                  " directory");
3174                 else
3175                         setup_debugfs(adapter);
3176         }
3177
3178         /*
3179          * Print a short notice on the existence and configuration of the new
3180          * VF network device ...
3181          */
3182         for_each_port(adapter, pidx) {
3183                 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
3184                          adapter->port[pidx]->name,
3185                          (adapter->flags & USING_MSIX) ? "MSI-X" :
3186                          (adapter->flags & USING_MSI)  ? "MSI" : "");
3187         }
3188
3189         /*
3190          * Return success!
3191          */
3192         return 0;
3193
3194         /*
3195          * Error recovery and exit code.  Unwind state that's been created
3196          * so far and return the error.
3197          */
3198 err_disable_interrupts:
3199         if (adapter->flags & USING_MSIX) {
3200                 pci_disable_msix(adapter->pdev);
3201                 adapter->flags &= ~USING_MSIX;
3202         } else if (adapter->flags & USING_MSI) {
3203                 pci_disable_msi(adapter->pdev);
3204                 adapter->flags &= ~USING_MSI;
3205         }
3206
3207 err_free_dev:
3208         for_each_port(adapter, pidx) {
3209                 netdev = adapter->port[pidx];
3210                 if (netdev == NULL)
3211                         continue;
3212                 pi = netdev_priv(netdev);
3213                 t4vf_free_vi(adapter, pi->viid);
3214                 if (test_bit(pidx, &adapter->registered_device_map))
3215                         unregister_netdev(netdev);
3216                 free_netdev(netdev);
3217         }
3218
3219 err_unmap_bar:
3220         if (!is_t4(adapter->params.chip))
3221                 iounmap(adapter->bar2);
3222
3223 err_unmap_bar0:
3224         iounmap(adapter->regs);
3225
3226 err_free_adapter:
3227         kfree(adapter->mbox_log);
3228         kfree(adapter);
3229
3230 err_release_regions:
3231         pci_release_regions(pdev);
3232         pci_clear_master(pdev);
3233
3234 err_disable_device:
3235         pci_disable_device(pdev);
3236
3237         return err;
3238 }
3239
3240 /*
3241  * "Remove" a device: tear down all kernel and driver state created in the
3242  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3243  * that this is called "remove_one" in the PF Driver.)
3244  */
3245 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3246 {
3247         struct adapter *adapter = pci_get_drvdata(pdev);
3248
3249         /*
3250          * Tear down driver state associated with device.
3251          */
3252         if (adapter) {
3253                 int pidx;
3254
3255                 /*
3256                  * Stop all of our activity.  Unregister network port,
3257                  * disable interrupts, etc.
3258                  */
3259                 for_each_port(adapter, pidx)
3260                         if (test_bit(pidx, &adapter->registered_device_map))
3261                                 unregister_netdev(adapter->port[pidx]);
3262                 t4vf_sge_stop(adapter);
3263                 if (adapter->flags & USING_MSIX) {
3264                         pci_disable_msix(adapter->pdev);
3265                         adapter->flags &= ~USING_MSIX;
3266                 } else if (adapter->flags & USING_MSI) {
3267                         pci_disable_msi(adapter->pdev);
3268                         adapter->flags &= ~USING_MSI;
3269                 }
3270
3271                 /*
3272                  * Tear down our debugfs entries.
3273                  */
3274                 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3275                         cleanup_debugfs(adapter);
3276                         debugfs_remove_recursive(adapter->debugfs_root);
3277                 }
3278
3279                 /*
3280                  * Free all of the various resources which we've acquired ...
3281                  */
3282                 t4vf_free_sge_resources(adapter);
3283                 for_each_port(adapter, pidx) {
3284                         struct net_device *netdev = adapter->port[pidx];
3285                         struct port_info *pi;
3286
3287                         if (netdev == NULL)
3288                                 continue;
3289
3290                         pi = netdev_priv(netdev);
3291                         t4vf_free_vi(adapter, pi->viid);
3292                         free_netdev(netdev);
3293                 }
3294                 iounmap(adapter->regs);
3295                 if (!is_t4(adapter->params.chip))
3296                         iounmap(adapter->bar2);
3297                 kfree(adapter->mbox_log);
3298                 kfree(adapter);
3299         }
3300
3301         /*
3302          * Disable the device and release its PCI resources.
3303          */
3304         pci_disable_device(pdev);
3305         pci_clear_master(pdev);
3306         pci_release_regions(pdev);
3307 }
3308
3309 /*
3310  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
3311  * delivery.
3312  */
3313 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3314 {
3315         struct adapter *adapter;
3316         int pidx;
3317
3318         adapter = pci_get_drvdata(pdev);
3319         if (!adapter)
3320                 return;
3321
3322         /* Disable all Virtual Interfaces.  This will shut down the
3323          * delivery of all ingress packets into the chip for these
3324          * Virtual Interfaces.
3325          */
3326         for_each_port(adapter, pidx)
3327                 if (test_bit(pidx, &adapter->registered_device_map))
3328                         unregister_netdev(adapter->port[pidx]);
3329
3330         /* Free up all Queues which will prevent further DMA and
3331          * Interrupts allowing various internal pathways to drain.
3332          */
3333         t4vf_sge_stop(adapter);
3334         if (adapter->flags & USING_MSIX) {
3335                 pci_disable_msix(adapter->pdev);
3336                 adapter->flags &= ~USING_MSIX;
3337         } else if (adapter->flags & USING_MSI) {
3338                 pci_disable_msi(adapter->pdev);
3339                 adapter->flags &= ~USING_MSI;
3340         }
3341
3342         /*
3343          * Free up all Queues which will prevent further DMA and
3344          * Interrupts allowing various internal pathways to drain.
3345          */
3346         t4vf_free_sge_resources(adapter);
3347         pci_set_drvdata(pdev, NULL);
3348 }
3349
/* Macros needed to support the PCI Device ID Table ...
 *
 * They are defined immediately before including the shared
 * "../cxgb4/t4_pci_id_tbl.h" header.  NOTE(review): presumably that header
 * expands them to emit the cxgb4vf_pci_tbl[] array which is referenced
 * further down by MODULE_DEVICE_TABLE() and by cxgb4vf_driver; the header is
 * not visible from here -- confirm.
 */

/* Opens the definition of the device ID array.  The initializer list is
 * deliberately left open; CH_PCI_DEVICE_ID_TABLE_DEFINE_END closes it.
 */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
        static const struct pci_device_id cxgb4vf_pci_tbl[] = {

/* NOTE(review): looks like the PCI function selector the shared header uses
 * to pick the Virtual Function device IDs -- confirm in t4_pci_id_tbl.h.
 */
#define CH_PCI_DEVICE_ID_FUNCTION       0x8

/* One table entry for a Chelsio device ID.  NOTE(review): the trailing 0 is
 * presumably the entry's driver_data field -- confirm against PCI_VDEVICE().
 */
#define CH_PCI_ID_TABLE_ENTRY(devid) \
                { PCI_VDEVICE(CHELSIO, (devid)), 0 }

/* Appends an all-zero sentinel entry and closes the array initializer opened
 * by CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN.
 */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }

#include "../cxgb4/t4_pci_id_tbl.h"
3362
/*
 * Module metadata.  DRV_DESC and DRV_VERSION are defined outside this
 * section of the file.  MODULE_DEVICE_TABLE() is given the same
 * cxgb4vf_pci_tbl that cxgb4vf_driver uses as its id_table.
 */
MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3368
/*
 * PCI driver descriptor.  It ties the device ID table to this driver's
 * probe, remove and shutdown callbacks, and is what the module init and exit
 * routines pass to pci_register_driver() and pci_unregister_driver().
 */
static struct pci_driver cxgb4vf_driver = {
        .name           = KBUILD_MODNAME,
        .id_table       = cxgb4vf_pci_tbl,
        .probe          = cxgb4vf_pci_probe,
        .remove         = cxgb4vf_pci_remove,
        .shutdown       = cxgb4vf_pci_shutdown,
};
3376
3377 /*
3378  * Initialize global driver state.
3379  */
3380 static int __init cxgb4vf_module_init(void)
3381 {
3382         int ret;
3383
3384         /*
3385          * Vet our module parameters.
3386          */
3387         if (msi != MSI_MSIX && msi != MSI_MSI) {
3388                 pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3389                         msi, MSI_MSIX, MSI_MSI);
3390                 return -EINVAL;
3391         }
3392
3393         /* Debugfs support is optional, just warn if this fails */
3394         cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3395         if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3396                 pr_warn("could not create debugfs entry, continuing\n");
3397
3398         ret = pci_register_driver(&cxgb4vf_driver);
3399         if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3400                 debugfs_remove(cxgb4vf_debugfs_root);
3401         return ret;
3402 }
3403
/*
 * Tear down global driver state.
 *
 * Undoes cxgb4vf_module_init() in reverse order: the PCI driver is
 * unregistered first and only then is the module-level debugfs directory
 * removed.  The per-adapter debugfs directories are created underneath
 * cxgb4vf_debugfs_root and are removed by cxgb4vf_pci_remove().
 */
static void __exit cxgb4vf_module_exit(void)
{
        pci_unregister_driver(&cxgb4vf_driver);
        /*
         * NOTE(review): unlike the error path in cxgb4vf_module_init(), this
         * call is not guarded by IS_ERR_OR_NULL(); presumably
         * debugfs_remove() tolerates such pointers -- confirm.
         */
        debugfs_remove(cxgb4vf_debugfs_root);
}
3412
/* Register the module's load and unload entry points. */
module_init(cxgb4vf_module_init);
module_exit(cxgb4vf_module_exit);