1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/errno.h>
  28 #include <sys/sysmacros.h>
  29 #include <sys/param.h>
  30 #include <sys/machsystm.h>
  31 #include <sys/stream.h>
  32 #include <sys/strsubr.h>
  33 #include <sys/kmem.h>
  34 #include <sys/conf.h>
  35 #include <sys/devops.h>
  36 #include <sys/ksynch.h>
  37 #include <sys/stat.h>
  38 #include <sys/modctl.h>
  39 #include <sys/debug.h>
  40 #include <sys/ethernet.h>
  41 #include <sys/ddi.h>
  42 #include <sys/sunddi.h>
  43 #include <sys/strsun.h>
  44 #include <sys/note.h>
  45 #include <sys/mac_provider.h>
  46 #include <sys/mac_ether.h>
  47 #include <sys/ldc.h>
  48 #include <sys/mach_descrip.h>
  49 #include <sys/mdeg.h>
  50 #include <net/if.h>
  51 #include <sys/vnet.h>
  52 #include <sys/vio_mailbox.h>
  53 #include <sys/vio_common.h>
  54 #include <sys/vnet_common.h>
  55 #include <sys/vnet_mailbox.h>
  56 #include <sys/vio_util.h>
  57 #include <sys/vnet_gen.h>
  58 #include <sys/atomic.h>
  59 #include <sys/callb.h>
  60 #include <sys/sdt.h>
  61 #include <sys/intr.h>
  62 #include <sys/pattr.h>
  63 #include <sys/vlan.h>
  64 
/*
 * Implementation of the mac provider functionality for vnet using the
 * generic (default) transport layer of sun4v Logical Domain Channels (LDC).
 */
  69 
/*
 * Entry Points
 *
 * The routines declared without 'static' have external linkage and can be
 * called from outside this file. The static routines are file-local; of
 * those, vgen_stat, vgen_start, vgen_stop, vgen_promisc, vgen_multicst,
 * vgen_unicst, vgen_tx and vgen_ioctl are the ones placed in the
 * vgen_m_callbacks table defined in this file. vgen_force_link_state() is
 * declared only when VNET_IOC_DEBUG is defined.
 */
int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    const uint8_t *macaddr, void **vgenhdl);
int vgen_init_mdeg(void *arg);
void vgen_uninit(void *arg);
int vgen_dds_tx(void *arg, void *dmsg);
int vgen_enable_intr(void *arg);
int vgen_disable_intr(void *arg);
mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
static int vgen_start(void *arg);
static void vgen_stop(void *arg);
static mblk_t *vgen_tx(void *arg, mblk_t *mp);
static int vgen_multicst(void *arg, boolean_t add,
        const uint8_t *mca);
static int vgen_promisc(void *arg, boolean_t on);
static int vgen_unicst(void *arg, const uint8_t *mca);
static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
#ifdef  VNET_IOC_DEBUG
static int vgen_force_link_state(vgen_port_t *portp, int link_state);
#endif
  91 
/*
 * Port/LDC Configuration
 *
 * File-local (static) helpers. Going by their signatures, they operate on
 * the vgen instance (vgen_t), on a single port (vgen_port_t) or on a single
 * channel (vgen_ldc_t); several also take machine description handles
 * (md_t / mde_cookie_t) or an MD event generator result (mdeg_result_t).
 */
static int vgen_read_mdprops(vgen_t *vgenp);
static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
        mde_cookie_t node);
static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
        uint32_t *mtu);
static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
        boolean_t *pls);
static void vgen_detach_ports(vgen_t *vgenp);
static void vgen_port_detach(vgen_port_t *portp);
static void vgen_port_list_insert(vgen_port_t *portp);
static void vgen_port_list_remove(vgen_port_t *portp);
static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
        int port_num);
static int vgen_mdeg_reg(vgen_t *vgenp);
static void vgen_mdeg_unreg(vgen_t *vgenp);
static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
        mde_cookie_t mdex);
static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
static int vgen_port_attach(vgen_port_t *portp);
static void vgen_port_detach_mdeg(vgen_port_t *portp);
static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
        mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
static uint64_t vgen_port_stat(vgen_port_t *portp, uint_t stat);
static void vgen_port_reset(vgen_port_t *portp);
static void vgen_reset_vsw_port(vgen_t *vgenp);
static int vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
static void vgen_ldc_up(vgen_ldc_t *ldcp);
static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
static void vgen_ldc_detach(vgen_ldc_t *ldcp);
static void vgen_port_init(vgen_port_t *portp);
static void vgen_port_uninit(vgen_port_t *portp);
static int vgen_ldc_init(vgen_ldc_t *ldcp);
static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
static uint64_t vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
 131 
/*
 * I/O Processing
 *
 * File-local routines. vgen_portsend() is the routine vgen_tx() hands each
 * outgoing frame to. The routines taking 'void *arg' receive an opaque
 * argument whose real type is not visible from these declarations.
 */
static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
static int vgen_ldcsend(void *arg, mblk_t *mp);
static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
static void vgen_tx_watchdog(void *arg);
 138 
/*
 * Dring Configuration
 *
 * File-local routines; each takes the channel (vgen_ldc_t) it acts on.
 * vgen_map_dring() additionally takes an opaque 'pkt' pointer.
 */
static int vgen_create_dring(vgen_ldc_t *ldcp);
static void vgen_destroy_dring(vgen_ldc_t *ldcp);
static int vgen_map_dring(vgen_ldc_t *ldcp, void *pkt);
static void vgen_unmap_dring(vgen_ldc_t *ldcp);
static int vgen_mapin_avail(vgen_ldc_t *ldcp);
 145 
/*
 * VIO Message Processing
 *
 * File-local routines. All of them take the channel (vgen_ldc_t) as their
 * first argument, except vgen_handle_pkt_data_nop() and vgen_hwatchdog(),
 * which take opaque pointers, and vgen_link_update(), which takes the vgen
 * instance (vgen_t).
 */
static int vgen_handshake(vgen_ldc_t *ldcp);
static int vgen_handshake_done(vgen_ldc_t *ldcp);
static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
static int vgen_handshake_phase3(vgen_ldc_t *ldcp);
static void vgen_setup_handshake_params(vgen_ldc_t *ldcp);
static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
static int vgen_send_attr_info(vgen_ldc_t *ldcp);
static int vgen_send_rx_dring_reg(vgen_ldc_t *ldcp);
static int vgen_send_tx_dring_reg(vgen_ldc_t *ldcp);
static void vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
        uint8_t option);
static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
        vio_msg_tag_t *tagp);
static int vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
static int vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
        uint32_t msglen);
static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
static int vgen_process_reset(vgen_ldc_t *ldcp, int flags);
static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static void vgen_hwatchdog(void *arg);
static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
 185 
/*
 * VLANs
 *
 * File-local routines. vgen_vlan_lookup() and vgen_vlan_frame_fixtag() are
 * used together on the transmit path: the latter calls the former to check
 * a port's vlan membership before tagging or untagging a frame.
 */
static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
        mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
        uint16_t *nvidsp, uint16_t *default_idp);
static void vgen_vlan_create_hash(vgen_port_t *portp);
static void vgen_vlan_destroy_hash(vgen_port_t *portp);
static void vgen_vlan_add_ids(vgen_port_t *portp);
static void vgen_vlan_remove_ids(vgen_port_t *portp);
static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
        uint16_t *vidp);
static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
        boolean_t is_tagged, uint16_t vid);
static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
 201 
/*
 * Exported functions
 *
 * Declared without 'static', so they have external linkage.
 * NOTE(review): presumably called from the other vgen transport source
 * files -- confirm against their callers.
 */
int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
void vgen_destroy_rxpools(void *arg);
 207 
/*
 * Externs
 *
 * Routines declared 'extern' here rather than through a header.
 * NOTE(review): presumably defined in other source files of the vnet
 * driver -- confirm where each one lives.
 */
extern void vnet_dds_rx(void *arg, void *dmsg);
extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
extern int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
    boolean_t caller_holds_lock);
extern void vgen_stop_msg_thread(vgen_ldc_t *ldcp);
extern int vgen_create_tx_dring(vgen_ldc_t *ldcp);
extern void vgen_destroy_tx_dring(vgen_ldc_t *ldcp);
extern int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt);
extern void vgen_unmap_rx_dring(vgen_ldc_t *ldcp);
extern int vgen_create_rx_dring(vgen_ldc_t *ldcp);
extern void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
extern int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
extern void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
extern int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
extern int vgen_handle_dringdata_shm(void *arg1, void *arg2);
extern int vgen_handle_dringdata(void *arg1, void *arg2);
extern int vgen_dringsend_shm(void *arg, mblk_t *mp);
extern int vgen_dringsend(void *arg, mblk_t *mp);
extern void vgen_ldc_msg_worker(void *arg);
extern int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
    uint32_t start, int32_t end, uint8_t pstate);
extern mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
extern mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup);
extern int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
 235 
 236 #define VGEN_PRI_ETH_DEFINED(vgenp)     ((vgenp)->pri_num_types != 0)
 237 
 238 #define LDC_LOCK(ldcp)  \
 239                                 mutex_enter(&((ldcp)->cblock));\
 240                                 mutex_enter(&((ldcp)->rxlock));\
 241                                 mutex_enter(&((ldcp)->wrlock));\
 242                                 mutex_enter(&((ldcp)->txlock));\
 243                                 mutex_enter(&((ldcp)->tclock));
 244 #define LDC_UNLOCK(ldcp)        \
 245                                 mutex_exit(&((ldcp)->tclock));\
 246                                 mutex_exit(&((ldcp)->txlock));\
 247                                 mutex_exit(&((ldcp)->wrlock));\
 248                                 mutex_exit(&((ldcp)->rxlock));\
 249                                 mutex_exit(&((ldcp)->cblock));
 250 
 251 #define VGEN_VER_EQ(ldcp, major, minor) \
 252         ((ldcp)->local_hparams.ver_major == (major) &&       \
 253             (ldcp)->local_hparams.ver_minor == (minor))
 254 
 255 #define VGEN_VER_LT(ldcp, major, minor) \
 256         (((ldcp)->local_hparams.ver_major < (major)) ||   \
 257             ((ldcp)->local_hparams.ver_major == (major) &&   \
 258             (ldcp)->local_hparams.ver_minor < (minor)))
 259 
 260 #define VGEN_VER_GTEQ(ldcp, major, minor)       \
 261         (((ldcp)->local_hparams.ver_major > (major)) ||   \
 262             ((ldcp)->local_hparams.ver_major == (major) &&   \
 263             (ldcp)->local_hparams.ver_minor >= (minor)))
 264 
/*
 * Property names
 *
 * String constants naming the properties this file works with.
 * NOTE(review): presumably these are machine description (MD) node and
 * property names; the lookup code is not visible here -- confirm.
 */
static char macaddr_propname[] = "mac-address";
static char rmacaddr_propname[] = "remote-mac-address";
static char channel_propname[] = "channel-endpoint";
static char reg_propname[] = "reg";
static char port_propname[] = "port";
static char swport_propname[] = "switch-port";
static char id_propname[] = "id";
static char vdev_propname[] = "virtual-device";
static char vnet_propname[] = "network";
static char pri_types_propname[] = "priority-ether-types";
static char vgen_pvid_propname[] = "port-vlan-id";
static char vgen_vid_propname[] = "vlan-id";
static char vgen_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
static char vgen_mtu_propname[] = "mtu";
static char vgen_linkprop_propname[] = "linkprop";
 285 
/*
 * VIO Protocol Version Info:
 *
 * The version specified below represents the version of protocol currently
 * supported in the driver. It means the driver can negotiate with peers with
 * versions <= this version. Here is a summary of the feature(s) that are
 * supported at each version of the protocol:
 *
 * 1.0                  Basic VIO protocol.
 * 1.1                  vDisk protocol update (no virtual network update).
 * 1.2                  Support for priority frames (priority-ether-types).
 * 1.3                  VLAN and HybridIO support.
 * 1.4                  Jumbo Frame support.
 * 1.5                  Link State Notification support with optional support
 *                      for Physical Link information.
 * 1.6                  Support for RxDringData mode.
 *
 * Only the first element of the array is given an explicit initializer
 * ({major 1, minor 6}); if VGEN_NUM_VER is greater than 1, the remaining
 * elements are zero-initialized.
 */
static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 6} };
 304 
/*
 * Tunables
 *
 * These are non-static globals, so they are visible outside this file.
 */
uint32_t vgen_hwd_interval = 5;         /* handshake watchdog freq in sec */
uint32_t vgen_ldcwr_retries = 10;       /* max # of ldc_write() retries */
uint32_t vgen_ldcup_retries = 5;        /* max # of ldc_up() retries */
uint32_t vgen_ldccl_retries = 5;        /* max # of ldc_close() retries */
/* NOTE(review): the unit of vgen_tx_delay is not stated here -- confirm */
uint32_t vgen_tx_delay = 0x30;          /* delay when tx descr not available */
uint32_t vgen_ldc_mtu = VGEN_LDC_MTU;           /* ldc mtu */
uint32_t vgen_txwd_interval = VGEN_TXWD_INTERVAL; /* watchdog freq in msec */
uint32_t vgen_txwd_timeout = VGEN_TXWD_TIMEOUT;   /* tx timeout in msec */

/*
 * Max # of channel resets allowed during handshake.
 */
uint32_t vgen_ldc_max_resets = 5;

/*
 * See comments in vsw.c for details on the dring modes supported.
 * In RxDringData mode, # of buffers is determined by multiplying the # of
 * descriptors with the factor below. Note that the factor must be > 1; i.e,
 * the # of buffers must always be > # of descriptors. This is needed because,
 * while the shared memory buffers are sent up the stack on the receiver, the
 * sender needs additional buffers that can be used for further transmits.
 * See vgen_create_rx_dring() for details.
 */
uint32_t vgen_nrbufs_factor = 2;

/*
 * Retry delay used while destroying rx mblk pools. Used in both Dring modes.
 * The value is in microseconds (100000 us == 100 ms).
 */
int vgen_rxpool_cleanup_delay = 100000; /* 100ms */

/*
 * Delay when rx descr not ready; used in TxDring mode only.
 * NOTE(review): the unit is not stated here -- confirm against the user.
 */
uint32_t vgen_recv_delay = 1;

/*
 * Retry when rx descr not ready; used in TxDring mode only.
 */
uint32_t vgen_recv_retries = 10;

/*
 * Max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers. Used in TxDring mode
 * by the msg worker thread. Used in RxDringData mode while in interrupt mode
 * (not used in polled mode).
 *
 * The initializer is evaluated in floating point (VGEN_NRBUFS * 0.6) and the
 * fractional part is discarded when it is converted to uint32_t.
 */
uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);

/*
 * Internal tunables for receive buffer pools, that is,  the size and number of
 * mblks for each pool. At least 3 sizes must be specified if these are used.
 * The sizes must be specified in increasing order. Non-zero value of the first
 * size will be used as a hint to use these values instead of the algorithm
 * that determines the sizes based on MTU. Used in TxDring mode only.
 */
uint32_t vgen_rbufsz1 = 0;
uint32_t vgen_rbufsz2 = 0;
uint32_t vgen_rbufsz3 = 0;
uint32_t vgen_rbufsz4 = 0;

/* Number of mblks in each of the four pools; all default to VGEN_NRBUFS. */
uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
uint32_t vgen_nrbufs4 = VGEN_NRBUFS;

/*
 * In the absence of "priority-ether-types" property in MD, the following
 * internal tunable can be set to specify a single priority ethertype.
 * The default of 0 leaves it unset.
 */
uint64_t vgen_pri_eth_type = 0;

/*
 * Number of transmit priority buffers that are preallocated per device.
 * This number is chosen to be a small value to throttle transmission
 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
 */
uint32_t vgen_pri_tx_nmblks = 64;

uint32_t        vgen_vlan_nchains = 4;  /* # of chains in vlan id hash table */
 385 
/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
 * by their 'name' and 'cfg-handle' properties.
 *
 * The property list is terminated by the MDET_LIST_END entry.
 */
static md_prop_match_t vdev_prop_match[] = {
        { MDET_PROP_STR,    "name"   },
        { MDET_PROP_VAL,    "cfg-handle" },
        { MDET_LIST_END,    NULL    }
};

/* Pairs the "virtual-device" node name with the property list above. */
static mdeg_node_match_t vdev_match = { "virtual-device",
                                                vdev_prop_match };
 399 
/*
 * MD update matching structure: "virtual-device-port" nodes are matched on
 * their "id" property. The property list is terminated by MDET_LIST_END.
 */
static md_prop_match_t  vport_prop_match[] = {
        { MDET_PROP_VAL,        "id" },
        { MDET_LIST_END,        NULL }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
                                        vport_prop_match };
 408 
 409 /* Template for matching a particular vnet instance */
 410 static mdeg_prop_spec_t vgen_prop_template[] = {
 411         { MDET_PROP_STR,        "name",         "network" },
 412         { MDET_PROP_VAL,        "cfg-handle",   NULL },
 413         { MDET_LIST_END,        NULL,           NULL }
 414 };
 415 
 416 #define VGEN_SET_MDEG_PROP_INST(specp, val)     (specp)[1].ps_val = (val)
 417 
 418 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
 419 
/*
 * Flags word for the callback table below: MC_IOCTL is set only when the
 * driver is built with VNET_IOC_DEBUG; otherwise no flag bits are set.
 */
#ifdef  VNET_IOC_DEBUG
#define VGEN_M_CALLBACK_FLAGS   (MC_IOCTL)
#else
#define VGEN_M_CALLBACK_FLAGS   (0)
#endif

/*
 * MAC callback table for vgen. The initializers are positional.
 * NOTE(review): confirm the member order against the mac_callbacks_t
 * definition in <sys/mac_provider.h> before adding or moving entries.
 * vgen_ioctl is listed unconditionally, even when VGEN_M_CALLBACK_FLAGS
 * is 0.
 */
static mac_callbacks_t vgen_m_callbacks = {
        VGEN_M_CALLBACK_FLAGS,
        vgen_stat,
        vgen_start,
        vgen_stop,
        vgen_promisc,
        vgen_multicst,
        vgen_unicst,
        vgen_tx,
        NULL,
        vgen_ioctl,
        NULL,
        NULL
};
 440 
/*
 * Externs: variables defined outside this file.
 * NOTE(review): the vnet_* ones are presumably tunables owned by the vnet
 * driver proper -- confirm where they are defined.
 */
extern pri_t    maxclsyspri;
extern proc_t   p0;
extern uint32_t vnet_ethermtu;
extern uint16_t vnet_default_vlan_id;
extern uint32_t vnet_num_descriptors;
 447 
 448 #ifdef DEBUG
 449 
 450 #define DEBUG_PRINTF    vgen_debug_printf
 451 
 452 extern int vnet_dbglevel;
 453 
 454 void vgen_debug_printf(const char *fname, vgen_t *vgenp,
 455         vgen_ldc_t *ldcp, const char *fmt, ...);
 456 
 457 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
 458 int vgendbg_ldcid = -1;
 459 
 460 /* Flags to simulate error conditions for debugging */
 461 int vgen_inject_err_flag = 0;
 462 
 463 
 464 boolean_t
 465 vgen_inject_error(vgen_ldc_t *ldcp, int error)
 466 {
 467         if ((vgendbg_ldcid == ldcp->ldc_id) &&
 468             (vgen_inject_err_flag & error)) {
 469                 return (B_TRUE);
 470         }
 471         return (B_FALSE);
 472 }
 473 
 474 #endif
 475 
 476 /*
 477  * vgen_init() is called by an instance of vnet driver to initialize the
 478  * corresponding generic transport layer. This layer uses Logical Domain
 479  * Channels (LDCs) to communicate with the virtual switch in the service domain
 480  * and also with peer vnets in other guest domains in the system.
 481  *
 482  * Arguments:
 483  *   vnetp:   an opaque pointer to the vnet instance
 484  *   regprop: frame to be transmitted
 485  *   vnetdip: dip of the vnet device
 486  *   macaddr: mac address of the vnet device
 487  *
 488  * Returns:
 489  *      Sucess:  a handle to the vgen instance (vgen_t)
 490  *      Failure: NULL
 491  */
 492 int
 493 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
 494     const uint8_t *macaddr, void **vgenhdl)
 495 {
 496         vgen_t  *vgenp;
 497         int     instance;
 498         int     rv;
 499         char    qname[TASKQ_NAMELEN];
 500 
 501         if ((vnetp == NULL) || (vnetdip == NULL))
 502                 return (DDI_FAILURE);
 503 
 504         instance = ddi_get_instance(vnetdip);
 505 
 506         DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
 507 
 508         vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
 509 
 510         vgenp->vnetp = vnetp;
 511         vgenp->instance = instance;
 512         vgenp->regprop = regprop;
 513         vgenp->vnetdip = vnetdip;
 514         bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
 515         vgenp->phys_link_state = LINK_STATE_UNKNOWN;
 516 
 517         /* allocate multicast table */
 518         vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
 519             sizeof (struct ether_addr), KM_SLEEP);
 520         vgenp->mccount = 0;
 521         vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
 522 
 523         mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
 524         rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
 525 
 526         (void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d",
 527             instance);
 528         if ((vgenp->rxp_taskq = ddi_taskq_create(vnetdip, qname, 1,
 529             TASKQ_DEFAULTPRI, 0)) == NULL) {
 530                 cmn_err(CE_WARN, "!vnet%d: Unable to create rx pool task queue",
 531                     instance);
 532                 goto vgen_init_fail;
 533         }
 534 
 535         rv = vgen_read_mdprops(vgenp);
 536         if (rv != 0) {
 537                 goto vgen_init_fail;
 538         }
 539         *vgenhdl = (void *)vgenp;
 540 
 541         DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
 542         return (DDI_SUCCESS);
 543 
 544 vgen_init_fail:
 545         rw_destroy(&vgenp->vgenports.rwlock);
 546         mutex_destroy(&vgenp->lock);
 547         kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
 548             sizeof (struct ether_addr));
 549         if (VGEN_PRI_ETH_DEFINED(vgenp)) {
 550                 kmem_free(vgenp->pri_types,
 551                     sizeof (uint16_t) * vgenp->pri_num_types);
 552                 (void) vio_destroy_mblks(vgenp->pri_tx_vmp);
 553         }
 554         if (vgenp->rxp_taskq != NULL) {
 555                 ddi_taskq_destroy(vgenp->rxp_taskq);
 556                 vgenp->rxp_taskq = NULL;
 557         }
 558         KMEM_FREE(vgenp);
 559         return (DDI_FAILURE);
 560 }
 561 
 562 int
 563 vgen_init_mdeg(void *arg)
 564 {
 565         vgen_t  *vgenp = (vgen_t *)arg;
 566 
 567         /* register with MD event generator */
 568         return (vgen_mdeg_reg(vgenp));
 569 }
 570 
 571 /*
 572  * Called by vnet to undo the initializations done by vgen_init().
 573  * The handle provided by generic transport during vgen_init() is the argument.
 574  */
 575 void
 576 vgen_uninit(void *arg)
 577 {
 578         vgen_t  *vgenp = (vgen_t *)arg;
 579 
 580         if (vgenp == NULL) {
 581                 return;
 582         }
 583 
 584         DBG1(vgenp, NULL, "enter\n");
 585 
 586         /* Unregister with MD event generator */
 587         vgen_mdeg_unreg(vgenp);
 588 
 589         mutex_enter(&vgenp->lock);
 590 
 591         /*
 592          * Detach all ports from the device; note that the device should have
 593          * been unplumbed by this time (See vnet_unattach() for the sequence)
 594          * and thus vgen_stop() has already been invoked on all the ports.
 595          */
 596         vgen_detach_ports(vgenp);
 597 
 598         /*
 599          * We now destroy the taskq used to clean up rx mblk pools that
 600          * couldn't be destroyed when the ports/channels were detached.
 601          * We implicitly wait for those tasks to complete in
 602          * ddi_taskq_destroy().
 603          */
 604         if (vgenp->rxp_taskq != NULL) {
 605                 ddi_taskq_destroy(vgenp->rxp_taskq);
 606                 vgenp->rxp_taskq = NULL;
 607         }
 608 
 609         /* Free multicast table */
 610         kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
 611 
 612         /* Free pri_types table */
 613         if (VGEN_PRI_ETH_DEFINED(vgenp)) {
 614                 kmem_free(vgenp->pri_types,
 615                     sizeof (uint16_t) * vgenp->pri_num_types);
 616                 (void) vio_destroy_mblks(vgenp->pri_tx_vmp);
 617         }
 618 
 619         mutex_exit(&vgenp->lock);
 620         rw_destroy(&vgenp->vgenports.rwlock);
 621         mutex_destroy(&vgenp->lock);
 622 
 623         DBG1(vgenp, NULL, "exit\n");
 624         KMEM_FREE(vgenp);
 625 }
 626 
 627 /* enable transmit/receive for the device */
 628 int
 629 vgen_start(void *arg)
 630 {
 631         vgen_port_t     *portp = (vgen_port_t *)arg;
 632         vgen_t          *vgenp = portp->vgenp;
 633 
 634         DBG1(vgenp, NULL, "enter\n");
 635         mutex_enter(&portp->lock);
 636         vgen_port_init(portp);
 637         portp->flags |= VGEN_STARTED;
 638         mutex_exit(&portp->lock);
 639         DBG1(vgenp, NULL, "exit\n");
 640 
 641         return (DDI_SUCCESS);
 642 }
 643 
 644 /* stop transmit/receive */
 645 void
 646 vgen_stop(void *arg)
 647 {
 648         vgen_port_t     *portp = (vgen_port_t *)arg;
 649         vgen_t          *vgenp = portp->vgenp;
 650 
 651         DBG1(vgenp, NULL, "enter\n");
 652 
 653         mutex_enter(&portp->lock);
 654         if (portp->flags & VGEN_STARTED) {
 655                 vgen_port_uninit(portp);
 656                 portp->flags &= ~(VGEN_STARTED);
 657         }
 658         mutex_exit(&portp->lock);
 659         DBG1(vgenp, NULL, "exit\n");
 660 
 661 }
 662 
 663 /* vgen transmit function */
 664 static mblk_t *
 665 vgen_tx(void *arg, mblk_t *mp)
 666 {
 667         vgen_port_t     *portp;
 668         int             status;
 669 
 670         portp = (vgen_port_t *)arg;
 671         status = vgen_portsend(portp, mp);
 672         if (status != VGEN_SUCCESS) {
 673                 /* failure */
 674                 return (mp);
 675         }
 676         /* success */
 677         return (NULL);
 678 }
 679 
 680 /*
 681  * This function provides any necessary tagging/untagging of the frames
 682  * that are being transmitted over the port. It first verifies the vlan
 683  * membership of the destination(port) and drops the packet if the
 684  * destination doesn't belong to the given vlan.
 685  *
 686  * Arguments:
 687  *   portp:     port over which the frames should be transmitted
 688  *   mp:        frame to be transmitted
 689  *   is_tagged:
 690  *              B_TRUE: indicates frame header contains the vlan tag already.
 691  *              B_FALSE: indicates frame is untagged.
 692  *   vid:       vlan in which the frame should be transmitted.
 693  *
 694  * Returns:
 695  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
 696  *              Failure: NULL
 697  */
 698 static mblk_t *
 699 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
 700         uint16_t vid)
 701 {
 702         vgen_t          *vgenp;
 703         boolean_t       dst_tagged;
 704         int             rv;
 705 
 706         vgenp = portp->vgenp;
 707 
 708         /*
 709          * If the packet is going to a vnet:
 710          *   Check if the destination vnet is in the same vlan.
 711          *   Check the frame header if tag or untag is needed.
 712          *
 713          * We do not check the above conditions if the packet is going to vsw:
 714          *   vsw must be present implicitly in all the vlans that a vnet device
 715          *   is configured into; even if vsw itself is not assigned to those
 716          *   vlans as an interface. For instance, the packet might be destined
 717          *   to another vnet(indirectly through vsw) or to an external host
 718          *   which is in the same vlan as this vnet and vsw itself may not be
 719          *   present in that vlan. Similarly packets going to vsw must be
 720          *   always tagged(unless in the default-vlan) if not already tagged,
 721          *   as we do not know the final destination. This is needed because
 722          *   vsw must always invoke its switching function only after tagging
 723          *   the packet; otherwise after switching function determines the
 724          *   destination we cannot figure out if the destination belongs to the
 725          *   the same vlan that the frame originated from and if it needs tag/
 726          *   untag. Note that vsw will tag the packet itself when it receives
 727          *   it over the channel from a client if needed. However, that is
 728          *   needed only in the case of vlan unaware clients such as obp or
 729          *   earlier versions of vnet.
 730          *
 731          */
 732         if (portp != vgenp->vsw_portp) {
 733                 /*
 734                  * Packet going to a vnet. Check if the destination vnet is in
 735                  * the same vlan. Then check the frame header if tag/untag is
 736                  * needed.
 737                  */
 738                 rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
 739                 if (rv == B_FALSE) {
 740                         /* drop the packet */
 741                         freemsg(mp);
 742                         return (NULL);
 743                 }
 744 
 745                 /* is the destination tagged or untagged in this vlan? */
 746                 (vid == portp->pvid) ? (dst_tagged = B_FALSE) :
 747                     (dst_tagged = B_TRUE);
 748 
 749                 if (is_tagged == dst_tagged) {
 750                         /* no tagging/untagging needed */
 751                         return (mp);
 752                 }
 753 
 754                 if (is_tagged == B_TRUE) {
 755                         /* frame is tagged; destination needs untagged */
 756                         mp = vnet_vlan_remove_tag(mp);
 757                         return (mp);
 758                 }
 759 
 760                 /* (is_tagged == B_FALSE): fallthru to tag tx packet: */
 761         }
 762 
 763         /*
 764          * Packet going to a vnet needs tagging.
 765          * OR
 766          * If the packet is going to vsw, then it must be tagged in all cases:
 767          * unknown unicast, broadcast/multicast or to vsw interface.
 768          */
 769 
 770         if (is_tagged == B_FALSE) {
 771                 mp = vnet_vlan_insert_tag(mp, vid);
 772         }
 773 
 774         return (mp);
 775 }
 776 
 777 /* transmit packets over the given port */
 778 static int
 779 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
 780 {
 781         vgen_ldc_t              *ldcp;
 782         int                     status;
 783         int                     rv = VGEN_SUCCESS;
 784         vgen_t                  *vgenp = portp->vgenp;
 785         vnet_t                  *vnetp = vgenp->vnetp;
 786         boolean_t               is_tagged;
 787         boolean_t               dec_refcnt = B_FALSE;
 788         uint16_t                vlan_id;
 789         struct ether_header     *ehp;
 790 
 791         if (portp == NULL) {
 792                 return (VGEN_FAILURE);
 793         }
 794 
 795         if (portp->use_vsw_port) {
 796                 (void) atomic_inc_32(&vgenp->vsw_port_refcnt);
 797                 portp = portp->vgenp->vsw_portp;
 798                 ASSERT(portp != NULL);
 799                 dec_refcnt = B_TRUE;
 800         }
 801 
 802         /*
 803          * Determine the vlan id that the frame belongs to.
 804          */
 805         ehp = (struct ether_header *)mp->b_rptr;
 806         is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
 807 
 808         if (vlan_id == vnetp->default_vlan_id) {
 809 
 810                 /* Frames in default vlan must be untagged */
 811                 ASSERT(is_tagged == B_FALSE);
 812 
 813                 /*
 814                  * If the destination is a vnet-port verify it belongs to the
 815                  * default vlan; otherwise drop the packet. We do not need
 816                  * this check for vsw-port, as it should implicitly belong to
 817                  * this vlan; see comments in vgen_vlan_frame_fixtag().
 818                  */
 819                 if (portp != vgenp->vsw_portp &&
 820                     portp->pvid != vnetp->default_vlan_id) {
 821                         freemsg(mp);
 822                         goto portsend_ret;
 823                 }
 824 
 825         } else {        /* frame not in default-vlan */
 826 
 827                 mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
 828                 if (mp == NULL) {
 829                         goto portsend_ret;
 830                 }
 831 
 832         }
 833 
 834         ldcp = portp->ldcp;
 835         status = ldcp->tx(ldcp, mp);
 836 
 837         if (status != VGEN_TX_SUCCESS) {
 838                 rv = VGEN_FAILURE;
 839         }
 840 
 841 portsend_ret:
 842         if (dec_refcnt == B_TRUE) {
 843                 (void) atomic_dec_32(&vgenp->vsw_port_refcnt);
 844         }
 845         return (rv);
 846 }
 847 
 848 /*
 849  * Wrapper function to transmit normal and/or priority frames over the channel.
 850  */
 851 static int
 852 vgen_ldcsend(void *arg, mblk_t *mp)
 853 {
 854         vgen_ldc_t              *ldcp = (vgen_ldc_t *)arg;
 855         int                     status;
 856         struct ether_header     *ehp;
 857         vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
 858         uint32_t                num_types;
 859         uint16_t                *types;
 860         int                     i;
 861 
 862         ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
 863 
 864         num_types = vgenp->pri_num_types;
 865         types = vgenp->pri_types;
 866         ehp = (struct ether_header *)mp->b_rptr;
 867 
 868         for (i = 0; i < num_types; i++) {
 869 
 870                 if (ehp->ether_type == types[i]) {
 871                         /* priority frame, use pri tx function */
 872                         vgen_ldcsend_pkt(ldcp, mp);
 873                         return (VGEN_SUCCESS);
 874                 }
 875 
 876         }
 877 
 878         if (ldcp->tx_dringdata == NULL) {
 879                 freemsg(mp);
 880                 return (VGEN_SUCCESS);
 881         }
 882 
 883         status  = ldcp->tx_dringdata(ldcp, mp);
 884         return (status);
 885 }
 886 
 887 /*
 888  * This function transmits the frame in the payload of a raw data
 889  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
 890  * send special frames with high priorities, without going through
 891  * the normal data path which uses descriptor ring mechanism.
 892  */
 893 static void
 894 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
 895 {
 896         vgen_ldc_t              *ldcp = (vgen_ldc_t *)arg;
 897         vio_raw_data_msg_t      *pkt;
 898         mblk_t                  *bp;
 899         mblk_t                  *nmp = NULL;
 900         vio_mblk_t              *vmp;
 901         caddr_t                 dst;
 902         uint32_t                mblksz;
 903         uint32_t                size;
 904         uint32_t                nbytes;
 905         int                     rv;
 906         vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
 907         vgen_stats_t            *statsp = &ldcp->stats;
 908 
 909         /* drop the packet if ldc is not up or handshake is not done */
 910         if (ldcp->ldc_status != LDC_UP) {
 911                 (void) atomic_inc_32(&statsp->tx_pri_fail);
 912                 DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
 913                     ldcp->ldc_status);
 914                 goto send_pkt_exit;
 915         }
 916 
 917         if (ldcp->hphase != VH_DONE) {
 918                 (void) atomic_inc_32(&statsp->tx_pri_fail);
 919                 DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
 920                     ldcp->hphase);
 921                 goto send_pkt_exit;
 922         }
 923 
 924         size = msgsize(mp);
 925 
 926         /* frame size bigger than available payload len of raw data msg ? */
 927         if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
 928                 (void) atomic_inc_32(&statsp->tx_pri_fail);
 929                 DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
 930                 goto send_pkt_exit;
 931         }
 932 
 933         if (size < ETHERMIN)
 934                 size = ETHERMIN;
 935 
 936         /* alloc space for a raw data message */
 937         vmp = vio_allocb(vgenp->pri_tx_vmp);
 938         if (vmp == NULL) {
 939                 (void) atomic_inc_32(&statsp->tx_pri_fail);
 940                 DWARN(vgenp, ldcp, "vio_allocb failed\n");
 941                 goto send_pkt_exit;
 942         } else {
 943                 nmp = vmp->mp;
 944         }
 945         pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
 946 
 947         /* copy frame into the payload of raw data message */
 948         dst = (caddr_t)pkt->data;
 949         for (bp = mp; bp != NULL; bp = bp->b_cont) {
 950                 mblksz = MBLKL(bp);
 951                 bcopy(bp->b_rptr, dst, mblksz);
 952                 dst += mblksz;
 953         }
 954 
 955         vmp->state = VIO_MBLK_HAS_DATA;
 956 
 957         /* setup the raw data msg */
 958         pkt->tag.vio_msgtype = VIO_TYPE_DATA;
 959         pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
 960         pkt->tag.vio_subtype_env = VIO_PKT_DATA;
 961         pkt->tag.vio_sid = ldcp->local_sid;
 962         nbytes = VIO_PKT_DATA_HDRSIZE + size;
 963 
 964         /* send the msg over ldc */
 965         rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
 966         if (rv != VGEN_SUCCESS) {
 967                 (void) atomic_inc_32(&statsp->tx_pri_fail);
 968                 DWARN(vgenp, ldcp, "Error sending priority frame\n");
 969                 if (rv == ECONNRESET) {
 970                         (void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
 971                 }
 972                 goto send_pkt_exit;
 973         }
 974 
 975         /* update stats */
 976         (void) atomic_inc_64(&statsp->tx_pri_packets);
 977         (void) atomic_add_64(&statsp->tx_pri_bytes, size);
 978 
 979 send_pkt_exit:
 980         if (nmp != NULL)
 981                 freemsg(nmp);
 982         freemsg(mp);
 983 }
 984 
 985 /*
 986  * enable/disable a multicast address
 987  * note that the cblock of the ldc channel connected to the vsw is used for
 988  * synchronization of the mctab.
 989  */
 990 int
 991 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
 992 {
 993         vgen_t                  *vgenp;
 994         vnet_mcast_msg_t        mcastmsg;
 995         vio_msg_tag_t           *tagp;
 996         vgen_port_t             *portp;
 997         vgen_ldc_t              *ldcp;
 998         struct ether_addr       *addrp;
 999         int                     rv = DDI_FAILURE;
1000         uint32_t                i;
1001 
1002         portp = (vgen_port_t *)arg;
1003         vgenp = portp->vgenp;
1004 
1005         if (portp->is_vsw_port != B_TRUE) {
1006                 return (DDI_SUCCESS);
1007         }
1008 
1009         addrp = (struct ether_addr *)mca;
1010         tagp = &mcastmsg.tag;
1011         bzero(&mcastmsg, sizeof (mcastmsg));
1012 
1013         ldcp = portp->ldcp;
1014         if (ldcp == NULL) {
1015                 return (DDI_FAILURE);
1016         }
1017 
1018         mutex_enter(&ldcp->cblock);
1019 
1020         if (ldcp->hphase == VH_DONE) {
1021                 /*
1022                  * If handshake is done, send a msg to vsw to add/remove
1023                  * the multicast address. Otherwise, we just update this
1024                  * mcast address in our table and the table will be sync'd
1025                  * with vsw when handshake completes.
1026                  */
1027                 tagp->vio_msgtype = VIO_TYPE_CTRL;
1028                 tagp->vio_subtype = VIO_SUBTYPE_INFO;
1029                 tagp->vio_subtype_env = VNET_MCAST_INFO;
1030                 tagp->vio_sid = ldcp->local_sid;
1031                 bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1032                 mcastmsg.set = add;
1033                 mcastmsg.count = 1;
1034                 if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1035                     B_FALSE) != VGEN_SUCCESS) {
1036                         DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1037                         rv = DDI_FAILURE;
1038                         goto vgen_mcast_exit;
1039                 }
1040         }
1041 
1042         if (add) {
1043 
1044                 /* expand multicast table if necessary */
1045                 if (vgenp->mccount >= vgenp->mcsize) {
1046                         struct ether_addr       *newtab;
1047                         uint32_t                newsize;
1048 
1049 
1050                         newsize = vgenp->mcsize * 2;
1051 
1052                         newtab = kmem_zalloc(newsize *
1053                             sizeof (struct ether_addr), KM_NOSLEEP);
1054                         if (newtab == NULL)
1055                                 goto vgen_mcast_exit;
1056                         bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1057                             sizeof (struct ether_addr));
1058                         kmem_free(vgenp->mctab,
1059                             vgenp->mcsize * sizeof (struct ether_addr));
1060 
1061                         vgenp->mctab = newtab;
1062                         vgenp->mcsize = newsize;
1063                 }
1064 
1065                 /* add address to the table */
1066                 vgenp->mctab[vgenp->mccount++] = *addrp;
1067 
1068         } else {
1069 
1070                 /* delete address from the table */
1071                 for (i = 0; i < vgenp->mccount; i++) {
1072                         if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1073 
1074                                 /*
1075                                  * If there's more than one address in this
1076                                  * table, delete the unwanted one by moving
1077                                  * the last one in the list over top of it;
1078                                  * otherwise, just remove it.
1079                                  */
1080                                 if (vgenp->mccount > 1) {
1081                                         vgenp->mctab[i] =
1082                                             vgenp->mctab[vgenp->mccount-1];
1083                                 }
1084                                 vgenp->mccount--;
1085                                 break;
1086                         }
1087                 }
1088         }
1089 
1090         rv = DDI_SUCCESS;
1091 
1092 vgen_mcast_exit:
1093 
1094         mutex_exit(&ldcp->cblock);
1095         return (rv);
1096 }
1097 
1098 /* set or clear promiscuous mode on the device */
1099 static int
1100 vgen_promisc(void *arg, boolean_t on)
1101 {
1102         _NOTE(ARGUNUSED(arg, on))
1103         return (DDI_SUCCESS);
1104 }
1105 
1106 /* set the unicast mac address of the device */
1107 static int
1108 vgen_unicst(void *arg, const uint8_t *mca)
1109 {
1110         _NOTE(ARGUNUSED(arg, mca))
1111         return (DDI_SUCCESS);
1112 }
1113 
1114 /* get device statistics */
1115 int
1116 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1117 {
1118         vgen_port_t     *portp = (vgen_port_t *)arg;
1119 
1120         *val = vgen_port_stat(portp, stat);
1121         return (0);
1122 }
1123 
1124 /* vgen internal functions */
1125 /* detach all ports from the device */
1126 static void
1127 vgen_detach_ports(vgen_t *vgenp)
1128 {
1129         vgen_port_t     *portp;
1130         vgen_portlist_t *plistp;
1131 
1132         plistp = &(vgenp->vgenports);
1133         WRITE_ENTER(&plistp->rwlock);
1134         while ((portp = plistp->headp) != NULL) {
1135                 vgen_port_detach(portp);
1136         }
1137         RW_EXIT(&plistp->rwlock);
1138 }
1139 
1140 /*
1141  * detach the given port.
1142  */
1143 static void
1144 vgen_port_detach(vgen_port_t *portp)
1145 {
1146         vgen_t          *vgenp;
1147         int             port_num;
1148 
1149         vgenp = portp->vgenp;
1150         port_num = portp->port_num;
1151 
1152         DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1153 
1154         /*
1155          * If this port is connected to the vswitch, then
1156          * potentially there could be ports that may be using
1157          * this port to transmit packets. To address this do
1158          * the following:
1159          *      - First set vgenp->vsw_portp to NULL, so that
1160          *        its not used after that.
1161          *      - Then wait for the refcnt to go down to 0.
1162          *      - Now we can safely detach this port.
1163          */
1164         if (vgenp->vsw_portp == portp) {
1165                 vgenp->vsw_portp = NULL;
1166                 while (vgenp->vsw_port_refcnt > 0) {
1167                         delay(drv_usectohz(vgen_tx_delay));
1168                 }
1169                 (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1170         }
1171 
1172         if (portp->vhp != NULL) {
1173                 vio_net_resource_unreg(portp->vhp);
1174                 portp->vhp = NULL;
1175         }
1176 
1177         vgen_vlan_destroy_hash(portp);
1178 
1179         /* remove it from port list */
1180         vgen_port_list_remove(portp);
1181 
1182         /* detach channels from this port */
1183         vgen_ldc_detach(portp->ldcp);
1184 
1185         if (portp->num_ldcs != 0) {
1186                 kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1187                 portp->num_ldcs = 0;
1188         }
1189 
1190         mutex_destroy(&portp->lock);
1191         KMEM_FREE(portp);
1192 
1193         DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1194 }
1195 
1196 /* add a port to port list */
1197 static void
1198 vgen_port_list_insert(vgen_port_t *portp)
1199 {
1200         vgen_portlist_t *plistp;
1201         vgen_t          *vgenp;
1202 
1203         vgenp = portp->vgenp;
1204         plistp = &(vgenp->vgenports);
1205 
1206         if (plistp->headp == NULL) {
1207                 plistp->headp = portp;
1208         } else {
1209                 plistp->tailp->nextp = portp;
1210         }
1211         plistp->tailp = portp;
1212         portp->nextp = NULL;
1213 }
1214 
1215 /* remove a port from port list */
1216 static void
1217 vgen_port_list_remove(vgen_port_t *portp)
1218 {
1219         vgen_port_t     *prevp;
1220         vgen_port_t     *nextp;
1221         vgen_portlist_t *plistp;
1222         vgen_t          *vgenp;
1223 
1224         vgenp = portp->vgenp;
1225 
1226         plistp = &(vgenp->vgenports);
1227 
1228         if (plistp->headp == NULL)
1229                 return;
1230 
1231         if (portp == plistp->headp) {
1232                 plistp->headp = portp->nextp;
1233                 if (portp == plistp->tailp)
1234                         plistp->tailp = plistp->headp;
1235         } else {
1236                 for (prevp = plistp->headp;
1237                     ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1238                     prevp = nextp)
1239                         ;
1240                 if (nextp == portp) {
1241                         prevp->nextp = portp->nextp;
1242                 }
1243                 if (portp == plistp->tailp)
1244                         plistp->tailp = prevp;
1245         }
1246 }
1247 
1248 /* lookup a port in the list based on port_num */
1249 static vgen_port_t *
1250 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1251 {
1252         vgen_port_t *portp = NULL;
1253 
1254         for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1255                 if (portp->port_num == port_num) {
1256                         break;
1257                 }
1258         }
1259 
1260         return (portp);
1261 }
1262 
1263 static void
1264 vgen_port_init(vgen_port_t *portp)
1265 {
1266         /* Add the port to the specified vlans */
1267         vgen_vlan_add_ids(portp);
1268 
1269         /* Bring up the channel */
1270         (void) vgen_ldc_init(portp->ldcp);
1271 }
1272 
1273 static void
1274 vgen_port_uninit(vgen_port_t *portp)
1275 {
1276         vgen_ldc_uninit(portp->ldcp);
1277 
1278         /* remove the port from vlans it has been assigned to */
1279         vgen_vlan_remove_ids(portp);
1280 }
1281 
1282 /*
1283  * Scan the machine description for this instance of vnet
1284  * and read its properties. Called only from vgen_init().
1285  * Returns: 0 on success, 1 on failure.
1286  */
1287 static int
1288 vgen_read_mdprops(vgen_t *vgenp)
1289 {
1290         vnet_t          *vnetp = vgenp->vnetp;
1291         md_t            *mdp = NULL;
1292         mde_cookie_t    rootnode;
1293         mde_cookie_t    *listp = NULL;
1294         uint64_t        cfgh;
1295         char            *name;
1296         int             rv = 1;
1297         int             num_nodes = 0;
1298         int             num_devs = 0;
1299         int             listsz = 0;
1300         int             i;
1301 
1302         if ((mdp = md_get_handle()) == NULL) {
1303                 return (rv);
1304         }
1305 
1306         num_nodes = md_node_count(mdp);
1307         ASSERT(num_nodes > 0);
1308 
1309         listsz = num_nodes * sizeof (mde_cookie_t);
1310         listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1311 
1312         rootnode = md_root_node(mdp);
1313 
1314         /* search for all "virtual_device" nodes */
1315         num_devs = md_scan_dag(mdp, rootnode,
1316             md_find_name(mdp, vdev_propname),
1317             md_find_name(mdp, "fwd"), listp);
1318         if (num_devs <= 0) {
1319                 goto vgen_readmd_exit;
1320         }
1321 
1322         /*
1323          * Now loop through the list of virtual-devices looking for
1324          * devices with name "network" and for each such device compare
1325          * its instance with what we have from the 'reg' property to
1326          * find the right node in MD and then read all its properties.
1327          */
1328         for (i = 0; i < num_devs; i++) {
1329 
1330                 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1331                         goto vgen_readmd_exit;
1332                 }
1333 
1334                 /* is this a "network" device? */
1335                 if (strcmp(name, vnet_propname) != 0)
1336                         continue;
1337 
1338                 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1339                         goto vgen_readmd_exit;
1340                 }
1341 
1342                 /* is this the required instance of vnet? */
1343                 if (vgenp->regprop != cfgh)
1344                         continue;
1345 
1346                 /*
1347                  * Read the 'linkprop' property to know if this vnet
1348                  * device should get physical link updates from vswitch.
1349                  */
1350                 vgen_linkprop_read(vgenp, mdp, listp[i],
1351                     &vnetp->pls_update);
1352 
1353                 /*
1354                  * Read the mtu. Note that we set the mtu of vnet device within
1355                  * this routine itself, after validating the range.
1356                  */
1357                 vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1358                 if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1359                         vnetp->mtu = ETHERMTU;
1360                 }
1361                 vgenp->max_frame_size = vnetp->mtu +
1362                     sizeof (struct ether_header) + VLAN_TAGSZ;
1363 
1364                 /* read priority ether types */
1365                 vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1366 
1367                 /* read vlan id properties of this vnet instance */
1368                 vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1369                     &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1370                     &vnetp->default_vlan_id);
1371 
1372                 rv = 0;
1373                 break;
1374         }
1375 
1376 vgen_readmd_exit:
1377 
1378         kmem_free(listp, listsz);
1379         (void) md_fini_handle(mdp);
1380         return (rv);
1381 }
1382 
1383 /*
1384  * Read vlan id properties of the given MD node.
1385  * Arguments:
1386  *   arg:          device argument(vnet device or a port)
1387  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1388  *   mdp:          machine description
1389  *   node:         md node cookie
1390  *
1391  * Returns:
1392  *   pvidp:        port-vlan-id of the node
1393  *   vidspp:       list of vlan-ids of the node
1394  *   nvidsp:       # of vlan-ids in the list
1395  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1396  */
1397 static void
1398 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1399         uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1400         uint16_t *default_idp)
1401 {
1402         vgen_t          *vgenp;
1403         vnet_t          *vnetp;
1404         vgen_port_t     *portp;
1405         char            *pvid_propname;
1406         char            *vid_propname;
1407         uint_t          nvids;
1408         uint32_t        vids_size;
1409         int             rv;
1410         int             i;
1411         uint64_t        *data;
1412         uint64_t        val;
1413         int             size;
1414         int             inst;
1415 
1416         if (type == VGEN_LOCAL) {
1417 
1418                 vgenp = (vgen_t *)arg;
1419                 vnetp = vgenp->vnetp;
1420                 pvid_propname = vgen_pvid_propname;
1421                 vid_propname = vgen_vid_propname;
1422                 inst = vnetp->instance;
1423 
1424         } else if (type == VGEN_PEER) {
1425 
1426                 portp = (vgen_port_t *)arg;
1427                 vgenp = portp->vgenp;
1428                 vnetp = vgenp->vnetp;
1429                 pvid_propname = port_pvid_propname;
1430                 vid_propname = port_vid_propname;
1431                 inst = portp->port_num;
1432 
1433         } else {
1434                 return;
1435         }
1436 
1437         if (type == VGEN_LOCAL && default_idp != NULL) {
1438                 rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1439                 if (rv != 0) {
1440                         DWARN(vgenp, NULL, "prop(%s) not found",
1441                             vgen_dvid_propname);
1442 
1443                         *default_idp = vnet_default_vlan_id;
1444                 } else {
1445                         *default_idp = val & 0xFFF;
1446                         DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1447                             inst, *default_idp);
1448                 }
1449         }
1450 
1451         rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1452         if (rv != 0) {
1453                 DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1454                 *pvidp = vnet_default_vlan_id;
1455         } else {
1456 
1457                 *pvidp = val & 0xFFF;
1458                 DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1459                     pvid_propname, inst, *pvidp);
1460         }
1461 
1462         rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1463             &size);
1464         if (rv != 0) {
1465                 DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1466                 size = 0;
1467         } else {
1468                 size /= sizeof (uint64_t);
1469         }
1470         nvids = size;
1471 
1472         if (nvids != 0) {
1473                 DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1474                 vids_size = sizeof (uint16_t) * nvids;
1475                 *vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1476                 for (i = 0; i < nvids; i++) {
1477                         (*vidspp)[i] = data[i] & 0xFFFF;
1478                         DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1479                 }
1480                 DBG2(vgenp, NULL, "\n");
1481         }
1482 
1483         *nvidsp = nvids;
1484 }
1485 
1486 /*
1487  * Create a vlan id hash table for the given port.
1488  */
1489 static void
1490 vgen_vlan_create_hash(vgen_port_t *portp)
1491 {
1492         char            hashname[MAXNAMELEN];
1493 
1494         (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1495             portp->port_num);
1496 
1497         portp->vlan_nchains = vgen_vlan_nchains;
1498         portp->vlan_hashp = mod_hash_create_idhash(hashname,
1499             portp->vlan_nchains, mod_hash_null_valdtor);
1500 }
1501 
1502 /*
1503  * Destroy the vlan id hash table in the given port.
1504  */
1505 static void
1506 vgen_vlan_destroy_hash(vgen_port_t *portp)
1507 {
1508         if (portp->vlan_hashp != NULL) {
1509                 mod_hash_destroy_hash(portp->vlan_hashp);
1510                 portp->vlan_hashp = NULL;
1511                 portp->vlan_nchains = 0;
1512         }
1513 }
1514 
1515 /*
1516  * Add a port to the vlans specified in its port properites.
1517  */
1518 static void
1519 vgen_vlan_add_ids(vgen_port_t *portp)
1520 {
1521         int             rv;
1522         int             i;
1523 
1524         rv = mod_hash_insert(portp->vlan_hashp,
1525             (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1526             (mod_hash_val_t)B_TRUE);
1527         ASSERT(rv == 0);
1528 
1529         for (i = 0; i < portp->nvids; i++) {
1530                 rv = mod_hash_insert(portp->vlan_hashp,
1531                     (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1532                     (mod_hash_val_t)B_TRUE);
1533                 ASSERT(rv == 0);
1534         }
1535 }
1536 
1537 /*
1538  * Remove a port from the vlans it has been assigned to.
1539  */
1540 static void
1541 vgen_vlan_remove_ids(vgen_port_t *portp)
1542 {
1543         int             rv;
1544         int             i;
1545         mod_hash_val_t  vp;
1546 
1547         rv = mod_hash_remove(portp->vlan_hashp,
1548             (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1549             (mod_hash_val_t *)&vp);
1550         ASSERT(rv == 0);
1551 
1552         for (i = 0; i < portp->nvids; i++) {
1553                 rv = mod_hash_remove(portp->vlan_hashp,
1554                     (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1555                     (mod_hash_val_t *)&vp);
1556                 ASSERT(rv == 0);
1557         }
1558 }
1559 
1560 /*
1561  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1562  * then the vlan-id is available in the tag; otherwise, its vlan id is
1563  * implicitly obtained from the port-vlan-id of the vnet device.
1564  * The vlan id determined is returned in vidp.
1565  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1566  */
1567 static boolean_t
1568 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1569 {
1570         struct ether_vlan_header        *evhp;
1571 
1572         /* If it's a tagged frame, get the vlan id from vlan header */
1573         if (ehp->ether_type == ETHERTYPE_VLAN) {
1574 
1575                 evhp = (struct ether_vlan_header *)ehp;
1576                 *vidp = VLAN_ID(ntohs(evhp->ether_tci));
1577                 return (B_TRUE);
1578         }
1579 
1580         /* Untagged frame, vlan-id is the pvid of vnet device */
1581         *vidp = vnetp->pvid;
1582         return (B_FALSE);
1583 }
1584 
1585 /*
1586  * Find the given vlan id in the hash table.
1587  * Return: B_TRUE if the id is found; B_FALSE if not found.
1588  */
1589 static boolean_t
1590 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1591 {
1592         int             rv;
1593         mod_hash_val_t  vp;
1594 
1595         rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1596 
1597         if (rv != 0)
1598                 return (B_FALSE);
1599 
1600         return (B_TRUE);
1601 }
1602 
1603 /*
1604  * This function reads "priority-ether-types" property from md. This property
1605  * is used to enable support for priority frames. Applications which need
1606  * guaranteed and timely delivery of certain high priority frames to/from
1607  * a vnet or vsw within ldoms, should configure this property by providing
1608  * the ether type(s) for which the priority facility is needed.
1609  * Normal data frames are delivered over a ldc channel using the descriptor
1610  * ring mechanism which is constrained by factors such as descriptor ring size,
1611  * the rate at which the ring is processed at the peer ldc end point, etc.
1612  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1613  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1614  * descriptor ring path and enables a more reliable and timely delivery of
1615  * frames to the peer.
1616  */
1617 static void
1618 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1619 {
1620         int             rv;
1621         uint16_t        *types;
1622         uint64_t        *data;
1623         int             size;
1624         int             i;
1625         size_t          mblk_sz;
1626 
1627         rv = md_get_prop_data(mdp, node, pri_types_propname,
1628             (uint8_t **)&data, &size);
1629         if (rv != 0) {
1630                 /*
1631                  * Property may not exist if we are running pre-ldoms1.1 f/w.
1632                  * Check if 'vgen_pri_eth_type' has been set in that case.
1633                  */
1634                 if (vgen_pri_eth_type != 0) {
1635                         size = sizeof (vgen_pri_eth_type);
1636                         data = &vgen_pri_eth_type;
1637                 } else {
1638                         DBG2(vgenp, NULL,
1639                             "prop(%s) not found", pri_types_propname);
1640                         size = 0;
1641                 }
1642         }
1643 
1644         if (size == 0) {
1645                 vgenp->pri_num_types = 0;
1646                 return;
1647         }
1648 
1649         /*
1650          * we have some priority-ether-types defined;
1651          * allocate a table of these types and also
1652          * allocate a pool of mblks to transmit these
1653          * priority packets.
1654          */
1655         size /= sizeof (uint64_t);
1656         vgenp->pri_num_types = size;
1657         vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1658         for (i = 0, types = vgenp->pri_types; i < size; i++) {
1659                 types[i] = data[i] & 0xFFFF;
1660         }
1661         mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1662         (void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz, NULL,
1663             &vgenp->pri_tx_vmp);
1664 }
1665 
1666 static void
1667 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1668 {
1669         int             rv;
1670         uint64_t        val;
1671         char            *mtu_propname;
1672 
1673         mtu_propname = vgen_mtu_propname;
1674 
1675         rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1676         if (rv != 0) {
1677                 DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1678                 *mtu = vnet_ethermtu;
1679         } else {
1680 
1681                 *mtu = val & 0xFFFF;
1682                 DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1683                     vgenp->instance, *mtu);
1684         }
1685 }
1686 
1687 static void
1688 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
1689         boolean_t *pls)
1690 {
1691         int             rv;
1692         uint64_t        val;
1693         char            *linkpropname;
1694 
1695         linkpropname = vgen_linkprop_propname;
1696 
1697         rv = md_get_prop_val(mdp, node, linkpropname, &val);
1698         if (rv != 0) {
1699                 DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
1700                 *pls = B_FALSE;
1701         } else {
1702 
1703                 *pls = (val & 0x1) ?  B_TRUE : B_FALSE;
1704                 DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
1705                     vgenp->instance, *pls);
1706         }
1707 }
1708 
1709 /* register with MD event generator */
1710 static int
1711 vgen_mdeg_reg(vgen_t *vgenp)
1712 {
1713         mdeg_prop_spec_t        *pspecp;
1714         mdeg_node_spec_t        *parentp;
1715         uint_t                  templatesz;
1716         int                     rv;
1717         mdeg_handle_t           dev_hdl = NULL;
1718         mdeg_handle_t           port_hdl = NULL;
1719 
1720         templatesz = sizeof (vgen_prop_template);
1721         pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1722         if (pspecp == NULL) {
1723                 return (DDI_FAILURE);
1724         }
1725         parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1726         if (parentp == NULL) {
1727                 kmem_free(pspecp, templatesz);
1728                 return (DDI_FAILURE);
1729         }
1730 
1731         bcopy(vgen_prop_template, pspecp, templatesz);
1732 
1733         /*
1734          * NOTE: The instance here refers to the value of "reg" property and
1735          * not the dev_info instance (ddi_get_instance()) of vnet.
1736          */
1737         VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1738 
1739         parentp->namep = "virtual-device";
1740         parentp->specp = pspecp;
1741 
1742         /* save parentp in vgen_t */
1743         vgenp->mdeg_parentp = parentp;
1744 
1745         /*
1746          * Register an interest in 'virtual-device' nodes with a
1747          * 'name' property of 'network'
1748          */
1749         rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
1750         if (rv != MDEG_SUCCESS) {
1751                 DERR(vgenp, NULL, "mdeg_register failed\n");
1752                 goto mdeg_reg_fail;
1753         }
1754 
1755         /* Register an interest in 'port' nodes */
1756         rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
1757             &port_hdl);
1758         if (rv != MDEG_SUCCESS) {
1759                 DERR(vgenp, NULL, "mdeg_register failed\n");
1760                 goto mdeg_reg_fail;
1761         }
1762 
1763         /* save mdeg handle in vgen_t */
1764         vgenp->mdeg_dev_hdl = dev_hdl;
1765         vgenp->mdeg_port_hdl = port_hdl;
1766 
1767         return (DDI_SUCCESS);
1768 
1769 mdeg_reg_fail:
1770         if (dev_hdl != NULL) {
1771                 (void) mdeg_unregister(dev_hdl);
1772         }
1773         KMEM_FREE(parentp);
1774         kmem_free(pspecp, templatesz);
1775         vgenp->mdeg_parentp = NULL;
1776         return (DDI_FAILURE);
1777 }
1778 
1779 /* unregister with MD event generator */
1780 static void
1781 vgen_mdeg_unreg(vgen_t *vgenp)
1782 {
1783         if (vgenp->mdeg_dev_hdl != NULL) {
1784                 (void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1785                 vgenp->mdeg_dev_hdl = NULL;
1786         }
1787         if (vgenp->mdeg_port_hdl != NULL) {
1788                 (void) mdeg_unregister(vgenp->mdeg_port_hdl);
1789                 vgenp->mdeg_port_hdl = NULL;
1790         }
1791 
1792         if (vgenp->mdeg_parentp != NULL) {
1793                 kmem_free(vgenp->mdeg_parentp->specp,
1794                     sizeof (vgen_prop_template));
1795                 KMEM_FREE(vgenp->mdeg_parentp);
1796                 vgenp->mdeg_parentp = NULL;
1797         }
1798 }
1799 
1800 /* mdeg callback function for the port node */
1801 static int
1802 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
1803 {
1804         int             idx;
1805         int             vsw_idx = -1;
1806         uint64_t        val;
1807         vgen_t          *vgenp;
1808 
1809         if ((resp == NULL) || (cb_argp == NULL)) {
1810                 return (MDEG_FAILURE);
1811         }
1812 
1813         vgenp = (vgen_t *)cb_argp;
1814         DBG1(vgenp, NULL, "enter\n");
1815 
1816         mutex_enter(&vgenp->lock);
1817 
1818         DBG1(vgenp, NULL, "ports: removed(%x), "
1819         "added(%x), updated(%x)\n", resp->removed.nelem,
1820             resp->added.nelem, resp->match_curr.nelem);
1821 
1822         for (idx = 0; idx < resp->removed.nelem; idx++) {
1823                 (void) vgen_remove_port(vgenp, resp->removed.mdp,
1824                     resp->removed.mdep[idx]);
1825         }
1826 
1827         if (vgenp->vsw_portp == NULL) {
1828                 /*
1829                  * find vsw_port and add it first, because other ports need
1830                  * this when adding fdb entry (see vgen_port_init()).
1831                  */
1832                 for (idx = 0; idx < resp->added.nelem; idx++) {
1833                         if (!(md_get_prop_val(resp->added.mdp,
1834                             resp->added.mdep[idx], swport_propname, &val))) {
1835                                 if (val == 0) {
1836                                         /*
1837                                          * This port is connected to the
1838                                          * vsw on service domain.
1839                                          */
1840                                         vsw_idx = idx;
1841                                         if (vgen_add_port(vgenp,
1842                                             resp->added.mdp,
1843                                             resp->added.mdep[idx]) !=
1844                                             DDI_SUCCESS) {
1845                                                 cmn_err(CE_NOTE, "vnet%d Could "
1846                                                     "not initialize virtual "
1847                                                     "switch port.",
1848                                                     vgenp->instance);
1849                                                 mutex_exit(&vgenp->lock);
1850                                                 return (MDEG_FAILURE);
1851                                         }
1852                                         break;
1853                                 }
1854                         }
1855                 }
1856                 if (vsw_idx == -1) {
1857                         DWARN(vgenp, NULL, "can't find vsw_port\n");
1858                         mutex_exit(&vgenp->lock);
1859                         return (MDEG_FAILURE);
1860                 }
1861         }
1862 
1863         for (idx = 0; idx < resp->added.nelem; idx++) {
1864                 if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1865                         continue;
1866 
1867                 /* If this port can't be added just skip it. */
1868                 (void) vgen_add_port(vgenp, resp->added.mdp,
1869                     resp->added.mdep[idx]);
1870         }
1871 
1872         for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1873                 (void) vgen_update_port(vgenp, resp->match_curr.mdp,
1874                     resp->match_curr.mdep[idx],
1875                     resp->match_prev.mdp,
1876                     resp->match_prev.mdep[idx]);
1877         }
1878 
1879         mutex_exit(&vgenp->lock);
1880         DBG1(vgenp, NULL, "exit\n");
1881         return (MDEG_SUCCESS);
1882 }
1883 
1884 /* mdeg callback function for the vnet node */
1885 static int
1886 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1887 {
1888         vgen_t          *vgenp;
1889         vnet_t          *vnetp;
1890         md_t            *mdp;
1891         mde_cookie_t    node;
1892         uint64_t        inst;
1893         char            *node_name = NULL;
1894 
1895         if ((resp == NULL) || (cb_argp == NULL)) {
1896                 return (MDEG_FAILURE);
1897         }
1898 
1899         vgenp = (vgen_t *)cb_argp;
1900         vnetp = vgenp->vnetp;
1901 
1902         DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
1903             " : prev matched %d", resp->added.nelem, resp->removed.nelem,
1904             resp->match_curr.nelem, resp->match_prev.nelem);
1905 
1906         mutex_enter(&vgenp->lock);
1907 
1908         /*
1909          * We get an initial callback for this node as 'added' after
1910          * registering with mdeg. Note that we would have already gathered
1911          * information about this vnet node by walking MD earlier during attach
1912          * (in vgen_read_mdprops()). So, there is a window where the properties
1913          * of this node might have changed when we get this initial 'added'
1914          * callback. We handle this as if an update occured and invoke the same
1915          * function which handles updates to the properties of this vnet-node
1916          * if any. A non-zero 'match' value indicates that the MD has been
1917          * updated and that a 'network' node is present which may or may not
1918          * have been updated. It is up to the clients to examine their own
1919          * nodes and determine if they have changed.
1920          */
1921         if (resp->added.nelem != 0) {
1922 
1923                 if (resp->added.nelem != 1) {
1924                         cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
1925                             "invalid: %d\n", vnetp->instance,
1926                             resp->added.nelem);
1927                         goto vgen_mdeg_cb_err;
1928                 }
1929 
1930                 mdp = resp->added.mdp;
1931                 node = resp->added.mdep[0];
1932 
1933         } else if (resp->match_curr.nelem != 0) {
1934 
1935                 if (resp->match_curr.nelem != 1) {
1936                         cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
1937                             "invalid: %d\n", vnetp->instance,
1938                             resp->match_curr.nelem);
1939                         goto vgen_mdeg_cb_err;
1940                 }
1941 
1942                 mdp = resp->match_curr.mdp;
1943                 node = resp->match_curr.mdep[0];
1944 
1945         } else {
1946                 goto vgen_mdeg_cb_err;
1947         }
1948 
1949         /* Validate name and instance */
1950         if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1951                 DERR(vgenp, NULL, "unable to get node name\n");
1952                 goto vgen_mdeg_cb_err;
1953         }
1954 
1955         /* is this a virtual-network device? */
1956         if (strcmp(node_name, vnet_propname) != 0) {
1957                 DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
1958                 goto vgen_mdeg_cb_err;
1959         }
1960 
1961         if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1962                 DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
1963                 goto vgen_mdeg_cb_err;
1964         }
1965 
1966         /* is this the right instance of vnet? */
1967         if (inst != vgenp->regprop) {
1968                 DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
1969                 goto vgen_mdeg_cb_err;
1970         }
1971 
1972         vgen_update_md_prop(vgenp, mdp, node);
1973 
1974         mutex_exit(&vgenp->lock);
1975         return (MDEG_SUCCESS);
1976 
1977 vgen_mdeg_cb_err:
1978         mutex_exit(&vgenp->lock);
1979         return (MDEG_FAILURE);
1980 }
1981 
1982 /*
1983  * Check to see if the relevant properties in the specified node have
1984  * changed, and if so take the appropriate action.
1985  */
1986 static void
1987 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1988 {
1989         uint16_t        pvid;
1990         uint16_t        *vids;
1991         uint16_t        nvids;
1992         vnet_t          *vnetp = vgenp->vnetp;
1993         uint32_t        mtu;
1994         boolean_t       pls_update;
1995         enum            { MD_init = 0x1,
1996                             MD_vlans = 0x2,
1997                             MD_mtu = 0x4,
1998                             MD_pls = 0x8 } updated;
1999         int             rv;
2000 
2001         updated = MD_init;
2002 
2003         /* Read the vlan ids */
2004         vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2005             &nvids, NULL);
2006 
2007         /* Determine if there are any vlan id updates */
2008         if ((pvid != vnetp->pvid) ||         /* pvid changed? */
2009             (nvids != vnetp->nvids) ||               /* # of vids changed? */
2010             ((nvids != 0) && (vnetp->nvids != 0) &&  /* vids changed? */
2011             bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2012                 updated |= MD_vlans;
2013         }
2014 
2015         /* Read mtu */
2016         vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2017         if (mtu != vnetp->mtu) {
2018                 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2019                         updated |= MD_mtu;
2020                 } else {
2021                         cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2022                             " as the specified value:%d is invalid\n",
2023                             vnetp->instance, mtu);
2024                 }
2025         }
2026 
2027         /*
2028          * Read the 'linkprop' property.
2029          */
2030         vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2031         if (pls_update != vnetp->pls_update) {
2032                 updated |= MD_pls;
2033         }
2034 
2035         /* Now process the updated props */
2036 
2037         if (updated & MD_vlans) {
2038 
2039                 /* save the new vlan ids */
2040                 vnetp->pvid = pvid;
2041                 if (vnetp->nvids != 0) {
2042                         kmem_free(vnetp->vids,
2043                             sizeof (uint16_t) * vnetp->nvids);
2044                         vnetp->nvids = 0;
2045                 }
2046                 if (nvids != 0) {
2047                         vnetp->nvids = nvids;
2048                         vnetp->vids = vids;
2049                 }
2050 
2051                 /* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2052                 vgen_reset_vlan_unaware_ports(vgenp);
2053 
2054         } else {
2055 
2056                 if (nvids != 0) {
2057                         kmem_free(vids, sizeof (uint16_t) * nvids);
2058                 }
2059         }
2060 
2061         if (updated & MD_mtu) {
2062 
2063                 DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2064                     vnetp->mtu, mtu);
2065 
2066                 rv = vnet_mtu_update(vnetp, mtu);
2067                 if (rv == 0) {
2068                         vgenp->max_frame_size = mtu +
2069                             sizeof (struct ether_header) + VLAN_TAGSZ;
2070                 }
2071         }
2072 
2073         if (updated & MD_pls) {
2074                 /* enable/disable physical link state updates */
2075                 vnetp->pls_update = pls_update;
2076                 mutex_exit(&vgenp->lock);
2077 
2078                 /* reset vsw-port to re-negotiate with the updated prop. */
2079                 vgen_reset_vsw_port(vgenp);
2080 
2081                 mutex_enter(&vgenp->lock);
2082         }
2083 }
2084 
2085 /* add a new port to the device */
2086 static int
2087 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2088 {
2089         vgen_port_t     *portp;
2090         int             rv;
2091 
2092         portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2093 
2094         rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2095         if (rv != DDI_SUCCESS) {
2096                 KMEM_FREE(portp);
2097                 return (DDI_FAILURE);
2098         }
2099 
2100         rv = vgen_port_attach(portp);
2101         if (rv != DDI_SUCCESS) {
2102                 return (DDI_FAILURE);
2103         }
2104 
2105         return (DDI_SUCCESS);
2106 }
2107 
2108 /* read properties of the port from its md node */
2109 static int
2110 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2111         mde_cookie_t mdex)
2112 {
2113         uint64_t                port_num;
2114         uint64_t                *ldc_ids;
2115         uint64_t                macaddr;
2116         uint64_t                val;
2117         int                     num_ldcs;
2118         int                     i;
2119         int                     addrsz;
2120         int                     num_nodes = 0;
2121         int                     listsz = 0;
2122         mde_cookie_t            *listp = NULL;
2123         uint8_t                 *addrp;
2124         struct ether_addr       ea;
2125 
2126         /* read "id" property to get the port number */
2127         if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2128                 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2129                 return (DDI_FAILURE);
2130         }
2131 
2132         /*
2133          * Find the channel endpoint node(s) under this port node.
2134          */
2135         if ((num_nodes = md_node_count(mdp)) <= 0) {
2136                 DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2137                     num_nodes);
2138                 return (DDI_FAILURE);
2139         }
2140 
2141         /* allocate space for node list */
2142         listsz = num_nodes * sizeof (mde_cookie_t);
2143         listp = kmem_zalloc(listsz, KM_NOSLEEP);
2144         if (listp == NULL)
2145                 return (DDI_FAILURE);
2146 
2147         num_ldcs = md_scan_dag(mdp, mdex,
2148             md_find_name(mdp, channel_propname),
2149             md_find_name(mdp, "fwd"), listp);
2150 
2151         if (num_ldcs <= 0) {
2152                 DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2153                 kmem_free(listp, listsz);
2154                 return (DDI_FAILURE);
2155         }
2156 
2157         if (num_ldcs > 1) {
2158                 DWARN(vgenp, NULL, "Port %d: Number of channels %d > 1\n",
2159                     port_num, num_ldcs);
2160         }
2161 
2162         ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2163         if (ldc_ids == NULL) {
2164                 kmem_free(listp, listsz);
2165                 return (DDI_FAILURE);
2166         }
2167 
2168         for (i = 0; i < num_ldcs; i++) {
2169                 /* read channel ids */
2170                 if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2171                         DWARN(vgenp, NULL, "prop(%s) not found\n",
2172                             id_propname);
2173                         kmem_free(listp, listsz);
2174                         kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2175                         return (DDI_FAILURE);
2176                 }
2177                 DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2178         }
2179 
2180         kmem_free(listp, listsz);
2181 
2182         if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2183             &addrsz)) {
2184                 DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2185                 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2186                 return (DDI_FAILURE);
2187         }
2188 
2189         if (addrsz < ETHERADDRL) {
2190                 DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2191                 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2192                 return (DDI_FAILURE);
2193         }
2194 
2195         macaddr = *((uint64_t *)addrp);
2196 
2197         DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2198 
2199         for (i = ETHERADDRL - 1; i >= 0; i--) {
2200                 ea.ether_addr_octet[i] = macaddr & 0xFF;
2201                 macaddr >>= 8;
2202         }
2203 
2204         if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2205                 if (val == 0) {
2206                         /* This port is connected to the vswitch */
2207                         portp->is_vsw_port = B_TRUE;
2208                 } else {
2209                         portp->is_vsw_port = B_FALSE;
2210                 }
2211         }
2212 
2213         /* now update all properties into the port */
2214         portp->vgenp = vgenp;
2215         portp->port_num = port_num;
2216         ether_copy(&ea, &portp->macaddr);
2217         portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2218         bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2219         portp->num_ldcs = num_ldcs;
2220 
2221         /* read vlan id properties of this port node */
2222         vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2223             &portp->vids, &portp->nvids, NULL);
2224 
2225         kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2226 
2227         return (DDI_SUCCESS);
2228 }
2229 
2230 /* remove a port from the device */
2231 static int
2232 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2233 {
2234         uint64_t        port_num;
2235         vgen_port_t     *portp;
2236         vgen_portlist_t *plistp;
2237 
2238         /* read "id" property to get the port number */
2239         if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2240                 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2241                 return (DDI_FAILURE);
2242         }
2243 
2244         plistp = &(vgenp->vgenports);
2245 
2246         WRITE_ENTER(&plistp->rwlock);
2247         portp = vgen_port_lookup(plistp, (int)port_num);
2248         if (portp == NULL) {
2249                 DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2250                 RW_EXIT(&plistp->rwlock);
2251                 return (DDI_FAILURE);
2252         }
2253 
2254         vgen_port_detach_mdeg(portp);
2255         RW_EXIT(&plistp->rwlock);
2256 
2257         return (DDI_SUCCESS);
2258 }
2259 
2260 /* attach a port to the device based on mdeg data */
2261 static int
2262 vgen_port_attach(vgen_port_t *portp)
2263 {
2264         vgen_portlist_t         *plistp;
2265         vgen_t                  *vgenp;
2266         uint64_t                *ldcids;
2267         mac_register_t          *macp;
2268         vio_net_res_type_t      type;
2269         int                     rv;
2270 
2271         ASSERT(portp != NULL);
2272         vgenp = portp->vgenp;
2273         ldcids = portp->ldc_ids;
2274 
2275         DBG2(vgenp, NULL, "port_num(%d), ldcid(%lx)\n",
2276             portp->port_num, ldcids[0]);
2277 
2278         mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2279 
2280         /*
2281          * attach the channel under the port using its channel id;
2282          * note that we only support one channel per port for now.
2283          */
2284         if (vgen_ldc_attach(portp, ldcids[0]) == DDI_FAILURE) {
2285                 vgen_port_detach(portp);
2286                 return (DDI_FAILURE);
2287         }
2288 
2289         /* create vlan id hash table */
2290         vgen_vlan_create_hash(portp);
2291 
2292         if (portp->is_vsw_port == B_TRUE) {
2293                 /* This port is connected to the switch port */
2294                 (void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2295                 type = VIO_NET_RES_LDC_SERVICE;
2296         } else {
2297                 (void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2298                 type = VIO_NET_RES_LDC_GUEST;
2299         }
2300 
2301         if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2302                 vgen_port_detach(portp);
2303                 return (DDI_FAILURE);
2304         }
2305         macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2306         macp->m_driver = portp;
2307         macp->m_dip = vgenp->vnetdip;
2308         macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2309         macp->m_callbacks = &vgen_m_callbacks;
2310         macp->m_min_sdu = 0;
2311         macp->m_max_sdu = ETHERMTU;
2312 
2313         mutex_enter(&portp->lock);
2314         rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2315             portp->macaddr, &portp->vhp, &portp->vcb);
2316         mutex_exit(&portp->lock);
2317         mac_free(macp);
2318 
2319         if (rv == 0) {
2320                 /* link it into the list of ports */
2321                 plistp = &(vgenp->vgenports);
2322                 WRITE_ENTER(&plistp->rwlock);
2323                 vgen_port_list_insert(portp);
2324                 RW_EXIT(&plistp->rwlock);
2325 
2326                 if (portp->is_vsw_port == B_TRUE) {
2327                         /* We now have the vswitch port attached */
2328                         vgenp->vsw_portp = portp;
2329                         (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2330                 }
2331         } else {
2332                 DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2333                     portp);
2334                 vgen_port_detach(portp);
2335         }
2336 
2337         DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2338         return (DDI_SUCCESS);
2339 }
2340 
2341 /* detach a port from the device based on mdeg data */
2342 static void
2343 vgen_port_detach_mdeg(vgen_port_t *portp)
2344 {
2345         vgen_t *vgenp = portp->vgenp;
2346 
2347         DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2348 
2349         mutex_enter(&portp->lock);
2350 
2351         /* stop the port if needed */
2352         if (portp->flags & VGEN_STARTED) {
2353                 vgen_port_uninit(portp);
2354                 portp->flags &= ~(VGEN_STARTED);
2355         }
2356 
2357         mutex_exit(&portp->lock);
2358         vgen_port_detach(portp);
2359 
2360         DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2361 }
2362 
2363 static int
2364 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2365         md_t *prev_mdp, mde_cookie_t prev_mdex)
2366 {
2367         uint64_t        cport_num;
2368         uint64_t        pport_num;
2369         vgen_portlist_t *plistp;
2370         vgen_port_t     *portp;
2371         boolean_t       updated_vlans = B_FALSE;
2372         uint16_t        pvid;
2373         uint16_t        *vids;
2374         uint16_t        nvids;
2375 
2376         /*
2377          * For now, we get port updates only if vlan ids changed.
2378          * We read the port num and do some sanity check.
2379          */
2380         if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2381                 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2382                 return (DDI_FAILURE);
2383         }
2384 
2385         if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2386                 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2387                 return (DDI_FAILURE);
2388         }
2389         if (cport_num != pport_num)
2390                 return (DDI_FAILURE);
2391 
2392         plistp = &(vgenp->vgenports);
2393 
2394         READ_ENTER(&plistp->rwlock);
2395 
2396         portp = vgen_port_lookup(plistp, (int)cport_num);
2397         if (portp == NULL) {
2398                 DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2399                 RW_EXIT(&plistp->rwlock);
2400                 return (DDI_FAILURE);
2401         }
2402 
2403         /* Read the vlan ids */
2404         vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2405             &nvids, NULL);
2406 
2407         /* Determine if there are any vlan id updates */
2408         if ((pvid != portp->pvid) ||         /* pvid changed? */
2409             (nvids != portp->nvids) ||               /* # of vids changed? */
2410             ((nvids != 0) && (portp->nvids != 0) &&  /* vids changed? */
2411             bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2412                 updated_vlans = B_TRUE;
2413         }
2414 
2415         if (updated_vlans == B_FALSE) {
2416                 RW_EXIT(&plistp->rwlock);
2417                 return (DDI_FAILURE);
2418         }
2419 
2420         /* remove the port from vlans it has been assigned to */
2421         vgen_vlan_remove_ids(portp);
2422 
2423         /* save the new vlan ids */
2424         portp->pvid = pvid;
2425         if (portp->nvids != 0) {
2426                 kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2427                 portp->nvids = 0;
2428         }
2429         if (nvids != 0) {
2430                 portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2431                 bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2432                 portp->nvids = nvids;
2433                 kmem_free(vids, sizeof (uint16_t) * nvids);
2434         }
2435 
2436         /* add port to the new vlans */
2437         vgen_vlan_add_ids(portp);
2438 
2439         /* reset the port if it is vlan unaware (ver < 1.3) */
2440         vgen_vlan_unaware_port_reset(portp);
2441 
2442         RW_EXIT(&plistp->rwlock);
2443 
2444         return (DDI_SUCCESS);
2445 }
2446 
2447 static uint64_t
2448 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2449 {
2450         return (vgen_ldc_stat(portp->ldcp, stat));
2451 }
2452 
2453 /* attach the channel corresponding to the given ldc_id to the port */
2454 static int
2455 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2456 {
2457         vgen_t          *vgenp;
2458         vgen_ldc_t      *ldcp;
2459         ldc_attr_t      attr;
2460         int             status;
2461         ldc_status_t    istatus;
2462         char            kname[MAXNAMELEN];
2463         int             instance;
2464         enum    {AST_init = 0x0, AST_ldc_alloc = 0x1,
2465                 AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2466                 AST_ldc_reg_cb = 0x8 } attach_state;
2467 
2468         attach_state = AST_init;
2469         vgenp = portp->vgenp;
2470 
2471         ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2472         if (ldcp == NULL) {
2473                 goto ldc_attach_failed;
2474         }
2475         ldcp->ldc_id = ldc_id;
2476         ldcp->portp = portp;
2477 
2478         attach_state |= AST_ldc_alloc;
2479 
2480         mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2481         mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2482         mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2483         mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2484         mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2485         mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
2486         mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL);
2487         cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL);
2488 
2489         attach_state |= AST_mutex_init;
2490 
2491         attr.devclass = LDC_DEV_NT;
2492         attr.instance = vgenp->instance;
2493         attr.mode = LDC_MODE_UNRELIABLE;
2494         attr.mtu = vgen_ldc_mtu;
2495         status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2496         if (status != 0) {
2497                 DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2498                 goto ldc_attach_failed;
2499         }
2500         attach_state |= AST_ldc_init;
2501 
2502         status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2503         if (status != 0) {
2504                 DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2505                     status);
2506                 goto ldc_attach_failed;
2507         }
2508         /*
2509          * allocate a message for ldc_read()s, big enough to hold ctrl and
2510          * data msgs, including raw data msgs used to recv priority frames.
2511          */
2512         ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2513         ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2514         attach_state |= AST_ldc_reg_cb;
2515 
2516         (void) ldc_status(ldcp->ldc_handle, &istatus);
2517         ASSERT(istatus == LDC_INIT);
2518         ldcp->ldc_status = istatus;
2519 
2520         /* Setup kstats for the channel */
2521         instance = vgenp->instance;
2522         (void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2523         ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2524         if (ldcp->ksp == NULL) {
2525                 goto ldc_attach_failed;
2526         }
2527 
2528         /* initialize vgen_versions supported */
2529         bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2530         vgen_reset_vnet_proto_ops(ldcp);
2531 
2532         /* Link this channel to the port */
2533         portp->ldcp = ldcp;
2534 
2535         ldcp->link_state = LINK_STATE_UNKNOWN;
2536 #ifdef  VNET_IOC_DEBUG
2537         ldcp->link_down_forced = B_FALSE;
2538 #endif
2539         ldcp->flags |= CHANNEL_ATTACHED;
2540         return (DDI_SUCCESS);
2541 
2542 ldc_attach_failed:
2543         if (attach_state & AST_ldc_reg_cb) {
2544                 (void) ldc_unreg_callback(ldcp->ldc_handle);
2545                 kmem_free(ldcp->ldcmsg, ldcp->msglen);
2546         }
2547 
2548         if (attach_state & AST_ldc_init) {
2549                 (void) ldc_fini(ldcp->ldc_handle);
2550         }
2551         if (attach_state & AST_mutex_init) {
2552                 mutex_destroy(&ldcp->tclock);
2553                 mutex_destroy(&ldcp->txlock);
2554                 mutex_destroy(&ldcp->cblock);
2555                 mutex_destroy(&ldcp->wrlock);
2556                 mutex_destroy(&ldcp->rxlock);
2557                 mutex_destroy(&ldcp->pollq_lock);
2558         }
2559         if (attach_state & AST_ldc_alloc) {
2560                 KMEM_FREE(ldcp);
2561         }
2562         return (DDI_FAILURE);
2563 }
2564 
2565 /* detach a channel from the port */
2566 static void
2567 vgen_ldc_detach(vgen_ldc_t *ldcp)
2568 {
2569         vgen_port_t     *portp;
2570         vgen_t          *vgenp;
2571 
2572         ASSERT(ldcp != NULL);
2573 
2574         portp = ldcp->portp;
2575         vgenp = portp->vgenp;
2576 
2577         if (ldcp->ldc_status != LDC_INIT) {
2578                 DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2579         }
2580 
2581         if (ldcp->flags & CHANNEL_ATTACHED) {
2582                 ldcp->flags &= ~(CHANNEL_ATTACHED);
2583 
2584                 (void) ldc_unreg_callback(ldcp->ldc_handle);
2585                 (void) ldc_fini(ldcp->ldc_handle);
2586 
2587                 kmem_free(ldcp->ldcmsg, ldcp->msglen);
2588                 vgen_destroy_kstats(ldcp->ksp);
2589                 ldcp->ksp = NULL;
2590                 mutex_destroy(&ldcp->tclock);
2591                 mutex_destroy(&ldcp->txlock);
2592                 mutex_destroy(&ldcp->cblock);
2593                 mutex_destroy(&ldcp->wrlock);
2594                 mutex_destroy(&ldcp->rxlock);
2595                 mutex_destroy(&ldcp->pollq_lock);
2596                 mutex_destroy(&ldcp->msg_thr_lock);
2597                 cv_destroy(&ldcp->msg_thr_cv);
2598 
2599                 KMEM_FREE(ldcp);
2600         }
2601 }
2602 
2603 /* enable transmit/receive on the channel */
2604 static int
2605 vgen_ldc_init(vgen_ldc_t *ldcp)
2606 {
2607         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
2608         ldc_status_t    istatus;
2609         int             rv;
2610         enum            { ST_init = 0x0, ST_ldc_open = 0x1,
2611                             ST_cb_enable = 0x2} init_state;
2612         int             flag = 0;
2613 
2614         init_state = ST_init;
2615 
2616         DBG1(vgenp, ldcp, "enter\n");
2617         LDC_LOCK(ldcp);
2618 
2619         rv = ldc_open(ldcp->ldc_handle);
2620         if (rv != 0) {
2621                 DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
2622                 goto ldcinit_failed;
2623         }
2624         init_state |= ST_ldc_open;
2625 
2626         (void) ldc_status(ldcp->ldc_handle, &istatus);
2627         if (istatus != LDC_OPEN && istatus != LDC_READY) {
2628                 DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
2629                 goto ldcinit_failed;
2630         }
2631         ldcp->ldc_status = istatus;
2632 
2633         rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
2634         if (rv != 0) {
2635                 DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
2636                 goto ldcinit_failed;
2637         }
2638 
2639         init_state |= ST_cb_enable;
2640 
2641         vgen_ldc_up(ldcp);
2642 
2643         (void) ldc_status(ldcp->ldc_handle, &istatus);
2644         if (istatus == LDC_UP) {
2645                 DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
2646         }
2647 
2648         ldcp->ldc_status = istatus;
2649 
2650         ldcp->hphase = VH_PHASE0;
2651         ldcp->hstate = 0;
2652         ldcp->flags |= CHANNEL_STARTED;
2653 
2654         vgen_setup_handshake_params(ldcp);
2655 
2656         /* if channel is already UP - start handshake */
2657         if (istatus == LDC_UP) {
2658                 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2659                 if (ldcp->portp != vgenp->vsw_portp) {
2660                         /*
2661                          * As the channel is up, use this port from now on.
2662                          */
2663                         (void) atomic_swap_32(
2664                             &ldcp->portp->use_vsw_port, B_FALSE);
2665                 }
2666 
2667                 /* Initialize local session id */
2668                 ldcp->local_sid = ddi_get_lbolt();
2669 
2670                 /* clear peer session id */
2671                 ldcp->peer_sid = 0;
2672 
2673                 mutex_exit(&ldcp->tclock);
2674                 mutex_exit(&ldcp->txlock);
2675                 mutex_exit(&ldcp->wrlock);
2676                 mutex_exit(&ldcp->rxlock);
2677                 rv = vgen_handshake(vh_nextphase(ldcp));
2678                 mutex_exit(&ldcp->cblock);
2679                 if (rv != 0) {
2680                         flag = (rv == ECONNRESET) ? VGEN_FLAG_EVT_RESET :
2681                             VGEN_FLAG_NEED_LDCRESET;
2682                         (void) vgen_process_reset(ldcp, flag);
2683                 }
2684         } else {
2685                 LDC_UNLOCK(ldcp);
2686         }
2687 
2688         return (DDI_SUCCESS);
2689 
2690 ldcinit_failed:
2691         if (init_state & ST_cb_enable) {
2692                 (void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2693         }
2694         if (init_state & ST_ldc_open) {
2695                 (void) ldc_close(ldcp->ldc_handle);
2696         }
2697         LDC_UNLOCK(ldcp);
2698         DBG1(vgenp, ldcp, "exit\n");
2699         return (DDI_FAILURE);
2700 }
2701 
2702 /* stop transmit/receive on the channel */
2703 static void
2704 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2705 {
2706         vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2707 
2708         DBG1(vgenp, ldcp, "enter\n");
2709 
2710         LDC_LOCK(ldcp);
2711 
2712         if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2713                 LDC_UNLOCK(ldcp);
2714                 DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2715                 return;
2716         }
2717 
2718         LDC_UNLOCK(ldcp);
2719 
2720         while (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2721                 delay(drv_usectohz(VGEN_LDC_UNINIT_DELAY));
2722         }
2723 
2724         (void) vgen_process_reset(ldcp, VGEN_FLAG_UNINIT);
2725 
2726         DBG1(vgenp, ldcp, "exit\n");
2727 }
2728 
2729 /*
2730  * Create a descriptor ring, that will be exported to the peer for mapping.
2731  */
2732 static int
2733 vgen_create_dring(vgen_ldc_t *ldcp)
2734 {
2735         vgen_hparams_t  *lp = &ldcp->local_hparams;
2736         int             rv;
2737 
2738         if (lp->dring_mode == VIO_RX_DRING_DATA) {
2739                 rv = vgen_create_rx_dring(ldcp);
2740         } else {
2741                 rv = vgen_create_tx_dring(ldcp);
2742         }
2743 
2744         return (rv);
2745 }
2746 
2747 /*
2748  * Destroy the descriptor ring.
2749  */
2750 static void
2751 vgen_destroy_dring(vgen_ldc_t *ldcp)
2752 {
2753         vgen_hparams_t  *lp = &ldcp->local_hparams;
2754 
2755         if (lp->dring_mode == VIO_RX_DRING_DATA) {
2756                 vgen_destroy_rx_dring(ldcp);
2757         } else {
2758                 vgen_destroy_tx_dring(ldcp);
2759         }
2760 }
2761 
2762 /*
2763  * Map the descriptor ring exported by the peer.
2764  */
2765 static int
2766 vgen_map_dring(vgen_ldc_t *ldcp, void *pkt)
2767 {
2768         int             rv;
2769         vgen_hparams_t  *lp = &ldcp->local_hparams;
2770 
2771         if (lp->dring_mode == VIO_RX_DRING_DATA) {
2772                 /*
2773                  * In RxDringData mode, dring that we map in
2774                  * becomes our transmit descriptor ring.
2775                  */
2776                 rv = vgen_map_tx_dring(ldcp, pkt);
2777         } else {
2778 
2779                 /*
2780                  * In TxDring mode, dring that we map in
2781                  * becomes our receive descriptor ring.
2782                  */
2783                 rv = vgen_map_rx_dring(ldcp, pkt);
2784         }
2785 
2786         return (rv);
2787 }
2788 
2789 /*
2790  * Unmap the descriptor ring exported by the peer.
2791  */
2792 static void
2793 vgen_unmap_dring(vgen_ldc_t *ldcp)
2794 {
2795         vgen_hparams_t  *lp = &ldcp->local_hparams;
2796 
2797         if (lp->dring_mode == VIO_RX_DRING_DATA) {
2798                 vgen_unmap_tx_dring(ldcp);
2799         } else {
2800                 vgen_unmap_rx_dring(ldcp);
2801         }
2802 }
2803 
2804 void
2805 vgen_destroy_rxpools(void *arg)
2806 {
2807         vio_mblk_pool_t *poolp = (vio_mblk_pool_t *)arg;
2808         vio_mblk_pool_t *npoolp;
2809 
2810         while (poolp != NULL) {
2811                 npoolp =  poolp->nextp;
2812                 while (vio_destroy_mblks(poolp) != 0) {
2813                         delay(drv_usectohz(vgen_rxpool_cleanup_delay));
2814                 }
2815                 poolp = npoolp;
2816         }
2817 }
2818 
2819 /* get channel statistics */
2820 static uint64_t
2821 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2822 {
2823         vgen_stats_t    *statsp;
2824         uint64_t        val;
2825 
2826         val = 0;
2827         statsp = &ldcp->stats;
2828         switch (stat) {
2829 
2830         case MAC_STAT_MULTIRCV:
2831                 val = statsp->multircv;
2832                 break;
2833 
2834         case MAC_STAT_BRDCSTRCV:
2835                 val = statsp->brdcstrcv;
2836                 break;
2837 
2838         case MAC_STAT_MULTIXMT:
2839                 val = statsp->multixmt;
2840                 break;
2841 
2842         case MAC_STAT_BRDCSTXMT:
2843                 val = statsp->brdcstxmt;
2844                 break;
2845 
2846         case MAC_STAT_NORCVBUF:
2847                 val = statsp->norcvbuf;
2848                 break;
2849 
2850         case MAC_STAT_IERRORS:
2851                 val = statsp->ierrors;
2852                 break;
2853 
2854         case MAC_STAT_NOXMTBUF:
2855                 val = statsp->noxmtbuf;
2856                 break;
2857 
2858         case MAC_STAT_OERRORS:
2859                 val = statsp->oerrors;
2860                 break;
2861 
2862         case MAC_STAT_COLLISIONS:
2863                 break;
2864 
2865         case MAC_STAT_RBYTES:
2866                 val = statsp->rbytes;
2867                 break;
2868 
2869         case MAC_STAT_IPACKETS:
2870                 val = statsp->ipackets;
2871                 break;
2872 
2873         case MAC_STAT_OBYTES:
2874                 val = statsp->obytes;
2875                 break;
2876 
2877         case MAC_STAT_OPACKETS:
2878                 val = statsp->opackets;
2879                 break;
2880 
2881         /* stats not relevant to ldc, return 0 */
2882         case MAC_STAT_IFSPEED:
2883         case ETHER_STAT_ALIGN_ERRORS:
2884         case ETHER_STAT_FCS_ERRORS:
2885         case ETHER_STAT_FIRST_COLLISIONS:
2886         case ETHER_STAT_MULTI_COLLISIONS:
2887         case ETHER_STAT_DEFER_XMTS:
2888         case ETHER_STAT_TX_LATE_COLLISIONS:
2889         case ETHER_STAT_EX_COLLISIONS:
2890         case ETHER_STAT_MACXMT_ERRORS:
2891         case ETHER_STAT_CARRIER_ERRORS:
2892         case ETHER_STAT_TOOLONG_ERRORS:
2893         case ETHER_STAT_XCVR_ADDR:
2894         case ETHER_STAT_XCVR_ID:
2895         case ETHER_STAT_XCVR_INUSE:
2896         case ETHER_STAT_CAP_1000FDX:
2897         case ETHER_STAT_CAP_1000HDX:
2898         case ETHER_STAT_CAP_100FDX:
2899         case ETHER_STAT_CAP_100HDX:
2900         case ETHER_STAT_CAP_10FDX:
2901         case ETHER_STAT_CAP_10HDX:
2902         case ETHER_STAT_CAP_ASMPAUSE:
2903         case ETHER_STAT_CAP_PAUSE:
2904         case ETHER_STAT_CAP_AUTONEG:
2905         case ETHER_STAT_ADV_CAP_1000FDX:
2906         case ETHER_STAT_ADV_CAP_1000HDX:
2907         case ETHER_STAT_ADV_CAP_100FDX:
2908         case ETHER_STAT_ADV_CAP_100HDX:
2909         case ETHER_STAT_ADV_CAP_10FDX:
2910         case ETHER_STAT_ADV_CAP_10HDX:
2911         case ETHER_STAT_ADV_CAP_ASMPAUSE:
2912         case ETHER_STAT_ADV_CAP_PAUSE:
2913         case ETHER_STAT_ADV_CAP_AUTONEG:
2914         case ETHER_STAT_LP_CAP_1000FDX:
2915         case ETHER_STAT_LP_CAP_1000HDX:
2916         case ETHER_STAT_LP_CAP_100FDX:
2917         case ETHER_STAT_LP_CAP_100HDX:
2918         case ETHER_STAT_LP_CAP_10FDX:
2919         case ETHER_STAT_LP_CAP_10HDX:
2920         case ETHER_STAT_LP_CAP_ASMPAUSE:
2921         case ETHER_STAT_LP_CAP_PAUSE:
2922         case ETHER_STAT_LP_CAP_AUTONEG:
2923         case ETHER_STAT_LINK_ASMPAUSE:
2924         case ETHER_STAT_LINK_PAUSE:
2925         case ETHER_STAT_LINK_AUTONEG:
2926         case ETHER_STAT_LINK_DUPLEX:
2927         default:
2928                 val = 0;
2929                 break;
2930 
2931         }
2932         return (val);
2933 }
2934 
2935 /*
2936  * LDC channel is UP, start handshake process with peer.
2937  */
2938 static void
2939 vgen_handle_evt_up(vgen_ldc_t *ldcp)
2940 {
2941         vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
2942 
2943         DBG1(vgenp, ldcp, "enter\n");
2944 
2945         ASSERT(MUTEX_HELD(&ldcp->cblock));
2946 
2947         if (ldcp->portp != vgenp->vsw_portp) {
2948                 /*
2949                  * As the channel is up, use this port from now on.
2950                  */
2951                 (void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
2952         }
2953 
2954         /* Initialize local session id */
2955         ldcp->local_sid = ddi_get_lbolt();
2956 
2957         /* clear peer session id */
2958         ldcp->peer_sid = 0;
2959 
2960         /* Initiate Handshake process with peer ldc endpoint */
2961         (void) vgen_handshake(vh_nextphase(ldcp));
2962 
2963         DBG1(vgenp, ldcp, "exit\n");
2964 }
2965 
2966 /*
2967  * LDC channel is Reset, terminate connection with peer and try to
2968  * bring the channel up again.
2969  */
2970 int
2971 vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
2972 {
2973         if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2974                 ASSERT(MUTEX_HELD(&ldcp->cblock));
2975         }
2976 
2977         /* Set the flag to indicate reset is in progress */
2978         if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2979                 /* another thread is already in the process of resetting */
2980                 return (EBUSY);
2981         }
2982 
2983         if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2984                 mutex_exit(&ldcp->cblock);
2985         }
2986 
2987         (void) vgen_process_reset(ldcp, VGEN_FLAG_EVT_RESET);
2988 
2989         if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2990                 mutex_enter(&ldcp->cblock);
2991         }
2992 
2993         return (0);
2994 }
2995 
2996 /* Interrupt handler for the channel */
2997 static uint_t
2998 vgen_ldc_cb(uint64_t event, caddr_t arg)
2999 {
3000         _NOTE(ARGUNUSED(event))
3001         vgen_ldc_t      *ldcp;
3002         vgen_t          *vgenp;
3003         ldc_status_t    istatus;
3004         vgen_stats_t    *statsp;
3005         uint_t          ret = LDC_SUCCESS;
3006 
3007         ldcp = (vgen_ldc_t *)arg;
3008         vgenp = LDC_TO_VGEN(ldcp);
3009         statsp = &ldcp->stats;
3010 
3011         DBG1(vgenp, ldcp, "enter\n");
3012 
3013         mutex_enter(&ldcp->cblock);
3014         statsp->callbacks++;
3015         if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3016                 DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3017                     ldcp->ldc_status);
3018                 mutex_exit(&ldcp->cblock);
3019                 return (LDC_SUCCESS);
3020         }
3021 
3022         /*
3023          * NOTE: not using switch() as event could be triggered by
3024          * a state change and a read request. Also the ordering of the
3025          * check for the event types is deliberate.
3026          */
3027         if (event & LDC_EVT_UP) {
3028                 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3029                         DWARN(vgenp, ldcp, "ldc_status err\n");
3030                         /* status couldn't be determined */
3031                         ret = LDC_FAILURE;
3032                         goto ldc_cb_ret;
3033                 }
3034                 ldcp->ldc_status = istatus;
3035                 if (ldcp->ldc_status != LDC_UP) {
3036                         DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3037                             " but ldc status is not UP(0x%x)\n",
3038                             ldcp->ldc_status);
3039                         /* spurious interrupt, return success */
3040                         goto ldc_cb_ret;
3041                 }
3042                 DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3043                     event, ldcp->ldc_status);
3044 
3045                 vgen_handle_evt_up(ldcp);
3046 
3047                 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3048         }
3049 
3050         /* Handle RESET/DOWN before READ event */
3051         if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3052                 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3053                         DWARN(vgenp, ldcp, "ldc_status error\n");
3054                         /* status couldn't be determined */
3055                         ret = LDC_FAILURE;
3056                         goto ldc_cb_ret;
3057                 }
3058                 ldcp->ldc_status = istatus;
3059                 DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3060                     event, ldcp->ldc_status);
3061 
3062                 (void) vgen_handle_evt_reset(ldcp, VGEN_LDC_CB);
3063 
3064                 /*
3065                  * As the channel is down/reset, ignore READ event
3066                  * but print a debug warning message.
3067                  */
3068                 if (event & LDC_EVT_READ) {
3069                         DWARN(vgenp, ldcp,
3070                             "LDC_EVT_READ set along with RESET/DOWN\n");
3071                         event &= ~LDC_EVT_READ;
3072                 }
3073         }
3074 
3075         if (event & LDC_EVT_READ) {
3076                 DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3077                     event, ldcp->ldc_status);
3078 
3079                 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3080 
3081                 if (ldcp->msg_thread != NULL) {
3082                         /*
3083                          * If the receive thread is enabled, then
3084                          * wakeup the receive thread to process the
3085                          * LDC messages.
3086                          */
3087                         mutex_exit(&ldcp->cblock);
3088                         mutex_enter(&ldcp->msg_thr_lock);
3089                         if (!(ldcp->msg_thr_flags & VGEN_WTHR_DATARCVD)) {
3090                                 ldcp->msg_thr_flags |= VGEN_WTHR_DATARCVD;
3091                                 cv_signal(&ldcp->msg_thr_cv);
3092                         }
3093                         mutex_exit(&ldcp->msg_thr_lock);
3094                         mutex_enter(&ldcp->cblock);
3095                 } else  {
3096                         (void) vgen_handle_evt_read(ldcp, VGEN_LDC_CB);
3097                 }
3098         }
3099 
3100 ldc_cb_ret:
3101         mutex_exit(&ldcp->cblock);
3102         DBG1(vgenp, ldcp, "exit\n");
3103         return (ret);
3104 }
3105 
3106 int
3107 vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller)
3108 {
3109         int             rv;
3110         uint64_t        *ldcmsg;
3111         size_t          msglen;
3112         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
3113         vio_msg_tag_t   *tagp;
3114         ldc_status_t    istatus;
3115         boolean_t       has_data;
3116 
3117         DBG1(vgenp, ldcp, "enter\n");
3118 
3119         if (caller == VGEN_LDC_CB) {
3120                 ASSERT(MUTEX_HELD(&ldcp->cblock));
3121         } else if (caller == VGEN_MSG_THR) {
3122                 mutex_enter(&ldcp->cblock);
3123         } else {
3124                 return (EINVAL);
3125         }
3126 
3127         ldcmsg = ldcp->ldcmsg;
3128 
3129 vgen_evtread:
3130         do {
3131                 msglen = ldcp->msglen;
3132                 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3133 
3134                 if (rv != 0) {
3135                         DWARN(vgenp, ldcp, "ldc_read() failed "
3136                             "rv(%d) len(%d)\n", rv, msglen);
3137                         if (rv == ECONNRESET)
3138                                 goto vgen_evtread_error;
3139                         break;
3140                 }
3141                 if (msglen == 0) {
3142                         DBG2(vgenp, ldcp, "ldc_read NODATA");
3143                         break;
3144                 }
3145                 DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3146 
3147                 tagp = (vio_msg_tag_t *)ldcmsg;
3148 
3149                 if (ldcp->peer_sid) {
3150                         /*
3151                          * check sid only after we have received peer's sid
3152                          * in the version negotiate msg.
3153                          */
3154 #ifdef DEBUG
3155                         if (vgen_inject_error(ldcp, VGEN_ERR_HSID)) {
3156                                 /* simulate bad sid condition */
3157                                 tagp->vio_sid = 0;
3158                                 vgen_inject_err_flag &= ~(VGEN_ERR_HSID);
3159                         }
3160 #endif
3161                         rv = vgen_check_sid(ldcp, tagp);
3162                         if (rv != VGEN_SUCCESS) {
3163                                 /*
3164                                  * If sid mismatch is detected,
3165                                  * reset the channel.
3166                                  */
3167                                 DWARN(vgenp, ldcp, "vgen_check_sid() failed\n");
3168                                 goto vgen_evtread_error;
3169                         }
3170                 }
3171 
3172                 switch (tagp->vio_msgtype) {
3173                 case VIO_TYPE_CTRL:
3174                         rv = vgen_handle_ctrlmsg(ldcp, tagp);
3175                         if (rv != 0) {
3176                                 DWARN(vgenp, ldcp, "vgen_handle_ctrlmsg()"
3177                                     " failed rv(%d)\n", rv);
3178                         }
3179                         break;
3180 
3181                 case VIO_TYPE_DATA:
3182                         rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3183                         if (rv != 0) {
3184                                 DWARN(vgenp, ldcp, "vgen_handle_datamsg()"
3185                                     " failed rv(%d)\n", rv);
3186                         }
3187                         break;
3188 
3189                 case VIO_TYPE_ERR:
3190                         vgen_handle_errmsg(ldcp, tagp);
3191                         break;
3192 
3193                 default:
3194                         DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3195                             tagp->vio_msgtype);
3196                         break;
3197                 }
3198 
3199                 /*
3200                  * If an error is encountered, stop processing and
3201                  * handle the error.
3202                  */
3203                 if (rv != 0) {
3204                         goto vgen_evtread_error;
3205                 }
3206 
3207         } while (msglen);
3208 
3209         /* check once more before exiting */
3210         rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3211         if ((rv == 0) && (has_data == B_TRUE)) {
3212                 DTRACE_PROBE1(vgen_chkq, vgen_ldc_t *, ldcp);
3213                 goto vgen_evtread;
3214         }
3215 
3216 vgen_evtread_error:
3217         if (rv != 0) {
3218                 /*
3219                  * We handle the error and then return the error value. If we
3220                  * are running in the context of the msg worker, the error
3221                  * tells the worker thread to exit, as the channel would have
3222                  * been reset.
3223                  */
3224                 if (rv == ECONNRESET) {
3225                         if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3226                                 DWARN(vgenp, ldcp, "ldc_status err\n");
3227                         } else {
3228                                 ldcp->ldc_status = istatus;
3229                         }
3230                         (void) vgen_handle_evt_reset(ldcp, caller);
3231                 } else {
3232                         DWARN(vgenp, ldcp, "Calling vgen_ldc_reset()...\n");
3233                         (void) vgen_ldc_reset(ldcp, caller);
3234                 }
3235         }
3236 
3237         if (caller == VGEN_MSG_THR) {
3238                 mutex_exit(&ldcp->cblock);
3239         }
3240 
3241         DBG1(vgenp, ldcp, "exit\n");
3242         return (rv);
3243 }
3244 
3245 /* vgen handshake functions */
3246 
3247 /* change the hphase for the channel to the next phase */
3248 static vgen_ldc_t *
3249 vh_nextphase(vgen_ldc_t *ldcp)
3250 {
3251         if (ldcp->hphase == VH_PHASE4) {
3252                 ldcp->hphase = VH_DONE;
3253         } else {
3254                 ldcp->hphase++;
3255         }
3256         return (ldcp);
3257 }
3258 
3259 /* send version negotiate message to the peer over ldc */
3260 static int
3261 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
3262 {
3263         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
3264         vio_ver_msg_t   vermsg;
3265         vio_msg_tag_t   *tagp = &vermsg.tag;
3266         int             rv;
3267 
3268         bzero(&vermsg, sizeof (vermsg));
3269 
3270         tagp->vio_msgtype = VIO_TYPE_CTRL;
3271         tagp->vio_subtype = VIO_SUBTYPE_INFO;
3272         tagp->vio_subtype_env = VIO_VER_INFO;
3273         tagp->vio_sid = ldcp->local_sid;
3274 
3275         /* get version msg payload from ldcp->local */
3276         vermsg.ver_major = ldcp->local_hparams.ver_major;
3277         vermsg.ver_minor = ldcp->local_hparams.ver_minor;
3278         vermsg.dev_class = ldcp->local_hparams.dev_class;
3279 
3280         rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
3281         if (rv != VGEN_SUCCESS) {
3282                 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3283                 return (rv);
3284         }
3285 
3286         ldcp->hstate |= VER_INFO_SENT;
3287         DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
3288             vermsg.ver_major, vermsg.ver_minor);
3289 
3290         return (VGEN_SUCCESS);
3291 }
3292 
3293 /* send attr info message to the peer over ldc */
3294 static int
3295 vgen_send_attr_info(vgen_ldc_t *ldcp)
3296 {
3297         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
3298         vnet_attr_msg_t attrmsg;
3299         vio_msg_tag_t   *tagp = &attrmsg.tag;
3300         int             rv;
3301 
3302         bzero(&attrmsg, sizeof (attrmsg));
3303 
3304         tagp->vio_msgtype = VIO_TYPE_CTRL;
3305         tagp->vio_subtype = VIO_SUBTYPE_INFO;
3306         tagp->vio_subtype_env = VIO_ATTR_INFO;
3307         tagp->vio_sid = ldcp->local_sid;
3308 
3309         /* get attr msg payload from ldcp->local */
3310         attrmsg.mtu = ldcp->local_hparams.mtu;
3311         attrmsg.addr = ldcp->local_hparams.addr;
3312         attrmsg.addr_type = ldcp->local_hparams.addr_type;
3313         attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
3314         attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
3315         attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
3316         attrmsg.options = ldcp->local_hparams.dring_mode;
3317 
3318         rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
3319         if (rv != VGEN_SUCCESS) {
3320                 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3321                 return (rv);
3322         }
3323 
3324         ldcp->hstate |= ATTR_INFO_SENT;
3325         DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
3326 
3327         return (VGEN_SUCCESS);
3328 }
3329 
3330 /*
3331  * Send descriptor ring register message to the peer over ldc.
3332  * Invoked in RxDringData mode.
3333  */
3334 static int
3335 vgen_send_rx_dring_reg(vgen_ldc_t *ldcp)
3336 {
3337         vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
3338         vio_dring_reg_msg_t     *msg;
3339         vio_dring_reg_ext_msg_t *emsg;
3340         int                     rv;
3341         uint8_t                 *buf;
3342         uint_t                  msgsize;
3343 
3344         msgsize = VNET_DRING_REG_EXT_MSG_SIZE(ldcp->rx_data_ncookies);
3345         msg = kmem_zalloc(msgsize, KM_SLEEP);
3346 
3347         /* Initialize the common part of dring reg msg */
3348         vgen_init_dring_reg_msg(ldcp, msg, VIO_RX_DRING_DATA);
3349 
3350         /* skip over dring cookies at the tail of common section */
3351         buf = (uint8_t *)msg->cookie;
3352         ASSERT(msg->ncookies == 1);
3353         buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));
3354 
3355         /* Now setup the extended part, specific to RxDringData mode */
3356         emsg = (vio_dring_reg_ext_msg_t *)buf;
3357 
3358         /* copy data_ncookies in the msg */
3359         emsg->data_ncookies = ldcp->rx_data_ncookies;
3360 
3361         /* copy data area size in the msg */
3362         emsg->data_area_size = ldcp->rx_data_sz;
3363 
3364         /* copy data area cookies in the msg */
3365         bcopy(ldcp->rx_data_cookie, (ldc_mem_cookie_t *)emsg->data_cookie,
3366             sizeof (ldc_mem_cookie_t) * ldcp->rx_data_ncookies);
3367 
3368         rv = vgen_sendmsg(ldcp, (caddr_t)msg, msgsize, B_FALSE);
3369         if (rv != VGEN_SUCCESS) {
3370                 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3371                 kmem_free(msg, msgsize);
3372                 return (rv);
3373         }
3374 
3375         ldcp->hstate |= DRING_INFO_SENT;
3376         DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3377 
3378         kmem_free(msg, msgsize);
3379         return (VGEN_SUCCESS);
3380 }
3381 
3382 /*
3383  * Send descriptor ring register message to the peer over ldc.
3384  * Invoked in TxDring mode.
3385  */
3386 static int
3387 vgen_send_tx_dring_reg(vgen_ldc_t *ldcp)
3388 {
3389         vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
3390         vio_dring_reg_msg_t     msg;
3391         int                     rv;
3392 
3393         bzero(&msg, sizeof (msg));
3394 
3395         /*
3396          * Initialize only the common part of dring reg msg in TxDring mode.
3397          */
3398         vgen_init_dring_reg_msg(ldcp, &msg, VIO_TX_DRING);
3399 
3400         rv = vgen_sendmsg(ldcp, (caddr_t)&msg, sizeof (msg), B_FALSE);
3401         if (rv != VGEN_SUCCESS) {
3402                 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3403                 return (rv);
3404         }
3405 
3406         ldcp->hstate |= DRING_INFO_SENT;
3407         DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3408 
3409         return (VGEN_SUCCESS);
3410 }
3411 
3412 static int
3413 vgen_send_rdx_info(vgen_ldc_t *ldcp)
3414 {
3415         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
3416         vio_rdx_msg_t   rdxmsg;
3417         vio_msg_tag_t   *tagp = &rdxmsg.tag;
3418         int             rv;
3419 
3420         bzero(&rdxmsg, sizeof (rdxmsg));
3421 
3422         tagp->vio_msgtype = VIO_TYPE_CTRL;
3423         tagp->vio_subtype = VIO_SUBTYPE_INFO;
3424         tagp->vio_subtype_env = VIO_RDX;
3425         tagp->vio_sid = ldcp->local_sid;
3426 
3427         rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
3428         if (rv != VGEN_SUCCESS) {
3429                 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3430                 return (rv);
3431         }
3432 
3433         ldcp->hstate |= RDX_INFO_SENT;
3434         DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3435 
3436         return (VGEN_SUCCESS);
3437 }
3438 
3439 /* send multicast addr info message to vsw */
3440 static int
3441 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3442 {
3443         vnet_mcast_msg_t        mcastmsg;
3444         vnet_mcast_msg_t        *msgp;
3445         vio_msg_tag_t           *tagp;
3446         vgen_t                  *vgenp;
3447         struct ether_addr       *mca;
3448         int                     rv;
3449         int                     i;
3450         uint32_t                size;
3451         uint32_t                mccount;
3452         uint32_t                n;
3453 
3454         msgp = &mcastmsg;
3455         tagp = &msgp->tag;
3456         vgenp = LDC_TO_VGEN(ldcp);
3457 
3458         mccount = vgenp->mccount;
3459         i = 0;
3460 
3461         do {
3462                 tagp->vio_msgtype = VIO_TYPE_CTRL;
3463                 tagp->vio_subtype = VIO_SUBTYPE_INFO;
3464                 tagp->vio_subtype_env = VNET_MCAST_INFO;
3465                 tagp->vio_sid = ldcp->local_sid;
3466 
3467                 n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3468                 size = n * sizeof (struct ether_addr);
3469 
3470                 mca = &(vgenp->mctab[i]);
3471                 bcopy(mca, (msgp->mca), size);
3472                 msgp->set = B_TRUE;
3473                 msgp->count = n;
3474 
3475                 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3476                     B_FALSE);
3477                 if (rv != VGEN_SUCCESS) {
3478                         DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3479                         return (rv);
3480                 }
3481 
3482                 mccount -= n;
3483                 i += n;
3484 
3485         } while (mccount);
3486 
3487         return (VGEN_SUCCESS);
3488 }
3489 
3490 /*
3491  * vgen_dds_rx -- post DDS messages to vnet.
3492  */
3493 static int
3494 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3495 {
3496         vio_dds_msg_t   *dmsg = (vio_dds_msg_t *)tagp;
3497         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
3498 
3499         if (dmsg->dds_class != DDS_VNET_NIU) {
3500                 DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
3501                 return (EBADMSG);
3502         }
3503         vnet_dds_rx(vgenp->vnetp, dmsg);
3504         return (0);
3505 }
3506 
3507 /*
3508  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
3509  */
3510 int
3511 vgen_dds_tx(void *arg, void *msg)
3512 {
3513         vgen_t          *vgenp = arg;
3514         vio_dds_msg_t   *dmsg = msg;
3515         vgen_portlist_t *plistp = &vgenp->vgenports;
3516         vgen_ldc_t      *ldcp;
3517         int             rv = EIO;
3518 
3519         READ_ENTER(&plistp->rwlock);
3520         ldcp = vgenp->vsw_portp->ldcp;
3521         if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
3522                 goto vgen_dsend_exit;
3523         }
3524 
3525         dmsg->tag.vio_sid = ldcp->local_sid;
3526         rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
3527         if (rv != VGEN_SUCCESS) {
3528                 rv = EIO;
3529         } else {
3530                 rv = 0;
3531         }
3532 
3533 vgen_dsend_exit:
3534         RW_EXIT(&plistp->rwlock);
3535         return (rv);
3536 
3537 }
3538 
3539 /* Initiate Phase 2 of handshake */
3540 static int
3541 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3542 {
3543         int     rv;
3544 
3545 #ifdef DEBUG
3546         if (vgen_inject_error(ldcp, VGEN_ERR_HSTATE)) {
3547                 /* simulate out of state condition */
3548                 vgen_inject_err_flag &= ~(VGEN_ERR_HSTATE);
3549                 rv = vgen_send_rdx_info(ldcp);
3550                 return (rv);
3551         }
3552         if (vgen_inject_error(ldcp, VGEN_ERR_HTIMEOUT)) {
3553                 /* simulate timeout condition */
3554                 vgen_inject_err_flag &= ~(VGEN_ERR_HTIMEOUT);
3555                 return (VGEN_SUCCESS);
3556         }
3557 #endif
3558         rv = vgen_send_attr_info(ldcp);
3559         if (rv != VGEN_SUCCESS) {
3560                 return (rv);
3561         }
3562 
3563         return (VGEN_SUCCESS);
3564 }
3565 
3566 static int
3567 vgen_handshake_phase3(vgen_ldc_t *ldcp)
3568 {
3569         int             rv;
3570         vgen_hparams_t  *lp = &ldcp->local_hparams;
3571         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
3572         vgen_stats_t    *statsp = &ldcp->stats;
3573 
3574         /* dring mode has been negotiated in attr phase; save in stats */
3575         statsp->dring_mode = lp->dring_mode;
3576 
3577         if (lp->dring_mode == VIO_RX_DRING_DATA) {   /* RxDringData mode */
3578                 ldcp->rx_dringdata = vgen_handle_dringdata_shm;
3579                 ldcp->tx_dringdata = vgen_dringsend_shm;
3580                 if (!VGEN_PRI_ETH_DEFINED(vgenp)) {
3581                         /*
3582                          * If priority frames are not in use, we don't need a
3583                          * separate wrapper function for 'tx', so we set it to
3584                          * 'tx_dringdata'. If priority frames are configured,
3585                          * we leave the 'tx' pointer as is (initialized in
3586                          * vgen_set_vnet_proto_ops()).
3587                          */
3588                         ldcp->tx = ldcp->tx_dringdata;
3589                 }
3590         } else {                                        /* TxDring mode */
3591                 ldcp->msg_thread = thread_create(NULL,
3592                     2 * DEFAULTSTKSZ, vgen_ldc_msg_worker, ldcp, 0,
3593                     &p0, TS_RUN, maxclsyspri);
3594         }
3595 
3596         rv = vgen_create_dring(ldcp);
3597         if (rv != VGEN_SUCCESS) {
3598                 return (rv);
3599         }
3600 
3601         /* update local dring_info params */
3602         if (lp->dring_mode == VIO_RX_DRING_DATA) {
3603                 bcopy(&(ldcp->rx_dring_cookie),
3604                     &(ldcp->local_hparams.dring_cookie),
3605                     sizeof (ldc_mem_cookie_t));
3606                 ldcp->local_hparams.dring_ncookies = ldcp->rx_dring_ncookies;
3607                 ldcp->local_hparams.num_desc = ldcp->num_rxds;
3608                 ldcp->local_hparams.desc_size =
3609                     sizeof (vnet_rx_dringdata_desc_t);
3610                 rv = vgen_send_rx_dring_reg(ldcp);
3611         } else {
3612                 bcopy(&(ldcp->tx_dring_cookie),
3613                     &(ldcp->local_hparams.dring_cookie),
3614                     sizeof (ldc_mem_cookie_t));
3615                 ldcp->local_hparams.dring_ncookies = ldcp->tx_dring_ncookies;
3616                 ldcp->local_hparams.num_desc = ldcp->num_txds;
3617                 ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3618                 rv = vgen_send_tx_dring_reg(ldcp);
3619         }
3620 
3621         if (rv != VGEN_SUCCESS) {
3622                 return (rv);
3623         }
3624 
3625         return (VGEN_SUCCESS);
3626 }
3627 
3628 /*
3629  * Set vnet-protocol-version dependent functions based on version.
3630  */
3631 static void
3632 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
3633 {
3634         vgen_hparams_t  *lp = &ldcp->local_hparams;
3635         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
3636 
3637         /*
3638          * Setup the appropriate dring data processing routine and any
3639          * associated thread based on the version.
3640          *
3641          * In versions < 1.6, we only support TxDring mode. In this mode, the
3642          * msg worker thread processes all types of VIO msgs (ctrl and data).
3643          *
3644          * In versions >= 1.6, we also support RxDringData mode. In this mode,
3645          * all msgs including dring data messages are handled directly by the
3646          * callback (intr) thread. The dring data msgs (msgtype: VIO_TYPE_DATA,
3647          * subtype: VIO_SUBTYPE_INFO, subtype_env: VIO_DRING_DATA) can also be
3648          * disabled while the polling thread is active, in which case the
3649          * polling thread processes the rcv descriptor ring.
3650          *
3651          * However, for versions >= 1.6, we can force to only use TxDring mode.
3652          * This could happen if RxDringData mode has been disabled (see
3653          * below) on this guest or on the peer guest. This info is determined
3654          * as part of attr exchange phase of handshake. Hence, we setup these
3655          * pointers for v1.6 after attr msg phase completes during handshake.
3656          */
3657         if (VGEN_VER_GTEQ(ldcp, 1, 6)) {        /* Ver >= 1.6 */
3658                 /*
3659                  * Set data dring mode for vgen_send_attr_info().
3660                  */
3661                 if (vgen_mapin_avail(ldcp) == B_TRUE) {
3662                         lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING);
3663                 } else {
3664                         lp->dring_mode = VIO_TX_DRING;
3665                 }
3666         } else {                                /* Ver <= 1.5 */
3667                 lp->dring_mode = VIO_TX_DRING;
3668         }
3669 
3670         if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
3671                 vgen_port_t     *portp = ldcp->portp;
3672                 vnet_t          *vnetp = vgenp->vnetp;
3673                 /*
3674                  * If the version negotiated with vswitch is >= 1.5 (link
3675                  * status update support), set the required bits in our
3676                  * attributes if this vnet device has been configured to get
3677                  * physical link state updates.
3678                  */
3679                 if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
3680                         lp->physlink_update = PHYSLINK_UPDATE_STATE;
3681                 } else {
3682                         lp->physlink_update = PHYSLINK_UPDATE_NONE;
3683                 }
3684         }
3685 
3686         if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
3687                 /*
3688                  * If the version negotiated with peer is >= 1.4(Jumbo Frame
3689                  * Support), set the mtu in our attributes to max_frame_size.
3690                  */
3691                 lp->mtu = vgenp->max_frame_size;
3692         } else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
3693                 /*
3694                  * If the version negotiated with peer is == 1.3 (Vlan Tag
3695                  * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
3696                  */
3697                 lp->mtu = ETHERMAX + VLAN_TAGSZ;
3698         } else {
3699                 vgen_port_t     *portp = ldcp->portp;
3700                 vnet_t          *vnetp = vgenp->vnetp;
3701                 /*
3702                  * Pre-1.3 peers expect max frame size of ETHERMAX.
3703                  * We can negotiate that size with those peers provided the
3704                  * following conditions are true:
3705                  * - Only pvid is defined for our peer and there are no vids.
3706                  * - pvids are equal.
3707                  * If the above conditions are true, then we can send/recv only
3708                  * untagged frames of max size ETHERMAX.
3709                  */
3710                 if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
3711                         lp->mtu = ETHERMAX;
3712                 }
3713         }
3714 
3715         if (VGEN_VER_GTEQ(ldcp, 1, 2)) {        /* Versions >= 1.2 */
3716                 /*
3717                  * Starting v1.2 we support priority frames; so set the
3718                  * dring processing routines and xfer modes based on the
3719                  * version. Note that the dring routines could be changed after
3720                  * attribute handshake phase for versions >= 1.6 (See
3721                  * vgen_handshake_phase3())
3722                  */
3723                 ldcp->tx_dringdata = vgen_dringsend;
3724                 ldcp->rx_dringdata = vgen_handle_dringdata;
3725 
3726                 if (VGEN_PRI_ETH_DEFINED(vgenp)) {
3727                         /*
3728                          * Enable priority routines and pkt mode only if
3729                          * at least one pri-eth-type is specified in MD.
3730                          */
3731                         ldcp->tx = vgen_ldcsend;
3732                         ldcp->rx_pktdata = vgen_handle_pkt_data;
3733 
3734                         /* set xfer mode for vgen_send_attr_info() */
3735                         lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
3736                 } else {
3737                         /* No priority eth types defined in MD */
3738                         ldcp->tx = ldcp->tx_dringdata;
3739                         ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3740 
3741                         /* Set xfer mode for vgen_send_attr_info() */
3742                         lp->xfer_mode = VIO_DRING_MODE_V1_2;
3743                 }
3744         } else { /* Versions prior to 1.2  */
3745                 vgen_reset_vnet_proto_ops(ldcp);
3746         }
3747 }
3748 
3749 /*
3750  * Reset vnet-protocol-version dependent functions to pre-v1.2.
3751  */
3752 static void
3753 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
3754 {
3755         vgen_hparams_t  *lp = &ldcp->local_hparams;
3756 
3757         ldcp->tx = ldcp->tx_dringdata = vgen_dringsend;
3758         ldcp->rx_dringdata = vgen_handle_dringdata;
3759         ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3760 
3761         /* set xfer mode for vgen_send_attr_info() */
3762         lp->xfer_mode = VIO_DRING_MODE_V1_0;
3763 }
3764 
3765 static void
3766 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
3767 {
3768         vgen_ldc_t      *ldcp = portp->ldcp;
3769         vgen_t          *vgenp = portp->vgenp;
3770         vnet_t          *vnetp = vgenp->vnetp;
3771         boolean_t       need_reset = B_FALSE;
3772 
3773         mutex_enter(&ldcp->cblock);
3774 
3775         /*
3776          * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
3777          * the connection. See comments in vgen_set_vnet_proto_ops().
3778          */
3779         if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
3780             (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
3781                 need_reset = B_TRUE;
3782         }
3783         mutex_exit(&ldcp->cblock);
3784 
3785         if (need_reset == B_TRUE) {
3786                 (void) vgen_ldc_reset(ldcp, VGEN_OTHER);
3787         }
3788 }
3789 
3790 static void
3791 vgen_port_reset(vgen_port_t *portp)
3792 {
3793         (void) vgen_ldc_reset(portp->ldcp, VGEN_OTHER);
3794 }
3795 
3796 static void
3797 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
3798 {
3799         vgen_port_t     *portp;
3800         vgen_portlist_t *plistp;
3801 
3802         plistp = &(vgenp->vgenports);
3803         READ_ENTER(&plistp->rwlock);
3804 
3805         for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
3806 
3807                 vgen_vlan_unaware_port_reset(portp);
3808 
3809         }
3810 
3811         RW_EXIT(&plistp->rwlock);
3812 }
3813 
3814 static void
3815 vgen_reset_vsw_port(vgen_t *vgenp)
3816 {
3817         vgen_port_t     *portp;
3818 
3819         if ((portp = vgenp->vsw_portp) != NULL) {
3820                 vgen_port_reset(portp);
3821         }
3822 }
3823 
3824 static void
3825 vgen_setup_handshake_params(vgen_ldc_t *ldcp)
3826 {
3827         vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
3828 
3829         /*
3830          * clear local handshake params and initialize.
3831          */
3832         bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3833 
3834         /* set version to the highest version supported */
3835         ldcp->local_hparams.ver_major =
3836             ldcp->vgen_versions[0].ver_major;
3837         ldcp->local_hparams.ver_minor =
3838             ldcp->vgen_versions[0].ver_minor;
3839         ldcp->local_hparams.dev_class = VDEV_NETWORK;
3840 
3841         /* set attr_info params */
3842         ldcp->local_hparams.mtu = vgenp->max_frame_size;
3843         ldcp->local_hparams.addr =
3844             vnet_macaddr_strtoul(vgenp->macaddr);
3845         ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3846         ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
3847         ldcp->local_hparams.ack_freq = 0;    /* don't need acks */
3848         ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
3849 
3850         /* reset protocol version specific function pointers */
3851         vgen_reset_vnet_proto_ops(ldcp);
3852         ldcp->local_hparams.dring_ident = 0;
3853         ldcp->local_hparams.dring_ready = B_FALSE;
3854 
3855         /* clear peer_hparams */
3856         bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3857         ldcp->peer_hparams.dring_ready = B_FALSE;
3858 }
3859 
/*
 * Process Channel Reset. We tear down the resources (timers, threads,
 * descriptor rings etc) associated with the channel and reinitialize the
 * channel based on the flags.
 *
 * Arguments:
 *    ldcp:     The channel being processed.
 *
 *    flags:
 *      VGEN_FLAG_EVT_RESET:
 *              A ECONNRESET error occurred while doing ldc operations such as
 *              ldc_read() or ldc_write(); the channel is already reset and it
 *              needs to be handled.
 *      VGEN_FLAG_NEED_LDCRESET:
 *              Some other errors occurred and the error handling code needs to
 *              explicitly reset the channel and restart handshake with the
 *              peer. The error could be either in ldc operations or other
 *              parts of the code such as timeouts or mdeg events etc.
 *      VGEN_FLAG_UNINIT:
 *              The channel is being torn down; no need to bring up the channel
 *              after resetting.
 *
 * Returns:
 *    Always 0. ldcp->reset_in_progress is cleared just before returning.
 *
 * If restarting the handshake fails, the whole sequence is repeated from the
 * 'again' label with flags recomputed from the handshake error.
 */
static int
vgen_process_reset(vgen_ldc_t *ldcp, int flags)
{
        vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
        vgen_port_t     *portp = ldcp->portp;
        vgen_hparams_t  *lp = &ldcp->local_hparams;
        boolean_t       is_vsw_port = B_FALSE;
        boolean_t       link_update = B_FALSE;
        ldc_status_t    istatus;
        int             rv;
        uint_t          retries = 0;
        timeout_id_t    htid = 0;
        timeout_id_t    wd_tid = 0;

        if (portp == vgenp->vsw_portp) { /* vswitch port ? */
                is_vsw_port = B_TRUE;
        }

        /*
         * Report that the channel is being reset; it ensures that any HybridIO
         * configuration is torn down before we reset the channel if it is not
         * already reset (flags == VGEN_FLAG_NEED_LDCRESET).
         */
        if (is_vsw_port == B_TRUE) {
                vio_net_report_err_t rep_err = portp->vcb.vio_net_report_err;
                rep_err(portp->vhp, VIO_NET_RES_DOWN);
        }

        /*
         * NOTE(review): htid, wd_tid and link_update are initialized only
         * once, above this label. On a retry pass in which ldcp->htid or
         * ldcp->wd_tid is already 0, the value captured by an earlier pass is
         * passed to untimeout() again - confirm this is harmless.
         */
again:
        mutex_enter(&ldcp->cblock);

        /* Clear hstate and hphase */
        ldcp->hstate = 0;
        ldcp->hphase = VH_PHASE0;
        if (flags == VGEN_FLAG_NEED_LDCRESET || flags == VGEN_FLAG_UNINIT) {
                /* the channel has not been reset yet; bring it down here */
                DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
                (void) ldc_down(ldcp->ldc_handle);
                (void) ldc_status(ldcp->ldc_handle, &istatus);
                DWARN(vgenp, ldcp, "Reset Done, ldc_status(%d)\n", istatus);
                ldcp->ldc_status = istatus;

                if (flags == VGEN_FLAG_UNINIT) {
                        /* disable further callbacks */
                        rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
                        if (rv != 0) {
                                DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
                        }
                }

        } else {
                /* flags == VGEN_FLAG_EVT_RESET */
                DWARN(vgenp, ldcp, "ldc status(%d)\n", ldcp->ldc_status);
        }

        /*
         * As the connection is now reset, mark the channel
         * link_state as 'down' and notify the stack if needed.
         */
        if (ldcp->link_state != LINK_STATE_DOWN) {
                ldcp->link_state = LINK_STATE_DOWN;

                if (is_vsw_port == B_TRUE) { /* vswitch port ? */
                        /*
                         * As the channel link is down, mark physical link also
                         * as down. After the channel comes back up and
                         * handshake completes, we will get an update on the
                         * physlink state from vswitch (if this device has been
                         * configured to get phys link updates).
                         */
                        vgenp->phys_link_state = LINK_STATE_DOWN;
                        link_update = B_TRUE;

                }
        }

        /*
         * Take over any pending timeout ids while holding cblock; the
         * timeouts themselves are cancelled after the lock is dropped.
         */
        if (ldcp->htid != 0) {
                htid = ldcp->htid;
                ldcp->htid = 0;
        }

        if (ldcp->wd_tid != 0) {
                wd_tid = ldcp->wd_tid;
                ldcp->wd_tid = 0;
        }

        mutex_exit(&ldcp->cblock);

        /* Update link state to the stack */
        if (link_update == B_TRUE) {
                vgen_link_update(vgenp, ldcp->link_state);
        }

        /*
         * As the channel is being reset, redirect traffic to the peer through
         * vswitch, until the channel becomes ready to be used again.
         */
        if (is_vsw_port == B_FALSE && vgenp->vsw_portp != NULL) {
                (void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
        }

        /* Cancel handshake watchdog timeout */
        if (htid) {
                (void) untimeout(htid);
        }

        /* Cancel transmit watchdog timeout */
        if (wd_tid) {
                (void) untimeout(wd_tid);
        }

        /*
         * Stop the msg worker thread; skipped when we are running in the
         * context of the msg worker thread itself.
         */
        if (lp->dring_mode == VIO_TX_DRING && curthread != ldcp->msg_thread) {
                vgen_stop_msg_thread(ldcp);
        }

        /* Grab all locks while we tear down tx/rx resources */
        LDC_LOCK(ldcp);

        /* Destroy the local dring which is exported to the peer */
        vgen_destroy_dring(ldcp);

        /* Unmap the remote dring which is imported from the peer */
        vgen_unmap_dring(ldcp);

        /*
         * Bring up the channel and restart handshake
         * only if the channel is not being torn down.
         */
        if (flags != VGEN_FLAG_UNINIT) {

                /* Setup handshake parameters to restart a new handshake */
                vgen_setup_handshake_params(ldcp);

                /* Bring the channel up */
                vgen_ldc_up(ldcp);

                /* on failure the previously recorded ldc_status is kept */
                if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
                        DWARN(vgenp, ldcp, "ldc_status err\n");
                } else {
                        ldcp->ldc_status = istatus;
                }

                /* If the channel is UP, start handshake */
                if (ldcp->ldc_status == LDC_UP) {

                        if (is_vsw_port == B_FALSE) {
                                /*
                                 * Channel is up; use this port from now on.
                                 */
                                (void) atomic_swap_32(&portp->use_vsw_port,
                                    B_FALSE);
                        }

                        /* Initialize local session id */
                        ldcp->local_sid = ddi_get_lbolt();

                        /* clear peer session id */
                        ldcp->peer_sid = 0;

                        /*
                         * Initiate Handshake process with peer ldc endpoint by
                         * sending version info vio message. If that fails we
                         * go back to the top of this function to process the
                         * error again. Note that we can be in this loop for
                         * 'vgen_ldc_max_resets' times, after which the channel
                         * is not brought up.
                         *
                         * The tx/rx locks are dropped before the handshake is
                         * started; cblock stays held across vgen_handshake()
                         * and is dropped right after it returns.
                         */
                        mutex_exit(&ldcp->tclock);
                        mutex_exit(&ldcp->txlock);
                        mutex_exit(&ldcp->wrlock);
                        mutex_exit(&ldcp->rxlock);
                        rv = vgen_handshake(vh_nextphase(ldcp));
                        mutex_exit(&ldcp->cblock);
                        if (rv != 0) {
                                /* pick the reset type for the next pass */
                                if (rv == ECONNRESET) {
                                        flags = VGEN_FLAG_EVT_RESET;
                                } else {
                                        flags = VGEN_FLAG_NEED_LDCRESET;
                                }

                                /*
                                 * We still hold 'reset_in_progress'; so we can
                                 * just loop back to the top to restart error
                                 * processing.
                                 */
                                goto again;
                        }
                } else {
                        /* channel is not up; no handshake is started */
                        LDC_UNLOCK(ldcp);
                }

        } else {        /* flags == VGEN_FLAG_UNINIT */

                /* Close the channel - retry on EAGAIN */
                while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
                        if (++retries > vgen_ldccl_retries) {
                                break;
                        }
                        drv_usecwait(VGEN_LDC_CLOSE_DELAY);
                }
                if (rv != 0) {
                        cmn_err(CE_NOTE,
                            "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
                            vgenp->instance, rv, ldcp->ldc_id);
                }

                /* mark the channel as no longer started */
                ldcp->ldc_reset_count = 0;
                ldcp->ldc_status = LDC_INIT;
                ldcp->flags &= ~(CHANNEL_STARTED);

                LDC_UNLOCK(ldcp);
        }

        /* Done processing channel reset; clear the atomic flag */
        ldcp->reset_in_progress = 0;
        return (0);
}
4099 
4100 /*
4101  * Initiate handshake with the peer by sending various messages
4102  * based on the handshake-phase that the channel is currently in.
4103  */
4104 static int
4105 vgen_handshake(vgen_ldc_t *ldcp)
4106 {
4107         uint32_t        hphase = ldcp->hphase;
4108         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
4109         int             rv = 0;
4110         timeout_id_t    htid;
4111 
4112         switch (hphase) {
4113 
4114         case VH_PHASE1:
4115 
4116                 /*
4117                  * start timer, for entire handshake process, turn this timer
4118                  * off if all phases of handshake complete successfully and
4119                  * hphase goes to VH_DONE(below) or channel is reset due to
4120                  * errors or vgen_ldc_uninit() is invoked(vgen_stop).
4121                  */
4122                 ASSERT(ldcp->htid == 0);
4123                 ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4124                     drv_usectohz(vgen_hwd_interval * MICROSEC));
4125 
4126                 /* Phase 1 involves negotiating the version */
4127                 rv = vgen_send_version_negotiate(ldcp);
4128                 break;
4129 
4130         case VH_PHASE2:
4131                 rv = vgen_handshake_phase2(ldcp);
4132                 break;
4133 
4134         case VH_PHASE3:
4135                 rv = vgen_handshake_phase3(ldcp);
4136                 break;
4137 
4138         case VH_PHASE4:
4139                 rv = vgen_send_rdx_info(ldcp);
4140                 break;
4141 
4142         case VH_DONE:
4143 
4144                 ldcp->ldc_reset_count = 0;
4145 
4146                 DBG1(vgenp, ldcp, "Handshake Done\n");
4147 
4148                 /*
4149                  * The channel is up and handshake is done successfully. Now we
4150                  * can mark the channel link_state as 'up'. We also notify the
4151                  * stack if the channel is connected to vswitch.
4152                  */
4153                 ldcp->link_state = LINK_STATE_UP;
4154 
4155                 if (ldcp->portp == vgenp->vsw_portp) {
4156                         /*
4157                          * If this channel(port) is connected to vsw,
4158                          * need to sync multicast table with vsw.
4159                          */
4160                         rv = vgen_send_mcast_info(ldcp);
4161                         if (rv != VGEN_SUCCESS)
4162                                 break;
4163 
4164                         if (vgenp->pls_negotiated == B_FALSE) {
4165                                 /*
4166                                  * We haven't negotiated with vswitch to get
4167                                  * physical link state updates. We can update
4168                                  * update the stack at this point as the
4169                                  * channel to vswitch is up and the handshake
4170                                  * is done successfully.
4171                                  *
4172                                  * If we have negotiated to get physical link
4173                                  * state updates, then we won't notify the
4174                                  * the stack here; we do that as soon as
4175                                  * vswitch sends us the initial phys link state
4176                                  * (see vgen_handle_physlink_info()).
4177                                  */
4178                                 mutex_exit(&ldcp->cblock);
4179                                 vgen_link_update(vgenp, ldcp->link_state);
4180                                 mutex_enter(&ldcp->cblock);
4181                         }
4182                 }
4183 
4184                 if (ldcp->htid != 0) {
4185                         htid = ldcp->htid;
4186                         ldcp->htid = 0;
4187 
4188                         mutex_exit(&ldcp->cblock);
4189                         (void) untimeout(htid);
4190                         mutex_enter(&ldcp->cblock);
4191                 }
4192 
4193                 /*
4194                  * Check if mac layer should be notified to restart
4195                  * transmissions. This can happen if the channel got
4196                  * reset and while tx_blocked is set.
4197                  */
4198                 mutex_enter(&ldcp->tclock);
4199                 if (ldcp->tx_blocked) {
4200                         vio_net_tx_update_t vtx_update =
4201                             ldcp->portp->vcb.vio_net_tx_update;
4202 
4203                         ldcp->tx_blocked = B_FALSE;
4204                         vtx_update(ldcp->portp->vhp);
4205                 }
4206                 mutex_exit(&ldcp->tclock);
4207 
4208                 /* start transmit watchdog timer */
4209                 ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
4210                     drv_usectohz(vgen_txwd_interval * 1000));
4211 
4212                 break;
4213 
4214         default:
4215                 break;
4216         }
4217 
4218         return (rv);
4219 }
4220 
4221 /*
4222  * Check if the current handshake phase has completed successfully and
4223  * return the status.
4224  */
4225 static int
4226 vgen_handshake_done(vgen_ldc_t *ldcp)
4227 {
4228         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
4229         uint32_t        hphase = ldcp->hphase;
4230         int             status = 0;
4231 
4232         switch (hphase) {
4233 
4234         case VH_PHASE1:
4235                 /*
4236                  * Phase1 is done, if version negotiation
4237                  * completed successfully.
4238                  */
4239                 status = ((ldcp->hstate & VER_NEGOTIATED) ==
4240                     VER_NEGOTIATED);
4241                 break;
4242 
4243         case VH_PHASE2:
4244                 /*
4245                  * Phase 2 is done, if attr info
4246                  * has been exchanged successfully.
4247                  */
4248                 status = ((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4249                     ATTR_INFO_EXCHANGED);
4250                 break;
4251 
4252         case VH_PHASE3:
4253                 /*
4254                  * Phase 3 is done, if dring registration
4255                  * has been exchanged successfully.
4256                  */
4257                 status = ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4258                     DRING_INFO_EXCHANGED);
4259                 break;
4260 
4261         case VH_PHASE4:
4262                 /* Phase 4 is done, if rdx msg has been exchanged */
4263                 status = ((ldcp->hstate & RDX_EXCHANGED) ==
4264                     RDX_EXCHANGED);
4265                 break;
4266 
4267         default:
4268                 break;
4269         }
4270 
4271         if (status == 0) {
4272                 return (VGEN_FAILURE);
4273         }
4274         DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4275         return (VGEN_SUCCESS);
4276 }
4277 
/*
 * Link State Update Notes:
 * The link state of the channel connected to vswitch is reported as the link
 * state of the vnet device, by default. If the channel is down or reset, then
 * the link state is marked 'down'. If the channel is 'up' *and* handshake
 * between the vnet and vswitch is successful, then the link state is marked
 * 'up'. If physical network link state is desired, then the vnet device must
 * be configured to get physical link updates and the 'linkprop' property
 * in the virtual-device MD node indicates this. As part of attribute exchange
 * the vnet device negotiates with the vswitch to obtain physical link state
 * updates. If it successfully negotiates, vswitch sends an initial physlink
 * msg once the handshake is done and further whenever the physical link state
 * changes. Currently we don't have mac layer interfaces to report two distinct
 * link states - virtual and physical. Thus, if the vnet has been configured to
 * get physical link updates, then the link status will be reported as 'up'
 * only when both the virtual and physical links are up.
 *
 * vgen_link_update() itself is a thin wrapper: it passes the given link_state
 * to vnet_link_update() together with the vnetp handle stored in vgenp.
 */
static void
vgen_link_update(vgen_t *vgenp, link_state_t link_state)
{
        vnet_link_update(vgenp->vnetp, link_state);
}
4300 
4301 /*
4302  * Handle a version info msg from the peer or an ACK/NACK from the peer
4303  * to a version info msg that we sent.
4304  */
4305 static int
4306 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4307 {
4308         vgen_t          *vgenp;
4309         vio_ver_msg_t   *vermsg = (vio_ver_msg_t *)tagp;
4310         int             ack = 0;
4311         int             failed = 0;
4312         int             idx;
4313         vgen_ver_t      *versions = ldcp->vgen_versions;
4314         int             rv = 0;
4315 
4316         vgenp = LDC_TO_VGEN(ldcp);
4317         DBG1(vgenp, ldcp, "enter\n");
4318         switch (tagp->vio_subtype) {
4319         case VIO_SUBTYPE_INFO:
4320 
4321                 /*  Cache sid of peer if this is the first time */
4322                 if (ldcp->peer_sid == 0) {
4323                         DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4324                             tagp->vio_sid);
4325                         ldcp->peer_sid = tagp->vio_sid;
4326                 }
4327 
4328                 if (ldcp->hphase != VH_PHASE1) {
4329                         /*
4330                          * If we are not already in VH_PHASE1, reset to
4331                          * pre-handshake state, and initiate handshake
4332                          * to the peer too.
4333                          */
4334                         return (EINVAL);
4335                 }
4336 
4337                 ldcp->hstate |= VER_INFO_RCVD;
4338 
4339                 /* save peer's requested values */
4340                 ldcp->peer_hparams.ver_major = vermsg->ver_major;
4341                 ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4342                 ldcp->peer_hparams.dev_class = vermsg->dev_class;
4343 
4344                 if ((vermsg->dev_class != VDEV_NETWORK) &&
4345                     (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4346                         /* unsupported dev_class, send NACK */
4347 
4348                         DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4349 
4350                         tagp->vio_subtype = VIO_SUBTYPE_NACK;
4351                         tagp->vio_sid = ldcp->local_sid;
4352                         /* send reply msg back to peer */
4353                         rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4354                             sizeof (*vermsg), B_FALSE);
4355                         if (rv != VGEN_SUCCESS) {
4356                                 return (rv);
4357                         }
4358                         return (VGEN_FAILURE);
4359                 }
4360 
4361                 DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4362                     vermsg->ver_major,  vermsg->ver_minor);
4363 
4364                 idx = 0;
4365 
4366                 for (;;) {
4367 
4368                         if (vermsg->ver_major > versions[idx].ver_major) {
4369 
4370                                 /* nack with next lower version */
4371                                 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4372                                 vermsg->ver_major = versions[idx].ver_major;
4373                                 vermsg->ver_minor = versions[idx].ver_minor;
4374                                 break;
4375                         }
4376 
4377                         if (vermsg->ver_major == versions[idx].ver_major) {
4378 
4379                                 /* major version match - ACK version */
4380                                 tagp->vio_subtype = VIO_SUBTYPE_ACK;
4381                                 ack = 1;
4382 
4383                                 /*
4384                                  * lower minor version to the one this endpt
4385                                  * supports, if necessary
4386                                  */
4387                                 if (vermsg->ver_minor >
4388                                     versions[idx].ver_minor) {
4389                                         vermsg->ver_minor =
4390                                             versions[idx].ver_minor;
4391                                         ldcp->peer_hparams.ver_minor =
4392                                             versions[idx].ver_minor;
4393                                 }
4394                                 break;
4395                         }
4396 
4397                         idx++;
4398 
4399                         if (idx == VGEN_NUM_VER) {
4400 
4401                                 /* no version match - send NACK */
4402                                 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4403                                 vermsg->ver_major = 0;
4404                                 vermsg->ver_minor = 0;
4405                                 failed = 1;
4406                                 break;
4407                         }
4408 
4409                 }
4410 
4411                 tagp->vio_sid = ldcp->local_sid;
4412 
4413                 /* send reply msg back to peer */
4414                 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4415                     B_FALSE);
4416                 if (rv != VGEN_SUCCESS) {
4417                         return (rv);
4418                 }
4419 
4420                 if (ack) {
4421                         ldcp->hstate |= VER_ACK_SENT;
4422                         DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
4423                             vermsg->ver_major, vermsg->ver_minor);
4424                 }
4425                 if (failed) {
4426                         DWARN(vgenp, ldcp, "Negotiation Failed\n");
4427                         return (VGEN_FAILURE);
4428                 }
4429                 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4430 
4431                         /*  VER_ACK_SENT and VER_ACK_RCVD */
4432 
4433                         /* local and peer versions match? */
4434                         ASSERT((ldcp->local_hparams.ver_major ==
4435                             ldcp->peer_hparams.ver_major) &&
4436                             (ldcp->local_hparams.ver_minor ==
4437                             ldcp->peer_hparams.ver_minor));
4438 
4439                         vgen_set_vnet_proto_ops(ldcp);
4440 
4441                         /* move to the next phase */
4442                         rv = vgen_handshake(vh_nextphase(ldcp));
4443                         if (rv != 0) {
4444                                 return (rv);
4445                         }
4446                 }
4447 
4448                 break;
4449 
4450         case VIO_SUBTYPE_ACK:
4451 
4452                 if (ldcp->hphase != VH_PHASE1) {
4453                         /*  This should not happen. */
4454                         DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
4455                         return (VGEN_FAILURE);
4456                 }
4457 
4458                 /* SUCCESS - we have agreed on a version */
4459                 ldcp->local_hparams.ver_major = vermsg->ver_major;
4460                 ldcp->local_hparams.ver_minor = vermsg->ver_minor;
4461                 ldcp->hstate |= VER_ACK_RCVD;
4462 
4463                 DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
4464                     vermsg->ver_major,  vermsg->ver_minor);
4465 
4466                 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4467 
4468                         /*  VER_ACK_SENT and VER_ACK_RCVD */
4469 
4470                         /* local and peer versions match? */
4471                         ASSERT((ldcp->local_hparams.ver_major ==
4472                             ldcp->peer_hparams.ver_major) &&
4473                             (ldcp->local_hparams.ver_minor ==
4474                             ldcp->peer_hparams.ver_minor));
4475 
4476                         vgen_set_vnet_proto_ops(ldcp);
4477 
4478                         /* move to the next phase */
4479                         rv = vgen_handshake(vh_nextphase(ldcp));
4480                         if (rv != 0) {
4481                                 return (rv);
4482                         }
4483                 }
4484                 break;
4485 
4486         case VIO_SUBTYPE_NACK:
4487 
4488                 if (ldcp->hphase != VH_PHASE1) {
4489                         /*  This should not happen.  */
4490                         DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
4491                         "Phase(%u)\n", ldcp->hphase);
4492                         return (VGEN_FAILURE);
4493                 }
4494 
4495                 DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
4496                     vermsg->ver_major, vermsg->ver_minor);
4497 
4498                 /* check if version in NACK is zero */
4499                 if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
4500                         /*
4501                          * Version Negotiation has failed.
4502                          */
4503                         DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4504                         return (VGEN_FAILURE);
4505                 }
4506 
4507                 idx = 0;
4508 
4509                 for (;;) {
4510 
4511                         if (vermsg->ver_major > versions[idx].ver_major) {
4512                                 /* select next lower version */
4513 
4514                                 ldcp->local_hparams.ver_major =
4515                                     versions[idx].ver_major;
4516                                 ldcp->local_hparams.ver_minor =
4517                                     versions[idx].ver_minor;
4518                                 break;
4519                         }
4520 
4521                         if (vermsg->ver_major == versions[idx].ver_major) {
4522                                 /* major version match */
4523 
4524                                 ldcp->local_hparams.ver_major =
4525                                     versions[idx].ver_major;
4526 
4527                                 ldcp->local_hparams.ver_minor =
4528                                     versions[idx].ver_minor;
4529                                 break;
4530                         }
4531 
4532                         idx++;
4533 
4534                         if (idx == VGEN_NUM_VER) {
4535                                 /*
4536                                  * no version match.
4537                                  * Version Negotiation has failed.
4538                                  */
4539                                 DWARN(vgenp, ldcp,
4540                                     "Version Negotiation Failed\n");
4541                                 return (VGEN_FAILURE);
4542                         }
4543 
4544                 }
4545 
4546                 rv = vgen_send_version_negotiate(ldcp);
4547                 if (rv != VGEN_SUCCESS) {
4548                         return (rv);
4549                 }
4550 
4551                 break;
4552         }
4553 
4554         DBG1(vgenp, ldcp, "exit\n");
4555         return (VGEN_SUCCESS);
4556 }
4557 
4558 static int
4559 vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4560 {
4561         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
4562         vgen_hparams_t  *lp = &ldcp->local_hparams;
4563         vgen_hparams_t  *rp = &ldcp->peer_hparams;
4564         uint32_t        mtu;
4565         uint8_t         dring_mode;
4566 
4567         ldcp->hstate |= ATTR_INFO_RCVD;
4568 
4569         /* save peer's values */
4570         rp->mtu = msg->mtu;
4571         rp->addr = msg->addr;
4572         rp->addr_type = msg->addr_type;
4573         rp->xfer_mode = msg->xfer_mode;
4574         rp->ack_freq = msg->ack_freq;
4575         rp->dring_mode = msg->options;
4576 
4577         /*
4578          * Process address type, ack frequency and transfer mode attributes.
4579          */
4580         if ((msg->addr_type != ADDR_TYPE_MAC) ||
4581             (msg->ack_freq > 64) ||
4582             (msg->xfer_mode != lp->xfer_mode)) {
4583                 return (VGEN_FAILURE);
4584         }
4585 
4586         /*
4587          * Process dring mode attribute.
4588          */
4589         if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4590                 /*
4591                  * Versions >= 1.6:
4592                  * Though we are operating in v1.6 mode, it is possible that
4593                  * RxDringData mode has been disabled either on this guest or
4594                  * on the peer guest. If so, we revert to pre v1.6 behavior of
4595                  * TxDring mode. But this must be agreed upon in both
4596                  * directions of attr exchange. We first determine the mode
4597                  * that can be negotiated.
4598                  */
4599                 if ((msg->options & VIO_RX_DRING_DATA) != 0 &&
4600                     vgen_mapin_avail(ldcp) == B_TRUE) {
4601                         /*
4602                          * We are capable of handling RxDringData AND the peer
4603                          * is also capable of it; we enable RxDringData mode on
4604                          * this channel.
4605                          */
4606                         dring_mode = VIO_RX_DRING_DATA;
4607                 } else if ((msg->options & VIO_TX_DRING) != 0) {
4608                         /*
4609                          * If the peer is capable of TxDring mode, we
4610                          * negotiate TxDring mode on this channel.
4611                          */
4612                         dring_mode = VIO_TX_DRING;
4613                 } else {
4614                         /*
4615                          * We support only VIO_TX_DRING and VIO_RX_DRING_DATA
4616                          * modes. We don't support VIO_RX_DRING mode.
4617                          */
4618                         return (VGEN_FAILURE);
4619                 }
4620 
4621                 /*
4622                  * If we have received an ack for the attr info that we sent,
4623                  * then check if the dring mode matches what the peer had ack'd
4624                  * (saved in local hparams). If they don't match, we fail the
4625                  * handshake.
4626                  */
4627                 if (ldcp->hstate & ATTR_ACK_RCVD) {
4628                         if (msg->options != lp->dring_mode) {
4629                                 /* send NACK */
4630                                 return (VGEN_FAILURE);
4631                         }
4632                 } else {
4633                         /*
4634                          * Save the negotiated dring mode in our attr
4635                          * parameters, so it gets sent in the attr info from us
4636                          * to the peer.
4637                          */
4638                         lp->dring_mode = dring_mode;
4639                 }
4640 
4641                 /* save the negotiated dring mode in the msg to be replied */
4642                 msg->options = dring_mode;
4643         }
4644 
4645         /*
4646          * Process MTU attribute.
4647          */
4648         if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4649                 /*
4650                  * Versions >= 1.4:
4651                  * Validate mtu of the peer is at least ETHERMAX. Then, the mtu
4652                  * is negotiated down to the minimum of our mtu and peer's mtu.
4653                  */
4654                 if (msg->mtu < ETHERMAX) {
4655                         return (VGEN_FAILURE);
4656                 }
4657 
4658                 mtu = MIN(msg->mtu, vgenp->max_frame_size);
4659 
4660                 /*
4661                  * If we have received an ack for the attr info
4662                  * that we sent, then check if the mtu computed
4663                  * above matches the mtu that the peer had ack'd
4664                  * (saved in local hparams). If they don't
4665                  * match, we fail the handshake.
4666                  */
4667                 if (ldcp->hstate & ATTR_ACK_RCVD) {
4668                         if (mtu != lp->mtu) {
4669                                 /* send NACK */
4670                                 return (VGEN_FAILURE);
4671                         }
4672                 } else {
4673                         /*
4674                          * Save the mtu computed above in our
4675                          * attr parameters, so it gets sent in
4676                          * the attr info from us to the peer.
4677                          */
4678                         lp->mtu = mtu;
4679                 }
4680 
4681                 /* save the MIN mtu in the msg to be replied */
4682                 msg->mtu = mtu;
4683 
4684         } else {
4685                 /* versions < 1.4, mtu must match */
4686                 if (msg->mtu != lp->mtu) {
4687                         return (VGEN_FAILURE);
4688                 }
4689         }
4690 
4691         return (VGEN_SUCCESS);
4692 }
4693 
4694 static int
4695 vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4696 {
4697         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
4698         vgen_hparams_t  *lp = &ldcp->local_hparams;
4699 
4700         /*
4701          * Process dring mode attribute.
4702          */
4703         if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4704                 /*
4705                  * Versions >= 1.6:
4706                  * The ack msg sent by the peer contains the negotiated dring
4707                  * mode between our capability (that we had sent in our attr
4708                  * info) and the peer's capability.
4709                  */
4710                 if (ldcp->hstate & ATTR_ACK_SENT) {
4711                         /*
4712                          * If we have sent an ack for the attr info msg from
4713                          * the peer, check if the dring mode that was
4714                          * negotiated then (saved in local hparams) matches the
4715                          * mode that the peer has ack'd. If they don't match,
4716                          * we fail the handshake.
4717                          */
4718                         if (lp->dring_mode != msg->options) {
4719                                 return (VGEN_FAILURE);
4720                         }
4721                 } else {
4722                         if ((msg->options & lp->dring_mode) == 0) {
4723                                 /*
4724                                  * Peer ack'd with a mode that we don't
4725                                  * support; we fail the handshake.
4726                                  */
4727                                 return (VGEN_FAILURE);
4728                         }
4729                         if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA))
4730                             == (VIO_TX_DRING|VIO_RX_DRING_DATA)) {
4731                                 /*
4732                                  * Peer must ack with only one negotiated mode.
4733                                  * Otherwise fail handshake.
4734                                  */
4735                                 return (VGEN_FAILURE);
4736                         }
4737 
4738                         /*
4739                          * Save the negotiated mode, so we can validate it when
4740                          * we receive attr info from the peer.
4741                          */
4742                         lp->dring_mode = msg->options;
4743                 }
4744         }
4745 
4746         /*
4747          * Process Physical Link Update attribute.
4748          */
4749         if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
4750             ldcp->portp == vgenp->vsw_portp) {
4751                 /*
4752                  * Versions >= 1.5:
4753                  * If the vnet device has been configured to get
4754                  * physical link state updates, check the corresponding
4755                  * bits in the ack msg, if the peer is vswitch.
4756                  */
4757                 if (((lp->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4758                     PHYSLINK_UPDATE_STATE) &&
4759                     ((msg->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4760                     PHYSLINK_UPDATE_STATE_ACK)) {
4761                         vgenp->pls_negotiated = B_TRUE;
4762                 } else {
4763                         vgenp->pls_negotiated = B_FALSE;
4764                 }
4765         }
4766 
4767         /*
4768          * Process MTU attribute.
4769          */
4770         if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4771                 /*
4772                  * Versions >= 1.4:
4773                  * The ack msg sent by the peer contains the minimum of
4774                  * our mtu (that we had sent in our attr info) and the
4775                  * peer's mtu.
4776                  *
4777                  * If we have sent an ack for the attr info msg from
4778                  * the peer, check if the mtu that was computed then
4779                  * (saved in local hparams) matches the mtu that the
4780                  * peer has ack'd. If they don't match, we fail the
4781                  * handshake.
4782                  */
4783                 if (ldcp->hstate & ATTR_ACK_SENT) {
4784                         if (lp->mtu != msg->mtu) {
4785                                 return (VGEN_FAILURE);
4786                         }
4787                 } else {
4788                         /*
4789                          * If the mtu ack'd by the peer is > our mtu
4790                          * fail handshake. Otherwise, save the mtu, so
4791                          * we can validate it when we receive attr info
4792                          * from our peer.
4793                          */
4794                         if (msg->mtu > lp->mtu) {
4795                                 return (VGEN_FAILURE);
4796                         }
4797                         if (msg->mtu <= lp->mtu) {
4798                                 lp->mtu = msg->mtu;
4799                         }
4800                 }
4801         }
4802 
4803         return (VGEN_SUCCESS);
4804 }
4805 
4806 
4807 /*
4808  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
4809  * to an attr info msg that we sent.
4810  */
4811 static int
4812 vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4813 {
4814         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
4815         vnet_attr_msg_t *msg = (vnet_attr_msg_t *)tagp;
4816         int             rv = 0;
4817 
4818         DBG1(vgenp, ldcp, "enter\n");
4819         if (ldcp->hphase != VH_PHASE2) {
4820                 DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
4821                 " Invalid Phase(%u)\n",
4822                     tagp->vio_subtype, ldcp->hphase);
4823                 return (VGEN_FAILURE);
4824         }
4825         switch (tagp->vio_subtype) {
4826         case VIO_SUBTYPE_INFO:
4827 
4828                 rv = vgen_handle_attr_info(ldcp, msg);
4829                 if (rv == VGEN_SUCCESS) {
4830                         tagp->vio_subtype = VIO_SUBTYPE_ACK;
4831                 } else {
4832                         tagp->vio_subtype = VIO_SUBTYPE_NACK;
4833                 }
4834                 tagp->vio_sid = ldcp->local_sid;
4835 
4836                 /* send reply msg back to peer */
4837                 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
4838                     B_FALSE);
4839                 if (rv != VGEN_SUCCESS) {
4840                         return (rv);
4841                 }
4842 
4843                 if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
4844                         DWARN(vgenp, ldcp, "ATTR_NACK_SENT");
4845                         break;
4846                 }
4847 
4848                 ldcp->hstate |= ATTR_ACK_SENT;
4849                 DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
4850                 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4851                         rv = vgen_handshake(vh_nextphase(ldcp));
4852                         if (rv != 0) {
4853                                 return (rv);
4854                         }
4855                 }
4856 
4857                 break;
4858 
4859         case VIO_SUBTYPE_ACK:
4860 
4861                 rv = vgen_handle_attr_ack(ldcp, msg);
4862                 if (rv == VGEN_FAILURE) {
4863                         break;
4864                 }
4865 
4866                 ldcp->hstate |= ATTR_ACK_RCVD;
4867                 DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
4868 
4869                 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4870                         rv = vgen_handshake(vh_nextphase(ldcp));
4871                         if (rv != 0) {
4872                                 return (rv);
4873                         }
4874                 }
4875                 break;
4876 
4877         case VIO_SUBTYPE_NACK:
4878 
4879                 DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
4880                 return (VGEN_FAILURE);
4881         }
4882         DBG1(vgenp, ldcp, "exit\n");
4883         return (VGEN_SUCCESS);
4884 }
4885 
4886 static int
4887 vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4888 {
4889         int             rv = 0;
4890         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
4891         vgen_hparams_t  *lp = &ldcp->local_hparams;
4892 
4893         DBG2(vgenp, ldcp, "DRING_INFO_RCVD");
4894         ldcp->hstate |= DRING_INFO_RCVD;
4895 
4896         if (VGEN_VER_GTEQ(ldcp, 1, 6) &&
4897             (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) {
4898                 /*
4899                  * The earlier version of Solaris vnet driver doesn't set the
4900                  * option (VIO_TX_DRING in its case) correctly in its dring reg
4901                  * message. We workaround that here by doing the check only
4902                  * for versions >= v1.6.
4903                  */
4904                 DWARN(vgenp, ldcp,
4905                     "Rcvd dring reg option (%d), negotiated mode (%d)\n",
4906                     ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode);
4907                 return (VGEN_FAILURE);
4908         }
4909 
4910         /*
4911          * Map dring exported by the peer.
4912          */
4913         rv = vgen_map_dring(ldcp, (void *)tagp);
4914         if (rv != VGEN_SUCCESS) {
4915                 return (rv);
4916         }
4917 
4918         /*
4919          * Map data buffers exported by the peer if we are in RxDringData mode.
4920          */
4921         if (lp->dring_mode == VIO_RX_DRING_DATA) {
4922                 rv = vgen_map_data(ldcp, (void *)tagp);
4923                 if (rv != VGEN_SUCCESS) {
4924                         vgen_unmap_dring(ldcp);
4925                         return (rv);
4926                 }
4927         }
4928 
4929         if (ldcp->peer_hparams.dring_ready == B_FALSE) {
4930                 ldcp->peer_hparams.dring_ready = B_TRUE;
4931         }
4932 
4933         return (VGEN_SUCCESS);
4934 }
4935 
4936 static int
4937 vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4938 {
4939         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
4940         vgen_hparams_t  *lp = &ldcp->local_hparams;
4941 
4942         DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
4943         ldcp->hstate |= DRING_ACK_RCVD;
4944 
4945         if (lp->dring_ready) {
4946                 return (VGEN_SUCCESS);
4947         }
4948 
4949         /* save dring_ident acked by peer */
4950         lp->dring_ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident;
4951 
4952         /* local dring is now ready */
4953         lp->dring_ready = B_TRUE;
4954 
4955         return (VGEN_SUCCESS);
4956 }
4957 
4958 /*
4959  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
4960  * the peer to a dring register msg that we sent.
4961  */
4962 static int
4963 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4964 {
4965         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
4966         int             rv = 0;
4967         int             msgsize;
4968         vgen_hparams_t  *lp = &ldcp->local_hparams;
4969 
4970         DBG1(vgenp, ldcp, "enter\n");
4971         if (ldcp->hphase < VH_PHASE2) {
4972                 /* dring_info can be rcvd in any of the phases after Phase1 */
4973                 DWARN(vgenp, ldcp,
4974                     "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
4975                     tagp->vio_subtype, ldcp->hphase);
4976                 return (VGEN_FAILURE);
4977         }
4978 
4979         switch (tagp->vio_subtype) {
4980         case VIO_SUBTYPE_INFO:
4981 
4982                 rv = vgen_handle_dring_reg_info(ldcp, tagp);
4983                 if (rv == VGEN_SUCCESS) {
4984                         tagp->vio_subtype = VIO_SUBTYPE_ACK;
4985                 } else {
4986                         tagp->vio_subtype = VIO_SUBTYPE_NACK;
4987                 }
4988 
4989                 tagp->vio_sid = ldcp->local_sid;
4990 
4991                 if (lp->dring_mode == VIO_RX_DRING_DATA) {
4992                         msgsize =
4993                             VNET_DRING_REG_EXT_MSG_SIZE(ldcp->tx_data_ncookies);
4994                 } else {
4995                         msgsize = sizeof (vio_dring_reg_msg_t);
4996                 }
4997 
4998                 /* send reply msg back to peer */
4999                 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, msgsize,
5000                     B_FALSE);
5001                 if (rv != VGEN_SUCCESS) {
5002                         return (rv);
5003                 }
5004 
5005                 if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
5006                         DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5007                         return (VGEN_FAILURE);
5008                 }
5009 
5010                 ldcp->hstate |= DRING_ACK_SENT;
5011                 DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5012 
5013                 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5014                         rv = vgen_handshake(vh_nextphase(ldcp));
5015                         if (rv != 0) {
5016                                 return (rv);
5017                         }
5018                 }
5019                 break;
5020 
5021         case VIO_SUBTYPE_ACK:
5022 
5023                 rv = vgen_handle_dring_reg_ack(ldcp, tagp);
5024                 if (rv == VGEN_FAILURE) {
5025                         return (rv);
5026                 }
5027 
5028                 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5029                         rv = vgen_handshake(vh_nextphase(ldcp));
5030                         if (rv != 0) {
5031                                 return (rv);
5032                         }
5033                 }
5034 
5035                 break;
5036 
5037         case VIO_SUBTYPE_NACK:
5038 
5039                 DWARN(vgenp, ldcp, "DRING_NACK_RCVD");
5040                 return (VGEN_FAILURE);
5041         }
5042         DBG1(vgenp, ldcp, "exit\n");
5043         return (VGEN_SUCCESS);
5044 }
5045 
5046 /*
5047  * Handle a rdx info msg from the peer or an ACK/NACK
5048  * from the peer to a rdx info msg that we sent.
5049  */
5050 static int
5051 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5052 {
5053         int     rv = 0;
5054         vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
5055 
5056         DBG1(vgenp, ldcp, "enter\n");
5057         if (ldcp->hphase != VH_PHASE4) {
5058                 DWARN(vgenp, ldcp,
5059                     "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5060                     tagp->vio_subtype, ldcp->hphase);
5061                 return (VGEN_FAILURE);
5062         }
5063         switch (tagp->vio_subtype) {
5064         case VIO_SUBTYPE_INFO:
5065 
5066                 DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5067                 ldcp->hstate |= RDX_INFO_RCVD;
5068 
5069                 tagp->vio_subtype = VIO_SUBTYPE_ACK;
5070                 tagp->vio_sid = ldcp->local_sid;
5071                 /* send reply msg back to peer */
5072                 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5073                     B_FALSE);
5074                 if (rv != VGEN_SUCCESS) {
5075                         return (rv);
5076                 }
5077 
5078                 ldcp->hstate |= RDX_ACK_SENT;
5079                 DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5080 
5081                 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5082                         rv = vgen_handshake(vh_nextphase(ldcp));
5083                         if (rv != 0) {
5084                                 return (rv);
5085                         }
5086                 }
5087 
5088                 break;
5089 
5090         case VIO_SUBTYPE_ACK:
5091 
5092                 ldcp->hstate |= RDX_ACK_RCVD;
5093 
5094                 DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5095 
5096                 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5097                         rv = vgen_handshake(vh_nextphase(ldcp));
5098                         if (rv != 0) {
5099                                 return (rv);
5100                         }
5101                 }
5102                 break;
5103 
5104         case VIO_SUBTYPE_NACK:
5105 
5106                 DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5107                 return (VGEN_FAILURE);
5108         }
5109         DBG1(vgenp, ldcp, "exit\n");
5110         return (VGEN_SUCCESS);
5111 }
5112 
5113 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5114 static int
5115 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5116 {
5117         vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
5118         vnet_mcast_msg_t        *msgp = (vnet_mcast_msg_t *)tagp;
5119         struct ether_addr       *addrp;
5120         int                     count;
5121         int                     i;
5122 
5123         DBG1(vgenp, ldcp, "enter\n");
5124         switch (tagp->vio_subtype) {
5125 
5126         case VIO_SUBTYPE_INFO:
5127 
5128                 /* vnet shouldn't recv set mcast msg, only vsw handles it */
5129                 DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5130                 break;
5131 
5132         case VIO_SUBTYPE_ACK:
5133 
5134                 /* success adding/removing multicast addr */
5135                 DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5136                 break;
5137 
5138         case VIO_SUBTYPE_NACK:
5139 
5140                 DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5141                 if (!(msgp->set)) {
5142                         /* multicast remove request failed */
5143                         break;
5144                 }
5145 
5146                 /* multicast add request failed */
5147                 for (count = 0; count < msgp->count; count++) {
5148                         addrp = &(msgp->mca[count]);
5149 
5150                         /* delete address from the table */
5151                         for (i = 0; i < vgenp->mccount; i++) {
5152                                 if (ether_cmp(addrp,
5153                                     &(vgenp->mctab[i])) == 0) {
5154                                         if (vgenp->mccount > 1) {
5155                                                 int t = vgenp->mccount - 1;
5156                                                 vgenp->mctab[i] =
5157                                                     vgenp->mctab[t];
5158                                         }
5159                                         vgenp->mccount--;
5160                                         break;
5161                                 }
5162                         }
5163                 }
5164                 break;
5165 
5166         }
5167         DBG1(vgenp, ldcp, "exit\n");
5168 
5169         return (VGEN_SUCCESS);
5170 }
5171 
5172 /*
5173  * Physical link information message from the peer. Only vswitch should send
5174  * us this message; if the vnet device has been configured to get physical link
5175  * state updates. Note that we must have already negotiated this with the
5176  * vswitch during attribute exchange phase of handshake.
5177  */
5178 static int
5179 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5180 {
5181         vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
5182         vnet_physlink_msg_t     *msgp = (vnet_physlink_msg_t *)tagp;
5183         link_state_t            link_state;
5184         int                     rv;
5185 
5186         if (ldcp->portp != vgenp->vsw_portp) {
5187                 /*
5188                  * drop the message and don't process; as we should
5189                  * receive physlink_info message from only vswitch.
5190                  */
5191                 return (VGEN_SUCCESS);
5192         }
5193 
5194         if (vgenp->pls_negotiated == B_FALSE) {
5195                 /*
5196                  * drop the message and don't process; as we should receive
5197                  * physlink_info message only if physlink update is enabled for
5198                  * the device and negotiated with vswitch.
5199                  */
5200                 return (VGEN_SUCCESS);
5201         }
5202 
5203         switch (tagp->vio_subtype) {
5204 
5205         case VIO_SUBTYPE_INFO:
5206 
5207                 if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5208                     VNET_PHYSLINK_STATE_UP) {
5209                         link_state = LINK_STATE_UP;
5210                 } else {
5211                         link_state = LINK_STATE_DOWN;
5212                 }
5213 
5214                 if (vgenp->phys_link_state != link_state) {
5215                         vgenp->phys_link_state = link_state;
5216                         mutex_exit(&ldcp->cblock);
5217 
5218                         /* Now update the stack */
5219                         vgen_link_update(vgenp, link_state);
5220 
5221                         mutex_enter(&ldcp->cblock);
5222                 }
5223 
5224                 tagp->vio_subtype = VIO_SUBTYPE_ACK;
5225                 tagp->vio_sid = ldcp->local_sid;
5226 
5227                 /* send reply msg back to peer */
5228                 rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5229                     sizeof (vnet_physlink_msg_t), B_FALSE);
5230                 if (rv != VGEN_SUCCESS) {
5231                         return (rv);
5232                 }
5233                 break;
5234 
5235         case VIO_SUBTYPE_ACK:
5236 
5237                 /* vnet shouldn't recv physlink acks */
5238                 DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5239                 break;
5240 
5241         case VIO_SUBTYPE_NACK:
5242 
5243                 /* vnet shouldn't recv physlink nacks */
5244                 DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5245                 break;
5246 
5247         }
5248         DBG1(vgenp, ldcp, "exit\n");
5249 
5250         return (VGEN_SUCCESS);
5251 }
5252 
5253 /* handler for control messages received from the peer ldc end-point */
5254 static int
5255 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5256 {
5257         int     rv = 0;
5258         vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
5259 
5260         DBG1(vgenp, ldcp, "enter\n");
5261         switch (tagp->vio_subtype_env) {
5262 
5263         case VIO_VER_INFO:
5264                 rv = vgen_handle_version_negotiate(ldcp, tagp);
5265                 break;
5266 
5267         case VIO_ATTR_INFO:
5268                 rv = vgen_handle_attr_msg(ldcp, tagp);
5269                 break;
5270 
5271         case VIO_DRING_REG:
5272                 rv = vgen_handle_dring_reg(ldcp, tagp);
5273                 break;
5274 
5275         case VIO_RDX:
5276                 rv = vgen_handle_rdx_info(ldcp, tagp);
5277                 break;
5278 
5279         case VNET_MCAST_INFO:
5280                 rv = vgen_handle_mcast_info(ldcp, tagp);
5281                 break;
5282 
5283         case VIO_DDS_INFO:
5284                 /*
5285                  * If we are in the process of resetting the vswitch channel,
5286                  * drop the dds message. A new handshake will be initiated
5287                  * when the channel comes back up after the reset and dds
5288                  * negotiation can then continue.
5289                  */
5290                 if (ldcp->reset_in_progress == 1) {
5291                         break;
5292                 }
5293                 rv = vgen_dds_rx(ldcp, tagp);
5294                 break;
5295 
5296         case VNET_PHYSLINK_INFO:
5297                 rv = vgen_handle_physlink_info(ldcp, tagp);
5298                 break;
5299         }
5300 
5301         DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5302         return (rv);
5303 }
5304 
/*
 * Handler for error messages received from the peer ldc end-point.
 * Currently a stub: the message is ignored and both arguments are unused
 * (the _NOTE annotation tells lint so).
 */
static void
vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        _NOTE(ARGUNUSED(ldcp, tagp))
}
5311 
5312 /*
5313  * This function handles raw pkt data messages received over the channel.
5314  * Currently, only priority-eth-type frames are received through this mechanism.
5315  * In this case, the frame(data) is present within the message itself which
5316  * is copied into an mblk before sending it up the stack.
5317  */
5318 void
5319 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5320 {
5321         vgen_ldc_t              *ldcp = (vgen_ldc_t *)arg1;
5322         vio_raw_data_msg_t      *pkt    = (vio_raw_data_msg_t *)arg2;
5323         uint32_t                size;
5324         mblk_t                  *mp;
5325         vio_mblk_t              *vmp;
5326         vio_net_rx_cb_t         vrx_cb = NULL;
5327         vgen_t                  *vgenp = LDC_TO_VGEN(ldcp);
5328         vgen_stats_t            *statsp = &ldcp->stats;
5329         vgen_hparams_t          *lp = &ldcp->local_hparams;
5330         uint_t                  dring_mode = lp->dring_mode;
5331 
5332         ASSERT(MUTEX_HELD(&ldcp->cblock));
5333 
5334         mutex_exit(&ldcp->cblock);
5335 
5336         size = msglen - VIO_PKT_DATA_HDRSIZE;
5337         if (size < ETHERMIN || size > lp->mtu) {
5338                 (void) atomic_inc_32(&statsp->rx_pri_fail);
5339                 mutex_enter(&ldcp->cblock);
5340                 return;
5341         }
5342 
5343         vmp = vio_multipool_allocb(&ldcp->vmp, size);
5344         if (vmp == NULL) {
5345                 mp = allocb(size, BPRI_MED);
5346                 if (mp == NULL) {
5347                         (void) atomic_inc_32(&statsp->rx_pri_fail);
5348                         DWARN(vgenp, ldcp, "allocb failure, "
5349                             "unable to process priority frame\n");
5350                         mutex_enter(&ldcp->cblock);
5351                         return;
5352                 }
5353         } else {
5354                 mp = vmp->mp;
5355         }
5356 
5357         /* copy the frame from the payload of raw data msg into the mblk */
5358         bcopy(pkt->data, mp->b_rptr, size);
5359         mp->b_wptr = mp->b_rptr + size;
5360 
5361         if (vmp != NULL) {
5362                 vmp->state = VIO_MBLK_HAS_DATA;
5363         }
5364 
5365         /* update stats */
5366         (void) atomic_inc_64(&statsp->rx_pri_packets);
5367         (void) atomic_add_64(&statsp->rx_pri_bytes, size);
5368 
5369         /*
5370          * If polling is currently enabled, add the packet to the priority
5371          * packets list and return. It will be picked up by the polling thread.
5372          */
5373         if (dring_mode == VIO_RX_DRING_DATA) {
5374                 mutex_enter(&ldcp->rxlock);
5375         } else {
5376                 mutex_enter(&ldcp->pollq_lock);
5377         }
5378 
5379         if (ldcp->polling_on == B_TRUE) {
5380                 if (ldcp->rx_pri_tail != NULL) {
5381                         ldcp->rx_pri_tail->b_next = mp;
5382                 } else {
5383                         ldcp->rx_pri_head = ldcp->rx_pri_tail = mp;
5384                 }
5385         } else {
5386                 vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5387         }
5388 
5389         if (dring_mode == VIO_RX_DRING_DATA) {
5390                 mutex_exit(&ldcp->rxlock);
5391         } else {
5392                 mutex_exit(&ldcp->pollq_lock);
5393         }
5394 
5395         if (vrx_cb != NULL) {
5396                 vrx_cb(ldcp->portp->vhp, mp);
5397         }
5398 
5399         mutex_enter(&ldcp->cblock);
5400 }
5401 
/*
 * Dummy pkt data handler function for vnet protocol version 1.0.
 * It has the same signature as vgen_handle_pkt_data() but does nothing;
 * all three arguments are unused (the _NOTE annotation tells lint so).
 */
static void
vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
{
        _NOTE(ARGUNUSED(arg1, arg2, msglen))
}
5410 
5411 /* handler for data messages received from the peer ldc end-point */
5412 static int
5413 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5414 {
5415         int             rv = 0;
5416         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
5417         vgen_hparams_t  *lp = &ldcp->local_hparams;
5418 
5419         DBG1(vgenp, ldcp, "enter\n");
5420 
5421         if (ldcp->hphase != VH_DONE) {
5422                 return (0);
5423         }
5424 
5425         /*
5426          * We check the data msg seqnum. This is needed only in TxDring mode.
5427          */
5428         if (lp->dring_mode == VIO_TX_DRING &&
5429             tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5430                 rv = vgen_check_datamsg_seq(ldcp, tagp);
5431                 if (rv != 0) {
5432                         return (rv);
5433                 }
5434         }
5435 
5436         switch (tagp->vio_subtype_env) {
5437         case VIO_DRING_DATA:
5438                 rv = ldcp->rx_dringdata((void *)ldcp, (void *)tagp);
5439                 break;
5440 
5441         case VIO_PKT_DATA:
5442                 ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5443                 break;
5444         default:
5445                 break;
5446         }
5447 
5448         DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5449         return (rv);
5450 }
5451 
5452 
5453 static int
5454 vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
5455 {
5456         int     rv;
5457 
5458         if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5459                 ASSERT(MUTEX_HELD(&ldcp->cblock));
5460         }
5461 
5462         /* Set the flag to indicate reset is in progress */
5463         if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
5464                 /* another thread is already in the process of resetting */
5465                 return (EBUSY);
5466         }
5467 
5468         if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5469                 mutex_exit(&ldcp->cblock);
5470         }
5471 
5472         rv = vgen_process_reset(ldcp, VGEN_FLAG_NEED_LDCRESET);
5473 
5474         if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5475                 mutex_enter(&ldcp->cblock);
5476         }
5477 
5478         return (rv);
5479 }
5480 
5481 static void
5482 vgen_ldc_up(vgen_ldc_t *ldcp)
5483 {
5484         int             rv;
5485         uint32_t        retries = 0;
5486         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
5487 
5488         ASSERT(MUTEX_HELD(&ldcp->cblock));
5489 
5490         /*
5491          * If the channel has been reset max # of times, without successfully
5492          * completing handshake, stop and do not bring the channel up.
5493          */
5494         if (ldcp->ldc_reset_count == vgen_ldc_max_resets) {
5495                 cmn_err(CE_WARN, "!vnet%d: exceeded number of permitted"
5496                     " handshake attempts (%d) on channel %ld",
5497                     vgenp->instance, vgen_ldc_max_resets, ldcp->ldc_id);
5498                 return;
5499         }
5500         ldcp->ldc_reset_count++;
5501 
5502         do {
5503                 rv = ldc_up(ldcp->ldc_handle);
5504                 if ((rv != 0) && (rv == EWOULDBLOCK)) {
5505                         drv_usecwait(VGEN_LDC_UP_DELAY);
5506                 }
5507                 if (retries++ >= vgen_ldcup_retries)
5508                         break;
5509         } while (rv == EWOULDBLOCK);
5510 
5511         if (rv != 0) {
5512                 DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
5513         }
5514 }
5515 
5516 int
5517 vgen_enable_intr(void *arg)
5518 {
5519         uint32_t                end_ix;
5520         vio_dring_msg_t         msg;
5521         vgen_port_t             *portp = (vgen_port_t *)arg;
5522         vgen_ldc_t              *ldcp = portp->ldcp;
5523         vgen_hparams_t          *lp = &ldcp->local_hparams;
5524 
5525         if (lp->dring_mode == VIO_RX_DRING_DATA) {
5526                 mutex_enter(&ldcp->rxlock);
5527 
5528                 ldcp->polling_on = B_FALSE;
5529                 /*
5530                  * We send a stopped message to peer (sender) as we are turning
5531                  * off polled mode. This effectively restarts data interrupts
5532                  * by allowing the peer to send further dring data msgs to us.
5533                  */
5534                 end_ix = ldcp->next_rxi;
5535                 DECR_RXI(end_ix, ldcp);
5536                 msg.dring_ident = ldcp->peer_hparams.dring_ident;
5537                 (void) vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
5538                     VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
5539 
5540                 mutex_exit(&ldcp->rxlock);
5541         } else {
5542                 mutex_enter(&ldcp->pollq_lock);
5543                 ldcp->polling_on = B_FALSE;
5544                 mutex_exit(&ldcp->pollq_lock);
5545         }
5546 
5547         return (0);
5548 }
5549 
5550 int
5551 vgen_disable_intr(void *arg)
5552 {
5553         vgen_port_t             *portp = (vgen_port_t *)arg;
5554         vgen_ldc_t              *ldcp = portp->ldcp;
5555         vgen_hparams_t          *lp = &ldcp->local_hparams;
5556 
5557         if (lp->dring_mode == VIO_RX_DRING_DATA) {
5558                 mutex_enter(&ldcp->rxlock);
5559                 ldcp->polling_on = B_TRUE;
5560                 mutex_exit(&ldcp->rxlock);
5561         } else {
5562                 mutex_enter(&ldcp->pollq_lock);
5563                 ldcp->polling_on = B_TRUE;
5564                 mutex_exit(&ldcp->pollq_lock);
5565         }
5566 
5567         return (0);
5568 }
5569 
5570 mblk_t *
5571 vgen_rx_poll(void *arg, int bytes_to_pickup)
5572 {
5573         vgen_port_t             *portp = (vgen_port_t *)arg;
5574         vgen_ldc_t              *ldcp = portp->ldcp;
5575         vgen_hparams_t          *lp = &ldcp->local_hparams;
5576         mblk_t                  *mp = NULL;
5577 
5578         if (lp->dring_mode == VIO_RX_DRING_DATA) {
5579                 mp = vgen_poll_rcv_shm(ldcp, bytes_to_pickup);
5580         } else {
5581                 mp = vgen_poll_rcv(ldcp, bytes_to_pickup);
5582         }
5583 
5584         return (mp);
5585 }
5586 
5587 /* transmit watchdog timeout handler */
5588 static void
5589 vgen_tx_watchdog(void *arg)
5590 {
5591         vgen_ldc_t      *ldcp;
5592         vgen_t          *vgenp;
5593         int             rv;
5594         boolean_t       tx_blocked;
5595         clock_t         tx_blocked_lbolt;
5596 
5597         ldcp = (vgen_ldc_t *)arg;
5598         vgenp = LDC_TO_VGEN(ldcp);
5599 
5600         tx_blocked = ldcp->tx_blocked;
5601         tx_blocked_lbolt = ldcp->tx_blocked_lbolt;
5602 
5603         if (vgen_txwd_timeout &&
5604             (tx_blocked == B_TRUE) &&
5605             ((ddi_get_lbolt() - tx_blocked_lbolt) >
5606             drv_usectohz(vgen_txwd_timeout * 1000))) {
5607                 /*
5608                  * Something is wrong; the peer is not picking up the packets
5609                  * in the transmit dring. We now go ahead and reset the channel
5610                  * to break out of this condition.
5611                  */
5612                 DWARN(vgenp, ldcp, "transmit timeout lbolt(%lx), "
5613                     "tx_blocked_lbolt(%lx)\n",
5614                     ddi_get_lbolt(), tx_blocked_lbolt);
5615 
5616 #ifdef DEBUG
5617                 if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
5618                         /* tx timeout triggered for debugging */
5619                         vgen_inject_err_flag &= ~(VGEN_ERR_TXTIMEOUT);
5620                 }
5621 #endif
5622 
5623                 /*
5624                  * Clear tid before invoking vgen_ldc_reset(). Otherwise,
5625                  * it will result in a deadlock when vgen_process_reset() tries
5626                  * to untimeout() on seeing a non-zero tid, but it is being
5627                  * invoked by the timer itself in this case.
5628                  */
5629                 mutex_enter(&ldcp->cblock);
5630                 if (ldcp->wd_tid == 0) {
5631                         /* Cancelled by vgen_process_reset() */
5632                         mutex_exit(&ldcp->cblock);
5633                         return;
5634                 }
5635                 ldcp->wd_tid = 0;
5636                 mutex_exit(&ldcp->cblock);
5637 
5638                 /*
5639                  * Now reset the channel.
5640                  */
5641                 rv = vgen_ldc_reset(ldcp, VGEN_OTHER);
5642                 if (rv == 0) {
5643                         /*
5644                          * We have successfully reset the channel. If we are
5645                          * in tx flow controlled state, clear it now and enable
5646                          * transmit in the upper layer.
5647                          */
5648                         if (ldcp->tx_blocked) {
5649                                 vio_net_tx_update_t vtx_update =
5650                                     ldcp->portp->vcb.vio_net_tx_update;
5651 
5652                                 ldcp->tx_blocked = B_FALSE;
5653                                 vtx_update(ldcp->portp->vhp);
5654                         }
5655                 }
5656 
5657                 /*
5658                  * Channel has been reset by us or some other thread is already
5659                  * in the process of resetting. In either case, we return
5660                  * without restarting the timer. When handshake completes and
5661                  * the channel is ready for data transmit/receive we start a
5662                  * new watchdog timer.
5663                  */
5664                 return;
5665         }
5666 
5667 restart_timer:
5668         /* Restart the timer */
5669         mutex_enter(&ldcp->cblock);
5670         if (ldcp->wd_tid == 0) {
5671                 /* Cancelled by vgen_process_reset() */
5672                 mutex_exit(&ldcp->cblock);
5673                 return;
5674         }
5675         ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
5676             drv_usectohz(vgen_txwd_interval * 1000));
5677         mutex_exit(&ldcp->cblock);
5678 }
5679 
5680 /* Handshake watchdog timeout handler */
5681 static void
5682 vgen_hwatchdog(void *arg)
5683 {
5684         vgen_ldc_t      *ldcp = (vgen_ldc_t *)arg;
5685         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
5686 
5687         DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
5688             ldcp->hphase, ldcp->hstate);
5689 
5690         mutex_enter(&ldcp->cblock);
5691         if (ldcp->htid == 0) {
5692                 /* Cancelled by vgen_process_reset() */
5693                 mutex_exit(&ldcp->cblock);
5694                 return;
5695         }
5696         ldcp->htid = 0;
5697         mutex_exit(&ldcp->cblock);
5698 
5699         /*
5700          * Something is wrong; handshake with the peer seems to be hung. We now
5701          * go ahead and reset the channel to break out of this condition.
5702          */
5703         (void) vgen_ldc_reset(ldcp, VGEN_OTHER);
5704 }
5705 
5706 /* Check if the session id in the received message is valid */
5707 static int
5708 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5709 {
5710         vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
5711 
5712         if (tagp->vio_sid != ldcp->peer_sid) {
5713                 DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5714                     ldcp->peer_sid, tagp->vio_sid);
5715                 return (VGEN_FAILURE);
5716         }
5717         else
5718                 return (VGEN_SUCCESS);
5719 }
5720 
5721 /*
5722  * Initialize the common part of dring registration
5723  * message; used in both TxDring and RxDringData modes.
5724  */
5725 static void
5726 vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
5727         uint8_t option)
5728 {
5729         vio_msg_tag_t           *tagp;
5730 
5731         tagp = &msg->tag;
5732         tagp->vio_msgtype = VIO_TYPE_CTRL;
5733         tagp->vio_subtype = VIO_SUBTYPE_INFO;
5734         tagp->vio_subtype_env = VIO_DRING_REG;
5735         tagp->vio_sid = ldcp->local_sid;
5736 
5737         /* get dring info msg payload from ldcp->local */
5738         bcopy(&(ldcp->local_hparams.dring_cookie), &(msg->cookie[0]),
5739             sizeof (ldc_mem_cookie_t));
5740         msg->ncookies = ldcp->local_hparams.dring_ncookies;
5741         msg->num_descriptors = ldcp->local_hparams.num_desc;
5742         msg->descriptor_size = ldcp->local_hparams.desc_size;
5743 
5744         msg->options = option;
5745 
5746         /*
5747          * dring_ident is set to 0. After mapping the dring, peer sets this
5748          * value and sends it in the ack, which is saved in
5749          * vgen_handle_dring_reg().
5750          */
5751         msg->dring_ident = 0;
5752 }
5753 
5754 static int
5755 vgen_mapin_avail(vgen_ldc_t *ldcp)
5756 {
5757         int             rv;
5758         ldc_info_t      info;
5759         uint64_t        mapin_sz_req;
5760         uint64_t        dblk_sz;
5761         vgen_t          *vgenp = LDC_TO_VGEN(ldcp);
5762 
5763         rv = ldc_info(ldcp->ldc_handle, &info);
5764         if (rv != 0) {
5765                 return (B_FALSE);
5766         }
5767 
5768         dblk_sz = RXDRING_DBLK_SZ(vgenp->max_frame_size);
5769         mapin_sz_req = (VGEN_RXDRING_NRBUFS * dblk_sz);
5770 
5771         if (info.direct_map_size_max >= mapin_sz_req) {
5772                 return (B_TRUE);
5773         }
5774 
5775         return (B_FALSE);
5776 }
5777 
5778 #if DEBUG
5779 
5780 /*
5781  * Print debug messages - set to 0xf to enable all msgs
5782  */
5783 void
5784 vgen_debug_printf(const char *fname, vgen_t *vgenp,
5785     vgen_ldc_t *ldcp, const char *fmt, ...)
5786 {
5787         char    buf[256];
5788         char    *bufp = buf;
5789         va_list ap;
5790 
5791         if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5792                 (void) sprintf(bufp, "vnet%d:",
5793                     ((vnet_t *)(vgenp->vnetp))->instance);
5794                 bufp += strlen(bufp);
5795         }
5796         if (ldcp != NULL) {
5797                 (void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5798                 bufp += strlen(bufp);
5799         }
5800         (void) sprintf(bufp, "%s: ", fname);
5801         bufp += strlen(bufp);
5802 
5803         va_start(ap, fmt);
5804         (void) vsprintf(bufp, fmt, ap);
5805         va_end(ap);
5806 
5807         if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
5808             (vgendbg_ldcid == ldcp->ldc_id)) {
5809                 cmn_err(CE_CONT, "%s\n", buf);
5810         }
5811 }
5812 #endif
5813 
5814 #ifdef  VNET_IOC_DEBUG
5815 
5816 static void
5817 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5818 {
5819         struct iocblk   *iocp;
5820         vgen_port_t     *portp;
5821         enum            ioc_reply {
5822                         IOC_INVAL = -1,         /* bad, NAK with EINVAL */
5823                         IOC_ACK                 /* OK, just send ACK    */
5824         }               status;
5825         int             rv;
5826 
5827         iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
5828         iocp->ioc_error = 0;
5829         portp = (vgen_port_t *)arg;
5830 
5831         if (portp == NULL) {
5832                 status = IOC_INVAL;
5833                 goto vgen_ioc_exit;
5834         }
5835 
5836         mutex_enter(&portp->lock);
5837 
5838         switch (iocp->ioc_cmd) {
5839 
5840         case VNET_FORCE_LINK_DOWN:
5841         case VNET_FORCE_LINK_UP:
5842                 rv = vgen_force_link_state(portp, iocp->ioc_cmd);
5843                 (rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
5844                 break;
5845 
5846         default:
5847                 status = IOC_INVAL;
5848                 break;
5849 
5850         }
5851 
5852         mutex_exit(&portp->lock);
5853 
5854 vgen_ioc_exit:
5855 
5856         switch (status) {
5857         default:
5858         case IOC_INVAL:
5859                 /* Error, reply with a NAK and EINVAL error */
5860                 miocnak(q, mp, 0, EINVAL);
5861                 break;
5862         case IOC_ACK:
5863                 /* OK, reply with an ACK */
5864                 miocack(q, mp, 0, 0);
5865                 break;
5866         }
5867 }
5868 
5869 static int
5870 vgen_force_link_state(vgen_port_t *portp, int cmd)
5871 {
5872         ldc_status_t    istatus;
5873         int             rv;
5874         vgen_ldc_t      *ldcp = portp->ldcp;
5875         vgen_t          *vgenp = portp->vgenp;
5876 
5877         mutex_enter(&ldcp->cblock);
5878 
5879         switch (cmd) {
5880 
5881         case VNET_FORCE_LINK_DOWN:
5882                 (void) ldc_down(ldcp->ldc_handle);
5883                 ldcp->link_down_forced = B_TRUE;
5884                 break;
5885 
5886         case VNET_FORCE_LINK_UP:
5887                 vgen_ldc_up(ldcp);
5888                 ldcp->link_down_forced = B_FALSE;
5889 
5890                 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5891                         DWARN(vgenp, ldcp, "ldc_status err\n");
5892                 } else {
5893                         ldcp->ldc_status = istatus;
5894                 }
5895 
5896                 /* if channel is already UP - restart handshake */
5897                 if (ldcp->ldc_status == LDC_UP) {
5898                         vgen_handle_evt_up(ldcp);
5899                 }
5900                 break;
5901 
5902         }
5903 
5904         mutex_exit(&ldcp->cblock);
5905 
5906         return (0);
5907 }
5908 
5909 #else
5910 
5911 static void
5912 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5913 {
5914         vgen_port_t     *portp;
5915 
5916         portp = (vgen_port_t *)arg;
5917 
5918         if (portp == NULL) {
5919                 miocnak(q, mp, 0, EINVAL);
5920                 return;
5921         }
5922 
5923         miocnak(q, mp, 0, ENOTSUP);
5924 }
5925 
5926 #endif