1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/sysmacros.h>
  27 #include <sys/socket.h>
  28 #include <sys/ddi.h>
  29 #include <sys/sunddi.h>
  30 #include <sys/tsol/tndb.h>
  31 #include <sys/tsol/tnet.h>
  32 
  33 #include <netinet/in.h>
  34 #include <netinet/ip6.h>
  35 
  36 #include <inet/common.h>
  37 #include <inet/ip.h>
  38 #include <inet/ip6.h>
  39 #include <inet/ipclassifier.h>
  40 #include <inet/ipsec_impl.h>
  41 #include <inet/ipp_common.h>
  42 #include <inet/sctp_ip.h>
  43 
  44 #include "sctp_impl.h"
  45 #include "sctp_addr.h"
  46 
  47 /* Default association hash size.  The size must be a power of 2. */
  48 #define SCTP_CONN_HASH_SIZE     8192
  49 
  50 uint_t          sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */
  51 
  52 /*
  53  * Cluster networking hook for traversing current assoc list.
  54  * This routine is used to extract the current list of live associations
  55  * which must continue to to be dispatched to this node.
  56  */
  57 int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
  58     boolean_t);
  59 static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
  60     void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
  61 
  62 void
  63 sctp_hash_init(sctp_stack_t *sctps)
  64 {
  65         int i;
  66 
  67         /* Start with /etc/system value */
  68         sctps->sctps_conn_hash_size = sctp_conn_hash_size;
  69 
  70         if (!ISP2(sctps->sctps_conn_hash_size)) {
  71                 /* Not a power of two. Round up to nearest power of two */
  72                 for (i = 0; i < 31; i++) {
  73                         if (sctps->sctps_conn_hash_size < (1 << i))
  74                                 break;
  75                 }
  76                 sctps->sctps_conn_hash_size = 1 << i;
  77         }
  78         if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
  79                 sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
  80                 cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
  81                     sctps->sctps_conn_hash_size);
  82         }
  83         sctps->sctps_conn_fanout =
  84             (sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
  85             sizeof (sctp_tf_t), KM_SLEEP);
  86         for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
  87                 mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
  88                     MUTEX_DEFAULT, NULL);
  89         }
  90         sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
  91             sizeof (sctp_tf_t), KM_SLEEP);
  92         for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
  93                 mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
  94                     MUTEX_DEFAULT, NULL);
  95         }
  96         sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
  97             sizeof (sctp_tf_t), KM_SLEEP);
  98         for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
  99                 mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
 100                     MUTEX_DEFAULT, NULL);
 101         }
 102 }
 103 
 104 void
 105 sctp_hash_destroy(sctp_stack_t *sctps)
 106 {
 107         int i;
 108 
 109         for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
 110                 mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
 111         }
 112         kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
 113             sizeof (sctp_tf_t));
 114         sctps->sctps_conn_fanout = NULL;
 115 
 116         for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
 117                 mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
 118         }
 119         kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
 120             sizeof (sctp_tf_t));
 121         sctps->sctps_listen_fanout = NULL;
 122 
 123         for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
 124                 mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
 125         }
 126         kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
 127             sizeof (sctp_tf_t));
 128         sctps->sctps_bind_fanout = NULL;
 129 }
 130 
 131 /*
 132  * Exported routine for extracting active SCTP associations.
 133  * Like TCP, we terminate the walk if the callback returns non-zero.
 134  *
 135  * Need to walk all sctp_stack_t instances since this clustering
 136  * interface is assumed global for all instances
 137  */
 138 int
 139 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
 140     void *arg, boolean_t cansleep)
 141 {
 142         netstack_handle_t nh;
 143         netstack_t *ns;
 144         int ret = 0;
 145 
 146         netstack_next_init(&nh);
 147         while ((ns = netstack_next(&nh)) != NULL) {
 148                 ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
 149                     ns->netstack_sctp);
 150                 netstack_rele(ns);
 151         }
 152         netstack_next_fini(&nh);
 153         return (ret);
 154 }
 155 
 156 static int
 157 cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
 158     void *arg, boolean_t cansleep, sctp_stack_t *sctps)
 159 {
 160         sctp_t          *sctp;
 161         sctp_t          *sctp_prev;
 162         cl_sctp_info_t  cl_sctpi;
 163         uchar_t         *slist;
 164         uchar_t         *flist;
 165 
 166         sctp_prev = NULL;
 167         mutex_enter(&sctps->sctps_g_lock);
 168         sctp = list_head(&sctps->sctps_g_list);
 169         while (sctp != NULL) {
 170                 size_t  ssize;
 171                 size_t  fsize;
 172 
 173                 mutex_enter(&sctp->sctp_reflock);
 174                 if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
 175                         mutex_exit(&sctp->sctp_reflock);
 176                         sctp = list_next(&sctps->sctps_g_list, sctp);
 177                         continue;
 178                 }
 179                 sctp->sctp_refcnt++;
 180                 mutex_exit(&sctp->sctp_reflock);
 181                 mutex_exit(&sctps->sctps_g_lock);
 182                 if (sctp_prev != NULL)
 183                         SCTP_REFRELE(sctp_prev);
 184                 RUN_SCTP(sctp);
 185                 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
 186                 fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
 187 
 188                 slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
 189                 flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
 190                 if (slist == NULL || flist == NULL) {
 191                         WAKE_SCTP(sctp);
 192                         if (slist != NULL)
 193                                 kmem_free(slist, ssize);
 194                         if (flist != NULL)
 195                                 kmem_free(flist, fsize);
 196                         SCTP_REFRELE(sctp);
 197                         return (1);
 198                 }
 199                 cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
 200                 sctp_get_saddr_list(sctp, slist, ssize);
 201                 sctp_get_faddr_list(sctp, flist, fsize);
 202                 cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
 203                 cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
 204                 cl_sctpi.cl_sctpi_family = sctp->sctp_connp->conn_family;
 205                 if (cl_sctpi.cl_sctpi_family == AF_INET)
 206                         cl_sctpi.cl_sctpi_ipversion = IPV4_VERSION;
 207                 else
 208                         cl_sctpi.cl_sctpi_ipversion = IPV6_VERSION;
 209                 cl_sctpi.cl_sctpi_state = sctp->sctp_state;
 210                 cl_sctpi.cl_sctpi_lport = sctp->sctp_connp->conn_lport;
 211                 cl_sctpi.cl_sctpi_fport = sctp->sctp_connp->conn_fport;
 212                 cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
 213                 WAKE_SCTP(sctp);
 214                 cl_sctpi.cl_sctpi_laddrp = slist;
 215                 cl_sctpi.cl_sctpi_faddrp = flist;
 216                 if ((*cl_callback)(&cl_sctpi, arg) != 0) {
 217                         kmem_free(slist, ssize);
 218                         kmem_free(flist, fsize);
 219                         SCTP_REFRELE(sctp);
 220                         return (1);
 221                 }
 222                 /* list will be freed by cl_callback */
 223                 sctp_prev = sctp;
 224                 mutex_enter(&sctps->sctps_g_lock);
 225                 sctp = list_next(&sctps->sctps_g_list, sctp);
 226         }
 227         mutex_exit(&sctps->sctps_g_lock);
 228         if (sctp_prev != NULL)
 229                 SCTP_REFRELE(sctp_prev);
 230         return (0);
 231 }
 232 
 233 sctp_t *
 234 sctp_conn_match(in6_addr_t **faddrpp, uint32_t nfaddr, in6_addr_t *laddr,
 235     uint32_t ports, zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
 236 {
 237         sctp_tf_t               *tf;
 238         sctp_t                  *sctp;
 239         sctp_faddr_t            *fp;
 240         conn_t                  *connp;
 241         in6_addr_t              **faddrs, **endaddrs = &faddrpp[nfaddr];
 242 
 243         tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
 244         mutex_enter(&tf->tf_lock);
 245 
 246         for (sctp = tf->tf_sctp; sctp != NULL; sctp =
 247             sctp->sctp_conn_hash_next) {
 248                 connp = sctp->sctp_connp;
 249                 if (ports != connp->conn_ports)
 250                         continue;
 251                 if (!(connp->conn_zoneid == zoneid ||
 252                     connp->conn_allzones ||
 253                     ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
 254                     (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
 255                     (iraflags & IRAF_TX_SHARED_ADDR))))
 256                         continue;
 257 
 258                 /* check for faddr match */
 259                 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
 260                         for (faddrs = faddrpp; faddrs < endaddrs; faddrs++) {
 261                                 if (IN6_ARE_ADDR_EQUAL(*faddrs,
 262                                     &fp->sf_faddr)) {
 263                                         /* check for laddr match */
 264                                         if (sctp_saddr_lookup(sctp, laddr, 0)
 265                                             != NULL) {
 266                                                 SCTP_REFHOLD(sctp);
 267                                                 mutex_exit(&tf->tf_lock);
 268                                                 return (sctp);
 269                                         }
 270                                 }
 271                         }
 272                 }
 273 
 274                 /* no match; continue to the next in the chain */
 275         }
 276 
 277         mutex_exit(&tf->tf_lock);
 278         return (sctp);
 279 }
 280 
 281 static sctp_t *
 282 listen_match(in6_addr_t *laddr, uint32_t ports, zoneid_t zoneid,
 283     iaflags_t iraflags, sctp_stack_t *sctps)
 284 {
 285         sctp_t                  *sctp;
 286         sctp_tf_t               *tf;
 287         uint16_t                lport;
 288         conn_t                  *connp;
 289 
 290         lport = ((uint16_t *)&ports)[1];
 291 
 292         tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
 293         mutex_enter(&tf->tf_lock);
 294 
 295         for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
 296                 connp = sctp->sctp_connp;
 297                 if (lport != connp->conn_lport)
 298                         continue;
 299 
 300                 if (!(connp->conn_zoneid == zoneid ||
 301                     connp->conn_allzones ||
 302                     ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
 303                     (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
 304                     (iraflags & IRAF_TX_SHARED_ADDR))))
 305                         continue;
 306 
 307                 if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
 308                         SCTP_REFHOLD(sctp);
 309                         goto done;
 310                 }
 311                 /* no match; continue to the next in the chain */
 312         }
 313 
 314 done:
 315         mutex_exit(&tf->tf_lock);
 316         return (sctp);
 317 }
 318 
 319 /* called by ipsec_sctp_pol */
 320 conn_t *
 321 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
 322     zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
 323 {
 324         sctp_t *sctp;
 325 
 326         sctp = sctp_conn_match(&src, 1, dst, ports, zoneid, iraflags, sctps);
 327         if (sctp == NULL) {
 328                 /* Not in conn fanout; check listen fanout */
 329                 sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
 330                 if (sctp == NULL)
 331                         return (NULL);
 332         }
 333         return (sctp->sctp_connp);
 334 }
 335 
 336 /*
 337  * This is called from sctp_fanout() with IP header src & dst addresses.
 338  * First call sctp_conn_match() to get a match by passing in src & dst
 339  * addresses from IP header.
 340  * However sctp_conn_match() can return no match under condition such as :
 341  * A host can send an INIT ACK from a different address than the INIT was sent
 342  * to (in a multi-homed env).
 343  * According to RFC4960, a host can send additional addresses in an INIT
 344  * ACK chunk.
 345  * Therefore extract all addresses from the INIT ACK chunk, pass to
 346  * sctp_conn_match() to get a match.
 347  */
 348 static sctp_t *
 349 sctp_lookup_by_faddrs(mblk_t *mp, sctp_hdr_t *sctph, in6_addr_t *srcp,
 350     in6_addr_t *dstp, uint32_t ports, zoneid_t zoneid, sctp_stack_t *sctps,
 351     iaflags_t iraflags)
 352 {
 353         sctp_t                  *sctp;
 354         sctp_chunk_hdr_t        *ich;
 355         sctp_init_chunk_t       *iack;
 356         sctp_parm_hdr_t         *ph;
 357         ssize_t                 mlen, remaining;
 358         uint16_t                param_type, addr_len = PARM_ADDR4_LEN;
 359         in6_addr_t              src;
 360         in6_addr_t              **addrbuf = NULL, **faddrpp = NULL;
 361         boolean_t               isv4;
 362         uint32_t                totaddr, nfaddr = 0;
 363 
 364         /*
 365          * If we get a match with the passed-in IP header src & dst addresses,
 366          * quickly return the matched sctp.
 367          */
 368         if ((sctp = sctp_conn_match(&srcp, 1, dstp, ports, zoneid, iraflags,
 369             sctps)) != NULL) {
 370                 return (sctp);
 371         }
 372 
 373         /*
 374          * Currently sctph is set to NULL in icmp error fanout case
 375          * (ip_fanout_sctp()).
 376          * The above sctp_conn_match() should handle that, otherwise
 377          * return no match found.
 378          */
 379         if (sctph == NULL)
 380                 return (NULL);
 381 
 382         /*
 383          * Do a pullup again in case the previous one was partially successful,
 384          * so try to complete the pullup here and have a single contiguous
 385          * chunk for processing of entire INIT ACK chunk below.
 386          */
 387         if (mp->b_cont != NULL) {
 388                 if (pullupmsg(mp, -1) == 0) {
 389                         return (NULL);
 390                 }
 391         }
 392 
 393         mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
 394         if ((ich = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
 395                 return (NULL);
 396         }
 397 
 398         if (ich->sch_id == CHUNK_INIT_ACK) {
 399                 remaining = ntohs(ich->sch_len) - sizeof (*ich) -
 400                     sizeof (*iack);
 401                 if (remaining < sizeof (*ph)) {
 402                         return (NULL);
 403                 }
 404 
 405                 isv4 = (iraflags & IRAF_IS_IPV4) ? B_TRUE : B_FALSE;
 406                 if (!isv4)
 407                         addr_len = PARM_ADDR6_LEN;
 408                 totaddr = remaining/addr_len;
 409 
 410                 iack = (sctp_init_chunk_t *)(ich + 1);
 411                 ph = (sctp_parm_hdr_t *)(iack + 1);
 412 
 413                 addrbuf = (in6_addr_t **)
 414                     kmem_zalloc(totaddr * sizeof (in6_addr_t *), KM_NOSLEEP);
 415                 if (addrbuf == NULL)
 416                         return (NULL);
 417                 faddrpp = addrbuf;
 418 
 419                 while (ph != NULL) {
 420                         /*
 421                          * According to RFC4960 :
 422                          * All integer fields in an SCTP packet MUST be
 423                          * transmitted in network byte order,
 424                          * unless otherwise stated.
 425                          * Therefore convert the param type to host byte order.
 426                          * Also do not add src address present in IP header
 427                          * as it has already been thru sctp_conn_match() above.
 428                          */
 429                         param_type = ntohs(ph->sph_type);
 430                         switch (param_type) {
 431                         case PARM_ADDR4:
 432                                 IN6_INADDR_TO_V4MAPPED((struct in_addr *)
 433                                     (ph + 1), &src);
 434                                 if (IN6_ARE_ADDR_EQUAL(&src, srcp))
 435                                         break;
 436                                 *faddrpp = (in6_addr_t *)
 437                                     kmem_zalloc(sizeof (in6_addr_t),
 438                                     KM_NOSLEEP);
 439                                 if (*faddrpp == NULL)
 440                                         break;
 441                                 IN6_INADDR_TO_V4MAPPED((struct in_addr *)
 442                                     (ph + 1), *faddrpp);
 443                                 nfaddr++;
 444                                 faddrpp++;
 445                                 break;
 446                         case PARM_ADDR6:
 447                                 *faddrpp = (in6_addr_t *)(ph + 1);
 448                                 if (IN6_ARE_ADDR_EQUAL(*faddrpp, srcp))
 449                                         break;
 450                                 nfaddr++;
 451                                 faddrpp++;
 452                                 break;
 453                         default:
 454                                 break;
 455                         }
 456                         ph = sctp_next_parm(ph, &remaining);
 457                 }
 458 
 459                 ASSERT(nfaddr < totaddr);
 460 
 461                 if (nfaddr > 0) {
 462                         sctp = sctp_conn_match(addrbuf, nfaddr, dstp, ports,
 463                             zoneid, iraflags, sctps);
 464 
 465                         if (isv4) {
 466                                 for (faddrpp = addrbuf; nfaddr > 0;
 467                                     faddrpp++, nfaddr--) {
 468                                         if (IN6_IS_ADDR_V4MAPPED(*faddrpp)) {
 469                                                 kmem_free(*faddrpp,
 470                                                     sizeof (in6_addr_t));
 471                                         }
 472                                 }
 473                         }
 474                 }
 475                 kmem_free(addrbuf, totaddr * sizeof (in6_addr_t *));
 476         }
 477         return (sctp);
 478 }
 479 
 480 /*
 481  * Fanout to a sctp instance.
 482  */
 483 conn_t *
 484 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
 485     ip_recv_attr_t *ira, mblk_t *mp, sctp_stack_t *sctps, sctp_hdr_t *sctph)
 486 {
 487         zoneid_t zoneid = ira->ira_zoneid;
 488         iaflags_t iraflags = ira->ira_flags;
 489         sctp_t *sctp;
 490 
 491         sctp = sctp_lookup_by_faddrs(mp, sctph, src, dst, ports, zoneid,
 492             sctps, iraflags);
 493         if (sctp == NULL) {
 494                 /* Not in conn fanout; check listen fanout */
 495                 sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
 496                 if (sctp == NULL)
 497                         return (NULL);
 498                 /*
 499                  * On systems running trusted extensions, check if dst
 500                  * should accept the packet. "IPV6_VERSION" indicates
 501                  * that dst is in 16 byte AF_INET6 format. IPv4-mapped
 502                  * IPv6 addresses are supported.
 503                  */
 504                 if ((iraflags & IRAF_SYSTEM_LABELED) &&
 505                     !tsol_receive_local(mp, dst, IPV6_VERSION, ira,
 506                     sctp->sctp_connp)) {
 507                         DTRACE_PROBE3(
 508                             tx__ip__log__info__classify__sctp,
 509                             char *,
 510                             "connp(1) could not receive mp(2)",
 511                             conn_t *, sctp->sctp_connp, mblk_t *, mp);
 512                         SCTP_REFRELE(sctp);
 513                         return (NULL);
 514                 }
 515         }
 516         /*
 517          * For labeled systems, there's no need to check the
 518          * label here.  It's known to be good as we checked
 519          * before allowing the connection to become bound.
 520          */
 521         return (sctp->sctp_connp);
 522 }
 523 
 524 /*
 525  * Fanout for ICMP errors for SCTP
 526  * The caller puts <fport, lport> in the ports parameter.
 527  */
 528 void
 529 ip_fanout_sctp(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, uint32_t ports,
 530     ip_recv_attr_t *ira)
 531 {
 532         sctp_t          *sctp;
 533         conn_t          *connp;
 534         in6_addr_t      map_src, map_dst;
 535         in6_addr_t      *src, *dst;
 536         boolean_t       secure;
 537         ill_t           *ill = ira->ira_ill;
 538         ip_stack_t      *ipst = ill->ill_ipst;
 539         netstack_t      *ns = ipst->ips_netstack;
 540         ipsec_stack_t   *ipss = ns->netstack_ipsec;
 541         sctp_stack_t    *sctps = ns->netstack_sctp;
 542         iaflags_t       iraflags = ira->ira_flags;
 543         ill_t           *rill = ira->ira_rill;
 544 
 545         ASSERT(iraflags & IRAF_ICMP_ERROR);
 546 
 547         secure = iraflags & IRAF_IPSEC_SECURE;
 548 
 549         /* Assume IP provides aligned packets - otherwise toss */
 550         if (!OK_32PTR(mp->b_rptr)) {
 551                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 552                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
 553                 freemsg(mp);
 554                 return;
 555         }
 556 
 557         if (!(iraflags & IRAF_IS_IPV4)) {
 558                 src = &ip6h->ip6_src;
 559                 dst = &ip6h->ip6_dst;
 560         } else {
 561                 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
 562                 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
 563                 src = &map_src;
 564                 dst = &map_dst;
 565         }
 566         connp = sctp_fanout(src, dst, ports, ira, mp, sctps, NULL);
 567         if (connp == NULL) {
 568                 ip_fanout_sctp_raw(mp, ipha, ip6h, ports, ira);
 569                 return;
 570         }
 571         sctp = CONN2SCTP(connp);
 572 
 573         /*
 574          * We check some fields in conn_t without holding a lock.
 575          * This should be fine.
 576          */
 577         if (((iraflags & IRAF_IS_IPV4) ?
 578             CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
 579             CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
 580             secure) {
 581                 mp = ipsec_check_inbound_policy(mp, connp, ipha,
 582                     ip6h, ira);
 583                 if (mp == NULL) {
 584                         SCTP_REFRELE(sctp);
 585                         return;
 586                 }
 587         }
 588 
 589         ira->ira_ill = ira->ira_rill = NULL;
 590 
 591         mutex_enter(&sctp->sctp_lock);
 592         if (sctp->sctp_running) {
 593                 sctp_add_recvq(sctp, mp, B_FALSE, ira);
 594                 mutex_exit(&sctp->sctp_lock);
 595         } else {
 596                 sctp->sctp_running = B_TRUE;
 597                 mutex_exit(&sctp->sctp_lock);
 598 
 599                 mutex_enter(&sctp->sctp_recvq_lock);
 600                 if (sctp->sctp_recvq != NULL) {
 601                         sctp_add_recvq(sctp, mp, B_TRUE, ira);
 602                         mutex_exit(&sctp->sctp_recvq_lock);
 603                         WAKE_SCTP(sctp);
 604                 } else {
 605                         mutex_exit(&sctp->sctp_recvq_lock);
 606                         if (ira->ira_flags & IRAF_ICMP_ERROR) {
 607                                 sctp_icmp_error(sctp, mp);
 608                         } else {
 609                                 sctp_input_data(sctp, mp, ira);
 610                         }
 611                         WAKE_SCTP(sctp);
 612                 }
 613         }
 614         SCTP_REFRELE(sctp);
 615         ira->ira_ill = ill;
 616         ira->ira_rill = rill;
 617 }
 618 
 619 void
 620 sctp_conn_hash_remove(sctp_t *sctp)
 621 {
 622         sctp_tf_t *tf = sctp->sctp_conn_tfp;
 623 
 624         if (!tf) {
 625                 return;
 626         }
 627         /*
 628          * On a clustered note send this notification to the clustering
 629          * subsystem.
 630          */
 631         if (cl_sctp_disconnect != NULL) {
 632                 (*cl_sctp_disconnect)(sctp->sctp_connp->conn_family,
 633                     (cl_sctp_handle_t)sctp);
 634         }
 635 
 636         mutex_enter(&tf->tf_lock);
 637         ASSERT(tf->tf_sctp);
 638         if (tf->tf_sctp == sctp) {
 639                 tf->tf_sctp = sctp->sctp_conn_hash_next;
 640                 if (sctp->sctp_conn_hash_next) {
 641                         ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
 642                         tf->tf_sctp->sctp_conn_hash_prev = NULL;
 643                 }
 644         } else {
 645                 ASSERT(sctp->sctp_conn_hash_prev);
 646                 ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
 647                 sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
 648                     sctp->sctp_conn_hash_next;
 649 
 650                 if (sctp->sctp_conn_hash_next) {
 651                         ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
 652                             == sctp);
 653                         sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
 654                             sctp->sctp_conn_hash_prev;
 655                 }
 656         }
 657         sctp->sctp_conn_hash_next = NULL;
 658         sctp->sctp_conn_hash_prev = NULL;
 659         sctp->sctp_conn_tfp = NULL;
 660         mutex_exit(&tf->tf_lock);
 661 }
 662 
 663 void
 664 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
 665 {
 666         if (sctp->sctp_conn_tfp) {
 667                 sctp_conn_hash_remove(sctp);
 668         }
 669 
 670         if (!caller_holds_lock) {
 671                 mutex_enter(&tf->tf_lock);
 672         } else {
 673                 ASSERT(MUTEX_HELD(&tf->tf_lock));
 674         }
 675 
 676         sctp->sctp_conn_hash_next = tf->tf_sctp;
 677         if (tf->tf_sctp) {
 678                 tf->tf_sctp->sctp_conn_hash_prev = sctp;
 679         }
 680         sctp->sctp_conn_hash_prev = NULL;
 681         tf->tf_sctp = sctp;
 682         sctp->sctp_conn_tfp = tf;
 683         if (!caller_holds_lock) {
 684                 mutex_exit(&tf->tf_lock);
 685         }
 686 }
 687 
 688 void
 689 sctp_listen_hash_remove(sctp_t *sctp)
 690 {
 691         sctp_tf_t *tf = sctp->sctp_listen_tfp;
 692         conn_t  *connp = sctp->sctp_connp;
 693 
 694         if (!tf) {
 695                 return;
 696         }
 697         /*
 698          * On a clustered note send this notification to the clustering
 699          * subsystem.
 700          */
 701         if (cl_sctp_unlisten != NULL) {
 702                 uchar_t *slist;
 703                 ssize_t ssize;
 704 
 705                 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
 706                 slist = kmem_alloc(ssize, KM_SLEEP);
 707                 sctp_get_saddr_list(sctp, slist, ssize);
 708                 (*cl_sctp_unlisten)(connp->conn_family, slist,
 709                     sctp->sctp_nsaddrs, connp->conn_lport);
 710                 /* list will be freed by the clustering module */
 711         }
 712 
 713         mutex_enter(&tf->tf_lock);
 714         ASSERT(tf->tf_sctp);
 715         if (tf->tf_sctp == sctp) {
 716                 tf->tf_sctp = sctp->sctp_listen_hash_next;
 717                 if (sctp->sctp_listen_hash_next != NULL) {
 718                         ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
 719                         tf->tf_sctp->sctp_listen_hash_prev = NULL;
 720                 }
 721         } else {
 722                 ASSERT(sctp->sctp_listen_hash_prev);
 723                 ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
 724                     sctp);
 725                 ASSERT(sctp->sctp_listen_hash_next == NULL ||
 726                     sctp->sctp_listen_hash_next->sctp_listen_hash_prev == sctp);
 727 
 728                 sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
 729                     sctp->sctp_listen_hash_next;
 730 
 731                 if (sctp->sctp_listen_hash_next != NULL) {
 732                         sctp_t *next = sctp->sctp_listen_hash_next;
 733 
 734                         ASSERT(next->sctp_listen_hash_prev == sctp);
 735                         next->sctp_listen_hash_prev =
 736                             sctp->sctp_listen_hash_prev;
 737                 }
 738         }
 739         sctp->sctp_listen_hash_next = NULL;
 740         sctp->sctp_listen_hash_prev = NULL;
 741         sctp->sctp_listen_tfp = NULL;
 742         mutex_exit(&tf->tf_lock);
 743 }
 744 
 745 /*
 746  * Link sctp at the head of the listen hash chain of bucket tf.  If the
 747  * cl_sctp_listen hook is set, call it with the sctp_get_saddr_list() list.
 748  */
 749 void
 750 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
 751 {
 752         conn_t  *lconnp = sctp->sctp_connp;
 753         uchar_t *addr_list;
 754         ssize_t addr_list_len;
 755 
 756         /* Still on a listen chain: take it off before relinking. */
 757         if (sctp->sctp_listen_tfp != NULL)
 758                 sctp_listen_hash_remove(sctp);
 759 
 760         mutex_enter(&tf->tf_lock);
 761         sctp->sctp_listen_hash_next = tf->tf_sctp;
 762         if (tf->tf_sctp != NULL)
 763                 tf->tf_sctp->sctp_listen_hash_prev = sctp;
 764         sctp->sctp_listen_hash_prev = NULL;
 765         tf->tf_sctp = sctp;
 766         sctp->sctp_listen_tfp = tf;
 767         mutex_exit(&tf->tf_lock);
 768 
 769         /* Only a clustered node has a hook to notify. */
 770         if (cl_sctp_listen == NULL)
 771                 return;
 772         addr_list_len = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
 773         addr_list = kmem_alloc(addr_list_len, KM_SLEEP);
 774         sctp_get_saddr_list(sctp, addr_list, addr_list_len);
 775         (*cl_sctp_listen)(lconnp->conn_family, addr_list,
 776             sctp->sctp_nsaddrs, lconnp->conn_lport);
 777         /* addr_list is not freed here; the clustering module frees it. */
 778 }
 779 
 780 /*
 781  * Add sctp to the bind hash bucket tbf.  The new entry always becomes the
 782  * head of the chain.  An entry that is still linked into a bind chain is
 783  * removed from it first; caller_holds_lock must be zero in that case.
 784  * If caller_holds_lock is nonzero, tbf->tf_lock is already held by the
 785  * caller and is neither taken nor dropped here.
 786  */
 787 void
 788 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
 789 {
 790         sctp_t  **headp = &tbf->tf_sctp;
 791         sctp_t  *old_head;
 792 
 793         if (sctp->sctp_ptpbhn != NULL) {
 794                 ASSERT(!caller_holds_lock);
 795                 sctp_bind_hash_remove(sctp);
 796         }
 797         if (caller_holds_lock)
 798                 ASSERT(MUTEX_HELD(&tbf->tf_lock));
 799         else
 800                 mutex_enter(&tbf->tf_lock);
 801 
 802         old_head = *headp;
 803         if (old_head != NULL)
 804                 old_head->sctp_ptpbhn = &sctp->sctp_bind_hash;
 805         sctp->sctp_bind_hash = old_head;
 806         sctp->sctp_ptpbhn = headp;
 807         *headp = sctp;
 808         /* sctp_bind_hash_remove() finds the bucket lock through this. */
 809         sctp->sctp_bind_lockp = &tbf->tf_lock;
 810         if (!caller_holds_lock)
 811                 mutex_exit(&tbf->tf_lock);
 812 }
 813 
 814 /*
 815  * Unlink sctp from its bind hash chain under the lock in sctp_bind_lockp.
 816  * Chain membership is tested before and again after taking that lock.
 817  */
 818 void
 819 sctp_bind_hash_remove(sctp_t *sctp)
 820 {
 821         kmutex_t *bucket_lock = sctp->sctp_bind_lockp;
 822         sctp_t  **backp;
 823         sctp_t  *successor;
 824 
 825         if (sctp->sctp_ptpbhn == NULL)
 826                 return;
 827         ASSERT(bucket_lock != NULL);
 828         mutex_enter(bucket_lock);
 829         backp = sctp->sctp_ptpbhn;
 830         if (backp != NULL) {
 831                 successor = sctp->sctp_bind_hash;
 832                 if (successor != NULL) {
 833                         successor->sctp_ptpbhn = backp;
 834                         sctp->sctp_bind_hash = NULL;
 835                 }
 836                 *backp = successor;
 837                 sctp->sctp_ptpbhn = NULL;
 838         }
 839         mutex_exit(bucket_lock);
 840         sctp->sctp_bind_lockp = NULL;
 841 }
 842 
 843 /*
 844  * Search hash bucket tf for an existing association that matches the
 845  * given peer address, ports and the local addresses of sctp1.
 846  * Related to, but not the same as, sctp_conn_match().
 847  *
 848  * An entry on the chain is returned when all of the following hold:
 849  *   - its conn_ports value equals *ports,
 850  *   - its sctp_state is not below min_state,
 851  *   - faddr is one of the addresses on its sctp_faddrs list, and
 852  *   - sctp_compare_saddrs(sctp1, entry) is at most SCTP_ADDR_OVERLAP.
 853  *
 854  * The caller must hold tf->tf_lock.
 855  *
 856  * A match is returned with an SCTP_REFHOLD placed on it, which the caller
 857  * must drop with SCTP_REFRELE.  NULL is returned if no entry matches.
 858  */
 859 sctp_t *
 860 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
 861     int min_state)
 862 {
 863         sctp_t *cand;
 864         sctp_faddr_t *peer;
 865 
 866         ASSERT(MUTEX_HELD(&tf->tf_lock));
 867 
 868         for (cand = tf->tf_sctp; cand != NULL;
 869             cand = cand->sctp_conn_hash_next) {
 870                 /* Skip entries with other ports or in too early a state. */
 871                 if (cand->sctp_connp->conn_ports != *ports ||
 872                     cand->sctp_state < min_state) {
 873                         continue;
 874                 }
 875 
 876                 /* Is faddr among this association's peer addresses? */
 877                 peer = cand->sctp_faddrs;
 878                 while (peer != NULL &&
 879                     !IN6_ARE_ADDR_EQUAL(faddr, &peer->sf_faddr)) {
 880                         peer = peer->sf_next;
 881                 }
 882                 if (peer == NULL) {
 883                         continue;
 884                 }
 885 
 886                 /*
 887                  * Same peer address.  The entry is returned only if its
 888                  * local address set also overlaps with that of sctp1.
 889                  */
 890                 if (sctp_compare_saddrs(sctp1, cand) <= SCTP_ADDR_OVERLAP) {
 891                         break;
 892                 }
 893         }
 894 
 895         /* cand is NULL here only if the loop ran off the end of the chain. */
 896         if (cand == NULL)
 897                 return (NULL);
 898         SCTP_REFHOLD(cand);
 899         return (cand);
 900 }