1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/systm.h>
  28 #include <sys/stream.h>
  29 #include <sys/cmn_err.h>
  30 #include <sys/kmem.h>
  31 #define _SUN_TPI_VERSION 2
  32 #include <sys/tihdr.h>
  33 #include <sys/stropts.h>
  34 #include <sys/strsubr.h>
  35 #include <sys/socket.h>
  36 #include <sys/tsol/tndb.h>
  37 
  38 #include <netinet/in.h>
  39 #include <netinet/ip6.h>
  40 
  41 #include <inet/common.h>
  42 #include <inet/ip.h>
  43 #include <inet/ip6.h>
  44 #include <inet/ipclassifier.h>
  45 #include <inet/ipsec_impl.h>
  46 
  47 #include "sctp_impl.h"
  48 #include "sctp_addr.h"
  49 
  50 /*
  51  * Common accept code.  Called by sctp_conn_request.
  52  * cr_pkt is the INIT / INIT ACK packet.
  53  */
  54 static int
  55 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt,
  56     uint_t ip_hdr_len, sctp_init_chunk_t *iack)
  57 {
  58 
  59         sctp_hdr_t              *sctph;
  60         sctp_chunk_hdr_t        *ich;
  61         sctp_init_chunk_t       *init;
  62         int                     err;
  63         uint_t                  sctp_options;
  64         conn_t                  *aconnp;
  65         conn_t                  *lconnp;
  66         sctp_stack_t            *sctps = listener->sctp_sctps;
  67 
  68         sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len);
  69         ASSERT(OK_32PTR(sctph));
  70 
  71         aconnp = acceptor->sctp_connp;
  72         lconnp = listener->sctp_connp;
  73         aconnp->conn_lport = lconnp->conn_lport;
  74         aconnp->conn_fport = sctph->sh_sport;
  75 
  76         ich = (sctp_chunk_hdr_t *)(iack + 1);
  77         init = (sctp_init_chunk_t *)(ich + 1);
  78 
  79         /* acceptor isn't in any fanouts yet, so don't need to hold locks */
  80         ASSERT(acceptor->sctp_faddrs == NULL);
  81         err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich,
  82             &sctp_options);
  83         if (err != 0)
  84                 return (err);
  85 
  86         if ((err = sctp_set_hdraddrs(acceptor)) != 0)
  87                 return (err);
  88 
  89         if ((err = sctp_build_hdrs(acceptor, KM_NOSLEEP)) != 0)
  90                 return (err);
  91 
  92         if ((sctp_options & SCTP_PRSCTP_OPTION) &&
  93             listener->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) {
  94                 acceptor->sctp_prsctp_aware = B_TRUE;
  95         } else {
  96                 acceptor->sctp_prsctp_aware = B_FALSE;
  97         }
  98 
  99         /* Get  initial TSNs */
 100         acceptor->sctp_ltsn = ntohl(iack->sic_inittsn);
 101         acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd =
 102             acceptor->sctp_ltsn - 1;
 103         acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd;
 104         /* Serial numbers are initialized to the same value as the TSNs */
 105         acceptor->sctp_lcsn = acceptor->sctp_ltsn;
 106 
 107         if (!sctp_initialize_params(acceptor, init, iack))
 108                 return (ENOMEM);
 109 
 110         /*
 111          * Copy sctp_secret from the listener in case we need to validate
 112          * a possibly delayed cookie.
 113          */
 114         bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN);
 115         bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret,
 116             SCTP_SECRET_LEN);
 117         acceptor->sctp_last_secret_update = ddi_get_lbolt64();
 118 
 119         /*
 120          * After acceptor is inserted in the hash list, it can be found.
 121          * So we need to lock it here.
 122          */
 123         RUN_SCTP(acceptor);
 124 
 125         sctp_conn_hash_insert(&sctps->sctps_conn_fanout[
 126             SCTP_CONN_HASH(sctps, aconnp->conn_ports)], acceptor, 0);
 127         sctp_bind_hash_insert(&sctps->sctps_bind_fanout[
 128             SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0);
 129 
 130         SCTP_ASSOC_EST(sctps, acceptor);
 131         return (0);
 132 }
 133 
 134 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */
 135 sctp_t *
 136 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len,
 137     sctp_init_chunk_t *iack, ip_recv_attr_t *ira)
 138 {
 139         sctp_t  *eager;
 140         ip6_t   *ip6h;
 141         int     err;
 142         conn_t  *connp, *econnp;
 143         sctp_stack_t    *sctps;
 144         cred_t          *cr;
 145         pid_t           cpid;
 146         in6_addr_t      faddr, laddr;
 147         ip_xmit_attr_t  *ixa;
 148         sctp_listen_cnt_t *slc = sctp->sctp_listen_cnt;
 149         boolean_t       slc_set = B_FALSE;
 150 
 151         /*
 152          * No need to check for duplicate as this is the listener
 153          * and we are holding the lock.  This means that no new
 154          * connection can be created out of it.  And since the
 155          * fanout already done cannot find a match, it means that
 156          * there is no duplicate.
 157          */
 158         ASSERT(OK_32PTR(mp->b_rptr));
 159 
 160         connp = sctp->sctp_connp;
 161         sctps = sctp->sctp_sctps;
 162 
 163         /*
 164          * Enforce the limit set on the number of connections per listener.
 165          * Note that tlc_cnt starts with 1.  So need to add 1 to tlc_max
 166          * for comparison.
 167          */
 168         if (slc != NULL) {
 169                 int64_t now;
 170 
 171                 if (atomic_inc_32_nv(&slc->slc_cnt) > slc->slc_max + 1) {
 172                         now = ddi_get_lbolt64();
 173                         atomic_dec_32(&slc->slc_cnt);
 174                         SCTP_KSTAT(sctps, sctp_listen_cnt_drop);
 175                         slc->slc_drop++;
 176                         if (now - slc->slc_report_time >
 177                             MSEC_TO_TICK(SCTP_SLC_REPORT_INTERVAL)) {
 178                                 zcmn_err(connp->conn_zoneid, CE_WARN,
 179                                     "SCTP listener (port %d) association max "
 180                                     "(%u) reached: %u attempts dropped total\n",
 181                                     ntohs(connp->conn_lport),
 182                                     slc->slc_max, slc->slc_drop);
 183                                 slc->slc_report_time = now;
 184                         }
 185                         return (NULL);
 186                 }
 187                 slc_set = B_TRUE;
 188         }
 189 
 190         if ((eager = sctp_create_eager(sctp)) == NULL) {
 191                 if (slc_set)
 192                         atomic_dec_32(&slc->slc_cnt);
 193                 return (NULL);
 194         }
 195         econnp = eager->sctp_connp;
 196 
 197         if (connp->conn_policy != NULL) {
 198                 /* Inherit the policy from the listener; use actions from ira */
 199                 if (!ip_ipsec_policy_inherit(econnp, connp, ira)) {
 200                         sctp_close_eager(eager);
 201                         SCTPS_BUMP_MIB(sctps, sctpListenDrop);
 202                         return (NULL);
 203                 }
 204         }
 205 
 206         ip6h = (ip6_t *)mp->b_rptr;
 207         if (ira->ira_flags & IXAF_IS_IPV4) {
 208                 ipha_t  *ipha;
 209 
 210                 ipha = (ipha_t *)ip6h;
 211                 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &laddr);
 212                 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &faddr);
 213         } else {
 214                 laddr = ip6h->ip6_dst;
 215                 faddr = ip6h->ip6_src;
 216         }
 217 
 218         if (ira->ira_flags & IRAF_IPSEC_SECURE) {
 219                 /*
 220                  * XXX need to fix the cached policy issue here.
 221                  * We temporarily set the conn_laddr/conn_faddr here so
 222                  * that IPsec can use it for the latched policy
 223                  * selector.  This is obvioursly wrong as SCTP can
 224                  * use different addresses...
 225                  */
 226                 econnp->conn_laddr_v6 = laddr;
 227                 econnp->conn_faddr_v6 = faddr;
 228                 econnp->conn_saddr_v6 = laddr;
 229         }
 230         if (ipsec_conn_cache_policy(econnp,
 231             (ira->ira_flags & IRAF_IS_IPV4) != 0) != 0) {
 232                 sctp_close_eager(eager);
 233                 SCTPS_BUMP_MIB(sctps, sctpListenDrop);
 234                 return (NULL);
 235         }
 236 
 237         /* Save for getpeerucred */
 238         cr = ira->ira_cred;
 239         cpid = ira->ira_cpid;
 240 
 241         if (is_system_labeled()) {
 242                 ip_xmit_attr_t *ixa = econnp->conn_ixa;
 243 
 244                 ASSERT(ira->ira_tsl != NULL);
 245 
 246                 /* Discard any old label */
 247                 if (ixa->ixa_free_flags & IXA_FREE_TSL) {
 248                         ASSERT(ixa->ixa_tsl != NULL);
 249                         label_rele(ixa->ixa_tsl);
 250                         ixa->ixa_free_flags &= ~IXA_FREE_TSL;
 251                         ixa->ixa_tsl = NULL;
 252                 }
 253 
 254                 if ((connp->conn_mlp_type != mlptSingle ||
 255                     connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
 256                     ira->ira_tsl != NULL) {
 257                         /*
 258                          * If this is an MLP connection or a MAC-Exempt
 259                          * connection with an unlabeled node, packets are to be
 260                          * exchanged using the security label of the received
 261                          * Cookie packet instead of the server application's
 262                          * label.
 263                          * tsol_check_dest called from ip_set_destination
 264                          * might later update TSF_UNLABELED by replacing
 265                          * ixa_tsl with a new label.
 266                          */
 267                         label_hold(ira->ira_tsl);
 268                         ip_xmit_attr_replace_tsl(ixa, ira->ira_tsl);
 269                 } else {
 270                         ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
 271                 }
 272         }
 273 
 274         err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack);
 275         if (err != 0) {
 276                 sctp_close_eager(eager);
 277                 SCTPS_BUMP_MIB(sctps, sctpListenDrop);
 278                 return (NULL);
 279         }
 280 
 281         ASSERT(eager->sctp_current->sf_ixa != NULL);
 282 
 283         ixa = eager->sctp_current->sf_ixa;
 284         if (!(ira->ira_flags & IXAF_IS_IPV4)) {
 285                 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
 286 
 287                 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
 288                     IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) {
 289                         eager->sctp_linklocal = 1;
 290 
 291                         ixa->ixa_flags |= IXAF_SCOPEID_SET;
 292                         ixa->ixa_scopeid = ifindex;
 293                         econnp->conn_incoming_ifindex = ifindex;
 294                 }
 295         }
 296 
 297         /*
 298          * On a clustered note send this notification to the clustering
 299          * subsystem.
 300          */
 301         if (cl_sctp_connect != NULL) {
 302                 uchar_t *slist;
 303                 uchar_t *flist;
 304                 size_t  fsize;
 305                 size_t  ssize;
 306 
 307                 fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs;
 308                 ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs;
 309                 slist = kmem_alloc(ssize, KM_NOSLEEP);
 310                 flist = kmem_alloc(fsize, KM_NOSLEEP);
 311                 if (slist == NULL || flist == NULL) {
 312                         if (slist != NULL)
 313                                 kmem_free(slist, ssize);
 314                         if (flist != NULL)
 315                                 kmem_free(flist, fsize);
 316                         sctp_close_eager(eager);
 317                         SCTPS_BUMP_MIB(sctps, sctpListenDrop);
 318                         SCTP_KSTAT(sctps, sctp_cl_connect);
 319                         return (NULL);
 320                 }
 321                 /* The clustering module frees these list */
 322                 sctp_get_saddr_list(eager, slist, ssize);
 323                 sctp_get_faddr_list(eager, flist, fsize);
 324                 (*cl_sctp_connect)(econnp->conn_family, slist,
 325                     eager->sctp_nsaddrs, econnp->conn_lport, flist,
 326                     eager->sctp_nfaddrs, econnp->conn_fport, B_FALSE,
 327                     (cl_sctp_handle_t)eager);
 328         }
 329 
 330         /* Connection established, so send up the conn_ind */
 331         if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd,
 332             (sock_lower_handle_t)eager, NULL, cr, cpid,
 333             &eager->sctp_upcalls)) == NULL) {
 334                 sctp_close_eager(eager);
 335                 SCTPS_BUMP_MIB(sctps, sctpListenDrop);
 336                 return (NULL);
 337         }
 338         ASSERT(SCTP_IS_DETACHED(eager));
 339         eager->sctp_detached = B_FALSE;
 340         return (eager);
 341 }
 342 
/*
 * Connect to a peer - this function inserts the sctp in the
 * bind and conn fanouts, sends the INIT, and replies to the client
 * with an OK ack.
 *
 *   sctp    - the endpoint to connect; must be in SCTPS_IDLE or SCTPS_BOUND
 *   dst     - peer address, AF_INET or AF_INET6
 *   addrlen - length of *dst; at least sizeof (sin_t), and at least
 *             sizeof (sin6_t) for AF_INET6
 *   cr, pid - credential and pid of the caller; they replace the ones
 *             cached on the conn_t and its conn_ixa
 *
 * Returns 0 once the INIT has been handed to IP, or when building the INIT
 * failed but source addresses remain (the retransmission timer retries).
 * Otherwise returns:
 *   EINVAL         address too short, non-unicast destination, address
 *                  family not matching the socket, socket closing, or
 *                  state other than IDLE/BOUND
 *   EAFNOSUPPORT   unknown family, or IPv4 / v4-mapped destination on a
 *                  v6-only socket
 *   EADDRINUSE     an identical connection already exists
 *   EADDRNOTAVAIL  no usable source address for the peer
 * or the error from sctp_bind(), sctp_add_faddr(), sctp_set_hdraddrs() or
 * sctp_build_hdrs().
 */
int
sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen,
    cred_t *cr, pid_t pid)
{
        sin_t           *sin;
        sin6_t          *sin6;
        in6_addr_t      dstaddr;
        in_port_t       dstport;
        mblk_t          *initmp;
        sctp_tf_t       *tbf;
        sctp_t          *lsctp;
        char            buf[INET6_ADDRSTRLEN];
        int             sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP;
        int             err;
        sctp_faddr_t    *cur_fp;
        sctp_stack_t    *sctps = sctp->sctp_sctps;
        conn_t          *connp = sctp->sctp_connp;
        uint_t          scope_id = 0;
        ip_xmit_attr_t  *ixa;

        /*
         * Determine packet type based on type of address passed in
         * the request should contain an IPv4 or IPv6 address.
         * Make sure that address family matches the type of
         * family of the address passed down.
         */
        if (addrlen < sizeof (sin_t)) {
                return (EINVAL);
        }
        switch (dst->sa_family) {
        case AF_INET:
                sin = (sin_t *)dst;

                /* Check for attempt to connect to non-unicast */
                if (CLASSD(sin->sin_addr.s_addr) ||
                    (sin->sin_addr.s_addr == INADDR_BROADCAST)) {
                        ip0dbg(("sctp_connect: non-unicast\n"));
                        return (EINVAL);
                }
                if (connp->conn_ipv6_v6only)
                        return (EAFNOSUPPORT);

                /* convert to v6 mapped */
                /* Check for attempt to connect to INADDR_ANY */
                if (sin->sin_addr.s_addr == INADDR_ANY)  {
                        struct in_addr v4_addr;
                        /*
                         * SunOS 4.x and 4.3 BSD allow an application
                         * to connect a TCP socket to INADDR_ANY.
                         * When they do this, the kernel picks the
                         * address of one interface and uses it
                         * instead.  The kernel usually ends up
                         * picking the address of the loopback
                         * interface.  This is an undocumented feature.
                         * However, we provide the same thing here
                         * in case any TCP apps that use this feature
                         * are being ported to SCTP...
                         */
                        v4_addr.s_addr = htonl(INADDR_LOOPBACK);
                        IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr);
                } else {
                        IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr);
                }
                dstport = sin->sin_port;
                break;
        case AF_INET6:
                sin6 = (sin6_t *)dst;
                /*
                 * Reject a too-short sockaddr and any attempt to connect
                 * to a non-unicast (multicast) address.
                 */
                if ((addrlen < sizeof (sin6_t)) ||
                    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
                        ip0dbg(("sctp_connect: non-unicast\n"));
                        return (EINVAL);
                }
                if (connp->conn_ipv6_v6only &&
                    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
                        return (EAFNOSUPPORT);
                }
                /* An unspecified destination is treated as loopback. */
                if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
                        dstaddr = ipv6_loopback;
                } else {
                        dstaddr = sin6->sin6_addr;
                        /* Remember the scope id for a link-local peer. */
                        if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) {
                                sctp->sctp_linklocal = 1;
                                scope_id = sin6->sin6_scope_id;
                        }
                }
                dstport = sin6->sin6_port;
                connp->conn_flowinfo = sin6->sin6_flowinfo;
                break;
        default:
                dprint(1, ("sctp_connect: unknown family %d\n",
                    dst->sa_family));
                return (EAFNOSUPPORT);
        }

        (void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf));
        dprint(1, ("sctp_connect: attempting connect to %s...\n", buf));

        RUN_SCTP(sctp);

        /* From here on every return path must WAKE_SCTP() first. */
        if (connp->conn_family != dst->sa_family ||
            (connp->conn_state_flags & CONN_CLOSING)) {
                WAKE_SCTP(sctp);
                return (EINVAL);
        }

        /* We update our cred/cpid based on the caller of connect */
        if (connp->conn_cred != cr) {
                crhold(cr);
                crfree(connp->conn_cred);
                connp->conn_cred = cr;
        }
        connp->conn_cpid = pid;

        /* Cache things in conn_ixa without any refhold */
        ixa = connp->conn_ixa;
        ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
        ixa->ixa_cred = cr;
        ixa->ixa_cpid = pid;
        if (is_system_labeled()) {
                /* We need to restart with a label based on the cred */
                ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
        }

        switch (sctp->sctp_state) {
        case SCTPS_IDLE: {
                struct sockaddr_storage ss;

                /*
                 * We support a quick connect capability here, allowing
                 * clients to transition directly from IDLE to COOKIE_WAIT.
                 * sctp_bindi will pick an unused port, insert the connection
                 * in the bind hash and transition to BOUND state. SCTP
                 * picks and uses what it considers the optimal local address
                 * set (just like specifying INADDR_ANY to bind()).
                 */
                dprint(1, ("sctp_connect: idle, attempting bind...\n"));
                ASSERT(sctp->sctp_nsaddrs == 0);

                /* Zeroed sockaddr of the socket's family: "any" address. */
                bzero(&ss, sizeof (ss));
                ss.ss_family = connp->conn_family;
                /* The lock is dropped around sctp_bind() and retaken after. */
                WAKE_SCTP(sctp);
                if ((err = sctp_bind(sctp, (struct sockaddr *)&ss,
                    sizeof (ss))) != 0) {
                        return (err);
                }
                RUN_SCTP(sctp);
                /* FALLTHRU */
        }

        case SCTPS_BOUND:
                ASSERT(sctp->sctp_nsaddrs > 0);

                /* do the connect */
                /* XXX check for attempt to connect to self */
                connp->conn_fport = dstport;

                /*
                 * Don't allow this connection to completely duplicate
                 * an existing connection.
                 *
                 * Ensure that the duplicate check and insertion is atomic.
                 */
                sctp_conn_hash_remove(sctp);
                tbf = &sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps,
                    connp->conn_ports)];
                mutex_enter(&tbf->tf_lock);
                lsctp = sctp_lookup(sctp, &dstaddr, tbf, &connp->conn_ports,
                    SCTPS_COOKIE_WAIT);
                if (lsctp != NULL) {
                        /* found a duplicate connection */
                        mutex_exit(&tbf->tf_lock);
                        SCTP_REFRELE(lsctp);
                        WAKE_SCTP(sctp);
                        return (EADDRINUSE);
                }

                /*
                 * OK; set up the peer addr (this may grow after we get
                 * the INIT ACK from the peer with additional addresses).
                 */
                if ((err = sctp_add_faddr(sctp, &dstaddr, sleep,
                    B_FALSE)) != 0) {
                        mutex_exit(&tbf->tf_lock);
                        WAKE_SCTP(sctp);
                        return (err);
                }
                cur_fp = sctp->sctp_faddrs;
                ASSERT(cur_fp->sf_ixa != NULL);

                /* No valid src addr, return. */
                if (cur_fp->sf_state == SCTP_FADDRS_UNREACH) {
                        mutex_exit(&tbf->tf_lock);
                        WAKE_SCTP(sctp);
                        return (EADDRNOTAVAIL);
                }

                /* The single peer address is both primary and current. */
                sctp->sctp_primary = cur_fp;
                sctp->sctp_current = cur_fp;
                sctp->sctp_mss = cur_fp->sf_pmss;
                sctp_conn_hash_insert(tbf, sctp, 1);
                mutex_exit(&tbf->tf_lock);

                /* From here on 'ixa' is the peer address's transmit attrs. */
                ixa = cur_fp->sf_ixa;
                ASSERT(ixa->ixa_cred != NULL);

                if (scope_id != 0) {
                        ixa->ixa_flags |= IXAF_SCOPEID_SET;
                        ixa->ixa_scopeid = scope_id;
                } else {
                        ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
                }

                /* initialize composite headers */
                if ((err = sctp_set_hdraddrs(sctp)) != 0) {
                        sctp_conn_hash_remove(sctp);
                        WAKE_SCTP(sctp);
                        return (err);
                }

                /*
                 * NOTE(review): this passes KM_SLEEP unconditionally even
                 * though 'sleep' above honors sctp_cansleep -- confirm that
                 * is intended.
                 */
                if ((err = sctp_build_hdrs(sctp, KM_SLEEP)) != 0) {
                        sctp_conn_hash_remove(sctp);
                        WAKE_SCTP(sctp);
                        return (err);
                }

                /*
                 * Turn off the don't fragment bit on the (only) faddr,
                 * so that if one of the messages exchanged during the
                 * initialization sequence exceeds the path mtu, it
                 * at least has a chance to get there. SCTP does no
                 * fragmentation of initialization messages.  The DF bit
                 * will be turned on again in sctp_send_cookie_echo()
                 * (but the cookie echo will still be sent with the df bit
                 * off).
                 */
                cur_fp->sf_df = B_FALSE;

                /* Mark this address as alive */
                cur_fp->sf_state = SCTP_FADDRS_ALIVE;

                /* Send the INIT to the peer */
                SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->sf_rto);
                sctp->sctp_state = SCTPS_COOKIE_WAIT;
                /*
                 * sctp_init_mp() could result in modifying the source
                 * address list, so take the hash lock.
                 */
                mutex_enter(&tbf->tf_lock);
                initmp = sctp_init_mp(sctp, cur_fp);
                if (initmp == NULL) {
                        mutex_exit(&tbf->tf_lock);
                        /*
                         * It may happen that all the source addresses
                         * (loopback/link local) are removed.  In that case,
                         * fail the connect.
                         */
                        if (sctp->sctp_nsaddrs == 0) {
                                sctp_conn_hash_remove(sctp);
                                SCTP_FADDR_TIMER_STOP(cur_fp);
                                WAKE_SCTP(sctp);
                                return (EADDRNOTAVAIL);
                        }

                        /* Otherwise, let the retransmission timer retry */
                        WAKE_SCTP(sctp);
                        goto notify_ulp;
                }
                mutex_exit(&tbf->tf_lock);

                /*
                 * On a clustered node send this notification to the clustering
                 * subsystem.
                 */
                if (cl_sctp_connect != NULL) {
                        uchar_t         *slist;
                        uchar_t         *flist;
                        size_t          ssize;
                        size_t          fsize;

                        fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
                        ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
                        /*
                         * NOTE(review): KM_SLEEP is used here regardless of
                         * sctp_cansleep, and the results are not checked for
                         * NULL -- confirm this path can always sleep.
                         */
                        slist = kmem_alloc(ssize, KM_SLEEP);
                        flist = kmem_alloc(fsize, KM_SLEEP);
                        /* The clustering module frees the lists */
                        sctp_get_saddr_list(sctp, slist, ssize);
                        sctp_get_faddr_list(sctp, flist, fsize);
                        (*cl_sctp_connect)(connp->conn_family, slist,
                            sctp->sctp_nsaddrs, connp->conn_lport,
                            flist, sctp->sctp_nfaddrs, connp->conn_fport,
                            B_TRUE, (cl_sctp_handle_t)sctp);
                }
                ASSERT(ixa->ixa_cred != NULL);
                ASSERT(ixa->ixa_ire != NULL);

                /* Hand the INIT to IP; its return value is ignored. */
                (void) conn_ip_output(initmp, ixa);
                BUMP_LOCAL(sctp->sctp_opkts);
                WAKE_SCTP(sctp);

notify_ulp:
                /* Reached with the sctp lock already released. */
                sctp_set_ulp_prop(sctp);

                return (0);
        default:
                ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state));
                WAKE_SCTP(sctp);
                return (EINVAL);
        }
}