1 /*
   2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
   3  *
   4  * See the IPFILTER.LICENCE file for details on licencing.
   5  *
   6  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   7  */
   8 
   9 #if !defined(lint)
  10 static const char sccsid[] = "@(#)ip_fil_solaris.c      1.7 07/22/06 (C) 1993-2000 Darren Reed";
  11 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
  12 #endif
  13 
  14 #include <sys/types.h>
  15 #include <sys/errno.h>
  16 #include <sys/param.h>
  17 #include <sys/cpuvar.h>
  18 #include <sys/open.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/filio.h>
  21 #include <sys/systm.h>
  22 #include <sys/strsubr.h>
  23 #include <sys/cred.h>
  24 #include <sys/ddi.h>
  25 #include <sys/sunddi.h>
  26 #include <sys/ksynch.h>
  27 #include <sys/kmem.h>
  28 #include <sys/mkdev.h>
  29 #include <sys/protosw.h>
  30 #include <sys/socket.h>
  31 #include <sys/dditypes.h>
  32 #include <sys/cmn_err.h>
  33 #include <sys/zone.h>
  34 #include <net/if.h>
  35 #include <net/af.h>
  36 #include <net/route.h>
  37 #include <netinet/in.h>
  38 #include <netinet/in_systm.h>
  39 #include <netinet/ip.h>
  40 #include <netinet/ip_var.h>
  41 #include <netinet/tcp.h>
  42 #include <netinet/udp.h>
  43 #include <netinet/tcpip.h>
  44 #include <netinet/ip_icmp.h>
  45 #include "netinet/ip_compat.h"
  46 #ifdef  USE_INET6
  47 # include <netinet/icmp6.h>
  48 #endif
  49 #include "netinet/ip_fil.h"
  50 #include "netinet/ip_nat.h"
  51 #include "netinet/ip_frag.h"
  52 #include "netinet/ip_state.h"
  53 #include "netinet/ip_auth.h"
  54 #include "netinet/ip_proxy.h"
  55 #include "netinet/ipf_stack.h"
  56 #ifdef  IPFILTER_LOOKUP
  57 # include "netinet/ip_lookup.h"
  58 #endif
  59 #include <inet/ip_ire.h>
  60 
  61 #include <sys/md5.h>
  62 #include <sys/neti.h>
  63 
/*
 * Forward declarations.
 *
 * The static functions are private to this file; several of the hook and
 * NIC event callbacks are the ones handed to HOOK_INIT() in iplattach().
 * ipf_geniter() and ipf_frruleiter() are declared extern here.
 *
 * NOTE(review): frzerostats is declared below, but iplioctl() calls
 * fr_zerostats() - confirm whether this prototype is still needed.
 */
static  int     frzerostats __P((caddr_t, ipf_stack_t *));
static  int     fr_setipfloopback __P((int, ipf_stack_t *));
static  int     fr_enableipf __P((ipf_stack_t *, int));
static  int     fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
static  int     ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook __P((hook_data_t, int, int, void *));
static  int     ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static  int     ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook4 __P((hook_data_t, int, int, void *));
static  int     ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static  int     ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
    void *));
static  int     ipf_hook6 __P((hook_data_t, int, int, void *));
extern  int     ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
extern  int     ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
  86 
/*
 * Pointers to IP tunables, compiled only for Solaris releases before 10.
 * iplattach() points them at ip_g_forward or at entries of ip_param_arr[],
 * and iplattach()/ipldetach() write through the forwarding pointers when
 * the matching bit of ifs_fr_control_forwarding is set.  The pointee type
 * depends on the release being built for.
 */
#if SOLARIS2 < 10
#if SOLARIS2 >= 7
u_int           *ip_ttl_ptr = NULL;
u_int           *ip_mtudisc = NULL;
# if SOLARIS2 >= 8
int             *ip_forwarding = NULL;
u_int           *ip6_forwarding = NULL;
# else
u_int           *ip_forwarding = NULL;
# endif
#else
u_long          *ip_ttl_ptr = NULL;
u_long          *ip_mtudisc = NULL;
u_long          *ip_forwarding = NULL;
#endif
#endif
 103 
 104 
 105 /* ------------------------------------------------------------------------ */
 106 /* Function:    ipldetach                                                   */
 107 /* Returns:     int - 0 == success, else error.                             */
 108 /* Parameters:  Nil                                                         */
 109 /*                                                                          */
 110 /* This function is responsible for undoing anything that might have been   */
 111 /* done in a call to iplattach().  It must be able to clean up from a call  */
 112 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
 113 /* configures a table to be so large that we cannot allocate enough memory  */
 114 /* for it.                                                                  */
 115 /* ------------------------------------------------------------------------ */
 116 int ipldetach(ifs)
 117 ipf_stack_t *ifs;
 118 {
 119 
 120         ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
 121 
 122 #if SOLARIS2 < 10
 123 
 124         if (ifs->ifs_fr_control_forwarding & 2) {
 125                 if (ip_forwarding != NULL)
 126                         *ip_forwarding = 0;
 127 #if SOLARIS2 >= 8
 128                 if (ip6_forwarding != NULL)
 129                         *ip6_forwarding = 0;
 130 #endif
 131         }
 132 #endif
 133 
 134         /*
 135          * This lock needs to be dropped around the net_hook_unregister calls
 136          * because we can deadlock here with:
 137          * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
 138          * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
 139          */
 140         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 141 
 142 #define UNDO_HOOK(_f, _b, _e, _h)                                       \
 143         do {                                                            \
 144                 if (ifs->_f != NULL) {                                       \
 145                         if (ifs->_b) {                                       \
 146                                 ifs->_b = (net_hook_unregister(ifs->_f,   \
 147                                            _e, ifs->_h) != 0);               \
 148                                 if (!ifs->_b) {                              \
 149                                         hook_free(ifs->_h);          \
 150                                         ifs->_h = NULL;                      \
 151                                 }                                       \
 152                         } else if (ifs->_h != NULL) {                        \
 153                                 hook_free(ifs->_h);                  \
 154                                 ifs->_h = NULL;                              \
 155                         }                                               \
 156                 }                                                       \
 157                 _NOTE(CONSTCOND)                                        \
 158         } while (0)
 159 
 160         /*
 161          * Remove IPv6 Hooks
 162          */
 163         if (ifs->ifs_ipf_ipv6 != NULL) {
 164                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
 165                           NH_PHYSICAL_IN, ifs_ipfhook6_in);
 166                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
 167                           NH_PHYSICAL_OUT, ifs_ipfhook6_out);
 168                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
 169                           NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
 170                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
 171                           NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
 172                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
 173                           NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
 174 
 175                 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
 176                         goto detach_failed;
 177                 ifs->ifs_ipf_ipv6 = NULL;
 178         }
 179 
 180         /*
 181          * Remove IPv4 Hooks
 182          */
 183         if (ifs->ifs_ipf_ipv4 != NULL) {
 184                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
 185                           NH_PHYSICAL_IN, ifs_ipfhook4_in);
 186                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
 187                           NH_PHYSICAL_OUT, ifs_ipfhook4_out);
 188                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
 189                           NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
 190                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
 191                           NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
 192                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
 193                           NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
 194 
 195                 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
 196                         goto detach_failed;
 197                 ifs->ifs_ipf_ipv4 = NULL;
 198         }
 199 
 200 #undef UNDO_HOOK
 201 
 202 #ifdef  IPFDEBUG
 203         cmn_err(CE_CONT, "ipldetach()\n");
 204 #endif
 205 
 206         WRITE_ENTER(&ifs->ifs_ipf_global);
 207         fr_deinitialise(ifs);
 208 
 209         (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
 210         (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
 211 
 212         if (ifs->ifs_ipf_locks_done == 1) {
 213                 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
 214                 MUTEX_DESTROY(&ifs->ifs_ipf_rw);
 215                 RW_DESTROY(&ifs->ifs_ipf_tokens);
 216                 RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
 217                 ifs->ifs_ipf_locks_done = 0;
 218         }
 219 
 220         if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
 221             ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
 222             ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
 223             ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
 224             ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
 225                 return -1;
 226 
 227         return 0;
 228 
 229 detach_failed:
 230         WRITE_ENTER(&ifs->ifs_ipf_global);
 231         return -1;
 232 }
 233 
 234 int iplattach(ifs)
 235 ipf_stack_t *ifs;
 236 {
 237 #if SOLARIS2 < 10
 238         int i;
 239 #endif
 240         netid_t id = ifs->ifs_netid;
 241 
 242 #ifdef  IPFDEBUG
 243         cmn_err(CE_CONT, "iplattach()\n");
 244 #endif
 245 
 246         ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
 247         ifs->ifs_fr_flags = IPF_LOGGING;
 248 #ifdef _KERNEL
 249         ifs->ifs_fr_update_ipid = 0;
 250 #else
 251         ifs->ifs_fr_update_ipid = 1;
 252 #endif
 253         ifs->ifs_fr_minttl = 4;
 254         ifs->ifs_fr_icmpminfragmtu = 68;
 255 #if defined(IPFILTER_DEFAULT_BLOCK)
 256         ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
 257 #else
 258         ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
 259 #endif
 260 
 261         bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
 262         MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
 263         MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
 264         RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
 265         RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
 266         ifs->ifs_ipf_locks_done = 1;
 267 
 268         if (fr_initialise(ifs) < 0)
 269                 return -1;
 270 
 271         HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
 272                   "ipfilter_hook4_nicevents", ifs);
 273         HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
 274                   "ipfilter_hook4_in", ifs);
 275         HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
 276                   "ipfilter_hook4_out", ifs);
 277         HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
 278                   "ipfilter_hook4_loop_in", ifs);
 279         HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
 280                   "ipfilter_hook4_loop_out", ifs);
 281 
 282         /*
 283          * If we hold this lock over all of the net_hook_register calls, we
 284          * can cause a deadlock to occur with the following lock ordering:
 285          * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
 286          * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
 287          */
 288         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 289 
 290         /*
 291          * Add IPv4 hooks
 292          */
 293         ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
 294         if (ifs->ifs_ipf_ipv4 == NULL)
 295                 goto hookup_failed;
 296 
 297         ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
 298             NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
 299         if (!ifs->ifs_hook4_nic_events)
 300                 goto hookup_failed;
 301 
 302         ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
 303             NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
 304         if (!ifs->ifs_hook4_physical_in)
 305                 goto hookup_failed;
 306 
 307         ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
 308             NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
 309         if (!ifs->ifs_hook4_physical_out)
 310                 goto hookup_failed;
 311 
 312         if (ifs->ifs_ipf_loopback) {
 313                 ifs->ifs_hook4_loopback_in = (net_hook_register(
 314                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
 315                     ifs->ifs_ipfhook4_loop_in) == 0);
 316                 if (!ifs->ifs_hook4_loopback_in)
 317                         goto hookup_failed;
 318 
 319                 ifs->ifs_hook4_loopback_out = (net_hook_register(
 320                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
 321                     ifs->ifs_ipfhook4_loop_out) == 0);
 322                 if (!ifs->ifs_hook4_loopback_out)
 323                         goto hookup_failed;
 324         }
 325         /*
 326          * Add IPv6 hooks
 327          */
 328         ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
 329         if (ifs->ifs_ipf_ipv6 == NULL)
 330                 goto hookup_failed;
 331 
 332         HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
 333                   "ipfilter_hook6_nicevents", ifs);
 334         HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
 335                   "ipfilter_hook6_in", ifs);
 336         HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
 337                   "ipfilter_hook6_out", ifs);
 338         HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
 339                   "ipfilter_hook6_loop_in", ifs);
 340         HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
 341                   "ipfilter_hook6_loop_out", ifs);
 342 
 343         ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
 344             NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
 345         if (!ifs->ifs_hook6_nic_events)
 346                 goto hookup_failed;
 347 
 348         ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
 349             NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
 350         if (!ifs->ifs_hook6_physical_in)
 351                 goto hookup_failed;
 352 
 353         ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
 354             NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
 355         if (!ifs->ifs_hook6_physical_out)
 356                 goto hookup_failed;
 357 
 358         if (ifs->ifs_ipf_loopback) {
 359                 ifs->ifs_hook6_loopback_in = (net_hook_register(
 360                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
 361                     ifs->ifs_ipfhook6_loop_in) == 0);
 362                 if (!ifs->ifs_hook6_loopback_in)
 363                         goto hookup_failed;
 364 
 365                 ifs->ifs_hook6_loopback_out = (net_hook_register(
 366                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
 367                     ifs->ifs_ipfhook6_loop_out) == 0);
 368                 if (!ifs->ifs_hook6_loopback_out)
 369                         goto hookup_failed;
 370         }
 371 
 372         /*
 373          * Reacquire ipf_global, now it is safe.
 374          */
 375         WRITE_ENTER(&ifs->ifs_ipf_global);
 376 
 377 /* Do not use private interface ip_params_arr[] in Solaris 10 */
 378 #if SOLARIS2 < 10
 379 
 380 #if SOLARIS2 >= 8
 381         ip_forwarding = &ip_g_forward;
 382 #endif
 383         /*
 384          * XXX - There is no terminator for this array, so it is not possible
 385          * to tell if what we are looking for is missing and go off the end
 386          * of the array.
 387          */
 388 
 389 #if SOLARIS2 <= 8
 390         for (i = 0; ; i++) {
 391                 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
 392                         ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
 393                 } else if (!strcmp(ip_param_arr[i].ip_param_name,
 394                             "ip_path_mtu_discovery")) {
 395                         ip_mtudisc = &ip_param_arr[i].ip_param_value;
 396                 }
 397 #if SOLARIS2 < 8
 398                 else if (!strcmp(ip_param_arr[i].ip_param_name,
 399                             "ip_forwarding")) {
 400                         ip_forwarding = &ip_param_arr[i].ip_param_value;
 401                 }
 402 #else
 403                 else if (!strcmp(ip_param_arr[i].ip_param_name,
 404                             "ip6_forwarding")) {
 405                         ip6_forwarding = &ip_param_arr[i].ip_param_value;
 406                 }
 407 #endif
 408 
 409                 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
 410 #if SOLARIS2 >= 8
 411                     ip6_forwarding != NULL &&
 412 #endif
 413                     ip_forwarding != NULL)
 414                         break;
 415         }
 416 #endif
 417 
 418         if (ifs->ifs_fr_control_forwarding & 1) {
 419                 if (ip_forwarding != NULL)
 420                         *ip_forwarding = 1;
 421 #if SOLARIS2 >= 8
 422                 if (ip6_forwarding != NULL)
 423                         *ip6_forwarding = 1;
 424 #endif
 425         }
 426 
 427 #endif
 428 
 429         return 0;
 430 hookup_failed:
 431         WRITE_ENTER(&ifs->ifs_ipf_global);
 432         return -1;
 433 }
 434 
 435 static  int     fr_setipfloopback(set, ifs)
 436 int set;
 437 ipf_stack_t *ifs;
 438 {
 439         if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
 440                 return EFAULT;
 441 
 442         if (set && !ifs->ifs_ipf_loopback) {
 443                 ifs->ifs_ipf_loopback = 1;
 444 
 445                 ifs->ifs_hook4_loopback_in = (net_hook_register(
 446                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
 447                     ifs->ifs_ipfhook4_loop_in) == 0);
 448                 if (!ifs->ifs_hook4_loopback_in)
 449                         return EINVAL;
 450 
 451                 ifs->ifs_hook4_loopback_out = (net_hook_register(
 452                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
 453                     ifs->ifs_ipfhook4_loop_out) == 0);
 454                 if (!ifs->ifs_hook4_loopback_out)
 455                         return EINVAL;
 456 
 457                 ifs->ifs_hook6_loopback_in = (net_hook_register(
 458                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
 459                     ifs->ifs_ipfhook6_loop_in) == 0);
 460                 if (!ifs->ifs_hook6_loopback_in)
 461                         return EINVAL;
 462 
 463                 ifs->ifs_hook6_loopback_out = (net_hook_register(
 464                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
 465                     ifs->ifs_ipfhook6_loop_out) == 0);
 466                 if (!ifs->ifs_hook6_loopback_out)
 467                         return EINVAL;
 468 
 469         } else if (!set && ifs->ifs_ipf_loopback) {
 470                 ifs->ifs_ipf_loopback = 0;
 471 
 472                 ifs->ifs_hook4_loopback_in =
 473                     (net_hook_unregister(ifs->ifs_ipf_ipv4,
 474                     NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
 475                 if (ifs->ifs_hook4_loopback_in)
 476                         return EBUSY;
 477 
 478                 ifs->ifs_hook4_loopback_out =
 479                     (net_hook_unregister(ifs->ifs_ipf_ipv4,
 480                     NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
 481                 if (ifs->ifs_hook4_loopback_out)
 482                         return EBUSY;
 483 
 484                 ifs->ifs_hook6_loopback_in =
 485                     (net_hook_unregister(ifs->ifs_ipf_ipv6,
 486                     NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
 487                 if (ifs->ifs_hook6_loopback_in)
 488                         return EBUSY;
 489 
 490                 ifs->ifs_hook6_loopback_out =
 491                     (net_hook_unregister(ifs->ifs_ipf_ipv6,
 492                     NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
 493                 if (ifs->ifs_hook6_loopback_out)
 494                         return EBUSY;
 495         }
 496         return 0;
 497 }
 498 
 499 
 500 /*
 501  * Filter ioctl interface.
 502  */
 503 /*ARGSUSED*/
 504 int iplioctl(dev, cmd, data, mode, cp, rp)
 505 dev_t dev;
 506 int cmd;
 507 #if SOLARIS2 >= 7
 508 intptr_t data;
 509 #else
 510 int *data;
 511 #endif
 512 int mode;
 513 cred_t *cp;
 514 int *rp;
 515 {
 516         int error = 0, tmp;
 517         friostat_t fio;
 518         minor_t unit;
 519         u_int enable;
 520         ipf_stack_t *ifs;
 521 
 522 #ifdef  IPFDEBUG
 523         cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
 524                 dev, cmd, data, mode, cp, rp);
 525 #endif
 526         unit = getminor(dev);
 527         if (IPL_LOGMAX < unit)
 528                 return ENXIO;
 529 
 530         /*
 531          * As we're calling ipf_find_stack in user space, from a given zone
 532          * to find the stack pointer for this zone, there is no need to have
 533          * a hold/refence count here.
 534          */
 535         ifs = ipf_find_stack(crgetzoneid(cp));
 536         ASSERT(ifs != NULL);
 537 
 538         if (ifs->ifs_fr_running <= 0) {
 539                 if (unit != IPL_LOGIPF) {
 540                         return EIO;
 541                 }
 542                 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
 543                     cmd != SIOCIPFSET && cmd != SIOCFRENB &&
 544                     cmd != SIOCGETFS && cmd != SIOCGETFF) {
 545                         return EIO;
 546                 }
 547         }
 548 
 549         READ_ENTER(&ifs->ifs_ipf_global);
 550         if (ifs->ifs_fr_enable_active != 0) {
 551                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
 552                 return EBUSY;
 553         }
 554 
 555         error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
 556                                curproc, ifs);
 557         if (error != -1) {
 558                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
 559                 return error;
 560         }
 561         error = 0;
 562 
 563         switch (cmd)
 564         {
 565         case SIOCFRENB :
 566                 if (!(mode & FWRITE))
 567                         error = EPERM;
 568                 else {
 569                         error = COPYIN((caddr_t)data, (caddr_t)&enable,
 570                                        sizeof(enable));
 571                         if (error != 0) {
 572                                 error = EFAULT;
 573                                 break;
 574                         }
 575 
 576                         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 577                         WRITE_ENTER(&ifs->ifs_ipf_global);
 578 
 579                         /*
 580                          * We must recheck fr_enable_active here, since we've
 581                          * dropped ifs_ipf_global from R in order to get it
 582                          * exclusively.
 583                          */
 584                         if (ifs->ifs_fr_enable_active == 0) {
 585                                 ifs->ifs_fr_enable_active = 1;
 586                                 error = fr_enableipf(ifs, enable);
 587                                 ifs->ifs_fr_enable_active = 0;
 588                         }
 589                 }
 590                 break;
 591         case SIOCIPFSET :
 592                 if (!(mode & FWRITE)) {
 593                         error = EPERM;
 594                         break;
 595                 }
 596                 /* FALLTHRU */
 597         case SIOCIPFGETNEXT :
 598         case SIOCIPFGET :
 599                 error = fr_ipftune(cmd, (void *)data, ifs);
 600                 break;
 601         case SIOCSETFF :
 602                 if (!(mode & FWRITE))
 603                         error = EPERM;
 604                 else {
 605                         error = COPYIN((caddr_t)data,
 606                                        (caddr_t)&ifs->ifs_fr_flags,
 607                                        sizeof(ifs->ifs_fr_flags));
 608                         if (error != 0)
 609                                 error = EFAULT;
 610                 }
 611                 break;
 612         case SIOCIPFLP :
 613                 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
 614                                sizeof(tmp));
 615                 if (error != 0)
 616                         error = EFAULT;
 617                 else
 618                         error = fr_setipfloopback(tmp, ifs);
 619                 break;
 620         case SIOCGETFF :
 621                 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
 622                                 sizeof(ifs->ifs_fr_flags));
 623                 if (error != 0)
 624                         error = EFAULT;
 625                 break;
 626         case SIOCFUNCL :
 627                 error = fr_resolvefunc((void *)data);
 628                 break;
 629         case SIOCINAFR :
 630         case SIOCRMAFR :
 631         case SIOCADAFR :
 632         case SIOCZRLST :
 633                 if (!(mode & FWRITE))
 634                         error = EPERM;
 635                 else
 636                         error = frrequest(unit, cmd, (caddr_t)data,
 637                                           ifs->ifs_fr_active, 1, ifs);
 638                 break;
 639         case SIOCINIFR :
 640         case SIOCRMIFR :
 641         case SIOCADIFR :
 642                 if (!(mode & FWRITE))
 643                         error = EPERM;
 644                 else
 645                         error = frrequest(unit, cmd, (caddr_t)data,
 646                                           1 - ifs->ifs_fr_active, 1, ifs);
 647                 break;
 648         case SIOCSWAPA :
 649                 if (!(mode & FWRITE))
 650                         error = EPERM;
 651                 else {
 652                         WRITE_ENTER(&ifs->ifs_ipf_mutex);
 653                         bzero((char *)ifs->ifs_frcache,
 654                             sizeof (ifs->ifs_frcache));
 655                         error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
 656                                         (caddr_t)data,
 657                                         sizeof(ifs->ifs_fr_active));
 658                         if (error != 0)
 659                                 error = EFAULT;
 660                         else
 661                                 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
 662                         RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
 663                 }
 664                 break;
 665         case SIOCGETFS :
 666                 fr_getstat(&fio, ifs);
 667                 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
 668                 break;
 669         case SIOCFRZST :
 670                 if (!(mode & FWRITE))
 671                         error = EPERM;
 672                 else
 673                         error = fr_zerostats((caddr_t)data, ifs);
 674                 break;
 675         case    SIOCIPFFL :
 676                 if (!(mode & FWRITE))
 677                         error = EPERM;
 678                 else {
 679                         error = COPYIN((caddr_t)data, (caddr_t)&tmp,
 680                                        sizeof(tmp));
 681                         if (!error) {
 682                                 tmp = frflush(unit, 4, tmp, ifs);
 683                                 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
 684                                                 sizeof(tmp));
 685                                 if (error != 0)
 686                                         error = EFAULT;
 687                         } else
 688                                 error = EFAULT;
 689                 }
 690                 break;
 691 #ifdef USE_INET6
 692         case    SIOCIPFL6 :
 693                 if (!(mode & FWRITE))
 694                         error = EPERM;
 695                 else {
 696                         error = COPYIN((caddr_t)data, (caddr_t)&tmp,
 697                                        sizeof(tmp));
 698                         if (!error) {
 699                                 tmp = frflush(unit, 6, tmp, ifs);
 700                                 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
 701                                                 sizeof(tmp));
 702                                 if (error != 0)
 703                                         error = EFAULT;
 704                         } else
 705                                 error = EFAULT;
 706                 }
 707                 break;
 708 #endif
 709         case SIOCSTLCK :
 710                 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
 711                 if (error == 0) {
 712                         ifs->ifs_fr_state_lock = tmp;
 713                         ifs->ifs_fr_nat_lock = tmp;
 714                         ifs->ifs_fr_frag_lock = tmp;
 715                         ifs->ifs_fr_auth_lock = tmp;
 716                 } else
 717                         error = EFAULT;
 718         break;
 719 #ifdef  IPFILTER_LOG
 720         case    SIOCIPFFB :
 721                 if (!(mode & FWRITE))
 722                         error = EPERM;
 723                 else {
 724                         tmp = ipflog_clear(unit, ifs);
 725                         error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
 726                                        sizeof(tmp));
 727                         if (error)
 728                                 error = EFAULT;
 729                 }
 730                 break;
 731 #endif /* IPFILTER_LOG */
 732         case SIOCFRSYN :
 733                 if (!(mode & FWRITE))
 734                         error = EPERM;
 735                 else {
 736                         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 737                         WRITE_ENTER(&ifs->ifs_ipf_global);
 738 
 739                         frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
 740                         fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
 741                         fr_nataddrsync(0, NULL, NULL, ifs);
 742                         fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
 743                         error = 0;
 744                 }
 745                 break;
 746         case SIOCGFRST :
 747                 error = fr_outobj((void *)data, fr_fragstats(ifs),
 748                                   IPFOBJ_FRAGSTAT);
 749                 break;
 750         case FIONREAD :
 751 #ifdef  IPFILTER_LOG
 752                 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
 753 
 754                 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
 755                 if (error != 0)
 756                         error = EFAULT;
 757 #endif
 758                 break;
 759         case SIOCIPFITER :
 760                 error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
 761                                        curproc, ifs);
 762                 break;
 763 
 764         case SIOCGENITER :
 765                 error = ipf_genericiter((caddr_t)data, crgetuid(cp),
 766                                         curproc, ifs);
 767                 break;
 768 
 769         case SIOCIPFDELTOK :
 770                 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
 771                 if (error != 0) {
 772                         error = EFAULT;
 773                 } else {
 774                         error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
 775                 }
 776                 break;
 777 
 778         default :
 779 #ifdef  IPFDEBUG
 780                 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
 781                         cmd, (void *)data);
 782 #endif
 783                 error = EINVAL;
 784                 break;
 785         }
 786         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 787         return error;
 788 }
 789 
 790 
 791 static int fr_enableipf(ifs, enable)
 792 ipf_stack_t *ifs;
 793 int enable;
 794 {
 795         int error;
 796 
 797         if (!enable) {
 798                 error = ipldetach(ifs);
 799                 if (error == 0)
 800                         ifs->ifs_fr_running = -1;
 801                 return error;
 802         }
 803 
 804         if (ifs->ifs_fr_running > 0)
 805                 return 0;
 806 
 807         error = iplattach(ifs);
 808         if (error == 0) {
 809                 if (ifs->ifs_fr_timer_id == NULL) {
 810                         int hz = drv_usectohz(500000);
 811 
 812                         ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
 813                                                        (void *)ifs,
 814                                                        hz);
 815                 }
 816                 ifs->ifs_fr_running = 1;
 817         } else {
 818                 (void) ipldetach(ifs);
 819         }
 820         return error;
 821 }
 822 
 823 
 824 phy_if_t get_unit(name, v, ifs)
 825 char *name;
 826 int v;
 827 ipf_stack_t *ifs;
 828 {
 829         net_handle_t nif;
 830  
 831         if (v == 4)
 832                 nif = ifs->ifs_ipf_ipv4;
 833         else if (v == 6)
 834                 nif = ifs->ifs_ipf_ipv6;
 835         else
 836                 return 0;
 837 
 838         return (net_phylookup(nif, name));
 839 }
 840 
 841 /*
 842  * routines below for saving IP headers to buffer
 843  */
 844 /*ARGSUSED*/
 845 int iplopen(devp, flags, otype, cred)
 846 dev_t *devp;
 847 int flags, otype;
 848 cred_t *cred;
 849 {
 850         minor_t min = getminor(*devp);
 851 
 852 #ifdef  IPFDEBUG
 853         cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
 854 #endif
 855         if (!(otype & OTYP_CHR))
 856                 return ENXIO;
 857 
 858         min = (IPL_LOGMAX < min) ? ENXIO : 0;
 859         return min;
 860 }
 861 
 862 
 863 /*ARGSUSED*/
 864 int iplclose(dev, flags, otype, cred)
 865 dev_t dev;
 866 int flags, otype;
 867 cred_t *cred;
 868 {
 869         minor_t min = getminor(dev);
 870 
 871 #ifdef  IPFDEBUG
 872         cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
 873 #endif
 874 
 875         min = (IPL_LOGMAX < min) ? ENXIO : 0;
 876         return min;
 877 }
 878 
 879 #ifdef  IPFILTER_LOG
 880 /*
 881  * iplread/ipllog
 882  * both of these must operate with at least splnet() lest they be
 883  * called during packet processing and cause an inconsistancy to appear in
 884  * the filter lists.
 885  */
 886 /*ARGSUSED*/
 887 int iplread(dev, uio, cp)
 888 dev_t dev;
 889 register struct uio *uio;
 890 cred_t *cp;
 891 {
 892         ipf_stack_t *ifs;
 893         int ret;
 894 
 895         /*
 896          * As we're calling ipf_find_stack in user space, from a given zone
 897          * to find the stack pointer for this zone, there is no need to have
 898          * a hold/refence count here.
 899          */
 900         ifs = ipf_find_stack(crgetzoneid(cp));
 901         ASSERT(ifs != NULL);
 902 
 903 # ifdef IPFDEBUG
 904         cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
 905 # endif
 906 
 907         if (ifs->ifs_fr_running < 1) {
 908                 return EIO;
 909         }
 910 
 911 # ifdef IPFILTER_SYNC
 912         if (getminor(dev) == IPL_LOGSYNC) {
 913                 return ipfsync_read(uio);
 914         }
 915 # endif
 916 
 917         ret = ipflog_read(getminor(dev), uio, ifs);
 918         return ret;
 919 }
 920 #endif /* IPFILTER_LOG */
 921 
 922 
 923 /*
 924  * iplread/ipllog
 925  * both of these must operate with at least splnet() lest they be
 926  * called during packet processing and cause an inconsistancy to appear in
 927  * the filter lists.
 928  */
 929 int iplwrite(dev, uio, cp)
 930 dev_t dev;
 931 register struct uio *uio;
 932 cred_t *cp;
 933 {
 934         ipf_stack_t *ifs;
 935 
 936         /*
 937          * As we're calling ipf_find_stack in user space, from a given zone
 938          * to find the stack pointer for this zone, there is no need to have
 939          * a hold/refence count here.
 940          */
 941         ifs = ipf_find_stack(crgetzoneid(cp));
 942         ASSERT(ifs != NULL);
 943 
 944 #ifdef  IPFDEBUG
 945         cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
 946 #endif
 947 
 948         if (ifs->ifs_fr_running < 1) {
 949                 return EIO;
 950         }
 951 
 952 #ifdef  IPFILTER_SYNC
 953         if (getminor(dev) == IPL_LOGSYNC)
 954                 return ipfsync_write(uio);
 955 #endif /* IPFILTER_SYNC */
 956         dev = dev;      /* LINT */
 957         uio = uio;      /* LINT */
 958         cp = cp;        /* LINT */
 959         return ENXIO;
 960 }
 961 
 962 
 963 /*
 964  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
 965  * requires a large amount of setting up and isn't any more efficient.
 966  */
 967 int fr_send_reset(fin)
 968 fr_info_t *fin;
 969 {
 970         tcphdr_t *tcp, *tcp2;
 971         int tlen, hlen;
 972         mblk_t *m;
 973 #ifdef  USE_INET6
 974         ip6_t *ip6;
 975 #endif
 976         ip_t *ip;
 977 
 978         tcp = fin->fin_dp;
 979         if (tcp->th_flags & TH_RST)
 980                 return -1;
 981 
 982 #ifndef IPFILTER_CKSUM
 983         if (fr_checkl4sum(fin) == -1)
 984                 return -1;
 985 #endif
 986 
 987         tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
 988 #ifdef  USE_INET6
 989         if (fin->fin_v == 6)
 990                 hlen = sizeof(ip6_t);
 991         else
 992 #endif
 993                 hlen = sizeof(ip_t);
 994         hlen += sizeof(*tcp2);
 995         if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
 996                 return -1;
 997 
 998         m->b_rptr += 64;
 999         MTYPE(m) = M_DATA;
1000         m->b_wptr = m->b_rptr + hlen;
1001         ip = (ip_t *)m->b_rptr;
1002         bzero((char *)ip, hlen);
1003         tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1004         tcp2->th_dport = tcp->th_sport;
1005         tcp2->th_sport = tcp->th_dport;
1006         if (tcp->th_flags & TH_ACK) {
1007                 tcp2->th_seq = tcp->th_ack;
1008                 tcp2->th_flags = TH_RST;
1009         } else {
1010                 tcp2->th_ack = ntohl(tcp->th_seq);
1011                 tcp2->th_ack += tlen;
1012                 tcp2->th_ack = htonl(tcp2->th_ack);
1013                 tcp2->th_flags = TH_RST|TH_ACK;
1014         }
1015         tcp2->th_off = sizeof(struct tcphdr) >> 2;
1016 
1017         ip->ip_v = fin->fin_v;
1018 #ifdef  USE_INET6
1019         if (fin->fin_v == 6) {
1020                 ip6 = (ip6_t *)m->b_rptr;
1021                 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1022                 ip6->ip6_src = fin->fin_dst6.in6;
1023                 ip6->ip6_dst = fin->fin_src6.in6;
1024                 ip6->ip6_plen = htons(sizeof(*tcp));
1025                 ip6->ip6_nxt = IPPROTO_TCP;
1026                 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1027         } else
1028 #endif
1029         {
1030                 ip->ip_src.s_addr = fin->fin_daddr;
1031                 ip->ip_dst.s_addr = fin->fin_saddr;
1032                 ip->ip_id = fr_nextipid(fin);
1033                 ip->ip_hl = sizeof(*ip) >> 2;
1034                 ip->ip_p = IPPROTO_TCP;
1035                 ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1036                 ip->ip_tos = fin->fin_ip->ip_tos;
1037                 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1038         }
1039         return fr_send_ip(fin, m, &m);
1040 }
1041 
1042 /*
1043  * Function:    fr_send_ip
1044  * Returns:      0: success
1045  *              -1: failed
1046  * Parameters:
1047  *      fin: packet information
1048  *      m: the message block where ip head starts
1049  *
1050  * Send a new packet through the IP stack. 
1051  *
1052  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1053  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1054  * function).
1055  *
1056  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1057  * in by this function.
1058  *
1059  * All other portions of the packet must be in on-the-wire format.
1060  */
1061 /*ARGSUSED*/
1062 static int fr_send_ip(fin, m, mpp)
1063 fr_info_t *fin;
1064 mblk_t *m, **mpp;
1065 {
1066         qpktinfo_t qpi, *qpip;
1067         fr_info_t fnew;
1068         ip_t *ip;
1069         int i, hlen;
1070         ipf_stack_t *ifs = fin->fin_ifs;
1071 
1072         ip = (ip_t *)m->b_rptr;
1073         bzero((char *)&fnew, sizeof(fnew));
1074 
1075 #ifdef  USE_INET6
1076         if (fin->fin_v == 6) {
1077                 ip6_t *ip6;
1078 
1079                 ip6 = (ip6_t *)ip;
1080                 ip6->ip6_vfc = 0x60;
1081                 ip6->ip6_hlim = 127;
1082                 fnew.fin_v = 6;
1083                 hlen = sizeof(*ip6);
1084                 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1085         } else
1086 #endif
1087         {
1088                 fnew.fin_v = 4;
1089 #if SOLARIS2 >= 10
1090                 ip->ip_ttl = 255;
1091                 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1092                         ip->ip_off = htons(IP_DF);
1093 #else
1094                 if (ip_ttl_ptr != NULL)
1095                         ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1096                 else
1097                         ip->ip_ttl = 63;
1098                 if (ip_mtudisc != NULL)
1099                         ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1100                 else
1101                         ip->ip_off = htons(IP_DF);
1102 #endif
1103                 /*
1104                  * The dance with byte order and ip_len/ip_off is because in
1105                  * fr_fastroute, it expects them to be in host byte order but
1106                  * ipf_cksum expects them to be in network byte order.
1107                  */
1108                 ip->ip_len = htons(ip->ip_len);
1109                 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1110                 ip->ip_len = ntohs(ip->ip_len);
1111                 ip->ip_off = ntohs(ip->ip_off);
1112                 hlen = sizeof(*ip);
1113                 fnew.fin_plen = ip->ip_len;
1114         }
1115 
1116         qpip = fin->fin_qpi;
1117         qpi.qpi_off = 0;
1118         qpi.qpi_ill = qpip->qpi_ill;
1119         qpi.qpi_m = m;
1120         qpi.qpi_data = ip;
1121         fnew.fin_qpi = &qpi;
1122         fnew.fin_ifp = fin->fin_ifp;
1123         fnew.fin_flx = FI_NOCKSUM;
1124         fnew.fin_m = m;
1125         fnew.fin_qfm = m;
1126         fnew.fin_ip = ip;
1127         fnew.fin_mp = mpp;
1128         fnew.fin_hlen = hlen;
1129         fnew.fin_dp = (char *)ip + hlen;
1130         fnew.fin_ifs = fin->fin_ifs;
1131         (void) fr_makefrip(hlen, ip, &fnew);
1132 
1133         i = fr_fastroute(m, mpp, &fnew, NULL);
1134         return i;
1135 }
1136 
1137 
1138 int fr_send_icmp_err(type, fin, dst)
1139 int type;
1140 fr_info_t *fin;
1141 int dst;
1142 {
1143         struct in_addr dst4;
1144         struct icmp *icmp;
1145         qpktinfo_t *qpi;
1146         int hlen, code;
1147         phy_if_t phy;
1148         u_short sz;
1149 #ifdef  USE_INET6
1150         mblk_t *mb;
1151 #endif
1152         mblk_t *m;
1153 #ifdef  USE_INET6
1154         ip6_t *ip6;
1155 #endif
1156         ip_t *ip;
1157         ipf_stack_t *ifs = fin->fin_ifs;
1158 
1159         if ((type < 0) || (type > ICMP_MAXTYPE))
1160                 return -1;
1161 
1162         code = fin->fin_icode;
1163 #ifdef USE_INET6
1164         if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1165                 return -1;
1166 #endif
1167 
1168 #ifndef IPFILTER_CKSUM
1169         if (fr_checkl4sum(fin) == -1)
1170                 return -1;
1171 #endif
1172 
1173         qpi = fin->fin_qpi;
1174 
1175 #ifdef  USE_INET6
1176         mb = fin->fin_qfm;
1177 
1178         if (fin->fin_v == 6) {
1179                 sz = sizeof(ip6_t);
1180                 sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1181                 hlen = sizeof(ip6_t);
1182                 type = icmptoicmp6types[type];
1183                 if (type == ICMP6_DST_UNREACH)
1184                         code = icmptoicmp6unreach[code];
1185         } else
1186 #endif
1187         {
1188                 if ((fin->fin_p == IPPROTO_ICMP) &&
1189                     !(fin->fin_flx & FI_SHORT))
1190                         switch (ntohs(fin->fin_data[0]) >> 8)
1191                         {
1192                         case ICMP_ECHO :
1193                         case ICMP_TSTAMP :
1194                         case ICMP_IREQ :
1195                         case ICMP_MASKREQ :
1196                                 break;
1197                         default :
1198                                 return 0;
1199                         }
1200 
1201                 sz = sizeof(ip_t) * 2;
1202                 sz += 8;                /* 64 bits of data */
1203                 hlen = sizeof(ip_t);
1204         }
1205 
1206         sz += offsetof(struct icmp, icmp_ip);
1207         if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1208                 return -1;
1209         MTYPE(m) = M_DATA;
1210         m->b_rptr += 64;
1211         m->b_wptr = m->b_rptr + sz;
1212         bzero((char *)m->b_rptr, (size_t)sz);
1213         ip = (ip_t *)m->b_rptr;
1214         ip->ip_v = fin->fin_v;
1215         icmp = (struct icmp *)(m->b_rptr + hlen);
1216         icmp->icmp_type = type & 0xff;
1217         icmp->icmp_code = code & 0xff;
1218         phy = (phy_if_t)qpi->qpi_ill; 
1219         if (type == ICMP_UNREACH && (phy != 0) && 
1220             fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1221                 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1222 
1223 #ifdef  USE_INET6
1224         if (fin->fin_v == 6) {
1225                 struct in6_addr dst6;
1226                 int csz;
1227 
1228                 if (dst == 0) {
1229                         ipf_stack_t *ifs = fin->fin_ifs;
1230 
1231                         if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1232                                        (void *)&dst6, NULL, ifs) == -1) {
1233                                 FREE_MB_T(m);
1234                                 return -1;
1235                         }
1236                 } else
1237                         dst6 = fin->fin_dst6.in6;
1238 
1239                 csz = sz;
1240                 sz -= sizeof(ip6_t);
1241                 ip6 = (ip6_t *)m->b_rptr;
1242                 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1243                 ip6->ip6_plen = htons((u_short)sz);
1244                 ip6->ip6_nxt = IPPROTO_ICMPV6;
1245                 ip6->ip6_src = dst6;
1246                 ip6->ip6_dst = fin->fin_src6.in6;
1247                 sz -= offsetof(struct icmp, icmp_ip);
1248                 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1249                 icmp->icmp_cksum = csz - sizeof(ip6_t);
1250         } else
1251 #endif
1252         {
1253                 ip->ip_hl = sizeof(*ip) >> 2;
1254                 ip->ip_p = IPPROTO_ICMP;
1255                 ip->ip_id = fin->fin_ip->ip_id;
1256                 ip->ip_tos = fin->fin_ip->ip_tos;
1257                 ip->ip_len = (u_short)sz;
1258                 if (dst == 0) {
1259                         ipf_stack_t *ifs = fin->fin_ifs;
1260 
1261                         if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1262                                        (void *)&dst4, NULL, ifs) == -1) {
1263                                 FREE_MB_T(m);
1264                                 return -1;
1265                         }
1266                 } else {
1267                         dst4 = fin->fin_dst;
1268                 }
1269                 ip->ip_src = dst4;
1270                 ip->ip_dst = fin->fin_src;
1271                 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1272                       sizeof(*fin->fin_ip));
1273                 bcopy((char *)fin->fin_ip + fin->fin_hlen,
1274                       (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1275                 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1276                 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1277                 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1278                                              sz - sizeof(ip_t));
1279         }
1280 
1281         /*
1282          * Need to exit out of these so we don't recursively call rw_enter
1283          * from fr_qout.
1284          */
1285         return fr_send_ip(fin, m, &m);
1286 }
1287 
1288 #include <sys/time.h>
1289 #include <sys/varargs.h>
1290 
1291 #ifndef _KERNEL
1292 #include <stdio.h>
1293 #endif
1294 
1295 #define NULLADDR_RATE_LIMIT 10  /* 10 seconds */
1296 
1297 
1298 /*
1299  * Print out warning message at rate-limited speed.
1300  */
1301 static void rate_limit_message(ipf_stack_t *ifs,
1302                                int rate, const char *message, ...)
1303 {
1304         static time_t last_time = 0;
1305         time_t now;
1306         va_list args;
1307         char msg_buf[256];
1308         int  need_printed = 0;
1309 
1310         now = ddi_get_time();
1311 
1312         /* make sure, no multiple entries */
1313         ASSERT(MUTEX_NOT_HELD(&(ifs->ifs_ipf_rw.ipf_lk)));
1314         MUTEX_ENTER(&ifs->ifs_ipf_rw);
1315         if (now - last_time >= rate) {
1316                 need_printed = 1;
1317                 last_time = now;
1318         }
1319         MUTEX_EXIT(&ifs->ifs_ipf_rw);
1320 
1321         if (need_printed) {
1322                 va_start(args, message);
1323                 (void)vsnprintf(msg_buf, 255, message, args);
1324                 va_end(args);
1325 #ifdef _KERNEL
1326                 cmn_err(CE_WARN, msg_buf);
1327 #else
1328                 fprintf(std_err, msg_buf);
1329 #endif
1330         }
1331 }
1332 
1333 /*
1334  * Return the first IP Address associated with an interface
1335  * For IPv6, we walk through the list of logical interfaces and return
1336  * the address of the first one that isn't a link-local interface.
1337  * We can't assume that it is :1 because another link-local address
1338  * may have been assigned there.
1339  */
1340 /*ARGSUSED*/
1341 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1342 int v, atype;
1343 void *ifptr;
1344 struct in_addr  *inp, *inpmask;
1345 ipf_stack_t *ifs;
1346 {
1347         struct sockaddr_in6 v6addr[2];
1348         struct sockaddr_in v4addr[2];
1349         net_ifaddr_t type[2];
1350         net_handle_t net_data;
1351         phy_if_t phyif;
1352         void *array;
1353 
1354         switch (v)
1355         {
1356         case 4:
1357                 net_data = ifs->ifs_ipf_ipv4;
1358                 array = v4addr;
1359                 break;
1360         case 6:
1361                 net_data = ifs->ifs_ipf_ipv6;
1362                 array = v6addr;
1363                 break;
1364         default:
1365                 net_data = NULL;
1366                 break;
1367         }
1368 
1369         if (net_data == NULL)
1370                 return -1;
1371 
1372         phyif = (phy_if_t)ifptr;
1373 
1374         switch (atype)
1375         {
1376         case FRI_PEERADDR :
1377                 type[0] = NA_PEER;
1378                 break;
1379 
1380         case FRI_BROADCAST :
1381                 type[0] = NA_BROADCAST;
1382                 break;
1383 
1384         default :
1385                 type[0] = NA_ADDRESS;
1386                 break;
1387         }
1388 
1389         type[1] = NA_NETMASK;
1390 
1391         if (v == 6) {
1392                 lif_if_t idx = 0;
1393 
1394                 do {
1395                         idx = net_lifgetnext(net_data, phyif, idx);
1396                         if (net_getlifaddr(net_data, phyif, idx, 2, type,
1397                                            array) < 0)
1398                                 return -1;
1399                         if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1400                             !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1401                                 break;
1402                 } while (idx != 0);
1403 
1404                 if (idx == 0)
1405                         return -1;
1406 
1407                 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1408                                         inp, inpmask);
1409         }
1410 
1411         if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1412                 return -1;
1413 
1414         return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1415 }
1416 
1417 
1418 u_32_t fr_newisn(fin)
1419 fr_info_t *fin;
1420 {
1421         static int iss_seq_off = 0;
1422         u_char hash[16];
1423         u_32_t newiss;
1424         MD5_CTX ctx;
1425         ipf_stack_t *ifs = fin->fin_ifs;
1426 
1427         /*
1428          * Compute the base value of the ISS.  It is a hash
1429          * of (saddr, sport, daddr, dport, secret).
1430          */
1431         MD5Init(&ctx);
1432 
1433         MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1434                   sizeof(fin->fin_fi.fi_src));
1435         MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1436                   sizeof(fin->fin_fi.fi_dst));
1437         MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1438 
1439         MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1440 
1441         MD5Final(hash, &ctx);
1442 
1443         bcopy(hash, &newiss, sizeof(newiss));
1444 
1445         /*
1446          * Now increment our "timer", and add it in to
1447          * the computed value.
1448          *
1449          * XXX Use `addin'?
1450          * XXX TCP_ISSINCR too large to use?
1451          */
1452         iss_seq_off += 0x00010000;
1453         newiss += iss_seq_off;
1454         return newiss;
1455 }
1456 
1457 
1458 /* ------------------------------------------------------------------------ */
1459 /* Function:    fr_nextipid                                                 */
1460 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1461 /* Parameters:  fin(I) - pointer to packet information                      */
1462 /*                                                                          */
1463 /* Returns the next IPv4 ID to use for this packet.                         */
1464 /* ------------------------------------------------------------------------ */
1465 u_short fr_nextipid(fin)
1466 fr_info_t *fin;
1467 {
1468         static u_short ipid = 0;
1469         u_short id;
1470         ipf_stack_t *ifs = fin->fin_ifs;
1471 
1472         MUTEX_ENTER(&ifs->ifs_ipf_rw);
1473         if (fin->fin_pktnum != 0) {
1474                 id = fin->fin_pktnum & 0xffff;
1475         } else {
1476                 id = ipid++;
1477         }
1478         MUTEX_EXIT(&ifs->ifs_ipf_rw);
1479 
1480         return id;
1481 }
1482 
1483 
1484 #ifndef IPFILTER_CKSUM
1485 /* ARGSUSED */
1486 #endif
1487 INLINE void fr_checkv4sum(fin)
1488 fr_info_t *fin;
1489 {
1490 #ifdef IPFILTER_CKSUM
1491         if (fr_checkl4sum(fin) == -1)
1492                 fin->fin_flx |= FI_BAD;
1493 #endif
1494 }
1495 
1496 
1497 #ifdef USE_INET6
1498 # ifndef IPFILTER_CKSUM
1499 /* ARGSUSED */
1500 # endif
1501 INLINE void fr_checkv6sum(fin)
1502 fr_info_t *fin;
1503 {
1504 # ifdef IPFILTER_CKSUM
1505         if (fr_checkl4sum(fin) == -1)
1506                 fin->fin_flx |= FI_BAD;
1507 # endif
1508 }
1509 #endif /* USE_INET6 */
1510 
1511 
1512 #if (SOLARIS2 < 7)
1513 void fr_slowtimer()
1514 #else
1515 /*ARGSUSED*/
1516 void fr_slowtimer __P((void *arg))
1517 #endif
1518 {
1519         ipf_stack_t *ifs = arg;
1520 
1521         READ_ENTER(&ifs->ifs_ipf_global);
1522         if (ifs->ifs_fr_running != 1) {
1523                 ifs->ifs_fr_timer_id = NULL;
1524                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1525                 return;
1526         }
1527         ipf_expiretokens(ifs);
1528         fr_fragexpire(ifs);
1529         fr_timeoutstate(ifs);
1530         fr_natexpire(ifs);
1531         fr_authexpire(ifs);
1532         ifs->ifs_fr_ticks++;
1533         if (ifs->ifs_fr_running == 1)
1534                 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1535                     drv_usectohz(500000));
1536         else
1537                 ifs->ifs_fr_timer_id = NULL;
1538         RWLOCK_EXIT(&ifs->ifs_ipf_global);
1539 }
1540 
1541 
1542 /* ------------------------------------------------------------------------ */
1543 /* Function:    fr_pullup                                                   */
1544 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
1545 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
1546 /*              fin(I) - pointer to packet information                      */
1547 /*              len(I) - number of bytes to pullup                          */
1548 /*                                                                          */
1549 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1550 /* single buffer for ease of access.  Operating system native functions are */
1551 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
1552 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
1553 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
1554 /* and ONLY if the pullup succeeds.                                         */
1555 /*                                                                          */
1556 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
1557 /* of buffers that starts at *fin->fin_mp.                                  */
1558 /* ------------------------------------------------------------------------ */
void *fr_pullup(min, fin, len)
mb_t *min;
fr_info_t *fin;
int len;
{
        qpktinfo_t *qpi = fin->fin_qpi;
        int out = fin->fin_out, dpoff, ipoff;
        mb_t *m = min, *m1, *m2;
        char *ip;
        uint32_t start, stuff, end, value, flags;
        ipf_stack_t *ifs = fin->fin_ifs;

        if (m == NULL)
                return NULL;

        /* Packet already flagged as coalesced: nothing to pull up. */
        ip = (char *)fin->fin_ip;
        if ((fin->fin_flx & FI_COALESCE) != 0)
                return ip;

        /*
         * Remember where fin_dp sits relative to the IP header so that it
         * can be recomputed if the header ends up at a different address.
         */
        ipoff = fin->fin_ipoff;
        if (fin->fin_dp != NULL)
                dpoff = (char *)fin->fin_dp - (char *)ip;
        else
                dpoff = 0;

        /* Only touch the buffers if the first one is too short. */
        if (M_LEN(m) < len + ipoff) {

                /*
                 * pfil_precheck ensures the IP header is on a 32bit
                 * aligned address so simply fail if that isn't currently
                 * the case (should never happen).
                 *
                 * NOTE(review): the code below does not fail in that case;
                 * when ipoff is not a multiple of 4 it moves b_rptr back by
                 * 'inc' bytes (if the data block has room) before the
                 * pullup and forward again afterwards.
                 */
                int inc = 0;

                if (ipoff > 0) {
                        if ((ipoff & 3) != 0) {
                                inc = 4 - (ipoff & 3);
                                if (m->b_rptr - inc >= m->b_datap->db_base)
                                        m->b_rptr -= inc;
                                else
                                        inc = 0;
                        }
                }

                /*
                 * XXX This is here as a work around for a bug with DEBUG
                 * XXX Solaris kernels.  The problem is b_prev is used by IP
                 * XXX code as a way to stash the phyint_index for a packet,
                 * XXX this doesn't get reset by IP but freeb does an ASSERT()
                 * XXX for both of these to be NULL.  See 6442390.
                 */
                /* m2 keeps the head's b_prev so it can be put back below */
                m1 = m;
                m2 = m->b_prev;

                do {
                        m1->b_next = NULL;
                        m1->b_prev = NULL;
                        m1 = m1->b_cont;
                } while (m1);

                /*
                 * Preserve the hardware checksum information: it is read
                 * here and re-associated with m after the pullup.
                 */
                hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
                    &value, &flags);

                if (pullupmsg(m, len + ipoff + inc) == 0) {
                        /*
                         * Pullup failed: count it, free the whole message
                         * at *fin->fin_mp and clear every pointer into it.
                         */
                        ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
                        FREE_MB_T(*fin->fin_mp);
                        *fin->fin_mp = NULL;
                        fin->fin_m = NULL;
                        fin->fin_ip = NULL;
                        fin->fin_dp = NULL;
                        qpi->qpi_data = NULL;
                        return NULL;
                }

                (void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
                    value, flags, 0);

                /* restore b_prev and undo the alignment adjustment */
                m->b_prev = m2;
                m->b_rptr += inc;
                fin->fin_m = m;
                ip = MTOD(m, char *) + ipoff;
                qpi->qpi_data = ip;
        }

        /* counted as a success whether or not a pullup was needed */
        ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
        fin->fin_ip = (ip_t *)ip;
        if (fin->fin_dp != NULL)
                fin->fin_dp = (char *)fin->fin_ip + dpoff;

        /* the whole packet was requested, so mark it as coalesced */
        if (len == fin->fin_plen)
                fin->fin_flx |= FI_COALESCE;
        return ip;
}
1656 
1657 
1658 /*
1659  * Function:    fr_verifysrc
1660  * Returns:     int (really boolean)
1661  * Parameters:  fin - packet information
1662  *
1663  * Check whether the packet has a valid source address for the interface on
1664  * which the packet arrived, implementing the "fr_chksrc" feature.
1665  * Returns true iff the packet's source address is valid.
1666  */
1667 int fr_verifysrc(fin)
1668 fr_info_t *fin;
1669 {
1670         net_handle_t net_data_p;
1671         phy_if_t phy_ifdata_routeto;
1672         struct sockaddr sin;
1673         ipf_stack_t *ifs = fin->fin_ifs;
1674 
1675         if (fin->fin_v == 4) { 
1676                 net_data_p = ifs->ifs_ipf_ipv4;
1677         } else if (fin->fin_v == 6) { 
1678                 net_data_p = ifs->ifs_ipf_ipv6;
1679         } else { 
1680                 return (0); 
1681         }
1682 
1683         /* Get the index corresponding to the if name */
1684         sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1685         bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1686         phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1687 
1688         return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0); 
1689 }
1690 
1691 
1692 /*
1693  * Function:    fr_fastroute
1694  * Returns:      0: success;
1695  *              -1: failed
1696  * Parameters:
1697  *      mb: the message block where ip head starts
1698  *      mpp: the pointer to the pointer of the orignal
1699  *              packet message
1700  *      fin: packet information
1701  *      fdp: destination interface information
1702  *      if it is NULL, no interface information provided.
1703  *
1704  * This function is for fastroute/to/dup-to rules. It calls
1705  * pfil_make_lay2_packet to search route, make lay-2 header
1706  * ,and identify output queue for the IP packet.
1707  * The destination address depends on the following conditions:
1708  * 1: for fastroute rule, fdp is passed in as NULL, so the
1709  *      destination address is the IP Packet's destination address
1710  * 2: for to/dup-to rule, if an ip address is specified after
1711  *      the interface name, this address is the as destination
1712  *      address. Otherwise IP Packet's destination address is used
1713  */
1714 int fr_fastroute(mb, mpp, fin, fdp)
1715 mblk_t *mb, **mpp;
1716 fr_info_t *fin;
1717 frdest_t *fdp;
1718 {
1719         net_handle_t net_data_p;
1720         net_inject_t *inj;
1721         mblk_t *mp = NULL;
1722         frentry_t *fr = fin->fin_fr;
1723         qpktinfo_t *qpi;
1724         ip_t *ip;
1725 
1726         struct sockaddr_in *sin;
1727         struct sockaddr_in6 *sin6;
1728         struct sockaddr *sinp;
1729         ipf_stack_t *ifs = fin->fin_ifs;
1730 #ifndef sparc
1731         u_short __iplen, __ipoff;
1732 #endif
1733 
1734         if (fin->fin_v == 4) {
1735                 net_data_p = ifs->ifs_ipf_ipv4;
1736         } else if (fin->fin_v == 6) {
1737                 net_data_p = ifs->ifs_ipf_ipv6;
1738         } else {
1739                 return (-1);
1740         }
1741 
1742         inj = net_inject_alloc(NETINFO_VERSION);
1743         if (inj == NULL)
1744                 return -1;
1745 
1746         ip = fin->fin_ip;
1747         qpi = fin->fin_qpi;
1748 
1749         /*
1750          * If this is a duplicate mblk then we want ip to point at that
1751          * data, not the original, if and only if it is already pointing at
1752          * the current mblk data.
1753          *
1754          * Otherwise, if it's not a duplicate, and we're not already pointing
1755          * at the current mblk data, then we want to ensure that the data
1756          * points at ip.
1757          */
1758 
1759         if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
1760                 ip = (ip_t *)mb->b_rptr;
1761         } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
1762                 qpi->qpi_m->b_rptr = (uchar_t *)ip;
1763                 qpi->qpi_off = 0;
1764         }
1765 
1766         /*
1767          * If there is another M_PROTO, we don't want it
1768          */
1769         if (*mpp != mb) {
1770                 mp = unlinkb(*mpp);
1771                 freeb(*mpp);
1772                 *mpp = mp;
1773         }
1774 
1775         sinp = (struct sockaddr *)&inj->ni_addr;
1776         sin = (struct sockaddr_in *)sinp;
1777         sin6 = (struct sockaddr_in6 *)sinp;
1778         bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
1779         inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1780         inj->ni_packet = mb;
1781 
1782         /*
1783          * In case we're here due to "to <if>" being used with
1784          * "keep state", check that we're going in the correct
1785          * direction.
1786          */
1787         if (fdp != NULL) {
1788                 if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
1789                         (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
1790                         goto bad_fastroute;
1791                 inj->ni_physical = (phy_if_t)fdp->fd_ifp;
1792                 if (fin->fin_v == 4) {
1793                         sin->sin_addr = fdp->fd_ip;
1794                 } else {
1795                         sin6->sin6_addr = fdp->fd_ip6.in6;
1796                 }
1797         } else {
1798                 if (fin->fin_v == 4) {
1799                         sin->sin_addr = ip->ip_dst;
1800                 } else {
1801                         sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
1802                 }
1803                 inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
1804         }
1805 
1806         /*
1807          * Clear the hardware checksum flags from packets that we are doing
1808          * input processing on as leaving them set will cause the outgoing
1809          * NIC (if it supports hardware checksum) to calculate them anew,
1810          * using the old (correct) checksums as the pseudo value to start
1811          * from.
1812          */
1813         if (fin->fin_out == 0) {
1814                 DB_CKSUMFLAGS(mb) = 0;
1815         }
1816 
1817         *mpp = mb;
1818 
1819         if (fin->fin_out == 0) {
1820                 void *saveifp;
1821                 u_32_t pass;
1822 
1823                 saveifp = fin->fin_ifp;
1824                 fin->fin_ifp = (void *)inj->ni_physical;
1825                 fin->fin_flx &= ~FI_STATE;
1826                 fin->fin_out = 1;
1827                 (void) fr_acctpkt(fin, &pass);
1828                 fin->fin_fr = NULL;
1829                 if (!fr || !(fr->fr_flags & FR_RETMASK))
1830                         (void) fr_checkstate(fin, &pass);
1831                 if (fr_checknatout(fin, NULL) == -1)
1832                         goto bad_fastroute;
1833                 fin->fin_out = 0;
1834                 fin->fin_ifp = saveifp;
1835         }
1836 #ifndef sparc
1837         if (fin->fin_v == 4) {
1838                 __iplen = (u_short)ip->ip_len,
1839                 __ipoff = (u_short)ip->ip_off;
1840 
1841                 ip->ip_len = htons(__iplen);
1842                 ip->ip_off = htons(__ipoff);
1843         }
1844 #endif
1845 
1846         if (net_data_p) {
1847                 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
1848                         net_inject_free(inj);
1849                         return (-1);
1850                 }
1851         }
1852 
1853         ifs->ifs_fr_frouteok[0]++;
1854         net_inject_free(inj);
1855         return 0;
1856 bad_fastroute:
1857         net_inject_free(inj);
1858         freemsg(mb);
1859         ifs->ifs_fr_frouteok[1]++;
1860         return -1;
1861 }
1862 
1863 
1864 /* ------------------------------------------------------------------------ */
1865 /* Function:    ipf_hook4_out                                               */
1866 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1867 /* Parameters:  event(I)     - pointer to event                             */
1868 /*              info(I)      - pointer to hook information for firewalling  */
1869 /*                                                                          */
1870 /* Calling ipf_hook.                                                        */
1871 /* ------------------------------------------------------------------------ */
1872 /*ARGSUSED*/
1873 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
1874 {
1875         return ipf_hook(info, 1, 0, arg);
1876 }
1877 /*ARGSUSED*/
1878 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
1879 {
1880         return ipf_hook6(info, 1, 0, arg);
1881 }
1882 
1883 /* ------------------------------------------------------------------------ */
1884 /* Function:    ipf_hook4_in                                                */
1885 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1886 /* Parameters:  event(I)     - pointer to event                             */
1887 /*              info(I)      - pointer to hook information for firewalling  */
1888 /*                                                                          */
1889 /* Calling ipf_hook.                                                        */
1890 /* ------------------------------------------------------------------------ */
1891 /*ARGSUSED*/
1892 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
1893 {
1894         return ipf_hook(info, 0, 0, arg);
1895 }
1896 /*ARGSUSED*/
1897 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
1898 {
1899         return ipf_hook6(info, 0, 0, arg);
1900 }
1901 
1902 
1903 /* ------------------------------------------------------------------------ */
1904 /* Function:    ipf_hook4_loop_out                                          */
1905 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1906 /* Parameters:  event(I)     - pointer to event                             */
1907 /*              info(I)      - pointer to hook information for firewalling  */
1908 /*                                                                          */
1909 /* Calling ipf_hook.                                                        */
1910 /* ------------------------------------------------------------------------ */
1911 /*ARGSUSED*/
1912 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1913 {
1914         return ipf_hook(info, 1, FI_NOCKSUM, arg);
1915 }
1916 /*ARGSUSED*/
1917 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1918 {
1919         return ipf_hook6(info, 1, FI_NOCKSUM, arg);
1920 }
1921 
1922 /* ------------------------------------------------------------------------ */
1923 /* Function:    ipf_hook4_loop_in                                           */
1924 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1925 /* Parameters:  event(I)     - pointer to event                             */
1926 /*              info(I)      - pointer to hook information for firewalling  */
1927 /*                                                                          */
1928 /* Calling ipf_hook.                                                        */
1929 /* ------------------------------------------------------------------------ */
1930 /*ARGSUSED*/
1931 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1932 {
1933         return ipf_hook(info, 0, FI_NOCKSUM, arg);
1934 }
1935 /*ARGSUSED*/
1936 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1937 {
1938         return ipf_hook6(info, 0, FI_NOCKSUM, arg);
1939 }
1940 
1941 /* ------------------------------------------------------------------------ */
1942 /* Function:    ipf_hook                                                    */
1943 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1944 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
1945 /*              out(I)       - whether packet is going in or out            */
1946 /*              loopback(I)  - whether packet is a loopback packet or not   */
1947 /*                                                                          */
1948 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
1949 /* parameters out of the info structure and forms them up to be useful for  */
1950 /* calling ipfilter.                                                        */
1951 /* ------------------------------------------------------------------------ */
1952 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
1953 {
1954         hook_pkt_event_t *fw;
1955         ipf_stack_t *ifs;
1956         qpktinfo_t qpi;
1957         int rval, hlen;
1958         u_short swap;
1959         phy_if_t phy; 
1960         ip_t *ip;
1961 
1962         ifs = arg;
1963         fw = (hook_pkt_event_t *)info;
1964 
1965         ASSERT(fw != NULL);
1966         phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
1967 
1968         ip = fw->hpe_hdr;
1969         swap = ntohs(ip->ip_len);
1970         ip->ip_len = swap;
1971         swap = ntohs(ip->ip_off);
1972         ip->ip_off = swap;
1973         hlen = IPH_HDR_LENGTH(ip);
1974 
1975         qpi.qpi_m = fw->hpe_mb;
1976         qpi.qpi_data = fw->hpe_hdr;
1977         qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
1978         qpi.qpi_ill = (void *)phy;
1979         qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
1980         if (qpi.qpi_flags)
1981                 qpi.qpi_flags |= FI_MBCAST;
1982         qpi.qpi_flags |= loopback;
1983 
1984         rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
1985             &qpi, fw->hpe_mp, ifs);
1986 
1987         /* For fastroute cases, fr_check returns 0 with mp set to NULL */
1988         if (rval == 0 && *(fw->hpe_mp) == NULL)
1989                 rval = 1;
1990 
1991         /* Notify IP the packet mblk_t and IP header pointers. */
1992         fw->hpe_mb = qpi.qpi_m;
1993         fw->hpe_hdr = qpi.qpi_data;
1994         if (rval == 0) {
1995                 ip = qpi.qpi_data;
1996                 swap = ntohs(ip->ip_len);
1997                 ip->ip_len = swap;
1998                 swap = ntohs(ip->ip_off);
1999                 ip->ip_off = swap;
2000         }
2001         return rval;
2002 
2003 }
2004 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
2005 {
2006         hook_pkt_event_t *fw;
2007         int rval, hlen;
2008         qpktinfo_t qpi;
2009         phy_if_t phy; 
2010 
2011         fw = (hook_pkt_event_t *)info;
2012 
2013         ASSERT(fw != NULL);
2014         phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2015 
2016         hlen = sizeof (ip6_t);
2017 
2018         qpi.qpi_m = fw->hpe_mb;
2019         qpi.qpi_data = fw->hpe_hdr;
2020         qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2021         qpi.qpi_ill = (void *)phy;
2022         qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2023         if (qpi.qpi_flags)
2024                 qpi.qpi_flags |= FI_MBCAST;
2025         qpi.qpi_flags |= loopback;
2026 
2027         rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2028             &qpi, fw->hpe_mp, arg);
2029 
2030         /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2031         if (rval == 0 && *(fw->hpe_mp) == NULL)
2032                 rval = 1;
2033 
2034         /* Notify IP the packet mblk_t and IP header pointers. */
2035         fw->hpe_mb = qpi.qpi_m;
2036         fw->hpe_hdr = qpi.qpi_data;
2037         return rval;
2038 
2039 }
2040 
2041 
2042 /* ------------------------------------------------------------------------ */
2043 /* Function:    ipf_nic_event_v4                                            */
2044 /* Returns:     int - 0 == no problems encountered                          */
2045 /* Parameters:  event(I)     - pointer to event                             */
2046 /*              info(I)      - pointer to information about a NIC event     */
2047 /*                                                                          */
2048 /* Function to receive asynchronous NIC events from IP                      */
2049 /* ------------------------------------------------------------------------ */
2050 /*ARGSUSED*/
2051 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2052 {
2053         struct sockaddr_in *sin;
2054         hook_nic_event_t *hn;
2055         ipf_stack_t *ifs = arg;
2056         void *new_ifp = NULL;
2057 
2058         if (ifs->ifs_fr_running <= 0)
2059                 return (0);
2060 
2061         hn = (hook_nic_event_t *)info;
2062 
2063         switch (hn->hne_event)
2064         {
2065         case NE_PLUMB :
2066                 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2067                        ifs);
2068                 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2069                               hn->hne_data, ifs);
2070                 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2071                              hn->hne_data, ifs);
2072                 break;
2073 
2074         case NE_UNPLUMB :
2075                 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2076                 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2077                               ifs);
2078                 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2079                 break;
2080 
2081         case NE_ADDRESS_CHANGE :
2082                 /*
2083                  * We only respond to events for logical interface 0 because
2084                  * IPFilter only uses the first address given to a network
2085                  * interface.  We check for hne_lif==1 because the netinfo
2086                  * code maps adds 1 to the lif number so that it can return
2087                  * 0 to indicate "no more lifs" when walking them.
2088                  */
2089                 if (hn->hne_lif == 1) {
2090                         frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2091                             ifs);
2092                         sin = hn->hne_data;
2093                         fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2094                             ifs);
2095                 }
2096                 break;
2097 
2098 #if SOLARIS2 >= 10
2099         case NE_IFINDEX_CHANGE :
2100                 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2101 
2102                 if (hn->hne_data != NULL) {
2103                         /*
2104                          * The netinfo passes interface index as int (hne_data should be
2105                          * handled as a pointer to int), which is always 32bit. We need to
2106                          * convert it to void pointer here, since interfaces are
2107                          * represented as pointers to void in IPF. The pointers are 64 bits
2108                          * long on 64bit platforms. Doing something like
2109                          *      (void *)((int) x)
2110                          * will throw warning:
2111                          *   "cast to pointer from integer of different size"
2112                          * during 64bit compilation.
2113                          *
2114                          * The line below uses (size_t) to typecast int to
2115                          * size_t, which might be 64bit/32bit (depending
2116                          * on architecture). Once we have proper 64bit/32bit
2117                          * type (size_t), we can safely convert it to void pointer.
2118                          */
2119                         new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2120                         fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2121                         fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2122                         fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2123                 }
2124                 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2125                 break;
2126 #endif
2127 
2128         default :
2129                 break;
2130         }
2131 
2132         return 0;
2133 }
2134 
2135 
2136 /* ------------------------------------------------------------------------ */
2137 /* Function:    ipf_nic_event_v6                                            */
2138 /* Returns:     int - 0 == no problems encountered                          */
2139 /* Parameters:  event(I)     - pointer to event                             */
2140 /*              info(I)      - pointer to information about a NIC event     */
2141 /*                                                                          */
2142 /* Function to receive asynchronous NIC events from IP                      */
2143 /* ------------------------------------------------------------------------ */
2144 /*ARGSUSED*/
2145 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2146 {
2147         struct sockaddr_in6 *sin6;
2148         hook_nic_event_t *hn;
2149         ipf_stack_t *ifs = arg;
2150         void *new_ifp = NULL;
2151 
2152         if (ifs->ifs_fr_running <= 0)
2153                 return (0);
2154 
2155         hn = (hook_nic_event_t *)info;
2156 
2157         switch (hn->hne_event)
2158         {
2159         case NE_PLUMB :
2160                 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2161                        hn->hne_data, ifs);
2162                 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2163                               hn->hne_data, ifs);
2164                 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2165                              hn->hne_data, ifs);
2166                 break;
2167 
2168         case NE_UNPLUMB :
2169                 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2170                 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2171                               ifs);
2172                 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2173                 break;
2174 
2175         case NE_ADDRESS_CHANGE :
2176                 if (hn->hne_lif == 1) {
2177                         sin6 = hn->hne_data;
2178                         fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2179                                        ifs);
2180                 }
2181                 break;
2182 
2183 #if SOLARIS2 >= 10
2184         case NE_IFINDEX_CHANGE :
2185                 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2186                 if (hn->hne_data != NULL) {
2187                         /*
2188                          * The netinfo passes interface index as int (hne_data should be
2189                          * handled as a pointer to int), which is always 32bit. We need to
2190                          * convert it to void pointer here, since interfaces are
2191                          * represented as pointers to void in IPF. The pointers are 64 bits
2192                          * long on 64bit platforms. Doing something like
2193                          *      (void *)((int) x)
2194                          * will throw warning:
2195                          *   "cast to pointer from integer of different size"
2196                          * during 64bit compilation.
2197                          *
2198                          * The line below uses (size_t) to typecast int to
2199                          * size_t, which might be 64bit/32bit (depending
2200                          * on architecture). Once we have proper 64bit/32bit
2201                          * type (size_t), we can safely convert it to void pointer.
2202                          */
2203                         new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2204                         fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2205                         fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2206                         fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2207                 }
2208                 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2209                 break;
2210 #endif
2211 
2212         default :
2213                 break;
2214         }
2215 
2216         return 0;
2217 }
2218 
2219 /*
2220  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2221  * are needed in Solaris kernel only. We don't need them in
2222  * ipftest to pretend the ICMP/RST packet was sent as a response.
2223  */
2224 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2225 /* ------------------------------------------------------------------------ */
2226 /* Function:    fr_make_rst                                                 */
2227 /* Returns:     int - 0 on success, -1 on failure                           */
2228 /* Parameters:  fin(I) - pointer to packet information                      */
2229 /*                                                                          */
2230 /* We must alter the original mblks passed to IPF from IP stack via         */
2231 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations.  */
2232 /* IPF can basicaly do only these things with mblk representing the packet: */
2233 /*      leave it as it is (pass the packet)                                 */
2234 /*                                                                          */
2235 /*      discard it (block the packet)                                       */
2236 /*                                                                          */
2237 /*      alter it (i.e. NAT)                                                 */
2238 /*                                                                          */
2239 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2240 /* instead to IP stack via FW_HOOKS.                                        */
2241 /*                                                                          */
2242 /* The return-rst action for packets coming via NIC is handled as follows:  */
2243 /*      mblk with packet is discarded                                       */
2244 /*                                                                          */
2245 /*      new mblk with RST response is constructed and injected to network   */
2246 /*                                                                          */
2247 /* IPF can't inject packets to loopback interface, this is just another     */
2248 /* limitation we have to deal with here. The only option to send RST        */
2249 /* response to offending TCP packet coming via loopback is to alter it.     */
2250 /*                                                                          */
2251 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on      */
2252 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to     */
2253 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.                            */
2254 /* ------------------------------------------------------------------------ */
2255 int fr_make_rst(fin)
2256 fr_info_t *fin;
2257 {
2258         uint16_t tmp_port;
2259         int rv = -1;
2260         uint32_t old_ack;
2261         tcphdr_t *tcp = NULL;
2262         struct in_addr tmp_src;
2263 #ifdef USE_INET6
2264         struct in6_addr tmp_src6;
2265 #endif
2266         
2267         ASSERT(fin->fin_p == IPPROTO_TCP);
2268 
2269         /*
2270          * We do not need to adjust chksum, since it is not being checked by
2271          * Solaris IP stack for loopback clients.
2272          */
2273         if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2274             ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2275 
2276                 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2277                         /* Swap IPv4 addresses. */
2278                         tmp_src = fin->fin_ip->ip_src;
2279                         fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2280                         fin->fin_ip->ip_dst = tmp_src;
2281 
2282                         rv = 0;
2283                 }
2284                 else
2285                         tcp = NULL;
2286         }
2287 #ifdef USE_INET6
2288         else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2289             ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2290                 /*
2291                  * We are relying on fact the next header is TCP, which is true
2292                  * for regular TCP packets coming in over loopback.
2293                  */
2294                 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2295                         /* Swap IPv6 addresses. */
2296                         tmp_src6 = fin->fin_ip6->ip6_src;
2297                         fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2298                         fin->fin_ip6->ip6_dst = tmp_src6;
2299 
2300                         rv = 0;
2301                 }
2302                 else
2303                         tcp = NULL;
2304         }
2305 #endif
2306 
2307         if (tcp != NULL) {
2308                 /* 
2309                  * Adjust TCP header:
2310                  *      swap ports,
2311                  *      set flags,
2312                  *      set correct ACK number
2313                  */
2314                 tmp_port = tcp->th_sport;
2315                 tcp->th_sport = tcp->th_dport;
2316                 tcp->th_dport = tmp_port;
2317                 old_ack = tcp->th_ack;
2318                 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2319                 tcp->th_seq = old_ack;
2320                 tcp->th_flags = TH_RST | TH_ACK;
2321         }
2322 
2323         return (rv);
2324 }
2325 
2326 /* ------------------------------------------------------------------------ */
2327 /* Function:    fr_make_icmp_v4                                             */
2328 /* Returns:     int - 0 on success, -1 on failure                           */
2329 /* Parameters:  fin(I) - pointer to packet information                      */
2330 /*                                                                          */
2331 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2332 /* what is going to happen here and why. Once you read the comment there,   */
2333 /* continue here with next paragraph.                                       */
2334 /*                                                                          */
2335 /* To turn IPv4 packet into ICMPv4 response packet, these things must       */
2336 /* happen here:                                                             */
2337 /*      (1) Original mblk is copied (duplicated).                           */
2338 /*                                                                          */
2339 /*      (2) ICMP header is created.                                         */
2340 /*                                                                          */
2341 /*      (3) Link ICMP header with copy of original mblk, we have ICMPv4     */
2342 /*          data ready then.                                                */
2343 /*                                                                          */
2344 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2345 /*                                                                          */
2346 /*      (5) The mblk containing original packet is trimmed to contain IP    */
2347 /*          header only and ICMP chksum is computed.                        */
2348 /*                                                                          */
2349 /*      (6) The ICMP header we have from (3) is linked to original mblk,    */
2350 /*          which now contains new IP header. If original packet was spread */
2351 /*          over several mblks, only the first mblk is kept.                */
2352 /* ------------------------------------------------------------------------ */
2353 static int fr_make_icmp_v4(fin)
2354 fr_info_t *fin;
2355 {
2356         struct in_addr tmp_src;
2357         tcphdr_t *tcp;
2358         struct icmp *icmp;
2359         mblk_t *mblk_icmp;
2360         mblk_t *mblk_ip;
2361         size_t icmp_pld_len;    /* octets to append to ICMP header */
2362         size_t orig_iphdr_len;  /* length of IP header only */
2363         uint32_t sum;
2364         uint16_t *buf;
2365         int len;
2366 
2367 
2368         if (fin->fin_v != 4)
2369                 return (-1);
2370 
2371         /*
2372          * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2373          * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2374          */
2375         tcp = (tcphdr_t *) fin->fin_dp;
2376 
2377         if ((fin->fin_p == IPPROTO_TCP) && 
2378             ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2379                 return (-1);
2380 
2381         /*
2382          * Step (1)
2383          *
2384          * Make copy of original mblk.
2385          *
2386          * We want to copy as much data as necessary, not less, not more.  The
2387          * ICMPv4 payload length for unreachable messages is:
2388          *      original IP header + 8 bytes of L4 (if there are any).
2389          *
2390          * We determine if there are at least 8 bytes of L4 data following IP
2391          * header first.
2392          */
2393         icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2394                 ICMPERR_ICMPHLEN : fin->fin_dlen;
2395         /*
2396          * Since we don't want to copy more data than necessary, we must trim
2397          * the original mblk here.  The right way (STREAMish) would be to use
2398          * adjmsg() to trim it.  However we would have to calculate the length
2399          * argument for adjmsg() from pointers we already have here.
2400          *
2401          * Since we have pointers and offsets, it's faster and easier for
2402          * us to just adjust pointers by hand instead of using adjmsg().
2403          */
2404         fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2405         fin->fin_m->b_wptr += icmp_pld_len;
2406         icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2407 
2408         /*
2409          * Also we don't want to copy any L2 stuff, which might precede IP
2410          * header, so we have have to set b_rptr to point to the start of IP
2411          * header.
2412          */
2413         fin->fin_m->b_rptr += fin->fin_ipoff;
2414         if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2415                 return (-1);
2416         fin->fin_m->b_rptr -= fin->fin_ipoff;
2417 
2418         /*
2419          * Step (2)
2420          *
2421          * Create an ICMP header, which will be appened to original mblk later.
2422          * ICMP header is just another mblk.
2423          */
2424         mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2425         if (mblk_icmp == NULL) {
2426                 FREE_MB_T(mblk_ip);
2427                 return (-1);
2428         }
2429 
2430         MTYPE(mblk_icmp) = M_DATA;
2431         icmp = (struct icmp *) mblk_icmp->b_wptr;
2432         icmp->icmp_type = ICMP_UNREACH;
2433         icmp->icmp_code = fin->fin_icode & 0xFF;
2434         icmp->icmp_void = 0;
2435         icmp->icmp_cksum = 0;
2436         mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2437 
2438         /*
2439          * Step (3)
2440          *
2441          * Complete ICMP packet - link ICMP header with L4 data from original
2442          * IP packet.
2443          */
2444         linkb(mblk_icmp, mblk_ip);
2445 
2446         /*
2447          * Step (4)
2448          *
2449          * Swap IP addresses and change IP header fields accordingly in
2450          * original IP packet.
2451          *
2452          * There is a rule option return-icmp as a dest for physical
2453          * interfaces. This option becomes useless for loopback, since IPF box
2454          * uses same address as a loopback destination. We ignore the option
2455          * here, the ICMP packet will always look like as it would have been
2456          * sent from the original destination host.
2457          */
2458         tmp_src = fin->fin_ip->ip_src;
2459         fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2460         fin->fin_ip->ip_dst = tmp_src;
2461         fin->fin_ip->ip_p = IPPROTO_ICMP;
2462         fin->fin_ip->ip_sum = 0;
2463 
2464         /*
2465          * Step (5)
2466          *
2467          * We trim the orignal mblk to hold IP header only.
2468          */
2469         fin->fin_m->b_wptr = fin->fin_dp;
2470         orig_iphdr_len = fin->fin_m->b_wptr -
2471                             (fin->fin_m->b_rptr + fin->fin_ipoff);
2472         fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2473                             orig_iphdr_len);
2474 
2475         /*
2476          * ICMP chksum calculation. The data we are calculating chksum for are
2477          * spread over two mblks, therefore we have to use two for loops.
2478          *
2479          * First for loop computes chksum part for ICMP header.
2480          */
2481         buf = (uint16_t *) icmp;
2482         len = ICMPERR_ICMPHLEN;
2483         for (sum = 0; len > 1; len -= 2)
2484                 sum += *buf++;
2485 
2486         /*
2487          * Here we add chksum part for ICMP payload.
2488          */
2489         len = icmp_pld_len;
2490         buf = (uint16_t *) mblk_ip->b_rptr;
2491         for (; len > 1; len -= 2)
2492                 sum += *buf++;
2493 
2494         /*
2495          * Chksum is done.
2496          */
2497         sum = (sum >> 16) + (sum & 0xffff);
2498         sum += (sum >> 16);
2499         icmp->icmp_cksum = ~sum; 
2500 
2501         /*
2502          * Step (6)
2503          *
2504          * Release all packet mblks, except the first one.
2505          */
2506         if (fin->fin_m->b_cont != NULL) {
2507                 FREE_MB_T(fin->fin_m->b_cont);
2508         }
2509 
2510         /*
2511          * Append ICMP payload to first mblk, which already contains new IP
2512          * header.
2513          */
2514         linkb(fin->fin_m, mblk_icmp);
2515 
2516         return (0);
2517 }
2518 
2519 #ifdef USE_INET6
2520 /* ------------------------------------------------------------------------ */
2521 /* Function:    fr_make_icmp_v6                                             */
2522 /* Returns:     int - 0 on success, -1 on failure                           */
2523 /* Parameters:  fin(I) - pointer to packet information                      */
2524 /*                                                                          */
2525 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2526 /* what and why is going to happen here. Once you read the comment there,   */
2527 /* continue here with next paragraph.                                       */
2528 /*                                                                          */
2529 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
2530 /* The algorithm is fairly simple:                                          */
2531 /*      1) We need to get copy of complete mblk.                            */
2532 /*                                                                          */
2533 /*      2) New ICMPv6 header is created.                                    */
2534 /*                                                                          */
2535 /*      3) The copy of original mblk with packet is linked to ICMPv6        */
2536 /*         header.                                                          */
2537 /*                                                                          */
2538 /*      4) The checksum must be adjusted.                                   */
2539 /*                                                                          */
2540 /*      5) IP addresses in original mblk are swapped and IP header data     */
2541 /*         are adjusted (protocol number).                                  */
2542 /*                                                                          */
2543 /*      6) Original mblk is trimmed to hold IPv6 header only, then it is    */
2544 /*         linked with the ICMPv6 data we got from (3).                     */
2545 /* ------------------------------------------------------------------------ */
2546 static int fr_make_icmp_v6(fin)
2547 fr_info_t *fin;
2548 {
2549         struct icmp6_hdr *icmp6;
2550         tcphdr_t *tcp;
2551         struct in6_addr tmp_src6;
2552         size_t icmp_pld_len;
2553         mblk_t *mblk_ip, *mblk_icmp;
2554 
2555         if (fin->fin_v != 6)
2556                 return (-1);
2557 
2558         /*
2559          * If we are dealing with TCP, then packet must SYN/FIN to be routed by
2560          * IP stack. If it is not SYN/FIN, then we must drop it silently.
2561          */
2562         tcp = (tcphdr_t *) fin->fin_dp;
2563 
2564         if ((fin->fin_p == IPPROTO_TCP) && 
2565             ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2566                 return (-1);
2567 
2568         /*
2569          * Step (1)
2570          *
2571          * We need to copy complete packet in case of IPv6, no trimming is
2572          * needed (except the L2 headers).
2573          */
2574         icmp_pld_len = M_LEN(fin->fin_m);
2575         fin->fin_m->b_rptr += fin->fin_ipoff;
2576         if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2577                 return (-1);
2578         fin->fin_m->b_rptr -= fin->fin_ipoff;
2579 
2580         /*
2581          * Step (2)
2582          *
2583          * Allocate and create ICMP header.
2584          */
2585         mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
2586                         BPRI_HI);
2587 
2588         if (mblk_icmp == NULL)
2589                 return (-1);
2590         
2591         MTYPE(mblk_icmp) = M_DATA;
2592         icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
2593         icmp6->icmp6_type = ICMP6_DST_UNREACH;
2594         icmp6->icmp6_code = fin->fin_icode & 0xFF;
2595         icmp6->icmp6_data32[0] = 0;
2596         mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
2597         
2598         /*
2599          * Step (3)
2600          *
2601          * Link the copy of IP packet to ICMP header.
2602          */
2603         linkb(mblk_icmp, mblk_ip);
2604 
2605         /* 
2606          * Step (4)
2607          *
2608          * Calculate chksum - this is much more easier task than in case of
2609          * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
2610          * We are making compensation just for change of packet length.
2611          */
2612         icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
2613 
2614         /*
2615          * Step (5)
2616          *
2617          * Swap IP addresses.
2618          */
2619         tmp_src6 = fin->fin_ip6->ip6_src;
2620         fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2621         fin->fin_ip6->ip6_dst = tmp_src6;
2622 
2623         /*
2624          * and adjust IP header data.
2625          */
2626         fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
2627         fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
2628 
2629         /*
2630          * Step (6)
2631          *
2632          * We must release all linked mblks from original packet and keep only
2633          * the first mblk with IP header to link ICMP data.
2634          */
2635         fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
2636 
2637         if (fin->fin_m->b_cont != NULL) {
2638                 FREE_MB_T(fin->fin_m->b_cont);
2639         }
2640 
2641         /*
2642          * Append ICMP payload to IP header.
2643          */
2644         linkb(fin->fin_m, mblk_icmp);
2645 
2646         return (0);
2647 }
2648 #endif  /* USE_INET6 */
2649 
2650 /* ------------------------------------------------------------------------ */
2651 /* Function:    fr_make_icmp                                                */
2652 /* Returns:     int - 0 on success, -1 on failure                           */
2653 /* Parameters:  fin(I) - pointer to packet information                      */
2654 /*                                                                          */
2655 /* We must alter the original mblks passed to IPF from IP stack via         */
2656 /* FW_HOOKS. The reasons why we must alter packet are discussed within      */
2657 /* comment at fr_make_rst() function.                                       */
2658 /*                                                                          */
2659 /* The fr_make_icmp() function acts as a wrapper, which passes the code     */
2660 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on         */
2661 /* protocol version. However there are some details, which are common to    */
2662 /* both IP versions. The details are going to be explained here.            */
2663 /*                                                                          */
2664 /* The packet looks as follows:                                             */
2665 /*    xxx | IP hdr | IP payload    ...  |                                   */
2666 /*    ^   ^        ^                    ^                                   */
2667 /*    |   |        |                    |                                   */
2668 /*    |   |        |            fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2669 /*    |   |        |                                                        */
2670 /*    |   |        `- fin->fin_dp (in case of IPv4 points to L4 header)     */
2671 /*    |   |                                                                 */
2672 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
2673 /*    |      of loopback)                                                   */
2674 /*    |                                                                     */
2675 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC            */
2676 /*                                                                          */
2677 /* All relevant IP headers are pulled up into the first mblk. It happened   */
2678 /* well in advance before the matching rule was found (the rule, which took */
2679 /* us here, to fr_make_icmp() function).                                    */
2680 /*                                                                          */
2681 /* Both functions will turn packet passed in fin->fin_m mblk into a new          */
2682 /* packet. New packet will be represented as chain of mblks.                */
2683 /* orig mblk |- b_cont ---.                                                 */
2684 /*    ^                    `-> ICMP hdr |- b_cont--.                     */
2685 /*    |                           ^                 `-> duped orig mblk          */
2686 /*    |                           |                             ^           */
2687 /*    `- The original mblk        |                             |           */
2688 /*       will be trimmed to       |                             |           */
2689 /*       contain IP header        |                             |           */
2690 /*       only                     |                             |           */
2691 /*                                |                             |           */
2692 /*                                `- This is newly              |           */
2693 /*                                   allocated mblk to          |           */
2694 /*                                   hold ICMPv6 data.          |           */
2695 /*                                                              |           */
2696 /*                                                              |           */
2697 /*                                                              |           */
2698 /*          This is the copy of original mblk, it will contain -'           */
2699 /*          original IP packet in case of ICMPv6. In case of                */
2700 /*          ICMPv4 it will contain up to 8 bytes of IP payload              */
2701 /*          (TCP/UDP/L4) data from original packet.                         */
2702 /* ------------------------------------------------------------------------ */
2703 int fr_make_icmp(fin)
2704 fr_info_t *fin;
2705 {
2706         int rv;
2707         
2708         if (fin->fin_v == 4)
2709                 rv = fr_make_icmp_v4(fin);
2710 #ifdef USE_INET6
2711         else if (fin->fin_v == 6)
2712                 rv = fr_make_icmp_v6(fin);
2713 #endif
2714         else
2715                 rv = -1;
2716 
2717         return (rv);
2718 }
2719 
2720 /* ------------------------------------------------------------------------ */
2721 /* Function:    fr_buf_sum                                                  */
2722 /* Returns:     unsigned int - sum of buffer buf                            */
2723 /* Parameters:  buf - pointer to buf we want to sum up                      */
2724 /*              len - length of buffer buf                                  */
2725 /*                                                                          */
2726 /* Sums buffer buf. The result is used for chksum calculation. The buf      */
2727 /* argument must be aligned.                                                */
2728 /* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
        uint16_t        *word = (uint16_t *)buf;
        uint32_t        total = 0;
        unsigned int    remaining;

        /*
         * Add up the buffer one 16-bit word at a time; carries are simply
         * accumulated in the upper half of the 32-bit total and are left for
         * the caller to fold.
         */
        for (remaining = len; remaining >= 2; remaining -= 2)
                total += *word++;

        /*
         * A single octet left over is counted as the first byte of a word
         * whose second byte is zero.
         */
        if (remaining != 0)
                total += htons((*(unsigned char *)word) << 8);

        return (total);
}
2746 
2747 /* ------------------------------------------------------------------------ */
2748 /* Function:    fr_calc_chksum                                              */
2749 /* Returns:     void                                                        */
2750 /* Parameters:  fin - pointer to fr_info_t instance with packet data        */
2751 /*              pkt - pointer to duplicated packet                          */
2752 /*                                                                          */
2753 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP        */
2754 /* versions.                                                                */
2755 /* ------------------------------------------------------------------------ */
2756 void fr_calc_chksum(fin, pkt)
2757 fr_info_t *fin;
2758 mb_t *pkt;
2759 {
2760         struct pseudo_hdr {
2761                 union {
2762                         struct in_addr  in4;
2763 #ifdef USE_INET6
2764                         struct in6_addr in6;
2765 #endif
2766                 } src_addr;
2767                 union {
2768                         struct in_addr  in4;
2769 #ifdef USE_INET6
2770                         struct in6_addr in6;
2771 #endif
2772                 } dst_addr;
2773                 char            zero;
2774                 char            proto;
2775                 uint16_t        len;
2776         }       phdr;
2777         uint32_t        sum, ip_sum;
2778         void    *buf;
2779         uint16_t        *l4_csum_p;
2780         tcphdr_t        *tcp;
2781         udphdr_t        *udp;
2782         icmphdr_t       *icmp;
2783 #ifdef USE_INET6
2784         struct icmp6_hdr        *icmp6;
2785 #endif
2786         ip_t            *ip;
2787         unsigned int    len;
2788         int             pld_len;
2789 
2790         /*
2791          * We need to pullup the packet to the single continuous buffer to avoid
2792          * potential misaligment of b_rptr member in mblk chain.
2793          */
2794         if (pullupmsg(pkt, -1) == 0) {
2795                 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
2796                     " will not be computed by IPF");
2797                 return;
2798         }
2799 
2800         /*
2801          * It is guaranteed IP header starts right at b_rptr, because we are
2802          * working with a copy of the original packet.
2803          *
2804          * Compute pseudo header chksum for TCP and UDP.
2805          */
2806         if ((fin->fin_p == IPPROTO_UDP) ||
2807             (fin->fin_p == IPPROTO_TCP)) {
2808                 bzero(&phdr, sizeof (phdr));
2809 #ifdef USE_INET6
2810                 if (fin->fin_v == 6) {
2811                         phdr.src_addr.in6 = fin->fin_srcip6;
2812                         phdr.dst_addr.in6 = fin->fin_dstip6;
2813                 } else {
2814                         phdr.src_addr.in4 = fin->fin_src;
2815                         phdr.dst_addr.in4 = fin->fin_dst;
2816                 }
2817 #else
2818                 phdr.src_addr.in4 = fin->fin_src;
2819                 phdr.dst_addr.in4 = fin->fin_dst;
2820 #endif
2821                 phdr.zero = (char) 0;
2822                 phdr.proto = fin->fin_p;
2823                 phdr.len = htons((uint16_t)fin->fin_dlen);
2824                 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
2825         } else {
2826                 sum = 0;
2827         }
2828 
2829         /*
2830          * Set pointer to the L4 chksum field in the packet, set buf pointer to
2831          * the L4 header start.
2832          */
2833         switch (fin->fin_p) {
2834                 case IPPROTO_UDP:
2835                         udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2836                         l4_csum_p = &udp->uh_sum;
2837                         buf = udp;
2838                         break;
2839                 case IPPROTO_TCP:
2840                         tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2841                         l4_csum_p = &tcp->th_sum;
2842                         buf = tcp;
2843                         break;
2844                 case IPPROTO_ICMP:
2845                         icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2846                         l4_csum_p = &icmp->icmp_cksum;
2847                         buf = icmp;
2848                         break;
2849 #ifdef USE_INET6
2850                 case IPPROTO_ICMPV6:
2851                         icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
2852                         l4_csum_p = &icmp6->icmp6_cksum;
2853                         buf = icmp6;
2854                         break;
2855 #endif
2856                 default:
2857                         l4_csum_p = NULL;
2858         }
2859 
2860         /*
2861          * Compute L4 chksum if needed.
2862          */
2863         if (l4_csum_p != NULL) {
2864                 *l4_csum_p = (uint16_t)0;
2865                 pld_len = fin->fin_dlen;
2866                 len = pkt->b_wptr - (unsigned char *)buf;
2867                 ASSERT(len == pld_len);
2868                 /*
2869                  * Add payload sum to pseudoheader sum.
2870                  */
2871                 sum += fr_buf_sum(buf, len);
2872                 while (sum >> 16)
2873                         sum = (sum & 0xFFFF) + (sum >> 16);
2874 
2875                 *l4_csum_p = ~((uint16_t)sum);
2876                 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
2877         }
2878 
2879         /*
2880          * The IP header chksum is needed just for IPv4.
2881          */
2882         if (fin->fin_v == 4) {
2883                 /*
2884                  * Compute IPv4 header chksum.
2885                  */
2886                 ip = (ip_t *)pkt->b_rptr;
2887                 ip->ip_sum = (uint16_t)0;
2888                 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
2889                 while (ip_sum >> 16)
2890                         ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
2891 
2892                 ip->ip_sum = ~((uint16_t)ip_sum);
2893                 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
2894         }
2895 
2896         return;
2897 }
2898 
2899 #endif  /* _KERNEL && SOLARIS2 >= 10 */