1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/kmem.h>
  29 #include <sys/conf.h>
  30 #include <sys/atomic.h>
  31 #include <netinet/in.h>
  32 #include <netinet/in_systm.h>
  33 #include <netinet/ip6.h>
  34 #include <sys/socket.h>
  35 #include <sys/acct.h>
  36 #include <sys/exacct.h>
  37 #include <inet/common.h>
  38 #include <inet/ip.h>
  39 #include <inet/ip6.h>
  40 #include <sys/ddi.h>
  41 #include <sys/strsun.h>
  42 #include <sys/strsubr.h>
  43 #include <ipp/flowacct/flowacct_impl.h>
  44 
  45 /*
  46  * flowacct - IPQoS accounting module. The module maintains an array
  47  * of 256 hash buckets. When the action routine is invoked for a flow,
  48  * if the flow (identified by the 5-tuple: saddr, daddr, sport, dport, proto)
  49  * is already present in the flow table (indexed by the hash function FLOW_HASH)
  50  * then a check is made to see if an item for this flow with the same
  51  * dsfield, projid & user id is present. If it is, then the number of packets
  52  * and the bytes are incremented for that item. If the item does
  53  * not exist a new item is added for the flow. If the flow is not present
  54  * an entry is made for the flow.
  55  *
 * A timer runs through the table and writes all the flow items that have
 * timed out to the accounting file (via exacct PSARC/1999/119), if present.
 * Configuration commands to change the timer interval are provided. The
 * flow timeout value can also be configured. While the timeout is in nsec,
 * the flow timer interval is in usec.
 * Information for an active flow can be obtained by using kstats.
  62  */
  63 
  64 /* Used in computing the hash index */
  65 #define FLOWACCT_ADDR_HASH(addr)                        \
  66         ((addr).s6_addr8[8] ^ (addr).s6_addr8[9] ^      \
  67         (addr).s6_addr8[10] ^ (addr).s6_addr8[13] ^     \
  68         (addr).s6_addr8[14] ^ (addr).s6_addr8[15])
  69 
  70 #define FLOWACCT_FLOW_HASH(f)                           \
  71         (((FLOWACCT_ADDR_HASH(f->saddr)) +           \
  72         (FLOWACCT_ADDR_HASH(f->daddr)) +             \
  73         (f->proto) + (f->sport) + (f->dport))          \
  74         % FLOW_TBL_COUNT)
  75 
  76 /*
  77  * Compute difference between a and b in nsec and store in delta.
  78  * delta should be a hrtime_t. Taken from ip_mroute.c.
  79  */
  80 #define FLOWACCT_DELTA(a, b, delta) { \
  81         int xxs; \
  82  \
  83         delta = (a).tv_nsec - (b).tv_nsec; \
  84         if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \
  85                 switch (xxs) { \
  86                 case 2: \
  87                     delta += NANOSEC; \
  88                     /*FALLTHRU*/ \
  89                 case 1: \
  90                     delta += NANOSEC; \
  91                     break; \
  92                 default: \
  93                     delta += ((hrtime_t)NANOSEC * xxs); \
  94                 } \
  95         } \
  96 }
  97 
/*
 * Debug level for this module; initially 0.
 * NOTE(review): presumably consulted by the flowacct0dbg()/flowacct1dbg()
 * macros used throughout this file - their definitions are not visible
 * here, confirm against the flowacct headers.
 */
int flowacct_debug = 0;
 100 
/*
 * Collect timed out flows to be written to the accounting file.
 * Each node carries one usage record and a link to another node.
 */
typedef struct flow_records_s {
        /* Usage record; allocated and filled by flowacct_create_record() */
        flow_usage_t *fl_use;
        /* Link to the following node; left NULL by flowacct_create_record() */
        struct flow_records_s *next;
}flow_records_t;
 106 
 107 /* Get port information from the packet. Ignore fragments. */
 108 static void
 109 flowacct_port_info(header_t *header, void *iph, int af, mblk_t *mp)
 110 {
 111         uint16_t *up;
 112 
 113         if (af == AF_INET) {
 114                 ipha_t *ipha = (ipha_t *)iph;
 115                 uint32_t u2, u1;
 116                 uint_t iplen;
 117 
 118                 u2 = ntohs(ipha->ipha_fragment_offset_and_flags);
 119                 u1 = u2 & (IPH_MF | IPH_OFFSET);
 120                 if (u1 != 0) {
 121                         return;
 122                 }
 123                 iplen = (ipha->ipha_version_and_hdr_length & 0xF) << 2;
 124                 up = (uint16_t *)(mp->b_rptr + iplen);
 125                 header->sport = (uint16_t)*up++;
 126                 header->dport = (uint16_t)*up;
 127         } else {
 128                 ip6_t *ip6h = (ip6_t *)iph;
 129                 uint_t  length = IPV6_HDR_LEN;
 130                 uint_t  ehdrlen;
 131                 uint8_t *nexthdrp, *whereptr, *endptr;
 132                 ip6_dest_t *desthdr;
 133                 ip6_rthdr_t *rthdr;
 134                 ip6_hbh_t *hbhhdr;
 135 
 136                 whereptr = ((uint8_t *)&ip6h[1]);
 137                 endptr = mp->b_wptr;
 138                 nexthdrp = &ip6h->ip6_nxt;
 139                 while (whereptr < endptr) {
 140                         switch (*nexthdrp) {
 141                         case IPPROTO_HOPOPTS:
 142                                 hbhhdr = (ip6_hbh_t *)whereptr;
 143                                 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
 144                                 if ((uchar_t *)hbhhdr +  ehdrlen > endptr)
 145                                         return;
 146                                 nexthdrp = &hbhhdr->ip6h_nxt;
 147                                 break;
 148                         case IPPROTO_DSTOPTS:
 149                                 desthdr = (ip6_dest_t *)whereptr;
 150                                 ehdrlen = 8 * (desthdr->ip6d_len + 1);
 151                                 if ((uchar_t *)desthdr +  ehdrlen > endptr)
 152                                         return;
 153                                 nexthdrp = &desthdr->ip6d_nxt;
 154                                 break;
 155                         case IPPROTO_ROUTING:
 156                                 rthdr = (ip6_rthdr_t *)whereptr;
 157                                 ehdrlen =  8 * (rthdr->ip6r_len + 1);
 158                                 if ((uchar_t *)rthdr +  ehdrlen > endptr)
 159                                         return;
 160                                 nexthdrp = &rthdr->ip6r_nxt;
 161                                 break;
 162                         case IPPROTO_FRAGMENT:
 163                                 return;
 164                         case IPPROTO_TCP:
 165                         case IPPROTO_UDP:
 166                         case IPPROTO_SCTP:
 167                                 /*
 168                                  * Verify we have at least ICMP_MIN_TP_HDR_LEN
 169                                  * bytes of the ULP's header to get the port
 170                                  * info.
 171                                  */
 172                                 if (((uchar_t *)ip6h + length +
 173                                     ICMP_MIN_TP_HDR_LEN)  > endptr) {
 174                                         return;
 175                                 }
 176                                 /* Get the protocol & ports */
 177                                 header->proto = *nexthdrp;
 178                                 up = (uint16_t *)((uchar_t *)ip6h + length);
 179                                 header->sport = (uint16_t)*up++;
 180                                 header->dport = (uint16_t)*up;
 181                                 return;
 182                         case IPPROTO_ICMPV6:
 183                         case IPPROTO_ENCAP:
 184                         case IPPROTO_IPV6:
 185                         case IPPROTO_ESP:
 186                         case IPPROTO_AH:
 187                                 header->proto = *nexthdrp;
 188                                 return;
 189                         case IPPROTO_NONE:
 190                         default:
 191                                 return;
 192                         }
 193                         length += ehdrlen;
 194                         whereptr += ehdrlen;
 195                 }
 196         }
 197 }
 198 
 199 /*
 200  * flowacct_find_ids(mp, header)
 201  *
 202  * attempt to discern the uid and projid of the originator of a packet by
 203  * looking at the dblks making up the packet - yeuch!
 204  *
 205  * We do it by skipping any fragments with a credp of NULL (originated in
 206  * kernel), taking the first value that isn't NULL to be the cred_t for the
 207  * whole packet.
 208  */
 209 static void
 210 flowacct_find_ids(mblk_t *mp, header_t *header)
 211 {
 212         cred_t *cr;
 213 
 214         cr = msg_getcred(mp, NULL);
 215         if (cr != NULL) {
 216                 header->uid = crgetuid(cr);
 217                 header->projid = crgetprojid(cr);
 218         } else {
 219                 header->uid = (uid_t)-1;
 220                 header->projid = -1;
 221         }
 222 }
 223 
 224 /*
 225  * Extract header information in a header_t structure so that we don't have
 226  * have to parse the packet everytime.
 227  */
 228 static int
 229 flowacct_extract_header(mblk_t *mp, header_t *header)
 230 {
 231         ipha_t *ipha;
 232         ip6_t *ip6h;
 233 #define rptr    ((uchar_t *)ipha)
 234 
 235         /* 0 means no port extracted. */
 236         header->sport = 0;
 237         header->dport = 0;
 238         flowacct_find_ids(mp, header);
 239 
 240         V6_SET_ZERO(header->saddr);
 241         V6_SET_ZERO(header->daddr);
 242 
 243         ipha = (ipha_t *)mp->b_rptr;
 244         header->isv4 = IPH_HDR_VERSION(ipha) == IPV4_VERSION;
 245         if (header->isv4) {
 246                 ipha = (ipha_t *)mp->b_rptr;
 247                 V4_PART_OF_V6(header->saddr) = (int32_t)ipha->ipha_src;
 248                 V4_PART_OF_V6(header->daddr) = (int32_t)ipha->ipha_dst;
 249                 header->dsfield = ipha->ipha_type_of_service;
 250                 header->proto = ipha->ipha_protocol;
 251                 header->pktlen = ntohs(ipha->ipha_length);
 252                 if ((header->proto == IPPROTO_TCP) ||
 253                     (header->proto == IPPROTO_UDP) ||
 254                     (header->proto == IPPROTO_SCTP)) {
 255                         flowacct_port_info(header, ipha, AF_INET, mp);
 256                 }
 257         } else {
 258                 /*
 259                  * Need to pullup everything.
 260                  */
 261                 if (mp->b_cont != NULL) {
 262                         if (!pullupmsg(mp, -1)) {
 263                                 flowacct0dbg(("flowacct_extract_header: "\
 264                                     "pullup error"));
 265                                 return (-1);
 266                         }
 267                 }
 268                 ip6h = (ip6_t *)mp->b_rptr;
 269                 bcopy(ip6h->ip6_src.s6_addr32, header->saddr.s6_addr32,
 270                     sizeof (ip6h->ip6_src.s6_addr32));
 271                 bcopy(ip6h->ip6_dst.s6_addr32, header->daddr.s6_addr32,
 272                     sizeof (ip6h->ip6_dst.s6_addr32));
 273                 header->dsfield = __IPV6_TCLASS_FROM_FLOW(ip6h->ip6_vcf);
 274                 header->proto = ip6h->ip6_nxt;
 275                 header->pktlen = ntohs(ip6h->ip6_plen) +
 276                     ip_hdr_length_v6(mp, ip6h);
 277                 flowacct_port_info(header, ip6h, AF_INET6, mp);
 278 
 279         }
 280 #undef  rptr
 281         return (0);
 282 }
 283 
 284 /* Check if the flow (identified by the 5-tuple) exists in the hash table */
 285 static flow_t *
 286 flowacct_flow_present(header_t *header, int index,
 287     flowacct_data_t *flowacct_data)
 288 {
 289         list_hdr_t *hdr = flowacct_data->flows_tbl[index].head;
 290         flow_t *flow;
 291 
 292         while (hdr != NULL) {
 293                 flow = (flow_t *)hdr->objp;
 294                 if ((flow != NULL) &&
 295                     (IN6_ARE_ADDR_EQUAL(&flow->saddr, &header->saddr)) &&
 296                     (IN6_ARE_ADDR_EQUAL(&flow->daddr, &header->daddr)) &&
 297                     (flow->proto == header->proto) &&
 298                     (flow->sport == header->sport) &&
 299                     (flow->dport == header->dport)) {
 300                         return (flow);
 301                 }
 302                 hdr = hdr->next;
 303         }
 304         return ((flow_t *)NULL);
 305 }
 306 
 307 /*
 308  * Add an object to the list at insert_point. This could be a flow item or
 309  * a flow itself.
 310  */
 311 static list_hdr_t *
 312 flowacct_add_obj(list_head_t *tophdr, list_hdr_t *insert_point, void *obj)
 313 {
 314         list_hdr_t *new_hdr;
 315 
 316         if (tophdr == NULL) {
 317                 return ((list_hdr_t *)NULL);
 318         }
 319 
 320         new_hdr = (list_hdr_t *)kmem_zalloc(FLOWACCT_HDR_SZ, KM_NOSLEEP);
 321         if (new_hdr == NULL) {
 322                 flowacct0dbg(("flowacct_add_obj: error allocating mem"));
 323                 return ((list_hdr_t *)NULL);
 324         }
 325         gethrestime(&new_hdr->last_seen);
 326         new_hdr->objp = obj;
 327         tophdr->nbr_items++;
 328 
 329         if (insert_point == NULL) {
 330                 if (tophdr->head == NULL) {
 331                         tophdr->head = new_hdr;
 332                         tophdr->tail = new_hdr;
 333                         return (new_hdr);
 334                 }
 335 
 336                 new_hdr->next = tophdr->head;
 337                 tophdr->head->prev = new_hdr;
 338                 tophdr->head = new_hdr;
 339                 return (new_hdr);
 340         }
 341 
 342         if (insert_point == tophdr->tail) {
 343                 tophdr->tail->next = new_hdr;
 344                 new_hdr->prev = tophdr->tail;
 345                 tophdr->tail = new_hdr;
 346                 return (new_hdr);
 347         }
 348 
 349         new_hdr->next = insert_point->next;
 350         new_hdr->prev = insert_point;
 351         insert_point->next->prev = new_hdr;
 352         insert_point->next = new_hdr;
 353         return (new_hdr);
 354 }
 355 
 356 /* Delete an obj from the list. This could be a flow item or the flow itself */
 357 static void
 358 flowacct_del_obj(list_head_t *tophdr, list_hdr_t *hdr, uint_t mode)
 359 {
 360         size_t  length;
 361         uint_t  type;
 362 
 363         if ((tophdr == NULL) || (hdr == NULL)) {
 364                 return;
 365         }
 366 
 367         type = ((flow_t *)hdr->objp)->type;
 368 
 369         tophdr->nbr_items--;
 370 
 371         if (hdr->next != NULL) {
 372                 hdr->next->prev = hdr->prev;
 373         }
 374         if (hdr->prev != NULL) {
 375                 hdr->prev->next = hdr->next;
 376         }
 377         if (tophdr->head == hdr) {
 378                 tophdr->head = hdr->next;
 379         }
 380         if (tophdr->tail == hdr) {
 381                 tophdr->tail = hdr->prev;
 382         }
 383 
 384         if (mode == FLOWACCT_DEL_OBJ) {
 385                 switch (type) {
 386                 case FLOWACCT_FLOW:
 387                         length = FLOWACCT_FLOW_SZ;
 388                         break;
 389                 case FLOWACCT_ITEM:
 390                         length = FLOWACCT_ITEM_SZ;
 391                         break;
 392                 }
 393                 kmem_free(hdr->objp, length);
 394                 hdr->objp = NULL;
 395         }
 396 
 397         kmem_free((void *)hdr, FLOWACCT_HDR_SZ);
 398 }
 399 
 400 /*
 401  * Checks if the given item (identified by dsfield, project id and uid)
 402  * is already present for the flow.
 403  */
 404 static flow_item_t *
 405 flowacct_item_present(flow_t *flow, uint8_t dsfield, pid_t proj_id, uint_t uid)
 406 {
 407         list_hdr_t      *itemhdr;
 408         flow_item_t     *item;
 409 
 410         itemhdr = flow->items.head;
 411 
 412         while (itemhdr != NULL) {
 413                 item = (flow_item_t *)itemhdr->objp;
 414 
 415                 if ((item->dsfield != dsfield) || (item->projid != proj_id) ||
 416                     (item->uid != uid)) {
 417                         itemhdr = itemhdr->next;
 418                         continue;
 419                 }
 420                 return (item);
 421         }
 422 
 423         return ((flow_item_t *)NULL);
 424 }
 425 
/*
 * Add the flow to the table, if not already present. If the flow is
 * present in the table, add the item. Also, update the flow stats.
 * Additionally, re-adjust the timeout list as well.
 *
 *   header        - pre-parsed packet information (5-tuple, dsfield,
 *                   projid, uid, packet length).
 *   flowacct_data - per-instance state holding the flow table, the
 *                   timeout list and the counters.
 *
 * Returns 0 when the packet was accounted for. Returns -1 when memory
 * for a flow, an item or a list header could not be allocated, or when
 * the item limit (max_limit) is still exceeded after one attempt to time
 * out an old flow.
 *
 * Locking: the hash bucket lock (fhead) is held while the flow and item
 * are looked up or created. It is then dropped and re-taken after the
 * timeout list lock (thead), because the timer takes thead before any
 * bucket lock.
 */
static int
flowacct_update_flows_tbl(header_t *header, flowacct_data_t *flowacct_data)
{
        int index;
        list_head_t *fhead;
        list_head_t *thead;
        list_head_t *ihead;
        boolean_t added_flow = B_FALSE;
        timespec_t  now;
        flow_item_t *item;
        flow_t *flow;

        index = FLOWACCT_FLOW_HASH(header);
        fhead = &flowacct_data->flows_tbl[index];

        /* The timeout list */
        thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT];

        mutex_enter(&fhead->lock);
        flow = flowacct_flow_present(header, index, flowacct_data);
        if (flow == NULL) {
                /* First packet of this 5-tuple: create the flow entry */
                flow = (flow_t *)kmem_zalloc(FLOWACCT_FLOW_SZ, KM_NOSLEEP);
                if (flow == NULL) {
                        mutex_exit(&fhead->lock);
                        flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
                            "error"));
                        return (-1);
                }
                flow->hdr = flowacct_add_obj(fhead, fhead->tail, (void *)flow);
                if (flow->hdr == NULL) {
                        mutex_exit(&fhead->lock);
                        kmem_free(flow, FLOWACCT_FLOW_SZ);
                        flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
                            "error"));
                        return (-1);
                }

                flow->type = FLOWACCT_FLOW;
                flow->isv4 = header->isv4;
                bcopy(header->saddr.s6_addr32, flow->saddr.s6_addr32,
                    sizeof (header->saddr.s6_addr32));
                bcopy(header->daddr.s6_addr32, flow->daddr.s6_addr32,
                    sizeof (header->daddr.s6_addr32));
                flow->proto = header->proto;
                flow->sport = header->sport;
                flow->dport = header->dport;
                flow->back_ptr = fhead;
                added_flow = B_TRUE;
        } else {
                /*
                 * We need to make sure that this 'flow' is not deleted
                 * either by a scheduled timeout or an explicit call
                 * to flowacct_timer() below.
                 */
                flow->inuse = B_TRUE;
        }

        ihead = &flow->items;
        item = flowacct_item_present(flow, header->dsfield, header->projid,
            header->uid);
        if (item == NULL) {
                boolean_t just_once = B_TRUE;
                /*
                 * For all practical purposes, we limit the no. of entries in
                 * the flow table - i.e. the max_limit that a user specifies
                 * is the maximum no. of flow items in the table.
                 */
        try_again:
                /*
                 * Reserve a slot for the new item first, then check the
                 * limit; the reservation is given back if it is exceeded.
                 */
                atomic_add_32(&flowacct_data->nflows, 1);
                if (flowacct_data->nflows > flowacct_data->max_limit) {
                        atomic_add_32(&flowacct_data->nflows, -1);

                        /* Try timing out once */
                        if (just_once) {
                                /*
                                 * Need to release the lock, as this entry
                                 * could contain a flow that can be timed
                                 * out.
                                 */
                                mutex_exit(&fhead->lock);
                                flowacct_timer(FLOWACCT_JUST_ONE,
                                    flowacct_data);
                                mutex_enter(&fhead->lock);
                                /* Lets check again */
                                just_once = B_FALSE;
                                goto try_again;
                        } else {
                                flow->inuse = B_FALSE;
                                /* Need to remove the flow, if one was added */
                                if (added_flow) {
                                        flowacct_del_obj(fhead, flow->hdr,
                                            FLOWACCT_DEL_OBJ);
                                }
                                mutex_exit(&fhead->lock);
                                flowacct1dbg(("flowacct_update_flows_tbl: "\
                                    "maximum active flows exceeded\n"));
                                return (-1);
                        }
                }
                item = (flow_item_t *)kmem_zalloc(FLOWACCT_ITEM_SZ, KM_NOSLEEP);
                if (item == NULL) {
                        flow->inuse = B_FALSE;
                        /* Need to remove the flow, if one was added */
                        if (added_flow) {
                                flowacct_del_obj(fhead, flow->hdr,
                                    FLOWACCT_DEL_OBJ);
                        }
                        mutex_exit(&fhead->lock);
                        /* Give back the slot reserved above */
                        atomic_add_32(&flowacct_data->nflows, -1);
                        flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
                            "error"));
                        return (-1);
                }
                item->hdr = flowacct_add_obj(ihead, ihead->tail, (void *)item);
                if (item->hdr == NULL) {
                        flow->inuse = B_FALSE;
                        /* Need to remove the flow, if one was added */
                        if (added_flow) {
                                flowacct_del_obj(fhead, flow->hdr,
                                    FLOWACCT_DEL_OBJ);
                        }
                        mutex_exit(&fhead->lock);
                        /* Give back the slot reserved above */
                        atomic_add_32(&flowacct_data->nflows, -1);
                        kmem_free(item, FLOWACCT_ITEM_SZ);
                        flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
                            "error\n"));
                        return (-1);
                }
                /* If a flow was added, account for its record size too */
                if (added_flow) {
                        atomic_add_64(&flowacct_data->usedmem,
                            FLOWACCT_FLOW_RECORD_SZ);
                }
                atomic_add_64(&flowacct_data->usedmem, FLOWACCT_ITEM_RECORD_SZ);

                item->type = FLOWACCT_ITEM;
                item->dsfield = header->dsfield;
                item->projid = header->projid;
                item->uid = header->uid;
                item->npackets = 1;
                item->nbytes = header->pktlen;
                /* last_seen was stamped by flowacct_add_obj() just above */
                item->creation_time = item->hdr->last_seen;
        } else {
                /* Known item: just bump its packet and byte counters */
                item->npackets++;
                item->nbytes += header->pktlen;
        }
        /* Both the flow and the item were seen just now */
        gethrestime(&now);
        flow->hdr->last_seen = item->hdr->last_seen = now;
        mutex_exit(&fhead->lock);

        /*
         * Re-adjust the timeout list. The timer takes the thead lock
         * followed by fhead lock(s), so we release fhead, take thead
         * and re-take fhead.
         */
        mutex_enter(&thead->lock);
        mutex_enter(&fhead->lock);
        /* If the flow was added, append it to the tail of the timeout list */
        if (added_flow) {
                if (thead->head == NULL) {
                        thead->head = flow->hdr;
                        thead->tail = flow->hdr;
                } else {
                        thead->tail->timeout_next = flow->hdr;
                        flow->hdr->timeout_prev = thead->tail;
                        thead->tail = flow->hdr;
                }
        /*
         * Else, move this flow to the tail of the timeout list, if it is not
         * already.
         * flow->hdr in the timeout list :-
         * timeout_next = NULL, timeout_prev != NULL, at the tail end.
         * timeout_next != NULL, timeout_prev = NULL, at the head.
         * timeout_next != NULL, timeout_prev != NULL, in the middle.
         * timeout_next = NULL, timeout_prev = NULL, not in the timeout list,
         * ignore such flow.
         */
        } else if ((flow->hdr->timeout_next != NULL) ||
            (flow->hdr->timeout_prev != NULL)) {
                if (flow->hdr != thead->tail) {
                        if (flow->hdr == thead->head) {
                                /* Detach from the head, re-link at the tail */
                                thead->head->timeout_next->timeout_prev = NULL;
                                thead->head = thead->head->timeout_next;
                                flow->hdr->timeout_next = NULL;
                                thead->tail->timeout_next = flow->hdr;
                                flow->hdr->timeout_prev = thead->tail;
                                thead->tail = flow->hdr;
                        } else {
                                /* Detach from the middle, re-link at tail */
                                flow->hdr->timeout_prev->timeout_next =
                                    flow->hdr->timeout_next;
                                flow->hdr->timeout_next->timeout_prev =
                                    flow->hdr->timeout_prev;
                                flow->hdr->timeout_next = NULL;
                                thead->tail->timeout_next = flow->hdr;
                                flow->hdr->timeout_prev = thead->tail;
                                thead->tail = flow->hdr;
                        }
                }
        }
        /*
         * Unset this variable, now it is fine even if this
         * flow gets deleted (i.e. after timing out its
         * flow items) since we are done using it.
         */
        flow->inuse = B_FALSE;
        mutex_exit(&fhead->lock);
        mutex_exit(&thead->lock);
        /* Running total of bytes accounted by this instance */
        atomic_add_64(&flowacct_data->tbytes, header->pktlen);
        return (0);
}
 641 
 642 /* Timer for timing out flows/items from the flow table */
 643 void
 644 flowacct_timeout_flows(void *args)
 645 {
 646         flowacct_data_t *flowacct_data = (flowacct_data_t *)args;
 647         flowacct_timer(FLOWACCT_FLOW_TIMER, flowacct_data);
 648         flowacct_data->flow_tid = timeout(flowacct_timeout_flows, flowacct_data,
 649             drv_usectohz(flowacct_data->timer));
 650 }
 651 
 652 
 653 /* Delete the item from the flow in the flow table */
 654 static void
 655 flowacct_timeout_item(flow_t **flow, list_hdr_t **item_hdr)
 656 {
 657         list_hdr_t *next_it_hdr;
 658 
 659         next_it_hdr = (*item_hdr)->next;
 660         flowacct_del_obj(&(*flow)->items, *item_hdr, FLOWACCT_DEL_OBJ);
 661         *item_hdr = next_it_hdr;
 662 }
 663 
 664 /* Create a flow record for this timed out item */
 665 static flow_records_t *
 666 flowacct_create_record(flow_t *flow, list_hdr_t *ithdr)
 667 {
 668         int count;
 669         flow_item_t *item = (flow_item_t *)ithdr->objp;
 670         flow_records_t *tmp_frec = NULL;
 671 
 672         /* Record to be written into the accounting file */
 673         tmp_frec = kmem_zalloc(sizeof (flow_records_t), KM_NOSLEEP);
 674         if (tmp_frec == NULL) {
 675                 flowacct0dbg(("flowacct_create_record: mem alloc error.\n"));
 676                 return (NULL);
 677         }
 678         tmp_frec->fl_use = kmem_zalloc(sizeof (flow_usage_t), KM_NOSLEEP);
 679         if (tmp_frec->fl_use == NULL) {
 680                 flowacct0dbg(("flowacct_create_record: mem alloc error\n"));
 681                 kmem_free(tmp_frec, sizeof (flow_records_t));
 682                 return (NULL);
 683         }
 684 
 685         /* Copy the IP address */
 686         for (count = 0; count < 4; count++) {
 687                 tmp_frec->fl_use->fu_saddr[count] =
 688                     htonl(flow->saddr.s6_addr32[count]);
 689                 tmp_frec->fl_use->fu_daddr[count] =
 690                     htonl(flow->daddr.s6_addr32[count]);
 691         }
 692 
 693         /*
 694          * Ports, protocol, version, dsfield, project id, uid, nbytes, npackets
 695          * creation time and last seen.
 696          */
 697         tmp_frec->fl_use->fu_sport = htons(flow->sport);
 698         tmp_frec->fl_use->fu_dport = htons(flow->dport);
 699         tmp_frec->fl_use->fu_protocol = flow->proto;
 700         tmp_frec->fl_use->fu_isv4 = flow->isv4;
 701         tmp_frec->fl_use->fu_dsfield = item->dsfield;
 702         tmp_frec->fl_use->fu_projid = item->projid;
 703         tmp_frec->fl_use->fu_userid = item->uid;
 704         tmp_frec->fl_use->fu_nbytes = item->nbytes;
 705         tmp_frec->fl_use->fu_npackets = item->npackets;
 706         tmp_frec->fl_use->fu_lseen =
 707             (uint64_t)(ulong_t)ithdr->last_seen.tv_sec;
 708         tmp_frec->fl_use->fu_ctime =
 709             (uint64_t)(ulong_t)item->creation_time.tv_sec;
 710 
 711         return (tmp_frec);
 712 }
 713 
/*
 * Scan through the timeout list and write the records to the accounting
 * file, if possible. Basically step through the timeout list maintained in
 * the extra list head that follows the hash buckets,
 * flows_tbl[FLOW_TBL_COUNT], and time out flows. This can be called from the
 * timer as FLOWACCT_FLOW_TIMER - delete only timed out flows; when this
 * instance is deleted, as FLOWACCT_PURGE_FLOW - delete all the flows from
 * the table; or as FLOWACCT_JUST_ONE - delete the first timed out flow.
 * Since the flows are chronologically arranged in the timeout list, when
 * called as FLOWACCT_FLOW_TIMER or FLOWACCT_JUST_ONE we can stop when we
 * come across the first flow that has not timed out (which means none of
 * the following flows would have timed out).
 */
 726 void
 727 flowacct_timer(int type, flowacct_data_t *flowacct_data)
 728 {
 729         hrtime_t diff;
 730         timespec_t now;
 731         list_head_t *head, *thead;
 732         flow_t *flow;
 733         flow_item_t *item;
 734         list_hdr_t *fl_hdr, *next_fl_hdr;
 735         list_hdr_t *ithdr = (list_hdr_t *)NULL;
 736         flow_records_t *frec = NULL, *tmp_frec, *tail;
 737         uint64_t flow_size;
 738         uint64_t item_size;
 739 
 740         ASSERT(flowacct_data != NULL);
 741 
 742         /* 2s-complement for subtraction */
 743         flow_size = ~FLOWACCT_FLOW_RECORD_SZ + 1;
 744         item_size = ~FLOWACCT_ITEM_RECORD_SZ + 1;
 745 
 746         /* Get the current time */
 747         gethrestime(&now);
 748 
 749         /*
 750          * For each flow in the table, scan thru all the items and delete
 751          * those that have exceeded the timeout. If all the items in a
 752          * flow have timed out, delete the flow entry as well. Finally,
 753          * write all the delted items to the accounting file.
 754          */
 755         thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT];
 756 
 757         mutex_enter(&thead->lock);
 758         fl_hdr = thead->head;
 759         while (fl_hdr != NULL) {
 760                 uint32_t        items_deleted = 0;
 761 
 762                 next_fl_hdr = fl_hdr->timeout_next;
 763                 flow = (flow_t *)fl_hdr->objp;
 764                 head = flow->back_ptr;
 765                 mutex_enter(&head->lock);
 766 
 767                 /*LINTED*/
 768                 FLOWACCT_DELTA(now, fl_hdr->last_seen, diff);
 769 
 770                 /*
 771                  * If type is FLOW_TIMER, then check if the item has timed out.
 772                  * If type is FLOW_PURGE delete the entry anyways.
 773                  */
 774                 if ((type != FLOWACCT_PURGE_FLOW) &&
 775                     (diff < flowacct_data->timeout)) {
 776                         mutex_exit(&head->lock);
 777                         mutex_exit(&thead->lock);
 778                         goto write_records;
 779                 }
 780 
 781                 ithdr = flow->items.head;
 782                 while (ithdr != NULL) {
 783                         item = (flow_item_t *)ithdr->objp;
 784                         /*
 785                          * Fill in the flow record to be
 786                          * written to the accounting file.
 787                          */
 788                         tmp_frec = flowacct_create_record(flow, ithdr);
 789                         /*
 790                          * If we don't have memory for records,
 791                          * we will come back in case this is
 792                          * called as FLOW_TIMER, else we will
 793                          * go ahead and delete the item from
 794                          * the table (when asked to PURGE the
 795                          * table), so there could be some
 796                          * entries not written to the file
 797                          * when this action instance is
 798                          * deleted.
 799                          */
 800                         if (tmp_frec != NULL) {
 801                                 tmp_frec->fl_use->fu_aname =
 802                                     flowacct_data->act_name;
 803                                 if (frec == NULL) {
 804                                         frec = tmp_frec;
 805                                         tail = frec;
 806                                 } else {
 807                                         tail->next = tmp_frec;
 808                                         tail = tmp_frec;
 809                                 }
 810                         } else if (type != FLOWACCT_PURGE_FLOW) {
 811                                 mutex_exit(&head->lock);
 812                                 mutex_exit(&thead->lock);
 813                                 atomic_add_32(&flowacct_data->nflows,
 814                                     (~items_deleted + 1));
 815                                 goto write_records;
 816                         }
 817 
 818                         /* Update stats */
 819                         atomic_add_64(&flowacct_data->tbytes, (~item->nbytes +
 820                             1));
 821 
 822                         /* Delete the item */
 823                         flowacct_timeout_item(&flow, &ithdr);
 824                         items_deleted++;
 825                         atomic_add_64(&flowacct_data->usedmem, item_size);
 826                 }
 827                 ASSERT(flow->items.nbr_items == 0);
 828                 atomic_add_32(&flowacct_data->nflows, (~items_deleted + 1));
 829 
 830                 /*
 831                  * Don't delete this flow if we are making place for
 832                  * a new item for this flow.
 833                  */
 834                 if (!flow->inuse) {
 835                         if (fl_hdr->timeout_prev != NULL) {
 836                                 fl_hdr->timeout_prev->timeout_next =
 837                                     fl_hdr->timeout_next;
 838                         } else {
 839                                 thead->head = fl_hdr->timeout_next;
 840                         }
 841                         if (fl_hdr->timeout_next != NULL) {
 842                                 fl_hdr->timeout_next->timeout_prev =
 843                                     fl_hdr->timeout_prev;
 844                         } else {
 845                                 thead->tail = fl_hdr->timeout_prev;
 846                         }
 847                         fl_hdr->timeout_prev = NULL;
 848                         fl_hdr->timeout_next = NULL;
 849                         flowacct_del_obj(head, fl_hdr, FLOWACCT_DEL_OBJ);
 850                         atomic_add_64(&flowacct_data->usedmem, flow_size);
 851                 }
 852                 mutex_exit(&head->lock);
 853                 if (type == FLOWACCT_JUST_ONE) {
 854                         mutex_exit(&thead->lock);
 855                         goto write_records;
 856                 }
 857                 fl_hdr = next_fl_hdr;
 858         }
 859         mutex_exit(&thead->lock);
 860 write_records:
 861         /* Write all the timed out flows to the accounting file */
 862         while (frec != NULL) {
 863                 tmp_frec = frec->next;
 864                 exacct_commit_flow(frec->fl_use);
 865                 kmem_free(frec->fl_use, sizeof (flow_usage_t));
 866                 kmem_free(frec, sizeof (flow_records_t));
 867                 frec = tmp_frec;
 868         }
 869 }
 870 
 871 /*
 872  * Get the IP header contents from the packet, update the flow table with
 873  * this item and return.
 874  */
 875 int
 876 flowacct_process(mblk_t **mpp, flowacct_data_t *flowacct_data)
 877 {
 878         header_t *header;
 879         mblk_t *mp = *mpp;
 880 
 881         ASSERT(mp != NULL);
 882 
 883         /* If we don't find an M_DATA, return error */
 884         if (mp->b_datap->db_type != M_DATA) {
 885                 if ((mp->b_cont != NULL) &&
 886                     (mp->b_cont->b_datap->db_type == M_DATA)) {
 887                         mp = mp->b_cont;
 888                 } else {
 889                         flowacct0dbg(("flowacct_process: no data\n"));
 890                         atomic_add_64(&flowacct_data->epackets, 1);
 891                         return (EINVAL);
 892                 }
 893         }
 894 
 895         header = kmem_zalloc(FLOWACCT_HEADER_SZ, KM_NOSLEEP);
 896         if (header == NULL) {
 897                 flowacct0dbg(("flowacct_process: error allocing mem"));
 898                 atomic_add_64(&flowacct_data->epackets, 1);
 899                 return (ENOMEM);
 900         }
 901 
 902         /* Get all the required information into header. */
 903         if (flowacct_extract_header(mp, header) != 0) {
 904                 kmem_free(header, FLOWACCT_HEADER_SZ);
 905                 atomic_add_64(&flowacct_data->epackets, 1);
 906                 return (EINVAL);
 907         }
 908 
 909         /* Updated the flow table with this entry */
 910         if (flowacct_update_flows_tbl(header, flowacct_data) != 0) {
 911                 kmem_free(header, FLOWACCT_HEADER_SZ);
 912                 atomic_add_64(&flowacct_data->epackets, 1);
 913                 return (ENOMEM);
 914         }
 915 
 916         /* Update global stats */
 917         atomic_add_64(&flowacct_data->npackets, 1);
 918         atomic_add_64(&flowacct_data->nbytes, header->pktlen);
 919 
 920         kmem_free(header, FLOWACCT_HEADER_SZ);
 921         if (flowacct_data->flow_tid == 0) {
 922                 flowacct_data->flow_tid = timeout(flowacct_timeout_flows,
 923                     flowacct_data, drv_usectohz(flowacct_data->timer));
 924         }
 925         return (0);
 926 }