1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/kmem.h>
  29 #include <sys/random.h>
  30 #include <netinet/in.h>
  31 #include <netinet/in_systm.h>
  32 #include <netinet/ip6.h>
  33 #include <inet/common.h>
  34 #include <inet/ip.h>
  35 #include <inet/ip6.h>
  36 #include <ipp/meters/meter_impl.h>
  37 
  38 /*
  39  * Module : Time Sliding Window meter - tswtclmtr
  40  * Description
  41  * This module implements the metering part of RFC 2859. It accepts the
  42  * committed rate, peak rate and the window for a flow and determines
  43  * if the flow is within the committed/peak rate and assigns the appropriate
  44  * next action.
  45  * The meter provides an estimate of the running average bandwidth for the
  46  * flow over the specified window. It uses probability to benefit TCP flows
  47  * as it reduces the likelihood of dropping multiple packets within a TCP
 * window without adversely affecting UDP flows.
  49  */
  50 
/*
 * Debug knob for this module; defaults to 0 (off).
 * NOTE(review): presumably consulted by the tswtcl*dbg() macros, which are
 * defined outside this file -- confirm against meter_impl.h.
 */
int tswtcl_debug = 0;
  52 
  53 /*
  54  * Given a packet and the tswtcl_data it belongs to, this routine meters the
  55  * ToS or DSCP for IPv4 and IPv6 resp. with the values configured for
  56  * the tswtcl_data.
  57  */
  58 /* ARGSUSED */
  59 int
  60 tswtcl_process(mblk_t **mpp, tswtcl_data_t *tswtcl_data,
  61     ipp_action_id_t *next_action)
  62 {
  63         ipha_t *ipha;
  64         hrtime_t now;
  65         ip6_t *ip6_hdr;
  66         uint32_t pkt_len;
  67         mblk_t *mp = *mpp;
  68         hrtime_t deltaT;
  69         uint64_t bitsinwin;
  70         uint32_t min = 0, additive, rnd;
  71         tswtcl_cfg_t *cfg_parms = tswtcl_data->cfg_parms;
  72 
  73         if (mp == NULL) {
  74                 tswtcl0dbg(("tswtcl_process: null mp!\n"));
  75                 atomic_inc_64(&tswtcl_data->epackets);
  76                 return (EINVAL);
  77         }
  78 
  79         if (mp->b_datap->db_type != M_DATA) {
  80                 if ((mp->b_cont != NULL) &&
  81                     (mp->b_cont->b_datap->db_type == M_DATA)) {
  82                         mp = mp->b_cont;
  83                 } else {
  84                         tswtcl0dbg(("tswtcl_process: no data\n"));
  85                         atomic_inc_64(&tswtcl_data->epackets);
  86                         return (EINVAL);
  87                 }
  88         }
  89 
  90         /* Figure out the ToS/Traffic Class and length from the message */
  91         if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) {
  92                 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
  93                         tswtcl0dbg(("tswtcl_process: pullup error\n"));
  94                         atomic_inc_64(&tswtcl_data->epackets);
  95                         return (EINVAL);
  96                 }
  97         }
  98         ipha = (ipha_t *)mp->b_rptr;
  99         if (IPH_HDR_VERSION(ipha) == IPV4_VERSION) {
 100                 pkt_len = ntohs(ipha->ipha_length);
 101         } else {
 102                 ip6_hdr = (ip6_t *)mp->b_rptr;
 103                 pkt_len = ntohs(ip6_hdr->ip6_plen) +
 104                     ip_hdr_length_v6(mp, ip6_hdr);
 105         }
 106 
 107         /* Convert into bits */
 108         pkt_len <<= 3;
 109 
 110         /* Get current time */
 111         now = gethrtime();
 112 
 113         /* Update the avg_rate and win_front tswtcl_data */
 114         mutex_enter(&tswtcl_data->tswtcl_lock);
 115 
 116         /* avg_rate = bits/sec and window in msec */
 117         bitsinwin = ((uint64_t)tswtcl_data->avg_rate * cfg_parms->window /
 118             1000) + pkt_len;
 119 
 120         deltaT = now - tswtcl_data->win_front + cfg_parms->nsecwindow;
 121 
 122         tswtcl_data->avg_rate = (uint64_t)bitsinwin * METER_SEC_TO_NSEC /
 123             deltaT;
 124         tswtcl_data->win_front = now;
 125 
 126         if (tswtcl_data->avg_rate <= cfg_parms->committed_rate) {
 127                 *next_action = cfg_parms->green_action;
 128         } else if (tswtcl_data->avg_rate <= cfg_parms->peak_rate) {
 129                 /*
 130                  * Compute the probability:
 131                  *
 132                  * p0 = (avg_rate - committed_rate) / avg_rate
 133                  *
 134                  * Yellow with probability p0
 135                  * Green with probability (1 - p0)
 136                  *
 137                  */
 138                 uint32_t aminusc;
 139 
 140                 /* Get a random no. betweeen 0 and avg_rate */
 141                 (void) random_get_pseudo_bytes((uint8_t *)&additive,
 142                     sizeof (additive));
 143                 rnd = min + (additive % (tswtcl_data->avg_rate - min + 1));
 144 
 145                 aminusc = tswtcl_data->avg_rate - cfg_parms->committed_rate;
 146                 if (aminusc >= rnd) {
 147                         *next_action = cfg_parms->yellow_action;
 148                 } else {
 149                         *next_action = cfg_parms->green_action;
 150                 }
 151         } else {
 152                 /*
 153                  * Compute the probability:
 154                  *
 155                  * p1 = (avg_rate - peak_rate) / avg_rate
 156                  * p2 = (peak_rate - committed_rate) / avg_rate
 157                  *
 158                  * Red with probability p1
 159                  * Yellow with probability p2
 160                  * Green with probability (1 - (p1 + p2))
 161                  *
 162                  */
 163                 uint32_t  aminusp;
 164 
 165                 /* Get a random no. betweeen 0 and avg_rate */
 166                 (void) random_get_pseudo_bytes((uint8_t *)&additive,
 167                     sizeof (additive));
 168                 rnd = min + (additive % (tswtcl_data->avg_rate - min + 1));
 169 
 170                 aminusp = tswtcl_data->avg_rate - cfg_parms->peak_rate;
 171 
 172                 if (aminusp >= rnd) {
 173                         *next_action = cfg_parms->red_action;
 174                 } else if ((cfg_parms->pminusc + aminusp) >= rnd) {
 175                         *next_action = cfg_parms->yellow_action;
 176                 } else {
 177                         *next_action = cfg_parms->green_action;
 178                 }
 179 
 180         }
 181         mutex_exit(&tswtcl_data->tswtcl_lock);
 182 
 183         /* Update Stats */
 184         if (*next_action == cfg_parms->green_action) {
 185                 atomic_inc_64(&tswtcl_data->green_packets);
 186                 atomic_add_64(&tswtcl_data->green_bits, pkt_len);
 187         } else if (*next_action == cfg_parms->yellow_action) {
 188                 atomic_inc_64(&tswtcl_data->yellow_packets);
 189                 atomic_add_64(&tswtcl_data->yellow_bits, pkt_len);
 190         } else {
 191                 ASSERT(*next_action == cfg_parms->red_action);
 192                 atomic_inc_64(&tswtcl_data->red_packets);
 193                 atomic_add_64(&tswtcl_data->red_bits, pkt_len);
 194         }
 195         return (0);
 196 }