1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28 
  29 #include <sys/types.h>
  30 #include <sys/kmem.h>
  31 #include <sys/random.h>
  32 #include <netinet/in.h>
  33 #include <netinet/in_systm.h>
  34 #include <netinet/ip6.h>
  35 #include <inet/common.h>
  36 #include <inet/ip.h>
  37 #include <inet/ip6.h>
  38 #include <ipp/meters/meter_impl.h>
  39 
  40 /*
  41  * Module : Time Sliding Window meter - tswtclmtr
  42  * Description
  43  * This module implements the metering part of RFC 2859. It accepts the
  44  * committed rate, peak rate and the window for a flow and determines
  45  * if the flow is within the committed/peak rate and assigns the appropriate
  46  * next action.
  47  * The meter provides an estimate of the running average bandwidth for the
  48  * flow over the specified window. It uses probability to benefit TCP flows
  49  * as it reduces the likelihood of dropping multiple packets within a TCP
   50  * window without adversely affecting UDP flows.
  51  */
  52 
  53 int tswtcl_debug = 0;
  54 
  55 /*
  56  * Given a packet and the tswtcl_data it belongs to, this routine meters the
  57  * ToS or DSCP for IPv4 and IPv6 resp. with the values configured for
  58  * the tswtcl_data.
  59  */
  60 /* ARGSUSED */
  61 int
  62 tswtcl_process(mblk_t **mpp, tswtcl_data_t *tswtcl_data,
  63     ipp_action_id_t *next_action)
  64 {
  65         ipha_t *ipha;
  66         hrtime_t now;
  67         ip6_t *ip6_hdr;
  68         uint32_t pkt_len;
  69         mblk_t *mp = *mpp;
  70         hrtime_t deltaT;
  71         uint64_t bitsinwin;
  72         uint32_t min = 0, additive, rnd;
  73         tswtcl_cfg_t *cfg_parms = tswtcl_data->cfg_parms;
  74 
  75         if (mp == NULL) {
  76                 tswtcl0dbg(("tswtcl_process: null mp!\n"));
  77                 atomic_add_64(&tswtcl_data->epackets, 1);
  78                 return (EINVAL);
  79         }
  80 
  81         if (mp->b_datap->db_type != M_DATA) {
  82                 if ((mp->b_cont != NULL) &&
  83                     (mp->b_cont->b_datap->db_type == M_DATA)) {
  84                         mp = mp->b_cont;
  85                 } else {
  86                         tswtcl0dbg(("tswtcl_process: no data\n"));
  87                         atomic_add_64(&tswtcl_data->epackets, 1);
  88                         return (EINVAL);
  89                 }
  90         }
  91 
  92         /* Figure out the ToS/Traffic Class and length from the message */
  93         if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) {
  94                 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
  95                         tswtcl0dbg(("tswtcl_process: pullup error\n"));
  96                         atomic_add_64(&tswtcl_data->epackets, 1);
  97                         return (EINVAL);
  98                 }
  99         }
 100         ipha = (ipha_t *)mp->b_rptr;
 101         if (IPH_HDR_VERSION(ipha) == IPV4_VERSION) {
 102                 pkt_len = ntohs(ipha->ipha_length);
 103         } else {
 104                 ip6_hdr = (ip6_t *)mp->b_rptr;
 105                 pkt_len = ntohs(ip6_hdr->ip6_plen) +
 106                     ip_hdr_length_v6(mp, ip6_hdr);
 107         }
 108 
 109         /* Convert into bits */
 110         pkt_len <<= 3;
 111 
 112         /* Get current time */
 113         now = gethrtime();
 114 
 115         /* Update the avg_rate and win_front tswtcl_data */
 116         mutex_enter(&tswtcl_data->tswtcl_lock);
 117 
 118         /* avg_rate = bits/sec and window in msec */
 119         bitsinwin = ((uint64_t)tswtcl_data->avg_rate * cfg_parms->window /
 120             1000) + pkt_len;
 121 
 122         deltaT = now - tswtcl_data->win_front + cfg_parms->nsecwindow;
 123 
 124         tswtcl_data->avg_rate = (uint64_t)bitsinwin * METER_SEC_TO_NSEC /
 125             deltaT;
 126         tswtcl_data->win_front = now;
 127 
 128         if (tswtcl_data->avg_rate <= cfg_parms->committed_rate) {
 129                 *next_action = cfg_parms->green_action;
 130         } else if (tswtcl_data->avg_rate <= cfg_parms->peak_rate) {
 131                 /*
 132                  * Compute the probability:
 133                  *
 134                  * p0 = (avg_rate - committed_rate) / avg_rate
 135                  *
 136                  * Yellow with probability p0
 137                  * Green with probability (1 - p0)
 138                  *
 139                  */
 140                 uint32_t aminusc;
 141 
 142                 /* Get a random no. betweeen 0 and avg_rate */
 143                 (void) random_get_pseudo_bytes((uint8_t *)&additive,
 144                     sizeof (additive));
 145                 rnd = min + (additive % (tswtcl_data->avg_rate - min + 1));
 146 
 147                 aminusc = tswtcl_data->avg_rate - cfg_parms->committed_rate;
 148                 if (aminusc >= rnd) {
 149                         *next_action = cfg_parms->yellow_action;
 150                 } else {
 151                         *next_action = cfg_parms->green_action;
 152                 }
 153         } else {
 154                 /*
 155                  * Compute the probability:
 156                  *
 157                  * p1 = (avg_rate - peak_rate) / avg_rate
 158                  * p2 = (peak_rate - committed_rate) / avg_rate
 159                  *
 160                  * Red with probability p1
 161                  * Yellow with probability p2
 162                  * Green with probability (1 - (p1 + p2))
 163                  *
 164                  */
 165                 uint32_t  aminusp;
 166 
 167                 /* Get a random no. betweeen 0 and avg_rate */
 168                 (void) random_get_pseudo_bytes((uint8_t *)&additive,
 169                     sizeof (additive));
 170                 rnd = min + (additive % (tswtcl_data->avg_rate - min + 1));
 171 
 172                 aminusp = tswtcl_data->avg_rate - cfg_parms->peak_rate;
 173 
 174                 if (aminusp >= rnd) {
 175                         *next_action = cfg_parms->red_action;
 176                 } else if ((cfg_parms->pminusc + aminusp) >= rnd) {
 177                         *next_action = cfg_parms->yellow_action;
 178                 } else {
 179                         *next_action = cfg_parms->green_action;
 180                 }
 181 
 182         }
 183         mutex_exit(&tswtcl_data->tswtcl_lock);
 184 
 185         /* Update Stats */
 186         if (*next_action == cfg_parms->green_action) {
 187                 atomic_add_64(&tswtcl_data->green_packets, 1);
 188                 atomic_add_64(&tswtcl_data->green_bits, pkt_len);
 189         } else if (*next_action == cfg_parms->yellow_action) {
 190                 atomic_add_64(&tswtcl_data->yellow_packets, 1);
 191                 atomic_add_64(&tswtcl_data->yellow_bits, pkt_len);
 192         } else {
 193                 ASSERT(*next_action == cfg_parms->red_action);
 194                 atomic_add_64(&tswtcl_data->red_packets, 1);
 195                 atomic_add_64(&tswtcl_data->red_bits, pkt_len);
 196         }
 197         return (0);
 198 }