1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/strsubr.h>
  26 #include <sys/strsun.h>
  27 #include <sys/param.h>
  28 #include <sys/sysmacros.h>
  29 #include <vm/seg_map.h>
  30 #include <vm/seg_kpm.h>
  31 #include <sys/condvar_impl.h>
  32 #include <sys/sendfile.h>
  33 #include <fs/sockfs/nl7c.h>
  34 #include <fs/sockfs/nl7curi.h>
  35 #include <fs/sockfs/socktpi_impl.h>
  36 
  37 #include <inet/common.h>
  38 #include <inet/ip.h>
  39 #include <inet/ip6.h>
  40 #include <inet/tcp.h>
  41 #include <inet/led.h>
  42 #include <inet/mi.h>
  43 
  44 #include <inet/nca/ncadoorhdr.h>
  45 #include <inet/nca/ncalogd.h>
  46 #include <inet/nca/ncandd.h>
  47 
  48 #include <sys/promif.h>
  49 
  50 /*
  51  * Some externs:
  52  */
  53 
  54 extern boolean_t        nl7c_logd_enabled;
  55 extern void             nl7c_logd_log(uri_desc_t *, uri_desc_t *,
  56                             time_t, ipaddr_t);
  57 extern boolean_t        nl7c_close_addr(struct sonode *);
  58 extern struct sonode    *nl7c_addr2portso(void *);
  59 extern uri_desc_t       *nl7c_http_cond(uri_desc_t *, uri_desc_t *);
  60 
  61 /*
  62  * Various global tuneables:
  63  */
  64 
  65 clock_t         nl7c_uri_ttl = -1;      /* TTL in seconds (-1 == infinite) */
  66 
  67 boolean_t       nl7c_use_kmem = B_FALSE; /* Force use of kmem (no segmap) */
  68 
  69 uint64_t        nl7c_file_prefetch = 1; /* File cache prefetch pages */
  70 
  71 uint64_t        nl7c_uri_max = 0;       /* Maximum bytes (0 == infinite) */
  72 uint64_t        nl7c_uri_bytes = 0;     /* Bytes of kmem used by URIs */
  73 
  74 /*
  75  * Locals:
  76  */
  77 
  78 static int      uri_rd_response(struct sonode *, uri_desc_t *,
  79                     uri_rd_t *, boolean_t);
  80 static int      uri_response(struct sonode *, uri_desc_t *);
  81 
  82 /*
  83  * HTTP scheme functions called from nl7chttp.c:
  84  */
  85 
  86 boolean_t nl7c_http_request(char **, char *, uri_desc_t *, struct sonode *);
  87 boolean_t nl7c_http_response(char **, char *, uri_desc_t *, struct sonode *);
  88 boolean_t nl7c_http_cmp(void *, void *);
  89 mblk_t *nl7c_http_persist(struct sonode *);
  90 void nl7c_http_free(void *arg);
  91 void nl7c_http_init(void);
  92 
  93 /*
  94  * Counters that need to move to kstat and/or be removed:
  95  */
  96 
  97 volatile uint64_t nl7c_uri_request = 0;
  98 volatile uint64_t nl7c_uri_hit = 0;
  99 volatile uint64_t nl7c_uri_pass = 0;
 100 volatile uint64_t nl7c_uri_miss = 0;
 101 volatile uint64_t nl7c_uri_temp = 0;
 102 volatile uint64_t nl7c_uri_more = 0;
 103 volatile uint64_t nl7c_uri_data = 0;
 104 volatile uint64_t nl7c_uri_sendfilev = 0;
 105 volatile uint64_t nl7c_uri_reclaim_calls = 0;
 106 volatile uint64_t nl7c_uri_reclaim_cnt = 0;
 107 volatile uint64_t nl7c_uri_pass_urifail = 0;
 108 volatile uint64_t nl7c_uri_pass_dupbfail = 0;
 109 volatile uint64_t nl7c_uri_more_get = 0;
 110 volatile uint64_t nl7c_uri_pass_method = 0;
 111 volatile uint64_t nl7c_uri_pass_option = 0;
 112 volatile uint64_t nl7c_uri_more_eol = 0;
 113 volatile uint64_t nl7c_uri_more_http = 0;
 114 volatile uint64_t nl7c_uri_pass_http = 0;
 115 volatile uint64_t nl7c_uri_pass_addfail = 0;
 116 volatile uint64_t nl7c_uri_pass_temp = 0;
 117 volatile uint64_t nl7c_uri_expire = 0;
 118 volatile uint64_t nl7c_uri_purge = 0;
 119 volatile uint64_t nl7c_uri_NULL1 = 0;
 120 volatile uint64_t nl7c_uri_NULL2 = 0;
 121 volatile uint64_t nl7c_uri_close = 0;
 122 volatile uint64_t nl7c_uri_temp_close = 0;
 123 volatile uint64_t nl7c_uri_free = 0;
 124 volatile uint64_t nl7c_uri_temp_free = 0;
 125 volatile uint64_t nl7c_uri_temp_mk = 0;
 126 volatile uint64_t nl7c_uri_rd_EAGAIN = 0;
 127 
 128 /*
 129  * Various kmem_cache_t's:
 130  */
 131 
 132 kmem_cache_t *nl7c_uri_kmc;
 133 kmem_cache_t *nl7c_uri_rd_kmc;
 134 static kmem_cache_t *uri_desb_kmc;
 135 static kmem_cache_t *uri_segmap_kmc;
 136 
 137 static void uri_kmc_reclaim(void *);
 138 
 139 static void nl7c_uri_reclaim(void);
 140 
 141 /*
 142  * The URI hash is a dynamically sized A/B bucket hash, when the current
 143  * hash's average bucket chain length exceeds URI_HASH_AVRG a new hash of
 144  * the next P2Ps[] size is created.
 145  *
 * All lookups are done in the current hash then the new hash (if any).
 * If there is a new hash then when a current hash bucket chain is examined
 * any uri_desc_t members will be migrated to the new hash, and when the
 * last uri_desc_t has been migrated the new hash will become the
 * current and the previous current hash will be freed, leaving a single
 * hash.
 152  *
 * uri_hash_t - hash bucket (chain) type, contained in the uri_hash_ab[]
 * and can be accessed only after acquiring the uri_hash_access lock (for
 * READER or WRITER) then acquiring the lock uri_hash_t.lock, the uri_hash_t
 * and all linked uri_desc_t.hash members are protected. Note, a REF_HOLD()
 * is placed on all uri_desc_t uri_hash_t list members.
 158  *
 159  * uri_hash_access - rwlock for all uri_hash_* variables, READER for read
 160  * access and WRITER for write access. Note, WRITER is only required for
 161  * hash geometry changes.
 162  *
 163  * uri_hash_which - which uri_hash_ab[] is the current hash.
 164  *
 165  * uri_hash_n[] - the P2Ps[] index for each uri_hash_ab[].
 166  *
 167  * uri_hash_sz[] - the size for each uri_hash_ab[].
 168  *
 169  * uri_hash_cnt[] - the total uri_desc_t members for each uri_hash_ab[].
 170  *
 171  * uri_hash_overflow[] - the uri_hash_cnt[] for each uri_hash_ab[] when
 172  * a new uri_hash_ab[] needs to be created.
 173  *
 174  * uri_hash_ab[] - the uri_hash_t entries.
 175  *
 176  * uri_hash_lru[] - the last uri_hash_ab[] walked for lru reclaim.
 177  */
 178 
/*
 * A single hash bucket: the head of a singly linked chain of uri_desc_t
 * (linked through uri_desc_t.hash) plus the mutex guarding that chain.
 */
typedef struct uri_hash_s {
        struct uri_desc_s       *list;          /* Head of uri_desc_t chain */
        kmutex_t                lock;           /* Guards list and chain links */
} uri_hash_t;
 183 
#define URI_HASH_AVRG   5       /* Desired average hash chain length */
#define URI_HASH_N_INIT 9       /* P2Ps[] initial index */

/* rwlock for all uri_hash_* variables below */
static krwlock_t        uri_hash_access;
/* Index (0 or 1) of the current hash, the other index is the new hash */
static uint32_t         uri_hash_which = 0;
/* P2Ps[] index used to size each hash */
static uint32_t         uri_hash_n[2] = {URI_HASH_N_INIT, 0};
/* Number of buckets in each hash */
static uint32_t         uri_hash_sz[2] = {0, 0};
/* Number of uri_desc_t members in each hash */
static uint32_t         uri_hash_cnt[2] = {0, 0};
/* Member count at which a new, larger, hash is wanted */
static uint32_t         uri_hash_overflow[2] = {0, 0};
/* The bucket arrays, NULL when the hash doesn't exist */
static uri_hash_t       *uri_hash_ab[2] = {NULL, NULL};
/* Last bucket walked for lru reclaim */
static uri_hash_t       *uri_hash_lru[2] = {NULL, NULL};
 195 
/*
 * Primes for N of 3 - 24 where P is first prime less than (2^(N-1))+(2^(N-2))
 * these primes have been found to be useful for prime sized hash tables.
 *
 * The table is indexed by N, so entries 0 - 2 are unused (0). The trailing
 * 0 appears to act as an end-of-table marker, i.e. there is no larger size
 * after index 24 (uri_add() has a "No larger P2Ps[] value" case).
 */

static const int P2Ps[] = {
        0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,
        6143, 12281, 24571, 49139, 98299, 196597, 393209,
        786431, 1572853, 3145721, 6291449, 12582893, 0};
 205 
 206 /*
 207  * Hash macros:
 208  *
 *    H2A(char *cp, char *ep, char c) - convert the escaped octet (ASCII)
 *    hex multichar of the format "%HH" pointed to by *cp to a char and
 *    return in c, *ep points to past end of (char *), on return *cp will
 *    point to the last char consumed.
 *
 *    URI_HASH(unsigned hix, char *cp, char *ep) - hash the char(s) from
 *    *cp to *ep to the unsigned hix, neither cp nor ep is modified.
 216  *
 217  *    URI_HASH_IX(unsigned hix, int which) - convert the hash value hix to
 218  *    a hash index 0 - (uri_hash_sz[which] - 1).
 219  *
 220  *    URI_HASH_MIGRATE(from, hp, to) - migrate the uri_hash_t *hp list
 221  *    uri_desc_t members from hash from to hash to.
 222  *
 *    URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink the uri_desc_t
 *    *uri which is a member of the uri_hash_t *hp list with a previous
 *    list member of *puri for the uri_hash_ab[] cur. After unlinking
 *    check for cur hash empty, if so make new cur. Note, as this macro
 *    can change a hash chain it needs to be run under hash_access as
 *    RW_WRITER, further as it can change the new hash to cur any access
 *    to the hash state must be done after either dropping locks and
 *    starting over or making sure the global state is consistent after
 *    as before.
 232  */
 233 
 234 #define H2A(cp, ep, c) {                                                \
 235         int     _h = 2;                                                 \
 236         int     _n = 0;                                                 \
 237         char    _hc;                                                    \
 238                                                                         \
 239         while (_h > 0 && ++(cp) < (ep)) {                         \
 240                 if (_h == 1)                                            \
 241                         _n *= 0x10;                                     \
 242                 _hc = *(cp);                                            \
 243                 if (_hc >= '0' && _hc <= '9')                             \
 244                         _n += _hc - '0';                                \
 245                 else if (_hc >= 'a' || _hc <= 'f')                        \
 246                         _n += _hc - 'W';                                \
 247                 else if (_hc >= 'A' || _hc <= 'F')                        \
 248                         _n += _hc - '7';                                \
 249                 _h--;                                                   \
 250         }                                                               \
 251         (c) = _n;                                                       \
 252 }
 253 
 254 #define URI_HASH(hv, cp, ep) {                                          \
 255         char    *_s = (cp);                                             \
 256         char    _c;                                                     \
 257                                                                         \
 258         while (_s < (ep)) {                                          \
 259                 if ((_c = *_s) == '%') {                                \
 260                         H2A(_s, (ep), _c);                              \
 261                 }                                                       \
 262                 CHASH(hv, _c);                                          \
 263                 _s++;                                                   \
 264         }                                                               \
 265 }
 266 
 267 #define URI_HASH_IX(hix, which) (hix) = (hix) % (uri_hash_sz[(which)])
 268 
/*
 * URI_HASH_MIGRATE(from, hp, to) - empty the bucket hp of hash from into
 * hash to.
 *
 * With hp->lock held each uri_desc_t is popped from the head of hp's
 * chain, the member counts of both hashes are adjusted atomically, the
 * bucket in hash to is selected from the uri's saved hvalue and the uri
 * is pushed on the head of that bucket's chain while holding the target
 * bucket's lock (i.e. the target lock nests inside hp->lock). The uri's
 * hit field is reset to 0 as part of the move.
 *
 * No uri_hash_access locking is done here, that is left to the user of
 * the macro.
 */
#define URI_HASH_MIGRATE(from, hp, to) {                                \
        uri_desc_t      *_nuri;                                         \
        uint32_t        _nhix;                                          \
        uri_hash_t      *_nhp;                                          \
                                                                        \
        mutex_enter(&(hp)->lock);                                        \
        while ((_nuri = (hp)->list) != NULL) {                               \
                (hp)->list = _nuri->hash;                         \
                atomic_dec_32(&uri_hash_cnt[(from)]);               \
                atomic_inc_32(&uri_hash_cnt[(to)]);                 \
                _nhix = _nuri->hvalue;                                       \
                URI_HASH_IX(_nhix, to);                                 \
                _nhp = &uri_hash_ab[(to)][_nhix];                   \
                mutex_enter(&_nhp->lock);                                \
                _nuri->hash = _nhp->list;                         \
                _nhp->list = _nuri;                                  \
                _nuri->hit = 0;                                              \
                mutex_exit(&_nhp->lock);                         \
        }                                                               \
        mutex_exit(&(hp)->lock);                                 \
}
 290 
 291 #define URI_HASH_UNLINK(cur, new, hp, puri, uri) {                      \
 292         if ((puri) != NULL) {                                           \
 293                 (puri)->hash = (uri)->hash;                               \
 294         } else {                                                        \
 295                 (hp)->list = (uri)->hash;                         \
 296         }                                                               \
 297         if (atomic_dec_32_nv(&uri_hash_cnt[(cur)]) == 0 &&          \
 298             uri_hash_ab[(new)] != NULL) {                               \
 299                 kmem_free(uri_hash_ab[cur],                             \
 300                     sizeof (uri_hash_t) * uri_hash_sz[cur]);            \
 301                 uri_hash_ab[(cur)] = NULL;                              \
 302                 uri_hash_lru[(cur)] = NULL;                             \
 303                 uri_hash_which = (new);                                 \
 304         } else {                                                        \
 305                 uri_hash_lru[(cur)] = (hp);                             \
 306         }                                                               \
 307 }
 308 
 309 void
 310 nl7c_uri_init(void)
 311 {
 312         uint32_t        cur = uri_hash_which;
 313 
 314         rw_init(&uri_hash_access, NULL, RW_DEFAULT, NULL);
 315 
 316         uri_hash_sz[cur] = P2Ps[URI_HASH_N_INIT];
 317         uri_hash_overflow[cur] = P2Ps[URI_HASH_N_INIT] * URI_HASH_AVRG;
 318         uri_hash_ab[cur] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[cur],
 319             KM_SLEEP);
 320         uri_hash_lru[cur] = uri_hash_ab[cur];
 321 
 322         nl7c_uri_kmc = kmem_cache_create("NL7C_uri_kmc", sizeof (uri_desc_t),
 323             0, NULL, NULL, uri_kmc_reclaim, NULL, NULL, 0);
 324 
 325         nl7c_uri_rd_kmc = kmem_cache_create("NL7C_uri_rd_kmc",
 326             sizeof (uri_rd_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 327 
 328         uri_desb_kmc = kmem_cache_create("NL7C_uri_desb_kmc",
 329             sizeof (uri_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 330 
 331         uri_segmap_kmc = kmem_cache_create("NL7C_uri_segmap_kmc",
 332             sizeof (uri_segmap_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 333 
 334         nl7c_http_init();
 335 }
 336 
 337 #define CV_SZ   16
 338 
 339 void
 340 nl7c_mi_report_hash(mblk_t *mp)
 341 {
 342         uri_hash_t      *hp, *pend;
 343         uri_desc_t      *uri;
 344         uint32_t        cur;
 345         uint32_t        new;
 346         int             n, nz, tot;
 347         uint32_t        cv[CV_SZ + 1];
 348 
 349         rw_enter(&uri_hash_access, RW_READER);
 350         cur = uri_hash_which;
 351         new = cur ? 0 : 1;
 352 next:
 353         for (n = 0; n <= CV_SZ; n++)
 354                 cv[n] = 0;
 355         nz = 0;
 356         tot = 0;
 357         hp = &uri_hash_ab[cur][0];
 358         pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
 359         while (hp < pend) {
 360                 n = 0;
 361                 for (uri = hp->list; uri != NULL; uri = uri->hash) {
 362                         n++;
 363                 }
 364                 tot += n;
 365                 if (n > 0)
 366                         nz++;
 367                 if (n > CV_SZ)
 368                         n = CV_SZ;
 369                 cv[n]++;
 370                 hp++;
 371         }
 372 
 373         (void) mi_mpprintf(mp, "\nHash=%s, Buckets=%d, "
 374             "Avrg=%d\nCount by bucket:", cur != new ? "CUR" : "NEW",
 375             uri_hash_sz[cur], nz != 0 ? ((tot * 10 + 5) / nz) / 10 : 0);
 376         (void) mi_mpprintf(mp, "Free=%d", cv[0]);
 377         for (n = 1; n < CV_SZ; n++) {
 378                 int     pn = 0;
 379                 char    pv[5];
 380                 char    *pp = pv;
 381 
 382                 for (pn = n; pn < 1000; pn *= 10)
 383                         *pp++ = ' ';
 384                 *pp = 0;
 385                 (void) mi_mpprintf(mp, "%s%d=%d", pv, n, cv[n]);
 386         }
 387         (void) mi_mpprintf(mp, "Long=%d", cv[CV_SZ]);
 388 
 389         if (cur != new && uri_hash_ab[new] != NULL) {
 390                 cur = new;
 391                 goto next;
 392         }
 393         rw_exit(&uri_hash_access);
 394 }
 395 
 396 void
 397 nl7c_mi_report_uri(mblk_t *mp)
 398 {
 399         uri_hash_t      *hp;
 400         uri_desc_t      *uri;
 401         uint32_t        cur;
 402         uint32_t        new;
 403         int             ix;
 404         int             ret;
 405         char            sc;
 406 
 407         rw_enter(&uri_hash_access, RW_READER);
 408         cur = uri_hash_which;
 409         new = cur ? 0 : 1;
 410 next:
 411         for (ix = 0; ix < uri_hash_sz[cur]; ix++) {
 412                 hp = &uri_hash_ab[cur][ix];
 413                 mutex_enter(&hp->lock);
 414                 uri = hp->list;
 415                 while (uri != NULL) {
 416                         sc = *(uri->path.ep);
 417                         *(uri->path.ep) = 0;
 418                         ret = mi_mpprintf(mp, "%s: %d %d %d",
 419                             uri->path.cp, (int)uri->resplen,
 420                             (int)uri->respclen, (int)uri->count);
 421                         *(uri->path.ep) = sc;
 422                         if (ret == -1) break;
 423                         uri = uri->hash;
 424                 }
 425                 mutex_exit(&hp->lock);
 426                 if (ret == -1) break;
 427         }
 428         if (ret != -1 && cur != new && uri_hash_ab[new] != NULL) {
 429                 cur = new;
 430                 goto next;
 431         }
 432         rw_exit(&uri_hash_access);
 433 }
 434 
 435 /*
 436  * The uri_desc_t ref_t inactive function called on the last REF_RELE(),
 437  * free all resources contained in the uri_desc_t. Note, the uri_desc_t
 438  * will be freed by REF_RELE() on return.
 439  */
 440 
 441 void
 442 nl7c_uri_inactive(uri_desc_t *uri)
 443 {
 444         int64_t  bytes = 0;
 445 
 446         if (uri->tail) {
 447                 uri_rd_t *rdp = &uri->response;
 448                 uri_rd_t *free = NULL;
 449 
 450                 while (rdp) {
 451                         if (rdp->off == -1) {
 452                                 bytes += rdp->sz;
 453                                 kmem_free(rdp->data.kmem, rdp->sz);
 454                         } else {
 455                                 VN_RELE(rdp->data.vnode);
 456                         }
 457                         rdp = rdp->next;
 458                         if (free != NULL) {
 459                                 kmem_cache_free(nl7c_uri_rd_kmc, free);
 460                         }
 461                         free = rdp;
 462                 }
 463         }
 464         if (bytes) {
 465                 atomic_add_64(&nl7c_uri_bytes, -bytes);
 466         }
 467         if (uri->scheme != NULL) {
 468                 nl7c_http_free(uri->scheme);
 469         }
 470         if (uri->reqmp) {
 471                 freeb(uri->reqmp);
 472         }
 473 }
 474 
 475 /*
 * The reclaim is called by the kmem subsystem when kmem is running
 * low. More work is needed to determine the best reclaim policy, for
 * now we just manipulate the nl7c_uri_max global maximum bytes threshold
 * value using a simple multiplicative backoff of the value every time this
 * function is called then call nl7c_uri_reclaim() to enforce it.
 *
 * Note, this value remains in place and enforced for all subsequent
 * URI request/response processing.
 *
 * Note, nl7c_uri_max is currently initialized to 0 or infinite such that
 * the first call here sets it to the current uri_bytes value then backs
 * off from there.
 488  *
 489  * XXX how do we determine when to increase nl7c_uri_max ???
 490  */
 491 
 492 /*ARGSUSED*/
 493 static void
 494 uri_kmc_reclaim(void *arg)
 495 {
 496         uint64_t new_max;
 497 
 498         if ((new_max = nl7c_uri_max) == 0) {
 499                 /* Currently infinite, initialize to current bytes used */
 500                 nl7c_uri_max = nl7c_uri_bytes;
 501                 new_max = nl7c_uri_bytes;
 502         }
 503         if (new_max > 1) {
 504                 /* Lower max_bytes to 93% of current value */
 505                 new_max >>= 1;                    /* 50% */
 506                 new_max += (new_max >> 1);        /* 75% */
 507                 new_max += (new_max >> 2);        /* 93% */
 508                 if (new_max < nl7c_uri_max)
 509                         nl7c_uri_max = new_max;
 510                 else
 511                         nl7c_uri_max = 1;
 512         }
 513         nl7c_uri_reclaim();
 514 }
 515 
 516 /*
 517  * Delete a uri_desc_t from the URI hash.
 518  */
 519 
 520 static void
 521 uri_delete(uri_desc_t *del)
 522 {
 523         uint32_t        hix;
 524         uri_hash_t      *hp;
 525         uri_desc_t      *uri;
 526         uri_desc_t      *puri;
 527         uint32_t        cur;
 528         uint32_t        new;
 529 
 530         ASSERT(del->hash != URI_TEMP);
 531         rw_enter(&uri_hash_access, RW_WRITER);
 532         cur = uri_hash_which;
 533         new = cur ? 0 : 1;
 534 next:
 535         puri = NULL;
 536         hix = del->hvalue;
 537         URI_HASH_IX(hix, cur);
 538         hp = &uri_hash_ab[cur][hix];
 539         for (uri = hp->list; uri != NULL; uri = uri->hash) {
 540                 if (uri != del) {
 541                         puri = uri;
 542                         continue;
 543                 }
 544                 /*
 545                  * Found the URI, unlink from the hash chain,
 546                  * drop locks, ref release it.
 547                  */
 548                 URI_HASH_UNLINK(cur, new, hp, puri, uri);
 549                 rw_exit(&uri_hash_access);
 550                 REF_RELE(uri);
 551                 return;
 552         }
 553         if (cur != new && uri_hash_ab[new] != NULL) {
 554                 /*
 555                  * Not found in current hash and have a new hash so
 556                  * check the new hash next.
 557                  */
 558                 cur = new;
 559                 goto next;
 560         }
 561         rw_exit(&uri_hash_access);
 562 }
 563 
 564 /*
 565  * Add a uri_desc_t to the URI hash.
 566  */
 567 
 568 static void
 569 uri_add(uri_desc_t *uri, krw_t rwlock, boolean_t nonblocking)
 570 {
 571         uint32_t        hix;
 572         uri_hash_t      *hp;
 573         uint32_t        cur = uri_hash_which;
 574         uint32_t        new = cur ? 0 : 1;
 575 
 576         /*
 577          * Caller of uri_add() must hold the uri_hash_access rwlock.
 578          */
 579         ASSERT((rwlock == RW_READER && RW_READ_HELD(&uri_hash_access)) ||
 580             (rwlock == RW_WRITER && RW_WRITE_HELD(&uri_hash_access)));
 581         /*
 582          * uri_add() always succeeds so add a hash ref to the URI now.
 583          */
 584         REF_HOLD(uri);
 585 again:
 586         hix = uri->hvalue;
 587         URI_HASH_IX(hix, cur);
 588         if (uri_hash_ab[new] == NULL &&
 589             uri_hash_cnt[cur] < uri_hash_overflow[cur]) {
 590                 /*
 591                  * Easy case, no new hash and current hasn't overflowed,
 592                  * add URI to current hash and return.
 593                  *
 594                  * Note, the check for uri_hash_cnt[] above aren't done
 595                  * atomictally, i.e. multiple threads can be in this code
 596                  * as RW_READER and update the cnt[], this isn't a problem
 597                  * as the check is only advisory.
 598                  */
 599         fast:
 600                 atomic_inc_32(&uri_hash_cnt[cur]);
 601                 hp = &uri_hash_ab[cur][hix];
 602                 mutex_enter(&hp->lock);
 603                 uri->hash = hp->list;
 604                 hp->list = uri;
 605                 mutex_exit(&hp->lock);
 606                 rw_exit(&uri_hash_access);
 607                 return;
 608         }
 609         if (uri_hash_ab[new] == NULL) {
 610                 /*
 611                  * Need a new a or b hash, if not already RW_WRITER
 612                  * try to upgrade our lock to writer.
 613                  */
 614                 if (rwlock != RW_WRITER && ! rw_tryupgrade(&uri_hash_access)) {
 615                         /*
 616                          * Upgrade failed, we can't simple exit and reenter
 617                          * the lock as after the exit and before the reenter
 618                          * the whole world can change so just wait for writer
 619                          * then do everything again.
 620                          */
 621                         if (nonblocking) {
 622                                 /*
 623                                  * Can't block, use fast-path above.
 624                                  *
 625                                  * XXX should have a background thread to
 626                                  * handle new ab[] in this case so as to
 627                                  * not overflow the cur hash to much.
 628                                  */
 629                                 goto fast;
 630                         }
 631                         rw_exit(&uri_hash_access);
 632                         rwlock = RW_WRITER;
 633                         rw_enter(&uri_hash_access, rwlock);
 634                         cur = uri_hash_which;
 635                         new = cur ? 0 : 1;
 636                         goto again;
 637                 }
 638                 rwlock = RW_WRITER;
 639                 if (uri_hash_ab[new] == NULL) {
 640                         /*
 641                          * Still need a new hash, allocate and initialize
 642                          * the new hash.
 643                          */
 644                         uri_hash_n[new] = uri_hash_n[cur] + 1;
 645                         if (uri_hash_n[new] == 0) {
 646                                 /*
 647                                  * No larger P2Ps[] value so use current,
 648                                  * i.e. 2 of the largest are better than 1 ?
 649                                  */
 650                                 uri_hash_n[new] = uri_hash_n[cur];
 651                                 cmn_err(CE_NOTE, "NL7C: hash index overflow");
 652                         }
 653                         uri_hash_sz[new] = P2Ps[uri_hash_n[new]];
 654                         ASSERT(uri_hash_cnt[new] == 0);
 655                         uri_hash_overflow[new] = uri_hash_sz[new] *
 656                             URI_HASH_AVRG;
 657                         uri_hash_ab[new] = kmem_zalloc(sizeof (uri_hash_t) *
 658                             uri_hash_sz[new], nonblocking ? KM_NOSLEEP :
 659                             KM_SLEEP);
 660                         if (uri_hash_ab[new] == NULL) {
 661                                 /*
 662                                  * Alloc failed, use fast-path above.
 663                                  *
 664                                  * XXX should have a background thread to
 665                                  * handle new ab[] in this case so as to
 666                                  * not overflow the cur hash to much.
 667                                  */
 668                                 goto fast;
 669                         }
 670                         uri_hash_lru[new] = uri_hash_ab[new];
 671                 }
 672         }
 673         /*
 674          * Hashed against current hash so migrate any current hash chain
 675          * members, if any.
 676          *
 677          * Note, the hash chain list can be checked for a non empty list
 678          * outside of the hash chain list lock as the hash chain struct
 679          * can't be destroyed while in the uri_hash_access rwlock, worst
 680          * case is that a non empty list is found and after acquiring the
 681          * lock another thread beats us to it (i.e. migrated the list).
 682          */
 683         hp = &uri_hash_ab[cur][hix];
 684         if (hp->list != NULL) {
 685                 URI_HASH_MIGRATE(cur, hp, new);
 686         }
 687         /*
 688          * If new hash has overflowed before current hash has been
 689          * completely migrated then walk all current hash chains and
 690          * migrate list members now.
 691          */
 692         if (atomic_inc_32_nv(&uri_hash_cnt[new]) >= uri_hash_overflow[new]) {
 693                 for (hix = 0; hix < uri_hash_sz[cur]; hix++) {
 694                         hp = &uri_hash_ab[cur][hix];
 695                         if (hp->list != NULL) {
 696                                 URI_HASH_MIGRATE(cur, hp, new);
 697                         }
 698                 }
 699         }
 700         /*
 701          * Add URI to new hash.
 702          */
 703         hix = uri->hvalue;
 704         URI_HASH_IX(hix, new);
 705         hp = &uri_hash_ab[new][hix];
 706         mutex_enter(&hp->lock);
 707         uri->hash = hp->list;
 708         hp->list = uri;
 709         mutex_exit(&hp->lock);
 710         /*
 711          * Last, check to see if last cur hash chain has been
 712          * migrated, if so free cur hash and make new hash cur.
 713          */
 714         if (uri_hash_cnt[cur] == 0) {
 715                 /*
 716                  * If we don't already hold the uri_hash_access rwlock for
 717                  * RW_WRITE try to upgrade to RW_WRITE and if successful
 718                  * check again and to see if still need to do the free.
 719                  */
 720                 if ((rwlock == RW_WRITER || rw_tryupgrade(&uri_hash_access)) &&
 721                     uri_hash_cnt[cur] == 0 && uri_hash_ab[new] != 0) {
 722                         kmem_free(uri_hash_ab[cur],
 723                             sizeof (uri_hash_t) * uri_hash_sz[cur]);
 724                         uri_hash_ab[cur] = NULL;
 725                         uri_hash_lru[cur] = NULL;
 726                         uri_hash_which = new;
 727                 }
 728         }
 729         rw_exit(&uri_hash_access);
 730 }
 731 
 732 /*
 733  * Lookup a uri_desc_t in the URI hash, if found free the request uri_desc_t
 734  * and return the found uri_desc_t with a REF_HOLD() placed on it. Else, if
 735  * add B_TRUE use the request URI to create a new hash entry. Else if add
 736  * B_FALSE ...
 737  */
 738 
 739 static uri_desc_t *
 740 uri_lookup(uri_desc_t *ruri, boolean_t add, boolean_t nonblocking)
 741 {
 742         uint32_t        hix;
 743         uri_hash_t      *hp;
 744         uri_desc_t      *uri;
 745         uri_desc_t      *puri;
 746         uint32_t        cur;
 747         uint32_t        new;
 748         char            *rcp = ruri->path.cp;
 749         char            *rep = ruri->path.ep;
 750 
 751 again:
 752         rw_enter(&uri_hash_access, RW_READER);
 753         cur = uri_hash_which;
 754         new = cur ? 0 : 1;
 755 nexthash:
 756         puri = NULL;
 757         hix = ruri->hvalue;
 758         URI_HASH_IX(hix, cur);
 759         hp = &uri_hash_ab[cur][hix];
 760         mutex_enter(&hp->lock);
 761         for (uri = hp->list; uri != NULL; uri = uri->hash) {
 762                 char    *ap = uri->path.cp;
 763                 char    *bp = rcp;
 764                 char    a, b;
 765 
 766                 /* Compare paths */
 767                 while (bp < rep && ap < uri->path.ep) {
 768                         if ((a = *ap) == '%') {
 769                                 /* Escaped hex multichar, convert it */
 770                                 H2A(ap, uri->path.ep, a);
 771                         }
 772                         if ((b = *bp) == '%') {
 773                                 /* Escaped hex multichar, convert it */
 774                                 H2A(bp, rep, b);
 775                         }
 776                         if (a != b) {
 777                                 /* Char's don't match */
 778                                 goto nexturi;
 779                         }
 780                         ap++;
 781                         bp++;
 782                 }
 783                 if (bp != rep || ap != uri->path.ep) {
 784                         /* Not same length */
 785                         goto nexturi;
 786                 }
 787                 ap = uri->auth.cp;
 788                 bp = ruri->auth.cp;
 789                 if (ap != NULL) {
 790                         if (bp == NULL) {
 791                                 /* URI has auth request URI doesn't */
 792                                 goto nexturi;
 793                         }
 794                         while (bp < ruri->auth.ep && ap < uri->auth.ep) {
 795                                 if ((a = *ap) == '%') {
 796                                         /* Escaped hex multichar, convert it */
 797                                         H2A(ap, uri->path.ep, a);
 798                                 }
 799                                 if ((b = *bp) == '%') {
 800                                         /* Escaped hex multichar, convert it */
 801                                         H2A(bp, rep, b);
 802                                 }
 803                                 if (a != b) {
 804                                         /* Char's don't match */
 805                                         goto nexturi;
 806                                 }
 807                                 ap++;
 808                                 bp++;
 809                         }
 810                         if (bp != ruri->auth.ep || ap != uri->auth.ep) {
 811                                 /* Not same length */
 812                                 goto nexturi;
 813                         }
 814                 } else if (bp != NULL) {
 815                         /* URI doesn't have auth and request URI does */
 816                         goto nexturi;
 817                 }
 818                 /*
 819                  * Have a path/auth match so before any other processing
 820                  * of requested URI, check for expire or request no cache
 821                  * purge.
 822                  */
 823                 if (uri->expire >= 0 && uri->expire <= ddi_get_lbolt() ||
 824                     ruri->nocache) {
 825                         /*
 826                          * URI has expired or request specified to not use
 827                          * the cached version, unlink the URI from the hash
 828                          * chain, release all locks, release the hash ref
 829                          * on the URI, and last look it up again.
 830                          *
 831                          * Note, this will cause all variants of the named
 832                          * URI to be purged.
 833                          */
 834                         if (puri != NULL) {
 835                                 puri->hash = uri->hash;
 836                         } else {
 837                                 hp->list = uri->hash;
 838                         }
 839                         mutex_exit(&hp->lock);
 840                         atomic_dec_32(&uri_hash_cnt[cur]);
 841                         rw_exit(&uri_hash_access);
 842                         if (ruri->nocache)
 843                                 nl7c_uri_purge++;
 844                         else
 845                                 nl7c_uri_expire++;
 846                         REF_RELE(uri);
 847                         goto again;
 848                 }
 849                 if (uri->scheme != NULL) {
 850                         /*
 851                          * URI has scheme private qualifier(s), if request
 852                          * URI doesn't or if no match skip this URI.
 853                          */
 854                         if (ruri->scheme == NULL ||
 855                             ! nl7c_http_cmp(uri->scheme, ruri->scheme))
 856                                 goto nexturi;
 857                 } else if (ruri->scheme != NULL) {
 858                         /*
 859                          * URI doesn't have scheme private qualifiers but
 860                          * request URI does, no match, skip this URI.
 861                          */
 862                         goto nexturi;
 863                 }
 864                 /*
 865                  * Have a match, ready URI for return, first put a reference
 866                  * hold on the URI, if this URI is currently being processed
 867                  * then have to wait for the processing to be completed and
 868                  * redo the lookup, else return it.
 869                  */
 870                 REF_HOLD(uri);
 871                 mutex_enter(&uri->proclock);
 872                 if (uri->proc != NULL) {
 873                         /* The URI is being processed, wait for completion */
 874                         mutex_exit(&hp->lock);
 875                         rw_exit(&uri_hash_access);
 876                         if (! nonblocking &&
 877                             cv_wait_sig(&uri->waiting, &uri->proclock)) {
 878                                 /*
 879                                  * URI has been processed but things may
 880                                  * have changed while we were away so do
 881                                  * most everything again.
 882                                  */
 883                                 mutex_exit(&uri->proclock);
 884                                 REF_RELE(uri);
 885                                 goto again;
 886                         } else {
 887                                 /*
 888                                  * A nonblocking socket or an interrupted
 889                                  * cv_wait_sig() in the first case can't
 890                                  * block waiting for the processing of the
 891                                  * uri hash hit uri to complete, in both
 892                                  * cases just return failure to lookup.
 893                                  */
 894                                 mutex_exit(&uri->proclock);
 895                                 REF_RELE(uri);
 896                                 return (NULL);
 897                         }
 898                 }
 899                 mutex_exit(&uri->proclock);
 900                 uri->hit++;
 901                 mutex_exit(&hp->lock);
 902                 rw_exit(&uri_hash_access);
 903                 return (uri);
 904         nexturi:
 905                 puri = uri;
 906         }
 907         mutex_exit(&hp->lock);
 908         if (cur != new && uri_hash_ab[new] != NULL) {
 909                 /*
 910                  * Not found in current hash and have a new hash so
 911                  * check the new hash next.
 912                  */
 913                 cur = new;
 914                 goto nexthash;
 915         }
 916 add:
 917         if (! add) {
 918                 /* Lookup only so return failure */
 919                 rw_exit(&uri_hash_access);
 920                 return (NULL);
 921         }
 922         /*
 923          * URI not hashed, finish intialization of the
 924          * request URI, add it to the hash, return it.
 925          */
 926         ruri->hit = 0;
 927         ruri->expire = -1;
 928         ruri->response.sz = 0;
 929         ruri->proc = (struct sonode *)~NULL;
 930         cv_init(&ruri->waiting, NULL, CV_DEFAULT, NULL);
 931         mutex_init(&ruri->proclock, NULL, MUTEX_DEFAULT, NULL);
 932         uri_add(ruri, RW_READER, nonblocking);
 933         /* uri_add() has done rw_exit(&uri_hash_access) */
 934         return (ruri);
 935 }
 936 
 937 /*
 938  * Reclaim URIs until max cache size threshold has been reached.
 939  *
 940  * A CLOCK based reclaim modified with a history (hit counter) counter.
 941  */
 942 
 943 static void
 944 nl7c_uri_reclaim(void)
 945 {
 946         uri_hash_t      *hp, *start, *pend;
 947         uri_desc_t      *uri;
 948         uri_desc_t      *puri;
 949         uint32_t        cur;
 950         uint32_t        new;
 951 
 952         nl7c_uri_reclaim_calls++;
 953 again:
 954         rw_enter(&uri_hash_access, RW_WRITER);
 955         cur = uri_hash_which;
 956         new = cur ? 0 : 1;
 957 next:
 958         hp = uri_hash_lru[cur];
 959         start = hp;
 960         pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
 961         while (nl7c_uri_bytes > nl7c_uri_max) {
 962                 puri = NULL;
 963                 for (uri = hp->list; uri != NULL; uri = uri->hash) {
 964                         if (uri->hit != 0) {
 965                                 /*
 966                                  * Decrement URI activity counter and skip.
 967                                  */
 968                                 uri->hit--;
 969                                 puri = uri;
 970                                 continue;
 971                         }
 972                         if (uri->proc != NULL) {
 973                                 /*
 974                                  * Currently being processed by a socket, skip.
 975                                  */
 976                                 continue;
 977                         }
 978                         /*
 979                          * Found a candidate, no hit(s) since added or last
 980                          * reclaim pass, unlink from it's hash chain, update
 981                          * lru scan pointer, drop lock, ref release it.
 982                          */
 983                         URI_HASH_UNLINK(cur, new, hp, puri, uri);
 984                         if (cur == uri_hash_which) {
 985                                 if (++hp == pend) {
 986                                         /* Wrap pointer */
 987                                         hp = uri_hash_ab[cur];
 988                                 }
 989                                 uri_hash_lru[cur] = hp;
 990                         }
 991                         rw_exit(&uri_hash_access);
 992                         REF_RELE(uri);
 993                         nl7c_uri_reclaim_cnt++;
 994                         goto again;
 995                 }
 996                 if (++hp == pend) {
 997                         /* Wrap pointer */
 998                         hp = uri_hash_ab[cur];
 999                 }
1000                 if (hp == start) {
1001                         if (cur != new && uri_hash_ab[new] != NULL) {
1002                                 /*
1003                                  * Done with the current hash and have a
1004                                  * new hash so check the new hash next.
1005                                  */
1006                                 cur = new;
1007                                 goto next;
1008                         }
1009                 }
1010         }
1011         rw_exit(&uri_hash_access);
1012 }
1013 
1014 /*
1015  * Called for a socket which is being freed prior to close, e.g. errored.
1016  */
1017 
1018 void
1019 nl7c_urifree(struct sonode *so)
1020 {
1021         sotpi_info_t *sti = SOTOTPI(so);
1022         uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1023 
1024         sti->sti_nl7c_uri = NULL;
1025         if (uri->hash != URI_TEMP) {
1026                 uri_delete(uri);
1027                 mutex_enter(&uri->proclock);
1028                 uri->proc = NULL;
1029                 if (CV_HAS_WAITERS(&uri->waiting)) {
1030                         cv_broadcast(&uri->waiting);
1031                 }
1032                 mutex_exit(&uri->proclock);
1033                 nl7c_uri_free++;
1034         } else {
1035                 /* No proclock as uri exclusively owned by so */
1036                 uri->proc = NULL;
1037                 nl7c_uri_temp_free++;
1038         }
1039         REF_RELE(uri);
1040 }
1041 
1042 /*
1043  * ...
1044  *
1045  *      < 0  need more data
1046  *
1047  *        0     parse complete
1048  *
1049  *      > 0  parse error
1050  */
1051 
1052 volatile uint64_t nl7c_resp_pfail = 0;
1053 volatile uint64_t nl7c_resp_ntemp = 0;
1054 volatile uint64_t nl7c_resp_pass = 0;
1055 
1056 static int
1057 nl7c_resp_parse(struct sonode *so, uri_desc_t *uri, char *data, int sz)
1058 {
1059         if (! nl7c_http_response(&data, &data[sz], uri, so)) {
1060                 if (data == NULL) {
1061                         /* Parse fail */
1062                         goto pfail;
1063                 }
1064                 /* More data */
1065                 data = NULL;
1066         } else if (data == NULL) {
1067                 goto pass;
1068         }
1069         if (uri->hash != URI_TEMP && uri->nocache) {
1070                 /*
1071                  * After response parse now no cache,
1072                  * delete it from cache, wakeup any
1073                  * waiters on this URI, make URI_TEMP.
1074                  */
1075                 uri_delete(uri);
1076                 mutex_enter(&uri->proclock);
1077                 if (CV_HAS_WAITERS(&uri->waiting)) {
1078                         cv_broadcast(&uri->waiting);
1079                 }
1080                 mutex_exit(&uri->proclock);
1081                 uri->hash = URI_TEMP;
1082                 nl7c_uri_temp_mk++;
1083         }
1084         if (data == NULL) {
1085                 /* More data needed */
1086                 return (-1);
1087         }
1088         /* Success */
1089         return (0);
1090 
1091 pfail:
1092         nl7c_resp_pfail++;
1093         return (EINVAL);
1094 
1095 pass:
1096         nl7c_resp_pass++;
1097         return (ENOTSUP);
1098 }
1099 
1100 /*
1101  * Called to sink application response data, the processing of the data
1102  * is the same for a cached or temp URI (i.e. a URI for which we aren't
1103  * going to cache the URI but want to parse it for detecting response
1104  * data end such that for a persistent connection we can parse the next
1105  * request).
1106  *
1107  * On return 0 is returned for sink success, > 0 on error, and < 0 on
1108  * no so URI (note, data not sinked).
1109  */
1110 
1111 int
1112 nl7c_data(struct sonode *so, uio_t *uio)
1113 {
1114         sotpi_info_t    *sti = SOTOTPI(so);
1115         uri_desc_t      *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1116         iovec_t         *iov;
1117         int             cnt;
1118         int             sz = uio->uio_resid;
1119         char            *data, *alloc;
1120         char            *bp;
1121         uri_rd_t        *rdp;
1122         boolean_t       first;
1123         int             error, perror;
1124 
1125         nl7c_uri_data++;
1126 
1127         if (uri == NULL) {
1128                 /* Socket & NL7C out of sync, disable NL7C */
1129                 sti->sti_nl7c_flags = 0;
1130                 nl7c_uri_NULL1++;
1131                 return (-1);
1132         }
1133 
1134         if (sti->sti_nl7c_flags & NL7C_WAITWRITE) {
1135                 sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
1136                 first = B_TRUE;
1137         } else {
1138                 first = B_FALSE;
1139         }
1140 
1141         alloc = kmem_alloc(sz, KM_SLEEP);
1142         URI_RD_ADD(uri, rdp, sz, -1);
1143         if (rdp == NULL) {
1144                 error = ENOMEM;
1145                 goto fail;
1146         }
1147 
1148         if (uri->hash != URI_TEMP && uri->count > nca_max_cache_size) {
1149                 uri_delete(uri);
1150                 uri->hash = URI_TEMP;
1151         }
1152         data = alloc;
1153         alloc = NULL;
1154         rdp->data.kmem = data;
1155         atomic_add_64(&nl7c_uri_bytes, sz);
1156 
1157         bp = data;
1158         while (uio->uio_resid > 0) {
1159                 iov = uio->uio_iov;
1160                 if ((cnt = iov->iov_len) == 0) {
1161                         goto next;
1162                 }
1163                 cnt = MIN(cnt, uio->uio_resid);
1164                 error = xcopyin(iov->iov_base, bp, cnt);
1165                 if (error)
1166                         goto fail;
1167 
1168                 iov->iov_base += cnt;
1169                 iov->iov_len -= cnt;
1170                 uio->uio_resid -= cnt;
1171                 uio->uio_loffset += cnt;
1172                 bp += cnt;
1173         next:
1174                 uio->uio_iov++;
1175                 uio->uio_iovcnt--;
1176         }
1177 
1178         /* Successfull sink of data, response parse the data */
1179         perror = nl7c_resp_parse(so, uri, data, sz);
1180 
1181         /* Send the data out the connection */
1182         error = uri_rd_response(so, uri, rdp, first);
1183         if (error)
1184                 goto fail;
1185 
1186         /* Success */
1187         if (perror == 0 &&
1188             ((uri->respclen == URI_LEN_NOVALUE &&
1189             uri->resplen == URI_LEN_NOVALUE) ||
1190             uri->count >= uri->resplen)) {
1191                 /*
1192                  * No more data needed and no pending response
1193                  * data or current data count >= response length
1194                  * so close the URI processing for this so.
1195                  */
1196                 nl7c_close(so);
1197                 if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
1198                         /* Not a persistent connection */
1199                         sti->sti_nl7c_flags = 0;
1200                 }
1201         }
1202 
1203         return (0);
1204 
1205 fail:
1206         if (alloc != NULL) {
1207                 kmem_free(alloc, sz);
1208         }
1209         sti->sti_nl7c_flags = 0;
1210         nl7c_urifree(so);
1211 
1212         return (error);
1213 }
1214 
1215 /*
1216  * Called to read data from file "*fp" at offset "*off" of length "*len"
1217  * for a maximum of "*max_rem" bytes.
1218  *
1219  * On success a pointer to the kmem_alloc()ed file data is returned, "*off"
1220  * and "*len" are updated for the acutal number of bytes read and "*max_rem"
1221  * is updated with the number of bytes remaining to be read.
1222  *
1223  * Else, "NULL" is returned.
1224  */
1225 
1226 static char *
1227 nl7c_readfile(file_t *fp, u_offset_t *off, int *len, int max, int *ret)
1228 {
1229         vnode_t *vp = fp->f_vnode;
1230         int     flg = 0;
1231         size_t  size = MIN(*len, max);
1232         char    *data;
1233         int     error;
1234         uio_t   uio;
1235         iovec_t iov;
1236 
1237         (void) VOP_RWLOCK(vp, flg, NULL);
1238 
1239         if (*off > MAXOFFSET_T) {
1240                 VOP_RWUNLOCK(vp, flg, NULL);
1241                 *ret = EFBIG;
1242                 return (NULL);
1243         }
1244 
1245         if (*off + size > MAXOFFSET_T)
1246                 size = (ssize32_t)(MAXOFFSET_T - *off);
1247 
1248         data = kmem_alloc(size, KM_SLEEP);
1249 
1250         iov.iov_base = data;
1251         iov.iov_len = size;
1252         uio.uio_loffset = *off;
1253         uio.uio_iov = &iov;
1254         uio.uio_iovcnt = 1;
1255         uio.uio_resid = size;
1256         uio.uio_segflg = UIO_SYSSPACE;
1257         uio.uio_llimit = MAXOFFSET_T;
1258         uio.uio_fmode = fp->f_flag;
1259 
1260         error = VOP_READ(vp, &uio, fp->f_flag, fp->f_cred, NULL);
1261         VOP_RWUNLOCK(vp, flg, NULL);
1262         *ret = error;
1263         if (error) {
1264                 kmem_free(data, size);
1265                 return (NULL);
1266         }
1267         *len = size;
1268         *off += size;
1269         return (data);
1270 }
1271 
1272 /*
1273  * Called to sink application response sendfilev, as with nl7c_data() above
1274  * all the data will be processed by NL7C unless there's an error.
1275  */
1276 
1277 int
1278 nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
1279         int sfvc, ssize_t *xfer)
1280 {
1281         sotpi_info_t    *sti = SOTOTPI(so);
1282         uri_desc_t      *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1283         file_t          *fp = NULL;
1284         vnode_t         *vp = NULL;
1285         char            *data = NULL;
1286         u_offset_t      off;
1287         int             len;
1288         int             cnt;
1289         int             total_count = 0;
1290         char            *alloc;
1291         uri_rd_t        *rdp;
1292         int             max;
1293         int             perror;
1294         int             error = 0;
1295         boolean_t       first = B_TRUE;
1296 
1297         nl7c_uri_sendfilev++;
1298 
1299         if (uri == NULL) {
1300                 /* Socket & NL7C out of sync, disable NL7C */
1301                 sti->sti_nl7c_flags = 0;
1302                 nl7c_uri_NULL2++;
1303                 return (0);
1304         }
1305 
1306         if (sti->sti_nl7c_flags & NL7C_WAITWRITE)
1307                 sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
1308 
1309         while (sfvc-- > 0) {
1310                 /*
1311                  * off - the current sfv read file offset or user address.
1312                  *
1313                  * len - the current sfv length in bytes.
1314                  *
1315                  * cnt - number of bytes kmem_alloc()ed.
1316                  *
1317                  * alloc - the kmem_alloc()ed buffer of size "cnt".
1318                  *
1319                  * data - copy of "alloc" used for post alloc references.
1320                  *
1321                  * fp - the current sfv file_t pointer.
1322                  *
1323                  * vp - the current "*vp" vnode_t pointer.
1324                  *
1325                  * Note, for "data" and "fp" and "vp" a NULL value is used
1326                  * when not allocated such that the common failure path "fail"
1327                  * is used.
1328                  */
1329                 off = sfvp->sfv_off;
1330                 len = sfvp->sfv_len;
1331                 cnt = len;
1332 
1333                 if (len == 0) {
1334                         sfvp++;
1335                         continue;
1336                 }
1337 
1338                 if (sfvp->sfv_fd == SFV_FD_SELF) {
1339                         /*
1340                          * User memory, copyin() all the bytes.
1341                          */
1342                         alloc = kmem_alloc(cnt, KM_SLEEP);
1343                         error = xcopyin((caddr_t)(uintptr_t)off, alloc, cnt);
1344                         if (error)
1345                                 goto fail;
1346                 } else {
1347                         /*
1348                          * File descriptor, prefetch some bytes.
1349                          */
1350                         if ((fp = getf(sfvp->sfv_fd)) == NULL) {
1351                                 error = EBADF;
1352                                 goto fail;
1353                         }
1354                         if ((fp->f_flag & FREAD) == 0) {
1355                                 error = EACCES;
1356                                 goto fail;
1357                         }
1358                         vp = fp->f_vnode;
1359                         if (vp->v_type != VREG) {
1360                                 error = EINVAL;
1361                                 goto fail;
1362                         }
1363                         VN_HOLD(vp);
1364 
1365                         /* Read max_rem bytes from file for prefetch */
1366                         if (nl7c_use_kmem) {
1367                                 max = cnt;
1368                         } else {
1369                                 max = MAXBSIZE * nl7c_file_prefetch;
1370                         }
1371                         alloc = nl7c_readfile(fp, &off, &cnt, max, &error);
1372                         if (alloc == NULL)
1373                                 goto fail;
1374 
1375                         releasef(sfvp->sfv_fd);
1376                         fp = NULL;
1377                 }
1378                 URI_RD_ADD(uri, rdp, cnt, -1);
1379                 if (rdp == NULL) {
1380                         error = ENOMEM;
1381                         goto fail;
1382                 }
1383                 data = alloc;
1384                 alloc = NULL;
1385                 rdp->data.kmem = data;
1386                 total_count += cnt;
1387                 if (uri->hash != URI_TEMP && total_count > nca_max_cache_size) {
1388                         uri_delete(uri);
1389                         uri->hash = URI_TEMP;
1390                 }
1391 
1392                 /* Response parse */
1393                 perror = nl7c_resp_parse(so, uri, data, len);
1394 
1395                 /* Send kmem data out the connection */
1396                 error = uri_rd_response(so, uri, rdp, first);
1397 
1398                 if (error)
1399                         goto fail;
1400 
1401                 if (sfvp->sfv_fd != SFV_FD_SELF) {
1402                         /*
1403                          * File descriptor, if any bytes left save vnode_t.
1404                          */
1405                         if (len > cnt) {
1406                                 /* More file data so add it */
1407                                 URI_RD_ADD(uri, rdp, len - cnt, off);
1408                                 if (rdp == NULL) {
1409                                         error = ENOMEM;
1410                                         goto fail;
1411                                 }
1412                                 rdp->data.vnode = vp;
1413 
1414                                 /* Send vnode data out the connection */
1415                                 error = uri_rd_response(so, uri, rdp, first);
1416                         } else {
1417                                 /* All file data fit in the prefetch */
1418                                 VN_RELE(vp);
1419                         }
1420                         *fileoff += len;
1421                         vp = NULL;
1422                 }
1423                 *xfer += len;
1424                 sfvp++;
1425 
1426                 if (first)
1427                         first = B_FALSE;
1428         }
1429         if (total_count > 0) {
1430                 atomic_add_64(&nl7c_uri_bytes, total_count);
1431         }
1432         if (perror == 0 &&
1433             ((uri->respclen == URI_LEN_NOVALUE &&
1434             uri->resplen == URI_LEN_NOVALUE) ||
1435             uri->count >= uri->resplen)) {
1436                 /*
1437                  * No more data needed and no pending response
1438                  * data or current data count >= response length
1439                  * so close the URI processing for this so.
1440                  */
1441                 nl7c_close(so);
1442                 if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
1443                         /* Not a persistent connection */
1444                         sti->sti_nl7c_flags = 0;
1445                 }
1446         }
1447 
1448         return (0);
1449 
1450 fail:
1451         if (error == EPIPE)
1452                 tsignal(curthread, SIGPIPE);
1453 
1454         if (alloc != NULL)
1455                 kmem_free(data, len);
1456 
1457         if (vp != NULL)
1458                 VN_RELE(vp);
1459 
1460         if (fp != NULL)
1461                 releasef(sfvp->sfv_fd);
1462 
1463         if (total_count > 0) {
1464                 atomic_add_64(&nl7c_uri_bytes, total_count);
1465         }
1466 
1467         sti->sti_nl7c_flags = 0;
1468         nl7c_urifree(so);
1469 
1470         return (error);
1471 }
1472 
1473 /*
1474  * Called for a socket which is closing or when an application has
1475  * completed sending all the response data (i.e. for a persistent
1476  * connection called once for each completed application response).
1477  */
1478 
1479 void
1480 nl7c_close(struct sonode *so)
1481 {
1482         sotpi_info_t    *sti = SOTOTPI(so);
1483         uri_desc_t      *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1484 
1485         if (uri == NULL) {
1486                 /*
1487                  * No URI being processed so might be a listen()er
1488                  * if so do any cleanup, else nothing more to do.
1489                  */
1490                 if (so->so_state & SS_ACCEPTCONN) {
1491                         (void) nl7c_close_addr(so);
1492                 }
1493                 return;
1494         }
1495         sti->sti_nl7c_uri = NULL;
1496         if (uri->hash != URI_TEMP) {
1497                 mutex_enter(&uri->proclock);
1498                 uri->proc = NULL;
1499                 if (CV_HAS_WAITERS(&uri->waiting)) {
1500                         cv_broadcast(&uri->waiting);
1501                 }
1502                 mutex_exit(&uri->proclock);
1503                 nl7c_uri_close++;
1504         } else {
1505                 /* No proclock as uri exclusively owned by so */
1506                 uri->proc = NULL;
1507                 nl7c_uri_temp_close++;
1508         }
1509         REF_RELE(uri);
1510         if (nl7c_uri_max > 0 && nl7c_uri_bytes > nl7c_uri_max) {
1511                 nl7c_uri_reclaim();
1512         }
1513 }
1514 
1515 /*
1516  * The uri_segmap_t ref_t inactive function called on the last REF_RELE(),
1517  * release the segmap mapping. Note, the uri_segmap_t will be freed by
1518  * REF_RELE() on return.
1519  */
1520 
1521 void
1522 uri_segmap_inactive(uri_segmap_t *smp)
1523 {
1524         if (!segmap_kpm) {
1525                 (void) segmap_fault(kas.a_hat, segkmap, smp->base,
1526                     smp->len, F_SOFTUNLOCK, S_OTHER);
1527         }
1528         (void) segmap_release(segkmap, smp->base, SM_DONTNEED);
1529         VN_RELE(smp->vp);
1530 }
1531 
1532 /*
1533  * The call-back for desballoc()ed mblk_t's, if a segmap mapped mblk_t
1534  * release the reference, one per desballoc() of a segmap page, if a rd_t
1535  * mapped mblk_t release the reference, one per desballoc() of a uri_desc_t,
1536  * last kmem free the uri_desb_t.
1537  */
1538 
1539 static void
1540 uri_desb_free(uri_desb_t *desb)
1541 {
1542         if (desb->segmap != NULL) {
1543                 REF_RELE(desb->segmap);
1544         }
1545         REF_RELE(desb->uri);
1546         kmem_cache_free(uri_desb_kmc, desb);
1547 }
1548 
1549 /*
1550  * Segmap map up to a page of a uri_rd_t file descriptor.
1551  */
1552 
1553 uri_segmap_t *
1554 uri_segmap_map(uri_rd_t *rdp, int bytes)
1555 {
1556         uri_segmap_t    *segmap = kmem_cache_alloc(uri_segmap_kmc, KM_SLEEP);
1557         int             len = MIN(rdp->sz, MAXBSIZE);
1558 
1559         if (len > bytes)
1560                 len = bytes;
1561 
1562         REF_INIT(segmap, 1, uri_segmap_inactive, uri_segmap_kmc);
1563         segmap->len = len;
1564         VN_HOLD(rdp->data.vnode);
1565         segmap->vp = rdp->data.vnode;
1566 
1567         segmap->base = segmap_getmapflt(segkmap, segmap->vp, rdp->off, len,
1568             segmap_kpm ? SM_FAULT : 0, S_READ);
1569 
1570         if (segmap_fault(kas.a_hat, segkmap, segmap->base, len,
1571             F_SOFTLOCK, S_READ) != 0) {
1572                 REF_RELE(segmap);
1573                 return (NULL);
1574         }
1575         return (segmap);
1576 }
1577 
1578 /*
1579  * Chop up the kernel virtual memory area *data of size *sz bytes for
1580  * a maximum of *bytes bytes into an besballoc()ed mblk_t chain using
1581  * the given template uri_desb_t *temp of max_mblk bytes per.
1582  *
1583  * The values of *data, *sz, and *bytes are updated on return, the
1584  * mblk_t chain is returned.
1585  */
1586 
1587 static mblk_t *
1588 uri_desb_chop(
1589         char            **data,
1590         size_t          *sz,
1591         int             *bytes,
1592         uri_desb_t      *temp,
1593         int             max_mblk,
1594         char            *eoh,
1595         mblk_t          *persist
1596 )
1597 {
1598         char            *ldata = *data;
1599         size_t          lsz = *sz;
1600         int             lbytes = bytes ? *bytes : lsz;
1601         uri_desb_t      *desb;
1602         mblk_t          *mp = NULL;
1603         mblk_t          *nmp, *pmp = NULL;
1604         int             msz;
1605 
1606         if (lbytes == 0 && lsz == 0)
1607                 return (NULL);
1608 
1609         while (lbytes > 0 && lsz > 0) {
1610                 msz = MIN(lbytes, max_mblk);
1611                 msz = MIN(msz, lsz);
1612                 if (persist && eoh >= ldata && eoh < &ldata[msz]) {
1613                         msz = (eoh - ldata);
1614                         pmp = persist;
1615                         persist = NULL;
1616                         if (msz == 0) {
1617                                 nmp = pmp;
1618                                 pmp = NULL;
1619                                 goto zero;
1620                         }
1621                 }
1622                 desb = kmem_cache_alloc(uri_desb_kmc, KM_SLEEP);
1623                 REF_HOLD(temp->uri);
1624                 if (temp->segmap) {
1625                         REF_HOLD(temp->segmap);
1626                 }
1627                 bcopy(temp, desb, sizeof (*desb));
1628                 desb->frtn.free_arg = (caddr_t)desb;
1629                 nmp = desballoc((uchar_t *)ldata, msz, BPRI_HI, &desb->frtn);
1630                 if (nmp == NULL) {
1631                         if (temp->segmap) {
1632                                 REF_RELE(temp->segmap);
1633                         }
1634                         REF_RELE(temp->uri);
1635                         if (mp != NULL) {
1636                                 mp->b_next = NULL;
1637                                 freemsg(mp);
1638                         }
1639                         if (persist != NULL) {
1640                                 freeb(persist);
1641                         }
1642                         return (NULL);
1643                 }
1644                 nmp->b_wptr += msz;
1645         zero:
1646                 if (mp != NULL) {
1647                         mp->b_next->b_cont = nmp;
1648                 } else {
1649                         mp = nmp;
1650                 }
1651                 if (pmp != NULL) {
1652                         nmp->b_cont = pmp;
1653                         nmp = pmp;
1654                         pmp = NULL;
1655                 }
1656                 mp->b_next = nmp;
1657                 ldata += msz;
1658                 lsz -= msz;
1659                 lbytes -= msz;
1660         }
1661         *data = ldata;
1662         *sz = lsz;
1663         if (bytes)
1664                 *bytes = lbytes;
1665         return (mp);
1666 }
1667 
1668 /*
1669  * Experimential noqwait (i.e. no canput()/qwait() checks), just send
1670  * the entire mblk_t chain down without flow-control checks.
1671  */
1672 
1673 static int
1674 kstrwritempnoqwait(struct vnode *vp, mblk_t *mp)
1675 {
1676         struct stdata *stp;
1677         int error = 0;
1678 
1679         ASSERT(vp->v_stream);
1680         stp = vp->v_stream;
1681 
1682         /* Fast check of flags before acquiring the lock */
1683         if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
1684                 mutex_enter(&stp->sd_lock);
1685                 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
1686                 mutex_exit(&stp->sd_lock);
1687                 if (error != 0) {
1688                         if (!(stp->sd_flag & STPLEX) &&
1689                             (stp->sd_wput_opt & SW_SIGPIPE)) {
1690                                 error = EPIPE;
1691                         }
1692                         return (error);
1693                 }
1694         }
1695         putnext(stp->sd_wrq, mp);
1696         return (0);
1697 }
1698 
1699 /*
1700  * Send the URI uri_desc_t *uri response uri_rd_t *rdp out the socket_t *so.
1701  */
1702 
1703 static int
1704 uri_rd_response(struct sonode *so,
1705     uri_desc_t *uri,
1706     uri_rd_t *rdp,
1707     boolean_t first)
1708 {
1709         vnode_t         *vp = SOTOV(so);
1710         int             max_mblk = (int)vp->v_stream->sd_maxblk;
1711         int             wsz;
1712         mblk_t          *mp, *wmp, *persist;
1713         int             write_bytes;
1714         uri_rd_t        rd;
1715         uri_desb_t      desb;
1716         uri_segmap_t    *segmap = NULL;
1717         char            *segmap_data;
1718         size_t          segmap_sz;
1719         int             error;
1720         int             fflg = ((so->so_state & SS_NDELAY) ? FNDELAY : 0) |
1721             ((so->so_state & SS_NONBLOCK) ? FNONBLOCK : 0);
1722 
1723 
1724         /* Initialize template uri_desb_t */
1725         desb.frtn.free_func = uri_desb_free;
1726         desb.frtn.free_arg = NULL;
1727         desb.uri = uri;
1728 
1729         /* Get a local copy of the rd_t */
1730         bcopy(rdp, &rd, sizeof (rd));
1731         do {
1732                 if (first) {
1733                         /*
1734                          * For first kstrwrite() enough data to get
1735                          * things going, note non blocking version of
1736                          * kstrwrite() will be used below.
1737                          */
1738                         write_bytes = P2ROUNDUP((max_mblk * 4),
1739                             MAXBSIZE * nl7c_file_prefetch);
1740                 } else {
1741                         if ((write_bytes = so->so_sndbuf) == 0)
1742                                 write_bytes = vp->v_stream->sd_qn_maxpsz;
1743                         ASSERT(write_bytes > 0);
1744                         write_bytes = P2ROUNDUP(write_bytes, MAXBSIZE);
1745                 }
1746                 /*
1747                  * Chop up to a write_bytes worth of data.
1748                  */
1749                 wmp = NULL;
1750                 wsz = write_bytes;
1751                 do {
1752                         if (rd.sz == 0)
1753                                 break;
1754                         if (rd.off == -1) {
1755                                 if (uri->eoh >= rd.data.kmem &&
1756                                     uri->eoh < &rd.data.kmem[rd.sz]) {
1757                                         persist = nl7c_http_persist(so);
1758                                 } else {
1759                                         persist = NULL;
1760                                 }
1761                                 desb.segmap = NULL;
1762                                 mp = uri_desb_chop(&rd.data.kmem, &rd.sz,
1763                                     &wsz, &desb, max_mblk, uri->eoh, persist);
1764                                 if (mp == NULL) {
1765                                         error = ENOMEM;
1766                                         goto invalidate;
1767                                 }
1768                         } else {
1769                                 if (segmap == NULL) {
1770                                         segmap = uri_segmap_map(&rd,
1771                                             write_bytes);
1772                                         if (segmap == NULL) {
1773                                                 error = ENOMEM;
1774                                                 goto invalidate;
1775                                         }
1776                                         desb.segmap = segmap;
1777                                         segmap_data = segmap->base;
1778                                         segmap_sz = segmap->len;
1779                                 }
1780                                 mp = uri_desb_chop(&segmap_data, &segmap_sz,
1781                                     &wsz, &desb, max_mblk, NULL, NULL);
1782                                 if (mp == NULL) {
1783                                         error = ENOMEM;
1784                                         goto invalidate;
1785                                 }
1786                                 if (segmap_sz == 0) {
1787                                         rd.sz -= segmap->len;
1788                                         rd.off += segmap->len;
1789                                         REF_RELE(segmap);
1790                                         segmap = NULL;
1791                                 }
1792                         }
1793                         if (wmp == NULL) {
1794                                 wmp = mp;
1795                         } else {
1796                                 wmp->b_next->b_cont = mp;
1797                                 wmp->b_next = mp->b_next;
1798                                 mp->b_next = NULL;
1799                         }
1800                 } while (wsz > 0 && rd.sz > 0);
1801 
1802                 wmp->b_next = NULL;
1803                 if (first) {
1804                         /* First kstrwrite(), use noqwait */
1805                         if ((error = kstrwritempnoqwait(vp, wmp)) != 0)
1806                                 goto invalidate;
1807                         /*
1808                          * For the rest of the kstrwrite()s use SO_SNDBUF
1809                          * worth of data at a time, note these kstrwrite()s
1810                          * may (will) block one or more times.
1811                          */
1812                         first = B_FALSE;
1813                 } else {
1814                         if ((error = kstrwritemp(vp, wmp, fflg)) != 0) {
1815                                 if (error == EAGAIN) {
1816                                         nl7c_uri_rd_EAGAIN++;
1817                                         if ((error =
1818                                             kstrwritempnoqwait(vp, wmp)) != 0)
1819                                                 goto invalidate;
1820                                 } else
1821                                         goto invalidate;
1822                         }
1823                 }
1824         } while (rd.sz > 0);
1825 
1826         return (0);
1827 
1828 invalidate:
1829         if (segmap) {
1830                 REF_RELE(segmap);
1831         }
1832         if (wmp)
1833                 freemsg(wmp);
1834 
1835         return (error);
1836 }
1837 
1838 /*
1839  * Send the URI uri_desc_t *uri response out the socket_t *so.
1840  */
1841 
1842 static int
1843 uri_response(struct sonode *so, uri_desc_t *uri)
1844 {
1845         uri_rd_t        *rdp = &uri->response;
1846         boolean_t       first = B_TRUE;
1847         int             error;
1848 
1849         while (rdp != NULL) {
1850                 error = uri_rd_response(so, uri, rdp, first);
1851                 if (error != 0) {
1852                         goto invalidate;
1853                 }
1854                 first = B_FALSE;
1855                 rdp = rdp->next;
1856         }
1857         return (0);
1858 
1859 invalidate:
1860         if (uri->hash != URI_TEMP)
1861                 uri_delete(uri);
1862         return (error);
1863 }
1864 
/*
 * The pchars[] array is indexed by a char (masked with PCHARS_MASK) to
 * determine if it's a valid URI path component character where:
 *
 *    pchar       = unreserved | escaped |
 *                  ":" | "@" | "&" | "=" | "+" | "$" | ","
 *
 *    unreserved  = alphanum | mark
 *
 *    alphanum    = alpha | digit
 *
 *    alpha       = lowalpha | upalpha
 *
 *    lowalpha    = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
 *                  "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
 *                  "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
 *                  "y" | "z"
 *
 *    upalpha     = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" |
 *                  "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" |
 *                  "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" |
 *                  "Y" | "Z"
 *
 *    digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 *                  "8" | "9"
 *
 *    mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 *
 *    escaped     = "%" hex hex
 *    hex         = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *                  "a" | "b" | "c" | "d" | "e" | "f"
 *
 * Note, "(" (0x28) and ")" (0x29) are marks per the above and so are
 * flagged valid. "/" (0x2F) is flagged valid as well although it's not a
 * pchar in the grammar; presumably so a whole path, separators included,
 * can be scanned with the table - confirm against the table's users.
 */

static char pchars[] = {
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x08 - 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x18 - 0x1F */
    0, 1, 0, 0, 1, 1, 1, 1,     /* 0x20 - 0x27 */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x28 - 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x30 - 0x37 */
    1, 1, 1, 0, 0, 1, 0, 0,     /* 0x38 - 0x3F */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x40 - 0x47 */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x48 - 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x50 - 0x57 */
    1, 1, 1, 0, 0, 0, 0, 1,     /* 0x58 - 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,     /* 0x60 - 0x67 */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x68 - 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x70 - 0x77 */
    1, 1, 1, 0, 0, 0, 1, 0      /* 0x78 - 0x7F */
};

#define PCHARS_MASK 0x7F
1918 
1919 /*
1920  * This is the main L7 request message parse, we are called each time
1921  * new data is availble for a socket, each time a single buffer of the
1922  * entire message to date is given.
1923  *
1924  * Here we parse the request looking for the URI, parse it, and if a
1925  * supported scheme call the scheme parser to commplete the parse of any
1926  * headers which may further qualify the identity of the requested object
1927  * then lookup it up in the URI hash.
1928  *
1929  * Return B_TRUE for more processing.
1930  *
1931  * Note, at this time the parser supports the generic message format as
1932  * specified in RFC 822 with potentional limitations as specified in RFC
1933  * 2616 for HTTP messages.
1934  *
1935  * Note, the caller supports an mblk_t chain, for now the parser(s)
1936  * require the complete header in a single mblk_t. This is the common
1937  * case and certainly for high performance environments, if at a future
1938  * date mblk_t chains are important the parse can be reved to process
1939  * mblk_t chains.
1940  */
1941 
1942 boolean_t
1943 nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
1944 {
1945         sotpi_info_t *sti = SOTOTPI(so);
1946         char    *cp = (char *)sti->sti_nl7c_rcv_mp->b_rptr;
1947         char    *ep = (char *)sti->sti_nl7c_rcv_mp->b_wptr;
1948         char    *get = "GET ";
1949         char    *post = "POST ";
1950         char    c;
1951         char    *uris;
1952         uri_desc_t *uri = NULL;
1953         uri_desc_t *ruri = NULL;
1954         mblk_t  *reqmp;
1955         uint32_t hv = 0;
1956 
1957         if ((reqmp = dupb(sti->sti_nl7c_rcv_mp)) == NULL) {
1958                 nl7c_uri_pass_dupbfail++;
1959                 goto pass;
1960         }
1961         /*
1962          * Allocate and initialize minimumal state for the request
1963          * uri_desc_t, in the cache hit case this uri_desc_t will
1964          * be freed.
1965          */
1966         uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
1967         REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
1968         uri->hash = NULL;
1969         uri->tail = NULL;
1970         uri->scheme = NULL;
1971         uri->count = 0;
1972         uri->reqmp = reqmp;
1973 
1974         /*
1975          * Set request time to current time.
1976          */
1977         sti->sti_nl7c_rtime = gethrestime_sec();
1978 
1979         /*
1980          * Parse the Request-Line for the URI.
1981          *
1982          * For backwards HTTP version compatable reasons skip any leading
1983          * CRLF (or CR or LF) line terminator(s) preceding Request-Line.
1984          */
1985         while (cp < ep && (*cp == '\r' || *cp == '\n')) {
1986                 cp++;
1987         }
1988         while (cp < ep && *get == *cp) {
1989                 get++;
1990                 cp++;
1991         }
1992         if (*get != 0) {
1993                 /* Note a "GET", check for "POST" */
1994                 while (cp < ep && *post == *cp) {
1995                         post++;
1996                         cp++;
1997                 }
1998                 if (*post != 0) {
1999                         if (cp == ep) {
2000                                 nl7c_uri_more_get++;
2001                                 goto more;
2002                         }
2003                         /* Not a "GET" or a "POST", just pass */
2004                         nl7c_uri_pass_method++;
2005                         goto pass;
2006                 }
2007                 /* "POST", don't cache but still may want to parse */
2008                 uri->hash = URI_TEMP;
2009         }
2010         /*
2011          * Skip over URI path char(s) and save start and past end pointers.
2012          */
2013         uris = cp;
2014         while (cp < ep && (c = *cp) != ' ' && c != '\r') {
2015                 if (c == '?') {
2016                         /* Don't cache but still may want to parse */
2017                         uri->hash = URI_TEMP;
2018                 }
2019                 CHASH(hv, c);
2020                 cp++;
2021         }
2022         if (c != '\r' && cp == ep) {
2023                 nl7c_uri_more_eol++;
2024                 goto more;
2025         }
2026         /*
2027          * Request-Line URI parsed, pass the rest of the request on
2028          * to the the http scheme parse.
2029          */
2030         uri->path.cp = uris;
2031         uri->path.ep = cp;
2032         uri->hvalue = hv;
2033         if (! nl7c_http_request(&cp, ep, uri, so) || cp == NULL) {
2034                 /*
2035                  * Parse not successful or pass on request, the pointer
2036                  * to the parse pointer "cp" is overloaded such that ! NULL
2037                  * for more data and NULL for bad parse of request or pass.
2038                  */
2039                 if (cp != NULL) {
2040                         nl7c_uri_more_http++;
2041                         goto more;
2042                 }
2043                 nl7c_uri_pass_http++;
2044                 goto pass;
2045         }
2046         if (uri->nocache) {
2047                 uri->hash = URI_TEMP;
2048                 (void) uri_lookup(uri, B_FALSE, nonblocking);
2049         } else if (uri->hash == URI_TEMP) {
2050                 uri->nocache = B_TRUE;
2051                 (void) uri_lookup(uri, B_FALSE, nonblocking);
2052         }
2053 
2054         if (uri->hash == URI_TEMP) {
2055                 if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
2056                         /* Temporary URI so skip hash processing */
2057                         nl7c_uri_request++;
2058                         nl7c_uri_temp++;
2059                         goto temp;
2060                 }
2061                 /* Not persistent so not interested in the response */
2062                 nl7c_uri_pass_temp++;
2063                 goto pass;
2064         }
2065         /*
2066          * Check the URI hash for a cached response, save the request
2067          * uri in case we need it below.
2068          */
2069         ruri = uri;
2070         if ((uri = uri_lookup(uri, B_TRUE, nonblocking)) == NULL) {
2071                 /*
2072                  * Failed to lookup due to nonblocking wait required,
2073                  * interrupted cv_wait_sig(), KM_NOSLEEP memory alloc
2074                  * failure, ... Just pass on this request.
2075                  */
2076                 nl7c_uri_pass_addfail++;
2077                 goto pass;
2078         }
2079         nl7c_uri_request++;
2080         if (uri->response.sz > 0) {
2081                 /*
2082                  * We have the response cached, update recv mblk rptr
2083                  * to reflect the data consumed in parse.
2084                  */
2085                 mblk_t  *mp = sti->sti_nl7c_rcv_mp;
2086 
2087                 if (cp == (char *)mp->b_wptr) {
2088                         sti->sti_nl7c_rcv_mp = mp->b_cont;
2089                         mp->b_cont = NULL;
2090                         freeb(mp);
2091                 } else {
2092                         mp->b_rptr = (unsigned char *)cp;
2093                 }
2094                 nl7c_uri_hit++;
2095                 /* If logging enabled log request */
2096                 if (nl7c_logd_enabled) {
2097                         ipaddr_t faddr;
2098 
2099                         if (so->so_family == AF_INET) {
2100                                 /* Only support IPv4 addrs */
2101                                 faddr = ((struct sockaddr_in *)
2102                                     sti->sti_faddr_sa) ->sin_addr.s_addr;
2103                         } else {
2104                                 faddr = 0;
2105                         }
2106                         /* XXX need to pass response type, e.g. 200, 304 */
2107                         nl7c_logd_log(ruri, uri, sti->sti_nl7c_rtime, faddr);
2108                 }
2109 
2110                 /* If conditional request check for substitute response */
2111                 if (ruri->conditional) {
2112                         uri = nl7c_http_cond(ruri, uri);
2113                 }
2114 
2115                 /*
2116                  * Release reference on request URI, send the response out
2117                  * the socket, release reference on response uri, set the
2118                  * *ret value to B_TRUE to indicate request was consumed
2119                  * then return B_FALSE to indcate no more data needed.
2120                  */
2121                 REF_RELE(ruri);
2122                 (void) uri_response(so, uri);
2123                 REF_RELE(uri);
2124                 *ret = B_TRUE;
2125                 return (B_FALSE);
2126         }
2127         /*
2128          * Miss the cache, the request URI is in the cache waiting for
2129          * application write-side data to fill it.
2130          */
2131         nl7c_uri_miss++;
2132 temp:
2133         /*
2134          * A miss or temp URI for which response data is needed, link
2135          * uri to so and so to uri, set WAITWRITE in the so such that
2136          * read-side processing is suspended (so the next read() gets
2137          * the request data) until a write() is processed by NL7C.
2138          *
2139          * Note, sti->sti_nl7c_uri now owns the REF_INIT() ref.
2140          */
2141         uri->proc = so;
2142         sti->sti_nl7c_uri = uri;
2143         sti->sti_nl7c_flags |= NL7C_WAITWRITE;
2144         *ret = B_FALSE;
2145         return (B_FALSE);
2146 
2147 more:
2148         /* More data is needed, note fragmented recv not supported */
2149         nl7c_uri_more++;
2150 
2151 pass:
2152         /* Pass on this request */
2153         nl7c_uri_pass++;
2154         nl7c_uri_request++;
2155         if (ruri != NULL) {
2156                 REF_RELE(ruri);
2157         }
2158         if (uri) {
2159                 REF_RELE(uri);
2160         }
2161         sti->sti_nl7c_flags = 0;
2162         *ret = B_FALSE;
2163         return (B_FALSE);
2164 }