/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
/*        All Rights Reserved   */


/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
 */

/*
 * UNIX Device Driver Interface functions
 *
 * This file contains functions that are to be added to the kernel
 * to put the interface presented to drivers in conformance with
 * the DDI standard. Of the functions added to the kernel, 17 are
 * function equivalents of existing macros in sysmacros.h,
 * stream.h, and param.h.
 *
 * 13 additional functions -- drv_getparm(), drv_setparm(),
 * getrbuf(), freerbuf(),
 * getemajor(), geteminor(), etoimajor(), itoemajor(), drv_usectohz(),
 * drv_hztousec(), drv_usecwait(), drv_priv(), and kvtoppid() --
 * are specified by DDI to exist in the kernel and are implemented here.
 *
 * Note that putnext() and put() are not in this file. The C versions of
 * these routines are in uts/common/os/putnext.c, and assembly versions
 * might exist for some architectures.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/signal.h>
#include <sys/pcb.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/session.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/mkdev.h>
#include <sys/debug.h>
#include <sys/vtrace.h>

/*
 * return internal major number corresponding to device
 * number (new format) argument
 */
major_t
getmajor(dev_t dev)
{
#ifdef _LP64
        return ((major_t)((dev >> NBITSMINOR64) & MAXMAJ64));
#else
        return ((major_t)((dev >> NBITSMINOR) & MAXMAJ));
#endif
}

/*
 * return external major number corresponding to device
 * number (new format) argument
 */
major_t
getemajor(dev_t dev)
{
#ifdef _LP64
        return ((major_t)((dev >> NBITSMINOR64) & MAXMAJ64));
#else
        return ((major_t)((dev >> NBITSMINOR) & MAXMAJ));
#endif
}

/*
 * return internal minor number corresponding to device
 * number (new format) argument
 */
minor_t
getminor(dev_t dev)
{
#ifdef _LP64
        return ((minor_t)(dev & MAXMIN64));
#else
        return ((minor_t)(dev & MAXMIN));
#endif
}

/*
 * return external minor number corresponding to device
 * number (new format) argument
 */
minor_t
geteminor(dev_t dev)
{
#ifdef _LP64
        return ((minor_t)(dev & MAXMIN64));
#else
        return ((minor_t)(dev & MAXMIN));
#endif
}

/*
 * return internal major number corresponding to external
 * major number.
 */
int
etoimajor(major_t emajnum)
{
#ifdef _LP64
        if (emajnum >= devcnt)
                return (-1); /* invalid external major */
#else
        if (emajnum > MAXMAJ || emajnum >= devcnt)
                return (-1); /* invalid external major */
#endif
        return ((int)emajnum);
}

/*
 * return external major number corresponding to internal
 * major number argument, or -1 if no external major number
 * can be found after lastemaj that maps to the internal
 * major number. Pass a lastemaj value of -1 to start
 * the search initially. Typical use of this function is
 * of the form:
 *
 *      lastemaj = -1;
 *      while ((lastemaj = itoemajor(imajnum, lastemaj)) != -1)
 *              { process major number }
 */
int
itoemajor(major_t imajnum, int lastemaj)
{
        if (imajnum >= devcnt)
                return (-1);

        /*
         * A lastemaj of -1 means start from the beginning of
         * the (imaginary) MAJOR table; anything below -1 is invalid.
         */
        if (lastemaj < -1)
                return (-1);

        /*
         * given that there's a 1-1 mapping of internal to external
         * major numbers, searching is somewhat pointless ... let's
         * just go there directly.
         */
        if (++lastemaj < devcnt && imajnum < devcnt)
                return (imajnum);
        return (-1);
}

/*
 * encode external major and minor number arguments into a
 * new format device number
 */
dev_t
makedevice(major_t maj, minor_t minor)
{
#ifdef _LP64
        return (((dev_t)maj << NBITSMINOR64) | (minor & MAXMIN64));
#else
        return (((dev_t)maj << NBITSMINOR) | (minor & MAXMIN));
#endif
}
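
/*
 * Example (hypothetical driver code, not part of this file): composing
 * a device number and taking it apart again. The names dip and
 * instance are assumptions for illustration only.
 *
 *      dev_t dev = makedevice(ddi_driver_major(dip), (minor_t)instance);
 *
 *      ASSERT(getmajor(dev) == ddi_driver_major(dip));
 *      ASSERT(getminor(dev) == (minor_t)instance);
 */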

/*
 * cmpdev - compress new device format to old device format
 */
o_dev_t
cmpdev(dev_t dev)
{
        major_t major_d;
        minor_t minor_d;

#ifdef _LP64
        major_d = dev >> NBITSMINOR64;
        minor_d = dev & MAXMIN64;
#else
        major_d = dev >> NBITSMINOR;
        minor_d = dev & MAXMIN;
#endif
        if (major_d > OMAXMAJ || minor_d > OMAXMIN)
                return ((o_dev_t)NODEV);
        return ((o_dev_t)((major_d << ONBITSMINOR) | minor_d));
}

dev_t
expdev(dev_t dev)
{
        major_t major_d;
        minor_t minor_d;

        major_d = ((dev >> ONBITSMINOR) & OMAXMAJ);
        minor_d = (dev & OMAXMIN);
#ifdef _LP64
        return ((((dev_t)major_d << NBITSMINOR64) | minor_d));
#else
        return ((((dev_t)major_d << NBITSMINOR) | minor_d));
#endif
}

/*
 * return true (1) if the message type input is a data
 * message type, 0 otherwise
 */
#undef datamsg
int
datamsg(unsigned char db_type)
{
        return (db_type == M_DATA || db_type == M_PROTO ||
            db_type == M_PCPROTO || db_type == M_DELAY);
}
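
/*
 * Example (hypothetical module code): a read-side put procedure that
 * queues data messages for the service procedure and forwards
 * everything else immediately. xxrput is an assumed name.
 *
 *      static int
 *      xxrput(queue_t *q, mblk_t *mp)
 *      {
 *              if (datamsg(DB_TYPE(mp)))
 *                      (void) putq(q, mp);
 *              else
 *                      putnext(q, mp);
 *              return (0);
 *      }
 */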

/*
 * return a pointer to the other queue in the queue pair of qp
 */
queue_t *
OTHERQ(queue_t *q)
{
        return (_OTHERQ(q));
}

/*
 * return a pointer to the read queue in the queue pair of qp.
 */
queue_t *
RD(queue_t *q)
{
        return (_RD(q));
}

/*
 * return 1 if the next queue is in the same stream as qp
 * (i.e. it is not a stream end), 0 otherwise.
 */
int
SAMESTR(queue_t *q)
{
        return (_SAMESTR(q));
}

/*
 * return a pointer to the write queue in the queue pair of qp.
 */
queue_t *
WR(queue_t *q)
{
        return (_WR(q));
}

/*
 * store value of kernel parameter associated with parm
 */
int
drv_getparm(unsigned int parm, void *valuep)
{
        proc_t  *p = curproc;
        time_t  now;

        switch (parm) {
        case UPROCP:
                *(proc_t **)valuep = p;
                break;
        case PPGRP:
                mutex_enter(&p->p_lock);
                *(pid_t *)valuep = p->p_pgrp;
                mutex_exit(&p->p_lock);
                break;
        case LBOLT:
                *(clock_t *)valuep = ddi_get_lbolt();
                break;
        case TIME:
                if ((now = gethrestime_sec()) == 0) {
                        timestruc_t ts;
                        mutex_enter(&tod_lock);
                        ts = tod_get();
                        mutex_exit(&tod_lock);
                        *(time_t *)valuep = ts.tv_sec;
                } else {
                        *(time_t *)valuep = now;
                }
                break;
        case PPID:
                *(pid_t *)valuep = p->p_pid;
                break;
        case PSID:
                mutex_enter(&p->p_splock);
                *(pid_t *)valuep = p->p_sessp->s_sid;
                mutex_exit(&p->p_splock);
                break;
        case UCRED:
                *(cred_t **)valuep = CRED();
                break;
        default:
                return (-1);
        }

        return (0);
}
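
/*
 * Example (hypothetical driver code): fetching the current time in
 * seconds for a log record; drv_getparm() returns -1 for an unknown
 * parameter, so the return value is checked. xsp is an assumed
 * soft-state pointer.
 *
 *      time_t now;
 *
 *      if (drv_getparm(TIME, &now) == 0)
 *              xsp->xs_last_io = now;
 */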

/*
 * set value of kernel parameter associated with parm
 */
int
drv_setparm(unsigned int parm, unsigned long value)
{
        switch (parm) {
        case SYSRINT:
                CPU_STATS_ADDQ(CPU, sys, rcvint, value);
                break;
        case SYSXINT:
                CPU_STATS_ADDQ(CPU, sys, xmtint, value);
                break;
        case SYSMINT:
                CPU_STATS_ADDQ(CPU, sys, mdmint, value);
                break;
        case SYSRAWC:
                CPU_STATS_ADDQ(CPU, sys, rawch, value);
                break;
        case SYSCANC:
                CPU_STATS_ADDQ(CPU, sys, canch, value);
                break;
        case SYSOUTC:
                CPU_STATS_ADDQ(CPU, sys, outch, value);
                break;
        default:
                return (-1);
        }

        return (0);
}

/*
 * allocate space for buffer header and return pointer to it.
 * preferred means of obtaining space for a local buf header.
 * returns pointer to buf upon success, NULL for failure
 */
struct buf *
getrbuf(int sleep)
{
        struct buf *bp;

        bp = kmem_alloc(sizeof (struct buf), sleep);
        if (bp == NULL)
                return (NULL);
        bioinit(bp);

        return (bp);
}

/*
 * free up space allocated by getrbuf()
 */
void
freerbuf(struct buf *bp)
{
        biofini(bp);
        kmem_free(bp, sizeof (struct buf));
}
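
/*
 * Example (hypothetical driver code): using getrbuf()/freerbuf() for a
 * private buf header. KM_SLEEP is only safe because the assumed caller
 * is not in interrupt context.
 *
 *      struct buf *bp = getrbuf(KM_SLEEP);
 *
 *      bp->b_bcount = len;
 *      bp->b_un.b_addr = kaddr;
 *      ... start the transfer, wait for biodone() ...
 *      freerbuf(bp);
 */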

/*
 * convert byte count input to logical page units
 * (byte counts that are not a page-size multiple
 * are rounded down)
 */
pgcnt_t
btop(size_t numbytes)
{
        return (numbytes >> PAGESHIFT);
}

/*
 * convert byte count input to logical page units
 * (byte counts that are not a page-size multiple
 * are rounded up)
 */
pgcnt_t
btopr(size_t numbytes)
{
        return ((numbytes + PAGEOFFSET) >> PAGESHIFT);
}

/*
 * convert size in pages to bytes.
 */
size_t
ptob(pgcnt_t numpages)
{
        return (numpages << PAGESHIFT);
}

#define MAXCLOCK_T LONG_MAX

/*
 * Convert from system time units (hz) to microseconds.
 *
 * If ticks <= 0, return 0.
 * If converting ticks to usecs would overflow, return MAXCLOCK_T.
 * Otherwise, convert ticks to microseconds.
 */
clock_t
drv_hztousec(clock_t ticks)
{
        if (ticks <= 0)
                return (0);

        if (ticks > MAXCLOCK_T / usec_per_tick)
                return (MAXCLOCK_T);

        return (TICK_TO_USEC(ticks));
}


/*
 * Convert from microseconds to system time units (hz), rounded up.
 *
 * If microsecs <= 0, return 0.
 * Otherwise, convert microseconds to ticks, rounding up.
 */
clock_t
drv_usectohz(clock_t microsecs)
{
        if (microsecs <= 0)
                return (0);

        return (USEC_TO_TICK_ROUNDUP(microsecs));
}
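
/*
 * Example (hypothetical driver code): scheduling a 50ms retry with
 * timeout(9F). Expressing the interval in microseconds and converting
 * with drv_usectohz() keeps the code independent of the tick rate.
 * xx_retry and xsp are assumed names.
 *
 *      xsp->xs_tid = timeout(xx_retry, xsp, drv_usectohz(50000));
 */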

/*
 * Convert from seconds to system time units (hz).
 *
 * If secs <= 0, return 0.
 * Otherwise, convert seconds to ticks, rounding up.
 */
clock_t
drv_sectohz(clock_t secs)
{
        if (secs <= 0)
                return (0);

        return (SEC_TO_TICK(secs));
}

#ifdef  sun
/*
 * drv_usecwait is implemented in each architecture's machine-
 * specific code somewhere. For sparc, it is the alternate entry
 * to usec_delay (eventually usec_delay goes away). See
 * sparc/os/ml/sparc_subr.s
 */
#endif

/*
 * bcanputnext() and canputnext() assume they are called from timeout,
 * bufcall, or esballoc free routines.  Since these are driven by clock
 * interrupts instead of system calls, the appropriate plumbing locks
 * have not been acquired.
 */
int
bcanputnext(queue_t *q, unsigned char band)
{
        int     ret;

        claimstr(q);
        ret = bcanput(q->q_next, band);
        releasestr(q);
        return (ret);
}

int
canputnext(queue_t *q)
{
        queue_t *qofsq = q;
        struct stdata *stp = STREAM(q);
        kmutex_t *sdlock;

        TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_IN,
            "canputnext?:%p\n", q);

        if (stp->sd_ciputctrl != NULL) {
                int ix = CPU->cpu_seqid & stp->sd_nciputctrl;
                sdlock = &stp->sd_ciputctrl[ix].ciputctrl_lock;
                mutex_enter(sdlock);
        } else
                mutex_enter(sdlock = &stp->sd_reflock);

        /* get next module forward with a service queue */
        q = q->q_next->q_nfsrv;
        ASSERT(q != NULL);

        /* this is for loopback transports, they should not do a canputnext */
        ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(qofsq));

        if (!(q->q_flag & QFULL)) {
                mutex_exit(sdlock);
                TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT,
                    "canputnext:%p %d", q, 1);
                return (1);
        }

        if (sdlock != &stp->sd_reflock) {
                mutex_exit(sdlock);
                mutex_enter(&stp->sd_reflock);
        }

        /* the above is the most frequently used path */
        stp->sd_refcnt++;
        ASSERT(stp->sd_refcnt != 0); /* Wraparound */
        mutex_exit(&stp->sd_reflock);

        mutex_enter(QLOCK(q));
        if (q->q_flag & QFULL) {
                q->q_flag |= QWANTW;
                mutex_exit(QLOCK(q));
                TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT,
                    "canputnext:%p %d", q, 0);
                releasestr(qofsq);

                return (0);
        }
        mutex_exit(QLOCK(q));
        TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT, "canputnext:%p %d", q, 1);
        releasestr(qofsq);

        return (1);
}
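
/*
 * Example (hypothetical module code): the canonical write-side service
 * procedure, which honors downstream flow control by checking
 * canputnext() before forwarding each queued message. xxwsrv is an
 * assumed name.
 *
 *      static int
 *      xxwsrv(queue_t *q)
 *      {
 *              mblk_t *mp;
 *
 *              while ((mp = getq(q)) != NULL) {
 *                      if (!canputnext(q)) {
 *                              (void) putbq(q, mp);
 *                              return (0);
 *                      }
 *                      putnext(q, mp);
 *              }
 *              return (0);
 *      }
 */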


/*
 * Open has progressed to the point where it is safe to send/receive messages.
 *
 * "qprocson enables the put and service routines of the driver
 * or module... Prior to the call to qprocson, the put and service
 * routines of a newly pushed module or newly opened driver are
 * disabled.  For the module, messages flow around it as if it
 * were not present in the stream... qprocson must be called by
 * the first open of a module or driver after allocation and
 * initialization of any resource on which the put and service
 * routines depend."
 *
 * Note that before calling qprocson a module/driver could itself cause its
 * put or service procedures to be run by using put() or qenable().
 */
void
qprocson(queue_t *q)
{
        ASSERT(q->q_flag & QREADR);
        /*
         * Do not call insertq() if it is a re-open.  But if _QINSERTING
         * is set, q_next will not be NULL and we need to call insertq().
         */
        if ((q->q_next == NULL && WR(q)->q_next == NULL) ||
            (q->q_flag & _QINSERTING))
                insertq(STREAM(q), q);
}
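
/*
 * Example (hypothetical module code): the tail of an open(9E) routine.
 * qprocson() is called only after q_ptr and all private state are set
 * up, since put and service routines may run immediately afterward.
 * xsp is an assumed soft-state pointer.
 *
 *      q->q_ptr = WR(q)->q_ptr = xsp;
 *      qprocson(q);
 *      return (0);
 */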

/*
 * Close has reached a point where it can no longer allow put/service
 * into the queue.
 *
 * "qprocsoff disables the put and service routines of the driver
 * or module... When the routines are disabled in a module, messages
 * flow around the module as if it were not present in the stream.
 * qprocsoff must be called by the close routine of a driver or module
 * before deallocating any resources on which the driver/module's
 * put and service routines depend.  qprocsoff will remove the
 * queue's service routines from the list of service routines to be
 * run and waits until any concurrent put or service routines are
 * finished."
 *
 * Note that after calling qprocsoff a module/driver could itself cause its
 * put procedures to be run by using put().
 */
void
qprocsoff(queue_t *q)
{
        ASSERT(q->q_flag & QREADR);
        if (q->q_flag & QWCLOSE) {
                /* Called more than once */
                return;
        }
        disable_svc(q);
        removeq(q);
}
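
/*
 * Example (hypothetical module code): the head of a close(9E) routine.
 * qprocsoff() runs before tearing down anything the put and service
 * routines depend on; any outstanding qtimeout() is then cancelled
 * before q_ptr is freed. xsp is an assumed soft-state pointer.
 *
 *      qprocsoff(q);
 *      if (xsp->xs_tid != 0)
 *              (void) quntimeout(q, xsp->xs_tid);
 *      q->q_ptr = WR(q)->q_ptr = NULL;
 */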

/*
 * "freezestr() freezes the state of the entire STREAM containing
 *  the queue pair q. A frozen STREAM blocks any thread
 *  attempting to enter any open, close, put or service routine
 *  belonging to any queue instance in the STREAM, and blocks
 *  any thread currently within the STREAM if it attempts to put
 *  messages onto or take messages off of any queue within the
 *  STREAM (with the sole exception of the caller). Threads
 *  blocked by this mechanism remain so until the STREAM is
 *  thawed by a call to unfreezestr()."
 *
 * Use strblock to set SQ_FROZEN in all syncqs in the stream (prevents
 * further entry into put, service, open, and close procedures) and
 * grab (and hold) all the QLOCKs in the stream (to block putq, getq etc.)
 *
 * Note: this has to be the only code that acquires one QLOCK while holding
 * another QLOCK (otherwise we would have locking hierarchy/ordering
 * violations.)
 */
void
freezestr(queue_t *q)
{
        struct stdata *stp = STREAM(q);

        /*
         * Increment refcnt to prevent q_next from changing during the strblock
         * as well as while the stream is frozen.
         */
        claimstr(RD(q));

        strblock(q);
        ASSERT(stp->sd_freezer == NULL);
        stp->sd_freezer = curthread;
        for (q = stp->sd_wrq; q != NULL; q = SAMESTR(q) ? q->q_next : NULL) {
                mutex_enter(QLOCK(q));
                mutex_enter(QLOCK(RD(q)));
        }
}

/*
 * Undo what freezestr did.
 * Have to drop the QLOCKs before the strunblock since strunblock will
 * potentially call other put procedures.
 */
void
unfreezestr(queue_t *q)
{
        struct stdata *stp = STREAM(q);
        queue_t *q1;

        for (q1 = stp->sd_wrq; q1 != NULL;
            q1 = SAMESTR(q1) ? q1->q_next : NULL) {
                mutex_exit(QLOCK(q1));
                mutex_exit(QLOCK(RD(q1)));
        }
        ASSERT(stp->sd_freezer == curthread);
        stp->sd_freezer = NULL;
        strunblock(q);
        releasestr(RD(q));
}
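
/*
 * Example (hypothetical driver code): freezestr()/unfreezestr() bracket
 * the strqget(9F) and strqset(9F) calls, which require a frozen stream.
 * The stream is thawed promptly, since all other threads are blocked
 * while it is frozen. count is an assumed variable sized for the field.
 *
 *      freezestr(q);
 *      (void) strqget(q, QCOUNT, 0, &count);
 *      unfreezestr(q);
 */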

/*
 * Used by open and close procedures to "sleep" waiting for messages to
 * arrive. Note: can only be used in open and close procedures.
 *
 * Lower the gate and let in either messages on the syncq (if there are
 * any) or put/service procedures.
 *
 * If the queue has an outer perimeter this will not prevent entry into this
 * syncq (since outer_enter does not set SQ_WRITER on the syncq that gets the
 * exclusive access to the outer perimeter.)
 *
 * Return 0 if the cv_wait_sig was interrupted; otherwise 1.
 *
 * It only makes sense to grab sq_putlocks for !SQ_CIOC sync queues because
 * otherwise put entry points were not blocked in the first place. If this is
 * SQ_CIOC then qwait is used to wait for service procedure to run since syncq
 * is always SQ_CIPUT if it is SQ_CIOC.
 *
 * Note that SQ_EXCL is dropped and SQ_WANTEXWAKEUP set in sq_flags
 * atomically under sq_putlocks to make sure putnext will not miss a pending
 * wakeup.
 */
int
qwait_sig(queue_t *q)
{
        syncq_t         *sq, *outer;
        uint_t          flags;
        int             ret = 1;
        int             is_sq_cioc;

        /*
         * Perform the same operations as a leavesq(sq, SQ_OPENCLOSE)
         * while detecting all cases where the perimeter is entered
         * so that qwait_sig can return to the caller.
         *
         * Drain the syncq if possible. Otherwise reset SQ_EXCL and
         * wait for a thread to leave the syncq.
         */
        sq = q->q_syncq;
        ASSERT(sq);
        is_sq_cioc = (sq->sq_type & SQ_CIOC) ? 1 : 0;
        ASSERT(sq->sq_outer == NULL || sq->sq_outer->sq_flags & SQ_WRITER);
        outer = sq->sq_outer;
        /*
         * XXX this does not work if there is only an outer perimeter.
         * The semantics of qwait/qwait_sig are undefined in this case.
         */
        if (outer)
                outer_exit(outer);

        mutex_enter(SQLOCK(sq));
        if (is_sq_cioc == 0) {
                SQ_PUTLOCKS_ENTER(sq);
        }
        flags = sq->sq_flags;
        /*
         * Drop SQ_EXCL and sq_count but hold the SQLOCK
         * to prevent any undetected entry and exit into the perimeter.
         */
        ASSERT(sq->sq_count > 0);
        sq->sq_count--;

        if (is_sq_cioc == 0) {
                ASSERT(flags & SQ_EXCL);
                flags &= ~SQ_EXCL;
        }
        /*
         * Unblock any thread blocked in an entersq or outer_enter.
         * Note: we do not unblock a thread waiting in qwait/qwait_sig,
         * since that could lead to livelock with two threads in
         * qwait for the same (per module) inner perimeter.
         */
        if (flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                flags &= ~SQ_WANTWAKEUP;
        }
        sq->sq_flags = flags;
        if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_EXIT(sq);
                }
                /* drain_syncq() drops SQLOCK */
                drain_syncq(sq);
                ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
                entersq(sq, SQ_OPENCLOSE);
                return (1);
        }
        /*
         * Sleep on sq_exitwait to only be woken up when threads leave the
         * put or service procedures. We can not sleep on sq_wait since an
         * outer_exit in a qwait running in the same outer perimeter would
         * cause a livelock "ping-pong" between two or more qwait'ers.
         */
        do {
                sq->sq_flags |= SQ_WANTEXWAKEUP;
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_EXIT(sq);
                }
                ret = cv_wait_sig(&sq->sq_exitwait, SQLOCK(sq));
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_ENTER(sq);
                }
        } while (ret && (sq->sq_flags & SQ_WANTEXWAKEUP));
        if (is_sq_cioc == 0) {
                SQ_PUTLOCKS_EXIT(sq);
        }
        mutex_exit(SQLOCK(sq));

        /*
         * Re-enter the perimeters again
         */
        entersq(sq, SQ_OPENCLOSE);
        return (ret);
}
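
/*
 * Example (hypothetical module code): an open(9E) routine sends a
 * request downstream and waits for the reply, which the read-side put
 * procedure is assumed to record in xsp->xs_reply. qwait_sig() returns
 * 0 if the wait was interrupted by a signal, letting the open fail
 * cleanly.
 *
 *      putnext(WR(q), reqmp);
 *      while (xsp->xs_reply == NULL) {
 *              if (qwait_sig(q) == 0)
 *                      return (EINTR);
 *      }
 */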

/*
 * Used by open and close procedures to "sleep" waiting for messages to
 * arrive. Note: can only be used in open and close procedures.
 *
 * Lower the gate and let in either messages on the syncq (if there are
 * any) or put/service procedures.
 *
 * If the queue has an outer perimeter this will not prevent entry into this
 * syncq (since outer_enter does not set SQ_WRITER on the syncq that gets the
 * exclusive access to the outer perimeter.)
 *
 * It only makes sense to grab sq_putlocks for !SQ_CIOC sync queues because
 * otherwise put entry points were not blocked in the first place. If this is
 * SQ_CIOC then qwait is used to wait for service procedure to run since syncq
 * is always SQ_CIPUT if it is SQ_CIOC.
 *
 * Note that SQ_EXCL is dropped and SQ_WANTEXWAKEUP set in sq_flags
 * atomically under sq_putlocks to make sure putnext will not miss a pending
 * wakeup.
 */
void
qwait(queue_t *q)
{
        syncq_t         *sq, *outer;
        uint_t          flags;
        int             is_sq_cioc;

        /*
         * Perform the same operations as a leavesq(sq, SQ_OPENCLOSE)
         * while detecting all cases where the perimeter is entered
         * so that qwait can return to the caller.
         *
         * Drain the syncq if possible. Otherwise reset SQ_EXCL and
         * wait for a thread to leave the syncq.
         */
        sq = q->q_syncq;
        ASSERT(sq);
        is_sq_cioc = (sq->sq_type & SQ_CIOC) ? 1 : 0;
        ASSERT(sq->sq_outer == NULL || sq->sq_outer->sq_flags & SQ_WRITER);
        outer = sq->sq_outer;
        /*
         * XXX this does not work if there is only an outer perimeter.
         * The semantics of qwait/qwait_sig are undefined in this case.
         */
        if (outer)
                outer_exit(outer);

        mutex_enter(SQLOCK(sq));
        if (is_sq_cioc == 0) {
                SQ_PUTLOCKS_ENTER(sq);
        }
        flags = sq->sq_flags;
        /*
         * Drop SQ_EXCL and sq_count but hold the SQLOCK
         * to prevent any undetected entry and exit into the perimeter.
         */
        ASSERT(sq->sq_count > 0);
        sq->sq_count--;

        if (is_sq_cioc == 0) {
                ASSERT(flags & SQ_EXCL);
                flags &= ~SQ_EXCL;
        }
        /*
         * Unblock any thread blocked in an entersq or outer_enter.
         * Note: we do not unblock a thread waiting in qwait/qwait_sig,
         * since that could lead to livelock with two threads in
         * qwait for the same (per module) inner perimeter.
         */
        if (flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                flags &= ~SQ_WANTWAKEUP;
        }
        sq->sq_flags = flags;
        if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_EXIT(sq);
                }
                /* drain_syncq() drops SQLOCK */
                drain_syncq(sq);
                ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
                entersq(sq, SQ_OPENCLOSE);
                return;
        }
        /*
         * Sleep on sq_exitwait to only be woken up when threads leave the
         * put or service procedures. We can not sleep on sq_wait since an
         * outer_exit in a qwait running in the same outer perimeter would
         * cause a livelock "ping-pong" between two or more qwait'ers.
         */
        do {
                sq->sq_flags |= SQ_WANTEXWAKEUP;
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_EXIT(sq);
                }
                cv_wait(&sq->sq_exitwait, SQLOCK(sq));
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_ENTER(sq);
                }
        } while (sq->sq_flags & SQ_WANTEXWAKEUP);
        if (is_sq_cioc == 0) {
                SQ_PUTLOCKS_EXIT(sq);
        }
        mutex_exit(SQLOCK(sq));

        /*
         * Re-enter the perimeters again
         */
        entersq(sq, SQ_OPENCLOSE);
}

/*
 * Used for the synchronous streams entrypoints when sleeping outside
 * the perimeters. Must never be called from regular put entrypoint.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT sync
 * queues). If it is a CIPUT sync queue, put entry points were not blocked in
 * the first place by rwnext/infonext, which are treated as put entrypoints
 * for perimeter synchronization purposes.
 *
 * Consolidation private.
 */
boolean_t
qwait_rw(queue_t *q)
{
        syncq_t         *sq;
        ulong_t         flags;
        boolean_t       gotsignal = B_FALSE;

        /*
         * Perform the same operations as a leavesq(sq, SQ_PUT)
         * while detecting all cases where the perimeter is entered
         * so that qwait_rw can return to the caller.
         *
         * Drain the syncq if possible. Otherwise reset SQ_EXCL and
         * wait for a thread to leave the syncq.
         */
        sq = q->q_syncq;
        ASSERT(sq);

        mutex_enter(SQLOCK(sq));
        flags = sq->sq_flags;
        /*
         * Drop SQ_EXCL and sq_count but hold the SQLOCK to prevent any
         * undetected entry and exit into the perimeter.
         */
        ASSERT(sq->sq_count > 0);
        sq->sq_count--;
        if (!(sq->sq_type & SQ_CIPUT)) {
                ASSERT(flags & SQ_EXCL);
                flags &= ~SQ_EXCL;
        }
        /*
         * Unblock any thread blocked in an entersq or outer_enter.
         * Note: we do not unblock a thread waiting in qwait/qwait_sig,
         * since that could lead to livelock with two threads in
         * qwait for the same (per module) inner perimeter.
         */
        if (flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                flags &= ~SQ_WANTWAKEUP;
        }
        sq->sq_flags = flags;
        if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
                /* drain_syncq() drops SQLOCK */
                drain_syncq(sq);
                ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
                entersq(sq, SQ_PUT);
                return (B_FALSE);
        }
        /*
         * Sleep on sq_exitwait to only be woken up when threads leave the
         * put or service procedures. We can not sleep on sq_wait since an
         * outer_exit in a qwait running in the same outer perimeter would
         * cause a livelock "ping-pong" between two or more qwait'ers.
         */
        do {
                sq->sq_flags |= SQ_WANTEXWAKEUP;
                if (cv_wait_sig(&sq->sq_exitwait, SQLOCK(sq)) <= 0) {
                        sq->sq_flags &= ~SQ_WANTEXWAKEUP;
                        gotsignal = B_TRUE;
                        break;
                }
        } while (sq->sq_flags & SQ_WANTEXWAKEUP);
        mutex_exit(SQLOCK(sq));

        /*
         * Re-enter the perimeters again
         */
        entersq(sq, SQ_PUT);
        return (gotsignal);
}

/*
 * Asynchronously upgrade to exclusive access at either the inner or
 * outer perimeter.
 */
void
qwriter(queue_t *q, mblk_t *mp, void (*func)(), int perim)
{
        if (perim == PERIM_INNER)
                qwriter_inner(q, mp, func);
        else if (perim == PERIM_OUTER)
                qwriter_outer(q, mp, func);
        else
                panic("qwriter: wrong \"perimeter\" parameter");
}
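
/*
 * Example (hypothetical module code): deferring work that needs
 * exclusive (writer) access to the inner perimeter. The put procedure
 * hands the message to qwriter(9F); xx_set_state (an assumed callback)
 * later runs exclusively and may safely update shared state.
 *
 *      static void
 *      xx_set_state(queue_t *q, mblk_t *mp)
 *      {
 *              ... update state shared across the perimeter ...
 *              freemsg(mp);
 *      }
 *
 *      ... in the put procedure:
 *      qwriter(q, mp, xx_set_state, PERIM_INNER);
 */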

/*
 * Schedule a synchronous streams timeout
 */
timeout_id_t
qtimeout(queue_t *q, void (*func)(void *), void *arg, clock_t tim)
{
        syncq_t         *sq;
        callbparams_t   *cbp;
        timeout_id_t    tid;

        sq = q->q_syncq;
        /*
         * We don't want the timeout firing before its params are set up;
         * callbparams_alloc() acquires SQLOCK(sq).
         * qtimeout() can't fail and can't sleep, so panic if memory is not
         * available.
         */
        cbp = callbparams_alloc(sq, func, arg, KM_NOSLEEP | KM_PANIC);
        /*
         * the callbflags in the sq use the same flags. They get anded
         * in the callbwrapper to determine if a qun* of this callback type
         * is required. This is not a request to cancel.
         */
        cbp->cbp_flags = SQ_CANCEL_TOUT;
        /* check new timeout version return codes */
        tid = timeout(qcallbwrapper, cbp, tim);
        cbp->cbp_id = (callbparams_id_t)tid;
        mutex_exit(SQLOCK(sq));
        /* use the local id because the cbp memory could be freed by now */
        return (tid);
}

bufcall_id_t
qbufcall(queue_t *q, size_t size, uint_t pri, void (*func)(void *), void *arg)
{
        syncq_t         *sq;
        callbparams_t   *cbp;
        bufcall_id_t    bid;

        sq = q->q_syncq;
        /*
         * We don't want the callback firing before its params are set up;
         * callbparams_alloc() acquires SQLOCK(sq) if successful.
         */
        cbp = callbparams_alloc(sq, func, arg, KM_NOSLEEP);
        if (cbp == NULL)
                return ((bufcall_id_t)0);

        /*
         * the callbflags in the sq use the same flags. They get anded
         * in the callbwrapper to determine if a qun* of this callback type
         * is required. This is not a request to cancel.
         */
        cbp->cbp_flags = SQ_CANCEL_BUFCALL;
        /* check new bufcall version return codes */
        bid = bufcall(size, pri, qcallbwrapper, cbp);
        cbp->cbp_id = (callbparams_id_t)bid;
        if (bid == 0) {
                callbparams_free(sq, cbp);
        }
        mutex_exit(SQLOCK(sq));
        /* use the local id because the params memory could be freed by now */
        return (bid);
}

/*
 * cancel a timeout callback which enters the inner perimeter.
 * cancelling of all callback types on a given syncq is serialized.
 * the SQ_CALLB_BYPASSED flag indicates that the callback fn did
 * not execute. The quntimeout return value needs to reflect this.
 * As with our existing callback programming model, callbacks must
 * be cancelled before a close completes, thus ensuring that the sq
 * is valid when the callback wrapper is executed.
 */
clock_t
quntimeout(queue_t *q, timeout_id_t id)
{
        syncq_t *sq = q->q_syncq;
        clock_t ret;

        mutex_enter(SQLOCK(sq));
        /* callbacks are processed serially on each syncq */
        while (sq->sq_callbflags & SQ_CALLB_CANCEL_MASK) {
                sq->sq_flags |= SQ_WANTWAKEUP;
                cv_wait(&sq->sq_wait, SQLOCK(sq));
        }
        sq->sq_cancelid = (callbparams_id_t)id;
        sq->sq_callbflags = SQ_CANCEL_TOUT;
        if (sq->sq_flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                sq->sq_flags &= ~SQ_WANTWAKEUP;
        }
        mutex_exit(SQLOCK(sq));
        ret = untimeout(id);
        mutex_enter(SQLOCK(sq));
        if (ret != -1) {
                /* The wrapper was never called - need to free based on id */
                callbparams_free_id(sq, (callbparams_id_t)id, SQ_CANCEL_TOUT);
        }
        if (sq->sq_callbflags & SQ_CALLB_BYPASSED) {
                ret = 0;        /* this is how much time was left */
        }
        sq->sq_callbflags = 0;
        if (sq->sq_flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                sq->sq_flags &= ~SQ_WANTWAKEUP;
        }
        mutex_exit(SQLOCK(sq));
        return (ret);
}
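
/*
 * Example (hypothetical module code): pairing qtimeout() with
 * quntimeout() so the callback cannot fire after close. xx_tick and the
 * xs_tid field are assumed names.
 *
 *      xsp->xs_tid = qtimeout(q, xx_tick, xsp, drv_usectohz(100000));
 *      ...
 *      ... later, in the close routine, before freeing xsp:
 *      if (xsp->xs_tid != 0)
 *              (void) quntimeout(q, xsp->xs_tid);
 */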


void
qunbufcall(queue_t *q, bufcall_id_t id)
{
        syncq_t *sq = q->q_syncq;

        mutex_enter(SQLOCK(sq));
        /* callbacks are processed serially on each syncq */
        while (sq->sq_callbflags & SQ_CALLB_CANCEL_MASK) {
                sq->sq_flags |= SQ_WANTWAKEUP;
                cv_wait(&sq->sq_wait, SQLOCK(sq));
        }
        sq->sq_cancelid = (callbparams_id_t)id;
        sq->sq_callbflags = SQ_CANCEL_BUFCALL;
        if (sq->sq_flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                sq->sq_flags &= ~SQ_WANTWAKEUP;
        }
        mutex_exit(SQLOCK(sq));
        unbufcall(id);
        mutex_enter(SQLOCK(sq));
        /*
         * No indication from unbufcall if the callback has already run.
         * Always attempt to free it.
         */
        callbparams_free_id(sq, (callbparams_id_t)id, SQ_CANCEL_BUFCALL);
        sq->sq_callbflags = 0;
        if (sq->sq_flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                sq->sq_flags &= ~SQ_WANTWAKEUP;
        }
        mutex_exit(SQLOCK(sq));
}

/*
 * Associate the stream with an instance of the bottom driver.  This
 * function is called by APIs that establish or modify the hardware
 * association (ppa) of an open stream.  Two examples of such
 * post-open(9E) APIs are the dlpi(7p) DL_ATTACH_REQ message, and the
 * ndd(1M) "instance=" ioctl(2).  This interface may be called from a
 * stream driver's wput procedure and from within syncq perimeters,
 * so it can't block.
 *
 * The qassociate() "model" is that it should drive attach(9E), yet it
 * can't really do that because driving attach(9E) is a blocking
 * operation.  Instead, the qassociate() implementation has complex
 * dependencies on the implementation behavior of other parts of the
 * kernel to ensure all appropriate instances (ones that have not been
 * made inaccessible by DR) are attached at stream open() time, and
 * that they will not autodetach.  The code relies on the fact that an
 * open() of a stream that ends up using qassociate() always occurs on
 * a minor node created with CLONE_DEV.  The open() comes through
 * clnopen() and since clnopen() calls ddi_hold_installed_driver() we
 * attach all instances and mark them DN_NO_AUTODETACH (given
 * DN_DRIVER_HELD is maintained correctly).
 *
 * Since qassociate() can't really drive attach(9E), there are corner
 * cases where the compromise described above leads to qassociate()
 * returning failure.  This can happen when administrative functions
 * that cause detach(9E), such as "update_drv" or "modunload -i", are
 * performed on the driver between the time the stream was opened and
 * the time its hardware association was established.  Although this can
 * theoretically be an arbitrary amount of time, in practice the window
 * is usually quite small, since applications almost always issue their
 * hardware association request immediately after opening the stream,
 * and do not typically switch association while open.  When these
 * corner cases occur, and qassociate() finds the requested instance
 * detached, it will return failure.  This failure should be propagated
 * to the requesting administrative application using the appropriate
 * post-open(9E) API error mechanism.
 *
 * All qassociate() callers are expected to check for and gracefully handle
 * failure return, propagating errors back to the requesting administrative
 * application.
 */
int
qassociate(queue_t *q, int instance)
{
        vnode_t *vp;
        major_t major;
        dev_info_t *dip;

        if (instance == -1) {
                ddi_assoc_queue_with_devi(q, NULL);
                return (0);
        }

        vp = STREAM(q)->sd_vnode;
        major = getmajor(vp->v_rdev);
        dip = ddi_hold_devi_by_instance(major, instance,
            E_DDI_HOLD_DEVI_NOATTACH);
        if (dip == NULL)
                return (-1);

        ddi_assoc_queue_with_devi(q, dip);
        ddi_release_devi(dip);
        return (0);
}
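
/*
 * Example (hypothetical DLPI driver code): establishing the hardware
 * association while handling DL_ATTACH_REQ in the write put procedure,
 * and propagating a qassociate() failure back as DL_BADPPA rather than
 * ignoring it.
 *
 *      case DL_ATTACH_REQ:
 *              if (qassociate(q, ppa) != 0) {
 *                      ... nak the request with DL_BADPPA ...
 *                      break;
 *              }
 *              ... ack the attach ...
 */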

/*
 * This routine is the SVR4MP 'replacement' for
 * hat_getkpfnum.  The only major difference is
 * the return value for illegal addresses - since
 * sunm_getkpfnum() and srmmu_getkpfnum() both
 * return '-1' for bogus mappings, we can (more or
 * less) return the value directly.
 */
ppid_t
kvtoppid(caddr_t addr)
{
        return ((ppid_t)hat_getpfnum(kas.a_hat, addr));
}

/*
 * This is used to set the timeout value for cv_timedwait() or
 * cv_timedwait_sig().
 */
void
time_to_wait(clock_t *now, clock_t time)
{
        *now = ddi_get_lbolt() + time;
}
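
/*
 * Example (hypothetical driver code): computing an absolute wakeup time
 * and waiting on a condition variable until an event or the deadline;
 * cv_timedwait() returns -1 once the deadline passes. The xsp fields
 * are assumed names.
 *
 *      clock_t deadline;
 *
 *      time_to_wait(&deadline, drv_usectohz(1000000));
 *      mutex_enter(&xsp->xs_lock);
 *      while (!xsp->xs_ready) {
 *              if (cv_timedwait(&xsp->xs_cv, &xsp->xs_lock, deadline) == -1)
 *                      break;
 *      }
 *      mutex_exit(&xsp->xs_lock);
 */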