patch delete-t_stime
patch remove-load-flag
patch remove-dont-swap-flag
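Taken together, the three patches above change thread_create() so that a newly created thread no longer records a lbolt-based start time in t_stime and no longer starts out with the TS_LOAD and TS_DONT_SWAP scheduling flags set. The change itself is the small hunk around old line 432 in the diff below; in outline:

    -	t->t_stime = ddi_get_lbolt();
    -	t->t_schedflag = TS_LOAD | TS_DONT_SWAP;
    +	t->t_schedflag = 0;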
--- old/usr/src/uts/common/disp/thread.c
+++ new/usr/src/uts/common/disp/thread.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/param.h>
29 29 #include <sys/sysmacros.h>
30 30 #include <sys/signal.h>
31 31 #include <sys/stack.h>
32 32 #include <sys/pcb.h>
33 33 #include <sys/user.h>
34 34 #include <sys/systm.h>
35 35 #include <sys/sysinfo.h>
36 36 #include <sys/errno.h>
37 37 #include <sys/cmn_err.h>
38 38 #include <sys/cred.h>
39 39 #include <sys/resource.h>
40 40 #include <sys/task.h>
41 41 #include <sys/project.h>
42 42 #include <sys/proc.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/disp.h>
45 45 #include <sys/class.h>
46 46 #include <vm/seg_kmem.h>
47 47 #include <vm/seg_kp.h>
48 48 #include <sys/machlock.h>
49 49 #include <sys/kmem.h>
50 50 #include <sys/varargs.h>
51 51 #include <sys/turnstile.h>
52 52 #include <sys/poll.h>
53 53 #include <sys/vtrace.h>
54 54 #include <sys/callb.h>
55 55 #include <c2/audit.h>
56 56 #include <sys/tnf.h>
57 57 #include <sys/sobject.h>
58 58 #include <sys/cpupart.h>
59 59 #include <sys/pset.h>
60 60 #include <sys/door.h>
61 61 #include <sys/spl.h>
62 62 #include <sys/copyops.h>
63 63 #include <sys/rctl.h>
64 64 #include <sys/brand.h>
65 65 #include <sys/pool.h>
66 66 #include <sys/zone.h>
67 67 #include <sys/tsol/label.h>
68 68 #include <sys/tsol/tndb.h>
69 69 #include <sys/cpc_impl.h>
70 70 #include <sys/sdt.h>
71 71 #include <sys/reboot.h>
72 72 #include <sys/kdi.h>
73 73 #include <sys/schedctl.h>
74 74 #include <sys/waitq.h>
75 75 #include <sys/cpucaps.h>
76 76 #include <sys/kiconv.h>
77 77
78 78 struct kmem_cache *thread_cache; /* cache of free threads */
79 79 struct kmem_cache *lwp_cache; /* cache of free lwps */
80 80 struct kmem_cache *turnstile_cache; /* cache of free turnstiles */
81 81
82 82 /*
83 83 * allthreads is only for use by kmem_readers. All kernel loops can use
84 84 * the current thread as a start/end point.
85 85 */
86 86 static kthread_t *allthreads = &t0; /* circular list of all threads */
87 87
88 88 static kcondvar_t reaper_cv; /* synchronization var */
89 89 kthread_t *thread_deathrow; /* circular list of reapable threads */
90 90 kthread_t *lwp_deathrow; /* circular list of reapable threads */
91 91 kmutex_t reaplock; /* protects lwp and thread deathrows */
92 92 int thread_reapcnt = 0; /* number of threads on deathrow */
93 93 int lwp_reapcnt = 0; /* number of lwps on deathrow */
94 94 int reaplimit = 16; /* delay reaping until reaplimit */
95 95
96 96 thread_free_lock_t *thread_free_lock;
97 97 /* protects tick thread from reaper */
98 98
99 99 extern int nthread;
100 100
101 101 /* System Scheduling classes. */
102 102 id_t syscid; /* system scheduling class ID */
103 103 id_t sysdccid = CLASS_UNUSED; /* reset when SDC loads */
104 104
105 105 void *segkp_thread; /* cookie for segkp pool */
106 106
107 107 int lwp_cache_sz = 32;
108 108 int t_cache_sz = 8;
109 109 static kt_did_t next_t_id = 1;
110 110
111 111 /* Default mode for thread binding to CPUs and processor sets */
112 112 int default_binding_mode = TB_ALLHARD;
113 113
114 114 /*
115 115 * Min/Max stack sizes for stack size parameters
116 116 */
117 117 #define MAX_STKSIZE (32 * DEFAULTSTKSZ)
118 118 #define MIN_STKSIZE DEFAULTSTKSZ
119 119
120 120 /*
121 121 * default_stksize overrides lwp_default_stksize if it is set.
122 122 */
123 123 int default_stksize;
124 124 int lwp_default_stksize;
125 125
126 126 static zone_key_t zone_thread_key;
127 127
128 128 unsigned int kmem_stackinfo; /* stackinfo feature on-off */
129 129 kmem_stkinfo_t *kmem_stkinfo_log; /* stackinfo circular log */
130 130 static kmutex_t kmem_stkinfo_lock; /* protects kmem_stkinfo_log */
131 131
132 132 /*
133 133 * forward declarations for internal thread specific data (tsd)
134 134 */
135 135 static void *tsd_realloc(void *, size_t, size_t);
136 136
137 137 void thread_reaper(void);
138 138
139 139 /* forward declarations for stackinfo feature */
140 140 static void stkinfo_begin(kthread_t *);
141 141 static void stkinfo_end(kthread_t *);
142 142 static size_t stkinfo_percent(caddr_t, caddr_t, caddr_t);
143 143
144 144 /*ARGSUSED*/
145 145 static int
146 146 turnstile_constructor(void *buf, void *cdrarg, int kmflags)
147 147 {
148 148 bzero(buf, sizeof (turnstile_t));
149 149 return (0);
150 150 }
151 151
152 152 /*ARGSUSED*/
153 153 static void
154 154 turnstile_destructor(void *buf, void *cdrarg)
155 155 {
156 156 turnstile_t *ts = buf;
157 157
158 158 ASSERT(ts->ts_free == NULL);
159 159 ASSERT(ts->ts_waiters == 0);
160 160 ASSERT(ts->ts_inheritor == NULL);
161 161 ASSERT(ts->ts_sleepq[0].sq_first == NULL);
162 162 ASSERT(ts->ts_sleepq[1].sq_first == NULL);
163 163 }
164 164
165 165 void
166 166 thread_init(void)
167 167 {
168 168 kthread_t *tp;
169 169 extern char sys_name[];
170 170 extern void idle();
171 171 struct cpu *cpu = CPU;
172 172 int i;
173 173 kmutex_t *lp;
174 174
175 175 mutex_init(&reaplock, NULL, MUTEX_SPIN, (void *)ipltospl(DISP_LEVEL));
176 176 thread_free_lock =
177 177 kmem_alloc(sizeof (thread_free_lock_t) * THREAD_FREE_NUM, KM_SLEEP);
178 178 for (i = 0; i < THREAD_FREE_NUM; i++) {
179 179 lp = &thread_free_lock[i].tf_lock;
180 180 mutex_init(lp, NULL, MUTEX_DEFAULT, NULL);
181 181 }
182 182
183 183 #if defined(__i386) || defined(__amd64)
184 184 thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
185 185 PTR24_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
186 186
187 187 /*
188 188 * "struct _klwp" includes a "struct pcb", which includes a
189 189 * "struct fpu", which needs to be 64-byte aligned on amd64
190 190 * (and even on i386) for xsave/xrstor.
191 191 */
192 192 lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
193 193 64, NULL, NULL, NULL, NULL, NULL, 0);
194 194 #else
195 195 /*
196 196 * Allocate thread structures from static_arena. This prevents
197 197 * issues where a thread tries to relocate its own thread
198 198 * structure and touches it after the mapping has been suspended.
199 199 */
200 200 thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
201 201 PTR24_ALIGN, NULL, NULL, NULL, NULL, static_arena, 0);
202 202
203 203 lwp_stk_cache_init();
204 204
205 205 lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
206 206 0, NULL, NULL, NULL, NULL, NULL, 0);
207 207 #endif
208 208
209 209 turnstile_cache = kmem_cache_create("turnstile_cache",
210 210 sizeof (turnstile_t), 0,
211 211 turnstile_constructor, turnstile_destructor, NULL, NULL, NULL, 0);
212 212
213 213 label_init();
214 214 cred_init();
215 215
216 216 /*
217 217 * Initialize various resource management facilities.
218 218 */
219 219 rctl_init();
220 220 cpucaps_init();
221 221 /*
222 222 * Zone_init() should be called before project_init() so that project ID
223 223 * for the first project is initialized correctly.
224 224 */
225 225 zone_init();
226 226 project_init();
227 227 brand_init();
228 228 kiconv_init();
229 229 task_init();
230 230 tcache_init();
231 231 pool_init();
232 232
233 233 curthread->t_ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
234 234
235 235 /*
236 236 * Originally, we had two parameters to set default stack
237 237 * size: one for lwp's (lwp_default_stksize), and one for
238 238 * kernel-only threads (DEFAULTSTKSZ, a.k.a. _defaultstksz).
239 239 * Now we have a third parameter that overrides both if it is
240 240 * set to a legal stack size, called default_stksize.
241 241 */
242 242
243 243 if (default_stksize == 0) {
244 244 default_stksize = DEFAULTSTKSZ;
245 245 } else if (default_stksize % PAGESIZE != 0 ||
246 246 default_stksize > MAX_STKSIZE ||
247 247 default_stksize < MIN_STKSIZE) {
248 248 cmn_err(CE_WARN, "Illegal stack size. Using %d",
249 249 (int)DEFAULTSTKSZ);
250 250 default_stksize = DEFAULTSTKSZ;
251 251 } else {
252 252 lwp_default_stksize = default_stksize;
253 253 }
254 254
255 255 if (lwp_default_stksize == 0) {
256 256 lwp_default_stksize = default_stksize;
257 257 } else if (lwp_default_stksize % PAGESIZE != 0 ||
258 258 lwp_default_stksize > MAX_STKSIZE ||
259 259 lwp_default_stksize < MIN_STKSIZE) {
260 260 cmn_err(CE_WARN, "Illegal stack size. Using %d",
261 261 default_stksize);
262 262 lwp_default_stksize = default_stksize;
263 263 }
264 264
265 265 segkp_lwp = segkp_cache_init(segkp, lwp_cache_sz,
266 266 lwp_default_stksize,
267 267 (KPD_NOWAIT | KPD_HASREDZONE | KPD_LOCKED));
268 268
269 269 segkp_thread = segkp_cache_init(segkp, t_cache_sz,
270 270 default_stksize, KPD_HASREDZONE | KPD_LOCKED | KPD_NO_ANON);
271 271
272 272 (void) getcid(sys_name, &syscid);
273 273 curthread->t_cid = syscid; /* current thread is t0 */
274 274
275 275 /*
276 276 * Set up the first CPU's idle thread.
277 277 * It runs whenever the CPU has nothing worthwhile to do.
278 278 */
279 279 tp = thread_create(NULL, 0, idle, NULL, 0, &p0, TS_STOPPED, -1);
280 280 cpu->cpu_idle_thread = tp;
281 281 tp->t_preempt = 1;
282 282 tp->t_disp_queue = cpu->cpu_disp;
283 283 ASSERT(tp->t_disp_queue != NULL);
284 284 tp->t_bound_cpu = cpu;
285 285 tp->t_affinitycnt = 1;
286 286
287 287 /*
288 288 * Registering a thread in the callback table is usually
289 289 * done in the initialization code of the thread. In this
290 290 * case, we do it right after thread creation to avoid
291 291 	 * blocking the idle thread while registering itself. It also
292 292 * avoids the possibility of reregistration in case a CPU
293 293 * restarts its idle thread.
294 294 */
295 295 CALLB_CPR_INIT_SAFE(tp, "idle");
296 296
297 297 /*
298 298 * Create the thread_reaper daemon. From this point on, exited
299 299 * threads will get reaped.
300 300 */
301 301 (void) thread_create(NULL, 0, (void (*)())thread_reaper,
302 302 NULL, 0, &p0, TS_RUN, minclsyspri);
303 303
304 304 /*
305 305 * Finish initializing the kernel memory allocator now that
306 306 * thread_create() is available.
307 307 */
308 308 kmem_thread_init();
309 309
310 310 if (boothowto & RB_DEBUG)
311 311 kdi_dvec_thravail();
312 312 }
313 313
314 314 /*
315 315 * Create a thread.
316 316 *
317 317 * thread_create() blocks for memory if necessary. It never fails.
318 318 *
319 319 * If stk is NULL, the thread is created at the base of the stack
320 320 * and cannot be swapped.
321 321 */
322 322 kthread_t *
323 323 thread_create(
324 324 caddr_t stk,
325 325 size_t stksize,
326 326 void (*proc)(),
327 327 void *arg,
328 328 size_t len,
329 329 proc_t *pp,
330 330 int state,
331 331 pri_t pri)
332 332 {
333 333 kthread_t *t;
334 334 extern struct classfuncs sys_classfuncs;
335 335 turnstile_t *ts;
336 336
337 337 /*
338 338 * Every thread keeps a turnstile around in case it needs to block.
339 339 * The only reason the turnstile is not simply part of the thread
340 340 * structure is that we may have to break the association whenever
341 341 * more than one thread blocks on a given synchronization object.
342 342 * From a memory-management standpoint, turnstiles are like the
343 343 * "attached mblks" that hang off dblks in the streams allocator.
344 344 */
345 345 ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
346 346
347 347 if (stk == NULL) {
348 348 /*
349 349 * alloc both thread and stack in segkp chunk
350 350 */
351 351
352 352 if (stksize < default_stksize)
353 353 stksize = default_stksize;
354 354
355 355 if (stksize == default_stksize) {
356 356 stk = (caddr_t)segkp_cache_get(segkp_thread);
357 357 } else {
358 358 stksize = roundup(stksize, PAGESIZE);
359 359 stk = (caddr_t)segkp_get(segkp, stksize,
360 360 (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
361 361 }
362 362
363 363 ASSERT(stk != NULL);
364 364
365 365 /*
366 366 * The machine-dependent mutex code may require that
367 367 * thread pointers (since they may be used for mutex owner
368 368 * fields) have certain alignment requirements.
369 369 * PTR24_ALIGN is the size of the alignment quanta.
370 370 * XXX - assumes stack grows toward low addresses.
371 371 */
372 372 if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
373 373 cmn_err(CE_PANIC, "thread_create: proposed stack size"
374 374 " too small to hold thread.");
375 375 #ifdef STACK_GROWTH_DOWN
376 376 stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
377 377 stksize &= -PTR24_ALIGN; /* make thread aligned */
378 378 t = (kthread_t *)(stk + stksize);
379 379 bzero(t, sizeof (kthread_t));
380 380 if (audit_active)
381 381 audit_thread_create(t);
382 382 t->t_stk = stk + stksize;
383 383 t->t_stkbase = stk;
384 384 #else /* stack grows to larger addresses */
385 385 stksize -= SA(sizeof (kthread_t));
386 386 t = (kthread_t *)(stk);
387 387 bzero(t, sizeof (kthread_t));
388 388 t->t_stk = stk + sizeof (kthread_t);
389 389 t->t_stkbase = stk + stksize + sizeof (kthread_t);
390 390 #endif /* STACK_GROWTH_DOWN */
391 391 t->t_flag |= T_TALLOCSTK;
392 392 t->t_swap = stk;
393 393 } else {
394 394 t = kmem_cache_alloc(thread_cache, KM_SLEEP);
395 395 bzero(t, sizeof (kthread_t));
396 396 ASSERT(((uintptr_t)t & (PTR24_ALIGN - 1)) == 0);
397 397 if (audit_active)
398 398 audit_thread_create(t);
399 399 /*
400 400 * Initialize t_stk to the kernel stack pointer to use
401 401 * upon entry to the kernel
402 402 */
403 403 #ifdef STACK_GROWTH_DOWN
404 404 t->t_stk = stk + stksize;
405 405 t->t_stkbase = stk;
406 406 #else
407 407 t->t_stk = stk; /* 3b2-like */
408 408 t->t_stkbase = stk + stksize;
409 409 #endif /* STACK_GROWTH_DOWN */
410 410 }
411 411
412 412 if (kmem_stackinfo != 0) {
413 413 stkinfo_begin(t);
414 414 }
415 415
416 416 t->t_ts = ts;
417 417
418 418 /*
419 419 	 * p_cred could be NULL if thread_create is called before cred_init
420 420 * is called in main.
421 421 */
422 422 mutex_enter(&pp->p_crlock);
423 423 if (pp->p_cred)
424 424 crhold(t->t_cred = pp->p_cred);
425 425 mutex_exit(&pp->p_crlock);
426 426 t->t_start = gethrestime_sec();
427 427 t->t_startpc = proc;
428 428 t->t_procp = pp;
429 429 t->t_clfuncs = &sys_classfuncs.thread;
430 430 t->t_cid = syscid;
431 431 t->t_pri = pri;
432 - t->t_stime = ddi_get_lbolt();
433 - t->t_schedflag = TS_LOAD | TS_DONT_SWAP;
432 + t->t_schedflag = 0;
434 433 t->t_bind_cpu = PBIND_NONE;
435 434 t->t_bindflag = (uchar_t)default_binding_mode;
436 435 t->t_bind_pset = PS_NONE;
437 436 t->t_plockp = &pp->p_lock;
438 437 t->t_copyops = NULL;
439 438 t->t_taskq = NULL;
440 439 t->t_anttime = 0;
441 440 t->t_hatdepth = 0;
442 441
443 442 t->t_dtrace_vtime = 1; /* assure vtimestamp is always non-zero */
444 443
445 444 CPU_STATS_ADDQ(CPU, sys, nthreads, 1);
446 445 #ifndef NPROBE
447 446 /* Kernel probe */
448 447 tnf_thread_create(t);
449 448 #endif /* NPROBE */
450 449 LOCK_INIT_CLEAR(&t->t_lock);
451 450
452 451 /*
453 452 * Callers who give us a NULL proc must do their own
454 453 * stack initialization. e.g. lwp_create()
455 454 */
456 455 if (proc != NULL) {
457 456 t->t_stk = thread_stk_init(t->t_stk);
458 457 thread_load(t, proc, arg, len);
459 458 }
460 459
461 460 /*
462 461 * Put a hold on project0. If this thread is actually in a
463 462 * different project, then t_proj will be changed later in
464 463 * lwp_create(). All kernel-only threads must be in project 0.
465 464 */
466 465 t->t_proj = project_hold(proj0p);
467 466
468 467 lgrp_affinity_init(&t->t_lgrp_affinity);
469 468
470 469 mutex_enter(&pidlock);
471 470 nthread++;
472 471 t->t_did = next_t_id++;
473 472 t->t_prev = curthread->t_prev;
474 473 t->t_next = curthread;
475 474
476 475 /*
477 476 * Add the thread to the list of all threads, and initialize
478 477 * its t_cpu pointer. We need to block preemption since
479 478 * cpu_offline walks the thread list looking for threads
480 479 * with t_cpu pointing to the CPU being offlined. We want
481 480 * to make sure that the list is consistent and that if t_cpu
482 481 * is set, the thread is on the list.
483 482 */
484 483 kpreempt_disable();
485 484 curthread->t_prev->t_next = t;
486 485 curthread->t_prev = t;
487 486
488 487 /*
489 488 * Threads should never have a NULL t_cpu pointer so assign it
490 489 * here. If the thread is being created with state TS_RUN a
491 490 * better CPU may be chosen when it is placed on the run queue.
492 491 *
493 492 * We need to keep kernel preemption disabled when setting all
494 493 * three fields to keep them in sync. Also, always create in
495 494 * the default partition since that's where kernel threads go
496 495 * (if this isn't a kernel thread, t_cpupart will be changed
497 496 * in lwp_create before setting the thread runnable).
498 497 */
499 498 t->t_cpupart = &cp_default;
500 499
501 500 /*
502 501 * For now, affiliate this thread with the root lgroup.
503 502 * Since the kernel does not (presently) allocate its memory
504 503 * in a locality aware fashion, the root is an appropriate home.
505 504 * If this thread is later associated with an lwp, it will have
506 505 	 * its lgroup re-assigned at that time.
507 506 */
508 507 lgrp_move_thread(t, &cp_default.cp_lgrploads[LGRP_ROOTID], 1);
509 508
510 509 /*
511 510 * Inherit the current cpu. If this cpu isn't part of the chosen
512 511 * lgroup, a new cpu will be chosen by cpu_choose when the thread
513 512 * is ready to run.
514 513 */
515 514 if (CPU->cpu_part == &cp_default)
516 515 t->t_cpu = CPU;
517 516 else
518 517 t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
519 518 t->t_pri, NULL);
520 519
521 520 t->t_disp_queue = t->t_cpu->cpu_disp;
522 521 kpreempt_enable();
523 522
524 523 /*
525 524 * Initialize thread state and the dispatcher lock pointer.
526 525 * Need to hold onto pidlock to block allthreads walkers until
527 526 * the state is set.
528 527 */
529 528 switch (state) {
530 529 case TS_RUN:
531 530 curthread->t_oldspl = splhigh(); /* get dispatcher spl */
532 531 THREAD_SET_STATE(t, TS_STOPPED, &transition_lock);
533 532 CL_SETRUN(t);
534 533 thread_unlock(t);
535 534 break;
536 535
537 536 case TS_ONPROC:
538 537 THREAD_ONPROC(t, t->t_cpu);
539 538 break;
540 539
541 540 case TS_FREE:
542 541 /*
543 542 * Free state will be used for intr threads.
544 543 * The interrupt routine must set the thread dispatcher
545 544 * lock pointer (t_lockp) if starting on a CPU
546 545 * other than the current one.
547 546 */
548 547 THREAD_FREEINTR(t, CPU);
549 548 break;
550 549
551 550 case TS_STOPPED:
552 551 THREAD_SET_STATE(t, TS_STOPPED, &stop_lock);
553 552 break;
554 553
555 554 default: /* TS_SLEEP, TS_ZOMB or TS_TRANS */
556 555 cmn_err(CE_PANIC, "thread_create: invalid state %d", state);
557 556 }
558 557 mutex_exit(&pidlock);
559 558 return (t);
560 559 }
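The contract described in the block comment above thread_create() (it blocks for memory rather than failing, and a NULL stk means the thread and its stack are carved out of a segkp allocation) makes it the standard way to start kernel daemons, as thread_init() does for thread_reaper above. A minimal usage sketch, assuming a hypothetical worker function my_daemon():

    static void
    my_daemon(void)
    {
    	/* ... do the daemon's work ... */
    	thread_exit();			/* never returns */
    }

    /* NULL stack, size 0: default-size segkp stack; runs in p0 as a system thread */
    (void) thread_create(NULL, 0, (void (*)())my_daemon, NULL, 0, &p0,
        TS_RUN, minclsyspri);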
561 560
562 561 /*
563 562 * Move thread to project0 and take care of project reference counters.
564 563 */
565 564 void
566 565 thread_rele(kthread_t *t)
567 566 {
568 567 kproject_t *kpj;
569 568
570 569 thread_lock(t);
571 570
572 571 ASSERT(t == curthread || t->t_state == TS_FREE || t->t_procp == &p0);
573 572 kpj = ttoproj(t);
574 573 t->t_proj = proj0p;
575 574
576 575 thread_unlock(t);
577 576
578 577 if (kpj != proj0p) {
579 578 project_rele(kpj);
580 579 (void) project_hold(proj0p);
581 580 }
582 581 }
583 582
584 583 void
585 584 thread_exit(void)
586 585 {
587 586 kthread_t *t = curthread;
588 587
589 588 if ((t->t_proc_flag & TP_ZTHREAD) != 0)
590 589 cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
591 590
592 591 tsd_exit(); /* Clean up this thread's TSD */
593 592
594 593 kcpc_passivate(); /* clean up performance counter state */
595 594
596 595 /*
597 596 * No kernel thread should have called poll() without arranging
598 597 	 * to call pollcleanup() here.
599 598 */
600 599 ASSERT(t->t_pollstate == NULL);
601 600 ASSERT(t->t_schedctl == NULL);
602 601 if (t->t_door)
603 602 door_slam(); /* in case thread did an upcall */
604 603
605 604 #ifndef NPROBE
606 605 /* Kernel probe */
607 606 if (t->t_tnf_tpdp)
608 607 tnf_thread_exit();
609 608 #endif /* NPROBE */
610 609
611 610 thread_rele(t);
612 611 t->t_preempt++;
613 612
614 613 /*
615 614 * remove thread from the all threads list so that
616 615 * death-row can use the same pointers.
617 616 */
618 617 mutex_enter(&pidlock);
619 618 t->t_next->t_prev = t->t_prev;
620 619 t->t_prev->t_next = t->t_next;
621 620 ASSERT(allthreads != t); /* t0 never exits */
622 621 cv_broadcast(&t->t_joincv); /* wake up anyone in thread_join */
623 622 mutex_exit(&pidlock);
624 623
625 624 if (t->t_ctx != NULL)
626 625 exitctx(t);
627 626 if (t->t_procp->p_pctx != NULL)
628 627 exitpctx(t->t_procp);
629 628
630 629 if (kmem_stackinfo != 0) {
631 630 stkinfo_end(t);
632 631 }
633 632
634 633 t->t_state = TS_ZOMB; /* set zombie thread */
635 634
636 635 swtch_from_zombie(); /* give up the CPU */
637 636 /* NOTREACHED */
638 637 }
639 638
640 639 /*
641 640 * Check to see if the specified thread is active (defined as being on
642 641 * the thread list). This is certainly a slow way to do this; if there's
643 642 * ever a reason to speed it up, we could maintain a hash table of active
644 643 * threads indexed by their t_did.
645 644 */
646 645 static kthread_t *
647 646 did_to_thread(kt_did_t tid)
648 647 {
649 648 kthread_t *t;
650 649
651 650 ASSERT(MUTEX_HELD(&pidlock));
652 651 for (t = curthread->t_next; t != curthread; t = t->t_next) {
653 652 if (t->t_did == tid)
654 653 break;
655 654 }
656 655 if (t->t_did == tid)
657 656 return (t);
658 657 else
659 658 return (NULL);
660 659 }
661 660
662 661 /*
663 662 * Wait for specified thread to exit. Returns immediately if the thread
664 663 * could not be found, meaning that it has either already exited or never
665 664 * existed.
666 665 */
667 666 void
668 667 thread_join(kt_did_t tid)
669 668 {
670 669 kthread_t *t;
671 670
672 671 ASSERT(tid != curthread->t_did);
673 672 ASSERT(tid != t0.t_did);
674 673
675 674 mutex_enter(&pidlock);
676 675 /*
677 676 * Make sure we check that the thread is on the thread list
678 677 * before blocking on it; otherwise we could end up blocking on
679 678 * a cv that's already been freed. In other words, don't cache
680 679 * the thread pointer across calls to cv_wait.
681 680 *
682 681 * The choice of loop invariant means that whenever a thread
683 682 * is taken off the allthreads list, a cv_broadcast must be
684 683 * performed on that thread's t_joincv to wake up any waiters.
685 684 * The broadcast doesn't have to happen right away, but it
686 685 * shouldn't be postponed indefinitely (e.g., by doing it in
687 686 * thread_free which may only be executed when the deathrow
688 687 	 * queue is processed).
689 688 */
690 689 while (t = did_to_thread(tid))
691 690 cv_wait(&t->t_joincv, &pidlock);
692 691 mutex_exit(&pidlock);
693 692 }
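Because the wait loop above re-looks the thread up by its t_did on every wakeup, a caller that wants to wait for a thread it created records the t_did and later passes it to thread_join(); if the thread has already exited (or never existed), thread_join() returns immediately. A hedged sketch, reusing the hypothetical worker from the thread_create() example above:

    kthread_t *t;
    kt_did_t did;

    t = thread_create(NULL, 0, (void (*)())my_daemon, NULL, 0, &p0,
        TS_RUN, minclsyspri);
    did = t->t_did;		/* capture the id while t is known to be live */
    ...
    thread_join(did);		/* returns once my_daemon has called thread_exit() */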
694 693
695 694 void
696 695 thread_free_prevent(kthread_t *t)
697 696 {
698 697 kmutex_t *lp;
699 698
700 699 lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
701 700 mutex_enter(lp);
702 701 }
703 702
704 703 void
705 704 thread_free_allow(kthread_t *t)
706 705 {
707 706 kmutex_t *lp;
708 707
709 708 lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
710 709 mutex_exit(lp);
711 710 }
712 711
713 712 static void
714 713 thread_free_barrier(kthread_t *t)
715 714 {
716 715 kmutex_t *lp;
717 716
718 717 lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
719 718 mutex_enter(lp);
720 719 mutex_exit(lp);
721 720 }
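thread_free_prevent(), thread_free_allow() and thread_free_barrier() together implement the hashed thread_free_lock declared near the top of the file ("protects tick thread from reaper"): a reader that must dereference a thread which might be reaped brackets its access with prevent/allow, and thread_free() below calls thread_free_barrier() so it cannot complete while such an access is in flight. A sketch of the reader side, assuming the pointer t was obtained while the thread was known to be valid:

    thread_free_prevent(t);		/* thread_free(t) now stalls in its barrier */
    /* ... examine *t, e.g. charge a clock tick to it ... */
    thread_free_allow(t);		/* t may be reaped again */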
722 721
723 722 void
724 723 thread_free(kthread_t *t)
725 724 {
726 725 boolean_t allocstk = (t->t_flag & T_TALLOCSTK);
727 726 klwp_t *lwp = t->t_lwp;
728 727 caddr_t swap = t->t_swap;
729 728
730 729 ASSERT(t != &t0 && t->t_state == TS_FREE);
731 730 ASSERT(t->t_door == NULL);
732 731 ASSERT(t->t_schedctl == NULL);
733 732 ASSERT(t->t_pollstate == NULL);
734 733
735 734 t->t_pri = 0;
736 735 t->t_pc = 0;
737 736 t->t_sp = 0;
738 737 t->t_wchan0 = NULL;
739 738 t->t_wchan = NULL;
740 739 if (t->t_cred != NULL) {
741 740 crfree(t->t_cred);
742 741 t->t_cred = 0;
743 742 }
744 743 if (t->t_pdmsg) {
745 744 kmem_free(t->t_pdmsg, strlen(t->t_pdmsg) + 1);
746 745 t->t_pdmsg = NULL;
747 746 }
748 747 if (audit_active)
749 748 audit_thread_free(t);
750 749 #ifndef NPROBE
751 750 if (t->t_tnf_tpdp)
752 751 tnf_thread_free(t);
753 752 #endif /* NPROBE */
754 753 if (t->t_cldata) {
755 754 CL_EXITCLASS(t->t_cid, (caddr_t *)t->t_cldata);
756 755 }
757 756 if (t->t_rprof != NULL) {
758 757 kmem_free(t->t_rprof, sizeof (*t->t_rprof));
759 758 t->t_rprof = NULL;
760 759 }
761 760 t->t_lockp = NULL; /* nothing should try to lock this thread now */
762 761 if (lwp)
763 762 lwp_freeregs(lwp, 0);
764 763 if (t->t_ctx)
765 764 freectx(t, 0);
766 765 t->t_stk = NULL;
767 766 if (lwp)
768 767 lwp_stk_fini(lwp);
769 768 lock_clear(&t->t_lock);
770 769
771 770 if (t->t_ts->ts_waiters > 0)
772 771 panic("thread_free: turnstile still active");
773 772
774 773 kmem_cache_free(turnstile_cache, t->t_ts);
775 774
776 775 free_afd(&t->t_activefd);
777 776
778 777 /*
779 778 * Barrier for the tick accounting code. The tick accounting code
780 779 * holds this lock to keep the thread from going away while it's
781 780 * looking at it.
782 781 */
783 782 thread_free_barrier(t);
784 783
785 784 ASSERT(ttoproj(t) == proj0p);
786 785 project_rele(ttoproj(t));
787 786
788 787 lgrp_affinity_free(&t->t_lgrp_affinity);
789 788
790 789 mutex_enter(&pidlock);
791 790 nthread--;
792 791 mutex_exit(&pidlock);
793 792
794 793 /*
795 794 * Free thread, lwp and stack. This needs to be done carefully, since
796 795 * if T_TALLOCSTK is set, the thread is part of the stack.
797 796 */
798 797 t->t_lwp = NULL;
799 798 t->t_swap = NULL;
800 799
801 800 if (swap) {
802 801 segkp_release(segkp, swap);
803 802 }
804 803 if (lwp) {
805 804 kmem_cache_free(lwp_cache, lwp);
806 805 }
807 806 if (!allocstk) {
808 807 kmem_cache_free(thread_cache, t);
809 808 }
810 809 }
811 810
812 811 /*
813 812 * Removes threads associated with the given zone from a deathrow queue.
814 813 * tp is a pointer to the head of the deathrow queue, and countp is a
815 814 * pointer to the current deathrow count. Returns a linked list of
816 815 * threads removed from the list.
817 816 */
818 817 static kthread_t *
819 818 thread_zone_cleanup(kthread_t **tp, int *countp, zoneid_t zoneid)
820 819 {
821 820 kthread_t *tmp, *list = NULL;
822 821 cred_t *cr;
823 822
824 823 ASSERT(MUTEX_HELD(&reaplock));
825 824 while (*tp != NULL) {
826 825 if ((cr = (*tp)->t_cred) != NULL && crgetzoneid(cr) == zoneid) {
827 826 tmp = *tp;
828 827 *tp = tmp->t_forw;
829 828 tmp->t_forw = list;
830 829 list = tmp;
831 830 (*countp)--;
832 831 } else {
833 832 tp = &(*tp)->t_forw;
834 833 }
835 834 }
836 835 return (list);
837 836 }
838 837
839 838 static void
840 839 thread_reap_list(kthread_t *t)
841 840 {
842 841 kthread_t *next;
843 842
844 843 while (t != NULL) {
845 844 next = t->t_forw;
846 845 thread_free(t);
847 846 t = next;
848 847 }
849 848 }
850 849
851 850 /* ARGSUSED */
852 851 static void
853 852 thread_zone_destroy(zoneid_t zoneid, void *unused)
854 853 {
855 854 kthread_t *t, *l;
856 855
857 856 mutex_enter(&reaplock);
858 857 /*
859 858 * Pull threads and lwps associated with zone off deathrow lists.
860 859 */
861 860 t = thread_zone_cleanup(&thread_deathrow, &thread_reapcnt, zoneid);
862 861 l = thread_zone_cleanup(&lwp_deathrow, &lwp_reapcnt, zoneid);
863 862 mutex_exit(&reaplock);
864 863
865 864 /*
866 865 * Guard against race condition in mutex_owner_running:
867 866 * thread=owner(mutex)
868 867 * <interrupt>
869 868 * thread exits mutex
870 869 * thread exits
871 870 * thread reaped
872 871 * thread struct freed
873 872 * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
874 873 * A cross call to all cpus will cause the interrupt handler
875 874 * to reset the PC if it is in mutex_owner_running, refreshing
876 875 * stale thread pointers.
877 876 */
878 877 mutex_sync(); /* sync with mutex code */
879 878
880 879 /*
881 880 * Reap threads
882 881 */
883 882 thread_reap_list(t);
884 883
885 884 /*
886 885 * Reap lwps
887 886 */
888 887 thread_reap_list(l);
889 888 }
890 889
891 890 /*
892 891 * cleanup zombie threads that are on deathrow.
893 892 */
894 893 void
895 894 thread_reaper()
896 895 {
897 896 kthread_t *t, *l;
898 897 callb_cpr_t cprinfo;
899 898
900 899 /*
901 900 * Register callback to clean up threads when zone is destroyed.
902 901 */
903 902 zone_key_create(&zone_thread_key, NULL, NULL, thread_zone_destroy);
904 903
905 904 CALLB_CPR_INIT(&cprinfo, &reaplock, callb_generic_cpr, "t_reaper");
906 905 for (;;) {
907 906 mutex_enter(&reaplock);
908 907 while (thread_deathrow == NULL && lwp_deathrow == NULL) {
909 908 CALLB_CPR_SAFE_BEGIN(&cprinfo);
910 909 cv_wait(&reaper_cv, &reaplock);
911 910 CALLB_CPR_SAFE_END(&cprinfo, &reaplock);
912 911 }
913 912 /*
914 913 * mutex_sync() needs to be called when reaping, but
915 914 * not too often. We limit reaping rate to once
916 915 * per second. Reaplimit is max rate at which threads can
917 916 * be freed. Does not impact thread destruction/creation.
918 917 */
919 918 t = thread_deathrow;
920 919 l = lwp_deathrow;
921 920 thread_deathrow = NULL;
922 921 lwp_deathrow = NULL;
923 922 thread_reapcnt = 0;
924 923 lwp_reapcnt = 0;
925 924 mutex_exit(&reaplock);
926 925
927 926 /*
928 927 * Guard against race condition in mutex_owner_running:
929 928 * thread=owner(mutex)
930 929 * <interrupt>
931 930 * thread exits mutex
932 931 * thread exits
933 932 * thread reaped
934 933 * thread struct freed
935 934 * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
936 935 * A cross call to all cpus will cause the interrupt handler
937 936 * to reset the PC if it is in mutex_owner_running, refreshing
938 937 * stale thread pointers.
939 938 */
940 939 mutex_sync(); /* sync with mutex code */
941 940 /*
942 941 * Reap threads
943 942 */
944 943 thread_reap_list(t);
945 944
946 945 /*
947 946 * Reap lwps
948 947 */
949 948 thread_reap_list(l);
950 949 delay(hz);
951 950 }
952 951 }
953 952
954 953 /*
955 954 	 * This is called by lwp_create(), etc., to put a lwp_deathrow thread onto
956 955 	 * thread_deathrow. The thread's state has already been changed to TS_FREE to
957 956 	 * indicate that it is reapable. The caller already holds the reaplock and the
958 957 	 * thread was already freed.
959 958 */
960 959 void
961 960 reapq_move_lq_to_tq(kthread_t *t)
962 961 {
963 962 ASSERT(t->t_state == TS_FREE);
964 963 ASSERT(MUTEX_HELD(&reaplock));
965 964 t->t_forw = thread_deathrow;
966 965 thread_deathrow = t;
967 966 thread_reapcnt++;
968 967 if (lwp_reapcnt + thread_reapcnt > reaplimit)
969 968 cv_signal(&reaper_cv); /* wake the reaper */
970 969 }
971 970
972 971 /*
973 972 * This is called by resume() to put a zombie thread onto deathrow.
974 973 	 * The thread's state is changed to TS_FREE to indicate that it is reapable.
975 974 * This is called from the idle thread so it must not block - just spin.
976 975 */
977 976 void
978 977 reapq_add(kthread_t *t)
979 978 {
980 979 mutex_enter(&reaplock);
981 980
982 981 /*
983 982 * lwp_deathrow contains threads with lwp linkage and
984 983 * swappable thread stacks which have the default stacksize.
985 984 * These threads' lwps and stacks may be reused by lwp_create().
986 985 *
987 986 * Anything else goes on thread_deathrow(), where it will eventually
988 987 * be thread_free()d.
989 988 */
990 989 if (t->t_flag & T_LWPREUSE) {
991 990 ASSERT(ttolwp(t) != NULL);
992 991 t->t_forw = lwp_deathrow;
993 992 lwp_deathrow = t;
994 993 lwp_reapcnt++;
995 994 } else {
996 995 t->t_forw = thread_deathrow;
997 996 thread_deathrow = t;
998 997 thread_reapcnt++;
999 998 }
1000 999 if (lwp_reapcnt + thread_reapcnt > reaplimit)
1001 1000 cv_signal(&reaper_cv); /* wake the reaper */
1002 1001 t->t_state = TS_FREE;
1003 1002 lock_clear(&t->t_lock);
1004 1003
1005 1004 /*
1006 1005 * Before we return, we need to grab and drop the thread lock for
1007 1006 * the dead thread. At this point, the current thread is the idle
1008 1007 * thread, and the dead thread's CPU lock points to the current
1009 1008 * CPU -- and we must grab and drop the lock to synchronize with
1010 1009 * a racing thread walking a blocking chain that the zombie thread
1011 1010 * was recently in. By this point, that blocking chain is (by
1012 1011 * definition) stale: the dead thread is not holding any locks, and
1013 1012 * is therefore not in any blocking chains -- but if we do not regrab
1014 1013 * our lock before freeing the dead thread's data structures, the
1015 1014 * thread walking the (stale) blocking chain will die on memory
1016 1015 * corruption when it attempts to drop the dead thread's lock. We
1017 1016 * only need do this once because there is no way for the dead thread
1018 1017 * to ever again be on a blocking chain: once we have grabbed and
1019 1018 * dropped the thread lock, we are guaranteed that anyone that could
1020 1019 * have seen this thread in a blocking chain can no longer see it.
1021 1020 */
1022 1021 thread_lock(t);
1023 1022 thread_unlock(t);
1024 1023
1025 1024 mutex_exit(&reaplock);
1026 1025 }
1027 1026
1028 1027 /*
1029 1028 * Install thread context ops for the current thread.
1030 1029 */
1031 1030 void
1032 1031 installctx(
1033 1032 kthread_t *t,
1034 1033 void *arg,
1035 1034 void (*save)(void *),
1036 1035 void (*restore)(void *),
1037 1036 void (*fork)(void *, void *),
1038 1037 void (*lwp_create)(void *, void *),
1039 1038 void (*exit)(void *),
1040 1039 void (*free)(void *, int))
1041 1040 {
1042 1041 struct ctxop *ctx;
1043 1042
1044 1043 ctx = kmem_alloc(sizeof (struct ctxop), KM_SLEEP);
1045 1044 ctx->save_op = save;
1046 1045 ctx->restore_op = restore;
1047 1046 ctx->fork_op = fork;
1048 1047 ctx->lwp_create_op = lwp_create;
1049 1048 ctx->exit_op = exit;
1050 1049 ctx->free_op = free;
1051 1050 ctx->arg = arg;
1052 1051 ctx->next = t->t_ctx;
1053 1052 t->t_ctx = ctx;
1054 1053 }
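installctx() simply pushes a new ctxop onto the head of t->t_ctx; savectx() and restorectx() below then walk that list each time the thread is switched off of and back onto a CPU, and exitctx()/freectx() run the exit and free operations at teardown. A minimal sketch, assuming hypothetical per-thread hardware state (my_hw_state_t) and helpers with the signatures installctx() expects:

    static void my_hw_save(void *arg);			/* invoked from savectx() */
    static void my_hw_restore(void *arg);		/* invoked from restorectx() */
    static void my_hw_free(void *arg, int isexec);	/* invoked from freectx() */

    my_hw_state_t *st = kmem_zalloc(sizeof (*st), KM_SLEEP);

    /* no fork, lwp_create or exit handlers needed for this sketch */
    installctx(curthread, st, my_hw_save, my_hw_restore,
        NULL, NULL, NULL, my_hw_free);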
1055 1054
1056 1055 /*
1057 1056 * Remove the thread context ops from a thread.
1058 1057 */
1059 1058 int
1060 1059 removectx(
1061 1060 kthread_t *t,
1062 1061 void *arg,
1063 1062 void (*save)(void *),
1064 1063 void (*restore)(void *),
1065 1064 void (*fork)(void *, void *),
1066 1065 void (*lwp_create)(void *, void *),
1067 1066 void (*exit)(void *),
1068 1067 void (*free)(void *, int))
1069 1068 {
1070 1069 struct ctxop *ctx, *prev_ctx;
1071 1070
1072 1071 /*
1073 1072 * The incoming kthread_t (which is the thread for which the
1074 1073 * context ops will be removed) should be one of the following:
1075 1074 *
1076 1075 * a) the current thread,
1077 1076 *
1078 1077 * b) a thread of a process that's being forked (SIDL),
1079 1078 *
1080 1079 * c) a thread that belongs to the same process as the current
1081 1080 * thread and for which the current thread is the agent thread,
1082 1081 *
1083 1082 * d) a thread that is TS_STOPPED which is indicative of it
1084 1083 * being (if curthread is not an agent) a thread being created
1085 1084 * as part of an lwp creation.
1086 1085 */
1087 1086 ASSERT(t == curthread || ttoproc(t)->p_stat == SIDL ||
1088 1087 ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1089 1088
1090 1089 /*
1091 1090 * Serialize modifications to t->t_ctx to prevent the agent thread
1092 1091 * and the target thread from racing with each other during lwp exit.
1093 1092 */
1094 1093 mutex_enter(&t->t_ctx_lock);
1095 1094 prev_ctx = NULL;
1096 1095 kpreempt_disable();
1097 1096 for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
1098 1097 if (ctx->save_op == save && ctx->restore_op == restore &&
1099 1098 ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
1100 1099 ctx->exit_op == exit && ctx->free_op == free &&
1101 1100 ctx->arg == arg) {
1102 1101 if (prev_ctx)
1103 1102 prev_ctx->next = ctx->next;
1104 1103 else
1105 1104 t->t_ctx = ctx->next;
1106 1105 mutex_exit(&t->t_ctx_lock);
1107 1106 if (ctx->free_op != NULL)
1108 1107 (ctx->free_op)(ctx->arg, 0);
1109 1108 kmem_free(ctx, sizeof (struct ctxop));
1110 1109 kpreempt_enable();
1111 1110 return (1);
1112 1111 }
1113 1112 prev_ctx = ctx;
1114 1113 }
1115 1114 mutex_exit(&t->t_ctx_lock);
1116 1115 kpreempt_enable();
1117 1116
1118 1117 return (0);
1119 1118 }
1120 1119
1121 1120 void
1122 1121 savectx(kthread_t *t)
1123 1122 {
1124 1123 struct ctxop *ctx;
1125 1124
1126 1125 ASSERT(t == curthread);
1127 1126 for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
1128 1127 if (ctx->save_op != NULL)
1129 1128 (ctx->save_op)(ctx->arg);
1130 1129 }
1131 1130
1132 1131 void
1133 1132 restorectx(kthread_t *t)
1134 1133 {
1135 1134 struct ctxop *ctx;
1136 1135
1137 1136 ASSERT(t == curthread);
1138 1137 for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
1139 1138 if (ctx->restore_op != NULL)
1140 1139 (ctx->restore_op)(ctx->arg);
1141 1140 }
1142 1141
1143 1142 void
1144 1143 forkctx(kthread_t *t, kthread_t *ct)
1145 1144 {
1146 1145 struct ctxop *ctx;
1147 1146
1148 1147 for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1149 1148 if (ctx->fork_op != NULL)
1150 1149 (ctx->fork_op)(t, ct);
1151 1150 }
1152 1151
1153 1152 /*
1154 1153 * Note that this operator is only invoked via the _lwp_create
1155 1154 * system call. The system may have other reasons to create lwps
1156 1155 * e.g. the agent lwp or the doors unreferenced lwp.
1157 1156 */
1158 1157 void
1159 1158 lwp_createctx(kthread_t *t, kthread_t *ct)
1160 1159 {
1161 1160 struct ctxop *ctx;
1162 1161
1163 1162 for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1164 1163 if (ctx->lwp_create_op != NULL)
1165 1164 (ctx->lwp_create_op)(t, ct);
1166 1165 }
1167 1166
1168 1167 /*
1169 1168 * exitctx is called from thread_exit() and lwp_exit() to perform any actions
1170 1169 * needed when the thread/LWP leaves the processor for the last time. This
1171 1170 * routine is not intended to deal with freeing memory; freectx() is used for
1172 1171 * that purpose during thread_free(). This routine is provided to allow for
1173 1172 * clean-up that can't wait until thread_free().
1174 1173 */
1175 1174 void
1176 1175 exitctx(kthread_t *t)
1177 1176 {
1178 1177 struct ctxop *ctx;
1179 1178
1180 1179 for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1181 1180 if (ctx->exit_op != NULL)
1182 1181 (ctx->exit_op)(t);
1183 1182 }
1184 1183
1185 1184 /*
1186 1185 * freectx is called from thread_free() and exec() to get
1187 1186 * rid of old thread context ops.
1188 1187 */
1189 1188 void
1190 1189 freectx(kthread_t *t, int isexec)
1191 1190 {
1192 1191 struct ctxop *ctx;
1193 1192
1194 1193 kpreempt_disable();
1195 1194 while ((ctx = t->t_ctx) != NULL) {
1196 1195 t->t_ctx = ctx->next;
1197 1196 if (ctx->free_op != NULL)
1198 1197 (ctx->free_op)(ctx->arg, isexec);
1199 1198 kmem_free(ctx, sizeof (struct ctxop));
1200 1199 }
1201 1200 kpreempt_enable();
1202 1201 }
1203 1202
1204 1203 /*
1205 1204 * freectx_ctx is called from lwp_create() when lwp is reused from
1206 1205 * lwp_deathrow and its thread structure is added to thread_deathrow.
1207 1206 * The thread structure to which this ctx was attached may be already
1208 1207 * freed by the thread reaper so free_op implementations shouldn't rely
1209 1208 * on thread structure to which this ctx was attached still being around.
1210 1209 */
1211 1210 void
1212 1211 freectx_ctx(struct ctxop *ctx)
1213 1212 {
1214 1213 struct ctxop *nctx;
1215 1214
1216 1215 ASSERT(ctx != NULL);
1217 1216
1218 1217 kpreempt_disable();
1219 1218 do {
1220 1219 nctx = ctx->next;
1221 1220 if (ctx->free_op != NULL)
1222 1221 (ctx->free_op)(ctx->arg, 0);
1223 1222 kmem_free(ctx, sizeof (struct ctxop));
1224 1223 } while ((ctx = nctx) != NULL);
1225 1224 kpreempt_enable();
1226 1225 }
1227 1226
1228 1227 /*
1229 1228 * Set the thread running; arrange for it to be swapped in if necessary.
1230 1229 */
1231 1230 void
1232 1231 setrun_locked(kthread_t *t)
1233 1232 {
1234 1233 ASSERT(THREAD_LOCK_HELD(t));
1235 1234 if (t->t_state == TS_SLEEP) {
1236 1235 /*
1237 1236 * Take off sleep queue.
1238 1237 */
1239 1238 SOBJ_UNSLEEP(t->t_sobj_ops, t);
1240 1239 } else if (t->t_state & (TS_RUN | TS_ONPROC)) {
1241 1240 /*
1242 1241 * Already on dispatcher queue.
1243 1242 */
1244 1243 return;
1245 1244 } else if (t->t_state == TS_WAIT) {
1246 1245 waitq_setrun(t);
1247 1246 } else if (t->t_state == TS_STOPPED) {
1248 1247 /*
1249 1248 * All of the sending of SIGCONT (TC_XSTART) and /proc
1250 1249 * (TC_PSTART) and lwp_continue() (TC_CSTART) must have
1251 1250 * requested that the thread be run.
1252 1251 * Just calling setrun() is not sufficient to set a stopped
1253 1252 * thread running. TP_TXSTART is always set if the thread
1254 1253 * is not stopped by a jobcontrol stop signal.
1255 1254 * TP_TPSTART is always set if /proc is not controlling it.
1256 1255 * TP_TCSTART is always set if lwp_suspend() didn't stop it.
1257 1256 * The thread won't be stopped unless one of these
1258 1257 * three mechanisms did it.
1259 1258 *
1260 1259 * These flags must be set before calling setrun_locked(t).
1261 1260 * They can't be passed as arguments because the streams
1262 1261 * code calls setrun() indirectly and the mechanism for
1263 1262 * doing so admits only one argument. Note that the
1264 1263 * thread must be locked in order to change t_schedflags.
1265 1264 */
1266 1265 if ((t->t_schedflag & TS_ALLSTART) != TS_ALLSTART)
1267 1266 return;
1268 1267 /*
1269 1268 * Process is no longer stopped (a thread is running).
1270 1269 */
1271 1270 t->t_whystop = 0;
1272 1271 t->t_whatstop = 0;
1273 1272 /*
1274 1273 * Strictly speaking, we do not have to clear these
1275 1274 * flags here; they are cleared on entry to stop().
1276 1275 * However, they are confusing when doing kernel
1277 1276 * debugging or when they are revealed by ps(1).
1278 1277 */
1279 1278 t->t_schedflag &= ~TS_ALLSTART;
1280 1279 THREAD_TRANSITION(t); /* drop stopped-thread lock */
1281 1280 ASSERT(t->t_lockp == &transition_lock);
1282 1281 ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
1283 1282 /*
1284 1283 * Let the class put the process on the dispatcher queue.
1285 1284 */
1286 1285 CL_SETRUN(t);
1287 1286 }
1288 1287 }
1289 1288
1290 1289 void
1291 1290 setrun(kthread_t *t)
1292 1291 {
1293 1292 thread_lock(t);
1294 1293 setrun_locked(t);
1295 1294 thread_unlock(t);
1296 1295 }
1297 1296
1298 1297 /*
1299 1298 * Unpin an interrupted thread.
1300 1299 * When an interrupt occurs, the interrupt is handled on the stack
1301 1300 * of an interrupt thread, taken from a pool linked to the CPU structure.
1302 1301 *
1303 1302 * When swtch() is switching away from an interrupt thread because it
1304 1303 * blocked or was preempted, this routine is called to complete the
1305 1304 * saving of the interrupted thread state, and returns the interrupted
1306 1305 * thread pointer so it may be resumed.
1307 1306 *
1308 1307 * Called by swtch() only at high spl.
1309 1308 */
1310 1309 kthread_t *
1311 1310 thread_unpin()
1312 1311 {
1313 1312 kthread_t *t = curthread; /* current thread */
1314 1313 kthread_t *itp; /* interrupted thread */
1315 1314 int i; /* interrupt level */
1316 1315 extern int intr_passivate();
1317 1316
1318 1317 ASSERT(t->t_intr != NULL);
1319 1318
1320 1319 itp = t->t_intr; /* interrupted thread */
1321 1320 t->t_intr = NULL; /* clear interrupt ptr */
1322 1321
1323 1322 /*
1324 1323 * Get state from interrupt thread for the one
1325 1324 * it interrupted.
1326 1325 */
1327 1326
1328 1327 i = intr_passivate(t, itp);
1329 1328
1330 1329 TRACE_5(TR_FAC_INTR, TR_INTR_PASSIVATE,
1331 1330 "intr_passivate:level %d curthread %p (%T) ithread %p (%T)",
1332 1331 i, t, t, itp, itp);
1333 1332
1334 1333 /*
1335 1334 * Dissociate the current thread from the interrupted thread's LWP.
1336 1335 */
1337 1336 t->t_lwp = NULL;
1338 1337
1339 1338 /*
1340 1339 * Interrupt handlers above the level that spinlocks block must
1341 1340 * not block.
1342 1341 */
1343 1342 #if DEBUG
1344 1343 if (i < 0 || i > LOCK_LEVEL)
1345 1344 cmn_err(CE_PANIC, "thread_unpin: ipl out of range %x", i);
1346 1345 #endif
1347 1346
1348 1347 /*
1349 1348 * Compute the CPU's base interrupt level based on the active
1350 1349 * interrupts.
1351 1350 */
1352 1351 ASSERT(CPU->cpu_intr_actv & (1 << i));
1353 1352 set_base_spl();
1354 1353
1355 1354 return (itp);
1356 1355 }
1357 1356
1358 1357 /*
1359 1358 * Create and initialize an interrupt thread.
1360 1359 * Returns non-zero on error.
1361 1360 * Called at spl7() or better.
1362 1361 */
1363 1362 void
1364 1363 thread_create_intr(struct cpu *cp)
1365 1364 {
1366 1365 kthread_t *tp;
1367 1366
1368 1367 tp = thread_create(NULL, 0,
1369 1368 (void (*)())thread_create_intr, NULL, 0, &p0, TS_ONPROC, 0);
1370 1369
1371 1370 /*
1372 1371 * Set the thread in the TS_FREE state. The state will change
1373 1372 * to TS_ONPROC only while the interrupt is active. Think of these
1374 1373 * as being on a private free list for the CPU. Being TS_FREE keeps
1375 1374 * inactive interrupt threads out of debugger thread lists.
1376 1375 *
1377 1376 * We cannot call thread_create with TS_FREE because of the current
1378 1377 * checks there for ONPROC. Fix this when thread_create takes flags.
1379 1378 */
1380 1379 THREAD_FREEINTR(tp, cp);
1381 1380
1382 1381 /*
1383 1382 * Nobody should ever reference the credentials of an interrupt
1384 1383 * thread so make it NULL to catch any such references.
1385 1384 */
1386 1385 tp->t_cred = NULL;
1387 1386 tp->t_flag |= T_INTR_THREAD;
1388 1387 tp->t_cpu = cp;
1389 1388 tp->t_bound_cpu = cp;
1390 1389 tp->t_disp_queue = cp->cpu_disp;
1391 1390 tp->t_affinitycnt = 1;
1392 1391 tp->t_preempt = 1;
1393 1392
1394 1393 /*
1395 1394 * Don't make a user-requested binding on this thread so that
1396 1395 * the processor can be offlined.
1397 1396 */
1398 1397 tp->t_bind_cpu = PBIND_NONE; /* no USER-requested binding */
1399 1398 tp->t_bind_pset = PS_NONE;
1400 1399
1401 1400 #if defined(__i386) || defined(__amd64)
1402 1401 tp->t_stk -= STACK_ALIGN;
1403 1402 *(tp->t_stk) = 0; /* terminate intr thread stack */
1404 1403 #endif
1405 1404
1406 1405 /*
1407 1406 * Link onto CPU's interrupt pool.
1408 1407 */
1409 1408 tp->t_link = cp->cpu_intr_thread;
1410 1409 cp->cpu_intr_thread = tp;
1411 1410 }
1412 1411
1413 1412 /*
1414 1413 * TSD -- THREAD SPECIFIC DATA
1415 1414 */
1416 1415 static kmutex_t tsd_mutex; /* linked list spin lock */
1417 1416 static uint_t tsd_nkeys; /* size of destructor array */
1418 1417 /* per-key destructor funcs */
1419 1418 static void (**tsd_destructor)(void *);
1420 1419 /* list of tsd_thread's */
1421 1420 static struct tsd_thread *tsd_list;
1422 1421
1423 1422 /*
1424 1423 * Default destructor
1425 1424 * Needed because NULL destructor means that the key is unused
1426 1425 */
1427 1426 /* ARGSUSED */
1428 1427 void
1429 1428 tsd_defaultdestructor(void *value)
1430 1429 {}
1431 1430
1432 1431 /*
1433 1432 * Create a key (index into per thread array)
1434 1433 * Locks out tsd_create, tsd_destroy, and tsd_exit
1435 1434 * May allocate memory with lock held
1436 1435 */
1437 1436 void
1438 1437 tsd_create(uint_t *keyp, void (*destructor)(void *))
1439 1438 {
1440 1439 int i;
1441 1440 uint_t nkeys;
1442 1441
1443 1442 /*
1444 1443 * if key is allocated, do nothing
1445 1444 */
1446 1445 mutex_enter(&tsd_mutex);
1447 1446 if (*keyp) {
1448 1447 mutex_exit(&tsd_mutex);
1449 1448 return;
1450 1449 }
1451 1450 /*
1452 1451 * find an unused key
1453 1452 */
1454 1453 if (destructor == NULL)
1455 1454 destructor = tsd_defaultdestructor;
1456 1455
1457 1456 for (i = 0; i < tsd_nkeys; ++i)
1458 1457 if (tsd_destructor[i] == NULL)
1459 1458 break;
1460 1459
1461 1460 /*
1462 1461 * if no unused keys, increase the size of the destructor array
1463 1462 */
1464 1463 if (i == tsd_nkeys) {
1465 1464 if ((nkeys = (tsd_nkeys << 1)) == 0)
1466 1465 nkeys = 1;
1467 1466 tsd_destructor =
1468 1467 (void (**)(void *))tsd_realloc((void *)tsd_destructor,
1469 1468 (size_t)(tsd_nkeys * sizeof (void (*)(void *))),
1470 1469 (size_t)(nkeys * sizeof (void (*)(void *))));
1471 1470 tsd_nkeys = nkeys;
1472 1471 }
1473 1472
1474 1473 /*
1475 1474 * allocate the next available unused key
1476 1475 */
1477 1476 tsd_destructor[i] = destructor;
1478 1477 *keyp = i + 1;
1479 1478 mutex_exit(&tsd_mutex);
1480 1479 }
1481 1480
1482 1481 /*
1483 1482 * Destroy a key -- this is for unloadable modules
1484 1483 *
1485 1484 * Assumes that the caller is preventing tsd_set and tsd_get
1486 1485 * Locks out tsd_create, tsd_destroy, and tsd_exit
1487 1486 * May free memory with lock held
1488 1487 */
1489 1488 void
1490 1489 tsd_destroy(uint_t *keyp)
1491 1490 {
1492 1491 uint_t key;
1493 1492 struct tsd_thread *tsd;
1494 1493
1495 1494 /*
1496 1495 * protect the key namespace and our destructor lists
1497 1496 */
1498 1497 mutex_enter(&tsd_mutex);
1499 1498 key = *keyp;
1500 1499 *keyp = 0;
1501 1500
1502 1501 ASSERT(key <= tsd_nkeys);
1503 1502
1504 1503 /*
1505 1504 * if the key is valid
1506 1505 */
1507 1506 if (key != 0) {
1508 1507 uint_t k = key - 1;
1509 1508 /*
1510 1509 * for every thread with TSD, call key's destructor
1511 1510 */
1512 1511 for (tsd = tsd_list; tsd; tsd = tsd->ts_next) {
1513 1512 /*
1514 1513 * no TSD for key in this thread
1515 1514 */
1516 1515 if (key > tsd->ts_nkeys)
1517 1516 continue;
1518 1517 /*
1519 1518 * call destructor for key
1520 1519 */
1521 1520 if (tsd->ts_value[k] && tsd_destructor[k])
1522 1521 (*tsd_destructor[k])(tsd->ts_value[k]);
1523 1522 /*
1524 1523 * reset value for key
1525 1524 */
1526 1525 tsd->ts_value[k] = NULL;
1527 1526 }
1528 1527 /*
1529 1528 * actually free the key (NULL destructor == unused)
1530 1529 */
1531 1530 tsd_destructor[k] = NULL;
1532 1531 }
1533 1532
1534 1533 mutex_exit(&tsd_mutex);
1535 1534 }
1536 1535
1537 1536 /*
1538 1537 * Quickly return the per thread value that was stored with the specified key
1539 1538 * Assumes the caller is protecting key from tsd_create and tsd_destroy
1540 1539 */
1541 1540 void *
1542 1541 tsd_get(uint_t key)
1543 1542 {
1544 1543 return (tsd_agent_get(curthread, key));
1545 1544 }
1546 1545
1547 1546 /*
1548 1547 * Set a per thread value indexed with the specified key
1549 1548 */
1550 1549 int
1551 1550 tsd_set(uint_t key, void *value)
1552 1551 {
1553 1552 return (tsd_agent_set(curthread, key, value));
1554 1553 }
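The TSD interfaces above combine into the usual pattern: a subsystem creates a key once, each thread lazily attaches its own value to that key, and tsd_destroy() (or tsd_exit() at thread exit) runs the destructor for every value still attached. A hedged sketch with a hypothetical my_state_t and destructor my_state_destroy():

    static uint_t my_tsd_key;		/* 0 until tsd_create() assigns it */

    void
    my_subsys_init(void)
    {
    	tsd_create(&my_tsd_key, my_state_destroy);
    }

    void
    my_subsys_thread_work(void)
    {
    	my_state_t *st = tsd_get(my_tsd_key);

    	if (st == NULL) {
    		st = kmem_zalloc(sizeof (*st), KM_SLEEP);
    		(void) tsd_set(my_tsd_key, st);
    	}
    	/* ... use st; my_state_destroy() reclaims it when the thread exits ... */
    }

    void
    my_subsys_fini(void)
    {
    	tsd_destroy(&my_tsd_key);	/* runs my_state_destroy for every thread */
    }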
1555 1554
1556 1555 /*
1557 1556 * Like tsd_get(), except that the agent lwp can get the tsd of
1558 1557 * another thread in the same process (the agent thread only runs when the
1559 1558 * process is completely stopped by /proc), or syslwp is creating a new lwp.
1560 1559 */
1561 1560 void *
1562 1561 tsd_agent_get(kthread_t *t, uint_t key)
1563 1562 {
1564 1563 struct tsd_thread *tsd = t->t_tsd;
1565 1564
1566 1565 ASSERT(t == curthread ||
1567 1566 ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1568 1567
1569 1568 if (key && tsd != NULL && key <= tsd->ts_nkeys)
1570 1569 return (tsd->ts_value[key - 1]);
1571 1570 return (NULL);
1572 1571 }
1573 1572
1574 1573 /*
1575 1574 * Like tsd_set(), except that the agent lwp can set the tsd of
1576 1575 * another thread in the same process, or syslwp can set the tsd
1577 1576 * of a thread it's in the middle of creating.
1578 1577 *
1579 1578 * Assumes the caller is protecting key from tsd_create and tsd_destroy
1580 1579 * May lock out tsd_destroy (and tsd_create), may allocate memory with
1581 1580 * lock held
1582 1581 */
1583 1582 int
1584 1583 tsd_agent_set(kthread_t *t, uint_t key, void *value)
1585 1584 {
1586 1585 struct tsd_thread *tsd = t->t_tsd;
1587 1586
1588 1587 ASSERT(t == curthread ||
1589 1588 ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1590 1589
1591 1590 if (key == 0)
1592 1591 return (EINVAL);
1593 1592 if (tsd == NULL)
1594 1593 tsd = t->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
1595 1594 if (key <= tsd->ts_nkeys) {
1596 1595 tsd->ts_value[key - 1] = value;
1597 1596 return (0);
1598 1597 }
1599 1598
1600 1599 ASSERT(key <= tsd_nkeys);
1601 1600
1602 1601 /*
1603 1602 * lock out tsd_destroy()
1604 1603 */
1605 1604 mutex_enter(&tsd_mutex);
1606 1605 if (tsd->ts_nkeys == 0) {
1607 1606 /*
1608 1607 * Link onto list of threads with TSD
1609 1608 */
1610 1609 if ((tsd->ts_next = tsd_list) != NULL)
1611 1610 tsd_list->ts_prev = tsd;
1612 1611 tsd_list = tsd;
1613 1612 }
1614 1613
1615 1614 /*
1616 1615 * Allocate thread local storage and set the value for key
1617 1616 */
1618 1617 tsd->ts_value = tsd_realloc(tsd->ts_value,
1619 1618 tsd->ts_nkeys * sizeof (void *),
1620 1619 key * sizeof (void *));
1621 1620 tsd->ts_nkeys = key;
1622 1621 tsd->ts_value[key - 1] = value;
1623 1622 mutex_exit(&tsd_mutex);
1624 1623
1625 1624 return (0);
1626 1625 }
1627 1626
1628 1627
1629 1628 /*
1630 1629 * Return the per thread value that was stored with the specified key
1631 1630 * If necessary, create the key and the value
1632 1631 * Assumes the caller is protecting *keyp from tsd_destroy
1633 1632 */
1634 1633 void *
1635 1634 tsd_getcreate(uint_t *keyp, void (*destroy)(void *), void *(*allocate)(void))
1636 1635 {
1637 1636 void *value;
1638 1637 uint_t key = *keyp;
1639 1638 struct tsd_thread *tsd = curthread->t_tsd;
1640 1639
1641 1640 if (tsd == NULL)
1642 1641 tsd = curthread->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
1643 1642 if (key && key <= tsd->ts_nkeys && (value = tsd->ts_value[key - 1]))
1644 1643 return (value);
1645 1644 if (key == 0)
1646 1645 tsd_create(keyp, destroy);
1647 1646 (void) tsd_set(*keyp, value = (*allocate)());
1648 1647
1649 1648 return (value);
1650 1649 }
1651 1650
1652 1651 /*
1653 1652 * Called from thread_exit() to run the destructor function for each tsd
1654 1653 * Locks out tsd_create and tsd_destroy
1655 1654 * Assumes that the destructor *DOES NOT* use tsd
1656 1655 */
1657 1656 void
1658 1657 tsd_exit(void)
1659 1658 {
1660 1659 int i;
1661 1660 struct tsd_thread *tsd = curthread->t_tsd;
1662 1661
1663 1662 if (tsd == NULL)
1664 1663 return;
1665 1664
1666 1665 if (tsd->ts_nkeys == 0) {
1667 1666 kmem_free(tsd, sizeof (*tsd));
1668 1667 curthread->t_tsd = NULL;
1669 1668 return;
1670 1669 }
1671 1670
1672 1671 /*
1673 1672 * lock out tsd_create and tsd_destroy, call
1674 1673 * the destructor, and mark the value as destroyed.
1675 1674 */
1676 1675 mutex_enter(&tsd_mutex);
1677 1676
1678 1677 for (i = 0; i < tsd->ts_nkeys; i++) {
1679 1678 if (tsd->ts_value[i] && tsd_destructor[i])
1680 1679 (*tsd_destructor[i])(tsd->ts_value[i]);
1681 1680 tsd->ts_value[i] = NULL;
1682 1681 }
1683 1682
1684 1683 /*
1685 1684 * remove from linked list of threads with TSD
1686 1685 */
1687 1686 if (tsd->ts_next)
1688 1687 tsd->ts_next->ts_prev = tsd->ts_prev;
1689 1688 if (tsd->ts_prev)
1690 1689 tsd->ts_prev->ts_next = tsd->ts_next;
1691 1690 if (tsd_list == tsd)
1692 1691 tsd_list = tsd->ts_next;
1693 1692
1694 1693 mutex_exit(&tsd_mutex);
1695 1694
1696 1695 /*
1697 1696 * free up the TSD
1698 1697 */
1699 1698 kmem_free(tsd->ts_value, tsd->ts_nkeys * sizeof (void *));
1700 1699 kmem_free(tsd, sizeof (struct tsd_thread));
1701 1700 curthread->t_tsd = NULL;
1702 1701 }
1703 1702
1704 1703 /*
1705 1704 * realloc
1706 1705 */
1707 1706 static void *
1708 1707 tsd_realloc(void *old, size_t osize, size_t nsize)
1709 1708 {
1710 1709 void *new;
1711 1710
1712 1711 new = kmem_zalloc(nsize, KM_SLEEP);
1713 1712 if (old) {
1714 1713 bcopy(old, new, osize);
1715 1714 kmem_free(old, osize);
1716 1715 }
1717 1716 return (new);
1718 1717 }
1719 1718
1720 1719 /*
1721 1720 * Return non-zero if an interrupt is being serviced.
1722 1721 */
1723 1722 int
1724 1723 servicing_interrupt()
1725 1724 {
1726 1725 int onintr = 0;
1727 1726
1728 1727 /* Are we an interrupt thread */
1729 1728 if (curthread->t_flag & T_INTR_THREAD)
1730 1729 return (1);
1731 1730 /* Are we servicing a high level interrupt? */
1732 1731 if (CPU_ON_INTR(CPU)) {
1733 1732 kpreempt_disable();
1734 1733 onintr = CPU_ON_INTR(CPU);
1735 1734 kpreempt_enable();
1736 1735 }
1737 1736 return (onintr);
1738 1737 }
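A common reason to call servicing_interrupt() is to avoid sleeping operations in interrupt context. A minimal sketch under that assumption; the my_get_buf() name and the 512-byte size are made up.

static void *
my_get_buf(void)
{
	/* Never block for memory while servicing an interrupt. */
	int kmflag = servicing_interrupt() ? KM_NOSLEEP : KM_SLEEP;

	return (kmem_alloc(512, kmflag));
}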
1739 1738
1740 1739
1741 1740 /*
1742 1741 * Change the dispatch priority of a thread in the system.
1743 1742 * Used when raising or lowering a thread's priority.
1744 1743 * (E.g., priority inheritance)
1745 1744 *
1746 1745 * Since threads are queued according to their priority, we
1747 1746 * must check the thread's state to determine whether it
1748 1747 * is on a queue somewhere. If it is, we've got to:
1749 1748 *
1750 1749 * o Dequeue the thread.
1751 1750 * o Change its effective priority.
1752 1751 * o Enqueue the thread.
1753 1752 *
1754 1753 * Assumptions: The thread whose priority we wish to change
1755 1754 * must be locked before we call thread_change_(e)pri().
1756 1755 * The thread_change_(e)pri() function doesn't drop the thread
1757 1756 * lock--that must be done by its caller.
1758 1757 */
1759 1758 void
1760 1759 thread_change_epri(kthread_t *t, pri_t disp_pri)
1761 1760 {
1762 1761 uint_t state;
1763 1762
1764 1763 ASSERT(THREAD_LOCK_HELD(t));
1765 1764
1766 1765 /*
1767 1766 * If the inherited priority hasn't actually changed,
1768 1767 * just return.
1769 1768 */
1770 1769 if (t->t_epri == disp_pri)
1771 1770 return;
1772 1771
1773 1772 state = t->t_state;
1774 1773
1775 1774 /*
1776 1775 * If it's not on a queue, change the priority with impunity.
1777 1776 */
1778 1777 if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
1779 1778 t->t_epri = disp_pri;
1780 1779 if (state == TS_ONPROC) {
1781 1780 cpu_t *cp = t->t_disp_queue->disp_cpu;
1782 1781
1783 1782 if (t == cp->cpu_dispthread)
1784 1783 cp->cpu_dispatch_pri = DISP_PRIO(t);
1785 1784 }
1786 1785 } else if (state == TS_SLEEP) {
1787 1786 /*
1788 1787 * Take the thread out of its sleep queue.
1789 1788 * Change the inherited priority.
1790 1789 * Re-enqueue the thread.
1791 1790 * Each synchronization object exports a function
1792 1791 * to do this in an appropriate manner.
1793 1792 */
1794 1793 SOBJ_CHANGE_EPRI(t->t_sobj_ops, t, disp_pri);
1795 1794 } else if (state == TS_WAIT) {
1796 1795 /*
1797 1796 * Re-enqueue a thread on the wait queue if its
1798 1797 * effective priority needs to change.
1799 1798 */
1800 1799 if (disp_pri != t->t_epri)
1801 1800 waitq_change_pri(t, disp_pri);
1802 1801 } else {
1803 1802 /*
1804 1803 * The thread is on a run queue.
1805 1804 * Note: setbackdq() may not put the thread
1806 1805 * back on the same run queue where it originally
1807 1806 * resided.
1808 1807 */
1809 1808 (void) dispdeq(t);
1810 1809 t->t_epri = disp_pri;
1811 1810 setbackdq(t);
1812 1811 }
1813 1812 schedctl_set_cidpri(t);
1814 1813 }
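A hedged sketch of the calling convention described above, in the style of priority inheritance: the caller takes the thread lock, changes the inherited priority, and drops the lock itself. The my_inherit_pri() wrapper and its boost-only policy are illustrative, not how the turnstile code is actually structured.

/* Hypothetical wrapper: raise t's inherited priority to new_pri. */
static void
my_inherit_pri(kthread_t *t, pri_t new_pri)
{
	thread_lock(t);			/* caller must hold the thread lock */
	if (new_pri > DISP_PRIO(t))
		thread_change_epri(t, new_pri);
	thread_unlock(t);		/* thread_change_epri() never drops it */
}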
1815 1814
1816 1815 /*
1817 1816 * Function: Change the t_pri field of a thread.
1818 1817 * Side Effects: Adjust the thread ordering on a run queue
1819 1818 * or sleep queue, if necessary.
1820 1819 * Returns: 1 if the thread was on a run queue, else 0.
1821 1820 */
1822 1821 int
1823 1822 thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
1824 1823 {
1825 1824 uint_t state;
1826 1825 int on_rq = 0;
1827 1826
1828 1827 ASSERT(THREAD_LOCK_HELD(t));
1829 1828
1830 1829 state = t->t_state;
1831 1830 THREAD_WILLCHANGE_PRI(t, disp_pri);
1832 1831
1833 1832 /*
1834 1833 * If it's not on a queue, change the priority with impunity.
1835 1834 */
1836 1835 if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
1837 1836 t->t_pri = disp_pri;
1838 1837
1839 1838 if (state == TS_ONPROC) {
1840 1839 cpu_t *cp = t->t_disp_queue->disp_cpu;
1841 1840
1842 1841 if (t == cp->cpu_dispthread)
1843 1842 cp->cpu_dispatch_pri = DISP_PRIO(t);
1844 1843 }
1845 1844 } else if (state == TS_SLEEP) {
1846 1845 /*
1847 1846 * If the priority has changed, take the thread out of
1848 1847 * its sleep queue and change the priority.
1849 1848 * Re-enqueue the thread.
1850 1849 * Each synchronization object exports a function
1851 1850 * to do this in an appropriate manner.
1852 1851 */
1853 1852 if (disp_pri != t->t_pri)
1854 1853 SOBJ_CHANGE_PRI(t->t_sobj_ops, t, disp_pri);
1855 1854 } else if (state == TS_WAIT) {
1856 1855 /*
1857 1856 * Re-enqueue a thread on the wait queue if its
1858 1857 * priority needs to change.
1859 1858 */
1860 1859 if (disp_pri != t->t_pri)
1861 1860 waitq_change_pri(t, disp_pri);
1862 1861 } else {
1863 1862 /*
1864 1863 * The thread is on a run queue.
1865 1864 * Note: setbackdq() may not put the thread
1866 1865 * back on the same run queue where it originally
1867 1866 * resided.
1868 1867 *
1869 1868 * We still requeue the thread even if the priority
1870 1869 * is unchanged to preserve round-robin (and other)
1871 1870 * effects between threads of the same priority.
1872 1871 */
1873 1872 on_rq = dispdeq(t);
1874 1873 ASSERT(on_rq);
1875 1874 t->t_pri = disp_pri;
1876 1875 if (front) {
1877 1876 setfrontdq(t);
1878 1877 } else {
1879 1878 setbackdq(t);
1880 1879 }
1881 1880 }
1882 1881 schedctl_set_cidpri(t);
1883 1882 return (on_rq);
1884 1883 }
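A similar sketch for thread_change_pri(), showing the front/back flag and the return value; my_set_pri() is a made-up caller, and real scheduling classes decide front-vs-back from their own policy.

/* Hypothetical: set t's dispatch priority and report if it was runnable. */
static void
my_set_pri(kthread_t *t, pri_t new_pri)
{
	int on_rq;

	thread_lock(t);
	on_rq = thread_change_pri(t, new_pri, 0);	/* 0: requeue at the back */
	thread_unlock(t);

	if (on_rq)
		cmn_err(CE_CONT, "thread requeued at priority %d\n", new_pri);
}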
1885 1884
1886 1885 /*
1887 1886 * If the tunable kmem_stackinfo is set, fill the kernel thread stack
1888 1887 * with a specific pattern.
1889 1888 */
1890 1889 static void
1891 1890 stkinfo_begin(kthread_t *t)
1892 1891 {
1893 1892 caddr_t start; /* stack start */
1894 1893 caddr_t end; /* stack end */
1895 1894 uint64_t *ptr; /* pattern pointer */
1896 1895
1897 1896 /*
1898 1897 * Stack grows up or down, see thread_create(),
1899 1898 * compute stack memory area start and end (start < end).
1900 1899 */
1901 1900 if (t->t_stk > t->t_stkbase) {
1902 1901 /* stack grows down */
1903 1902 start = t->t_stkbase;
1904 1903 end = t->t_stk;
1905 1904 } else {
1906 1905 /* stack grows up */
1907 1906 start = t->t_stk;
1908 1907 end = t->t_stkbase;
1909 1908 }
1910 1909
1911 1910 /*
1912 1911 * Stackinfo pattern size is 8 bytes. Ensure proper 8-byte
1913 1912 * alignment for start and end within the stack area boundaries
1914 1913 * (protection against corrupt t_stkbase/t_stk data).
1915 1914 */
1916 1915 if ((((uintptr_t)start) & 0x7) != 0) {
1917 1916 start = (caddr_t)((((uintptr_t)start) & (~0x7)) + 8);
1918 1917 }
1919 1918 end = (caddr_t)(((uintptr_t)end) & (~0x7));
1920 1919
1921 1920 if ((end <= start) || (end - start) > (1024 * 1024)) {
1922 1921 /* negative or stack size > 1 meg, assume bogus */
1923 1922 return;
1924 1923 }
1925 1924
1926 1925 /* fill stack area with a pattern (instead of zeros) */
1927 1926 ptr = (uint64_t *)((void *)start);
1928 1927 while (ptr < (uint64_t *)((void *)end)) {
1929 1928 *ptr++ = KMEM_STKINFO_PATTERN;
1930 1929 }
1931 1930 }
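The 8-byte rounding used above (and again in stkinfo_end()) can be sanity-checked in isolation; a small user-level sketch with made-up addresses:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uintptr_t start = 0x1003;	/* made-up, misaligned stack base */
	uintptr_t end = 0x200f;		/* made-up, misaligned stack top */

	if ((start & 0x7) != 0)
		start = (start & ~(uintptr_t)0x7) + 8;	/* round start up */
	end &= ~(uintptr_t)0x7;				/* round end down */

	/* prints start=0x1008 end=0x2008 */
	(void) printf("start=0x%lx end=0x%lx\n",
	    (unsigned long)start, (unsigned long)end);
	return (0);
}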
1932 1931
1933 1932
1934 1933 /*
1935 1934 * If the tunable kmem_stackinfo is set, create the stackinfo log if it
1936 1935 * doesn't already exist, compute the percentage of kernel stack actually
1937 1936 * used, and record it in the log if it is among the highest seen so far.
1938 1937 */
1939 1938 static void
1940 1939 stkinfo_end(kthread_t *t)
1941 1940 {
1942 1941 caddr_t start; /* stack start */
1943 1942 caddr_t end; /* stack end */
1944 1943 uint64_t *ptr; /* pattern pointer */
1945 1944 size_t stksz; /* stack size */
1946 1945 size_t smallest = 0;
1947 1946 size_t percent = 0;
1948 1947 uint_t index = 0;
1949 1948 uint_t i;
1950 1949 static size_t smallest_percent = (size_t)-1;
1951 1950 static uint_t full = 0;
1952 1951
1953 1952 /* create the stackinfo log, if it doesn't already exist */
1954 1953 mutex_enter(&kmem_stkinfo_lock);
1955 1954 if (kmem_stkinfo_log == NULL) {
1956 1955 kmem_stkinfo_log = (kmem_stkinfo_t *)
1957 1956 kmem_zalloc(KMEM_STKINFO_LOG_SIZE *
1958 1957 (sizeof (kmem_stkinfo_t)), KM_NOSLEEP);
1959 1958 if (kmem_stkinfo_log == NULL) {
1960 1959 mutex_exit(&kmem_stkinfo_lock);
1961 1960 return;
1962 1961 }
1963 1962 }
1964 1963 mutex_exit(&kmem_stkinfo_lock);
1965 1964
1966 1965 /*
1967 1966 * Stack grows up or down, see thread_create(),
1968 1967 * compute stack memory area start and end (start < end).
1969 1968 */
1970 1969 if (t->t_stk > t->t_stkbase) {
1971 1970 /* stack grows down */
1972 1971 start = t->t_stkbase;
1973 1972 end = t->t_stk;
1974 1973 } else {
1975 1974 /* stack grows up */
1976 1975 start = t->t_stk;
1977 1976 end = t->t_stkbase;
1978 1977 }
1979 1978
1980 1979 /* stack size as found in kthread_t */
1981 1980 stksz = end - start;
1982 1981
1983 1982 /*
1984 1983 * Stackinfo pattern size is 8 bytes. Ensure proper 8-byte
1985 1984 * alignment for start and end within the stack area boundaries
1986 1985 * (protection against corrupt t_stkbase/t_stk data).
1987 1986 */
1988 1987 if ((((uintptr_t)start) & 0x7) != 0) {
1989 1988 start = (caddr_t)((((uintptr_t)start) & (~0x7)) + 8);
1990 1989 }
1991 1990 end = (caddr_t)(((uintptr_t)end) & (~0x7));
1992 1991
1993 1992 if ((end <= start) || (end - start) > (1024 * 1024)) {
1994 1993 /* negative or stack size > 1 meg, assume bogus */
1995 1994 return;
1996 1995 }
1997 1996
1998 1997 /* search until no pattern in the stack */
1999 1998 if (t->t_stk > t->t_stkbase) {
2000 1999 /* stack grows down */
2001 2000 #if defined(__i386) || defined(__amd64)
2002 2001 /*
2003 2002 * 6 longs are pushed on the stack, see thread_load(). Skip
2004 2003 * them, so if the kthread has never run, percent is zero.
2005 2004 * 8-byte alignment is preserved for a 32-bit kernel,
2006 2005 * 6 x 4 = 24, and 24 is a multiple of 8.
2007 2006 *
2008 2007 */
2009 2008 end -= (6 * sizeof (long));
2010 2009 #endif
2011 2010 ptr = (uint64_t *)((void *)start);
2012 2011 while (ptr < (uint64_t *)((void *)end)) {
2013 2012 if (*ptr != KMEM_STKINFO_PATTERN) {
2014 2013 percent = stkinfo_percent(end,
2015 2014 start, (caddr_t)ptr);
2016 2015 break;
2017 2016 }
2018 2017 ptr++;
2019 2018 }
2020 2019 } else {
2021 2020 /* stack grows up */
2022 2021 ptr = (uint64_t *)((void *)end);
2023 2022 ptr--;
2024 2023 while (ptr >= (uint64_t *)((void *)start)) {
2025 2024 if (*ptr != KMEM_STKINFO_PATTERN) {
2026 2025 percent = stkinfo_percent(start,
2027 2026 end, (caddr_t)ptr);
2028 2027 break;
2029 2028 }
2030 2029 ptr--;
2031 2030 }
2032 2031 }
2033 2032
2034 2033 DTRACE_PROBE3(stack__usage, kthread_t *, t,
2035 2034 size_t, stksz, size_t, percent);
2036 2035
2037 2036 if (percent == 0) {
2038 2037 return;
2039 2038 }
2040 2039
2041 2040 mutex_enter(&kmem_stkinfo_lock);
2042 2041 if (full == KMEM_STKINFO_LOG_SIZE && percent < smallest_percent) {
2043 2042 /*
2044 2043 * The log is full and already contains the highest values
2045 2044 */
2046 2045 mutex_exit(&kmem_stkinfo_lock);
2047 2046 return;
2048 2047 }
2049 2048
2050 2049 /* keep a log of the highest used stack */
2051 2050 for (i = 0; i < KMEM_STKINFO_LOG_SIZE; i++) {
2052 2051 if (kmem_stkinfo_log[i].percent == 0) {
2053 2052 index = i;
2054 2053 full++;
2055 2054 break;
2056 2055 }
2057 2056 if (smallest == 0) {
2058 2057 smallest = kmem_stkinfo_log[i].percent;
2059 2058 index = i;
2060 2059 continue;
2061 2060 }
2062 2061 if (kmem_stkinfo_log[i].percent < smallest) {
2063 2062 smallest = kmem_stkinfo_log[i].percent;
2064 2063 index = i;
2065 2064 }
2066 2065 }
2067 2066
2068 2067 if (percent >= kmem_stkinfo_log[index].percent) {
2069 2068 kmem_stkinfo_log[index].kthread = (caddr_t)t;
2070 2069 kmem_stkinfo_log[index].t_startpc = (caddr_t)t->t_startpc;
2071 2070 kmem_stkinfo_log[index].start = start;
2072 2071 kmem_stkinfo_log[index].stksz = stksz;
2073 2072 kmem_stkinfo_log[index].percent = percent;
2074 2073 kmem_stkinfo_log[index].t_tid = t->t_tid;
2075 2074 kmem_stkinfo_log[index].cmd[0] = '\0';
2076 2075 if (t->t_tid != 0) {
2077 2076 stksz = strlen((t->t_procp)->p_user.u_comm);
2078 2077 if (stksz >= KMEM_STKINFO_STR_SIZE) {
2079 2078 stksz = KMEM_STKINFO_STR_SIZE - 1;
2080 2079 kmem_stkinfo_log[index].cmd[stksz] = '\0';
2081 2080 } else {
2082 2081 stksz += 1;
2083 2082 }
2084 2083 (void) memcpy(kmem_stkinfo_log[index].cmd,
2085 2084 (t->t_procp)->p_user.u_comm, stksz);
2086 2085 }
2087 2086 if (percent < smallest_percent) {
2088 2087 smallest_percent = percent;
2089 2088 }
2090 2089 }
2091 2090 mutex_exit(&kmem_stkinfo_lock);
2092 2091 }
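The overall fill-then-scan mechanism can be mimicked outside the kernel; a self-contained user-level sketch (the pattern value, buffer size, and simulated usage are all made up, and the real code logs into kmem_stkinfo_log instead of printing):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	MY_PATTERN	0xdeadbeefdeadbeefULL	/* stand-in for KMEM_STKINFO_PATTERN */
#define	MY_WORDS	512			/* 4K simulated stack, top at high addresses */

int
main(void)
{
	uint64_t stack[MY_WORDS];
	uint64_t *ptr;
	size_t used = 0, i;

	/* "stkinfo_begin": fill the whole area with the pattern */
	for (i = 0; i < MY_WORDS; i++)
		stack[i] = MY_PATTERN;

	/* simulate a downward-growing stack that touched its top 100 words */
	(void) memset(&stack[MY_WORDS - 100], 0, 100 * sizeof (uint64_t));

	/* "stkinfo_end": scan up from the base until the pattern stops */
	for (ptr = stack; ptr < &stack[MY_WORDS]; ptr++) {
		if (*ptr != MY_PATTERN) {
			used = (size_t)(&stack[MY_WORDS] - ptr);
			break;
		}
	}

	/* prints "100 of 512 words ever used" */
	(void) printf("%zu of %d words ever used\n", used, MY_WORDS);
	return (0);
}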
2093 2092
2094 2093 /*
2095 2094 * If the tunable kmem_stackinfo is set, compute the stack utilization percentage.
2096 2095 */
2097 2096 static size_t
2098 2097 stkinfo_percent(caddr_t t_stk, caddr_t t_stkbase, caddr_t sp)
2099 2098 {
2100 2099 size_t percent;
2101 2100 size_t s;
2102 2101
2103 2102 if (t_stk > t_stkbase) {
2104 2103 /* stack grows down */
2105 2104 if (sp > t_stk) {
2106 2105 return (0);
2107 2106 }
2108 2107 if (sp < t_stkbase) {
2109 2108 return (100);
2110 2109 }
2111 2110 percent = t_stk - sp + 1;
2112 2111 s = t_stk - t_stkbase + 1;
2113 2112 } else {
2114 2113 /* stack grows up */
2115 2114 if (sp < t_stk) {
2116 2115 return (0);
2117 2116 }
2118 2117 if (sp > t_stkbase) {
2119 2118 return (100);
2120 2119 }
2121 2120 percent = sp - t_stk + 1;
2122 2121 s = t_stkbase - t_stk + 1;
2123 2122 }
2124 2123 percent = ((100 * percent) / s) + 1;
2125 2124 if (percent > 100) {
2126 2125 percent = 100;
2127 2126 }
2128 2127 return (percent);
2129 2128 }
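As a worked example of the arithmetic above, assume a downward-growing 16384-byte stack whose deepest non-pattern word sits 4096 bytes below the top: percent = 4096 + 1 = 4097, s = 16384 + 1 = 16385, and ((100 * 4097) / 16385) + 1 = 25 + 1 = 26, so roughly a quarter of the stack was ever used.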