combined Wdiff usr/src/uts/common/os/timers.c

Print this page

remove whole-process swapping
Long before Unix supported paging, it used process swapping to reclaim
memory.  The code is there and in theory it runs when we get *extremely* low
on memory.  In practice, it never runs since the definition of low-on-memory
is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/os/timers.c
          +++ new/usr/src/uts/common/os/timers.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   */
  26   26  
  27   27  /*
  28   28   * Copyright (c) 1982, 1986 Regents of the University of California.
  29   29   * All rights reserved.  The Berkeley software License Agreement
  30   30   * specifies the terms and conditions for redistribution.
  31   31   */
  32   32  
  33   33  #include <sys/param.h>
  34   34  #include <sys/user.h>
  35   35  #include <sys/vnode.h>
  36   36  #include <sys/proc.h>
  37   37  #include <sys/time.h>
  38   38  #include <sys/systm.h>
  39   39  #include <sys/kmem.h>
  40   40  #include <sys/cmn_err.h>
  41   41  #include <sys/cpuvar.h>
  42   42  #include <sys/timer.h>
  43   43  #include <sys/debug.h>
  44   44  #include <sys/sysmacros.h>
  45   45  #include <sys/cyclic.h>
  46   46  
  47   47  static void     realitexpire(void *);
  48   48  static void     realprofexpire(void *);
  49   49  static void     timeval_advance(struct timeval *, struct timeval *);
  50   50  
  51   51  kmutex_t tod_lock;      /* protects time-of-day stuff */
  52   52  
  53   53  /*
  54   54   * Constant to define the minimum interval value of the ITIMER_REALPROF timer.
  55   55   * Value is in microseconds; defaults to 500 usecs.  Setting this value
  56   56   * significantly lower may allow for denial-of-service attacks.
  57   57   */
  58   58  int itimer_realprof_minimum = 500;
  59   59  
  60   60  /*
  61   61   * macro to compare a timeval to a timestruc
  62   62   */
  63   63  
  64   64  #define TVTSCMP(tvp, tsp, cmp) \
  65   65          /* CSTYLED */ \
  66   66          ((tvp)->tv_sec cmp (tsp)->tv_sec || \
  67   67          ((tvp)->tv_sec == (tsp)->tv_sec && \
  68   68          /* CSTYLED */ \
  69   69          (tvp)->tv_usec * 1000 cmp (tsp)->tv_nsec))
  70   70  
  71   71  /*
  72   72   * Time of day and interval timer support.
  73   73   *
  74   74   * These routines provide the kernel entry points to get and set
  75   75   * the time-of-day and per-process interval timers.  Subroutines
  76   76   * here provide support for adding and subtracting timeval structures
  77   77   * and decrementing interval timers, optionally reloading the interval
  78   78   * timers when they expire.
  79   79   */
  80   80  
  81   81  /*
  82   82   * SunOS function to generate monotonically increasing time values.
  83   83   */
  84   84  void
  85   85  uniqtime(struct timeval *tv)
  86   86  {
  87   87          static struct timeval last;
  88   88          static int last_timechanged;
  89   89          timestruc_t ts;
  90   90          time_t sec;
  91   91          int usec, nsec;
  92   92  
  93   93          /*
  94   94           * protect modification of last
  95   95           */
  96   96          mutex_enter(&tod_lock);
  97   97          gethrestime(&ts);
  98   98  
  99   99          /*
 100  100           * Fast algorithm to convert nsec to usec -- see hrt2ts()
 101  101           * in common/os/timers.c for a full description.
 102  102           */
 103  103          nsec = ts.tv_nsec;
 104  104          usec = nsec + (nsec >> 2);
 105  105          usec = nsec + (usec >> 1);
 106  106          usec = nsec + (usec >> 2);
 107  107          usec = nsec + (usec >> 4);
 108  108          usec = nsec - (usec >> 3);
 109  109          usec = nsec + (usec >> 2);
 110  110          usec = nsec + (usec >> 3);
 111  111          usec = nsec + (usec >> 4);
 112  112          usec = nsec + (usec >> 1);
 113  113          usec = nsec + (usec >> 6);
 114  114          usec = usec >> 10;
 115  115          sec = ts.tv_sec;
 116  116  
 117  117          /*
 118  118           * If the system hres time has been changed since the last time
 119  119           * we were called, then all bets are off; just update our
 120  120           * local copy of timechanged and accept the reported time as is.
 121  121           */
 122  122          if (last_timechanged != timechanged) {
 123  123                  last_timechanged = timechanged;
 124  124          }
 125  125          /*
 126  126           * Try to keep timestamps unique, but don't be obsessive about
 127  127           * it in the face of large differences.
 128  128           */
 129  129          else if ((sec <= last.tv_sec) &&        /* same or lower seconds, and */
 130  130              ((sec != last.tv_sec) ||            /* either different second or */
 131  131              (usec <= last.tv_usec)) &&          /* lower microsecond, and */
 132  132              ((last.tv_sec - sec) <= 5)) {       /* not way back in time */
 133  133                  sec = last.tv_sec;
 134  134                  usec = last.tv_usec + 1;
 135  135                  if (usec >= MICROSEC) {
 136  136                          usec -= MICROSEC;
 137  137                          sec++;
 138  138                  }
 139  139          }
 140  140          last.tv_sec = sec;
 141  141          last.tv_usec = usec;
 142  142          mutex_exit(&tod_lock);
 143  143  
 144  144          tv->tv_sec = sec;
 145  145          tv->tv_usec = usec;
 146  146  }
 147  147  
 148  148  /*
 149  149   * Timestamps are exported from the kernel in several places.
 150  150   * Such timestamps are commonly used for either uniqueness or for
 151  151   * sequencing - truncation to 32-bits is fine for uniqueness,
 152  152   * but sequencing is going to take more work as we get closer to 2038!
 153  153   */
 154  154  void
 155  155  uniqtime32(struct timeval32 *tv32p)
 156  156  {
 157  157          struct timeval tv;
 158  158  
 159  159          uniqtime(&tv);
 160  160          TIMEVAL_TO_TIMEVAL32(tv32p, &tv);
 161  161  }
 162  162  
 163  163  int
 164  164  gettimeofday(struct timeval *tp)
 165  165  {
 166  166          struct timeval atv;
 167  167  
 168  168          if (tp) {
 169  169                  uniqtime(&atv);
 170  170                  if (get_udatamodel() == DATAMODEL_NATIVE) {
 171  171                          if (copyout(&atv, tp, sizeof (atv)))
 172  172                                  return (set_errno(EFAULT));
 173  173                  } else {
 174  174                          struct timeval32 tv32;
 175  175  
 176  176                          if (TIMEVAL_OVERFLOW(&atv))
 177  177                                  return (set_errno(EOVERFLOW));
 178  178                          TIMEVAL_TO_TIMEVAL32(&tv32, &atv);
 179  179  
 180  180                          if (copyout(&tv32, tp, sizeof (tv32)))
 181  181                                  return (set_errno(EFAULT));
 182  182                  }
 183  183          }
 184  184          return (0);
 185  185  }
 186  186  
 187  187  int
 188  188  getitimer(uint_t which, struct itimerval *itv)
 189  189  {
 190  190          int error;
 191  191  
 192  192          if (get_udatamodel() == DATAMODEL_NATIVE)
 193  193                  error = xgetitimer(which, itv, 0);
 194  194          else {
 195  195                  struct itimerval kitv;
 196  196  
 197  197                  if ((error = xgetitimer(which, &kitv, 1)) == 0) {
 198  198                          if (ITIMERVAL_OVERFLOW(&kitv)) {
 199  199                                  error = EOVERFLOW;
 200  200                          } else {
 201  201                                  struct itimerval32 itv32;
 202  202  
 203  203                                  ITIMERVAL_TO_ITIMERVAL32(&itv32, &kitv);
 204  204                                  if (copyout(&itv32, itv, sizeof (itv32)) != 0)
 205  205                                          error = EFAULT;
 206  206                          }
 207  207                  }
 208  208          }
 209  209  
 210  210          return (error ? (set_errno(error)) : 0);
 211  211  }
 212  212  
 213  213  int
 214  214  xgetitimer(uint_t which, struct itimerval *itv, int iskaddr)
 215  215  {
 216  216          struct proc *p = curproc;
 217  217          struct timeval now;
 218  218          struct itimerval aitv;
 219  219          hrtime_t ts, first, interval, remain;
 220  220  
 221  221          mutex_enter(&p->p_lock);
 222  222  
 223  223          switch (which) {
 224  224          case ITIMER_VIRTUAL:
 225  225          case ITIMER_PROF:
 226  226                  aitv = ttolwp(curthread)->lwp_timer[which];
 227  227                  break;
 228  228  
 229  229          case ITIMER_REAL:
 230  230                  uniqtime(&now);
 231  231                  aitv = p->p_realitimer;
 232  232  
 233  233                  if (timerisset(&aitv.it_value)) {
 234  234                          /*CSTYLED*/
 235  235                          if (timercmp(&aitv.it_value, &now, <)) {
 236  236                                  timerclear(&aitv.it_value);
 237  237                          } else {
 238  238                                  timevalsub(&aitv.it_value, &now);
 239  239                          }
 240  240                  }
 241  241                  break;
 242  242  
 243  243          case ITIMER_REALPROF:
 244  244                  if (curproc->p_rprof_cyclic == CYCLIC_NONE) {
 245  245                          bzero(&aitv, sizeof (aitv));
 246  246                          break;
 247  247                  }
 248  248  
 249  249                  aitv = curproc->p_rprof_timer;
 250  250  
 251  251                  first = tv2hrt(&aitv.it_value);
 252  252                  interval = tv2hrt(&aitv.it_interval);
 253  253  
 254  254                  if ((ts = gethrtime()) < first) {
 255  255                          /*
 256  256                           * We haven't gone off for the first time; the time
 257  257                           * remaining is simply the first time we will go
 258  258                           * off minus the current time.
 259  259                           */
 260  260                          remain = first - ts;
 261  261                  } else {
 262  262                          if (interval == 0) {
 263  263                                  /*
 264  264                                   * This was set as a one-shot, and we've
 265  265                                   * already gone off; there is no time
 266  266                                   * remaining.
 267  267                                   */
 268  268                                  remain = 0;
 269  269                          } else {
 270  270                                  /*
 271  271                                   * We have a non-zero interval; we need to
 272  272                                   * determine how far we are into the current
 273  273                                   * interval, and subtract that from the
 274  274                                   * interval to determine the time remaining.
 275  275                                   */
 276  276                                  remain = interval - ((ts - first) % interval);
 277  277                          }
 278  278                  }
 279  279  
 280  280                  hrt2tv(remain, &aitv.it_value);
 281  281                  break;
 282  282  
 283  283          default:
 284  284                  mutex_exit(&p->p_lock);
 285  285                  return (EINVAL);
 286  286          }
 287  287  
 288  288          mutex_exit(&p->p_lock);
 289  289  
 290  290          if (iskaddr) {
 291  291                  bcopy(&aitv, itv, sizeof (*itv));
 292  292          } else {
 293  293                  ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
 294  294                  if (copyout(&aitv, itv, sizeof (*itv)))
 295  295                          return (EFAULT);
 296  296          }
 297  297  
 298  298          return (0);
 299  299  }
 300  300  
 301  301  /* setitimer(2): optionally fetch the old timer into oitv, then set itv. */
 302  302  int
 303  303  setitimer(uint_t which, struct itimerval *itv, struct itimerval *oitv)
 304  304  {
 305  305          int error;
 306  306  
 307  307          if (oitv != NULL)
 308  308                  if ((error = getitimer(which, oitv)) != 0)
 309  309                          return (error);
 310  310  
 311  311          if (itv == NULL)
 312  312                  return (0);
 313  313  
 314  314          if (get_udatamodel() == DATAMODEL_NATIVE)
 315  315                  error = xsetitimer(which, itv, 0);
 316  316          else {
 317  317                  struct itimerval32 itv32;
 318  318                  struct itimerval kitv;
 319  319  
 320  320                  if (copyin(itv, &itv32, sizeof (itv32)))
 321  321                          return (set_errno(EFAULT));
 322  322                  ITIMERVAL32_TO_ITIMERVAL(&kitv, &itv32);
 323  323                  error = xsetitimer(which, &kitv, 1);
 324  324          }
 325  325  
 326  326          return (error ? (set_errno(error)) : 0);
 327  327  }
 328  328  
 329  329  int
 330  330  xsetitimer(uint_t which, struct itimerval *itv, int iskaddr)
 331  331  {
 332  332          struct itimerval aitv;
 333  333          struct timeval now;
 334  334          struct proc *p = curproc;
 335  335          kthread_t *t;
 336  336          timeout_id_t tmp_id;
 337  337          cyc_handler_t hdlr;
 338  338          cyc_time_t when;
 339  339          cyclic_id_t cyclic;
 340  340          hrtime_t ts;
 341  341          int min;
 342  342  
 343  343          if (itv == NULL)
 344  344                  return (0);
 345  345  
 346  346          if (iskaddr) {
 347  347                  bcopy(itv, &aitv, sizeof (aitv));
 348  348          } else {
 349  349                  ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
 350  350                  if (copyin(itv, &aitv, sizeof (aitv)))
 351  351                          return (EFAULT);
 352  352          }
 353  353  
 354  354          if (which == ITIMER_REALPROF) {
 355  355                  min = MAX((int)(cyclic_getres() / (NANOSEC / MICROSEC)),
 356  356                      itimer_realprof_minimum);
 357  357          } else {
 358  358                  min = usec_per_tick;
 359  359          }
 360  360  
 361  361          if (itimerfix(&aitv.it_value, min) ||
 362  362              (itimerfix(&aitv.it_interval, min) && timerisset(&aitv.it_value)))
 363  363                  return (EINVAL);
 364  364  
 365  365          mutex_enter(&p->p_lock);
 366  366          switch (which) {
 367  367          case ITIMER_REAL:
 368  368                  /*
 369  369                   * The SITBUSY flag prevents conflicts with multiple
 370  370                   * threads attempting to perform setitimer(ITIMER_REAL)
 371  371                   * at the same time, even when we drop p->p_lock below.
 372  372                   * Any blocked thread returns successfully because the
 373  373                   * effect is the same as if it got here first, finished,
 374  374                   * and the other thread then came through and destroyed
 375  375                   * what it did.  We are just protecting the system from
 376  376                   * malfunctioning due to the race condition.
 377  377                   */
 378  378                  if (p->p_flag & SITBUSY) {
 379  379                          mutex_exit(&p->p_lock);
 380  380                          return (0);
 381  381                  }
 382  382                  p->p_flag |= SITBUSY;
 383  383                  while ((tmp_id = p->p_itimerid) != 0) {
 384  384                          /*
 385  385                           * Avoid deadlock in callout_delete (called from
 386  386                           * untimeout) which may go to sleep (while holding
 387  387                           * p_lock). Drop p_lock and re-acquire it after
 388  388                           * untimeout returns. Need to clear p_itimerid
 389  389                           * while holding p_lock.
 390  390                           */
 391  391                          p->p_itimerid = 0;
 392  392                          mutex_exit(&p->p_lock);
 393  393                          (void) untimeout(tmp_id);
 394  394                          mutex_enter(&p->p_lock);
 395  395                  }
 396  396                  if (timerisset(&aitv.it_value)) {
 397  397                          uniqtime(&now);
 398  398                          timevaladd(&aitv.it_value, &now);
 399  399                          p->p_itimerid = realtime_timeout(realitexpire,
 400  400                              p, hzto(&aitv.it_value));
 401  401                  }
 402  402                  p->p_realitimer = aitv;
 403  403                  p->p_flag &= ~SITBUSY;
 404  404                  break;
 405  405  
 406  406          case ITIMER_REALPROF:
 407  407                  cyclic = p->p_rprof_cyclic;
 408  408                  p->p_rprof_cyclic = CYCLIC_NONE;
 409  409  
 410  410                  mutex_exit(&p->p_lock);
 411  411  
 412  412                  /*
 413  413                   * We're now going to acquire cpu_lock, remove the old cyclic
 414  414                   * if necessary, and add our new cyclic.
 415  415                   */
 416  416                  mutex_enter(&cpu_lock);
 417  417  
 418  418                  if (cyclic != CYCLIC_NONE)
 419  419                          cyclic_remove(cyclic);
 420  420  
 421  421                  if (!timerisset(&aitv.it_value)) {
 422  422                          /*
 423  423                           * If we were passed a value of 0, we're done.
 424  424                           */
 425  425                          mutex_exit(&cpu_lock);
 426  426                          return (0);
 427  427                  }
 428  428  
 429  429                  hdlr.cyh_func = realprofexpire;
 430  430                  hdlr.cyh_arg = p;
 431  431                  hdlr.cyh_level = CY_LOW_LEVEL;
 432  432  
 433  433                  when.cyt_when = (ts = gethrtime() + tv2hrt(&aitv.it_value));
 434  434                  when.cyt_interval = tv2hrt(&aitv.it_interval);
 435  435  
 436  436                  if (when.cyt_interval == 0) {
 437  437                          /*
 438  438                           * Using the same logic as for CLOCK_HIGHRES timers, we
 439  439                           * set the interval to be INT64_MAX - when.cyt_when to
 440  440                           * effect a one-shot; see the comment in clock_highres.c
 441  441                           * for more details on why this works.
 442  442                           */
 443  443                          when.cyt_interval = INT64_MAX - when.cyt_when;
 444  444                  }
 445  445  
 446  446                  cyclic = cyclic_add(&hdlr, &when);
 447  447  
 448  448                  mutex_exit(&cpu_lock);
 449  449  
 450  450                  /*
 451  451                   * We have now successfully added the cyclic.  Reacquire
 452  452                   * p_lock, and see if anyone has snuck in.
 453  453                   */
 454  454                  mutex_enter(&p->p_lock);
 455  455  
 456  456                  if (p->p_rprof_cyclic != CYCLIC_NONE) {
 457  457                          /*
 458  458                           * We're racing with another thread establishing an
 459  459                           * ITIMER_REALPROF interval timer.  We'll let the other
 460  460                           * thread win (this is a race at the application level,
 461  461                           * so letting the other thread win is acceptable).
 462  462                           */
 463  463                          mutex_exit(&p->p_lock);
 464  464                          mutex_enter(&cpu_lock);
 465  465                          cyclic_remove(cyclic);
 466  466                          mutex_exit(&cpu_lock);
 467  467  
 468  468                          return (0);
 469  469                  }
 470  470  
 471  471                  /*
 472  472                   * Success.  Set our tracking variables in the proc structure,
 473  473                   * cancel any outstanding ITIMER_PROF, and allocate the
 474  474                   * per-thread SIGPROF buffers, if possible.
 475  475                   */
 476  476                  hrt2tv(ts, &aitv.it_value);
 477  477                  p->p_rprof_timer = aitv;
 478  478                  p->p_rprof_cyclic = cyclic;
 479  479  
 480  480                  t = p->p_tlist;
 481  481                  do {
 482  482                          struct itimerval *itvp;
 483  483  
 484  484                          itvp = &ttolwp(t)->lwp_timer[ITIMER_PROF];
 485  485                          timerclear(&itvp->it_interval);
 486  486                          timerclear(&itvp->it_value);
 487  487  
 488  488                          if (t->t_rprof != NULL)
 489  489                                  continue;
 490  490  
 491  491                          t->t_rprof =
 492  492                              kmem_zalloc(sizeof (struct rprof), KM_NOSLEEP);
 493  493                          aston(t);
 494  494                  } while ((t = t->t_forw) != p->p_tlist);
 495  495  
 496  496                  break;
 497  497  
 498  498          case ITIMER_VIRTUAL:
 499  499                  ttolwp(curthread)->lwp_timer[ITIMER_VIRTUAL] = aitv;
 500  500                  break;
 501  501  
 502  502          case ITIMER_PROF:
 503  503                  if (p->p_rprof_cyclic != CYCLIC_NONE) {
 504  504                          /*
 505  505                           * Silently ignore ITIMER_PROF if ITIMER_REALPROF
 506  506                           * is in effect.
 507  507                           */
 508  508                          break;
 509  509                  }
 510  510  
 511  511                  ttolwp(curthread)->lwp_timer[ITIMER_PROF] = aitv;
 512  512                  break;
 513  513  
 514  514          default:
 515  515                  mutex_exit(&p->p_lock);
 516  516                  return (EINVAL);
 517  517          }
 518  518          mutex_exit(&p->p_lock);
 519  519          return (0);
 520  520  }
 521  521  
 522  522  /*
 523  523   * Delete the ITIMER_REALPROF interval timer.
 524  524   * Called only from exec_args() when exec occurs.
 525  525   * The other ITIMER_* interval timers are specified
 526  526   * to be inherited across exec(), so leave them alone.
 527  527   */
 528  528  void
 529  529  delete_itimer_realprof(void)
 530  530  {
 531  531          kthread_t *t = curthread;
 532  532          struct proc *p = ttoproc(t);
 533  533          klwp_t *lwp = ttolwp(t);
 534  534          cyclic_id_t cyclic;
 535  535  
 536  536          mutex_enter(&p->p_lock);
 537  537  
 538  538          /* we are performing execve(); assert we are single-threaded */
 539  539          ASSERT(t == p->p_tlist && t == t->t_forw);
 540  540  
 541  541          if ((cyclic = p->p_rprof_cyclic) == CYCLIC_NONE) {
 542  542                  mutex_exit(&p->p_lock);
 543  543          } else {
 544  544                  p->p_rprof_cyclic = CYCLIC_NONE;
 545  545                  /*
 546  546                   * Delete any current instance of SIGPROF.
 547  547                   */
 548  548                  if (lwp->lwp_cursig == SIGPROF) {
 549  549                          lwp->lwp_cursig = 0;
 550  550                          lwp->lwp_extsig = 0;
 551  551                          if (lwp->lwp_curinfo) {
 552  552                                  siginfofree(lwp->lwp_curinfo);
 553  553                                  lwp->lwp_curinfo = NULL;
 554  554                          }
 555  555                  }
 556  556                  /*
 557  557                   * Delete any pending instances of SIGPROF.
 558  558                   */
 559  559                  sigdelset(&p->p_sig, SIGPROF);
 560  560                  sigdelset(&p->p_extsig, SIGPROF);
 561  561                  sigdelq(p, NULL, SIGPROF);
 562  562                  sigdelset(&t->t_sig, SIGPROF);
 563  563                  sigdelset(&t->t_extsig, SIGPROF);
 564  564                  sigdelq(p, t, SIGPROF);
 565  565  
 566  566                  mutex_exit(&p->p_lock);
 567  567  
 568  568                  /*
 569  569                   * Remove the ITIMER_REALPROF cyclic.
 570  570                   */
 571  571                  mutex_enter(&cpu_lock);
 572  572                  cyclic_remove(cyclic);
 573  573                  mutex_exit(&cpu_lock);
 574  574          }
 575  575  }
 576  576  
 577  577  /*
 578  578   * Real interval timer expired:
 579  579   * send process whose timer expired an alarm signal.
 580  580   * If the timer is not set up to reload, then just return.
 581  581   * Else compute next time timer should go off which is > current time.
 582  582   * This is where delay in processing this timeout causes multiple
 583  583   * SIGALRM calls to be compressed into one.
 584  584   */
 585  585  static void
 586  586  realitexpire(void *arg)
 587  587  {
 588  588          struct proc *p = arg;
 589  589          struct timeval *valp = &p->p_realitimer.it_value;
 590  590          struct timeval *intervalp = &p->p_realitimer.it_interval;
 591  591  #if !defined(_LP64)
 592  592          clock_t ticks;
 593  593  #endif
 594  594  
 595  595          mutex_enter(&p->p_lock);
 596  596  #if !defined(_LP64)
 597  597          if ((ticks = hzto(valp)) > 1) {
 598  598                  /*
 599  599                   * If we are executing before we were meant to, it must be
 600  600                   * because of an overflow in a prior hzto() calculation.
 601  601                   * In this case, we want to go to sleep for the recalculated
 602  602                   * number of ticks. For the special meaning of the value "1"
 603  603                   * see comment in timespectohz().
 604  604                   */
 605  605                  p->p_itimerid = realtime_timeout(realitexpire, p, ticks);
 606  606                  mutex_exit(&p->p_lock);
 607  607                  return;
 608  608          }
 609  609  #endif
 610  610          sigtoproc(p, NULL, SIGALRM);
 611  611          if (!timerisset(intervalp)) {
 612  612                  timerclear(valp);
 613  613                  p->p_itimerid = 0;
 614  614          } else {
 615  615                  /* advance timer value past current time */
 616  616                  timeval_advance(valp, intervalp);
 617  617                  p->p_itimerid = realtime_timeout(realitexpire, p, hzto(valp));
 618  618          }
 619  619          mutex_exit(&p->p_lock);
 620  620  }
 621  621  
 622  622  /*
 623  623   * Real time profiling interval timer expired:
 624  624   * Increment microstate counters for each lwp in the process
 625  625   * and ensure that running lwps are kicked into the kernel.
 626  626   * If time is not set up to reload, then just return.
 627  627   * Else compute next time timer should go off which is > current time,
 628  628   * as above.
 629  629   */
 630  630  static void
 631  631  realprofexpire(void *arg)
 632  632  {
 633  633          struct proc *p = arg;
 634  634          kthread_t *t;
 635  635  
 636  636          mutex_enter(&p->p_lock);
 637  637          if (p->p_rprof_cyclic == CYCLIC_NONE ||
 638  638              (t = p->p_tlist) == NULL) {
 639  639                  mutex_exit(&p->p_lock);
 640  640                  return;
 641  641          }
 642  642          do {
 643  643                  int mstate;
 644  644  
 645  645                  /*
 646  646                   * Attempt to allocate the SIGPROF buffer, but don't sleep.

↓ open down ↓

646 lines elided

↑ open up ↑

 647  647                   */
 648  648                  if (t->t_rprof == NULL)
 649  649                          t->t_rprof = kmem_zalloc(sizeof (struct rprof),
 650  650                              KM_NOSLEEP);
 651  651                  if (t->t_rprof == NULL)
 652  652                          continue;
 653  653  
 654  654                  thread_lock(t);
 655  655                  switch (t->t_state) {
 656  656                  case TS_SLEEP:
 657      -                        /*
 658      -                         * Don't touch the lwp is it is swapped out.
 659      -                         */
 660      -                        if (!(t->t_schedflag & TS_LOAD)) {
 661      -                                mstate = LMS_SLEEP;
 662      -                                break;
 663      -                        }
 664  657                          switch (mstate = ttolwp(t)->lwp_mstate.ms_prev) {
 665  658                          case LMS_TFAULT:
 666  659                          case LMS_DFAULT:
 667  660                          case LMS_KFAULT:
 668  661                          case LMS_USER_LOCK:
 669  662                                  break;
 670  663                          default:
 671  664                                  mstate = LMS_SLEEP;
 672  665                                  break;
 673  666                          }

 674  667                          break;
 675  668                  case TS_RUN:
 676  669                  case TS_WAIT:
 677  670                          mstate = LMS_WAIT_CPU;
 678  671                          break;
 679  672                  case TS_ONPROC:
 680  673                          switch (mstate = t->t_mstate) {
 681  674                          case LMS_USER:
 682  675                          case LMS_SYSTEM:
 683  676                          case LMS_TRAP:
 684  677                                  break;
 685  678                          default:
 686  679                                  mstate = LMS_SYSTEM;
 687  680                                  break;
 688  681                          }
 689  682                          break;
 690  683                  default:
 691  684                          mstate = t->t_mstate;
 692  685                          break;
 693  686                  }
 694  687                  t->t_rprof->rp_anystate = 1;
 695  688                  t->t_rprof->rp_state[mstate]++;
 696  689                  aston(t);
 697  690                  /*
 698  691                   * force the thread into the kernel
 699  692                   * if it is not already there.
 700  693                   */
 701  694                  if (t->t_state == TS_ONPROC && t->t_cpu != CPU)
 702  695                          poke_cpu(t->t_cpu->cpu_id);
 703  696                  thread_unlock(t);
 704  697          } while ((t = t->t_forw) != p->p_tlist);
 705  698  
 706  699          mutex_exit(&p->p_lock);
 707  700  }
 708  701  
 709  702  /*
 710  703   * Advances timer value past the current time of day.  See the detailed
 711  704   * comment for this logic in realitsexpire(), above.
 712  705   */
 713  706  static void
 714  707  timeval_advance(struct timeval *valp, struct timeval *intervalp)
 715  708  {
 716  709          int cnt2nth;
 717  710          struct timeval interval2nth;
 718  711  
 719  712          for (;;) {
 720  713                  interval2nth = *intervalp;
 721  714                  for (cnt2nth = 0; ; cnt2nth++) {
 722  715                          timevaladd(valp, &interval2nth);
 723  716                          /*CSTYLED*/
 724  717                          if (TVTSCMP(valp, &hrestime, >))
 725  718                                  break;
 726  719                          timevaladd(&interval2nth, &interval2nth);
 727  720                  }
 728  721                  if (cnt2nth == 0)
 729  722                          break;
 730  723                  timevalsub(valp, &interval2nth);
 731  724          }
 732  725  }
 733  726  
 734  727  /*
 735  728   * Check that a proposed value to load into the .it_value or .it_interval
 736  729   * part of an interval timer is acceptable, and set it to at least a
 737  730   * specified minimal value.
 738  731   */
 739  732  int
 740  733  itimerfix(struct timeval *tv, int minimum)
 741  734  {
 742  735          if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
 743  736              tv->tv_usec < 0 || tv->tv_usec >= MICROSEC)
 744  737                  return (EINVAL);
 745  738          if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < minimum)
 746  739                  tv->tv_usec = minimum;
 747  740          return (0);
 748  741  }
 749  742  
 750  743  /*
 751  744   * Same as itimerfix, except a) it takes a timespec instead of a timeval and
 752  745   * b) it doesn't truncate based on timeout granularity; consumers of this
 753  746   * interface (e.g. timer_settime()) depend on the passed timespec not being
 754  747   * modified implicitly.
 755  748   */
 756  749  int
 757  750  itimerspecfix(timespec_t *tv)
 758  751  {
 759  752          if (tv->tv_sec < 0 || tv->tv_nsec < 0 || tv->tv_nsec >= NANOSEC)
 760  753                  return (EINVAL);
 761  754          return (0);
 762  755  }
 763  756  
 764  757  /*
 765  758   * Decrement an interval timer by a specified number
 766  759   * of microseconds, which must be less than a second,
 767  760   * i.e. < 1000000.  If the timer expires, then reload
 768  761   * it.  In this case, carry over (usec - old value) to
 769  762   * reducint the value reloaded into the timer so that
 770  763   * the timer does not drift.  This routine assumes
 771  764   * that it is called in a context where the timers
 772  765   * on which it is operating cannot change in value.
 773  766   */
 774  767  int
 775  768  itimerdecr(struct itimerval *itp, int usec)
 776  769  {
 777  770          if (itp->it_value.tv_usec < usec) {
 778  771                  if (itp->it_value.tv_sec == 0) {
 779  772                          /* expired, and already in next interval */
 780  773                          usec -= itp->it_value.tv_usec;
 781  774                          goto expire;
 782  775                  }
 783  776                  itp->it_value.tv_usec += MICROSEC;
 784  777                  itp->it_value.tv_sec--;
 785  778          }
 786  779          itp->it_value.tv_usec -= usec;
 787  780          usec = 0;
 788  781          if (timerisset(&itp->it_value))
 789  782                  return (1);
 790  783          /* expired, exactly at end of interval */
 791  784  expire:
 792  785          if (timerisset(&itp->it_interval)) {
 793  786                  itp->it_value = itp->it_interval;
 794  787                  itp->it_value.tv_usec -= usec;
 795  788                  if (itp->it_value.tv_usec < 0) {
 796  789                          itp->it_value.tv_usec += MICROSEC;
 797  790                          itp->it_value.tv_sec--;
 798  791                  }
 799  792          } else
 800  793                  itp->it_value.tv_usec = 0;              /* sec is already 0 */
 801  794          return (0);
 802  795  }
 803  796  
 804  797  /*
 805  798   * Add and subtract routines for timevals.
 806  799   * N.B.: subtract routine doesn't deal with
 807  800   * results which are before the beginning,
 808  801   * it just gets very confused in this case.
 809  802   * Caveat emptor.
 810  803   */
 811  804  void
 812  805  timevaladd(struct timeval *t1, struct timeval *t2)
 813  806  {
 814  807          t1->tv_sec += t2->tv_sec;
 815  808          t1->tv_usec += t2->tv_usec;
 816  809          timevalfix(t1);
 817  810  }
 818  811  
 819  812  void
 820  813  timevalsub(struct timeval *t1, struct timeval *t2)
 821  814  {
 822  815          t1->tv_sec -= t2->tv_sec;
 823  816          t1->tv_usec -= t2->tv_usec;
 824  817          timevalfix(t1);
 825  818  }
 826  819  
 827  820  void
 828  821  timevalfix(struct timeval *t1)
 829  822  {
 830  823          if (t1->tv_usec < 0) {
 831  824                  t1->tv_sec--;
 832  825                  t1->tv_usec += MICROSEC;
 833  826          }
 834  827          if (t1->tv_usec >= MICROSEC) {
 835  828                  t1->tv_sec++;
 836  829                  t1->tv_usec -= MICROSEC;
 837  830          }
 838  831  }
 839  832  
 840  833  /*
 841  834   * Same as the routines above. These routines take a timespec instead
 842  835   * of a timeval.
 843  836   */
 844  837  void
 845  838  timespecadd(timespec_t *t1, timespec_t *t2)
 846  839  {
 847  840          t1->tv_sec += t2->tv_sec;
 848  841          t1->tv_nsec += t2->tv_nsec;
 849  842          timespecfix(t1);
 850  843  }
 851  844  
 852  845  void
 853  846  timespecsub(timespec_t *t1, timespec_t *t2)
 854  847  {
 855  848          t1->tv_sec -= t2->tv_sec;
 856  849          t1->tv_nsec -= t2->tv_nsec;
 857  850          timespecfix(t1);
 858  851  }
 859  852  
 860  853  void
 861  854  timespecfix(timespec_t *t1)
 862  855  {
 863  856          if (t1->tv_nsec < 0) {
 864  857                  t1->tv_sec--;
 865  858                  t1->tv_nsec += NANOSEC;
 866  859          } else {
 867  860                  if (t1->tv_nsec >= NANOSEC) {
 868  861                          t1->tv_sec++;
 869  862                          t1->tv_nsec -= NANOSEC;
 870  863                  }
 871  864          }
 872  865  }
 873  866  
 874  867  /*
 875  868   * Compute number of hz until specified time.
 876  869   * Used to compute third argument to timeout() from an absolute time.
 877  870   */
 878  871  clock_t
 879  872  hzto(struct timeval *tv)
 880  873  {
 881  874          timespec_t ts, now;
 882  875  
 883  876          ts.tv_sec = tv->tv_sec;
 884  877          ts.tv_nsec = tv->tv_usec * 1000;
 885  878          gethrestime_lasttick(&now);
 886  879  
 887  880          return (timespectohz(&ts, now));
 888  881  }
 889  882  
 890  883  /*
 891  884   * Compute number of hz until specified time for a given timespec value.
 892  885   * Used to compute third argument to timeout() from an absolute time.
 893  886   */
 894  887  clock_t
 895  888  timespectohz(timespec_t *tv, timespec_t now)
 896  889  {
 897  890          clock_t ticks;
 898  891          time_t  sec;
 899  892          int     nsec;
 900  893  
 901  894          /*
 902  895           * Compute number of ticks we will see between now and
 903  896           * the target time; returns "1" if the destination time
 904  897           * is before the next tick, so we always get some delay,
 905  898           * and returns LONG_MAX ticks if we would overflow.
 906  899           */
 907  900          sec = tv->tv_sec - now.tv_sec;
 908  901          nsec = tv->tv_nsec - now.tv_nsec + nsec_per_tick - 1;
 909  902  
 910  903          if (nsec < 0) {
 911  904                  sec--;
 912  905                  nsec += NANOSEC;
 913  906          } else if (nsec >= NANOSEC) {
 914  907                  sec++;
 915  908                  nsec -= NANOSEC;
 916  909          }
 917  910  
 918  911          ticks = NSEC_TO_TICK(nsec);
 919  912  
 920  913          /*
 921  914           * Compute ticks, accounting for negative and overflow as above.
 922  915           * Overflow protection kicks in at about 70 weeks for hz=50
 923  916           * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
 924  917           * kernel :-)
 925  918           */
 926  919          if (sec < 0 || (sec == 0 && ticks < 1))
 927  920                  ticks = 1;                      /* protect vs nonpositive */
 928  921          else if (sec > (LONG_MAX - ticks) / hz)
 929  922                  ticks = LONG_MAX;               /* protect vs overflow */
 930  923          else
 931  924                  ticks += sec * hz;              /* common case */
 932  925  
 933  926          return (ticks);
 934  927  }
 935  928  
 936  929  /*
 937  930   * Compute number of hz with the timespec tv specified.
 938  931   * The return type must be 64 bit integer.
 939  932   */
 940  933  int64_t
 941  934  timespectohz64(timespec_t *tv)
 942  935  {
 943  936          int64_t ticks;
 944  937          int64_t sec;
 945  938          int64_t nsec;
 946  939  
 947  940          sec = tv->tv_sec;
 948  941          nsec = tv->tv_nsec + nsec_per_tick - 1;
 949  942  
 950  943          if (nsec < 0) {
 951  944                  sec--;
 952  945                  nsec += NANOSEC;
 953  946          } else if (nsec >= NANOSEC) {
 954  947                  sec++;
 955  948                  nsec -= NANOSEC;
 956  949          }
 957  950  
 958  951          ticks = NSEC_TO_TICK(nsec);
 959  952  
 960  953          /*
 961  954           * Compute ticks, accounting for negative and overflow as above.
 962  955           * Overflow protection kicks in at about 70 weeks for hz=50
 963  956           * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
 964  957           * kernel
 965  958           */
 966  959          if (sec < 0 || (sec == 0 && ticks < 1))
 967  960                  ticks = 1;                      /* protect vs nonpositive */
 968  961          else if (sec > (((~0ULL) >> 1) - ticks) / hz)
 969  962                  ticks = (~0ULL) >> 1;           /* protect vs overflow */
 970  963          else
 971  964                  ticks += sec * hz;              /* common case */
 972  965  
 973  966          return (ticks);
 974  967  }
 975  968  
 976  969  /*
 977  970   * hrt2ts(): convert from hrtime_t to timestruc_t.
 978  971   *
 979  972   * All this routine really does is:
 980  973   *
 981  974   *      tsp->sec  = hrt / NANOSEC;
 982  975   *      tsp->nsec = hrt % NANOSEC;
 983  976   *
 984  977   * The black magic below avoids doing a 64-bit by 32-bit integer divide,
 985  978   * which is quite expensive.  There's actually much more going on here than
 986  979   * it might first appear -- don't try this at home.
 987  980   *
 988  981   * For the adventuresome, here's an explanation of how it works.
 989  982   *
 990  983   * Multiplication by a fixed constant is easy -- you just do the appropriate
 991  984   * shifts and adds.  For example, to multiply by 10, we observe that
 992  985   *
 993  986   *      x * 10  = x * (8 + 2)
 994  987   *              = (x * 8) + (x * 2)
 995  988   *              = (x << 3) + (x << 1).
 996  989   *
 997  990   * In general, you can read the algorithm right off the bits: the number 10
 998  991   * is 1010 in binary; bits 1 and 3 are ones, so x * 10 = (x << 1) + (x << 3).
 999  992   *
1000  993   * Sometimes you can do better.  For example, 15 is 1111 binary, so the normal
1001  994   * shift/add computation is x * 15 = (x << 0) + (x << 1) + (x << 2) + (x << 3).
1002  995   * But, it's cheaper if you capitalize on the fact that you have a run of ones:
1003  996   * 1111 = 10000 - 1, hence x * 15 = (x << 4) - (x << 0).  [You would never
1004  997   * actually perform the operation << 0, since it's a no-op; I'm just writing
1005  998   * it that way for clarity.]
1006  999   *
1007 1000   * The other way you can win is if you get lucky with the prime factorization
1008 1001   * of your constant.  The number 1,000,000,000, which we have to multiply
1009 1002   * by below, is a good example.  One billion is 111011100110101100101000000000
1010 1003   * in binary.  If you apply the bit-grouping trick, it doesn't buy you very
1011 1004   * much, because it's only a win for groups of three or more equal bits:
1012 1005   *
1013 1006   * 111011100110101100101000000000 = 1000000000000000000000000000000
1014 1007   *                                -  000100011001010011011000000000
1015 1008   *
1016 1009   * Thus, instead of the 13 shift/add pairs (26 operations) implied by the LHS,
1017 1010   * we have reduced this to 10 shift/add pairs (20 operations) on the RHS.
1018 1011   * This is better, but not great.
1019 1012   *
1020 1013   * However, we can factor 1,000,000,000 = 2^9 * 5^9 = 2^9 * 125 * 125 * 125,
1021 1014   * and multiply by each factor.  Multiplication by 125 is particularly easy,
1022 1015   * since 128 is nearby: x * 125 = (x << 7) - x - x - x, which is just four
1023 1016   * operations.  So, to multiply by 1,000,000,000, we perform three multipli-
1024 1017   * cations by 125, then << 9, a total of only 3 * 4 + 1 = 13 operations.
1025 1018   * This is the algorithm we actually use in both hrt2ts() and ts2hrt().
1026 1019   *
1027 1020   * Division is harder; there is no equivalent of the simple shift-add algorithm
1028 1021   * we used for multiplication.  However, we can convert the division problem
1029 1022   * into a multiplication problem by pre-computing the binary representation
1030 1023   * of the reciprocal of the divisor.  For the case of interest, we have
1031 1024   *
1032 1025   *      1 / 1,000,000,000 = 1.0001001011100000101111101000001B-30,
1033 1026   *
1034 1027   * to 32 bits of precision.  (The notation B-30 means "* 2^-30", just like
1035 1028   * E-18 means "* 10^-18".)
1036 1029   *
1037 1030   * So, to compute x / 1,000,000,000, we just multiply x by the 32-bit
1038 1031   * integer 10001001011100000101111101000001, then normalize (shift) the
1039 1032   * result.  This constant has several long runs of bits, so the multiply
1040 1033   * is relatively cheap:
1041 1034   *
1042 1035   *      10001001011100000101111101000001 = 10001001100000000110000001000001
1043 1036   *                                       - 00000000000100000000000100000000
1044 1037   *
1045 1038   * Again, you can just read the algorithm right off the bits:
1046 1039   *
1047 1040   *                      sec = hrt;
1048 1041   *                      sec += (hrt << 6);
1049 1042   *                      sec -= (hrt << 8);
1050 1043   *                      sec += (hrt << 13);
1051 1044   *                      sec += (hrt << 14);
1052 1045   *                      sec -= (hrt << 20);
1053 1046   *                      sec += (hrt << 23);
1054 1047   *                      sec += (hrt << 24);
1055 1048   *                      sec += (hrt << 27);
1056 1049   *                      sec += (hrt << 31);
1057 1050   *                      sec >>= (32 + 30);
1058 1051   *
1059 1052   * Voila!  The only problem is, since hrt is 64 bits, we need to use 96-bit
1060 1053   * arithmetic to perform this calculation.  That's a waste, because ultimately
1061 1054   * we only need the highest 32 bits of the result.
1062 1055   *
1063 1056   * The first thing we do is to realize that we don't need to use all of hrt
1064 1057   * in the calculation.  The lowest 30 bits can contribute at most 1 to the
1065 1058   * quotient (2^30 / 1,000,000,000 = 1.07...), so we'll deal with them later.
1066 1059   * The highest 2 bits have to be zero, or hrt won't fit in a timestruc_t.
1067 1060   * Thus, the only bits of hrt that matter for division are bits 30..61.
1068 1061   * These 32 bits are just the lower-order word of (hrt >> 30).  This brings
1069 1062   * us down from 96-bit math to 64-bit math, and our algorithm becomes:
1070 1063   *
1071 1064   *                      tmp = (uint32_t) (hrt >> 30);
1072 1065   *                      sec = tmp;
1073 1066   *                      sec += (tmp << 6);
1074 1067   *                      sec -= (tmp << 8);
1075 1068   *                      sec += (tmp << 13);
1076 1069   *                      sec += (tmp << 14);
1077 1070   *                      sec -= (tmp << 20);
1078 1071   *                      sec += (tmp << 23);
1079 1072   *                      sec += (tmp << 24);
1080 1073   *                      sec += (tmp << 27);
1081 1074   *                      sec += (tmp << 31);
1082 1075   *                      sec >>= 32;
1083 1076   *
1084 1077   * Next, we're going to reduce this 64-bit computation to a 32-bit
1085 1078   * computation.  We begin by rewriting the above algorithm to use relative
1086 1079   * shifts instead of absolute shifts.  That is, instead of computing
1087 1080   * tmp << 6, tmp << 8, tmp << 13, etc, we'll just shift incrementally:
1088 1081   * tmp <<= 6, tmp <<= 2 (== 8 - 6), tmp <<= 5 (== 13 - 8), etc:
1089 1082   *
1090 1083   *                      tmp = (uint32_t) (hrt >> 30);
1091 1084   *                      sec = tmp;
1092 1085   *                      tmp <<= 6; sec += tmp;
1093 1086   *                      tmp <<= 2; sec -= tmp;
1094 1087   *                      tmp <<= 5; sec += tmp;
1095 1088   *                      tmp <<= 1; sec += tmp;
1096 1089   *                      tmp <<= 6; sec -= tmp;
1097 1090   *                      tmp <<= 3; sec += tmp;
1098 1091   *                      tmp <<= 1; sec += tmp;
1099 1092   *                      tmp <<= 3; sec += tmp;
1100 1093   *                      tmp <<= 4; sec += tmp;
1101 1094   *                      sec >>= 32;
1102 1095   *
1103 1096   * Now for the final step.  Instead of throwing away the low 32 bits at
1104 1097   * the end, we can throw them away as we go, only keeping the high 32 bits
1105 1098   * of the product at each step.  So, for example, where we now have
1106 1099   *
1107 1100   *                      tmp <<= 6; sec = sec + tmp;
1108 1101   * we will instead have
1109 1102   *                      tmp <<= 6; sec = (sec + tmp) >> 6;
1110 1103   * which is equivalent to
1111 1104   *                      sec = (sec >> 6) + tmp;
1112 1105   *
1113 1106   * The final shift ("sec >>= 32") goes away.
1114 1107   *
1115 1108   * All we're really doing here is long multiplication, just like we learned in
1116 1109   * grade school, except that at each step, we only look at the leftmost 32
1117 1110   * columns.  The cumulative error is, at most, the sum of all the bits we
1118 1111   * throw away, which is 2^-32 + 2^-31 + ... + 2^-2 + 2^-1 == 1 - 2^-32.
1119 1112   * Thus, the final result ("sec") is correct to +/- 1.
1120 1113   *
1121 1114   * It turns out to be important to keep "sec" positive at each step, because
1122 1115   * we don't want to have to explicitly extend the sign bit.  Therefore,
1123 1116   * starting with the last line of code above, each line that would have read
1124 1117   * "sec = (sec >> n) - tmp" must be changed to "sec = tmp - (sec >> n)", and
1125 1118   * the operators (+ or -) in all previous lines must be toggled accordingly.
1126 1119   * Thus, we end up with:
1127 1120   *
1128 1121   *                      tmp = (uint32_t) (hrt >> 30);
1129 1122   *                      sec = tmp + (sec >> 6);
1130 1123   *                      sec = tmp - (tmp >> 2);
1131 1124   *                      sec = tmp - (sec >> 5);
1132 1125   *                      sec = tmp + (sec >> 1);
1133 1126   *                      sec = tmp - (sec >> 6);
1134 1127   *                      sec = tmp - (sec >> 3);
1135 1128   *                      sec = tmp + (sec >> 1);
1136 1129   *                      sec = tmp + (sec >> 3);
1137 1130   *                      sec = tmp + (sec >> 4);
1138 1131   *
1139 1132   * This yields a value for sec that is accurate to +1/-1, so we have two
1140 1133   * cases to deal with.  The mysterious-looking "+ 7" in the code below biases
1141 1134   * the rounding toward zero, so that sec is always less than or equal to
1142 1135   * the correct value.  With this modified code, sec is accurate to +0/-2, with
1143 1136   * the -2 case being very rare in practice.  With this change, we only have to
1144 1137   * deal with one case (sec too small) in the cleanup code.
1145 1138   *
1146 1139   * The other modification we make is to delete the second line above
1147 1140   * ("sec = tmp + (sec >> 6);"), since it only has an effect when bit 31 is
1148 1141   * set, and the cleanup code can handle that rare case.  This reduces the
1149 1142   * *guaranteed* accuracy of sec to +0/-3, but speeds up the common cases.
1150 1143   *
1151 1144   * Finally, we compute nsec = hrt - (sec * 1,000,000,000).  nsec will always
1152 1145   * be positive (since sec is never too large), and will at most be equal to
1153 1146   * the error in sec (times 1,000,000,000) plus the low-order 30 bits of hrt.
1154 1147   * Thus, nsec < 3 * 1,000,000,000 + 2^30, which is less than 2^32, so we can
1155 1148   * safely assume that nsec fits in 32 bits.  Consequently, when we compute
1156 1149   * sec * 1,000,000,000, we only need the low 32 bits, so we can just do 32-bit
1157 1150   * arithmetic and let the high-order bits fall off the end.
1158 1151   *
1159 1152   * Since nsec < 3 * 1,000,000,000 + 2^30 == 4,073,741,824, the cleanup loop:
1160 1153   *
1161 1154   *                      while (nsec >= NANOSEC) {
1162 1155   *                              nsec -= NANOSEC;
1163 1156   *                              sec++;
1164 1157   *                      }
1165 1158   *
1166 1159   * is guaranteed to complete in at most 4 iterations.  In practice, the loop
1167 1160   * completes in 0 or 1 iteration over 95% of the time.
1168 1161   *
1169 1162   * On an SS2, this implementation of hrt2ts() takes 1.7 usec, versus about
1170 1163   * 35 usec for software division -- about 20 times faster.
1171 1164   */
1172 1165  void
1173 1166  hrt2ts(hrtime_t hrt, timestruc_t *tsp)
1174 1167  {
1175 1168          uint32_t sec, nsec, tmp;
1176 1169  
1177 1170          tmp = (uint32_t)(hrt >> 30);
1178 1171          sec = tmp - (tmp >> 2);
1179 1172          sec = tmp - (sec >> 5);
1180 1173          sec = tmp + (sec >> 1);
1181 1174          sec = tmp - (sec >> 6) + 7;
1182 1175          sec = tmp - (sec >> 3);
1183 1176          sec = tmp + (sec >> 1);
1184 1177          sec = tmp + (sec >> 3);
1185 1178          sec = tmp + (sec >> 4);
1186 1179          tmp = (sec << 7) - sec - sec - sec;
1187 1180          tmp = (tmp << 7) - tmp - tmp - tmp;
1188 1181          tmp = (tmp << 7) - tmp - tmp - tmp;
1189 1182          nsec = (uint32_t)hrt - (tmp << 9);
1190 1183          while (nsec >= NANOSEC) {
1191 1184                  nsec -= NANOSEC;
1192 1185                  sec++;
1193 1186          }
1194 1187          tsp->tv_sec = (time_t)sec;
1195 1188          tsp->tv_nsec = nsec;
1196 1189  }
1197 1190  
1198 1191  /*
1199 1192   * Convert from timestruc_t to hrtime_t.
1200 1193   *
1201 1194   * The code below is equivalent to:
1202 1195   *
1203 1196   *      hrt = tsp->tv_sec * NANOSEC + tsp->tv_nsec;
1204 1197   *
1205 1198   * but requires no integer multiply.
1206 1199   */
1207 1200  hrtime_t
1208 1201  ts2hrt(const timestruc_t *tsp)
1209 1202  {
1210 1203          hrtime_t hrt;
1211 1204  
1212 1205          hrt = tsp->tv_sec;
1213 1206          hrt = (hrt << 7) - hrt - hrt - hrt;
1214 1207          hrt = (hrt << 7) - hrt - hrt - hrt;
1215 1208          hrt = (hrt << 7) - hrt - hrt - hrt;
1216 1209          hrt = (hrt << 9) + tsp->tv_nsec;
1217 1210          return (hrt);
1218 1211  }
1219 1212  
1220 1213  /*
1221 1214   * For the various 32-bit "compatibility" paths in the system.
1222 1215   */
1223 1216  void
1224 1217  hrt2ts32(hrtime_t hrt, timestruc32_t *ts32p)
1225 1218  {
1226 1219          timestruc_t ts;
1227 1220  
1228 1221          hrt2ts(hrt, &ts);
1229 1222          TIMESPEC_TO_TIMESPEC32(ts32p, &ts);
1230 1223  }
1231 1224  
1232 1225  /*
1233 1226   * If this ever becomes performance critical (ha!), we can borrow the
1234 1227   * code from ts2hrt(), above, to multiply tv_sec by 1,000,000 and the
1235 1228   * straightforward (x << 10) - (x << 5) + (x << 3) to multiply tv_usec by
1236 1229   * 1,000.  For now, we'll opt for readability (besides, the compiler does
1237 1230   * a passable job of optimizing constant multiplication into shifts and adds).
1238 1231   */
1239 1232  hrtime_t
1240 1233  tv2hrt(struct timeval *tvp)
1241 1234  {
1242 1235          return ((hrtime_t)tvp->tv_sec * NANOSEC +
1243 1236              (hrtime_t)tvp->tv_usec * (NANOSEC / MICROSEC));
1244 1237  }
1245 1238  
1246 1239  void
1247 1240  hrt2tv(hrtime_t hrt, struct timeval *tvp)
1248 1241  {
1249 1242          uint32_t sec, nsec, tmp;
1250 1243          uint32_t q, r, t;
1251 1244  
1252 1245          tmp = (uint32_t)(hrt >> 30);
1253 1246          sec = tmp - (tmp >> 2);
1254 1247          sec = tmp - (sec >> 5);
1255 1248          sec = tmp + (sec >> 1);
1256 1249          sec = tmp - (sec >> 6) + 7;
1257 1250          sec = tmp - (sec >> 3);
1258 1251          sec = tmp + (sec >> 1);
1259 1252          sec = tmp + (sec >> 3);
1260 1253          sec = tmp + (sec >> 4);
1261 1254          tmp = (sec << 7) - sec - sec - sec;
1262 1255          tmp = (tmp << 7) - tmp - tmp - tmp;
1263 1256          tmp = (tmp << 7) - tmp - tmp - tmp;
1264 1257          nsec = (uint32_t)hrt - (tmp << 9);
1265 1258          while (nsec >= NANOSEC) {
1266 1259                  nsec -= NANOSEC;
1267 1260                  sec++;
1268 1261          }
1269 1262          tvp->tv_sec = (time_t)sec;
1270 1263  /*
1271 1264   * this routine is very similar to hr2ts, but requires microseconds
1272 1265   * instead of nanoseconds, so an interger divide by 1000 routine
1273 1266   * completes the conversion
1274 1267   */
1275 1268          t = (nsec >> 7) + (nsec >> 8) + (nsec >> 12);
1276 1269          q = (nsec >> 1) + t + (nsec >> 15) + (t >> 11) + (t >> 14);
1277 1270          q = q >> 9;
1278 1271          r = nsec - q*1000;
1279 1272          tvp->tv_usec = q + ((r + 24) >> 10);
1280 1273  
1281 1274  }
1282 1275  
1283 1276  int
1284 1277  nanosleep(timespec_t *rqtp, timespec_t *rmtp)
1285 1278  {
1286 1279          timespec_t rqtime;
1287 1280          timespec_t rmtime;
1288 1281          timespec_t now;
1289 1282          int timecheck;
1290 1283          int ret = 1;
1291 1284          model_t datamodel = get_udatamodel();
1292 1285  
1293 1286          timecheck = timechanged;
1294 1287          gethrestime(&now);
1295 1288  
1296 1289          if (datamodel == DATAMODEL_NATIVE) {
1297 1290                  if (copyin(rqtp, &rqtime, sizeof (rqtime)))
1298 1291                          return (set_errno(EFAULT));
1299 1292          } else {
1300 1293                  timespec32_t rqtime32;
1301 1294  
1302 1295                  if (copyin(rqtp, &rqtime32, sizeof (rqtime32)))
1303 1296                          return (set_errno(EFAULT));
1304 1297                  TIMESPEC32_TO_TIMESPEC(&rqtime, &rqtime32);
1305 1298          }
1306 1299  
1307 1300          if (rqtime.tv_sec < 0 || rqtime.tv_nsec < 0 ||
1308 1301              rqtime.tv_nsec >= NANOSEC)
1309 1302                  return (set_errno(EINVAL));
1310 1303  
1311 1304          if (timerspecisset(&rqtime)) {
1312 1305                  timespecadd(&rqtime, &now);
1313 1306                  mutex_enter(&curthread->t_delay_lock);
1314 1307                  while ((ret = cv_waituntil_sig(&curthread->t_delay_cv,
1315 1308                      &curthread->t_delay_lock, &rqtime, timecheck)) > 0)
1316 1309                          continue;
1317 1310                  mutex_exit(&curthread->t_delay_lock);
1318 1311          }
1319 1312  
1320 1313          if (rmtp) {
1321 1314                  /*
1322 1315                   * If cv_waituntil_sig() returned due to a signal, and
1323 1316                   * there is time remaining, then set the time remaining.
1324 1317                   * Else set time remaining to zero
1325 1318                   */
1326 1319                  rmtime.tv_sec = rmtime.tv_nsec = 0;
1327 1320                  if (ret == 0) {
1328 1321                          timespec_t delta = rqtime;
1329 1322  
1330 1323                          gethrestime(&now);
1331 1324                          timespecsub(&delta, &now);
1332 1325                          if (delta.tv_sec > 0 || (delta.tv_sec == 0 &&
1333 1326                              delta.tv_nsec > 0))
1334 1327                                  rmtime = delta;
1335 1328                  }
1336 1329  
1337 1330                  if (datamodel == DATAMODEL_NATIVE) {
1338 1331                          if (copyout(&rmtime, rmtp, sizeof (rmtime)))
1339 1332                                  return (set_errno(EFAULT));
1340 1333                  } else {
1341 1334                          timespec32_t rmtime32;
1342 1335  
1343 1336                          TIMESPEC_TO_TIMESPEC32(&rmtime32, &rmtime);
1344 1337                          if (copyout(&rmtime32, rmtp, sizeof (rmtime32)))
1345 1338                                  return (set_errno(EFAULT));
1346 1339                  }
1347 1340          }
1348 1341  
1349 1342          if (ret == 0)
1350 1343                  return (set_errno(EINTR));
1351 1344          return (0);
1352 1345  }
1353 1346  
1354 1347  /*
1355 1348   * Routines to convert standard UNIX time (seconds since Jan 1, 1970)
1356 1349   * into year/month/day/hour/minute/second format, and back again.
1357 1350   * Note: these routines require tod_lock held to protect cached state.
1358 1351   */
1359 1352  static int days_thru_month[64] = {
1360 1353          0, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, 0, 0,
1361 1354          0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
1362 1355          0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
1363 1356          0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
1364 1357  };
1365 1358  
1366 1359  todinfo_t saved_tod;
1367 1360  int saved_utc = -60;
1368 1361  
1369 1362  todinfo_t
1370 1363  utc_to_tod(time_t utc)
1371 1364  {
1372 1365          long dse, day, month, year;
1373 1366          todinfo_t tod;
1374 1367  
1375 1368          ASSERT(MUTEX_HELD(&tod_lock));
1376 1369  
1377 1370          /*
1378 1371           * Note that tod_set_prev() assumes utc will be set to zero in
1379 1372           * the case of it being negative.  Consequently, any change made
1380 1373           * to this behavior would have to be reflected in that function
1381 1374           * as well.
1382 1375           */
1383 1376          if (utc < 0)                    /* should never happen */
1384 1377                  utc = 0;
1385 1378  
1386 1379          saved_tod.tod_sec += utc - saved_utc;
1387 1380          saved_utc = utc;
1388 1381          if (saved_tod.tod_sec >= 0 && saved_tod.tod_sec < 60)
1389 1382                  return (saved_tod);     /* only the seconds changed */
1390 1383  
1391 1384          dse = utc / 86400;              /* days since epoch */
1392 1385  
1393 1386          tod.tod_sec = utc % 60;
1394 1387          tod.tod_min = (utc % 3600) / 60;
1395 1388          tod.tod_hour = (utc % 86400) / 3600;
1396 1389          tod.tod_dow = (dse + 4) % 7 + 1;        /* epoch was a Thursday */
1397 1390  
1398 1391          year = dse / 365 + 72;  /* first guess -- always a bit too large */
1399 1392          do {
1400 1393                  year--;
1401 1394                  day = dse - 365 * (year - 70) - ((year - 69) >> 2);
1402 1395          } while (day < 0);
1403 1396  
1404 1397          month = ((year & 3) << 4) + 1;
1405 1398          while (day >= days_thru_month[month + 1])
1406 1399                  month++;
1407 1400  
1408 1401          tod.tod_day = day - days_thru_month[month] + 1;
1409 1402          tod.tod_month = month & 15;
1410 1403          tod.tod_year = year;
1411 1404  
1412 1405          saved_tod = tod;
1413 1406          return (tod);
1414 1407  }
1415 1408  
1416 1409  time_t
1417 1410  tod_to_utc(todinfo_t tod)
1418 1411  {
1419 1412          time_t utc;
1420 1413          int year = tod.tod_year;
1421 1414          int month = tod.tod_month + ((year & 3) << 4);
1422 1415  #ifdef DEBUG
1423 1416          /* only warn once, not each time called */
1424 1417          static int year_warn = 1;
1425 1418          static int month_warn = 1;
1426 1419          static int day_warn = 1;
1427 1420          static int hour_warn = 1;
1428 1421          static int min_warn = 1;
1429 1422          static int sec_warn = 1;
1430 1423          int days_diff = days_thru_month[month + 1] - days_thru_month[month];
1431 1424  #endif
1432 1425  
1433 1426          ASSERT(MUTEX_HELD(&tod_lock));
1434 1427  
1435 1428  #ifdef DEBUG
1436 1429          if (year_warn && (year < 70 || year > 8029)) {
1437 1430                  cmn_err(CE_WARN,
1438 1431                      "The hardware real-time clock appears to have the "
1439 1432                      "wrong years value %d -- time needs to be reset\n",
1440 1433                      year);
1441 1434                  year_warn = 0;
1442 1435          }
1443 1436  
1444 1437          if (month_warn && (tod.tod_month < 1 || tod.tod_month > 12)) {
1445 1438                  cmn_err(CE_WARN,
1446 1439                      "The hardware real-time clock appears to have the "
1447 1440                      "wrong months value %d -- time needs to be reset\n",
1448 1441                      tod.tod_month);
1449 1442                  month_warn = 0;
1450 1443          }
1451 1444  
1452 1445          if (day_warn && (tod.tod_day < 1 || tod.tod_day > days_diff)) {
1453 1446                  cmn_err(CE_WARN,
1454 1447                      "The hardware real-time clock appears to have the "
1455 1448                      "wrong days value %d -- time needs to be reset\n",
1456 1449                      tod.tod_day);
1457 1450                  day_warn = 0;
1458 1451          }
1459 1452  
1460 1453          if (hour_warn && (tod.tod_hour < 0 || tod.tod_hour > 23)) {
1461 1454                  cmn_err(CE_WARN,
1462 1455                      "The hardware real-time clock appears to have the "
1463 1456                      "wrong hours value %d -- time needs to be reset\n",
1464 1457                      tod.tod_hour);
1465 1458                  hour_warn = 0;
1466 1459          }
1467 1460  
1468 1461          if (min_warn && (tod.tod_min < 0 || tod.tod_min > 59)) {
1469 1462                  cmn_err(CE_WARN,
1470 1463                      "The hardware real-time clock appears to have the "
1471 1464                      "wrong minutes value %d -- time needs to be reset\n",
1472 1465                      tod.tod_min);
1473 1466                  min_warn = 0;
1474 1467          }
1475 1468  
1476 1469          if (sec_warn && (tod.tod_sec < 0 || tod.tod_sec > 59)) {
1477 1470                  cmn_err(CE_WARN,
1478 1471                      "The hardware real-time clock appears to have the "
1479 1472                      "wrong seconds value %d -- time needs to be reset\n",
1480 1473                      tod.tod_sec);
1481 1474                  sec_warn = 0;
1482 1475          }
1483 1476  #endif
1484 1477  
1485 1478          utc = (year - 70);              /* next 3 lines: utc = 365y + y/4 */
1486 1479          utc += (utc << 3) + (utc << 6);
1487 1480          utc += (utc << 2) + ((year - 69) >> 2);
1488 1481          utc += days_thru_month[month] + tod.tod_day - 1;
1489 1482          utc = (utc << 3) + (utc << 4) + tod.tod_hour;   /* 24 * day + hour */
1490 1483          utc = (utc << 6) - (utc << 2) + tod.tod_min;    /* 60 * hour + min */
1491 1484          utc = (utc << 6) - (utc << 2) + tod.tod_sec;    /* 60 * min + sec */
1492 1485  
1493 1486          return (utc);
1494 1487  }

↓ open down ↓

821 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX