Print this page
remove whole-process swapping
Long before Unix supported paging, it used process swapping to reclaim
memory. The code is there and in theory it runs when we get *extremely* low
on memory. In practice, it never runs since the definition of low-on-memory
is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/os/timers.c
+++ new/usr/src/uts/common/os/timers.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * Copyright (c) 1982, 1986 Regents of the University of California.
29 29 * All rights reserved. The Berkeley software License Agreement
30 30 * specifies the terms and conditions for redistribution.
31 31 */
32 32
33 33 #include <sys/param.h>
34 34 #include <sys/user.h>
35 35 #include <sys/vnode.h>
36 36 #include <sys/proc.h>
37 37 #include <sys/time.h>
38 38 #include <sys/systm.h>
39 39 #include <sys/kmem.h>
40 40 #include <sys/cmn_err.h>
41 41 #include <sys/cpuvar.h>
42 42 #include <sys/timer.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/sysmacros.h>
45 45 #include <sys/cyclic.h>
46 46
47 47 static void realitexpire(void *);
48 48 static void realprofexpire(void *);
49 49 static void timeval_advance(struct timeval *, struct timeval *);
50 50
51 51 kmutex_t tod_lock; /* protects time-of-day stuff */
52 52
53 53 /*
54 54 * Constant to define the minimum interval value of the ITIMER_REALPROF timer.
55 55 * Value is in microseconds; defaults to 500 usecs. Setting this value
56 56 * significantly lower may allow for denial-of-service attacks.
57 57 */
58 58 int itimer_realprof_minimum = 500;
59 59
/*
 * Macro to compare a timeval (tvp) to a timestruc (tsp) using the relational
 * operator passed as cmp.  Seconds are compared first; only when they are
 * equal are the sub-second fields compared, with the timeval's microseconds
 * scaled by 1000 so that they are in the same unit as tv_nsec.
 *
 * Both pointer arguments are evaluated more than once, so they must not
 * have side effects.
 */

#define	TVTSCMP(tvp, tsp, cmp) \
	/* CSTYLED */ \
	((tvp)->tv_sec cmp (tsp)->tv_sec || \
	((tvp)->tv_sec == (tsp)->tv_sec && \
	/* CSTYLED */ \
	(tvp)->tv_usec * 1000 cmp (tsp)->tv_nsec))
70 70
71 71 /*
72 72 * Time of day and interval timer support.
73 73 *
74 74 * These routines provide the kernel entry points to get and set
75 75 * the time-of-day and per-process interval timers. Subroutines
76 76 * here provide support for adding and subtracting timeval structures
77 77 * and decrementing interval timers, optionally reloading the interval
78 78 * timers when they expire.
79 79 */
80 80
81 81 /*
82 82 * SunOS function to generate monotonically increasing time values.
83 83 */
84 84 void
85 85 uniqtime(struct timeval *tv)
86 86 {
87 87 static struct timeval last;
88 88 static int last_timechanged;
89 89 timestruc_t ts;
90 90 time_t sec;
91 91 int usec, nsec;
92 92
93 93 /*
94 94 * protect modification of last
95 95 */
96 96 mutex_enter(&tod_lock);
97 97 gethrestime(&ts);
98 98
99 99 /*
100 100 * Fast algorithm to convert nsec to usec -- see hrt2ts()
101 101 * in common/os/timers.c for a full description.
102 102 */
103 103 nsec = ts.tv_nsec;
104 104 usec = nsec + (nsec >> 2);
105 105 usec = nsec + (usec >> 1);
106 106 usec = nsec + (usec >> 2);
107 107 usec = nsec + (usec >> 4);
108 108 usec = nsec - (usec >> 3);
109 109 usec = nsec + (usec >> 2);
110 110 usec = nsec + (usec >> 3);
111 111 usec = nsec + (usec >> 4);
112 112 usec = nsec + (usec >> 1);
113 113 usec = nsec + (usec >> 6);
114 114 usec = usec >> 10;
115 115 sec = ts.tv_sec;
116 116
117 117 /*
118 118 * If the system hres time has been changed since the last time
119 119 * we are called. then all bets are off; just update our
120 120 * local copy of timechanged and accept the reported time as is.
121 121 */
122 122 if (last_timechanged != timechanged) {
123 123 last_timechanged = timechanged;
124 124 }
125 125 /*
126 126 * Try to keep timestamps unique, but don't be obsessive about
127 127 * it in the face of large differences.
128 128 */
129 129 else if ((sec <= last.tv_sec) && /* same or lower seconds, and */
130 130 ((sec != last.tv_sec) || /* either different second or */
131 131 (usec <= last.tv_usec)) && /* lower microsecond, and */
132 132 ((last.tv_sec - sec) <= 5)) { /* not way back in time */
133 133 sec = last.tv_sec;
134 134 usec = last.tv_usec + 1;
135 135 if (usec >= MICROSEC) {
136 136 usec -= MICROSEC;
137 137 sec++;
138 138 }
139 139 }
140 140 last.tv_sec = sec;
141 141 last.tv_usec = usec;
142 142 mutex_exit(&tod_lock);
143 143
144 144 tv->tv_sec = sec;
145 145 tv->tv_usec = usec;
146 146 }
147 147
148 148 /*
149 149 * Timestamps are exported from the kernel in several places.
150 150 * Such timestamps are commonly used for either uniqueness or for
151 151 * sequencing - truncation to 32-bits is fine for uniqueness,
152 152 * but sequencing is going to take more work as we get closer to 2038!
153 153 */
154 154 void
155 155 uniqtime32(struct timeval32 *tv32p)
156 156 {
157 157 struct timeval tv;
158 158
159 159 uniqtime(&tv);
160 160 TIMEVAL_TO_TIMEVAL32(tv32p, &tv);
161 161 }
162 162
163 163 int
164 164 gettimeofday(struct timeval *tp)
165 165 {
166 166 struct timeval atv;
167 167
168 168 if (tp) {
169 169 uniqtime(&atv);
170 170 if (get_udatamodel() == DATAMODEL_NATIVE) {
171 171 if (copyout(&atv, tp, sizeof (atv)))
172 172 return (set_errno(EFAULT));
173 173 } else {
174 174 struct timeval32 tv32;
175 175
176 176 if (TIMEVAL_OVERFLOW(&atv))
177 177 return (set_errno(EOVERFLOW));
178 178 TIMEVAL_TO_TIMEVAL32(&tv32, &atv);
179 179
180 180 if (copyout(&tv32, tp, sizeof (tv32)))
181 181 return (set_errno(EFAULT));
182 182 }
183 183 }
184 184 return (0);
185 185 }
186 186
187 187 int
188 188 getitimer(uint_t which, struct itimerval *itv)
189 189 {
190 190 int error;
191 191
192 192 if (get_udatamodel() == DATAMODEL_NATIVE)
193 193 error = xgetitimer(which, itv, 0);
194 194 else {
195 195 struct itimerval kitv;
196 196
197 197 if ((error = xgetitimer(which, &kitv, 1)) == 0) {
198 198 if (ITIMERVAL_OVERFLOW(&kitv)) {
199 199 error = EOVERFLOW;
200 200 } else {
201 201 struct itimerval32 itv32;
202 202
203 203 ITIMERVAL_TO_ITIMERVAL32(&itv32, &kitv);
204 204 if (copyout(&itv32, itv, sizeof (itv32)) != 0)
205 205 error = EFAULT;
206 206 }
207 207 }
208 208 }
209 209
210 210 return (error ? (set_errno(error)) : 0);
211 211 }
212 212
/*
 * Fetch the current setting of interval timer `which' for the calling
 * process (ITIMER_REAL, ITIMER_REALPROF) or the calling lwp
 * (ITIMER_VIRTUAL, ITIMER_PROF).
 *
 * The result is delivered through itv: with bcopy() when iskaddr is
 * non-zero, otherwise with copyout(), in which case the caller must be
 * using the native data model.
 *
 * Returns 0 on success, EINVAL for an unknown `which', or EFAULT if the
 * copyout fails.  The error is returned as a plain value; set_errno() is
 * not called here.
 */
int
xgetitimer(uint_t which, struct itimerval *itv, int iskaddr)
{
	struct proc *p = curproc;
	struct timeval now;
	struct itimerval aitv;
	hrtime_t ts, first, interval, remain;

	mutex_enter(&p->p_lock);

	switch (which) {
	case ITIMER_VIRTUAL:
	case ITIMER_PROF:
		/* Per-lwp timers are returned exactly as stored. */
		aitv = ttolwp(curthread)->lwp_timer[which];
		break;

	case ITIMER_REAL:
		uniqtime(&now);
		aitv = p->p_realitimer;

		/*
		 * Report the time remaining until the stored it_value:
		 * zero if it is already earlier than now, otherwise the
		 * difference between the two.
		 */
		if (timerisset(&aitv.it_value)) {
			/*CSTYLED*/
			if (timercmp(&aitv.it_value, &now, <)) {
				timerclear(&aitv.it_value);
			} else {
				timevalsub(&aitv.it_value, &now);
			}
		}
		break;

	case ITIMER_REALPROF:
		/* No cyclic installed means the timer is not set. */
		if (curproc->p_rprof_cyclic == CYCLIC_NONE) {
			bzero(&aitv, sizeof (aitv));
			break;
		}

		aitv = curproc->p_rprof_timer;

		first = tv2hrt(&aitv.it_value);
		interval = tv2hrt(&aitv.it_interval);

		if ((ts = gethrtime()) < first) {
			/*
			 * We haven't gone off for the first time; the time
			 * remaining is simply the first time we will go
			 * off minus the current time.
			 */
			remain = first - ts;
		} else {
			if (interval == 0) {
				/*
				 * This was set as a one-shot, and we've
				 * already gone off; there is no time
				 * remaining.
				 */
				remain = 0;
			} else {
				/*
				 * We have a non-zero interval; we need to
				 * determine how far we are into the current
				 * interval, and subtract that from the
				 * interval to determine the time remaining.
				 */
				remain = interval - ((ts - first) % interval);
			}
		}

		hrt2tv(remain, &aitv.it_value);
		break;

	default:
		mutex_exit(&p->p_lock);
		return (EINVAL);
	}

	mutex_exit(&p->p_lock);

	/* Hand the snapshot back after p_lock has been dropped. */
	if (iskaddr) {
		bcopy(&aitv, itv, sizeof (*itv));
	} else {
		ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
		if (copyout(&aitv, itv, sizeof (*itv)))
			return (EFAULT);
	}

	return (0);
}
300 300
301 301
302 302 int
303 303 setitimer(uint_t which, struct itimerval *itv, struct itimerval *oitv)
304 304 {
305 305 int error;
306 306
307 307 if (oitv != NULL)
308 308 if ((error = getitimer(which, oitv)) != 0)
309 309 return (error);
310 310
311 311 if (itv == NULL)
312 312 return (0);
313 313
314 314 if (get_udatamodel() == DATAMODEL_NATIVE)
315 315 error = xsetitimer(which, itv, 0);
316 316 else {
317 317 struct itimerval32 itv32;
318 318 struct itimerval kitv;
319 319
320 320 if (copyin(itv, &itv32, sizeof (itv32)))
321 321 error = EFAULT;
322 322 ITIMERVAL32_TO_ITIMERVAL(&kitv, &itv32);
323 323 error = xsetitimer(which, &kitv, 1);
324 324 }
325 325
326 326 return (error ? (set_errno(error)) : 0);
327 327 }
328 328
/*
 * Install a new value for interval timer `which'.
 *
 * itv is read with bcopy() when iskaddr is non-zero, otherwise with
 * copyin(), in which case the caller must be using the native data model.
 * A NULL itv is a no-op.  Both it_value and it_interval are validated and
 * rounded up by itimerfix() before use.
 *
 * Returns 0 on success, EFAULT if the copyin fails, or EINVAL for an
 * unacceptable time value or an unknown `which'.  The error is returned as
 * a plain value; set_errno() is not called here.
 */
int
xsetitimer(uint_t which, struct itimerval *itv, int iskaddr)
{
	struct itimerval aitv;
	struct timeval now;
	struct proc *p = curproc;
	kthread_t *t;
	timeout_id_t tmp_id;
	cyc_handler_t hdlr;
	cyc_time_t when;
	cyclic_id_t cyclic;
	hrtime_t ts;
	int min;

	if (itv == NULL)
		return (0);

	if (iskaddr) {
		bcopy(itv, &aitv, sizeof (aitv));
	} else {
		ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
		if (copyin(itv, &aitv, sizeof (aitv)))
			return (EFAULT);
	}

	/*
	 * Pick the smallest non-zero sub-second value itimerfix() will allow,
	 * in microseconds: for ITIMER_REALPROF the larger of the cyclic
	 * resolution and itimer_realprof_minimum, otherwise one clock tick.
	 */
	if (which == ITIMER_REALPROF) {
		min = MAX((int)(cyclic_getres() / (NANOSEC / MICROSEC)),
		    itimer_realprof_minimum);
	} else {
		min = usec_per_tick;
	}

	/* A bad it_interval only matters if the timer is being armed. */
	if (itimerfix(&aitv.it_value, min) ||
	    (itimerfix(&aitv.it_interval, min) && timerisset(&aitv.it_value)))
		return (EINVAL);

	mutex_enter(&p->p_lock);
	switch (which) {
	case ITIMER_REAL:
		/*
		 * The SITBUSY flag prevents conflicts with multiple
		 * threads attempting to perform setitimer(ITIMER_REAL)
		 * at the same time, even when we drop p->p_lock below.
		 * Any blocked thread returns successfully because the
		 * effect is the same as if it got here first, finished,
		 * and the other thread then came through and destroyed
		 * what it did.  We are just protecting the system from
		 * malfunctioning due to the race condition.
		 */
		if (p->p_flag & SITBUSY) {
			mutex_exit(&p->p_lock);
			return (0);
		}
		p->p_flag |= SITBUSY;
		while ((tmp_id = p->p_itimerid) != 0) {
			/*
			 * Avoid deadlock in callout_delete (called from
			 * untimeout) which may go to sleep (while holding
			 * p_lock). Drop p_lock and re-acquire it after
			 * untimeout returns. Need to clear p_itimerid
			 * while holding p_lock.
			 */
			p->p_itimerid = 0;
			mutex_exit(&p->p_lock);
			(void) untimeout(tmp_id);
			mutex_enter(&p->p_lock);
		}
		if (timerisset(&aitv.it_value)) {
			/*
			 * Convert the relative it_value into an absolute
			 * time of day before storing it and arming the
			 * timeout.
			 */
			uniqtime(&now);
			timevaladd(&aitv.it_value, &now);
			p->p_itimerid = realtime_timeout(realitexpire,
			    p, hzto(&aitv.it_value));
		}
		p->p_realitimer = aitv;
		p->p_flag &= ~SITBUSY;
		break;

	case ITIMER_REALPROF:
		/*
		 * Detach the old cyclic from the proc while p_lock is held;
		 * it is removed below once cpu_lock has been taken.
		 */
		cyclic = p->p_rprof_cyclic;
		p->p_rprof_cyclic = CYCLIC_NONE;

		mutex_exit(&p->p_lock);

		/*
		 * We're now going to acquire cpu_lock, remove the old cyclic
		 * if necessary, and add our new cyclic.
		 */
		mutex_enter(&cpu_lock);

		if (cyclic != CYCLIC_NONE)
			cyclic_remove(cyclic);

		if (!timerisset(&aitv.it_value)) {
			/*
			 * If we were passed a value of 0, we're done.
			 */
			mutex_exit(&cpu_lock);
			return (0);
		}

		hdlr.cyh_func = realprofexpire;
		hdlr.cyh_arg = p;
		hdlr.cyh_level = CY_LOW_LEVEL;

		/* ts is the absolute hrtime of the first expiry. */
		when.cyt_when = (ts = gethrtime() + tv2hrt(&aitv.it_value));
		when.cyt_interval = tv2hrt(&aitv.it_interval);

		if (when.cyt_interval == 0) {
			/*
			 * Using the same logic as for CLOCK_HIGHRES timers, we
			 * set the interval to be INT64_MAX - when.cyt_when to
			 * effect a one-shot; see the comment in clock_highres.c
			 * for more details on why this works.
			 */
			when.cyt_interval = INT64_MAX - when.cyt_when;
		}

		cyclic = cyclic_add(&hdlr, &when);

		mutex_exit(&cpu_lock);

		/*
		 * We have now successfully added the cyclic.  Reacquire
		 * p_lock, and see if anyone has snuck in.
		 */
		mutex_enter(&p->p_lock);

		if (p->p_rprof_cyclic != CYCLIC_NONE) {
			/*
			 * We're racing with another thread establishing an
			 * ITIMER_REALPROF interval timer.  We'll let the other
			 * thread win (this is a race at the application level,
			 * so letting the other thread win is acceptable).
			 */
			mutex_exit(&p->p_lock);
			mutex_enter(&cpu_lock);
			cyclic_remove(cyclic);
			mutex_exit(&cpu_lock);

			return (0);
		}

		/*
		 * Success.  Set our tracking variables in the proc structure,
		 * cancel any outstanding ITIMER_PROF, and allocate the
		 * per-thread SIGPROF buffers, if possible.
		 */
		hrt2tv(ts, &aitv.it_value);
		p->p_rprof_timer = aitv;
		p->p_rprof_cyclic = cyclic;

		t = p->p_tlist;
		do {
			struct itimerval *itvp;

			itvp = &ttolwp(t)->lwp_timer[ITIMER_PROF];
			timerclear(&itvp->it_interval);
			timerclear(&itvp->it_value);

			if (t->t_rprof != NULL)
				continue;

			/*
			 * KM_NOSLEEP: the result is stored unchecked here;
			 * realprofexpire() attempts the allocation again
			 * for threads whose t_rprof is still NULL.
			 */
			t->t_rprof =
			    kmem_zalloc(sizeof (struct rprof), KM_NOSLEEP);
			aston(t);
		} while ((t = t->t_forw) != p->p_tlist);

		break;

	case ITIMER_VIRTUAL:
		ttolwp(curthread)->lwp_timer[ITIMER_VIRTUAL] = aitv;
		break;

	case ITIMER_PROF:
		if (p->p_rprof_cyclic != CYCLIC_NONE) {
			/*
			 * Silently ignore ITIMER_PROF if ITIMER_REALPROF
			 * is in effect.
			 */
			break;
		}

		ttolwp(curthread)->lwp_timer[ITIMER_PROF] = aitv;
		break;

	default:
		mutex_exit(&p->p_lock);
		return (EINVAL);
	}
	mutex_exit(&p->p_lock);
	return (0);
}
521 521
522 522 /*
523 523 * Delete the ITIMER_REALPROF interval timer.
524 524 * Called only from exec_args() when exec occurs.
525 525 * The other ITIMER_* interval timers are specified
526 526 * to be inherited across exec(), so leave them alone.
527 527 */
528 528 void
529 529 delete_itimer_realprof(void)
530 530 {
531 531 kthread_t *t = curthread;
532 532 struct proc *p = ttoproc(t);
533 533 klwp_t *lwp = ttolwp(t);
534 534 cyclic_id_t cyclic;
535 535
536 536 mutex_enter(&p->p_lock);
537 537
538 538 /* we are performing execve(); assert we are single-threaded */
539 539 ASSERT(t == p->p_tlist && t == t->t_forw);
540 540
541 541 if ((cyclic = p->p_rprof_cyclic) == CYCLIC_NONE) {
542 542 mutex_exit(&p->p_lock);
543 543 } else {
544 544 p->p_rprof_cyclic = CYCLIC_NONE;
545 545 /*
546 546 * Delete any current instance of SIGPROF.
547 547 */
548 548 if (lwp->lwp_cursig == SIGPROF) {
549 549 lwp->lwp_cursig = 0;
550 550 lwp->lwp_extsig = 0;
551 551 if (lwp->lwp_curinfo) {
552 552 siginfofree(lwp->lwp_curinfo);
553 553 lwp->lwp_curinfo = NULL;
554 554 }
555 555 }
556 556 /*
557 557 * Delete any pending instances of SIGPROF.
558 558 */
559 559 sigdelset(&p->p_sig, SIGPROF);
560 560 sigdelset(&p->p_extsig, SIGPROF);
561 561 sigdelq(p, NULL, SIGPROF);
562 562 sigdelset(&t->t_sig, SIGPROF);
563 563 sigdelset(&t->t_extsig, SIGPROF);
564 564 sigdelq(p, t, SIGPROF);
565 565
566 566 mutex_exit(&p->p_lock);
567 567
568 568 /*
569 569 * Remove the ITIMER_REALPROF cyclic.
570 570 */
571 571 mutex_enter(&cpu_lock);
572 572 cyclic_remove(cyclic);
573 573 mutex_exit(&cpu_lock);
574 574 }
575 575 }
576 576
577 577 /*
578 578 * Real interval timer expired:
579 579 * send process whose timer expired an alarm signal.
580 580 * If time is not set up to reload, then just return.
581 581 * Else compute next time timer should go off which is > current time.
582 582 * This is where delay in processing this timeout causes multiple
583 583 * SIGALRM calls to be compressed into one.
584 584 */
585 585 static void
586 586 realitexpire(void *arg)
587 587 {
588 588 struct proc *p = arg;
589 589 struct timeval *valp = &p->p_realitimer.it_value;
590 590 struct timeval *intervalp = &p->p_realitimer.it_interval;
591 591 #if !defined(_LP64)
592 592 clock_t ticks;
593 593 #endif
594 594
595 595 mutex_enter(&p->p_lock);
596 596 #if !defined(_LP64)
597 597 if ((ticks = hzto(valp)) > 1) {
598 598 /*
599 599 * If we are executing before we were meant to, it must be
600 600 * because of an overflow in a prior hzto() calculation.
601 601 * In this case, we want to go to sleep for the recalculated
602 602 * number of ticks. For the special meaning of the value "1"
603 603 * see comment in timespectohz().
604 604 */
605 605 p->p_itimerid = realtime_timeout(realitexpire, p, ticks);
606 606 mutex_exit(&p->p_lock);
607 607 return;
608 608 }
609 609 #endif
610 610 sigtoproc(p, NULL, SIGALRM);
611 611 if (!timerisset(intervalp)) {
612 612 timerclear(valp);
613 613 p->p_itimerid = 0;
614 614 } else {
615 615 /* advance timer value past current time */
616 616 timeval_advance(valp, intervalp);
617 617 p->p_itimerid = realtime_timeout(realitexpire, p, hzto(valp));
618 618 }
619 619 mutex_exit(&p->p_lock);
620 620 }
621 621
622 622 /*
623 623 * Real time profiling interval timer expired:
624 624 * Increment microstate counters for each lwp in the process
625 625 * and ensure that running lwps are kicked into the kernel.
626 626 * If time is not set up to reload, then just return.
627 627 * Else compute next time timer should go off which is > current time,
628 628 * as above.
629 629 */
630 630 static void
631 631 realprofexpire(void *arg)
632 632 {
633 633 struct proc *p = arg;
634 634 kthread_t *t;
635 635
636 636 mutex_enter(&p->p_lock);
637 637 if (p->p_rprof_cyclic == CYCLIC_NONE ||
638 638 (t = p->p_tlist) == NULL) {
639 639 mutex_exit(&p->p_lock);
640 640 return;
641 641 }
642 642 do {
643 643 int mstate;
644 644
645 645 /*
646 646 * Attempt to allocate the SIGPROF buffer, but don't sleep.
↓ open down ↓ |
646 lines elided |
↑ open up ↑ |
647 647 */
648 648 if (t->t_rprof == NULL)
649 649 t->t_rprof = kmem_zalloc(sizeof (struct rprof),
650 650 KM_NOSLEEP);
651 651 if (t->t_rprof == NULL)
652 652 continue;
653 653
654 654 thread_lock(t);
655 655 switch (t->t_state) {
656 656 case TS_SLEEP:
657 - /*
658 - * Don't touch the lwp is it is swapped out.
659 - */
660 - if (!(t->t_schedflag & TS_LOAD)) {
661 - mstate = LMS_SLEEP;
662 - break;
663 - }
664 657 switch (mstate = ttolwp(t)->lwp_mstate.ms_prev) {
665 658 case LMS_TFAULT:
666 659 case LMS_DFAULT:
667 660 case LMS_KFAULT:
668 661 case LMS_USER_LOCK:
669 662 break;
670 663 default:
671 664 mstate = LMS_SLEEP;
672 665 break;
673 666 }
674 667 break;
675 668 case TS_RUN:
676 669 case TS_WAIT:
677 670 mstate = LMS_WAIT_CPU;
678 671 break;
679 672 case TS_ONPROC:
680 673 switch (mstate = t->t_mstate) {
681 674 case LMS_USER:
682 675 case LMS_SYSTEM:
683 676 case LMS_TRAP:
684 677 break;
685 678 default:
686 679 mstate = LMS_SYSTEM;
687 680 break;
688 681 }
689 682 break;
690 683 default:
691 684 mstate = t->t_mstate;
692 685 break;
693 686 }
694 687 t->t_rprof->rp_anystate = 1;
695 688 t->t_rprof->rp_state[mstate]++;
696 689 aston(t);
697 690 /*
698 691 * force the thread into the kernel
699 692 * if it is not already there.
700 693 */
701 694 if (t->t_state == TS_ONPROC && t->t_cpu != CPU)
702 695 poke_cpu(t->t_cpu->cpu_id);
703 696 thread_unlock(t);
704 697 } while ((t = t->t_forw) != p->p_tlist);
705 698
706 699 mutex_exit(&p->p_lock);
707 700 }
708 701
709 702 /*
710 703 * Advances timer value past the current time of day. See the detailed
711 704 * comment for this logic in realitsexpire(), above.
712 705 */
713 706 static void
714 707 timeval_advance(struct timeval *valp, struct timeval *intervalp)
715 708 {
716 709 int cnt2nth;
717 710 struct timeval interval2nth;
718 711
719 712 for (;;) {
720 713 interval2nth = *intervalp;
721 714 for (cnt2nth = 0; ; cnt2nth++) {
722 715 timevaladd(valp, &interval2nth);
723 716 /*CSTYLED*/
724 717 if (TVTSCMP(valp, &hrestime, >))
725 718 break;
726 719 timevaladd(&interval2nth, &interval2nth);
727 720 }
728 721 if (cnt2nth == 0)
729 722 break;
730 723 timevalsub(valp, &interval2nth);
731 724 }
732 725 }
733 726
734 727 /*
735 728 * Check that a proposed value to load into the .it_value or .it_interval
736 729 * part of an interval timer is acceptable, and set it to at least a
737 730 * specified minimal value.
738 731 */
739 732 int
740 733 itimerfix(struct timeval *tv, int minimum)
741 734 {
742 735 if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
743 736 tv->tv_usec < 0 || tv->tv_usec >= MICROSEC)
744 737 return (EINVAL);
745 738 if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < minimum)
746 739 tv->tv_usec = minimum;
747 740 return (0);
748 741 }
749 742
750 743 /*
751 744 * Same as itimerfix, except a) it takes a timespec instead of a timeval and
752 745 * b) it doesn't truncate based on timeout granularity; consumers of this
753 746 * interface (e.g. timer_settime()) depend on the passed timespec not being
754 747 * modified implicitly.
755 748 */
756 749 int
757 750 itimerspecfix(timespec_t *tv)
758 751 {
759 752 if (tv->tv_sec < 0 || tv->tv_nsec < 0 || tv->tv_nsec >= NANOSEC)
760 753 return (EINVAL);
761 754 return (0);
762 755 }
763 756
764 757 /*
765 758 * Decrement an interval timer by a specified number
766 759 * of microseconds, which must be less than a second,
767 760 * i.e. < 1000000. If the timer expires, then reload
768 761 * it. In this case, carry over (usec - old value) to
769 762 * reducint the value reloaded into the timer so that
770 763 * the timer does not drift. This routine assumes
771 764 * that it is called in a context where the timers
772 765 * on which it is operating cannot change in value.
773 766 */
774 767 int
775 768 itimerdecr(struct itimerval *itp, int usec)
776 769 {
777 770 if (itp->it_value.tv_usec < usec) {
778 771 if (itp->it_value.tv_sec == 0) {
779 772 /* expired, and already in next interval */
780 773 usec -= itp->it_value.tv_usec;
781 774 goto expire;
782 775 }
783 776 itp->it_value.tv_usec += MICROSEC;
784 777 itp->it_value.tv_sec--;
785 778 }
786 779 itp->it_value.tv_usec -= usec;
787 780 usec = 0;
788 781 if (timerisset(&itp->it_value))
789 782 return (1);
790 783 /* expired, exactly at end of interval */
791 784 expire:
792 785 if (timerisset(&itp->it_interval)) {
793 786 itp->it_value = itp->it_interval;
794 787 itp->it_value.tv_usec -= usec;
795 788 if (itp->it_value.tv_usec < 0) {
796 789 itp->it_value.tv_usec += MICROSEC;
797 790 itp->it_value.tv_sec--;
798 791 }
799 792 } else
800 793 itp->it_value.tv_usec = 0; /* sec is already 0 */
801 794 return (0);
802 795 }
803 796
804 797 /*
805 798 * Add and subtract routines for timevals.
806 799 * N.B.: subtract routine doesn't deal with
807 800 * results which are before the beginning,
808 801 * it just gets very confused in this case.
809 802 * Caveat emptor.
810 803 */
811 804 void
812 805 timevaladd(struct timeval *t1, struct timeval *t2)
813 806 {
814 807 t1->tv_sec += t2->tv_sec;
815 808 t1->tv_usec += t2->tv_usec;
816 809 timevalfix(t1);
817 810 }
818 811
819 812 void
820 813 timevalsub(struct timeval *t1, struct timeval *t2)
821 814 {
822 815 t1->tv_sec -= t2->tv_sec;
823 816 t1->tv_usec -= t2->tv_usec;
824 817 timevalfix(t1);
825 818 }
826 819
827 820 void
828 821 timevalfix(struct timeval *t1)
829 822 {
830 823 if (t1->tv_usec < 0) {
831 824 t1->tv_sec--;
832 825 t1->tv_usec += MICROSEC;
833 826 }
834 827 if (t1->tv_usec >= MICROSEC) {
835 828 t1->tv_sec++;
836 829 t1->tv_usec -= MICROSEC;
837 830 }
838 831 }
839 832
840 833 /*
841 834 * Same as the routines above. These routines take a timespec instead
842 835 * of a timeval.
843 836 */
844 837 void
845 838 timespecadd(timespec_t *t1, timespec_t *t2)
846 839 {
847 840 t1->tv_sec += t2->tv_sec;
848 841 t1->tv_nsec += t2->tv_nsec;
849 842 timespecfix(t1);
850 843 }
851 844
852 845 void
853 846 timespecsub(timespec_t *t1, timespec_t *t2)
854 847 {
855 848 t1->tv_sec -= t2->tv_sec;
856 849 t1->tv_nsec -= t2->tv_nsec;
857 850 timespecfix(t1);
858 851 }
859 852
860 853 void
861 854 timespecfix(timespec_t *t1)
862 855 {
863 856 if (t1->tv_nsec < 0) {
864 857 t1->tv_sec--;
865 858 t1->tv_nsec += NANOSEC;
866 859 } else {
867 860 if (t1->tv_nsec >= NANOSEC) {
868 861 t1->tv_sec++;
869 862 t1->tv_nsec -= NANOSEC;
870 863 }
871 864 }
872 865 }
873 866
874 867 /*
875 868 * Compute number of hz until specified time.
876 869 * Used to compute third argument to timeout() from an absolute time.
877 870 */
878 871 clock_t
879 872 hzto(struct timeval *tv)
880 873 {
881 874 timespec_t ts, now;
882 875
883 876 ts.tv_sec = tv->tv_sec;
884 877 ts.tv_nsec = tv->tv_usec * 1000;
885 878 gethrestime_lasttick(&now);
886 879
887 880 return (timespectohz(&ts, now));
888 881 }
889 882
890 883 /*
891 884 * Compute number of hz until specified time for a given timespec value.
892 885 * Used to compute third argument to timeout() from an absolute time.
893 886 */
894 887 clock_t
895 888 timespectohz(timespec_t *tv, timespec_t now)
896 889 {
897 890 clock_t ticks;
898 891 time_t sec;
899 892 int nsec;
900 893
901 894 /*
902 895 * Compute number of ticks we will see between now and
903 896 * the target time; returns "1" if the destination time
904 897 * is before the next tick, so we always get some delay,
905 898 * and returns LONG_MAX ticks if we would overflow.
906 899 */
907 900 sec = tv->tv_sec - now.tv_sec;
908 901 nsec = tv->tv_nsec - now.tv_nsec + nsec_per_tick - 1;
909 902
910 903 if (nsec < 0) {
911 904 sec--;
912 905 nsec += NANOSEC;
913 906 } else if (nsec >= NANOSEC) {
914 907 sec++;
915 908 nsec -= NANOSEC;
916 909 }
917 910
918 911 ticks = NSEC_TO_TICK(nsec);
919 912
920 913 /*
921 914 * Compute ticks, accounting for negative and overflow as above.
922 915 * Overflow protection kicks in at about 70 weeks for hz=50
923 916 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
924 917 * kernel :-)
925 918 */
926 919 if (sec < 0 || (sec == 0 && ticks < 1))
927 920 ticks = 1; /* protect vs nonpositive */
928 921 else if (sec > (LONG_MAX - ticks) / hz)
929 922 ticks = LONG_MAX; /* protect vs overflow */
930 923 else
931 924 ticks += sec * hz; /* common case */
932 925
933 926 return (ticks);
934 927 }
935 928
936 929 /*
937 930 * Compute number of hz with the timespec tv specified.
938 931 * The return type must be 64 bit integer.
939 932 */
940 933 int64_t
941 934 timespectohz64(timespec_t *tv)
942 935 {
943 936 int64_t ticks;
944 937 int64_t sec;
945 938 int64_t nsec;
946 939
947 940 sec = tv->tv_sec;
948 941 nsec = tv->tv_nsec + nsec_per_tick - 1;
949 942
950 943 if (nsec < 0) {
951 944 sec--;
952 945 nsec += NANOSEC;
953 946 } else if (nsec >= NANOSEC) {
954 947 sec++;
955 948 nsec -= NANOSEC;
956 949 }
957 950
958 951 ticks = NSEC_TO_TICK(nsec);
959 952
960 953 /*
961 954 * Compute ticks, accounting for negative and overflow as above.
962 955 * Overflow protection kicks in at about 70 weeks for hz=50
963 956 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
964 957 * kernel
965 958 */
966 959 if (sec < 0 || (sec == 0 && ticks < 1))
967 960 ticks = 1; /* protect vs nonpositive */
968 961 else if (sec > (((~0ULL) >> 1) - ticks) / hz)
969 962 ticks = (~0ULL) >> 1; /* protect vs overflow */
970 963 else
971 964 ticks += sec * hz; /* common case */
972 965
973 966 return (ticks);
974 967 }
975 968
976 969 /*
977 970 * hrt2ts(): convert from hrtime_t to timestruc_t.
978 971 *
979 972 * All this routine really does is:
980 973 *
981 974 * tsp->sec = hrt / NANOSEC;
982 975 * tsp->nsec = hrt % NANOSEC;
983 976 *
984 977 * The black magic below avoids doing a 64-bit by 32-bit integer divide,
985 978 * which is quite expensive. There's actually much more going on here than
986 979 * it might first appear -- don't try this at home.
987 980 *
988 981 * For the adventuresome, here's an explanation of how it works.
989 982 *
990 983 * Multiplication by a fixed constant is easy -- you just do the appropriate
991 984 * shifts and adds. For example, to multiply by 10, we observe that
992 985 *
993 986 * x * 10 = x * (8 + 2)
994 987 * = (x * 8) + (x * 2)
995 988 * = (x << 3) + (x << 1).
996 989 *
997 990 * In general, you can read the algorithm right off the bits: the number 10
998 991 * is 1010 in binary; bits 1 and 3 are ones, so x * 10 = (x << 1) + (x << 3).
999 992 *
1000 993 * Sometimes you can do better. For example, 15 is 1111 binary, so the normal
1001 994 * shift/add computation is x * 15 = (x << 0) + (x << 1) + (x << 2) + (x << 3).
1002 995 * But, it's cheaper if you capitalize on the fact that you have a run of ones:
1003 996 * 1111 = 10000 - 1, hence x * 15 = (x << 4) - (x << 0). [You would never
1004 997 * actually perform the operation << 0, since it's a no-op; I'm just writing
1005 998 * it that way for clarity.]
1006 999 *
1007 1000 * The other way you can win is if you get lucky with the prime factorization
1008 1001 * of your constant. The number 1,000,000,000, which we have to multiply
1009 1002 * by below, is a good example. One billion is 111011100110101100101000000000
1010 1003 * in binary. If you apply the bit-grouping trick, it doesn't buy you very
1011 1004 * much, because it's only a win for groups of three or more equal bits:
1012 1005 *
1013 1006 * 111011100110101100101000000000 = 1000000000000000000000000000000
1014 1007 * - 000100011001010011011000000000
1015 1008 *
1016 1009 * Thus, instead of the 13 shift/add pairs (26 operations) implied by the LHS,
1017 1010 * we have reduced this to 10 shift/add pairs (20 operations) on the RHS.
1018 1011 * This is better, but not great.
1019 1012 *
1020 1013 * However, we can factor 1,000,000,000 = 2^9 * 5^9 = 2^9 * 125 * 125 * 125,
1021 1014 * and multiply by each factor. Multiplication by 125 is particularly easy,
1022 1015 * since 128 is nearby: x * 125 = (x << 7) - x - x - x, which is just four
1023 1016 * operations. So, to multiply by 1,000,000,000, we perform three multipli-
1024 1017 * cations by 125, then << 9, a total of only 3 * 4 + 1 = 13 operations.
1025 1018 * This is the algorithm we actually use in both hrt2ts() and ts2hrt().
1026 1019 *
1027 1020 * Division is harder; there is no equivalent of the simple shift-add algorithm
1028 1021 * we used for multiplication. However, we can convert the division problem
1029 1022 * into a multiplication problem by pre-computing the binary representation
1030 1023 * of the reciprocal of the divisor. For the case of interest, we have
1031 1024 *
1032 1025 * 1 / 1,000,000,000 = 1.0001001011100000101111101000001B-30,
1033 1026 *
1034 1027 * to 32 bits of precision. (The notation B-30 means "* 2^-30", just like
1035 1028 * E-18 means "* 10^-18".)
1036 1029 *
1037 1030 * So, to compute x / 1,000,000,000, we just multiply x by the 32-bit
1038 1031 * integer 10001001011100000101111101000001, then normalize (shift) the
1039 1032 * result. This constant has several large bit runs, so the multiply
1040 1033 * is relatively cheap:
1041 1034 *
1042 1035 * 10001001011100000101111101000001 = 10001001100000000110000001000001
1043 1036 * - 00000000000100000000000100000000
1044 1037 *
1045 1038 * Again, you can just read the algorithm right off the bits:
1046 1039 *
1047 1040 * sec = hrt;
1048 1041 * sec += (hrt << 6);
1049 1042 * sec -= (hrt << 8);
1050 1043 * sec += (hrt << 13);
1051 1044 * sec += (hrt << 14);
1052 1045 * sec -= (hrt << 20);
1053 1046 * sec += (hrt << 23);
1054 1047 * sec += (hrt << 24);
1055 1048 * sec += (hrt << 27);
1056 1049 * sec += (hrt << 31);
1057 1050 * sec >>= (32 + 30);
1058 1051 *
1059 1052 * Voila! The only problem is, since hrt is 64 bits, we need to use 96-bit
1060 1053 * arithmetic to perform this calculation. That's a waste, because ultimately
1061 1054 * we only need the highest 32 bits of the result.
1062 1055 *
1063 1056 * The first thing we do is to realize that we don't need to use all of hrt
1064 1057 * in the calculation. The lowest 30 bits can contribute at most 1 to the
1065 1058 * quotient (2^30 / 1,000,000,000 = 1.07...), so we'll deal with them later.
1066 1059 * The highest 2 bits have to be zero, or hrt won't fit in a timestruc_t.
1067 1060 * Thus, the only bits of hrt that matter for division are bits 30..61.
1068 1061 * These 32 bits are just the lower-order word of (hrt >> 30). This brings
1069 1062 * us down from 96-bit math to 64-bit math, and our algorithm becomes:
1070 1063 *
1071 1064 * tmp = (uint32_t) (hrt >> 30);
1072 1065 * sec = tmp;
1073 1066 * sec += (tmp << 6);
1074 1067 * sec -= (tmp << 8);
1075 1068 * sec += (tmp << 13);
1076 1069 * sec += (tmp << 14);
1077 1070 * sec -= (tmp << 20);
1078 1071 * sec += (tmp << 23);
1079 1072 * sec += (tmp << 24);
1080 1073 * sec += (tmp << 27);
1081 1074 * sec += (tmp << 31);
1082 1075 * sec >>= 32;
1083 1076 *
1084 1077 * Next, we're going to reduce this 64-bit computation to a 32-bit
1085 1078 * computation. We begin by rewriting the above algorithm to use relative
1086 1079 * shifts instead of absolute shifts. That is, instead of computing
1087 1080 * tmp << 6, tmp << 8, tmp << 13, etc, we'll just shift incrementally:
1088 1081 * tmp <<= 6, tmp <<= 2 (== 8 - 6), tmp <<= 5 (== 13 - 8), etc:
1089 1082 *
1090 1083 * tmp = (uint32_t) (hrt >> 30);
1091 1084 * sec = tmp;
1092 1085 * tmp <<= 6; sec += tmp;
1093 1086 * tmp <<= 2; sec -= tmp;
1094 1087 * tmp <<= 5; sec += tmp;
1095 1088 * tmp <<= 1; sec += tmp;
1096 1089 * tmp <<= 6; sec -= tmp;
1097 1090 * tmp <<= 3; sec += tmp;
1098 1091 * tmp <<= 1; sec += tmp;
1099 1092 * tmp <<= 3; sec += tmp;
1100 1093 * tmp <<= 4; sec += tmp;
1101 1094 * sec >>= 32;
1102 1095 *
1103 1096 * Now for the final step. Instead of throwing away the low 32 bits at
1104 1097 * the end, we can throw them away as we go, only keeping the high 32 bits
1105 1098 * of the product at each step. So, for example, where we now have
1106 1099 *
1107 1100 * tmp <<= 6; sec = sec + tmp;
1108 1101 * we will instead have
1109 1102 * tmp <<= 6; sec = (sec + tmp) >> 6;
1110 1103 * which is equivalent to
1111 1104 * sec = (sec >> 6) + tmp;
1112 1105 *
1113 1106 * The final shift ("sec >>= 32") goes away.
1114 1107 *
1115 1108 * All we're really doing here is long multiplication, just like we learned in
1116 1109 * grade school, except that at each step, we only look at the leftmost 32
1117 1110 * columns. The cumulative error is, at most, the sum of all the bits we
1118 1111 * throw away, which is 2^-32 + 2^-31 + ... + 2^-2 + 2^-1 == 1 - 2^-32.
1119 1112 * Thus, the final result ("sec") is correct to +/- 1.
1120 1113 *
1121 1114 * It turns out to be important to keep "sec" positive at each step, because
1122 1115 * we don't want to have to explicitly extend the sign bit. Therefore,
1123 1116 * starting with the last line of code above, each line that would have read
1124 1117 * "sec = (sec >> n) - tmp" must be changed to "sec = tmp - (sec >> n)", and
1125 1118 * the operators (+ or -) in all previous lines must be toggled accordingly.
1126 1119 * Thus, we end up with:
1127 1120 *
1128 1121 * tmp = (uint32_t) (hrt >> 30);
1129 1122 * sec = tmp + (sec >> 6);
1130 1123 * sec = tmp - (tmp >> 2);
1131 1124 * sec = tmp - (sec >> 5);
1132 1125 * sec = tmp + (sec >> 1);
1133 1126 * sec = tmp - (sec >> 6);
1134 1127 * sec = tmp - (sec >> 3);
1135 1128 * sec = tmp + (sec >> 1);
1136 1129 * sec = tmp + (sec >> 3);
1137 1130 * sec = tmp + (sec >> 4);
1138 1131 *
1139 1132 * This yields a value for sec that is accurate to +1/-1, so we have two
1140 1133 * cases to deal with. The mysterious-looking "+ 7" in the code below biases
1141 1134 * the rounding toward zero, so that sec is always less than or equal to
1142 1135 * the correct value. With this modified code, sec is accurate to +0/-2, with
1143 1136 * the -2 case being very rare in practice. With this change, we only have to
1144 1137 * deal with one case (sec too small) in the cleanup code.
1145 1138 *
1146 1139 * The other modification we make is to delete the second line above
1147 1140 * ("sec = tmp + (sec >> 6);"), since it only has an effect when bit 31 is
1148 1141 * set, and the cleanup code can handle that rare case. This reduces the
1149 1142 * *guaranteed* accuracy of sec to +0/-3, but speeds up the common cases.
1150 1143 *
1151 1144 * Finally, we compute nsec = hrt - (sec * 1,000,000,000). nsec will always
1152 1145 * be positive (since sec is never too large), and will at most be equal to
1153 1146 * the error in sec (times 1,000,000,000) plus the low-order 30 bits of hrt.
1154 1147 * Thus, nsec < 3 * 1,000,000,000 + 2^30, which is less than 2^32, so we can
1155 1148 * safely assume that nsec fits in 32 bits. Consequently, when we compute
1156 1149 * sec * 1,000,000,000, we only need the low 32 bits, so we can just do 32-bit
1157 1150 * arithmetic and let the high-order bits fall off the end.
1158 1151 *
1159 1152 * Since nsec < 3 * 1,000,000,000 + 2^30 == 4,073,741,824, the cleanup loop:
1160 1153 *
1161 1154 * while (nsec >= NANOSEC) {
1162 1155 * nsec -= NANOSEC;
1163 1156 * sec++;
1164 1157 * }
1165 1158 *
1166 1159 * is guaranteed to complete in at most 4 iterations. In practice, the loop
1167 1160 * completes in 0 or 1 iteration over 95% of the time.
1168 1161 *
1169 1162 * On an SS2, this implementation of hrt2ts() takes 1.7 usec, versus about
1170 1163 * 35 usec for software division -- about 20 times faster.
1171 1164 */
1172 1165 void
1173 1166 hrt2ts(hrtime_t hrt, timestruc_t *tsp)
1174 1167 {
1175 1168 uint32_t sec, nsec, tmp;
1176 1169
1177 1170 tmp = (uint32_t)(hrt >> 30);
1178 1171 sec = tmp - (tmp >> 2);
1179 1172 sec = tmp - (sec >> 5);
1180 1173 sec = tmp + (sec >> 1);
1181 1174 sec = tmp - (sec >> 6) + 7;
1182 1175 sec = tmp - (sec >> 3);
1183 1176 sec = tmp + (sec >> 1);
1184 1177 sec = tmp + (sec >> 3);
1185 1178 sec = tmp + (sec >> 4);
1186 1179 tmp = (sec << 7) - sec - sec - sec;
1187 1180 tmp = (tmp << 7) - tmp - tmp - tmp;
1188 1181 tmp = (tmp << 7) - tmp - tmp - tmp;
1189 1182 nsec = (uint32_t)hrt - (tmp << 9);
1190 1183 while (nsec >= NANOSEC) {
1191 1184 nsec -= NANOSEC;
1192 1185 sec++;
1193 1186 }
1194 1187 tsp->tv_sec = (time_t)sec;
1195 1188 tsp->tv_nsec = nsec;
1196 1189 }
1197 1190
1198 1191 /*
1199 1192 * Convert from timestruc_t to hrtime_t.
1200 1193 *
1201 1194 * The code below is equivalent to:
1202 1195 *
1203 1196 * hrt = tsp->tv_sec * NANOSEC + tsp->tv_nsec;
1204 1197 *
1205 1198 * but requires no integer multiply.
1206 1199 */
1207 1200 hrtime_t
1208 1201 ts2hrt(const timestruc_t *tsp)
1209 1202 {
1210 1203 hrtime_t hrt;
1211 1204
1212 1205 hrt = tsp->tv_sec;
1213 1206 hrt = (hrt << 7) - hrt - hrt - hrt;
1214 1207 hrt = (hrt << 7) - hrt - hrt - hrt;
1215 1208 hrt = (hrt << 7) - hrt - hrt - hrt;
1216 1209 hrt = (hrt << 9) + tsp->tv_nsec;
1217 1210 return (hrt);
1218 1211 }
1219 1212
1220 1213 /*
1221 1214 * For the various 32-bit "compatibility" paths in the system.
1222 1215 */
1223 1216 void
1224 1217 hrt2ts32(hrtime_t hrt, timestruc32_t *ts32p)
1225 1218 {
1226 1219 timestruc_t ts;
1227 1220
1228 1221 hrt2ts(hrt, &ts);
1229 1222 TIMESPEC_TO_TIMESPEC32(ts32p, &ts);
1230 1223 }
1231 1224
1232 1225 /*
1233 1226 * If this ever becomes performance critical (ha!), we can borrow the
1234 1227 * code from ts2hrt(), above, to multiply tv_sec by 1,000,000 and the
1235 1228 * straightforward (x << 10) - (x << 5) + (x << 3) to multiply tv_usec by
1236 1229 * 1,000. For now, we'll opt for readability (besides, the compiler does
1237 1230 * a passable job of optimizing constant multiplication into shifts and adds).
1238 1231 */
1239 1232 hrtime_t
1240 1233 tv2hrt(struct timeval *tvp)
1241 1234 {
1242 1235 return ((hrtime_t)tvp->tv_sec * NANOSEC +
1243 1236 (hrtime_t)tvp->tv_usec * (NANOSEC / MICROSEC));
1244 1237 }
1245 1238
1246 1239 void
1247 1240 hrt2tv(hrtime_t hrt, struct timeval *tvp)
1248 1241 {
1249 1242 uint32_t sec, nsec, tmp;
1250 1243 uint32_t q, r, t;
1251 1244
1252 1245 tmp = (uint32_t)(hrt >> 30);
1253 1246 sec = tmp - (tmp >> 2);
1254 1247 sec = tmp - (sec >> 5);
1255 1248 sec = tmp + (sec >> 1);
1256 1249 sec = tmp - (sec >> 6) + 7;
1257 1250 sec = tmp - (sec >> 3);
1258 1251 sec = tmp + (sec >> 1);
1259 1252 sec = tmp + (sec >> 3);
1260 1253 sec = tmp + (sec >> 4);
1261 1254 tmp = (sec << 7) - sec - sec - sec;
1262 1255 tmp = (tmp << 7) - tmp - tmp - tmp;
1263 1256 tmp = (tmp << 7) - tmp - tmp - tmp;
1264 1257 nsec = (uint32_t)hrt - (tmp << 9);
1265 1258 while (nsec >= NANOSEC) {
1266 1259 nsec -= NANOSEC;
1267 1260 sec++;
1268 1261 }
1269 1262 tvp->tv_sec = (time_t)sec;
1270 1263 /*
1271 1264 * this routine is very similar to hr2ts, but requires microseconds
1272 1265 * instead of nanoseconds, so an interger divide by 1000 routine
1273 1266 * completes the conversion
1274 1267 */
1275 1268 t = (nsec >> 7) + (nsec >> 8) + (nsec >> 12);
1276 1269 q = (nsec >> 1) + t + (nsec >> 15) + (t >> 11) + (t >> 14);
1277 1270 q = q >> 9;
1278 1271 r = nsec - q*1000;
1279 1272 tvp->tv_usec = q + ((r + 24) >> 10);
1280 1273
1281 1274 }
1282 1275
1283 1276 int
1284 1277 nanosleep(timespec_t *rqtp, timespec_t *rmtp)
1285 1278 {
1286 1279 timespec_t rqtime;
1287 1280 timespec_t rmtime;
1288 1281 timespec_t now;
1289 1282 int timecheck;
1290 1283 int ret = 1;
1291 1284 model_t datamodel = get_udatamodel();
1292 1285
1293 1286 timecheck = timechanged;
1294 1287 gethrestime(&now);
1295 1288
1296 1289 if (datamodel == DATAMODEL_NATIVE) {
1297 1290 if (copyin(rqtp, &rqtime, sizeof (rqtime)))
1298 1291 return (set_errno(EFAULT));
1299 1292 } else {
1300 1293 timespec32_t rqtime32;
1301 1294
1302 1295 if (copyin(rqtp, &rqtime32, sizeof (rqtime32)))
1303 1296 return (set_errno(EFAULT));
1304 1297 TIMESPEC32_TO_TIMESPEC(&rqtime, &rqtime32);
1305 1298 }
1306 1299
1307 1300 if (rqtime.tv_sec < 0 || rqtime.tv_nsec < 0 ||
1308 1301 rqtime.tv_nsec >= NANOSEC)
1309 1302 return (set_errno(EINVAL));
1310 1303
1311 1304 if (timerspecisset(&rqtime)) {
1312 1305 timespecadd(&rqtime, &now);
1313 1306 mutex_enter(&curthread->t_delay_lock);
1314 1307 while ((ret = cv_waituntil_sig(&curthread->t_delay_cv,
1315 1308 &curthread->t_delay_lock, &rqtime, timecheck)) > 0)
1316 1309 continue;
1317 1310 mutex_exit(&curthread->t_delay_lock);
1318 1311 }
1319 1312
1320 1313 if (rmtp) {
1321 1314 /*
1322 1315 * If cv_waituntil_sig() returned due to a signal, and
1323 1316 * there is time remaining, then set the time remaining.
1324 1317 * Else set time remaining to zero
1325 1318 */
1326 1319 rmtime.tv_sec = rmtime.tv_nsec = 0;
1327 1320 if (ret == 0) {
1328 1321 timespec_t delta = rqtime;
1329 1322
1330 1323 gethrestime(&now);
1331 1324 timespecsub(&delta, &now);
1332 1325 if (delta.tv_sec > 0 || (delta.tv_sec == 0 &&
1333 1326 delta.tv_nsec > 0))
1334 1327 rmtime = delta;
1335 1328 }
1336 1329
1337 1330 if (datamodel == DATAMODEL_NATIVE) {
1338 1331 if (copyout(&rmtime, rmtp, sizeof (rmtime)))
1339 1332 return (set_errno(EFAULT));
1340 1333 } else {
1341 1334 timespec32_t rmtime32;
1342 1335
1343 1336 TIMESPEC_TO_TIMESPEC32(&rmtime32, &rmtime);
1344 1337 if (copyout(&rmtime32, rmtp, sizeof (rmtime32)))
1345 1338 return (set_errno(EFAULT));
1346 1339 }
1347 1340 }
1348 1341
1349 1342 if (ret == 0)
1350 1343 return (set_errno(EINTR));
1351 1344 return (0);
1352 1345 }
1353 1346
/*
 * Routines to convert standard UNIX time (seconds since Jan 1, 1970)
 * into year/month/day/hour/minute/second format, and back again.
 * Note: these routines require tod_lock held to protect cached state.
 */

/*
 * Cumulative day counts, indexed by ((year & 3) << 4) + month with month
 * running from 1 (January) to 12.  Each group of 16 entries serves one
 * value of (year & 3): entry "month" is the number of days in the year
 * before that month starts, and entry 13 is the length of the whole year.
 * Entries 0, 14 and 15 of each group are zero filler.  The first group
 * ((year & 3) == 0) is the one with a 29-day February.
 */
static int days_thru_month[64] = {
	/* (year & 3) == 0: 366 days */
	0, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, 0, 0,
	/* (year & 3) == 1: 365 days */
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
	/* (year & 3) == 2: 365 days */
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
	/* (year & 3) == 3: 365 days */
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
};
1365 1358
1366 1359 todinfo_t saved_tod;
1367 1360 int saved_utc = -60;
1368 1361
1369 1362 todinfo_t
1370 1363 utc_to_tod(time_t utc)
1371 1364 {
1372 1365 long dse, day, month, year;
1373 1366 todinfo_t tod;
1374 1367
1375 1368 ASSERT(MUTEX_HELD(&tod_lock));
1376 1369
1377 1370 /*
1378 1371 * Note that tod_set_prev() assumes utc will be set to zero in
1379 1372 * the case of it being negative. Consequently, any change made
1380 1373 * to this behavior would have to be reflected in that function
1381 1374 * as well.
1382 1375 */
1383 1376 if (utc < 0) /* should never happen */
1384 1377 utc = 0;
1385 1378
1386 1379 saved_tod.tod_sec += utc - saved_utc;
1387 1380 saved_utc = utc;
1388 1381 if (saved_tod.tod_sec >= 0 && saved_tod.tod_sec < 60)
1389 1382 return (saved_tod); /* only the seconds changed */
1390 1383
1391 1384 dse = utc / 86400; /* days since epoch */
1392 1385
1393 1386 tod.tod_sec = utc % 60;
1394 1387 tod.tod_min = (utc % 3600) / 60;
1395 1388 tod.tod_hour = (utc % 86400) / 3600;
1396 1389 tod.tod_dow = (dse + 4) % 7 + 1; /* epoch was a Thursday */
1397 1390
1398 1391 year = dse / 365 + 72; /* first guess -- always a bit too large */
1399 1392 do {
1400 1393 year--;
1401 1394 day = dse - 365 * (year - 70) - ((year - 69) >> 2);
1402 1395 } while (day < 0);
1403 1396
1404 1397 month = ((year & 3) << 4) + 1;
1405 1398 while (day >= days_thru_month[month + 1])
1406 1399 month++;
1407 1400
1408 1401 tod.tod_day = day - days_thru_month[month] + 1;
1409 1402 tod.tod_month = month & 15;
1410 1403 tod.tod_year = year;
1411 1404
1412 1405 saved_tod = tod;
1413 1406 return (tod);
1414 1407 }
1415 1408
1416 1409 time_t
1417 1410 tod_to_utc(todinfo_t tod)
1418 1411 {
1419 1412 time_t utc;
1420 1413 int year = tod.tod_year;
1421 1414 int month = tod.tod_month + ((year & 3) << 4);
1422 1415 #ifdef DEBUG
1423 1416 /* only warn once, not each time called */
1424 1417 static int year_warn = 1;
1425 1418 static int month_warn = 1;
1426 1419 static int day_warn = 1;
1427 1420 static int hour_warn = 1;
1428 1421 static int min_warn = 1;
1429 1422 static int sec_warn = 1;
1430 1423 int days_diff = days_thru_month[month + 1] - days_thru_month[month];
1431 1424 #endif
1432 1425
1433 1426 ASSERT(MUTEX_HELD(&tod_lock));
1434 1427
1435 1428 #ifdef DEBUG
1436 1429 if (year_warn && (year < 70 || year > 8029)) {
1437 1430 cmn_err(CE_WARN,
1438 1431 "The hardware real-time clock appears to have the "
1439 1432 "wrong years value %d -- time needs to be reset\n",
1440 1433 year);
1441 1434 year_warn = 0;
1442 1435 }
1443 1436
1444 1437 if (month_warn && (tod.tod_month < 1 || tod.tod_month > 12)) {
1445 1438 cmn_err(CE_WARN,
1446 1439 "The hardware real-time clock appears to have the "
1447 1440 "wrong months value %d -- time needs to be reset\n",
1448 1441 tod.tod_month);
1449 1442 month_warn = 0;
1450 1443 }
1451 1444
1452 1445 if (day_warn && (tod.tod_day < 1 || tod.tod_day > days_diff)) {
1453 1446 cmn_err(CE_WARN,
1454 1447 "The hardware real-time clock appears to have the "
1455 1448 "wrong days value %d -- time needs to be reset\n",
1456 1449 tod.tod_day);
1457 1450 day_warn = 0;
1458 1451 }
1459 1452
1460 1453 if (hour_warn && (tod.tod_hour < 0 || tod.tod_hour > 23)) {
1461 1454 cmn_err(CE_WARN,
1462 1455 "The hardware real-time clock appears to have the "
1463 1456 "wrong hours value %d -- time needs to be reset\n",
1464 1457 tod.tod_hour);
1465 1458 hour_warn = 0;
1466 1459 }
1467 1460
1468 1461 if (min_warn && (tod.tod_min < 0 || tod.tod_min > 59)) {
1469 1462 cmn_err(CE_WARN,
1470 1463 "The hardware real-time clock appears to have the "
1471 1464 "wrong minutes value %d -- time needs to be reset\n",
1472 1465 tod.tod_min);
1473 1466 min_warn = 0;
1474 1467 }
1475 1468
1476 1469 if (sec_warn && (tod.tod_sec < 0 || tod.tod_sec > 59)) {
1477 1470 cmn_err(CE_WARN,
1478 1471 "The hardware real-time clock appears to have the "
1479 1472 "wrong seconds value %d -- time needs to be reset\n",
1480 1473 tod.tod_sec);
1481 1474 sec_warn = 0;
1482 1475 }
1483 1476 #endif
1484 1477
1485 1478 utc = (year - 70); /* next 3 lines: utc = 365y + y/4 */
1486 1479 utc += (utc << 3) + (utc << 6);
1487 1480 utc += (utc << 2) + ((year - 69) >> 2);
1488 1481 utc += days_thru_month[month] + tod.tod_day - 1;
1489 1482 utc = (utc << 3) + (utc << 4) + tod.tod_hour; /* 24 * day + hour */
1490 1483 utc = (utc << 6) - (utc << 2) + tod.tod_min; /* 60 * hour + min */
1491 1484 utc = (utc << 6) - (utc << 2) + tod.tod_sec; /* 60 * min + sec */
1492 1485
1493 1486 return (utc);
1494 1487 }
↓ open down ↓ |
821 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX