XXXX introduce drv_sectohz
--- old/usr/src/uts/i86xpv/os/xen_machdep.c
+++ new/usr/src/uts/i86xpv/os/xen_machdep.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /* derived from netbsd's xen_machdep.c 1.1.2.1 */
28 28
29 29 /*
30 30 *
31 31 * Copyright (c) 2004 Christian Limpach.
32 32 * All rights reserved.
33 33 *
34 34 * Redistribution and use in source and binary forms, with or without
35 35 * modification, are permitted provided that the following conditions
36 36 * are met:
37 37 * 1. Redistributions of source code must retain the above copyright
38 38 * notice, this list of conditions and the following disclaimer.
39 39 * 2. Redistributions in binary form must reproduce the above copyright
40 40 * notice, this list of conditions and the following disclaimer in the
41 41 * documentation and/or other materials provided with the distribution.
42 42 * 3. This section intentionally left blank.
43 43 * 4. The name of the author may not be used to endorse or promote products
44 44 * derived from this software without specific prior written permission.
45 45 *
46 46 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
47 47 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
48 48 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
49 49 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
50 50 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
52 52 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
53 53 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
54 54 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
55 55 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56 56 */
57 57 /*
58 58 * Section 3 of the above license was updated in response to bug 6379571.
59 59 */
60 60
61 61 #include <sys/xpv_user.h>
62 62
63 63 /* XXX 3.3. TODO remove this include */
64 64 #include <xen/public/arch-x86/xen-mca.h>
65 65
66 66 #include <sys/ctype.h>
67 67 #include <sys/types.h>
68 68 #include <sys/cmn_err.h>
69 69 #include <sys/trap.h>
70 70 #include <sys/segments.h>
71 71 #include <sys/hypervisor.h>
72 72 #include <sys/xen_mmu.h>
73 73 #include <sys/machsystm.h>
74 74 #include <sys/promif.h>
75 75 #include <sys/bootconf.h>
76 76 #include <sys/bootinfo.h>
77 77 #include <sys/cpr.h>
78 78 #include <sys/taskq.h>
79 79 #include <sys/uadmin.h>
80 80 #include <sys/evtchn_impl.h>
81 81 #include <sys/archsystm.h>
82 82 #include <xen/sys/xenbus_impl.h>
83 83 #include <sys/mach_mmu.h>
84 84 #include <vm/hat_i86.h>
85 85 #include <sys/gnttab.h>
86 86 #include <sys/reboot.h>
87 87 #include <sys/stack.h>
88 88 #include <sys/clock.h>
89 89 #include <sys/bitmap.h>
90 90 #include <sys/processor.h>
91 91 #include <sys/xen_errno.h>
92 92 #include <sys/xpv_panic.h>
93 93 #include <sys/smp_impldefs.h>
94 94 #include <sys/cpu.h>
95 95 #include <sys/balloon_impl.h>
96 96 #include <sys/ddi.h>
97 97
98 98 #ifdef DEBUG
99 99 #define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf
100 100 #else
101 101 #define SUSPEND_DEBUG(...)
102 102 #endif
103 103
104 104 int cpr_debug;
105 105 cpuset_t cpu_suspend_lost_set;
106 106 static int xen_suspend_debug;
107 107
108 108 uint_t xen_phys_ncpus;
109 109 xen_mc_logical_cpu_t *xen_phys_cpus;
110 110 int xen_physinfo_debug = 0;
111 111
112 112 /*
113 113 * Determine helpful version information.
114 114 *
115 115 * (And leave copies in the data segment so we can look at them later
116 116 * with e.g. kmdb.)
117 117 */
118 118
119 119 typedef enum xen_version {
120 120 XENVER_BOOT_IDX,
121 121 XENVER_CURRENT_IDX
122 122 } xen_version_t;
123 123
124 124 struct xenver {
125 125 ulong_t xv_major;
126 126 ulong_t xv_minor;
127 127 ulong_t xv_revision;
128 128 xen_extraversion_t xv_ver;
129 129 ulong_t xv_is_xvm;
130 130 xen_changeset_info_t xv_chgset;
131 131 xen_compile_info_t xv_build;
132 132 xen_capabilities_info_t xv_caps;
133 133 } xenver[2];
134 134
135 135 #define XENVER_BOOT(m) (xenver[XENVER_BOOT_IDX].m)
136 136 #define XENVER_CURRENT(m) (xenver[XENVER_CURRENT_IDX].m)
137 137
138 138 /*
139 139 * Update the xenver data. We maintain two copies, boot and
140 140 * current. If we are setting the boot, then also set current.
141 141 */
142 142 static void
143 143 xen_set_version(xen_version_t idx)
144 144 {
145 145 ulong_t ver;
146 146
147 147 bzero(&xenver[idx], sizeof (xenver[idx]));
148 148
149 149 ver = HYPERVISOR_xen_version(XENVER_version, 0);
150 150
151 151 xenver[idx].xv_major = BITX(ver, 31, 16);
152 152 xenver[idx].xv_minor = BITX(ver, 15, 0);
153 153
154 154 (void) HYPERVISOR_xen_version(XENVER_extraversion, &xenver[idx].xv_ver);
155 155
156 156 /*
157 157 * The revision is buried in the extraversion information that is
158 158 * maintained by the hypervisor. For our purposes we expect that
159 159 * the revision number is:
160 160 * - the second character in the extraversion information
161 161 * - one character long
162 162 * - numeric digit
163 163 * If it isn't then we can't extract the revision and we leave it
164 164 * set to 0.
165 165 */
166 166 if (strlen(xenver[idx].xv_ver) > 1 && isdigit(xenver[idx].xv_ver[1]))
167 167 xenver[idx].xv_revision = xenver[idx].xv_ver[1] - '0';
168 168 else
169 169 cmn_err(CE_WARN, "Cannot extract revision on this hypervisor "
170 170 "version: v%s, unexpected version format",
171 171 xenver[idx].xv_ver);
172 172
173 173 xenver[idx].xv_is_xvm = 0;
174 174
175 175 if (strstr(xenver[idx].xv_ver, "-xvm") != NULL)
176 176 xenver[idx].xv_is_xvm = 1;
177 177
178 178 (void) HYPERVISOR_xen_version(XENVER_changeset,
179 179 &xenver[idx].xv_chgset);
180 180
181 181 (void) HYPERVISOR_xen_version(XENVER_compile_info,
182 182 &xenver[idx].xv_build);
183 183 /*
184 184  	 * Capabilities are a set of space-separated ASCII strings,
185 185 * e.g. 'xen-3.1-x86_32p' or 'hvm-3.2-x86_64'
186 186 */
187 187 (void) HYPERVISOR_xen_version(XENVER_capabilities,
188 188 &xenver[idx].xv_caps);
189 189
190 190 cmn_err(CE_CONT, "?v%lu.%lu%s chgset '%s'\n", xenver[idx].xv_major,
191 191 xenver[idx].xv_minor, xenver[idx].xv_ver, xenver[idx].xv_chgset);
192 192
193 193 if (idx == XENVER_BOOT_IDX)
194 194 bcopy(&xenver[XENVER_BOOT_IDX], &xenver[XENVER_CURRENT_IDX],
195 195 sizeof (xenver[XENVER_BOOT_IDX]));
196 196 }
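
A worked example of the decoding above may help: XENVER_version packs the
major number in bits 31:16 and the minor number in bits 15:0, and the
revision is parsed out of the extraversion string. For a hypothetical
"3.0.4-xvm" hypervisor:

	ver == 0x00030000		/* from XENVER_version */
	BITX(ver, 31, 16) == 3		/* xv_major */
	BITX(ver, 15, 0) == 0		/* xv_minor */
	xv_ver == ".4-xvm"		/* from XENVER_extraversion */
	xv_ver[1] == '4'		/* so xv_revision == 4 */
	strstr(xv_ver, "-xvm") != NULL	/* so xv_is_xvm == 1 */

and the boot banner would read "v3.0.4-xvm".
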
197 197
198 198 typedef enum xen_hypervisor_check {
199 199 XEN_RUN_CHECK,
200 200 XEN_SUSPEND_CHECK
201 201 } xen_hypervisor_check_t;
202 202
203 203 /*
212 204  	 * To run, the hypervisor must be 3.0.4 or better. To suspend/resume
213 205  	 * we need 3.0.4 or better, and if it is exactly 3.0.4, then it must
214 206  	 * be provided by the Solaris xVM project.
207 207 * Checking can be disabled for testing purposes by setting the
208 208 * xen_suspend_debug variable.
209 209 */
210 210 static int
211 211 xen_hypervisor_supports_solaris(xen_hypervisor_check_t check)
212 212 {
213 213 if (xen_suspend_debug == 1)
214 214 return (1);
215 215 if (XENVER_CURRENT(xv_major) < 3)
216 216 return (0);
217 217 if (XENVER_CURRENT(xv_major) > 3)
218 218 return (1);
219 219 if (XENVER_CURRENT(xv_minor) > 0)
220 220 return (1);
221 221 if (XENVER_CURRENT(xv_revision) < 4)
222 222 return (0);
223 223 if (check == XEN_SUSPEND_CHECK && XENVER_CURRENT(xv_revision) == 4 &&
224 224 !XENVER_CURRENT(xv_is_xvm))
225 225 return (0);
226 226
227 227 return (1);
228 228 }
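
The cascade of checks in xen_hypervisor_supports_solaris() reduces to the
following table (version strings hypothetical, derived directly from the
tests above):

	v2.x and below		run: no		suspend: no
	v3.0.0 - v3.0.3		run: no		suspend: no
	v3.0.4 (plain)		run: yes	suspend: no
	v3.0.4-xvm		run: yes	suspend: yes
	v3.0.5 and later	run: yes	suspend: yes
	v3.1+ / v4.x		run: yes	suspend: yes

(Setting xen_suspend_debug == 1 short-circuits everything to yes.)
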
229 229
230 230 /*
231 231 * If the hypervisor is -xvm, or 3.1.2 or higher, we don't need the
232 232 * workaround.
233 233 */
234 234 static void
235 235 xen_pte_workaround(void)
236 236 {
237 237 #if defined(__amd64)
238 238 extern int pt_kern;
239 239
240 240 if (XENVER_CURRENT(xv_major) != 3)
241 241 return;
242 242 if (XENVER_CURRENT(xv_minor) > 1)
243 243 return;
244 244 if (XENVER_CURRENT(xv_minor) == 1 &&
245 245 XENVER_CURRENT(xv_revision) > 1)
246 246 return;
247 247 if (XENVER_CURRENT(xv_is_xvm))
248 248 return;
249 249
250 250 pt_kern = PT_USER;
251 251 #endif
252 252 }
253 253
254 254 void
255 255 xen_set_callback(void (*func)(void), uint_t type, uint_t flags)
256 256 {
257 257 struct callback_register cb;
258 258
259 259 bzero(&cb, sizeof (cb));
260 260 #if defined(__amd64)
261 261 cb.address = (ulong_t)func;
262 262 #elif defined(__i386)
263 263 cb.address.cs = KCS_SEL;
264 264 cb.address.eip = (ulong_t)func;
265 265 #endif
266 266 cb.type = type;
267 267 cb.flags = flags;
268 268
269 269 /*
270 270 * XXPV always ignore return value for NMI
271 271 */
272 272 if (HYPERVISOR_callback_op(CALLBACKOP_register, &cb) != 0 &&
273 273 type != CALLBACKTYPE_nmi)
274 274 panic("HYPERVISOR_callback_op failed");
275 275 }
276 276
277 277 void
278 278 xen_init_callbacks(void)
279 279 {
280 280 /*
281 281 * register event (interrupt) handler.
282 282 */
283 283 xen_set_callback(xen_callback, CALLBACKTYPE_event, 0);
284 284
285 285 /*
286 286 * failsafe handler.
287 287 */
288 288 xen_set_callback(xen_failsafe_callback, CALLBACKTYPE_failsafe,
289 289 CALLBACKF_mask_events);
290 290
291 291 /*
292 292 * NMI handler.
293 293 */
294 294 xen_set_callback(nmiint, CALLBACKTYPE_nmi, 0);
295 295
296 296 /*
297 297 * system call handler
298 298 * XXPV move to init_cpu_syscall?
299 299 */
300 300 #if defined(__amd64)
301 301 xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
302 302 CALLBACKF_mask_events);
303 303 #endif /* __amd64 */
304 304 }
305 305
306 306
307 307 /*
308 308 * cmn_err() followed by a 1/4 second delay; this gives the
309 309 * logging service a chance to flush messages and helps avoid
310 310 * intermixing output from prom_printf().
311 311 * XXPV: doesn't exactly help us on UP though.
312 312 */
313 313 /*PRINTFLIKE2*/
314 314 void
315 315 cpr_err(int ce, const char *fmt, ...)
316 316 {
317 317 va_list adx;
318 318
319 319 va_start(adx, fmt);
320 320 vcmn_err(ce, fmt, adx);
321 321 va_end(adx);
322 322 drv_usecwait(MICROSEC >> 2);
323 323 }
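
The delay arithmetic matches the comment: MICROSEC >> 2 == 1000000 / 4 ==
250000 microseconds, i.e. the promised quarter second of drv_usecwait()
after each message.
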
324 324
325 325 void
326 326 xen_suspend_devices(void)
327 327 {
328 328 int rc;
329 329
330 330 SUSPEND_DEBUG("xen_suspend_devices\n");
331 331
332 332 if ((rc = cpr_suspend_devices(ddi_root_node())) != 0)
333 333 panic("failed to suspend devices: %d", rc);
334 334 }
335 335
336 336 void
337 337 xen_resume_devices(void)
338 338 {
339 339 int rc;
340 340
341 341 SUSPEND_DEBUG("xen_resume_devices\n");
342 342
343 343 if ((rc = cpr_resume_devices(ddi_root_node(), 0)) != 0)
344 344 panic("failed to resume devices: %d", rc);
345 345 }
346 346
347 347 /*
348 348 * The list of mfn pages is out of date. Recompute it.
349 349 */
350 350 static void
351 351 rebuild_mfn_list(void)
352 352 {
353 353 int i = 0;
354 354 size_t sz;
355 355 size_t off;
356 356 pfn_t pfn;
357 357
358 358 SUSPEND_DEBUG("rebuild_mfn_list\n");
359 359
360 360 sz = ((mfn_count * sizeof (mfn_t)) + MMU_PAGEOFFSET) & MMU_PAGEMASK;
361 361
362 362 for (off = 0; off < sz; off += MMU_PAGESIZE) {
363 363 size_t j = mmu_btop(off);
364 364 if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
365 365 pfn = hat_getpfnum(kas.a_hat,
366 366 (caddr_t)&mfn_list_pages[j]);
367 367 mfn_list_pages_page[i++] = pfn_to_mfn(pfn);
368 368 }
369 369
370 370 pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list + off);
371 371 mfn_list_pages[j] = pfn_to_mfn(pfn);
372 372 }
373 373
374 374 pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list_pages_page);
375 375 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
376 376 = pfn_to_mfn(pfn);
377 377 }
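
As the code above implies, what gets rebuilt is a three-level radix of
machine frame numbers rooted in the shared info page (a sketch inferred
from this function, not an authoritative diagram):

	shared_info->arch.pfn_to_mfn_frame_list_list
	    -> mfn_list_pages_page: one page of MFNs, each naming a
	       page of mfn_list_pages[]
	    -> mfn_list_pages[]: MFNs of the pages holding mfn_list[]
	    -> mfn_list[]: the domain's pfn-to-mfn table proper

Every level must be re-expressed in post-resume MFNs, which is why each
entry is regenerated via hat_getpfnum() + pfn_to_mfn().
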
378 378
379 379 static void
380 380 suspend_cpus(void)
381 381 {
382 382 int i;
383 383
384 384 SUSPEND_DEBUG("suspend_cpus\n");
385 385
386 386 mp_enter_barrier();
387 387
388 388 for (i = 1; i < ncpus; i++) {
389 389 if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
390 390 SUSPEND_DEBUG("xen_vcpu_down %d\n", i);
391 391 (void) xen_vcpu_down(i);
392 392 }
393 393
394 394 mach_cpucontext_reset(cpu[i]);
395 395 }
396 396 }
397 397
398 398 static void
399 399 resume_cpus(void)
400 400 {
401 401 int i;
402 402
403 403 for (i = 1; i < ncpus; i++) {
404 404 if (cpu[i] == NULL)
405 405 continue;
406 406
407 407 if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
408 408 SUSPEND_DEBUG("xen_vcpu_up %d\n", i);
409 409 mach_cpucontext_restore(cpu[i]);
410 410 (void) xen_vcpu_up(i);
411 411 }
412 412 }
413 413
414 414 mp_leave_barrier();
415 415 }
416 416
417 417 /*
418 418 * Top level routine to direct suspend/resume of a domain.
419 419 */
420 420 void
421 421 xen_suspend_domain(void)
422 422 {
423 423 extern void rtcsync(void);
424 424 extern hrtime_t hres_last_tick;
425 425 mfn_t start_info_mfn;
426 426 ulong_t flags;
427 427 pfn_t pfn;
428 428 int i;
429 429
430 430 /*
431 431 * Check that we are happy to suspend on this hypervisor.
432 432 */
433 433 if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
434 434 cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
435 435 "version: v%lu.%lu%s, need at least version v3.0.4 or "
436 436 "-xvm based hypervisor", XENVER_CURRENT(xv_major),
437 437 XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
438 438 return;
439 439 }
440 440
441 441 /*
442 442 * XXPV - Are we definitely OK to suspend by the time we've connected
443 443 * the handler?
444 444 */
445 445
446 446 cpr_err(CE_NOTE, "Domain suspending for save/migrate");
447 447
448 448 SUSPEND_DEBUG("xen_suspend_domain\n");
449 449
450 450 /*
451 451 * suspend interrupts and devices
452 452 * XXPV - we use suspend/resume for both save/restore domains (like sun
453 453 * cpr) and for migration. Would be nice to know the difference if
454 454 * possible. For save/restore where down time may be a long time, we
455 455 * may want to do more of the things that cpr does. (i.e. notify user
456 456 * processes, shrink memory footprint for faster restore, etc.)
457 457 */
458 458 xen_suspend_devices();
459 459 SUSPEND_DEBUG("xenbus_suspend\n");
460 460 xenbus_suspend();
461 461
462 462 pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
463 463 start_info_mfn = pfn_to_mfn(pfn);
464 464
465 465 /*
466 466 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
467 467 * wrt xenbus being suspended here?
468 468 */
469 469 mutex_enter(&cpu_lock);
470 470
471 471 /*
472 472 * Suspend must be done on vcpu 0, as no context for other CPUs is
473 473 * saved.
474 474 *
475 475 * XXPV - add to taskq API ?
476 476 */
477 477 thread_affinity_set(curthread, 0);
478 478 kpreempt_disable();
479 479
480 480 SUSPEND_DEBUG("xen_start_migrate\n");
481 481 xen_start_migrate();
482 482 if (ncpus > 1)
483 483 suspend_cpus();
484 484
485 485 /*
486 486 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
487 487 * any holder would have dropped it to get through suspend_cpus().
488 488 */
489 489 mutex_enter(&ec_lock);
490 490
491 491 /*
492 492 * From here on in, we can't take locks.
493 493 */
494 494 SUSPEND_DEBUG("ec_suspend\n");
495 495 ec_suspend();
496 496 SUSPEND_DEBUG("gnttab_suspend\n");
497 497 gnttab_suspend();
498 498
499 499 flags = intr_clear();
500 500
501 501 xpv_time_suspend();
502 502
503 503 /*
504 504 * Currently, the hypervisor incorrectly fails to bring back
505 505 * powered-down VCPUs. Thus we need to record any powered-down VCPUs
506 506 * to prevent any attempts to operate on them. But we have to do this
507 507 * *after* the very first time we do ec_suspend().
508 508 */
509 509 for (i = 1; i < ncpus; i++) {
510 510 if (cpu[i] == NULL)
511 511 continue;
512 512
513 513 if (cpu_get_state(cpu[i]) == P_POWEROFF)
514 514 CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
515 515 }
516 516
517 517 /*
518 518 * The dom0 save/migrate code doesn't automatically translate
527 519  	 * these MFNs into PFNs, but expects PFNs, so we do it here.
520 520 * We don't use mfn_to_pfn() because so many OS services have
521 521 * been disabled at this point.
522 522 */
523 523 xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
524 524 xen_info->console.domU.mfn =
525 525 mfn_to_pfn_mapping[xen_info->console.domU.mfn];
526 526
527 527 if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
528 528 prom_printf("xen_suspend_domain(): "
529 529 "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
530 530 (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
531 531 }
532 532
533 533 if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
534 534 0, UVMF_INVLPG)) {
535 535 prom_printf("xen_suspend_domain(): "
536 536 "HYPERVISOR_update_va_mapping() failed\n");
537 537 (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
538 538 }
539 539
540 540 SUSPEND_DEBUG("HYPERVISOR_suspend\n");
541 541
542 542 /*
543 543 * At this point we suspend and sometime later resume.
544 544 */
545 545 if (HYPERVISOR_suspend(start_info_mfn)) {
546 546 prom_printf("xen_suspend_domain(): "
547 547 "HYPERVISOR_suspend() failed\n");
548 548 (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
549 549 }
550 550
551 551 /*
552 552 * Point HYPERVISOR_shared_info to its new value.
553 553 */
554 554 if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
555 555 xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
556 556 UVMF_INVLPG))
557 557 (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
558 558
559 559 if (xen_info->nr_pages != mfn_count) {
560 560 prom_printf("xen_suspend_domain(): number of pages"
561 561 " changed, was 0x%lx, now 0x%lx\n", mfn_count,
562 562 xen_info->nr_pages);
563 563 (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
564 564 }
565 565
566 566 xpv_time_resume();
567 567
568 568 cached_max_mfn = 0;
569 569
570 570 SUSPEND_DEBUG("gnttab_resume\n");
571 571 gnttab_resume();
572 572
573 573 /* XXPV: add a note that this must be lockless. */
574 574 SUSPEND_DEBUG("ec_resume\n");
575 575 ec_resume();
576 576
577 577 intr_restore(flags);
578 578
579 579 if (ncpus > 1)
580 580 resume_cpus();
581 581
582 582 mutex_exit(&ec_lock);
583 583 xen_end_migrate();
584 584 mutex_exit(&cpu_lock);
585 585
586 586 /*
587 587 * Now we can take locks again.
588 588 */
589 589
590 590 /*
591 591 * Force the tick value used for tv_nsec in hres_tick() to be up to
592 592 * date. rtcsync() will reset the hrestime value appropriately.
593 593 */
594 594 hres_last_tick = xpv_gethrtime();
595 595
596 596 /*
597 597 * XXPV: we need to have resumed the CPUs since this takes locks, but
598 598 * can remote CPUs see bad state? Presumably yes. Should probably nest
599 599 * taking of todlock inside of cpu_lock, or vice versa, then provide an
600 600 * unlocked version. Probably need to call clkinitf to reset cpu freq
601 601 * and re-calibrate if we migrated to a different speed cpu. Also need
602 602 * to make a (re)init_cpu_info call to update processor info structs
603 603 * and device tree info. That remains to be written at the moment.
604 604 */
605 605 rtcsync();
606 606
607 607 rebuild_mfn_list();
608 608
609 609 SUSPEND_DEBUG("xenbus_resume\n");
610 610 xenbus_resume();
611 611 SUSPEND_DEBUG("xenbus_resume_devices\n");
612 612 xen_resume_devices();
613 613
614 614 thread_affinity_clear(curthread);
615 615 kpreempt_enable();
616 616
617 617 SUSPEND_DEBUG("finished xen_suspend_domain\n");
618 618
619 619 /*
620 620 * We have restarted our suspended domain, update the hypervisor
621 621 * details. NB: This must be done at the end of this function,
622 622 * since we need the domain to be completely resumed before
623 623 * these functions will work correctly.
624 624 */
625 625 xen_set_version(XENVER_CURRENT_IDX);
626 626
627 627 /*
628 628 * We can check and report a warning, but we don't stop the
629 629 * process.
630 630 */
631 631 if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
632 632 cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
633 633 "but need at least version v3.0.4",
634 634 XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
635 635 XENVER_CURRENT(xv_ver));
636 636
637 637 cmn_err(CE_NOTE, "domain restore/migrate completed");
638 638 }
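
Since the ordering constraints are spread across many comments, a condensed
recap of the suspend/resume sequence implemented above (each resume step
mirrors its suspend step in reverse):

	1. suspend devices, then xenbus
	2. bind to vcpu 0, disable preemption, park the other CPUs
	3. take ec_lock; ec_suspend(), gnttab_suspend() -- lockless
	   from here on
	4. intr_clear(), xpv_time_suspend(), translate the store and
	   console MFNs to PFNs
	5. HYPERVISOR_suspend() ... resume happens here ...
	6. remap shared_info, xpv_time_resume(), gnttab/ec resume,
	   intr_restore()
	7. unpark CPUs, drop locks, resync time, rebuild the mfn list,
	   resume xenbus and devices
	8. refresh the cached hypervisor version (must be last)
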
639 639
640 640 /*ARGSUSED*/
641 641 int
642 642 xen_debug_handler(void *arg)
643 643 {
644 644 debug_enter("External debug event received");
645 645
646 646 /*
647 647 * If we've not got KMDB loaded, output some stuff difficult to capture
648 648 * from a domain core.
649 649 */
650 650 if (!(boothowto & RB_DEBUG)) {
651 651 shared_info_t *si = HYPERVISOR_shared_info;
652 652 int i;
653 653
654 654 prom_printf("evtchn_pending [ ");
655 655 for (i = 0; i < 8; i++)
656 656 prom_printf("%lx ", si->evtchn_pending[i]);
657 657 prom_printf("]\nevtchn_mask [ ");
658 658 for (i = 0; i < 8; i++)
659 659 prom_printf("%lx ", si->evtchn_mask[i]);
660 660 prom_printf("]\n");
661 661
662 662 for (i = 0; i < ncpus; i++) {
663 663 vcpu_info_t *vcpu = &si->vcpu_info[i];
664 664 if (cpu[i] == NULL)
665 665 continue;
666 666 prom_printf("CPU%d pending %d mask %d sel %lx\n",
667 667 i, vcpu->evtchn_upcall_pending,
668 668 vcpu->evtchn_upcall_mask,
669 669 vcpu->evtchn_pending_sel);
670 670 }
671 671 }
672 672
673 673 return (0);
674 674 }
675 675
676 676 /*ARGSUSED*/
677 677 static void
678 678 xen_sysrq_handler(struct xenbus_watch *watch, const char **vec,
679 679 unsigned int len)
680 680 {
681 681 xenbus_transaction_t xbt;
682 682 char key = '\0';
683 683 int ret;
684 684
685 685 retry:
686 686 if (xenbus_transaction_start(&xbt)) {
687 687 cmn_err(CE_WARN, "failed to start sysrq transaction");
688 688 return;
689 689 }
690 690
691 691 if ((ret = xenbus_scanf(xbt, "control", "sysrq", "%c", &key)) != 0) {
692 692 /*
693 693 * ENOENT happens in response to our own xenbus_rm.
694 694 * XXPV - this happens spuriously on boot?
695 695 */
696 696 if (ret != ENOENT)
697 697 cmn_err(CE_WARN, "failed to read sysrq: %d", ret);
698 698 goto out;
699 699 }
700 700
701 701 if ((ret = xenbus_rm(xbt, "control", "sysrq")) != 0) {
702 702 cmn_err(CE_WARN, "failed to reset sysrq: %d", ret);
703 703 goto out;
704 704 }
705 705
706 706 if (xenbus_transaction_end(xbt, 0) == EAGAIN)
707 707 goto retry;
708 708
709 709 /*
710 710 * Somewhat arbitrary - on Linux this means 'reboot'. We could just
711 711 * accept any key, but this might increase the risk of sending a
712 712 * harmless sysrq to the wrong domain...
713 713 */
714 714 if (key == 'b')
715 715 (void) xen_debug_handler(NULL);
716 716 else
717 717 cmn_err(CE_WARN, "Ignored sysrq %c", key);
718 718 return;
719 719
720 720 out:
721 721 (void) xenbus_transaction_end(xbt, 1);
722 722 }
723 723
724 724 taskq_t *xen_shutdown_tq;
725 725
726 726 #define SHUTDOWN_INVALID -1
727 727 #define SHUTDOWN_POWEROFF 0
728 728 #define SHUTDOWN_REBOOT 1
729 729 #define SHUTDOWN_SUSPEND 2
730 730 #define SHUTDOWN_HALT 3
731 731 #define SHUTDOWN_MAX 4
732 732
733 733 #define SHUTDOWN_TIMEOUT_SECS (60 * 5)
734 734
735 735 static const char *cmd_strings[SHUTDOWN_MAX] = {
736 736 "poweroff",
737 737 "reboot",
738 738 "suspend",
739 739 "halt"
740 740 };
741 741
742 742 static void
743 743 xen_dirty_shutdown(void *arg)
744 744 {
745 745 int cmd = (uintptr_t)arg;
746 746
747 747 cmn_err(CE_WARN, "Externally requested shutdown failed or "
748 748 "timed out.\nShutting down.\n");
749 749
750 750 switch (cmd) {
751 751 case SHUTDOWN_HALT:
752 752 case SHUTDOWN_POWEROFF:
753 753 (void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
754 754 break;
755 755 case SHUTDOWN_REBOOT:
756 756 (void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
757 757 break;
758 758 }
759 759 }
760 760
761 761 static void
762 762 xen_shutdown(void *arg)
763 763 {
764 764 int cmd = (uintptr_t)arg;
765 765 proc_t *initpp;
766 766
767 767 ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);
768 768
769 769 if (cmd == SHUTDOWN_SUSPEND) {
770 770 xen_suspend_domain();
771 771 return;
772 772 }
773 773
774 774 switch (cmd) {
775 775 case SHUTDOWN_POWEROFF:
776 776 force_shutdown_method = AD_POWEROFF;
777 777 break;
778 778 case SHUTDOWN_HALT:
779 779 force_shutdown_method = AD_HALT;
780 780 break;
781 781 case SHUTDOWN_REBOOT:
782 782 force_shutdown_method = AD_BOOT;
783 783 break;
784 784 }
785 785
786 786 /*
787 787 * If we're still booting and init(1) isn't set up yet, simply halt.
788 788 */
789 789 mutex_enter(&pidlock);
790 790 initpp = prfind(P_INITPID);
791 791 mutex_exit(&pidlock);
792 792 if (initpp == NULL) {
793 793 extern void halt(char *);
794 794 halt("Power off the System"); /* just in case */
795 795 }
796 796
797 797 /*
798 798 * else, graceful shutdown with inittab and all getting involved
799 799 */
800 800 psignal(initpp, SIGPWR);
801 801
802 802 (void) timeout(xen_dirty_shutdown, arg,
803 - SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
803 + drv_sectohz(SHUTDOWN_TIMEOUT_SECS));
804 804 }
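
This timeout conversion is the point of the changeset: the open-coded
seconds-to-ticks expression SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC)
becomes drv_sectohz(SHUTDOWN_TIMEOUT_SECS). The new routine's definition is
not part of this file's diff; a minimal sketch, assuming it is layered on
drv_usectohz() exactly as the old expression suggests:

	/* sketch only -- the real definition lives elsewhere in this changeset */
	#define	drv_sectohz(sec)	(drv_usectohz((clock_t)(sec) * MICROSEC))

Both forms yield the same tick count here (300 seconds' worth of hz), but
the new one states the intent directly and spares every caller the MICROSEC
round trip.
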
805 805
806 806 /*ARGSUSED*/
807 807 static void
808 808 xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
809 809 unsigned int len)
810 810 {
811 811 char *str;
812 812 xenbus_transaction_t xbt;
813 813 int err, shutdown_code = SHUTDOWN_INVALID;
814 814 unsigned int slen;
815 815
816 816 again:
817 817 err = xenbus_transaction_start(&xbt);
818 818 if (err)
819 819 return;
820 820 if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
821 821 (void) xenbus_transaction_end(xbt, 1);
822 822 return;
823 823 }
824 824
825 825 SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);
826 826
827 827 /*
828 828 * If this is a watch fired from our write below, check out early to
829 829 * avoid an infinite loop.
830 830 */
831 831 if (strcmp(str, "") == 0) {
832 832 (void) xenbus_transaction_end(xbt, 0);
833 833 kmem_free(str, slen);
834 834 return;
835 835 } else if (strcmp(str, "poweroff") == 0) {
836 836 shutdown_code = SHUTDOWN_POWEROFF;
837 837 } else if (strcmp(str, "reboot") == 0) {
838 838 shutdown_code = SHUTDOWN_REBOOT;
839 839 } else if (strcmp(str, "suspend") == 0) {
840 840 shutdown_code = SHUTDOWN_SUSPEND;
841 841 } else if (strcmp(str, "halt") == 0) {
842 842 shutdown_code = SHUTDOWN_HALT;
843 843 } else {
844 844 printf("Ignoring shutdown request: %s\n", str);
845 845 }
846 846
847 847 /*
848 848 * XXPV Should we check the value of xenbus_write() too, or are all
861 849  	 * errors automatically folded into xenbus_transaction_end()?
850 850 */
851 851 (void) xenbus_write(xbt, "control", "shutdown", "");
852 852 err = xenbus_transaction_end(xbt, 0);
853 853 if (err == EAGAIN) {
854 854 SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
855 855 kmem_free(str, slen);
856 856 goto again;
857 857 }
858 858
859 859 kmem_free(str, slen);
860 860 if (shutdown_code != SHUTDOWN_INVALID) {
861 861 (void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
862 862 (void *)(intptr_t)shutdown_code, 0);
863 863 }
864 864 }
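
The handshake above follows the conventional xenstore control protocol: the
toolstack writes one of the cmd_strings to the domain's control/shutdown
node; the watch fires; the handler reads the value and acknowledges by
writing the empty string back (which re-fires the watch, hence the early
return on ""); and the real work is dispatched to xen_shutdown_tq so the
xenbus watch thread is never blocked on a long operation. From dom0 such a
request would typically look like
xenstore-write /local/domain/<domid>/control/shutdown suspend, with <domid>
filled in for the target domain.
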
865 865
866 866 static struct xenbus_watch shutdown_watch;
867 867 static struct xenbus_watch sysrq_watch;
868 868
869 869 void
870 870 xen_late_startup(void)
871 871 {
872 872 if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
873 873 xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
874 874 maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
875 875 shutdown_watch.node = "control/shutdown";
876 876 shutdown_watch.callback = xen_shutdown_handler;
877 877 if (register_xenbus_watch(&shutdown_watch))
878 878 cmn_err(CE_WARN, "Failed to set shutdown watcher");
879 879
880 880 sysrq_watch.node = "control/sysrq";
881 881 sysrq_watch.callback = xen_sysrq_handler;
882 882 if (register_xenbus_watch(&sysrq_watch))
883 883 cmn_err(CE_WARN, "Failed to set sysrq watcher");
884 884 }
885 885 balloon_init(xen_info->nr_pages);
886 886 }
887 887
888 888 #ifdef DEBUG
889 889 #define XEN_PRINTF_BUFSIZE 1024
890 890
891 891 char xen_printf_buffer[XEN_PRINTF_BUFSIZE];
892 892
893 893 /*
906 894  * Printf function that calls the hypervisor directly. For a domU it
907 895  * only works when running on a xen hypervisor built with debug on;
908 896  * for dom0 it always works, since no I/O ring interaction is needed.
897 897 */
898 898 /*PRINTFLIKE1*/
899 899 void
900 900 xen_printf(const char *fmt, ...)
901 901 {
902 902 va_list ap;
903 903
904 904 va_start(ap, fmt);
905 905 (void) vsnprintf(xen_printf_buffer, XEN_PRINTF_BUFSIZE, fmt, ap);
906 906 va_end(ap);
907 907
908 908 (void) HYPERVISOR_console_io(CONSOLEIO_write,
909 909 strlen(xen_printf_buffer), xen_printf_buffer);
910 910 }
911 911 #else
912 912 void
913 913 xen_printf(const char *fmt, ...)
914 914 {
915 915 }
916 916 #endif /* DEBUG */
917 917
918 918 void
919 919 startup_xen_version(void)
920 920 {
921 921 xen_set_version(XENVER_BOOT_IDX);
922 922 if (xen_hypervisor_supports_solaris(XEN_RUN_CHECK) == 0)
923 923 cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
924 924 "but need at least version v3.0.4",
925 925 XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
926 926 XENVER_CURRENT(xv_ver));
927 927 xen_pte_workaround();
928 928 }
929 929
930 930 int xen_mca_simulate_mc_physinfo_failure = 0;
931 931
932 932 void
933 933 startup_xen_mca(void)
934 934 {
935 935 if (!DOMAIN_IS_INITDOMAIN(xen_info))
936 936 return;
937 937
938 938 xen_phys_ncpus = 0;
939 939 xen_phys_cpus = NULL;
940 940
941 941 if (xen_mca_simulate_mc_physinfo_failure ||
942 942 xen_get_mc_physcpuinfo(NULL, &xen_phys_ncpus) != 0) {
943 943 cmn_err(CE_WARN,
944 944 "%sxen_get_mc_physinfo failure during xen MCA startup: "
945 945 "there will be no machine check support",
946 946 xen_mca_simulate_mc_physinfo_failure ? "(simulated) " : "");
947 947 return;
948 948 }
949 949
950 950 xen_phys_cpus = kmem_alloc(xen_phys_ncpus *
951 951 sizeof (xen_mc_logical_cpu_t), KM_NOSLEEP);
952 952
953 953 if (xen_phys_cpus == NULL) {
954 954 cmn_err(CE_WARN,
955 955 "xen_get_mc_physinfo failure: can't allocate CPU array");
956 956 return;
957 957 }
958 958
959 959 if (xen_get_mc_physcpuinfo(xen_phys_cpus, &xen_phys_ncpus) != 0) {
960 960 cmn_err(CE_WARN, "xen_get_mc_physinfo failure: no "
961 961 "physical CPU info");
962 962 kmem_free(xen_phys_cpus,
963 963 xen_phys_ncpus * sizeof (xen_mc_logical_cpu_t));
964 964 xen_phys_ncpus = 0;
965 965 xen_phys_cpus = NULL;
966 966 }
967 967
968 968 if (xen_physinfo_debug) {
969 969 xen_mc_logical_cpu_t *xcp;
970 970 unsigned i;
971 971
972 972 cmn_err(CE_NOTE, "xvm mca: %u physical cpus:\n",
973 973 xen_phys_ncpus);
974 974 for (i = 0; i < xen_phys_ncpus; i++) {
975 975 xcp = &xen_phys_cpus[i];
976 976 cmn_err(CE_NOTE, "cpu%u: (%u, %u, %u) apid %u",
977 977 xcp->mc_cpunr, xcp->mc_chipid, xcp->mc_coreid,
978 978 xcp->mc_threadid, xcp->mc_apicid);
979 979 }
980 980 }
981 981 }
982 982
983 983 /*
984 984 * Miscellaneous hypercall wrappers with slightly more verbose diagnostics.
985 985 */
986 986
987 987 void
988 988 xen_set_gdt(ulong_t *frame_list, int entries)
989 989 {
990 990 int err;
991 991 if ((err = HYPERVISOR_set_gdt(frame_list, entries)) != 0) {
992 992 /*
993 993 * X_EINVAL: reserved entry or bad frames
994 994 * X_EFAULT: bad address
995 995 */
996 996 panic("xen_set_gdt(%p, %d): error %d",
997 997 (void *)frame_list, entries, -(int)err);
998 998 }
999 999 }
1000 1000
1001 1001 void
1002 1002 xen_set_ldt(user_desc_t *ldt, uint_t nsels)
1003 1003 {
1004 1004 struct mmuext_op op;
1005 1005 long err;
1006 1006
1007 1007 op.cmd = MMUEXT_SET_LDT;
1008 1008 op.arg1.linear_addr = (uintptr_t)ldt;
1009 1009 op.arg2.nr_ents = nsels;
1010 1010
1011 1011 if ((err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) != 0) {
1012 1012 panic("xen_set_ldt(%p, %d): error %d",
1013 1013 (void *)ldt, nsels, -(int)err);
1014 1014 }
1015 1015 }
1016 1016
1017 1017 void
1018 1018 xen_stack_switch(ulong_t ss, ulong_t esp)
1019 1019 {
1020 1020 long err;
1021 1021
1022 1022 if ((err = HYPERVISOR_stack_switch(ss, esp)) != 0) {
1023 1023 /*
1024 1024 * X_EPERM: bad selector
1025 1025 */
1026 1026 panic("xen_stack_switch(%lx, %lx): error %d", ss, esp,
1027 1027 -(int)err);
1028 1028 }
1029 1029 }
1030 1030
1031 1031 long
1032 1032 xen_set_trap_table(trap_info_t *table)
1033 1033 {
1034 1034 long err;
1035 1035
1036 1036 if ((err = HYPERVISOR_set_trap_table(table)) != 0) {
1037 1037 /*
1038 1038 * X_EFAULT: bad address
1039 1039 * X_EPERM: bad selector
1040 1040 */
1041 1041 panic("xen_set_trap_table(%p): error %d", (void *)table,
1042 1042 -(int)err);
1043 1043 }
1044 1044 return (err);
1045 1045 }
1046 1046
1047 1047 #if defined(__amd64)
1048 1048 void
1049 1049 xen_set_segment_base(int reg, ulong_t value)
1050 1050 {
1051 1051 long err;
1052 1052
1053 1053 if ((err = HYPERVISOR_set_segment_base(reg, value)) != 0) {
1054 1054 /*
1055 1055 * X_EFAULT: bad address
1056 1056 * X_EINVAL: bad type
1057 1057 */
1058 1058 panic("xen_set_segment_base(%d, %lx): error %d",
1059 1059 reg, value, -(int)err);
1060 1060 }
1061 1061 }
1062 1062 #endif /* __amd64 */
1063 1063
1064 1064 /*
1065 1065 * Translate a hypervisor errcode to a Solaris error code.
1066 1066 */
1067 1067 int
1068 1068 xen_xlate_errcode(int error)
1069 1069 {
1070 1070 switch (-error) {
1071 1071
1072 1072 /*
1073 1073 * Translate hypervisor errno's into native errno's
1074 1074 */
1075 1075
1076 1076 #define CASE(num) case X_##num: error = num; break
1077 1077
1078 1078 CASE(EPERM); CASE(ENOENT); CASE(ESRCH);
1079 1079 CASE(EINTR); CASE(EIO); CASE(ENXIO);
1080 1080 CASE(E2BIG); CASE(ENOMEM); CASE(EACCES);
1081 1081 CASE(EFAULT); CASE(EBUSY); CASE(EEXIST);
1082 1082 CASE(ENODEV); CASE(EISDIR); CASE(EINVAL);
1083 1083 CASE(ENOSPC); CASE(ESPIPE); CASE(EROFS);
1084 1084 CASE(ENOSYS); CASE(ENOTEMPTY); CASE(EISCONN);
1085 1085 CASE(ENODATA); CASE(EAGAIN);
1086 1086
1087 1087 #undef CASE
1088 1088
1089 1089 default:
1090 1090 panic("xen_xlate_errcode: unknown error %d", error);
1091 1091 }
1092 1092
1093 1093 return (error);
1094 1094 }
1095 1095
1096 1096 /*
1097 1097 * Raise PS_IOPL on current vcpu to user level.
1098 1098 * Caller responsible for preventing kernel preemption.
1099 1099 */
1100 1100 void
1101 1101 xen_enable_user_iopl(void)
1102 1102 {
1103 1103 physdev_set_iopl_t set_iopl;
1104 1104 set_iopl.iopl = 3; /* user ring 3 */
1105 1105 (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1106 1106 }
1107 1107
1108 1108 /*
1109 1109 * Drop PS_IOPL on current vcpu to kernel level
1110 1110 */
1111 1111 void
1112 1112 xen_disable_user_iopl(void)
1113 1113 {
1114 1114 physdev_set_iopl_t set_iopl;
1115 1115 set_iopl.iopl = 1; /* kernel pseudo ring 1 */
1116 1116 (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1117 1117 }
1118 1118
1119 1119 int
1120 1120 xen_gdt_setprot(cpu_t *cp, uint_t prot)
1121 1121 {
1122 1122 int err;
1123 1123 #if defined(__amd64)
1124 1124 int pt_bits = PT_VALID;
1125 1125 if (prot & PROT_WRITE)
1126 1126 pt_bits |= PT_WRITABLE;
1127 1127 #endif
1128 1128
1129 1129 if ((err = as_setprot(&kas, (caddr_t)cp->cpu_gdt,
1130 1130 MMU_PAGESIZE, prot)) != 0)
1131 1131 goto done;
1132 1132
1133 1133 #if defined(__amd64)
1134 1134 err = xen_kpm_page(mmu_btop(cp->cpu_m.mcpu_gdtpa), pt_bits);
1135 1135 #endif
1136 1136
1137 1137 done:
1138 1138 if (err) {
1139 1139 cmn_err(CE_WARN, "cpu%d: xen_gdt_setprot(%s) failed: error %d",
1140 1140 cp->cpu_id, (prot & PROT_WRITE) ? "writable" : "read-only",
1141 1141 err);
1142 1142 }
1143 1143
1144 1144 return (err);
1145 1145 }
1146 1146
1147 1147 int
1148 1148 xen_ldt_setprot(user_desc_t *ldt, size_t lsize, uint_t prot)
1149 1149 {
1150 1150 int err;
1151 1151 caddr_t lva = (caddr_t)ldt;
1152 1152 #if defined(__amd64)
1153 1153 int pt_bits = PT_VALID;
1154 1154 pgcnt_t npgs;
1155 1155 if (prot & PROT_WRITE)
1156 1156 pt_bits |= PT_WRITABLE;
1157 1157 #endif /* __amd64 */
1158 1158
1159 1159 if ((err = as_setprot(&kas, (caddr_t)ldt, lsize, prot)) != 0)
1160 1160 goto done;
1161 1161
1162 1162 #if defined(__amd64)
1163 1163
1164 1164 ASSERT(IS_P2ALIGNED(lsize, PAGESIZE));
1165 1165 npgs = mmu_btop(lsize);
1166 1166 while (npgs--) {
1167 1167 if ((err = xen_kpm_page(hat_getpfnum(kas.a_hat, lva),
1168 1168 pt_bits)) != 0)
1169 1169 break;
1170 1170 lva += PAGESIZE;
1171 1171 }
1172 1172 #endif /* __amd64 */
1173 1173
1174 1174 done:
1175 1175 if (err) {
1176 1176 cmn_err(CE_WARN, "xen_ldt_setprot(%p, %s) failed: error %d",
1177 1177 (void *)lva,
1178 1178 (prot & PROT_WRITE) ? "writable" : "read-only", err);
1179 1179 }
1180 1180
1181 1181 return (err);
1182 1182 }
1183 1183
1184 1184 int
1185 1185 xen_get_mc_physcpuinfo(xen_mc_logical_cpu_t *log_cpus, uint_t *ncpus)
1186 1186 {
1187 1187 xen_mc_t xmc;
1188 1188 struct xen_mc_physcpuinfo *cpi = &xmc.u.mc_physcpuinfo;
1189 1189
1190 1190 cpi->ncpus = *ncpus;
1191 1191 /*LINTED: constant in conditional context*/
1192 1192 set_xen_guest_handle(cpi->info, log_cpus);
1193 1193
1194 1194 if (HYPERVISOR_mca(XEN_MC_physcpuinfo, &xmc) != 0)
1195 1195 return (-1);
1196 1196
1197 1197 *ncpus = cpi->ncpus;
1198 1198 return (0);
1199 1199 }
1200 1200
1201 1201 void
1202 1202 print_panic(const char *str)
1203 1203 {
1216 1204  	xen_printf("%s", str);
1205 1205 }
1206 1206
1207 1207 /*
1208 1208 * Interfaces to iterate over real cpu information, but only that info
1209 1209 * which we choose to expose here. These are of interest to dom0
1210 1210 * only (and the backing hypercall should not work for domu).
1211 1211 */
1212 1212
1213 1213 xen_mc_lcpu_cookie_t
1214 1214 xen_physcpu_next(xen_mc_lcpu_cookie_t cookie)
1215 1215 {
1216 1216 xen_mc_logical_cpu_t *xcp = (xen_mc_logical_cpu_t *)cookie;
1217 1217
1218 1218 if (!DOMAIN_IS_INITDOMAIN(xen_info))
1219 1219 return (NULL);
1220 1220
1221 1221 if (cookie == NULL)
1222 1222 return ((xen_mc_lcpu_cookie_t)xen_phys_cpus);
1223 1223
1224 1224 if (xcp == xen_phys_cpus + xen_phys_ncpus - 1)
1225 1225 return (NULL);
1226 1226 else
1227 1227 return ((xen_mc_lcpu_cookie_t)++xcp);
1228 1228 }
1229 1229
1230 1230 #define COOKIE2XCP(c) ((xen_mc_logical_cpu_t *)(c))
1231 1231
1232 1232 const char *
1233 1233 xen_physcpu_vendorstr(xen_mc_lcpu_cookie_t cookie)
1234 1234 {
1235 1235 xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);
1236 1236
1237 1237 return ((const char *)&xcp->mc_vendorid[0]);
1238 1238 }
1239 1239
1240 1240 int
1241 1241 xen_physcpu_family(xen_mc_lcpu_cookie_t cookie)
1242 1242 {
1243 1243 return (COOKIE2XCP(cookie)->mc_family);
1244 1244 }
1245 1245
1246 1246 int
1247 1247 xen_physcpu_model(xen_mc_lcpu_cookie_t cookie)
1248 1248 {
1249 1249 return (COOKIE2XCP(cookie)->mc_model);
1250 1250 }
1251 1251
1252 1252 int
1253 1253 xen_physcpu_stepping(xen_mc_lcpu_cookie_t cookie)
1254 1254 {
1255 1255 return (COOKIE2XCP(cookie)->mc_step);
1256 1256 }
1257 1257
1258 1258 id_t
1259 1259 xen_physcpu_chipid(xen_mc_lcpu_cookie_t cookie)
1260 1260 {
1261 1261 return (COOKIE2XCP(cookie)->mc_chipid);
1262 1262 }
1263 1263
1264 1264 id_t
1265 1265 xen_physcpu_coreid(xen_mc_lcpu_cookie_t cookie)
1266 1266 {
1267 1267 return (COOKIE2XCP(cookie)->mc_coreid);
1268 1268 }
1269 1269
1270 1270 id_t
1271 1271 xen_physcpu_strandid(xen_mc_lcpu_cookie_t cookie)
1272 1272 {
1273 1273 return (COOKIE2XCP(cookie)->mc_threadid);
1274 1274 }
1275 1275
1276 1276 id_t
1277 1277 xen_physcpu_initial_apicid(xen_mc_lcpu_cookie_t cookie)
1278 1278 {
1279 1279 return (COOKIE2XCP(cookie)->mc_clusterid);
1280 1280 }
1281 1281
1282 1282 id_t
1283 1283 xen_physcpu_logical_id(xen_mc_lcpu_cookie_t cookie)
1284 1284 {
1285 1285 return (COOKIE2XCP(cookie)->mc_cpunr);
1286 1286 }
1287 1287
1288 1288 boolean_t
1289 1289 xen_physcpu_is_cmt(xen_mc_lcpu_cookie_t cookie)
1290 1290 {
1291 1291 return (COOKIE2XCP(cookie)->mc_nthreads > 1);
1292 1292 }
1293 1293
1294 1294 uint64_t
1295 1295 xen_physcpu_mcg_cap(xen_mc_lcpu_cookie_t cookie)
1296 1296 {
1297 1297 xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);
1298 1298
1299 1299 /*
1300 1300 * Need to #define the indices, or search through the array.
1301 1301 */
1302 1302 return (xcp->mc_msrvalues[0].value);
1303 1303 }
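
The cookie interface above is easiest to read from a consumer's point of
view. A minimal usage sketch (the loop body and its message are invented
for illustration; the iterator and accessors are the ones defined above):

	xen_mc_lcpu_cookie_t c = NULL;

	while ((c = xen_physcpu_next(c)) != NULL) {
		cmn_err(CE_NOTE, "cpu%d: chip %d core %d strand %d",
		    (int)xen_physcpu_logical_id(c),
		    (int)xen_physcpu_chipid(c),
		    (int)xen_physcpu_coreid(c),
		    (int)xen_physcpu_strandid(c));
	}

Passing NULL yields the first physical cpu (or NULL on a domU, where the
backing hypercall is unavailable), and the walk terminates after the last
entry of xen_phys_cpus.
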
1304 1304
1305 1305 int
1306 1306 xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count,
1307 1307 boolean_t uvaddr)
1308 1308 {
1309 1309 long rc;
1310 1310 uint_t i;
1311 1311
1312 1312 ASSERT(cmd == GNTTABOP_map_grant_ref);
1313 1313
1314 1314 #if !defined(_BOOT)
1315 1315 if (uvaddr == B_FALSE) {
1316 1316 for (i = 0; i < count; ++i) {
1329 1317  			mapop[i].flags |= (PT_FOREIGN << _GNTMAP_guest_avail0);
1318 1318 }
1319 1319 }
1320 1320 #endif
1321 1321
1322 1322 rc = HYPERVISOR_grant_table_op(cmd, mapop, count);
1323 1323
1324 1324 return (rc);
1325 1325 }
1326 1326
1327 1327 static int
1328 1328 xpv_get_physinfo(xen_sysctl_physinfo_t *pi)
1329 1329 {
1330 1330 xen_sysctl_t op;
1331 1331 struct sp { void *p; } *sp = (struct sp *)&op.u.physinfo.cpu_to_node;
1332 1332 int ret;
1333 1333
1334 1334 bzero(&op, sizeof (op));
1335 1335 op.cmd = XEN_SYSCTL_physinfo;
1336 1336 op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
1337 1337 /*LINTED: constant in conditional context*/
1338 1338 set_xen_guest_handle(*sp, NULL);
1339 1339
1340 1340 ret = HYPERVISOR_sysctl(&op);
1341 1341
1342 1342 if (ret != 0)
1343 1343 return (xen_xlate_errcode(ret));
1344 1344
1345 1345 bcopy(&op.u.physinfo, pi, sizeof (op.u.physinfo));
1346 1346 return (0);
1347 1347 }
1348 1348
1349 1349 /*
1350 1350 * On dom0, we can determine the number of physical cpus on the machine.
1351 1351 * This number is important when figuring out what workarounds are
1352 1352 * appropriate, so compute it now.
1353 1353 */
1354 1354 uint_t
1355 1355 xpv_nr_phys_cpus(void)
1356 1356 {
1357 1357 static uint_t nphyscpus = 0;
1358 1358
1359 1359 ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1360 1360
1361 1361 if (nphyscpus == 0) {
1362 1362 xen_sysctl_physinfo_t pi;
1363 1363 int ret;
1364 1364
1365 1365 if ((ret = xpv_get_physinfo(&pi)) != 0)
1366 1366 panic("xpv_get_physinfo() failed: %d\n", ret);
1367 1367 nphyscpus = pi.nr_cpus;
1368 1368 }
1369 1369 return (nphyscpus);
1370 1370 }
1371 1371
1372 1372 pgcnt_t
1373 1373 xpv_nr_phys_pages(void)
1374 1374 {
1375 1375 xen_sysctl_physinfo_t pi;
1376 1376 int ret;
1377 1377
1378 1378 ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1379 1379
1380 1380 if ((ret = xpv_get_physinfo(&pi)) != 0)
1381 1381 panic("xpv_get_physinfo() failed: %d\n", ret);
1382 1382
1383 1383 return ((pgcnt_t)pi.total_pages);
1384 1384 }
1385 1385
1386 1386 uint64_t
1387 1387 xpv_cpu_khz(void)
1388 1388 {
1389 1389 xen_sysctl_physinfo_t pi;
1390 1390 int ret;
1391 1391
1392 1392 ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1393 1393
1394 1394 if ((ret = xpv_get_physinfo(&pi)) != 0)
1395 1395 panic("xpv_get_physinfo() failed: %d\n", ret);
1396 1396 return ((uint64_t)pi.cpu_khz);
1397 1397 }