Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/sparc/dtrace/fasttrap_isa.c
+++ new/usr/src/uts/sparc/dtrace/fasttrap_isa.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 -#pragma ident "%Z%%M% %I% %E% SMI"
28 -
29 27 #include <sys/fasttrap_isa.h>
30 28 #include <sys/fasttrap_impl.h>
31 29 #include <sys/dtrace.h>
32 30 #include <sys/dtrace_impl.h>
33 31 #include <sys/cmn_err.h>
34 32 #include <sys/frame.h>
35 33 #include <sys/stack.h>
36 34 #include <sys/sysmacros.h>
37 35 #include <sys/trap.h>
38 36
39 37 #include <v9/sys/machpcb.h>
40 38 #include <v9/sys/privregs.h>
41 39
42 40 /*
43 41 * Lossless User-Land Tracing on SPARC
44 42 * -----------------------------------
45 43 *
46 44 * The Basic Idea
47 45 *
48 46 * The most important design constraint is, of course, correct execution of
49 47 * the user thread above all else. The next most important goal is rapid
50 48 * execution. We combine execution of instructions in user-land with
51 49 * emulation of certain instructions in the kernel to aim for complete
52 50 * correctness and maximal performance.
53 51 *
54 52 * We take advantage of the split PC/NPC architecture to speed up logical
55 53 * single-stepping; when we copy an instruction out to the scratch space in
56 54 * the ulwp_t structure (held in the %g7 register on SPARC), we can
57 55 * effectively single step by setting the PC to our scratch space and leaving
58 56 * the NPC alone. This executes the replaced instruction and then continues
 59 57 * on without having to reenter the kernel as with single-stepping. The
 60 58 * obvious caveat is for instructions whose execution is PC dependent --
61 59 * branches, call and link instructions (call and jmpl), and the rdpc
62 60 * instruction. These instructions cannot be executed in the manner described
63 61 * so they must be emulated in the kernel.
64 62 *
 65 63 * Emulation for this small set of instructions is fairly simple; the most
66 64 * difficult part being emulating branch conditions.
67 65 *
68 66 *
69 67 * A Cache Heavy Portfolio
70 68 *
71 69 * It's important to note at this time that copying an instruction out to the
72 70 * ulwp_t scratch space in user-land is rather complicated. SPARC has
73 71 * separate data and instruction caches so any writes to the D$ (using a
74 72 * store instruction for example) aren't necessarily reflected in the I$.
75 73 * The flush instruction can be used to synchronize the two and must be used
76 74 * for any self-modifying code, but the flush instruction only applies to the
77 75 * primary address space (the absence of a flusha analogue to the flush
78 76 * instruction that accepts an ASI argument is an obvious omission from SPARC
79 77 * v9 where the notion of the alternate address space was introduced on
80 78 * SPARC). To correctly copy out the instruction we must use a block store
81 79 * that doesn't allocate in the D$ and ensures synchronization with the I$;
82 80 * see dtrace_blksuword32() for the implementation (this function uses
83 81 * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
84 82 * described). Refer to the UltraSPARC I/II manual for details on the
85 83 * ASI_BLK_COMMIT_S ASI.
86 84 *
87 85 *
88 86 * Return Subtleties
89 87 *
90 88 * When we're firing a return probe we need to expose the value returned by
91 89 * the function being traced. Since the function can set the return value
92 90 * in its last instruction, we need to fire the return probe only _after_
93 91 * the effects of the instruction are apparent. For instructions that we
94 92 * emulate, we can call dtrace_probe() after we've performed the emulation;
95 93 * for instructions that we execute after we return to user-land, we set
96 94 * %pc to the instruction we copied out (as described above) and set %npc
97 95 * to a trap instruction stashed in the ulwp_t structure. After the traced
98 96 * instruction is executed, the trap instruction returns control to the
99 97 * kernel where we can fire the return probe.
100 98 *
101 99 * This need for a second trap in cases where we execute the traced
102 100 * instruction makes it all the more important to emulate the most common
103 101 * instructions to avoid the second trip in and out of the kernel.
104 102 *
105 103 *
106 104 * Making it Fast
107 105 *
108 106 * Since copying out an instruction is neither simple nor inexpensive for the
109 107 * CPU, we should attempt to avoid doing it in as many cases as possible.
110 108 * Since function entry and return are usually the most interesting probe
111 109 * sites, we attempt to tune the performance of the fasttrap provider around
112 110 * instructions typically in those places.
113 111 *
114 112 * Looking at a bunch of functions in libraries and executables reveals that
115 113 * most functions begin with either a save or a sethi (to setup a larger
116 114 * argument to the save) and end with a restore or an or (in the case of leaf
117 115 * functions). To try to improve performance, we emulate all of these
118 116 * instructions in the kernel.
119 117 *
120 118 * The save and restore instructions are a little tricky since they perform
 121 119 * register window manipulation. Rather than trying to tinker with the
122 120 * register windows from the kernel, we emulate the implicit add that takes
123 121 * place as part of those instructions and set the %pc to point to a simple
124 122 * save or restore we've hidden in the ulwp_t structure. If we're in a return
125 123 * probe so want to make it seem as though the tracepoint has been completely
126 124 * executed we need to remember that we've pulled this trick with restore and
127 125 * pull registers from the previous window (the one that we'll switch to once
128 126 * the simple store instruction is executed) rather than the current one. This
129 127 * is why in the case of emulating a restore we set the DTrace CPU flag
130 128 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
131 129 * (see fasttrap_return_common()).
132 130 */
133 131
/*
 * Field-extraction macros for decoding SPARC v9 instructions; see the
 * SPARC Architecture Manual Version 9, Appendix A, for the instruction
 * formats these fields come from.
 */
#define	OP(x)		((x) >> 30)		/* primary opcode */
#define	OP2(x)		(((x) >> 22) & 0x07)	/* op2 field (format 2) */
#define	OP3(x)		(((x) >> 19) & 0x3f)	/* op3 field (format 3) */
#define	RCOND(x)	(((x) >> 25) & 0x07)	/* register condition (BPr) */
#define	COND(x)		(((x) >> 25) & 0x0f)	/* branch condition field */
#define	A(x)		(((x) >> 29) & 0x01)	/* annul bit */
#define	I(x)		(((x) >> 13) & 0x01)	/* immediate bit */
#define	RD(x)		(((x) >> 25) & 0x1f)	/* destination register */
#define	RS1(x)		(((x) >> 14) & 0x1f)	/* first source register */
#define	RS2(x)		(((x) >> 0) & 0x1f)	/* second source register */
#define	CC(x)		(((x) >> 20) & 0x03)	/* condition-code selector */
#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
#define	DISP22(x)	((x) & 0x3fffff)
#define	DISP19(x)	((x) & 0x7ffff)
#define	DISP30(x)	((x) & 0x3fffffff)
#define	SW_TRAP(x)	((x) & 0x7f)		/* software trap number (Tcc) */

/* op3 values (format 3) for the instructions we emulate or inspect. */
#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

/*
 * op3 values from the load/store class; these overlap numerically with
 * the arithmetic-class values above and are distinguished by the primary
 * opcode (OP field).
 */
#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

/* op2 values (format 2): branches and sethi. */
#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

/* Integer register numbers in the flat 0-31 (%g/%o/%l/%i) encoding. */
#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		24
#define	R_I1		25
#define	R_I2		26
#define	R_I3		27
#define	R_I4		28

/*
 * Check the comment in fasttrap.h when changing these offsets or adding
 * new instructions.
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */

/*
 * Tunable to let users turn off the fancy save instruction optimization.
 * If a program is non-ABI compliant, there's a possibility that the save
 * instruction optimization could cause an error.
 */
int fasttrap_optimize_save = 1;
198 196
199 197 static uint64_t
200 198 fasttrap_anarg(struct regs *rp, int argno)
201 199 {
202 200 uint64_t value;
203 201
204 202 if (argno < 6)
205 203 return ((&rp->r_o0)[argno]);
206 204
207 205 if (curproc->p_model == DATAMODEL_NATIVE) {
208 206 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
209 207
210 208 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
211 209 value = dtrace_fulword(&fr->fr_argd[argno]);
212 210 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
213 211 CPU_DTRACE_BADALIGN);
214 212 } else {
215 213 struct frame32 *fr = (struct frame32 *)rp->r_sp;
216 214
217 215 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
218 216 value = dtrace_fuword32(&fr->fr_argd[argno]);
219 217 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
220 218 CPU_DTRACE_BADALIGN);
221 219 }
222 220
223 221 return (value);
224 222 }
225 223
226 224 static ulong_t fasttrap_getreg(struct regs *, uint_t);
227 225 static void fasttrap_putreg(struct regs *, uint_t, ulong_t);
228 226
229 227 static void
230 228 fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
231 229 uint_t fake_restore, int argc, uintptr_t *argv)
232 230 {
233 231 int i, x, cap = MIN(argc, probe->ftp_nargs);
234 232 int inc = (fake_restore ? 16 : 0);
235 233
236 234 /*
237 235 * The only way we'll hit the fake_restore case is if a USDT probe is
238 236 * invoked as a tail-call. While it wouldn't be incorrect, we can
239 237 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
240 238 * directly since a tail-call can't be made if the invoked function
241 239 * would use the argument dump space (i.e. if there were more than
242 240 * 6 arguments). We take this shortcut because unconditionally rooting
243 241 * around for R_FP (R_SP + 16) would be unnecessarily painful.
244 242 */
245 243
246 244 if (curproc->p_model == DATAMODEL_NATIVE) {
247 245 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
248 246 uintptr_t v;
249 247
250 248 for (i = 0; i < cap; i++) {
251 249 x = probe->ftp_argmap[i];
252 250
253 251 if (x < 6)
254 252 argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
255 253 else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
256 254 argv[i] = 0;
257 255 }
258 256
259 257 } else {
260 258 struct frame32 *fr = (struct frame32 *)rp->r_sp;
261 259 uint32_t v;
262 260
263 261 for (i = 0; i < cap; i++) {
264 262 x = probe->ftp_argmap[i];
265 263
266 264 if (x < 6)
267 265 argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
268 266 else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
269 267 argv[i] = 0;
270 268 }
271 269 }
272 270
273 271 for (; i < argc; i++) {
274 272 argv[i] = 0;
275 273 }
276 274 }
277 275
/*
 * Fire any return probes registered at the tracepoint for (pid, pc) --
 * the address of the traced instruction we just emulated or stepped over.
 * fake_restore is non-zero when we emulated a restore (or return), in
 * which case the function's return values must be pulled from the %i
 * registers (what the %o's will become once the hidden restore executes)
 * rather than the %o's; see the block comment at the top of this file.
 */
static void
fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
    uint_t fake_restore)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	kmutex_t *pid_mtx;
	dtrace_icookie_t cookie;

	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Re-find the tracepoint in the hash bucket; an acount of zero
	 * means that entry's process is defunct, so keep looking.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL || tp->ftt_retids == NULL) {
		mutex_exit(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;

		/*
		 * USDT probes with offset translation: build the argument
		 * vector via fasttrap_usdt_args().  When fake_restore is
		 * set we also raise CPU_DTRACE_FAKERESTORE around the
		 * dtrace_probe() call -- with interrupts disabled so the
		 * flag can't bleed into an unrelated probe firing -- so
		 * argument fetches look in the previous register window.
		 */
		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
			if (probe->ftp_argmap != NULL && fake_restore) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else if (probe->ftp_argmap != NULL) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);

			} else if (fake_restore) {
				/* No argmap: raw %i's as the arguments. */
				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, arg0, arg1,
				    arg2, arg3, arg4);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else {
				/* No argmap, no fake restore: raw %o's. */
				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
				    rp->r_o2, rp->r_o3, rp->r_o4);
			}

			continue;
		}

		/*
		 * If this is only a possible return point, we must
		 * be looking at a potential tail call in leaf context.
		 * If the %npc is still within this function, then we
		 * must have misidentified a jmpl as a tail-call when it
		 * is, in fact, part of a jump table. It would be nice to
		 * remove this tracepoint, but this is neither the time
		 * nor the place.
		 */
		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * It's possible for a function to branch to the delay slot
		 * of an instruction that we've identified as a return site.
		 * We can detect this spurious return probe activation by
		 * observing that in this case %npc will be %pc + 4 and %npc
		 * will be inside the current function (unless the user is
		 * doing _crazy_ instruction picking in which case there's
		 * very little we can do). The second check is important
		 * in case the last instructions of a function make a tail-
		 * call to the function located immediately subsequent.
		 */
		if (rp->r_npc == rp->r_pc + 4 &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * The first argument is the offset of return tracepoint
		 * in the function; the remaining arguments are the return
		 * values.
		 *
		 * If fake_restore is set, we need to pull the return values
		 * out of the %i's rather than the %o's -- a little trickier.
		 */
		if (!fake_restore) {
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
		} else {
			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);

			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    arg0, arg1, arg2, arg3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
			dtrace_interrupt_enable(cookie);
		}
	}

	mutex_exit(pid_mtx);
}
412 410
413 411 int
414 412 fasttrap_pid_probe(struct regs *rp)
415 413 {
416 414 proc_t *p = curproc;
417 415 fasttrap_tracepoint_t *tp, tp_local;
418 416 fasttrap_id_t *id;
419 417 pid_t pid;
420 418 uintptr_t pc = rp->r_pc;
421 419 uintptr_t npc = rp->r_npc;
422 420 uintptr_t orig_pc = pc;
423 421 fasttrap_bucket_t *bucket;
424 422 kmutex_t *pid_mtx;
425 423 uint_t fake_restore = 0, is_enabled = 0;
426 424 dtrace_icookie_t cookie;
427 425
428 426 /*
429 427 * It's possible that a user (in a veritable orgy of bad planning)
430 428 * could redirect this thread's flow of control before it reached the
431 429 * return probe fasttrap. In this case we need to kill the process
432 430 * since it's in a unrecoverable state.
433 431 */
434 432 if (curthread->t_dtrace_step) {
435 433 ASSERT(curthread->t_dtrace_on);
436 434 fasttrap_sigtrap(p, curthread, pc);
437 435 return (0);
438 436 }
439 437
440 438 /*
441 439 * Clear all user tracing flags.
442 440 */
443 441 curthread->t_dtrace_ft = 0;
444 442 curthread->t_dtrace_pc = 0;
445 443 curthread->t_dtrace_npc = 0;
446 444 curthread->t_dtrace_scrpc = 0;
447 445 curthread->t_dtrace_astpc = 0;
448 446
449 447 /*
450 448 * Treat a child created by a call to vfork(2) as if it were its
451 449 * parent. We know that there's only one thread of control in such a
452 450 * process: this one.
453 451 */
454 452 while (p->p_flag & SVFORK) {
455 453 p = p->p_parent;
456 454 }
457 455
458 456 pid = p->p_pid;
459 457 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
460 458 mutex_enter(pid_mtx);
461 459 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
462 460
463 461 /*
464 462 * Lookup the tracepoint that the process just hit.
465 463 */
466 464 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
467 465 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
468 466 tp->ftt_proc->ftpc_acount != 0)
469 467 break;
470 468 }
471 469
472 470 /*
473 471 * If we couldn't find a matching tracepoint, either a tracepoint has
474 472 * been inserted without using the pid<pid> ioctl interface (see
475 473 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
476 474 */
477 475 if (tp == NULL) {
478 476 mutex_exit(pid_mtx);
479 477 return (-1);
480 478 }
481 479
482 480 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
483 481 fasttrap_probe_t *probe = id->fti_probe;
484 482 int isentry = (id->fti_ptype == DTFTP_ENTRY);
485 483
486 484 if (id->fti_ptype == DTFTP_IS_ENABLED) {
487 485 is_enabled = 1;
488 486 continue;
489 487 }
490 488
491 489 /*
492 490 * We note that this was an entry probe to help ustack() find
493 491 * the first caller.
494 492 */
495 493 if (isentry) {
496 494 cookie = dtrace_interrupt_disable();
497 495 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
498 496 }
499 497 dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
500 498 rp->r_o3, rp->r_o4);
501 499 if (isentry) {
502 500 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
503 501 dtrace_interrupt_enable(cookie);
504 502 }
505 503 }
506 504
507 505 /*
508 506 * We're about to do a bunch of work so we cache a local copy of
509 507 * the tracepoint to emulate the instruction, and then find the
510 508 * tracepoint again later if we need to light up any return probes.
511 509 */
512 510 tp_local = *tp;
513 511 mutex_exit(pid_mtx);
514 512 tp = &tp_local;
515 513
516 514 /*
517 515 * If there's an is-enabled probe conntected to this tracepoint it
518 516 * means that there was a 'mov %g0, %o0' instruction that was placed
519 517 * there by DTrace when the binary was linked. As this probe is, in
520 518 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
521 519 * bypass all the instruction emulation logic since we know the
522 520 * inevitable result. It's possible that a user could construct a
523 521 * scenario where the 'is-enabled' probe was on some other
524 522 * instruction, but that would be a rather exotic way to shoot oneself
525 523 * in the foot.
526 524 */
527 525 if (is_enabled) {
528 526 rp->r_o0 = 1;
529 527 pc = rp->r_npc;
530 528 npc = pc + 4;
531 529 goto done;
532 530 }
533 531
534 532 /*
535 533 * We emulate certain types of instructions to ensure correctness
536 534 * (in the case of position dependent instructions) or optimize
537 535 * common cases. The rest we have the thread execute back in user-
538 536 * land.
539 537 */
540 538 switch (tp->ftt_type) {
541 539 case FASTTRAP_T_SAVE:
542 540 {
543 541 int32_t imm;
544 542
545 543 /*
546 544 * This an optimization to let us handle function entry
547 545 * probes more efficiently. Many functions begin with a save
548 546 * instruction that follows the pattern:
549 547 * save %sp, <imm>, %sp
550 548 *
551 549 * Meanwhile, we've stashed the instruction:
552 550 * save %g1, %g0, %sp
553 551 *
554 552 * off of %g7, so all we have to do is stick the right value
555 553 * into %g1 and reset %pc to point to the instruction we've
556 554 * cleverly hidden (%npc should not be touched).
557 555 */
558 556
559 557 imm = tp->ftt_instr << 19;
560 558 imm >>= 19;
561 559 rp->r_g1 = rp->r_sp + imm;
562 560 pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
563 561 break;
564 562 }
565 563
566 564 case FASTTRAP_T_RESTORE:
567 565 {
568 566 ulong_t value;
569 567 uint_t rd;
570 568
571 569 /*
572 570 * This is an optimization to let us handle function
573 571 * return probes more efficiently. Most non-leaf functions
574 572 * end with the sequence:
575 573 * ret
576 574 * restore <reg>, <reg_or_imm>, %oX
577 575 *
578 576 * We've stashed the instruction:
579 577 * restore %g0, %g0, %g0
580 578 *
581 579 * off of %g7 so we just need to place the correct value
582 580 * in the right %i register (since after our fake-o
583 581 * restore, the %i's will become the %o's) and set the %pc
584 582 * to point to our hidden restore. We also set fake_restore to
585 583 * let fasttrap_return_common() know that it will find the
586 584 * return values in the %i's rather than the %o's.
587 585 */
588 586
589 587 if (I(tp->ftt_instr)) {
590 588 int32_t imm;
591 589
592 590 imm = tp->ftt_instr << 19;
593 591 imm >>= 19;
594 592 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
595 593 } else {
596 594 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
597 595 fasttrap_getreg(rp, RS2(tp->ftt_instr));
598 596 }
599 597
600 598 /*
601 599 * Convert %o's to %i's; leave %g's as they are.
602 600 */
603 601 rd = RD(tp->ftt_instr);
604 602 fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
605 603
606 604 pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
607 605 fake_restore = 1;
608 606 break;
609 607 }
610 608
611 609 case FASTTRAP_T_RETURN:
612 610 {
613 611 uintptr_t target;
614 612
615 613 /*
616 614 * A return instruction is like a jmpl (without the link
617 615 * part) that executes an implicit restore. We've stashed
618 616 * the instruction:
619 617 * return %o0
620 618 *
621 619 * off of %g7 so we just need to place the target in %o0
622 620 * and set the %pc to point to the stashed return instruction.
623 621 * We use %o0 since that register disappears after the return
624 622 * executes, erasing any evidence of this tampering.
625 623 */
626 624 if (I(tp->ftt_instr)) {
627 625 int32_t imm;
628 626
629 627 imm = tp->ftt_instr << 19;
630 628 imm >>= 19;
631 629 target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
632 630 } else {
633 631 target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
634 632 fasttrap_getreg(rp, RS2(tp->ftt_instr));
635 633 }
636 634
637 635 fasttrap_putreg(rp, R_O0, target);
638 636
639 637 pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
640 638 fake_restore = 1;
641 639 break;
642 640 }
643 641
644 642 case FASTTRAP_T_OR:
645 643 {
646 644 ulong_t value;
647 645
648 646 if (I(tp->ftt_instr)) {
649 647 int32_t imm;
650 648
651 649 imm = tp->ftt_instr << 19;
652 650 imm >>= 19;
653 651 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
654 652 } else {
655 653 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
656 654 fasttrap_getreg(rp, RS2(tp->ftt_instr));
657 655 }
658 656
659 657 fasttrap_putreg(rp, RD(tp->ftt_instr), value);
660 658 pc = rp->r_npc;
661 659 npc = pc + 4;
662 660 break;
663 661 }
664 662
665 663 case FASTTRAP_T_SETHI:
666 664 if (RD(tp->ftt_instr) != R_G0) {
667 665 uint32_t imm32 = tp->ftt_instr << 10;
668 666 fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
669 667 }
670 668 pc = rp->r_npc;
671 669 npc = pc + 4;
672 670 break;
673 671
674 672 case FASTTRAP_T_CCR:
675 673 {
676 674 uint_t c, v, z, n, taken;
677 675 uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;
678 676
679 677 if (tp->ftt_cc != 0)
680 678 ccr >>= 4;
681 679
682 680 c = (ccr >> 0) & 1;
683 681 v = (ccr >> 1) & 1;
684 682 z = (ccr >> 2) & 1;
685 683 n = (ccr >> 3) & 1;
686 684
687 685 switch (tp->ftt_code) {
688 686 case 0x0: /* BN */
689 687 taken = 0; break;
690 688 case 0x1: /* BE */
691 689 taken = z; break;
692 690 case 0x2: /* BLE */
693 691 taken = z | (n ^ v); break;
694 692 case 0x3: /* BL */
695 693 taken = n ^ v; break;
696 694 case 0x4: /* BLEU */
697 695 taken = c | z; break;
698 696 case 0x5: /* BCS (BLU) */
699 697 taken = c; break;
700 698 case 0x6: /* BNEG */
701 699 taken = n; break;
702 700 case 0x7: /* BVS */
703 701 taken = v; break;
704 702 case 0x8: /* BA */
705 703 /*
706 704 * We handle the BA case differently since the annul
707 705 * bit means something slightly different.
708 706 */
709 707 panic("fasttrap: mishandled a branch");
710 708 taken = 1; break;
711 709 case 0x9: /* BNE */
712 710 taken = ~z; break;
713 711 case 0xa: /* BG */
714 712 taken = ~(z | (n ^ v)); break;
715 713 case 0xb: /* BGE */
716 714 taken = ~(n ^ v); break;
717 715 case 0xc: /* BGU */
718 716 taken = ~(c | z); break;
719 717 case 0xd: /* BCC (BGEU) */
720 718 taken = ~c; break;
721 719 case 0xe: /* BPOS */
722 720 taken = ~n; break;
723 721 case 0xf: /* BVC */
724 722 taken = ~v; break;
725 723 }
726 724
727 725 if (taken & 1) {
728 726 pc = rp->r_npc;
729 727 npc = tp->ftt_dest;
730 728 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
731 729 /*
732 730 * Untaken annulled branches don't execute the
733 731 * instruction in the delay slot.
734 732 */
735 733 pc = rp->r_npc + 4;
736 734 npc = pc + 4;
737 735 } else {
738 736 pc = rp->r_npc;
739 737 npc = pc + 4;
740 738 }
741 739 break;
742 740 }
743 741
744 742 case FASTTRAP_T_FCC:
745 743 {
746 744 uint_t fcc;
747 745 uint_t taken;
748 746 uint64_t fsr;
749 747
750 748 dtrace_getfsr(&fsr);
751 749
752 750 if (tp->ftt_cc == 0) {
753 751 fcc = (fsr >> 10) & 0x3;
754 752 } else {
755 753 uint_t shift;
756 754 ASSERT(tp->ftt_cc <= 3);
757 755 shift = 30 + tp->ftt_cc * 2;
758 756 fcc = (fsr >> shift) & 0x3;
759 757 }
760 758
761 759 switch (tp->ftt_code) {
762 760 case 0x0: /* FBN */
763 761 taken = (1 << fcc) & (0|0|0|0); break;
764 762 case 0x1: /* FBNE */
765 763 taken = (1 << fcc) & (8|4|2|0); break;
766 764 case 0x2: /* FBLG */
767 765 taken = (1 << fcc) & (0|4|2|0); break;
768 766 case 0x3: /* FBUL */
769 767 taken = (1 << fcc) & (8|0|2|0); break;
770 768 case 0x4: /* FBL */
771 769 taken = (1 << fcc) & (0|0|2|0); break;
772 770 case 0x5: /* FBUG */
773 771 taken = (1 << fcc) & (8|4|0|0); break;
774 772 case 0x6: /* FBG */
775 773 taken = (1 << fcc) & (0|4|0|0); break;
776 774 case 0x7: /* FBU */
777 775 taken = (1 << fcc) & (8|0|0|0); break;
778 776 case 0x8: /* FBA */
779 777 /*
780 778 * We handle the FBA case differently since the annul
781 779 * bit means something slightly different.
782 780 */
783 781 panic("fasttrap: mishandled a branch");
784 782 taken = (1 << fcc) & (8|4|2|1); break;
785 783 case 0x9: /* FBE */
786 784 taken = (1 << fcc) & (0|0|0|1); break;
787 785 case 0xa: /* FBUE */
788 786 taken = (1 << fcc) & (8|0|0|1); break;
789 787 case 0xb: /* FBGE */
790 788 taken = (1 << fcc) & (0|4|0|1); break;
791 789 case 0xc: /* FBUGE */
792 790 taken = (1 << fcc) & (8|4|0|1); break;
793 791 case 0xd: /* FBLE */
794 792 taken = (1 << fcc) & (0|0|2|1); break;
795 793 case 0xe: /* FBULE */
796 794 taken = (1 << fcc) & (8|0|2|1); break;
797 795 case 0xf: /* FBO */
798 796 taken = (1 << fcc) & (0|4|2|1); break;
799 797 }
800 798
801 799 if (taken) {
802 800 pc = rp->r_npc;
803 801 npc = tp->ftt_dest;
804 802 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
805 803 /*
806 804 * Untaken annulled branches don't execute the
807 805 * instruction in the delay slot.
808 806 */
809 807 pc = rp->r_npc + 4;
810 808 npc = pc + 4;
811 809 } else {
812 810 pc = rp->r_npc;
813 811 npc = pc + 4;
814 812 }
815 813 break;
816 814 }
817 815
818 816 case FASTTRAP_T_REG:
819 817 {
820 818 int64_t value;
821 819 uint_t taken;
822 820 uint_t reg = RS1(tp->ftt_instr);
823 821
824 822 /*
825 823 * An ILP32 process shouldn't be using a branch predicated on
826 824 * an %i or an %l since it would violate the ABI. It's a
827 825 * violation of the ABI because we can't ensure deterministic
828 826 * behavior. We should have identified this case when we
829 827 * enabled the probe.
830 828 */
831 829 ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);
832 830
833 831 value = (int64_t)fasttrap_getreg(rp, reg);
834 832
835 833 switch (tp->ftt_code) {
836 834 case 0x1: /* BRZ */
837 835 taken = (value == 0); break;
838 836 case 0x2: /* BRLEZ */
839 837 taken = (value <= 0); break;
840 838 case 0x3: /* BRLZ */
841 839 taken = (value < 0); break;
842 840 case 0x5: /* BRNZ */
843 841 taken = (value != 0); break;
844 842 case 0x6: /* BRGZ */
845 843 taken = (value > 0); break;
846 844 case 0x7: /* BRGEZ */
847 845 taken = (value >= 0); break;
848 846 default:
849 847 case 0x0:
850 848 case 0x4:
851 849 panic("fasttrap: mishandled a branch");
852 850 }
853 851
854 852 if (taken) {
855 853 pc = rp->r_npc;
856 854 npc = tp->ftt_dest;
857 855 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
858 856 /*
859 857 * Untaken annulled branches don't execute the
860 858 * instruction in the delay slot.
861 859 */
862 860 pc = rp->r_npc + 4;
863 861 npc = pc + 4;
864 862 } else {
865 863 pc = rp->r_npc;
866 864 npc = pc + 4;
867 865 }
868 866 break;
869 867 }
870 868
871 869 case FASTTRAP_T_ALWAYS:
872 870 /*
873 871 * BAs, BA,As...
874 872 */
875 873
876 874 if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
877 875 /*
878 876 * Annulled branch always instructions never execute
879 877 * the instruction in the delay slot.
880 878 */
881 879 pc = tp->ftt_dest;
882 880 npc = tp->ftt_dest + 4;
883 881 } else {
884 882 pc = rp->r_npc;
885 883 npc = tp->ftt_dest;
886 884 }
887 885 break;
888 886
889 887 case FASTTRAP_T_RDPC:
890 888 fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
891 889 pc = rp->r_npc;
892 890 npc = pc + 4;
893 891 break;
894 892
895 893 case FASTTRAP_T_CALL:
896 894 /*
897 895 * It's a call _and_ link remember...
898 896 */
899 897 rp->r_o7 = rp->r_pc;
900 898 pc = rp->r_npc;
901 899 npc = tp->ftt_dest;
902 900 break;
903 901
904 902 case FASTTRAP_T_JMPL:
905 903 pc = rp->r_npc;
906 904
907 905 if (I(tp->ftt_instr)) {
908 906 uint_t rs1 = RS1(tp->ftt_instr);
909 907 int32_t imm;
910 908
911 909 imm = tp->ftt_instr << 19;
912 910 imm >>= 19;
913 911 npc = fasttrap_getreg(rp, rs1) + imm;
914 912 } else {
915 913 uint_t rs1 = RS1(tp->ftt_instr);
916 914 uint_t rs2 = RS2(tp->ftt_instr);
917 915
918 916 npc = fasttrap_getreg(rp, rs1) +
919 917 fasttrap_getreg(rp, rs2);
920 918 }
921 919
922 920 /*
923 921 * Do the link part of the jump-and-link instruction.
924 922 */
925 923 fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
926 924
927 925 break;
928 926
929 927 case FASTTRAP_T_COMMON:
930 928 {
931 929 curthread->t_dtrace_scrpc = rp->r_g7;
932 930 curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
933 931
934 932 /*
935 933 * Copy the instruction to a reserved location in the
936 934 * user-land thread structure, then set the PC to that
937 935 * location and leave the NPC alone. We take pains to ensure
938 936 * consistency in the instruction stream (See SPARC
939 937 * Architecture Manual Version 9, sections 8.4.7, A.20, and
940 938 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
941 939 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
942 940 * instruction into the user's address space without
943 941 * bypassing the I$. There's no AS_USER version of this ASI
944 942 * (as exist for other ASIs) so we use the lofault
945 943 * mechanism to catch faults.
946 944 */
947 945 if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
948 946 /*
949 947 * If the copyout fails, then the process's state
950 948 * is not consistent (the effects of the traced
951 949 * instruction will never be seen). This process
952 950 * cannot be allowed to continue execution.
953 951 */
954 952 fasttrap_sigtrap(curproc, curthread, pc);
955 953 return (0);
956 954 }
957 955
958 956 curthread->t_dtrace_pc = pc;
959 957 curthread->t_dtrace_npc = npc;
960 958 curthread->t_dtrace_on = 1;
961 959
962 960 pc = curthread->t_dtrace_scrpc;
963 961
964 962 if (tp->ftt_retids != NULL) {
965 963 curthread->t_dtrace_step = 1;
966 964 curthread->t_dtrace_ret = 1;
967 965 npc = curthread->t_dtrace_astpc;
968 966 }
969 967 break;
970 968 }
971 969
972 970 default:
973 971 panic("fasttrap: mishandled an instruction");
974 972 }
975 973
976 974 /*
 977  976 	 * This bit me in the ass a couple of times, so let's toss this
978 976 * in as a cursory sanity check.
979 977 */
980 978 ASSERT(pc != rp->r_g7 + 4);
981 979 ASSERT(pc != rp->r_g7 + 8);
982 980
983 981 done:
984 982 /*
985 983 * If there were no return probes when we first found the tracepoint,
986 984 * we should feel no obligation to honor any return probes that were
987 985 * subsequently enabled -- they'll just have to wait until the next
988 986 * time around.
989 987 */
990 988 if (tp->ftt_retids != NULL) {
991 989 /*
992 990 * We need to wait until the results of the instruction are
993 991 * apparent before invoking any return probes. If this
994 992 * instruction was emulated we can just call
995 993 * fasttrap_return_common(); if it needs to be executed, we
996 994 * need to wait until we return to the kernel.
997 995 */
998 996 if (tp->ftt_type != FASTTRAP_T_COMMON) {
999 997 fasttrap_return_common(rp, orig_pc, pid, fake_restore);
1000 998 } else {
1001 999 ASSERT(curthread->t_dtrace_ret != 0);
1002 1000 ASSERT(curthread->t_dtrace_pc == orig_pc);
1003 1001 ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
1004 1002 ASSERT(npc == curthread->t_dtrace_astpc);
1005 1003 }
1006 1004 }
1007 1005
1008 1006 ASSERT(pc != 0);
1009 1007 rp->r_pc = pc;
1010 1008 rp->r_npc = npc;
1011 1009
1012 1010 return (0);
1013 1011 }
1014 1012
1015 1013 int
1016 1014 fasttrap_return_probe(struct regs *rp)
1017 1015 {
1018 1016 proc_t *p = ttoproc(curthread);
1019 1017 pid_t pid;
1020 1018 uintptr_t pc = curthread->t_dtrace_pc;
1021 1019 uintptr_t npc = curthread->t_dtrace_npc;
1022 1020
1023 1021 curthread->t_dtrace_pc = 0;
1024 1022 curthread->t_dtrace_npc = 0;
1025 1023 curthread->t_dtrace_scrpc = 0;
1026 1024 curthread->t_dtrace_astpc = 0;
1027 1025
1028 1026 /*
1029 1027 * Treat a child created by a call to vfork(2) as if it were its
1030 1028 * parent. We know there's only one thread of control in such a
1031 1029 * process: this one.
1032 1030 */
1033 1031 while (p->p_flag & SVFORK) {
1034 1032 p = p->p_parent;
1035 1033 }
1036 1034
1037 1035 /*
1038 1036 * We set the %pc and %npc to their values when the traced
1039 1037 * instruction was initially executed so that it appears to
1040 1038 * dtrace_probe() that we're on the original instruction, and so that
1041 1039 * the user can't easily detect our complex web of lies.
1042 1040 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
1043 1041 * after we return.
1044 1042 */
1045 1043 rp->r_pc = pc;
1046 1044 rp->r_npc = npc;
1047 1045
1048 1046 pid = p->p_pid;
1049 1047 fasttrap_return_common(rp, pc, pid, 0);
1050 1048
1051 1049 return (0);
1052 1050 }
1053 1051
1054 1052 int
1055 1053 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
1056 1054 {
1057 1055 fasttrap_instr_t instr = FASTTRAP_INSTR;
1058 1056
1059 1057 if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
1060 1058 return (-1);
1061 1059
1062 1060 return (0);
1063 1061 }
1064 1062
1065 1063 int
1066 1064 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
1067 1065 {
1068 1066 fasttrap_instr_t instr;
1069 1067
1070 1068 /*
1071 1069 * Distinguish between read or write failures and a changed
1072 1070 * instruction.
1073 1071 */
1074 1072 if (uread(p, &instr, 4, tp->ftt_pc) != 0)
1075 1073 return (0);
1076 1074 if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
1077 1075 return (0);
1078 1076 if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
1079 1077 return (-1);
1080 1078
1081 1079 return (0);
1082 1080 }
1083 1081
1084 1082 int
1085 1083 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
1086 1084 fasttrap_probe_type_t type)
1087 1085 {
1088 1086 uint32_t instr;
1089 1087 int32_t disp;
1090 1088
1091 1089 /*
1092 1090 * Read the instruction at the given address out of the process's
1093 1091 * address space. We don't have to worry about a debugger
1094 1092 * changing this instruction before we overwrite it with our trap
1095 1093 * instruction since P_PR_LOCK is set.
1096 1094 */
1097 1095 if (uread(p, &instr, 4, pc) != 0)
1098 1096 return (-1);
1099 1097
1100 1098 /*
1101 1099 * Decode the instruction to fill in the probe flags. We can have
1102 1100 * the process execute most instructions on its own using a pc/npc
1103 1101 	 * trick, but pc-relative control transfers present a problem since
1104 1102 * we're relocating the instruction. We emulate these instructions
1105 1103 * in the kernel. We assume a default type and over-write that as
1106 1104 * needed.
1107 1105 *
1108 1106 * pc-relative instructions must be emulated for correctness;
1109 1107 * other instructions (which represent a large set of commonly traced
1110 1108 * instructions) are emulated or otherwise optimized for performance.
1111 1109 */
1112 1110 tp->ftt_type = FASTTRAP_T_COMMON;
1113 1111 if (OP(instr) == 1) {
1114 1112 /*
1115 1113 * Call instructions.
1116 1114 */
1117 1115 tp->ftt_type = FASTTRAP_T_CALL;
1118 1116 disp = DISP30(instr) << 2;
1119 1117 tp->ftt_dest = pc + (intptr_t)disp;
1120 1118
1121 1119 } else if (OP(instr) == 0) {
1122 1120 /*
1123 1121 * Branch instructions.
1124 1122 *
1125 1123 * Unconditional branches need careful attention when they're
1126 1124 * annulled: annulled unconditional branches never execute
1127 1125 * the instruction in the delay slot.
1128 1126 */
1129 1127 switch (OP2(instr)) {
1130 1128 case OP2_ILLTRAP:
1131 1129 case 0x7:
1132 1130 /*
1133 1131 * The compiler may place an illtrap after a call to
1134 1132 * a function that returns a structure. In the case of
1135 1133 * a returned structure, the compiler places an illtrap
1136 1134 * whose const22 field is the size of the returned
1137 1135 * structure immediately following the delay slot of
1138 1136 * the call. To stay out of the way, we refuse to
1139 1137 * place tracepoints on top of illtrap instructions.
1140 1138 *
1141 1139 * This is one of the dumbest architectural decisions
1142 1140 * I've ever had to work around.
1143 1141 *
1144 1142 * We also identify the only illegal op2 value (See
1145 1143 * SPARC Architecture Manual Version 9, E.2 table 31).
1146 1144 */
1147 1145 return (-1);
1148 1146
1149 1147 case OP2_BPcc:
1150 1148 if (COND(instr) == 8) {
1151 1149 tp->ftt_type = FASTTRAP_T_ALWAYS;
1152 1150 } else {
1153 1151 /*
1154 1152 * Check for an illegal instruction.
1155 1153 */
1156 1154 if (CC(instr) & 1)
1157 1155 return (-1);
1158 1156 tp->ftt_type = FASTTRAP_T_CCR;
1159 1157 tp->ftt_cc = CC(instr);
1160 1158 tp->ftt_code = COND(instr);
1161 1159 }
1162 1160
1163 1161 if (A(instr) != 0)
1164 1162 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1165 1163
1166 1164 disp = DISP19(instr);
1167 1165 disp <<= 13;
1168 1166 disp >>= 11;
1169 1167 tp->ftt_dest = pc + (intptr_t)disp;
1170 1168 break;
1171 1169
1172 1170 case OP2_Bicc:
1173 1171 if (COND(instr) == 8) {
1174 1172 tp->ftt_type = FASTTRAP_T_ALWAYS;
1175 1173 } else {
1176 1174 tp->ftt_type = FASTTRAP_T_CCR;
1177 1175 tp->ftt_cc = 0;
1178 1176 tp->ftt_code = COND(instr);
1179 1177 }
1180 1178
1181 1179 if (A(instr) != 0)
1182 1180 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1183 1181
1184 1182 disp = DISP22(instr);
1185 1183 disp <<= 10;
1186 1184 disp >>= 8;
1187 1185 tp->ftt_dest = pc + (intptr_t)disp;
1188 1186 break;
1189 1187
1190 1188 case OP2_BPr:
1191 1189 /*
1192 1190 * Check for an illegal instruction.
1193 1191 */
1194 1192 if ((RCOND(instr) & 3) == 0)
1195 1193 return (-1);
1196 1194
1197 1195 /*
1198 1196 * It's a violation of the v8plus ABI to use a
1199 1197 * register-predicated branch in a 32-bit app if
1200 1198 * the register used is an %l or an %i (%gs and %os
1201 1199 * are legit because they're not saved to the stack
1202 1200 * in 32-bit words when we take a trap).
1203 1201 */
1204 1202 if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
1205 1203 return (-1);
1206 1204
1207 1205 tp->ftt_type = FASTTRAP_T_REG;
1208 1206 if (A(instr) != 0)
1209 1207 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1210 1208 disp = DISP16(instr);
1211 1209 disp <<= 16;
1212 1210 disp >>= 14;
1213 1211 tp->ftt_dest = pc + (intptr_t)disp;
1214 1212 tp->ftt_code = RCOND(instr);
1215 1213 break;
1216 1214
1217 1215 case OP2_SETHI:
1218 1216 tp->ftt_type = FASTTRAP_T_SETHI;
1219 1217 break;
1220 1218
1221 1219 case OP2_FBPfcc:
1222 1220 if (COND(instr) == 8) {
1223 1221 tp->ftt_type = FASTTRAP_T_ALWAYS;
1224 1222 } else {
1225 1223 tp->ftt_type = FASTTRAP_T_FCC;
1226 1224 tp->ftt_cc = CC(instr);
1227 1225 tp->ftt_code = COND(instr);
1228 1226 }
1229 1227
1230 1228 if (A(instr) != 0)
1231 1229 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1232 1230
1233 1231 disp = DISP19(instr);
1234 1232 disp <<= 13;
1235 1233 disp >>= 11;
1236 1234 tp->ftt_dest = pc + (intptr_t)disp;
1237 1235 break;
1238 1236
1239 1237 case OP2_FBfcc:
1240 1238 if (COND(instr) == 8) {
1241 1239 tp->ftt_type = FASTTRAP_T_ALWAYS;
1242 1240 } else {
1243 1241 tp->ftt_type = FASTTRAP_T_FCC;
1244 1242 tp->ftt_cc = 0;
1245 1243 tp->ftt_code = COND(instr);
1246 1244 }
1247 1245
1248 1246 if (A(instr) != 0)
1249 1247 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1250 1248
1251 1249 disp = DISP22(instr);
1252 1250 disp <<= 10;
1253 1251 disp >>= 8;
1254 1252 tp->ftt_dest = pc + (intptr_t)disp;
1255 1253 break;
1256 1254 }
1257 1255
1258 1256 } else if (OP(instr) == 2) {
1259 1257 switch (OP3(instr)) {
1260 1258 case OP3_RETURN:
1261 1259 tp->ftt_type = FASTTRAP_T_RETURN;
1262 1260 break;
1263 1261
1264 1262 case OP3_JMPL:
1265 1263 tp->ftt_type = FASTTRAP_T_JMPL;
1266 1264 break;
1267 1265
1268 1266 case OP3_RD:
1269 1267 if (RS1(instr) == 5)
1270 1268 tp->ftt_type = FASTTRAP_T_RDPC;
1271 1269 break;
1272 1270
1273 1271 case OP3_SAVE:
1274 1272 /*
1275 1273 * We optimize for save instructions at function
1276 1274 * entry; see the comment in fasttrap_pid_probe()
1277 1275 * (near FASTTRAP_T_SAVE) for details.
1278 1276 */
1279 1277 if (fasttrap_optimize_save != 0 &&
1280 1278 type == DTFTP_ENTRY &&
1281 1279 I(instr) == 1 && RD(instr) == R_SP)
1282 1280 tp->ftt_type = FASTTRAP_T_SAVE;
1283 1281 break;
1284 1282
1285 1283 case OP3_RESTORE:
1286 1284 /*
1287 1285 * We optimize restore instructions at function
1288 1286 * return; see the comment in fasttrap_pid_probe()
1289 1287 * (near FASTTRAP_T_RESTORE) for details.
1290 1288 *
1291 1289 * rd must be an %o or %g register.
1292 1290 */
1293 1291 if ((RD(instr) & 0x10) == 0)
1294 1292 tp->ftt_type = FASTTRAP_T_RESTORE;
1295 1293 break;
1296 1294
1297 1295 case OP3_OR:
1298 1296 /*
1299 1297 * A large proportion of instructions in the delay
1300 1298 * slot of retl instructions are or's so we emulate
1301 1299 * these downstairs as an optimization.
1302 1300 */
1303 1301 tp->ftt_type = FASTTRAP_T_OR;
1304 1302 break;
1305 1303
1306 1304 case OP3_TCC:
1307 1305 /*
1308 1306 * Breakpoint instructions are effectively position-
1309 1307 * dependent since the debugger uses the %pc value
1310 1308 			 * to look up which breakpoint was executed. As a
1311 1309 * result, we can't actually instrument breakpoints.
1312 1310 */
1313 1311 if (SW_TRAP(instr) == ST_BREAKPOINT)
1314 1312 return (-1);
1315 1313 break;
1316 1314
1317 1315 case 0x19:
1318 1316 case 0x1d:
1319 1317 case 0x29:
1320 1318 case 0x33:
1321 1319 case 0x3f:
1322 1320 /*
1323 1321 * Identify illegal instructions (See SPARC
1324 1322 * Architecture Manual Version 9, E.2 table 32).
1325 1323 */
1326 1324 return (-1);
1327 1325 }
1328 1326 } else if (OP(instr) == 3) {
1329 1327 uint32_t op3 = OP3(instr);
1330 1328
1331 1329 /*
1332 1330 * Identify illegal instructions (See SPARC Architecture
1333 1331 * Manual Version 9, E.2 table 33).
1334 1332 */
1335 1333 if ((op3 & 0x28) == 0x28) {
1336 1334 if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
1337 1335 op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
1338 1336 return (-1);
1339 1337 } else {
1340 1338 if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
1341 1339 return (-1);
1342 1340 }
1343 1341 }
1344 1342
1345 1343 tp->ftt_instr = instr;
1346 1344
1347 1345 /*
1348 1346 * We don't know how this tracepoint is going to be used, but in case
1349 1347 * it's used as part of a function return probe, we need to indicate
1350 1348 * whether it's always a return site or only potentially a return
1351 1349 * site. If it's part of a return probe, it's always going to be a
1352 1350 * return from that function if it's a restore instruction or if
1353 1351 * the previous instruction was a return. If we could reliably
1354 1352 * distinguish jump tables from return sites, this wouldn't be
1355 1353 * necessary.
1356 1354 */
1357 1355 if (tp->ftt_type != FASTTRAP_T_RESTORE &&
1358 1356 (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
1359 1357 !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
1360 1358 tp->ftt_flags |= FASTTRAP_F_RETMAYBE;
1361 1359
1362 1360 return (0);
1363 1361 }
1364 1362
1365 1363 /*ARGSUSED*/
1366 1364 uint64_t
1367 1365 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1368 1366 int aframes)
1369 1367 {
1370 1368 return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1371 1369 }
1372 1370
1373 1371 /*ARGSUSED*/
1374 1372 uint64_t
1375 1373 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1376 1374 int aframes)
1377 1375 {
1378 1376 return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1379 1377 }
1380 1378
1381 1379 static uint64_t fasttrap_getreg_fast_cnt;
1382 1380 static uint64_t fasttrap_getreg_mpcb_cnt;
1383 1381 static uint64_t fasttrap_getreg_slow_cnt;
1384 1382
1385 1383 static ulong_t
1386 1384 fasttrap_getreg(struct regs *rp, uint_t reg)
1387 1385 {
1388 1386 ulong_t value;
1389 1387 dtrace_icookie_t cookie;
1390 1388 struct machpcb *mpcb;
1391 1389 extern ulong_t dtrace_getreg_win(uint_t, uint_t);
1392 1390
1393 1391 /*
1394 1392 * We have the %os and %gs in our struct regs, but if we need to
1395 1393 * snag a %l or %i we need to go scrounging around in the process's
1396 1394 * address space.
1397 1395 */
1398 1396 if (reg == 0)
1399 1397 return (0);
1400 1398
1401 1399 if (reg < 16)
1402 1400 return ((&rp->r_g1)[reg - 1]);
↓ open down ↓ |
1364 lines elided |
↑ open up ↑ |
1403 1401
1404 1402 /*
1405 1403 * Before we look at the user's stack, we'll check the register
1406 1404 * windows to see if the information we want is in there.
1407 1405 */
1408 1406 cookie = dtrace_interrupt_disable();
1409 1407 if (dtrace_getotherwin() > 0) {
1410 1408 value = dtrace_getreg_win(reg, 1);
1411 1409 dtrace_interrupt_enable(cookie);
1412 1410
1413 - atomic_add_64(&fasttrap_getreg_fast_cnt, 1);
1411 + atomic_inc_64(&fasttrap_getreg_fast_cnt);
1414 1412
1415 1413 return (value);
1416 1414 }
1417 1415 dtrace_interrupt_enable(cookie);
1418 1416
1419 1417 /*
1420 1418 * First check the machpcb structure to see if we've already read
1421 1419 * in the register window we're looking for; if we haven't, (and
1422 1420 * we probably haven't) try to copy in the value of the register.
1423 1421 */
1424 1422 /* LINTED - alignment */
1425 1423 mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1426 1424
1427 1425 if (get_udatamodel() == DATAMODEL_NATIVE) {
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
1428 1426 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1429 1427
1430 1428 if (mpcb->mpcb_wbcnt > 0) {
1431 1429 struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
1432 1430 int i = mpcb->mpcb_wbcnt;
1433 1431 do {
1434 1432 i--;
1435 1433 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1436 1434 continue;
1437 1435
1438 - atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
1436 + atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1439 1437 return (rwin[i].rw_local[reg - 16]);
1440 1438 } while (i > 0);
1441 1439 }
1442 1440
1443 1441 if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
1444 1442 goto err;
1445 1443 } else {
1446 1444 struct frame32 *fr =
1447 1445 (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1448 1446 uint32_t *v32 = (uint32_t *)&value;
1449 1447
1450 1448 if (mpcb->mpcb_wbcnt > 0) {
1451 1449 struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
1452 1450 int i = mpcb->mpcb_wbcnt;
1453 1451 do {
1454 1452 i--;
1455 1453 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1456 1454 continue;
1457 1455
1458 - atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
1456 + atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1459 1457 return (rwin[i].rw_local[reg - 16]);
1460 1458 } while (i > 0);
1461 1459 }
1462 1460
1463 1461 if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
1464 1462 goto err;
1465 1463
1466 1464 v32[0] = 0;
1467 1465 }
1468 1466
1469 - atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
1467 + atomic_inc_64(&fasttrap_getreg_slow_cnt);
1470 1468 return (value);
1471 1469
1472 1470 err:
1473 1471 /*
1474 1472 	 * If the copy in failed, the process will be in an irrecoverable
1475 1473 * state, and we have no choice but to kill it.
1476 1474 */
1477 1475 psignal(ttoproc(curthread), SIGILL);
1478 1476 return (0);
1479 1477 }
1480 1478
1481 1479 static uint64_t fasttrap_putreg_fast_cnt;
1482 1480 static uint64_t fasttrap_putreg_mpcb_cnt;
1483 1481 static uint64_t fasttrap_putreg_slow_cnt;
1484 1482
1485 1483 static void
1486 1484 fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
1487 1485 {
1488 1486 dtrace_icookie_t cookie;
1489 1487 struct machpcb *mpcb;
1490 1488 extern void dtrace_putreg_win(uint_t, ulong_t);
1491 1489
1492 1490 if (reg == 0)
1493 1491 return;
1494 1492
1495 1493 if (reg < 16) {
1496 1494 (&rp->r_g1)[reg - 1] = value;
1497 1495 return;
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
1498 1496 }
1499 1497
1500 1498 /*
1501 1499 * If the user process is still using some register windows, we
1502 1500 * can just place the value in the correct window.
1503 1501 */
1504 1502 cookie = dtrace_interrupt_disable();
1505 1503 if (dtrace_getotherwin() > 0) {
1506 1504 dtrace_putreg_win(reg, value);
1507 1505 dtrace_interrupt_enable(cookie);
1508 - atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
1506 + atomic_inc_64(&fasttrap_putreg_fast_cnt);
1509 1507 return;
1510 1508 }
1511 1509 dtrace_interrupt_enable(cookie);
1512 1510
1513 1511 /*
1514 1512 * First see if there's a copy of the register window in the
1515 1513 * machpcb structure that we can modify; if there isn't try to
1516 1514 * copy out the value. If that fails, we try to create a new
1517 1515 * register window in the machpcb structure. While this isn't
1518 1516 * _precisely_ the intended use of the machpcb structure, it
1519 1517 * can't cause any problems since we know at this point in the
1520 1518 * code that all of the user's data have been flushed out of the
1521 1519 * register file (since %otherwin is 0).
1522 1520 */
1523 1521 /* LINTED - alignment */
1524 1522 mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1525 1523
1526 1524 if (get_udatamodel() == DATAMODEL_NATIVE) {
1527 1525 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1528 1526 /* LINTED - alignment */
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
1529 1527 struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;
1530 1528
1531 1529 if (mpcb->mpcb_wbcnt > 0) {
1532 1530 int i = mpcb->mpcb_wbcnt;
1533 1531 do {
1534 1532 i--;
1535 1533 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1536 1534 continue;
1537 1535
1538 1536 rwin[i].rw_local[reg - 16] = value;
1539 - atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1537 + atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1540 1538 return;
1541 1539 } while (i > 0);
1542 1540 }
1543 1541
1544 1542 if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
1545 1543 if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1546 1544 &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1547 1545 goto err;
1548 1546
1549 1547 rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
1550 1548 mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1551 1549 mpcb->mpcb_wbcnt++;
1552 - atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1550 + atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1553 1551 return;
1554 1552 }
1555 1553 } else {
1556 1554 struct frame32 *fr =
1557 1555 (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1558 1556 /* LINTED - alignment */
1559 1557 struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
1560 1558 uint32_t v32 = (uint32_t)value;
1561 1559
1562 1560 if (mpcb->mpcb_wbcnt > 0) {
1563 1561 int i = mpcb->mpcb_wbcnt;
1564 1562 do {
1565 1563 i--;
1566 1564 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1567 1565 continue;
1568 1566
1569 1567 rwin[i].rw_local[reg - 16] = v32;
1570 - atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1568 + atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1571 1569 return;
1572 1570 } while (i > 0);
1573 1571 }
1574 1572
1575 1573 if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
1576 1574 if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1577 1575 &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1578 1576 goto err;
1579 1577
1580 1578 rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
1581 1579 mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1582 1580 mpcb->mpcb_wbcnt++;
1583 - atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1581 + atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1584 1582 return;
1585 1583 }
1586 1584 }
1587 1585
1588 - atomic_add_64(&fasttrap_putreg_slow_cnt, 1);
1586 + atomic_inc_64(&fasttrap_putreg_slow_cnt);
1589 1587 return;
1590 1588
1591 1589 err:
1592 1590 /*
1593 1591 * If we couldn't record this register's value, the process is in an
1594 1592 * irrecoverable state and we have no choice but to euthanize it.
1595 1593 */
1596 1594 psignal(ttoproc(curthread), SIGILL);
1597 1595 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX