1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28 
  29 #include <sys/fasttrap_isa.h>
  30 #include <sys/fasttrap_impl.h>
  31 #include <sys/dtrace.h>
  32 #include <sys/dtrace_impl.h>
  33 #include <sys/cmn_err.h>
  34 #include <sys/frame.h>
  35 #include <sys/stack.h>
  36 #include <sys/sysmacros.h>
  37 #include <sys/trap.h>
  38 
  39 #include <v9/sys/machpcb.h>
  40 #include <v9/sys/privregs.h>
  41 
  42 /*
  43  * Lossless User-Land Tracing on SPARC
  44  * -----------------------------------
  45  *
  46  * The Basic Idea
  47  *
  48  * The most important design constraint is, of course, correct execution of
  49  * the user thread above all else. The next most important goal is rapid
  50  * execution. We combine execution of instructions in user-land with
  51  * emulation of certain instructions in the kernel to aim for complete
  52  * correctness and maximal performance.
  53  *
  54  * We take advantage of the split PC/NPC architecture to speed up logical
  55  * single-stepping; when we copy an instruction out to the scratch space in
  56  * the ulwp_t structure (held in the %g7 register on SPARC), we can
  57  * effectively single step by setting the PC to our scratch space and leaving
  58  * the NPC alone. This executes the replaced instruction and then continues
  59  * on without having to reenter the kernel as with single- stepping. The
 * obvious caveat is for instructions whose execution is PC dependent --
  61  * branches, call and link instructions (call and jmpl), and the rdpc
  62  * instruction. These instructions cannot be executed in the manner described
  63  * so they must be emulated in the kernel.
  64  *
 * Emulation for this small set of instructions is fairly simple; the most
  66  * difficult part being emulating branch conditions.
  67  *
  68  *
  69  * A Cache Heavy Portfolio
  70  *
  71  * It's important to note at this time that copying an instruction out to the
  72  * ulwp_t scratch space in user-land is rather complicated. SPARC has
  73  * separate data and instruction caches so any writes to the D$ (using a
  74  * store instruction for example) aren't necessarily reflected in the I$.
  75  * The flush instruction can be used to synchronize the two and must be used
  76  * for any self-modifying code, but the flush instruction only applies to the
  77  * primary address space (the absence of a flusha analogue to the flush
  78  * instruction that accepts an ASI argument is an obvious omission from SPARC
  79  * v9 where the notion of the alternate address space was introduced on
  80  * SPARC). To correctly copy out the instruction we must use a block store
  81  * that doesn't allocate in the D$ and ensures synchronization with the I$;
  82  * see dtrace_blksuword32() for the implementation  (this function uses
  83  * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
  84  * described). Refer to the UltraSPARC I/II manual for details on the
  85  * ASI_BLK_COMMIT_S ASI.
  86  *
  87  *
  88  * Return Subtleties
  89  *
  90  * When we're firing a return probe we need to expose the value returned by
  91  * the function being traced. Since the function can set the return value
  92  * in its last instruction, we need to fire the return probe only _after_
  93  * the effects of the instruction are apparent. For instructions that we
  94  * emulate, we can call dtrace_probe() after we've performed the emulation;
  95  * for instructions that we execute after we return to user-land, we set
  96  * %pc to the instruction we copied out (as described above) and set %npc
  97  * to a trap instruction stashed in the ulwp_t structure. After the traced
  98  * instruction is executed, the trap instruction returns control to the
  99  * kernel where we can fire the return probe.
 100  *
 101  * This need for a second trap in cases where we execute the traced
 102  * instruction makes it all the more important to emulate the most common
 103  * instructions to avoid the second trip in and out of the kernel.
 104  *
 105  *
 106  * Making it Fast
 107  *
 108  * Since copying out an instruction is neither simple nor inexpensive for the
 109  * CPU, we should attempt to avoid doing it in as many cases as possible.
 110  * Since function entry and return are usually the most interesting probe
 111  * sites, we attempt to tune the performance of the fasttrap provider around
 112  * instructions typically in those places.
 113  *
 114  * Looking at a bunch of functions in libraries and executables reveals that
 115  * most functions begin with either a save or a sethi (to setup a larger
 116  * argument to the save) and end with a restore or an or (in the case of leaf
 117  * functions). To try to improve performance, we emulate all of these
 118  * instructions in the kernel.
 119  *
 120  * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
 122  * register windows from the kernel, we emulate the implicit add that takes
 123  * place as part of those instructions and set the %pc to point to a simple
 * save or restore we've hidden in the ulwp_t structure. If we're in a return
 * probe and want to make it seem as though the tracepoint has been completely
 * executed, we need to remember that we've pulled this trick with restore and
 127  * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one. This
 129  * is why in the case of emulating a restore we set the DTrace CPU flag
 130  * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 131  * (see fasttrap_return_common()).
 132  */
 133 
 134 #define OP(x)           ((x) >> 30)
 135 #define OP2(x)          (((x) >> 22) & 0x07)
 136 #define OP3(x)          (((x) >> 19) & 0x3f)
 137 #define RCOND(x)        (((x) >> 25) & 0x07)
 138 #define COND(x)         (((x) >> 25) & 0x0f)
 139 #define A(x)            (((x) >> 29) & 0x01)
 140 #define I(x)            (((x) >> 13) & 0x01)
 141 #define RD(x)           (((x) >> 25) & 0x1f)
 142 #define RS1(x)          (((x) >> 14) & 0x1f)
 143 #define RS2(x)          (((x) >> 0) & 0x1f)
 144 #define CC(x)           (((x) >> 20) & 0x03)
 145 #define DISP16(x)       ((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
 146 #define DISP22(x)       ((x) & 0x3fffff)
 147 #define DISP19(x)       ((x) & 0x7ffff)
 148 #define DISP30(x)       ((x) & 0x3fffffff)
 149 #define SW_TRAP(x)      ((x) & 0x7f)
 150 
 151 #define OP3_OR          0x02
 152 #define OP3_RD          0x28
 153 #define OP3_JMPL        0x38
 154 #define OP3_RETURN      0x39
 155 #define OP3_TCC         0x3a
 156 #define OP3_SAVE        0x3c
 157 #define OP3_RESTORE     0x3d
 158 
 159 #define OP3_PREFETCH    0x2d
 160 #define OP3_CASA        0x3c
 161 #define OP3_PREFETCHA   0x3d
 162 #define OP3_CASXA       0x3e
 163 
 164 #define OP2_ILLTRAP     0x0
 165 #define OP2_BPcc        0x1
 166 #define OP2_Bicc        0x2
 167 #define OP2_BPr         0x3
 168 #define OP2_SETHI       0x4
 169 #define OP2_FBPfcc      0x5
 170 #define OP2_FBfcc       0x6
 171 
 172 #define R_G0            0
 173 #define R_O0            8
 174 #define R_SP            14
 175 #define R_I0            24
 176 #define R_I1            25
 177 #define R_I2            26
 178 #define R_I3            27
 179 #define R_I4            28
 180 
 181 /*
 182  * Check the comment in fasttrap.h when changing these offsets or adding
 183  * new instructions.
 184  */
 185 #define FASTTRAP_OFF_SAVE       64
 186 #define FASTTRAP_OFF_RESTORE    68
 187 #define FASTTRAP_OFF_FTRET      72
 188 #define FASTTRAP_OFF_RETURN     76
 189 
 190 #define BREAKPOINT_INSTR        0x91d02001      /* ta 1 */
 191 
 192 /*
 193  * Tunable to let users turn off the fancy save instruction optimization.
 194  * If a program is non-ABI compliant, there's a possibility that the save
 195  * instruction optimization could cause an error.
 196  */
 197 int fasttrap_optimize_save = 1;
 198 
 199 static uint64_t
 200 fasttrap_anarg(struct regs *rp, int argno)
 201 {
 202         uint64_t value;
 203 
 204         if (argno < 6)
 205                 return ((&rp->r_o0)[argno]);
 206 
 207         if (curproc->p_model == DATAMODEL_NATIVE) {
 208                 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
 209 
 210                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
 211                 value = dtrace_fulword(&fr->fr_argd[argno]);
 212                 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
 213                     CPU_DTRACE_BADALIGN);
 214         } else {
 215                 struct frame32 *fr = (struct frame32 *)rp->r_sp;
 216 
 217                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
 218                 value = dtrace_fuword32(&fr->fr_argd[argno]);
 219                 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
 220                     CPU_DTRACE_BADALIGN);
 221         }
 222 
 223         return (value);
 224 }
 225 
 226 static ulong_t fasttrap_getreg(struct regs *, uint_t);
 227 static void fasttrap_putreg(struct regs *, uint_t, ulong_t);
 228 
 229 static void
 230 fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
 231     uint_t fake_restore, int argc, uintptr_t *argv)
 232 {
 233         int i, x, cap = MIN(argc, probe->ftp_nargs);
 234         int inc = (fake_restore ? 16 : 0);
 235 
 236         /*
 237          * The only way we'll hit the fake_restore case is if a USDT probe is
 238          * invoked as a tail-call. While it wouldn't be incorrect, we can
 239          * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
 240          * directly since a tail-call can't be made if the invoked function
 241          * would use the argument dump space (i.e. if there were more than
 242          * 6 arguments). We take this shortcut because unconditionally rooting
 243          * around for R_FP (R_SP + 16) would be unnecessarily painful.
 244          */
 245 
 246         if (curproc->p_model == DATAMODEL_NATIVE) {
 247                 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
 248                 uintptr_t v;
 249 
 250                 for (i = 0; i < cap; i++) {
 251                         x = probe->ftp_argmap[i];
 252 
 253                         if (x < 6)
 254                                 argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
 255                         else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
 256                                 argv[i] = 0;
 257                 }
 258 
 259         } else {
 260                 struct frame32 *fr = (struct frame32 *)rp->r_sp;
 261                 uint32_t v;
 262 
 263                 for (i = 0; i < cap; i++) {
 264                         x = probe->ftp_argmap[i];
 265 
 266                         if (x < 6)
 267                                 argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
 268                         else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
 269                                 argv[i] = 0;
 270                 }
 271         }
 272 
 273         for (; i < argc; i++) {
 274                 argv[i] = 0;
 275         }
 276 }
 277 
/*
 * Fire all return probes associated with the tracepoint at pc in process
 * pid. rp holds the register state at the time the tracepoint completed.
 * fake_restore is non-zero when we emulated a restore (or return)
 * instruction, meaning the return values must be read from the %i
 * registers of the still-current window rather than the %o's; in that
 * case we set CPU_DTRACE_FAKERESTORE (with interrupts disabled so the
 * flag can't leak across a context switch) so that DTrace itself knows
 * to apply the same window adjustment.
 */
static void
fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
    uint_t fake_restore)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	kmutex_t *pid_mtx;
	dtrace_icookie_t cookie;

	/* Serialize against tracepoint teardown for this CPU's pid lock. */
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Re-find the tracepoint; skip entries whose owning process has no
	 * remaining active providers (ftpc_acount == 0).
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL || tp->ftt_retids == NULL) {
		mutex_exit(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;

		/*
		 * USDT probes at an offset (DTFTP_POST_OFFSETS) pass the
		 * probe's mapped arguments rather than offset/return values.
		 */
		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
			if (probe->ftp_argmap != NULL && fake_restore) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				/*
				 * FAKERESTORE must be visible to
				 * dtrace_probe() but must not survive an
				 * interrupt, hence the disable window.
				 */
				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else if (probe->ftp_argmap != NULL) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);

			} else if (fake_restore) {
				/*
				 * No argument map: raw arguments, pulled
				 * from the %i's because of the emulated
				 * restore.
				 */
				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, arg0, arg1,
				    arg2, arg3, arg4);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else {
				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
				    rp->r_o2, rp->r_o3, rp->r_o4);
			}

			continue;
		}

		/*
		 * If this is only a possible return point, we must
		 * be looking at a potential tail call in leaf context.
		 * If the %npc is still within this function, then we
		 * must have misidentified a jmpl as a tail-call when it
		 * is, in fact, part of a jump table. It would be nice to
		 * remove this tracepoint, but this is neither the time
		 * nor the place.
		 */
		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * It's possible for a function to branch to the delay slot
		 * of an instruction that we've identified as a return site.
		 * We can detect this spurious return probe activation by
		 * observing that in this case %npc will be %pc + 4 and %npc
		 * will be inside the current function (unless the user is
		 * doing _crazy_ instruction picking in which case there's
		 * very little we can do). The second check is important
		 * in case the last instructions of a function make a tail-
		 * call to the function located immediately subsequent.
		 */
		if (rp->r_npc == rp->r_pc + 4 &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * The first argument is the offset of return tracepoint
		 * in the function; the remaining arguments are the return
		 * values.
		 *
		 * If fake_restore is set, we need to pull the return values
		 * out of the %i's rather than the %o's -- a little trickier.
		 */
		if (!fake_restore) {
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
		} else {
			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);

			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    arg0, arg1, arg2, arg3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
			dtrace_interrupt_enable(cookie);
		}
	}

	mutex_exit(pid_mtx);
}
 412 
 413 int
 414 fasttrap_pid_probe(struct regs *rp)
 415 {
 416         proc_t *p = curproc;
 417         fasttrap_tracepoint_t *tp, tp_local;
 418         fasttrap_id_t *id;
 419         pid_t pid;
 420         uintptr_t pc = rp->r_pc;
 421         uintptr_t npc = rp->r_npc;
 422         uintptr_t orig_pc = pc;
 423         fasttrap_bucket_t *bucket;
 424         kmutex_t *pid_mtx;
 425         uint_t fake_restore = 0, is_enabled = 0;
 426         dtrace_icookie_t cookie;
 427 
 428         /*
 429          * It's possible that a user (in a veritable orgy of bad planning)
 430          * could redirect this thread's flow of control before it reached the
 431          * return probe fasttrap. In this case we need to kill the process
 432          * since it's in a unrecoverable state.
 433          */
 434         if (curthread->t_dtrace_step) {
 435                 ASSERT(curthread->t_dtrace_on);
 436                 fasttrap_sigtrap(p, curthread, pc);
 437                 return (0);
 438         }
 439 
 440         /*
 441          * Clear all user tracing flags.
 442          */
 443         curthread->t_dtrace_ft = 0;
 444         curthread->t_dtrace_pc = 0;
 445         curthread->t_dtrace_npc = 0;
 446         curthread->t_dtrace_scrpc = 0;
 447         curthread->t_dtrace_astpc = 0;
 448 
 449         /*
 450          * Treat a child created by a call to vfork(2) as if it were its
 451          * parent. We know that there's only one thread of control in such a
 452          * process: this one.
 453          */
 454         while (p->p_flag & SVFORK) {
 455                 p = p->p_parent;
 456         }
 457 
 458         pid = p->p_pid;
 459         pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
 460         mutex_enter(pid_mtx);
 461         bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
 462 
 463         /*
 464          * Lookup the tracepoint that the process just hit.
 465          */
 466         for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
 467                 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
 468                     tp->ftt_proc->ftpc_acount != 0)
 469                         break;
 470         }
 471 
 472         /*
 473          * If we couldn't find a matching tracepoint, either a tracepoint has
 474          * been inserted without using the pid<pid> ioctl interface (see
 475          * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
 476          */
 477         if (tp == NULL) {
 478                 mutex_exit(pid_mtx);
 479                 return (-1);
 480         }
 481 
 482         for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
 483                 fasttrap_probe_t *probe = id->fti_probe;
 484                 int isentry = (id->fti_ptype == DTFTP_ENTRY);
 485 
 486                 if (id->fti_ptype == DTFTP_IS_ENABLED) {
 487                         is_enabled = 1;
 488                         continue;
 489                 }
 490 
 491                 /*
 492                  * We note that this was an entry probe to help ustack() find
 493                  * the first caller.
 494                  */
 495                 if (isentry) {
 496                         cookie = dtrace_interrupt_disable();
 497                         DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
 498                 }
 499                 dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
 500                     rp->r_o3, rp->r_o4);
 501                 if (isentry) {
 502                         DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
 503                         dtrace_interrupt_enable(cookie);
 504                 }
 505         }
 506 
 507         /*
 508          * We're about to do a bunch of work so we cache a local copy of
 509          * the tracepoint to emulate the instruction, and then find the
 510          * tracepoint again later if we need to light up any return probes.
 511          */
 512         tp_local = *tp;
 513         mutex_exit(pid_mtx);
 514         tp = &tp_local;
 515 
 516         /*
 517          * If there's an is-enabled probe conntected to this tracepoint it
 518          * means that there was a 'mov %g0, %o0' instruction that was placed
 519          * there by DTrace when the binary was linked. As this probe is, in
 520          * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
 521          * bypass all the instruction emulation logic since we know the
 522          * inevitable result. It's possible that a user could construct a
 523          * scenario where the 'is-enabled' probe was on some other
 524          * instruction, but that would be a rather exotic way to shoot oneself
 525          * in the foot.
 526          */
 527         if (is_enabled) {
 528                 rp->r_o0 = 1;
 529                 pc = rp->r_npc;
 530                 npc = pc + 4;
 531                 goto done;
 532         }
 533 
 534         /*
 535          * We emulate certain types of instructions to ensure correctness
 536          * (in the case of position dependent instructions) or optimize
 537          * common cases. The rest we have the thread execute back in user-
 538          * land.
 539          */
 540         switch (tp->ftt_type) {
 541         case FASTTRAP_T_SAVE:
 542         {
 543                 int32_t imm;
 544 
 545                 /*
 546                  * This an optimization to let us handle function entry
 547                  * probes more efficiently. Many functions begin with a save
 548                  * instruction that follows the pattern:
 549                  *      save    %sp, <imm>, %sp
 550                  *
 551                  * Meanwhile, we've stashed the instruction:
 552                  *      save    %g1, %g0, %sp
 553                  *
 554                  * off of %g7, so all we have to do is stick the right value
 555                  * into %g1 and reset %pc to point to the instruction we've
 556                  * cleverly hidden (%npc should not be touched).
 557                  */
 558 
 559                 imm = tp->ftt_instr << 19;
 560                 imm >>= 19;
 561                 rp->r_g1 = rp->r_sp + imm;
 562                 pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
 563                 break;
 564         }
 565 
 566         case FASTTRAP_T_RESTORE:
 567         {
 568                 ulong_t value;
 569                 uint_t rd;
 570 
 571                 /*
 572                  * This is an optimization to let us handle function
 573                  * return probes more efficiently. Most non-leaf functions
 574                  * end with the sequence:
 575                  *      ret
 576                  *      restore <reg>, <reg_or_imm>, %oX
 577                  *
 578                  * We've stashed the instruction:
 579                  *      restore %g0, %g0, %g0
 580                  *
 581                  * off of %g7 so we just need to place the correct value
 582                  * in the right %i register (since after our fake-o
 583                  * restore, the %i's will become the %o's) and set the %pc
 584                  * to point to our hidden restore. We also set fake_restore to
 585                  * let fasttrap_return_common() know that it will find the
 586                  * return values in the %i's rather than the %o's.
 587                  */
 588 
 589                 if (I(tp->ftt_instr)) {
 590                         int32_t imm;
 591 
 592                         imm = tp->ftt_instr << 19;
 593                         imm >>= 19;
 594                         value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
 595                 } else {
 596                         value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
 597                             fasttrap_getreg(rp, RS2(tp->ftt_instr));
 598                 }
 599 
 600                 /*
 601                  * Convert %o's to %i's; leave %g's as they are.
 602                  */
 603                 rd = RD(tp->ftt_instr);
 604                 fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
 605 
 606                 pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
 607                 fake_restore = 1;
 608                 break;
 609         }
 610 
 611         case FASTTRAP_T_RETURN:
 612         {
 613                 uintptr_t target;
 614 
 615                 /*
 616                  * A return instruction is like a jmpl (without the link
 617                  * part) that executes an implicit restore. We've stashed
 618                  * the instruction:
 619                  *      return %o0
 620                  *
 621                  * off of %g7 so we just need to place the target in %o0
 622                  * and set the %pc to point to the stashed return instruction.
 623                  * We use %o0 since that register disappears after the return
 624                  * executes, erasing any evidence of this tampering.
 625                  */
 626                 if (I(tp->ftt_instr)) {
 627                         int32_t imm;
 628 
 629                         imm = tp->ftt_instr << 19;
 630                         imm >>= 19;
 631                         target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
 632                 } else {
 633                         target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
 634                             fasttrap_getreg(rp, RS2(tp->ftt_instr));
 635                 }
 636 
 637                 fasttrap_putreg(rp, R_O0, target);
 638 
 639                 pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
 640                 fake_restore = 1;
 641                 break;
 642         }
 643 
 644         case FASTTRAP_T_OR:
 645         {
 646                 ulong_t value;
 647 
 648                 if (I(tp->ftt_instr)) {
 649                         int32_t imm;
 650 
 651                         imm = tp->ftt_instr << 19;
 652                         imm >>= 19;
 653                         value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
 654                 } else {
 655                         value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
 656                             fasttrap_getreg(rp, RS2(tp->ftt_instr));
 657                 }
 658 
 659                 fasttrap_putreg(rp, RD(tp->ftt_instr), value);
 660                 pc = rp->r_npc;
 661                 npc = pc + 4;
 662                 break;
 663         }
 664 
 665         case FASTTRAP_T_SETHI:
 666                 if (RD(tp->ftt_instr) != R_G0) {
 667                         uint32_t imm32 = tp->ftt_instr << 10;
 668                         fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
 669                 }
 670                 pc = rp->r_npc;
 671                 npc = pc + 4;
 672                 break;
 673 
 674         case FASTTRAP_T_CCR:
 675         {
 676                 uint_t c, v, z, n, taken;
 677                 uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;
 678 
 679                 if (tp->ftt_cc != 0)
 680                         ccr >>= 4;
 681 
 682                 c = (ccr >> 0) & 1;
 683                 v = (ccr >> 1) & 1;
 684                 z = (ccr >> 2) & 1;
 685                 n = (ccr >> 3) & 1;
 686 
 687                 switch (tp->ftt_code) {
 688                 case 0x0:       /* BN */
 689                         taken = 0;              break;
 690                 case 0x1:       /* BE */
 691                         taken = z;              break;
 692                 case 0x2:       /* BLE */
 693                         taken = z | (n ^ v);    break;
 694                 case 0x3:       /* BL */
 695                         taken = n ^ v;          break;
 696                 case 0x4:       /* BLEU */
 697                         taken = c | z;          break;
 698                 case 0x5:       /* BCS (BLU) */
 699                         taken = c;              break;
 700                 case 0x6:       /* BNEG */
 701                         taken = n;              break;
 702                 case 0x7:       /* BVS */
 703                         taken = v;              break;
 704                 case 0x8:       /* BA */
 705                         /*
 706                          * We handle the BA case differently since the annul
 707                          * bit means something slightly different.
 708                          */
 709                         panic("fasttrap: mishandled a branch");
 710                         taken = 1;              break;
 711                 case 0x9:       /* BNE */
 712                         taken = ~z;             break;
 713                 case 0xa:       /* BG */
 714                         taken = ~(z | (n ^ v)); break;
 715                 case 0xb:       /* BGE */
 716                         taken = ~(n ^ v);       break;
 717                 case 0xc:       /* BGU */
 718                         taken = ~(c | z);       break;
 719                 case 0xd:       /* BCC (BGEU) */
 720                         taken = ~c;             break;
 721                 case 0xe:       /* BPOS */
 722                         taken = ~n;             break;
 723                 case 0xf:       /* BVC */
 724                         taken = ~v;             break;
 725                 }
 726 
 727                 if (taken & 1) {
 728                         pc = rp->r_npc;
 729                         npc = tp->ftt_dest;
 730                 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
 731                         /*
 732                          * Untaken annulled branches don't execute the
 733                          * instruction in the delay slot.
 734                          */
 735                         pc = rp->r_npc + 4;
 736                         npc = pc + 4;
 737                 } else {
 738                         pc = rp->r_npc;
 739                         npc = pc + 4;
 740                 }
 741                 break;
 742         }
 743 
 744         case FASTTRAP_T_FCC:
 745         {
 746                 uint_t fcc;
 747                 uint_t taken;
 748                 uint64_t fsr;
 749 
 750                 dtrace_getfsr(&fsr);
 751 
 752                 if (tp->ftt_cc == 0) {
 753                         fcc = (fsr >> 10) & 0x3;
 754                 } else {
 755                         uint_t shift;
 756                         ASSERT(tp->ftt_cc <= 3);
 757                         shift = 30 + tp->ftt_cc * 2;
 758                         fcc = (fsr >> shift) & 0x3;
 759                 }
 760 
 761                 switch (tp->ftt_code) {
 762                 case 0x0:       /* FBN */
 763                         taken = (1 << fcc) & (0|0|0|0);       break;
 764                 case 0x1:       /* FBNE */
 765                         taken = (1 << fcc) & (8|4|2|0);       break;
 766                 case 0x2:       /* FBLG */
 767                         taken = (1 << fcc) & (0|4|2|0);       break;
 768                 case 0x3:       /* FBUL */
 769                         taken = (1 << fcc) & (8|0|2|0);       break;
 770                 case 0x4:       /* FBL */
 771                         taken = (1 << fcc) & (0|0|2|0);       break;
 772                 case 0x5:       /* FBUG */
 773                         taken = (1 << fcc) & (8|4|0|0);       break;
 774                 case 0x6:       /* FBG */
 775                         taken = (1 << fcc) & (0|4|0|0);       break;
 776                 case 0x7:       /* FBU */
 777                         taken = (1 << fcc) & (8|0|0|0);       break;
 778                 case 0x8:       /* FBA */
 779                         /*
 780                          * We handle the FBA case differently since the annul
 781                          * bit means something slightly different.
 782                          */
 783                         panic("fasttrap: mishandled a branch");
 784                         taken = (1 << fcc) & (8|4|2|1);       break;
 785                 case 0x9:       /* FBE */
 786                         taken = (1 << fcc) & (0|0|0|1);       break;
 787                 case 0xa:       /* FBUE */
 788                         taken = (1 << fcc) & (8|0|0|1);       break;
 789                 case 0xb:       /* FBGE */
 790                         taken = (1 << fcc) & (0|4|0|1);       break;
 791                 case 0xc:       /* FBUGE */
 792                         taken = (1 << fcc) & (8|4|0|1);       break;
 793                 case 0xd:       /* FBLE */
 794                         taken = (1 << fcc) & (0|0|2|1);       break;
 795                 case 0xe:       /* FBULE */
 796                         taken = (1 << fcc) & (8|0|2|1);       break;
 797                 case 0xf:       /* FBO */
 798                         taken = (1 << fcc) & (0|4|2|1);       break;
 799                 }
 800 
 801                 if (taken) {
 802                         pc = rp->r_npc;
 803                         npc = tp->ftt_dest;
 804                 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
 805                         /*
 806                          * Untaken annulled branches don't execute the
 807                          * instruction in the delay slot.
 808                          */
 809                         pc = rp->r_npc + 4;
 810                         npc = pc + 4;
 811                 } else {
 812                         pc = rp->r_npc;
 813                         npc = pc + 4;
 814                 }
 815                 break;
 816         }
 817 
 818         case FASTTRAP_T_REG:
 819         {
 820                 int64_t value;
 821                 uint_t taken;
 822                 uint_t reg = RS1(tp->ftt_instr);
 823 
 824                 /*
 825                  * An ILP32 process shouldn't be using a branch predicated on
 826                  * an %i or an %l since it would violate the ABI. It's a
 827                  * violation of the ABI because we can't ensure deterministic
 828                  * behavior. We should have identified this case when we
 829                  * enabled the probe.
 830                  */
 831                 ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);
 832 
 833                 value = (int64_t)fasttrap_getreg(rp, reg);
 834 
 835                 switch (tp->ftt_code) {
 836                 case 0x1:       /* BRZ */
 837                         taken = (value == 0);   break;
 838                 case 0x2:       /* BRLEZ */
 839                         taken = (value <= 0);        break;
 840                 case 0x3:       /* BRLZ */
 841                         taken = (value < 0); break;
 842                 case 0x5:       /* BRNZ */
 843                         taken = (value != 0);   break;
 844                 case 0x6:       /* BRGZ */
 845                         taken = (value > 0); break;
 846                 case 0x7:       /* BRGEZ */
 847                         taken = (value >= 0);        break;
 848                 default:
 849                 case 0x0:
 850                 case 0x4:
 851                         panic("fasttrap: mishandled a branch");
 852                 }
 853 
 854                 if (taken) {
 855                         pc = rp->r_npc;
 856                         npc = tp->ftt_dest;
 857                 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
 858                         /*
 859                          * Untaken annulled branches don't execute the
 860                          * instruction in the delay slot.
 861                          */
 862                         pc = rp->r_npc + 4;
 863                         npc = pc + 4;
 864                 } else {
 865                         pc = rp->r_npc;
 866                         npc = pc + 4;
 867                 }
 868                 break;
 869         }
 870 
 871         case FASTTRAP_T_ALWAYS:
 872                 /*
 873                  * BAs, BA,As...
 874                  */
 875 
 876                 if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
 877                         /*
 878                          * Annulled branch always instructions never execute
 879                          * the instruction in the delay slot.
 880                          */
 881                         pc = tp->ftt_dest;
 882                         npc = tp->ftt_dest + 4;
 883                 } else {
 884                         pc = rp->r_npc;
 885                         npc = tp->ftt_dest;
 886                 }
 887                 break;
 888 
 889         case FASTTRAP_T_RDPC:
 890                 fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
 891                 pc = rp->r_npc;
 892                 npc = pc + 4;
 893                 break;
 894 
 895         case FASTTRAP_T_CALL:
 896                 /*
 897                  * It's a call _and_ link remember...
 898                  */
 899                 rp->r_o7 = rp->r_pc;
 900                 pc = rp->r_npc;
 901                 npc = tp->ftt_dest;
 902                 break;
 903 
 904         case FASTTRAP_T_JMPL:
 905                 pc = rp->r_npc;
 906 
 907                 if (I(tp->ftt_instr)) {
 908                         uint_t rs1 = RS1(tp->ftt_instr);
 909                         int32_t imm;
 910 
 911                         imm = tp->ftt_instr << 19;
 912                         imm >>= 19;
 913                         npc = fasttrap_getreg(rp, rs1) + imm;
 914                 } else {
 915                         uint_t rs1 = RS1(tp->ftt_instr);
 916                         uint_t rs2 = RS2(tp->ftt_instr);
 917 
 918                         npc = fasttrap_getreg(rp, rs1) +
 919                             fasttrap_getreg(rp, rs2);
 920                 }
 921 
 922                 /*
 923                  * Do the link part of the jump-and-link instruction.
 924                  */
 925                 fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
 926 
 927                 break;
 928 
 929         case FASTTRAP_T_COMMON:
 930         {
 931                 curthread->t_dtrace_scrpc = rp->r_g7;
 932                 curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
 933 
 934                 /*
 935                  * Copy the instruction to a reserved location in the
 936                  * user-land thread structure, then set the PC to that
 937                  * location and leave the NPC alone. We take pains to ensure
 938                  * consistency in the instruction stream (See SPARC
 939                  * Architecture Manual Version 9, sections 8.4.7, A.20, and
 940                  * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
 941                  * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
 942                  * instruction into the user's address space without
 943                  * bypassing the I$. There's no AS_USER version of this ASI
 944                  * (as exist for other ASIs) so we use the lofault
 945                  * mechanism to catch faults.
 946                  */
 947                 if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
 948                         /*
 949                          * If the copyout fails, then the process's state
 950                          * is not consistent (the effects of the traced
 951                          * instruction will never be seen). This process
 952                          * cannot be allowed to continue execution.
 953                          */
 954                         fasttrap_sigtrap(curproc, curthread, pc);
 955                         return (0);
 956                 }
 957 
 958                 curthread->t_dtrace_pc = pc;
 959                 curthread->t_dtrace_npc = npc;
 960                 curthread->t_dtrace_on = 1;
 961 
 962                 pc = curthread->t_dtrace_scrpc;
 963 
 964                 if (tp->ftt_retids != NULL) {
 965                         curthread->t_dtrace_step = 1;
 966                         curthread->t_dtrace_ret = 1;
 967                         npc = curthread->t_dtrace_astpc;
 968                 }
 969                 break;
 970         }
 971 
 972         default:
 973                 panic("fasttrap: mishandled an instruction");
 974         }
 975 
 976         /*
 977          * This bit me in the ass a couple of times, so lets toss this
 978          * in as a cursory sanity check.
 979          */
 980         ASSERT(pc != rp->r_g7 + 4);
 981         ASSERT(pc != rp->r_g7 + 8);
 982 
 983 done:
 984         /*
 985          * If there were no return probes when we first found the tracepoint,
 986          * we should feel no obligation to honor any return probes that were
 987          * subsequently enabled -- they'll just have to wait until the next
 988          * time around.
 989          */
 990         if (tp->ftt_retids != NULL) {
 991                 /*
 992                  * We need to wait until the results of the instruction are
 993                  * apparent before invoking any return probes. If this
 994                  * instruction was emulated we can just call
 995                  * fasttrap_return_common(); if it needs to be executed, we
 996                  * need to wait until we return to the kernel.
 997                  */
 998                 if (tp->ftt_type != FASTTRAP_T_COMMON) {
 999                         fasttrap_return_common(rp, orig_pc, pid, fake_restore);
1000                 } else {
1001                         ASSERT(curthread->t_dtrace_ret != 0);
1002                         ASSERT(curthread->t_dtrace_pc == orig_pc);
1003                         ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
1004                         ASSERT(npc == curthread->t_dtrace_astpc);
1005                 }
1006         }
1007 
1008         ASSERT(pc != 0);
1009         rp->r_pc = pc;
1010         rp->r_npc = npc;
1011 
1012         return (0);
1013 }
1014 
1015 int
1016 fasttrap_return_probe(struct regs *rp)
1017 {
1018         proc_t *p = ttoproc(curthread);
1019         pid_t pid;
1020         uintptr_t pc = curthread->t_dtrace_pc;
1021         uintptr_t npc = curthread->t_dtrace_npc;
1022 
1023         curthread->t_dtrace_pc = 0;
1024         curthread->t_dtrace_npc = 0;
1025         curthread->t_dtrace_scrpc = 0;
1026         curthread->t_dtrace_astpc = 0;
1027 
1028         /*
1029          * Treat a child created by a call to vfork(2) as if it were its
1030          * parent. We know there's only one thread of control in such a
1031          * process: this one.
1032          */
1033         while (p->p_flag & SVFORK) {
1034                 p = p->p_parent;
1035         }
1036 
1037         /*
1038          * We set the %pc and %npc to their values when the traced
1039          * instruction was initially executed so that it appears to
1040          * dtrace_probe() that we're on the original instruction, and so that
1041          * the user can't easily detect our complex web of lies.
1042          * dtrace_return_probe() (our caller) will correctly set %pc and %npc
1043          * after we return.
1044          */
1045         rp->r_pc = pc;
1046         rp->r_npc = npc;
1047 
1048         pid = p->p_pid;
1049         fasttrap_return_common(rp, pc, pid, 0);
1050 
1051         return (0);
1052 }
1053 
1054 int
1055 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
1056 {
1057         fasttrap_instr_t instr = FASTTRAP_INSTR;
1058 
1059         if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
1060                 return (-1);
1061 
1062         return (0);
1063 }
1064 
1065 int
1066 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
1067 {
1068         fasttrap_instr_t instr;
1069 
1070         /*
1071          * Distinguish between read or write failures and a changed
1072          * instruction.
1073          */
1074         if (uread(p, &instr, 4, tp->ftt_pc) != 0)
1075                 return (0);
1076         if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
1077                 return (0);
1078         if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
1079                 return (-1);
1080 
1081         return (0);
1082 }
1083 
/*
 * Initialize the tracepoint for the instruction at address `pc' in process
 * `p': read the instruction out of the process, classify it (filling in
 * tp->ftt_type and, for control transfers, ftt_cc/ftt_code/ftt_dest and the
 * annul flag) so that fasttrap_pid_probe() knows whether to emulate it in
 * the kernel or execute it out-of-line, and record whether the site may be
 * a function return. Returns 0 on success, or -1 if the instruction can't
 * be read or is one we refuse to trace (illtraps, breakpoint traps, and
 * encodings that are architecturally illegal).
 */
int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
        uint32_t instr;
        int32_t disp;

        /*
         * Read the instruction at the given address out of the process's
         * address space. We don't have to worry about a debugger
         * changing this instruction before we overwrite it with our trap
         * instruction since P_PR_LOCK is set.
         */
        if (uread(p, &instr, 4, pc) != 0)
                return (-1);

        /*
         * Decode the instruction to fill in the probe flags. We can have
         * the process execute most instructions on its own using a pc/npc
         * trick, but pc-relative control transfer present a problem since
         * we're relocating the instruction. We emulate these instructions
         * in the kernel. We assume a default type and over-write that as
         * needed.
         *
         * pc-relative instructions must be emulated for correctness;
         * other instructions (which represent a large set of commonly traced
         * instructions) are emulated or otherwise optimized for performance.
         */
        tp->ftt_type = FASTTRAP_T_COMMON;
        if (OP(instr) == 1) {
                /*
                 * Call instructions.
                 */
                tp->ftt_type = FASTTRAP_T_CALL;
                /* disp30 is a word offset; convert to a byte offset. */
                disp = DISP30(instr) << 2;
                tp->ftt_dest = pc + (intptr_t)disp;

        } else if (OP(instr) == 0) {
                /*
                 * Branch instructions.
                 *
                 * Unconditional branches need careful attention when they're
                 * annulled: annulled unconditional branches never execute
                 * the instruction in the delay slot.
                 */
                switch (OP2(instr)) {
                case OP2_ILLTRAP:
                case 0x7:
                        /*
                         * The compiler may place an illtrap after a call to
                         * a function that returns a structure. In the case of
                         * a returned structure, the compiler places an illtrap
                         * whose const22 field is the size of the returned
                         * structure immediately following the delay slot of
                         * the call. To stay out of the way, we refuse to
                         * place tracepoints on top of illtrap instructions.
                         *
                         * This is one of the dumbest architectural decisions
                         * I've ever had to work around.
                         *
                         * We also identify the only illegal op2 value (See
                         * SPARC Architecture Manual Version 9, E.2 table 31).
                         */
                        return (-1);

                case OP2_BPcc:
                        /* cond == 8 is BA: branch always. */
                        if (COND(instr) == 8) {
                                tp->ftt_type = FASTTRAP_T_ALWAYS;
                        } else {
                                /*
                                 * Check for an illegal instruction.
                                 */
                                if (CC(instr) & 1)
                                        return (-1);
                                tp->ftt_type = FASTTRAP_T_CCR;
                                tp->ftt_cc = CC(instr);
                                tp->ftt_code = COND(instr);
                        }

                        if (A(instr) != 0)
                                tp->ftt_flags |= FASTTRAP_F_ANNUL;

                        /*
                         * Sign-extend the 19-bit word displacement and
                         * scale it by 4 in one shift pair (<< 13 moves the
                         * sign bit to bit 31; the arithmetic >> 11 leaves
                         * the value multiplied by 4).
                         */
                        disp = DISP19(instr);
                        disp <<= 13;
                        disp >>= 11;
                        tp->ftt_dest = pc + (intptr_t)disp;
                        break;

                case OP2_Bicc:
                        if (COND(instr) == 8) {
                                tp->ftt_type = FASTTRAP_T_ALWAYS;
                        } else {
                                /* V8-style branch: implicitly uses icc. */
                                tp->ftt_type = FASTTRAP_T_CCR;
                                tp->ftt_cc = 0;
                                tp->ftt_code = COND(instr);
                        }

                        if (A(instr) != 0)
                                tp->ftt_flags |= FASTTRAP_F_ANNUL;

                        /* Sign-extend the 22-bit displacement, times 4. */
                        disp = DISP22(instr);
                        disp <<= 10;
                        disp >>= 8;
                        tp->ftt_dest = pc + (intptr_t)disp;
                        break;

                case OP2_BPr:
                        /*
                         * Check for an illegal instruction.
                         */
                        if ((RCOND(instr) & 3) == 0)
                                return (-1);

                        /*
                         * It's a violation of the v8plus ABI to use a
                         * register-predicated branch in a 32-bit app if
                         * the register used is an %l or an %i (%gs and %os
                         * are legit because they're not saved to the stack
                         * in 32-bit words when we take a trap).
                         */
                        if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
                                return (-1);

                        tp->ftt_type = FASTTRAP_T_REG;
                        if (A(instr) != 0)
                                tp->ftt_flags |= FASTTRAP_F_ANNUL;
                        /* Sign-extend the 16-bit displacement, times 4. */
                        disp = DISP16(instr);
                        disp <<= 16;
                        disp >>= 14;
                        tp->ftt_dest = pc + (intptr_t)disp;
                        tp->ftt_code = RCOND(instr);
                        break;

                case OP2_SETHI:
                        tp->ftt_type = FASTTRAP_T_SETHI;
                        break;

                case OP2_FBPfcc:
                        /* cond == 8 is FBA: branch always. */
                        if (COND(instr) == 8) {
                                tp->ftt_type = FASTTRAP_T_ALWAYS;
                        } else {
                                tp->ftt_type = FASTTRAP_T_FCC;
                                tp->ftt_cc = CC(instr);
                                tp->ftt_code = COND(instr);
                        }

                        if (A(instr) != 0)
                                tp->ftt_flags |= FASTTRAP_F_ANNUL;

                        /* Sign-extend the 19-bit displacement, times 4. */
                        disp = DISP19(instr);
                        disp <<= 13;
                        disp >>= 11;
                        tp->ftt_dest = pc + (intptr_t)disp;
                        break;

                case OP2_FBfcc:
                        if (COND(instr) == 8) {
                                tp->ftt_type = FASTTRAP_T_ALWAYS;
                        } else {
                                /* V8-style FP branch: implicitly fcc0. */
                                tp->ftt_type = FASTTRAP_T_FCC;
                                tp->ftt_cc = 0;
                                tp->ftt_code = COND(instr);
                        }

                        if (A(instr) != 0)
                                tp->ftt_flags |= FASTTRAP_F_ANNUL;

                        /* Sign-extend the 22-bit displacement, times 4. */
                        disp = DISP22(instr);
                        disp <<= 10;
                        disp >>= 8;
                        tp->ftt_dest = pc + (intptr_t)disp;
                        break;
                }

        } else if (OP(instr) == 2) {
                switch (OP3(instr)) {
                case OP3_RETURN:
                        tp->ftt_type = FASTTRAP_T_RETURN;
                        break;

                case OP3_JMPL:
                        tp->ftt_type = FASTTRAP_T_JMPL;
                        break;

                case OP3_RD:
                        /*
                         * rs1 == 5 selects %pc (rd %pc, %rd); the result is
                         * position-dependent so it must be emulated.
                         */
                        if (RS1(instr) == 5)
                                tp->ftt_type = FASTTRAP_T_RDPC;
                        break;

                case OP3_SAVE:
                        /*
                         * We optimize for save instructions at function
                         * entry; see the comment in fasttrap_pid_probe()
                         * (near FASTTRAP_T_SAVE) for details.
                         */
                        if (fasttrap_optimize_save != 0 &&
                            type == DTFTP_ENTRY &&
                            I(instr) == 1 && RD(instr) == R_SP)
                                tp->ftt_type = FASTTRAP_T_SAVE;
                        break;

                case OP3_RESTORE:
                        /*
                         * We optimize restore instructions at function
                         * return; see the comment in fasttrap_pid_probe()
                         * (near FASTTRAP_T_RESTORE) for details.
                         *
                         * rd must be an %o or %g register.
                         */
                        if ((RD(instr) & 0x10) == 0)
                                tp->ftt_type = FASTTRAP_T_RESTORE;
                        break;

                case OP3_OR:
                        /*
                         * A large proportion of instructions in the delay
                         * slot of retl instructions are or's so we emulate
                         * these downstairs as an optimization.
                         */
                        tp->ftt_type = FASTTRAP_T_OR;
                        break;

                case OP3_TCC:
                        /*
                         * Breakpoint instructions are effectively position-
                         * dependent since the debugger uses the %pc value
                         * to lookup which breakpoint was executed. As a
                         * result, we can't actually instrument breakpoints.
                         */
                        if (SW_TRAP(instr) == ST_BREAKPOINT)
                                return (-1);
                        break;

                case 0x19:
                case 0x1d:
                case 0x29:
                case 0x33:
                case 0x3f:
                        /*
                         * Identify illegal instructions (See SPARC
                         * Architecture Manual Version 9, E.2 table 32).
                         */
                        return (-1);
                }
        } else if (OP(instr) == 3) {
                uint32_t op3 = OP3(instr);

                /*
                 * Identify illegal instructions (See SPARC Architecture
                 * Manual Version 9, E.2 table 33).
                 */
                if ((op3 & 0x28) == 0x28) {
                        if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
                            op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
                                return (-1);
                } else {
                        if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
                                return (-1);
                }
        }

        tp->ftt_instr = instr;

        /*
         * We don't know how this tracepoint is going to be used, but in case
         * it's used as part of a function return probe, we need to indicate
         * whether it's always a return site or only potentially a return
         * site. If it's part of a return probe, it's always going to be a
         * return from that function if it's a restore instruction or if
         * the previous instruction was a return. If we could reliably
         * distinguish jump tables from return sites, this wouldn't be
         * necessary.
         */
        if (tp->ftt_type != FASTTRAP_T_RESTORE &&
            (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
            !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
                tp->ftt_flags |= FASTTRAP_F_RETMAYBE;

        return (0);
}
1364 
1365 /*ARGSUSED*/
1366 uint64_t
1367 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1368     int aframes)
1369 {
1370         return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1371 }
1372 
1373 /*ARGSUSED*/
1374 uint64_t
1375 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1376     int aframes)
1377 {
1378         return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1379 }
1380 
/*
 * Statistics counters recording which path fasttrap_getreg() used to
 * recover a windowed register: the live register windows (fast), the
 * machpcb's pending window-save buffer (mpcb), or a copyin from the
 * register save area on the user's stack (slow).
 */
static uint64_t fasttrap_getreg_fast_cnt;
static uint64_t fasttrap_getreg_mpcb_cnt;
static uint64_t fasttrap_getreg_slow_cnt;
1384 
/*
 * Return the value of user register `reg' (0-31) for the trapping thread.
 * %g0 (reg 0) always reads as zero, and %g1-%g7/%o0-%o7 (regs 1-15) come
 * straight out of the saved struct regs. The %l and %i registers (16-31)
 * live in a register window, so we try, in order: the CPU's register
 * windows, the machpcb's buffered (not-yet-flushed) windows, and finally
 * the register save area on the user's stack. If the stack read faults,
 * the process is killed with SIGILL and 0 is returned.
 */
static ulong_t
fasttrap_getreg(struct regs *rp, uint_t reg)
{
        ulong_t value;
        dtrace_icookie_t cookie;
        struct machpcb *mpcb;
        extern ulong_t dtrace_getreg_win(uint_t, uint_t);

        /*
         * We have the %os and %gs in our struct regs, but if we need to
         * snag a %l or %i we need to go scrounging around in the process's
         * address space.
         */
        if (reg == 0)
                return (0);

        if (reg < 16)
                return ((&rp->r_g1)[reg - 1]);

        /*
         * Before we look at the user's stack, we'll check the register
         * windows to see if the information we want is in there.
         */
        cookie = dtrace_interrupt_disable();
        if (dtrace_getotherwin() > 0) {
                value = dtrace_getreg_win(reg, 1);
                dtrace_interrupt_enable(cookie);

                atomic_add_64(&fasttrap_getreg_fast_cnt, 1);

                return (value);
        }
        dtrace_interrupt_enable(cookie);

        /*
         * First check the machpcb structure to see if we've already read
         * in the register window we're looking for; if we haven't, (and
         * we probably haven't) try to copy in the value of the register.
         */
        /* LINTED - alignment */
        mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

        if (get_udatamodel() == DATAMODEL_NATIVE) {
                struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

                if (mpcb->mpcb_wbcnt > 0) {
                        struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
                        int i = mpcb->mpcb_wbcnt;
                        /*
                         * Scan the buffered windows newest-first for one
                         * whose saved %sp matches ours. Note that indexing
                         * rw_local[reg - 16] reaches the %i registers too,
                         * since the window's 16-word save area is
                         * contiguous.
                         */
                        do {
                                i--;
                                if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
                                        continue;

                                atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
                                return (rwin[i].rw_local[reg - 16]);
                        } while (i > 0);
                }

                /* Window not buffered; read it from the user's stack. */
                if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
                        goto err;
        } else {
                /* ILP32 process: 32-bit frames, no stack bias. */
                struct frame32 *fr =
                    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
                uint32_t *v32 = (uint32_t *)&value;

                if (mpcb->mpcb_wbcnt > 0) {
                        struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
                        int i = mpcb->mpcb_wbcnt;
                        do {
                                i--;
                                if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
                                        continue;

                                atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
                                return (rwin[i].rw_local[reg - 16]);
                        } while (i > 0);
                }

                /*
                 * SPARC is big-endian, so the 32-bit value read from the
                 * stack goes in the low half (index 1) of the 64-bit
                 * result; the high half is zeroed below.
                 */
                if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
                        goto err;

                v32[0] = 0;
        }

        atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
        return (value);

err:
        /*
         * If the copy in failed, the process will be in a irrecoverable
         * state, and we have no choice but to kill it.
         */
        psignal(ttoproc(curthread), SIGILL);
        return (0);
}
1480 
/*
 * Counters recording which path fasttrap_putreg() took: the fast path
 * (register still live in a register window), the machpcb window-buffer
 * path, or the slow copyout path.  Diagnostic only.
 */
static uint64_t fasttrap_putreg_fast_cnt;
static uint64_t fasttrap_putreg_mpcb_cnt;
static uint64_t fasttrap_putreg_slow_cnt;
1484 
/*
 * Store `value' into user-visible register `reg' (SPARC register number
 * 0-31) for the user thread whose saved state is `rp'.  Locals and ins
 * (16-31) may live in a still-active register window, in the machpcb
 * window save buffer, or in the user's stack frame; each case is handled
 * in turn.  If the value cannot be recorded anywhere, the process is
 * killed with SIGILL.
 */
static void
fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
{
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern void dtrace_putreg_win(uint_t, ulong_t);

	/* Writes to %g0 are always discarded. */
	if (reg == 0)
		return;

	/*
	 * Globals and outs (%g1-%g7, %o0-%o7) are saved in the regs
	 * structure itself; the members starting at r_g1 are laid out in
	 * register order, so index off of r_g1.
	 */
	if (reg < 16) {
		(&rp->r_g1)[reg - 1] = value;
		return;
	}

	/*
	 * If the user process is still using some register windows, we
	 * can just place the value in the correct window.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		dtrace_putreg_win(reg, value);
		dtrace_interrupt_enable(cookie);
		atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
		return;
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First see if there's a copy of the register window in the
	 * machpcb structure that we can modify; if there isn't try to
	 * copy out the value. If that fails, we try to create a new
	 * register window in the machpcb structure. While this isn't
	 * _precisely_ the intended use of the machpcb structure, it
	 * can't cause any problems since we know at this point in the
	 * code that all of the user's data have been flushed out of the
	 * register file (since %otherwin is 0).
	 */
	/* LINTED - alignment */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		/* 64-bit frames are accessed through the stack bias. */
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		/* LINTED - alignment */
		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;

		/*
		 * Search the saved window buffer (newest entry first) for
		 * a window whose stack pointer matches this frame's.
		 */
		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				/* rw_local[] covers locals and ins. */
				rwin[i].rw_local[reg - 16] = value;
				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
				return;
			} while (i > 0);
		}

		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
			/*
			 * The copyout failed; synthesize a new window
			 * buffer entry from the user's frame (see the
			 * block comment above), provided there's room.
			 */
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
			return;
		}
	} else {
		/* ILP32 frames: 32-bit layout, no stack bias. */
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		/* LINTED - alignment */
		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
		/* Only the low 32 bits of the value are stored. */
		uint32_t v32 = (uint32_t)value;

		/*
		 * As above: search the saved window buffer for a window
		 * matching this stack pointer.
		 */
		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = v32;
				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
				return;
			} while (i > 0);
		}

		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
			/*
			 * Copyout failed; fall back to creating a new
			 * (32-bit) window buffer entry, as above.
			 */
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
			return;
		}
	}

	atomic_add_64(&fasttrap_putreg_slow_cnt, 1);
	return;

err:
	/*
	 * If we couldn't record this register's value, the process is in an
	 * irrecoverable state and we have no choice but to euthanize it.
	 */
	psignal(ttoproc(curthread), SIGILL);
}