1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/fasttrap_isa.h>
  28 #include <sys/fasttrap_impl.h>
  29 #include <sys/dtrace.h>
  30 #include <sys/dtrace_impl.h>
  31 #include <sys/cmn_err.h>
  32 #include <sys/frame.h>
  33 #include <sys/stack.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/trap.h>
  36 
  37 #include <v9/sys/machpcb.h>
  38 #include <v9/sys/privregs.h>
  39 
  40 /*
  41  * Lossless User-Land Tracing on SPARC
  42  * -----------------------------------
  43  *
  44  * The Basic Idea
  45  *
  46  * The most important design constraint is, of course, correct execution of
  47  * the user thread above all else. The next most important goal is rapid
  48  * execution. We combine execution of instructions in user-land with
  49  * emulation of certain instructions in the kernel to aim for complete
  50  * correctness and maximal performance.
  51  *
  52  * We take advantage of the split PC/NPC architecture to speed up logical
  53  * single-stepping; when we copy an instruction out to the scratch space in
  54  * the ulwp_t structure (held in the %g7 register on SPARC), we can
  55  * effectively single step by setting the PC to our scratch space and leaving
  56  * the NPC alone. This executes the replaced instruction and then continues
 * on without having to reenter the kernel as with single-stepping. The
 * obvious caveat is for instructions whose execution is PC dependent --
  59  * branches, call and link instructions (call and jmpl), and the rdpc
  60  * instruction. These instructions cannot be executed in the manner described
  61  * so they must be emulated in the kernel.
  62  *
 * Emulation for this small set of instructions is fairly simple; the most
  64  * difficult part being emulating branch conditions.
  65  *
  66  *
  67  * A Cache Heavy Portfolio
  68  *
  69  * It's important to note at this time that copying an instruction out to the
  70  * ulwp_t scratch space in user-land is rather complicated. SPARC has
  71  * separate data and instruction caches so any writes to the D$ (using a
  72  * store instruction for example) aren't necessarily reflected in the I$.
  73  * The flush instruction can be used to synchronize the two and must be used
  74  * for any self-modifying code, but the flush instruction only applies to the
  75  * primary address space (the absence of a flusha analogue to the flush
  76  * instruction that accepts an ASI argument is an obvious omission from SPARC
  77  * v9 where the notion of the alternate address space was introduced on
  78  * SPARC). To correctly copy out the instruction we must use a block store
  79  * that doesn't allocate in the D$ and ensures synchronization with the I$;
  80  * see dtrace_blksuword32() for the implementation  (this function uses
  81  * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
  82  * described). Refer to the UltraSPARC I/II manual for details on the
  83  * ASI_BLK_COMMIT_S ASI.
  84  *
  85  *
  86  * Return Subtleties
  87  *
  88  * When we're firing a return probe we need to expose the value returned by
  89  * the function being traced. Since the function can set the return value
  90  * in its last instruction, we need to fire the return probe only _after_
  91  * the effects of the instruction are apparent. For instructions that we
  92  * emulate, we can call dtrace_probe() after we've performed the emulation;
  93  * for instructions that we execute after we return to user-land, we set
  94  * %pc to the instruction we copied out (as described above) and set %npc
  95  * to a trap instruction stashed in the ulwp_t structure. After the traced
  96  * instruction is executed, the trap instruction returns control to the
  97  * kernel where we can fire the return probe.
  98  *
  99  * This need for a second trap in cases where we execute the traced
 100  * instruction makes it all the more important to emulate the most common
 101  * instructions to avoid the second trip in and out of the kernel.
 102  *
 103  *
 104  * Making it Fast
 105  *
 106  * Since copying out an instruction is neither simple nor inexpensive for the
 107  * CPU, we should attempt to avoid doing it in as many cases as possible.
 108  * Since function entry and return are usually the most interesting probe
 109  * sites, we attempt to tune the performance of the fasttrap provider around
 110  * instructions typically in those places.
 111  *
 112  * Looking at a bunch of functions in libraries and executables reveals that
 113  * most functions begin with either a save or a sethi (to setup a larger
 114  * argument to the save) and end with a restore or an or (in the case of leaf
 115  * functions). To try to improve performance, we emulate all of these
 116  * instructions in the kernel.
 117  *
 118  * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
 120  * register windows from the kernel, we emulate the implicit add that takes
 121  * place as part of those instructions and set the %pc to point to a simple
 * save or restore we've hidden in the ulwp_t structure. If we're in a
 * return probe and want to make it seem as though the tracepoint has been
 * completely executed, we need to remember that we've pulled this trick
 * with restore and pull registers from the previous window (the one that
 * we'll switch to once the simple restore instruction is executed) rather
 * than the current one. This
 127  * is why in the case of emulating a restore we set the DTrace CPU flag
 128  * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 129  * (see fasttrap_return_common()).
 130  */
 131 
 132 #define OP(x)           ((x) >> 30)
 133 #define OP2(x)          (((x) >> 22) & 0x07)
 134 #define OP3(x)          (((x) >> 19) & 0x3f)
 135 #define RCOND(x)        (((x) >> 25) & 0x07)
 136 #define COND(x)         (((x) >> 25) & 0x0f)
 137 #define A(x)            (((x) >> 29) & 0x01)
 138 #define I(x)            (((x) >> 13) & 0x01)
 139 #define RD(x)           (((x) >> 25) & 0x1f)
 140 #define RS1(x)          (((x) >> 14) & 0x1f)
 141 #define RS2(x)          (((x) >> 0) & 0x1f)
 142 #define CC(x)           (((x) >> 20) & 0x03)
 143 #define DISP16(x)       ((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
 144 #define DISP22(x)       ((x) & 0x3fffff)
 145 #define DISP19(x)       ((x) & 0x7ffff)
 146 #define DISP30(x)       ((x) & 0x3fffffff)
 147 #define SW_TRAP(x)      ((x) & 0x7f)
 148 
 149 #define OP3_OR          0x02
 150 #define OP3_RD          0x28
 151 #define OP3_JMPL        0x38
 152 #define OP3_RETURN      0x39
 153 #define OP3_TCC         0x3a
 154 #define OP3_SAVE        0x3c
 155 #define OP3_RESTORE     0x3d
 156 
 157 #define OP3_PREFETCH    0x2d
 158 #define OP3_CASA        0x3c
 159 #define OP3_PREFETCHA   0x3d
 160 #define OP3_CASXA       0x3e
 161 
 162 #define OP2_ILLTRAP     0x0
 163 #define OP2_BPcc        0x1
 164 #define OP2_Bicc        0x2
 165 #define OP2_BPr         0x3
 166 #define OP2_SETHI       0x4
 167 #define OP2_FBPfcc      0x5
 168 #define OP2_FBfcc       0x6
 169 
 170 #define R_G0            0
 171 #define R_O0            8
 172 #define R_SP            14
 173 #define R_I0            24
 174 #define R_I1            25
 175 #define R_I2            26
 176 #define R_I3            27
 177 #define R_I4            28
 178 
 179 /*
 180  * Check the comment in fasttrap.h when changing these offsets or adding
 181  * new instructions.
 182  */
 183 #define FASTTRAP_OFF_SAVE       64
 184 #define FASTTRAP_OFF_RESTORE    68
 185 #define FASTTRAP_OFF_FTRET      72
 186 #define FASTTRAP_OFF_RETURN     76
 187 
 188 #define BREAKPOINT_INSTR        0x91d02001      /* ta 1 */
 189 
 190 /*
 191  * Tunable to let users turn off the fancy save instruction optimization.
 192  * If a program is non-ABI compliant, there's a possibility that the save
 193  * instruction optimization could cause an error.
 194  */
 195 int fasttrap_optimize_save = 1;
 196 
 197 static uint64_t
 198 fasttrap_anarg(struct regs *rp, int argno)
 199 {
 200         uint64_t value;
 201 
 202         if (argno < 6)
 203                 return ((&rp->r_o0)[argno]);
 204 
 205         if (curproc->p_model == DATAMODEL_NATIVE) {
 206                 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
 207 
 208                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
 209                 value = dtrace_fulword(&fr->fr_argd[argno]);
 210                 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
 211                     CPU_DTRACE_BADALIGN);
 212         } else {
 213                 struct frame32 *fr = (struct frame32 *)rp->r_sp;
 214 
 215                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
 216                 value = dtrace_fuword32(&fr->fr_argd[argno]);
 217                 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
 218                     CPU_DTRACE_BADALIGN);
 219         }
 220 
 221         return (value);
 222 }
 223 
 224 static ulong_t fasttrap_getreg(struct regs *, uint_t);
 225 static void fasttrap_putreg(struct regs *, uint_t, ulong_t);
 226 
 227 static void
 228 fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
 229     uint_t fake_restore, int argc, uintptr_t *argv)
 230 {
 231         int i, x, cap = MIN(argc, probe->ftp_nargs);
 232         int inc = (fake_restore ? 16 : 0);
 233 
 234         /*
 235          * The only way we'll hit the fake_restore case is if a USDT probe is
 236          * invoked as a tail-call. While it wouldn't be incorrect, we can
 237          * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
 238          * directly since a tail-call can't be made if the invoked function
 239          * would use the argument dump space (i.e. if there were more than
 240          * 6 arguments). We take this shortcut because unconditionally rooting
 241          * around for R_FP (R_SP + 16) would be unnecessarily painful.
 242          */
 243 
 244         if (curproc->p_model == DATAMODEL_NATIVE) {
 245                 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
 246                 uintptr_t v;
 247 
 248                 for (i = 0; i < cap; i++) {
 249                         x = probe->ftp_argmap[i];
 250 
 251                         if (x < 6)
 252                                 argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
 253                         else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
 254                                 argv[i] = 0;
 255                 }
 256 
 257         } else {
 258                 struct frame32 *fr = (struct frame32 *)rp->r_sp;
 259                 uint32_t v;
 260 
 261                 for (i = 0; i < cap; i++) {
 262                         x = probe->ftp_argmap[i];
 263 
 264                         if (x < 6)
 265                                 argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
 266                         else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
 267                                 argv[i] = 0;
 268                 }
 269         }
 270 
 271         for (; i < argc; i++) {
 272                 argv[i] = 0;
 273         }
 274 }
 275 
/*
 * Fire any return probes registered on the tracepoint at pc in process
 * pid. Called once the effects of the traced instruction are visible in
 * rp, so the probe arguments reflect the function's actual return
 * values. If fake_restore is non-zero, a restore (or return) was
 * emulated in the kernel and the window switch hasn't happened yet, so
 * the return values still live in the %i registers; we set
 * CPU_DTRACE_FAKERESTORE so DTrace pulls registers from the right
 * window (see the block comment at the top of this file).
 */
static void
fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
    uint_t fake_restore)
{
        fasttrap_tracepoint_t *tp;
        fasttrap_bucket_t *bucket;
        fasttrap_id_t *id;
        kmutex_t *pid_mtx;
        dtrace_icookie_t cookie;

        /* Per-CPU pid lock serializes against tracepoint teardown. */
        pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
        mutex_enter(pid_mtx);
        bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

        /* Re-find the tracepoint for this (pid, pc) in the hash bucket. */
        for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
                if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
                    tp->ftt_proc->ftpc_acount != 0)
                        break;
        }

        /*
         * Don't sweat it if we can't find the tracepoint again; unlike
         * when we're in fasttrap_pid_probe(), finding the tracepoint here
         * is not essential to the correct execution of the process.
         */
        if (tp == NULL || tp->ftt_retids == NULL) {
                mutex_exit(pid_mtx);
                return;
        }

        for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
                fasttrap_probe_t *probe = id->fti_probe;

                if (id->fti_ptype == DTFTP_POST_OFFSETS) {
                        /*
                         * USDT (offset) probes take their arguments from
                         * the traced function's own arguments rather than
                         * its return values.
                         */
                        if (probe->ftp_argmap != NULL && fake_restore) {
                                /* Up to 5 mapped USDT arguments. */
                                uintptr_t t[5];

                                fasttrap_usdt_args(probe, rp, fake_restore,
                                    sizeof (t) / sizeof (t[0]), t);

                                /*
                                 * Interrupts stay disabled while the
                                 * FAKERESTORE flag is set so it can't leak
                                 * into an interrupt-context probe.
                                 */
                                cookie = dtrace_interrupt_disable();
                                DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
                                dtrace_probe(probe->ftp_id, t[0], t[1],
                                    t[2], t[3], t[4]);
                                DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
                                dtrace_interrupt_enable(cookie);

                        } else if (probe->ftp_argmap != NULL) {
                                uintptr_t t[5];

                                fasttrap_usdt_args(probe, rp, fake_restore,
                                    sizeof (t) / sizeof (t[0]), t);

                                dtrace_probe(probe->ftp_id, t[0], t[1],
                                    t[2], t[3], t[4]);

                        } else if (fake_restore) {
                                /*
                                 * No argument map: the emulated restore
                                 * means the args are still in the %i's.
                                 */
                                uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
                                uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
                                uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
                                uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
                                uintptr_t arg4 = fasttrap_getreg(rp, R_I4);

                                cookie = dtrace_interrupt_disable();
                                DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
                                dtrace_probe(probe->ftp_id, arg0, arg1,
                                    arg2, arg3, arg4);
                                DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
                                dtrace_interrupt_enable(cookie);

                        } else {
                                dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
                                    rp->r_o2, rp->r_o3, rp->r_o4);
                        }

                        continue;
                }

                /*
                 * If this is only a possible return point, we must
                 * be looking at a potential tail call in leaf context.
                 * If the %npc is still within this function, then we
                 * must have misidentified a jmpl as a tail-call when it
                 * is, in fact, part of a jump table. It would be nice to
                 * remove this tracepoint, but this is neither the time
                 * nor the place.
                 */
                if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
                    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
                        continue;

                /*
                 * It's possible for a function to branch to the delay slot
                 * of an instruction that we've identified as a return site.
                 * We can detect this spurious return probe activation by
                 * observing that in this case %npc will be %pc + 4 and %npc
                 * will be inside the current function (unless the user is
                 * doing _crazy_ instruction picking in which case there's
                 * very little we can do). The second check is important
                 * in case the last instructions of a function make a tail-
                 * call to the function located immediately subsequent.
                 */
                if (rp->r_npc == rp->r_pc + 4 &&
                    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
                        continue;

                /*
                 * The first argument is the offset of return tracepoint
                 * in the function; the remaining arguments are the return
                 * values.
                 *
                 * If fake_restore is set, we need to pull the return values
                 * out of the %i's rather than the %o's -- a little trickier.
                 */
                if (!fake_restore) {
                        dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
                            rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
                } else {
                        uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
                        uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
                        uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
                        uintptr_t arg3 = fasttrap_getreg(rp, R_I3);

                        cookie = dtrace_interrupt_disable();
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
                        dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
                            arg0, arg1, arg2, arg3);
                        DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
                        dtrace_interrupt_enable(cookie);
                }
        }

        mutex_exit(pid_mtx);
}
 410 
 411 int
 412 fasttrap_pid_probe(struct regs *rp)
 413 {
 414         proc_t *p = curproc;
 415         fasttrap_tracepoint_t *tp, tp_local;
 416         fasttrap_id_t *id;
 417         pid_t pid;
 418         uintptr_t pc = rp->r_pc;
 419         uintptr_t npc = rp->r_npc;
 420         uintptr_t orig_pc = pc;
 421         fasttrap_bucket_t *bucket;
 422         kmutex_t *pid_mtx;
 423         uint_t fake_restore = 0, is_enabled = 0;
 424         dtrace_icookie_t cookie;
 425 
 426         /*
 427          * It's possible that a user (in a veritable orgy of bad planning)
 428          * could redirect this thread's flow of control before it reached the
 429          * return probe fasttrap. In this case we need to kill the process
 430          * since it's in a unrecoverable state.
 431          */
 432         if (curthread->t_dtrace_step) {
 433                 ASSERT(curthread->t_dtrace_on);
 434                 fasttrap_sigtrap(p, curthread, pc);
 435                 return (0);
 436         }
 437 
 438         /*
 439          * Clear all user tracing flags.
 440          */
 441         curthread->t_dtrace_ft = 0;
 442         curthread->t_dtrace_pc = 0;
 443         curthread->t_dtrace_npc = 0;
 444         curthread->t_dtrace_scrpc = 0;
 445         curthread->t_dtrace_astpc = 0;
 446 
 447         /*
 448          * Treat a child created by a call to vfork(2) as if it were its
 449          * parent. We know that there's only one thread of control in such a
 450          * process: this one.
 451          */
 452         while (p->p_flag & SVFORK) {
 453                 p = p->p_parent;
 454         }
 455 
 456         pid = p->p_pid;
 457         pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
 458         mutex_enter(pid_mtx);
 459         bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
 460 
 461         /*
 462          * Lookup the tracepoint that the process just hit.
 463          */
 464         for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
 465                 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
 466                     tp->ftt_proc->ftpc_acount != 0)
 467                         break;
 468         }
 469 
 470         /*
 471          * If we couldn't find a matching tracepoint, either a tracepoint has
 472          * been inserted without using the pid<pid> ioctl interface (see
 473          * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
 474          */
 475         if (tp == NULL) {
 476                 mutex_exit(pid_mtx);
 477                 return (-1);
 478         }
 479 
 480         for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
 481                 fasttrap_probe_t *probe = id->fti_probe;
 482                 int isentry = (id->fti_ptype == DTFTP_ENTRY);
 483 
 484                 if (id->fti_ptype == DTFTP_IS_ENABLED) {
 485                         is_enabled = 1;
 486                         continue;
 487                 }
 488 
 489                 /*
 490                  * We note that this was an entry probe to help ustack() find
 491                  * the first caller.
 492                  */
 493                 if (isentry) {
 494                         cookie = dtrace_interrupt_disable();
 495                         DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
 496                 }
 497                 dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
 498                     rp->r_o3, rp->r_o4);
 499                 if (isentry) {
 500                         DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
 501                         dtrace_interrupt_enable(cookie);
 502                 }
 503         }
 504 
 505         /*
 506          * We're about to do a bunch of work so we cache a local copy of
 507          * the tracepoint to emulate the instruction, and then find the
 508          * tracepoint again later if we need to light up any return probes.
 509          */
 510         tp_local = *tp;
 511         mutex_exit(pid_mtx);
 512         tp = &tp_local;
 513 
 514         /*
 515          * If there's an is-enabled probe conntected to this tracepoint it
 516          * means that there was a 'mov %g0, %o0' instruction that was placed
 517          * there by DTrace when the binary was linked. As this probe is, in
 518          * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
 519          * bypass all the instruction emulation logic since we know the
 520          * inevitable result. It's possible that a user could construct a
 521          * scenario where the 'is-enabled' probe was on some other
 522          * instruction, but that would be a rather exotic way to shoot oneself
 523          * in the foot.
 524          */
 525         if (is_enabled) {
 526                 rp->r_o0 = 1;
 527                 pc = rp->r_npc;
 528                 npc = pc + 4;
 529                 goto done;
 530         }
 531 
 532         /*
 533          * We emulate certain types of instructions to ensure correctness
 534          * (in the case of position dependent instructions) or optimize
 535          * common cases. The rest we have the thread execute back in user-
 536          * land.
 537          */
 538         switch (tp->ftt_type) {
 539         case FASTTRAP_T_SAVE:
 540         {
 541                 int32_t imm;
 542 
 543                 /*
 544                  * This an optimization to let us handle function entry
 545                  * probes more efficiently. Many functions begin with a save
 546                  * instruction that follows the pattern:
 547                  *      save    %sp, <imm>, %sp
 548                  *
 549                  * Meanwhile, we've stashed the instruction:
 550                  *      save    %g1, %g0, %sp
 551                  *
 552                  * off of %g7, so all we have to do is stick the right value
 553                  * into %g1 and reset %pc to point to the instruction we've
 554                  * cleverly hidden (%npc should not be touched).
 555                  */
 556 
 557                 imm = tp->ftt_instr << 19;
 558                 imm >>= 19;
 559                 rp->r_g1 = rp->r_sp + imm;
 560                 pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
 561                 break;
 562         }
 563 
 564         case FASTTRAP_T_RESTORE:
 565         {
 566                 ulong_t value;
 567                 uint_t rd;
 568 
 569                 /*
 570                  * This is an optimization to let us handle function
 571                  * return probes more efficiently. Most non-leaf functions
 572                  * end with the sequence:
 573                  *      ret
 574                  *      restore <reg>, <reg_or_imm>, %oX
 575                  *
 576                  * We've stashed the instruction:
 577                  *      restore %g0, %g0, %g0
 578                  *
 579                  * off of %g7 so we just need to place the correct value
 580                  * in the right %i register (since after our fake-o
 581                  * restore, the %i's will become the %o's) and set the %pc
 582                  * to point to our hidden restore. We also set fake_restore to
 583                  * let fasttrap_return_common() know that it will find the
 584                  * return values in the %i's rather than the %o's.
 585                  */
 586 
 587                 if (I(tp->ftt_instr)) {
 588                         int32_t imm;
 589 
 590                         imm = tp->ftt_instr << 19;
 591                         imm >>= 19;
 592                         value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
 593                 } else {
 594                         value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
 595                             fasttrap_getreg(rp, RS2(tp->ftt_instr));
 596                 }
 597 
 598                 /*
 599                  * Convert %o's to %i's; leave %g's as they are.
 600                  */
 601                 rd = RD(tp->ftt_instr);
 602                 fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
 603 
 604                 pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
 605                 fake_restore = 1;
 606                 break;
 607         }
 608 
 609         case FASTTRAP_T_RETURN:
 610         {
 611                 uintptr_t target;
 612 
 613                 /*
 614                  * A return instruction is like a jmpl (without the link
 615                  * part) that executes an implicit restore. We've stashed
 616                  * the instruction:
 617                  *      return %o0
 618                  *
 619                  * off of %g7 so we just need to place the target in %o0
 620                  * and set the %pc to point to the stashed return instruction.
 621                  * We use %o0 since that register disappears after the return
 622                  * executes, erasing any evidence of this tampering.
 623                  */
 624                 if (I(tp->ftt_instr)) {
 625                         int32_t imm;
 626 
 627                         imm = tp->ftt_instr << 19;
 628                         imm >>= 19;
 629                         target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
 630                 } else {
 631                         target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
 632                             fasttrap_getreg(rp, RS2(tp->ftt_instr));
 633                 }
 634 
 635                 fasttrap_putreg(rp, R_O0, target);
 636 
 637                 pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
 638                 fake_restore = 1;
 639                 break;
 640         }
 641 
 642         case FASTTRAP_T_OR:
 643         {
 644                 ulong_t value;
 645 
 646                 if (I(tp->ftt_instr)) {
 647                         int32_t imm;
 648 
 649                         imm = tp->ftt_instr << 19;
 650                         imm >>= 19;
 651                         value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
 652                 } else {
 653                         value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
 654                             fasttrap_getreg(rp, RS2(tp->ftt_instr));
 655                 }
 656 
 657                 fasttrap_putreg(rp, RD(tp->ftt_instr), value);
 658                 pc = rp->r_npc;
 659                 npc = pc + 4;
 660                 break;
 661         }
 662 
 663         case FASTTRAP_T_SETHI:
 664                 if (RD(tp->ftt_instr) != R_G0) {
 665                         uint32_t imm32 = tp->ftt_instr << 10;
 666                         fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
 667                 }
 668                 pc = rp->r_npc;
 669                 npc = pc + 4;
 670                 break;
 671 
 672         case FASTTRAP_T_CCR:
 673         {
 674                 uint_t c, v, z, n, taken;
 675                 uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;
 676 
 677                 if (tp->ftt_cc != 0)
 678                         ccr >>= 4;
 679 
 680                 c = (ccr >> 0) & 1;
 681                 v = (ccr >> 1) & 1;
 682                 z = (ccr >> 2) & 1;
 683                 n = (ccr >> 3) & 1;
 684 
 685                 switch (tp->ftt_code) {
 686                 case 0x0:       /* BN */
 687                         taken = 0;              break;
 688                 case 0x1:       /* BE */
 689                         taken = z;              break;
 690                 case 0x2:       /* BLE */
 691                         taken = z | (n ^ v);    break;
 692                 case 0x3:       /* BL */
 693                         taken = n ^ v;          break;
 694                 case 0x4:       /* BLEU */
 695                         taken = c | z;          break;
 696                 case 0x5:       /* BCS (BLU) */
 697                         taken = c;              break;
 698                 case 0x6:       /* BNEG */
 699                         taken = n;              break;
 700                 case 0x7:       /* BVS */
 701                         taken = v;              break;
 702                 case 0x8:       /* BA */
 703                         /*
 704                          * We handle the BA case differently since the annul
 705                          * bit means something slightly different.
 706                          */
 707                         panic("fasttrap: mishandled a branch");
 708                         taken = 1;              break;
 709                 case 0x9:       /* BNE */
 710                         taken = ~z;             break;
 711                 case 0xa:       /* BG */
 712                         taken = ~(z | (n ^ v)); break;
 713                 case 0xb:       /* BGE */
 714                         taken = ~(n ^ v);       break;
 715                 case 0xc:       /* BGU */
 716                         taken = ~(c | z);       break;
 717                 case 0xd:       /* BCC (BGEU) */
 718                         taken = ~c;             break;
 719                 case 0xe:       /* BPOS */
 720                         taken = ~n;             break;
 721                 case 0xf:       /* BVC */
 722                         taken = ~v;             break;
 723                 }
 724 
 725                 if (taken & 1) {
 726                         pc = rp->r_npc;
 727                         npc = tp->ftt_dest;
 728                 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
 729                         /*
 730                          * Untaken annulled branches don't execute the
 731                          * instruction in the delay slot.
 732                          */
 733                         pc = rp->r_npc + 4;
 734                         npc = pc + 4;
 735                 } else {
 736                         pc = rp->r_npc;
 737                         npc = pc + 4;
 738                 }
 739                 break;
 740         }
 741 
 742         case FASTTRAP_T_FCC:
 743         {
 744                 uint_t fcc;
 745                 uint_t taken;
 746                 uint64_t fsr;
 747 
 748                 dtrace_getfsr(&fsr);
 749 
 750                 if (tp->ftt_cc == 0) {
 751                         fcc = (fsr >> 10) & 0x3;
 752                 } else {
 753                         uint_t shift;
 754                         ASSERT(tp->ftt_cc <= 3);
 755                         shift = 30 + tp->ftt_cc * 2;
 756                         fcc = (fsr >> shift) & 0x3;
 757                 }
 758 
 759                 switch (tp->ftt_code) {
 760                 case 0x0:       /* FBN */
 761                         taken = (1 << fcc) & (0|0|0|0);       break;
 762                 case 0x1:       /* FBNE */
 763                         taken = (1 << fcc) & (8|4|2|0);       break;
 764                 case 0x2:       /* FBLG */
 765                         taken = (1 << fcc) & (0|4|2|0);       break;
 766                 case 0x3:       /* FBUL */
 767                         taken = (1 << fcc) & (8|0|2|0);       break;
 768                 case 0x4:       /* FBL */
 769                         taken = (1 << fcc) & (0|0|2|0);       break;
 770                 case 0x5:       /* FBUG */
 771                         taken = (1 << fcc) & (8|4|0|0);       break;
 772                 case 0x6:       /* FBG */
 773                         taken = (1 << fcc) & (0|4|0|0);       break;
 774                 case 0x7:       /* FBU */
 775                         taken = (1 << fcc) & (8|0|0|0);       break;
 776                 case 0x8:       /* FBA */
 777                         /*
 778                          * We handle the FBA case differently since the annul
 779                          * bit means something slightly different.
 780                          */
 781                         panic("fasttrap: mishandled a branch");
 782                         taken = (1 << fcc) & (8|4|2|1);       break;
 783                 case 0x9:       /* FBE */
 784                         taken = (1 << fcc) & (0|0|0|1);       break;
 785                 case 0xa:       /* FBUE */
 786                         taken = (1 << fcc) & (8|0|0|1);       break;
 787                 case 0xb:       /* FBGE */
 788                         taken = (1 << fcc) & (0|4|0|1);       break;
 789                 case 0xc:       /* FBUGE */
 790                         taken = (1 << fcc) & (8|4|0|1);       break;
 791                 case 0xd:       /* FBLE */
 792                         taken = (1 << fcc) & (0|0|2|1);       break;
 793                 case 0xe:       /* FBULE */
 794                         taken = (1 << fcc) & (8|0|2|1);       break;
 795                 case 0xf:       /* FBO */
 796                         taken = (1 << fcc) & (0|4|2|1);       break;
 797                 }
 798 
 799                 if (taken) {
 800                         pc = rp->r_npc;
 801                         npc = tp->ftt_dest;
 802                 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
 803                         /*
 804                          * Untaken annulled branches don't execute the
 805                          * instruction in the delay slot.
 806                          */
 807                         pc = rp->r_npc + 4;
 808                         npc = pc + 4;
 809                 } else {
 810                         pc = rp->r_npc;
 811                         npc = pc + 4;
 812                 }
 813                 break;
 814         }
 815 
 816         case FASTTRAP_T_REG:
 817         {
 818                 int64_t value;
 819                 uint_t taken;
 820                 uint_t reg = RS1(tp->ftt_instr);
 821 
 822                 /*
 823                  * An ILP32 process shouldn't be using a branch predicated on
 824                  * an %i or an %l since it would violate the ABI. It's a
 825                  * violation of the ABI because we can't ensure deterministic
 826                  * behavior. We should have identified this case when we
 827                  * enabled the probe.
 828                  */
 829                 ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);
 830 
 831                 value = (int64_t)fasttrap_getreg(rp, reg);
 832 
 833                 switch (tp->ftt_code) {
 834                 case 0x1:       /* BRZ */
 835                         taken = (value == 0);   break;
 836                 case 0x2:       /* BRLEZ */
 837                         taken = (value <= 0);        break;
 838                 case 0x3:       /* BRLZ */
 839                         taken = (value < 0); break;
 840                 case 0x5:       /* BRNZ */
 841                         taken = (value != 0);   break;
 842                 case 0x6:       /* BRGZ */
 843                         taken = (value > 0); break;
 844                 case 0x7:       /* BRGEZ */
 845                         taken = (value >= 0);        break;
 846                 default:
 847                 case 0x0:
 848                 case 0x4:
 849                         panic("fasttrap: mishandled a branch");
 850                 }
 851 
 852                 if (taken) {
 853                         pc = rp->r_npc;
 854                         npc = tp->ftt_dest;
 855                 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
 856                         /*
 857                          * Untaken annulled branches don't execute the
 858                          * instruction in the delay slot.
 859                          */
 860                         pc = rp->r_npc + 4;
 861                         npc = pc + 4;
 862                 } else {
 863                         pc = rp->r_npc;
 864                         npc = pc + 4;
 865                 }
 866                 break;
 867         }
 868 
 869         case FASTTRAP_T_ALWAYS:
 870                 /*
 871                  * BAs, BA,As...
 872                  */
 873 
 874                 if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
 875                         /*
 876                          * Annulled branch always instructions never execute
 877                          * the instruction in the delay slot.
 878                          */
 879                         pc = tp->ftt_dest;
 880                         npc = tp->ftt_dest + 4;
 881                 } else {
 882                         pc = rp->r_npc;
 883                         npc = tp->ftt_dest;
 884                 }
 885                 break;
 886 
 887         case FASTTRAP_T_RDPC:
 888                 fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
 889                 pc = rp->r_npc;
 890                 npc = pc + 4;
 891                 break;
 892 
 893         case FASTTRAP_T_CALL:
 894                 /*
 895                  * It's a call _and_ link remember...
 896                  */
 897                 rp->r_o7 = rp->r_pc;
 898                 pc = rp->r_npc;
 899                 npc = tp->ftt_dest;
 900                 break;
 901 
 902         case FASTTRAP_T_JMPL:
 903                 pc = rp->r_npc;
 904 
 905                 if (I(tp->ftt_instr)) {
 906                         uint_t rs1 = RS1(tp->ftt_instr);
 907                         int32_t imm;
 908 
 909                         imm = tp->ftt_instr << 19;
 910                         imm >>= 19;
 911                         npc = fasttrap_getreg(rp, rs1) + imm;
 912                 } else {
 913                         uint_t rs1 = RS1(tp->ftt_instr);
 914                         uint_t rs2 = RS2(tp->ftt_instr);
 915 
 916                         npc = fasttrap_getreg(rp, rs1) +
 917                             fasttrap_getreg(rp, rs2);
 918                 }
 919 
 920                 /*
 921                  * Do the link part of the jump-and-link instruction.
 922                  */
 923                 fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
 924 
 925                 break;
 926 
 927         case FASTTRAP_T_COMMON:
 928         {
 929                 curthread->t_dtrace_scrpc = rp->r_g7;
 930                 curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
 931 
 932                 /*
 933                  * Copy the instruction to a reserved location in the
 934                  * user-land thread structure, then set the PC to that
 935                  * location and leave the NPC alone. We take pains to ensure
 936                  * consistency in the instruction stream (See SPARC
 937                  * Architecture Manual Version 9, sections 8.4.7, A.20, and
 938                  * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
 939                  * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
 940                  * instruction into the user's address space without
 941                  * bypassing the I$. There's no AS_USER version of this ASI
 942                  * (as exist for other ASIs) so we use the lofault
 943                  * mechanism to catch faults.
 944                  */
 945                 if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
 946                         /*
 947                          * If the copyout fails, then the process's state
 948                          * is not consistent (the effects of the traced
 949                          * instruction will never be seen). This process
 950                          * cannot be allowed to continue execution.
 951                          */
 952                         fasttrap_sigtrap(curproc, curthread, pc);
 953                         return (0);
 954                 }
 955 
 956                 curthread->t_dtrace_pc = pc;
 957                 curthread->t_dtrace_npc = npc;
 958                 curthread->t_dtrace_on = 1;
 959 
 960                 pc = curthread->t_dtrace_scrpc;
 961 
 962                 if (tp->ftt_retids != NULL) {
 963                         curthread->t_dtrace_step = 1;
 964                         curthread->t_dtrace_ret = 1;
 965                         npc = curthread->t_dtrace_astpc;
 966                 }
 967                 break;
 968         }
 969 
 970         default:
 971                 panic("fasttrap: mishandled an instruction");
 972         }
 973 
 974         /*
 975          * This bit me in the ass a couple of times, so lets toss this
 976          * in as a cursory sanity check.
 977          */
 978         ASSERT(pc != rp->r_g7 + 4);
 979         ASSERT(pc != rp->r_g7 + 8);
 980 
 981 done:
 982         /*
 983          * If there were no return probes when we first found the tracepoint,
 984          * we should feel no obligation to honor any return probes that were
 985          * subsequently enabled -- they'll just have to wait until the next
 986          * time around.
 987          */
 988         if (tp->ftt_retids != NULL) {
 989                 /*
 990                  * We need to wait until the results of the instruction are
 991                  * apparent before invoking any return probes. If this
 992                  * instruction was emulated we can just call
 993                  * fasttrap_return_common(); if it needs to be executed, we
 994                  * need to wait until we return to the kernel.
 995                  */
 996                 if (tp->ftt_type != FASTTRAP_T_COMMON) {
 997                         fasttrap_return_common(rp, orig_pc, pid, fake_restore);
 998                 } else {
 999                         ASSERT(curthread->t_dtrace_ret != 0);
1000                         ASSERT(curthread->t_dtrace_pc == orig_pc);
1001                         ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
1002                         ASSERT(npc == curthread->t_dtrace_astpc);
1003                 }
1004         }
1005 
1006         ASSERT(pc != 0);
1007         rp->r_pc = pc;
1008         rp->r_npc = npc;
1009 
1010         return (0);
1011 }
1012 
1013 int
1014 fasttrap_return_probe(struct regs *rp)
1015 {
1016         proc_t *p = ttoproc(curthread);
1017         pid_t pid;
1018         uintptr_t pc = curthread->t_dtrace_pc;
1019         uintptr_t npc = curthread->t_dtrace_npc;
1020 
1021         curthread->t_dtrace_pc = 0;
1022         curthread->t_dtrace_npc = 0;
1023         curthread->t_dtrace_scrpc = 0;
1024         curthread->t_dtrace_astpc = 0;
1025 
1026         /*
1027          * Treat a child created by a call to vfork(2) as if it were its
1028          * parent. We know there's only one thread of control in such a
1029          * process: this one.
1030          */
1031         while (p->p_flag & SVFORK) {
1032                 p = p->p_parent;
1033         }
1034 
1035         /*
1036          * We set the %pc and %npc to their values when the traced
1037          * instruction was initially executed so that it appears to
1038          * dtrace_probe() that we're on the original instruction, and so that
1039          * the user can't easily detect our complex web of lies.
1040          * dtrace_return_probe() (our caller) will correctly set %pc and %npc
1041          * after we return.
1042          */
1043         rp->r_pc = pc;
1044         rp->r_npc = npc;
1045 
1046         pid = p->p_pid;
1047         fasttrap_return_common(rp, pc, pid, 0);
1048 
1049         return (0);
1050 }
1051 
1052 int
1053 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
1054 {
1055         fasttrap_instr_t instr = FASTTRAP_INSTR;
1056 
1057         if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
1058                 return (-1);
1059 
1060         return (0);
1061 }
1062 
1063 int
1064 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
1065 {
1066         fasttrap_instr_t instr;
1067 
1068         /*
1069          * Distinguish between read or write failures and a changed
1070          * instruction.
1071          */
1072         if (uread(p, &instr, 4, tp->ftt_pc) != 0)
1073                 return (0);
1074         if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
1075                 return (0);
1076         if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
1077                 return (-1);
1078 
1079         return (0);
1080 }
1081 
/*
 * Initialize a tracepoint: read the instruction at `pc' out of process `p',
 * classify it into an emulation type (tp->ftt_type), and record whatever
 * parameters the emulation needs (condition-code set, branch condition,
 * destination address, annul flag, the instruction itself). Returns 0 on
 * success, or -1 if the instruction can't be read or is one we refuse to
 * trace (illtraps, other illegal encodings, breakpoint traps).
 */
int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	uint32_t instr;
	int32_t disp;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set.
	 */
	if (uread(p, &instr, 4, pc) != 0)
		return (-1);

	/*
	 * Decode the instruction to fill in the probe flags. We can have
	 * the process execute most instructions on its own using a pc/npc
	 * trick, but pc-relative control transfer present a problem since
	 * we're relocating the instruction. We emulate these instructions
	 * in the kernel. We assume a default type and over-write that as
	 * needed.
	 *
	 * pc-relative instructions must be emulated for correctness;
	 * other instructions (which represent a large set of commonly traced
	 * instructions) are emulated or otherwise optimized for performance.
	 */
	tp->ftt_type = FASTTRAP_T_COMMON;
	if (OP(instr) == 1) {
		/*
		 * Call instructions.
		 */
		tp->ftt_type = FASTTRAP_T_CALL;
		/* disp30 is a word offset; scale it to a byte offset */
		disp = DISP30(instr) << 2;
		tp->ftt_dest = pc + (intptr_t)disp;

	} else if (OP(instr) == 0) {
		/*
		 * Branch instructions.
		 *
		 * Unconditional branches need careful attention when they're
		 * annulled: annulled unconditional branches never execute
		 * the instruction in the delay slot.
		 */
		switch (OP2(instr)) {
		case OP2_ILLTRAP:
		case 0x7:
			/*
			 * The compiler may place an illtrap after a call to
			 * a function that returns a structure. In the case of
			 * a returned structure, the compiler places an illtrap
			 * whose const22 field is the size of the returned
			 * structure immediately following the delay slot of
			 * the call. To stay out of the way, we refuse to
			 * place tracepoints on top of illtrap instructions.
			 *
			 * This is one of the dumbest architectural decisions
			 * I've ever had to work around.
			 *
			 * We also identify the only illegal op2 value (See
			 * SPARC Architecture Manual Version 9, E.2 table 31).
			 */
			return (-1);

		case OP2_BPcc:
			/* cond == 8 is the "branch always" encoding */
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				/*
				 * Check for an illegal instruction.
				 */
				if (CC(instr) & 1)
					return (-1);
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			/*
			 * Sign-extend the 19-bit word displacement and
			 * convert it to a byte offset: shift the sign bit up
			 * to bit 31, then arithmetic-shift back down leaving
			 * the value multiplied by 4.
			 */
			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_Bicc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				/* Bicc implicitly uses the icc condition codes */
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			/* sign-extend the 22-bit word displacement, times 4 */
			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_BPr:
			/*
			 * Check for an illegal instruction.
			 */
			if ((RCOND(instr) & 3) == 0)
				return (-1);

			/*
			 * It's a violation of the v8plus ABI to use a
			 * register-predicated branch in a 32-bit app if
			 * the register used is an %l or an %i (%gs and %os
			 * are legit because they're not saved to the stack
			 * in 32-bit words when we take a trap).
			 */
			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
				return (-1);

			tp->ftt_type = FASTTRAP_T_REG;
			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;
			/* sign-extend the 16-bit word displacement, times 4 */
			disp = DISP16(instr);
			disp <<= 16;
			disp >>= 14;
			tp->ftt_dest = pc + (intptr_t)disp;
			tp->ftt_code = RCOND(instr);
			break;

		case OP2_SETHI:
			/* sethi isn't pc-relative but is emulated for speed */
			tp->ftt_type = FASTTRAP_T_SETHI;
			break;

		case OP2_FBPfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			/* sign-extend the 19-bit word displacement, times 4 */
			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_FBfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				/* FBfcc implicitly uses %fcc0 */
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			/* sign-extend the 22-bit word displacement, times 4 */
			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;
		}

	} else if (OP(instr) == 2) {
		switch (OP3(instr)) {
		case OP3_RETURN:
			tp->ftt_type = FASTTRAP_T_RETURN;
			break;

		case OP3_JMPL:
			tp->ftt_type = FASTTRAP_T_JMPL;
			break;

		case OP3_RD:
			/* rs1 == 5 encodes rd %pc, which is pc-relative */
			if (RS1(instr) == 5)
				tp->ftt_type = FASTTRAP_T_RDPC;
			break;

		case OP3_SAVE:
			/*
			 * We optimize for save instructions at function
			 * entry; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_SAVE) for details.
			 */
			if (fasttrap_optimize_save != 0 &&
			    type == DTFTP_ENTRY &&
			    I(instr) == 1 && RD(instr) == R_SP)
				tp->ftt_type = FASTTRAP_T_SAVE;
			break;

		case OP3_RESTORE:
			/*
			 * We optimize restore instructions at function
			 * return; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_RESTORE) for details.
			 *
			 * rd must be an %o or %g register.
			 */
			if ((RD(instr) & 0x10) == 0)
				tp->ftt_type = FASTTRAP_T_RESTORE;
			break;

		case OP3_OR:
			/*
			 * A large proportion of instructions in the delay
			 * slot of retl instructions are or's so we emulate
			 * these downstairs as an optimization.
			 */
			tp->ftt_type = FASTTRAP_T_OR;
			break;

		case OP3_TCC:
			/*
			 * Breakpoint instructions are effectively position-
			 * dependent since the debugger uses the %pc value
			 * to lookup which breakpoint was executed. As a
			 * result, we can't actually instrument breakpoints.
			 */
			if (SW_TRAP(instr) == ST_BREAKPOINT)
				return (-1);
			break;

		case 0x19:
		case 0x1d:
		case 0x29:
		case 0x33:
		case 0x3f:
			/*
			 * Identify illegal instructions (See SPARC
			 * Architecture Manual Version 9, E.2 table 32).
			 */
			return (-1);
		}
	} else if (OP(instr) == 3) {
		uint32_t op3 = OP3(instr);

		/*
		 * Identify illegal instructions (See SPARC Architecture
		 * Manual Version 9, E.2 table 33).
		 */
		if ((op3 & 0x28) == 0x28) {
			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
				return (-1);
		} else {
			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
				return (-1);
		}
	}

	tp->ftt_instr = instr;

	/*
	 * We don't know how this tracepoint is going to be used, but in case
	 * it's used as part of a function return probe, we need to indicate
	 * whether it's always a return site or only potentially a return
	 * site. If it's part of a return probe, it's always going to be a
	 * return from that function if it's a restore instruction or if
	 * the previous instruction was a return. If we could reliably
	 * distinguish jump tables from return sites, this wouldn't be
	 * necessary.
	 */
	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;

	return (0);
}
1362 
1363 /*ARGSUSED*/
1364 uint64_t
1365 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1366     int aframes)
1367 {
1368         return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1369 }
1370 
1371 /*ARGSUSED*/
1372 uint64_t
1373 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1374     int aframes)
1375 {
1376         return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1377 }
1378 
/*
 * Diagnostic counters: how often fasttrap_getreg() satisfied a request
 * from the register windows (fast), the machpcb save area (mpcb), or the
 * user's stack (slow).
 */
static uint64_t fasttrap_getreg_fast_cnt;
static uint64_t fasttrap_getreg_mpcb_cnt;
static uint64_t fasttrap_getreg_slow_cnt;
1382 
1383 static ulong_t
1384 fasttrap_getreg(struct regs *rp, uint_t reg)
1385 {
1386         ulong_t value;
1387         dtrace_icookie_t cookie;
1388         struct machpcb *mpcb;
1389         extern ulong_t dtrace_getreg_win(uint_t, uint_t);
1390 
1391         /*
1392          * We have the %os and %gs in our struct regs, but if we need to
1393          * snag a %l or %i we need to go scrounging around in the process's
1394          * address space.
1395          */
1396         if (reg == 0)
1397                 return (0);
1398 
1399         if (reg < 16)
1400                 return ((&rp->r_g1)[reg - 1]);
1401 
1402         /*
1403          * Before we look at the user's stack, we'll check the register
1404          * windows to see if the information we want is in there.
1405          */
1406         cookie = dtrace_interrupt_disable();
1407         if (dtrace_getotherwin() > 0) {
1408                 value = dtrace_getreg_win(reg, 1);
1409                 dtrace_interrupt_enable(cookie);
1410 
1411                 atomic_inc_64(&fasttrap_getreg_fast_cnt);
1412 
1413                 return (value);
1414         }
1415         dtrace_interrupt_enable(cookie);
1416 
1417         /*
1418          * First check the machpcb structure to see if we've already read
1419          * in the register window we're looking for; if we haven't, (and
1420          * we probably haven't) try to copy in the value of the register.
1421          */
1422         /* LINTED - alignment */
1423         mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1424 
1425         if (get_udatamodel() == DATAMODEL_NATIVE) {
1426                 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1427 
1428                 if (mpcb->mpcb_wbcnt > 0) {
1429                         struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
1430                         int i = mpcb->mpcb_wbcnt;
1431                         do {
1432                                 i--;
1433                                 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1434                                         continue;
1435 
1436                                 atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1437                                 return (rwin[i].rw_local[reg - 16]);
1438                         } while (i > 0);
1439                 }
1440 
1441                 if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
1442                         goto err;
1443         } else {
1444                 struct frame32 *fr =
1445                     (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1446                 uint32_t *v32 = (uint32_t *)&value;
1447 
1448                 if (mpcb->mpcb_wbcnt > 0) {
1449                         struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
1450                         int i = mpcb->mpcb_wbcnt;
1451                         do {
1452                                 i--;
1453                                 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1454                                         continue;
1455 
1456                                 atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1457                                 return (rwin[i].rw_local[reg - 16]);
1458                         } while (i > 0);
1459                 }
1460 
1461                 if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
1462                         goto err;
1463 
1464                 v32[0] = 0;
1465         }
1466 
1467         atomic_inc_64(&fasttrap_getreg_slow_cnt);
1468         return (value);
1469 
1470 err:
1471         /*
1472          * If the copy in failed, the process will be in a irrecoverable
1473          * state, and we have no choice but to kill it.
1474          */
1475         psignal(ttoproc(curthread), SIGILL);
1476         return (0);
1477 }
1478 
/*
 * Diagnostic counters recording which path fasttrap_putreg() took for
 * each store: the register-file fast path (%otherwin > 0), the machpcb
 * window-buffer path, or the slow path that writes the user stack.
 */
static uint64_t fasttrap_putreg_fast_cnt;
static uint64_t fasttrap_putreg_mpcb_cnt;
static uint64_t fasttrap_putreg_slow_cnt;
1482 
/*
 * Store `value' into user-land integer register `reg' (0 through 31) of
 * the thread whose saved register state is `rp'.  %g0 is hardwired to
 * zero on SPARC, so a write to register 0 is silently discarded.
 * Registers 1 through 15 (the globals and outs) are saved contiguously
 * in the struct regs beginning at r_g1; registers 16 through 31 (the
 * locals and ins) live in the current register window, which may still
 * be in the register file, buffered in the machpcb, or flushed out to
 * the user stack.  If the value cannot be recorded anywhere, the
 * process is killed with SIGILL.
 */
static void
fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
{
        dtrace_icookie_t cookie;
        struct machpcb *mpcb;
        extern void dtrace_putreg_win(uint_t, ulong_t);

        /* Writes to %g0 have no effect. */
        if (reg == 0)
                return;

        /* Globals and outs are laid out contiguously starting at r_g1. */
        if (reg < 16) {
                (&rp->r_g1)[reg - 1] = value;
                return;
        }

        /*
         * If the user process is still using some register windows, we
         * can just place the value in the correct window.
         */
        cookie = dtrace_interrupt_disable();
        if (dtrace_getotherwin() > 0) {
                dtrace_putreg_win(reg, value);
                dtrace_interrupt_enable(cookie);
                atomic_inc_64(&fasttrap_putreg_fast_cnt);
                return;
        }
        dtrace_interrupt_enable(cookie);

        /*
         * First see if there's a copy of the register window in the
         * machpcb structure that we can modify; if there isn't try to
         * copy out the value. If that fails, we try to create a new
         * register window in the machpcb structure. While this isn't
         * _precisely_ the intended use of the machpcb structure, it
         * can't cause any problems since we know at this point in the
         * code that all of the user's data have been flushed out of the
         * register file (since %otherwin is 0).
         */
        /* LINTED - alignment */
        mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

        if (get_udatamodel() == DATAMODEL_NATIVE) {
                struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
                /* LINTED - alignment */
                struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;

                if (mpcb->mpcb_wbcnt > 0) {
                        /*
                         * Scan the buffered windows from most recent to
                         * oldest for one whose saved stack pointer
                         * matches this frame, and update the buffered
                         * copy in place.
                         */
                        int i = mpcb->mpcb_wbcnt;
                        do {
                                i--;
                                if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
                                        continue;

                                rwin[i].rw_local[reg - 16] = value;
                                atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
                                return;
                        } while (i > 0);
                }

                /*
                 * No buffered copy: the window should be on the user
                 * stack, so store the value there.  If the store
                 * faults, fall back to buffering a fresh copy of the
                 * window in the machpcb (see the block comment above).
                 */
                if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
                        if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
                            &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
                                goto err;

                        rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
                        mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
                        mpcb->mpcb_wbcnt++;
                        atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
                        return;
                }
        } else {
                /* ILP32 process: 32-bit frame, no stack bias. */
                struct frame32 *fr =
                    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
                /* LINTED - alignment */
                struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
                /* Only the low 32 bits of the value are kept. */
                uint32_t v32 = (uint32_t)value;

                if (mpcb->mpcb_wbcnt > 0) {
                        /*
                         * Same newest-to-oldest search as the 64-bit
                         * case, but over 32-bit register windows.
                         */
                        int i = mpcb->mpcb_wbcnt;
                        do {
                                i--;
                                if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
                                        continue;

                                rwin[i].rw_local[reg - 16] = v32;
                                atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
                                return;
                        } while (i > 0);
                }

                /*
                 * As above: try the user stack first, then fall back to
                 * stashing a new window copy in the machpcb.
                 */
                if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
                        if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
                            &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
                                goto err;

                        rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
                        mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
                        mpcb->mpcb_wbcnt++;
                        atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
                        return;
                }
        }

        atomic_inc_64(&fasttrap_putreg_slow_cnt);
        return;

err:
        /*
         * If we couldn't record this register's value, the process is in an
         * irrecoverable state and we have no choice but to euthanize it.
         */
        psignal(ttoproc(curthread), SIGILL);
}