1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 
  27 #include <sys/dtrace.h>
  28 #include <sys/systrace.h>
  29 #include <sys/stat.h>
  30 #include <sys/systm.h>
  31 #include <sys/conf.h>
  32 #include <sys/ddi.h>
  33 #include <sys/sunddi.h>
  34 #include <sys/atomic.h>
  35 
/*
 * Value handed to dtrace_probe_create() for every probe this provider
 * creates (presumably the number of artificial stack frames to skip --
 * confirm against the dtrace_probe_create() documentation).
 */
#define SYSTRACE_ARTIFICIAL_FRAMES      1

/*
 * Encoding of the per-probe argument (parg) given to dtrace_probe_create():
 * the low SYSTRACE_SHIFT bits hold the system call number, and the bit at
 * position SYSTRACE_SHIFT is set for "entry" probes and clear for "return"
 * probes.
 *
 *   SYSTRACE_ENTRY(id)   build the parg value for an entry probe
 *   SYSTRACE_RETURN(id)  build the parg value for a return probe
 *   SYSTRACE_ISENTRY(x)  non-zero if x was built by SYSTRACE_ENTRY()
 *   SYSTRACE_SYSNUM(x)   recover the system call number from x
 */
#define SYSTRACE_SHIFT                  16
#define SYSTRACE_ISENTRY(x)             ((int)(x) >> SYSTRACE_SHIFT)
#define SYSTRACE_SYSNUM(x)              ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define SYSTRACE_ENTRY(id)              ((1 << SYSTRACE_SHIFT) | (id))
#define SYSTRACE_RETURN(id)             (id)

/*
 * The encoding above only works if every system call number fits below the
 * entry flag bit; refuse to build otherwise.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif

/* dev_info node saved by systrace_attach(); returned by systrace_info(). */
static dev_info_t *systrace_devi;
/* Provider id filled in by dtrace_register() in systrace_attach(). */
static dtrace_provider_id_t systrace_id;
  50 
  51 static void
  52 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
  53 {
  54         systrace_sysent_t *sysent = *interposed;
  55         int i;
  56 
  57         if (sysent == NULL) {
  58                 *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
  59                     NSYSCALL, KM_SLEEP);
  60         }
  61 
  62         for (i = 0; i < NSYSCALL; i++) {
  63                 struct sysent *a = &actual[i];
  64                 systrace_sysent_t *s = &sysent[i];
  65 
  66                 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
  67                         continue;
  68 
  69                 if (a->sy_callc == dtrace_systrace_syscall)
  70                         continue;
  71 
  72 #ifdef _SYSCALL32_IMPL
  73                 if (a->sy_callc == dtrace_systrace_syscall32)
  74                         continue;
  75 #endif
  76 
  77                 s->stsy_underlying = a->sy_callc;
  78         }
  79 }
  80 
  81 /*ARGSUSED*/
  82 static void
  83 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
  84 {
  85         int i;
  86 
  87         if (desc != NULL)
  88                 return;
  89 
  90         systrace_init(sysent, &systrace_sysent);
  91 #ifdef _SYSCALL32_IMPL
  92         systrace_init(sysent32, &systrace_sysent32);
  93 #endif
  94 
  95         for (i = 0; i < NSYSCALL; i++) {
  96                 if (systrace_sysent[i].stsy_underlying == NULL)
  97                         continue;
  98 
  99                 if (dtrace_probe_lookup(systrace_id, NULL,
 100                     syscallnames[i], "entry") != 0)
 101                         continue;
 102 
 103                 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
 104                     "entry", SYSTRACE_ARTIFICIAL_FRAMES,
 105                     (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
 106                 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
 107                     "return", SYSTRACE_ARTIFICIAL_FRAMES,
 108                     (void *)((uintptr_t)SYSTRACE_RETURN(i)));
 109 
 110                 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
 111                 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
 112 #ifdef _SYSCALL32_IMPL
 113                 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
 114                 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
 115 #endif
 116         }
 117 }
 118 
 119 /*ARGSUSED*/
 120 static void
 121 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
 122 {
 123         int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
 124 
 125         /*
 126          * There's nothing to do here but assert that we have actually been
 127          * disabled.
 128          */
 129         if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
 130                 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
 131 #ifdef _SYSCALL32_IMPL
 132                 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
 133 #endif
 134         } else {
 135                 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
 136 #ifdef _SYSCALL32_IMPL
 137                 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
 138 #endif
 139         }
 140 }
 141 
 142 /*ARGSUSED*/
 143 static int
 144 systrace_enable(void *arg, dtrace_id_t id, void *parg)
 145 {
 146         int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
 147         int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
 148             systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
 149 
 150         if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
 151                 systrace_sysent[sysnum].stsy_entry = id;
 152 #ifdef _SYSCALL32_IMPL
 153                 systrace_sysent32[sysnum].stsy_entry = id;
 154 #endif
 155         } else {
 156                 systrace_sysent[sysnum].stsy_return = id;
 157 #ifdef _SYSCALL32_IMPL
 158                 systrace_sysent32[sysnum].stsy_return = id;
 159 #endif
 160         }
 161 
 162         if (enabled) {
 163                 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
 164                 return (0);
 165         }
 166 
 167         (void) atomic_cas_ptr(&sysent[sysnum].sy_callc,
 168             (void *)systrace_sysent[sysnum].stsy_underlying,
 169             (void *)dtrace_systrace_syscall);
 170 #ifdef _SYSCALL32_IMPL
 171         (void) atomic_cas_ptr(&sysent32[sysnum].sy_callc,
 172             (void *)systrace_sysent32[sysnum].stsy_underlying,
 173             (void *)dtrace_systrace_syscall32);
 174 #endif
 175         return (0);
 176 }
 177 
 178 /*ARGSUSED*/
 179 static void
 180 systrace_disable(void *arg, dtrace_id_t id, void *parg)
 181 {
 182         int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
 183         int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
 184             systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
 185 
 186         if (disable) {
 187                 (void) atomic_cas_ptr(&sysent[sysnum].sy_callc,
 188                     (void *)dtrace_systrace_syscall,
 189                     (void *)systrace_sysent[sysnum].stsy_underlying);
 190 
 191 #ifdef _SYSCALL32_IMPL
 192                 (void) atomic_cas_ptr(&sysent32[sysnum].sy_callc,
 193                     (void *)dtrace_systrace_syscall32,
 194                     (void *)systrace_sysent32[sysnum].stsy_underlying);
 195 #endif
 196         }
 197 
 198         if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
 199                 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
 200 #ifdef _SYSCALL32_IMPL
 201                 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
 202 #endif
 203         } else {
 204                 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
 205 #ifdef _SYSCALL32_IMPL
 206                 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
 207 #endif
 208         }
 209 }
 210 
/*
 * Stability attributes passed to dtrace_register() for the "syscall"
 * provider.  Each row is a (name stability, data stability, dependency
 * class) triple.
 * NOTE(review): the five rows presumably correspond, in order, to the
 * provider, module, function, name and args components of dtrace_pattr_t --
 * confirm against the structure definition in <sys/dtrace.h>.
 */
static dtrace_pattr_t systrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
 218 
/*
 * Provider operations vector passed to dtrace_register().  The initializer
 * is positional: only the provide, enable, disable and destroy callbacks
 * defined in this file are supplied, and every other slot is NULL.
 * NOTE(review): the meaning of each NULL slot depends on the member order
 * of dtrace_pops_t in <sys/dtrace.h>; keep this list in step with it.
 */
static dtrace_pops_t systrace_pops = {
        systrace_provide,
        NULL,
        systrace_enable,
        systrace_disable,
        NULL,
        NULL,
        NULL,
        NULL,
        NULL,
        systrace_destroy
};
 231 
 232 static int
 233 systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 234 {
 235         switch (cmd) {
 236         case DDI_ATTACH:
 237                 break;
 238         case DDI_RESUME:
 239                 return (DDI_SUCCESS);
 240         default:
 241                 return (DDI_FAILURE);
 242         }
 243 
 244         systrace_probe = (void (*)())dtrace_probe;
 245         membar_enter();
 246 
 247         if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
 248             DDI_PSEUDO, NULL) == DDI_FAILURE ||
 249             dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
 250             &systrace_pops, NULL, &systrace_id) != 0) {
 251                 systrace_probe = systrace_stub;
 252                 ddi_remove_minor_node(devi, NULL);
 253                 return (DDI_FAILURE);
 254         }
 255 
 256         ddi_report_dev(devi);
 257         systrace_devi = devi;
 258 
 259         return (DDI_SUCCESS);
 260 }
 261 
 262 static int
 263 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
 264 {
 265         switch (cmd) {
 266         case DDI_DETACH:
 267                 break;
 268         case DDI_SUSPEND:
 269                 return (DDI_SUCCESS);
 270         default:
 271                 return (DDI_FAILURE);
 272         }
 273 
 274         if (dtrace_unregister(systrace_id) != 0)
 275                 return (DDI_FAILURE);
 276 
 277         ddi_remove_minor_node(devi, NULL);
 278         systrace_probe = systrace_stub;
 279         return (DDI_SUCCESS);
 280 }
 281 
 282 /*ARGSUSED*/
 283 static int
 284 systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 285 {
 286         int error;
 287 
 288         switch (infocmd) {
 289         case DDI_INFO_DEVT2DEVINFO:
 290                 *result = (void *)systrace_devi;
 291                 error = DDI_SUCCESS;
 292                 break;
 293         case DDI_INFO_DEVT2INSTANCE:
 294                 *result = (void *)0;
 295                 error = DDI_SUCCESS;
 296                 break;
 297         default:
 298                 error = DDI_FAILURE;
 299         }
 300         return (error);
 301 }
 302 
/*
 * open(9E) entry point.  Every open succeeds unconditionally; none of the
 * arguments are examined.
 */
/*ARGSUSED*/
static int
systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
        return (0);
}
 309 
/*
 * Character/block entry points for the systrace pseudo-device.  Only open is
 * implemented (by systrace_open(), which always succeeds); every other slot
 * is filled with one of the nodev/nulldev/nochpoll placeholders or the
 * generic ddi_prop_op.
 */
static struct cb_ops systrace_cb_ops = {
        systrace_open,          /* open */
        nodev,                  /* close */
        nulldev,                /* strategy */
        nulldev,                /* print */
        nodev,                  /* dump */
        nodev,                  /* read */
        nodev,                  /* write */
        nodev,                  /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        nochpoll,               /* poll */
        ddi_prop_op,            /* cb_prop_op */
        0,                      /* streamtab  */
        D_NEW | D_MP            /* Driver compatibility flag */
};
 327 
/*
 * Device operations for the systrace pseudo-driver: wires up the getinfo,
 * attach and detach entry points defined in this file together with
 * systrace_cb_ops.  There are no bus operations, and quiesce is declared
 * unnecessary via ddi_quiesce_not_needed.
 */
static struct dev_ops systrace_ops = {
        DEVO_REV,               /* devo_rev, */
        0,                      /* refcnt  */
        systrace_info,          /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        systrace_attach,        /* attach */
        systrace_detach,        /* detach */
        nodev,                  /* reset */
        &systrace_cb_ops,       /* driver operations */
        NULL,                   /* bus operations */
        nodev,                  /* dev power */
        ddi_quiesce_not_needed,         /* quiesce */
};
 342 
/*
 * Module linkage information for the kernel.
 *
 * modldrv describes this module as a driver and points at systrace_ops; it
 * is referenced from the modlinkage structure that _init(), _info() and
 * _fini() hand to the module framework.
 */
static struct modldrv modldrv = {
        &mod_driverops,             /* module type (this is a pseudo driver) */
        "System Call Tracing",  /* name of module */
        &systrace_ops,              /* driver ops */
};
 351 
/*
 * Top-level linkage passed to mod_install(), mod_info() and mod_remove().
 * It lists the single modldrv above, followed by a NULL terminator.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&modldrv,
        NULL
};
 357 
 358 int
 359 _init(void)
 360 {
 361         return (mod_install(&modlinkage));
 362 }
 363 
 364 int
 365 _info(struct modinfo *modinfop)
 366 {
 367         return (mod_info(&modlinkage, modinfop));
 368 }
 369 
 370 int
 371 _fini(void)
 372 {
 373         return (mod_remove(&modlinkage));
 374 }