/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015 Joyent, Inc.
 */

/*
 * lgroup system calls
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/sunddi.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/cpupart.h>
#include <sys/lgrp.h>
#include <sys/lgrp_user.h>
#include <sys/promif.h>           /* for prom_printf() */
#include <sys/sysmacros.h>
#include <sys/policy.h>

#include <vm/as.h>


/* definitions for mi_validity */
#define VALID_ADDR      1
#define VALID_REQ       2

/*
 * run through the given number of addresses and requests and return the
 * corresponding memory information for each address
 */
static int
meminfo(int addr_count, struct meminfo *mip)
{
        size_t          in_size, out_size, req_size, val_size;
        struct as       *as;
        struct hat      *hat;
        int             i, j, out_idx, info_count;
        lgrp_t          *lgrp;
        pfn_t           pfn;
        ssize_t         pgsz;
        int             *req_array, *val_array;
        uint64_t        *in_array, *out_array;
        uint64_t        addr, paddr;
        uintptr_t       vaddr;
        int             ret = 0;
        struct meminfo minfo;
#if defined(_SYSCALL32_IMPL)
        struct meminfo32 minfo32;
#endif

        /*
         * Make sure that there is at least one address to translate and
         * limit how many virtual addresses the kernel can do per call
         */
        if (addr_count < 1)
                return (set_errno(EINVAL));
        else if (addr_count > MAX_MEMINFO_CNT)
                addr_count = MAX_MEMINFO_CNT;

        if (get_udatamodel() == DATAMODEL_NATIVE) {
                if (copyin(mip, &minfo, sizeof (struct meminfo)))
                        return (set_errno(EFAULT));
        }
#if defined(_SYSCALL32_IMPL)
        else {
                bzero(&minfo, sizeof (minfo));
                if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
                        return (set_errno(EFAULT));
                minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
                    minfo32.mi_inaddr;
                minfo.mi_info_req = (const uint_t *)(uintptr_t)
                    minfo32.mi_info_req;
                minfo.mi_info_count = minfo32.mi_info_count;
                minfo.mi_outdata = (uint64_t *)(uintptr_t)
                    minfo32.mi_outdata;
                minfo.mi_validity = (uint_t *)(uintptr_t)
                    minfo32.mi_validity;
        }
#endif
        /*
         * all the input parameters have been copied in:-
         * addr_count - number of input addresses
         * minfo.mi_inaddr - array of input addresses
         * minfo.mi_info_req - array of types of information requested
         * minfo.mi_info_count - no. of pieces of info requested for each addr
         * minfo.mi_outdata - array into which the results are placed
         * minfo.mi_validity -  array containing bitwise result codes; 0th bit
         *                      evaluates validity of corresponding input
         *                      address, 1st bit validity of response to first
         *                      member of info_req, etc.
         */
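
        /*
         * For example (hypothetical values, following the bit layout
         * described above): with mi_info_count == 2 and mi_info_req ==
         * { MEMINFO_VPHYSICAL, MEMINFO_VLGRP }, a validity word of
         * (VALID_ADDR | VALID_REQ | (VALID_REQ << 1)) == 0x7 means the
         * address was mapped and both answers in mi_outdata are valid,
         * while 0x3 would mean only the physical address is valid.
         */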

        /* make sure mi_info_count is within limit */
        info_count = minfo.mi_info_count;
        if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
                return (set_errno(EINVAL));

        /*
         * allocate buffer in_array for the input addresses and copy them in
         */
        in_size = sizeof (uint64_t) * addr_count;
        in_array = kmem_alloc(in_size, KM_SLEEP);
        if (copyin(minfo.mi_inaddr, in_array, in_size)) {
                kmem_free(in_array, in_size);
                return (set_errno(EFAULT));
        }

        /*
         * allocate buffer req_array for the input info_reqs and copy them in
         */
        req_size = sizeof (uint_t) * info_count;
        req_array = kmem_alloc(req_size, KM_SLEEP);
        if (copyin(minfo.mi_info_req, req_array, req_size)) {
                kmem_free(req_array, req_size);
                kmem_free(in_array, in_size);
                return (set_errno(EFAULT));
        }

        /*
         * Validate privs for each req.
         */
        for (i = 0; i < info_count; i++) {
                switch (req_array[i] & MEMINFO_MASK) {
                case MEMINFO_VLGRP:
                case MEMINFO_VPAGESIZE:
                        break;
                default:
                        if (secpolicy_meminfo(CRED()) != 0) {
                                kmem_free(req_array, req_size);
                                kmem_free(in_array, in_size);
                                return (set_errno(EPERM));
                        }
                        break;
                }
        }

        /*
         * allocate buffer out_array which holds the results and will have
         * to be copied out later
         */
        out_size = sizeof (uint64_t) * addr_count * info_count;
        out_array = kmem_alloc(out_size, KM_SLEEP);

        /*
         * allocate buffer val_array which holds the validity bits and will
         * have to be copied out later
         */
        val_size = sizeof (uint_t) * addr_count;
        val_array = kmem_alloc(val_size, KM_SLEEP);

        if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
                /* find the corresponding lgroup for each physical address */
                for (i = 0; i < addr_count; i++) {
                        paddr = in_array[i];
                        pfn = btop(paddr);
                        lgrp = lgrp_pfn_to_lgrp(pfn);
                        if (lgrp) {
                                out_array[i] = lgrp->lgrp_id;
                                val_array[i] = VALID_ADDR | VALID_REQ;
                        } else {
                                out_array[i] = 0;
                                val_array[i] = 0;
                        }
                }
        } else {
                /* get the corresponding memory info for each virtual address */
                as = curproc->p_as;

                AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
                hat = as->a_hat;
                for (i = out_idx = 0; i < addr_count; i++, out_idx +=
                    info_count) {
                        addr = in_array[i];
                        vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
                        if (!as_segat(as, (caddr_t)vaddr)) {
                                val_array[i] = 0;
                                continue;
                        }
                        val_array[i] = VALID_ADDR;
                        pfn = hat_getpfnum(hat, (caddr_t)vaddr);
                        if (pfn != PFN_INVALID) {
                                paddr = (uint64_t)((pfn << PAGESHIFT) |
                                    (addr & PAGEOFFSET));
                                for (j = 0; j < info_count; j++) {
                                        switch (req_array[j] & MEMINFO_MASK) {
                                        case MEMINFO_VPHYSICAL:
                                                /*
                                                 * return the physical address
                                                 * corresponding to the input
                                                 * virtual address
                                                 */
                                                out_array[out_idx + j] = paddr;
                                                val_array[i] |= VALID_REQ << j;
                                                break;
                                        case MEMINFO_VLGRP:
                                                /*
                                                 * return the lgroup of physical
                                                 * page corresponding to the
                                                 * input virtual address
                                                 */
                                                lgrp = lgrp_pfn_to_lgrp(pfn);
                                                if (lgrp) {
                                                        out_array[out_idx + j] =
                                                            lgrp->lgrp_id;
                                                        val_array[i] |=
                                                            VALID_REQ << j;
                                                }
                                                break;
                                        case MEMINFO_VPAGESIZE:
                                                /*
                                                 * return the size of physical
                                                 * page corresponding to the
                                                 * input virtual address
                                                 */
                                                pgsz = hat_getpagesize(hat,
                                                    (caddr_t)vaddr);
                                                if (pgsz != -1) {
                                                        out_array[out_idx + j] =
                                                            pgsz;
                                                        val_array[i] |=
                                                            VALID_REQ << j;
                                                }
                                                break;
                                        case MEMINFO_VREPLCNT:
                                                /*
                                                 * for future use:-
                                                 * return the number of
                                                 * replicated physical
                                                 * pages corresponding
                                                 * to the input virtual
                                                 * address; always 0 at
                                                 * the moment
                                                 */
                                                out_array[out_idx + j] = 0;
                                                val_array[i] |= VALID_REQ << j;
                                                break;
                                        case MEMINFO_VREPL:
                                                /*
                                                 * for future use:-
                                                 * return the nth physical
                                                 * replica of the specified
                                                 * virtual address
                                                 */
                                                break;
                                        case MEMINFO_VREPL_LGRP:
                                                /*
                                                 * for future use:-
                                                 * return the lgroup of nth
                                                 * physical replica of the
                                                 * specified virtual address
                                                 */
                                                break;
                                        case MEMINFO_PLGRP:
                                                /*
                                                 * this is for physical
                                                 * addresses only and
                                                 * shouldn't be mixed
                                                 * with virtual address
                                                 * requests
                                                 */
                                                break;
                                        default:
                                                break;
                                        }
                                }
                        }
                }
                AS_LOCK_EXIT(as, &as->a_lock);
        }

        /* copy out the results and validity bits and free the buffers */
        if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
            (copyout(val_array, minfo.mi_validity, val_size) != 0))
                ret = set_errno(EFAULT);

        kmem_free(in_array, in_size);
        kmem_free(out_array, out_size);
        kmem_free(req_array, req_size);
        kmem_free(val_array, val_size);

        return (ret);
}
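
/*
 * Example userland usage, as a sketch (not part of this file): the
 * meminfo(2) wrapper and its exact signature are assumptions taken from
 * its manual page rather than anything defined here, and the validity
 * bits are tested numerically since VALID_ADDR/VALID_REQ are private to
 * this file:
 *
 *      #include <sys/mman.h>
 *
 *      uint64_t va = (uintptr_t)&obj;  (some object of interest)
 *      uint_t reqs[2] = { MEMINFO_VLGRP, MEMINFO_VPAGESIZE };
 *      uint64_t out[2];
 *      uint_t valid;
 *
 *      if (meminfo(&va, 1, reqs, 2, out, &valid) == 0 &&
 *          (valid & 1)) {
 *              if (valid & 2)
 *                      ... out[0] is the lgroup ID ...
 *              if (valid & 4)
 *                      ... out[1] is the page size ...
 *      }
 */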


/*
 * Initialize lgroup affinities for thread
 */
void
lgrp_affinity_init(lgrp_affinity_t **bufaddr)
{
        if (bufaddr)
                *bufaddr = NULL;
}


/*
 * Free lgroup affinities for thread and set to NULL
 * just in case thread gets recycled
 */
void
lgrp_affinity_free(lgrp_affinity_t **bufaddr)
{
        if (bufaddr && *bufaddr) {
                kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
                *bufaddr = NULL;
        }
}


#define P_ANY   -2      /* cookie specifying any ID */


/*
 * Find LWP with given ID in specified process and get its affinity for
 * specified lgroup
 */
lgrp_affinity_t
lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
{
        lgrp_affinity_t aff;
        int             found;
        kthread_t       *t;

        ASSERT(MUTEX_HELD(&p->p_lock));

        aff = LGRP_AFF_NONE;
        found = 0;
        t = p->p_tlist;
        /*
         * The process may be executing in proc_exit() and its p->p_tlist may
         * already be NULL.
         */
        if (t == NULL)
                return (set_errno(ESRCH));

        do {
                if (t->t_tid == lwpid || lwpid == P_ANY) {
                        thread_lock(t);
                        /*
                         * Check to see whether caller has permission to get
                         * affinity for LWP
                         */
                        if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
                                thread_unlock(t);
                                return (set_errno(EPERM));
                        }

                        if (t->t_lgrp_affinity)
                                aff = t->t_lgrp_affinity[lgrp];
                        thread_unlock(t);
                        found = 1;
                        break;
                }
        } while ((t = t->t_forw) != p->p_tlist);
        if (!found)
                aff = set_errno(ESRCH);

        return (aff);
}


/*
 * Get lgroup affinity for given LWP
 */
lgrp_affinity_t
lgrp_affinity_get(lgrp_affinity_args_t *ap)
{
        lgrp_affinity_t         aff;
        lgrp_affinity_args_t    args;
        id_t                    id;
        idtype_t                idtype;
        lgrp_id_t               lgrp;
        proc_t                  *p;
        kthread_t               *t;

        /*
         * Copyin arguments
         */
        if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
                return (set_errno(EFAULT));

        id = args.id;
        idtype = args.idtype;
        lgrp = args.lgrp;

        /*
         * Check for invalid lgroup
         */
        if (lgrp < 0 || lgrp == LGRP_NONE)
                return (set_errno(EINVAL));

        /*
         * Check for existing lgroup
         */
        if (lgrp > lgrp_alloc_max)
                return (set_errno(ESRCH));

        /*
         * Get lgroup affinity for given LWP or process
         */
        switch (idtype) {

        case P_LWPID:
                /*
                 * LWP in current process
                 */
                p = curproc;
                mutex_enter(&p->p_lock);
                if (id != P_MYID)       /* different thread */
                        aff = lgrp_affinity_get_thread(p, id, lgrp);
                else {                  /* current thread */
                        aff = LGRP_AFF_NONE;
                        t = curthread;
                        thread_lock(t);
                        if (t->t_lgrp_affinity)
                                aff = t->t_lgrp_affinity[lgrp];
                        thread_unlock(t);
                }
                mutex_exit(&p->p_lock);
                break;

        case P_PID:
                /*
                 * Process
                 */
                mutex_enter(&pidlock);

                if (id == P_MYID)
                        p = curproc;
                else {
                        p = prfind(id);
                        if (p == NULL) {
                                mutex_exit(&pidlock);
                                return (set_errno(ESRCH));
                        }
                }

                mutex_enter(&p->p_lock);
                aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
                mutex_exit(&p->p_lock);

                mutex_exit(&pidlock);
                break;

        default:
                aff = set_errno(EINVAL);
                break;
        }

        return (aff);
}


/*
 * Find lgroup for which this thread has most affinity in specified partition
 * starting from home lgroup unless specified starting lgroup is preferred
 */
lpl_t *
lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start,
    boolean_t prefer_start)
{
        lgrp_affinity_t *affs;
        lgrp_affinity_t best_aff;
        lpl_t           *best_lpl;
        lgrp_id_t       finish;
        lgrp_id_t       home;
        lgrp_id_t       lgrpid;
        lpl_t           *lpl;

        ASSERT(t != NULL);
        ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
            (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
        ASSERT(cpupart != NULL);

        if (t->t_lgrp_affinity == NULL)
                return (NULL);

        affs = t->t_lgrp_affinity;

        /*
         * Thread bound to CPU
         */
        if (t->t_bind_cpu != PBIND_NONE) {
                cpu_t   *cp;

                /*
                 * Find which lpl has most affinity among leaf lpl directly
                 * containing CPU and its ancestor lpls
                 */
                cp = cpu[t->t_bind_cpu];

                best_lpl = lpl = cp->cpu_lpl;
                best_aff = affs[best_lpl->lpl_lgrpid];
                while (lpl->lpl_parent != NULL) {
                        lpl = lpl->lpl_parent;
                        lgrpid = lpl->lpl_lgrpid;
                        if (affs[lgrpid] > best_aff) {
                                best_lpl = lpl;
                                best_aff = affs[lgrpid];
                        }
                }
                return (best_lpl);
        }

        /*
         * Start searching from home lgroup unless given starting lgroup is
         * preferred or home lgroup isn't in given pset.  Use root lgroup as
         * starting point if both home and starting lgroups aren't in given
         * pset.
         */
        ASSERT(start >= 0 && start <= lgrp_alloc_max);
        home = t->t_lpl->lpl_lgrpid;
        if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart))
                lgrpid = home;
        else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart))
                lgrpid = start;
        else
                lgrpid = LGRP_ROOTID;

        best_lpl = &cpupart->cp_lgrploads[lgrpid];
        best_aff = affs[lgrpid];
        finish = lgrpid;
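        /*
         * Circular scan over all allocated lgroup IDs: e.g. (hypothetical)
         * with lgrp_alloc_max == 4 and a starting lgrpid of 2, the loop
         * below visits 2, 3, 4, 0, 1 and stops when it wraps around to
         * finish.
         */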
        do {
                /*
                 * Skip any lgroups that don't have CPU resources
                 * in this processor set.
                 */
                if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
                        if (++lgrpid > lgrp_alloc_max)
                                lgrpid = 0;     /* wrap the search */
                        continue;
                }

                /*
                 * Find lgroup with most affinity
                 */
                lpl = &cpupart->cp_lgrploads[lgrpid];
                if (affs[lgrpid] > best_aff) {
                        best_aff = affs[lgrpid];
                        best_lpl = lpl;
                }

                if (++lgrpid > lgrp_alloc_max)
                        lgrpid = 0;     /* wrap the search */

        } while (lgrpid != finish);

        /*
         * No lgroup (in this pset) with any affinity
         */
        if (best_aff == LGRP_AFF_NONE)
                return (NULL);

        lgrpid = best_lpl->lpl_lgrpid;
        ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);

        return (best_lpl);
}


/*
 * Set thread's affinity for given lgroup
 */
int
lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
    lgrp_affinity_t **aff_buf)
{
        lgrp_affinity_t *affs;
        lgrp_id_t       best;
        lpl_t           *best_lpl;
        lgrp_id_t       home;
        int             retval;

        ASSERT(t != NULL);
        ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

        retval = 0;

        thread_lock(t);

        /*
         * Check to see whether caller has permission to set affinity for
         * thread
         */
        if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
                thread_unlock(t);
                return (set_errno(EPERM));
        }

        if (t->t_lgrp_affinity == NULL) {
                if (aff == LGRP_AFF_NONE) {
                        thread_unlock(t);
                        return (0);
                }
                ASSERT(aff_buf != NULL && *aff_buf != NULL);
                t->t_lgrp_affinity = *aff_buf;
                *aff_buf = NULL;
        }

        affs = t->t_lgrp_affinity;
        affs[lgrp] = aff;

        /*
         * Find lgroup for which thread has most affinity,
         * starting with lgroup for which affinity being set
         */
        best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE);

        /*
         * Rehome if found lgroup with more affinity than home or lgroup for
         * which affinity is being set has same affinity as home
         */
        home = t->t_lpl->lpl_lgrpid;
        if (best_lpl != NULL && best_lpl != t->t_lpl) {
                best = best_lpl->lpl_lgrpid;
                if (affs[best] > affs[home] || (affs[best] == affs[home] &&
                    best == lgrp))
                        lgrp_move_thread(t, best_lpl, 1);
        }

        thread_unlock(t);

        return (retval);
}


/*
 * Set process' affinity for specified lgroup
 */
int
lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
    lgrp_affinity_t **aff_buf_array)
{
        lgrp_affinity_t *buf;
        int             err = 0;
        int             i;
        int             retval;
        kthread_t       *t;

        ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
        ASSERT(aff_buf_array != NULL);

        i = 0;
        t = p->p_tlist;
        if (t != NULL) {
                do {
                        /*
                         * Set lgroup affinity for thread
                         */
                        buf = aff_buf_array[i];
                        retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);

                        if (err == 0 && retval != 0)
                                err = retval;

                        /*
                         * Advance to the next preallocated buffer if this
                         * thread consumed the current one
                         */
                        if (buf == NULL) {
                                ASSERT(i < p->p_lwpcnt);
                                aff_buf_array[i] = NULL;
                                i++;
                        }

                } while ((t = t->t_forw) != p->p_tlist);
        }
        return (err);
}


/*
 * Set LWP's or process' affinity for specified lgroup
 *
 * When setting affinities, pidlock, process p_lock, and thread_lock()
 * need to be held in that order to protect target thread's pset, process,
 * process contents, and thread contents.  thread_lock() does splhigh(),
 * so it ends up having a similar effect as kpreempt_disable(), so it will
 * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
 */
int
lgrp_affinity_set(lgrp_affinity_args_t *ap)
{
        lgrp_affinity_t         aff;
        lgrp_affinity_t         *aff_buf;
        lgrp_affinity_args_t    args;
        id_t                    id;
        idtype_t                idtype;
        lgrp_id_t               lgrp;
        int                     nthreads;
        proc_t                  *p;
        int                     retval;

        /*
         * Copyin arguments
         */
        if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
                return (set_errno(EFAULT));

        idtype = args.idtype;
        id = args.id;
        lgrp = args.lgrp;
        aff = args.aff;

        /*
         * Check for invalid lgroup
         */
        if (lgrp < 0 || lgrp == LGRP_NONE)
                return (set_errno(EINVAL));

        /*
         * Check for existing lgroup
         */
        if (lgrp > lgrp_alloc_max)
                return (set_errno(ESRCH));

        /*
         * Check for legal affinity
         */
        if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
            aff != LGRP_AFF_STRONG)
                return (set_errno(EINVAL));

        /*
         * Must be process or LWP ID
         */
        if (idtype != P_LWPID && idtype != P_PID)
                return (set_errno(EINVAL));

        /*
         * Set given LWP's or process' affinity for specified lgroup
         */
        switch (idtype) {

        case P_LWPID:
                /*
                 * Allocate memory for thread's lgroup affinities
                 * ahead of time w/o holding locks
                 */
                aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
                    KM_SLEEP);

                p = curproc;

                /*
                 * Set affinity for thread
                 */
                mutex_enter(&p->p_lock);
                if (id == P_MYID) {             /* current thread */
                        retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
                            &aff_buf);
                } else if (p->p_tlist == NULL) {
                        retval = set_errno(ESRCH);
                } else {                        /* other thread */
                        int             found = 0;
                        kthread_t       *t;

                        t = p->p_tlist;
                        do {
                                if (t->t_tid == id) {
                                        retval = lgrp_affinity_set_thread(t,
                                            lgrp, aff, &aff_buf);
                                        found = 1;
                                        break;
                                }
                        } while ((t = t->t_forw) != p->p_tlist);
                        if (!found)
                                retval = set_errno(ESRCH);
                }
                mutex_exit(&p->p_lock);

                /*
                 * Free memory for lgroup affinities if the thread
                 * didn't need it
                 */
                if (aff_buf)
                        kmem_free(aff_buf,
                            nlgrpsmax * sizeof (lgrp_affinity_t));

                break;

        case P_PID:

                do {
                        lgrp_affinity_t **aff_buf_array;
                        int             i;
                        size_t          size;

                        /*
                         * Get process
                         */
                        mutex_enter(&pidlock);

                        if (id == P_MYID)
                                p = curproc;
                        else
                                p = prfind(id);

                        if (p == NULL) {
                                mutex_exit(&pidlock);
                                return (set_errno(ESRCH));
                        }

                        /*
                         * Get number of threads in process
                         *
                         * NOTE: Only care about user processes,
                         *       so p_lwpcnt should be number of threads.
                         */
                        mutex_enter(&p->p_lock);
                        nthreads = p->p_lwpcnt;
                        mutex_exit(&p->p_lock);

                        mutex_exit(&pidlock);

                        if (nthreads < 1)
                                return (set_errno(ESRCH));

                        /*
                         * Preallocate memory for lgroup affinities for
                         * each thread in process now to avoid holding
                         * any locks.  Allocate an array to hold a buffer
                         * for each thread.
                         */
                        aff_buf_array = kmem_zalloc(nthreads *
                            sizeof (lgrp_affinity_t *), KM_SLEEP);

                        size = nlgrpsmax * sizeof (lgrp_affinity_t);
                        for (i = 0; i < nthreads; i++)
                                aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);

                        mutex_enter(&pidlock);

                        /*
                         * Get process again since we dropped locks to
                         * allocate memory (except for the current process)
                         */
                        if (id != P_MYID)
                                p = prfind(id);

                        /*
                         * If the process went away after we dropped locks and
                         * before reacquiring them, drop locks, free memory,
                         * and return.
                         */
                        if (p == NULL) {
                                mutex_exit(&pidlock);
                                for (i = 0; i < nthreads; i++)
                                        kmem_free(aff_buf_array[i], size);
                                kmem_free(aff_buf_array,
                                    nthreads * sizeof (lgrp_affinity_t *));
                                return (set_errno(ESRCH));
                        }

                        mutex_enter(&p->p_lock);

                        /*
                         * See whether the number of threads is the same.
                         * If not, drop locks, free memory, and try again.
                         */
                        if (nthreads != p->p_lwpcnt) {
                                mutex_exit(&p->p_lock);
                                mutex_exit(&pidlock);
                                for (i = 0; i < nthreads; i++)
                                        kmem_free(aff_buf_array[i], size);
                                kmem_free(aff_buf_array,
                                    nthreads * sizeof (lgrp_affinity_t *));
                                continue;
                        }

                        /*
                         * Set lgroup affinity for threads in process
                         */
                        retval = lgrp_affinity_set_proc(p, lgrp, aff,
                            aff_buf_array);

                        mutex_exit(&p->p_lock);
                        mutex_exit(&pidlock);

                        /*
                         * Free any leftover memory, since some threads may
                         * have already allocated memory and set lgroup
                         * affinities before
                         */
                        for (i = 0; i < nthreads; i++)
                                if (aff_buf_array[i] != NULL)
                                        kmem_free(aff_buf_array[i], size);
                        kmem_free(aff_buf_array,
                            nthreads * sizeof (lgrp_affinity_t *));

                        break;

                } while (nthreads != p->p_lwpcnt);

                break;

        default:
                retval = set_errno(EINVAL);
                break;
        }

        return (retval);
}
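
/*
 * The locking protocol described above lgrp_affinity_set(), as a minimal
 * sketch (error handling omitted; t is a hypothetical target thread):
 *
 *      mutex_enter(&pidlock);          1) pin down the process
 *      p = prfind(id);
 *      mutex_enter(&p->p_lock);        2) freeze its thread list
 *      thread_lock(t);                 3) protect thread contents and,
 *      ...                                via splhigh(), block preemption
 *      thread_unlock(t);                  much like kpreempt_disable()
 *      mutex_exit(&p->p_lock);
 *      mutex_exit(&pidlock);
 */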


/*
 * Return the latest generation number for the lgroup hierarchy
 * with the given view
 */
lgrp_gen_t
lgrp_generation(lgrp_view_t view)
{
        cpupart_t       *cpupart;
        uint_t          gen;

        kpreempt_disable();

        /*
         * Determine generation number for given view
         */
        if (view == LGRP_VIEW_OS)
                /*
                 * Return generation number of lgroup hierarchy for OS view
                 */
                gen = lgrp_gen;
        else {
                /*
                 * For caller's view, use generation numbers for lgroup
                 * hierarchy and caller's pset
                 * NOTE: Caller needs to check for change in pset ID
                 */
                cpupart = curthread->t_cpupart;
                ASSERT(cpupart);
                gen = lgrp_gen + cpupart->cp_gen;
        }

        kpreempt_enable();

        return (gen);
}
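
/*
 * Sketch of how a caller might use the generation number to detect
 * configuration changes across an unlocked operation (hypothetical
 * caller code, not part of this file):
 *
 *      gen = lgrp_generation(LGRP_VIEW_CALLER);
 *      ... build a snapshot or make a placement decision ...
 *      if (lgrp_generation(LGRP_VIEW_CALLER) != gen)
 *              ... retry, also rechecking the pset ID as noted above ...
 */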


lgrp_id_t
lgrp_home_thread(kthread_t *t)
{
        lgrp_id_t       home;

        ASSERT(t != NULL);
        ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

        thread_lock(t);

        /*
         * Check to see whether caller has permission to query the home
         * lgroup of thread
         */
        if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
                thread_unlock(t);
                return (set_errno(EPERM));
        }

        home = lgrp_home_id(t);

        thread_unlock(t);
        return (home);
}


/*
 * Get home lgroup of given process or thread
 */
lgrp_id_t
lgrp_home_get(idtype_t idtype, id_t id)
{
        proc_t          *p;
        lgrp_id_t       retval;
        kthread_t       *t;

        /*
         * Get home lgroup of given LWP or process
         */
        switch (idtype) {

        case P_LWPID:
                p = curproc;

                /*
                 * Get home lgroup for thread
                 */
                mutex_enter(&p->p_lock);
                if (id == P_MYID) {             /* current thread */
                        retval = lgrp_home_thread(curthread);
                } else if (p->p_tlist == NULL) {
                        retval = set_errno(ESRCH);
                } else {                        /* other thread */
                        int     found = 0;

                        t = p->p_tlist;
                        do {
                                if (t->t_tid == id) {
                                        retval = lgrp_home_thread(t);
                                        found = 1;
                                        break;
                                }
                        } while ((t = t->t_forw) != p->p_tlist);
                        if (!found)
                                retval = set_errno(ESRCH);
                }
                mutex_exit(&p->p_lock);
                break;

        case P_PID:
                /*
                 * Get process
                 */
                mutex_enter(&pidlock);

                if (id == P_MYID)
                        p = curproc;
                else
                        p = prfind(id);

                if (p == NULL) {
                        mutex_exit(&pidlock);
                        return (set_errno(ESRCH));
                }

                mutex_enter(&p->p_lock);
                t = p->p_tlist;
                if (t == NULL)
                        retval = set_errno(ESRCH);
                else
                        retval = lgrp_home_thread(t);
                mutex_exit(&p->p_lock);

                mutex_exit(&pidlock);

                break;

        default:
                retval = set_errno(EINVAL);
                break;
        }

        return (retval);
}


/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system, cannot be used across platforms,
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so the platform gets to decide its value.  It would be nice if
 * the number were at least proportional to make comparisons more meaningful,
 * though.
 */
int
lgrp_latency(lgrp_id_t from, lgrp_id_t to)
{
        lgrp_t          *from_lgrp;
        int             i;
        int             latency;
        int             latency_max;
        lgrp_t          *to_lgrp;

        ASSERT(MUTEX_HELD(&cpu_lock));

        if (from < 0 || to < 0)
                return (set_errno(EINVAL));

        if (from > lgrp_alloc_max || to > lgrp_alloc_max)
                return (set_errno(ESRCH));

        from_lgrp = lgrp_table[from];
        to_lgrp = lgrp_table[to];

        if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
                return (set_errno(ESRCH));
        }

        /*
         * Get latency for same lgroup
         */
        if (from == to) {
                latency = from_lgrp->lgrp_latency;
                return (latency);
        }

        /*
         * Get latency between leaf lgroups
         */
        if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
                return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
                    to_lgrp->lgrp_plathand));

        /*
         * Determine max latency between resources in two lgroups
         */
        latency_max = 0;
        for (i = 0; i <= lgrp_alloc_max; i++) {
                lgrp_t  *from_rsrc;
                int     j;
                lgrp_t  *to_rsrc;

                from_rsrc = lgrp_table[i];
                if (!LGRP_EXISTS(from_rsrc) ||
                    !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
                        continue;

                for (j = 0; j <= lgrp_alloc_max; j++) {
                        to_rsrc = lgrp_table[j];
                        if (!LGRP_EXISTS(to_rsrc) ||
                            klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
                            j) == 0)
                                continue;
                        latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
                            to_rsrc->lgrp_plathand);
                        if (latency > latency_max)
                                latency_max = latency;
                }
        }
        return (latency_max);
}
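
/*
 * Since these values are only meaningful relative to one another on the
 * running system, a consumer would compare them rather than treat them
 * as absolute times, e.g. (hypothetical sketch):
 *
 *      local  = lgrp_latency(home, home);
 *      remote = lgrp_latency(home, other);
 *      if (remote > local)
 *              ... prefer allocating in the home lgroup ...
 */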


/*
 * Return lgroup interface version number
 * 0 - none
 * 1 - original
 * 2 - lgrp_latency_cookie() and lgrp_resources() added
 */
int
lgrp_version(int version)
{
        /*
         * Return LGRP_VER_NONE when requested version isn't supported
         */
        if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
                return (LGRP_VER_NONE);

        /*
         * Return current version when LGRP_VER_NONE passed in
         */
        if (version == LGRP_VER_NONE)
                return (LGRP_VER_CURRENT);

        /*
         * Otherwise, return supported version.
         */
        return (version);
}
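
/*
 * Typical negotiation, as a sketch: a consumer built against a given
 * interface version passes that version in and checks the answer, while
 * passing LGRP_VER_NONE simply asks which version is supported:
 *
 *      if (lgrp_version(LGRP_VER_CURRENT) != LGRP_VER_CURRENT)
 *              ... version not supported, fall back or fail ...
 *
 *      supported = lgrp_version(LGRP_VER_NONE);
 */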


/*
 * Snapshot of lgroup hierarchy
 *
 * One snapshot is kept and is based on the kernel's native data model, so
 * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
 * 64-bit kernel.  If a 32-bit user wants a snapshot from the 64-bit kernel,
 * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
 *
 * The format is defined by the lgroup snapshot header and the layout of
 * the snapshot in memory is as follows:
 * 1) lgroup snapshot header
 *    - specifies format of snapshot
 *    - defined by lgrp_snapshot_header_t
 * 2) lgroup info array
 *    - contains information about each lgroup
 *    - one element for each lgroup
 *    - each element is defined by lgrp_info_t
 * 3) lgroup CPU ID array
 *    - contains list (array) of CPU IDs for each lgroup
 *    - lgrp_info_t points into array and specifies how many CPUs belong to
 *      given lgroup
 * 4) lgroup parents array
 *    - contains lgroup bitmask of parents for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 5) lgroup children array
 *    - contains lgroup bitmask of children for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 6) lgroup resources array
 *    - contains lgroup bitmask of resources for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 7) lgroup latency table
 *    - contains latency from each lgroup to each of the other lgroups
 *
 * NOTE:  Must use nlgrpsmax for per lgroup data structures because lgroups
 *        may be sparsely allocated.
 */
lgrp_snapshot_header_t  *lgrp_snap = NULL;      /* lgroup snapshot */
static kmutex_t         lgrp_snap_lock;         /* snapshot lock */
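
/*
 * The resulting buffer layout, sketched (see lgrp_snapshot() below for
 * the actual offset arithmetic; each region is rounded up for alignment):
 *
 *      +--------------------------------------+ lgrp_snap
 *      | lgrp_snapshot_header_t                |
 *      +--------------------------------------+ ss_info
 *      | lgrp_info_t[nlgrpsmax]                |
 *      +--------------------------------------+ ss_cpuids
 *      | processorid_t[ncpus]                  |
 *      +--------------------------------------+ ss_lgrpset
 *      | pset lgroup bitmask                   |
 *      +--------------------------------------+ ss_parents, ss_children,
 *      | per-lgroup bitmasks                   | ss_rsets
 *      +--------------------------------------+ ss_latencies
 *      | int *[nlgrpsmax] + nlgrpsmax^2 ints   |
 *      +--------------------------------------+
 */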
1213 
1214 
1215 /*
1216  * Take a snapshot of lgroup hierarchy and return size of buffer
1217  * needed to hold snapshot
1218  */
1219 static int
1220 lgrp_snapshot(void)
1221 {
1222         size_t          bitmask_size;
1223         size_t          bitmasks_size;
1224         size_t          bufsize;
1225         int             cpu_index;
1226         size_t          cpuids_size;
1227         int             i;
1228         int             j;
1229         size_t          info_size;
1230         size_t          lats_size;
1231         ulong_t         *lgrp_children;
1232         processorid_t   *lgrp_cpuids;
1233         lgrp_info_t     *lgrp_info;
1234         int             **lgrp_lats;
1235         ulong_t         *lgrp_parents;
1236         ulong_t         *lgrp_rsets;
1237         ulong_t         *lgrpset;
1238         int             snap_ncpus;
1239         int             snap_nlgrps;
1240         int             snap_nlgrpsmax;
1241         size_t          snap_hdr_size;
1242 #ifdef  _SYSCALL32_IMPL
1243         model_t         model = DATAMODEL_NATIVE;
1244 
1245         /*
1246          * Have up-to-date snapshot, so check to see whether caller is 32-bit
1247          * program and need to return size of 32-bit snapshot now.
1248          */
1249         model = get_udatamodel();
1250         if (model == DATAMODEL_ILP32 && lgrp_snap &&
1251             lgrp_snap->ss_gen == lgrp_gen) {
1252 
1253                 snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1254 
1255                 /*
1256                  * Calculate size of buffer needed for 32-bit snapshot,
1257                  * rounding up size of each object to allow for alignment
1258                  * of next object in buffer.
1259                  */
1260                 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1261                     sizeof (caddr32_t));
1262                 info_size =
1263                     P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1264                     sizeof (processorid_t));
1265                 cpuids_size =
1266                     P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1267                     sizeof (ulong_t));
1268 
1269                 /*
1270                  * lgroup bitmasks needed for parents, children, and resources
1271                  * for each lgroup and pset lgroup set
1272                  */
1273                 bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1274                 bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1275                     snap_nlgrpsmax) + 1) * bitmask_size;
1276 
1277                 /*
1278                  * Size of latency table and buffer
1279                  */
1280                 lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
1281                     snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1282 
1283                 bufsize = snap_hdr_size + info_size + cpuids_size +
1284                     bitmasks_size + lats_size;
1285                 return (bufsize);
1286         }
1287 #endif  /* _SYSCALL32_IMPL */
1288 
1289         /*
1290          * Check whether snapshot is up-to-date
1291          * Free it and take another one if not
1292          */
1293         if (lgrp_snap) {
1294                 if (lgrp_snap->ss_gen == lgrp_gen)
1295                         return (lgrp_snap->ss_size);
1296 
1297                 kmem_free(lgrp_snap, lgrp_snap->ss_size);
1298                 lgrp_snap = NULL;
1299         }
1300 
1301         /*
1302          * Allocate memory for snapshot
1303          * w/o holding cpu_lock while waiting for memory
1304          */
1305         while (lgrp_snap == NULL) {
1306                 int     old_generation;
1307 
1308                 /*
1309                  * Take snapshot of lgroup generation number
1310                  * and configuration size dependent information
1311                  * NOTE: Only count number of online CPUs,
1312                  * since only online CPUs appear in lgroups.
1313                  */
1314                 mutex_enter(&cpu_lock);
1315                 old_generation = lgrp_gen;
1316                 snap_ncpus = ncpus_online;
1317                 snap_nlgrps = nlgrps;
1318                 snap_nlgrpsmax = nlgrpsmax;
1319                 mutex_exit(&cpu_lock);
1320 
1321                 /*
1322                  * Calculate size of buffer needed for snapshot,
1323                  * rounding up size of each object to allow for alignment
1324                  * of next object in buffer.
1325                  */
1326                 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1327                     sizeof (void *));
1328                 info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1329                     sizeof (processorid_t));
1330                 cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1331                     sizeof (ulong_t));
1332                 /*
1333                  * lgroup bitmasks needed for pset lgroup set and  parents,
1334                  * children, and resource sets for each lgroup
1335                  */
1336                 bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1337                 bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1338                     snap_nlgrpsmax) + 1) * bitmask_size;
1339 
1340                 /*
1341                  * Size of latency table and buffer
1342                  */
1343                 lats_size = snap_nlgrpsmax * sizeof (int *) +
1344                     snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1345 
1346                 bufsize = snap_hdr_size + info_size + cpuids_size +
1347                     bitmasks_size + lats_size;
1348 
1349                 /*
1350                  * Allocate memory for buffer
1351                  */
1352                 lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
1353                 if (lgrp_snap == NULL)
1354                         return (set_errno(ENOMEM));
1355 
1356                 /*
1357                  * Check whether generation number has changed
1358                  */
1359                 mutex_enter(&cpu_lock);
1360                 if (lgrp_gen == old_generation)
1361                         break;          /* hasn't change, so done. */
1362 
1363                 /*
1364                  * Generation number changed, so free memory and try again.
1365                  */
1366                 mutex_exit(&cpu_lock);
1367                 kmem_free(lgrp_snap, bufsize);
1368                 lgrp_snap = NULL;
1369         }
1370 
1371         /*
1372          * Fill in lgroup snapshot header
1373          * (including pointers to tables of lgroup info, CPU IDs, and parents
1374          * and children)
1375          */
1376         lgrp_snap->ss_version = LGRP_VER_CURRENT;
1377 
1378         /*
1379          * XXX For now, liblgrp only needs to know whether the hierarchy
1380          * XXX only has one level or not
1381          */
1382         if (snap_nlgrps == 1)
1383                 lgrp_snap->ss_levels = 1;
1384         else
1385                 lgrp_snap->ss_levels = 2;
1386 
1387         lgrp_snap->ss_root = LGRP_ROOTID;
1388 
1389         lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
1390         lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
1391         lgrp_snap->ss_ncpus = snap_ncpus;
1392         lgrp_snap->ss_gen = lgrp_gen;
1393         lgrp_snap->ss_view = LGRP_VIEW_OS;
1394         lgrp_snap->ss_pset = 0;              /* NOTE: caller should set if needed */
1395         lgrp_snap->ss_size = bufsize;
1396         lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;
1397 
1398         lgrp_snap->ss_info = lgrp_info =
1399             (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1400 
1401         lgrp_snap->ss_cpuids = lgrp_cpuids =
1402             (processorid_t *)((uintptr_t)lgrp_info + info_size);
1403 
1404         lgrp_snap->ss_lgrpset = lgrpset =
1405             (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);
1406 
1407         lgrp_snap->ss_parents = lgrp_parents =
1408             (ulong_t *)((uintptr_t)lgrpset + bitmask_size);
1409 
1410         lgrp_snap->ss_children = lgrp_children =
1411             (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
1412             bitmask_size));
1413 
1414         lgrp_snap->ss_rsets = lgrp_rsets =
1415             (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
1416             bitmask_size));
1417 
1418         lgrp_snap->ss_latencies = lgrp_lats =
1419             (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
1420             snap_nlgrpsmax * bitmask_size));
1421 
1422         /*
1423          * Fill in lgroup information
1424          */
1425         cpu_index = 0;
1426         for (i = 0; i < snap_nlgrpsmax; i++) {
1427                 struct cpu      *cp;
1428                 int             cpu_count;
1429                 struct cpu      *head;
1430                 int             k;
1431                 lgrp_t          *lgrp;
1432 
1433                 lgrp = lgrp_table[i];
                if (!LGRP_EXISTS(lgrp)) {
                        bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
                        lgrp_info[i].info_lgrpid = LGRP_NONE;
                        continue;
                }

                lgrp_info[i].info_lgrpid = i;
                lgrp_info[i].info_latency = lgrp->lgrp_latency;

                /*
                 * Fill in parents, children, and lgroup resources
                 */
                lgrp_info[i].info_parents =
                    (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));

                if (lgrp->lgrp_parent)
                        BT_SET(lgrp_info[i].info_parents,
                            lgrp->lgrp_parent->lgrp_id);

                lgrp_info[i].info_children =
                    (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));

                for (j = 0; j < snap_nlgrpsmax; j++)
                        if (klgrpset_ismember(lgrp->lgrp_children, j))
                                BT_SET(lgrp_info[i].info_children, j);

                lgrp_info[i].info_rset =
                    (ulong_t *)((uintptr_t)lgrp_rsets +
                    (i * LGRP_RSRC_COUNT * bitmask_size));

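                /*
                 * info_rset points at LGRP_RSRC_COUNT consecutive bitmasks,
                 * one per resource type, each bitmask_size bytes wide.
                 */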
                for (j = 0; j < LGRP_RSRC_COUNT; j++) {
                        ulong_t *rset;

                        rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
                            (j * bitmask_size));
                        for (k = 0; k < snap_nlgrpsmax; k++)
                                if (klgrpset_ismember(lgrp->lgrp_set[j], k))
                                        BT_SET(rset, k);
                }

                /*
                 * Fill in CPU IDs
                 */
                cpu_count = 0;
                lgrp_info[i].info_cpuids = NULL;
                cp = head = lgrp->lgrp_cpu;
                if (head != NULL) {
                        lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
                        do {
                                lgrp_cpuids[cpu_index] = cp->cpu_id;
                                cpu_index++;
                                cpu_count++;
                                cp = cp->cpu_next_lgrp;
                        } while (cp != head);
                }
                ASSERT(cpu_count == lgrp->lgrp_cpucnt);
                lgrp_info[i].info_ncpus = cpu_count;

                /*
                 * Fill in memory sizes for lgroups that directly contain
                 * memory
                 */
                if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
                        lgrp_info[i].info_mem_free =
                            lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
                        lgrp_info[i].info_mem_install =
                            lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
                }

                /*
                 * Fill in latency table and buffer
                 */
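                /*
                 * The latency area begins with snap_nlgrpsmax row pointers
                 * followed by the snap_nlgrpsmax x snap_nlgrpsmax matrix
                 * itself, so row i starts just past the pointer array.
                 */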
                lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
                    sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
                for (j = 0; j < snap_nlgrpsmax; j++) {
                        lgrp_t  *to;

                        to = lgrp_table[j];
                        if (!LGRP_EXISTS(to))
                                continue;
                        lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
                            to->lgrp_id);
                }
        }
        ASSERT(cpu_index == snap_ncpus);

        mutex_exit(&cpu_lock);

#ifdef  _SYSCALL32_IMPL
        /*
         * Check to see whether the caller is a 32-bit program and we need
         * to return the size of the 32-bit snapshot now that the snapshot
         * has been taken/updated.  We may not have been able to do this
         * earlier if the snapshot was out of date or didn't exist yet.
         */
        if (model == DATAMODEL_ILP32) {

                snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

                /*
                 * Calculate size of buffer needed for 32-bit snapshot,
                 * rounding up size of each object to allow for alignment
                 * of next object in buffer.
                 */
                snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
                    sizeof (caddr32_t));
                info_size =
                    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
                    sizeof (processorid_t));
                cpuids_size =
                    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
                    sizeof (ulong_t));

                bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);
                bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
                    1) * bitmask_size;

                /*
                 * Size of latency table and buffer
                 */
                lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
                    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

                bufsize = snap_hdr_size + info_size + cpuids_size +
                    bitmasks_size + lats_size;
                return (bufsize);
        }
#endif  /* _SYSCALL32_IMPL */

        return (lgrp_snap->ss_size);
}


/*
 * Copy snapshot into given user buffer, fix up any pointers in buffer to point
 * into user instead of kernel address space, and return size of buffer
 * needed to hold snapshot
 */
static int
lgrp_snapshot_copy(char *buf, size_t bufsize)
{
        size_t                  bitmask_size;
        int                     cpu_index;
        size_t                  cpuids_size;
        int                     i;
        size_t                  info_size;
        lgrp_info_t             *lgrp_info;
        int                     retval;
        size_t                  snap_hdr_size;
        int                     snap_ncpus;
        int                     snap_nlgrpsmax;
        lgrp_snapshot_header_t  *user_snap;
        lgrp_info_t             *user_info;
        lgrp_info_t             *user_info_buffer;
        processorid_t           *user_cpuids;
        ulong_t                 *user_lgrpset;
        ulong_t                 *user_parents;
        ulong_t                 *user_children;
        int                     **user_lats;
        int                     **user_lats_buffer;
        ulong_t                 *user_rsets;

        if (lgrp_snap == NULL)
                return (0);

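        /*
         * A null buffer (or zero size) is just a size query, so report how
         * big a buffer the current snapshot needs.
         */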
        if (buf == NULL || bufsize <= 0)
                return (lgrp_snap->ss_size);

        /*
         * User needs to try getting size of buffer again
         * because given buffer size is too small.
         * The lgroup hierarchy may have changed after they asked for the size
         * but before the snapshot was taken.
         */
        if (bufsize < lgrp_snap->ss_size)
                return (set_errno(EAGAIN));

        snap_ncpus = lgrp_snap->ss_ncpus;
        snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

        /*
         * Fill in lgrpset now because caller may have changed psets
         */
        kpreempt_disable();
        for (i = 0; i < snap_nlgrpsmax; i++) {
                if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
                    i)) {
                        BT_SET(lgrp_snap->ss_lgrpset, i);
                }
        }
        kpreempt_enable();

        /*
         * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
         * into user buffer all at once
         */
        if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
                return (set_errno(EFAULT));

        /*
         * Round up sizes of lgroup snapshot header and info for alignment
         */
        snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
            sizeof (void *));
        info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
            sizeof (processorid_t));
        cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
            sizeof (ulong_t));

        bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);

        /*
         * Calculate pointers into user buffer for lgroup snapshot header,
         * info, and CPU IDs
         */
        user_snap = (lgrp_snapshot_header_t *)buf;
        user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
        user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
        user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
        user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
        user_children = (ulong_t *)((uintptr_t)user_parents +
            (snap_nlgrpsmax * bitmask_size));
        user_rsets = (ulong_t *)((uintptr_t)user_children +
            (snap_nlgrpsmax * bitmask_size));
        user_lats = (int **)((uintptr_t)user_rsets +
            (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));
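
        /*
         * These mirror the kernel-side carve-up done in lgrp_snapshot(), so
         * each fixed-up pointer lands at the same offset within the user's
         * copy of the snapshot.
         */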

        /*
         * Copyout magic number (ie. pointer to beginning of buffer)
         */
        if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
                return (set_errno(EFAULT));

        /*
         * Fix up pointers in user buffer to point into user buffer
         * not kernel snapshot
         */
        if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
                return (set_errno(EFAULT));

        if (copyout(&user_cpuids, &user_snap->ss_cpuids,
            sizeof (user_cpuids)) != 0)
                return (set_errno(EFAULT));

        if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
            sizeof (user_lgrpset)) != 0)
                return (set_errno(EFAULT));

        if (copyout(&user_parents, &user_snap->ss_parents,
            sizeof (user_parents)) != 0)
                return (set_errno(EFAULT));

        if (copyout(&user_children, &user_snap->ss_children,
            sizeof (user_children)) != 0)
                return (set_errno(EFAULT));

        if (copyout(&user_rsets, &user_snap->ss_rsets,
            sizeof (user_rsets)) != 0)
                return (set_errno(EFAULT));

        if (copyout(&user_lats, &user_snap->ss_latencies,
            sizeof (user_lats)) != 0)
                return (set_errno(EFAULT));

        /*
         * Make copies of lgroup info and latency table, fix up pointers,
         * and then copy them into user buffer
         */
        user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
        if (user_info_buffer == NULL)
                return (set_errno(ENOMEM));

        user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
            KM_NOSLEEP);
        if (user_lats_buffer == NULL) {
                kmem_free(user_info_buffer, info_size);
                return (set_errno(ENOMEM));
        }

        lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
        bcopy(lgrp_info, user_info_buffer, info_size);

        cpu_index = 0;
        for (i = 0; i < snap_nlgrpsmax; i++) {
                ulong_t *snap_rset;

                /*
                 * Skip non-existent lgroups
                 */
                if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
                        continue;

                /*
                 * Update free memory size since it changes frequently
                 * Only do so for lgroups directly containing memory
                 *
                 * NOTE: This must be done before changing the pointers to
                 *       point into user space since we need to dereference
                 *       lgroup resource set
                 */
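                /*
                 * info_rset is an array of ulong_t words in which each
                 * resource bitmask occupies BT_BITOUL(snap_nlgrpsmax)
                 * words, so this indexes the LGRP_RSRC_MEM bitmask.
                 */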
                snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
                    BT_BITOUL(snap_nlgrpsmax)];
                if (BT_TEST(snap_rset, i))
                        user_info_buffer[i].info_mem_free =
                            lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);

                /*
                 * Fix up pointers to parents, children, resources, and
                 * latencies
                 */
                user_info_buffer[i].info_parents =
                    (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
                user_info_buffer[i].info_children =
                    (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
                user_info_buffer[i].info_rset =
                    (ulong_t *)((uintptr_t)user_rsets +
                    (i * LGRP_RSRC_COUNT * bitmask_size));
                user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
                    (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
                    sizeof (int)));

                /*
                 * Fix up pointer to CPU IDs
                 */
                if (user_info_buffer[i].info_ncpus == 0) {
                        user_info_buffer[i].info_cpuids = NULL;
                        continue;
                }
                user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
                cpu_index += user_info_buffer[i].info_ncpus;
        }
        ASSERT(cpu_index == snap_ncpus);

        /*
         * Copy lgroup info and latency table with pointers fixed up to point
         * into user buffer out to user buffer now
         */
        retval = lgrp_snap->ss_size;
        if (copyout(user_info_buffer, user_info, info_size) != 0)
                retval = set_errno(EFAULT);
        kmem_free(user_info_buffer, info_size);

        if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
            sizeof (int *)) != 0)
                retval = set_errno(EFAULT);
        kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));

        return (retval);
}


#ifdef  _SYSCALL32_IMPL
/*
 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
 * into user instead of kernel address space, copy 32-bit snapshot into
 * given user buffer, and return size of buffer needed to hold snapshot
 */
static int
lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
{
        size32_t                        bitmask_size;
        size32_t                        bitmasks_size;
        size32_t                        children_size;
        int                             cpu_index;
        size32_t                        cpuids_size;
        int                             i;
        int                             j;
        size32_t                        info_size;
        size32_t                        lats_size;
        lgrp_info_t                     *lgrp_info;
        lgrp_snapshot_header32_t        *lgrp_snap32;
        lgrp_info32_t                   *lgrp_info32;
        processorid_t                   *lgrp_cpuids32;
        caddr32_t                       *lgrp_lats32;
        int                             **lgrp_lats32_kernel;
        uint_t                          *lgrp_set32;
        uint_t                          *lgrp_parents32;
        uint_t                          *lgrp_children32;
        uint_t                          *lgrp_rsets32;
        size32_t                        parents_size;
        size32_t                        rsets_size;
        size32_t                        set_size;
        size32_t                        snap_hdr_size;
        int                             snap_ncpus;
        int                             snap_nlgrpsmax;
        size32_t                        snap_size;

        if (lgrp_snap == NULL)
                return (0);

        snap_ncpus = lgrp_snap->ss_ncpus;
        snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

        /*
         * Calculate size of buffer needed for 32-bit snapshot,
         * rounding up size of each object to allow for alignment
         * of next object in buffer.
         */
        snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
            sizeof (caddr32_t));
        info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
            sizeof (processorid_t));
        cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
            sizeof (ulong_t));

        bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);
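
        /*
         * Bitmasks in the 32-bit snapshot are arrays of uint_t rather than
         * ulong_t, so BT_SIZEOFMAP32() is used here and these sizes may
         * differ from the native snapshot's.
         */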

        set_size = bitmask_size;
        parents_size = snap_nlgrpsmax * bitmask_size;
        children_size = snap_nlgrpsmax * bitmask_size;
        rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
            (int)bitmask_size, sizeof (caddr32_t));

        bitmasks_size = set_size + parents_size + children_size + rsets_size;

        /*
         * Size of latency table and buffer
         */
        lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
            (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

        snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
            lats_size;

        if (buf == NULL || bufsize <= 0) {
                return (snap_size);
        }

        /*
         * User needs to try getting size of buffer again
         * because given buffer size is too small.
         * The lgroup hierarchy may have changed after they asked for the size
         * but before the snapshot was taken.
         */
        if (bufsize < snap_size)
                return (set_errno(EAGAIN));

        /*
         * Make 32-bit copy of snapshot, fix up pointers to point into user
         * buffer not kernel, and then copy whole thing into user buffer
         */
        lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
        if (lgrp_snap32 == NULL)
                return (set_errno(ENOMEM));

        /*
         * Calculate pointers into 32-bit copy of snapshot
         * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
         * resources, and latency table and buffer
         */
        lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
            snap_hdr_size);
        lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
        lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
        lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
        lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
        lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
        lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);

        /*
         * Make temporary lgroup latency table of pointers for kernel to use
         * to fill in rows of table with latencies from each lgroup
         */
        lgrp_lats32_kernel = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
            KM_NOSLEEP);
        if (lgrp_lats32_kernel == NULL) {
                kmem_free(lgrp_snap32, snap_size);
                return (set_errno(ENOMEM));
        }

        /*
         * Fill in 32-bit lgroup snapshot header
         * (with pointers into user's buffer for lgroup info, CPU IDs,
         * bit masks, and latencies)
         */
        lgrp_snap32->ss_version = lgrp_snap->ss_version;
        lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
        lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
            lgrp_snap->ss_nlgrps;
        lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
        lgrp_snap32->ss_root = lgrp_snap->ss_root;
        lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
        lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
        lgrp_snap32->ss_view = LGRP_VIEW_OS;
        lgrp_snap32->ss_size = snap_size;
        lgrp_snap32->ss_magic = buf;
        lgrp_snap32->ss_info = buf + snap_hdr_size;
        lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
        lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
        lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + bitmask_size;
        lgrp_snap32->ss_children = lgrp_snap32->ss_parents +
            (snap_nlgrpsmax * bitmask_size);
        lgrp_snap32->ss_rsets = lgrp_snap32->ss_children +
            (snap_nlgrpsmax * bitmask_size);
        lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets +
            (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size);
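
        /*
         * Note that the ss_* fields set above are caddr32_t user addresses
         * computed from buf, not kernel pointers; the kernel-side copies of
         * these tables are reached through the lgrp_*32 pointers computed
         * earlier.
         */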

        /*
         * Fill in lgrpset now because caller may have changed psets
         */
        kpreempt_disable();
        for (i = 0; i < snap_nlgrpsmax; i++) {
                if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
                    i)) {
                        BT_SET32(lgrp_set32, i);
                }
        }
        kpreempt_enable();

        /*
         * Fill in 32-bit copy of lgroup info and fix up pointers
         * to point into user's buffer instead of kernel's
         */
        cpu_index = 0;
        lgrp_info = lgrp_snap->ss_info;
        for (i = 0; i < snap_nlgrpsmax; i++) {
                uint_t  *children;
                uint_t  *lgrp_rset;
                uint_t  *parents;
                ulong_t *snap_rset;

                /*
                 * Skip non-existent lgroups
                 */
                if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
                        bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
                        lgrp_info32[i].info_lgrpid = LGRP_NONE;
                        continue;
                }

                /*
                 * Fill in parents, children, lgroup resource set, and
                 * latencies from snapshot
                 */
                parents = (uint_t *)((uintptr_t)lgrp_parents32 +
                    i * bitmask_size);
                children = (uint_t *)((uintptr_t)lgrp_children32 +
                    i * bitmask_size);
                snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets +
                    (i * LGRP_RSRC_COUNT * BT_SIZEOFMAP(snap_nlgrpsmax)));
                lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
                    (i * LGRP_RSRC_COUNT * bitmask_size));
                lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
                    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
                    sizeof (int));
                for (j = 0; j < snap_nlgrpsmax; j++) {
                        int     k;
                        uint_t  *rset;

                        if (BT_TEST(&lgrp_snap->ss_parents[i], j))
                                BT_SET32(parents, j);

                        if (BT_TEST(&lgrp_snap->ss_children[i], j))
                                BT_SET32(children, j);

                        for (k = 0; k < LGRP_RSRC_COUNT; k++) {
                                rset = (uint_t *)((uintptr_t)lgrp_rset +
                                    k * bitmask_size);
                                if (BT_TEST(&snap_rset[k], j))
                                        BT_SET32(rset, j);
                        }

                        lgrp_lats32_kernel[i][j] =
                            lgrp_snap->ss_latencies[i][j];
                }

                /*
                 * Fix up pointer to latency buffer
                 */
                lgrp_lats32[i] = lgrp_snap32->ss_latencies +
                    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
                    sizeof (int);

                /*
                 * Fix up pointers for parents, children, and resources
                 */
                lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
                    (i * bitmask_size);
                lgrp_info32[i].info_children = lgrp_snap32->ss_children +
                    (i * bitmask_size);
                lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
                    (i * LGRP_RSRC_COUNT * bitmask_size);

                /*
                 * Fill in memory and CPU info
                 * Only fill in memory for lgroups directly containing memory
                 */
                snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
                    BT_BITOUL(snap_nlgrpsmax)];
                if (BT_TEST(snap_rset, i)) {
                        lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
                            LGRP_MEM_SIZE_FREE);
                        lgrp_info32[i].info_mem_install =
                            lgrp_info[i].info_mem_install;
                }

                lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;

                lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
                lgrp_info32[i].info_latency = lgrp_info[i].info_latency;

                if (lgrp_info32[i].info_ncpus == 0) {
                        lgrp_info32[i].info_cpuids = 0;
                        continue;
                }

                /*
                 * Fix up pointer for CPU IDs (ss_cpuids is a caddr32_t byte
                 * address, so the index must be scaled by
                 * sizeof (processorid_t) explicitly)
                 */
                lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
                    (cpu_index * sizeof (processorid_t));
                cpu_index += lgrp_info32[i].info_ncpus;
        }
        ASSERT(cpu_index == snap_ncpus);

        /*
         * Copy lgroup CPU IDs into 32-bit snapshot
         * before copying it out into user's buffer
         */
        bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);

        /*
         * Copy 32-bit lgroup snapshot into user's buffer all at once
         */
        if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0) {
                kmem_free(lgrp_snap32, snap_size);
                kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
                return (set_errno(EFAULT));
        }

        kmem_free(lgrp_snap32, snap_size);
        kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));

        return (snap_size);
}
#endif  /* _SYSCALL32_IMPL */


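/*
 * lgrpsys() is the common entry point for the lgroup system calls; the
 * subcode selects the operation and ia/ap are interpreted accordingly.
 * For LGRP_SYS_SNAPSHOT, callers are expected to query the size first
 * (null buffer), allocate, and retry if EAGAIN indicates the hierarchy
 * changed in between.
 */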
int
lgrpsys(int subcode, long ia, void *ap)
{
        size_t  bufsize;
        int     latency;

        switch (subcode) {

        case LGRP_SYS_AFFINITY_GET:
                return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));

        case LGRP_SYS_AFFINITY_SET:
                return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));

        case LGRP_SYS_GENERATION:
                return (lgrp_generation(ia));

        case LGRP_SYS_HOME:
                return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));

        case LGRP_SYS_LATENCY:
                mutex_enter(&cpu_lock);
                latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
                mutex_exit(&cpu_lock);
                return (latency);

        case LGRP_SYS_MEMINFO:
                return (meminfo(ia, (struct meminfo *)ap));

        case LGRP_SYS_VERSION:
                return (lgrp_version(ia));

        case LGRP_SYS_SNAPSHOT:
                mutex_enter(&lgrp_snap_lock);
                bufsize = lgrp_snapshot();
                if (ap && ia > 0) {
                        if (get_udatamodel() == DATAMODEL_NATIVE)
                                bufsize = lgrp_snapshot_copy(ap, ia);
#ifdef  _SYSCALL32_IMPL
                        else
                                bufsize = lgrp_snapshot_copy32(
                                    (caddr32_t)(uintptr_t)ap, ia);
#endif  /* _SYSCALL32_IMPL */
                }
                mutex_exit(&lgrp_snap_lock);
                return (bufsize);

        default:
                break;
        }

        return (set_errno(EINVAL));
}