1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/systm.h>
  27 #include <sys/platform_module.h>
  28 #include <sys/sysmacros.h>
  29 #include <sys/atomic.h>
  30 #include <sys/memlist.h>
  31 #include <sys/memnode.h>
  32 #include <vm/vm_dep.h>
  33 
int max_mem_nodes = 1;          /* max memory nodes on this system */

/* Per-memnode configuration, indexed by memnode id (mnode). */
struct mem_node_conf mem_node_config[MAX_MEM_NODES];
/*
 * Not referenced in this file; presumably consumed by PFN_2_MEM_NODE()
 * elsewhere -- TODO confirm.
 */
int mem_node_pfn_shift;
/*
 * num_memnodes should be updated atomically and always be >=
 * the number of bits set in memnodes_mask or the algorithm may fail.
 */
uint16_t num_memnodes;
/* Bit 'mnode' is set while that memnode is configured. */
mnodeset_t memnodes_mask; /* assumes 8*(sizeof(mnodeset_t)) >= MAX_MEM_NODES */

/*
 * If set, mem_node_physalign should be a power of two, and
 * should reflect the minimum address alignment of each node.
 * It is a byte quantity: the slice routines convert it to pages
 * with btop() before aligning PFNs.
 */
uint64_t mem_node_physalign;

/*
 * Platform hooks we will need.
 *
 * They are declared weak so that a platform may leave them undefined;
 * callers test the hook's address against NULL before calling it and
 * fall back to the generic implementation in this file.
 */

#pragma weak plat_build_mem_nodes
#pragma weak plat_slice_add
#pragma weak plat_slice_del
  58 
/*
 * Adjust the memnode config after a DR operation.
 *
 * It is rather tricky to do these updates since we can't
 * protect the memnode structures with locks, so we must
 * be mindful of the order in which updates and reads to
 * these values can occur.
 *
 * mem_node_add_slice() records the PFN range [start, end] (end is the
 * last PFN of the slice) in the memnode selected by
 * PFN_2_MEM_NODE(start), creating that memnode if it does not exist
 * yet, and then reports the addition to the lgrp framework.
 */
void
mem_node_add_slice(pfn_t start, pfn_t end)
{
        int mnode;

        /*
         * DR will pass us the first pfn that is allocatable.
         * We need to round down to get the real start of
         * the slice.
         */
        if (mem_node_physalign) {
                /* Alignment is kept in bytes; btop() turns it into pages. */
                start &= ~(btop(mem_node_physalign) - 1);
                end = roundup(end, btop(mem_node_physalign)) - 1;
        }

        mnode = PFN_2_MEM_NODE(start);
        ASSERT(mnode < max_mem_nodes);

        /*
         * atomic_cas_32() returns the previous value of 'exists'.
         * Non-zero: the memnode was already present, so only widen it.
         * Zero: this call has just claimed the memnode (exists is now 1)
         * and must initialize and publish it.
         */
        if (atomic_cas_32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
                /*
                 * Add slice to existing node.
                 */
                if (start < mem_node_config[mnode].physbase)
                        mem_node_config[mnode].physbase = start;
                if (end > mem_node_config[mnode].physmax)
                        mem_node_config[mnode].physmax = end;
        } else {
                /*
                 * New node: set its bounds first, then bump num_memnodes,
                 * and only then set its bit in memnodes_mask, so that
                 * num_memnodes never drops below the number of bits set
                 * in the mask.
                 */
                mem_node_config[mnode].physbase = start;
                mem_node_config[mnode].physmax = end;
                atomic_inc_16(&num_memnodes);
                atomic_or_64(&memnodes_mask, 1ull << mnode);
        }
        /*
         * Let the common lgrp framework know about the new memory
         */
        lgrp_config(LGRP_CONFIG_MEM_ADD, mnode, MEM_NODE_2_LGRPHAND(mnode));
}
 104 
/*
 * Remove a PFN range from a memnode.  On some platforms,
 * the memnode will be created with physbase at the first
 * allocatable PFN, but later deleted with the MC slice
 * base address converted to a PFN, in which case we need
 * to assume physbase and up.
 *
 * The memnode is the one selected by PFN_2_MEM_NODE(start).  If the
 * memnode spans more than the slice, its bounds are trimmed; otherwise
 * the whole memnode is torn down and the lgrp framework is notified.
 */
void
mem_node_del_slice(pfn_t start, pfn_t end)
{
        int mnode;
        pgcnt_t delta_pgcnt, node_size;

        /* Apply the same alignment adjustment as mem_node_add_slice(). */
        if (mem_node_physalign) {
                start &= ~(btop(mem_node_physalign) - 1);
                end = roundup(end, btop(mem_node_physalign)) - 1;
        }
        mnode = PFN_2_MEM_NODE(start);

        ASSERT(mnode < max_mem_nodes);
        ASSERT(mem_node_config[mnode].exists == 1);

        /*
         * Both spans are computed as (last - first), i.e. one less than
         * the actual page count; only their relative size matters for
         * the comparison below.
         */
        delta_pgcnt = end - start;
        node_size = mem_node_config[mnode].physmax -
            mem_node_config[mnode].physbase;

        if (node_size > delta_pgcnt) {
                /*
                 * Subtract the slice from the memnode.
                 *
                 * Only a slice that touches the bottom or the top of
                 * the node moves a bound; a slice strictly inside the
                 * node leaves physbase and physmax unchanged.
                 */
                if (start <= mem_node_config[mnode].physbase)
                        mem_node_config[mnode].physbase = end + 1;
                ASSERT(end <= mem_node_config[mnode].physmax);
                if (end == mem_node_config[mnode].physmax)
                        mem_node_config[mnode].physmax = start - 1;
        } else {

                /*
                 * Let the common lgrp framework know the mnode is
                 * leaving
                 */
                lgrp_config(LGRP_CONFIG_MEM_DEL, mnode,
                    MEM_NODE_2_LGRPHAND(mnode));

                /*
                 * Delete the whole node.
                 *
                 * Teardown mirrors creation in reverse: clear the mask
                 * bit before decrementing num_memnodes (keeping
                 * num_memnodes >= bits set in memnodes_mask), and mark
                 * the slot free last.
                 */
                /* NOTE(review): presumably no pages remain accounted here. */
                ASSERT(MNODE_PGCNT(mnode) == 0);
                atomic_and_64(&memnodes_mask, ~(1ull << mnode));
                atomic_dec_16(&num_memnodes);
                mem_node_config[mnode].exists = 0;
        }
}
 158 
 159 void
 160 mem_node_add_range(pfn_t start, pfn_t end)
 161 {
 162         if (&plat_slice_add != NULL)
 163                 plat_slice_add(start, end);
 164         else
 165                 mem_node_add_slice(start, end);
 166 }
 167 
 168 void
 169 mem_node_del_range(pfn_t start, pfn_t end)
 170 {
 171         if (&plat_slice_del != NULL)
 172                 plat_slice_del(start, end);
 173         else
 174                 mem_node_del_slice(start, end);
 175 }
 176 
 177 void
 178 startup_build_mem_nodes(prom_memlist_t *list, size_t nelems)
 179 {
 180         size_t  elem;
 181         pfn_t   basepfn;
 182         pgcnt_t npgs;
 183 
 184         /* LINTED: ASSERT will always true or false */
 185         ASSERT(NBBY * sizeof (mnodeset_t) >= max_mem_nodes);
 186 
 187         if (&plat_build_mem_nodes != NULL) {
 188                 plat_build_mem_nodes(list, nelems);
 189         } else {
 190                 /*
 191                  * Boot install lists are arranged <addr, len>, ...
 192                  */
 193                 for (elem = 0; elem < nelems; list++, elem++) {
 194                         basepfn = btop(list->addr);
 195                         npgs = btop(list->size);
 196                         mem_node_add_range(basepfn, basepfn + npgs - 1);
 197                 }
 198         }
 199 }
 200 
 201 /*
 202  * Allocate an unassigned memnode.
 203  */
 204 int
 205 mem_node_alloc()
 206 {
 207         int mnode;
 208 
 209         /*
 210          * Find an unused memnode.  Update it atomically to prevent
 211          * a first time memnode creation race.
 212          */
 213         for (mnode = 0; mnode < max_mem_nodes; mnode++)
 214                 if (atomic_cas_32((uint32_t *)&mem_node_config[mnode].exists,
 215                     0, 1) == 0)
 216                         break;
 217 
 218         if (mnode >= max_mem_nodes)
 219                         panic("Out of free memnodes\n");
 220 
 221         mem_node_config[mnode].physbase = (uint64_t)-1;
 222         mem_node_config[mnode].physmax = 0;
 223         atomic_inc_16(&num_memnodes);
 224         atomic_or_64(&memnodes_mask, 1ull << mnode);
 225 
 226         return (mnode);
 227 }
 228 
 229 /*
 230  * Find the intersection between a memnode and a memlist
 231  * and returns the number of pages that overlap.
 232  *
 233  * Grab the memlist lock to protect the list from DR operations.
 234  */
 235 pgcnt_t
 236 mem_node_memlist_pages(int mnode, struct memlist *mlist)
 237 {
 238         pfn_t           base, end;
 239         pfn_t           cur_base, cur_end;
 240         pgcnt_t         npgs = 0;
 241         pgcnt_t         pages;
 242         struct memlist  *pmem;
 243 
 244         if (&plat_mem_node_intersect_range != NULL) {
 245                 memlist_read_lock();
 246 
 247                 for (pmem = mlist; pmem; pmem = pmem->ml_next) {
 248                         plat_mem_node_intersect_range(btop(pmem->ml_address),
 249                             btop(pmem->ml_size), mnode, &pages);
 250                         npgs += pages;
 251                 }
 252 
 253                 memlist_read_unlock();
 254                 return (npgs);
 255         }
 256 
 257         base = mem_node_config[mnode].physbase;
 258         end = mem_node_config[mnode].physmax;
 259 
 260         memlist_read_lock();
 261 
 262         for (pmem = mlist; pmem; pmem = pmem->ml_next) {
 263                 cur_base = btop(pmem->ml_address);
 264                 cur_end = cur_base + btop(pmem->ml_size) - 1;
 265                 if (end < cur_base || base > cur_end)
 266                         continue;
 267                 npgs = npgs + (MIN(cur_end, end) -
 268                     MAX(cur_base, base)) + 1;
 269         }
 270 
 271         memlist_read_unlock();
 272 
 273         return (npgs);
 274 }
 275 
 276 /*
 277  * Find MIN(physbase) and MAX(physmax) over all mnodes
 278  *
 279  * Called during startup and DR to find hpm_counters limits when
 280  * interleaved_mnodes is set.
 281  * NOTE: there is a race condition with DR if it tries to change more than
 282  * one mnode in parallel. Sizing shared hpm_counters depends on finding the
 283  * min(physbase) and max(physmax) across all mnodes. Therefore, the caller of
 284  * page_ctrs_adjust must ensure that mem_node_config does not change while it
 285  * is running.
 286  */
 287 void
 288 mem_node_max_range(pfn_t *basep, pfn_t *maxp)
 289 {
 290         int mnode;
 291         pfn_t max = 0;
 292         pfn_t base = (pfn_t)-1;
 293 
 294         for (mnode = 0; mnode < max_mem_nodes; mnode++) {
 295                 if (mem_node_config[mnode].exists == 0)
 296                         continue;
 297                 if (max < mem_node_config[mnode].physmax)
 298                         max = mem_node_config[mnode].physmax;
 299                 if (base > mem_node_config[mnode].physbase)
 300                         base = mem_node_config[mnode].physbase;
 301         }
 302         ASSERT(base != (pfn_t)-1 && max != 0);
 303         *basep = base;
 304         *maxp = max;
 305 }