combined Wdiff usr/src/uts/common/vm/vm_as.c

Print this page

const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
use NULL getmemid segop as a shorthand for ENODEV
Instead of forcing every segment driver to implement a dummy function to
return (hopefully) ENODEV, handle NULL getmemid segop function pointer as
"return ENODEV" shorthand.
seg_inherit_notsup is redundant since segop_inherit checks for NULL properly
patch lower-case-segops
remove whole-process swapping
Long before Unix supported paging, it used process swapping to reclaim
memory.  The code is there and in theory it runs when we get *extremely* low
on memory.  In practice, it never runs since the definition of low-on-memory
is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
remove xhat
The xhat infrastructure was added to support hardware such as the zulu
graphics card - hardware which had on-board MMUs.  The VM used the xhat code
to keep the CPU's and Zulu's page tables in sync.  Since the only xhat user
was zulu (which is gone), we can safely remove it, simplifying the whole VM
subsystem.
Assorted notes:
- AS_BUSY flag was used solely by xhat

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/vm/vm_as.c
          +++ new/usr/src/uts/common/vm/vm_as.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   * Copyright 2015, Joyent, Inc.  All rights reserved.
  25   25   */
  26   26  
  27   27  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  28   28  /*        All Rights Reserved   */
  29   29  
  30   30  /*
  31   31   * University Copyright- Copyright (c) 1982, 1986, 1988
  32   32   * The Regents of the University of California
  33   33   * All Rights Reserved
  34   34   *
  35   35   * University Acknowledgment- Portions of this document are derived from
  36   36   * software developed by the University of California, Berkeley, and its
  37   37   * contributors.
  38   38   */
  39   39  
  40   40  /*
  41   41   * VM - address spaces.
  42   42   */
  43   43  
  44   44  #include <sys/types.h>
  45   45  #include <sys/t_lock.h>
  46   46  #include <sys/param.h>
  47   47  #include <sys/errno.h>
  48   48  #include <sys/systm.h>
  49   49  #include <sys/mman.h>
  50   50  #include <sys/sysmacros.h>
  51   51  #include <sys/cpuvar.h>

↓ open down ↓

51 lines elided

↑ open up ↑

  52   52  #include <sys/sysinfo.h>
  53   53  #include <sys/kmem.h>
  54   54  #include <sys/vnode.h>
  55   55  #include <sys/vmsystm.h>
  56   56  #include <sys/cmn_err.h>
  57   57  #include <sys/debug.h>
  58   58  #include <sys/tnf_probe.h>
  59   59  #include <sys/vtrace.h>
  60   60  
  61   61  #include <vm/hat.h>
  62      -#include <vm/xhat.h>
  63   62  #include <vm/as.h>
  64   63  #include <vm/seg.h>
  65   64  #include <vm/seg_vn.h>
  66   65  #include <vm/seg_dev.h>
  67   66  #include <vm/seg_kmem.h>
  68   67  #include <vm/seg_map.h>
  69   68  #include <vm/seg_spt.h>
  70   69  #include <vm/page.h>
  71   70  
  72   71  clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */

  73   72  
  74   73  static struct kmem_cache *as_cache;
  75   74  
  76   75  static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
  77   76  static void as_clearwatchprot(struct as *, caddr_t, size_t);
  78   77  int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
  79   78  
  80   79  
  81   80  /*
  82   81   * Verifying the segment lists is very time-consuming; it may not be
  83   82   * desirable always to define VERIFY_SEGLIST when DEBUG is set.
  84   83   */
  85   84  #ifdef DEBUG
  86   85  #define VERIFY_SEGLIST
  87   86  int do_as_verify = 0;
  88   87  #endif
  89   88  
  90   89  /*
  91   90   * Allocate a new callback data structure entry and fill in the events of
  92   91   * interest, the address range of interest, and the callback argument.
  93   92   * Link the entry on the as->a_callbacks list. A callback entry for the
  94   93   * entire address space may be specified with vaddr = 0 and size = -1.
  95   94   *
  96   95   * CALLERS RESPONSIBILITY: If not calling from within the process context for
  97   96   * the specified as, the caller must guarantee persistence of the specified as
  98   97   * for the duration of this function (eg. pages being locked within the as
  99   98   * will guarantee persistence).
 100   99   */
 101  100  int
 102  101  as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events,
 103  102                  caddr_t vaddr, size_t size, int sleepflag)
 104  103  {
 105  104          struct as_callback      *current_head, *cb;
 106  105          caddr_t                 saddr;
 107  106          size_t                  rsize;
 108  107  
 109  108          /* callback function and an event are mandatory */
 110  109          if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0))
 111  110                  return (EINVAL);
 112  111  
 113  112          /* Adding a callback after as_free has been called is not allowed */
 114  113          if (as == &kas)
 115  114                  return (ENOMEM);
 116  115  
 117  116          /*
 118  117           * vaddr = 0 and size = -1 is used to indicate that the callback range
 119  118           * is the entire address space so no rounding is done in that case.
 120  119           */
 121  120          if (size != -1) {
 122  121                  saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
 123  122                  rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) -
 124  123                      (size_t)saddr;
 125  124                  /* check for wraparound */
 126  125                  if (saddr + rsize < saddr)
 127  126                          return (ENOMEM);
 128  127          } else {
 129  128                  if (vaddr != 0)
 130  129                          return (EINVAL);
 131  130                  saddr = vaddr;
 132  131                  rsize = size;
 133  132          }
 134  133  
 135  134          /* Allocate and initialize a callback entry */
 136  135          cb = kmem_zalloc(sizeof (struct as_callback), sleepflag);
 137  136          if (cb == NULL)
 138  137                  return (EAGAIN);
 139  138  
 140  139          cb->ascb_func = cb_func;
 141  140          cb->ascb_arg = arg;
 142  141          cb->ascb_events = events;
 143  142          cb->ascb_saddr = saddr;
 144  143          cb->ascb_len = rsize;
 145  144  
 146  145          /* Add the entry to the list */
 147  146          mutex_enter(&as->a_contents);
 148  147          current_head = as->a_callbacks;
 149  148          as->a_callbacks = cb;
 150  149          cb->ascb_next = current_head;
 151  150  
 152  151          /*
 153  152           * The call to this function may lose in a race with
 154  153           * a pertinent event - eg. a thread does long term memory locking
 155  154           * but before the callback is added another thread executes as_unmap.
 156  155           * A broadcast here resolves that.
 157  156           */
 158  157          if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) {
 159  158                  AS_CLRUNMAPWAIT(as);
 160  159                  cv_broadcast(&as->a_cv);
 161  160          }
 162  161  
 163  162          mutex_exit(&as->a_contents);
 164  163          return (0);
 165  164  }
 166  165  
 167  166  /*
 168  167   * Search the callback list for an entry which pertains to arg.
 169  168   *
 170  169   * This is called from within the client upon completion of the callback.
 171  170   * RETURN VALUES:
 172  171   *      AS_CALLBACK_DELETED  (callback entry found and deleted)
 173  172   *      AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
 174  173   *      AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
 175  174   *                      entry will be made in as_do_callbacks)
 176  175   *
 177  176   * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
 178  177   * set, it indicates that as_do_callbacks is processing this entry.  The
 179  178   * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
 180  179   * to unblock as_do_callbacks, in case it is blocked.
 181  180   *
 182  181   * CALLERS RESPONSIBILITY: If not calling from within the process context for
 183  182   * the specified as, the caller must guarantee persistence of the specified as
 184  183   * for the duration of this function (eg. pages being locked within the as
 185  184   * will guarantee persistence).
 186  185   */
 187  186  uint_t
 188  187  as_delete_callback(struct as *as, void *arg)
 189  188  {
 190  189          struct as_callback **prevcb = &as->a_callbacks;
 191  190          struct as_callback *cb;
 192  191          uint_t rc = AS_CALLBACK_NOTFOUND;
 193  192  
 194  193          mutex_enter(&as->a_contents);
 195  194          for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) {
 196  195                  if (cb->ascb_arg != arg)
 197  196                          continue;
 198  197  
 199  198                  /*
 200  199                   * If the events indicate AS_CALLBACK_CALLED, just clear
 201  200                   * AS_ALL_EVENT in the events field and wakeup the thread
 202  201                   * that may be waiting in as_do_callbacks.  as_do_callbacks
 203  202                   * will take care of removing this entry from the list.  In
 204  203                   * that case, return AS_CALLBACK_DELETE_DEFERRED.  Otherwise
 205  204                   * (AS_CALLBACK_CALLED not set), just remove it from the
 206  205                   * list, return the memory and return AS_CALLBACK_DELETED.
 207  206                   */
 208  207                  if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) {
 209  208                          /* leave AS_CALLBACK_CALLED */
 210  209                          cb->ascb_events &= ~AS_ALL_EVENT;
 211  210                          rc = AS_CALLBACK_DELETE_DEFERRED;
 212  211                          cv_broadcast(&as->a_cv);
 213  212                  } else {
 214  213                          *prevcb = cb->ascb_next;
 215  214                          kmem_free(cb, sizeof (struct as_callback));
 216  215                          rc = AS_CALLBACK_DELETED;
 217  216                  }
 218  217                  break;
 219  218          }
 220  219          mutex_exit(&as->a_contents);
 221  220          return (rc);
 222  221  }
 223  222  
 224  223  /*
 225  224   * Searches the as callback list for a matching entry.
 226  225   * Returns a pointer to the first matching callback, or NULL if
 227  226   * nothing is found.
 228  227   * This function never sleeps so it is ok to call it with more
 229  228   * locks held than just the (required) a_contents mutex.
 230  229   *
 231  230   * See also comment on as_do_callbacks below.
 232  231   */
 233  232  static struct as_callback *
 234  233  as_find_callback(struct as *as, uint_t events, caddr_t event_addr,
 235  234                          size_t event_len)
 236  235  {
 237  236          struct as_callback      *cb;
 238  237  
 239  238          ASSERT(MUTEX_HELD(&as->a_contents));
 240  239          for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) {
 241  240                  /*
 242  241                   * If the callback has not already been called, then
 243  242                   * check if events or address range pertains.  An event_len
 244  243                   * of zero means do an unconditional callback.
 245  244                   */
 246  245                  if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) ||
 247  246                      ((event_len != 0) && (((cb->ascb_events & events) == 0) ||
 248  247                      (event_addr + event_len < cb->ascb_saddr) ||
 249  248                      (event_addr > (cb->ascb_saddr + cb->ascb_len))))) {
 250  249                          continue;
 251  250                  }
 252  251                  break;
 253  252          }
 254  253          return (cb);
 255  254  }
 256  255  
 257  256  /*
 258  257   * Executes a given callback and removes it from the callback list for
 259  258   * this address space.
 260  259   * This function may sleep so the caller must drop all locks except
 261  260   * a_contents before calling this func.
 262  261   *
 263  262   * See also comments on as_do_callbacks below.
 264  263   */
 265  264  static void
 266  265  as_execute_callback(struct as *as, struct as_callback *cb,
 267  266                                  uint_t events)
 268  267  {
 269  268          struct as_callback **prevcb;
 270  269          void    *cb_arg;
 271  270  
 272  271          ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events));
 273  272          cb->ascb_events |= AS_CALLBACK_CALLED;
 274  273          mutex_exit(&as->a_contents);
 275  274          (*cb->ascb_func)(as, cb->ascb_arg, events);
 276  275          mutex_enter(&as->a_contents);
 277  276          /*
 278  277           * the callback function is required to delete the callback
 279  278           * when the callback function determines it is OK for
 280  279           * this thread to continue. as_delete_callback will clear
 281  280           * the AS_ALL_EVENT in the events field when it is deleted.
 282  281           * If the callback function called as_delete_callback,
 283  282           * events will already be cleared and there will be no blocking.
 284  283           */
 285  284          while ((cb->ascb_events & events) != 0) {
 286  285                  cv_wait(&as->a_cv, &as->a_contents);
 287  286          }
 288  287          /*
 289  288           * This entry needs to be taken off the list. Normally, the
 290  289           * callback func itself does that, but unfortunately the list
 291  290           * may have changed while the callback was running because the
 292  291           * a_contents mutex was dropped and someone else other than the
 293  292           * callback func itself could have called as_delete_callback,
 294  293           * so we have to search to find this entry again.  The entry
 295  294           * must have AS_CALLBACK_CALLED, and have the same 'arg'.
 296  295           */
 297  296          cb_arg = cb->ascb_arg;
 298  297          prevcb = &as->a_callbacks;
 299  298          for (cb = as->a_callbacks; cb != NULL;
 300  299              prevcb = &cb->ascb_next, cb = *prevcb) {
 301  300                  if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) ||
 302  301                      (cb_arg != cb->ascb_arg)) {
 303  302                          continue;
 304  303                  }
 305  304                  *prevcb = cb->ascb_next;
 306  305                  kmem_free(cb, sizeof (struct as_callback));
 307  306                  break;
 308  307          }
 309  308  }
 310  309  
 311  310  /*
 312  311   * Check the callback list for a matching event and intersection of
 313  312   * address range. If there is a match invoke the callback.  Skip an entry if:
 314  313   *    - a callback is already in progress for this entry (AS_CALLBACK_CALLED)
 315  314   *    - not event of interest
 316  315   *    - not address range of interest
 317  316   *
 318  317   * An event_len of zero indicates a request for an unconditional callback
 319  318   * (regardless of event), only the AS_CALLBACK_CALLED is checked.  The
 320  319   * a_contents lock must be dropped before a callback, so only one callback
 321  320   * can be done before returning. Return -1 (true) if a callback was
 322  321   * executed and removed from the list, else return 0 (false).
 323  322   *
 324  323   * The logically separate parts, i.e. finding a matching callback and
 325  324   * executing a given callback have been separated into two functions
 326  325   * so that they can be called with different sets of locks held beyond
 327  326   * the always-required a_contents. as_find_callback does not sleep so
 328  327   * it is ok to call it if more locks than a_contents (i.e. the a_lock
 329  328   * rwlock) are held. as_execute_callback on the other hand may sleep
 330  329   * so all locks beyond a_contents must be dropped by the caller if one
 331  330   * does not want to end comatose.
 332  331   */
 333  332  static int
 334  333  as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr,
 335  334                          size_t event_len)
 336  335  {
 337  336          struct as_callback *cb;
 338  337  
 339  338          if ((cb = as_find_callback(as, events, event_addr, event_len))) {
 340  339                  as_execute_callback(as, cb, events);
 341  340                  return (-1);
 342  341          }
 343  342          return (0);
 344  343  }
 345  344  
 346  345  /*
 347  346   * Search for the segment containing addr. If a segment containing addr
 348  347   * exists, that segment is returned.  If no such segment exists, and
 349  348   * the list spans addresses greater than addr, then the first segment
 350  349   * whose base is greater than addr is returned; otherwise, NULL is
 351  350   * returned unless tail is true, in which case the last element of the
 352  351   * list is returned.
 353  352   *
 354  353   * a_seglast is used to cache the last found segment for repeated
 355  354   * searches to the same addr (which happens frequently).
 356  355   */
 357  356  struct seg *
 358  357  as_findseg(struct as *as, caddr_t addr, int tail)
 359  358  {
 360  359          struct seg *seg = as->a_seglast;
 361  360          avl_index_t where;
 362  361  
 363  362          ASSERT(AS_LOCK_HELD(as, &as->a_lock));
 364  363  
 365  364          if (seg != NULL &&
 366  365              seg->s_base <= addr &&
 367  366              addr < seg->s_base + seg->s_size)
 368  367                  return (seg);
 369  368  
 370  369          seg = avl_find(&as->a_segtree, &addr, &where);
 371  370          if (seg != NULL)
 372  371                  return (as->a_seglast = seg);
 373  372  
 374  373          seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
 375  374          if (seg == NULL && tail)
 376  375                  seg = avl_last(&as->a_segtree);
 377  376          return (as->a_seglast = seg);
 378  377  }
 379  378  
 380  379  #ifdef VERIFY_SEGLIST
 381  380  /*
 382  381   * verify that the segment list (kept in the a_segtree AVL tree) is coherent
 383  382   */
 384  383  static void
 385  384  as_verify(struct as *as)
 386  385  {
 387  386          struct seg *seg, *seglast, *p, *n;
 388  387          uint_t nsegs = 0;
 389  388  
 390  389          if (do_as_verify == 0)
 391  390                  return;
 392  391  
 393  392          seglast = as->a_seglast;
 394  393  
 395  394          for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
 396  395                  ASSERT(seg->s_as == as);
 397  396                  p = AS_SEGPREV(as, seg);
 398  397                  n = AS_SEGNEXT(as, seg);
 399  398                  ASSERT(p == NULL || p->s_as == as);
 400  399                  ASSERT(p == NULL || p->s_base < seg->s_base);
 401  400                  ASSERT(n == NULL || n->s_base > seg->s_base);
 402  401                  ASSERT(n != NULL || seg == avl_last(&as->a_segtree));
 403  402                  if (seg == seglast)
 404  403                          seglast = NULL;
 405  404                  nsegs++;
 406  405          }
 407  406          ASSERT(seglast == NULL);
 408  407          ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
 409  408  }
 410  409  #endif /* VERIFY_SEGLIST */
 411  410  
 412  411  /*
 413  412   * Add a new segment to the address space. The avl_find()
 414  413   * may be expensive so we attempt to use last segment accessed
 415  414   * in as_gap() as an insertion point.
 416  415   */
 417  416  int
 418  417  as_addseg(struct as  *as, struct seg *newseg)
 419  418  {
 420  419          struct seg *seg;
 421  420          caddr_t addr;
 422  421          caddr_t eaddr;
 423  422          avl_index_t where;
 424  423  
 425  424          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
 426  425  
 427  426          as->a_updatedir = 1;    /* inform /proc */
 428  427          gethrestime(&as->a_updatetime);
 429  428  
 430  429          if (as->a_lastgaphl != NULL) {
 431  430                  struct seg *hseg = NULL;
 432  431                  struct seg *lseg = NULL;
 433  432  
 434  433                  if (as->a_lastgaphl->s_base > newseg->s_base) {
 435  434                          hseg = as->a_lastgaphl;
 436  435                          lseg = AVL_PREV(&as->a_segtree, hseg);
 437  436                  } else {
 438  437                          lseg = as->a_lastgaphl;
 439  438                          hseg = AVL_NEXT(&as->a_segtree, lseg);
 440  439                  }
 441  440  
 442  441                  if (hseg && lseg && lseg->s_base < newseg->s_base &&
 443  442                      hseg->s_base > newseg->s_base) {
 444  443                          avl_insert_here(&as->a_segtree, newseg, lseg,
 445  444                              AVL_AFTER);
 446  445                          as->a_lastgaphl = NULL;
 447  446                          as->a_seglast = newseg;
 448  447                          return (0);
 449  448                  }
 450  449                  as->a_lastgaphl = NULL;
 451  450          }
 452  451  
 453  452          addr = newseg->s_base;
 454  453          eaddr = addr + newseg->s_size;
 455  454  again:
 456  455  
 457  456          seg = avl_find(&as->a_segtree, &addr, &where);
 458  457  
 459  458          if (seg == NULL)
 460  459                  seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
 461  460  
 462  461          if (seg == NULL)
 463  462                  seg = avl_last(&as->a_segtree);
 464  463  
 465  464          if (seg != NULL) {
 466  465                  caddr_t base = seg->s_base;

↓ open down ↓

394 lines elided

↑ open up ↑

 467  466  
 468  467                  /*
 469  468                   * If top of seg is below the requested address, then
 470  469                   * the insertion point is at the end of the linked list,
 471  470                   * and seg points to the tail of the list.  Otherwise,
 472  471                   * the insertion point is immediately before seg.
 473  472                   */
 474  473                  if (base + seg->s_size > addr) {
 475  474                          if (addr >= base || eaddr > base) {
 476  475  #ifdef __sparc
 477      -                                extern struct seg_ops segnf_ops;
      476 +                                extern const struct seg_ops segnf_ops;
 478  477  
 479  478                                  /*
 480  479                                   * no-fault segs must disappear if overlaid.
 481  480                                   * XXX need new segment type so
 482  481                                   * we don't have to check s_ops
 483  482                                   */
 484  483                                  if (seg->s_ops == &segnf_ops) {
 485  484                                          seg_unmap(seg);
 486  485                                          goto again;
 487  486                                  }

 488  487  #endif
 489  488                                  return (-1);    /* overlapping segment */
 490  489                          }
 491  490                  }
 492  491          }
 493  492          as->a_seglast = newseg;
 494  493          avl_insert(&as->a_segtree, newseg, where);
 495  494  
 496  495  #ifdef VERIFY_SEGLIST
 497  496          as_verify(as);
 498  497  #endif
 499  498          return (0);
 500  499  }
 501  500  
 502  501  struct seg *
 503  502  as_removeseg(struct as *as, struct seg *seg)
 504  503  {
 505  504          avl_tree_t *t;
 506  505  
 507  506          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
 508  507  
 509  508          as->a_updatedir = 1;    /* inform /proc */
 510  509          gethrestime(&as->a_updatetime);
 511  510  
 512  511          if (seg == NULL)
 513  512                  return (NULL);
 514  513  
 515  514          t = &as->a_segtree;
 516  515          if (as->a_seglast == seg)
 517  516                  as->a_seglast = NULL;
 518  517          as->a_lastgaphl = NULL;
 519  518  
 520  519          /*
 521  520           * if this segment is at an address higher than
 522  521           * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
 523  522           */
 524  523          if (as->a_lastgap &&
 525  524              (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
 526  525                  as->a_lastgap = AVL_NEXT(t, seg);
 527  526  
 528  527          /*
 529  528           * remove the segment from the seg tree
 530  529           */
 531  530          avl_remove(t, seg);
 532  531  
 533  532  #ifdef VERIFY_SEGLIST
 534  533          as_verify(as);
 535  534  #endif
 536  535          return (seg);
 537  536  }
 538  537  
 539  538  /*
 540  539   * Find a segment containing addr.
 541  540   */
 542  541  struct seg *
 543  542  as_segat(struct as *as, caddr_t addr)
 544  543  {
 545  544          struct seg *seg = as->a_seglast;
 546  545  
 547  546          ASSERT(AS_LOCK_HELD(as, &as->a_lock));
 548  547  
 549  548          if (seg != NULL && seg->s_base <= addr &&
 550  549              addr < seg->s_base + seg->s_size)
 551  550                  return (seg);
 552  551  
 553  552          seg = avl_find(&as->a_segtree, &addr, NULL);
 554  553          return (seg);
 555  554  }
 556  555  
 557  556  /*
 558  557   * Serialize all searches for holes in an address space to
 559  558   * prevent two or more threads from allocating the same virtual
 560  559   * address range.  The address space must not be "read/write"
 561  560   * locked by the caller since we may block.
 562  561   */
 563  562  void
 564  563  as_rangelock(struct as *as)
 565  564  {
 566  565          mutex_enter(&as->a_contents);
 567  566          while (AS_ISCLAIMGAP(as))
 568  567                  cv_wait(&as->a_cv, &as->a_contents);
 569  568          AS_SETCLAIMGAP(as);
 570  569          mutex_exit(&as->a_contents);
 571  570  }
 572  571  
 573  572  /*
 574  573   * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads.
 575  574   */
 576  575  void
 577  576  as_rangeunlock(struct as *as)
 578  577  {
 579  578          mutex_enter(&as->a_contents);
 580  579          AS_CLRCLAIMGAP(as);
 581  580          cv_signal(&as->a_cv);
 582  581          mutex_exit(&as->a_contents);
 583  582  }
 584  583  
 585  584  /*
 586  585   * compare segments (or just an address) by segment address range
 587  586   */
 588  587  static int
 589  588  as_segcompar(const void *x, const void *y)
 590  589  {
 591  590          struct seg *a = (struct seg *)x;
 592  591          struct seg *b = (struct seg *)y;
 593  592  
 594  593          if (a->s_base < b->s_base)
 595  594                  return (-1);
 596  595          if (a->s_base >= b->s_base + b->s_size)
 597  596                  return (1);
 598  597          return (0);
 599  598  }
 600  599  
 601  600  
 602  601  void
 603  602  as_avlinit(struct as *as)
 604  603  {
 605  604          avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg),
 606  605              offsetof(struct seg, s_tree));
 607  606          avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page),
 608  607              offsetof(struct watched_page, wp_link));
 609  608  }
 610  609  
 611  610  /*ARGSUSED*/
 612  611  static int
 613  612  as_constructor(void *buf, void *cdrarg, int kmflags)
 614  613  {
 615  614          struct as *as = buf;
 616  615  
 617  616          mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL);
 618  617          cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL);
 619  618          rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL);
 620  619          as_avlinit(as);
 621  620          return (0);
 622  621  }
 623  622  
 624  623  /*ARGSUSED1*/
 625  624  static void
 626  625  as_destructor(void *buf, void *cdrarg)
 627  626  {
 628  627          struct as *as = buf;
 629  628  
 630  629          avl_destroy(&as->a_segtree);
 631  630          mutex_destroy(&as->a_contents);
 632  631          cv_destroy(&as->a_cv);
 633  632          rw_destroy(&as->a_lock);
 634  633  }
 635  634  
 636  635  void
 637  636  as_init(void)
 638  637  {
 639  638          as_cache = kmem_cache_create("as_cache", sizeof (struct as), 0,
 640  639              as_constructor, as_destructor, NULL, NULL, NULL, 0);
 641  640  }
 642  641  
 643  642  /*
 644  643   * Allocate and initialize an address space data structure.
 645  644   * We call hat_alloc to allow any machine dependent
 646  645   * information in the hat structure to be initialized.
 647  646   */
 648  647  struct as *
 649  648  as_alloc(void)
 650  649  {
 651  650          struct as *as;
 652  651  
 653  652          as = kmem_cache_alloc(as_cache, KM_SLEEP);
 654  653  
 655  654          as->a_flags             = 0;
 656  655          as->a_vbits             = 0;
 657  656          as->a_hrm               = NULL;
 658  657          as->a_seglast           = NULL;
 659  658          as->a_size              = 0;
 660  659          as->a_resvsize          = 0;
 661  660          as->a_updatedir         = 0;
 662  661          gethrestime(&as->a_updatetime);
 663  662          as->a_objectdir         = NULL;

↓ open down ↓

176 lines elided

↑ open up ↑

 664  663          as->a_sizedir           = 0;
 665  664          as->a_userlimit         = (caddr_t)USERLIMIT;
 666  665          as->a_lastgap           = NULL;
 667  666          as->a_lastgaphl         = NULL;
 668  667          as->a_callbacks         = NULL;
 669  668  
 670  669          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 671  670          as->a_hat = hat_alloc(as);      /* create hat for default system mmu */
 672  671          AS_LOCK_EXIT(as, &as->a_lock);
 673  672  
 674      -        as->a_xhat = NULL;
 675      -
 676  673          return (as);
 677  674  }
 678  675  
 679  676  /*
 680  677   * Free an address space data structure.
 681  678   * Need to free the hat first and then
 682  679   * all the segments on this as and finally
 683  680   * the space for the as struct itself.
 684  681   */
 685  682  void
 686  683  as_free(struct as *as)
 687  684  {
 688  685          struct hat *hat = as->a_hat;
 689  686          struct seg *seg, *next;
 690      -        int called = 0;
      687 +        boolean_t free_started = B_FALSE;
 691  688  
 692  689  top:
 693  690          /*
 694  691           * Invoke ALL callbacks. as_do_callbacks will do one callback
 695  692           * per call, and not return (-1) until the callback has completed.
 696  693           * When as_do_callbacks returns zero, all callbacks have completed.
 697  694           */
 698  695          mutex_enter(&as->a_contents);
 699  696          while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
 700  697                  ;
 701  698  
 702      -        /* This will prevent new XHATs from attaching to as */
 703      -        if (!called)
 704      -                AS_SETBUSY(as);
 705  699          mutex_exit(&as->a_contents);
 706  700          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 707  701  
 708      -        if (!called) {
 709      -                called = 1;
      702 +        if (!free_started) {
      703 +                free_started = B_TRUE;
 710  704                  hat_free_start(hat);
 711      -                if (as->a_xhat != NULL)
 712      -                        xhat_free_start_all(as);
 713  705          }
 714  706          for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
 715  707                  int err;
 716  708  
 717  709                  next = AS_SEGNEXT(as, seg);
 718  710  retry:
 719      -                err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
      711 +                err = segop_unmap(seg, seg->s_base, seg->s_size);
 720  712                  if (err == EAGAIN) {
 721  713                          mutex_enter(&as->a_contents);
 722  714                          if (as->a_callbacks) {
 723  715                                  AS_LOCK_EXIT(as, &as->a_lock);
 724  716                          } else if (!AS_ISNOUNMAPWAIT(as)) {
 725  717                                  /*
 726  718                                   * Memory is currently locked. Wait for a
 727  719                                   * cv_signal that it has been unlocked, then
 728  720                                   * try the operation again.
 729  721                                   */

 730  722                                  if (AS_ISUNMAPWAIT(as) == 0)
 731  723                                          cv_broadcast(&as->a_cv);
 732  724                                  AS_SETUNMAPWAIT(as);
 733  725                                  AS_LOCK_EXIT(as, &as->a_lock);
 734  726                                  while (AS_ISUNMAPWAIT(as))
 735  727                                          cv_wait(&as->a_cv, &as->a_contents);
 736  728                          } else {
 737  729                                  /*
 738  730                                   * We may have raced with
 739  731                                   * segvn_reclaim()/segspt_reclaim(). In this
 740  732                                   * case clear nounmapwait flag and retry since
 741  733                                   * softlockcnt in this segment may be already
 742  734                                   * 0.  We don't drop as writer lock so our
 743  735                                   * number of retries without sleeping should
 744  736                                   * be very small. See segvn_reclaim() for
 745  737                                   * more comments.
 746  738                                   */
 747  739                                  AS_CLRNOUNMAPWAIT(as);
 748  740                                  mutex_exit(&as->a_contents);
 749  741                                  goto retry;
 750  742                          }
 751  743                          mutex_exit(&as->a_contents);

↓ open down ↓

22 lines elided

↑ open up ↑

 752  744                          goto top;
 753  745                  } else {
 754  746                          /*
 755  747                           * We do not expect any other error return at this
 756  748                           * time. This is similar to an ASSERT in seg_unmap()
 757  749                           */
 758  750                          ASSERT(err == 0);
 759  751                  }
 760  752          }
 761  753          hat_free_end(hat);
 762      -        if (as->a_xhat != NULL)
 763      -                xhat_free_end_all(as);
 764  754          AS_LOCK_EXIT(as, &as->a_lock);
 765  755  
 766  756          /* /proc stuff */
 767  757          ASSERT(avl_numnodes(&as->a_wpage) == 0);
 768  758          if (as->a_objectdir) {
 769  759                  kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
 770  760                  as->a_objectdir = NULL;
 771  761                  as->a_sizedir = 0;
 772  762          }
 773  763

 774  764          /*
 775  765           * Free the struct as back to kmem.  Assert it has no segments.
 776  766           */
 777  767          ASSERT(avl_numnodes(&as->a_segtree) == 0);
 778  768          kmem_cache_free(as_cache, as);
 779  769  }
 780  770  
 781  771  int
 782  772  as_dup(struct as *as, struct proc *forkedproc)
 783  773  {
 784  774          struct as *newas;
 785  775          struct seg *seg, *newseg;
 786  776          size_t  purgesize = 0;

↓ open down ↓

13 lines elided

↑ open up ↑

 787  777          int error;
 788  778  
 789  779          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 790  780          as_clearwatch(as);
 791  781          newas = as_alloc();
 792  782          newas->a_userlimit = as->a_userlimit;
 793  783          newas->a_proc = forkedproc;
 794  784  
 795  785          AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
 796  786  
 797      -        /* This will prevent new XHATs from attaching */
 798      -        mutex_enter(&as->a_contents);
 799      -        AS_SETBUSY(as);
 800      -        mutex_exit(&as->a_contents);
 801      -        mutex_enter(&newas->a_contents);
 802      -        AS_SETBUSY(newas);
 803      -        mutex_exit(&newas->a_contents);
 804      -
 805  787          (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
 806  788  
 807  789          for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
 808  790  
 809  791                  if (seg->s_flags & S_PURGE) {
 810  792                          purgesize += seg->s_size;
 811  793                          continue;
 812  794                  }
 813  795  
 814  796                  newseg = seg_alloc(newas, seg->s_base, seg->s_size);
 815  797                  if (newseg == NULL) {
 816  798                          AS_LOCK_EXIT(newas, &newas->a_lock);
 817  799                          as_setwatch(as);
 818      -                        mutex_enter(&as->a_contents);
 819      -                        AS_CLRBUSY(as);
 820      -                        mutex_exit(&as->a_contents);
 821  800                          AS_LOCK_EXIT(as, &as->a_lock);
 822  801                          as_free(newas);
 823  802                          return (-1);
 824  803                  }
 825      -                if ((error = SEGOP_DUP(seg, newseg)) != 0) {
      804 +                if ((error = segop_dup(seg, newseg)) != 0) {
 826  805                          /*
 827  806                           * We call seg_free() on the new seg
 828  807                           * because the segment is not set up
 829  808                           * completely; i.e. it has no ops.
 830  809                           */
 831  810                          as_setwatch(as);
 832      -                        mutex_enter(&as->a_contents);
 833      -                        AS_CLRBUSY(as);
 834      -                        mutex_exit(&as->a_contents);
 835  811                          AS_LOCK_EXIT(as, &as->a_lock);
 836  812                          seg_free(newseg);
 837  813                          AS_LOCK_EXIT(newas, &newas->a_lock);
 838  814                          as_free(newas);
 839  815                          return (error);
 840  816                  }
 841  817                  newas->a_size += seg->s_size;
 842  818          }
 843  819          newas->a_resvsize = as->a_resvsize - purgesize;
 844  820  
 845  821          error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
 846      -        if (as->a_xhat != NULL)
 847      -                error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);
 848  822  
 849      -        mutex_enter(&newas->a_contents);
 850      -        AS_CLRBUSY(newas);
 851      -        mutex_exit(&newas->a_contents);
 852  823          AS_LOCK_EXIT(newas, &newas->a_lock);
 853  824  
 854  825          as_setwatch(as);
 855      -        mutex_enter(&as->a_contents);
 856      -        AS_CLRBUSY(as);
 857      -        mutex_exit(&as->a_contents);
 858  826          AS_LOCK_EXIT(as, &as->a_lock);
 859  827          if (error != 0) {
 860  828                  as_free(newas);
 861  829                  return (error);
 862  830          }
 863  831          forkedproc->p_as = newas;
 864  832          return (0);
 865  833  }
 866  834  
 867  835  /*

 868  836   * Handle a ``fault'' at addr for size bytes.
 869  837   */
 870  838  faultcode_t
 871  839  as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
 872  840          enum fault_type type, enum seg_rw rw)

↓ open down ↓

5 lines elided

↑ open up ↑

 873  841  {
 874  842          struct seg *seg;
 875  843          caddr_t raddr;                  /* rounded down addr */
 876  844          size_t rsize;                   /* rounded up size */
 877  845          size_t ssize;
 878  846          faultcode_t res = 0;
 879  847          caddr_t addrsav;
 880  848          struct seg *segsav;
 881  849          int as_lock_held;
 882  850          klwp_t *lwp = ttolwp(curthread);
 883      -        int is_xhat = 0;
 884  851          int holding_wpage = 0;
 885      -        extern struct seg_ops   segdev_ops;
 886      -
 887  852  
 888  853  
 889      -        if (as->a_hat != hat) {
 890      -                /* This must be an XHAT then */
 891      -                is_xhat = 1;
 892      -
 893      -                if ((type != F_INVAL) || (as == &kas))
 894      -                        return (FC_NOSUPPORT);
 895      -        }
 896  854  
 897  855  retry:
 898      -        if (!is_xhat) {
 899      -                /*
 900      -                 * Indicate that the lwp is not to be stopped while waiting
 901      -                 * for a pagefault.  This is to avoid deadlock while debugging
 902      -                 * a process via /proc over NFS (in particular).
 903      -                 */
 904      -                if (lwp != NULL)
 905      -                        lwp->lwp_nostop++;
      856 +        /*
      857 +         * Indicate that the lwp is not to be stopped while waiting for a
      858 +         * pagefault.  This is to avoid deadlock while debugging a process
      859 +         * via /proc over NFS (in particular).
      860 +         */
      861 +        if (lwp != NULL)
      862 +                lwp->lwp_nostop++;
 906  863  
 907      -                /*
 908      -                 * same length must be used when we softlock and softunlock.
 909      -                 * We don't support softunlocking lengths less than
 910      -                 * the original length when there is largepage support.
 911      -                 * See seg_dev.c for more comments.
 912      -                 */
 913      -                switch (type) {
      864 +        /*
      865 +         * same length must be used when we softlock and softunlock.  We
      866 +         * don't support softunlocking lengths less than the original length
      867 +         * when there is largepage support.  See seg_dev.c for more
      868 +         * comments.
      869 +         */
      870 +        switch (type) {
 914  871  
 915      -                case F_SOFTLOCK:
 916      -                        CPU_STATS_ADD_K(vm, softlock, 1);
 917      -                        break;
      872 +        case F_SOFTLOCK:
      873 +                CPU_STATS_ADD_K(vm, softlock, 1);
      874 +                break;
 918  875  
 919      -                case F_SOFTUNLOCK:
 920      -                        break;
      876 +        case F_SOFTUNLOCK:
      877 +                break;
 921  878  
 922      -                case F_PROT:
 923      -                        CPU_STATS_ADD_K(vm, prot_fault, 1);
 924      -                        break;
      879 +        case F_PROT:
      880 +                CPU_STATS_ADD_K(vm, prot_fault, 1);
      881 +                break;
 925  882  
 926      -                case F_INVAL:
 927      -                        CPU_STATS_ENTER_K();
 928      -                        CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
 929      -                        if (as == &kas)
 930      -                                CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
 931      -                        CPU_STATS_EXIT_K();
 932      -                        break;
 933      -                }
      883 +        case F_INVAL:
      884 +                CPU_STATS_ENTER_K();
      885 +                CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
      886 +                if (as == &kas)
      887 +                        CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
      888 +                CPU_STATS_EXIT_K();
      889 +                break;
 934  890          }
 935  891  
 936  892          /* Kernel probe */
 937  893          TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
 938  894              tnf_opaque, address,        addr,
 939  895              tnf_fault_type,     fault_type,     type,
 940  896              tnf_seg_access,     access,         rw);
 941  897  
 942  898          raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
 943  899          rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -

 944  900              (size_t)raddr;

↓ open down ↓

1 lines elided

↑ open up ↑

 945  901  
 946  902          /*
 947  903           * XXX -- Don't grab the as lock for segkmap. We should grab it for
 948  904           * correctness, but then we could be stuck holding this lock for
 949  905           * a LONG time if the fault needs to be resolved on a slow
 950  906           * filesystem, and then no-one will be able to exec new commands,
 951  907           * as exec'ing requires the write lock on the as.
 952  908           */
 953  909          if (as == &kas && segkmap && segkmap->s_base <= raddr &&
 954  910              raddr + size < segkmap->s_base + segkmap->s_size) {
 955      -                /*
 956      -                 * if (as==&kas), this can't be XHAT: we've already returned
 957      -                 * FC_NOSUPPORT.
 958      -                 */
 959  911                  seg = segkmap;
 960  912                  as_lock_held = 0;
 961  913          } else {
 962  914                  AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 963      -                if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
 964      -                        /*
 965      -                         * Grab and hold the writers' lock on the as
 966      -                         * if the fault is to a watched page.
 967      -                         * This will keep CPUs from "peeking" at the
 968      -                         * address range while we're temporarily boosting
 969      -                         * the permissions for the XHAT device to
 970      -                         * resolve the fault in the segment layer.
 971      -                         *
 972      -                         * We could check whether faulted address
 973      -                         * is within a watched page and only then grab
 974      -                         * the writer lock, but this is simpler.
 975      -                         */
 976      -                        AS_LOCK_EXIT(as, &as->a_lock);
 977      -                        AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 978      -                }
 979  915  
 980  916                  seg = as_segat(as, raddr);
 981  917                  if (seg == NULL) {
 982  918                          AS_LOCK_EXIT(as, &as->a_lock);
 983      -                        if ((lwp != NULL) && (!is_xhat))
      919 +                        if (lwp != NULL)
 984  920                                  lwp->lwp_nostop--;
 985  921                          return (FC_NOMAP);
 986  922                  }
 987  923  
 988  924                  as_lock_held = 1;
 989  925          }
 990  926  
 991  927          addrsav = raddr;
 992  928          segsav = seg;
 993  929

 994  930          for (; rsize != 0; rsize -= ssize, raddr += ssize) {
 995  931                  if (raddr >= seg->s_base + seg->s_size) {
 996  932                          seg = AS_SEGNEXT(as, seg);

↓ open down ↓

3 lines elided

↑ open up ↑

 997  933                          if (seg == NULL || raddr != seg->s_base) {
 998  934                                  res = FC_NOMAP;
 999  935                                  break;
1000  936                          }
1001  937                  }
1002  938                  if (raddr + rsize > seg->s_base + seg->s_size)
1003  939                          ssize = seg->s_base + seg->s_size - raddr;
1004  940                  else
1005  941                          ssize = rsize;
1006  942  
1007      -                if (!is_xhat || (seg->s_ops != &segdev_ops)) {
1008      -
1009      -                        if (is_xhat && avl_numnodes(&as->a_wpage) != 0 &&
1010      -                            pr_is_watchpage_as(raddr, rw, as)) {
1011      -                                /*
1012      -                                 * Handle watch pages.  If we're faulting on a
1013      -                                 * watched page from an X-hat, we have to
1014      -                                 * restore the original permissions while we
1015      -                                 * handle the fault.
1016      -                                 */
1017      -                                as_clearwatch(as);
1018      -                                holding_wpage = 1;
1019      -                        }
1020      -
1021      -                        res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
      943 +                res = segop_fault(hat, seg, raddr, ssize, type, rw);
1022  944  
1023      -                        /* Restore watchpoints */
1024      -                        if (holding_wpage) {
1025      -                                as_setwatch(as);
1026      -                                holding_wpage = 0;
1027      -                        }
      945 +                /* Restore watchpoints */
      946 +                if (holding_wpage) {
      947 +                        as_setwatch(as);
      948 +                        holding_wpage = 0;
      949 +                }
1028  950  
1029      -                        if (res != 0)
1030      -                                break;
1031      -                } else {
1032      -                        /* XHAT does not support seg_dev */
1033      -                        res = FC_NOSUPPORT;
      951 +                if (res != 0)
1034  952                          break;
1035      -                }
1036  953          }
1037  954  
1038  955          /*
1039  956           * If we were SOFTLOCKing and encountered a failure,
1040  957           * we must SOFTUNLOCK the range we already did. (Maybe we
1041  958           * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
1042  959           * right here...)
1043  960           */
1044  961          if (res != 0 && type == F_SOFTLOCK) {
1045  962                  for (seg = segsav; addrsav < raddr; addrsav += ssize) {

1046  963                          if (addrsav >= seg->s_base + seg->s_size)
1047  964                                  seg = AS_SEGNEXT(as, seg);

↓ open down ↓

2 lines elided

↑ open up ↑

1048  965                          ASSERT(seg != NULL);
1049  966                          /*
1050  967                           * Now call the fault routine again to perform the
1051  968                           * unlock using S_OTHER instead of the rw variable
1052  969                           * since we never got a chance to touch the pages.
1053  970                           */
1054  971                          if (raddr > seg->s_base + seg->s_size)
1055  972                                  ssize = seg->s_base + seg->s_size - addrsav;
1056  973                          else
1057  974                                  ssize = raddr - addrsav;
1058      -                        (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
      975 +                        (void) segop_fault(hat, seg, addrsav, ssize,
1059  976                              F_SOFTUNLOCK, S_OTHER);
1060  977                  }
1061  978          }
1062  979          if (as_lock_held)
1063  980                  AS_LOCK_EXIT(as, &as->a_lock);
1064      -        if ((lwp != NULL) && (!is_xhat))
      981 +        if (lwp != NULL)
1065  982                  lwp->lwp_nostop--;
1066  983  
1067  984          /*
1068  985           * If the lower levels returned EDEADLK for a fault,
 1069  986           * it means that we should retry the fault.  Let's wait
1070  987           * a bit also to let the deadlock causing condition clear.
1071  988           * This is part of a gross hack to work around a design flaw
1072  989           * in the ufs/sds logging code and should go away when the
1073  990           * logging code is re-designed to fix the problem. See bug
1074  991           * 4125102 for details of the problem.

1075  992           */
1076  993          if (FC_ERRNO(res) == EDEADLK) {
1077  994                  delay(deadlk_wait);
1078  995                  res = 0;
1079  996                  goto retry;
1080  997          }
1081  998          return (res);
1082  999  }
1083 1000  
1084 1001  
1085 1002  
1086 1003  /*
1087 1004   * Asynchronous ``fault'' at addr for size bytes.
1088 1005   */
1089 1006  faultcode_t
1090 1007  as_faulta(struct as *as, caddr_t addr, size_t size)
1091 1008  {
1092 1009          struct seg *seg;
1093 1010          caddr_t raddr;                  /* rounded down addr */
1094 1011          size_t rsize;                   /* rounded up size */
1095 1012          faultcode_t res = 0;
1096 1013          klwp_t *lwp = ttolwp(curthread);
1097 1014  
1098 1015  retry:
1099 1016          /*
1100 1017           * Indicate that the lwp is not to be stopped while waiting
1101 1018           * for a pagefault.  This is to avoid deadlock while debugging
1102 1019           * a process via /proc over NFS (in particular).
1103 1020           */
1104 1021          if (lwp != NULL)
1105 1022                  lwp->lwp_nostop++;
1106 1023  
1107 1024          raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1108 1025          rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1109 1026              (size_t)raddr;
1110 1027  
1111 1028          AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1112 1029          seg = as_segat(as, raddr);
1113 1030          if (seg == NULL) {
1114 1031                  AS_LOCK_EXIT(as, &as->a_lock);
1115 1032                  if (lwp != NULL)
1116 1033                          lwp->lwp_nostop--;
1117 1034                  return (FC_NOMAP);

↓ open down ↓

43 lines elided

↑ open up ↑

1118 1035          }
1119 1036  
1120 1037          for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1121 1038                  if (raddr >= seg->s_base + seg->s_size) {
1122 1039                          seg = AS_SEGNEXT(as, seg);
1123 1040                          if (seg == NULL || raddr != seg->s_base) {
1124 1041                                  res = FC_NOMAP;
1125 1042                                  break;
1126 1043                          }
1127 1044                  }
1128      -                res = SEGOP_FAULTA(seg, raddr);
     1045 +                res = segop_faulta(seg, raddr);
1129 1046                  if (res != 0)
1130 1047                          break;
1131 1048          }
1132 1049          AS_LOCK_EXIT(as, &as->a_lock);
1133 1050          if (lwp != NULL)
1134 1051                  lwp->lwp_nostop--;
1135 1052          /*
1136 1053           * If the lower levels returned EDEADLK for a fault,
 1137 1054           * it means that we should retry the fault.  Let's wait
1138 1055           * a bit also to let the deadlock causing condition clear.

1139 1056           * This is part of a gross hack to work around a design flaw
1140 1057           * in the ufs/sds logging code and should go away when the
1141 1058           * logging code is re-designed to fix the problem. See bug
1142 1059           * 4125102 for details of the problem.
1143 1060           */
1144 1061          if (FC_ERRNO(res) == EDEADLK) {
1145 1062                  delay(deadlk_wait);
1146 1063                  res = 0;
1147 1064                  goto retry;
1148 1065          }
1149 1066          return (res);
1150 1067  }
1151 1068  
1152 1069  /*
1153 1070   * Set the virtual mapping for the interval from [addr : addr + size)
1154 1071   * in address space `as' to have the specified protection.
1155 1072   * It is ok for the range to cross over several segments,
1156 1073   * as long as they are contiguous.
1157 1074   */
1158 1075  int
1159 1076  as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
1160 1077  {
1161 1078          struct seg *seg;
1162 1079          struct as_callback *cb;
1163 1080          size_t ssize;
1164 1081          caddr_t raddr;                  /* rounded down addr */
1165 1082          size_t rsize;                   /* rounded up size */
1166 1083          int error = 0, writer = 0;
1167 1084          caddr_t saveraddr;
1168 1085          size_t saversize;
1169 1086  
1170 1087  setprot_top:
1171 1088          raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1172 1089          rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1173 1090              (size_t)raddr;
1174 1091  
1175 1092          if (raddr + rsize < raddr)              /* check for wraparound */
1176 1093                  return (ENOMEM);
1177 1094  
1178 1095          saveraddr = raddr;
1179 1096          saversize = rsize;
1180 1097  
1181 1098          /*
1182 1099           * Normally we only lock the as as a reader. But
1183 1100           * if due to setprot the segment driver needs to split
1184 1101           * a segment it will return IE_RETRY. Therefore we re-acquire
1185 1102           * the as lock as a writer so the segment driver can change
1186 1103           * the seg list. Also the segment driver will return IE_RETRY
1187 1104           * after it has changed the segment list so we therefore keep
 1188 1105           * locking as a writer. Since these operations should be rare
 1189 1106           * we want to only lock as a writer when necessary.
1190 1107           */
1191 1108          if (writer || avl_numnodes(&as->a_wpage) != 0) {
1192 1109                  AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1193 1110          } else {
1194 1111                  AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1195 1112          }
1196 1113  
1197 1114          as_clearwatchprot(as, raddr, rsize);
1198 1115          seg = as_segat(as, raddr);
1199 1116          if (seg == NULL) {
1200 1117                  as_setwatch(as);
1201 1118                  AS_LOCK_EXIT(as, &as->a_lock);
1202 1119                  return (ENOMEM);
1203 1120          }
1204 1121  
1205 1122          for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1206 1123                  if (raddr >= seg->s_base + seg->s_size) {
1207 1124                          seg = AS_SEGNEXT(as, seg);

↓ open down ↓

69 lines elided

↑ open up ↑

1208 1125                          if (seg == NULL || raddr != seg->s_base) {
1209 1126                                  error = ENOMEM;
1210 1127                                  break;
1211 1128                          }
1212 1129                  }
1213 1130                  if ((raddr + rsize) > (seg->s_base + seg->s_size))
1214 1131                          ssize = seg->s_base + seg->s_size - raddr;
1215 1132                  else
1216 1133                          ssize = rsize;
1217 1134  retry:
1218      -                error = SEGOP_SETPROT(seg, raddr, ssize, prot);
     1135 +                error = segop_setprot(seg, raddr, ssize, prot);
1219 1136  
1220 1137                  if (error == IE_NOMEM) {
1221 1138                          error = EAGAIN;
1222 1139                          break;
1223 1140                  }
1224 1141  
1225 1142                  if (error == IE_RETRY) {
1226 1143                          AS_LOCK_EXIT(as, &as->a_lock);
1227 1144                          writer = 1;
1228 1145                          goto setprot_top;

1229 1146                  }
1230 1147  
1231 1148                  if (error == EAGAIN) {
1232 1149                          /*
1233 1150                           * Make sure we have a_lock as writer.
1234 1151                           */
1235 1152                          if (writer == 0) {
1236 1153                                  AS_LOCK_EXIT(as, &as->a_lock);
1237 1154                                  writer = 1;
1238 1155                                  goto setprot_top;
1239 1156                          }
1240 1157  
1241 1158                          /*
1242 1159                           * Memory is currently locked.  It must be unlocked
1243 1160                           * before this operation can succeed through a retry.
1244 1161                           * The possible reasons for locked memory and
1245 1162                           * corresponding strategies for unlocking are:
1246 1163                           * (1) Normal I/O
1247 1164                           *      wait for a signal that the I/O operation
1248 1165                           *      has completed and the memory is unlocked.
1249 1166                           * (2) Asynchronous I/O
1250 1167                           *      The aio subsystem does not unlock pages when
1251 1168                           *      the I/O is completed. Those pages are unlocked
1252 1169                           *      when the application calls aiowait/aioerror.
1253 1170                           *      So, to prevent blocking forever, cv_broadcast()
1254 1171                           *      is done to wake up aio_cleanup_thread.
1255 1172                           *      Subsequently, segvn_reclaim will be called, and
1256 1173                           *      that will do AS_CLRUNMAPWAIT() and wake us up.
1257 1174                           * (3) Long term page locking:
1258 1175                           *      Drivers intending to have pages locked for a
1259 1176                           *      period considerably longer than for normal I/O
1260 1177                           *      (essentially forever) may have registered for a
1261 1178                           *      callback so they may unlock these pages on
1262 1179                           *      request. This is needed to allow this operation
1263 1180                           *      to succeed. Each entry on the callback list is
1264 1181                           *      examined. If the event or address range pertains
1265 1182                           *      the callback is invoked (unless it already is in
1266 1183                           *      progress). The a_contents lock must be dropped
1267 1184                           *      before the callback, so only one callback can
1268 1185                           *      be done at a time. Go to the top and do more
1269 1186                           *      until zero is returned. If zero is returned,
1270 1187                           *      either there were no callbacks for this event
1271 1188                           *      or they were already in progress.
1272 1189                           */
1273 1190                          mutex_enter(&as->a_contents);
1274 1191                          if (as->a_callbacks &&
1275 1192                              (cb = as_find_callback(as, AS_SETPROT_EVENT,
1276 1193                              seg->s_base, seg->s_size))) {
1277 1194                                  AS_LOCK_EXIT(as, &as->a_lock);
1278 1195                                  as_execute_callback(as, cb, AS_SETPROT_EVENT);
1279 1196                          } else if (!AS_ISNOUNMAPWAIT(as)) {
1280 1197                                  if (AS_ISUNMAPWAIT(as) == 0)
1281 1198                                          cv_broadcast(&as->a_cv);
1282 1199                                  AS_SETUNMAPWAIT(as);
1283 1200                                  AS_LOCK_EXIT(as, &as->a_lock);
1284 1201                                  while (AS_ISUNMAPWAIT(as))
1285 1202                                          cv_wait(&as->a_cv, &as->a_contents);
1286 1203                          } else {
1287 1204                                  /*
1288 1205                                   * We may have raced with
1289 1206                                   * segvn_reclaim()/segspt_reclaim(). In this
1290 1207                                   * case clean nounmapwait flag and retry since
1291 1208                                   * softlockcnt in this segment may be already
1292 1209                                   * 0.  We don't drop as writer lock so our
1293 1210                                   * number of retries without sleeping should
1294 1211                                   * be very small. See segvn_reclaim() for
1295 1212                                   * more comments.
1296 1213                                   */
1297 1214                                  AS_CLRNOUNMAPWAIT(as);
1298 1215                                  mutex_exit(&as->a_contents);
1299 1216                                  goto retry;
1300 1217                          }
1301 1218                          mutex_exit(&as->a_contents);
1302 1219                          goto setprot_top;
1303 1220                  } else if (error != 0)
1304 1221                          break;
1305 1222          }
1306 1223          if (error != 0) {
1307 1224                  as_setwatch(as);
1308 1225          } else {
1309 1226                  as_setwatchprot(as, saveraddr, saversize, prot);
1310 1227          }
1311 1228          AS_LOCK_EXIT(as, &as->a_lock);
1312 1229          return (error);
1313 1230  }
1314 1231  
1315 1232  /*
1316 1233   * Check to make sure that the interval [addr, addr + size)
1317 1234   * in address space `as' has at least the specified protection.
1318 1235   * It is ok for the range to cross over several segments, as long
1319 1236   * as they are contiguous.
1320 1237   */
1321 1238  int
1322 1239  as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
1323 1240  {
1324 1241          struct seg *seg;
1325 1242          size_t ssize;
1326 1243          caddr_t raddr;                  /* rounded down addr */
1327 1244          size_t rsize;                   /* rounded up size */
1328 1245          int error = 0;
1329 1246  
1330 1247          raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1331 1248          rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1332 1249              (size_t)raddr;
1333 1250  
1334 1251          if (raddr + rsize < raddr)              /* check for wraparound */
1335 1252                  return (ENOMEM);
1336 1253  
1337 1254          /*
1338 1255           * This is ugly as sin...
1339 1256           * Normally, we only acquire the address space readers lock.
1340 1257           * However, if the address space has watchpoints present,
1341 1258           * we must acquire the writer lock on the address space for
1342 1259           * the benefit of as_clearwatchprot() and as_setwatchprot().
1343 1260           */
1344 1261          if (avl_numnodes(&as->a_wpage) != 0)
1345 1262                  AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1346 1263          else
1347 1264                  AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1348 1265          as_clearwatchprot(as, raddr, rsize);
1349 1266          seg = as_segat(as, raddr);
1350 1267          if (seg == NULL) {
1351 1268                  as_setwatch(as);
1352 1269                  AS_LOCK_EXIT(as, &as->a_lock);
1353 1270                  return (ENOMEM);
1354 1271          }
1355 1272  
1356 1273          for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1357 1274                  if (raddr >= seg->s_base + seg->s_size) {
1358 1275                          seg = AS_SEGNEXT(as, seg);

↓ open down ↓

130 lines elided

↑ open up ↑

1359 1276                          if (seg == NULL || raddr != seg->s_base) {
1360 1277                                  error = ENOMEM;
1361 1278                                  break;
1362 1279                          }
1363 1280                  }
1364 1281                  if ((raddr + rsize) > (seg->s_base + seg->s_size))
1365 1282                          ssize = seg->s_base + seg->s_size - raddr;
1366 1283                  else
1367 1284                          ssize = rsize;
1368 1285  
1369      -                error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
     1286 +                error = segop_checkprot(seg, raddr, ssize, prot);
1370 1287                  if (error != 0)
1371 1288                          break;
1372 1289          }
1373 1290          as_setwatch(as);
1374 1291          AS_LOCK_EXIT(as, &as->a_lock);
1375 1292          return (error);
1376 1293  }
1377 1294  
1378 1295  int
1379 1296  as_unmap(struct as *as, caddr_t addr, size_t size)

1380 1297  {
1381 1298          struct seg *seg, *seg_next;
1382 1299          struct as_callback *cb;
1383 1300          caddr_t raddr, eaddr;
1384 1301          size_t ssize, rsize = 0;
1385 1302          int err;
1386 1303  
1387 1304  top:
1388 1305          raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1389 1306          eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
1390 1307              (uintptr_t)PAGEMASK);
1391 1308  
1392 1309          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1393 1310  
1394 1311          as->a_updatedir = 1;    /* inform /proc */
1395 1312          gethrestime(&as->a_updatetime);
1396 1313  
1397 1314          /*
1398 1315           * Use as_findseg to find the first segment in the range, then
1399 1316           * step through the segments in order, following s_next.
1400 1317           */
1401 1318          as_clearwatchprot(as, raddr, eaddr - raddr);
1402 1319  
1403 1320          for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
1404 1321                  if (eaddr <= seg->s_base)
1405 1322                          break;          /* eaddr was in a gap; all done */
1406 1323  
1407 1324                  /* this is implied by the test above */
1408 1325                  ASSERT(raddr < eaddr);
1409 1326  
1410 1327                  if (raddr < seg->s_base)
1411 1328                          raddr = seg->s_base;    /* raddr was in a gap */
1412 1329  
1413 1330                  if (eaddr > (seg->s_base + seg->s_size))
1414 1331                          ssize = seg->s_base + seg->s_size - raddr;
1415 1332                  else
1416 1333                          ssize = eaddr - raddr;
1417 1334  
1418 1335                  /*
1419 1336                   * Save next segment pointer since seg can be
1420 1337                   * destroyed during the segment unmap operation.
1421 1338                   */
1422 1339                  seg_next = AS_SEGNEXT(as, seg);
1423 1340  
1424 1341                  /*

↓ open down ↓

45 lines elided

↑ open up ↑

1425 1342                   * We didn't count /dev/null mappings, so ignore them here.
1426 1343                   * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
1427 1344                   * we have to do this check here while we have seg.)
1428 1345                   */
1429 1346                  rsize = 0;
1430 1347                  if (!SEG_IS_DEVNULL_MAPPING(seg) &&
1431 1348                      !SEG_IS_PARTIAL_RESV(seg))
1432 1349                          rsize = ssize;
1433 1350  
1434 1351  retry:
1435      -                err = SEGOP_UNMAP(seg, raddr, ssize);
     1352 +                err = segop_unmap(seg, raddr, ssize);
1436 1353                  if (err == EAGAIN) {
1437 1354                          /*
1438 1355                           * Memory is currently locked.  It must be unlocked
1439 1356                           * before this operation can succeed through a retry.
1440 1357                           * The possible reasons for locked memory and
1441 1358                           * corresponding strategies for unlocking are:
1442 1359                           * (1) Normal I/O
1443 1360                           *      wait for a signal that the I/O operation
1444 1361                           *      has completed and the memory is unlocked.
1445 1362                           * (2) Asynchronous I/O

1446 1363                           *      The aio subsystem does not unlock pages when
1447 1364                           *      the I/O is completed. Those pages are unlocked
1448 1365                           *      when the application calls aiowait/aioerror.
1449 1366                           *      So, to prevent blocking forever, cv_broadcast()
1450 1367                           *      is done to wake up aio_cleanup_thread.
1451 1368                           *      Subsequently, segvn_reclaim will be called, and
1452 1369                           *      that will do AS_CLRUNMAPWAIT() and wake us up.
1453 1370                           * (3) Long term page locking:
1454 1371                           *      Drivers intending to have pages locked for a
1455 1372                           *      period considerably longer than for normal I/O
1456 1373                           *      (essentially forever) may have registered for a
1457 1374                           *      callback so they may unlock these pages on
1458 1375                           *      request. This is needed to allow this operation
1459 1376                           *      to succeed. Each entry on the callback list is
1460 1377                           *      examined. If the event or address range pertains,
1461 1378                           *      the callback is invoked (unless it already is in
1462 1379                           *      progress). The a_contents lock must be dropped
1463 1380                           *      before the callback, so only one callback can
1464 1381                           *      be done at a time. Go to the top and do more
1465 1382                           *      until zero is returned. If zero is returned,
1466 1383                           *      either there were no callbacks for this event
1467 1384                           *      or they were already in progress.
1468 1385                           */
1469 1386                          mutex_enter(&as->a_contents);
1470 1387                          if (as->a_callbacks &&
1471 1388                              (cb = as_find_callback(as, AS_UNMAP_EVENT,
1472 1389                              seg->s_base, seg->s_size))) {
1473 1390                                  AS_LOCK_EXIT(as, &as->a_lock);
1474 1391                                  as_execute_callback(as, cb, AS_UNMAP_EVENT);
1475 1392                          } else if (!AS_ISNOUNMAPWAIT(as)) {
1476 1393                                  if (AS_ISUNMAPWAIT(as) == 0)
1477 1394                                          cv_broadcast(&as->a_cv);
1478 1395                                  AS_SETUNMAPWAIT(as);
1479 1396                                  AS_LOCK_EXIT(as, &as->a_lock);
1480 1397                                  while (AS_ISUNMAPWAIT(as))
1481 1398                                          cv_wait(&as->a_cv, &as->a_contents);
1482 1399                          } else {
1483 1400                                  /*
1484 1401                                   * We may have raced with
1485 1402                                   * segvn_reclaim()/segspt_reclaim(). In this
1486 1403                                   * case clean nounmapwait flag and retry since
1487 1404                                   * softlockcnt in this segment may be already
1488 1405                                   * 0.  We don't drop as writer lock so our
1489 1406                                   * number of retries without sleeping should
1490 1407                                   * be very small. See segvn_reclaim() for
1491 1408                                   * more comments.
1492 1409                                   */
1493 1410                                  AS_CLRNOUNMAPWAIT(as);
1494 1411                                  mutex_exit(&as->a_contents);
1495 1412                                  goto retry;
1496 1413                          }
1497 1414                          mutex_exit(&as->a_contents);
1498 1415                          goto top;
1499 1416                  } else if (err == IE_RETRY) {
1500 1417                          AS_LOCK_EXIT(as, &as->a_lock);
1501 1418                          goto top;
1502 1419                  } else if (err) {
1503 1420                          as_setwatch(as);
1504 1421                          AS_LOCK_EXIT(as, &as->a_lock);
1505 1422                          return (-1);
1506 1423                  }
1507 1424  
1508 1425                  as->a_size -= ssize;
1509 1426                  if (rsize)
1510 1427                          as->a_resvsize -= rsize;
1511 1428                  raddr += ssize;
1512 1429          }
1513 1430          AS_LOCK_EXIT(as, &as->a_lock);
1514 1431          return (0);
1515 1432  }
1516 1433  
1517 1434  static int
1518 1435  as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
1519 1436      int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1520 1437  {
1521 1438          uint_t szc;
1522 1439          uint_t nszc;
1523 1440          int error;
1524 1441          caddr_t a;
1525 1442          caddr_t eaddr;
1526 1443          size_t segsize;
1527 1444          struct seg *seg;
1528 1445          size_t pgsz;
1529 1446          int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
1530 1447          uint_t save_szcvec;
1531 1448  
1532 1449          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1533 1450          ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1534 1451          ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1535 1452          ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
1536 1453          if (!do_off) {
1537 1454                  vn_a->offset = 0;
1538 1455          }
1539 1456  
1540 1457          if (szcvec <= 1) {
1541 1458                  seg = seg_alloc(as, addr, size);
1542 1459                  if (seg == NULL) {
1543 1460                          return (ENOMEM);
1544 1461                  }
1545 1462                  vn_a->szc = 0;
1546 1463                  error = (*crfp)(seg, vn_a);
1547 1464                  if (error != 0) {
1548 1465                          seg_free(seg);
1549 1466                  } else {
1550 1467                          as->a_size += size;
1551 1468                          as->a_resvsize += size;
1552 1469                  }
1553 1470                  return (error);
1554 1471          }
1555 1472  
1556 1473          eaddr = addr + size;
1557 1474          save_szcvec = szcvec;
1558 1475          szcvec >>= 1;
1559 1476          szc = 0;
1560 1477          nszc = 0;
1561 1478          while (szcvec) {
1562 1479                  if ((szcvec & 0x1) == 0) {
1563 1480                          nszc++;
1564 1481                          szcvec >>= 1;
1565 1482                          continue;
1566 1483                  }
1567 1484                  nszc++;
1568 1485                  pgsz = page_get_pagesize(nszc);
1569 1486                  a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
1570 1487                  if (a != addr) {
1571 1488                          ASSERT(a < eaddr);
1572 1489                          segsize = a - addr;
1573 1490                          seg = seg_alloc(as, addr, segsize);
1574 1491                          if (seg == NULL) {
1575 1492                                  return (ENOMEM);
1576 1493                          }
1577 1494                          vn_a->szc = szc;
1578 1495                          error = (*crfp)(seg, vn_a);
1579 1496                          if (error != 0) {
1580 1497                                  seg_free(seg);
1581 1498                                  return (error);
1582 1499                          }
1583 1500                          as->a_size += segsize;
1584 1501                          as->a_resvsize += segsize;
1585 1502                          *segcreated = 1;
1586 1503                          if (do_off) {
1587 1504                                  vn_a->offset += segsize;
1588 1505                          }
1589 1506                          addr = a;
1590 1507                  }
1591 1508                  szc = nszc;
1592 1509                  szcvec >>= 1;
1593 1510          }
1594 1511  
1595 1512          ASSERT(addr < eaddr);
1596 1513          szcvec = save_szcvec | 1; /* add 8K pages */
1597 1514          while (szcvec) {
1598 1515                  a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
1599 1516                  ASSERT(a >= addr);
1600 1517                  if (a != addr) {
1601 1518                          segsize = a - addr;
1602 1519                          seg = seg_alloc(as, addr, segsize);
1603 1520                          if (seg == NULL) {
1604 1521                                  return (ENOMEM);
1605 1522                          }
1606 1523                          vn_a->szc = szc;
1607 1524                          error = (*crfp)(seg, vn_a);
1608 1525                          if (error != 0) {
1609 1526                                  seg_free(seg);
1610 1527                                  return (error);
1611 1528                          }
1612 1529                          as->a_size += segsize;
1613 1530                          as->a_resvsize += segsize;
1614 1531                          *segcreated = 1;
1615 1532                          if (do_off) {
1616 1533                                  vn_a->offset += segsize;
1617 1534                          }
1618 1535                          addr = a;
1619 1536                  }
1620 1537                  szcvec &= ~(1 << szc);
1621 1538                  if (szcvec) {
1622 1539                          szc = highbit(szcvec) - 1;
1623 1540                          pgsz = page_get_pagesize(szc);
1624 1541                  }
1625 1542          }
1626 1543          ASSERT(addr == eaddr);
1627 1544  
1628 1545          return (0);
1629 1546  }
1630 1547  
1631 1548  static int
1632 1549  as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
1633 1550      int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1634 1551  {
1635 1552          uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
1636 1553          int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
1637 1554          uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
1638 1555              type, 0);
1639 1556          int error;
1640 1557          struct seg *seg;
1641 1558          struct vattr va;
1642 1559          u_offset_t eoff;
1643 1560          size_t save_size = 0;
1644 1561          extern size_t textrepl_size_thresh;
1645 1562  
1646 1563          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1647 1564          ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1648 1565          ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1649 1566          ASSERT(vn_a->vp != NULL);
1650 1567          ASSERT(vn_a->amp == NULL);
1651 1568  
1652 1569  again:
1653 1570          if (szcvec <= 1) {
1654 1571                  seg = seg_alloc(as, addr, size);
1655 1572                  if (seg == NULL) {
1656 1573                          return (ENOMEM);
1657 1574                  }
1658 1575                  vn_a->szc = 0;
1659 1576                  error = (*crfp)(seg, vn_a);
1660 1577                  if (error != 0) {
1661 1578                          seg_free(seg);
1662 1579                  } else {
1663 1580                          as->a_size += size;
1664 1581                          as->a_resvsize += size;
1665 1582                  }
1666 1583                  return (error);
1667 1584          }
1668 1585  
1669 1586          va.va_mask = AT_SIZE;
1670 1587          if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred, NULL) != 0) {
1671 1588                  szcvec = 0;
1672 1589                  goto again;
1673 1590          }
1674 1591          eoff = vn_a->offset & PAGEMASK;
1675 1592          if (eoff >= va.va_size) {
1676 1593                  szcvec = 0;
1677 1594                  goto again;
1678 1595          }
1679 1596          eoff += size;
1680 1597          if (btopr(va.va_size) < btopr(eoff)) {
1681 1598                  save_size = size;
1682 1599                  size = va.va_size - (vn_a->offset & PAGEMASK);
1683 1600                  size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
1684 1601                  szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
1685 1602                      type, 0);
1686 1603                  if (szcvec <= 1) {
1687 1604                          size = save_size;
1688 1605                          goto again;
1689 1606                  }
1690 1607          }
1691 1608  
1692 1609          if (size > textrepl_size_thresh) {
1693 1610                  vn_a->flags |= _MAP_TEXTREPL;
1694 1611          }
1695 1612          error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a,
1696 1613              segcreated);
1697 1614          if (error != 0) {
1698 1615                  return (error);
1699 1616          }
1700 1617          if (save_size) {
1701 1618                  addr += size;
1702 1619                  size = save_size - size;
1703 1620                  szcvec = 0;
1704 1621                  goto again;
1705 1622          }
1706 1623          return (0);
1707 1624  }
1708 1625  
1709 1626  /*
1710 1627   * as_map_ansegs: shared or private anonymous memory.  Note that the flags
1711 1628   * passed to map_pgszcvec cannot be MAP_INITDATA, for anon.
1712 1629   */
1713 1630  static int
1714 1631  as_map_ansegs(struct as *as, caddr_t addr, size_t size,
1715 1632      int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1716 1633  {
1717 1634          uint_t szcvec;
1718 1635          uchar_t type;
1719 1636  
1720 1637          ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
1721 1638          if (vn_a->type == MAP_SHARED) {
1722 1639                  type = MAPPGSZC_SHM;
1723 1640          } else if (vn_a->type == MAP_PRIVATE) {
1724 1641                  if (vn_a->szc == AS_MAP_HEAP) {
1725 1642                          type = MAPPGSZC_HEAP;
1726 1643                  } else if (vn_a->szc == AS_MAP_STACK) {
1727 1644                          type = MAPPGSZC_STACK;
1728 1645                  } else {
1729 1646                          type = MAPPGSZC_PRIVM;
1730 1647                  }
1731 1648          }
1732 1649          szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
1733 1650              (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
1734 1651              (vn_a->flags & MAP_TEXT), type, 0);
1735 1652          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1736 1653          ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1737 1654          ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1738 1655          ASSERT(vn_a->vp == NULL);
1739 1656  
1740 1657          return (as_map_segvn_segs(as, addr, size, szcvec,
1741 1658              crfp, vn_a, segcreated));
1742 1659  }
1743 1660  
1744 1661  int
1745 1662  as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
1746 1663  {
1747 1664          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1748 1665          return (as_map_locked(as, addr, size, crfp, argsp));
1749 1666  }
1750 1667  
1751 1668  int
1752 1669  as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
1753 1670                  void *argsp)
1754 1671  {
1755 1672          struct seg *seg = NULL;
1756 1673          caddr_t raddr;                  /* rounded down addr */
1757 1674          size_t rsize;                   /* rounded up size */
1758 1675          int error;
1759 1676          int unmap = 0;
1760 1677          struct proc *p = curproc;
1761 1678          struct segvn_crargs crargs;
1762 1679  
1763 1680          raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1764 1681          rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1765 1682              (size_t)raddr;
1766 1683  
1767 1684          /*
1768 1685           * check for wrap around
1769 1686           */
1770 1687          if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
1771 1688                  AS_LOCK_EXIT(as, &as->a_lock);
1772 1689                  return (ENOMEM);
1773 1690          }
1774 1691  
1775 1692          as->a_updatedir = 1;    /* inform /proc */
1776 1693          gethrestime(&as->a_updatetime);
1777 1694  
1778 1695          if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
1779 1696                  AS_LOCK_EXIT(as, &as->a_lock);
1780 1697  
1781 1698                  (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1782 1699                      RCA_UNSAFE_ALL);
1783 1700  
1784 1701                  return (ENOMEM);
1785 1702          }
1786 1703  
1787 1704          if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
1788 1705                  crargs = *(struct segvn_crargs *)argsp;
1789 1706                  error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
1790 1707                  if (error != 0) {
1791 1708                          AS_LOCK_EXIT(as, &as->a_lock);
1792 1709                          if (unmap) {
1793 1710                                  (void) as_unmap(as, addr, size);
1794 1711                          }
1795 1712                          return (error);
1796 1713                  }
1797 1714          } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
1798 1715                  crargs = *(struct segvn_crargs *)argsp;
1799 1716                  error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
1800 1717                  if (error != 0) {
1801 1718                          AS_LOCK_EXIT(as, &as->a_lock);
1802 1719                          if (unmap) {
1803 1720                                  (void) as_unmap(as, addr, size);
1804 1721                          }
1805 1722                          return (error);
1806 1723                  }
1807 1724          } else {
1808 1725                  seg = seg_alloc(as, addr, size);
1809 1726                  if (seg == NULL) {
1810 1727                          AS_LOCK_EXIT(as, &as->a_lock);
1811 1728                          return (ENOMEM);
1812 1729                  }
1813 1730  
1814 1731                  error = (*crfp)(seg, argsp);
1815 1732                  if (error != 0) {
1816 1733                          seg_free(seg);
1817 1734                          AS_LOCK_EXIT(as, &as->a_lock);
1818 1735                          return (error);
1819 1736                  }
1820 1737                  /*
1821 1738                   * Add size now so as_unmap will work if as_ctl fails.
1822 1739                   */
1823 1740                  as->a_size += rsize;
1824 1741                  as->a_resvsize += rsize;
1825 1742          }
1826 1743  
1827 1744          as_setwatch(as);
1828 1745  
1829 1746          /*
1830 1747           * If the address space is locked,
1831 1748           * establish memory locks for the new segment.
1832 1749           */
1833 1750          mutex_enter(&as->a_contents);
1834 1751          if (AS_ISPGLCK(as)) {
1835 1752                  mutex_exit(&as->a_contents);
1836 1753                  AS_LOCK_EXIT(as, &as->a_lock);
1837 1754                  error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
1838 1755                  if (error != 0)
1839 1756                          (void) as_unmap(as, addr, size);
1840 1757          } else {
1841 1758                  mutex_exit(&as->a_contents);
1842 1759                  AS_LOCK_EXIT(as, &as->a_lock);
1843 1760          }
1844 1761          return (error);
1845 1762  }
1846 1763  
1847 1764  
1848 1765  /*
1849 1766   * Delete all segments in the address space marked with S_PURGE.
1850 1767   * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
1851 1768   * These segments are deleted as a first step before calls to as_gap(), so
1852 1769   * that they don't affect mmap() or shmat().
1853 1770   */
1854 1771  void
1855 1772  as_purge(struct as *as)
1856 1773  {
1857 1774          struct seg *seg;
1858 1775          struct seg *next_seg;
1859 1776  
1860 1777          /*
1861 1778           * the setting of NEEDSPURGE is protected by as_rangelock(), so
1862 1779           * no need to grab a_contents mutex for this check

↓ open down ↓

417 lines elided

↑ open up ↑

1863 1780           */
1864 1781          if ((as->a_flags & AS_NEEDSPURGE) == 0)
1865 1782                  return;
1866 1783  
1867 1784          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1868 1785          next_seg = NULL;
1869 1786          seg = AS_SEGFIRST(as);
1870 1787          while (seg != NULL) {
1871 1788                  next_seg = AS_SEGNEXT(as, seg);
1872 1789                  if (seg->s_flags & S_PURGE)
1873      -                        SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
     1790 +                        segop_unmap(seg, seg->s_base, seg->s_size);
1874 1791                  seg = next_seg;
1875 1792          }
1876 1793          AS_LOCK_EXIT(as, &as->a_lock);
1877 1794  
1878 1795          mutex_enter(&as->a_contents);
1879 1796          as->a_flags &= ~AS_NEEDSPURGE;
1880 1797          mutex_exit(&as->a_contents);
1881 1798  }
1882 1799  
1883 1800  /*

1884 1801   * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1885 1802   * range of addresses at least "minlen" long, where the base of the range is
1886 1803   * at "off" phase from an "align" boundary and there is space for a
1887 1804   * "redzone"-sized redzone on either side of the range.  Thus,
1888 1805   * if align was 4M and off was 16k, the user wants a hole which will start
1889 1806   * 16k into a 4M page.
1890 1807   *
1891 1808   * If flags specifies AH_HI, the hole will have the highest possible address
1892 1809   * in the range.  We use the as->a_lastgap field to figure out where to
1893 1810   * start looking for a gap.
1894 1811   *
1895 1812   * Otherwise, the gap will have the lowest possible address.
1896 1813   *
1897 1814   * If flags specifies AH_CONTAIN, the hole will contain the address addr.
1898 1815   *
1899 1816   * If an adequate hole is found, *basep and *lenp are set to reflect the part of
1900 1817   * the hole that is within range, and 0 is returned. On failure, -1 is returned.
1901 1818   *
1902 1819   * NOTE: This routine is not correct when base+len overflows caddr_t.
1903 1820   */
1904 1821  int
1905 1822  as_gap_aligned(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp,
1906 1823      uint_t flags, caddr_t addr, size_t align, size_t redzone, size_t off)
1907 1824  {
1908 1825          caddr_t lobound = *basep;
1909 1826          caddr_t hibound = lobound + *lenp;
1910 1827          struct seg *lseg, *hseg;
1911 1828          caddr_t lo, hi;
1912 1829          int forward;
1913 1830          caddr_t save_base;
1914 1831          size_t save_len;
1915 1832          size_t save_minlen;
1916 1833          size_t save_redzone;
1917 1834          int fast_path = 1;
1918 1835  
1919 1836          save_base = *basep;
1920 1837          save_len = *lenp;
1921 1838          save_minlen = minlen;
1922 1839          save_redzone = redzone;
1923 1840  
1924 1841          /*
1925 1842           * For the first pass/fast_path, just add align and redzone into
1926 1843           * minlen since if we get an allocation, we can guarantee that it
1927 1844           * will fit the alignment and redzone requested.
1928 1845           * This increases the chance that hibound will be adjusted to
1929 1846           * a_lastgap->s_base which will likely allow us to find an
1930 1847           * acceptable hole in the address space quicker.
1931 1848           * If we can't find a hole with this fast_path, then we look for
1932 1849           * smaller holes in which the alignment and offset may allow
1933 1850           * the allocation to fit.
1934 1851           */
1935 1852          minlen += align;
1936 1853          minlen += 2 * redzone;
1937 1854          redzone = 0;
1938 1855  
1939 1856          AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1940 1857          if (AS_SEGFIRST(as) == NULL) {
1941 1858                  if (valid_va_range_aligned(basep, lenp, minlen, flags & AH_DIR,
1942 1859                      align, redzone, off)) {
1943 1860                          AS_LOCK_EXIT(as, &as->a_lock);
1944 1861                          return (0);
1945 1862                  } else {
1946 1863                          AS_LOCK_EXIT(as, &as->a_lock);
1947 1864                          *basep = save_base;
1948 1865                          *lenp = save_len;
1949 1866                          return (-1);
1950 1867                  }
1951 1868          }
1952 1869  
1953 1870  retry:
1954 1871          /*
1955 1872           * Set up to iterate over all the inter-segment holes in the given
1956 1873           * direction.  lseg is NULL for the lowest-addressed hole and hseg is
1957 1874           * NULL for the highest-addressed hole.  If moving backwards, we reset
1958 1875           * sseg to denote the highest-addressed segment.
1959 1876           */
1960 1877          forward = (flags & AH_DIR) == AH_LO;
1961 1878          if (forward) {
1962 1879                  hseg = as_findseg(as, lobound, 1);
1963 1880                  lseg = AS_SEGPREV(as, hseg);
1964 1881          } else {
1965 1882  
1966 1883                  /*
1967 1884                   * If allocating at least as much as the last allocation,
1968 1885                   * use a_lastgap's base as a better estimate of hibound.
1969 1886                   */
1970 1887                  if (as->a_lastgap &&
1971 1888                      minlen >= as->a_lastgap->s_size &&
1972 1889                      hibound >= as->a_lastgap->s_base)
1973 1890                          hibound = as->a_lastgap->s_base;
1974 1891  
1975 1892                  hseg = as_findseg(as, hibound, 1);
1976 1893                  if (hseg->s_base + hseg->s_size < hibound) {
1977 1894                          lseg = hseg;
1978 1895                          hseg = NULL;
1979 1896                  } else {
1980 1897                          lseg = AS_SEGPREV(as, hseg);
1981 1898                  }
1982 1899          }
1983 1900  
1984 1901          for (;;) {
1985 1902                  /*
1986 1903                   * Set lo and hi to the hole's boundaries.  (We should really
1987 1904                   * use MAXADDR in place of hibound in the expression below,
1988 1905                   * but can't express it easily; using hibound in its place is
1989 1906                   * harmless.)
1990 1907                   */
1991 1908                  lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
1992 1909                  hi = (hseg == NULL) ? hibound : hseg->s_base;
1993 1910                  /*
1994 1911                   * If the iteration has moved past the interval from lobound
1995 1912                   * to hibound it's pointless to continue.
1996 1913                   */
1997 1914                  if ((forward && lo > hibound) || (!forward && hi < lobound))
1998 1915                          break;
1999 1916                  else if (lo > hibound || hi < lobound)
2000 1917                          goto cont;
2001 1918                  /*
2002 1919                   * Candidate hole lies at least partially within the allowable
2003 1920                   * range.  Restrict it to fall completely within that range,
2004 1921                   * i.e., to [max(lo, lobound), min(hi, hibound)].
2005 1922                   */
2006 1923                  if (lo < lobound)
2007 1924                          lo = lobound;
2008 1925                  if (hi > hibound)
2009 1926                          hi = hibound;
2010 1927                  /*
2011 1928                   * Verify that the candidate hole is big enough and meets
2012 1929                   * hardware constraints.  If the hole is too small, no need
2013 1930                   * to do the further checks since they will fail.
2014 1931                   */
2015 1932                  *basep = lo;
2016 1933                  *lenp = hi - lo;
2017 1934                  if (*lenp >= minlen && valid_va_range_aligned(basep, lenp,
2018 1935                      minlen, forward ? AH_LO : AH_HI, align, redzone, off) &&
2019 1936                      ((flags & AH_CONTAIN) == 0 ||
2020 1937                      (*basep <= addr && *basep + *lenp > addr))) {
2021 1938                          if (!forward)
2022 1939                                  as->a_lastgap = hseg;
2023 1940                          if (hseg != NULL)
2024 1941                                  as->a_lastgaphl = hseg;
2025 1942                          else
2026 1943                                  as->a_lastgaphl = lseg;
2027 1944                          AS_LOCK_EXIT(as, &as->a_lock);
2028 1945                          return (0);
2029 1946                  }
2030 1947          cont:
2031 1948                  /*
2032 1949                   * Move to the next hole.
2033 1950                   */
2034 1951                  if (forward) {
2035 1952                          lseg = hseg;
2036 1953                          if (lseg == NULL)
2037 1954                                  break;
2038 1955                          hseg = AS_SEGNEXT(as, hseg);
2039 1956                  } else {
2040 1957                          hseg = lseg;
2041 1958                          if (hseg == NULL)
2042 1959                                  break;
2043 1960                          lseg = AS_SEGPREV(as, lseg);
2044 1961                  }
2045 1962          }
2046 1963          if (fast_path && (align != 0 || save_redzone != 0)) {
2047 1964                  fast_path = 0;
2048 1965                  minlen = save_minlen;
2049 1966                  redzone = save_redzone;
2050 1967                  goto retry;
2051 1968          }
2052 1969          *basep = save_base;
2053 1970          *lenp = save_len;
2054 1971          AS_LOCK_EXIT(as, &as->a_lock);
2055 1972          return (-1);
2056 1973  }
2057 1974  
2058 1975  /*
2059 1976   * Find a hole of at least size minlen within [*basep, *basep + *lenp).
2060 1977   *
2061 1978   * If flags specifies AH_HI, the hole will have the highest possible address
2062 1979   * in the range.  We use the as->a_lastgap field to figure out where to
2063 1980   * start looking for a gap.
2064 1981   *
2065 1982   * Otherwise, the gap will have the lowest possible address.
2066 1983   *
2067 1984   * If flags specifies AH_CONTAIN, the hole will contain the address addr.
2068 1985   *
2069 1986   * If an adequate hole is found, *basep and *lenp are set to reflect the part of
2070 1987   * the hole that is within range, and 0 is returned; otherwise,
2071 1988   * -1 is returned.
2072 1989   *
2073 1990   * NOTE: This routine is not correct when base+len overflows caddr_t.
2074 1991   */
2075 1992  int
2076 1993  as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
2077 1994      caddr_t addr)
2078 1995  {
2079 1996  
2080 1997          return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));

↓ open down ↓

197 lines elided

↑ open up ↑

2081 1998  }
2082 1999  
2083 2000  /*
2084 2001   * Return the next range within [base, base + len) that is backed
2085 2002   * with "real memory".  Skip holes and non-seg_vn segments.
2086 2003   * We're lazy and only return one segment at a time.
2087 2004   */
2088 2005  int
2089 2006  as_memory(struct as *as, caddr_t *basep, size_t *lenp)
2090 2007  {
2091      -        extern struct seg_ops segspt_shmops;    /* needs a header file */
     2008 +        extern const struct seg_ops segspt_shmops;      /* needs a header file */
2092 2009          struct seg *seg;
2093 2010          caddr_t addr, eaddr;
2094 2011          caddr_t segend;
2095 2012  
2096 2013          AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2097 2014  
2098 2015          addr = *basep;
2099 2016          eaddr = addr + *lenp;
2100 2017  
2101 2018          seg = as_findseg(as, addr, 0);

2102 2019          if (seg != NULL)
2103 2020                  addr = MAX(seg->s_base, addr);
2104 2021  
2105 2022          for (;;) {
2106 2023                  if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) {
2107 2024                          AS_LOCK_EXIT(as, &as->a_lock);
2108 2025                          return (EINVAL);
2109 2026                  }
2110 2027  
2111 2028                  if (seg->s_ops == &segvn_ops) {
2112 2029                          segend = seg->s_base + seg->s_size;
2113 2030                          break;
2114 2031                  }
2115 2032  
2116 2033                  /*
2117 2034                   * We do ISM by looking into the private data
2118 2035                   * to determine the real size of the segment.
2119 2036                   */
2120 2037                  if (seg->s_ops == &segspt_shmops) {
2121 2038                          segend = seg->s_base + spt_realsize(seg);
2122 2039                          if (addr < segend)
2123 2040                                  break;
2124 2041                  }
2125 2042  
2126 2043                  seg = AS_SEGNEXT(as, seg);
2127 2044  
2128 2045                  if (seg != NULL)
2129 2046                          addr = seg->s_base;
2130 2047          }
2131 2048  
2132 2049          *basep = addr;
2133 2050

↓ open down ↓

32 lines elided

↑ open up ↑

2134 2051          if (segend > eaddr)
2135 2052                  *lenp = eaddr - addr;
2136 2053          else
2137 2054                  *lenp = segend - addr;
2138 2055  
2139 2056          AS_LOCK_EXIT(as, &as->a_lock);
2140 2057          return (0);
2141 2058  }
2142 2059  
2143 2060  /*
2144      - * Swap the pages associated with the address space as out to
2145      - * secondary storage, returning the number of bytes actually
2146      - * swapped.
2147      - *
2148      - * The value returned is intended to correlate well with the process's
2149      - * memory requirements.  Its usefulness for this purpose depends on
2150      - * how well the segment-level routines do at returning accurate
2151      - * information.
2152      - */
2153      -size_t
2154      -as_swapout(struct as *as)
2155      -{
2156      -        struct seg *seg;
2157      -        size_t swpcnt = 0;
2158      -
2159      -        /*
2160      -         * Kernel-only processes have given up their address
2161      -         * spaces.  Of course, we shouldn't be attempting to
2162      -         * swap out such processes in the first place...
2163      -         */
2164      -        if (as == NULL)
2165      -                return (0);
2166      -
2167      -        AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2168      -
2169      -        /* Prevent XHATs from attaching */
2170      -        mutex_enter(&as->a_contents);
2171      -        AS_SETBUSY(as);
2172      -        mutex_exit(&as->a_contents);
2173      -
2174      -
2175      -        /*
2176      -         * Free all mapping resources associated with the address
2177      -         * space.  The segment-level swapout routines capitalize
2178      -         * on this unmapping by scavanging pages that have become
2179      -         * unmapped here.
2180      -         */
2181      -        hat_swapout(as->a_hat);
2182      -        if (as->a_xhat != NULL)
2183      -                xhat_swapout_all(as);
2184      -
2185      -        mutex_enter(&as->a_contents);
2186      -        AS_CLRBUSY(as);
2187      -        mutex_exit(&as->a_contents);
2188      -
2189      -        /*
2190      -         * Call the swapout routines of all segments in the address
2191      -         * space to do the actual work, accumulating the amount of
2192      -         * space reclaimed.
2193      -         */
2194      -        for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2195      -                struct seg_ops *ov = seg->s_ops;
2196      -
2197      -                /*
2198      -                 * We have to check to see if the seg has
2199      -                 * an ops vector because the seg may have
2200      -                 * been in the middle of being set up when
2201      -                 * the process was picked for swapout.
2202      -                 */
2203      -                if ((ov != NULL) && (ov->swapout != NULL))
2204      -                        swpcnt += SEGOP_SWAPOUT(seg);
2205      -        }
2206      -        AS_LOCK_EXIT(as, &as->a_lock);
2207      -        return (swpcnt);
2208      -}
2209      -
2210      -/*
2211 2061   * Determine whether data from the mappings in interval [addr, addr + size)
2212 2062   * are in the primary memory (core) cache.
2213 2063   */
2214 2064  int
2215 2065  as_incore(struct as *as, caddr_t addr,
2216 2066      size_t size, char *vec, size_t *sizep)
2217 2067  {
2218 2068          struct seg *seg;
2219 2069          size_t ssize;
2220 2070          caddr_t raddr;          /* rounded down addr */

2221 2071          size_t rsize;           /* rounded up size */
2222 2072          size_t isize;                   /* iteration size */
2223 2073          int error = 0;          /* result, assume success */
2224 2074  
2225 2075          *sizep = 0;
2226 2076          raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2227 2077          rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) -
2228 2078              (size_t)raddr;
2229 2079  
2230 2080          if (raddr + rsize < raddr)              /* check for wraparound */
2231 2081                  return (ENOMEM);
2232 2082  
2233 2083          AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2234 2084          seg = as_segat(as, raddr);
2235 2085          if (seg == NULL) {
2236 2086                  AS_LOCK_EXIT(as, &as->a_lock);
2237 2087                  return (-1);
2238 2088          }
2239 2089  
2240 2090          for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2241 2091                  if (raddr >= seg->s_base + seg->s_size) {

↓ open down ↓

21 lines elided

↑ open up ↑

2242 2092                          seg = AS_SEGNEXT(as, seg);
2243 2093                          if (seg == NULL || raddr != seg->s_base) {
2244 2094                                  error = -1;
2245 2095                                  break;
2246 2096                          }
2247 2097                  }
2248 2098                  if ((raddr + rsize) > (seg->s_base + seg->s_size))
2249 2099                          ssize = seg->s_base + seg->s_size - raddr;
2250 2100                  else
2251 2101                          ssize = rsize;
2252      -                *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec);
     2102 +                *sizep += isize = segop_incore(seg, raddr, ssize, vec);
2253 2103                  if (isize != ssize) {
2254 2104                          error = -1;
2255 2105                          break;
2256 2106                  }
2257 2107                  vec += btopr(ssize);
2258 2108          }
2259 2109          AS_LOCK_EXIT(as, &as->a_lock);
2260 2110          return (error);
2261 2111  }
2262 2112

2263 2113  static void
2264 2114  as_segunlock(struct seg *seg, caddr_t addr, int attr,
2265 2115          ulong_t *bitmap, size_t position, size_t npages)
2266 2116  {
2267 2117          caddr_t range_start;

↓ open down ↓

5 lines elided

↑ open up ↑

2268 2118          size_t  pos1 = position;
2269 2119          size_t  pos2;
2270 2120          size_t  size;
2271 2121          size_t  end_pos = npages + position;
2272 2122  
2273 2123          while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2274 2124                  size = ptob((pos2 - pos1));
2275 2125                  range_start = (caddr_t)((uintptr_t)addr +
2276 2126                      ptob(pos1 - position));
2277 2127  
2278      -                (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK,
     2128 +                (void) segop_lockop(seg, range_start, size, attr, MC_UNLOCK,
2279 2129                      (ulong_t *)NULL, (size_t)NULL);
2280 2130                  pos1 = pos2;
2281 2131          }
2282 2132  }
2283 2133  
2284 2134  static void
2285 2135  as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
2286 2136          caddr_t raddr, size_t rsize)
2287 2137  {
2288 2138          struct seg *seg = as_segat(as, raddr);

2289 2139          size_t ssize;
2290 2140  
2291 2141          while (rsize != 0) {
2292 2142                  if (raddr >= seg->s_base + seg->s_size)
2293 2143                          seg = AS_SEGNEXT(as, seg);
2294 2144  
2295 2145                  if ((raddr + rsize) > (seg->s_base + seg->s_size))
2296 2146                          ssize = seg->s_base + seg->s_size - raddr;
2297 2147                  else
2298 2148                          ssize = rsize;
2299 2149  
2300 2150                  as_segunlock(seg, raddr, attr, mlock_map, 0, btopr(ssize));
2301 2151  
2302 2152                  rsize -= ssize;
2303 2153                  raddr += ssize;
2304 2154          }
2305 2155  }
2306 2156  
2307 2157  /*
2308 2158   * Cache control operations over the interval [addr, addr + size) in
2309 2159   * address space "as".
2310 2160   */
2311 2161  /*ARGSUSED*/
2312 2162  int
2313 2163  as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr,
2314 2164      uintptr_t arg, ulong_t *lock_map, size_t pos)
2315 2165  {
2316 2166          struct seg *seg;        /* working segment */
2317 2167          caddr_t raddr;          /* rounded down addr */
2318 2168          caddr_t initraddr;      /* saved initial rounded down addr */
2319 2169          size_t rsize;           /* rounded up size */
2320 2170          size_t initrsize;       /* saved initial rounded up size */
2321 2171          size_t ssize;           /* size of seg */
2322 2172          int error = 0;                  /* result */
2323 2173          size_t mlock_size;      /* size of bitmap */
2324 2174          ulong_t *mlock_map;     /* pointer to bitmap used */
2325 2175                                  /* to represent the locked */
2326 2176                                  /* pages. */
2327 2177  retry:
2328 2178          if (error == IE_RETRY)
2329 2179                  AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2330 2180          else
2331 2181                  AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2332 2182  
2333 2183          /*
2334 2184           * If these are address space lock/unlock operations, loop over
2335 2185           * all segments in the address space, as appropriate.
2336 2186           */
2337 2187          if (func == MC_LOCKAS) {
2338 2188                  size_t npages, idx;
2339 2189                  size_t rlen = 0;        /* rounded as length */
2340 2190  
2341 2191                  idx = pos;
2342 2192  
2343 2193                  if (arg & MCL_FUTURE) {
2344 2194                          mutex_enter(&as->a_contents);
2345 2195                          AS_SETPGLCK(as);
2346 2196                          mutex_exit(&as->a_contents);
2347 2197                  }
2348 2198                  if ((arg & MCL_CURRENT) == 0) {
2349 2199                          AS_LOCK_EXIT(as, &as->a_lock);
2350 2200                          return (0);
2351 2201                  }
2352 2202  
2353 2203                  seg = AS_SEGFIRST(as);
2354 2204                  if (seg == NULL) {
2355 2205                          AS_LOCK_EXIT(as, &as->a_lock);
2356 2206                          return (0);
2357 2207                  }
2358 2208  
2359 2209                  do {
2360 2210                          raddr = (caddr_t)((uintptr_t)seg->s_base &
2361 2211                              (uintptr_t)PAGEMASK);
2362 2212                          rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2363 2213                              PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;

↓ open down ↓

75 lines elided

↑ open up ↑

2364 2214                  } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2365 2215  
2366 2216                  mlock_size = BT_BITOUL(btopr(rlen));
2367 2217                  if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2368 2218                      sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2369 2219                                  AS_LOCK_EXIT(as, &as->a_lock);
2370 2220                                  return (EAGAIN);
2371 2221                  }
2372 2222  
2373 2223                  for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2374      -                        error = SEGOP_LOCKOP(seg, seg->s_base,
     2224 +                        error = segop_lockop(seg, seg->s_base,
2375 2225                              seg->s_size, attr, MC_LOCK, mlock_map, pos);
2376 2226                          if (error != 0)
2377 2227                                  break;
2378 2228                          pos += seg_pages(seg);
2379 2229                  }
2380 2230  
2381 2231                  if (error) {
2382 2232                          for (seg = AS_SEGFIRST(as); seg != NULL;
2383 2233                              seg = AS_SEGNEXT(as, seg)) {
2384 2234

2385 2235                                  raddr = (caddr_t)((uintptr_t)seg->s_base &
2386 2236                                      (uintptr_t)PAGEMASK);
2387 2237                                  npages = seg_pages(seg);
2388 2238                                  as_segunlock(seg, raddr, attr, mlock_map,
2389 2239                                      idx, npages);
2390 2240                                  idx += npages;
2391 2241                          }
2392 2242                  }

↓ open down ↓

8 lines elided

↑ open up ↑

2393 2243  
2394 2244                  kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2395 2245                  AS_LOCK_EXIT(as, &as->a_lock);
2396 2246                  goto lockerr;
2397 2247          } else if (func == MC_UNLOCKAS) {
2398 2248                  mutex_enter(&as->a_contents);
2399 2249                  AS_CLRPGLCK(as);
2400 2250                  mutex_exit(&as->a_contents);
2401 2251  
2402 2252                  for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2403      -                        error = SEGOP_LOCKOP(seg, seg->s_base,
     2253 +                        error = segop_lockop(seg, seg->s_base,
2404 2254                              seg->s_size, attr, MC_UNLOCK, NULL, 0);
2405 2255                          if (error != 0)
2406 2256                                  break;
2407 2257                  }
2408 2258  
2409 2259                  AS_LOCK_EXIT(as, &as->a_lock);
2410 2260                  goto lockerr;
2411 2261          }
2412 2262  
2413 2263          /*

2414 2264           * Normalize addresses and sizes.
2415 2265           */
2416 2266          initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2417 2267          initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2418 2268              (size_t)raddr;
2419 2269  
2420 2270          if (raddr + rsize < raddr) {            /* check for wraparound */
2421 2271                  AS_LOCK_EXIT(as, &as->a_lock);
2422 2272                  return (ENOMEM);
2423 2273          }
2424 2274  
2425 2275          /*
2426 2276           * Get initial segment.
2427 2277           */
2428 2278          if ((seg = as_segat(as, raddr)) == NULL) {
2429 2279                  AS_LOCK_EXIT(as, &as->a_lock);
2430 2280                  return (ENOMEM);
2431 2281          }
2432 2282  
2433 2283          if (func == MC_LOCK) {
2434 2284                  mlock_size = BT_BITOUL(btopr(rsize));
2435 2285                  if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2436 2286                      sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2437 2287                                  AS_LOCK_EXIT(as, &as->a_lock);
2438 2288                                  return (EAGAIN);
2439 2289                  }
2440 2290          }
2441 2291  
2442 2292          /*
2443 2293           * Loop over all segments.  If a hole in the address range is
2444 2294           * discovered, then fail.  For each segment, perform the appropriate
2445 2295           * control operation.
2446 2296           */
2447 2297          while (rsize != 0) {
2448 2298  
2449 2299                  /*
2450 2300                   * Make sure there's no hole, calculate the portion
2451 2301                   * of the next segment to be operated over.
2452 2302                   */
2453 2303                  if (raddr >= seg->s_base + seg->s_size) {
2454 2304                          seg = AS_SEGNEXT(as, seg);
2455 2305                          if (seg == NULL || raddr != seg->s_base) {
2456 2306                                  if (func == MC_LOCK) {
2457 2307                                          as_unlockerr(as, attr, mlock_map,
2458 2308                                              initraddr, initrsize - rsize);
2459 2309                                          kmem_free(mlock_map,
2460 2310                                              mlock_size * sizeof (ulong_t));
2461 2311                                  }
2462 2312                                  AS_LOCK_EXIT(as, &as->a_lock);
2463 2313                                  return (ENOMEM);
2464 2314                          }
2465 2315                  }
2466 2316                  if ((raddr + rsize) > (seg->s_base + seg->s_size))
2467 2317                          ssize = seg->s_base + seg->s_size - raddr;
2468 2318                  else
2469 2319                          ssize = rsize;
2470 2320

↓ open down ↓

57 lines elided

↑ open up ↑

2471 2321                  /*
2472 2322                   * Dispatch on specific function.
2473 2323                   */
2474 2324                  switch (func) {
2475 2325  
2476 2326                  /*
2477 2327                   * Synchronize cached data from mappings with backing
2478 2328                   * objects.
2479 2329                   */
2480 2330                  case MC_SYNC:
2481      -                        if (error = SEGOP_SYNC(seg, raddr, ssize,
     2331 +                        if (error = segop_sync(seg, raddr, ssize,
2482 2332                              attr, (uint_t)arg)) {
2483 2333                                  AS_LOCK_EXIT(as, &as->a_lock);
2484 2334                                  return (error);
2485 2335                          }
2486 2336                          break;
2487 2337  
2488 2338                  /*
2489 2339                   * Lock pages in memory.
2490 2340                   */
2491 2341                  case MC_LOCK:
2492      -                        if (error = SEGOP_LOCKOP(seg, raddr, ssize,
     2342 +                        if (error = segop_lockop(seg, raddr, ssize,
2493 2343                              attr, func, mlock_map, pos)) {
2494 2344                                  as_unlockerr(as, attr, mlock_map, initraddr,
2495 2345                                      initrsize - rsize + ssize);
2496 2346                                  kmem_free(mlock_map, mlock_size *
2497 2347                                      sizeof (ulong_t));
2498 2348                                  AS_LOCK_EXIT(as, &as->a_lock);
2499 2349                                  goto lockerr;
2500 2350                          }
2501 2351                          break;
2502 2352  
2503 2353                  /*
2504 2354                   * Unlock mapped pages.
2505 2355                   */
2506 2356                  case MC_UNLOCK:
2507      -                        (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func,
     2357 +                        (void) segop_lockop(seg, raddr, ssize, attr, func,
2508 2358                              (ulong_t *)NULL, (size_t)NULL);
2509 2359                          break;
2510 2360  
2511 2361                  /*
2512 2362                   * Store VM advise for mapped pages in segment layer.
2513 2363                   */
2514 2364                  case MC_ADVISE:
2515      -                        error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg);
     2365 +                        error = segop_advise(seg, raddr, ssize, (uint_t)arg);
2516 2366  
2517 2367                          /*
2518 2368                           * Check for regular errors and special retry error
2519 2369                           */
2520 2370                          if (error) {
2521 2371                                  if (error == IE_RETRY) {
2522 2372                                          /*
2523 2373                                           * Need to acquire writers lock, so
2524 2374                                           * have to drop readers lock and start
2525 2375                                           * all over again

2526 2376                                           */
2527 2377                                          AS_LOCK_EXIT(as, &as->a_lock);
2528 2378                                          goto retry;
2529 2379                                  } else if (error == IE_REATTACH) {
2530 2380                                          /*
2531 2381                                           * Find segment for current address
2532 2382                                           * because current segment just got
2533 2383                                           * split or concatenated
2534 2384                                           */
2535 2385                                          seg = as_segat(as, raddr);
2536 2386                                          if (seg == NULL) {
2537 2387                                                  AS_LOCK_EXIT(as, &as->a_lock);
2538 2388                                                  return (ENOMEM);
2539 2389                                          }
2540 2390                                  } else {

↓ open down ↓

15 lines elided

↑ open up ↑

2541 2391                                          /*
2542 2392                                           * Regular error
2543 2393                                           */
2544 2394                                          AS_LOCK_EXIT(as, &as->a_lock);
2545 2395                                          return (error);
2546 2396                                  }
2547 2397                          }
2548 2398                          break;
2549 2399  
2550 2400                  case MC_INHERIT_ZERO:
2551      -                        if (seg->s_ops->inherit == NULL) {
2552      -                                error = ENOTSUP;
2553      -                        } else {
2554      -                                error = SEGOP_INHERIT(seg, raddr, ssize,
2555      -                                    SEGP_INH_ZERO);
2556      -                        }
     2401 +                        error = segop_inherit(seg, raddr, ssize, SEGP_INH_ZERO);
2557 2402                          if (error != 0) {
2558 2403                                  AS_LOCK_EXIT(as, &as->a_lock);
2559 2404                                  return (error);
2560 2405                          }
2561 2406                          break;
2562 2407  
2563 2408                  /*
2564 2409                   * Can't happen.
2565 2410                   */
2566 2411                  default:

2567 2412                          panic("as_ctl: bad operation %d", func);
2568 2413                          /*NOTREACHED*/
2569 2414                  }
2570 2415  
2571 2416                  rsize -= ssize;
2572 2417                  raddr += ssize;
2573 2418          }
2574 2419  
2575 2420          if (func == MC_LOCK)
2576 2421                  kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2577 2422          AS_LOCK_EXIT(as, &as->a_lock);
2578 2423          return (0);
2579 2424  lockerr:
2580 2425  
2581 2426          /*
2582 2427           * If the lower levels returned EDEADLK for a segment lockop,
2583 2428           * it means that we should retry the operation.  Let's wait
2584 2429           * a bit also to let the deadlock causing condition clear.
2585 2430           * This is part of a gross hack to work around a design flaw
2586 2431           * in the ufs/sds logging code and should go away when the
2587 2432           * logging code is re-designed to fix the problem. See bug
2588 2433           * 4125102 for details of the problem.
2589 2434           */
2590 2435          if (error == EDEADLK) {
2591 2436                  delay(deadlk_wait);
2592 2437                  error = 0;
2593 2438                  goto retry;
2594 2439          }
2595 2440          return (error);
2596 2441  }
2597 2442  
2598 2443  int
2599 2444  fc_decode(faultcode_t fault_err)
2600 2445  {
2601 2446          int error = 0;
2602 2447  
2603 2448          switch (FC_CODE(fault_err)) {
2604 2449          case FC_OBJERR:
2605 2450                  error = FC_ERRNO(fault_err);
2606 2451                  break;
2607 2452          case FC_PROT:
2608 2453                  error = EACCES;
2609 2454                  break;
2610 2455          default:
2611 2456                  error = EFAULT;
2612 2457                  break;
2613 2458          }
2614 2459          return (error);
2615 2460  }
2616 2461  
2617 2462  /*
2618 2463   * Pagelock pages from a range that spans more than 1 segment.  Obtain shadow
2619 2464   * lists from each segment and copy them to one contiguous shadow list (plist)
2620 2465   * as expected by the caller.  Save pointers to per segment shadow lists at
2621 2466   * the tail of plist so that they can be used during as_pageunlock().
2622 2467   */
2623 2468  static int
2624 2469  as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp,
2625 2470      caddr_t addr, size_t size, enum seg_rw rw)
2626 2471  {
2627 2472          caddr_t sv_addr = addr;
2628 2473          size_t sv_size = size;
2629 2474          struct seg *sv_seg = seg;

↓ open down ↓

63 lines elided

↑ open up ↑

2630 2475          ulong_t segcnt = 1;
2631 2476          ulong_t cnt;
2632 2477          size_t ssize;
2633 2478          pgcnt_t npages = btop(size);
2634 2479          page_t **plist;
2635 2480          page_t **pl;
2636 2481          int error;
2637 2482          caddr_t eaddr;
2638 2483          faultcode_t fault_err = 0;
2639 2484          pgcnt_t pl_off;
2640      -        extern struct seg_ops segspt_shmops;
     2485 +        extern const struct seg_ops segspt_shmops;
2641 2486  
2642 2487          ASSERT(AS_LOCK_HELD(as, &as->a_lock));
2643 2488          ASSERT(seg != NULL);
2644 2489          ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2645 2490          ASSERT(addr + size > seg->s_base + seg->s_size);
2646 2491          ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2647 2492          ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2648 2493  
2649 2494          /*
2650 2495           * Count the number of segments covered by the range we are about to

2651 2496           * lock. The segment count is used to size the shadow list we return
2652 2497           * back to the caller.
2653 2498           */
2654 2499          for (; size != 0; size -= ssize, addr += ssize) {
2655 2500                  if (addr >= seg->s_base + seg->s_size) {
2656 2501  
2657 2502                          seg = AS_SEGNEXT(as, seg);
2658 2503                          if (seg == NULL || addr != seg->s_base) {

↓ open down ↓

8 lines elided

↑ open up ↑

2659 2504                                  AS_LOCK_EXIT(as, &as->a_lock);
2660 2505                                  return (EFAULT);
2661 2506                          }
2662 2507                          /*
2663 2508                           * Do a quick check if subsequent segments
2664 2509                           * will most likely support pagelock.
2665 2510                           */
2666 2511                          if (seg->s_ops == &segvn_ops) {
2667 2512                                  vnode_t *vp;
2668 2513  
2669      -                                if (SEGOP_GETVP(seg, addr, &vp) != 0 ||
     2514 +                                if (segop_getvp(seg, addr, &vp) != 0 ||
2670 2515                                      vp != NULL) {
2671 2516                                          AS_LOCK_EXIT(as, &as->a_lock);
2672 2517                                          goto slow;
2673 2518                                  }
2674 2519                          } else if (seg->s_ops != &segspt_shmops) {
2675 2520                                  AS_LOCK_EXIT(as, &as->a_lock);
2676 2521                                  goto slow;
2677 2522                          }
2678 2523                          segcnt++;
2679 2524                  }

2680 2525                  if (addr + size > seg->s_base + seg->s_size) {
2681 2526                          ssize = seg->s_base + seg->s_size - addr;
2682 2527                  } else {
2683 2528                          ssize = size;
2684 2529                  }
2685 2530          }
2686 2531          ASSERT(segcnt > 1);
2687 2532  
2688 2533          plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2689 2534  
2690 2535          addr = sv_addr;
2691 2536          size = sv_size;
2692 2537          seg = sv_seg;
2693 2538  
2694 2539          for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2695 2540                  if (addr >= seg->s_base + seg->s_size) {
2696 2541                          seg = AS_SEGNEXT(as, seg);

↓ open down ↓

17 lines elided

↑ open up ↑

2697 2542                          ASSERT(seg != NULL && addr == seg->s_base);
2698 2543                          cnt++;
2699 2544                          ASSERT(cnt < segcnt);
2700 2545                  }
2701 2546                  if (addr + size > seg->s_base + seg->s_size) {
2702 2547                          ssize = seg->s_base + seg->s_size - addr;
2703 2548                  } else {
2704 2549                          ssize = size;
2705 2550                  }
2706 2551                  pl = &plist[npages + cnt];
2707      -                error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
     2552 +                error = segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2708 2553                      L_PAGELOCK, rw);
2709 2554                  if (error) {
2710 2555                          break;
2711 2556                  }
2712 2557                  ASSERT(plist[npages + cnt] != NULL);
2713 2558                  ASSERT(pl_off + btop(ssize) <= npages);
2714 2559                  bcopy(plist[npages + cnt], &plist[pl_off],
2715 2560                      btop(ssize) * sizeof (page_t *));
2716 2561                  pl_off += btop(ssize);
2717 2562          }

2718 2563  
2719 2564          if (size == 0) {
2720 2565                  AS_LOCK_EXIT(as, &as->a_lock);
2721 2566                  ASSERT(cnt == segcnt - 1);
2722 2567                  *ppp = plist;
2723 2568                  return (0);
2724 2569          }
2725 2570  
2726 2571          /*
2727 2572           * one of pagelock calls failed. The error type is in error variable.
2728 2573           * Unlock what we've locked so far and retry with F_SOFTLOCK if error
2729 2574           * type is either EFAULT or ENOTSUP. Otherwise just return the error
2730 2575           * back to the caller.
2731 2576           */
2732 2577  
2733 2578          eaddr = addr;
2734 2579          seg = sv_seg;
2735 2580  
2736 2581          for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2737 2582                  if (addr >= seg->s_base + seg->s_size) {
2738 2583                          seg = AS_SEGNEXT(as, seg);
2739 2584                          ASSERT(seg != NULL && addr == seg->s_base);

↓ open down ↓

22 lines elided

↑ open up ↑

2740 2585                          cnt++;
2741 2586                          ASSERT(cnt < segcnt);
2742 2587                  }
2743 2588                  if (eaddr > seg->s_base + seg->s_size) {
2744 2589                          ssize = seg->s_base + seg->s_size - addr;
2745 2590                  } else {
2746 2591                          ssize = eaddr - addr;
2747 2592                  }
2748 2593                  pl = &plist[npages + cnt];
2749 2594                  ASSERT(*pl != NULL);
2750      -                (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
     2595 +                (void) segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2751 2596                      L_PAGEUNLOCK, rw);
2752 2597          }
2753 2598  
2754 2599          AS_LOCK_EXIT(as, &as->a_lock);
2755 2600  
2756 2601          kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2757 2602  
2758 2603          if (error != ENOTSUP && error != EFAULT) {
2759 2604                  return (error);
2760 2605          }

2761 2606  
2762 2607  slow:
2763 2608          /*
2764 2609           * If we are here because pagelock failed due to the need to cow fault
2765 2610           * in the pages we want to lock F_SOFTLOCK will do this job and in
2766 2611           * next as_pagelock() call for this address range pagelock will
2767 2612           * hopefully succeed.
2768 2613           */
2769 2614          fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2770 2615          if (fault_err != 0) {
2771 2616                  return (fc_decode(fault_err));
2772 2617          }
2773 2618          *ppp = NULL;
2774 2619  
2775 2620          return (0);
2776 2621  }
2777 2622  
2778 2623  /*
2779 2624   * lock pages in a given address space. Return shadow list. If
2780 2625   * the list is NULL, the MMU mapping is also locked.
2781 2626   */
2782 2627  int
2783 2628  as_pagelock(struct as *as, struct page ***ppp, caddr_t addr,
2784 2629      size_t size, enum seg_rw rw)
2785 2630  {
2786 2631          size_t rsize;
2787 2632          caddr_t raddr;
2788 2633          faultcode_t fault_err;
2789 2634          struct seg *seg;
2790 2635          int err;
2791 2636  
2792 2637          TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START,
2793 2638              "as_pagelock_start: addr %p size %ld", addr, size);
2794 2639  
2795 2640          raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2796 2641          rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2797 2642              (size_t)raddr;
2798 2643  
2799 2644          /*
2800 2645           * if the request crosses more than one segment let
2801 2646           * as_pagelock_segs() handle it.
2802 2647           */
2803 2648          AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2804 2649  
2805 2650          seg = as_segat(as, raddr);
2806 2651          if (seg == NULL) {
2807 2652                  AS_LOCK_EXIT(as, &as->a_lock);
2808 2653                  return (EFAULT);
2809 2654          }
2810 2655          ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2811 2656          if (raddr + rsize > seg->s_base + seg->s_size) {
2812 2657                  return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2813 2658          }
2814 2659          if (raddr + rsize <= raddr) {

↓ open down ↓

54 lines elided

↑ open up ↑

2815 2660                  AS_LOCK_EXIT(as, &as->a_lock);
2816 2661                  return (EFAULT);
2817 2662          }
2818 2663  
2819 2664          TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2820 2665              "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2821 2666  
2822 2667          /*
2823 2668           * try to lock pages and pass back shadow list
2824 2669           */
2825      -        err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
     2670 +        err = segop_pagelock(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2826 2671  
2827 2672          TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2828 2673  
2829 2674          AS_LOCK_EXIT(as, &as->a_lock);
2830 2675  
2831 2676          if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2832 2677                  return (err);
2833 2678          }
2834 2679  
2835 2680          /*

2836 2681           * Use F_SOFTLOCK to lock the pages because pagelock failed either due
2837 2682           * to no pagelock support for this segment or pages need to be cow
2838 2683           * faulted in. If fault is needed F_SOFTLOCK will do this job for
2839 2684           * this as_pagelock() call and in the next as_pagelock() call for the
2840 2685           * same address range pagelock call will hopefully succeed.
2841 2686           */
2842 2687          fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2843 2688          if (fault_err != 0) {
2844 2689                  return (fc_decode(fault_err));
2845 2690          }
2846 2691          *ppp = NULL;
2847 2692  
2848 2693          TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end");
2849 2694          return (0);
2850 2695  }
2851 2696  
2852 2697  /*
2853 2698   * unlock pages locked by as_pagelock_segs().  Retrieve per segment shadow
2854 2699   * lists from the end of plist and call pageunlock interface for each segment.
2855 2700   * Drop as lock and free plist.
2856 2701   */
2857 2702  static void
2858 2703  as_pageunlock_segs(struct as *as, struct seg *seg, caddr_t addr, size_t size,
2859 2704      struct page **plist, enum seg_rw rw)
2860 2705  {
2861 2706          ulong_t cnt;
2862 2707          caddr_t eaddr = addr + size;
2863 2708          pgcnt_t npages = btop(size);
2864 2709          size_t ssize;
2865 2710          page_t **pl;
2866 2711  
2867 2712          ASSERT(AS_LOCK_HELD(as, &as->a_lock));
2868 2713          ASSERT(seg != NULL);
2869 2714          ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2870 2715          ASSERT(addr + size > seg->s_base + seg->s_size);
2871 2716          ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2872 2717          ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2873 2718          ASSERT(plist != NULL);
2874 2719  
2875 2720          for (cnt = 0; addr < eaddr; addr += ssize) {
2876 2721                  if (addr >= seg->s_base + seg->s_size) {
2877 2722                          seg = AS_SEGNEXT(as, seg);

↓ open down ↓

42 lines elided

↑ open up ↑

2878 2723                          ASSERT(seg != NULL && addr == seg->s_base);
2879 2724                          cnt++;
2880 2725                  }
2881 2726                  if (eaddr > seg->s_base + seg->s_size) {
2882 2727                          ssize = seg->s_base + seg->s_size - addr;
2883 2728                  } else {
2884 2729                          ssize = eaddr - addr;
2885 2730                  }
2886 2731                  pl = &plist[npages + cnt];
2887 2732                  ASSERT(*pl != NULL);
2888      -                (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
     2733 +                (void) segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2889 2734                      L_PAGEUNLOCK, rw);
2890 2735          }
2891 2736          ASSERT(cnt > 0);
2892 2737          AS_LOCK_EXIT(as, &as->a_lock);
2893 2738  
2894 2739          cnt++;
2895 2740          kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2896 2741  }
2897 2742  
2898 2743  /*

2899 2744   * unlock pages in a given address range
2900 2745   */
2901 2746  void
2902 2747  as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2903 2748      enum seg_rw rw)
2904 2749  {
2905 2750          struct seg *seg;
2906 2751          size_t rsize;
2907 2752          caddr_t raddr;
2908 2753  
2909 2754          TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START,
2910 2755              "as_pageunlock_start: addr %p size %ld", addr, size);
2911 2756  
2912 2757          /*
2913 2758           * if the shadow list is NULL, as_pagelock was
2914 2759           * falling back to as_fault
2915 2760           */
2916 2761          if (pp == NULL) {
2917 2762                  (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2918 2763                  return;
2919 2764          }
2920 2765  
2921 2766          raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2922 2767          rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2923 2768              (size_t)raddr;

↓ open down ↓

25 lines elided

↑ open up ↑

2924 2769  
2925 2770          AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2926 2771          seg = as_segat(as, raddr);
2927 2772          ASSERT(seg != NULL);
2928 2773  
2929 2774          TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2930 2775              "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2931 2776  
2932 2777          ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2933 2778          if (raddr + rsize <= seg->s_base + seg->s_size) {
2934      -                SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
     2779 +                segop_pagelock(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2935 2780          } else {
2936 2781                  as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2937 2782                  return;
2938 2783          }
2939 2784          AS_LOCK_EXIT(as, &as->a_lock);
2940 2785          TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2941 2786  }
2942 2787  
2943 2788  int
2944 2789  as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,

2945 2790      boolean_t wait)
2946 2791  {
2947 2792          struct seg *seg;
2948 2793          size_t ssize;
2949 2794          caddr_t raddr;                  /* rounded down addr */
2950 2795          size_t rsize;                   /* rounded up size */
2951 2796          int error = 0;
2952 2797          size_t pgsz = page_get_pagesize(szc);
2953 2798  
2954 2799  setpgsz_top:
2955 2800          if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) {
2956 2801                  return (EINVAL);
2957 2802          }
2958 2803  
2959 2804          raddr = addr;
2960 2805          rsize = size;
2961 2806  
2962 2807          if (raddr + rsize < raddr)              /* check for wraparound */
2963 2808                  return (ENOMEM);
2964 2809  
2965 2810          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2966 2811          as_clearwatchprot(as, raddr, rsize);
2967 2812          seg = as_segat(as, raddr);
2968 2813          if (seg == NULL) {
2969 2814                  as_setwatch(as);
2970 2815                  AS_LOCK_EXIT(as, &as->a_lock);
2971 2816                  return (ENOMEM);
2972 2817          }
2973 2818  
2974 2819          for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2975 2820                  if (raddr >= seg->s_base + seg->s_size) {
2976 2821                          seg = AS_SEGNEXT(as, seg);
2977 2822                          if (seg == NULL || raddr != seg->s_base) {
2978 2823                                  error = ENOMEM;

↓ open down ↓

34 lines elided

↑ open up ↑

2979 2824                                  break;
2980 2825                          }
2981 2826                  }
2982 2827                  if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2983 2828                          ssize = seg->s_base + seg->s_size - raddr;
2984 2829                  } else {
2985 2830                          ssize = rsize;
2986 2831                  }
2987 2832  
2988 2833  retry:
2989      -                error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
     2834 +                error = segop_setpagesize(seg, raddr, ssize, szc);
2990 2835  
2991 2836                  if (error == IE_NOMEM) {
2992 2837                          error = EAGAIN;
2993 2838                          break;
2994 2839                  }
2995 2840  
2996 2841                  if (error == IE_RETRY) {
2997 2842                          AS_LOCK_EXIT(as, &as->a_lock);
2998 2843                          goto setpgsz_top;
2999 2844                  }

3000 2845  
3001 2846                  if (error == ENOTSUP) {
3002 2847                          error = EINVAL;
3003 2848                          break;
3004 2849                  }
3005 2850  
3006 2851                  if (wait && (error == EAGAIN)) {
3007 2852                          /*
3008 2853                           * Memory is currently locked.  It must be unlocked
3009 2854                           * before this operation can succeed through a retry.
3010 2855                           * The possible reasons for locked memory and
3011 2856                           * corresponding strategies for unlocking are:
3012 2857                           * (1) Normal I/O
3013 2858                           *      wait for a signal that the I/O operation
3014 2859                           *      has completed and the memory is unlocked.
3015 2860                           * (2) Asynchronous I/O
3016 2861                           *      The aio subsystem does not unlock pages when
3017 2862                           *      the I/O is completed. Those pages are unlocked
3018 2863                           *      when the application calls aiowait/aioerror.
3019 2864                           *      So, to prevent blocking forever, cv_broadcast()
3020 2865                           *      is done to wake up aio_cleanup_thread.
3021 2866                           *      Subsequently, segvn_reclaim will be called, and
3022 2867                           *      that will do AS_CLRUNMAPWAIT() and wake us up.
3023 2868                           * (3) Long term page locking:
3024 2869                           *      This is not relevant for as_setpagesize()
3025 2870                           *      because we cannot change the page size for
3026 2871                           *      driver memory. The attempt to do so will
3027 2872                           *      fail with a different error than EAGAIN so
3028 2873                           *      there's no need to trigger as callbacks like
3029 2874                           *      as_unmap, as_setprot or as_free would do.
3030 2875                           */
3031 2876                          mutex_enter(&as->a_contents);
3032 2877                          if (!AS_ISNOUNMAPWAIT(as)) {
3033 2878                                  if (AS_ISUNMAPWAIT(as) == 0) {
3034 2879                                          cv_broadcast(&as->a_cv);
3035 2880                                  }
3036 2881                                  AS_SETUNMAPWAIT(as);
3037 2882                                  AS_LOCK_EXIT(as, &as->a_lock);
3038 2883                                  while (AS_ISUNMAPWAIT(as)) {
3039 2884                                          cv_wait(&as->a_cv, &as->a_contents);
3040 2885                                  }
3041 2886                          } else {
3042 2887                                  /*
3043 2888                                   * We may have raced with
3044 2889                                   * segvn_reclaim()/segspt_reclaim(). In this
3045 2890                                   * case clean nounmapwait flag and retry since
3046 2891                                   * softlockcnt in this segment may be already
3047 2892                                   * 0.  We don't drop as writer lock so our
3048 2893                                   * number of retries without sleeping should
3049 2894                                   * be very small. See segvn_reclaim() for
3050 2895                                   * more comments.
3051 2896                                   */
3052 2897                                  AS_CLRNOUNMAPWAIT(as);
3053 2898                                  mutex_exit(&as->a_contents);
3054 2899                                  goto retry;
3055 2900                          }
3056 2901                          mutex_exit(&as->a_contents);
3057 2902                          goto setpgsz_top;

↓ open down ↓

58 lines elided

↑ open up ↑

3058 2903                  } else if (error != 0) {
3059 2904                          break;
3060 2905                  }
3061 2906          }
3062 2907          as_setwatch(as);
3063 2908          AS_LOCK_EXIT(as, &as->a_lock);
3064 2909          return (error);
3065 2910  }
3066 2911  
3067 2912  /*
3068      - * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments
     2913 + * as_iset3_default_lpsize() just calls segop_setpagesize() on all segments
3069 2914   * in its chunk where s_szc is less than the szc we want to set.
3070 2915   */
3071 2916  static int
3072 2917  as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3073 2918      int *retry)
3074 2919  {
3075 2920          struct seg *seg;
3076 2921          size_t ssize;
3077 2922          int error;
3078 2923

3079 2924          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3080 2925  
3081 2926          seg = as_segat(as, raddr);
3082 2927          if (seg == NULL) {
3083 2928                  panic("as_iset3_default_lpsize: no seg");
3084 2929          }
3085 2930  
3086 2931          for (; rsize != 0; rsize -= ssize, raddr += ssize) {
3087 2932                  if (raddr >= seg->s_base + seg->s_size) {
3088 2933                          seg = AS_SEGNEXT(as, seg);
3089 2934                          if (seg == NULL || raddr != seg->s_base) {

↓ open down ↓

11 lines elided

↑ open up ↑

3090 2935                                  panic("as_iset3_default_lpsize: as changed");
3091 2936                          }
3092 2937                  }
3093 2938                  if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3094 2939                          ssize = seg->s_base + seg->s_size - raddr;
3095 2940                  } else {
3096 2941                          ssize = rsize;
3097 2942                  }
3098 2943  
3099 2944                  if (szc > seg->s_szc) {
3100      -                        error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
     2945 +                        error = segop_setpagesize(seg, raddr, ssize, szc);
3101 2946                          /* Only retry on EINVAL segments that have no vnode. */
3102 2947                          if (error == EINVAL) {
3103 2948                                  vnode_t *vp = NULL;
3104      -                                if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) &&
3105      -                                    (SEGOP_GETVP(seg, raddr, &vp) != 0 ||
     2949 +                                if ((segop_gettype(seg, raddr) & MAP_SHARED) &&
     2950 +                                    (segop_getvp(seg, raddr, &vp) != 0 ||
3106 2951                                      vp == NULL)) {
3107 2952                                          *retry = 1;
3108 2953                                  } else {
3109 2954                                          *retry = 0;
3110 2955                                  }
3111 2956                          }
3112 2957                          if (error) {
3113 2958                                  return (error);
3114 2959                          }
3115 2960                  }

3116 2961          }
3117 2962          return (0);
3118 2963  }
3119 2964  
3120 2965  /*
3121 2966   * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
3122 2967   * pagesize on each segment in its range, but if any fails with EINVAL,
3123 2968   * then it reduces the pagesizes to the next size in the bitmap and
3124 2969   * retries as_iset3_default_lpsize(). The reason why the code retries
3125 2970   * smaller allowed sizes on EINVAL is because (a) the anon offset may not
3126 2971   * match the bigger sizes, and (b) it's hard to get this offset (to begin
3127 2972   * with) to pass to map_pgszcvec().
3128 2973   */
3129 2974  static int
3130 2975  as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc,
3131 2976      uint_t szcvec)
3132 2977  {
3133 2978          int error;
3134 2979          int retry;
3135 2980  
3136 2981          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3137 2982  
3138 2983          for (;;) {
3139 2984                  error = as_iset3_default_lpsize(as, addr, size, szc, &retry);
3140 2985                  if (error == EINVAL && retry) {
3141 2986                          szcvec &= ~(1 << szc);
3142 2987                          if (szcvec <= 1) {
3143 2988                                  return (EINVAL);
3144 2989                          }
3145 2990                          szc = highbit(szcvec) - 1;
3146 2991                  } else {
3147 2992                          return (error);
3148 2993                  }
3149 2994          }
3150 2995  }
3151 2996  
3152 2997  /*
3153 2998   * as_iset1_default_lpsize() breaks its chunk into areas where existing
3154 2999   * segments have a smaller szc than we want to set. For each such area,
3155 3000   * it calls as_iset2_default_lpsize()
3156 3001   */
3157 3002  static int
3158 3003  as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3159 3004      uint_t szcvec)
3160 3005  {
3161 3006          struct seg *seg;
3162 3007          size_t ssize;
3163 3008          caddr_t setaddr = raddr;
3164 3009          size_t setsize = 0;
3165 3010          int set;
3166 3011          int error;
3167 3012  
3168 3013          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3169 3014  
3170 3015          seg = as_segat(as, raddr);
3171 3016          if (seg == NULL) {
3172 3017                  panic("as_iset1_default_lpsize: no seg");
3173 3018          }
3174 3019          if (seg->s_szc < szc) {
3175 3020                  set = 1;
3176 3021          } else {
3177 3022                  set = 0;
3178 3023          }
3179 3024  
3180 3025          for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3181 3026                  if (raddr >= seg->s_base + seg->s_size) {
3182 3027                          seg = AS_SEGNEXT(as, seg);
3183 3028                          if (seg == NULL || raddr != seg->s_base) {
3184 3029                                  panic("as_iset1_default_lpsize: as changed");
3185 3030                          }
3186 3031                          if (seg->s_szc >= szc && set) {
3187 3032                                  ASSERT(setsize != 0);
3188 3033                                  error = as_iset2_default_lpsize(as,
3189 3034                                      setaddr, setsize, szc, szcvec);
3190 3035                                  if (error) {
3191 3036                                          return (error);
3192 3037                                  }
3193 3038                                  set = 0;
3194 3039                          } else if (seg->s_szc < szc && !set) {
3195 3040                                  setaddr = raddr;
3196 3041                                  setsize = 0;
3197 3042                                  set = 1;
3198 3043                          }
3199 3044                  }
3200 3045                  if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3201 3046                          ssize = seg->s_base + seg->s_size - raddr;
3202 3047                  } else {
3203 3048                          ssize = rsize;
3204 3049                  }
3205 3050          }
3206 3051          error = 0;
3207 3052          if (set) {
3208 3053                  ASSERT(setsize != 0);
3209 3054                  error = as_iset2_default_lpsize(as, setaddr, setsize,
3210 3055                      szc, szcvec);
3211 3056          }
3212 3057          return (error);
3213 3058  }
3214 3059  
3215 3060  /*
3216 3061   * as_iset_default_lpsize() breaks its chunk according to the size code bitmap
3217 3062   * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each
3218 3063   * chunk to as_iset1_default_lpsize().
3219 3064   */
3220 3065  static int
3221 3066  as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags,
3222 3067      int type)
3223 3068  {
3224 3069          int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
3225 3070          uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr,
3226 3071              flags, rtype, 1);
3227 3072          uint_t szc;
3228 3073          uint_t nszc;
3229 3074          int error;
3230 3075          caddr_t a;
3231 3076          caddr_t eaddr;
3232 3077          size_t segsize;
3233 3078          size_t pgsz;
3234 3079          uint_t save_szcvec;
3235 3080  
3236 3081          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3237 3082          ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
3238 3083          ASSERT(IS_P2ALIGNED(size, PAGESIZE));
3239 3084  
3240 3085          szcvec &= ~1;
3241 3086          if (szcvec <= 1) {      /* skip if base page size */
3242 3087                  return (0);
3243 3088          }
3244 3089  
3245 3090          /* Get the pagesize of the first larger page size. */
3246 3091          szc = lowbit(szcvec) - 1;
3247 3092          pgsz = page_get_pagesize(szc);
3248 3093          eaddr = addr + size;
3249 3094          addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
3250 3095          eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3251 3096  
3252 3097          save_szcvec = szcvec;
3253 3098          szcvec >>= (szc + 1);
3254 3099          nszc = szc;
3255 3100          while (szcvec) {
3256 3101                  if ((szcvec & 0x1) == 0) {
3257 3102                          nszc++;
3258 3103                          szcvec >>= 1;
3259 3104                          continue;
3260 3105                  }
3261 3106                  nszc++;
3262 3107                  pgsz = page_get_pagesize(nszc);
3263 3108                  a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
3264 3109                  if (a != addr) {
3265 3110                          ASSERT(szc > 0);
3266 3111                          ASSERT(a < eaddr);
3267 3112                          segsize = a - addr;
3268 3113                          error = as_iset1_default_lpsize(as, addr, segsize, szc,
3269 3114                              save_szcvec);
3270 3115                          if (error) {
3271 3116                                  return (error);
3272 3117                          }
3273 3118                          addr = a;
3274 3119                  }
3275 3120                  szc = nszc;
3276 3121                  szcvec >>= 1;
3277 3122          }
3278 3123  
3279 3124          ASSERT(addr < eaddr);
3280 3125          szcvec = save_szcvec;
3281 3126          while (szcvec) {
3282 3127                  a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3283 3128                  ASSERT(a >= addr);
3284 3129                  if (a != addr) {
3285 3130                          ASSERT(szc > 0);
3286 3131                          segsize = a - addr;
3287 3132                          error = as_iset1_default_lpsize(as, addr, segsize, szc,
3288 3133                              save_szcvec);
3289 3134                          if (error) {
3290 3135                                  return (error);
3291 3136                          }
3292 3137                          addr = a;
3293 3138                  }
3294 3139                  szcvec &= ~(1 << szc);
3295 3140                  if (szcvec) {
3296 3141                          szc = highbit(szcvec) - 1;
3297 3142                          pgsz = page_get_pagesize(szc);
3298 3143                  }
3299 3144          }
3300 3145          ASSERT(addr == eaddr);
3301 3146  
3302 3147          return (0);
3303 3148  }
3304 3149  
3305 3150  /*
3306 3151   * Set the default large page size for the range. Called via memcntl with
3307 3152   * page size set to 0. as_set_default_lpsize breaks the range down into
3308 3153   * chunks with the same type/flags, ignores-non segvn segments, and passes
3309 3154   * each chunk to as_iset_default_lpsize().
3310 3155   */
3311 3156  int
3312 3157  as_set_default_lpsize(struct as *as, caddr_t addr, size_t size)
3313 3158  {
3314 3159          struct seg *seg;
3315 3160          caddr_t raddr;
3316 3161          size_t rsize;
3317 3162          size_t ssize;
3318 3163          int rtype, rflags;
3319 3164          int stype, sflags;
3320 3165          int error;
3321 3166          caddr_t setaddr;
3322 3167          size_t setsize;
3323 3168          int segvn;
3324 3169  
3325 3170          if (size == 0)
3326 3171                  return (0);
3327 3172  
3328 3173          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3329 3174  again:
3330 3175          error = 0;
3331 3176  
3332 3177          raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3333 3178          rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3334 3179              (size_t)raddr;
3335 3180  
3336 3181          if (raddr + rsize < raddr) {            /* check for wraparound */
3337 3182                  AS_LOCK_EXIT(as, &as->a_lock);

↓ open down ↓

222 lines elided

↑ open up ↑

3338 3183                  return (ENOMEM);
3339 3184          }
3340 3185          as_clearwatchprot(as, raddr, rsize);
3341 3186          seg = as_segat(as, raddr);
3342 3187          if (seg == NULL) {
3343 3188                  as_setwatch(as);
3344 3189                  AS_LOCK_EXIT(as, &as->a_lock);
3345 3190                  return (ENOMEM);
3346 3191          }
3347 3192          if (seg->s_ops == &segvn_ops) {
3348      -                rtype = SEGOP_GETTYPE(seg, addr);
     3193 +                rtype = segop_gettype(seg, addr);
3349 3194                  rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3350 3195                  rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3351 3196                  segvn = 1;
3352 3197          } else {
3353 3198                  segvn = 0;
3354 3199          }
3355 3200          setaddr = raddr;
3356 3201          setsize = 0;
3357 3202  
3358 3203          for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3359 3204                  if (raddr >= (seg->s_base + seg->s_size)) {
3360 3205                          seg = AS_SEGNEXT(as, seg);
3361 3206                          if (seg == NULL || raddr != seg->s_base) {
3362 3207                                  error = ENOMEM;
3363 3208                                  break;
3364 3209                          }
3365 3210                          if (seg->s_ops == &segvn_ops) {
3366      -                                stype = SEGOP_GETTYPE(seg, raddr);
     3211 +                                stype = segop_gettype(seg, raddr);
3367 3212                                  sflags = stype & (MAP_TEXT | MAP_INITDATA);
3368 3213                                  stype &= (MAP_SHARED | MAP_PRIVATE);
3369 3214                                  if (segvn && (rflags != sflags ||
3370 3215                                      rtype != stype)) {
3371 3216                                          /*
3372 3217                                           * The next segment is also segvn but
3373 3218                                           * has different flags and/or type.
3374 3219                                           */
3375 3220                                          ASSERT(setsize != 0);
3376 3221                                          error = as_iset_default_lpsize(as,

3377 3222                                              setaddr, setsize, rflags, rtype);
3378 3223                                          if (error) {
3379 3224                                                  break;
3380 3225                                          }
3381 3226                                          rflags = sflags;
3382 3227                                          rtype = stype;
3383 3228                                          setaddr = raddr;
3384 3229                                          setsize = 0;
3385 3230                                  } else if (!segvn) {
3386 3231                                          rflags = sflags;
3387 3232                                          rtype = stype;
3388 3233                                          setaddr = raddr;
3389 3234                                          setsize = 0;
3390 3235                                          segvn = 1;
3391 3236                                  }
3392 3237                          } else if (segvn) {
3393 3238                                  /* The next segment is not segvn. */
3394 3239                                  ASSERT(setsize != 0);
3395 3240                                  error = as_iset_default_lpsize(as,
3396 3241                                      setaddr, setsize, rflags, rtype);
3397 3242                                  if (error) {
3398 3243                                          break;
3399 3244                                  }
3400 3245                                  segvn = 0;
3401 3246                          }
3402 3247                  }
3403 3248                  if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3404 3249                          ssize = seg->s_base + seg->s_size - raddr;
3405 3250                  } else {
3406 3251                          ssize = rsize;
3407 3252                  }
3408 3253          }
3409 3254          if (error == 0 && segvn) {
3410 3255                  /* The last chunk when rsize == 0. */
3411 3256                  ASSERT(setsize != 0);
3412 3257                  error = as_iset_default_lpsize(as, setaddr, setsize,
3413 3258                      rflags, rtype);
3414 3259          }
3415 3260  
3416 3261          if (error == IE_RETRY) {
3417 3262                  goto again;
3418 3263          } else if (error == IE_NOMEM) {
3419 3264                  error = EAGAIN;
3420 3265          } else if (error == ENOTSUP) {
3421 3266                  error = EINVAL;
3422 3267          } else if (error == EAGAIN) {
3423 3268                  mutex_enter(&as->a_contents);
3424 3269                  if (!AS_ISNOUNMAPWAIT(as)) {
3425 3270                          if (AS_ISUNMAPWAIT(as) == 0) {
3426 3271                                  cv_broadcast(&as->a_cv);
3427 3272                          }
3428 3273                          AS_SETUNMAPWAIT(as);
3429 3274                          AS_LOCK_EXIT(as, &as->a_lock);
3430 3275                          while (AS_ISUNMAPWAIT(as)) {
3431 3276                                  cv_wait(&as->a_cv, &as->a_contents);
3432 3277                          }
3433 3278                          mutex_exit(&as->a_contents);
3434 3279                          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3435 3280                  } else {
3436 3281                          /*
3437 3282                           * We may have raced with
3438 3283                           * segvn_reclaim()/segspt_reclaim(). In this case
3439 3284                           * clean nounmapwait flag and retry since softlockcnt
3440 3285                           * in this segment may be already 0.  We don't drop as
3441 3286                           * writer lock so our number of retries without
3442 3287                           * sleeping should be very small. See segvn_reclaim()
3443 3288                           * for more comments.
3444 3289                           */
3445 3290                          AS_CLRNOUNMAPWAIT(as);
3446 3291                          mutex_exit(&as->a_contents);
3447 3292                  }
3448 3293                  goto again;
3449 3294          }
3450 3295  
3451 3296          as_setwatch(as);
3452 3297          AS_LOCK_EXIT(as, &as->a_lock);
3453 3298          return (error);
3454 3299  }
3455 3300  
3456 3301  /*
3457 3302   * Setup all of the uninitialized watched pages that we can.
3458 3303   */
3459 3304  void
3460 3305  as_setwatch(struct as *as)
3461 3306  {
3462 3307          struct watched_page *pwp;
3463 3308          struct seg *seg;
3464 3309          caddr_t vaddr;
3465 3310          uint_t prot;
3466 3311          int  err, retrycnt;
3467 3312  
3468 3313          if (avl_numnodes(&as->a_wpage) == 0)
3469 3314                  return;

↓ open down ↓

93 lines elided

↑ open up ↑

3470 3315  
3471 3316          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3472 3317  
3473 3318          for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3474 3319              pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3475 3320                  retrycnt = 0;
3476 3321          retry:
3477 3322                  vaddr = pwp->wp_vaddr;
3478 3323                  if (pwp->wp_oprot != 0 ||       /* already set up */
3479 3324                      (seg = as_segat(as, vaddr)) == NULL ||
3480      -                    SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0)
     3325 +                    segop_getprot(seg, vaddr, 0, &prot) != 0)
3481 3326                          continue;
3482 3327  
3483 3328                  pwp->wp_oprot = prot;
3484 3329                  if (pwp->wp_read)
3485 3330                          prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3486 3331                  if (pwp->wp_write)
3487 3332                          prot &= ~PROT_WRITE;
3488 3333                  if (pwp->wp_exec)
3489 3334                          prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3490 3335                  if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
3491      -                        err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
     3336 +                        err = segop_setprot(seg, vaddr, PAGESIZE, prot);
3492 3337                          if (err == IE_RETRY) {
3493 3338                                  pwp->wp_oprot = 0;
3494 3339                                  ASSERT(retrycnt == 0);
3495 3340                                  retrycnt++;
3496 3341                                  goto retry;
3497 3342                          }
3498 3343                  }
3499 3344                  pwp->wp_prot = prot;
3500 3345          }
3501 3346  }

3502 3347  
3503 3348  /*
3504 3349   * Clear all of the watched pages in the address space.
3505 3350   */
3506 3351  void
3507 3352  as_clearwatch(struct as *as)
3508 3353  {
3509 3354          struct watched_page *pwp;
3510 3355          struct seg *seg;
3511 3356          caddr_t vaddr;
3512 3357          uint_t prot;
3513 3358          int err, retrycnt;
3514 3359  
3515 3360          if (avl_numnodes(&as->a_wpage) == 0)
3516 3361                  return;
3517 3362  
3518 3363          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3519 3364

↓ open down ↓

18 lines elided

↑ open up ↑

3520 3365          for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3521 3366              pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3522 3367                  retrycnt = 0;
3523 3368          retry:
3524 3369                  vaddr = pwp->wp_vaddr;
3525 3370                  if (pwp->wp_oprot == 0 ||       /* not set up */
3526 3371                      (seg = as_segat(as, vaddr)) == NULL)
3527 3372                          continue;
3528 3373  
3529 3374                  if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3530      -                        err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
     3375 +                        err = segop_setprot(seg, vaddr, PAGESIZE, prot);
3531 3376                          if (err == IE_RETRY) {
3532 3377                                  ASSERT(retrycnt == 0);
3533 3378                                  retrycnt++;
3534 3379                                  goto retry;
3535 3380                          }
3536 3381                  }
3537 3382                  pwp->wp_oprot = 0;
3538 3383                  pwp->wp_prot = 0;
3539 3384          }
3540 3385  }

3541 3386  
3542 3387  /*
3543 3388   * Force a new setup for all the watched pages in the range.
3544 3389   */
3545 3390  static void
3546 3391  as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3547 3392  {
3548 3393          struct watched_page *pwp;
3549 3394          struct watched_page tpw;
3550 3395          caddr_t eaddr = addr + size;
3551 3396          caddr_t vaddr;
3552 3397          struct seg *seg;
3553 3398          int err, retrycnt;
3554 3399          uint_t  wprot;
3555 3400          avl_index_t where;
3556 3401  
3557 3402          if (avl_numnodes(&as->a_wpage) == 0)
3558 3403                  return;
3559 3404  
3560 3405          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3561 3406  
3562 3407          tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3563 3408          if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3564 3409                  pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3565 3410  
3566 3411          while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3567 3412                  retrycnt = 0;
3568 3413                  vaddr = pwp->wp_vaddr;
3569 3414  
3570 3415                  wprot = prot;
3571 3416                  if (pwp->wp_read)
3572 3417                          wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3573 3418                  if (pwp->wp_write)

↓ open down ↓

33 lines elided

↑ open up ↑

3574 3419                          wprot &= ~PROT_WRITE;
3575 3420                  if (pwp->wp_exec)
3576 3421                          wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3577 3422                  if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3578 3423                  retry:
3579 3424                          seg = as_segat(as, vaddr);
3580 3425                          if (seg == NULL) {
3581 3426                                  panic("as_setwatchprot: no seg");
3582 3427                                  /*NOTREACHED*/
3583 3428                          }
3584      -                        err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot);
     3429 +                        err = segop_setprot(seg, vaddr, PAGESIZE, wprot);
3585 3430                          if (err == IE_RETRY) {
3586 3431                                  ASSERT(retrycnt == 0);
3587 3432                                  retrycnt++;
3588 3433                                  goto retry;
3589 3434                          }
3590 3435                  }
3591 3436                  pwp->wp_oprot = prot;
3592 3437                  pwp->wp_prot = wprot;
3593 3438  
3594 3439                  pwp = AVL_NEXT(&as->a_wpage, pwp);

3595 3440          }
3596 3441  }
3597 3442  
3598 3443  /*
3599 3444   * Clear all of the watched pages in the range.
3600 3445   */
3601 3446  static void
3602 3447  as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3603 3448  {
3604 3449          caddr_t eaddr = addr + size;
3605 3450          struct watched_page *pwp;
3606 3451          struct watched_page tpw;
3607 3452          uint_t prot;
3608 3453          struct seg *seg;
3609 3454          int err, retrycnt;
3610 3455          avl_index_t where;
3611 3456  
3612 3457          if (avl_numnodes(&as->a_wpage) == 0)
3613 3458                  return;
3614 3459  
3615 3460          tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3616 3461          if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3617 3462                  pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3618 3463  
3619 3464          ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3620 3465

↓ open down ↓

26 lines elided

↑ open up ↑

3621 3466          while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3622 3467  
3623 3468                  if ((prot = pwp->wp_oprot) != 0) {
3624 3469                          retrycnt = 0;
3625 3470  
3626 3471                          if (prot != pwp->wp_prot) {
3627 3472                          retry:
3628 3473                                  seg = as_segat(as, pwp->wp_vaddr);
3629 3474                                  if (seg == NULL)
3630 3475                                          continue;
3631      -                                err = SEGOP_SETPROT(seg, pwp->wp_vaddr,
     3476 +                                err = segop_setprot(seg, pwp->wp_vaddr,
3632 3477                                      PAGESIZE, prot);
3633 3478                                  if (err == IE_RETRY) {
3634 3479                                          ASSERT(retrycnt == 0);
3635 3480                                          retrycnt++;
3636 3481                                          goto retry;
3637 3482  
3638 3483                                  }
3639 3484                          }
3640 3485                          pwp->wp_oprot = 0;
3641 3486                          pwp->wp_prot = 0;

3642 3487                  }
3643 3488  
3644 3489                  pwp = AVL_NEXT(&as->a_wpage, pwp);
3645 3490          }
3646 3491  }
3647 3492  
3648 3493  void
3649 3494  as_signal_proc(struct as *as, k_siginfo_t *siginfo)
3650 3495  {
3651 3496          struct proc *p;
3652 3497  
3653 3498          mutex_enter(&pidlock);
3654 3499          for (p = practive; p; p = p->p_next) {
3655 3500                  if (p->p_as == as) {
3656 3501                          mutex_enter(&p->p_lock);
3657 3502                          if (p->p_as == as)
3658 3503                                  sigaddq(p, NULL, siginfo, KM_NOSLEEP);
3659 3504                          mutex_exit(&p->p_lock);
3660 3505                  }
3661 3506          }
3662 3507          mutex_exit(&pidlock);
3663 3508  }
3664 3509  
3665 3510  /*
3666 3511   * return memory object ID
3667 3512   */
3668 3513  int
3669 3514  as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)

↓ open down ↓

28 lines elided

↑ open up ↑

3670 3515  {
3671 3516          struct seg      *seg;
3672 3517          int             sts;
3673 3518  
3674 3519          AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3675 3520          seg = as_segat(as, addr);
3676 3521          if (seg == NULL) {
3677 3522                  AS_LOCK_EXIT(as, &as->a_lock);
3678 3523                  return (EFAULT);
3679 3524          }
3680      -        /*
3681      -         * catch old drivers which may not support getmemid
3682      -         */
3683      -        if (seg->s_ops->getmemid == NULL) {
3684      -                AS_LOCK_EXIT(as, &as->a_lock);
3685      -                return (ENODEV);
3686      -        }
3687 3525  
3688      -        sts = SEGOP_GETMEMID(seg, addr, memidp);
     3526 +        sts = segop_getmemid(seg, addr, memidp);
3689 3527  
3690 3528          AS_LOCK_EXIT(as, &as->a_lock);
3691 3529          return (sts);
3692 3530  }

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX