1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright (c) 2015, Joyent, Inc.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 /*
  31  * University Copyright- Copyright (c) 1982, 1986, 1988
  32  * The Regents of the University of California
  33  * All Rights Reserved
  34  *
  35  * University Acknowledgment- Portions of this document are derived from
  36  * software developed by the University of California, Berkeley, and its
  37  * contributors.
  38  */
  39 
  40 #ifndef _VM_SEG_H
  41 #define _VM_SEG_H
  42 
  43 #include <sys/vnode.h>
  44 #include <sys/avl.h>
  45 #include <vm/seg_enum.h>
  46 #include <vm/faultcode.h>
  47 #include <vm/hat.h>
  48 
  49 #ifdef  __cplusplus
  50 extern "C" {
  51 #endif
  52 
  53 /*
  54  * VM - Segments.
  55  */
  56 
  57 struct anon_map;
  58 
  59 /*
  60  * kstat statistics for segment advise
  61  */
  62 typedef struct {
  63         kstat_named_t MADV_FREE_hit;
  64         kstat_named_t MADV_FREE_miss;
  65 } segadvstat_t;
  66 
  67 /*
  68  * memory object ids
  69  */
  70 typedef struct memid { u_longlong_t val[2]; } memid_t;
  71 
  72 /*
  73  * An address space contains a set of segments, managed by drivers.
  74  * Drivers support mapped devices, sharing, copy-on-write, etc.
  75  *
  76  * The seg structure contains a lock to prevent races, the base virtual
  77  * address and size of the segment, a back pointer to the containing
  78  * address space, pointers to maintain an AVL tree of segments in the
  79  * same address space, and procedure and data hooks for the driver.
  80  * The AVL tree of segments for the address space is sorted by
  81  * ascending base addresses and overlapping segments are not allowed.
  82  *
  83  * After a segment is created, faults may occur on pages of the segment.
  84  * When a fault occurs, the fault handling code must get the desired
  85  * object and set up the hardware translation to the object.  For some
  86  * objects, the fault handling code also implements copy-on-write.
  87  *
  88  * When the hat wants to unload a translation, it can call the unload
  89  * routine which is responsible for processing reference and modify bits.
  90  *
 * Each segment is protected by its containing address space lock.  To
  92  * access any field in the segment structure, the "as" must be locked.
  93  * If a segment field is to be modified, the address space lock must be
  94  * write locked.
  95  */
  96 
  97 typedef struct pcache_link {
  98         struct pcache_link      *p_lnext;
  99         struct pcache_link      *p_lprev;
 100 } pcache_link_t;
 101 
 102 typedef struct seg {
 103         caddr_t s_base;                 /* base virtual address */
 104         size_t  s_size;                 /* size in bytes */
 105         uint_t  s_szc;                  /* max page size code */
 106         uint_t  s_flags;                /* flags for segment, see below */
 107         struct  as *s_as;               /* containing address space */
 108         avl_node_t s_tree;              /* AVL tree links to segs in this as */
 109         struct  seg_ops *s_ops;         /* ops vector: see below */
 110         void *s_data;                   /* private data for instance */
 111         kmutex_t s_pmtx;                /* protects seg's pcache list */
 112         pcache_link_t s_phead;          /* head of seg's pcache list */
 113 } seg_t;
 114 
 115 #define S_PURGE         (0x01)          /* seg should be purged in as_gap() */
 116 
 117 struct  seg_ops {
 118         int     (*dup)(struct seg *, struct seg *);
 119         int     (*unmap)(struct seg *, caddr_t, size_t);
 120         void    (*free)(struct seg *);
 121         faultcode_t (*fault)(struct hat *, struct seg *, caddr_t, size_t,
 122             enum fault_type, enum seg_rw);
 123         faultcode_t (*faulta)(struct seg *, caddr_t);
 124         int     (*setprot)(struct seg *, caddr_t, size_t, uint_t);
 125         int     (*checkprot)(struct seg *, caddr_t, size_t, uint_t);
 126         int     (*kluster)(struct seg *, caddr_t, ssize_t);
 127         size_t  (*swapout)(struct seg *);
 128         int     (*sync)(struct seg *, caddr_t, size_t, int, uint_t);
 129         size_t  (*incore)(struct seg *, caddr_t, size_t, char *);
 130         int     (*lockop)(struct seg *, caddr_t, size_t, int, int, ulong_t *,
 131                         size_t);
 132         int     (*getprot)(struct seg *, caddr_t, size_t, uint_t *);
 133         u_offset_t      (*getoffset)(struct seg *, caddr_t);
 134         int     (*gettype)(struct seg *, caddr_t);
 135         int     (*getvp)(struct seg *, caddr_t, struct vnode **);
 136         int     (*advise)(struct seg *, caddr_t, size_t, uint_t);
 137         void    (*dump)(struct seg *);
 138         int     (*pagelock)(struct seg *, caddr_t, size_t, struct page ***,
 139                         enum lock_type, enum seg_rw);
 140         int     (*setpagesize)(struct seg *, caddr_t, size_t, uint_t);
 141         int     (*getmemid)(struct seg *, caddr_t, memid_t *);
 142         struct lgrp_mem_policy_info     *(*getpolicy)(struct seg *, caddr_t);
 143         int     (*capable)(struct seg *, segcapability_t);
 144         int     (*inherit)(struct seg *, caddr_t, size_t, uint_t);
 145 };
 146 
 147 #ifdef _KERNEL
 148 
 149 /*
 150  * Generic segment operations
 151  */
 152 extern  void    seg_init(void);
 153 extern  struct  seg *seg_alloc(struct as *as, caddr_t base, size_t size);
 154 extern  int     seg_attach(struct as *as, caddr_t base, size_t size,
 155                         struct seg *seg);
 156 extern  void    seg_unmap(struct seg *seg);
 157 extern  void    seg_free(struct seg *seg);
 158 
 159 /*
 160  * functions for pagelock cache support
 161  */
 162 typedef int (*seg_preclaim_cbfunc_t)(void *, caddr_t, size_t,
 163     struct page **, enum seg_rw, int);
 164 
 165 extern  struct  page **seg_plookup(struct seg *seg, struct anon_map *amp,
 166     caddr_t addr, size_t len, enum seg_rw rw, uint_t flags);
 167 extern  void    seg_pinactive(struct seg *seg, struct anon_map *amp,
 168     caddr_t addr, size_t len, struct page **pp, enum seg_rw rw,
 169     uint_t flags, seg_preclaim_cbfunc_t callback);
 170 
 171 extern  void    seg_ppurge(struct seg *seg, struct anon_map *amp,
 172     uint_t flags);
 173 extern  void    seg_ppurge_wiredpp(struct page **pp);
 174 
 175 extern  int     seg_pinsert_check(struct seg *seg, struct anon_map *amp,
 176     caddr_t addr, size_t len, uint_t flags);
 177 extern  int     seg_pinsert(struct seg *seg, struct anon_map *amp,
 178     caddr_t addr, size_t len, size_t wlen, struct page **pp, enum seg_rw rw,
 179     uint_t flags, seg_preclaim_cbfunc_t callback);
 180 
 181 extern  void    seg_pasync_thread(void);
 182 extern  void    seg_preap(void);
 183 extern  int     seg_p_disable(void);
 184 extern  void    seg_p_enable(void);
 185 
 186 extern  segadvstat_t    segadvstat;
 187 
 188 /*
 189  * Flags for pagelock cache support.
 190  * Flags argument is passed as uint_t to pcache routines.  upper 16 bits of
 191  * the flags argument are reserved for alignment page shift when SEGP_PSHIFT
 192  * is set.
 193  */
 194 #define SEGP_FORCE_WIRED        0x1     /* skip check against seg_pwindow */
 195 #define SEGP_AMP                0x2     /* anon map's pcache entry */
 196 #define SEGP_PSHIFT             0x4     /* addr pgsz shift for hash function */
 197 
 198 /*
 199  * Return values for seg_pinsert and seg_pinsert_check functions.
 200  */
 201 #define SEGP_SUCCESS            0       /* seg_pinsert() succeeded */
 202 #define SEGP_FAIL               1       /* seg_pinsert() failed */
 203 
 204 /* Page status bits for segop_incore */
 205 #define SEG_PAGE_INCORE         0x01    /* VA has a page backing it */
 206 #define SEG_PAGE_LOCKED         0x02    /* VA has a page that is locked */
 207 #define SEG_PAGE_HASCOW         0x04    /* VA has a page with a copy-on-write */
 208 #define SEG_PAGE_SOFTLOCK       0x08    /* VA has a page with softlock held */
 209 #define SEG_PAGE_VNODEBACKED    0x10    /* Segment is backed by a vnode */
 210 #define SEG_PAGE_ANON           0x20    /* VA has an anonymous page */
 211 #define SEG_PAGE_VNODE          0x40    /* VA has a vnode page backing it */
 212 
 213 #define SEGOP_DUP(s, n)             (*(s)->s_ops->dup)((s), (n))
 214 #define SEGOP_UNMAP(s, a, l)        (*(s)->s_ops->unmap)((s), (a), (l))
 215 #define SEGOP_FREE(s)               (*(s)->s_ops->free)((s))
 216 #define SEGOP_FAULT(h, s, a, l, t, rw) \
 217                 (*(s)->s_ops->fault)((h), (s), (a), (l), (t), (rw))
 218 #define SEGOP_FAULTA(s, a)          (*(s)->s_ops->faulta)((s), (a))
 219 #define SEGOP_SETPROT(s, a, l, p)   (*(s)->s_ops->setprot)((s), (a), (l), (p))
 220 #define SEGOP_CHECKPROT(s, a, l, p) (*(s)->s_ops->checkprot)((s), (a), (l), (p))
 221 #define SEGOP_KLUSTER(s, a, d)      (*(s)->s_ops->kluster)((s), (a), (d))
 222 #define SEGOP_SWAPOUT(s)            (*(s)->s_ops->swapout)((s))
 223 #define SEGOP_SYNC(s, a, l, atr, f) \
 224                 (*(s)->s_ops->sync)((s), (a), (l), (atr), (f))
 225 #define SEGOP_INCORE(s, a, l, v)    (*(s)->s_ops->incore)((s), (a), (l), (v))
 226 #define SEGOP_LOCKOP(s, a, l, atr, op, b, p) \
 227                 (*(s)->s_ops->lockop)((s), (a), (l), (atr), (op), (b), (p))
 228 #define SEGOP_GETPROT(s, a, l, p)   (*(s)->s_ops->getprot)((s), (a), (l), (p))
 229 #define SEGOP_GETOFFSET(s, a)       (*(s)->s_ops->getoffset)((s), (a))
 230 #define SEGOP_GETTYPE(s, a)         (*(s)->s_ops->gettype)((s), (a))
 231 #define SEGOP_GETVP(s, a, vpp)      (*(s)->s_ops->getvp)((s), (a), (vpp))
 232 #define SEGOP_ADVISE(s, a, l, b)    (*(s)->s_ops->advise)((s), (a), (l), (b))
 233 #define SEGOP_DUMP(s)               (*(s)->s_ops->dump)((s))
 234 #define SEGOP_PAGELOCK(s, a, l, p, t, rw) \
 235                 (*(s)->s_ops->pagelock)((s), (a), (l), (p), (t), (rw))
 236 #define SEGOP_SETPAGESIZE(s, a, l, szc) \
 237                 (*(s)->s_ops->setpagesize)((s), (a), (l), (szc))
 238 #define SEGOP_GETMEMID(s, a, mp)    (*(s)->s_ops->getmemid)((s), (a), (mp))
 239 #define SEGOP_GETPOLICY(s, a)       (*(s)->s_ops->getpolicy)((s), (a))
 240 #define SEGOP_CAPABLE(s, c)         (*(s)->s_ops->capable)((s), (c))
 241 #define SEGOP_INHERIT(s, a, l, b)   (*(s)->s_ops->inherit)((s), (a), (l), (b))
 242 
 243 #define seg_page(seg, addr) \
 244         (((uintptr_t)((addr) - (seg)->s_base)) >> PAGESHIFT)
 245 
 246 #define seg_pages(seg) \
 247         (((uintptr_t)((seg)->s_size + PAGEOFFSET)) >> PAGESHIFT)
 248 
 249 #define IE_NOMEM        -1      /* internal to seg layer */
 250 #define IE_RETRY        -2      /* internal to seg layer */
 251 #define IE_REATTACH     -3      /* internal to seg layer */
 252 
 253 /* Values for SEGOP_INHERIT */
 254 #define SEGP_INH_ZERO   0x01
 255 
 256 int seg_inherit_notsup(struct seg *, caddr_t, size_t, uint_t);
 257 
 258 /* Delay/retry factors for seg_p_mem_config_pre_del */
 259 #define SEGP_PREDEL_DELAY_FACTOR        4
 260 /*
 261  * As a workaround to being unable to purge the pagelock
 262  * cache during a DR delete memory operation, we use
 263  * a stall threshold that is twice the maximum seen
 264  * during testing.  This workaround will be removed
 265  * when a suitable fix is found.
 266  */
 267 #define SEGP_STALL_SECONDS      25
 268 #define SEGP_STALL_THRESHOLD \
 269         (SEGP_STALL_SECONDS * SEGP_PREDEL_DELAY_FACTOR)
 270 
 271 #ifdef VMDEBUG
 272 
 273 uint_t  seg_page(struct seg *, caddr_t);
 274 uint_t  seg_pages(struct seg *);
 275 
 276 #endif  /* VMDEBUG */
 277 
 278 boolean_t       seg_can_change_zones(struct seg *);
 279 size_t          seg_swresv(struct seg *);
 280 
 281 #endif  /* _KERNEL */
 282 
 283 #ifdef  __cplusplus
 284 }
 285 #endif
 286 
 287 #endif  /* _VM_SEG_H */