1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #ifndef _SYS_ROOTNEX_H
  26 #define _SYS_ROOTNEX_H
  27 
  28 /*
  29  * x86 root nexus implementation specific state
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/conf.h>
  34 #include <sys/modctl.h>
  35 #include <sys/sunddi.h>
  36 #include <sys/iommulib.h>
  37 #include <sys/sdt.h>
  38 
  39 #ifdef  __cplusplus
  40 extern "C" {
  41 #endif
  42 
  43 
  44 /* size of buffer used for ctlop reportdev */
  45 #define REPORTDEV_BUFSIZE       1024
  46 
  47 /* min and max interrupt vectors */
  48 #define VEC_MIN                 1
  49 #define VEC_MAX                 255
  50 
  51 /* atomic increment/decrement to keep track of outstanding binds, etc */
  52 #ifdef DEBUG
  53 #define ROOTNEX_DPROF_INC(addr)         atomic_inc_64(addr)
  54 #define ROOTNEX_DPROF_DEC(addr)         atomic_add_64(addr, -1)
  55 #define ROOTNEX_DPROBE1(name, type1, arg1) \
  56         DTRACE_PROBE1(name, type1, arg1)
  57 #define ROOTNEX_DPROBE2(name, type1, arg1, type2, arg2) \
  58         DTRACE_PROBE2(name, type1, arg1, type2, arg2)
  59 #define ROOTNEX_DPROBE3(name, type1, arg1, type2, arg2, type3, arg3) \
  60         DTRACE_PROBE3(name, type1, arg1, type2, arg2, type3, arg3)
  61 #define ROOTNEX_DPROBE4(name, type1, arg1, type2, arg2, type3, arg3, \
  62     type4, arg4) \
  63         DTRACE_PROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4)
  64 #else
  65 #define ROOTNEX_DPROF_INC(addr)
  66 #define ROOTNEX_DPROF_DEC(addr)
  67 #define ROOTNEX_DPROBE1(name, type1, arg1)
  68 #define ROOTNEX_DPROBE2(name, type1, arg1, type2, arg2)
  69 #define ROOTNEX_DPROBE3(name, type1, arg1, type2, arg2, type3, arg3)
  70 #define ROOTNEX_DPROBE4(name, type1, arg1, type2, arg2, type3, arg3, \
  71     type4, arg4)
  72 #endif
  73 
  74 /* set in dmac_type to signify that this cookie uses the copy buffer */
  75 #define ROOTNEX_USES_COPYBUF            0x80000000
  76 
  77 /*
  78  * integer or boolean property name and value. A few static rootnex properties
  79  * are created during rootnex attach from an array of rootnex_intprop_t..
  80  */
  81 typedef struct rootnex_intprop_s {
  82         char    *prop_name;
  83         int     prop_value;
  84 } rootnex_intprop_t;
  85 
/*
 * sgl (scatter/gather list) related information which is visible to
 * rootnex_get_sgl(). Trying to isolate get_sgl() as much as possible so it
 * can be easily replaced.
 */
typedef struct rootnex_sglinfo_s {
        /*
         * Used to simplify calculations to get the maximum number
         * of cookies.
         */
        boolean_t       si_cancross;

        /*
         * These are passed into rootnex_get_sgl().
         *
         * si_min_addr - the minimum physical address
         * si_max_addr - the maximum physical address
         * si_max_cookie_size - the maximum size of a physically contiguous
         *    piece of memory that we can handle in a sgl.
         * si_segmask - segment mask to determine if we cross a segment boundary
         * si_flags - dma_attr_flags
         * si_max_pages - max number of pages this sgl could occupy (which
         *    is also the maximum number of cookies we might see).
         */
        uint64_t        si_min_addr;
        uint64_t        si_max_addr;
        uint64_t        si_max_cookie_size;
        uint64_t        si_segmask;
        uint_t          si_flags;
        uint_t          si_max_pages;

        /*
         * These are returned by rootnex_get_sgl().
         *
         * si_bounce_on_seg - if we need to use bounce buffer for pages above
         *    ddi_dma_seg
         * si_copybuf_req - amount of copy buffer needed by the buffer.
         * si_buf_offset - The initial offset into the first page of the buffer.
         *    It's set in get sgl and used in the bind slow path to help
         *    calculate the current page index & offset from the current offset
         *    which is relative to the start of the buffer.
         * si_asp - address space of buffer passed in.
         * si_sgl_size - The actual number of cookies in the sgl. This does
         *    not reflect any sharing that we might do on window boundaries.
         */
        boolean_t       si_bounce_on_seg;
        size_t          si_copybuf_req;
        off_t           si_buf_offset;
        struct as       *si_asp;
        uint_t          si_sgl_size;
} rootnex_sglinfo_t;
 136 
/*
 * When we have to use the copy buffer, we allocate one of these structures per
 * buffer page to track which pages need the copy buffer, what the kernel
 * virtual address is (which the device can't reach), and what the copy buffer
 * virtual address is (where the device dma's to/from). For 32-bit kernels,
 * since we can't use seg kpm, we also need to keep the page_t around and state
 * if we've currently mapped in the page into KVA space for buffers which don't
 * have kva already and when we have multiple windows because we used up all our
 * copy buffer space.
 */
typedef struct rootnex_pgmap_s {
        /* B_TRUE if this buffer page needs the copy buffer */
        boolean_t       pm_uses_copybuf;
#if !defined(__amd64)
        /*
         * 32-bit kernel only (no seg kpm): whether the page is currently
         * mapped into KVA space, and the page_t we keep around for it.
         */
        boolean_t       pm_mapped;
        page_t          *pm_pp;
        /* NOTE(review): presumably the buffer's original vaddr - confirm */
        caddr_t         pm_vaddr;
#endif
        /* kernel virtual address of the page (which the device can't reach) */
        caddr_t         pm_kaddr;
        /* copy buffer virtual address (where the device dma's to/from) */
        caddr_t         pm_cbaddr;
} rootnex_pgmap_t;
 157 
/*
 * We only need to trim a buffer when we have multiple windows. Each window has
 * trim state. We might have trimmed the end of the previous window, leaving the
 * first cookie of this window trimmed [tr_trim_first] (which basically means we
 * won't start with a new cookie), or we might need to trim the end of the
 * current window [tr_trim_last] (which basically means we won't end with a
 * complete cookie). We keep the same state for the first & last cookie in a
 * window (a window can have one or more cookies). However, when we trim the
 * last cookie, we keep a pointer to the last cookie in the trim state since we
 * only need this info when we trim. The pointer to the first cookie in the
 * window is in the window state since we need to know what the first cookie in
 * the window is in various places.
 *
 * If we do trim a cookie, we save away the physical address and size of the
 * cookie so that we can overwrite the cookie when we switch windows (the
 * space for a cookie which is in two windows is shared between the windows).
 * We keep around the same information for the last page in a window.
 *
 * If we happened to trim on a page that uses the copy buffer, and that page
 * is also in the middle of a window boundary because we have filled up the
 * copy buffer, we need to remember the copy buffer address for both windows
 * since the same page will have different copy buffer addresses in the two
 * windows. We need to do the same for kaddr in the 32-bit kernel since we
 * have a limited kva space which we map to.
 */
typedef struct rootnex_trim_s {
        /* which end(s) of the window hold a trimmed cookie */
        boolean_t               tr_trim_first;
        boolean_t               tr_trim_last;
        /* last cookie in the window; only needed when we trim */
        ddi_dma_cookie_t        *tr_last_cookie;
        /* saved physical address and size of the trimmed cookies */
        uint64_t                tr_first_paddr;
        uint64_t                tr_last_paddr;
        size_t                  tr_first_size;
        size_t                  tr_last_size;

        /*
         * State for a trimmed page that uses the copy buffer and also sits
         * on a window boundary: the copy buffer address to use in this
         * window.
         * NOTE(review): tr_*_pidx is presumably the page's index into the
         * page map - confirm.
         */
        boolean_t               tr_first_copybuf_win;
        boolean_t               tr_last_copybuf_win;
        uint_t                  tr_first_pidx;
        uint_t                  tr_last_pidx;
        caddr_t                 tr_first_cbaddr;
        caddr_t                 tr_last_cbaddr;
#if !defined(__amd64)
        /* 32-bit kernel only: per-window kaddr for the same page */
        caddr_t                 tr_first_kaddr;
        caddr_t                 tr_last_kaddr;
#endif
} rootnex_trim_t;
 203 
/*
 * Per window state. A bound DMA handle can have multiple windows. Each window
 * will have the following state. We track if this window needs to sync,
 * the offset into the buffer where the window starts, the size of the window,
 * a pointer to the first cookie in the window, the number of cookies in the
 * window, and the trim state for the window. For the 32-bit kernel, we keep
 * track of whether we need to remap the copy buffer when we switch to this
 * window.
 */
typedef struct rootnex_window_s {
        boolean_t               wd_dosync;              /* window needs sync */
        uint_t                  wd_cookie_cnt;          /* cookies in window */
        off_t                   wd_offset;              /* start in buffer */
        size_t                  wd_size;                /* size of window */
        ddi_dma_cookie_t        *wd_first_cookie;       /* first cookie */
        rootnex_trim_t          wd_trim;                /* trim state */
#if !defined(__amd64)
        /* 32-bit kernel only: remap copy buffer on switch to this window */
        boolean_t               wd_remap_copybuf;
#endif
} rootnex_window_t;
 223 
/* per dma handle private state */
typedef struct rootnex_dma_s {
        /*
         * sgl related state used to build and describe the sgl.
         *
         * dp_partial_required - used in the bind slow path to identify if we
         *    need to do a partial mapping or not.
         * dp_trim_required - used in the bind slow path to identify if we
         *    need to trim when switching to a new window. This should only be
         *    set when partial is set.
         * dp_granularity_power_2 - set in alloc handle and used in bind slow
         *    path to determine if we & or % to calculate the trim.
         * dp_dma - copy of dma "object" passed in during bind
         * dp_maxxfer - trimmed dma_attr_maxxfer so that it is a whole
         *    multiple of granularity
         * dp_sglinfo - See rootnex_sglinfo_t above.
         *
         * NOTE(review): dp_dvma_used and dp_dvma are not described here;
         * presumably they record whether a DVMA (IOMMU remapped) object is in
         * use for this bind and what that object is - confirm against the
         * rootnex implementation.
         */
        boolean_t               dp_partial_required;
        boolean_t               dp_trim_required;
        boolean_t               dp_granularity_power_2;
        uint64_t                dp_maxxfer;

        boolean_t               dp_dvma_used;
        ddi_dma_obj_t           dp_dma;
        ddi_dma_obj_t           dp_dvma;
        rootnex_sglinfo_t       dp_sglinfo;

        /*
         * Copy buffer related state
         *
         * dp_copybuf_size - the actual size of the copy buffer that we are
         *    using. This can be smaller than dp_copybuf_req, i.e. bind size >
         *    max copy buffer size. (There is no dp_copybuf_req field in this
         *    structure; NOTE(review): this presumably refers to
         *    dp_sglinfo.si_copybuf_req - confirm.)
         * dp_cbaddr - kernel address of copy buffer. Used to determine
         *    where to copy to/from.
         * dp_cbsize - the "real" size returned from the copy buffer alloc.
         *    Set in the copybuf alloc and used to free copybuf.
         * dp_pgmap - page map used in sync to determine which pages in the
         *    buffer use the copy buffer and what addresses to use to copy to/
         *    from.
         * dp_cb_remaping - status if this bind causes us to have to remap
         *    the copybuf when switching to new windows. This is only used in
         *    the 32-bit kernel since we use seg kpm in the 64-bit kernel for
         *    this case.
         * dp_kva - kernel heap arena vmem space for mapping to buffers which
         *    we don't have a kernel VA to bcopy to/from. This is only used in
         *    the 32-bit kernel since we use seg kpm in the 64-bit kernel for
         *    this case.
         */
        size_t                  dp_copybuf_size;
        caddr_t                 dp_cbaddr;
        size_t                  dp_cbsize;
        rootnex_pgmap_t         *dp_pgmap;
#if !defined(__amd64)
        boolean_t               dp_cb_remaping;
        caddr_t                 dp_kva;
#endif

        /*
         * window related state. The pointer to the window state array which may
         * be a pointer into the pre allocated state, or we may have had to
         * allocate the window array on the fly because it wouldn't fit. If
         * we allocate it, we'll use dp_need_to_free_window and dp_window_size
         * during cleanup. dp_current_win keeps track of the current window.
         * dp_max_win is the maximum number of windows we could have.
         */
        uint_t                  dp_current_win;
        rootnex_window_t        *dp_window;
        boolean_t               dp_need_to_free_window;
        uint_t                  dp_window_size;
        uint_t                  dp_max_win;

        /* dip of driver which "owns" handle. set to rdip in alloc_handle() */
        dev_info_t              *dp_dip;

        /*
         * dp_mutex and dp_inuse are only used to see if a driver is trying to
         * bind to an already bound dma handle. dp_mutex is only used for
         * dp_inuse.
         */
        kmutex_t                dp_mutex;
        boolean_t               dp_inuse;

        /*
         * cookie related state. The pointer to the cookies (dp_cookies) may
         * be a pointer into the pre allocated state, or we may have had to
         * allocate the cookie array on the fly because it wouldn't fit. If
         * we allocate it, we'll use dp_need_to_free_cookie and dp_cookie_size
         * during cleanup. dp_current_cookie is only used in the obsoleted
         * interfaces to determine when we've used up all the cookies in a
         * window during nextseg().
         *
         * NOTE(review): dp_saved_cookies and dp_need_to_switch_cookies are
         * not described here; confirm their use against the rootnex
         * implementation.
         */
        size_t                  dp_cookie_size;
        ddi_dma_cookie_t        *dp_cookies;
        boolean_t               dp_need_to_free_cookie;
        uint_t                  dp_current_cookie; /* for obsoleted I/Fs */
        ddi_dma_cookie_t        *dp_saved_cookies;
        boolean_t               dp_need_to_switch_cookies;

        /* NOTE(review): opaque; presumably owned by the IOMMU code - confirm */
        void                    *dp_iommu_private;

        /*
         * pre allocated space for the bind state, allocated during alloc
         * handle. For a lot of devices, this will save us from having to do
         * kmem_alloc's during the bind most of the time. kmem_alloc's can be
         * expensive on x86 when the cpu count goes up since xcalls are
         * expensive on x86.
         */
        uchar_t                 *dp_prealloc_buffer;

        /*
         * sleep flags set on bind and unset on unbind
         */
        int                     dp_sleep_flags;
} rootnex_dma_t;
 338 
 339 /*
 340  * profile/performance counters. Most things will be dtrace probes, but there
 341  * are a couple of things we want to keep track all the time. We track the
 342  * total number of active handles and binds (i.e. an alloc without a free or
 343  * a bind without an unbind) since rootnex attach. We also track the total
 344  * number of binds which have failed since rootnex attach.
 345  */
 346 typedef enum {
 347         ROOTNEX_CNT_ACTIVE_HDLS = 0,
 348         ROOTNEX_CNT_ACTIVE_BINDS = 1,
 349         ROOTNEX_CNT_ALLOC_FAIL = 2,
 350         ROOTNEX_CNT_BIND_FAIL = 3,
 351         ROOTNEX_CNT_SYNC_FAIL = 4,
 352         ROOTNEX_CNT_GETWIN_FAIL = 5,
 353 
 354         /* This one must be last */
 355         ROOTNEX_CNT_LAST
 356 } rootnex_cnt_t;
 357 
/*
 * global driver state.
 *   r_dmahdl_cache - dma_handle kmem_cache
 *   r_dvma_call_list_id - ddi_set_callback() id
 *   r_peekpoke_mutex - serialize peeks and pokes.
 *   r_dip - rootnex dip
 *   r_reserved_msg_printed - ctlops reserve message threshold
 *   r_counters - profile/performance counters, indexed by rootnex_cnt_t
 *
 * NOTE(review): the following fields are not described by the original
 * comment; confirm against the rootnex implementation:
 *   r_prealloc_cookies, r_prealloc_size - presumably the number of cookies
 *     and the size of the per-handle pre-allocated bind state
 *   r_err_ibc - an interrupt block cookie; purpose not visible here
 *   r_iommulib_handle - presumably the rootnex's iommulib nexus handle
 */
typedef struct rootnex_state_s {
        uint_t                  r_prealloc_cookies;
        uint_t                  r_prealloc_size;
        kmem_cache_t            *r_dmahdl_cache;
        uintptr_t               r_dvma_call_list_id;
        kmutex_t                r_peekpoke_mutex;
        dev_info_t              *r_dip;
        ddi_iblock_cookie_t     r_err_ibc;
        boolean_t               r_reserved_msg_printed;
        uint64_t                r_counters[ROOTNEX_CNT_LAST];
        iommulib_nexhandle_t    r_iommulib_handle;
} rootnex_state_t;
 379 
 380 #ifdef  __cplusplus
 381 }
 382 #endif
 383 
 384 #endif  /* _SYS_ROOTNEX_H */