/* BEGIN CSTYLED */

/*
 * Copyright (c) 2009, Intel Corporation.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/sysmacros.h>
#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
/** @file i915_gem_tiling.c
 *
 * Support for managing the tiling state of buffer objects.
 *
 * The idea behind tiling is to increase cache hit rates by rearranging
 * pixel data so that a group of pixel accesses are in the same cacheline.
 * Performance improvements from doing this on the back/depth buffer are on
 * the order of 30%.
 *
 * Intel architectures make this somewhat more complicated, though, by
 * adjustments made to addressing of data when the memory is in interleaved
 * mode (matched pairs of DIMMs) to improve memory bandwidth.
 * For interleaved memory, the CPU sends every sequential 64 bytes
 * to an alternate memory channel so it can get the bandwidth from both.
 *
 * The GPU also rearranges its accesses for increased bandwidth to interleaved
 * memory, and it matches what the CPU does for non-tiled.  However, when tiled
 * it does it a little differently, since one walks addresses not just in the
 * X direction but also Y.  So, along with alternating channels when bit
 * 6 of the address flips, it also alternates when other bits flip -- Bits 9
 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
 * are common to both the 915 and 965-class hardware.
 *
 * The CPU also sometimes XORs in higher bits as well, to improve
 * bandwidth doing strided access like we do so frequently in graphics.  This
 * is called "Channel XOR Randomization" in the MCH documentation.  The result
 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
 * decode.
 *
 * All of this bit 6 XORing has an effect on our memory management,
 * as we need to make sure that the 3d driver can correctly address object
 * contents.
 *
 * If we don't have interleaved memory, all tiling is safe and no swizzling is
 * required.
 *
 * When bit 17 is XORed in, we simply refuse to tile at all.  Bit
 * 17 is not just a page offset, so as we page an object out and back in,
 * individual pages in it will have different bit 17 addresses, resulting in
 * each 64 bytes being swapped with its neighbor!
 *
 * Otherwise, if interleaved, we have to tell the 3d driver what the address
 * swizzling it needs to do is, since it's writing with the CPU to the pages
 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
 * to match what the GPU expects.
 */
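
/*
 * A minimal sketch (not part of the driver) of the cumulative CPU-side
 * swizzle described above, for the I915_BIT_6_SWIZZLE_9_10 case: bits 9
 * and 10 of a byte address are folded into bit 6 before the CPU touches
 * the backing pages.  The helper name is hypothetical and the code is
 * illustrative only, so it is kept out of the build.
 */
#if 0
static inline unsigned long
i915_swizzle_addr_9_10(unsigned long addr)
{
        /* Bit 9 (>> 3) and bit 10 (>> 4) are shifted down to bit
         * position 6, then XORed into the address.
         */
        return (addr ^ (((addr >> 3) ^ (addr >> 4)) & (1UL << 6)));
}
#endif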

/**
 * Detects bit 6 swizzling of address lookup between IGD access and CPU
 * access through main memory.
 */
void
i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
        uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;

        if (!IS_I9XX(dev)) {
                /* As far as we know, the 865 doesn't have these bit 6
                 * swizzling issues.
                 */
                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
        } else if (IS_MOBILE(dev)) {
                uint32_t dcc;

                /* On mobile 9xx chipsets, channel interleave by the CPU is
                 * determined by DCC.  For single-channel, neither the CPU
                 * nor the GPU do swizzling.  For dual channel interleaved,
                 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
                 * 9 for Y tiled.  The CPU's interleave is independent, and
                 * can be based on either bit 11 (haven't seen this yet) or
                 * bit 17 (common).
                 */

                dcc = I915_READ(DCC);
                switch (dcc & DCC_ADDRESSING_MODE_MASK) {
                case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
                case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
                        swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                        swizzle_y = I915_BIT_6_SWIZZLE_NONE;
                        break;
                case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
                        if (dcc & DCC_CHANNEL_XOR_DISABLE) {
                                /* This is the base swizzling by the GPU for
                                 * tiled buffers.
                                 */
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                                swizzle_y = I915_BIT_6_SWIZZLE_9;
                        } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
                                /* Bit 11 swizzling by the CPU in addition. */
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
                                swizzle_y = I915_BIT_6_SWIZZLE_9_11;
                        } else {
                                /* Bit 17 swizzling by the CPU in addition. */
                                swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
                                swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
                        }
                        break;
                }
                if (dcc == 0xffffffff) {
                        DRM_ERROR("Couldn't read from MCHBAR.  "
                                  "Disabling tiling.\n");
                        swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
                        swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
                }
        } else {
                /* The 965, G33, and newer, have a very flexible memory
                 * configuration.  It will enable dual-channel mode
                 * (interleaving) on as much memory as it can, and the GPU
                 * will additionally sometimes enable different bit 6
                 * swizzling for tiled objects from the CPU.
                 *
                 * Here's what I found on the G965:
                 *    slot fill         memory size  swizzling
                 * 0A   0B   1A   1B    1-ch   2-ch
                 * 512  0    0    0     512    0     O
                 * 512  0    512  0     16     1008  X
                 * 512  0    0    512   16     1008  X
                 * 0    512  0    512   16     1008  X
                 * 1024 1024 1024 0     2048   1024  O
                 *
                 * We could probably detect this based on either the DRB
                 * matching, which was the case for the swizzling required in
                 * the table above, or from the 1-ch value being less than
                 * the minimum size of a rank.
                 */
                if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
                        swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                        swizzle_y = I915_BIT_6_SWIZZLE_NONE;
                } else {
                        swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                        swizzle_y = I915_BIT_6_SWIZZLE_9;
                }
        }

        /* FIXME: check with memory config on IGDNG */
        if (IS_IGDNG(dev)) {
                swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                swizzle_y = I915_BIT_6_SWIZZLE_9;
        }

        dev_priv->mm.bit_6_swizzle_x = swizzle_x;
        dev_priv->mm.bit_6_swizzle_y = swizzle_y;
}

/**
 * Returns the size of the fence for a tiled object of the given size.
 */
static int
i915_get_fence_size(struct drm_device *dev, int size)
{
        int i;
        int start;

        if (IS_I965G(dev)) {
                /* The 965 can have fences at any page boundary. */
                return (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
        } else {
                /* Round the size up to a power of two, no smaller than
                 * the minimum fence size.
                 */
                if (IS_I9XX(dev))
                        start = 1024 * 1024;
                else
                        start = 512 * 1024;

                for (i = start; i < size; i <<= 1)
                        ;

                return i;
        }
}
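
/*
 * Worked example (illustrative): given the loop above, a 700KB object
 * on 915-class hardware starts at the 1MB minimum and gets a 1MB fence;
 * on 8xx hardware the minimum is 512KB, so a 300KB object gets a 512KB
 * fence while a 700KB object is rounded up to 1MB.
 */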

/* Check pitch constraints for all chips & tiling formats */
static int
i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
{
        int tile_width;

        /* Linear is always fine */
        if (tiling_mode == I915_TILING_NONE)
                return 1;

        if (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
                tile_width = 128;
        else
                tile_width = 512;

        if (stride == 0)
                return 0;

        /* 965+ just needs multiples of tile width */
        if (IS_I965G(dev)) {
                if (stride & (tile_width - 1))
                        return 0;
                return 1;
        }

        /* Pre-965 needs power-of-two strides */
        if (stride < tile_width)
                return 0;

        if (!ISP2(stride))
                return 0;

        /* We don't handle the aperture area covered by the fence being
         * bigger than the object size.
         */
        if (i915_get_fence_size(dev, size) != size)
                return 0;

        return 1;
}
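
/*
 * Worked example (illustrative): on 965-class hardware with 128-byte Y
 * tiles, a 640-byte stride is a multiple of the Y tile width and is
 * accepted, but it is not a multiple of the 512-byte X tile width, so
 * X tiling is rejected; pre-965 parts reject a 640-byte stride outright
 * because it is not a power of two.
 */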

/**
 * Sets the tiling mode of an object, returning the required swizzling of
 * bit 6 of addresses in the object.
 */
/*ARGSUSED*/
int
i915_gem_set_tiling(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        struct drm_i915_gem_set_tiling args;
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_gem_object *obj;
        struct drm_i915_gem_object *obj_priv;
        int ret;

        if (dev->driver->use_gem != 1)
                return ENODEV;

        DRM_COPYFROM_WITH_RETURN(&args,
            (struct drm_i915_gem_set_tiling __user *) data, sizeof(args));

        obj = drm_gem_object_lookup(fpriv, args.handle);
        if (obj == NULL)
                return EINVAL;
        obj_priv = obj->driver_private;

        if (!i915_tiling_ok(dev, args.stride, obj->size, args.tiling_mode)) {
                drm_gem_object_unreference(obj);
                DRM_DEBUG("i915_gem_set_tiling: invalid stride or tiling mode");
                return EINVAL;
        }

        spin_lock(&dev->struct_mutex);

        if (args.tiling_mode == I915_TILING_NONE) {
                args.swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
        } else {
                if (args.tiling_mode == I915_TILING_X)
                        args.swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
                else
                        args.swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
                /* If we can't handle the swizzling, make it untiled. */
                if (args.swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) {
                        args.tiling_mode = I915_TILING_NONE;
                        args.swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
                }
        }

        if (args.tiling_mode != obj_priv->tiling_mode) {
                /* Unbind the object, as switching tiling means we're
                 * switching the cache organization due to fencing, probably.
                 */
                ret = i915_gem_object_unbind(obj, 1);
                if (ret != 0) {
                        args.tiling_mode = obj_priv->tiling_mode;
                        spin_unlock(&dev->struct_mutex);
                        drm_gem_object_unreference(obj);
                        DRM_ERROR("i915_gem_set_tiling: unbind failed, "
                            "error %d", ret);
                        return ret;
                }
                obj_priv->tiling_mode = args.tiling_mode;
        }
        obj_priv->stride = args.stride;

        ret = DRM_COPY_TO_USER((struct drm_i915_gem_set_tiling __user *) data,
            &args, sizeof(args));
        if (ret != 0)
                DRM_ERROR("i915_gem_set_tiling: copyout failed, error %d",
                    ret);

        drm_gem_object_unreference(obj);
        spin_unlock(&dev->struct_mutex);

        return 0;
}

/**
 * Returns the current tiling mode and required bit 6 swizzling for the object.
 */
/*ARGSUSED*/
int
i915_gem_get_tiling(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        struct drm_i915_gem_get_tiling args;
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_gem_object *obj;
        struct drm_i915_gem_object *obj_priv;
        int ret;

        if (dev->driver->use_gem != 1)
                return ENODEV;

        DRM_COPYFROM_WITH_RETURN(&args,
            (struct drm_i915_gem_get_tiling __user *) data, sizeof(args));

        obj = drm_gem_object_lookup(fpriv, args.handle);
        if (obj == NULL)
                return EINVAL;
        obj_priv = obj->driver_private;

        spin_lock(&dev->struct_mutex);

        args.tiling_mode = obj_priv->tiling_mode;
        switch (obj_priv->tiling_mode) {
        case I915_TILING_X:
                args.swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
                break;
        case I915_TILING_Y:
                args.swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
                break;
        case I915_TILING_NONE:
                args.swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
                break;
        default:
                DRM_ERROR("unknown tiling mode\n");
                break;
        }

        ret = DRM_COPY_TO_USER((struct drm_i915_gem_get_tiling __user *) data,
            &args, sizeof(args));
        if (ret != 0)
                DRM_ERROR("i915_gem_get_tiling: copyout failed, error %d",
                    ret);

        drm_gem_object_unreference(obj);
        spin_unlock(&dev->struct_mutex);

        return 0;
}