/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"atomic.s"

#include <sys/asm_linkage.h>

/*
 * ATOMIC_BO_ENABLE_SHIFT can be selectively defined by processors
 * to enable exponential backoff.  Leaving it undefined means backoff
 * is not desired, i.e. backoff is disabled.
 * By default, the shift value is used to generate a power-of-2 value
 * for the backoff limit.  In the kernel, processors scale this shift
 * value with the number of online cpus.
 */
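
/*
 * For reference, the retry loop that the ATOMIC_BACKOFF_* macros wrap
 * around a cas can be pictured in C roughly as below.  This is only an
 * illustrative sketch (it is not assembled; backoff_delay() is a
 * hypothetical stand-in for DELAY_SPIN, and limit is
 * 1 << ATOMIC_BO_ENABLE_SHIFT, scaled by the online cpu count in the
 * kernel):
 *
 *	uint_t backoff = 1;
 *	for (;;) {
 *		uint32_t old = *target;
 *		if (atomic_cas_32(target, old, old + delta) == old)
 *			break;			// cas succeeded
 *		if (backoff > limit)		// cap at the backoff limit
 *			backoff = limit;
 *		backoff_delay(backoff);		// spin for ~backoff rounds
 *		backoff <<= 1;			// double for the next failure
 *	}
 */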

#if defined(_KERNEL)
	/*
	 * Legacy kernel interfaces; they will go away the moment our closed
	 * bins no longer require them.
	 */
	ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
	ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
	ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
	ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
	ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
	ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
	ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
	ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function)

#ifdef ATOMIC_BO_ENABLE_SHIFT

#if !defined(lint)
	.weak	cpu_atomic_delay
	.type	cpu_atomic_delay, #function
#endif	/* lint */

/*
 * For the kernel, invoke the processor-specific delay routine to
 * perform a low-impact spin delay.  The value of ATOMIC_BO_ENABLE_SHIFT
 * is tuned with respect to the specific spin delay implementation.
 */
#define	DELAY_SPIN(label, tmp1, tmp2)					\
	/*								; \
	 * Use the weak reference to a CPU-specific delay routine	; \
	 * for atomic backoff (declared above).  For CPUs that have	; \
	 * no such delay routine defined, the delay becomes just a	; \
	 * simple tight loop.						; \
	 *								; \
	 * tmp1 = holds the address of the CPU-specific delay routine	; \
	 * tmp2 = saves the atomic routine's return address (%o7)	; \
	 */								; \
	sethi	%hi(cpu_atomic_delay), tmp1				; \
	or	tmp1, %lo(cpu_atomic_delay), tmp1			; \
label/**/0:								; \
	brz,pn	tmp1, label/**/1					; \
	mov	%o7, tmp2						; \
	jmpl	tmp1, %o7	/* call CPU specific delay routine */	; \
	  nop			/* delay slot : do nothing */		; \
	mov	tmp2, %o7	/* restore the saved return address */	; \
label/**/1:
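
/*
 * Conceptually, DELAY_SPIN reduces to the following (illustrative C
 * only, not assembled; the NULL test mirrors the brz on the weak
 * symbol above):
 *
 *	if (&cpu_atomic_delay != NULL)	// weak reference resolved?
 *		cpu_atomic_delay();	// processor-specific low-impact delay
 *	// otherwise fall straight through, so the deccc/bgu loop in
 *	// ATOMIC_BACKOFF_BACKOFF degenerates into a simple tight loop
 */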

/*
 * For the kernel, we take cas failures into consideration and also
 * scale the backoff limit w.r.t. the number of online cpus.
 * On cas failure, the backoff value is reset to 1 once the number of
 * failures equals or exceeds the number of online cpus.  This enforces
 * some degree of fairness and prevents starvation.
 * We also scale/normalize the processor-provided ATOMIC_BO_ENABLE_SHIFT
 * w.r.t. the number of online cpus to obtain the actual final limit to
 * use.
 */
#define	ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)		\
	brnz,pt	ncpu, label/**/0					; \
	  inc	cas_cnt							; \
	sethi	%hi(ncpus_online), ncpu					; \
	ld	[ncpu + %lo(ncpus_online)], ncpu			; \
label/**/0:								; \
	cmp	cas_cnt, ncpu						; \
	blu,pt	%xcc, label/**/1					; \
	  sllx	ncpu, ATOMIC_BO_ENABLE_SHIFT, limit			; \
	mov	%g0, cas_cnt						; \
	mov	1, val							; \
label/**/1:
#endif	/* ATOMIC_BO_ENABLE_SHIFT */
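
/*
 * Illustrative C equivalent of ATOMIC_BACKOFF_CPU (sketch only, not
 * assembled; the names mirror the macro arguments):
 *
 *	cas_cnt++;
 *	if (ncpu == 0)				// first use: cache the cpu count
 *		ncpu = ncpus_online;
 *	limit = ncpu << ATOMIC_BO_ENABLE_SHIFT;	// scale limit by online cpus
 *	if (cas_cnt >= ncpu) {			// too many failures in a row:
 *		cas_cnt = 0;			// restart with the smallest
 *		val = 1;			// backoff, for fairness
 *	}
 */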

#else	/* _KERNEL */

/*
 * ATOMIC_BO_ENABLE_SHIFT may be defined here to enable backoff for the
 * generic libc atomics.  It is not defined for now.
 */
#ifdef ATOMIC_BO_ENABLE_SHIFT
#define	DELAY_SPIN(label, tmp1, tmp2)	\
label/**/0:

#define	ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)	\
	set	1 << ATOMIC_BO_ENABLE_SHIFT, limit
#endif	/* ATOMIC_BO_ENABLE_SHIFT */
#endif	/* _KERNEL */

#ifdef ATOMIC_BO_ENABLE_SHIFT
/*
 * ATOMIC_BACKOFF_INIT macro for initialization.
 * The backoff value (val) is initialized to 1.
 * ncpu is initialized to 0.
 * cas_cnt counts cas instruction failures and is initialized to 0.
 */
#define	ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)	\
	mov	1, val				; \
	mov	%g0, ncpu			; \
	mov	%g0, cas_cnt

#define	ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
	bne,a,pn cr, backoff

/*
 * Main ATOMIC_BACKOFF_BACKOFF macro for backoff.
 */
#define	ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel) \
	ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label/**/_0)	; \
	cmp	val, limit						; \
	blu,a,pt %xcc, label/**/_1					; \
	  mov	val, limit						; \
label/**/_1:								; \
	mov	limit, val						; \
	DELAY_SPIN(label/**/_2, %g2, %g3)				; \
	deccc	limit							; \
	bgu,pn	%xcc, label/**/_20 /* branch to middle of DELAY_SPIN */	; \
	  nop								; \
	ba	retlabel						; \
	sllx	val, 1, val

#else	/* ATOMIC_BO_ENABLE_SHIFT */
#define	ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)

#define	ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
	bne,a,pn cr, loop

#define	ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel)
#endif	/* ATOMIC_BO_ENABLE_SHIFT */
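
/*
 * Illustrative C rendering of the backoff-enabled path above (sketch
 * only, not assembled; "goto retry" stands for the branch back to
 * retlabel, i.e. reloading the target and retrying the cas):
 *
 *	ATOMIC_BACKOFF_CPU(val, limit, ...);	// update limit, maybe reset val
 *	if (val < limit)
 *		limit = val;			// spin count = min(val, limit)
 *	val = limit;
 *	while (limit-- > 0)
 *		DELAY_SPIN();			// cpu_atomic_delay() or tight loop
 *	val <<= 1;				// double the backoff
 *	goto retry;
 */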

	/*
	 * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_inc_8_nv.
	 */
	ENTRY(atomic_inc_8)
	ALTENTRY(atomic_inc_8_nv)
	ALTENTRY(atomic_inc_uchar)
	ALTENTRY(atomic_inc_uchar_nv)
	ba	add_8
	  add	%g0, 1, %o1
	SET_SIZE(atomic_inc_uchar_nv)
	SET_SIZE(atomic_inc_uchar)
	SET_SIZE(atomic_inc_8_nv)
	SET_SIZE(atomic_inc_8)

	/*
	 * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_dec_8_nv.
	 */
	ENTRY(atomic_dec_8)
	ALTENTRY(atomic_dec_8_nv)
	ALTENTRY(atomic_dec_uchar)
	ALTENTRY(atomic_dec_uchar_nv)
	ba	add_8
	  sub	%g0, 1, %o1
	SET_SIZE(atomic_dec_uchar_nv)
	SET_SIZE(atomic_dec_uchar)
	SET_SIZE(atomic_dec_8_nv)
	SET_SIZE(atomic_dec_8)

	/*
	 * NOTE: If atomic_add_8 and atomic_add_8_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_add_8_nv.
	 */
	ENTRY(atomic_add_8)
	ALTENTRY(atomic_add_8_nv)
	ALTENTRY(atomic_add_char)
	ALTENTRY(atomic_add_char_nv)
add_8:
	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	set	0xff, %o3		! %o3 = mask
	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single byte value
	andn	%o0, 0x3, %o0		! %o0 = word address
	ld	[%o0], %o2		! read old value
1:
	add	%o2, %o1, %o5		! add value to the old value
	and	%o5, %o3, %o5		! clear other bits
	andn	%o2, %o3, %o4		! clear target bits
	or	%o4, %o5, %o5		! insert the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	add	%o2, %o1, %o5
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_add_char_nv)
	SET_SIZE(atomic_add_char)
	SET_SIZE(atomic_add_8_nv)
	SET_SIZE(atomic_add_8)
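
	/*
	 * For reference, the byte-in-word technique used by add_8 above can
	 * be pictured in C roughly as below (illustrative sketch only, not
	 * assembled).  SPARC has no byte-wide cas, so the containing 32-bit
	 * word is cas'd and only the target byte is changed:
	 *
	 *	uint32_t *wp = (uint32_t *)((uintptr_t)addr & ~3);
	 *	int shift = (3 - ((uintptr_t)addr & 3)) * 8;	// big-endian
	 *	uint32_t mask = 0xff << shift;
	 *	uint32_t old, new;
	 *	do {
	 *		old = *wp;
	 *		new = (old & ~mask) |
	 *		    ((old + ((uint32_t)delta << shift)) & mask);
	 *	} while (atomic_cas_32(wp, old, new) != old);
	 *	return ((new & mask) >> shift);		// new byte value
	 */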

	/*
	 * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_inc_16_nv.
	 */
	ENTRY(atomic_inc_16)
	ALTENTRY(atomic_inc_16_nv)
	ALTENTRY(atomic_inc_ushort)
	ALTENTRY(atomic_inc_ushort_nv)
	ba	add_16
	  add	%g0, 1, %o1
	SET_SIZE(atomic_inc_ushort_nv)
	SET_SIZE(atomic_inc_ushort)
	SET_SIZE(atomic_inc_16_nv)
	SET_SIZE(atomic_inc_16)

	/*
	 * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_dec_16_nv.
	 */
	ENTRY(atomic_dec_16)
	ALTENTRY(atomic_dec_16_nv)
	ALTENTRY(atomic_dec_ushort)
	ALTENTRY(atomic_dec_ushort_nv)
	ba	add_16
	  sub	%g0, 1, %o1
	SET_SIZE(atomic_dec_ushort_nv)
	SET_SIZE(atomic_dec_ushort)
	SET_SIZE(atomic_dec_16_nv)
	SET_SIZE(atomic_dec_16)

	/*
	 * NOTE: If atomic_add_16 and atomic_add_16_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_add_16_nv.
	 */
	ENTRY(atomic_add_16)
	ALTENTRY(atomic_add_16_nv)
	ALTENTRY(atomic_add_short)
	ALTENTRY(atomic_add_short_nv)
add_16:
	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	sethi	%hi(0xffff0000), %o3	! %o3 = mask
	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single short value
	andn	%o0, 0x2, %o0		! %o0 = word address
	! if low-order bit is 1, we will properly get an alignment fault here
	ld	[%o0], %o2		! read old value
1:
	add	%o1, %o2, %o5		! add value to the old value
	and	%o5, %o3, %o5		! clear other bits
	andn	%o2, %o3, %o4		! clear target bits
	or	%o4, %o5, %o5		! insert the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	add	%o1, %o2, %o5
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_add_short_nv)
	SET_SIZE(atomic_add_short)
	SET_SIZE(atomic_add_16_nv)
	SET_SIZE(atomic_add_16)

	/*
	 * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_inc_32_nv.
	 */
	ENTRY(atomic_inc_32)
	ALTENTRY(atomic_inc_32_nv)
	ALTENTRY(atomic_inc_uint)
	ALTENTRY(atomic_inc_uint_nv)
	ba	add_32
	  add	%g0, 1, %o1
	SET_SIZE(atomic_inc_uint_nv)
	SET_SIZE(atomic_inc_uint)
	SET_SIZE(atomic_inc_32_nv)
	SET_SIZE(atomic_inc_32)

	/*
	 * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_dec_32_nv.
	 */
	ENTRY(atomic_dec_32)
	ALTENTRY(atomic_dec_32_nv)
	ALTENTRY(atomic_dec_uint)
	ALTENTRY(atomic_dec_uint_nv)
	ba	add_32
	  sub	%g0, 1, %o1
	SET_SIZE(atomic_dec_uint_nv)
	SET_SIZE(atomic_dec_uint)
	SET_SIZE(atomic_dec_32_nv)
	SET_SIZE(atomic_dec_32)

	/*
	 * NOTE: If atomic_add_32 and atomic_add_32_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_add_32_nv.
	 */
	ENTRY(atomic_add_32)
	ALTENTRY(atomic_add_32_nv)
	ALTENTRY(atomic_add_int)
	ALTENTRY(atomic_add_int_nv)
add_32:
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ld	[%o0], %o2
1:
	add	%o2, %o1, %o3
	cas	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	  mov	%o3, %o2
	retl
	add	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add32, 0b)
	SET_SIZE(atomic_add_int_nv)
	SET_SIZE(atomic_add_int)
	SET_SIZE(atomic_add_32_nv)
	SET_SIZE(atomic_add_32)

	/*
	 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_inc_64_nv.
	 */
	ENTRY(atomic_inc_64)
	ALTENTRY(atomic_inc_64_nv)
	ALTENTRY(atomic_inc_ulong)
	ALTENTRY(atomic_inc_ulong_nv)
	ba	add_64
	  add	%g0, 1, %o1
	SET_SIZE(atomic_inc_ulong_nv)
	SET_SIZE(atomic_inc_ulong)
	SET_SIZE(atomic_inc_64_nv)
	SET_SIZE(atomic_inc_64)

	/*
	 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_dec_64_nv.
	 */
	ENTRY(atomic_dec_64)
	ALTENTRY(atomic_dec_64_nv)
	ALTENTRY(atomic_dec_ulong)
	ALTENTRY(atomic_dec_ulong_nv)
	ba	add_64
	  sub	%g0, 1, %o1
	SET_SIZE(atomic_dec_ulong_nv)
	SET_SIZE(atomic_dec_ulong)
	SET_SIZE(atomic_dec_64_nv)
	SET_SIZE(atomic_dec_64)

	/*
	 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_add_64_nv.
	 */
	ENTRY(atomic_add_64)
	ALTENTRY(atomic_add_64_nv)
	ALTENTRY(atomic_add_ptr)
	ALTENTRY(atomic_add_ptr_nv)
	ALTENTRY(atomic_add_long)
	ALTENTRY(atomic_add_long_nv)
add_64:
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ldx	[%o0], %o2
1:
	add	%o2, %o1, %o3
	casx	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	  mov	%o3, %o2
	retl
	add	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add64, 0b)
	SET_SIZE(atomic_add_long_nv)
	SET_SIZE(atomic_add_long)
	SET_SIZE(atomic_add_ptr_nv)
	SET_SIZE(atomic_add_ptr)
	SET_SIZE(atomic_add_64_nv)
	SET_SIZE(atomic_add_64)

	/*
	 * NOTE: If atomic_or_8 and atomic_or_8_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_or_8_nv.
	 */
	ENTRY(atomic_or_8)
	ALTENTRY(atomic_or_8_nv)
	ALTENTRY(atomic_or_uchar)
	ALTENTRY(atomic_or_uchar_nv)
	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	set	0xff, %o3		! %o3 = mask
	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single byte value
	andn	%o0, 0x3, %o0		! %o0 = word address
	ld	[%o0], %o2		! read old value
1:
	or	%o2, %o1, %o5		! or in the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	or	%o2, %o1, %o5
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_or_uchar_nv)
	SET_SIZE(atomic_or_uchar)
	SET_SIZE(atomic_or_8_nv)
	SET_SIZE(atomic_or_8)

	/*
	 * NOTE: If atomic_or_16 and atomic_or_16_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_or_16_nv.
	 */
	ENTRY(atomic_or_16)
	ALTENTRY(atomic_or_16_nv)
	ALTENTRY(atomic_or_ushort)
	ALTENTRY(atomic_or_ushort_nv)
	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	sethi	%hi(0xffff0000), %o3	! %o3 = mask
	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single short value
	andn	%o0, 0x2, %o0		! %o0 = word address
	! if low-order bit is 1, we will properly get an alignment fault here
	ld	[%o0], %o2		! read old value
1:
	or	%o2, %o1, %o5		! or in the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	or	%o2, %o1, %o5		! or in the new value
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_or_ushort_nv)
	SET_SIZE(atomic_or_ushort)
	SET_SIZE(atomic_or_16_nv)
	SET_SIZE(atomic_or_16)

	/*
	 * NOTE: If atomic_or_32 and atomic_or_32_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_or_32_nv.
	 */
	ENTRY(atomic_or_32)
	ALTENTRY(atomic_or_32_nv)
	ALTENTRY(atomic_or_uint)
	ALTENTRY(atomic_or_uint_nv)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ld	[%o0], %o2
1:
	or	%o2, %o1, %o3
	cas	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	  mov	%o3, %o2
	retl
	or	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or32, 0b)
	SET_SIZE(atomic_or_uint_nv)
	SET_SIZE(atomic_or_uint)
	SET_SIZE(atomic_or_32_nv)
	SET_SIZE(atomic_or_32)

	/*
	 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_or_64_nv.
	 */
	ENTRY(atomic_or_64)
	ALTENTRY(atomic_or_64_nv)
	ALTENTRY(atomic_or_ulong)
	ALTENTRY(atomic_or_ulong_nv)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ldx	[%o0], %o2
1:
	or	%o2, %o1, %o3
	casx	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	  mov	%o3, %o2
	retl
	or	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or64, 0b)
	SET_SIZE(atomic_or_ulong_nv)
	SET_SIZE(atomic_or_ulong)
	SET_SIZE(atomic_or_64_nv)
	SET_SIZE(atomic_or_64)

	/*
	 * NOTE: If atomic_and_8 and atomic_and_8_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_and_8_nv.
	 */
	ENTRY(atomic_and_8)
	ALTENTRY(atomic_and_8_nv)
	ALTENTRY(atomic_and_uchar)
	ALTENTRY(atomic_and_uchar_nv)
	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	set	0xff, %o3		! %o3 = mask
	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	orn	%o1, %o3, %o1		! all ones in other bytes
	andn	%o0, 0x3, %o0		! %o0 = word address
	ld	[%o0], %o2		! read old value
1:
	and	%o2, %o1, %o5		! and in the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	and	%o2, %o1, %o5
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_and_uchar_nv)
	SET_SIZE(atomic_and_uchar)
	SET_SIZE(atomic_and_8_nv)
	SET_SIZE(atomic_and_8)

	/*
	 * NOTE: If atomic_and_16 and atomic_and_16_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_and_16_nv.
	 */
	ENTRY(atomic_and_16)
	ALTENTRY(atomic_and_16_nv)
	ALTENTRY(atomic_and_ushort)
	ALTENTRY(atomic_and_ushort_nv)
	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	sethi	%hi(0xffff0000), %o3	! %o3 = mask
	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	orn	%o1, %o3, %o1		! all ones in the other half
	andn	%o0, 0x2, %o0		! %o0 = word address
	! if low-order bit is 1, we will properly get an alignment fault here
	ld	[%o0], %o2		! read old value
1:
	and	%o2, %o1, %o5		! and in the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	and	%o2, %o1, %o5
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_and_ushort_nv)
	SET_SIZE(atomic_and_ushort)
	SET_SIZE(atomic_and_16_nv)
	SET_SIZE(atomic_and_16)

	/*
	 * NOTE: If atomic_and_32 and atomic_and_32_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_and_32_nv.
	 */
	ENTRY(atomic_and_32)
	ALTENTRY(atomic_and_32_nv)
	ALTENTRY(atomic_and_uint)
	ALTENTRY(atomic_and_uint_nv)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ld	[%o0], %o2
1:
	and	%o2, %o1, %o3
	cas	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	  mov	%o3, %o2
	retl
	and	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and32, 0b)
	SET_SIZE(atomic_and_uint_nv)
	SET_SIZE(atomic_and_uint)
	SET_SIZE(atomic_and_32_nv)
	SET_SIZE(atomic_and_32)

	/*
	 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_and_64_nv.
	 */
	ENTRY(atomic_and_64)
	ALTENTRY(atomic_and_64_nv)
	ALTENTRY(atomic_and_ulong)
	ALTENTRY(atomic_and_ulong_nv)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ldx	[%o0], %o2
1:
	and	%o2, %o1, %o3
	casx	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	  mov	%o3, %o2
	retl
	and	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and64, 0b)
	SET_SIZE(atomic_and_ulong_nv)
	SET_SIZE(atomic_and_ulong)
	SET_SIZE(atomic_and_64_nv)
	SET_SIZE(atomic_and_64)

	ENTRY(atomic_cas_8)
	ALTENTRY(atomic_cas_uchar)
	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	set	0xff, %o3		! %o3 = mask
	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single byte value
	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
	and	%o2, %o3, %o2		! %o2 = single byte value
	andn	%o0, 0x3, %o0		! %o0 = word address
	ld	[%o0], %o4		! read old value
1:
	andn	%o4, %o3, %o4		! clear target bits
	or	%o4, %o2, %o5		! insert the new value
	or	%o4, %o1, %o4		! insert the comparison value
	cas	[%o0], %o4, %o5
	cmp	%o4, %o5		! did we succeed?
	be,pt	%icc, 2f
	  and	%o5, %o3, %o4		! isolate the old value
	cmp	%o1, %o4		! should we have succeeded?
	be,a,pt	%icc, 1b		! yes, try again
	  mov	%o5, %o4		! %o4 = old value
2:
	retl
	srl	%o4, %g1, %o0		! %o0 = old value
	SET_SIZE(atomic_cas_uchar)
	SET_SIZE(atomic_cas_8)
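
	/*
	 * For reference, the sub-word compare-and-swap above can be pictured
	 * in C roughly as below (illustrative sketch only, not assembled).
	 * A word-wide cas is used; if it fails only because some other byte
	 * of the word changed, the operation is retried:
	 *
	 *	uint32_t *wp = (uint32_t *)((uintptr_t)addr & ~3);
	 *	int shift = (3 - ((uintptr_t)addr & 3)) * 8;	// big-endian
	 *	uint32_t mask = 0xff << shift;
	 *	uint32_t expb = ((uint32_t)expect << shift) & mask;
	 *	uint32_t newb = ((uint32_t)newval << shift) & mask;
	 *	uint32_t word = *wp, seen;
	 *	for (;;) {
	 *		uint32_t expw = (word & ~mask) | expb;
	 *		seen = atomic_cas_32(wp, expw, (word & ~mask) | newb);
	 *		if (seen == expw)
	 *			break;		// byte swapped in
	 *		if ((seen & mask) != expb)
	 *			break;		// target byte really differed
	 *		word = seen;		// other bytes changed: retry
	 *	}
	 *	return ((seen & mask) >> shift);	// old byte value
	 */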

	ENTRY(atomic_cas_16)
	ALTENTRY(atomic_cas_ushort)
	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	sethi	%hi(0xffff0000), %o3	! %o3 = mask
	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single short value
	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
	and	%o2, %o3, %o2		! %o2 = single short value
	andn	%o0, 0x2, %o0		! %o0 = word address
	! if low-order bit is 1, we will properly get an alignment fault here
	ld	[%o0], %o4		! read old value
1:
	andn	%o4, %o3, %o4		! clear target bits
	or	%o4, %o2, %o5		! insert the new value
	or	%o4, %o1, %o4		! insert the comparison value
	cas	[%o0], %o4, %o5
	cmp	%o4, %o5		! did we succeed?
	be,pt	%icc, 2f
	  and	%o5, %o3, %o4		! isolate the old value
	cmp	%o1, %o4		! should we have succeeded?
	be,a,pt	%icc, 1b		! yes, try again
	  mov	%o5, %o4		! %o4 = old value
2:
	retl
	srl	%o4, %g1, %o0		! %o0 = old value
	SET_SIZE(atomic_cas_ushort)
	SET_SIZE(atomic_cas_16)

	ENTRY(atomic_cas_32)
	ALTENTRY(atomic_cas_uint)
	cas	[%o0], %o1, %o2
	retl
	mov	%o2, %o0
	SET_SIZE(atomic_cas_uint)
	SET_SIZE(atomic_cas_32)

	ENTRY(atomic_cas_64)
	ALTENTRY(atomic_cas_ptr)
	ALTENTRY(atomic_cas_ulong)
	casx	[%o0], %o1, %o2
	retl
	mov	%o2, %o0
	SET_SIZE(atomic_cas_ulong)
	SET_SIZE(atomic_cas_ptr)
	SET_SIZE(atomic_cas_64)

	ENTRY(atomic_swap_8)
	ALTENTRY(atomic_swap_uchar)
	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	set	0xff, %o3		! %o3 = mask
	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single byte value
	andn	%o0, 0x3, %o0		! %o0 = word address
	ld	[%o0], %o2		! read old value
1:
	andn	%o2, %o3, %o5		! clear target bits
	or	%o5, %o1, %o5		! insert the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = old value
	SET_SIZE(atomic_swap_uchar)
	SET_SIZE(atomic_swap_8)

	ENTRY(atomic_swap_16)
	ALTENTRY(atomic_swap_ushort)
	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	sethi	%hi(0xffff0000), %o3	! %o3 = mask
	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single short value
	andn	%o0, 0x2, %o0		! %o0 = word address
	! if low-order bit is 1, we will properly get an alignment fault here
	ld	[%o0], %o2		! read old value
1:
	andn	%o2, %o3, %o5		! clear target bits
	or	%o5, %o1, %o5		! insert the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = old value
	SET_SIZE(atomic_swap_ushort)
	SET_SIZE(atomic_swap_16)

	ENTRY(atomic_swap_32)
	ALTENTRY(atomic_swap_uint)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ld	[%o0], %o2
1:
	mov	%o1, %o3
	cas	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	  mov	%o3, %o2
	retl
	mov	%o3, %o0
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap32, 0b)
	SET_SIZE(atomic_swap_uint)
	SET_SIZE(atomic_swap_32)

	ENTRY(atomic_swap_64)
	ALTENTRY(atomic_swap_ptr)
	ALTENTRY(atomic_swap_ulong)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ldx	[%o0], %o2
1:
	mov	%o1, %o3
	casx	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	  mov	%o3, %o2
	retl
	mov	%o3, %o0
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap64, 0b)
	SET_SIZE(atomic_swap_ulong)
	SET_SIZE(atomic_swap_ptr)
	SET_SIZE(atomic_swap_64)

	ENTRY(atomic_set_long_excl)
	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
	mov	1, %o3
	slln	%o3, %o1, %o3
0:
	ldn	[%o0], %o2
1:
	andcc	%o2, %o3, %g0		! test if the bit is set
	bnz,a,pn %ncc, 2f		! if so, then fail out
	  mov	-1, %o0
	or	%o2, %o3, %o4		! set the bit, and try to commit it
	casn	[%o0], %o2, %o4
	cmp	%o2, %o4
	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
	  mov	%o4, %o2
	mov	%g0, %o0
2:
	retl
	nop
5:
	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, setlongexcl, 0b)
	SET_SIZE(atomic_set_long_excl)

	ENTRY(atomic_clear_long_excl)
	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
	mov	1, %o3
	slln	%o3, %o1, %o3
0:
	ldn	[%o0], %o2
1:
	andncc	%o3, %o2, %g0		! test if the bit is clear
	bnz,a,pn %ncc, 2f		! if so, then fail out
	  mov	-1, %o0
	andn	%o2, %o3, %o4		! clear the bit, and try to commit it
	casn	[%o0], %o2, %o4
	cmp	%o2, %o4
	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
	  mov	%o4, %o2
	mov	%g0, %o0
2:
	retl
	nop
5:
	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, clrlongexcl, 0b)
	SET_SIZE(atomic_clear_long_excl)
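
	/*
	 * For reference, atomic_set_long_excl() implements the following
	 * semantics (illustrative C sketch only, not assembled);
	 * atomic_clear_long_excl() is the mirror image, failing if the bit
	 * is already clear and committing old & ~bit:
	 *
	 *	ulong_t bit = 1UL << bitnum;
	 *	ulong_t old = *target;
	 *	for (;;) {
	 *		if (old & bit)
	 *			return (-1);	// bit already set: fail
	 *		ulong_t seen = atomic_cas_ulong(target, old, old | bit);
	 *		if (seen == old)
	 *			return (0);	// bit set successfully
	 *		old = seen;		// lost a race: re-evaluate
	 *	}
	 */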

#if !defined(_KERNEL)

	/*
	 * Spitfires and Blackbirds have a problem with membars in the
	 * delay slot (SF_ERRATA_51).  For safety's sake, we assume
	 * that the whole world needs the workaround.
	 */
	ENTRY(membar_enter)
	membar	#StoreLoad|#StoreStore
	retl
	nop
	SET_SIZE(membar_enter)

	ENTRY(membar_exit)
	membar	#LoadStore|#StoreStore
	retl
	nop
	SET_SIZE(membar_exit)

	ENTRY(membar_producer)
	membar	#StoreStore
	retl
	nop
	SET_SIZE(membar_producer)

	ENTRY(membar_consumer)
	membar	#LoadLoad
	retl
	nop
	SET_SIZE(membar_consumer)
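
	/*
	 * Typical use of these primitives, for reference (illustrative C
	 * only, not part of this file; "lk" is a hypothetical byte-sized
	 * spin lock):
	 *
	 *	while (atomic_swap_8(&lk, 1) != 0)
	 *		;		// spin until we own the lock
	 *	membar_enter();		// acquire: fence before the
	 *				// critical section
	 *	... critical section ...
	 *	membar_exit();		// release: prior accesses complete
	 *				// before the lock-clearing store
	 *	lk = 0;			// drop the lock
	 */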

#endif	/* !_KERNEL */