5045 use atomic_{inc,dec}_* instead of atomic_add_*
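The substitution is mechanical: everywhere the code added or subtracted exactly 1 through the general-purpose atomic_add_32()/atomic_add_32_nv() routines, the dedicated increment and decrement forms from <sys/atomic.h> are used instead. A minimal sketch of the pattern (a hypothetical counter, not code from ip_ire.c):

#include <sys/atomic.h>

static uint32_t cnt;

static void
before_and_after(void)
{
        /* Old style: add or subtract 1 via the general add routines. */
        atomic_add_32(&cnt, 1);
        if (atomic_add_32_nv(&cnt, -1) == 0) {
                /* last reference dropped */
        }

        /* New style: the dedicated forms state the intent directly. */
        atomic_inc_32(&cnt);
        if (atomic_dec_32_nv(&cnt) == 0) {
                /* last reference dropped */
        }
}

The behavior is identical: atomic_dec_32_nv() returns the new value just as atomic_add_32_nv(..., -1) does, so the "== 0" last-reference checks in the code below carry over unchanged.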


 316                         ire_list = ire_unlink(irb);
 317                         rw_exit(&irb->irb_lock);
 318                         ASSERT(ire_list != NULL);
 319                         ire_cleanup(ire_list);
 320                 } else {
 321                         rw_exit(&irb->irb_lock);
 322                 }
 323         }
 324 }
 325 
 326 
 327 /*
 328  * Bump up the reference count on the IRE. We cannot assert that the
 329  * bucket lock is being held as it is legal to bump up the reference
 330  * count after the first lookup has returned the IRE without
 331  * holding the lock.
 332  */
 333 void
 334 ire_refhold(ire_t *ire)
 335 {
 336         atomic_add_32(&(ire)->ire_refcnt, 1);
 337         ASSERT((ire)->ire_refcnt != 0);
 338 #ifdef DEBUG
 339         ire_trace_ref(ire);
 340 #endif
 341 }
 342 
 343 void
 344 ire_refhold_notr(ire_t *ire)
 345 {
 346         atomic_add_32(&(ire)->ire_refcnt, 1);
 347         ASSERT((ire)->ire_refcnt != 0);
 348 }
 349 
 350 void
 351 ire_refhold_locked(ire_t *ire)
 352 {
 353 #ifdef DEBUG
 354         ire_trace_ref(ire);
 355 #endif
 356         ire->ire_refcnt++;
 357 }
 358 
 359 /*
 360  * Release a ref on an IRE.
 361  *
 362  * Must not be called while holding any locks. Otherwise, if this is
 363  * the last reference to be released, there is a chance of a recursive
 364  * mutex panic due to ire_refrele -> ipif_ill_refrele_tail -> qwriter_ip
 365  * trying to restart an ioctl. The one exception is when the caller is
 366  * sure that this is not the last reference to be released, e.g. when
 367  * the caller is sure that the ire has not been and won't be deleted.
 368  *
 369  * On architectures such as sun4u, where atomic_add_32_nv is just
 370  * a cas, we need to maintain the same memory barrier semantics
 371  * as mutex_exit, i.e. all loads and stores should complete
 372  * before the cas is executed. membar_exit() does that here.
 373  */
 374 void
 375 ire_refrele(ire_t *ire)
 376 {
 377 #ifdef DEBUG
 378         ire_untrace_ref(ire);
 379 #endif
 380         ASSERT((ire)->ire_refcnt != 0);
 381         membar_exit();
 382         if (atomic_add_32_nv(&(ire)->ire_refcnt, -1) == 0)
 383                 ire_inactive(ire);
 384 }
 385 
 386 void
 387 ire_refrele_notr(ire_t *ire)
 388 {
 389         ASSERT((ire)->ire_refcnt != 0);
 390         membar_exit();
 391         if (atomic_add_32_nv(&(ire)->ire_refcnt, -1) == 0)
 392                 ire_inactive(ire);
 393 }
 394 
 395 /*
 396  * This function is associated with the IP_IOC_IRE_DELETE[_NO_REPLY]
 397  * IOCTL[s].  The NO_REPLY form is used by TCP to tell IP that it is
 398  * having problems reaching a particular destination.
 399  * This will make IP consider alternate routes (e.g., when there are
 400  * multiple default routes), and it will also make IP discard any (potentially)
 401  * stale redirect.
 402  * Management processes may want to use the version that generates a reply.
 403  *
 404  * With the use of NUD-like behavior for IPv4/ARP in addition to IPv6,
 405  * this function shouldn't be necessary for IP to recover from a bad redirect,
 406  * a bad default router (when there are multiple default routers), or
 407  * a stale ND/ARP entry. But we retain it in any case.
 408  * For instance, this is helpful when TCP suspects a failure before NUD does.
 409  */
 410 int
 411 ip_ire_delete(queue_t *q, mblk_t *mp, cred_t *ioc_cr)


1255                  * in the gateway security attributes; such routes are
1256                  * considered duplicates.
1257                  * To change that we explicitly have to treat them as
1258                  * different here.
1259                  */
1260                 if (ire_match_args(ire1, ire->ire_addr, ire->ire_mask,
1261                     ire->ire_gateway_addr, ire->ire_type, ire->ire_ill,
1262                     ire->ire_zoneid, NULL, match_flags)) {
1263                         /*
1264                          * Return the old ire after doing a REFHOLD.
1265                          * As most of the callers continue to use the IRE
1266                          * after adding, we return a held ire. This will
1267                          * avoid a lookup in the caller again. If the callers
1268                          * don't want to use it, they need to do a REFRELE.
1269                          *
1270                          * We only allow exactly one IRE_IF_CLONE for any dst,
1271                          * so if this is an IF_CLONE, return the ire without
1272                          * an identical_ref, but with an ire_ref held.
1273                          */
1274                         if (ire->ire_type != IRE_IF_CLONE) {
1275                                 atomic_add_32(&ire1->ire_identical_ref, 1);
1276                                 DTRACE_PROBE2(ire__add__exist, ire_t *, ire1,
1277                                     ire_t *, ire);
1278                         }
1279                         ire_refhold(ire1);
1280                         ire_atomic_end(irb_ptr, ire);
1281                         ire_delete(ire);
1282                         irb_refrele(irb_ptr);
1283                         return (ire1);
1284                 }
1285         }
1286 
1287         /*
1288          * Normally we do head insertion since most things do not care about
1289          * the order of the IREs in the bucket. Note that ip_cgtp_bcast_add
1290          * assumes we at least do head insertion so that its IRE_BROADCAST
1291          * arrives ahead of any existing IRE_HOST for the same address.
1292          * However, due to shared-IP zones (and restrict_interzone_loopback)
1293          * we can have an IRE_LOCAL as well as IRE_IF_CLONE for the same
1294          * address. For that reason we do tail insertion for IRE_IF_CLONE.
1295          * Due to the IRE_BROADCAST on cgtp0, which must be last in the bucket,


1516                         parent->ire_ib_pkt_count += ire->ire_ib_pkt_count;
1517                         ire->ire_ob_pkt_count = 0;
1518                         ire->ire_ib_pkt_count = 0;
1519                 }
1520                 rw_exit(&ipst->ips_ire_dep_lock);
1521         }
1522 
1523         rw_enter(&irb->irb_lock, RW_WRITER);
1524         if (ire->ire_ptpn == NULL) {
1525                 /*
1526                  * Some other thread has removed us from the list.
1527                  * It should have done the REFRELE for us.
1528                  */
1529                 rw_exit(&irb->irb_lock);
1530                 return;
1531         }
1532 
1533         if (!IRE_IS_CONDEMNED(ire)) {
1534                 /* Is this an IRE representing multiple duplicate entries? */
1535                 ASSERT(ire->ire_identical_ref >= 1);
1536                 if (atomic_add_32_nv(&ire->ire_identical_ref, -1) != 0) {
1537                         /* Removed one of the identical parties */
1538                         rw_exit(&irb->irb_lock);
1539                         return;
1540                 }
1541 
1542                 irb->irb_ire_cnt--;
1543                 ire_make_condemned(ire);
1544         }
1545 
1546         if (irb->irb_refcnt != 0) {
1547                 /*
1548                  * The last thread to leave this bucket will
1549                  * delete this ire.
1550                  */
1551                 irb->irb_marks |= IRB_MARK_CONDEMNED;
1552                 rw_exit(&irb->irb_lock);
1553                 return;
1554         }
1555 
1556         /*


2601         return (ire_nce_init(ill, addr6, ire_type));
2602 }
2603 
2604 /*
2605  * The caller should hold irb_lock as a writer if the ire is in a bucket.
2606  * This routine will clear ire_nce_cache, and we make sure that we can never
2607  * set ire_nce_cache after the ire is marked condemned.
2608  */
2609 void
2610 ire_make_condemned(ire_t *ire)
2611 {
2612         ip_stack_t      *ipst = ire->ire_ipst;
2613         nce_t           *nce;
2614 
2615         mutex_enter(&ire->ire_lock);
2616         ASSERT(ire->ire_bucket == NULL ||
2617             RW_WRITE_HELD(&ire->ire_bucket->irb_lock));
2618         ASSERT(!IRE_IS_CONDEMNED(ire));
2619         ire->ire_generation = IRE_GENERATION_CONDEMNED;
2620         /* Count how many condemned ires for kmem_cache callback */
2621         atomic_add_32(&ipst->ips_num_ire_condemned, 1);
2622         nce = ire->ire_nce_cache;
2623         ire->ire_nce_cache = NULL;
2624         mutex_exit(&ire->ire_lock);
2625         if (nce != NULL)
2626                 nce_refrele(nce);
2627 }
2628 
2629 /*
2630  * Increment the generation avoiding the special condemned value
2631  */
2632 void
2633 ire_increment_generation(ire_t *ire)
2634 {
2635         uint_t generation;
2636 
2637         mutex_enter(&ire->ire_lock);
2638         /*
2639          * Even though the caller has a hold it can't prevent a concurrent
2640          * ire_delete marking the IRE condemned
2641          */




 316                         ire_list = ire_unlink(irb);
 317                         rw_exit(&irb->irb_lock);
 318                         ASSERT(ire_list != NULL);
 319                         ire_cleanup(ire_list);
 320                 } else {
 321                         rw_exit(&irb->irb_lock);
 322                 }
 323         }
 324 }
 325 
 326 
 327 /*
 328  * Bump up the reference count on the IRE. We cannot assert that the
 329  * bucket lock is being held as it is legal to bump up the reference
 330  * count after the first lookup has returned the IRE without
 331  * holding the lock.
 332  */
 333 void
 334 ire_refhold(ire_t *ire)
 335 {
 336         atomic_inc_32(&(ire)->ire_refcnt);
 337         ASSERT((ire)->ire_refcnt != 0);
 338 #ifdef DEBUG
 339         ire_trace_ref(ire);
 340 #endif
 341 }
 342 
 343 void
 344 ire_refhold_notr(ire_t *ire)
 345 {
 346         atomic_inc_32(&(ire)->ire_refcnt);
 347         ASSERT((ire)->ire_refcnt != 0);
 348 }
 349 
 350 void
 351 ire_refhold_locked(ire_t *ire)
 352 {
 353 #ifdef DEBUG
 354         ire_trace_ref(ire);
 355 #endif
 356         ire->ire_refcnt++;
 357 }
 358 
 359 /*
 360  * Release a ref on an IRE.
 361  *
 362  * Must not be called while holding any locks. Otherwise, if this is
 363  * the last reference to be released, there is a chance of a recursive
 364  * mutex panic due to ire_refrele -> ipif_ill_refrele_tail -> qwriter_ip
 365  * trying to restart an ioctl. The one exception is when the caller is
 366  * sure that this is not the last reference to be released, e.g. when
 367  * the caller is sure that the ire has not been and won't be deleted.
 368  *
 369  * On architectures such as sun4u, where atomic_add_32_nv is just
 370  * a cas, we need to maintain the same memory barrier semantics
 371  * as mutex_exit, i.e. all loads and stores should complete
 372  * before the cas is executed. membar_exit() does that here.
 373  */
 374 void
 375 ire_refrele(ire_t *ire)
 376 {
 377 #ifdef DEBUG
 378         ire_untrace_ref(ire);
 379 #endif
 380         ASSERT((ire)->ire_refcnt != 0);
 381         membar_exit();
 382         if (atomic_dec_32_nv(&(ire)->ire_refcnt) == 0)
 383                 ire_inactive(ire);
 384 }
 385 
 386 void
 387 ire_refrele_notr(ire_t *ire)
 388 {
 389         ASSERT((ire)->ire_refcnt != 0);
 390         membar_exit();
 391         if (atomic_dec_32_nv(&(ire)->ire_refcnt) == 0)
 392                 ire_inactive(ire);
 393 }
 394 
 395 /*
 396  * This function is associated with the IP_IOC_IRE_DELETE[_NO_REPLY]
 397  * IOCTL[s].  The NO_REPLY form is used by TCP to tell IP that it is
 398  * having problems reaching a particular destination.
 399  * This will make IP consider alternate routes (e.g., when there are
 400  * multiple default routes), and it will also make IP discard any (potentially)
 401  * stale redirect.
 402  * Management processes may want to use the version that generates a reply.
 403  *
 404  * With the use of NUD-like behavior for IPv4/ARP in addition to IPv6,
 405  * this function shouldn't be necessary for IP to recover from a bad redirect,
 406  * a bad default router (when there are multiple default routers), or
 407  * a stale ND/ARP entry. But we retain it in any case.
 408  * For instance, this is helpful when TCP suspects a failure before NUD does.
 409  */
 410 int
 411 ip_ire_delete(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
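
Stepping outside the listing for a moment: the barrier idiom described in the comment above ire_refrele() is a general refcount-release pattern rather than anything IRE-specific. A minimal sketch with a hypothetical refcounted type (foo_t, foo_inactive() and the field names are illustrative only): membar_exit() completes all earlier loads and stores before the decrement, matching the release semantics of mutex_exit(), and only the thread that sees the count reach zero performs the teardown.

#include <sys/atomic.h>
#include <sys/debug.h>

typedef struct foo {
        uint32_t        foo_refcnt;
} foo_t;

extern void foo_inactive(foo_t *);      /* hypothetical teardown routine */

static void
foo_rele(foo_t *foo)
{
        ASSERT(foo->foo_refcnt != 0);
        membar_exit();          /* release: prior loads/stores complete first */
        if (atomic_dec_32_nv(&foo->foo_refcnt) == 0)
                foo_inactive(foo);      /* this thread dropped the last hold */
}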


1255                  * in the gateway security attributes; such routes are
1256                  * considered duplicates.
1257                  * To change that we explicitly have to treat them as
1258                  * different here.
1259                  */
1260                 if (ire_match_args(ire1, ire->ire_addr, ire->ire_mask,
1261                     ire->ire_gateway_addr, ire->ire_type, ire->ire_ill,
1262                     ire->ire_zoneid, NULL, match_flags)) {
1263                         /*
1264                          * Return the old ire after doing a REFHOLD.
1265                          * As most of the callers continue to use the IRE
1266                          * after adding, we return a held ire. This will
1267                          * avoid a lookup in the caller again. If the callers
1268                          * don't want to use it, they need to do a REFRELE.
1269                          *
1270                          * We only allow exactly one IRE_IF_CLONE for any dst,
1271                          * so if this is an IF_CLONE, return the ire without
1272                          * an identical_ref, but with an ire_ref held.
1273                          */
1274                         if (ire->ire_type != IRE_IF_CLONE) {
1275                                 atomic_inc_32(&ire1->ire_identical_ref);
1276                                 DTRACE_PROBE2(ire__add__exist, ire_t *, ire1,
1277                                     ire_t *, ire);
1278                         }
1279                         ire_refhold(ire1);
1280                         ire_atomic_end(irb_ptr, ire);
1281                         ire_delete(ire);
1282                         irb_refrele(irb_ptr);
1283                         return (ire1);
1284                 }
1285         }
1286 
1287         /*
1288          * Normally we do head insertion since most things do not care about
1289          * the order of the IREs in the bucket. Note that ip_cgtp_bcast_add
1290          * assumes we at least do head insertion so that its IRE_BROADCAST
1291          * arrives ahead of any existing IRE_HOST for the same address.
1292          * However, due to shared-IP zones (and restrict_interzone_loopback)
1293          * we can have an IRE_LOCAL as well as IRE_IF_CLONE for the same
1294          * address. For that reason we do tail insertion for IRE_IF_CLONE.
1295          * Due to the IRE_BROADCAST on cgtp0, which must be last in the bucket,
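
Stepping outside the listing again: the duplicate handling above reduces to a small pattern. When an identical entry is found, its identical_ref is bumped (the IRE_IF_CLONE exception aside), it is handed back held, and the caller discards the entry it had just built. A self-contained sketch with hypothetical types (entry_t and its fields are illustrative, not the ire bucket code):

#include <sys/atomic.h>
#include <sys/types.h>

typedef struct entry {
        struct entry    *e_next;
        uint32_t        e_key;
        uint32_t        e_refcnt;
        uint32_t        e_identical_ref;
} entry_t;

/*
 * Insert 'new' at the head of the list unless an identical entry already
 * exists; in that case return the existing entry held so the caller can
 * discard 'new' and continue with the old entry.
 */
static entry_t *
entry_insert(entry_t **headp, entry_t *new)
{
        entry_t *e;

        for (e = *headp; e != NULL; e = e->e_next) {
                if (e->e_key == new->e_key) {
                        atomic_inc_32(&e->e_identical_ref);
                        atomic_inc_32(&e->e_refcnt);    /* returned held */
                        return (e);
                }
        }
        new->e_next = *headp;                   /* head insertion */
        *headp = new;
        atomic_inc_32(&new->e_refcnt);          /* returned held */
        return (new);
}

Head insertion of a genuinely new entry mirrors the default insertion policy described in the comment just above.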


1516                         parent->ire_ib_pkt_count += ire->ire_ib_pkt_count;
1517                         ire->ire_ob_pkt_count = 0;
1518                         ire->ire_ib_pkt_count = 0;
1519                 }
1520                 rw_exit(&ipst->ips_ire_dep_lock);
1521         }
1522 
1523         rw_enter(&irb->irb_lock, RW_WRITER);
1524         if (ire->ire_ptpn == NULL) {
1525                 /*
1526                  * Some other thread has removed us from the list.
1527                  * It should have done the REFRELE for us.
1528                  */
1529                 rw_exit(&irb->irb_lock);
1530                 return;
1531         }
1532 
1533         if (!IRE_IS_CONDEMNED(ire)) {
1534                 /* Is this an IRE representing multiple duplicate entries? */
1535                 ASSERT(ire->ire_identical_ref >= 1);
1536                 if (atomic_dec_32_nv(&ire->ire_identical_ref) != 0) {
1537                         /* Removed one of the identical parties */
1538                         rw_exit(&irb->irb_lock);
1539                         return;
1540                 }
1541 
1542                 irb->irb_ire_cnt--;
1543                 ire_make_condemned(ire);
1544         }
1545 
1546         if (irb->irb_refcnt != 0) {
1547                 /*
1548                  * The last thread to leave this bucket will
1549                  * delete this ire.
1550                  */
1551                 irb->irb_marks |= IRB_MARK_CONDEMNED;
1552                 rw_exit(&irb->irb_lock);
1553                 return;
1554         }
1555 
1556         /*


2601         return (ire_nce_init(ill, addr6, ire_type));
2602 }
2603 
2604 /*
2605  * The caller should hold irb_lock as a writer if the ire is in a bucket.
2606  * This routine will clear ire_nce_cache, and we make sure that we can never
2607  * set ire_nce_cache after the ire is marked condemned.
2608  */
2609 void
2610 ire_make_condemned(ire_t *ire)
2611 {
2612         ip_stack_t      *ipst = ire->ire_ipst;
2613         nce_t           *nce;
2614 
2615         mutex_enter(&ire->ire_lock);
2616         ASSERT(ire->ire_bucket == NULL ||
2617             RW_WRITE_HELD(&ire->ire_bucket->irb_lock));
2618         ASSERT(!IRE_IS_CONDEMNED(ire));
2619         ire->ire_generation = IRE_GENERATION_CONDEMNED;
2620         /* Count how many condemned ires for kmem_cache callback */
2621         atomic_inc_32(&ipst->ips_num_ire_condemned);
2622         nce = ire->ire_nce_cache;
2623         ire->ire_nce_cache = NULL;
2624         mutex_exit(&ire->ire_lock);
2625         if (nce != NULL)
2626                 nce_refrele(nce);
2627 }
2628 
2629 /*
2630  * Increment the generation avoiding the special condemned value
2631  */
2632 void
2633 ire_increment_generation(ire_t *ire)
2634 {
2635         uint_t generation;
2636 
2637         mutex_enter(&ire->ire_lock);
2638         /*
2639          * Even though the caller has a hold it can't prevent a concurrent
2640          * ire_delete marking the IRE condemned
2641          */
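
Returning to ire_make_condemned() earlier in this listing: the invariant in its block comment, that ire_nce_cache is cleared there and may never be set again once the IRE is condemned, implies that any path caching an nce on an IRE must re-check the condemned state under ire_lock. A hedged sketch of such a setter, assuming the ip-internal types and nce_refhold(); the function name is hypothetical and the real caching site lives elsewhere in ip:

static void
ire_nce_cache_set(ire_t *ire, nce_t *nce)
{
        mutex_enter(&ire->ire_lock);
        /*
         * Install the cache entry only if the IRE has not been condemned
         * and nothing was cached in the meantime, so ire_make_condemned()
         * can rely on ire_nce_cache staying NULL once it has cleared it.
         */
        if (!IRE_IS_CONDEMNED(ire) && ire->ire_nce_cache == NULL) {
                nce_refhold(nce);               /* the cache keeps its own hold */
                ire->ire_nce_cache = nce;
        }
        mutex_exit(&ire->ire_lock);
}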