507 /*
508 * Finally loop back to the start and look for any free bits starting
509 * from the beginning of the bitmap to the current rotor position.
510 */
511 return (bt_availbit(bitmap, nextindex));
512 }
513
514 /*
515 * Dispatch a new error into the queue for later processing. The specified
516 * data buffer is copied into a preallocated queue element. If 'len' is
517 * smaller than the queue element size, the remainder of the queue element is
518 * filled with zeroes. This function may be called from any context subject
519 * to the Platform Considerations described above.
520 */
521 void
522 errorq_dispatch(errorq_t *eqp, const void *data, size_t len, uint_t flag)
523 {
524 errorq_elem_t *eep, *old;
525
526 if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
527 atomic_add_64(&errorq_lost, 1);
528 return; /* drop error if queue is uninitialized or disabled */
529 }
530
531 for (;;) {
532 int i, rval;
533
534 if ((i = errorq_availbit(eqp->eq_bitmap, eqp->eq_qlen,
535 eqp->eq_rotor)) == -1) {
536 atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
537 return;
538 }
539 BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
540 if (rval == 0) {
541 eqp->eq_rotor = i;
542 eep = &eqp->eq_elems[i];
543 break;
544 }
545 }
546
547 ASSERT(len <= eqp->eq_size);
548 bcopy(data, eep->eqe_data, MIN(eqp->eq_size, len));
549
550 if (len < eqp->eq_size)
551 bzero((caddr_t)eep->eqe_data + len, eqp->eq_size - len);
552
553 for (;;) {
554 old = eqp->eq_pend;
555 eep->eqe_prev = old;
556 membar_producer();
557
558 if (atomic_cas_ptr(&eqp->eq_pend, old, eep) == old)
559 break;
560 }
561
562 atomic_add_64(&eqp->eq_kstat.eqk_dispatched.value.ui64, 1);
563
564 if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
565 ddi_trigger_softintr(eqp->eq_id);
566 }
567
568 /*
569 * Drain the specified error queue by calling eq_func() for each pending error.
570 * This function must be called at or below LOCK_LEVEL or from panic context.
571 * In order to synchronize with other attempts to drain the queue, we acquire
572 * the adaptive eq_lock, blocking other consumers. Once this lock is held,
573 * we must use compare-and-swap to move the pending list to the processing
574 * list and to return elements to the free pool in order to synchronize
575 * with producers, who do not acquire any locks and only use atomic set/clear.
576 *
577 * An additional constraint on this function is that if the system panics
578 * while this function is running, the panic code must be able to detect and
579 * handle all intermediate states and correctly dequeue all errors. The
580 * errorq_panic() function below will be used for detecting and handling
581 * these intermediate states. The comments in errorq_drain() below explain
582 * how we make sure each intermediate state is distinct and consistent.
850
851 if (errorq_panic_drain(ERRORQ_VITAL) <= errorq_vitalmin)
852 (void) errorq_panic_drain(0);
853 (void) errorq_panic_drain(ERRORQ_VITAL | ERRORQ_NVLIST);
854 (void) errorq_panic_drain(ERRORQ_NVLIST);
855 }
856
857 /*
858 * Reserve an error queue element for later processing and dispatching. The
859 * element is returned to the caller who may add error-specific data to
860 * element. The element is retured to the free pool when either
861 * errorq_commit() is called and the element asynchronously processed
862 * or immediately when errorq_cancel() is called.
863 */
864 errorq_elem_t *
865 errorq_reserve(errorq_t *eqp)
866 {
867 errorq_elem_t *eqep;
868
869 if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
870 atomic_add_64(&errorq_lost, 1);
871 return (NULL);
872 }
873
874 for (;;) {
875 int i, rval;
876
877 if ((i = errorq_availbit(eqp->eq_bitmap, eqp->eq_qlen,
878 eqp->eq_rotor)) == -1) {
879 atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
880 return (NULL);
881 }
882 BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
883 if (rval == 0) {
884 eqp->eq_rotor = i;
885 eqep = &eqp->eq_elems[i];
886 break;
887 }
888 }
889
890 if (eqp->eq_flags & ERRORQ_NVLIST) {
891 errorq_nvelem_t *eqnp = eqep->eqe_data;
892 nv_alloc_reset(eqnp->eqn_nva);
893 eqnp->eqn_nvl = fm_nvlist_create(eqnp->eqn_nva);
894 }
895
896 atomic_add_64(&eqp->eq_kstat.eqk_reserved.value.ui64, 1);
897 return (eqep);
898 }
899
900 /*
901 * Commit an errorq element (eqep) for dispatching.
902 * This function may be called from any context subject
903 * to the Platform Considerations described above.
904 */
905 void
906 errorq_commit(errorq_t *eqp, errorq_elem_t *eqep, uint_t flag)
907 {
908 errorq_elem_t *old;
909
910 if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
911 atomic_add_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64, 1);
912 return;
913 }
914
915 for (;;) {
916 old = eqp->eq_pend;
917 eqep->eqe_prev = old;
918 membar_producer();
919
920 if (atomic_cas_ptr(&eqp->eq_pend, old, eqep) == old)
921 break;
922 }
923
924 atomic_add_64(&eqp->eq_kstat.eqk_committed.value.ui64, 1);
925
926 if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
927 ddi_trigger_softintr(eqp->eq_id);
928 }
929
930 /*
931 * Cancel an errorq element reservation by returning the specified element
932 * to the free pool. Duplicate or invalid frees are not supported.
933 */
934 void
935 errorq_cancel(errorq_t *eqp, errorq_elem_t *eqep)
936 {
937 if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE))
938 return;
939
940 BT_ATOMIC_CLEAR(eqp->eq_bitmap, eqep - eqp->eq_elems);
941
942 atomic_add_64(&eqp->eq_kstat.eqk_cancelled.value.ui64, 1);
943 }
944
945 /*
946 * Write elements on the dump list of each nvlist errorq to the dump device.
947 * Upon reboot, fmd(1M) will extract and replay them for diagnosis.
948 */
949 void
950 errorq_dump(void)
951 {
952 errorq_elem_t *eep;
953 errorq_t *eqp;
954
955 if (ereport_dumpbuf == NULL)
956 return; /* reboot or panic before errorq is even set up */
957
958 for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
959 if (!(eqp->eq_flags & ERRORQ_NVLIST) ||
960 !(eqp->eq_flags & ERRORQ_ACTIVE))
961 continue; /* do not dump this queue on panic */
962
|
507 /*
508 * Finally loop back to the start and look for any free bits starting
509 * from the beginning of the bitmap to the current rotor position.
510 */
511 return (bt_availbit(bitmap, nextindex));
512 }
513
514 /*
515 * Dispatch a new error into the queue for later processing. The specified
516 * data buffer is copied into a preallocated queue element. If 'len' is
517 * smaller than the queue element size, the remainder of the queue element is
518 * filled with zeroes. This function may be called from any context subject
519 * to the Platform Considerations described above.
520 */
521 void
522 errorq_dispatch(errorq_t *eqp, const void *data, size_t len, uint_t flag)
523 {
524 errorq_elem_t *eep, *old;
525
526 if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
527 atomic_inc_64(&errorq_lost);
528 return; /* drop error if queue is uninitialized or disabled */
529 }
530
531 for (;;) {
532 int i, rval;
533
534 if ((i = errorq_availbit(eqp->eq_bitmap, eqp->eq_qlen,
535 eqp->eq_rotor)) == -1) {
536 atomic_inc_64(&eqp->eq_kstat.eqk_dropped.value.ui64);
537 return;
538 }
539 BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
540 if (rval == 0) {
541 eqp->eq_rotor = i;
542 eep = &eqp->eq_elems[i];
543 break;
544 }
545 }
546
547 ASSERT(len <= eqp->eq_size);
548 bcopy(data, eep->eqe_data, MIN(eqp->eq_size, len));
549
550 if (len < eqp->eq_size)
551 bzero((caddr_t)eep->eqe_data + len, eqp->eq_size - len);
552
553 for (;;) {
554 old = eqp->eq_pend;
555 eep->eqe_prev = old;
556 membar_producer();
557
558 if (atomic_cas_ptr(&eqp->eq_pend, old, eep) == old)
559 break;
560 }
561
562 atomic_inc_64(&eqp->eq_kstat.eqk_dispatched.value.ui64);
563
564 if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
565 ddi_trigger_softintr(eqp->eq_id);
566 }
567
568 /*
569 * Drain the specified error queue by calling eq_func() for each pending error.
570 * This function must be called at or below LOCK_LEVEL or from panic context.
571 * In order to synchronize with other attempts to drain the queue, we acquire
572 * the adaptive eq_lock, blocking other consumers. Once this lock is held,
573 * we must use compare-and-swap to move the pending list to the processing
574 * list and to return elements to the free pool in order to synchronize
575 * with producers, who do not acquire any locks and only use atomic set/clear.
576 *
577 * An additional constraint on this function is that if the system panics
578 * while this function is running, the panic code must be able to detect and
579 * handle all intermediate states and correctly dequeue all errors. The
580 * errorq_panic() function below will be used for detecting and handling
581 * these intermediate states. The comments in errorq_drain() below explain
582 * how we make sure each intermediate state is distinct and consistent.
850
851 if (errorq_panic_drain(ERRORQ_VITAL) <= errorq_vitalmin)
852 (void) errorq_panic_drain(0);
853 (void) errorq_panic_drain(ERRORQ_VITAL | ERRORQ_NVLIST);
854 (void) errorq_panic_drain(ERRORQ_NVLIST);
855 }
856
857 /*
858 * Reserve an error queue element for later processing and dispatching. The
859  * element is returned to the caller, who may add error-specific data to
860  * the element. The element is returned to the free pool either when
861  * errorq_commit() is called and the element is asynchronously processed,
862 * or immediately when errorq_cancel() is called.
863 */
864 errorq_elem_t *
865 errorq_reserve(errorq_t *eqp)
866 {
867 errorq_elem_t *eqep;
868
869 if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
870 atomic_inc_64(&errorq_lost);
871 return (NULL);
872 }
873
874 for (;;) {
875 int i, rval;
876
877 if ((i = errorq_availbit(eqp->eq_bitmap, eqp->eq_qlen,
878 eqp->eq_rotor)) == -1) {
879 atomic_inc_64(&eqp->eq_kstat.eqk_dropped.value.ui64);
880 return (NULL);
881 }
882 BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
883 if (rval == 0) {
884 eqp->eq_rotor = i;
885 eqep = &eqp->eq_elems[i];
886 break;
887 }
888 }
889
890 if (eqp->eq_flags & ERRORQ_NVLIST) {
891 errorq_nvelem_t *eqnp = eqep->eqe_data;
892 nv_alloc_reset(eqnp->eqn_nva);
893 eqnp->eqn_nvl = fm_nvlist_create(eqnp->eqn_nva);
894 }
895
896 atomic_inc_64(&eqp->eq_kstat.eqk_reserved.value.ui64);
897 return (eqep);
898 }
899
900 /*
901 * Commit an errorq element (eqep) for dispatching.
902 * This function may be called from any context subject
903 * to the Platform Considerations described above.
904 */
905 void
906 errorq_commit(errorq_t *eqp, errorq_elem_t *eqep, uint_t flag)
907 {
908 errorq_elem_t *old;
909
910 if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
911 atomic_inc_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64);
912 return;
913 }
914
915 for (;;) {
916 old = eqp->eq_pend;
917 eqep->eqe_prev = old;
918 membar_producer();
919
920 if (atomic_cas_ptr(&eqp->eq_pend, old, eqep) == old)
921 break;
922 }
923
924 atomic_inc_64(&eqp->eq_kstat.eqk_committed.value.ui64);
925
926 if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
927 ddi_trigger_softintr(eqp->eq_id);
928 }
929
930 /*
931 * Cancel an errorq element reservation by returning the specified element
932 * to the free pool. Duplicate or invalid frees are not supported.
933 */
934 void
935 errorq_cancel(errorq_t *eqp, errorq_elem_t *eqep)
936 {
937 if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE))
938 return;
939
940 BT_ATOMIC_CLEAR(eqp->eq_bitmap, eqep - eqp->eq_elems);
941
942 atomic_inc_64(&eqp->eq_kstat.eqk_cancelled.value.ui64);
943 }
944
945 /*
946 * Write elements on the dump list of each nvlist errorq to the dump device.
947 * Upon reboot, fmd(1M) will extract and replay them for diagnosis.
948 */
949 void
950 errorq_dump(void)
951 {
952 errorq_elem_t *eep;
953 errorq_t *eqp;
954
955 if (ereport_dumpbuf == NULL)
956 return; /* reboot or panic before errorq is even set up */
957
958 for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
959 if (!(eqp->eq_flags & ERRORQ_NVLIST) ||
960 !(eqp->eq_flags & ERRORQ_ACTIVE))
961 continue; /* do not dump this queue on panic */
962
|