	    LOCKFS_IS_ROELOCK(lockfsp)) && !from_log) {
		if (ufs_checkaccton(vp) || ufs_checkswapon(vp)) {
			vfs_unlock(vfsp);
			return (EDEADLK);
		}
	}

	ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
	ulp = &ufsvfsp->vfs_ulockfs;
	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	SEARCH_ULOCKFSP(head, ulp, info);

	/*
	 * Suspend both the reclaim thread and the delete thread.
	 * This must be done outside the lockfs locking protocol.
	 */
	ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
	ufs_thread_suspend(&ufsvfsp->vfs_delete);

	mutex_enter(&ulp->ul_lock);
	atomic_inc_ulong(&ufs_quiesce_pend);

	/*
	 * Quit if there is another lockfs request in progress
	 * that is waiting for existing ufs_vnops to complete.
	 */
	if (ULOCKFS_IS_BUSY(ulp)) {
		error = EBUSY;
		goto errexit;
	}

	/* cannot unlock or downgrade a hard-lock */
	if (ULOCKFS_IS_HLOCK(ulp)) {
		error = EIO;
		goto errexit;
	}

	/* an error lock may only be unlocked or relocked */
	if (ULOCKFS_IS_ELOCK(ulp)) {
		if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
			error = EBUSY;

	/* ... elided ... */
	if (lfs.lf_comment && lfs.lf_comlen != 0) {
		kmem_free(lfs.lf_comment, lfs.lf_comlen);
		lfs.lf_comment = NULL;
		lfs.lf_comlen = 0;
	}

	/* do error lock cleanup */
	if (errlck == UN_ERRLCK)
		ufsfx_unlockfs(ufsvfsp);

	else if (errlck == RE_ERRLCK)
		ufsfx_lockfs(ufsvfsp);

	/* don't allow error lock from user to invoke panic */
	else if (from_user && errlck == SET_ERRLCK &&
	    !(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4)))
		(void) ufs_fault(ufsvfsp->vfs_root,
		    ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ?
		    ulp->ul_lockfs.lf_comment : "user-applied error lock");

	atomic_dec_ulong(&ufs_quiesce_pend);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vfsp);

	if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs))
		poll_events |= POLLERR;

	pollwakeup(&ufs_pollhd, poll_events);

	/*
	 * Allow both the delete thread and the reclaim thread to
	 * continue.
	 */
	ufs_thread_continue(&ufsvfsp->vfs_delete);
	ufs_thread_continue(&ufsvfsp->vfs_reclaim);

	return (0);

errout:
	/*
	 * Lock failed. Reset the old lock in ufsvfs if not hard locked.
	 */
	if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) {
		bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs));
		ulp->ul_fs_lock = (1 << lfs.lf_lock);
	}

	/*
	 * Don't call ufs_thaw() when there's a signal during
	 * a ufs quiesce operation, as it can lead to deadlock
	 * with getpage.
	 */
	if (signal == 0)
		(void) ufs_thaw(vfsp, ufsvfsp, ulp);

	ULOCKFS_CLR_BUSY(ulp);
	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);

errexit:
	atomic_dec_ulong(&ufs_quiesce_pend);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vfsp);

	/*
	 * Allow both the delete thread and the reclaim thread to
	 * continue.
	 */
	ufs_thread_continue(&ufsvfsp->vfs_delete);
	ufs_thread_continue(&ufsvfsp->vfs_reclaim);

	return (error);
}
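
/*
 * For context, a hedged sketch of how a lock request typically reaches
 * this code from userland: lockfs(1M) fills in a struct lockfs and
 * issues the _FIOLFS ioctl, which the UFS ioctl path hands to
 * ufs_fiolfs(). Illustrative only - the constants are from sys/lockfs.h
 * and sys/filio.h, and error handling is elided:
 *
 *	struct lockfs lfs;
 *
 *	bzero(&lfs, sizeof (lfs));
 *	lfs.lf_lock = LOCKFS_WLOCK;
 *	if (ioctl(fd, _FIOLFS, &lfs) == -1)
 *		err(1, "_FIOLFS");
 */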

/*
 * fiolfss
 *	return the current file system locking state info
 */
int
ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp)
{
	/* ... elided ... */

		if (ULOCKFS_IS_HLOCK(ulp) ||
		    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
			return (EIO);

		/*
		 * wait for lock status to change
		 */
		if (slock || ufsvfsp->vfs_nointr) {
			cv_wait(&ulp->ul_cv, &ulp->ul_lock);
		} else {
			sigintr(&smask, 1);
			sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock);
			sigunintr(&smask);
			if ((!sig && (ulp->ul_fs_lock & mask)) ||
			    ufsvfsp->vfs_dontblock)
				return (EINTR);
		}
	}

	if (mask & ULOCKFS_FWLOCK) {
		atomic_inc_ulong(&ulp->ul_falloc_cnt);
		ULOCKFS_SET_FALLOC(ulp);
	} else {
		atomic_inc_ulong(&ulp->ul_vnops_cnt);
	}

	return (0);
}
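
/*
 * A note on the mask mechanics used above (a sketch; the authoritative
 * definitions live in sys/fs/ufs_lockfs.h): ul_fs_lock holds a one-hot
 * bit for the current lock state, set as (1 << lf_lock), and each VOP
 * passes a mask of the lock states that must block it, so the core test
 * reduces to
 *
 *	if (ulp->ul_fs_lock & mask)
 *		wait on ul_cv, or fail in the non-blocking paths
 *
 * This is how, for example, an mmap for read can still go through on a
 * delete-locked file system: the delete-lock bit is not in its mask.
 */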

/*
 * Check whether we came across the handcrafted lockfs protocol path. We
 * can't simply check for T_DONTBLOCK here, as one would assume, since that
 * can also falsely catch recursive VOPs going to a different filesystem.
 * Instead, we check whether we already hold the ulockfs->ul_lock mutex.
 */
static int
ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp)
{
	return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1);
}
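
/*
 * A minimal caller sketch (illustrative; the real wrappers live in
 * ufs_vnops.c) of the pairing that the recursion checks above protect:
 * every first-level VOP brackets its work with ufs_lockfs_begin() and
 * ufs_lockfs_end(), and a NULL ulp means the lockfs protocol was
 * deliberately skipped:
 *
 *	struct ulockfs *ulp;
 *	int err;
 *
 *	err = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_READ_MASK);
 *	if (err)
 *		return (err);
 *	... perform the actual vnode operation ...
 *	if (ulp)
 *		ufs_lockfs_end(ulp);
 */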

/*
 * ufs_lockfs_begin - start the lockfs locking protocol
 */

/* ... elided ... */

			    KM_NOSLEEP)) == NULL) {
				*ulpp = NULL;
				return (ENOMEM);
			}
		}
	}

	/*
	 * First time VOP call
	 *
	 * Increment the ctr irrespective of the lockfs state. If the lockfs
	 * state is not ULOCKFS_ULOCK, we can decrement it later. However,
	 * before incrementing we need to check if there is a pending quiesce
	 * request, because if we have a continuous stream of ufs_lockfs_begin
	 * requests pounding on a few CPUs then the ufs_quiesce thread might
	 * never see the value of zero for ctr - a livelock kind of scenario.
	 */
	ctr = (mask & ULOCKFS_FWLOCK) ?
	    &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
	if (!ULOCKFS_IS_SLOCK(ulp)) {
		atomic_inc_ulong(ctr);
		op_cnt_incremented++;
	}

	/*
	 * If the lockfs state (indicated by ul_fs_lock) is not just
	 * ULOCKFS_ULOCK, then we will be routed through ufs_check_lockfs
	 * where there is a check with an appropriate mask to selectively allow
	 * operations permitted for that kind of lockfs state.
	 *
	 * Even these selective operations should not be allowed to go through
	 * if a lockfs request is in progress, because that could modify inodes
	 * during a quiesce and hence cause inode reconciliation failures.
	 * ULOCKFS_SLOCK alone would not be sufficient, so make use of
	 * ufs_quiesce_pend to disallow vnode operations when a quiesce is in
	 * progress.
	 */
	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
		if (op_cnt_incremented)
			if (!atomic_dec_ulong_nv(ctr))
				cv_broadcast(&ulp->ul_cv);
		mutex_enter(&ulp->ul_lock);
		error = ufs_check_lockfs(ufsvfsp, ulp, mask);
		mutex_exit(&ulp->ul_lock);
		if (error) {
			if (ulockfs_info_free == NULL)
				kmem_free(ulockfs_info_temp,
				    sizeof (ulockfs_info_t));
			return (error);
		}
	} else {
		/*
		 * This is the common case of a file system in the unlocked
		 * state.
		 *
		 * If a file system is unlocked, we would expect the ctr to
		 * have been incremented by now. But this will not be true when
		 * a quiesce is winding up - SLOCK was set when we checked
		 * before incrementing the ctr, but by the time we checked for
		 * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone. It is
		 * okay to take ul_lock and go through the slow path in this
		 * uncommon

	/* ... elided ... */
	SEARCH_ULOCKFSP(head, ulp, info);

	/*
	 * If we're called from a first level VOP, we have to have a
	 * valid ulockfs record in the TSD.
	 */
	ASSERT(info != NULL);

	/*
	 * Invalidate the ulockfs record.
	 */
	info->ulp = NULL;

	if (ufs_lockfs_top_vop_return(head))
		curthread->t_flag &= ~T_DONTBLOCK;

	/* fallocate thread */
	if (ULOCKFS_IS_FALLOC(ulp) && info->flags & ULOCK_INFO_FALLOCATE) {
		/* Clear the thread's fallocate state */
		info->flags &= ~ULOCK_INFO_FALLOCATE;
		if (!atomic_dec_ulong_nv(&ulp->ul_falloc_cnt)) {
			mutex_enter(&ulp->ul_lock);
			ULOCKFS_CLR_FALLOC(ulp);
			cv_broadcast(&ulp->ul_cv);
			mutex_exit(&ulp->ul_lock);
		}
	} else { /* normal thread */
		if (!atomic_dec_ulong_nv(&ulp->ul_vnops_cnt))
			cv_broadcast(&ulp->ul_cv);
	}
}
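
/*
 * The other side of the handshake, for context: the atomic_dec_ulong_nv()/
 * cv_broadcast() pairs above wake the quiesce code, which in essence sits
 * in a loop of the form
 *
 *	while (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt)
 *		cv_wait(&ulp->ul_cv, &ulp->ul_lock);
 *
 * so a pending lock request proceeds only once every in-flight VOP has
 * called ufs_lockfs_end(). (A sketch; see ufs_quiesce() for the real
 * logic.)
 */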

/*
 * ufs_lockfs_trybegin - try to start the lockfs locking protocol without
 * blocking.
 */
int
ufs_lockfs_trybegin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
{
	int error = 0;
	int rec_vop;
	ushort_t op_cnt_incremented = 0;
	ulong_t *ctr;
	struct ulockfs *ulp;
	ulockfs_info_t *ulockfs_info;
	ulockfs_info_t *ulockfs_info_free;
	ulockfs_info_t *ulockfs_info_temp;

	/* ... elided ... */
			    KM_NOSLEEP)) == NULL) {
				*ulpp = NULL;
				return (ENOMEM);
			}
		}
	}

	/*
	 * First time VOP call
	 *
	 * Increment the ctr irrespective of the lockfs state. If the lockfs
	 * state is not ULOCKFS_ULOCK, we can decrement it later. However,
	 * before incrementing we need to check if there is a pending quiesce
	 * request, because if we have a continuous stream of ufs_lockfs_begin
	 * requests pounding on a few CPUs then the ufs_quiesce thread might
	 * never see the value of zero for ctr - a livelock kind of scenario.
	 */
	ctr = (mask & ULOCKFS_FWLOCK) ?
	    &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
	if (!ULOCKFS_IS_SLOCK(ulp)) {
		atomic_inc_ulong(ctr);
		op_cnt_incremented++;
	}

	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
		/*
		 * Non-blocking version of the ufs_check_lockfs() code.
		 *
		 * If the file system is not hard locked or error locked
		 * and if ulp->ul_fs_lock allows this operation, increment
		 * the appropriate counter and proceed (for example, if the
		 * file system is delete locked, an mmap can still go
		 * through).
		 */
		if (op_cnt_incremented)
			if (!atomic_dec_ulong_nv(ctr))
				cv_broadcast(&ulp->ul_cv);
		mutex_enter(&ulp->ul_lock);
		if (ULOCKFS_IS_HLOCK(ulp) ||
		    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
			error = EIO;
		else if (ulp->ul_fs_lock & mask)
			error = EAGAIN;

		if (error) {
			mutex_exit(&ulp->ul_lock);
			if (ulockfs_info_free == NULL)
				kmem_free(ulockfs_info_temp,
				    sizeof (ulockfs_info_t));
			return (error);
		}
		atomic_inc_ulong(ctr);
		if (mask & ULOCKFS_FWLOCK)
			ULOCKFS_SET_FALLOC(ulp);
		mutex_exit(&ulp->ul_lock);
	} else {
		/*
		 * This is the common case of a file system in the unlocked
		 * state.
		 *
		 * If a file system is unlocked, we would expect the ctr to
		 * have been incremented by now. But this will not be true
		 * when a quiesce is winding up - SLOCK was set when we
		 * checked before incrementing the ctr, but by the time we
		 * checked for ULOCKFS_IS_JUSTULOCK, the quiesce thread was
		 * gone. Take ul_lock and go through the non-blocking version
		 * of the ufs_check_lockfs() code.
		 */
		if (op_cnt_incremented == 0) {
			mutex_enter(&ulp->ul_lock);
			if (ULOCKFS_IS_HLOCK(ulp) ||
			    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
				error = EIO;
			else if (ulp->ul_fs_lock & mask)
				error = EAGAIN;

			if (error) {
				mutex_exit(&ulp->ul_lock);
				if (ulockfs_info_free == NULL)
					kmem_free(ulockfs_info_temp,
					    sizeof (ulockfs_info_t));
				return (error);
			}
			atomic_inc_ulong(ctr);
			if (mask & ULOCKFS_FWLOCK)
				ULOCKFS_SET_FALLOC(ulp);
			mutex_exit(&ulp->ul_lock);
		} else if (mask & ULOCKFS_FWLOCK) {
			mutex_enter(&ulp->ul_lock);
			ULOCKFS_SET_FALLOC(ulp);
			mutex_exit(&ulp->ul_lock);
		}
	}

	if (ulockfs_info_free != NULL) {
		ulockfs_info_free->ulp = ulp;
		if (mask & ULOCKFS_FWLOCK)
			ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
	} else {
		ulockfs_info_temp->ulp = ulp;
		ulockfs_info_temp->next = ulockfs_info;
		if (mask & ULOCKFS_FWLOCK)
			ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
		ASSERT(ufs_lockfs_key != 0);

	/* ... elided ... */
	 * Detect a recursive VOP call or a handcrafted internal lockfs
	 * protocol path and bail out in that case.
	 */
	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
		*ulpp = NULL;
		return (0);
	} else {
		if (ulockfs_info_free == NULL) {
			if ((ulockfs_info_temp = (ulockfs_info_t *)
			    kmem_zalloc(sizeof (ulockfs_info_t),
			    KM_NOSLEEP)) == NULL) {
				*ulpp = NULL;
				return (ENOMEM);
			}
		}
	}

	/*
	 * First time VOP call
	 */
	atomic_inc_ulong(&ulp->ul_vnops_cnt);
	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
		if (!atomic_dec_ulong_nv(&ulp->ul_vnops_cnt))
			cv_broadcast(&ulp->ul_cv);
		mutex_enter(&ulp->ul_lock);
		if (seg->s_ops == &segvn_ops &&
		    ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) {
			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
		} else if (protp && read_access) {
			/*
			 * Restrict the mapping to read-only. Writes to
			 * this mapping will cause another fault, which
			 * will then be suspended if the fs is write
			 * locked.
			 */
			*protp &= ~PROT_WRITE;
			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
		} else
			mask = (ulong_t)ULOCKFS_GETWRITE_MASK;

		/*
		 * will sleep if this fs is locked against this VOP
		 */