Print this page
XXXX introduce drv_sectohz
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/os/devcache.c
+++ new/usr/src/uts/common/os/devcache.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 #include <sys/note.h>
27 27 #include <sys/t_lock.h>
28 28 #include <sys/cmn_err.h>
29 29 #include <sys/instance.h>
30 30 #include <sys/conf.h>
31 31 #include <sys/stat.h>
32 32 #include <sys/ddi.h>
33 33 #include <sys/hwconf.h>
34 34 #include <sys/sunddi.h>
35 35 #include <sys/sunndi.h>
36 36 #include <sys/ddi_impldefs.h>
37 37 #include <sys/ndi_impldefs.h>
38 38 #include <sys/modctl.h>
39 39 #include <sys/dacf.h>
40 40 #include <sys/promif.h>
41 41 #include <sys/cpuvar.h>
42 42 #include <sys/pathname.h>
43 43 #include <sys/kobj.h>
44 44 #include <sys/devcache.h>
45 45 #include <sys/devcache_impl.h>
46 46 #include <sys/sysmacros.h>
47 47 #include <sys/varargs.h>
48 48 #include <sys/callb.h>
49 49
50 50 /*
51 51 * This facility provides interfaces to clients to register,
52 52 * read and update cache data in persisted backing store files,
53 53 * usually in /etc/devices. The data persisted through this
54 54 * mechanism should be stateless data, functioning in the sense
55 55 * of a cache. Writes are performed by a background daemon
56 56 * thread, permitting a client to schedule an update without
57 57 * blocking, then continue updating the data state in
58 58 * parallel. The data is only locked by the daemon thread
59 59 * to pack the data in preparation for the write.
60 60 *
61 61 * Data persisted through this mechanism should be capable
62 62 * of being regenerated through normal system operation,
63 63 * for example attaching all disk devices would cause all
64 64 * devids to be registered for those devices. By caching
65 65 * a devid-device tuple, the system can operate in a
66 66 * more optimal way, directly attaching the device mapped
67 67 * to a devid, rather than burdensomely driving attach of
68 68 * the entire device tree to discover a single device.
69 69 *
70 70 * Note that a client should only need to include
71 71 * <sys/devcache.h> for the supported interfaces.
72 72 *
73 73 * The data per client is entirely within the control of
74 74 * the client. When reading, data unpacked from the backing
75 75 * store should be inserted in the list. The pointer to
76 76 * the list can be retrieved via nvf_list(). When writing,
77 77 * the data on the list is to be packed and returned to the
78 78 * nvpdaemon as an nvlist.
79 79 *
80 80 * Obvious restrictions are imposed by the limits of the
81 81 * nvlist format. The data cannot be read or written
82 82 * piecemeal, and large amounts of data aren't recommended.
83 83 * However, nvlists do allow that data be named and typed
84 84 * and can be size-of-int invariant, and the cached data
85 85 * can be versioned conveniently.
86 86 *
87 87 * The registration involves two steps: a handle is
88 88 * allocated by calling the registration function.
89 89 * This sets up the data referenced by the handle and
90 90 * initializes the lock. Following registration, the
91 91 * client must initialize the data list. The list
92 92 * interfaces require that the list element with offset
93 93 * to the node link be provided. The format of the
94 94 * list element is under the control of the client.
95 95 *
96 96 * Locking: the address of the data list r/w lock provided
97 97 * can be accessed with nvf_lock(). The lock must be held
98 98 * as reader when traversing the list or checking state,
99 99 * such as nvf_is_dirty(). The lock must be held as
100 100 * writer when updating the list or marking it dirty.
101 101 * The lock must not be held when waking the daemon.
102 102 *
103 103 * The data r/w lock is held as writer when the pack,
104 104 * unpack and free list handlers are called. The
105 105 * lock should not be dropped and must be still held
106 106 * upon return. The client should also hold the lock
107 107 * as reader when checking if the list is dirty, and
108 108 * as writer when marking the list dirty or initiating
109 109 * a read.
110 110 *
111 111 * The asynchronous nature of updates allows for the
112 112 * possibility that the data may continue to be updated
113 113 * once the daemon has been notified that an update is
114 114 * desired. The data only needs to be locked against
115 115 * updates when packing the data into the form to be
116 116 * written. When the write of the packed data has
117 117 * completed, the daemon will automatically reschedule
118 118 * an update if the data was marked dirty after the
119 119 * point at which it was packed. Before beginning an
120 120 * update, the daemon attempts to lock the data as
121 121 * writer; if the writer lock is already held, it
122 122 * backs off and retries later. The model is to give
123 123 * priority to the kernel processes generating the
124 124 * data, and that the nature of the data is that
125 125 * it does not change often, can be re-generated when
126 126 * needed, so updates should not happen often and
127 127 * can be delayed until the data stops changing.
128 128 * The client may update the list or mark it dirty
129 129 * any time it is able to acquire the lock as
130 130 * writer first.
131 131 *
132 132 * A failed write will be retried after some delay,
133 133 * in the hope that the cause of the error will be
134 134 * transient, for example a filesystem with no space
135 135 * available. An update on a read-only filesystem
136 136 * is failed silently and not retried; this would be
137 137 * the case when booted off install media.
138 138 *
139 139 * There is no unregister mechanism as of yet, as it
140 140 * hasn't been needed so far.
141 141 */
142 142
143 143 /*
144 144 * Global list of files registered and updated by the nvpflush
145 145 * daemon, protected by the nvf_cache_mutex. While an
146 146 * update is taking place, a file is temporarily moved to
147 147 * the dirty list to avoid locking the primary list for
148 148 * the duration of the update.
149 149 */
150 150 list_t nvf_cache_files;
151 151 list_t nvf_dirty_files;
152 152 kmutex_t nvf_cache_mutex;
153 153
154 154
155 155 /*
156 156 * Allow some delay from an update of the data before flushing
157 157 * to permit simultaneous updates of multiple changes.
158 158 * Changes in the data are expected to be bursty, ie
159 159 * reconfig or hot-plug of a new adapter.
160 160 *
161 161 * kfio_report_error (default 0)
162 162 * Set to 1 to enable some error messages related to low-level
163 163 * kernel file i/o operations.
164 164 *
165 165 * nvpflush_delay (default 10)
166 166 * The number of seconds after data is marked dirty before the
167 167 * flush daemon is triggered to flush the data. A longer period
168 168 * of time permits more data updates per write. Note that
↓ open down ↓ |
168 lines elided |
↑ open up ↑ |
169 169 * every update resets the timer so no repository write will
170 170 * occur while data is being updated continuously.
171 171 *
172 172 * nvpdaemon_idle_time (default 60)
173 173 * The number of seconds the daemon will sleep idle before exiting.
174 174 *
175 175 */
176 176 #define NVPFLUSH_DELAY 10
177 177 #define NVPDAEMON_IDLE_TIME 60
178 178
179 -#define TICKS_PER_SECOND (drv_usectohz(1000000))
179 +#define TICKS_PER_SECOND drv_sectohz(1)
180 180
181 181 /*
182 182 * Tunables
183 183 */
184 184 int kfio_report_error = 0; /* kernel file i/o operations */
185 185 int kfio_disable_read = 0; /* disable all reads */
186 186 int kfio_disable_write = 0; /* disable all writes */
187 187
188 188 int nvpflush_delay = NVPFLUSH_DELAY;
189 189 int nvpdaemon_idle_time = NVPDAEMON_IDLE_TIME;
190 190
191 191 static timeout_id_t nvpflush_id = 0;
192 192 static int nvpflush_timer_busy = 0;
193 193 static int nvpflush_daemon_active = 0;
194 194 static kthread_t *nvpflush_thr_id = 0;
195 195
196 196 static int do_nvpflush = 0;
197 197 static int nvpbusy = 0;
198 198 static kmutex_t nvpflush_lock;
199 199 static kcondvar_t nvpflush_cv;
200 200 static kthread_id_t nvpflush_thread;
201 201 static clock_t nvpticks;
202 202
203 203 static void nvpflush_daemon(void);
204 204
205 205 #ifdef DEBUG
206 206 int nvpdaemon_debug = 0;
207 207 int kfio_debug = 0;
208 208 #endif /* DEBUG */
209 209
210 210 extern int modrootloaded;
211 211 extern void mdi_read_devices_files(void);
212 212 extern void mdi_clean_vhcache(void);
213 213 extern int sys_shutdown;
214 214
/*
 * Initialize the overall cache file management: create the global
 * lists of registered cache files and the mutex protecting them,
 * then initialize the built-in clients (retire store, devid cache).
 * The lists and mutex must exist before any client registers.
 */
void
i_ddi_devices_init(void)
{
	/* Both lists link nvfd_t elements through nvf_link. */
	list_create(&nvf_cache_files, sizeof (nvfd_t),
	    offsetof(nvfd_t, nvf_link));
	list_create(&nvf_dirty_files, sizeof (nvfd_t),
	    offsetof(nvfd_t, nvf_link));
	mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL);
	/* Client registration follows list/mutex setup. */
	retire_store_init();
	devid_cache_init();
}
229 229
/*
 * Read cache files
 * The files read here should be restricted to those
 * that may be required to mount root.
 */
void
i_ddi_read_devices_files(void)
{
	/*
	 * The retire store should be the first file read as it
	 * may need to offline devices. kfio_disable_read is not
	 * used for retire. For the rationale see the tunable
	 * ddi_retire_store_bypass and comments in:
	 * uts/common/os/retire_store.c
	 */

	retire_store_read();

	/* The remaining caches honor the global read-disable tunable. */
	if (!kfio_disable_read) {
		mdi_read_devices_files();
		devid_cache_read();
	}
}
253 253
/*
 * Late-boot hook: set up the flush daemon's synchronization objects
 * and, if any registered cache file was already marked dirty before
 * I/O was available, kick the daemon so it gets flushed now.
 */
void
i_ddi_start_flush_daemon(void)
{
	nvfd_t *nvfdp;

	/* File i/o must be possible before a flush can be scheduled. */
	ASSERT(i_ddi_io_initialized());

	mutex_init(&nvpflush_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&nvpflush_cv, NULL, CV_DRIVER, NULL);

	/* One wakeup is enough; the daemon scans all files itself. */
	mutex_enter(&nvf_cache_mutex);
	for (nvfdp = list_head(&nvf_cache_files); nvfdp;
	    nvfdp = list_next(&nvf_cache_files, nvfdp)) {
		if (NVF_IS_DIRTY(nvfdp)) {
			nvf_wake_daemon();
			break;
		}
	}
	mutex_exit(&nvf_cache_mutex);
}
274 274
/*
 * Clean up cached device data: the devid cache and the mdi
 * vhci cache (see mdi_clean_vhcache()).
 */
void
i_ddi_clean_devices_files(void)
{
	devid_cache_cleanup();
	mdi_clean_vhcache();
}
281 281
282 282 /*
283 283 * Register a cache file to be managed and updated by the nvpflush daemon.
284 284 * All operations are performed through the returned handle.
285 285 * There is no unregister mechanism for now.
286 286 */
287 287 nvf_handle_t
288 288 nvf_register_file(nvf_ops_t *ops)
289 289 {
290 290 nvfd_t *nvfdp;
291 291
292 292 nvfdp = kmem_zalloc(sizeof (*nvfdp), KM_SLEEP);
293 293
294 294 nvfdp->nvf_ops = ops;
295 295 nvfdp->nvf_flags = 0;
296 296 rw_init(&nvfdp->nvf_lock, NULL, RW_DRIVER, NULL);
297 297
298 298 mutex_enter(&nvf_cache_mutex);
299 299 list_insert_tail(&nvf_cache_files, nvfdp);
300 300 mutex_exit(&nvf_cache_mutex);
301 301
302 302 return ((nvf_handle_t)nvfdp);
303 303 }
304 304
305 305 /*PRINTFLIKE1*/
306 306 void
307 307 nvf_error(const char *fmt, ...)
308 308 {
309 309 va_list ap;
310 310
311 311 if (kfio_report_error) {
312 312 va_start(ap, fmt);
313 313 vcmn_err(CE_NOTE, fmt, ap);
314 314 va_end(ap);
315 315 }
316 316 }
317 317
318 318 /*
319 319 * Some operations clients may use to manage the data
320 320 * to be persisted in a cache file.
321 321 */
322 322 char *
323 323 nvf_cache_name(nvf_handle_t handle)
324 324 {
325 325 return (((nvfd_t *)handle)->nvf_cache_path);
326 326 }
327 327
328 328 krwlock_t *
329 329 nvf_lock(nvf_handle_t handle)
330 330 {
331 331 return (&(((nvfd_t *)handle)->nvf_lock));
332 332 }
333 333
334 334 list_t *
335 335 nvf_list(nvf_handle_t handle)
336 336 {
337 337 return (&(((nvfd_t *)handle)->nvf_data_list));
338 338 }
339 339
340 340 void
341 341 nvf_mark_dirty(nvf_handle_t handle)
342 342 {
343 343 ASSERT(RW_WRITE_HELD(&(((nvfd_t *)handle)->nvf_lock)));
344 344 NVF_MARK_DIRTY((nvfd_t *)handle);
345 345 }
346 346
347 347 int
348 348 nvf_is_dirty(nvf_handle_t handle)
349 349 {
350 350 ASSERT(RW_LOCK_HELD(&(((nvfd_t *)handle)->nvf_lock)));
351 351 return (NVF_IS_DIRTY((nvfd_t *)handle));
352 352 }
353 353
354 354 static uint16_t
355 355 nvp_cksum(uchar_t *buf, int64_t buflen)
356 356 {
357 357 uint16_t cksum = 0;
358 358 uint16_t *p = (uint16_t *)buf;
359 359 int64_t n;
360 360
361 361 if ((buflen & 0x01) != 0) {
362 362 buflen--;
363 363 cksum = buf[buflen];
364 364 }
365 365 n = buflen / 2;
366 366 while (n-- > 0)
367 367 cksum ^= *p++;
368 368 return (cksum);
369 369 }
370 370
/*
 * Read and unpack an nvlist from a backing store file.
 * The on-disk layout is a fixed nvpf_hdr_t header followed by the
 * packed nvlist; the header carries separate checksums over itself
 * and over the payload so corruption can be detected.
 * On success, *ret_nvlist holds the unpacked list (caller frees).
 * Returns 0, ENOENT (no file), EIO (read error), or EINVAL
 * (truncated/corrupted contents).
 */
int
fread_nvlist(char *filename, nvlist_t **ret_nvlist)
{
	struct _buf *file;
	nvpf_hdr_t hdr;
	char *buf;
	nvlist_t *nvl;
	int rval;
	uint_t offset;
	int n;
	char c;
	uint16_t cksum, hdrsum;

	*ret_nvlist = NULL;

	file = kobj_open_file(filename);
	if (file == (struct _buf *)-1) {
		KFDEBUG((CE_CONT, "cannot open file: %s\n", filename));
		return (ENOENT);
	}

	/* Read the fixed-size header first; anything short is invalid. */
	offset = 0;
	n = kobj_read_file(file, (char *)&hdr, sizeof (hdr), offset);
	if (n != sizeof (hdr)) {
		kobj_close_file(file);
		if (n < 0) {
			nvf_error("error reading header: %s\n", filename);
			return (EIO);
		} else if (n == 0) {
			KFDEBUG((CE_CONT, "file empty: %s\n", filename));
		} else {
			nvf_error("header size incorrect: %s\n", filename);
		}
		return (EINVAL);
	}
	offset += n;

	KFDEBUG2((CE_CONT, "nvpf_magic: 0x%x\n", hdr.nvpf_magic));
	KFDEBUG2((CE_CONT, "nvpf_version: %d\n", hdr.nvpf_version));
	KFDEBUG2((CE_CONT, "nvpf_size: %lld\n",
	    (longlong_t)hdr.nvpf_size));
	KFDEBUG2((CE_CONT, "nvpf_hdr_chksum: 0x%x\n",
	    hdr.nvpf_hdr_chksum));
	KFDEBUG2((CE_CONT, "nvpf_chksum: 0x%x\n", hdr.nvpf_chksum));

	/*
	 * The header checksum was computed with the checksum field
	 * itself zeroed (see fwrite_nvlist()), so clear it before
	 * recomputing for comparison.
	 */
	cksum = hdr.nvpf_hdr_chksum;
	hdr.nvpf_hdr_chksum = 0;
	hdrsum = nvp_cksum((uchar_t *)&hdr, sizeof (hdr));

	if (hdr.nvpf_magic != NVPF_HDR_MAGIC ||
	    hdr.nvpf_version != NVPF_HDR_VERSION || hdrsum != cksum) {
		kobj_close_file(file);
		if (hdrsum != cksum) {
			nvf_error("%s: checksum error "
			    "(actual 0x%x, expected 0x%x)\n",
			    filename, hdrsum, cksum);
		}
		nvf_error("%s: header information incorrect", filename);
		return (EINVAL);
	}

	ASSERT(hdr.nvpf_size >= 0);

	/* Read the packed nvlist payload; its size comes from the header. */
	buf = kmem_alloc(hdr.nvpf_size, KM_SLEEP);
	n = kobj_read_file(file, buf, hdr.nvpf_size, offset);
	if (n != hdr.nvpf_size) {
		kmem_free(buf, hdr.nvpf_size);
		kobj_close_file(file);
		if (n < 0) {
			nvf_error("%s: read error %d", filename, n);
		} else {
			nvf_error("%s: incomplete read %d/%lld",
			    filename, n, (longlong_t)hdr.nvpf_size);
		}
		return (EINVAL);
	}
	offset += n;

	/* The payload must be followed by EOF, else the file is suspect. */
	rval = kobj_read_file(file, &c, 1, offset);
	kobj_close_file(file);
	if (rval > 0) {
		nvf_error("%s is larger than %lld\n",
		    filename, (longlong_t)hdr.nvpf_size);
		kmem_free(buf, hdr.nvpf_size);
		return (EINVAL);
	}

	/* Verify the payload checksum recorded in the header. */
	cksum = nvp_cksum((uchar_t *)buf, hdr.nvpf_size);
	if (hdr.nvpf_chksum != cksum) {
		nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n",
		    filename, hdr.nvpf_chksum, cksum);
		kmem_free(buf, hdr.nvpf_size);
		return (EINVAL);
	}

	nvl = NULL;
	rval = nvlist_unpack(buf, hdr.nvpf_size, &nvl, 0);
	if (rval != 0) {
		nvf_error("%s: error %d unpacking nvlist\n",
		    filename, rval);
		kmem_free(buf, hdr.nvpf_size);
		return (EINVAL);
	}

	kmem_free(buf, hdr.nvpf_size);
	*ret_nvlist = nvl;
	return (0);
}
479 479
/*
 * Create a file for writing (truncating any existing contents) and
 * return a kfile_t handle for it through *kfilep.  Note kf_fname
 * stores the caller's filename pointer, not a copy, so the caller's
 * string must outlive the handle.  Returns 0 or the vn_open() error.
 */
static int
kfcreate(char *filename, kfile_t **kfilep)
{
	kfile_t *fp;
	int rval;

	ASSERT(modrootloaded);

	fp = kmem_alloc(sizeof (kfile_t), KM_SLEEP);

	/* Create-or-truncate, write-only; kf_state tracks sticky errors. */
	fp->kf_vnflags = FCREAT | FWRITE | FTRUNC;
	fp->kf_fname = filename;
	fp->kf_fpos = 0;
	fp->kf_state = 0;

	KFDEBUG((CE_CONT, "create: %s flags 0x%x\n",
	    filename, fp->kf_vnflags));
	rval = vn_open(filename, UIO_SYSSPACE, fp->kf_vnflags,
	    0444, &fp->kf_vp, CRCREAT, 0);
	if (rval != 0) {
		kmem_free(fp, sizeof (kfile_t));
		KFDEBUG((CE_CONT, "%s: create error %d\n",
		    filename, rval));
		return (rval);
	}

	*kfilep = fp;
	return (0);
}
509 509
510 510 static int
511 511 kfremove(char *filename)
512 512 {
513 513 int rval;
514 514
515 515 KFDEBUG((CE_CONT, "remove: %s\n", filename));
516 516 rval = vn_remove(filename, UIO_SYSSPACE, RMFILE);
517 517 if (rval != 0) {
518 518 KFDEBUG((CE_CONT, "%s: remove error %d\n",
519 519 filename, rval));
520 520 }
521 521 return (rval);
522 522 }
523 523
/*
 * Read up to bufsiz bytes from the current file position into buf,
 * returning the byte count through *ret_n and advancing kf_fpos.
 * Once any i/o error has been recorded in kf_state, all subsequent
 * operations on this handle fail immediately with that error.
 * Returns 0 or an errno.
 */
static int
kfread(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n)
{
	ssize_t resid;
	int err;
	ssize_t n;

	ASSERT(modrootloaded);

	/* Sticky error: refuse further i/o after the first failure. */
	if (fp->kf_state != 0)
		return (fp->kf_state);

	err = vn_rdwr(UIO_READ, fp->kf_vp, buf, bufsiz, fp->kf_fpos,
	    UIO_SYSSPACE, 0, (rlim64_t)0, kcred, &resid);
	if (err != 0) {
		KFDEBUG((CE_CONT, "%s: read error %d\n",
		    fp->kf_fname, err));
		fp->kf_state = err;
		return (err);
	}

	/* resid is what vn_rdwr() did NOT transfer. */
	ASSERT(resid >= 0 && resid <= bufsiz);
	n = bufsiz - resid;

	KFDEBUG1((CE_CONT, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n",
	    fp->kf_fname, n, bufsiz, resid));

	fp->kf_fpos += n;
	*ret_n = n;
	return (0);
}
555 555
/*
 * Write bufsiz bytes to the file at the current position, retrying
 * partial writes until everything is transferred.  The total byte
 * count is returned through *ret_n.  A write that makes no progress
 * at all is treated as ENOSPC.  Errors are recorded in kf_state so
 * later operations on the handle fail fast.  Returns 0 or an errno.
 */
static int
kfwrite(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n)
{
	rlim64_t rlimit;
	ssize_t resid;
	int err;
	ssize_t len;
	ssize_t n = 0;

	ASSERT(modrootloaded);

	/* Sticky error: refuse further i/o after the first failure. */
	if (fp->kf_state != 0)
		return (fp->kf_state);

	len = bufsiz;
	rlimit = bufsiz + 1;
	for (;;) {
		/* FSYNC: data is written through, not just cached. */
		err = vn_rdwr(UIO_WRITE, fp->kf_vp, buf, len, fp->kf_fpos,
		    UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
		if (err) {
			KFDEBUG((CE_CONT, "%s: write error %d\n",
			    fp->kf_fname, err));
			fp->kf_state = err;
			return (err);
		}

		KFDEBUG1((CE_CONT, "%s: write %ld bytes ok %ld resid\n",
		    fp->kf_fname, len-resid, resid));

		ASSERT(resid >= 0 && resid <= len);

		n += (len - resid);
		if (resid == 0)
			break;

		/* No progress at all: assume the filesystem is full. */
		if (resid == len) {
			KFDEBUG((CE_CONT, "%s: filesystem full?\n",
			    fp->kf_fname));
			fp->kf_state = ENOSPC;
			return (ENOSPC);
		}

		/* Partial write: advance past what landed and retry. */
		len -= resid;
		buf += len;
		fp->kf_fpos += len;
		len = resid;
	}

	ASSERT(n == bufsiz);
	KFDEBUG1((CE_CONT, "%s: wrote %ld bytes ok\n", fp->kf_fname, n));

	*ret_n = n;
	return (0);
}
610 610
611 611
/*
 * Close a kfile_t handle, syncing first if it was opened for write
 * and no error has been recorded.  The vnode is released and the
 * handle freed regardless of errors.  Returns the VOP_CLOSE() result.
 */
static int
kfclose(kfile_t *fp)
{
	int rval;

	KFDEBUG((CE_CONT, "close: %s\n", fp->kf_fname));

	/* Only sync a write handle that has seen no prior errors. */
	if ((fp->kf_vnflags & FWRITE) && fp->kf_state == 0) {
		rval = VOP_FSYNC(fp->kf_vp, FSYNC, kcred, NULL);
		if (rval != 0) {
			nvf_error("%s: sync error %d\n",
			    fp->kf_fname, rval);
		}
		KFDEBUG((CE_CONT, "%s: sync ok\n", fp->kf_fname));
	}

	rval = VOP_CLOSE(fp->kf_vp, fp->kf_vnflags, 1,
	    (offset_t)0, kcred, NULL);
	if (rval != 0) {
		/* Don't pile a close complaint onto an earlier error. */
		if (fp->kf_state == 0) {
			nvf_error("%s: close error %d\n",
			    fp->kf_fname, rval);
		}
	} else {
		if (fp->kf_state == 0)
			KFDEBUG((CE_CONT, "%s: close ok\n", fp->kf_fname));
	}

	VN_RELE(fp->kf_vp);
	kmem_free(fp, sizeof (kfile_t));
	return (rval);
}
644 644
645 645 static int
646 646 kfrename(char *oldname, char *newname)
647 647 {
648 648 int rval;
649 649
650 650 ASSERT(modrootloaded);
651 651
652 652 KFDEBUG((CE_CONT, "renaming %s to %s\n", oldname, newname));
653 653
654 654 if ((rval = vn_rename(oldname, newname, UIO_SYSSPACE)) != 0) {
655 655 KFDEBUG((CE_CONT, "rename %s to %s: %d\n",
656 656 oldname, newname, rval));
657 657 }
658 658
659 659 return (rval);
660 660 }
661 661
662 662 int
663 663 fwrite_nvlist(char *filename, nvlist_t *nvl)
664 664 {
665 665 char *buf;
666 666 char *nvbuf;
667 667 kfile_t *fp;
668 668 char *newname;
669 669 int len, err, err1;
670 670 size_t buflen;
671 671 ssize_t n;
672 672
673 673 ASSERT(modrootloaded);
674 674
675 675 nvbuf = NULL;
676 676 err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0);
677 677 if (err != 0) {
678 678 nvf_error("%s: error %d packing nvlist\n",
679 679 filename, err);
680 680 return (err);
681 681 }
682 682
683 683 buf = kmem_alloc(sizeof (nvpf_hdr_t) + buflen, KM_SLEEP);
684 684 bzero(buf, sizeof (nvpf_hdr_t));
685 685
686 686 ((nvpf_hdr_t *)buf)->nvpf_magic = NVPF_HDR_MAGIC;
687 687 ((nvpf_hdr_t *)buf)->nvpf_version = NVPF_HDR_VERSION;
688 688 ((nvpf_hdr_t *)buf)->nvpf_size = buflen;
689 689 ((nvpf_hdr_t *)buf)->nvpf_chksum = nvp_cksum((uchar_t *)nvbuf, buflen);
690 690 ((nvpf_hdr_t *)buf)->nvpf_hdr_chksum =
691 691 nvp_cksum((uchar_t *)buf, sizeof (nvpf_hdr_t));
692 692
693 693 bcopy(nvbuf, buf + sizeof (nvpf_hdr_t), buflen);
694 694 kmem_free(nvbuf, buflen);
695 695 buflen += sizeof (nvpf_hdr_t);
696 696
697 697 len = strlen(filename) + MAX_SUFFIX_LEN + 2;
698 698 newname = kmem_alloc(len, KM_SLEEP);
699 699
700 700
701 701 (void) sprintf(newname, "%s.%s", filename, NEW_FILENAME_SUFFIX);
702 702
703 703 /*
704 704 * To make it unlikely we suffer data loss, write
705 705 * data to the new temporary file. Once successful
706 706 * complete the transaction by renaming the new file
707 707 * to replace the previous.
708 708 */
709 709
710 710 if ((err = kfcreate(newname, &fp)) == 0) {
711 711 err = kfwrite(fp, buf, buflen, &n);
712 712 if (err) {
713 713 nvf_error("%s: write error - %d\n",
714 714 newname, err);
715 715 } else {
716 716 if (n != buflen) {
717 717 nvf_error(
718 718 "%s: partial write %ld of %ld bytes\n",
719 719 newname, n, buflen);
720 720 nvf_error("%s: filesystem may be full?\n",
721 721 newname);
722 722 err = EIO;
723 723 }
724 724 }
725 725 if ((err1 = kfclose(fp)) != 0) {
726 726 nvf_error("%s: close error\n", newname);
727 727 if (err == 0)
728 728 err = err1;
729 729 }
730 730 if (err != 0) {
731 731 if (kfremove(newname) != 0) {
732 732 nvf_error("%s: remove failed\n",
733 733 newname);
734 734 }
735 735 }
736 736 } else {
737 737 nvf_error("%s: create failed - %d\n", filename, err);
738 738 }
739 739
740 740 if (err == 0) {
741 741 if ((err = kfrename(newname, filename)) != 0) {
742 742 nvf_error("%s: rename from %s failed\n",
743 743 newname, filename);
744 744 }
745 745 }
746 746
747 747 kmem_free(newname, len);
748 748 kmem_free(buf, buflen);
749 749
750 750 return (err);
751 751 }
752 752
753 753 static int
754 754 e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl)
755 755 {
756 756 int err;
757 757
758 758 if ((err = fwrite_nvlist(nvfd->nvf_cache_path, nvl)) == 0)
759 759 return (DDI_SUCCESS);
760 760 else {
761 761 if (err == EROFS)
762 762 NVF_MARK_READONLY(nvfd);
763 763 return (DDI_FAILURE);
764 764 }
765 765 }
766 766
767 767 static void
768 768 nvp_list_free(nvfd_t *nvf)
769 769 {
770 770 ASSERT(RW_WRITE_HELD(&nvf->nvf_lock));
771 771 (nvf->nvf_list_free)((nvf_handle_t)nvf);
772 772 ASSERT(RW_WRITE_HELD(&nvf->nvf_lock));
773 773 }
774 774
/*
 * Read a file in the nvlist format
 * EIO - i/o error during read
 * ENOENT - file not found
 * EINVAL - file contents corrupted
 *
 * Each top-level nvpair must itself be an nvlist; its name and
 * contents are handed to the client's unpack callback, which adds
 * elements to the data list.  On any error the client's data list
 * is freed so no partially-unpacked state remains.
 */
static int
fread_nvp_list(nvfd_t *nvfd)
{
	nvlist_t *nvl;
	nvpair_t *nvp;
	char *name;
	nvlist_t *sublist;
	int rval;
	int rv;

	/* Caller holds the data lock as writer across the whole read. */
	ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));

	rval = fread_nvlist(nvfd->nvf_cache_path, &nvl);
	if (rval != 0)
		return (rval);
	ASSERT(nvl != NULL);

	nvp = NULL;
	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		name = nvpair_name(nvp);
		ASSERT(strlen(name) > 0);

		switch (nvpair_type(nvp)) {
		case DATA_TYPE_NVLIST:
			rval = nvpair_value_nvlist(nvp, &sublist);
			if (rval != 0) {
				nvf_error(
				    "nvpair_value_nvlist error %s %d\n",
				    name, rval);
				goto error;
			}

			/*
			 * unpack nvlist for this device and
			 * add elements to data list.
			 */
			ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
			rv = (nvfd->nvf_unpack_nvlist)
			    ((nvf_handle_t)nvfd, sublist, name);
			ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
			if (rv != 0) {
				nvf_error(
				    "%s: %s invalid list element\n",
				    nvfd->nvf_cache_path, name);
				rval = EINVAL;
				goto error;
			}
			break;

		default:
			/* Only nested nvlists are valid at top level. */
			nvf_error("%s: %s unsupported data type %d\n",
			    nvfd->nvf_cache_path, name, nvpair_type(nvp));
			rval = EINVAL;
			goto error;
		}
	}

	nvlist_free(nvl);

	return (0);

error:
	/* Discard anything already unpacked; leave the list empty. */
	nvlist_free(nvl);
	nvp_list_free(nvfd);
	return (rval);
}
847 847
848 848
/*
 * Read and unpack a registered cache file into the client's data
 * list.  Caller must hold the data lock as writer.  On failure a
 * diagnostic is emitted and a flag is set so that the next
 * successful flush reports that the file was created/rebuilt
 * (see nvpflush_one()).  Returns 0 or the fread_nvp_list() error.
 */
int
nvf_read_file(nvf_handle_t nvf_handle)
{
	nvfd_t *nvfd = (nvfd_t *)nvf_handle;
	int rval;

	ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));

	/* Reads can be disabled entirely via the kfio tunable. */
	if (kfio_disable_read)
		return (0);

	KFDEBUG((CE_CONT, "reading %s\n", nvfd->nvf_cache_path));

	rval = fread_nvp_list(nvfd);
	if (rval) {
		switch (rval) {
		case EIO:
			nvfd->nvf_flags |= NVF_F_REBUILD_MSG;
			cmn_err(CE_WARN, "%s: I/O error",
			    nvfd->nvf_cache_path);
			break;
		case ENOENT:
			/* Missing file is normal first boot; note it. */
			nvfd->nvf_flags |= NVF_F_CREATE_MSG;
			nvf_error("%s: not found\n",
			    nvfd->nvf_cache_path);
			break;
		case EINVAL:
		default:
			nvfd->nvf_flags |= NVF_F_REBUILD_MSG;
			cmn_err(CE_WARN, "%s: data file corrupted",
			    nvfd->nvf_cache_path);
			break;
		}
	}
	return (rval);
}
885 885
886 886 static void
887 887 nvf_write_is_complete(nvfd_t *fd)
888 888 {
889 889 if (fd->nvf_write_complete) {
890 890 (fd->nvf_write_complete)((nvf_handle_t)fd);
891 891 }
892 892 }
893 893
/*
 * Timeout callback armed by nvf_wake_daemon().  nvpticks is the
 * target flush time and is pushed forward each time data is marked
 * dirty, so bursts of updates coalesce into a single write.
 */
/*ARGSUSED*/
static void
nvpflush_timeout(void *arg)
{
	clock_t nticks;

	mutex_enter(&nvpflush_lock);
	nticks = nvpticks - ddi_get_lbolt();
	if (nticks > 4) {
		/*
		 * The deadline moved out since this timeout was armed;
		 * re-arm for the remaining interval instead of flushing.
		 */
		nvpflush_timer_busy = 1;
		mutex_exit(&nvpflush_lock);
		nvpflush_id = timeout(nvpflush_timeout, NULL, nticks);
	} else {
		/* Deadline (within 4 ticks) reached: wake the daemon. */
		do_nvpflush = 1;
		NVPDAEMON_DEBUG((CE_CONT, "signal nvpdaemon\n"));
		cv_signal(&nvpflush_cv);
		nvpflush_id = 0;
		nvpflush_timer_busy = 0;
		mutex_exit(&nvpflush_lock);
	}
}
915 915
/*
 * After marking a list as dirty, wake the nvpflush daemon
 * to perform the update.
 *
 * Starts the daemon thread on first use, then (re)sets the flush
 * deadline nvpflush_delay seconds into the future and arms the
 * timeout if one isn't already pending.  Repeated calls while the
 * timer is busy just push nvpticks forward; nvpflush_timeout()
 * re-arms itself until the deadline stops moving.
 */
void
nvf_wake_daemon(void)
{
	clock_t nticks;

	/*
	 * If the system isn't up yet or is shutting down,
	 * don't even think about starting a flush.
	 */
	if (!i_ddi_io_initialized() || sys_shutdown)
		return;

	mutex_enter(&nvpflush_lock);

	if (nvpflush_daemon_active == 0) {
		/* Claim the daemon before dropping the lock to create it. */
		nvpflush_daemon_active = 1;
		mutex_exit(&nvpflush_lock);
		NVPDAEMON_DEBUG((CE_CONT, "starting nvpdaemon thread\n"));
		nvpflush_thr_id = thread_create(NULL, 0,
		    (void (*)())nvpflush_daemon,
		    NULL, 0, &p0, TS_RUN, minclsyspri);
		mutex_enter(&nvpflush_lock);
	}

	/* Push the flush deadline out; the timeout compares to nvpticks. */
	nticks = nvpflush_delay * TICKS_PER_SECOND;
	nvpticks = ddi_get_lbolt() + nticks;
	if (nvpflush_timer_busy == 0) {
		nvpflush_timer_busy = 1;
		mutex_exit(&nvpflush_lock);
		nvpflush_id = timeout(nvpflush_timeout, NULL, nticks + 4);
	} else
		mutex_exit(&nvpflush_lock);
}
953 953
/*
 * Flush one dirty cache file to its backing store.
 * Returns DDI_SUCCESS when the file no longer needs flushing,
 * DDI_FAILURE when the caller should reschedule (lock contention,
 * write error, or the data was re-dirtied during the write).
 */
static int
nvpflush_one(nvfd_t *nvfd)
{
	int rval = DDI_SUCCESS;
	nvlist_t *nvl;

	rw_enter(&nvfd->nvf_lock, RW_READER);

	ASSERT((nvfd->nvf_flags & NVF_F_FLUSHING) == 0);

	/* Nothing to do, or writes aren't possible/allowed right now. */
	if (!NVF_IS_DIRTY(nvfd) ||
	    NVF_IS_READONLY(nvfd) || kfio_disable_write || sys_shutdown) {
		NVF_CLEAR_DIRTY(nvfd);
		rw_exit(&nvfd->nvf_lock);
		return (DDI_SUCCESS);
	}

	/*
	 * Upgrade to writer for packing; if another writer holds the
	 * lock, back off and let the daemon retry later (kernel
	 * updaters have priority over the flush).
	 */
	if (rw_tryupgrade(&nvfd->nvf_lock) == 0) {
		nvf_error("nvpflush: "
		    "%s rw upgrade failed\n", nvfd->nvf_cache_path);
		rw_exit(&nvfd->nvf_lock);
		return (DDI_FAILURE);
	}
	/* Pack callback runs, and must return, with the writer lock held. */
	if (((nvfd->nvf_pack_list)
	    ((nvf_handle_t)nvfd, &nvl)) != DDI_SUCCESS) {
		nvf_error("nvpflush: "
		    "%s nvlist construction failed\n", nvfd->nvf_cache_path);
		ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
		rw_exit(&nvfd->nvf_lock);
		return (DDI_FAILURE);
	}
	ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));

	/*
	 * Clear dirty before dropping the lock: anything marked dirty
	 * from here on happened after this snapshot and triggers a
	 * re-flush (checked below).  The file i/o runs unlocked.
	 */
	NVF_CLEAR_DIRTY(nvfd);
	nvfd->nvf_flags |= NVF_F_FLUSHING;
	rw_exit(&nvfd->nvf_lock);

	rval = e_fwrite_nvlist(nvfd, nvl);
	nvlist_free(nvl);

	rw_enter(&nvfd->nvf_lock, RW_WRITER);
	nvfd->nvf_flags &= ~NVF_F_FLUSHING;
	if (rval == DDI_FAILURE) {
		if (NVF_IS_READONLY(nvfd)) {
			/* Read-only fs (e.g. install media): fail silently. */
			rval = DDI_SUCCESS;
			nvfd->nvf_flags &= ~(NVF_F_ERROR | NVF_F_DIRTY);
		} else if ((nvfd->nvf_flags & NVF_F_ERROR) == 0) {
			/* Complain once; re-dirty so the write is retried. */
			cmn_err(CE_CONT,
			    "%s: update failed\n", nvfd->nvf_cache_path);
			nvfd->nvf_flags |= NVF_F_ERROR | NVF_F_DIRTY;
		}
	} else {
		if (nvfd->nvf_flags & NVF_F_CREATE_MSG) {
			cmn_err(CE_CONT,
			    "!Creating %s\n", nvfd->nvf_cache_path);
			nvfd->nvf_flags &= ~NVF_F_CREATE_MSG;
		}
		if (nvfd->nvf_flags & NVF_F_REBUILD_MSG) {
			cmn_err(CE_CONT,
			    "!Rebuilding %s\n", nvfd->nvf_cache_path);
			nvfd->nvf_flags &= ~NVF_F_REBUILD_MSG;
		}
		if (nvfd->nvf_flags & NVF_F_ERROR) {
			cmn_err(CE_CONT,
			    "%s: update now ok\n", nvfd->nvf_cache_path);
			nvfd->nvf_flags &= ~NVF_F_ERROR;
		}
		/*
		 * The file may need to be flushed again if the cached
		 * data was touched while writing the earlier contents.
		 */
		if (NVF_IS_DIRTY(nvfd))
			rval = DDI_FAILURE;
	}

	rw_exit(&nvfd->nvf_lock);
	return (rval);
}
1032 1032
1033 1033
/*
 * Daemon thread that writes dirty nvlist cache files out to their
 * persisted backing store.  It sleeps on nvpflush_cv until do_nvpflush
 * is set (presumably by nvf_wake_daemon() when a client dirties a
 * file — confirm against the caller), flushes every dirty file via
 * nvpflush_one(), and retires itself after nvpdaemon_idle_time seconds
 * of inactivity or at system shutdown.  Participates in the CPR
 * (suspend/resume) callback protocol via callb_generic_cpr.
 */
static void
nvpflush_daemon(void)
{
	callb_cpr_t cprinfo;
	nvfd_t *nvfdp, *nextfdp;
	clock_t clk;
	int rval;
	int want_wakeup;	/* reschedule another flush pass? */
	int is_now_clean;	/* move file back to the clean list? */

	ASSERT(modrootloaded);

	nvpflush_thread = curthread;
	NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: init\n"));

	CALLB_CPR_INIT(&cprinfo, &nvpflush_lock, callb_generic_cpr, "nvp");
	mutex_enter(&nvpflush_lock);
	for (;;) {
		/* Mark ourselves safe for CPR while blocked on the cv. */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		/*
		 * Wait for a flush request.  The timed wait lets the
		 * daemon detect an idle period and exit on its own;
		 * clk == -1 indicates the wait timed out.
		 */
		while (do_nvpflush == 0) {
			clk = cv_reltimedwait(&nvpflush_cv, &nvpflush_lock,
			    (nvpdaemon_idle_time * TICKS_PER_SECOND),
			    TR_CLOCK_TICK);
			if ((clk == -1 && do_nvpflush == 0 &&
			    nvpflush_timer_busy == 0) || sys_shutdown) {
				/*
				 * Note that CALLB_CPR_EXIT calls mutex_exit()
				 * on the lock passed in to CALLB_CPR_INIT,
				 * so the lock must be held when invoking it.
				 */
				CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock);
				NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: exit\n"));
				ASSERT(mutex_owned(&nvpflush_lock));
				nvpflush_thr_id = NULL;
				nvpflush_daemon_active = 0;
				CALLB_CPR_EXIT(&cprinfo);
				thread_exit();
			}
		}
		CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock);

		/* Consume the request and run the flush pass unlocked. */
		nvpbusy = 1;
		want_wakeup = 0;
		do_nvpflush = 0;
		mutex_exit(&nvpflush_lock);

		/*
		 * Try flushing what's dirty, reschedule if there's
		 * a failure or data gets marked as dirty again.
		 * First move each file marked dirty to the dirty
		 * list to avoid locking the list across the write.
		 */
		mutex_enter(&nvf_cache_mutex);
		for (nvfdp = list_head(&nvf_cache_files);
		    nvfdp; nvfdp = nextfdp) {
			/* capture the successor before a possible remove */
			nextfdp = list_next(&nvf_cache_files, nvfdp);
			rw_enter(&nvfdp->nvf_lock, RW_READER);
			if (NVF_IS_DIRTY(nvfdp)) {
				list_remove(&nvf_cache_files, nvfdp);
				list_insert_tail(&nvf_dirty_files, nvfdp);
				rw_exit(&nvfdp->nvf_lock);
			} else {
				NVPDAEMON_DEBUG((CE_CONT,
				    "nvpdaemon: not dirty %s\n",
				    nvfdp->nvf_cache_path));
				rw_exit(&nvfdp->nvf_lock);
			}
		}
		mutex_exit(&nvf_cache_mutex);

		/*
		 * Now go through the dirty list
		 */
		for (nvfdp = list_head(&nvf_dirty_files);
		    nvfdp; nvfdp = nextfdp) {
			nextfdp = list_next(&nvf_dirty_files, nvfdp);

			is_now_clean = 0;
			rw_enter(&nvfdp->nvf_lock, RW_READER);
			if (NVF_IS_DIRTY(nvfdp)) {
				NVPDAEMON_DEBUG((CE_CONT,
				    "nvpdaemon: flush %s\n",
				    nvfdp->nvf_cache_path));
				/*
				 * Drop the per-file lock across the write;
				 * nvpflush_one() manages its own locking.
				 */
				rw_exit(&nvfdp->nvf_lock);
				rval = nvpflush_one(nvfdp);
				rw_enter(&nvfdp->nvf_lock, RW_READER);
				if (rval != DDI_SUCCESS ||
				    NVF_IS_DIRTY(nvfdp)) {
					/*
					 * Flush failed, or the file was
					 * re-dirtied while we wrote it;
					 * leave it on the dirty list and
					 * request another pass.
					 */
					rw_exit(&nvfdp->nvf_lock);
					NVPDAEMON_DEBUG((CE_CONT,
					    "nvpdaemon: %s dirty again\n",
					    nvfdp->nvf_cache_path));
					want_wakeup = 1;
				} else {
					rw_exit(&nvfdp->nvf_lock);
					nvf_write_is_complete(nvfdp);
					is_now_clean = 1;
				}
			} else {
				NVPDAEMON_DEBUG((CE_CONT,
				    "nvpdaemon: not dirty %s\n",
				    nvfdp->nvf_cache_path));
				rw_exit(&nvfdp->nvf_lock);
				is_now_clean = 1;
			}

			/* Clean files migrate back to the cache list. */
			if (is_now_clean) {
				mutex_enter(&nvf_cache_mutex);
				list_remove(&nvf_dirty_files, nvfdp);
				list_insert_tail(&nvf_cache_files,
				    nvfdp);
				mutex_exit(&nvf_cache_mutex);
			}
		}

		/* Something stayed dirty: schedule another flush pass. */
		if (want_wakeup)
			nvf_wake_daemon();

		mutex_enter(&nvpflush_lock);
		nvpbusy = 0;
	}
}
↓ open down ↓ |
966 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX