Print this page
XXXX introduce drv_sectohz
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/rpc/clnt_rdma.c
+++ new/usr/src/uts/common/rpc/clnt_rdma.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
26 26 /* All Rights Reserved */
27 27 /*
28 28 * Portions of this source code were derived from Berkeley
29 29 * 4.3 BSD under license from the Regents of the University of
30 30 * California.
31 31 */
32 32
33 33 #include <sys/param.h>
34 34 #include <sys/types.h>
35 35 #include <sys/user.h>
36 36 #include <sys/systm.h>
37 37 #include <sys/sysmacros.h>
38 38 #include <sys/errno.h>
39 39 #include <sys/kmem.h>
40 40 #include <sys/debug.h>
41 41 #include <sys/systm.h>
42 42 #include <sys/kstat.h>
43 43 #include <sys/t_lock.h>
44 44 #include <sys/ddi.h>
45 45 #include <sys/cmn_err.h>
46 46 #include <sys/time.h>
47 47 #include <sys/isa_defs.h>
48 48 #include <sys/zone.h>
49 49 #include <sys/sdt.h>
50 50
51 51 #include <rpc/types.h>
52 52 #include <rpc/xdr.h>
53 53 #include <rpc/auth.h>
54 54 #include <rpc/clnt.h>
55 55 #include <rpc/rpc_msg.h>
56 56 #include <rpc/rpc_rdma.h>
57 57 #include <nfs/nfs.h>
58 58 #include <nfs/nfs4_kprot.h>
59 59
60 60 static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST;
61 61
62 62 static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *,
63 63 XDR *, xdrproc_t, caddr_t);
64 64 static int clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *,
65 65 XDR **, uint_t *);
66 66 static int clnt_setup_rlist(CONN *, XDR *, XDR *);
67 67 static int clnt_setup_wlist(CONN *, XDR *, XDR *, rdma_buf_t *);
68 68 static int clnt_setup_long_reply(CONN *, struct clist **, uint_t);
69 69 static void clnt_check_credit(CONN *);
70 70 static void clnt_return_credit(CONN *);
71 71 static void clnt_decode_long_reply(CONN *, struct clist *,
72 72 struct clist *, XDR *, XDR **, struct clist *,
73 73 struct clist *, uint_t, uint_t);
74 74
75 75 static void clnt_update_credit(CONN *, uint32_t);
76 76
77 77 static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
78 78 caddr_t, xdrproc_t, caddr_t, struct timeval);
79 79 static void clnt_rdma_kabort(CLIENT *);
80 80 static void clnt_rdma_kerror(CLIENT *, struct rpc_err *);
81 81 static bool_t clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t);
82 82 static void clnt_rdma_kdestroy(CLIENT *);
83 83 static bool_t clnt_rdma_kcontrol(CLIENT *, int, char *);
84 84 static int clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *,
85 85 struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
86 86
87 87 /*
88 88 * Operations vector for RDMA based RPC
89 89 */
90 90 static struct clnt_ops rdma_clnt_ops = {
91 91 clnt_rdma_kcallit, /* do rpc call */
92 92 clnt_rdma_kabort, /* abort call */
93 93 clnt_rdma_kerror, /* return error status */
94 94 clnt_rdma_kfreeres, /* free results */
95 95 clnt_rdma_kdestroy, /* destroy rpc handle */
96 96 clnt_rdma_kcontrol, /* the ioctl() of rpc */
97 97 clnt_rdma_ksettimers, /* set retry timers */
98 98 };
99 99
100 100 /*
101 101 * The size of the preserialized RPC header information.
102 102 */
103 103 #define CKU_HDRSIZE 20
104 104 #define CLNT_RDMA_SUCCESS 0
105 105 #define CLNT_RDMA_FAIL (-1)
106 106
107 107 #define AUTH_REFRESH_COUNT 2
108 108
109 109 #define IS_RPCSEC_GSS(authh) \
110 110 (authh->cl_auth->ah_cred.oa_flavor == RPCSEC_GSS)
111 111
/*
 * Per RPC RDMA endpoint details.
 *
 * One of these is allocated per client handle by clnt_rdma_kcreate();
 * the embedded CLIENT is handed back to callers and mapped to/from the
 * private state with the ptoh()/htop() macros.
 */
typedef struct cku_private {
	CLIENT			cku_client;	/* client handle */
	rdma_mod_t		*cku_rd_mod;	/* underlying RDMA mod */
	void			*cku_rd_handle;	/* underlying RDMA device */
	struct netbuf		cku_srcaddr;	/* source address for retries */
	struct netbuf		cku_addr;	/* remote netbuf address */
	int			cku_addrfmly;	/* for finding addr_type */
	struct rpc_err		cku_err;	/* error status */
	struct cred		*cku_cred;	/* credentials */
	XDR			cku_outxdr;	/* xdr stream for output */
	uint32_t		cku_outsz;	/* encoded size of last call msg */
	XDR			cku_inxdr;	/* xdr stream for input */
	/*
	 * Preserialized call header; the extra 4 bytes hold the procedure
	 * number appended for the RPCSEC_GSS path in clnt_compose_rpcmsg().
	 */
	char			cku_rpchdr[CKU_HDRSIZE+4];	/* rpc header */
	uint32_t		cku_xid;	/* current XID */
} cku_private_t;
130 130
131 131 #define CLNT_RDMA_DELAY 10 /* secs to delay after a connection failure */
132 132 static int clnt_rdma_min_delay = CLNT_RDMA_DELAY;
133 133
134 134 struct {
135 135 kstat_named_t rccalls;
136 136 kstat_named_t rcbadcalls;
137 137 kstat_named_t rcbadxids;
138 138 kstat_named_t rctimeouts;
139 139 kstat_named_t rcnewcreds;
140 140 kstat_named_t rcbadverfs;
141 141 kstat_named_t rctimers;
142 142 kstat_named_t rccantconn;
143 143 kstat_named_t rcnomem;
144 144 kstat_named_t rcintrs;
145 145 kstat_named_t rclongrpcs;
146 146 } rdmarcstat = {
147 147 { "calls", KSTAT_DATA_UINT64 },
148 148 { "badcalls", KSTAT_DATA_UINT64 },
149 149 { "badxids", KSTAT_DATA_UINT64 },
150 150 { "timeouts", KSTAT_DATA_UINT64 },
151 151 { "newcreds", KSTAT_DATA_UINT64 },
152 152 { "badverfs", KSTAT_DATA_UINT64 },
153 153 { "timers", KSTAT_DATA_UINT64 },
154 154 { "cantconn", KSTAT_DATA_UINT64 },
155 155 { "nomem", KSTAT_DATA_UINT64 },
156 156 { "interrupts", KSTAT_DATA_UINT64 },
157 157 { "longrpc", KSTAT_DATA_UINT64 }
158 158 };
159 159
160 160 kstat_named_t *rdmarcstat_ptr = (kstat_named_t *)&rdmarcstat;
161 161 uint_t rdmarcstat_ndata = sizeof (rdmarcstat) / sizeof (kstat_named_t);
162 162
163 163 #ifdef DEBUG
164 164 int rdma_clnt_debug = 0;
165 165 #endif
166 166
167 167 #ifdef accurate_stats
168 168 extern kmutex_t rdmarcstat_lock; /* mutex for rcstat updates */
169 169
170 170 #define RCSTAT_INCR(x) \
171 171 mutex_enter(&rdmarcstat_lock); \
172 172 rdmarcstat.x.value.ui64++; \
173 173 mutex_exit(&rdmarcstat_lock);
174 174 #else
175 175 #define RCSTAT_INCR(x) \
176 176 rdmarcstat.x.value.ui64++;
177 177 #endif
178 178
179 179 #define ptoh(p) (&((p)->cku_client))
180 180 #define htop(h) ((cku_private_t *)((h)->cl_private))
181 181
182 182 uint_t
183 183 calc_length(uint_t len)
184 184 {
185 185 len = RNDUP(len);
186 186
187 187 if (len <= 64 * 1024) {
188 188 if (len > 32 * 1024) {
189 189 len = 64 * 1024;
190 190 } else {
191 191 if (len > 16 * 1024) {
192 192 len = 32 * 1024;
193 193 } else {
194 194 if (len > 8 * 1024) {
195 195 len = 16 * 1024;
196 196 } else {
197 197 len = 8 * 1024;
198 198 }
199 199 }
200 200 }
201 201 }
202 202 return (len);
203 203 }
/*
 * Create an RPC/RDMA client handle for program `pgm' version `vers'
 * at remote address `raddr', using the RDMA transport plugin whose
 * API name matches `proto'.
 *
 * On success, stores the new handle in *cl and returns 0.  Returns
 * EINVAL if cl is NULL, no matching RDMATF plugin is registered, or
 * the call header cannot be pre-serialized.  The returned handle uses
 * kernel credentials from authkern_create().
 */
int
clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family,
    rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl)
{
	CLIENT *h;
	struct cku_private *p;
	struct rpc_msg call_msg;
	rdma_registry_t *rp;

	ASSERT(INGLOBALZONE(curproc));

	if (cl == NULL)
		return (EINVAL);
	*cl = NULL;

	p = kmem_zalloc(sizeof (*p), KM_SLEEP);

	/*
	 * Find underlying RDMATF plugin
	 */
	rw_enter(&rdma_lock, RW_READER);
	rp = rdma_mod_head;
	while (rp != NULL) {
		/* non-zero strcmp() means no match; keep walking the list */
		if (strcmp(rp->r_mod->rdma_api, proto))
			rp = rp->r_next;
		else {
			p->cku_rd_mod = rp->r_mod;
			p->cku_rd_handle = handle;
			break;
		}
	}
	rw_exit(&rdma_lock);

	if (p->cku_rd_mod == NULL) {
		/*
		 * Should not happen.
		 * No matching RDMATF plugin.
		 */
		kmem_free(p, sizeof (struct cku_private));
		return (EINVAL);
	}

	h = ptoh(p);
	h->cl_ops = &rdma_clnt_ops;
	h->cl_private = (caddr_t)p;
	h->cl_auth = authkern_create();

	/* call message, just used to pre-serialize below */
	call_msg.rm_xid = 0;
	call_msg.rm_direction = CALL;
	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
	call_msg.rm_call.cb_prog = pgm;
	call_msg.rm_call.cb_vers = vers;

	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE);
	/* pre-serialize call message header */
	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
		/* unwind everything acquired so far */
		XDR_DESTROY(&p->cku_outxdr);
		auth_destroy(h->cl_auth);
		kmem_free(p, sizeof (struct cku_private));
		return (EINVAL);
	}

	/*
	 * Set up the rpc information
	 */
	p->cku_cred = cred;
	/* srcaddr is filled in later from the connection's local address */
	p->cku_srcaddr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
	p->cku_srcaddr.maxlen = raddr->maxlen;
	p->cku_srcaddr.len = 0;
	p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
	p->cku_addr.maxlen = raddr->maxlen;
	p->cku_addr.len = raddr->len;
	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
	p->cku_addrfmly = family;

	*cl = h;
	return (0);
}
283 283
284 284 static void
285 285 clnt_rdma_kdestroy(CLIENT *h)
286 286 {
287 287 struct cku_private *p = htop(h);
288 288
289 289 kmem_free(p->cku_srcaddr.buf, p->cku_srcaddr.maxlen);
290 290 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
291 291 kmem_free(p, sizeof (*p));
292 292 }
293 293
294 294 void
295 295 clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr,
296 296 struct cred *cred)
297 297 {
298 298 struct cku_private *p = htop(h);
299 299 rdma_registry_t *rp;
300 300
301 301 ASSERT(INGLOBALZONE(curproc));
302 302 /*
303 303 * Find underlying RDMATF plugin
304 304 */
305 305 p->cku_rd_mod = NULL;
306 306 rw_enter(&rdma_lock, RW_READER);
307 307 rp = rdma_mod_head;
308 308 while (rp != NULL) {
309 309 if (strcmp(rp->r_mod->rdma_api, proto))
310 310 rp = rp->r_next;
311 311 else {
312 312 p->cku_rd_mod = rp->r_mod;
313 313 p->cku_rd_handle = handle;
314 314 break;
315 315 }
316 316
317 317 }
318 318 rw_exit(&rdma_lock);
319 319
320 320 /*
321 321 * Set up the rpc information
322 322 */
323 323 p->cku_cred = cred;
324 324 p->cku_xid = 0;
325 325
326 326 if (p->cku_addr.maxlen < raddr->len) {
327 327 if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
328 328 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
329 329 p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
330 330 p->cku_addr.maxlen = raddr->maxlen;
331 331 }
332 332
333 333 p->cku_srcaddr.len = 0;
334 334
335 335 p->cku_addr.len = raddr->len;
336 336 bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
337 337 h->cl_ops = &rdma_clnt_ops;
338 338 }
339 339
/*
 * Marshal the RPC call (header, procedure number, credentials and
 * arguments) into the buffer described by `rpcmsg', using the
 * already-created XDR encode stream `xdrs'.
 *
 * Returns CLNT_RDMA_SUCCESS or CLNT_RDMA_FAIL.  On success,
 * p->cku_outsz holds the total encoded length.  On the RPCSEC_GSS
 * path, rpcmsg->addr/len may be updated if encoding grew the buffer,
 * so the caller frees the right one even on failure.
 */
static int
clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum,
    rdma_buf_t *rpcmsg, XDR *xdrs,
    xdrproc_t xdr_args, caddr_t argsp)
{
	cku_private_t *p = htop(h);

	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
		/*
		 * Copy in the preserialized RPC header
		 * information.
		 */
		bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE);

		/*
		 * transaction id is the 1st thing in the output
		 * buffer.
		 */
		/* LINTED pointer alignment */
		(*(uint32_t *)(rpcmsg->addr)) = p->cku_xid;

		/* Skip the preserialized stuff. */
		XDR_SETPOS(xdrs, CKU_HDRSIZE);

		/* Serialize dynamic stuff into the output buffer. */
		if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
		    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
		    (!(*xdr_args)(xdrs, argsp))) {
			DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__dynargs);
			return (CLNT_RDMA_FAIL);
		}
		p->cku_outsz = XDR_GETPOS(xdrs);
	} else {
		/*
		 * RPCSEC_GSS: append the procedure number to the
		 * preserialized header (the +4 bytes of cku_rpchdr)
		 * and let AUTH_WRAP() encode header, proc number and
		 * arguments together from position 0.
		 */
		uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE];
		IXDR_PUT_U_INT32(uproc, procnum);
		(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
		XDR_SETPOS(xdrs, 0);

		/* Serialize the procedure number and the arguments. */
		if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr,
		    CKU_HDRSIZE+4, xdrs, xdr_args, argsp)) {
			/*
			 * Encoding may have replaced the underlying
			 * buffer; reflect that back to the caller so
			 * the correct buffer gets freed.
			 */
			if (rpcmsg->addr != xdrs->x_base) {
				rpcmsg->addr = xdrs->x_base;
				rpcmsg->len = xdr_getbufsize(xdrs);
			}
			DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__procnum);
			return (CLNT_RDMA_FAIL);
		}
		/*
		 * If we had to allocate a new buffer while encoding
		 * then update the addr and len.
		 */
		if (rpcmsg->addr != xdrs->x_base) {
			rpcmsg->addr = xdrs->x_base;
			rpcmsg->len = xdr_getbufsize(xdrs);
		}

		p->cku_outsz = XDR_GETPOS(xdrs);
		DTRACE_PROBE1(krpc__i__compose__size__sec, int, p->cku_outsz)
	}

	return (CLNT_RDMA_SUCCESS);
}
403 403
404 404 static int
405 405 clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg,
406 406 XDR **xdrs, uint_t *op)
407 407 {
408 408 cku_private_t *p = htop(h);
409 409 uint_t vers;
410 410 uint32_t rdma_credit = rdma_bufs_rqst;
411 411
412 412 vers = RPCRDMA_VERS;
413 413 clmsg->type = SEND_BUFFER;
414 414
415 415 if (rdma_buf_alloc(conn, clmsg)) {
416 416 return (CLNT_RDMA_FAIL);
417 417 }
418 418
419 419 *xdrs = &p->cku_outxdr;
420 420 xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE);
421 421
422 422 (*(uint32_t *)clmsg->addr) = p->cku_xid;
423 423 XDR_SETPOS(*xdrs, sizeof (uint32_t));
424 424 (void) xdr_u_int(*xdrs, &vers);
425 425 (void) xdr_u_int(*xdrs, &rdma_credit);
426 426 (void) xdr_u_int(*xdrs, op);
427 427
428 428 return (CLNT_RDMA_SUCCESS);
429 429 }
430 430
431 431 /*
432 432 * If xp_cl is NULL value, then the RPC payload will NOT carry
433 433 * an RDMA READ chunk list, in this case we insert FALSE into
434 434 * the XDR stream. Otherwise we use the clist and RDMA register
435 435 * the memory and encode the clist into the outbound XDR stream.
436 436 */
437 437 static int
438 438 clnt_setup_rlist(CONN *conn, XDR *xdrs, XDR *call_xdrp)
439 439 {
440 440 int status;
441 441 struct clist *rclp;
442 442 int32_t xdr_flag = XDR_RDMA_RLIST_REG;
443 443
444 444 XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &rclp);
445 445
446 446 if (rclp != NULL) {
447 447 status = clist_register(conn, rclp, CLIST_REG_SOURCE);
448 448 if (status != RDMA_SUCCESS) {
449 449 return (CLNT_RDMA_FAIL);
450 450 }
451 451 XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag);
452 452 }
453 453 (void) xdr_do_clist(xdrs, &rclp);
454 454
455 455 return (CLNT_RDMA_SUCCESS);
456 456 }
457 457
458 458 /*
459 459 * If xp_wcl is NULL value, then the RPC payload will NOT carry
460 460 * an RDMA WRITE chunk list, in this case we insert FALSE into
461 461 * the XDR stream. Otherwise we use the clist and RDMA register
462 462 * the memory and encode the clist into the outbound XDR stream.
463 463 */
464 464 static int
465 465 clnt_setup_wlist(CONN *conn, XDR *xdrs, XDR *call_xdrp, rdma_buf_t *rndbuf)
466 466 {
467 467 int status;
468 468 struct clist *wlist, *rndcl;
469 469 int wlen, rndlen;
470 470 int32_t xdr_flag = XDR_RDMA_WLIST_REG;
471 471
472 472 XDR_CONTROL(call_xdrp, XDR_RDMA_GET_WLIST, &wlist);
473 473
474 474 if (wlist != NULL) {
475 475 /*
476 476 * If we are sending a non 4-byte alligned length
477 477 * the server will roundup the length to 4-byte
478 478 * boundary. In such a case, a trailing chunk is
479 479 * added to take any spill over roundup bytes.
480 480 */
481 481 wlen = clist_len(wlist);
482 482 rndlen = (roundup(wlen, BYTES_PER_XDR_UNIT) - wlen);
483 483 if (rndlen) {
484 484 rndcl = clist_alloc();
485 485 /*
486 486 * calc_length() will allocate a PAGESIZE
487 487 * buffer below.
488 488 */
489 489 rndcl->c_len = calc_length(rndlen);
490 490 rndcl->rb_longbuf.type = RDMA_LONG_BUFFER;
491 491 rndcl->rb_longbuf.len = rndcl->c_len;
492 492 if (rdma_buf_alloc(conn, &rndcl->rb_longbuf)) {
493 493 clist_free(rndcl);
494 494 return (CLNT_RDMA_FAIL);
495 495 }
496 496
497 497 /* Roundup buffer freed back in caller */
498 498 *rndbuf = rndcl->rb_longbuf;
499 499
500 500 rndcl->u.c_daddr3 = rndcl->rb_longbuf.addr;
501 501 rndcl->c_next = NULL;
502 502 rndcl->c_dmemhandle = rndcl->rb_longbuf.handle;
503 503 wlist->c_next = rndcl;
504 504 }
505 505
506 506 status = clist_register(conn, wlist, CLIST_REG_DST);
507 507 if (status != RDMA_SUCCESS) {
508 508 rdma_buf_free(conn, rndbuf);
509 509 bzero(rndbuf, sizeof (rdma_buf_t));
510 510 return (CLNT_RDMA_FAIL);
511 511 }
512 512 XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag);
513 513 }
514 514
515 515 if (!xdr_encode_wlist(xdrs, wlist)) {
516 516 if (rndlen) {
517 517 rdma_buf_free(conn, rndbuf);
518 518 bzero(rndbuf, sizeof (rdma_buf_t));
519 519 }
520 520 return (CLNT_RDMA_FAIL);
521 521 }
522 522
523 523 return (CLNT_RDMA_SUCCESS);
524 524 }
525 525
526 526 static int
527 527 clnt_setup_long_reply(CONN *conn, struct clist **clpp, uint_t length)
528 528 {
529 529 if (length == 0) {
530 530 *clpp = NULL;
531 531 return (CLNT_RDMA_SUCCESS);
532 532 }
533 533
534 534 *clpp = clist_alloc();
535 535
536 536 (*clpp)->rb_longbuf.len = calc_length(length);
537 537 (*clpp)->rb_longbuf.type = RDMA_LONG_BUFFER;
538 538
539 539 if (rdma_buf_alloc(conn, &((*clpp)->rb_longbuf))) {
540 540 clist_free(*clpp);
541 541 *clpp = NULL;
542 542 return (CLNT_RDMA_FAIL);
543 543 }
544 544
545 545 (*clpp)->u.c_daddr3 = (*clpp)->rb_longbuf.addr;
546 546 (*clpp)->c_len = (*clpp)->rb_longbuf.len;
547 547 (*clpp)->c_next = NULL;
548 548 (*clpp)->c_dmemhandle = (*clpp)->rb_longbuf.handle;
549 549
550 550 if (clist_register(conn, *clpp, CLIST_REG_DST)) {
551 551 DTRACE_PROBE(krpc__e__clntrdma__longrep_regbuf);
552 552 rdma_buf_free(conn, &((*clpp)->rb_longbuf));
553 553 clist_free(*clpp);
554 554 *clpp = NULL;
555 555 return (CLNT_RDMA_FAIL);
556 556 }
557 557
558 558 return (CLNT_RDMA_SUCCESS);
559 559 }
560 560
561 561 /* ARGSUSED */
562 562 static enum clnt_stat
563 563 clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
564 564 caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp,
565 565 struct timeval wait)
566 566 {
567 567 cku_private_t *p = htop(h);
568 568
569 569 int try_call_again;
570 570 int refresh_attempt = AUTH_REFRESH_COUNT;
571 571 int status;
572 572 int msglen;
573 573
574 574 XDR *call_xdrp, callxdr; /* for xdrrdma encoding the RPC call */
575 575 XDR *reply_xdrp, replyxdr; /* for xdrrdma decoding the RPC reply */
576 576 XDR *rdmahdr_o_xdrs, *rdmahdr_i_xdrs;
577 577
578 578 struct rpc_msg reply_msg;
579 579 rdma_registry_t *m;
580 580
581 581 struct clist *cl_sendlist;
582 582 struct clist *cl_recvlist;
583 583 struct clist *cl;
584 584 struct clist *cl_rpcmsg;
585 585 struct clist *cl_rdma_reply;
586 586 struct clist *cl_rpcreply_wlist;
587 587 struct clist *cl_long_reply;
588 588 rdma_buf_t rndup;
589 589
590 590 uint_t vers;
591 591 uint_t op;
592 592 uint_t off;
593 593 uint32_t seg_array_len;
594 594 uint_t long_reply_len;
595 595 uint_t rpcsec_gss;
596 596 uint_t gss_i_or_p;
597 597
598 598 CONN *conn = NULL;
599 599 rdma_buf_t clmsg;
600 600 rdma_buf_t rpcmsg;
601 601 rdma_chunkinfo_lengths_t rcil;
602 602
603 603 clock_t ticks;
604 604 bool_t wlist_exists_reply;
605 605
606 606 uint32_t rdma_credit = rdma_bufs_rqst;
607 607
608 608 RCSTAT_INCR(rccalls);
609 609
610 610 call_again:
611 611
612 612 bzero(&clmsg, sizeof (clmsg));
613 613 bzero(&rpcmsg, sizeof (rpcmsg));
614 614 bzero(&rndup, sizeof (rndup));
615 615 try_call_again = 0;
616 616 cl_sendlist = NULL;
617 617 cl_recvlist = NULL;
618 618 cl = NULL;
619 619 cl_rpcmsg = NULL;
620 620 cl_rdma_reply = NULL;
621 621 call_xdrp = NULL;
622 622 reply_xdrp = NULL;
623 623 wlist_exists_reply = FALSE;
624 624 cl_rpcreply_wlist = NULL;
625 625 cl_long_reply = NULL;
626 626 rcil.rcil_len = 0;
627 627 rcil.rcil_len_alt = 0;
628 628 long_reply_len = 0;
629 629
↓ open down ↓ |
629 lines elided |
↑ open up ↑ |
630 630 rw_enter(&rdma_lock, RW_READER);
631 631 m = (rdma_registry_t *)p->cku_rd_handle;
632 632 if (m->r_mod_state == RDMA_MOD_INACTIVE) {
633 633 /*
634 634 * If we didn't find a matching RDMA module in the registry
635 635 * then there is no transport.
636 636 */
637 637 rw_exit(&rdma_lock);
638 638 p->cku_err.re_status = RPC_CANTSEND;
639 639 p->cku_err.re_errno = EIO;
640 - ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
640 + ticks = drv_sectohz(clnt_rdma_min_delay);
641 641 if (h->cl_nosignal == TRUE) {
642 642 delay(ticks);
643 643 } else {
644 644 if (delay_sig(ticks) == EINTR) {
645 645 p->cku_err.re_status = RPC_INTR;
646 646 p->cku_err.re_errno = EINTR;
647 647 }
648 648 }
649 649 return (RPC_CANTSEND);
650 650 }
651 651 /*
652 652 * Get unique xid
653 653 */
654 654 if (p->cku_xid == 0)
655 655 p->cku_xid = alloc_xid();
656 656
657 657 status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_srcaddr,
658 658 &p->cku_addr, p->cku_addrfmly, p->cku_rd_handle, &conn);
659 659 rw_exit(&rdma_lock);
660 660
661 661 /*
662 662 * If there is a problem with the connection reflect the issue
663 663 * back to the higher level to address, we MAY delay for a short
664 664 * period so that we are kind to the transport.
665 665 */
666 666 if (conn == NULL) {
667 667 /*
668 668 * Connect failed to server. Could be because of one
669 669 * of several things. In some cases we don't want
670 670 * the caller to retry immediately - delay before
671 671 * returning to caller.
672 672 */
673 673 switch (status) {
674 674 case RDMA_TIMEDOUT:
675 675 /*
676 676 * Already timed out. No need to delay
677 677 * some more.
678 678 */
679 679 p->cku_err.re_status = RPC_TIMEDOUT;
680 680 p->cku_err.re_errno = ETIMEDOUT;
681 681 break;
682 682 case RDMA_INTR:
683 683 /*
684 684 * Failed because of an signal. Very likely
685 685 * the caller will not retry.
↓ open down ↓ |
35 lines elided |
↑ open up ↑ |
686 686 */
687 687 p->cku_err.re_status = RPC_INTR;
688 688 p->cku_err.re_errno = EINTR;
689 689 break;
690 690 default:
691 691 /*
692 692 * All other failures - server down or service
693 693 * down or temporary resource failure. Delay before
694 694 * returning to caller.
695 695 */
696 - ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
696 + ticks = drv_sectohz(clnt_rdma_min_delay);
697 697 p->cku_err.re_status = RPC_CANTCONNECT;
698 698 p->cku_err.re_errno = EIO;
699 699
700 700 if (h->cl_nosignal == TRUE) {
701 701 delay(ticks);
702 702 } else {
703 703 if (delay_sig(ticks) == EINTR) {
704 704 p->cku_err.re_status = RPC_INTR;
705 705 p->cku_err.re_errno = EINTR;
706 706 }
707 707 }
708 708 break;
709 709 }
710 710
711 711 return (p->cku_err.re_status);
712 712 }
713 713
714 714 if (p->cku_srcaddr.maxlen < conn->c_laddr.len) {
715 715 if ((p->cku_srcaddr.maxlen != 0) &&
716 716 (p->cku_srcaddr.buf != NULL))
717 717 kmem_free(p->cku_srcaddr.buf, p->cku_srcaddr.maxlen);
718 718 p->cku_srcaddr.buf = kmem_zalloc(conn->c_laddr.maxlen,
719 719 KM_SLEEP);
720 720 p->cku_srcaddr.maxlen = conn->c_laddr.maxlen;
721 721 }
722 722
723 723 p->cku_srcaddr.len = conn->c_laddr.len;
724 724 bcopy(conn->c_laddr.buf, p->cku_srcaddr.buf, conn->c_laddr.len);
725 725
726 726 clnt_check_credit(conn);
727 727
728 728 status = CLNT_RDMA_FAIL;
729 729
730 730 rpcsec_gss = gss_i_or_p = FALSE;
731 731
732 732 if (IS_RPCSEC_GSS(h)) {
733 733 rpcsec_gss = TRUE;
734 734 if (rpc_gss_get_service_type(h->cl_auth) ==
735 735 rpc_gss_svc_integrity ||
736 736 rpc_gss_get_service_type(h->cl_auth) ==
737 737 rpc_gss_svc_privacy)
738 738 gss_i_or_p = TRUE;
739 739 }
740 740
741 741 /*
742 742 * Try a regular RDMA message if RPCSEC_GSS is not being used
743 743 * or if RPCSEC_GSS is being used for authentication only.
744 744 */
745 745 if (rpcsec_gss == FALSE ||
746 746 (rpcsec_gss == TRUE && gss_i_or_p == FALSE)) {
747 747 /*
748 748 * Grab a send buffer for the request. Try to
749 749 * encode it to see if it fits. If not, then it
750 750 * needs to be sent in a chunk.
751 751 */
752 752 rpcmsg.type = SEND_BUFFER;
753 753 if (rdma_buf_alloc(conn, &rpcmsg)) {
754 754 DTRACE_PROBE(krpc__e__clntrdma__callit_nobufs);
755 755 goto done;
756 756 }
757 757
758 758 /* First try to encode into regular send buffer */
759 759 op = RDMA_MSG;
760 760
761 761 call_xdrp = &callxdr;
762 762
763 763 xdrrdma_create(call_xdrp, rpcmsg.addr, rpcmsg.len,
764 764 rdma_minchunk, NULL, XDR_ENCODE, conn);
765 765
766 766 status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, call_xdrp,
767 767 xdr_args, argsp);
768 768
769 769 if (status != CLNT_RDMA_SUCCESS) {
770 770 /* Clean up from previous encode attempt */
771 771 rdma_buf_free(conn, &rpcmsg);
772 772 XDR_DESTROY(call_xdrp);
773 773 } else {
774 774 XDR_CONTROL(call_xdrp, XDR_RDMA_GET_CHUNK_LEN, &rcil);
775 775 }
776 776 }
777 777
778 778 /* If the encode didn't work, then try a NOMSG */
779 779 if (status != CLNT_RDMA_SUCCESS) {
780 780
781 781 msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT + MAX_AUTH_BYTES +
782 782 xdr_sizeof(xdr_args, argsp);
783 783
784 784 msglen = calc_length(msglen);
785 785
786 786 /* pick up the lengths for the reply buffer needed */
787 787 (void) xdrrdma_sizeof(xdr_args, argsp, 0,
788 788 &rcil.rcil_len, &rcil.rcil_len_alt);
789 789
790 790 /*
791 791 * Construct a clist to describe the CHUNK_BUFFER
792 792 * for the rpcmsg.
793 793 */
794 794 cl_rpcmsg = clist_alloc();
795 795 cl_rpcmsg->c_len = msglen;
796 796 cl_rpcmsg->rb_longbuf.type = RDMA_LONG_BUFFER;
797 797 cl_rpcmsg->rb_longbuf.len = msglen;
798 798 if (rdma_buf_alloc(conn, &cl_rpcmsg->rb_longbuf)) {
799 799 clist_free(cl_rpcmsg);
800 800 goto done;
801 801 }
802 802 cl_rpcmsg->w.c_saddr3 = cl_rpcmsg->rb_longbuf.addr;
803 803
804 804 op = RDMA_NOMSG;
805 805 call_xdrp = &callxdr;
806 806
807 807 xdrrdma_create(call_xdrp, cl_rpcmsg->rb_longbuf.addr,
808 808 cl_rpcmsg->rb_longbuf.len, 0,
809 809 cl_rpcmsg, XDR_ENCODE, conn);
810 810
811 811 status = clnt_compose_rpcmsg(h, procnum, &cl_rpcmsg->rb_longbuf,
812 812 call_xdrp, xdr_args, argsp);
813 813
814 814 DTRACE_PROBE2(krpc__i__clntrdma__callit__longbuf, int, status,
815 815 int, msglen);
816 816 if (status != CLNT_RDMA_SUCCESS) {
817 817 p->cku_err.re_status = RPC_CANTENCODEARGS;
818 818 p->cku_err.re_errno = EIO;
819 819 DTRACE_PROBE(krpc__e__clntrdma__callit__composemsg);
820 820 goto done;
821 821 }
822 822 }
823 823
824 824 /*
825 825 * During the XDR_ENCODE we may have "allocated" an RDMA READ or
826 826 * RDMA WRITE clist.
827 827 *
828 828 * First pull the RDMA READ chunk list from the XDR private
829 829 * area to keep it handy.
830 830 */
831 831 XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &cl);
832 832
833 833 if (gss_i_or_p) {
834 834 long_reply_len = rcil.rcil_len + rcil.rcil_len_alt;
835 835 long_reply_len += MAX_AUTH_BYTES;
836 836 } else {
837 837 long_reply_len = rcil.rcil_len;
838 838 }
839 839
840 840 /*
841 841 * Update the chunk size information for the Long RPC msg.
842 842 */
843 843 if (cl && op == RDMA_NOMSG)
844 844 cl->c_len = p->cku_outsz;
845 845
846 846 /*
847 847 * Prepare the RDMA header. On success xdrs will hold the result
848 848 * of xdrmem_create() for a SEND_BUFFER.
849 849 */
850 850 status = clnt_compose_rdma_header(conn, h, &clmsg,
851 851 &rdmahdr_o_xdrs, &op);
852 852
853 853 if (status != CLNT_RDMA_SUCCESS) {
854 854 p->cku_err.re_status = RPC_CANTSEND;
855 855 p->cku_err.re_errno = EIO;
856 856 RCSTAT_INCR(rcnomem);
857 857 DTRACE_PROBE(krpc__e__clntrdma__callit__nobufs2);
858 858 goto done;
859 859 }
860 860
861 861 /*
862 862 * Now insert the RDMA READ list iff present
863 863 */
864 864 status = clnt_setup_rlist(conn, rdmahdr_o_xdrs, call_xdrp);
865 865 if (status != CLNT_RDMA_SUCCESS) {
866 866 DTRACE_PROBE(krpc__e__clntrdma__callit__clistreg);
867 867 rdma_buf_free(conn, &clmsg);
868 868 p->cku_err.re_status = RPC_CANTSEND;
869 869 p->cku_err.re_errno = EIO;
870 870 goto done;
871 871 }
872 872
873 873 /*
874 874 * Setup RDMA WRITE chunk list for nfs read operation
875 875 * other operations will have a NULL which will result
876 876 * as a NULL list in the XDR stream.
877 877 */
878 878 status = clnt_setup_wlist(conn, rdmahdr_o_xdrs, call_xdrp, &rndup);
879 879 if (status != CLNT_RDMA_SUCCESS) {
880 880 rdma_buf_free(conn, &clmsg);
881 881 p->cku_err.re_status = RPC_CANTSEND;
882 882 p->cku_err.re_errno = EIO;
883 883 goto done;
884 884 }
885 885
886 886 /*
887 887 * If NULL call and RPCSEC_GSS, provide a chunk such that
888 888 * large responses can flow back to the client.
889 889 * If RPCSEC_GSS with integrity or privacy is in use, get chunk.
890 890 */
891 891 if ((procnum == 0 && rpcsec_gss == TRUE) ||
892 892 (rpcsec_gss == TRUE && gss_i_or_p == TRUE))
893 893 long_reply_len += 1024;
894 894
895 895 status = clnt_setup_long_reply(conn, &cl_long_reply, long_reply_len);
896 896
897 897 DTRACE_PROBE2(krpc__i__clntrdma__callit__longreply, int, status,
898 898 int, long_reply_len);
899 899
900 900 if (status != CLNT_RDMA_SUCCESS) {
901 901 rdma_buf_free(conn, &clmsg);
902 902 p->cku_err.re_status = RPC_CANTSEND;
903 903 p->cku_err.re_errno = EIO;
904 904 goto done;
905 905 }
906 906
907 907 /*
908 908 * XDR encode the RDMA_REPLY write chunk
909 909 */
910 910 seg_array_len = (cl_long_reply ? 1 : 0);
911 911 (void) xdr_encode_reply_wchunk(rdmahdr_o_xdrs, cl_long_reply,
912 912 seg_array_len);
913 913
914 914 /*
915 915 * Construct a clist in "sendlist" that represents what we
916 916 * will push over the wire.
917 917 *
918 918 * Start with the RDMA header and clist (if any)
919 919 */
920 920 clist_add(&cl_sendlist, 0, XDR_GETPOS(rdmahdr_o_xdrs), &clmsg.handle,
921 921 clmsg.addr, NULL, NULL);
922 922
923 923 /*
924 924 * Put the RPC call message in sendlist if small RPC
925 925 */
926 926 if (op == RDMA_MSG) {
927 927 clist_add(&cl_sendlist, 0, p->cku_outsz, &rpcmsg.handle,
928 928 rpcmsg.addr, NULL, NULL);
929 929 } else {
930 930 /* Long RPC already in chunk list */
931 931 RCSTAT_INCR(rclongrpcs);
932 932 }
933 933
934 934 /*
935 935 * Set up a reply buffer ready for the reply
936 936 */
937 937 status = rdma_clnt_postrecv(conn, p->cku_xid);
938 938 if (status != RDMA_SUCCESS) {
939 939 rdma_buf_free(conn, &clmsg);
940 940 p->cku_err.re_status = RPC_CANTSEND;
941 941 p->cku_err.re_errno = EIO;
942 942 goto done;
943 943 }
944 944
945 945 /*
946 946 * sync the memory for dma
947 947 */
948 948 if (cl != NULL) {
949 949 status = clist_syncmem(conn, cl, CLIST_REG_SOURCE);
950 950 if (status != RDMA_SUCCESS) {
951 951 (void) rdma_clnt_postrecv_remove(conn, p->cku_xid);
952 952 rdma_buf_free(conn, &clmsg);
953 953 p->cku_err.re_status = RPC_CANTSEND;
954 954 p->cku_err.re_errno = EIO;
955 955 goto done;
956 956 }
957 957 }
958 958
959 959 /*
960 960 * Send the RDMA Header and RPC call message to the server
961 961 */
962 962 status = RDMA_SEND(conn, cl_sendlist, p->cku_xid);
963 963 if (status != RDMA_SUCCESS) {
964 964 (void) rdma_clnt_postrecv_remove(conn, p->cku_xid);
965 965 p->cku_err.re_status = RPC_CANTSEND;
966 966 p->cku_err.re_errno = EIO;
967 967 goto done;
968 968 }
969 969
970 970 /*
971 971 * RDMA plugin now owns the send msg buffers.
972 972 * Clear them out and don't free them.
973 973 */
974 974 clmsg.addr = NULL;
975 975 if (rpcmsg.type == SEND_BUFFER)
976 976 rpcmsg.addr = NULL;
977 977
978 978 /*
979 979 * Recv rpc reply
980 980 */
981 981 status = RDMA_RECV(conn, &cl_recvlist, p->cku_xid);
982 982
983 983 /*
984 984 * Now check recv status
985 985 */
986 986 if (status != 0) {
987 987 if (status == RDMA_INTR) {
988 988 p->cku_err.re_status = RPC_INTR;
989 989 p->cku_err.re_errno = EINTR;
990 990 RCSTAT_INCR(rcintrs);
991 991 } else if (status == RPC_TIMEDOUT) {
992 992 p->cku_err.re_status = RPC_TIMEDOUT;
993 993 p->cku_err.re_errno = ETIMEDOUT;
994 994 RCSTAT_INCR(rctimeouts);
995 995 } else {
996 996 p->cku_err.re_status = RPC_CANTRECV;
997 997 p->cku_err.re_errno = EIO;
998 998 }
999 999 goto done;
1000 1000 }
1001 1001
1002 1002 /*
1003 1003 * Process the reply message.
1004 1004 *
1005 1005 * First the chunk list (if any)
1006 1006 */
1007 1007 rdmahdr_i_xdrs = &(p->cku_inxdr);
1008 1008 xdrmem_create(rdmahdr_i_xdrs,
1009 1009 (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3,
1010 1010 cl_recvlist->c_len, XDR_DECODE);
1011 1011
1012 1012 /*
1013 1013 * Treat xid as opaque (xid is the first entity
1014 1014 * in the rpc rdma message).
1015 1015 * Skip xid and set the xdr position accordingly.
1016 1016 */
1017 1017 XDR_SETPOS(rdmahdr_i_xdrs, sizeof (uint32_t));
1018 1018 (void) xdr_u_int(rdmahdr_i_xdrs, &vers);
1019 1019 (void) xdr_u_int(rdmahdr_i_xdrs, &rdma_credit);
1020 1020 (void) xdr_u_int(rdmahdr_i_xdrs, &op);
1021 1021 (void) xdr_do_clist(rdmahdr_i_xdrs, &cl);
1022 1022
1023 1023 clnt_update_credit(conn, rdma_credit);
1024 1024
1025 1025 wlist_exists_reply = FALSE;
1026 1026 if (! xdr_decode_wlist(rdmahdr_i_xdrs, &cl_rpcreply_wlist,
1027 1027 &wlist_exists_reply)) {
1028 1028 DTRACE_PROBE(krpc__e__clntrdma__callit__wlist_decode);
1029 1029 p->cku_err.re_status = RPC_CANTDECODERES;
1030 1030 p->cku_err.re_errno = EIO;
1031 1031 goto done;
1032 1032 }
1033 1033
1034 1034 /*
1035 1035 * The server shouldn't have sent a RDMA_SEND that
1036 1036 * the client needs to RDMA_WRITE a reply back to
1037 1037 * the server. So silently ignoring what the
1038 1038 * server returns in the rdma_reply section of the
1039 1039 * header.
1040 1040 */
1041 1041 (void) xdr_decode_reply_wchunk(rdmahdr_i_xdrs, &cl_rdma_reply);
1042 1042 off = xdr_getpos(rdmahdr_i_xdrs);
1043 1043
1044 1044 clnt_decode_long_reply(conn, cl_long_reply,
1045 1045 cl_rdma_reply, &replyxdr, &reply_xdrp,
1046 1046 cl, cl_recvlist, op, off);
1047 1047
1048 1048 if (reply_xdrp == NULL)
1049 1049 goto done;
1050 1050
1051 1051 if (wlist_exists_reply) {
1052 1052 XDR_CONTROL(reply_xdrp, XDR_RDMA_SET_WLIST, cl_rpcreply_wlist);
1053 1053 }
1054 1054
1055 1055 reply_msg.rm_direction = REPLY;
1056 1056 reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
1057 1057 reply_msg.acpted_rply.ar_stat = SUCCESS;
1058 1058 reply_msg.acpted_rply.ar_verf = _null_auth;
1059 1059
1060 1060 /*
1061 1061 * xdr_results will be done in AUTH_UNWRAP.
1062 1062 */
1063 1063 reply_msg.acpted_rply.ar_results.where = NULL;
1064 1064 reply_msg.acpted_rply.ar_results.proc = xdr_void;
1065 1065
1066 1066 /*
1067 1067 * Decode and validate the response.
1068 1068 */
1069 1069 if (xdr_replymsg(reply_xdrp, &reply_msg)) {
1070 1070 enum clnt_stat re_status;
1071 1071
1072 1072 _seterr_reply(&reply_msg, &(p->cku_err));
1073 1073
1074 1074 re_status = p->cku_err.re_status;
1075 1075 if (re_status == RPC_SUCCESS) {
1076 1076 /*
1077 1077 * Reply is good, check auth.
1078 1078 */
1079 1079 if (!AUTH_VALIDATE(h->cl_auth,
1080 1080 &reply_msg.acpted_rply.ar_verf)) {
1081 1081 p->cku_err.re_status = RPC_AUTHERROR;
1082 1082 p->cku_err.re_why = AUTH_INVALIDRESP;
1083 1083 RCSTAT_INCR(rcbadverfs);
1084 1084 DTRACE_PROBE(
1085 1085 krpc__e__clntrdma__callit__authvalidate);
1086 1086 } else if (!AUTH_UNWRAP(h->cl_auth, reply_xdrp,
1087 1087 xdr_results, resultsp)) {
1088 1088 p->cku_err.re_status = RPC_CANTDECODERES;
1089 1089 p->cku_err.re_errno = EIO;
1090 1090 DTRACE_PROBE(
1091 1091 krpc__e__clntrdma__callit__authunwrap);
1092 1092 }
1093 1093 } else {
1094 1094 /* set errno in case we can't recover */
1095 1095 if (re_status != RPC_VERSMISMATCH &&
1096 1096 re_status != RPC_AUTHERROR &&
1097 1097 re_status != RPC_PROGVERSMISMATCH)
1098 1098 p->cku_err.re_errno = EIO;
1099 1099
1100 1100 if (re_status == RPC_AUTHERROR) {
1101 1101 if ((refresh_attempt > 0) &&
1102 1102 AUTH_REFRESH(h->cl_auth, &reply_msg,
1103 1103 p->cku_cred)) {
1104 1104 refresh_attempt--;
1105 1105 try_call_again = 1;
1106 1106 goto done;
1107 1107 }
1108 1108
1109 1109 try_call_again = 0;
1110 1110
1111 1111 /*
1112 1112 * We have used the client handle to
1113 1113 * do an AUTH_REFRESH and the RPC status may
1114 1114 * be set to RPC_SUCCESS; Let's make sure to
1115 1115 * set it to RPC_AUTHERROR.
1116 1116 */
1117 1117 p->cku_err.re_status = RPC_AUTHERROR;
1118 1118
1119 1119 /*
1120 1120 * Map recoverable and unrecoverable
1121 1121 * authentication errors to appropriate
1122 1122 * errno
1123 1123 */
1124 1124 switch (p->cku_err.re_why) {
1125 1125 case AUTH_BADCRED:
1126 1126 case AUTH_BADVERF:
1127 1127 case AUTH_INVALIDRESP:
1128 1128 case AUTH_TOOWEAK:
1129 1129 case AUTH_FAILED:
1130 1130 case RPCSEC_GSS_NOCRED:
1131 1131 case RPCSEC_GSS_FAILED:
1132 1132 p->cku_err.re_errno = EACCES;
1133 1133 break;
1134 1134 case AUTH_REJECTEDCRED:
1135 1135 case AUTH_REJECTEDVERF:
1136 1136 default:
1137 1137 p->cku_err.re_errno = EIO;
1138 1138 break;
1139 1139 }
1140 1140 }
1141 1141 DTRACE_PROBE1(krpc__e__clntrdma__callit__rpcfailed,
1142 1142 int, p->cku_err.re_why);
1143 1143 }
1144 1144 } else {
1145 1145 p->cku_err.re_status = RPC_CANTDECODERES;
1146 1146 p->cku_err.re_errno = EIO;
1147 1147 DTRACE_PROBE(krpc__e__clntrdma__callit__replymsg);
1148 1148 }
1149 1149
1150 1150 done:
1151 1151 clnt_return_credit(conn);
1152 1152
1153 1153 if (cl_sendlist != NULL)
1154 1154 clist_free(cl_sendlist);
1155 1155
1156 1156 /*
1157 1157 * If rpc reply is in a chunk, free it now.
1158 1158 */
1159 1159 if (cl_long_reply) {
1160 1160 (void) clist_deregister(conn, cl_long_reply);
1161 1161 rdma_buf_free(conn, &cl_long_reply->rb_longbuf);
1162 1162 clist_free(cl_long_reply);
1163 1163 }
1164 1164
1165 1165 if (call_xdrp)
1166 1166 XDR_DESTROY(call_xdrp);
1167 1167
1168 1168 if (rndup.rb_private) {
1169 1169 rdma_buf_free(conn, &rndup);
1170 1170 }
1171 1171
1172 1172 if (reply_xdrp) {
1173 1173 (void) xdr_rpc_free_verifier(reply_xdrp, &reply_msg);
1174 1174 XDR_DESTROY(reply_xdrp);
1175 1175 }
1176 1176
1177 1177 if (cl_rdma_reply) {
1178 1178 clist_free(cl_rdma_reply);
1179 1179 }
1180 1180
1181 1181 if (cl_recvlist) {
1182 1182 rdma_buf_t recvmsg = {0};
1183 1183 recvmsg.addr = (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3;
1184 1184 recvmsg.type = RECV_BUFFER;
1185 1185 RDMA_BUF_FREE(conn, &recvmsg);
1186 1186 clist_free(cl_recvlist);
1187 1187 }
1188 1188
1189 1189 RDMA_REL_CONN(conn);
1190 1190
1191 1191 if (try_call_again)
1192 1192 goto call_again;
1193 1193
1194 1194 if (p->cku_err.re_status != RPC_SUCCESS) {
1195 1195 RCSTAT_INCR(rcbadcalls);
1196 1196 }
1197 1197 return (p->cku_err.re_status);
1198 1198 }
1199 1199
1200 1200
1201 1201 static void
1202 1202 clnt_decode_long_reply(CONN *conn,
1203 1203 struct clist *cl_long_reply,
1204 1204 struct clist *cl_rdma_reply, XDR *xdrs,
1205 1205 XDR **rxdrp, struct clist *cl,
1206 1206 struct clist *cl_recvlist,
1207 1207 uint_t op, uint_t off)
1208 1208 {
1209 1209 if (op != RDMA_NOMSG) {
1210 1210 DTRACE_PROBE1(krpc__i__longrepl__rdmamsg__len,
1211 1211 int, cl_recvlist->c_len - off);
1212 1212 xdrrdma_create(xdrs,
1213 1213 (caddr_t)(uintptr_t)(cl_recvlist->w.c_saddr3 + off),
1214 1214 cl_recvlist->c_len - off, 0, cl, XDR_DECODE, conn);
1215 1215 *rxdrp = xdrs;
1216 1216 return;
1217 1217 }
1218 1218
1219 1219 /* op must be RDMA_NOMSG */
1220 1220 if (cl) {
1221 1221 DTRACE_PROBE(krpc__e__clntrdma__declongreply__serverreadlist);
1222 1222 return;
1223 1223 }
1224 1224
1225 1225 if (cl_long_reply->u.c_daddr) {
1226 1226 DTRACE_PROBE1(krpc__i__longrepl__rdmanomsg__len,
1227 1227 int, cl_rdma_reply->c_len);
1228 1228
1229 1229 xdrrdma_create(xdrs, (caddr_t)cl_long_reply->u.c_daddr3,
1230 1230 cl_rdma_reply->c_len, 0, NULL, XDR_DECODE, conn);
1231 1231
1232 1232 *rxdrp = xdrs;
1233 1233 }
1234 1234 }
1235 1235
1236 1236 static void
1237 1237 clnt_return_credit(CONN *conn)
1238 1238 {
1239 1239 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1240 1240
1241 1241 mutex_enter(&conn->c_lock);
1242 1242 cc_info->clnt_cc_in_flight_ops--;
1243 1243 cv_signal(&cc_info->clnt_cc_cv);
1244 1244 mutex_exit(&conn->c_lock);
1245 1245 }
1246 1246
1247 1247 static void
1248 1248 clnt_update_credit(CONN *conn, uint32_t rdma_credit)
1249 1249 {
1250 1250 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1251 1251
1252 1252 /*
1253 1253 * If the granted has not altered, avoid taking the
1254 1254 * mutex, to essentially do nothing..
1255 1255 */
1256 1256 if (cc_info->clnt_cc_granted_ops == rdma_credit)
1257 1257 return;
1258 1258 /*
1259 1259 * Get the granted number of buffers for credit control.
1260 1260 */
1261 1261 mutex_enter(&conn->c_lock);
1262 1262 cc_info->clnt_cc_granted_ops = rdma_credit;
1263 1263 mutex_exit(&conn->c_lock);
1264 1264 }
1265 1265
1266 1266 static void
1267 1267 clnt_check_credit(CONN *conn)
1268 1268 {
1269 1269 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1270 1270
1271 1271 /*
1272 1272 * Make sure we are not going over our allowed buffer use
1273 1273 * (and make sure we have gotten a granted value before).
1274 1274 */
1275 1275 mutex_enter(&conn->c_lock);
1276 1276 while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops &&
1277 1277 cc_info->clnt_cc_granted_ops != 0) {
1278 1278 /*
1279 1279 * Client has maxed out its granted buffers due to
1280 1280 * credit control. Current handling is to block and wait.
1281 1281 */
1282 1282 cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock);
1283 1283 }
1284 1284 cc_info->clnt_cc_in_flight_ops++;
1285 1285 mutex_exit(&conn->c_lock);
1286 1286 }
1287 1287
/*
 * CLNT_ABORT entry point for the RDMA client.  Deliberately a no-op:
 * there is no abort support here, but the operation must exist in the
 * client ops vector.
 */
/* ARGSUSED */
static void
clnt_rdma_kabort(CLIENT *h)
{
}
1293 1293
1294 1294 static void
1295 1295 clnt_rdma_kerror(CLIENT *h, struct rpc_err *err)
1296 1296 {
1297 1297 struct cku_private *p = htop(h);
1298 1298 *err = p->cku_err;
1299 1299 }
1300 1300
1301 1301 static bool_t
1302 1302 clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
1303 1303 {
1304 1304 struct cku_private *p = htop(h);
1305 1305 XDR *xdrs;
1306 1306
1307 1307 xdrs = &(p->cku_outxdr);
1308 1308 xdrs->x_op = XDR_FREE;
1309 1309 return ((*xdr_res)(xdrs, res_ptr));
1310 1310 }
1311 1311
/*
 * CLNT_CONTROL entry point.  Every command is accepted and ignored
 * (cmd and arg are unused); TRUE is returned unconditionally.
 */
/* ARGSUSED */
static bool_t
clnt_rdma_kcontrol(CLIENT *h, int cmd, char *arg)
{
	return (TRUE);
}
1318 1318
/*
 * CLNT_SETTIMERS entry point.  Only bumps the rctimers statistic and
 * returns 0; all timer parameters are ignored (presumably the RDMA
 * transport needs no retransmit-timer feedback — the feedback
 * callback is never invoked here).
 */
/* ARGSUSED */
static int
clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
    int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg,
    uint32_t xid)
{
	RCSTAT_INCR(rctimers);
	return (0);
}
1328 1328
1329 1329 int
1330 1330 rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf)
1331 1331 {
1332 1332 rdma_registry_t *rp;
1333 1333 void *handle = NULL;
1334 1334 struct knetconfig *knc;
1335 1335 char *pf, *p;
1336 1336 rdma_stat status;
1337 1337 int error = 0;
1338 1338
1339 1339 if (!INGLOBALZONE(curproc))
1340 1340 return (-1);
1341 1341
1342 1342 /*
1343 1343 * modload the RDMA plugins if not already done.
1344 1344 */
1345 1345 if (!rdma_modloaded) {
1346 1346 mutex_enter(&rdma_modload_lock);
1347 1347 if (!rdma_modloaded) {
1348 1348 error = rdma_modload();
1349 1349 }
1350 1350 mutex_exit(&rdma_modload_lock);
1351 1351 if (error)
1352 1352 return (-1);
1353 1353 }
1354 1354
1355 1355 if (!rdma_dev_available)
1356 1356 return (-1);
1357 1357
1358 1358 rw_enter(&rdma_lock, RW_READER);
1359 1359 rp = rdma_mod_head;
1360 1360 while (rp != NULL) {
1361 1361 if (rp->r_mod_state == RDMA_MOD_INACTIVE) {
1362 1362 rp = rp->r_next;
1363 1363 continue;
1364 1364 }
1365 1365 status = RDMA_REACHABLE(rp->r_mod->rdma_ops, addr_type, addr,
1366 1366 &handle);
1367 1367 if (status == RDMA_SUCCESS) {
1368 1368 knc = kmem_zalloc(sizeof (struct knetconfig),
1369 1369 KM_SLEEP);
1370 1370 knc->knc_semantics = NC_TPI_RDMA;
1371 1371 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1372 1372 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1373 1373 if (addr_type == AF_INET)
1374 1374 (void) strncpy(pf, NC_INET, KNC_STRSIZE);
1375 1375 else if (addr_type == AF_INET6)
1376 1376 (void) strncpy(pf, NC_INET6, KNC_STRSIZE);
1377 1377 pf[KNC_STRSIZE - 1] = '\0';
1378 1378
1379 1379 (void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE);
1380 1380 p[KNC_STRSIZE - 1] = '\0';
1381 1381
1382 1382 knc->knc_protofmly = pf;
1383 1383 knc->knc_proto = p;
1384 1384 knc->knc_rdev = (dev_t)rp;
1385 1385 *knconf = knc;
1386 1386 rw_exit(&rdma_lock);
1387 1387 return (0);
1388 1388 }
1389 1389 rp = rp->r_next;
1390 1390 }
1391 1391 rw_exit(&rdma_lock);
1392 1392 return (-1);
1393 1393 }
↓ open down ↓ |
687 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX