/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
35 */ 36 37#define DEBUG_SUBSYSTEM S_LNET 38#include "../../include/linux/lnet/lib-lnet.h" 39#include <linux/log2.h> 40 41#define D_LNI D_CONSOLE 42 43lnet_t the_lnet; /* THE state of the network */ 44EXPORT_SYMBOL(the_lnet); 45 46 47static char *ip2nets = ""; 48module_param(ip2nets, charp, 0444); 49MODULE_PARM_DESC(ip2nets, "LNET network <- IP table"); 50 51static char *networks = ""; 52module_param(networks, charp, 0444); 53MODULE_PARM_DESC(networks, "local networks"); 54 55static char *routes = ""; 56module_param(routes, charp, 0444); 57MODULE_PARM_DESC(routes, "routes to non-local networks"); 58 59static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT; 60module_param(rnet_htable_size, int, 0444); 61MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table"); 62 63static char * 64lnet_get_routes(void) 65{ 66 return routes; 67} 68 69static char * 70lnet_get_networks(void) 71{ 72 char *nets; 73 int rc; 74 75 if (*networks != 0 && *ip2nets != 0) { 76 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or 'ip2nets' but not both at once\n"); 77 return NULL; 78 } 79 80 if (*ip2nets != 0) { 81 rc = lnet_parse_ip2nets(&nets, ip2nets); 82 return (rc == 0) ? 
nets : NULL; 83 } 84 85 if (*networks != 0) 86 return networks; 87 88 return "tcp"; 89} 90 91static void 92lnet_init_locks(void) 93{ 94 spin_lock_init(&the_lnet.ln_eq_wait_lock); 95 init_waitqueue_head(&the_lnet.ln_eq_waitq); 96 mutex_init(&the_lnet.ln_lnd_mutex); 97 mutex_init(&the_lnet.ln_api_mutex); 98} 99 100static void 101lnet_fini_locks(void) 102{ 103} 104 105 106static int 107lnet_create_remote_nets_table(void) 108{ 109 int i; 110 struct list_head *hash; 111 112 LASSERT(the_lnet.ln_remote_nets_hash == NULL); 113 LASSERT(the_lnet.ln_remote_nets_hbits > 0); 114 LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash)); 115 if (hash == NULL) { 116 CERROR("Failed to create remote nets hash table\n"); 117 return -ENOMEM; 118 } 119 120 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) 121 INIT_LIST_HEAD(&hash[i]); 122 the_lnet.ln_remote_nets_hash = hash; 123 return 0; 124} 125 126static void 127lnet_destroy_remote_nets_table(void) 128{ 129 int i; 130 131 if (the_lnet.ln_remote_nets_hash == NULL) 132 return; 133 134 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) 135 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i])); 136 137 LIBCFS_FREE(the_lnet.ln_remote_nets_hash, 138 LNET_REMOTE_NETS_HASH_SIZE * 139 sizeof(the_lnet.ln_remote_nets_hash[0])); 140 the_lnet.ln_remote_nets_hash = NULL; 141} 142 143static void 144lnet_destroy_locks(void) 145{ 146 if (the_lnet.ln_res_lock != NULL) { 147 cfs_percpt_lock_free(the_lnet.ln_res_lock); 148 the_lnet.ln_res_lock = NULL; 149 } 150 151 if (the_lnet.ln_net_lock != NULL) { 152 cfs_percpt_lock_free(the_lnet.ln_net_lock); 153 the_lnet.ln_net_lock = NULL; 154 } 155 156 lnet_fini_locks(); 157} 158 159static int 160lnet_create_locks(void) 161{ 162 lnet_init_locks(); 163 164 the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table()); 165 if (the_lnet.ln_res_lock == NULL) 166 goto failed; 167 168 the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table()); 169 if (the_lnet.ln_net_lock == NULL) 170 goto failed; 171 172 
return 0; 173 174 failed: 175 lnet_destroy_locks(); 176 return -ENOMEM; 177} 178 179static void lnet_assert_wire_constants(void) 180{ 181 /* Wire protocol assertions generated by 'wirecheck' 182 * running on Linux robert.bartonsoftware.com 2.6.8-1.521 183 * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux 184 * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */ 185 186 /* Constants... */ 187 CLASSERT(LNET_PROTO_TCP_MAGIC == 0xeebc0ded); 188 CLASSERT(LNET_PROTO_TCP_VERSION_MAJOR == 1); 189 CLASSERT(LNET_PROTO_TCP_VERSION_MINOR == 0); 190 CLASSERT(LNET_MSG_ACK == 0); 191 CLASSERT(LNET_MSG_PUT == 1); 192 CLASSERT(LNET_MSG_GET == 2); 193 CLASSERT(LNET_MSG_REPLY == 3); 194 CLASSERT(LNET_MSG_HELLO == 4); 195 196 /* Checks for struct ptl_handle_wire_t */ 197 CLASSERT((int)sizeof(lnet_handle_wire_t) == 16); 198 CLASSERT((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0); 199 CLASSERT((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8); 200 CLASSERT((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8); 201 CLASSERT((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8); 202 203 /* Checks for struct lnet_magicversion_t */ 204 CLASSERT((int)sizeof(lnet_magicversion_t) == 8); 205 CLASSERT((int)offsetof(lnet_magicversion_t, magic) == 0); 206 CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4); 207 CLASSERT((int)offsetof(lnet_magicversion_t, version_major) == 4); 208 CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2); 209 CLASSERT((int)offsetof(lnet_magicversion_t, version_minor) == 6); 210 CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2); 211 212 /* Checks for struct lnet_hdr_t */ 213 CLASSERT((int)sizeof(lnet_hdr_t) == 72); 214 CLASSERT((int)offsetof(lnet_hdr_t, dest_nid) == 0); 215 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8); 216 CLASSERT((int)offsetof(lnet_hdr_t, src_nid) == 8); 217 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8); 218 
CLASSERT((int)offsetof(lnet_hdr_t, dest_pid) == 16); 219 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4); 220 CLASSERT((int)offsetof(lnet_hdr_t, src_pid) == 20); 221 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4); 222 CLASSERT((int)offsetof(lnet_hdr_t, type) == 24); 223 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->type) == 4); 224 CLASSERT((int)offsetof(lnet_hdr_t, payload_length) == 28); 225 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4); 226 CLASSERT((int)offsetof(lnet_hdr_t, msg) == 32); 227 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg) == 40); 228 229 /* Ack */ 230 CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32); 231 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16); 232 CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48); 233 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8); 234 CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56); 235 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4); 236 237 /* Put */ 238 CLASSERT((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32); 239 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16); 240 CLASSERT((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48); 241 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8); 242 CLASSERT((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56); 243 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8); 244 CLASSERT((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64); 245 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4); 246 CLASSERT((int)offsetof(lnet_hdr_t, msg.put.offset) == 68); 247 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4); 248 249 /* Get */ 250 CLASSERT((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32); 251 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16); 252 CLASSERT((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48); 253 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8); 254 
CLASSERT((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56); 255 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4); 256 CLASSERT((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60); 257 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4); 258 CLASSERT((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64); 259 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4); 260 261 /* Reply */ 262 CLASSERT((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32); 263 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16); 264 265 /* Hello */ 266 CLASSERT((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32); 267 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8); 268 CLASSERT((int)offsetof(lnet_hdr_t, msg.hello.type) == 40); 269 CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4); 270} 271 272static lnd_t * 273lnet_find_lnd_by_type(int type) 274{ 275 lnd_t *lnd; 276 struct list_head *tmp; 277 278 /* holding lnd mutex */ 279 list_for_each (tmp, &the_lnet.ln_lnds) { 280 lnd = list_entry(tmp, lnd_t, lnd_list); 281 282 if ((int)lnd->lnd_type == type) 283 return lnd; 284 } 285 286 return NULL; 287} 288 289void 290lnet_register_lnd(lnd_t *lnd) 291{ 292 LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex); 293 294 LASSERT(the_lnet.ln_init); 295 LASSERT(libcfs_isknown_lnd(lnd->lnd_type)); 296 LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL); 297 298 list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds); 299 lnd->lnd_refcount = 0; 300 301 CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type)); 302 303 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); 304} 305EXPORT_SYMBOL(lnet_register_lnd); 306 307void 308lnet_unregister_lnd(lnd_t *lnd) 309{ 310 LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex); 311 312 LASSERT(the_lnet.ln_init); 313 LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd); 314 LASSERT(lnd->lnd_refcount == 0); 315 316 list_del(&lnd->lnd_list); 317 CDEBUG(D_NET, "%s LND unregistered\n", 
libcfs_lnd2str(lnd->lnd_type)); 318 319 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); 320} 321EXPORT_SYMBOL(lnet_unregister_lnd); 322 323void 324lnet_counters_get(lnet_counters_t *counters) 325{ 326 lnet_counters_t *ctr; 327 int i; 328 329 memset(counters, 0, sizeof(*counters)); 330 331 lnet_net_lock(LNET_LOCK_EX); 332 333 cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) { 334 counters->msgs_max += ctr->msgs_max; 335 counters->msgs_alloc += ctr->msgs_alloc; 336 counters->errors += ctr->errors; 337 counters->send_count += ctr->send_count; 338 counters->recv_count += ctr->recv_count; 339 counters->route_count += ctr->route_count; 340 counters->drop_count += ctr->drop_count; 341 counters->send_length += ctr->send_length; 342 counters->recv_length += ctr->recv_length; 343 counters->route_length += ctr->route_length; 344 counters->drop_length += ctr->drop_length; 345 346 } 347 lnet_net_unlock(LNET_LOCK_EX); 348} 349EXPORT_SYMBOL(lnet_counters_get); 350 351void 352lnet_counters_reset(void) 353{ 354 lnet_counters_t *counters; 355 int i; 356 357 lnet_net_lock(LNET_LOCK_EX); 358 359 cfs_percpt_for_each(counters, i, the_lnet.ln_counters) 360 memset(counters, 0, sizeof(lnet_counters_t)); 361 362 lnet_net_unlock(LNET_LOCK_EX); 363} 364EXPORT_SYMBOL(lnet_counters_reset); 365 366#ifdef LNET_USE_LIB_FREELIST 367 368int 369lnet_freelist_init(lnet_freelist_t *fl, int n, int size) 370{ 371 char *space; 372 373 LASSERT(n > 0); 374 375 size += offsetof(lnet_freeobj_t, fo_contents); 376 377 LIBCFS_ALLOC(space, n * size); 378 if (space == NULL) 379 return -ENOMEM; 380 381 INIT_LIST_HEAD(&fl->fl_list); 382 fl->fl_objs = space; 383 fl->fl_nobjs = n; 384 fl->fl_objsize = size; 385 386 do { 387 memset(space, 0, size); 388 list_add((struct list_head *)space, &fl->fl_list); 389 space += size; 390 } while (--n != 0); 391 392 return 0; 393} 394 395void 396lnet_freelist_fini(lnet_freelist_t *fl) 397{ 398 struct list_head *el; 399 int count; 400 401 if (fl->fl_nobjs == 0) 402 return; 403 404 count 
= 0; 405 for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next) 406 count++; 407 408 LASSERT(count == fl->fl_nobjs); 409 410 LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize); 411 memset(fl, 0, sizeof(*fl)); 412} 413 414#endif /* LNET_USE_LIB_FREELIST */ 415 416static __u64 417lnet_create_interface_cookie(void) 418{ 419 /* NB the interface cookie in wire handles guards against delayed 420 * replies and ACKs appearing valid after reboot. Initialisation time, 421 * even if it's only implemented to millisecond resolution is probably 422 * easily good enough. */ 423 struct timeval tv; 424 __u64 cookie; 425 426 do_gettimeofday(&tv); 427 cookie = tv.tv_sec; 428 cookie *= 1000000; 429 cookie += tv.tv_usec; 430 return cookie; 431} 432 433static char * 434lnet_res_type2str(int type) 435{ 436 switch (type) { 437 default: 438 LBUG(); 439 case LNET_COOKIE_TYPE_MD: 440 return "MD"; 441 case LNET_COOKIE_TYPE_ME: 442 return "ME"; 443 case LNET_COOKIE_TYPE_EQ: 444 return "EQ"; 445 } 446} 447 448static void 449lnet_res_container_cleanup(struct lnet_res_container *rec) 450{ 451 int count = 0; 452 453 if (rec->rec_type == 0) /* not set yet, it's uninitialized */ 454 return; 455 456 while (!list_empty(&rec->rec_active)) { 457 struct list_head *e = rec->rec_active.next; 458 459 list_del_init(e); 460 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) { 461 lnet_eq_free(list_entry(e, lnet_eq_t, eq_list)); 462 463 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) { 464 lnet_md_free(list_entry(e, lnet_libmd_t, md_list)); 465 466 } else { /* NB: Active MEs should be attached on portals */ 467 LBUG(); 468 } 469 count++; 470 } 471 472 if (count > 0) { 473 /* Found alive MD/ME/EQ, user really should unlink/free 474 * all of them before finalize LNet, but if someone didn't, 475 * we have to recycle garbage for him */ 476 CERROR("%d active elements on exit of %s container\n", 477 count, lnet_res_type2str(rec->rec_type)); 478 } 479 480#ifdef LNET_USE_LIB_FREELIST 481 
lnet_freelist_fini(&rec->rec_freelist); 482#endif 483 if (rec->rec_lh_hash != NULL) { 484 LIBCFS_FREE(rec->rec_lh_hash, 485 LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0])); 486 rec->rec_lh_hash = NULL; 487 } 488 489 rec->rec_type = 0; /* mark it as finalized */ 490} 491 492static int 493lnet_res_container_setup(struct lnet_res_container *rec, 494 int cpt, int type, int objnum, int objsz) 495{ 496 int rc = 0; 497 int i; 498 499 LASSERT(rec->rec_type == 0); 500 501 rec->rec_type = type; 502 INIT_LIST_HEAD(&rec->rec_active); 503 504#ifdef LNET_USE_LIB_FREELIST 505 memset(&rec->rec_freelist, 0, sizeof(rec->rec_freelist)); 506 rc = lnet_freelist_init(&rec->rec_freelist, objnum, objsz); 507 if (rc != 0) 508 goto out; 509#endif 510 rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type; 511 512 /* Arbitrary choice of hash table size */ 513 LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt, 514 LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0])); 515 if (rec->rec_lh_hash == NULL) { 516 rc = -ENOMEM; 517 goto out; 518 } 519 520 for (i = 0; i < LNET_LH_HASH_SIZE; i++) 521 INIT_LIST_HEAD(&rec->rec_lh_hash[i]); 522 523 return 0; 524 525out: 526 CERROR("Failed to setup %s resource container\n", 527 lnet_res_type2str(type)); 528 lnet_res_container_cleanup(rec); 529 return rc; 530} 531 532static void 533lnet_res_containers_destroy(struct lnet_res_container **recs) 534{ 535 struct lnet_res_container *rec; 536 int i; 537 538 cfs_percpt_for_each(rec, i, recs) 539 lnet_res_container_cleanup(rec); 540 541 cfs_percpt_free(recs); 542} 543 544static struct lnet_res_container ** 545lnet_res_containers_create(int type, int objnum, int objsz) 546{ 547 struct lnet_res_container **recs; 548 struct lnet_res_container *rec; 549 int rc; 550 int i; 551 552 recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec)); 553 if (recs == NULL) { 554 CERROR("Failed to allocate %s resource containers\n", 555 lnet_res_type2str(type)); 556 return NULL; 557 } 558 559 cfs_percpt_for_each(rec, i, recs) { 
560 rc = lnet_res_container_setup(rec, i, type, objnum, objsz); 561 if (rc != 0) { 562 lnet_res_containers_destroy(recs); 563 return NULL; 564 } 565 } 566 567 return recs; 568} 569 570lnet_libhandle_t * 571lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie) 572{ 573 /* ALWAYS called with lnet_res_lock held */ 574 struct list_head *head; 575 lnet_libhandle_t *lh; 576 unsigned int hash; 577 578 if ((cookie & LNET_COOKIE_MASK) != rec->rec_type) 579 return NULL; 580 581 hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS); 582 head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK]; 583 584 list_for_each_entry(lh, head, lh_hash_chain) { 585 if (lh->lh_cookie == cookie) 586 return lh; 587 } 588 589 return NULL; 590} 591 592void 593lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh) 594{ 595 /* ALWAYS called with lnet_res_lock held */ 596 unsigned int ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS; 597 unsigned int hash; 598 599 lh->lh_cookie = rec->rec_lh_cookie; 600 rec->rec_lh_cookie += 1 << ibits; 601 602 hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK; 603 604 list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]); 605} 606 607 608int lnet_unprepare(void); 609 610static int 611lnet_prepare(lnet_pid_t requested_pid) 612{ 613 /* Prepare to bring up the network */ 614 struct lnet_res_container **recs; 615 int rc = 0; 616 617 LASSERT(the_lnet.ln_refcount == 0); 618 619 the_lnet.ln_routing = 0; 620 621 LASSERT((requested_pid & LNET_PID_USERFLAG) == 0); 622 the_lnet.ln_pid = requested_pid; 623 624 INIT_LIST_HEAD(&the_lnet.ln_test_peers); 625 INIT_LIST_HEAD(&the_lnet.ln_nis); 626 INIT_LIST_HEAD(&the_lnet.ln_nis_cpt); 627 INIT_LIST_HEAD(&the_lnet.ln_nis_zombie); 628 INIT_LIST_HEAD(&the_lnet.ln_routers); 629 630 rc = lnet_create_remote_nets_table(); 631 if (rc != 0) 632 goto failed; 633 634 the_lnet.ln_interface_cookie = lnet_create_interface_cookie(); 635 636 the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(), 637 
sizeof(lnet_counters_t)); 638 if (the_lnet.ln_counters == NULL) { 639 CERROR("Failed to allocate counters for LNet\n"); 640 rc = -ENOMEM; 641 goto failed; 642 } 643 644 rc = lnet_peer_tables_create(); 645 if (rc != 0) 646 goto failed; 647 648 rc = lnet_msg_containers_create(); 649 if (rc != 0) 650 goto failed; 651 652 rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0, 653 LNET_COOKIE_TYPE_EQ, LNET_FL_MAX_EQS, 654 sizeof(lnet_eq_t)); 655 if (rc != 0) 656 goto failed; 657 658 recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES, 659 sizeof(lnet_me_t)); 660 if (recs == NULL) 661 goto failed; 662 663 the_lnet.ln_me_containers = recs; 664 665 recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS, 666 sizeof(lnet_libmd_t)); 667 if (recs == NULL) 668 goto failed; 669 670 the_lnet.ln_md_containers = recs; 671 672 rc = lnet_portals_create(); 673 if (rc != 0) { 674 CERROR("Failed to create portals for LNet: %d\n", rc); 675 goto failed; 676 } 677 678 return 0; 679 680 failed: 681 lnet_unprepare(); 682 return rc; 683} 684 685int 686lnet_unprepare(void) 687{ 688 /* NB no LNET_LOCK since this is the last reference. 
All LND instances 689 * have shut down already, so it is safe to unlink and free all 690 * descriptors, even those that appear committed to a network op (eg MD 691 * with non-zero pending count) */ 692 693 lnet_fail_nid(LNET_NID_ANY, 0); 694 695 LASSERT(the_lnet.ln_refcount == 0); 696 LASSERT(list_empty(&the_lnet.ln_test_peers)); 697 LASSERT(list_empty(&the_lnet.ln_nis)); 698 LASSERT(list_empty(&the_lnet.ln_nis_cpt)); 699 LASSERT(list_empty(&the_lnet.ln_nis_zombie)); 700 701 lnet_portals_destroy(); 702 703 if (the_lnet.ln_md_containers != NULL) { 704 lnet_res_containers_destroy(the_lnet.ln_md_containers); 705 the_lnet.ln_md_containers = NULL; 706 } 707 708 if (the_lnet.ln_me_containers != NULL) { 709 lnet_res_containers_destroy(the_lnet.ln_me_containers); 710 the_lnet.ln_me_containers = NULL; 711 } 712 713 lnet_res_container_cleanup(&the_lnet.ln_eq_container); 714 715 lnet_msg_containers_destroy(); 716 lnet_peer_tables_destroy(); 717 lnet_rtrpools_free(); 718 719 if (the_lnet.ln_counters != NULL) { 720 cfs_percpt_free(the_lnet.ln_counters); 721 the_lnet.ln_counters = NULL; 722 } 723 lnet_destroy_remote_nets_table(); 724 725 return 0; 726} 727 728lnet_ni_t * 729lnet_net2ni_locked(__u32 net, int cpt) 730{ 731 struct list_head *tmp; 732 lnet_ni_t *ni; 733 734 LASSERT(cpt != LNET_LOCK_EX); 735 736 list_for_each(tmp, &the_lnet.ln_nis) { 737 ni = list_entry(tmp, lnet_ni_t, ni_list); 738 739 if (LNET_NIDNET(ni->ni_nid) == net) { 740 lnet_ni_addref_locked(ni, cpt); 741 return ni; 742 } 743 } 744 745 return NULL; 746} 747 748lnet_ni_t * 749lnet_net2ni(__u32 net) 750{ 751 lnet_ni_t *ni; 752 753 lnet_net_lock(0); 754 ni = lnet_net2ni_locked(net, 0); 755 lnet_net_unlock(0); 756 757 return ni; 758} 759EXPORT_SYMBOL(lnet_net2ni); 760 761static unsigned int 762lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number) 763{ 764 __u64 key = nid; 765 unsigned int val; 766 767 LASSERT(number >= 1 && number <= LNET_CPT_NUMBER); 768 769 if (number == 1) 770 return 0; 771 772 val = 
hash_long(key, LNET_CPT_BITS); 773 /* NB: LNET_CP_NUMBER doesn't have to be PO2 */ 774 if (val < number) 775 return val; 776 777 return (unsigned int)(key + val + (val >> 1)) % number; 778} 779 780int 781lnet_cpt_of_nid_locked(lnet_nid_t nid) 782{ 783 struct lnet_ni *ni; 784 785 /* must called with hold of lnet_net_lock */ 786 if (LNET_CPT_NUMBER == 1) 787 return 0; /* the only one */ 788 789 /* take lnet_net_lock(any) would be OK */ 790 if (!list_empty(&the_lnet.ln_nis_cpt)) { 791 list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) { 792 if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) 793 continue; 794 795 LASSERT(ni->ni_cpts != NULL); 796 return ni->ni_cpts[lnet_nid_cpt_hash 797 (nid, ni->ni_ncpts)]; 798 } 799 } 800 801 return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); 802} 803 804int 805lnet_cpt_of_nid(lnet_nid_t nid) 806{ 807 int cpt; 808 int cpt2; 809 810 if (LNET_CPT_NUMBER == 1) 811 return 0; /* the only one */ 812 813 if (list_empty(&the_lnet.ln_nis_cpt)) 814 return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); 815 816 cpt = lnet_net_lock_current(); 817 cpt2 = lnet_cpt_of_nid_locked(nid); 818 lnet_net_unlock(cpt); 819 820 return cpt2; 821} 822EXPORT_SYMBOL(lnet_cpt_of_nid); 823 824int 825lnet_islocalnet(__u32 net) 826{ 827 struct lnet_ni *ni; 828 int cpt; 829 830 cpt = lnet_net_lock_current(); 831 832 ni = lnet_net2ni_locked(net, cpt); 833 if (ni != NULL) 834 lnet_ni_decref_locked(ni, cpt); 835 836 lnet_net_unlock(cpt); 837 838 return ni != NULL; 839} 840 841lnet_ni_t * 842lnet_nid2ni_locked(lnet_nid_t nid, int cpt) 843{ 844 struct lnet_ni *ni; 845 struct list_head *tmp; 846 847 LASSERT(cpt != LNET_LOCK_EX); 848 849 list_for_each(tmp, &the_lnet.ln_nis) { 850 ni = list_entry(tmp, lnet_ni_t, ni_list); 851 852 if (ni->ni_nid == nid) { 853 lnet_ni_addref_locked(ni, cpt); 854 return ni; 855 } 856 } 857 858 return NULL; 859} 860 861int 862lnet_islocalnid(lnet_nid_t nid) 863{ 864 struct lnet_ni *ni; 865 int cpt; 866 867 cpt = lnet_net_lock_current(); 868 ni = 
lnet_nid2ni_locked(nid, cpt); 869 if (ni != NULL) 870 lnet_ni_decref_locked(ni, cpt); 871 lnet_net_unlock(cpt); 872 873 return ni != NULL; 874} 875 876int 877lnet_count_acceptor_nis(void) 878{ 879 /* Return the # of NIs that need the acceptor. */ 880 int count = 0; 881 struct list_head *tmp; 882 struct lnet_ni *ni; 883 int cpt; 884 885 cpt = lnet_net_lock_current(); 886 list_for_each(tmp, &the_lnet.ln_nis) { 887 ni = list_entry(tmp, lnet_ni_t, ni_list); 888 889 if (ni->ni_lnd->lnd_accept != NULL) 890 count++; 891 } 892 893 lnet_net_unlock(cpt); 894 895 return count; 896} 897 898static int 899lnet_ni_tq_credits(lnet_ni_t *ni) 900{ 901 int credits; 902 903 LASSERT(ni->ni_ncpts >= 1); 904 905 if (ni->ni_ncpts == 1) 906 return ni->ni_maxtxcredits; 907 908 credits = ni->ni_maxtxcredits / ni->ni_ncpts; 909 credits = max(credits, 8 * ni->ni_peertxcredits); 910 credits = min(credits, ni->ni_maxtxcredits); 911 912 return credits; 913} 914 915static void 916lnet_shutdown_lndnis(void) 917{ 918 int i; 919 int islo; 920 lnet_ni_t *ni; 921 922 /* NB called holding the global mutex */ 923 924 /* All quiet on the API front */ 925 LASSERT(!the_lnet.ln_shutdown); 926 LASSERT(the_lnet.ln_refcount == 0); 927 LASSERT(list_empty(&the_lnet.ln_nis_zombie)); 928 929 lnet_net_lock(LNET_LOCK_EX); 930 the_lnet.ln_shutdown = 1; /* flag shutdown */ 931 932 /* Unlink NIs from the global table */ 933 while (!list_empty(&the_lnet.ln_nis)) { 934 ni = list_entry(the_lnet.ln_nis.next, 935 lnet_ni_t, ni_list); 936 /* move it to zombie list and nobody can find it anymore */ 937 list_move(&ni->ni_list, &the_lnet.ln_nis_zombie); 938 lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */ 939 940 if (!list_empty(&ni->ni_cptlist)) { 941 list_del_init(&ni->ni_cptlist); 942 lnet_ni_decref_locked(ni, 0); 943 } 944 } 945 946 /* Drop the cached eqwait NI. 
*/ 947 if (the_lnet.ln_eq_waitni != NULL) { 948 lnet_ni_decref_locked(the_lnet.ln_eq_waitni, 0); 949 the_lnet.ln_eq_waitni = NULL; 950 } 951 952 /* Drop the cached loopback NI. */ 953 if (the_lnet.ln_loni != NULL) { 954 lnet_ni_decref_locked(the_lnet.ln_loni, 0); 955 the_lnet.ln_loni = NULL; 956 } 957 958 lnet_net_unlock(LNET_LOCK_EX); 959 960 /* Clear lazy portals and drop delayed messages which hold refs 961 * on their lnet_msg_t::msg_rxpeer */ 962 for (i = 0; i < the_lnet.ln_nportals; i++) 963 LNetClearLazyPortal(i); 964 965 /* Clear the peer table and wait for all peers to go (they hold refs on 966 * their NIs) */ 967 lnet_peer_tables_cleanup(); 968 969 lnet_net_lock(LNET_LOCK_EX); 970 /* Now wait for the NI's I just nuked to show up on ln_zombie_nis 971 * and shut them down in guaranteed thread context */ 972 i = 2; 973 while (!list_empty(&the_lnet.ln_nis_zombie)) { 974 int *ref; 975 int j; 976 977 ni = list_entry(the_lnet.ln_nis_zombie.next, 978 lnet_ni_t, ni_list); 979 list_del_init(&ni->ni_list); 980 cfs_percpt_for_each(ref, j, ni->ni_refs) { 981 if (*ref == 0) 982 continue; 983 /* still busy, add it back to zombie list */ 984 list_add(&ni->ni_list, &the_lnet.ln_nis_zombie); 985 break; 986 } 987 988 if (!list_empty(&ni->ni_list)) { 989 lnet_net_unlock(LNET_LOCK_EX); 990 ++i; 991 if ((i & (-i)) == i) { 992 CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n", 993 libcfs_nid2str(ni->ni_nid)); 994 } 995 set_current_state(TASK_UNINTERRUPTIBLE); 996 schedule_timeout(cfs_time_seconds(1)); 997 lnet_net_lock(LNET_LOCK_EX); 998 continue; 999 } 1000 1001 ni->ni_lnd->lnd_refcount--; 1002 lnet_net_unlock(LNET_LOCK_EX); 1003 1004 islo = ni->ni_lnd->lnd_type == LOLND; 1005 1006 LASSERT(!in_interrupt()); 1007 (ni->ni_lnd->lnd_shutdown)(ni); 1008 1009 /* can't deref lnd anymore now; it might have unregistered 1010 * itself... 
*/ 1011 1012 if (!islo) 1013 CDEBUG(D_LNI, "Removed LNI %s\n", 1014 libcfs_nid2str(ni->ni_nid)); 1015 1016 lnet_ni_free(ni); 1017 i = 2; 1018 1019 lnet_net_lock(LNET_LOCK_EX); 1020 } 1021 1022 the_lnet.ln_shutdown = 0; 1023 lnet_net_unlock(LNET_LOCK_EX); 1024 1025 if (the_lnet.ln_network_tokens != NULL) { 1026 LIBCFS_FREE(the_lnet.ln_network_tokens, 1027 the_lnet.ln_network_tokens_nob); 1028 the_lnet.ln_network_tokens = NULL; 1029 } 1030} 1031 1032static int 1033lnet_startup_lndnis(void) 1034{ 1035 lnd_t *lnd; 1036 struct lnet_ni *ni; 1037 struct lnet_tx_queue *tq; 1038 struct list_head nilist; 1039 int i; 1040 int rc = 0; 1041 int lnd_type; 1042 int nicount = 0; 1043 char *nets = lnet_get_networks(); 1044 1045 INIT_LIST_HEAD(&nilist); 1046 1047 if (nets == NULL) 1048 goto failed; 1049 1050 rc = lnet_parse_networks(&nilist, nets); 1051 if (rc != 0) 1052 goto failed; 1053 1054 while (!list_empty(&nilist)) { 1055 ni = list_entry(nilist.next, lnet_ni_t, ni_list); 1056 lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); 1057 1058 LASSERT(libcfs_isknown_lnd(lnd_type)); 1059 1060 if (lnd_type == CIBLND || 1061 lnd_type == OPENIBLND || 1062 lnd_type == IIBLND || 1063 lnd_type == VIBLND) { 1064 CERROR("LND %s obsoleted\n", 1065 libcfs_lnd2str(lnd_type)); 1066 goto failed; 1067 } 1068 1069 LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex); 1070 lnd = lnet_find_lnd_by_type(lnd_type); 1071 1072 if (lnd == NULL) { 1073 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); 1074 rc = request_module("%s", 1075 libcfs_lnd2modname(lnd_type)); 1076 LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex); 1077 1078 lnd = lnet_find_lnd_by_type(lnd_type); 1079 if (lnd == NULL) { 1080 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); 1081 CERROR("Can't load LND %s, module %s, rc=%d\n", 1082 libcfs_lnd2str(lnd_type), 1083 libcfs_lnd2modname(lnd_type), rc); 1084 goto failed; 1085 } 1086 } 1087 1088 lnet_net_lock(LNET_LOCK_EX); 1089 lnd->lnd_refcount++; 1090 lnet_net_unlock(LNET_LOCK_EX); 1091 1092 ni->ni_lnd = lnd; 1093 1094 rc = 
(lnd->lnd_startup)(ni); 1095 1096 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); 1097 1098 if (rc != 0) { 1099 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n", 1100 rc, libcfs_lnd2str(lnd->lnd_type)); 1101 lnet_net_lock(LNET_LOCK_EX); 1102 lnd->lnd_refcount--; 1103 lnet_net_unlock(LNET_LOCK_EX); 1104 goto failed; 1105 } 1106 1107 LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL); 1108 1109 list_del(&ni->ni_list); 1110 1111 lnet_net_lock(LNET_LOCK_EX); 1112 /* refcount for ln_nis */ 1113 lnet_ni_addref_locked(ni, 0); 1114 list_add_tail(&ni->ni_list, &the_lnet.ln_nis); 1115 if (ni->ni_cpts != NULL) { 1116 list_add_tail(&ni->ni_cptlist, 1117 &the_lnet.ln_nis_cpt); 1118 lnet_ni_addref_locked(ni, 0); 1119 } 1120 1121 lnet_net_unlock(LNET_LOCK_EX); 1122 1123 if (lnd->lnd_type == LOLND) { 1124 lnet_ni_addref(ni); 1125 LASSERT(the_lnet.ln_loni == NULL); 1126 the_lnet.ln_loni = ni; 1127 continue; 1128 } 1129 1130 if (ni->ni_peertxcredits == 0 || 1131 ni->ni_maxtxcredits == 0) { 1132 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n", 1133 libcfs_lnd2str(lnd->lnd_type), 1134 ni->ni_peertxcredits == 0 ? 
1135 "" : "per-peer "); 1136 goto failed; 1137 } 1138 1139 cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { 1140 tq->tq_credits_min = 1141 tq->tq_credits_max = 1142 tq->tq_credits = lnet_ni_tq_credits(ni); 1143 } 1144 1145 CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n", 1146 libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, 1147 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER, 1148 ni->ni_peerrtrcredits, ni->ni_peertimeout); 1149 1150 nicount++; 1151 } 1152 1153 if (the_lnet.ln_eq_waitni != NULL && nicount > 1) { 1154 lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type; 1155 LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network\n", 1156 libcfs_lnd2str(lnd_type)); 1157 goto failed; 1158 } 1159 1160 return 0; 1161 1162 failed: 1163 lnet_shutdown_lndnis(); 1164 1165 while (!list_empty(&nilist)) { 1166 ni = list_entry(nilist.next, lnet_ni_t, ni_list); 1167 list_del(&ni->ni_list); 1168 lnet_ni_free(ni); 1169 } 1170 1171 return -ENETDOWN; 1172} 1173 1174/** 1175 * Initialize LNet library. 1176 * 1177 * Only userspace program needs to call this function - it's automatically 1178 * called in the kernel at module loading time. Caller has to call LNetFini() 1179 * after a call to LNetInit(), if and only if the latter returned 0. It must 1180 * be called exactly once. 1181 * 1182 * \return 0 on success, and -ve on failures. 
1183 */ 1184int 1185LNetInit(void) 1186{ 1187 int rc; 1188 1189 lnet_assert_wire_constants(); 1190 LASSERT(!the_lnet.ln_init); 1191 1192 memset(&the_lnet, 0, sizeof(the_lnet)); 1193 1194 /* refer to global cfs_cpt_table for now */ 1195 the_lnet.ln_cpt_table = cfs_cpt_table; 1196 the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_table); 1197 1198 LASSERT(the_lnet.ln_cpt_number > 0); 1199 if (the_lnet.ln_cpt_number > LNET_CPT_MAX) { 1200 /* we are under risk of consuming all lh_cookie */ 1201 CERROR("Can't have %d CPTs for LNet (max allowed is %d), please change setting of CPT-table and retry\n", 1202 the_lnet.ln_cpt_number, LNET_CPT_MAX); 1203 return -1; 1204 } 1205 1206 while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number) 1207 the_lnet.ln_cpt_bits++; 1208 1209 rc = lnet_create_locks(); 1210 if (rc != 0) { 1211 CERROR("Can't create LNet global locks: %d\n", rc); 1212 return -1; 1213 } 1214 1215 the_lnet.ln_refcount = 0; 1216 the_lnet.ln_init = 1; 1217 LNetInvalidateHandle(&the_lnet.ln_rc_eqh); 1218 INIT_LIST_HEAD(&the_lnet.ln_lnds); 1219 INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie); 1220 INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow); 1221 1222 /* The hash table size is the number of bits it takes to express the set 1223 * ln_num_routes, minus 1 (better to under estimate than over so we 1224 * don't waste memory). */ 1225 if (rnet_htable_size <= 0) 1226 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT; 1227 else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX) 1228 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX; 1229 the_lnet.ln_remote_nets_hbits = max_t(int, 1, 1230 order_base_2(rnet_htable_size) - 1); 1231 1232 /* All LNDs apart from the LOLND are in separate modules. They 1233 * register themselves when their module loads, and unregister 1234 * themselves when their module is unloaded. */ 1235 lnet_register_lnd(&the_lolnd); 1236 return 0; 1237} 1238EXPORT_SYMBOL(LNetInit); 1239 1240/** 1241 * Finalize LNet library. 
1242 * 1243 * Only userspace program needs to call this function. It can be called 1244 * at most once. 1245 * 1246 * \pre LNetInit() called with success. 1247 * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls. 1248 */ 1249void 1250LNetFini(void) 1251{ 1252 LASSERT(the_lnet.ln_init); 1253 LASSERT(the_lnet.ln_refcount == 0); 1254 1255 while (!list_empty(&the_lnet.ln_lnds)) 1256 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next, 1257 lnd_t, lnd_list)); 1258 lnet_destroy_locks(); 1259 1260 the_lnet.ln_init = 0; 1261} 1262EXPORT_SYMBOL(LNetFini); 1263 1264/** 1265 * Set LNet PID and start LNet interfaces, routing, and forwarding. 1266 * 1267 * Userspace program should call this after a successful call to LNetInit(). 1268 * Users must call this function at least once before any other functions. 1269 * For each successful call there must be a corresponding call to 1270 * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is 1271 * ignored. 1272 * 1273 * The PID used by LNet may be different from the one requested. 1274 * See LNetGetId(). 1275 * 1276 * \param requested_pid PID requested by the caller. 1277 * 1278 * \return >= 0 on success, and < 0 error code on failures. 
1279 */ 1280int 1281LNetNIInit(lnet_pid_t requested_pid) 1282{ 1283 int im_a_router = 0; 1284 int rc; 1285 1286 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex); 1287 1288 LASSERT(the_lnet.ln_init); 1289 CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount); 1290 1291 if (the_lnet.ln_refcount > 0) { 1292 rc = the_lnet.ln_refcount++; 1293 goto out; 1294 } 1295 1296 lnet_get_tunables(); 1297 1298 if (requested_pid == LNET_PID_ANY) { 1299 /* Don't instantiate LNET just for me */ 1300 rc = -ENETDOWN; 1301 goto failed0; 1302 } 1303 1304 rc = lnet_prepare(requested_pid); 1305 if (rc != 0) 1306 goto failed0; 1307 1308 rc = lnet_startup_lndnis(); 1309 if (rc != 0) 1310 goto failed1; 1311 1312 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router); 1313 if (rc != 0) 1314 goto failed2; 1315 1316 rc = lnet_check_routes(); 1317 if (rc != 0) 1318 goto failed2; 1319 1320 rc = lnet_rtrpools_alloc(im_a_router); 1321 if (rc != 0) 1322 goto failed2; 1323 1324 rc = lnet_acceptor_start(); 1325 if (rc != 0) 1326 goto failed2; 1327 1328 the_lnet.ln_refcount = 1; 1329 /* Now I may use my own API functions... */ 1330 1331 /* NB router checker needs the_lnet.ln_ping_info in 1332 * lnet_router_checker -> lnet_update_ni_status_locked */ 1333 rc = lnet_ping_target_init(); 1334 if (rc != 0) 1335 goto failed3; 1336 1337 rc = lnet_router_checker_start(); 1338 if (rc != 0) 1339 goto failed4; 1340 1341 lnet_proc_init(); 1342 goto out; 1343 1344 failed4: 1345 lnet_ping_target_fini(); 1346 failed3: 1347 the_lnet.ln_refcount = 0; 1348 lnet_acceptor_stop(); 1349 failed2: 1350 lnet_destroy_routes(); 1351 lnet_shutdown_lndnis(); 1352 failed1: 1353 lnet_unprepare(); 1354 failed0: 1355 LASSERT(rc < 0); 1356 out: 1357 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex); 1358 return rc; 1359} 1360EXPORT_SYMBOL(LNetNIInit); 1361 1362/** 1363 * Stop LNet interfaces, routing, and forwarding. 1364 * 1365 * Users must call this function once for each successful call to LNetNIInit(). 
1366 * Once the LNetNIFini() operation has been started, the results of pending 1367 * API operations are undefined. 1368 * 1369 * \return always 0 for current implementation. 1370 */ 1371int 1372LNetNIFini(void) 1373{ 1374 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex); 1375 1376 LASSERT(the_lnet.ln_init); 1377 LASSERT(the_lnet.ln_refcount > 0); 1378 1379 if (the_lnet.ln_refcount != 1) { 1380 the_lnet.ln_refcount--; 1381 } else { 1382 LASSERT(!the_lnet.ln_niinit_self); 1383 1384 lnet_proc_fini(); 1385 lnet_router_checker_stop(); 1386 lnet_ping_target_fini(); 1387 1388 /* Teardown fns that use my own API functions BEFORE here */ 1389 the_lnet.ln_refcount = 0; 1390 1391 lnet_acceptor_stop(); 1392 lnet_destroy_routes(); 1393 lnet_shutdown_lndnis(); 1394 lnet_unprepare(); 1395 } 1396 1397 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex); 1398 return 0; 1399} 1400EXPORT_SYMBOL(LNetNIFini); 1401 1402/** 1403 * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and 1404 * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet 1405 * internal ioctl handler. 1406 * 1407 * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it. 1408 * 1409 * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer. 1410 * The data will be printed to system console. Don't use it excessively. 1411 * \param arg A pointer to lnet_process_id_t, process ID of the peer. 1412 * 1413 * \return Always return 0 when called by users directly (i.e., not via ioctl). 
1414 */ 1415int 1416LNetCtl(unsigned int cmd, void *arg) 1417{ 1418 struct libcfs_ioctl_data *data = arg; 1419 lnet_process_id_t id = {0}; 1420 lnet_ni_t *ni; 1421 int rc; 1422 1423 LASSERT(the_lnet.ln_init); 1424 LASSERT(the_lnet.ln_refcount > 0); 1425 1426 switch (cmd) { 1427 case IOC_LIBCFS_GET_NI: 1428 rc = LNetGetId(data->ioc_count, &id); 1429 data->ioc_nid = id.nid; 1430 return rc; 1431 1432 case IOC_LIBCFS_FAIL_NID: 1433 return lnet_fail_nid(data->ioc_nid, data->ioc_count); 1434 1435 case IOC_LIBCFS_ADD_ROUTE: 1436 rc = lnet_add_route(data->ioc_net, data->ioc_count, 1437 data->ioc_nid, data->ioc_priority); 1438 return (rc != 0) ? rc : lnet_check_routes(); 1439 1440 case IOC_LIBCFS_DEL_ROUTE: 1441 return lnet_del_route(data->ioc_net, data->ioc_nid); 1442 1443 case IOC_LIBCFS_GET_ROUTE: 1444 return lnet_get_route(data->ioc_count, 1445 &data->ioc_net, &data->ioc_count, 1446 &data->ioc_nid, &data->ioc_flags, 1447 &data->ioc_priority); 1448 case IOC_LIBCFS_NOTIFY_ROUTER: 1449 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, 1450 cfs_time_current() - 1451 cfs_time_seconds(get_seconds() - 1452 (time_t)data->ioc_u64[0])); 1453 1454 case IOC_LIBCFS_PORTALS_COMPATIBILITY: 1455 /* This can be removed once lustre stops calling it */ 1456 return 0; 1457 1458 case IOC_LIBCFS_LNET_DIST: 1459 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]); 1460 if (rc < 0 && rc != -EHOSTUNREACH) 1461 return rc; 1462 1463 data->ioc_u32[0] = rc; 1464 return 0; 1465 1466 case IOC_LIBCFS_TESTPROTOCOMPAT: 1467 lnet_net_lock(LNET_LOCK_EX); 1468 the_lnet.ln_testprotocompat = data->ioc_flags; 1469 lnet_net_unlock(LNET_LOCK_EX); 1470 return 0; 1471 1472 case IOC_LIBCFS_PING: 1473 id.nid = data->ioc_nid; 1474 id.pid = data->ioc_u32[0]; 1475 rc = lnet_ping(id, data->ioc_u32[1], /* timeout */ 1476 (lnet_process_id_t *)data->ioc_pbuf1, 1477 data->ioc_plen1/sizeof(lnet_process_id_t)); 1478 if (rc < 0) 1479 return rc; 1480 data->ioc_count = rc; 1481 return 0; 1482 1483 case 
IOC_LIBCFS_DEBUG_PEER: { 1484 /* CAVEAT EMPTOR: this one designed for calling directly; not 1485 * via an ioctl */ 1486 id = *((lnet_process_id_t *) arg); 1487 1488 lnet_debug_peer(id.nid); 1489 1490 ni = lnet_net2ni(LNET_NIDNET(id.nid)); 1491 if (ni == NULL) { 1492 CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id)); 1493 } else { 1494 if (ni->ni_lnd->lnd_ctl == NULL) { 1495 CDEBUG(D_WARNING, "No ctl for %s\n", 1496 libcfs_id2str(id)); 1497 } else { 1498 (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg); 1499 } 1500 1501 lnet_ni_decref(ni); 1502 } 1503 return 0; 1504 } 1505 1506 default: 1507 ni = lnet_net2ni(data->ioc_net); 1508 if (ni == NULL) 1509 return -EINVAL; 1510 1511 if (ni->ni_lnd->lnd_ctl == NULL) 1512 rc = -EINVAL; 1513 else 1514 rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg); 1515 1516 lnet_ni_decref(ni); 1517 return rc; 1518 } 1519 /* not reached */ 1520} 1521EXPORT_SYMBOL(LNetCtl); 1522 1523/** 1524 * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that 1525 * all interfaces share a same PID, as requested by LNetNIInit(). 1526 * 1527 * \param index Index of the interface to look up. 1528 * \param id On successful return, this location will hold the 1529 * lnet_process_id_t ID of the interface. 1530 * 1531 * \retval 0 If an interface exists at \a index. 1532 * \retval -ENOENT If no interface has been found. 1533 */ 1534int 1535LNetGetId(unsigned int index, lnet_process_id_t *id) 1536{ 1537 struct lnet_ni *ni; 1538 struct list_head *tmp; 1539 int cpt; 1540 int rc = -ENOENT; 1541 1542 LASSERT(the_lnet.ln_init); 1543 1544 /* LNetNI initilization failed? 
*/ 1545 if (the_lnet.ln_refcount == 0) 1546 return rc; 1547 1548 cpt = lnet_net_lock_current(); 1549 1550 list_for_each(tmp, &the_lnet.ln_nis) { 1551 if (index-- != 0) 1552 continue; 1553 1554 ni = list_entry(tmp, lnet_ni_t, ni_list); 1555 1556 id->nid = ni->ni_nid; 1557 id->pid = the_lnet.ln_pid; 1558 rc = 0; 1559 break; 1560 } 1561 1562 lnet_net_unlock(cpt); 1563 return rc; 1564} 1565EXPORT_SYMBOL(LNetGetId); 1566 1567/** 1568 * Print a string representation of handle \a h into buffer \a str of 1569 * \a len bytes. 1570 */ 1571void 1572LNetSnprintHandle(char *str, int len, lnet_handle_any_t h) 1573{ 1574 snprintf(str, len, "%#llx", h.cookie); 1575} 1576EXPORT_SYMBOL(LNetSnprintHandle); 1577 1578static int 1579lnet_create_ping_info(void) 1580{ 1581 int i; 1582 int n; 1583 int rc; 1584 unsigned int infosz; 1585 lnet_ni_t *ni; 1586 lnet_process_id_t id; 1587 lnet_ping_info_t *pinfo; 1588 1589 for (n = 0; ; n++) { 1590 rc = LNetGetId(n, &id); 1591 if (rc == -ENOENT) 1592 break; 1593 1594 LASSERT(rc == 0); 1595 } 1596 1597 infosz = offsetof(lnet_ping_info_t, pi_ni[n]); 1598 LIBCFS_ALLOC(pinfo, infosz); 1599 if (pinfo == NULL) { 1600 CERROR("Can't allocate ping info[%d]\n", n); 1601 return -ENOMEM; 1602 } 1603 1604 pinfo->pi_nnis = n; 1605 pinfo->pi_pid = the_lnet.ln_pid; 1606 pinfo->pi_magic = LNET_PROTO_PING_MAGIC; 1607 pinfo->pi_features = LNET_PING_FEAT_NI_STATUS; 1608 1609 for (i = 0; i < n; i++) { 1610 lnet_ni_status_t *ns = &pinfo->pi_ni[i]; 1611 1612 rc = LNetGetId(i, &id); 1613 LASSERT(rc == 0); 1614 1615 ns->ns_nid = id.nid; 1616 ns->ns_status = LNET_NI_STATUS_UP; 1617 1618 lnet_net_lock(0); 1619 1620 ni = lnet_nid2ni_locked(id.nid, 0); 1621 LASSERT(ni != NULL); 1622 1623 lnet_ni_lock(ni); 1624 LASSERT(ni->ni_status == NULL); 1625 ni->ni_status = ns; 1626 lnet_ni_unlock(ni); 1627 1628 lnet_ni_decref_locked(ni, 0); 1629 lnet_net_unlock(0); 1630 } 1631 1632 the_lnet.ln_ping_info = pinfo; 1633 return 0; 1634} 1635 1636static void 1637lnet_destroy_ping_info(void) 
1638{ 1639 struct lnet_ni *ni; 1640 1641 lnet_net_lock(0); 1642 1643 list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { 1644 lnet_ni_lock(ni); 1645 ni->ni_status = NULL; 1646 lnet_ni_unlock(ni); 1647 } 1648 1649 lnet_net_unlock(0); 1650 1651 LIBCFS_FREE(the_lnet.ln_ping_info, 1652 offsetof(lnet_ping_info_t, 1653 pi_ni[the_lnet.ln_ping_info->pi_nnis])); 1654 the_lnet.ln_ping_info = NULL; 1655 return; 1656} 1657 1658int 1659lnet_ping_target_init(void) 1660{ 1661 lnet_md_t md = { NULL }; 1662 lnet_handle_me_t meh; 1663 lnet_process_id_t id; 1664 int rc; 1665 int rc2; 1666 int infosz; 1667 1668 rc = lnet_create_ping_info(); 1669 if (rc != 0) 1670 return rc; 1671 1672 /* We can have a tiny EQ since we only need to see the unlink event on 1673 * teardown, which by definition is the last one! */ 1674 rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq); 1675 if (rc != 0) { 1676 CERROR("Can't allocate ping EQ: %d\n", rc); 1677 goto failed_0; 1678 } 1679 1680 memset(&id, 0, sizeof(lnet_process_id_t)); 1681 id.nid = LNET_NID_ANY; 1682 id.pid = LNET_PID_ANY; 1683 1684 rc = LNetMEAttach(LNET_RESERVED_PORTAL, id, 1685 LNET_PROTO_PING_MATCHBITS, 0, 1686 LNET_UNLINK, LNET_INS_AFTER, 1687 &meh); 1688 if (rc != 0) { 1689 CERROR("Can't create ping ME: %d\n", rc); 1690 goto failed_1; 1691 } 1692 1693 /* initialize md content */ 1694 infosz = offsetof(lnet_ping_info_t, 1695 pi_ni[the_lnet.ln_ping_info->pi_nnis]); 1696 md.start = the_lnet.ln_ping_info; 1697 md.length = infosz; 1698 md.threshold = LNET_MD_THRESH_INF; 1699 md.max_size = 0; 1700 md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE | 1701 LNET_MD_MANAGE_REMOTE; 1702 md.user_ptr = NULL; 1703 md.eq_handle = the_lnet.ln_ping_target_eq; 1704 1705 rc = LNetMDAttach(meh, md, 1706 LNET_RETAIN, 1707 &the_lnet.ln_ping_target_md); 1708 if (rc != 0) { 1709 CERROR("Can't attach ping MD: %d\n", rc); 1710 goto failed_2; 1711 } 1712 1713 return 0; 1714 1715 failed_2: 1716 rc2 = LNetMEUnlink(meh); 1717 LASSERT(rc2 == 0); 
1718 failed_1: 1719 rc2 = LNetEQFree(the_lnet.ln_ping_target_eq); 1720 LASSERT(rc2 == 0); 1721 failed_0: 1722 lnet_destroy_ping_info(); 1723 return rc; 1724} 1725 1726void 1727lnet_ping_target_fini(void) 1728{ 1729 lnet_event_t event; 1730 int rc; 1731 int which; 1732 int timeout_ms = 1000; 1733 sigset_t blocked = cfs_block_allsigs(); 1734 1735 LNetMDUnlink(the_lnet.ln_ping_target_md); 1736 /* NB md could be busy; this just starts the unlink */ 1737 1738 for (;;) { 1739 rc = LNetEQPoll(&the_lnet.ln_ping_target_eq, 1, 1740 timeout_ms, &event, &which); 1741 1742 /* I expect overflow... */ 1743 LASSERT(rc >= 0 || rc == -EOVERFLOW); 1744 1745 if (rc == 0) { 1746 /* timed out: provide a diagnostic */ 1747 CWARN("Still waiting for ping MD to unlink\n"); 1748 timeout_ms *= 2; 1749 continue; 1750 } 1751 1752 /* Got a valid event */ 1753 if (event.unlinked) 1754 break; 1755 } 1756 1757 rc = LNetEQFree(the_lnet.ln_ping_target_eq); 1758 LASSERT(rc == 0); 1759 lnet_destroy_ping_info(); 1760 cfs_restore_sigs(blocked); 1761} 1762 1763int 1764lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_ids) 1765{ 1766 lnet_handle_eq_t eqh; 1767 lnet_handle_md_t mdh; 1768 lnet_event_t event; 1769 lnet_md_t md = { NULL }; 1770 int which; 1771 int unlinked = 0; 1772 int replied = 0; 1773 const int a_long_time = 60000; /* mS */ 1774 int infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]); 1775 lnet_ping_info_t *info; 1776 lnet_process_id_t tmpid; 1777 int i; 1778 int nob; 1779 int rc; 1780 int rc2; 1781 sigset_t blocked; 1782 1783 if (n_ids <= 0 || 1784 id.nid == LNET_NID_ANY || 1785 timeout_ms > 500000 || /* arbitrary limit! */ 1786 n_ids > 20) /* arbitrary limit! 
*/ 1787 return -EINVAL; 1788 1789 if (id.pid == LNET_PID_ANY) 1790 id.pid = LUSTRE_SRV_LNET_PID; 1791 1792 LIBCFS_ALLOC(info, infosz); 1793 if (info == NULL) 1794 return -ENOMEM; 1795 1796 /* NB 2 events max (including any unlink event) */ 1797 rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh); 1798 if (rc != 0) { 1799 CERROR("Can't allocate EQ: %d\n", rc); 1800 goto out_0; 1801 } 1802 1803 /* initialize md content */ 1804 md.start = info; 1805 md.length = infosz; 1806 md.threshold = 2; /*GET/REPLY*/ 1807 md.max_size = 0; 1808 md.options = LNET_MD_TRUNCATE; 1809 md.user_ptr = NULL; 1810 md.eq_handle = eqh; 1811 1812 rc = LNetMDBind(md, LNET_UNLINK, &mdh); 1813 if (rc != 0) { 1814 CERROR("Can't bind MD: %d\n", rc); 1815 goto out_1; 1816 } 1817 1818 rc = LNetGet(LNET_NID_ANY, mdh, id, 1819 LNET_RESERVED_PORTAL, 1820 LNET_PROTO_PING_MATCHBITS, 0); 1821 1822 if (rc != 0) { 1823 /* Don't CERROR; this could be deliberate! */ 1824 1825 rc2 = LNetMDUnlink(mdh); 1826 LASSERT(rc2 == 0); 1827 1828 /* NB must wait for the UNLINK event below... */ 1829 unlinked = 1; 1830 timeout_ms = a_long_time; 1831 } 1832 1833 do { 1834 /* MUST block for unlink to complete */ 1835 if (unlinked) 1836 blocked = cfs_block_allsigs(); 1837 1838 rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which); 1839 1840 if (unlinked) 1841 cfs_restore_sigs(blocked); 1842 1843 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2, 1844 (rc2 <= 0) ? -1 : event.type, 1845 (rc2 <= 0) ? -1 : event.status, 1846 (rc2 > 0 && event.unlinked) ? " unlinked" : ""); 1847 1848 LASSERT(rc2 != -EOVERFLOW); /* can't miss anything */ 1849 1850 if (rc2 <= 0 || event.status != 0) { 1851 /* timeout or error */ 1852 if (!replied && rc == 0) 1853 rc = (rc2 < 0) ? rc2 : 1854 (rc2 == 0) ? -ETIMEDOUT : 1855 event.status; 1856 1857 if (!unlinked) { 1858 /* Ensure completion in finite time... 
*/ 1859 LNetMDUnlink(mdh); 1860 /* No assertion (racing with network) */ 1861 unlinked = 1; 1862 timeout_ms = a_long_time; 1863 } else if (rc2 == 0) { 1864 /* timed out waiting for unlink */ 1865 CWARN("ping %s: late network completion\n", 1866 libcfs_id2str(id)); 1867 } 1868 } else if (event.type == LNET_EVENT_REPLY) { 1869 replied = 1; 1870 rc = event.mlength; 1871 } 1872 1873 } while (rc2 <= 0 || !event.unlinked); 1874 1875 if (!replied) { 1876 if (rc >= 0) 1877 CWARN("%s: Unexpected rc >= 0 but no reply!\n", 1878 libcfs_id2str(id)); 1879 rc = -EIO; 1880 goto out_1; 1881 } 1882 1883 nob = rc; 1884 LASSERT(nob >= 0 && nob <= infosz); 1885 1886 rc = -EPROTO; /* if I can't parse... */ 1887 1888 if (nob < 8) { 1889 /* can't check magic/version */ 1890 CERROR("%s: ping info too short %d\n", 1891 libcfs_id2str(id), nob); 1892 goto out_1; 1893 } 1894 1895 if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) { 1896 lnet_swap_pinginfo(info); 1897 } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) { 1898 CERROR("%s: Unexpected magic %08x\n", 1899 libcfs_id2str(id), info->pi_magic); 1900 goto out_1; 1901 } 1902 1903 if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) { 1904 CERROR("%s: ping w/o NI status: 0x%x\n", 1905 libcfs_id2str(id), info->pi_features); 1906 goto out_1; 1907 } 1908 1909 if (nob < offsetof(lnet_ping_info_t, pi_ni[0])) { 1910 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id), 1911 nob, (int)offsetof(lnet_ping_info_t, pi_ni[0])); 1912 goto out_1; 1913 } 1914 1915 if (info->pi_nnis < n_ids) 1916 n_ids = info->pi_nnis; 1917 1918 if (nob < offsetof(lnet_ping_info_t, pi_ni[n_ids])) { 1919 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id), 1920 nob, (int)offsetof(lnet_ping_info_t, pi_ni[n_ids])); 1921 goto out_1; 1922 } 1923 1924 rc = -EFAULT; /* If I SEGV... 
*/ 1925 1926 memset(&tmpid, 0, sizeof(tmpid)); 1927 for (i = 0; i < n_ids; i++) { 1928 tmpid.pid = info->pi_pid; 1929 tmpid.nid = info->pi_ni[i].ns_nid; 1930 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) 1931 goto out_1; 1932 } 1933 rc = info->pi_nnis; 1934 1935 out_1: 1936 rc2 = LNetEQFree(eqh); 1937 if (rc2 != 0) 1938 CERROR("rc2 %d\n", rc2); 1939 LASSERT(rc2 == 0); 1940 1941 out_0: 1942 LIBCFS_FREE(info, infosz); 1943 return rc; 1944} 1945