1/* 2 * GPL HEADER START 3 * 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 only, 8 * as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License version 2 for more details (a copy is included 14 * in the LICENSE file that accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License 17 * version 2 along with this program; If not, see 18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf 19 * 20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 21 * CA 95054 USA or visit www.sun.com if you need additional information or 22 * have any questions. 23 * 24 * GPL HEADER END 25 */ 26/* 27 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 28 * Use is subject to license terms. 29 * 30 * Copyright (c) 2011, 2012, Intel Corporation. 31 */ 32/* 33 * This file is part of Lustre, http://www.lustre.org/ 34 * Lustre is a trademark of Sun Microsystems, Inc. 35 * 36 * lnet/klnds/socklnd/socklnd.c 37 * 38 * Author: Zach Brown <zab@zabbo.net> 39 * Author: Peter J. Braam <braam@clusterfs.com> 40 * Author: Phil Schwan <phil@clusterfs.com> 41 * Author: Eric Barton <eric@bartonsoftware.com> 42 */ 43 44#include "socklnd.h" 45 46static lnd_t the_ksocklnd; 47ksock_nal_data_t ksocknal_data; 48 49static ksock_interface_t * 50ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip) 51{ 52 ksock_net_t *net = ni->ni_data; 53 int i; 54 ksock_interface_t *iface; 55 56 for (i = 0; i < net->ksnn_ninterfaces; i++) { 57 LASSERT(i < LNET_MAX_INTERFACES); 58 iface = &net->ksnn_interfaces[i]; 59 60 if (iface->ksni_ipaddr == ip) 61 return iface; 62 } 63 64 return NULL; 65} 66 67static ksock_route_t * 68ksocknal_create_route (__u32 ipaddr, int port) 69{ 70 ksock_route_t *route; 71 72 LIBCFS_ALLOC (route, sizeof (*route)); 73 if (route == NULL) 74 return NULL; 75 76 atomic_set (&route->ksnr_refcount, 1); 77 route->ksnr_peer = NULL; 78 route->ksnr_retry_interval = 0; /* OK to connect at any time */ 79 route->ksnr_ipaddr = ipaddr; 80 route->ksnr_port = port; 81 route->ksnr_scheduled = 0; 82 route->ksnr_connecting = 0; 83 route->ksnr_connected = 0; 84 route->ksnr_deleted = 0; 85 route->ksnr_conn_count = 0; 86 route->ksnr_share_count = 0; 87 88 return route; 89} 90 91void 92ksocknal_destroy_route (ksock_route_t *route) 93{ 94 LASSERT (atomic_read(&route->ksnr_refcount) == 0); 95 96 if (route->ksnr_peer != NULL) 97 ksocknal_peer_decref(route->ksnr_peer); 98 99 LIBCFS_FREE (route, sizeof (*route)); 100} 101 102static int 103ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id) 104{ 105 ksock_net_t *net = ni->ni_data; 106 ksock_peer_t *peer; 107 108 LASSERT (id.nid != LNET_NID_ANY); 109 LASSERT (id.pid != LNET_PID_ANY); 110 LASSERT (!in_interrupt()); 111 112 LIBCFS_ALLOC (peer, sizeof (*peer)); 113 if (peer == NULL) 114 return -ENOMEM; 115 116 peer->ksnp_ni = ni; 117 peer->ksnp_id = id; 118 atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */ 119 peer->ksnp_closing = 0; 120 peer->ksnp_accepting = 0; 121 peer->ksnp_proto = NULL; 122 peer->ksnp_last_alive = 0; 123 peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1; 124 125 INIT_LIST_HEAD (&peer->ksnp_conns); 126 INIT_LIST_HEAD (&peer->ksnp_routes); 127 INIT_LIST_HEAD (&peer->ksnp_tx_queue); 128 INIT_LIST_HEAD (&peer->ksnp_zc_req_list); 129 spin_lock_init(&peer->ksnp_lock); 130 131 spin_lock_bh(&net->ksnn_lock); 132 133 if (net->ksnn_shutdown) { 134 spin_unlock_bh(&net->ksnn_lock); 135 136 LIBCFS_FREE(peer, sizeof(*peer)); 137 CERROR("Can't create peer: network shutdown\n"); 138 return -ESHUTDOWN; 139 } 140 141 net->ksnn_npeers++; 142 143 spin_unlock_bh(&net->ksnn_lock); 144 145 *peerp = peer; 146 return 0; 147} 148 149void 150ksocknal_destroy_peer (ksock_peer_t *peer) 151{ 152 ksock_net_t *net = peer->ksnp_ni->ni_data; 153 154 CDEBUG (D_NET, "peer %s %p deleted\n", 155 libcfs_id2str(peer->ksnp_id), peer); 156 157 LASSERT (atomic_read (&peer->ksnp_refcount) == 0); 158 LASSERT (peer->ksnp_accepting == 0); 159 LASSERT (list_empty (&peer->ksnp_conns)); 160 LASSERT (list_empty (&peer->ksnp_routes)); 161 LASSERT (list_empty (&peer->ksnp_tx_queue)); 162 LASSERT (list_empty (&peer->ksnp_zc_req_list)); 163 164 LIBCFS_FREE (peer, sizeof (*peer)); 165 166 /* NB a peer's connections and routes keep a reference on their peer 167 * until they are destroyed, so we can be assured that _all_ state to 168 * do with this peer has been cleaned up when its refcount drops to 169 * zero. */ 170 spin_lock_bh(&net->ksnn_lock); 171 net->ksnn_npeers--; 172 spin_unlock_bh(&net->ksnn_lock); 173} 174 175ksock_peer_t * 176ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id) 177{ 178 struct list_head *peer_list = ksocknal_nid2peerlist(id.nid); 179 struct list_head *tmp; 180 ksock_peer_t *peer; 181 182 list_for_each (tmp, peer_list) { 183 184 peer = list_entry (tmp, ksock_peer_t, ksnp_list); 185 186 LASSERT (!peer->ksnp_closing); 187 188 if (peer->ksnp_ni != ni) 189 continue; 190 191 if (peer->ksnp_id.nid != id.nid || 192 peer->ksnp_id.pid != id.pid) 193 continue; 194 195 CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", 196 peer, libcfs_id2str(id), 197 atomic_read(&peer->ksnp_refcount)); 198 return peer; 199 } 200 return NULL; 201} 202 203ksock_peer_t * 204ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id) 205{ 206 ksock_peer_t *peer; 207 208 read_lock(&ksocknal_data.ksnd_global_lock); 209 peer = ksocknal_find_peer_locked(ni, id); 210 if (peer != NULL) /* +1 ref for caller? */ 211 ksocknal_peer_addref(peer); 212 read_unlock(&ksocknal_data.ksnd_global_lock); 213 214 return peer; 215} 216 217static void 218ksocknal_unlink_peer_locked (ksock_peer_t *peer) 219{ 220 int i; 221 __u32 ip; 222 ksock_interface_t *iface; 223 224 for (i = 0; i < peer->ksnp_n_passive_ips; i++) { 225 LASSERT (i < LNET_MAX_INTERFACES); 226 ip = peer->ksnp_passive_ips[i]; 227 228 iface = ksocknal_ip2iface(peer->ksnp_ni, ip); 229 /* All IPs in peer->ksnp_passive_ips[] come from the 230 * interface list, therefore the call must succeed. */ 231 LASSERT (iface != NULL); 232 233 CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n", 234 peer, iface, iface->ksni_nroutes); 235 iface->ksni_npeers--; 236 } 237 238 LASSERT (list_empty(&peer->ksnp_conns)); 239 LASSERT (list_empty(&peer->ksnp_routes)); 240 LASSERT (!peer->ksnp_closing); 241 peer->ksnp_closing = 1; 242 list_del (&peer->ksnp_list); 243 /* lose peerlist's ref */ 244 ksocknal_peer_decref(peer); 245} 246 247static int 248ksocknal_get_peer_info (lnet_ni_t *ni, int index, 249 lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, 250 int *port, int *conn_count, int *share_count) 251{ 252 ksock_peer_t *peer; 253 struct list_head *ptmp; 254 ksock_route_t *route; 255 struct list_head *rtmp; 256 int i; 257 int j; 258 int rc = -ENOENT; 259 260 read_lock(&ksocknal_data.ksnd_global_lock); 261 262 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { 263 264 list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { 265 peer = list_entry (ptmp, ksock_peer_t, ksnp_list); 266 267 if (peer->ksnp_ni != ni) 268 continue; 269 270 if (peer->ksnp_n_passive_ips == 0 && 271 list_empty(&peer->ksnp_routes)) { 272 if (index-- > 0) 273 continue; 274 275 *id = peer->ksnp_id; 276 *myip = 0; 277 *peer_ip = 0; 278 *port = 0; 279 *conn_count = 0; 280 *share_count = 0; 281 rc = 0; 282 goto out; 283 } 284 285 for (j = 0; j < peer->ksnp_n_passive_ips; j++) { 286 if (index-- > 0) 287 continue; 288 289 *id = peer->ksnp_id; 290 *myip = peer->ksnp_passive_ips[j]; 291 *peer_ip = 0; 292 *port = 0; 293 *conn_count = 0; 294 *share_count = 0; 295 rc = 0; 296 goto out; 297 } 298 299 list_for_each (rtmp, &peer->ksnp_routes) { 300 if (index-- > 0) 301 continue; 302 303 route = list_entry(rtmp, ksock_route_t, 304 ksnr_list); 305 306 *id = peer->ksnp_id; 307 *myip = route->ksnr_myipaddr; 308 *peer_ip = route->ksnr_ipaddr; 309 *port = route->ksnr_port; 310 *conn_count = route->ksnr_conn_count; 311 *share_count = route->ksnr_share_count; 312 rc = 0; 313 goto out; 314 } 315 } 316 } 317 out: 318 read_unlock(&ksocknal_data.ksnd_global_lock); 319 return rc; 320} 321 322static void 323ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) 324{ 325 ksock_peer_t *peer = route->ksnr_peer; 326 int type = conn->ksnc_type; 327 ksock_interface_t *iface; 328 329 conn->ksnc_route = route; 330 ksocknal_route_addref(route); 331 332 if (route->ksnr_myipaddr != conn->ksnc_myipaddr) { 333 if (route->ksnr_myipaddr == 0) { 334 /* route wasn't bound locally yet (the initial route) */ 335 CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n", 336 libcfs_id2str(peer->ksnp_id), 337 &route->ksnr_ipaddr, 338 &conn->ksnc_myipaddr); 339 } else { 340 CDEBUG(D_NET, "Rebinding %s %pI4h from " 341 "%pI4h to %pI4h\n", 342 libcfs_id2str(peer->ksnp_id), 343 &route->ksnr_ipaddr, 344 &route->ksnr_myipaddr, 345 &conn->ksnc_myipaddr); 346 347 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, 348 route->ksnr_myipaddr); 349 if (iface != NULL) 350 iface->ksni_nroutes--; 351 } 352 route->ksnr_myipaddr = conn->ksnc_myipaddr; 353 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, 354 route->ksnr_myipaddr); 355 if (iface != NULL) 356 iface->ksni_nroutes++; 357 } 358 359 route->ksnr_connected |= (1<<type); 360 route->ksnr_conn_count++; 361 362 /* Successful connection => further attempts can 363 * proceed immediately */ 364 route->ksnr_retry_interval = 0; 365} 366 367static void 368ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) 369{ 370 struct list_head *tmp; 371 ksock_conn_t *conn; 372 ksock_route_t *route2; 373 374 LASSERT (!peer->ksnp_closing); 375 LASSERT (route->ksnr_peer == NULL); 376 LASSERT (!route->ksnr_scheduled); 377 LASSERT (!route->ksnr_connecting); 378 LASSERT (route->ksnr_connected == 0); 379 380 /* LASSERT(unique) */ 381 list_for_each(tmp, &peer->ksnp_routes) { 382 route2 = list_entry(tmp, ksock_route_t, ksnr_list); 383 384 if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { 385 CERROR("Duplicate route %s %pI4h\n", 386 libcfs_id2str(peer->ksnp_id), 387 &route->ksnr_ipaddr); 388 LBUG(); 389 } 390 } 391 392 route->ksnr_peer = peer; 393 ksocknal_peer_addref(peer); 394 /* peer's routelist takes over my ref on 'route' */ 395 list_add_tail(&route->ksnr_list, &peer->ksnp_routes); 396 397 list_for_each(tmp, &peer->ksnp_conns) { 398 conn = list_entry(tmp, ksock_conn_t, ksnc_list); 399 400 if (conn->ksnc_ipaddr != route->ksnr_ipaddr) 401 continue; 402 403 ksocknal_associate_route_conn_locked(route, conn); 404 /* keep going (typed routes) */ 405 } 406} 407 408static void 409ksocknal_del_route_locked (ksock_route_t *route) 410{ 411 ksock_peer_t *peer = route->ksnr_peer; 412 ksock_interface_t *iface; 413 ksock_conn_t *conn; 414 struct list_head *ctmp; 415 struct list_head *cnxt; 416 417 LASSERT (!route->ksnr_deleted); 418 419 /* Close associated conns */ 420 list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { 421 conn = list_entry(ctmp, ksock_conn_t, ksnc_list); 422 423 if (conn->ksnc_route != route) 424 continue; 425 426 ksocknal_close_conn_locked (conn, 0); 427 } 428 429 if (route->ksnr_myipaddr != 0) { 430 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, 431 route->ksnr_myipaddr); 432 if (iface != NULL) 433 iface->ksni_nroutes--; 434 } 435 436 route->ksnr_deleted = 1; 437 list_del (&route->ksnr_list); 438 ksocknal_route_decref(route); /* drop peer's ref */ 439 440 if (list_empty (&peer->ksnp_routes) && 441 list_empty (&peer->ksnp_conns)) { 442 /* I've just removed the last route to a peer with no active 443 * connections */ 444 ksocknal_unlink_peer_locked (peer); 445 } 446} 447 448int 449ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port) 450{ 451 struct list_head *tmp; 452 ksock_peer_t *peer; 453 ksock_peer_t *peer2; 454 ksock_route_t *route; 455 ksock_route_t *route2; 456 int rc; 457 458 if (id.nid == LNET_NID_ANY || 459 id.pid == LNET_PID_ANY) 460 return -EINVAL; 461 462 /* Have a brand new peer ready... */ 463 rc = ksocknal_create_peer(&peer, ni, id); 464 if (rc != 0) 465 return rc; 466 467 route = ksocknal_create_route (ipaddr, port); 468 if (route == NULL) { 469 ksocknal_peer_decref(peer); 470 return -ENOMEM; 471 } 472 473 write_lock_bh(&ksocknal_data.ksnd_global_lock); 474 475 /* always called with a ref on ni, so shutdown can't have started */ 476 LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0); 477 478 peer2 = ksocknal_find_peer_locked (ni, id); 479 if (peer2 != NULL) { 480 ksocknal_peer_decref(peer); 481 peer = peer2; 482 } else { 483 /* peer table takes my ref on peer */ 484 list_add_tail (&peer->ksnp_list, 485 ksocknal_nid2peerlist (id.nid)); 486 } 487 488 route2 = NULL; 489 list_for_each (tmp, &peer->ksnp_routes) { 490 route2 = list_entry(tmp, ksock_route_t, ksnr_list); 491 492 if (route2->ksnr_ipaddr == ipaddr) 493 break; 494 495 route2 = NULL; 496 } 497 if (route2 == NULL) { 498 ksocknal_add_route_locked(peer, route); 499 route->ksnr_share_count++; 500 } else { 501 ksocknal_route_decref(route); 502 route2->ksnr_share_count++; 503 } 504 505 write_unlock_bh(&ksocknal_data.ksnd_global_lock); 506 507 return 0; 508} 509 510static void 511ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip) 512{ 513 ksock_conn_t *conn; 514 ksock_route_t *route; 515 struct list_head *tmp; 516 struct list_head *nxt; 517 int nshared; 518 519 LASSERT (!peer->ksnp_closing); 520 521 /* Extra ref prevents peer disappearing until I'm done with it */ 522 ksocknal_peer_addref(peer); 523 524 list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { 525 route = list_entry(tmp, ksock_route_t, ksnr_list); 526 527 /* no match */ 528 if (!(ip == 0 || route->ksnr_ipaddr == ip)) 529 continue; 530 531 route->ksnr_share_count = 0; 532 /* This deletes associated conns too */ 533 ksocknal_del_route_locked (route); 534 } 535 536 nshared = 0; 537 list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { 538 route = list_entry(tmp, ksock_route_t, ksnr_list); 539 nshared += route->ksnr_share_count; 540 } 541 542 if (nshared == 0) { 543 /* remove everything else if there are no explicit entries 544 * left */ 545 546 list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { 547 route = list_entry(tmp, ksock_route_t, ksnr_list); 548 549 /* we should only be removing auto-entries */ 550 LASSERT(route->ksnr_share_count == 0); 551 ksocknal_del_route_locked (route); 552 } 553 554 list_for_each_safe (tmp, nxt, &peer->ksnp_conns) { 555 conn = list_entry(tmp, ksock_conn_t, ksnc_list); 556 557 ksocknal_close_conn_locked(conn, 0); 558 } 559 } 560 561 ksocknal_peer_decref(peer); 562 /* NB peer unlinks itself when last conn/route is removed */ 563} 564 565static int 566ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip) 567{ 568 LIST_HEAD (zombies); 569 struct list_head *ptmp; 570 struct list_head *pnxt; 571 ksock_peer_t *peer; 572 int lo; 573 int hi; 574 int i; 575 int rc = -ENOENT; 576 577 write_lock_bh(&ksocknal_data.ksnd_global_lock); 578 579 if (id.nid != LNET_NID_ANY) 580 lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); 581 else { 582 lo = 0; 583 hi = ksocknal_data.ksnd_peer_hash_size - 1; 584 } 585 586 for (i = lo; i <= hi; i++) { 587 list_for_each_safe (ptmp, pnxt, 588 &ksocknal_data.ksnd_peers[i]) { 589 peer = list_entry (ptmp, ksock_peer_t, ksnp_list); 590 591 if (peer->ksnp_ni != ni) 592 continue; 593 594 if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) && 595 (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid))) 596 continue; 597 598 ksocknal_peer_addref(peer); /* a ref for me... */ 599 600 ksocknal_del_peer_locked (peer, ip); 601 602 if (peer->ksnp_closing && 603 !list_empty(&peer->ksnp_tx_queue)) { 604 LASSERT (list_empty(&peer->ksnp_conns)); 605 LASSERT (list_empty(&peer->ksnp_routes)); 606 607 list_splice_init(&peer->ksnp_tx_queue, 608 &zombies); 609 } 610 611 ksocknal_peer_decref(peer); /* ...till here */ 612 613 rc = 0; /* matched! */ 614 } 615 } 616 617 write_unlock_bh(&ksocknal_data.ksnd_global_lock); 618 619 ksocknal_txlist_done(ni, &zombies, 1); 620 621 return rc; 622} 623 624static ksock_conn_t * 625ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index) 626{ 627 ksock_peer_t *peer; 628 struct list_head *ptmp; 629 ksock_conn_t *conn; 630 struct list_head *ctmp; 631 int i; 632 633 read_lock(&ksocknal_data.ksnd_global_lock); 634 635 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { 636 list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { 637 peer = list_entry (ptmp, ksock_peer_t, ksnp_list); 638 639 LASSERT (!peer->ksnp_closing); 640 641 if (peer->ksnp_ni != ni) 642 continue; 643 644 list_for_each (ctmp, &peer->ksnp_conns) { 645 if (index-- > 0) 646 continue; 647 648 conn = list_entry (ctmp, ksock_conn_t, 649 ksnc_list); 650 ksocknal_conn_addref(conn); 651 read_unlock(&ksocknal_data.ksnd_global_lock); 652 return conn; 653 } 654 } 655 } 656 657 read_unlock(&ksocknal_data.ksnd_global_lock); 658 return NULL; 659} 660 661static ksock_sched_t * 662ksocknal_choose_scheduler_locked(unsigned int cpt) 663{ 664 struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt]; 665 ksock_sched_t *sched; 666 int i; 667 668 LASSERT(info->ksi_nthreads > 0); 669 670 sched = &info->ksi_scheds[0]; 671 /* 672 * NB: it's safe so far, but info->ksi_nthreads could be changed 673 * at runtime when we have dynamic LNet configuration, then we 674 * need to take care of this. 675 */ 676 for (i = 1; i < info->ksi_nthreads; i++) { 677 if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns) 678 sched = &info->ksi_scheds[i]; 679 } 680 681 return sched; 682} 683 684static int 685ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs) 686{ 687 ksock_net_t *net = ni->ni_data; 688 int i; 689 int nip; 690 691 read_lock(&ksocknal_data.ksnd_global_lock); 692 693 nip = net->ksnn_ninterfaces; 694 LASSERT (nip <= LNET_MAX_INTERFACES); 695 696 /* Only offer interfaces for additional connections if I have 697 * more than one. */ 698 if (nip < 2) { 699 read_unlock(&ksocknal_data.ksnd_global_lock); 700 return 0; 701 } 702 703 for (i = 0; i < nip; i++) { 704 ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr; 705 LASSERT (ipaddrs[i] != 0); 706 } 707 708 read_unlock(&ksocknal_data.ksnd_global_lock); 709 return nip; 710} 711 712static int 713ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips) 714{ 715 int best_netmatch = 0; 716 int best_xor = 0; 717 int best = -1; 718 int this_xor; 719 int this_netmatch; 720 int i; 721 722 for (i = 0; i < nips; i++) { 723 if (ips[i] == 0) 724 continue; 725 726 this_xor = (ips[i] ^ iface->ksni_ipaddr); 727 this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0; 728 729 if (!(best < 0 || 730 best_netmatch < this_netmatch || 731 (best_netmatch == this_netmatch && 732 best_xor > this_xor))) 733 continue; 734 735 best = i; 736 best_netmatch = this_netmatch; 737 best_xor = this_xor; 738 } 739 740 LASSERT (best >= 0); 741 return best; 742} 743 744static int 745ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) 746{ 747 rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; 748 ksock_net_t *net = peer->ksnp_ni->ni_data; 749 ksock_interface_t *iface; 750 ksock_interface_t *best_iface; 751 int n_ips; 752 int i; 753 int j; 754 int k; 755 __u32 ip; 756 __u32 xor; 757 int this_netmatch; 758 int best_netmatch; 759 int best_npeers; 760 761 /* CAVEAT EMPTOR: We do all our interface matching with an 762 * exclusive hold of global lock at IRQ priority. We're only 763 * expecting to be dealing with small numbers of interfaces, so the 764 * O(n**3)-ness shouldn't matter */ 765 766 /* Also note that I'm not going to return more than n_peerips 767 * interfaces, even if I have more myself */ 768 769 write_lock_bh(global_lock); 770 771 LASSERT (n_peerips <= LNET_MAX_INTERFACES); 772 LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); 773 774 /* Only match interfaces for additional connections 775 * if I have > 1 interface */ 776 n_ips = (net->ksnn_ninterfaces < 2) ? 0 : 777 MIN(n_peerips, net->ksnn_ninterfaces); 778 779 for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) { 780 /* ^ yes really... */ 781 782 /* If we have any new interfaces, first tick off all the 783 * peer IPs that match old interfaces, then choose new 784 * interfaces to match the remaining peer IPS. 785 * We don't forget interfaces we've stopped using; we might 786 * start using them again... */ 787 788 if (i < peer->ksnp_n_passive_ips) { 789 /* Old interface. */ 790 ip = peer->ksnp_passive_ips[i]; 791 best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip); 792 793 } else { 794 /* choose a new interface */ 795 LASSERT (i == peer->ksnp_n_passive_ips); 796 797 best_iface = NULL; 798 best_netmatch = 0; 799 best_npeers = 0; 800 801 for (j = 0; j < net->ksnn_ninterfaces; j++) { 802 iface = &net->ksnn_interfaces[j]; 803 ip = iface->ksni_ipaddr; 804 805 for (k = 0; k < peer->ksnp_n_passive_ips; k++) 806 if (peer->ksnp_passive_ips[k] == ip) 807 break; 808 809 if (k < peer->ksnp_n_passive_ips) /* using it already */ 810 continue; 811 812 k = ksocknal_match_peerip(iface, peerips, n_peerips); 813 xor = (ip ^ peerips[k]); 814 this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0; 815 816 if (!(best_iface == NULL || 817 best_netmatch < this_netmatch || 818 (best_netmatch == this_netmatch && 819 best_npeers > iface->ksni_npeers))) 820 continue; 821 822 best_iface = iface; 823 best_netmatch = this_netmatch; 824 best_npeers = iface->ksni_npeers; 825 } 826 827 best_iface->ksni_npeers++; 828 ip = best_iface->ksni_ipaddr; 829 peer->ksnp_passive_ips[i] = ip; 830 peer->ksnp_n_passive_ips = i+1; 831 } 832 833 /* mark the best matching peer IP used */ 834 j = ksocknal_match_peerip(best_iface, peerips, n_peerips); 835 peerips[j] = 0; 836 } 837 838 /* Overwrite input peer IP addresses */ 839 memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips)); 840 841 write_unlock_bh(global_lock); 842 843 return n_ips; 844} 845 846static void 847ksocknal_create_routes(ksock_peer_t *peer, int port, 848 __u32 *peer_ipaddrs, int npeer_ipaddrs) 849{ 850 ksock_route_t *newroute = NULL; 851 rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; 852 lnet_ni_t *ni = peer->ksnp_ni; 853 ksock_net_t *net = ni->ni_data; 854 struct list_head *rtmp; 855 ksock_route_t *route; 856 ksock_interface_t *iface; 857 ksock_interface_t *best_iface; 858 int best_netmatch; 859 int this_netmatch; 860 int best_nroutes; 861 int i; 862 int j; 863 864 /* CAVEAT EMPTOR: We do all our interface matching with an 865 * exclusive hold of global lock at IRQ priority. We're only 866 * expecting to be dealing with small numbers of interfaces, so the 867 * O(n**3)-ness here shouldn't matter */ 868 869 write_lock_bh(global_lock); 870 871 if (net->ksnn_ninterfaces < 2) { 872 /* Only create additional connections 873 * if I have > 1 interface */ 874 write_unlock_bh(global_lock); 875 return; 876 } 877 878 LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES); 879 880 for (i = 0; i < npeer_ipaddrs; i++) { 881 if (newroute != NULL) { 882 newroute->ksnr_ipaddr = peer_ipaddrs[i]; 883 } else { 884 write_unlock_bh(global_lock); 885 886 newroute = ksocknal_create_route(peer_ipaddrs[i], port); 887 if (newroute == NULL) 888 return; 889 890 write_lock_bh(global_lock); 891 } 892 893 if (peer->ksnp_closing) { 894 /* peer got closed under me */ 895 break; 896 } 897 898 /* Already got a route? */ 899 route = NULL; 900 list_for_each(rtmp, &peer->ksnp_routes) { 901 route = list_entry(rtmp, ksock_route_t, ksnr_list); 902 903 if (route->ksnr_ipaddr == newroute->ksnr_ipaddr) 904 break; 905 906 route = NULL; 907 } 908 if (route != NULL) 909 continue; 910 911 best_iface = NULL; 912 best_nroutes = 0; 913 best_netmatch = 0; 914 915 LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); 916 917 /* Select interface to connect from */ 918 for (j = 0; j < net->ksnn_ninterfaces; j++) { 919 iface = &net->ksnn_interfaces[j]; 920 921 /* Using this interface already? */ 922 list_for_each(rtmp, &peer->ksnp_routes) { 923 route = list_entry(rtmp, ksock_route_t, 924 ksnr_list); 925 926 if (route->ksnr_myipaddr == iface->ksni_ipaddr) 927 break; 928 929 route = NULL; 930 } 931 if (route != NULL) 932 continue; 933 934 this_netmatch = (((iface->ksni_ipaddr ^ 935 newroute->ksnr_ipaddr) & 936 iface->ksni_netmask) == 0) ? 1 : 0; 937 938 if (!(best_iface == NULL || 939 best_netmatch < this_netmatch || 940 (best_netmatch == this_netmatch && 941 best_nroutes > iface->ksni_nroutes))) 942 continue; 943 944 best_iface = iface; 945 best_netmatch = this_netmatch; 946 best_nroutes = iface->ksni_nroutes; 947 } 948 949 if (best_iface == NULL) 950 continue; 951 952 newroute->ksnr_myipaddr = best_iface->ksni_ipaddr; 953 best_iface->ksni_nroutes++; 954 955 ksocknal_add_route_locked(peer, newroute); 956 newroute = NULL; 957 } 958 959 write_unlock_bh(global_lock); 960 if (newroute != NULL) 961 ksocknal_route_decref(newroute); 962} 963 964int 965ksocknal_accept (lnet_ni_t *ni, struct socket *sock) 966{ 967 ksock_connreq_t *cr; 968 int rc; 969 __u32 peer_ip; 970 int peer_port; 971 972 rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); 973 LASSERT (rc == 0); /* we succeeded before */ 974 975 LIBCFS_ALLOC(cr, sizeof(*cr)); 976 if (cr == NULL) { 977 LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from " 978 "%pI4h: memory exhausted\n", 979 &peer_ip); 980 return -ENOMEM; 981 } 982 983 lnet_ni_addref(ni); 984 cr->ksncr_ni = ni; 985 cr->ksncr_sock = sock; 986 987 spin_lock_bh(&ksocknal_data.ksnd_connd_lock); 988 989 list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs); 990 wake_up(&ksocknal_data.ksnd_connd_waitq); 991 992 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); 993 return 0; 994} 995 996static int 997ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr) 998{ 999 ksock_route_t *route; 1000 1001 list_for_each_entry (route, &peer->ksnp_routes, ksnr_list) { 1002 1003 if (route->ksnr_ipaddr == ipaddr) 1004 return route->ksnr_connecting; 1005 } 1006 return 0; 1007} 1008 1009int 1010ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, 1011 struct socket *sock, int type) 1012{ 1013 rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; 1014 LIST_HEAD (zombies); 1015 lnet_process_id_t peerid; 1016 struct list_head *tmp; 1017 __u64 incarnation; 1018 ksock_conn_t *conn; 1019 ksock_conn_t *conn2; 1020 ksock_peer_t *peer = NULL; 1021 ksock_peer_t *peer2; 1022 ksock_sched_t *sched; 1023 ksock_hello_msg_t *hello; 1024 int cpt; 1025 ksock_tx_t *tx; 1026 ksock_tx_t *txtmp; 1027 int rc; 1028 int active; 1029 char *warn = NULL; 1030 1031 active = (route != NULL); 1032 1033 LASSERT (active == (type != SOCKLND_CONN_NONE)); 1034 1035 LIBCFS_ALLOC(conn, sizeof(*conn)); 1036 if (conn == NULL) { 1037 rc = -ENOMEM; 1038 goto failed_0; 1039 } 1040 1041 conn->ksnc_peer = NULL; 1042 conn->ksnc_route = NULL; 1043 conn->ksnc_sock = sock; 1044 /* 2 ref, 1 for conn, another extra ref prevents socket 1045 * being closed before establishment of connection */ 1046 atomic_set (&conn->ksnc_sock_refcount, 2); 1047 conn->ksnc_type = type; 1048 ksocknal_lib_save_callback(sock, conn); 1049 atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ 1050 1051 conn->ksnc_rx_ready = 0; 1052 conn->ksnc_rx_scheduled = 0; 1053 1054 INIT_LIST_HEAD (&conn->ksnc_tx_queue); 1055 conn->ksnc_tx_ready = 0; 1056 conn->ksnc_tx_scheduled = 0; 1057 conn->ksnc_tx_carrier = NULL; 1058 atomic_set (&conn->ksnc_tx_nob, 0); 1059 1060 LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t, 1061 kshm_ips[LNET_MAX_INTERFACES])); 1062 if (hello == NULL) { 1063 rc = -ENOMEM; 1064 goto failed_1; 1065 } 1066 1067 /* stash conn's local and remote addrs */ 1068 rc = ksocknal_lib_get_conn_addrs (conn); 1069 if (rc != 0) 1070 goto failed_1; 1071 1072 /* Find out/confirm peer's NID and connection type and get the 1073 * vector of interfaces she's willing to let me connect to. 1074 * Passive connections use the listener timeout since the peer sends 1075 * eagerly */ 1076 1077 if (active) { 1078 peer = route->ksnr_peer; 1079 LASSERT(ni == peer->ksnp_ni); 1080 1081 /* Active connection sends HELLO eagerly */ 1082 hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips); 1083 peerid = peer->ksnp_id; 1084 1085 write_lock_bh(global_lock); 1086 conn->ksnc_proto = peer->ksnp_proto; 1087 write_unlock_bh(global_lock); 1088 1089 if (conn->ksnc_proto == NULL) { 1090 conn->ksnc_proto = &ksocknal_protocol_v3x; 1091#if SOCKNAL_VERSION_DEBUG 1092 if (*ksocknal_tunables.ksnd_protocol == 2) 1093 conn->ksnc_proto = &ksocknal_protocol_v2x; 1094 else if (*ksocknal_tunables.ksnd_protocol == 1) 1095 conn->ksnc_proto = &ksocknal_protocol_v1x; 1096#endif 1097 } 1098 1099 rc = ksocknal_send_hello (ni, conn, peerid.nid, hello); 1100 if (rc != 0) 1101 goto failed_1; 1102 } else { 1103 peerid.nid = LNET_NID_ANY; 1104 peerid.pid = LNET_PID_ANY; 1105 1106 /* Passive, get protocol from peer */ 1107 conn->ksnc_proto = NULL; 1108 } 1109 1110 rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation); 1111 if (rc < 0) 1112 goto failed_1; 1113 1114 LASSERT (rc == 0 || active); 1115 LASSERT (conn->ksnc_proto != NULL); 1116 LASSERT (peerid.nid != LNET_NID_ANY); 1117 1118 cpt = lnet_cpt_of_nid(peerid.nid); 1119 1120 if (active) { 1121 ksocknal_peer_addref(peer); 1122 write_lock_bh(global_lock); 1123 } else { 1124 rc = ksocknal_create_peer(&peer, ni, peerid); 1125 if (rc != 0) 1126 goto failed_1; 1127 1128 write_lock_bh(global_lock); 1129 1130 /* called with a ref on ni, so shutdown can't have started */ 1131 LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0); 1132 1133 peer2 = ksocknal_find_peer_locked(ni, peerid); 1134 if (peer2 == NULL) { 1135 /* NB this puts an "empty" peer in the peer 1136 * table (which takes my ref) */ 1137 list_add_tail(&peer->ksnp_list, 1138 ksocknal_nid2peerlist(peerid.nid)); 1139 } else { 1140 ksocknal_peer_decref(peer); 1141 peer = peer2; 1142 } 1143 1144 /* +1 ref for me */ 1145 ksocknal_peer_addref(peer); 1146 peer->ksnp_accepting++; 1147 1148 /* Am I already connecting to this guy? Resolve in 1149 * favour of higher NID... */ 1150 if (peerid.nid < ni->ni_nid && 1151 ksocknal_connecting(peer, conn->ksnc_ipaddr)) { 1152 rc = EALREADY; 1153 warn = "connection race resolution"; 1154 goto failed_2; 1155 } 1156 } 1157 1158 if (peer->ksnp_closing || 1159 (active && route->ksnr_deleted)) { 1160 /* peer/route got closed under me */ 1161 rc = -ESTALE; 1162 warn = "peer/route removed"; 1163 goto failed_2; 1164 } 1165 1166 if (peer->ksnp_proto == NULL) { 1167 /* Never connected before. 1168 * NB recv_hello may have returned EPROTO to signal my peer 1169 * wants a different protocol than the one I asked for. 1170 */ 1171 LASSERT (list_empty(&peer->ksnp_conns)); 1172 1173 peer->ksnp_proto = conn->ksnc_proto; 1174 peer->ksnp_incarnation = incarnation; 1175 } 1176 1177 if (peer->ksnp_proto != conn->ksnc_proto || 1178 peer->ksnp_incarnation != incarnation) { 1179 /* Peer rebooted or I've got the wrong protocol version */ 1180 ksocknal_close_peer_conns_locked(peer, 0, 0); 1181 1182 peer->ksnp_proto = NULL; 1183 rc = ESTALE; 1184 warn = peer->ksnp_incarnation != incarnation ? 1185 "peer rebooted" : 1186 "wrong proto version"; 1187 goto failed_2; 1188 } 1189 1190 switch (rc) { 1191 default: 1192 LBUG(); 1193 case 0: 1194 break; 1195 case EALREADY: 1196 warn = "lost conn race"; 1197 goto failed_2; 1198 case EPROTO: 1199 warn = "retry with different protocol version"; 1200 goto failed_2; 1201 } 1202 1203 /* Refuse to duplicate an existing connection, unless this is a 1204 * loopback connection */ 1205 if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { 1206 list_for_each(tmp, &peer->ksnp_conns) { 1207 conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); 1208 1209 if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr || 1210 conn2->ksnc_myipaddr != conn->ksnc_myipaddr || 1211 conn2->ksnc_type != conn->ksnc_type) 1212 continue; 1213 1214 /* Reply on a passive connection attempt so the peer 1215 * realises we're connected. */ 1216 LASSERT (rc == 0); 1217 if (!active) 1218 rc = EALREADY; 1219 1220 warn = "duplicate"; 1221 goto failed_2; 1222 } 1223 } 1224 1225 /* If the connection created by this route didn't bind to the IP 1226 * address the route connected to, the connection/route matching 1227 * code below probably isn't going to work. */ 1228 if (active && 1229 route->ksnr_ipaddr != conn->ksnc_ipaddr) { 1230 CERROR("Route %s %pI4h connected to %pI4h\n", 1231 libcfs_id2str(peer->ksnp_id), 1232 &route->ksnr_ipaddr, 1233 &conn->ksnc_ipaddr); 1234 } 1235 1236 /* Search for a route corresponding to the new connection and 1237 * create an association. This allows incoming connections created 1238 * by routes in my peer to match my own route entries so I don't 1239 * continually create duplicate routes. */ 1240 list_for_each (tmp, &peer->ksnp_routes) { 1241 route = list_entry(tmp, ksock_route_t, ksnr_list); 1242 1243 if (route->ksnr_ipaddr != conn->ksnc_ipaddr) 1244 continue; 1245 1246 ksocknal_associate_route_conn_locked(route, conn); 1247 break; 1248 } 1249 1250 conn->ksnc_peer = peer; /* conn takes my ref on peer */ 1251 peer->ksnp_last_alive = cfs_time_current(); 1252 peer->ksnp_send_keepalive = 0; 1253 peer->ksnp_error = 0; 1254 1255 sched = ksocknal_choose_scheduler_locked(cpt); 1256 sched->kss_nconns++; 1257 conn->ksnc_scheduler = sched; 1258 1259 conn->ksnc_tx_last_post = cfs_time_current(); 1260 /* Set the deadline for the outgoing HELLO to drain */ 1261 conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued; 1262 conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout); 1263 mb(); /* order with adding to peer's conn list */ 1264 1265 list_add (&conn->ksnc_list, &peer->ksnp_conns); 1266 ksocknal_conn_addref(conn); 1267 1268 ksocknal_new_packet(conn, 0); 1269 1270 conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn); 1271 1272 /* Take packets blocking for this connection. */ 1273 list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) { 1274 if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) == SOCKNAL_MATCH_NO) 1275 continue; 1276 1277 list_del (&tx->tx_list); 1278 ksocknal_queue_tx_locked (tx, conn); 1279 } 1280 1281 write_unlock_bh(global_lock); 1282 1283 /* We've now got a new connection. Any errors from here on are just 1284 * like "normal" comms errors and we close the connection normally. 1285 * NB (a) we still have to send the reply HELLO for passive 1286 * connections, 1287 * (b) normal I/O on the conn is blocked until I setup and call the 1288 * socket callbacks. 1289 */ 1290 1291 CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d" 1292 " incarnation:%lld sched[%d:%d]\n", 1293 libcfs_id2str(peerid), conn->ksnc_proto->pro_version, 1294 &conn->ksnc_myipaddr, &conn->ksnc_ipaddr, 1295 conn->ksnc_port, incarnation, cpt, 1296 (int)(sched - &sched->kss_info->ksi_scheds[0])); 1297 1298 if (active) { 1299 /* additional routes after interface exchange? */ 1300 ksocknal_create_routes(peer, conn->ksnc_port, 1301 hello->kshm_ips, hello->kshm_nips); 1302 } else { 1303 hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips, 1304 hello->kshm_nips); 1305 rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); 1306 } 1307 1308 LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t, 1309 kshm_ips[LNET_MAX_INTERFACES])); 1310 1311 /* setup the socket AFTER I've received hello (it disables 1312 * SO_LINGER). I might call back to the acceptor who may want 1313 * to send a protocol version response and then close the 1314 * socket; this ensures the socket only tears down after the 1315 * response has been sent. */ 1316 if (rc == 0) 1317 rc = ksocknal_lib_setup_sock(sock); 1318 1319 write_lock_bh(global_lock); 1320 1321 /* NB my callbacks block while I hold ksnd_global_lock */ 1322 ksocknal_lib_set_callback(sock, conn); 1323 1324 if (!active) 1325 peer->ksnp_accepting--; 1326 1327 write_unlock_bh(global_lock); 1328 1329 if (rc != 0) { 1330 write_lock_bh(global_lock); 1331 if (!conn->ksnc_closing) { 1332 /* could be closed by another thread */ 1333 ksocknal_close_conn_locked(conn, rc); 1334 } 1335 write_unlock_bh(global_lock); 1336 } else if (ksocknal_connsock_addref(conn) == 0) { 1337 /* Allow I/O to proceed. */ 1338 ksocknal_read_callback(conn); 1339 ksocknal_write_callback(conn); 1340 ksocknal_connsock_decref(conn); 1341 } 1342 1343 ksocknal_connsock_decref(conn); 1344 ksocknal_conn_decref(conn); 1345 return rc; 1346 1347 failed_2: 1348 if (!peer->ksnp_closing && 1349 list_empty (&peer->ksnp_conns) && 1350 list_empty (&peer->ksnp_routes)) { 1351 list_add(&zombies, &peer->ksnp_tx_queue); 1352 list_del_init(&peer->ksnp_tx_queue); 1353 ksocknal_unlink_peer_locked(peer); 1354 } 1355 1356 write_unlock_bh(global_lock); 1357 1358 if (warn != NULL) { 1359 if (rc < 0) 1360 CERROR("Not creating conn %s type %d: %s\n", 1361 libcfs_id2str(peerid), conn->ksnc_type, warn); 1362 else 1363 CDEBUG(D_NET, "Not creating conn %s type %d: %s\n", 1364 libcfs_id2str(peerid), conn->ksnc_type, warn); 1365 } 1366 1367 if (!active) { 1368 if (rc > 0) { 1369 /* Request retry by replying with CONN_NONE 1370 * ksnc_proto has been set already */ 1371 conn->ksnc_type = SOCKLND_CONN_NONE; 1372 hello->kshm_nips = 0; 1373 ksocknal_send_hello(ni, conn, peerid.nid, hello); 1374 } 1375 1376 write_lock_bh(global_lock); 1377 peer->ksnp_accepting--; 1378 write_unlock_bh(global_lock); 1379 } 1380 1381 ksocknal_txlist_done(ni, &zombies, 1); 1382 ksocknal_peer_decref(peer); 1383 1384 failed_1: 1385 if (hello != NULL) 1386 LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t, 1387 kshm_ips[LNET_MAX_INTERFACES])); 1388 1389 LIBCFS_FREE (conn, sizeof(*conn)); 1390 1391 failed_0: 1392 libcfs_sock_release(sock); 1393 return rc; 1394} 1395 1396void 1397ksocknal_close_conn_locked (ksock_conn_t *conn, int error) 1398{ 1399 /* This just does the immmediate housekeeping, and queues the 1400 * connection for the reaper to terminate. 1401 * Caller holds ksnd_global_lock exclusively in irq context */ 1402 ksock_peer_t *peer = conn->ksnc_peer; 1403 ksock_route_t *route; 1404 ksock_conn_t *conn2; 1405 struct list_head *tmp; 1406 1407 LASSERT (peer->ksnp_error == 0); 1408 LASSERT (!conn->ksnc_closing); 1409 conn->ksnc_closing = 1; 1410 1411 /* ksnd_deathrow_conns takes over peer's ref */ 1412 list_del (&conn->ksnc_list); 1413 1414 route = conn->ksnc_route; 1415 if (route != NULL) { 1416 /* dissociate conn from route... */ 1417 LASSERT (!route->ksnr_deleted); 1418 LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0); 1419 1420 conn2 = NULL; 1421 list_for_each(tmp, &peer->ksnp_conns) { 1422 conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); 1423 1424 if (conn2->ksnc_route == route && 1425 conn2->ksnc_type == conn->ksnc_type) 1426 break; 1427 1428 conn2 = NULL; 1429 } 1430 if (conn2 == NULL) 1431 route->ksnr_connected &= ~(1 << conn->ksnc_type); 1432 1433 conn->ksnc_route = NULL; 1434 1435#if 0 /* irrelevant with only eager routes */ 1436 /* make route least favourite */ 1437 list_del (&route->ksnr_list); 1438 list_add_tail (&route->ksnr_list, &peer->ksnp_routes); 1439#endif 1440 ksocknal_route_decref(route); /* drop conn's ref on route */ 1441 } 1442 1443 if (list_empty (&peer->ksnp_conns)) { 1444 /* No more connections to this peer */ 1445 1446 if (!list_empty(&peer->ksnp_tx_queue)) { 1447 ksock_tx_t *tx; 1448 1449 LASSERT (conn->ksnc_proto == &ksocknal_protocol_v3x); 1450 1451 /* throw them to the last connection..., 1452 * these TXs will be send to /dev/null by scheduler */ 1453 list_for_each_entry(tx, &peer->ksnp_tx_queue, 1454 tx_list) 1455 ksocknal_tx_prep(conn, tx); 1456 1457 spin_lock_bh(&conn->ksnc_scheduler->kss_lock); 1458 list_splice_init(&peer->ksnp_tx_queue, 1459 &conn->ksnc_tx_queue); 1460 spin_unlock_bh(&conn->ksnc_scheduler->kss_lock); 1461 } 1462 1463 peer->ksnp_proto = NULL; /* renegotiate protocol version */ 1464 peer->ksnp_error = error; /* stash last conn close reason */ 1465 1466 if (list_empty (&peer->ksnp_routes)) { 1467 /* I've just closed last conn belonging to a 1468 * peer with no routes to it */ 1469 ksocknal_unlink_peer_locked (peer); 1470 } 1471 } 1472 1473 spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); 1474 1475 list_add_tail(&conn->ksnc_list, 1476 &ksocknal_data.ksnd_deathrow_conns); 1477 wake_up(&ksocknal_data.ksnd_reaper_waitq); 1478 1479 spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); 1480} 1481 1482void 1483ksocknal_peer_failed (ksock_peer_t *peer) 1484{ 1485 int notify = 0; 1486 unsigned long last_alive = 0; 1487 1488 /* There has been a connection failure or comms error; but I'll only 1489 * tell LNET I think the peer is dead if it's to another kernel and 1490 * there are no connections or connection attempts in existence. */ 1491 1492 read_lock(&ksocknal_data.ksnd_global_lock); 1493 1494 if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 && 1495 list_empty(&peer->ksnp_conns) && 1496 peer->ksnp_accepting == 0 && 1497 ksocknal_find_connecting_route_locked(peer) == NULL) { 1498 notify = 1; 1499 last_alive = peer->ksnp_last_alive; 1500 } 1501 1502 read_unlock(&ksocknal_data.ksnd_global_lock); 1503 1504 if (notify) 1505 lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0, 1506 last_alive); 1507} 1508 1509void 1510ksocknal_finalize_zcreq(ksock_conn_t *conn) 1511{ 1512 ksock_peer_t *peer = conn->ksnc_peer; 1513 ksock_tx_t *tx; 1514 ksock_tx_t *tmp; 1515 LIST_HEAD (zlist); 1516 1517 /* NB safe to finalize TXs because closing of socket will 1518 * abort all buffered data */ 1519 LASSERT (conn->ksnc_sock == NULL); 1520 1521 spin_lock(&peer->ksnp_lock); 1522 1523 list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) { 1524 if (tx->tx_conn != conn) 1525 continue; 1526 1527 LASSERT (tx->tx_msg.ksm_zc_cookies[0] != 0); 1528 1529 tx->tx_msg.ksm_zc_cookies[0] = 0; 1530 tx->tx_zc_aborted = 1; /* mark it as not-acked */ 1531 list_del(&tx->tx_zc_list); 1532 list_add(&tx->tx_zc_list, &zlist); 1533 } 1534 1535 spin_unlock(&peer->ksnp_lock); 1536 1537 while (!list_empty(&zlist)) { 1538 tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list); 1539 1540 list_del(&tx->tx_zc_list); 1541 ksocknal_tx_decref(tx); 1542 } 1543} 1544 1545void 1546ksocknal_terminate_conn (ksock_conn_t *conn) 1547{ 1548 /* This gets called by the reaper (guaranteed thread context) to 1549 * disengage the socket from its callbacks and close it. 1550 * ksnc_refcount will eventually hit zero, and then the reaper will 1551 * destroy it. */ 1552 ksock_peer_t *peer = conn->ksnc_peer; 1553 ksock_sched_t *sched = conn->ksnc_scheduler; 1554 int failed = 0; 1555 1556 LASSERT(conn->ksnc_closing); 1557 1558 /* wake up the scheduler to "send" all remaining packets to /dev/null */ 1559 spin_lock_bh(&sched->kss_lock); 1560 1561 /* a closing conn is always ready to tx */ 1562 conn->ksnc_tx_ready = 1; 1563 1564 if (!conn->ksnc_tx_scheduled && 1565 !list_empty(&conn->ksnc_tx_queue)) { 1566 list_add_tail (&conn->ksnc_tx_list, 1567 &sched->kss_tx_conns); 1568 conn->ksnc_tx_scheduled = 1; 1569 /* extra ref for scheduler */ 1570 ksocknal_conn_addref(conn); 1571 1572 wake_up (&sched->kss_waitq); 1573 } 1574 1575 spin_unlock_bh(&sched->kss_lock); 1576 1577 /* serialise with callbacks */ 1578 write_lock_bh(&ksocknal_data.ksnd_global_lock); 1579 1580 ksocknal_lib_reset_callback(conn->ksnc_sock, conn); 1581 1582 /* OK, so this conn may not be completely disengaged from its 1583 * scheduler yet, but it _has_ committed to terminate... */ 1584 conn->ksnc_scheduler->kss_nconns--; 1585 1586 if (peer->ksnp_error != 0) { 1587 /* peer's last conn closed in error */ 1588 LASSERT (list_empty (&peer->ksnp_conns)); 1589 failed = 1; 1590 peer->ksnp_error = 0; /* avoid multiple notifications */ 1591 } 1592 1593 write_unlock_bh(&ksocknal_data.ksnd_global_lock); 1594 1595 if (failed) 1596 ksocknal_peer_failed(peer); 1597 1598 /* The socket is closed on the final put; either here, or in 1599 * ksocknal_{send,recv}msg(). Since we set up the linger2 option 1600 * when the connection was established, this will close the socket 1601 * immediately, aborting anything buffered in it. Any hung 1602 * zero-copy transmits will therefore complete in finite time. */ 1603 ksocknal_connsock_decref(conn); 1604} 1605 1606void 1607ksocknal_queue_zombie_conn (ksock_conn_t *conn) 1608{ 1609 /* Queue the conn for the reaper to destroy */ 1610 1611 LASSERT(atomic_read(&conn->ksnc_conn_refcount) == 0); 1612 spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); 1613 1614 list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); 1615 wake_up(&ksocknal_data.ksnd_reaper_waitq); 1616 1617 spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); 1618} 1619 1620void 1621ksocknal_destroy_conn (ksock_conn_t *conn) 1622{ 1623 unsigned long last_rcv; 1624 1625 /* Final coup-de-grace of the reaper */ 1626 CDEBUG (D_NET, "connection %p\n", conn); 1627 1628 LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0); 1629 LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0); 1630 LASSERT (conn->ksnc_sock == NULL); 1631 LASSERT (conn->ksnc_route == NULL); 1632 LASSERT (!conn->ksnc_tx_scheduled); 1633 LASSERT (!conn->ksnc_rx_scheduled); 1634 LASSERT (list_empty(&conn->ksnc_tx_queue)); 1635 1636 /* complete current receive if any */ 1637 switch (conn->ksnc_rx_state) { 1638 case SOCKNAL_RX_LNET_PAYLOAD: 1639 last_rcv = conn->ksnc_rx_deadline - 1640 cfs_time_seconds(*ksocknal_tunables.ksnd_timeout); 1641 CERROR("Completing partial receive from %s[%d]" 1642 ", ip %pI4h:%d, with error, wanted: %d, left: %d, " 1643 "last alive is %ld secs ago\n", 1644 libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, 1645 &conn->ksnc_ipaddr, conn->ksnc_port, 1646 conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, 1647 cfs_duration_sec(cfs_time_sub(cfs_time_current(), 1648 last_rcv))); 1649 lnet_finalize (conn->ksnc_peer->ksnp_ni, 1650 conn->ksnc_cookie, -EIO); 1651 break; 1652 case SOCKNAL_RX_LNET_HEADER: 1653 if (conn->ksnc_rx_started) 1654 CERROR("Incomplete receive of lnet header from %s" 1655 ", ip %pI4h:%d, with error, protocol: %d.x.\n", 1656 libcfs_id2str(conn->ksnc_peer->ksnp_id), 1657 &conn->ksnc_ipaddr, conn->ksnc_port, 1658 conn->ksnc_proto->pro_version); 1659 break; 1660 case SOCKNAL_RX_KSM_HEADER: 1661 if (conn->ksnc_rx_started) 1662 CERROR("Incomplete receive of ksock message from %s" 1663 ", ip %pI4h:%d, with error, protocol: %d.x.\n", 1664 libcfs_id2str(conn->ksnc_peer->ksnp_id), 1665 &conn->ksnc_ipaddr, conn->ksnc_port, 1666 conn->ksnc_proto->pro_version); 1667 break; 1668 case SOCKNAL_RX_SLOP: 1669 if (conn->ksnc_rx_started) 1670 CERROR("Incomplete receive of slops from %s" 1671 ", ip %pI4h:%d, with error\n", 1672 libcfs_id2str(conn->ksnc_peer->ksnp_id), 1673 &conn->ksnc_ipaddr, conn->ksnc_port); 1674 break; 1675 default: 1676 LBUG (); 1677 break; 1678 } 1679 1680 ksocknal_peer_decref(conn->ksnc_peer); 1681 1682 LIBCFS_FREE (conn, sizeof (*conn)); 1683} 1684 1685int 1686ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) 1687{ 1688 ksock_conn_t *conn; 1689 struct list_head *ctmp; 1690 struct list_head *cnxt; 1691 int count = 0; 1692 1693 list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { 1694 conn = list_entry (ctmp, ksock_conn_t, ksnc_list); 1695 1696 if (ipaddr == 0 || 1697 conn->ksnc_ipaddr == ipaddr) { 1698 count++; 1699 ksocknal_close_conn_locked (conn, why); 1700 } 1701 } 1702 1703 return count; 1704} 1705 1706int 1707ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) 1708{ 1709 ksock_peer_t *peer = conn->ksnc_peer; 1710 __u32 ipaddr = conn->ksnc_ipaddr; 1711 int count; 1712 1713 write_lock_bh(&ksocknal_data.ksnd_global_lock); 1714 1715 count = ksocknal_close_peer_conns_locked (peer, ipaddr, why); 1716 1717 write_unlock_bh(&ksocknal_data.ksnd_global_lock); 1718 1719 return count; 1720} 1721 1722int 1723ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr) 1724{ 1725 ksock_peer_t *peer; 1726 struct list_head *ptmp; 1727 struct list_head *pnxt; 1728 int lo; 1729 int hi; 1730 int i; 1731 int count = 0; 1732 1733 write_lock_bh(&ksocknal_data.ksnd_global_lock); 1734 1735 if (id.nid != LNET_NID_ANY) 1736 lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); 1737 else { 1738 lo = 0; 1739 hi = ksocknal_data.ksnd_peer_hash_size - 1; 1740 } 1741 1742 for (i = lo; i <= hi; i++) { 1743 list_for_each_safe (ptmp, pnxt, 1744 &ksocknal_data.ksnd_peers[i]) { 1745 1746 peer = list_entry (ptmp, ksock_peer_t, ksnp_list); 1747 1748 if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) && 1749 (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid))) 1750 continue; 1751 1752 count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0); 1753 } 1754 } 1755 1756 write_unlock_bh(&ksocknal_data.ksnd_global_lock); 1757 1758 /* wildcards always succeed */ 1759 if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0) 1760 return 0; 1761 1762 if (count == 0) 1763 return -ENOENT; 1764 else 1765 return 0; 1766} 1767 1768void 1769ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive) 1770{ 1771 /* The router is telling me she's been notified of a change in 1772 * gateway state.... */ 1773 lnet_process_id_t id = {0}; 1774 1775 id.nid = gw_nid; 1776 id.pid = LNET_PID_ANY; 1777 1778 CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid), 1779 alive ? "up" : "down"); 1780 1781 if (!alive) { 1782 /* If the gateway crashed, close all open connections... */ 1783 ksocknal_close_matching_conns (id, 0); 1784 return; 1785 } 1786 1787 /* ...otherwise do nothing. We can only establish new connections 1788 * if we have autroutes, and these connect on demand. */ 1789} 1790 1791void 1792ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when) 1793{ 1794 int connect = 1; 1795 unsigned long last_alive = 0; 1796 unsigned long now = cfs_time_current(); 1797 ksock_peer_t *peer = NULL; 1798 rwlock_t *glock = &ksocknal_data.ksnd_global_lock; 1799 lnet_process_id_t id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID}; 1800 1801 read_lock(glock); 1802 1803 peer = ksocknal_find_peer_locked(ni, id); 1804 if (peer != NULL) { 1805 struct list_head *tmp; 1806 ksock_conn_t *conn; 1807 int bufnob; 1808 1809 list_for_each (tmp, &peer->ksnp_conns) { 1810 conn = list_entry(tmp, ksock_conn_t, ksnc_list); 1811 bufnob = conn->ksnc_sock->sk->sk_wmem_queued; 1812 1813 if (bufnob < conn->ksnc_tx_bufnob) { 1814 /* something got ACKed */ 1815 conn->ksnc_tx_deadline = 1816 cfs_time_shift(*ksocknal_tunables.ksnd_timeout); 1817 peer->ksnp_last_alive = now; 1818 conn->ksnc_tx_bufnob = bufnob; 1819 } 1820 } 1821 1822 last_alive = peer->ksnp_last_alive; 1823 if (ksocknal_find_connectable_route_locked(peer) == NULL) 1824 connect = 0; 1825 } 1826 1827 read_unlock(glock); 1828 1829 if (last_alive != 0) 1830 *when = last_alive; 1831 1832 CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n", 1833 libcfs_nid2str(nid), peer, 1834 last_alive ? cfs_duration_sec(now - last_alive) : -1, 1835 connect); 1836 1837 if (!connect) 1838 return; 1839 1840 ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port()); 1841 1842 write_lock_bh(glock); 1843 1844 peer = ksocknal_find_peer_locked(ni, id); 1845 if (peer != NULL) 1846 ksocknal_launch_all_connections_locked(peer); 1847 1848 write_unlock_bh(glock); 1849 return; 1850} 1851 1852static void 1853ksocknal_push_peer (ksock_peer_t *peer) 1854{ 1855 int index; 1856 int i; 1857 struct list_head *tmp; 1858 ksock_conn_t *conn; 1859 1860 for (index = 0; ; index++) { 1861 read_lock(&ksocknal_data.ksnd_global_lock); 1862 1863 i = 0; 1864 conn = NULL; 1865 1866 list_for_each (tmp, &peer->ksnp_conns) { 1867 if (i++ == index) { 1868 conn = list_entry (tmp, ksock_conn_t, 1869 ksnc_list); 1870 ksocknal_conn_addref(conn); 1871 break; 1872 } 1873 } 1874 1875 read_unlock(&ksocknal_data.ksnd_global_lock); 1876 1877 if (conn == NULL) 1878 break; 1879 1880 ksocknal_lib_push_conn (conn); 1881 ksocknal_conn_decref(conn); 1882 } 1883} 1884 1885static int 1886ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id) 1887{ 1888 ksock_peer_t *peer; 1889 struct list_head *tmp; 1890 int index; 1891 int i; 1892 int j; 1893 int rc = -ENOENT; 1894 1895 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { 1896 for (j = 0; ; j++) { 1897 read_lock(&ksocknal_data.ksnd_global_lock); 1898 1899 index = 0; 1900 peer = NULL; 1901 1902 list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) { 1903 peer = list_entry(tmp, ksock_peer_t, 1904 ksnp_list); 1905 1906 if (!((id.nid == LNET_NID_ANY || 1907 id.nid == peer->ksnp_id.nid) && 1908 (id.pid == LNET_PID_ANY || 1909 id.pid == peer->ksnp_id.pid))) { 1910 peer = NULL; 1911 continue; 1912 } 1913 1914 if (index++ == j) { 1915 ksocknal_peer_addref(peer); 1916 break; 1917 } 1918 } 1919 1920 read_unlock(&ksocknal_data.ksnd_global_lock); 1921 1922 if (peer != NULL) { 1923 rc = 0; 1924 ksocknal_push_peer (peer); 1925 ksocknal_peer_decref(peer); 1926 } 1927 } 1928 1929 } 1930 1931 return rc; 1932} 1933 1934static int 1935ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask) 1936{ 1937 ksock_net_t *net = ni->ni_data; 1938 ksock_interface_t *iface; 1939 int rc; 1940 int i; 1941 int j; 1942 struct list_head *ptmp; 1943 ksock_peer_t *peer; 1944 struct list_head *rtmp; 1945 ksock_route_t *route; 1946 1947 if (ipaddress == 0 || 1948 netmask == 0) 1949 return -EINVAL; 1950 1951 write_lock_bh(&ksocknal_data.ksnd_global_lock); 1952 1953 iface = ksocknal_ip2iface(ni, ipaddress); 1954 if (iface != NULL) { 1955 /* silently ignore dups */ 1956 rc = 0; 1957 } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) { 1958 rc = -ENOSPC; 1959 } else { 1960 iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++]; 1961 1962 iface->ksni_ipaddr = ipaddress; 1963 iface->ksni_netmask = netmask; 1964 iface->ksni_nroutes = 0; 1965 iface->ksni_npeers = 0; 1966 1967 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { 1968 list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { 1969 peer = list_entry(ptmp, ksock_peer_t, 1970 ksnp_list); 1971 1972 for (j = 0; j < peer->ksnp_n_passive_ips; j++) 1973 if (peer->ksnp_passive_ips[j] == ipaddress) 1974 iface->ksni_npeers++; 1975 1976 list_for_each(rtmp, &peer->ksnp_routes) { 1977 route = list_entry(rtmp, 1978 ksock_route_t, 1979 ksnr_list); 1980 1981 if (route->ksnr_myipaddr == ipaddress) 1982 iface->ksni_nroutes++; 1983 } 1984 } 1985 } 1986 1987 rc = 0; 1988 /* NB only new connections will pay attention to the new interface! */ 1989 } 1990 1991 write_unlock_bh(&ksocknal_data.ksnd_global_lock); 1992 1993 return rc; 1994} 1995 1996static void 1997ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr) 1998{ 1999 struct list_head *tmp; 2000 struct list_head *nxt; 2001 ksock_route_t *route; 2002 ksock_conn_t *conn; 2003 int i; 2004 int j; 2005 2006 for (i = 0; i < peer->ksnp_n_passive_ips; i++) 2007 if (peer->ksnp_passive_ips[i] == ipaddr) { 2008 for (j = i+1; j < peer->ksnp_n_passive_ips; j++) 2009 peer->ksnp_passive_ips[j-1] = 2010 peer->ksnp_passive_ips[j]; 2011 peer->ksnp_n_passive_ips--; 2012 break; 2013 } 2014 2015 list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { 2016 route = list_entry (tmp, ksock_route_t, ksnr_list); 2017 2018 if (route->ksnr_myipaddr != ipaddr) 2019 continue; 2020 2021 if (route->ksnr_share_count != 0) { 2022 /* Manually created; keep, but unbind */ 2023 route->ksnr_myipaddr = 0; 2024 } else { 2025 ksocknal_del_route_locked(route); 2026 } 2027 } 2028 2029 list_for_each_safe(tmp, nxt, &peer->ksnp_conns) { 2030 conn = list_entry(tmp, ksock_conn_t, ksnc_list); 2031 2032 if (conn->ksnc_myipaddr == ipaddr) 2033 ksocknal_close_conn_locked (conn, 0); 2034 } 2035} 2036 2037static int 2038ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress) 2039{ 2040 ksock_net_t *net = ni->ni_data; 2041 int rc = -ENOENT; 2042 struct list_head *tmp; 2043 struct list_head *nxt; 2044 ksock_peer_t *peer; 2045 __u32 this_ip; 2046 int i; 2047 int j; 2048 2049 write_lock_bh(&ksocknal_data.ksnd_global_lock); 2050 2051 for (i = 0; i < net->ksnn_ninterfaces; i++) { 2052 this_ip = net->ksnn_interfaces[i].ksni_ipaddr; 2053 2054 if (!(ipaddress == 0 || 2055 ipaddress == this_ip)) 2056 continue; 2057 2058 rc = 0; 2059 2060 for (j = i+1; j < net->ksnn_ninterfaces; j++) 2061 net->ksnn_interfaces[j-1] = 2062 net->ksnn_interfaces[j]; 2063 2064 net->ksnn_ninterfaces--; 2065 2066 for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) { 2067 list_for_each_safe(tmp, nxt, 2068 &ksocknal_data.ksnd_peers[j]) { 2069 peer = list_entry(tmp, ksock_peer_t, 2070 ksnp_list); 2071 2072 if (peer->ksnp_ni != ni) 2073 continue; 2074 2075 ksocknal_peer_del_interface_locked(peer, this_ip); 2076 } 2077 } 2078 } 2079 2080 write_unlock_bh(&ksocknal_data.ksnd_global_lock); 2081 2082 return rc; 2083} 2084 2085int 2086ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) 2087{ 2088 lnet_process_id_t id = {0}; 2089 struct libcfs_ioctl_data *data = arg; 2090 int rc; 2091 2092 switch (cmd) { 2093 case IOC_LIBCFS_GET_INTERFACE: { 2094 ksock_net_t *net = ni->ni_data; 2095 ksock_interface_t *iface; 2096 2097 read_lock(&ksocknal_data.ksnd_global_lock); 2098 2099 if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) { 2100 rc = -ENOENT; 2101 } else { 2102 rc = 0; 2103 iface = &net->ksnn_interfaces[data->ioc_count]; 2104 2105 data->ioc_u32[0] = iface->ksni_ipaddr; 2106 data->ioc_u32[1] = iface->ksni_netmask; 2107 data->ioc_u32[2] = iface->ksni_npeers; 2108 data->ioc_u32[3] = iface->ksni_nroutes; 2109 } 2110 2111 read_unlock(&ksocknal_data.ksnd_global_lock); 2112 return rc; 2113 } 2114 2115 case IOC_LIBCFS_ADD_INTERFACE: 2116 return ksocknal_add_interface(ni, 2117 data->ioc_u32[0], /* IP address */ 2118 data->ioc_u32[1]); /* net mask */ 2119 2120 case IOC_LIBCFS_DEL_INTERFACE: 2121 return ksocknal_del_interface(ni, 2122 data->ioc_u32[0]); /* IP address */ 2123 2124 case IOC_LIBCFS_GET_PEER: { 2125 __u32 myip = 0; 2126 __u32 ip = 0; 2127 int port = 0; 2128 int conn_count = 0; 2129 int share_count = 0; 2130 2131 rc = ksocknal_get_peer_info(ni, data->ioc_count, 2132 &id, &myip, &ip, &port, 2133 &conn_count, &share_count); 2134 if (rc != 0) 2135 return rc; 2136 2137 data->ioc_nid = id.nid; 2138 data->ioc_count = share_count; 2139 data->ioc_u32[0] = ip; 2140 data->ioc_u32[1] = port; 2141 data->ioc_u32[2] = myip; 2142 data->ioc_u32[3] = conn_count; 2143 data->ioc_u32[4] = id.pid; 2144 return 0; 2145 } 2146 2147 case IOC_LIBCFS_ADD_PEER: 2148 id.nid = data->ioc_nid; 2149 id.pid = LUSTRE_SRV_LNET_PID; 2150 return ksocknal_add_peer (ni, id, 2151 data->ioc_u32[0], /* IP */ 2152 data->ioc_u32[1]); /* port */ 2153 2154 case IOC_LIBCFS_DEL_PEER: 2155 id.nid = data->ioc_nid; 2156 id.pid = LNET_PID_ANY; 2157 return ksocknal_del_peer (ni, id, 2158 data->ioc_u32[0]); /* IP */ 2159 2160 case IOC_LIBCFS_GET_CONN: { 2161 int txmem; 2162 int rxmem; 2163 int nagle; 2164 ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count); 2165 2166 if (conn == NULL) 2167 return -ENOENT; 2168 2169 ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle); 2170 2171 data->ioc_count = txmem; 2172 data->ioc_nid = conn->ksnc_peer->ksnp_id.nid; 2173 data->ioc_flags = nagle; 2174 data->ioc_u32[0] = conn->ksnc_ipaddr; 2175 data->ioc_u32[1] = conn->ksnc_port; 2176 data->ioc_u32[2] = conn->ksnc_myipaddr; 2177 data->ioc_u32[3] = conn->ksnc_type; 2178 data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt; 2179 data->ioc_u32[5] = rxmem; 2180 data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid; 2181 ksocknal_conn_decref(conn); 2182 return 0; 2183 } 2184 2185 case IOC_LIBCFS_CLOSE_CONNECTION: 2186 id.nid = data->ioc_nid; 2187 id.pid = LNET_PID_ANY; 2188 return ksocknal_close_matching_conns (id, 2189 data->ioc_u32[0]); 2190 2191 case IOC_LIBCFS_REGISTER_MYNID: 2192 /* Ignore if this is a noop */ 2193 if (data->ioc_nid == ni->ni_nid) 2194 return 0; 2195 2196 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", 2197 libcfs_nid2str(data->ioc_nid), 2198 libcfs_nid2str(ni->ni_nid)); 2199 return -EINVAL; 2200 2201 case IOC_LIBCFS_PUSH_CONNECTION: 2202 id.nid = data->ioc_nid; 2203 id.pid = LNET_PID_ANY; 2204 return ksocknal_push(ni, id); 2205 2206 default: 2207 return -EINVAL; 2208 } 2209 /* not reached */ 2210} 2211 2212static void 2213ksocknal_free_buffers (void) 2214{ 2215 LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0); 2216 2217 if (ksocknal_data.ksnd_sched_info != NULL) { 2218 struct ksock_sched_info *info; 2219 int i; 2220 2221 cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) { 2222 if (info->ksi_scheds != NULL) { 2223 LIBCFS_FREE(info->ksi_scheds, 2224 info->ksi_nthreads_max * 2225 sizeof(info->ksi_scheds[0])); 2226 } 2227 } 2228 cfs_percpt_free(ksocknal_data.ksnd_sched_info); 2229 } 2230 2231 LIBCFS_FREE (ksocknal_data.ksnd_peers, 2232 sizeof (struct list_head) * 2233 ksocknal_data.ksnd_peer_hash_size); 2234 2235 spin_lock(&ksocknal_data.ksnd_tx_lock); 2236 2237 if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { 2238 struct list_head zlist; 2239 ksock_tx_t *tx; 2240 2241 list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs); 2242 list_del_init(&ksocknal_data.ksnd_idle_noop_txs); 2243 spin_unlock(&ksocknal_data.ksnd_tx_lock); 2244 2245 while (!list_empty(&zlist)) { 2246 tx = list_entry(zlist.next, ksock_tx_t, tx_list); 2247 list_del(&tx->tx_list); 2248 LIBCFS_FREE(tx, tx->tx_desc_size); 2249 } 2250 } else { 2251 spin_unlock(&ksocknal_data.ksnd_tx_lock); 2252 } 2253} 2254 2255static void 2256ksocknal_base_shutdown(void) 2257{ 2258 struct ksock_sched_info *info; 2259 ksock_sched_t *sched; 2260 int i; 2261 int j; 2262 2263 CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", 2264 atomic_read (&libcfs_kmemory)); 2265 LASSERT (ksocknal_data.ksnd_nnets == 0); 2266 2267 switch (ksocknal_data.ksnd_init) { 2268 default: 2269 LASSERT (0); 2270 2271 case SOCKNAL_INIT_ALL: 2272 case SOCKNAL_INIT_DATA: 2273 LASSERT (ksocknal_data.ksnd_peers != NULL); 2274 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { 2275 LASSERT (list_empty (&ksocknal_data.ksnd_peers[i])); 2276 } 2277 2278 LASSERT(list_empty(&ksocknal_data.ksnd_nets)); 2279 LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns)); 2280 LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns)); 2281 LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs)); 2282 LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes)); 2283 2284 if (ksocknal_data.ksnd_sched_info != NULL) { 2285 cfs_percpt_for_each(info, i, 2286 ksocknal_data.ksnd_sched_info) { 2287 if (info->ksi_scheds == NULL) 2288 continue; 2289 2290 for (j = 0; j < info->ksi_nthreads_max; j++) { 2291 2292 sched = &info->ksi_scheds[j]; 2293 LASSERT(list_empty( 2294 &sched->kss_tx_conns)); 2295 LASSERT(list_empty( 2296 &sched->kss_rx_conns)); 2297 LASSERT(list_empty( 2298 &sched->kss_zombie_noop_txs)); 2299 LASSERT(sched->kss_nconns == 0); 2300 } 2301 } 2302 } 2303 2304 /* flag threads to terminate; wake and wait for them to die */ 2305 ksocknal_data.ksnd_shuttingdown = 1; 2306 wake_up_all(&ksocknal_data.ksnd_connd_waitq); 2307 wake_up_all(&ksocknal_data.ksnd_reaper_waitq); 2308 2309 if (ksocknal_data.ksnd_sched_info != NULL) { 2310 cfs_percpt_for_each(info, i, 2311 ksocknal_data.ksnd_sched_info) { 2312 if (info->ksi_scheds == NULL) 2313 continue; 2314 2315 for (j = 0; j < info->ksi_nthreads_max; j++) { 2316 sched = &info->ksi_scheds[j]; 2317 wake_up_all(&sched->kss_waitq); 2318 } 2319 } 2320 } 2321 2322 i = 4; 2323 read_lock(&ksocknal_data.ksnd_global_lock); 2324 while (ksocknal_data.ksnd_nthreads != 0) { 2325 i++; 2326 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ 2327 "waiting for %d threads to terminate\n", 2328 ksocknal_data.ksnd_nthreads); 2329 read_unlock(&ksocknal_data.ksnd_global_lock); 2330 set_current_state(TASK_UNINTERRUPTIBLE); 2331 schedule_timeout(cfs_time_seconds(1)); 2332 read_lock(&ksocknal_data.ksnd_global_lock); 2333 } 2334 read_unlock(&ksocknal_data.ksnd_global_lock); 2335 2336 ksocknal_free_buffers(); 2337 2338 ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; 2339 break; 2340 } 2341 2342 CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", 2343 atomic_read (&libcfs_kmemory)); 2344 2345 module_put(THIS_MODULE); 2346} 2347 2348static __u64 2349ksocknal_new_incarnation (void) 2350{ 2351 struct timeval tv; 2352 2353 /* The incarnation number is the time this module loaded and it 2354 * identifies this particular instance of the socknal. Hopefully 2355 * we won't be able to reboot more frequently than 1MHz for the 2356 * foreseeable future :) */ 2357 2358 do_gettimeofday(&tv); 2359 2360 return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; 2361} 2362 2363static int 2364ksocknal_base_startup(void) 2365{ 2366 struct ksock_sched_info *info; 2367 int rc; 2368 int i; 2369 2370 LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); 2371 LASSERT (ksocknal_data.ksnd_nnets == 0); 2372 2373 memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ 2374 2375 ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; 2376 LIBCFS_ALLOC (ksocknal_data.ksnd_peers, 2377 sizeof (struct list_head) * 2378 ksocknal_data.ksnd_peer_hash_size); 2379 if (ksocknal_data.ksnd_peers == NULL) 2380 return -ENOMEM; 2381 2382 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) 2383 INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); 2384 2385 rwlock_init(&ksocknal_data.ksnd_global_lock); 2386 INIT_LIST_HEAD(&ksocknal_data.ksnd_nets); 2387 2388 spin_lock_init(&ksocknal_data.ksnd_reaper_lock); 2389 INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns); 2390 INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); 2391 INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); 2392 init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq); 2393 2394 spin_lock_init(&ksocknal_data.ksnd_connd_lock); 2395 INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs); 2396 INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes); 2397 init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq); 2398 2399 spin_lock_init(&ksocknal_data.ksnd_tx_lock); 2400 INIT_LIST_HEAD (&ksocknal_data.ksnd_idle_noop_txs); 2401 2402 /* NB memset above zeros whole of ksocknal_data */ 2403 2404 /* flag lists/ptrs/locks initialised */ 2405 ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; 2406 try_module_get(THIS_MODULE); 2407 2408 ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(), 2409 sizeof(*info)); 2410 if (ksocknal_data.ksnd_sched_info == NULL) 2411 goto failed; 2412 2413 cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) { 2414 ksock_sched_t *sched; 2415 int nthrs; 2416 2417 nthrs = cfs_cpt_weight(lnet_cpt_table(), i); 2418 if (*ksocknal_tunables.ksnd_nscheds > 0) { 2419 nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds); 2420 } else { 2421 /* max to half of CPUs, assume another half should be 2422 * reserved for upper layer modules */ 2423 nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs); 2424 } 2425 2426 info->ksi_nthreads_max = nthrs; 2427 info->ksi_cpt = i; 2428 2429 LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i, 2430 info->ksi_nthreads_max * sizeof(*sched)); 2431 if (info->ksi_scheds == NULL) 2432 goto failed; 2433 2434 for (; nthrs > 0; nthrs--) { 2435 sched = &info->ksi_scheds[nthrs - 1]; 2436 2437 sched->kss_info = info; 2438 spin_lock_init(&sched->kss_lock); 2439 INIT_LIST_HEAD(&sched->kss_rx_conns); 2440 INIT_LIST_HEAD(&sched->kss_tx_conns); 2441 INIT_LIST_HEAD(&sched->kss_zombie_noop_txs); 2442 init_waitqueue_head(&sched->kss_waitq); 2443 } 2444 } 2445 2446 ksocknal_data.ksnd_connd_starting = 0; 2447 ksocknal_data.ksnd_connd_failed_stamp = 0; 2448 ksocknal_data.ksnd_connd_starting_stamp = get_seconds(); 2449 /* must have at least 2 connds to remain responsive to accepts while 2450 * connecting */ 2451 if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1) 2452 *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1; 2453 2454 if (*ksocknal_tunables.ksnd_nconnds_max < 2455 *ksocknal_tunables.ksnd_nconnds) { 2456 ksocknal_tunables.ksnd_nconnds_max = 2457 ksocknal_tunables.ksnd_nconnds; 2458 } 2459 2460 for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) { 2461 char name[16]; 2462 spin_lock_bh(&ksocknal_data.ksnd_connd_lock); 2463 ksocknal_data.ksnd_connd_starting++; 2464 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); 2465 2466 2467 snprintf(name, sizeof(name), "socknal_cd%02d", i); 2468 rc = ksocknal_thread_start(ksocknal_connd, 2469 (void *)((ulong_ptr_t)i), name); 2470 if (rc != 0) { 2471 spin_lock_bh(&ksocknal_data.ksnd_connd_lock); 2472 ksocknal_data.ksnd_connd_starting--; 2473 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); 2474 CERROR("Can't spawn socknal connd: %d\n", rc); 2475 goto failed; 2476 } 2477 } 2478 2479 rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper"); 2480 if (rc != 0) { 2481 CERROR ("Can't spawn socknal reaper: %d\n", rc); 2482 goto failed; 2483 } 2484 2485 /* flag everything initialised */ 2486 ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; 2487 2488 return 0; 2489 2490 failed: 2491 ksocknal_base_shutdown(); 2492 return -ENETDOWN; 2493} 2494 2495static void 2496ksocknal_debug_peerhash (lnet_ni_t *ni) 2497{ 2498 ksock_peer_t *peer = NULL; 2499 struct list_head *tmp; 2500 int i; 2501 2502 read_lock(&ksocknal_data.ksnd_global_lock); 2503 2504 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { 2505 list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) { 2506 peer = list_entry (tmp, ksock_peer_t, ksnp_list); 2507 2508 if (peer->ksnp_ni == ni) 2509 break; 2510 2511 peer = NULL; 2512 } 2513 } 2514 2515 if (peer != NULL) { 2516 ksock_route_t *route; 2517 ksock_conn_t *conn; 2518 2519 CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, " 2520 "closing %d, accepting %d, err %d, zcookie %llu, " 2521 "txq %d, zc_req %d\n", libcfs_id2str(peer->ksnp_id), 2522 atomic_read(&peer->ksnp_refcount), 2523 peer->ksnp_sharecount, peer->ksnp_closing, 2524 peer->ksnp_accepting, peer->ksnp_error, 2525 peer->ksnp_zc_next_cookie, 2526 !list_empty(&peer->ksnp_tx_queue), 2527 !list_empty(&peer->ksnp_zc_req_list)); 2528 2529 list_for_each (tmp, &peer->ksnp_routes) { 2530 route = list_entry(tmp, ksock_route_t, ksnr_list); 2531 CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, " 2532 "del %d\n", atomic_read(&route->ksnr_refcount), 2533 route->ksnr_scheduled, route->ksnr_connecting, 2534 route->ksnr_connected, route->ksnr_deleted); 2535 } 2536 2537 list_for_each (tmp, &peer->ksnp_conns) { 2538 conn = list_entry(tmp, ksock_conn_t, ksnc_list); 2539 CWARN ("Conn: ref %d, sref %d, t %d, c %d\n", 2540 atomic_read(&conn->ksnc_conn_refcount), 2541 atomic_read(&conn->ksnc_sock_refcount), 2542 conn->ksnc_type, conn->ksnc_closing); 2543 } 2544 } 2545 2546 read_unlock(&ksocknal_data.ksnd_global_lock); 2547 return; 2548} 2549 2550void 2551ksocknal_shutdown (lnet_ni_t *ni) 2552{ 2553 ksock_net_t *net = ni->ni_data; 2554 int i; 2555 lnet_process_id_t anyid = {0}; 2556 2557 anyid.nid = LNET_NID_ANY; 2558 anyid.pid = LNET_PID_ANY; 2559 2560 LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL); 2561 LASSERT(ksocknal_data.ksnd_nnets > 0); 2562 2563 spin_lock_bh(&net->ksnn_lock); 2564 net->ksnn_shutdown = 1; /* prevent new peers */ 2565 spin_unlock_bh(&net->ksnn_lock); 2566 2567 /* Delete all peers */ 2568 ksocknal_del_peer(ni, anyid, 0); 2569 2570 /* Wait for all peer state to clean up */ 2571 i = 2; 2572 spin_lock_bh(&net->ksnn_lock); 2573 while (net->ksnn_npeers != 0) { 2574 spin_unlock_bh(&net->ksnn_lock); 2575 2576 i++; 2577 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ 2578 "waiting for %d peers to disconnect\n", 2579 net->ksnn_npeers); 2580 set_current_state(TASK_UNINTERRUPTIBLE); 2581 schedule_timeout(cfs_time_seconds(1)); 2582 2583 ksocknal_debug_peerhash(ni); 2584 2585 spin_lock_bh(&net->ksnn_lock); 2586 } 2587 spin_unlock_bh(&net->ksnn_lock); 2588 2589 for (i = 0; i < net->ksnn_ninterfaces; i++) { 2590 LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0); 2591 LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0); 2592 } 2593 2594 list_del(&net->ksnn_list); 2595 LIBCFS_FREE(net, sizeof(*net)); 2596 2597 ksocknal_data.ksnd_nnets--; 2598 if (ksocknal_data.ksnd_nnets == 0) 2599 ksocknal_base_shutdown(); 2600} 2601 2602static int 2603ksocknal_enumerate_interfaces(ksock_net_t *net) 2604{ 2605 char **names; 2606 int i; 2607 int j; 2608 int rc; 2609 int n; 2610 2611 n = libcfs_ipif_enumerate(&names); 2612 if (n <= 0) { 2613 CERROR("Can't enumerate interfaces: %d\n", n); 2614 return n; 2615 } 2616 2617 for (i = j = 0; i < n; i++) { 2618 int up; 2619 __u32 ip; 2620 __u32 mask; 2621 2622 if (!strcmp(names[i], "lo")) /* skip the loopback IF */ 2623 continue; 2624 2625 rc = libcfs_ipif_query(names[i], &up, &ip, &mask); 2626 if (rc != 0) { 2627 CWARN("Can't get interface %s info: %d\n", 2628 names[i], rc); 2629 continue; 2630 } 2631 2632 if (!up) { 2633 CWARN("Ignoring interface %s (down)\n", 2634 names[i]); 2635 continue; 2636 } 2637 2638 if (j == LNET_MAX_INTERFACES) { 2639 CWARN("Ignoring interface %s (too many interfaces)\n", 2640 names[i]); 2641 continue; 2642 } 2643 2644 net->ksnn_interfaces[j].ksni_ipaddr = ip; 2645 net->ksnn_interfaces[j].ksni_netmask = mask; 2646 strncpy(&net->ksnn_interfaces[j].ksni_name[0], 2647 names[i], IFNAMSIZ); 2648 j++; 2649 } 2650 2651 libcfs_ipif_free_enumeration(names, n); 2652 2653 if (j == 0) 2654 CERROR("Can't find any usable interfaces\n"); 2655 2656 return j; 2657} 2658 2659static int 2660ksocknal_search_new_ipif(ksock_net_t *net) 2661{ 2662 int new_ipif = 0; 2663 int i; 2664 2665 for (i = 0; i < net->ksnn_ninterfaces; i++) { 2666 char *ifnam = &net->ksnn_interfaces[i].ksni_name[0]; 2667 char *colon = strchr(ifnam, ':'); 2668 int found = 0; 2669 ksock_net_t *tmp; 2670 int j; 2671 2672 if (colon != NULL) /* ignore alias device */ 2673 *colon = 0; 2674 2675 list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, 2676 ksnn_list) { 2677 for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) { 2678 char *ifnam2 = 2679 &tmp->ksnn_interfaces[j].ksni_name[0]; 2680 char *colon2 = strchr(ifnam2, ':'); 2681 2682 if (colon2 != NULL) 2683 *colon2 = 0; 2684 2685 found = strcmp(ifnam, ifnam2) == 0; 2686 if (colon2 != NULL) 2687 *colon2 = ':'; 2688 } 2689 if (found) 2690 break; 2691 } 2692 2693 new_ipif += !found; 2694 if (colon != NULL) 2695 *colon = ':'; 2696 } 2697 2698 return new_ipif; 2699} 2700 2701static int 2702ksocknal_start_schedulers(struct ksock_sched_info *info) 2703{ 2704 int nthrs; 2705 int rc = 0; 2706 int i; 2707 2708 if (info->ksi_nthreads == 0) { 2709 if (*ksocknal_tunables.ksnd_nscheds > 0) { 2710 nthrs = info->ksi_nthreads_max; 2711 } else { 2712 nthrs = cfs_cpt_weight(lnet_cpt_table(), 2713 info->ksi_cpt); 2714 nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs); 2715 nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs); 2716 } 2717 nthrs = min(nthrs, info->ksi_nthreads_max); 2718 } else { 2719 LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max); 2720 /* increase two threads if there is new interface */ 2721 nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads); 2722 } 2723 2724 for (i = 0; i < nthrs; i++) { 2725 long id; 2726 char name[20]; 2727 ksock_sched_t *sched; 2728 id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i); 2729 sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)]; 2730 snprintf(name, sizeof(name), "socknal_sd%02d_%02d", 2731 info->ksi_cpt, (int)(sched - &info->ksi_scheds[0])); 2732 2733 rc = ksocknal_thread_start(ksocknal_scheduler, 2734 (void *)id, name); 2735 if (rc == 0) 2736 continue; 2737 2738 CERROR("Can't spawn thread %d for scheduler[%d]: %d\n", 2739 info->ksi_cpt, info->ksi_nthreads + i, rc); 2740 break; 2741 } 2742 2743 info->ksi_nthreads += i; 2744 return rc; 2745} 2746 2747static int 2748ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts) 2749{ 2750 int newif = ksocknal_search_new_ipif(net); 2751 int rc; 2752 int i; 2753 2754 LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table())); 2755 2756 for (i = 0; i < ncpts; i++) { 2757 struct ksock_sched_info *info; 2758 int cpt = (cpts == NULL) ? i : cpts[i]; 2759 2760 LASSERT(cpt < cfs_cpt_number(lnet_cpt_table())); 2761 info = ksocknal_data.ksnd_sched_info[cpt]; 2762 2763 if (!newif && info->ksi_nthreads > 0) 2764 continue; 2765 2766 rc = ksocknal_start_schedulers(info); 2767 if (rc != 0) 2768 return rc; 2769 } 2770 return 0; 2771} 2772 2773int 2774ksocknal_startup (lnet_ni_t *ni) 2775{ 2776 ksock_net_t *net; 2777 int rc; 2778 int i; 2779 2780 LASSERT (ni->ni_lnd == &the_ksocklnd); 2781 2782 if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) { 2783 rc = ksocknal_base_startup(); 2784 if (rc != 0) 2785 return rc; 2786 } 2787 2788 LIBCFS_ALLOC(net, sizeof(*net)); 2789 if (net == NULL) 2790 goto fail_0; 2791 2792 spin_lock_init(&net->ksnn_lock); 2793 net->ksnn_incarnation = ksocknal_new_incarnation(); 2794 ni->ni_data = net; 2795 ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout; 2796 ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits; 2797 ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits; 2798 ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits; 2799 2800 if (ni->ni_interfaces[0] == NULL) { 2801 rc = ksocknal_enumerate_interfaces(net); 2802 if (rc <= 0) 2803 goto fail_1; 2804 2805 net->ksnn_ninterfaces = 1; 2806 } else { 2807 for (i = 0; i < LNET_MAX_INTERFACES; i++) { 2808 int up; 2809 2810 if (ni->ni_interfaces[i] == NULL) 2811 break; 2812 2813 rc = libcfs_ipif_query( 2814 ni->ni_interfaces[i], &up, 2815 &net->ksnn_interfaces[i].ksni_ipaddr, 2816 &net->ksnn_interfaces[i].ksni_netmask); 2817 2818 if (rc != 0) { 2819 CERROR("Can't get interface %s info: %d\n", 2820 ni->ni_interfaces[i], rc); 2821 goto fail_1; 2822 } 2823 2824 if (!up) { 2825 CERROR("Interface %s is down\n", 2826 ni->ni_interfaces[i]); 2827 goto fail_1; 2828 } 2829 2830 strncpy(&net->ksnn_interfaces[i].ksni_name[0], 2831 ni->ni_interfaces[i], IFNAMSIZ); 2832 } 2833 net->ksnn_ninterfaces = i; 2834 } 2835 2836 /* call it before add it to ksocknal_data.ksnd_nets */ 2837 rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts); 2838 if (rc != 0) 2839 goto fail_1; 2840 2841 ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), 2842 net->ksnn_interfaces[0].ksni_ipaddr); 2843 list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets); 2844 2845 ksocknal_data.ksnd_nnets++; 2846 2847 return 0; 2848 2849 fail_1: 2850 LIBCFS_FREE(net, sizeof(*net)); 2851 fail_0: 2852 if (ksocknal_data.ksnd_nnets == 0) 2853 ksocknal_base_shutdown(); 2854 2855 return -ENETDOWN; 2856} 2857 2858 2859static void __exit 2860ksocknal_module_fini (void) 2861{ 2862 lnet_unregister_lnd(&the_ksocklnd); 2863} 2864 2865static int __init 2866ksocknal_module_init (void) 2867{ 2868 int rc; 2869 2870 /* check ksnr_connected/connecting field large enough */ 2871 CLASSERT (SOCKLND_CONN_NTYPES <= 4); 2872 CLASSERT (SOCKLND_CONN_ACK == SOCKLND_CONN_BULK_IN); 2873 2874 /* initialize the_ksocklnd */ 2875 the_ksocklnd.lnd_type = SOCKLND; 2876 the_ksocklnd.lnd_startup = ksocknal_startup; 2877 the_ksocklnd.lnd_shutdown = ksocknal_shutdown; 2878 the_ksocklnd.lnd_ctl = ksocknal_ctl; 2879 the_ksocklnd.lnd_send = ksocknal_send; 2880 the_ksocklnd.lnd_recv = ksocknal_recv; 2881 the_ksocklnd.lnd_notify = ksocknal_notify; 2882 the_ksocklnd.lnd_query = ksocknal_query; 2883 the_ksocklnd.lnd_accept = ksocknal_accept; 2884 2885 rc = ksocknal_tunables_init(); 2886 if (rc != 0) 2887 return rc; 2888 2889 lnet_register_lnd(&the_ksocklnd); 2890 2891 return 0; 2892} 2893 2894MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>"); 2895MODULE_DESCRIPTION("Kernel TCP Socket LND v3.0.0"); 2896MODULE_LICENSE("GPL"); 2897MODULE_VERSION("3.0.0"); 2898 2899module_init(ksocknal_module_init); 2900module_exit(ksocknal_module_fini); 2901