1/* 2 * GPL HEADER START 3 * 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 only, 8 * as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License version 2 for more details (a copy is included 14 * in the LICENSE file that accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License 17 * version 2 along with this program; If not, see 18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf 19 * 20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 21 * CA 95054 USA or visit www.sun.com if you need additional information or 22 * have any questions. 23 * 24 * GPL HEADER END 25 */ 26/* 27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 28 * Use is subject to license terms. 29 * 30 * Copyright (c) 2011, 2012, Intel Corporation. 31 */ 32/* 33 * This file is part of Lustre, http://www.lustre.org/ 34 * Lustre is a trademark of Sun Microsystems, Inc. 35 * 36 * lustre/lov/lov_obd.c 37 * 38 * Author: Phil Schwan <phil@clusterfs.com> 39 * Author: Peter Braam <braam@clusterfs.com> 40 * Author: Mike Shaver <shaver@clusterfs.com> 41 * Author: Nathan Rutman <nathan@clusterfs.com> 42 */ 43 44#define DEBUG_SUBSYSTEM S_LOV 45#include "../../include/linux/libcfs/libcfs.h" 46 47#include "../include/obd_support.h" 48#include "../include/lustre_lib.h" 49#include "../include/lustre_net.h" 50#include "../include/lustre/lustre_idl.h" 51#include "../include/lustre_dlm.h" 52#include "../include/lustre_mds.h" 53#include "../include/obd_class.h" 54#include "../include/lprocfs_status.h" 55#include "../include/lustre_param.h" 56#include "../include/cl_object.h" 57#include "../include/lclient.h" /* for cl_client_lru */ 58#include "../include/lustre/ll_fiemap.h" 59#include "../include/lustre_fid.h" 60 61#include "lov_internal.h" 62 63/* Keep a refcount of lov->tgt usage to prevent racing with addition/deletion. 64 Any function that expects lov_tgts to remain stationary must take a ref. */ 65static void lov_getref(struct obd_device *obd) 66{ 67 struct lov_obd *lov = &obd->u.lov; 68 69 /* nobody gets through here until lov_putref is done */ 70 mutex_lock(&lov->lov_lock); 71 atomic_inc(&lov->lov_refcount); 72 mutex_unlock(&lov->lov_lock); 73 return; 74} 75 76static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt); 77 78static void lov_putref(struct obd_device *obd) 79{ 80 struct lov_obd *lov = &obd->u.lov; 81 82 mutex_lock(&lov->lov_lock); 83 /* ok to dec to 0 more than once -- ltd_exp's will be null */ 84 if (atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) { 85 LIST_HEAD(kill); 86 int i; 87 struct lov_tgt_desc *tgt, *n; 88 CDEBUG(D_CONFIG, "destroying %d lov targets\n", 89 lov->lov_death_row); 90 for (i = 0; i < lov->desc.ld_tgt_count; i++) { 91 tgt = lov->lov_tgts[i]; 92 93 if (!tgt || !tgt->ltd_reap) 94 continue; 95 list_add(&tgt->ltd_kill, &kill); 96 /* XXX - right now there is a dependency on ld_tgt_count 97 * being the maximum tgt index for computing the 98 * mds_max_easize. So we can't shrink it. */ 99 lov_ost_pool_remove(&lov->lov_packed, i); 100 lov->lov_tgts[i] = NULL; 101 lov->lov_death_row--; 102 } 103 mutex_unlock(&lov->lov_lock); 104 105 list_for_each_entry_safe(tgt, n, &kill, ltd_kill) { 106 list_del(&tgt->ltd_kill); 107 /* Disconnect */ 108 __lov_del_obd(obd, tgt); 109 } 110 } else { 111 mutex_unlock(&lov->lov_lock); 112 } 113} 114 115static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, 116 enum obd_notify_event ev); 117static int lov_notify(struct obd_device *obd, struct obd_device *watched, 118 enum obd_notify_event ev, void *data); 119 120 121#define MAX_STRING_SIZE 128 122int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, 123 struct obd_connect_data *data) 124{ 125 struct lov_obd *lov = &obd->u.lov; 126 struct obd_uuid *tgt_uuid; 127 struct obd_device *tgt_obd; 128 static struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" }; 129 struct obd_import *imp; 130 struct proc_dir_entry *lov_proc_dir; 131 int rc; 132 133 if (!lov->lov_tgts[index]) 134 return -EINVAL; 135 136 tgt_uuid = &lov->lov_tgts[index]->ltd_uuid; 137 tgt_obd = lov->lov_tgts[index]->ltd_obd; 138 139 if (!tgt_obd->obd_set_up) { 140 CERROR("Target %s not set up\n", obd_uuid2str(tgt_uuid)); 141 return -EINVAL; 142 } 143 144 /* override the sp_me from lov */ 145 tgt_obd->u.cli.cl_sp_me = lov->lov_sp_me; 146 147 if (data && (data->ocd_connect_flags & OBD_CONNECT_INDEX)) 148 data->ocd_index = index; 149 150 /* 151 * Divine LOV knows that OBDs under it are OSCs. 152 */ 153 imp = tgt_obd->u.cli.cl_import; 154 155 if (activate) { 156 tgt_obd->obd_no_recov = 0; 157 /* FIXME this is probably supposed to be 158 ptlrpc_set_import_active. Horrible naming. */ 159 ptlrpc_activate_import(imp); 160 } 161 162 rc = obd_register_observer(tgt_obd, obd); 163 if (rc) { 164 CERROR("Target %s register_observer error %d\n", 165 obd_uuid2str(tgt_uuid), rc); 166 return rc; 167 } 168 169 170 if (imp->imp_invalid) { 171 CDEBUG(D_CONFIG, "not connecting OSC %s; administratively " 172 "disabled\n", obd_uuid2str(tgt_uuid)); 173 return 0; 174 } 175 176 rc = obd_connect(NULL, &lov->lov_tgts[index]->ltd_exp, tgt_obd, 177 &lov_osc_uuid, data, NULL); 178 if (rc || !lov->lov_tgts[index]->ltd_exp) { 179 CERROR("Target %s connect error %d\n", 180 obd_uuid2str(tgt_uuid), rc); 181 return -ENODEV; 182 } 183 184 lov->lov_tgts[index]->ltd_reap = 0; 185 186 CDEBUG(D_CONFIG, "Connected tgt idx %d %s (%s) %sactive\n", index, 187 obd_uuid2str(tgt_uuid), tgt_obd->obd_name, activate ? "":"in"); 188 189 lov_proc_dir = obd->obd_proc_private; 190 if (lov_proc_dir) { 191 struct obd_device *osc_obd = lov->lov_tgts[index]->ltd_exp->exp_obd; 192 struct proc_dir_entry *osc_symlink; 193 194 LASSERT(osc_obd != NULL); 195 LASSERT(osc_obd->obd_magic == OBD_DEVICE_MAGIC); 196 LASSERT(osc_obd->obd_type->typ_name != NULL); 197 198 osc_symlink = lprocfs_add_symlink(osc_obd->obd_name, 199 lov_proc_dir, 200 "../../../%s/%s", 201 osc_obd->obd_type->typ_name, 202 osc_obd->obd_name); 203 if (osc_symlink == NULL) { 204 CERROR("could not register LOV target " 205 "/proc/fs/lustre/%s/%s/target_obds/%s.", 206 obd->obd_type->typ_name, obd->obd_name, 207 osc_obd->obd_name); 208 lprocfs_remove(&lov_proc_dir); 209 obd->obd_proc_private = NULL; 210 } 211 } 212 213 return 0; 214} 215 216static int lov_connect(const struct lu_env *env, 217 struct obd_export **exp, struct obd_device *obd, 218 struct obd_uuid *cluuid, struct obd_connect_data *data, 219 void *localdata) 220{ 221 struct lov_obd *lov = &obd->u.lov; 222 struct lov_tgt_desc *tgt; 223 struct lustre_handle conn; 224 int i, rc; 225 226 CDEBUG(D_CONFIG, "connect #%d\n", lov->lov_connects); 227 228 rc = class_connect(&conn, obd, cluuid); 229 if (rc) 230 return rc; 231 232 *exp = class_conn2export(&conn); 233 234 /* Why should there ever be more than 1 connect? */ 235 lov->lov_connects++; 236 LASSERT(lov->lov_connects == 1); 237 238 memset(&lov->lov_ocd, 0, sizeof(lov->lov_ocd)); 239 if (data) 240 lov->lov_ocd = *data; 241 242 obd_getref(obd); 243 for (i = 0; i < lov->desc.ld_tgt_count; i++) { 244 tgt = lov->lov_tgts[i]; 245 if (!tgt || obd_uuid_empty(&tgt->ltd_uuid)) 246 continue; 247 /* Flags will be lowest common denominator */ 248 rc = lov_connect_obd(obd, i, tgt->ltd_activate, &lov->lov_ocd); 249 if (rc) { 250 CERROR("%s: lov connect tgt %d failed: %d\n", 251 obd->obd_name, i, rc); 252 continue; 253 } 254 /* connect to administrative disabled ost */ 255 if (!lov->lov_tgts[i]->ltd_exp) 256 continue; 257 258 rc = lov_notify(obd, lov->lov_tgts[i]->ltd_exp->exp_obd, 259 OBD_NOTIFY_CONNECT, (void *)&i); 260 if (rc) { 261 CERROR("%s error sending notify %d\n", 262 obd->obd_name, rc); 263 } 264 } 265 obd_putref(obd); 266 267 return 0; 268} 269 270static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) 271{ 272 struct proc_dir_entry *lov_proc_dir; 273 struct lov_obd *lov = &obd->u.lov; 274 struct obd_device *osc_obd; 275 int rc; 276 277 osc_obd = class_exp2obd(tgt->ltd_exp); 278 CDEBUG(D_CONFIG, "%s: disconnecting target %s\n", 279 obd->obd_name, osc_obd ? osc_obd->obd_name : "NULL"); 280 281 if (tgt->ltd_active) { 282 tgt->ltd_active = 0; 283 lov->desc.ld_active_tgt_count--; 284 tgt->ltd_exp->exp_obd->obd_inactive = 1; 285 } 286 287 if (osc_obd) { 288 lov_proc_dir = obd->obd_proc_private; 289 if (lov_proc_dir) { 290 lprocfs_remove_proc_entry(osc_obd->obd_name, lov_proc_dir); 291 } 292 /* Pass it on to our clients. 293 * XXX This should be an argument to disconnect, 294 * XXX not a back-door flag on the OBD. Ah well. 295 */ 296 osc_obd->obd_force = obd->obd_force; 297 osc_obd->obd_fail = obd->obd_fail; 298 osc_obd->obd_no_recov = obd->obd_no_recov; 299 } 300 301 obd_register_observer(osc_obd, NULL); 302 303 rc = obd_disconnect(tgt->ltd_exp); 304 if (rc) { 305 CERROR("Target %s disconnect error %d\n", 306 tgt->ltd_uuid.uuid, rc); 307 rc = 0; 308 } 309 310 tgt->ltd_exp = NULL; 311 return 0; 312} 313 314static int lov_disconnect(struct obd_export *exp) 315{ 316 struct obd_device *obd = class_exp2obd(exp); 317 struct lov_obd *lov = &obd->u.lov; 318 int i, rc; 319 320 if (!lov->lov_tgts) 321 goto out; 322 323 /* Only disconnect the underlying layers on the final disconnect. */ 324 lov->lov_connects--; 325 if (lov->lov_connects != 0) { 326 /* why should there be more than 1 connect? */ 327 CERROR("disconnect #%d\n", lov->lov_connects); 328 goto out; 329 } 330 331 /* Let's hold another reference so lov_del_obd doesn't spin through 332 putref every time */ 333 obd_getref(obd); 334 335 for (i = 0; i < lov->desc.ld_tgt_count; i++) { 336 if (lov->lov_tgts[i] && lov->lov_tgts[i]->ltd_exp) { 337 /* Disconnection is the last we know about an obd */ 338 lov_del_target(obd, i, NULL, lov->lov_tgts[i]->ltd_gen); 339 } 340 } 341 obd_putref(obd); 342 343out: 344 rc = class_disconnect(exp); /* bz 9811 */ 345 return rc; 346} 347 348/* Error codes: 349 * 350 * -EINVAL : UUID can't be found in the LOV's target list 351 * -ENOTCONN: The UUID is found, but the target connection is bad (!) 352 * -EBADF : The UUID is found, but the OBD is the wrong type (!) 353 * any >= 0 : is log target index 354 */ 355static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, 356 enum obd_notify_event ev) 357{ 358 struct lov_obd *lov = &obd->u.lov; 359 struct lov_tgt_desc *tgt; 360 int index, activate, active; 361 362 CDEBUG(D_INFO, "Searching in lov %p for uuid %s event(%d)\n", 363 lov, uuid->uuid, ev); 364 365 obd_getref(obd); 366 for (index = 0; index < lov->desc.ld_tgt_count; index++) { 367 tgt = lov->lov_tgts[index]; 368 if (!tgt) 369 continue; 370 /* 371 * LU-642, initially inactive OSC could miss the obd_connect, 372 * we make up for it here. 373 */ 374 if (ev == OBD_NOTIFY_ACTIVATE && tgt->ltd_exp == NULL && 375 obd_uuid_equals(uuid, &tgt->ltd_uuid)) { 376 struct obd_uuid lov_osc_uuid = {"LOV_OSC_UUID"}; 377 378 obd_connect(NULL, &tgt->ltd_exp, tgt->ltd_obd, 379 &lov_osc_uuid, &lov->lov_ocd, NULL); 380 } 381 if (!tgt->ltd_exp) 382 continue; 383 384 CDEBUG(D_INFO, "lov idx %d is %s conn %#llx\n", 385 index, obd_uuid2str(&tgt->ltd_uuid), 386 tgt->ltd_exp->exp_handle.h_cookie); 387 if (obd_uuid_equals(uuid, &tgt->ltd_uuid)) 388 break; 389 } 390 391 if (index == lov->desc.ld_tgt_count) { 392 index = -EINVAL; 393 goto out; 394 } 395 396 if (ev == OBD_NOTIFY_DEACTIVATE || ev == OBD_NOTIFY_ACTIVATE) { 397 activate = (ev == OBD_NOTIFY_ACTIVATE) ? 1 : 0; 398 399 if (lov->lov_tgts[index]->ltd_activate == activate) { 400 CDEBUG(D_INFO, "OSC %s already %sactivate!\n", 401 uuid->uuid, activate ? "" : "de"); 402 } else { 403 lov->lov_tgts[index]->ltd_activate = activate; 404 CDEBUG(D_CONFIG, "%sactivate OSC %s\n", 405 activate ? "" : "de", obd_uuid2str(uuid)); 406 } 407 408 } else if (ev == OBD_NOTIFY_INACTIVE || ev == OBD_NOTIFY_ACTIVE) { 409 active = (ev == OBD_NOTIFY_ACTIVE) ? 1 : 0; 410 411 if (lov->lov_tgts[index]->ltd_active == active) { 412 CDEBUG(D_INFO, "OSC %s already %sactive!\n", 413 uuid->uuid, active ? "" : "in"); 414 goto out; 415 } else { 416 CDEBUG(D_CONFIG, "Marking OSC %s %sactive\n", 417 obd_uuid2str(uuid), active ? "" : "in"); 418 } 419 420 lov->lov_tgts[index]->ltd_active = active; 421 if (active) { 422 lov->desc.ld_active_tgt_count++; 423 lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 0; 424 } else { 425 lov->desc.ld_active_tgt_count--; 426 lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1; 427 } 428 } else { 429 CERROR("Unknown event(%d) for uuid %s", ev, uuid->uuid); 430 } 431 432 out: 433 obd_putref(obd); 434 return index; 435} 436 437static int lov_notify(struct obd_device *obd, struct obd_device *watched, 438 enum obd_notify_event ev, void *data) 439{ 440 int rc = 0; 441 struct lov_obd *lov = &obd->u.lov; 442 443 down_read(&lov->lov_notify_lock); 444 if (!lov->lov_connects) { 445 up_read(&lov->lov_notify_lock); 446 return rc; 447 } 448 449 if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE || 450 ev == OBD_NOTIFY_ACTIVATE || ev == OBD_NOTIFY_DEACTIVATE) { 451 struct obd_uuid *uuid; 452 453 LASSERT(watched); 454 455 if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { 456 up_read(&lov->lov_notify_lock); 457 CERROR("unexpected notification of %s %s!\n", 458 watched->obd_type->typ_name, 459 watched->obd_name); 460 return -EINVAL; 461 } 462 uuid = &watched->u.cli.cl_target_uuid; 463 464 /* Set OSC as active before notifying the observer, so the 465 * observer can use the OSC normally. 466 */ 467 rc = lov_set_osc_active(obd, uuid, ev); 468 if (rc < 0) { 469 up_read(&lov->lov_notify_lock); 470 CERROR("event(%d) of %s failed: %d\n", ev, 471 obd_uuid2str(uuid), rc); 472 return rc; 473 } 474 /* active event should be pass lov target index as data */ 475 data = &rc; 476 } 477 478 /* Pass the notification up the chain. */ 479 if (watched) { 480 rc = obd_notify_observer(obd, watched, ev, data); 481 } else { 482 /* NULL watched means all osc's in the lov (only for syncs) */ 483 /* sync event should be send lov idx as data */ 484 struct lov_obd *lov = &obd->u.lov; 485 int i, is_sync; 486 487 data = &i; 488 is_sync = (ev == OBD_NOTIFY_SYNC) || 489 (ev == OBD_NOTIFY_SYNC_NONBLOCK); 490 491 obd_getref(obd); 492 for (i = 0; i < lov->desc.ld_tgt_count; i++) { 493 if (!lov->lov_tgts[i]) 494 continue; 495 496 /* don't send sync event if target not 497 * connected/activated */ 498 if (is_sync && !lov->lov_tgts[i]->ltd_active) 499 continue; 500 501 rc = obd_notify_observer(obd, lov->lov_tgts[i]->ltd_obd, 502 ev, data); 503 if (rc) { 504 CERROR("%s: notify %s of %s failed %d\n", 505 obd->obd_name, 506 obd->obd_observer->obd_name, 507 lov->lov_tgts[i]->ltd_obd->obd_name, 508 rc); 509 } 510 } 511 obd_putref(obd); 512 } 513 514 up_read(&lov->lov_notify_lock); 515 return rc; 516} 517 518static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, 519 __u32 index, int gen, int active) 520{ 521 struct lov_obd *lov = &obd->u.lov; 522 struct lov_tgt_desc *tgt; 523 struct obd_device *tgt_obd; 524 int rc; 525 526 CDEBUG(D_CONFIG, "uuid:%s idx:%d gen:%d active:%d\n", 527 uuidp->uuid, index, gen, active); 528 529 if (gen <= 0) { 530 CERROR("request to add OBD %s with invalid generation: %d\n", 531 uuidp->uuid, gen); 532 return -EINVAL; 533 } 534 535 tgt_obd = class_find_client_obd(uuidp, LUSTRE_OSC_NAME, 536 &obd->obd_uuid); 537 if (tgt_obd == NULL) 538 return -EINVAL; 539 540 mutex_lock(&lov->lov_lock); 541 542 if ((index < lov->lov_tgt_size) && (lov->lov_tgts[index] != NULL)) { 543 tgt = lov->lov_tgts[index]; 544 CERROR("UUID %s already assigned at LOV target index %d\n", 545 obd_uuid2str(&tgt->ltd_uuid), index); 546 mutex_unlock(&lov->lov_lock); 547 return -EEXIST; 548 } 549 550 if (index >= lov->lov_tgt_size) { 551 /* We need to reallocate the lov target array. */ 552 struct lov_tgt_desc **newtgts, **old = NULL; 553 __u32 newsize, oldsize = 0; 554 555 newsize = max_t(__u32, lov->lov_tgt_size, 2); 556 while (newsize < index + 1) 557 newsize = newsize << 1; 558 OBD_ALLOC(newtgts, sizeof(*newtgts) * newsize); 559 if (newtgts == NULL) { 560 mutex_unlock(&lov->lov_lock); 561 return -ENOMEM; 562 } 563 564 if (lov->lov_tgt_size) { 565 memcpy(newtgts, lov->lov_tgts, sizeof(*newtgts) * 566 lov->lov_tgt_size); 567 old = lov->lov_tgts; 568 oldsize = lov->lov_tgt_size; 569 } 570 571 lov->lov_tgts = newtgts; 572 lov->lov_tgt_size = newsize; 573 smp_rmb(); 574 if (old) 575 OBD_FREE(old, sizeof(*old) * oldsize); 576 577 CDEBUG(D_CONFIG, "tgts: %p size: %d\n", 578 lov->lov_tgts, lov->lov_tgt_size); 579 } 580 581 OBD_ALLOC_PTR(tgt); 582 if (!tgt) { 583 mutex_unlock(&lov->lov_lock); 584 return -ENOMEM; 585 } 586 587 rc = lov_ost_pool_add(&lov->lov_packed, index, lov->lov_tgt_size); 588 if (rc) { 589 mutex_unlock(&lov->lov_lock); 590 OBD_FREE_PTR(tgt); 591 return rc; 592 } 593 594 tgt->ltd_uuid = *uuidp; 595 tgt->ltd_obd = tgt_obd; 596 /* XXX - add a sanity check on the generation number. */ 597 tgt->ltd_gen = gen; 598 tgt->ltd_index = index; 599 tgt->ltd_activate = active; 600 lov->lov_tgts[index] = tgt; 601 if (index >= lov->desc.ld_tgt_count) 602 lov->desc.ld_tgt_count = index + 1; 603 604 mutex_unlock(&lov->lov_lock); 605 606 CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n", 607 index, tgt->ltd_gen, lov->desc.ld_tgt_count); 608 609 rc = obd_notify(obd, tgt_obd, OBD_NOTIFY_CREATE, &index); 610 611 if (lov->lov_connects == 0) { 612 /* lov_connect hasn't been called yet. We'll do the 613 lov_connect_obd on this target when that fn first runs, 614 because we don't know the connect flags yet. */ 615 return 0; 616 } 617 618 obd_getref(obd); 619 620 rc = lov_connect_obd(obd, index, active, &lov->lov_ocd); 621 if (rc) 622 goto out; 623 624 /* connect to administrative disabled ost */ 625 if (!tgt->ltd_exp) { 626 rc = 0; 627 goto out; 628 } 629 630 if (lov->lov_cache != NULL) { 631 rc = obd_set_info_async(NULL, tgt->ltd_exp, 632 sizeof(KEY_CACHE_SET), KEY_CACHE_SET, 633 sizeof(struct cl_client_cache), lov->lov_cache, 634 NULL); 635 if (rc < 0) 636 goto out; 637 } 638 639 rc = lov_notify(obd, tgt->ltd_exp->exp_obd, 640 active ? OBD_NOTIFY_CONNECT : OBD_NOTIFY_INACTIVE, 641 (void *)&index); 642 643out: 644 if (rc) { 645 CERROR("add failed (%d), deleting %s\n", rc, 646 obd_uuid2str(&tgt->ltd_uuid)); 647 lov_del_target(obd, index, NULL, 0); 648 } 649 obd_putref(obd); 650 return rc; 651} 652 653/* Schedule a target for deletion */ 654int lov_del_target(struct obd_device *obd, __u32 index, 655 struct obd_uuid *uuidp, int gen) 656{ 657 struct lov_obd *lov = &obd->u.lov; 658 int count = lov->desc.ld_tgt_count; 659 int rc = 0; 660 661 if (index >= count) { 662 CERROR("LOV target index %d >= number of LOV OBDs %d.\n", 663 index, count); 664 return -EINVAL; 665 } 666 667 /* to make sure there's no ongoing lov_notify() now */ 668 down_write(&lov->lov_notify_lock); 669 obd_getref(obd); 670 671 if (!lov->lov_tgts[index]) { 672 CERROR("LOV target at index %d is not setup.\n", index); 673 rc = -EINVAL; 674 goto out; 675 } 676 677 if (uuidp && !obd_uuid_equals(uuidp, &lov->lov_tgts[index]->ltd_uuid)) { 678 CERROR("LOV target UUID %s at index %d doesn't match %s.\n", 679 lov_uuid2str(lov, index), index, 680 obd_uuid2str(uuidp)); 681 rc = -EINVAL; 682 goto out; 683 } 684 685 CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d exp: %p active: %d\n", 686 lov_uuid2str(lov, index), index, 687 lov->lov_tgts[index]->ltd_gen, lov->lov_tgts[index]->ltd_exp, 688 lov->lov_tgts[index]->ltd_active); 689 690 lov->lov_tgts[index]->ltd_reap = 1; 691 lov->lov_death_row++; 692 /* we really delete it from obd_putref */ 693out: 694 obd_putref(obd); 695 up_write(&lov->lov_notify_lock); 696 697 return rc; 698} 699 700static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) 701{ 702 struct obd_device *osc_obd; 703 704 LASSERT(tgt); 705 LASSERT(tgt->ltd_reap); 706 707 osc_obd = class_exp2obd(tgt->ltd_exp); 708 709 CDEBUG(D_CONFIG, "Removing tgt %s : %s\n", 710 tgt->ltd_uuid.uuid, 711 osc_obd ? osc_obd->obd_name : "<no obd>"); 712 713 if (tgt->ltd_exp) 714 lov_disconnect_obd(obd, tgt); 715 716 OBD_FREE_PTR(tgt); 717 718 /* Manual cleanup - no cleanup logs to clean up the osc's. We must 719 do it ourselves. And we can't do it from lov_cleanup, 720 because we just lost our only reference to it. */ 721 if (osc_obd) 722 class_manual_cleanup(osc_obd); 723} 724 725void lov_fix_desc_stripe_size(__u64 *val) 726{ 727 if (*val < LOV_MIN_STRIPE_SIZE) { 728 if (*val != 0) 729 LCONSOLE_INFO("Increasing default stripe size to " 730 "minimum %u\n", 731 LOV_DESC_STRIPE_SIZE_DEFAULT); 732 *val = LOV_DESC_STRIPE_SIZE_DEFAULT; 733 } else if (*val & (LOV_MIN_STRIPE_SIZE - 1)) { 734 *val &= ~(LOV_MIN_STRIPE_SIZE - 1); 735 LCONSOLE_WARN("Changing default stripe size to %llu (a multiple of %u)\n", 736 *val, LOV_MIN_STRIPE_SIZE); 737 } 738} 739 740void lov_fix_desc_stripe_count(__u32 *val) 741{ 742 if (*val == 0) 743 *val = 1; 744} 745 746void lov_fix_desc_pattern(__u32 *val) 747{ 748 /* from lov_setstripe */ 749 if ((*val != 0) && (*val != LOV_PATTERN_RAID0)) { 750 LCONSOLE_WARN("Unknown stripe pattern: %#x\n", *val); 751 *val = 0; 752 } 753} 754 755void lov_fix_desc_qos_maxage(__u32 *val) 756{ 757 if (*val == 0) 758 *val = LOV_DESC_QOS_MAXAGE_DEFAULT; 759} 760 761void lov_fix_desc(struct lov_desc *desc) 762{ 763 lov_fix_desc_stripe_size(&desc->ld_default_stripe_size); 764 lov_fix_desc_stripe_count(&desc->ld_default_stripe_count); 765 lov_fix_desc_pattern(&desc->ld_pattern); 766 lov_fix_desc_qos_maxage(&desc->ld_qos_maxage); 767} 768 769int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) 770{ 771 struct lprocfs_static_vars lvars = { NULL }; 772 struct lov_desc *desc; 773 struct lov_obd *lov = &obd->u.lov; 774 int rc; 775 776 if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { 777 CERROR("LOV setup requires a descriptor\n"); 778 return -EINVAL; 779 } 780 781 desc = (struct lov_desc *)lustre_cfg_buf(lcfg, 1); 782 783 if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) { 784 CERROR("descriptor size wrong: %d > %d\n", 785 (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1)); 786 return -EINVAL; 787 } 788 789 if (desc->ld_magic != LOV_DESC_MAGIC) { 790 if (desc->ld_magic == __swab32(LOV_DESC_MAGIC)) { 791 CDEBUG(D_OTHER, "%s: Swabbing lov desc %p\n", 792 obd->obd_name, desc); 793 lustre_swab_lov_desc(desc); 794 } else { 795 CERROR("%s: Bad lov desc magic: %#x\n", 796 obd->obd_name, desc->ld_magic); 797 return -EINVAL; 798 } 799 } 800 801 lov_fix_desc(desc); 802 803 desc->ld_active_tgt_count = 0; 804 lov->desc = *desc; 805 lov->lov_tgt_size = 0; 806 807 mutex_init(&lov->lov_lock); 808 atomic_set(&lov->lov_refcount, 0); 809 lov->lov_sp_me = LUSTRE_SP_CLI; 810 811 init_rwsem(&lov->lov_notify_lock); 812 813 lov->lov_pools_hash_body = cfs_hash_create("POOLS", HASH_POOLS_CUR_BITS, 814 HASH_POOLS_MAX_BITS, 815 HASH_POOLS_BKT_BITS, 0, 816 CFS_HASH_MIN_THETA, 817 CFS_HASH_MAX_THETA, 818 &pool_hash_operations, 819 CFS_HASH_DEFAULT); 820 INIT_LIST_HEAD(&lov->lov_pool_list); 821 lov->lov_pool_count = 0; 822 rc = lov_ost_pool_init(&lov->lov_packed, 0); 823 if (rc) 824 goto out; 825 826 lprocfs_lov_init_vars(&lvars); 827 lprocfs_obd_setup(obd, lvars.obd_vars); 828#if defined (CONFIG_PROC_FS) 829 { 830 int rc1; 831 832 rc1 = lprocfs_seq_create(obd->obd_proc_entry, "target_obd", 833 0444, &lov_proc_target_fops, obd); 834 if (rc1) 835 CWARN("Error adding the target_obd file\n"); 836 } 837#endif 838 lov->lov_pool_proc_entry = lprocfs_register("pools", 839 obd->obd_proc_entry, 840 NULL, NULL); 841 842 return 0; 843 844out: 845 return rc; 846} 847 848static int lov_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) 849{ 850 int rc = 0; 851 struct lov_obd *lov = &obd->u.lov; 852 853 switch (stage) { 854 case OBD_CLEANUP_EARLY: { 855 int i; 856 for (i = 0; i < lov->desc.ld_tgt_count; i++) { 857 if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) 858 continue; 859 obd_precleanup(class_exp2obd(lov->lov_tgts[i]->ltd_exp), 860 OBD_CLEANUP_EARLY); 861 } 862 break; 863 } 864 default: 865 break; 866 } 867 868 return rc; 869} 870 871static int lov_cleanup(struct obd_device *obd) 872{ 873 struct lov_obd *lov = &obd->u.lov; 874 struct list_head *pos, *tmp; 875 struct pool_desc *pool; 876 877 list_for_each_safe(pos, tmp, &lov->lov_pool_list) { 878 pool = list_entry(pos, struct pool_desc, pool_list); 879 /* free pool structs */ 880 CDEBUG(D_INFO, "delete pool %p\n", pool); 881 /* In the function below, .hs_keycmp resolves to 882 * pool_hashkey_keycmp() */ 883 /* coverity[overrun-buffer-val] */ 884 lov_pool_del(obd, pool->pool_name); 885 } 886 cfs_hash_putref(lov->lov_pools_hash_body); 887 lov_ost_pool_free(&lov->lov_packed); 888 889 lprocfs_obd_cleanup(obd); 890 if (lov->lov_tgts) { 891 int i; 892 obd_getref(obd); 893 for (i = 0; i < lov->desc.ld_tgt_count; i++) { 894 if (!lov->lov_tgts[i]) 895 continue; 896 897 /* Inactive targets may never have connected */ 898 if (lov->lov_tgts[i]->ltd_active || 899 atomic_read(&lov->lov_refcount)) 900 /* We should never get here - these 901 should have been removed in the 902 disconnect. */ 903 CERROR("lov tgt %d not cleaned!" 904 " deathrow=%d, lovrc=%d\n", 905 i, lov->lov_death_row, 906 atomic_read(&lov->lov_refcount)); 907 lov_del_target(obd, i, NULL, 0); 908 } 909 obd_putref(obd); 910 OBD_FREE(lov->lov_tgts, sizeof(*lov->lov_tgts) * 911 lov->lov_tgt_size); 912 lov->lov_tgt_size = 0; 913 } 914 return 0; 915} 916 917int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg, 918 __u32 *indexp, int *genp) 919{ 920 struct obd_uuid obd_uuid; 921 int cmd; 922 int rc = 0; 923 924 switch (cmd = lcfg->lcfg_command) { 925 case LCFG_LOV_ADD_OBD: 926 case LCFG_LOV_ADD_INA: 927 case LCFG_LOV_DEL_OBD: { 928 __u32 index; 929 int gen; 930 /* lov_modify_tgts add 0:lov_mdsA 1:ost1_UUID 2:0 3:1 */ 931 if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(obd_uuid.uuid)) { 932 rc = -EINVAL; 933 goto out; 934 } 935 936 obd_str2uuid(&obd_uuid, lustre_cfg_buf(lcfg, 1)); 937 938 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", indexp) != 1) { 939 rc = -EINVAL; 940 goto out; 941 } 942 if (sscanf(lustre_cfg_buf(lcfg, 3), "%d", genp) != 1) { 943 rc = -EINVAL; 944 goto out; 945 } 946 index = *indexp; 947 gen = *genp; 948 if (cmd == LCFG_LOV_ADD_OBD) 949 rc = lov_add_target(obd, &obd_uuid, index, gen, 1); 950 else if (cmd == LCFG_LOV_ADD_INA) 951 rc = lov_add_target(obd, &obd_uuid, index, gen, 0); 952 else 953 rc = lov_del_target(obd, index, &obd_uuid, gen); 954 goto out; 955 } 956 case LCFG_PARAM: { 957 struct lprocfs_static_vars lvars = { NULL }; 958 struct lov_desc *desc = &(obd->u.lov.desc); 959 960 if (!desc) { 961 rc = -EINVAL; 962 goto out; 963 } 964 965 lprocfs_lov_init_vars(&lvars); 966 967 rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars, 968 lcfg, obd); 969 if (rc > 0) 970 rc = 0; 971 goto out; 972 } 973 case LCFG_POOL_NEW: 974 case LCFG_POOL_ADD: 975 case LCFG_POOL_DEL: 976 case LCFG_POOL_REM: 977 goto out; 978 979 default: { 980 CERROR("Unknown command: %d\n", lcfg->lcfg_command); 981 rc = -EINVAL; 982 goto out; 983 984 } 985 } 986out: 987 return rc; 988} 989 990static int lov_recreate(struct obd_export *exp, struct obdo *src_oa, 991 struct lov_stripe_md **ea, struct obd_trans_info *oti) 992{ 993 struct lov_stripe_md *obj_mdp, *lsm; 994 struct lov_obd *lov = &exp->exp_obd->u.lov; 995 unsigned ost_idx; 996 int rc, i; 997 998 LASSERT(src_oa->o_valid & OBD_MD_FLFLAGS && 999 src_oa->o_flags & OBD_FL_RECREATE_OBJS); 1000 1001 OBD_ALLOC(obj_mdp, sizeof(*obj_mdp)); 1002 if (obj_mdp == NULL) 1003 return -ENOMEM; 1004 1005 ost_idx = src_oa->o_nlink; 1006 lsm = *ea; 1007 if (lsm == NULL) { 1008 rc = -EINVAL; 1009 goto out; 1010 } 1011 if (ost_idx >= lov->desc.ld_tgt_count || 1012 !lov->lov_tgts[ost_idx]) { 1013 rc = -EINVAL; 1014 goto out; 1015 } 1016 1017 for (i = 0; i < lsm->lsm_stripe_count; i++) { 1018 if (lsm->lsm_oinfo[i]->loi_ost_idx == ost_idx) { 1019 if (ostid_id(&lsm->lsm_oinfo[i]->loi_oi) != 1020 ostid_id(&src_oa->o_oi)) { 1021 rc = -EINVAL; 1022 goto out; 1023 } 1024 break; 1025 } 1026 } 1027 if (i == lsm->lsm_stripe_count) { 1028 rc = -EINVAL; 1029 goto out; 1030 } 1031 1032 rc = obd_create(NULL, lov->lov_tgts[ost_idx]->ltd_exp, 1033 src_oa, &obj_mdp, oti); 1034out: 1035 OBD_FREE(obj_mdp, sizeof(*obj_mdp)); 1036 return rc; 1037} 1038 1039/* the LOV expects oa->o_id to be set to the LOV object id */ 1040static int lov_create(const struct lu_env *env, struct obd_export *exp, 1041 struct obdo *src_oa, struct lov_stripe_md **ea, 1042 struct obd_trans_info *oti) 1043{ 1044 struct lov_obd *lov; 1045 int rc = 0; 1046 1047 LASSERT(ea != NULL); 1048 if (exp == NULL) 1049 return -EINVAL; 1050 1051 if ((src_oa->o_valid & OBD_MD_FLFLAGS) && 1052 src_oa->o_flags == OBD_FL_DELORPHAN) { 1053 /* should be used with LOV anymore */ 1054 LBUG(); 1055 } 1056 1057 lov = &exp->exp_obd->u.lov; 1058 if (!lov->desc.ld_active_tgt_count) 1059 return -EIO; 1060 1061 obd_getref(exp->exp_obd); 1062 /* Recreate a specific object id at the given OST index */ 1063 if ((src_oa->o_valid & OBD_MD_FLFLAGS) && 1064 (src_oa->o_flags & OBD_FL_RECREATE_OBJS)) { 1065 rc = lov_recreate(exp, src_oa, ea, oti); 1066 } 1067 1068 obd_putref(exp->exp_obd); 1069 return rc; 1070} 1071 1072#define ASSERT_LSM_MAGIC(lsmp) \ 1073do { \ 1074 LASSERT((lsmp) != NULL); \ 1075 LASSERTF(((lsmp)->lsm_magic == LOV_MAGIC_V1 || \ 1076 (lsmp)->lsm_magic == LOV_MAGIC_V3), \ 1077 "%p->lsm_magic=%x\n", (lsmp), (lsmp)->lsm_magic); \ 1078} while (0) 1079 1080static int lov_destroy(const struct lu_env *env, struct obd_export *exp, 1081 struct obdo *oa, struct lov_stripe_md *lsm, 1082 struct obd_trans_info *oti, struct obd_export *md_exp, 1083 void *capa) 1084{ 1085 struct lov_request_set *set; 1086 struct obd_info oinfo; 1087 struct lov_request *req; 1088 struct list_head *pos; 1089 struct lov_obd *lov; 1090 int rc = 0, err = 0; 1091 1092 ASSERT_LSM_MAGIC(lsm); 1093 1094 if (!exp || !exp->exp_obd) 1095 return -ENODEV; 1096 1097 if (oa->o_valid & OBD_MD_FLCOOKIE) { 1098 LASSERT(oti); 1099 LASSERT(oti->oti_logcookies); 1100 } 1101 1102 lov = &exp->exp_obd->u.lov; 1103 obd_getref(exp->exp_obd); 1104 rc = lov_prep_destroy_set(exp, &oinfo, oa, lsm, oti, &set); 1105 if (rc) 1106 goto out; 1107 1108 list_for_each(pos, &set->set_list) { 1109 req = list_entry(pos, struct lov_request, rq_link); 1110 1111 if (oa->o_valid & OBD_MD_FLCOOKIE) 1112 oti->oti_logcookies = set->set_cookies + req->rq_stripe; 1113 1114 err = obd_destroy(env, lov->lov_tgts[req->rq_idx]->ltd_exp, 1115 req->rq_oi.oi_oa, NULL, oti, NULL, capa); 1116 err = lov_update_common_set(set, req, err); 1117 if (err) { 1118 CERROR("%s: destroying objid "DOSTID" subobj " 1119 DOSTID" on OST idx %d: rc = %d\n", 1120 exp->exp_obd->obd_name, POSTID(&oa->o_oi), 1121 POSTID(&req->rq_oi.oi_oa->o_oi), 1122 req->rq_idx, err); 1123 if (!rc) 1124 rc = err; 1125 } 1126 } 1127 1128 if (rc == 0) { 1129 LASSERT(lsm_op_find(lsm->lsm_magic) != NULL); 1130 rc = lsm_op_find(lsm->lsm_magic)->lsm_destroy(lsm, oa, md_exp); 1131 } 1132 err = lov_fini_destroy_set(set); 1133out: 1134 obd_putref(exp->exp_obd); 1135 return rc ? rc : err; 1136} 1137 1138static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, 1139 void *data, int rc) 1140{ 1141 struct lov_request_set *lovset = (struct lov_request_set *)data; 1142 int err; 1143 1144 /* don't do attribute merge if this async op failed */ 1145 if (rc) 1146 atomic_set(&lovset->set_completes, 0); 1147 err = lov_fini_getattr_set(lovset); 1148 return rc ? rc : err; 1149} 1150 1151static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo, 1152 struct ptlrpc_request_set *rqset) 1153{ 1154 struct lov_request_set *lovset; 1155 struct lov_obd *lov; 1156 struct list_head *pos; 1157 struct lov_request *req; 1158 int rc = 0, err; 1159 1160 LASSERT(oinfo); 1161 ASSERT_LSM_MAGIC(oinfo->oi_md); 1162 1163 if (!exp || !exp->exp_obd) 1164 return -ENODEV; 1165 1166 lov = &exp->exp_obd->u.lov; 1167 1168 rc = lov_prep_getattr_set(exp, oinfo, &lovset); 1169 if (rc) 1170 return rc; 1171 1172 CDEBUG(D_INFO, "objid "DOSTID": %ux%u byte stripes\n", 1173 POSTID(&oinfo->oi_md->lsm_oi), oinfo->oi_md->lsm_stripe_count, 1174 oinfo->oi_md->lsm_stripe_size); 1175 1176 list_for_each(pos, &lovset->set_list) { 1177 req = list_entry(pos, struct lov_request, rq_link); 1178 1179 CDEBUG(D_INFO, "objid "DOSTID"[%d] has subobj "DOSTID" at idx" 1180 "%u\n", POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe, 1181 POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx); 1182 rc = obd_getattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp, 1183 &req->rq_oi, rqset); 1184 if (rc) { 1185 CERROR("%s: getattr objid "DOSTID" subobj" 1186 DOSTID" on OST idx %d: rc = %d\n", 1187 exp->exp_obd->obd_name, 1188 POSTID(&oinfo->oi_oa->o_oi), 1189 POSTID(&req->rq_oi.oi_oa->o_oi), 1190 req->rq_idx, rc); 1191 goto out; 1192 } 1193 } 1194 1195 if (!list_empty(&rqset->set_requests)) { 1196 LASSERT(rc == 0); 1197 LASSERT(rqset->set_interpret == NULL); 1198 rqset->set_interpret = lov_getattr_interpret; 1199 rqset->set_arg = (void *)lovset; 1200 return rc; 1201 } 1202out: 1203 if (rc) 1204 atomic_set(&lovset->set_completes, 0); 1205 err = lov_fini_getattr_set(lovset); 1206 return rc ? rc : err; 1207} 1208 1209static int lov_setattr_interpret(struct ptlrpc_request_set *rqset, 1210 void *data, int rc) 1211{ 1212 struct lov_request_set *lovset = (struct lov_request_set *)data; 1213 int err; 1214 1215 if (rc) 1216 atomic_set(&lovset->set_completes, 0); 1217 err = lov_fini_setattr_set(lovset); 1218 return rc ? rc : err; 1219} 1220 1221/* If @oti is given, the request goes from MDS and responses from OSTs are not 1222 needed. Otherwise, a client is waiting for responses. */ 1223static int lov_setattr_async(struct obd_export *exp, struct obd_info *oinfo, 1224 struct obd_trans_info *oti, 1225 struct ptlrpc_request_set *rqset) 1226{ 1227 struct lov_request_set *set; 1228 struct lov_request *req; 1229 struct list_head *pos; 1230 struct lov_obd *lov; 1231 int rc = 0; 1232 1233 LASSERT(oinfo); 1234 ASSERT_LSM_MAGIC(oinfo->oi_md); 1235 if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) { 1236 LASSERT(oti); 1237 LASSERT(oti->oti_logcookies); 1238 } 1239 1240 if (!exp || !exp->exp_obd) 1241 return -ENODEV; 1242 1243 lov = &exp->exp_obd->u.lov; 1244 rc = lov_prep_setattr_set(exp, oinfo, oti, &set); 1245 if (rc) 1246 return rc; 1247 1248 CDEBUG(D_INFO, "objid "DOSTID": %ux%u byte stripes\n", 1249 POSTID(&oinfo->oi_md->lsm_oi), 1250 oinfo->oi_md->lsm_stripe_count, 1251 oinfo->oi_md->lsm_stripe_size); 1252 1253 list_for_each(pos, &set->set_list) { 1254 req = list_entry(pos, struct lov_request, rq_link); 1255 1256 if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) 1257 oti->oti_logcookies = set->set_cookies + req->rq_stripe; 1258 1259 CDEBUG(D_INFO, "objid "DOSTID"[%d] has subobj "DOSTID" at idx" 1260 "%u\n", POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe, 1261 POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx); 1262 1263 rc = obd_setattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp, 1264 &req->rq_oi, oti, rqset); 1265 if (rc) { 1266 CERROR("error: setattr objid "DOSTID" subobj" 1267 DOSTID" on OST idx %d: rc = %d\n", 1268 POSTID(&set->set_oi->oi_oa->o_oi), 1269 POSTID(&req->rq_oi.oi_oa->o_oi), 1270 req->rq_idx, rc); 1271 break; 1272 } 1273 } 1274 1275 /* If we are not waiting for responses on async requests, return. */ 1276 if (rc || !rqset || list_empty(&rqset->set_requests)) { 1277 int err; 1278 if (rc) 1279 atomic_set(&set->set_completes, 0); 1280 err = lov_fini_setattr_set(set); 1281 return rc ? rc : err; 1282 } 1283 1284 LASSERT(rqset->set_interpret == NULL); 1285 rqset->set_interpret = lov_setattr_interpret; 1286 rqset->set_arg = (void *)set; 1287 1288 return 0; 1289} 1290 1291/* find any ldlm lock of the inode in lov 1292 * return 0 not find 1293 * 1 find one 1294 * < 0 error */ 1295static int lov_find_cbdata(struct obd_export *exp, 1296 struct lov_stripe_md *lsm, ldlm_iterator_t it, 1297 void *data) 1298{ 1299 struct lov_obd *lov; 1300 int rc = 0, i; 1301 1302 ASSERT_LSM_MAGIC(lsm); 1303 1304 if (!exp || !exp->exp_obd) 1305 return -ENODEV; 1306 1307 lov = &exp->exp_obd->u.lov; 1308 for (i = 0; i < lsm->lsm_stripe_count; i++) { 1309 struct lov_stripe_md submd; 1310 struct lov_oinfo *loi = lsm->lsm_oinfo[i]; 1311 1312 if (!lov->lov_tgts[loi->loi_ost_idx]) { 1313 CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx); 1314 continue; 1315 } 1316 submd.lsm_oi = loi->loi_oi; 1317 submd.lsm_stripe_count = 0; 1318 rc = obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, 1319 &submd, it, data); 1320 if (rc != 0) 1321 return rc; 1322 } 1323 return rc; 1324} 1325 1326int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc) 1327{ 1328 struct lov_request_set *lovset = (struct lov_request_set *)data; 1329 int err; 1330 1331 if (rc) 1332 atomic_set(&lovset->set_completes, 0); 1333 1334 err = lov_fini_statfs_set(lovset); 1335 return rc ? rc : err; 1336} 1337 1338static int lov_statfs_async(struct obd_export *exp, struct obd_info *oinfo, 1339 __u64 max_age, struct ptlrpc_request_set *rqset) 1340{ 1341 struct obd_device *obd = class_exp2obd(exp); 1342 struct lov_request_set *set; 1343 struct lov_request *req; 1344 struct list_head *pos; 1345 struct lov_obd *lov; 1346 int rc = 0; 1347 1348 LASSERT(oinfo != NULL); 1349 LASSERT(oinfo->oi_osfs != NULL); 1350 1351 lov = &obd->u.lov; 1352 rc = lov_prep_statfs_set(obd, oinfo, &set); 1353 if (rc) 1354 return rc; 1355 1356 list_for_each(pos, &set->set_list) { 1357 req = list_entry(pos, struct lov_request, rq_link); 1358 rc = obd_statfs_async(lov->lov_tgts[req->rq_idx]->ltd_exp, 1359 &req->rq_oi, max_age, rqset); 1360 if (rc) 1361 break; 1362 } 1363 1364 if (rc || list_empty(&rqset->set_requests)) { 1365 int err; 1366 if (rc) 1367 atomic_set(&set->set_completes, 0); 1368 err = lov_fini_statfs_set(set); 1369 return rc ? rc : err; 1370 } 1371 1372 LASSERT(rqset->set_interpret == NULL); 1373 rqset->set_interpret = lov_statfs_interpret; 1374 rqset->set_arg = (void *)set; 1375 return 0; 1376} 1377 1378static int lov_statfs(const struct lu_env *env, struct obd_export *exp, 1379 struct obd_statfs *osfs, __u64 max_age, __u32 flags) 1380{ 1381 struct ptlrpc_request_set *set = NULL; 1382 struct obd_info oinfo = { { { 0 } } }; 1383 int rc = 0; 1384 1385 /* for obdclass we forbid using obd_statfs_rqset, but prefer using async 1386 * statfs requests */ 1387 set = ptlrpc_prep_set(); 1388 if (set == NULL) 1389 return -ENOMEM; 1390 1391 oinfo.oi_osfs = osfs; 1392 oinfo.oi_flags = flags; 1393 rc = lov_statfs_async(exp, &oinfo, max_age, set); 1394 if (rc == 0) 1395 rc = ptlrpc_set_wait(set); 1396 ptlrpc_set_destroy(set); 1397 1398 return rc; 1399} 1400 1401static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, 1402 void *karg, void *uarg) 1403{ 1404 struct obd_device *obddev = class_exp2obd(exp); 1405 struct lov_obd *lov = &obddev->u.lov; 1406 int i = 0, rc = 0, count = lov->desc.ld_tgt_count; 1407 struct obd_uuid *uuidp; 1408 1409 switch (cmd) { 1410 case IOC_OBD_STATFS: { 1411 struct obd_ioctl_data *data = karg; 1412 struct obd_device *osc_obd; 1413 struct obd_statfs stat_buf = {0}; 1414 __u32 index; 1415 __u32 flags; 1416 1417 memcpy(&index, data->ioc_inlbuf2, sizeof(__u32)); 1418 if ((index >= count)) 1419 return -ENODEV; 1420 1421 if (!lov->lov_tgts[index]) 1422 /* Try again with the next index */ 1423 return -EAGAIN; 1424 if (!lov->lov_tgts[index]->ltd_active) 1425 return -ENODATA; 1426 1427 osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp); 1428 if (!osc_obd) 1429 return -EINVAL; 1430 1431 /* copy UUID */ 1432 if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd), 1433 min((int) data->ioc_plen2, 1434 (int) sizeof(struct obd_uuid)))) 1435 return -EFAULT; 1436 1437 flags = uarg ? *(__u32 *)uarg : 0; 1438 /* got statfs data */ 1439 rc = obd_statfs(NULL, lov->lov_tgts[index]->ltd_exp, &stat_buf, 1440 cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS), 1441 flags); 1442 if (rc) 1443 return rc; 1444 if (copy_to_user(data->ioc_pbuf1, &stat_buf, 1445 min((int) data->ioc_plen1, 1446 (int) sizeof(stat_buf)))) 1447 return -EFAULT; 1448 break; 1449 } 1450 case OBD_IOC_LOV_GET_CONFIG: { 1451 struct obd_ioctl_data *data; 1452 struct lov_desc *desc; 1453 char *buf = NULL; 1454 __u32 *genp; 1455 1456 len = 0; 1457 if (obd_ioctl_getdata(&buf, &len, (void *)uarg)) 1458 return -EINVAL; 1459 1460 data = (struct obd_ioctl_data *)buf; 1461 1462 if (sizeof(*desc) > data->ioc_inllen1) { 1463 obd_ioctl_freedata(buf, len); 1464 return -EINVAL; 1465 } 1466 1467 if (sizeof(uuidp->uuid) * count > data->ioc_inllen2) { 1468 obd_ioctl_freedata(buf, len); 1469 return -EINVAL; 1470 } 1471 1472 if (sizeof(__u32) * count > data->ioc_inllen3) { 1473 obd_ioctl_freedata(buf, len); 1474 return -EINVAL; 1475 } 1476 1477 desc = (struct lov_desc *)data->ioc_inlbuf1; 1478 memcpy(desc, &(lov->desc), sizeof(*desc)); 1479 1480 uuidp = (struct obd_uuid *)data->ioc_inlbuf2; 1481 genp = (__u32 *)data->ioc_inlbuf3; 1482 /* the uuid will be empty for deleted OSTs */ 1483 for (i = 0; i < count; i++, uuidp++, genp++) { 1484 if (!lov->lov_tgts[i]) 1485 continue; 1486 *uuidp = lov->lov_tgts[i]->ltd_uuid; 1487 *genp = lov->lov_tgts[i]->ltd_gen; 1488 } 1489 1490 if (copy_to_user((void *)uarg, buf, len)) 1491 rc = -EFAULT; 1492 obd_ioctl_freedata(buf, len); 1493 break; 1494 } 1495 case LL_IOC_LOV_GETSTRIPE: 1496 rc = lov_getstripe(exp, karg, uarg); 1497 break; 1498 case OBD_IOC_QUOTACTL: { 1499 struct if_quotactl *qctl = karg; 1500 struct lov_tgt_desc *tgt = NULL; 1501 struct obd_quotactl *oqctl; 1502 1503 if (qctl->qc_valid == QC_OSTIDX) { 1504 if (qctl->qc_idx < 0 || count <= qctl->qc_idx) 1505 return -EINVAL; 1506 1507 tgt = lov->lov_tgts[qctl->qc_idx]; 1508 if (!tgt || !tgt->ltd_exp) 1509 return -EINVAL; 1510 } else if (qctl->qc_valid == QC_UUID) { 1511 for (i = 0; i < count; i++) { 1512 tgt = lov->lov_tgts[i]; 1513 if (!tgt || 1514 !obd_uuid_equals(&tgt->ltd_uuid, 1515 &qctl->obd_uuid)) 1516 continue; 1517 1518 if (tgt->ltd_exp == NULL) 1519 return -EINVAL; 1520 1521 break; 1522 } 1523 } else { 1524 return -EINVAL; 1525 } 1526 1527 if (i >= count) 1528 return -EAGAIN; 1529 1530 LASSERT(tgt && tgt->ltd_exp); 1531 OBD_ALLOC_PTR(oqctl); 1532 if (!oqctl) 1533 return -ENOMEM; 1534 1535 QCTL_COPY(oqctl, qctl); 1536 rc = obd_quotactl(tgt->ltd_exp, oqctl); 1537 if (rc == 0) { 1538 QCTL_COPY(qctl, oqctl); 1539 qctl->qc_valid = QC_OSTIDX; 1540 qctl->obd_uuid = tgt->ltd_uuid; 1541 } 1542 OBD_FREE_PTR(oqctl); 1543 break; 1544 } 1545 default: { 1546 int set = 0; 1547 1548 if (count == 0) 1549 return -ENOTTY; 1550 1551 for (i = 0; i < count; i++) { 1552 int err; 1553 struct obd_device *osc_obd; 1554 1555 /* OST was disconnected */ 1556 if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_exp) 1557 continue; 1558 1559 /* ll_umount_begin() sets force flag but for lov, not 1560 * osc. Let's pass it through */ 1561 osc_obd = class_exp2obd(lov->lov_tgts[i]->ltd_exp); 1562 osc_obd->obd_force = obddev->obd_force; 1563 err = obd_iocontrol(cmd, lov->lov_tgts[i]->ltd_exp, 1564 len, karg, uarg); 1565 if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) { 1566 return err; 1567 } else if (err) { 1568 if (lov->lov_tgts[i]->ltd_active) { 1569 CDEBUG(err == -ENOTTY ? 1570 D_IOCTL : D_WARNING, 1571 "iocontrol OSC %s on OST " 1572 "idx %d cmd %x: err = %d\n", 1573 lov_uuid2str(lov, i), 1574 i, cmd, err); 1575 if (!rc) 1576 rc = err; 1577 } 1578 } else { 1579 set = 1; 1580 } 1581 } 1582 if (!set && !rc) 1583 rc = -EIO; 1584 } 1585 } 1586 1587 return rc; 1588} 1589 1590#define FIEMAP_BUFFER_SIZE 4096 1591 1592/** 1593 * Non-zero fe_logical indicates that this is a continuation FIEMAP 1594 * call. The local end offset and the device are sent in the first 1595 * fm_extent. This function calculates the stripe number from the index. 1596 * This function returns a stripe_no on which mapping is to be restarted. 1597 * 1598 * This function returns fm_end_offset which is the in-OST offset at which 1599 * mapping should be restarted. If fm_end_offset=0 is returned then caller 1600 * will re-calculate proper offset in next stripe. 1601 * Note that the first extent is passed to lov_get_info via the value field. 1602 * 1603 * \param fiemap fiemap request header 1604 * \param lsm striping information for the file 1605 * \param fm_start logical start of mapping 1606 * \param fm_end logical end of mapping 1607 * \param start_stripe starting stripe will be returned in this 1608 */ 1609u64 fiemap_calc_fm_end_offset(struct ll_user_fiemap *fiemap, 1610 struct lov_stripe_md *lsm, u64 fm_start, 1611 u64 fm_end, int *start_stripe) 1612{ 1613 u64 local_end = fiemap->fm_extents[0].fe_logical; 1614 u64 lun_start, lun_end; 1615 u64 fm_end_offset; 1616 int stripe_no = -1, i; 1617 1618 if (fiemap->fm_extent_count == 0 || 1619 fiemap->fm_extents[0].fe_logical == 0) 1620 return 0; 1621 1622 /* Find out stripe_no from ost_index saved in the fe_device */ 1623 for (i = 0; i < lsm->lsm_stripe_count; i++) { 1624 if (lsm->lsm_oinfo[i]->loi_ost_idx == 1625 fiemap->fm_extents[0].fe_device) { 1626 stripe_no = i; 1627 break; 1628 } 1629 } 1630 if (stripe_no == -1) 1631 return -EINVAL; 1632 1633 /* If we have finished mapping on previous device, shift logical 1634 * offset to start of next device */ 1635 if ((lov_stripe_intersects(lsm, stripe_no, fm_start, fm_end, 1636 &lun_start, &lun_end)) != 0 && 1637 local_end < lun_end) { 1638 fm_end_offset = local_end; 1639 *start_stripe = stripe_no; 1640 } else { 1641 /* This is a special value to indicate that caller should 1642 * calculate offset in next stripe. */ 1643 fm_end_offset = 0; 1644 *start_stripe = (stripe_no + 1) % lsm->lsm_stripe_count; 1645 } 1646 1647 return fm_end_offset; 1648} 1649 1650/** 1651 * We calculate on which OST the mapping will end. If the length of mapping 1652 * is greater than (stripe_size * stripe_count) then the last_stripe will 1653 * will be one just before start_stripe. Else we check if the mapping 1654 * intersects each OST and find last_stripe. 1655 * This function returns the last_stripe and also sets the stripe_count 1656 * over which the mapping is spread 1657 * 1658 * \param lsm striping information for the file 1659 * \param fm_start logical start of mapping 1660 * \param fm_end logical end of mapping 1661 * \param start_stripe starting stripe of the mapping 1662 * \param stripe_count the number of stripes across which to map is returned 1663 * 1664 * \retval last_stripe return the last stripe of the mapping 1665 */ 1666int fiemap_calc_last_stripe(struct lov_stripe_md *lsm, u64 fm_start, 1667 u64 fm_end, int start_stripe, 1668 int *stripe_count) 1669{ 1670 int last_stripe; 1671 u64 obd_start, obd_end; 1672 int i, j; 1673 1674 if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) { 1675 last_stripe = (start_stripe < 1 ? lsm->lsm_stripe_count - 1 : 1676 start_stripe - 1); 1677 *stripe_count = lsm->lsm_stripe_count; 1678 } else { 1679 for (j = 0, i = start_stripe; j < lsm->lsm_stripe_count; 1680 i = (i + 1) % lsm->lsm_stripe_count, j++) { 1681 if ((lov_stripe_intersects(lsm, i, fm_start, fm_end, 1682 &obd_start, &obd_end)) == 0) 1683 break; 1684 } 1685 *stripe_count = j; 1686 last_stripe = (start_stripe + j - 1) %lsm->lsm_stripe_count; 1687 } 1688 1689 return last_stripe; 1690} 1691 1692/** 1693 * Set fe_device and copy extents from local buffer into main return buffer. 1694 * 1695 * \param fiemap fiemap request header 1696 * \param lcl_fm_ext array of local fiemap extents to be copied 1697 * \param ost_index OST index to be written into the fm_device field for each 1698 extent 1699 * \param ext_count number of extents to be copied 1700 * \param current_extent where to start copying in main extent array 1701 */ 1702void fiemap_prepare_and_copy_exts(struct ll_user_fiemap *fiemap, 1703 struct ll_fiemap_extent *lcl_fm_ext, 1704 int ost_index, unsigned int ext_count, 1705 int current_extent) 1706{ 1707 char *to; 1708 int ext; 1709 1710 for (ext = 0; ext < ext_count; ext++) { 1711 lcl_fm_ext[ext].fe_device = ost_index; 1712 lcl_fm_ext[ext].fe_flags |= FIEMAP_EXTENT_NET; 1713 } 1714 1715 /* Copy fm_extent's from fm_local to return buffer */ 1716 to = (char *)fiemap + fiemap_count_to_size(current_extent); 1717 memcpy(to, lcl_fm_ext, ext_count * sizeof(struct ll_fiemap_extent)); 1718} 1719 1720/** 1721 * Break down the FIEMAP request and send appropriate calls to individual OSTs. 1722 * This also handles the restarting of FIEMAP calls in case mapping overflows 1723 * the available number of extents in single call. 1724 */ 1725static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, 1726 __u32 *vallen, void *val, struct lov_stripe_md *lsm) 1727{ 1728 struct ll_fiemap_info_key *fm_key = key; 1729 struct ll_user_fiemap *fiemap = val; 1730 struct ll_user_fiemap *fm_local = NULL; 1731 struct ll_fiemap_extent *lcl_fm_ext; 1732 int count_local; 1733 unsigned int get_num_extents = 0; 1734 int ost_index = 0, actual_start_stripe, start_stripe; 1735 u64 fm_start, fm_end, fm_length, fm_end_offset; 1736 u64 curr_loc; 1737 int current_extent = 0, rc = 0, i; 1738 int ost_eof = 0; /* EOF for object */ 1739 int ost_done = 0; /* done with required mapping for this OST? */ 1740 int last_stripe; 1741 int cur_stripe = 0, cur_stripe_wrap = 0, stripe_count; 1742 unsigned int buffer_size = FIEMAP_BUFFER_SIZE; 1743 1744 if (!lsm_has_objects(lsm)) { 1745 rc = 0; 1746 goto out; 1747 } 1748 1749 if (fiemap_count_to_size(fm_key->fiemap.fm_extent_count) < buffer_size) 1750 buffer_size = fiemap_count_to_size(fm_key->fiemap.fm_extent_count); 1751 1752 OBD_ALLOC_LARGE(fm_local, buffer_size); 1753 if (fm_local == NULL) { 1754 rc = -ENOMEM; 1755 goto out; 1756 } 1757 lcl_fm_ext = &fm_local->fm_extents[0]; 1758 1759 count_local = fiemap_size_to_count(buffer_size); 1760 1761 memcpy(fiemap, &fm_key->fiemap, sizeof(*fiemap)); 1762 fm_start = fiemap->fm_start; 1763 fm_length = fiemap->fm_length; 1764 /* Calculate start stripe, last stripe and length of mapping */ 1765 actual_start_stripe = start_stripe = lov_stripe_number(lsm, fm_start); 1766 fm_end = (fm_length == ~0ULL ? fm_key->oa.o_size : 1767 fm_start + fm_length - 1); 1768 /* If fm_length != ~0ULL but fm_start+fm_length-1 exceeds file size */ 1769 if (fm_end > fm_key->oa.o_size) 1770 fm_end = fm_key->oa.o_size; 1771 1772 last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end, 1773 actual_start_stripe, &stripe_count); 1774 1775 fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start, 1776 fm_end, &start_stripe); 1777 if (fm_end_offset == -EINVAL) { 1778 rc = -EINVAL; 1779 goto out; 1780 } 1781 1782 if (fiemap_count_to_size(fiemap->fm_extent_count) > *vallen) 1783 fiemap->fm_extent_count = fiemap_size_to_count(*vallen); 1784 if (fiemap->fm_extent_count == 0) { 1785 get_num_extents = 1; 1786 count_local = 0; 1787 } 1788 /* Check each stripe */ 1789 for (cur_stripe = start_stripe, i = 0; i < stripe_count; 1790 i++, cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) { 1791 u64 req_fm_len; /* Stores length of required mapping */ 1792 u64 len_mapped_single_call; 1793 u64 lun_start, lun_end, obd_object_end; 1794 unsigned int ext_count; 1795 1796 cur_stripe_wrap = cur_stripe; 1797 1798 /* Find out range of mapping on this stripe */ 1799 if ((lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end, 1800 &lun_start, &obd_object_end)) == 0) 1801 continue; 1802 1803 /* If this is a continuation FIEMAP call and we are on 1804 * starting stripe then lun_start needs to be set to 1805 * fm_end_offset */ 1806 if (fm_end_offset != 0 && cur_stripe == start_stripe) 1807 lun_start = fm_end_offset; 1808 1809 if (fm_length != ~0ULL) { 1810 /* Handle fm_start + fm_length overflow */ 1811 if (fm_start + fm_length < fm_start) 1812 fm_length = ~0ULL - fm_start; 1813 lun_end = lov_size_to_stripe(lsm, fm_start + fm_length, 1814 cur_stripe); 1815 } else { 1816 lun_end = ~0ULL; 1817 } 1818 1819 if (lun_start == lun_end) 1820 continue; 1821 1822 req_fm_len = obd_object_end - lun_start; 1823 fm_local->fm_length = 0; 1824 len_mapped_single_call = 0; 1825 1826 /* If the output buffer is very large and the objects have many 1827 * extents we may need to loop on a single OST repeatedly */ 1828 ost_eof = 0; 1829 ost_done = 0; 1830 do { 1831 if (get_num_extents == 0) { 1832 /* Don't get too many extents. */ 1833 if (current_extent + count_local > 1834 fiemap->fm_extent_count) 1835 count_local = fiemap->fm_extent_count - 1836 current_extent; 1837 } 1838 1839 lun_start += len_mapped_single_call; 1840 fm_local->fm_length = req_fm_len - len_mapped_single_call; 1841 req_fm_len = fm_local->fm_length; 1842 fm_local->fm_extent_count = count_local; 1843 fm_local->fm_mapped_extents = 0; 1844 fm_local->fm_flags = fiemap->fm_flags; 1845 1846 fm_key->oa.o_oi = lsm->lsm_oinfo[cur_stripe]->loi_oi; 1847 ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx; 1848 1849 if (ost_index < 0 || 1850 ost_index >= lov->desc.ld_tgt_count) { 1851 rc = -EINVAL; 1852 goto out; 1853 } 1854 1855 /* If OST is inactive, return extent with UNKNOWN flag */ 1856 if (!lov->lov_tgts[ost_index]->ltd_active) { 1857 fm_local->fm_flags |= FIEMAP_EXTENT_LAST; 1858 fm_local->fm_mapped_extents = 1; 1859 1860 lcl_fm_ext[0].fe_logical = lun_start; 1861 lcl_fm_ext[0].fe_length = obd_object_end - 1862 lun_start; 1863 lcl_fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN; 1864 1865 goto inactive_tgt; 1866 } 1867 1868 fm_local->fm_start = lun_start; 1869 fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER; 1870 memcpy(&fm_key->fiemap, fm_local, sizeof(*fm_local)); 1871 *vallen=fiemap_count_to_size(fm_local->fm_extent_count); 1872 rc = obd_get_info(NULL, 1873 lov->lov_tgts[ost_index]->ltd_exp, 1874 keylen, key, vallen, fm_local, lsm); 1875 if (rc != 0) 1876 goto out; 1877 1878inactive_tgt: 1879 ext_count = fm_local->fm_mapped_extents; 1880 if (ext_count == 0) { 1881 ost_done = 1; 1882 /* If last stripe has hole at the end, 1883 * then we need to return */ 1884 if (cur_stripe_wrap == last_stripe) { 1885 fiemap->fm_mapped_extents = 0; 1886 goto finish; 1887 } 1888 break; 1889 } 1890 1891 /* If we just need num of extents then go to next device */ 1892 if (get_num_extents) { 1893 current_extent += ext_count; 1894 break; 1895 } 1896 1897 len_mapped_single_call = lcl_fm_ext[ext_count-1].fe_logical - 1898 lun_start + lcl_fm_ext[ext_count - 1].fe_length; 1899 1900 /* Have we finished mapping on this device? */ 1901 if (req_fm_len <= len_mapped_single_call) 1902 ost_done = 1; 1903 1904 /* Clear the EXTENT_LAST flag which can be present on 1905 * last extent */ 1906 if (lcl_fm_ext[ext_count-1].fe_flags & FIEMAP_EXTENT_LAST) 1907 lcl_fm_ext[ext_count - 1].fe_flags &= 1908 ~FIEMAP_EXTENT_LAST; 1909 1910 curr_loc = lov_stripe_size(lsm, 1911 lcl_fm_ext[ext_count - 1].fe_logical+ 1912 lcl_fm_ext[ext_count - 1].fe_length, 1913 cur_stripe); 1914 if (curr_loc >= fm_key->oa.o_size) 1915 ost_eof = 1; 1916 1917 fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext, 1918 ost_index, ext_count, 1919 current_extent); 1920 1921 current_extent += ext_count; 1922 1923 /* Ran out of available extents? */ 1924 if (current_extent >= fiemap->fm_extent_count) 1925 goto finish; 1926 } while (ost_done == 0 && ost_eof == 0); 1927 1928 if (cur_stripe_wrap == last_stripe) 1929 goto finish; 1930 } 1931 1932finish: 1933 /* Indicate that we are returning device offsets unless file just has 1934 * single stripe */ 1935 if (lsm->lsm_stripe_count > 1) 1936 fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER; 1937 1938 if (get_num_extents) 1939 goto skip_last_device_calc; 1940 1941 /* Check if we have reached the last stripe and whether mapping for that 1942 * stripe is done. */ 1943 if (cur_stripe_wrap == last_stripe) { 1944 if (ost_done || ost_eof) 1945 fiemap->fm_extents[current_extent - 1].fe_flags |= 1946 FIEMAP_EXTENT_LAST; 1947 } 1948 1949skip_last_device_calc: 1950 fiemap->fm_mapped_extents = current_extent; 1951 1952out: 1953 OBD_FREE_LARGE(fm_local, buffer_size); 1954 return rc; 1955} 1956 1957static int lov_get_info(const struct lu_env *env, struct obd_export *exp, 1958 __u32 keylen, void *key, __u32 *vallen, void *val, 1959 struct lov_stripe_md *lsm) 1960{ 1961 struct obd_device *obddev = class_exp2obd(exp); 1962 struct lov_obd *lov = &obddev->u.lov; 1963 int i, rc; 1964 1965 if (!vallen || !val) 1966 return -EFAULT; 1967 1968 obd_getref(obddev); 1969 1970 if (KEY_IS(KEY_LOCK_TO_STRIPE)) { 1971 struct { 1972 char name[16]; 1973 struct ldlm_lock *lock; 1974 } *data = key; 1975 struct ldlm_res_id *res_id = &data->lock->l_resource->lr_name; 1976 struct lov_oinfo *loi; 1977 __u32 *stripe = val; 1978 1979 if (*vallen < sizeof(*stripe)) { 1980 rc = -EFAULT; 1981 goto out; 1982 } 1983 *vallen = sizeof(*stripe); 1984 1985 /* XXX This is another one of those bits that will need to 1986 * change if we ever actually support nested LOVs. It uses 1987 * the lock's export to find out which stripe it is. */ 1988 /* XXX - it's assumed all the locks for deleted OSTs have 1989 * been cancelled. Also, the export for deleted OSTs will 1990 * be NULL and won't match the lock's export. */ 1991 for (i = 0; i < lsm->lsm_stripe_count; i++) { 1992 loi = lsm->lsm_oinfo[i]; 1993 if (!lov->lov_tgts[loi->loi_ost_idx]) 1994 continue; 1995 if (lov->lov_tgts[loi->loi_ost_idx]->ltd_exp == 1996 data->lock->l_conn_export && 1997 ostid_res_name_eq(&loi->loi_oi, res_id)) { 1998 *stripe = i; 1999 rc = 0; 2000 goto out; 2001 } 2002 } 2003 LDLM_ERROR(data->lock, "lock on inode without such object"); 2004 dump_lsm(D_ERROR, lsm); 2005 rc = -ENXIO; 2006 goto out; 2007 } else if (KEY_IS(KEY_LAST_ID)) { 2008 struct obd_id_info *info = val; 2009 __u32 size = sizeof(u64); 2010 struct lov_tgt_desc *tgt; 2011 2012 LASSERT(*vallen == sizeof(struct obd_id_info)); 2013 tgt = lov->lov_tgts[info->idx]; 2014 2015 if (!tgt || !tgt->ltd_active) { 2016 rc = -ESRCH; 2017 goto out; 2018 } 2019 2020 rc = obd_get_info(env, tgt->ltd_exp, keylen, key, 2021 &size, info->data, NULL); 2022 rc = 0; 2023 goto out; 2024 } else if (KEY_IS(KEY_LOVDESC)) { 2025 struct lov_desc *desc_ret = val; 2026 *desc_ret = lov->desc; 2027 2028 rc = 0; 2029 goto out; 2030 } else if (KEY_IS(KEY_FIEMAP)) { 2031 rc = lov_fiemap(lov, keylen, key, vallen, val, lsm); 2032 goto out; 2033 } else if (KEY_IS(KEY_CONNECT_FLAG)) { 2034 struct lov_tgt_desc *tgt; 2035 __u64 ost_idx = *((__u64 *)val); 2036 2037 LASSERT(*vallen == sizeof(__u64)); 2038 LASSERT(ost_idx < lov->desc.ld_tgt_count); 2039 tgt = lov->lov_tgts[ost_idx]; 2040 2041 if (!tgt || !tgt->ltd_exp) { 2042 rc = -ESRCH; 2043 goto out; 2044 } 2045 2046 *((__u64 *)val) = exp_connect_flags(tgt->ltd_exp); 2047 rc = 0; 2048 goto out; 2049 } else if (KEY_IS(KEY_TGT_COUNT)) { 2050 *((int *)val) = lov->desc.ld_tgt_count; 2051 rc = 0; 2052 goto out; 2053 } 2054 2055 rc = -EINVAL; 2056 2057out: 2058 obd_putref(obddev); 2059 return rc; 2060} 2061 2062static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp, 2063 u32 keylen, void *key, u32 vallen, 2064 void *val, struct ptlrpc_request_set *set) 2065{ 2066 struct obd_device *obddev = class_exp2obd(exp); 2067 struct lov_obd *lov = &obddev->u.lov; 2068 u32 count; 2069 int i, rc = 0, err; 2070 struct lov_tgt_desc *tgt; 2071 unsigned incr, check_uuid, 2072 do_inactive, no_set; 2073 unsigned next_id = 0, mds_con = 0, capa = 0; 2074 2075 incr = check_uuid = do_inactive = no_set = 0; 2076 if (set == NULL) { 2077 no_set = 1; 2078 set = ptlrpc_prep_set(); 2079 if (!set) 2080 return -ENOMEM; 2081 } 2082 2083 obd_getref(obddev); 2084 count = lov->desc.ld_tgt_count; 2085 2086 if (KEY_IS(KEY_NEXT_ID)) { 2087 count = vallen / sizeof(struct obd_id_info); 2088 vallen = sizeof(u64); 2089 incr = sizeof(struct obd_id_info); 2090 do_inactive = 1; 2091 next_id = 1; 2092 } else if (KEY_IS(KEY_CHECKSUM)) { 2093 do_inactive = 1; 2094 } else if (KEY_IS(KEY_EVICT_BY_NID)) { 2095 /* use defaults: do_inactive = incr = 0; */ 2096 } else if (KEY_IS(KEY_MDS_CONN)) { 2097 mds_con = 1; 2098 } else if (KEY_IS(KEY_CAPA_KEY)) { 2099 capa = 1; 2100 } else if (KEY_IS(KEY_CACHE_SET)) { 2101 LASSERT(lov->lov_cache == NULL); 2102 lov->lov_cache = val; 2103 do_inactive = 1; 2104 } 2105 2106 for (i = 0; i < count; i++, val = (char *)val + incr) { 2107 if (next_id) { 2108 tgt = lov->lov_tgts[((struct obd_id_info *)val)->idx]; 2109 } else { 2110 tgt = lov->lov_tgts[i]; 2111 } 2112 /* OST was disconnected */ 2113 if (!tgt || !tgt->ltd_exp) 2114 continue; 2115 2116 /* OST is inactive and we don't want inactive OSCs */ 2117 if (!tgt->ltd_active && !do_inactive) 2118 continue; 2119 2120 if (mds_con) { 2121 struct mds_group_info *mgi; 2122 2123 LASSERT(vallen == sizeof(*mgi)); 2124 mgi = (struct mds_group_info *)val; 2125 2126 /* Only want a specific OSC */ 2127 if (mgi->uuid && !obd_uuid_equals(mgi->uuid, 2128 &tgt->ltd_uuid)) 2129 continue; 2130 2131 err = obd_set_info_async(env, tgt->ltd_exp, 2132 keylen, key, sizeof(int), 2133 &mgi->group, set); 2134 } else if (next_id) { 2135 err = obd_set_info_async(env, tgt->ltd_exp, 2136 keylen, key, vallen, 2137 ((struct obd_id_info *)val)->data, set); 2138 } else if (capa) { 2139 struct mds_capa_info *info = (struct mds_capa_info *)val; 2140 2141 LASSERT(vallen == sizeof(*info)); 2142 2143 /* Only want a specific OSC */ 2144 if (info->uuid && 2145 !obd_uuid_equals(info->uuid, &tgt->ltd_uuid)) 2146 continue; 2147 2148 err = obd_set_info_async(env, tgt->ltd_exp, keylen, 2149 key, sizeof(*info->capa), 2150 info->capa, set); 2151 } else { 2152 /* Only want a specific OSC */ 2153 if (check_uuid && 2154 !obd_uuid_equals(val, &tgt->ltd_uuid)) 2155 continue; 2156 2157 err = obd_set_info_async(env, tgt->ltd_exp, 2158 keylen, key, vallen, val, set); 2159 } 2160 2161 if (!rc) 2162 rc = err; 2163 } 2164 2165 obd_putref(obddev); 2166 if (no_set) { 2167 err = ptlrpc_set_wait(set); 2168 if (!rc) 2169 rc = err; 2170 ptlrpc_set_destroy(set); 2171 } 2172 return rc; 2173} 2174 2175void lov_stripe_lock(struct lov_stripe_md *md) 2176 __acquires(&md->lsm_lock) 2177{ 2178 LASSERT(md->lsm_lock_owner != current_pid()); 2179 spin_lock(&md->lsm_lock); 2180 LASSERT(md->lsm_lock_owner == 0); 2181 md->lsm_lock_owner = current_pid(); 2182} 2183EXPORT_SYMBOL(lov_stripe_lock); 2184 2185void lov_stripe_unlock(struct lov_stripe_md *md) 2186 __releases(&md->lsm_lock) 2187{ 2188 LASSERT(md->lsm_lock_owner == current_pid()); 2189 md->lsm_lock_owner = 0; 2190 spin_unlock(&md->lsm_lock); 2191} 2192EXPORT_SYMBOL(lov_stripe_unlock); 2193 2194static int lov_quotactl(struct obd_device *obd, struct obd_export *exp, 2195 struct obd_quotactl *oqctl) 2196{ 2197 struct lov_obd *lov = &obd->u.lov; 2198 struct lov_tgt_desc *tgt; 2199 __u64 curspace = 0; 2200 __u64 bhardlimit = 0; 2201 int i, rc = 0; 2202 2203 if (oqctl->qc_cmd != LUSTRE_Q_QUOTAON && 2204 oqctl->qc_cmd != LUSTRE_Q_QUOTAOFF && 2205 oqctl->qc_cmd != Q_GETOQUOTA && 2206 oqctl->qc_cmd != Q_INITQUOTA && 2207 oqctl->qc_cmd != LUSTRE_Q_SETQUOTA && 2208 oqctl->qc_cmd != Q_FINVALIDATE) { 2209 CERROR("bad quota opc %x for lov obd", oqctl->qc_cmd); 2210 return -EFAULT; 2211 } 2212 2213 /* for lov tgt */ 2214 obd_getref(obd); 2215 for (i = 0; i < lov->desc.ld_tgt_count; i++) { 2216 int err; 2217 2218 tgt = lov->lov_tgts[i]; 2219 2220 if (!tgt) 2221 continue; 2222 2223 if (!tgt->ltd_active || tgt->ltd_reap) { 2224 if (oqctl->qc_cmd == Q_GETOQUOTA && 2225 lov->lov_tgts[i]->ltd_activate) { 2226 rc = -EREMOTEIO; 2227 CERROR("ost %d is inactive\n", i); 2228 } else { 2229 CDEBUG(D_HA, "ost %d is inactive\n", i); 2230 } 2231 continue; 2232 } 2233 2234 err = obd_quotactl(tgt->ltd_exp, oqctl); 2235 if (err) { 2236 if (tgt->ltd_active && !rc) 2237 rc = err; 2238 continue; 2239 } 2240 2241 if (oqctl->qc_cmd == Q_GETOQUOTA) { 2242 curspace += oqctl->qc_dqblk.dqb_curspace; 2243 bhardlimit += oqctl->qc_dqblk.dqb_bhardlimit; 2244 } 2245 } 2246 obd_putref(obd); 2247 2248 if (oqctl->qc_cmd == Q_GETOQUOTA) { 2249 oqctl->qc_dqblk.dqb_curspace = curspace; 2250 oqctl->qc_dqblk.dqb_bhardlimit = bhardlimit; 2251 } 2252 return rc; 2253} 2254 2255static int lov_quotacheck(struct obd_device *obd, struct obd_export *exp, 2256 struct obd_quotactl *oqctl) 2257{ 2258 struct lov_obd *lov = &obd->u.lov; 2259 int i, rc = 0; 2260 2261 obd_getref(obd); 2262 2263 for (i = 0; i < lov->desc.ld_tgt_count; i++) { 2264 if (!lov->lov_tgts[i]) 2265 continue; 2266 2267 /* Skip quota check on the administratively disabled OSTs. */ 2268 if (!lov->lov_tgts[i]->ltd_activate) { 2269 CWARN("lov idx %d was administratively disabled, " 2270 "skip quotacheck on it.\n", i); 2271 continue; 2272 } 2273 2274 if (!lov->lov_tgts[i]->ltd_active) { 2275 CERROR("lov idx %d inactive\n", i); 2276 rc = -EIO; 2277 goto out; 2278 } 2279 } 2280 2281 for (i = 0; i < lov->desc.ld_tgt_count; i++) { 2282 int err; 2283 2284 if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_activate) 2285 continue; 2286 2287 err = obd_quotacheck(lov->lov_tgts[i]->ltd_exp, oqctl); 2288 if (err && !rc) 2289 rc = err; 2290 } 2291 2292out: 2293 obd_putref(obd); 2294 2295 return rc; 2296} 2297 2298struct obd_ops lov_obd_ops = { 2299 .o_owner = THIS_MODULE, 2300 .o_setup = lov_setup, 2301 .o_precleanup = lov_precleanup, 2302 .o_cleanup = lov_cleanup, 2303 /*.o_process_config = lov_process_config,*/ 2304 .o_connect = lov_connect, 2305 .o_disconnect = lov_disconnect, 2306 .o_statfs = lov_statfs, 2307 .o_statfs_async = lov_statfs_async, 2308 .o_packmd = lov_packmd, 2309 .o_unpackmd = lov_unpackmd, 2310 .o_create = lov_create, 2311 .o_destroy = lov_destroy, 2312 .o_getattr_async = lov_getattr_async, 2313 .o_setattr_async = lov_setattr_async, 2314 .o_adjust_kms = lov_adjust_kms, 2315 .o_find_cbdata = lov_find_cbdata, 2316 .o_iocontrol = lov_iocontrol, 2317 .o_get_info = lov_get_info, 2318 .o_set_info_async = lov_set_info_async, 2319 .o_notify = lov_notify, 2320 .o_pool_new = lov_pool_new, 2321 .o_pool_rem = lov_pool_remove, 2322 .o_pool_add = lov_pool_add, 2323 .o_pool_del = lov_pool_del, 2324 .o_getref = lov_getref, 2325 .o_putref = lov_putref, 2326 .o_quotactl = lov_quotactl, 2327 .o_quotacheck = lov_quotacheck, 2328}; 2329 2330struct kmem_cache *lov_oinfo_slab; 2331 2332int __init lov_init(void) 2333{ 2334 struct lprocfs_static_vars lvars = { NULL }; 2335 int rc; 2336 2337 /* print an address of _any_ initialized kernel symbol from this 2338 * module, to allow debugging with gdb that doesn't support data 2339 * symbols from modules.*/ 2340 CDEBUG(D_INFO, "Lustre LOV module (%p).\n", &lov_caches); 2341 2342 rc = lu_kmem_init(lov_caches); 2343 if (rc) 2344 return rc; 2345 2346 lov_oinfo_slab = kmem_cache_create("lov_oinfo", 2347 sizeof(struct lov_oinfo), 2348 0, SLAB_HWCACHE_ALIGN, NULL); 2349 if (lov_oinfo_slab == NULL) { 2350 lu_kmem_fini(lov_caches); 2351 return -ENOMEM; 2352 } 2353 lprocfs_lov_init_vars(&lvars); 2354 2355 rc = class_register_type(&lov_obd_ops, NULL, lvars.module_vars, 2356 LUSTRE_LOV_NAME, &lov_device_type); 2357 2358 if (rc) { 2359 kmem_cache_destroy(lov_oinfo_slab); 2360 lu_kmem_fini(lov_caches); 2361 } 2362 2363 return rc; 2364} 2365 2366static void /*__exit*/ lov_exit(void) 2367{ 2368 class_unregister_type(LUSTRE_LOV_NAME); 2369 kmem_cache_destroy(lov_oinfo_slab); 2370 2371 lu_kmem_fini(lov_caches); 2372} 2373 2374MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>"); 2375MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver"); 2376MODULE_LICENSE("GPL"); 2377MODULE_VERSION(LUSTRE_VERSION_STRING); 2378 2379module_init(lov_init); 2380module_exit(lov_exit); 2381