1/* 2 * GPL HEADER START 3 * 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 only, 8 * as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License version 2 for more details (a copy is included 14 * in the LICENSE file that accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License 17 * version 2 along with this program; If not, see 18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf 19 * 20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 21 * CA 95054 USA or visit www.sun.com if you need additional information or 22 * have any questions. 23 * 24 * GPL HEADER END 25 */ 26/* 27 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 28 * Use is subject to license terms. 29 * 30 * Copyright (c) 2011, 2012, Intel Corporation. 31 */ 32/* 33 * This file is part of Lustre, http://www.lustre.org/ 34 * Lustre is a trademark of Sun Microsystems, Inc. 35 */ 36 37#define DEBUG_SUBSYSTEM S_LMV 38#include <linux/slab.h> 39#include <linux/module.h> 40#include <linux/init.h> 41#include <linux/pagemap.h> 42#include <linux/mm.h> 43#include <asm/div64.h> 44#include <linux/seq_file.h> 45#include <linux/namei.h> 46#include <asm/uaccess.h> 47 48#include "../include/lustre/lustre_idl.h" 49#include "../include/obd_support.h" 50#include "../include/lustre_lib.h" 51#include "../include/lustre_net.h" 52#include "../include/obd_class.h" 53#include "../include/lprocfs_status.h" 54#include "../include/lustre_lite.h" 55#include "../include/lustre_fid.h" 56#include "lmv_internal.h" 57 58static void lmv_activate_target(struct lmv_obd *lmv, 59 struct lmv_tgt_desc *tgt, 60 int activate) 61{ 62 if (tgt->ltd_active == activate) 63 return; 64 65 tgt->ltd_active = activate; 66 lmv->desc.ld_active_tgt_count += (activate ? 1 : -1); 67} 68 69/** 70 * Error codes: 71 * 72 * -EINVAL : UUID can't be found in the LMV's target list 73 * -ENOTCONN: The UUID is found, but the target connection is bad (!) 74 * -EBADF : The UUID is found, but the OBD of the wrong type (!) 75 */ 76static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid, 77 int activate) 78{ 79 struct lmv_tgt_desc *uninitialized_var(tgt); 80 struct obd_device *obd; 81 int i; 82 int rc = 0; 83 84 CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n", 85 lmv, uuid->uuid, activate); 86 87 spin_lock(&lmv->lmv_lock); 88 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 89 tgt = lmv->tgts[i]; 90 if (tgt == NULL || tgt->ltd_exp == NULL) 91 continue; 92 93 CDEBUG(D_INFO, "Target idx %d is %s conn %#llx\n", i, 94 tgt->ltd_uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie); 95 96 if (obd_uuid_equals(uuid, &tgt->ltd_uuid)) 97 break; 98 } 99 100 if (i == lmv->desc.ld_tgt_count) { 101 rc = -EINVAL; 102 goto out_lmv_lock; 103 } 104 105 obd = class_exp2obd(tgt->ltd_exp); 106 if (obd == NULL) { 107 rc = -ENOTCONN; 108 goto out_lmv_lock; 109 } 110 111 CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n", 112 obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd, 113 obd->obd_type->typ_name, i); 114 LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0); 115 116 if (tgt->ltd_active == activate) { 117 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd, 118 activate ? "" : "in"); 119 goto out_lmv_lock; 120 } 121 122 CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, 123 activate ? "" : "in"); 124 lmv_activate_target(lmv, tgt, activate); 125 126 out_lmv_lock: 127 spin_unlock(&lmv->lmv_lock); 128 return rc; 129} 130 131struct obd_uuid *lmv_get_uuid(struct obd_export *exp) 132{ 133 struct lmv_obd *lmv = &exp->exp_obd->u.lmv; 134 135 return obd_get_uuid(lmv->tgts[0]->ltd_exp); 136} 137 138static int lmv_notify(struct obd_device *obd, struct obd_device *watched, 139 enum obd_notify_event ev, void *data) 140{ 141 struct obd_connect_data *conn_data; 142 struct lmv_obd *lmv = &obd->u.lmv; 143 struct obd_uuid *uuid; 144 int rc = 0; 145 146 if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) { 147 CERROR("unexpected notification of %s %s!\n", 148 watched->obd_type->typ_name, 149 watched->obd_name); 150 return -EINVAL; 151 } 152 153 uuid = &watched->u.cli.cl_target_uuid; 154 if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) { 155 /* 156 * Set MDC as active before notifying the observer, so the 157 * observer can use the MDC normally. 158 */ 159 rc = lmv_set_mdc_active(lmv, uuid, 160 ev == OBD_NOTIFY_ACTIVE); 161 if (rc) { 162 CERROR("%sactivation of %s failed: %d\n", 163 ev == OBD_NOTIFY_ACTIVE ? "" : "de", 164 uuid->uuid, rc); 165 return rc; 166 } 167 } else if (ev == OBD_NOTIFY_OCD) { 168 conn_data = &watched->u.cli.cl_import->imp_connect_data; 169 /* 170 * XXX: Make sure that ocd_connect_flags from all targets are 171 * the same. Otherwise one of MDTs runs wrong version or 172 * something like this. --umka 173 */ 174 obd->obd_self_export->exp_connect_data = *conn_data; 175 } 176#if 0 177 else if (ev == OBD_NOTIFY_DISCON) { 178 /* 179 * For disconnect event, flush fld cache for failout MDS case. 180 */ 181 fld_client_flush(&lmv->lmv_fld); 182 } 183#endif 184 /* 185 * Pass the notification up the chain. 186 */ 187 if (obd->obd_observer) 188 rc = obd_notify(obd->obd_observer, watched, ev, data); 189 190 return rc; 191} 192 193/** 194 * This is fake connect function. Its purpose is to initialize lmv and say 195 * caller that everything is okay. Real connection will be performed later. 196 */ 197static int lmv_connect(const struct lu_env *env, 198 struct obd_export **exp, struct obd_device *obd, 199 struct obd_uuid *cluuid, struct obd_connect_data *data, 200 void *localdata) 201{ 202 struct proc_dir_entry *lmv_proc_dir; 203 struct lmv_obd *lmv = &obd->u.lmv; 204 struct lustre_handle conn = { 0 }; 205 int rc = 0; 206 207 /* 208 * We don't want to actually do the underlying connections more than 209 * once, so keep track. 210 */ 211 lmv->refcount++; 212 if (lmv->refcount > 1) { 213 *exp = NULL; 214 return 0; 215 } 216 217 rc = class_connect(&conn, obd, cluuid); 218 if (rc) { 219 CERROR("class_connection() returned %d\n", rc); 220 return rc; 221 } 222 223 *exp = class_conn2export(&conn); 224 class_export_get(*exp); 225 226 lmv->exp = *exp; 227 lmv->connected = 0; 228 lmv->cluuid = *cluuid; 229 230 if (data) 231 lmv->conn_data = *data; 232 233 if (obd->obd_proc_private != NULL) { 234 lmv_proc_dir = obd->obd_proc_private; 235 } else { 236 lmv_proc_dir = lprocfs_register("target_obds", obd->obd_proc_entry, 237 NULL, NULL); 238 if (IS_ERR(lmv_proc_dir)) { 239 CERROR("could not register /proc/fs/lustre/%s/%s/target_obds.", 240 obd->obd_type->typ_name, obd->obd_name); 241 lmv_proc_dir = NULL; 242 } 243 obd->obd_proc_private = lmv_proc_dir; 244 } 245 246 /* 247 * All real clients should perform actual connection right away, because 248 * it is possible, that LMV will not have opportunity to connect targets 249 * and MDC stuff will be called directly, for instance while reading 250 * ../mdc/../kbytesfree procfs file, etc. 251 */ 252 if (data->ocd_connect_flags & OBD_CONNECT_REAL) 253 rc = lmv_check_connect(obd); 254 255 if (rc && lmv_proc_dir) { 256 lprocfs_remove(&lmv_proc_dir); 257 obd->obd_proc_private = NULL; 258 } 259 260 return rc; 261} 262 263static void lmv_set_timeouts(struct obd_device *obd) 264{ 265 struct lmv_tgt_desc *tgt; 266 struct lmv_obd *lmv; 267 int i; 268 269 lmv = &obd->u.lmv; 270 if (lmv->server_timeout == 0) 271 return; 272 273 if (lmv->connected == 0) 274 return; 275 276 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 277 tgt = lmv->tgts[i]; 278 if (tgt == NULL || tgt->ltd_exp == NULL || tgt->ltd_active == 0) 279 continue; 280 281 obd_set_info_async(NULL, tgt->ltd_exp, sizeof(KEY_INTERMDS), 282 KEY_INTERMDS, 0, NULL, NULL); 283 } 284} 285 286static int lmv_init_ea_size(struct obd_export *exp, int easize, 287 int def_easize, int cookiesize, int def_cookiesize) 288{ 289 struct obd_device *obd = exp->exp_obd; 290 struct lmv_obd *lmv = &obd->u.lmv; 291 int i; 292 int rc = 0; 293 int change = 0; 294 295 if (lmv->max_easize < easize) { 296 lmv->max_easize = easize; 297 change = 1; 298 } 299 if (lmv->max_def_easize < def_easize) { 300 lmv->max_def_easize = def_easize; 301 change = 1; 302 } 303 if (lmv->max_cookiesize < cookiesize) { 304 lmv->max_cookiesize = cookiesize; 305 change = 1; 306 } 307 if (lmv->max_def_cookiesize < def_cookiesize) { 308 lmv->max_def_cookiesize = def_cookiesize; 309 change = 1; 310 } 311 if (change == 0) 312 return 0; 313 314 if (lmv->connected == 0) 315 return 0; 316 317 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 318 if (lmv->tgts[i] == NULL || 319 lmv->tgts[i]->ltd_exp == NULL || 320 lmv->tgts[i]->ltd_active == 0) { 321 CWARN("%s: NULL export for %d\n", obd->obd_name, i); 322 continue; 323 } 324 325 rc = md_init_ea_size(lmv->tgts[i]->ltd_exp, easize, def_easize, 326 cookiesize, def_cookiesize); 327 if (rc) { 328 CERROR("%s: obd_init_ea_size() failed on MDT target %d:" 329 " rc = %d.\n", obd->obd_name, i, rc); 330 break; 331 } 332 } 333 return rc; 334} 335 336#define MAX_STRING_SIZE 128 337 338int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt) 339{ 340 struct proc_dir_entry *lmv_proc_dir; 341 struct lmv_obd *lmv = &obd->u.lmv; 342 struct obd_uuid *cluuid = &lmv->cluuid; 343 struct obd_uuid lmv_mdc_uuid = { "LMV_MDC_UUID" }; 344 struct obd_device *mdc_obd; 345 struct obd_export *mdc_exp; 346 struct lu_fld_target target; 347 int rc; 348 349 mdc_obd = class_find_client_obd(&tgt->ltd_uuid, LUSTRE_MDC_NAME, 350 &obd->obd_uuid); 351 if (!mdc_obd) { 352 CERROR("target %s not attached\n", tgt->ltd_uuid.uuid); 353 return -EINVAL; 354 } 355 356 CDEBUG(D_CONFIG, "connect to %s(%s) - %s, %s FOR %s\n", 357 mdc_obd->obd_name, mdc_obd->obd_uuid.uuid, 358 tgt->ltd_uuid.uuid, obd->obd_uuid.uuid, 359 cluuid->uuid); 360 361 if (!mdc_obd->obd_set_up) { 362 CERROR("target %s is not set up\n", tgt->ltd_uuid.uuid); 363 return -EINVAL; 364 } 365 366 rc = obd_connect(NULL, &mdc_exp, mdc_obd, &lmv_mdc_uuid, 367 &lmv->conn_data, NULL); 368 if (rc) { 369 CERROR("target %s connect error %d\n", tgt->ltd_uuid.uuid, rc); 370 return rc; 371 } 372 373 /* 374 * Init fid sequence client for this mdc and add new fld target. 375 */ 376 rc = obd_fid_init(mdc_obd, mdc_exp, LUSTRE_SEQ_METADATA); 377 if (rc) 378 return rc; 379 380 target.ft_srv = NULL; 381 target.ft_exp = mdc_exp; 382 target.ft_idx = tgt->ltd_idx; 383 384 fld_client_add_target(&lmv->lmv_fld, &target); 385 386 rc = obd_register_observer(mdc_obd, obd); 387 if (rc) { 388 obd_disconnect(mdc_exp); 389 CERROR("target %s register_observer error %d\n", 390 tgt->ltd_uuid.uuid, rc); 391 return rc; 392 } 393 394 if (obd->obd_observer) { 395 /* 396 * Tell the observer about the new target. 397 */ 398 rc = obd_notify(obd->obd_observer, mdc_exp->exp_obd, 399 OBD_NOTIFY_ACTIVE, 400 (void *)(tgt - lmv->tgts[0])); 401 if (rc) { 402 obd_disconnect(mdc_exp); 403 return rc; 404 } 405 } 406 407 tgt->ltd_active = 1; 408 tgt->ltd_exp = mdc_exp; 409 lmv->desc.ld_active_tgt_count++; 410 411 md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize, 412 lmv->max_cookiesize, lmv->max_def_cookiesize); 413 414 CDEBUG(D_CONFIG, "Connected to %s(%s) successfully (%d)\n", 415 mdc_obd->obd_name, mdc_obd->obd_uuid.uuid, 416 atomic_read(&obd->obd_refcount)); 417 418 lmv_proc_dir = obd->obd_proc_private; 419 if (lmv_proc_dir) { 420 struct proc_dir_entry *mdc_symlink; 421 422 LASSERT(mdc_obd->obd_type != NULL); 423 LASSERT(mdc_obd->obd_type->typ_name != NULL); 424 mdc_symlink = lprocfs_add_symlink(mdc_obd->obd_name, 425 lmv_proc_dir, 426 "../../../%s/%s", 427 mdc_obd->obd_type->typ_name, 428 mdc_obd->obd_name); 429 if (mdc_symlink == NULL) { 430 CERROR("Could not register LMV target " 431 "/proc/fs/lustre/%s/%s/target_obds/%s.", 432 obd->obd_type->typ_name, obd->obd_name, 433 mdc_obd->obd_name); 434 lprocfs_remove(&lmv_proc_dir); 435 obd->obd_proc_private = NULL; 436 } 437 } 438 return 0; 439} 440 441static void lmv_del_target(struct lmv_obd *lmv, int index) 442{ 443 if (lmv->tgts[index] == NULL) 444 return; 445 446 OBD_FREE_PTR(lmv->tgts[index]); 447 lmv->tgts[index] = NULL; 448 return; 449} 450 451static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, 452 __u32 index, int gen) 453{ 454 struct lmv_obd *lmv = &obd->u.lmv; 455 struct lmv_tgt_desc *tgt; 456 int rc = 0; 457 458 CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index); 459 460 lmv_init_lock(lmv); 461 462 if (lmv->desc.ld_tgt_count == 0) { 463 struct obd_device *mdc_obd; 464 465 mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME, 466 &obd->obd_uuid); 467 if (!mdc_obd) { 468 lmv_init_unlock(lmv); 469 CERROR("%s: Target %s not attached: rc = %d\n", 470 obd->obd_name, uuidp->uuid, -EINVAL); 471 return -EINVAL; 472 } 473 } 474 475 if ((index < lmv->tgts_size) && (lmv->tgts[index] != NULL)) { 476 tgt = lmv->tgts[index]; 477 CERROR("%s: UUID %s already assigned at LOV target index %d:" 478 " rc = %d\n", obd->obd_name, 479 obd_uuid2str(&tgt->ltd_uuid), index, -EEXIST); 480 lmv_init_unlock(lmv); 481 return -EEXIST; 482 } 483 484 if (index >= lmv->tgts_size) { 485 /* We need to reallocate the lmv target array. */ 486 struct lmv_tgt_desc **newtgts, **old = NULL; 487 __u32 newsize = 1; 488 __u32 oldsize = 0; 489 490 while (newsize < index + 1) 491 newsize = newsize << 1; 492 OBD_ALLOC(newtgts, sizeof(*newtgts) * newsize); 493 if (newtgts == NULL) { 494 lmv_init_unlock(lmv); 495 return -ENOMEM; 496 } 497 498 if (lmv->tgts_size) { 499 memcpy(newtgts, lmv->tgts, 500 sizeof(*newtgts) * lmv->tgts_size); 501 old = lmv->tgts; 502 oldsize = lmv->tgts_size; 503 } 504 505 lmv->tgts = newtgts; 506 lmv->tgts_size = newsize; 507 smp_rmb(); 508 if (old) 509 OBD_FREE(old, sizeof(*old) * oldsize); 510 511 CDEBUG(D_CONFIG, "tgts: %p size: %d\n", lmv->tgts, 512 lmv->tgts_size); 513 } 514 515 OBD_ALLOC_PTR(tgt); 516 if (!tgt) { 517 lmv_init_unlock(lmv); 518 return -ENOMEM; 519 } 520 521 mutex_init(&tgt->ltd_fid_mutex); 522 tgt->ltd_idx = index; 523 tgt->ltd_uuid = *uuidp; 524 tgt->ltd_active = 0; 525 lmv->tgts[index] = tgt; 526 if (index >= lmv->desc.ld_tgt_count) 527 lmv->desc.ld_tgt_count = index + 1; 528 529 if (lmv->connected) { 530 rc = lmv_connect_mdc(obd, tgt); 531 if (rc) { 532 spin_lock(&lmv->lmv_lock); 533 lmv->desc.ld_tgt_count--; 534 memset(tgt, 0, sizeof(*tgt)); 535 spin_unlock(&lmv->lmv_lock); 536 } else { 537 int easize = sizeof(struct lmv_stripe_md) + 538 lmv->desc.ld_tgt_count * sizeof(struct lu_fid); 539 lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0); 540 } 541 } 542 543 lmv_init_unlock(lmv); 544 return rc; 545} 546 547int lmv_check_connect(struct obd_device *obd) 548{ 549 struct lmv_obd *lmv = &obd->u.lmv; 550 struct lmv_tgt_desc *tgt; 551 int i; 552 int rc; 553 int easize; 554 555 if (lmv->connected) 556 return 0; 557 558 lmv_init_lock(lmv); 559 if (lmv->connected) { 560 lmv_init_unlock(lmv); 561 return 0; 562 } 563 564 if (lmv->desc.ld_tgt_count == 0) { 565 lmv_init_unlock(lmv); 566 CERROR("%s: no targets configured.\n", obd->obd_name); 567 return -EINVAL; 568 } 569 570 CDEBUG(D_CONFIG, "Time to connect %s to %s\n", 571 lmv->cluuid.uuid, obd->obd_name); 572 573 LASSERT(lmv->tgts != NULL); 574 575 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 576 tgt = lmv->tgts[i]; 577 if (tgt == NULL) 578 continue; 579 rc = lmv_connect_mdc(obd, tgt); 580 if (rc) 581 goto out_disc; 582 } 583 584 lmv_set_timeouts(obd); 585 class_export_put(lmv->exp); 586 lmv->connected = 1; 587 easize = lmv_get_easize(lmv); 588 lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0); 589 lmv_init_unlock(lmv); 590 return 0; 591 592 out_disc: 593 while (i-- > 0) { 594 int rc2; 595 tgt = lmv->tgts[i]; 596 if (tgt == NULL) 597 continue; 598 tgt->ltd_active = 0; 599 if (tgt->ltd_exp) { 600 --lmv->desc.ld_active_tgt_count; 601 rc2 = obd_disconnect(tgt->ltd_exp); 602 if (rc2) { 603 CERROR("LMV target %s disconnect on " 604 "MDC idx %d: error %d\n", 605 tgt->ltd_uuid.uuid, i, rc2); 606 } 607 } 608 } 609 class_disconnect(lmv->exp); 610 lmv_init_unlock(lmv); 611 return rc; 612} 613 614static int lmv_disconnect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt) 615{ 616 struct proc_dir_entry *lmv_proc_dir; 617 struct lmv_obd *lmv = &obd->u.lmv; 618 struct obd_device *mdc_obd; 619 int rc; 620 621 LASSERT(tgt != NULL); 622 LASSERT(obd != NULL); 623 624 mdc_obd = class_exp2obd(tgt->ltd_exp); 625 626 if (mdc_obd) { 627 mdc_obd->obd_force = obd->obd_force; 628 mdc_obd->obd_fail = obd->obd_fail; 629 mdc_obd->obd_no_recov = obd->obd_no_recov; 630 } 631 632 lmv_proc_dir = obd->obd_proc_private; 633 if (lmv_proc_dir) 634 lprocfs_remove_proc_entry(mdc_obd->obd_name, lmv_proc_dir); 635 636 rc = obd_fid_fini(tgt->ltd_exp->exp_obd); 637 if (rc) 638 CERROR("Can't finalize fids factory\n"); 639 640 CDEBUG(D_INFO, "Disconnected from %s(%s) successfully\n", 641 tgt->ltd_exp->exp_obd->obd_name, 642 tgt->ltd_exp->exp_obd->obd_uuid.uuid); 643 644 obd_register_observer(tgt->ltd_exp->exp_obd, NULL); 645 rc = obd_disconnect(tgt->ltd_exp); 646 if (rc) { 647 if (tgt->ltd_active) { 648 CERROR("Target %s disconnect error %d\n", 649 tgt->ltd_uuid.uuid, rc); 650 } 651 } 652 653 lmv_activate_target(lmv, tgt, 0); 654 tgt->ltd_exp = NULL; 655 return 0; 656} 657 658static int lmv_disconnect(struct obd_export *exp) 659{ 660 struct obd_device *obd = class_exp2obd(exp); 661 struct lmv_obd *lmv = &obd->u.lmv; 662 int rc; 663 int i; 664 665 if (!lmv->tgts) 666 goto out_local; 667 668 /* 669 * Only disconnect the underlying layers on the final disconnect. 670 */ 671 lmv->refcount--; 672 if (lmv->refcount != 0) 673 goto out_local; 674 675 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 676 if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL) 677 continue; 678 679 lmv_disconnect_mdc(obd, lmv->tgts[i]); 680 } 681 682 if (obd->obd_proc_private) 683 lprocfs_remove((struct proc_dir_entry **)&obd->obd_proc_private); 684 else 685 CERROR("/proc/fs/lustre/%s/%s/target_obds missing\n", 686 obd->obd_type->typ_name, obd->obd_name); 687 688out_local: 689 /* 690 * This is the case when no real connection is established by 691 * lmv_check_connect(). 692 */ 693 if (!lmv->connected) 694 class_export_put(exp); 695 rc = class_disconnect(exp); 696 if (lmv->refcount == 0) 697 lmv->connected = 0; 698 return rc; 699} 700 701static int lmv_fid2path(struct obd_export *exp, int len, void *karg, void *uarg) 702{ 703 struct obd_device *obddev = class_exp2obd(exp); 704 struct lmv_obd *lmv = &obddev->u.lmv; 705 struct getinfo_fid2path *gf; 706 struct lmv_tgt_desc *tgt; 707 struct getinfo_fid2path *remote_gf = NULL; 708 int remote_gf_size = 0; 709 int rc; 710 711 gf = (struct getinfo_fid2path *)karg; 712 tgt = lmv_find_target(lmv, &gf->gf_fid); 713 if (IS_ERR(tgt)) 714 return PTR_ERR(tgt); 715 716repeat_fid2path: 717 rc = obd_iocontrol(OBD_IOC_FID2PATH, tgt->ltd_exp, len, gf, uarg); 718 if (rc != 0 && rc != -EREMOTE) 719 goto out_fid2path; 720 721 /* If remote_gf != NULL, it means just building the 722 * path on the remote MDT, copy this path segment to gf */ 723 if (remote_gf != NULL) { 724 struct getinfo_fid2path *ori_gf; 725 char *ptr; 726 727 ori_gf = (struct getinfo_fid2path *)karg; 728 if (strlen(ori_gf->gf_path) + 729 strlen(gf->gf_path) > ori_gf->gf_pathlen) { 730 rc = -EOVERFLOW; 731 goto out_fid2path; 732 } 733 734 ptr = ori_gf->gf_path; 735 736 memmove(ptr + strlen(gf->gf_path) + 1, ptr, 737 strlen(ori_gf->gf_path)); 738 739 strncpy(ptr, gf->gf_path, strlen(gf->gf_path)); 740 ptr += strlen(gf->gf_path); 741 *ptr = '/'; 742 } 743 744 CDEBUG(D_INFO, "%s: get path %s "DFID" rec: %llu ln: %u\n", 745 tgt->ltd_exp->exp_obd->obd_name, 746 gf->gf_path, PFID(&gf->gf_fid), gf->gf_recno, 747 gf->gf_linkno); 748 749 if (rc == 0) 750 goto out_fid2path; 751 752 /* sigh, has to go to another MDT to do path building further */ 753 if (remote_gf == NULL) { 754 remote_gf_size = sizeof(*remote_gf) + PATH_MAX; 755 OBD_ALLOC(remote_gf, remote_gf_size); 756 if (remote_gf == NULL) { 757 rc = -ENOMEM; 758 goto out_fid2path; 759 } 760 remote_gf->gf_pathlen = PATH_MAX; 761 } 762 763 if (!fid_is_sane(&gf->gf_fid)) { 764 CERROR("%s: invalid FID "DFID": rc = %d\n", 765 tgt->ltd_exp->exp_obd->obd_name, 766 PFID(&gf->gf_fid), -EINVAL); 767 rc = -EINVAL; 768 goto out_fid2path; 769 } 770 771 tgt = lmv_find_target(lmv, &gf->gf_fid); 772 if (IS_ERR(tgt)) { 773 rc = -EINVAL; 774 goto out_fid2path; 775 } 776 777 remote_gf->gf_fid = gf->gf_fid; 778 remote_gf->gf_recno = -1; 779 remote_gf->gf_linkno = -1; 780 memset(remote_gf->gf_path, 0, remote_gf->gf_pathlen); 781 gf = remote_gf; 782 goto repeat_fid2path; 783 784out_fid2path: 785 if (remote_gf != NULL) 786 OBD_FREE(remote_gf, remote_gf_size); 787 return rc; 788} 789 790static int lmv_hsm_req_count(struct lmv_obd *lmv, 791 const struct hsm_user_request *hur, 792 const struct lmv_tgt_desc *tgt_mds) 793{ 794 int i, nr = 0; 795 struct lmv_tgt_desc *curr_tgt; 796 797 /* count how many requests must be sent to the given target */ 798 for (i = 0; i < hur->hur_request.hr_itemcount; i++) { 799 curr_tgt = lmv_find_target(lmv, &hur->hur_user_item[i].hui_fid); 800 if (obd_uuid_equals(&curr_tgt->ltd_uuid, &tgt_mds->ltd_uuid)) 801 nr++; 802 } 803 return nr; 804} 805 806static void lmv_hsm_req_build(struct lmv_obd *lmv, 807 struct hsm_user_request *hur_in, 808 const struct lmv_tgt_desc *tgt_mds, 809 struct hsm_user_request *hur_out) 810{ 811 int i, nr_out; 812 struct lmv_tgt_desc *curr_tgt; 813 814 /* build the hsm_user_request for the given target */ 815 hur_out->hur_request = hur_in->hur_request; 816 nr_out = 0; 817 for (i = 0; i < hur_in->hur_request.hr_itemcount; i++) { 818 curr_tgt = lmv_find_target(lmv, 819 &hur_in->hur_user_item[i].hui_fid); 820 if (obd_uuid_equals(&curr_tgt->ltd_uuid, &tgt_mds->ltd_uuid)) { 821 hur_out->hur_user_item[nr_out] = 822 hur_in->hur_user_item[i]; 823 nr_out++; 824 } 825 } 826 hur_out->hur_request.hr_itemcount = nr_out; 827 memcpy(hur_data(hur_out), hur_data(hur_in), 828 hur_in->hur_request.hr_data_len); 829} 830 831static int lmv_hsm_ct_unregister(struct lmv_obd *lmv, unsigned int cmd, int len, 832 struct lustre_kernelcomm *lk, void *uarg) 833{ 834 int i, rc = 0; 835 836 /* unregister request (call from llapi_hsm_copytool_fini) */ 837 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 838 /* best effort: try to clean as much as possible 839 * (continue on error) */ 840 obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, lk, uarg); 841 } 842 843 /* Whatever the result, remove copytool from kuc groups. 844 * Unreached coordinators will get EPIPE on next requests 845 * and will unregister automatically. 846 */ 847 rc = libcfs_kkuc_group_rem(lk->lk_uid, lk->lk_group); 848 return rc; 849} 850 851static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len, 852 struct lustre_kernelcomm *lk, void *uarg) 853{ 854 struct file *filp; 855 int i, j, err; 856 int rc = 0; 857 bool any_set = false; 858 859 /* All or nothing: try to register to all MDS. 860 * In case of failure, unregister from previous MDS, 861 * except if it because of inactive target. */ 862 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 863 err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, 864 len, lk, uarg); 865 if (err) { 866 if (lmv->tgts[i]->ltd_active) { 867 /* permanent error */ 868 CERROR("error: iocontrol MDC %s on MDT" 869 "idx %d cmd %x: err = %d\n", 870 lmv->tgts[i]->ltd_uuid.uuid, 871 i, cmd, err); 872 rc = err; 873 lk->lk_flags |= LK_FLG_STOP; 874 /* unregister from previous MDS */ 875 for (j = 0; j < i; j++) 876 obd_iocontrol(cmd, 877 lmv->tgts[j]->ltd_exp, 878 len, lk, uarg); 879 return rc; 880 } 881 /* else: transient error. 882 * kuc will register to the missing MDT 883 * when it is back */ 884 } else { 885 any_set = true; 886 } 887 } 888 889 if (!any_set) 890 /* no registration done: return error */ 891 return -ENOTCONN; 892 893 /* at least one registration done, with no failure */ 894 filp = fget(lk->lk_wfd); 895 if (filp == NULL) { 896 return -EBADF; 897 } 898 rc = libcfs_kkuc_group_add(filp, lk->lk_uid, lk->lk_group, lk->lk_data); 899 if (rc != 0 && filp != NULL) 900 fput(filp); 901 return rc; 902} 903 904 905 906 907static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, 908 int len, void *karg, void *uarg) 909{ 910 struct obd_device *obddev = class_exp2obd(exp); 911 struct lmv_obd *lmv = &obddev->u.lmv; 912 int i = 0; 913 int rc = 0; 914 int set = 0; 915 int count = lmv->desc.ld_tgt_count; 916 917 if (count == 0) 918 return -ENOTTY; 919 920 switch (cmd) { 921 case IOC_OBD_STATFS: { 922 struct obd_ioctl_data *data = karg; 923 struct obd_device *mdc_obd; 924 struct obd_statfs stat_buf = {0}; 925 __u32 index; 926 927 memcpy(&index, data->ioc_inlbuf2, sizeof(__u32)); 928 if ((index >= count)) 929 return -ENODEV; 930 931 if (lmv->tgts[index] == NULL || 932 lmv->tgts[index]->ltd_active == 0) 933 return -ENODATA; 934 935 mdc_obd = class_exp2obd(lmv->tgts[index]->ltd_exp); 936 if (!mdc_obd) 937 return -EINVAL; 938 939 /* copy UUID */ 940 if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(mdc_obd), 941 min((int) data->ioc_plen2, 942 (int) sizeof(struct obd_uuid)))) 943 return -EFAULT; 944 945 rc = obd_statfs(NULL, lmv->tgts[index]->ltd_exp, &stat_buf, 946 cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS), 947 0); 948 if (rc) 949 return rc; 950 if (copy_to_user(data->ioc_pbuf1, &stat_buf, 951 min((int) data->ioc_plen1, 952 (int) sizeof(stat_buf)))) 953 return -EFAULT; 954 break; 955 } 956 case OBD_IOC_QUOTACTL: { 957 struct if_quotactl *qctl = karg; 958 struct lmv_tgt_desc *tgt = NULL; 959 struct obd_quotactl *oqctl; 960 961 if (qctl->qc_valid == QC_MDTIDX) { 962 if (qctl->qc_idx < 0 || count <= qctl->qc_idx) 963 return -EINVAL; 964 965 tgt = lmv->tgts[qctl->qc_idx]; 966 if (tgt == NULL || tgt->ltd_exp == NULL) 967 return -EINVAL; 968 } else if (qctl->qc_valid == QC_UUID) { 969 for (i = 0; i < count; i++) { 970 tgt = lmv->tgts[i]; 971 if (tgt == NULL) 972 continue; 973 if (!obd_uuid_equals(&tgt->ltd_uuid, 974 &qctl->obd_uuid)) 975 continue; 976 977 if (tgt->ltd_exp == NULL) 978 return -EINVAL; 979 980 break; 981 } 982 } else { 983 return -EINVAL; 984 } 985 986 if (i >= count) 987 return -EAGAIN; 988 989 LASSERT(tgt && tgt->ltd_exp); 990 OBD_ALLOC_PTR(oqctl); 991 if (!oqctl) 992 return -ENOMEM; 993 994 QCTL_COPY(oqctl, qctl); 995 rc = obd_quotactl(tgt->ltd_exp, oqctl); 996 if (rc == 0) { 997 QCTL_COPY(qctl, oqctl); 998 qctl->qc_valid = QC_MDTIDX; 999 qctl->obd_uuid = tgt->ltd_uuid; 1000 } 1001 OBD_FREE_PTR(oqctl); 1002 break; 1003 } 1004 case OBD_IOC_CHANGELOG_SEND: 1005 case OBD_IOC_CHANGELOG_CLEAR: { 1006 struct ioc_changelog *icc = karg; 1007 1008 if (icc->icc_mdtindex >= count) 1009 return -ENODEV; 1010 1011 if (lmv->tgts[icc->icc_mdtindex] == NULL || 1012 lmv->tgts[icc->icc_mdtindex]->ltd_exp == NULL || 1013 lmv->tgts[icc->icc_mdtindex]->ltd_active == 0) 1014 return -ENODEV; 1015 rc = obd_iocontrol(cmd, lmv->tgts[icc->icc_mdtindex]->ltd_exp, 1016 sizeof(*icc), icc, NULL); 1017 break; 1018 } 1019 case LL_IOC_GET_CONNECT_FLAGS: { 1020 if (lmv->tgts[0] == NULL) 1021 return -ENODATA; 1022 rc = obd_iocontrol(cmd, lmv->tgts[0]->ltd_exp, len, karg, uarg); 1023 break; 1024 } 1025 case OBD_IOC_FID2PATH: { 1026 rc = lmv_fid2path(exp, len, karg, uarg); 1027 break; 1028 } 1029 case LL_IOC_HSM_STATE_GET: 1030 case LL_IOC_HSM_STATE_SET: 1031 case LL_IOC_HSM_ACTION: { 1032 struct md_op_data *op_data = karg; 1033 struct lmv_tgt_desc *tgt; 1034 1035 tgt = lmv_find_target(lmv, &op_data->op_fid1); 1036 if (IS_ERR(tgt)) 1037 return PTR_ERR(tgt); 1038 1039 if (tgt->ltd_exp == NULL) 1040 return -EINVAL; 1041 1042 rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); 1043 break; 1044 } 1045 case LL_IOC_HSM_PROGRESS: { 1046 const struct hsm_progress_kernel *hpk = karg; 1047 struct lmv_tgt_desc *tgt; 1048 1049 tgt = lmv_find_target(lmv, &hpk->hpk_fid); 1050 if (IS_ERR(tgt)) 1051 return PTR_ERR(tgt); 1052 rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); 1053 break; 1054 } 1055 case LL_IOC_HSM_REQUEST: { 1056 struct hsm_user_request *hur = karg; 1057 struct lmv_tgt_desc *tgt; 1058 unsigned int reqcount = hur->hur_request.hr_itemcount; 1059 1060 if (reqcount == 0) 1061 return 0; 1062 1063 /* if the request is about a single fid 1064 * or if there is a single MDS, no need to split 1065 * the request. */ 1066 if (reqcount == 1 || count == 1) { 1067 tgt = lmv_find_target(lmv, 1068 &hur->hur_user_item[0].hui_fid); 1069 if (IS_ERR(tgt)) 1070 return PTR_ERR(tgt); 1071 rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); 1072 } else { 1073 /* split fid list to their respective MDS */ 1074 for (i = 0; i < count; i++) { 1075 unsigned int nr, reqlen; 1076 int rc1; 1077 struct hsm_user_request *req; 1078 1079 nr = lmv_hsm_req_count(lmv, hur, lmv->tgts[i]); 1080 if (nr == 0) /* nothing for this MDS */ 1081 continue; 1082 1083 /* build a request with fids for this MDS */ 1084 reqlen = offsetof(typeof(*hur), 1085 hur_user_item[nr]) 1086 + hur->hur_request.hr_data_len; 1087 OBD_ALLOC_LARGE(req, reqlen); 1088 if (req == NULL) 1089 return -ENOMEM; 1090 1091 lmv_hsm_req_build(lmv, hur, lmv->tgts[i], req); 1092 1093 rc1 = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, 1094 reqlen, req, uarg); 1095 if (rc1 != 0 && rc == 0) 1096 rc = rc1; 1097 OBD_FREE_LARGE(req, reqlen); 1098 } 1099 } 1100 break; 1101 } 1102 case LL_IOC_LOV_SWAP_LAYOUTS: { 1103 struct md_op_data *op_data = karg; 1104 struct lmv_tgt_desc *tgt1, *tgt2; 1105 1106 tgt1 = lmv_find_target(lmv, &op_data->op_fid1); 1107 if (IS_ERR(tgt1)) 1108 return PTR_ERR(tgt1); 1109 1110 tgt2 = lmv_find_target(lmv, &op_data->op_fid2); 1111 if (IS_ERR(tgt2)) 1112 return PTR_ERR(tgt2); 1113 1114 if ((tgt1->ltd_exp == NULL) || (tgt2->ltd_exp == NULL)) 1115 return -EINVAL; 1116 1117 /* only files on same MDT can have their layouts swapped */ 1118 if (tgt1->ltd_idx != tgt2->ltd_idx) 1119 return -EPERM; 1120 1121 rc = obd_iocontrol(cmd, tgt1->ltd_exp, len, karg, uarg); 1122 break; 1123 } 1124 case LL_IOC_HSM_CT_START: { 1125 struct lustre_kernelcomm *lk = karg; 1126 if (lk->lk_flags & LK_FLG_STOP) 1127 rc = lmv_hsm_ct_unregister(lmv, cmd, len, lk, uarg); 1128 else 1129 rc = lmv_hsm_ct_register(lmv, cmd, len, lk, uarg); 1130 break; 1131 } 1132 default: 1133 for (i = 0; i < count; i++) { 1134 struct obd_device *mdc_obd; 1135 int err; 1136 1137 if (lmv->tgts[i] == NULL || 1138 lmv->tgts[i]->ltd_exp == NULL) 1139 continue; 1140 /* ll_umount_begin() sets force flag but for lmv, not 1141 * mdc. Let's pass it through */ 1142 mdc_obd = class_exp2obd(lmv->tgts[i]->ltd_exp); 1143 mdc_obd->obd_force = obddev->obd_force; 1144 err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, 1145 karg, uarg); 1146 if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) { 1147 return err; 1148 } else if (err) { 1149 if (lmv->tgts[i]->ltd_active) { 1150 CERROR("error: iocontrol MDC %s on MDT" 1151 "idx %d cmd %x: err = %d\n", 1152 lmv->tgts[i]->ltd_uuid.uuid, 1153 i, cmd, err); 1154 if (!rc) 1155 rc = err; 1156 } 1157 } else 1158 set = 1; 1159 } 1160 if (!set && !rc) 1161 rc = -EIO; 1162 } 1163 return rc; 1164} 1165 1166#if 0 1167static int lmv_all_chars_policy(int count, const char *name, 1168 int len) 1169{ 1170 unsigned int c = 0; 1171 1172 while (len > 0) 1173 c += name[--len]; 1174 c = c % count; 1175 return c; 1176} 1177 1178static int lmv_nid_policy(struct lmv_obd *lmv) 1179{ 1180 struct obd_import *imp; 1181 __u32 id; 1182 1183 /* 1184 * XXX: To get nid we assume that underlying obd device is mdc. 1185 */ 1186 imp = class_exp2cliimp(lmv->tgts[0].ltd_exp); 1187 id = imp->imp_connection->c_self ^ (imp->imp_connection->c_self >> 32); 1188 return id % lmv->desc.ld_tgt_count; 1189} 1190 1191static int lmv_choose_mds(struct lmv_obd *lmv, struct md_op_data *op_data, 1192 enum placement_policy placement) 1193{ 1194 switch (placement) { 1195 case PLACEMENT_CHAR_POLICY: 1196 return lmv_all_chars_policy(lmv->desc.ld_tgt_count, 1197 op_data->op_name, 1198 op_data->op_namelen); 1199 case PLACEMENT_NID_POLICY: 1200 return lmv_nid_policy(lmv); 1201 1202 default: 1203 break; 1204 } 1205 1206 CERROR("Unsupported placement policy %x\n", placement); 1207 return -EINVAL; 1208} 1209#endif 1210 1211/** 1212 * This is _inode_ placement policy function (not name). 1213 */ 1214static int lmv_placement_policy(struct obd_device *obd, 1215 struct md_op_data *op_data, u32 *mds) 1216{ 1217 struct lmv_obd *lmv = &obd->u.lmv; 1218 1219 LASSERT(mds != NULL); 1220 1221 if (lmv->desc.ld_tgt_count == 1) { 1222 *mds = 0; 1223 return 0; 1224 } 1225 1226 /** 1227 * If stripe_offset is provided during setdirstripe 1228 * (setdirstripe -i xx), xx MDS will be chosen. 1229 */ 1230 if (op_data->op_cli_flags & CLI_SET_MEA) { 1231 struct lmv_user_md *lum; 1232 1233 lum = (struct lmv_user_md *)op_data->op_data; 1234 if (lum->lum_type == LMV_STRIPE_TYPE && 1235 lum->lum_stripe_offset != -1) { 1236 if (lum->lum_stripe_offset >= lmv->desc.ld_tgt_count) { 1237 CERROR("%s: Stripe_offset %d > MDT count %d:" 1238 " rc = %d\n", obd->obd_name, 1239 lum->lum_stripe_offset, 1240 lmv->desc.ld_tgt_count, -ERANGE); 1241 return -ERANGE; 1242 } 1243 *mds = lum->lum_stripe_offset; 1244 return 0; 1245 } 1246 } 1247 1248 /* Allocate new fid on target according to operation type and parent 1249 * home mds. */ 1250 *mds = op_data->op_mds; 1251 return 0; 1252} 1253 1254int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds) 1255{ 1256 struct lmv_tgt_desc *tgt; 1257 int rc; 1258 1259 tgt = lmv_get_target(lmv, mds); 1260 if (IS_ERR(tgt)) 1261 return PTR_ERR(tgt); 1262 1263 /* 1264 * New seq alloc and FLD setup should be atomic. Otherwise we may find 1265 * on server that seq in new allocated fid is not yet known. 1266 */ 1267 mutex_lock(&tgt->ltd_fid_mutex); 1268 1269 if (tgt->ltd_active == 0 || tgt->ltd_exp == NULL) { 1270 rc = -ENODEV; 1271 goto out; 1272 } 1273 1274 /* 1275 * Asking underlaying tgt layer to allocate new fid. 1276 */ 1277 rc = obd_fid_alloc(tgt->ltd_exp, fid, NULL); 1278 if (rc > 0) { 1279 LASSERT(fid_is_sane(fid)); 1280 rc = 0; 1281 } 1282 1283out: 1284 mutex_unlock(&tgt->ltd_fid_mutex); 1285 return rc; 1286} 1287 1288int lmv_fid_alloc(struct obd_export *exp, struct lu_fid *fid, 1289 struct md_op_data *op_data) 1290{ 1291 struct obd_device *obd = class_exp2obd(exp); 1292 struct lmv_obd *lmv = &obd->u.lmv; 1293 u32 mds = 0; 1294 int rc; 1295 1296 LASSERT(op_data != NULL); 1297 LASSERT(fid != NULL); 1298 1299 rc = lmv_placement_policy(obd, op_data, &mds); 1300 if (rc) { 1301 CERROR("Can't get target for allocating fid, " 1302 "rc %d\n", rc); 1303 return rc; 1304 } 1305 1306 rc = __lmv_fid_alloc(lmv, fid, mds); 1307 if (rc) { 1308 CERROR("Can't alloc new fid, rc %d\n", rc); 1309 return rc; 1310 } 1311 1312 return rc; 1313} 1314 1315static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg) 1316{ 1317 struct lmv_obd *lmv = &obd->u.lmv; 1318 struct lprocfs_static_vars lvars; 1319 struct lmv_desc *desc; 1320 int rc; 1321 1322 if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { 1323 CERROR("LMV setup requires a descriptor\n"); 1324 return -EINVAL; 1325 } 1326 1327 desc = (struct lmv_desc *)lustre_cfg_buf(lcfg, 1); 1328 if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) { 1329 CERROR("Lmv descriptor size wrong: %d > %d\n", 1330 (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1)); 1331 return -EINVAL; 1332 } 1333 1334 OBD_ALLOC(lmv->tgts, sizeof(*lmv->tgts) * 32); 1335 if (lmv->tgts == NULL) 1336 return -ENOMEM; 1337 lmv->tgts_size = 32; 1338 1339 obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid); 1340 lmv->desc.ld_tgt_count = 0; 1341 lmv->desc.ld_active_tgt_count = 0; 1342 lmv->max_cookiesize = 0; 1343 lmv->max_def_easize = 0; 1344 lmv->max_easize = 0; 1345 lmv->lmv_placement = PLACEMENT_CHAR_POLICY; 1346 1347 spin_lock_init(&lmv->lmv_lock); 1348 mutex_init(&lmv->init_mutex); 1349 1350 lprocfs_lmv_init_vars(&lvars); 1351 1352 lprocfs_obd_setup(obd, lvars.obd_vars); 1353#if defined (CONFIG_PROC_FS) 1354 { 1355 rc = lprocfs_seq_create(obd->obd_proc_entry, "target_obd", 1356 0444, &lmv_proc_target_fops, obd); 1357 if (rc) 1358 CWARN("%s: error adding LMV target_obd file: rc = %d\n", 1359 obd->obd_name, rc); 1360 } 1361#endif 1362 rc = fld_client_init(&lmv->lmv_fld, obd->obd_name, 1363 LUSTRE_CLI_FLD_HASH_DHT); 1364 if (rc) { 1365 CERROR("Can't init FLD, err %d\n", rc); 1366 goto out; 1367 } 1368 1369 return 0; 1370 1371out: 1372 return rc; 1373} 1374 1375static int lmv_cleanup(struct obd_device *obd) 1376{ 1377 struct lmv_obd *lmv = &obd->u.lmv; 1378 1379 fld_client_fini(&lmv->lmv_fld); 1380 if (lmv->tgts != NULL) { 1381 int i; 1382 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 1383 if (lmv->tgts[i] == NULL) 1384 continue; 1385 lmv_del_target(lmv, i); 1386 } 1387 OBD_FREE(lmv->tgts, sizeof(*lmv->tgts) * lmv->tgts_size); 1388 lmv->tgts_size = 0; 1389 } 1390 return 0; 1391} 1392 1393static int lmv_process_config(struct obd_device *obd, u32 len, void *buf) 1394{ 1395 struct lustre_cfg *lcfg = buf; 1396 struct obd_uuid obd_uuid; 1397 int gen; 1398 __u32 index; 1399 int rc; 1400 1401 switch (lcfg->lcfg_command) { 1402 case LCFG_ADD_MDC: 1403 /* modify_mdc_tgts add 0:lustre-clilmv 1:lustre-MDT0000_UUID 1404 * 2:0 3:1 4:lustre-MDT0000-mdc_UUID */ 1405 if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(obd_uuid.uuid)) { 1406 rc = -EINVAL; 1407 goto out; 1408 } 1409 1410 obd_str2uuid(&obd_uuid, lustre_cfg_buf(lcfg, 1)); 1411 1412 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) { 1413 rc = -EINVAL; 1414 goto out; 1415 } 1416 if (sscanf(lustre_cfg_buf(lcfg, 3), "%d", &gen) != 1) { 1417 rc = -EINVAL; 1418 goto out; 1419 } 1420 rc = lmv_add_target(obd, &obd_uuid, index, gen); 1421 goto out; 1422 default: 1423 CERROR("Unknown command: %d\n", lcfg->lcfg_command); 1424 rc = -EINVAL; 1425 goto out; 1426 } 1427out: 1428 return rc; 1429} 1430 1431static int lmv_statfs(const struct lu_env *env, struct obd_export *exp, 1432 struct obd_statfs *osfs, __u64 max_age, __u32 flags) 1433{ 1434 struct obd_device *obd = class_exp2obd(exp); 1435 struct lmv_obd *lmv = &obd->u.lmv; 1436 struct obd_statfs *temp; 1437 int rc = 0; 1438 int i; 1439 1440 rc = lmv_check_connect(obd); 1441 if (rc) 1442 return rc; 1443 1444 OBD_ALLOC(temp, sizeof(*temp)); 1445 if (temp == NULL) 1446 return -ENOMEM; 1447 1448 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 1449 if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL) 1450 continue; 1451 1452 rc = obd_statfs(env, lmv->tgts[i]->ltd_exp, temp, 1453 max_age, flags); 1454 if (rc) { 1455 CERROR("can't stat MDS #%d (%s), error %d\n", i, 1456 lmv->tgts[i]->ltd_exp->exp_obd->obd_name, 1457 rc); 1458 goto out_free_temp; 1459 } 1460 1461 if (i == 0) { 1462 *osfs = *temp; 1463 /* If the statfs is from mount, it will needs 1464 * retrieve necessary information from MDT0. 1465 * i.e. mount does not need the merged osfs 1466 * from all of MDT. 1467 * And also clients can be mounted as long as 1468 * MDT0 is in service*/ 1469 if (flags & OBD_STATFS_FOR_MDT0) 1470 goto out_free_temp; 1471 } else { 1472 osfs->os_bavail += temp->os_bavail; 1473 osfs->os_blocks += temp->os_blocks; 1474 osfs->os_ffree += temp->os_ffree; 1475 osfs->os_files += temp->os_files; 1476 } 1477 } 1478 1479out_free_temp: 1480 OBD_FREE(temp, sizeof(*temp)); 1481 return rc; 1482} 1483 1484static int lmv_getstatus(struct obd_export *exp, 1485 struct lu_fid *fid, 1486 struct obd_capa **pc) 1487{ 1488 struct obd_device *obd = exp->exp_obd; 1489 struct lmv_obd *lmv = &obd->u.lmv; 1490 int rc; 1491 1492 rc = lmv_check_connect(obd); 1493 if (rc) 1494 return rc; 1495 1496 rc = md_getstatus(lmv->tgts[0]->ltd_exp, fid, pc); 1497 return rc; 1498} 1499 1500static int lmv_getxattr(struct obd_export *exp, const struct lu_fid *fid, 1501 struct obd_capa *oc, u64 valid, const char *name, 1502 const char *input, int input_size, int output_size, 1503 int flags, struct ptlrpc_request **request) 1504{ 1505 struct obd_device *obd = exp->exp_obd; 1506 struct lmv_obd *lmv = &obd->u.lmv; 1507 struct lmv_tgt_desc *tgt; 1508 int rc; 1509 1510 rc = lmv_check_connect(obd); 1511 if (rc) 1512 return rc; 1513 1514 tgt = lmv_find_target(lmv, fid); 1515 if (IS_ERR(tgt)) 1516 return PTR_ERR(tgt); 1517 1518 rc = md_getxattr(tgt->ltd_exp, fid, oc, valid, name, input, 1519 input_size, output_size, flags, request); 1520 1521 return rc; 1522} 1523 1524static int lmv_setxattr(struct obd_export *exp, const struct lu_fid *fid, 1525 struct obd_capa *oc, u64 valid, const char *name, 1526 const char *input, int input_size, int output_size, 1527 int flags, __u32 suppgid, 1528 struct ptlrpc_request **request) 1529{ 1530 struct obd_device *obd = exp->exp_obd; 1531 struct lmv_obd *lmv = &obd->u.lmv; 1532 struct lmv_tgt_desc *tgt; 1533 int rc; 1534 1535 rc = lmv_check_connect(obd); 1536 if (rc) 1537 return rc; 1538 1539 tgt = lmv_find_target(lmv, fid); 1540 if (IS_ERR(tgt)) 1541 return PTR_ERR(tgt); 1542 1543 rc = md_setxattr(tgt->ltd_exp, fid, oc, valid, name, input, 1544 input_size, output_size, flags, suppgid, 1545 request); 1546 1547 return rc; 1548} 1549 1550static int lmv_getattr(struct obd_export *exp, struct md_op_data *op_data, 1551 struct ptlrpc_request **request) 1552{ 1553 struct obd_device *obd = exp->exp_obd; 1554 struct lmv_obd *lmv = &obd->u.lmv; 1555 struct lmv_tgt_desc *tgt; 1556 int rc; 1557 1558 rc = lmv_check_connect(obd); 1559 if (rc) 1560 return rc; 1561 1562 tgt = lmv_find_target(lmv, &op_data->op_fid1); 1563 if (IS_ERR(tgt)) 1564 return PTR_ERR(tgt); 1565 1566 if (op_data->op_flags & MF_GET_MDT_IDX) { 1567 op_data->op_mds = tgt->ltd_idx; 1568 return 0; 1569 } 1570 1571 rc = md_getattr(tgt->ltd_exp, op_data, request); 1572 1573 return rc; 1574} 1575 1576static int lmv_null_inode(struct obd_export *exp, const struct lu_fid *fid) 1577{ 1578 struct obd_device *obd = exp->exp_obd; 1579 struct lmv_obd *lmv = &obd->u.lmv; 1580 int i; 1581 int rc; 1582 1583 rc = lmv_check_connect(obd); 1584 if (rc) 1585 return rc; 1586 1587 CDEBUG(D_INODE, "CBDATA for "DFID"\n", PFID(fid)); 1588 1589 /* 1590 * With DNE every object can have two locks in different namespaces: 1591 * lookup lock in space of MDT storing direntry and update/open lock in 1592 * space of MDT storing inode. 1593 */ 1594 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 1595 if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL) 1596 continue; 1597 md_null_inode(lmv->tgts[i]->ltd_exp, fid); 1598 } 1599 1600 return 0; 1601} 1602 1603static int lmv_find_cbdata(struct obd_export *exp, const struct lu_fid *fid, 1604 ldlm_iterator_t it, void *data) 1605{ 1606 struct obd_device *obd = exp->exp_obd; 1607 struct lmv_obd *lmv = &obd->u.lmv; 1608 int i; 1609 int rc; 1610 1611 rc = lmv_check_connect(obd); 1612 if (rc) 1613 return rc; 1614 1615 CDEBUG(D_INODE, "CBDATA for "DFID"\n", PFID(fid)); 1616 1617 /* 1618 * With DNE every object can have two locks in different namespaces: 1619 * lookup lock in space of MDT storing direntry and update/open lock in 1620 * space of MDT storing inode. 1621 */ 1622 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 1623 if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL) 1624 continue; 1625 rc = md_find_cbdata(lmv->tgts[i]->ltd_exp, fid, it, data); 1626 if (rc) 1627 return rc; 1628 } 1629 1630 return rc; 1631} 1632 1633 1634static int lmv_close(struct obd_export *exp, struct md_op_data *op_data, 1635 struct md_open_data *mod, struct ptlrpc_request **request) 1636{ 1637 struct obd_device *obd = exp->exp_obd; 1638 struct lmv_obd *lmv = &obd->u.lmv; 1639 struct lmv_tgt_desc *tgt; 1640 int rc; 1641 1642 rc = lmv_check_connect(obd); 1643 if (rc) 1644 return rc; 1645 1646 tgt = lmv_find_target(lmv, &op_data->op_fid1); 1647 if (IS_ERR(tgt)) 1648 return PTR_ERR(tgt); 1649 1650 CDEBUG(D_INODE, "CLOSE "DFID"\n", PFID(&op_data->op_fid1)); 1651 rc = md_close(tgt->ltd_exp, op_data, mod, request); 1652 return rc; 1653} 1654 1655struct lmv_tgt_desc 1656*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data, 1657 struct lu_fid *fid) 1658{ 1659 struct lmv_tgt_desc *tgt; 1660 1661 tgt = lmv_find_target(lmv, fid); 1662 if (IS_ERR(tgt)) 1663 return tgt; 1664 1665 op_data->op_mds = tgt->ltd_idx; 1666 1667 return tgt; 1668} 1669 1670int lmv_create(struct obd_export *exp, struct md_op_data *op_data, 1671 const void *data, int datalen, int mode, __u32 uid, 1672 __u32 gid, cfs_cap_t cap_effective, __u64 rdev, 1673 struct ptlrpc_request **request) 1674{ 1675 struct obd_device *obd = exp->exp_obd; 1676 struct lmv_obd *lmv = &obd->u.lmv; 1677 struct lmv_tgt_desc *tgt; 1678 int rc; 1679 1680 rc = lmv_check_connect(obd); 1681 if (rc) 1682 return rc; 1683 1684 if (!lmv->desc.ld_active_tgt_count) 1685 return -EIO; 1686 1687 tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); 1688 if (IS_ERR(tgt)) 1689 return PTR_ERR(tgt); 1690 1691 rc = lmv_fid_alloc(exp, &op_data->op_fid2, op_data); 1692 if (rc) 1693 return rc; 1694 1695 CDEBUG(D_INODE, "CREATE '%*s' on "DFID" -> mds #%x\n", 1696 op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1), 1697 op_data->op_mds); 1698 1699 op_data->op_flags |= MF_MDC_CANCEL_FID1; 1700 rc = md_create(tgt->ltd_exp, op_data, data, datalen, mode, uid, gid, 1701 cap_effective, rdev, request); 1702 1703 if (rc == 0) { 1704 if (*request == NULL) 1705 return rc; 1706 CDEBUG(D_INODE, "Created - "DFID"\n", PFID(&op_data->op_fid2)); 1707 } 1708 return rc; 1709} 1710 1711static int lmv_done_writing(struct obd_export *exp, 1712 struct md_op_data *op_data, 1713 struct md_open_data *mod) 1714{ 1715 struct obd_device *obd = exp->exp_obd; 1716 struct lmv_obd *lmv = &obd->u.lmv; 1717 struct lmv_tgt_desc *tgt; 1718 int rc; 1719 1720 rc = lmv_check_connect(obd); 1721 if (rc) 1722 return rc; 1723 1724 tgt = lmv_find_target(lmv, &op_data->op_fid1); 1725 if (IS_ERR(tgt)) 1726 return PTR_ERR(tgt); 1727 1728 rc = md_done_writing(tgt->ltd_exp, op_data, mod); 1729 return rc; 1730} 1731 1732static int 1733lmv_enqueue_remote(struct obd_export *exp, struct ldlm_enqueue_info *einfo, 1734 struct lookup_intent *it, struct md_op_data *op_data, 1735 struct lustre_handle *lockh, void *lmm, int lmmsize, 1736 __u64 extra_lock_flags) 1737{ 1738 struct ptlrpc_request *req = it->d.lustre.it_data; 1739 struct obd_device *obd = exp->exp_obd; 1740 struct lmv_obd *lmv = &obd->u.lmv; 1741 struct lustre_handle plock; 1742 struct lmv_tgt_desc *tgt; 1743 struct md_op_data *rdata; 1744 struct lu_fid fid1; 1745 struct mdt_body *body; 1746 int rc = 0; 1747 int pmode; 1748 1749 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 1750 LASSERT(body != NULL); 1751 1752 if (!(body->valid & OBD_MD_MDS)) 1753 return 0; 1754 1755 CDEBUG(D_INODE, "REMOTE_ENQUEUE '%s' on "DFID" -> "DFID"\n", 1756 LL_IT2STR(it), PFID(&op_data->op_fid1), PFID(&body->fid1)); 1757 1758 /* 1759 * We got LOOKUP lock, but we really need attrs. 1760 */ 1761 pmode = it->d.lustre.it_lock_mode; 1762 LASSERT(pmode != 0); 1763 memcpy(&plock, lockh, sizeof(plock)); 1764 it->d.lustre.it_lock_mode = 0; 1765 it->d.lustre.it_data = NULL; 1766 fid1 = body->fid1; 1767 1768 ptlrpc_req_finished(req); 1769 1770 tgt = lmv_find_target(lmv, &fid1); 1771 if (IS_ERR(tgt)) { 1772 rc = PTR_ERR(tgt); 1773 goto out; 1774 } 1775 1776 OBD_ALLOC_PTR(rdata); 1777 if (rdata == NULL) { 1778 rc = -ENOMEM; 1779 goto out; 1780 } 1781 1782 rdata->op_fid1 = fid1; 1783 rdata->op_bias = MDS_CROSS_REF; 1784 1785 rc = md_enqueue(tgt->ltd_exp, einfo, it, rdata, lockh, 1786 lmm, lmmsize, NULL, extra_lock_flags); 1787 OBD_FREE_PTR(rdata); 1788out: 1789 ldlm_lock_decref(&plock, pmode); 1790 return rc; 1791} 1792 1793static int 1794lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, 1795 struct lookup_intent *it, struct md_op_data *op_data, 1796 struct lustre_handle *lockh, void *lmm, int lmmsize, 1797 struct ptlrpc_request **req, __u64 extra_lock_flags) 1798{ 1799 struct obd_device *obd = exp->exp_obd; 1800 struct lmv_obd *lmv = &obd->u.lmv; 1801 struct lmv_tgt_desc *tgt; 1802 int rc; 1803 1804 rc = lmv_check_connect(obd); 1805 if (rc) 1806 return rc; 1807 1808 CDEBUG(D_INODE, "ENQUEUE '%s' on "DFID"\n", 1809 LL_IT2STR(it), PFID(&op_data->op_fid1)); 1810 1811 tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); 1812 if (IS_ERR(tgt)) 1813 return PTR_ERR(tgt); 1814 1815 CDEBUG(D_INODE, "ENQUEUE '%s' on "DFID" -> mds #%d\n", 1816 LL_IT2STR(it), PFID(&op_data->op_fid1), tgt->ltd_idx); 1817 1818 rc = md_enqueue(tgt->ltd_exp, einfo, it, op_data, lockh, 1819 lmm, lmmsize, req, extra_lock_flags); 1820 1821 if (rc == 0 && it && it->it_op == IT_OPEN) { 1822 rc = lmv_enqueue_remote(exp, einfo, it, op_data, lockh, 1823 lmm, lmmsize, extra_lock_flags); 1824 } 1825 return rc; 1826} 1827 1828static int 1829lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data, 1830 struct ptlrpc_request **request) 1831{ 1832 struct ptlrpc_request *req = NULL; 1833 struct obd_device *obd = exp->exp_obd; 1834 struct lmv_obd *lmv = &obd->u.lmv; 1835 struct lmv_tgt_desc *tgt; 1836 struct mdt_body *body; 1837 int rc; 1838 1839 rc = lmv_check_connect(obd); 1840 if (rc) 1841 return rc; 1842 1843 tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); 1844 if (IS_ERR(tgt)) 1845 return PTR_ERR(tgt); 1846 1847 CDEBUG(D_INODE, "GETATTR_NAME for %*s on "DFID" -> mds #%d\n", 1848 op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1), 1849 tgt->ltd_idx); 1850 1851 rc = md_getattr_name(tgt->ltd_exp, op_data, request); 1852 if (rc != 0) 1853 return rc; 1854 1855 body = req_capsule_server_get(&(*request)->rq_pill, 1856 &RMF_MDT_BODY); 1857 LASSERT(body != NULL); 1858 1859 if (body->valid & OBD_MD_MDS) { 1860 struct lu_fid rid = body->fid1; 1861 CDEBUG(D_INODE, "Request attrs for "DFID"\n", 1862 PFID(&rid)); 1863 1864 tgt = lmv_find_target(lmv, &rid); 1865 if (IS_ERR(tgt)) { 1866 ptlrpc_req_finished(*request); 1867 return PTR_ERR(tgt); 1868 } 1869 1870 op_data->op_fid1 = rid; 1871 op_data->op_valid |= OBD_MD_FLCROSSREF; 1872 op_data->op_namelen = 0; 1873 op_data->op_name = NULL; 1874 rc = md_getattr_name(tgt->ltd_exp, op_data, &req); 1875 ptlrpc_req_finished(*request); 1876 *request = req; 1877 } 1878 1879 return rc; 1880} 1881 1882#define md_op_data_fid(op_data, fl) \ 1883 (fl == MF_MDC_CANCEL_FID1 ? &op_data->op_fid1 : \ 1884 fl == MF_MDC_CANCEL_FID2 ? &op_data->op_fid2 : \ 1885 fl == MF_MDC_CANCEL_FID3 ? &op_data->op_fid3 : \ 1886 fl == MF_MDC_CANCEL_FID4 ? &op_data->op_fid4 : \ 1887 NULL) 1888 1889static int lmv_early_cancel(struct obd_export *exp, struct md_op_data *op_data, 1890 int op_tgt, ldlm_mode_t mode, int bits, int flag) 1891{ 1892 struct lu_fid *fid = md_op_data_fid(op_data, flag); 1893 struct obd_device *obd = exp->exp_obd; 1894 struct lmv_obd *lmv = &obd->u.lmv; 1895 struct lmv_tgt_desc *tgt; 1896 ldlm_policy_data_t policy = {{0}}; 1897 int rc = 0; 1898 1899 if (!fid_is_sane(fid)) 1900 return 0; 1901 1902 tgt = lmv_find_target(lmv, fid); 1903 if (IS_ERR(tgt)) 1904 return PTR_ERR(tgt); 1905 1906 if (tgt->ltd_idx != op_tgt) { 1907 CDEBUG(D_INODE, "EARLY_CANCEL on "DFID"\n", PFID(fid)); 1908 policy.l_inodebits.bits = bits; 1909 rc = md_cancel_unused(tgt->ltd_exp, fid, &policy, 1910 mode, LCF_ASYNC, NULL); 1911 } else { 1912 CDEBUG(D_INODE, 1913 "EARLY_CANCEL skip operation target %d on "DFID"\n", 1914 op_tgt, PFID(fid)); 1915 op_data->op_flags |= flag; 1916 rc = 0; 1917 } 1918 1919 return rc; 1920} 1921 1922/* 1923 * llite passes fid of an target inode in op_data->op_fid1 and id of directory in 1924 * op_data->op_fid2 1925 */ 1926static int lmv_link(struct obd_export *exp, struct md_op_data *op_data, 1927 struct ptlrpc_request **request) 1928{ 1929 struct obd_device *obd = exp->exp_obd; 1930 struct lmv_obd *lmv = &obd->u.lmv; 1931 struct lmv_tgt_desc *tgt; 1932 int rc; 1933 1934 rc = lmv_check_connect(obd); 1935 if (rc) 1936 return rc; 1937 1938 LASSERT(op_data->op_namelen != 0); 1939 1940 CDEBUG(D_INODE, "LINK "DFID":%*s to "DFID"\n", 1941 PFID(&op_data->op_fid2), op_data->op_namelen, 1942 op_data->op_name, PFID(&op_data->op_fid1)); 1943 1944 op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); 1945 op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); 1946 op_data->op_cap = cfs_curproc_cap_pack(); 1947 tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2); 1948 if (IS_ERR(tgt)) 1949 return PTR_ERR(tgt); 1950 1951 /* 1952 * Cancel UPDATE lock on child (fid1). 1953 */ 1954 op_data->op_flags |= MF_MDC_CANCEL_FID2; 1955 rc = lmv_early_cancel(exp, op_data, tgt->ltd_idx, LCK_EX, 1956 MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1); 1957 if (rc != 0) 1958 return rc; 1959 1960 rc = md_link(tgt->ltd_exp, op_data, request); 1961 1962 return rc; 1963} 1964 1965static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, 1966 const char *old, int oldlen, const char *new, int newlen, 1967 struct ptlrpc_request **request) 1968{ 1969 struct obd_device *obd = exp->exp_obd; 1970 struct lmv_obd *lmv = &obd->u.lmv; 1971 struct lmv_tgt_desc *src_tgt; 1972 struct lmv_tgt_desc *tgt_tgt; 1973 int rc; 1974 1975 LASSERT(oldlen != 0); 1976 1977 CDEBUG(D_INODE, "RENAME %*s in "DFID" to %*s in "DFID"\n", 1978 oldlen, old, PFID(&op_data->op_fid1), 1979 newlen, new, PFID(&op_data->op_fid2)); 1980 1981 rc = lmv_check_connect(obd); 1982 if (rc) 1983 return rc; 1984 1985 op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); 1986 op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); 1987 op_data->op_cap = cfs_curproc_cap_pack(); 1988 src_tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); 1989 if (IS_ERR(src_tgt)) 1990 return PTR_ERR(src_tgt); 1991 1992 tgt_tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2); 1993 if (IS_ERR(tgt_tgt)) 1994 return PTR_ERR(tgt_tgt); 1995 /* 1996 * LOOKUP lock on src child (fid3) should also be cancelled for 1997 * src_tgt in mdc_rename. 1998 */ 1999 op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3; 2000 2001 /* 2002 * Cancel UPDATE locks on tgt parent (fid2), tgt_tgt is its 2003 * own target. 2004 */ 2005 rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx, 2006 LCK_EX, MDS_INODELOCK_UPDATE, 2007 MF_MDC_CANCEL_FID2); 2008 2009 /* 2010 * Cancel LOOKUP locks on tgt child (fid4) for parent tgt_tgt. 2011 */ 2012 if (rc == 0) { 2013 rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx, 2014 LCK_EX, MDS_INODELOCK_LOOKUP, 2015 MF_MDC_CANCEL_FID4); 2016 } 2017 2018 /* 2019 * Cancel all the locks on tgt child (fid4). 2020 */ 2021 if (rc == 0) 2022 rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx, 2023 LCK_EX, MDS_INODELOCK_FULL, 2024 MF_MDC_CANCEL_FID4); 2025 2026 if (rc == 0) 2027 rc = md_rename(src_tgt->ltd_exp, op_data, old, oldlen, 2028 new, newlen, request); 2029 return rc; 2030} 2031 2032static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, 2033 void *ea, int ealen, void *ea2, int ea2len, 2034 struct ptlrpc_request **request, 2035 struct md_open_data **mod) 2036{ 2037 struct obd_device *obd = exp->exp_obd; 2038 struct lmv_obd *lmv = &obd->u.lmv; 2039 struct lmv_tgt_desc *tgt; 2040 int rc = 0; 2041 2042 rc = lmv_check_connect(obd); 2043 if (rc) 2044 return rc; 2045 2046 CDEBUG(D_INODE, "SETATTR for "DFID", valid 0x%x\n", 2047 PFID(&op_data->op_fid1), op_data->op_attr.ia_valid); 2048 2049 op_data->op_flags |= MF_MDC_CANCEL_FID1; 2050 tgt = lmv_find_target(lmv, &op_data->op_fid1); 2051 if (IS_ERR(tgt)) 2052 return PTR_ERR(tgt); 2053 2054 rc = md_setattr(tgt->ltd_exp, op_data, ea, ealen, ea2, 2055 ea2len, request, mod); 2056 2057 return rc; 2058} 2059 2060static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid, 2061 struct obd_capa *oc, struct ptlrpc_request **request) 2062{ 2063 struct obd_device *obd = exp->exp_obd; 2064 struct lmv_obd *lmv = &obd->u.lmv; 2065 struct lmv_tgt_desc *tgt; 2066 int rc; 2067 2068 rc = lmv_check_connect(obd); 2069 if (rc) 2070 return rc; 2071 2072 tgt = lmv_find_target(lmv, fid); 2073 if (IS_ERR(tgt)) 2074 return PTR_ERR(tgt); 2075 2076 rc = md_sync(tgt->ltd_exp, fid, oc, request); 2077 return rc; 2078} 2079 2080/* 2081 * Adjust a set of pages, each page containing an array of lu_dirpages, 2082 * so that each page can be used as a single logical lu_dirpage. 2083 * 2084 * A lu_dirpage is laid out as follows, where s = ldp_hash_start, 2085 * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a 2086 * struct lu_dirent. It has size up to LU_PAGE_SIZE. The ldp_hash_end 2087 * value is used as a cookie to request the next lu_dirpage in a 2088 * directory listing that spans multiple pages (two in this example): 2089 * ________ 2090 * | | 2091 * .|--------v------- -----. 2092 * |s|e|f|p|ent|ent| ... |ent| 2093 * '--|-------------- -----' Each CFS_PAGE contains a single 2094 * '------. lu_dirpage. 2095 * .---------v------- -----. 2096 * |s|e|f|p|ent| 0 | ... | 0 | 2097 * '----------------- -----' 2098 * 2099 * However, on hosts where the native VM page size (PAGE_CACHE_SIZE) is 2100 * larger than LU_PAGE_SIZE, a single host page may contain multiple 2101 * lu_dirpages. After reading the lu_dirpages from the MDS, the 2102 * ldp_hash_end of the first lu_dirpage refers to the one immediately 2103 * after it in the same CFS_PAGE (arrows simplified for brevity, but 2104 * in general e0==s1, e1==s2, etc.): 2105 * 2106 * .-------------------- -----. 2107 * |s0|e0|f0|p|ent|ent| ... |ent| 2108 * |---v---------------- -----| 2109 * |s1|e1|f1|p|ent|ent| ... |ent| 2110 * |---v---------------- -----| Here, each CFS_PAGE contains 2111 * ... multiple lu_dirpages. 2112 * |---v---------------- -----| 2113 * |s'|e'|f'|p|ent|ent| ... |ent| 2114 * '---|---------------- -----' 2115 * v 2116 * .----------------------------. 2117 * | next CFS_PAGE | 2118 * 2119 * This structure is transformed into a single logical lu_dirpage as follows: 2120 * 2121 * - Replace e0 with e' so the request for the next lu_dirpage gets the page 2122 * labeled 'next CFS_PAGE'. 2123 * 2124 * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether 2125 * a hash collision with the next page exists. 2126 * 2127 * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span 2128 * to the first entry of the next lu_dirpage. 2129 */ 2130#if PAGE_CACHE_SIZE > LU_PAGE_SIZE 2131static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs) 2132{ 2133 int i; 2134 2135 for (i = 0; i < ncfspgs; i++) { 2136 struct lu_dirpage *dp = kmap(pages[i]); 2137 struct lu_dirpage *first = dp; 2138 struct lu_dirent *end_dirent = NULL; 2139 struct lu_dirent *ent; 2140 __u64 hash_end = dp->ldp_hash_end; 2141 __u32 flags = dp->ldp_flags; 2142 2143 while (--nlupgs > 0) { 2144 ent = lu_dirent_start(dp); 2145 for (end_dirent = ent; ent != NULL; 2146 end_dirent = ent, ent = lu_dirent_next(ent)); 2147 2148 /* Advance dp to next lu_dirpage. */ 2149 dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE); 2150 2151 /* Check if we've reached the end of the CFS_PAGE. */ 2152 if (!((unsigned long)dp & ~CFS_PAGE_MASK)) 2153 break; 2154 2155 /* Save the hash and flags of this lu_dirpage. */ 2156 hash_end = dp->ldp_hash_end; 2157 flags = dp->ldp_flags; 2158 2159 /* Check if lu_dirpage contains no entries. */ 2160 if (!end_dirent) 2161 break; 2162 2163 /* Enlarge the end entry lde_reclen from 0 to 2164 * first entry of next lu_dirpage. */ 2165 LASSERT(le16_to_cpu(end_dirent->lde_reclen) == 0); 2166 end_dirent->lde_reclen = 2167 cpu_to_le16((char *)(dp->ldp_entries) - 2168 (char *)end_dirent); 2169 } 2170 2171 first->ldp_hash_end = hash_end; 2172 first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE); 2173 first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE); 2174 2175 kunmap(pages[i]); 2176 } 2177 LASSERTF(nlupgs == 0, "left = %d", nlupgs); 2178} 2179#else 2180#define lmv_adjust_dirpages(pages, ncfspgs, nlupgs) do {} while (0) 2181#endif /* PAGE_CACHE_SIZE > LU_PAGE_SIZE */ 2182 2183static int lmv_readpage(struct obd_export *exp, struct md_op_data *op_data, 2184 struct page **pages, struct ptlrpc_request **request) 2185{ 2186 struct obd_device *obd = exp->exp_obd; 2187 struct lmv_obd *lmv = &obd->u.lmv; 2188 __u64 offset = op_data->op_offset; 2189 int rc; 2190 int ncfspgs; /* pages read in PAGE_CACHE_SIZE */ 2191 int nlupgs; /* pages read in LU_PAGE_SIZE */ 2192 struct lmv_tgt_desc *tgt; 2193 2194 rc = lmv_check_connect(obd); 2195 if (rc) 2196 return rc; 2197 2198 CDEBUG(D_INODE, "READPAGE at %#llx from "DFID"\n", 2199 offset, PFID(&op_data->op_fid1)); 2200 2201 tgt = lmv_find_target(lmv, &op_data->op_fid1); 2202 if (IS_ERR(tgt)) 2203 return PTR_ERR(tgt); 2204 2205 rc = md_readpage(tgt->ltd_exp, op_data, pages, request); 2206 if (rc != 0) 2207 return rc; 2208 2209 ncfspgs = ((*request)->rq_bulk->bd_nob_transferred + PAGE_CACHE_SIZE - 1) 2210 >> PAGE_CACHE_SHIFT; 2211 nlupgs = (*request)->rq_bulk->bd_nob_transferred >> LU_PAGE_SHIFT; 2212 LASSERT(!((*request)->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK)); 2213 LASSERT(ncfspgs > 0 && ncfspgs <= op_data->op_npages); 2214 2215 CDEBUG(D_INODE, "read %d(%d)/%d pages\n", ncfspgs, nlupgs, 2216 op_data->op_npages); 2217 2218 lmv_adjust_dirpages(pages, ncfspgs, nlupgs); 2219 2220 return rc; 2221} 2222 2223static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, 2224 struct ptlrpc_request **request) 2225{ 2226 struct obd_device *obd = exp->exp_obd; 2227 struct lmv_obd *lmv = &obd->u.lmv; 2228 struct lmv_tgt_desc *tgt = NULL; 2229 struct mdt_body *body; 2230 int rc; 2231 2232 rc = lmv_check_connect(obd); 2233 if (rc) 2234 return rc; 2235retry: 2236 /* Send unlink requests to the MDT where the child is located */ 2237 if (likely(!fid_is_zero(&op_data->op_fid2))) 2238 tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2); 2239 else 2240 tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); 2241 if (IS_ERR(tgt)) 2242 return PTR_ERR(tgt); 2243 2244 op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); 2245 op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); 2246 op_data->op_cap = cfs_curproc_cap_pack(); 2247 2248 /* 2249 * If child's fid is given, cancel unused locks for it if it is from 2250 * another export than parent. 2251 * 2252 * LOOKUP lock for child (fid3) should also be cancelled on parent 2253 * tgt_tgt in mdc_unlink(). 2254 */ 2255 op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3; 2256 2257 /* 2258 * Cancel FULL locks on child (fid3). 2259 */ 2260 rc = lmv_early_cancel(exp, op_data, tgt->ltd_idx, LCK_EX, 2261 MDS_INODELOCK_FULL, MF_MDC_CANCEL_FID3); 2262 2263 if (rc != 0) 2264 return rc; 2265 2266 CDEBUG(D_INODE, "unlink with fid="DFID"/"DFID" -> mds #%d\n", 2267 PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx); 2268 2269 rc = md_unlink(tgt->ltd_exp, op_data, request); 2270 if (rc != 0 && rc != -EREMOTE) 2271 return rc; 2272 2273 body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY); 2274 if (body == NULL) 2275 return -EPROTO; 2276 2277 /* Not cross-ref case, just get out of here. */ 2278 if (likely(!(body->valid & OBD_MD_MDS))) 2279 return 0; 2280 2281 CDEBUG(D_INODE, "%s: try unlink to another MDT for "DFID"\n", 2282 exp->exp_obd->obd_name, PFID(&body->fid1)); 2283 2284 /* This is a remote object, try remote MDT, Note: it may 2285 * try more than 1 time here, Considering following case 2286 * /mnt/lustre is root on MDT0, remote1 is on MDT1 2287 * 1. Initially A does not know where remote1 is, it send 2288 * unlink RPC to MDT0, MDT0 return -EREMOTE, it will 2289 * resend unlink RPC to MDT1 (retry 1st time). 2290 * 2291 * 2. During the unlink RPC in flight, 2292 * client B mv /mnt/lustre/remote1 /mnt/lustre/remote2 2293 * and create new remote1, but on MDT0 2294 * 2295 * 3. MDT1 get unlink RPC(from A), then do remote lock on 2296 * /mnt/lustre, then lookup get fid of remote1, and find 2297 * it is remote dir again, and replay -EREMOTE again. 2298 * 2299 * 4. Then A will resend unlink RPC to MDT0. (retry 2nd times). 2300 * 2301 * In theory, it might try unlimited time here, but it should 2302 * be very rare case. */ 2303 op_data->op_fid2 = body->fid1; 2304 ptlrpc_req_finished(*request); 2305 *request = NULL; 2306 2307 goto retry; 2308} 2309 2310static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) 2311{ 2312 struct lmv_obd *lmv = &obd->u.lmv; 2313 int rc = 0; 2314 2315 switch (stage) { 2316 case OBD_CLEANUP_EARLY: 2317 /* XXX: here should be calling obd_precleanup() down to 2318 * stack. */ 2319 break; 2320 case OBD_CLEANUP_EXPORTS: 2321 fld_client_proc_fini(&lmv->lmv_fld); 2322 lprocfs_obd_cleanup(obd); 2323 break; 2324 default: 2325 break; 2326 } 2327 return rc; 2328} 2329 2330static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, 2331 __u32 keylen, void *key, __u32 *vallen, void *val, 2332 struct lov_stripe_md *lsm) 2333{ 2334 struct obd_device *obd; 2335 struct lmv_obd *lmv; 2336 int rc = 0; 2337 2338 obd = class_exp2obd(exp); 2339 if (obd == NULL) { 2340 CDEBUG(D_IOCTL, "Invalid client cookie %#llx\n", 2341 exp->exp_handle.h_cookie); 2342 return -EINVAL; 2343 } 2344 2345 lmv = &obd->u.lmv; 2346 if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) { 2347 struct lmv_tgt_desc *tgt; 2348 int i; 2349 2350 rc = lmv_check_connect(obd); 2351 if (rc) 2352 return rc; 2353 2354 LASSERT(*vallen == sizeof(__u32)); 2355 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 2356 tgt = lmv->tgts[i]; 2357 /* 2358 * All tgts should be connected when this gets called. 2359 */ 2360 if (tgt == NULL || tgt->ltd_exp == NULL) 2361 continue; 2362 2363 if (!obd_get_info(env, tgt->ltd_exp, keylen, key, 2364 vallen, val, NULL)) 2365 return 0; 2366 } 2367 return -EINVAL; 2368 } else if (KEY_IS(KEY_MAX_EASIZE) || 2369 KEY_IS(KEY_DEFAULT_EASIZE) || 2370 KEY_IS(KEY_MAX_COOKIESIZE) || 2371 KEY_IS(KEY_DEFAULT_COOKIESIZE) || 2372 KEY_IS(KEY_CONN_DATA)) { 2373 rc = lmv_check_connect(obd); 2374 if (rc) 2375 return rc; 2376 2377 /* 2378 * Forwarding this request to first MDS, it should know LOV 2379 * desc. 2380 */ 2381 rc = obd_get_info(env, lmv->tgts[0]->ltd_exp, keylen, key, 2382 vallen, val, NULL); 2383 if (!rc && KEY_IS(KEY_CONN_DATA)) 2384 exp->exp_connect_data = *(struct obd_connect_data *)val; 2385 return rc; 2386 } else if (KEY_IS(KEY_TGT_COUNT)) { 2387 *((int *)val) = lmv->desc.ld_tgt_count; 2388 return 0; 2389 } 2390 2391 CDEBUG(D_IOCTL, "Invalid key\n"); 2392 return -EINVAL; 2393} 2394 2395int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp, 2396 u32 keylen, void *key, u32 vallen, 2397 void *val, struct ptlrpc_request_set *set) 2398{ 2399 struct lmv_tgt_desc *tgt; 2400 struct obd_device *obd; 2401 struct lmv_obd *lmv; 2402 int rc = 0; 2403 2404 obd = class_exp2obd(exp); 2405 if (obd == NULL) { 2406 CDEBUG(D_IOCTL, "Invalid client cookie %#llx\n", 2407 exp->exp_handle.h_cookie); 2408 return -EINVAL; 2409 } 2410 lmv = &obd->u.lmv; 2411 2412 if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX)) { 2413 int i, err = 0; 2414 2415 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 2416 tgt = lmv->tgts[i]; 2417 2418 if (tgt == NULL || tgt->ltd_exp == NULL) 2419 continue; 2420 2421 err = obd_set_info_async(env, tgt->ltd_exp, 2422 keylen, key, vallen, val, set); 2423 if (err && rc == 0) 2424 rc = err; 2425 } 2426 2427 return rc; 2428 } 2429 2430 return -EINVAL; 2431} 2432 2433int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, 2434 struct lov_stripe_md *lsm) 2435{ 2436 struct obd_device *obd = class_exp2obd(exp); 2437 struct lmv_obd *lmv = &obd->u.lmv; 2438 struct lmv_stripe_md *meap; 2439 struct lmv_stripe_md *lsmp; 2440 int mea_size; 2441 int i; 2442 2443 mea_size = lmv_get_easize(lmv); 2444 if (!lmmp) 2445 return mea_size; 2446 2447 if (*lmmp && !lsm) { 2448 OBD_FREE_LARGE(*lmmp, mea_size); 2449 *lmmp = NULL; 2450 return 0; 2451 } 2452 2453 if (*lmmp == NULL) { 2454 OBD_ALLOC_LARGE(*lmmp, mea_size); 2455 if (*lmmp == NULL) 2456 return -ENOMEM; 2457 } 2458 2459 if (!lsm) 2460 return mea_size; 2461 2462 lsmp = (struct lmv_stripe_md *)lsm; 2463 meap = (struct lmv_stripe_md *)*lmmp; 2464 2465 if (lsmp->mea_magic != MEA_MAGIC_LAST_CHAR && 2466 lsmp->mea_magic != MEA_MAGIC_ALL_CHARS) 2467 return -EINVAL; 2468 2469 meap->mea_magic = cpu_to_le32(lsmp->mea_magic); 2470 meap->mea_count = cpu_to_le32(lsmp->mea_count); 2471 meap->mea_master = cpu_to_le32(lsmp->mea_master); 2472 2473 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 2474 meap->mea_ids[i] = lsmp->mea_ids[i]; 2475 fid_cpu_to_le(&meap->mea_ids[i], &lsmp->mea_ids[i]); 2476 } 2477 2478 return mea_size; 2479} 2480 2481int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, 2482 struct lov_mds_md *lmm, int lmm_size) 2483{ 2484 struct obd_device *obd = class_exp2obd(exp); 2485 struct lmv_stripe_md **tmea = (struct lmv_stripe_md **)lsmp; 2486 struct lmv_stripe_md *mea = (struct lmv_stripe_md *)lmm; 2487 struct lmv_obd *lmv = &obd->u.lmv; 2488 int mea_size; 2489 int i; 2490 __u32 magic; 2491 2492 mea_size = lmv_get_easize(lmv); 2493 if (lsmp == NULL) 2494 return mea_size; 2495 2496 if (*lsmp != NULL && lmm == NULL) { 2497 OBD_FREE_LARGE(*tmea, mea_size); 2498 *lsmp = NULL; 2499 return 0; 2500 } 2501 2502 LASSERT(mea_size == lmm_size); 2503 2504 OBD_ALLOC_LARGE(*tmea, mea_size); 2505 if (*tmea == NULL) 2506 return -ENOMEM; 2507 2508 if (!lmm) 2509 return mea_size; 2510 2511 if (mea->mea_magic == MEA_MAGIC_LAST_CHAR || 2512 mea->mea_magic == MEA_MAGIC_ALL_CHARS || 2513 mea->mea_magic == MEA_MAGIC_HASH_SEGMENT) { 2514 magic = le32_to_cpu(mea->mea_magic); 2515 } else { 2516 /* 2517 * Old mea is not handled here. 2518 */ 2519 CERROR("Old not supportable EA is found\n"); 2520 LBUG(); 2521 } 2522 2523 (*tmea)->mea_magic = magic; 2524 (*tmea)->mea_count = le32_to_cpu(mea->mea_count); 2525 (*tmea)->mea_master = le32_to_cpu(mea->mea_master); 2526 2527 for (i = 0; i < (*tmea)->mea_count; i++) { 2528 (*tmea)->mea_ids[i] = mea->mea_ids[i]; 2529 fid_le_to_cpu(&(*tmea)->mea_ids[i], &(*tmea)->mea_ids[i]); 2530 } 2531 return mea_size; 2532} 2533 2534static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, 2535 ldlm_policy_data_t *policy, ldlm_mode_t mode, 2536 ldlm_cancel_flags_t flags, void *opaque) 2537{ 2538 struct obd_device *obd = exp->exp_obd; 2539 struct lmv_obd *lmv = &obd->u.lmv; 2540 int rc = 0; 2541 int err; 2542 int i; 2543 2544 LASSERT(fid != NULL); 2545 2546 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 2547 if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL || 2548 lmv->tgts[i]->ltd_active == 0) 2549 continue; 2550 2551 err = md_cancel_unused(lmv->tgts[i]->ltd_exp, fid, 2552 policy, mode, flags, opaque); 2553 if (!rc) 2554 rc = err; 2555 } 2556 return rc; 2557} 2558 2559int lmv_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data, 2560 __u64 *bits) 2561{ 2562 struct lmv_obd *lmv = &exp->exp_obd->u.lmv; 2563 int rc; 2564 2565 rc = md_set_lock_data(lmv->tgts[0]->ltd_exp, lockh, data, bits); 2566 return rc; 2567} 2568 2569ldlm_mode_t lmv_lock_match(struct obd_export *exp, __u64 flags, 2570 const struct lu_fid *fid, ldlm_type_t type, 2571 ldlm_policy_data_t *policy, ldlm_mode_t mode, 2572 struct lustre_handle *lockh) 2573{ 2574 struct obd_device *obd = exp->exp_obd; 2575 struct lmv_obd *lmv = &obd->u.lmv; 2576 ldlm_mode_t rc; 2577 int i; 2578 2579 CDEBUG(D_INODE, "Lock match for "DFID"\n", PFID(fid)); 2580 2581 /* 2582 * With CMD every object can have two locks in different namespaces: 2583 * lookup lock in space of mds storing direntry and update/open lock in 2584 * space of mds storing inode. Thus we check all targets, not only that 2585 * one fid was created in. 2586 */ 2587 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 2588 if (lmv->tgts[i] == NULL || 2589 lmv->tgts[i]->ltd_exp == NULL || 2590 lmv->tgts[i]->ltd_active == 0) 2591 continue; 2592 2593 rc = md_lock_match(lmv->tgts[i]->ltd_exp, flags, fid, 2594 type, policy, mode, lockh); 2595 if (rc) 2596 return rc; 2597 } 2598 2599 return 0; 2600} 2601 2602int lmv_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, 2603 struct obd_export *dt_exp, struct obd_export *md_exp, 2604 struct lustre_md *md) 2605{ 2606 struct lmv_obd *lmv = &exp->exp_obd->u.lmv; 2607 2608 return md_get_lustre_md(lmv->tgts[0]->ltd_exp, req, dt_exp, md_exp, md); 2609} 2610 2611int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md) 2612{ 2613 struct obd_device *obd = exp->exp_obd; 2614 struct lmv_obd *lmv = &obd->u.lmv; 2615 2616 if (md->mea) 2617 obd_free_memmd(exp, (void *)&md->mea); 2618 return md_free_lustre_md(lmv->tgts[0]->ltd_exp, md); 2619} 2620 2621int lmv_set_open_replay_data(struct obd_export *exp, 2622 struct obd_client_handle *och, 2623 struct lookup_intent *it) 2624{ 2625 struct obd_device *obd = exp->exp_obd; 2626 struct lmv_obd *lmv = &obd->u.lmv; 2627 struct lmv_tgt_desc *tgt; 2628 2629 tgt = lmv_find_target(lmv, &och->och_fid); 2630 if (IS_ERR(tgt)) 2631 return PTR_ERR(tgt); 2632 2633 return md_set_open_replay_data(tgt->ltd_exp, och, it); 2634} 2635 2636int lmv_clear_open_replay_data(struct obd_export *exp, 2637 struct obd_client_handle *och) 2638{ 2639 struct obd_device *obd = exp->exp_obd; 2640 struct lmv_obd *lmv = &obd->u.lmv; 2641 struct lmv_tgt_desc *tgt; 2642 2643 tgt = lmv_find_target(lmv, &och->och_fid); 2644 if (IS_ERR(tgt)) 2645 return PTR_ERR(tgt); 2646 2647 return md_clear_open_replay_data(tgt->ltd_exp, och); 2648} 2649 2650static int lmv_get_remote_perm(struct obd_export *exp, 2651 const struct lu_fid *fid, 2652 struct obd_capa *oc, __u32 suppgid, 2653 struct ptlrpc_request **request) 2654{ 2655 struct obd_device *obd = exp->exp_obd; 2656 struct lmv_obd *lmv = &obd->u.lmv; 2657 struct lmv_tgt_desc *tgt; 2658 int rc; 2659 2660 rc = lmv_check_connect(obd); 2661 if (rc) 2662 return rc; 2663 2664 tgt = lmv_find_target(lmv, fid); 2665 if (IS_ERR(tgt)) 2666 return PTR_ERR(tgt); 2667 2668 rc = md_get_remote_perm(tgt->ltd_exp, fid, oc, suppgid, request); 2669 return rc; 2670} 2671 2672static int lmv_renew_capa(struct obd_export *exp, struct obd_capa *oc, 2673 renew_capa_cb_t cb) 2674{ 2675 struct obd_device *obd = exp->exp_obd; 2676 struct lmv_obd *lmv = &obd->u.lmv; 2677 struct lmv_tgt_desc *tgt; 2678 int rc; 2679 2680 rc = lmv_check_connect(obd); 2681 if (rc) 2682 return rc; 2683 2684 tgt = lmv_find_target(lmv, &oc->c_capa.lc_fid); 2685 if (IS_ERR(tgt)) 2686 return PTR_ERR(tgt); 2687 2688 rc = md_renew_capa(tgt->ltd_exp, oc, cb); 2689 return rc; 2690} 2691 2692int lmv_unpack_capa(struct obd_export *exp, struct ptlrpc_request *req, 2693 const struct req_msg_field *field, struct obd_capa **oc) 2694{ 2695 struct lmv_obd *lmv = &exp->exp_obd->u.lmv; 2696 2697 return md_unpack_capa(lmv->tgts[0]->ltd_exp, req, field, oc); 2698} 2699 2700int lmv_intent_getattr_async(struct obd_export *exp, 2701 struct md_enqueue_info *minfo, 2702 struct ldlm_enqueue_info *einfo) 2703{ 2704 struct md_op_data *op_data = &minfo->mi_data; 2705 struct obd_device *obd = exp->exp_obd; 2706 struct lmv_obd *lmv = &obd->u.lmv; 2707 struct lmv_tgt_desc *tgt = NULL; 2708 int rc; 2709 2710 rc = lmv_check_connect(obd); 2711 if (rc) 2712 return rc; 2713 2714 tgt = lmv_find_target(lmv, &op_data->op_fid1); 2715 if (IS_ERR(tgt)) 2716 return PTR_ERR(tgt); 2717 2718 rc = md_intent_getattr_async(tgt->ltd_exp, minfo, einfo); 2719 return rc; 2720} 2721 2722int lmv_revalidate_lock(struct obd_export *exp, struct lookup_intent *it, 2723 struct lu_fid *fid, __u64 *bits) 2724{ 2725 struct obd_device *obd = exp->exp_obd; 2726 struct lmv_obd *lmv = &obd->u.lmv; 2727 struct lmv_tgt_desc *tgt; 2728 int rc; 2729 2730 rc = lmv_check_connect(obd); 2731 if (rc) 2732 return rc; 2733 2734 tgt = lmv_find_target(lmv, fid); 2735 if (IS_ERR(tgt)) 2736 return PTR_ERR(tgt); 2737 2738 rc = md_revalidate_lock(tgt->ltd_exp, it, fid, bits); 2739 return rc; 2740} 2741 2742/** 2743 * For lmv, only need to send request to master MDT, and the master MDT will 2744 * process with other slave MDTs. The only exception is Q_GETOQUOTA for which 2745 * we directly fetch data from the slave MDTs. 2746 */ 2747int lmv_quotactl(struct obd_device *unused, struct obd_export *exp, 2748 struct obd_quotactl *oqctl) 2749{ 2750 struct obd_device *obd = class_exp2obd(exp); 2751 struct lmv_obd *lmv = &obd->u.lmv; 2752 struct lmv_tgt_desc *tgt = lmv->tgts[0]; 2753 int rc = 0, i; 2754 __u64 curspace, curinodes; 2755 2756 if (!lmv->desc.ld_tgt_count || !tgt->ltd_active) { 2757 CERROR("master lmv inactive\n"); 2758 return -EIO; 2759 } 2760 2761 if (oqctl->qc_cmd != Q_GETOQUOTA) { 2762 rc = obd_quotactl(tgt->ltd_exp, oqctl); 2763 return rc; 2764 } 2765 2766 curspace = curinodes = 0; 2767 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 2768 int err; 2769 tgt = lmv->tgts[i]; 2770 2771 if (tgt == NULL || tgt->ltd_exp == NULL || tgt->ltd_active == 0) 2772 continue; 2773 if (!tgt->ltd_active) { 2774 CDEBUG(D_HA, "mdt %d is inactive.\n", i); 2775 continue; 2776 } 2777 2778 err = obd_quotactl(tgt->ltd_exp, oqctl); 2779 if (err) { 2780 CERROR("getquota on mdt %d failed. %d\n", i, err); 2781 if (!rc) 2782 rc = err; 2783 } else { 2784 curspace += oqctl->qc_dqblk.dqb_curspace; 2785 curinodes += oqctl->qc_dqblk.dqb_curinodes; 2786 } 2787 } 2788 oqctl->qc_dqblk.dqb_curspace = curspace; 2789 oqctl->qc_dqblk.dqb_curinodes = curinodes; 2790 2791 return rc; 2792} 2793 2794int lmv_quotacheck(struct obd_device *unused, struct obd_export *exp, 2795 struct obd_quotactl *oqctl) 2796{ 2797 struct obd_device *obd = class_exp2obd(exp); 2798 struct lmv_obd *lmv = &obd->u.lmv; 2799 struct lmv_tgt_desc *tgt; 2800 int i, rc = 0; 2801 2802 for (i = 0; i < lmv->desc.ld_tgt_count; i++) { 2803 int err; 2804 tgt = lmv->tgts[i]; 2805 if (tgt == NULL || tgt->ltd_exp == NULL || !tgt->ltd_active) { 2806 CERROR("lmv idx %d inactive\n", i); 2807 return -EIO; 2808 } 2809 2810 err = obd_quotacheck(tgt->ltd_exp, oqctl); 2811 if (err && !rc) 2812 rc = err; 2813 } 2814 2815 return rc; 2816} 2817 2818struct obd_ops lmv_obd_ops = { 2819 .o_owner = THIS_MODULE, 2820 .o_setup = lmv_setup, 2821 .o_cleanup = lmv_cleanup, 2822 .o_precleanup = lmv_precleanup, 2823 .o_process_config = lmv_process_config, 2824 .o_connect = lmv_connect, 2825 .o_disconnect = lmv_disconnect, 2826 .o_statfs = lmv_statfs, 2827 .o_get_info = lmv_get_info, 2828 .o_set_info_async = lmv_set_info_async, 2829 .o_packmd = lmv_packmd, 2830 .o_unpackmd = lmv_unpackmd, 2831 .o_notify = lmv_notify, 2832 .o_get_uuid = lmv_get_uuid, 2833 .o_iocontrol = lmv_iocontrol, 2834 .o_quotacheck = lmv_quotacheck, 2835 .o_quotactl = lmv_quotactl 2836}; 2837 2838struct md_ops lmv_md_ops = { 2839 .m_getstatus = lmv_getstatus, 2840 .m_null_inode = lmv_null_inode, 2841 .m_find_cbdata = lmv_find_cbdata, 2842 .m_close = lmv_close, 2843 .m_create = lmv_create, 2844 .m_done_writing = lmv_done_writing, 2845 .m_enqueue = lmv_enqueue, 2846 .m_getattr = lmv_getattr, 2847 .m_getxattr = lmv_getxattr, 2848 .m_getattr_name = lmv_getattr_name, 2849 .m_intent_lock = lmv_intent_lock, 2850 .m_link = lmv_link, 2851 .m_rename = lmv_rename, 2852 .m_setattr = lmv_setattr, 2853 .m_setxattr = lmv_setxattr, 2854 .m_sync = lmv_sync, 2855 .m_readpage = lmv_readpage, 2856 .m_unlink = lmv_unlink, 2857 .m_init_ea_size = lmv_init_ea_size, 2858 .m_cancel_unused = lmv_cancel_unused, 2859 .m_set_lock_data = lmv_set_lock_data, 2860 .m_lock_match = lmv_lock_match, 2861 .m_get_lustre_md = lmv_get_lustre_md, 2862 .m_free_lustre_md = lmv_free_lustre_md, 2863 .m_set_open_replay_data = lmv_set_open_replay_data, 2864 .m_clear_open_replay_data = lmv_clear_open_replay_data, 2865 .m_renew_capa = lmv_renew_capa, 2866 .m_unpack_capa = lmv_unpack_capa, 2867 .m_get_remote_perm = lmv_get_remote_perm, 2868 .m_intent_getattr_async = lmv_intent_getattr_async, 2869 .m_revalidate_lock = lmv_revalidate_lock 2870}; 2871 2872int __init lmv_init(void) 2873{ 2874 struct lprocfs_static_vars lvars; 2875 int rc; 2876 2877 lprocfs_lmv_init_vars(&lvars); 2878 2879 rc = class_register_type(&lmv_obd_ops, &lmv_md_ops, 2880 lvars.module_vars, LUSTRE_LMV_NAME, NULL); 2881 return rc; 2882} 2883 2884static void lmv_exit(void) 2885{ 2886 class_unregister_type(LUSTRE_LMV_NAME); 2887} 2888 2889MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>"); 2890MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver"); 2891MODULE_LICENSE("GPL"); 2892 2893module_init(lmv_init); 2894module_exit(lmv_exit); 2895