1/* 2 * GPL HEADER START 3 * 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 only, 8 * as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License version 2 for more details (a copy is included 14 * in the LICENSE file that accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License 17 * version 2 along with this program; If not, see 18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf 19 * 20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 21 * CA 95054 USA or visit www.sun.com if you need additional information or 22 * have any questions. 23 * 24 * GPL HEADER END 25 */ 26/* 27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 28 * Use is subject to license terms. 29 * 30 * Copyright (c) 2011, 2012, Intel Corporation. 31 */ 32/* 33 * This file is part of Lustre, http://www.lustre.org/ 34 * Lustre is a trademark of Sun Microsystems, Inc. 35 * 36 * lustre/llite/dir.c 37 * 38 * Directory code for lustre client. 
39 */ 40 41#include <linux/fs.h> 42#include <linux/pagemap.h> 43#include <linux/mm.h> 44#include <asm/uaccess.h> 45#include <linux/buffer_head.h> /* for wait_on_buffer */ 46#include <linux/pagevec.h> 47#include <linux/prefetch.h> 48 49#define DEBUG_SUBSYSTEM S_LLITE 50 51#include "../include/obd_support.h" 52#include "../include/obd_class.h" 53#include "../include/lustre_lib.h" 54#include "../include/lustre/lustre_idl.h" 55#include "../include/lustre_lite.h" 56#include "../include/lustre_dlm.h" 57#include "../include/lustre_fid.h" 58#include "llite_internal.h" 59 60/* 61 * (new) readdir implementation overview. 62 * 63 * Original lustre readdir implementation cached exact copy of raw directory 64 * pages on the client. These pages were indexed in client page cache by 65 * logical offset in the directory file. This design, while very simple and 66 * intuitive had some inherent problems: 67 * 68 * . it implies that byte offset to the directory entry serves as a 69 * telldir(3)/seekdir(3) cookie, but that offset is not stable: in 70 * ext3/htree directory entries may move due to splits, and more 71 * importantly, 72 * 73 * . it is incompatible with the design of split directories for cmd3, 74 * that assumes that names are distributed across nodes based on their 75 * hash, and so readdir should be done in hash order. 76 * 77 * New readdir implementation does readdir in hash order, and uses hash of a 78 * file name as a telldir/seekdir cookie. This led to number of complications: 79 * 80 * . hash is not unique, so it cannot be used to index cached directory 81 * pages on the client (note, that it requires a whole pageful of hash 82 * collided entries to cause two pages to have identical hashes); 83 * 84 * . hash is not unique, so it cannot, strictly speaking, be used as an 85 * entry cookie. ext3/htree has the same problem and lustre implementation 86 * mimics their solution: seekdir(hash) positions directory at the first 87 * entry with the given hash. 
 *
 * Client side.
 *
 * 0. caching
 *
 * Client caches directory pages using hash of the first entry as an index. As
 * noted above hash is not unique, so this solution doesn't work as is:
 * special processing is needed for "page hash chains" (i.e., sequences of
 * pages filled with entries all having the same hash value).
 *
 * First, such chains have to be detected. To this end, server returns to the
 * client the hash of the first entry on the page next to one returned. When
 * client detects that this hash is the same as hash of the first entry on the
 * returned page, page hash collision has to be handled. Pages in the
 * hash chain, except first one, are termed "overflow pages".
 *
 * Solution to index uniqueness problem is to not cache overflow
 * pages. Instead, when page hash collision is detected, all overflow pages
 * from emerging chain are immediately requested from the server and placed in
 * a special data structure (struct ll_dir_chain). This data structure is used
 * by ll_readdir() to process entries from overflow pages. When readdir
 * invocation finishes, overflow pages are discarded. If page hash collision
 * chain wasn't completely processed, next call to readdir will again detect
 * page hash collision, again read overflow pages in, process next portion of
 * entries and again discard the pages. This is not as wasteful as it looks,
 * because, given reasonable hash, page hash collisions are extremely rare.
 *
 * 1. directory positioning
 *
 * When seekdir(hash) is called, original
 *
 *
 *
 *
 *
 *
 *
 *
 * Server.
 *
 * identification of and access to overflow pages
 *
 * page format
 *
 * Page in MDS_READPAGE RPC is packed in LU_PAGE_SIZE, and each page contains
 * a header lu_dirpage which describes the start/end hash, and whether this
 * page is empty (contains no dir entry) or its hash collides with next page.
135 * After client receives reply, several pages will be integrated into dir page 136 * in PAGE_CACHE_SIZE (if PAGE_CACHE_SIZE greater than LU_PAGE_SIZE), and the 137 * lu_dirpage for this integrated page will be adjusted. See 138 * lmv_adjust_dirpages(). 139 * 140 */ 141 142/* returns the page unlocked, but with a reference */ 143static int ll_dir_filler(void *_hash, struct page *page0) 144{ 145 struct inode *inode = page0->mapping->host; 146 int hash64 = ll_i2sbi(inode)->ll_flags & LL_SBI_64BIT_HASH; 147 struct obd_export *exp = ll_i2sbi(inode)->ll_md_exp; 148 struct ptlrpc_request *request; 149 struct mdt_body *body; 150 struct md_op_data *op_data; 151 __u64 hash = *((__u64 *)_hash); 152 struct page **page_pool; 153 struct page *page; 154 struct lu_dirpage *dp; 155 int max_pages = ll_i2sbi(inode)->ll_md_brw_size >> PAGE_CACHE_SHIFT; 156 int nrdpgs = 0; /* number of pages read actually */ 157 int npages; 158 int i; 159 int rc; 160 161 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash %llu\n", 162 inode->i_ino, inode->i_generation, inode, hash); 163 164 LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES); 165 166 page_pool = kzalloc(sizeof(page) * max_pages, GFP_NOFS); 167 if (page_pool) { 168 page_pool[0] = page0; 169 } else { 170 page_pool = &page0; 171 max_pages = 1; 172 } 173 for (npages = 1; npages < max_pages; npages++) { 174 page = page_cache_alloc_cold(inode->i_mapping); 175 if (!page) 176 break; 177 page_pool[npages] = page; 178 } 179 180 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 181 LUSTRE_OPC_ANY, NULL); 182 op_data->op_npages = npages; 183 op_data->op_offset = hash; 184 rc = md_readpage(exp, op_data, page_pool, &request); 185 ll_finish_md_op_data(op_data); 186 if (rc == 0) { 187 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); 188 /* Checked by mdc_readpage() */ 189 LASSERT(body != NULL); 190 191 if (body->valid & OBD_MD_FLSIZE) 192 cl_isize_write(inode, body->size); 193 194 nrdpgs = 
(request->rq_bulk->bd_nob_transferred+PAGE_CACHE_SIZE-1) 195 >> PAGE_CACHE_SHIFT; 196 SetPageUptodate(page0); 197 } 198 unlock_page(page0); 199 ptlrpc_req_finished(request); 200 201 CDEBUG(D_VFSTRACE, "read %d/%d pages\n", nrdpgs, npages); 202 203 ll_pagevec_init(&lru_pvec, 0); 204 for (i = 1; i < npages; i++) { 205 unsigned long offset; 206 int ret; 207 208 page = page_pool[i]; 209 210 if (rc < 0 || i >= nrdpgs) { 211 page_cache_release(page); 212 continue; 213 } 214 215 SetPageUptodate(page); 216 217 dp = kmap(page); 218 hash = le64_to_cpu(dp->ldp_hash_start); 219 kunmap(page); 220 221 offset = hash_x_index(hash, hash64); 222 223 prefetchw(&page->flags); 224 ret = add_to_page_cache_lru(page, inode->i_mapping, offset, 225 GFP_KERNEL); 226 if (ret == 0) { 227 unlock_page(page); 228 if (ll_pagevec_add(&lru_pvec, page) == 0) 229 ll_pagevec_lru_add_file(&lru_pvec); 230 } else { 231 CDEBUG(D_VFSTRACE, "page %lu add to page cache failed:" 232 " %d\n", offset, ret); 233 } 234 page_cache_release(page); 235 } 236 ll_pagevec_lru_add_file(&lru_pvec); 237 238 if (page_pool != &page0) 239 OBD_FREE(page_pool, sizeof(struct page *) * max_pages); 240 return rc; 241} 242 243static void ll_check_page(struct inode *dir, struct page *page) 244{ 245 /* XXX: check page format later */ 246 SetPageChecked(page); 247} 248 249void ll_release_page(struct page *page, int remove) 250{ 251 kunmap(page); 252 if (remove) { 253 lock_page(page); 254 if (likely(page->mapping != NULL)) 255 truncate_complete_page(page->mapping, page); 256 unlock_page(page); 257 } 258 page_cache_release(page); 259} 260 261/* 262 * Find, kmap and return page that contains given hash. 
263 */ 264static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash, 265 __u64 *start, __u64 *end) 266{ 267 int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH; 268 struct address_space *mapping = dir->i_mapping; 269 /* 270 * Complement of hash is used as an index so that 271 * radix_tree_gang_lookup() can be used to find a page with starting 272 * hash _smaller_ than one we are looking for. 273 */ 274 unsigned long offset = hash_x_index(*hash, hash64); 275 struct page *page; 276 int found; 277 278 TREE_READ_LOCK_IRQ(mapping); 279 found = radix_tree_gang_lookup(&mapping->page_tree, 280 (void **)&page, offset, 1); 281 if (found > 0) { 282 struct lu_dirpage *dp; 283 284 page_cache_get(page); 285 TREE_READ_UNLOCK_IRQ(mapping); 286 /* 287 * In contrast to find_lock_page() we are sure that directory 288 * page cannot be truncated (while DLM lock is held) and, 289 * hence, can avoid restart. 290 * 291 * In fact, page cannot be locked here at all, because 292 * ll_dir_filler() does synchronous io. 293 */ 294 wait_on_page_locked(page); 295 if (PageUptodate(page)) { 296 dp = kmap(page); 297 if (BITS_PER_LONG == 32 && hash64) { 298 *start = le64_to_cpu(dp->ldp_hash_start) >> 32; 299 *end = le64_to_cpu(dp->ldp_hash_end) >> 32; 300 *hash = *hash >> 32; 301 } else { 302 *start = le64_to_cpu(dp->ldp_hash_start); 303 *end = le64_to_cpu(dp->ldp_hash_end); 304 } 305 LASSERTF(*start <= *hash, "start = %#llx,end = %#llx,hash = %#llx\n", 306 *start, *end, *hash); 307 CDEBUG(D_VFSTRACE, "page %lu [%llu %llu], hash %llu\n", 308 offset, *start, *end, *hash); 309 if (*hash > *end) { 310 ll_release_page(page, 0); 311 page = NULL; 312 } else if (*end != *start && *hash == *end) { 313 /* 314 * upon hash collision, remove this page, 315 * otherwise put page reference, and 316 * ll_get_dir_page() will issue RPC to fetch 317 * the page we want. 
318 */ 319 ll_release_page(page, 320 le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE); 321 page = NULL; 322 } 323 } else { 324 page_cache_release(page); 325 page = ERR_PTR(-EIO); 326 } 327 328 } else { 329 TREE_READ_UNLOCK_IRQ(mapping); 330 page = NULL; 331 } 332 return page; 333} 334 335struct page *ll_get_dir_page(struct inode *dir, __u64 hash, 336 struct ll_dir_chain *chain) 337{ 338 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} }; 339 struct address_space *mapping = dir->i_mapping; 340 struct lustre_handle lockh; 341 struct lu_dirpage *dp; 342 struct page *page; 343 ldlm_mode_t mode; 344 int rc; 345 __u64 start = 0; 346 __u64 end = 0; 347 __u64 lhash = hash; 348 struct ll_inode_info *lli = ll_i2info(dir); 349 int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH; 350 351 mode = LCK_PR; 352 rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED, 353 ll_inode2fid(dir), LDLM_IBITS, &policy, mode, &lockh); 354 if (!rc) { 355 struct ldlm_enqueue_info einfo = { 356 .ei_type = LDLM_IBITS, 357 .ei_mode = mode, 358 .ei_cb_bl = ll_md_blocking_ast, 359 .ei_cb_cp = ldlm_completion_ast, 360 }; 361 struct lookup_intent it = { .it_op = IT_READDIR }; 362 struct ptlrpc_request *request; 363 struct md_op_data *op_data; 364 365 op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0, 366 LUSTRE_OPC_ANY, NULL); 367 if (IS_ERR(op_data)) 368 return (void *)op_data; 369 370 rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it, 371 op_data, &lockh, NULL, 0, NULL, 0); 372 373 ll_finish_md_op_data(op_data); 374 375 request = (struct ptlrpc_request *)it.d.lustre.it_data; 376 if (request) 377 ptlrpc_req_finished(request); 378 if (rc < 0) { 379 CERROR("lock enqueue: "DFID" at %llu: rc %d\n", 380 PFID(ll_inode2fid(dir)), hash, rc); 381 return ERR_PTR(rc); 382 } 383 384 CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n", 385 dir, dir->i_ino, dir->i_generation); 386 md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, 387 &it.d.lustre.it_lock_handle, dir, NULL); 
388 } else { 389 /* for cross-ref object, l_ast_data of the lock may not be set, 390 * we reset it here */ 391 md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &lockh.cookie, 392 dir, NULL); 393 } 394 ldlm_lock_dump_handle(D_OTHER, &lockh); 395 396 mutex_lock(&lli->lli_readdir_mutex); 397 page = ll_dir_page_locate(dir, &lhash, &start, &end); 398 if (IS_ERR(page)) { 399 CERROR("dir page locate: "DFID" at %llu: rc %ld\n", 400 PFID(ll_inode2fid(dir)), lhash, PTR_ERR(page)); 401 goto out_unlock; 402 } else if (page != NULL) { 403 /* 404 * XXX nikita: not entirely correct handling of a corner case: 405 * suppose hash chain of entries with hash value HASH crosses 406 * border between pages P0 and P1. First both P0 and P1 are 407 * cached, seekdir() is called for some entry from the P0 part 408 * of the chain. Later P0 goes out of cache. telldir(HASH) 409 * happens and finds P1, as it starts with matching hash 410 * value. Remaining entries from P0 part of the chain are 411 * skipped. (Is that really a bug?) 412 * 413 * Possible solutions: 0. don't cache P1 is such case, handle 414 * it as an "overflow" page. 1. invalidate all pages at 415 * once. 2. use HASH|1 as an index for P1. 
416 */ 417 goto hash_collision; 418 } 419 420 page = read_cache_page(mapping, hash_x_index(hash, hash64), 421 ll_dir_filler, &lhash); 422 if (IS_ERR(page)) { 423 CERROR("read cache page: "DFID" at %llu: rc %ld\n", 424 PFID(ll_inode2fid(dir)), hash, PTR_ERR(page)); 425 goto out_unlock; 426 } 427 428 wait_on_page_locked(page); 429 (void)kmap(page); 430 if (!PageUptodate(page)) { 431 CERROR("page not updated: "DFID" at %llu: rc %d\n", 432 PFID(ll_inode2fid(dir)), hash, -5); 433 goto fail; 434 } 435 if (!PageChecked(page)) 436 ll_check_page(dir, page); 437 if (PageError(page)) { 438 CERROR("page error: "DFID" at %llu: rc %d\n", 439 PFID(ll_inode2fid(dir)), hash, -5); 440 goto fail; 441 } 442hash_collision: 443 dp = page_address(page); 444 if (BITS_PER_LONG == 32 && hash64) { 445 start = le64_to_cpu(dp->ldp_hash_start) >> 32; 446 end = le64_to_cpu(dp->ldp_hash_end) >> 32; 447 lhash = hash >> 32; 448 } else { 449 start = le64_to_cpu(dp->ldp_hash_start); 450 end = le64_to_cpu(dp->ldp_hash_end); 451 lhash = hash; 452 } 453 if (end == start) { 454 LASSERT(start == lhash); 455 CWARN("Page-wide hash collision: %llu\n", end); 456 if (BITS_PER_LONG == 32 && hash64) 457 CWARN("Real page-wide hash collision at [%llu %llu] with hash %llu\n", 458 le64_to_cpu(dp->ldp_hash_start), 459 le64_to_cpu(dp->ldp_hash_end), hash); 460 /* 461 * Fetch whole overflow chain... 462 * 463 * XXX not yet. 
464 */ 465 goto fail; 466 } 467out_unlock: 468 mutex_unlock(&lli->lli_readdir_mutex); 469 ldlm_lock_decref(&lockh, mode); 470 return page; 471 472fail: 473 ll_release_page(page, 1); 474 page = ERR_PTR(-EIO); 475 goto out_unlock; 476} 477 478int ll_dir_read(struct inode *inode, struct dir_context *ctx) 479{ 480 struct ll_inode_info *info = ll_i2info(inode); 481 struct ll_sb_info *sbi = ll_i2sbi(inode); 482 __u64 pos = ctx->pos; 483 int api32 = ll_need_32bit_api(sbi); 484 int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; 485 struct page *page; 486 struct ll_dir_chain chain; 487 int done = 0; 488 int rc = 0; 489 490 ll_dir_chain_init(&chain); 491 492 page = ll_get_dir_page(inode, pos, &chain); 493 494 while (rc == 0 && !done) { 495 struct lu_dirpage *dp; 496 struct lu_dirent *ent; 497 498 if (!IS_ERR(page)) { 499 /* 500 * If page is empty (end of directory is reached), 501 * use this value. 502 */ 503 __u64 hash = MDS_DIR_END_OFF; 504 __u64 next; 505 506 dp = page_address(page); 507 for (ent = lu_dirent_start(dp); ent != NULL && !done; 508 ent = lu_dirent_next(ent)) { 509 __u16 type; 510 int namelen; 511 struct lu_fid fid; 512 __u64 lhash; 513 __u64 ino; 514 515 /* 516 * XXX: implement correct swabbing here. 517 */ 518 519 hash = le64_to_cpu(ent->lde_hash); 520 if (hash < pos) 521 /* 522 * Skip until we find target hash 523 * value. 524 */ 525 continue; 526 527 namelen = le16_to_cpu(ent->lde_namelen); 528 if (namelen == 0) 529 /* 530 * Skip dummy record. 531 */ 532 continue; 533 534 if (api32 && hash64) 535 lhash = hash >> 32; 536 else 537 lhash = hash; 538 fid_le_to_cpu(&fid, &ent->lde_fid); 539 ino = cl_fid_build_ino(&fid, api32); 540 type = ll_dirent_type_get(ent); 541 ctx->pos = lhash; 542 /* For 'll_nfs_get_name_filldir()', it will try 543 * to access the 'ent' through its 'lde_name', 544 * so the parameter 'name' for 'ctx->actor()' 545 * must be part of the 'ent'. 
546 */ 547 done = !dir_emit(ctx, ent->lde_name, 548 namelen, ino, type); 549 } 550 next = le64_to_cpu(dp->ldp_hash_end); 551 if (!done) { 552 pos = next; 553 if (pos == MDS_DIR_END_OFF) { 554 /* 555 * End of directory reached. 556 */ 557 done = 1; 558 ll_release_page(page, 0); 559 } else if (1 /* chain is exhausted*/) { 560 /* 561 * Normal case: continue to the next 562 * page. 563 */ 564 ll_release_page(page, 565 le32_to_cpu(dp->ldp_flags) & 566 LDF_COLLIDE); 567 next = pos; 568 page = ll_get_dir_page(inode, pos, 569 &chain); 570 } else { 571 /* 572 * go into overflow page. 573 */ 574 LASSERT(le32_to_cpu(dp->ldp_flags) & 575 LDF_COLLIDE); 576 ll_release_page(page, 1); 577 } 578 } else { 579 pos = hash; 580 ll_release_page(page, 0); 581 } 582 } else { 583 rc = PTR_ERR(page); 584 CERROR("error reading dir "DFID" at %lu: rc %d\n", 585 PFID(&info->lli_fid), (unsigned long)pos, rc); 586 } 587 } 588 589 ctx->pos = pos; 590 ll_dir_chain_fini(&chain); 591 return rc; 592} 593 594static int ll_readdir(struct file *filp, struct dir_context *ctx) 595{ 596 struct inode *inode = filp->f_dentry->d_inode; 597 struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp); 598 struct ll_sb_info *sbi = ll_i2sbi(inode); 599 int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; 600 int api32 = ll_need_32bit_api(sbi); 601 int rc; 602 603 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu " 604 " 32bit_api %d\n", inode->i_ino, inode->i_generation, 605 inode, (unsigned long)lfd->lfd_pos, i_size_read(inode), api32); 606 607 if (lfd->lfd_pos == MDS_DIR_END_OFF) { 608 /* 609 * end-of-file. 
610 */ 611 rc = 0; 612 goto out; 613 } 614 615 ctx->pos = lfd->lfd_pos; 616 rc = ll_dir_read(inode, ctx); 617 lfd->lfd_pos = ctx->pos; 618 if (ctx->pos == MDS_DIR_END_OFF) { 619 if (api32) 620 ctx->pos = LL_DIR_END_OFF_32BIT; 621 else 622 ctx->pos = LL_DIR_END_OFF; 623 } else { 624 if (api32 && hash64) 625 ctx->pos >>= 32; 626 } 627 filp->f_version = inode->i_version; 628 629out: 630 if (!rc) 631 ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1); 632 633 return rc; 634} 635 636static int ll_send_mgc_param(struct obd_export *mgc, char *string) 637{ 638 struct mgs_send_param *msp; 639 int rc = 0; 640 641 msp = kzalloc(sizeof(*msp), GFP_NOFS); 642 if (!msp) 643 return -ENOMEM; 644 645 strncpy(msp->mgs_param, string, MGS_PARAM_MAXLEN); 646 rc = obd_set_info_async(NULL, mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO, 647 sizeof(struct mgs_send_param), msp, NULL); 648 if (rc) 649 CERROR("Failed to set parameter: %d\n", rc); 650 OBD_FREE_PTR(msp); 651 652 return rc; 653} 654 655int ll_dir_setdirstripe(struct inode *dir, struct lmv_user_md *lump, 656 char *filename) 657{ 658 struct ptlrpc_request *request = NULL; 659 struct md_op_data *op_data; 660 struct ll_sb_info *sbi = ll_i2sbi(dir); 661 int mode; 662 int err; 663 664 mode = (0755 & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; 665 op_data = ll_prep_md_op_data(NULL, dir, NULL, filename, 666 strlen(filename), mode, LUSTRE_OPC_MKDIR, 667 lump); 668 if (IS_ERR(op_data)) { 669 err = PTR_ERR(op_data); 670 goto err_exit; 671 } 672 673 op_data->op_cli_flags |= CLI_SET_MEA; 674 err = md_create(sbi->ll_md_exp, op_data, lump, sizeof(*lump), mode, 675 from_kuid(&init_user_ns, current_fsuid()), 676 from_kgid(&init_user_ns, current_fsgid()), 677 cfs_curproc_cap_pack(), 0, &request); 678 ll_finish_md_op_data(op_data); 679 if (err) 680 goto err_exit; 681err_exit: 682 ptlrpc_req_finished(request); 683 return err; 684} 685 686int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, 687 int set_default) 688{ 689 struct ll_sb_info 
*sbi = ll_i2sbi(inode); 690 struct md_op_data *op_data; 691 struct ptlrpc_request *req = NULL; 692 int rc = 0; 693 struct lustre_sb_info *lsi = s2lsi(inode->i_sb); 694 struct obd_device *mgc = lsi->lsi_mgc; 695 int lum_size; 696 697 if (lump != NULL) { 698 /* 699 * This is coming from userspace, so should be in 700 * local endian. But the MDS would like it in little 701 * endian, so we swab it before we send it. 702 */ 703 switch (lump->lmm_magic) { 704 case LOV_USER_MAGIC_V1: { 705 if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1)) 706 lustre_swab_lov_user_md_v1(lump); 707 lum_size = sizeof(struct lov_user_md_v1); 708 break; 709 } 710 case LOV_USER_MAGIC_V3: { 711 if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3)) 712 lustre_swab_lov_user_md_v3( 713 (struct lov_user_md_v3 *)lump); 714 lum_size = sizeof(struct lov_user_md_v3); 715 break; 716 } 717 default: { 718 CDEBUG(D_IOCTL, "bad userland LOV MAGIC:" 719 " %#08x != %#08x nor %#08x\n", 720 lump->lmm_magic, LOV_USER_MAGIC_V1, 721 LOV_USER_MAGIC_V3); 722 return -EINVAL; 723 } 724 } 725 } else { 726 lum_size = sizeof(struct lov_user_md_v1); 727 } 728 729 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 730 LUSTRE_OPC_ANY, NULL); 731 if (IS_ERR(op_data)) 732 return PTR_ERR(op_data); 733 734 if (lump != NULL && lump->lmm_magic == cpu_to_le32(LMV_USER_MAGIC)) 735 op_data->op_cli_flags |= CLI_SET_MEA; 736 737 /* swabbing is done in lov_setstripe() on server side */ 738 rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, 739 NULL, 0, &req, NULL); 740 ll_finish_md_op_data(op_data); 741 ptlrpc_req_finished(req); 742 if (rc) { 743 if (rc != -EPERM && rc != -EACCES) 744 CERROR("mdc_setattr fails: rc = %d\n", rc); 745 } 746 747 /* In the following we use the fact that LOV_USER_MAGIC_V1 and 748 LOV_USER_MAGIC_V3 have the same initial fields so we do not 749 need to make the distinction between the 2 versions */ 750 if (set_default && mgc->u.cli.cl_mgc_mgsexp) { 751 char *param = NULL; 752 char *buf; 753 
754 param = kzalloc(MGS_PARAM_MAXLEN, GFP_NOFS); 755 if (!param) { 756 rc = -ENOMEM; 757 goto end; 758 } 759 760 buf = param; 761 /* Get fsname and assume devname to be -MDT0000. */ 762 ll_get_fsname(inode->i_sb, buf, MTI_NAME_MAXLEN); 763 strcat(buf, "-MDT0000.lov"); 764 buf += strlen(buf); 765 766 /* Set root stripesize */ 767 sprintf(buf, ".stripesize=%u", 768 lump ? le32_to_cpu(lump->lmm_stripe_size) : 0); 769 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param); 770 if (rc) 771 goto end; 772 773 /* Set root stripecount */ 774 sprintf(buf, ".stripecount=%hd", 775 lump ? le16_to_cpu(lump->lmm_stripe_count) : 0); 776 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param); 777 if (rc) 778 goto end; 779 780 /* Set root stripeoffset */ 781 sprintf(buf, ".stripeoffset=%hd", 782 lump ? le16_to_cpu(lump->lmm_stripe_offset) : 783 (typeof(lump->lmm_stripe_offset))(-1)); 784 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param); 785 786end: 787 if (param != NULL) 788 OBD_FREE(param, MGS_PARAM_MAXLEN); 789 } 790 return rc; 791} 792 793int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp, 794 int *lmm_size, struct ptlrpc_request **request) 795{ 796 struct ll_sb_info *sbi = ll_i2sbi(inode); 797 struct mdt_body *body; 798 struct lov_mds_md *lmm = NULL; 799 struct ptlrpc_request *req = NULL; 800 int rc, lmmsize; 801 struct md_op_data *op_data; 802 803 rc = ll_get_default_mdsize(sbi, &lmmsize); 804 if (rc) 805 return rc; 806 807 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 808 0, lmmsize, LUSTRE_OPC_ANY, 809 NULL); 810 if (IS_ERR(op_data)) 811 return PTR_ERR(op_data); 812 813 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA; 814 rc = md_getattr(sbi->ll_md_exp, op_data, &req); 815 ll_finish_md_op_data(op_data); 816 if (rc < 0) { 817 CDEBUG(D_INFO, "md_getattr failed on inode " 818 "%lu/%u: rc %d\n", inode->i_ino, 819 inode->i_generation, rc); 820 goto out; 821 } 822 823 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 824 LASSERT(body 
!= NULL); 825 826 lmmsize = body->eadatasize; 827 828 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) || 829 lmmsize == 0) { 830 rc = -ENODATA; 831 goto out; 832 } 833 834 lmm = req_capsule_server_sized_get(&req->rq_pill, 835 &RMF_MDT_MD, lmmsize); 836 LASSERT(lmm != NULL); 837 838 /* 839 * This is coming from the MDS, so is probably in 840 * little endian. We convert it to host endian before 841 * passing it to userspace. 842 */ 843 /* We don't swab objects for directories */ 844 switch (le32_to_cpu(lmm->lmm_magic)) { 845 case LOV_MAGIC_V1: 846 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) 847 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm); 848 break; 849 case LOV_MAGIC_V3: 850 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) 851 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm); 852 break; 853 default: 854 CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic); 855 rc = -EPROTO; 856 } 857out: 858 *lmmp = lmm; 859 *lmm_size = lmmsize; 860 *request = req; 861 return rc; 862} 863 864/* 865 * Get MDT index for the inode. 866 */ 867int ll_get_mdt_idx(struct inode *inode) 868{ 869 struct ll_sb_info *sbi = ll_i2sbi(inode); 870 struct md_op_data *op_data; 871 int rc, mdtidx; 872 873 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 874 0, LUSTRE_OPC_ANY, NULL); 875 if (IS_ERR(op_data)) 876 return PTR_ERR(op_data); 877 878 op_data->op_flags |= MF_GET_MDT_IDX; 879 rc = md_getattr(sbi->ll_md_exp, op_data, NULL); 880 mdtidx = op_data->op_mds; 881 ll_finish_md_op_data(op_data); 882 if (rc < 0) { 883 CDEBUG(D_INFO, "md_getattr_name: %d\n", rc); 884 return rc; 885 } 886 return mdtidx; 887} 888 889/** 890 * Generic handler to do any pre-copy work. 891 * 892 * It send a first hsm_progress (with extent length == 0) to coordinator as a 893 * first information for it that real work has started. 894 * 895 * Moreover, for a ARCHIVE request, it will sample the file data version and 896 * store it in \a copy. 897 * 898 * \return 0 on success. 
899 */ 900static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) 901{ 902 struct ll_sb_info *sbi = ll_s2sbi(sb); 903 struct hsm_progress_kernel hpk; 904 int rc; 905 906 /* Forge a hsm_progress based on data from copy. */ 907 hpk.hpk_fid = copy->hc_hai.hai_fid; 908 hpk.hpk_cookie = copy->hc_hai.hai_cookie; 909 hpk.hpk_extent.offset = copy->hc_hai.hai_extent.offset; 910 hpk.hpk_extent.length = 0; 911 hpk.hpk_flags = 0; 912 hpk.hpk_errval = 0; 913 hpk.hpk_data_version = 0; 914 915 916 /* For archive request, we need to read the current file version. */ 917 if (copy->hc_hai.hai_action == HSMA_ARCHIVE) { 918 struct inode *inode; 919 __u64 data_version = 0; 920 921 /* Get inode for this fid */ 922 inode = search_inode_for_lustre(sb, ©->hc_hai.hai_fid); 923 if (IS_ERR(inode)) { 924 hpk.hpk_flags |= HP_FLAG_RETRY; 925 /* hpk_errval is >= 0 */ 926 hpk.hpk_errval = -PTR_ERR(inode); 927 rc = PTR_ERR(inode); 928 goto progress; 929 } 930 931 /* Read current file data version */ 932 rc = ll_data_version(inode, &data_version, 1); 933 iput(inode); 934 if (rc != 0) { 935 CDEBUG(D_HSM, "Could not read file data version of " 936 DFID" (rc = %d). Archive request (%#llx) could not be done.\n", 937 PFID(©->hc_hai.hai_fid), rc, 938 copy->hc_hai.hai_cookie); 939 hpk.hpk_flags |= HP_FLAG_RETRY; 940 /* hpk_errval must be >= 0 */ 941 hpk.hpk_errval = -rc; 942 goto progress; 943 } 944 945 /* Store it the hsm_copy for later copytool use. 946 * Always modified even if no lsm. */ 947 copy->hc_data_version = data_version; 948 } 949 950progress: 951 rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), 952 &hpk, NULL); 953 954 return rc; 955} 956 957/** 958 * Generic handler to do any post-copy work. 959 * 960 * It will send the last hsm_progress update to coordinator to inform it 961 * that copy is finished and whether it was successful or not. 
962 * 963 * Moreover, 964 * - for ARCHIVE request, it will sample the file data version and compare it 965 * with the version saved in ll_ioc_copy_start(). If they do not match, copy 966 * will be considered as failed. 967 * - for RESTORE request, it will sample the file data version and send it to 968 * coordinator which is useful if the file was imported as 'released'. 969 * 970 * \return 0 on success. 971 */ 972static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) 973{ 974 struct ll_sb_info *sbi = ll_s2sbi(sb); 975 struct hsm_progress_kernel hpk; 976 int rc; 977 978 /* If you modify the logic here, also check llapi_hsm_copy_end(). */ 979 /* Take care: copy->hc_hai.hai_action, len, gid and data are not 980 * initialized if copy_end was called with copy == NULL. 981 */ 982 983 /* Forge a hsm_progress based on data from copy. */ 984 hpk.hpk_fid = copy->hc_hai.hai_fid; 985 hpk.hpk_cookie = copy->hc_hai.hai_cookie; 986 hpk.hpk_extent = copy->hc_hai.hai_extent; 987 hpk.hpk_flags = copy->hc_flags | HP_FLAG_COMPLETED; 988 hpk.hpk_errval = copy->hc_errval; 989 hpk.hpk_data_version = 0; 990 991 /* For archive request, we need to check the file data was not changed. 992 * 993 * For restore request, we need to send the file data version, this is 994 * useful when the file was created using hsm_import. 
995 */ 996 if (((copy->hc_hai.hai_action == HSMA_ARCHIVE) || 997 (copy->hc_hai.hai_action == HSMA_RESTORE)) && 998 (copy->hc_errval == 0)) { 999 struct inode *inode; 1000 __u64 data_version = 0; 1001 1002 /* Get lsm for this fid */ 1003 inode = search_inode_for_lustre(sb, ©->hc_hai.hai_fid); 1004 if (IS_ERR(inode)) { 1005 hpk.hpk_flags |= HP_FLAG_RETRY; 1006 /* hpk_errval must be >= 0 */ 1007 hpk.hpk_errval = -PTR_ERR(inode); 1008 rc = PTR_ERR(inode); 1009 goto progress; 1010 } 1011 1012 rc = ll_data_version(inode, &data_version, 1013 copy->hc_hai.hai_action == HSMA_ARCHIVE); 1014 iput(inode); 1015 if (rc) { 1016 CDEBUG(D_HSM, "Could not read file data version. " 1017 "Request could not be confirmed.\n"); 1018 if (hpk.hpk_errval == 0) 1019 hpk.hpk_errval = -rc; 1020 goto progress; 1021 } 1022 1023 /* Store it the hsm_copy for later copytool use. 1024 * Always modified even if no lsm. */ 1025 hpk.hpk_data_version = data_version; 1026 1027 /* File could have been stripped during archiving, so we need 1028 * to check anyway. */ 1029 if ((copy->hc_hai.hai_action == HSMA_ARCHIVE) && 1030 (copy->hc_data_version != data_version)) { 1031 CDEBUG(D_HSM, "File data version mismatched. " 1032 "File content was changed during archiving. " 1033 DFID", start:%#llx current:%#llx\n", 1034 PFID(©->hc_hai.hai_fid), 1035 copy->hc_data_version, data_version); 1036 /* File was changed, send error to cdt. Do not ask for 1037 * retry because if a file is modified frequently, 1038 * the cdt will loop on retried archive requests. 
1039 * The policy engine will ask for a new archive later 1040 * when the file will not be modified for some tunable 1041 * time */ 1042 /* we do not notify caller */ 1043 hpk.hpk_flags &= ~HP_FLAG_RETRY; 1044 /* hpk_errval must be >= 0 */ 1045 hpk.hpk_errval = EBUSY; 1046 } 1047 1048 } 1049 1050progress: 1051 rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), 1052 &hpk, NULL); 1053 1054 return rc; 1055} 1056 1057 1058static int copy_and_ioctl(int cmd, struct obd_export *exp, 1059 const void __user *data, size_t size) 1060{ 1061 void *copy; 1062 int rc; 1063 1064 copy = kzalloc(size, GFP_NOFS); 1065 if (!copy) 1066 return -ENOMEM; 1067 1068 if (copy_from_user(copy, data, size)) { 1069 rc = -EFAULT; 1070 goto out; 1071 } 1072 1073 rc = obd_iocontrol(cmd, exp, size, copy, NULL); 1074out: 1075 OBD_FREE(copy, size); 1076 1077 return rc; 1078} 1079 1080static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) 1081{ 1082 int cmd = qctl->qc_cmd; 1083 int type = qctl->qc_type; 1084 int id = qctl->qc_id; 1085 int valid = qctl->qc_valid; 1086 int rc = 0; 1087 1088 switch (cmd) { 1089 case LUSTRE_Q_INVALIDATE: 1090 case LUSTRE_Q_FINVALIDATE: 1091 case Q_QUOTAON: 1092 case Q_QUOTAOFF: 1093 case Q_SETQUOTA: 1094 case Q_SETINFO: 1095 if (!capable(CFS_CAP_SYS_ADMIN) || 1096 sbi->ll_flags & LL_SBI_RMT_CLIENT) 1097 return -EPERM; 1098 break; 1099 case Q_GETQUOTA: 1100 if (((type == USRQUOTA && 1101 !uid_eq(current_euid(), make_kuid(&init_user_ns, id))) || 1102 (type == GRPQUOTA && 1103 !in_egroup_p(make_kgid(&init_user_ns, id)))) && 1104 (!capable(CFS_CAP_SYS_ADMIN) || 1105 sbi->ll_flags & LL_SBI_RMT_CLIENT)) 1106 return -EPERM; 1107 break; 1108 case Q_GETINFO: 1109 break; 1110 default: 1111 CERROR("unsupported quotactl op: %#x\n", cmd); 1112 return -ENOTTY; 1113 } 1114 1115 if (valid != QC_GENERAL) { 1116 if (sbi->ll_flags & LL_SBI_RMT_CLIENT) 1117 return -EOPNOTSUPP; 1118 1119 if (cmd == Q_GETINFO) 1120 qctl->qc_cmd = Q_GETOINFO; 1121 else if 
(cmd == Q_GETQUOTA) 1122 qctl->qc_cmd = Q_GETOQUOTA; 1123 else 1124 return -EINVAL; 1125 1126 switch (valid) { 1127 case QC_MDTIDX: 1128 rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp, 1129 sizeof(*qctl), qctl, NULL); 1130 break; 1131 case QC_OSTIDX: 1132 rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_dt_exp, 1133 sizeof(*qctl), qctl, NULL); 1134 break; 1135 case QC_UUID: 1136 rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp, 1137 sizeof(*qctl), qctl, NULL); 1138 if (rc == -EAGAIN) 1139 rc = obd_iocontrol(OBD_IOC_QUOTACTL, 1140 sbi->ll_dt_exp, 1141 sizeof(*qctl), qctl, NULL); 1142 break; 1143 default: 1144 rc = -EINVAL; 1145 break; 1146 } 1147 1148 if (rc) 1149 return rc; 1150 1151 qctl->qc_cmd = cmd; 1152 } else { 1153 struct obd_quotactl *oqctl; 1154 1155 oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS); 1156 if (!oqctl) 1157 return -ENOMEM; 1158 1159 QCTL_COPY(oqctl, qctl); 1160 rc = obd_quotactl(sbi->ll_md_exp, oqctl); 1161 if (rc) { 1162 if (rc != -EALREADY && cmd == Q_QUOTAON) { 1163 oqctl->qc_cmd = Q_QUOTAOFF; 1164 obd_quotactl(sbi->ll_md_exp, oqctl); 1165 } 1166 OBD_FREE_PTR(oqctl); 1167 return rc; 1168 } 1169 /* If QIF_SPACE is not set, client should collect the 1170 * space usage from OSSs by itself */ 1171 if (cmd == Q_GETQUOTA && 1172 !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) && 1173 !oqctl->qc_dqblk.dqb_curspace) { 1174 struct obd_quotactl *oqctl_tmp; 1175 1176 oqctl_tmp = kzalloc(sizeof(*oqctl_tmp), GFP_NOFS); 1177 if (!oqctl_tmp) { 1178 rc = -ENOMEM; 1179 goto out; 1180 } 1181 1182 oqctl_tmp->qc_cmd = Q_GETOQUOTA; 1183 oqctl_tmp->qc_id = oqctl->qc_id; 1184 oqctl_tmp->qc_type = oqctl->qc_type; 1185 1186 /* collect space usage from OSTs */ 1187 oqctl_tmp->qc_dqblk.dqb_curspace = 0; 1188 rc = obd_quotactl(sbi->ll_dt_exp, oqctl_tmp); 1189 if (!rc || rc == -EREMOTEIO) { 1190 oqctl->qc_dqblk.dqb_curspace = 1191 oqctl_tmp->qc_dqblk.dqb_curspace; 1192 oqctl->qc_dqblk.dqb_valid |= QIF_SPACE; 1193 } 1194 1195 /* collect space & inode usage from MDTs */ 1196 
oqctl_tmp->qc_dqblk.dqb_curspace = 0; 1197 oqctl_tmp->qc_dqblk.dqb_curinodes = 0; 1198 rc = obd_quotactl(sbi->ll_md_exp, oqctl_tmp); 1199 if (!rc || rc == -EREMOTEIO) { 1200 oqctl->qc_dqblk.dqb_curspace += 1201 oqctl_tmp->qc_dqblk.dqb_curspace; 1202 oqctl->qc_dqblk.dqb_curinodes = 1203 oqctl_tmp->qc_dqblk.dqb_curinodes; 1204 oqctl->qc_dqblk.dqb_valid |= QIF_INODES; 1205 } else { 1206 oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE; 1207 } 1208 1209 OBD_FREE_PTR(oqctl_tmp); 1210 } 1211out: 1212 QCTL_COPY(qctl, oqctl); 1213 OBD_FREE_PTR(oqctl); 1214 } 1215 1216 return rc; 1217} 1218 1219static char * 1220ll_getname(const char __user *filename) 1221{ 1222 int ret = 0, len; 1223 char *tmp = __getname(); 1224 1225 if (!tmp) 1226 return ERR_PTR(-ENOMEM); 1227 1228 len = strncpy_from_user(tmp, filename, PATH_MAX); 1229 if (len == 0) 1230 ret = -ENOENT; 1231 else if (len > PATH_MAX) 1232 ret = -ENAMETOOLONG; 1233 1234 if (ret) { 1235 __putname(tmp); 1236 tmp = ERR_PTR(ret); 1237 } 1238 return tmp; 1239} 1240 1241#define ll_putname(filename) __putname(filename) 1242 1243static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 1244{ 1245 struct inode *inode = file->f_dentry->d_inode; 1246 struct ll_sb_info *sbi = ll_i2sbi(inode); 1247 struct obd_ioctl_data *data; 1248 int rc = 0; 1249 1250 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n", 1251 inode->i_ino, inode->i_generation, inode, cmd); 1252 1253 /* asm-ppc{,64} declares TCGETS, et. al. 
as type 't' not 'T' */ 1254 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ 1255 return -ENOTTY; 1256 1257 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); 1258 switch (cmd) { 1259 case FSFILT_IOC_GETFLAGS: 1260 case FSFILT_IOC_SETFLAGS: 1261 return ll_iocontrol(inode, file, cmd, arg); 1262 case FSFILT_IOC_GETVERSION_OLD: 1263 case FSFILT_IOC_GETVERSION: 1264 return put_user(inode->i_generation, (int *)arg); 1265 /* We need to special case any other ioctls we want to handle, 1266 * to send them to the MDS/OST as appropriate and to properly 1267 * network encode the arg field. 1268 case FSFILT_IOC_SETVERSION_OLD: 1269 case FSFILT_IOC_SETVERSION: 1270 */ 1271 case LL_IOC_GET_MDTIDX: { 1272 int mdtidx; 1273 1274 mdtidx = ll_get_mdt_idx(inode); 1275 if (mdtidx < 0) 1276 return mdtidx; 1277 1278 if (put_user((int)mdtidx, (int *)arg)) 1279 return -EFAULT; 1280 1281 return 0; 1282 } 1283 case IOC_MDC_LOOKUP: { 1284 struct ptlrpc_request *request = NULL; 1285 int namelen, len = 0; 1286 char *buf = NULL; 1287 char *filename; 1288 struct md_op_data *op_data; 1289 1290 rc = obd_ioctl_getdata(&buf, &len, (void *)arg); 1291 if (rc) 1292 return rc; 1293 data = (void *)buf; 1294 1295 filename = data->ioc_inlbuf1; 1296 namelen = strlen(filename); 1297 1298 if (namelen < 1) { 1299 CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n"); 1300 rc = -EINVAL; 1301 goto out_free; 1302 } 1303 1304 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, namelen, 1305 0, LUSTRE_OPC_ANY, NULL); 1306 if (IS_ERR(op_data)) { 1307 rc = PTR_ERR(op_data); 1308 goto out_free; 1309 } 1310 1311 op_data->op_valid = OBD_MD_FLID; 1312 rc = md_getattr_name(sbi->ll_md_exp, op_data, &request); 1313 ll_finish_md_op_data(op_data); 1314 if (rc < 0) { 1315 CDEBUG(D_INFO, "md_getattr_name: %d\n", rc); 1316 goto out_free; 1317 } 1318 ptlrpc_req_finished(request); 1319out_free: 1320 obd_ioctl_freedata(buf, len); 1321 return rc; 1322 } 1323 case LL_IOC_LMV_SETSTRIPE: { 1324 struct 
lmv_user_md *lum; 1325 char *buf = NULL; 1326 char *filename; 1327 int namelen = 0; 1328 int lumlen = 0; 1329 int len; 1330 int rc; 1331 1332 rc = obd_ioctl_getdata(&buf, &len, (void *)arg); 1333 if (rc) 1334 return rc; 1335 1336 data = (void *)buf; 1337 if (data->ioc_inlbuf1 == NULL || data->ioc_inlbuf2 == NULL || 1338 data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0) { 1339 rc = -EINVAL; 1340 goto lmv_out_free; 1341 } 1342 1343 filename = data->ioc_inlbuf1; 1344 namelen = data->ioc_inllen1; 1345 1346 if (namelen < 1) { 1347 CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n"); 1348 rc = -EINVAL; 1349 goto lmv_out_free; 1350 } 1351 lum = (struct lmv_user_md *)data->ioc_inlbuf2; 1352 lumlen = data->ioc_inllen2; 1353 1354 if (lum->lum_magic != LMV_USER_MAGIC || 1355 lumlen != sizeof(*lum)) { 1356 CERROR("%s: wrong lum magic %x or size %d: rc = %d\n", 1357 filename, lum->lum_magic, lumlen, -EFAULT); 1358 rc = -EINVAL; 1359 goto lmv_out_free; 1360 } 1361 1362 /** 1363 * ll_dir_setdirstripe will be used to set dir stripe 1364 * mdc_create--->mdt_reint_create (with dirstripe) 1365 */ 1366 rc = ll_dir_setdirstripe(inode, lum, filename); 1367lmv_out_free: 1368 obd_ioctl_freedata(buf, len); 1369 return rc; 1370 1371 } 1372 case LL_IOC_LOV_SETSTRIPE: { 1373 struct lov_user_md_v3 lumv3; 1374 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3; 1375 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg; 1376 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg; 1377 1378 int set_default = 0; 1379 1380 LASSERT(sizeof(lumv3) == sizeof(*lumv3p)); 1381 LASSERT(sizeof(lumv3.lmm_objects[0]) == 1382 sizeof(lumv3p->lmm_objects[0])); 1383 /* first try with v1 which is smaller than v3 */ 1384 if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1))) 1385 return -EFAULT; 1386 1387 if ((lumv1->lmm_magic == LOV_USER_MAGIC_V3) ) { 1388 if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3))) 1389 return -EFAULT; 1390 } 1391 1392 if (inode->i_sb->s_root == file->f_dentry) 1393 
set_default = 1; 1394 1395 /* in v1 and v3 cases lumv1 points to data */ 1396 rc = ll_dir_setstripe(inode, lumv1, set_default); 1397 1398 return rc; 1399 } 1400 case LL_IOC_LMV_GETSTRIPE: { 1401 struct lmv_user_md *lump = (struct lmv_user_md *)arg; 1402 struct lmv_user_md lum; 1403 struct lmv_user_md *tmp; 1404 int lum_size; 1405 int rc = 0; 1406 int mdtindex; 1407 1408 if (copy_from_user(&lum, lump, sizeof(struct lmv_user_md))) 1409 return -EFAULT; 1410 1411 if (lum.lum_magic != LMV_MAGIC_V1) 1412 return -EINVAL; 1413 1414 lum_size = lmv_user_md_size(1, LMV_MAGIC_V1); 1415 tmp = kzalloc(lum_size, GFP_NOFS); 1416 if (!tmp) { 1417 rc = -ENOMEM; 1418 goto free_lmv; 1419 } 1420 1421 *tmp = lum; 1422 tmp->lum_type = LMV_STRIPE_TYPE; 1423 tmp->lum_stripe_count = 1; 1424 mdtindex = ll_get_mdt_idx(inode); 1425 if (mdtindex < 0) { 1426 rc = -ENOMEM; 1427 goto free_lmv; 1428 } 1429 1430 tmp->lum_stripe_offset = mdtindex; 1431 tmp->lum_objects[0].lum_mds = mdtindex; 1432 memcpy(&tmp->lum_objects[0].lum_fid, ll_inode2fid(inode), 1433 sizeof(struct lu_fid)); 1434 if (copy_to_user((void *)arg, tmp, lum_size)) { 1435 rc = -EFAULT; 1436 goto free_lmv; 1437 } 1438free_lmv: 1439 if (tmp) 1440 OBD_FREE(tmp, lum_size); 1441 return rc; 1442 } 1443 case LL_IOC_REMOVE_ENTRY: { 1444 char *filename = NULL; 1445 int namelen = 0; 1446 int rc; 1447 1448 /* Here is a little hack to avoid sending REINT_RMENTRY to 1449 * unsupported server, which might crash the server(LU-2730), 1450 * Because both LVB_TYPE and REINT_RMENTRY will be supported 1451 * on 2.4, we use OBD_CONNECT_LVB_TYPE to detect whether the 1452 * server will support REINT_RMENTRY XXX*/ 1453 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_LVB_TYPE)) 1454 return -ENOTSUPP; 1455 1456 filename = ll_getname((const char *)arg); 1457 if (IS_ERR(filename)) 1458 return PTR_ERR(filename); 1459 1460 namelen = strlen(filename); 1461 if (namelen < 1) { 1462 rc = -EINVAL; 1463 goto out_rmdir; 1464 } 1465 1466 rc = ll_rmdir_entry(inode, 
filename, namelen); 1467out_rmdir: 1468 if (filename) 1469 ll_putname(filename); 1470 return rc; 1471 } 1472 case LL_IOC_LOV_SWAP_LAYOUTS: 1473 return -EPERM; 1474 case LL_IOC_OBD_STATFS: 1475 return ll_obd_statfs(inode, (void *)arg); 1476 case LL_IOC_LOV_GETSTRIPE: 1477 case LL_IOC_MDC_GETINFO: 1478 case IOC_MDC_GETFILEINFO: 1479 case IOC_MDC_GETFILESTRIPE: { 1480 struct ptlrpc_request *request = NULL; 1481 struct lov_user_md *lump; 1482 struct lov_mds_md *lmm = NULL; 1483 struct mdt_body *body; 1484 char *filename = NULL; 1485 int lmmsize; 1486 1487 if (cmd == IOC_MDC_GETFILEINFO || 1488 cmd == IOC_MDC_GETFILESTRIPE) { 1489 filename = ll_getname((const char *)arg); 1490 if (IS_ERR(filename)) 1491 return PTR_ERR(filename); 1492 1493 rc = ll_lov_getstripe_ea_info(inode, filename, &lmm, 1494 &lmmsize, &request); 1495 } else { 1496 rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request); 1497 } 1498 1499 if (request) { 1500 body = req_capsule_server_get(&request->rq_pill, 1501 &RMF_MDT_BODY); 1502 LASSERT(body != NULL); 1503 } else { 1504 goto out_req; 1505 } 1506 1507 if (rc < 0) { 1508 if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO || 1509 cmd == LL_IOC_MDC_GETINFO)) { 1510 rc = 0; 1511 goto skip_lmm; 1512 } 1513 else 1514 goto out_req; 1515 } 1516 1517 if (cmd == IOC_MDC_GETFILESTRIPE || 1518 cmd == LL_IOC_LOV_GETSTRIPE) { 1519 lump = (struct lov_user_md *)arg; 1520 } else { 1521 struct lov_user_mds_data *lmdp; 1522 lmdp = (struct lov_user_mds_data *)arg; 1523 lump = &lmdp->lmd_lmm; 1524 } 1525 if (copy_to_user(lump, lmm, lmmsize)) { 1526 if (copy_to_user(lump, lmm, sizeof(*lump))) { 1527 rc = -EFAULT; 1528 goto out_req; 1529 } 1530 rc = -EOVERFLOW; 1531 } 1532skip_lmm: 1533 if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) { 1534 struct lov_user_mds_data *lmdp; 1535 lstat_t st = { 0 }; 1536 1537 st.st_dev = inode->i_sb->s_dev; 1538 st.st_mode = body->mode; 1539 st.st_nlink = body->nlink; 1540 st.st_uid = body->uid; 1541 st.st_gid = body->gid; 1542 
st.st_rdev = body->rdev; 1543 st.st_size = body->size; 1544 st.st_blksize = PAGE_CACHE_SIZE; 1545 st.st_blocks = body->blocks; 1546 st.st_atime = body->atime; 1547 st.st_mtime = body->mtime; 1548 st.st_ctime = body->ctime; 1549 st.st_ino = inode->i_ino; 1550 1551 lmdp = (struct lov_user_mds_data *)arg; 1552 if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st))) { 1553 rc = -EFAULT; 1554 goto out_req; 1555 } 1556 } 1557 1558out_req: 1559 ptlrpc_req_finished(request); 1560 if (filename) 1561 ll_putname(filename); 1562 return rc; 1563 } 1564 case IOC_LOV_GETINFO: { 1565 struct lov_user_mds_data *lumd; 1566 struct lov_stripe_md *lsm; 1567 struct lov_user_md *lum; 1568 struct lov_mds_md *lmm; 1569 int lmmsize; 1570 lstat_t st; 1571 1572 lumd = (struct lov_user_mds_data *)arg; 1573 lum = &lumd->lmd_lmm; 1574 1575 rc = ll_get_max_mdsize(sbi, &lmmsize); 1576 if (rc) 1577 return rc; 1578 1579 OBD_ALLOC_LARGE(lmm, lmmsize); 1580 if (lmm == NULL) 1581 return -ENOMEM; 1582 if (copy_from_user(lmm, lum, lmmsize)) { 1583 rc = -EFAULT; 1584 goto free_lmm; 1585 } 1586 1587 switch (lmm->lmm_magic) { 1588 case LOV_USER_MAGIC_V1: 1589 if (LOV_USER_MAGIC_V1 == cpu_to_le32(LOV_USER_MAGIC_V1)) 1590 break; 1591 /* swab objects first so that stripes num will be sane */ 1592 lustre_swab_lov_user_md_objects( 1593 ((struct lov_user_md_v1 *)lmm)->lmm_objects, 1594 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count); 1595 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm); 1596 break; 1597 case LOV_USER_MAGIC_V3: 1598 if (LOV_USER_MAGIC_V3 == cpu_to_le32(LOV_USER_MAGIC_V3)) 1599 break; 1600 /* swab objects first so that stripes num will be sane */ 1601 lustre_swab_lov_user_md_objects( 1602 ((struct lov_user_md_v3 *)lmm)->lmm_objects, 1603 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count); 1604 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm); 1605 break; 1606 default: 1607 rc = -EINVAL; 1608 goto free_lmm; 1609 } 1610 1611 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize); 1612 if 
(rc < 0) { 1613 rc = -ENOMEM; 1614 goto free_lmm; 1615 } 1616 1617 /* Perform glimpse_size operation. */ 1618 memset(&st, 0, sizeof(st)); 1619 1620 rc = ll_glimpse_ioctl(sbi, lsm, &st); 1621 if (rc) 1622 goto free_lsm; 1623 1624 if (copy_to_user(&lumd->lmd_st, &st, sizeof(st))) { 1625 rc = -EFAULT; 1626 goto free_lsm; 1627 } 1628 1629free_lsm: 1630 obd_free_memmd(sbi->ll_dt_exp, &lsm); 1631free_lmm: 1632 OBD_FREE_LARGE(lmm, lmmsize); 1633 return rc; 1634 } 1635 case OBD_IOC_LLOG_CATINFO: { 1636 return -EOPNOTSUPP; 1637 } 1638 case OBD_IOC_QUOTACHECK: { 1639 struct obd_quotactl *oqctl; 1640 int error = 0; 1641 1642 if (!capable(CFS_CAP_SYS_ADMIN) || 1643 sbi->ll_flags & LL_SBI_RMT_CLIENT) 1644 return -EPERM; 1645 1646 oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS); 1647 if (!oqctl) 1648 return -ENOMEM; 1649 oqctl->qc_type = arg; 1650 rc = obd_quotacheck(sbi->ll_md_exp, oqctl); 1651 if (rc < 0) { 1652 CDEBUG(D_INFO, "md_quotacheck failed: rc %d\n", rc); 1653 error = rc; 1654 } 1655 1656 rc = obd_quotacheck(sbi->ll_dt_exp, oqctl); 1657 if (rc < 0) 1658 CDEBUG(D_INFO, "obd_quotacheck failed: rc %d\n", rc); 1659 1660 OBD_FREE_PTR(oqctl); 1661 return error ?: rc; 1662 } 1663 case OBD_IOC_POLL_QUOTACHECK: { 1664 struct if_quotacheck *check; 1665 1666 if (!capable(CFS_CAP_SYS_ADMIN) || 1667 sbi->ll_flags & LL_SBI_RMT_CLIENT) 1668 return -EPERM; 1669 1670 check = kzalloc(sizeof(*check), GFP_NOFS); 1671 if (!check) 1672 return -ENOMEM; 1673 1674 rc = obd_iocontrol(cmd, sbi->ll_md_exp, 0, (void *)check, 1675 NULL); 1676 if (rc) { 1677 CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc); 1678 if (copy_to_user((void *)arg, check, 1679 sizeof(*check))) 1680 CDEBUG(D_QUOTA, "copy_to_user failed\n"); 1681 goto out_poll; 1682 } 1683 1684 rc = obd_iocontrol(cmd, sbi->ll_dt_exp, 0, (void *)check, 1685 NULL); 1686 if (rc) { 1687 CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc); 1688 if (copy_to_user((void *)arg, check, 1689 sizeof(*check))) 1690 CDEBUG(D_QUOTA, "copy_to_user 
failed\n"); 1691 goto out_poll; 1692 } 1693out_poll: 1694 OBD_FREE_PTR(check); 1695 return rc; 1696 } 1697#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0) 1698 case LL_IOC_QUOTACTL_18: { 1699 /* copy the old 1.x quota struct for internal use, then copy 1700 * back into old format struct. For 1.8 compatibility. */ 1701 struct if_quotactl_18 *qctl_18; 1702 struct if_quotactl *qctl_20; 1703 1704 qctl_18 = kzalloc(sizeof(*qctl_18), GFP_NOFS); 1705 if (!qctl_18) 1706 return -ENOMEM; 1707 1708 qctl_20 = kzalloc(sizeof(*qctl_20), GFP_NOFS); 1709 if (!qctl_20) { 1710 rc = -ENOMEM; 1711 goto out_quotactl_18; 1712 } 1713 1714 if (copy_from_user(qctl_18, (void *)arg, sizeof(*qctl_18))) { 1715 rc = -ENOMEM; 1716 goto out_quotactl_20; 1717 } 1718 1719 QCTL_COPY(qctl_20, qctl_18); 1720 qctl_20->qc_idx = 0; 1721 1722 /* XXX: dqb_valid was borrowed as a flag to mark that 1723 * only mds quota is wanted */ 1724 if (qctl_18->qc_cmd == Q_GETQUOTA && 1725 qctl_18->qc_dqblk.dqb_valid) { 1726 qctl_20->qc_valid = QC_MDTIDX; 1727 qctl_20->qc_dqblk.dqb_valid = 0; 1728 } else if (qctl_18->obd_uuid.uuid[0] != '\0') { 1729 qctl_20->qc_valid = QC_UUID; 1730 qctl_20->obd_uuid = qctl_18->obd_uuid; 1731 } else { 1732 qctl_20->qc_valid = QC_GENERAL; 1733 } 1734 1735 rc = quotactl_ioctl(sbi, qctl_20); 1736 1737 if (rc == 0) { 1738 QCTL_COPY(qctl_18, qctl_20); 1739 qctl_18->obd_uuid = qctl_20->obd_uuid; 1740 1741 if (copy_to_user((void *)arg, qctl_18, 1742 sizeof(*qctl_18))) 1743 rc = -EFAULT; 1744 } 1745 1746out_quotactl_20: 1747 OBD_FREE_PTR(qctl_20); 1748out_quotactl_18: 1749 OBD_FREE_PTR(qctl_18); 1750 return rc; 1751 } 1752#else 1753#warning "remove old LL_IOC_QUOTACTL_18 compatibility code" 1754#endif /* LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0) */ 1755 case LL_IOC_QUOTACTL: { 1756 struct if_quotactl *qctl; 1757 1758 qctl = kzalloc(sizeof(*qctl), GFP_NOFS); 1759 if (!qctl) 1760 return -ENOMEM; 1761 1762 if (copy_from_user(qctl, (void *)arg, sizeof(*qctl))) { 1763 rc = -EFAULT; 
1764 goto out_quotactl; 1765 } 1766 1767 rc = quotactl_ioctl(sbi, qctl); 1768 1769 if (rc == 0 && copy_to_user((void *)arg, qctl, sizeof(*qctl))) 1770 rc = -EFAULT; 1771 1772out_quotactl: 1773 OBD_FREE_PTR(qctl); 1774 return rc; 1775 } 1776 case OBD_IOC_GETDTNAME: 1777 case OBD_IOC_GETMDNAME: 1778 return ll_get_obd_name(inode, cmd, arg); 1779 case LL_IOC_FLUSHCTX: 1780 return ll_flush_ctx(inode); 1781#ifdef CONFIG_FS_POSIX_ACL 1782 case LL_IOC_RMTACL: { 1783 if (sbi->ll_flags & LL_SBI_RMT_CLIENT && 1784 inode == inode->i_sb->s_root->d_inode) { 1785 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 1786 1787 LASSERT(fd != NULL); 1788 rc = rct_add(&sbi->ll_rct, current_pid(), arg); 1789 if (!rc) 1790 fd->fd_flags |= LL_FILE_RMTACL; 1791 return rc; 1792 } else 1793 return 0; 1794 } 1795#endif 1796 case LL_IOC_GETOBDCOUNT: { 1797 int count, vallen; 1798 struct obd_export *exp; 1799 1800 if (copy_from_user(&count, (int *)arg, sizeof(int))) 1801 return -EFAULT; 1802 1803 /* get ost count when count is zero, get mdt count otherwise */ 1804 exp = count ? 
sbi->ll_md_exp : sbi->ll_dt_exp; 1805 vallen = sizeof(count); 1806 rc = obd_get_info(NULL, exp, sizeof(KEY_TGT_COUNT), 1807 KEY_TGT_COUNT, &vallen, &count, NULL); 1808 if (rc) { 1809 CERROR("get target count failed: %d\n", rc); 1810 return rc; 1811 } 1812 1813 if (copy_to_user((int *)arg, &count, sizeof(int))) 1814 return -EFAULT; 1815 1816 return 0; 1817 } 1818 case LL_IOC_PATH2FID: 1819 if (copy_to_user((void *)arg, ll_inode2fid(inode), 1820 sizeof(struct lu_fid))) 1821 return -EFAULT; 1822 return 0; 1823 case LL_IOC_GET_CONNECT_FLAGS: { 1824 return obd_iocontrol(cmd, sbi->ll_md_exp, 0, NULL, (void *)arg); 1825 } 1826 case OBD_IOC_CHANGELOG_SEND: 1827 case OBD_IOC_CHANGELOG_CLEAR: 1828 rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void *)arg, 1829 sizeof(struct ioc_changelog)); 1830 return rc; 1831 case OBD_IOC_FID2PATH: 1832 return ll_fid2path(inode, (void *)arg); 1833 case LL_IOC_HSM_REQUEST: { 1834 struct hsm_user_request *hur; 1835 ssize_t totalsize; 1836 1837 hur = kzalloc(sizeof(*hur), GFP_NOFS); 1838 if (!hur) 1839 return -ENOMEM; 1840 1841 /* We don't know the true size yet; copy the fixed-size part */ 1842 if (copy_from_user(hur, (void *)arg, sizeof(*hur))) { 1843 OBD_FREE_PTR(hur); 1844 return -EFAULT; 1845 } 1846 1847 /* Compute the whole struct size */ 1848 totalsize = hur_len(hur); 1849 OBD_FREE_PTR(hur); 1850 if (totalsize < 0) 1851 return -E2BIG; 1852 1853 /* Final size will be more than double totalsize */ 1854 if (totalsize >= MDS_MAXREQSIZE / 3) 1855 return -E2BIG; 1856 1857 OBD_ALLOC_LARGE(hur, totalsize); 1858 if (hur == NULL) 1859 return -ENOMEM; 1860 1861 /* Copy the whole struct */ 1862 if (copy_from_user(hur, (void *)arg, totalsize)) { 1863 OBD_FREE_LARGE(hur, totalsize); 1864 return -EFAULT; 1865 } 1866 1867 if (hur->hur_request.hr_action == HUA_RELEASE) { 1868 const struct lu_fid *fid; 1869 struct inode *f; 1870 int i; 1871 1872 for (i = 0; i < hur->hur_request.hr_itemcount; i++) { 1873 fid = &hur->hur_user_item[i].hui_fid; 1874 f = 
search_inode_for_lustre(inode->i_sb, fid); 1875 if (IS_ERR(f)) { 1876 rc = PTR_ERR(f); 1877 break; 1878 } 1879 1880 rc = ll_hsm_release(f); 1881 iput(f); 1882 if (rc != 0) 1883 break; 1884 } 1885 } else { 1886 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize, 1887 hur, NULL); 1888 } 1889 1890 OBD_FREE_LARGE(hur, totalsize); 1891 1892 return rc; 1893 } 1894 case LL_IOC_HSM_PROGRESS: { 1895 struct hsm_progress_kernel hpk; 1896 struct hsm_progress hp; 1897 1898 if (copy_from_user(&hp, (void *)arg, sizeof(hp))) 1899 return -EFAULT; 1900 1901 hpk.hpk_fid = hp.hp_fid; 1902 hpk.hpk_cookie = hp.hp_cookie; 1903 hpk.hpk_extent = hp.hp_extent; 1904 hpk.hpk_flags = hp.hp_flags; 1905 hpk.hpk_errval = hp.hp_errval; 1906 hpk.hpk_data_version = 0; 1907 1908 /* File may not exist in Lustre; all progress 1909 * reported to Lustre root */ 1910 rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(hpk), &hpk, 1911 NULL); 1912 return rc; 1913 } 1914 case LL_IOC_HSM_CT_START: 1915 rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void *)arg, 1916 sizeof(struct lustre_kernelcomm)); 1917 return rc; 1918 1919 case LL_IOC_HSM_COPY_START: { 1920 struct hsm_copy *copy; 1921 int rc; 1922 1923 copy = kzalloc(sizeof(*copy), GFP_NOFS); 1924 if (!copy) 1925 return -ENOMEM; 1926 if (copy_from_user(copy, (char *)arg, sizeof(*copy))) { 1927 OBD_FREE_PTR(copy); 1928 return -EFAULT; 1929 } 1930 1931 rc = ll_ioc_copy_start(inode->i_sb, copy); 1932 if (copy_to_user((char *)arg, copy, sizeof(*copy))) 1933 rc = -EFAULT; 1934 1935 OBD_FREE_PTR(copy); 1936 return rc; 1937 } 1938 case LL_IOC_HSM_COPY_END: { 1939 struct hsm_copy *copy; 1940 int rc; 1941 1942 copy = kzalloc(sizeof(*copy), GFP_NOFS); 1943 if (!copy) 1944 return -ENOMEM; 1945 if (copy_from_user(copy, (char *)arg, sizeof(*copy))) { 1946 OBD_FREE_PTR(copy); 1947 return -EFAULT; 1948 } 1949 1950 rc = ll_ioc_copy_end(inode->i_sb, copy); 1951 if (copy_to_user((char *)arg, copy, sizeof(*copy))) 1952 rc = -EFAULT; 1953 1954 OBD_FREE_PTR(copy); 1955 return rc; 1956 
} 1957 default: 1958 return obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL, (void *)arg); 1959 } 1960} 1961 1962static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) 1963{ 1964 struct inode *inode = file->f_mapping->host; 1965 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 1966 struct ll_sb_info *sbi = ll_i2sbi(inode); 1967 int api32 = ll_need_32bit_api(sbi); 1968 loff_t ret = -EINVAL; 1969 1970 mutex_lock(&inode->i_mutex); 1971 switch (origin) { 1972 case SEEK_SET: 1973 break; 1974 case SEEK_CUR: 1975 offset += file->f_pos; 1976 break; 1977 case SEEK_END: 1978 if (offset > 0) 1979 goto out; 1980 if (api32) 1981 offset += LL_DIR_END_OFF_32BIT; 1982 else 1983 offset += LL_DIR_END_OFF; 1984 break; 1985 default: 1986 goto out; 1987 } 1988 1989 if (offset >= 0 && 1990 ((api32 && offset <= LL_DIR_END_OFF_32BIT) || 1991 (!api32 && offset <= LL_DIR_END_OFF))) { 1992 if (offset != file->f_pos) { 1993 if ((api32 && offset == LL_DIR_END_OFF_32BIT) || 1994 (!api32 && offset == LL_DIR_END_OFF)) 1995 fd->lfd_pos = MDS_DIR_END_OFF; 1996 else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH) 1997 fd->lfd_pos = offset << 32; 1998 else 1999 fd->lfd_pos = offset; 2000 file->f_pos = offset; 2001 file->f_version = 0; 2002 } 2003 ret = offset; 2004 } 2005 goto out; 2006 2007out: 2008 mutex_unlock(&inode->i_mutex); 2009 return ret; 2010} 2011 2012static int ll_dir_open(struct inode *inode, struct file *file) 2013{ 2014 return ll_file_open(inode, file); 2015} 2016 2017static int ll_dir_release(struct inode *inode, struct file *file) 2018{ 2019 return ll_file_release(inode, file); 2020} 2021 2022const struct file_operations ll_dir_operations = { 2023 .llseek = ll_dir_seek, 2024 .open = ll_dir_open, 2025 .release = ll_dir_release, 2026 .read = generic_read_dir, 2027 .iterate = ll_readdir, 2028 .unlocked_ioctl = ll_dir_ioctl, 2029 .fsync = ll_fsync, 2030}; 2031