1/* 2 * GPL HEADER START 3 * 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 only, 8 * as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License version 2 for more details (a copy is included 14 * in the LICENSE file that accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License 17 * version 2 along with this program; If not, see 18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf 19 * 20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 21 * CA 95054 USA or visit www.sun.com if you need additional information or 22 * have any questions. 23 * 24 * GPL HEADER END 25 */ 26/* 27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 28 * Use is subject to license terms. 29 * 30 * Copyright (c) 2011, 2012, Intel Corporation. 31 */ 32/* 33 * This file is part of Lustre, http://www.lustre.org/ 34 * Lustre is a trademark of Sun Microsystems, Inc. 
 *
 * lustre/llite/file.c
 *
 * Author: Peter Braam <braam@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 * Author: Andreas Dilger <adilger@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_LLITE
#include "../include/lustre_dlm.h"
#include "../include/lustre_lite.h"
#include <linux/pagemap.h>
#include <linux/file.h>
#include "llite_internal.h"
#include "../include/lustre/ll_fiemap.h"

#include "../include/cl_object.h"

/* Forward declarations for helpers defined later in this file. */
static int
ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);

static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
                          bool *lease_broken);

static enum llioc_iter
ll_iocontrol_call(struct inode *inode, struct file *file,
                  unsigned int cmd, unsigned long arg, int *rcp);

/*
 * Allocate a per-open-file descriptor from the ll_file_data slab cache.
 *
 * Returns the descriptor with fd_write_failed cleared, or NULL if the
 * slab allocation fails.
 */
static struct ll_file_data *ll_file_data_get(void)
{
        struct ll_file_data *fd;

        OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
        if (fd == NULL)
                return NULL;
        fd->fd_write_failed = false;
        return fd;
}

/* Return a per-open-file descriptor to the slab cache; NULL is a no-op. */
static void ll_file_data_put(struct ll_file_data *fd)
{
        if (fd != NULL)
                OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
}

/*
 * Pack the inode's current attributes (fid, mode, times, size, blocks,
 * flags, IO epoch and MDS capability) into @op_data for an MD operation.
 * If @fh is non-NULL it is copied into the operation's open handle.
 * MDS_DATA_MODIFIED is set in op_bias when the inode carries locally
 * modified data the MDS has not yet been told about.
 */
void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
                          struct lustre_handle *fh)
{
        op_data->op_fid1 = ll_i2info(inode)->lli_fid;
        op_data->op_attr.ia_mode = inode->i_mode;
        op_data->op_attr.ia_atime = inode->i_atime;
        op_data->op_attr.ia_mtime = inode->i_mtime;
        op_data->op_attr.ia_ctime = inode->i_ctime;
        op_data->op_attr.ia_size = i_size_read(inode);
        op_data->op_attr_blocks = inode->i_blocks;
        ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
                ll_inode_to_ext_flags(inode->i_flags);
        op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
        if (fh)
                op_data->op_handle = *fh;
        op_data->op_capa1 = ll_mdscapa_get(inode);

        if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
                op_data->op_bias |= MDS_DATA_MODIFIED;
}

/**
 * Closes the IO
epoch and packs all the attributes into @op_data for 103 * the CLOSE rpc. 104 */ 105static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data, 106 struct obd_client_handle *och) 107{ 108 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET | 109 ATTR_MTIME | ATTR_MTIME_SET | 110 ATTR_CTIME | ATTR_CTIME_SET; 111 112 if (!(och->och_flags & FMODE_WRITE)) 113 goto out; 114 115 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode)) 116 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; 117 else 118 ll_ioepoch_close(inode, op_data, &och, 0); 119 120out: 121 ll_pack_inode2opdata(inode, op_data, &och->och_fh); 122 ll_prep_md_op_data(op_data, inode, NULL, NULL, 123 0, 0, LUSTRE_OPC_ANY, NULL); 124} 125 126static int ll_close_inode_openhandle(struct obd_export *md_exp, 127 struct inode *inode, 128 struct obd_client_handle *och, 129 const __u64 *data_version) 130{ 131 struct obd_export *exp = ll_i2mdexp(inode); 132 struct md_op_data *op_data; 133 struct ptlrpc_request *req = NULL; 134 struct obd_device *obd = class_exp2obd(exp); 135 int epoch_close = 1; 136 int rc; 137 138 if (obd == NULL) { 139 /* 140 * XXX: in case of LMV, is this correct to access 141 * ->exp_handle? 142 */ 143 CERROR("Invalid MDC connection handle %#llx\n", 144 ll_i2mdexp(inode)->exp_handle.h_cookie); 145 rc = 0; 146 goto out; 147 } 148 149 op_data = kzalloc(sizeof(*op_data), GFP_NOFS); 150 if (!op_data) { 151 /* XXX We leak openhandle and request here. */ 152 rc = -ENOMEM; 153 goto out; 154 } 155 156 ll_prepare_close(inode, op_data, och); 157 if (data_version != NULL) { 158 /* Pass in data_version implies release. 
*/ 159 op_data->op_bias |= MDS_HSM_RELEASE; 160 op_data->op_data_version = *data_version; 161 op_data->op_lease_handle = och->och_lease_handle; 162 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; 163 } 164 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE); 165 rc = md_close(md_exp, op_data, och->och_mod, &req); 166 if (rc == -EAGAIN) { 167 /* This close must have the epoch closed. */ 168 LASSERT(epoch_close); 169 /* MDS has instructed us to obtain Size-on-MDS attribute from 170 * OSTs and send setattr to back to MDS. */ 171 rc = ll_som_update(inode, op_data); 172 if (rc) { 173 CERROR("inode %lu mdc Size-on-MDS update failed: " 174 "rc = %d\n", inode->i_ino, rc); 175 rc = 0; 176 } 177 } else if (rc) { 178 CERROR("inode %lu mdc close failed: rc = %d\n", 179 inode->i_ino, rc); 180 } 181 182 /* DATA_MODIFIED flag was successfully sent on close, cancel data 183 * modification flag. */ 184 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) { 185 struct ll_inode_info *lli = ll_i2info(inode); 186 187 spin_lock(&lli->lli_lock); 188 lli->lli_flags &= ~LLIF_DATA_MODIFIED; 189 spin_unlock(&lli->lli_lock); 190 } 191 192 if (rc == 0) { 193 rc = ll_objects_destroy(req, inode); 194 if (rc) 195 CERROR("inode %lu ll_objects destroy: rc = %d\n", 196 inode->i_ino, rc); 197 } 198 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) { 199 struct mdt_body *body; 200 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 201 if (!(body->valid & OBD_MD_FLRELEASED)) 202 rc = -EBUSY; 203 } 204 205 ll_finish_md_op_data(op_data); 206 207out: 208 if (exp_connect_som(exp) && !epoch_close && 209 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) { 210 ll_queue_done_writing(inode, LLIF_DONE_WRITING); 211 } else { 212 md_clear_open_replay_data(md_exp, och); 213 /* Free @och if it is not waiting for DONE_WRITING. 
*/ 214 och->och_fh.cookie = DEAD_HANDLE_MAGIC; 215 OBD_FREE_PTR(och); 216 } 217 if (req) /* This is close request */ 218 ptlrpc_req_finished(req); 219 return rc; 220} 221 222int ll_md_real_close(struct inode *inode, fmode_t fmode) 223{ 224 struct ll_inode_info *lli = ll_i2info(inode); 225 struct obd_client_handle **och_p; 226 struct obd_client_handle *och; 227 __u64 *och_usecount; 228 int rc = 0; 229 230 if (fmode & FMODE_WRITE) { 231 och_p = &lli->lli_mds_write_och; 232 och_usecount = &lli->lli_open_fd_write_count; 233 } else if (fmode & FMODE_EXEC) { 234 och_p = &lli->lli_mds_exec_och; 235 och_usecount = &lli->lli_open_fd_exec_count; 236 } else { 237 LASSERT(fmode & FMODE_READ); 238 och_p = &lli->lli_mds_read_och; 239 och_usecount = &lli->lli_open_fd_read_count; 240 } 241 242 mutex_lock(&lli->lli_och_mutex); 243 if (*och_usecount > 0) { 244 /* There are still users of this handle, so skip 245 * freeing it. */ 246 mutex_unlock(&lli->lli_och_mutex); 247 return 0; 248 } 249 250 och=*och_p; 251 *och_p = NULL; 252 mutex_unlock(&lli->lli_och_mutex); 253 254 if (och != NULL) { 255 /* There might be a race and this handle may already 256 be closed. */ 257 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, 258 inode, och, NULL); 259 } 260 261 return rc; 262} 263 264static int ll_md_close(struct obd_export *md_exp, struct inode *inode, 265 struct file *file) 266{ 267 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 268 struct ll_inode_info *lli = ll_i2info(inode); 269 int rc = 0; 270 271 /* clear group lock, if present */ 272 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) 273 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid); 274 275 if (fd->fd_lease_och != NULL) { 276 bool lease_broken; 277 278 /* Usually the lease is not released when the 279 * application crashed, we need to release here. */ 280 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken); 281 CDEBUG(rc ? 
D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n", 282 PFID(&lli->lli_fid), rc, lease_broken); 283 284 fd->fd_lease_och = NULL; 285 } 286 287 if (fd->fd_och != NULL) { 288 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL); 289 fd->fd_och = NULL; 290 goto out; 291 } 292 293 /* Let's see if we have good enough OPEN lock on the file and if 294 we can skip talking to MDS */ 295 if (file->f_dentry->d_inode) { /* Can this ever be false? */ 296 int lockmode; 297 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK; 298 struct lustre_handle lockh; 299 struct inode *inode = file->f_dentry->d_inode; 300 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}}; 301 302 mutex_lock(&lli->lli_och_mutex); 303 if (fd->fd_omode & FMODE_WRITE) { 304 lockmode = LCK_CW; 305 LASSERT(lli->lli_open_fd_write_count); 306 lli->lli_open_fd_write_count--; 307 } else if (fd->fd_omode & FMODE_EXEC) { 308 lockmode = LCK_PR; 309 LASSERT(lli->lli_open_fd_exec_count); 310 lli->lli_open_fd_exec_count--; 311 } else { 312 lockmode = LCK_CR; 313 LASSERT(lli->lli_open_fd_read_count); 314 lli->lli_open_fd_read_count--; 315 } 316 mutex_unlock(&lli->lli_och_mutex); 317 318 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode), 319 LDLM_IBITS, &policy, lockmode, 320 &lockh)) { 321 rc = ll_md_real_close(file->f_dentry->d_inode, 322 fd->fd_omode); 323 } 324 } else { 325 CERROR("Releasing a file %p with negative dentry %p. Name %s", 326 file, file->f_dentry, file->f_dentry->d_name.name); 327 } 328 329out: 330 LUSTRE_FPRIVATE(file) = NULL; 331 ll_file_data_put(fd); 332 ll_capa_close(inode); 333 334 return rc; 335} 336 337/* While this returns an error code, fput() the caller does not, so we need 338 * to make every effort to clean up all of our state here. Also, applications 339 * rarely check close errors and even if an error is returned they will not 340 * re-try the close call. 
 */
int ll_file_release(struct inode *inode, struct file *file)
{
        struct ll_file_data *fd;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ll_inode_info *lli = ll_i2info(inode);
        int rc;

        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
               inode->i_generation, inode);

#ifdef CONFIG_FS_POSIX_ACL
        /* Remote-client ACL state is torn down when the root directory
         * of the mount is released. */
        if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
            inode == inode->i_sb->s_root->d_inode) {
                struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

                LASSERT(fd != NULL);
                if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
                        fd->fd_flags &= ~LL_FILE_RMTACL;
                        rct_del(&sbi->ll_rct, current_pid());
                        et_search_free(&sbi->ll_et, current_pid());
                }
        }
#endif

        if (inode->i_sb->s_root != file->f_dentry)
                ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
        fd = LUSTRE_FPRIVATE(file);
        LASSERT(fd != NULL);

        /* The last ref on @file, maybe not the owner pid of statahead.
         * Different processes can open the same dir, "ll_opendir_key" means:
         * it is me that should stop the statahead thread. */
        if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
            lli->lli_opendir_pid != 0)
                ll_stop_statahead(inode, lli->lli_opendir_key);

        /* Releasing the root dentry needs no MDS close. */
        if (inode->i_sb->s_root == file->f_dentry) {
                LUSTRE_FPRIVATE(file) = NULL;
                ll_file_data_put(fd);
                return 0;
        }

        if (!S_ISDIR(inode->i_mode)) {
                /* Fold any asynchronous write errors into this close. */
                lov_read_and_clear_async_rc(lli->lli_clob);
                lli->lli_async_rc = 0;
        }

        rc = ll_md_close(sbi->ll_md_exp, inode, file);

        if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
                libcfs_debug_dumplog();

        return rc;
}

/*
 * Send an open intent to the MDS for @file and install the returned
 * lock/inode state.  Called for NFSD-style opens and when a cached open
 * handle disappeared between revalidate and ll_file_open().
 */
static int ll_intent_file_open(struct file *file, void *lmm,
                               int lmmsize, struct lookup_intent *itp)
{
        struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
        struct dentry *parent = file->f_dentry->d_parent;
        const char *name = file->f_dentry->d_name.name;
        const int len = file->f_dentry->d_name.len;
        struct md_op_data *op_data;
        struct ptlrpc_request *req;
        __u32 opc = LUSTRE_OPC_ANY;
        int rc;

        if (!parent)
                return -ENOENT;

        /* Usually we come here only for NFSD, and we want open lock.
           But we can also get here with pre 2.6.15 patchless kernels, and in
           that case that lock is also ok */
        /* We can also get here if there was cached open handle in revalidate_it
         * but it disappeared while we were getting from there to ll_file_open.
         * But this means this file was closed and immediately opened which
         * makes a good candidate for using OPEN lock */
        /* If lmmsize & lmm are not 0, we are just setting stripe info
         * parameters. No need for the open lock */
        if (lmm == NULL && lmmsize == 0) {
                itp->it_flags |= MDS_OPEN_LOCK;
                if (itp->it_flags & FMODE_WRITE)
                        opc = LUSTRE_OPC_CREATE;
        }

        op_data = ll_prep_md_op_data(NULL, parent->d_inode,
                                     file->f_dentry->d_inode, name, len,
                                     O_RDWR, opc, NULL);
        if (IS_ERR(op_data))
                return PTR_ERR(op_data);

        itp->it_flags |= MDS_OPEN_BY_FID;
        rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
                            0 /*unused */, &req, ll_md_blocking_ast, 0);
        ll_finish_md_op_data(op_data);
        if (rc == -ESTALE) {
                /* reason for keep own exit path - don`t flood log
                 * with messages with -ESTALE errors.
                 */
                if (!it_disposition(itp, DISP_OPEN_OPEN) ||
                    it_open_error(DISP_OPEN_OPEN, itp))
                        goto out;
                ll_release_openhandle(file->f_dentry, itp);
                goto out;
        }

        if (it_disposition(itp, DISP_LOOKUP_NEG)) {
                rc = -ENOENT;
                goto out;
        }

        if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
                rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
                CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
                goto out;
        }

        rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
        if (!rc && itp->d.lustre.it_lock_mode)
                ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
                                 itp, NULL);

out:
        ptlrpc_req_finished(req);
        ll_intent_drop_lock(itp);

        return rc;
}

/**
 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
 * not believe attributes if a few ioepoch holders exist. Attributes for
 * previous ioepoch if new one is opened are also skipped by MDS.
475 */ 476void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch) 477{ 478 if (ioepoch && lli->lli_ioepoch != ioepoch) { 479 lli->lli_ioepoch = ioepoch; 480 CDEBUG(D_INODE, "Epoch %llu opened on "DFID"\n", 481 ioepoch, PFID(&lli->lli_fid)); 482 } 483} 484 485static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, 486 struct obd_client_handle *och) 487{ 488 struct ptlrpc_request *req = it->d.lustre.it_data; 489 struct mdt_body *body; 490 491 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 492 och->och_fh = body->handle; 493 och->och_fid = body->fid1; 494 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle; 495 och->och_magic = OBD_CLIENT_HANDLE_MAGIC; 496 och->och_flags = it->it_flags; 497 498 return md_set_open_replay_data(md_exp, och, it); 499} 500 501static int ll_local_open(struct file *file, struct lookup_intent *it, 502 struct ll_file_data *fd, struct obd_client_handle *och) 503{ 504 struct inode *inode = file->f_dentry->d_inode; 505 struct ll_inode_info *lli = ll_i2info(inode); 506 507 LASSERT(!LUSTRE_FPRIVATE(file)); 508 509 LASSERT(fd != NULL); 510 511 if (och) { 512 struct ptlrpc_request *req = it->d.lustre.it_data; 513 struct mdt_body *body; 514 int rc; 515 516 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); 517 if (rc != 0) 518 return rc; 519 520 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 521 ll_ioepoch_open(lli, body->ioepoch); 522 } 523 524 LUSTRE_FPRIVATE(file) = fd; 525 ll_readahead_init(inode, &fd->fd_ras); 526 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); 527 return 0; 528} 529 530/* Open a file, and (for the very first open) create objects on the OSTs at 531 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object 532 * creation or open until ll_lov_setstripe() ioctl is called. 533 * 534 * If we already have the stripe MD locally then we don't request it in 535 * md_open(), by passing a lmm_size = 0. 
 *
 * It is up to the application to ensure no other processes open this file
 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
 * used. We might be able to avoid races of that sort by getting lli_open_sem
 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 */
int ll_file_open(struct inode *inode, struct file *file)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct lookup_intent *it, oit = { .it_op = IT_OPEN,
                                          .it_flags = file->f_flags };
        struct obd_client_handle **och_p = NULL;
        __u64 *och_usecount = NULL;
        struct ll_file_data *fd;
        int rc = 0, opendir_set = 0;

        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
               inode->i_generation, inode, file->f_flags);

        it = file->private_data; /* XXX: compat macro */
        file->private_data = NULL; /* prevent ll_local_open assertion */

        fd = ll_file_data_get();
        if (fd == NULL) {
                rc = -ENOMEM;
                goto out_openerr;
        }

        fd->fd_file = file;
        if (S_ISDIR(inode->i_mode)) {
                /* First opener of a directory becomes the statahead owner. */
                spin_lock(&lli->lli_sa_lock);
                if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
                    lli->lli_opendir_pid == 0) {
                        lli->lli_opendir_key = fd;
                        lli->lli_opendir_pid = current_pid();
                        opendir_set = 1;
                }
                spin_unlock(&lli->lli_sa_lock);
        }

        /* The mount root needs no MDS open handle. */
        if (inode->i_sb->s_root == file->f_dentry) {
                LUSTRE_FPRIVATE(file) = fd;
                return 0;
        }

        if (!it || !it->d.lustre.it_disposition) {
                /* Convert f_flags into access mode. We cannot use file->f_mode,
                 * because everything but O_ACCMODE mask was stripped from
                 * there */
                if ((oit.it_flags + 1) & O_ACCMODE)
                        oit.it_flags++;
                if (file->f_flags & O_TRUNC)
                        oit.it_flags |= FMODE_WRITE;

                /* kernel only call f_op->open in dentry_open.  filp_open calls
                 * dentry_open after call to open_namei that checks permissions.
                 * Only nfsd_open call dentry_open directly without checking
                 * permissions and because of that this code below is safe. */
                if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
                        oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;

                /* We do not want O_EXCL here, presumably we opened the file
                 * already? XXX - NFS implications? */
                oit.it_flags &= ~O_EXCL;

                /* bug20584, if "it_flags" contains O_CREAT, the file will be
                 * created if necessary, then "IT_CREAT" should be set to keep
                 * consistent with it */
                if (oit.it_flags & O_CREAT)
                        oit.it_op |= IT_CREAT;

                it = &oit;
        }

restart:
        /* Let's see if we have file open on MDS already. */
        if (it->it_flags & FMODE_WRITE) {
                och_p = &lli->lli_mds_write_och;
                och_usecount = &lli->lli_open_fd_write_count;
        } else if (it->it_flags & FMODE_EXEC) {
                och_p = &lli->lli_mds_exec_och;
                och_usecount = &lli->lli_open_fd_exec_count;
        } else {
                och_p = &lli->lli_mds_read_och;
                och_usecount = &lli->lli_open_fd_read_count;
        }

        mutex_lock(&lli->lli_och_mutex);
        if (*och_p) { /* Open handle is present */
                if (it_disposition(it, DISP_OPEN_OPEN)) {
                        /* Well, there's extra open request that we do not need,
                           let's close it somehow. This will decref request. */
                        rc = it_open_error(DISP_OPEN_OPEN, it);
                        if (rc) {
                                mutex_unlock(&lli->lli_och_mutex);
                                goto out_openerr;
                        }

                        ll_release_openhandle(file->f_dentry, it);
                }
                (*och_usecount)++;

                rc = ll_local_open(file, it, fd, NULL);
                if (rc) {
                        (*och_usecount)--;
                        mutex_unlock(&lli->lli_och_mutex);
                        goto out_openerr;
                }
        } else {
                LASSERT(*och_usecount == 0);
                if (!it->d.lustre.it_disposition) {
                        /* We cannot just request lock handle now, new ELC code
                           means that one of other OPEN locks for this file
                           could be cancelled, and since blocking ast handler
                           would attempt to grab och_mutex as well, that would
                           result in a deadlock */
                        mutex_unlock(&lli->lli_och_mutex);
                        it->it_create_mode |= M_CHECK_STALE;
                        rc = ll_intent_file_open(file, NULL, 0, it);
                        it->it_create_mode &= ~M_CHECK_STALE;
                        if (rc)
                                goto out_openerr;

                        goto restart;
                }
                *och_p = kzalloc(sizeof(struct obd_client_handle), GFP_NOFS);
                if (!*och_p) {
                        rc = -ENOMEM;
                        goto out_och_free;
                }

                (*och_usecount)++;

                /* md_intent_lock() didn't get a request ref if there was an
                 * open error, so don't do cleanup on the request here
                 * (bug 3430) */
                /* XXX (green): Should not we bail out on any error here, not
                 * just open error? */
                rc = it_open_error(DISP_OPEN_OPEN, it);
                if (rc)
                        goto out_och_free;

                LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));

                rc = ll_local_open(file, it, fd, *och_p);
                if (rc)
                        goto out_och_free;
        }
        mutex_unlock(&lli->lli_och_mutex);
        fd = NULL;

        /* Must do this outside lli_och_mutex lock to prevent deadlock where
           different kind of OPEN lock for this same inode gets cancelled
           by ldlm_cancel_lru */
        if (!S_ISREG(inode->i_mode))
                goto out_och_free;

        ll_capa_open(inode);

        if (!lli->lli_has_smd &&
            (cl_is_lov_delay_create(file->f_flags) ||
             (file->f_mode & FMODE_WRITE) == 0)) {
                CDEBUG(D_INODE, "object creation was delayed\n");
                goto out_och_free;
        }
        cl_lov_delay_create_clear(&file->f_flags);
        goto out_och_free;

out_och_free:
        /* On success (rc == 0) this label is just the normal exit path. */
        if (rc) {
                if (och_p && *och_p) {
                        OBD_FREE(*och_p, sizeof (struct obd_client_handle));
                        *och_p = NULL; /* OBD_FREE writes some magic there */
                        (*och_usecount)--;
                }
                mutex_unlock(&lli->lli_och_mutex);

out_openerr:
                if (opendir_set != 0)
                        ll_stop_statahead(inode, lli->lli_opendir_key);
                if (fd != NULL)
                        ll_file_data_put(fd);
        } else {
                ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
        }

        if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
                ptlrpc_req_finished(it->d.lustre.it_data);
                it_clear_disposition(it, DISP_ENQ_OPEN_REF);
        }

        return rc;
}

/*
 * Blocking AST for lease locks: cancel the lock asynchronously when it
 * blocks another request; nothing to do at cancel time.
 */
static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
                        struct ldlm_lock_desc *desc, void *data, int flag)
{
        int rc;
        struct lustre_handle lockh;

        switch (flag) {
        case LDLM_CB_BLOCKING:
                ldlm_lock2handle(lock, &lockh);
                rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
                if (rc < 0) {
                        CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
                        return rc;
                }
                break;
        case LDLM_CB_CANCELING:
                /* do nothing */
                break;
        }
        return 0;
}

/**
 * Acquire a lease and open the
file.
 */
static struct obd_client_handle *
ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
              __u64 open_flags)
{
        struct lookup_intent it = { .it_op = IT_OPEN };
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct md_op_data *op_data;
        struct ptlrpc_request *req;
        struct lustre_handle old_handle = { 0 };
        struct obd_client_handle *och = NULL;
        int rc;
        int rc2;

        /* A lease is either a read lease or a write lease, never both. */
        if (fmode != FMODE_WRITE && fmode != FMODE_READ)
                return ERR_PTR(-EINVAL);

        if (file != NULL) {
                struct ll_inode_info *lli = ll_i2info(inode);
                struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
                struct obd_client_handle **och_p;
                __u64 *och_usecount;

                if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
                        return ERR_PTR(-EPERM);

                /* Get the openhandle of the file */
                rc = -EBUSY;
                mutex_lock(&lli->lli_och_mutex);
                if (fd->fd_lease_och != NULL) {
                        mutex_unlock(&lli->lli_och_mutex);
                        return ERR_PTR(rc);
                }

                /* Take over the shared MDS open handle, but only if this
                 * file descriptor is its sole user. */
                if (fd->fd_och == NULL) {
                        if (file->f_mode & FMODE_WRITE) {
                                LASSERT(lli->lli_mds_write_och != NULL);
                                och_p = &lli->lli_mds_write_och;
                                och_usecount = &lli->lli_open_fd_write_count;
                        } else {
                                LASSERT(lli->lli_mds_read_och != NULL);
                                och_p = &lli->lli_mds_read_och;
                                och_usecount = &lli->lli_open_fd_read_count;
                        }
                        if (*och_usecount == 1) {
                                fd->fd_och = *och_p;
                                *och_p = NULL;
                                *och_usecount = 0;
                                rc = 0;
                        }
                }
                mutex_unlock(&lli->lli_och_mutex);
                if (rc < 0) /* more than 1 opener */
                        return ERR_PTR(rc);

                LASSERT(fd->fd_och != NULL);
                old_handle = fd->fd_och->och_fh;
        }

        och = kzalloc(sizeof(*och), GFP_NOFS);
        if (!och)
                return ERR_PTR(-ENOMEM);

        op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
                                     LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data)) {
                rc = PTR_ERR(op_data);
                goto out;
        }

        /* To tell the MDT this openhandle is from the same owner */
        op_data->op_handle = old_handle;

        it.it_flags = fmode | open_flags;
        it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
        rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
                            ll_md_blocking_lease_ast,
        /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
         * it can be cancelled which may mislead applications that the lease is
         * broken;
         * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
         * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
         * doesn't deal with openhandle, so normal openhandle will be leaked. */
                            LDLM_FL_NO_LRU | LDLM_FL_EXCL);
        ll_finish_md_op_data(op_data);
        ptlrpc_req_finished(req);
        if (rc < 0)
                goto out_release_it;

        if (it_disposition(&it, DISP_LOOKUP_NEG)) {
                rc = -ENOENT;
                goto out_release_it;
        }

        rc = it_open_error(DISP_OPEN_OPEN, &it);
        if (rc)
                goto out_release_it;

        LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
        ll_och_fill(sbi->ll_md_exp, &it, och);

        if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */ {
                rc = -EOPNOTSUPP;
                goto out_close;
        }

        /* already get lease, handle lease lock */
        ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
        if (it.d.lustre.it_lock_mode == 0 ||
            it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
                /* open lock must return for lease */
                CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
                       PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
                       it.d.lustre.it_lock_bits);
                rc = -EPROTO;
                goto out_close;
        }

        ll_intent_release(&it);
        return och;

out_close:
        rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
        if (rc2)
                CERROR("Close openhandle returned %d\n", rc2);

        /* cancel open lock */
        if (it.d.lustre.it_lock_mode != 0) {
                ldlm_lock_decref_and_cancel(&och->och_lease_handle,
                                            it.d.lustre.it_lock_mode);
                it.d.lustre.it_lock_mode = 0;
        }
out_release_it:
        ll_intent_release(&it);
out:
        OBD_FREE_PTR(och);
        return ERR_PTR(rc);
}

/**
 * Release lease and close the file.
 * It will check if the lease has ever broken.
 */
static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
                          bool *lease_broken)
{
        struct ldlm_lock *lock;
        bool cancelled = true;
        int rc;

        /* If the lease lock is gone, treat the lease as already broken. */
        lock = ldlm_handle2lock(&och->och_lease_handle);
        if (lock != NULL) {
                lock_res_and_lock(lock);
                cancelled = ldlm_is_cancel(lock);
                unlock_res_and_lock(lock);
                ldlm_lock_put(lock);
        }

        CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
               PFID(&ll_i2info(inode)->lli_fid), cancelled);

        if (!cancelled)
                ldlm_cli_cancel(&och->och_lease_handle, 0);
        if (lease_broken != NULL)
                *lease_broken = cancelled;

        rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
                                       NULL);
        return rc;
}

/* Fills the obdo with the attributes for the lsm */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
                          struct obd_capa *capa, struct obdo *obdo,
                          __u64 ioepoch, int sync)
{
        struct ptlrpc_request_set *set;
        struct obd_info oinfo = { { { 0 } } };
        int rc;

        LASSERT(lsm != NULL);

        oinfo.oi_md = lsm;
        oinfo.oi_oa = obdo;
        oinfo.oi_oa->o_oi = lsm->lsm_oi;
        oinfo.oi_oa->o_mode = S_IFREG;
        oinfo.oi_oa->o_ioepoch = ioepoch;
        oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
                               OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
                               OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
                               OBD_MD_FLMTIME | OBD_MD_FLCTIME |
                               OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
                               OBD_MD_FLDATAVERSION;
        oinfo.oi_capa = capa;
        if (sync) {
                /* @sync requests the getattr under a server-side lock. */
                oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
                oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
        }

        set = ptlrpc_prep_set();
        if (set == NULL) {
                CERROR("can't allocate ptlrpc set\n");
                rc = -ENOMEM;
        } else {
                rc = obd_getattr_async(exp, &oinfo, set);
                if (rc == 0)
                        rc = ptlrpc_set_wait(set);
                ptlrpc_set_destroy(set);
        }
        if (rc == 0)
                /* Keep only the attributes the OSTs are authoritative for. */
                oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
                                         OBD_MD_FLATIME | OBD_MD_FLMTIME |
                                         OBD_MD_FLCTIME | OBD_MD_FLSIZE |
                                         OBD_MD_FLDATAVERSION);
        return rc;
}

/**
 * Performs the getattr on the inode and updates its fields.
 * If @sync != 0, perform the getattr under the server-side lock.
 */
/*
 * Refresh inode attributes from the OST objects (glimpse) and fold the
 * result back into the VFS inode.
 *
 * \param inode   inode to refresh
 * \param obdo    scratch obdo filled with the aggregated OST attributes
 * \param ioepoch IO epoch passed through to ll_lsm_getattr()
 * \param sync    non-zero to force a synchronous getattr
 *
 * \retval 0 on success, negative errno on failure
 */
int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
		     __u64 ioepoch, int sync)
{
	struct obd_capa *capa = ll_mdscapa_get(inode);
	struct lov_stripe_md *lsm;
	int rc;

	lsm = ccc_inode_lsm_get(inode);
	rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
			    capa, obdo, ioepoch, sync);
	capa_put(capa);
	if (rc == 0) {
		/* without striping, report the object id from the obdo */
		struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;

		obdo_refresh_inode(inode, obdo, obdo->o_valid);
		CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
		       " blksize %lu\n", POSTID(oi), i_size_read(inode),
		       (unsigned long long)inode->i_blocks,
		       1UL << inode->i_blkbits);
	}
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}

/*
 * Merge the size/blocks/timestamps cached in the cl_object layer with the
 * values last obtained from the MDS (lli_lvb) and write the winners back
 * into the VFS inode.  Serialized by the inode size lock.
 *
 * \retval 0 on success, negative errno from cl_object_attr_get() otherwise
 */
int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct cl_object *obj = lli->lli_clob;
	struct cl_attr *attr = ccc_env_thread_attr(env);
	struct ost_lvb lvb;
	int rc = 0;

	ll_inode_size_lock(inode);
	/* merge timestamps the most recently obtained from mds with
	   timestamps obtained from osts */
	LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
	LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
	LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;

	lvb.lvb_size = i_size_read(inode);
	lvb.lvb_blocks = inode->i_blocks;
	lvb.lvb_mtime = LTIME_S(inode->i_mtime);
	lvb.lvb_atime = LTIME_S(inode->i_atime);
	lvb.lvb_ctime = LTIME_S(inode->i_ctime);

	cl_object_attr_lock(obj);
	rc = cl_object_attr_get(env, obj, attr);
	cl_object_attr_unlock(obj);

	if (rc == 0) {
		/* keep the newer of MDS vs OST timestamps */
		if (lvb.lvb_atime < attr->cat_atime)
			lvb.lvb_atime = attr->cat_atime;
		if (lvb.lvb_ctime < attr->cat_ctime)
			lvb.lvb_ctime = attr->cat_ctime;
		if (lvb.lvb_mtime < attr->cat_mtime)
			lvb.lvb_mtime = attr->cat_mtime;

		CDEBUG(D_VFSTRACE, DFID" updating i_size %llu\n",
		       PFID(&lli->lli_fid), attr->cat_size);
		cl_isize_write_nolock(inode, attr->cat_size);

		inode->i_blocks = attr->cat_blocks;

		LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
		LTIME_S(inode->i_atime) = lvb.lvb_atime;
		LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
	}
	ll_inode_size_unlock(inode);

	return rc;
}

/*
 * Glimpse the given stripe metadata directly from the OSTs (no capability,
 * epoch 0, async) and fill size/blocks/times into *st.
 */
int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
		     lstat_t *st)
{
	struct obdo obdo = { 0 };
	int rc;

	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
	if (rc == 0) {
		st->st_size = obdo.o_size;
		st->st_blocks = obdo.o_blocks;
		st->st_mtime = obdo.o_mtime;
		st->st_atime = obdo.o_atime;
		st->st_ctime = obdo.o_ctime;
	}
	return rc;
}

/*
 * Return true if access through @file must not update atime (per-file,
 * per-inode, per-mount and per-superblock noatime settings combined).
 */
static bool file_is_noatime(const struct file *file)
{
	const struct vfsmount *mnt = file->f_path.mnt;
	const struct inode *inode = file->f_path.dentry->d_inode;

	/* Adapted from file_accessed() and touch_atime().*/
	if (file->f_flags & O_NOATIME)
		return true;

	if (inode->i_flags & S_NOATIME)
		return true;

	if (IS_NOATIME(inode))
		return true;

	if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
		return true;

	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
		return true;

	if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
		return true;

	return false;
}

/*
 * Initialize a cl_io for a read or write on @file: propagate the
 * O_NONBLOCK/O_APPEND/O_SYNC flags and pick the DLM locking mode
 * (never for nolock files, mandatory for O_APPEND writes).
 */
void ll_io_init(struct cl_io *io, const struct file *file, int write)
{
	struct inode *inode = file->f_dentry->d_inode;

	io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
	if (write) {
		io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
		io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
				      file->f_flags & O_DIRECT ||
				      IS_SYNC(inode);
	}
	io->ci_obj = ll_i2info(inode)->lli_clob;
	io->ci_lockreq = CILR_MAYBE;
	if (ll_file_nolock(file)) {
		io->ci_lockreq = CILR_NEVER;
		io->ci_no_srvlock = 1;
	} else if (file->f_flags & O_APPEND) {
		io->ci_lockreq = CILR_MANDATORY;
	}

	io->ci_noatime = file_is_noatime(file);
}
1107 io->ci_no_srvlock = 1; 1108 } else if (file->f_flags & O_APPEND) { 1109 io->ci_lockreq = CILR_MANDATORY; 1110 } 1111 1112 io->ci_noatime = file_is_noatime(file); 1113} 1114 1115static ssize_t 1116ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, 1117 struct file *file, enum cl_io_type iot, 1118 loff_t *ppos, size_t count) 1119{ 1120 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode); 1121 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 1122 struct cl_io *io; 1123 ssize_t result; 1124 1125restart: 1126 io = ccc_env_thread_io(env); 1127 ll_io_init(io, file, iot == CIT_WRITE); 1128 1129 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) { 1130 struct vvp_io *vio = vvp_env_io(env); 1131 struct ccc_io *cio = ccc_env_io(env); 1132 int write_mutex_locked = 0; 1133 1134 cio->cui_fd = LUSTRE_FPRIVATE(file); 1135 vio->cui_io_subtype = args->via_io_subtype; 1136 1137 switch (vio->cui_io_subtype) { 1138 case IO_NORMAL: 1139 cio->cui_iter = args->u.normal.via_iter; 1140 cio->cui_iocb = args->u.normal.via_iocb; 1141 if ((iot == CIT_WRITE) && 1142 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { 1143 if (mutex_lock_interruptible(&lli-> 1144 lli_write_mutex)) { 1145 result = -ERESTARTSYS; 1146 goto out; 1147 } 1148 write_mutex_locked = 1; 1149 } else if (iot == CIT_READ) { 1150 down_read(&lli->lli_trunc_sem); 1151 } 1152 break; 1153 case IO_SPLICE: 1154 vio->u.splice.cui_pipe = args->u.splice.via_pipe; 1155 vio->u.splice.cui_flags = args->u.splice.via_flags; 1156 break; 1157 default: 1158 CERROR("Unknown IO type - %u\n", vio->cui_io_subtype); 1159 LBUG(); 1160 } 1161 result = cl_io_loop(env, io); 1162 if (write_mutex_locked) 1163 mutex_unlock(&lli->lli_write_mutex); 1164 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ) 1165 up_read(&lli->lli_trunc_sem); 1166 } else { 1167 /* cl_io_rw_init() handled IO */ 1168 result = io->ci_result; 1169 } 1170 1171 if (io->ci_nob > 0) { 1172 result = io->ci_nob; 1173 *ppos = 
io->u.ci_wr.wr.crw_pos; 1174 } 1175 goto out; 1176out: 1177 cl_io_fini(env, io); 1178 /* If any bit been read/written (result != 0), we just return 1179 * short read/write instead of restart io. */ 1180 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) { 1181 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n", 1182 iot == CIT_READ ? "read" : "write", 1183 file->f_dentry->d_name.name, *ppos, count); 1184 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob); 1185 goto restart; 1186 } 1187 1188 if (iot == CIT_READ) { 1189 if (result >= 0) 1190 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), 1191 LPROC_LL_READ_BYTES, result); 1192 } else if (iot == CIT_WRITE) { 1193 if (result >= 0) { 1194 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), 1195 LPROC_LL_WRITE_BYTES, result); 1196 fd->fd_write_failed = false; 1197 } else if (result != -ERESTARTSYS) { 1198 fd->fd_write_failed = true; 1199 } 1200 } 1201 1202 return result; 1203} 1204 1205static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 1206{ 1207 struct lu_env *env; 1208 struct vvp_io_args *args; 1209 ssize_t result; 1210 int refcheck; 1211 1212 env = cl_env_get(&refcheck); 1213 if (IS_ERR(env)) 1214 return PTR_ERR(env); 1215 1216 args = vvp_env_args(env, IO_NORMAL); 1217 args->u.normal.via_iter = to; 1218 args->u.normal.via_iocb = iocb; 1219 1220 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ, 1221 &iocb->ki_pos, iov_iter_count(to)); 1222 cl_env_put(env, &refcheck); 1223 return result; 1224} 1225 1226/* 1227 * Write to a file (through the page cache). 
1228 */ 1229static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 1230{ 1231 struct lu_env *env; 1232 struct vvp_io_args *args; 1233 ssize_t result; 1234 int refcheck; 1235 1236 env = cl_env_get(&refcheck); 1237 if (IS_ERR(env)) 1238 return PTR_ERR(env); 1239 1240 args = vvp_env_args(env, IO_NORMAL); 1241 args->u.normal.via_iter = from; 1242 args->u.normal.via_iocb = iocb; 1243 1244 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE, 1245 &iocb->ki_pos, iov_iter_count(from)); 1246 cl_env_put(env, &refcheck); 1247 return result; 1248} 1249 1250/* 1251 * Send file content (through pagecache) somewhere with helper 1252 */ 1253static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, 1254 struct pipe_inode_info *pipe, size_t count, 1255 unsigned int flags) 1256{ 1257 struct lu_env *env; 1258 struct vvp_io_args *args; 1259 ssize_t result; 1260 int refcheck; 1261 1262 env = cl_env_get(&refcheck); 1263 if (IS_ERR(env)) 1264 return PTR_ERR(env); 1265 1266 args = vvp_env_args(env, IO_SPLICE); 1267 args->u.splice.via_pipe = pipe; 1268 args->u.splice.via_flags = flags; 1269 1270 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count); 1271 cl_env_put(env, &refcheck); 1272 return result; 1273} 1274 1275static int ll_lov_recreate(struct inode *inode, struct ost_id *oi, u32 ost_idx) 1276{ 1277 struct obd_export *exp = ll_i2dtexp(inode); 1278 struct obd_trans_info oti = { 0 }; 1279 struct obdo *oa = NULL; 1280 int lsm_size; 1281 int rc = 0; 1282 struct lov_stripe_md *lsm = NULL, *lsm2; 1283 1284 OBDO_ALLOC(oa); 1285 if (oa == NULL) 1286 return -ENOMEM; 1287 1288 lsm = ccc_inode_lsm_get(inode); 1289 if (!lsm_has_objects(lsm)) { 1290 rc = -ENOENT; 1291 goto out; 1292 } 1293 1294 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) * 1295 (lsm->lsm_stripe_count)); 1296 1297 OBD_ALLOC_LARGE(lsm2, lsm_size); 1298 if (lsm2 == NULL) { 1299 rc = -ENOMEM; 1300 goto out; 1301 } 1302 1303 oa->o_oi = *oi; 1304 oa->o_nlink = 
ost_idx; 1305 oa->o_flags |= OBD_FL_RECREATE_OBJS; 1306 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP; 1307 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | 1308 OBD_MD_FLMTIME | OBD_MD_FLCTIME); 1309 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); 1310 memcpy(lsm2, lsm, lsm_size); 1311 ll_inode_size_lock(inode); 1312 rc = obd_create(NULL, exp, oa, &lsm2, &oti); 1313 ll_inode_size_unlock(inode); 1314 1315 OBD_FREE_LARGE(lsm2, lsm_size); 1316 goto out; 1317out: 1318 ccc_inode_lsm_put(inode, lsm); 1319 OBDO_FREE(oa); 1320 return rc; 1321} 1322 1323static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg) 1324{ 1325 struct ll_recreate_obj ucreat; 1326 struct ost_id oi; 1327 1328 if (!capable(CFS_CAP_SYS_ADMIN)) 1329 return -EPERM; 1330 1331 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg, 1332 sizeof(ucreat))) 1333 return -EFAULT; 1334 1335 ostid_set_seq_mdt0(&oi); 1336 ostid_set_id(&oi, ucreat.lrc_id); 1337 return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx); 1338} 1339 1340static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg) 1341{ 1342 struct lu_fid fid; 1343 struct ost_id oi; 1344 u32 ost_idx; 1345 1346 if (!capable(CFS_CAP_SYS_ADMIN)) 1347 return -EPERM; 1348 1349 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid))) 1350 return -EFAULT; 1351 1352 fid_to_ostid(&fid, &oi); 1353 ost_idx = (fid_seq(&fid) >> 16) & 0xffff; 1354 return ll_lov_recreate(inode, &oi, ost_idx); 1355} 1356 1357int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, 1358 int flags, struct lov_user_md *lum, int lum_size) 1359{ 1360 struct lov_stripe_md *lsm = NULL; 1361 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; 1362 int rc = 0; 1363 1364 lsm = ccc_inode_lsm_get(inode); 1365 if (lsm != NULL) { 1366 ccc_inode_lsm_put(inode, lsm); 1367 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n", 1368 inode->i_ino); 1369 rc = -EEXIST; 1370 goto out; 1371 } 1372 1373 
ll_inode_size_lock(inode); 1374 rc = ll_intent_file_open(file, lum, lum_size, &oit); 1375 if (rc) 1376 goto out_unlock; 1377 rc = oit.d.lustre.it_status; 1378 if (rc < 0) 1379 goto out_req_free; 1380 1381 ll_release_openhandle(file->f_dentry, &oit); 1382 1383out_unlock: 1384 ll_inode_size_unlock(inode); 1385 ll_intent_release(&oit); 1386 ccc_inode_lsm_put(inode, lsm); 1387out: 1388 cl_lov_delay_create_clear(&file->f_flags); 1389 return rc; 1390out_req_free: 1391 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data); 1392 goto out; 1393} 1394 1395int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, 1396 struct lov_mds_md **lmmp, int *lmm_size, 1397 struct ptlrpc_request **request) 1398{ 1399 struct ll_sb_info *sbi = ll_i2sbi(inode); 1400 struct mdt_body *body; 1401 struct lov_mds_md *lmm = NULL; 1402 struct ptlrpc_request *req = NULL; 1403 struct md_op_data *op_data; 1404 int rc, lmmsize; 1405 1406 rc = ll_get_default_mdsize(sbi, &lmmsize); 1407 if (rc) 1408 return rc; 1409 1410 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, 1411 strlen(filename), lmmsize, 1412 LUSTRE_OPC_ANY, NULL); 1413 if (IS_ERR(op_data)) 1414 return PTR_ERR(op_data); 1415 1416 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA; 1417 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); 1418 ll_finish_md_op_data(op_data); 1419 if (rc < 0) { 1420 CDEBUG(D_INFO, "md_getattr_name failed " 1421 "on %s: rc %d\n", filename, rc); 1422 goto out; 1423 } 1424 1425 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 1426 LASSERT(body != NULL); /* checked by mdc_getattr_name */ 1427 1428 lmmsize = body->eadatasize; 1429 1430 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) || 1431 lmmsize == 0) { 1432 rc = -ENODATA; 1433 goto out; 1434 } 1435 1436 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize); 1437 LASSERT(lmm != NULL); 1438 1439 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) && 1440 (lmm->lmm_magic != 
cpu_to_le32(LOV_MAGIC_V3))) { 1441 rc = -EPROTO; 1442 goto out; 1443 } 1444 1445 /* 1446 * This is coming from the MDS, so is probably in 1447 * little endian. We convert it to host endian before 1448 * passing it to userspace. 1449 */ 1450 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) { 1451 int stripe_count; 1452 1453 stripe_count = le16_to_cpu(lmm->lmm_stripe_count); 1454 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED) 1455 stripe_count = 0; 1456 1457 /* if function called for directory - we should 1458 * avoid swab not existent lsm objects */ 1459 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) { 1460 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm); 1461 if (S_ISREG(body->mode)) 1462 lustre_swab_lov_user_md_objects( 1463 ((struct lov_user_md_v1 *)lmm)->lmm_objects, 1464 stripe_count); 1465 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) { 1466 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm); 1467 if (S_ISREG(body->mode)) 1468 lustre_swab_lov_user_md_objects( 1469 ((struct lov_user_md_v3 *)lmm)->lmm_objects, 1470 stripe_count); 1471 } 1472 } 1473 1474out: 1475 *lmmp = lmm; 1476 *lmm_size = lmmsize; 1477 *request = req; 1478 return rc; 1479} 1480 1481static int ll_lov_setea(struct inode *inode, struct file *file, 1482 unsigned long arg) 1483{ 1484 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; 1485 struct lov_user_md *lump; 1486 int lum_size = sizeof(struct lov_user_md) + 1487 sizeof(struct lov_user_ost_data); 1488 int rc; 1489 1490 if (!capable(CFS_CAP_SYS_ADMIN)) 1491 return -EPERM; 1492 1493 OBD_ALLOC_LARGE(lump, lum_size); 1494 if (lump == NULL) 1495 return -ENOMEM; 1496 1497 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) { 1498 OBD_FREE_LARGE(lump, lum_size); 1499 return -EFAULT; 1500 } 1501 1502 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size); 1503 1504 OBD_FREE_LARGE(lump, lum_size); 1505 return rc; 1506} 1507 1508static int ll_lov_setstripe(struct inode *inode, struct file *file, 1509 
/*
 * LL_IOC_LOV_GETSTRIPE: return the current layout to userspace via
 * obd_iocontrol().  -ENODATA if the file has no striping.
 */
static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
{
	struct lov_stripe_md *lsm;
	int rc = -ENODATA;

	lsm = ccc_inode_lsm_get(inode);
	if (lsm != NULL)
		rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
				   lsm, (void *)arg);
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}

/*
 * LL_IOC_GROUP_LOCK: take a cluster-wide group (GID) lock on the file.
 * Only one group lock per open file descriptor; racing takers are
 * detected under lli_lock after the (blocking) enqueue.
 *
 * \param arg  group lock id (gid)
 * \retval 0 on success, -EINVAL if a group lock is already held,
 *	   -EOPNOTSUPP for nolock files
 */
static int
ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;
	int rc;

	if (ll_file_nolock(file))
		return -EOPNOTSUPP;

	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		CWARN("group lock already existed with gid %lu\n",
		      fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.cg_lock == NULL);
	spin_unlock(&lli->lli_lock);

	/* the enqueue may block; lli_lock must not be held across it */
	rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
			      arg, (file->f_flags & O_NONBLOCK), &grouplock);
	if (rc)
		return rc;

	/* re-check under the lock: another thread may have won the race */
	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		spin_unlock(&lli->lli_lock);
		CERROR("another thread just won the race\n");
		cl_put_grouplock(&grouplock);
		return -EINVAL;
	}

	fd->fd_flags |= LL_FILE_GROUP_LOCKED;
	fd->fd_grouplock = grouplock;
	spin_unlock(&lli->lli_lock);

	CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
	return 0;
}

/*
 * LL_IOC_GROUP_UNLOCK: drop the group lock previously taken with
 * ll_get_grouplock().  The gid must match the one held.
 */
int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;

	spin_lock(&lli->lli_lock);
	if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
		spin_unlock(&lli->lli_lock);
		CWARN("no group lock held\n");
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.cg_lock != NULL);

	if (fd->fd_grouplock.cg_gid != arg) {
		CWARN("group lock %lu doesn't match current id %lu\n",
		      arg, fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}

	/* copy out and clear under the lock; release outside of it */
	grouplock = fd->fd_grouplock;
	memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
	fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
	spin_unlock(&lli->lli_lock);

	cl_put_grouplock(&grouplock);
	CDEBUG(D_INFO, "group lock %lu released\n", arg);
	return 0;
}

/**
 * Close inode open handle
 *
 * \param dentry [in]     dentry which contains the inode
 * \param it     [in,out] intent which contains open info and result
 *
 * \retval 0     success
 * \retval <0    failure
 */
int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
{
	struct inode *inode = dentry->d_inode;
	struct obd_client_handle *och;
	int rc;

	LASSERT(inode);

	/* Root ? Do nothing. */
	if (dentry->d_inode->i_sb->s_root == dentry)
		return 0;

	/* No open handle to close? Move away */
	if (!it_disposition(it, DISP_OPEN_OPEN))
		return 0;

	LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);

	och = kzalloc(sizeof(*och), GFP_NOFS);
	if (!och) {
		rc = -ENOMEM;
		goto out;
	}

	ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);

	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
				       inode, och, NULL);
out:
	/* this one is in place of ll_file_open */
	if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->d.lustre.it_data);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
	}
	return rc;
}
dentry->d_inode; 1643 struct obd_client_handle *och; 1644 int rc; 1645 1646 LASSERT(inode); 1647 1648 /* Root ? Do nothing. */ 1649 if (dentry->d_inode->i_sb->s_root == dentry) 1650 return 0; 1651 1652 /* No open handle to close? Move away */ 1653 if (!it_disposition(it, DISP_OPEN_OPEN)) 1654 return 0; 1655 1656 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0); 1657 1658 och = kzalloc(sizeof(*och), GFP_NOFS); 1659 if (!och) { 1660 rc = -ENOMEM; 1661 goto out; 1662 } 1663 1664 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); 1665 1666 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, 1667 inode, och, NULL); 1668out: 1669 /* this one is in place of ll_file_open */ 1670 if (it_disposition(it, DISP_ENQ_OPEN_REF)) { 1671 ptlrpc_req_finished(it->d.lustre.it_data); 1672 it_clear_disposition(it, DISP_ENQ_OPEN_REF); 1673 } 1674 return rc; 1675} 1676 1677/** 1678 * Get size for inode for which FIEMAP mapping is requested. 1679 * Make the FIEMAP get_info call and returns the result. 1680 */ 1681static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap, 1682 size_t num_bytes) 1683{ 1684 struct obd_export *exp = ll_i2dtexp(inode); 1685 struct lov_stripe_md *lsm = NULL; 1686 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, }; 1687 __u32 vallen = num_bytes; 1688 int rc; 1689 1690 /* Checks for fiemap flags */ 1691 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) { 1692 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT; 1693 return -EBADR; 1694 } 1695 1696 /* Check for FIEMAP_FLAG_SYNC */ 1697 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) { 1698 rc = filemap_fdatawrite(inode->i_mapping); 1699 if (rc) 1700 return rc; 1701 } 1702 1703 lsm = ccc_inode_lsm_get(inode); 1704 if (lsm == NULL) 1705 return -ENOENT; 1706 1707 /* If the stripe_count > 1 and the application does not understand 1708 * DEVICE_ORDER flag, then it cannot interpret the extents correctly. 
1709 */ 1710 if (lsm->lsm_stripe_count > 1 && 1711 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) { 1712 rc = -EOPNOTSUPP; 1713 goto out; 1714 } 1715 1716 fm_key.oa.o_oi = lsm->lsm_oi; 1717 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; 1718 1719 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE); 1720 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid); 1721 /* If filesize is 0, then there would be no objects for mapping */ 1722 if (fm_key.oa.o_size == 0) { 1723 fiemap->fm_mapped_extents = 0; 1724 rc = 0; 1725 goto out; 1726 } 1727 1728 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap)); 1729 1730 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen, 1731 fiemap, lsm); 1732 if (rc) 1733 CERROR("obd_get_info failed: rc = %d\n", rc); 1734 1735out: 1736 ccc_inode_lsm_put(inode, lsm); 1737 return rc; 1738} 1739 1740int ll_fid2path(struct inode *inode, void __user *arg) 1741{ 1742 struct obd_export *exp = ll_i2mdexp(inode); 1743 const struct getinfo_fid2path __user *gfin = arg; 1744 struct getinfo_fid2path *gfout; 1745 u32 pathlen; 1746 size_t outsize; 1747 int rc; 1748 1749 if (!capable(CFS_CAP_DAC_READ_SEARCH) && 1750 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH)) 1751 return -EPERM; 1752 1753 /* Only need to get the buflen */ 1754 if (get_user(pathlen, &gfin->gf_pathlen)) 1755 return -EFAULT; 1756 1757 if (pathlen > PATH_MAX) 1758 return -EINVAL; 1759 1760 outsize = sizeof(*gfout) + pathlen; 1761 1762 gfout = kzalloc(outsize, GFP_NOFS); 1763 if (!gfout) 1764 return -ENOMEM; 1765 1766 if (copy_from_user(gfout, arg, sizeof(*gfout))) { 1767 rc = -EFAULT; 1768 goto gf_free; 1769 } 1770 1771 /* Call mdc_iocontrol */ 1772 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL); 1773 if (rc != 0) 1774 goto gf_free; 1775 1776 if (copy_to_user(arg, gfout, outsize)) 1777 rc = -EFAULT; 1778 1779gf_free: 1780 OBD_FREE(gfout, outsize); 1781 return rc; 1782} 1783 1784static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg) 1785{ 1786 
struct ll_user_fiemap *fiemap_s; 1787 size_t num_bytes, ret_bytes; 1788 unsigned int extent_count; 1789 int rc = 0; 1790 1791 /* Get the extent count so we can calculate the size of 1792 * required fiemap buffer */ 1793 if (get_user(extent_count, 1794 &((struct ll_user_fiemap __user *)arg)->fm_extent_count)) 1795 return -EFAULT; 1796 1797 if (extent_count >= 1798 (SIZE_MAX - sizeof(*fiemap_s)) / sizeof(struct ll_fiemap_extent)) 1799 return -EINVAL; 1800 num_bytes = sizeof(*fiemap_s) + (extent_count * 1801 sizeof(struct ll_fiemap_extent)); 1802 1803 OBD_ALLOC_LARGE(fiemap_s, num_bytes); 1804 if (fiemap_s == NULL) 1805 return -ENOMEM; 1806 1807 /* get the fiemap value */ 1808 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg, 1809 sizeof(*fiemap_s))) { 1810 rc = -EFAULT; 1811 goto error; 1812 } 1813 1814 /* If fm_extent_count is non-zero, read the first extent since 1815 * it is used to calculate end_offset and device from previous 1816 * fiemap call. */ 1817 if (extent_count) { 1818 if (copy_from_user(&fiemap_s->fm_extents[0], 1819 (char __user *)arg + sizeof(*fiemap_s), 1820 sizeof(struct ll_fiemap_extent))) { 1821 rc = -EFAULT; 1822 goto error; 1823 } 1824 } 1825 1826 rc = ll_do_fiemap(inode, fiemap_s, num_bytes); 1827 if (rc) 1828 goto error; 1829 1830 ret_bytes = sizeof(struct ll_user_fiemap); 1831 1832 if (extent_count != 0) 1833 ret_bytes += (fiemap_s->fm_mapped_extents * 1834 sizeof(struct ll_fiemap_extent)); 1835 1836 if (copy_to_user((void *)arg, fiemap_s, ret_bytes)) 1837 rc = -EFAULT; 1838 1839error: 1840 OBD_FREE_LARGE(fiemap_s, num_bytes); 1841 return rc; 1842} 1843 1844/* 1845 * Read the data_version for inode. 1846 * 1847 * This value is computed using stripe object version on OST. 1848 * Version is computed using server side locking. 1849 * 1850 * @param extent_lock Take extent lock. Not needed if a process is already 1851 * holding the OST object group locks. 
1852 */ 1853int ll_data_version(struct inode *inode, __u64 *data_version, 1854 int extent_lock) 1855{ 1856 struct lov_stripe_md *lsm = NULL; 1857 struct ll_sb_info *sbi = ll_i2sbi(inode); 1858 struct obdo *obdo = NULL; 1859 int rc; 1860 1861 /* If no stripe, we consider version is 0. */ 1862 lsm = ccc_inode_lsm_get(inode); 1863 if (!lsm_has_objects(lsm)) { 1864 *data_version = 0; 1865 CDEBUG(D_INODE, "No object for inode\n"); 1866 rc = 0; 1867 goto out; 1868 } 1869 1870 obdo = kzalloc(sizeof(*obdo), GFP_NOFS); 1871 if (!obdo) { 1872 rc = -ENOMEM; 1873 goto out; 1874 } 1875 1876 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock); 1877 if (rc == 0) { 1878 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION)) 1879 rc = -EOPNOTSUPP; 1880 else 1881 *data_version = obdo->o_data_version; 1882 } 1883 1884 OBD_FREE_PTR(obdo); 1885out: 1886 ccc_inode_lsm_put(inode, lsm); 1887 return rc; 1888} 1889 1890/* 1891 * Trigger a HSM release request for the provided inode. 1892 */ 1893int ll_hsm_release(struct inode *inode) 1894{ 1895 struct cl_env_nest nest; 1896 struct lu_env *env; 1897 struct obd_client_handle *och = NULL; 1898 __u64 data_version = 0; 1899 int rc; 1900 1901 1902 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n", 1903 ll_get_fsname(inode->i_sb, NULL, 0), 1904 PFID(&ll_i2info(inode)->lli_fid)); 1905 1906 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE); 1907 if (IS_ERR(och)) { 1908 rc = PTR_ERR(och); 1909 goto out; 1910 } 1911 1912 /* Grab latest data_version and [am]time values */ 1913 rc = ll_data_version(inode, &data_version, 1); 1914 if (rc != 0) 1915 goto out; 1916 1917 env = cl_env_nested_get(&nest); 1918 if (IS_ERR(env)) { 1919 rc = PTR_ERR(env); 1920 goto out; 1921 } 1922 1923 ll_merge_lvb(env, inode); 1924 cl_env_nested_put(&nest, env); 1925 1926 /* Release the file. 1927 * NB: lease lock handle is released in mdc_hsm_release_pack() because 1928 * we still need it to pack l_remote_handle to MDT. 
*/ 1929 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och, 1930 &data_version); 1931 och = NULL; 1932 1933 1934out: 1935 if (och != NULL && !IS_ERR(och)) /* close the file */ 1936 ll_lease_close(och, inode, NULL); 1937 1938 return rc; 1939} 1940 1941struct ll_swap_stack { 1942 struct iattr ia1, ia2; 1943 __u64 dv1, dv2; 1944 struct inode *inode1, *inode2; 1945 bool check_dv1, check_dv2; 1946}; 1947 1948static int ll_swap_layouts(struct file *file1, struct file *file2, 1949 struct lustre_swap_layouts *lsl) 1950{ 1951 struct mdc_swap_layouts msl; 1952 struct md_op_data *op_data; 1953 __u32 gid; 1954 __u64 dv; 1955 struct ll_swap_stack *llss = NULL; 1956 int rc; 1957 1958 llss = kzalloc(sizeof(*llss), GFP_NOFS); 1959 if (!llss) 1960 return -ENOMEM; 1961 1962 llss->inode1 = file1->f_dentry->d_inode; 1963 llss->inode2 = file2->f_dentry->d_inode; 1964 1965 if (!S_ISREG(llss->inode2->i_mode)) { 1966 rc = -EINVAL; 1967 goto free; 1968 } 1969 1970 if (inode_permission(llss->inode1, MAY_WRITE) || 1971 inode_permission(llss->inode2, MAY_WRITE)) { 1972 rc = -EPERM; 1973 goto free; 1974 } 1975 1976 if (llss->inode2->i_sb != llss->inode1->i_sb) { 1977 rc = -EXDEV; 1978 goto free; 1979 } 1980 1981 /* we use 2 bool because it is easier to swap than 2 bits */ 1982 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1) 1983 llss->check_dv1 = true; 1984 1985 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2) 1986 llss->check_dv2 = true; 1987 1988 /* we cannot use lsl->sl_dvX directly because we may swap them */ 1989 llss->dv1 = lsl->sl_dv1; 1990 llss->dv2 = lsl->sl_dv2; 1991 1992 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2)); 1993 if (rc == 0) /* same file, done! 
*/ { 1994 rc = 0; 1995 goto free; 1996 } 1997 1998 if (rc < 0) { /* sequentialize it */ 1999 swap(llss->inode1, llss->inode2); 2000 swap(file1, file2); 2001 swap(llss->dv1, llss->dv2); 2002 swap(llss->check_dv1, llss->check_dv2); 2003 } 2004 2005 gid = lsl->sl_gid; 2006 if (gid != 0) { /* application asks to flush dirty cache */ 2007 rc = ll_get_grouplock(llss->inode1, file1, gid); 2008 if (rc < 0) 2009 goto free; 2010 2011 rc = ll_get_grouplock(llss->inode2, file2, gid); 2012 if (rc < 0) { 2013 ll_put_grouplock(llss->inode1, file1, gid); 2014 goto free; 2015 } 2016 } 2017 2018 /* to be able to restore mtime and atime after swap 2019 * we need to first save them */ 2020 if (lsl->sl_flags & 2021 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) { 2022 llss->ia1.ia_mtime = llss->inode1->i_mtime; 2023 llss->ia1.ia_atime = llss->inode1->i_atime; 2024 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME; 2025 llss->ia2.ia_mtime = llss->inode2->i_mtime; 2026 llss->ia2.ia_atime = llss->inode2->i_atime; 2027 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME; 2028 } 2029 2030 /* ultimate check, before swapping the layouts we check if 2031 * dataversion has changed (if requested) */ 2032 if (llss->check_dv1) { 2033 rc = ll_data_version(llss->inode1, &dv, 0); 2034 if (rc) 2035 goto putgl; 2036 if (dv != llss->dv1) { 2037 rc = -EAGAIN; 2038 goto putgl; 2039 } 2040 } 2041 2042 if (llss->check_dv2) { 2043 rc = ll_data_version(llss->inode2, &dv, 0); 2044 if (rc) 2045 goto putgl; 2046 if (dv != llss->dv2) { 2047 rc = -EAGAIN; 2048 goto putgl; 2049 } 2050 } 2051 2052 /* struct md_op_data is used to send the swap args to the mdt 2053 * only flags is missing, so we use struct mdc_swap_layouts 2054 * through the md_op_data->op_data */ 2055 /* flags from user space have to be converted before they are send to 2056 * server, no flag is sent today, they are only used on the client */ 2057 msl.msl_flags = 0; 2058 rc = -ENOMEM; 2059 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, 
NULL, 0, 2060 0, LUSTRE_OPC_ANY, &msl); 2061 if (IS_ERR(op_data)) { 2062 rc = PTR_ERR(op_data); 2063 goto free; 2064 } 2065 2066 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1), 2067 sizeof(*op_data), op_data, NULL); 2068 ll_finish_md_op_data(op_data); 2069 2070putgl: 2071 if (gid != 0) { 2072 ll_put_grouplock(llss->inode2, file2, gid); 2073 ll_put_grouplock(llss->inode1, file1, gid); 2074 } 2075 2076 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */ 2077 if (rc != 0) 2078 goto free; 2079 2080 /* clear useless flags */ 2081 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) { 2082 llss->ia1.ia_valid &= ~ATTR_MTIME; 2083 llss->ia2.ia_valid &= ~ATTR_MTIME; 2084 } 2085 2086 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) { 2087 llss->ia1.ia_valid &= ~ATTR_ATIME; 2088 llss->ia2.ia_valid &= ~ATTR_ATIME; 2089 } 2090 2091 /* update time if requested */ 2092 rc = 0; 2093 if (llss->ia2.ia_valid != 0) { 2094 mutex_lock(&llss->inode1->i_mutex); 2095 rc = ll_setattr(file1->f_dentry, &llss->ia2); 2096 mutex_unlock(&llss->inode1->i_mutex); 2097 } 2098 2099 if (llss->ia1.ia_valid != 0) { 2100 int rc1; 2101 2102 mutex_lock(&llss->inode2->i_mutex); 2103 rc1 = ll_setattr(file2->f_dentry, &llss->ia1); 2104 mutex_unlock(&llss->inode2->i_mutex); 2105 if (rc == 0) 2106 rc = rc1; 2107 } 2108 2109free: 2110 if (llss != NULL) 2111 OBD_FREE_PTR(llss); 2112 2113 return rc; 2114} 2115 2116static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss) 2117{ 2118 struct md_op_data *op_data; 2119 int rc; 2120 2121 /* Non-root users are forbidden to set or clear flags which are 2122 * NOT defined in HSM_USER_MASK. 
*/ 2123 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) && 2124 !capable(CFS_CAP_SYS_ADMIN)) 2125 return -EPERM; 2126 2127 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2128 LUSTRE_OPC_ANY, hss); 2129 if (IS_ERR(op_data)) 2130 return PTR_ERR(op_data); 2131 2132 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode), 2133 sizeof(*op_data), op_data, NULL); 2134 2135 ll_finish_md_op_data(op_data); 2136 2137 return rc; 2138} 2139 2140static int ll_hsm_import(struct inode *inode, struct file *file, 2141 struct hsm_user_import *hui) 2142{ 2143 struct hsm_state_set *hss = NULL; 2144 struct iattr *attr = NULL; 2145 int rc; 2146 2147 2148 if (!S_ISREG(inode->i_mode)) 2149 return -EINVAL; 2150 2151 /* set HSM flags */ 2152 hss = kzalloc(sizeof(*hss), GFP_NOFS); 2153 if (!hss) { 2154 rc = -ENOMEM; 2155 goto out; 2156 } 2157 2158 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID; 2159 hss->hss_archive_id = hui->hui_archive_id; 2160 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED; 2161 rc = ll_hsm_state_set(inode, hss); 2162 if (rc != 0) 2163 goto out; 2164 2165 attr = kzalloc(sizeof(*attr), GFP_NOFS); 2166 if (!attr) { 2167 rc = -ENOMEM; 2168 goto out; 2169 } 2170 2171 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO); 2172 attr->ia_mode |= S_IFREG; 2173 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid); 2174 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid); 2175 attr->ia_size = hui->hui_size; 2176 attr->ia_mtime.tv_sec = hui->hui_mtime; 2177 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns; 2178 attr->ia_atime.tv_sec = hui->hui_atime; 2179 attr->ia_atime.tv_nsec = hui->hui_atime_ns; 2180 2181 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE | 2182 ATTR_UID | ATTR_GID | 2183 ATTR_MTIME | ATTR_MTIME_SET | 2184 ATTR_ATIME | ATTR_ATIME_SET; 2185 2186 mutex_lock(&inode->i_mutex); 2187 2188 rc = ll_setattr_raw(file->f_dentry, attr, true); 2189 if (rc == -ENODATA) 2190 rc = 0; 2191 2192 mutex_unlock(&inode->i_mutex); 2193 
2194out: 2195 if (hss != NULL) 2196 OBD_FREE_PTR(hss); 2197 2198 if (attr != NULL) 2199 OBD_FREE_PTR(attr); 2200 2201 return rc; 2202} 2203 2204static long 2205ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2206{ 2207 struct inode *inode = file->f_dentry->d_inode; 2208 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 2209 int flags, rc; 2210 2211 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino, 2212 inode->i_generation, inode, cmd); 2213 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); 2214 2215 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ 2216 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ 2217 return -ENOTTY; 2218 2219 switch (cmd) { 2220 case LL_IOC_GETFLAGS: 2221 /* Get the current value of the file flags */ 2222 return put_user(fd->fd_flags, (int *)arg); 2223 case LL_IOC_SETFLAGS: 2224 case LL_IOC_CLRFLAGS: 2225 /* Set or clear specific file flags */ 2226 /* XXX This probably needs checks to ensure the flags are 2227 * not abused, and to handle any flag side effects. 
2228 */ 2229 if (get_user(flags, (int *) arg)) 2230 return -EFAULT; 2231 2232 if (cmd == LL_IOC_SETFLAGS) { 2233 if ((flags & LL_FILE_IGNORE_LOCK) && 2234 !(file->f_flags & O_DIRECT)) { 2235 CERROR("%s: unable to disable locking on " 2236 "non-O_DIRECT file\n", current->comm); 2237 return -EINVAL; 2238 } 2239 2240 fd->fd_flags |= flags; 2241 } else { 2242 fd->fd_flags &= ~flags; 2243 } 2244 return 0; 2245 case LL_IOC_LOV_SETSTRIPE: 2246 return ll_lov_setstripe(inode, file, arg); 2247 case LL_IOC_LOV_SETEA: 2248 return ll_lov_setea(inode, file, arg); 2249 case LL_IOC_LOV_SWAP_LAYOUTS: { 2250 struct file *file2; 2251 struct lustre_swap_layouts lsl; 2252 2253 if (copy_from_user(&lsl, (char *)arg, 2254 sizeof(struct lustre_swap_layouts))) 2255 return -EFAULT; 2256 2257 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */ 2258 return -EPERM; 2259 2260 file2 = fget(lsl.sl_fd); 2261 if (file2 == NULL) 2262 return -EBADF; 2263 2264 rc = -EPERM; 2265 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */ 2266 rc = ll_swap_layouts(file, file2, &lsl); 2267 fput(file2); 2268 return rc; 2269 } 2270 case LL_IOC_LOV_GETSTRIPE: 2271 return ll_lov_getstripe(inode, arg); 2272 case LL_IOC_RECREATE_OBJ: 2273 return ll_lov_recreate_obj(inode, arg); 2274 case LL_IOC_RECREATE_FID: 2275 return ll_lov_recreate_fid(inode, arg); 2276 case FSFILT_IOC_FIEMAP: 2277 return ll_ioctl_fiemap(inode, arg); 2278 case FSFILT_IOC_GETFLAGS: 2279 case FSFILT_IOC_SETFLAGS: 2280 return ll_iocontrol(inode, file, cmd, arg); 2281 case FSFILT_IOC_GETVERSION_OLD: 2282 case FSFILT_IOC_GETVERSION: 2283 return put_user(inode->i_generation, (int *)arg); 2284 case LL_IOC_GROUP_LOCK: 2285 return ll_get_grouplock(inode, file, arg); 2286 case LL_IOC_GROUP_UNLOCK: 2287 return ll_put_grouplock(inode, file, arg); 2288 case IOC_OBD_STATFS: 2289 return ll_obd_statfs(inode, (void *)arg); 2290 2291 /* We need to special case any other ioctls we want to handle, 2292 * to send them to the MDS/OST as appropriate and to 
properly 2293 * network encode the arg field. 2294 case FSFILT_IOC_SETVERSION_OLD: 2295 case FSFILT_IOC_SETVERSION: 2296 */ 2297 case LL_IOC_FLUSHCTX: 2298 return ll_flush_ctx(inode); 2299 case LL_IOC_PATH2FID: { 2300 if (copy_to_user((void *)arg, ll_inode2fid(inode), 2301 sizeof(struct lu_fid))) 2302 return -EFAULT; 2303 2304 return 0; 2305 } 2306 case OBD_IOC_FID2PATH: 2307 return ll_fid2path(inode, (void *)arg); 2308 case LL_IOC_DATA_VERSION: { 2309 struct ioc_data_version idv; 2310 int rc; 2311 2312 if (copy_from_user(&idv, (char *)arg, sizeof(idv))) 2313 return -EFAULT; 2314 2315 rc = ll_data_version(inode, &idv.idv_version, 2316 !(idv.idv_flags & LL_DV_NOFLUSH)); 2317 2318 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv))) 2319 return -EFAULT; 2320 2321 return rc; 2322 } 2323 2324 case LL_IOC_GET_MDTIDX: { 2325 int mdtidx; 2326 2327 mdtidx = ll_get_mdt_idx(inode); 2328 if (mdtidx < 0) 2329 return mdtidx; 2330 2331 if (put_user((int)mdtidx, (int *)arg)) 2332 return -EFAULT; 2333 2334 return 0; 2335 } 2336 case OBD_IOC_GETDTNAME: 2337 case OBD_IOC_GETMDNAME: 2338 return ll_get_obd_name(inode, cmd, arg); 2339 case LL_IOC_HSM_STATE_GET: { 2340 struct md_op_data *op_data; 2341 struct hsm_user_state *hus; 2342 int rc; 2343 2344 hus = kzalloc(sizeof(*hus), GFP_NOFS); 2345 if (!hus) 2346 return -ENOMEM; 2347 2348 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2349 LUSTRE_OPC_ANY, hus); 2350 if (IS_ERR(op_data)) { 2351 OBD_FREE_PTR(hus); 2352 return PTR_ERR(op_data); 2353 } 2354 2355 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), 2356 op_data, NULL); 2357 2358 if (copy_to_user((void *)arg, hus, sizeof(*hus))) 2359 rc = -EFAULT; 2360 2361 ll_finish_md_op_data(op_data); 2362 OBD_FREE_PTR(hus); 2363 return rc; 2364 } 2365 case LL_IOC_HSM_STATE_SET: { 2366 struct hsm_state_set *hss; 2367 int rc; 2368 2369 hss = kzalloc(sizeof(*hss), GFP_NOFS); 2370 if (!hss) 2371 return -ENOMEM; 2372 2373 if (copy_from_user(hss, (char *)arg, 
sizeof(*hss))) { 2374 OBD_FREE_PTR(hss); 2375 return -EFAULT; 2376 } 2377 2378 rc = ll_hsm_state_set(inode, hss); 2379 2380 OBD_FREE_PTR(hss); 2381 return rc; 2382 } 2383 case LL_IOC_HSM_ACTION: { 2384 struct md_op_data *op_data; 2385 struct hsm_current_action *hca; 2386 int rc; 2387 2388 hca = kzalloc(sizeof(*hca), GFP_NOFS); 2389 if (!hca) 2390 return -ENOMEM; 2391 2392 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2393 LUSTRE_OPC_ANY, hca); 2394 if (IS_ERR(op_data)) { 2395 OBD_FREE_PTR(hca); 2396 return PTR_ERR(op_data); 2397 } 2398 2399 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), 2400 op_data, NULL); 2401 2402 if (copy_to_user((char *)arg, hca, sizeof(*hca))) 2403 rc = -EFAULT; 2404 2405 ll_finish_md_op_data(op_data); 2406 OBD_FREE_PTR(hca); 2407 return rc; 2408 } 2409 case LL_IOC_SET_LEASE: { 2410 struct ll_inode_info *lli = ll_i2info(inode); 2411 struct obd_client_handle *och = NULL; 2412 bool lease_broken; 2413 fmode_t mode = 0; 2414 2415 switch (arg) { 2416 case F_WRLCK: 2417 if (!(file->f_mode & FMODE_WRITE)) 2418 return -EPERM; 2419 mode = FMODE_WRITE; 2420 break; 2421 case F_RDLCK: 2422 if (!(file->f_mode & FMODE_READ)) 2423 return -EPERM; 2424 mode = FMODE_READ; 2425 break; 2426 case F_UNLCK: 2427 mutex_lock(&lli->lli_och_mutex); 2428 if (fd->fd_lease_och != NULL) { 2429 och = fd->fd_lease_och; 2430 fd->fd_lease_och = NULL; 2431 } 2432 mutex_unlock(&lli->lli_och_mutex); 2433 2434 if (och != NULL) { 2435 mode = och->och_flags & 2436 (FMODE_READ|FMODE_WRITE); 2437 rc = ll_lease_close(och, inode, &lease_broken); 2438 if (rc == 0 && lease_broken) 2439 mode = 0; 2440 } else { 2441 rc = -ENOLCK; 2442 } 2443 2444 /* return the type of lease or error */ 2445 return rc < 0 ? 
rc : (int)mode; 2446 default: 2447 return -EINVAL; 2448 } 2449 2450 CDEBUG(D_INODE, "Set lease with mode %d\n", mode); 2451 2452 /* apply for lease */ 2453 och = ll_lease_open(inode, file, mode, 0); 2454 if (IS_ERR(och)) 2455 return PTR_ERR(och); 2456 2457 rc = 0; 2458 mutex_lock(&lli->lli_och_mutex); 2459 if (fd->fd_lease_och == NULL) { 2460 fd->fd_lease_och = och; 2461 och = NULL; 2462 } 2463 mutex_unlock(&lli->lli_och_mutex); 2464 if (och != NULL) { 2465 /* impossible now that only excl is supported for now */ 2466 ll_lease_close(och, inode, &lease_broken); 2467 rc = -EBUSY; 2468 } 2469 return rc; 2470 } 2471 case LL_IOC_GET_LEASE: { 2472 struct ll_inode_info *lli = ll_i2info(inode); 2473 struct ldlm_lock *lock = NULL; 2474 2475 rc = 0; 2476 mutex_lock(&lli->lli_och_mutex); 2477 if (fd->fd_lease_och != NULL) { 2478 struct obd_client_handle *och = fd->fd_lease_och; 2479 2480 lock = ldlm_handle2lock(&och->och_lease_handle); 2481 if (lock != NULL) { 2482 lock_res_and_lock(lock); 2483 if (!ldlm_is_cancel(lock)) 2484 rc = och->och_flags & 2485 (FMODE_READ | FMODE_WRITE); 2486 unlock_res_and_lock(lock); 2487 ldlm_lock_put(lock); 2488 } 2489 } 2490 mutex_unlock(&lli->lli_och_mutex); 2491 return rc; 2492 } 2493 case LL_IOC_HSM_IMPORT: { 2494 struct hsm_user_import *hui; 2495 2496 hui = kzalloc(sizeof(*hui), GFP_NOFS); 2497 if (!hui) 2498 return -ENOMEM; 2499 2500 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) { 2501 OBD_FREE_PTR(hui); 2502 return -EFAULT; 2503 } 2504 2505 rc = ll_hsm_import(inode, file, hui); 2506 2507 OBD_FREE_PTR(hui); 2508 return rc; 2509 } 2510 default: { 2511 int err; 2512 2513 if (LLIOC_STOP == 2514 ll_iocontrol_call(inode, file, cmd, arg, &err)) 2515 return err; 2516 2517 return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL, 2518 (void *)arg); 2519 } 2520 } 2521} 2522 2523 2524static loff_t ll_file_seek(struct file *file, loff_t offset, int origin) 2525{ 2526 struct inode *inode = file->f_dentry->d_inode; 2527 loff_t retval, eof = 0; 2528 
	/* Candidate absolute position, used only for the trace message:
	 * EOF-relative for SEEK_END, position-relative for SEEK_CUR,
	 * the raw offset otherwise. The real seek happens below in
	 * generic_file_llseek_size(). */
	retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
			   (origin == SEEK_CUR) ? file->f_pos : 0);
	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
	       inode->i_ino, inode->i_generation, inode, retval, retval,
	       origin);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);

	/* EOF-relative seeks need the cluster-wide file size: glimpse
	 * refreshes the local i_size from the OSTs before it is read. */
	if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
		retval = ll_glimpse_size(inode);
		if (retval != 0)
			return retval;
		eof = i_size_read(inode);
	}

	retval = generic_file_llseek_size(file, offset, origin,
					  ll_file_maxbytes(inode), eof);
	return retval;
}

/**
 * flush file_operations method, invoked on every close() of the fd.
 *
 * Reports asynchronous writeback errors recorded against the inode so
 * that close() can surface them, then clears the recorded state.
 *
 * \retval 0    no pending async error, or it was already reported to
 *              the application via a failed write
 * \retval -EIO an async writeback error was recorded for this mapping
 */
static int ll_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	int rc, err;

	LASSERT(!S_ISDIR(inode->i_mode));

	/* catch async errors that were recorded back when async writeback
	 * failed for pages in this mapping. */
	rc = lli->lli_async_rc;
	lli->lli_async_rc = 0;
	err = lov_read_and_clear_async_rc(lli->lli_clob);
	if (rc == 0)
		rc = err;

	/* The application has been told write failure already.
	 * Do not report failure again. */
	if (fd->fd_write_failed)
		return 0;
	return rc ? -EIO : 0;
}

/**
 * Called to make sure a portion of file has been written out.
 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
 *
 * Return how many pages have been written.
 */
int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
		       enum cl_fsync_mode mode, int ignore_layout)
{
	struct cl_env_nest nest;
	struct lu_env *env;
	struct cl_io *io;
	struct obd_capa *capa = NULL;
	struct cl_fsync_io *fio;
	int result;

	/* reject unknown sync modes up front */
	if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
	    mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
		return -EINVAL;

	env = cl_env_nested_get(&nest);
	if (IS_ERR(env))
		return PTR_ERR(env);

	capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);

	io = ccc_env_thread_io(env);
	io->ci_obj = cl_i2info(inode)->lli_clob;
	io->ci_ignore_layout = ignore_layout;

	/* initialize parameters for sync */
	fio = &io->u.ci_fsync;
	fio->fi_capa = capa;
	fio->fi_start = start;
	fio->fi_end = end;
	fio->fi_fid = ll_inode2fid(inode);
	fio->fi_mode = mode;
	fio->fi_nr_written = 0;

	/* run the CIT_FSYNC io loop; if setup failed, report the result
	 * recorded by cl_io_init() instead */
	if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
		result = cl_io_loop(env, io);
	else
		result = io->ci_result;
	/* on success return the page count, per the function contract */
	if (result == 0)
		result = fio->fi_nr_written;
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);

	/* NOTE(review): presumably capa_put() tolerates a NULL capa when
	 * ll_osscapa_get() returned none — confirm against its definition */
	capa_put(capa);

	return result;
}

/*
 * When dentry is provided (the 'else' case), *file->f_dentry may be
 * null and dentry must be used directly rather than pulled from
 * *file->f_dentry as is done otherwise.
2629 */ 2630 2631int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) 2632{ 2633 struct dentry *dentry = file->f_dentry; 2634 struct inode *inode = dentry->d_inode; 2635 struct ll_inode_info *lli = ll_i2info(inode); 2636 struct ptlrpc_request *req; 2637 struct obd_capa *oc; 2638 int rc, err; 2639 2640 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, 2641 inode->i_generation, inode); 2642 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1); 2643 2644 rc = filemap_write_and_wait_range(inode->i_mapping, start, end); 2645 mutex_lock(&inode->i_mutex); 2646 2647 /* catch async errors that were recorded back when async writeback 2648 * failed for pages in this mapping. */ 2649 if (!S_ISDIR(inode->i_mode)) { 2650 err = lli->lli_async_rc; 2651 lli->lli_async_rc = 0; 2652 if (rc == 0) 2653 rc = err; 2654 err = lov_read_and_clear_async_rc(lli->lli_clob); 2655 if (rc == 0) 2656 rc = err; 2657 } 2658 2659 oc = ll_mdscapa_get(inode); 2660 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc, 2661 &req); 2662 capa_put(oc); 2663 if (!rc) 2664 rc = err; 2665 if (!err) 2666 ptlrpc_req_finished(req); 2667 2668 if (S_ISREG(inode->i_mode)) { 2669 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 2670 2671 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0); 2672 if (rc == 0 && err < 0) 2673 rc = err; 2674 if (rc < 0) 2675 fd->fd_write_failed = true; 2676 else 2677 fd->fd_write_failed = false; 2678 } 2679 2680 mutex_unlock(&inode->i_mutex); 2681 return rc; 2682} 2683 2684static int 2685ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) 2686{ 2687 struct inode *inode = file->f_dentry->d_inode; 2688 struct ll_sb_info *sbi = ll_i2sbi(inode); 2689 struct ldlm_enqueue_info einfo = { 2690 .ei_type = LDLM_FLOCK, 2691 .ei_cb_cp = ldlm_flock_completion_ast, 2692 .ei_cbdata = file_lock, 2693 }; 2694 struct md_op_data *op_data; 2695 struct lustre_handle lockh = {0}; 2696 ldlm_policy_data_t flock = {{0}}; 2697 __u64 flags = 
0; 2698 int rc; 2699 int rc2 = 0; 2700 2701 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n", 2702 inode->i_ino, file_lock); 2703 2704 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1); 2705 2706 if (file_lock->fl_flags & FL_FLOCK) 2707 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK)); 2708 else if (!(file_lock->fl_flags & FL_POSIX)) 2709 return -EINVAL; 2710 2711 flock.l_flock.owner = (unsigned long)file_lock->fl_owner; 2712 flock.l_flock.pid = file_lock->fl_pid; 2713 flock.l_flock.start = file_lock->fl_start; 2714 flock.l_flock.end = file_lock->fl_end; 2715 2716 /* Somewhat ugly workaround for svc lockd. 2717 * lockd installs custom fl_lmops->lm_compare_owner that checks 2718 * for the fl_owner to be the same (which it always is on local node 2719 * I guess between lockd processes) and then compares pid. 2720 * As such we assign pid to the owner field to make it all work, 2721 * conflict with normal locks is unlikely since pid space and 2722 * pointer space for current->files are not intersecting */ 2723 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner) 2724 flock.l_flock.owner = (unsigned long)file_lock->fl_pid; 2725 2726 switch (file_lock->fl_type) { 2727 case F_RDLCK: 2728 einfo.ei_mode = LCK_PR; 2729 break; 2730 case F_UNLCK: 2731 /* An unlock request may or may not have any relation to 2732 * existing locks so we may not be able to pass a lock handle 2733 * via a normal ldlm_lock_cancel() request. The request may even 2734 * unlock a byte range in the middle of an existing lock. In 2735 * order to process an unlock request we need all of the same 2736 * information that is given with a normal read or write record 2737 * lock request. To avoid creating another ldlm unlock (cancel) 2738 * message we'll treat a LCK_NL flock request as an unlock. 
*/ 2739 einfo.ei_mode = LCK_NL; 2740 break; 2741 case F_WRLCK: 2742 einfo.ei_mode = LCK_PW; 2743 break; 2744 default: 2745 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", 2746 file_lock->fl_type); 2747 return -ENOTSUPP; 2748 } 2749 2750 switch (cmd) { 2751 case F_SETLKW: 2752#ifdef F_SETLKW64 2753 case F_SETLKW64: 2754#endif 2755 flags = 0; 2756 break; 2757 case F_SETLK: 2758#ifdef F_SETLK64 2759 case F_SETLK64: 2760#endif 2761 flags = LDLM_FL_BLOCK_NOWAIT; 2762 break; 2763 case F_GETLK: 2764#ifdef F_GETLK64 2765 case F_GETLK64: 2766#endif 2767 flags = LDLM_FL_TEST_LOCK; 2768 /* Save the old mode so that if the mode in the lock changes we 2769 * can decrement the appropriate reader or writer refcount. */ 2770 file_lock->fl_type = einfo.ei_mode; 2771 break; 2772 default: 2773 CERROR("unknown fcntl lock command: %d\n", cmd); 2774 return -EINVAL; 2775 } 2776 2777 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2778 LUSTRE_OPC_ANY, NULL); 2779 if (IS_ERR(op_data)) 2780 return PTR_ERR(op_data); 2781 2782 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n", 2783 inode->i_ino, flock.l_flock.pid, flags, einfo.ei_mode, 2784 flock.l_flock.start, flock.l_flock.end); 2785 2786 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, 2787 op_data, &lockh, &flock, 0, NULL /* req */, flags); 2788 2789 if ((file_lock->fl_flags & FL_FLOCK) && 2790 (rc == 0 || file_lock->fl_type == F_UNLCK)) 2791 rc2 = flock_lock_file_wait(file, file_lock); 2792 if ((file_lock->fl_flags & FL_POSIX) && 2793 (rc == 0 || file_lock->fl_type == F_UNLCK) && 2794 !(flags & LDLM_FL_TEST_LOCK)) 2795 rc2 = posix_lock_file_wait(file, file_lock); 2796 2797 if (rc2 && file_lock->fl_type != F_UNLCK) { 2798 einfo.ei_mode = LCK_NL; 2799 md_enqueue(sbi->ll_md_exp, &einfo, NULL, 2800 op_data, &lockh, &flock, 0, NULL /* req */, flags); 2801 rc = rc2; 2802 } 2803 2804 ll_finish_md_op_data(op_data); 2805 2806 return rc; 2807} 2808 2809static int 2810ll_file_noflock(struct file *file, 
int cmd, struct file_lock *file_lock)
{
	/* -o noflock mount: advisory locking is not supported */
	return -ENOSYS;
}

/**
 * test if some locks matching bits and l_req_mode are acquired
 * - bits can be in different locks
 * - if found clear the common lock bits in *bits
 * - the bits not found, are kept in *bits
 * \param inode [IN]
 * \param bits [IN] searched lock bits [IN]
 * \param l_req_mode [IN] searched lock mode
 * \retval boolean, true iff all bits are found
 */
int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
{
	struct lustre_handle lockh;
	ldlm_policy_data_t policy;
	/* LCK_MINMODE means "any mode": match any of CR/CW/PR/PW */
	ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
			   (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
	struct lu_fid *fid;
	__u64 flags;
	int i;

	if (!inode)
		return 0;

	fid = &ll_i2info(inode)->lli_fid;
	CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
	       ldlm_lockname[mode]);

	/* LDLM_FL_TEST_LOCK: only probe for a matching lock, do not
	 * take an extra reference on it */
	flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
	/* probe each requested inodebit individually; clear the ones a
	 * granted lock covers.
	 * NOTE(review): `1 << i` is an int shift assigned to a __u64 —
	 * safe only while MDS_INODELOCK_MAXSHIFT < 31; confirm. */
	for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
		policy.l_inodebits.bits = *bits & (1 << i);
		if (policy.l_inodebits.bits == 0)
			continue;

		if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
				  &policy, mode, &lockh)) {
			struct ldlm_lock *lock;

			lock = ldlm_handle2lock(&lockh);
			if (lock) {
				/* one lock may grant several bits at once */
				*bits &=
					~(lock->l_policy_data.l_inodebits.bits);
				LDLM_LOCK_PUT(lock);
			} else {
				*bits &= ~policy.l_inodebits.bits;
			}
		}
	}
	return *bits == 0;
}

/**
 * Match (and take a reference on) an MD lock covering \a bits on
 * \a inode, returning the matched mode and the handle in \a lockh.
 */
ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
			    struct lustre_handle *lockh, __u64 flags,
			    ldlm_mode_t mode)
{
	ldlm_policy_data_t policy = { .l_inodebits = {bits}};
	struct lu_fid *fid;
	ldlm_mode_t rc;

	fid = &ll_i2info(inode)->lli_fid;
	CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));

	rc =
md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
			   fid, LDLM_IBITS, &policy, mode, lockh);

	return rc;
}

/**
 * Post-process the result of a revalidation RPC.
 *
 * \retval 0   for -ENOENT on a non-regular, non-directory inode (the
 *             object is simply gone; nlink is cleared)
 * \retval rc  otherwise, after logging unexpected failures
 */
static int ll_inode_revalidate_fini(struct inode *inode, int rc)
{
	/* Already unlinked. Just update nlink and return success */
	if (rc == -ENOENT) {
		clear_nlink(inode);
		/* This path cannot be hit for regular files unless in
		 * case of obscure races, so no need to validate size.
		 */
		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
			return 0;
	} else if (rc != 0) {
		/* -EACCES/-EIDRM are expected permission failures: log
		 * them at info level only */
		CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
			     "%s: revalidate FID "DFID" error: rc = %d\n",
			     ll_get_fsname(inode->i_sb, NULL, 0),
			     PFID(ll_inode2fid(inode)), rc);
	}

	return rc;
}

/**
 * Re-fetch attributes for \a dentry from the MDS unless locks covering
 * \a ibits are already held locally.
 */
static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
{
	struct inode *inode = dentry->d_inode;
	struct ptlrpc_request *req = NULL;
	struct obd_export *exp;
	int rc = 0;

	LASSERT(inode != NULL);

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
	       inode->i_ino, inode->i_generation, inode, dentry->d_name.name);

	exp = ll_i2mdexp(inode);

	/* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
	 * But under CMD case, it caused some lock issues, should be fixed
	 * with new CMD ibits lock. See bug 12718 */
	if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
		struct lookup_intent oit = { .it_op = IT_GETATTR };
		struct md_op_data *op_data;

		if (ibits == MDS_INODELOCK_LOOKUP)
			oit.it_op = IT_LOOKUP;

		/* Call getattr by fid, so do not provide name at all.
*/ 2927 op_data = ll_prep_md_op_data(NULL, dentry->d_inode, 2928 dentry->d_inode, NULL, 0, 0, 2929 LUSTRE_OPC_ANY, NULL); 2930 if (IS_ERR(op_data)) 2931 return PTR_ERR(op_data); 2932 2933 oit.it_create_mode |= M_CHECK_STALE; 2934 rc = md_intent_lock(exp, op_data, NULL, 0, 2935 /* we are not interested in name 2936 based lookup */ 2937 &oit, 0, &req, 2938 ll_md_blocking_ast, 0); 2939 ll_finish_md_op_data(op_data); 2940 oit.it_create_mode &= ~M_CHECK_STALE; 2941 if (rc < 0) { 2942 rc = ll_inode_revalidate_fini(inode, rc); 2943 goto out; 2944 } 2945 2946 rc = ll_revalidate_it_finish(req, &oit, dentry); 2947 if (rc != 0) { 2948 ll_intent_release(&oit); 2949 goto out; 2950 } 2951 2952 /* Unlinked? Unhash dentry, so it is not picked up later by 2953 do_lookup() -> ll_revalidate_it(). We cannot use d_drop 2954 here to preserve get_cwd functionality on 2.6. 2955 Bug 10503 */ 2956 if (!dentry->d_inode->i_nlink) 2957 d_lustre_invalidate(dentry, 0); 2958 2959 ll_lookup_finish_locks(&oit, dentry); 2960 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) { 2961 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); 2962 u64 valid = OBD_MD_FLGETATTR; 2963 struct md_op_data *op_data; 2964 int ealen = 0; 2965 2966 if (S_ISREG(inode->i_mode)) { 2967 rc = ll_get_default_mdsize(sbi, &ealen); 2968 if (rc) 2969 return rc; 2970 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE; 2971 } 2972 2973 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 2974 0, ealen, LUSTRE_OPC_ANY, 2975 NULL); 2976 if (IS_ERR(op_data)) 2977 return PTR_ERR(op_data); 2978 2979 op_data->op_valid = valid; 2980 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one 2981 * capa for this inode. Because we only keep capas of dirs 2982 * fresh. 
*/ 2983 rc = md_getattr(sbi->ll_md_exp, op_data, &req); 2984 ll_finish_md_op_data(op_data); 2985 if (rc) { 2986 rc = ll_inode_revalidate_fini(inode, rc); 2987 return rc; 2988 } 2989 2990 rc = ll_prep_inode(&inode, req, NULL, NULL); 2991 } 2992out: 2993 ptlrpc_req_finished(req); 2994 return rc; 2995} 2996 2997static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) 2998{ 2999 struct inode *inode = dentry->d_inode; 3000 int rc; 3001 3002 rc = __ll_inode_revalidate(dentry, ibits); 3003 if (rc != 0) 3004 return rc; 3005 3006 /* if object isn't regular file, don't validate size */ 3007 if (!S_ISREG(inode->i_mode)) { 3008 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime; 3009 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime; 3010 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime; 3011 } else { 3012 /* In case of restore, the MDT has the right size and has 3013 * already send it back without granting the layout lock, 3014 * inode is up-to-date so glimpse is useless. 
		 * Also to glimpse we need the layout, in case of a running
		 * restore the MDT holds the layout lock so the glimpse will
		 * block up to the end of restore (getattr will block)
		 */
		if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
			rc = ll_glimpse_size(inode);
	}
	return rc;
}

/**
 * getattr inode_operations method: revalidate the inode against the
 * MDS, then fill \a stat from the (now fresh) inode fields.
 */
int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
{
	struct inode *inode = de->d_inode;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ll_inode_info *lli = ll_i2info(inode);
	int res = 0;

	res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
				      MDS_INODELOCK_LOOKUP);
	ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);

	if (res)
		return res;

	stat->dev = inode->i_sb->s_dev;
	/* 32-bit userspace cannot hold a full 64-bit FID-based ino */
	if (ll_need_32bit_api(sbi))
		stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
	else
		stat->ino = inode->i_ino;
	stat->mode = inode->i_mode;
	stat->nlink = inode->i_nlink;
	stat->uid = inode->i_uid;
	stat->gid = inode->i_gid;
	stat->rdev = inode->i_rdev;
	stat->atime = inode->i_atime;
	stat->mtime = inode->i_mtime;
	stat->ctime = inode->i_ctime;
	stat->blksize = 1 << inode->i_blkbits;

	stat->size = i_size_read(inode);
	stat->blocks = inode->i_blocks;

	return 0;
}

/**
 * fiemap inode_operations method: translate the VFS fiemap_extent_info
 * into a Lustre ll_user_fiemap request and back.
 */
static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		     __u64 start, __u64 len)
{
	int rc;
	size_t num_bytes;
	struct ll_user_fiemap *fiemap;
	unsigned int extent_count = fieinfo->fi_extents_max;

	/* request buffer: header plus room for every extent asked for */
	num_bytes = sizeof(*fiemap) + (extent_count *
				       sizeof(struct ll_fiemap_extent));
	OBD_ALLOC_LARGE(fiemap, num_bytes);

	if (fiemap == NULL)
		return -ENOMEM;

	fiemap->fm_flags = fieinfo->fi_flags;
	fiemap->fm_extent_count = fieinfo->fi_extents_max;
	fiemap->fm_start = start;
	fiemap->fm_length = len;
	/* copy the first (continuation) extent from userspace, if any */
	if (extent_count > 0)
		memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
		       sizeof(struct ll_fiemap_extent));

	rc = ll_do_fiemap(inode, fiemap, num_bytes);

	/* propagate results back to the VFS structure */
	fieinfo->fi_flags = fiemap->fm_flags;
	fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
	if (extent_count > 0)
		memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
		       fiemap->fm_mapped_extents *
		       sizeof(struct ll_fiemap_extent));

	OBD_FREE_LARGE(fiemap, num_bytes);
	return rc;
}

/**
 * get_acl inode_operations method: hand out a reference on the cached
 * POSIX ACL under lli_lock.
 */
struct posix_acl *ll_get_acl(struct inode *inode, int type)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct posix_acl *acl = NULL;

	spin_lock(&lli->lli_lock);
	/* VFS' acl_permission_check->check_acl will release the refcount */
	acl = posix_acl_dup(lli->lli_posix_acl);
	spin_unlock(&lli->lli_lock);

	return acl;
}


/**
 * permission inode_operations method.
 *
 * May block (revalidation RPC), hence the -ECHILD bail-out for RCU walk
 * when MAY_NOT_BLOCK is defined.
 */
int ll_inode_permission(struct inode *inode, int mask)
{
	int rc = 0;

#ifdef MAY_NOT_BLOCK
	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;
#endif

	/* as root inode are NOT getting validated in lookup operation,
	 * need to do it before permission check.
*/ 3121 3122 if (inode == inode->i_sb->s_root->d_inode) { 3123 rc = __ll_inode_revalidate(inode->i_sb->s_root, 3124 MDS_INODELOCK_LOOKUP); 3125 if (rc) 3126 return rc; 3127 } 3128 3129 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n", 3130 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask); 3131 3132 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT) 3133 return lustre_check_remote_perm(inode, mask); 3134 3135 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1); 3136 rc = generic_permission(inode, mask); 3137 3138 return rc; 3139} 3140 3141/* -o localflock - only provides locally consistent flock locks */ 3142struct file_operations ll_file_operations = { 3143 .read = new_sync_read, 3144 .read_iter = ll_file_read_iter, 3145 .write = new_sync_write, 3146 .write_iter = ll_file_write_iter, 3147 .unlocked_ioctl = ll_file_ioctl, 3148 .open = ll_file_open, 3149 .release = ll_file_release, 3150 .mmap = ll_file_mmap, 3151 .llseek = ll_file_seek, 3152 .splice_read = ll_file_splice_read, 3153 .fsync = ll_fsync, 3154 .flush = ll_flush 3155}; 3156 3157struct file_operations ll_file_operations_flock = { 3158 .read = new_sync_read, 3159 .read_iter = ll_file_read_iter, 3160 .write = new_sync_write, 3161 .write_iter = ll_file_write_iter, 3162 .unlocked_ioctl = ll_file_ioctl, 3163 .open = ll_file_open, 3164 .release = ll_file_release, 3165 .mmap = ll_file_mmap, 3166 .llseek = ll_file_seek, 3167 .splice_read = ll_file_splice_read, 3168 .fsync = ll_fsync, 3169 .flush = ll_flush, 3170 .flock = ll_file_flock, 3171 .lock = ll_file_flock 3172}; 3173 3174/* These are for -o noflock - to return ENOSYS on flock calls */ 3175struct file_operations ll_file_operations_noflock = { 3176 .read = new_sync_read, 3177 .read_iter = ll_file_read_iter, 3178 .write = new_sync_write, 3179 .write_iter = ll_file_write_iter, 3180 .unlocked_ioctl = ll_file_ioctl, 3181 .open = ll_file_open, 3182 .release = ll_file_release, 3183 .mmap = ll_file_mmap, 3184 .llseek = 
ll_file_seek, 3185 .splice_read = ll_file_splice_read, 3186 .fsync = ll_fsync, 3187 .flush = ll_flush, 3188 .flock = ll_file_noflock, 3189 .lock = ll_file_noflock 3190}; 3191 3192struct inode_operations ll_file_inode_operations = { 3193 .setattr = ll_setattr, 3194 .getattr = ll_getattr, 3195 .permission = ll_inode_permission, 3196 .setxattr = ll_setxattr, 3197 .getxattr = ll_getxattr, 3198 .listxattr = ll_listxattr, 3199 .removexattr = ll_removexattr, 3200 .fiemap = ll_fiemap, 3201 .get_acl = ll_get_acl, 3202}; 3203 3204/* dynamic ioctl number support routines */ 3205static struct llioc_ctl_data { 3206 struct rw_semaphore ioc_sem; 3207 struct list_head ioc_head; 3208} llioc = { 3209 __RWSEM_INITIALIZER(llioc.ioc_sem), 3210 LIST_HEAD_INIT(llioc.ioc_head) 3211}; 3212 3213 3214struct llioc_data { 3215 struct list_head iocd_list; 3216 unsigned int iocd_size; 3217 llioc_callback_t iocd_cb; 3218 unsigned int iocd_count; 3219 unsigned int iocd_cmd[0]; 3220}; 3221 3222void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd) 3223{ 3224 unsigned int size; 3225 struct llioc_data *in_data = NULL; 3226 3227 if (cb == NULL || cmd == NULL || 3228 count > LLIOC_MAX_CMD || count < 0) 3229 return NULL; 3230 3231 size = sizeof(*in_data) + count * sizeof(unsigned int); 3232 in_data = kzalloc(size, GFP_NOFS); 3233 if (!in_data) 3234 return NULL; 3235 3236 memset(in_data, 0, sizeof(*in_data)); 3237 in_data->iocd_size = size; 3238 in_data->iocd_cb = cb; 3239 in_data->iocd_count = count; 3240 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count); 3241 3242 down_write(&llioc.ioc_sem); 3243 list_add_tail(&in_data->iocd_list, &llioc.ioc_head); 3244 up_write(&llioc.ioc_sem); 3245 3246 return in_data; 3247} 3248 3249void ll_iocontrol_unregister(void *magic) 3250{ 3251 struct llioc_data *tmp; 3252 3253 if (magic == NULL) 3254 return; 3255 3256 down_write(&llioc.ioc_sem); 3257 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) { 3258 if (tmp == magic) { 3259 
unsigned int size = tmp->iocd_size; 3260 3261 list_del(&tmp->iocd_list); 3262 up_write(&llioc.ioc_sem); 3263 3264 OBD_FREE(tmp, size); 3265 return; 3266 } 3267 } 3268 up_write(&llioc.ioc_sem); 3269 3270 CWARN("didn't find iocontrol register block with magic: %p\n", magic); 3271} 3272 3273EXPORT_SYMBOL(ll_iocontrol_register); 3274EXPORT_SYMBOL(ll_iocontrol_unregister); 3275 3276static enum llioc_iter 3277ll_iocontrol_call(struct inode *inode, struct file *file, 3278 unsigned int cmd, unsigned long arg, int *rcp) 3279{ 3280 enum llioc_iter ret = LLIOC_CONT; 3281 struct llioc_data *data; 3282 int rc = -EINVAL, i; 3283 3284 down_read(&llioc.ioc_sem); 3285 list_for_each_entry(data, &llioc.ioc_head, iocd_list) { 3286 for (i = 0; i < data->iocd_count; i++) { 3287 if (cmd != data->iocd_cmd[i]) 3288 continue; 3289 3290 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc); 3291 break; 3292 } 3293 3294 if (ret == LLIOC_STOP) 3295 break; 3296 } 3297 up_read(&llioc.ioc_sem); 3298 3299 if (rcp) 3300 *rcp = rc; 3301 return ret; 3302} 3303 3304int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf) 3305{ 3306 struct ll_inode_info *lli = ll_i2info(inode); 3307 struct cl_env_nest nest; 3308 struct lu_env *env; 3309 int result; 3310 3311 if (lli->lli_clob == NULL) 3312 return 0; 3313 3314 env = cl_env_nested_get(&nest); 3315 if (IS_ERR(env)) 3316 return PTR_ERR(env); 3317 3318 result = cl_conf_set(env, lli->lli_clob, conf); 3319 cl_env_nested_put(&nest, env); 3320 3321 if (conf->coc_opc == OBJECT_CONF_SET) { 3322 struct ldlm_lock *lock = conf->coc_lock; 3323 3324 LASSERT(lock != NULL); 3325 LASSERT(ldlm_has_layout(lock)); 3326 if (result == 0) { 3327 /* it can only be allowed to match after layout is 3328 * applied to inode otherwise false layout would be 3329 * seen. Applying layout should happen before dropping 3330 * the intent lock. 
*/ 3331 ldlm_lock_allow_match(lock); 3332 } 3333 } 3334 return result; 3335} 3336 3337/* Fetch layout from MDT with getxattr request, if it's not ready yet */ 3338static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) 3339 3340{ 3341 struct ll_sb_info *sbi = ll_i2sbi(inode); 3342 struct obd_capa *oc; 3343 struct ptlrpc_request *req; 3344 struct mdt_body *body; 3345 void *lvbdata; 3346 void *lmm; 3347 int lmmsize; 3348 int rc; 3349 3350 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n", 3351 PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY), 3352 lock->l_lvb_data, lock->l_lvb_len); 3353 3354 if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY)) 3355 return 0; 3356 3357 /* if layout lock was granted right away, the layout is returned 3358 * within DLM_LVB of dlm reply; otherwise if the lock was ever 3359 * blocked and then granted via completion ast, we have to fetch 3360 * layout here. Please note that we can't use the LVB buffer in 3361 * completion AST because it doesn't have a large enough buffer */ 3362 oc = ll_mdscapa_get(inode); 3363 rc = ll_get_default_mdsize(sbi, &lmmsize); 3364 if (rc == 0) 3365 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, 3366 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0, 3367 lmmsize, 0, &req); 3368 capa_put(oc); 3369 if (rc < 0) 3370 return rc; 3371 3372 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 3373 if (body == NULL) { 3374 rc = -EPROTO; 3375 goto out; 3376 } 3377 3378 lmmsize = body->eadatasize; 3379 if (lmmsize == 0) /* empty layout */ { 3380 rc = 0; 3381 goto out; 3382 } 3383 3384 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize); 3385 if (lmm == NULL) { 3386 rc = -EFAULT; 3387 goto out; 3388 } 3389 3390 OBD_ALLOC_LARGE(lvbdata, lmmsize); 3391 if (lvbdata == NULL) { 3392 rc = -ENOMEM; 3393 goto out; 3394 } 3395 3396 memcpy(lvbdata, lmm, lmmsize); 3397 lock_res_and_lock(lock); 3398 if (lock->l_lvb_data != NULL) 3399 
		OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);

	lock->l_lvb_data = lvbdata;
	lock->l_lvb_len = lmmsize;
	unlock_res_and_lock(lock);

out:
	ptlrpc_req_finished(req);
	return rc;
}

/**
 * Apply the layout to the inode. Layout lock is held and will be released
 * in this function.
 *
 * \param lockh  handle of the layout lock; always decref'd before return
 * \param mode   mode the lock in \a lockh is held in
 * \param gen    out: layout generation on success
 * \param reconf if false, only report whether a layout is already cached
 *               (-ENODATA when it is not); if true, (re)configure the inode
 *               from the lock's LVB
 *
 * \retval 0 on success, -EAGAIN if the caller should retry after a layout
 *	   reconfiguration wait, other negative errno on failure.
 */
static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
			      struct inode *inode, __u32 *gen, bool reconf)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ldlm_lock *lock;
	struct lustre_md md = { NULL };
	struct cl_object_conf conf;
	int rc = 0;
	bool lvb_ready;
	bool wait_layout = false;

	LASSERT(lustre_handle_is_used(lockh));

	lock = ldlm_handle2lock(lockh);
	LASSERT(lock != NULL);
	LASSERT(ldlm_has_layout(lock));

	LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
		   inode, PFID(&lli->lli_fid), reconf);

	/* in case this is a caching lock and reinstate with new inode */
	md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);

	/* snapshot LVB_READY under the res lock */
	lock_res_and_lock(lock);
	lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
	unlock_res_and_lock(lock);
	/* checking lvb_ready is racy but this is okay. The worst case is
	 * that multi processes may configure the file on the same time. */
	if (lvb_ready || !reconf) {
		rc = -ENODATA;
		if (lvb_ready) {
			/* layout_gen must be valid if layout lock is not
			 * cancelled and stripe has already set */
			*gen = ll_layout_version_get(lli);
			rc = 0;
		}
		goto out;
	}

	rc = ll_layout_fetch(inode, lock);
	if (rc < 0)
		goto out;

	/* for layout lock, lmm is returned in lock's lvb.
	 * lvb_data is immutable if the lock is held so it's safe to access it
	 * without res lock. See the description in ldlm_lock_decref_internal()
	 * for the condition to free lvb_data of layout lock */
	if (lock->l_lvb_data != NULL) {
		rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
				  lock->l_lvb_data, lock->l_lvb_len);
		if (rc >= 0) {
			/* no lsm means an empty (stripe-less) layout */
			*gen = LL_LAYOUT_GEN_EMPTY;
			if (md.lsm != NULL)
				*gen = md.lsm->lsm_layout_gen;
			rc = 0;
		} else {
			CERROR("%s: file "DFID" unpackmd error: %d\n",
			       ll_get_fsname(inode->i_sb, NULL, 0),
			       PFID(&lli->lli_fid), rc);
		}
	}
	if (rc < 0)
		goto out;

	/* set layout to file. Unlikely this will fail as old layout was
	 * surely eliminated */
	memset(&conf, 0, sizeof(conf));
	conf.coc_opc = OBJECT_CONF_SET;
	conf.coc_inode = inode;
	conf.coc_lock = lock;
	conf.u.coc_md = &md;
	rc = ll_layout_conf(inode, &conf);

	if (md.lsm != NULL)
		obd_free_memmd(sbi->ll_dt_exp, &md.lsm);

	/* refresh layout failed, need to wait */
	wait_layout = rc == -EBUSY;

out:
	/* release our reference and the caller's lock mode before possibly
	 * blocking below */
	LDLM_LOCK_PUT(lock);
	ldlm_lock_decref(lockh, mode);

	/* wait for IO to complete if it's still being used. */
	if (wait_layout) {
		CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
		       ll_get_fsname(inode->i_sb, NULL, 0),
		       inode, PFID(&lli->lli_fid));

		memset(&conf, 0, sizeof(conf));
		conf.coc_opc = OBJECT_CONF_WAIT;
		conf.coc_inode = inode;
		rc = ll_layout_conf(inode, &conf);
		if (rc == 0)
			rc = -EAGAIN;

		CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
		       PFID(&lli->lli_fid), rc);
	}
	return rc;
}

/**
 * This function checks if there exists a LAYOUT lock on the client side,
 * or enqueues it if it doesn't have one in cache.
 *
 * This function will not hold layout lock so it may be revoked any time after
 * this function returns. Any operations depend on layout should be redone
 * in that case.
 *
 * This function should be called before lov_io_init() to get an uptodate
 * layout version, the caller should save the version number and after IO
 * is finished, this function should be called again to verify that layout
 * is not changed during IO time.
 *
 * \param gen  out: current layout generation (LL_LAYOUT_GEN_NONE triggers a
 *	       refresh)
 * \retval 0 on success, negative errno on failure.
 */
int ll_layout_refresh(struct inode *inode, __u32 *gen)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct lookup_intent it;
	struct lustre_handle lockh;
	ldlm_mode_t mode;
	struct ldlm_enqueue_info einfo = {
		.ei_type = LDLM_IBITS,
		.ei_mode = LCK_CR,
		.ei_cb_bl = ll_md_blocking_ast,
		.ei_cb_cp = ldlm_completion_ast,
	};
	int rc;

	/* nothing to do if layout locking is disabled or we already have a
	 * valid generation */
	*gen = ll_layout_version_get(lli);
	if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
		return 0;

	/* sanity checks */
	LASSERT(fid_is_sane(ll_inode2fid(inode)));
	LASSERT(S_ISREG(inode->i_mode));

	/* take layout lock mutex to enqueue layout lock exclusively. */
	mutex_lock(&lli->lli_layout_mutex);

again:
	/* mostly layout lock is caching on the local side, so try to match
	 * it before grabbing layout lock mutex. */
	mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
			       LCK_CR | LCK_CW | LCK_PR | LCK_PW);
	if (mode != 0) { /* hit cached lock */
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;

		mutex_unlock(&lli->lli_layout_mutex);
		return rc;
	}

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
				     0, 0, LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data)) {
		mutex_unlock(&lli->lli_layout_mutex);
		return PTR_ERR(op_data);
	}

	/* have to enqueue one */
	memset(&it, 0, sizeof(it));
	it.it_op = IT_LAYOUT;
	lockh.cookie = 0ULL;

	LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
			  ll_get_fsname(inode->i_sb, NULL, 0), inode,
			  PFID(&lli->lli_fid));

	rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
			NULL, 0, NULL, 0);
	/* the enqueue reply is not needed; drop it before processing the
	 * intent result */
	if (it.d.lustre.it_data != NULL)
		ptlrpc_req_finished(it.d.lustre.it_data);
	it.d.lustre.it_data = NULL;

	ll_finish_md_op_data(op_data);

	/* take over the lock mode from the intent so ll_layout_lock_set()
	 * can release it */
	mode = it.d.lustre.it_lock_mode;
	it.d.lustre.it_lock_mode = 0;
	ll_intent_drop_lock(&it);

	if (rc == 0) {
		/* set lock data in case this is a new lock */
		ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;
	}
	mutex_unlock(&lli->lli_layout_mutex);

	return rc;
}

/**
 * This function send a restore request to the MDT
 */
int ll_layout_restore(struct inode *inode)
{
	struct hsm_user_request *hur;
	int len, rc;

	/* single-item HSM restore request for this inode */
	len = sizeof(struct hsm_user_request) +
	      sizeof(struct hsm_user_item);
	hur = kzalloc(len, GFP_NOFS);
	if (!hur)
		return -ENOMEM;

	hur->hur_request.hr_action = HUA_RESTORE;
	hur->hur_request.hr_archive_id = 0;
	hur->hur_request.hr_flags = 0;
memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid, 3629 sizeof(hur->hur_user_item[0].hui_fid)); 3630 hur->hur_user_item[0].hui_extent.length = -1; 3631 hur->hur_request.hr_itemcount = 1; 3632 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp, 3633 len, hur, NULL); 3634 OBD_FREE(hur, len); 3635 return rc; 3636} 3637