[go: nahoru, domu]

1/*
2 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses.  You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 *     Redistribution and use in source and binary forms, with or
15 *     without modification, are permitted provided that the following
16 *     conditions are met:
17 *
18 *      - Redistributions of source code must retain the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer.
21 *
22 *      - Redistributions in binary form must reproduce the above
23 *        copyright notice, this list of conditions and the following
24 *        disclaimer in the documentation and/or other materials
25 *        provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 */
36
37#include <linux/module.h>
38#include <linux/init.h>
39#include <linux/device.h>
40#include <linux/err.h>
41#include <linux/fs.h>
42#include <linux/poll.h>
43#include <linux/sched.h>
44#include <linux/file.h>
45#include <linux/cdev.h>
46#include <linux/anon_inodes.h>
47#include <linux/slab.h>
48
49#include <asm/uaccess.h>
50
51#include "uverbs.h"
52
53MODULE_AUTHOR("Roland Dreier");
54MODULE_DESCRIPTION("InfiniBand userspace verbs access");
55MODULE_LICENSE("Dual BSD/GPL");
56
/*
 * Character device numbering: the fixed major, the first minor used by
 * this driver, and the number of device numbers registered starting at
 * that base.
 */
enum {
	IB_UVERBS_MAJOR       = 231,
	IB_UVERBS_BASE_MINOR  = 192,
	IB_UVERBS_MAX_DEVICES = 32,
};

/* First device number of the statically registered region. */
#define IB_UVERBS_BASE_DEV \
	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
64
65static struct class *uverbs_class;
66
67DEFINE_SPINLOCK(ib_uverbs_idr_lock);
68DEFINE_IDR(ib_uverbs_pd_idr);
69DEFINE_IDR(ib_uverbs_mr_idr);
70DEFINE_IDR(ib_uverbs_mw_idr);
71DEFINE_IDR(ib_uverbs_ah_idr);
72DEFINE_IDR(ib_uverbs_cq_idr);
73DEFINE_IDR(ib_uverbs_qp_idr);
74DEFINE_IDR(ib_uverbs_srq_idr);
75DEFINE_IDR(ib_uverbs_xrcd_idr);
76DEFINE_IDR(ib_uverbs_rule_idr);
77
78static DEFINE_SPINLOCK(map_lock);
79static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
80
81static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
82				     const char __user *buf, int in_len,
83				     int out_len) = {
84	[IB_USER_VERBS_CMD_GET_CONTEXT]		= ib_uverbs_get_context,
85	[IB_USER_VERBS_CMD_QUERY_DEVICE]	= ib_uverbs_query_device,
86	[IB_USER_VERBS_CMD_QUERY_PORT]		= ib_uverbs_query_port,
87	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
88	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
89	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
90	[IB_USER_VERBS_CMD_REREG_MR]		= ib_uverbs_rereg_mr,
91	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,
92	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
93	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
94	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
95	[IB_USER_VERBS_CMD_CREATE_CQ]		= ib_uverbs_create_cq,
96	[IB_USER_VERBS_CMD_RESIZE_CQ]		= ib_uverbs_resize_cq,
97	[IB_USER_VERBS_CMD_POLL_CQ]		= ib_uverbs_poll_cq,
98	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]	= ib_uverbs_req_notify_cq,
99	[IB_USER_VERBS_CMD_DESTROY_CQ]		= ib_uverbs_destroy_cq,
100	[IB_USER_VERBS_CMD_CREATE_QP]		= ib_uverbs_create_qp,
101	[IB_USER_VERBS_CMD_QUERY_QP]		= ib_uverbs_query_qp,
102	[IB_USER_VERBS_CMD_MODIFY_QP]		= ib_uverbs_modify_qp,
103	[IB_USER_VERBS_CMD_DESTROY_QP]		= ib_uverbs_destroy_qp,
104	[IB_USER_VERBS_CMD_POST_SEND]		= ib_uverbs_post_send,
105	[IB_USER_VERBS_CMD_POST_RECV]		= ib_uverbs_post_recv,
106	[IB_USER_VERBS_CMD_POST_SRQ_RECV]	= ib_uverbs_post_srq_recv,
107	[IB_USER_VERBS_CMD_CREATE_AH]		= ib_uverbs_create_ah,
108	[IB_USER_VERBS_CMD_DESTROY_AH]		= ib_uverbs_destroy_ah,
109	[IB_USER_VERBS_CMD_ATTACH_MCAST]	= ib_uverbs_attach_mcast,
110	[IB_USER_VERBS_CMD_DETACH_MCAST]	= ib_uverbs_detach_mcast,
111	[IB_USER_VERBS_CMD_CREATE_SRQ]		= ib_uverbs_create_srq,
112	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= ib_uverbs_modify_srq,
113	[IB_USER_VERBS_CMD_QUERY_SRQ]		= ib_uverbs_query_srq,
114	[IB_USER_VERBS_CMD_DESTROY_SRQ]		= ib_uverbs_destroy_srq,
115	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd,
116	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
117	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
118	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp,
119};
120
121static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
122				    struct ib_udata *ucore,
123				    struct ib_udata *uhw) = {
124	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
125	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow
126};
127
/* Client callbacks, defined below and referenced by uverbs_client. */
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);
130
131static void ib_uverbs_release_dev(struct kref *ref)
132{
133	struct ib_uverbs_device *dev =
134		container_of(ref, struct ib_uverbs_device, ref);
135
136	complete(&dev->comp);
137}
138
139static void ib_uverbs_release_event_file(struct kref *ref)
140{
141	struct ib_uverbs_event_file *file =
142		container_of(ref, struct ib_uverbs_event_file, ref);
143
144	kfree(file);
145}
146
147void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
148			  struct ib_uverbs_event_file *ev_file,
149			  struct ib_ucq_object *uobj)
150{
151	struct ib_uverbs_event *evt, *tmp;
152
153	if (ev_file) {
154		spin_lock_irq(&ev_file->lock);
155		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
156			list_del(&evt->list);
157			kfree(evt);
158		}
159		spin_unlock_irq(&ev_file->lock);
160
161		kref_put(&ev_file->ref, ib_uverbs_release_event_file);
162	}
163
164	spin_lock_irq(&file->async_file->lock);
165	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
166		list_del(&evt->list);
167		kfree(evt);
168	}
169	spin_unlock_irq(&file->async_file->lock);
170}
171
172void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
173			      struct ib_uevent_object *uobj)
174{
175	struct ib_uverbs_event *evt, *tmp;
176
177	spin_lock_irq(&file->async_file->lock);
178	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
179		list_del(&evt->list);
180		kfree(evt);
181	}
182	spin_unlock_irq(&file->async_file->lock);
183}
184
185static void ib_uverbs_detach_umcast(struct ib_qp *qp,
186				    struct ib_uqp_object *uobj)
187{
188	struct ib_uverbs_mcast_entry *mcast, *tmp;
189
190	list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
191		ib_detach_mcast(qp, &mcast->gid, mcast->lid);
192		list_del(&mcast->list);
193		kfree(mcast);
194	}
195}
196
197static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
198				      struct ib_ucontext *context)
199{
200	struct ib_uobject *uobj, *tmp;
201
202	if (!context)
203		return 0;
204
205	context->closing = 1;
206
207	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
208		struct ib_ah *ah = uobj->object;
209
210		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
211		ib_destroy_ah(ah);
212		kfree(uobj);
213	}
214
215	/* Remove MWs before QPs, in order to support type 2A MWs. */
216	list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
217		struct ib_mw *mw = uobj->object;
218
219		idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
220		ib_dealloc_mw(mw);
221		kfree(uobj);
222	}
223
224	list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
225		struct ib_flow *flow_id = uobj->object;
226
227		idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
228		ib_destroy_flow(flow_id);
229		kfree(uobj);
230	}
231
232	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
233		struct ib_qp *qp = uobj->object;
234		struct ib_uqp_object *uqp =
235			container_of(uobj, struct ib_uqp_object, uevent.uobject);
236
237		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
238		if (qp != qp->real_qp) {
239			ib_close_qp(qp);
240		} else {
241			ib_uverbs_detach_umcast(qp, uqp);
242			ib_destroy_qp(qp);
243		}
244		ib_uverbs_release_uevent(file, &uqp->uevent);
245		kfree(uqp);
246	}
247
248	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
249		struct ib_cq *cq = uobj->object;
250		struct ib_uverbs_event_file *ev_file = cq->cq_context;
251		struct ib_ucq_object *ucq =
252			container_of(uobj, struct ib_ucq_object, uobject);
253
254		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
255		ib_destroy_cq(cq);
256		ib_uverbs_release_ucq(file, ev_file, ucq);
257		kfree(ucq);
258	}
259
260	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
261		struct ib_srq *srq = uobj->object;
262		struct ib_uevent_object *uevent =
263			container_of(uobj, struct ib_uevent_object, uobject);
264
265		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
266		ib_destroy_srq(srq);
267		ib_uverbs_release_uevent(file, uevent);
268		kfree(uevent);
269	}
270
271	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
272		struct ib_mr *mr = uobj->object;
273
274		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
275		ib_dereg_mr(mr);
276		kfree(uobj);
277	}
278
279	mutex_lock(&file->device->xrcd_tree_mutex);
280	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
281		struct ib_xrcd *xrcd = uobj->object;
282		struct ib_uxrcd_object *uxrcd =
283			container_of(uobj, struct ib_uxrcd_object, uobject);
284
285		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
286		ib_uverbs_dealloc_xrcd(file->device, xrcd);
287		kfree(uxrcd);
288	}
289	mutex_unlock(&file->device->xrcd_tree_mutex);
290
291	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
292		struct ib_pd *pd = uobj->object;
293
294		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
295		ib_dealloc_pd(pd);
296		kfree(uobj);
297	}
298
299	return context->device->dealloc_ucontext(context);
300}
301
302static void ib_uverbs_release_file(struct kref *ref)
303{
304	struct ib_uverbs_file *file =
305		container_of(ref, struct ib_uverbs_file, ref);
306
307	module_put(file->device->ib_dev->owner);
308	kref_put(&file->device->ref, ib_uverbs_release_dev);
309
310	kfree(file);
311}
312
313static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
314				    size_t count, loff_t *pos)
315{
316	struct ib_uverbs_event_file *file = filp->private_data;
317	struct ib_uverbs_event *event;
318	int eventsz;
319	int ret = 0;
320
321	spin_lock_irq(&file->lock);
322
323	while (list_empty(&file->event_list)) {
324		spin_unlock_irq(&file->lock);
325
326		if (filp->f_flags & O_NONBLOCK)
327			return -EAGAIN;
328
329		if (wait_event_interruptible(file->poll_wait,
330					     !list_empty(&file->event_list)))
331			return -ERESTARTSYS;
332
333		spin_lock_irq(&file->lock);
334	}
335
336	event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
337
338	if (file->is_async)
339		eventsz = sizeof (struct ib_uverbs_async_event_desc);
340	else
341		eventsz = sizeof (struct ib_uverbs_comp_event_desc);
342
343	if (eventsz > count) {
344		ret   = -EINVAL;
345		event = NULL;
346	} else {
347		list_del(file->event_list.next);
348		if (event->counter) {
349			++(*event->counter);
350			list_del(&event->obj_list);
351		}
352	}
353
354	spin_unlock_irq(&file->lock);
355
356	if (event) {
357		if (copy_to_user(buf, event, eventsz))
358			ret = -EFAULT;
359		else
360			ret = eventsz;
361	}
362
363	kfree(event);
364
365	return ret;
366}
367
368static unsigned int ib_uverbs_event_poll(struct file *filp,
369					 struct poll_table_struct *wait)
370{
371	unsigned int pollflags = 0;
372	struct ib_uverbs_event_file *file = filp->private_data;
373
374	poll_wait(filp, &file->poll_wait, wait);
375
376	spin_lock_irq(&file->lock);
377	if (!list_empty(&file->event_list))
378		pollflags = POLLIN | POLLRDNORM;
379	spin_unlock_irq(&file->lock);
380
381	return pollflags;
382}
383
384static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
385{
386	struct ib_uverbs_event_file *file = filp->private_data;
387
388	return fasync_helper(fd, filp, on, &file->async_queue);
389}
390
391static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
392{
393	struct ib_uverbs_event_file *file = filp->private_data;
394	struct ib_uverbs_event *entry, *tmp;
395
396	spin_lock_irq(&file->lock);
397	file->is_closed = 1;
398	list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
399		if (entry->counter)
400			list_del(&entry->obj_list);
401		kfree(entry);
402	}
403	spin_unlock_irq(&file->lock);
404
405	if (file->is_async) {
406		ib_unregister_event_handler(&file->uverbs_file->event_handler);
407		kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
408	}
409	kref_put(&file->ref, ib_uverbs_release_event_file);
410
411	return 0;
412}
413
414static const struct file_operations uverbs_event_fops = {
415	.owner	 = THIS_MODULE,
416	.read	 = ib_uverbs_event_read,
417	.poll    = ib_uverbs_event_poll,
418	.release = ib_uverbs_event_close,
419	.fasync  = ib_uverbs_event_fasync,
420	.llseek	 = no_llseek,
421};
422
423void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
424{
425	struct ib_uverbs_event_file    *file = cq_context;
426	struct ib_ucq_object	       *uobj;
427	struct ib_uverbs_event	       *entry;
428	unsigned long			flags;
429
430	if (!file)
431		return;
432
433	spin_lock_irqsave(&file->lock, flags);
434	if (file->is_closed) {
435		spin_unlock_irqrestore(&file->lock, flags);
436		return;
437	}
438
439	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
440	if (!entry) {
441		spin_unlock_irqrestore(&file->lock, flags);
442		return;
443	}
444
445	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
446
447	entry->desc.comp.cq_handle = cq->uobject->user_handle;
448	entry->counter		   = &uobj->comp_events_reported;
449
450	list_add_tail(&entry->list, &file->event_list);
451	list_add_tail(&entry->obj_list, &uobj->comp_list);
452	spin_unlock_irqrestore(&file->lock, flags);
453
454	wake_up_interruptible(&file->poll_wait);
455	kill_fasync(&file->async_queue, SIGIO, POLL_IN);
456}
457
458static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
459				    __u64 element, __u64 event,
460				    struct list_head *obj_list,
461				    u32 *counter)
462{
463	struct ib_uverbs_event *entry;
464	unsigned long flags;
465
466	spin_lock_irqsave(&file->async_file->lock, flags);
467	if (file->async_file->is_closed) {
468		spin_unlock_irqrestore(&file->async_file->lock, flags);
469		return;
470	}
471
472	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
473	if (!entry) {
474		spin_unlock_irqrestore(&file->async_file->lock, flags);
475		return;
476	}
477
478	entry->desc.async.element    = element;
479	entry->desc.async.event_type = event;
480	entry->desc.async.reserved   = 0;
481	entry->counter               = counter;
482
483	list_add_tail(&entry->list, &file->async_file->event_list);
484	if (obj_list)
485		list_add_tail(&entry->obj_list, obj_list);
486	spin_unlock_irqrestore(&file->async_file->lock, flags);
487
488	wake_up_interruptible(&file->async_file->poll_wait);
489	kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
490}
491
492void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
493{
494	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
495						  struct ib_ucq_object, uobject);
496
497	ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
498				event->event, &uobj->async_list,
499				&uobj->async_events_reported);
500}
501
502void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
503{
504	struct ib_uevent_object *uobj;
505
506	/* for XRC target qp's, check that qp is live */
507	if (!event->element.qp->uobject || !event->element.qp->uobject->live)
508		return;
509
510	uobj = container_of(event->element.qp->uobject,
511			    struct ib_uevent_object, uobject);
512
513	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
514				event->event, &uobj->event_list,
515				&uobj->events_reported);
516}
517
518void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
519{
520	struct ib_uevent_object *uobj;
521
522	uobj = container_of(event->element.srq->uobject,
523			    struct ib_uevent_object, uobject);
524
525	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
526				event->event, &uobj->event_list,
527				&uobj->events_reported);
528}
529
530void ib_uverbs_event_handler(struct ib_event_handler *handler,
531			     struct ib_event *event)
532{
533	struct ib_uverbs_file *file =
534		container_of(handler, struct ib_uverbs_file, event_handler);
535
536	ib_uverbs_async_handler(file, event->element.port_num, event->event,
537				NULL, NULL);
538}
539
540struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
541					int is_async)
542{
543	struct ib_uverbs_event_file *ev_file;
544	struct file *filp;
545
546	ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
547	if (!ev_file)
548		return ERR_PTR(-ENOMEM);
549
550	kref_init(&ev_file->ref);
551	spin_lock_init(&ev_file->lock);
552	INIT_LIST_HEAD(&ev_file->event_list);
553	init_waitqueue_head(&ev_file->poll_wait);
554	ev_file->uverbs_file = uverbs_file;
555	ev_file->async_queue = NULL;
556	ev_file->is_async    = is_async;
557	ev_file->is_closed   = 0;
558
559	filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
560				  ev_file, O_RDONLY);
561	if (IS_ERR(filp))
562		kfree(ev_file);
563
564	return filp;
565}
566
567/*
568 * Look up a completion event file by FD.  If lookup is successful,
569 * takes a ref to the event file struct that it returns; if
570 * unsuccessful, returns NULL.
571 */
572struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
573{
574	struct ib_uverbs_event_file *ev_file = NULL;
575	struct fd f = fdget(fd);
576
577	if (!f.file)
578		return NULL;
579
580	if (f.file->f_op != &uverbs_event_fops)
581		goto out;
582
583	ev_file = f.file->private_data;
584	if (ev_file->is_async) {
585		ev_file = NULL;
586		goto out;
587	}
588
589	kref_get(&ev_file->ref);
590
591out:
592	fdput(f);
593	return ev_file;
594}
595
596static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
597			     size_t count, loff_t *pos)
598{
599	struct ib_uverbs_file *file = filp->private_data;
600	struct ib_uverbs_cmd_hdr hdr;
601	__u32 flags;
602
603	if (count < sizeof hdr)
604		return -EINVAL;
605
606	if (copy_from_user(&hdr, buf, sizeof hdr))
607		return -EFAULT;
608
609	flags = (hdr.command &
610		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
611
612	if (!flags) {
613		__u32 command;
614
615		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
616					   IB_USER_VERBS_CMD_COMMAND_MASK))
617			return -EINVAL;
618
619		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
620
621		if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
622		    !uverbs_cmd_table[command])
623			return -EINVAL;
624
625		if (!file->ucontext &&
626		    command != IB_USER_VERBS_CMD_GET_CONTEXT)
627			return -EINVAL;
628
629		if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command)))
630			return -ENOSYS;
631
632		if (hdr.in_words * 4 != count)
633			return -EINVAL;
634
635		return uverbs_cmd_table[command](file,
636						 buf + sizeof(hdr),
637						 hdr.in_words * 4,
638						 hdr.out_words * 4);
639
640	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
641		__u32 command;
642
643		struct ib_uverbs_ex_cmd_hdr ex_hdr;
644		struct ib_udata ucore;
645		struct ib_udata uhw;
646		int err;
647		size_t written_count = count;
648
649		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
650					   IB_USER_VERBS_CMD_COMMAND_MASK))
651			return -EINVAL;
652
653		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
654
655		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
656		    !uverbs_ex_cmd_table[command])
657			return -ENOSYS;
658
659		if (!file->ucontext)
660			return -EINVAL;
661
662		if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
663			return -ENOSYS;
664
665		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
666			return -EINVAL;
667
668		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
669			return -EFAULT;
670
671		count -= sizeof(hdr) + sizeof(ex_hdr);
672		buf += sizeof(hdr) + sizeof(ex_hdr);
673
674		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
675			return -EINVAL;
676
677		if (ex_hdr.cmd_hdr_reserved)
678			return -EINVAL;
679
680		if (ex_hdr.response) {
681			if (!hdr.out_words && !ex_hdr.provider_out_words)
682				return -EINVAL;
683
684			if (!access_ok(VERIFY_WRITE,
685				       (void __user *) (unsigned long) ex_hdr.response,
686				       (hdr.out_words + ex_hdr.provider_out_words) * 8))
687				return -EFAULT;
688		} else {
689			if (hdr.out_words || ex_hdr.provider_out_words)
690				return -EINVAL;
691		}
692
693		INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
694				       hdr.in_words * 8, hdr.out_words * 8);
695
696		INIT_UDATA_BUF_OR_NULL(&uhw,
697				       buf + ucore.inlen,
698				       (unsigned long) ex_hdr.response + ucore.outlen,
699				       ex_hdr.provider_in_words * 8,
700				       ex_hdr.provider_out_words * 8);
701
702		err = uverbs_ex_cmd_table[command](file,
703						   &ucore,
704						   &uhw);
705
706		if (err)
707			return err;
708
709		return written_count;
710	}
711
712	return -ENOSYS;
713}
714
715static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
716{
717	struct ib_uverbs_file *file = filp->private_data;
718
719	if (!file->ucontext)
720		return -ENODEV;
721	else
722		return file->device->ib_dev->mmap(file->ucontext, vma);
723}
724
725/*
726 * ib_uverbs_open() does not need the BKL:
727 *
728 *  - the ib_uverbs_device structures are properly reference counted and
729 *    everything else is purely local to the file being created, so
730 *    races against other open calls are not a problem;
731 *  - there is no ioctl method to race against;
732 *  - the open method will either immediately run -ENXIO, or all
733 *    required initialization will be done.
734 */
735static int ib_uverbs_open(struct inode *inode, struct file *filp)
736{
737	struct ib_uverbs_device *dev;
738	struct ib_uverbs_file *file;
739	int ret;
740
741	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
742	if (dev)
743		kref_get(&dev->ref);
744	else
745		return -ENXIO;
746
747	if (!try_module_get(dev->ib_dev->owner)) {
748		ret = -ENODEV;
749		goto err;
750	}
751
752	file = kmalloc(sizeof *file, GFP_KERNEL);
753	if (!file) {
754		ret = -ENOMEM;
755		goto err_module;
756	}
757
758	file->device	 = dev;
759	file->ucontext	 = NULL;
760	file->async_file = NULL;
761	kref_init(&file->ref);
762	mutex_init(&file->mutex);
763
764	filp->private_data = file;
765
766	return nonseekable_open(inode, filp);
767
768err_module:
769	module_put(dev->ib_dev->owner);
770
771err:
772	kref_put(&dev->ref, ib_uverbs_release_dev);
773	return ret;
774}
775
776static int ib_uverbs_close(struct inode *inode, struct file *filp)
777{
778	struct ib_uverbs_file *file = filp->private_data;
779
780	ib_uverbs_cleanup_ucontext(file, file->ucontext);
781
782	if (file->async_file)
783		kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
784
785	kref_put(&file->ref, ib_uverbs_release_file);
786
787	return 0;
788}
789
790static const struct file_operations uverbs_fops = {
791	.owner	 = THIS_MODULE,
792	.write	 = ib_uverbs_write,
793	.open	 = ib_uverbs_open,
794	.release = ib_uverbs_close,
795	.llseek	 = no_llseek,
796};
797
798static const struct file_operations uverbs_mmap_fops = {
799	.owner	 = THIS_MODULE,
800	.write	 = ib_uverbs_write,
801	.mmap    = ib_uverbs_mmap,
802	.open	 = ib_uverbs_open,
803	.release = ib_uverbs_close,
804	.llseek	 = no_llseek,
805};
806
807static struct ib_client uverbs_client = {
808	.name   = "uverbs",
809	.add    = ib_uverbs_add_one,
810	.remove = ib_uverbs_remove_one
811};
812
813static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
814			  char *buf)
815{
816	struct ib_uverbs_device *dev = dev_get_drvdata(device);
817
818	if (!dev)
819		return -ENODEV;
820
821	return sprintf(buf, "%s\n", dev->ib_dev->name);
822}
823static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
824
825static ssize_t show_dev_abi_version(struct device *device,
826				    struct device_attribute *attr, char *buf)
827{
828	struct ib_uverbs_device *dev = dev_get_drvdata(device);
829
830	if (!dev)
831		return -ENODEV;
832
833	return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
834}
835static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
836
837static CLASS_ATTR_STRING(abi_version, S_IRUGO,
838			 __stringify(IB_USER_VERBS_ABI_VERSION));
839
840static dev_t overflow_maj;
841static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
842
843/*
844 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
845 * requesting a new major number and doubling the number of max devices we
846 * support. It's stupid, but simple.
847 */
848static int find_overflow_devnum(void)
849{
850	int ret;
851
852	if (!overflow_maj) {
853		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
854					  "infiniband_verbs");
855		if (ret) {
856			printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
857			return ret;
858		}
859	}
860
861	ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
862	if (ret >= IB_UVERBS_MAX_DEVICES)
863		return -1;
864
865	return ret;
866}
867
868static void ib_uverbs_add_one(struct ib_device *device)
869{
870	int devnum;
871	dev_t base;
872	struct ib_uverbs_device *uverbs_dev;
873
874	if (!device->alloc_ucontext)
875		return;
876
877	uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
878	if (!uverbs_dev)
879		return;
880
881	kref_init(&uverbs_dev->ref);
882	init_completion(&uverbs_dev->comp);
883	uverbs_dev->xrcd_tree = RB_ROOT;
884	mutex_init(&uverbs_dev->xrcd_tree_mutex);
885
886	spin_lock(&map_lock);
887	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
888	if (devnum >= IB_UVERBS_MAX_DEVICES) {
889		spin_unlock(&map_lock);
890		devnum = find_overflow_devnum();
891		if (devnum < 0)
892			goto err;
893
894		spin_lock(&map_lock);
895		uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
896		base = devnum + overflow_maj;
897		set_bit(devnum, overflow_map);
898	} else {
899		uverbs_dev->devnum = devnum;
900		base = devnum + IB_UVERBS_BASE_DEV;
901		set_bit(devnum, dev_map);
902	}
903	spin_unlock(&map_lock);
904
905	uverbs_dev->ib_dev           = device;
906	uverbs_dev->num_comp_vectors = device->num_comp_vectors;
907
908	cdev_init(&uverbs_dev->cdev, NULL);
909	uverbs_dev->cdev.owner = THIS_MODULE;
910	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
911	kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
912	if (cdev_add(&uverbs_dev->cdev, base, 1))
913		goto err_cdev;
914
915	uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
916					uverbs_dev->cdev.dev, uverbs_dev,
917					"uverbs%d", uverbs_dev->devnum);
918	if (IS_ERR(uverbs_dev->dev))
919		goto err_cdev;
920
921	if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
922		goto err_class;
923	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
924		goto err_class;
925
926	ib_set_client_data(device, &uverbs_client, uverbs_dev);
927
928	return;
929
930err_class:
931	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
932
933err_cdev:
934	cdev_del(&uverbs_dev->cdev);
935	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
936		clear_bit(devnum, dev_map);
937	else
938		clear_bit(devnum, overflow_map);
939
940err:
941	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
942	wait_for_completion(&uverbs_dev->comp);
943	kfree(uverbs_dev);
944	return;
945}
946
947static void ib_uverbs_remove_one(struct ib_device *device)
948{
949	struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
950
951	if (!uverbs_dev)
952		return;
953
954	dev_set_drvdata(uverbs_dev->dev, NULL);
955	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
956	cdev_del(&uverbs_dev->cdev);
957
958	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
959		clear_bit(uverbs_dev->devnum, dev_map);
960	else
961		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
962
963	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
964	wait_for_completion(&uverbs_dev->comp);
965	kfree(uverbs_dev);
966}
967
968static char *uverbs_devnode(struct device *dev, umode_t *mode)
969{
970	if (mode)
971		*mode = 0666;
972	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
973}
974
975static int __init ib_uverbs_init(void)
976{
977	int ret;
978
979	ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
980				     "infiniband_verbs");
981	if (ret) {
982		printk(KERN_ERR "user_verbs: couldn't register device number\n");
983		goto out;
984	}
985
986	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
987	if (IS_ERR(uverbs_class)) {
988		ret = PTR_ERR(uverbs_class);
989		printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
990		goto out_chrdev;
991	}
992
993	uverbs_class->devnode = uverbs_devnode;
994
995	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
996	if (ret) {
997		printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
998		goto out_class;
999	}
1000
1001	ret = ib_register_client(&uverbs_client);
1002	if (ret) {
1003		printk(KERN_ERR "user_verbs: couldn't register client\n");
1004		goto out_class;
1005	}
1006
1007	return 0;
1008
1009out_class:
1010	class_destroy(uverbs_class);
1011
1012out_chrdev:
1013	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
1014
1015out:
1016	return ret;
1017}
1018
1019static void __exit ib_uverbs_cleanup(void)
1020{
1021	ib_unregister_client(&uverbs_client);
1022	class_destroy(uverbs_class);
1023	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
1024	if (overflow_maj)
1025		unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
1026	idr_destroy(&ib_uverbs_pd_idr);
1027	idr_destroy(&ib_uverbs_mr_idr);
1028	idr_destroy(&ib_uverbs_mw_idr);
1029	idr_destroy(&ib_uverbs_ah_idr);
1030	idr_destroy(&ib_uverbs_cq_idr);
1031	idr_destroy(&ib_uverbs_qp_idr);
1032	idr_destroy(&ib_uverbs_srq_idr);
1033}
1034
/* Module entry and exit points. */
module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);
1037