1/*
2   drbd_nl.c
3
4   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10   drbd is free software; you can redistribute it and/or modify
11   it under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 2, or (at your option)
13   any later version.
14
15   drbd is distributed in the hope that it will be useful,
16   but WITHOUT ANY WARRANTY; without even the implied warranty of
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18   GNU General Public License for more details.
19
20   You should have received a copy of the GNU General Public License
21   along with drbd; see the file COPYING.  If not, write to
22   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
26#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
27
28#include <linux/module.h>
29#include <linux/drbd.h>
30#include <linux/in.h>
31#include <linux/fs.h>
32#include <linux/file.h>
33#include <linux/slab.h>
34#include <linux/blkpg.h>
35#include <linux/cpumask.h>
36#include "drbd_int.h"
37#include "drbd_protocol.h"
38#include "drbd_req.h"
39#include <asm/unaligned.h>
40#include <linux/drbd_limits.h>
41#include <linux/kthread.h>
42
43#include <net/genetlink.h>
44
45/* .doit */
46// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
47// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
48
49int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
50int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
51
52int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
53int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
54int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
55
56int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
57int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
58int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
59int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
60int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
61int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
62int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
63int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
64int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
65int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
66int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
67int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
68int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
69int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
70int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
71int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
72int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
73int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
74int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
75int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
76/* .dumpit */
77int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
78
79#include <linux/drbd_genl_api.h>
80#include "drbd_nla.h"
81#include <linux/genl_magic_func.h>
82
83/* used with blkdev_get_by_path, to claim our meta data device(s) */
84static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
85
86static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
87{
88	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
89	if (genlmsg_reply(skb, info))
90		pr_err("error sending genl reply\n");
91}
92
93/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: the only
94 * reason it could fail would be lack of space in the skb, and 4k are available. */
95int drbd_msg_put_info(struct sk_buff *skb, const char *info)
96{
97	struct nlattr *nla;
98	int err = -EMSGSIZE;
99
100	if (!info || !info[0])
101		return 0;
102
103	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
104	if (!nla)
105		return err;
106
107	err = nla_put_string(skb, T_info_text, info);
108	if (err) {
109		nla_nest_cancel(skb, nla);
110		return err;
111	} else
112		nla_nest_end(skb, nla);
113	return 0;
114}
115
116/* This would be a good candidate for a "pre_doit" hook,
117 * and per-family private info->pointers.
118 * But we need to stay compatible with older kernels.
119 * If it returns successfully, adm_ctx members are valid.
120 *
121 * At this point, we still rely on the global genl_lock().
122 * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
123 * to add additional synchronization against object destruction/modification.
124 */
125#define DRBD_ADM_NEED_MINOR	1
126#define DRBD_ADM_NEED_RESOURCE	2
127#define DRBD_ADM_NEED_CONNECTION 4
128static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
129	struct sk_buff *skb, struct genl_info *info, unsigned flags)
130{
131	struct drbd_genlmsghdr *d_in = info->userhdr;
132	const u8 cmd = info->genlhdr->cmd;
133	int err;
134
135	memset(adm_ctx, 0, sizeof(*adm_ctx));
136
137	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
138	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
139	       return -EPERM;
140
141	adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
142	if (!adm_ctx->reply_skb) {
143		err = -ENOMEM;
144		goto fail;
145	}
146
147	adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
148					info, &drbd_genl_family, 0, cmd);
149	/* putting a few bytes into a fresh skb of >= 4k will always succeed,
150	 * but check anyway */
151	if (!adm_ctx->reply_dh) {
152		err = -ENOMEM;
153		goto fail;
154	}
155
156	adm_ctx->reply_dh->minor = d_in->minor;
157	adm_ctx->reply_dh->ret_code = NO_ERROR;
158
159	adm_ctx->volume = VOLUME_UNSPECIFIED;
160	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
161		struct nlattr *nla;
162		/* parse and validate only */
163		err = drbd_cfg_context_from_attrs(NULL, info);
164		if (err)
165			goto fail;
166
167		/* It was present, and valid,
168		 * copy it over to the reply skb. */
169		err = nla_put_nohdr(adm_ctx->reply_skb,
170				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
171				info->attrs[DRBD_NLA_CFG_CONTEXT]);
172		if (err)
173			goto fail;
174
175		/* and assign stuff to the adm_ctx */
176		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
177		if (nla)
178			adm_ctx->volume = nla_get_u32(nla);
179		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
180		if (nla)
181			adm_ctx->resource_name = nla_data(nla);
182		adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
183		adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
184		if ((adm_ctx->my_addr &&
185		     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
186		    (adm_ctx->peer_addr &&
187		     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
188			err = -EINVAL;
189			goto fail;
190		}
191	}
192
193	adm_ctx->minor = d_in->minor;
194	adm_ctx->device = minor_to_device(d_in->minor);
195
196	/* We are protected by the global genl_lock().
197	 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
198	 * so make sure this object stays around. */
199	if (adm_ctx->device)
200		kref_get(&adm_ctx->device->kref);
201
202	if (adm_ctx->resource_name) {
203		adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
204	}
205
206	if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
207		drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
208		return ERR_MINOR_INVALID;
209	}
210	if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
211		drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
212		if (adm_ctx->resource_name)
213			return ERR_RES_NOT_KNOWN;
214		return ERR_INVALID_REQUEST;
215	}
216
217	if (flags & DRBD_ADM_NEED_CONNECTION) {
218		if (adm_ctx->resource) {
219			drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
220			return ERR_INVALID_REQUEST;
221		}
222		if (adm_ctx->device) {
223			drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
224			return ERR_INVALID_REQUEST;
225		}
226		if (adm_ctx->my_addr && adm_ctx->peer_addr)
227			adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
228							  nla_len(adm_ctx->my_addr),
229							  nla_data(adm_ctx->peer_addr),
230							  nla_len(adm_ctx->peer_addr));
231		if (!adm_ctx->connection) {
232			drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
233			return ERR_INVALID_REQUEST;
234		}
235	}
236
237	/* some more paranoia, if the request was over-determined */
238	if (adm_ctx->device && adm_ctx->resource &&
239	    adm_ctx->device->resource != adm_ctx->resource) {
240		pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
241				adm_ctx->minor, adm_ctx->resource->name,
242				adm_ctx->device->resource->name);
243		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
244		return ERR_INVALID_REQUEST;
245	}
246	if (adm_ctx->device &&
247	    adm_ctx->volume != VOLUME_UNSPECIFIED &&
248	    adm_ctx->volume != adm_ctx->device->vnr) {
249		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
250				adm_ctx->minor, adm_ctx->volume,
251				adm_ctx->device->vnr,
252				adm_ctx->device->resource->name);
253		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
254		return ERR_INVALID_REQUEST;
255	}
256
257	/* still, provide adm_ctx->resource always, if possible. */
258	if (!adm_ctx->resource) {
259		adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
260			: adm_ctx->connection ? adm_ctx->connection->resource : NULL;
261		if (adm_ctx->resource)
262			kref_get(&adm_ctx->resource->kref);
263	}
264
265	return NO_ERROR;
266
267fail:
268	nlmsg_free(adm_ctx->reply_skb);
269	adm_ctx->reply_skb = NULL;
270	return err;
271}
272
273static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
274	struct genl_info *info, int retcode)
275{
276	if (adm_ctx->device) {
277		kref_put(&adm_ctx->device->kref, drbd_destroy_device);
278		adm_ctx->device = NULL;
279	}
280	if (adm_ctx->connection) {
281		kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
282		adm_ctx->connection = NULL;
283	}
284	if (adm_ctx->resource) {
285		kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
286		adm_ctx->resource = NULL;
287	}
288
289	if (!adm_ctx->reply_skb)
290		return -ENOMEM;
291
292	adm_ctx->reply_dh->ret_code = retcode;
293	drbd_adm_send_reply(adm_ctx->reply_skb, info);
294	return 0;
295}
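
/*
 * The usual .doit pattern built from the two helpers above (a sketch only;
 * drbd_adm_set_role() and the other handlers below follow exactly this shape):
 *
 *	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 *	if (!adm_ctx.reply_skb)
 *		return retcode;          (no reply skb, nothing to finish)
 *	if (retcode != NO_ERROR)
 *		goto out;
 *	...do the actual work, setting retcode...
 * out:
 *	drbd_adm_finish(&adm_ctx, info, retcode);
 *	return 0;
 */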
296
297static void setup_khelper_env(struct drbd_connection *connection, char **envp)
298{
299	char *afs;
300
301	/* FIXME: A future version will not allow this case. */
302	if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
303		return;
304
305	switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
306	case AF_INET6:
307		afs = "ipv6";
308		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
309			 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
310		break;
311	case AF_INET:
312		afs = "ipv4";
313		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
314			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
315		break;
316	default:
317		afs = "ssocks";
318		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
319			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
320	}
321	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
322}
323
324int drbd_khelper(struct drbd_device *device, char *cmd)
325{
326	char *envp[] = { "HOME=/",
327			"TERM=linux",
328			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
329			 (char[20]) { }, /* address family */
330			 (char[60]) { }, /* address */
331			NULL };
332	char mb[12];
333	char *argv[] = {usermode_helper, cmd, mb, NULL };
334	struct drbd_connection *connection = first_peer_device(device)->connection;
335	struct sib_info sib;
336	int ret;
337
338	if (current == connection->worker.task)
339		set_bit(CALLBACK_PENDING, &connection->flags);
340
341	snprintf(mb, 12, "minor-%d", device_to_minor(device));
342	setup_khelper_env(connection, envp);
343
344	/* The helper may take some time.
345	 * write out any unsynced meta data changes now */
346	drbd_md_sync(device);
347
348	drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
349	sib.sib_reason = SIB_HELPER_PRE;
350	sib.helper_name = cmd;
351	drbd_bcast_event(device, &sib);
352	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
353	if (ret)
354		drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
355				usermode_helper, cmd, mb,
356				(ret >> 8) & 0xff, ret);
357	else
358		drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
359				usermode_helper, cmd, mb,
360				(ret >> 8) & 0xff, ret);
361	sib.sib_reason = SIB_HELPER_POST;
362	sib.helper_exit_code = ret;
363	drbd_bcast_event(device, &sib);
364
365	if (current == connection->worker.task)
366		clear_bit(CALLBACK_PENDING, &connection->flags);
367
368	if (ret < 0) /* Ignore any ERRNOs we got. */
369		ret = 0;
370
371	return ret;
372}
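
/*
 * Note on decoding the helper result: with UMH_WAIT_PROC,
 * call_usermodehelper() returns a wait()-style status word, so the
 * handler's exit code sits in bits 8..15; that is what the
 * "(ret >> 8) & 0xff" above extracts.  Caller-side sketch (illustrative
 * only; "pri-on-incon-degr" is just one example of a handler name):
 *
 *	int status = drbd_khelper(device, "pri-on-incon-degr");
 *	int exit_code = (status >> 8) & 0xff;
 *
 * An exit_code of 0 means the handler reported success.
 */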
373
374static int conn_khelper(struct drbd_connection *connection, char *cmd)
375{
376	char *envp[] = { "HOME=/",
377			"TERM=linux",
378			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
379			 (char[20]) { }, /* address family */
380			 (char[60]) { }, /* address */
381			NULL };
382	char *resource_name = connection->resource->name;
383	char *argv[] = {usermode_helper, cmd, resource_name, NULL };
384	int ret;
385
386	setup_khelper_env(connection, envp);
387	conn_md_sync(connection);
388
389	drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
390	/* TODO: conn_bcast_event() ?? */
391
392	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
393	if (ret)
394		drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
395			  usermode_helper, cmd, resource_name,
396			  (ret >> 8) & 0xff, ret);
397	else
398		drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
399			  usermode_helper, cmd, resource_name,
400			  (ret >> 8) & 0xff, ret);
401	/* TODO: conn_bcast_event() ?? */
402
403	if (ret < 0) /* Ignore any ERRNOs we got. */
404		ret = 0;
405
406	return ret;
407}
408
409static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
410{
411	enum drbd_fencing_p fp = FP_NOT_AVAIL;
412	struct drbd_peer_device *peer_device;
413	int vnr;
414
415	rcu_read_lock();
416	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
417		struct drbd_device *device = peer_device->device;
418		if (get_ldev_if_state(device, D_CONSISTENT)) {
419			struct disk_conf *disk_conf =
420				rcu_dereference(peer_device->device->ldev->disk_conf);
421			fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
422			put_ldev(device);
423		}
424	}
425	rcu_read_unlock();
426
427	if (fp == FP_NOT_AVAIL) {
428		/* IO Suspending works on the whole resource.
429		   Do it only for one device. */
430		vnr = 0;
431		peer_device = idr_get_next(&connection->peer_devices, &vnr);
432		drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
433	}
434
435	return fp;
436}
437
438bool conn_try_outdate_peer(struct drbd_connection *connection)
439{
440	unsigned int connect_cnt;
441	union drbd_state mask = { };
442	union drbd_state val = { };
443	enum drbd_fencing_p fp;
444	char *ex_to_string;
445	int r;
446
447	spin_lock_irq(&connection->resource->req_lock);
448	if (connection->cstate >= C_WF_REPORT_PARAMS) {
449		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
450		spin_unlock_irq(&connection->resource->req_lock);
451		return false;
452	}
453
454	connect_cnt = connection->connect_cnt;
455	spin_unlock_irq(&connection->resource->req_lock);
456
457	fp = highest_fencing_policy(connection);
458	switch (fp) {
459	case FP_NOT_AVAIL:
460		drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
461		goto out;
462	case FP_DONT_CARE:
463		return true;
464	default: ;
465	}
466
467	r = conn_khelper(connection, "fence-peer");
468
469	switch ((r>>8) & 0xff) {
470	case 3: /* peer is inconsistent */
471		ex_to_string = "peer is inconsistent or worse";
472		mask.pdsk = D_MASK;
473		val.pdsk = D_INCONSISTENT;
474		break;
475	case 4: /* peer got outdated, or was already outdated */
476		ex_to_string = "peer was fenced";
477		mask.pdsk = D_MASK;
478		val.pdsk = D_OUTDATED;
479		break;
480	case 5: /* peer was down */
481		if (conn_highest_disk(connection) == D_UP_TO_DATE) {
482			/* we will(have) create(d) a new UUID anyways... */
483			ex_to_string = "peer is unreachable, assumed to be dead";
484			mask.pdsk = D_MASK;
485			val.pdsk = D_OUTDATED;
486		} else {
487			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
488		}
489		break;
490	case 6: /* Peer is primary, voluntarily outdate myself.
491		 * This is useful when an unconnected R_SECONDARY is asked to
492		 * become R_PRIMARY, but finds the other peer being active. */
493		ex_to_string = "peer is active";
494		drbd_warn(connection, "Peer is primary, outdating myself.\n");
495		mask.disk = D_MASK;
496		val.disk = D_OUTDATED;
497		break;
498	case 7:
499		if (fp != FP_STONITH)
500			drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
501		ex_to_string = "peer was stonithed";
502		mask.pdsk = D_MASK;
503		val.pdsk = D_OUTDATED;
504		break;
505	default:
506		/* The script is broken ... */
507		drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
508		return false; /* Eventually leave IO frozen */
509	}
510
511	drbd_info(connection, "fence-peer helper returned %d (%s)\n",
512		  (r>>8) & 0xff, ex_to_string);
513
514 out:
515
516	/* Not using
517	   conn_request_state(connection, mask, val, CS_VERBOSE);
518	   here, because we might have been able to re-establish the connection
519	   in the meantime. */
520	spin_lock_irq(&connection->resource->req_lock);
521	if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
522		if (connection->connect_cnt != connect_cnt)
523			/* In case the connection was established and dropped
524			   while the fence-peer handler was running, ignore it */
525			drbd_info(connection, "Ignoring fence-peer exit code\n");
526		else
527			_conn_request_state(connection, mask, val, CS_VERBOSE);
528	}
529	spin_unlock_irq(&connection->resource->req_lock);
530
531	return conn_highest_pdsk(connection) <= D_OUTDATED;
532}
533
534static int _try_outdate_peer_async(void *data)
535{
536	struct drbd_connection *connection = (struct drbd_connection *)data;
537
538	conn_try_outdate_peer(connection);
539
540	kref_put(&connection->kref, drbd_destroy_connection);
541	return 0;
542}
543
544void conn_try_outdate_peer_async(struct drbd_connection *connection)
545{
546	struct task_struct *opa;
547
548	kref_get(&connection->kref);
549	/* We may just have force_sig()'ed this thread
550	 * to get it out of some blocking network function.
551	 * Clear signals; otherwise kthread_run(), which internally uses
552	 * wait_on_completion_killable(), will mistake our pending signal
553	 * for a new fatal signal and fail. */
554	flush_signals(current);
555	opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
556	if (IS_ERR(opa)) {
557		drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
558		kref_put(&connection->kref, drbd_destroy_connection);
559	}
560}
561
562enum drbd_state_rv
563drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
564{
565	struct drbd_peer_device *const peer_device = first_peer_device(device);
566	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
567	const int max_tries = 4;
568	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
569	struct net_conf *nc;
570	int try = 0;
571	int forced = 0;
572	union drbd_state mask, val;
573
574	if (new_role == R_PRIMARY) {
575		struct drbd_connection *connection;
576
577		/* Detect dead peers as soon as possible.  */
578
579		rcu_read_lock();
580		for_each_connection(connection, device->resource)
581			request_ping(connection);
582		rcu_read_unlock();
583	}
584
585	mutex_lock(device->state_mutex);
586
587	mask.i = 0; mask.role = R_MASK;
588	val.i  = 0; val.role  = new_role;
589
590	while (try++ < max_tries) {
591		rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);
592
593		/* in case we first succeeded in outdating,
594		 * but now suddenly could establish a connection */
595		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
596			val.pdsk = 0;
597			mask.pdsk = 0;
598			continue;
599		}
600
601		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
602		    (device->state.disk < D_UP_TO_DATE &&
603		     device->state.disk >= D_INCONSISTENT)) {
604			mask.disk = D_MASK;
605			val.disk  = D_UP_TO_DATE;
606			forced = 1;
607			continue;
608		}
609
610		if (rv == SS_NO_UP_TO_DATE_DISK &&
611		    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
612			D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
613
614			if (conn_try_outdate_peer(connection)) {
615				val.disk = D_UP_TO_DATE;
616				mask.disk = D_MASK;
617			}
618			continue;
619		}
620
621		if (rv == SS_NOTHING_TO_DO)
622			goto out;
623		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
624			if (!conn_try_outdate_peer(connection) && force) {
625				drbd_warn(device, "Forced into split brain situation!\n");
626				mask.pdsk = D_MASK;
627				val.pdsk  = D_OUTDATED;
628
629			}
630			continue;
631		}
632		if (rv == SS_TWO_PRIMARIES) {
633			/* Maybe the peer is detected as dead very soon...
634			   retry at most once more in this case. */
635			int timeo;
636			rcu_read_lock();
637			nc = rcu_dereference(connection->net_conf);
638			timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
639			rcu_read_unlock();
640			schedule_timeout_interruptible(timeo);
641			if (try < max_tries)
642				try = max_tries - 1;
643			continue;
644		}
645		if (rv < SS_SUCCESS) {
646			rv = _drbd_request_state(device, mask, val,
647						CS_VERBOSE + CS_WAIT_COMPLETE);
648			if (rv < SS_SUCCESS)
649				goto out;
650		}
651		break;
652	}
653
654	if (rv < SS_SUCCESS)
655		goto out;
656
657	if (forced)
658		drbd_warn(device, "Forced to consider local data as UpToDate!\n");
659
660	/* Wait until nothing is on the fly :) */
661	wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
662
663	/* FIXME also wait for all pending P_BARRIER_ACK? */
664
665	if (new_role == R_SECONDARY) {
666		if (get_ldev(device)) {
667			device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
668			put_ldev(device);
669		}
670	} else {
671		mutex_lock(&device->resource->conf_update);
672		nc = connection->net_conf;
673		if (nc)
674			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
675		mutex_unlock(&device->resource->conf_update);
676
677		if (get_ldev(device)) {
678			if (((device->state.conn < C_CONNECTED ||
679			       device->state.pdsk <= D_FAILED)
680			      && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
681				drbd_uuid_new_current(device);
682
683			device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
684			put_ldev(device);
685		}
686	}
687
688	/* writeout of the activity-log-covered areas of the bitmap
689	 * to stable storage was already done in the after-state-change work */
690
691	if (device->state.conn >= C_WF_REPORT_PARAMS) {
692		/* if this was forced, we should consider sync */
693		if (forced)
694			drbd_send_uuids(peer_device);
695		drbd_send_current_state(peer_device);
696	}
697
698	drbd_md_sync(device);
699	set_disk_ro(device->vdisk, new_role == R_SECONDARY);
700	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
701out:
702	mutex_unlock(device->state_mutex);
703	return rv;
704}
705
706static const char *from_attrs_err_to_txt(int err)
707{
708	return	err == -ENOMSG ? "required attribute missing" :
709		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
710		err == -EEXIST ? "can not change invariant setting" :
711		"invalid attribute value";
712}
713
714int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
715{
716	struct drbd_config_context adm_ctx;
717	struct set_role_parms parms;
718	int err;
719	enum drbd_ret_code retcode;
720
721	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
722	if (!adm_ctx.reply_skb)
723		return retcode;
724	if (retcode != NO_ERROR)
725		goto out;
726
727	memset(&parms, 0, sizeof(parms));
728	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
729		err = set_role_parms_from_attrs(&parms, info);
730		if (err) {
731			retcode = ERR_MANDATORY_TAG;
732			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
733			goto out;
734		}
735	}
736	genl_unlock();
737	mutex_lock(&adm_ctx.resource->adm_mutex);
738
739	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
740		retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
741	else
742		retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
743
744	mutex_unlock(&adm_ctx.resource->adm_mutex);
745	genl_lock();
746out:
747	drbd_adm_finish(&adm_ctx, info, retcode);
748	return 0;
749}
750
751/* Initializes the md.*_offset members, so we are able to find
752 * the on disk meta data.
753 *
754 * We currently have two possible layouts:
755 * external:
756 *   |----------- md_size_sect ------------------|
757 *   [ 4k superblock ][ activity log ][  Bitmap  ]
758 *   | al_offset == 8 |
759 *   | bm_offset = al_offset + X      |
760 *  ==> bitmap sectors = md_size_sect - bm_offset
761 *
762 * internal:
763 *            |----------- md_size_sect ------------------|
764 * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
765 *                        | al_offset < 0 |
766 *            | bm_offset = al_offset - Y |
767 *  ==> bitmap sectors = Y = al_offset - bm_offset
768 *
769 *  Activity log size used to be fixed 32kB,
770 *  but is about to become configurable.
771 */
772static void drbd_md_set_sector_offsets(struct drbd_device *device,
773				       struct drbd_backing_dev *bdev)
774{
775	sector_t md_size_sect = 0;
776	unsigned int al_size_sect = bdev->md.al_size_4k * 8;
777
778	bdev->md.md_offset = drbd_md_ss(bdev);
779
780	switch (bdev->md.meta_dev_idx) {
781	default:
782		/* v07 style fixed size indexed meta data */
783		bdev->md.md_size_sect = MD_128MB_SECT;
784		bdev->md.al_offset = MD_4kB_SECT;
785		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
786		break;
787	case DRBD_MD_INDEX_FLEX_EXT:
788		/* just occupy the full device; unit: sectors */
789		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
790		bdev->md.al_offset = MD_4kB_SECT;
791		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
792		break;
793	case DRBD_MD_INDEX_INTERNAL:
794	case DRBD_MD_INDEX_FLEX_INT:
795		/* al size is still fixed */
796		bdev->md.al_offset = -al_size_sect;
797		/* we need (slightly less than) ~ this many bitmap sectors: */
798		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
799		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
800		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
801		md_size_sect = ALIGN(md_size_sect, 8);
802
803		/* plus the "drbd meta data super block",
804		 * and the activity log; */
805		md_size_sect += MD_4kB_SECT + al_size_sect;
806
807		bdev->md.md_size_sect = md_size_sect;
808		/* bitmap offset is adjusted by 'super' block size */
809		bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
810		break;
811	}
812}
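
/*
 * Rough sizing intuition for the internal layouts above (illustrative; the
 * exact values come from the macros used in this function): the bitmap
 * tracks one bit per 4 KiB block of backing storage, so it needs about
 *
 *	bitmap bytes ~= backing_size / (4 KiB * 8) = backing_size / 32768
 *
 * i.e. roughly 32 MiB of bitmap for a 1 TiB backing device, plus the 4 KiB
 * superblock and the activity log, rounded up by the alignment steps above.
 */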
813
814/* input size is expected to be in KB */
815char *ppsize(char *buf, unsigned long long size)
816{
817	/* Needs 9 bytes at max including trailing NUL:
818	 * -1ULL ==> "16384 EB" */
819	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
820	int base = 0;
821	while (size >= 10000 && base < sizeof(units)-1) {
822		/* shift + round */
823		size = (size >> 10) + !!(size & (1<<9));
824		base++;
825	}
826	sprintf(buf, "%u %cB", (unsigned)size, units[base]);
827
828	return buf;
829}
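
/*
 * Example of the rounding above (illustrative): the input is in KB and is
 * divided by 1024 per step, with bit 9 providing the rounding, until it
 * drops below five digits:
 *
 *	char buf[10];
 *	ppsize(buf, 4);        yields "4 KB"
 *	ppsize(buf, 1048576);  yields "1024 MB"
 */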
830
831/* there is still a theoretical deadlock when called from receiver
832 * on a D_INCONSISTENT R_PRIMARY:
833 *  remote READ does inc_ap_bio, receiver would need to receive answer
834 *  packet from remote to dec_ap_bio again.
835 *  receiver receive_sizes(), comes here,
836 *  waits for ap_bio_cnt == 0. -> deadlock.
837 * but this cannot happen, actually, because:
838 *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
839 *  (not connected, or bad/no disk on peer):
840 *  see drbd_fail_request_early, ap_bio_cnt is zero.
841 *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
842 *  peer may not initiate a resize.
843 */
844/* Note these are not to be confused with
845 * drbd_adm_suspend_io/drbd_adm_resume_io,
846 * which are (sub) state changes triggered by admin (drbdsetup),
847 * and can be long lived.
848 * This changes a device->flag, is triggered by drbd internals,
849 * and should be short-lived. */
850void drbd_suspend_io(struct drbd_device *device)
851{
852	set_bit(SUSPEND_IO, &device->flags);
853	if (drbd_suspended(device))
854		return;
855	wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
856}
857
858void drbd_resume_io(struct drbd_device *device)
859{
860	clear_bit(SUSPEND_IO, &device->flags);
861	wake_up(&device->misc_wait);
862}
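
/*
 * These two are typically used as a bracket around operations that must not
 * race with application IO, e.g. (a sketch; drbd_determine_dev_size() right
 * below is the real thing):
 *
 *	drbd_suspend_io(device);
 *	...resize the bitmap, move the meta data, etc...
 *	drbd_resume_io(device);
 */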
863
864/**
865 * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
866 * @device:	DRBD device.
867 *
868 * Returns 0 on success, negative return values indicate errors.
869 * You should call drbd_md_sync() after calling this function.
870 */
871enum determine_dev_size
872drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
873{
874	sector_t prev_first_sect, prev_size; /* previous meta location */
875	sector_t la_size_sect, u_size;
876	struct drbd_md *md = &device->ldev->md;
877	u32 prev_al_stripe_size_4k;
878	u32 prev_al_stripes;
879	sector_t size;
880	char ppb[10];
881	void *buffer;
882
883	int md_moved, la_size_changed;
884	enum determine_dev_size rv = DS_UNCHANGED;
885
886	/* race:
887	 * application request passes inc_ap_bio,
888	 * but then cannot get an AL-reference.
889	 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
890	 *
891	 * to avoid that:
892	 * Suspend IO right here.
893	 * still lock the act_log to not trigger ASSERTs there.
894	 */
895	drbd_suspend_io(device);
896	buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
897	if (!buffer) {
898		drbd_resume_io(device);
899		return DS_ERROR;
900	}
901
902	/* no wait necessary anymore, actually we could assert that */
903	wait_event(device->al_wait, lc_try_lock(device->act_log));
904
905	prev_first_sect = drbd_md_first_sector(device->ldev);
906	prev_size = device->ldev->md.md_size_sect;
907	la_size_sect = device->ldev->md.la_size_sect;
908
909	if (rs) {
910		/* rs is non NULL if we should change the AL layout only */
911
912		prev_al_stripes = md->al_stripes;
913		prev_al_stripe_size_4k = md->al_stripe_size_4k;
914
915		md->al_stripes = rs->al_stripes;
916		md->al_stripe_size_4k = rs->al_stripe_size / 4;
917		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
918	}
919
920	drbd_md_set_sector_offsets(device, device->ldev);
921
922	rcu_read_lock();
923	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
924	rcu_read_unlock();
925	size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
926
927	if (size < la_size_sect) {
928		if (rs && u_size == 0) {
929			/* Remove "rs &&" later. This check should always be active, but
930			   right now the receiver expects the permissive behavior */
931			drbd_warn(device, "Implicit shrink not allowed. "
932				 "Use --size=%llus for explicit shrink.\n",
933				 (unsigned long long)size);
934			rv = DS_ERROR_SHRINK;
935		}
936		if (u_size > size)
937			rv = DS_ERROR_SPACE_MD;
938		if (rv != DS_UNCHANGED)
939			goto err_out;
940	}
941
942	if (drbd_get_capacity(device->this_bdev) != size ||
943	    drbd_bm_capacity(device) != size) {
944		int err;
945		err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
946		if (unlikely(err)) {
947			/* currently there is only one error: ENOMEM! */
948			size = drbd_bm_capacity(device)>>1;
949			if (size == 0) {
950				drbd_err(device, "OUT OF MEMORY! "
951				    "Could not allocate bitmap!\n");
952			} else {
953				drbd_err(device, "BM resizing failed. "
954				    "Leaving size unchanged at size = %lu KB\n",
955				    (unsigned long)size);
956			}
957			rv = DS_ERROR;
958		}
959		/* racy, see comments above. */
960		drbd_set_my_capacity(device, size);
961		device->ldev->md.la_size_sect = size;
962		drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
963		     (unsigned long long)size>>1);
964	}
965	if (rv <= DS_ERROR)
966		goto err_out;
967
968	la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
969
970	md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
971		|| prev_size	   != device->ldev->md.md_size_sect;
972
973	if (la_size_changed || md_moved || rs) {
974		u32 prev_flags;
975
976		/* We do some synchronous IO below, which may take some time.
977		 * Clear the timer to avoid scary "timer expired!" messages;
978		 * the "superblock" is written out at least twice below anyway. */
979		del_timer(&device->md_sync_timer);
980		drbd_al_shrink(device); /* All extents inactive. */
981
982		prev_flags = md->flags;
983		md->flags &= ~MDF_PRIMARY_IND;
984		drbd_md_write(device, buffer);
985
986		drbd_info(device, "Writing the whole bitmap, %s\n",
987			 la_size_changed && md_moved ? "size changed and md moved" :
988			 la_size_changed ? "size changed" : "md moved");
989		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
990		drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
991			       "size changed", BM_LOCKED_MASK);
992		drbd_initialize_al(device, buffer);
993
994		md->flags = prev_flags;
995		drbd_md_write(device, buffer);
996
997		if (rs)
998			drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
999				  md->al_stripes, md->al_stripe_size_4k * 4);
1000	}
1001
1002	if (size > la_size_sect)
1003		rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
1004	if (size < la_size_sect)
1005		rv = DS_SHRUNK;
1006
1007	if (0) {
1008	err_out:
1009		if (rs) {
1010			md->al_stripes = prev_al_stripes;
1011			md->al_stripe_size_4k = prev_al_stripe_size_4k;
1012			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
1013
1014			drbd_md_set_sector_offsets(device, device->ldev);
1015		}
1016	}
1017	lc_unlock(device->act_log);
1018	wake_up(&device->al_wait);
1019	drbd_md_put_buffer(device);
1020	drbd_resume_io(device);
1021
1022	return rv;
1023}
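
/*
 * Caller-side sketch (illustrative; the real call sites are the attach and
 * resize paths in this file and the receiver): the caller holds a local disk
 * reference and syncs the meta data afterwards, as the kerneldoc asks, with
 * ddsf being whatever enum dds_flags value the caller assembled:
 *
 *	if (get_ldev(device)) {
 *		enum determine_dev_size dd =
 *			drbd_determine_dev_size(device, ddsf, NULL);
 *		drbd_md_sync(device);
 *		put_ldev(device);
 *		...react to dd (DS_ERROR, DS_GREW, DS_SHRUNK, ...)...
 *	}
 */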
1024
1025sector_t
1026drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1027		  sector_t u_size, int assume_peer_has_space)
1028{
1029	sector_t p_size = device->p_size;   /* partner's disk size. */
1030	sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1031	sector_t m_size; /* my size */
1032	sector_t size = 0;
1033
1034	m_size = drbd_get_max_capacity(bdev);
1035
1036	if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1037		drbd_warn(device, "Resize while not connected was forced by the user!\n");
1038		p_size = m_size;
1039	}
1040
1041	if (p_size && m_size) {
1042		size = min_t(sector_t, p_size, m_size);
1043	} else {
1044		if (la_size_sect) {
1045			size = la_size_sect;
1046			if (m_size && m_size < size)
1047				size = m_size;
1048			if (p_size && p_size < size)
1049				size = p_size;
1050		} else {
1051			if (m_size)
1052				size = m_size;
1053			if (p_size)
1054				size = p_size;
1055		}
1056	}
1057
1058	if (size == 0)
1059		drbd_err(device, "Both nodes diskless!\n");
1060
1061	if (u_size) {
1062		if (u_size > size)
1063			drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1064			    (unsigned long)u_size>>1, (unsigned long)size>>1);
1065		else
1066			size = u_size;
1067	}
1068
1069	return size;
1070}
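
/*
 * Worked example for the size negotiation above (illustrative, sizes in
 * 512-byte sectors): with m_size = 4194304 (2 GiB locally) and
 * p_size = 2097152 (1 GiB on the peer), the agreed size is the minimum,
 * 2097152.  A user-requested u_size = 1048576 shrinks that further to
 * 1048576, while a u_size larger than 2097152 is rejected with
 * "Requested disk size is too big" and the computed size is kept.
 */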
1071
1072/**
1073 * drbd_check_al_size() - Ensures that the AL is of the right size
1074 * @device:	DRBD device.
1075 *
1076 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1077 * failed, and 0 on success. You should call drbd_md_sync() after you called
1078 * this function.
1079 */
1080static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1081{
1082	struct lru_cache *n, *t;
1083	struct lc_element *e;
1084	unsigned int in_use;
1085	int i;
1086
1087	if (device->act_log &&
1088	    device->act_log->nr_elements == dc->al_extents)
1089		return 0;
1090
1091	in_use = 0;
1092	t = device->act_log;
1093	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1094		dc->al_extents, sizeof(struct lc_element), 0);
1095
1096	if (n == NULL) {
1097		drbd_err(device, "Cannot allocate act_log lru!\n");
1098		return -ENOMEM;
1099	}
1100	spin_lock_irq(&device->al_lock);
1101	if (t) {
1102		for (i = 0; i < t->nr_elements; i++) {
1103			e = lc_element_by_index(t, i);
1104			if (e->refcnt)
1105				drbd_err(device, "refcnt(%d)==%d\n",
1106				    e->lc_number, e->refcnt);
1107			in_use += e->refcnt;
1108		}
1109	}
1110	if (!in_use)
1111		device->act_log = n;
1112	spin_unlock_irq(&device->al_lock);
1113	if (in_use) {
1114		drbd_err(device, "Activity log still in use!\n");
1115		lc_destroy(n);
1116		return -EBUSY;
1117	} else {
1118		if (t)
1119			lc_destroy(t);
1120	}
1121	drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
1122	return 0;
1123}
1124
1125static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
1126				   unsigned int max_bio_size)
1127{
1128	struct request_queue * const q = device->rq_queue;
1129	unsigned int max_hw_sectors = max_bio_size >> 9;
1130	unsigned int max_segments = 0;
1131	struct request_queue *b = NULL;
1132
1133	if (bdev) {
1134		b = bdev->backing_bdev->bd_disk->queue;
1135
1136		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1137		rcu_read_lock();
1138		max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
1139		rcu_read_unlock();
1140
1141		blk_set_stacking_limits(&q->limits);
1142		blk_queue_max_write_same_sectors(q, 0);
1143	}
1144
1145	blk_queue_logical_block_size(q, 512);
1146	blk_queue_max_hw_sectors(q, max_hw_sectors);
1147	/* This is the workaround for "bio would need to, but cannot, be split" */
1148	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1149	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
1150
1151	if (b) {
1152		struct drbd_connection *connection = first_peer_device(device)->connection;
1153
1154		if (blk_queue_discard(b) &&
1155		    (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
1156			/* For now, don't allow more than one activity log extent worth of data
1157			 * to be discarded in one go. We may need to rework drbd_al_begin_io()
1158			 * to allow for even larger discard ranges */
1159			q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
1160
1161			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
1162			/* REALLY? Is stacking secdiscard "legal"? */
1163			if (blk_queue_secdiscard(b))
1164				queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
1165		} else {
1166			q->limits.max_discard_sectors = 0;
1167			queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
1168			queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
1169		}
1170
1171		blk_queue_stack_limits(q, b);
1172
1173		if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
1174			drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1175				 q->backing_dev_info.ra_pages,
1176				 b->backing_dev_info.ra_pages);
1177			q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1178		}
1179	}
1180}
1181
1182void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
1183{
1184	unsigned int now, new, local, peer;
1185
1186	now = queue_max_hw_sectors(device->rq_queue) << 9;
1187	local = device->local_max_bio_size; /* Possibly the last known value, from volatile memory */
1188	peer = device->peer_max_bio_size; /* Possibly the last known value, from meta data */
1189
1190	if (bdev) {
1191		local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
1192		device->local_max_bio_size = local;
1193	}
1194	local = min(local, DRBD_MAX_BIO_SIZE);
1195
1196	/* We may ignore peer limits if the peer is modern enough:
1197	   from 8.3.8 onwards the peer can use multiple
1198	   BIOs for a single peer_request */
1199	if (device->state.conn >= C_WF_REPORT_PARAMS) {
1200		if (first_peer_device(device)->connection->agreed_pro_version < 94)
1201			peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1202			/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1203		else if (first_peer_device(device)->connection->agreed_pro_version == 94)
1204			peer = DRBD_MAX_SIZE_H80_PACKET;
1205		else if (first_peer_device(device)->connection->agreed_pro_version < 100)
1206			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
1207		else
1208			peer = DRBD_MAX_BIO_SIZE;
1209
1210		/* We may later detach and re-attach on a disconnected Primary.
1211		 * Avoid letting this setting jump back in that case.
1212		 * We want to store what we know the peer DRBD can handle,
1213		 * not what the peer IO backend can handle. */
1214		if (peer > device->peer_max_bio_size)
1215			device->peer_max_bio_size = peer;
1216	}
1217	new = min(local, peer);
1218
1219	if (device->state.role == R_PRIMARY && new < now)
1220		drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1221
1222	if (new != now)
1223		drbd_info(device, "max BIO size = %u\n", new);
1224
1225	drbd_setup_queue_param(device, bdev, new);
1226}
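
/*
 * Summary of the peer-limit selection above, with the byte values these
 * constants currently resolve to (illustrative; see their definitions in
 * the DRBD headers):
 *
 *	agreed_pro_version <  94 : min(peer_max_bio_size, 32 KiB)
 *	agreed_pro_version == 94 : 32 KiB  (DRBD_MAX_SIZE_H80_PACKET)
 *	agreed_pro_version 95-99 : 128 KiB (DRBD_MAX_BIO_SIZE_P95)
 *	agreed_pro_version >= 100: 1 MiB   (DRBD_MAX_BIO_SIZE)
 */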
1227
1228/* Starts the worker thread */
1229static void conn_reconfig_start(struct drbd_connection *connection)
1230{
1231	drbd_thread_start(&connection->worker);
1232	drbd_flush_workqueue(&connection->sender_work);
1233}
1234
1235/* if still unconfigured, stops worker again. */
1236static void conn_reconfig_done(struct drbd_connection *connection)
1237{
1238	bool stop_threads;
1239	spin_lock_irq(&connection->resource->req_lock);
1240	stop_threads = conn_all_vols_unconf(connection) &&
1241		connection->cstate == C_STANDALONE;
1242	spin_unlock_irq(&connection->resource->req_lock);
1243	if (stop_threads) {
1244		/* asender is implicitly stopped by receiver
1245		 * in conn_disconnect() */
1246		drbd_thread_stop(&connection->receiver);
1247		drbd_thread_stop(&connection->worker);
1248	}
1249}
1250
1251/* Make sure IO is suspended before calling this function. */
1252static void drbd_suspend_al(struct drbd_device *device)
1253{
1254	int s = 0;
1255
1256	if (!lc_try_lock(device->act_log)) {
1257		drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1258		return;
1259	}
1260
1261	drbd_al_shrink(device);
1262	spin_lock_irq(&device->resource->req_lock);
1263	if (device->state.conn < C_CONNECTED)
1264		s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1265	spin_unlock_irq(&device->resource->req_lock);
1266	lc_unlock(device->act_log);
1267
1268	if (s)
1269		drbd_info(device, "Suspended AL updates\n");
1270}
1271
1272
1273static bool should_set_defaults(struct genl_info *info)
1274{
1275	unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1276	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1277}
1278
1279static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1280{
1281	/* This is limited by 16 bit "slot" numbers,
1282	 * and by available on-disk context storage.
1283	 *
1284	 * Also (u16)~0 is special (denotes a "free" extent).
1285	 *
1286	 * One transaction occupies one 4kB on-disk block,
1287	 * we have n such blocks in the on disk ring buffer,
1288	 * the "current" transaction may fail (n-1),
1289 * and there are 919 slot numbers of context information per transaction.
1290	 *
1291	 * 72 transaction blocks amounts to more than 2**16 context slots,
1292	 * so cap there first.
1293	 */
1294	const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1295	const unsigned int sufficient_on_disk =
1296		(max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
1297		/AL_CONTEXT_PER_TRANSACTION;
1298
1299	unsigned int al_size_4k = bdev->md.al_size_4k;
1300
1301	if (al_size_4k > sufficient_on_disk)
1302		return max_al_nr;
1303
1304	return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1305}
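
/*
 * Worked example (illustrative): with the classic fixed 32 KiB activity log,
 * al_size_4k is 8 on-disk transaction blocks, so the usable maximum is
 *
 *	(8 - 1) * AL_CONTEXT_PER_TRANSACTION = 7 * 919 = 6433 extents;
 *
 * only layouts with more transaction blocks than "sufficient_on_disk"
 * (about 72) are capped at DRBD_AL_EXTENTS_MAX instead.
 */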
1306
1307static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
1308{
1309	return	a->disk_barrier != b->disk_barrier ||
1310		a->disk_flushes != b->disk_flushes ||
1311		a->disk_drain != b->disk_drain;
1312}
1313
1314int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1315{
1316	struct drbd_config_context adm_ctx;
1317	enum drbd_ret_code retcode;
1318	struct drbd_device *device;
1319	struct disk_conf *new_disk_conf, *old_disk_conf;
1320	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1321	int err, fifo_size;
1322
1323	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1324	if (!adm_ctx.reply_skb)
1325		return retcode;
1326	if (retcode != NO_ERROR)
1327		goto finish;
1328
1329	device = adm_ctx.device;
1330	mutex_lock(&adm_ctx.resource->adm_mutex);
1331
1332	/* we also need a disk
1333	 * to change the options on */
1334	if (!get_ldev(device)) {
1335		retcode = ERR_NO_DISK;
1336		goto out;
1337	}
1338
1339	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1340	if (!new_disk_conf) {
1341		retcode = ERR_NOMEM;
1342		goto fail;
1343	}
1344
1345	mutex_lock(&device->resource->conf_update);
1346	old_disk_conf = device->ldev->disk_conf;
1347	*new_disk_conf = *old_disk_conf;
1348	if (should_set_defaults(info))
1349		set_disk_conf_defaults(new_disk_conf);
1350
1351	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1352	if (err && err != -ENOMSG) {
1353		retcode = ERR_MANDATORY_TAG;
1354		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1355		goto fail_unlock;
1356	}
1357
1358	if (!expect(new_disk_conf->resync_rate >= 1))
1359		new_disk_conf->resync_rate = 1;
1360
1361	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1362		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1363	if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
1364		new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);
1365
1366	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1367		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1368
1369	fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1370	if (fifo_size != device->rs_plan_s->size) {
1371		new_plan = fifo_alloc(fifo_size);
1372		if (!new_plan) {
1373			drbd_err(device, "kmalloc of fifo_buffer failed");
1374			retcode = ERR_NOMEM;
1375			goto fail_unlock;
1376		}
1377	}
1378
1379	drbd_suspend_io(device);
1380	wait_event(device->al_wait, lc_try_lock(device->act_log));
1381	drbd_al_shrink(device);
1382	err = drbd_check_al_size(device, new_disk_conf);
1383	lc_unlock(device->act_log);
1384	wake_up(&device->al_wait);
1385	drbd_resume_io(device);
1386
1387	if (err) {
1388		retcode = ERR_NOMEM;
1389		goto fail_unlock;
1390	}
1391
1392	write_lock_irq(&global_state_lock);
1393	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1394	if (retcode == NO_ERROR) {
1395		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1396		drbd_resync_after_changed(device);
1397	}
1398	write_unlock_irq(&global_state_lock);
1399
1400	if (retcode != NO_ERROR)
1401		goto fail_unlock;
1402
1403	if (new_plan) {
1404		old_plan = device->rs_plan_s;
1405		rcu_assign_pointer(device->rs_plan_s, new_plan);
1406	}
1407
1408	mutex_unlock(&device->resource->conf_update);
1409
1410	if (new_disk_conf->al_updates)
1411		device->ldev->md.flags &= ~MDF_AL_DISABLED;
1412	else
1413		device->ldev->md.flags |= MDF_AL_DISABLED;
1414
1415	if (new_disk_conf->md_flushes)
1416		clear_bit(MD_NO_FUA, &device->flags);
1417	else
1418		set_bit(MD_NO_FUA, &device->flags);
1419
1420	if (write_ordering_changed(old_disk_conf, new_disk_conf))
1421		drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
1422
1423	drbd_md_sync(device);
1424
1425	if (device->state.conn >= C_CONNECTED) {
1426		struct drbd_peer_device *peer_device;
1427
1428		for_each_peer_device(peer_device, device)
1429			drbd_send_sync_param(peer_device);
1430	}
1431
1432	synchronize_rcu();
1433	kfree(old_disk_conf);
1434	kfree(old_plan);
1435	mod_timer(&device->request_timer, jiffies + HZ);
1436	goto success;
1437
1438fail_unlock:
1439	mutex_unlock(&device->resource->conf_update);
1440 fail:
1441	kfree(new_disk_conf);
1442	kfree(new_plan);
1443success:
1444	put_ldev(device);
1445 out:
1446	mutex_unlock(&adm_ctx.resource->adm_mutex);
1447 finish:
1448	drbd_adm_finish(&adm_ctx, info, retcode);
1449	return 0;
1450}
1451
1452int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1453{
1454	struct drbd_config_context adm_ctx;
1455	struct drbd_device *device;
1456	struct drbd_peer_device *peer_device;
1457	struct drbd_connection *connection;
1458	int err;
1459	enum drbd_ret_code retcode;
1460	enum determine_dev_size dd;
1461	sector_t max_possible_sectors;
1462	sector_t min_md_device_sectors;
1463	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1464	struct disk_conf *new_disk_conf = NULL;
1465	struct block_device *bdev;
1466	struct lru_cache *resync_lru = NULL;
1467	struct fifo_buffer *new_plan = NULL;
1468	union drbd_state ns, os;
1469	enum drbd_state_rv rv;
1470	struct net_conf *nc;
1471
1472	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1473	if (!adm_ctx.reply_skb)
1474		return retcode;
1475	if (retcode != NO_ERROR)
1476		goto finish;
1477
1478	device = adm_ctx.device;
1479	mutex_lock(&adm_ctx.resource->adm_mutex);
1480	peer_device = first_peer_device(device);
1481	connection = peer_device ? peer_device->connection : NULL;
1482	conn_reconfig_start(connection);
1483
1484	/* if you want to reconfigure, please tear down first */
1485	if (device->state.disk > D_DISKLESS) {
1486		retcode = ERR_DISK_CONFIGURED;
1487		goto fail;
1488	}
1489	/* It may just now have detached because of IO error.  Make sure
1490	 * drbd_ldev_destroy is done already, we may end up here very fast,
1491	 * e.g. if someone calls attach from the on-io-error handler,
1492	 * to realize a "hot spare" feature (not that I'd recommend that) */
1493	wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
1494
1495	/* make sure there is no leftover from previous force-detach attempts */
1496	clear_bit(FORCE_DETACH, &device->flags);
1497	clear_bit(WAS_IO_ERROR, &device->flags);
1498	clear_bit(WAS_READ_ERROR, &device->flags);
1499
1500	/* and no leftover from previously aborted resync or verify, either */
1501	device->rs_total = 0;
1502	device->rs_failed = 0;
1503	atomic_set(&device->rs_pending_cnt, 0);
1504
1505	/* allocation not in the IO path, drbdsetup context */
1506	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1507	if (!nbc) {
1508		retcode = ERR_NOMEM;
1509		goto fail;
1510	}
1511	spin_lock_init(&nbc->md.uuid_lock);
1512
1513	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1514	if (!new_disk_conf) {
1515		retcode = ERR_NOMEM;
1516		goto fail;
1517	}
1518	nbc->disk_conf = new_disk_conf;
1519
1520	set_disk_conf_defaults(new_disk_conf);
1521	err = disk_conf_from_attrs(new_disk_conf, info);
1522	if (err) {
1523		retcode = ERR_MANDATORY_TAG;
1524		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1525		goto fail;
1526	}
1527
1528	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1529		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1530
1531	new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1532	if (!new_plan) {
1533		retcode = ERR_NOMEM;
1534		goto fail;
1535	}
1536
1537	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1538		retcode = ERR_MD_IDX_INVALID;
1539		goto fail;
1540	}
1541
1542	write_lock_irq(&global_state_lock);
1543	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1544	write_unlock_irq(&global_state_lock);
1545	if (retcode != NO_ERROR)
1546		goto fail;
1547
1548	rcu_read_lock();
1549	nc = rcu_dereference(connection->net_conf);
1550	if (nc) {
1551		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1552			rcu_read_unlock();
1553			retcode = ERR_STONITH_AND_PROT_A;
1554			goto fail;
1555		}
1556	}
1557	rcu_read_unlock();
1558
1559	bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1560				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1561	if (IS_ERR(bdev)) {
1562		drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1563			PTR_ERR(bdev));
1564		retcode = ERR_OPEN_DISK;
1565		goto fail;
1566	}
1567	nbc->backing_bdev = bdev;
1568
1569	/*
1570	 * meta_dev_idx >= 0: external fixed size, possibly multiple
1571	 * drbd sharing one meta device.  TODO in that case, paranoia
1572	 * check that [md_bdev, meta_dev_idx] is not yet used by some
1573	 * other drbd minor!  (if you use drbd.conf + drbdadm, that
1574	 * should check it for you already; but if you don't, or
1575	 * someone fooled it, we need to double check here)
1576	 */
1577	bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1578				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1579				  (new_disk_conf->meta_dev_idx < 0) ?
1580				  (void *)device : (void *)drbd_m_holder);
1581	if (IS_ERR(bdev)) {
1582		drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1583			PTR_ERR(bdev));
1584		retcode = ERR_OPEN_MD_DISK;
1585		goto fail;
1586	}
1587	nbc->md_bdev = bdev;
1588
1589	if ((nbc->backing_bdev == nbc->md_bdev) !=
1590	    (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1591	     new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1592		retcode = ERR_MD_IDX_INVALID;
1593		goto fail;
1594	}
1595
1596	resync_lru = lc_create("resync", drbd_bm_ext_cache,
1597			1, 61, sizeof(struct bm_extent),
1598			offsetof(struct bm_extent, lce));
1599	if (!resync_lru) {
1600		retcode = ERR_NOMEM;
1601		goto fail;
1602	}
1603
1604	/* Read our meta data super block early.
1605	 * This also sets other on-disk offsets. */
1606	retcode = drbd_md_read(device, nbc);
1607	if (retcode != NO_ERROR)
1608		goto fail;
1609
1610	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1611		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1612	if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1613		new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1614
1615	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1616		drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1617			(unsigned long long) drbd_get_max_capacity(nbc),
1618			(unsigned long long) new_disk_conf->disk_size);
1619		retcode = ERR_DISK_TOO_SMALL;
1620		goto fail;
1621	}
1622
1623	if (new_disk_conf->meta_dev_idx < 0) {
1624		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1625		/* at least one MB, otherwise it does not make sense */
1626		min_md_device_sectors = (2<<10);
1627	} else {
1628		max_possible_sectors = DRBD_MAX_SECTORS;
1629		min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1630	}
1631
1632	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1633		retcode = ERR_MD_DISK_TOO_SMALL;
1634		drbd_warn(device, "refusing attach: md-device too small, "
1635		     "at least %llu sectors needed for this meta-disk type\n",
1636		     (unsigned long long) min_md_device_sectors);
1637		goto fail;
1638	}
1639
1640	/* Make sure the new disk is big enough
1641	 * (we may currently be R_PRIMARY with no local disk...) */
1642	if (drbd_get_max_capacity(nbc) <
1643	    drbd_get_capacity(device->this_bdev)) {
1644		retcode = ERR_DISK_TOO_SMALL;
1645		goto fail;
1646	}
1647
1648	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1649
1650	if (nbc->known_size > max_possible_sectors) {
1651		drbd_warn(device, "==> truncating very big lower level device "
1652			"to currently maximum possible %llu sectors <==\n",
1653			(unsigned long long) max_possible_sectors);
1654		if (new_disk_conf->meta_dev_idx >= 0)
1655			drbd_warn(device, "==>> using internal or flexible "
1656				      "meta data may help <<==\n");
1657	}
1658
1659	drbd_suspend_io(device);
1660	/* also wait for the last barrier ack. */
1661	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1662	 * We need a way to either ignore barrier acks for barriers sent before a device
1663	 * was attached, or a way to wait for all pending barrier acks to come in.
1664	 * As barriers are counted per resource,
1665	 * we'd need to suspend io on all devices of a resource.
1666	 */
1667	wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1668	/* and for any other previously queued work */
1669	drbd_flush_workqueue(&connection->sender_work);
1670
1671	rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1672	retcode = rv;  /* FIXME: Type mismatch. */
1673	drbd_resume_io(device);
1674	if (rv < SS_SUCCESS)
1675		goto fail;
1676
1677	if (!get_ldev_if_state(device, D_ATTACHING))
1678		goto force_diskless;
1679
1680	if (!device->bitmap) {
1681		if (drbd_bm_init(device)) {
1682			retcode = ERR_NOMEM;
1683			goto force_diskless_dec;
1684		}
1685	}
1686
1687	if (device->state.conn < C_CONNECTED &&
1688	    device->state.role == R_PRIMARY && device->ed_uuid &&
1689	    (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1690		drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1691		    (unsigned long long)device->ed_uuid);
1692		retcode = ERR_DATA_NOT_CURRENT;
1693		goto force_diskless_dec;
1694	}
1695
1696	/* Since we are diskless, fix the activity log first... */
1697	if (drbd_check_al_size(device, new_disk_conf)) {
1698		retcode = ERR_NOMEM;
1699		goto force_diskless_dec;
1700	}
1701
1702	/* Prevent shrinking of consistent devices ! */
1703	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1704	    drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1705		drbd_warn(device, "refusing to truncate a consistent device\n");
1706		retcode = ERR_DISK_TOO_SMALL;
1707		goto force_diskless_dec;
1708	}
1709
1710	/* Reset the "barriers don't work" bits here, then force meta data to
1711	 * be written, to ensure we determine if barriers are supported. */
1712	if (new_disk_conf->md_flushes)
1713		clear_bit(MD_NO_FUA, &device->flags);
1714	else
1715		set_bit(MD_NO_FUA, &device->flags);
1716
1717	/* Point of no return reached.
1718	 * Devices and memory are no longer released by error cleanup below.
1719	 * From now on, the device takes over responsibility; the state engine
1720	 * should clean it up somewhere.  */
1721	D_ASSERT(device, device->ldev == NULL);
1722	device->ldev = nbc;
1723	device->resync = resync_lru;
1724	device->rs_plan_s = new_plan;
1725	nbc = NULL;
1726	resync_lru = NULL;
1727	new_disk_conf = NULL;
1728	new_plan = NULL;
1729
1730	drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
1731
1732	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1733		set_bit(CRASHED_PRIMARY, &device->flags);
1734	else
1735		clear_bit(CRASHED_PRIMARY, &device->flags);
1736
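	/* Presumably a crashed Primary: the meta data was last written while
	 * Primary, yet we are not Primary now (and not merely suspended due
	 * to loss of data accessibility), so the previous Primary did not
	 * shut down cleanly. */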
1737	if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1738	    !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1739		set_bit(CRASHED_PRIMARY, &device->flags);
1740
1741	device->send_cnt = 0;
1742	device->recv_cnt = 0;
1743	device->read_cnt = 0;
1744	device->writ_cnt = 0;
1745
1746	drbd_reconsider_max_bio_size(device, device->ldev);
1747
1748	/* If I am currently not R_PRIMARY,
1749	 * but meta data primary indicator is set,
1750	 * I just now recover from a hard crash,
1751	 * and have been R_PRIMARY before that crash.
1752	 *
1753	 * Now, if I had no connection before that crash
1754	 * (have been degraded R_PRIMARY), chances are that
1755	 * I won't find my peer now either.
1756	 *
1757	 * In that case, and _only_ in that case,
1758	 * we use the degr-wfc-timeout instead of the default,
1759	 * so we can automatically recover from a crash of a
1760	 * degraded but active "cluster" after a certain timeout.
1761	 */
1762	clear_bit(USE_DEGR_WFC_T, &device->flags);
1763	if (device->state.role != R_PRIMARY &&
1764	     drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1765	    !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1766		set_bit(USE_DEGR_WFC_T, &device->flags);
1767
1768	dd = drbd_determine_dev_size(device, 0, NULL);
1769	if (dd <= DS_ERROR) {
1770		retcode = ERR_NOMEM_BITMAP;
1771		goto force_diskless_dec;
1772	} else if (dd == DS_GREW)
1773		set_bit(RESYNC_AFTER_NEG, &device->flags);
1774
1775	if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1776	    (test_bit(CRASHED_PRIMARY, &device->flags) &&
1777	     drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1778		drbd_info(device, "Assuming that all blocks are out of sync "
1779		     "(aka FullSync)\n");
1780		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1781			"set_n_write from attaching", BM_LOCKED_MASK)) {
1782			retcode = ERR_IO_MD_DISK;
1783			goto force_diskless_dec;
1784		}
1785	} else {
1786		if (drbd_bitmap_io(device, &drbd_bm_read,
1787			"read from attaching", BM_LOCKED_MASK)) {
1788			retcode = ERR_IO_MD_DISK;
1789			goto force_diskless_dec;
1790		}
1791	}
1792
1793	if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1794		drbd_suspend_al(device); /* IO is still suspended here... */
1795
1796	spin_lock_irq(&device->resource->req_lock);
1797	os = drbd_read_state(device);
1798	ns = os;
1799	/* If MDF_CONSISTENT is not set, go into D_INCONSISTENT disk state;
1800	   otherwise investigate MDF_WAS_UP_TO_DATE:
1801	   if MDF_WAS_UP_TO_DATE is not set, go into D_OUTDATED disk state,
1802	   otherwise into D_CONSISTENT state.
1803	*/
1804	if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1805		if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1806			ns.disk = D_CONSISTENT;
1807		else
1808			ns.disk = D_OUTDATED;
1809	} else {
1810		ns.disk = D_INCONSISTENT;
1811	}
1812
1813	if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1814		ns.pdsk = D_OUTDATED;
1815
1816	rcu_read_lock();
1817	if (ns.disk == D_CONSISTENT &&
1818	    (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1819		ns.disk = D_UP_TO_DATE;
1820
1821	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1822	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1823	   this point, because drbd_request_state() modifies these
1824	   flags. */
1825
1826	if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1827		device->ldev->md.flags &= ~MDF_AL_DISABLED;
1828	else
1829		device->ldev->md.flags |= MDF_AL_DISABLED;
1830
1831	rcu_read_unlock();
1832
1833	/* In case we are C_CONNECTED postpone any decision on the new disk
1834	   state after the negotiation phase. */
1835	if (device->state.conn == C_CONNECTED) {
1836		device->new_state_tmp.i = ns.i;
1837		ns.i = os.i;
1838		ns.disk = D_NEGOTIATING;
1839
1840		/* We expect to receive up-to-date UUIDs soon.
1841		   To avoid a race in receive_state, free p_uuid while
1842		   holding req_lock. I.e. atomic with the state change */
1843		kfree(device->p_uuid);
1844		device->p_uuid = NULL;
1845	}
1846
1847	rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1848	spin_unlock_irq(&device->resource->req_lock);
1849
1850	if (rv < SS_SUCCESS)
1851		goto force_diskless_dec;
1852
1853	mod_timer(&device->request_timer, jiffies + HZ);
1854
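	/* The lowest bit of the current UUID tracks whether the data was
	 * last written by a Primary; keep it consistent with our role. */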
1855	if (device->state.role == R_PRIMARY)
1856		device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
1857	else
1858		device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1859
1860	drbd_md_mark_dirty(device);
1861	drbd_md_sync(device);
1862
1863	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1864	put_ldev(device);
1865	conn_reconfig_done(connection);
1866	mutex_unlock(&adm_ctx.resource->adm_mutex);
1867	drbd_adm_finish(&adm_ctx, info, retcode);
1868	return 0;
1869
1870 force_diskless_dec:
1871	put_ldev(device);
1872 force_diskless:
1873	drbd_force_state(device, NS(disk, D_DISKLESS));
1874	drbd_md_sync(device);
1875 fail:
1876	conn_reconfig_done(connection);
1877	if (nbc) {
1878		if (nbc->backing_bdev)
1879			blkdev_put(nbc->backing_bdev,
1880				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1881		if (nbc->md_bdev)
1882			blkdev_put(nbc->md_bdev,
1883				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1884		kfree(nbc);
1885	}
1886	kfree(new_disk_conf);
1887	lc_destroy(resync_lru);
1888	kfree(new_plan);
1889	mutex_unlock(&adm_ctx.resource->adm_mutex);
1890 finish:
1891	drbd_adm_finish(&adm_ctx, info, retcode);
1892	return 0;
1893}
1894
1895static int adm_detach(struct drbd_device *device, int force)
1896{
1897	enum drbd_state_rv retcode;
1898	int ret;
1899
1900	if (force) {
1901		set_bit(FORCE_DETACH, &device->flags);
1902		drbd_force_state(device, NS(disk, D_FAILED));
1903		retcode = SS_SUCCESS;
1904		goto out;
1905	}
1906
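	/* Graceful detach: drain application IO and in-flight meta data IO,
	 * then ask the state machine for D_FAILED and wait until it has
	 * moved on (towards D_DISKLESS). */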
1907	drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1908	drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
1909	retcode = drbd_request_state(device, NS(disk, D_FAILED));
1910	drbd_md_put_buffer(device);
1911	/* D_FAILED will transition to DISKLESS. */
1912	ret = wait_event_interruptible(device->misc_wait,
1913			device->state.disk != D_FAILED);
1914	drbd_resume_io(device);
1915	if ((int)retcode == (int)SS_IS_DISKLESS)
1916		retcode = SS_NOTHING_TO_DO;
1917	if (ret)
1918		retcode = ERR_INTR;
1919out:
1920	return retcode;
1921}
1922
1923/* Detaching the disk is a process in multiple stages.  First we need to lock
1924 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1925 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1926 * internal references as well.
1927	 * Only then have we finally detached. */
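/* Netlink entry point for detach; typically reached from userland via
 * "drbdsetup detach <minor>" or "drbdadm detach <resource>", where a --force
 * option maps to parms.force_detach (exact tool syntax depends on the
 * drbd-utils version). */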
1928int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1929{
1930	struct drbd_config_context adm_ctx;
1931	enum drbd_ret_code retcode;
1932	struct detach_parms parms = { };
1933	int err;
1934
1935	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1936	if (!adm_ctx.reply_skb)
1937		return retcode;
1938	if (retcode != NO_ERROR)
1939		goto out;
1940
1941	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1942		err = detach_parms_from_attrs(&parms, info);
1943		if (err) {
1944			retcode = ERR_MANDATORY_TAG;
1945			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1946			goto out;
1947		}
1948	}
1949
1950	mutex_lock(&adm_ctx.resource->adm_mutex);
1951	retcode = adm_detach(adm_ctx.device, parms.force_detach);
1952	mutex_unlock(&adm_ctx.resource->adm_mutex);
1953out:
1954	drbd_adm_finish(&adm_ctx, info, retcode);
1955	return 0;
1956}
1957
1958static bool conn_resync_running(struct drbd_connection *connection)
1959{
1960	struct drbd_peer_device *peer_device;
1961	bool rv = false;
1962	int vnr;
1963
1964	rcu_read_lock();
1965	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1966		struct drbd_device *device = peer_device->device;
1967		if (device->state.conn == C_SYNC_SOURCE ||
1968		    device->state.conn == C_SYNC_TARGET ||
1969		    device->state.conn == C_PAUSED_SYNC_S ||
1970		    device->state.conn == C_PAUSED_SYNC_T) {
1971			rv = true;
1972			break;
1973		}
1974	}
1975	rcu_read_unlock();
1976
1977	return rv;
1978}
1979
1980static bool conn_ov_running(struct drbd_connection *connection)
1981{
1982	struct drbd_peer_device *peer_device;
1983	bool rv = false;
1984	int vnr;
1985
1986	rcu_read_lock();
1987	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1988		struct drbd_device *device = peer_device->device;
1989		if (device->state.conn == C_VERIFY_S ||
1990		    device->state.conn == C_VERIFY_T) {
1991			rv = true;
1992			break;
1993		}
1994	}
1995	rcu_read_unlock();
1996
1997	return rv;
1998}
1999
2000static enum drbd_ret_code
2001_check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
2002{
2003	struct drbd_peer_device *peer_device;
2004	int i;
2005
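	/* While the connection handshake is in progress (C_WF_REPORT_PARAMS)
	 * with a peer older than protocol version 100, the settings below
	 * cannot be changed, because such peers cannot renegotiate them on
	 * the fly. */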
2006	if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
2007		if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
2008			return ERR_NEED_APV_100;
2009
2010		if (new_net_conf->two_primaries != old_net_conf->two_primaries)
2011			return ERR_NEED_APV_100;
2012
2013		if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
2014			return ERR_NEED_APV_100;
2015	}
2016
2017	if (!new_net_conf->two_primaries &&
2018	    conn_highest_role(connection) == R_PRIMARY &&
2019	    conn_highest_peer(connection) == R_PRIMARY)
2020		return ERR_NEED_ALLOW_TWO_PRI;
2021
2022	if (new_net_conf->two_primaries &&
2023	    (new_net_conf->wire_protocol != DRBD_PROT_C))
2024		return ERR_NOT_PROTO_C;
2025
2026	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2027		struct drbd_device *device = peer_device->device;
2028		if (get_ldev(device)) {
2029			enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2030			put_ldev(device);
2031			if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2032				return ERR_STONITH_AND_PROT_A;
2033		}
2034		if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2035			return ERR_DISCARD_IMPOSSIBLE;
2036	}
2037
2038	if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2039		return ERR_CONG_NOT_PROTO_A;
2040
2041	return NO_ERROR;
2042}
2043
2044static enum drbd_ret_code
2045check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2046{
2047	static enum drbd_ret_code rv;
2048	struct drbd_peer_device *peer_device;
2049	int i;
2050
2051	rcu_read_lock();
2052	rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2053	rcu_read_unlock();
2054
2055	/* connection->volumes protected by genl_lock() here */
2056	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2057		struct drbd_device *device = peer_device->device;
2058		if (!device->bitmap) {
2059			if (drbd_bm_init(device))
2060				return ERR_NOMEM;
2061		}
2062	}
2063
2064	return rv;
2065}
2066
2067struct crypto {
2068	struct crypto_hash *verify_tfm;
2069	struct crypto_hash *csums_tfm;
2070	struct crypto_hash *cram_hmac_tfm;
2071	struct crypto_hash *integrity_tfm;
2072};
2073
2074static int
2075alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
2076{
2077	if (!tfm_name[0])
2078		return NO_ERROR;
2079
2080	*tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2081	if (IS_ERR(*tfm)) {
2082		*tfm = NULL;
2083		return err_alg;
2084	}
2085
2086	return NO_ERROR;
2087}
2088
2089static enum drbd_ret_code
2090alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2091{
2092	char hmac_name[CRYPTO_MAX_ALG_NAME];
2093	enum drbd_ret_code rv;
2094
2095	rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2096		       ERR_CSUMS_ALG);
2097	if (rv != NO_ERROR)
2098		return rv;
2099	rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2100		       ERR_VERIFY_ALG);
2101	if (rv != NO_ERROR)
2102		return rv;
2103	rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2104		       ERR_INTEGRITY_ALG);
2105	if (rv != NO_ERROR)
2106		return rv;
2107	if (new_net_conf->cram_hmac_alg[0] != 0) {
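		/* Wrap the configured digest in the kernel crypto "hmac()"
		 * template, e.g. "sha1" becomes "hmac(sha1)". */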
2108		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2109			 new_net_conf->cram_hmac_alg);
2110
2111		rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2112			       ERR_AUTH_ALG);
2113	}
2114
2115	return rv;
2116}
2117
2118static void free_crypto(struct crypto *crypto)
2119{
2120	crypto_free_hash(crypto->cram_hmac_tfm);
2121	crypto_free_hash(crypto->integrity_tfm);
2122	crypto_free_hash(crypto->csums_tfm);
2123	crypto_free_hash(crypto->verify_tfm);
2124}
2125
2126int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2127{
2128	struct drbd_config_context adm_ctx;
2129	enum drbd_ret_code retcode;
2130	struct drbd_connection *connection;
2131	struct net_conf *old_net_conf, *new_net_conf = NULL;
2132	int err;
2133	int ovr; /* online verify running */
2134	int rsr; /* re-sync running */
2135	struct crypto crypto = { };
2136
2137	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2138	if (!adm_ctx.reply_skb)
2139		return retcode;
2140	if (retcode != NO_ERROR)
2141		goto finish;
2142
2143	connection = adm_ctx.connection;
2144	mutex_lock(&adm_ctx.resource->adm_mutex);
2145
2146	new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2147	if (!new_net_conf) {
2148		retcode = ERR_NOMEM;
2149		goto out;
2150	}
2151
2152	conn_reconfig_start(connection);
2153
2154	mutex_lock(&connection->data.mutex);
2155	mutex_lock(&connection->resource->conf_update);
2156	old_net_conf = connection->net_conf;
2157
2158	if (!old_net_conf) {
2159		drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2160		retcode = ERR_INVALID_REQUEST;
2161		goto fail;
2162	}
2163
2164	*new_net_conf = *old_net_conf;
2165	if (should_set_defaults(info))
2166		set_net_conf_defaults(new_net_conf);
2167
2168	err = net_conf_from_attrs_for_change(new_net_conf, info);
2169	if (err && err != -ENOMSG) {
2170		retcode = ERR_MANDATORY_TAG;
2171		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2172		goto fail;
2173	}
2174
2175	retcode = check_net_options(connection, new_net_conf);
2176	if (retcode != NO_ERROR)
2177		goto fail;
2178
2179	/* re-sync running */
2180	rsr = conn_resync_running(connection);
2181	if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2182		retcode = ERR_CSUMS_RESYNC_RUNNING;
2183		goto fail;
2184	}
2185
2186	/* online verify running */
2187	ovr = conn_ov_running(connection);
2188	if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2189		retcode = ERR_VERIFY_RUNNING;
2190		goto fail;
2191	}
2192
2193	retcode = alloc_crypto(&crypto, new_net_conf);
2194	if (retcode != NO_ERROR)
2195		goto fail;
2196
2197	rcu_assign_pointer(connection->net_conf, new_net_conf);
2198
2199	if (!rsr) {
2200		crypto_free_hash(connection->csums_tfm);
2201		connection->csums_tfm = crypto.csums_tfm;
2202		crypto.csums_tfm = NULL;
2203	}
2204	if (!ovr) {
2205		crypto_free_hash(connection->verify_tfm);
2206		connection->verify_tfm = crypto.verify_tfm;
2207		crypto.verify_tfm = NULL;
2208	}
2209
2210	crypto_free_hash(connection->integrity_tfm);
2211	connection->integrity_tfm = crypto.integrity_tfm;
2212	if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2213		/* Do this without trying to take connection->data.mutex again.  */
2214		__drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2215
2216	crypto_free_hash(connection->cram_hmac_tfm);
2217	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2218
2219	mutex_unlock(&connection->resource->conf_update);
2220	mutex_unlock(&connection->data.mutex);
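	/* Wait for pre-existing RCU readers that may still reference
	 * old_net_conf before it is freed below. */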
2221	synchronize_rcu();
2222	kfree(old_net_conf);
2223
2224	if (connection->cstate >= C_WF_REPORT_PARAMS) {
2225		struct drbd_peer_device *peer_device;
2226		int vnr;
2227
2228		idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2229			drbd_send_sync_param(peer_device);
2230	}
2231
2232	goto done;
2233
2234 fail:
2235	mutex_unlock(&connection->resource->conf_update);
2236	mutex_unlock(&connection->data.mutex);
2237	free_crypto(&crypto);
2238	kfree(new_net_conf);
2239 done:
2240	conn_reconfig_done(connection);
2241 out:
2242	mutex_unlock(&adm_ctx.resource->adm_mutex);
2243 finish:
2244	drbd_adm_finish(&adm_ctx, info, retcode);
2245	return 0;
2246}
2247
2248int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2249{
2250	struct drbd_config_context adm_ctx;
2251	struct drbd_peer_device *peer_device;
2252	struct net_conf *old_net_conf, *new_net_conf = NULL;
2253	struct crypto crypto = { };
2254	struct drbd_resource *resource;
2255	struct drbd_connection *connection;
2256	enum drbd_ret_code retcode;
2257	int i;
2258	int err;
2259
2260	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2261
2262	if (!adm_ctx.reply_skb)
2263		return retcode;
2264	if (retcode != NO_ERROR)
2265		goto out;
2266	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2267		drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2268		retcode = ERR_INVALID_REQUEST;
2269		goto out;
2270	}
2271
2272	/* No need for _rcu here. All reconfiguration is
2273	 * strictly serialized on genl_lock(). We are protected against
2274	 * concurrent reconfiguration/addition/deletion */
2275	for_each_resource(resource, &drbd_resources) {
2276		for_each_connection(connection, resource) {
2277			if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2278			    !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2279				    connection->my_addr_len)) {
2280				retcode = ERR_LOCAL_ADDR;
2281				goto out;
2282			}
2283
2284			if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2285			    !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2286				    connection->peer_addr_len)) {
2287				retcode = ERR_PEER_ADDR;
2288				goto out;
2289			}
2290		}
2291	}
2292
2293	mutex_lock(&adm_ctx.resource->adm_mutex);
2294	connection = first_connection(adm_ctx.resource);
2295	conn_reconfig_start(connection);
2296
2297	if (connection->cstate > C_STANDALONE) {
2298		retcode = ERR_NET_CONFIGURED;
2299		goto fail;
2300	}
2301
2302	/* allocation not in the IO path, drbdsetup / netlink process context */
2303	new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2304	if (!new_net_conf) {
2305		retcode = ERR_NOMEM;
2306		goto fail;
2307	}
2308
2309	set_net_conf_defaults(new_net_conf);
2310
2311	err = net_conf_from_attrs(new_net_conf, info);
2312	if (err && err != -ENOMSG) {
2313		retcode = ERR_MANDATORY_TAG;
2314		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2315		goto fail;
2316	}
2317
2318	retcode = check_net_options(connection, new_net_conf);
2319	if (retcode != NO_ERROR)
2320		goto fail;
2321
2322	retcode = alloc_crypto(&crypto, new_net_conf);
2323	if (retcode != NO_ERROR)
2324		goto fail;
2325
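	/* Make sure the shared secret is NUL terminated, even if userspace
	 * filled the field completely. */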
2326	((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2327
2328	drbd_flush_workqueue(&connection->sender_work);
2329
2330	mutex_lock(&adm_ctx.resource->conf_update);
2331	old_net_conf = connection->net_conf;
2332	if (old_net_conf) {
2333		retcode = ERR_NET_CONFIGURED;
2334		mutex_unlock(&adm_ctx.resource->conf_update);
2335		goto fail;
2336	}
2337	rcu_assign_pointer(connection->net_conf, new_net_conf);
2338
2339	conn_free_crypto(connection);
2340	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2341	connection->integrity_tfm = crypto.integrity_tfm;
2342	connection->csums_tfm = crypto.csums_tfm;
2343	connection->verify_tfm = crypto.verify_tfm;
2344
2345	connection->my_addr_len = nla_len(adm_ctx.my_addr);
2346	memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2347	connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2348	memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2349
2350	mutex_unlock(&adm_ctx.resource->conf_update);
2351
2352	rcu_read_lock();
2353	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2354		struct drbd_device *device = peer_device->device;
2355		device->send_cnt = 0;
2356		device->recv_cnt = 0;
2357	}
2358	rcu_read_unlock();
2359
2360	retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2361
2362	conn_reconfig_done(connection);
2363	mutex_unlock(&adm_ctx.resource->adm_mutex);
2364	drbd_adm_finish(&adm_ctx, info, retcode);
2365	return 0;
2366
2367fail:
2368	free_crypto(&crypto);
2369	kfree(new_net_conf);
2370
2371	conn_reconfig_done(connection);
2372	mutex_unlock(&adm_ctx.resource->adm_mutex);
2373out:
2374	drbd_adm_finish(&adm_ctx, info, retcode);
2375	return 0;
2376}
2377
2378static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2379{
2380	enum drbd_state_rv rv;
2381
2382	rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2383			force ? CS_HARD : 0);
2384
2385	switch (rv) {
2386	case SS_NOTHING_TO_DO:
2387		break;
2388	case SS_ALREADY_STANDALONE:
2389		return SS_SUCCESS;
2390	case SS_PRIMARY_NOP:
2391		/* Our state checking code wants to see the peer outdated. */
2392		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2393
2394		if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2395			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2396
2397		break;
2398	case SS_CW_FAILED_BY_PEER:
2399		/* The peer probably wants to see us outdated. */
2400		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2401							disk, D_OUTDATED), 0);
2402		if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2403			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2404					CS_HARD);
2405		}
2406		break;
2407	default:
2408		break;	/* no special handling necessary */
2409	}
2410
2411	if (rv >= SS_SUCCESS) {
2412		enum drbd_state_rv rv2;
2413		/* No one else can reconfigure the network while I am here.
2414		 * The state handling only uses drbd_thread_stop_nowait(),
2415		 * we want to really wait here until the receiver is no more.
2416		 */
2417		drbd_thread_stop(&connection->receiver);
2418
2419		/* Race breaker.  This additional state change request may be
2420		 * necessary, if this was a forced disconnect during a receiver
2421		 * restart.  We may have "killed" the receiver thread just
2422		 * after drbd_receiver() returned.  Typically, we should be
2423		 * C_STANDALONE already, now, and this becomes a no-op.
2424		 */
2425		rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2426				CS_VERBOSE | CS_HARD);
2427		if (rv2 < SS_SUCCESS)
2428			drbd_err(connection,
2429				"unexpected rv2=%d in conn_try_disconnect()\n",
2430				rv2);
2431	}
2432	return rv;
2433}
2434
2435int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2436{
2437	struct drbd_config_context adm_ctx;
2438	struct disconnect_parms parms;
2439	struct drbd_connection *connection;
2440	enum drbd_state_rv rv;
2441	enum drbd_ret_code retcode;
2442	int err;
2443
2444	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2445	if (!adm_ctx.reply_skb)
2446		return retcode;
2447	if (retcode != NO_ERROR)
2448		goto fail;
2449
2450	connection = adm_ctx.connection;
2451	memset(&parms, 0, sizeof(parms));
2452	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2453		err = disconnect_parms_from_attrs(&parms, info);
2454		if (err) {
2455			retcode = ERR_MANDATORY_TAG;
2456			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2457			goto fail;
2458		}
2459	}
2460
2461	mutex_lock(&adm_ctx.resource->adm_mutex);
2462	rv = conn_try_disconnect(connection, parms.force_disconnect);
2463	if (rv < SS_SUCCESS)
2464		retcode = rv;  /* FIXME: Type mismatch. */
2465	else
2466		retcode = NO_ERROR;
2467	mutex_unlock(&adm_ctx.resource->adm_mutex);
2468 fail:
2469	drbd_adm_finish(&adm_ctx, info, retcode);
2470	return 0;
2471}
2472
2473void resync_after_online_grow(struct drbd_device *device)
2474{
2475	int iass; /* I am sync source */
2476
2477	drbd_info(device, "Resync of new storage after online grow\n");
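	/* Decide who becomes sync source: if the roles differ, the Primary
	 * does; if both sides have the same role, fall back to the
	 * RESOLVE_CONFLICTS tie-breaker established during the handshake. */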
2478	if (device->state.role != device->state.peer)
2479		iass = (device->state.role == R_PRIMARY);
2480	else
2481		iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2482
2483	if (iass)
2484		drbd_start_resync(device, C_SYNC_SOURCE);
2485	else
2486		_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2487}
2488
2489int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2490{
2491	struct drbd_config_context adm_ctx;
2492	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2493	struct resize_parms rs;
2494	struct drbd_device *device;
2495	enum drbd_ret_code retcode;
2496	enum determine_dev_size dd;
2497	bool change_al_layout = false;
2498	enum dds_flags ddsf;
2499	sector_t u_size;
2500	int err;
2501
2502	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2503	if (!adm_ctx.reply_skb)
2504		return retcode;
2505	if (retcode != NO_ERROR)
2506		goto finish;
2507
2508	mutex_lock(&adm_ctx.resource->adm_mutex);
2509	device = adm_ctx.device;
2510	if (!get_ldev(device)) {
2511		retcode = ERR_NO_DISK;
2512		goto fail;
2513	}
2514
2515	memset(&rs, 0, sizeof(struct resize_parms));
2516	rs.al_stripes = device->ldev->md.al_stripes;
2517	rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2518	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2519		err = resize_parms_from_attrs(&rs, info);
2520		if (err) {
2521			retcode = ERR_MANDATORY_TAG;
2522			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2523			goto fail_ldev;
2524		}
2525	}
2526
2527	if (device->state.conn > C_CONNECTED) {
2528		retcode = ERR_RESIZE_RESYNC;
2529		goto fail_ldev;
2530	}
2531
2532	if (device->state.role == R_SECONDARY &&
2533	    device->state.peer == R_SECONDARY) {
2534		retcode = ERR_NO_PRIMARY;
2535		goto fail_ldev;
2536	}
2537
2538	if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2539		retcode = ERR_NEED_APV_93;
2540		goto fail_ldev;
2541	}
2542
2543	rcu_read_lock();
2544	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2545	rcu_read_unlock();
2546	if (u_size != (sector_t)rs.resize_size) {
2547		new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2548		if (!new_disk_conf) {
2549			retcode = ERR_NOMEM;
2550			goto fail_ldev;
2551		}
2552	}
2553
2554	if (device->ldev->md.al_stripes != rs.al_stripes ||
2555	    device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2556		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2557
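		/* al_size_k is the requested activity log area in KiB;
		 * refuse layouts larger than 16 GiB or smaller than 32 KiB. */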
2558		if (al_size_k > (16 * 1024 * 1024)) {
2559			retcode = ERR_MD_LAYOUT_TOO_BIG;
2560			goto fail_ldev;
2561		}
2562
2563		if (al_size_k < MD_32kB_SECT/2) {
2564			retcode = ERR_MD_LAYOUT_TOO_SMALL;
2565			goto fail_ldev;
2566		}
2567
2568		if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2569			retcode = ERR_MD_LAYOUT_CONNECTED;
2570			goto fail_ldev;
2571		}
2572
2573		change_al_layout = true;
2574	}
2575
2576	if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2577		device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2578
2579	if (new_disk_conf) {
2580		mutex_lock(&device->resource->conf_update);
2581		old_disk_conf = device->ldev->disk_conf;
2582		*new_disk_conf = *old_disk_conf;
2583		new_disk_conf->disk_size = (sector_t)rs.resize_size;
2584		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2585		mutex_unlock(&device->resource->conf_update);
2586		synchronize_rcu();
2587		kfree(old_disk_conf);
2588	}
2589
2590	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2591	dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2592	drbd_md_sync(device);
2593	put_ldev(device);
2594	if (dd == DS_ERROR) {
2595		retcode = ERR_NOMEM_BITMAP;
2596		goto fail;
2597	} else if (dd == DS_ERROR_SPACE_MD) {
2598		retcode = ERR_MD_LAYOUT_NO_FIT;
2599		goto fail;
2600	} else if (dd == DS_ERROR_SHRINK) {
2601		retcode = ERR_IMPLICIT_SHRINK;
2602		goto fail;
2603	}
2604
2605	if (device->state.conn == C_CONNECTED) {
2606		if (dd == DS_GREW)
2607			set_bit(RESIZE_PENDING, &device->flags);
2608
2609		drbd_send_uuids(first_peer_device(device));
2610		drbd_send_sizes(first_peer_device(device), 1, ddsf);
2611	}
2612
2613 fail:
2614	mutex_unlock(&adm_ctx.resource->adm_mutex);
2615 finish:
2616	drbd_adm_finish(&adm_ctx, info, retcode);
2617	return 0;
2618
2619 fail_ldev:
2620	put_ldev(device);
2621	goto fail;
2622}
2623
2624int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2625{
2626	struct drbd_config_context adm_ctx;
2627	enum drbd_ret_code retcode;
2628	struct res_opts res_opts;
2629	int err;
2630
2631	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2632	if (!adm_ctx.reply_skb)
2633		return retcode;
2634	if (retcode != NO_ERROR)
2635		goto fail;
2636
2637	res_opts = adm_ctx.resource->res_opts;
2638	if (should_set_defaults(info))
2639		set_res_opts_defaults(&res_opts);
2640
2641	err = res_opts_from_attrs(&res_opts, info);
2642	if (err && err != -ENOMSG) {
2643		retcode = ERR_MANDATORY_TAG;
2644		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2645		goto fail;
2646	}
2647
2648	mutex_lock(&adm_ctx.resource->adm_mutex);
2649	err = set_resource_options(adm_ctx.resource, &res_opts);
2650	if (err) {
2651		retcode = ERR_INVALID_REQUEST;
2652		if (err == -ENOMEM)
2653			retcode = ERR_NOMEM;
2654	}
2655	mutex_unlock(&adm_ctx.resource->adm_mutex);
2656
2657fail:
2658	drbd_adm_finish(&adm_ctx, info, retcode);
2659	return 0;
2660}
2661
2662int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2663{
2664	struct drbd_config_context adm_ctx;
2665	struct drbd_device *device;
2666	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2667
2668	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2669	if (!adm_ctx.reply_skb)
2670		return retcode;
2671	if (retcode != NO_ERROR)
2672		goto out;
2673
2674	device = adm_ctx.device;
2675	if (!get_ldev(device)) {
2676		retcode = ERR_NO_DISK;
2677		goto out;
2678	}
2679
2680	mutex_lock(&adm_ctx.resource->adm_mutex);
2681
2682	/* If there is still bitmap IO pending, probably because of a previous
2683	 * resync just being finished, wait for it before requesting a new resync.
2684	 * Also wait for its after_state_ch(). */
2685	drbd_suspend_io(device);
2686	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2687	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2688
2689	/* If we happen to be C_STANDALONE R_SECONDARY, just change to
2690	 * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
2691	 * try to start a resync handshake as sync target for full sync.
2692	 */
2693	if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2694		retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2695		if (retcode >= SS_SUCCESS) {
2696			if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2697				"set_n_write from invalidate", BM_LOCKED_MASK))
2698				retcode = ERR_IO_MD_DISK;
2699		}
2700	} else
2701		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2702	drbd_resume_io(device);
2703	mutex_unlock(&adm_ctx.resource->adm_mutex);
2704	put_ldev(device);
2705out:
2706	drbd_adm_finish(&adm_ctx, info, retcode);
2707	return 0;
2708}
2709
2710static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2711		union drbd_state mask, union drbd_state val)
2712{
2713	struct drbd_config_context adm_ctx;
2714	enum drbd_ret_code retcode;
2715
2716	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2717	if (!adm_ctx.reply_skb)
2718		return retcode;
2719	if (retcode != NO_ERROR)
2720		goto out;
2721
2722	mutex_lock(&adm_ctx.resource->adm_mutex);
2723	retcode = drbd_request_state(adm_ctx.device, mask, val);
2724	mutex_unlock(&adm_ctx.resource->adm_mutex);
2725out:
2726	drbd_adm_finish(&adm_ctx, info, retcode);
2727	return 0;
2728}
2729
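/* Bitmap IO worker used by invalidate-peer: mark all blocks as out of sync
 * and suspend activity log updates while the full sync is pending. */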
2730static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
2731{
2732	int rv;
2733
2734	rv = drbd_bmio_set_n_write(device);
2735	drbd_suspend_al(device);
2736	return rv;
2737}
2738
2739int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2740{
2741	struct drbd_config_context adm_ctx;
2742	int retcode; /* drbd_ret_code, drbd_state_rv */
2743	struct drbd_device *device;
2744
2745	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2746	if (!adm_ctx.reply_skb)
2747		return retcode;
2748	if (retcode != NO_ERROR)
2749		goto out;
2750
2751	device = adm_ctx.device;
2752	if (!get_ldev(device)) {
2753		retcode = ERR_NO_DISK;
2754		goto out;
2755	}
2756
2757	mutex_lock(&adm_ctx.resource->adm_mutex);
2758
2759	/* If there is still bitmap IO pending, probably because of a previous
2760	 * resync just being finished, wait for it before requesting a new resync.
2761	 * Also wait for its after_state_ch(). */
2762	drbd_suspend_io(device);
2763	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2764	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2765
2766	/* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2767	 * in the bitmap.  Otherwise, try to start a resync handshake
2768	 * as sync source for full sync.
2769	 */
2770	if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2771		/* The peer will get a resync upon connect anyway. Just make that
2772		   into a full resync. */
2773		retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2774		if (retcode >= SS_SUCCESS) {
2775			if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2776				"set_n_write from invalidate_peer",
2777				BM_LOCKED_SET_ALLOWED))
2778				retcode = ERR_IO_MD_DISK;
2779		}
2780	} else
2781		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2782	drbd_resume_io(device);
2783	mutex_unlock(&adm_ctx.resource->adm_mutex);
2784	put_ldev(device);
2785out:
2786	drbd_adm_finish(&adm_ctx, info, retcode);
2787	return 0;
2788}
2789
2790int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2791{
2792	struct drbd_config_context adm_ctx;
2793	enum drbd_ret_code retcode;
2794
2795	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2796	if (!adm_ctx.reply_skb)
2797		return retcode;
2798	if (retcode != NO_ERROR)
2799		goto out;
2800
2801	mutex_lock(&adm_ctx.resource->adm_mutex);
2802	if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2803		retcode = ERR_PAUSE_IS_SET;
2804	mutex_unlock(&adm_ctx.resource->adm_mutex);
2805out:
2806	drbd_adm_finish(&adm_ctx, info, retcode);
2807	return 0;
2808}
2809
2810int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2811{
2812	struct drbd_config_context adm_ctx;
2813	union drbd_dev_state s;
2814	enum drbd_ret_code retcode;
2815
2816	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2817	if (!adm_ctx.reply_skb)
2818		return retcode;
2819	if (retcode != NO_ERROR)
2820		goto out;
2821
2822	mutex_lock(&adm_ctx.resource->adm_mutex);
2823	if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2824		s = adm_ctx.device->state;
2825		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2826			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2827				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2828		} else {
2829			retcode = ERR_PAUSE_IS_CLEAR;
2830		}
2831	}
2832	mutex_unlock(&adm_ctx.resource->adm_mutex);
2833out:
2834	drbd_adm_finish(&adm_ctx, info, retcode);
2835	return 0;
2836}
2837
2838int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2839{
2840	return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2841}
2842
2843int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2844{
2845	struct drbd_config_context adm_ctx;
2846	struct drbd_device *device;
2847	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2848
2849	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2850	if (!adm_ctx.reply_skb)
2851		return retcode;
2852	if (retcode != NO_ERROR)
2853		goto out;
2854
2855	mutex_lock(&adm_ctx.resource->adm_mutex);
2856	device = adm_ctx.device;
2857	if (test_bit(NEW_CUR_UUID, &device->flags)) {
2858		drbd_uuid_new_current(device);
2859		clear_bit(NEW_CUR_UUID, &device->flags);
2860	}
2861	drbd_suspend_io(device);
2862	retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2863	if (retcode == SS_SUCCESS) {
2864		if (device->state.conn < C_CONNECTED)
2865			tl_clear(first_peer_device(device)->connection);
2866		if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2867			tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2868	}
2869	drbd_resume_io(device);
2870	mutex_unlock(&adm_ctx.resource->adm_mutex);
2871out:
2872	drbd_adm_finish(&adm_ctx, info, retcode);
2873	return 0;
2874}
2875
2876int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2877{
2878	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2879}
2880
2881static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2882				    struct drbd_resource *resource,
2883				    struct drbd_connection *connection,
2884				    struct drbd_device *device)
2885{
2886	struct nlattr *nla;
2887	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2888	if (!nla)
2889		goto nla_put_failure;
2890	if (device &&
2891	    nla_put_u32(skb, T_ctx_volume, device->vnr))
2892		goto nla_put_failure;
2893	if (nla_put_string(skb, T_ctx_resource_name, resource->name))
2894		goto nla_put_failure;
2895	if (connection) {
2896		if (connection->my_addr_len &&
2897		    nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2898			goto nla_put_failure;
2899		if (connection->peer_addr_len &&
2900		    nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2901			goto nla_put_failure;
2902	}
2903	nla_nest_end(skb, nla);
2904	return 0;
2905
2906nla_put_failure:
2907	if (nla)
2908		nla_nest_cancel(skb, nla);
2909	return -EMSGSIZE;
2910}
2911
2912/*
2913 * Return the connection of @resource if @resource has exactly one connection.
2914 */
2915static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2916{
2917	struct list_head *connections = &resource->connections;
2918
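	/* Exactly one entry: the first element's next pointer must point
	 * back at the list head. */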
2919	if (list_empty(connections) || connections->next->next != connections)
2920		return NULL;
2921	return list_first_entry(&resource->connections, struct drbd_connection, connections);
2922}
2923
2924static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2925		const struct sib_info *sib)
2926{
2927	struct drbd_resource *resource = device->resource;
2928	struct state_info *si = NULL; /* for sizeof(si->member); */
2929	struct nlattr *nla;
2930	int got_ldev;
2931	int err = 0;
2932	int exclude_sensitive;
2933
2934	/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2935	 * to.  So we had better exclude_sensitive information.
2936	 *
2937	 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2938	 * in the context of the requesting user process. Exclude sensitive
2939	 * information, unless current has CAP_SYS_ADMIN.
2940	 *
2941	 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2942	 * relies on the current implementation of netlink_dump(), which
2943	 * executes the dump callback successively from netlink_recvmsg(),
2944	 * always in the context of the receiving process */
2945	exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2946
2947	got_ldev = get_ldev(device);
2948
2949	/* We need to add connection name and volume number information still.
2950	 * Minor number is in drbd_genlmsghdr. */
2951	if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2952		goto nla_put_failure;
2953
2954	if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2955		goto nla_put_failure;
2956
2957	rcu_read_lock();
2958	if (got_ldev) {
2959		struct disk_conf *disk_conf;
2960
2961		disk_conf = rcu_dereference(device->ldev->disk_conf);
2962		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2963	}
2964	if (!err) {
2965		struct net_conf *nc;
2966
2967		nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2968		if (nc)
2969			err = net_conf_to_skb(skb, nc, exclude_sensitive);
2970	}
2971	rcu_read_unlock();
2972	if (err)
2973		goto nla_put_failure;
2974
2975	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2976	if (!nla)
2977		goto nla_put_failure;
2978	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2979	    nla_put_u32(skb, T_current_state, device->state.i) ||
2980	    nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2981	    nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2982	    nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2983	    nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2984	    nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2985	    nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2986	    nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2987	    nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2988	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2989	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2990	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2991		goto nla_put_failure;
2992
2993	if (got_ldev) {
2994		int err;
2995
2996		spin_lock_irq(&device->ldev->md.uuid_lock);
2997		err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2998		spin_unlock_irq(&device->ldev->md.uuid_lock);
2999
3000		if (err)
3001			goto nla_put_failure;
3002
3003		if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
3004		    nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
3005		    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
3006			goto nla_put_failure;
3007		if (C_SYNC_SOURCE <= device->state.conn &&
3008		    C_PAUSED_SYNC_T >= device->state.conn) {
3009			if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
3010			    nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
3011				goto nla_put_failure;
3012		}
3013	}
3014
3015	if (sib) {
3016		switch (sib->sib_reason) {
3017		case SIB_SYNC_PROGRESS:
3018		case SIB_GET_STATUS_REPLY:
3019			break;
3020		case SIB_STATE_CHANGE:
3021			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
3022			    nla_put_u32(skb, T_new_state, sib->ns.i))
3023				goto nla_put_failure;
3024			break;
3025		case SIB_HELPER_POST:
3026			if (nla_put_u32(skb, T_helper_exit_code,
3027					sib->helper_exit_code))
3028				goto nla_put_failure;
3029			/* fall through */
3030		case SIB_HELPER_PRE:
3031			if (nla_put_string(skb, T_helper, sib->helper_name))
3032				goto nla_put_failure;
3033			break;
3034		}
3035	}
3036	nla_nest_end(skb, nla);
3037
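	/* The error label lives inside "if (0)" so that the fall-through
	 * path skips the error assignment while gotos from above still
	 * reach it. */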
3038	if (0)
3039nla_put_failure:
3040		err = -EMSGSIZE;
3041	if (got_ldev)
3042		put_ldev(device);
3043	return err;
3044}
3045
3046int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
3047{
3048	struct drbd_config_context adm_ctx;
3049	enum drbd_ret_code retcode;
3050	int err;
3051
3052	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3053	if (!adm_ctx.reply_skb)
3054		return retcode;
3055	if (retcode != NO_ERROR)
3056		goto out;
3057
3058	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
3059	if (err) {
3060		nlmsg_free(adm_ctx.reply_skb);
3061		return err;
3062	}
3063out:
3064	drbd_adm_finish(&adm_ctx, info, retcode);
3065	return 0;
3066}
3067
3068static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
3069{
3070	struct drbd_device *device;
3071	struct drbd_genlmsghdr *dh;
3072	struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
3073	struct drbd_resource *resource = NULL;
3074	struct drbd_resource *tmp;
3075	unsigned volume = cb->args[1];
3076
3077	/* Open coded, deferred, iteration:
3078	 * for_each_resource_safe(resource, tmp, &drbd_resources) {
3079	 *      connection = "first connection of resource or undefined";
3080	 *	idr_for_each_entry(&resource->devices, device, i) {
3081	 *	  ...
3082	 *	}
3083	 * }
3084	 * where resource is cb->args[0];
3085	 * and i is cb->args[1];
3086	 *
3087	 * cb->args[2] indicates if we shall loop over all resources,
3088	 * or just dump all volumes of a single resource.
3089	 *
3090	 * This may miss entries inserted after this dump started,
3091	 * or entries deleted before they are reached.
3092	 *
3093	 * We need to make sure the device won't disappear while
3094	 * we are looking at it, and revalidate our iterators
3095	 * on each iteration.
3096	 */
3097
3098	/* synchronize with conn_create()/drbd_destroy_connection() */
3099	rcu_read_lock();
3100	/* revalidate iterator position */
3101	for_each_resource_rcu(tmp, &drbd_resources) {
3102		if (pos == NULL) {
3103			/* first iteration */
3104			pos = tmp;
3105			resource = pos;
3106			break;
3107		}
3108		if (tmp == pos) {
3109			resource = pos;
3110			break;
3111		}
3112	}
3113	if (resource) {
3114next_resource:
3115		device = idr_get_next(&resource->devices, &volume);
3116		if (!device) {
3117			/* No more volumes to dump on this resource.
3118			 * Advance resource iterator. */
3119			pos = list_entry_rcu(resource->resources.next,
3120					     struct drbd_resource, resources);
3121			/* Did we dump any volume of this resource yet? */
3122			if (volume != 0) {
3123				/* If we reached the end of the list,
3124				 * or only a single resource dump was requested,
3125				 * we are done. */
3126				if (&pos->resources == &drbd_resources || cb->args[2])
3127					goto out;
3128				volume = 0;
3129				resource = pos;
3130				goto next_resource;
3131			}
3132		}
3133
3134		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3135				cb->nlh->nlmsg_seq, &drbd_genl_family,
3136				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3137		if (!dh)
3138			goto out;
3139
3140		if (!device) {
3141			/* This is a connection without a single volume.
3142			 * Surprisingly enough, it may have a network
3143			 * configuration. */
3144			struct drbd_connection *connection;
3145
3146			dh->minor = -1U;
3147			dh->ret_code = NO_ERROR;
3148			connection = the_only_connection(resource);
3149			if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3150				goto cancel;
3151			if (connection) {
3152				struct net_conf *nc;
3153
3154				nc = rcu_dereference(connection->net_conf);
3155				if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3156					goto cancel;
3157			}
3158			goto done;
3159		}
3160
3161		D_ASSERT(device, device->vnr == volume);
3162		D_ASSERT(device, device->resource == resource);
3163
3164		dh->minor = device_to_minor(device);
3165		dh->ret_code = NO_ERROR;
3166
3167		if (nla_put_status_info(skb, device, NULL)) {
3168cancel:
3169			genlmsg_cancel(skb, dh);
3170			goto out;
3171		}
3172done:
3173		genlmsg_end(skb, dh);
3174	}
3175
3176out:
3177	rcu_read_unlock();
3178	/* where to start the next iteration */
3179	cb->args[0] = (long)pos;
3180	cb->args[1] = (pos == resource) ? volume + 1 : 0;
3181
3182	/* If no more resources/volumes/minors are found, the skb stays empty,
3183	 * which terminates the dump. */
3184	return skb->len;
3185}
3186
3187/*
3188 * Request status of all resources, or of all volumes within a single resource.
3189 *
3190 * This is a dump, as the answer may not fit in a single reply skb otherwise.
3191 * Which means we cannot use the family->attrbuf or other such members, because
3192 * dump is NOT protected by the genl_lock().  During dump, we only have access
3193 * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3194 *
3195 * Once things are setup properly, we call into get_one_status().
3196 */
3197int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3198{
3199	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3200	struct nlattr *nla;
3201	const char *resource_name;
3202	struct drbd_resource *resource;
3203	int maxtype;
3204
3205	/* Is this a followup call? */
3206	if (cb->args[0]) {
3207		/* ... of a single resource dump,
3208		 * and the resource iterator has been advanced already? */
3209		if (cb->args[2] && cb->args[2] != cb->args[0])
3210			return 0; /* DONE. */
3211		goto dump;
3212	}
3213
3214	/* First call (from netlink_dump_start).  We need to figure out
3215	 * which resource(s) the user wants us to dump. */
3216	nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3217			nlmsg_attrlen(cb->nlh, hdrlen),
3218			DRBD_NLA_CFG_CONTEXT);
3219
3220	/* No explicit context given.  Dump all. */
3221	if (!nla)
3222		goto dump;
3223	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3224	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3225	if (IS_ERR(nla))
3226		return PTR_ERR(nla);
3227	/* context given, but no name present? */
3228	if (!nla)
3229		return -EINVAL;
3230	resource_name = nla_data(nla);
3231	if (!*resource_name)
3232		return -ENODEV;
3233	resource = drbd_find_resource(resource_name);
3234	if (!resource)
3235		return -ENODEV;
3236
3237	kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3238
3239	/* prime iterators, and set "filter" mode mark:
3240	 * only dump this resource. */
3241	cb->args[0] = (long)resource;
3242	/* cb->args[1] = 0; passed in this way. */
3243	cb->args[2] = (long)resource;
3244
3245dump:
3246	return get_one_status(skb, cb);
3247}
3248
3249int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3250{
3251	struct drbd_config_context adm_ctx;
3252	enum drbd_ret_code retcode;
3253	struct timeout_parms tp;
3254	int err;
3255
3256	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3257	if (!adm_ctx.reply_skb)
3258		return retcode;
3259	if (retcode != NO_ERROR)
3260		goto out;
3261
3262	tp.timeout_type =
3263		adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3264		test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3265		UT_DEFAULT;
3266
3267	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3268	if (err) {
3269		nlmsg_free(adm_ctx.reply_skb);
3270		return err;
3271	}
3272out:
3273	drbd_adm_finish(&adm_ctx, info, retcode);
3274	return 0;
3275}
3276
3277int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3278{
3279	struct drbd_config_context adm_ctx;
3280	struct drbd_device *device;
3281	enum drbd_ret_code retcode;
3282	struct start_ov_parms parms;
3283
3284	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3285	if (!adm_ctx.reply_skb)
3286		return retcode;
3287	if (retcode != NO_ERROR)
3288		goto out;
3289
3290	device = adm_ctx.device;
3291
3292	/* resume from last known position, if possible */
3293	parms.ov_start_sector = device->ov_start_sector;
3294	parms.ov_stop_sector = ULLONG_MAX;
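	/* ULLONG_MAX means "no stop sector": the verify runs to the end of
	 * the device unless the parameters below override it. */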
3295	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3296		int err = start_ov_parms_from_attrs(&parms, info);
3297		if (err) {
3298			retcode = ERR_MANDATORY_TAG;
3299			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3300			goto out;
3301		}
3302	}
3303	mutex_lock(&adm_ctx.resource->adm_mutex);
3304
3305	/* w_make_ov_request expects position to be aligned */
3306	device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3307	device->ov_stop_sector = parms.ov_stop_sector;
3308
3309	/* If there is still bitmap IO pending, e.g. previous resync or verify
3310	 * just being finished, wait for it before requesting a new resync. */
3311	drbd_suspend_io(device);
3312	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3313	retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3314	drbd_resume_io(device);
3315
3316	mutex_unlock(&adm_ctx.resource->adm_mutex);
3317out:
3318	drbd_adm_finish(&adm_ctx, info, retcode);
3319	return 0;
3320}
3321
3322
3323int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3324{
3325	struct drbd_config_context adm_ctx;
3326	struct drbd_device *device;
3327	enum drbd_ret_code retcode;
3328	int skip_initial_sync = 0;
3329	int err;
3330	struct new_c_uuid_parms args;
3331
3332	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3333	if (!adm_ctx.reply_skb)
3334		return retcode;
3335	if (retcode != NO_ERROR)
3336		goto out_nolock;
3337
3338	device = adm_ctx.device;
3339	memset(&args, 0, sizeof(args));
3340	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3341		err = new_c_uuid_parms_from_attrs(&args, info);
3342		if (err) {
3343			retcode = ERR_MANDATORY_TAG;
3344			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3345			goto out_nolock;
3346		}
3347	}
3348
3349	mutex_lock(&adm_ctx.resource->adm_mutex);
3350	mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3351
3352	if (!get_ldev(device)) {
3353		retcode = ERR_NO_DISK;
3354		goto out;
3355	}
3356
3357	/* this is "skip initial sync", assume to be clean */
	if (device->state.conn == C_CONNECTED &&
	    first_peer_device(device)->connection->agreed_pro_version >= 90 &&
	    device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
		drbd_info(device, "Preparing to skip initial sync\n");
		skip_initial_sync = 1;
	} else if (device->state.conn != C_STANDALONE) {
		retcode = ERR_CONNECTED;
		goto out_dec;
	}

	drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
	drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */

	if (args.clear_bm) {
		err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
			"clear_n_write from new_c_uuid", BM_LOCKED_MASK);
		if (err) {
			drbd_err(device, "Writing bitmap failed with %d\n", err);
			retcode = ERR_IO_MD_DISK;
		}
		if (skip_initial_sync) {
			drbd_send_uuids_skip_initial_sync(first_peer_device(device));
			_drbd_uuid_set(device, UI_BITMAP, 0);
			drbd_print_uuids(device, "cleared bitmap UUID");
			spin_lock_irq(&device->resource->req_lock);
			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
					CS_VERBOSE, NULL);
			spin_unlock_irq(&device->resource->req_lock);
		}
	}

	drbd_md_sync(device);
out_dec:
	put_ldev(device);
out:
	mutex_unlock(device->state_mutex);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out_nolock:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

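/* Basic sanity check of the resource name received via netlink; used by
 * drbd_adm_new_resource() before a new resource object is created. */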
static enum drbd_ret_code
drbd_check_resource_name(struct drbd_config_context *adm_ctx)
{
	const char *name = adm_ctx->resource_name;
	if (!name || !name[0]) {
		drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
		return ERR_MANDATORY_TAG;
	}
	/* if we want to use these in sysfs/configfs/debugfs some day,
	 * we must not allow slashes */
	if (strchr(name, '/')) {
		drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
		return ERR_INVALID_REQUEST;
	}
	return NO_ERROR;
}

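/* Create a new resource object with the given name and resource options.
 * If a resource of that name already exists, this only fails when the
 * request carries NLM_F_EXCL; otherwise the call is a no-op that still
 * reports NO_ERROR. */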
int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct res_opts res_opts;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	set_res_opts_defaults(&res_opts);
	err = res_opts_from_attrs(&res_opts, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
		goto out;
	}

	retcode = drbd_check_resource_name(&adm_ctx);
	if (retcode != NO_ERROR)
		goto out;

	if (adm_ctx.resource) {
		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
			retcode = ERR_INVALID_REQUEST;
			drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
		}
		/* else: still NO_ERROR */
		goto out;
	}

	/* not yet safe for genl_family.parallel_ops */
	if (!conn_create(adm_ctx.resource_name, &res_opts))
		retcode = ERR_NOMEM;
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

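/* Add a new minor (volume) to an existing resource.  The requested minor
 * number and volume id are range checked; if the minor already exists, this
 * only fails when NLM_F_EXCL was set. */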
int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_genlmsghdr *dh = info->userhdr;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	if (dh->minor > MINORMASK) {
		drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
		retcode = ERR_INVALID_REQUEST;
		goto out;
	}
	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
		drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
		retcode = ERR_INVALID_REQUEST;
		goto out;
	}

	/* drbd_adm_prepare made sure already
	 * that first_peer_device(device)->connection and device->vnr match the request. */
	if (adm_ctx.device) {
		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
			retcode = ERR_MINOR_EXISTS;
		/* else: still NO_ERROR */
		goto out;
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = drbd_create_device(&adm_ctx, dh->minor);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

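/* Remove a single minor.  This is only allowed while the device is Diskless
 * and Secondary; the connection may stay established, so a minor can be
 * removed from a live replication group. */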
static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
{
	if (device->state.disk == D_DISKLESS &&
	    /* no need to be device->state.conn == C_STANDALONE &&
	     * we may want to delete a minor from a live replication group.
	     */
	    device->state.role == R_SECONDARY) {
		_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
				    CS_VERBOSE + CS_WAIT_COMPLETE);
		drbd_delete_device(device);
		return NO_ERROR;
	} else
		return ERR_MINOR_CONFIGURED;
}

int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = adm_del_minor(adm_ctx.device);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

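/* Tear down a whole resource: demote all volumes to Secondary, disconnect all
 * connections, detach all disks, wait for the worker threads to stop, delete
 * every minor and finally free the resource object.  Any failure along the
 * way aborts the teardown and is reported back via the reply skb. */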
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_resource *resource;
	struct drbd_connection *connection;
	struct drbd_device *device;
	int retcode; /* enum drbd_ret_code resp. enum drbd_state_rv */
	unsigned i;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	resource = adm_ctx.resource;
	mutex_lock(&resource->adm_mutex);
	/* demote */
	for_each_connection(connection, resource) {
		struct drbd_peer_device *peer_device;

		idr_for_each_entry(&connection->peer_devices, peer_device, i) {
			retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
			if (retcode < SS_SUCCESS) {
				drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
				goto out;
			}
		}

		retcode = conn_try_disconnect(connection, 0);
		if (retcode < SS_SUCCESS) {
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
			goto out;
		}
	}

	/* detach */
	idr_for_each_entry(&resource->devices, device, i) {
		retcode = adm_detach(device, 0);
		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
			goto out;
		}
	}

	/* If we reach this, all volumes (of this connection) are Secondary,
	 * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
	 * actually stopped, state handling only does drbd_thread_stop_nowait(). */
	for_each_connection(connection, resource)
		drbd_thread_stop(&connection->worker);

	/* Now, nothing can fail anymore */

	/* delete volumes */
	idr_for_each_entry(&resource->devices, device, i) {
		retcode = adm_del_minor(device);
		if (retcode != NO_ERROR) {
			/* "can not happen" */
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
			goto out;
		}
	}

	list_del_rcu(&resource->resources);
	synchronize_rcu();
	drbd_free_resource(resource);
	retcode = NO_ERROR;
out:
	mutex_unlock(&resource->adm_mutex);
finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

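/* Delete an already unconfigured resource.  All of its connections must be
 * StandAlone and no minors may be left, otherwise ERR_NET_CONFIGURED or
 * ERR_RES_IN_USE is returned. */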
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_resource *resource;
	struct drbd_connection *connection;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	resource = adm_ctx.resource;
	mutex_lock(&resource->adm_mutex);
	for_each_connection(connection, resource) {
		if (connection->cstate > C_STANDALONE) {
			retcode = ERR_NET_CONFIGURED;
			goto out;
		}
	}
	if (!idr_is_empty(&resource->devices)) {
		retcode = ERR_RES_IN_USE;
		goto out;
	}

	list_del_rcu(&resource->resources);
	for_each_connection(connection, resource)
		drbd_thread_stop(&connection->worker);
	synchronize_rcu();
	drbd_free_resource(resource);
	retcode = NO_ERROR;
out:
	mutex_unlock(&resource->adm_mutex);
finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

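/* Broadcast a state info block ("sib") for @device as a DRBD_EVENT message on
 * the drbd netlink events multicast group, so that userspace listeners (e.g.
 * a "drbdsetup events" monitor) can follow state changes.  -ESRCH, meaning
 * there are no listeners, is not treated as an error. */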
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
{
	static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
	struct sk_buff *msg;
	struct drbd_genlmsghdr *d_out;
	unsigned seq;
	int err = -ENOMEM;

	seq = atomic_inc_return(&drbd_genl_seq);
	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
	if (!msg)
		goto failed;

	err = -EMSGSIZE;
	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
	if (!d_out) /* cannot happen, but anyways. */
		goto nla_put_failure;
	d_out->minor = device_to_minor(device);
	d_out->ret_code = NO_ERROR;

	if (nla_put_status_info(msg, device, sib))
		goto nla_put_failure;
	genlmsg_end(msg, d_out);
	err = drbd_genl_multicast_events(msg, 0);
	/* msg has been consumed or freed in netlink_broadcast() */
	if (err && err != -ESRCH)
		goto failed;

	return;

nla_put_failure:
	nlmsg_free(msg);
failed:
	drbd_err(device, "Error %d while broadcasting event. "
			"Event seq:%u sib_reason:%u\n",
			err, seq, sib->sib_reason);
}
