[go: nahoru, domu]

13241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber/*
23241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * Copyright (C) 2011 Red Hat, Inc.
33241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
43241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * This file is released under the GPL.
53241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber */
63241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
73241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber#include "dm-btree.h"
83241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber#include "dm-btree-internal.h"
93241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber#include "dm-transaction-manager.h"
103241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
111944ce60fe1e92506d3347f4d8e10a82b17096e4Paul Gortmaker#include <linux/export.h>
123241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
133241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber/*
143241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * Removing an entry from a btree
153241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * ==============================
163241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
173241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * A very important constraint for our btree is that no node, except the
183241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * root, may have fewer than a certain number of entries.
193241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
203241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
213241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * Ensuring this is complicated by the way we want to only ever hold the
223241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * locks on 2 nodes concurrently, and only change nodes in a top to bottom
233241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * fashion.
243241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
253241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * Each node may have a left or right sibling.  When decending the spine,
263241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * if a node contains only MIN_ENTRIES then we try and increase this to at
273241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * least MIN_ENTRIES + 1.  We do this in the following ways:
283241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
293241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * [A] No siblings => this can only happen if the node is the root, in which
303241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *     case we copy the childs contents over the root.
313241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
323241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * [B] No left sibling
333241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *     ==> rebalance(node, right sibling)
343241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
353241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * [C] No right sibling
363241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *     ==> rebalance(left sibling, node)
373241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
383241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
393241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *     ==> delete node adding it's contents to left and right
403241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
413241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD
423241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *     ==> rebalance(left, node, right)
433241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
443241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * After these operations it's possible that the our original node no
453241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * longer contains the desired sub tree.  For this reason this rebalancing
463241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * is performed on the children of the current node.  This also avoids
473241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * having a special case for the root.
483241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber *
493241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * Once this rebalancing has occurred we can then step into the child node
503241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * for internal nodes.  Or delete the entry for leaf nodes.
513241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber */
523241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
533241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber/*
543241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * Some little utilities for moving node data around.
553241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber */
56550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patockastatic void node_shift(struct btree_node *n, int shift)
573241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
583241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
593241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t value_size = le32_to_cpu(n->header.value_size);
603241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
613241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (shift < 0) {
623241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		shift = -shift;
633241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		BUG_ON(shift > nr_entries);
64a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber		BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift));
653241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		memmove(key_ptr(n, 0),
663241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			key_ptr(n, shift),
673241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			(nr_entries - shift) * sizeof(__le64));
68a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber		memmove(value_ptr(n, 0),
69a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber			value_ptr(n, shift),
703241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			(nr_entries - shift) * value_size);
713241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	} else {
723241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries));
733241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		memmove(key_ptr(n, shift),
743241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			key_ptr(n, 0),
753241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			nr_entries * sizeof(__le64));
76a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber		memmove(value_ptr(n, shift),
77a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber			value_ptr(n, 0),
783241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			nr_entries * value_size);
793241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
803241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
813241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
82550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patockastatic void node_copy(struct btree_node *left, struct btree_node *right, int shift)
833241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
843241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
853241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t value_size = le32_to_cpu(left->header.value_size);
863241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	BUG_ON(value_size != le32_to_cpu(right->header.value_size));
873241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
883241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (shift < 0) {
893241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		shift = -shift;
903241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		BUG_ON(nr_left + shift > le32_to_cpu(left->header.max_entries));
913241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		memcpy(key_ptr(left, nr_left),
923241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		       key_ptr(right, 0),
933241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		       shift * sizeof(__le64));
94a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber		memcpy(value_ptr(left, nr_left),
95a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber		       value_ptr(right, 0),
963241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		       shift * value_size);
973241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	} else {
983241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		BUG_ON(shift > le32_to_cpu(right->header.max_entries));
993241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		memcpy(key_ptr(right, 0),
1003241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		       key_ptr(left, nr_left - shift),
1013241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		       shift * sizeof(__le64));
102a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber		memcpy(value_ptr(right, 0),
103a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber		       value_ptr(left, nr_left - shift),
1043241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		       shift * value_size);
1053241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
1063241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
1073241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1083241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber/*
1093241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * Delete a specific entry from a leaf node.
1103241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber */
111550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patockastatic void delete_at(struct btree_node *n, unsigned index)
1123241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
1133241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
1143241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	unsigned nr_to_copy = nr_entries - (index + 1);
1153241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t value_size = le32_to_cpu(n->header.value_size);
1163241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	BUG_ON(index >= nr_entries);
1173241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1183241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (nr_to_copy) {
1193241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		memmove(key_ptr(n, index),
1203241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			key_ptr(n, index + 1),
1213241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			nr_to_copy * sizeof(__le64));
1223241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
123a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber		memmove(value_ptr(n, index),
124a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber			value_ptr(n, index + 1),
1253241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			nr_to_copy * value_size);
1263241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
1273241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1283241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	n->header.nr_entries = cpu_to_le32(nr_entries - 1);
1293241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
1303241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
131550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patockastatic unsigned merge_threshold(struct btree_node *n)
1323241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
133b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	return le32_to_cpu(n->header.max_entries) / 3;
1343241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
1353241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1363241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornberstruct child {
1373241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	unsigned index;
1383241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	struct dm_block *block;
139550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *n;
1403241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber};
1413241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
142f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornberstatic int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
143f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber		      struct btree_node *parent,
1443241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		      unsigned index, struct child *result)
1453241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
1463241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	int r, inc;
1473241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	dm_block_t root;
1483241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1493241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	result->index = index;
1503241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	root = value64(parent, index);
1513241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1523241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
1533241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			       &result->block, &inc);
1543241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r)
1553241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
1563241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1573241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	result->n = dm_block_data(result->block);
1583241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1593241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (inc)
160f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber		inc_children(info->tm, result->n, vt);
1613241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
162a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber	*((__le64 *) value_ptr(parent, index)) =
1633241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		cpu_to_le64(dm_block_location(result->block));
1643241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1653241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	return 0;
1663241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
1673241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1683241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornberstatic int exit_child(struct dm_btree_info *info, struct child *c)
1693241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
1703241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	return dm_tm_unlock(info->tm, c->block);
1713241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
1723241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
173550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patockastatic void shift(struct btree_node *left, struct btree_node *right, int count)
1743241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
175b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
176b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
177b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
178b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	uint32_t r_max_entries = le32_to_cpu(right->header.max_entries);
179b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
180b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	BUG_ON(max_entries != r_max_entries);
181b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	BUG_ON(nr_left - count > max_entries);
182b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	BUG_ON(nr_right + count > max_entries);
183b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
1843241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (!count)
1853241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return;
1863241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
1873241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (count > 0) {
1883241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		node_shift(right, count);
1893241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		node_copy(left, right, count);
1903241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	} else {
1913241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		node_copy(left, right, count);
1923241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		node_shift(right, count);
1933241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
1943241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
195b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	left->header.nr_entries = cpu_to_le32(nr_left - count);
196b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	right->header.nr_entries = cpu_to_le32(nr_right + count);
1973241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
1983241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
199550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patockastatic void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
2003241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			 struct child *l, struct child *r)
2013241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
202550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *left = l->n;
203550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *right = r->n;
2043241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
2053241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
206b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	unsigned threshold = 2 * merge_threshold(left) + 1;
2073241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
208b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	if (nr_left + nr_right < threshold) {
2093241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		/*
2103241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 * Merge
2113241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 */
2123241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		node_copy(left, right, -nr_right);
2133241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
2143241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		delete_at(parent, r->index);
2153241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
2163241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		/*
2173241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 * We need to decrement the right block, but not it's
2183241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 * children, since they're still referenced by left.
2193241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 */
2203241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		dm_tm_dec(info->tm, dm_block_location(r->block));
2213241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	} else {
2223241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		/*
2233241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 * Rebalance.
2243241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 */
2253241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		unsigned target_left = (nr_left + nr_right) / 2;
2263241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		shift(left, right, nr_left - target_left);
2273241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		*key_ptr(parent, r->index) = right->keys[0];
2283241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
2293241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
2303241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
2313241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornberstatic int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
232f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber		      struct dm_btree_value_type *vt, unsigned left_index)
2333241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
2343241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	int r;
235550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *parent;
2363241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	struct child left, right;
2373241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
2383241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	parent = dm_block_data(shadow_current(s));
2393241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
240f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber	r = init_child(info, vt, parent, left_index, &left);
2413241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r)
2423241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
2433241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
244f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber	r = init_child(info, vt, parent, left_index + 1, &right);
2453241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r) {
2463241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		exit_child(info, &left);
2473241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
2483241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
2493241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
2503241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	__rebalance2(info, parent, &left, &right);
2513241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
2523241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	r = exit_child(info, &left);
2533241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r) {
2543241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		exit_child(info, &right);
2553241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
2563241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
2573241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
2583241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	return exit_child(info, &right);
2593241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
2603241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
261b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber/*
262b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber * We dump as many entries from center as possible into left, then the rest
263b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber * in right, then rebalance2.  This wastes some cpu, but I want something
264b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber * simple atm.
265b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber */
266550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patockastatic void delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
267b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			       struct child *l, struct child *c, struct child *r,
268550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka			       struct btree_node *left, struct btree_node *center, struct btree_node *right,
269b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			       uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
270b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber{
271b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
272b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	unsigned shift = min(max_entries - nr_left, nr_center);
273b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
274b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	BUG_ON(nr_left + shift > max_entries);
275b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	node_copy(left, center, -shift);
276b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	left->header.nr_entries = cpu_to_le32(nr_left + shift);
277b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
278b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	if (shift != nr_center) {
279b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		shift = nr_center - shift;
280b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		BUG_ON((nr_right + shift) > max_entries);
281b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		node_shift(right, shift);
282b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		node_copy(center, right, shift);
283b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		right->header.nr_entries = cpu_to_le32(nr_right + shift);
284b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	}
285b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	*key_ptr(parent, r->index) = right->keys[0];
286b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
287b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	delete_at(parent, c->index);
288b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	r->index--;
289b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
290b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	dm_tm_dec(info->tm, dm_block_location(c->block));
291b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	__rebalance2(info, parent, l, r);
292b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber}
293b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
294b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber/*
295b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber * Redistributes entries among 3 sibling nodes.
296b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber */
297550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patockastatic void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
298b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			  struct child *l, struct child *c, struct child *r,
299550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka			  struct btree_node *left, struct btree_node *center, struct btree_node *right,
300b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			  uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
301b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber{
302b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	int s;
303b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
304b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	unsigned target = (nr_left + nr_center + nr_right) / 3;
305b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	BUG_ON(target > max_entries);
306b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
307b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	if (nr_left < nr_right) {
308b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		s = nr_left - target;
309b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
310b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		if (s < 0 && nr_center < -s) {
311b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			/* not enough in central node */
312b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			shift(left, center, nr_center);
313b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			s = nr_center - target;
314b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			shift(left, right, s);
315b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			nr_right += s;
316b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		} else
317b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			shift(left, center, s);
318b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
319b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		shift(center, right, target - nr_right);
320b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
321b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	} else {
322b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		s = target - nr_right;
323b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		if (s > 0 && nr_center < s) {
324b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			/* not enough in central node */
325b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			shift(center, right, nr_center);
326b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			s = target - nr_center;
327b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			shift(left, right, s);
328b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			nr_left -= s;
329b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		} else
330b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			shift(center, right, s);
331b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
332b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		shift(left, center, nr_left - target);
333b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	}
334b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
335b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	*key_ptr(parent, c->index) = center->keys[0];
336b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	*key_ptr(parent, r->index) = right->keys[0];
337b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber}
338b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber
339550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patockastatic void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
3403241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			 struct child *l, struct child *c, struct child *r)
3413241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
342550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *left = l->n;
343550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *center = c->n;
344550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *right = r->n;
3453241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
3463241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
3473241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
3483241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
3493241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
350b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	unsigned threshold = merge_threshold(left) * 4 + 1;
3513241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
3523241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	BUG_ON(left->header.max_entries != center->header.max_entries);
3533241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	BUG_ON(center->header.max_entries != right->header.max_entries);
3543241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
355b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	if ((nr_left + nr_center + nr_right) < threshold)
356b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		delete_center_node(info, parent, l, c, r, left, center, right,
357b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber				   nr_left, nr_center, nr_right);
358b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber	else
359b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber		redistribute3(info, parent, l, c, r, left, center, right,
360b0988900bae9ecf968a8a8d086a9eec671a9517aJoe Thornber			      nr_left, nr_center, nr_right);
3613241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
3623241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
3633241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornberstatic int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
364f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber		      struct dm_btree_value_type *vt, unsigned left_index)
3653241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
3663241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	int r;
367550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *parent = dm_block_data(shadow_current(s));
3683241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	struct child left, center, right;
3693241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
3703241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	/*
3713241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	 * FIXME: fill out an array?
3723241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	 */
373f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber	r = init_child(info, vt, parent, left_index, &left);
3743241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r)
3753241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
3763241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
377f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber	r = init_child(info, vt, parent, left_index + 1, &center);
3783241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r) {
3793241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		exit_child(info, &left);
3803241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
3813241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
3823241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
383f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber	r = init_child(info, vt, parent, left_index + 2, &right);
3843241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r) {
3853241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		exit_child(info, &left);
3863241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		exit_child(info, &center);
3873241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
3883241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
3893241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
3903241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	__rebalance3(info, parent, &left, &center, &right);
3913241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
3923241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	r = exit_child(info, &left);
3933241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r) {
3943241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		exit_child(info, &center);
3953241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		exit_child(info, &right);
3963241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
3973241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
3983241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
3993241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	r = exit_child(info, &center);
4003241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r) {
4013241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		exit_child(info, &right);
4023241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
4033241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
4043241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4053241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	r = exit_child(info, &right);
4063241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r)
4073241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
4083241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4093241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	return 0;
4103241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
4113241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4123241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornberstatic int get_nr_entries(struct dm_transaction_manager *tm,
4133241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			  dm_block_t b, uint32_t *result)
4143241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
4153241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	int r;
4163241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	struct dm_block *block;
417550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *n;
4183241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4193241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
4203241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r)
4213241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
4223241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4233241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	n = dm_block_data(block);
4243241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	*result = le32_to_cpu(n->header.nr_entries);
4253241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4263241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	return dm_tm_unlock(tm, block);
4273241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
4283241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4293241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornberstatic int rebalance_children(struct shadow_spine *s,
430f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber			      struct dm_btree_info *info,
431f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber			      struct dm_btree_value_type *vt, uint64_t key)
4323241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
4333241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	int i, r, has_left_sibling, has_right_sibling;
4343241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	uint32_t child_entries;
435550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *n;
4363241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4373241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	n = dm_block_data(shadow_current(s));
4383241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4393241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (le32_to_cpu(n->header.nr_entries) == 1) {
4403241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		struct dm_block *child;
4413241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		dm_block_t b = value64(n, 0);
4423241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4433241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
4443241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		if (r)
4453241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			return r;
4463241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4473241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		memcpy(n, dm_block_data(child),
4483241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		       dm_bm_block_size(dm_tm_get_bm(info->tm)));
4493241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		r = dm_tm_unlock(info->tm, child);
4503241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		if (r)
4513241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			return r;
4523241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4533241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		dm_tm_dec(info->tm, dm_block_location(child));
4543241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return 0;
4553241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
4563241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4573241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	i = lower_bound(n, key);
4583241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (i < 0)
4593241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return -ENODATA;
4603241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4613241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	r = get_nr_entries(info->tm, value64(n, i), &child_entries);
4623241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (r)
4633241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return r;
4643241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4653241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	has_left_sibling = i > 0;
4663241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
4673241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4683241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if (!has_left_sibling)
469f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber		r = rebalance2(s, info, vt, i);
4703241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4713241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	else if (!has_right_sibling)
472f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber		r = rebalance2(s, info, vt, i - 1);
4733241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4743241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	else
475f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber		r = rebalance3(s, info, vt, i - 1);
4763241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4773241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	return r;
4783241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
4793241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
480550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patockastatic int do_leaf(struct btree_node *n, uint64_t key, unsigned *index)
4813241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
4823241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	int i = lower_bound(n, key);
4833241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4843241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	if ((i < 0) ||
4853241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	    (i >= le32_to_cpu(n->header.nr_entries)) ||
4863241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	    (le64_to_cpu(n->keys[i]) != key))
4873241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		return -ENODATA;
4883241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4893241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	*index = i;
4903241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4913241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	return 0;
4923241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
4933241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
4943241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber/*
4953241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * Prepares for removal from one level of the hierarchy.  The caller must
4963241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber * call delete_at() to remove the entry at index.
4973241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber */
4983241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornberstatic int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
4993241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		      struct dm_btree_value_type *vt, dm_block_t root,
5003241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		      uint64_t key, unsigned *index)
5013241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
5023241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	int i = *index, r;
503550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *n;
5043241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5053241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	for (;;) {
5063241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		r = shadow_step(s, root, vt);
5073241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		if (r < 0)
5083241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			break;
5093241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5103241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		/*
5113241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 * We have to patch up the parent node, ugly, but I don't
5123241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 * see a way to do this automatically as part of the spine
5133241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 * op.
5143241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 */
5153241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		if (shadow_has_parent(s)) {
5163241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
517a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber			memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
5183241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			       &location, sizeof(__le64));
5193241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		}
5203241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5213241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		n = dm_block_data(shadow_current(s));
5223241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5233241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
5243241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			return do_leaf(n, key, index);
5253241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
526f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber		r = rebalance_children(s, info, vt, key);
5273241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		if (r)
5283241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			break;
5293241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5303241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		n = dm_block_data(shadow_current(s));
5313241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
5323241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			return do_leaf(n, key, index);
5333241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5343241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		i = lower_bound(n, key);
5353241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5363241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		/*
5373241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 * We know the key is present, or else
5383241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 * rebalance_children would have returned
5393241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 * -ENODATA
5403241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		 */
5413241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		root = value64(n, i);
5423241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
5433241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5443241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	return r;
5453241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
5463241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
547f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornberstatic struct dm_btree_value_type le64_type = {
548f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber	.context = NULL,
549f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber	.size = sizeof(__le64),
550f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber	.inc = NULL,
551f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber	.dec = NULL,
552f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber	.equal = NULL
553f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber};
554f046f89a99ccfd9408b94c653374ff3065c7edb3Joe Thornber
5553241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornberint dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
5563241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		    uint64_t *keys, dm_block_t *new_root)
5573241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber{
5583241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	unsigned level, last_level = info->levels - 1;
5593241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	int index = 0, r = 0;
5603241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	struct shadow_spine spine;
561550929faf89e2e2cdb3e9945ea87d383989274cfMikulas Patocka	struct btree_node *n;
5623241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5633241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	init_shadow_spine(&spine, info);
5643241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	for (level = 0; level < info->levels; level++) {
5653241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		r = remove_raw(&spine, info,
5663241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			       (level == last_level ?
5673241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber				&info->value_type : &le64_type),
5683241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			       root, keys[level], (unsigned *)&index);
5693241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		if (r < 0)
5703241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			break;
5713241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5723241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		n = dm_block_data(shadow_current(&spine));
5733241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		if (level != last_level) {
5743241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			root = value64(n, index);
5753241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			continue;
5763241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		}
5773241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5783241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));
5793241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5803241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		if (info->value_type.dec)
5813241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber			info->value_type.dec(info->value_type.context,
582a3aefb395e4f321c8b1314c88f1123624adcf743Joe Thornber					     value_ptr(n, index));
5833241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5843241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber		delete_at(n, index);
5853241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	}
5863241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5873241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	*new_root = shadow_root(&spine);
5883241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	exit_shadow_spine(&spine);
5893241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber
5903241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber	return r;
5913241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe Thornber}
5923241b1d3e0aaafbfcd320f4d71ade629728cc4f4Joe ThornberEXPORT_SYMBOL_GPL(dm_btree_remove);
593