[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1393276530-26423-4-git-send-email-amadvance@gmail.com>
Date: Mon, 24 Feb 2014 22:15:30 +0100
From: Andrea Mazzoleni <amadvance@...il.com>
To: clm@...com, jbacik@...com, neilb@...e.de
Cc: linux-kernel@...r.kernel.org, linux-raid@...r.kernel.org,
linux-btrfs@...r.kernel.org, amadvance@...il.com
Subject: [PATCH v5 3/3] btrfs-progs: Adds new par3456 modes to support up to six parities
Extends mkfs.btrfs to support the new par1/2/3/4/5/6 modes to create
filesystems with up to six parities.
Replaces the raid6 code with a new reference function able to compute up
to six parities.
Replaces the existing BLOCK_GROUP_RAID5/6 with new PAR1/2/3/4/5/6 ones that
handle up to six parities, and updates all the code to use them.
Signed-off-by: Andrea Mazzoleni <amadvance@...il.com>
---
Makefile | 14 ++-
chunk-recover.c | 18 +---
cmds-balance.c | 20 +++-
cmds-check.c | 7 +-
cmds-chunk.c | 18 +---
cmds-filesystem.c | 12 ++-
ctree.h | 42 ++++++++-
disk-io.h | 2 -
extent-tree.c | 3 +-
ioctl.h | 18 +++-
man/mkfs.btrfs.8.in | 4 +-
mkfs.c | 28 +++++-
mktables.c | 256 ++++++++++++++++++++++++++++++++++++++++++++++++++++
raid.c | 44 +++++++++
raid.h | 34 +++++++
raid6.c | 101 ---------------------
utils.c | 12 ++-
volumes.c | 112 ++++++++++-------------
volumes.h | 12 ++-
19 files changed, 530 insertions(+), 227 deletions(-)
create mode 100644 mktables.c
create mode 100644 raid.c
create mode 100644 raid.h
delete mode 100644 raid6.c
diff --git a/Makefile b/Makefile
index 0874a41..72c5c01 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ CFLAGS = -g -O1 -fno-strict-aliasing
objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
root-tree.o dir-item.o file-item.o inode-item.o inode-map.o \
extent-cache.o extent_io.o volumes.o utils.o repair.o \
- qgroup.o raid6.o free-space-cache.o list_sort.o
+ qgroup.o raid.o tables.o free-space-cache.o list_sort.o
cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \
cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \
cmds-quota.o cmds-qgroup.o cmds-replace.o cmds-check.o \
@@ -140,6 +140,10 @@ version.h:
@echo " [SH] $@"
$(Q)bash version.sh
+tables.c: mktables
+ @echo " [MK] $@"
+ $(Q)./mktables > tables.c
+
$(libs_shared): $(libbtrfs_objects) $(lib_links) send.h
@echo " [LD] $@"
$(Q)$(CC) $(CFLAGS) $(libbtrfs_objects) $(LDFLAGS) $(lib_LIBS) \
@@ -193,6 +197,10 @@ mkfs.btrfs: $(objects) $(libs) mkfs.o
@echo " [LD] $@"
$(Q)$(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS)
+mktables: $(libs) mktables.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o mktables mktables.o $(LDFLAGS) $(LIBS)
+
mkfs.btrfs.static: $(static_objects) mkfs.static.o $(static_libbtrfs_objects)
@echo " [LD] $@"
$(Q)$(CC) $(STATIC_CFLAGS) -o mkfs.btrfs.static mkfs.static.o $(static_objects) \
@@ -225,8 +233,8 @@ clean: $(CLEANDIRS)
@echo "Cleaning"
$(Q)rm -f $(progs) cscope.out *.o *.o.d btrfs-convert btrfs-image btrfs-select-super \
btrfs-zero-log btrfstune dir-test ioctl-test quick-test send-test btrfsck \
- btrfs.static mkfs.btrfs.static btrfs-calc-size \
- version.h $(check_defs) \
+ btrfs.static mkfs.btrfs.static btrfs-calc-size mktables \
+ version.h tables.c $(check_defs) \
$(libs) $(lib_links)
$(CLEANDIRS):
diff --git a/chunk-recover.c b/chunk-recover.c
index bcde39e..cec14cd 100644
--- a/chunk-recover.c
+++ b/chunk-recover.c
@@ -1327,8 +1327,7 @@ static int calc_num_stripes(u64 type)
{
if (type & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6))
+ BTRFS_BLOCK_GROUP_PARX))
return 0;
else if (type & (BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_DUP))
@@ -1404,13 +1403,8 @@ static int btrfs_calc_stripe_index(struct chunk_record *chunk, u64 logical)
} else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID10) {
index = stripe_nr % (chunk->num_stripes / chunk->sub_stripes);
index *= chunk->sub_stripes;
- } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID5) {
- nr_data_stripes = chunk->num_stripes - 1;
- index = stripe_nr % nr_data_stripes;
- stripe_nr /= nr_data_stripes;
- index = (index + stripe_nr) % chunk->num_stripes;
- } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID6) {
- nr_data_stripes = chunk->num_stripes - 2;
+ } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_PARX) {
+ nr_data_stripes = chunk->num_stripes - btrfs_flags_par(chunk->type_flags);
index = stripe_nr % nr_data_stripes;
stripe_nr /= nr_data_stripes;
index = (index + stripe_nr) % chunk->num_stripes;
@@ -1503,8 +1497,7 @@ no_extent_record:
if (list_empty(&devexts))
return 0;
- if (chunk->type_flags & (BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6)) {
+ if (chunk->type_flags & BTRFS_BLOCK_GROUP_PARX) {
/* Fixme: try to recover the order by the parity block. */
list_splice_tail(&devexts, &chunk->dextents);
return -EINVAL;
@@ -1540,8 +1533,7 @@ no_extent_record:
#define BTRFS_ORDERED_RAID (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID10 | \
- BTRFS_BLOCK_GROUP_RAID5 | \
- BTRFS_BLOCK_GROUP_RAID6)
+ BTRFS_BLOCK_GROUP_PARX)
static int btrfs_rebuild_chunk_stripes(struct recover_control *rc,
struct chunk_record *chunk)
diff --git a/cmds-balance.c b/cmds-balance.c
index a151475..7d116bb 100644
--- a/cmds-balance.c
+++ b/cmds-balance.c
@@ -48,10 +48,22 @@ static int parse_one_profile(const char *profile, u64 *flags)
*flags |= BTRFS_BLOCK_GROUP_RAID1;
} else if (!strcmp(profile, "raid10")) {
*flags |= BTRFS_BLOCK_GROUP_RAID10;
- } else if (!strcmp(profile, "raid5")) {
- *flags |= BTRFS_BLOCK_GROUP_RAID5;
- } else if (!strcmp(profile, "raid6")) {
- *flags |= BTRFS_BLOCK_GROUP_RAID6;
+ } else if (!strcmp(profile, "raid5")) { /* synonymous of "par1" */
+ *flags |= BTRFS_BLOCK_GROUP_PAR1;
+ } else if (!strcmp(profile, "raid6")) { /* synonymous of "par2" */
+ *flags |= BTRFS_BLOCK_GROUP_PAR2;
+ } else if (!strcmp(profile, "par1")) {
+ *flags |= BTRFS_BLOCK_GROUP_PAR1;
+ } else if (!strcmp(profile, "par2")) {
+ *flags |= BTRFS_BLOCK_GROUP_PAR2;
+ } else if (!strcmp(profile, "par3")) {
+ *flags |= BTRFS_BLOCK_GROUP_PAR3;
+ } else if (!strcmp(profile, "par4")) {
+ *flags |= BTRFS_BLOCK_GROUP_PAR4;
+ } else if (!strcmp(profile, "par5")) {
+ *flags |= BTRFS_BLOCK_GROUP_PAR5;
+ } else if (!strcmp(profile, "par6")) {
+ *flags |= BTRFS_BLOCK_GROUP_PAR6;
} else if (!strcmp(profile, "dup")) {
*flags |= BTRFS_BLOCK_GROUP_DUP;
} else if (!strcmp(profile, "single")) {
diff --git a/cmds-check.c b/cmds-check.c
index a65670e..46e1a83 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -5189,12 +5189,9 @@ u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
stripe_size = length * 2;
stripe_size /= num_stripes;
- } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
+ } else if (type & BTRFS_BLOCK_GROUP_PARX) {
stripe_size = length;
- stripe_size /= (num_stripes - 1);
- } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
- stripe_size = length;
- stripe_size /= (num_stripes - 2);
+ stripe_size /= num_stripes - btrfs_flags_par(type);
} else {
stripe_size = length;
}
diff --git a/cmds-chunk.c b/cmds-chunk.c
index 4d7fce0..b4c067d 100644
--- a/cmds-chunk.c
+++ b/cmds-chunk.c
@@ -1347,8 +1347,7 @@ static int calc_num_stripes(u64 type)
{
if (type & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6))
+ BTRFS_BLOCK_GROUP_PARX))
return 0;
else if (type & (BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_DUP))
@@ -1424,13 +1423,8 @@ static int btrfs_calc_stripe_index(struct chunk_record *chunk, u64 logical)
} else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID10) {
index = stripe_nr % (chunk->num_stripes / chunk->sub_stripes);
index *= chunk->sub_stripes;
- } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID5) {
- nr_data_stripes = chunk->num_stripes - 1;
- index = stripe_nr % nr_data_stripes;
- stripe_nr /= nr_data_stripes;
- index = (index + stripe_nr) % chunk->num_stripes;
- } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID6) {
- nr_data_stripes = chunk->num_stripes - 2;
+ } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_PARX) {
+ nr_data_stripes = chunk->num_stripes - btrfs_flags_par(chunk->type_flags);
index = stripe_nr % nr_data_stripes;
stripe_nr /= nr_data_stripes;
index = (index + stripe_nr) % chunk->num_stripes;
@@ -1523,8 +1517,7 @@ no_extent_record:
if (list_empty(&devexts))
return 0;
- if (chunk->type_flags & (BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6)) {
+ if (chunk->type_flags & BTRFS_BLOCK_GROUP_PARX) {
/* Fixme: try to recover the order by the parity block. */
list_splice_tail(&devexts, &chunk->dextents);
return -EINVAL;
@@ -1560,8 +1553,7 @@ no_extent_record:
#define BTRFS_ORDERED_RAID (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID10 | \
- BTRFS_BLOCK_GROUP_RAID5 | \
- BTRFS_BLOCK_GROUP_RAID6)
+ BTRFS_BLOCK_GROUP_PARX)
static int btrfs_rebuild_chunk_stripes(struct recover_control *rc,
struct chunk_record *chunk)
diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index 1c1926b..861cbb3 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -142,10 +142,18 @@ static char *group_profile_str(u64 flag)
return "RAID0";
case BTRFS_BLOCK_GROUP_RAID1:
return "RAID1";
- case BTRFS_BLOCK_GROUP_RAID5:
+ case BTRFS_BLOCK_GROUP_PAR1:
return "RAID5";
- case BTRFS_BLOCK_GROUP_RAID6:
+ case BTRFS_BLOCK_GROUP_PAR2:
return "RAID6";
+ case BTRFS_BLOCK_GROUP_PAR3:
+ return "PAR3";
+ case BTRFS_BLOCK_GROUP_PAR4:
+ return "PAR4";
+ case BTRFS_BLOCK_GROUP_PAR5:
+ return "PAR5";
+ case BTRFS_BLOCK_GROUP_PAR6:
+ return "PAR6";
case BTRFS_BLOCK_GROUP_DUP:
return "DUP";
case BTRFS_BLOCK_GROUP_RAID10:
diff --git a/ctree.h b/ctree.h
index 2117374..4d2d1b6 100644
--- a/ctree.h
+++ b/ctree.h
@@ -470,6 +470,7 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
+#define BTRFS_FEATURE_INCOMPAT_PAR3456 (1ULL << 10)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
@@ -482,7 +483,8 @@ struct btrfs_super_block {
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
BTRFS_FEATURE_INCOMPAT_RAID56 | \
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
- BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
+ BTRFS_FEATURE_INCOMPAT_PAR3456)
/*
* A leaf is full of items. offset and size tell us where to find
@@ -830,8 +832,39 @@ struct btrfs_csum_item {
#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
-#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
-#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
+#define BTRFS_BLOCK_GROUP_PAR1 (1ULL << 7)
+#define BTRFS_BLOCK_GROUP_PAR2 (1ULL << 8)
+#define BTRFS_BLOCK_GROUP_PAR3 (1ULL << 9)
+#define BTRFS_BLOCK_GROUP_PAR4 (1ULL << 10)
+#define BTRFS_BLOCK_GROUP_PAR5 (1ULL << 11)
+#define BTRFS_BLOCK_GROUP_PAR6 (1ULL << 12)
+
+/* tags for all the parity groups */
+#define BTRFS_BLOCK_GROUP_PARX (BTRFS_BLOCK_GROUP_PAR1 | \
+ BTRFS_BLOCK_GROUP_PAR2 | \
+ BTRFS_BLOCK_GROUP_PAR3 | \
+ BTRFS_BLOCK_GROUP_PAR4 | \
+ BTRFS_BLOCK_GROUP_PAR5 | \
+ BTRFS_BLOCK_GROUP_PAR6)
+
+/* returns the number of parities (1..6) for the single PARn bit set in group, 0 if none is set */
+static inline int btrfs_flags_par(unsigned group)
+{
+ switch (group & BTRFS_BLOCK_GROUP_PARX) {
+ case BTRFS_BLOCK_GROUP_PAR1: return 1;
+ case BTRFS_BLOCK_GROUP_PAR2: return 2;
+ case BTRFS_BLOCK_GROUP_PAR3: return 3;
+ case BTRFS_BLOCK_GROUP_PAR4: return 4;
+ case BTRFS_BLOCK_GROUP_PAR5: return 5;
+ case BTRFS_BLOCK_GROUP_PAR6: return 6;
+ }
+
+ /* no case matched: trap if any PARn bit is set, i.e. more than one was set at once */
+ BUG_ON(group & BTRFS_BLOCK_GROUP_PARX);
+
+ return 0;
+}
+
#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
@@ -840,8 +873,7 @@ struct btrfs_csum_item {
#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID1 | \
- BTRFS_BLOCK_GROUP_RAID5 | \
- BTRFS_BLOCK_GROUP_RAID6 | \
+ BTRFS_BLOCK_GROUP_PARX | \
BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID10)
diff --git a/disk-io.h b/disk-io.h
index ca6af2d..27e3dc4 100644
--- a/disk-io.h
+++ b/disk-io.h
@@ -110,5 +110,3 @@ int write_and_map_eb(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *eb);
#endif
-/* raid6.c */
-void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs);
diff --git a/extent-tree.c b/extent-tree.c
index 7860d1d..98a8cb4 100644
--- a/extent-tree.c
+++ b/extent-tree.c
@@ -1862,8 +1862,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6 |
+ BTRFS_BLOCK_GROUP_PARX |
BTRFS_BLOCK_GROUP_DUP);
if (extra_flags) {
if (flags & BTRFS_BLOCK_GROUP_DATA)
diff --git a/ioctl.h b/ioctl.h
index a589cd7..f798d22 100644
--- a/ioctl.h
+++ b/ioctl.h
@@ -466,7 +466,11 @@ enum btrfs_err_code {
BTRFS_ERROR_DEV_TGT_REPLACE,
BTRFS_ERROR_DEV_MISSING_NOT_FOUND,
BTRFS_ERROR_DEV_ONLY_WRITABLE,
- BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS
+ BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS,
+ BTRFS_ERROR_DEV_PAR3_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_PAR4_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_PAR5_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_PAR6_MIN_NOT_MET
};
/* An error code to error string mapping for the kernel
@@ -480,9 +484,9 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET:
return "unable to go below four devices on raid10";
case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET:
- return "unable to go below three devices on raid5";
+ return "unable to go below two devices on raid5/par1";
case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET:
- return "unable to go below four devices on raid6";
+ return "unable to go below three devices on raid6/par2";
case BTRFS_ERROR_DEV_TGT_REPLACE:
return "unable to remove the dev_replace target dev";
case BTRFS_ERROR_DEV_MISSING_NOT_FOUND:
@@ -492,6 +496,14 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
case BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS:
return "add/delete/balance/replace/resize operation "
"in progress";
+ case BTRFS_ERROR_DEV_PAR3_MIN_NOT_MET:
+ return "unable to go below four devices on par3";
+ case BTRFS_ERROR_DEV_PAR4_MIN_NOT_MET:
+ return "unable to go below five devices on par4";
+ case BTRFS_ERROR_DEV_PAR5_MIN_NOT_MET:
+ return "unable to go below six devices on par5";
+ case BTRFS_ERROR_DEV_PAR6_MIN_NOT_MET:
+ return "unable to go below seven devices on par6";
default:
return NULL;
}
diff --git a/man/mkfs.btrfs.8.in b/man/mkfs.btrfs.8.in
index b54e935..e3f4ec7 100644
--- a/man/mkfs.btrfs.8.in
+++ b/man/mkfs.btrfs.8.in
@@ -38,7 +38,9 @@ mkfs.btrfs uses all the available storage for the filesystem.
.TP
\fB\-d\fR, \fB\-\-data \fItype\fR
Specify how the data must be spanned across the devices specified. Valid
-values are raid0, raid1, raid5, raid6, raid10 or single.
+values are raid0, raid1, raid5, raid6, raid10, par1, par2, par3, par4, par5,
+par6 or single. The parX values enable RAID for up to six parity levels.
+Note that raid5 and raid6 are synonyms for par1 and par2.
.TP
\fB\-f\fR, \fB\-\-force\fR
Force overwrite when an existing filesystem is detected on the device.
diff --git a/mkfs.c b/mkfs.c
index 33369f9..661e59f 100644
--- a/mkfs.c
+++ b/mkfs.c
@@ -276,7 +276,7 @@ static void print_usage(void)
fprintf(stderr, "options:\n");
fprintf(stderr, "\t -A --alloc-start the offset to start the FS\n");
fprintf(stderr, "\t -b --byte-count total number of bytes in the FS\n");
- fprintf(stderr, "\t -d --data data profile, raid0, raid1, raid5, raid6, raid10, dup or single\n");
+ fprintf(stderr, "\t -d --data data profile, raid0, raid1, raid5, raid6, par[1,2,3,4,5,6], raid10, dup or single\n");
fprintf(stderr, "\t -f --force force overwrite of existing filesystem\n");
fprintf(stderr, "\t -l --leafsize size of btree leaves\n");
fprintf(stderr, "\t -L --label set a label\n");
@@ -306,9 +306,21 @@ static u64 parse_profile(char *s)
} else if (strcmp(s, "raid1") == 0) {
return BTRFS_BLOCK_GROUP_RAID1;
} else if (strcmp(s, "raid5") == 0) {
- return BTRFS_BLOCK_GROUP_RAID5;
+ return BTRFS_BLOCK_GROUP_PAR1;
} else if (strcmp(s, "raid6") == 0) {
- return BTRFS_BLOCK_GROUP_RAID6;
+ return BTRFS_BLOCK_GROUP_PAR2;
+ } else if (strcmp(s, "par1") == 0) {
+ return BTRFS_BLOCK_GROUP_PAR1;
+ } else if (strcmp(s, "par2") == 0) {
+ return BTRFS_BLOCK_GROUP_PAR2;
+ } else if (strcmp(s, "par3") == 0) {
+ return BTRFS_BLOCK_GROUP_PAR3;
+ } else if (strcmp(s, "par4") == 0) {
+ return BTRFS_BLOCK_GROUP_PAR4;
+ } else if (strcmp(s, "par5") == 0) {
+ return BTRFS_BLOCK_GROUP_PAR5;
+ } else if (strcmp(s, "par6") == 0) {
+ return BTRFS_BLOCK_GROUP_PAR6;
} else if (strcmp(s, "raid10") == 0) {
return BTRFS_BLOCK_GROUP_RAID10;
} else if (strcmp(s, "dup") == 0) {
@@ -1147,6 +1159,8 @@ static const struct btrfs_fs_feature {
"raid56 extended format" },
{ "skinny-metadata", BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA,
"reduced-size metadata extent refs" },
+ { "par3456", BTRFS_FEATURE_INCOMPAT_PAR3456,
+ "raid support with up to six parities" },
/* Keep this one last */
{ "list-all", BTRFS_FEATURE_LIST_ALL, NULL }
};
@@ -1491,10 +1505,16 @@ int main(int ac, char **av)
features |= BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS;
if ((data_profile | metadata_profile) &
- (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
+ (BTRFS_BLOCK_GROUP_PAR1 | BTRFS_BLOCK_GROUP_PAR2)) {
features |= BTRFS_FEATURE_INCOMPAT_RAID56;
}
+ if ((data_profile | metadata_profile) &
+ (BTRFS_BLOCK_GROUP_PAR3 | BTRFS_BLOCK_GROUP_PAR4
+ | BTRFS_BLOCK_GROUP_PAR5 | BTRFS_BLOCK_GROUP_PAR6)) {
+ features |= BTRFS_FEATURE_INCOMPAT_PAR3456;
+ }
+
process_fs_features(features);
ret = make_btrfs(fd, file, label, blocks, dev_block_count,
diff --git a/mktables.c b/mktables.c
new file mode 100644
index 0000000..21c0222
--- /dev/null
+++ b/mktables.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+/**
+ * Multiplication a*b in GF(2^8), bit by bit over b, reducing a with the constant 0x1d.
+ */
+static uint8_t gfmul(uint8_t a, uint8_t b)
+{
+ uint8_t v;
+
+ v = 0;
+ while (b) {
+ if ((b & 1) != 0)
+ v ^= a;
+ /* double a; when bit 7 is shifted out, fold it back in by xoring 0x1d */
+ if ((a & 0x80) != 0) {
+ a <<= 1;
+ a ^= 0x1d;
+ } else {
+ a <<= 1;
+ }
+
+ b >>= 1;
+ }
+
+ return v;
+}
+
+/**
+ * Inversion (1/a) in GF(2^8).
+ */
+uint8_t gfinv[256];
+
+/**
+ * Number of parities.
+ * This is the number of rows of the generator matrix.
+ */
+#define PARITY 6
+
+/**
+ * Number of disks.
+ * This is the number of columns of the generator matrix.
+ */
+#define DISK (257-PARITY)
+
+/**
+ * Setup the Cauchy matrix used to generate the parity: PARITY rows of DISK entries, read from gfinv[].
+ */
+static void set_cauchy(uint8_t *matrix)
+{
+ int i, j;
+ uint8_t inv_x, y;
+
+ /*
+ * The first row of the generator matrix is formed by all 1.
+ *
+ * The generator matrix is an Extended Cauchy matrix built from
+ * a Cauchy matrix adding at the top a row of all 1.
+ *
+ * Extending a Cauchy matrix in this way maintains the MDS property
+ * of the matrix.
+ *
+ * For example, considering a generator matrix of 4x6 we have now:
+ *
+ * 1 1 1 1 1 1
+ * - - - - - -
+ * - - - - - -
+ * - - - - - -
+ */
+ for (i = 0; i < DISK; ++i)
+ matrix[0*DISK+i] = 1;
+
+ /*
+ * Second row is formed with powers 2^i, and it's the first
+ * row of the Cauchy matrix.
+ *
+ * Each element of the Cauchy matrix is in the form 1/(x_i + y_j)
+ * where all x_i and y_j must be different for any i and j.
+ *
+ * For the first row with j=0, we choose x_i = 2^-i and y_0 = 0
+ * and we obtain a first row formed as:
+ *
+ * 1/(x_i + y_0) = 1/(2^-i + 0) = 2^i
+ *
+ * with 2^-i != 0 for any i
+ *
+ * In the example we get:
+ *
+ * x_0 = 1
+ * x_1 = 142
+ * x_2 = 71
+ * x_3 = 173
+ * x_4 = 216
+ * x_5 = 108
+ * y_0 = 0
+ *
+ * with the matrix:
+ *
+ * 1 1 1 1 1 1
+ * 1 2 4 8 16 32
+ * - - - - - -
+ * - - - - - -
+ */
+ inv_x = 1;
+ for (i = 0; i < DISK; ++i) {
+ matrix[1*DISK+i] = inv_x;
+ inv_x = gfmul(2, inv_x);
+ }
+
+ /*
+ * The rest of the Cauchy matrix is formed choosing for each row j
+ * a new y_j = 2^j and reusing the x_i already assigned in the first
+ * row obtaining :
+ *
+ * 1/(x_i + y_j) = 1/(2^-i + 2^j)
+ *
+ * with 2^-i + 2^j != 0 for any i,j with i>=0,j>=1,i+j<255
+ *
+ * In the example we get:
+ *
+ * y_1 = 2
+ * y_2 = 4
+ *
+ * with the matrix:
+ *
+ * 1 1 1 1 1 1
+ * 1 2 4 8 16 32
+ * 244 83 78 183 118 47
+ * 167 39 213 59 153 82
+ */
+ y = 2;
+ for (j = 0; j < PARITY-2; ++j) {
+ inv_x = 1;
+ for (i = 0; i < DISK; ++i) {
+ uint8_t x = gfinv[inv_x];
+ matrix[(j+2)*DISK+i] = gfinv[y ^ x];
+ inv_x = gfmul(2, inv_x);
+ }
+
+ y = gfmul(2, y);
+ }
+
+ /*
+ * Finally we adjust the matrix multiplying each row by
+ * the inverse of the first element in the row.
+ *
+ * Also this operation maintains the MDS property of the matrix.
+ *
+ * Resulting in:
+ *
+ * 1 1 1 1 1 1
+ * 1 2 4 8 16 32
+ * 1 245 210 196 154 113
+ * 1 187 166 215 7 106
+ */
+ for (j = 0; j < PARITY-2; ++j) {
+ uint8_t f = gfinv[matrix[(j+2)*DISK]];
+
+ for (i = 0; i < DISK; ++i)
+ matrix[(j+2)*DISK+i] = gfmul(matrix[(j+2)*DISK+i], f);
+ }
+}
+
+int main(void)
+{
+ uint8_t v;
+ int i, j, p;
+ uint8_t matrix[PARITY * 256];
+ /* the generated C source is written to stdout, starting with the license header */
+ printf("/*\n");
+ printf(" * Copyright (C) 2013 Andrea Mazzoleni\n");
+ printf(" *\n");
+ printf(" * This program is free software: you can redistribute it and/or modify\n");
+ printf(" * it under the terms of the GNU General Public License as published by\n");
+ printf(" * the Free Software Foundation, either version 2 of the License, or\n");
+ printf(" * (at your option) any later version.\n");
+ printf(" *\n");
+ printf(" * This program is distributed in the hope that it will be useful,\n");
+ printf(" * but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
+ printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n");
+ printf(" * GNU General Public License for more details.\n");
+ printf(" */\n");
+ printf("\n");
+
+ printf("#include \"kerncompat.h\"\n");
+ printf("\n");
+
+ /* a*b table; every product equal to 1 also records gfinv[i] = j, used later by set_cauchy() */
+ printf("const u8 raid_gfmul[256][256] =\n");
+ printf("{\n");
+ for (i = 0; i < 256; ++i) {
+ printf("\t{\n");
+ for (j = 0; j < 256; ++j) {
+ if (j % 8 == 0)
+ printf("\t\t");
+ v = gfmul(i, j);
+ if (v == 1)
+ gfinv[i] = j;
+ printf("0x%02x,", (unsigned)v);
+ if (j % 8 == 7)
+ printf("\n");
+ else
+ printf(" ");
+ }
+ printf("\t},\n");
+ }
+ printf("};\n\n");
+
+ /* cauchy matrix; must be built after the loop above, which is what fills gfinv[] */
+ set_cauchy(matrix);
+
+ printf("/**\n");
+ printf(" * Cauchy matrix used to generate parity.\n");
+ printf(" * This matrix is valid for up to %u parity with %u data disks.\n", PARITY, DISK);
+ printf(" *\n");
+ for (p = 0; p < PARITY; ++p) {
+ printf(" * ");
+ for (i = 0; i < DISK; ++i)
+ printf("%02x ", matrix[p*DISK+i]);
+ printf("\n");
+ }
+ printf(" */\n");
+ printf("const u8 raid_gfcauchy[%u][256] =\n", PARITY);
+ printf("{\n");
+ for (p = 0; p < PARITY; ++p) {
+ printf("\t{\n");
+ for (i = 0; i < DISK; ++i) {
+ if (i % 8 == 0)
+ printf("\t\t");
+ printf("0x%02x,", matrix[p*DISK+i]);
+ if (i % 8 == 7)
+ printf("\n");
+ else
+ printf(" ");
+ }
+ printf("\n\t},\n");
+ }
+ printf("};\n\n");
+
+ return 0;
+}
+
diff --git a/raid.c b/raid.c
new file mode 100644
index 0000000..2aa275e
--- /dev/null
+++ b/raid.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "raid.h"
+
+/* tables defined in tables.c; declared extern so raid.c does not emit a second definition */
+extern const u8 raid_gfmul[256][256];
+extern const u8 raid_gfcauchy[6][256];
+
+void raid_gen(int nd, int np, size_t size, void **vv)
+{
+ u8 **v = (u8 **)vv;
+ size_t i;
+ /* vv[0..nd-1] are read as data buffers, vv[nd..nd+np-1] are written as parity, size bytes each */
+ for (i = 0; i < size; ++i) {
+ u8 p[RAID_PARITY_MAX];
+ int j, d;
+ /* np is not checked here: it must not exceed RAID_PARITY_MAX, the size of p[] */
+ for (j = 0; j < np; ++j)
+ p[j] = 0;
+
+ for (d = 0; d < nd; ++d) {
+ u8 b = v[d][i];
+ /* add (xor) b times the raid_gfcauchy coefficient for parity j and data d */
+ for (j = 0; j < np; ++j)
+ p[j] ^= raid_gfmul[b][raid_gfcauchy[j][d]];
+ }
+ /* store the accumulated parities for byte i */
+ for (j = 0; j < np; ++j)
+ v[nd + j][i] = p[j];
+ }
+}
+
diff --git a/raid.h b/raid.h
new file mode 100644
index 0000000..83f8b25
--- /dev/null
+++ b/raid.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_H
+#define __RAID_H
+
+#include "kerncompat.h"
+
+/*
+ * Max number of parities supported.
+ */
+#define RAID_PARITY_MAX 6
+
+/*
+ * Generate the RAID Cauchy parity.
+ *
+ * Note that this is the slow reference implementation.
+ * For a faster one and documentation see lib/raid/raid.c in the Linux Kernel.
+ */
+void raid_gen(int nd, int np, size_t size, void **vv);
+
+#endif
+
diff --git a/raid6.c b/raid6.c
deleted file mode 100644
index a6ee483..0000000
--- a/raid6.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* -*- linux-c -*- ------------------------------------------------------- *
- *
- * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Boston MA 02111-1307, USA; either version 2 of the License, or
- * (at your option) any later version; incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/*
- * raid6int1.c
- *
- * 1-way unrolled portable integer math RAID-6 instruction set
- *
- * This file was postprocessed using unroll.pl and then ported to userspace
- */
-#include <stdint.h>
-#include <unistd.h>
-#include "kerncompat.h"
-#include "ctree.h"
-#include "disk-io.h"
-
-/*
- * This is the C data type to use
- */
-
-/* Change this from BITS_PER_LONG if there is something better... */
-#if BITS_PER_LONG == 64
-# define NBYTES(x) ((x) * 0x0101010101010101UL)
-# define NSIZE 8
-# define NSHIFT 3
-typedef uint64_t unative_t;
-#else
-# define NBYTES(x) ((x) * 0x01010101U)
-# define NSIZE 4
-# define NSHIFT 2
-typedef uint32_t unative_t;
-#endif
-
-/*
- * These sub-operations are separate inlines since they can sometimes be
- * specially optimized using architecture-specific hacks.
- */
-
-/*
- * The SHLBYTE() operation shifts each byte left by 1, *not*
- * rolling over into the next byte
- */
-static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
-{
- unative_t vv;
-
- vv = (v << 1) & NBYTES(0xfe);
- return vv;
-}
-
-/*
- * The MASK() operation returns 0xFF in any byte for which the high
- * bit is 1, 0x00 for any byte for which the high bit is 0.
- */
-static inline __attribute_const__ unative_t MASK(unative_t v)
-{
- unative_t vv;
-
- vv = v & NBYTES(0x80);
- vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
- return vv;
-}
-
-
-void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs)
-{
- uint8_t **dptr = (uint8_t **)ptrs;
- uint8_t *p, *q;
- int d, z, z0;
-
- unative_t wd0, wq0, wp0, w10, w20;
-
- z0 = disks - 3; /* Highest data disk */
- p = dptr[z0+1]; /* XOR parity */
- q = dptr[z0+2]; /* RS syndrome */
-
- for ( d = 0 ; d < bytes ; d += NSIZE*1 ) {
- wq0 = wp0 = *(unative_t *)&dptr[z0][d+0*NSIZE];
- for ( z = z0-1 ; z >= 0 ; z-- ) {
- wd0 = *(unative_t *)&dptr[z][d+0*NSIZE];
- wp0 ^= wd0;
- w20 = MASK(wq0);
- w10 = SHLBYTE(wq0);
- w20 &= NBYTES(0x1d);
- w10 ^= w20;
- wq0 = w10 ^ wd0;
- }
- *(unative_t *)&p[d+NSIZE*0] = wp0;
- *(unative_t *)&q[d+NSIZE*0] = wq0;
- }
-}
-
diff --git a/utils.c b/utils.c
index f499023..52b090b 100644
--- a/utils.c
+++ b/utils.c
@@ -1856,13 +1856,19 @@ int test_num_disk_vs_raid(u64 metadata_profile, u64 data_profile,
switch (dev_cnt) {
default:
+ case 7:
+ allowed |= BTRFS_BLOCK_GROUP_PAR6;
+ case 6:
+ allowed |= BTRFS_BLOCK_GROUP_PAR5;
+ case 5:
+ allowed |= BTRFS_BLOCK_GROUP_PAR4;
case 4:
- allowed |= BTRFS_BLOCK_GROUP_RAID10;
+ allowed |= BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_PAR3;
case 3:
- allowed |= BTRFS_BLOCK_GROUP_RAID6;
+ allowed |= BTRFS_BLOCK_GROUP_PAR2;
case 2:
allowed |= BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID5;
+ BTRFS_BLOCK_GROUP_PAR1;
break;
case 1:
allowed |= BTRFS_BLOCK_GROUP_DUP;
diff --git a/volumes.c b/volumes.c
index c38da6c..b1fb7de 100644
--- a/volumes.c
+++ b/volumes.c
@@ -30,6 +30,7 @@
#include "print-tree.h"
#include "volumes.h"
#include "math.h"
+#include "raid.h"
struct stripe {
struct btrfs_device *dev;
@@ -38,12 +39,7 @@ struct stripe {
static inline int nr_parity_stripes(struct map_lookup *map)
{
- if (map->type & BTRFS_BLOCK_GROUP_RAID5)
- return 1;
- else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
- return 2;
- else
- return 0;
+ return btrfs_flags_par(map->type);
}
static inline int nr_data_stripes(struct map_lookup *map)
@@ -51,8 +47,6 @@ static inline int nr_data_stripes(struct map_lookup *map)
return map->num_stripes - nr_parity_stripes(map);
}
-#define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) )
-
static LIST_HEAD(fs_uuids);
static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
@@ -643,10 +637,8 @@ static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
return calc_size;
else if (type & BTRFS_BLOCK_GROUP_RAID10)
return calc_size * (num_stripes / sub_stripes);
- else if (type & BTRFS_BLOCK_GROUP_RAID5)
- return calc_size * (num_stripes - 1);
- else if (type & BTRFS_BLOCK_GROUP_RAID6)
- return calc_size * (num_stripes - 2);
+ else if (type & BTRFS_BLOCK_GROUP_PARX)
+ return calc_size * (num_stripes - btrfs_flags_par(type));
else
return calc_size * num_stripes;
}
@@ -782,7 +774,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
}
if (type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
+ BTRFS_BLOCK_GROUP_PARX |
BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_DUP)) {
if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
@@ -822,20 +814,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
sub_stripes = 2;
min_stripes = 4;
}
- if (type & (BTRFS_BLOCK_GROUP_RAID5)) {
- num_stripes = btrfs_super_num_devices(info->super_copy);
- if (num_stripes < 2)
- return -ENOSPC;
- min_stripes = 2;
- stripe_len = find_raid56_stripe_len(num_stripes - 1,
- btrfs_super_stripesize(info->super_copy));
- }
- if (type & (BTRFS_BLOCK_GROUP_RAID6)) {
+ if (type & BTRFS_BLOCK_GROUP_PARX) {
+ min_stripes = 1 + btrfs_flags_par(type);
num_stripes = btrfs_super_num_devices(info->super_copy);
- if (num_stripes < 3)
+ if (num_stripes < min_stripes)
return -ENOSPC;
- min_stripes = 3;
- stripe_len = find_raid56_stripe_len(num_stripes - 2,
+
+ stripe_len = find_raid56_stripe_len(num_stripes - btrfs_flags_par(type),
btrfs_super_stripesize(info->super_copy));
}
@@ -1107,10 +1092,8 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
ret = map->sub_stripes;
- else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
- ret = 2;
- else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
- ret = 3;
+ else if (map->type & BTRFS_BLOCK_GROUP_PARX)
+ ret = 1 + btrfs_flags_par(map->type);
else
ret = 1;
return ret;
@@ -1163,8 +1146,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
length = ce->size / (map->num_stripes / map->sub_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
length = ce->size / map->num_stripes;
- else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6)) {
+ else if (map->type & BTRFS_BLOCK_GROUP_PARX) {
length = ce->size / nr_data_stripes(map);
rmap_len = map->stripe_len * nr_data_stripes(map);
}
@@ -1294,9 +1276,9 @@ again:
stripes_required = map->sub_stripes;
}
}
- if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)
+ if ((map->type & BTRFS_BLOCK_GROUP_PARX)
&& multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) {
- /* RAID[56] write or recovery. Return all stripes */
+ /* PAR write or recovery. Return all stripes */
stripes_required = map->num_stripes;
/* Only allocate the map if we've already got a large enough multi_ret */
@@ -1330,7 +1312,7 @@ again:
stripe_offset = offset - stripe_offset;
if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
+ BTRFS_BLOCK_GROUP_PARX |
BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_DUP)) {
/* we limit the length of each bio to what fits in a stripe */
@@ -1369,14 +1351,14 @@ again:
multi->num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
- } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6)) {
+ } else if (map->type & BTRFS_BLOCK_GROUP_PARX) {
if (raid_map) {
int rot;
u64 tmp;
u64 raid56_full_stripe_start;
u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len;
+ int j;
/*
* align the start of our data stripe in the logical
@@ -1399,9 +1381,8 @@ again:
raid_map[(i+rot) % map->num_stripes] =
ce->start + (tmp + i) * map->stripe_len;
- raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE;
- if (map->type & BTRFS_BLOCK_GROUP_RAID6)
- raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE;
+ for (j = 0; j < btrfs_flags_par(map->type); j++)
+ raid_map[(i+rot+j) % map->num_stripes] = BTRFS_RAID_PAR1_STRIPE + j;
*length = map->stripe_len;
stripe_index = 0;
@@ -1413,8 +1394,9 @@ again:
/*
* Mirror #0 or #1 means the original data block.
- * Mirror #2 is RAID5 parity block.
- * Mirror #3 is RAID6 Q block.
+ * Mirror #2 is RAID5/PAR1 P block.
+ * Mirror #3 is RAID6/PAR2 Q block.
+ * ... and so on, up to Mirror #7 for the PAR6 block.
*/
if (mirror_num > 1)
stripe_index = nr_data_stripes(map) + mirror_num - 2;
@@ -1838,7 +1820,7 @@ static void split_eb_for_raid56(struct btrfs_fs_info *info,
int ret;
for (i = 0; i < num_stripes; i++) {
- if (raid_map[i] >= BTRFS_RAID5_P_STRIPE)
+ if (raid_map[i] >= BTRFS_RAID_PAR1_STRIPE)
break;
eb = malloc(sizeof(struct extent_buffer) + stripe_len);
@@ -1871,11 +1853,13 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
struct btrfs_multi_bio *multi,
u64 stripe_len, u64 *raid_map)
{
- struct extent_buffer **ebs, *p_eb = NULL, *q_eb = NULL;
+ struct extent_buffer **ebs;
+ struct extent_buffer *p_eb[RAID_PARITY_MAX];
int i;
int j;
int ret;
int alloc_size = eb->len;
+ int np;
ebs = kmalloc(sizeof(*ebs) * multi->num_stripes, GFP_NOFS);
BUG_ON(!ebs);
@@ -1883,12 +1867,16 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
if (stripe_len > alloc_size)
alloc_size = stripe_len;
+ np = 0;
+ for (i = 0; i < RAID_PARITY_MAX; i++)
+ p_eb[i] = NULL;
+
split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map,
multi->num_stripes);
for (i = 0; i < multi->num_stripes; i++) {
struct extent_buffer *new_eb;
- if (raid_map[i] < BTRFS_RAID5_P_STRIPE) {
+ if (raid_map[i] < BTRFS_RAID_PAR1_STRIPE) {
ebs[i]->dev_bytenr = multi->stripes[i].physical;
ebs[i]->fd = multi->stripes[i].dev->fd;
multi->stripes[i].dev->total_ios++;
@@ -1902,35 +1890,33 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
multi->stripes[i].dev->total_ios++;
new_eb->len = stripe_len;
- if (raid_map[i] == BTRFS_RAID5_P_STRIPE)
- p_eb = new_eb;
- else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE)
- q_eb = new_eb;
+ /* parity index */
+ j = raid_map[i] - BTRFS_RAID_PAR1_STRIPE;
+
+ BUG_ON(j < 0 || j >= RAID_PARITY_MAX);
+
+ p_eb[j] = new_eb;
+
+ /* keep track of the number of parities used */
+ if (j + 1 > np)
+ np = j + 1;
}
- if (q_eb) {
+
+ if (np != 0) {
void **pointers;
- pointers = kmalloc(sizeof(*pointers) * multi->num_stripes,
- GFP_NOFS);
+ pointers = kmalloc(sizeof(*pointers) * multi->num_stripes, GFP_NOFS);
BUG_ON(!pointers);
- ebs[multi->num_stripes - 2] = p_eb;
- ebs[multi->num_stripes - 1] = q_eb;
+ for (i = 0; i < np; i++)
+ ebs[multi->num_stripes - np + i] = p_eb[i];
for (i = 0; i < multi->num_stripes; i++)
pointers[i] = ebs[i]->data;
- raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers);
+ raid_gen(multi->num_stripes - np, np, stripe_len, pointers);
+
kfree(pointers);
- } else {
- ebs[multi->num_stripes - 1] = p_eb;
- memcpy(p_eb->data, ebs[0]->data, stripe_len);
- for (j = 1; j < multi->num_stripes - 1; j++) {
- for (i = 0; i < stripe_len; i += sizeof(unsigned long)) {
- *(unsigned long *)(p_eb->data + i) ^=
- *(unsigned long *)(ebs[j]->data + i);
- }
- }
}
for (i = 0; i < multi->num_stripes; i++) {
diff --git a/volumes.h b/volumes.h
index 2802cb0..0a73084 100644
--- a/volumes.h
+++ b/volumes.h
@@ -137,9 +137,15 @@ struct map_lookup {
#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
-#define BTRFS_RAID5_P_STRIPE ((u64)-2)
-#define BTRFS_RAID6_Q_STRIPE ((u64)-1)
-
+/*
+ * Marker values stored in raid_map to flag parity stripes (PAR1..PAR6).
+ */
+#define BTRFS_RAID_PAR1_STRIPE ((u64)-6)
+#define BTRFS_RAID_PAR2_STRIPE ((u64)-5)
+#define BTRFS_RAID_PAR3_STRIPE ((u64)-4)
+#define BTRFS_RAID_PAR4_STRIPE ((u64)-3)
+#define BTRFS_RAID_PAR5_STRIPE ((u64)-2)
+#define BTRFS_RAID_PAR6_STRIPE ((u64)-1)
int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
u64 logical, u64 *length, u64 *type,
--
1.7.12.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists