[<prev] [next>] [day] [month] [year] [list]
Message-ID: <4FB2E192.3080004@gmail.com>
Date: Wed, 16 May 2012 01:06:58 +0200
From: Vladimir 'φ-coder/phcoder' Serbinenko
<phcoder@...il.com>
To: linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
Christoph Hellwig <hch@...era.com>,
Anton Salikhmetov <alexo@...era.com>
Subject: [PATCH 4/8] Support non-BMP characters on HFS+.
This one is a little bit tricky: HFS+ transforms UTF-16, but because it was designed without any attention to non-BMP characters, those characters are neither decomposed nor case-folded.
Signed-off-by: Vladimir Serbinenko <phcoder@...il.com>
---
fs/hfsplus/unicode.c | 76 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 62 insertions(+), 14 deletions(-)
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 5b2c8de..161a23b 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -97,6 +97,11 @@ int hfsplus_strcmp(const struct hfsplus_unistr *s1,
#define Hangul_TCount 28
#define Hangul_NCount (Hangul_VCount * Hangul_TCount)
+#define SURROGATE_MASK 0xfffff800
+#define SURROGATE_PAIR 0x0000d800
+#define SURROGATE_LOW 0x00000400
+#define SURROGATE_BITS 0x000003ff
+
static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
{
@@ -189,6 +194,9 @@ int hfsplus_uni2asc(struct super_block *sb,
c0 = ':';
break;
}
+
+ if ((c0 & SURROGATE_MASK) == SURROGATE_PAIR)
+ goto same;
res = nls->uni2char(c0, op, len);
if (res < 0) {
if (res == -ENAMETOOLONG)
@@ -232,7 +240,19 @@ same:
cc = c0;
}
done:
- res = nls->uni2char(cc, op, len);
+ if ((cc & SURROGATE_MASK) == SURROGATE_PAIR
+ && !(cc & SURROGATE_LOW)
+ && ustrlen
+ && (be16_to_cpu(*ip) & SURROGATE_MASK) == SURROGATE_PAIR
+ && (be16_to_cpu(*ip) & SURROGATE_LOW)) {
+ unicode_t complete;
+ complete = (c0 & SURROGATE_BITS) << 10;
+ complete |= (be16_to_cpu(*ip++) & SURROGATE_BITS);
+ complete += 0x10000;
+ ustrlen--;
+ res = nls->uni2char(complete, op, len);
+ } else
+ res = nls->uni2char(cc, op, len);
if (res < 0) {
if (res == -ENAMETOOLONG)
goto out;
@@ -256,7 +276,7 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
unicode_t *uc)
{
int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
- if (size <= 0 || *uc > 0xffff) {
+ if (size <= 0) {
*uc = '?';
size = 1;
}
@@ -272,10 +292,13 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
}
/* Decomposes a single unicode character. */
-static inline u16 *decompose_unichar(wchar_t uc, int *size)
+static inline u16 *decompose_unichar(unicode_t uc, int *size)
{
int off;
+ if (uc >= 0x10000)
+ return NULL;
+
off = hfsplus_decompose_table[(uc >> 12) & 0xf];
if (off == 0 || off == 0xffff)
return NULL;
@@ -316,8 +339,16 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
do {
ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
} while (--dsize > 0);
- } else
- ustr->unicode[outlen++] = cpu_to_be16(c);
+ } else {
+ int s;
+ s = unicode_to_utf16s(c, UTF16_BIG_ENDIAN,
+ ustr->unicode + outlen,
+ HFSPLUS_MAX_STRLEN - outlen);
+ if (s <= 0)
+ break;
+
+ outlen += s;
+ }
astr += size;
len -= size;
@@ -342,7 +373,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
int casefold, decompose, size, len;
unsigned long hash;
unicode_t c;
- u16 c2;
+ unicode_t c2;
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
@@ -369,9 +400,17 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
} while (--dsize > 0);
} else {
c2 = c;
- if (casefold)
+ if (casefold && c2 < 0x10000)
c2 = case_fold(c2);
- if (!casefold || c2)
+ if (c2 >= 0x10000) {
+ int i, s;
+ u16 tmp[2];
+ s = unicode_to_utf16s(c2,
+ UTF16_HOST_ENDIAN,
+ tmp, 2);
+ for (i = 0; i < s; i++)
+ hash = partial_name_hash(tmp[i], hash);
+ } else if (!casefold || c2)
hash = partial_name_hash(c2, hash);
}
}
@@ -395,6 +434,7 @@ int hfsplus_compare_dentry(const struct dentry *parent,
int dsize1, dsize2, len1, len2;
const u16 *dstr1, *dstr2;
const char *astr1, *astr2;
+ u16 buf1[2], buf2[2];
u16 c1, c2;
unicode_t c;
@@ -416,9 +456,13 @@ int hfsplus_compare_dentry(const struct dentry *parent,
if (decompose)
dstr1 = decompose_unichar(c, &dsize1);
if (!decompose || !dstr1) {
- c1 = c;
- dstr1 = &c1;
- dsize1 = 1;
+ int s;
+ s = unicode_to_utf16s(c, UTF16_HOST_ENDIAN,
+ buf1, 2);
+ if (s <= 0)
+ s = 0;
+ dstr1 = buf1;
+ dsize1 = s;
}
}
@@ -430,9 +474,13 @@ int hfsplus_compare_dentry(const struct dentry *parent,
if (decompose)
dstr2 = decompose_unichar(c, &dsize2);
if (!decompose || !dstr2) {
- c2 = c;
- dstr2 = &c2;
- dsize2 = 1;
+ int s;
+ s = unicode_to_utf16s(c, UTF16_HOST_ENDIAN,
+ buf2, 2);
+ if (s <= 0)
+ s = 0;
+ dstr2 = buf2;
+ dsize2 = s;
}
}
--
1.7.10
--
Regards
Vladimir 'φ-coder/phcoder' Serbinenko
Download attachment "signature.asc" of type "application/pgp-signature" (295 bytes)
Powered by blists - more mailing lists