[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250806203705.2560493-5-dhowells@redhat.com>
Date: Wed, 6 Aug 2025 21:36:25 +0100
From: David Howells <dhowells@...hat.com>
To: Steve French <sfrench@...ba.org>
Cc: David Howells <dhowells@...hat.com>,
Paulo Alcantara <pc@...guebit.org>,
Shyam Prasad N <sprasad@...rosoft.com>,
Tom Talpey <tom@...pey.com>,
Wang Zhaolong <wangzhaolong@...weicloud.com>,
Stefan Metzmacher <metze@...ba.org>,
Mina Almasry <almasrymina@...gle.com>,
linux-cifs@...r.kernel.org,
linux-kernel@...r.kernel.org,
netfs@...ts.linux.dev,
linux-fsdevel@...r.kernel.org
Subject: [RFC PATCH 04/31] cifs, nls: Provide unicode size determination func
Signed-off-by: David Howells <dhowells@...hat.com>
cc: Steve French <sfrench@...ba.org>
cc: Paulo Alcantara <pc@...guebit.org>
cc: Shyam Prasad N <sprasad@...rosoft.com>
cc: Tom Talpey <tom@...pey.com>
cc: linux-cifs@...r.kernel.org
cc: netfs@...ts.linux.dev
cc: linux-fsdevel@...r.kernel.org
---
fs/nls/nls_base.c | 33 ++++++++++++++++++++++++++++++
fs/smb/client/cifs_unicode.c | 39 ++++++++++++++++++++++++++++++++++++
fs/smb/client/cifs_unicode.h | 2 ++
include/linux/nls.h | 1 +
4 files changed, 75 insertions(+)
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 18d597e49a19..f6927c7d9fe1 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -171,6 +171,39 @@ int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian,
}
EXPORT_SYMBOL(utf8s_to_utf16s);
+/**
+ * utf8s_to_len_utf16s - Determine the length of a conversion of UTF8 to UTF16.
+ * @s: The source utf8 string (counting also stops at the first NUL byte)
+ * @inlen: The length of the string in bytes
+ */
+ssize_t utf8s_to_len_utf16s(const u8 *s, int inlen)
+{
+ unicode_t u;
+ size_t outcount = 0; /* number of UTF-16 units counted so far */
+ int size;
+
+ while (inlen > 0 && *s) {
+ if (*s & 0x80) { /* high bit set: decode to learn how many bytes it spans */
+ size = utf8_to_utf32(s, inlen, &u);
+ if (size < 0)
+ return -EINVAL; /* utf8_to_utf32() could not decode the bytes at s */
+ s += size;
+ inlen -= size;
+
+ if (u >= PLANE_SIZE) /* counted as two units (presumably a surrogate pair) */
+ outcount += 2;
+ else
+ outcount++;
+ } else { /* high bit clear: one source byte counts as one unit */
+ s++;
+ outcount++;
+ inlen--;
+ }
+ }
+ return outcount * sizeof(wchar_t); /* scale the unit count to a size in bytes */
+}
+EXPORT_SYMBOL(utf8s_to_len_utf16s);
+
static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
{
switch (endian) {
diff --git a/fs/smb/client/cifs_unicode.c b/fs/smb/client/cifs_unicode.c
index 4cc6e0896fad..ba4b361613f6 100644
--- a/fs/smb/client/cifs_unicode.c
+++ b/fs/smb/client/cifs_unicode.c
@@ -290,6 +290,45 @@ cifs_strtoUTF16(__le16 *to, const char *from, int len,
return i;
}
+/*
+ * Work out the size in bytes, not including a NUL terminator, of @from once
+ * converted to UTF16.  At most @len bytes are examined, using @codepage.
+ */
+size_t cifs_size_strtoUTF16(const char *from, int len,
+ const struct nls_table *codepage)
+{
+ wchar_t wchar_to; /* receives the char2uni() result; only charlen is used */
+ ssize_t out_len = 0;
+ int charlen;
+
+ /* special case for utf8 to handle no plane0 chars */
+ if (strcmp(codepage->charset, "utf8") == 0) {
+ out_len = utf8s_to_len_utf16s(from, len);
+ if (out_len >= 0)
+ goto success;
+ /*
+ * On failure (currently only if the source contains invalid
+ * encoded characters), fall back to UCS encoding below, which
+ * restarts the count so the negative error is never returned.
+ */
+ }
+
+ for (out_len = 0; len && *from; len -= charlen) {
+ charlen = codepage->char2uni(from, len, &wchar_to);
+ if (charlen < 1) {
+ cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
+ *from, charlen);
+ /* Sized as if replaced with a question mark */
+ charlen = 1;
+ }
+ from += charlen;
+ out_len += 2; /* one 16-bit unit per source character */
+ }
+
+success:
+ return out_len;
+}
+
/*
* cifs_utf16_bytes - how long will a string be after conversion?
* @utf16 - pointer to input string
diff --git a/fs/smb/client/cifs_unicode.h b/fs/smb/client/cifs_unicode.h
index e137a0dfbbe9..c3519a46a2b5 100644
--- a/fs/smb/client/cifs_unicode.h
+++ b/fs/smb/client/cifs_unicode.h
@@ -60,6 +60,8 @@ int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
int cifs_utf16_bytes(const __le16 *from, int maxbytes,
const struct nls_table *codepage);
int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *);
+size_t cifs_size_strtoUTF16(const char *from, int len,
+ const struct nls_table *codepage);
char *cifs_strndup_from_utf16(const char *src, const int maxlen,
const bool is_unicode,
const struct nls_table *codepage);
diff --git a/include/linux/nls.h b/include/linux/nls.h
index e0bf8367b274..026da1d5ffaa 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -56,6 +56,7 @@ extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
extern int utf8s_to_utf16s(const u8 *s, int len,
enum utf16_endian endian, wchar_t *pwcs, int maxlen);
+ssize_t utf8s_to_len_utf16s(const u8 *s, int inlen);
extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
enum utf16_endian endian, u8 *s, int maxlen);
Powered by blists - more mailing lists