[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <201505261933.t4QJXCIW015524@farm-0024.internal.tilera.com>
Date: Tue, 26 May 2015 15:33:12 -0400
From: Chris Metcalf <cmetcalf@...hip.com>
To: Michael Ellerman <mpe@...erman.id.au>
CC: Linus Torvalds <torvalds@...ux-foundation.org>,
Al Viro <viro@...iv.linux.org.uk>,
Fabian Frederick <fabf@...net.be>,
Randy Dunlap <rdunlap@...radead.org>,
Rickard Strandqvist <rickard_strandqvist@...ctrumdigital.se>,
<gregkh@...uxfoundation.org>,
Peter Zijlstra <peterz@...radead.org>,
"David S. Miller" <davem@...emloft.net>,
Frederic Weisbecker <fweisbec@...il.com>,
"Andrew Morton" <akpm@...ux-foundation.org>,
Sam Ravnborg <sam@...nborg.org>,
"Stephen Rothwell" <sfr@...b.auug.org.au>,
Theodore Ts'o <tytso@....edu>,
"Grant Likely" <grant.likely@...aro.org>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
<linux-arch@...r.kernel.org>
Subject: Re: [PATCH 0/3] add new strscpy() API for string copy
On 05/17/2015 09:13 PM, Michael Ellerman wrote:
> On Fri, 2015-05-15 at 11:15 -0400, Chris Metcalf wrote:
>> On 05/14/2015 07:10 PM, Michael Ellerman wrote:
>>> On Thu, 2015-04-30 at 12:01 -0400, Chris Metcalf wrote:
>>>>
>>>> I tested the implementation with a simple user-space harness, so I
>>>> believe it is correct for the corner cases I could think of. In
>>>> particular I pairwise-tested all the unaligned values of source and
>>>> dest, and tested the restriction on src page-crossing at all
>>>> unaligned offsets approaching the page boundary.
>>> Can you please put that in tools/testing/selftests and merge it as part of the
>>> series? That way I can run the tests and be confident it works on powerpc.
>>
>> Unfortunately, the strscpy patch series only changes the one previous
>> user of the API, which is a tile-architecture-only driver piece, not
>> particularly useful for anyone else for testing.
>>
>> The testing I did pulled strscpy() and word-at-a-time out into a
>> separate, standalone userspace implementation, and tested it there,
>> rather than doing tests through the syscall API like
>> tools/testing/selftests.
>
> Not everything in selftests has to or does go through the syscall API.
>
> We (powerpc) have tests of our memcpy/memcmp/load_unaligned_zeropad that are
> built as standalone test programs.
>
> Doing that for stuff in lib/string.c does look a bit complicated, because you'd
> need to pull in a bunch of kernel headers.
>
> Do you mind posting your test code somewhere so I can run it, and maybe I can
> work out how to fold it into a selftest.
Seems easiest to just post it to LKML so anyone else who wants to can
take a look at it. Trying to post a collection of files without
violating the "no MIME parts" rule for LKML made me dust off my
Usenet memories and track down a version of shar to use to bundle up
the test.c harness, the strscpy.c code (excerpted from lib/string.c with
suitable #defines and #includes so it builds in userspace), and a
version of word-at-a-time.h that works for the two environments I
tested it in (x86_64 and tile).
--
Chris Metcalf, EZChip Semiconductor
http://www.ezchip.com
#!/bin/sh
# This is a shell archive (produced by GNU sharutils 4.2.1).
# To extract the files from this archive, save it to some FILE, remove
# everything before the `!/bin/sh' line above, then type `sh FILE'.
#
# existing files WILL be overwritten
#
# This shar contains:
# length mode name
# ------ ---------- ------------------------------------------
# 2294 -rw-r--r-- test.c
# 3321 -rw-r--r-- strscpy.c
# 2905 -rw-r--r-- word-at-a-time.h
#
echo=echo
if mkdir _sh15429; then
$echo 'x -' 'creating lock directory'
else
$echo 'failed to create lock directory'
exit 1
fi
# ============= test.c ==============
$echo 'x -' extracting 'test.c' '(text)'
sed 's/^X//' << 'SHAR_EOF' > 'test.c' &&
/* Compile "gcc -O -o test test.c strscpy.c" and run "./test" */
X
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/mman.h>
X
size_t strscpy_truncate(char *dest, const char *src, size_t count);
size_t strscpy(char *dest, const char *src, size_t count);
X
char dest[1024] __attribute__((aligned(8)));
char src[1024] __attribute__((aligned(8)));
X
#define assert(check) do { \
X if (!(check)) { \
X printf("%s: %d: %d: %s @%zd: %d => %d\n", #check, i, j, s, count, rc, ret); \
X exit(1); \
X } } while (0)
X
void test(int i, int j, char *s, size_t count, int ret)
{
X int rc;
X
X if (ret == -1)
X ret = strlen(s);
X
X memset(dest, 0, sizeof(dest));
X memset(src, 0, sizeof(src));
X strcpy(&src[i], s);
X rc = strscpy(&dest[j], &src[i], count);
X assert(rc == ret);
X if (rc > 0)
X assert(strcmp(&dest[j], s) == 0);
X else if (count > 0)
X assert(dest[j] == '\0');
X
X memset(dest, 0, sizeof(dest));
X memset(src, 0, sizeof(src));
X strcpy(&src[i], s);
X rc = strscpy_truncate(&dest[j], &src[i], count);
X assert(rc == ret);
X if (rc > 0)
X assert(strcmp(&dest[j], s) == 0);
X else if (count > 0) {
X assert(strncmp(&dest[j], s, count-1) == 0);
X assert(dest[j+count-1] == '\0');
X }
}
X
int main()
{
X int i, j;
X
X for (i = 0; i < 15; ++i) {
X for (j = 0; j < 15; ++j) {
X test(i, j, "Hello, world", sizeof(src) - i, -1);
X test(i, j, "Hello, world", 12, -E2BIG);
X test(i, j, "foo", 0, -E2BIG);
X test(i, j, "foo", 1, -E2BIG);
X }
X }
X
X /* Check we never walk across a page boundary past the source. */
X char *p = mmap(NULL, 2*getpagesize(), PROT_READ|PROT_WRITE,
X MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
X mprotect(p+getpagesize(), getpagesize(), PROT_NONE);
X char *base = p + getpagesize() - 16;
X strcpy(base, "This is some ra");
X for (i = 0; i < 15; ++i) {
X memset(dest, 0, 16);
X int rc = strscpy_truncate(dest, base + i, sizeof(dest));
X if (rc != 16 - i - 1 || strcmp(dest, base + i) != 0) {
X printf("Badness at %d %d\n", i, rc);
X exit(1);
X }
X }
X
X printf("OK!\n");
X return 0;
}
SHAR_EOF
chmod 0644 'test.c' ||
$echo 'restore of' 'test.c' 'failed'
# ============= strscpy.c ==============
$echo 'x -' extracting 'strscpy.c' '(text)'
sed 's/^X//' << 'SHAR_EOF' > 'strscpy.c' &&
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include "word-at-a-time.h"
#ifndef __tile__
#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
#endif
#define PAGE_SIZE getpagesize()
#define EXPORT_SYMBOL(sym)
X
#ifndef __HAVE_ARCH_STRSCPY_TRUNCATE
/**
X * strscpy_truncate - Copy a C-string into a sized buffer
X * @dest: Where to copy the string to
X * @src: Where to copy the string from
X * @count: Size of destination buffer
X *
X * Copy the string, or as much of it as fits, into the dest buffer.
X * The routine returns the number of characters copied (not including
X * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough.
X * The behavior is undefined if the string buffers overlap.
X *
X * Note that the implementation is robust to the string changing out
X * from underneath it, unlike the current strlcpy() implementation,
X * and it is easier to check overflow than with strlcpy()'s API.
X *
X * strscpy() is preferred over this function unless a truncated string
X * provides some valid semantics in the destination buffer.
X */
ssize_t strscpy_truncate(char *dest, const char *src, size_t count)
{
X const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
X size_t max = count;
X long res = 0;
X
X if (count == 0)
X return -E2BIG;
X
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
X /*
X * If src is unaligned, don't cross a page boundary,
X * since we don't know if the next page is mapped.
X */
X if ((long)src & (sizeof(long) - 1)) {
X size_t limit = PAGE_SIZE - ((long)src & (PAGE_SIZE - 1));
X if (limit < max)
X max = limit;
X }
#else
X /* If src or dest is unaligned, don't do word-at-a-time. */
X if (((long) dest | (long) src) & (sizeof(long) - 1))
X max = 0;
#endif
X
X while (max >= sizeof(unsigned long)) {
X unsigned long c, data;
X
X c = *(unsigned long *)(src+res);
X *(unsigned long *)(dest+res) = c;
X if (has_zero(c, &data, &constants)) {
X data = prep_zero_mask(c, data, &constants);
X data = create_zero_mask(data);
X return res + find_zero(data);
X }
X res += sizeof(unsigned long);
X count -= sizeof(unsigned long);
X max -= sizeof(unsigned long);
X }
X
X while (count) {
X char c;
X
X c = src[res];
X dest[res] = c;
X if (!c)
X return res;
X res++;
X count--;
X }
X
X /* Hit buffer length without finding a NUL; force NUL-termination. */
X if (res)
X dest[res-1] = '\0';
X
X return -E2BIG;
}
EXPORT_SYMBOL(strscpy_truncate);
#endif
X
#ifndef __HAVE_ARCH_STRSCPY
/**
X * strscpy - Copy a C-string into a sized buffer, but only if it fits
X * @dest: Where to copy the string to
X * @src: Where to copy the string from
X * @count: Size of destination buffer
X *
X * Copy the string into the dest buffer. The routine returns the
X * number of characters copied (not including the trailing NUL) or
X * -E2BIG if the destination buffer wasn't big enough. The behavior
X * is undefined if the string buffers overlap. The destination buffer
X * is set to the empty string if the buffer is not big enough.
X *
X * Preferred over strlcpy() in all cases, and over strncpy() unless
X * the latter's zero-fill behavior is desired and truncation of the
X * source string is known not to be an issue.
X */
ssize_t strscpy(char *dest, const char *src, size_t count)
{
X ssize_t res = strscpy_truncate(dest, src, count);
X if (res < 0 && count != 0)
X dest[0] = '\0';
X return res;
}
EXPORT_SYMBOL(strscpy);
#endif
SHAR_EOF
chmod 0644 'strscpy.c' ||
$echo 'restore of' 'strscpy.c' 'failed'
# ============= word-at-a-time.h ==============
$echo 'x -' extracting 'word-at-a-time.h' '(text)'
sed 's/^X//' << 'SHAR_EOF' > 'word-at-a-time.h' &&
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H
X
#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
X
#ifndef __tile__
#define IS_UNALIGNED(src, dst) 0
#else
#define IS_UNALIGNED(src, dst) \
X (((long) dst | (long) src) & (sizeof(long) - 1))
#endif
X
#ifdef __BIG_ENDIAN__
X
struct word_at_a_time {
X const unsigned long high_bits, low_bits;
};
X
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }
X
/* Bit set in the bytes that have a zero */
static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
{
X unsigned long mask = (val & c->low_bits) + c->low_bits;
X return ~(mask | rhs);
}
X
#define create_zero_mask(mask) (mask)
X
static inline long find_zero(unsigned long mask)
{
X long byte = 0;
#ifdef _LP64
X if (mask >> 32)
X mask >>= 32;
X else
X byte = 4;
#endif
X if (mask >> 16)
X mask >>= 16;
X else
X byte += 2;
X return (mask >> 8) ? byte : byte + 1;
}
X
static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
{
X unsigned long rhs = val | c->low_bits;
X *data = rhs;
X return (val + c->high_bits) & ~rhs;
}
X
#ifndef zero_bytemask
#define zero_bytemask(mask) (~1ul << __fls(mask))
#endif
X
#else
X
/*
X * The optimal byte mask counting is probably going to be something
X * that is architecture-specific. If you have a reliably fast
X * bit count instruction, that might be better than the multiply
X * and shift, for example.
X */
struct word_at_a_time {
X const unsigned long one_bits, high_bits;
};
X
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
X
#ifdef _LP64
X
/*
X * Jan Achrenius on G+: microoptimized version of
X * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
X * that works for the bytemasks without having to
X * mask them first.
X */
static inline long count_masked_bytes(unsigned long mask)
{
X return mask*0x0001020304050608ul >> 56;
}
X
#else /* 32-bit case */
X
/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline long count_masked_bytes(long mask)
{
X /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
X long a = (0x0ff0001+mask) >> 23;
X /* Fix the 1 for 00 case */
X return a & mask;
}
X
#endif
X
/* Return nonzero if it has a zero */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
X unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
X *bits = mask;
X return mask;
}
X
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
X return bits;
}
X
static inline unsigned long create_zero_mask(unsigned long bits)
{
X bits = (bits - 1) & ~bits;
X return bits >> 7;
}
X
/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)
X
static inline unsigned long find_zero(unsigned long mask)
{
X return count_masked_bytes(mask);
}
X
#endif /* __BIG_ENDIAN */
X
#endif /* _ASM_WORD_AT_A_TIME_H */
SHAR_EOF
chmod 0644 'word-at-a-time.h' ||
$echo 'restore of' 'word-at-a-time.h' 'failed'
rm -fr _sh15429
exit 0
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists