[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8c4279dd244eed4b1a6ef7e2ea6dafac0b5fc79e.camel@physik.fu-berlin.de>
Date: Thu, 30 Jan 2025 09:44:13 +0100
From: John Paul Adrian Glaubitz <glaubitz@...sik.fu-berlin.de>
To: Julian Vetter <julian@...er-limits.org>, Arnd Bergmann <arnd@...db.de>,
Yoshinori Sato <ysato@...rs.sourceforge.jp>, Rich Felker <dalias@...c.org>
Cc: linux-sh@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] sh: Remove IO memcpy and memset from sh code
Hi Julian,
On Tue, 2025-01-28 at 11:13 +0100, Julian Vetter wrote:
> Remove IO memcpy and memset from sh specific code and fall back to the
> new implementations from lib/iomem_copy.c. They use word accesses if the
> buffers are aligned and only fall back to byte accesses for potentially
> unaligned parts of a buffer.
>
> Signed-off-by: Julian Vetter <julian@...er-limits.org>
> ---
> Changes for V2:
> - Removed also SH4 specific memcpy_fromio code
> ---
> arch/sh/include/asm/io.h | 8 ---
> arch/sh/kernel/io.c | 111 ---------------------------------------
> 2 files changed, 119 deletions(-)
> delete mode 100644 arch/sh/kernel/io.c
>
> diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
> index cf5eab840d57..1e33a1c8b72d 100644
> --- a/arch/sh/include/asm/io.h
> +++ b/arch/sh/include/asm/io.h
> @@ -268,14 +268,6 @@ __BUILD_IOPORT_STRING(q, u64)
>
> #define IO_SPACE_LIMIT 0xffffffff
>
> -/* We really want to try and get these to memcpy etc */
> -#define memset_io memset_io
> -#define memcpy_fromio memcpy_fromio
> -#define memcpy_toio memcpy_toio
> -void memcpy_fromio(void *, const volatile void __iomem *, unsigned long);
> -void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
> -void memset_io(volatile void __iomem *, int, unsigned long);
> -
> /* Quad-word real-mode I/O, don't ask.. */
> unsigned long long peek_real_address_q(unsigned long long addr);
> unsigned long long poke_real_address_q(unsigned long long addr,
> diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c
> deleted file mode 100644
> index da22f3b32d30..000000000000
> --- a/arch/sh/kernel/io.c
> +++ /dev/null
> @@ -1,111 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -/*
> - * arch/sh/kernel/io.c - Machine independent I/O functions.
> - *
> - * Copyright (C) 2000 - 2009 Stuart Menefy
> - * Copyright (C) 2005 Paul Mundt
> - */
> -#include <linux/module.h>
> -#include <linux/pci.h>
> -#include <asm/machvec.h>
> -#include <asm/io.h>
> -
> -/*
> - * Copy data from IO memory space to "real" memory space.
> - */
> -void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned long count)
> -{
> - /*
> - * Would it be worthwhile doing byte and long transfers first
> - * to try and get aligned?
> - */
> -#ifdef CONFIG_CPU_SH4
> - if ((count >= 0x20) &&
> - (((u32)to & 0x1f) == 0) && (((u32)from & 0x3) == 0)) {
> - int tmp2, tmp3, tmp4, tmp5, tmp6;
> -
> - __asm__ __volatile__(
> - "1: \n\t"
> - "mov.l @%7+, r0 \n\t"
> - "mov.l @%7+, %2 \n\t"
> - "movca.l r0, @%0 \n\t"
> - "mov.l @%7+, %3 \n\t"
> - "mov.l @%7+, %4 \n\t"
> - "mov.l @%7+, %5 \n\t"
> - "mov.l @%7+, %6 \n\t"
> - "mov.l @%7+, r7 \n\t"
> - "mov.l @%7+, r0 \n\t"
> - "mov.l %2, @(0x04,%0) \n\t"
> - "mov #0x20, %2 \n\t"
> - "mov.l %3, @(0x08,%0) \n\t"
> - "sub %2, %1 \n\t"
> - "mov.l %4, @(0x0c,%0) \n\t"
> - "cmp/hi %1, %2 ! T if 32 > count \n\t"
> - "mov.l %5, @(0x10,%0) \n\t"
> - "mov.l %6, @(0x14,%0) \n\t"
> - "mov.l r7, @(0x18,%0) \n\t"
> - "mov.l r0, @(0x1c,%0) \n\t"
> - "bf.s 1b \n\t"
> - " add #0x20, %0 \n\t"
> - : "=&r" (to), "=&r" (count),
> - "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4),
> - "=&r" (tmp5), "=&r" (tmp6), "=&r" (from)
> - : "7"(from), "0" (to), "1" (count)
> - : "r0", "r7", "t", "memory");
> - }
> -#endif
> -
> - if ((((u32)to | (u32)from) & 0x3) == 0) {
> - for (; count > 3; count -= 4) {
> - *(u32 *)to = *(volatile u32 *)from;
> - to += 4;
> - from += 4;
> - }
> - }
> -
> - for (; count > 0; count--) {
> - *(u8 *)to = *(volatile u8 *)from;
> - to++;
> - from++;
> - }
> -
> - mb();
> -}
> -EXPORT_SYMBOL(memcpy_fromio);
> -
> -/*
> - * Copy data from "real" memory space to IO memory space.
> - */
> -void memcpy_toio(volatile void __iomem *to, const void *from, unsigned long count)
> -{
> - if ((((u32)to | (u32)from) & 0x3) == 0) {
> - for ( ; count > 3; count -= 4) {
> - *(volatile u32 *)to = *(u32 *)from;
> - to += 4;
> - from += 4;
> - }
> - }
> -
> - for (; count > 0; count--) {
> - *(volatile u8 *)to = *(u8 *)from;
> - to++;
> - from++;
> - }
> -
> - mb();
> -}
> -EXPORT_SYMBOL(memcpy_toio);
> -
> -/*
> - * "memset" on IO memory space.
> - * This needs to be optimized.
> - */
> -void memset_io(volatile void __iomem *dst, int c, unsigned long count)
> -{
> - while (count) {
> - count--;
> - writeb(c, dst);
> - dst++;
> - }
> -}
> -EXPORT_SYMBOL(memset_io);
I'm not sure that I understand the motivation for removing the hand-optimized SH4
assembler code in memcpy_fromio in favor of potentially slower generic C code. What
is the reasoning behind this?
Also, it seems that this patch would make your other patch
"sh: Remove memset_io from sh specific code"
obsolete.
Geert, do you have any opinions on this patch?
Thanks,
Adrian
--
.''`. John Paul Adrian Glaubitz
: :' : Debian Developer
`. `' Physicist
`- GPG: 62FF 8A75 84E0 2956 9546 0006 7426 3B37 F5B5 F913
Powered by blists - more mailing lists