Zhaolei wrote: > Hello, Mathieu > > Why not use instructions generated by gcc instead of memcpy on arch without > 64bit write as: > case 4: *(u32 *)dest = *(const u32 *)src; > break; > case 8: *(u64 *)dest = *(const u64 *)src; > break; > > IMHO, even on arch without 64bit write, memcpy is more complex. #include <stdint.h> char dest[100]; char src[100]; typedef uint64_t u64; typedef uint32_t u32; void gcc_u64(void) { asm("/* begin */"); *(u64 *)dest = *(const u64 *)src; asm("/* end */"); } movl src, %eax movl src+4, %edx movl %eax, dest movl %edx, dest+4 void twice_u32(void) { asm("/* begin */"); ((u32 *)dest)[0] = ((const u32 *)src)[0]; ((u32 *)dest)[1] = ((const u32 *)src)[1]; asm("/* end */"); } movl src, %eax movl %eax, dest movl src+4, %eax movl %eax, dest+4 gcc seems to do better register scheduling than my code, so I think it's not so bad. I will take your proposal. Signed-off-by: Mathieu Desnoyers CC: Zhaolei --- include/linux/ltt-relay.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) Index: linux-2.6-lttng/include/linux/ltt-relay.h =================================================================== --- linux-2.6-lttng.orig/include/linux/ltt-relay.h 2009-03-05 15:40:02.000000000 -0500 +++ linux-2.6-lttng/include/linux/ltt-relay.h 2009-03-05 15:40:42.000000000 -0500 @@ -215,13 +215,16 @@ static inline void ltt_relay_do_copy(voi case 4: *(u32 *)dest = *(const u32 *)src; break; -#if (BITS_PER_LONG == 64) case 8: *(u64 *)dest = *(const u64 *)src; break; -#endif default: - memcpy(dest, src, len); + /* + * What we really want here is an inline memcpy, but we don't + * have constants, so gcc generally uses a function call. 
+ */ + for (; len > 0; len--) + *(u8 *)dest++ = *(const u8 *)src++; } } #else @@ -256,19 +259,19 @@ static inline void ltt_relay_do_copy(voi goto memcpy_fallback; *(u32 *)dest = *(const u32 *)src; break; -#if (BITS_PER_LONG == 64) case 8: if (unlikely(!addr_aligned(dest, src, 8))) goto memcpy_fallback; *(u64 *)dest = *(const u64 *)src; break; -#endif default: - goto memcpy_fallback; + /* + * What we really want here is an inline memcpy, but we don't + * have constants, so gcc generally uses a function call. + */ + for (; len > 0; len--) + *(u8 *)dest++ = *(const u8 *)src++; } - return; -memcpy_fallback: - memcpy(dest, src, len); } #endif -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/