Message-ID: <mhng-0770bfe6-73bd-4b8a-9fa7-142ed95a6974@palmer-si-x1c4>
Date: Fri, 25 Jan 2019 12:21:07 -0800 (PST)
From: Palmer Dabbelt <palmer@...ive.com>
To: bjorn.topel@...il.com, Jim Wilson <jimw@...ive.com>
CC: Christoph Hellwig <hch@...radead.org>,
linux-riscv@...ts.infradead.org, davidlee@...ive.com,
daniel@...earbox.net, netdev@...r.kernel.org
Subject: Re: [RFC PATCH 1/3] riscv: set HAVE_EFFICIENT_UNALIGNED_ACCESS
On Tue, 15 Jan 2019 08:06:47 PST (-0800), bjorn.topel@...il.com wrote:
> On Tue, 15 Jan 2019 at 16:39, Christoph Hellwig <hch@...radead.org> wrote:
>>
>> Hmm, while the RISC-V spec requires misaligned load/store support,
>> who says they are efficient? Maybe add a little comment that says
>> on which cpus they are efficient.
>
> Good point! :-) I need to check how other architectures do this.
> Enabling it for *all* RV64 is probably not correct.
RISC-V mandates that misaligned memory accesses execute correctly in S-mode,
but allows them to be trapped and emulated in M-mode. As a result they can be
quite slow. Every microarchitecture I know of traps misaligned accesses into
M-mode, so for now we're probably safe just unconditionally saying they're
slow.
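To make the cost concrete: code that can't assume fast misaligned accesses
ends up doing byte-at-a-time loads, along these lines (an untested userspace
sketch -- FAST_MISALIGNED_ACCESS is a made-up stand-in here, not the kernel's
HAVE_EFFICIENT_UNALIGNED_ACCESS machinery):

    #include <stdint.h>
    #include <string.h>

    /* Read a 32-bit little-endian value from a possibly misaligned
       pointer.  */
    static inline uint32_t load_le32(const void *p)
    {
    #ifdef FAST_MISALIGNED_ACCESS
            /* One (possibly misaligned) load: fine when the core handles
               misalignment in hardware, painful when it traps to M-mode.
               GCC emits a single lw for this memcpy unless -mstrict-align
               is in effect.  */
            uint32_t v;
            memcpy(&v, p, sizeof(v));
            return v;
    #else
            /* Byte loads are always aligned, so they never trap, at the
               cost of extra instructions.  */
            const uint8_t *b = p;
            return (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
                   ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
    #endif
    }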
GCC does have a tuning parameter, set depending on -mtune, that says "are
misaligned accesses fast?", but it doesn't appear to be exposed as a
preprocessor macro. I think it's probably best to just expose that tuning
parameter as a macro so software that needs to know has one standard way of
finding out.
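A consumer could then pick an implementation at compile time with something
like this (untested; the 2-cycle threshold is arbitrary, just for
illustration):

    /* Treat misaligned loads as fast only if the compiler says a
       word-sized one costs no more than a couple of cycles.  */
    #if defined(__riscv_tune_misaligned_load_cost) && \
        __riscv_tune_misaligned_load_cost <= 2
    # define MISALIGNED_LOADS_ARE_FAST 1
    #else
    # define MISALIGNED_LOADS_ARE_FAST 0
    #endif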
Jim, would you be opposed to something like this?
diff --git a/riscv-c-api.md b/riscv-c-api.md
index 0b0236c38826..a790f5cc23ee 100644
--- a/riscv-c-api.md
+++ b/riscv-c-api.md
@@ -52,6 +52,10 @@ https://creativecommons.org/licenses/by/4.0/.
* `__riscv_cmodel_medlow`
* `__riscv_cmodel_medany`
* `__riscv_cmodel_pic`
+* `__riscv_tune_misaligned_load_cost`: The number of cycles a word-sized
+ misaligned load will take.
+* `__riscv_tune_misaligned_store_cost`: The number of cycles a word-sized
+ misaligned store will take.
## Function Attributes
Which I think shouldn't be too much of a headache to implement in GCC -- I
haven't compiled this yet, though...
diff --git a/gcc/config/riscv/riscv-c.c b/gcc/config/riscv/riscv-c.c
index ca72de74a7b4..fa71a4a22104 100644
--- a/gcc/config/riscv/riscv-c.c
+++ b/gcc/config/riscv/riscv-c.c
@@ -98,4 +98,9 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
builtin_define ("__riscv_cmodel_pic");
break;
}
+
+ builtin_define_with_int_value ("__riscv_tune_misaligned_load_cost",
+ riscv_tune_info->slow_unaligned_access ? 1024 : 1);
+ builtin_define_with_int_value ("__riscv_tune_misaligned_store_cost",
+ riscv_tune_info->slow_unaligned_access ? 1024 : 1);
}
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index a3ab6cec33b4..d58a307d27b4 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -39,4 +39,6 @@ enum riscv_code_model {
};
extern enum riscv_code_model riscv_cmodel;
+extern const struct riscv_tune_info *riscv_tune_info;
+
#endif /* ! GCC_RISCV_OPTS_H */
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index bf4571d91b8c..671c2ddaaa0f 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -226,7 +226,7 @@ struct riscv_cpu_info {
const char *name;
/* Tuning parameters for this CPU. */
- const struct riscv_tune_info *tune_info;
+ const struct riscv_tune_info *riscv_tune_info;
};
/* Global variables for machine-dependent things. */
@@ -243,7 +243,7 @@ unsigned riscv_stack_boundary;
static int epilogue_cfa_sp_offset;
/* Which tuning parameters to use. */
-static const struct riscv_tune_info *tune_info;
+const struct riscv_tune_info *riscv_tune_info;
/* Index R is the smallest register class that contains register R. */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
@@ -1528,7 +1528,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
instructions it needs. */
if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
{
- *total = COSTS_N_INSNS (cost + tune_info->memory_cost);
+ *total = COSTS_N_INSNS (cost + riscv_tune_info->memory_cost);
return true;
}
/* Otherwise use the default handling. */
@@ -1592,7 +1592,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
mode instead. */
mode = GET_MODE (XEXP (x, 0));
if (float_mode_p)
- *total = tune_info->fp_add[mode == DFmode];
+ *total = riscv_tune_info->fp_add[mode == DFmode];
else
*total = riscv_binary_cost (x, 1, 3);
return false;
@@ -1601,14 +1601,14 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
case ORDERED:
/* (FEQ(A, A) & FEQ(B, B)) compared against 0. */
mode = GET_MODE (XEXP (x, 0));
- *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
+ *total = riscv_tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
return false;
case UNEQ:
case LTGT:
/* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */
mode = GET_MODE (XEXP (x, 0));
- *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
+ *total = riscv_tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
return false;
case UNGE:
@@ -1617,13 +1617,13 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
case UNLT:
/* FLT or FLE, but guarded by an FFLAGS read and write. */
mode = GET_MODE (XEXP (x, 0));
- *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
+ *total = riscv_tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
return false;
case MINUS:
case PLUS:
if (float_mode_p)
- *total = tune_info->fp_add[mode == DFmode];
+ *total = riscv_tune_info->fp_add[mode == DFmode];
else
*total = riscv_binary_cost (x, 1, 4);
return false;
@@ -1633,7 +1633,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
rtx op = XEXP (x, 0);
if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
{
- *total = (tune_info->fp_mul[mode == DFmode]
+ *total = (riscv_tune_info->fp_mul[mode == DFmode]
+ set_src_cost (XEXP (op, 0), mode, speed)
+ set_src_cost (XEXP (op, 1), mode, speed)
+ set_src_cost (XEXP (op, 2), mode, speed));
@@ -1642,23 +1642,23 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
}
if (float_mode_p)
- *total = tune_info->fp_add[mode == DFmode];
+ *total = riscv_tune_info->fp_add[mode == DFmode];
else
*total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
return false;
case MULT:
if (float_mode_p)
- *total = tune_info->fp_mul[mode == DFmode];
+ *total = riscv_tune_info->fp_mul[mode == DFmode];
else if (!TARGET_MUL)
/* Estimate the cost of a library call. */
*total = COSTS_N_INSNS (speed ? 32 : 6);
else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
- *total = 3 * tune_info->int_mul[0] + COSTS_N_INSNS (2);
+ *total = 3 * riscv_tune_info->int_mul[0] + COSTS_N_INSNS (2);
else if (!speed)
*total = COSTS_N_INSNS (1);
else
- *total = tune_info->int_mul[mode == DImode];
+ *total = riscv_tune_info->int_mul[mode == DImode];
return false;
case DIV:
@@ -1666,7 +1666,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
case MOD:
if (float_mode_p)
{
- *total = tune_info->fp_div[mode == DFmode];
+ *total = riscv_tune_info->fp_div[mode == DFmode];
return false;
}
/* Fall through. */
@@ -1677,7 +1677,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
/* Estimate the cost of a library call. */
*total = COSTS_N_INSNS (speed ? 32 : 6);
else if (speed)
- *total = tune_info->int_div[mode == DImode];
+ *total = riscv_tune_info->int_div[mode == DImode];
else
*total = COSTS_N_INSNS (1);
return false;
@@ -1699,11 +1699,11 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
case FIX:
case FLOAT_EXTEND:
case FLOAT_TRUNCATE:
- *total = tune_info->fp_add[mode == DFmode];
+ *total = riscv_tune_info->fp_add[mode == DFmode];
return false;
case FMA:
- *total = (tune_info->fp_mul[mode == DFmode]
+ *total = (riscv_tune_info->fp_mul[mode == DFmode]
+ set_src_cost (XEXP (x, 0), mode, speed)
+ set_src_cost (XEXP (x, 1), mode, speed)
+ set_src_cost (XEXP (x, 2), mode, speed));
@@ -4165,7 +4165,7 @@ riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
static int
riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
{
- return (tune_info->memory_cost
+ return (riscv_tune_info->memory_cost
+ memory_move_secondary_cost (mode, rclass, in));
}
@@ -4174,7 +4174,7 @@ riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
static int
riscv_issue_rate (void)
{
- return tune_info->issue_rate;
+ return riscv_tune_info->issue_rate;
}
/* Implement TARGET_ASM_FILE_START. */
@@ -4307,22 +4307,22 @@ riscv_option_override (void)
/* Handle -mtune. */
cpu = riscv_parse_cpu (riscv_tune_string ? riscv_tune_string :
RISCV_TUNE_STRING_DEFAULT);
- tune_info = optimize_size ? &optimize_size_tune_info : cpu->tune_info;
+ riscv_tune_info = optimize_size ? &optimize_size_tune_info : cpu->riscv_tune_info;
/* Use -mtune's setting for slow_unaligned_access, even when optimizing
for size. For architectures that trap and emulate unaligned accesses,
the performance cost is too great, even for -Os. Similarly, if
-m[no-]strict-align is left unspecified, heed -mtune's advice. */
- riscv_slow_unaligned_access_p = (cpu->tune_info->slow_unaligned_access
+ riscv_slow_unaligned_access_p = (cpu->riscv_tune_info->slow_unaligned_access
|| TARGET_STRICT_ALIGN);
if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
- && cpu->tune_info->slow_unaligned_access)
+ && cpu->riscv_tune_info->slow_unaligned_access)
target_flags |= MASK_STRICT_ALIGN;
/* If the user hasn't specified a branch cost, use the processor's
default. */
if (riscv_branch_cost == 0)
- riscv_branch_cost = tune_info->branch_cost;
+ riscv_branch_cost = riscv_tune_info->branch_cost;
/* Function to allocate machine-dependent function status. */
init_machine_status = &riscv_init_machine_status;