[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3ff4d71d-6a63-58ca-bdfd-79b9867fa477@linaro.org>
Date: Mon, 24 Sep 2018 03:59:55 +0200
From: Daniel Lezcano <daniel.lezcano@...aro.org>
To: Alexandre Belloni <alexandre.belloni@...tlin.com>
Cc: Thomas Gleixner <tglx@...utronix.de>,
Nicolas Ferre <nicolas.ferre@...rochip.com>,
Alexander Dahl <ada@...rsis.com>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org
Subject: [PATCH v7 2/7] clocksource/drivers: Add a new driver for the Atmel
ARM TC blocks
On 13/09/2018 13:30, Alexandre Belloni wrote:
> Add a driver for the Atmel Timer Counter Blocks. This driver provides a
> clocksource and two clockevent devices.
>
> One of the clockevent device is linked to the clocksource counter and so it
> will run at the same frequency. This will be used when there is only on TCB
> channel available for timers.
>
> The other clockevent device runs on a separate TCB channel when available.
>
> This driver uses regmap and syscon to be able to probe early in the boot
> and avoid having to switch on the TCB clocksource later.
Sorry, I don't get it. Can you elaborate?
> Using regmap also
> means that unused TCB channels may be used by other drivers (PWM for
> example). read/writel are still used to access channel specific registers
> to avoid the performance impact of regmap (mainly locking).
I don't get the regmap reasoning here.
My main concern with this driver is the 16-bit chained support. See the
comments below.
> Tested-by: Alexander Dahl <ada@...rsis.com>
> Tested-by: Andras Szemzo <szemzo.andras@...il.com>
> Signed-off-by: Alexandre Belloni <alexandre.belloni@...tlin.com>
> ---
> drivers/clocksource/Kconfig | 8 +
> drivers/clocksource/Makefile | 3 +-
> drivers/clocksource/timer-atmel-tcb.c | 410 ++++++++++++++++++++++++++
> 3 files changed, 420 insertions(+), 1 deletion(-)
> create mode 100644 drivers/clocksource/timer-atmel-tcb.c
>
> diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
> index a11f4ba98b05..8c7324e409f6 100644
> --- a/drivers/clocksource/Kconfig
> +++ b/drivers/clocksource/Kconfig
> @@ -403,6 +403,14 @@ config ATMEL_ST
> help
> Support for the Atmel ST timer.
>
> +config ATMEL_ARM_TCB_CLKSRC
> + bool "Microchip ARM TC Block" if COMPILE_TEST
> + select REGMAP_MMIO
> + depends on GENERIC_CLOCKEVENTS
> + help
> + This enables build of clocksource and clockevent driver for
> + the integrated Timer Counter Blocks in Microchip ARM SoCs.
> +
> config CLKSRC_EXYNOS_MCT
> bool "Exynos multi core timer driver" if COMPILE_TEST
> depends on ARM || ARM64
> diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
> index db51b2427e8a..0df9384a1230 100644
> --- a/drivers/clocksource/Makefile
> +++ b/drivers/clocksource/Makefile
> @@ -3,7 +3,8 @@ obj-$(CONFIG_TIMER_OF) += timer-of.o
> obj-$(CONFIG_TIMER_PROBE) += timer-probe.o
> obj-$(CONFIG_ATMEL_PIT) += timer-atmel-pit.o
> obj-$(CONFIG_ATMEL_ST) += timer-atmel-st.o
> -obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
> +obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
> +obj-$(CONFIG_ATMEL_ARM_TCB_CLKSRC) += timer-atmel-tcb.o
> obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
> obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
> obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o
> diff --git a/drivers/clocksource/timer-atmel-tcb.c b/drivers/clocksource/timer-atmel-tcb.c
> new file mode 100644
> index 000000000000..21fbe430f91b
> --- /dev/null
> +++ b/drivers/clocksource/timer-atmel-tcb.c
> @@ -0,0 +1,410 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/clk.h>
> +#include <linux/clockchips.h>
> +#include <linux/clocksource.h>
> +#include <linux/interrupt.h>
> +#include <linux/kernel.h>
> +#include <linux/mfd/syscon.h>
> +#include <linux/of_address.h>
> +#include <linux/of_irq.h>
> +#include <linux/regmap.h>
> +#include <linux/sched_clock.h>
> +#include <soc/at91/atmel_tcb.h>
> +
> +struct atmel_tcb_clksrc {
> + struct clocksource clksrc;
> + struct clock_event_device clkevt;
> + struct regmap *regmap;
> + void __iomem *base;
> + struct clk *clk[2];
> + char name[20];
You can reasonably remove this field and directly use the ones in the
clocksrc/evt.
> + int channels[2];
> + int bits;
> + int irq;
After removing the request_irq/free_irq calls below (see comment), this
field can be removed.
> + struct {
> + u32 cmr;
> + u32 imr;
> + u32 rc;
> + bool clken;
Not sure clken is needed, 16/32 is enough information.
> + } cache[2];
> + u32 bmr_cache;
Are you sure you should save the bmr content?
> + bool registered;
> + bool clk_enabled;
Not used.
> +};
> +
> +static struct atmel_tcb_clksrc tc;
> +
> +static struct clk *tcb_clk_get(struct device_node *node, int channel)
> +{
> + struct clk *clk;
> + char clk_name[] = "t0_clk";
> +
> + clk_name[1] += channel;
> + clk = of_clk_get_by_name(node->parent, clk_name);
> + if (!IS_ERR(clk))
> + return clk;
> +
> + return of_clk_get_by_name(node->parent, "t0_clk");
Can you explain why returning "t0_clk" is better than returning an error?
> +}
> +
> +/*
> + * Clocksource and clockevent using the same channel(s)
> + */
> +static u64 tc_get_cycles(struct clocksource *cs)
> +{
> + u32 lower, upper;
> +
> + do {
> + upper = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1]));
> + lower = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
> + } while (upper != readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1])));
> +
> + return (upper << 16) | lower;
> +}
> +
> +static u64 tc_get_cycles32(struct clocksource *cs)
> +{
> + return readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
> +}
> +
> +static u64 notrace tc_sched_clock_read(void)
> +{
> + return tc_get_cycles(&tc.clksrc);
> +}
> +
> +static u64 notrace tc_sched_clock_read32(void)
> +{
> + return tc_get_cycles32(&tc.clksrc);
> +}
> +
> +static int tcb_clkevt_next_event(unsigned long delta,
> + struct clock_event_device *d)
> +{
> + u32 old, next, cur;
> +
> + old = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
> + next = old + delta;
> + writel(next, tc.base + ATMEL_TC_RC(tc.channels[0]));
> + cur = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
> +
> + /* check whether the delta elapsed while setting the register */
> + if ((next < old && cur < old && cur > next) ||
> + (next > old && (cur < old || cur > next))) {
> + /*
> + * Clear the CPCS bit in the status register to avoid
> + * generating a spurious interrupt next time a valid
> + * timer event is configured.
> + */
> + old = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
> + return -ETIME;
> + }
> + writel(ATMEL_TC_CPCS, tc.base + ATMEL_TC_IER(tc.channels[0]));
How is this compatible with the 16-bit mode as defined in the init function?
> + return 0;
> +}
> +
> +static irqreturn_t tc_clkevt_irq(int irq, void *handle)
> +{
> + unsigned int sr;
> +
> + sr = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
> + if (sr & ATMEL_TC_CPCS) {
> + tc.clkevt.event_handler(&tc.clkevt);
> + return IRQ_HANDLED;
> + }
> +
> + return IRQ_NONE;
> +}
> +
> +static int tcb_clkevt_oneshot(struct clock_event_device *dev)
> +{
> + if (clockevent_state_oneshot(dev))
> + return 0;
> +
> + /*
> + * Because both clockevent devices may share the same IRQ, we don't want
> + * the less likely one to stay requested
> + */
> + return request_irq(tc.irq, tc_clkevt_irq, IRQF_TIMER | IRQF_SHARED,
> + tc.name, &tc);
> +}
> +
> +static int tcb_clkevt_shutdown(struct clock_event_device *dev)
> +{
> + writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[0]));
> + if (tc.bits == 16)
> + writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[1]));
> +
> + if (!clockevent_state_detached(dev))
> + free_irq(tc.irq, &tc);
Why are you requesting and freeing the irq instead of using the
disable/enable register operations?
> + return 0;
> +}
> +
> +static void __init tcb_setup_dual_chan(struct atmel_tcb_clksrc *tc,
> + int mck_divisor_idx)
> +{
> + /* first channel: waveform mode, input mclk/8, clock TIOA on overflow */
> + writel(mck_divisor_idx /* likely divide-by-8 */
> + | ATMEL_TC_CMR_WAVE
> + | ATMEL_TC_CMR_WAVESEL_UP /* free-run */
> + | ATMEL_TC_CMR_ACPA(SET) /* TIOA rises at 0 */
> + | ATMEL_TC_CMR_ACPC(CLEAR), /* (duty cycle 50%) */
> + tc->base + ATMEL_TC_CMR(tc->channels[0]));
> + writel(0x0000, tc->base + ATMEL_TC_RA(tc->channels[0]));
> + writel(0x8000, tc->base + ATMEL_TC_RC(tc->channels[0]));
> + writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
> + writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
> +
> + /* second channel: waveform mode, input TIOA */
> + writel(ATMEL_TC_CMR_XC(tc->channels[1]) /* input: TIOA */
> + | ATMEL_TC_CMR_WAVE
> + | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
> + tc->base + ATMEL_TC_CMR(tc->channels[1]));
> + writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[1])); /* no irqs */
> + writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[1]));
> +
> + /* chain both channel, we assume the previous channel */
> + regmap_write(tc->regmap, ATMEL_TC_BMR,
> + ATMEL_TC_BMR_TCXC(1 + tc->channels[1], tc->channels[1]));
> + /* then reset all the timers */
> + regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
> +}
> +
> +static void __init tcb_setup_single_chan(struct atmel_tcb_clksrc *tc,
> + int mck_divisor_idx)
> +{
> + /* channel 0: waveform mode, input mclk/8 */
> + writel(mck_divisor_idx /* likely divide-by-8 */
> + | ATMEL_TC_CMR_WAVE
> + | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
> + tc->base + ATMEL_TC_CMR(tc->channels[0]));
> + writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
> + writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
> +
> + /* then reset all the timers */
> + regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
> +}
> +
> +static void tc_clksrc_suspend(struct clocksource *cs)
> +{
> + int i;
> +
> + for (i = 0; i < 1 + (tc.bits == 16); i++) {
> + tc.cache[i].cmr = readl(tc.base + ATMEL_TC_CMR(tc.channels[i]));
> + tc.cache[i].imr = readl(tc.base + ATMEL_TC_IMR(tc.channels[i]));
> + tc.cache[i].rc = readl(tc.base + ATMEL_TC_RC(tc.channels[i]));
> + tc.cache[i].clken = !!(readl(tc.base +
> + ATMEL_TC_SR(tc.channels[i])) &
> + ATMEL_TC_CLKSTA);
> + }
> +
> + if (tc.bits == 16)
> + regmap_read(tc.regmap, ATMEL_TC_BMR, &tc.bmr_cache);
> +}
> +
> +static void tc_clksrc_resume(struct clocksource *cs)
> +{
> + int i;
> +
> + for (i = 0; i < 1 + (tc.bits == 16); i++) {
> + /* Restore registers for the channel, RA and RB are not used */
> + writel(tc.cache[i].cmr, tc.base + ATMEL_TC_CMR(tc.channels[i]));
> + writel(tc.cache[i].rc, tc.base + ATMEL_TC_RC(tc.channels[i]));
> + writel(0, tc.base + ATMEL_TC_RA(tc.channels[i]));
> + writel(0, tc.base + ATMEL_TC_RB(tc.channels[i]));
> + /* Disable all the interrupts */
> + writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[i]));
> + /* Reenable interrupts that were enabled before suspending */
> + writel(tc.cache[i].imr, tc.base + ATMEL_TC_IER(tc.channels[i]));
> +
> + /* Start the clock if it was used */
> + if (tc.cache[i].clken)
> + writel(ATMEL_TC_CCR_CLKEN, tc.base +
> + ATMEL_TC_CCR(tc.channels[i]));
> + }
> +
> + /* in case of dual channel, chain channels */
> + if (tc.bits == 16)
> + regmap_write(tc.regmap, ATMEL_TC_BMR, tc.bmr_cache);
> + /* Finally, trigger all the channels*/
> + regmap_write(tc.regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
> +}
> +
> +static int __init tcb_clksrc_register(struct device_node *node,
> + struct regmap *regmap, void __iomem *base,
> + int channel, int channel1, int irq,
> + int bits)
> +{
> + u32 rate, divided_rate = 0;
> + int best_divisor_idx = -1;
> + int i, err = -1;
> + u64 (*tc_sched_clock)(void);
> +
> + tc.regmap = regmap;
> + tc.base = base;
> + tc.channels[0] = channel;
> + tc.channels[1] = channel1;
> + tc.irq = irq;
> + tc.bits = bits;
> +
> + tc.clk[0] = tcb_clk_get(node, tc.channels[0]);
> + if (IS_ERR(tc.clk[0]))
> + return PTR_ERR(tc.clk[0]);
> + err = clk_prepare_enable(tc.clk[0]);
> + if (err) {
> + pr_debug("can't enable T0 clk\n");
> + goto err_clk;
> + }
> +
> + /* How fast will we be counting? Pick something over 5 MHz. */
> + rate = (u32)clk_get_rate(tc.clk[0]);
> + for (i = 0; i < 5; i++) {
> + unsigned int divisor = atmel_tc_divisors[i];
> + unsigned int tmp;
> +
> + if (!divisor)
> + continue;
I suppose you meant 'break' here? Use atmel_tc_divisors[] = { 2, 8, 32,
128 }; and then the ARRAY_SIZE macro.
> + tmp = rate / divisor;
> + pr_debug("TC: %u / %-3u [%d] --> %u\n", rate, divisor, i, tmp);
> + if (best_divisor_idx > 0) {
> + if (tmp < 5 * 1000 * 1000)
> + continue;
> + }
> + divided_rate = tmp;
> + best_divisor_idx = i;
What is the best divisor? The highest one or the one closest to 5 MHz?
> + }
> +
> + if (tc.bits == 32) {
> + tc.clksrc.read = tc_get_cycles32;
> + tcb_setup_single_chan(&tc, best_divisor_idx);
> + tc_sched_clock = tc_sched_clock_read32;
> + snprintf(tc.name, sizeof(tc.name), "%s:%d",
> + kbasename(node->parent->full_name), tc.channels[0]);
> + } else {
> + tc.clk[1] = tcb_clk_get(node, tc.channels[1]);
> + if (IS_ERR(tc.clk[1]))
> + goto err_disable_t0;
This is very confusing. If the function tcb_clk_get() fails with this
channel, it will return "t0_clk", which will then be used here? Why?
> + err = clk_prepare_enable(tc.clk[1]);
> + if (err) {
> + pr_debug("can't enable T1 clk\n");
> + goto err_clk1;
> + }
> + tc.clksrc.read = tc_get_cycles,
> + tcb_setup_dual_chan(&tc, best_divisor_idx);
> + tc_sched_clock = tc_sched_clock_read;
> + snprintf(tc.name, sizeof(tc.name), "%s:%d,%d",
> + kbasename(node->parent->full_name), tc.channels[0],
> + tc.channels[1]);
> + }
> +
> + pr_debug("%s at %d.%03d MHz\n", tc.name,
> + divided_rate / 1000000,
> + ((divided_rate + 500000) % 1000000) / 1000);
Using two channels to emulate a 32-bit timer has a significant cost,
especially in the sched_clock function, which is part of the hot kernel
path. In addition, it makes the code less maintainable and readable.
Why don't you just stick to a specific rate with the prescaler value and
reduce the rating of the timer? (See the stm32 timer for an example:
stm32_timer_set_prescaler and the init function.)
It will be less precise (thus the lower rating) but will make the system
faster by preventing multiple register reads in the sched_clock.
Is it an acceptable trade-off?
> + tc.clksrc.name = tc.name;
> + tc.clksrc.suspend = tc_clksrc_suspend;
> + tc.clksrc.resume = tc_clksrc_resume;
> + tc.clksrc.rating = 200;
> + tc.clksrc.mask = CLOCKSOURCE_MASK(32);
> + tc.clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
> +
> + err = clocksource_register_hz(&tc.clksrc, divided_rate);
> + if (err)
> + goto err_disable_t1;
> +
> + sched_clock_register(tc_sched_clock, 32, divided_rate);
> +
> + tc.registered = true;
> +
> + /* Set up and register clockevents */
> + tc.clkevt.name = tc.name;
> + tc.clkevt.cpumask = cpumask_of(0);
> + tc.clkevt.set_next_event = tcb_clkevt_next_event;
> + tc.clkevt.set_state_oneshot = tcb_clkevt_oneshot;
> + tc.clkevt.set_state_shutdown = tcb_clkevt_shutdown;
> + tc.clkevt.features = CLOCK_EVT_FEAT_ONESHOT;
> + tc.clkevt.rating = 125;
> +
> + clockevents_config_and_register(&tc.clkevt, divided_rate, 1,
> + BIT(tc.bits) - 1);
> +
> + return 0;
> +
> +err_disable_t1:
> + if (tc.bits == 16)
> + clk_disable_unprepare(tc.clk[1]);
> +
> +err_clk1:
> + if (tc.bits == 16)
> + clk_put(tc.clk[1]);
> +
> +err_disable_t0:
> + clk_disable_unprepare(tc.clk[0]);
> +
> +err_clk:
> + clk_put(tc.clk[0]);
> +
> + pr_err("%s: unable to register clocksource/clockevent\n",
> + tc.clksrc.name);
> +
> + return err;
> +}
> +
> +static int __init tcb_clksrc_init(struct device_node *node)
> +{
> + const struct of_device_id *match;
> + struct regmap *regmap;
> + void __iomem *tcb_base;
> + u32 channel;
> + int irq, err, chan1 = -1;
> + unsigned bits;
> +
> + if (tc.registered)
> + return -ENODEV;
> +
> + /*
> + * The regmap has to be used to access registers that are shared
> + * between channels on the same TCB but we keep direct IO access for
> + * the counters to avoid the impact on performance
> + */
> + regmap = syscon_node_to_regmap(node->parent);
> + if (IS_ERR(regmap))
> + return PTR_ERR(regmap);
> +
> + tcb_base = of_iomap(node->parent, 0);
> + if (!tcb_base) {
> + pr_err("%s +%d %s\n", __FILE__, __LINE__, __func__);
Remove this debug information and replace it with a proper error message.
> + return -ENXIO;
> + }
> +
> + match = of_match_node(atmel_tcb_dt_ids, node->parent);
> + bits = (uintptr_t)match->data;
> +
> + err = of_property_read_u32_index(node, "reg", 0, &channel);
> + if (err)
> + return err;
> +
> + irq = of_irq_get(node->parent, channel);
> + if (irq < 0) {
if (irq <= 0) {
> + irq = of_irq_get(node->parent, 0);
Why ?
> + if (irq < 0)
if (irq <= 0)
> + return irq;
> + }
> +
> + if (bits == 16) {
> + of_property_read_u32_index(node, "reg", 1, &chan1);
> + if (chan1 == -1) {
> + pr_err("%s: clocksource needs two channels\n",
> + node->parent->full_name);
Think about it. The code is giving up at this point in the boot process.
So it is one of two things: either you consider there is an alternate
clocksource / clockevent, or the system hangs:
- If there is an alternate clocksource, why support 32 bits by chaining
the channels, with the cost it introduces, instead of using the alternate
one?
- If there is no alternate clocksource, why not support a less precise
16-bit timer and give up on the 32-bit emulation and the complexity
it introduces in this driver?
> + return -EINVAL;
> + }
> + }
> + return tcb_clksrc_register(node, regmap, tcb_base, channel, chan1, irq,
> + bits);
> +}
> +TIMER_OF_DECLARE(atmel_tcb_clksrc, "atmel,tcb-timer", tcb_clksrc_init);
>
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
Powered by blists - more mailing lists