lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <1699459797-21703-1-git-send-email-quic_mojha@quicinc.com>
Date:   Wed, 8 Nov 2023 21:39:57 +0530
From:   Mukesh Ojha <quic_mojha@...cinc.com>
To:     <myungjoo.ham@...sung.com>, <kyungmin.park@...sung.com>,
        <cw00.choi@...sung.com>, <jstultz@...gle.com>,
        <tglx@...utronix.de>, <sboyd@...nel.org>, <tj@...nel.org>,
        <jiangshanlai@...il.com>
CC:     <linux-kernel@...r.kernel.org>, <linux-pm@...r.kernel.org>,
        Mukesh Ojha <quic_mojha@...cinc.com>
Subject: timer list corruption in devfreq

Hi,

We are facing an issue on 6.1 kernel while using devfreq framework
and looks like the devfreq_monitor_stop()/devfreq_monitor_start is
vulnerable if frequent governor change is being done from user space
in a loop.

echo simple_ondemand > /sys/class/devfreq/1d84000.ufshc/governor
echo performance > /sys/class/devfreq/1d84000.ufshc/governor

Here, we are using ufs device, but could be any device.

Issue is because same instance of timer is being queued from two
places one from devfreq_monitor() and one from devfreq_monitor_start() as
cancel_delayed_work_sync() from devfreq_monitor_stop() was not
able to delete the delayed work time completely due to which
devfreq_monitor() work rearmed the same timer.

But there looks to be issue in the timer framework where
it was initially discussed in [1] and later fixed in [2]
but not sure being whether is it issue in cancel_delayed_work_sync()
where del_timer() inside try_to_grab_pending() need to be replaced
with timer_delete[_sync]() or devfreq_monitor_stop() need to use
this api's and then delete the work.

[1]
https://lore.kernel.org/all/20220407161745.7d6754b3@gandalf.local.home/

[2]
https://lore.kernel.org/all/20221123201306.823305113@linutronix.de/

In the below ftraces, last two are back to back two timer cancel
operations one from devfreq_monitor_stop() and other from expire_timer().

devfreq_monitor_stop
 cancel_delayed_work_sync
  __cancel_work_timer
    try_to_grab_pending
      del_timer()

00|__hlist_del(inline) == list corruption
-000|detach_timer(inline)
-000|expire_timers(inline) ==>
-000|__run_timers(base = 0xFFFFFF8A70F31240)
-001|run_timer_softirq()
-002|__do_softirq()
-003|____do_softirq()
-004|call_on_irq_stack(asm)
-005|do_softirq_own_stack()
-006|invoke_softirq(inline)

kworker/u16:2-5133    [002]   9436.089773:  timer_start   timer=0xffffff80444f0428  function=__typeid__ZTSFvP10timer_listE_global_addr  expires=0x10022da0d  now=0x10022d9fe  flags=56623106
<idle>-0    [002]   9436.149664:  timer_cancel   timer=0xffffff80444f0428
<idle>-0    [002]   9436.149666:  timer_expire_entry   timer=0xffffff80444f0428  now=0x10022da0d  function=__typeid__ZTSFvP10timer_listE_global_addr  baseclk=0x10022da0d
<idle>-0    [002]   9436.149705:  timer_expire_exit   timer=0xffffff80444f0428
kworker/u16:6-14217    [003]   9436.149776:  timer_start   timer=0xffffff80444f0428  function=__typeid__ZTSFvP10timer_listE_global_addr  expires=0x10022da1c  now=0x10022da0d  flags=119537667
vendor.xxxyyy.ha-1593    [006]   9436.182730:  timer_init   timer=0xffffff80444f0428
vendor.xxxyyy.ha-1593    [006]   9436.182731:  timer_start   timer=0xffffff80444f0428  function=__typeid__ZTSFvP10timer_listE_global_addr  expires=0x10022da24  now=0x10022da15  flags=153092102
vendor.xxxyyy.ha-1593    [004]   9436.188128:  timer_cancel   timer=0xffffff80444f0428
vendor.xxxyyy.ha-1593    [004]   9436.193456:  timer_init   timer=0xffffff80444f0428
vendor.xxxyyy.ha-1593    [004]   9436.193458:  timer_start   timer=0xffffff80444f0428  function=__typeid__ZTSFvP10timer_listE_global_addr  expires=0x10022da26  now=0x10022da17  flags=161480708
vendor.xxxyyy.ha-1593    [005]   9436.199602:  timer_cancel   timer=0xffffff80444f0428
vendor.xxxyyy.ha-1593    [004]   9436.205410:  timer_init   timer=0xffffff80444f0428
vendor.xxxyyy.ha-1593    [004]   9436.205412:  timer_start   timer=0xffffff80444f0428  function=__typeid__ZTSFvP10timer_listE_global_addr  expires=0x10022da29  now=0x10022da1a  flags=174063620
<idle>-0    [003]   9436.209662:  timer_cancel   timer=0xffffff80444f0428
<idle>-0    [003]   9436.209664:  timer_expire_entry   timer=0xffffff80444f0428  now=0x10022da1c  function=__typeid__ZTSFvP10timer_listE_global_addr  baseclk=0x10022da1c
<idle>-0    [003]   9436.209718:  timer_expire_exit   timer=0xffffff80444f0428
kworker/u16:6-14217    [003]   9436.209863:  timer_start   timer=0xffffff80444f0428  function=__typeid__ZTSFvP10timer_listE_global_addr  expires=0x10022da2b  now=0x10022da1c  flags=182452227
vendor.xxxyyy.ha-1593    [004]   9436.209888:  timer_cancel   timer=0xffffff80444f0428
vendor.xxxyyy.ha-1593    [004]   9436.216390:  timer_init   timer=0xffffff80444f0428
vendor.xxxyyy.ha-1593    [004]   9436.216392:  timer_start   timer=0xffffff80444f0428  function=__typeid__ZTSFvP10timer_listE_global_addr  expires=0x10022da2c  now=0x10022da1d  flags=186646532
vendor.xxxyyy.ha-1593    [005]   9436.220992:  timer_cancel   timer=0xffffff80444f0428
xxxyyyTraceManag-7795    [004]   9436.261641:  timer_cancel   timer=0xffffff80444f0428

<1>[10278.515522] [cpu:   0] Unable to handle kernel paging request at virtual address dead00000000012a

-000|__hlist_del(inline)
-000|detach_timer(inline)
-000|expire_timers(inline)
-000|__run_timers(base = 0xFFFFFF8A70F31240)
-001|run_timer_softirq()
-002|__do_softirq()
-003|____do_softirq()
-004|call_on_irq_stack(asm)
-005|do_softirq_own_stack()
-006|invoke_softirq(inline)
-006|__irq_exit_rcu()
-007|irq_exit_rcu()
-008|__el1_irq(inline)
-008|el1_interrupt(regs = 0xFFFFFFC0410FB680, handler = 0xFFFFFFE5222100F8)
-009|el1h_64_irq_handler(regs = ?)
-010|el1h_64_irq(asm)
 -->|exception
-011|arch_local_irq_enable(inline)
-011|raw_spin_rq_unlock_irq(inline)
-011|finish_lock_switch(inline)
-011|finish_task_switch(prev = 0xFFFFFF88D30D8000)
-012|context_switch(inline)
-012|__schedule(:sched_mode = 1091549328)
-013|schedule()
-014|schedule_hrtimeout_range_clock(expires = ?, delta = ?, mode = ?, clock_id = ?)
-015|schedule_hrtimeout_range(expires = 0x0, delta = 0, mode = 1091549520)
-016|poll_schedule_timeout(inline)
-016|do_poll(inline)
-016|do_sys_poll(:ufds = 0x00000072B4397BD8, :nfds = 2, :end_time = 0x0)
-017|__do_sys_ppoll(inline)
-017|__se_sys_ppoll(inline)
-017|__arm64_sys_ppoll(:regs = 0xFFFFFFC0410FBEB0)
-018|__invoke_syscall(inline)
-018|invoke_syscall(:regs = 0xFFFFFFC0410FBEB0, scno = ?, sc_nr = ?, :syscall_table = 0xFFFFFFE5231E0D20)
-019|el0_svc_common(regs = 0xFFFFFFC0410FBEB0, scno = ?, sc_nr = ?, syscall_table = 0xFFFFFFE5231E0D20)
-020|do_el0_svc(regs = 0xFFFFFFC0410FBEB0)
-021|el0_svc(regs = 0xFFFFFFC0410FBEB0)
-022|el0t_64_sync_handler(regs = ?)
-023|el0t_64_sync(asm)
 -->|exception
-024|NUX:0x736F8F15FC(asm)
 ---|end of frame
expire_timers(inline)
expires = 4297461926,
  function = 0xFFFFFFE262CD6E18,
 base = 0xFFFFFF8975713240
 baseclk = 4297461926
 fn = 0x0
 timer = 0xFFFFFF8044AA5430 ->
( entry = (next = 0xDEAD000000000122, pprev = 0xFFFFFFC008003EA0 -> 0xDEAD000000000122 ->
next = 0x0,pprev = 0x0)),
  expires = 4297461926,
  function = 0xFFFFFFE262CD6E18, //delayed_work_timer_fn


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ