Message-ID: <a96858374fb06ba38dd5eea5561cc7542220416e.camel@gmx.de>
Date: Thu, 12 Sep 2024 16:00:25 +0200
From: Mike Galbraith <efault@....de>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Luis Machado <luis.machado@....com>, Dietmar Eggemann
 <dietmar.eggemann@....com>, Vincent Guittot <vincent.guittot@...aro.org>, 
 Hongyan Xia <hongyan.xia2@....com>, mingo@...hat.com,
 juri.lelli@...hat.com, rostedt@...dmis.org,  bsegall@...gle.com,
 mgorman@...e.de, vschneid@...hat.com,  linux-kernel@...r.kernel.org,
 kprateek.nayak@....com, wuyun.abel@...edance.com, 
 youssefesmat@...omium.org, tglx@...utronix.de
Subject: Re: [PATCH 10/24] sched/uclamg: Handle delayed dequeue

On Wed, 2024-09-11 at 11:27 +0200, Mike Galbraith wrote:
> On Wed, 2024-09-11 at 11:13 +0200, Peter Zijlstra wrote:
> > On Wed, Sep 11, 2024 at 11:10:26AM +0200, Mike Galbraith wrote:
> > >
> > > Hm, would be interesting to know how the heck he's triggering that.
> > >
> > > My x86_64 box refuses to produce any such artifacts with anything I've
> > > tossed at it, including full LTP with enterprise RT and !RT configs,
> > > both in master and my local SLE15-SP7 branch.  Hohum.
> >
> > Yeah, my hackbench runs also didn't show that. Perhaps something funny
> > with cgroups. I didn't test cgroup bandwidth for example.
>
> That's all on in enterprise configs tested with LTP, so hypothetically
> got some testing.  I also turned on AUTOGROUP in !RT configs so cgroups
> would get some exercise no matter what I'm mucking about with.
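
(For reference, the cgroup knobs in play there are presumably the usual
config switches; named here as assumptions, not read from either box's
actual .config:)

	CONFIG_SCHED_AUTOGROUP=y	# autogroup on in !RT configs, per above
	CONFIG_CFS_BANDWIDTH=y		# cgroup bandwidth, the untested bit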

Oho, I just hit the pick_eevdf() returns NULL in pick_next_entity() and
we deref it bug in tip that I recall someone else mentioning they'd
hit.  LTP was chugging away doing lord knows what when evolution
apparently decided to check accounts, which didn't go well.

state=TASK_WAKING(?), on_rq=0, on_cpu=1, cfs_rq.nr_running=0

crash> bt -sx
PID: 29024    TASK: ffff9118b7583300  CPU: 1    COMMAND: "pool-evolution"
 #0 [ffffa939dfd0f930] machine_kexec+0x1a0 at ffffffffab886cc0
 #1 [ffffa939dfd0f990] __crash_kexec+0x6a at ffffffffab99496a
 #2 [ffffa939dfd0fa50] crash_kexec+0x23 at ffffffffab994e33
 #3 [ffffa939dfd0fa60] oops_end+0xbe at ffffffffab844b4e
 #4 [ffffa939dfd0fa80] page_fault_oops+0x151 at ffffffffab898fc1
 #5 [ffffa939dfd0fb08] exc_page_fault+0x6b at ffffffffac3a410b
 #6 [ffffa939dfd0fb30] asm_exc_page_fault+0x22 at ffffffffac400ac2
    [exception RIP: pick_task_fair+113]
    RIP: ffffffffab8fb471  RSP: ffffa939dfd0fbe0  RFLAGS: 00010046
    RAX: 0000000000000000  RBX: ffff91180735ee00  RCX: 000b709eab0437d5
    RDX: 0000000000000001  RSI: 0000000000000000  RDI: ffff91180735ee00
    RBP: ffff91180735f400   R8: 00000000000001d9   R9: 0000000000000000
    R10: ffff911a8ecb9380  R11: 0000000000000000  R12: ffff911a8eab89c0
    R13: ffff911a8eab8a40  R14: ffffffffacafc373  R15: ffff9118b7583300
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #7 [ffffa939dfd0fc08] pick_next_task_fair+0x48 at ffffffffab9013b8
 #8 [ffffa939dfd0fc48] __schedule+0x1d9 at ffffffffac3aab39
 #9 [ffffa939dfd0fcf8] schedule+0x24 at ffffffffac3ac084
#10 [ffffa939dfd0fd10] futex_wait_queue+0x63 at ffffffffab98e353
#11 [ffffa939dfd0fd38] __futex_wait+0x139 at ffffffffab98e989
#12 [ffffa939dfd0fdf0] futex_wait+0x6a at ffffffffab98ea5a
#13 [ffffa939dfd0fe80] do_futex+0x88 at ffffffffab98a9f8
#14 [ffffa939dfd0fe90] __x64_sys_futex+0x5e at ffffffffab98ab0e
#15 [ffffa939dfd0ff00] do_syscall_64+0x74 at ffffffffac39ce44
#16 [ffffa939dfd0ff40] entry_SYSCALL_64_after_hwframe+0x4b at ffffffffac4000ac
    RIP: 00007fd6b991a849  RSP: 00007fd6813ff6e8  RFLAGS: 00000246
    RAX: ffffffffffffffda  RBX: 0000000000000a6c  RCX: 00007fd6b991a849
    RDX: 0000000000000a6c  RSI: 0000000000000080  RDI: 00005631abf620c0
    RBP: 00005631abf620b8   R8: 00007fd6bad0a080   R9: 00000000000015fe
    R10: 00007fd6813ff700  R11: 0000000000000246  R12: 00005631abf620b0
    R13: 00005631abf620b0  R14: 00005631abf620b8  R15: 0000000000000000
    ORIG_RAX: 00000000000000ca  CS: 0033  SS: 002b
crash> dis pick_task_fair+113
0xffffffffab8fb471 <pick_task_fair+113>:        cmpb   $0x0,0x51(%rax)
crash> gdb list *pick_task_fair+113
0xffffffffab8fb471 is in pick_task_fair (kernel/sched/fair.c:5639).
5634			SCHED_WARN_ON(cfs_rq->next->sched_delayed);
5635			return cfs_rq->next;
5636		}
5637
5638		struct sched_entity *se = pick_eevdf(cfs_rq);
5639		if (se->sched_delayed) {
5640			dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
5641			SCHED_WARN_ON(se->sched_delayed);
5642			SCHED_WARN_ON(se->on_rq);
5643			return NULL;
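
With RAX == 0 at that cmpb, pick_eevdf() evidently handed back NULL,
and 0x51(%rax) is the se->sched_delayed byte.  Purely to mark where it
blows up (a sketch, not a proposed fix), the obvious guard would sit
right after the pick:

	struct sched_entity *se = pick_eevdf(cfs_rq);

	/* sketch only: pick_eevdf() came back empty in this crash */
	if (!se)
		return NULL;

	if (se->sched_delayed) {
		dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
		SCHED_WARN_ON(se->sched_delayed);
		SCHED_WARN_ON(se->on_rq);
		return NULL;
	}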
crash> task_struct -x 0xffff9118b7583300 | grep "__state ="
  __state = 0x200,
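
That 0x200 is indeed TASK_WAKING; for reference, the relevant bits from
include/linux/sched.h:

	#define TASK_WAKEKILL		0x00000100
	#define TASK_WAKING		0x00000200	/* == __state above */
	#define TASK_NOLOAD		0x00000400
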
crash> task_struct -x 0xffff9118b7583300 | grep rq
  on_rq = 0x0,
    on_rq = 0x0,
    cfs_rq = 0xffff9117e81a3e00,
    on_rq = 0x0,
    rq = 0x0,
crash> task_struct -xo | grep sched_entity
    [0x80] struct sched_entity se
crash> sched_entity 0xffff9118b7583380
struct sched_entity {
  load = {
    weight = 1048576,
    inv_weight = 4194304
  },
  run_node = {
    __rb_parent_color = 1,
    rb_right = 0x0,
    rb_left = 0x0
  },
  deadline = 5788784166,
  min_vruntime = 5785784166,
  min_slice = 3000000,
  group_node = {
    next = 0xffff9118b75833c0,
    prev = 0xffff9118b75833c0
  },
  on_rq = 0 '\000',
  sched_delayed = 0 '\000',
  rel_deadline = 0 '\000',
  custom_slice = 0 '\000',
  exec_start = 5630407844294,
  sum_exec_runtime = 5031478,
  prev_sum_exec_runtime = 5004139,
  vruntime = 5785811505,
  vlag = 0,
  slice = 3000000,
  nr_migrations = 0,
  depth = 1,
  parent = 0xffff9117e81a0600,
  cfs_rq = 0xffff9117e81a3e00,
  my_q = 0x0,
  runnable_weight = 0,
  avg = {
    last_update_time = 5630386353152,
    load_sum = 2555,
    runnable_sum = 2617274,
    util_sum = 83342,
    period_contrib = 877,
    load_avg = 39,
    runnable_avg = 39,
    util_avg = 1,
    util_est = 2147483760
  }
}
crash> cfs_rq 0xffff9117e81a3e00
struct cfs_rq {
  load = {
    weight = 0,
    inv_weight = 0
  },
  nr_running = 0,
  h_nr_running = 0,
  idle_nr_running = 0,
  idle_h_nr_running = 0,
  h_nr_delayed = 0,
  avg_vruntime = 0,
  avg_load = 0,
  min_vruntime = 5785811505,
  forceidle_seq = 0,
  min_vruntime_fi = 0,
  tasks_timeline = {
    rb_root = {
      rb_node = 0x0
    },
    rb_leftmost = 0x0
  },
  curr = 0xffff9118b7583380,
  next = 0x0,
  avg = {
    last_update_time = 5630386353152,
    load_sum = 2617381,
    runnable_sum = 2617379,
    util_sum = 83417,
    period_contrib = 877,
    load_avg = 39,
    runnable_avg = 39,
    util_avg = 1,
    util_est = 0
  },
  removed = {
    lock = {
      raw_lock = {
        {
          val = {
            counter = 0
          },
          {
            locked = 0 '\000',
            pending = 0 '\000'
          },
          {
            locked_pending = 0,
            tail = 0
          }
        }
      }
    },
    nr = 0,
    load_avg = 0,
    util_avg = 0,
    runnable_avg = 0
  },
  last_update_tg_load_avg = 5630407057919,
  tg_load_avg_contrib = 39,
  propagate = 0,
  prop_runnable_sum = 0,
  h_load = 0,
  last_h_load_update = 4296299815,
  h_load_next = 0x0,
  rq = 0xffff911a8eab89c0,
  on_list = 1,
  leaf_cfs_rq_list = {
    next = 0xffff911794a2d348,
    prev = 0xffff9119ebe62148
  },
  tg = 0xffff91178434a080,
  idle = 0,
  runtime_enabled = 0,
  runtime_remaining = 0,
  throttled_pelt_idle = 0,
  throttled_clock = 0,
  throttled_clock_pelt = 0,
  throttled_clock_pelt_time = 0,
  throttled_clock_self = 0,
  throttled_clock_self_time = 0,
  throttled = 0,
  throttle_count = 0,
  throttled_list = {
    next = 0xffff9117e81a3fa8,
    prev = 0xffff9117e81a3fa8
  },
  throttled_csd_list = {
    next = 0xffff9117e81a3fb8,
    prev = 0xffff9117e81a3fb8
  }
}
crash>
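
So the pieces fit: tasks_timeline is empty (rb_node and rb_leftmost both
NULL, nr_running = 0), while curr is this task's se with on_rq = 0, so
pick_eevdf() has nobody to pick.  Roughly, paraphrasing the tip code
from memory rather than quoting it:

	static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
	{
		struct sched_entity *se = __pick_first_entity(cfs_rq);	/* NULL, tree empty */
		struct sched_entity *curr = cfs_rq->curr;		/* our se, on_rq == 0 */
		struct sched_entity *best = NULL;

		/* curr is filtered out because it is no longer on_rq */
		if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
			curr = NULL;

		/* tree walk elided: an empty tree yields no candidate */

		if (!best || (curr && entity_before(curr, best)))
			best = curr;

		return best;	/* NULL, which pick_task_fair() then derefs */
	}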

