lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <SJ0PR02MB8862A7F336A45D8E8B0090C4FEE19@SJ0PR02MB8862.namprd02.prod.outlook.com>
Date:   Thu, 31 Mar 2022 18:17:14 +0000
From:   "Kallol Biswas [C]" <kallol.biswas@...anix.com>
To:     "netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: bug in i40e-2.14.13 driver ??

Hi,
     We have been getting a NULL pointer dereference in intel i40e driver.

[  105.551413] BUG: kernel NULL pointer dereference, address: 000000000000000a

PID: 369    TASK: ffff980d62d70000  CPU: 16  COMMAND: "kworker/16:1"
#0 [ffffb0354e26fb00] machine_kexec at ffffffffae059db5
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/arch/x86/kernel/machine_kexec_64.c: 441
#1 [ffffb0354e26fb50] __crash_kexec at ffffffffae12584d
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/kernel/kexec_core.c: 957
#2 [ffffb0354e26fc18] crash_kexec at ffffffffae126ab9
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/include/linux/compiler.h: 292
#3 [ffffb0354e26fc30] oops_end at ffffffffae02a3da
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/arch/x86/kernel/dumpstack.c: 334
#4 [ffffb0354e26fc50] no_context at ffffffffae065ff8
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/arch/x86/mm/fault.c: 848
#5 [ffffb0354e26fcc0] do_page_fault at ffffffffae066ad1
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/arch/x86/mm/fault.c: 1552
#6 [ffffb0354e26fcf0] page_fault at ffffffffae801119
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/arch/x86/entry/entry_64.S: 1203
    [exception RIP: i40e_detect_recover_hung+116]
    RIP: ffffffffc07ae0d4  RSP: ffffb0354e26fda0  RFLAGS: 00010202
    RAX: ffff980d64e6a000  RBX: ffff980d5b788c00  RCX: ffff980d6f426e08
    RDX: 0000000000000000  RSI: 0000000000000001  RDI: ffff980d5b788800
    RBP: 000000000000003c   R8: 0000000065303469   R9: 8080808080808080
    R10: 0000000000000000  R11: 0000000000000000  R12: ffff980d62d86000
    R13: 00000000ffffffff  R14: 0000000000000000  R15: ffff980d64e6a848
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
    /home/mockbuild/rpmbuild/BUILD/i40e-2.14.13/src/i40e_virtchnl_pf.c: 7253
#7 [ffffb0354e26fdc8] i40e_service_task at ffffffffc078ff9b [i40e]
    /home/mockbuild/rpmbuild/BUILD/i40e-2.14.13/src/i40e_ethtool.c: 5000
#8 [ffffb0354e26fe78] process_one_work at ffffffffae09818b
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/kernel/workqueue.c: 2271
#9 [ffffb0354e26feb8] worker_thread at ffffffffae098ca9
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/include/linux/compiler.h: 266
#10 [ffffb0354e26ff10] kthread at ffffffffae09e378
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/kernel/kthread.c: 268
#11 [ffffb0354e26ff50] ret_from_fork at ffffffffae8001ff
    /usr/src/debug/kernel-5.4.109/linux-5.4.109-2.el7.nutanix.20201105.2244.x86_64/arch/x86/entry/entry_64.S: 352

-------------------------------------------

movzwl 0xa(%rdx),%edx fails as RDX: 0000000000000000  (offset 0xa from 0) causes NULL pointer dereference
4:27
mov    0xe8(%rbx),%rdx program rdx, and %rbx is ffff980d5b788c00
x/x 0xffff980d5b788ce8
0xffff980d5b788ce8:     0x00000000, so %rdx gets programmed with 0.

crash> i40e_vsi.state ffff980d62d86000
  state = {0}
crash> i40e_vsi.netdev ffff980d62d86000
  netdev = 0xffff980d62d87000
crash> num_queue_pairs
crash: command not found: num_queue_pairs
crash> i40e_vsi.num_queue_pairs ffff980d62d86000
  num_queue_pairs = 64
All Tx rings
crash> x/64g 0xffff980d61f11800
0xffff980d61f11800:     0xffff980d61f11c00      0xffff980d61f12000
0xffff980d61f11810:     0xffff980d61f12400      0xffff980d61f12800
0xffff980d61f11820:     0xffff980d61f12c00      0xffff980d61f13000
0xffff980d61f11830:     0xffff980d61f13400      0xffff980d61f13800
0xffff980d61f11840:     0xffff980d61f13c00      0xffff980d61f14000
0xffff980d61f11850:     0xffff980d61f14400      0xffff980d61f14800
0xffff980d61f11860:     0xffff980d61f14c00      0xffff980d61f15000
0xffff980d61f11870:     0xffff980d61f15400      0xffff980d61f15800
0xffff980d61f11880:     0xffff980d61f15c00      0xffff980d61f16000
0xffff980d61f11890:     0xffff980d61f16400      0xffff980d61f16800
0xffff980d61f118a0:     0xffff980d61f16c00      0xffff980d61f17000
0xffff980d61f118b0:     0xffff980d61f17400      0xffff980d61f17800
0xffff980d61f118c0:     0xffff980d61f17c00      0xffff980d5b790000
0xffff980d61f118d0:     0xffff980d5b790400      0xffff980d5b790800
0xffff980d61f118e0:     0xffff980d5b790c00      0xffff980d5b791000
0xffff980d61f118f0:     0xffff980d5b791400      0xffff980d5b791800
0xffff980d61f11900:     0xffff980d5b791c00      0xffff980d5b792000
0xffff980d61f11910:     0xffff980d5b792400      0xffff980d5b792800
0xffff980d61f11920:     0xffff980d5b792c00      0xffff980d5b793000
0xffff980d61f11930:     0xffff980d5b793400      0xffff980d5b793800
0xffff980d61f11940:     0xffff980d5b793c00      0xffff980d5b794000
0xffff980d61f11950:     0xffff980d5b794400      0xffff980d5b794800
0xffff980d61f11960:     0xffff980d5b794c00      0xffff980d5b795000
0xffff980d61f11970:     0xffff980d5b795400      0xffff980d5b795800
0xffff980d61f11980:     0xffff980d5b795c00      0xffff980d5b796000
0xffff980d61f11990:     0xffff980d5b796400      0xffff980d5b796800
0xffff980d61f119a0:     0xffff980d5b796c00      0xffff980d5b797000
0xffff980d61f119b0:     0xffff980d5b797400      0xffff980d5b797800
0xffff980d61f119c0:     0xffff980d5b797c00      0xffff980d5b788000
0xffff980d61f119d0:     0xffff980d5b788400      0xffff980d5b788800
0xffff980d61f119e0:     0xffff980d5b788c00      0xffff980d5b789000
0xffff980d61f119f0:     0xffff980d5b789400      0xffff980d5b789800crash> struct i40e_ring.q_vector 0xffff980d5b788400  q_vector = 0xffff980d61c92800
crash> struct i40e_ring.q_vector 0xffff980d5b788400  
q_vector = 0xffff980d61c92800

crash> struct i40e_ring.q_vector 0xffff980d5b788c00
  q_vector = 0x0

So q_vector is not set after around 60 queues, yet in the driver we do a deference
i40e_force_wb():
(q_vector->reg_idx) and die.

Gdb macro:
define print_i40e_q_vector
    set $vsi = (struct i40e_vsi *)$arg0

    set $q_vectors = $vsi->num_q_vectors

    printf "vsi %p q_vectors %d", $vsi, $q_vectors
    set $index = 0

    while $index < $q_vectors

        set $q_vector = (struct i40e_q_vector *)$vsi->q_vectors[$index]

        printf "num_ringpairs %d\n", $q_vector->num_ringpairs

        set $index += 1
    end


end

Ouput:

crash> print_i40e_q_vector 0xffff980d62d86000
vsi 0xffff980d62d86000 q_vectors 64num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 1
num_ringpairs 0
num_ringpairs 0
num_ringpairs 0
num_ringpairs 0


Source code:

static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi)
{
  int qp_remaining = vsi->num_queue_pairs;
  int q_vectors = vsi->num_q_vectors;
  int num_ringpairs;
  int v_start = 0;
  int qp_idx = 0;

  /* If we don't have enough vectors for a 1-to-1 mapping, we'll have to
   * group them so there are multiple queues per vector.
   * It is also important to go through all the vectors available to be
   * sure that if we don't use all the vectors, that the remaining vectors
   * are cleared. This is especially important when decreasing the
   * number of queues in use.
   */
  for (; v_start < q_vectors; v_start++) {
    struct i40e_q_vector *q_vector = vsi->q_vectors[v_start];

    num_ringpairs = DIV_ROUND_UP(qp_remaining, q_vectors - v_start);

    q_vector->num_ringpairs = num_ringpairs;
    q_vector->reg_idx = q_vector->v_idx + vsi->base_vector - 1;

    q_vector->rx.count = 0;
    q_vector->tx.count = 0;
    q_vector->rx.ring = NULL;
    q_vector->tx.ring = NULL;

    while (num_ringpairs--) {
      i40e_map_vector_to_qp(vsi, v_start, qp_idx);
      qp_idx++;
      qp_remaining--;
    }
  }
}

How in the above for loop 
    num_ringpairs = DIV_ROUND_UP(qp_remaining, q_vectors - v_start);
evaluates to 0, is not clear.

Have we seen this problem before? If so, is there are fix?

Nucleodyne@...anix
408-718-8164

Nucleodyne@...anix
408-718-8164

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ