Add GPL license file #1

darakian · 2018-04-13T11:23:22Z

No description provided.

Create License file

gregmarsden · 2018-04-13T17:03:12Z

Thanks for the suggestion but this is covered by the toplevel COPYING file, as it is in
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/tree/?h=v4.1.51

Orabug: 27719848 The locking order in fuse should be nn->fc->lock then nn->lock, mis-order locking will cause deadlock. The following deadlock was caused. PID 378084 asked lock in wrong order. PID: 378084 TASK: ffff8825421942c0 CPU: 2 COMMAND: "dbfs_client" #0 [ffff88207f846e70] crash_nmi_callback at ffffffff810326c6 #1 [ffff88207f846e80] notifier_call_chain at ffffffff81513115 #2 [ffff88207f846ec0] atomic_notifier_call_chain at ffffffff8151317a #3 [ffff88207f846ed0] notify_die at ffffffff815131ae #4 [ffff88207f846f00] default_do_nmi at ffffffff815106b9 #5 [ffff88207f846f30] do_nmi at ffffffff81510840 #6 [ffff88207f846f50] nmi at ffffffff8150fc10 [exception RIP: __ticket_spin_lock+25] RIP: ffffffff81040fe9 RSP: ffff8801f6d3b8e8 RFLAGS: 00000297 RAX: 00000000000068f8 RBX: 0000000000021000 RCX: ffff881fbd8e2d50 RDX: 00000000000068f7 RSI: ffff8801f6d3ba78 RDI: ffff883127828000 RBP: ffff8801f6d3b8e8 R8: ffff8801f6d3ba20 R9: 0000000000000001 R10: 0000000000000001 R11: 0000000000000001 R12: ffff883127828000 R13: ffff8801f6d3ba78 R14: ffff881fbd8e2cc4 R15: ffff881fbd8e2cc0 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 --- <NMI exception stack> --- #7 [ffff8801f6d3b8e8] __ticket_spin_lock at ffffffff81040fe9 #8 [ffff8801f6d3b8f0] _raw_spin_lock at ffffffff8150f16e #9 [ffff8801f6d3b900] fuse_get_unique at ffffffffa00fe2ce [fuse] #10 [ffff8801f6d3b920] fuse_read_batch_forget at ffffffffa00fe820 [fuse] #11 [ffff8801f6d3b9a0] fuse_dev_do_read at ffffffffa010052c [fuse] #12 [ffff8801f6d3ba70] fuse_dev_read at ffffffffa0100984 [fuse] #13 [ffff8801f6d3baf0] do_sync_read at ffffffff8116da52 #14 [ffff8801f6d3bc00] vfs_read at ffffffff8116e195 #15 [ffff8801f6d3bc30] sys_read at ffffffff8116e361 #16 [ffff8801f6d3bc80] _read_orig at ffffffffa05f411d [krg_10_5_0_3021_impOEL6-UEK4-smp-x86_64] #17 [ffff8801f6d3bce0] syscall_wrappers_generic_flow_with_param at ffffffffa05f0cc6 [krg_10_5_0_3021_impOEL6-UEK4-smp-x86_64] #18 [ffff8801f6d3bdb0] syscall_wrappers_generic_read.clone.2 at ffffffffa05f136b [krg_10_5_0_3021_impOEL6-UEK4-smp-x86_64] #19 [ffff8801f6d3bee0] SYS_read_common_wrap at ffffffffa05f6085 [krg_10_5_0_3021_impOEL6-UEK4-smp-x86_64] #20 [ffff8801f6d3bf70] SYS_read_wrap64 at ffffffffa05f617e [krg_10_5_0_3021_impOEL6-UEK4-smp-x86_64] #21 [ffff8801f6d3bf80] system_call_fastpath at ffffffff81517622 RIP: 00007f1492a3282d RSP: 00007f148a5f1448 RFLAGS: 00010206 RAX: 0000000000000000 RBX: ffffffff81517622 RCX: 00007f12de0cafd0 RDX: 0000000000021000 RSI: 00007f11e3938550 RDI: 0000000000000004 RBP: 00000000023f1110 R8: 00007ffce2baab50 R9: 000000000005c4e4 R10: 0000000000000024 R11: 0000000000000293 R12: ffffffffa05f617e R13: ffff8801f6d3bf78 R14: 00007f148a5f1e58 R15: 0000000000021000 ORIG_RAX: 0000000000000000 CS: 0033 SS: 002b PID: 38445 TASK: ffff881072a1c600 CPU: 19 COMMAND: "ggcmd" #0 [ffff88407f026e70] crash_nmi_callback at ffffffff810326c6 #1 [ffff88407f026e80] notifier_call_chain at ffffffff81513115 #2 [ffff88407f026ec0] atomic_notifier_call_chain at ffffffff8151317a #3 [ffff88407f026ed0] notify_die at ffffffff815131ae #4 [ffff88407f026f00] default_do_nmi at ffffffff815106b9 #5 [ffff88407f026f30] do_nmi at ffffffff81510840 #6 [ffff88407f026f50] nmi at ffffffff8150fc10 [exception RIP: __ticket_spin_lock+28] RIP: ffffffff81040fec RSP: ffff881070b8fb48 RFLAGS: 00000297 RAX: 000000000000a41c RBX: ffff881fbd8e2cc4 RCX: 0000000000051000 RDX: 000000000000a41b RSI: ffff8811edefac50 RDI: ffff881fbd8e2cc4 RBP: ffff881070b8fb48 R8: ffff8811edefac58 R9: 0000000000000003 R10: ffff88407ffd8e00 R11: 000000000000007d R12: ffff881fbd8e2cc0 R13: ffff8811edefac50 R14: ffff8811edefac58 R15: ffff8811edefac50 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 --- <NMI exception stack> --- #7 [ffff881070b8fb48] __ticket_spin_lock at ffffffff81040fec #8 [ffff881070b8fb50] _raw_spin_lock at ffffffff8150f16e #9 [ffff881070b8fb60] fuse_request_send_background_locked at ffffffffa00ffa97 [fuse] #10 [ffff881070b8fb90] fuse_send_writepage at ffffffffa0108301 [fuse] #11 [ffff881070b8fbc0] fuse_flush_writepages at ffffffffa01083f3 [fuse] #12 [ffff881070b8fc00] fuse_writepage_locked at ffffffffa0108683 [fuse] #13 [ffff881070b8fc60] fuse_writepage at ffffffffa010875e [fuse] #14 [ffff881070b8fc80] __writepage at ffffffff8111a8a7 #15 [ffff881070b8fca0] write_cache_pages at ffffffff8111bc06 #16 [ffff881070b8fdd0] generic_writepages at ffffffff8111bf31 #17 [ffff881070b8fe30] do_writepages at ffffffff8111bf95 #18 [ffff881070b8fe40] __filemap_fdatawrite_range at ffffffff8111166b #19 [ffff881070b8fe90] filemap_fdatawrite at ffffffff8111193f #20 [ffff881070b8fea0] filemap_write_and_wait at ffffffff81111985 #21 [ffff881070b8fec0] fuse_vma_close at ffffffffa010662c [fuse] #22 [ffff881070b8fed0] remove_vma at ffffffff8113c8b3 #23 [ffff881070b8fef0] do_munmap at ffffffff8113e8cf #24 [ffff881070b8ff50] sys_munmap at ffffffff8113e9e6 #25 [ffff881070b8ff80] system_call_fastpath at ffffffff81517622 RIP: 00007f3ed5cc84b7 RSP: 00007f3ed5100950 RFLAGS: 00000216 RAX: 000000000000000b RBX: ffffffff81517622 RCX: 0000000000140070 RDX: 0000000000000000 RSI: 00000000002fe000 RDI: 00007f3ed4abc000 RBP: 00007f3ed4abc1d8 R8: 00000000ffffffff R9: ffffffffffffc4f9 R10: 00000000000ce02f R11: 0000000000000246 R12: 00007f3ed4abc000 R13: 0000000000000000 R14: 00007f3ecc20d950 R15: 00007f3ecc007620 ORIG_RAX: 000000000000000b CS: 0033 SS: 002b OFF-MAINLINE/UEK5: nn->lock was introduced by oracle special fuse numa aware patches. OFF-UEK4: New lock fc->seq_lock was introduced, fc->lock not used in fuse_get_unique(). Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com> Reviewed-by: Ashish Samant <ashish.samant@oracle.com> Signed-off-by: Brian Maly <brian.maly@oracle.com>

Scenario: 1. Port down and do fail over 2. Ap do rds_bind syscall PID: 47039 TASK: ffff89887e2fe640 CPU: 47 COMMAND: "kworker/u:6" #0 [ffff898e35f159f0] machine_kexec at ffffffff8103abf9 #1 [ffff898e35f15a60] crash_kexec at ffffffff810b96e3 #2 [ffff898e35f15b30] oops_end at ffffffff8150f518 #3 [ffff898e35f15b60] no_context at ffffffff8104854c #4 [ffff898e35f15ba0] __bad_area_nosemaphore at ffffffff81048675 #5 [ffff898e35f15bf0] bad_area_nosemaphore at ffffffff810487d3 #6 [ffff898e35f15c00] do_page_fault at ffffffff815120b8 #7 [ffff898e35f15d10] page_fault at ffffffff8150ea95 [exception RIP: unknown or invalid address] RIP: 0000000000000000 RSP: ffff898e35f15dc8 RFLAGS: 00010282 RAX: 00000000fffffffe RBX: ffff889b77f6fc00 RCX:ffffffff81c99d88 RDX: 0000000000000000 RSI: ffff896019ee08e8 RDI:ffff889b77f6fc00 RBP: ffff898e35f15df0 R8: ffff896019ee08c8 R9:0000000000000000 R10: 0000000000000400 R11: 0000000000000000 R12:ffff896019ee08c0 R13: ffff889b77f6fe68 R14: ffffffff81c99d80 R15: ffffffffa022a1e0 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #8 [ffff898e35f15dc8] cma_ndev_work_handler at ffffffffa022a228 [rdma_cm] #9 [ffff898e35f15df8] process_one_work at ffffffff8108a7c6 #10 [ffff898e35f15e58] worker_thread at ffffffff8108bda0 #11 [ffff898e35f15ee8] kthread at ffffffff81090fe6 PID: 45659 TASK: ffff880d313d2500 CPU: 31 COMMAND: "oracle_45659_ap" #0 [ffff881024ccfc98] __schedule at ffffffff8150bac4 #1 [ffff881024ccfd40] schedule at ffffffff8150c2cf #2 [ffff881024ccfd50] __mutex_lock_slowpath at ffffffff8150cee7 #3 [ffff881024ccfdc0] mutex_lock at ffffffff8150cdeb #4 [ffff881024ccfde0] rdma_destroy_id at ffffffffa022a027 [rdma_cm] #5 [ffff881024ccfe10] rds_ib_laddr_check at ffffffffa0357857 [rds_rdma] #6 [ffff881024ccfe50] rds_trans_get_preferred at ffffffffa0324c2a [rds] #7 [ffff881024ccfe80] rds_bind at ffffffffa031d690 [rds] #8 [ffff881024ccfeb0] sys_bind at ffffffff8142a670 PID: 45659 PID: 47039 rds_ib_laddr_check /* create id_priv with a null event_handler */ rdma_create_id rdma_bind_addr cma_acquire_dev /* add id_priv to cma_dev->id_list */ cma_attach_to_dev cma_ndev_work_handler /* event_hanlder is null */ id_priv->id.event_handler Orabug: 27241654 Signed-off-by: Guanglei Li <guanglei.li@oracle.com> Signed-off-by: Honglei Wang <honglei.wang@oracle.com> Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com> Reviewed-by: Yanjun Zhu <yanjun.zhu@oracle.com> Reviewed-by: Leon Romanovsky <leonro@mellanox.com> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> Acked-by: Doug Ledford <dledford@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> (cherry picked from commit 2c0aa08) Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com> Signed-off-by: Brian Maly <brian.maly@oracle.com>

Orabug: 27760268 The locking order in fuse should be nn->fc->lock then nn->lock, mis-order locking will cause deadlock. The following deadlock was caused. PID 378084 asked lock in wrong order. PID: 378084 TASK: ffff8825421942c0 CPU: 2 COMMAND: "dbfs_client" #0 [ffff88207f846e70] crash_nmi_callback at ffffffff810326c6 #1 [ffff88207f846e80] notifier_call_chain at ffffffff81513115 #2 [ffff88207f846ec0] atomic_notifier_call_chain at ffffffff8151317a #3 [ffff88207f846ed0] notify_die at ffffffff815131ae #4 [ffff88207f846f00] default_do_nmi at ffffffff815106b9 #5 [ffff88207f846f30] do_nmi at ffffffff81510840 #6 [ffff88207f846f50] nmi at ffffffff8150fc10 [exception RIP: __ticket_spin_lock+25] RIP: ffffffff81040fe9 RSP: ffff8801f6d3b8e8 RFLAGS: 00000297 RAX: 00000000000068f8 RBX: 0000000000021000 RCX: ffff881fbd8e2d50 RDX: 00000000000068f7 RSI: ffff8801f6d3ba78 RDI: ffff883127828000 RBP: ffff8801f6d3b8e8 R8: ffff8801f6d3ba20 R9: 0000000000000001 R10: 0000000000000001 R11: 0000000000000001 R12: ffff883127828000 R13: ffff8801f6d3ba78 R14: ffff881fbd8e2cc4 R15: ffff881fbd8e2cc0 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 --- <NMI exception stack> --- #7 [ffff8801f6d3b8e8] __ticket_spin_lock at ffffffff81040fe9 #8 [ffff8801f6d3b8f0] _raw_spin_lock at ffffffff8150f16e #9 [ffff8801f6d3b900] fuse_get_unique at ffffffffa00fe2ce [fuse] #10 [ffff8801f6d3b920] fuse_read_batch_forget at ffffffffa00fe820 [fuse] #11 [ffff8801f6d3b9a0] fuse_dev_do_read at ffffffffa010052c [fuse] #12 [ffff8801f6d3ba70] fuse_dev_read at ffffffffa0100984 [fuse] #13 [ffff8801f6d3baf0] do_sync_read at ffffffff8116da52 #14 [ffff8801f6d3bc00] vfs_read at ffffffff8116e195 #15 [ffff8801f6d3bc30] sys_read at ffffffff8116e361 #16 [ffff8801f6d3bc80] _read_orig at ffffffffa05f411d [krg_10_5_0_3021_impOEL6-UEK4-smp-x86_64] #17 [ffff8801f6d3bce0] syscall_wrappers_generic_flow_with_param at ffffffffa05f0cc6 [krg_10_5_0_3021_impOEL6-UEK4-smp-x86_64] #18 [ffff8801f6d3bdb0] syscall_wrappers_generic_read.clone.2 at ffffffffa05f136b [krg_10_5_0_3021_impOEL6-UEK4-smp-x86_64] #19 [ffff8801f6d3bee0] SYS_read_common_wrap at ffffffffa05f6085 [krg_10_5_0_3021_impOEL6-UEK4-smp-x86_64] #20 [ffff8801f6d3bf70] SYS_read_wrap64 at ffffffffa05f617e [krg_10_5_0_3021_impOEL6-UEK4-smp-x86_64] #21 [ffff8801f6d3bf80] system_call_fastpath at ffffffff81517622 RIP: 00007f1492a3282d RSP: 00007f148a5f1448 RFLAGS: 00010206 RAX: 0000000000000000 RBX: ffffffff81517622 RCX: 00007f12de0cafd0 RDX: 0000000000021000 RSI: 00007f11e3938550 RDI: 0000000000000004 RBP: 00000000023f1110 R8: 00007ffce2baab50 R9: 000000000005c4e4 R10: 0000000000000024 R11: 0000000000000293 R12: ffffffffa05f617e R13: ffff8801f6d3bf78 R14: 00007f148a5f1e58 R15: 0000000000021000 ORIG_RAX: 0000000000000000 CS: 0033 SS: 002b PID: 38445 TASK: ffff881072a1c600 CPU: 19 COMMAND: "ggcmd" #0 [ffff88407f026e70] crash_nmi_callback at ffffffff810326c6 #1 [ffff88407f026e80] notifier_call_chain at ffffffff81513115 #2 [ffff88407f026ec0] atomic_notifier_call_chain at ffffffff8151317a #3 [ffff88407f026ed0] notify_die at ffffffff815131ae #4 [ffff88407f026f00] default_do_nmi at ffffffff815106b9 #5 [ffff88407f026f30] do_nmi at ffffffff81510840 #6 [ffff88407f026f50] nmi at ffffffff8150fc10 [exception RIP: __ticket_spin_lock+28] RIP: ffffffff81040fec RSP: ffff881070b8fb48 RFLAGS: 00000297 RAX: 000000000000a41c RBX: ffff881fbd8e2cc4 RCX: 0000000000051000 RDX: 000000000000a41b RSI: ffff8811edefac50 RDI: ffff881fbd8e2cc4 RBP: ffff881070b8fb48 R8: ffff8811edefac58 R9: 0000000000000003 R10: ffff88407ffd8e00 R11: 000000000000007d R12: ffff881fbd8e2cc0 R13: ffff8811edefac50 R14: ffff8811edefac58 R15: ffff8811edefac50 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 --- <NMI exception stack> --- #7 [ffff881070b8fb48] __ticket_spin_lock at ffffffff81040fec #8 [ffff881070b8fb50] _raw_spin_lock at ffffffff8150f16e #9 [ffff881070b8fb60] fuse_request_send_background_locked at ffffffffa00ffa97 [fuse] #10 [ffff881070b8fb90] fuse_send_writepage at ffffffffa0108301 [fuse] #11 [ffff881070b8fbc0] fuse_flush_writepages at ffffffffa01083f3 [fuse] #12 [ffff881070b8fc00] fuse_writepage_locked at ffffffffa0108683 [fuse] #13 [ffff881070b8fc60] fuse_writepage at ffffffffa010875e [fuse] #14 [ffff881070b8fc80] __writepage at ffffffff8111a8a7 #15 [ffff881070b8fca0] write_cache_pages at ffffffff8111bc06 #16 [ffff881070b8fdd0] generic_writepages at ffffffff8111bf31 #17 [ffff881070b8fe30] do_writepages at ffffffff8111bf95 #18 [ffff881070b8fe40] __filemap_fdatawrite_range at ffffffff8111166b #19 [ffff881070b8fe90] filemap_fdatawrite at ffffffff8111193f #20 [ffff881070b8fea0] filemap_write_and_wait at ffffffff81111985 #21 [ffff881070b8fec0] fuse_vma_close at ffffffffa010662c [fuse] #22 [ffff881070b8fed0] remove_vma at ffffffff8113c8b3 #23 [ffff881070b8fef0] do_munmap at ffffffff8113e8cf #24 [ffff881070b8ff50] sys_munmap at ffffffff8113e9e6 #25 [ffff881070b8ff80] system_call_fastpath at ffffffff81517622 RIP: 00007f3ed5cc84b7 RSP: 00007f3ed5100950 RFLAGS: 00000216 RAX: 000000000000000b RBX: ffffffff81517622 RCX: 0000000000140070 RDX: 0000000000000000 RSI: 00000000002fe000 RDI: 00007f3ed4abc000 RBP: 00007f3ed4abc1d8 R8: 00000000ffffffff R9: ffffffffffffc4f9 R10: 00000000000ce02f R11: 0000000000000246 R12: 00007f3ed4abc000 R13: 0000000000000000 R14: 00007f3ecc20d950 R15: 00007f3ecc007620 ORIG_RAX: 000000000000000b CS: 0033 SS: 002b OFF-MAINLINE/UEK5: nn->lock was introduced by oracle special fuse numa aware patches. OFF-UEK4: New lock fc->seq_lock was introduced, fc->lock not used in fuse_get_unique(). Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com> Signed-off-by: Brian Maly <brian.maly@oracle.com>

commit 3f05317 upstream. syzbot reported a use-after-free of shm_file_data(file)->file->f_op in shm_get_unmapped_area(), called via sys_remap_file_pages(). Unfortunately it couldn't generate a reproducer, but I found a bug which I think caused it. When remap_file_pages() is passed a full System V shared memory segment, the memory is first unmapped, then a new map is created using the ->vm_file. Between these steps, the shm ID can be removed and reused for a new shm segment. But, shm_mmap() only checks whether the ID is currently valid before calling the underlying file's ->mmap(); it doesn't check whether it was reused. Thus it can use the wrong underlying file, one that was already freed. Fix this by making the "outer" shm file (the one that gets put in ->vm_file) hold a reference to the real shm file, and by making __shm_open() require that the file associated with the shm ID matches the one associated with the "outer" file. Taking the reference to the real shm file is needed to fully solve the problem, since otherwise sfd->file could point to a freed file, which then could be reallocated for the reused shm ID, causing the wrong shm segment to be mapped (and without the required permission checks). Commit 1ac0b6d ("ipc/shm: handle removed segments gracefully in shm_mmap()") almost fixed this bug, but it didn't go far enough because it didn't consider the case where the shm ID is reused. The following program usually reproduces this bug: #include <stdlib.h> #include <sys/shm.h> #include <sys/syscall.h> #include <unistd.h> int main() { int is_parent = (fork() != 0); srand(getpid()); for (;;) { int id = shmget(0xF00F, 4096, IPC_CREAT|0700); if (is_parent) { void *addr = shmat(id, NULL, 0); usleep(rand() % 50); while (!syscall(__NR_remap_file_pages, addr, 4096, 0, 0, 0)); } else { usleep(rand() % 50); shmctl(id, IPC_RMID, NULL); } } } It causes the following NULL pointer dereference due to a 'struct file' being used while it's being freed. (I couldn't actually get a KASAN use-after-free splat like in the syzbot report. But I think it's possible with this bug; it would just take a more extraordinary race...) BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 9 PID: 258 Comm: syz_ipc Not tainted 4.16.0-05140-gf8cf2f16a7c95 #189 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 RIP: 0010:d_inode include/linux/dcache.h:519 [inline] RIP: 0010:touch_atime+0x25/0xd0 fs/inode.c:1724 [...] Call Trace: file_accessed include/linux/fs.h:2063 [inline] shmem_mmap+0x25/0x40 mm/shmem.c:2149 call_mmap include/linux/fs.h:1789 [inline] shm_mmap+0x34/0x80 ipc/shm.c:465 call_mmap include/linux/fs.h:1789 [inline] mmap_region+0x309/0x5b0 mm/mmap.c:1712 do_mmap+0x294/0x4a0 mm/mmap.c:1483 do_mmap_pgoff include/linux/mm.h:2235 [inline] SYSC_remap_file_pages mm/mmap.c:2853 [inline] SyS_remap_file_pages+0x232/0x310 mm/mmap.c:2769 do_syscall_64+0x64/0x1a0 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 [ebiggers@google.com: add comment] Link: http://lkml.kernel.org/r/20180410192850.235835-1-ebiggers3@gmail.com Link: http://lkml.kernel.org/r/20180409043039.28915-1-ebiggers3@gmail.com Reported-by: syzbot+d11f321e7f1923157eac80aa990b446596f46439@syzkaller.appspotmail.com Fixes: c8d78c1 ("mm: replace remap_file_pages() syscall with emulation") Signed-off-by: Eric Biggers <ebiggers@google.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Davidlohr Bueso <dbueso@suse.de> Cc: Manfred Spraul <manfred@colorfullife.com> Cc: "Eric W . Biederman" <ebiederm@xmission.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

commit e6e133c upstream. Michael Ellerman reported the following call trace when running ftracetest: BUG: using __this_cpu_write() in preemptible [00000000] code: ftracetest/6178 caller is opt_pre_handler+0xc4/0x110 CPU: 1 PID: 6178 Comm: ftracetest Not tainted 4.15.0-rc7-gcc6x-gb2cd1df #1 Call Trace: [c0000000f9ec39c0] [c000000000ac4304] dump_stack+0xb4/0x100 (unreliable) [c0000000f9ec3a00] [c00000000061159c] check_preemption_disabled+0x15c/0x170 [c0000000f9ec3a90] [c000000000217e84] opt_pre_handler+0xc4/0x110 [c0000000f9ec3af0] [c00000000004cf68] optimized_callback+0x148/0x170 [c0000000f9ec3b40] [c00000000004d954] optinsn_slot+0xec/0x10000 [c0000000f9ec3e30] [c00000000004bae0] kretprobe_trampoline+0x0/0x10 This is showing up since OPTPROBES is now enabled with CONFIG_PREEMPT. trampoline_probe_handler() considers itself to be a special kprobe handler for kretprobes. In doing so, it expects to be called from kprobe_handler() on a trap, and re-enables preemption before returning a non-zero return value so as to suppress any subsequent processing of the trap by the kprobe_handler(). However, with optprobes, we don't deal with special handlers (we ignore the return code) and just try to re-enable preemption causing the above trace. To address this, modify trampoline_probe_handler() to not be special. The only additional processing done in kprobe_handler() is to emulate the instruction (in this case, a 'nop'). We adjust the value of regs->nip for the purpose and delegate the job of re-enabling preemption and resetting current kprobe to the probe handlers (kprobe_handler() or optimized_callback()). Fixes: 8a2d71a ("powerpc/kprobes: Disable preemption before invoking probe handler for optprobes") Cc: stable@vger.kernel.org # v4.15+ Reported-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Acked-by: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

commit a6544a6 upstream. This patch avoids that KASAN reports the following when the SRP initiator calls srp_post_send(): ================================================================== BUG: KASAN: stack-out-of-bounds in rxe_post_send+0x5c4/0x980 [rdma_rxe] Read of size 8 at addr ffff880066606e30 by task 02-mq/1074 CPU: 2 PID: 1074 Comm: 02-mq Not tainted 4.16.0-rc3-dbg+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0x85/0xc7 print_address_description+0x65/0x270 kasan_report+0x231/0x350 rxe_post_send+0x5c4/0x980 [rdma_rxe] srp_post_send.isra.16+0x149/0x190 [ib_srp] srp_queuecommand+0x94d/0x1670 [ib_srp] scsi_dispatch_cmd+0x1c2/0x550 [scsi_mod] scsi_queue_rq+0x843/0xa70 [scsi_mod] blk_mq_dispatch_rq_list+0x143/0xac0 blk_mq_do_dispatch_ctx+0x1c5/0x260 blk_mq_sched_dispatch_requests+0x2bf/0x2f0 __blk_mq_run_hw_queue+0xdb/0x160 __blk_mq_delay_run_hw_queue+0xba/0x100 blk_mq_run_hw_queue+0xf2/0x190 blk_mq_sched_insert_request+0x163/0x2f0 blk_execute_rq+0xb0/0x130 scsi_execute+0x14e/0x260 [scsi_mod] scsi_probe_and_add_lun+0x366/0x13d0 [scsi_mod] __scsi_scan_target+0x18a/0x810 [scsi_mod] scsi_scan_target+0x11e/0x130 [scsi_mod] srp_create_target+0x1522/0x19e0 [ib_srp] kernfs_fop_write+0x180/0x210 __vfs_write+0xb1/0x2e0 vfs_write+0xf6/0x250 SyS_write+0x99/0x110 do_syscall_64+0xee/0x2b0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 The buggy address belongs to the page: page:ffffea0001998180 count:0 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x4000000000000000() raw: 4000000000000000 0000000000000000 0000000000000000 00000000ffffffff raw: dead000000000100 dead000000000200 0000000000000000 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff880066606d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 ffff880066606d80: f1 00 f2 f2 f2 f2 f2 f2 f2 00 00 f2 f2 f2 f2 f2 >ffff880066606e00: f2 00 00 00 00 00 f2 f2 f2 f3 f3 f3 f3 00 00 00 ^ ffff880066606e80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff880066606f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Fixes: 8700e3e ("Soft RoCE driver") Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com> Cc: Moni Shoua <monis@mellanox.com> Cc: stable@vger.kernel.org Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

commit 4f86722 upstream. The following NULL dereference results from incorrectly assuming that ndd is valid in this print: struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]); /* * Give up if we don't find an instance of a uuid at each * position (from 0 to nd_region->ndr_mappings - 1), or if we * find a dimm with two instances of the same uuid. */ dev_err(&nd_region->dev, "%s missing label for %pUb\n", dev_name(ndd->dev), nd_label->uuid); BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 IP: nd_region_register_namespaces+0xd67/0x13c0 [libnvdimm] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 43 PID: 673 Comm: kworker/u609:10 Not tainted 4.16.0-rc4+ #1 [..] RIP: 0010:nd_region_register_namespaces+0xd67/0x13c0 [libnvdimm] [..] Call Trace: ? devres_add+0x2f/0x40 ? devm_kmalloc+0x52/0x60 ? nd_region_activate+0x9c/0x320 [libnvdimm] nd_region_probe+0x94/0x260 [libnvdimm] ? kernfs_add_one+0xe4/0x130 nvdimm_bus_probe+0x63/0x100 [libnvdimm] Switch to using the nvdimm device directly. Fixes: 0e3b0d1 ("libnvdimm, namespace: allow multiple pmem...") Cc: <stable@vger.kernel.org> Reported-by: Dave Jiang <dave.jiang@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[not upstream as the driver is deleted in 4.16 - gregkh] Whenever poll is called, the reference count is increased but never decreased. This means that on rmmod, the lirc_thread is not stopped, and will trample over freed memory. Zilog/Hauppauge IR driver unloaded BUG: unable to handle kernel paging request at ffffffffc17ba640 Oops: 0010 [#1] SMP CPU: 1 PID: 667 Comm: zilog-rx-i2c-1 Tainted: P C OE 4.13.16-302.fc27.x86_64 #1 Hardware name: Gigabyte Technology Co., Ltd. GA-MA790FXT-UD5P/GA-MA790FXT-UD5P, BIOS F6 08/06/2009 task: ffff964eb452ca00 task.stack: ffffb254414dc000 RIP: 0010:0xffffffffc17ba640 RSP: 0018:ffffb254414dfe78 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff964ec1b35890 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000246 RBP: ffffb254414dff00 R08: 000000000000036e R09: ffff964ecfc8dfd0 R10: ffffb254414dfe78 R11: 00000000000f4240 R12: ffff964ec2bf28a0 R13: ffff964ec1b358a8 R14: ffff964ec1b358d0 R15: ffff964ec1b35800 FS: 0000000000000000(0000) GS:ffff964ecfc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffc17ba640 CR3: 000000023058c000 CR4: 00000000000006e0 Call Trace: kthread+0x125/0x140 ? kthread_park+0x60/0x60 ? do_syscall_64+0x67/0x140 ret_from_fork+0x25/0x30 Code: Bad RIP value. RIP: 0xffffffffc17ba640 RSP: ffffb254414dfe78 CR2: ffffffffc17ba640 Note that zilog-rx-i2c-1 should have exited by now, but hasn't due to the missing put in poll(). This code has been replaced completely in kernel v4.16 by a new driver, see commit acaa34b ("media: rc: implement zilog transmitter"), and commit f95367a ("media: staging: remove lirc_zilog driver"). Cc: stable@vger.kernel.org # v4.15- (all up to and including v4.15) Reported-by: Warren Sturm <warren.sturm@gmail.com> Tested-by: Warren Sturm <warren.sturm@gmail.com> Signed-off-by: Sean Young <sean@mess.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[ Upstream commit 9d2e650 ] after commit a1ddcbe ("iommu/vt-d: Pass dmar_domain directly into iommu_flush_iotlb_psi", 2015-08-12), we have domain pointer as parameter to iommu_flush_iotlb_psi(), so no need to fetch it from cache again. More importantly, a NULL reference pointer bug is reported on RHEL7 (and it can be reproduced on some old upstream kernels too, e.g., v4.13) by unplugging an 40g nic from a VM (hard to test unplug on real host, but it should be the same): https://bugzilla.redhat.com/show_bug.cgi?id=1531367 [ 24.391863] pciehp 0000:00:03.0:pcie004: Slot(0): Attention button pressed [ 24.393442] pciehp 0000:00:03.0:pcie004: Slot(0): Powering off due to button press [ 29.721068] i40evf 0000:01:00.0: Unable to send opcode 2 to PF, err I40E_ERR_QUEUE_EMPTY, aq_err OK [ 29.783557] iommu: Removing device 0000:01:00.0 from group 3 [ 29.784662] BUG: unable to handle kernel NULL pointer dereference at 0000000000000304 [ 29.785817] IP: iommu_flush_iotlb_psi+0xcf/0x120 [ 29.786486] PGD 0 [ 29.786487] P4D 0 [ 29.786812] [ 29.787390] Oops: 0000 [#1] SMP [ 29.787876] Modules linked in: ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_ng [ 29.795371] CPU: 0 PID: 156 Comm: kworker/0:2 Not tainted 4.13.0 #14 [ 29.796366] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.11.0-1.el7 04/01/2014 [ 29.797593] Workqueue: pciehp-0 pciehp_power_thread [ 29.798328] task: ffff94f5745b4a00 task.stack: ffffb326805ac000 [ 29.799178] RIP: 0010:iommu_flush_iotlb_psi+0xcf/0x120 [ 29.799919] RSP: 0018:ffffb326805afbd0 EFLAGS: 00010086 [ 29.800666] RAX: ffff94f5bc56e800 RBX: 0000000000000000 RCX: 0000000200000025 [ 29.801667] RDX: ffff94f5bc56e000 RSI: 0000000000000082 RDI: 0000000000000000 [ 29.802755] RBP: ffffb326805afbf8 R08: 0000000000000000 R09: ffff94f5bc86bbf0 [ 29.803772] R10: ffffb326805afba8 R11: 00000000000ffdc4 R12: ffff94f5bc86a400 [ 29.804789] R13: 0000000000000000 R14: 00000000ffdc4000 R15: 0000000000000000 [ 29.805792] FS: 0000000000000000(0000) GS:ffff94f5bfc00000(0000) knlGS:0000000000000000 [ 29.806923] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 29.807736] CR2: 0000000000000304 CR3: 000000003499d000 CR4: 00000000000006f0 [ 29.808747] Call Trace: [ 29.809156] flush_unmaps_timeout+0x126/0x1c0 [ 29.809800] domain_exit+0xd6/0x100 [ 29.810322] device_notifier+0x6b/0x70 [ 29.810902] notifier_call_chain+0x4a/0x70 [ 29.812822] __blocking_notifier_call_chain+0x47/0x60 [ 29.814499] blocking_notifier_call_chain+0x16/0x20 [ 29.816137] device_del+0x233/0x320 [ 29.817588] pci_remove_bus_device+0x6f/0x110 [ 29.819133] pci_stop_and_remove_bus_device+0x1a/0x20 [ 29.820817] pciehp_unconfigure_device+0x7a/0x1d0 [ 29.822434] pciehp_disable_slot+0x52/0xe0 [ 29.823931] pciehp_power_thread+0x8a/0xa0 [ 29.825411] process_one_work+0x18c/0x3a0 [ 29.826875] worker_thread+0x4e/0x3b0 [ 29.828263] kthread+0x109/0x140 [ 29.829564] ? process_one_work+0x3a0/0x3a0 [ 29.831081] ? kthread_park+0x60/0x60 [ 29.832464] ret_from_fork+0x25/0x30 [ 29.833794] Code: 85 ed 74 0b 5b 41 5c 41 5d 41 5e 41 5f 5d c3 49 8b 54 24 60 44 89 f8 0f b6 c4 48 8b 04 c2 48 85 c0 74 49 45 0f b6 ff 4a 8b 3c f8 <80> bf [ 29.838514] RIP: iommu_flush_iotlb_psi+0xcf/0x120 RSP: ffffb326805afbd0 [ 29.840362] CR2: 0000000000000304 [ 29.841716] ---[ end trace b10ec0d6900868d3 ]--- This patch fixes that problem if applied to v4.13 kernel. The bug does not exist on latest upstream kernel since it's fixed as a side effect of commit 13cf017 ("iommu/vt-d: Make use of iova deferred flushing", 2017-08-15). But IMHO it's still good to have this patch upstream. CC: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Peter Xu <peterx@redhat.com> Fixes: a1ddcbe ("iommu/vt-d: Pass dmar_domain directly into iommu_flush_iotlb_psi") Reviewed-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Joerg Roedel <jroedel@suse.de> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[ Upstream commit 5bdd0c6 ] If jffs2_iget() fails for a newly-allocated inode, jffs2_do_clear_inode() can get called twice in the error handling path, the first call in jffs2_iget() itself and the second through iget_failed(). This can result to a use-after-free error in the second jffs2_do_clear_inode() call, such as shown by the oops below wherein the second jffs2_do_clear_inode() call was trying to free node fragments that were already freed in the first jffs2_do_clear_inode() call. [ 78.178860] jffs2: error: (1904) jffs2_do_read_inode_internal: CRC failed for read_inode of inode 24 at physical location 0x1fc00c [ 78.178914] Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b6b7b [ 78.185871] pgd = ffffffc03a567000 [ 78.188794] [6b6b6b6b6b6b6b7b] *pgd=0000000000000000, *pud=0000000000000000 [ 78.194968] Internal error: Oops: 96000004 [#1] PREEMPT SMP ... [ 78.513147] PC is at rb_first_postorder+0xc/0x28 [ 78.516503] LR is at jffs2_kill_fragtree+0x28/0x90 [jffs2] [ 78.520672] pc : [<ffffff8008323d28>] lr : [<ffffff8000eb1cc8>] pstate: 60000105 [ 78.526757] sp : ffffff800cea38f0 [ 78.528753] x29: ffffff800cea38f0 x28: ffffffc01f3f8e80 [ 78.532754] x27: 0000000000000000 x26: ffffff800cea3c70 [ 78.536756] x25: 00000000dc67c8ae x24: ffffffc033d6945d [ 78.540759] x23: ffffffc036811740 x22: ffffff800891a5b8 [ 78.544760] x21: 0000000000000000 x20: 0000000000000000 [ 78.548762] x19: ffffffc037d48910 x18: ffffff800891a588 [ 78.552764] x17: 0000000000000800 x16: 0000000000000c00 [ 78.556766] x15: 0000000000000010 x14: 6f2065646f6e695f [ 78.560767] x13: 6461657220726f66 x12: 2064656c69616620 [ 78.564769] x11: 435243203a6c616e x10: 7265746e695f6564 [ 78.568771] x9 : 6f6e695f64616572 x8 : ffffffc037974038 [ 78.572774] x7 : bbbbbbbbbbbbbbbb x6 : 0000000000000008 [ 78.576775] x5 : 002f91d85bd44a2f x4 : 0000000000000000 [ 78.580777] x3 : 0000000000000000 x2 : 000000403755e000 [ 78.584779] x1 : 6b6b6b6b6b6b6b6b x0 : 6b6b6b6b6b6b6b6b ... [ 79.038551] [<ffffff8008323d28>] rb_first_postorder+0xc/0x28 [ 79.042962] [<ffffff8000eb5578>] jffs2_do_clear_inode+0x88/0x100 [jffs2] [ 79.048395] [<ffffff8000eb9ddc>] jffs2_evict_inode+0x3c/0x48 [jffs2] [ 79.053443] [<ffffff8008201ca8>] evict+0xb0/0x168 [ 79.056835] [<ffffff8008202650>] iput+0x1c0/0x200 [ 79.060228] [<ffffff800820408c>] iget_failed+0x30/0x3c [ 79.064097] [<ffffff8000eba0c0>] jffs2_iget+0x2d8/0x360 [jffs2] [ 79.068740] [<ffffff8000eb0a60>] jffs2_lookup+0xe8/0x130 [jffs2] [ 79.073434] [<ffffff80081f1a28>] lookup_slow+0x118/0x190 [ 79.077435] [<ffffff80081f4708>] walk_component+0xfc/0x28c [ 79.081610] [<ffffff80081f4dd0>] path_lookupat+0x84/0x108 [ 79.085699] [<ffffff80081f5578>] filename_lookup+0x88/0x100 [ 79.089960] [<ffffff80081f572c>] user_path_at_empty+0x58/0x6c [ 79.094396] [<ffffff80081ebe14>] vfs_statx+0xa4/0x114 [ 79.098138] [<ffffff80081ec44c>] SyS_newfstatat+0x58/0x98 [ 79.102227] [<ffffff800808354c>] __sys_trace_return+0x0/0x4 [ 79.106489] Code: d65f03c0 f9400001 b40000e1 aa0103e0 (f9400821) The jffs2_do_clear_inode() call in jffs2_iget() is unnecessary since iget_failed() will eventually call jffs2_do_clear_inode() if needed, so just remove it. Fixes: 5451f79 ("iget: stop JFFS2 from using iget() and read_inode()") Reviewed-by: Richard Weinberger <richard@nod.at> Signed-off-by: Jake Daryll Obina <jake.obina@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[ Upstream commit 202a0a7 ] When the frame check sequence (FCS) is split across the last two frames of a fragmented packet, part of the FCS gets counted twice, once when subtracting the FCS, and again when subtracting the previously received data. For example, if 1602 bytes are received, and the first fragment contains the first 1600 bytes (including the first two bytes of the FCS), and the second fragment contains the last two bytes of the FCS: 'skb->len == 1600' from the first fragment size = lstatus & BD_LENGTH_MASK; # 1602 size -= ETH_FCS_LEN; # 1598 size -= skb->len; # -2 Since the size is unsigned, it wraps around and causes a BUG later in the packet handling, as shown below: kernel BUG at ./include/linux/skbuff.h:2068! Oops: Exception in kernel mode, sig: 5 [#1] ... NIP [c021ec60] skb_pull+0x24/0x44 LR [c01e2fbc] gfar_clean_rx_ring+0x498/0x690 Call Trace: [df7edeb0] [c01e2c1c] gfar_clean_rx_ring+0xf8/0x690 (unreliable) [df7edf20] [c01e33a8] gfar_poll_rx_sq+0x3c/0x9c [df7edf40] [c023352c] net_rx_action+0x21c/0x274 [df7edf90] [c0329000] __do_softirq+0xd8/0x240 [df7edff0] [c000c108] call_do_irq+0x24/0x3c [c0597e90] [c00041dc] do_IRQ+0x64/0xc4 [c0597eb0] [c000d920] ret_from_except+0x0/0x18 --- interrupt: 501 at arch_cpu_idle+0x24/0x5c Change the size to a signed integer and then trim off any part of the FCS that was received prior to the last fragment. Fixes: 6c389fc ("gianfar: fix size of scatter-gathered frames") Signed-off-by: Andy Spencer <aspencer@spacex.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[ Upstream commit 3b454ad ] In the current design, khugepaged needs to acquire mmap_sem before scanning an mm. But in some corner cases, khugepaged may scan a process which is modifying its memory mapping, so khugepaged blocks in uninterruptible state. But the process might hold the mmap_sem for a long time when modifying a huge memory space and it may trigger the below khugepaged hung issue: INFO: task khugepaged:270 blocked for more than 120 seconds. Tainted: G E 4.9.65-006.ali3000.alios7.x86_64 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. khugepaged D 0 270 2 0x00000000 ffff883f3deae4c0 0000000000000000 ffff883f610596c0 ffff883f7d359440 ffff883f63818000 ffffc90019adfc78 ffffffff817079a5 d67e5aa8c1860a64 0000000000000246 ffff883f7d359440 ffffc90019adfc88 ffff883f610596c0 Call Trace: schedule+0x36/0x80 rwsem_down_read_failed+0xf0/0x150 call_rwsem_down_read_failed+0x18/0x30 down_read+0x20/0x40 khugepaged+0x476/0x11d0 kthread+0xe6/0x100 ret_from_fork+0x25/0x30 So it sounds pointless to just block khugepaged waiting for the semaphore so replace down_read() with down_read_trylock() to move to scan the next mm quickly instead of just blocking on the semaphore so that other processes can get more chances to install THP. Then khugepaged can come back to scan the skipped mm when it has finished the current round full_scan. And it appears that the change can improve khugepaged efficiency a little bit. Below is the test result when running LTP on a 24 cores 4GB memory 2 nodes NUMA VM: pristine w/ trylock full_scan 197 187 pages_collapsed 21 26 thp_fault_alloc 40818 44466 thp_fault_fallback 18413 16679 thp_collapse_alloc 21 150 thp_collapse_alloc_failed 14 16 thp_file_alloc 369 369 [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: tweak comment] [arnd@arndb.de: avoid uninitialized variable use] Link: http://lkml.kernel.org/r/20171215125129.2948634-1-arnd@arndb.de Link: http://lkml.kernel.org/r/1513281203-54878-1-git-send-email-yang.s@alibaba-inc.com Signed-off-by: Yang Shi <yang.s@alibaba-inc.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Cc: Hugh Dickins <hughd@google.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[ Upstream commit 2c0aa08 ] Scenario: 1. Port down and do fail over 2. Ap do rds_bind syscall PID: 47039 TASK: ffff89887e2fe640 CPU: 47 COMMAND: "kworker/u:6" #0 [ffff898e35f159f0] machine_kexec at ffffffff8103abf9 #1 [ffff898e35f15a60] crash_kexec at ffffffff810b96e3 #2 [ffff898e35f15b30] oops_end at ffffffff8150f518 #3 [ffff898e35f15b60] no_context at ffffffff8104854c #4 [ffff898e35f15ba0] __bad_area_nosemaphore at ffffffff81048675 #5 [ffff898e35f15bf0] bad_area_nosemaphore at ffffffff810487d3 #6 [ffff898e35f15c00] do_page_fault at ffffffff815120b8 #7 [ffff898e35f15d10] page_fault at ffffffff8150ea95 [exception RIP: unknown or invalid address] RIP: 0000000000000000 RSP: ffff898e35f15dc8 RFLAGS: 00010282 RAX: 00000000fffffffe RBX: ffff889b77f6fc00 RCX:ffffffff81c99d88 RDX: 0000000000000000 RSI: ffff896019ee08e8 RDI:ffff889b77f6fc00 RBP: ffff898e35f15df0 R8: ffff896019ee08c8 R9:0000000000000000 R10: 0000000000000400 R11: 0000000000000000 R12:ffff896019ee08c0 R13: ffff889b77f6fe68 R14: ffffffff81c99d80 R15: ffffffffa022a1e0 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #8 [ffff898e35f15dc8] cma_ndev_work_handler at ffffffffa022a228 [rdma_cm] #9 [ffff898e35f15df8] process_one_work at ffffffff8108a7c6 #10 [ffff898e35f15e58] worker_thread at ffffffff8108bda0 #11 [ffff898e35f15ee8] kthread at ffffffff81090fe6 PID: 45659 TASK: ffff880d313d2500 CPU: 31 COMMAND: "oracle_45659_ap" #0 [ffff881024ccfc98] __schedule at ffffffff8150bac4 #1 [ffff881024ccfd40] schedule at ffffffff8150c2cf #2 [ffff881024ccfd50] __mutex_lock_slowpath at ffffffff8150cee7 #3 [ffff881024ccfdc0] mutex_lock at ffffffff8150cdeb #4 [ffff881024ccfde0] rdma_destroy_id at ffffffffa022a027 [rdma_cm] #5 [ffff881024ccfe10] rds_ib_laddr_check at ffffffffa0357857 [rds_rdma] #6 [ffff881024ccfe50] rds_trans_get_preferred at ffffffffa0324c2a [rds] #7 [ffff881024ccfe80] rds_bind at ffffffffa031d690 [rds] #8 [ffff881024ccfeb0] sys_bind at ffffffff8142a670 PID: 45659 PID: 47039 rds_ib_laddr_check /* create id_priv with a null event_handler */ rdma_create_id rdma_bind_addr cma_acquire_dev /* add id_priv to cma_dev->id_list */ cma_attach_to_dev cma_ndev_work_handler /* event_hanlder is null */ id_priv->id.event_handler Signed-off-by: Guanglei Li <guanglei.li@oracle.com> Signed-off-by: Honglei Wang <honglei.wang@oracle.com> Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com> Reviewed-by: Yanjun Zhu <yanjun.zhu@oracle.com> Reviewed-by: Leon Romanovsky <leonro@mellanox.com> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> Acked-by: Doug Ledford <dledford@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[ Upstream commit e7bde88 ] The OPAL IMC driver's shutdown handler disables nest PMU counters by walking nodes and taking the first CPU out of their cpumask, which is used to index into the paca (get_hard_smp_processor_id()). This does not always do the right thing, and in particular for CPU-less nodes it returns NR_CPUS and that overruns the paca and dereferences random memory. Fix it by being more careful about checking returned CPU, and only using online CPUs. It's not clear this shutdown code makes sense after commit 885dcd7 ("powerpc/perf: Add nest IMC PMU support"), but this should not make things worse Currently the bug causes us to call OPAL with a junk CPU number. A separate patch in development to change the way pacas are allocated escalates this bug into a crash: Unable to handle kernel paging request for data at address 0x2a21af1eeb000076 Faulting instruction address: 0xc0000000000a5468 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP opal_imc_counters_shutdown+0x148/0x1d0 LR opal_imc_counters_shutdown+0x134/0x1d0 Call Trace: opal_imc_counters_shutdown+0x134/0x1d0 (unreliable) platform_drv_shutdown+0x44/0x60 device_shutdown+0x1f8/0x350 kernel_restart_prepare+0x54/0x70 kernel_restart+0x28/0xc0 SyS_reboot+0x1d0/0x2c0 system_call+0x58/0x6c Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[ Upstream commit b6dd4d8 ] The pr_debug() in gic-v3 gic_send_sgi() can trigger a circular locking warning: GICv3: CPU10: ICC_SGI1R_EL1 5000400 ====================================================== WARNING: possible circular locking dependency detected 4.15.0+ #1 Tainted: G W ------------------------------------------------------ dynamic_debug01/1873 is trying to acquire lock: ((console_sem).lock){-...}, at: [<0000000099c891ec>] down_trylock+0x20/0x4c but task is already holding lock: (&rq->lock){-.-.}, at: [<00000000842e1587>] __task_rq_lock+0x54/0xdc which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&rq->lock){-.-.}: __lock_acquire+0x3b4/0x6e0 lock_acquire+0xf4/0x2a8 _raw_spin_lock+0x4c/0x60 task_fork_fair+0x3c/0x148 sched_fork+0x10c/0x214 copy_process.isra.32.part.33+0x4e8/0x14f0 _do_fork+0xe8/0x78c kernel_thread+0x48/0x54 rest_init+0x34/0x2a4 start_kernel+0x45c/0x488 -> #1 (&p->pi_lock){-.-.}: __lock_acquire+0x3b4/0x6e0 lock_acquire+0xf4/0x2a8 _raw_spin_lock_irqsave+0x58/0x70 try_to_wake_up+0x48/0x600 wake_up_process+0x28/0x34 __up.isra.0+0x60/0x6c up+0x60/0x68 __up_console_sem+0x4c/0x7c console_unlock+0x328/0x634 vprintk_emit+0x25c/0x390 dev_vprintk_emit+0xc4/0x1fc dev_printk_emit+0x88/0xa8 __dev_printk+0x58/0x9c _dev_info+0x84/0xa8 usb_new_device+0x100/0x474 hub_port_connect+0x280/0x92c hub_event+0x740/0xa84 process_one_work+0x240/0x70c worker_thread+0x60/0x400 kthread+0x110/0x13c ret_from_fork+0x10/0x18 -> #0 ((console_sem).lock){-...}: validate_chain.isra.34+0x6e4/0xa20 __lock_acquire+0x3b4/0x6e0 lock_acquire+0xf4/0x2a8 _raw_spin_lock_irqsave+0x58/0x70 down_trylock+0x20/0x4c __down_trylock_console_sem+0x3c/0x9c console_trylock+0x20/0xb0 vprintk_emit+0x254/0x390 vprintk_default+0x58/0x90 vprintk_func+0xbc/0x164 printk+0x80/0xa0 __dynamic_pr_debug+0x84/0xac gic_raise_softirq+0x184/0x18c smp_cross_call+0xac/0x218 smp_send_reschedule+0x3c/0x48 resched_curr+0x60/0x9c check_preempt_curr+0x70/0xdc wake_up_new_task+0x310/0x470 _do_fork+0x188/0x78c SyS_clone+0x44/0x50 __sys_trace_return+0x0/0x4 other info that might help us debug this: Chain exists of: (console_sem).lock --> &p->pi_lock --> &rq->lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&rq->lock); lock(&p->pi_lock); lock(&rq->lock); lock((console_sem).lock); *** DEADLOCK *** 2 locks held by dynamic_debug01/1873: #0: (&p->pi_lock){-.-.}, at: [<000000001366df53>] wake_up_new_task+0x40/0x470 #1: (&rq->lock){-.-.}, at: [<00000000842e1587>] __task_rq_lock+0x54/0xdc stack backtrace: CPU: 10 PID: 1873 Comm: dynamic_debug01 Tainted: G W 4.15.0+ #1 Hardware name: GIGABYTE R120-T34-00/MT30-GS2-00, BIOS T48 10/02/2017 Call trace: dump_backtrace+0x0/0x188 show_stack+0x24/0x2c dump_stack+0xa4/0xe0 print_circular_bug.isra.31+0x29c/0x2b8 check_prev_add.constprop.39+0x6c8/0x6dc validate_chain.isra.34+0x6e4/0xa20 __lock_acquire+0x3b4/0x6e0 lock_acquire+0xf4/0x2a8 _raw_spin_lock_irqsave+0x58/0x70 down_trylock+0x20/0x4c __down_trylock_console_sem+0x3c/0x9c console_trylock+0x20/0xb0 vprintk_emit+0x254/0x390 vprintk_default+0x58/0x90 vprintk_func+0xbc/0x164 printk+0x80/0xa0 __dynamic_pr_debug+0x84/0xac gic_raise_softirq+0x184/0x18c smp_cross_call+0xac/0x218 smp_send_reschedule+0x3c/0x48 resched_curr+0x60/0x9c check_preempt_curr+0x70/0xdc wake_up_new_task+0x310/0x470 _do_fork+0x188/0x78c SyS_clone+0x44/0x50 __sys_trace_return+0x0/0x4 GICv3: CPU0: ICC_SGI1R_EL1 12000 This could be fixed with printk_deferred() but that might lessen its usefulness for debugging. So change it to pr_devel to keep it out of production kernels. Developers working on gic-v3 can enable it as needed in their kernels. Signed-off-by: Mark Salter <msalter@redhat.com> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

commit 75a4598 upstream. mlx5 modify_qp() relies on FW that the error will be thrown if wrong state is supplied. The missing check in FW causes the following crash while using XRC_TGT QPs. [ 14.769632] BUG: unable to handle kernel NULL pointer dereference at (null) [ 14.771085] IP: mlx5_ib_modify_qp+0xf60/0x13f0 [ 14.771894] PGD 800000001472e067 P4D 800000001472e067 PUD 14529067 PMD 0 [ 14.773126] Oops: 0002 [#1] SMP PTI [ 14.773763] CPU: 0 PID: 365 Comm: ubsan Not tainted 4.16.0-rc1-00038-g8151138c0793 #119 [ 14.775192] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [ 14.777522] RIP: 0010:mlx5_ib_modify_qp+0xf60/0x13f0 [ 14.778417] RSP: 0018:ffffbf48001c7bd8 EFLAGS: 00010246 [ 14.779346] RAX: 0000000000000000 RBX: ffff9a8f9447d400 RCX: 0000000000000000 [ 14.780643] RDX: 0000000000000000 RSI: 000000000000000a RDI: 0000000000000000 [ 14.781930] RBP: 0000000000000000 R08: 00000000000217b0 R09: ffffffffbc9c1504 [ 14.783214] R10: fffff4a180519480 R11: ffff9a8f94523600 R12: ffff9a8f9493e240 [ 14.784507] R13: ffff9a8f9447d738 R14: 000000000000050a R15: 0000000000000000 [ 14.785800] FS: 00007f545b466700(0000) GS:ffff9a8f9fc00000(0000) knlGS:0000000000000000 [ 14.787073] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 14.787792] CR2: 0000000000000000 CR3: 00000000144be000 CR4: 00000000000006b0 [ 14.788689] Call Trace: [ 14.789007] _ib_modify_qp+0x71/0x120 [ 14.789475] modify_qp.isra.20+0x207/0x2f0 [ 14.790010] ib_uverbs_modify_qp+0x90/0xe0 [ 14.790532] ib_uverbs_write+0x1d2/0x3c0 [ 14.791049] ? __handle_mm_fault+0x93c/0xe40 [ 14.791644] __vfs_write+0x36/0x180 [ 14.792096] ? handle_mm_fault+0xc1/0x210 [ 14.792601] vfs_write+0xad/0x1e0 [ 14.793018] SyS_write+0x52/0xc0 [ 14.793422] do_syscall_64+0x75/0x180 [ 14.793888] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 14.794527] RIP: 0033:0x7f545ad76099 [ 14.794975] RSP: 002b:00007ffd78787468 EFLAGS: 00000287 ORIG_RAX: 0000000000000001 [ 14.795958] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f545ad76099 [ 14.797075] RDX: 0000000000000078 RSI: 0000000020009000 RDI: 0000000000000003 [ 14.798140] RBP: 00007ffd78787470 R08: 00007ffd78787480 R09: 00007ffd78787480 [ 14.799207] R10: 00007ffd78787480 R11: 0000000000000287 R12: 00005599ada98760 [ 14.800277] R13: 00007ffd78787560 R14: 0000000000000000 R15: 0000000000000000 [ 14.801341] Code: 4c 8b 1c 24 48 8b 83 70 02 00 00 48 c7 83 cc 02 00 00 00 00 00 00 48 c7 83 24 03 00 00 00 00 00 00 c7 83 2c 03 00 00 00 00 00 00 <c7> 00 00 00 00 00 48 8b 83 70 02 00 00 c7 40 04 00 00 00 00 4c [ 14.804012] RIP: mlx5_ib_modify_qp+0xf60/0x13f0 RSP: ffffbf48001c7bd8 [ 14.804838] CR2: 0000000000000000 [ 14.805288] ---[ end trace 3f1da0df5c8b7c37 ]--- Cc: syzkaller <syzkaller@googlegroups.com> Reported-by: Maor Gottlieb <maorg@mellanox.com> Signed-off-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Doug Ledford <dledford@redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[ Upstream commit a957fa1 ] In case of seg6 in encap mode, seg6_do_srh_encap() calls set_tun_src() in order to set the src addr of outer IPv6 header. The net_device is required for set_tun_src(). However calling ip6_dst_idev() on dst_entry in case of IPv4 traffic results on the following bug. Using just dst->dev should fix this BUG. [ 196.242461] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 196.242975] PGD 800000010f076067 P4D 800000010f076067 PUD 10f060067 PMD 0 [ 196.243329] Oops: 0000 [#1] SMP PTI [ 196.243468] Modules linked in: nfsd auth_rpcgss nfs_acl nfs lockd grace fscache sunrpc crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd input_leds glue_helper led_class pcspkr serio_raw mac_hid video autofs4 hid_generic usbhid hid e1000 i2c_piix4 ahci pata_acpi libahci [ 196.244362] CPU: 2 PID: 1089 Comm: ping Not tainted 4.16.0+ #1 [ 196.244606] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 196.244968] RIP: 0010:seg6_do_srh_encap+0x1ac/0x300 [ 196.245236] RSP: 0018:ffffb2ce00b23a60 EFLAGS: 00010202 [ 196.245464] RAX: 0000000000000000 RBX: ffff8c7f53eea300 RCX: 0000000000000000 [ 196.245742] RDX: 0000f10000000000 RSI: ffff8c7f52085a6c RDI: ffff8c7f41166850 [ 196.246018] RBP: ffffb2ce00b23aa8 R08: 00000000000261e0 R09: ffff8c7f41166800 [ 196.246294] R10: ffffdce5040ac780 R11: ffff8c7f41166828 R12: ffff8c7f41166808 [ 196.246570] R13: ffff8c7f52085a44 R14: ffffffffb73211c0 R15: ffff8c7e69e44200 [ 196.246846] FS: 00007fc448789700(0000) GS:ffff8c7f59d00000(0000) knlGS:0000000000000000 [ 196.247286] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 196.247526] CR2: 0000000000000000 CR3: 000000010f05a000 CR4: 00000000000406e0 [ 196.247804] Call Trace: [ 196.247972] seg6_do_srh+0x15b/0x1c0 [ 196.248156] seg6_output+0x3c/0x220 [ 196.248341] ? prandom_u32+0x14/0x20 [ 196.248526] ? ip_idents_reserve+0x6c/0x80 [ 196.248723] ? __ip_select_ident+0x90/0x100 [ 196.248923] ? ip_append_data.part.50+0x6c/0xd0 [ 196.249133] lwtunnel_output+0x44/0x70 [ 196.249328] ip_send_skb+0x15/0x40 [ 196.249515] raw_sendmsg+0x8c3/0xac0 [ 196.249701] ? _copy_from_user+0x2e/0x60 [ 196.249897] ? rw_copy_check_uvector+0x53/0x110 [ 196.250106] ? _copy_from_user+0x2e/0x60 [ 196.250299] ? copy_msghdr_from_user+0xce/0x140 [ 196.250508] sock_sendmsg+0x36/0x40 [ 196.250690] ___sys_sendmsg+0x292/0x2a0 [ 196.250881] ? _cond_resched+0x15/0x30 [ 196.251074] ? copy_termios+0x1e/0x70 [ 196.251261] ? _copy_to_user+0x22/0x30 [ 196.251575] ? tty_mode_ioctl+0x1c3/0x4e0 [ 196.251782] ? _cond_resched+0x15/0x30 [ 196.251972] ? mutex_lock+0xe/0x30 [ 196.252152] ? vvar_fault+0xd2/0x110 [ 196.252337] ? __do_fault+0x1f/0xc0 [ 196.252521] ? __handle_mm_fault+0xc1f/0x12d0 [ 196.252727] ? __sys_sendmsg+0x63/0xa0 [ 196.252919] __sys_sendmsg+0x63/0xa0 [ 196.253107] do_syscall_64+0x72/0x200 [ 196.253305] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 196.253530] RIP: 0033:0x7fc4480b0690 [ 196.253715] RSP: 002b:00007ffde9f252f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 196.254053] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007fc4480b0690 [ 196.254331] RDX: 0000000000000000 RSI: 000000000060a360 RDI: 0000000000000003 [ 196.254608] RBP: 00007ffde9f253f0 R08: 00000000002d1e81 R09: 0000000000000002 [ 196.254884] R10: 00007ffde9f250c0 R11: 0000000000000246 R12: 0000000000b22070 [ 196.255205] R13: 20c49ba5e353f7cf R14: 431bde82d7b634db R15: 00007ffde9f278fe [ 196.255484] Code: a5 0f b6 45 c0 41 88 41 28 41 0f b6 41 2c 48 c1 e0 04 49 8b 54 01 38 49 8b 44 01 30 49 89 51 20 49 89 41 18 48 8b 83 b0 00 00 00 <48> 8b 30 49 8b 86 08 0b 00 00 48 8b 40 20 48 8b 50 08 48 0b 10 [ 196.256190] RIP: seg6_do_srh_encap+0x1ac/0x300 RSP: ffffb2ce00b23a60 [ 196.256445] CR2: 0000000000000000 [ 196.256676] ---[ end trace 71af7d093603885c ]--- Fixes: 8936ef7 ("ipv6: sr: fix NULL pointer dereference when setting encap source address") Signed-off-by: Ahmed Abdelsalam <amsalam20@gmail.com> Acked-by: David Lebrun <dlebrun@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[ Upstream commit 4fb0534 ] When parsing the options provided by the user space, team_nl_cmd_options_set() insert them in a temporary list to send multiple events with a single message. While each option's attribute is correctly validated, the code does not check for duplicate entries before inserting into the event list. Exploiting the above, the syzbot was able to trigger the following splat: kernel BUG at lib/list_debug.c:31! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 4466 Comm: syzkaller556835 Not tainted 4.16.0+ #17 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__list_add_valid+0xaa/0xb0 lib/list_debug.c:29 RSP: 0018:ffff8801b04bf248 EFLAGS: 00010286 RAX: 0000000000000058 RBX: ffff8801c8fc7a90 RCX: 0000000000000000 RDX: 0000000000000058 RSI: ffffffff815fbf41 RDI: ffffed0036097e3f RBP: ffff8801b04bf260 R08: ffff8801b0b2a700 R09: ffffed003b604f90 R10: ffffed003b604f90 R11: ffff8801db027c87 R12: ffff8801c8fc7a90 R13: ffff8801c8fc7a90 R14: dffffc0000000000 R15: 0000000000000000 FS: 0000000000b98880(0000) GS:ffff8801db000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000043fc30 CR3: 00000001afe8e000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __list_add include/linux/list.h:60 [inline] list_add include/linux/list.h:79 [inline] team_nl_cmd_options_set+0x9ff/0x12b0 drivers/net/team/team.c:2571 genl_family_rcv_msg+0x889/0x1120 net/netlink/genetlink.c:599 genl_rcv_msg+0xc6/0x170 net/netlink/genetlink.c:624 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2448 genl_rcv+0x28/0x40 net/netlink/genetlink.c:635 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x58b/0x740 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x9f0/0xfa0 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:639 ___sys_sendmsg+0x805/0x940 net/socket.c:2117 __sys_sendmsg+0x115/0x270 net/socket.c:2155 SYSC_sendmsg net/socket.c:2164 [inline] SyS_sendmsg+0x29/0x30 net/socket.c:2162 do_syscall_64+0x29e/0x9d0 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x4458b9 RSP: 002b:00007ffd1d4a7278 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000000001b RCX: 00000000004458b9 RDX: 0000000000000010 RSI: 0000000020000d00 RDI: 0000000000000004 RBP: 00000000004a74ed R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000213 R12: 00007ffd1d4a7348 R13: 0000000000402a60 R14: 0000000000000000 R15: 0000000000000000 Code: 75 e8 eb a9 48 89 f7 48 89 75 e8 e8 d1 85 7b fe 48 8b 75 e8 eb bb 48 89 f2 48 89 d9 4c 89 e6 48 c7 c7 a0 84 d8 87 e8 ea 67 28 fe <0f> 0b 0f 1f 40 00 48 b8 00 00 00 00 00 fc ff df 55 48 89 e5 41 RIP: __list_add_valid+0xaa/0xb0 lib/list_debug.c:29 RSP: ffff8801b04bf248 This changeset addresses the avoiding list_add() if the current option is already present in the event list. Reported-and-tested-by: syzbot+4d4af685432dc0e56c91@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni <pabeni@redhat.com> Fixes: 2fcdb2c ("team: allow to send multiple set events in one message") Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

commit 3180dab upstream. Add DELAY_INIT quirk to fix the following problem with HP v222w 16GB Mini: usb 1-3: unable to read config index 0 descriptor/start: -110 usb 1-3: can't read configurations, error -110 usb 1-3: can't set config #1, error -110 Signed-off-by: Kamil Lulko <kamilx.lulko@intel.com> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> Cc: stable <stable@vger.kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Orabug: 27952054 Before the nl REMOVE msg has been sent to the userspace, the ring's and other resources have been released, but the userspace maybe still using them. And then we can see the crash messages like: ring broken, not handling completions BUG: unable to handle kernel paging request at ffffffffffffffd0 IP: tcmu_handle_completions+0x134/0x2f0 [target_core_user] PGD 11bdc0c067 P4D 11bdc0c067 PUD 11bdc0e067 PMD 0 Oops: 0000 [#1] SMP cmd_id not found, ring is broken RIP: 0010:tcmu_handle_completions+0x134/0x2f0 [target_core_user] RSP: 0018:ffffb8a2d8983d88 EFLAGS: 00010296 RAX: 0000000000000000 RBX: ffffb8a2aaa4e000 RCX: 00000000ffffffff RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000220 R10: 0000000076c71401 R11: ffff8d2e76c713f0 R12: ffffb8a2aad56bc0 R13: 000000000000001c R14: ffff8d2e32c90000 R15: ffff8d2e76c713f0 FS: 00007f411ffff700(0000) GS:ffff8d1e7fdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd0 CR3: 0000001027070000 CR4: 00000000001406e0 Call Trace: ? tcmu_irqcontrol+0x2a/0x40 [target_core_user] ? uio_write+0x7b/0xc0 [uio] ? __vfs_write+0x37/0x150 ? __getnstimeofday64+0x3b/0xd0 ? vfs_write+0xb2/0x1b0 ? syscall_trace_enter+0x1d0/0x2b0 ? SyS_write+0x55/0xc0 ? do_syscall_64+0x67/0x150 ? entry_SYSCALL64_slow_path+0x25/0x25 Code: 41 5d 41 5e 41 5f 5d c3 83 f8 01 0f 85 cf 01 00 00 48 8b 7d d0 e8 dd 5c 1d f3 41 0f b7 74 24 04 48 8b 7d c8 31 d2 e8 5c c7 1b f3 <48> 8b 7d d0 49 89 c7 c6 07 00 0f 1f 40 00 4d 85 ff 0f 84 82 01 RIP: tcmu_handle_completions+0x134/0x2f0 [target_core_user] RSP: ffffb8a2d8983d88 CR2: ffffffffffffffd0 And the crash also could happen in tcmu_page_fault and other places. Signed-off-by: Zhang Zhuoyu <zhangzhuoyu@cmss.chinamobile.com> Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com> Reviewed-by: Mike Christie <mchristi@redhat.com> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org> (cherry picked from commit c22adc0) Signed-off-by: Ashish Samant <ashish.samant@oracle.com> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>

…ation for array index commit 52759c0 upstream. At a commit f91c9d7 ('ALSA: firewire-lib: cache maximum length of payload to reduce function calls'), maximum size of payload for tx isochronous packet is cached to reduce the number of function calls. This cache was programmed to updated at a first callback of ohci1394 IR context. However, the maximum size is required to queueing packets before starting the isochronous context. As a result, the cached value is reused to queue packets in next time to starting the isochronous context. Then the cache is updated in a first callback of the isochronous context. This can cause kernel NULL pointer dereference in a below call graph: (sound/firewire/amdtp-stream.c) amdtp_stream_start() ->queue_in_packet() ->queue_packet() (drivers/firewire/core-iso.c) ->fw_iso_context_queue() ->struct fw_card_driver.queue_iso() (drivers/firewire/ohci.c) = ohci_queue_iso() ->queue_iso_packet_per_buffer() buffer->pages[page] The issued dereference occurs in a case that: - target unit supports different stream formats for sampling transmission frequency. - maximum length of payload for tx stream in a first trial is bigger than the length in a second trial. In this case, correct number of pages are allocated for DMA and the 'pages' array has enough elements, while index of the element is wrongly calculated according to the old value of length of payload in a call of 'queue_in_packet()'. Then it causes the issue. This commit fixes the critical bug. This affects all of drivers in ALSA firewire stack in Linux kernel v4.12 or later. [12665.302360] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 [12665.302415] IP: ohci_queue_iso+0x47c/0x800 [firewire_ohci] [12665.302439] PGD 0 [12665.302440] P4D 0 [12665.302450] [12665.302470] Oops: 0000 [#1] SMP PTI [12665.302487] Modules linked in: ... [12665.303096] CPU: 1 PID: 12760 Comm: jackd Tainted: P OE 4.13.0-38-generic #43-Ubuntu [12665.303154] Hardware name: /DH77DF, BIOS KCH7710H.86A.0069.2012.0224.1825 02/24/2012 [12665.303215] task: ffff9ce87da2ae80 task.stack: ffffb5b8823d0000 [12665.303258] RIP: 0010:ohci_queue_iso+0x47c/0x800 [firewire_ohci] [12665.303301] RSP: 0018:ffffb5b8823d3ab8 EFLAGS: 00010086 [12665.303337] RAX: ffff9ce4f4876930 RBX: 0000000000000008 RCX: ffff9ce88a3955e0 [12665.303384] RDX: 0000000000000000 RSI: 0000000034877f00 RDI: 0000000000000000 [12665.303427] RBP: ffffb5b8823d3b68 R08: ffff9ce8ccb390a0 R09: ffff9ce877639ab0 [12665.303475] R10: 0000000000000108 R11: 0000000000000000 R12: 0000000000000003 [12665.303513] R13: 0000000000000000 R14: ffff9ce4f4876950 R15: 0000000000000000 [12665.303554] FS: 00007f2ec467f8c0(0000) GS:ffff9ce8df280000(0000) knlGS:0000000000000000 [12665.303600] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [12665.303633] CR2: 0000000000000030 CR3: 00000002dcf90004 CR4: 00000000000606e0 [12665.303674] Call Trace: [12665.303698] fw_iso_context_queue+0x18/0x20 [firewire_core] [12665.303735] queue_packet+0x88/0xe0 [snd_firewire_lib] [12665.303770] amdtp_stream_start+0x19b/0x270 [snd_firewire_lib] [12665.303811] start_streams+0x276/0x3c0 [snd_dice] [12665.303840] snd_dice_stream_start_duplex+0x1bf/0x480 [snd_dice] [12665.303882] ? vma_gap_callbacks_rotate+0x1e/0x30 [12665.303914] ? __rb_insert_augmented+0xab/0x240 [12665.303936] capture_prepare+0x3c/0x70 [snd_dice] [12665.303961] snd_pcm_do_prepare+0x1d/0x30 [snd_pcm] [12665.303985] snd_pcm_action_single+0x3b/0x90 [snd_pcm] [12665.304009] snd_pcm_action_nonatomic+0x68/0x70 [snd_pcm] [12665.304035] snd_pcm_prepare+0x68/0x90 [snd_pcm] [12665.304058] snd_pcm_common_ioctl1+0x4c0/0x940 [snd_pcm] [12665.304083] snd_pcm_capture_ioctl1+0x19b/0x250 [snd_pcm] [12665.304108] snd_pcm_capture_ioctl+0x27/0x40 [snd_pcm] [12665.304131] do_vfs_ioctl+0xa8/0x630 [12665.304148] ? entry_SYSCALL_64_after_hwframe+0xe9/0x139 [12665.304172] ? entry_SYSCALL_64_after_hwframe+0xe2/0x139 [12665.304195] ? entry_SYSCALL_64_after_hwframe+0xdb/0x139 [12665.304218] ? entry_SYSCALL_64_after_hwframe+0xd4/0x139 [12665.304242] ? entry_SYSCALL_64_after_hwframe+0xcd/0x139 [12665.304265] ? entry_SYSCALL_64_after_hwframe+0xc6/0x139 [12665.304288] ? entry_SYSCALL_64_after_hwframe+0xbf/0x139 [12665.304312] ? entry_SYSCALL_64_after_hwframe+0xb8/0x139 [12665.304335] ? entry_SYSCALL_64_after_hwframe+0xb1/0x139 [12665.304358] SyS_ioctl+0x79/0x90 [12665.304374] ? entry_SYSCALL_64_after_hwframe+0x72/0x139 [12665.304397] entry_SYSCALL_64_fastpath+0x24/0xab [12665.304417] RIP: 0033:0x7f2ec3750ef7 [12665.304433] RSP: 002b:00007fff99e31388 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [12665.304465] RAX: ffffffffffffffda RBX: 00007fff99e312f0 RCX: 00007f2ec3750ef7 [12665.304494] RDX: 0000000000000000 RSI: 0000000000004140 RDI: 0000000000000007 [12665.304522] RBP: 0000556ebc63fd60 R08: 0000556ebc640560 R09: 0000000000000000 [12665.304553] R10: 0000000000000001 R11: 0000000000000246 R12: 0000556ebc63fcf0 [12665.304584] R13: 0000000000000000 R14: 0000000000000007 R15: 0000000000000000 [12665.304612] Code: 01 00 00 44 89 eb 45 31 ed 45 31 db 66 41 89 1e 66 41 89 5e 0c 66 45 89 5e 0e 49 8b 49 08 49 63 d4 4d 85 c0 49 63 ff 48 8b 14 d1 <48> 8b 72 30 41 8d 14 37 41 89 56 04 48 63 d3 0f 84 ce 00 00 00 [12665.304713] RIP: ohci_queue_iso+0x47c/0x800 [firewire_ohci] RSP: ffffb5b8823d3ab8 [12665.304743] CR2: 0000000000000030 [12665.317701] ---[ end trace 9d55b056dd52a19f ]--- Fixes: f91c9d7 ('ALSA: firewire-lib: cache maximum length of payload to reduce function calls') Cc: <stable@vger.kernel.org> # v4.12+ Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp> Signed-off-by: Takashi Iwai <tiwai@suse.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

commit af8a41c upstream. Some HP laptops have only a single wifi antenna. This would not be a problem except that they were shipped with an incorrectly encoded EFUSE. It should have been possible to open the computer and transfer the antenna connection to the other terminal except that such action might void the warranty, and moving the antenna broke the Windows driver. The fix was to add a module option that would override the EFUSE encoding. That was done with commit c18d8f5 ("rtlwifi: rtl8723be: Add antenna select module parameter"). There was still a problem with Bluetooth coexistence, which was addressed with commit baa1702 ("rtlwifi: btcoexist: Implement antenna selection"). There were still problems, thus there were commit 0ff78ad ("rtlwifi: rtl8723be: fix ant_sel code") and commit 6d62269 ("rtlwifi: btcoexist: Fix antenna selection code"). Despite all these attempts at fixing the problem, the code is not yet right. A proper fix is important as there are now instances of laptops having RTL8723DE chips with the same problem. The module parameter ant_sel is used to control antenna number and path. At present enum ANT_{X2,X1} is used to define the antenna number, but this choice is not intuitive, thus change to a new enum ANT_{MAIN,AUX} to make it more readable. This change showed examples where incorrect values were used. It was also possible to remove a workaround in halbtcoutsrc.c. The experimental results with single antenna connected to specific path are now as follows: ant_sel ANT_MAIN(#1) ANT_AUX(#2) 0 -8 -62 1 -62 -10 2 -6 -60 Signed-off-by: Ping-Ke Shih <pkshih@realtek.com> Fixes: c18d8f5 ("rtlwifi: rtl8723be: Add antenna select module parameter") Fixes: baa1702 ("rtlwifi: btcoexist: Implement antenna selection") Fixes: 0ff78ad ("rtlwifi: rtl8723be: fix ant_sel code") Fixes: 6d62269 ("rtlwifi: btcoexist: Fix antenna selection code") Cc: Stable <stable@vger.kernel.org> # 4.7+ Reviewed-by: Larry Finger <Larry.Finger@lwfinger.net> Signed-off-by: Kalle Valo <kvalo@codeaurora.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

commit 9f0a93d upstream. When the module is removed the led workqueue is destroyed in the remove callback, before the led device is unregistered from the led subsystem. This leads to a NULL pointer derefence when the led device is unregistered automatically later as part of the module removal cleanup. Bellow is the backtrace showing the problem. BUG: unable to handle kernel NULL pointer dereference at (null) IP: __queue_work+0x8c/0x410 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI Modules linked in: ccm edac_mce_amd kvm_amd kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 joydev crypto_simd asus_nb_wmi glue_helper uvcvideo snd_hda_codec_conexant snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel asus_wmi snd_hda_codec cryptd snd_hda_core sparse_keymap videobuf2_vmalloc arc4 videobuf2_memops snd_hwdep input_leds videobuf2_v4l2 ath9k psmouse videobuf2_core videodev ath9k_common snd_pcm ath9k_hw media fam15h_power ath k10temp snd_timer mac80211 i2c_piix4 r8169 mii mac_hid cfg80211 asus_wireless(-) snd soundcore wmi shpchp 8250_dw ip_tables x_tables amdkfd amd_iommu_v2 amdgpu radeon chash i2c_algo_bit drm_kms_helper syscopyarea serio_raw sysfillrect sysimgblt fb_sys_fops ahci ttm libahci drm video CPU: 3 PID: 2177 Comm: rmmod Not tainted 4.15.0-5-generic #6+dev94.b4287e5bem1-Endless Hardware name: ASUSTeK COMPUTER INC. X555DG/X555DG, BIOS 5.011 05/05/2015 RIP: 0010:__queue_work+0x8c/0x410 RSP: 0018:ffffbe8cc249fcd8 EFLAGS: 00010086 RAX: ffff992ac6810800 RBX: 0000000000000000 RCX: 0000000000000008 RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff992ac6400e18 RBP: ffffbe8cc249fd18 R08: ffff992ac6400db0 R09: 0000000000000000 R10: 0000000000000040 R11: ffff992ac6400dd8 R12: 0000000000002000 R13: ffff992abd762e00 R14: ffff992abd763e38 R15: 000000000001ebe0 FS: 00007f318203e700(0000) GS:ffff992aced80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001c720e000 CR4: 00000000001406e0 Call Trace: queue_work_on+0x38/0x40 led_state_set+0x2c/0x40 [asus_wireless] led_set_brightness_nopm+0x14/0x40 led_set_brightness+0x37/0x60 led_trigger_set+0xfc/0x1d0 led_classdev_unregister+0x32/0xd0 devm_led_classdev_release+0x11/0x20 release_nodes+0x109/0x1f0 devres_release_all+0x3c/0x50 device_release_driver_internal+0x16d/0x220 driver_detach+0x3f/0x80 bus_remove_driver+0x55/0xd0 driver_unregister+0x2c/0x40 acpi_bus_unregister_driver+0x15/0x20 asus_wireless_driver_exit+0x10/0xb7c [asus_wireless] SyS_delete_module+0x1da/0x2b0 entry_SYSCALL_64_fastpath+0x24/0x87 RIP: 0033:0x7f3181b65fd7 RSP: 002b:00007ffe74bcbe18 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3181b65fd7 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000555ea2559258 RBP: 0000555ea25591f0 R08: 00007ffe74bcad91 R09: 000000000000000a R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000003 R13: 00007ffe74bcae00 R14: 0000000000000000 R15: 0000555ea25591f0 Code: 01 00 00 02 0f 85 7d 01 00 00 48 63 45 d4 48 c7 c6 00 f4 fa 87 49 8b 9d 08 01 00 00 48 03 1c c6 4c 89 f7 e8 87 fb ff ff 48 85 c0 <48> 8b 3b 0f 84 c5 01 00 00 48 39 f8 0f 84 bc 01 00 00 48 89 c7 RIP: __queue_work+0x8c/0x410 RSP: ffffbe8cc249fcd8 CR2: 0000000000000000 ---[ end trace 7aa4f4a232e9c39c ]--- Unregistering the led device on the remove callback before destroying the workqueue avoids this problem. https://bugzilla.kernel.org/show_bug.cgi?id=196097 Reported-by: Dun Hum <bitter.taste@gmx.com> Cc: stable@vger.kernel.org Signed-off-by: João Paulo Rechi Vita <jprvita@endlessm.com> Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

commit 5c64576 upstream. syzkaller reports for wrong rtnl_lock usage in sync code [1] and [2] We have 2 problems in start_sync_thread if error path is taken, eg. on memory allocation error or failure to configure sockets for mcast group or addr/port binding: 1. recursive locking: holding rtnl_lock while calling sock_release which in turn calls again rtnl_lock in ip_mc_drop_socket to leave the mcast group, as noticed by Florian Westphal. Additionally, sock_release can not be called while holding sync_mutex (ABBA deadlock). 2. task hung: holding rtnl_lock while calling kthread_stop to stop the running kthreads. As the kthreads do the same to leave the mcast group (sock_release -> ip_mc_drop_socket -> rtnl_lock) they hang. Fix the problems by calling rtnl_unlock early in the error path, now sock_release is called after unlocking both mutexes. Problem 3 (task hung reported by syzkaller [2]) is variant of problem 2: use _trylock to prevent one user to call rtnl_lock and then while waiting for sync_mutex to block kthreads that execute sock_release when they are stopped by stop_sync_thread. [1] IPVS: stopping backup sync thread 4500 ... WARNING: possible recursive locking detected 4.16.0-rc7+ #3 Not tainted -------------------------------------------- syzkaller688027/4497 is trying to acquire lock: (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 but task is already holding lock: IPVS: stopping backup sync thread 4495 ... (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(rtnl_mutex); lock(rtnl_mutex); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by syzkaller688027/4497: #0: (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 #1: (ipvs->sync_mutex){+.+.}, at: [<00000000703f78e3>] do_ip_vs_set_ctl+0x10f8/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2388 stack backtrace: CPU: 1 PID: 4497 Comm: syzkaller688027 Not tainted 4.16.0-rc7+ #3 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 print_deadlock_bug kernel/locking/lockdep.c:1761 [inline] check_deadlock kernel/locking/lockdep.c:1805 [inline] validate_chain kernel/locking/lockdep.c:2401 [inline] __lock_acquire+0xe8f/0x3e00 kernel/locking/lockdep.c:3431 lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920 __mutex_lock_common kernel/locking/mutex.c:756 [inline] __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 ip_mc_drop_socket+0x88/0x230 net/ipv4/igmp.c:2643 inet_release+0x4e/0x1c0 net/ipv4/af_inet.c:413 sock_release+0x8d/0x1e0 net/socket.c:595 start_sync_thread+0x2213/0x2b70 net/netfilter/ipvs/ip_vs_sync.c:1924 do_ip_vs_set_ctl+0x1139/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2389 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ip_setsockopt+0x97/0xa0 net/ipv4/ip_sockglue.c:1261 udp_setsockopt+0x45/0x80 net/ipv4/udp.c:2406 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2975 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x446a69 RSP: 002b:00007fa1c3a64da8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000446a69 RDX: 000000000000048b RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00000000006e29fc R08: 0000000000000018 R09: 0000000000000000 R10: 00000000200000c0 R11: 0000000000000246 R12: 00000000006e29f8 R13: 00676e697279656b R14: 00007fa1c3a659c0 R15: 00000000006e2b60 [2] IPVS: sync thread started: state = BACKUP, mcast_ifn = syz_tun, syncid = 4, id = 0 IPVS: stopping backup sync thread 25415 ... INFO: task syz-executor7:25421 blocked for more than 120 seconds. Not tainted 4.16.0-rc6+ #284 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. syz-executor7 D23688 25421 4408 0x00000004 Call Trace: context_switch kernel/sched/core.c:2862 [inline] __schedule+0x8fb/0x1ec0 kernel/sched/core.c:3440 schedule+0xf5/0x430 kernel/sched/core.c:3499 schedule_timeout+0x1a3/0x230 kernel/time/timer.c:1777 do_wait_for_common kernel/sched/completion.c:86 [inline] __wait_for_common kernel/sched/completion.c:107 [inline] wait_for_common kernel/sched/completion.c:118 [inline] wait_for_completion+0x415/0x770 kernel/sched/completion.c:139 kthread_stop+0x14a/0x7a0 kernel/kthread.c:530 stop_sync_thread+0x3d9/0x740 net/netfilter/ipvs/ip_vs_sync.c:1996 do_ip_vs_set_ctl+0x2b1/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2394 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ip_setsockopt+0x97/0xa0 net/ipv4/ip_sockglue.c:1253 sctp_setsockopt+0x2ca/0x63e0 net/sctp/socket.c:4154 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:3039 SYSC_setsockopt net/socket.c:1850 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1829 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x454889 RSP: 002b:00007fc927626c68 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00007fc9276276d4 RCX: 0000000000454889 RDX: 000000000000048c RSI: 0000000000000000 RDI: 0000000000000017 RBP: 000000000072bf58 R08: 0000000000000018 R09: 0000000000000000 R10: 0000000020000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 000000000000051c R14: 00000000006f9b40 R15: 0000000000000001 Showing all locks held in the system: 2 locks held by khungtaskd/868: #0: (rcu_read_lock){....}, at: [<00000000a1a8f002>] check_hung_uninterruptible_tasks kernel/hung_task.c:175 [inline] #0: (rcu_read_lock){....}, at: [<00000000a1a8f002>] watchdog+0x1c5/0xd60 kernel/hung_task.c:249 #1: (tasklist_lock){.+.+}, at: [<0000000037c2f8f9>] debug_show_all_locks+0xd3/0x3d0 kernel/locking/lockdep.c:4470 1 lock held by rsyslogd/4247: #0: (&f->f_pos_lock){+.+.}, at: [<000000000d8d6983>] __fdget_pos+0x12b/0x190 fs/file.c:765 2 locks held by getty/4338: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4339: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4340: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4341: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4342: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4343: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4344: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 3 locks held by kworker/0:5/6494: #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000a062b18e>] work_static include/linux/workqueue.h:198 [inline] #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000a062b18e>] set_work_data kernel/workqueue.c:619 [inline] #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000a062b18e>] set_work_pool_and_clear_pending kernel/workqueue.c:646 [inline] #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000a062b18e>] process_one_work+0xb12/0x1bb0 kernel/workqueue.c:2084 #1: ((addr_chk_work).work){+.+.}, at: [<00000000278427d5>] process_one_work+0xb89/0x1bb0 kernel/workqueue.c:2088 #2: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 1 lock held by syz-executor7/25421: #0: (ipvs->sync_mutex){+.+.}, at: [<00000000d414a689>] do_ip_vs_set_ctl+0x277/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2393 2 locks held by syz-executor7/25427: #0: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 #1: (ipvs->sync_mutex){+.+.}, at: [<00000000e6d48489>] do_ip_vs_set_ctl+0x10f8/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2388 1 lock held by syz-executor7/25435: #0: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 1 lock held by ipvs-b:2:0/25415: #0: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 Reported-and-tested-by: syzbot+a46d6abf9d56b1365a72@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+5fe074c01b2032ce9618@syzkaller.appspotmail.com Fixes: e0b26cc ("ipvs: call rtnl_lock early") Signed-off-by: Julian Anastasov <ja@ssi.bg> Signed-off-by: Simon Horman <horms@verge.net.au> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Cc: Zubin Mithra <zsm@chromium.org> Cc: Guenter Roeck <groeck@chromium.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

commit 352672d upstream. Currently; we're grabbing all of the modesetting locks before adding MST connectors to fbdev. This isn't actually necessary, and causes a deadlock as well: ====================================================== WARNING: possible circular locking dependency detected 4.17.0-rc3Lyude-Test+ #1 Tainted: G O ------------------------------------------------------ kworker/1:0/18 is trying to acquire lock: 00000000c832f62d (&helper->lock){+.+.}, at: drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] but task is already holding lock: 00000000942e28e2 (crtc_ww_class_mutex){+.+.}, at: drm_modeset_backoff+0x8e/0x1c0 [drm] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (crtc_ww_class_mutex){+.+.}: ww_mutex_lock+0x43/0x80 drm_modeset_lock+0x71/0x130 [drm] drm_helper_probe_single_connector_modes+0x7d/0x6b0 [drm_kms_helper] drm_setup_crtcs+0x15e/0xc90 [drm_kms_helper] __drm_fb_helper_initial_config_and_unlock+0x29/0x480 [drm_kms_helper] nouveau_fbcon_init+0x138/0x1a0 [nouveau] nouveau_drm_load+0x173/0x7e0 [nouveau] drm_dev_register+0x134/0x1c0 [drm] drm_get_pci_dev+0x8e/0x160 [drm] nouveau_drm_probe+0x1a9/0x230 [nouveau] pci_device_probe+0xcd/0x150 driver_probe_device+0x30b/0x480 __driver_attach+0xbc/0xe0 bus_for_each_dev+0x67/0x90 bus_add_driver+0x164/0x260 driver_register+0x57/0xc0 do_one_initcall+0x4d/0x323 do_init_module+0x5b/0x1f8 load_module+0x20e5/0x2ac0 __do_sys_finit_module+0xb7/0xd0 do_syscall_64+0x60/0x1b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #2 (crtc_ww_class_acquire){+.+.}: drm_helper_probe_single_connector_modes+0x58/0x6b0 [drm_kms_helper] drm_setup_crtcs+0x15e/0xc90 [drm_kms_helper] __drm_fb_helper_initial_config_and_unlock+0x29/0x480 [drm_kms_helper] nouveau_fbcon_init+0x138/0x1a0 [nouveau] nouveau_drm_load+0x173/0x7e0 [nouveau] drm_dev_register+0x134/0x1c0 [drm] drm_get_pci_dev+0x8e/0x160 [drm] nouveau_drm_probe+0x1a9/0x230 [nouveau] pci_device_probe+0xcd/0x150 driver_probe_device+0x30b/0x480 __driver_attach+0xbc/0xe0 bus_for_each_dev+0x67/0x90 bus_add_driver+0x164/0x260 driver_register+0x57/0xc0 do_one_initcall+0x4d/0x323 do_init_module+0x5b/0x1f8 load_module+0x20e5/0x2ac0 __do_sys_finit_module+0xb7/0xd0 do_syscall_64+0x60/0x1b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #1 (&dev->mode_config.mutex){+.+.}: drm_setup_crtcs+0x10c/0xc90 [drm_kms_helper] __drm_fb_helper_initial_config_and_unlock+0x29/0x480 [drm_kms_helper] nouveau_fbcon_init+0x138/0x1a0 [nouveau] nouveau_drm_load+0x173/0x7e0 [nouveau] drm_dev_register+0x134/0x1c0 [drm] drm_get_pci_dev+0x8e/0x160 [drm] nouveau_drm_probe+0x1a9/0x230 [nouveau] pci_device_probe+0xcd/0x150 driver_probe_device+0x30b/0x480 __driver_attach+0xbc/0xe0 bus_for_each_dev+0x67/0x90 bus_add_driver+0x164/0x260 driver_register+0x57/0xc0 do_one_initcall+0x4d/0x323 do_init_module+0x5b/0x1f8 load_module+0x20e5/0x2ac0 __do_sys_finit_module+0xb7/0xd0 do_syscall_64+0x60/0x1b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (&helper->lock){+.+.}: __mutex_lock+0x70/0x9d0 drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] nv50_mstm_register_connector+0x2c/0x50 [nouveau] drm_dp_add_port+0x2f5/0x420 [drm_kms_helper] drm_dp_send_link_address+0x155/0x1e0 [drm_kms_helper] drm_dp_add_port+0x33f/0x420 [drm_kms_helper] drm_dp_send_link_address+0x155/0x1e0 [drm_kms_helper] drm_dp_check_and_send_link_address+0x87/0xd0 [drm_kms_helper] drm_dp_mst_link_probe_work+0x4d/0x80 [drm_kms_helper] process_one_work+0x20d/0x650 worker_thread+0x3a/0x390 kthread+0x11e/0x140 ret_from_fork+0x3a/0x50 other info that might help us debug this: Chain exists of: &helper->lock --> crtc_ww_class_acquire --> crtc_ww_class_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(crtc_ww_class_mutex); lock(crtc_ww_class_acquire); lock(crtc_ww_class_mutex); lock(&helper->lock); *** DEADLOCK *** 5 locks held by kworker/1:0/18: #0: 000000004a05cd50 ((wq_completion)"events_long"){+.+.}, at: process_one_work+0x187/0x650 #1: 00000000601c11d1 ((work_completion)(&mgr->work)){+.+.}, at: process_one_work+0x187/0x650 #2: 00000000586ca0df (&dev->mode_config.mutex){+.+.}, at: drm_modeset_lock_all+0x3a/0x1b0 [drm] #3: 00000000d3ca0ffa (crtc_ww_class_acquire){+.+.}, at: drm_modeset_lock_all+0x44/0x1b0 [drm] #4: 00000000942e28e2 (crtc_ww_class_mutex){+.+.}, at: drm_modeset_backoff+0x8e/0x1c0 [drm] stack backtrace: CPU: 1 PID: 18 Comm: kworker/1:0 Tainted: G O 4.17.0-rc3Lyude-Test+ #1 Hardware name: Gateway FX6840/FX6840, BIOS P01-A3 05/17/2010 Workqueue: events_long drm_dp_mst_link_probe_work [drm_kms_helper] Call Trace: dump_stack+0x85/0xcb print_circular_bug.isra.38+0x1ce/0x1db __lock_acquire+0x128f/0x1350 ? lock_acquire+0x9f/0x200 ? lock_acquire+0x9f/0x200 ? __ww_mutex_lock.constprop.13+0x8f/0x1000 lock_acquire+0x9f/0x200 ? drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] ? drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] __mutex_lock+0x70/0x9d0 ? drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] ? ww_mutex_lock+0x43/0x80 ? _cond_resched+0x15/0x30 ? ww_mutex_lock+0x43/0x80 ? drm_modeset_lock+0xb2/0x130 [drm] ? drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] nv50_mstm_register_connector+0x2c/0x50 [nouveau] drm_dp_add_port+0x2f5/0x420 [drm_kms_helper] ? mark_held_locks+0x50/0x80 ? kfree+0xcf/0x2a0 ? drm_dp_check_mstb_guid+0xd6/0x120 [drm_kms_helper] ? trace_hardirqs_on_caller+0xed/0x180 ? drm_dp_check_mstb_guid+0xd6/0x120 [drm_kms_helper] drm_dp_send_link_address+0x155/0x1e0 [drm_kms_helper] drm_dp_add_port+0x33f/0x420 [drm_kms_helper] ? nouveau_connector_aux_xfer+0x7c/0xb0 [nouveau] ? find_held_lock+0x2d/0x90 ? drm_dp_dpcd_access+0xd9/0xf0 [drm_kms_helper] ? __mutex_unlock_slowpath+0x3b/0x280 ? drm_dp_dpcd_access+0xd9/0xf0 [drm_kms_helper] drm_dp_send_link_address+0x155/0x1e0 [drm_kms_helper] drm_dp_check_and_send_link_address+0x87/0xd0 [drm_kms_helper] drm_dp_mst_link_probe_work+0x4d/0x80 [drm_kms_helper] process_one_work+0x20d/0x650 worker_thread+0x3a/0x390 ? process_one_work+0x650/0x650 kthread+0x11e/0x140 ? kthread_create_worker_on_cpu+0x50/0x50 ret_from_fork+0x3a/0x50 Taking example from i915, the only time we need to hold any modesetting locks is when changing the port on the mstc, and in that case we only need to hold the connection mutex. Signed-off-by: Lyude Paul <lyude@redhat.com> Cc: Karol Herbst <kherbst@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Lyude Paul <lyude@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

[ Upstream commit a8d7aa1 ] syzbot reported a crash in tasklet_action_common() caused by dccp. dccp needs to make sure socket wont disappear before tasklet handler has completed. This patch takes a reference on the socket when arming the tasklet, and moves the sock_put() from dccp_write_xmit_timer() to dccp_write_xmitlet() kernel BUG at kernel/softirq.c:514! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 17 Comm: ksoftirqd/1 Not tainted 4.17.0-rc3+ #30 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:tasklet_action_common.isra.19+0x6db/0x700 kernel/softirq.c:515 RSP: 0018:ffff8801d9b3faf8 EFLAGS: 00010246 dccp_close: ABORT with 65423 bytes unread RAX: 1ffff1003b367f6b RBX: ffff8801daf1f3f0 RCX: 0000000000000000 RDX: ffff8801cf895498 RSI: 0000000000000004 RDI: 0000000000000000 RBP: ffff8801d9b3fc40 R08: ffffed0039f12a95 R09: ffffed0039f12a94 dccp_close: ABORT with 65423 bytes unread R10: ffffed0039f12a94 R11: ffff8801cf8954a3 R12: 0000000000000000 R13: ffff8801d9b3fc18 R14: dffffc0000000000 R15: ffff8801cf895490 FS: 0000000000000000(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2bc28000 CR3: 00000001a08a9000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tasklet_action+0x1d/0x20 kernel/softirq.c:533 __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285 dccp_close: ABORT with 65423 bytes unread run_ksoftirqd+0x86/0x100 kernel/softirq.c:646 smpboot_thread_fn+0x417/0x870 kernel/smpboot.c:164 kthread+0x345/0x410 kernel/kthread.c:238 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 Code: 48 8b 85 e8 fe ff ff 48 8b 95 f0 fe ff ff e9 94 fb ff ff 48 89 95 f0 fe ff ff e8 81 53 6e 00 48 8b 95 f0 fe ff ff e9 62 fb ff ff <0f> 0b 48 89 cf 48 89 8d e8 fe ff ff e8 64 53 6e 00 48 8b 8d e8 RIP: tasklet_action_common.isra.19+0x6db/0x700 kernel/softirq.c:515 RSP: ffff8801d9b3faf8 Fixes: dc841e3 ("dccp: Extend CCID packet dequeueing interface") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk> Cc: dccp@vger.kernel.org Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Also revert "rds: ib: Correct the cm_id compare commit" This reverts commit 7999a6a. This reverts commit 7b0b7b2. Replacing a stale pointer by a cyclically allocated identifier that's hypothetically less likely to be stale doesn't really solve the root cause of the staleness. These pointer compares have a long history going back to: commit fe00cae ("RDS: give up on half formed connections after 15s") when a "ic->i_cm_id == cm_id" compare was introduced inside "rds_ib_cm_handle_connect" in order to test if the upper-layer "ic->i_cm_id" is already pointing to the underlying "cm_id". However, upon arrival of a "RDMA_CM_EVENT_CONNECT_REQUEST" event, the "cm_id" is expected to have been freshly allocated, ("cm_req_handler" unconditionally calls "ib_create_cm_id", which allocates with "kzalloc") and therefore cm_id->context is expected to be "NULL". What's more likely in this case is that due to the missing invalidation of "ic->i_cm_id" whenever "rds_rdma_cm_event_handler_cmn" returns a non-zero value, that "ic->i_cm_id" was still pointing to a "cm_id" that consequently gets destroyed ("rdma_destroy_id") due to the non-zero return value of the "event_handler". So a subsequent "kzalloc" may return the same pointer, but not because it's the same object, but a different object that happened to land on the same address as the never invalidated "ic->i_cm_id" pointer. Back to the commits we're reverting here. There were a number of problems with this commit: 1) "drivers/infiniband/core/cma.c" is completely unaware of of the "rds_ib_rdma_destroy_id" wrapper and will continue to call "rdma_destroy_id" instead. So the point of keeping track of and being able to invalidate these identifiers is missed whenever it's "cma.c" that destroys the "cm_id" due to a non-zero return value of "event_handler". Not only that, but this will inevitably lead to identifier / memory-leaks. 2) The identifiers started with (start == 0) in "idr_alloc_cyclic", and then cast to "(struct rds_connection *)(unsigned long)". If we ignore the confusing nature of this pointer cast for a moment, we now no longer can distinguish between a "NULL" pointer that was a result of having cast an IDR value of "== 0" and a real NULL pointer after the "kzalloc". A freshly allocated "cm_id" would stand a chance to be mistaken for "IDR value == 0". 3) There is no error check on the return value of "idr_alloc_cyclic". If identifiers would run out (e.g. due to the leak desribed in #1), "cm_id->context" would just end up with a value of PTR_ERR(-ENOSPC) or whatever error code "idr_alloc_cyclic" returned. That would lead to a number of hard to debug problems. 4) "RDS_IB_NO_CTX" was defined as "ERR_PTR(ENOENT)" The convention is to use a negative errno in the kernel, i.e. "ERR_PTR(-ENOENT)". "ERR_PTR" is defined as an inline function that just casts its parameter to a pointer. By using a positive value of (ENOENT == 2) this also causes a collision with IDR value "== 2", as both of them end up with the same value in "cm_id->context". So instead of making stale pointers or stale identifiers less likely, we fix the root-cause(s) of these problems in subsequent commits. Please note that these reverts also removes code that had been introduced on behalf of this functionality (ow reverted) between the original commit and this revert in commit 2b13b0b ("rds: add tracepoint for RDS IB errors, info") * -DEFINE_EVENT(rds_ib, rds_ib_cm_mismatch, ...); * - reason = "rds_ib_rdma_create_id failed"; + reason = "rdma_create_id failed"; * -bool rds_ib_same_cm_id(struct rds_ib_connection *ic, struct rdma_cm_id *cm_id) * -EXPORT_TRACEPOINT_SYMBOL_GPL(rds_ib_cm_mismatch); Orabug: 32373816 Fixes: 7b0b7b2 ("rds: ib: Correct the cm_id compare commit") Fixes: 7999a6a ("rds: ib: Implement proper cm_id compare") Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: Ka-cheong Poon <ka-cheong.poon@oracle.com> (cherry picked from commit d91b564) Port to U3 Signed-off-by: Jack Vogel <jack.vogel@oracle.com> Orabug: 33590097 UEK6 => UEK7 (cherry picked from commit 95037a6) cherry-pick-repo=UEK/production/linux-uek.git Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: William Kucharski <william.kucharski@oracle.com> Orabug: 33590087 UEK7 => LUCI (cherry picked from commit e359ce1) cherry-pick-repo=UEK/production/linux-uek.git Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: William Kucharski <william.kucharski@oracle.com>

Add a check to mlx5e_xmit() for shorter frames. A corrupted/malformed packet, with shorter length can eventually cause system panic further down in the code path. Avoid it by validating the length and dropping it at the earliest. Following is seen in our env with shorter skb->len crash> bt PID: 76981 TASK: ff19828cfe508000 CPU: 106 COMMAND: "vhost-76942" #0 [ff2d20159b39f2c8] machine_kexec at ffffffffad884801 #1 [ff2d20159b39f328] __crash_kexec at ffffffffad976142 #2 [ff2d20159b39f3f8] panic at ffffffffad8b3640 #3 [ff2d20159b39f4a0] no_context at ffffffffad8954e1 #4 [ff2d20159b39f518] __bad_area_nosemaphore at ffffffffad8958de #5 [ff2d20159b39f578] bad_area_nosemaphore at ffffffffad895a96 #6 [ff2d20159b39f588] do_kern_addr_fault at ffffffffad89688e #7 [ff2d20159b39f5b0] __do_page_fault at ffffffffad896b30 #8 [ff2d20159b39f618] do_page_fault at ffffffffad896db6 #9 [ff2d20159b39f650] page_fault at ffffffffae402acd [exception RIP: memcpy_erms+6] RIP: ffffffffae261ab6 RSP: ff2d20159b39f700 RFLAGS: 00010293 RAX: ff198291741ecf2e RBX: ff19828e70d6a100 RCX: fffffffffea1af2b RDX: fffffffffffffffd RSI: ff19828eba6d7e5e RDI: ff198291757d2000 RBP: ff2d20159b39f760 R8: ff198291741ecf00 R9: 000000000000037c R10: 000000000000003c R11: ff19828ffe953940 R12: ff198291741ecf20 R13: ff198267dcb1b600 R14: ff19828eeebb09c0 R15: ff198291741ecf00 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #10 [ff2d20159b39f700] mlx5e_sq_xmit_wqe at ffffffffc05c162e [mlx5_core] #11 [ff2d20159b39f768] mlx5e_xmit at ffffffffc05c1ca3 [mlx5_core] #12 [ff2d20159b39f800] dev_hard_start_xmit at ffffffffae083766 #13 [ff2d20159b39f860] sch_direct_xmit at ffffffffae0e2564 #14 [ff2d20159b39f8b0] __qdisc_run at ffffffffae0e294e #15 [ff2d20159b39f928] __dev_queue_xmit at ffffffffae083eee #16 [ff2d20159b39f9a8] dev_queue_xmit at ffffffffae084370 #17 [ff2d20159b39f9b8] vlan_dev_hard_start_xmit at ffffffffc2fb6fec [8021q] #18 [ff2d20159b39f9d8] dev_hard_start_xmit at ffffffffae083766 #19 [ff2d20159b39fa38] __dev_queue_xmit at ffffffffae08416a #20 [ff2d20159b39fab8] dev_queue_xmit_accel at ffffffffae08438e #21 [ff2d20159b39fac8] macvlan_start_xmit at ffffffffc2fc18d9 [macvlan] #22 [ff2d20159b39faf0] dev_hard_start_xmit at ffffffffae083766 #23 [ff2d20159b39fb50] sch_direct_xmit at ffffffffae0e2564 #24 [ff2d20159b39fba0] __qdisc_run at ffffffffae0e294e #25 [ff2d20159b39fc18] __dev_queue_xmit at ffffffffae083c81 #26 [ff2d20159b39fc90] dev_queue_xmit at ffffffffae084370 #27 [ff2d20159b39fca0] tap_sendmsg at ffffffffc07206ed [tap] #28 [ff2d20159b39fd20] vhost_tx_batch at ffffffffc2fd6590 [vhost_net] #29 [ff2d20159b39fd68] handle_tx_copy at ffffffffc2fd70f3 [vhost_net] #30 [ff2d20159b39fe80] handle_tx at ffffffffc2fd7651 [vhost_net] #31 [ff2d20159b39feb0] handle_tx_kick at ffffffffc2fd76b5 [vhost_net] #32 [ff2d20159b39fec0] vhost_worker at ffffffffc12a5be8 [vhost] #33 [ff2d20159b39ff08] kthread at ffffffffad8dbfe5 #34 [ff2d20159b39ff50] ret_from_fork at ffffffffae400364 This change was discussed with Nvidia and they are in agreement. Orabug: 36879156 CVE: CVE-2024-41090 CVE: CVE-2024-41091 Fixes: e4cf27b ("net/mlx5e: Re-eanble client vlan TX acceleration") Reported-and-tested-by: Dongli Zhang <dongli.zhang@oracle.com> Signed-off-by: Manjunath Patil <manjunath.b.patil@oracle.com> Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com> Reviewed-by: Jack Vogel <jack.vogel@oracle.com> Signed-off-by: Brian Maly <brian.maly@oracle.com> (cherry picked from commit 0dd4b99) Orabug: 36879126 CVE: CVE-2024-41090 CVE: CVE-2024-41091 Signed-off-by: Harshvardhan Jha <harshvardhan.j.jha@oracle.com> Reviewed-by: Vijayendra Suman <vijayendra.suman@oracle.com>

Commit 65e542a82b5b ("rds/ib: handle rds uncongested notifications in worker") introduced an additional initialization call for the congestion monitor. This call was added at the end of the initialization sequence. This order implies that RDS could be up and kicking before the last initialization call, and a NULL pointer dereference is possible, if a user-space application starts to use RDS in close proximity in time with module loading. We then see the following stack trace: BUG: kernel NULL pointer dereference, address: 0000000000000010 PGD 8000000129853067 P4D 8000000129853067 PUD 129854067 PMD 0 Oops: 0002 [#1] SMP PTI CPU: 2 PID: 4396 Comm: 610dab0edd8b4ee Not tainted 5.4.17-2136.301.1.el7uek.x86_64 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 RIP: 0010:_raw_write_lock_irqsave+0x22/0x3a RSP: 0018:ffff9a0040bebdd0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000286 RCX: 00000000000000ff RDX: 0000000000000001 RSI: 0000000000000282 RDI: 0000000000000010 RBP: ffff9a0040bebdd8 R08: ffff8a2ae9a3e7e0 R09: 0000000000000000 R10: 0000000000000008 R11: ffff8a2ae9c37b00 R12: ffff8a2ae9a3e6e0 R13: ffff8a2ae9c37a80 R14: ffff8a2ac7d5fe20 R15: ffff8a2af678e8f0 FS: 000000000236c980(0000) GS:ffff8a2afbd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 00000001299ac000 CR4: 00000000000006e0 Call Trace: rds_cong_remove_socket+0x28/0xf0 [rds] rds_release+0x61/0x130 [rds] __sock_release+0x42/0xb7 sock_close+0x15/0x19 __fput+0xc6/0x257 ____fput+0xe/0x10 task_work_run+0x71/0xa2 exit_to_usermode_loop+0xc8/0x122 do_syscall_64+0x19a/0x1d9 entry_SYSCALL_64_after_hwframe+0x170/0x0 Fixed by changing the initialization order. Orabug: 33923370 Fixes: 65e542a82b5b ("rds/ib: handle rds uncongested notifications in worker") Reported-by: syzkaller Reported-by: george kennedy <george.kennedy@oracle.com> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com> Tested-by: george kennedy <george.kennedy@oracle.com> Reviewed-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>

Also revert "rds: ib: Correct the cm_id compare commit" This reverts commit 7999a6a. This reverts commit 7b0b7b2. Replacing a stale pointer by a cyclically allocated identifier that's hypothetically less likely to be stale doesn't really solve the root cause of the staleness. These pointer compares have a long history going back to: commit fe00cae ("RDS: give up on half formed connections after 15s") when a "ic->i_cm_id == cm_id" compare was introduced inside "rds_ib_cm_handle_connect" in order to test if the upper-layer "ic->i_cm_id" is already pointing to the underlying "cm_id". However, upon arrival of a "RDMA_CM_EVENT_CONNECT_REQUEST" event, the "cm_id" is expected to have been freshly allocated, ("cm_req_handler" unconditionally calls "ib_create_cm_id", which allocates with "kzalloc") and therefore cm_id->context is expected to be "NULL". What's more likely in this case is that due to the missing invalidation of "ic->i_cm_id" whenever "rds_rdma_cm_event_handler_cmn" returns a non-zero value, that "ic->i_cm_id" was still pointing to a "cm_id" that consequently gets destroyed ("rdma_destroy_id") due to the non-zero return value of the "event_handler". So a subsequent "kzalloc" may return the same pointer, but not because it's the same object, but a different object that happened to land on the same address as the never invalidated "ic->i_cm_id" pointer. Back to the commits we're reverting here. There were a number of problems with this commit: 1) "drivers/infiniband/core/cma.c" is completely unaware of of the "rds_ib_rdma_destroy_id" wrapper and will continue to call "rdma_destroy_id" instead. So the point of keeping track of and being able to invalidate these identifiers is missed whenever it's "cma.c" that destroys the "cm_id" due to a non-zero return value of "event_handler". Not only that, but this will inevitably lead to identifier / memory-leaks. 2) The identifiers started with (start == 0) in "idr_alloc_cyclic", and then cast to "(struct rds_connection *)(unsigned long)". If we ignore the confusing nature of this pointer cast for a moment, we now no longer can distinguish between a "NULL" pointer that was a result of having cast an IDR value of "== 0" and a real NULL pointer after the "kzalloc". A freshly allocated "cm_id" would stand a chance to be mistaken for "IDR value == 0". 3) There is no error check on the return value of "idr_alloc_cyclic". If identifiers would run out (e.g. due to the leak desribed in #1), "cm_id->context" would just end up with a value of PTR_ERR(-ENOSPC) or whatever error code "idr_alloc_cyclic" returned. That would lead to a number of hard to debug problems. 4) "RDS_IB_NO_CTX" was defined as "ERR_PTR(ENOENT)" The convention is to use a negative errno in the kernel, i.e. "ERR_PTR(-ENOENT)". "ERR_PTR" is defined as an inline function that just casts its parameter to a pointer. By using a positive value of (ENOENT == 2) this also causes a collision with IDR value "== 2", as both of them end up with the same value in "cm_id->context". So instead of making stale pointers or stale identifiers less likely, we fix the root-cause(s) of these problems in subsequent commits. Please note that these reverts also removes code that had been introduced on behalf of this functionality (ow reverted) between the original commit and this revert in commit 2b13b0b ("rds: add tracepoint for RDS IB errors, info") * -DEFINE_EVENT(rds_ib, rds_ib_cm_mismatch, ...); * - reason = "rds_ib_rdma_create_id failed"; + reason = "rdma_create_id failed"; * -bool rds_ib_same_cm_id(struct rds_ib_connection *ic, struct rdma_cm_id *cm_id) * -EXPORT_TRACEPOINT_SYMBOL_GPL(rds_ib_cm_mismatch); Orabug: 32373816 Fixes: 7b0b7b2 ("rds: ib: Correct the cm_id compare commit") Fixes: 7999a6a ("rds: ib: Implement proper cm_id compare") Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: Ka-cheong Poon <ka-cheong.poon@oracle.com> (cherry picked from commit d91b564) Port to U3 Signed-off-by: Jack Vogel <jack.vogel@oracle.com> Orabug: 33590097 UEK6 => UEK7 (cherry picked from commit 95037a6) cherry-pick-repo=UEK/production/linux-uek.git Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: William Kucharski <william.kucharski@oracle.com> Orabug: 33590087 UEK7 => LUCI (cherry picked from commit e359ce1) cherry-pick-repo=UEK/production/linux-uek.git Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: William Kucharski <william.kucharski@oracle.com>

The original struct rds_ib_work_ring was implemented using double set of counters / pointers for the consumer and producer of the ring. It is complicated and error prone, due to the compilers ability to re-order execution of the statements and moderns CPUs ability to re-order execution. Hence, use only a single set of free-running counters and use only atomic ops, which do not rely on explicit memory barriers. Without this commit, we will encounter the following BUG_ON() running uek7/ga (v5.15.0-0.30.9) on ARM: kernel BUG at net/rds/ib_send.c:886! Internal error: Oops - BUG: 0 [#1] SMP [] rds_ib_xmit+0xa1c/0xb70 [rds_rdma] rds_send_xmit+0x4a0/0xd50 [rds] rds_sendmsg+0xe7c/0xfa0 [rds] sock_sendmsg+0x74/0x80 ____sys_sendmsg+0x270/0x29c ___sys_sendmsg+0xb0/0x118 __sys_sendmsg+0x90/0x104 __arm64_sys_sendmsg+0x30/0x50 invoke_syscall+0x50/0x15c The bug may also manifest itself as: kernel: RDS/IB: ib_post_send to ::ffff:192.168.0.4 returned -22 or as: kernel: RDS/IB: rds_ib_send_unmap_op: unexpected opcode 0xdead in WR! Orabug: 34137460 Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Fixes: f528efe ("RDS/IB: Ring-handling code.") Tested-by: Gerald Gibson <gerald.gibson@oracle.com> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>

Commit 65e542a82b5b ("rds/ib: handle rds uncongested notifications in worker") introduced an additional initialization call for the congestion monitor. This call was added at the end of the initialization sequence. This order implies that RDS could be up and kicking before the last initialization call, and a NULL pointer dereference is possible, if a user-space application starts to use RDS in close proximity in time with module loading. We then see the following stack trace: BUG: kernel NULL pointer dereference, address: 0000000000000010 PGD 8000000129853067 P4D 8000000129853067 PUD 129854067 PMD 0 Oops: 0002 [#1] SMP PTI CPU: 2 PID: 4396 Comm: 610dab0edd8b4ee Not tainted 5.4.17-2136.301.1.el7uek.x86_64 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 RIP: 0010:_raw_write_lock_irqsave+0x22/0x3a RSP: 0018:ffff9a0040bebdd0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000286 RCX: 00000000000000ff RDX: 0000000000000001 RSI: 0000000000000282 RDI: 0000000000000010 RBP: ffff9a0040bebdd8 R08: ffff8a2ae9a3e7e0 R09: 0000000000000000 R10: 0000000000000008 R11: ffff8a2ae9c37b00 R12: ffff8a2ae9a3e6e0 R13: ffff8a2ae9c37a80 R14: ffff8a2ac7d5fe20 R15: ffff8a2af678e8f0 FS: 000000000236c980(0000) GS:ffff8a2afbd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 00000001299ac000 CR4: 00000000000006e0 Call Trace: rds_cong_remove_socket+0x28/0xf0 [rds] rds_release+0x61/0x130 [rds] __sock_release+0x42/0xb7 sock_close+0x15/0x19 __fput+0xc6/0x257 ____fput+0xe/0x10 task_work_run+0x71/0xa2 exit_to_usermode_loop+0xc8/0x122 do_syscall_64+0x19a/0x1d9 entry_SYSCALL_64_after_hwframe+0x170/0x0 Fixed by changing the initialization order. Orabug: 33923370 Fixes: 65e542a82b5b ("rds/ib: handle rds uncongested notifications in worker") Reported-by: syzkaller Reported-by: george kennedy <george.kennedy@oracle.com> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com> Tested-by: george kennedy <george.kennedy@oracle.com> Reviewed-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>

Multiple RDS/TCP backend connections need to be brought up deterministically (i.e. first #0, then #1, ... then #7), in order to not shuffle per-path connection state that's preserved across reconnects (e.g. "cp_next_rx_seq") Orabug: 32422417 Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: Sharath Srinivasan <sharath.srinivasan@oracle.com>

The original struct rds_ib_work_ring was implemented using double set of counters / pointers for the consumer and producer of the ring. It is complicated and error prone, due to the compilers ability to re-order execution of the statements and moderns CPUs ability to re-order execution. Hence, use only a single set of free-running counters and use only atomic ops, which do not rely on explicit memory barriers. Without this commit, we will encounter the following BUG_ON() running uek7/ga (v5.15.0-0.30.9) on ARM: kernel BUG at net/rds/ib_send.c:886! Internal error: Oops - BUG: 0 [#1] SMP [] rds_ib_xmit+0xa1c/0xb70 [rds_rdma] rds_send_xmit+0x4a0/0xd50 [rds] rds_sendmsg+0xe7c/0xfa0 [rds] sock_sendmsg+0x74/0x80 ____sys_sendmsg+0x270/0x29c ___sys_sendmsg+0xb0/0x118 __sys_sendmsg+0x90/0x104 __arm64_sys_sendmsg+0x30/0x50 invoke_syscall+0x50/0x15c The bug may also manifest itself as: kernel: RDS/IB: ib_post_send to ::ffff:192.168.0.4 returned -22 or as: kernel: RDS/IB: rds_ib_send_unmap_op: unexpected opcode 0xdead in WR! Orabug: 34137460 Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Fixes: f528efe ("RDS/IB: Ring-handling code.") Tested-by: Gerald Gibson <gerald.gibson@oracle.com> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>

The original syzkaller issue, https://syzkaller.appspot.com/bug?id=610dab0edd8b4ee86ec402400ad72c87ec0a69fa has no CVE and is not considered a security issue. On upstream kernels, the only bad effect of running the syzkaller program(s) was WARNINGS printed to the console from the kernel. This issue has been fixed upstream down to stable 4.19. The reason the effect of the syzkaller program is not more severe in upstream, is that upstream has the commit d139ff0 ("RDS: Let rds_message_alloc_sgs() return NULL"), which adds a return NULL when incorrect data has been read from user-space. UEK does _not_ have this commit. Nevertheless, when Oracle ran said syzkaller repro on UEK, a KASAN enabled kernels was used, which had crash_on_warn set. Hence, the observed behavior of a UEK kernel was the same as observed on an upstream kernel before the issue was fixed upstream. The pervasive issue on UEK kernels is that if you run said syzkaller program on a standard, non-KASAN-enabled kernel, you first get the WARNING messages, but execution continues and memory gets corrupted, and the kernel crashes in various ways: WARNING: CPU: 41 PID: 16856 at net/rds/message.c:287 rds_message_alloc_sgs+0x54/0x60 [rds] RIP: 0010:rds_message_alloc_sgs+0x54/0x60 [rds] Call Trace: rds_rdma_process_send_cmsg+0x592/0x8e0 [rds] rds_sendmsg+0xa87/0x1050 [rds] sock_sendmsg+0x67/0x72 ____sys_sendmsg+0x223/0x252 ___sys_sendmsg+0x7c/0xb5 __sys_sendmsg+0x5d/0xa3 __x64_sys_sendmsg+0x1f/0x21 do_syscall_64+0x60/0x1d9 entry_SYSCALL_64_after_hwframe+0x170/0x0 general protection fault: 0000 [#1] SMP PTI RIP: 0010:__kmalloc+0x139/0x487 Call Trace: rds_message_alloc+0x30/0xa0 [rds] rds_sendmsg+0x77d/0x1050 [rds] __x64_sys_sendmsg+0x1f/0x21 do_syscall_64+0x60/0x1d9 entry_SYSCALL_64_after_hwframe+0x170/0x0 We have made a modified syzkaller program, that is able to crash a UEK kernel when running as an ordinary, non-root user. In UEK-6-U3 and newer, RDS has commit 2072b67 ("net/rds: Refactor sendmsg ancillary data processing"), and to crash these kernels, rds and rds_rdma must be modprobed and the test program needs to bind to a valid local address associated with an RDMA device, still running as an ordinary, non-root user. Fix this by doing the copy_from_user only once per rds_sendmsg system call. Orabug: 34522570 CVE: CVE-2022-21385 Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com> Reviewed-by: Vegard Nossum <vegard.nossum@oracle.com> Tested-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com> Tested-by: Gerald Gibson <gerald.gibson@oracle.com>

Multiple RDS/TCP backend connections need to be brought up deterministically (i.e. first #0, then #1, ... then #7), in order to not shuffle per-path connection state that's preserved across reconnects (e.g. "cp_next_rx_seq") Orabug: 32422417 Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: Sharath Srinivasan <sharath.srinivasan@oracle.com>

rds_ib_free_caches() calls cancel_delayed_work() in an attempt to clean up the rds_ib_cache_gc_worker(). But, rds_ib_cache_gc_worker() may still be running after cancel_delayed_work() is called. And since rds_ib_cache_gc_worker() re-queues itself, a disaster may happen: BUG: unable to handle page fault for address: 00007ffeaa9bf6cc [snip] Call Trace: <IRQ> run_timer_softirq+0x19/0x2d __do_softirq+0xd0/0x2a5 ? sched_clock_cpu+0x9/0xb6 __irq_exit_rcu+0xc7/0xf1 sysvec_apic_timer_interrupt+0x72/0x89 </IRQ> <TASK> asm_sysvec_apic_timer_interrupt+0x16/0x1b RIP: 0010:cpuidle_enter_state+0xc7/0x35d [snip] cpuidle_enter+0x29/0x40 cpuidle_idle_call+0x143/0x1de do_idle+0x81/0xd2 cpu_startup_entry+0x19/0x1b secondary_startup_64_no_verify+0xc2/0x0 </TASK> or general protection fault, probably for non-canonical address 0xffff11de4ab26c00: 0000 [#1] SMP PTI [snip] RIP: 0010:__queue_work+0xde/0x40a [snip] Call Trace: <IRQ> ? queue_work_node+0x110/0x105 call_timer_fn+0x27/0xff __run_timers+0x1bd/0x299 run_timer_softirq+0x19/0x2d __do_softirq+0xd0/0x2a5 ? sched_clock_cpu+0x9/0xb6 __irq_exit_rcu+0xc7/0xf1 sysvec_apic_timer_interrupt+0x72/0x89 </IRQ> <TASK> asm_sysvec_apic_timer_interrupt+0x16/0x1b RIP: 0010:cpuidle_enter_state+0xc7/0x35d [snip] ? cpuidle_enter_state+0xb7/0x35d cpuidle_enter+0x29/0x40 cpuidle_idle_call+0x143/0x1de do_idle+0x81/0xd2 cpu_startup_entry+0x19/0x1b secondary_startup_64_no_verify+0xc2/0x0 </TASK> or BUG: kernel NULL pointer dereference, address: 0000000000000000 [snip] RIP: 0010:_raw_spin_lock+0xc/0x51 [snip] Call Trace: <IRQ> __queue_work+0x13f/0x40a call_timer_fn+0x24/0xff __run_timers+0x1bd/0x299 run_timer_softirq+0x19/0x2d __do_softirq+0xcd/0x2a5 __irq_exit_rcu+0xc7/0xf1 sysvec_apic_timer_interrupt+0x72/0x89 </IRQ> <TASK> asm_sysvec_apic_timer_interrupt+0x16/0x1b RIP: 0010:cpuidle_enter_state+0xc7/0x35d [snip] cpuidle_enter+0x29/0x40 cpuidle_idle_call+0x143/0x1de do_idle+0x81/0xd2 cpu_startup_entry+0x19/0x1b secondary_startup_64_no_verify+0xc2/0x0 </TASK> Fixed by calling cancel_delayed_work_sync() instead. Orabug: 34806050 Fixes: 8450b32 ("RDS-IB: Add garbage-collection to cache") Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com> Reviewed-by: Sharath Srinivasan <sharath.srinivasan@oracle.com>

The original syzkaller issue, https://syzkaller.appspot.com/bug?id=610dab0edd8b4ee86ec402400ad72c87ec0a69fa has no CVE and is not considered a security issue. On upstream kernels, the only bad effect of running the syzkaller program(s) was WARNINGS printed to the console from the kernel. This issue has been fixed upstream down to stable 4.19. The reason the effect of the syzkaller program is not more severe in upstream, is that upstream has the commit d139ff0 ("RDS: Let rds_message_alloc_sgs() return NULL"), which adds a return NULL when incorrect data has been read from user-space. UEK does _not_ have this commit. Nevertheless, when Oracle ran said syzkaller repro on UEK, a KASAN enabled kernels was used, which had crash_on_warn set. Hence, the observed behavior of a UEK kernel was the same as observed on an upstream kernel before the issue was fixed upstream. The pervasive issue on UEK kernels is that if you run said syzkaller program on a standard, non-KASAN-enabled kernel, you first get the WARNING messages, but execution continues and memory gets corrupted, and the kernel crashes in various ways: WARNING: CPU: 41 PID: 16856 at net/rds/message.c:287 rds_message_alloc_sgs+0x54/0x60 [rds] RIP: 0010:rds_message_alloc_sgs+0x54/0x60 [rds] Call Trace: rds_rdma_process_send_cmsg+0x592/0x8e0 [rds] rds_sendmsg+0xa87/0x1050 [rds] sock_sendmsg+0x67/0x72 ____sys_sendmsg+0x223/0x252 ___sys_sendmsg+0x7c/0xb5 __sys_sendmsg+0x5d/0xa3 __x64_sys_sendmsg+0x1f/0x21 do_syscall_64+0x60/0x1d9 entry_SYSCALL_64_after_hwframe+0x170/0x0 general protection fault: 0000 [#1] SMP PTI RIP: 0010:__kmalloc+0x139/0x487 Call Trace: rds_message_alloc+0x30/0xa0 [rds] rds_sendmsg+0x77d/0x1050 [rds] __x64_sys_sendmsg+0x1f/0x21 do_syscall_64+0x60/0x1d9 entry_SYSCALL_64_after_hwframe+0x170/0x0 We have made a modified syzkaller program, that is able to crash a UEK kernel when running as an ordinary, non-root user. In UEK-6-U3 and newer, RDS has commit 2072b67 ("net/rds: Refactor sendmsg ancillary data processing"), and to crash these kernels, rds and rds_rdma must be modprobed and the test program needs to bind to a valid local address associated with an RDMA device, still running as an ordinary, non-root user. Fix this by doing the copy_from_user only once per rds_sendmsg system call. Orabug: 34522570 CVE: CVE-2022-21385 Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com> Reviewed-by: Vegard Nossum <vegard.nossum@oracle.com> Tested-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com> Tested-by: Gerald Gibson <gerald.gibson@oracle.com>

rds_ib_free_caches() calls cancel_delayed_work() in an attempt to clean up the rds_ib_cache_gc_worker(). But, rds_ib_cache_gc_worker() may still be running after cancel_delayed_work() is called. And since rds_ib_cache_gc_worker() re-queues itself, a disaster may happen: BUG: unable to handle page fault for address: 00007ffeaa9bf6cc [snip] Call Trace: <IRQ> run_timer_softirq+0x19/0x2d __do_softirq+0xd0/0x2a5 ? sched_clock_cpu+0x9/0xb6 __irq_exit_rcu+0xc7/0xf1 sysvec_apic_timer_interrupt+0x72/0x89 </IRQ> <TASK> asm_sysvec_apic_timer_interrupt+0x16/0x1b RIP: 0010:cpuidle_enter_state+0xc7/0x35d [snip] cpuidle_enter+0x29/0x40 cpuidle_idle_call+0x143/0x1de do_idle+0x81/0xd2 cpu_startup_entry+0x19/0x1b secondary_startup_64_no_verify+0xc2/0x0 </TASK> or general protection fault, probably for non-canonical address 0xffff11de4ab26c00: 0000 [#1] SMP PTI [snip] RIP: 0010:__queue_work+0xde/0x40a [snip] Call Trace: <IRQ> ? queue_work_node+0x110/0x105 call_timer_fn+0x27/0xff __run_timers+0x1bd/0x299 run_timer_softirq+0x19/0x2d __do_softirq+0xd0/0x2a5 ? sched_clock_cpu+0x9/0xb6 __irq_exit_rcu+0xc7/0xf1 sysvec_apic_timer_interrupt+0x72/0x89 </IRQ> <TASK> asm_sysvec_apic_timer_interrupt+0x16/0x1b RIP: 0010:cpuidle_enter_state+0xc7/0x35d [snip] ? cpuidle_enter_state+0xb7/0x35d cpuidle_enter+0x29/0x40 cpuidle_idle_call+0x143/0x1de do_idle+0x81/0xd2 cpu_startup_entry+0x19/0x1b secondary_startup_64_no_verify+0xc2/0x0 </TASK> or BUG: kernel NULL pointer dereference, address: 0000000000000000 [snip] RIP: 0010:_raw_spin_lock+0xc/0x51 [snip] Call Trace: <IRQ> __queue_work+0x13f/0x40a call_timer_fn+0x24/0xff __run_timers+0x1bd/0x299 run_timer_softirq+0x19/0x2d __do_softirq+0xcd/0x2a5 __irq_exit_rcu+0xc7/0xf1 sysvec_apic_timer_interrupt+0x72/0x89 </IRQ> <TASK> asm_sysvec_apic_timer_interrupt+0x16/0x1b RIP: 0010:cpuidle_enter_state+0xc7/0x35d [snip] cpuidle_enter+0x29/0x40 cpuidle_idle_call+0x143/0x1de do_idle+0x81/0xd2 cpu_startup_entry+0x19/0x1b secondary_startup_64_no_verify+0xc2/0x0 </TASK> Fixed by calling cancel_delayed_work_sync() instead. Orabug: 34806050 Fixes: 8450b32 ("RDS-IB: Add garbage-collection to cache") Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com> Reviewed-by: Sharath Srinivasan <sharath.srinivasan@oracle.com>

After updating lfstack-code to use try_cmpxchg128() as a replacement for cmpxchg_double(), we introduced errors caused by variables being read twice and possibly being changed in between the reads causing wrong values to be inserted in the list-structure. The following crash is hit when attempting a basic rds-stress [ 398.146678] kernel BUG at net/rds/recv.c:97! [ 398.197773] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 398.258218] CPU: 28 PID: 0 Comm: swapper/28 Kdump: loaded Not tainted 6.5.0-2135.20230914050031.el8uek.rc1.x86_64 #2 [ 398.384196] Hardware name: Oracle Corporation ORACLE SERVER X6-2/ASM,MOTHERBOARD,1U, BIOS 38320100 04/15/2020 [ 398.502889] RIP: 0010:rds_inc_put+0x3d/0x40 [rds] [ 398.559219] Code: 08 48 8d 47 08 48 39 c2 75 20 48 8b 47 18 48 8b 40 50 48 8b 80 e0 00 00 00 e9 ff c4 e4 f3 cc 31 c0 89 c2 89 c7 c3 cc cc cc cc <0f> 0b 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 [ 398.784014] RSP: 0018:ffffa8b68ccd4e70 EFLAGS: 00010206 [ 398.846541] RAX: ffff920363351120 RBX: ffff92034792a000 RCX: 0000000000000000 [ 398.931948] RDX: ffff9222e14df270 RSI: 0000000000000000 RDI: ffff920363351118 [ 399.017355] RBP: ffffa8b6aa41bc40 R08: 0000000000000000 R09: 0000000000000000 [ 399.102763] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000362c0 [ 399.188169] R13: ffff9203c0435a40 R14: ffffa8b68ccd4f00 R15: ffff92034792a980 [ 399.273576] FS: 0000000000000000(0000) GS:ffff92223f980000(0000) knlGS:0000000000000000 [ 399.370426] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 399.439191] CR2: 00007fbe536030c0 CR3: 0000002bdcc36002 CR4: 00000000003706e0 [ 399.524599] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 399.610013] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 399.695420] Call Trace: [ 399.724660] <IRQ> [ 399.748700] ? die+0x37/0x38 [ 399.783143] ? do_trap+0xf7/0x38 [ 399.821756] ? rds_inc_put+0x3d/0x40 [rds] [ 399.870780] ? do_error_trap+0x6a/0x38 [ 399.915616] ? rds_inc_put+0x3d/0x40 [rds] [ 399.964639] ? exc_invalid_op+0x52/0x2c [ 400.010522] ? rds_inc_put+0x3d/0x40 [rds] [ 400.059542] ? asm_exc_invalid_op+0x1a/0x2c [ 400.109592] ? rds_inc_put+0x3d/0x40 [rds] [ 400.158618] rds_ib_recv_cqe_handler+0xe4/0x390 [rds_rdma] [ 400.224283] poll_rcq+0x84/0xc0 [rds_rdma] [ 400.273295] rds_ib_rx+0xad/0x260 [rds_rdma] [ 400.324386] rds_ib_tasklet_fn_recv+0x30/0x40 [rds_rdma] [ 400.387957] tasklet_action_common.constprop.0+0x140/0xf0 [ 400.452568] __do_softirq+0xd4/0x2c [ 400.494289] __irq_exit_rcu+0xc8/0x1b8 [ 400.539131] common_interrupt+0x84/0x2c [ 400.585011] </IRQ> [ 400.610085] <TASK> [ 400.635158] asm_common_interrupt+0x26/0x2c [ 400.685202] RIP: 0010:cpuidle_enter_state+0xcc/0x2c [ 400.743567] Code: 6a 3a 2c ff e8 f5 f2 ff ff 8b 53 04 49 89 c5 0f 1f 44 00 00 31 ff e8 33 13 2b ff 45 84 ff 0f 85 4d 02 00 00 fb 0f 1f 44 00 00 <45> 85 f6 0f 88 7c 01 00 00 49 63 d6 4c 2b 2c 24 48 8d 04 52 48 8d [ 400.968362] RSP: 0018:ffffa8b688237e80 EFLAGS: 00000246 [ 401.030885] RAX: 0000000000000000 RBX: ffffc8967f982128 RCX: 0000000000000000 [ 401.116291] RDX: 000000000000001c RSI: 0000000000000000 RDI: 0000000000000000 [ 401.201699] RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000 [ 401.287108] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffb68c4dc0 [ 401.372512] R13: 0000005cb363ad51 R14: 0000000000000002 R15: 0000000000000000 [ 401.457925] cpuidle_enter+0x2d/0x10 [ 401.500686] cpuidle_idle_call+0x108/0x7 [ 401.547606] do_idle+0x80/0x8 [ 401.583085] cpu_startup_entry+0x1d/0xb [ 401.628968] start_secondary+0x11e/0x38 [ 401.674848] secondary_startup_64_no_verify+0x17e/0x8 [ 401.735294] </TASK> Orabug: 35836155 Fixes: 606495f9b842 ("rds/ib: Replace cmpxchg_double with try_cmpxchg128") Suggested-by: Gerd Rausch <gerd.rausch@oracle.com> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com> Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>

rds_rdma testing often loads/unloads the module several times which leads to an RDS connection destroy not seen during production. A small window exists where a module unload (and connection destroy) can occur immediately after connection establishment, but before a heartbeat handshake completes, so the worker thread remains uncancelled after the connection is destroyed. This code change to cancel any pending worker threads is safe even when heartbeats are disabled via: sysctl net.rds.conn_heartbeat_timeout_secs=0 as there is no penalty to call cancel_delayed_work_sync() with no items in the delayed_work queue. [ 601.460085] general protection fault, probably for non-canonical address 0xffff20e8871f4d08: 0000 [#1] SMP PTI [ 601.471262] CPU: 15 PID: 0 Comm: swapper/15 Kdump: loaded Tainted: G S W 5.15.0-200.131.26.connreap.el8uek.v1.x86_64 #2 [ 601.484563] Hardware name: Oracle Corporation ORACLE SERVER X5-2/ASM,MOTHERBOARD,1U, BIOS 30300200 07/10/2019 [ 601.495634] RIP: 0010:__queue_work+0xde/0x40a [ 601.500504] Code: 8b 37 40 f6 c6 04 75 cf 48 c1 ee 05 81 fe ff ff ff 7f 0f 84 99 00 00 00 48 c7 c7 50 0c c7 95 48 63 f6 e8 55 29 4f 00 48 89 c7 <48> 8b 03 48 85 ff 0f 84 c0 02 00 00 48 39 f8 74 79 48 89 7c 24 08 [ 601.521460] RSP: 0018:ffffb5474c8d4e78 EFLAGS: 00010046 [ 601.527294] RAX: ffff9093bfbf1500 RBX: ffff20e8871f4d08 RCX: 0000000000000000 [ 601.535264] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9093bfbf1500 [ 601.543231] RBP: 000000000000003f R08: 0000000000000000 R09: 0000000000000000 [ 601.551197] R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000000f [ 601.559167] R13: 000000000002e308 R14: ffff9054c7634c00 R15: ffff9054e5a8f208 [ 601.567136] FS: 0000000000000000(0000) GS:ffff9093bfbc0000(0000) knlGS:0000000000000000 [ 601.576168] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 601.582585] CR2: 000055b92cebf000 CR3: 000000178a010003 CR4: 00000000001706e0 [ 601.590549] Call Trace: [ 601.593279] <IRQ> [ 601.595526] ? show_trace_log_lvl+0x1d6/0x2f9 [ 601.600394] ? show_trace_log_lvl+0x1d6/0x2f9 [ 601.605255] ? call_timer_fn+0x27/0xff [ 601.609441] ? __die_body.cold+0x8/0xa [ 601.613625] ? die_addr+0x39/0x53 [ 601.617327] ? exc_general_protection+0x1c4/0x3e9 [ 601.622583] ? asm_exc_general_protection+0x22/0x27 [ 601.628034] ? __queue_work+0xde/0x40a [ 601.632221] ? __queue_work+0xdb/0x40a [ 601.636398] ? queue_work_node+0x110/0x105 [ 601.640973] call_timer_fn+0x27/0xff [ 601.644973] __run_timers+0x1bd/0x299 [ 601.649064] run_timer_softirq+0x19/0x2d [ 601.653442] __do_softirq+0xd0/0x2a5 [ 601.657442] ? sched_clock_cpu+0x9/0xb6 [ 601.661730] __irq_exit_rcu+0xc7/0xf1 [ 601.665829] sysvec_apic_timer_interrupt+0x72/0x89 [ 601.671186] </IRQ> [ 601.673526] <TASK> [ 601.675867] asm_sysvec_apic_timer_interrupt+0x16/0x1b [ 601.681609] RIP: 0010:cpuidle_enter_state+0xc7/0x35d Orabug: 35954530 Fixes: fbf83fabd8fb ("net/rds: Quiesce heartbeat worker in rds_conn_path_destroy()") Tested-by: Jenny Xu <jenny.x.xu@oracle.com> Signed-off-by: Sharath Srinivasan <sharath.srinivasan@oracle.com> Reviewed-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>

After updating lfstack-code to use try_cmpxchg128() as a replacement for cmpxchg_double(), we introduced errors caused by variables being read twice and possibly being changed in between the reads causing wrong values to be inserted in the list-structure. The following crash is hit when attempting a basic rds-stress [ 398.146678] kernel BUG at net/rds/recv.c:97! [ 398.197773] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 398.258218] CPU: 28 PID: 0 Comm: swapper/28 Kdump: loaded Not tainted 6.5.0-2135.20230914050031.el8uek.rc1.x86_64 #2 [ 398.384196] Hardware name: Oracle Corporation ORACLE SERVER X6-2/ASM,MOTHERBOARD,1U, BIOS 38320100 04/15/2020 [ 398.502889] RIP: 0010:rds_inc_put+0x3d/0x40 [rds] [ 398.559219] Code: 08 48 8d 47 08 48 39 c2 75 20 48 8b 47 18 48 8b 40 50 48 8b 80 e0 00 00 00 e9 ff c4 e4 f3 cc 31 c0 89 c2 89 c7 c3 cc cc cc cc <0f> 0b 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 [ 398.784014] RSP: 0018:ffffa8b68ccd4e70 EFLAGS: 00010206 [ 398.846541] RAX: ffff920363351120 RBX: ffff92034792a000 RCX: 0000000000000000 [ 398.931948] RDX: ffff9222e14df270 RSI: 0000000000000000 RDI: ffff920363351118 [ 399.017355] RBP: ffffa8b6aa41bc40 R08: 0000000000000000 R09: 0000000000000000 [ 399.102763] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000362c0 [ 399.188169] R13: ffff9203c0435a40 R14: ffffa8b68ccd4f00 R15: ffff92034792a980 [ 399.273576] FS: 0000000000000000(0000) GS:ffff92223f980000(0000) knlGS:0000000000000000 [ 399.370426] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 399.439191] CR2: 00007fbe536030c0 CR3: 0000002bdcc36002 CR4: 00000000003706e0 [ 399.524599] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 399.610013] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 399.695420] Call Trace: [ 399.724660] <IRQ> [ 399.748700] ? die+0x37/0x38 [ 399.783143] ? do_trap+0xf7/0x38 [ 399.821756] ? rds_inc_put+0x3d/0x40 [rds] [ 399.870780] ? do_error_trap+0x6a/0x38 [ 399.915616] ? rds_inc_put+0x3d/0x40 [rds] [ 399.964639] ? exc_invalid_op+0x52/0x2c [ 400.010522] ? rds_inc_put+0x3d/0x40 [rds] [ 400.059542] ? asm_exc_invalid_op+0x1a/0x2c [ 400.109592] ? rds_inc_put+0x3d/0x40 [rds] [ 400.158618] rds_ib_recv_cqe_handler+0xe4/0x390 [rds_rdma] [ 400.224283] poll_rcq+0x84/0xc0 [rds_rdma] [ 400.273295] rds_ib_rx+0xad/0x260 [rds_rdma] [ 400.324386] rds_ib_tasklet_fn_recv+0x30/0x40 [rds_rdma] [ 400.387957] tasklet_action_common.constprop.0+0x140/0xf0 [ 400.452568] __do_softirq+0xd4/0x2c [ 400.494289] __irq_exit_rcu+0xc8/0x1b8 [ 400.539131] common_interrupt+0x84/0x2c [ 400.585011] </IRQ> [ 400.610085] <TASK> [ 400.635158] asm_common_interrupt+0x26/0x2c [ 400.685202] RIP: 0010:cpuidle_enter_state+0xcc/0x2c [ 400.743567] Code: 6a 3a 2c ff e8 f5 f2 ff ff 8b 53 04 49 89 c5 0f 1f 44 00 00 31 ff e8 33 13 2b ff 45 84 ff 0f 85 4d 02 00 00 fb 0f 1f 44 00 00 <45> 85 f6 0f 88 7c 01 00 00 49 63 d6 4c 2b 2c 24 48 8d 04 52 48 8d [ 400.968362] RSP: 0018:ffffa8b688237e80 EFLAGS: 00000246 [ 401.030885] RAX: 0000000000000000 RBX: ffffc8967f982128 RCX: 0000000000000000 [ 401.116291] RDX: 000000000000001c RSI: 0000000000000000 RDI: 0000000000000000 [ 401.201699] RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000 [ 401.287108] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffb68c4dc0 [ 401.372512] R13: 0000005cb363ad51 R14: 0000000000000002 R15: 0000000000000000 [ 401.457925] cpuidle_enter+0x2d/0x10 [ 401.500686] cpuidle_idle_call+0x108/0x7 [ 401.547606] do_idle+0x80/0x8 [ 401.583085] cpu_startup_entry+0x1d/0xb [ 401.628968] start_secondary+0x11e/0x38 [ 401.674848] secondary_startup_64_no_verify+0x17e/0x8 [ 401.735294] </TASK> Orabug: 35836155 Fixes: 606495f9b842 ("rds/ib: Replace cmpxchg_double with try_cmpxchg128") Suggested-by: Gerd Rausch <gerd.rausch@oracle.com> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com> Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>

rds_rdma testing often loads/unloads the module several times which leads to an RDS connection destroy not seen during production. A small window exists where a module unload (and connection destroy) can occur immediately after connection establishment, but before a heartbeat handshake completes, so the worker thread remains uncancelled after the connection is destroyed. This code change to cancel any pending worker threads is safe even when heartbeats are disabled via: sysctl net.rds.conn_heartbeat_timeout_secs=0 as there is no penalty to call cancel_delayed_work_sync() with no items in the delayed_work queue. [ 601.460085] general protection fault, probably for non-canonical address 0xffff20e8871f4d08: 0000 [#1] SMP PTI [ 601.471262] CPU: 15 PID: 0 Comm: swapper/15 Kdump: loaded Tainted: G S W 5.15.0-200.131.26.connreap.el8uek.v1.x86_64 #2 [ 601.484563] Hardware name: Oracle Corporation ORACLE SERVER X5-2/ASM,MOTHERBOARD,1U, BIOS 30300200 07/10/2019 [ 601.495634] RIP: 0010:__queue_work+0xde/0x40a [ 601.500504] Code: 8b 37 40 f6 c6 04 75 cf 48 c1 ee 05 81 fe ff ff ff 7f 0f 84 99 00 00 00 48 c7 c7 50 0c c7 95 48 63 f6 e8 55 29 4f 00 48 89 c7 <48> 8b 03 48 85 ff 0f 84 c0 02 00 00 48 39 f8 74 79 48 89 7c 24 08 [ 601.521460] RSP: 0018:ffffb5474c8d4e78 EFLAGS: 00010046 [ 601.527294] RAX: ffff9093bfbf1500 RBX: ffff20e8871f4d08 RCX: 0000000000000000 [ 601.535264] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9093bfbf1500 [ 601.543231] RBP: 000000000000003f R08: 0000000000000000 R09: 0000000000000000 [ 601.551197] R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000000f [ 601.559167] R13: 000000000002e308 R14: ffff9054c7634c00 R15: ffff9054e5a8f208 [ 601.567136] FS: 0000000000000000(0000) GS:ffff9093bfbc0000(0000) knlGS:0000000000000000 [ 601.576168] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 601.582585] CR2: 000055b92cebf000 CR3: 000000178a010003 CR4: 00000000001706e0 [ 601.590549] Call Trace: [ 601.593279] <IRQ> [ 601.595526] ? show_trace_log_lvl+0x1d6/0x2f9 [ 601.600394] ? show_trace_log_lvl+0x1d6/0x2f9 [ 601.605255] ? call_timer_fn+0x27/0xff [ 601.609441] ? __die_body.cold+0x8/0xa [ 601.613625] ? die_addr+0x39/0x53 [ 601.617327] ? exc_general_protection+0x1c4/0x3e9 [ 601.622583] ? asm_exc_general_protection+0x22/0x27 [ 601.628034] ? __queue_work+0xde/0x40a [ 601.632221] ? __queue_work+0xdb/0x40a [ 601.636398] ? queue_work_node+0x110/0x105 [ 601.640973] call_timer_fn+0x27/0xff [ 601.644973] __run_timers+0x1bd/0x299 [ 601.649064] run_timer_softirq+0x19/0x2d [ 601.653442] __do_softirq+0xd0/0x2a5 [ 601.657442] ? sched_clock_cpu+0x9/0xb6 [ 601.661730] __irq_exit_rcu+0xc7/0xf1 [ 601.665829] sysvec_apic_timer_interrupt+0x72/0x89 [ 601.671186] </IRQ> [ 601.673526] <TASK> [ 601.675867] asm_sysvec_apic_timer_interrupt+0x16/0x1b [ 601.681609] RIP: 0010:cpuidle_enter_state+0xc7/0x35d Orabug: 35954530 Fixes: fbf83fabd8fb ("net/rds: Quiesce heartbeat worker in rds_conn_path_destroy()") Tested-by: Jenny Xu <jenny.x.xu@oracle.com> Signed-off-by: Sharath Srinivasan <sharath.srinivasan@oracle.com> Reviewed-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>

One of our customers reported the following stack. crash-7.3.0> bt PID: 250515 TASK: ffff888189482f80 CPU: 1 COMMAND: "vmbackup" #0 [ffffc90025017878] die at ffffffff81033c22 #1 [ffffc900250178a8] do_trap at ffffffff81030990 #2 [ffffc900250178f8] do_error_trap at ffffffff810311d7 #3 [ffffc900250179c0] do_invalid_op at ffffffff81031310 #4 [ffffc900250179d0] invalid_op at ffffffff81a01f2a [exception RIP: ocfs2_truncate_rec+1914] RIP: ffffffffc1e73b4a RSP: ffffc90025017a80 RFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000053a75 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff8882d385be08 RDI: ffff8882d385be08 RBP: ffffc90025017b10 R8: 0000000000000000 R9: 0000000000005900 R10: 0000000000000001 R11: 0000000000aaaaaa R12: 0000000000000001 R13: ffff88829e5a9900 R14: ffffc90025017cf0 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: e030 SS: e02b #5 [ffffc90025017b18] ocfs2_remove_extent at ffffffffc1e73e6c [ocfs2] #6 [ffffc90025017bc8] ocfs2_remove_btree_range at ffffffffc1e745f2 [ocfs2] #7 [ffffc90025017c60] ocfs2_commit_truncate at ffffffffc1e75b1f [ocfs2] #8 [ffffc90025017d68] __dta_ocfs2_wipe_inode_606 at ffffffffc1e9a3e0 [ocfs2] #9 [ffffc90025017dd8] ocfs2_evict_inode at ffffffffc1e9ac10 [ocfs2] RIP: 00007f9b26ec8307 RSP: 00007ffc5a193f68 RFLAGS: 00000246 RAX: ffffffffffffffda RBX: 0000000000ddd0a0 RCX: 00007f9b26ec8307 RDX: 0000000000000001 RSI: 00007f9b2719e770 RDI: 0000000001010400 RBP: 0000000001263d80 R8: 0000000000000000 R9: 00000000012146a0 R10: 000000000000000d R11: 0000000000000246 R12: 0000000000ddd0a0 R13: 00007f9b27ba9595 R14: 00007f9b27ca4a50 R15: 00000000ffffffff ORIG_RAX: 0000000000000057 CS: 0033 SS: 002b crash-7.3.0> This crash resulted due to invalid extent record selected for truncate. At the top of the function ocfs2_truncate_rec(), the code checks if the first extent record at the leaf extent list corresponding to the input path is still empty. In that case the tree is rotated left to get rid of the empty extent record but this rotation did not happen. But the function ocfs2_truncate_rec() assumes that the top level call to ocfs2_rotate_tree_left() to get rid of the empty extent always succeeds and hence it decrements the input "index" value. This results in selection of a wrong record for truncate that causes to hit a call to BUG() with the message "Owner %llu: Invalid record truncate: (%u, %u) ". The stack above is the panic stack caused due to hitting BUG(). Though the function ocfs2_rotate_tree_left() was intended to get rid of the first empty record in the extent block, it did not call the function ocfs2_rotate_rightmost_leaf_left() as it did not find h_next_leaf_blk in the extentleaf block to be zero, instead, it proceeded to call __ocfs2_rotate_tree_left(). However the input "index" value was indeed pointing to the last extent record in the leaf block. The macro path_leaf_bh() was returning rightmost extent block as per the tree-depth. and the function ocfs2_find_cpos_for_right_leaf() also found out that the extent block in question is indeed the rightmost and hence there is nothing to rotate at the last extent record pointed by the input "index" value. Hence the extent tree in the leaf block was not totated at all. Hence, the real reason for the above panic is that the value of the field h_next_leaf_blk in the right most leaf block was non-zero that caused the tree not to rotate left resulting in selection of invalid record for truncate. The reason why h_next_leaf_blk was not cleared for the last extent block is still not known and the code changes here is a workaround to avoid the panic by verifying that the extent block in question is indeed the rightmost leaf block in the tree and then correcting the invalid h_next_leaf_blk value. These changes have been verified by the customer by running the provided rpm in their env. Orabug: 34393593 Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com> Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>

A SRQ inherits its parent PD's resource name in ib_create_srq_user(): rdma_restrack_new(&srq->res, RDMA_RESTRACK_SRQ); rdma_restrack_parent_name(&srq->res, &pd->res); But user PDs created via ib_uverbs_share_pd() aren't restracked causing the PD to not have any parent name, causing the following crash when we run "rdma res show srq" and so this patch adds the shpd to restrack. [ 189.099669] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 189.100707] #PF: supervisor read access in kernel mode [ 189.101504] #PF: error_code(0x0000) - not-present page [ 189.102357] PGD 0 P4D 0 [ 189.102801] Oops: 0000 [#1] SMP NOPTI [ 189.103413] CPU: 26 PID: 69041 Comm: rdma Kdump: loaded Not tainted 5.15.0-5.76.3.el8uek.x86_64 #2 [ 189.104758] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-2.module+el8.6.0+20659+3dcf7c70 04/01/2014 [ 189.106359] RIP: 0010:strlen+0x0/0x24 [ 189.106994] Code: 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee 31 d2 89 d1 89 d6 89 d7 41 89 d0 c3 cc cc cc cc 0f 1f 84 00 00 00 00 00 <80> 3f 00 74 16 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 31 ff [ 189.109828] RSP: 0018:ffffa2f2b409b808 EFLAGS: 00010246 [ 189.110684] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000000000 [ 189.111790] RDX: 0000000000000000 RSI: ffff93dca8f46448 RDI: 0000000000000000 [ 189.112943] RBP: ffff93f8091b2500 R08: 0000000000000000 R09: ffff93f8090750b4 [ 189.114102] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 189.115279] R13: ffff93f809075088 R14: ffff93f8067e46a8 R15: 0000000000000000 [ 189.116434] FS: 00007fe7c9707540(0000) GS:ffff9416c2800000(0000) knlGS:0000000000000000 [ 189.117753] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 189.118683] CR2: 0000000000000000 CR3: 000000240eebc004 CR4: 0000000000770ee0 [ 189.119857] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 189.121029] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 189.122198] PKRU: 55555554 [ 189.122676] Call Trace: [ 189.123114] <TASK> [ 189.123474] fill_res_name_pid+0x31/0xb0 [ib_core] [ 189.124217] res_get_common_dumpit+0x38f/0x540 [ib_core] [ 189.125045] ? fill_res_srq_qps+0x210/0x210 [ib_core] [ 189.125930] netlink_dump+0x18b/0x307 [ 189.126511] __netlink_dump_start+0x1f2/0x2d9 [ 189.127145] rdma_nl_rcv_msg+0x1d4/0x210 [ib_core] [ 189.127954] ? res_get_common_dumpit+0x540/0x540 [ib_core] [ 189.128871] rdma_nl_rcv+0xaa/0x100 [ib_core] [ 189.129616] netlink_unicast+0x213/0x2ce [ 189.130284] netlink_sendmsg+0x24f/0x4d9 [ 189.130941] sock_sendmsg+0x65/0x6a [ 189.131547] __sys_sendto+0x128/0x19b [ 189.132189] __x64_sys_sendto+0x20/0x35 [ 189.132832] do_syscall_64+0x38/0x8d [ 189.133451] entry_SYSCALL_64_after_hwframe+0x63/0x0 [ 189.134292] RIP: 0033:0x7fe7c87bc3ab [ 189.134906] Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f3 0f 1e fa 48 8d 05 f5 41 29 00 41 89 ca 8b 00 85 c0 75 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 41 57 4d 89 c7 41 56 41 89 [ 189.137790] RSP: 002b:00007fffc9e324a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 189.139019] RAX: ffffffffffffffda RBX: 00007fffc9e32750 RCX: 00007fe7c87bc3ab [ 189.140153] RDX: 0000000000000018 RSI: 0000558d21de1920 RDI: 0000000000000004 [ 189.141332] RBP: 0000000000000017 R08: 00007fe7c8c5c480 R09: 000000000000000c [ 189.142470] R10: 0000000000000000 R11: 0000000000000246 R12: 0000558d2120e850 [ 189.143631] R13: 00007fffc9e32770 R14: 0000000000000000 R15: 0000000000000000 [ 189.144785] </TASK> and so with the fix: # rdma res show pd ... dev mlx5_0 pdn 42 local_dma_lkey 0x0 users 12 ctxn 36 pid 87599 comm ora_ipc0_dbm051 dev mlx5_0 pdn 43 local_dma_lkey 0x0 users 4 ctxn 36 pid 87599 comm ora_ipc0_dbm051 ... we now see correct pdns, process names for the SRQs and no kernel crash: # rdma res show srq dev mlx5_0 srqn 1 type BASIC lqpn 2448 pdn 42 pid 87599 comm ora_ipc0_dbm051 dev mlx5_0 srqn 3 type XRC pdn 42 cqn 2081 pid 87599 comm ora_ipc0_dbm051 dev mlx5_0 srqn 4 type XRC pdn 42 cqn 2081 pid 87599 comm ora_ipc0_dbm051 dev mlx5_0 srqn 5 type XRC pdn 43 cqn 2083 pid 87599 comm ora_ipc0_dbm051 dev mlx5_0 srqn 6 type XRC pdn 43 cqn 2083 pid 87599 comm ora_ipc0_dbm051 ... Orabug: 34812519 Fixes: b09c4d7 ("RDMA/restrack: Improve readability in task name management") Fixes: 86133a24cbd8 ("IB/Shared PD support from Oracle") Signed-off-by: Sharath Srinivasan <sharath.srinivasan@oracle.com> Reviewed-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: Qing Huang <qing.huang@oracle.com>

Add a check to mlx5e_xmit() for shorter frames. A corrupted/malformed packet, with shorter length can eventually cause system panic further down in the code path. Avoid it by validating the length and dropping it at the earliest. Following is seen in our env with shorter skb->len crash> bt PID: 76981 TASK: ff19828cfe508000 CPU: 106 COMMAND: "vhost-76942" #0 [ff2d20159b39f2c8] machine_kexec at ffffffffad884801 #1 [ff2d20159b39f328] __crash_kexec at ffffffffad976142 #2 [ff2d20159b39f3f8] panic at ffffffffad8b3640 #3 [ff2d20159b39f4a0] no_context at ffffffffad8954e1 #4 [ff2d20159b39f518] __bad_area_nosemaphore at ffffffffad8958de #5 [ff2d20159b39f578] bad_area_nosemaphore at ffffffffad895a96 #6 [ff2d20159b39f588] do_kern_addr_fault at ffffffffad89688e #7 [ff2d20159b39f5b0] __do_page_fault at ffffffffad896b30 #8 [ff2d20159b39f618] do_page_fault at ffffffffad896db6 #9 [ff2d20159b39f650] page_fault at ffffffffae402acd [exception RIP: memcpy_erms+6] RIP: ffffffffae261ab6 RSP: ff2d20159b39f700 RFLAGS: 00010293 RAX: ff198291741ecf2e RBX: ff19828e70d6a100 RCX: fffffffffea1af2b RDX: fffffffffffffffd RSI: ff19828eba6d7e5e RDI: ff198291757d2000 RBP: ff2d20159b39f760 R8: ff198291741ecf00 R9: 000000000000037c R10: 000000000000003c R11: ff19828ffe953940 R12: ff198291741ecf20 R13: ff198267dcb1b600 R14: ff19828eeebb09c0 R15: ff198291741ecf00 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #10 [ff2d20159b39f700] mlx5e_sq_xmit_wqe at ffffffffc05c162e [mlx5_core] #11 [ff2d20159b39f768] mlx5e_xmit at ffffffffc05c1ca3 [mlx5_core] #12 [ff2d20159b39f800] dev_hard_start_xmit at ffffffffae083766 #13 [ff2d20159b39f860] sch_direct_xmit at ffffffffae0e2564 #14 [ff2d20159b39f8b0] __qdisc_run at ffffffffae0e294e #15 [ff2d20159b39f928] __dev_queue_xmit at ffffffffae083eee #16 [ff2d20159b39f9a8] dev_queue_xmit at ffffffffae084370 #17 [ff2d20159b39f9b8] vlan_dev_hard_start_xmit at ffffffffc2fb6fec [8021q] #18 [ff2d20159b39f9d8] dev_hard_start_xmit at ffffffffae083766 #19 [ff2d20159b39fa38] __dev_queue_xmit at ffffffffae08416a #20 [ff2d20159b39fab8] dev_queue_xmit_accel at ffffffffae08438e #21 [ff2d20159b39fac8] macvlan_start_xmit at ffffffffc2fc18d9 [macvlan] #22 [ff2d20159b39faf0] dev_hard_start_xmit at ffffffffae083766 #23 [ff2d20159b39fb50] sch_direct_xmit at ffffffffae0e2564 #24 [ff2d20159b39fba0] __qdisc_run at ffffffffae0e294e #25 [ff2d20159b39fc18] __dev_queue_xmit at ffffffffae083c81 #26 [ff2d20159b39fc90] dev_queue_xmit at ffffffffae084370 #27 [ff2d20159b39fca0] tap_sendmsg at ffffffffc07206ed [tap] #28 [ff2d20159b39fd20] vhost_tx_batch at ffffffffc2fd6590 [vhost_net] #29 [ff2d20159b39fd68] handle_tx_copy at ffffffffc2fd70f3 [vhost_net] #30 [ff2d20159b39fe80] handle_tx at ffffffffc2fd7651 [vhost_net] #31 [ff2d20159b39feb0] handle_tx_kick at ffffffffc2fd76b5 [vhost_net] #32 [ff2d20159b39fec0] vhost_worker at ffffffffc12a5be8 [vhost] #33 [ff2d20159b39ff08] kthread at ffffffffad8dbfe5 #34 [ff2d20159b39ff50] ret_from_fork at ffffffffae400364 This change was discussed with Nvidia and they are in agreement. Orabug: 36879156 CVE: CVE-2024-41090 CVE: CVE-2024-41091 Fixes: e4cf27b ("net/mlx5e: Re-eanble client vlan TX acceleration") Reported-and-tested-by: Dongli Zhang <dongli.zhang@oracle.com> Signed-off-by: Manjunath Patil <manjunath.b.patil@oracle.com> Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com> Reviewed-by: Jack Vogel <jack.vogel@oracle.com> Signed-off-by: Brian Maly <brian.maly@oracle.com> (cherry picked from commit 0dd4b99) Orabug: 36879126 CVE: CVE-2024-41090 CVE: CVE-2024-41091 Signed-off-by: Harshvardhan Jha <harshvardhan.j.jha@oracle.com> Reviewed-by: Vijayendra Suman <vijayendra.suman@oracle.com>

Distribute the channels between the different SD-devices to acheive local numa node performance on multiple numas. Each channel works against one specific mdev, creating all datapath queues against it. We distribute channels to mdevs in a round-robin policy. Example for 2 mdevs and 6 channels: +-------+---------+ | ch ix | mdev ix | +-------+---------+ | 0 | 0 | | 1 | 1 | | 2 | 0 | | 3 | 1 | | 4 | 0 | | 5 | 1 | +-------+---------+ This round-robin distribution policy is preferred over another suggested intuitive distribution, in which we first distribute one half of the channels to mdev #0 and then the second half to mdev #1. We prefer round-robin for a reason: it is less influenced by changes in the number of channels. The mapping between channel index and mdev is fixed, no matter how many channels the user configures. As the channel stats are persistent to channels closure, changing the mapping every single time would turn the accumulative stats less representing of the channel's history. Per-channel objects should stop using the primary mdev (priv->mdev) directly, and instead move to using their own channel's mdev. Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Reviewed-by: Gal Pressman <gal@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com> Orabug: 37710815 (cherry picked from commit 67936e1) cherry-pick-repo=kernel/git/torvalds/linux.git Conflicts: drivers/net/ethernet/mellanox/mlx5/core/en_main.c Small conflicts in a context caused by missing netif_napi related code. Signed-off-by: Mikhael Goikhman <migo@nvidia.com> Signed-off-by: Qing Huang <qing.huang@oracle.com> Reviewed-by: Sharath Srinivasan <sharath.srinivasan@oracle.com> Signed-off-by: Vijayendra Suman <vijayendra.suman@oracle.com>

mlx5e_suspend cleans resources only if netif_device_present() returns true. However, mlx5e_resume changes the state of netif, via mlx5e_nic_enable, only if reg_state == NETREG_REGISTERED. In the below case, the above leads to NULL-ptr Oops[1] and memory leaks: mlx5e_probe _mlx5e_resume mlx5e_attach_netdev mlx5e_nic_enable <-- netdev not reg, not calling netif_device_attach() register_netdev <-- failed for some reason. ERROR_FLOW: _mlx5e_suspend <-- netif_device_present return false, resources aren't freed :( Hence, clean resources in this case as well. [1] BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 0 P4D 0 Oops: 0010 [#1] SMP CPU: 2 PID: 9345 Comm: test-ovs-ct-gen Not tainted 6.5.0_for_upstream_min_debug_2023_09_05_16_01 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:0x0 Code: Unable to access opcode bytes at0xffffffffffffffd6. RSP: 0018:ffff888178aaf758 EFLAGS: 00010246 Call Trace: <TASK> ? __die+0x20/0x60 ? page_fault_oops+0x14c/0x3c0 ? exc_page_fault+0x75/0x140 ? asm_exc_page_fault+0x22/0x30 notifier_call_chain+0x35/0xb0 blocking_notifier_call_chain+0x3d/0x60 mlx5_blocking_notifier_call_chain+0x22/0x30 [mlx5_core] mlx5_core_uplink_netdev_event_replay+0x3e/0x60 [mlx5_core] mlx5_mdev_netdev_track+0x53/0x60 [mlx5_ib] mlx5_ib_roce_init+0xc3/0x340 [mlx5_ib] __mlx5_ib_add+0x34/0xd0 [mlx5_ib] mlx5r_probe+0xe1/0x210 [mlx5_ib] ? auxiliary_match_id+0x6a/0x90 auxiliary_bus_probe+0x38/0x80 ? driver_sysfs_add+0x51/0x80 really_probe+0xc9/0x3e0 ? driver_probe_device+0x90/0x90 __driver_probe_device+0x80/0x160 driver_probe_device+0x1e/0x90 __device_attach_driver+0x7d/0x100 bus_for_each_drv+0x80/0xd0 __device_attach+0xbc/0x1f0 bus_probe_device+0x86/0xa0 device_add+0x637/0x840 __auxiliary_device_add+0x3b/0xa0 add_adev+0xc9/0x140 [mlx5_core] mlx5_rescan_drivers_locked+0x22a/0x310 [mlx5_core] mlx5_register_device+0x53/0xa0 [mlx5_core] mlx5_init_one_devl_locked+0x5c4/0x9c0 [mlx5_core] mlx5_init_one+0x3b/0x60 [mlx5_core] probe_one+0x44c/0x730 [mlx5_core] local_pci_probe+0x3e/0x90 pci_device_probe+0xbf/0x210 ? kernfs_create_link+0x5d/0xa0 ? sysfs_do_create_link_sd+0x60/0xc0 really_probe+0xc9/0x3e0 ? driver_probe_device+0x90/0x90 __driver_probe_device+0x80/0x160 driver_probe_device+0x1e/0x90 __device_attach_driver+0x7d/0x100 bus_for_each_drv+0x80/0xd0 __device_attach+0xbc/0x1f0 pci_bus_add_device+0x54/0x80 pci_iov_add_virtfn+0x2e6/0x320 sriov_enable+0x208/0x420 mlx5_core_sriov_configure+0x9e/0x200 [mlx5_core] sriov_numvfs_store+0xae/0x1a0 kernfs_fop_write_iter+0x10c/0x1a0 vfs_write+0x291/0x3c0 ksys_write+0x5f/0xe0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- Fixes: 2c3b5be ("net/mlx5e: More generic netdev management API") Signed-off-by: Shay Drory <shayd@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://lore.kernel.org/r/20240509112951.590184-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Orabug: 37710815 (cherry picked from commit 3d59184) cherry-pick-repo=kernel/git/torvalds/linux.git unmodified-from-upstream: 3d59184 Signed-off-by: Mikhael Goikhman <migo@nvidia.com> Signed-off-by: Qing Huang <qing.huang@oracle.com> Reviewed-by: Sharath Srinivasan <sharath.srinivasan@oracle.com> Signed-off-by: Vijayendra Suman <vijayendra.suman@oracle.com>

Command bitmask have a dedicated bit for MANAGE_PAGES command, this bit isn't Initialize during command bitmask Initialization, only during MANAGE_PAGES. In addition, mlx5_cmd_trigger_completions() is trying to trigger completion for MANAGE_PAGES command as well. Hence, in case health error occurred before any MANAGE_PAGES command have been invoke (for example, during mlx5_enable_hca()), mlx5_cmd_trigger_completions() will try to trigger completion for MANAGE_PAGES command, which will result in null-ptr-deref error.[1] Fix it by Initialize command bitmask correctly. While at it, re-write the code for better understanding. [1] BUG: KASAN: null-ptr-deref in mlx5_cmd_trigger_completions+0x1db/0x600 [mlx5_core] Write of size 4 at addr 0000000000000214 by task kworker/u96:2/12078 CPU: 10 PID: 12078 Comm: kworker/u96:2 Not tainted 6.9.0-rc2_for_upstream_debug_2024_04_07_19_01 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_health0000:08:00.0 mlx5_fw_fatal_reporter_err_work [mlx5_core] Call Trace: <TASK> dump_stack_lvl+0x7e/0xc0 kasan_report+0xb9/0xf0 kasan_check_range+0xec/0x190 mlx5_cmd_trigger_completions+0x1db/0x600 [mlx5_core] mlx5_cmd_flush+0x94/0x240 [mlx5_core] enter_error_state+0x6c/0xd0 [mlx5_core] mlx5_fw_fatal_reporter_err_work+0xf3/0x480 [mlx5_core] process_one_work+0x787/0x1490 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? pwq_dec_nr_in_flight+0xda0/0xda0 ? assign_work+0x168/0x240 worker_thread+0x586/0xd30 ? rescuer_thread+0xae0/0xae0 kthread+0x2df/0x3b0 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x2d/0x70 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork_asm+0x11/0x20 </TASK> Fixes: 9b98d39 ("net/mlx5: Start health poll at earlier stage of driver load") Signed-off-by: Shay Drory <shayd@nvidia.com> Reviewed-by: Moshe Shemesh <moshe@nvidia.com> Reviewed-by: Saeed Mahameed <saeedm@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com> Orabug: 37710815 (cherry picked from commit d62b140) cherry-pick-repo=kernel/git/torvalds/linux.git unmodified-from-upstream: d62b140 Signed-off-by: Mikhael Goikhman <migo@nvidia.com> Signed-off-by: Qing Huang <qing.huang@oracle.com> Reviewed-by: Sharath Srinivasan <sharath.srinivasan@oracle.com> Signed-off-by: Vijayendra Suman <vijayendra.suman@oracle.com>

When cmd_alloc_index(), fails cmd_work_handler() needs to complete ent->slotted before returning early. Otherwise the task which issued the command may hang: mlx5_core 0000:01:00.0: cmd_work_handler:877:(pid 3880418): failed to allocate command entry INFO: task kworker/13:2:4055883 blocked for more than 120 seconds. Not tainted 4.19.90-25.44.v2101.ky10.aarch64 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/13:2 D 0 4055883 2 0x00000228 Workqueue: events mlx5e_tx_dim_work [mlx5_core] Call trace: __switch_to+0xe8/0x150 __schedule+0x2a8/0x9b8 schedule+0x2c/0x88 schedule_timeout+0x204/0x478 wait_for_common+0x154/0x250 wait_for_completion+0x28/0x38 cmd_exec+0x7a0/0xa00 [mlx5_core] mlx5_cmd_exec+0x54/0x80 [mlx5_core] mlx5_core_modify_cq+0x6c/0x80 [mlx5_core] mlx5_core_modify_cq_moderation+0xa0/0xb8 [mlx5_core] mlx5e_tx_dim_work+0x54/0x68 [mlx5_core] process_one_work+0x1b0/0x448 worker_thread+0x54/0x468 kthread+0x134/0x138 ret_from_fork+0x10/0x18 Fixes: 485d65e ("net/mlx5: Add a timeout to acquire the command queue semaphore") Signed-off-by: Chenguang Zhao <zhaochenguang@kylinos.cn> Reviewed-by: Moshe Shemesh <moshe@nvidia.com> Acked-by: Tariq Toukan <tariqt@nvidia.com> Link: https://patch.msgid.link/20250108030009.68520-1-zhaochenguang@kylinos.cn Signed-off-by: Jakub Kicinski <kuba@kernel.org> Orabug: 37710815 (cherry picked from commit 0e2909c) cherry-pick-repo=kernel/git/torvalds/linux.git unmodified-from-upstream: 0e2909c Signed-off-by: Mikhael Goikhman <migo@nvidia.com> Signed-off-by: Qing Huang <qing.huang@oracle.com> Reviewed-by: Sharath Srinivasan <sharath.srinivasan@oracle.com> Signed-off-by: Vijayendra Suman <vijayendra.suman@oracle.com>

Clear the port select structure on error so no stale values left after definers are destroyed. That's because the mlx5_lag_destroy_definers() always try to destroy all lag definers in the tt_map, so in the flow below lag definers get double-destroyed and cause kernel crash: mlx5_lag_port_sel_create() mlx5_lag_create_definers() mlx5_lag_create_definer() <- Failed on tt 1 mlx5_lag_destroy_definers() <- definers[tt=0] gets destroyed mlx5_lag_port_sel_create() mlx5_lag_create_definers() mlx5_lag_create_definer() <- Failed on tt 0 mlx5_lag_destroy_definers() <- definers[tt=0] gets double-destroyed Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Mem abort info: ESR = 0x0000000096000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 64k pages, 48-bit VAs, pgdp=0000000112ce2e00 [0000000000000008] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP Modules linked in: iptable_raw bonding ip_gre ip6_gre gre ip6_tunnel tunnel6 geneve ip6_udp_tunnel udp_tunnel ipip tunnel4 ip_tunnel rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_fwctl(OE) fwctl(OE) mlx5_core(OE) mlxdevm(OE) ib_core(OE) mlxfw(OE) memtrack(OE) mlx_compat(OE) openvswitch nsh nf_conncount psample xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo xt_addrtype iptable_filter iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter bridge stp llc netconsole overlay efi_pstore sch_fq_codel zram ip_tables crct10dif_ce qemu_fw_cfg fuse ipv6 crc_ccitt [last unloaded: mlx_compat(OE)] CPU: 3 UID: 0 PID: 217 Comm: kworker/u53:2 Tainted: G OE 6.11.0+ #2 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 Workqueue: mlx5_lag mlx5_do_bond_work [mlx5_core] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : mlx5_del_flow_rules+0x24/0x2c0 [mlx5_core] lr : mlx5_lag_destroy_definer+0x54/0x100 [mlx5_core] sp : ffff800085fafb00 x29: ffff800085fafb00 x28: ffff0000da0c8000 x27: 0000000000000000 x26: ffff0000da0c8000 x25: ffff0000da0c8000 x24: ffff0000da0c8000 x23: ffff0000c31f81a0 x22: 0400000000000000 x21: ffff0000da0c8000 x20: 0000000000000000 x19: 0000000000000001 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffff8b0c9350 x14: 0000000000000000 x13: ffff800081390d18 x12: ffff800081dc3cc0 x11: 0000000000000001 x10: 0000000000000b10 x9 : ffff80007ab7304c x8 : ffff0000d00711f0 x7 : 0000000000000004 x6 : 0000000000000190 x5 : ffff00027edb3010 x4 : 0000000000000000 x3 : 0000000000000000 x2 : ffff0000d39b8000 x1 : ffff0000d39b8000 x0 : 0400000000000000 Call trace: mlx5_del_flow_rules+0x24/0x2c0 [mlx5_core] mlx5_lag_destroy_definer+0x54/0x100 [mlx5_core] mlx5_lag_destroy_definers+0xa0/0x108 [mlx5_core] mlx5_lag_port_sel_create+0x2d4/0x6f8 [mlx5_core] mlx5_activate_lag+0x60c/0x6f8 [mlx5_core] mlx5_do_bond_work+0x284/0x5c8 [mlx5_core] process_one_work+0x170/0x3e0 worker_thread+0x2d8/0x3e0 kthread+0x11c/0x128 ret_from_fork+0x10/0x20 Code: a9025bf5 aa0003f6 a90363f7 f90023f9 (f9400400) ---[ end trace 0000000000000000 ]--- Fixes: dc48516 ("net/mlx5: Lag, add support to create definers for LAG") Signed-off-by: Mark Zhang <markzhang@nvidia.com> Reviewed-by: Leon Romanovsky <leonro@nvidia.com> Reviewed-by: Mark Bloch <mbloch@nvidia.com> Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com> Orabug: 37710815 (cherry picked from commit 5641e82) cherry-pick-repo=kernel/git/torvalds/linux.git unmodified-from-upstream: 5641e82 Signed-off-by: Mikhael Goikhman <migo@nvidia.com> Signed-off-by: Qing Huang <qing.huang@oracle.com> Reviewed-by: Sharath Srinivasan <sharath.srinivasan@oracle.com> Signed-off-by: Vijayendra Suman <vijayendra.suman@oracle.com>

Attempt to enable IPsec packet offload in tunnel mode in debug kernel generates the following kernel panic, which is happening due to two issues: 1. In SA add section, the should be _bh() variant when marking SA mode. 2. There is not needed flush_workqueue in SA delete routine. It is not needed as at this stage as it is removed from SADB and the running work will be canceled later in SA free. ===================================================== WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected 6.12.0+ #4 Not tainted ----------------------------------------------------- charon/1337 [HC0[0]:SC0[4]:HE1:SE0] is trying to acquire: ffff88810f365020 (&xa->xa_lock#24){+.+.}-{3:3}, at: mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core] and this task is already holding: ffff88813e0f0d48 (&x->lock){+.-.}-{3:3}, at: xfrm_state_delete+0x16/0x30 which would create a new lock dependency: (&x->lock){+.-.}-{3:3} -> (&xa->xa_lock#24){+.+.}-{3:3} but this new dependency connects a SOFTIRQ-irq-safe lock: (&x->lock){+.-.}-{3:3} ... which became SOFTIRQ-irq-safe at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 xfrm_timer_handler+0x91/0xd70 __hrtimer_run_queues+0x1dd/0xa60 hrtimer_run_softirq+0x146/0x2e0 handle_softirqs+0x266/0x860 irq_exit_rcu+0x115/0x1a0 sysvec_apic_timer_interrupt+0x6e/0x90 asm_sysvec_apic_timer_interrupt+0x16/0x20 default_idle+0x13/0x20 default_idle_call+0x67/0xa0 do_idle+0x2da/0x320 cpu_startup_entry+0x50/0x60 start_secondary+0x213/0x2a0 common_startup_64+0x129/0x138 to a SOFTIRQ-irq-unsafe lock: (&xa->xa_lock#24){+.+.}-{3:3} ... which became SOFTIRQ-irq-unsafe at: ... lock_acquire+0x1be/0x520 _raw_spin_lock+0x2c/0x40 xa_set_mark+0x70/0x110 mlx5e_xfrm_add_state+0xe48/0x2290 [mlx5_core] xfrm_dev_state_add+0x3bb/0xd70 xfrm_add_sa+0x2451/0x4a90 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&xa->xa_lock#24); local_irq_disable(); lock(&x->lock); lock(&xa->xa_lock#24); <Interrupt> lock(&x->lock); *** DEADLOCK *** 2 locks held by charon/1337: #0: ffffffff87f8f858 (&net->xfrm.xfrm_cfg_mutex){+.+.}-{4:4}, at: xfrm_netlink_rcv+0x5e/0x90 #1: ffff88813e0f0d48 (&x->lock){+.-.}-{3:3}, at: xfrm_state_delete+0x16/0x30 the dependencies between SOFTIRQ-irq-safe lock and the holding lock: -> (&x->lock){+.-.}-{3:3} ops: 29 { HARDIRQ-ON-W at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 xfrm_alloc_spi+0xc0/0xe60 xfrm_alloc_userspi+0x5f6/0xbc0 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 IN-SOFTIRQ-W at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 xfrm_timer_handler+0x91/0xd70 __hrtimer_run_queues+0x1dd/0xa60 hrtimer_run_softirq+0x146/0x2e0 handle_softirqs+0x266/0x860 irq_exit_rcu+0x115/0x1a0 sysvec_apic_timer_interrupt+0x6e/0x90 asm_sysvec_apic_timer_interrupt+0x16/0x20 default_idle+0x13/0x20 default_idle_call+0x67/0xa0 do_idle+0x2da/0x320 cpu_startup_entry+0x50/0x60 start_secondary+0x213/0x2a0 common_startup_64+0x129/0x138 INITIAL USE at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 xfrm_alloc_spi+0xc0/0xe60 xfrm_alloc_userspi+0x5f6/0xbc0 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 } ... key at: [<ffffffff87f9cd20>] __key.18+0x0/0x40 the dependencies between the lock to be acquired and SOFTIRQ-irq-unsafe lock: -> (&xa->xa_lock#24){+.+.}-{3:3} ops: 9 { HARDIRQ-ON-W at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 mlx5e_xfrm_add_state+0xc5b/0x2290 [mlx5_core] xfrm_dev_state_add+0x3bb/0xd70 xfrm_add_sa+0x2451/0x4a90 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 SOFTIRQ-ON-W at: lock_acquire+0x1be/0x520 _raw_spin_lock+0x2c/0x40 xa_set_mark+0x70/0x110 mlx5e_xfrm_add_state+0xe48/0x2290 [mlx5_core] xfrm_dev_state_add+0x3bb/0xd70 xfrm_add_sa+0x2451/0x4a90 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 INITIAL USE at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 mlx5e_xfrm_add_state+0xc5b/0x2290 [mlx5_core] xfrm_dev_state_add+0x3bb/0xd70 xfrm_add_sa+0x2451/0x4a90 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 } ... key at: [<ffffffffa078ff60>] __key.48+0x0/0xfffffffffff210a0 [mlx5_core] ... acquired at: __lock_acquire+0x30a0/0x5040 lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core] xfrm_dev_state_delete+0x90/0x160 __xfrm_state_delete+0x662/0xae0 xfrm_state_delete+0x1e/0x30 xfrm_del_sa+0x1c2/0x340 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 stack backtrace: CPU: 7 UID: 0 PID: 1337 Comm: charon Not tainted 6.12.0+ #4 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: <TASK> dump_stack_lvl+0x74/0xd0 check_irq_usage+0x12e8/0x1d90 ? print_shortest_lock_dependencies_backwards+0x1b0/0x1b0 ? check_chain_key+0x1bb/0x4c0 ? __lockdep_reset_lock+0x180/0x180 ? check_path.constprop.0+0x24/0x50 ? mark_lock+0x108/0x2fb0 ? print_circular_bug+0x9b0/0x9b0 ? mark_lock+0x108/0x2fb0 ? print_usage_bug.part.0+0x670/0x670 ? check_prev_add+0x1c4/0x2310 check_prev_add+0x1c4/0x2310 __lock_acquire+0x30a0/0x5040 ? lockdep_set_lock_cmp_fn+0x190/0x190 ? lockdep_set_lock_cmp_fn+0x190/0x190 lock_acquire+0x1be/0x520 ? mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 ? __xfrm_state_delete+0x5f0/0xae0 ? lock_downgrade+0x6b0/0x6b0 _raw_spin_lock_bh+0x34/0x40 ? mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core] mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core] xfrm_dev_state_delete+0x90/0x160 __xfrm_state_delete+0x662/0xae0 xfrm_state_delete+0x1e/0x30 xfrm_del_sa+0x1c2/0x340 ? xfrm_get_sa+0x250/0x250 ? check_chain_key+0x1bb/0x4c0 xfrm_user_rcv_msg+0x493/0x880 ? copy_sec_ctx+0x270/0x270 ? check_chain_key+0x1bb/0x4c0 ? lockdep_set_lock_cmp_fn+0x190/0x190 ? lockdep_set_lock_cmp_fn+0x190/0x190 netlink_rcv_skb+0x12e/0x380 ? copy_sec_ctx+0x270/0x270 ? netlink_ack+0xd90/0xd90 ? netlink_deliver_tap+0xcd/0xb60 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 ? netlink_attachskb+0x730/0x730 ? lock_acquire+0x1be/0x520 netlink_sendmsg+0x745/0xbe0 ? netlink_unicast+0x740/0x740 ? __might_fault+0xbb/0x170 ? netlink_unicast+0x740/0x740 __sock_sendmsg+0xc5/0x190 ? fdget+0x163/0x1d0 __sys_sendto+0x1fe/0x2c0 ? __x64_sys_getpeername+0xb0/0xb0 ? do_user_addr_fault+0x856/0xe30 ? lock_acquire+0x1be/0x520 ? __task_pid_nr_ns+0x117/0x410 ? lock_downgrade+0x6b0/0x6b0 __x64_sys_sendto+0xdc/0x1b0 ? lockdep_hardirqs_on_prepare+0x284/0x400 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7f7d31291ba4 Code: 7d e8 89 4d d4 e8 4c 42 f7 ff 44 8b 4d d0 4c 8b 45 c8 89 c3 44 8b 55 d4 8b 7d e8 b8 2c 00 00 00 48 8b 55 d8 48 8b 75 e0 0f 05 <48> 3d 00 f0 ff ff 77 34 89 df 48 89 45 e8 e8 99 42 f7 ff 48 8b 45 RSP: 002b:00007f7d2ccd94f0 EFLAGS: 00000297 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f7d31291ba4 RDX: 0000000000000028 RSI: 00007f7d2ccd96a0 RDI: 000000000000000a RBP: 00007f7d2ccd9530 R08: 00007f7d2ccd9598 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000028 R13: 00007f7d2ccd9598 R14: 00007f7d2ccd96a0 R15: 00000000000000e1 </TASK> Fixes: 4c24272 ("net/mlx5e: Listen to ARP events to update IPsec L2 headers in tunnel mode") Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com> Orabug: 37710815 (cherry picked from commit 2c36880) cherry-pick-repo=kernel/git/torvalds/linux.git unmodified-from-upstream: 2c36880 Signed-off-by: Mikhael Goikhman <migo@nvidia.com> Signed-off-by: Qing Huang <qing.huang@oracle.com> Reviewed-by: Sharath Srinivasan <sharath.srinivasan@oracle.com> Signed-off-by: Vijayendra Suman <vijayendra.suman@oracle.com>

[ 16.602634] BUG: kernel NULL pointer dereference, address: 000000000000023e [ 16.605935] #PF: supervisor read access in kernel mode [ 16.608212] #PF: error_code(0x0000) - not-present page [ 16.610394] PGD 0 P4D 0 [ 16.611862] Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI [ 16.614116] CPU: 1 UID: 0 PID: 1141 Comm: sha512hmac Tainted: GF OE 6.12.0-0.7.5fips.el9uek.v12.ol9.x86_64 #1 [ 16.618140] Tainted: [F]=FORCED_MODULE, [O]=OOT_MODULE, [E]=UNSIGNED_MODULE [ 16.620797] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.6.4 02/27/2023 [ 16.624072] RIP: 0010:netlink_unicast+0xf8/0x3a0 [ 16.626277] Code: df e8 2c c5 f9 ff 85 c0 0f 85 03 02 00 00 48 8d 54 24 08 48 89 e9 4c 89 e6 48 89 df e8 c1 fc ff ff 83 f8 01 0f 85 18 02 00 00 <0f> b7 85 3e 02 00 00 4c 8b 7d 30 48 8d 14 40 48 8d 1c 90 48 c1 e3 [ 16.633200] RSP: 0018:ffffb3a980d63958 EFLAGS: 00010202 [ 16.635681] RAX: 0000000000000000 RBX: 0000000000000040 RCX: 0000000000000000 [ 16.638819] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 16.641687] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 [ 16.644414] R10: 0000000000000000 R11: 0000004000000080 R12: ffff9020de5b6f00 [ 16.647480] R13: 0000000000000475 R14: 0000000000000001 R15: ffffffff9e4c2f40 [ 16.650658] FS: 00007f11f613c740(0000) GS:ffff90236fc80000(0000) knlGS:0000000000000000 [ 16.654165] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 16.656847] CR2: 000000000000023e CR3: 0000000100ce0000 CR4: 00000000003506f0 [ 16.659808] Call Trace: [ 16.661202] <TASK> [ 16.662570] ? srso_return_thunk+0x5/0x5f [ 16.664506] ? show_trace_log_lvl+0x255/0x300 [ 16.666589] ? show_trace_log_lvl+0x255/0x300 [ 16.668611] ? crypto_report+0xcc/0x13a [fips_crypto_user] [ 16.671092] ? __die_body.cold+0x8/0x17 [ 16.673003] ? page_fault_oops+0x160/0x16b [ 16.674997] ? exc_page_fault+0x73/0x180 [ 16.676917] ? asm_exc_page_fault+0x26/0x30 [ 16.678966] ? netlink_unicast+0xf8/0x3a0 [ 16.680896] ? netlink_unicast+0x52/0x3a0 [ 16.682935] crypto_report+0xcc/0x13a [fips_crypto_user] [ 16.685240] crypto_user_rcv_msg+0xd6/0x1f0 [fips_crypto_user] [ 16.687846] ? crypto_alloc_tfmmem.isra.0+0x28/0x60 [fips140] [ 16.690434] ? __pfx_crypto_user_rcv_msg+0x10/0x10 [fips_crypto_user] [ 16.693132] netlink_rcv_skb+0x53/0x110 [ 16.695043] crypto_netlink_rcv+0x28/0x40 [fips_crypto_user] [ 16.697495] netlink_unicast+0x250/0x3a0 [ 16.699480] netlink_sendmsg+0x21b/0x47f [ 16.701266] ____sys_sendmsg+0x3af/0x3e0 [ 16.703135] ? srso_return_thunk+0x5/0x5f [ 16.705151] ___sys_sendmsg+0x9a/0xf0 [ 16.706912] __sys_sendmsg+0x7a/0xe0 [ 16.708664] do_syscall_64+0x8c/0x1b0 [ 16.710413] ? arch_exit_to_user_mode_prepare.isra.0+0x1e/0xd0 [ 16.712917] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 16.715218] RIP: 0033:0x7f11f624ea97 [ 16.716898] Code: 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 16.724038] RSP: 002b:00007ffe6714b2b8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 16.727267] RAX: ffffffffffffffda RBX: 00005592acd469d0 RCX: 00007f11f624ea97 [ 16.730478] RDX: 0000000000000000 RSI: 00007ffe6714b2e0 RDI: 0000000000000004 [ 16.733559] RBP: 0000000000000004 R08: 000000000000000f R09: 0029323135616873 [ 16.736579] R10: 00007f11f62efc40 R11: 0000000000000246 R12: 00007ffe6714b460 [ 16.739661] R13: 0000559279c9e909 R14: 00007ffe6714b2e0 R15: 0000000000000010 [ 16.742802] </TASK> Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>

darakian added 2 commits April 13, 2018 13:21

Create License file

0ce75a7

Merge pull request #1 from darakian/add-license-1

940604a

Create License file

gregmarsden closed this Apr 13, 2018

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add GPL license file #1

Add GPL license file #1

darakian commented Apr 13, 2018

gregmarsden commented Apr 13, 2018

Add GPL license file #1

Add GPL license file #1

Conversation

darakian commented Apr 13, 2018

gregmarsden commented Apr 13, 2018