Skip to content

Commit 65aae79

Browse files
w1ldptr authored and nathan-zcgao committed
net/mlx5e: Fix use-after-free of encap entry in neigh update handler
commit fb1a313 upstream. Function mlx5e_rep_neigh_update() wasn't updated to accommodate rtnl lock removal from TC filter update path and properly handle concurrent encap entry insertion/deletion which can lead to following use-after-free: [23827.464923] ================================================================== [23827.469446] BUG: KASAN: use-after-free in mlx5e_encap_take+0x72/0x140 [mlx5_core] [23827.470971] Read of size 4 at addr ffff8881d132228c by task kworker/u20:6/21635 [23827.472251] [23827.472615] CPU: 9 PID: 21635 Comm: kworker/u20:6 Not tainted 5.13.0-rc3+ #5 [23827.473788] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [23827.475639] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core] [23827.476731] Call Trace: [23827.477260] dump_stack+0xbb/0x107 [23827.477906] print_address_description.constprop.0+0x18/0x140 [23827.478896] ? mlx5e_encap_take+0x72/0x140 [mlx5_core] [23827.479879] ? mlx5e_encap_take+0x72/0x140 [mlx5_core] [23827.480905] kasan_report.cold+0x7c/0xd8 [23827.481701] ? mlx5e_encap_take+0x72/0x140 [mlx5_core] [23827.482744] kasan_check_range+0x145/0x1a0 [23827.493112] mlx5e_encap_take+0x72/0x140 [mlx5_core] [23827.494054] ? mlx5e_tc_tun_encap_info_equal_generic+0x140/0x140 [mlx5_core] [23827.495296] mlx5e_rep_neigh_update+0x41e/0x5e0 [mlx5_core] [23827.496338] ? mlx5e_rep_neigh_entry_release+0xb80/0xb80 [mlx5_core] [23827.497486] ? read_word_at_a_time+0xe/0x20 [23827.498250] ? strscpy+0xa0/0x2a0 [23827.498889] process_one_work+0x8ac/0x14e0 [23827.499638] ? lockdep_hardirqs_on_prepare+0x400/0x400 [23827.500537] ? pwq_dec_nr_in_flight+0x2c0/0x2c0 [23827.501359] ? rwlock_bug.part.0+0x90/0x90 [23827.502116] worker_thread+0x53b/0x1220 [23827.502831] ? process_one_work+0x14e0/0x14e0 [23827.503627] kthread+0x328/0x3f0 [23827.504254] ? _raw_spin_unlock_irq+0x24/0x40 [23827.505065] ? 
__kthread_bind_mask+0x90/0x90 [23827.505912] ret_from_fork+0x1f/0x30 [23827.506621] [23827.506987] Allocated by task 28248: [23827.507694] kasan_save_stack+0x1b/0x40 [23827.508476] __kasan_kmalloc+0x7c/0x90 [23827.509197] mlx5e_attach_encap+0xde1/0x1d40 [mlx5_core] [23827.510194] mlx5e_tc_add_fdb_flow+0x397/0xc40 [mlx5_core] [23827.511218] __mlx5e_add_fdb_flow+0x519/0xb30 [mlx5_core] [23827.512234] mlx5e_configure_flower+0x191c/0x4870 [mlx5_core] [23827.513298] tc_setup_cb_add+0x1d5/0x420 [23827.514023] fl_hw_replace_filter+0x382/0x6a0 [cls_flower] [23827.514975] fl_change+0x2ceb/0x4a51 [cls_flower] [23827.515821] tc_new_tfilter+0x89a/0x2070 [23827.516548] rtnetlink_rcv_msg+0x644/0x8c0 [23827.517300] netlink_rcv_skb+0x11d/0x340 [23827.518021] netlink_unicast+0x42b/0x700 [23827.518742] netlink_sendmsg+0x743/0xc20 [23827.519467] sock_sendmsg+0xb2/0xe0 [23827.520131] ____sys_sendmsg+0x590/0x770 [23827.520851] ___sys_sendmsg+0xd8/0x160 [23827.521552] __sys_sendmsg+0xb7/0x140 [23827.522238] do_syscall_64+0x3a/0x70 [23827.522907] entry_SYSCALL_64_after_hwframe+0x44/0xae [23827.523797] [23827.524163] Freed by task 25948: [23827.524780] kasan_save_stack+0x1b/0x40 [23827.525488] kasan_set_track+0x1c/0x30 [23827.526187] kasan_set_free_info+0x20/0x30 [23827.526968] __kasan_slab_free+0xed/0x130 [23827.527709] slab_free_freelist_hook+0xcf/0x1d0 [23827.528528] kmem_cache_free_bulk+0x33a/0x6e0 [23827.529317] kfree_rcu_work+0x55f/0xb70 [23827.530024] process_one_work+0x8ac/0x14e0 [23827.530770] worker_thread+0x53b/0x1220 [23827.531480] kthread+0x328/0x3f0 [23827.532114] ret_from_fork+0x1f/0x30 [23827.532785] [23827.533147] Last potentially related work creation: [23827.534007] kasan_save_stack+0x1b/0x40 [23827.534710] kasan_record_aux_stack+0xab/0xc0 [23827.535492] kvfree_call_rcu+0x31/0x7b0 [23827.536206] mlx5e_tc_del_fdb_flow+0x577/0xef0 [mlx5_core] [23827.537305] mlx5e_flow_put+0x49/0x80 [mlx5_core] [23827.538290] mlx5e_delete_flower+0x6d1/0xe60 [mlx5_core] [23827.539300] 
tc_setup_cb_destroy+0x18e/0x2f0 [23827.540144] fl_hw_destroy_filter+0x1d2/0x310 [cls_flower] [23827.541148] __fl_delete+0x4dc/0x660 [cls_flower] [23827.541985] fl_delete+0x97/0x160 [cls_flower] [23827.542782] tc_del_tfilter+0x7ab/0x13d0 [23827.543503] rtnetlink_rcv_msg+0x644/0x8c0 [23827.544257] netlink_rcv_skb+0x11d/0x340 [23827.544981] netlink_unicast+0x42b/0x700 [23827.545700] netlink_sendmsg+0x743/0xc20 [23827.546424] sock_sendmsg+0xb2/0xe0 [23827.547084] ____sys_sendmsg+0x590/0x770 [23827.547850] ___sys_sendmsg+0xd8/0x160 [23827.548606] __sys_sendmsg+0xb7/0x140 [23827.549303] do_syscall_64+0x3a/0x70 [23827.549969] entry_SYSCALL_64_after_hwframe+0x44/0xae [23827.550853] [23827.551217] The buggy address belongs to the object at ffff8881d1322200 [23827.551217] which belongs to the cache kmalloc-256 of size 256 [23827.553341] The buggy address is located 140 bytes inside of [23827.553341] 256-byte region [ffff8881d1322200, ffff8881d1322300) [23827.555747] The buggy address belongs to the page: [23827.556847] page:00000000898762aa refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d1320 [23827.558651] head:00000000898762aa order:2 compound_mapcount:0 compound_pincount:0 [23827.559961] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff) [23827.561243] raw: 002ffff800010200 dead000000000100 dead000000000122 ffff888100042b40 [23827.562653] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 [23827.564112] page dumped because: kasan: bad access detected [23827.565439] [23827.565932] Memory state around the buggy address: [23827.566917] ffff8881d1322180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [23827.568485] ffff8881d1322200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [23827.569818] >ffff8881d1322280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [23827.571143] ^ [23827.571879] ffff8881d1322300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [23827.573283] ffff8881d1322380: fc fc fc fc fc fc fc fc fc fc 
fc fc fc fc fc fc [23827.574654] ==================================================================

Most of the necessary logic is already correctly implemented by the mlx5e_get_next_valid_encap() helper that is used in the neigh stats update handler. Make the helper generic by renaming it to mlx5e_get_next_matching_encap() and use a callback to test whether the encap entry is matching instead of the hardcoded check for the 'valid' flag value. Implement mlx5e_get_next_valid_encap() by calling mlx5e_get_next_matching_encap() with a callback that tests the encap MLX5_ENCAP_ENTRY_VALID flag. Implement the new mlx5e_get_next_init_encap() helper by calling mlx5e_get_next_matching_encap() with a callback that tests the encap completion result to be non-error, and use it in mlx5e_rep_neigh_update() to safely iterate over nhe->encap_list.

Remove the encap completion logic from mlx5e_rep_update_flows() since the encap entries passed to this function are already guaranteed to be properly initialized by similar code in mlx5e_get_next_init_encap().

Fixes: 2a1f176 ("net/mlx5e: Refactor neigh update for concurrent execution")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Pratyush Yadav <ptyadav@amazon.de>
1 parent 94bc9c8 commit 65aae79

File tree

4 files changed

+40
-17
lines changed

4 files changed

+40
-17
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c

+6-9
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,8 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
129129
work);
130130
struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
131131
struct neighbour *n = update_work->n;
132-
struct mlx5e_encap_entry *e;
132+
struct mlx5e_encap_entry *e = NULL;
133133
unsigned char ha[ETH_ALEN];
134-
struct mlx5e_priv *priv;
135134
bool neigh_connected;
136135
u8 nud_state, dead;
137136

@@ -152,14 +151,12 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
152151

153152
trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
154153

155-
list_for_each_entry(e, &nhe->encap_list, encap_list) {
156-
if (!mlx5e_encap_take(e))
157-
continue;
154+
/* mlx5e_get_next_init_encap() releases previous encap before returning
155+
* the next one.
156+
*/
157+
while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
158+
mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
158159

159-
priv = netdev_priv(e->out_dev);
160-
mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
161-
mlx5e_encap_put(priv, e);
162-
}
163160
rtnl_unlock();
164161
mlx5e_release_neigh_update_work(update_work);
165162
}

drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c

+1-5
Original file line numberDiff line numberDiff line change
@@ -91,13 +91,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
9191

9292
ASSERT_RTNL();
9393

94-
/* wait for encap to be fully initialized */
95-
wait_for_completion(&e->res_ready);
96-
9794
mutex_lock(&esw->offloads.encap_tbl_lock);
9895
encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
99-
if (e->compl_result < 0 || (encap_connected == neigh_connected &&
100-
ether_addr_equal(e->h_dest, ha)))
96+
if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
10197
goto unlock;
10298

10399
mlx5e_take_all_encap_flows(e, &flow_list);

drivers/net/ethernet/mellanox/mlx5/core/en_tc.c

+30-3
Original file line numberDiff line numberDiff line change
@@ -1653,9 +1653,12 @@ void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_l
16531653
mlx5e_flow_put(priv, flow);
16541654
}
16551655

1656+
typedef bool (match_cb)(struct mlx5e_encap_entry *);
1657+
16561658
static struct mlx5e_encap_entry *
1657-
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1658-
struct mlx5e_encap_entry *e)
1659+
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
1660+
struct mlx5e_encap_entry *e,
1661+
match_cb match)
16591662
{
16601663
struct mlx5e_encap_entry *next = NULL;
16611664

@@ -1690,14 +1693,38 @@ mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
16901693
/* wait for encap to be fully initialized */
16911694
wait_for_completion(&next->res_ready);
16921695
/* continue searching if encap entry is not in valid state after completion */
1693-
if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
1696+
if (!match(next)) {
16941697
e = next;
16951698
goto retry;
16961699
}
16971700

16981701
return next;
16991702
}
17001703

1704+
static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
1705+
{
1706+
return e->flags & MLX5_ENCAP_ENTRY_VALID;
1707+
}
1708+
1709+
static struct mlx5e_encap_entry *
1710+
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1711+
struct mlx5e_encap_entry *e)
1712+
{
1713+
return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
1714+
}
1715+
1716+
static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
1717+
{
1718+
return e->compl_result >= 0;
1719+
}
1720+
1721+
struct mlx5e_encap_entry *
1722+
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
1723+
struct mlx5e_encap_entry *e)
1724+
{
1725+
return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
1726+
}
1727+
17011728
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
17021729
{
17031730
struct mlx5e_neigh *m_neigh = &nhe->m_neigh;

drivers/net/ethernet/mellanox/mlx5/core/en_tc.h

+3
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *f
161161
void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
162162

163163
struct mlx5e_neigh_hash_entry;
164+
struct mlx5e_encap_entry *
165+
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
166+
struct mlx5e_encap_entry *e);
164167
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
165168

166169
void mlx5e_tc_reoffload_flows_work(struct work_struct *work);

0 commit comments

Comments
 (0)