Skip to content

Commit 7e51891

Browse files
ogerlitzSaeed Mahameed
authored and
Saeed Mahameed
committed
net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule
Register a notifier block to handle netdev events for bond devices of non-uplink representors, in order to support eswitch vports bonding. When a non-uplink representor is a lower dev (slave) of a bond and becomes active, add an egress acl forward-to-vport rule to all slave netdevs (active + standby) so that they forward to this representor's vport. Use the change-lower-state netdev event to do this. Use the change-upper event to detect a slave representor being unslaved from the lag device and delete its vport egress acl forward rule, if any. Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: Vu Pham <vuhuong@mellanox.com> Reviewed-by: Parav Pandit <parav@mellanox.com> Reviewed-by: Roi Dayan <roid@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
1 parent bf773dc commit 7e51891

File tree

4 files changed

+175
-4
lines changed

4 files changed

+175
-4
lines changed

drivers/net/ethernet/mellanox/mlx5/core/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
3434
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
3535
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
3636
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
37-
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o lib/geneve.o lib/port_tun.o lag_mp.o
37+
mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \
38+
en_rep.o en/rep/bond.o
3839
mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
3940
en/mapping.o esw/chains.o en/tc_tun.o \
4041
en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2+
/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
3+
4+
#include <net/lag.h>
5+
6+
#include "mlx5_core.h"
7+
#include "eswitch.h"
8+
#include "esw/acl/ofld.h"
9+
#include "en_rep.h"
10+
11+
struct mlx5e_rep_bond {
12+
struct notifier_block nb;
13+
struct netdev_net_notifier nn;
14+
};
15+
16+
static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
17+
{
18+
struct mlx5e_priv *priv = netdev_priv(netdev);
19+
struct mlx5e_rep_priv *rpriv = priv->ppriv;
20+
21+
/* A given netdev is not a representor or not a slave of LAG configuration */
22+
if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev))
23+
return false;
24+
25+
/* Egress acl forward to vport is supported only non-uplink representor */
26+
return rpriv->rep->vport != MLX5_VPORT_UPLINK;
27+
}
28+
29+
static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
30+
{
31+
struct netdev_notifier_changelowerstate_info *info;
32+
struct netdev_lag_lower_state_info *lag_info;
33+
struct mlx5e_rep_priv *rpriv;
34+
struct net_device *lag_dev;
35+
struct mlx5e_priv *priv;
36+
struct list_head *iter;
37+
struct net_device *dev;
38+
u16 acl_vport_num;
39+
u16 fwd_vport_num;
40+
41+
if (!mlx5e_rep_is_lag_netdev(netdev))
42+
return;
43+
44+
info = ptr;
45+
lag_info = info->lower_state_info;
46+
/* This is not an event of a representor becoming active slave */
47+
if (!lag_info->tx_enabled)
48+
return;
49+
50+
priv = netdev_priv(netdev);
51+
rpriv = priv->ppriv;
52+
fwd_vport_num = rpriv->rep->vport;
53+
lag_dev = netdev_master_upper_dev_get(netdev);
54+
55+
netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
56+
lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
57+
58+
/* Point everyone's egress acl to the vport of the active representor */
59+
netdev_for_each_lower_dev(lag_dev, dev, iter) {
60+
priv = netdev_priv(dev);
61+
rpriv = priv->ppriv;
62+
acl_vport_num = rpriv->rep->vport;
63+
if (acl_vport_num != fwd_vport_num) {
64+
mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
65+
fwd_vport_num,
66+
acl_vport_num);
67+
}
68+
}
69+
}
70+
71+
static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
72+
{
73+
struct netdev_notifier_changeupper_info *info = ptr;
74+
struct mlx5e_rep_priv *rpriv;
75+
struct mlx5e_priv *priv;
76+
77+
if (!mlx5e_rep_is_lag_netdev(netdev))
78+
return;
79+
80+
/* Nothing to setup for new enslaved representor */
81+
if (info->linking)
82+
return;
83+
84+
priv = netdev_priv(netdev);
85+
rpriv = priv->ppriv;
86+
netdev_dbg(netdev, "Unslave, reset vport(%d) egress acl\n", rpriv->rep->vport);
87+
88+
/* Reset all egress acl rules of unslave representor's vport */
89+
mlx5_esw_acl_egress_vport_unbond(priv->mdev->priv.eswitch,
90+
rpriv->rep->vport);
91+
}
92+
93+
/* Bond device of representors and netdev events are used here in specific way
94+
* to support eswitch vports bonding and to perform failover of eswitch vport
95+
* by modifying the vport's egress acl of lower dev representors. Thus this
96+
* also change the traditional behavior of lower dev under bond device.
97+
* All non-representor netdevs or representors of other vendors as lower dev
98+
* of bond device are not supported.
99+
*/
100+
static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
101+
unsigned long event, void *ptr)
102+
{
103+
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
104+
105+
switch (event) {
106+
case NETDEV_CHANGELOWERSTATE:
107+
mlx5e_rep_changelowerstate_event(netdev, ptr);
108+
break;
109+
case NETDEV_CHANGEUPPER:
110+
mlx5e_rep_changeupper_event(netdev, ptr);
111+
break;
112+
}
113+
return NOTIFY_DONE;
114+
}
115+
116+
/* If HW support eswitch vports bonding, register a specific notifier to
117+
* handle it when two or more representors are bonded
118+
*/
119+
int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
120+
{
121+
struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
122+
struct net_device *netdev = rpriv->netdev;
123+
struct mlx5e_priv *priv;
124+
int ret = 0;
125+
126+
priv = netdev_priv(netdev);
127+
if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
128+
goto out;
129+
130+
uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
131+
if (!uplink_priv->bond) {
132+
ret = -ENOMEM;
133+
goto out;
134+
}
135+
136+
uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
137+
ret = register_netdevice_notifier_dev_net(netdev,
138+
&uplink_priv->bond->nb,
139+
&uplink_priv->bond->nn);
140+
if (ret) {
141+
netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
142+
kvfree(uplink_priv->bond);
143+
uplink_priv->bond = NULL;
144+
}
145+
out:
146+
return ret;
147+
}
148+
149+
void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
150+
{
151+
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
152+
153+
if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
154+
!rpriv->uplink_priv.bond)
155+
return;
156+
157+
unregister_netdevice_notifier_dev_net(rpriv->netdev,
158+
&rpriv->uplink_priv.bond->nb,
159+
&rpriv->uplink_priv.bond->nn);
160+
kvfree(rpriv->uplink_priv.bond);
161+
}

drivers/net/ethernet/mellanox/mlx5/core/en_rep.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -959,16 +959,18 @@ static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
959959

960960
mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
961961

962+
mlx5e_rep_bond_init(rpriv);
962963
err = mlx5e_rep_tc_netdevice_event_register(rpriv);
963964
if (err) {
964965
mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n",
965966
err);
966-
goto tc_rep_cleanup;
967+
goto err_event_reg;
967968
}
968969

969970
return 0;
970971

971-
tc_rep_cleanup:
972+
err_event_reg:
973+
mlx5e_rep_bond_cleanup(rpriv);
972974
mlx5e_rep_tc_cleanup(rpriv);
973975
return err;
974976
}
@@ -1001,7 +1003,7 @@ static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
10011003
{
10021004
mlx5e_rep_tc_netdevice_event_unregister(rpriv);
10031005
mlx5e_rep_indr_clean_block_privs(rpriv);
1004-
1006+
mlx5e_rep_bond_cleanup(rpriv);
10051007
mlx5e_rep_tc_cleanup(rpriv);
10061008
}
10071009

drivers/net/ethernet/mellanox/mlx5/core/en_rep.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ struct mlx5e_neigh_update_table {
5656
};
5757

5858
struct mlx5_tc_ct_priv;
59+
struct mlx5e_rep_bond;
5960
struct mlx5_rep_uplink_priv {
6061
/* Filters DB - instantiated by the uplink representor and shared by
6162
* the uplink's VFs
@@ -89,6 +90,9 @@ struct mlx5_rep_uplink_priv {
8990
struct mapping_ctx *tunnel_enc_opts_mapping;
9091

9192
struct mlx5_tc_ct_priv *ct_priv;
93+
94+
/* support eswitch vports bonding */
95+
struct mlx5e_rep_bond *bond;
9296
};
9397

9498
struct mlx5e_rep_priv {
@@ -211,6 +215,9 @@ struct mlx5e_rep_sq {
211215

212216
void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
213217
void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
218+
int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv);
219+
void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv);
220+
214221
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
215222
int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
216223
void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);

0 commit comments

Comments
 (0)