Skip to content

Commit 1ddf991

Browse files
committed
xfrm: Add support for per cpu xfrm state handling.
Currently, all flows for a given SA must be processed by the same CPU to avoid packet reordering and contention on the xfrm state lock. To get rid of this limitation, the IETF standardized per-CPU SAs in RFC 9611. This patch implements the xfrm part of it. We add the CPU as a lookup key for xfrm states and a config option to generate acquire messages for each CPU. With that, each CPU can have an SA with an identical traffic selector, so that flows can be processed in parallel on all CPUs. Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> Tested-by: Antony Antony <antony.antony@secunet.com> Tested-by: Tobias Brunner <tobias@strongswan.org>
1 parent ab101c5 commit 1ddf991

File tree

6 files changed

+112
-22
lines changed

6 files changed

+112
-22
lines changed

include/net/xfrm.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ struct xfrm_state {
188188
refcount_t refcnt;
189189
spinlock_t lock;
190190

191+
u32 pcpu_num;
191192
struct xfrm_id id;
192193
struct xfrm_selector sel;
193194
struct xfrm_mark mark;
@@ -1684,7 +1685,7 @@ struct xfrmk_spdinfo {
16841685
u32 spdhmcnt;
16851686
};
16861687

1687-
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
1688+
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
16881689
int xfrm_state_delete(struct xfrm_state *x);
16891690
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
16901691
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
@@ -1796,7 +1797,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack);
17961797
int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi,
17971798
struct netlink_ext_ack *extack);
17981799
struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
1799-
u8 mode, u32 reqid, u32 if_id, u8 proto,
1800+
u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
18001801
const xfrm_address_t *daddr,
18011802
const xfrm_address_t *saddr, int create,
18021803
unsigned short family);

include/uapi/linux/xfrm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ enum xfrm_attr_type_t {
322322
XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */
323323
XFRMA_SA_DIR, /* __u8 */
324324
XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */
325+
XFRMA_SA_PCPU, /* __u32 */
325326
__XFRMA_MAX
326327

327328
#define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */
@@ -437,6 +438,7 @@ struct xfrm_userpolicy_info {
437438
#define XFRM_POLICY_LOCALOK 1 /* Allow user to override global policy */
438439
/* Automatically expand selector to include matching ICMP payloads. */
439440
#define XFRM_POLICY_ICMP 2
441+
#define XFRM_POLICY_CPU_ACQUIRE 4
440442
__u8 share;
441443
};
442444

net/key/af_key.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,15 +1354,16 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
13541354
}
13551355

13561356
if (hdr->sadb_msg_seq) {
1357-
x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
1357+
x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
13581358
if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
13591359
xfrm_state_put(x);
13601360
x = NULL;
13611361
}
13621362
}
13631363

13641364
if (!x)
1365-
x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
1365+
x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, UINT_MAX,
1366+
proto, xdaddr, xsaddr, 1, family);
13661367

13671368
if (x == NULL)
13681369
return -ENOENT;
@@ -1417,7 +1418,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb
14171418
if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
14181419
return 0;
14191420

1420-
x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
1421+
x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
14211422
if (x == NULL)
14221423
return 0;
14231424

net/xfrm/xfrm_compat.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
132132
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
133133
[XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
134134
[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
135+
[XFRMA_SA_PCPU] = { .type = NLA_U32 },
135136
};
136137

137138
static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
@@ -282,9 +283,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
282283
case XFRMA_MTIMER_THRESH:
283284
case XFRMA_SA_DIR:
284285
case XFRMA_NAT_KEEPALIVE_INTERVAL:
286+
case XFRMA_SA_PCPU:
285287
return xfrm_nla_cpy(dst, src, nla_len(src));
286288
default:
287-
BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
289+
BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
288290
pr_warn_once("unsupported nla_type %d\n", src->nla_type);
289291
return -EOPNOTSUPP;
290292
}
@@ -439,7 +441,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
439441
int err;
440442

441443
if (type > XFRMA_MAX) {
442-
BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
444+
BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
443445
NL_SET_ERR_MSG(extack, "Bad attribute");
444446
return -EOPNOTSUPP;
445447
}

net/xfrm/xfrm_state.c

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
679679
x->lft.hard_packet_limit = XFRM_INF;
680680
x->replay_maxage = 0;
681681
x->replay_maxdiff = 0;
682+
x->pcpu_num = UINT_MAX;
682683
spin_lock_init(&x->lock);
683684
}
684685
return x;
@@ -1155,6 +1156,12 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
11551156
struct xfrm_state **best, int *acq_in_progress,
11561157
int *error)
11571158
{
1159+
/* We need the cpu id just as a lookup key,
1160+
* we don't require it to be stable.
1161+
*/
1162+
unsigned int pcpu_id = get_cpu();
1163+
put_cpu();
1164+
11581165
/* Resolution logic:
11591166
* 1. There is a valid state with matching selector. Done.
11601167
* 2. Valid state with inappropriate selector. Skip.
@@ -1174,13 +1181,18 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
11741181
&fl->u.__fl_common))
11751182
return;
11761183

1184+
if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
1185+
return;
1186+
11771187
if (!*best ||
1188+
((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
11781189
(*best)->km.dying > x->km.dying ||
11791190
((*best)->km.dying == x->km.dying &&
11801191
(*best)->curlft.add_time < x->curlft.add_time))
11811192
*best = x;
11821193
} else if (x->km.state == XFRM_STATE_ACQ) {
1183-
*acq_in_progress = 1;
1194+
if (!*best || x->pcpu_num == pcpu_id)
1195+
*acq_in_progress = 1;
11841196
} else if (x->km.state == XFRM_STATE_ERROR ||
11851197
x->km.state == XFRM_STATE_EXPIRED) {
11861198
if ((!x->sel.family ||
@@ -1209,6 +1221,13 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
12091221
unsigned short encap_family = tmpl->encap_family;
12101222
unsigned int sequence;
12111223
struct km_event c;
1224+
unsigned int pcpu_id;
1225+
1226+
/* We need the cpu id just as a lookup key,
1227+
* we don't require it to be stable.
1228+
*/
1229+
pcpu_id = get_cpu();
1230+
put_cpu();
12121231

12131232
to_put = NULL;
12141233

@@ -1282,7 +1301,10 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
12821301
}
12831302

12841303
found:
1285-
x = best;
1304+
if (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) ||
1305+
(best && (best->pcpu_num == pcpu_id)))
1306+
x = best;
1307+
12861308
if (!x && !error && !acquire_in_progress) {
12871309
if (tmpl->id.spi &&
12881310
(x0 = __xfrm_state_lookup_all(net, mark, daddr,
@@ -1314,6 +1336,8 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
13141336
xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
13151337
memcpy(&x->mark, &pol->mark, sizeof(x->mark));
13161338
x->if_id = if_id;
1339+
if ((pol->flags & XFRM_POLICY_CPU_ACQUIRE) && best)
1340+
x->pcpu_num = pcpu_id;
13171341

13181342
error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
13191343
if (error) {
@@ -1392,6 +1416,11 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
13921416
x = NULL;
13931417
error = -ESRCH;
13941418
}
1419+
1420+
/* Use the already installed 'fallback' while the CPU-specific
1421+
* SA acquire is handled. */
1422+
if (best)
1423+
x = best;
13951424
}
13961425
out:
13971426
if (x) {
@@ -1524,12 +1553,14 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
15241553
unsigned int h;
15251554
u32 mark = xnew->mark.v & xnew->mark.m;
15261555
u32 if_id = xnew->if_id;
1556+
u32 cpu_id = xnew->pcpu_num;
15271557

15281558
h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
15291559
hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
15301560
if (x->props.family == family &&
15311561
x->props.reqid == reqid &&
15321562
x->if_id == if_id &&
1563+
x->pcpu_num == cpu_id &&
15331564
(mark & x->mark.m) == x->mark.v &&
15341565
xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
15351566
xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
@@ -1552,7 +1583,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
15521583
static struct xfrm_state *__find_acq_core(struct net *net,
15531584
const struct xfrm_mark *m,
15541585
unsigned short family, u8 mode,
1555-
u32 reqid, u32 if_id, u8 proto,
1586+
u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
15561587
const xfrm_address_t *daddr,
15571588
const xfrm_address_t *saddr,
15581589
int create)
@@ -1569,6 +1600,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
15691600
x->id.spi != 0 ||
15701601
x->id.proto != proto ||
15711602
(mark & x->mark.m) != x->mark.v ||
1603+
x->pcpu_num != pcpu_num ||
15721604
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
15731605
!xfrm_addr_equal(&x->props.saddr, saddr, family))
15741606
continue;
@@ -1602,6 +1634,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
16021634
break;
16031635
}
16041636

1637+
x->pcpu_num = pcpu_num;
16051638
x->km.state = XFRM_STATE_ACQ;
16061639
x->id.proto = proto;
16071640
x->props.family = family;
@@ -1630,7 +1663,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
16301663
return x;
16311664
}
16321665

1633-
static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
1666+
static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
16341667

16351668
int xfrm_state_add(struct xfrm_state *x)
16361669
{
@@ -1656,7 +1689,7 @@ int xfrm_state_add(struct xfrm_state *x)
16561689
}
16571690

16581691
if (use_spi && x->km.seq) {
1659-
x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
1692+
x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq, x->pcpu_num);
16601693
if (x1 && ((x1->id.proto != x->id.proto) ||
16611694
!xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
16621695
to_put = x1;
@@ -1666,7 +1699,7 @@ int xfrm_state_add(struct xfrm_state *x)
16661699

16671700
if (use_spi && !x1)
16681701
x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
1669-
x->props.reqid, x->if_id, x->id.proto,
1702+
x->props.reqid, x->if_id, x->pcpu_num, x->id.proto,
16701703
&x->id.daddr, &x->props.saddr, 0);
16711704

16721705
__xfrm_state_bump_genids(x);
@@ -1791,6 +1824,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
17911824
x->props.flags = orig->props.flags;
17921825
x->props.extra_flags = orig->props.extra_flags;
17931826

1827+
x->pcpu_num = orig->pcpu_num;
17941828
x->if_id = orig->if_id;
17951829
x->tfcpad = orig->tfcpad;
17961830
x->replay_maxdiff = orig->replay_maxdiff;
@@ -2066,13 +2100,14 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
20662100

20672101
struct xfrm_state *
20682102
xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
2069-
u32 if_id, u8 proto, const xfrm_address_t *daddr,
2103+
u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr,
20702104
const xfrm_address_t *saddr, int create, unsigned short family)
20712105
{
20722106
struct xfrm_state *x;
20732107

20742108
spin_lock_bh(&net->xfrm.xfrm_state_lock);
2075-
x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
2109+
x = __find_acq_core(net, mark, family, mode, reqid, if_id, pcpu_num,
2110+
proto, daddr, saddr, create);
20762111
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
20772112

20782113
return x;
@@ -2207,14 +2242,15 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
22072242

22082243
/* Silly enough, but I'm lazy to build resolution list */
22092244

2210-
static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
2245+
static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
22112246
{
22122247
unsigned int h = xfrm_seq_hash(net, seq);
22132248
struct xfrm_state *x;
22142249

22152250
hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
22162251
if (x->km.seq == seq &&
22172252
(mark & x->mark.m) == x->mark.v &&
2253+
x->pcpu_num == pcpu_num &&
22182254
x->km.state == XFRM_STATE_ACQ) {
22192255
xfrm_state_hold(x);
22202256
return x;
@@ -2224,12 +2260,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
22242260
return NULL;
22252261
}
22262262

2227-
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
2263+
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
22282264
{
22292265
struct xfrm_state *x;
22302266

22312267
spin_lock_bh(&net->xfrm.xfrm_state_lock);
2232-
x = __xfrm_find_acq_byseq(net, mark, seq);
2268+
x = __xfrm_find_acq_byseq(net, mark, seq, pcpu_num);
22332269
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
22342270
return x;
22352271
}

0 commit comments

Comments
 (0)