From 829afdf58cc29dd2d63caa5c807911a18c83eaa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Tue, 30 Dec 2025 00:14:43 +0100 Subject: [PATCH 01/10] netfilter: nf_tables: fix pointer math issue in nft_byteorder_eval() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-42268 subsystem-update centos-stream-9 f8751245d3174ab38a11084355955fdf8ff4d53d cve CVE-2024-0607 commit-author Florian Westphal commit c301f0981fdd3fd1ffac6836b423c4d7a8e0eb63 upstream-diff ciqlts9_4 backport e7fce923c6297083b2248349a26eeb5d800f576e used for clean cherry-pick netfilter: nf_tables: fix pointer math issue in nft_byteorder_eval() The problem is in nft_byteorder_eval() where we are iterating through a loop and writing to dst[0], dst[1], dst[2] and so on... On each iteration we are writing 8 bytes. But dst[] is an array of u32 so each element only has space for 4 bytes. That means that every iteration overwrites part of the previous element. I spotted this bug while reviewing commit caf3ef7468f7 ("netfilter: nf_tables: prevent OOB access in nft_byteorder_eval") which is a related issue. I think that the reason we have not detected this bug in testing is that most of time we only write one element. Fixes: ce1e7989d989 ("netfilter: nft_byteorder: provide 64bit le/be conversion") Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso (cherry picked from commit e7fce923c6297083b2248349a26eeb5d800f576e) Signed-off-by: Marcin Wcisło --- include/net/netfilter/nf_tables.h | 4 ++-- net/netfilter/nft_byteorder.c | 6 ++++-- net/netfilter/nft_meta.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d23c73f1dc4a6..9cecf5c5cf1a8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -147,9 +147,9 @@ static inline u16 nft_reg_load16(const u32 *sreg) return *(u16 *)sreg; } -static inline void nft_reg_store64(u32 *dreg, u64 val) +static inline void nft_reg_store64(u64 *dreg, u64 val) { - put_unaligned(val, (u64 *)dreg); + put_unaligned(val, dreg); } static inline u64 nft_reg_load64(const u32 *sreg) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 7b0b8fecb2205..adf208b7929fd 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -38,20 +38,22 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->size) { case 8: { + u64 *dst64 = (void *)dst; u64 src64; switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 8; i++) { src64 = nft_reg_load64(&src[i]); - nft_reg_store64(&dst[i], be64_to_cpu(src64)); + nft_reg_store64(&dst64[i], + be64_to_cpu((__force __be64)src64)); } break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 8; i++) { src64 = (__force __u64) cpu_to_be64(nft_reg_load64(&src[i])); - nft_reg_store64(&dst[i], src64); + nft_reg_store64(&dst64[i], src64); } break; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 2f8686b711cf4..2eb150c1fc9d4 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -63,7 +63,7 @@ nft_meta_get_eval_time(enum nft_meta_keys key, { switch (key) { case NFT_META_TIME_NS: - nft_reg_store64(dest, ktime_get_real_ns()); + nft_reg_store64((u64 *)dest, ktime_get_real_ns()); break; case NFT_META_TIME_DAY: nft_reg_store8(dest, nft_meta_weekday()); From d4ca8773c4505788f1ec09a5d8d3f211c8c59246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Tue, 30 Dec 2025 00:22:37 +0100 Subject: [PATCH 02/10] netfilter: nf_tables: skip set commit for deleted/destroyed sets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-6825 subsystem-update centos-stream-9 f8751245d3174ab38a11084355955fdf8ff4d53d cve CVE-2024-0193 commit-author Pablo Neira Ayuso commit 7315dc1e122c85ffdfc8defffbb8f8b616c2eb1a NFT_MSG_DELSET deactivates all elements in the set, skip set->ops->commit() to avoid the unnecessary clone (for the pipapo case) as well as the sync GC cycle, which could deactivate again expired elements in such set. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Kevin Rich Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 7315dc1e122c85ffdfc8defffbb8f8b616c2eb1a) Signed-off-by: Marcin Wcisło --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a9d9408997af7..d96b89cbf2c52 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9376,7 +9376,7 @@ static void nft_set_commit_update(struct list_head *set_update_list) list_for_each_entry_safe(set, next, set_update_list, pending_update) { list_del_init(&set->pending_update); - if (!set->ops->commit) + if (!set->ops->commit || set->dead) continue; set->ops->commit(set); From 464758bfb65f2ee6cc9829e2c3cad06591b1619b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Tue, 30 Dec 2025 00:25:20 +0100 Subject: [PATCH 03/10] netfilter: nf_tables: mark newset as dead on transaction abort MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-430 subsystem-update centos-stream-9 f8751245d3174ab38a11084355955fdf8ff4d53d cve-bf CVE-2023-4244 commit-author Florian Westphal commit 08e4c8c5919fd405a4d709b4ba43d836894a26eb If a transaction is aborted, we should mark the to-be-released NEWSET dead, just like commit path does for DEL and DESTROYSET commands. In both cases all remaining elements will be released via set->ops->destroy(). The existing abort code does NOT post the actual release to the work queue. Also the entire __nf_tables_abort() function is wrapped in gc_seq begin/end pair. Therefore, async gc worker will never try to release the pending set elements, as gc sequence is always stale. It might be possible to speed up transaction aborts via work queue too, this would result in a race and a possible use-after-free. So fix this before it becomes an issue. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 08e4c8c5919fd405a4d709b4ba43d836894a26eb) Signed-off-by: Marcin Wcisło --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d96b89cbf2c52..f68419580bf61 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9834,6 +9834,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } + nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: From efedaab0cda69a56bc539e2ca4af962b01915342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Tue, 30 Dec 2025 00:26:37 +0100 Subject: [PATCH 04/10] netfilter: nft_set_pipapo: prefer gfp_kernel allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-158865 subsystem-update centos-stream-9 f8751245d3174ab38a11084355955fdf8ff4d53d cve-bf CVE-2023-52923 commit-author Florian Westphal commit ffb40fba404561f141d37e5878ec542b67464d74 No need to use GFP_ATOMIC here. Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso (cherry picked from commit ffb40fba404561f141d37e5878ec542b67464d74) Signed-off-by: Marcin Wcisło --- net/netfilter/nft_set_pipapo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index c4da1a1c1a1bf..2f3efeb3793b7 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1603,7 +1603,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) if (nft_set_elem_expired(&e->ext)) { priv->dirty = true; - gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) return; From 9d816e26304e0668f140f5c5fb658e6e037bd243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Tue, 30 Dec 2025 00:28:44 +0100 Subject: [PATCH 05/10] netfilter: nf_tables: skip dead set elements in netlink dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-430 cve-bf CVE-2023-4244 commit-author Pablo Neira Ayuso commit 6b1ca88e4bb63673dc9f9c7f23c899f22c3cb17a Delete from packet path relies on the garbage collector to purge elements with NFT_SET_ELEM_DEAD_BIT on. Skip these dead elements from nf_tables_dump_setelem() path, I very rarely see tests/shell/testcases/maps/typeof_maps_add_delete reports [DUMP FAILED] showing a mismatch in the expected output with an element that should not be there. If the netlink dump happens before GC worker run, it might show dead elements in the ruleset listing. nft_rhash_get() already skips dead elements in nft_rhash_cmp(), therefore, it already does not show the element when getting a single element via netlink control plane. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 6b1ca88e4bb63673dc9f9c7f23c899f22c3cb17a) Signed-off-by: Marcin Wcisło --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f68419580bf61..7fc0e3713d720 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5379,7 +5379,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); struct nft_set_dump_args *args; - if (nft_set_elem_expired(ext)) + if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext)) return 0; args = container_of(iter, struct nft_set_dump_args, iter); From c7bc5a0b5f2eedc9fec492e56bf45947a255a40b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Tue, 30 Dec 2025 00:29:18 +0100 Subject: [PATCH 06/10] netfilter: nft_set_pipapo: remove static in nft_pipapo_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-430 cve-bf CVE-2023-4244 commit-author Pablo Neira Ayuso commit ab0beafd52b98dfb8b8244b2c6794efbc87478db This has slipped through when reducing memory footprint for set elements, remove it. Fixes: 9dad402b89e8 ("netfilter: nf_tables: expose opaque set element as struct nft_elem_priv") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso (cherry picked from commit ab0beafd52b98dfb8b8244b2c6794efbc87478db) Signed-off-by: Marcin Wcisło --- net/netfilter/nft_set_pipapo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 2f3efeb3793b7..1b9fb8d59f9d6 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -603,7 +603,7 @@ static struct nft_elem_priv * nft_pipapo_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { - static struct nft_pipapo_elem *e; + struct nft_pipapo_elem *e; e = pipapo_get(net, set, (const u8 *)elem->key.val.data, nft_genmask_cur(net)); From 3c16b062cf1abee08b4454149a7f834d417c8874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Tue, 30 Dec 2025 00:30:15 +0100 Subject: [PATCH 07/10] netfilter: nf_tables: unconditionally flush pending work before notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-44483 cve CVE-2024-42109 commit-author Florian Westphal commit 9f6958ba2e902f9820c594869bd710ba74b7c4c0 syzbot reports: KASAN: slab-uaf in nft_ctx_update include/net/netfilter/nf_tables.h:1831 KASAN: slab-uaf in nft_commit_release net/netfilter/nf_tables_api.c:9530 KASAN: slab-uaf int nf_tables_trans_destroy_work+0x152b/0x1750 net/netfilter/nf_tables_api.c:9597 Read of size 2 at addr ffff88802b0051c4 by task kworker/1:1/45 [..] Workqueue: events nf_tables_trans_destroy_work Call Trace: nft_ctx_update include/net/netfilter/nf_tables.h:1831 [inline] nft_commit_release net/netfilter/nf_tables_api.c:9530 [inline] nf_tables_trans_destroy_work+0x152b/0x1750 net/netfilter/nf_tables_api.c:9597 Problem is that the notifier does a conditional flush, but its possible that the table-to-be-removed is still referenced by transactions being processed by the worker, so we need to flush unconditionally. We could make the flush_work depend on whether we found a table to delete in nf-next to avoid the flush for most cases. AFAICS this problem is only exposed in nf-next, with commit e169285f8c56 ("netfilter: nf_tables: do not store nft_ctx in transaction objects"), with this commit applied there is an unconditional fetch of table->family which is whats triggering the above splat. Fixes: 2c9f0293280e ("netfilter: nf_tables: flush pending destroy work before netlink notifier") Reported-and-tested-by: syzbot+4fd66a69358fc15ae2ad@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=4fd66a69358fc15ae2ad Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 9f6958ba2e902f9820c594869bd710ba74b7c4c0) Signed-off-by: Marcin Wcisło --- net/netfilter/nf_tables_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7fc0e3713d720..a0a9fd7e47447 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10677,8 +10677,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nf_tables_destroy_list)) - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && From 9524d09dd9f140e7ea87e49624ac5843bfc52639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Tue, 30 Dec 2025 00:32:36 +0100 Subject: [PATCH 08/10] netfilter: nf_tables: make destruction work queue pernet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-44483 cve-bf CVE-2024-42109 commit-author Florian Westphal commit fb8286562ecfb585e26b033c5e32e6fb85efb0b3 upstream-diff Context conflicts only The call to flush_work before tearing down a table from the netlink notifier was supposed to make sure that all earlier updates (e.g. rule add) that might reference that table have been processed. Unfortunately, flush_work() waits for the last queued instance. This could be an instance that is different from the one that we must wait for. This is because transactions are protected with a pernet mutex, but the work item is global, so holding the transaction mutex doesn't prevent another netns from queueing more work. Make the work item pernet so that flush_work() will wait for all transactions queued from this netns. A welcome side effect is that we no longer need to wait for transaction objects from foreign netns. The gc work queue is still global. This seems to be ok because nft_set structures are reference counted and each container structure owns a reference on the net namespace. The destroy_list is still protected by a global spinlock rather than pernet one but the hold time is very short anyway. v2: call cancel_work_sync before reaping the remaining tables (Pablo). Fixes: 9f6958ba2e90 ("netfilter: nf_tables: unconditionally flush pending work before notifier") Reported-by: syzbot+5d8c5789c8cb076b2c25@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso (cherry picked from commit fb8286562ecfb585e26b033c5e32e6fb85efb0b3) Signed-off-by: Marcin Wcisło --- include/net/netfilter/nf_tables.h | 4 +++- net/netfilter/nf_tables_api.c | 23 ++++++++++++++--------- net/netfilter/nft_compat.c | 8 ++++---- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 9cecf5c5cf1a8..093bc95cad0a6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1654,7 +1654,7 @@ void nft_chain_filter_fini(void); void __init nft_chain_route_init(void); void nft_chain_route_fini(void); -void nf_tables_trans_destroy_flush_work(void); +void nf_tables_trans_destroy_flush_work(struct net *net); int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); __be64 nf_jiffies64_to_msecs(u64 input); @@ -1668,6 +1668,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head destroy_list; struct list_head binding_list; struct list_head module_list; struct list_head notify_list; @@ -1676,6 +1677,7 @@ struct nftables_pernet { unsigned int base_seq; unsigned int gc_seq; u8 validate_state; + struct work_struct destroy_work; }; extern unsigned int nf_tables_net_id; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a0a9fd7e47447..fb02ba1b0010e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -30,7 +30,6 @@ unsigned int nf_tables_net_id __read_mostly; static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); -static LIST_HEAD(nf_tables_destroy_list); static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); static DEFINE_SPINLOCK(nf_tables_gc_list_lock); @@ -122,7 +121,6 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state) nft_net->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); -static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); static void nft_trans_gc_work(struct work_struct *work); static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); @@ -8824,11 +8822,12 @@ static void nft_commit_release(struct nft_trans *trans) static void nf_tables_trans_destroy_work(struct work_struct *w) { + struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work); struct nft_trans *trans, *next; LIST_HEAD(head); spin_lock(&nf_tables_destroy_list_lock); - list_splice_init(&nf_tables_destroy_list, &head); + list_splice_init(&nft_net->destroy_list, &head); spin_unlock(&nf_tables_destroy_list_lock); if (list_empty(&head)) @@ -8842,9 +8841,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) } } -void nf_tables_trans_destroy_flush_work(void) +void nf_tables_trans_destroy_flush_work(struct net *net) { - flush_work(&trans_destroy_work); + struct nftables_pernet *nft_net = nft_pernet(net); + + flush_work(&nft_net->destroy_work); } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); @@ -9263,11 +9264,11 @@ static void nf_tables_commit_release(struct net *net) trans->put_net = true; spin_lock(&nf_tables_destroy_list_lock); - list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); + list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list); spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); - schedule_work(&trans_destroy_work); + schedule_work(&nft_net->destroy_work); mutex_unlock(&nft_net->commit_mutex); } @@ -10677,7 +10678,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(net); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && @@ -10715,6 +10716,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->destroy_list); INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); @@ -10722,6 +10724,7 @@ static int __net_init nf_tables_init_net(struct net *net) nft_net->base_seq = 1; nft_net->gc_seq = 0; nft_net->validate_state = NFT_VALIDATE_SKIP; + INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work); return 0; } @@ -10749,14 +10752,17 @@ static void __net_exit nf_tables_exit_net(struct net *net) if (!list_empty(&nft_net->module_list)) nf_tables_module_autoload_cleanup(net); + cancel_work_sync(&nft_net->destroy_work); __nft_release_tables(net); nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); WARN_ON_ONCE(!list_empty(&nft_net->module_list)); WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); + WARN_ON_ONCE(!list_empty(&nft_net->destroy_list)); } static void nf_tables_exit_batch(struct list_head *net_exit_list) @@ -10841,7 +10847,6 @@ static void __exit nf_tables_module_exit(void) nft_chain_route_fini(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); - cancel_work_sync(&trans_destroy_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); nf_tables_core_module_exit(); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index d354cefa783ca..86a21c985b1ae 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -223,7 +223,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return 0; } -static void nft_compat_wait_for_destructors(void) +static void nft_compat_wait_for_destructors(struct net *net) { /* xtables matches or targets can have side effects, e.g. * creation/destruction of /proc files. @@ -231,7 +231,7 @@ static void nft_compat_wait_for_destructors(void) * work queue. If we have pending invocations we thus * need to wait for those to finish. */ - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(net); } static int @@ -257,7 +257,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); - nft_compat_wait_for_destructors(); + nft_compat_wait_for_destructors(ctx->net); ret = xt_check_target(&par, size, proto, inv); if (ret < 0) { @@ -494,7 +494,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - nft_compat_wait_for_destructors(); + nft_compat_wait_for_destructors(ctx->net); return xt_check_match(&par, size, proto, inv); } From adf4d28428a9de481f1378d1b10eb4280240957d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Tue, 30 Dec 2025 00:33:34 +0100 Subject: [PATCH 09/10] netfilter: nft_set_hash: skip duplicated elements pending gc run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-158865 cve-bf CVE-2023-52923 commit-author Pablo Neira Ayuso commit 7ffc7481153bbabf3332c6a19b289730c7e1edf5 rhashtable does not provide stable walk, duplicated elements are possible in case of resizing. I considered that checking for errors when calling rhashtable_walk_next() was sufficient to detect the resizing. However, rhashtable_walk_next() returns -EAGAIN only at the end of the iteration, which is too late, because a gc work containing duplicated elements could have been already scheduled for removal to the worker. Add a u32 gc worker sequence number per set, bump it on every workqueue run. Annotate gc worker sequence number on the expired element. Use it to skip those already seen in this gc workqueue run. Note that this new field is never reset in case gc transaction fails, so next gc worker run on the expired element overrides it. Wraparound of gc worker sequence number should not be an issue with stale gc worker sequence number in the element, that would just postpone the element removal in one gc run. Note that it is not possible to use flags to annotate that element is pending gc run to detect duplicates, given that gc transaction can be invalidated in case of update from the control plane, therefore, not allowing to clear such flag. On x86_64, pahole reports no changes in the size of nft_rhash_elem. Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Reported-by: Laurent Fasnacht Tested-by: Laurent Fasnacht Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 7ffc7481153bbabf3332c6a19b289730c7e1edf5) Signed-off-by: Marcin Wcisło --- net/netfilter/nft_set_hash.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 0691565caa81b..191164d0a3431 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -24,11 +24,13 @@ struct nft_rhash { struct rhashtable ht; struct delayed_work gc_work; + u32 wq_gc_seq; }; struct nft_rhash_elem { struct nft_elem_priv priv; struct rhash_head node; + u32 wq_gc_seq; struct nft_set_ext ext; }; @@ -337,6 +339,10 @@ static void nft_rhash_gc(struct work_struct *work) if (!gc) goto done; + /* Elements never collected use a zero gc worker sequence number. */ + if (unlikely(++priv->wq_gc_seq == 0)) + priv->wq_gc_seq++; + rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); @@ -354,6 +360,14 @@ static void nft_rhash_gc(struct work_struct *work) goto try_later; } + /* rhashtable walk is unstable, already seen in this gc run? + * Then, skip this element. In case of (unlikely) sequence + * wraparound and stale element wq_gc_seq, next gc run will + * just find this expired element. + */ + if (he->wq_gc_seq == priv->wq_gc_seq) + continue; + if (nft_set_elem_is_dead(&he->ext)) goto dead_elem; @@ -370,6 +384,8 @@ static void nft_rhash_gc(struct work_struct *work) if (!gc) goto try_later; + /* annotate gc sequence for this attempt. */ + he->wq_gc_seq = priv->wq_gc_seq; nft_trans_gc_elem_add(gc, he); } From 2e7b321ef9156de5a7185392dc1aafe0b5e538e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Tue, 30 Dec 2025 00:34:18 +0100 Subject: [PATCH 10/10] netfilter: nft_set_hash: unaligned atomic read on struct nft_set_ext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-5468 cve CVE-2024-54031 commit-author Pablo Neira Ayuso commit 542ed8145e6f9392e3d0a86a0e9027d2ffd183e4 Access to genmask field in struct nft_set_ext results in unaligned atomic read: [ 72.130109] Unable to handle kernel paging request at virtual address ffff0000c2bb708c [ 72.131036] Mem abort info: [ 72.131213] ESR = 0x0000000096000021 [ 72.131446] EC = 0x25: DABT (current EL), IL = 32 bits [ 72.132209] SET = 0, FnV = 0 [ 72.133216] EA = 0, S1PTW = 0 [ 72.134080] FSC = 0x21: alignment fault [ 72.135593] Data abort info: [ 72.137194] ISV = 0, ISS = 0x00000021, ISS2 = 0x00000000 [ 72.142351] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 72.145989] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 72.150115] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000237d27000 [ 72.154893] [ffff0000c2bb708c] pgd=0000000000000000, p4d=180000023ffff403, pud=180000023f84b403, pmd=180000023f835403, +pte=0068000102bb7707 [ 72.163021] Internal error: Oops: 0000000096000021 [#1] SMP [...] [ 72.170041] CPU: 7 UID: 0 PID: 54 Comm: kworker/7:0 Tainted: G E 6.13.0-rc3+ #2 [ 72.170509] Tainted: [E]=UNSIGNED_MODULE [ 72.170720] Hardware name: QEMU QEMU Virtual Machine, BIOS edk2-stable202302-for-qemu 03/01/2023 [ 72.171192] Workqueue: events_power_efficient nft_rhash_gc [nf_tables] [ 72.171552] pstate: 21400005 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 72.171915] pc : nft_rhash_gc+0x200/0x2d8 [nf_tables] [ 72.172166] lr : nft_rhash_gc+0x128/0x2d8 [nf_tables] [ 72.172546] sp : ffff800081f2bce0 [ 72.172724] x29: ffff800081f2bd40 x28: ffff0000c2bb708c x27: 0000000000000038 [ 72.173078] x26: ffff0000c6780ef0 x25: ffff0000c643df00 x24: ffff0000c6778f78 [ 72.173431] x23: 000000000000001a x22: ffff0000c4b1f000 x21: ffff0000c6780f78 [ 72.173782] x20: ffff0000c2bb70dc x19: ffff0000c2bb7080 x18: 0000000000000000 [ 72.174135] x17: ffff0000c0a4e1c0 x16: 0000000000003000 x15: 0000ac26d173b978 [ 72.174485] x14: ffffffffffffffff x13: 0000000000000030 x12: ffff0000c6780ef0 [ 72.174841] x11: 0000000000000000 x10: ffff800081f2bcf8 x9 : ffff0000c3000000 [ 72.175193] x8 : 00000000000004be x7 : 0000000000000000 x6 : 0000000000000000 [ 72.175544] x5 : 0000000000000040 x4 : ffff0000c3000010 x3 : 0000000000000000 [ 72.175871] x2 : 0000000000003a98 x1 : ffff0000c2bb708c x0 : 0000000000000004 [ 72.176207] Call trace: [ 72.176316] nft_rhash_gc+0x200/0x2d8 [nf_tables] (P) [ 72.176653] process_one_work+0x178/0x3d0 [ 72.176831] worker_thread+0x200/0x3f0 [ 72.176995] kthread+0xe8/0xf8 [ 72.177130] ret_from_fork+0x10/0x20 [ 72.177289] Code: 54fff984 d503201f d2800080 91003261 (f820303f) [ 72.177557] ---[ end trace 0000000000000000 ]--- Align struct nft_set_ext to word size to address this and documentation it. pahole reports that this increases the size of elements for rhash and pipapo in 8 bytes on x86_64. Fixes: 7ffc7481153b ("netfilter: nft_set_hash: skip duplicated elements pending gc run") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 542ed8145e6f9392e3d0a86a0e9027d2ffd183e4) Signed-off-by: Marcin Wcisło --- include/net/netfilter/nf_tables.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 093bc95cad0a6..f25511d26a461 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -665,15 +665,18 @@ struct nft_set_ext_tmpl { /** * struct nft_set_ext - set extensions * - * @genmask: generation mask + * @genmask: generation mask, but also flags (see NFT_SET_ELEM_DEAD_BIT) * @offset: offsets of individual extension types * @data: beginning of extension data + * + * This structure must be aligned to word size, otherwise atomic bitops + * on genmask field can cause alignment failure on some archs. */ struct nft_set_ext { u8 genmask; u8 offset[NFT_SET_EXT_NUM]; char data[]; -}; +} __aligned(BITS_PER_LONG / 8); static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) {