]> git.itanic.dy.fi Git - linux-stable/commitdiff
netfilter: nft_set_rbtree: prefer sync gc to async worker
authorFlorian Westphal <fw@strlen.de>
Fri, 13 Oct 2023 12:18:16 +0000 (14:18 +0200)
committerPablo Neira Ayuso <pablo@netfilter.org>
Tue, 24 Oct 2023 11:16:29 +0000 (13:16 +0200)
There is no need for asynchronous garbage collection, rbtree inserts
can only happen from the netlink control plane.

We already perform on-demand gc on insertion, in the area of the
tree where the insertion takes place, but we don't do a full tree
walk there for performance reasons.

Do a full gc walk at the end of the transaction instead and
remove the async worker.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
net/netfilter/nft_set_rbtree.c

index d59be2bc6e6c52350b3aea2a1e26e87da8e4feb7..7d1004f9e7d25855f2b88040772afa18713298fa 100644 (file)
@@ -19,7 +19,7 @@ struct nft_rbtree {
        struct rb_root          root;
        rwlock_t                lock;
        seqcount_rwlock_t       count;
-       struct delayed_work     gc_work;
+       unsigned long           last_gc;
 };
 
 struct nft_rbtree_elem {
@@ -48,8 +48,7 @@ static int nft_rbtree_cmp(const struct nft_set *set,
 
 static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
 {
-       return nft_set_elem_expired(&rbe->ext) ||
-              nft_set_elem_is_dead(&rbe->ext);
+       return nft_set_elem_expired(&rbe->ext);
 }
 
 static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -508,6 +507,15 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
        return err;
 }
 
+static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe)
+{
+       write_lock_bh(&priv->lock);
+       write_seqcount_begin(&priv->count);
+       rb_erase(&rbe->node, &priv->root);
+       write_seqcount_end(&priv->count);
+       write_unlock_bh(&priv->lock);
+}
+
 static void nft_rbtree_remove(const struct net *net,
                              const struct nft_set *set,
                              const struct nft_set_elem *elem)
@@ -515,11 +523,7 @@ static void nft_rbtree_remove(const struct net *net,
        struct nft_rbtree *priv = nft_set_priv(set);
        struct nft_rbtree_elem *rbe = elem->priv;
 
-       write_lock_bh(&priv->lock);
-       write_seqcount_begin(&priv->count);
-       rb_erase(&rbe->node, &priv->root);
-       write_seqcount_end(&priv->count);
-       write_unlock_bh(&priv->lock);
+       nft_rbtree_erase(priv, rbe);
 }
 
 static void nft_rbtree_activate(const struct net *net,
@@ -613,45 +617,40 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
        read_unlock_bh(&priv->lock);
 }
 
-static void nft_rbtree_gc(struct work_struct *work)
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+                                struct nft_rbtree *priv,
+                                struct nft_rbtree_elem *rbe)
 {
+       struct nft_set_elem elem = {
+               .priv   = rbe,
+       };
+
+       nft_setelem_data_deactivate(net, set, &elem);
+       nft_rbtree_erase(priv, rbe);
+}
+
+static void nft_rbtree_gc(struct nft_set *set)
+{
+       struct nft_rbtree *priv = nft_set_priv(set);
        struct nft_rbtree_elem *rbe, *rbe_end = NULL;
        struct nftables_pernet *nft_net;
-       struct nft_rbtree *priv;
+       struct rb_node *node, *next;
        struct nft_trans_gc *gc;
-       struct rb_node *node;
-       struct nft_set *set;
-       unsigned int gc_seq;
        struct net *net;
 
-       priv = container_of(work, struct nft_rbtree, gc_work.work);
        set  = nft_set_container_of(priv);
        net  = read_pnet(&set->net);
        nft_net = nft_pernet(net);
-       gc_seq  = READ_ONCE(nft_net->gc_seq);
 
-       if (nft_set_gc_is_pending(set))
-               goto done;
-
-       gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+       gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
        if (!gc)
-               goto done;
-
-       read_lock_bh(&priv->lock);
-       for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+               return;
 
-               /* Ruleset has been updated, try later. */
-               if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
-                       nft_trans_gc_destroy(gc);
-                       gc = NULL;
-                       goto try_later;
-               }
+       for (node = rb_first(&priv->root); node ; node = next) {
+               next = rb_next(node);
 
                rbe = rb_entry(node, struct nft_rbtree_elem, node);
 
-               if (nft_set_elem_is_dead(&rbe->ext))
-                       goto dead_elem;
-
                /* elements are reversed in the rbtree for historical reasons,
                 * from highest to lowest value, that is why end element is
                 * always visited before the start element.
@@ -663,37 +662,34 @@ static void nft_rbtree_gc(struct work_struct *work)
                if (!nft_set_elem_expired(&rbe->ext))
                        continue;
 
-               nft_set_elem_dead(&rbe->ext);
-
-               if (!rbe_end)
-                       continue;
-
-               nft_set_elem_dead(&rbe_end->ext);
-
-               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
                if (!gc)
                        goto try_later;
 
-               nft_trans_gc_elem_add(gc, rbe_end);
-               rbe_end = NULL;
-dead_elem:
-               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               /* end element needs to be removed first, it has
+                * no timeout extension.
+                */
+               if (rbe_end) {
+                       nft_rbtree_gc_remove(net, set, priv, rbe_end);
+                       nft_trans_gc_elem_add(gc, rbe_end);
+                       rbe_end = NULL;
+               }
+
+               gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
                if (!gc)
                        goto try_later;
 
+               nft_rbtree_gc_remove(net, set, priv, rbe);
                nft_trans_gc_elem_add(gc, rbe);
        }
 
-       gc = nft_trans_gc_catchall_async(gc, gc_seq);
-
 try_later:
-       read_unlock_bh(&priv->lock);
 
-       if (gc)
-               nft_trans_gc_queue_async_done(gc);
-done:
-       queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-                          nft_set_gc_interval(set));
+       if (gc) {
+               gc = nft_trans_gc_catchall_sync(gc);
+               nft_trans_gc_queue_sync_done(gc);
+               priv->last_gc = jiffies;
+       }
 }
 
 static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
@@ -712,11 +708,6 @@ static int nft_rbtree_init(const struct nft_set *set,
        seqcount_rwlock_init(&priv->count, &priv->lock);
        priv->root = RB_ROOT;
 
-       INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
-       if (set->flags & NFT_SET_TIMEOUT)
-               queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-                                  nft_set_gc_interval(set));
-
        return 0;
 }
 
@@ -727,8 +718,6 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx,
        struct nft_rbtree_elem *rbe;
        struct rb_node *node;
 
-       cancel_delayed_work_sync(&priv->gc_work);
-       rcu_barrier();
        while ((node = priv->root.rb_node) != NULL) {
                rb_erase(node, &priv->root);
                rbe = rb_entry(node, struct nft_rbtree_elem, node);
@@ -754,6 +743,21 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
        return true;
 }
 
+static void nft_rbtree_commit(struct nft_set *set)
+{
+       struct nft_rbtree *priv = nft_set_priv(set);
+
+       if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
+               nft_rbtree_gc(set);
+}
+
+static void nft_rbtree_gc_init(const struct nft_set *set)
+{
+       struct nft_rbtree *priv = nft_set_priv(set);
+
+       priv->last_gc = jiffies;
+}
+
 const struct nft_set_type nft_set_rbtree_type = {
        .features       = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
        .ops            = {
@@ -767,6 +771,8 @@ const struct nft_set_type nft_set_rbtree_type = {
                .deactivate     = nft_rbtree_deactivate,
                .flush          = nft_rbtree_flush,
                .activate       = nft_rbtree_activate,
+               .commit         = nft_rbtree_commit,
+               .gc_init        = nft_rbtree_gc_init,
                .lookup         = nft_rbtree_lookup,
                .walk           = nft_rbtree_walk,
                .get            = nft_rbtree_get,