exynos-linux-stable/include/linux/list_nulls.h

123 lines
3.4 KiB
C
Raw Permalink Normal View History

rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 19:37:55 -08:00
#ifndef _LINUX_LIST_NULLS_H
#define _LINUX_LIST_NULLS_H
#include <linux/poison.h>
#include <linux/const.h>
rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 19:37:55 -08:00
/*
* Special version of lists, where end of list is not a NULL pointer,
* but a 'nulls' marker, which can have many different values.
* (up to 2^31 different values guaranteed on all platforms)
*
* In the standard hlist, termination of a list is the NULL pointer.
* In this special 'nulls' variant, we use the fact that objects stored in
* a list are aligned on a word (4 or 8 bytes alignment).
* We therefore use the last significant bit of 'ptr' :
* Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1)
* Set to 0 : This is a pointer to some object (ptr)
*/
struct hlist_nulls_head {
struct hlist_nulls_node *first;
};
struct hlist_nulls_node {
struct hlist_nulls_node *next, **pprev;
};
#define NULLS_MARKER(value) (1UL | (((long)value) << 1))
rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 19:37:55 -08:00
#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \
((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 19:37:55 -08:00
#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
#define hlist_nulls_entry_safe(ptr, type, member) \
({ typeof(ptr) ____ptr = (ptr); \
!is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
})
rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 19:37:55 -08:00
/**
* ptr_is_a_nulls - Test if a ptr is a nulls
* @ptr: ptr to be tested
*
*/
static inline int is_a_nulls(const struct hlist_nulls_node *ptr)
{
return ((unsigned long)ptr & 1);
}
/**
* get_nulls_value - Get the 'nulls' value of the end of chain
* @ptr: end of chain
*
* Should be called only if is_a_nulls(ptr);
*/
static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr)
{
return ((unsigned long)ptr) >> 1;
}
static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h)
{
return !h->pprev;
}
static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
{
return is_a_nulls(READ_ONCE(h->first));
rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 19:37:55 -08:00
}
static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
struct hlist_nulls_head *h)
{
struct hlist_nulls_node *first = h->first;
n->next = first;
rcu: Use WRITE_ONCE() for assignments to ->pprev for hlist_nulls [ Upstream commit 860c8802ace14c646864795e057349c9fb2d60ad ] Eric Dumazet supplied a KCSAN report of a bug that forces use of hlist_unhashed_lockless() from sk_unhashed(): ------------------------------------------------------------------------ BUG: KCSAN: data-race in inet_unhash / inet_unhash write to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 1: __hlist_nulls_del include/linux/list_nulls.h:88 [inline] hlist_nulls_del_init_rcu include/linux/rculist_nulls.h:36 [inline] __sk_nulls_del_node_init_rcu include/net/sock.h:676 [inline] inet_unhash+0x38f/0x4a0 net/ipv4/inet_hashtables.c:612 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 start_secondary+0x208/0x260 arch/x86/kernel/smpboot.c:264 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 read to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 0: sk_unhashed include/net/sock.h:607 [inline] inet_unhash+0x3d/0x4a0 net/ipv4/inet_hashtables.c:592 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 rest_init+0xec/0xf6 init/main.c:452 arch_call_rest_init+0x17/0x37 start_kernel+0x838/0x85e init/main.c:786 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x72/0x76 arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ------------------------------------------------------------------------ This commit therefore replaces C-language assignments with WRITE_ONCE() in include/linux/list_nulls.h and include/linux/rculist_nulls.h. Reported-by: Eric Dumazet <edumazet@google.com> # For KCSAN Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
2019-11-09 09:42:13 -08:00
WRITE_ONCE(n->pprev, &h->first);
h->first = n;
if (!is_a_nulls(first))
rcu: Use WRITE_ONCE() for assignments to ->pprev for hlist_nulls [ Upstream commit 860c8802ace14c646864795e057349c9fb2d60ad ] Eric Dumazet supplied a KCSAN report of a bug that forces use of hlist_unhashed_lockless() from sk_unhashed(): ------------------------------------------------------------------------ BUG: KCSAN: data-race in inet_unhash / inet_unhash write to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 1: __hlist_nulls_del include/linux/list_nulls.h:88 [inline] hlist_nulls_del_init_rcu include/linux/rculist_nulls.h:36 [inline] __sk_nulls_del_node_init_rcu include/net/sock.h:676 [inline] inet_unhash+0x38f/0x4a0 net/ipv4/inet_hashtables.c:612 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 start_secondary+0x208/0x260 arch/x86/kernel/smpboot.c:264 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 read to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 0: sk_unhashed include/net/sock.h:607 [inline] inet_unhash+0x3d/0x4a0 net/ipv4/inet_hashtables.c:592 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 rest_init+0xec/0xf6 init/main.c:452 arch_call_rest_init+0x17/0x37 start_kernel+0x838/0x85e init/main.c:786 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x72/0x76 arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ------------------------------------------------------------------------ This commit therefore replaces C-language assignments with WRITE_ONCE() in include/linux/list_nulls.h and include/linux/rculist_nulls.h. Reported-by: Eric Dumazet <edumazet@google.com> # For KCSAN Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
2019-11-09 09:42:13 -08:00
WRITE_ONCE(first->pprev, &n->next);
}
rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 19:37:55 -08:00
static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
{
struct hlist_nulls_node *next = n->next;
struct hlist_nulls_node **pprev = n->pprev;
WRITE_ONCE(*pprev, next);
rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 19:37:55 -08:00
if (!is_a_nulls(next))
rcu: Use WRITE_ONCE() for assignments to ->pprev for hlist_nulls [ Upstream commit 860c8802ace14c646864795e057349c9fb2d60ad ] Eric Dumazet supplied a KCSAN report of a bug that forces use of hlist_unhashed_lockless() from sk_unhashed(): ------------------------------------------------------------------------ BUG: KCSAN: data-race in inet_unhash / inet_unhash write to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 1: __hlist_nulls_del include/linux/list_nulls.h:88 [inline] hlist_nulls_del_init_rcu include/linux/rculist_nulls.h:36 [inline] __sk_nulls_del_node_init_rcu include/net/sock.h:676 [inline] inet_unhash+0x38f/0x4a0 net/ipv4/inet_hashtables.c:612 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 start_secondary+0x208/0x260 arch/x86/kernel/smpboot.c:264 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 read to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 0: sk_unhashed include/net/sock.h:607 [inline] inet_unhash+0x3d/0x4a0 net/ipv4/inet_hashtables.c:592 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 rest_init+0xec/0xf6 init/main.c:452 arch_call_rest_init+0x17/0x37 start_kernel+0x838/0x85e init/main.c:786 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x72/0x76 arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ------------------------------------------------------------------------ This commit therefore replaces C-language assignments with WRITE_ONCE() in include/linux/list_nulls.h and include/linux/rculist_nulls.h. Reported-by: Eric Dumazet <edumazet@google.com> # For KCSAN Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
2019-11-09 09:42:13 -08:00
WRITE_ONCE(next->pprev, pprev);
rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 19:37:55 -08:00
}
static inline void hlist_nulls_del(struct hlist_nulls_node *n)
{
__hlist_nulls_del(n);
rcu: Use WRITE_ONCE() for assignments to ->pprev for hlist_nulls [ Upstream commit 860c8802ace14c646864795e057349c9fb2d60ad ] Eric Dumazet supplied a KCSAN report of a bug that forces use of hlist_unhashed_lockless() from sk_unhashed(): ------------------------------------------------------------------------ BUG: KCSAN: data-race in inet_unhash / inet_unhash write to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 1: __hlist_nulls_del include/linux/list_nulls.h:88 [inline] hlist_nulls_del_init_rcu include/linux/rculist_nulls.h:36 [inline] __sk_nulls_del_node_init_rcu include/net/sock.h:676 [inline] inet_unhash+0x38f/0x4a0 net/ipv4/inet_hashtables.c:612 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 start_secondary+0x208/0x260 arch/x86/kernel/smpboot.c:264 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 read to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 0: sk_unhashed include/net/sock.h:607 [inline] inet_unhash+0x3d/0x4a0 net/ipv4/inet_hashtables.c:592 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 rest_init+0xec/0xf6 init/main.c:452 arch_call_rest_init+0x17/0x37 start_kernel+0x838/0x85e init/main.c:786 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x72/0x76 arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ------------------------------------------------------------------------ This commit therefore replaces C-language assignments with WRITE_ONCE() in include/linux/list_nulls.h and include/linux/rculist_nulls.h. Reported-by: Eric Dumazet <edumazet@google.com> # For KCSAN Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
2019-11-09 09:42:13 -08:00
WRITE_ONCE(n->pprev, LIST_POISON2);
}
rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 19:37:55 -08:00
/**
* hlist_nulls_for_each_entry - iterate over list of given type
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_node within the struct.
*
*/
#define hlist_nulls_for_each_entry(tpos, pos, head, member) \
for (pos = (head)->first; \
(!is_a_nulls(pos)) && \
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
/**
* hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @member: the name of the hlist_node within the struct.
*
*/
#define hlist_nulls_for_each_entry_from(tpos, pos, member) \
for (; (!is_a_nulls(pos)) && \
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
#endif