[bionic][PATCH 1/2] srcu: Prohibit call_srcu() use under raw spinlocks
Thadeu Lima de Souza Cascardo
cascardo at canonical.com
Thu Feb 21 22:47:42 UTC 2019
On Wed, Feb 20, 2019 at 08:28:36PM -0300, Marcelo Henrique Cerri wrote:
> From: Paul E. McKenney <paulmck at linux.vnet.ibm.com>
There is no BugLink, no "(cherry picked from commit ...)" line, and no SOB from you as the submitter.
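
For an Ubuntu cherry pick we would expect roughly the following: a BugLink
near the top of the commit message, and the provenance line plus your SOB at
the bottom (the URL and commit id below are placeholders, not the real ones):

	BugLink: https://bugs.launchpad.net/bugs/<bug number>

	(cherry picked from commit <upstream commit id>)
	Signed-off-by: <your name and email>
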
>
> Invoking queue_delayed_work() while holding a raw spinlock is forbidden
> in -rt kernels, which is exactly what __call_srcu() does, indirectly via
> srcu_funnel_gp_start(). This commit therefore downgrades Tree SRCU's
> locking from raw to non-raw spinlocks, which works because call_srcu()
> is not ever called while holding a raw spinlock.
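
Just as context for other reviewers (none of this is in the patch itself):
the flip side of the downgrade is what the subject line says -- after this
change, call_srcu() itself must not be invoked with a raw spinlock held,
because __call_srcu() now takes a non-raw spinlock_t, which is a sleeping
lock on -rt. A hypothetical caller-side sketch of the now-forbidden pattern
(all of the example_* names are made up, not from this patch):

	#include <linux/slab.h>
	#include <linux/spinlock.h>
	#include <linux/srcu.h>

	struct example_obj {
		struct rcu_head rh;
		/* payload */
	};

	static DEFINE_RAW_SPINLOCK(example_lock);	/* stays a real spinlock on -rt */
	DEFINE_STATIC_SRCU(example_srcu);

	static void example_cb(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct example_obj, rh));
	}

	static void example_retire(struct example_obj *obj)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&example_lock, flags);
		/*
		 * Forbidden after this patch: call_srcu() reaches
		 * spin_lock_rcu_node(), i.e. a sleeping lock on -rt,
		 * inside a raw (non-sleeping) critical section.
		 */
		call_srcu(&example_srcu, &obj->rh, example_cb);
		raw_spin_unlock_irqrestore(&example_lock, flags);
	}
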
>
> Reported-by: Sebastian Andrzej Siewior <bigeasy at linutronix.de>
> Signed-off-by: Paul E. McKenney <paulmck at linux.vnet.ibm.com>
> ---
> include/linux/srcutree.h | 8 +--
> kernel/rcu/srcutree.c | 109 ++++++++++++++++++++++++---------------
> 2 files changed, 72 insertions(+), 45 deletions(-)
>
> diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> index a949f4f9e4d7..4eda108abee0 100644
> --- a/include/linux/srcutree.h
> +++ b/include/linux/srcutree.h
> @@ -40,7 +40,7 @@ struct srcu_data {
> unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */
>
> /* Update-side state. */
> - raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp;
> + spinlock_t __private lock ____cacheline_internodealigned_in_smp;
> struct rcu_segcblist srcu_cblist; /* List of callbacks.*/
> unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */
> unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
> @@ -58,7 +58,7 @@ struct srcu_data {
> * Node in SRCU combining tree, similar in function to rcu_data.
> */
> struct srcu_node {
> - raw_spinlock_t __private lock;
> + spinlock_t __private lock;
> unsigned long srcu_have_cbs[4]; /* GP seq for children */
> /* having CBs, but only */
> /* is > ->srcu_gq_seq. */
> @@ -78,7 +78,7 @@ struct srcu_struct {
> struct srcu_node *level[RCU_NUM_LVLS + 1];
> /* First node at each level. */
> struct mutex srcu_cb_mutex; /* Serialize CB preparation. */
> - raw_spinlock_t __private lock; /* Protect counters */
> + spinlock_t __private lock; /* Protect counters */
> struct mutex srcu_gp_mutex; /* Serialize GP work. */
> unsigned int srcu_idx; /* Current rdr array element. */
> unsigned long srcu_gp_seq; /* Grace-period seq #. */
> @@ -107,7 +107,7 @@ struct srcu_struct {
> #define __SRCU_STRUCT_INIT(name) \
> { \
> .sda = &name##_srcu_data, \
> - .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
> + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
> .srcu_gp_seq_needed = 0 - 1, \
> __SRCU_DEP_MAP_INIT(name) \
> }
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 6d5880089ff6..d5cea81378cc 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -53,6 +53,33 @@ static void srcu_invoke_callbacks(struct work_struct *work);
> static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
> static void process_srcu(struct work_struct *work);
>
> +/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
> +#define spin_lock_rcu_node(p) \
> +do { \
> + spin_lock(&ACCESS_PRIVATE(p, lock)); \
> + smp_mb__after_unlock_lock(); \
> +} while (0)
> +
> +#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
> +
> +#define spin_lock_irq_rcu_node(p) \
> +do { \
> + spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
> + smp_mb__after_unlock_lock(); \
> +} while (0)
> +
> +#define spin_unlock_irq_rcu_node(p) \
> + spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
> +
> +#define spin_lock_irqsave_rcu_node(p, flags) \
> +do { \
> + spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
> + smp_mb__after_unlock_lock(); \
> +} while (0)
> +
> +#define spin_unlock_irqrestore_rcu_node(p, flags) \
> + spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \
> +
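
Small readability note for reviewers: with the wrappers above, something like
spin_lock_irqsave_rcu_node(sdp, flags) expands to roughly

	spin_lock_irqsave(&ACCESS_PRIVATE(sdp, lock), flags);
	smp_mb__after_unlock_lock();

so the unlock+lock sequence keeps the full-barrier semantics the raw variants
provided; only the lock type changes from raw_spinlock_t to spinlock_t.
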
> /*
> * Initialize SRCU combining tree. Note that statically allocated
> * srcu_struct structures might already have srcu_read_lock() and
> @@ -77,7 +104,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
>
> /* Each pass through this loop initializes one srcu_node structure. */
> rcu_for_each_node_breadth_first(sp, snp) {
> - raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
> + spin_lock_init(&ACCESS_PRIVATE(snp, lock));
> WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
> ARRAY_SIZE(snp->srcu_data_have_cbs));
> for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
> @@ -111,7 +138,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
> snp_first = sp->level[level];
> for_each_possible_cpu(cpu) {
> sdp = per_cpu_ptr(sp->sda, cpu);
> - raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
> + spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
> rcu_segcblist_init(&sdp->srcu_cblist);
> sdp->srcu_cblist_invoking = false;
> sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
> @@ -170,7 +197,7 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
> /* Don't re-initialize a lock while it is held. */
> debug_check_no_locks_freed((void *)sp, sizeof(*sp));
> lockdep_init_map(&sp->dep_map, name, key, 0);
> - raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
> + spin_lock_init(&ACCESS_PRIVATE(sp, lock));
> return init_srcu_struct_fields(sp, false);
> }
> EXPORT_SYMBOL_GPL(__init_srcu_struct);
> @@ -187,7 +214,7 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct);
> */
> int init_srcu_struct(struct srcu_struct *sp)
> {
> - raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
> + spin_lock_init(&ACCESS_PRIVATE(sp, lock));
> return init_srcu_struct_fields(sp, false);
> }
> EXPORT_SYMBOL_GPL(init_srcu_struct);
> @@ -210,13 +237,13 @@ static void check_init_srcu_struct(struct srcu_struct *sp)
> /* The smp_load_acquire() pairs with the smp_store_release(). */
> if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
> return; /* Already initialized. */
> - raw_spin_lock_irqsave_rcu_node(sp, flags);
> + spin_lock_irqsave_rcu_node(sp, flags);
> if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
> - raw_spin_unlock_irqrestore_rcu_node(sp, flags);
> + spin_unlock_irqrestore_rcu_node(sp, flags);
> return;
> }
> init_srcu_struct_fields(sp, true);
> - raw_spin_unlock_irqrestore_rcu_node(sp, flags);
> + spin_unlock_irqrestore_rcu_node(sp, flags);
> }
>
> /*
> @@ -513,7 +540,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
> mutex_lock(&sp->srcu_cb_mutex);
>
> /* End the current grace period. */
> - raw_spin_lock_irq_rcu_node(sp);
> + spin_lock_irq_rcu_node(sp);
> idx = rcu_seq_state(sp->srcu_gp_seq);
> WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
> cbdelay = srcu_get_delay(sp);
> @@ -522,7 +549,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
> gpseq = rcu_seq_current(&sp->srcu_gp_seq);
> if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
> sp->srcu_gp_seq_needed_exp = gpseq;
> - raw_spin_unlock_irq_rcu_node(sp);
> + spin_unlock_irq_rcu_node(sp);
> mutex_unlock(&sp->srcu_gp_mutex);
> /* A new grace period can start at this point. But only one. */
>
> @@ -530,7 +557,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
> idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
> idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
> rcu_for_each_node_breadth_first(sp, snp) {
> - raw_spin_lock_irq_rcu_node(snp);
> + spin_lock_irq_rcu_node(snp);
> cbs = false;
> if (snp >= sp->level[rcu_num_lvls - 1])
> cbs = snp->srcu_have_cbs[idx] == gpseq;
> @@ -540,7 +567,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
> snp->srcu_gp_seq_needed_exp = gpseq;
> mask = snp->srcu_data_have_cbs[idx];
> snp->srcu_data_have_cbs[idx] = 0;
> - raw_spin_unlock_irq_rcu_node(snp);
> + spin_unlock_irq_rcu_node(snp);
> if (cbs)
> srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
>
> @@ -548,11 +575,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
> if (!(gpseq & counter_wrap_check))
> for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
> sdp = per_cpu_ptr(sp->sda, cpu);
> - raw_spin_lock_irqsave_rcu_node(sdp, flags);
> + spin_lock_irqsave_rcu_node(sdp, flags);
> if (ULONG_CMP_GE(gpseq,
> sdp->srcu_gp_seq_needed + 100))
> sdp->srcu_gp_seq_needed = gpseq;
> - raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
> + spin_unlock_irqrestore_rcu_node(sdp, flags);
> }
> }
>
> @@ -560,17 +587,17 @@ static void srcu_gp_end(struct srcu_struct *sp)
> mutex_unlock(&sp->srcu_cb_mutex);
>
> /* Start a new grace period if needed. */
> - raw_spin_lock_irq_rcu_node(sp);
> + spin_lock_irq_rcu_node(sp);
> gpseq = rcu_seq_current(&sp->srcu_gp_seq);
> if (!rcu_seq_state(gpseq) &&
> ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
> srcu_gp_start(sp);
> - raw_spin_unlock_irq_rcu_node(sp);
> + spin_unlock_irq_rcu_node(sp);
> /* Throttle expedited grace periods: Should be rare! */
> srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
> ? 0 : SRCU_INTERVAL);
> } else {
> - raw_spin_unlock_irq_rcu_node(sp);
> + spin_unlock_irq_rcu_node(sp);
> }
> }
>
> @@ -590,18 +617,18 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
> if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
> ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
> return;
> - raw_spin_lock_irqsave_rcu_node(snp, flags);
> + spin_lock_irqsave_rcu_node(snp, flags);
> if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
> - raw_spin_unlock_irqrestore_rcu_node(snp, flags);
> + spin_unlock_irqrestore_rcu_node(snp, flags);
> return;
> }
> WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
> - raw_spin_unlock_irqrestore_rcu_node(snp, flags);
> + spin_unlock_irqrestore_rcu_node(snp, flags);
> }
> - raw_spin_lock_irqsave_rcu_node(sp, flags);
> + spin_lock_irqsave_rcu_node(sp, flags);
> if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
> sp->srcu_gp_seq_needed_exp = s;
> - raw_spin_unlock_irqrestore_rcu_node(sp, flags);
> + spin_unlock_irqrestore_rcu_node(sp, flags);
> }
>
> /*
> @@ -623,12 +650,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
> for (; snp != NULL; snp = snp->srcu_parent) {
> if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
> return; /* GP already done and CBs recorded. */
> - raw_spin_lock_irqsave_rcu_node(snp, flags);
> + spin_lock_irqsave_rcu_node(snp, flags);
> if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
> snp_seq = snp->srcu_have_cbs[idx];
> if (snp == sdp->mynode && snp_seq == s)
> snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
> - raw_spin_unlock_irqrestore_rcu_node(snp, flags);
> + spin_unlock_irqrestore_rcu_node(snp, flags);
> if (snp == sdp->mynode && snp_seq != s) {
> srcu_schedule_cbs_sdp(sdp, do_norm
> ? SRCU_INTERVAL
> @@ -644,11 +671,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
> snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
> if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
> snp->srcu_gp_seq_needed_exp = s;
> - raw_spin_unlock_irqrestore_rcu_node(snp, flags);
> + spin_unlock_irqrestore_rcu_node(snp, flags);
> }
>
> /* Top of tree, must ensure the grace period will be started. */
> - raw_spin_lock_irqsave_rcu_node(sp, flags);
> + spin_lock_irqsave_rcu_node(sp, flags);
> if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
> /*
> * Record need for grace period s. Pair with load
> @@ -667,7 +694,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
> queue_delayed_work(system_power_efficient_wq, &sp->work,
> srcu_get_delay(sp));
> }
> - raw_spin_unlock_irqrestore_rcu_node(sp, flags);
> + spin_unlock_irqrestore_rcu_node(sp, flags);
> }
>
> /*
> @@ -830,7 +857,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
> rhp->func = func;
> local_irq_save(flags);
> sdp = this_cpu_ptr(sp->sda);
> - raw_spin_lock_rcu_node(sdp);
> + spin_lock_rcu_node(sdp);
> rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
> rcu_segcblist_advance(&sdp->srcu_cblist,
> rcu_seq_current(&sp->srcu_gp_seq));
> @@ -844,7 +871,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
> sdp->srcu_gp_seq_needed_exp = s;
> needexp = true;
> }
> - raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
> + spin_unlock_irqrestore_rcu_node(sdp, flags);
> if (needgp)
> srcu_funnel_gp_start(sp, sdp, s, do_norm);
> else if (needexp)
> @@ -900,7 +927,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
>
> /*
> * Make sure that later code is ordered after the SRCU grace
> - * period. This pairs with the raw_spin_lock_irq_rcu_node()
> + * period. This pairs with the spin_lock_irq_rcu_node()
> * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed
> * because the current CPU might have been totally uninvolved with
> * (and thus unordered against) that grace period.
> @@ -1024,7 +1051,7 @@ void srcu_barrier(struct srcu_struct *sp)
> */
> for_each_possible_cpu(cpu) {
> sdp = per_cpu_ptr(sp->sda, cpu);
> - raw_spin_lock_irq_rcu_node(sdp);
> + spin_lock_irq_rcu_node(sdp);
> atomic_inc(&sp->srcu_barrier_cpu_cnt);
> sdp->srcu_barrier_head.func = srcu_barrier_cb;
> debug_rcu_head_queue(&sdp->srcu_barrier_head);
> @@ -1033,7 +1060,7 @@ void srcu_barrier(struct srcu_struct *sp)
> debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
> atomic_dec(&sp->srcu_barrier_cpu_cnt);
> }
> - raw_spin_unlock_irq_rcu_node(sdp);
> + spin_unlock_irq_rcu_node(sdp);
> }
>
> /* Remove the initial count, at which point reaching zero can happen. */
> @@ -1082,17 +1109,17 @@ static void srcu_advance_state(struct srcu_struct *sp)
> */
> idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
> if (idx == SRCU_STATE_IDLE) {
> - raw_spin_lock_irq_rcu_node(sp);
> + spin_lock_irq_rcu_node(sp);
> if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
> WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
> - raw_spin_unlock_irq_rcu_node(sp);
> + spin_unlock_irq_rcu_node(sp);
> mutex_unlock(&sp->srcu_gp_mutex);
> return;
> }
> idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
> if (idx == SRCU_STATE_IDLE)
> srcu_gp_start(sp);
> - raw_spin_unlock_irq_rcu_node(sp);
> + spin_unlock_irq_rcu_node(sp);
> if (idx != SRCU_STATE_IDLE) {
> mutex_unlock(&sp->srcu_gp_mutex);
> return; /* Someone else started the grace period. */
> @@ -1141,19 +1168,19 @@ static void srcu_invoke_callbacks(struct work_struct *work)
> sdp = container_of(work, struct srcu_data, work.work);
> sp = sdp->sp;
> rcu_cblist_init(&ready_cbs);
> - raw_spin_lock_irq_rcu_node(sdp);
> + spin_lock_irq_rcu_node(sdp);
> rcu_segcblist_advance(&sdp->srcu_cblist,
> rcu_seq_current(&sp->srcu_gp_seq));
> if (sdp->srcu_cblist_invoking ||
> !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
> - raw_spin_unlock_irq_rcu_node(sdp);
> + spin_unlock_irq_rcu_node(sdp);
> return; /* Someone else on the job or nothing to do. */
> }
>
> /* We are on the job! Extract and invoke ready callbacks. */
> sdp->srcu_cblist_invoking = true;
> rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
> - raw_spin_unlock_irq_rcu_node(sdp);
> + spin_unlock_irq_rcu_node(sdp);
> rhp = rcu_cblist_dequeue(&ready_cbs);
> for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
> debug_rcu_head_unqueue(rhp);
> @@ -1166,13 +1193,13 @@ static void srcu_invoke_callbacks(struct work_struct *work)
> * Update counts, accelerate new callbacks, and if needed,
> * schedule another round of callback invocation.
> */
> - raw_spin_lock_irq_rcu_node(sdp);
> + spin_lock_irq_rcu_node(sdp);
> rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
> (void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
> rcu_seq_snap(&sp->srcu_gp_seq));
> sdp->srcu_cblist_invoking = false;
> more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
> - raw_spin_unlock_irq_rcu_node(sdp);
> + spin_unlock_irq_rcu_node(sdp);
> if (more)
> srcu_schedule_cbs_sdp(sdp, 0);
> }
> @@ -1185,7 +1212,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
> {
> bool pushgp = true;
>
> - raw_spin_lock_irq_rcu_node(sp);
> + spin_lock_irq_rcu_node(sp);
> if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
> if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
> /* All requests fulfilled, time to go idle. */
> @@ -1195,7 +1222,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
> /* Outstanding request and no GP. Start one. */
> srcu_gp_start(sp);
> }
> - raw_spin_unlock_irq_rcu_node(sp);
> + spin_unlock_irq_rcu_node(sp);
>
> if (pushgp)
> queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
> --
> 2.17.1
>
>
> --
> kernel-team mailing list
> kernel-team at lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team