[PATCH] ipmi: Fix timer race with module unload

Manoj Iyer manoj.iyer at canonical.com
Mon Oct 22 22:48:34 UTC 2018


From: Jan Glauber <jglauber at cavium.com>

Please note that below oops is from an older kernel, but the same
race seems to be present in the upstream kernel too.

---8<---

The following panic was encountered during removing the ipmi_ssif
module:

[ 526.352555] Unable to handle kernel paging request at virtual address ffff000006923090
[ 526.360464] Mem abort info:
[ 526.363257] ESR = 0x86000007
[ 526.366304] Exception class = IABT (current EL), IL = 32 bits
[ 526.372221] SET = 0, FnV = 0
[ 526.375269] EA = 0, S1PTW = 0
[ 526.378405] swapper pgtable: 4k pages, 48-bit VAs, pgd = 000000008ae60416
[ 526.385185] [ffff000006923090] *pgd=000000bffcffe803, *pud=000000bffcffd803, *pmd=0000009f4731a003, *pte=0000000000000000
[ 526.396141] Internal error: Oops: 86000007 [#1] SMP
[ 526.401008] Modules linked in: nls_iso8859_1 ipmi_devintf joydev input_leds ipmi_msghandler shpchp sch_fq_codel ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear i2c_smbus hid_generic usbhid uas hid usb_storage ast aes_ce_blk i2c_algo_bit aes_ce_cipher qede ttm crc32_ce ptp crct10dif_ce drm_kms_helper ghash_ce syscopyarea sha2_ce sysfillrect sysimgblt pps_core fb_sys_fops sha256_arm64 sha1_ce mpt3sas qed drm raid_class ahci scsi_transport_sas libahci gpio_xlp i2c_xlp9xx aes_neon_bs aes_neon_blk crypto_simd cryptd aes_arm64 [last unloaded: ipmi_ssif]
[ 526.468085] CPU: 125 PID: 0 Comm: swapper/125 Not tainted 4.15.0-35-generic #38~lp1775396+build.1
[ 526.476942] Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL022 08/14/2018
[ 526.484932] pstate: 00400009 (nzcv daif +PAN -UAO)
[ 526.489713] pc : 0xffff000006923090
[ 526.493198] lr : call_timer_fn+0x34/0x178
[ 526.497194] sp : ffff000009b0bdd0
[ 526.500496] x29: ffff000009b0bdd0 x28: 0000000000000082
[ 526.505796] x27: 0000000000000002 x26: ffff000009515188
[ 526.511096] x25: ffff000009515180 x24: ffff0000090f1018
[ 526.516396] x23: ffff000009519660 x22: dead000000000200
[ 526.521696] x21: ffff000006923090 x20: 0000000000000100
[ 526.526995] x19: ffff809eeb466a40 x18: 0000000000000000
[ 526.532295] x17: 000000000000000e x16: 0000000000000007
[ 526.537594] x15: 0000000000000000 x14: 071c71c71c71c71c
[ 526.542894] x13: 0000000000000000 x12: 0000000000000000
[ 526.548193] x11: 0000000000000001 x10: ffff000009b0be88
[ 526.553493] x9 : 0000000000000000 x8 : 0000000000000005
[ 526.558793] x7 : ffff80befc1f8528 x6 : 0000000000000020
[ 526.564092] x5 : 0000000000000040 x4 : 0000000020001b20
[ 526.569392] x3 : 0000000000000000 x2 : ffff809eeb466a40
[ 526.574692] x1 : ffff000006923090 x0 : ffff809eeb466a40
[ 526.579992] Process swapper/125 (pid: 0, stack limit = 0x000000002eb50acc)
[ 526.586854] Call trace:
[ 526.589289] 0xffff000006923090
[ 526.592419] expire_timers+0xc8/0x130
[ 526.596070] run_timer_softirq+0xec/0x1b0
[ 526.600070] __do_softirq+0x134/0x328
[ 526.603726] irq_exit+0xc8/0xe0
[ 526.606857] __handle_domain_irq+0x6c/0xc0
[ 526.610941] gic_handle_irq+0x84/0x188
[ 526.614679] el1_irq+0xe8/0x180
[ 526.617822] cpuidle_enter_state+0xa0/0x328
[ 526.621993] cpuidle_enter+0x34/0x48
[ 526.625564] call_cpuidle+0x44/0x70
[ 526.629040] do_idle+0x1b8/0x1f0
[ 526.632256] cpu_startup_entry+0x2c/0x30
[ 526.636174] secondary_start_kernel+0x11c/0x130
[ 526.640694] Code: bad PC value
[ 526.643800] ---[ end trace d020b0b8417c2498 ]---
[ 526.648404] Kernel panic - not syncing: Fatal exception in interrupt
[ 526.654778] SMP: stopping secondary CPUs
[ 526.658734] Kernel Offset: disabled
[ 526.662211] CPU features: 0x5800c38
[ 526.665688] Memory Limit: none
[ 526.668768] ---[ end Kernel panic - not syncing: Fatal exception in interrupt

Prevent mod_timer from arming a timer that was already removed by
del_timer during module unload.

BugLink: http://launchpad.net/bugs/1799281

Signed-off-by: Jan Glauber <jglauber at cavium.com>
Cc: <stable at vger.kernel.org> # 3.19
Signed-off-by: Corey Minyard <cminyard at mvista.com>

(cherry picked from commit 0711e8c1b4572d076264e71b0002d223f2666ed7
linux-next)

Signed-off-by: Manoj Iyer <manoj.iyer at canonical.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 265d6a6583bc..e33fefd6ceae 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -606,8 +606,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
 			flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
 			ssif_info->waiting_alert = true;
 			ssif_info->rtc_us_timer = SSIF_MSG_USEC;
-			mod_timer(&ssif_info->retry_timer,
-				  jiffies + SSIF_MSG_JIFFIES);
+			if (!ssif_info->stopping)
+				mod_timer(&ssif_info->retry_timer,
+					  jiffies + SSIF_MSG_JIFFIES);
 			ipmi_ssif_unlock_cond(ssif_info, flags);
 			return;
 		}
@@ -939,8 +940,9 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
 			ssif_info->waiting_alert = true;
 			ssif_info->retries_left = SSIF_RECV_RETRIES;
 			ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC;
-			mod_timer(&ssif_info->retry_timer,
-				  jiffies + SSIF_MSG_PART_JIFFIES);
+			if (!ssif_info->stopping)
+				mod_timer(&ssif_info->retry_timer,
+					  jiffies + SSIF_MSG_PART_JIFFIES);
 			ipmi_ssif_unlock_cond(ssif_info, flags);
 		}
 	}
-- 
2.19.1





More information about the kernel-team mailing list