[PATCH 1/1][autotest-client-tests] UBUNTU: SAUCE: ubuntu_nvidia_server_driver: create Nvidia server driver loading test
Taihsiang Ho (tai271828)
taihsiang.ho at canonical.com
Tue Nov 30 10:16:09 UTC 2021
The goal of the test is to reload all relevant packages in
different scenarios and make sure all of the scenarios work properly.
Signed-off-by: Taihsiang Ho (tai271828) <taihsiang.ho at canonical.com>
---
ubuntu_nvidia_server_driver/control | 12 +++
ubuntu_nvidia_server_driver/nvidia-module-lib | 96 +++++++++++++++++++
.../test-each-nvidia-server-driver.sh | 35 +++++++
.../ubuntu_nvidia_server_driver.py | 32 +++++++
.../ubuntu_nvidia_server_driver.sh | 40 ++++++++
5 files changed, 215 insertions(+)
create mode 100644 ubuntu_nvidia_server_driver/control
create mode 100644 ubuntu_nvidia_server_driver/nvidia-module-lib
create mode 100755 ubuntu_nvidia_server_driver/test-each-nvidia-server-driver.sh
create mode 100644 ubuntu_nvidia_server_driver/ubuntu_nvidia_server_driver.py
create mode 100755 ubuntu_nvidia_server_driver/ubuntu_nvidia_server_driver.sh
diff --git a/ubuntu_nvidia_server_driver/control b/ubuntu_nvidia_server_driver/control
new file mode 100644
index 00000000..2c3f2510
--- /dev/null
+++ b/ubuntu_nvidia_server_driver/control
@@ -0,0 +1,12 @@
+AUTHOR = 'Taihsiang Ho <taihsiang.ho at canonical.com>'
+TIME = 'SHORT'
+NAME = 'Nvidia server driver build and load verification test'
+TEST_TYPE = 'client'
+TEST_CLASS = 'General'
+TEST_CATEGORY = 'Smoke'
+
+DOC = """
+Perform testing of Nvidia server drivers
+"""
+
+job.run_test_detail('ubuntu_nvidia_server_driver', test_name='load', tag='load', timeout=600)  # 'load' is the only name run_once() acts on; timeout is presumably in seconds -- confirm against autotest
diff --git a/ubuntu_nvidia_server_driver/nvidia-module-lib b/ubuntu_nvidia_server_driver/nvidia-module-lib
new file mode 100644
index 00000000..06141bfc
--- /dev/null
+++ b/ubuntu_nvidia_server_driver/nvidia-module-lib
@@ -0,0 +1,96 @@
+# Copyright 2021 Canonical Ltd.
+# Written by:
+# Dann Frazier <dann.frazier at canonical.com>
+# Taihsiang Ho <taihsiang.ho at canonical.com>
+#
+# shellcheck shell=bash
+# Report whether a kernel module is loaded and fully initialised.
+# Arguments: $1 - module name as listed in the first column of /proc/modules
+# Returns:   0 if the module's entry is in state "Live", non-zero otherwise
+module_loaded() {
+    local module="$1"
+    # Check linux/include/linux/module.h for the module_state enumeration.
+    # Besides Live there are other states such as Loading and Unloading.
+    # Those usually last only a few microseconds, but let's require Live
+    # explicitly.
+    # Whole fields are compared so that the name is matched literally (not
+    # as a regular expression) and only the state column is inspected.
+    awk -v name="$module" \
+        '$1 == name && $5 == "Live" { found = 1 } END { exit !found }' \
+        /proc/modules
+}
+
+get_module_field() {
+ local module="$1"
+ local field="$2"
+ # shellcheck disable=SC2034
+ read -r mod size usecnt deps rest < <(grep "^${module} " /proc/modules)
+ case $field in
+ usecnt)
+ echo "$usecnt"
+ ;;
+ deps)
+ if [ "$deps" = "-" ]; then
+ return 0
+ fi
+ echo "$deps" | tr ',' ' '
+ ;;
+ *)
+ return 1
+ esac
+}
+
+module_in_use() {
+ module="$1"
+
+ usecnt="$(get_module_field "$module" usecnt)"
+
+ if [ "$usecnt" -eq 0 ]; then
+ return 1
+ fi
+ return 0
+}
+
+recursive_remove_module() {
+ local module="$1"
+
+ if ! module_loaded "$module"; then
+ return 0
+ fi
+
+ if ! module_in_use "$module"; then
+ sudo rmmod "$module"
+ return 0
+ fi
+
+ if [ "$(get_module_field "$module" deps)" = "" ]; then
+ echo "ERROR: $module is in use, but has no reverse dependencies"
+ echo "ERROR: Maybe an application is using it."
+ exit 1
+ fi
+ beforecnt="$(get_module_field "$module" usecnt)"
+ for dep in $(get_module_field "$module" deps); do
+ recursive_remove_module "$dep"
+ done
+ aftercnt="$(get_module_field "$module" usecnt)"
+ if [ "$beforecnt" -eq "$aftercnt" ]; then
+ echo "ERROR: Unable to reduce $module use count"
+ exit 1
+ fi
+ recursive_remove_module "$module"
+}
+
+uninstall_all_nvidia_mod_pkgs() {
+ for pkg in $(dpkg-query -f "\${Package}\n" -W 'linux-modules-nvidia-*'); do
+ sudo apt remove --purge "$pkg" -y
+ done
+ if sudo modinfo nvidia; then
+ echo "ERROR: Uninstallation of all nvidia modules failed."
+ exit 1
+ fi
+}
+
+product="$(sudo dmidecode -s baseboard-product-name)"
+pkg_compatible_with_platform() {
+ local pkg="$1"
+ branch="$(echo "$pkg" | cut -d- -f4)"
+
+ if [ "$product" = "DGXA100" ] && [ "$branch" -le "418" ]; then
+ return 1
+ fi
+
+ return 0
+}
diff --git a/ubuntu_nvidia_server_driver/test-each-nvidia-server-driver.sh b/ubuntu_nvidia_server_driver/test-each-nvidia-server-driver.sh
new file mode 100755
index 00000000..9dae85cc
--- /dev/null
+++ b/ubuntu_nvidia_server_driver/test-each-nvidia-server-driver.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+#
+# Copyright 2021 Canonical Ltd.
+# Written by:
+# Dann Frazier <dann.frazier at canonical.com>
+# Taihsiang Ho <taihsiang.ho at canonical.com>
+
+set -e
+
+source nvidia-module-lib
+
+sudo service nvidia-fabricmanager stop || /bin/true
+
+# Some examples like:
+# ubuntu at hot-koala:~$ apt-cache search --names-only "^linux-modules-nvidia-[0-9]+-server-$(uname -r)$"
+# linux-modules-nvidia-418-server-5.4.0-90-generic - Linux kernel nvidia modules for version 5.4.0-90
+# linux-modules-nvidia-450-server-5.4.0-90-generic - Linux kernel nvidia modules for version 5.4.0-90
+# linux-modules-nvidia-460-server-5.4.0-90-generic - Linux kernel nvidia modules for version 5.4.0-90
+# linux-modules-nvidia-470-server-5.4.0-90-generic - Linux kernel nvidia modules for version 5.4.0-90
+for drvpkg in $(apt-cache search --names-only "^linux-modules-nvidia-[0-9]+-server-$(uname -r)$" | cut -d' ' -f1); do
+ if ! pkg_compatible_with_platform "$drvpkg"; then
+ echo "INFO: Skipping $drvpkg on $platform" 1>&2
+ continue
+ fi
+ uninstall_all_nvidia_mod_pkgs
+ recursive_remove_module nvidia
+ sudo dmesg -c > /dev/null
+ sudo apt install -y "$drvpkg"
+ sudo modprobe nvidia
+ if sudo dmesg | grep "NVRM: loading NVIDIA UNIX"; then
+ continue
+ fi
+ echo "ERROR: Failed to detect nvidia driver initialization message in dmesg"
+ exit 1
+done
diff --git a/ubuntu_nvidia_server_driver/ubuntu_nvidia_server_driver.py b/ubuntu_nvidia_server_driver/ubuntu_nvidia_server_driver.py
new file mode 100644
index 00000000..d0c667ae
--- /dev/null
+++ b/ubuntu_nvidia_server_driver/ubuntu_nvidia_server_driver.py
@@ -0,0 +1,32 @@
+import os
+from autotest.client import test, utils
+
+p_dir = os.path.dirname(os.path.abspath(__file__))  # absolute directory containing this module
+sh_executable = os.path.join(p_dir, "ubuntu_nvidia_server_driver.sh")  # shell driver script expected next to this module
+
+
+class ubuntu_nvidia_server_driver(test.test):  # autotest client test that delegates its work to sh_executable
+ version = 1
+
+ def initialize(self):  # no preparation is performed
+ pass
+
+ def setup(self):  # runs "<sh_executable> setup" through utils.system
+ cmd = "{} setup".format(sh_executable)
+ utils.system(cmd)
+
+ def compare_kernel_modules(self):  # runs "<sh_executable> test" through utils.system
+ cmd = "{} test".format(sh_executable)
+ utils.system(cmd)
+
+ def run_once(self, test_name):  # only test_name == "load" triggers compare_kernel_modules()
+ if test_name == "load":
+ self.compare_kernel_modules()
+
+ print("")  # NOTE(review): indentation was lost in this copy -- confirm which of these prints belong inside the if
+ print("{} has run.".format(test_name))
+
+ print("")
+
+ def postprocess_iteration(self):  # no per-iteration post-processing is performed
+ pass
diff --git a/ubuntu_nvidia_server_driver/ubuntu_nvidia_server_driver.sh b/ubuntu_nvidia_server_driver/ubuntu_nvidia_server_driver.sh
new file mode 100755
index 00000000..8dda591e
--- /dev/null
+++ b/ubuntu_nvidia_server_driver/ubuntu_nvidia_server_driver.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# perform Nvidia driver load testing and corresponding pre-setup.
+#
+
+set -eo pipefail  # abort on the first failing command, and treat a pipeline as failed if any stage fails
+
+# Pre-setup hook for the testing environment and any required tools.
+# There is nothing to prepare at the moment; the hook is kept so that
+# preparation steps can be added later.
+# Outputs: one progress line on stdout
+setup() {
+    printf '%s\n' "begin to pre-setup testing"
+}
+
+# Run the per-driver load test from the directory containing this script.
+# The working directory is changed first because the test script is
+# invoked by a relative path.
+# Outputs: whatever pushd, popd and the test script print
+# Returns: the status of popd; with `set -e` active a failing step aborts
+#          the script before that
+run_test() {
+    # Local so the path does not leak into the script's global scope.
+    local exe_dir
+    exe_dir=$(dirname "${BASH_SOURCE[0]}")
+    pushd "${exe_dir}"
+    ./test-each-nvidia-server-driver.sh
+    popd
+}
+
+# Dispatch on the sub-command given as the first argument.
+case "$1" in
+    setup)
+        echo ""
+        echo "On setting up necessary test environment..."
+        echo ""
+        setup
+        echo ""
+        echo "Setting up necessary test environment..."
+        echo ""
+        ;;
+    test)
+        echo ""
+        echo "On running test..."
+        echo ""
+        run_test
+        echo ""
+        echo "Running test..."
+        echo ""
+        ;;
+    *)
+        # Fail loudly: exiting 0 here would let a missing or mistyped
+        # sub-command be reported as a successful run.
+        echo "Usage: $0 {setup|test}" >&2
+        exit 2
+        ;;
+esac
--
2.33.1
More information about the kernel-team
mailing list