1#!/usr/bin/env bash
2# SPDX-License-Identifier: LGPL-2.1-or-later
# Abort on any failing command or unset variable, and trace each command as it runs.
set -o errexit -o nounset -o xtrace
# Make a pipeline fail when any of its stages fails.
set -o pipefail

# Switch the log level to debug and send the log output to the console.
systemd-analyze log-level debug
systemd-analyze log-target console
8
# Loose checks to ensure the environment has the necessary features for systemd-oomd.
# Each missing feature appends its reason to /skipped.
if [[ ! -e /proc/pressure ]]; then
    echo "no PSI" >>/skipped
fi

cgroup_type="$(stat -fc %T /sys/fs/cgroup/)"
case "$cgroup_type" in
    *cgroup2* | *0x63677270*)
        # /sys/fs/cgroup/ is reported as cgroup2, either by name or as a hex
        # value (presumably the raw filesystem magic -- TODO confirm).
        ;;
    *)
        echo "no cgroup2" >>/skipped
        ;;
esac

if [[ ! -f /usr/lib/systemd/systemd-oomd && ! -f /lib/systemd/systemd-oomd ]]; then
    echo "no oomd" >>/skipped
fi

# If /skipped exists, stop here and report success without running the test.
[[ ! -e /skipped ]] || exit 0
22
# Remove any drop-in directory for the testbloat service before configuring.
rm -rf /etc/systemd/system/testsuite-55-testbloat.service.d

# Configure oomd explicitly to avoid conflicts with distro dropins.
# Create all drop-in directories first, then write one file into each.
mkdir -p \
    /etc/systemd/oomd.conf.d/ \
    /etc/systemd/system/-.slice.d/ \
    /etc/systemd/system/user@.service.d/ \
    /etc/systemd/system/systemd-oomd.service.d/

printf '%s\n' \
    "[OOM]" \
    "DefaultMemoryPressureDurationSec=2s" \
    >/etc/systemd/oomd.conf.d/99-oomd-test.conf

printf '%s\n' \
    "[Slice]" \
    "ManagedOOMSwap=auto" \
    >/etc/systemd/system/-.slice.d/99-oomd-test.conf

printf '%s\n' \
    "[Service]" \
    "ManagedOOMMemoryPressure=auto" \
    "ManagedOOMMemoryPressureLimit=0%" \
    >/etc/systemd/system/user@.service.d/99-oomd-test.conf

# Run systemd-oomd itself with debug logging.
printf '%s\n' \
    "[Service]" \
    "Environment=SYSTEMD_LOG_LEVEL=debug" \
    >/etc/systemd/system/systemd-oomd.service.d/debug.conf

systemctl daemon-reload

# Enable the service to ensure dbus-org.freedesktop.oom1.service exists
# and D-Bus activation works.
systemctl enable systemd-oomd.service

# If oomd is already running for some reason, restart it so that the
# settings above are applied.
if systemctl is-active systemd-oomd.service; then
    systemctl restart systemd-oomd.service
fi
46
# Start the two system-level test workloads.
systemctl start testsuite-55-testchill.service
systemctl start testsuite-55-testbloat.service

# Verify systemd-oomd is monitoring the expected units.
# To avoid racing the oomctl output, poll it until the workload slice shows up
# or one minute has passed.
oomctl_output=$(oomctl)
deadline="$(date -ud "1 minutes" +%s)"
until (( $(date -u +%s) > deadline )) || grep "/testsuite-55-workload.slice" <<< "$oomctl_output"; do
    oomctl_output=$(oomctl)
    sleep 1
done

# Every expected pattern must appear in the last captured output;
# a missing one aborts the script (errexit).
for expected in "/testsuite-55-workload.slice" "20.00%" "Default Memory Pressure Duration: 2s"; do
    grep "$expected" <<< "$oomctl_output"
done

systemctl status testsuite-55-testchill.service

# systemd-oomd watches for elevated pressure for 2 seconds before acting.
# It can take time to build up pressure, so wait until the service fails
# or 2 minutes have passed, whichever comes first.
deadline="$(date -ud "2 minutes" +%s)"
while [[ $(date -u +%s) -le $deadline ]] && systemctl status testsuite-55-testbloat.service; do
    sleep 2
done

# testbloat should be killed and testchill should be fine
! systemctl status testsuite-55-testbloat.service || exit 42
systemctl status testsuite-55-testchill.service || exit 24
81
# Make sure we also work correctly on user units.

# Options that address the user manager of testuser on the local host.
user_opts=(--machine "testuser@.host" --user)

systemctl start "${user_opts[@]}" testsuite-55-testchill.service
systemctl start "${user_opts[@]}" testsuite-55-testbloat.service

# Verify systemd-oomd is monitoring the expected units.
# To avoid racing the oomctl output, poll it until the workload slice below
# user.slice shows up or one minute has passed.
oomctl_output=$(oomctl)
deadline="$(date -ud "1 minutes" +%s)"
until (( $(date -u +%s) > deadline )) || grep -E "/user.slice.*/testsuite-55-workload.slice" <<< "$oomctl_output"; do
    oomctl_output=$(oomctl)
    sleep 1
done

# All of these must be present in the last captured output (errexit aborts otherwise).
grep -E "/user.slice.*/testsuite-55-workload.slice" <<< "$oomctl_output"
for expected in "20.00%" "Default Memory Pressure Duration: 2s"; do
    grep "$expected" <<< "$oomctl_output"
done

systemctl "${user_opts[@]}" status testsuite-55-testchill.service

# systemd-oomd watches for elevated pressure for 2 seconds before acting.
# It can take time to build up pressure, so wait until the service fails
# or 2 minutes have passed, whichever comes first.
deadline="$(date -ud "2 minutes" +%s)"
while [[ $(date -u +%s) -le $deadline ]] && systemctl "${user_opts[@]}" status testsuite-55-testbloat.service; do
    sleep 2
done

# testbloat should be killed and testchill should be fine
! systemctl "${user_opts[@]}" status testsuite-55-testbloat.service || exit 42
systemctl "${user_opts[@]}" status testsuite-55-testchill.service || exit 24
118
# Only run this portion of the test if we can set xattrs on the cgroup filesystem.
if setfattr -n user.xattr_test -v 1 /sys/fs/cgroup/; then
    # Wait for the systemd-oomd kill cool down and for the elevated memory pressure to come down.
    sleep 120

    # Give testbloat a drop-in that sets ManagedOOMPreference=avoid.
    override_dir=/etc/systemd/system/testsuite-55-testbloat.service.d
    mkdir -p "${override_dir}/"
    printf '%s\n' "[Service]" "ManagedOOMPreference=avoid" >"${override_dir}/override.conf"

    systemctl daemon-reload
    for workload in testchill testmunch testbloat; do
        systemctl start "testsuite-55-${workload}.service"
    done

    # Wait until testmunch fails or 2 minutes have passed, whichever comes first.
    deadline="$(date -ud "2 minutes" +%s)"
    while [[ "$(date -u +%s)" -le "$deadline" ]] && systemctl status testsuite-55-testmunch.service; do
        sleep 2
    done

    # testmunch should be killed since testbloat had the avoid xattr on it
    systemctl status testsuite-55-testbloat.service || exit 25
    ! systemctl status testsuite-55-testmunch.service || exit 43
    systemctl status testsuite-55-testchill.service || exit 24
fi
145
# Set the log level to info now that the checks are done.
systemd-analyze log-level info

# Write the success marker file.
printf 'OK\n' >/testok

exit 0
151