1#!/usr/bin/env bash
2# SPDX-License-Identifier: LGPL-2.1-or-later
# Abort on errors, unset variables and failed pipeline stages; trace every command
set -euxo pipefail
5
# EXIT trap handler: when the script is exiting with a non-zero status, dump
# the collected test artifacts (strace and journal logs) to stdout for easier
# debugging.
#
# Globals:
#   straceLog, journalLog (read) - paths of the log files; either may be unset
#                                  or point to a file which does not exist yet
# Returns:
#   Always 0, so that the handler itself can never trip errexit and replace
#   the exit status the script is terminating with.
at_exit() {
    # Grab the exit status first, before any other command overwrites $?
    local ec=$?
    local log

    if [[ $ec -ne 0 ]]; then
        for log in "${straceLog:-}" "${journalLog:-}"; do
            if [[ -n "$log" && -f "$log" ]]; then
                # Best effort only: a failing cat must not mask the real error
                cat "$log" || :
            fi
        done
    fi

    return 0
}
15
# Run at_exit on every exit path of the script
trap at_exit EXIT

# Switch PID 1 to debug-level logging, sent to the journal
systemd-analyze log-level debug
systemd-analyze log-target journal

# Log files (relative paths, i.e. created in the current working directory)
straceLog="strace.log"
journalLog="journal.log"

# Systemd config files: the test unit and its NUMA drop-in
testUnit="numa-test.service"
testUnitFile="/run/systemd/system/${testUnit}"
testUnitNUMAConf="${testUnitFile}.d/numa.conf"

# Sleep constants (we should probably figure out something better but nothing comes to mind)
sleepAfterStart=1

# Journal cursor for easier navigation
# NOTE: the file name is spelled this way on purpose to match the original;
# it is only ever referenced through this variable
journalCursorFile="jounalCursorFile"
35
# Attach strace to PID 1 as a coprocess, recording only set_mempolicy() calls
# into $straceLog. A single extra strace option may be passed as $1; it is
# omitted from the command line when empty or missing.
startStrace() {
    coproc strace -qq -p 1 -o "$straceLog" -e set_mempolicy -s 1024 ${1:+"$1"}
    # strace needs a moment to attach: poll until the TracerPid field of PID 1
    # equals the PID of the strace coprocess
    until awk -v spid="$COPROC_PID" '/^TracerPid:/ { exit ($2 != spid) }' /proc/1/status; do
        sleep 0.1
    done
}
42
# Terminate the strace coprocess started by startStrace and wait until it is
# really gone. Returns 1 without doing anything when no coprocess is known
# (COPROC_PID unset).
stopStrace() {
    if [[ ! -v COPROC_PID ]]; then
        return 1
    fi

    # Remember the PID in a local variable for the signalling and polling below
    local tracer="$COPROC_PID"
    kill -TERM "$tracer"
    # Poll until the strace process can no longer be signalled
    until ! kill -0 "$tracer" 2>/dev/null; do
        sleep 0.1
    done
}
51
# Start a journal capture: empty the cursor file and let journalctl store the
# current journal position in it, without printing any entries (-n0).
startJournalctl() {
    true >"$journalCursorFile"
    # The saved cursor is what stopJournalctl later passes back to journalctl
    journalctl --no-pager --cursor-file="$journalCursorFile" -n0 -ocat
}
57
# Finish a journal capture: write the journal entries of a unit, selected via
# the cursor file written by startJournalctl, into $journalLog.
#
# Arguments:
#   $1 - unit whose messages should be collected (default: init.scope)
stopJournalctl() {
    local targetUnit="${1:-init.scope}"

    # journalctl --sync is preferred over sending SIGRTMIN+1, because --sync
    # waits until the synchronization is complete
    printf '%s\n' "Force journald to write all queued messages"
    journalctl --sync
    journalctl -u "$targetUnit" --cursor-file="$journalCursorFile" >"$journalLog"
}
66
# Succeed when the machine looks NUMA capable, i.e. when at least NUMA node0
# is present in sysfs.
checkNUMA() {
    [[ -e /sys/devices/system/node/node0 ]]
}
71
# Write the [Manager] NUMA configuration to $confDir/numa.conf.
#
# Arguments:
#   $1 - NUMAPolicy value (required)
#   $2 - NUMAMask value (optional, written as an empty assignment if missing)
writePID1NUMAPolicy() {
    # The redirection is attached to the group so that the target file is
    # opened before the arguments are expanded
    {
        printf '[Manager]\nNUMAPolicy=%s\nNUMAMask=%s\n' "${1:?}" "${2:-}"
    } >"$confDir/numa.conf"
}
79
# Create the test unit file ($testUnitFile) together with its drop-in
# directory ($testUnitFile.d/). The unit simply runs a long sleep.
writeTestUnit() {
    mkdir -p "${testUnitFile}.d/"
    cat >"$testUnitFile" <<'EOF'
[Service]
ExecStart=/bin/sleep 3600
EOF
}
84
# Write the [Service] NUMA drop-in of the test unit ($testUnitNUMAConf) and
# reload the manager configuration afterwards.
#
# Arguments:
#   $1 - NUMAPolicy value (required)
#   $2 - NUMAMask value (optional, written as an empty assignment if missing)
writeTestUnitNUMAPolicy() {
    # The redirection is attached to the group so that the target file is
    # opened before the arguments are expanded
    {
        printf '[Service]\nNUMAPolicy=%s\nNUMAMask=%s\n' "${1:?}" "${2:-}"
    } >"$testUnitNUMAConf"
    systemctl daemon-reload
}
93
# Reload the manager configuration while strace is attached to PID 1, so that
# the syscalls made during the reload end up in the strace log.
pid1ReloadWithStrace() {
    startStrace
    systemctl daemon-reload
    # Wait a moment before the tracer is detached again
    sleep "$sleepAfterStart"
    stopStrace
}
100
# Reload the manager configuration between startJournalctl and stopJournalctl,
# so that the journal messages logged during the reload are collected.
pid1ReloadWithJournal() {
    startJournalctl
    systemctl daemon-reload
    # No unit is passed, so stopJournalctl falls back to its default unit
    stopJournalctl
}
106
# Start a unit while strace is attached to PID 1. The extra -f option is
# handed to startStrace for the strace command line.
#
# Arguments:
#   $1 - name of the unit to start (required)
pid1StartUnitWithStrace() {
    startStrace -f
    systemctl start "${1:?}"
    # Wait a moment before the tracer is detached again
    sleep "$sleepAfterStart"
    stopStrace
}
113
# Start a unit between startJournalctl and stopJournalctl, so that the journal
# messages logged while the unit starts are collected.
#
# Arguments:
#   $1 - name of the unit to start (required)
pid1StartUnitWithJournal() {
    startJournalctl
    systemctl start "${1:?}"
    # Wait a moment before the journal is collected
    sleep "$sleepAfterStart"
    # No unit is passed, so stopJournalctl falls back to its default unit
    stopJournalctl
}
120
# Stop a unit via systemctl.
#
# Arguments:
#   $1 - name of the unit to stop (required)
pid1StopUnit() {
    systemctl stop "${1:?}"
}
124
# Verify the NUMA properties systemd reports for a unit.
#
# Arguments:
#   $1 - unit name (required)
#   $2 - expected NUMAPolicy value (required)
#   $3 - expected NUMAMask value (optional; the mask is not checked when empty)
# Outputs:
#   The matching "Property=value" line(s) on stdout.
# Returns:
#   0 when every checked property matches, non-zero as soon as systemctl fails
#   or a property does not match.
systemctlCheckNUMAProperties() {
    local UNIT_NAME="${1:?}"
    local NUMA_POLICY="${2:?}"
    local NUMA_MASK="${3:-}"
    local PROPERTY

    # The output is kept in a variable instead of a temporary file, so nothing
    # is left behind in the temporary directory. Every failure is propagated
    # explicitly, so a mismatch is reported even when the caller runs this
    # function in a context where errexit is ignored.
    PROPERTY="$(systemctl show -p NUMAPolicy "$UNIT_NAME")" || return
    grep "NUMAPolicy=$NUMA_POLICY" <<<"$PROPERTY" || return

    if [[ -n "$NUMA_MASK" ]]; then
        PROPERTY="$(systemctl show -p NUMAMask "$UNIT_NAME")" || return
        grep "NUMAMask=$NUMA_MASK" <<<"$PROPERTY" || return
    fi
}
143
# Create the test unit file and its drop-in directory
writeTestUnit

# Create systemd config drop-in directory (writePID1NUMAPolicy writes numa.conf into it)
confDir="/run/systemd/system.conf.d/"
mkdir -p "${confDir}"
149
# Main test body.
#
# Without NUMA support only a sanity check is done: both PID 1 and a transient
# unit have to log that the NUMA settings are ignored.
#
# With NUMA support three areas are covered:
#   1. the NUMA policy PID 1 applies to itself (manager configuration),
#   2. the NUMA policy applied to a service configured through a unit file,
#   3. the NUMA properties of transient units created with systemd-run.
# Depending on the expected outcome each check inspects either the strace log
# (a set_mempolicy() call was made) or the journal log (an error was logged).
# Every check has to collect a fresh copy of the log it greps afterwards.
if ! checkNUMA; then
    echo >&2 "NUMA is not supported on this machine, switching to a simple sanity check"

    echo "PID1 NUMAPolicy=default && NUMAMask=0 check without NUMA support"
    writePID1NUMAPolicy "default" "0"
    startJournalctl
    systemctl daemon-reload
    stopJournalctl
    grep "NUMA support not available, ignoring" "$journalLog"

    echo "systemd-run NUMAPolicy=default && NUMAMask=0 check without NUMA support"
    runUnit='numa-systemd-run-test.service'
    startJournalctl
    systemd-run -p NUMAPolicy=default -p NUMAMask=0 --unit "$runUnit" sleep 1000
    sleep $sleepAfterStart
    pid1StopUnit "$runUnit"
    stopJournalctl "$runUnit"
    grep "NUMA support not available, ignoring" "$journalLog"

else
    echo "PID1 NUMAPolicy support - Default policy w/o mask"
    writePID1NUMAPolicy "default"
    pid1ReloadWithStrace
    # Kernel requires that nodemask argument is set to NULL when setting default policy
    grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"

    echo "PID1 NUMAPolicy support - Default policy w/ mask"
    writePID1NUMAPolicy "default" "0"
    pid1ReloadWithStrace
    grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"

    echo "PID1 NUMAPolicy support - Bind policy w/o mask"
    writePID1NUMAPolicy "bind"
    pid1ReloadWithJournal
    grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog"

    echo "PID1 NUMAPolicy support - Bind policy w/ mask"
    writePID1NUMAPolicy "bind" "0"
    pid1ReloadWithStrace
    grep -P "set_mempolicy\(MPOL_BIND, \[0x0*1\]" "$straceLog"

    echo "PID1 NUMAPolicy support - Interleave policy w/o mask"
    writePID1NUMAPolicy "interleave"
    pid1ReloadWithJournal
    grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog"

    echo "PID1 NUMAPolicy support - Interleave policy w/ mask"
    writePID1NUMAPolicy "interleave" "0"
    pid1ReloadWithStrace
    grep -P "set_mempolicy\(MPOL_INTERLEAVE, \[0x0*1\]" "$straceLog"

    echo "PID1 NUMAPolicy support - Preferred policy w/o mask"
    writePID1NUMAPolicy "preferred"
    pid1ReloadWithJournal
    # Preferred policy with empty node mask is actually allowed and should reset allocation policy to default
    grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog" && { echo >&2 "unexpected pass"; exit 1; }

    echo "PID1 NUMAPolicy support - Preferred policy w/ mask"
    writePID1NUMAPolicy "preferred" "0"
    pid1ReloadWithStrace
    grep -P "set_mempolicy\(MPOL_PREFERRED, \[0x0*1\]" "$straceLog"

    echo "PID1 NUMAPolicy support - Local policy w/o mask"
    writePID1NUMAPolicy "local"
    pid1ReloadWithStrace
    # As with the default policy, the nodemask argument is expected to be NULL
    # when setting the local policy
    # The unpatched versions of strace don't recognize the MPOL_LOCAL constant and
    # return a numerical constant instead (with a comment):
    #   set_mempolicy(0x4 /* MPOL_??? */, NULL, 0) = 0
    # Let's cover this scenario as well
    grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"

    echo "PID1 NUMAPolicy support - Local policy w/ mask"
    writePID1NUMAPolicy "local" "0"
    pid1ReloadWithStrace
    grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"

    echo "Unit file NUMAPolicy support - Default policy w/o mask"
    writeTestUnitNUMAPolicy "default"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "default"
    pid1StopUnit "$testUnit"
    grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"

    echo "Unit file NUMAPolicy support - Default policy w/ mask"
    writeTestUnitNUMAPolicy "default" "0"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "default" "0"
    pid1StopUnit "$testUnit"
    # Mask must be ignored
    grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"

    echo "Unit file NUMAPolicy support - Bind policy w/o mask"
    writeTestUnitNUMAPolicy "bind"
    pid1StartUnitWithJournal "$testUnit"
    pid1StopUnit "$testUnit"
    grep "numa-test.service: Main process exited, code=exited, status=242/NUMA" "$journalLog"

    echo "Unit file NUMAPolicy support - Bind policy w/ mask"
    writeTestUnitNUMAPolicy "bind" "0"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "bind" "0"
    pid1StopUnit "$testUnit"
    grep -P "set_mempolicy\(MPOL_BIND, \[0x0*1\]" "$straceLog"

    echo "Unit file NUMAPolicy support - Interleave policy w/o mask"
    writeTestUnitNUMAPolicy "interleave"
    # The expected failure is looked up in the journal log, so a fresh journal
    # log (not a strace log) has to be collected here; otherwise the grep below
    # would only see the stale log of an earlier check
    pid1StartUnitWithJournal "$testUnit"
    pid1StopUnit "$testUnit"
    grep "numa-test.service: Main process exited, code=exited, status=242/NUMA" "$journalLog"

    echo "Unit file NUMAPolicy support - Interleave policy w/ mask"
    writeTestUnitNUMAPolicy "interleave" "0"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "interleave" "0"
    pid1StopUnit "$testUnit"
    grep -P "set_mempolicy\(MPOL_INTERLEAVE, \[0x0*1\]" "$straceLog"

    echo "Unit file NUMAPolicy support - Preferred policy w/o mask"
    writeTestUnitNUMAPolicy "preferred"
    pid1StartUnitWithJournal "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "preferred"
    pid1StopUnit "$testUnit"
    # The unit must NOT have failed with the NUMA exit status here
    grep "numa-test.service: Main process exited, code=exited, status=242/NUMA" "$journalLog" && { echo >&2 "unexpected pass"; exit 1; }

    echo "Unit file NUMAPolicy support - Preferred policy w/ mask"
    writeTestUnitNUMAPolicy "preferred" "0"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "preferred" "0"
    pid1StopUnit "$testUnit"
    grep -P "set_mempolicy\(MPOL_PREFERRED, \[0x0*1\]" "$straceLog"

    echo "Unit file NUMAPolicy support - Local policy w/o mask"
    writeTestUnitNUMAPolicy "local"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "local"
    pid1StopUnit "$testUnit"
    grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"

    echo "Unit file NUMAPolicy support - Local policy w/ mask"
    writeTestUnitNUMAPolicy "local" "0"
    pid1StartUnitWithStrace "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "local" "0"
    pid1StopUnit "$testUnit"
    # Mask must be ignored
    grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"

    echo "Unit file CPUAffinity=NUMA support"
    writeTestUnitNUMAPolicy "bind" "0"
    # The drop-in is extended after writeTestUnitNUMAPolicy already reloaded
    # the configuration, hence the second daemon-reload
    echo "CPUAffinity=numa" >>"$testUnitNUMAConf"
    systemctl daemon-reload
    systemctl start "$testUnit"
    systemctlCheckNUMAProperties "$testUnit" "bind" "0"
    # The reported CPU affinity has to equal the CPU list of NUMA node0
    cpulist="$(cat /sys/devices/system/node/node0/cpulist)"
    affinity_systemd="$(systemctl show --value -p CPUAffinity "$testUnit")"
    [ "$cpulist" = "$affinity_systemd" ]
    pid1StopUnit "$testUnit"

    echo "systemd-run NUMAPolicy support"
    runUnit='numa-systemd-run-test.service'

    systemd-run -p NUMAPolicy=default --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "default"
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=default -p NUMAMask=0 --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "default" ""
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=bind -p NUMAMask=0 --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "bind" "0"
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=interleave -p NUMAMask=0 --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "interleave" "0"
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=preferred -p NUMAMask=0 --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "preferred" "0"
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=local --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "local"
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=local -p NUMAMask=0 --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "local" ""
    pid1StopUnit "$runUnit"

    systemd-run -p NUMAPolicy=local -p NUMAMask=0 -p CPUAffinity=numa --unit "$runUnit" sleep 1000
    systemctlCheckNUMAProperties "$runUnit" "local" ""
    systemctl cat "$runUnit" | grep -q 'CPUAffinity=numa'
    pid1StopUnit "$runUnit"
fi
344
# Cleanup: remove the manager configuration drop-ins written by the test and
# reload the manager configuration
rm -rf "${confDir}"
systemctl daemon-reload

# Switch the PID 1 log level from debug to info
systemd-analyze log-level info

# Record the successful run in /testok
printf '%s\n' "OK" >/testok

exit 0
354