#!/bin/sh

# This script demonstrates interaction of conntrack and vrf.
# The vrf driver calls the netfilter hooks again, with oif/iif
# pointing at the VRF device.
#
# For ingress, this means first iteration has iifname of lower/real
# device.  In this script, that's veth0.
# Second iteration is iifname set to vrf device, tvrf in this script.
#
# For egress, this is reversed: first iteration has the vrf device,
# second iteration is done with the lower/real/veth0 device.
#
# test_ct_zone_in demonstrates unexpected change of nftables
# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
# connection on VRF rcv"
#
# It was possible to assign conntrack zone to a packet (or mark it for
# `notracking`) in the prerouting chain before conntrack, based on real iif.
#
# After the change, the zone assignment is lost.  Instead, the zone is
# assigned based on the `iif` matching the VRF master interface (in case
# such a rule exists).
# Thus it is impossible to distinguish packets based on the original
# interface.
#
# test_masquerade_vrf and test_masquerade_veth demonstrate the problem
# that was supposed to be fixed by the commit mentioned above to make sure
# that any fix to test case 1 won't break masquerade again.

# kselftest exit code meaning "test skipped"
ksft_skip=4

IP0=172.30.30.1
IP1=172.30.30.2
PFXL=30
# overall result, set to 1 by any failing test case
ret=0

# random suffix so the namespace names do not clash with existing ones
sfx=$(mktemp -u "XXXXXXXX")
ns0="ns0-$sfx"
ns1="ns1-$sfx"

# Kill everything still running inside the test namespaces, then remove
# the namespaces.  Installed as EXIT trap, so errors are ignored: one of
# the namespaces may not exist yet when the script bails out early.
cleanup()
{
	ip netns pids "$ns0" 2>/dev/null | xargs kill 2>/dev/null
	ip netns pids "$ns1" 2>/dev/null | xargs kill 2>/dev/null

	# ip-netns(8) documents a single name for 'delete', so remove the
	# namespaces one at a time.
	ip netns del "$ns0" 2>/dev/null
	ip netns del "$ns1" 2>/dev/null
}

# Skip the whole test if a required tool cannot be executed.
# $1: probe command line (word-split on purpose)
# $2: tool name used in the SKIP message
checktool()
{
	if ! $1 > /dev/null 2>&1; then
		echo "SKIP: Could not run test without $2 tool"
		exit $ksft_skip
	fi
}

checktool "nft --version" nft
checktool "ip -Version" ip
checktool "conntrack -V" conntrack
# The iperf3 server is started in the background further down; the exit
# status of a background command is always 0, so availability has to be
# probed up front.
checktool "iperf3 -v" iperf3

if ! ip netns add "$ns0"; then
	echo "SKIP: Could not create net namespace $ns0"
	exit $ksft_skip
fi

# from here on there is something to clean up
trap cleanup EXIT

if ! ip netns add "$ns1"; then
	echo "SKIP: Could not create net namespace $ns1"
	exit $ksft_skip
fi

ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0

if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
	echo "SKIP: Could not add veth device"
	exit $ksft_skip
fi

if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
	echo "SKIP: Could not add vrf device"
	exit $ksft_skip
fi

ip -net "$ns0" li set lo up

# enslave the ns0 end of the veth pair to the vrf device
ip -net "$ns0" li set veth0 master tvrf
ip -net "$ns0" li set tvrf up
ip -net "$ns0" li set veth0 up
ip -net "$ns1" li set veth0 up

ip -net "$ns0" addr add "$IP0/$PFXL" dev veth0
ip -net "$ns1" addr add "$IP1/$PFXL" dev veth0

# server side for the masquerade tests; killed by cleanup()
ip netns exec "$ns1" iperf3 -s > /dev/null 2>&1 &

# test vrf ingress handling.
# The incoming connection should be placed in conntrack zone 1,
# as decided by the first iteration of the ruleset.
test_ct_zone_in()
{
	# Loads a ruleset in $ns0 that assigns conntrack zone 1 to packets
	# seen on veth0 and zone 2 to packets seen on tvrf, sends one ping
	# from $ns1 to $IP0 and then looks for the resulting icmp entry.
	# Sets the global 'ret' to 1 unless exactly one entry is listed
	# for zone 1.
ip netns exec "$ns0" nft -f - <<EOF
table testct {
	chain rawpre {
		type filter hook prerouting priority raw;

		iif { veth0, tvrf } counter meta nftrace set 1
		iif veth0 counter ct zone set 1 counter return
		iif tvrf counter ct zone set 2 counter return
		ip protocol icmp counter
		notrack counter
	}

	chain rawout {
		type filter hook output priority raw;

		oif veth0 counter ct zone set 1 counter return
		oif tvrf counter ct zone set 2 counter return
		notrack counter
	}
}
EOF
	ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null

	# should be in zone 1, not zone 2
	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 1 2>/dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "PASS: entry found in conntrack zone 1"
		return
	fi

	echo "FAIL: entry not found in conntrack zone 1"
	# propagate the failure to the exit status of the script
	ret=1

	# the remaining output only helps to diagnose where the entry went
	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 2 2> /dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "FAIL: entry found in zone 2 instead"
	else
		echo "FAIL: entry not in zone 1 or 2, dumping table"
		ip netns exec "$ns0" conntrack -L
		ip netns exec "$ns0" nft list ruleset
	fi
}

# add masq rule that gets evaluated w. outif set to vrf device.
# This tests the first iteration of the packet through conntrack,
# oifname is the vrf device.
test_masquerade_vrf()
{
	# $1: qdisc to attach to tvrf for the duration of the test, or
	#     "default" to leave the qdisc of the device untouched.
	# Sets the global 'ret' to 1 on failure.  The qdisc added here is
	# removed again on every path, including the failure paths.
	local qdisc=$1

	if [ "$qdisc" != "default" ]; then
		# $qdisc is left unquoted so a qdisc given with parameters
		# is split into separate words
		tc -net "$ns0" qdisc add dev tvrf root $qdisc
	fi

	ip netns exec "$ns0" conntrack -F 2>/dev/null

ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain rawout {
		type filter hook output priority raw;

		oif tvrf ct state untracked counter
	}
	chain postrouting2 {
		type filter hook postrouting priority mangle;

		oif tvrf ct state untracked counter
	}
	chain postrouting {
		type nat hook postrouting priority 0;
		# NB: masquerade should always be combined with 'oif(name) bla',
		# lack of this is intentional here, we want to exercise double-snat.
		ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	if ! ip netns exec "$ns0" ip vrf exec tvrf iperf3 -t 1 -c "$IP1" >/dev/null; then
		echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
		ret=1
	# must also check that nat table was evaluated on second (lower device) iteration.
	elif ip netns exec "$ns0" nft list table ip nat | grep -q 'counter packets 2' &&
	     ip netns exec "$ns0" nft list table ip nat | grep -q 'untracked counter packets [1-9]'; then
		echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
	else
		echo "FAIL: vrf rules have unexpected counter value"
		ret=1
	fi

	# always undo the qdisc change so it cannot leak into later tests
	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc del dev tvrf root
	fi
}

# add masq rule that gets evaluated w. outif set to veth device.
# This tests the 2nd iteration of the packet through conntrack,
# oifname is the lower device (veth0 in this case).
test_masquerade_veth()
{
	# Flushes conntrack state, replaces the ruleset in $ns0 with a single
	# masquerade rule bound to veth0, then runs an iperf3 client from
	# inside the vrf towards $IP1.  Sets the global 'ret' to 1 on failure.
	ip netns exec "$ns0" conntrack -F 2>/dev/null
ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain postrouting {
		type nat hook postrouting priority 0;
		meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	if ! ip netns exec "$ns0" ip vrf exec tvrf iperf3 -t 1 -c "$IP1" > /dev/null; then
		echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
		ret=1
		return
	fi

	# must also check that nat table was evaluated on second (lower device) iteration.
	if ip netns exec "$ns0" nft list table ip nat | grep -q 'counter packets 2'; then
		echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
	else
		echo "FAIL: vrf masq rule has unexpected counter value"
		ret=1
	fi
}

# run all test cases; each one records a failure in 'ret'
test_ct_zone_in
test_masquerade_vrf "default"
test_masquerade_vrf "pfifo"
test_masquerade_veth

exit $ret