Bug Summary

File: build-scan/../src/core/bpf-firewall.c
Warning: line 433, column 23
Although the value stored to 'ipv6_map_fd' is used in the enclosing expression, the value is never actually read from 'ipv6_map_fd'
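
The flagged statement is the chained assignment "ipv4_map_fd = ipv6_map_fd = -1;" that invalidates both descriptors once ownership has been handed to the caller. The store is not actually dead: both variables are declared with the _cleanup_close_ attribute, and the compiler-generated cleanup handler reads them at scope exit, which the analyzer does not model. A minimal sketch of an equivalent rewrite that keeps the invalidation but avoids the chained-assignment pattern, assuming a TAKE_FD-style helper that returns the old value and resets the variable (later systemd releases ship such a helper in fd-util.h; the macro below is only an illustration):

    /* Hypothetical helper: hand out the fd and disarm the cleanup in one step. */
    #define TAKE_FD(fd) ({ int _fd_ = (fd); (fd) = -1; _fd_; })

    *ret_ipv4_map_fd = TAKE_FD(ipv4_map_fd);
    *ret_ipv6_map_fd = TAKE_FD(ipv6_map_fd);
    return 0;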

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name bpf-firewall.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -menable-no-infs -menable-no-nans -menable-unsafe-fp-math -fno-signed-zeros -mreassociate -freciprocal-math -fdenormal-fp-math=preserve-sign,preserve-sign -ffp-contract=fast -fno-rounding-math -ffast-math -ffinite-math-only -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib64/clang/12.0.0 -include config.h -I src/core/libcore.a.p -I src/core -I ../src/core -I src/basic -I ../src/basic -I src/shared -I ../src/shared -I src/systemd -I ../src/systemd -I src/journal -I ../src/journal -I src/journal-remote -I ../src/journal-remote -I src/nspawn -I ../src/nspawn -I src/resolve -I ../src/resolve -I src/timesync -I ../src/timesync -I ../src/time-wait-sync -I src/login -I ../src/login -I src/udev -I ../src/udev -I src/libudev -I ../src/libudev -I ../src/libsystemd/sd-bus -I ../src/libsystemd/sd-device -I ../src/libsystemd/sd-hwdb -I ../src/libsystemd/sd-id128 -I ../src/libsystemd/sd-netlink -I ../src/libsystemd/sd-network -I src/libsystemd-network -I ../src/libsystemd-network -I . -I .. -I /usr/include/libmount -I /usr/include/blkid -D _FILE_OFFSET_BITS=64 -internal-isystem /usr/local/include -internal-isystem /usr/lib64/clang/12.0.0/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -Wwrite-strings -Wno-unused-parameter -Wno-missing-field-initializers -Wno-unused-result -Wno-format-signedness -Wno-error=nonnull -std=gnu99 -fconst-strings -fdebug-compilation-dir /home/mrc0mmand/repos/@redhat-plumbers/systemd-rhel8/build-scan -ferror-limit 19 -fvisibility hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -analyzer-output=html -faddrsig -o /tmp/scan-build-2021-07-16-221226-1465241-1 -x c ../src/core/bpf-firewall.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <arpa/inet.h>
4 #include <assert.h>
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/libbpf.h>
8 #include <net/ethernet.h>
9 #include <net/if.h>
10 #include <netinet/ip.h>
11 #include <netinet/ip6.h>
12 #include <stddef.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <unistd.h>
17
18 #include "alloc-util.h"
19 #include "bpf-firewall.h"
20 #include "bpf-program.h"
21 #include "fd-util.h"
22 #include "ip-address-access.h"
23 #include "unit.h"
24
25 enum {
26         MAP_KEY_PACKETS,
27         MAP_KEY_BYTES,
28 };
29
30 enum {
31         ACCESS_ALLOWED = 1,
32         ACCESS_DENIED = 2,
33 };
34
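The two verdicts are deliberately distinct bits rather than mutually exclusive states: the compiled address checkers OR them into register R8, so a packet matching both an allow list and a deny list ends up with R8 == 3. The exit logic compiled further down (see post_insn in bpf_firewall_compile_bpf) only flips the verdict when R8 equals ACCESS_DENIED exactly, which is what makes allow entries override deny entries. An illustrative C equivalent of that final test:

    /* C equivalent of the post_insn sequence below: R0 starts at 1 (allow) and
     * only the exact value ACCESS_DENIED (2) denies; R8 == 3 stays allowed. */
    static int verdict_from_r8(unsigned r8) {
            return r8 == ACCESS_DENIED ? 0 : 1;
    }
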
35 /* Compile instructions for one list of addresses, one direction and one specific verdict on matches. */
36
37 static int add_lookup_instructions(
38                 BPFProgram *p,
39                 int map_fd,
40                 int protocol,
41                 bool is_ingress,
42                 int verdict) {
43
44         int r, addr_offset, addr_size;
45
46         assert(p);
47         assert(map_fd >= 0);
48
49         switch (protocol) {
50
51         case ETH_P_IP:
52                 addr_size = sizeof(uint32_t);
53                 addr_offset = is_ingress ?
54                         offsetof(struct iphdr, saddr) :
55                         offsetof(struct iphdr, daddr);
56                 break;
57
58         case ETH_P_IPV6:
59                 addr_size = 4 * sizeof(uint32_t);
60                 addr_offset = is_ingress ?
61                         offsetof(struct ip6_hdr, ip6_src.s6_addr) :
62                         offsetof(struct ip6_hdr, ip6_dst.s6_addr);
63                 break;
64
65         default:
66                 return -EAFNOSUPPORT;
67         }
68
69         do {
70                 /* Compare IPv4 with one word instruction (32bit) */
71                 struct bpf_insn insn[] = {
72                         /* If skb->protocol != ETH_P_IP, skip this whole block. The offset will be set later. */
73                         BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),
74
75                         /*
76                          * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
77                          *
78                          * R1: Pointer to the skb
79                          * R2: Data offset
80                          * R3: Destination buffer on the stack (r10 - 4)
81                          * R4: Number of bytes to read (4)
82                          */
83
84                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
85                         BPF_MOV32_IMM(BPF_REG_2, addr_offset),
86
87                         BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
88                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),
89
90                         BPF_MOV32_IMM(BPF_REG_4, addr_size),
91                         BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
92
93                         /*
94                          * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
95                          * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
96                          * has to be set to the maximum possible value.
97                          *
98                          * On success, the looked up value is stored in R0. For this application, the actual
99                          * value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
100                          * matching value.
101                          */
102
103                         BPF_LD_MAP_FD(BPF_REG_1, map_fd),
104                         BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
105                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
106                         BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),
107
108                         BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
109                         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
110                         BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
111                 };
112
113                 /* Jump label fixup */
114                 insn[0].off = ELEMENTSOF(insn) - 1;
115
116                 r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
117                 if (r < 0)
118                         return r;
119
120         } while (false);
121
122         return 0;
123 }
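The stack layout built above is what makes the BPF_FUNC_map_lookup_elem call work against an LPM-trie map: R2 points at fp - addr_size - 4, where the BPF_ST_MEM instruction wrote addr_size * 8 as the prefix length, immediately followed by the address bytes that BPF_FUNC_skb_load_bytes stored at fp - addr_size. Together those form the kernel's lookup key. A sketch of the equivalent user-space view of that key for the IPv4 case, for orientation only:

    #include <stdint.h>

    /* Mirrors struct bpf_lpm_trie_key from <linux/bpf.h> for a /32 lookup:
     * prefixlen is set to the maximum (32) so any matching trie entry wins. */
    struct lpm_key_v4 {
            uint32_t prefixlen;  /* addr_size * 8 == 32 */
            uint8_t  data[4];    /* IPv4 address as loaded via skb_load_bytes */
    };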
124
125 static int bpf_firewall_compile_bpf(
126                 Unit *u,
127                 bool is_ingress,
128                 BPFProgram **ret) {
129
130         struct bpf_insn pre_insn[] = {
131                 /*
132                  * When the eBPF program is entered, R1 contains the address of the skb.
133                  * However, R1-R5 are scratch registers that are not preserved when calling
134                  * into kernel functions, so we need to save anything that's supposed to
135                  * stay around to R6-R9. Save the skb to R6.
136                  */
137                 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
138
139                 /*
140                  * Although we cannot access the skb data directly from eBPF programs used in this
141                  * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
142                  * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
143                  * for later use.
144                  */
145                 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),
146
147                 /*
148                  * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
149                  * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
150                  */
151                 BPF_MOV32_IMM(BPF_REG_8, 0),
152         };
153
154         /*
155          * The access checkers compiled for the configured allowance and denial lists
156          * write to R8 at runtime. The following code prepares for an early exit that
157          * skips the accounting if the packet is denied.
158          *
159          * R0 = 1
160          * if (R8 == ACCESS_DENIED)
161          *         R0 = 0
162          *
163          * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
164          * is allowed to pass.
165          */
166         struct bpf_insn post_insn[] = {
167                 BPF_MOV64_IMM(BPF_REG_0, 1),
168                 BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
169                 BPF_MOV64_IMM(BPF_REG_0, 0),
170         };
171
172         _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
173         int accounting_map_fd, r;
174         bool access_enabled;
175
176         assert(u);
177         assert(ret);
178
179         accounting_map_fd = is_ingress ?
180                 u->ip_accounting_ingress_map_fd :
181                 u->ip_accounting_egress_map_fd;
182
183         access_enabled =
184                 u->ipv4_allow_map_fd >= 0 ||
185                 u->ipv6_allow_map_fd >= 0 ||
186                 u->ipv4_deny_map_fd >= 0 ||
187                 u->ipv6_deny_map_fd >= 0;
188
189         if (accounting_map_fd < 0 && !access_enabled) {
190                 *ret = NULL;
191                 return 0;
192         }
193
194         r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
195         if (r < 0)
196                 return r;
197
198         r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
199         if (r < 0)
200                 return r;
201
202         if (access_enabled) {
203                 /*
204                  * The simple rule this function translates into eBPF instructions is:
205                  *
206                  * - Access will be granted when an address matches an entry in @list_allow
207                  * - Otherwise, access will be denied when an address matches an entry in @list_deny
208                  * - Otherwise, access will be granted
209                  */
210
211                 if (u->ipv4_deny_map_fd >= 0) {
212                         r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
213                         if (r < 0)
214                                 return r;
215                 }
216
217                 if (u->ipv6_deny_map_fd >= 0) {
218                         r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
219                         if (r < 0)
220                                 return r;
221                 }
222
223                 if (u->ipv4_allow_map_fd >= 0) {
224                         r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
225                         if (r < 0)
226                                 return r;
227                 }
228
229                 if (u->ipv6_allow_map_fd >= 0) {
230                         r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
231                         if (r < 0)
232                                 return r;
233                 }
234         }
235
236         r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
237         if (r < 0)
238                 return r;
239
240         if (accounting_map_fd >= 0) {
241                 struct bpf_insn insn[] = {
242                         /*
243                          * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
244                          * The jump label will be fixed up later.
245                          */
246                         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
247
248                         /* Count packets */
249                         BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
250                         BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
251                         BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
252                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
253                         BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
254                         BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
255                         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
256                         BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
257                         BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
258
259                         /* Count bytes */
260                         BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
261                         BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
262                         BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
263                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
264                         BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
265                         BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
266                         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
267                         BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
268                         BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
269
270                         /* Allow the packet to pass */
271                         BPF_MOV64_IMM(BPF_REG_0, 1),
272                 };
273
274                 /* Jump label fixup */
275                 insn[0].off = ELEMENTSOF(insn) - 1;
276
277                 r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
278                 if (r < 0)
279                         return r;
280         }
281
282         do {
283                 /*
284                  * Exit from the eBPF program, R0 contains the verdict.
285                  * 0 means the packet is denied, 1 means the packet may pass.
286                  */
287                 struct bpf_insn insn[] = {
288                         BPF_EXIT_INSN()
289                 };
290
291                 r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
292                 if (r < 0)
293                         return r;
294         } while (false);
295
296         *ret = TAKE_PTR(p);
297
298         return 0;
299 }
300
301 static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) {
302         IPAddressAccessItem *a;
303
304         assert(n_ipv4);
305         assert(n_ipv6);
306
307         LIST_FOREACH(items, a, list) {
308                 switch (a->family) {
309
310                 case AF_INET:
311                         (*n_ipv4)++;
312                         break;
313
314                 case AF_INET6:
315                         (*n_ipv6)++;
316                         break;
317
318                 default:
319                         return -EAFNOSUPPORT;
320                 }
321         }
322
323         return 0;
324 }
325
326 static int bpf_firewall_add_access_items(
327                 IPAddressAccessItem *list,
328                 int ipv4_map_fd,
329                 int ipv6_map_fd,
330                 int verdict) {
331
332         struct bpf_lpm_trie_key *key_ipv4, *key_ipv6;
333         uint64_t value = verdict;
334         IPAddressAccessItem *a;
335         int r;
336
337         key_ipv4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t));
338         key_ipv6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4);
339
340         LIST_FOREACH(items, a, list) {
341                 switch (a->family) {
342
343                 case AF_INET:
344                         key_ipv4->prefixlen = a->prefixlen;
345                         memcpy(key_ipv4->data, &a->address, sizeof(uint32_t));
346
347                         r = bpf_map_update_element(ipv4_map_fd, key_ipv4, &value);
348                         if (r < 0)
349                                 return r;
350
351                         break;
352
353                 case AF_INET6:
354                         key_ipv6->prefixlen = a->prefixlen;
355                         memcpy(key_ipv6->data, &a->address, 4 * sizeof(uint32_t));
356
357                         r = bpf_map_update_element(ipv6_map_fd, key_ipv6, &value);
358                         if (r < 0)
359                                 return r;
360
361                         break;
362
363                 default:
364                         return -EAFNOSUPPORT;
365                 }
366         }
367
368         return 0;
369 }
370
371 static int bpf_firewall_prepare_access_maps(
372                 Unit *u,
373                 int verdict,
374                 int *ret_ipv4_map_fd,
375                 int *ret_ipv6_map_fd) {
376
377         _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
378         size_t n_ipv4 = 0, n_ipv6 = 0;
379         Unit *p;
380         int r;
381
382         assert(ret_ipv4_map_fd);
383         assert(ret_ipv6_map_fd);
384
385         for (p = u; p; p = UNIT_DEREF(p->slice)) {
386                 CGroupContext *cc;
387
388                 cc = unit_get_cgroup_context(p);
389                 if (!cc)
390                         continue;
391
392                 bpf_firewall_count_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny, &n_ipv4, &n_ipv6);
393         }
394
395         if (n_ipv4 > 0) {
396                 ipv4_map_fd = bpf_map_new(
397                                 BPF_MAP_TYPE_LPM_TRIE,
398                                 offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
399                                 sizeof(uint64_t),
400                                 n_ipv4,
401                                 BPF_F_NO_PREALLOC);
402                 if (ipv4_map_fd < 0)
403                         return ipv4_map_fd;
404         }
405
406         if (n_ipv6 > 0) {
407                 ipv6_map_fd = bpf_map_new(
408                                 BPF_MAP_TYPE_LPM_TRIE,
409                                 offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
410                                 sizeof(uint64_t),
411                                 n_ipv6,
412                                 BPF_F_NO_PREALLOC);
413                 if (ipv6_map_fd < 0)
414                         return ipv6_map_fd;
415         }
416
417         for (p = u; p; p = UNIT_DEREF(p->slice)) {
418                 CGroupContext *cc;
419
420                 cc = unit_get_cgroup_context(p);
421                 if (!cc)
422                         continue;
423
424                 r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
425                                                   ipv4_map_fd, ipv6_map_fd, verdict);
426                 if (r < 0)
427                         return r;
428         }
429
430         *ret_ipv4_map_fd = ipv4_map_fd;
431         *ret_ipv6_map_fd = ipv6_map_fd;
432
433         ipv4_map_fd = ipv6_map_fd = -1;
            Although the value stored to 'ipv6_map_fd' is used in the enclosing expression, the value is never actually read from 'ipv6_map_fd'
434         return 0;
435 }
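The dead-store report above is thus an artifact of __attribute__((cleanup)): the final reads of ipv4_map_fd and ipv6_map_fd happen inside the compiler-inserted call to the cleanup handler, which the static analyzer does not track. A self-contained sketch of the same ownership-transfer idiom, with the helper definitions spelled out (illustrative, not the project's exact definitions):

    #include <unistd.h>

    /* Cleanup handler invoked automatically at scope exit; note that it *does*
     * read the variable, which is why the "-1" store is not really dead. */
    static inline void closep(int *fd) {
            if (*fd >= 0)
                    close(*fd);
    }
    #define _cleanup_close_ __attribute__((cleanup(closep)))

    static int make_fd(int *ret) {
            _cleanup_close_ int fd = -1;

            fd = dup(STDIN_FILENO);   /* stand-in for bpf_map_new() */
            if (fd < 0)
                    return -1;

            *ret = fd;
            fd = -1;                  /* disarm the cleanup: the caller owns it now */
            return 0;
    }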
436
437 static int bpf_firewall_prepare_accounting_maps(Unit *u, bool enabled, int *fd_ingress, int *fd_egress) {
438         int r;
439
440         assert(u);
441         assert(fd_ingress);
442         assert(fd_egress);
443
444         if (enabled) {
445                 if (*fd_ingress < 0) {
446                         r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
447                         if (r < 0)
448                                 return r;
449
450                         *fd_ingress = r;
451                 }
452
453                 if (*fd_egress < 0) {
454
455                         r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
456                         if (r < 0)
457                                 return r;
458
459                         *fd_egress = r;
460                 }
461
462         } else {
463                 *fd_ingress = safe_close(*fd_ingress);
464                 *fd_egress = safe_close(*fd_egress);
465
466                 zero(u->ip_accounting_extra);
467         }
468
469         return 0;
470 }
471
472 int bpf_firewall_compile(Unit *u) {
473         CGroupContext *cc;
474         int r, supported;
475
476         assert(u);
477
478         cc = unit_get_cgroup_context(u);
479         if (!cc)
480                 return -EINVAL;
481
482         supported = bpf_firewall_supported();
483         if (supported < 0)
484                 return supported;
485         if (supported == BPF_FIREWALL_UNSUPPORTED) {
486                 log_debug("BPF firewalling not supported on this manager, proceeding without.");
487                 return -EOPNOTSUPP;
488         }
489         if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) {
490                 /* If BPF_F_ALLOW_MULTI is not supported we don't support any BPF magic on inner nodes (i.e. on slice
491                  * units), since that would mean leaf nodes couldn't do any BPF anymore at all. Under the assumption
492                  * that BPF is more interesting on leaf nodes we hence avoid it on inner nodes in that case. This is
493                  * consistent with old systemd behaviour from before v238, where BPF wasn't supported in inner nodes at
494                  * all, either. */
495                 log_debug("BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
496                 return -EOPNOTSUPP;
497         }
498
499         /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves,
500          * but we reuse the accounting maps. That way the firewall in effect always maps to the actual
501          * configuration, but we don't flush out the accounting unnecessarily. */
502
503         u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
504         u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);
505
506         u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
507         u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
508
509         u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
510         u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
511
512         if (u->type != UNIT_SLICE) {
513                 /* In inner nodes we only do accounting, we do not actually bother with access control. However, leaf
514                  * nodes will incorporate all IP access rules set on all their parent nodes. This has the benefit that
515                  * they can optionally cancel out system-wide rules. Since inner nodes can't contain processes this
516                  * means that all configured IP access rules *will* take effect on processes, even though we never
517                  * compile them for inner nodes. */
518
519                 r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
520                 if (r < 0)
521                         return log_error_errno(r, "Preparation of eBPF allow maps failed: %m");
522
523                 r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
524                 if (r < 0)
525                         return log_error_errno(r, "Preparation of eBPF deny maps failed: %m");
526         }
527
528         r = bpf_firewall_prepare_accounting_maps(u, cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
529         if (r < 0)
530                 return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m");
531
532         r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
533         if (r < 0)
534                 return log_error_errno(r, "Compilation for ingress BPF program failed: %m");
535
536         r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
537         if (r < 0)
538                 return log_error_errno(r, "Compilation for egress BPF program failed: %m");
539
540         return 0;
541 }
542
543 int bpf_firewall_install(Unit *u) {
544         _cleanup_free_ char *path = NULL;
545         CGroupContext *cc;
546         int r, supported;
547         uint32_t flags;
548
549         assert(u);
550
551         cc = unit_get_cgroup_context(u);
552         if (!cc)
553                 return -EINVAL;
554         if (!u->cgroup_path)
555                 return -EINVAL;
556         if (!u->cgroup_realized)
557                 return -EINVAL;
558
559         supported = bpf_firewall_supported();
560         if (supported < 0)
561                 return supported;
562         if (supported == BPF_FIREWALL_UNSUPPORTED) {
563                 log_debug("BPF firewalling not supported on this manager, proceeding without.");
564                 return -EOPNOTSUPP;
565         }
566         if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) {
567                 log_debug("BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
568                 return -EOPNOTSUPP;
569         }
570
571         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
572         if (r < 0)
573                 return log_error_errno(r, "Failed to determine cgroup path: %m");
574
575         flags = (supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
576                  (u->type == UNIT_SLICE || unit_cgroup_delegate(u))) ? BPF_F_ALLOW_MULTI : 0;
577
578         /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program, to
579          * minimize the time window when we don't account for IP traffic. */
580         u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
581         u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
582
583         if (u->ip_bpf_egress) {
584                 r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
585                 if (r < 0)
586                         return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
587
588                 /* Remember that this BPF program is installed now. */
589                 u->ip_bpf_egress_installed = bpf_program_ref(u->ip_bpf_egress);
590         }
591
592         if (u->ip_bpf_ingress) {
593                 r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
594                 if (r < 0)
595                         return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
596
597                 u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
598         }
599
600         return 0;
601 }
602
603 int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
604         uint64_t key, packets;
605         int r;
606
607         if (map_fd < 0)
608                 return -EBADF;
609
610         if (ret_packets) {
611                 key = MAP_KEY_PACKETS;
612                 r = bpf_map_lookup_element(map_fd, &key, &packets);
613                 if (r < 0)
614                         return r;
615         }
616
617         if (ret_bytes) {
618                 key = MAP_KEY_BYTES;
619                 r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
620                 if (r < 0)
621                         return r;
622         }
623
624         if (ret_packets)
625                 *ret_packets = packets;
626
627         return 0;
628 }
629
630 int bpf_firewall_reset_accounting(int map_fd) {
631         uint64_t key, value = 0;
632         int r;
633
634         if (map_fd < 0)
635                 return -EBADF;
636
637         key = MAP_KEY_PACKETS;
638         r = bpf_map_update_element(map_fd, &key, &value);
639         if (r < 0)
640                 return r;
641
642         key = MAP_KEY_BYTES;
643         return bpf_map_update_element(map_fd, &key, &value);
644 }
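A short usage sketch of the two accounting helpers above, assuming map_fd holds one of the unit's ip_accounting_*_map_fd descriptors (a fragment; it additionally needs <stdio.h> and <inttypes.h>):

    uint64_t bytes = 0, packets = 0;

    if (bpf_firewall_read_accounting(map_fd, &bytes, &packets) >= 0)
            printf("accounted: %" PRIu64 " bytes, %" PRIu64 " packets\n", bytes, packets);

    /* Zero both counters, e.g. after the values have been persisted. */
    (void) bpf_firewall_reset_accounting(map_fd);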
645
646 int bpf_firewall_supported(void) {
647         struct bpf_insn trivial[] = {
648                 BPF_MOV64_IMM(BPF_REG_0, 1),
649                 BPF_EXIT_INSN()
650         };
651
652         _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
653         static int supported = -1;
654         union bpf_attr attr;
655         int fd, r;
656
657         /* Checks whether BPF firewalling is supported. For this, we check five things:
658          *
659          * a) whether we are privileged
660          * b) whether the unified hierarchy is being used
661          * c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
662          * d) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_SKB programs, which we require
663          * e) the BPF implementation in the kernel supports the BPF_PROG_ATTACH call, which we require
664          *
665          */
666
667         if (supported >= 0)
668                 return supported;
669
670         if (geteuid() != 0) {
671                 log_debug("Not enough privileges, BPF firewalling is not supported.");
672                 return supported = BPF_FIREWALL_UNSUPPORTED;
673         }
674
675         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
676         if (r < 0)
677                 return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
678         if (r == 0) {
679                 log_debug("Not running with unified cgroups, BPF firewalling is not supported.");
680                 return supported = BPF_FIREWALL_UNSUPPORTED;
681         }
682
683         fd = bpf_map_new(BPF_MAP_TYPE_LPM_TRIE,
684                          offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint64_t),
685                          sizeof(uint64_t),
686                          1,
687                          BPF_F_NO_PREALLOC);
688         if (fd < 0) {
689                 log_debug_errno(fd, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
690                 return supported = BPF_FIREWALL_UNSUPPORTED;
691         }
692
693         safe_close(fd);
694
695         r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &program);
696         if (r < 0) {
697                 log_debug_errno(r, "Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m");
698                 return supported = BPF_FIREWALL_UNSUPPORTED;
699         }
700
701         r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
702         if (r < 0) {
703                 log_debug_errno(r, "Can't add trivial instructions to CGROUP SKB BPF program, BPF firewalling is not supported: %m");
704                 return supported = BPF_FIREWALL_UNSUPPORTED;
705         }
706
707         r = bpf_program_load_kernel(program, NULL, 0);
708         if (r < 0) {
709                 log_debug_errno(r, "Can't load kernel CGROUP SKB BPF program, BPF firewalling is not supported: %m");
710                 return supported = BPF_FIREWALL_UNSUPPORTED;
711         }
712
713         /* Unfortunately the kernel allows us to create BPF_PROG_TYPE_CGROUP_SKB programs even when CONFIG_CGROUP_BPF
714          * is turned off at kernel compilation time. This sucks of course: why does it allow us to create a cgroup BPF
715          * program if we can't do a thing with it later?
716          *
717          * We detect this case by issuing the BPF_PROG_ATTACH bpf() call with invalid file descriptors: if
718          * CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on the
719          * parameters are validated however, and that'll fail with EBADF then. */
720
721         attr = (union bpf_attr) {
722                 .attach_type = BPF_CGROUP_INET_EGRESS,
723                 .target_fd = -1,
724                 .attach_bpf_fd = -1,
725         };
726
727         if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) {
728                 if (errno != EBADF) {
729                         log_debug_errno(errno, "Didn't get EBADF from BPF_PROG_ATTACH, BPF firewalling is not supported: %m");
730                         return supported = BPF_FIREWALL_UNSUPPORTED;
731                 }
732
733                 /* YAY! */
734         } else {
735                 log_debug("Wut? Kernel accepted our invalid BPF_PROG_ATTACH call? Something is weird, assuming BPF firewalling is broken and hence not supported.");
736                 return supported = BPF_FIREWALL_UNSUPPORTED;
737         }
738
739         /* So now we know that the BPF program is generally available, let's see if BPF_F_ALLOW_MULTI is also supported
740          * (which was added in kernel 4.15). We use a similar logic as before, but this time we use
741          * BPF_F_ALLOW_MULTI. Since the flags are checked early in the system call we'll get EINVAL if it's not
742          * supported, and EBADF as before if it is available. */
743
744         attr = (union bpf_attr) {
745                 .attach_type = BPF_CGROUP_INET_EGRESS,
746                 .target_fd = -1,
747                 .attach_bpf_fd = -1,
748                 .attach_flags = BPF_F_ALLOW_MULTI,
749         };
750
751         if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) {
752                 if (errno == EBADF) {
753                         log_debug_errno(errno, "Got EBADF when using BPF_F_ALLOW_MULTI, which indicates it is supported. Yay!");
754                         return supported = BPF_FIREWALL_SUPPORTED_WITH_MULTI;
755                 }
756
757                 if (errno == EINVAL)
758                         log_debug_errno(errno, "Got EINVAL error when using BPF_F_ALLOW_MULTI, which indicates it's not supported.");
759                 else
760                         log_debug_errno(errno, "Got unexpected error when using BPF_F_ALLOW_MULTI, assuming it's not supported: %m");
761
762                 return supported = BPF_FIREWALL_SUPPORTED;
763         } else {
764                 log_debug("Wut? Kernel accepted our invalid BPF_PROG_ATTACH+BPF_F_ALLOW_MULTI call? Something is weird, assuming BPF firewalling is broken and hence not supported.");
765                 return supported = BPF_FIREWALL_UNSUPPORTED;
766         }
767 }
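
For readers reproducing the feature probe outside systemd, a minimal sketch of the same EINVAL-versus-EBADF trick using the raw bpf(2) system call (systemd wraps the syscall itself, since glibc exposes no bpf() function; the sys_bpf name here is illustrative):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/bpf.h>

    static int sys_bpf(int cmd, union bpf_attr *attr, size_t size) {
            return syscall(__NR_bpf, cmd, attr, size);
    }

    int main(void) {
            union bpf_attr attr;

            /* Deliberately invalid fds: with CONFIG_CGROUP_BPF off the call fails
             * early with EINVAL; with it on, fd validation fails with EBADF. */
            memset(&attr, 0, sizeof(attr));
            attr.attach_type = BPF_CGROUP_INET_EGRESS;
            attr.target_fd = -1;
            attr.attach_bpf_fd = -1;

            if (sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0 && errno == EBADF)
                    puts("BPF_PROG_ATTACH available (got EBADF)");
            else
                    printf("not available (errno=%d)\n", errno);
            return 0;
    }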