LCOV - code coverage report
Current view: top level - nspawn - nspawn-seccomp.c (source / functions) Hit Total Coverage
Test: main_coverage.info Lines: 0 48 0.0 %
Date: 2019-08-22 15:41:25 Functions: 0 2 0.0 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: LGPL-2.1+ */
       2             : 
       3             : #include <errno.h>
       4             : #include <linux/netlink.h>
       5             : #include <sys/capability.h>
       6             : #include <sys/socket.h>
       7             : #include <sys/types.h>
       8             : 
       9             : #if HAVE_SECCOMP
      10             : #include <seccomp.h>
      11             : #endif
      12             : 
      13             : #include "alloc-util.h"
      14             : #include "log.h"
      15             : #include "nspawn-seccomp.h"
      16             : #if HAVE_SECCOMP
      17             : #include "seccomp-util.h"
      18             : #endif
      19             : #include "string-util.h"
      20             : #include "strv.h"
      21             : 
      22             : #if HAVE_SECCOMP
      23             : 
      24           0 : static int seccomp_add_default_syscall_filter(
      25             :                 scmp_filter_ctx ctx,
      26             :                 uint32_t arch,
      27             :                 uint64_t cap_list_retain,
      28             :                 char **syscall_whitelist,
      29             :                 char **syscall_blacklist) {
      30             : 
      31             :         static const struct {
      32             :                 uint64_t capability;
      33             :                 const char* name;
      34             :         } whitelist[] = {
      35             :                 /* Let's use set names where we can */
      36             :                 { 0,                  "@aio"                   },
      37             :                 { 0,                  "@basic-io"              },
      38             :                 { 0,                  "@chown"                 },
      39             :                 { 0,                  "@default"               },
      40             :                 { 0,                  "@file-system"           },
      41             :                 { 0,                  "@io-event"              },
      42             :                 { 0,                  "@ipc"                   },
      43             :                 { 0,                  "@mount"                 },
      44             :                 { 0,                  "@network-io"            },
      45             :                 { 0,                  "@process"               },
      46             :                 { 0,                  "@resources"             },
      47             :                 { 0,                  "@setuid"                },
      48             :                 { 0,                  "@signal"                },
      49             :                 { 0,                  "@sync"                  },
      50             :                 { 0,                  "@timer"                 },
      51             : 
      52             :                 /* The following four are sets we optionally enable, in case the caps have been configured for it */
      53             :                 { CAP_SYS_TIME,       "@clock"                 },
      54             :                 { CAP_SYS_MODULE,     "@module"                },
      55             :                 { CAP_SYS_RAWIO,      "@raw-io"                },
      56             :                 { CAP_IPC_LOCK,       "@memlock"               },
      57             : 
      58             :                 /* Plus a good set of additional syscalls which are not part of any of the groups above */
      59             :                 { 0,                  "brk"                    },
      60             :                 { 0,                  "capget"                 },
      61             :                 { 0,                  "capset"                 },
      62             :                 { 0,                  "copy_file_range"        },
      63             :                 { 0,                  "fadvise64"              },
      64             :                 { 0,                  "fadvise64_64"           },
      65             :                 { 0,                  "flock"                  },
      66             :                 { 0,                  "get_mempolicy"          },
      67             :                 { 0,                  "getcpu"                 },
      68             :                 { 0,                  "getpriority"            },
      69             :                 { 0,                  "getrandom"              },
      70             :                 { 0,                  "ioctl"                  },
      71             :                 { 0,                  "ioprio_get"             },
      72             :                 { 0,                  "kcmp"                   },
      73             :                 { 0,                  "madvise"                },
      74             :                 { 0,                  "mincore"                },
      75             :                 { 0,                  "mprotect"               },
      76             :                 { 0,                  "mremap"                 },
      77             :                 { 0,                  "name_to_handle_at"      },
      78             :                 { 0,                  "oldolduname"            },
      79             :                 { 0,                  "olduname"               },
      80             :                 { 0,                  "personality"            },
      81             :                 { 0,                  "readahead"              },
      82             :                 { 0,                  "readdir"                },
      83             :                 { 0,                  "remap_file_pages"       },
      84             :                 { 0,                  "sched_get_priority_max" },
      85             :                 { 0,                  "sched_get_priority_min" },
      86             :                 { 0,                  "sched_getaffinity"      },
      87             :                 { 0,                  "sched_getattr"          },
      88             :                 { 0,                  "sched_getparam"         },
      89             :                 { 0,                  "sched_getscheduler"     },
      90             :                 { 0,                  "sched_rr_get_interval"  },
      91             :                 { 0,                  "sched_yield"            },
      92             :                 { 0,                  "seccomp"                },
      93             :                 { 0,                  "sendfile"               },
      94             :                 { 0,                  "sendfile64"             },
      95             :                 { 0,                  "setdomainname"          },
      96             :                 { 0,                  "setfsgid"               },
      97             :                 { 0,                  "setfsgid32"             },
      98             :                 { 0,                  "setfsuid"               },
      99             :                 { 0,                  "setfsuid32"             },
     100             :                 { 0,                  "sethostname"            },
     101             :                 { 0,                  "setpgid"                },
     102             :                 { 0,                  "setsid"                 },
     103             :                 { 0,                  "splice"                 },
     104             :                 { 0,                  "sysinfo"                },
     105             :                 { 0,                  "tee"                    },
     106             :                 { 0,                  "umask"                  },
     107             :                 { 0,                  "uname"                  },
     108             :                 { 0,                  "userfaultfd"            },
     109             :                 { 0,                  "vmsplice"               },
     110             : 
     111             :                 /* The following individual syscalls are added depending on specified caps */
     112             :                 { CAP_SYS_PACCT,      "acct"                   },
     113             :                 { CAP_SYS_PTRACE,     "process_vm_readv"       },
     114             :                 { CAP_SYS_PTRACE,     "process_vm_writev"      },
     115             :                 { CAP_SYS_PTRACE,     "ptrace"                 },
     116             :                 { CAP_SYS_BOOT,       "reboot"                 },
     117             :                 { CAP_SYSLOG,         "syslog"                 },
     118             :                 { CAP_SYS_TTY_CONFIG, "vhangup"                },
     119             : 
     120             :                 /*
     121             :                  * The following syscalls and groups are knowingly excluded:
     122             :                  *
     123             :                  * @cpu-emulation
     124             :                  * @keyring           (NB: keyring is not namespaced!)
     125             :                  * @obsolete
     126             :                  * @swap
     127             :                  *
     128             :                  * bpf                (NB: bpffs is not namespaced!)
     129             :                  * fanotify_init
     130             :                  * fanotify_mark
     131             :                  * kexec_file_load
     132             :                  * kexec_load
     133             :                  * lookup_dcookie
     134             :                  * nfsservctl
     135             :                  * open_by_handle_at
     136             :                  * perf_event_open
     137             :                  * pkey_alloc
     138             :                  * pkey_free
     139             :                  * pkey_mprotect
     140             :                  * quotactl
     141             :                  */
     142             :         };
     143             : 
     144             :         int r;
     145             :         size_t i;
     146             :         char **p;
     147             : 
     148           0 :         for (i = 0; i < ELEMENTSOF(whitelist); i++) {
     149           0 :                 if (whitelist[i].capability != 0 && (cap_list_retain & (1ULL << whitelist[i].capability)) == 0)
     150           0 :                         continue;
     151             : 
     152           0 :                 r = seccomp_add_syscall_filter_item(ctx, whitelist[i].name, SCMP_ACT_ALLOW, syscall_blacklist, false);
     153           0 :                 if (r < 0)
     154           0 :                         return log_error_errno(r, "Failed to add syscall filter item %s: %m", whitelist[i].name);
     155             :         }
     156             : 
     157           0 :         STRV_FOREACH(p, syscall_whitelist) {
     158           0 :                 r = seccomp_add_syscall_filter_item(ctx, *p, SCMP_ACT_ALLOW, syscall_blacklist, false);
     159           0 :                 if (r < 0)
     160           0 :                         log_warning_errno(r, "Failed to add rule for system call %s on %s, ignoring: %m",
     161             :                                           *p, seccomp_arch_to_string(arch));
     162             :         }
     163             : 
     164           0 :         return 0;
     165             : }
     166             : 
     167           0 : int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **syscall_blacklist) {
     168             :         uint32_t arch;
     169             :         int r;
     170             : 
     171           0 :         if (!is_seccomp_available()) {
     172           0 :                 log_debug("SECCOMP features not detected in the kernel, disabling SECCOMP filterering");
     173           0 :                 return 0;
     174             :         }
     175             : 
     176           0 :         SECCOMP_FOREACH_LOCAL_ARCH(arch) {
     177           0 :                 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
     178             : 
     179           0 :                 log_debug("Applying whitelist on architecture: %s", seccomp_arch_to_string(arch));
     180             : 
     181           0 :                 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ERRNO(EPERM));
     182           0 :                 if (r < 0)
     183           0 :                         return log_error_errno(r, "Failed to allocate seccomp object: %m");
     184             : 
     185           0 :                 r = seccomp_add_default_syscall_filter(seccomp, arch, cap_list_retain, syscall_whitelist, syscall_blacklist);
     186           0 :                 if (r < 0)
     187           0 :                         return r;
     188             : 
     189           0 :                 r = seccomp_load(seccomp);
     190           0 :                 if (ERRNO_IS_SECCOMP_FATAL(r))
     191           0 :                         return log_error_errno(r, "Failed to install seccomp filter: %m");
     192           0 :                 if (r < 0)
     193           0 :                         log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
     194             :         }
     195             : 
     196           0 :         SECCOMP_FOREACH_LOCAL_ARCH(arch) {
     197           0 :                 _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
     198             : 
     199           0 :                 log_debug("Applying NETLINK_AUDIT mask on architecture: %s", seccomp_arch_to_string(arch));
     200             : 
     201           0 :                 r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
     202           0 :                 if (r < 0)
     203           0 :                         return log_error_errno(r, "Failed to allocate seccomp object: %m");
     204             : 
     205             :                 /*
     206             :                   Audit is broken in containers, much of the userspace audit hookup will fail if running inside a
     207             :                   container. We don't care and just turn off creation of audit sockets.
     208             : 
     209             :                   This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail with EAFNOSUPPORT which audit userspace uses
     210             :                   as indication that audit is disabled in the kernel.
     211             :                 */
     212             : 
     213           0 :                 r = seccomp_rule_add_exact(
     214             :                                 seccomp,
     215             :                                 SCMP_ACT_ERRNO(EAFNOSUPPORT),
     216             :                                 SCMP_SYS(socket),
     217             :                                 2,
     218           0 :                                 SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
     219           0 :                                 SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
     220           0 :                 if (r < 0) {
     221           0 :                         log_debug_errno(r, "Failed to add audit seccomp rule, ignoring: %m");
     222           0 :                         continue;
     223             :                 }
     224             : 
     225           0 :                 r = seccomp_load(seccomp);
     226           0 :                 if (ERRNO_IS_SECCOMP_FATAL(r))
     227           0 :                         return log_error_errno(r, "Failed to install seccomp audit filter: %m");
     228           0 :                 if (r < 0)
     229           0 :                         log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
     230             :         }
     231             : 
     232           0 :         return 0;
     233             : }
     234             : 
     235             : #else
     236             : 
     237             : int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **syscall_blacklist) {
     238             :         return 0;
     239             : }
     240             : 
     241             : #endif

Generated by: LCOV version 1.14