//
// Syd: rock-solid application kernel
// src/workers/aes.rs: `syd_aes' encryption thread
//
// Copyright (c) 2024, 2025 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use std::{
    env,
    os::fd::{AsFd, FromRawFd, OwnedFd, RawFd},
    sync::{
        atomic::{AtomicBool, Ordering},
        Arc, RwLock,
    },
    thread,
};

use libseccomp::{scmp_cmp, RawSyscall, ScmpAction, ScmpFilterContext, ScmpSyscall};
use nix::{
    errno::Errno,
    fcntl::{splice, tee, OFlag, SpliceFFlags},
    unistd::{lseek64, pipe2, write, Gid, Uid, Whence},
};

#[cfg(target_arch = "x86")]
use crate::cookie::FTRUNCATE64_COOKIE_ARG3;
use crate::{
    config::*,
    confine::{
        confine_scmp_clone, confine_scmp_clone3, confine_scmp_write, scmp_add_setid_rules,
        ExportMode,
    },
    cookie::{
        safe_ftruncate64, ACCEPT4_COOKIE_ARG4, ACCEPT4_COOKIE_ARG5, FTRUNCATE64_COOKIE_ARG4,
        FTRUNCATE64_COOKIE_ARG5, FTRUNCATE_COOKIE_ARG2, FTRUNCATE_COOKIE_ARG3,
        FTRUNCATE_COOKIE_ARG4, FTRUNCATE_COOKIE_ARG5, SYS_ACCEPT4,
    },
    err::{err2no, SydJoinHandle, SydResult},
    error,
    fs::{lock_fd, retry_on_eintr, retry_on_intr, seal_memfd, FileInfo},
    hash::{
        aes_ctr_enc, aes_ctr_init, hmac_sha256_feed, hmac_sha256_fini, hmac_sha256_init,
        SydHashMap, BLOCK_SIZE, HMAC_TAG_SIZE, IV, IV_SIZE,
    },
    info,
    path::{XPath, XPathBuf},
    sandbox::Flags,
};

/// Access mode of a file tracked by the AES worker.
///
/// The variant order is significant: the derived `Ord` ranks
/// `Read < Append < Write`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum AesMod {
    /// File was opened without write access.
    Read,
    /// File was opened for writing with `O_APPEND`.
    Append,
    /// File was opened for writing without `O_APPEND`.
    Write,
}

impl From<OFlag> for AesMod {
    /// Derives the access mode from open(2) flags.
    ///
    /// Flags without `O_WRONLY`/`O_RDWR` map to [`AesMod::Read`]; writable
    /// flags map to [`AesMod::Append`] when `O_APPEND` is set and to
    /// [`AesMod::Write`] otherwise.
    fn from(flags: OFlag) -> Self {
        let writable = flags.contains(OFlag::O_WRONLY) || flags.contains(OFlag::O_RDWR);
        let appending = flags.contains(OFlag::O_APPEND);

        match (writable, appending) {
            (false, _) => Self::Read,
            (true, true) => Self::Append,
            (true, false) => Self::Write,
        }
    }
}

/// Per-file state tracked by the AES worker, stored in [`AesMap`] keyed by path.
pub(crate) struct AesVal {
    /// Raw descriptor whose contents are encrypted into `enc_fd` at sync time.
    /// The per-file sync thread takes ownership of it and closes it when done.
    pub(crate) crypt_fd: RawFd,
    /// Descriptor of the encrypted output file.
    /// Taken (left as `None`) by the sync thread when it starts syncing.
    pub(crate) enc_fd: Option<OwnedFd>,
    /// Initialization vector for this file.
    /// Taken (left as `None`) by the sync thread when it starts syncing.
    pub(crate) iv: Option<IV>,
    /// File information for this entry.
    // NOTE(review): not read in this file; presumably serves stat requests
    // while the entry is alive -- confirm against the users of the map.
    pub(crate) info: FileInfo,
    /// Access mode the file was opened with; read-only entries are not synced.
    pub(crate) mode: AesMod,
    /// Set to `true` once a sync thread has been spawned for this entry.
    pub(crate) spawned: bool,
}

/// Shared, lock-protected map from a file path to its [`AesVal`] state.
pub(crate) type AesMap = Arc<RwLock<SydHashMap<XPathBuf, AesVal>>>;

/// Worker that runs the `syd_aes` thread: it confines itself with seccomp
/// and spawns one sync thread per pending entry of the shared file map.
#[derive(Clone)]
pub(crate) struct AesWorker {
    /// Descriptor pair `(aes, mac)` handed to `aes_ctr_init` and
    /// `hmac_sha256_init` respectively.
    fdalg: (RawFd, RawFd),
    /// Shared map of files with pending or ongoing encryption.
    files: AesMap,
    /// Sandbox flags consulted when building the seccomp filter.
    flags: Flags,
    /// Whether the plaintext descriptors are memfds, which get sealed before syncing.
    is_memfd: bool,
    /// Set by the main thread to request that the worker exits once idle.
    should_exit: Arc<AtomicBool>,
    /// UID transitions passed to `scmp_add_setid_rules`.
    transit_uids: Vec<(Uid, Uid)>,
    /// GID transitions passed to `scmp_add_setid_rules`.
    transit_gids: Vec<(Gid, Gid)>,
}

impl AesWorker {
    /// Builds a worker from its shared state.
    ///
    /// The UID/GID transition tables are copied into owned vectors so the
    /// worker can be moved into its own thread.
    pub(crate) fn new(
        fdalg: (RawFd, RawFd),
        files: AesMap,
        flags: Flags,
        is_memfd: bool,
        should_exit: Arc<AtomicBool>,
        transit_uids: &[(Uid, Uid)],
        transit_gids: &[(Gid, Gid)],
    ) -> Self {
        let transit_uids = Vec::from(transit_uids);
        let transit_gids = Vec::from(transit_gids);

        Self {
            fdalg,
            files,
            flags,
            is_memfd,
            should_exit,
            transit_uids,
            transit_gids,
        }
    }

    #[allow(clippy::cognitive_complexity)]
    pub(crate) fn try_spawn(self) -> Result<SydJoinHandle<()>, Errno> {
        thread::Builder::new()
            .name("syd_aes".to_string())
            .stack_size(AES_STACK_SIZE)
            .spawn(move || {
                // Honour dry-run when exporting.
                let dry_run =
                    env::var_os(ENV_SKIP_SCMP).is_some() || ExportMode::from_env().is_some();

                if !dry_run {
                    let ctx =
                        Self::prepare_confine(self.flags, &self.transit_uids, &self.transit_gids)?;

                    let safe_setid = self
                        .flags
                        .intersects(Flags::FL_ALLOW_SAFE_SETUID | Flags::FL_ALLOW_SAFE_SETGID);

                    // Log before load, log after will kill.
                    // TODO: Ensure syd_aes threads can write to log fd.
                    info!("ctx": "confine", "op": "confine_aes_thread",
                        "msg": format!("AES thread confined with{} SROP mitigation",
                            if safe_setid { "out" } else { "" }));

                    ctx.load()?;
                } else {
                    error!("ctx": "confine", "op": "confine_aes_thread",
                        "msg": "AES threads are running unconfined in debug mode");
                }

                // Enter main loop.
                Self::main(self.fdalg, self.files, self.is_memfd, self.should_exit)
            })
            .map_err(|err| err2no(&err))
    }

    fn main(
        fdalg: (RawFd, RawFd),
        files: AesMap,
        is_memfd: bool,
        should_exit: Arc<AtomicBool>,
    ) -> SydResult<()> {
        let mut paths = vec![];
        let mut threads = vec![];
        loop {
            // Check if there're any pending encryption requests
            // for which we have not spawned a thread yet.
            // The thread is responsible for removing
            // the entry from the files map. This way
            // we ensure fstat() requests keep working
            // until we're done writing.
            let my_files = files.read().unwrap_or_else(|err| err.into_inner());
            for (path, v) in my_files.iter() {
                if !v.spawned {
                    paths.push((v.crypt_fd, path.clone()));
                }
            }
            drop(my_files);

            if paths.is_empty() {
                if should_exit.load(Ordering::Relaxed) {
                    // main-thread signaled exit.
                    break;
                } else {
                    // wait a bit and retry.
                    std::thread::sleep(AES_CYCLE_TIME);
                    continue;
                }
            }

            // Mark entries for which we're spawning a thread.
            let mut my_files = files.write().unwrap_or_else(|err| err.into_inner());
            for (_, path) in &paths {
                if let Some(info) = my_files.get_mut(path) {
                    info.spawned = true;
                }
            }
            drop(my_files);

            for (crypt_fd, crypt_path) in paths.drain(..) {
                // Spawn a thread to handle the write.
                threads.push(Self::spawn(fdalg, &files, is_memfd, crypt_fd, &crypt_path)?);
            }

            // wait for a cycle.
            std::thread::sleep(AES_CYCLE_TIME);
        }

        // Wait for the ongoing encryption
        // operations before exiting.
        for thread in threads {
            let _ = thread.join();
        }

        Ok(())
    }

    /// Spawns the thread that syncs a single file.
    ///
    /// The thread takes ownership of `crypt_fd`, waits for a write lock on
    /// it, runs [`Self::sync`], removes the entry of `crypt_path` from
    /// `files` and finally closes `crypt_fd`.
    ///
    /// The map entry is removed even when taking the lock fails. In that
    /// case no sync is attempted and the lock error is returned as the
    /// thread result; leaving the entry behind would keep a reference to a
    /// descriptor that this thread is about to close.
    ///
    /// # Errors
    ///
    /// Returns the error of the failed thread creation; creation is retried
    /// by `retry_on_intr`.
    ///
    /// # Panics
    ///
    /// The spawned thread panics if the entry of `crypt_path` is missing
    /// from `files` when it is about to be removed.
    fn spawn(
        fdalg: (RawFd, RawFd),
        files: &AesMap,
        memfd: bool,
        crypt_fd: RawFd,
        crypt_path: &XPath,
    ) -> SydResult<SydJoinHandle<()>> {
        let handle = retry_on_intr(|| {
            let crypt_path = XPathBuf::from(crypt_path);
            let files = Arc::clone(files);

            thread::Builder::new()
                .name("syd_aes".into())
                .stack_size(AES_STACK_SIZE)
                .spawn(move || {
                    // SAFETY: crypt_map keys are valid FDs.
                    let crypt_fd = unsafe { OwnedFd::from_raw_fd(crypt_fd) };

                    // Wait until we take a write lock on the encrypted fd.
                    // This will succeed once all fds owned by the sandbox
                    // process are closed.
                    // Only sync contents to disk when the lock was acquired.
                    let result: SydResult<()> =
                        match retry_on_eintr(|| lock_fd(&crypt_fd, true, true)) {
                            Ok(_) => Self::sync(fdalg, &files, memfd, &crypt_fd, &crypt_path),
                            Err(errno) => Err(errno.into()),
                        };

                    // Remove the file entry regardless of the outcome,
                    // the descriptor it refers to is closed below.
                    let mut files = files.write().unwrap_or_else(|err| err.into_inner());
                    #[allow(clippy::disallowed_methods)]
                    files.remove(&crypt_path).map(drop).unwrap();

                    // Close the encrypted FD.
                    drop(crypt_fd);

                    result
                })
                .map_err(|err| err2no(&err))
        })?;

        Ok(handle)
    }

    #[allow(clippy::arithmetic_side_effects)]
    #[allow(clippy::cognitive_complexity)]
    #[allow(clippy::disallowed_methods)]
    fn sync<Fd: AsFd>(
        fdalg: (RawFd, RawFd),
        files: &AesMap,
        memfd: bool,
        crypt_fd: Fd,
        crypt_path: &XPath,
    ) -> SydResult<()> {
        // Seal memfd to ensure no further writes happen.
        if memfd {
            seal_memfd(&crypt_fd).unwrap();
        }

        let (aes_fd, mac_fd) = fdalg;
        let (enc_fd, file_mode, mut iv) = {
            let mut files = files.write().unwrap_or_else(|err| err.into_inner());
            let v = files.get_mut(crypt_path).unwrap();
            #[allow(clippy::disallowed_methods)]
            (v.enc_fd.take().unwrap(), v.mode, v.iv.take().unwrap())
        };

        // Nothing to do if file was readonly.
        let mut is_append = match file_mode {
            AesMod::Read => return Ok(()),
            AesMod::Append => true,
            _ => false,
        };

        // Handle truncation quickly.
        #[allow(clippy::cast_sign_loss)]
        let data_size = lseek64(&crypt_fd, 0, Whence::SeekEnd).unwrap() as u64;
        if data_size == 0 {
            retry_on_eintr(|| safe_ftruncate64(&enc_fd, 0)).unwrap();
            return Ok(());
        }

        // Handle opened for append but encrypted file is new.
        #[allow(clippy::cast_sign_loss)]
        let mut file_size = lseek64(&enc_fd, 0, Whence::SeekEnd)? as u64;
        if is_append && file_size == 0 {
            is_append = false;
        }

        // Handle opened for append but appended nothing quickly.
        if is_append
            && data_size
                <= file_size.saturating_sub((CRYPT_MAGIC.len() + HMAC_TAG_SIZE + IV_SIZE) as u64)
        {
            return Ok(());
        }

        // We handled quick cases, before possibly
        // truncating the encrypted file, let's
        // ensure we open the connections as expected.

        // Initialize HMAC socket and feed magic header and IV.
        let sock_mac = hmac_sha256_init(&mac_fd, false)?;
        hmac_sha256_feed(&sock_mac, CRYPT_MAGIC, true)?;
        hmac_sha256_feed(&sock_mac, iv.as_ref(), true)?;
        let (pipe_rd_mac, pipe_wr_mac) = pipe2(OFlag::O_CLOEXEC)?;

        // Handle last block re-encryption for append.
        if is_append {
            // Adjust file_size to exclude the header.
            let header_size = (CRYPT_MAGIC.len() + HMAC_TAG_SIZE + IV_SIZE) as u64;
            file_size -= header_size;

            // Calculate the offset of the last full block.
            let last_block_offset = if file_size % BLOCK_SIZE as u64 == 0 {
                file_size
            } else {
                file_size - (file_size % BLOCK_SIZE as u64)
            };

            // Adjust the IV counter based on the last full block offset.
            iv.add_counter(last_block_offset);

            // If there is a partial block at the end, we need to re-encrypt it.
            if last_block_offset < file_size {
                // Truncate the encrypted file to remove the partial block.
                let truncate_offset = header_size + last_block_offset;
                retry_on_eintr(|| safe_ftruncate64(&enc_fd, truncate_offset.try_into().unwrap()))?;

                // Adjust crypt_fd to read from the last full block offset.
                #[allow(clippy::cast_possible_wrap)]
                lseek64(&crypt_fd, last_block_offset as i64, Whence::SeekSet)?;
            } else {
                // No partial block, start reading from the current file size.
                #[allow(clippy::cast_possible_wrap)]
                lseek64(&crypt_fd, file_size as i64, Whence::SeekSet)?;
            }

            // Feed existing encrypted data into HMAC calculation until EOF.
            // Read from the encrypted file starting after the header.
            // Here the last partial block is already stripped.
            #[allow(clippy::cast_possible_wrap)]
            lseek64(&enc_fd, header_size as i64, Whence::SeekSet)?;
            loop {
                let n = retry_on_eintr(|| {
                    splice(
                        &enc_fd,
                        None,
                        &pipe_wr_mac,
                        None,
                        PIPE_BUF_ALG,
                        SpliceFFlags::empty(),
                    )
                })?;
                if n == 0 {
                    break;
                }

                let mut ncopy = n;
                while ncopy > 0 {
                    let n = retry_on_eintr(|| {
                        splice(
                            &pipe_rd_mac,
                            None,
                            &sock_mac,
                            None,
                            ncopy,
                            SpliceFFlags::SPLICE_F_MORE,
                        )
                    })?;
                    if n == 0 {
                        return Err(Errno::EBADMSG.into());
                    }
                    ncopy -= n;
                }
            }
        } else {
            // Non-append mode: overwrite the file.

            // Reset crypt_fd to the beginning.
            lseek64(&crypt_fd, 0, Whence::SeekSet)?;

            if file_size > 0 {
                // Remove previous content,
                // SAFETY: wipe IV to avoid reuse.
                retry_on_eintr(|| safe_ftruncate64(&enc_fd, 0))?;
                lseek64(&enc_fd, 0, Whence::SeekSet)?;
            }

            // Write file magic and IV to the beginning of the file.
            // Leave gap for HMAC to write later.
            // SAFETY: We need the write(2) system call to write file
            // magic, HMAC and IV to the file so our seccomp filter
            // unfortunately allows it. We do our best by only allowing
            // writes up the HMAC size, which is 32 bytes. Arguably,
            // pulling a BROP with only 32 bytes of buffer-space allowed
            // to transfer the binary over a socket would be really
            // tedious.
            // Alternatively writing the HMAC & IV to xattrs would be a
            // dangerous (think backups stripping xattrs), and
            // relatively less portable workaround.
            let buf = &CRYPT_MAGIC;
            let mut nwrite = 0;
            while nwrite < buf.len() {
                #[allow(clippy::arithmetic_side_effects)]
                match write(&enc_fd, &buf[nwrite..]) {
                    Ok(0) => return Err(Errno::EINVAL.into()),
                    Ok(n) => nwrite += n,
                    Err(Errno::EINTR) => continue,
                    Err(errno) => return Err(errno.into()),
                }
            }

            // Move the file offset forward by HMAC_TAG_SIZE to leave
            // space for the HMAC tag. This space is going to be a
            // hole until we write back at the end, see lseek(2).
            // lseek64(enc_fd.as_raw_fd(), HMAC_TAG_SIZE as i64, Whence::SeekCur)?;
            // SAFETY: ^^ This is not portable, instead we zero it out!
            // Write HMAC placeholder (zeroed out) to reserve space for HMAC tag.
            let hmac_placeholder = [0u8; HMAC_TAG_SIZE];
            let mut nwrite = 0;
            while nwrite < hmac_placeholder.len() {
                #[allow(clippy::arithmetic_side_effects)]
                match write(&enc_fd, &hmac_placeholder[nwrite..]) {
                    Ok(0) => return Err(Errno::EINVAL.into()),
                    Ok(n) => nwrite += n,
                    Err(Errno::EINTR) => continue,
                    Err(errno) => return Err(errno.into()),
                }
            }

            // Write the IV to the file.
            let buf = iv.as_ref();
            let mut nwrite = 0;
            while nwrite < buf.len() {
                #[allow(clippy::arithmetic_side_effects)]
                match write(&enc_fd, &buf[nwrite..]) {
                    Ok(0) => return Err(Errno::EINVAL.into()),
                    Ok(n) => nwrite += n,
                    Err(Errno::EINTR) => continue,
                    Err(errno) => return Err(errno.into()),
                }
            }
        }

        // Initialize encryption socket, and set IV.
        let sock_enc = aes_ctr_init(&aes_fd, false)?;
        aes_ctr_enc(&sock_enc, &[], Some(&iv), true)?;

        // The IV is no longer needed.
        drop(iv);

        let (pipe_rd_enc, pipe_wr_enc) = pipe2(OFlag::O_CLOEXEC)?;

        // Feed plaintext via zero-copy into the kernel socket.
        let mut nflush = 0;
        loop {
            let nfeed = retry_on_eintr(|| {
                splice(
                    &crypt_fd,
                    None,
                    &pipe_wr_enc,
                    None,
                    PIPE_BUF_ALG,
                    SpliceFFlags::empty(),
                )
            })?;
            if nfeed == 0 {
                break;
            }

            let mut ncopy = nfeed;
            while ncopy > 0 {
                let n = retry_on_eintr(|| {
                    splice(
                        &pipe_rd_enc,
                        None,
                        &sock_enc,
                        None,
                        ncopy,
                        SpliceFFlags::SPLICE_F_MORE,
                    )
                })?;
                if n == 0 {
                    return Err(Errno::EBADMSG.into());
                }
                ncopy -= n;
            }

            nflush += nfeed;
            #[allow(clippy::cast_possible_truncation)]
            while nflush >= BLOCK_SIZE {
                let len = nflush - (nflush % BLOCK_SIZE);
                let n = retry_on_eintr(|| {
                    splice(
                        &sock_enc,
                        None,
                        &pipe_wr_enc,
                        None,
                        len,
                        SpliceFFlags::SPLICE_F_MORE,
                    )
                })?;
                if n == 0 {
                    return Err(Errno::EBADMSG.into());
                }

                // Duplicate data from encryption pipe to the MAC pipe using tee(2).
                let mut ntee = n;
                while ntee > 0 {
                    let ntee_size = ntee.min(PIPE_BUF_ALG);
                    let n_tee = retry_on_eintr(|| {
                        tee(&pipe_rd_enc, &pipe_wr_mac, ntee_size, SpliceFFlags::empty())
                    })?;
                    if n_tee == 0 {
                        return Err(Errno::EBADMSG.into());
                    }
                    ntee -= n_tee;
                }

                // Splice encrypted data to output file.
                let mut ncopy = n;
                while ncopy > 0 {
                    let n = retry_on_eintr(|| {
                        splice(
                            &pipe_rd_enc,
                            None,
                            &enc_fd,
                            None,
                            ncopy,
                            SpliceFFlags::empty(),
                        )
                    })?;
                    if n == 0 {
                        return Err(Errno::EBADMSG.into());
                    }
                    ncopy -= n;
                    nflush -= n;
                }

                // Splice duplicated data to HMAC socket.
                let mut ncopy_mac = n;
                while ncopy_mac > 0 {
                    let n = retry_on_eintr(|| {
                        splice(
                            &pipe_rd_mac,
                            None,
                            &sock_mac,
                            None,
                            ncopy_mac,
                            SpliceFFlags::SPLICE_F_MORE,
                        )
                    })?;
                    if n == 0 {
                        return Err(Errno::EBADMSG.into());
                    }
                    ncopy_mac -= n;
                }
            }
        }

        // Flush the final batch.
        #[allow(clippy::cast_possible_truncation)]
        while nflush > 0 {
            // Finalize encryption with `false`.
            //
            // Some kernel versions may incorrectly return EINVAL here.
            // Gracefully handle this errno and move on.
            match aes_ctr_enc(&sock_enc, &[], None, false) {
                Ok(_) | Err(Errno::EINVAL) => {}
                Err(errno) => return Err(errno.into()),
            }

            let len = nflush.min(PIPE_BUF_ALG);
            let n = retry_on_eintr(|| {
                splice(
                    &sock_enc,
                    None,
                    &pipe_wr_enc,
                    None,
                    len,
                    SpliceFFlags::empty(),
                )
            })?;
            if n == 0 {
                return Err(Errno::EBADMSG.into());
            }

            // Duplicate data from encryption pipe to the MAC pipe using tee(2).
            let mut ntee = n;
            while ntee > 0 {
                let ntee_size = ntee.min(PIPE_BUF_ALG);
                let n_tee = retry_on_eintr(|| {
                    tee(&pipe_rd_enc, &pipe_wr_mac, ntee_size, SpliceFFlags::empty())
                })?;
                if n_tee == 0 {
                    return Err(Errno::EBADMSG.into());
                }
                ntee -= n_tee;
            }

            // Splice encrypted data to output file.
            let mut ncopy = n;
            while ncopy > 0 {
                let n = retry_on_eintr(|| {
                    splice(
                        &pipe_rd_enc,
                        None,
                        &enc_fd,
                        None,
                        ncopy,
                        SpliceFFlags::empty(),
                    )
                })?;
                if n == 0 {
                    return Err(Errno::EBADMSG.into());
                }
                ncopy -= n;
                nflush -= n;
            }

            // Splice duplicated data to HMAC socket.
            let mut ncopy_mac = n;
            while ncopy_mac > 0 {
                let n = retry_on_eintr(|| {
                    splice(
                        &pipe_rd_mac,
                        None,
                        &sock_mac,
                        None,
                        ncopy_mac,
                        SpliceFFlags::SPLICE_F_MORE,
                    )
                })?;
                if n == 0 {
                    return Err(Errno::EBADMSG.into());
                }
                ncopy_mac -= n;
            }
        }

        // Finalize HMAC computation and retrieve the tag.
        // SAFETY: This is the only place where we use
        // the read(2) system call hence we allow read(2)
        // system call up to 32 bytes which is the size
        // of the HMAC.
        let hmac_tag = hmac_sha256_fini(&sock_mac)?;

        // Seek back to the position after the magic header.
        #[allow(clippy::cast_possible_wrap)]
        lseek64(&enc_fd, CRYPT_MAGIC.len() as i64, Whence::SeekSet)?;

        // Write the HMAC tag to the file.
        let buf = hmac_tag.as_slice();
        let mut nwrite = 0;
        while nwrite < buf.len() {
            #[allow(clippy::arithmetic_side_effects)]
            match write(&enc_fd, &buf[nwrite..]) {
                Ok(0) => return Err(Errno::EINVAL.into()),
                Ok(n) => nwrite += n,
                Err(Errno::EINTR) => continue,
                Err(errno) => return Err(errno.into()),
            }
        }

        // All good, farewell to all OwnedFds!
        Ok(())
    }

    /// Confine AES thread.
    #[allow(clippy::cognitive_complexity)]
    pub(crate) fn prepare_confine(
        flags: Flags,
        transit_uids: &[(Uid, Uid)],
        transit_gids: &[(Gid, Gid)],
    ) -> SydResult<ScmpFilterContext> {
        // Create seccomp filter with default action.
        let mut ctx = ScmpFilterContext::new(ScmpAction::KillProcess)?;

        // Enforce the NO_NEW_PRIVS functionality before
        // loading the seccomp filter into the kernel.
        ctx.set_ctl_nnp(true)?;

        // Disable Speculative Store Bypass mitigations
        // with trace/allow_unsafe_spec_exec:1
        ctx.set_ctl_ssb(flags.allow_unsafe_spec_exec())?;

        // DO NOT synchronize filter to all threads.
        // Main thread will confine itself.
        ctx.set_ctl_tsync(false)?;

        // We kill for bad system call and bad arch.
        ctx.set_act_badarch(ScmpAction::KillProcess)?;

        // Use a binary tree sorted by syscall number if possible.
        let _ = ctx.set_ctl_optimize(2);

        // SAFETY: Do NOT add supported architectures to the filter.
        // This ensures Syd can never run a non-native system call,
        // which we do not need at all.
        // seccomp_add_architectures(&mut ctx)?;

        // Deny open and stat family with ENOSYS rather than KillProcess.
        // We need this because std::thread::spawn has unwanted
        // side-effects such as opening /sys/devices/system/cpu/online
        // on some architectures.
        for sysname in [
            "open",
            "openat",
            "openat2",
            "stat",
            "lstat",
            "statx",
            "newfstatat",
        ] {
            match ScmpSyscall::from_name(sysname) {
                Ok(syscall) => {
                    ctx.add_rule(ScmpAction::Errno(Errno::ENOSYS as i32), syscall)?;
                }
                Err(_) => {
                    info!("ctx": "confine", "op": "allow_aes_syscall",
                        "msg": format!("invalid or unsupported syscall {sysname}"));
                }
            }
        }

        // Allow reads up to MAX(HMAC | IV) bytes.
        // See the note in sync_file().
        let rw_max = u64::try_from(HMAC_TAG_SIZE.max(IV_SIZE))?;
        match ScmpSyscall::from_name("read") {
            Ok(syscall) => {
                ctx.add_rule_conditional(
                    ScmpAction::Allow,
                    syscall,
                    &[scmp_cmp!($arg2 <= rw_max)],
                )?;
            }
            Err(_) => {
                info!("ctx": "confine", "op": "allow_aes_syscall",
                    "msg": "invalid or unsupported syscall read");
            }
        }

        // Allow writes to log-fd OR up to MAX(HMAC | IV) bytes.
        // No proc_pid_mem(5) access required here.
        confine_scmp_write(&mut ctx, Some(rw_max), false)?;

        // Allow clones without namespace flags.
        confine_scmp_clone(&mut ctx)?;
        // Deny clone3 with ENOSYS for compatibility.
        confine_scmp_clone3(&mut ctx)?;

        // ftruncate{,64}(2) may be used only with syscall argument cookies.
        let sysname = "ftruncate";
        #[allow(clippy::useless_conversion)]
        match ScmpSyscall::from_name(sysname) {
            Ok(syscall) => {
                // Secure using syscall argument cookies.
                let mut rules = vec![];
                if !flags.allow_unsafe_nocookie() {
                    rules.extend(&[
                        scmp_cmp!($arg2 == (*FTRUNCATE_COOKIE_ARG2).into()),
                        scmp_cmp!($arg3 == (*FTRUNCATE_COOKIE_ARG3).into()),
                        scmp_cmp!($arg4 == (*FTRUNCATE_COOKIE_ARG4).into()),
                        scmp_cmp!($arg5 == (*FTRUNCATE_COOKIE_ARG5).into()),
                    ]);
                }

                if rules.is_empty() {
                    ctx.add_rule(ScmpAction::Allow, syscall)?;
                } else {
                    ctx.add_rule_conditional(ScmpAction::Allow, syscall, &rules)?;
                }
            }
            Err(_) => {
                info!("ctx": "confine", "op": "allow_emu_syscall",
                    "msg": format!("invalid or unsupported syscall {sysname}"));
            }
        }

        let sysname = "ftruncate64";
        #[allow(clippy::useless_conversion)]
        match ScmpSyscall::from_name(sysname) {
            Ok(syscall) => {
                // Secure using syscall argument cookies.
                let mut rules = vec![];
                if !flags.allow_unsafe_nocookie() {
                    rules.extend(&[
                        #[cfg(target_arch = "x86")]
                        scmp_cmp!($arg3 == (*FTRUNCATE64_COOKIE_ARG3).into()),
                        scmp_cmp!($arg4 == (*FTRUNCATE64_COOKIE_ARG4).into()),
                        scmp_cmp!($arg5 == (*FTRUNCATE64_COOKIE_ARG5).into()),
                    ]);
                }

                if rules.is_empty() {
                    ctx.add_rule(ScmpAction::Allow, syscall)?;
                } else {
                    ctx.add_rule_conditional(ScmpAction::Allow, syscall, &rules)?;
                }
            }
            Err(_) => {
                info!("ctx": "confine", "op": "allow_emu_syscall",
                    "msg": format!("invalid or unsupported syscall {sysname}"));
            }
        }

        // accept4(2) may be used only with syscall argument cookies.
        //
        // We only enforce this on architectures where the system call is direct,
        // and there's no socketcall(2) multiplexer indirection.
        #[allow(clippy::cast_possible_truncation)]
        #[allow(clippy::cast_sign_loss)]
        #[allow(clippy::useless_conversion)]
        #[allow(deprecated)]
        if let Some(syscall) = SYS_ACCEPT4.map(|n| ScmpSyscall::from_raw_syscall(n as RawSyscall)) {
            // Secure using syscall argument cookies.
            let mut rules = vec![];
            if !flags.allow_unsafe_nocookie() {
                rules.extend(&[
                    scmp_cmp!($arg4 == (*ACCEPT4_COOKIE_ARG4).into()),
                    scmp_cmp!($arg5 == (*ACCEPT4_COOKIE_ARG5).into()),
                ]);
            }

            if rules.is_empty() {
                ctx.add_rule(ScmpAction::Allow, syscall)?;
            } else {
                ctx.add_rule_conditional(ScmpAction::Allow, syscall, &rules)?;
            }
        } else {
            match ScmpSyscall::from_name("accept4") {
                Ok(syscall) => {
                    // Allow socketcall(2).
                    ctx.add_rule(ScmpAction::Allow, syscall)?;
                }
                Err(_) => {
                    info!("ctx": "confine", "op": "allow_emu_syscall",
                        "msg": "invalid or unsupported syscall accept4");
                }
            }
        }

        // Deny pipe2(2) O_NOTIFICATION_PIPE flag.
        let sysname = "pipe2";
        #[allow(clippy::cast_sign_loss)]
        if let Ok(syscall) = ScmpSyscall::from_name(sysname) {
            // O_NOTIFICATION_PIPE is equivalent to O_EXCL,
            // see: linux/watch_queue.h
            const O_NOTIFICATION_PIPE: u64 = OFlag::O_EXCL.bits() as u64;

            ctx.add_rule_conditional(
                ScmpAction::Allow,
                syscall,
                &[scmp_cmp!($arg1 & O_NOTIFICATION_PIPE == 0)],
            )?;
        } else {
            info!("ctx": "confine", "op": "allow_aes_syscall",
                "msg": format!("invalid or unsupported syscall {sysname}"));
        }

        // Allow safe fcntl(2) utility calls.
        for sysname in ["fcntl", "fcntl64"] {
            let syscall = match ScmpSyscall::from_name(sysname) {
                Ok(syscall) => syscall,
                Err(_) => {
                    info!("ctx": "confine", "op": "allow_aes_syscall",
                        "msg": format!("invalid or unsupported syscall {sysname}"));
                    continue;
                }
            };

            for op in AES_FCNTL_OPS {
                ctx.add_rule_conditional(ScmpAction::Allow, syscall, &[scmp_cmp!($arg1 == *op)])?;
            }
        }

        // Deny installing new signal handlers for {rt_,}sigaction(2).
        for sysname in ["sigaction", "rt_sigaction"] {
            let syscall = match ScmpSyscall::from_name(sysname) {
                Ok(syscall) => syscall,
                Err(_) => {
                    crate::info!("ctx": "confine", "op": "allow_main_syscall",
                        "msg": format!("invalid or unsupported syscall {sysname}"));
                    continue;
                }
            };

            // Installs a signal handler if first argument is non-NULL.
            // We deny this case, but allow returning the current handler.
            ctx.add_rule_conditional(ScmpAction::Allow, syscall, &[scmp_cmp!($arg1 == 0)])?;
        }

        // Allow safe system calls.
        for sysname in AES_SYSCALLS {
            match ScmpSyscall::from_name(sysname) {
                Ok(syscall) => {
                    ctx.add_rule(ScmpAction::Allow, syscall)?;
                }
                Err(_) => {
                    info!("ctx": "confine", "op": "allow_aes_syscall",
                        "msg": format!("invalid or unsupported syscall {sysname}"));
                }
            }
        }

        // Allow futex system calls.
        for sysname in FUTEX_SYSCALLS {
            match ScmpSyscall::from_name(sysname) {
                Ok(syscall) => {
                    ctx.add_rule(ScmpAction::Allow, syscall)?;
                }
                Err(_) => {
                    info!("ctx": "confine", "op": "allow_aes_syscall",
                        "msg": format!("invalid or unsupported syscall {sysname}"));
                }
            }
        }

        // Allow getid system calls.
        for sysname in GET_ID_SYSCALLS {
            match ScmpSyscall::from_name(sysname) {
                Ok(syscall) => {
                    ctx.add_rule(ScmpAction::Allow, syscall)?;
                }
                Err(_) => {
                    info!("ctx": "confine", "op": "allow_aes_syscall",
                        "msg": format!("invalid or unsupported syscall {sysname}"));
                }
            }
        }

        // Allow UID/GID changing system calls as necessary.
        let safe_setuid = flags.allow_safe_setuid();
        let safe_setgid = flags.allow_safe_setgid();
        if safe_setuid || safe_setgid {
            scmp_add_setid_rules(
                "aes",
                &mut ctx,
                safe_setuid,
                safe_setgid,
                transit_uids,
                transit_gids,
            )?;
        }

        Ok(ctx)
    }
}
