1
Fork 0
mirror of https://https.git.savannah.gnu.org/git/guix.git/ synced 2025-07-10 16:50:43 +02:00

linux-container: Support having a read-only root file system.

Until now, the read-only file system set up by ‘call-with-container’
would always be writable.  With this change, it can be made read-only.
With this patch, only ‘least-authority-wrapper’ switches to a read-only
root file system.

* gnu/build/linux-container.scm (remount-read-only): New procedure.
(mount-file-systems): Add #:writable-root? and #:populate-file-system
and honor them.
(run-container): Likewise.
(call-with-container): Likewise.
* gnu/system/linux-container.scm (container-script): Pass #:writable-root?
to ‘call-with-container’.
(eval/container): Add #:populate-file-system and #:writable-root? and
honor them.
* guix/scripts/environment.scm (launch-environment/container):
Pass #:writable-root? to ‘call-with-container’.
* guix/scripts/home.scm (spawn-home-container): Likewise.
* tests/containers.scm ("call-with-container, mnt namespace, read-only root")
("call-with-container, mnt namespace, writable root"): New tests.

Change-Id: I603e2fd08851338b737bb16c8af3f765e2538906
This commit is contained in:
Ludovic Courtès 2025-04-04 16:36:17 +02:00
parent acc4215644
commit a391394a22
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5
5 changed files with 66 additions and 5 deletions

View file

@ -75,10 +75,16 @@ exists."
(match (get-environment-variables) (match (get-environment-variables)
(((names . _) ...) names)))) (((names . _) ...) names))))
(define (remount-read-only mount-point)
(mount mount-point mount-point "none"
(logior MS_BIND MS_REMOUNT MS_RDONLY)))
;; The container setup procedure closely resembles that of the Docker ;; The container setup procedure closely resembles that of the Docker
;; specification: ;; specification:
;; https://raw.githubusercontent.com/docker/libcontainer/master/SPEC.md ;; https://raw.githubusercontent.com/docker/libcontainer/master/SPEC.md
(define* (mount-file-systems root mounts #:key mount-/sys? mount-/proc?) (define* (mount-file-systems root mounts #:key mount-/sys? mount-/proc?
(populate-file-system (const #t))
writable-root?)
"Mount the essential file systems and the those in MOUNTS, a list of "Mount the essential file systems and the those in MOUNTS, a list of
<file-system> objects, relative to ROOT; then make ROOT the new root directory <file-system> objects, relative to ROOT; then make ROOT the new root directory
for the process." for the process."
@ -177,7 +183,10 @@ for the process."
(chdir "/") (chdir "/")
(umount "real-root" MNT_DETACH) (umount "real-root" MNT_DETACH)
(rmdir "real-root") (rmdir "real-root")
(chmod "/" #o755))) (populate-file-system)
(chmod "/" #o755)
(unless writable-root?
(remount-read-only "/"))))
(define* (initialize-user-namespace pid host-uids (define* (initialize-user-namespace pid host-uids
#:key (guest-uid 0) (guest-gid 0)) #:key (guest-uid 0) (guest-gid 0))
@ -226,13 +235,19 @@ corresponds to the symbols in NAMESPACES."
namespaces))) namespaces)))
(define* (run-container root mounts namespaces host-uids thunk (define* (run-container root mounts namespaces host-uids thunk
#:key (guest-uid 0) (guest-gid 0)) #:key (guest-uid 0) (guest-gid 0)
(populate-file-system (const #t))
writable-root?)
"Run THUNK in a new container process and return its PID. ROOT specifies "Run THUNK in a new container process and return its PID. ROOT specifies
the root directory for the container. MOUNTS is a list of <file-system> the root directory for the container. MOUNTS is a list of <file-system>
objects that specify file systems to mount inside the container. NAMESPACES objects that specify file systems to mount inside the container. NAMESPACES
is a list of symbols that correspond to the possible Linux namespaces: mnt, is a list of symbols that correspond to the possible Linux namespaces: mnt,
ipc, uts, user, and net. ipc, uts, user, and net.
When WRITABLE-ROOT? is false, remount the container's root as read-only before
calling THUNK. Call POPULATE-FILE-SYSTEM before the root is (potentially)
made read-only.
HOST-UIDS specifies the number of host user identifiers to map into the user HOST-UIDS specifies the number of host user identifiers to map into the user
namespace. GUEST-UID and GUEST-GID specify the first UID (respectively GID) namespace. GUEST-UID and GUEST-GID specify the first UID (respectively GID)
that host UIDs (respectively GIDs) map to in the namespace." that host UIDs (respectively GIDs) map to in the namespace."
@ -258,7 +273,12 @@ that host UIDs (respectively GIDs) map to in the namespace."
(mount-file-systems root mounts (mount-file-systems root mounts
#:mount-/proc? (memq 'pid namespaces) #:mount-/proc? (memq 'pid namespaces)
#:mount-/sys? (memq 'net #:mount-/sys? (memq 'net
namespaces))) namespaces)
#:populate-file-system
populate-file-system
#:writable-root?
(or writable-root?
(not (memq 'mnt namespaces)))))
(lambda args (lambda args
;; Forward the exception to the parent process. ;; Forward the exception to the parent process.
;; FIXME: SRFI-35 conditions and non-trivial objects ;; FIXME: SRFI-35 conditions and non-trivial objects
@ -329,6 +349,8 @@ if there are no child processes left."
(host-uids 1) (guest-uid 0) (guest-gid 0) (host-uids 1) (guest-uid 0) (guest-gid 0)
(relayed-signals (list SIGINT SIGTERM)) (relayed-signals (list SIGINT SIGTERM))
(child-is-pid1? #t) (child-is-pid1? #t)
(populate-file-system (const #t))
writable-root?
(process-spawned-hook (const #t))) (process-spawned-hook (const #t)))
"Run THUNK in a new container process and return its exit status; call "Run THUNK in a new container process and return its exit status; call
PROCESS-SPAWNED-HOOK with the PID of the new process that has been spawned. PROCESS-SPAWNED-HOOK with the PID of the new process that has been spawned.
@ -349,6 +371,10 @@ UIDs (respectively GIDs) map to in the namespace.
RELAYED-SIGNALS is the list of signals that are \"relayed\" to the container RELAYED-SIGNALS is the list of signals that are \"relayed\" to the container
process when caught by its parent. process when caught by its parent.
When WRITABLE-ROOT? is false, remount the container's root as read-only before
calling THUNK. Call POPULATE-FILE-SYSTEM before the root is (potentially)
made read-only.
When CHILD-IS-PID1? is true, and if NAMESPACES contains 'pid', then the child When CHILD-IS-PID1? is true, and if NAMESPACES contains 'pid', then the child
process runs directly as PID 1. As such, it is responsible for (1) installing process runs directly as PID 1. As such, it is responsible for (1) installing
signal handlers and (2) reaping terminated processes by calling 'waitpid'. signal handlers and (2) reaping terminated processes by calling 'waitpid'.
@ -402,7 +428,9 @@ load path must be adjusted as needed."
(lambda (root) (lambda (root)
(let ((pid (run-container root mounts namespaces host-uids thunk* (let ((pid (run-container root mounts namespaces host-uids thunk*
#:guest-uid guest-uid #:guest-uid guest-uid
#:guest-gid guest-gid))) #:guest-gid guest-gid
#:populate-file-system populate-file-system
#:writable-root? writable-root?)))
(install-signal-handlers pid) (install-signal-handlers pid)
(process-spawned-hook pid) (process-spawned-hook pid)
(match (waitpid pid) (match (waitpid pid)

View file

@ -312,12 +312,15 @@ Run the container with the given options."))
#:namespaces (if #$shared-network? #:namespaces (if #$shared-network?
(delq 'net %namespaces) (delq 'net %namespaces)
%namespaces) %namespaces)
#:writable-root? #t
#:process-spawned-hook explain))))) #:process-spawned-hook explain)))))
(gexp->script "run-container" script))) (gexp->script "run-container" script)))
(define* (eval/container exp (define* (eval/container exp
#:key #:key
(populate-file-system (const #t))
writable-root?
(mappings '()) (mappings '())
(mounts '()) (mounts '())
(namespaces %namespaces) (namespaces %namespaces)
@ -367,6 +370,8 @@ effects."
(list "-c" (list "-c"
(object->string (object->string
(lowered-gexp-sexp lowered)))))) (lowered-gexp-sexp lowered))))))
#:writable-root? writable-root?
#:populate-file-system populate-file-system
#:namespaces namespaces #:namespaces namespaces
#:guest-uid guest-uid #:guest-uid guest-uid
#:guest-gid guest-gid)))))) #:guest-gid guest-gid))))))

View file

@ -961,6 +961,7 @@ WHILE-LIST."
#:emulate-fhs? emulate-fhs?))) #:emulate-fhs? emulate-fhs?)))
#:guest-uid uid #:guest-uid uid
#:guest-gid gid #:guest-gid gid
#:writable-root? #t ;for backward compatibility
#:namespaces (if network? #:namespaces (if network?
(delq 'net %namespaces) ; share host network (delq 'net %namespaces) ; share host network
%namespaces))))))) %namespaces)))))))

View file

@ -377,6 +377,7 @@ immediately. Return the exit status of the process in the container."
(type "tmpfs") (type "tmpfs")
(check? #f))) (check? #f)))
#:mappings (append network-mappings mappings) #:mappings (append network-mappings mappings)
#:writable-root? #t
#:guest-uid uid #:guest-uid uid
#:guest-gid gid)) #:guest-gid gid))

View file

@ -142,6 +142,32 @@
(assert-exit (= #o755 (stat:perms (lstat "/"))))) (assert-exit (= #o755 (stat:perms (lstat "/")))))
#:namespaces '(user mnt)))) #:namespaces '(user mnt))))
(skip-if-unsupported)
(test-assert "call-with-container, mnt namespace, read-only root"
(zero?
(call-with-container '()
(lambda ()
(assert-exit (and (file-is-directory? "/witness")
(catch 'system-error
(lambda ()
(mkdir "/whatever")
#f)
(lambda args
(= (system-error-errno args) EROFS))))))
#:populate-file-system (lambda ()
(mkdir "/witness"))
#:namespaces '(user mnt))))
(skip-if-unsupported)
(test-assert "call-with-container, mnt namespace, writable root"
(zero?
(call-with-container '()
(lambda ()
(mkdir "whatever")
(assert-exit (file-is-directory? "/whatever")))
#:writable-root? #t
#:namespaces '(user mnt))))
(skip-if-unsupported) (skip-if-unsupported)
(test-assert "container-excursion" (test-assert "container-excursion"
(call-with-temporary-directory (call-with-temporary-directory