Implementing volume mounts#
As with cgroups, let's play a bit before implementing basic volumes for our container.
If you've used Docker at all, you almost certainly know that you can create a
volume with it. Volumes are a neat way to keep data around: you can start a
database, for example, and mount a volume inside its container so that you don't
lose your data when the container stops. But how does Docker implement these volumes?
Let's create a volume first:
$ docker volume create devoxx
Let's now go back to our PID 1 namespace as we did in the last exercise
$ docker run -it --rm --privileged --pid=host justincormack/nsenter1
# ls -l /var/lib/docker/volumes/devoxx/_data/
total 0
Open a new terminal and run this command
$ docker run --rm -v devoxx:/devoxx alpine sh -c 'echo "world" > /devoxx/hello'
And finally in the nsenter1 terminal
# cat /var/lib/docker/volumes/devoxx/_data/hello
world
By now you hopefully understand that Docker volumes are nothing more than ordinary directories that live inside a special directory managed by Docker!
Let's see now how we can create something similar in our container runtime.
Step 1: create the volume directory#
In the parent process, create a directory inside the rootfs of the container. We
will use the volume directory present in this repository as our source volume.
// setupVolume is the exercise stub for step 1. Once implemented it should
// create the directory at containerPath inside the container's rootfs; as
// written it does nothing and reports success.
func setupVolume(containerPath string) error {
// TODO: Create a directory inside the rootfs of the container
return nil
}
Step 2: bind mount the volume#
Create a function to handle bind mounting. Make sure that you are using the right flags: look at the different mount flags available and decide which ones we should use. Where should the mount be made: in the parent or in the child process?
// mountVolume is the exercise stub for step 2. Once implemented it should
// bind mount source onto target; as written it does nothing and reports
// success.
func mountVolume(source, target string) error {
// TODO: Perform bind mount
return nil
}
Hint
Use the syscall.Mount function
Hint
Don't forget to pass the syscall.MS_PRIVATE flag to the mount call; this ensures
that the mount stays private to our current mount namespace.
Hint
Since this mount is for the container, the mount should be done in the child process i.e. in the process that lives in a new namespace.
Step 3: unmount when done#
Let's clean up once everything is done; we don't want to leave dangling mounts all over the place.
// unmountVolume is the exercise stub for step 3. Once implemented it should
// unmount target and remove the mount point, following the TODO list below;
// as written it does nothing and reports success.
func unmountVolume(target string) error {
// TODO:
// 1. Unmount the volume
// 2. Handle any busy mount errors
// 3. Clean up the mount point directory
return nil
}
Hint
Look at syscall.Unmount function
Step 4: test#
- Test your volume implementation:
# Build the program
make
# Run with sudo
sudo ./bin/devoxx-docker ...
# check the content of the mounted volume
If everything works correctly, the file you created in the container should be visible in the volume directory on the host.
Summary#
We have now implemented volume mounting functionality for containers using bind mounts. This enables data persistence and sharing between the host and container.
Additional Resources#
Solution#
Click to see the complete solution
// Fixed settings shared by the cgroup and volume helpers in this file.
const (
// CGROUP_ROOT is the directory under which the "devoxx-docker" cgroup is created.
CGROUP_ROOT = "/sys/fs/cgroup"
// MEMORY_MAX is the value written to the cgroup's memory.max file.
MEMORY_MAX = "104857600" // 100MB memory limit
// CPU_MAX is the value written to the cgroup's cpu.max file.
CPU_MAX = "50000 100000" // 50ms per 100ms period
// VOLUME_ROOT is joined with a volume name to build a volume's source path.
VOLUME_ROOT = "/volumes" // Base directory for volumes
)
// main dispatches on the first command-line argument:
//
//	child <image> ...      runs child (used internally by run)
//	pull  <image>          runs pull
//	run   <image> <cmd>... runs run
//
// Any usage error or error returned by a subcommand terminates the process
// through log.Fatal.
func main() {
	if len(os.Args) < 2 {
		log.Fatal("Not enough arguments")
	}

	switch os.Args[1] {
	case "child":
		if len(os.Args) < 3 {
			log.Fatal("Missing image name")
		}
		if err := child(os.Args[2]); err != nil {
			log.Fatal(err)
		}
	case "pull":
		if len(os.Args) < 3 {
			log.Fatal("Missing image name")
		}
		if err := pull(os.Args[2]); err != nil {
			log.Fatal(err)
		}
	case "run":
		if len(os.Args) < 4 {
			log.Fatal("Missing image name or command")
		}
		if err := run(); err != nil {
			log.Fatal(err)
		}
	default:
		// log.Fatal formats like fmt.Print, which inserts no space between
		// two string operands; format explicitly so the command name is
		// separated from the message.
		log.Fatalf("Unknown command %q", os.Args[1])
	}
}
// pull fetches image through remote.NewImagePuller, printing a line before
// the pull starts and another once it has finished. A failed pull is
// returned wrapped with a "pull failed" prefix.
func pull(image string) error {
	fmt.Printf("Pulling %s\n", image)

	pullErr := remote.NewImagePuller(image).Pull()
	if pullErr != nil {
		return fmt.Errorf("pull failed: %w", pullErr)
	}

	fmt.Println("Pulling done")
	return nil
}
// setupCgroups creates the "devoxx-docker" cgroup directory under
// CGROUP_ROOT and writes MEMORY_MAX to its memory.max file and CPU_MAX to its
// cpu.max file, in that order. It stops at the first failing step and
// returns that error wrapped with a description of the step.
func setupCgroups() error {
	dir := filepath.Join(CGROUP_ROOT, "devoxx-docker")
	if err := os.MkdirAll(dir, 0755); err != nil {
		return fmt.Errorf("failed to create cgroup directory: %w", err)
	}

	// Each entry is one control file to write; label only feeds the error text.
	limits := []struct {
		file  string
		value string
		label string
	}{
		{file: "memory.max", value: MEMORY_MAX, label: "memory"},
		{file: "cpu.max", value: CPU_MAX, label: "CPU"},
	}
	for _, limit := range limits {
		target := filepath.Join(dir, limit.file)
		if err := os.WriteFile(target, []byte(limit.value), 0644); err != nil {
			return fmt.Errorf("failed to set %s limit: %w", limit.label, err)
		}
	}

	fmt.Printf("Created cgroup at %s with memory limit %s and CPU limit %s\n",
		dir, MEMORY_MAX, CPU_MAX)
	return nil
}
// addProcessToCgroup writes pid, in decimal, to the cgroup.procs file of the
// "devoxx-docker" cgroup under CGROUP_ROOT and prints a confirmation line.
// A write failure is returned wrapped.
func addProcessToCgroup(pid int) error {
	group := filepath.Join(CGROUP_ROOT, "devoxx-docker")
	payload := []byte(fmt.Sprint(pid))

	err := os.WriteFile(filepath.Join(group, "cgroup.procs"), payload, 0644)
	if err != nil {
		return fmt.Errorf("failed to add process to cgroup: %w", err)
	}

	fmt.Printf("Added process %d to cgroup %s\n", pid, group)
	return nil
}
// setupVolume makes sure the directory at containerPath exists, creating any
// missing parents with mode 0755, and prints the path on success. Calling it
// for a directory that already exists is not an error.
func setupVolume(containerPath string) error {
	mkdirErr := os.MkdirAll(containerPath, 0755)
	if mkdirErr != nil {
		return fmt.Errorf("failed to create volume directory: %w", mkdirErr)
	}

	fmt.Printf("Created volume directory at %s\n", containerPath)
	return nil
}
// mountVolume bind mounts source onto target, creating target (and any
// missing parents) first, and then marks the new mount as private.
//
// The bind mount and the propagation change are two separate mount(2) calls
// on purpose: when MS_BIND is set the kernel ignores every other flag except
// MS_REC, so passing MS_PRIVATE in the same call would silently leave the
// propagation type unchanged.
//
// NOTE(review): making the new mount private does not undo propagation of the
// bind mount itself. If the parent mount of target is shared, the mount event
// can still reach peer namespaces unless the caller makes that parent private
// beforehand — confirm against how the child namespace is set up.
//
// It returns a wrapped error if the mount point cannot be created or either
// mount call fails. If only the propagation change fails, the bind mount is
// detached again before returning.
func mountVolume(source, target string) error {
	if err := os.MkdirAll(target, 0755); err != nil {
		return fmt.Errorf("failed to create mount point: %w", err)
	}

	// Step 1: the recursive bind mount itself.
	if err := syscall.Mount(source, target, "", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
		return fmt.Errorf("failed to bind mount volume: %w", err)
	}

	// Step 2: change the propagation type of the mount we just created.
	if err := syscall.Mount("", target, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
		// Best-effort rollback so a half-configured mount is not left
		// behind; the propagation error is the one worth reporting.
		_ = syscall.Unmount(target, syscall.MNT_DETACH)
		return fmt.Errorf("failed to make volume mount private: %w", err)
	}

	fmt.Printf("Mounted volume from %s to %s\n", source, target)
	return nil
}
// unmountVolume detaches the mount at target and then removes the mount
// point directory if it is empty.
//
// The unmount is first attempted with MNT_DETACH; if that reports EBUSY it is
// retried with MNT_FORCE. Any other unmount error is returned wrapped, and in
// that case the directory is left untouched.
//
// The mount point is removed with os.Remove rather than os.RemoveAll: the
// directory may have existed, with content, before the volume was mounted
// over it, and a recursive delete would destroy that content. A mount point
// that is not empty or no longer exists is left as is and not treated as an
// error.
func unmountVolume(target string) error {
	if err := syscall.Unmount(target, syscall.MNT_DETACH); err != nil {
		if err != syscall.EBUSY {
			return fmt.Errorf("failed to unmount volume: %w", err)
		}
		fmt.Printf("Mount point busy, attempting force unmount of %s\n", target)
		if err := syscall.Unmount(target, syscall.MNT_FORCE); err != nil {
			return fmt.Errorf("failed to force unmount volume: %w", err)
		}
	}

	if err := os.Remove(target); err != nil && !os.IsNotExist(err) {
		// os.Remove reports a non-empty directory as a *os.PathError
		// wrapping ENOTEMPTY (EEXIST on some systems).
		pathErr, ok := err.(*os.PathError)
		notEmpty := ok && (pathErr.Err == syscall.ENOTEMPTY || pathErr.Err == syscall.EEXIST)
		if !notEmpty {
			return fmt.Errorf("failed to remove mount point directory: %w", err)
		}
	}

	fmt.Printf("Unmounted and cleaned up volume at %s\n", target)
	return nil
}
// child runs inside the namespaces created by run. It expects os.Args to be
//
//	<exe> child <image> <command> [-v <name>:<target>] [args...]
//
// It sets the hostname to "container", optionally bind mounts
// VOLUME_ROOT/<name> onto /<target>, and then runs <command> with the
// remaining arguments, wired to this process's stdin, stdout and stderr.
//
// The "-v <spec>" pair is consumed here and is not forwarded to the command.
// A mounted volume is unmounted again when child returns; a failure to do so
// is reported on stderr and does not change the returned error.
//
// The image parameter is not used by this function.
//
// It returns an error if the command is missing, the volume specification is
// malformed, the hostname or mount setup fails, or the command itself fails.
func child(image string) error {
	// Validate the argument layout before touching any system state.
	if len(os.Args) < 4 {
		return fmt.Errorf("missing command")
	}
	cmdArgs := os.Args[4:]

	fmt.Println("CHILD: Hello from child, my pid is", os.Getpid())
	fmt.Println("Hello from child")

	if err := syscall.Sethostname([]byte("container")); err != nil {
		return err
	}

	// Read the hostname back to verify the change took effect.
	hostname, err := os.Hostname()
	if err != nil {
		return err
	}
	fmt.Printf("CHILD Hostname: %s\n", hostname)

	if len(cmdArgs) > 0 && cmdArgs[0] == "-v" {
		if len(cmdArgs) < 2 {
			return fmt.Errorf("missing volume specification after -v")
		}
		volumeSpec := cmdArgs[1]
		// Drop "-v <spec>" so the container command does not receive it.
		cmdArgs = cmdArgs[2:]

		parts := strings.Split(volumeSpec, ":")
		if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
			return fmt.Errorf("invalid volume specification: %s", volumeSpec)
		}
		source := filepath.Join(VOLUME_ROOT, parts[0])
		target := filepath.Join("/", parts[1])

		if err := mountVolume(source, target); err != nil {
			return err
		}
		defer func() {
			if err := unmountVolume(target); err != nil {
				fmt.Fprintf(os.Stderr, "CHILD: volume cleanup failed: %v\n", err)
			}
		}()
	}

	cmd := exec.Command(os.Args[3], cmdArgs...)
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	return cmd.Run()
}
// run creates a fresh volume directory and re-executes this binary as
// "child" with the remaining command-line arguments, placing it in new UTS,
// PID and mount namespaces. The child shares this process's stdin, stdout
// and stderr.
//
// Once the child has been waited for, its exit code is printed whether or
// not it exited successfully. run returns a wrapped error if the volume
// directory cannot be created, the child cannot be started, or waiting for
// it reports an error (which includes a non-zero exit status).
//
// NOTE(review): the vol-<timestamp> directory created here is not passed to
// the child, which builds its bind-mount source from VOLUME_ROOT and the name
// given after -v. Confirm whether this directory is meant to be used.
func run() error {
	volumePath := filepath.Join(VOLUME_ROOT, fmt.Sprintf("vol-%d", time.Now().UnixNano()))
	if err := setupVolume(volumePath); err != nil {
		return err
	}

	cmd := exec.Command("/proc/self/exe", append([]string{"child"}, os.Args[2:]...)...)
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags: syscall.CLONE_NEWUTS | syscall.CLONE_NEWPID | syscall.CLONE_NEWNS,
	}

	if err := cmd.Start(); err != nil {
		return fmt.Errorf("start failed: %w", err)
	}

	waitErr := cmd.Wait()
	// Wait returns an error for any non-zero exit, so the exit code has to
	// be reported before the error check or it would only ever print 0.
	if cmd.ProcessState != nil {
		fmt.Printf("Container exited with code %d\n", cmd.ProcessState.ExitCode())
	}
	if waitErr != nil {
		return fmt.Errorf("wait failed: %w", waitErr)
	}
	return nil
}