O_CLOEXEC
サンプル
[root@north cloexec]# cat cloexec.c#include <stdio.h> #include <string.h> #include <fcntl.h> #include <stdlib.h> #include <unistd.h> #include <sys/wait.h> void do_fork(int fd); void do_exe(int fdindex); void main(int argc,char **argv) { int fd; if (!strcmp(argv[1], "CLOEXEC")) { fd =open("babakaka.txt", O_RDWR | O_CLOEXEC); do_exe(fd); } if (!strcmp(argv[1], "NO-CLOEXEC")) { fd =open("babakaka.txt", O_RDWR); do_exe(fd); } if (!strcmp(argv[1], "FORK+CLOEXEC")) { fd =open("babakaka.txt", O_RDWR | O_CLOEXEC); do_fork(fd); } } void do_fork(int fd) { char buff[5]; int status; if (!fork()) { sleep(1); buff[3] = 0; read(fd, buff, 3); printf("child :%s\n", buff); } else { buff[3] = 0; read(fd, buff, 3); printf("parent:%s\n", buff); wait(&status); } exit(0); } void do_exe(int fd) { char buff[5], arg_fd[5]; buff[3] = 0; read(fd, buff, 3); printf("parent:%s\n", buff); sprintf(arg_fd, "%d", fd); execl("./fread.out", "./fread", arg_fd, NULL); }[root@north cloexec]# cat fread.c
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Try to read three bytes from the file descriptor whose number is given
 * in argv[1] (a descriptor inherited across exec) and report the result.
 *
 * Prints "exec :<bytes>" when data was read, "exec :no read" when read()
 * returned 0 or failed (e.g. the descriptor was closed by close-on-exec).
 * Returns non-zero only when the argument is missing or not a valid
 * non-negative descriptor number.
 */
int main(int argc, char **argv)
{
	char buff[4];
	char *end;
	long fd;
	ssize_t cnt;

	if (argc < 2) {
		fprintf(stderr, "usage: ./fread.out FD\n");
		return EXIT_FAILURE;
	}

	errno = 0;
	fd = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || errno == ERANGE ||
	    fd < 0 || fd > INT_MAX) {
		fprintf(stderr, "fread: invalid descriptor '%s'\n", argv[1]);
		return EXIT_FAILURE;
	}

	cnt = read((int)fd, buff, sizeof buff - 1);
	if (cnt > 0) {
		/* Terminate after the bytes actually read, not at a fixed index. */
		buff[cnt] = '\0';
		printf("exec :%s\n", buff);
	} else {
		printf("exec :no read\n");
	}
	return 0;
}
[root@north cloexec]# cat babakaka.txt
012345
[root@north cloexec]# ./cloexec.out NO-CLOEXEC
parent:012
exec :345
[root@north cloexec]# ./cloexec.out CLOEXEC
parent:012
exec :no read
[root@north cloexec]# ./cloexec.out FORK+CLOEXEC
parent:012
child :345
カーネル
親プロセスと子プロセスがstruct files_struct *current->filesを共有している場合(fork系の処理のうち、clone()でCLONE_FILESを指定して生成したプロセス等)、親/子プロセスのファイル操作は互いに影響し合う。この状態のまま子プロセスがexecでファイルをクローズすると、files_structを共有しているが故に、親プロセスのファイルをもクローズしたことになってしまう。そこでexecシステムコールは、共有しているcurrent->filesを複写し、その複写をcurrent->filesに設定する。これにより、execでのファイルクローズは複写側にのみ作用し、共有していた他プロセスのファイルはクローズされない。execが成功すると、差し替え前の共有files_structは自プロセスからは参照しなくなるので、参照カウンタfiles->countをデクリメントし、その結果files->count==0になったなら他プロセスからの参照も無いということなので、そのfiles_structを解放する。
int do_execve(const char *filename, const char __user *const __user *__argv, const char __user *const __user *__envp, struct pt_regs *regs) { struct user_arg_ptr argv = { .ptr.native = __argv }; struct user_arg_ptr envp = { .ptr.native = __envp }; return do_execve_common(filename, argv, envp, regs); } static int do_execve_common(const char *filename, struct user_arg_ptr argv, struct user_arg_ptr envp, struct pt_regs *regs) { struct linux_binprm *bprm; struct file *file; struct files_struct *displaced; bool clear_in_exec; int retval; const struct cred *cred = current_cred(); if ((current->flags & PF_NPROC_EXCEEDED) && atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) { retval = -EAGAIN; goto out_ret; } current->flags &= ~PF_NPROC_EXCEEDED; retval = unshare_files(&displaced); <- displaced=current->files、current->files=displacedの複写バッファ if (retval) goto out_ret; retval = -ENOMEM; bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); if (!bprm) goto out_files; retval = prepare_bprm_creds(bprm); if (retval) goto out_free; retval = check_unsafe_exec(bprm); if (retval < 0) goto out_free; clear_in_exec = retval; current->in_execve = 1; file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; sched_exec(); bprm->file = file; bprm->filename = filename; bprm->interp = filename; retval = bprm_mm_init(bprm); if (retval) goto out_file; bprm->argc = count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) goto out; bprm->envc = count(envp, MAX_ARG_STRINGS); if ((retval = bprm->envc) < 0) goto out; retval = prepare_binprm(bprm); if (retval < 0) goto out; retval = copy_strings_kernel(1, &bprm->filename, bprm); if (retval < 0) goto out; bprm->exec = bprm->p; retval = copy_strings(bprm->envc, envp, bprm); if (retval < 0) goto out; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out; retval = search_binary_handler(bprm,regs); <- current->files->fdt->close_on_execビットを削除fidとしてcurrent->files[削除fid]をcloseする if (retval < 0) goto out; 
current->fs->in_exec = 0; current->in_execve = 0; acct_update_integrals(current); free_bprm(bprm); if (displaced) <-search_binary_handler()エラーなら重複更新したcurrent->filesの共有ファイルへの復元 put_files_struct(displaced); return retval; out: if (bprm->mm) { acct_arg_size(bprm, 0); mmput(bprm->mm); } out_file: if (bprm->file) { allow_write_access(bprm->file); fput(bprm->file); } out_unmark: if (clear_in_exec) current->fs->in_exec = 0; current->in_execve = 0; out_free: free_bprm(bprm); out_files: if (displaced) <-search_binary_handler()が成功なら共有diplacedの参照カウント削減で係るファイル削除 reset_files_struct(displaced); out_ret: return retval; } int unshare_files(struct files_struct **displaced) { struct task_struct *task = current; struct files_struct *copy = NULL; int error; error = unshare_fd(CLONE_FILES, ©); if (error || !copy) { *displaced = NULL; return error; } *displaced = task->files; task_lock(task); task->files = copy; task_unlock(task); return 0; } static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) { struct files_struct *fd = current->files; int error = 0; if ((unshare_flags & CLONE_FILES) && (fd && atomic_read(&fd->count) > 1)) { *new_fdp = dup_fd(fd, &error); if (!*new_fdp) return error; } return 0; } void reset_files_struct(struct files_struct *files) { struct task_struct *tsk = current; struct files_struct *old; old = tsk->files; task_lock(tsk); tsk->files = files; task_unlock(tsk); put_files_struct(old); } void put_files_struct(struct files_struct *files) { struct fdtable *fdt; if (atomic_dec_and_test(&files->count)) { close_files(files); rcu_read_lock(); fdt = files_fdtable(files); rcu_read_unlock(); if (fdt != &files->fdtab) __free_fdtable(fdt); kmem_cache_free(files_cachep, files); } }ファイルオープンのO_CLOEXECフラグに係る処理
/*
 * Open filename relative to dfd and return a new fd or a negative error.
 * The fd number is reserved first, with the open flags passed along so
 * that O_CLOEXEC can be recorded at allocation time.
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_flags op;
	int lookup = build_open_flags(flags, mode, &op);
	struct filename *tmp = getname(filename);
	int fd = PTR_ERR(tmp);

	if (!IS_ERR(tmp)) {
		fd = get_unused_fd_flags(flags);
		if (fd >= 0) {
			struct file *f = do_filp_open(dfd, tmp, &op, lookup);
			if (IS_ERR(f)) {
				/* Open failed: give the reserved fd number back. */
				put_unused_fd(fd);
				fd = PTR_ERR(f);
			} else {
				fsnotify_open(f);
				fd_install(fd, f);
			}
		}
		putname(tmp);
	}
	return fd;
}

/* Allocate an fd in current->files, from 0 up to the RLIMIT_NOFILE limit. */
int get_unused_fd_flags(unsigned flags)
{
	return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
}

/*
 * Allocate a free fd number in files within [start, end).
 * Marks the fd open and sets or clears its close-on-exec bit depending on
 * whether flags contains O_CLOEXEC.
 * Returns the fd, -EMFILE when no fd below end is free, or the negative
 * error from expand_files().
 */
int __alloc_fd(struct files_struct *files,
	       unsigned start, unsigned end, unsigned flags)
{
	unsigned int fd;
	int error;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
repeat:
	fdt = files_fdtable(files);
	fd = start;
	/* Begin the search no lower than the cached next_fd hint. */
	if (fd < files->next_fd)
		fd = files->next_fd;

	if (fd < fdt->max_fds)
		fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);

	error = -EMFILE;
	if (fd >= end)
		goto out;

	error = expand_files(files, fd);
	if (error < 0)
		goto out;

	/*
	 * A positive return restarts the search with a freshly fetched fdt
	 * (presumably the table was expanded - confirm in expand_files()).
	 */
	if (error)
		goto repeat;

	if (start <= files->next_fd)
		files->next_fd = fd + 1;

	__set_open_fd(fd, fdt);
	/* This is where O_CLOEXEC is recorded for the new fd. */
	if (flags & O_CLOEXEC)
		__set_close_on_exec(fd, fdt);
	else
		__clear_close_on_exec(fd, fdt);
	error = fd;
out:
	spin_unlock(&files->file_lock);
	return error;
}

/* Set bit fd in the table's close_on_exec bitmap. */
static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
{
	__set_bit(fd, fdt->close_on_exec);
}

/* Set bit nr in the bitmap at addr with a plain read-modify-write. */
static inline void __set_bit(int nr, volatile unsigned long *addr)
{
	unsigned long mask = BIT_MASK(nr);
	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);

	*p |= mask;
}

/* ELF binary format descriptor; its load_binary hook is load_elf_binary(). */
static struct linux_binfmt elf_format = {
	.module = THIS_MODULE,
	.load_binary = load_elf_binary,
	.load_shlib = load_elf_library,
	.core_dump = elf_core_dump,
	.min_coredump = ELF_EXEC_PAGESIZE,
};

/*
 * ELF loader (excerpt). The ':' lines appear to stand for code omitted
 * from this excerpt; the part shown is the call to setup_new_exec().
 */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	:
	setup_new_exec(bprm);
	:
}
unsigned long * current->files->fdt->close_on_execのビット位置をfdのcurrent->files[fd]を削除
/*
 * Excerpt of setup_new_exec(): the ':' line appears to stand for omitted
 * code; the part shown closes the current task's close-on-exec fds.
 */
void setup_new_exec(struct linux_binprm * bprm)
{
	:
	do_close_on_exec(current->files);
}

/* Fetch the fdtable pointer of files through the RCU-checked accessor. */
#define files_fdtable(files) \
	(rcu_dereference_check_fdtable((files), (files)->fdt))

/*
 * Close every fd in files whose close-on-exec bit is set.
 * Walks the close_on_exec bitmap one unsigned long at a time, clears each
 * non-zero word, and closes the open files behind the bits that were set.
 */
void do_close_on_exec(struct files_struct *files)
{
	unsigned i;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
	for (i = 0; ; i++) {
		unsigned long set;
		/* First fd number covered by bitmap word i. */
		unsigned fd = i * BITS_PER_LONG;
		/* Re-read on every word: the lock is dropped inside the inner loop. */
		fdt = files_fdtable(files);
		if (fd >= fdt->max_fds)
			break;
		set = fdt->close_on_exec[i];
		if (!set)
			continue;
		fdt->close_on_exec[i] = 0;
		/* Walk the saved word bit by bit; fd tracks the bit being tested. */
		for ( ; set ; fd++, set >>= 1) {
			struct file *file;
			if (!(set & 1))
				continue;
			file = fdt->fd[fd];
			if (!file)
				continue;
			/* Detach the file from the table before closing it. */
			rcu_assign_pointer(fdt->fd[fd], NULL);
			__put_unused_fd(files, fd);
			/* filp_close() and cond_resched() run without the spinlock held. */
			spin_unlock(&files->file_lock);
			filp_close(file, files);
			cond_resched();
			spin_lock(&files->file_lock);
		}

	}
	spin_unlock(&files->file_lock);
}
補足
コマンド実行は、search_binary_handler()で実行ファイル形式依存のstruct linux_binfmtのload_binaryが実行され、故にO_CLOEXEC実装は実行ファイル形式のload_binary()依存する。実行ファイルによっては、係る実装がされていない事も有り得る。
/*
 * Per-binary-format operations table.
 * According to the accompanying text, search_binary_handler() runs the
 * format's load_binary hook, so close-on-exec handling happens inside
 * whatever that hook does.
 */
struct linux_binfmt {
	struct list_head lh;	/* list linkage (presumably the list of registered formats) */
	struct module *module;
	/* Loads the executable described by the linux_binprm. */
	int (*load_binary)(struct linux_binprm *, struct pt_regs * regs);
	int (*load_shlib)(struct file *);
	int (*core_dump)(struct coredump_params *cprm);
	unsigned long min_coredump;
};