O_CLOEXEC
サンプル
[root@north cloexec]# cat cloexec.c
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
/* Reads from fd in both the parent and a forked child, then exits (defined later in this file). */
void do_fork(int fd);
/* Reads from the descriptor, then execs ./fread.out with the descriptor number as its argument (defined later in this file). */
void do_exe(int fdindex);
/*
 * O_CLOEXEC demonstration driver. argv[1] selects the scenario:
 *   CLOEXEC       open babakaka.txt with O_CLOEXEC, then exec ./fread.out
 *   NO-CLOEXEC    open babakaka.txt without O_CLOEXEC, then exec ./fread.out
 *   FORK+CLOEXEC  open babakaka.txt with O_CLOEXEC, then fork (no exec)
 *
 * Returns 1 on a usage error, when open() fails, or when do_exe() comes back
 * (which only happens if the exec itself failed). do_fork() terminates the
 * process on its own, so that path never returns here.
 */
int main(int argc, char **argv)
{
	const char *path = "babakaka.txt";
	int flags;
	int use_fork = 0;
	int fd;

	/* argv[1] is mandatory; the original dereferenced it unconditionally. */
	if (argc < 2)
		goto usage;

	if (!strcmp(argv[1], "CLOEXEC")) {
		flags = O_RDWR | O_CLOEXEC;
	} else if (!strcmp(argv[1], "NO-CLOEXEC")) {
		flags = O_RDWR;
	} else if (!strcmp(argv[1], "FORK+CLOEXEC")) {
		flags = O_RDWR | O_CLOEXEC;
		use_fork = 1;
	} else {
		goto usage;
	}

	fd = open(path, flags);
	if (fd < 0) {
		perror(path);
		return 1;
	}

	if (use_fork)
		do_fork(fd);	/* exits the process */
	else
		do_exe(fd);	/* returns only if exec failed */

	return 1;

usage:
	fprintf(stderr, "usage: %s CLOEXEC|NO-CLOEXEC|FORK+CLOEXEC\n",
		(argc > 0 && argv[0]) ? argv[0] : "cloexec.out");
	return 1;
}
/*
 * Forks, then reads up to 3 bytes from fd in both processes and prints them
 * ("parent:..." / "child :..."). Both processes leave through exit(0); the
 * parent first waits for the child. Exits with status 1 if fork() fails.
 *
 * fd: an open, readable file descriptor.
 */
void do_fork(int fd)
{
	char buff[5];
	ssize_t cnt;
	pid_t pid = fork();

	if (pid < 0) {
		/* The original fell into the parent branch on fork failure. */
		perror("fork");
		exit(1);
	}

	if (pid == 0) {
		/* Presumably delays the child so the parent's read happens first. */
		sleep(1);
		cnt = read(fd, buff, 3);
		/* Terminate at the number of bytes actually read (0 on EOF/error). */
		buff[cnt > 0 ? cnt : 0] = '\0';
		printf("child :%s\n", buff);
	} else {
		cnt = read(fd, buff, 3);
		buff[cnt > 0 ? cnt : 0] = '\0';
		printf("parent:%s\n", buff);
		wait(NULL);
	}
	exit(0);
}
/*
 * Reads up to 3 bytes from fd, prints them as "parent:...", then replaces the
 * process image with ./fread.out, passing the descriptor number as argv[1].
 * Returns only if execl() fails, after reporting the error on stderr.
 *
 * fd: file descriptor number to read from and to hand to the exec'd program.
 */
void do_exe(int fd)
{
	char buff[5];
	char arg_fd[16];	/* room for any int in decimal, sign included */
	ssize_t cnt;

	cnt = read(fd, buff, 3);
	/* Terminate at the number of bytes actually read (0 on EOF/error). */
	buff[cnt > 0 ? cnt : 0] = '\0';
	printf("parent:%s\n", buff);
	/* exec discards stdio buffers; flush or the line is lost when stdout is not a tty. */
	fflush(stdout);

	snprintf(arg_fd, sizeof arg_fd, "%d", fd);
	/* The variadic terminator must be a null pointer of type char *. */
	execl("./fread.out", "./fread", arg_fd, (char *)NULL);
	perror("execl ./fread.out");
}
[root@north cloexec]# cat fread.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Exec target of the O_CLOEXEC demo. argv[1] is a decimal file descriptor
 * number inherited from the caller. Tries to read 3 bytes from it and prints
 * "exec :<data>" on success or "exec :no read" when nothing could be read
 * (for example because the descriptor was closed on exec).
 *
 * Returns 0 after printing a result, 1 on a missing or malformed argument.
 */
int main(int argc, char **argv)
{
	char buff[5];
	char *end;
	long fd;
	ssize_t cnt;

	if (argc < 2) {
		fprintf(stderr, "usage: fread <fd>\n");
		return 1;
	}

	/* strtol instead of atoi so a non-numeric argument is rejected. */
	fd = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0') {
		fprintf(stderr, "fread: invalid fd '%s'\n", argv[1]);
		return 1;
	}

	cnt = read((int)fd, buff, 3);
	if (cnt > 0) {
		/* Terminate at the number of bytes actually read. */
		buff[cnt] = '\0';
		printf("exec :%s\n", buff);
	} else {
		printf("exec :no read\n");
	}
	return 0;
}
[root@north cloexec]# cat babakaka.txt
012345
[root@north cloexec]# ./cloexec.out NO-CLOEXEC
parent:012
exec :345
[root@north cloexec]# ./cloexec.out CLOEXEC
parent:012
exec :no read
[root@north cloexec]# ./cloexec.out FORK+CLOEXEC
parent:012
child :345
カーネル
CLONE_FILES付きのclone(スレッド生成など)で作られた子プロセスは、親プロセスのstruct files_struct *current->filesを共有し、親/子プロセスのファイル操作は互いに影響し合う。このため、親プロセスとfilesを共有している子プロセスがexecでファイルをクローズすると、共有しているが故に親プロセスのファイルまでクローズしたことになってしまう。そこでexecシステムコールは、共有しているcurrent->filesを複写したものをcurrent->filesに設定し直し、execによるファイルのクローズが、共有している他プロセスに及ばないようにする。複写したfilesに差し替えたことで元の共有filesはこのプロセスから参照されなくなるため、その参照カウンタをfiles->count--し、結果files->count==0になったなら他プロセスからの参照も無いので、そのfilesを解放する。
/*
 * Native execve entry: wraps the user-space argv/envp pointers in
 * struct user_arg_ptr (native variant) and hands off to do_execve_common().
 * Returns whatever do_execve_common() returns.
 */
int do_execve(const char *filename,
	      const char __user *const __user *__argv,
	      const char __user *const __user *__envp,
	      struct pt_regs *regs)
{
	struct user_arg_ptr native_argv = { .ptr.native = __argv };
	struct user_arg_ptr native_envp = { .ptr.native = __envp };
	int ret;

	ret = do_execve_common(filename, native_argv, native_envp, regs);
	return ret;
}
/*
 * Shared implementation of execve.
 *
 * Sequence visible here: check the deferred RLIMIT_NPROC condition, detach
 * from a shared descriptor table (unshare_files), allocate and fill the
 * linux_binprm (credentials, executable file, mm, argument/environment
 * strings), then let search_binary_handler() load the new image.
 *
 * Returns the (non-negative) result of search_binary_handler() on success or
 * a negative errno; the out* labels unwind in reverse order of acquisition.
 */
static int do_execve_common(const char *filename,
				struct user_arg_ptr argv,
				struct user_arg_ptr envp,
				struct pt_regs *regs)
{
	struct linux_binprm *bprm;
	struct file *file;
	struct files_struct *displaced;
	bool clear_in_exec;
	int retval;
	const struct cred *cred = current_cred();

	if ((current->flags & PF_NPROC_EXCEEDED) &&
	    atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) {
		retval = -EAGAIN;
		goto out_ret;
	}
	current->flags &= ~PF_NPROC_EXCEEDED;

	/*
	 * If the descriptor table was shared, afterwards:
	 *   displaced      = the previous (shared) current->files
	 *   current->files = a private copy of it
	 * If no copy was needed, displaced is NULL.
	 */
	retval = unshare_files(&displaced);
	if (retval)
		goto out_ret;

	retval = -ENOMEM;
	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
	if (!bprm)
		goto out_files;

	retval = prepare_bprm_creds(bprm);
	if (retval)
		goto out_free;

	retval = check_unsafe_exec(bprm);
	if (retval < 0)
		goto out_free;
	clear_in_exec = retval;
	current->in_execve = 1;

	file = open_exec(filename);
	retval = PTR_ERR(file);
	if (IS_ERR(file))
		goto out_unmark;

	sched_exec();

	bprm->file = file;
	bprm->filename = filename;
	bprm->interp = filename;

	retval = bprm_mm_init(bprm);
	if (retval)
		goto out_file;

	bprm->argc = count(argv, MAX_ARG_STRINGS);
	if ((retval = bprm->argc) < 0)
		goto out;

	bprm->envc = count(envp, MAX_ARG_STRINGS);
	if ((retval = bprm->envc) < 0)
		goto out;

	retval = prepare_binprm(bprm);
	if (retval < 0)
		goto out;

	retval = copy_strings_kernel(1, &bprm->filename, bprm);
	if (retval < 0)
		goto out;

	bprm->exec = bprm->p;
	retval = copy_strings(bprm->envc, envp, bprm);
	if (retval < 0)
		goto out;

	retval = copy_strings(bprm->argc, argv, bprm);
	if (retval < 0)
		goto out;

	/*
	 * Runs the binary-format loader. For ELF this reaches
	 * setup_new_exec() -> do_close_on_exec(), which closes every fd whose
	 * bit is set in current->files->fdt->close_on_exec.
	 */
	retval = search_binary_handler(bprm,regs);
	if (retval < 0)
		goto out;

	current->fs->in_exec = 0;
	current->in_execve = 0;
	acct_update_integrals(current);
	free_bprm(bprm);
	/*
	 * Success: this task keeps its private copy, so drop its reference on
	 * the displaced (previously shared) table; put_files_struct() tears the
	 * table down if that was the last reference.
	 */
	if (displaced)
		put_files_struct(displaced);
	return retval;

out:
	if (bprm->mm) {
		acct_arg_size(bprm, 0);
		mmput(bprm->mm);
	}

out_file:
	if (bprm->file) {
		allow_write_access(bprm->file);
		fput(bprm->file);
	}

out_unmark:
	if (clear_in_exec)
		current->fs->in_exec = 0;
	current->in_execve = 0;

out_free:
	free_bprm(bprm);

out_files:
	/*
	 * Failure: reinstall the original shared table as current->files and
	 * release the private copy that unshare_files() had installed.
	 */
	if (displaced)
		reset_files_struct(displaced);
out_ret:
	return retval;
}
int unshare_files(struct files_struct **displaced)
{
struct task_struct *task = current;
struct files_struct *copy = NULL;
int error;
error = unshare_fd(CLONE_FILES, ©);
if (error || !copy) {
*displaced = NULL;
return error;
}
*displaced = task->files;
task_lock(task);
task->files = copy;
task_unlock(task);
return 0;
}
/*
 * Duplicate the current task's descriptor table when CLONE_FILES is requested
 * and the table's reference count is greater than one.
 *
 * new_fdp: receives the duplicate from dup_fd(); left untouched when no
 *          duplicate is needed.
 *
 * Returns 0 when nothing had to be done or the duplicate was created,
 * otherwise the error reported by dup_fd().
 */
static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
{
	struct files_struct *cur = current->files;
	int err = 0;

	if (!(unshare_flags & CLONE_FILES))
		return 0;

	/* No table, or a single reference: there is nothing to unshare. */
	if (!cur || atomic_read(&cur->count) <= 1)
		return 0;

	*new_fdp = dup_fd(cur, &err);
	return *new_fdp ? 0 : err;
}
/*
 * Install `files` as the current task's descriptor table and release the
 * table it replaces.
 */
void reset_files_struct(struct files_struct *files)
{
struct task_struct *tsk = current;
struct files_struct *old;
old = tsk->files;
/* The pointer swap is done under task_lock(). */
task_lock(tsk);
tsk->files = files;
task_unlock(tsk);
/* Drop this task's reference on the table that was just replaced. */
put_files_struct(old);
}
/*
 * Drop one reference on `files`. When the count reaches zero: close the files
 * it holds (close_files), free the fdtable unless it is the one embedded in
 * the structure (files->fdtab), then free the structure itself.
 */
void put_files_struct(struct files_struct *files)
{
struct fdtable *fdt;
if (atomic_dec_and_test(&files->count)) {
close_files(files);
rcu_read_lock();
fdt = files_fdtable(files);
rcu_read_unlock();
/* The embedded table is part of `files` and is freed with it below. */
if (fdt != &files->fdtab)
__free_fdtable(fdt);
kmem_cache_free(files_cachep, files);
}
}
ファイルオープンのO_CLOEXECフラグに係る処理
/*
 * Core of open(): copy in the path name, reserve a descriptor number
 * (passing `flags` through, so O_CLOEXEC is seen by the fd allocator), open
 * the file and bind it to that descriptor.
 *
 * Returns the new descriptor, or a negative error from getname(),
 * get_unused_fd_flags() or do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_flags op;
	int lookup = build_open_flags(flags, mode, &op);
	struct filename *name = getname(filename);
	struct file *filp;
	int fd;

	if (IS_ERR(name)) {
		fd = PTR_ERR(name);
		return fd;
	}

	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		goto out_putname;

	filp = do_filp_open(dfd, name, &op, lookup);
	if (IS_ERR(filp)) {
		/* Give the reserved descriptor number back. */
		put_unused_fd(fd);
		fd = PTR_ERR(filp);
		goto out_putname;
	}

	fsnotify_open(filp);
	fd_install(fd, filp);

out_putname:
	putname(name);
	return fd;
}
/*
 * Reserve a descriptor number in the current task's table, searching from 0
 * up to the RLIMIT_NOFILE limit. `flags` is forwarded to __alloc_fd().
 * Returns what __alloc_fd() returns.
 */
int get_unused_fd_flags(unsigned flags)
{
	struct files_struct *files = current->files;
	unsigned long limit = rlimit(RLIMIT_NOFILE);

	return __alloc_fd(files, 0, limit, flags);
}
/*
 * Reserve a free descriptor number in `files`.
 *
 * The search starts at the larger of `start` and files->next_fd and looks for
 * the first clear bit in fdt->open_fds. The chosen fd is marked open and its
 * close-on-exec bit is set or cleared according to O_CLOEXEC in `flags`.
 *
 * Returns the descriptor number, -EMFILE when the candidate is >= `end`, or
 * the negative error from expand_files(). Runs with files->file_lock held.
 */
int __alloc_fd(struct files_struct *files,
unsigned start, unsigned end, unsigned flags)
{
unsigned int fd;
int error;
struct fdtable *fdt;
spin_lock(&files->file_lock);
repeat:
fdt = files_fdtable(files);
fd = start;
if (fd < files->next_fd)
fd = files->next_fd;
if (fd < fdt->max_fds)
fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
error = -EMFILE;
if (fd >= end)
goto out;
error = expand_files(files, fd);
if (error < 0)
goto out;
/*
 * NOTE(review): a positive return presumably means the table was expanded
 * (and fdt may be stale), hence the restart — confirm against expand_files().
 */
if (error)
goto repeat;
if (start <= files->next_fd)
files->next_fd = fd + 1;
__set_open_fd(fd, fdt);
/* This is where open(..., O_CLOEXEC) is recorded in the fdtable. */
if (flags & O_CLOEXEC)
__set_close_on_exec(fd, fdt);
else
__clear_close_on_exec(fd, fdt);
error = fd;
out:
spin_unlock(&files->file_lock);
return error;
}
/* Set bit `fd` in the fdtable's close_on_exec bitmap. */
static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
{
__set_bit(fd, fdt->close_on_exec);
}
/*
 * Set bit `nr` in the bitmap starting at `addr` with a plain read-modify-write.
 * BIT_WORD(nr) selects the unsigned long holding the bit and BIT_MASK(nr) is
 * its mask within that word.
 */
static inline void __set_bit(int nr, volatile unsigned long *addr)
{
	unsigned long *word = (unsigned long *)addr + BIT_WORD(nr);

	*word |= BIT_MASK(nr);
}
/*
 * Binary-format descriptor for ELF. load_binary points at load_elf_binary(),
 * which is where setup_new_exec() (and through it do_close_on_exec()) gets
 * called for ELF executables.
 */
static struct linux_binfmt elf_format = {
.module = THIS_MODULE,
.load_binary = load_elf_binary,
.load_shlib = load_elf_library,
.core_dump = elf_core_dump,
.min_coredump = ELF_EXEC_PAGESIZE,
};
/*
 * ELF load_binary handler (excerpt: the ':' lines stand for code omitted from
 * this listing). The part relevant to close-on-exec is the call to
 * setup_new_exec().
 */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
:
setup_new_exec(bprm);
:
}
current->files->fdt->close_on_exec(unsigned long *のビットマップ)で立っているビットの位置をfdとして、current->files->fdt->fd[fd]のファイルをクローズする
/*
 * Excerpt (the ':' line stands for code omitted from this listing): the last
 * step shown closes the current task's close-on-exec descriptors.
 */
void setup_new_exec(struct linux_binprm * bprm)
{
:
do_close_on_exec(current->files);
}
/* Fetch files->fdt through rcu_dereference_check_fdtable(). */
#define files_fdtable(files) \
(rcu_dereference_check_fdtable((files), (files)->fdt))
/*
 * Close every descriptor in `files` whose close-on-exec bit is set.
 *
 * The close_on_exec bitmap is walked one unsigned long at a time. Each
 * non-zero word is cleared, and for every bit that was set the file is
 * detached from its slot, the descriptor number is released, and the file is
 * closed with files->file_lock dropped.
 */
void do_close_on_exec(struct files_struct *files)
{
unsigned i;
struct fdtable *fdt;
spin_lock(&files->file_lock);
for (i = 0; ; i++) {
unsigned long set;
/* First descriptor number covered by bitmap word i. */
unsigned fd = i * BITS_PER_LONG;
/*
 * Re-read on every pass. NOTE(review): presumably because the lock is
 * dropped in the inner loop and the table may have changed — confirm.
 */
fdt = files_fdtable(files);
if (fd >= fdt->max_fds)
break;
set = fdt->close_on_exec[i];
if (!set)
continue;
fdt->close_on_exec[i] = 0;
/* Shift through the saved word; fd tracks the bit currently in position 0. */
for ( ; set ; fd++, set >>= 1) {
struct file *file;
if (!(set & 1))
continue;
file = fdt->fd[fd];
if (!file)
continue;
/* Detach the file and free the fd number while still holding the lock. */
rcu_assign_pointer(fdt->fd[fd], NULL);
__put_unused_fd(files, fd);
/* filp_close() and cond_resched() run without the spinlock held. */
spin_unlock(&files->file_lock);
filp_close(file, files);
cond_resched();
spin_lock(&files->file_lock);
}
}
spin_unlock(&files->file_lock);
}
補足
コマンドの実行では、search_binary_handler()から、実行ファイル形式に依存するstruct linux_binfmtのload_binaryが呼び出される。したがって、O_CLOEXECの実装は実行ファイル形式ごとのload_binary()に依存する。実行ファイル形式によっては、この処理が実装されていない事も有り得る。
/*
 * Per-binary-format handler table (ELF fills one in as elf_format).
 */
struct linux_binfmt {
struct list_head lh;
struct module *module;
/* Loader for an executable of this format; load_elf_binary for ELF. */
int (*load_binary)(struct linux_binprm *, struct pt_regs * regs);
/* NOTE(review): presumably loads a shared library of this format — confirm. */
int (*load_shlib)(struct file *);
/* NOTE(review): presumably writes a core dump in this format — confirm. */
int (*core_dump)(struct coredump_params *cprm);
unsigned long min_coredump;
};






