プロセスのオープンファイル数
fopenではnr=0で、files->fdt[fdt->max_fds]の空きインデックスを走査します。dup2システムコールではnrが任意のインデックスとなりファイルID取得を試みます。
ファイルIDがtsk->signal->rlim[RLIMIT_NOFILE].rlim_curを超えてれば取得できません。ファイルIDはfiles->fdt[fdt->max_fds]のインデックスで、nr > fdt->max_fdsdならfiles->fdt[]に空き領域がなく、でnr < sysctl_nr_openなら、expand_fdtable()で改めてfiles->fdt[]を拡張し取得します。
・ファイル数制約は、プロセス毎におけるファイルID以外に、システム(CPU単位)としてファイルその物の制約も実装されています。
ファイルIDがtsk->signal->rlim[RLIMIT_NOFILE].rlim_curを超えてれば取得できません。ファイルIDはfiles->fdt[fdt->max_fds]のインデックスで、nr > fdt->max_fdsdならfiles->fdt[]に空き領域がなく、でnr < sysctl_nr_openなら、expand_fdtable()で改めてfiles->fdt[]を拡張し取得します。
int expand_files(struct files_struct *files, int nr)
{
struct fdtable *fdt;
fdt = files_fdtable(files);
if (nr >= rlimit(RLIMIT_NOFILE))
return -EMFILE;
if (nr < fdt->max_fds)
return 0;
if (nr >= sysctl_nr_open)
return -EMFILE;
return expand_fdtable(files, nr);
}
static inline unsigned long rlimit(unsigned int limit)
{
return task_rlimit(current, limit);
}
static inline unsigned long task_rlimit(const struct task_struct *tsk,
unsigned int limit)
{
return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
}
プロセスのデフォルトのRLIMIT_NOFILEのrlim_curは1024/rlim_maxは4096です。[root@localhost ~]# cat /proc/self/limits Limit Soft Limit Hard Limit Units : Max open files 1024(rlim_cur) 4096(rlim_max) files :/proc/self/limitsの .proc_readコールバックで、0からRLIM_NLIMITSのtask->signal->rlim[]のrlim_cur/rlim_maxを表示しますが、proc_writeコールバックは実装されません。
#define INF(NAME, MODE, read) \
NOD(NAME, (S_IFREG|(MODE)), \
NULL, &proc_info_file_operations, \
{ .proc_read = read } )
static const struct pid_entry tgid_base_stuff[] = {
:
INF("limits", S_IRUGO, proc_pid_limits),
:
};
static int proc_pid_limits(struct task_struct *task, char *buffer)
{
unsigned int i;
int count = 0;
unsigned long flags;
char *bufptr = buffer;
struct rlimit rlim[RLIM_NLIMITS];
if (!lock_task_sighand(task, &flags))
return 0;
memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
unlock_task_sighand(task, &flags);
count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
"Limit", "Soft Limit", "Hard Limit", "Units");
for (i = 0; i < RLIM_NLIMITS; i++) {
if (rlim[i].rlim_cur == RLIM_INFINITY)
count += sprintf(&bufptr[count], "%-25s %-20s ",
lnames[i].name, "unlimited");
else
count += sprintf(&bufptr[count], "%-25s %-20lu ",
lnames[i].name, rlim[i].rlim_cur);
if (rlim[i].rlim_max == RLIM_INFINITY)
count += sprintf(&bufptr[count], "%-20s ", "unlimited");
else
count += sprintf(&bufptr[count], "%-20lu ",
rlim[i].rlim_max);
if (lnames[i].unit)
count += sprintf(&bufptr[count], "%-10s\n",
lnames[i].unit);
else
count += sprintf(&bufptr[count], "\n");
}
return count;
}
sysctl_nr_openは、fdt->max_fds < nr < rlimit(RLIMIT_NOFILE)時での、files->fdt[]の拡張最大値。[root@localhost ~]# cat /proc/sys/fs/nr_open 1048576
int sysctl_nr_open __read_mostly = 1024*1024;
static struct ctl_table fs_table[] = {
:
{
.procname = "nr_open",
.data = &sysctl_nr_open,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &sysctl_nr_open_min,
.extra2 = &sysctl_nr_open_max,
},
:
}
rootプロセスの初期値はINIT_TASK(init_task)で、roott->signal->rlim[]はINIT_SIGNALS(init_signals)で、signal->rlim[RLIMIT_NOFILE] = { INR_OPEN_CUR, INR_OPEN_MAX },]となります。
void __init fork_init(unsigned long mempages)
{
#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
#endif
task_struct_cachep =
kmem_cache_create("task_struct", sizeof(struct task_struct),
ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
#endif
/* do the arch specific task caches init */
arch_task_cache_init();
max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
if (max_threads < 20)
max_threads = 20;
init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
init_task.signal->rlim[RLIMIT_SIGPENDING] =
init_task.signal->rlim[RLIMIT_NPROC];
}
struct task_struct init_task = INIT_TASK(init_task);
#define INIT_TASK(tsk) \
{ \
.state = 0, \
.stack = &init_thread_info, \
.usage = ATOMIC_INIT(2), \
.flags = PF_KTHREAD, \
.prio = MAX_PRIO-20, \
.static_prio = MAX_PRIO-20, \
.normal_prio = MAX_PRIO-20, \
.policy = SCHED_NORMAL, \
.cpus_allowed = CPU_MASK_ALL, \
.mm = NULL, \
.active_mm = &init_mm, \
.se = { \
.group_node = LIST_HEAD_INIT(tsk.se.group_node), \
}, \
.rt = { \
.run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
.time_slice = HZ, \
.nr_cpus_allowed = NR_CPUS, \
}, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
INIT_PUSHABLE_TASKS(tsk) \
.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
.ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \
.real_parent = &tsk, \
.parent = &tsk, \
.children = LIST_HEAD_INIT(tsk.children), \
.sibling = LIST_HEAD_INIT(tsk.sibling), \
.group_leader = &tsk, \
RCU_INIT_POINTER(.real_cred, &init_cred), \
RCU_INIT_POINTER(.cred, &init_cred), \
.comm = INIT_TASK_COMM, \
.thread = INIT_THREAD, \
.fs = &init_fs, \
.files = &init_files, \
.signal = &init_signals, \ <= オープンファイル
.sighand = &init_sighand, \
.nsproxy = &init_nsproxy, \
.pending = { \
.list = LIST_HEAD_INIT(tsk.pending.list), \
.signal = {{0}}}, \
.blocked = {{0}}, \
.alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
.timer_slack_ns = 50000, /* 50 usec default slack */ \
.pids = { \
[PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
[PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \
}, \
.thread_group = LIST_HEAD_INIT(tsk.thread_group), \
INIT_IDS \
INIT_PERF_EVENTS(tsk) \
INIT_TRACE_IRQFLAGS \
INIT_LOCKDEP \
INIT_FTRACE_GRAPH \
INIT_TRACE_RECURSION \
INIT_TASK_RCU_PREEMPT(tsk) \
}
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
#define INIT_SIGNALS(sig) { \
.nr_threads = 1, \
.wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
.shared_pending = { \
.list = LIST_HEAD_INIT(sig.shared_pending.list), \
.signal = {{0}}}, \
.posix_timers = LIST_HEAD_INIT(sig.posix_timers), \
.cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \
.rlim = INIT_RLIMITS, \ <= オープンファイル
.cputimer = { \
.cputime = INIT_CPUTIME, \
.running = 0, \
.lock = __RAW_SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \
}, \
.cred_guard_mutex = \
__MUTEX_INITIALIZER(sig.cred_guard_mutex), \
INIT_GROUP_RWSEM(sig) \
}
#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */
#define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */
#define INIT_RLIMITS \
{ \
[RLIMIT_CPU] = { RLIM_INFINITY, RLIM_INFINITY }, \
[RLIMIT_FSIZE] = { RLIM_INFINITY, RLIM_INFINITY }, \
[RLIMIT_DATA] = { RLIM_INFINITY, RLIM_INFINITY }, \
[RLIMIT_STACK] = { _STK_LIM, _STK_LIM_MAX }, \
[RLIMIT_CORE] = { 0, RLIM_INFINITY }, \
[RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY }, \
[RLIMIT_NPROC] = { 0, 0 }, \
[RLIMIT_NOFILE] = { INR_OPEN_CUR, INR_OPEN_MAX }, \ <= オープンファイル
[RLIMIT_MEMLOCK] = { MLOCK_LIMIT, MLOCK_LIMIT }, \
[RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY }, \
[RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \
[RLIMIT_SIGPENDING] = { 0, 0 }, \
[RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \
[RLIMIT_NICE] = { 0, 0 }, \
[RLIMIT_RTPRIO] = { 0, 0 }, \
[RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \
}
子プロセス作成時、copy_process()からcopy_signa()がコールされ、current->signal->rlimが新規プロセスに複写されます。
long do_fork(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr)
{
struct task_struct *p;
int trace = 0;
long nr;
if (clone_flags & CLONE_NEWUSER) {
if (clone_flags & CLONE_THREAD)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
!capable(CAP_SETGID))
return -EPERM;
}
if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) {
if (clone_flags & CLONE_VFORK)
trace = PTRACE_EVENT_VFORK;
else if ((clone_flags & CSIGNAL) != SIGCHLD)
trace = PTRACE_EVENT_CLONE;
else
trace = PTRACE_EVENT_FORK;
if (likely(!ptrace_event_enabled(current, trace)))
trace = 0;
}
p = copy_process(clone_flags, stack_start, regs, stack_size,
child_tidptr, NULL, trace);
if (!IS_ERR(p)) {
struct completion vfork;
trace_sched_process_fork(current, p);
nr = task_pid_vnr(p);
if (clone_flags & CLONE_PARENT_SETTID)
put_user(nr, parent_tidptr);
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
get_task_struct(p);
}
wake_up_new_task(p);
if (unlikely(trace))
ptrace_event(trace, nr);
if (clone_flags & CLONE_VFORK) {
if (!wait_for_vfork_done(p, &vfork))
ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
}
} else {
nr = PTR_ERR(p);
}
return nr;
}
tsk->signal->rlimにcurrent->signal->rlimを複写します。
static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
{
struct signal_struct *sig;
if (clone_flags & CLONE_THREAD)
return 0;
sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
tsk->signal = sig;
if (!sig)
return -ENOMEM;
sig->nr_threads = 1;
atomic_set(&sig->live, 1);
atomic_set(&sig->sigcnt, 1);
init_waitqueue_head(&sig->wait_chldexit);
if (clone_flags & CLONE_NEWPID)
sig->flags |= SIGNAL_UNKILLABLE;
sig->curr_target = tsk;
init_sigpending(&sig->shared_pending);
INIT_LIST_HEAD(&sig->posix_timers);
hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
sig->real_timer.function = it_real_fn;
task_lock(current->group_leader);
memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
task_unlock(current->group_leader);
posix_cpu_timers_init_group(sig);
tty_audit_fork(sig);
sched_autogroup_fork(sig);
#ifdef CONFIG_CGROUPS
init_rwsem(&sig->group_rwsem);
#endif
sig->oom_adj = current->signal->oom_adj;
sig->oom_score_adj = current->signal->oom_score_adj;
sig->oom_score_adj_min = current->signal->oom_score_adj_min;
mutex_init(&sig->cred_guard_mutex);
return 0;
}
補足
・CAP_SYS_RESOURCEケーパビリティを有してなければ、setrlimitシステムコールでのcurrent->signal->rlim[RLIMIT_NOFILE]の設定は、元のrlim_max以下でなければなりません。・ファイル数制約は、プロセス毎におけるファイルID以外に、システム(CPU単位)としてファイルその物の制約も実装されています。






