/proc/pid/syscall
/proc/pid/syscallは、プロセスがシステムコール内で待機している場合、そのシステムコールの情報(システムコール番号、引数6個、スタックポインタ、プログラムカウンタ)を表示します。システムコール外で待機している場合は、-1とスタックポインタ、プログラムカウンタのみが表示されます。プロセスが待機しておらず実行中の場合は、runningと表示されます。
サンプル1
サンプル1
#include <sys/syscall.h> #include <stdio.h> void main(int argc, char* argv[]) { char buff[0x123]; printf("%d:%lx\n", SYS_read, buff); syscall(SYS_read, 0, buff, sizeof(buff)); } [root@localhost test]# ./a.out & [1] 1662 [root@localhost test]# 3:bff98e5d [1]+ 停止 ./a.out [root@localhost test]# cat /proc/1662/syscall 3 0x0 0xbff98e5d 0x123 0x8 0x8 0x7 0xbff98e1c 0xb7737424
サンプル2
#include <sys/syscall.h> #include <stdio.h> void main(int argc, char* argv[]) { while(1) { ; } } [root@localhost test]# ./a.out & [1] 1643 [root@localhost test]# cat /proc/1643/syscall running
システムコールは、eaxにシステムコール番号、ebx/ecx・・・に引数を設定し、int 0x80またはsysenterを実行することで呼び出されます。エントリであるsystem_callはこれらのレジスタをカーネルスタックにpushし、sys_call_tableのeax番目のエントリが指すアドレスをコールします。従って、システムコールで待機しているならスタックはカーネルモードのもので、そうでないならユーザモードのものとなります。
ENTRY(system_call) pushl %eax # save orig_eax cld; pushl %es; pushl %ds; pushl %eax; pushl %ebp; pushl %edi; # 第 5 引数 pushl %esi; # 第 4 引数 pushl %edx; # 第 3 引数 pushl %ecx; # 第 2 引数 pushl %ebx; # 第 1 引数 movl $(__KERNEL_DS),%edx; movl %dx,%ds; movl %dx,%es; movl %esp, %ebx; andl $-8192, %ebx; cmpl $(NR_syscalls),%eax jae badsys testb $0x20,flags(%ebx) # PF_TRACESYS jne tracesys call *SYMBOL_NAME(sys_call_table)(,%eax,4) # eax のシステムコールに対応する # 関数を呼ぶ movl %eax,EAX(%esp) # スタック上のeaxに返り値設定 popl %ebx; popl %ecx; popl %edx; popl %esi; popl %edi; popl %ebp; popl %eax; # 返り値設定済み popl %ds; popl %es; addl $4,%esp; # 最初の pushl %eax の分を捨てる iret; static int proc_pid_syscall(struct task_struct *task, char *buffer) { long nr; unsigned long args[6], sp, pc; int res = lock_trace(task); if (res) return res; if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) res = sprintf(buffer, "running\n"); else if (nr < 0) res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); else res = sprintf(buffer, "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", nr, args[0], args[1], args[2], args[3], args[4], args[5], sp, pc); unlock_trace(task); return res; } int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc) { long state; unsigned long ncsw; if (unlikely(maxargs > 6)) return -EINVAL; if (target == current) return collect_syscall(target, callno, args, maxargs, sp, pc); state = target->state; if (unlikely(!state)) return -EAGAIN; ncsw = wait_task_inactive(target, state); if (unlikely(!ncsw) || unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) || unlikely(wait_task_inactive(target, state) != ncsw)) return -EAGAIN; return 0; } static int collect_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc) { struct pt_regs *regs = task_pt_regs(target); if (unlikely(!regs)) return -EAGAIN; *sp = 
user_stack_pointer(regs); *pc = instruction_pointer(regs); *callno = syscall_get_nr(target, regs); if (*callno != -1L && maxargs > 0) syscall_get_arguments(target, regs, 0, maxargs, args); return 0; } static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { return regs->orig_ax; } struct pt_regs { unsigned long bx; unsigned long cx; unsigned long dx; unsigned long si; unsigned long di; unsigned long bp; unsigned long ax; unsigned long ds; unsigned long es; unsigned long fs; unsigned long gs; unsigned long orig_ax; unsigned long ip; unsigned long cs; unsigned long flags; unsigned long sp; unsigned long ss; }; static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned int i, unsigned int n, unsigned long *args) { BUG_ON(i + n > 6); memcpy(args, ®s->bx + i, n * sizeof(args[0])); }
補足
proc_pid_syscallは、/proc/pid配下のdentryのコールバックです。/procをreaddirすると、namespace下のプロセスを取得し、そのプロセスIDをdentry名とするdentryキャッシュを取得します。この時に、/proc/pidのreaddirコールバックでtid_base_stuff[]とするキャッシュファイルが作成され、syscallはproc_pid_syscallがfile_operationsコールバックとなります。
struct pid_entry { char *name; int len; umode_t mode; const struct inode_operations *iop; const struct file_operations *fop; union proc_op op; }; static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUGO, proc_pid_personality), INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK INF("syscall", S_IRUGO, proc_pid_syscall), #endif INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_maps_operations), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), LNK("root", proc_root_link), LNK("exe", proc_exe_link), REG("mounts", S_IRUGO, proc_mounts_operations), REG("mountinfo", S_IRUGO, proc_mountinfo_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_smaps_operations), REG("pagemap", S_IRUGO, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS INF("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE 
ONE("stack", S_IRUGO, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, proc_cpuset_operations), #endif #ifdef CONFIG_CGROUPS REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING INF("io", S_IRUSR, proc_tid_io_accounting), #endif #ifdef CONFIG_HARDWALL INF("hardwall", S_IRUGO, proc_pid_hardwall), #endif };