eventfd
Rev.4を表示中。最新版はこちら。
eventfdは、実ファイルを持たないバッファを介したpipe的な操作に相当する。ただしeventfdのバッファは、pipeのようなchar *のバイト列ではなく、uint64_tの整数（file->private_data->count）である。pipeへの複数回の書き込みがバイト列の追記になるのに対し、eventfdへの複数回の書き込みはuint64_t値の加算になる。読み込みはfile->private_data->countの減算であり、countが0なら減算できないため、書き込みによってcountが0でなくなるまでウエイトする。EFD_NONBLOCKを指定した場合は、pipeと同様にcountが0でもread()はウエイトせず、EAGAINのエラーで直ちに戻る。デフォルトでは読み込み値はfile->private_data->countの値そのもので、読み込み後はcountが0になる。EFD_SEMAPHOREを指定した場合、読み込み値は常に1でcountは1だけ減算されるため、運用上file->private_data->countはread可能な回数を表す。
サンプル
[root@north eventfd]# cat semaphore.c
#include <sys/eventfd.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
void event_read(int efd, int init_value, int write_value);
/*
 * Demo driver: create a non-blocking eventfd whose counter starts at 3,
 * optionally add <write_value> to it, then drain it with event_read().
 *
 * Usage: semaphore.out <write_value> <EFD_SEMAPHORE|NO_EFD_SEMAPHORE>
 *
 * Returns 0 on success, EXIT_FAILURE on bad arguments or when
 * eventfd()/write() fails.
 */
int main(int argc, char *argv[])
{
	enum { INIT_COUNT = 3 };	/* initial counter value handed to eventfd() */
	const char *prog = (argc > 0 && argv[0]) ? argv[0] : "semaphore.out";
	int flags = EFD_NONBLOCK;
	int efd;
	long value;
	char *end;
	uint64_t u;

	/* Both arguments are required; the original read argv[1]/argv[2] blindly. */
	if (argc < 3) {
		fprintf(stderr,
			"usage: %s <write_value> <EFD_SEMAPHORE|NO_EFD_SEMAPHORE>\n",
			prog);
		return EXIT_FAILURE;
	}

	if (!strcmp(argv[2], "EFD_SEMAPHORE")) {
		flags |= EFD_SEMAPHORE;
	} else if (strcmp(argv[2], "NO_EFD_SEMAPHORE") != 0) {
		/* An unknown mode used to leave efd uninitialized. */
		fprintf(stderr, "%s: unknown mode '%s'\n", prog, argv[2]);
		return EXIT_FAILURE;
	}

	/* strtol() instead of atoi() so that garbage and negative input is rejected. */
	value = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || value < 0) {
		fprintf(stderr, "%s: invalid write value '%s'\n", prog, argv[1]);
		return EXIT_FAILURE;
	}
	u = (uint64_t)value;

	efd = eventfd(INIT_COUNT, flags);
	if (efd < 0) {
		perror("eventfd");
		return EXIT_FAILURE;
	}

	/* A write of 0 would add nothing to the counter, so it is skipped. */
	if (u && write(efd, &u, sizeof u) != (ssize_t)sizeof u) {
		perror("write");
		close(efd);
		return EXIT_FAILURE;
	}

	/* The int arguments are only echoed in the header line printed by event_read(). */
	event_read(efd, INIT_COUNT, (int)value);
	close(efd);
	return 0;
}
/*
 * Read from an eventfd until a read fails, printing every value obtained.
 *
 * @efd:         eventfd descriptor to drain. The loop only terminates when
 *               read() stops returning 8 bytes, so the descriptor is expected
 *               to be non-blocking.
 * @init_value:  initial counter value, echoed in the header line only.
 * @write_value: value written afterwards, echoed in the header line only.
 *
 * Output: a header line, one "<read number>:<value>" line per read() call
 * (including the final failing one, which shows -1), and finally the return
 * value of the failing read().
 */
void event_read(int efd, int init_value, int write_value)
{
	uint64_t u;
	ssize_t ret;
	int cnt = 0;

	printf("%10s:%10s eventfd->count=%d+%d\n",
	       "read cnt", "read value", init_value, write_value);

	for (;;) {
		/* Sentinel: still all-ones after the call means read() stored nothing. */
		u = (uint64_t)-1;
		ret = read(efd, &u, sizeof u);

		/*
		 * uint64_t must not be passed to %d. Printing through long long
		 * keeps the untouched sentinel visible as -1 (on the usual
		 * two's-complement targets).
		 */
		printf("%10d:%10lld\n", ++cnt, (long long)u);

		if (ret != (ssize_t)sizeof u)
			break;
	}

	printf("noread ret:%d\n", (int)ret);
}
[root@north eventfd]# ./semaphore.out 0 EFD_SEMAPHORE
read cnt:read value eventfd->count=3+0
1: 1
2: 1
3: 1
4: -1
noread ret:-1
[root@north eventfd]# ./semaphore.out 2 EFD_SEMAPHORE
read cnt:read value eventfd->count=3+2
1: 1
2: 1
3: 1
4: 1
5: 1
6: -1
noread ret:-1
[root@north eventfd]# ./semaphore.out 0 NO_EFD_SEMAPHORE
read cnt:read value eventfd->count=3+0
1: 3
2: -1
noread ret:-1
[root@north eventfd]# ./semaphore.out 2 NO_EFD_SEMAPHORE
read cnt:read value eventfd->count=3+2
1: 5
2: -1
noread ret:-1
BLOCK
デフォルト（ブロッキング）では、file->private_data->count=0のときread()はfile->private_data->count!=0となるまでウエイトする。EFD_NONBLOCKを指定した場合は、file->private_data->count=0であってもread()はウエイトせず、直ちにエラー（EAGAIN）で戻る。
[root@north eventfd]# cat noblock.c
#include <sys/eventfd.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
void event_read(int efd);
/*
 * Demo: blocking versus non-blocking read() on an eventfd.
 *
 * Usage: noblock.out <BLOCK|NONBLOCK>   (without arguments the two modes are listed)
 *
 * The process forks. The child reads from the eventfd immediately; the
 * parent sleeps one second and then writes 11. In BLOCK mode the child
 * therefore waits for the parent's write; in NONBLOCK mode its read() fails
 * at once and it prints 0.
 *
 * Returns 0 on success, EXIT_FAILURE on an unknown mode or when
 * eventfd()/fork()/write() fails.
 */
int main(int argc, char *argv[])
{
	int efd;
	int flags;
	uint64_t u;
	pid_t pid;

	if (argc < 2) {
		printf("BLOCK\n");
		printf("NONBLOCK\n");
		exit(0);
	}

	if (!strcmp(argv[1], "BLOCK")) {
		flags = 0;
	} else if (!strcmp(argv[1], "NONBLOCK")) {
		flags = EFD_NONBLOCK;
	} else {
		/* An unknown mode used to leave efd uninitialized. */
		fprintf(stderr, "unknown mode '%s' (expected BLOCK or NONBLOCK)\n",
			argv[1]);
		return EXIT_FAILURE;
	}

	efd = eventfd(0, flags);
	if (efd < 0) {
		perror("eventfd");
		return EXIT_FAILURE;
	}

	pid = fork();
	if (pid < 0) {
		/* fork() == -1 is non-zero and was previously mistaken for the parent. */
		perror("fork");
		close(efd);
		return EXIT_FAILURE;
	}

	if (pid > 0) {
		/* Parent: give the child time to issue its read() first. */
		sleep(1);
		printf("parent write start\n");
		u = 11;
		if (write(efd, &u, sizeof u) != (ssize_t)sizeof u) {
			perror("write");
			close(efd);
			return EXIT_FAILURE;
		}
		printf("write:%llu\n", (unsigned long long)u);
	} else {
		printf("child read start\n");
		u = 0;
		/*
		 * A failing read() is the expected outcome in NONBLOCK mode and
		 * is what the demo shows, so it is not reported as an error;
		 * the value is merely forced back to 0.
		 */
		if (read(efd, &u, sizeof u) != (ssize_t)sizeof u)
			u = 0;
		printf("read :%llu\n", (unsigned long long)u);
	}

	close(efd);
	return 0;
}
[root@north eventfd]# ./noblock.out NONBLOCK
child read start
read :0
parent write start
write:11
[root@north eventfd]# ./noblock.out BLOCK
child read start
parent write start
write:11
read :11
カーネル
/*
 * State of one eventfd. eventfd_file_create() allocates it and
 * anon_inode_getfile() stores it in file->private_data.
 *
 * NOTE(review): the trailing declarator "priv" looks like the article's way
 * of tying this structure to the "priv" argument of anon_inode_getfile();
 * confirm against the actual kernel source.
 */
struct eventfd_ctx {
	struct kref kref;	/* initialised with kref_init() at creation */
	wait_queue_head_t wqh;	/* readers and writers sleep on this queue; its lock guards count */
	__u64 count;		/* the counter: plays the role of a pipe's buffer */
	unsigned int flags;	/* creation flags; EFD_SEMAPHORE is tested on read */
} priv;
/*
 * Abridged view of struct file: only the member relevant to eventfd is
 * shown; each ':' line stands for members omitted from this excerpt.
 */
struct file {
:
void *private_data;	/* for an eventfd: the struct eventfd_ctx, stored by anon_inode_getfile() */
:
};
/*
 * File operations installed on every eventfd file: read() and write() act
 * on the counter, poll() and release() are eventfd specific, and seeking is
 * handed to noop_llseek.
 */
static const struct file_operations eventfd_fops = {
	.read		= eventfd_read,
	.write		= eventfd_write,
	.poll		= eventfd_poll,
	.llseek		= noop_llseek,
	.release	= eventfd_release,
};
current->files[efd]=struct file *efile:
__u64 efile->private_data->count; バッファ
/*
 * eventfd2() system call: reserve a descriptor, build the eventfd file and
 * bind the two together.
 *
 * @count: initial value of the counter
 * @flags: EFD_* flags; the bits in EFD_SHARED_FCNTL_FLAGS are also applied
 *         when reserving the descriptor
 *
 * Returns the new descriptor, or a negative error code; the reserved
 * descriptor is given back when the file cannot be created.
 */
SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
	struct file *efile;
	int fd;

	fd = get_unused_fd_flags(flags & EFD_SHARED_FCNTL_FLAGS);
	if (fd < 0)
		return fd;

	efile = eventfd_file_create(count, flags);
	if (IS_ERR(efile)) {
		int err = PTR_ERR(efile);

		/* Nothing was installed yet, so only the reservation is undone. */
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, efile);
	return fd;
}
/*
 * Allocate an eventfd context and wrap it in an anonymous-inode file.
 *
 * @count: initial value of the counter
 * @flags: EFD_* flags; any bit outside EFD_FLAGS_SET is rejected
 *
 * Returns the new file on success. On failure returns ERR_PTR(-EINVAL) for
 * unknown flags, ERR_PTR(-ENOMEM) when the context cannot be allocated, or
 * the error pointer produced by anon_inode_getfile() (the context is freed
 * in that case).
 */
struct file *eventfd_file_create(unsigned int count, int flags)
{
	struct file *file;
	struct eventfd_ctx *ctx;

	/* The flags are forwarded as O_* open flags below, so the values must agree. */
	BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);

	if (flags & ~EFD_FLAGS_SET)
		return ERR_PTR(-EINVAL);

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	kref_init(&ctx->kref);
	init_waitqueue_head(&ctx->wqh);
	/* The count argument becomes the initial value of the read/write counter. */
	ctx->count = count;
	ctx->flags = flags;

	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
				  O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
	if (IS_ERR(file))
		eventfd_free_ctx(ctx);

	return file;
}
/*
 * Create a file that is backed by the shared anonymous inode.
 *
 * @name:  name given to the pseudo dentry (e.g. "[eventfd]")
 * @fops:  file operations for the new file
 * @priv:  stored in file->private_data; for eventfd this is the
 *         struct eventfd_ctx
 * @flags: open flags; only O_ACCMODE and O_NONBLOCK are kept in f_flags
 *
 * Returns the new file, or ERR_PTR(-ENODEV) when the anonymous inode is
 * unavailable, ERR_PTR(-ENOENT) when the owning module cannot be pinned,
 * ERR_PTR(-ENOMEM) when the dentry cannot be allocated, or
 * ERR_PTR(-ENFILE) when alloc_file() fails.
 */
struct file *anon_inode_getfile(const char *name,
				const struct file_operations *fops,
				void *priv, int flags)
{
	struct qstr this;
	struct path path;
	struct file *file;
	int error;

	if (IS_ERR(anon_inode_inode))
		return ERR_PTR(-ENODEV);

	if (fops->owner && !try_module_get(fops->owner))
		return ERR_PTR(-ENOENT);

	error = -ENOMEM;
	this.name = name;
	this.len = strlen(name);
	this.hash = 0;
	path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
	if (!path.dentry)
		goto err_module;

	path.mnt = mntget(anon_inode_mnt);
	/* The single anonymous inode is reused: take a reference and attach it. */
	ihold(anon_inode_inode);
	d_instantiate(path.dentry, anon_inode_inode);

	error = -ENFILE;
	file = alloc_file(&path, OPEN_FMODE(flags), fops);
	if (!file)
		goto err_dput;

	file->f_mapping = anon_inode_inode->i_mapping;
	file->f_pos = 0;
	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
	file->f_version = 0;
	/* For eventfd, priv is the struct eventfd_ctx built by eventfd_file_create(). */
	file->private_data = priv;

	return file;

err_dput:
	path_put(&path);
err_module:
	module_put(fops->owner);
	return ERR_PTR(error);
}
#define ULLONG_MAX (~0ULL)
file->f_op = &eventfd_fops;
/*
 * write() on an eventfd: add the caller's 64-bit value to the counter.
 *
 * @file:  the eventfd file; file->private_data is its struct eventfd_ctx
 * @buf:   user buffer holding the __u64 value to add
 * @count: size of @buf; must be at least sizeof(__u64)
 * @ppos:  unused
 *
 * Returns sizeof(__u64) on success, -EINVAL for a short buffer or a value
 * of ULLONG_MAX, -EFAULT when @buf cannot be read, -EAGAIN when the
 * addition does not fit and the file is O_NONBLOCK, or -ERESTARTSYS when a
 * signal arrives while waiting for room.
 */
static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt;
	DECLARE_WAITQUEUE(wait, current);

	/* Fewer than 8 bytes (sizeof(__u64)) supplied: error. */
	if (count < sizeof(ucnt))
		return -EINVAL;
	if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
		return -EFAULT;
	/* The largest 8-byte value can never be added: error. */
	if (ucnt == ULLONG_MAX)
		return -EINVAL;

	spin_lock_irq(&ctx->wqh.lock);
	res = -EAGAIN;
	/*
	 * The write fits only if the counter stays below ULLONG_MAX after the
	 * addition. Otherwise a file without O_NONBLOCK waits until that
	 * becomes true, and an O_NONBLOCK file fails with -EAGAIN.
	 */
	if (ULLONG_MAX - ctx->count > ucnt)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ULLONG_MAX - ctx->count > ucnt) {
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			/* Sleep with the lock released; retake it before re-checking. */
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (likely(res > 0)) {
		/* "Writing" means adding the supplied value to the counter. */
		ctx->count += ucnt;
		/* Wake anybody sleeping on the queue, signalling readability. */
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, POLLIN);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return res;
}
/*
 * read() on an eventfd: fetch a value from the counter through
 * eventfd_ctx_read() and copy it to user space.
 *
 * @file:  the eventfd file; file->private_data is its struct eventfd_ctx
 * @buf:   user buffer receiving one __u64
 * @count: size of @buf; must be at least sizeof(__u64)
 * @ppos:  unused
 *
 * Returns sizeof(__u64) on success, -EINVAL for a short buffer, the
 * negative error from eventfd_ctx_read(), or -EFAULT when @buf cannot be
 * written.
 */
static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	__u64 value;
	ssize_t err;

	if (count < sizeof(value))
		return -EINVAL;

	/* The O_NONBLOCK bit of the file decides whether the read may sleep. */
	err = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &value);
	if (err < 0)
		return err;

	if (put_user(value, (__u64 __user *) buf))
		return -EFAULT;

	return sizeof(value);
}
/*
 * Read from an eventfd context, optionally sleeping until the counter is
 * non-zero.
 *
 * @ctx:     the eventfd context
 * @no_wait: non-zero to fail with -EAGAIN instead of sleeping when the
 *           counter is 0
 * @cnt:     receives the value read; set to 0 when nothing is read
 *
 * Returns 0 on success, -EAGAIN when the counter is 0 and @no_wait is set,
 * or -ERESTARTSYS when a signal arrives while waiting.
 */
ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
{
	ssize_t res;
	DECLARE_WAITQUEUE(wait, current);

	spin_lock_irq(&ctx->wqh.lock);
	*cnt = 0;
	res = -EAGAIN;
	if (ctx->count > 0)
		res = 0;
	else if (!no_wait) {
		__add_wait_queue(&ctx->wqh, &wait);
		for (;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			/*
			 * While the counter is 0 the task keeps sleeping in
			 * schedule(); the loop ends once the counter is found
			 * non-zero after a wake-up, or a signal is pending.
			 */
			if (ctx->count > 0) {
				res = 0;
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			/* Sleep with the lock released; retake it before re-checking. */
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (likely(res == 0)) {
		eventfd_ctx_do_read(ctx, cnt);
		/* The counter went down: wake anybody sleeping on the queue, signalling writability. */
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, POLLOUT);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return res;
}
/*
 * Take a value out of the counter and subtract it.
 *
 * @ctx: the eventfd context
 * @cnt: receives the value read
 *
 * With EFD_SEMAPHORE the value read is always 1, so the counter drops by
 * one per read. Without EFD_SEMAPHORE the value read is the whole counter,
 * which therefore becomes 0.
 */
static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
{
	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
	ctx->count -= *cnt;
}





