UNIXソケットバッファサイズ
sk->sk_sndbufに送信バッファサイズ、sk->sk_rcvbufに受信バッファサイズが設定されます。UNIXソケットでは、DGRAMの場合はsk_sndbufサイズ以下のデータしか送信できません。STREAMの場合、sk_sndbufサイズを超えるデータはsk_sndbufサイズ単位に分割して繰り返し送信されます(ただし、送信済みのデータが受信側で読み出されるまで送信側はウエイトします)。なお、UNIXソケットでは受信バッファサイズ(sk_rcvbuf)に関する制限は実装されていません。
/proc/sys/net/core配下のsysctlエントリ(wmem_max/rmem_max/wmem_default/rmem_default)を定義するテーブル。
static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { .procname = "wmem_max", .data = &sysctl_wmem_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "rmem_max", .data = &sysctl_rmem_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "wmem_default", .data = &sysctl_wmem_default, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "rmem_default", .data = &sysctl_rmem_default, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, : };
ソケット生成時の送信/受信バッファサイズの初期値は、sysctl_wmem_default/sysctl_rmem_defaultです。setsockopt()で変更する場合、設定値はsysctl_wmem_max(送信)/sysctl_rmem_max(受信)を上限として丸められたうえで2倍され、その結果がSOCK_MIN_SNDBUF(送信)/SOCK_MIN_RCVBUF(受信)未満であれば、それぞれの最小値となります。
/*
 * Default/maximum socket buffer size: the per-packet overhead
 * SKB_TRUESIZE(256) multiplied by 256 packets.
 */
#define _SK_MEM_PACKETS 256
#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Lower bounds applied by sock_setsockopt() to SO_SNDBUF / SO_RCVBUF. */
#define SOCK_MIN_SNDBUF 2048
#define SOCK_MIN_RCVBUF (2048 + sizeof(struct sk_buff))

/* Values exported through /proc/sys/net/core; all four start at the same size. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/*
 * Adjust the buffer-size sysctls according to totalram_pages:
 *  - at or below 4096 pages, all four values become 32767;
 *  - at or above 131072 pages, only the two maxima become 131071;
 *  - otherwise the compile-time initializers above are kept.
 */
void __init sk_init(void)
{
	if (totalram_pages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (totalram_pages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 * Initialize the generic fields of @sk and, when @sock is non-NULL,
 * link @sk and @sock to each other.
 *
 * @sock: owning socket, or NULL (then sk->sk_wq is set to NULL).
 * @sk:   sock to initialize.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
	skb_queue_head_init(&sk->sk_async_wait_queue);
#endif
	sk->sk_send_head = NULL;
	init_timer(&sk->sk_timer);
	sk->sk_allocation = GFP_KERNEL;
	/* Initial buffer sizes are the sysctl defaults as-is (no doubling here). */
	sk->sk_rcvbuf = sysctl_rmem_default;
	sk->sk_sndbuf = sysctl_wmem_default;
	sk->sk_state = TCP_CLOSE;
	sk_set_socket(sk, sock);
	sock_set_flag(sk, SOCK_ZAPPED);
	if (sock) {
		sk->sk_type = sock->type;
		sk->sk_wq = sock->wq;
		sock->sk = sk;
	} else
		sk->sk_wq = NULL;
	spin_lock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	/* Lock class and name are selected per address family (sk->sk_family). */
	lockdep_set_class_and_name(&sk->sk_callback_lock, af_callback_keys + sk->sk_family, af_family_clock_key_strings[sk->sk_family]);
	/* Install the default callbacks. */
	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;
	sk->sk_sndmsg_page = NULL;
	sk->sk_sndmsg_off = 0;
	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	/* Send and receive timeouts both start at MAX_SCHEDULE_TIMEOUT. */
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_stamp = ktime_set(-1L, 0);
	/*
	 * Write barrier before the reference count is set to 1.
	 * NOTE(review): presumably so that a reader observing the refcount
	 * also observes the stores above -- confirm against the reader side.
	 */
	smp_wmb();
	atomic_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
level=SOL_SOCKETなら、ジェネリックなsocketの属性値とするsock_setsockopt()で、そうでないならソケットコールバック.setsockoptによる実装となります。UNIXソケットは.setsockoptは実装されておらずエラーとなります。
#define SOL_SOCKET 1

/*
 * setsockopt(2) entry point.
 *
 * Rejects a negative optlen with -EINVAL, looks up the socket for @fd,
 * runs the security hook, and then dispatches: level == SOL_SOCKET goes to
 * the generic sock_setsockopt(); any other level goes to the socket's own
 * ops->setsockopt callback.
 *
 * Returns the handler's result, the security hook's error, or the error
 * stored in err by sockfd_lookup_light() when the lookup fails.
 */
SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, char __user *, optval, int, optlen)
{
	int err, fput_needed;
	struct socket *sock;

	if (optlen < 0)
		return -EINVAL;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock != NULL) {
		err = security_socket_setsockopt(sock, level, optname);
		if (err)
			goto out_put;

		if (level == SOL_SOCKET)
			err = sock_setsockopt(sock, level, optname, optval, optlen);
		else
			err = sock->ops->setsockopt(sock, level, optname, optval, optlen);
out_put:
		/* Drop the file reference on both the success and the error path. */
		fput_light(sock->file, fput_needed);
	}
	return err;
}

/*
 * Generic SOL_SOCKET option handler.
 *
 * @sock:    socket whose sock->sk is modified.
 * @level:   not examined in this function.
 * @optname: SO_* option selector.
 * @optval:  user pointer to the option value.
 * @optlen:  size of the user buffer in bytes.
 *
 * SO_BINDTODEVICE is handed to sock_bindtodevice() before any other check.
 * For every other option at least sizeof(int) bytes are required and the
 * first int is read into val (valbool is val normalized to 0/1) before the
 * sock is locked.
 *
 * Returns 0 on success or a negative errno (-EINVAL, -EFAULT, -EACCES,
 * -EPERM, -ENOPROTOOPT, or whatever a called helper returns).
 */
int sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	if (optname == SO_BINDTODEVICE)
		return sock_bindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		/* Enabling requires CAP_NET_ADMIN; disabling does not. */
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = valbool;
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		/* These options cannot be set. */
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Clamp the request to the sysctl maximum before doubling. */
		if (val > sysctl_wmem_max)
			val = sysctl_wmem_max;
set_sndbuf:
		/* Also entered from SO_SNDBUFFORCE, which skips the clamp above. */
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		/*
		 * The stored size is twice the requested value, with
		 * SOCK_MIN_SNDBUF as the lower bound.
		 * NOTE(review): on the SO_SNDBUFFORCE path val is unclamped, so
		 * val * 2 could overflow int for very large requests -- confirm.
		 */
		if ((val * 2) < SOCK_MIN_SNDBUF)
			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
		else
			sk->sk_sndbuf = val * 2;
		/* Invoke the write-space callback after the size change. */
		sk->sk_write_space(sk);
		break;
	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;
	case SO_RCVBUF:
		/* Clamp the request to the sysctl maximum before doubling. */
		if (val > sysctl_rmem_max)
			val = sysctl_rmem_max;
set_rcvbuf:
		/* Also entered from SO_RCVBUFFORCE, which skips the clamp above. */
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * The stored size is twice the requested value, with
		 * SOCK_MIN_RCVBUF as the lower bound.
		 * NOTE(review): SOCK_MIN_RCVBUF contains a sizeof term, so this
		 * comparison is presumably done in an unsigned type; a negative
		 * val arriving via SO_RCVBUFFORCE may then bypass the minimum --
		 * confirm.
		 */
		if ((val * 2) < SOCK_MIN_RCVBUF)
			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
		else
			sk->sk_rcvbuf = val * 2;
		break;
	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;
	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;
	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;
	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;
	case SO_PRIORITY:
		/* Values 0..6 are open to everyone; others need CAP_NET_ADMIN. */
		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;
	case SO_LINGER:
		/* This option takes a struct linger, not an int. */
		if (optlen < sizeof(ling)) {
			ret = -EINVAL; /* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
			/*
			 * On 32-bit builds a linger time at or above
			 * MAX_SCHEDULE_TIMEOUT/HZ is stored as MAX_SCHEDULE_TIMEOUT
			 * instead of being multiplied by HZ.
			 */
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;
	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;
	case SO_PASSCRED:
		/* Stored in the struct socket flags rather than in sk. */
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;
	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool) {
			/* SOCK_RCVTSTAMPNS distinguishes the NS variant. */
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;
	case SO_TIMESTAMPING:
		/* Reject any bit outside SOF_TIMESTAMPING_MASK. */
		if (val & ~SOF_TIMESTAMPING_MASK) {
			ret = -EINVAL;
			break;
		}
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE, val & SOF_TIMESTAMPING_TX_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE, val & SOF_TIMESTAMPING_TX_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE, val & SOF_TIMESTAMPING_RX_HARDWARE);
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk, (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, val & SOF_TIMESTAMPING_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, val & SOF_TIMESTAMPING_SYS_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE, val & SOF_TIMESTAMPING_RAW_HARDWARE);
		break;
	case SO_RCVLOWAT:
		/* Negative becomes INT_MAX; zero becomes 1. */
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;
	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;
	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;
	case SO_ATTACH_FILTER:
		/* optlen must be exactly sizeof(struct sock_fprog). */
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;
	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;
	case SO_PASSSEC:
		/* Stored in the struct socket flags rather than in sk. */
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!capable(CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;
	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;
	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;
	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
検証サンプル
[root@localhost c]# cat socketsize.c #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <sys/socket.h> void set_buffsize(int sockfd, int optname, int buff_size); void main() { int s_socket, d_socket; int val[2]; int len = sizeof(sizeof(int)); s_socket = socket(AF_UNIX, SOCK_STREAM, 0); d_socket = socket(AF_UNIX, SOCK_DGRAM, 0); len = sizeof(sizeof(int)); getsockopt(d_socket, SOL_SOCKET, SO_RCVBUF, &val[0], &len); len = sizeof(sizeof(int)); getsockopt(d_socket, SOL_SOCKET, SO_SNDBUF, &val[1], &len); printf("DGRAM INIT\n"); printf(" rcvbuf:%6d\n",val[0]); printf(" sndbuf:%6d\n",val[1]); len = sizeof(sizeof(int)); getsockopt(s_socket, SOL_SOCKET, SO_RCVBUF, &val[0], &len); len = sizeof(sizeof(int)); getsockopt(s_socket, SOL_SOCKET, SO_SNDBUF, &val[1], &len); printf("STREAM INIT\n"); printf(" rcvbuf:%6d\n",val[0]); printf(" sndbuf:%6d\n",val[1]); printf("DGRAM UPDATE\n"); set_buffsize(d_socket, SO_RCVBUF, 0); set_buffsize(d_socket, SO_SNDBUF, 0); set_buffsize(d_socket, SO_RCVBUF, 3000); set_buffsize(d_socket, SO_SNDBUF, 3000); set_buffsize(d_socket, SO_RCVBUF, 163840); set_buffsize(d_socket, SO_SNDBUF, 163840) set_buffsize(d_socket, SO_RCVBUF, 200000); set_buffsize(d_socket, SO_SNDBUF, 200000); printf("STREAM UPDATE\n"); set_buffsize(s_socket, SO_RCVBUF, 0); set_buffsize(s_socket, SO_SNDBUF, 0); set_buffsize(s_socket, SO_RCVBUF, 3000); set_buffsize(s_socket, SO_SNDBUF, 3000); set_buffsize(s_socket, SO_RCVBUF, 163840); set_buffsize(s_socket, SO_SNDBUF, 163840); set_buffsize(s_socket, SO_RCVBUF, 200000); set_buffsize(s_socket, SO_SNDBUF, 200000); close(s_socket); close(d_socket); } void set_buffsize(int socket, int optname, int buf_size) { int val; int len = sizeof(sizeof(int)); char *name; val = buf_size; setsockopt(socket, SOL_SOCKET, optname, (char *)&val, len); getsockopt(socket, SOL_SOCKET, optname, &val, &len); name = (optname == SO_RCVBUF)? "rcvbuf": "sndbuf"; printf(" %s:%6d by(%6d)\n",name, val, buf_size); }検証結果
[root@localhost c]# cat /proc/sys/net/core/wmem_max 163840 [root@localhost c]# cat /proc/sys/net/core/rmem_max 163840 [root@localhost c]# cat /proc/sys/net/core/wmem_default 163840 [root@localhost c]# cat /proc/sys/net/core/rmem_default 163840 [root@localhost c]# ./socketsize.o DGRAM INIT rcvbuf:163840 sndbuf:163840 STREAM INIT rcvbuf:163840 sndbuf:163840 DGRAM UPDATE rcvbuf: 2240 by( 0) sndbuf: 2048 by( 0) rcvbuf: 6000 by( 3000) sndbuf: 6000 by( 3000) rcvbuf:327680 by(163840) sndbuf:327680 by(163840) rcvbuf:327680 by(200000) sndbuf:327680 by(200000) STREAM UPDATE rcvbuf: 2240 by( 0) sndbuf: 2048 by( 0) rcvbuf: 6000 by( 3000) sndbuf: 6000 by( 3000) rcvbuf:327680 by(163840) sndbuf:327680 by(163840) rcvbuf:327680 by(200000) sndbuf:327680 by(200000)