UNIXソケットバッファサイズ


sk->sk_sndbufに送信バッファサイズ、sk->sk_rcvbufに受信バッファサイズが設定されます。UNIXソケットでは、DGRAMの場合はsk_sndbufサイズ以下のデータしか送信できません。STREAMの場合、sk_sndbufサイズを超えるデータはsk_sndbufサイズ毎に分割して繰り返し送信されます。(ただし、送信済みのデータが受信側で読み出されないとウエイトします。)なお、UNIXソケットでは、受信バッファサイズに関する制限は実装されていません。

/proc/sys/net/coreコールバック。
/*
 * sysctl table for the net.core entries (the article lists it as the
 * /proc/sys/net/core callback table).
 *
 * Every entry shown here maps a file name (.procname) to an int-sized
 * global (.data/.maxlen), is created with mode 0644, and is read/written
 * through proc_dointvec.
 *
 * NOTE: this is an abridged excerpt; the " :" line below stands for the
 * entries that were left out.
 */
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
       {
               /* Backed by sysctl_wmem_max. */
               .procname       = "wmem_max",
               .data           = &sysctl_wmem_max,
               .maxlen         = sizeof(int),
               .mode           = 0644,
               .proc_handler   = proc_dointvec
       },
       {
               /* Backed by sysctl_rmem_max. */
               .procname       = "rmem_max",
               .data           = &sysctl_rmem_max,
               .maxlen         = sizeof(int),
               .mode           = 0644,
               .proc_handler   = proc_dointvec
       },
       {
               /* Backed by sysctl_wmem_default. */
               .procname       = "wmem_default",
               .data           = &sysctl_wmem_default,
               .maxlen         = sizeof(int),
               .mode           = 0644,
               .proc_handler   = proc_dointvec
       },
       {
               /* Backed by sysctl_rmem_default. */
               .procname       = "rmem_default",
               .data           = &sysctl_rmem_default,
               .maxlen         = sizeof(int),
               .mode           = 0644,
               .proc_handler   = proc_dointvec
       },
 :
};
初期の送信/受信バッファサイズはsysctl_wmem_default/sysctl_rmem_defaultです。setsockopt()のSO_SNDBUF/SO_RCVBUFで設定する場合、指定値はsysctl_wmem_max/sysctl_rmem_maxを上限として切り詰められたうえで2倍され、その結果がSOCK_MIN_SNDBUF/SOCK_MIN_RCVBUFを下回るときは、この下限値が設定されます。
/*
 * Compile-time initial value for the buffer-size sysctls below:
 * _SK_MEM_PACKETS packets, each accounted as SKB_TRUESIZE(256).
 */
#define _SK_MEM_PACKETS         256
#define _SK_MEM_OVERHEAD        SKB_TRUESIZE(256)
#define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Lower bounds applied by sock_setsockopt() for SO_SNDBUF / SO_RCVBUF. */
#define SOCK_MIN_SNDBUF 2048
#define SOCK_MIN_RCVBUF (2048 + sizeof(struct sk_buff))

/*
 * Run-time tunables: the *_max values cap SO_SNDBUF/SO_RCVBUF requests and
 * the *_default values seed sk_sndbuf/sk_rcvbuf in sock_init_data().
 * All four start out at the same SK_*MEM_MAX value.
 */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/*
 * Adjust the socket buffer sysctls according to the total number of RAM
 * pages:
 *   totalram_pages <= 4096   : both maxima and both defaults become 32767
 *   totalram_pages >= 131072 : only the two maxima change, to 131071
 * For any page count in between, the variables keep the values they
 * already hold.
 */
void __init sk_init(void)
{
       /* Large-memory case: raise the caps, leave the defaults alone. */
       if (totalram_pages >= 131072) {
               sysctl_rmem_max = 131071;
               sysctl_wmem_max = 131071;
               return;
       }

       /* Mid-range case: nothing to adjust. */
       if (totalram_pages > 4096)
               return;

       /* Small-memory case: shrink caps and defaults alike. */
       sysctl_rmem_default = 32767;
       sysctl_wmem_default = 32767;
       sysctl_rmem_max = 32767;
       sysctl_wmem_max = 32767;
}

/*
 * sock_init_data - fill in the generic fields of a struct sock.
 * @sock: owning struct socket, or NULL
 * @sk:   the sock to initialise
 *
 * Sets up the queues, timer, default callbacks and timeouts of @sk and,
 * when @sock is non-NULL, links the two objects together.  The buffer
 * limits are taken straight from sysctl_rmem_default / sysctl_wmem_default.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
       skb_queue_head_init(&sk->sk_receive_queue);
       skb_queue_head_init(&sk->sk_write_queue);
       skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
       skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

       sk->sk_send_head        =       NULL;

       init_timer(&sk->sk_timer);

       sk->sk_allocation       =       GFP_KERNEL;
       /* Initial buffer limits: the sysctl defaults, used as-is. */
       sk->sk_rcvbuf           =       sysctl_rmem_default;
       sk->sk_sndbuf           =       sysctl_wmem_default;
       sk->sk_state            =       TCP_CLOSE;
       sk_set_socket(sk, sock);

       sock_set_flag(sk, SOCK_ZAPPED);

       /* Cross-link sk and sock when a struct socket was supplied. */
       if (sock) {
               sk->sk_type     =       sock->type;
               sk->sk_wq       =       sock->wq;
               sock->sk        =       sk;
       } else
               sk->sk_wq       =       NULL;

       spin_lock_init(&sk->sk_dst_lock);
       rwlock_init(&sk->sk_callback_lock);
       /* The lockdep class/name of sk_callback_lock is indexed by sk_family. */
       lockdep_set_class_and_name(&sk->sk_callback_lock,
                       af_callback_keys + sk->sk_family,
                       af_family_clock_key_strings[sk->sk_family]);

       /* Install the default (sock_def_*) callbacks. */
       sk->sk_state_change     =       sock_def_wakeup;
       sk->sk_data_ready       =       sock_def_readable;
       sk->sk_write_space      =       sock_def_write_space;
       sk->sk_error_report     =       sock_def_error_report;
       sk->sk_destruct         =       sock_def_destruct;

       sk->sk_sndmsg_page      =       NULL;
       sk->sk_sndmsg_off       =       0;

       sk->sk_peer_pid         =       NULL;
       sk->sk_peer_cred        =       NULL;
       sk->sk_write_pending    =       0;
       sk->sk_rcvlowat         =       1;
       /* Send and receive timeouts both start at MAX_SCHEDULE_TIMEOUT. */
       sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
       sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;

       sk->sk_stamp = ktime_set(-1L, 0);

       /*
        * NOTE(review): the write barrier presumably orders all of the
        * stores above before sk_refcnt becomes non-zero -- confirm against
        * the upstream comment.
        */
       smp_wmb();
       atomic_set(&sk->sk_refcnt, 1);
       atomic_set(&sk->sk_drops, 0);
}
level=SOL_SOCKETなら、ジェネリックなsocketの属性値とするsock_setsockopt()で、そうでないならソケットコールバック.setsockoptによる実装となります。UNIXソケットは.setsockoptは実装されておらずエラーとなります。
#define SOL_SOCKET      1

/*
 * setsockopt(2) system call entry.
 *
 * A negative optlen is rejected with -EINVAL.  After the fd has been
 * resolved to a struct socket and the security hook has approved the
 * request, SOL_SOCKET options go to the generic sock_setsockopt(); every
 * other level is forwarded to the socket's own ops->setsockopt handler.
 * The file reference taken by the lookup is dropped before returning.
 */
SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
               char __user *, optval, int, optlen)
{
       struct socket *sock;
       int fput_needed;
       int err;

       if (optlen < 0)
               return -EINVAL;

       sock = sockfd_lookup_light(fd, &err, &fput_needed);
       if (!sock)
               return err;     /* whatever the lookup stored in err */

       err = security_socket_setsockopt(sock, level, optname);
       if (!err) {
               if (level != SOL_SOCKET)
                       err = sock->ops->setsockopt(sock, level, optname,
                                                   optval, optlen);
               else
                       err = sock_setsockopt(sock, level, optname, optval,
                                             optlen);
       }

       fput_light(sock->file, fput_needed);
       return err;
}

/*
 * sock_setsockopt - apply a SOL_SOCKET-level option to a socket.
 * @sock:    socket whose sock->sk (and, for some options, sock->flags) is
 *           modified
 * @level:   not consulted inside this function
 * @optname: SO_* option selector
 * @optval:  user-space pointer to the option value
 * @optlen:  size in bytes of the value at @optval
 *
 * SO_BINDTODEVICE is handed to sock_bindtodevice() before anything else.
 * For every other option an int is first fetched from @optval (so @optlen
 * must be at least sizeof(int)), then the option is applied between
 * lock_sock() and release_sock().
 *
 * Returns 0 on success, otherwise a negative errno value: -EINVAL,
 * -EFAULT, -EACCES, -EPERM, -ENOPROTOOPT, or whatever the helper invoked
 * for the option returned.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
                   char __user *optval, unsigned int optlen)
{
       struct sock *sk = sock->sk;
       int val;
       int valbool;
       struct linger ling;
       int ret = 0;

       /* Handled up front, before the int fetch and before taking the lock. */
       if (optname == SO_BINDTODEVICE)
               return sock_bindtodevice(sk, optval, optlen);

       if (optlen < sizeof(int))
               return -EINVAL;

       if (get_user(val, (int __user *)optval))
               return -EFAULT;

       /* Normalised 0/1 form of val for the flag-style options. */
       valbool = val ? 1 : 0;

       lock_sock(sk);

       switch (optname) {
       case SO_DEBUG:
               /* Turning debugging on requires CAP_NET_ADMIN; off does not. */
               if (val && !capable(CAP_NET_ADMIN))
                       ret = -EACCES;
               else
                       sock_valbool_flag(sk, SOCK_DBG, valbool);
               break;
       case SO_REUSEADDR:
               sk->sk_reuse = valbool;
               break;
       /* These options cannot be set here. */
       case SO_TYPE:
       case SO_PROTOCOL:
       case SO_DOMAIN:
       case SO_ERROR:
               ret = -ENOPROTOOPT;
               break;
       case SO_DONTROUTE:
               sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
               break;
       case SO_BROADCAST:
               sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
               break;
       case SO_SNDBUF:
               /* Cap the request at sysctl_wmem_max. */
               if (val > sysctl_wmem_max)
                       val = sysctl_wmem_max;
set_sndbuf:
               /*
                * Record that the send buffer was set explicitly, then
                * store twice the (possibly capped) value, but never less
                * than SOCK_MIN_SNDBUF.
                * NOTE(review): the doubling presumably leaves room for
                * bookkeeping overhead -- see socket(7) to confirm.
                */
               sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
               if ((val * 2) < SOCK_MIN_SNDBUF)
                       sk->sk_sndbuf = SOCK_MIN_SNDBUF;
               else
                       sk->sk_sndbuf = val * 2;
               /* Invoke the socket's write-space callback with the new limit in place. */
               sk->sk_write_space(sk);
               break;
       case SO_SNDBUFFORCE:
               /* Same as SO_SNDBUF minus the sysctl_wmem_max cap; needs CAP_NET_ADMIN. */
               if (!capable(CAP_NET_ADMIN)) {
                       ret = -EPERM;
                       break;
               }
               goto set_sndbuf;
       case SO_RCVBUF:
               /* Cap the request at sysctl_rmem_max. */
               if (val > sysctl_rmem_max)
                       val = sysctl_rmem_max;
set_rcvbuf:
               /*
                * Record that the receive buffer was set explicitly, then
                * store twice the (possibly capped) value, but never less
                * than SOCK_MIN_RCVBUF.
                */
               sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
               if ((val * 2) < SOCK_MIN_RCVBUF)
                       sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
               else
                       sk->sk_rcvbuf = val * 2;
               break;
       case SO_RCVBUFFORCE:
               /* Same as SO_RCVBUF minus the sysctl_rmem_max cap; needs CAP_NET_ADMIN. */
               if (!capable(CAP_NET_ADMIN)) {
                       ret = -EPERM;
                       break;
               }
               goto set_rcvbuf;
       case SO_KEEPALIVE:
#ifdef CONFIG_INET
               /* TCP sockets additionally get tcp_set_keepalive(). */
               if (sk->sk_protocol == IPPROTO_TCP)
                       tcp_set_keepalive(sk, valbool);
#endif
               sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
               break;
       case SO_OOBINLINE:
               sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
               break;
       case SO_NO_CHECK:
               sk->sk_no_check = valbool;
               break;
       case SO_PRIORITY:
               /* Values 0..6 are open to everyone; anything else needs CAP_NET_ADMIN. */
               if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
                       sk->sk_priority = val;
               else
                       ret = -EPERM;
               break;
       case SO_LINGER:
               /* This option takes a struct linger, so re-read optval at full size. */
               if (optlen < sizeof(ling)) {
                       ret = -EINVAL;  /* 1003.1g */
                       break;
               }
               if (copy_from_user(&ling, optval, sizeof(ling))) {
                       ret = -EFAULT;
                       break;
               }
               if (!ling.l_onoff)
                       sock_reset_flag(sk, SOCK_LINGER);
               else {
                       /*
                        * On 32-bit longs a large l_linger is pinned to
                        * MAX_SCHEDULE_TIMEOUT (presumably so that
                        * l_linger * HZ cannot overflow).
                        */
#if (BITS_PER_LONG == 32)
                       if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
                               sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
                       else
#endif
                               sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
                       sock_set_flag(sk, SOCK_LINGER);
               }
               break;
       case SO_BSDCOMPAT:
               /* Accepted, but only triggers the obsolete-option warning. */
               sock_warn_obsolete_bsdism("setsockopt");
               break;
       case SO_PASSCRED:
               /* Stored in sock->flags rather than in the sk flags. */
               if (valbool)
                       set_bit(SOCK_PASSCRED, &sock->flags);
               else
                       clear_bit(SOCK_PASSCRED, &sock->flags);
               break;
       case SO_TIMESTAMP:
       case SO_TIMESTAMPNS:
               /*
                * Both options share SOCK_RCVTSTAMP; SOCK_RCVTSTAMPNS is set
                * only when SO_TIMESTAMPNS was the option requested.
                */
               if (valbool)  {
                       if (optname == SO_TIMESTAMP)
                               sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                       else
                               sock_set_flag(sk, SOCK_RCVTSTAMPNS);
                       sock_set_flag(sk, SOCK_RCVTSTAMP);
                       sock_enable_timestamp(sk, SOCK_TIMESTAMP);
               } else {
                       sock_reset_flag(sk, SOCK_RCVTSTAMP);
                       sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
               }
               break;
       case SO_TIMESTAMPING:
               /* Reject any bit outside SOF_TIMESTAMPING_MASK. */
               if (val & ~SOF_TIMESTAMPING_MASK) {
                       ret = -EINVAL;
                       break;
               }
               /* Mirror each SOF_TIMESTAMPING_* request bit into its sock flag. */
               sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
                                 val & SOF_TIMESTAMPING_TX_HARDWARE);
               sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
                                 val & SOF_TIMESTAMPING_TX_SOFTWARE);
               sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
                                 val & SOF_TIMESTAMPING_RX_HARDWARE);
               /* RX software timestamping goes through the enable/disable helpers. */
               if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
                       sock_enable_timestamp(sk,
                                             SOCK_TIMESTAMPING_RX_SOFTWARE);
               else
                       sock_disable_timestamp(sk,
                                              (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
               sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
                                 val & SOF_TIMESTAMPING_SOFTWARE);
               sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
                                 val & SOF_TIMESTAMPING_SYS_HARDWARE);
               sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
                                 val & SOF_TIMESTAMPING_RAW_HARDWARE);
               break;
       case SO_RCVLOWAT:
               /* Negative means INT_MAX; zero is stored as 1. */
               if (val < 0)
                       val = INT_MAX;
               sk->sk_rcvlowat = val ? : 1;
               break;
       case SO_RCVTIMEO:
               ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
               break;
       case SO_SNDTIMEO:
               ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
               break;
       case SO_ATTACH_FILTER:
               /* optlen must be exactly sizeof(struct sock_fprog), else -EINVAL. */
               ret = -EINVAL;
               if (optlen == sizeof(struct sock_fprog)) {
                       struct sock_fprog fprog;

                       ret = -EFAULT;
                       if (copy_from_user(&fprog, optval, sizeof(fprog)))
                               break;

                       ret = sk_attach_filter(&fprog, sk);
               }
               break;
       case SO_DETACH_FILTER:
               ret = sk_detach_filter(sk);
               break;
       case SO_PASSSEC:
               /* Stored in sock->flags rather than in the sk flags. */
               if (valbool)
                       set_bit(SOCK_PASSSEC, &sock->flags);
               else
                       clear_bit(SOCK_PASSSEC, &sock->flags);
               break;
       case SO_MARK:
               /* Setting the mark requires CAP_NET_ADMIN. */
               if (!capable(CAP_NET_ADMIN))
                       ret = -EPERM;
               else
                       sk->sk_mark = val;
               break;
       case SO_RXQ_OVFL:
               sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
               break;
       case SO_WIFI_STATUS:
               sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
               break;
       default:
               /* Unknown option name. */
               ret = -ENOPROTOOPT;
               break;
       }
       release_sock(sk);
       return ret;
}
検証サンプル
[root@localhost c]# cat socketsize.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>

void    set_buffsize(int sockfd, int optname, int buff_size);
/*
 * Print the current SO_RCVBUF and SO_SNDBUF values of sockfd under the
 * given heading line.
 */
static void print_initial_bufsize(const char *title, int sockfd)
{
       int val[2] = { 0, 0 };
       socklen_t len;

       /* len is an in/out argument of getsockopt(), so reset it every time. */
       len = sizeof(val[0]);
       if (getsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, &val[0], &len) < 0)
               perror("getsockopt(SO_RCVBUF)");
       len = sizeof(val[1]);
       if (getsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &val[1], &len) < 0)
               perror("getsockopt(SO_SNDBUF)");

       printf("%s\n", title);
       printf(" rcvbuf:%6d\n", val[0]);
       printf(" sndbuf:%6d\n", val[1]);
}

/*
 * Create one AF_UNIX datagram socket and one AF_UNIX stream socket, print
 * their initial receive/send buffer sizes, then request a series of sizes
 * through set_buffsize() and let it print what the kernel actually stored.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if a socket could not be created.
 */
int main(void)
{
       /* Requested sizes: below the minimum, small, and two large values. */
       static const int sizes[] = { 0, 3000, 163840, 200000 };
       const size_t nsizes = sizeof(sizes) / sizeof(sizes[0]);
       int s_socket, d_socket;
       size_t i;

       s_socket = socket(AF_UNIX, SOCK_STREAM, 0);
       d_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
       if (s_socket < 0 || d_socket < 0) {
               perror("socket");
               if (s_socket >= 0)
                       close(s_socket);
               if (d_socket >= 0)
                       close(d_socket);
               return EXIT_FAILURE;
       }

       print_initial_bufsize("DGRAM INIT", d_socket);
       print_initial_bufsize("STREAM INIT", s_socket);

       printf("DGRAM UPDATE\n");
       for (i = 0; i < nsizes; i++) {
               set_buffsize(d_socket, SO_RCVBUF, sizes[i]);
               set_buffsize(d_socket, SO_SNDBUF, sizes[i]);
       }

       printf("STREAM UPDATE\n");
       for (i = 0; i < nsizes; i++) {
               set_buffsize(s_socket, SO_RCVBUF, sizes[i]);
               set_buffsize(s_socket, SO_SNDBUF, sizes[i]);
       }

       close(s_socket);
       close(d_socket);
       return EXIT_SUCCESS;
}

/*
 * Request a new buffer size for a socket and print what was stored.
 *
 * sockfd   : socket descriptor to operate on
 * optname  : SO_RCVBUF selects the receive buffer; any other value is
 *            labelled "sndbuf" in the output (callers pass SO_SNDBUF)
 * buf_size : size passed to setsockopt()
 *
 * Prints one line of the form " <name>:<stored> by(<requested>)".  A
 * failing setsockopt()/getsockopt() is reported with perror(); the result
 * line is printed regardless.
 */
void    set_buffsize(int sockfd, int optname, int buf_size)
{
       int val = buf_size;
       /* The option is a plain int, so its length is sizeof(int). */
       socklen_t len = sizeof(val);
       const char *name = (optname == SO_RCVBUF) ? "rcvbuf" : "sndbuf";

       if (setsockopt(sockfd, SOL_SOCKET, optname, &val, len) < 0)
               perror("setsockopt");

       /* Read the value back to see what the kernel actually stored. */
       len = sizeof(val);
       if (getsockopt(sockfd, SOL_SOCKET, optname, &val, &len) < 0)
               perror("getsockopt");

       printf(" %s:%6d by(%6d)\n", name, val, buf_size);
}
検証結果
[root@localhost c]# cat /proc/sys/net/core/wmem_max
163840
[root@localhost c]# cat /proc/sys/net/core/rmem_max
163840
[root@localhost c]# cat /proc/sys/net/core/wmem_default
163840
[root@localhost c]# cat /proc/sys/net/core/rmem_default
163840

[root@localhost c]# ./socketsize.o
DGRAM INIT
 rcvbuf:163840
 sndbuf:163840
STREAM INIT
 rcvbuf:163840
 sndbuf:163840
DGRAM UPDATE
 rcvbuf:  2240 by(     0)
 sndbuf:  2048 by(     0)
 rcvbuf:  6000 by(  3000)
 sndbuf:  6000 by(  3000)
 rcvbuf:327680 by(163840)
 sndbuf:327680 by(163840)
 rcvbuf:327680 by(200000)
 sndbuf:327680 by(200000)
STREAM UPDATE
 rcvbuf:  2240 by(     0)
 sndbuf:  2048 by(     0)
 rcvbuf:  6000 by(  3000)
 sndbuf:  6000 by(  3000)
 rcvbuf:327680 by(163840)
 sndbuf:327680 by(163840)
 rcvbuf:327680 by(200000)
 sndbuf:327680 by(200000)


最終更新 2016/09/28 13:02:08 - north
(2016/09/28 13:02:08 作成)


検索

アクセス数
3575119
最近のコメント
コアダンプファイル - sakaia
list_head構造体 - yocto_no_yomikata
勧告ロックと強制ロック - wataash
LKMからのファイル出力 - 重松 宏昌
kprobe - ななし
ksetの実装 - スーパーコピー
カーネルスレッドとは - ノース
カーネルスレッドとは - nbyst
asmlinkageってなに? - ノース
asmlinkageってなに? - よろしく
Adsense
広告情報が設定されていません。