mount

Rev.3を表示中。最新版はこちら。

mountの処理は、デバイスのスーパブロック情報およびマウント先dentry情報からstruct vfsmount *mntを作成し、マウント先dentryのd_mountedをインクリメントし、struct vfsmount *mntを目的に応じたヘッドにリスト化する事にあります。

mount処理ではdo_mount()がコールされます。まず、MS_MGC_VALをクリアしています。ver2.4より前はマジックNOを必要としていたらしく、今は必要としないと言う事のようです。次にデバイス名/マウント先の引数をチェックして、引数のフラグに応じて以下のマウントフラグを再設定します。
MNT_NOSUID:このファイルシステムでは、set UIDを使用できない。
MNT_NODEV:このファイルシステムでは、デバイスファイルにアクセスできない。
MNT_NOEXEC:このファイルシステムでは、プロセスを実行できない。

path_lookup()で、dir_nameのstruct nameidata ndを取得した後、MS_REMOUNT(再マウント)/MS_BIND(バインド)/MS_MOVE(マウント先移動)なら、それぞれdo_remount()/do_loopback()/do_move_mount()がコールされ、通常のマウントの場合、do_add_mount()がコールされる事になります。

/*
 * do_mount - validate a mount request and dispatch it to the right helper.
 *
 * @dev_name:  device name; may be NULL, otherwise must contain a NUL
 *             within PAGE_SIZE bytes
 * @dir_name:  mount point path; must be non-NULL, non-empty and contain
 *             a NUL within PAGE_SIZE bytes
 * @type_page: filesystem type string (checked by do_add_mount())
 * @flags:     MS_* flags from the caller
 * @data_page: filesystem-specific data, passed through untouched
 *
 * Returns -EINVAL for a bad dev_name/dir_name, the error from
 * path_lookup() or security_sb_mount() if either fails, and otherwise
 * whatever the selected helper returns.
 */
long do_mount(char * dev_name, char * dir_name, char *type_page,
                 unsigned long flags, void *data_page)
{
       struct nameidata nd;
       int mnt_flags;
       int err;

       /* Strip the MS_MGC_VAL magic from flags when it is present. */
       if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
               flags &= ~MS_MGC_MSK;

       /* Basic sanity checks on the two strings. */
       if (!dir_name || !*dir_name)
               return -EINVAL;
       if (!memchr(dir_name, 0, PAGE_SIZE))
               return -EINVAL;
       if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
               return -EINVAL;

       /* Move the per-mount bits out of flags and into mnt_flags. */
       mnt_flags = ((flags & MS_NOSUID) ? MNT_NOSUID : 0) |
                   ((flags & MS_NODEV) ? MNT_NODEV : 0) |
                   ((flags & MS_NOEXEC) ? MNT_NOEXEC : 0);
       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);

       /* Resolve the mount point. */
       err = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
       if (err)
               return err;

       err = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
       if (!err) {
               /* Precedence matters: remount, then bind, then move. */
               if (flags & MS_REMOUNT)
                       err = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
                                        data_page);
               else if (flags & MS_BIND)
                       err = do_loopback(&nd, dev_name, flags & MS_REC);
               else if (flags & MS_MOVE)
                       err = do_move_mount(&nd, dev_name);
               else
                       err = do_add_mount(&nd, type_page, flags, mnt_flags,
                                          dev_name, data_page);
       }

       /* The path_lookup() result is released on every path past it. */
       path_release(&nd);
       return err;
}

do_add_mount()は通常のマウントを処理します。まずパラメータ/ケーパビリティをチェックした後、do_kern_mount()でstruct vfsmountを取得します。

while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))の処理は、この時点で他のプロセスがnd->dentryに別のファイルシステムをマウントしているかもしれないために行います。do_kern_mount()はスーパブロックを読込むため、スリープするかもしれないからです。もしマウントされていれば、follow_down()でマウントを辿っていった先をマウント先としています。

check_mnt()は、別のネームスペース間で同じファイルシステムを使わせないためのチェックと思います。マウントディレクトリのマウントされているネームスペースが、マウントしようとしているプロセスのネームスペースと違うネームスペースだとエラーです。

if (nd->mnt->mnt_sb == mnt->mnt_sb && nd->mnt->mnt_root == nd->dentry)はファイルシステムが、すでにマウントされているディレクトリにマウントしようとしているかチェックします。

ここまできたら、graft_tree()で、struct vfsmountにstruct nameidataの情報を設定します。

/*
 * Handle an ordinary mount request (the fall-through case of do_mount()):
 * create a new vfsmount with do_kern_mount() and graft it onto the mount
 * point described by nd.
 *
 * Returns -EINVAL if type is NULL or has no NUL within PAGE_SIZE bytes, or
 * if check_mnt() rejects the mount point; -EPERM without CAP_SYS_ADMIN;
 * -EBUSY if the same superblock is already mounted with nd->dentry as its
 * root; otherwise the error from do_kern_mount() or the result of
 * graft_tree().
 */
static int do_add_mount(struct nameidata *nd, char *type, int flags,
                       int mnt_flags, char *name, void *data)
{
       struct vfsmount *mnt;
       int err;

       /* type must be non-NULL and NUL-terminated within PAGE_SIZE bytes */
       if (!type || !memchr(type, 0, PAGE_SIZE))
               return -EINVAL;

       if (!capable(CAP_SYS_ADMIN))
               return -EPERM;

       mnt = do_kern_mount(type, flags, name, data);
       /* err is only meaningful when IS_ERR(mnt); it is overwritten below */
       err = PTR_ERR(mnt);
       if (IS_ERR(mnt))
               goto out;

       down_write(&current->namespace->sem);
       /*
        * Something may have been mounted on nd->dentry while
        * do_kern_mount() ran; if so, step down onto whatever is mounted
        * there until we reach a dentry that is not a mount point.
        */
       while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
               ;
       /* The mount point must belong to the caller's namespace */
       err = -EINVAL;
       if (!check_mnt(nd->mnt))
               goto unlock;

       /* Refuse to mount the same filesystem on top of its own root */
       err = -EBUSY;
       if (nd->mnt->mnt_sb == mnt->mnt_sb && nd->mnt->mnt_root == nd->dentry)
               goto unlock;

       mnt->mnt_flags = mnt_flags;
       err = graft_tree(mnt, nd);
unlock:
       up_write(&current->namespace->sem);
       /*
        * Drop the reference we got from do_kern_mount(). On success
        * graft_tree() has taken its own via mntget(), so the mount
        * survives; on failure this is the last reference.
        */
       mntput(mnt);
out:
       return err;
}

/*
 * Return non-zero when mnt belongs to the namespace of the calling task
 * (current), zero otherwise.
 */
static inline int check_mnt(struct vfsmount *mnt)
{
       return mnt->mnt_namespace == current->namespace;
}

do_kern_mount()は、alloc_vfsmnt()でstruct vfsmountを取得した後、デバイスのスーパブロックを読込んで、このルートディレクトリ(通常は/)、そのdentryを設定します。なおmnt->mnt_parent = mntとし、この時点ではマウントの親子間の階層は設定しません。(graft_treeで行う。)

/*
 * Create a vfsmount for a filesystem of type fstype: look the type up,
 * allocate the vfsmount, obtain the superblock through type->get_sb() and
 * point the vfsmount at the superblock's root dentry. The result is not
 * yet attached anywhere (it is its own parent).
 *
 * Returns the new vfsmount, or an ERR_PTR(): -ENODEV when the filesystem
 * type is unknown, -ENOMEM when alloc_vfsmnt() fails, the error returned
 * by get_sb(), or the error returned by security_sb_kern_mount().
 */
struct vfsmount *
do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
       struct file_system_type *type = get_fs_type(fstype);
       /* sb doubles as the error value returned on the failure paths */
       struct super_block *sb = ERR_PTR(-ENOMEM);
       struct vfsmount *mnt;
       int error;

       if (!type)
               return ERR_PTR(-ENODEV);

       mnt = alloc_vfsmnt(name);
       if (!mnt)
               goto out;
       sb = type->get_sb(type, flags, name, data);
       if (IS_ERR(sb))
               goto out_mnt;
       error = security_sb_kern_mount(sb);
       if (error) 
               goto out_sb;
       mnt->mnt_sb = sb;
       mnt->mnt_root = dget(sb->s_root);
       /* Placeholder values; attach_mnt() sets the real mountpoint/parent */
       mnt->mnt_mountpoint = sb->s_root;
       mnt->mnt_parent = mnt;
       /* NOTE(review): presumably get_sb() returns with s_umount held for
        * writing -- confirm against the get_sb() implementations */
       up_write(&sb->s_umount);
       /* Drop the reference taken by get_fs_type() */
       put_filesystem(type);
       return mnt;
out_sb:
       up_write(&sb->s_umount);
       deactivate_super(sb);
       /* Convert the security hook's error into the pointer we return */
       sb = ERR_PTR(error);
out_mnt:
       free_vfsmnt(mnt);
out:
       put_filesystem(type);
       return (struct vfsmount *)sb;
}

alloc_vfsmnt()はスラブから、struct vfsmount *mntを取得し、４つの目的に応じた各リスト、及びデバイス名で初期化します。

/*
 * alloc_vfsmnt - allocate and initialise a vfsmount.
 *
 * @name: device name to record in mnt_devname, or NULL for none
 *
 * The structure comes from mnt_cache, is zeroed, starts with a reference
 * count of 1 and has its four list heads initialised. When @name is
 * given, a private copy is stored in mnt_devname; if that copy cannot be
 * allocated the vfsmount is still returned, with mnt_devname left NULL.
 *
 * Returns the new vfsmount, or NULL when the cache allocation fails.
 */
struct vfsmount *alloc_vfsmnt(const char *name)
{
       struct vfsmount *mnt;
       char *devname;
       int len;

       mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
       if (!mnt)
               return NULL;

       memset(mnt, 0, sizeof(*mnt));
       atomic_set(&mnt->mnt_count, 1);
       INIT_LIST_HEAD(&mnt->mnt_hash);
       INIT_LIST_HEAD(&mnt->mnt_child);
       INIT_LIST_HEAD(&mnt->mnt_mounts);
       INIT_LIST_HEAD(&mnt->mnt_list);

       if (!name)
               return mnt;

       /* Copy the name including its terminating NUL. */
       len = strlen(name) + 1;
       devname = kmalloc(len, GFP_KERNEL);
       if (devname) {
               memcpy(devname, name, len);
               mnt->mnt_devname = devname;
       }

       return mnt;
}

graft_tree()で、作成されたstruct vfsmountを、実際のマウント先にバインドし、目的に応じた項目のリスト化の処理を行います。

まずファイルシステムがユーザがマウントできるかどうかチェックします。(rootfsのように、システムとしてカーネルしかマウントできないファイルシステムもあります。)

マウント先がディレクトリかどうかのチェックを行います。mnt->mnt_rootは、do_kern_mount()でスーパブロックのルートのdentryが設定されています。これは/です。従って、S_ISDIR(mnt->mnt_root->d_inode->i_mode)はtrueです。そのためS_ISDIR(nd->dentry->d_inode->i_mode)もtrueとならなければなりません。nd->dentry->d_inode->i_modeはディレクトリでなければならないと言う事です。なぜこのような比較にしているかというと、ループバック(MS_BIND)でのマウントではmnt->mnt_rootが通常ファイルのdentryになり得るためで、ディレクトリにはディレクトリ、ファイルにはファイルしかマウントできないようにしています。

この時点で、他のプロセスがマウントしようとしているディレクトリを削除しているかもしれないので、IS_DEADDIRマクロでチェックし、SELinuxのチェックをして、マウントディレクトリがルートかアンハッシュされていないと(ルートはアンハッシュされない。)、attach_mnt()でstruct vfsmountにマウントディレクトリのdentryにかかる設定を行います。そしてネームスペースにstruct vfsmountをリストします。

ここでのリストは、list_splice()でリストをリスト化する処理としています。do_add_mount()からの処理だと、mntは１つだけでリストしておらず、list_splice()での結合は冗長のように思われますが、ループバックでのマウントだと、すでに作成されているmntを複写することで、struct vfsmountを作成しており、従ってmntはリストされている事も在りうるからです。

/*
 * Attach mnt at the location described by nd and add it to the mount list
 * of the current task's namespace.
 *
 * Returns 0 on success; -EINVAL if the superblock is flagged MS_NOUSER;
 * -ENOTDIR if exactly one of the mount point and mnt's root is a
 * directory; -ENOENT if the mount point inode is a dead directory or its
 * dentry is unhashed and not a root; or the error from
 * security_sb_check_sb().
 */
static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
{
       int err;
       if (mnt->mnt_sb->s_flags & MS_NOUSER)
               return -EINVAL;

       /* A directory may only go on a directory, a non-directory only on
        * a non-directory */
       if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
             S_ISDIR(mnt->mnt_root->d_inode->i_mode))
               return -ENOTDIR;

       err = -ENOENT;
       down(&nd->dentry->d_inode->i_sem);
       /* The mount point may have been removed in the meantime */
       if (IS_DEADDIR(nd->dentry->d_inode))
               goto out_unlock;

       err = security_sb_check_sb(mnt, nd);
       if (err)
               goto out_unlock;

       /* err is 0 here; re-arm the failure code for the check below */
       err = -ENOENT;
       spin_lock(&vfsmount_lock);
       if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
               /*
                * Temporary list head: linking it in front of
                * mnt->mnt_list makes it the head of a list containing mnt
                * plus anything already chained on mnt_list, and
                * list_splice() then appends that whole list after the
                * last entry of the namespace's mount list.
                */
               struct list_head head;

               attach_mnt(mnt, nd);
               list_add_tail(&head, &mnt->mnt_list);
               list_splice(&head, current->namespace->list.prev);
               /* Reference held on behalf of the mount tree */
               mntget(mnt);
               err = 0;
       }
       spin_unlock(&vfsmount_lock);
out_unlock:
       up(&nd->dentry->d_inode->i_sem);
       /* The post-mount security hook runs only on success, after both
        * locks have been released */
       if (!err)
               security_sb_post_addmount(mnt, nd);
       return err;
}

attach_mnt()は、struct vfsmountにマウントディレクトリのstruct nameidataに掛かる設定を行います。mnt->mnt_parentはマウントディレクトリのファイルシステムのstruct vfsmount、mnt->mnt_mountpointにマウントディレクトリを設定します。

そして、mount_hashtable[]にmnt->mnt_hashを、親(マウント先のファイルシステム)のstruct vfsmountのmnt_mountsにmnt->mnt_childをリストして、最後にマウントディレクトリのdentry->d_mounted++とすることで、このディレクトリがマウントされている事が分かり、その時に応じたmntリストを辿ることで、マウントしているstruct vfsmountを取得する事ができるわけです。

/*
 * Link mnt into the mount tree at the location described by nd.
 *
 * Takes a reference on the parent vfsmount (nd->mnt) and on the mount
 * point dentry (nd->dentry) and records them in mnt, hashes mnt in
 * mount_hashtable under the (parent vfsmount, mount point dentry) pair,
 * appends mnt to the parent's list of child mounts, and bumps the
 * d_mounted count of the mount point dentry.
 *
 * graft_tree() calls this with vfsmount_lock held.
 */
static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
{
       mnt->mnt_parent = mntget(nd->mnt);
       mnt->mnt_mountpoint = dget(nd->dentry);
       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
       list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
       /* Marks the dentry as a mount point */
       nd->dentry->d_mounted++;
}