共有マウント

SHAREはclone_mnt()で実装されるmount bind等で扱われます。(他にcloneでのスレッド) clone_mnt()は元ファイルシステムを雛形とし、mnt_rootを接続先dentryとする新規のmntを作成します。従って両者は互いに独立したmntとなり、一方のmnt配下のディレクトリにmountするといった変更は他方のmntへ反映されず、一方のmnt下でmountされたファイルシステムを他方から参照することはできません。しかし、bind元mntをSHAREとすると、他方のmntにもmountされます。

検証サンプル

[root@localhost north]# mount -o loop loopfile1 mnt1/
[root@localhost north]# mount -o loop loopfile2 mnt2/
[root@localhost north]# mount -o loop loopfile3 mnt3/

[root@localhost north]# ls mnt1
lost+found  mnt1.txt  submnt
[root@localhost north]# ls mnt1/submnt/
submnt1.txt

[root@localhost north]# ls mnt2
lost+found  mnt2.txt  submnt
[root@localhost north]# ls mnt2/submnt/
submnt2.txt

[root@localhost north]# ls mnt3
lost+found  mnt3.txt  submnt
[root@localhost north]# ls mnt3/submnt/
submnt3.txt

NO SHAREのbind元mnt1をmnt2にbind後、mnt1/submntをmountしても、mnt2/submntにはmountされません。

[root@localhost north]# mount --bind mnt1 mnt2
[root@localhost north]# mount -o loop loopfile3 mnt1/submnt/
[root@localhost north]# ls mnt1/submnt/
lost+found  mnt3.txt  submnt
[root@localhost north]# ls mnt2/submnt/
submnt1.txt

SHAREのbind元mnt1をmnt2にbind後、mnt1/submntをmountすると、mnt2/submntにもmountされます。

[root@localhost north]# mount --make-shared mnt1
[root@localhost north]# mount --bind mnt1 mnt2
[root@localhost north]# mount -o loop loopfile3 mnt1/submnt/
[root@localhost north]# ls mnt1/submnt/
lost+found  mnt3.txt  submnt
[root@localhost north]# ls mnt2/submnt/
lost+found  mnt3.txt  submnt

SHAREのbind元mnt1をmnt2にbind後、逆にmnt2/submntをmountした場合も、mnt1/submntにmountされます。

[root@localhost north]# mount --make-shared mnt1
[root@localhost north]# mount --bind mnt1 mnt2
[root@localhost north]# mount -o loop loopfile3 mnt2/submnt/
[root@localhost north]# ls mnt1/submnt/
lost+found  mnt3.txt  submnt
[root@localhost north]# ls mnt2/submnt/
lost+found  mnt3.txt  submnt

bind元mnt1(SHAREではない)をSHAREのmnt2にbind後、mnt1/submntまたはmnt2/submntをmountしても、もう一方(mnt2/submntまたはmnt1/submnt)にはmountされません。

[root@localhost north]# mount --make-shared mnt2
[root@localhost north]# mount --bind mnt1 mnt2
[root@localhost north]# mount -o loop loopfile3 mnt1/submnt/
[root@localhost north]# ls mnt1/submnt/
lost+found  mnt3.txt  submnt
[root@localhost north]# ls mnt2/submnt/
submnt1.txt

[root@localhost north]# umount mnt1/submnt/
[root@localhost north]# mount -o loop loopfile3 mnt2/submnt/
[root@localhost north]# ls mnt1/submnt/
submnt1.txt
[root@localhost north]# ls mnt2/submnt/
lost+found  mnt3.txt  submnt

oldはbind元mntで、clone_mnt()はoldを雛形とするバーチャルmntを作成します。SHAREならmnt->mnt_shareをold->mnt_shareのリストに追加し、mnt->mnt_master = old->mnt_masterとします。通常のファイルシステムはmnt->mnt_master = mntで、oldがbindされていないならold->mnt_master = oldです。mount時には、バーチャルmnt->mnt_master->mnt_shareからSHAREされているmntを取得し、mountするdentryがそれらのmnt配下ならmountします。(mount bindではflagは0です。)

/*
 * clone_mnt - allocate a new mount modelled on @old and rooted at @root.
 * @old:  mount to copy from; supplies the device name, superblock,
 *        mount flags and (unless slave/private) the peer group id.
 * @root: dentry that becomes both mnt_root and the initial
 *        mnt_mountpoint of the copy.
 * @flag: CL_* bits choosing how the copy relates to @old
 *        (CL_SLAVE, CL_PRIVATE, CL_MAKE_SHARED, CL_EXPIRE).
 *
 * Returns the new mount, or NULL when alloc_vfsmnt() yields nothing or
 * mnt_alloc_group_id() reports an error.
 */
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
                                       int flag)
{
       struct super_block *sb = old->mnt.mnt_sb;
       struct mount *mnt = alloc_vfsmnt(old->mnt_devname);

       if (mnt) {
               /* A slave or private copy does not inherit old's group id. */
               if (flag & (CL_SLAVE | CL_PRIVATE))
                       mnt->mnt_group_id = 0;
               else
                       mnt->mnt_group_id = old->mnt_group_id;

               /* Asked to be shared but no group id yet: allocate one. */
               if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
                       int err = mnt_alloc_group_id(mnt);
                       if (err)
                               goto out_free;
               }

               /* Copy old's flags, except MNT_WRITE_HOLD which is masked out. */
               mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
               /* The copy uses the same superblock; bump its s_active count. */
               atomic_inc(&sb->s_active);
               mnt->mnt.mnt_sb = sb;
               mnt->mnt.mnt_root = dget(root);
               /*
                * Mountpoint is the copy's own root and the parent is the copy
                * itself, i.e. it is not attached under any other mount here.
                * NOTE(review): presumably the caller attaches it later.
                */
               mnt->mnt_mountpoint = mnt->mnt.mnt_root;
               mnt->mnt_parent = mnt;
               /* Add the copy to the superblock's s_mounts list under the lock. */
               br_write_lock(vfsmount_lock);
               list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
               br_write_unlock(vfsmount_lock);

               if (flag & CL_SLAVE) {
                       /* Slave copy: hang it off old's slave list, old is master. */
                       list_add(&mnt->mnt_slave, &old->mnt_slave_list);
                       mnt->mnt_master = old;
                       CLEAR_MNT_SHARED(mnt);
               } else if (!(flag & CL_PRIVATE)) {
                       /* Join old's mnt_share list if sharing is requested or old is shared. */
                       if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
                               list_add(&mnt->mnt_share, &old->mnt_share);
                       /* If old is a slave, link the copy next to it on the same slave list. */
                       if (IS_MNT_SLAVE(old))
                               list_add(&mnt->mnt_slave, &old->mnt_slave);
                       /* The copy gets the same master as old. */
                       mnt->mnt_master = old->mnt_master;
               }
               if (flag & CL_MAKE_SHARED)
                       set_mnt_shared(mnt);

               /* Follow old onto its expire list only if old is on one. */
               if (flag & CL_EXPIRE) {
                       if (!list_empty(&old->mnt_expire))
                               list_add(&mnt->mnt_expire, &old->mnt_expire);
               }
       }
       return mnt;

out_free:
       /* Reached only when group id allocation failed, before any linkage. */
       free_vfsmnt(mnt);
       return NULL;
}

do_new_mount()でnameデバイスのmntをpathにmountします。

/*
 * do_new_mount - create a mount of filesystem @type and add it at @path.
 *
 * Returns -EINVAL when @type is NULL, -EPERM when the caller lacks
 * CAP_SYS_ADMIN, the error encoded in do_kern_mount()'s result, or
 * whatever do_add_mount() returns (0 on success).  When do_add_mount()
 * fails, the mount obtained from do_kern_mount() is released with mntput().
 */
static int do_new_mount(struct path *path, char *type, int flags,
                       int mnt_flags, char *name, void *data)
{
       struct vfsmount *new_vfsmnt;
       int rc;

       if (!type)
               return -EINVAL;
       if (!capable(CAP_SYS_ADMIN))
               return -EPERM;

       new_vfsmnt = do_kern_mount(type, flags, name, data);
       if (IS_ERR(new_vfsmnt))
               return PTR_ERR(new_vfsmnt);

       rc = do_add_mount(real_mount(new_vfsmnt), path, mnt_flags);
       if (rc == 0)
               return 0;

       /* Adding failed: give back the mount we just created. */
       mntput(new_vfsmnt);
       return rc;
}

/*
 * do_add_mount - validate and graft @newmnt onto @path.
 *
 * MNT_SHARED, MNT_WRITE_HOLD and MNT_INTERNAL are stripped from @mnt_flags
 * before use.  @path is locked with lock_mount() for the duration of the
 * checks and the graft, and unlocked on every path past a successful lock.
 *
 * Returns the lock_mount() error, -EINVAL or -EBUSY from the checks below,
 * or the result of graft_tree().
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
       int rc;

       mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);

       rc = lock_mount(path);
       if (rc)
               return rc;

       if (!(mnt_flags & MNT_SHRINKABLE) &&
           !check_mnt(real_mount(path->mnt))) {
               /* Non-shrinkable mounts require check_mnt() to accept the target. */
               rc = -EINVAL;
       } else if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
                  path->mnt->mnt_root == path->dentry) {
               /* Same superblock at the root dentry of the target mount. */
               rc = -EBUSY;
       } else if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode)) {
               /* The root of the new mount is a symlink. */
               rc = -EINVAL;
       } else {
               newmnt->mnt.mnt_flags = mnt_flags;
               rc = graft_tree(newmnt, path);
       }

       unlock_mount(path);
       return rc;
}

/*
 * graft_tree - sanity-check @mnt against @path, then attach it.
 *
 * Returns -EINVAL if the superblock of @mnt has MS_NOUSER set, -ENOTDIR if
 * exactly one of the target dentry and the root of @mnt is a directory,
 * -ENOENT if the target dentry is unlinked, otherwise the result of
 * attach_recursive_mnt() called with a NULL parent path.
 */
static int graft_tree(struct mount *mnt, struct path *path)
{
       int dest_is_dir;
       int src_is_dir;

       if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
               return -EINVAL;

       /* Both sides must be directories, or both must be non-directories. */
       dest_is_dir = S_ISDIR(path->dentry->d_inode->i_mode);
       src_is_dir = S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode);
       if (dest_is_dir != src_is_dir)
               return -ENOTDIR;

       if (d_unlinked(path->dentry))
               return -ENOENT;

       return attach_recursive_mnt(mnt, path, NULL);
}

/*
 * attach_recursive_mnt - attach @source_mnt at @path and commit the copies
 * that propagate_mnt() produced for other mounts.
 * @source_mnt:  mount (tree) being attached.
 * @path:        destination; path->mnt is the destination mount and
 *               path->dentry the destination dentry.
 * @parent_path: when non-NULL, @source_mnt is first detached (detach_mnt)
 *               and then attached at @path; when NULL, only the mountpoint
 *               is set and the tree committed.
 *
 * Returns 0 on success, or the error from invent_group_ids() or
 * propagate_mnt().
 */
static int attach_recursive_mnt(struct mount *source_mnt,
                       struct path *path, struct path *parent_path)
{
       LIST_HEAD(tree_list);
       struct mount *dest_mnt = real_mount(path->mnt);
       struct dentry *dest_dentry = path->dentry;
       struct mount *child, *p;
       int err;

       /* Destination is shared: the source tree needs group ids first. */
       if (IS_MNT_SHARED(dest_mnt)) {
               err = invent_group_ids(source_mnt, true);
               if (err)
                       goto out;
       }
       /* Collect on tree_list the copies to be mounted on other mounts. */
       err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
       if (err)
               goto out_cleanup_ids;

       br_write_lock(vfsmount_lock);

       /* Mounting under a shared mount marks every mount of the source tree shared. */
       if (IS_MNT_SHARED(dest_mnt)) {
               for (p = source_mnt; p; p = next_mnt(p, source_mnt))
                       set_mnt_shared(p);
       }
       if (parent_path) {
               detach_mnt(source_mnt, parent_path);
               attach_mnt(source_mnt, path);
               touch_mnt_namespace(source_mnt->mnt_ns);
       } else {
               mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
               commit_tree(source_mnt);
       }

       /* Unlink each propagated copy from tree_list and commit it. */
       list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
               list_del_init(&child->mnt_hash);
               commit_tree(child);
       }
       br_write_unlock(vfsmount_lock);

       return 0;

out_cleanup_ids:
       /* Undo invent_group_ids(), which was only called for a shared destination. */
       if (IS_MNT_SHARED(dest_mnt))
               cleanup_group_ids(source_mnt, NULL);
out:
       return err;
}

/*
 * propagate_mnt - build copies of @source_mnt for every mount that
 * propagation_next() yields starting from @dest_mnt.
 * @dest_mnt:    mount on which @source_mnt is being mounted.
 * @dest_dentry: dentry within @dest_mnt where the mount happens.
 * @source_mnt:  mount tree being mounted.
 * @tree_list:   out list; receives (linked via mnt_hash) the copies whose
 *               mountpoint has been set and that the caller should commit.
 *
 * Returns 0, or -ENOMEM when copy_tree() fails.  Copies that end up on the
 * local tmp_list are unmounted and released before returning.
 */
int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
                   struct mount *source_mnt, struct list_head *tree_list)
{
       struct mount *m, *child;
       int ret = 0;
       struct mount *prev_dest_mnt = dest_mnt;
       struct mount *prev_src_mnt  = source_mnt;
       LIST_HEAD(tmp_list);
       LIST_HEAD(umount_list);

       for (m = propagation_next(dest_mnt, dest_mnt); m;
                       m = propagation_next(m, dest_mnt)) {
               int type;
               struct mount *source;

               /* Mounts flagged by IS_MNT_NEW() are skipped. */
               if (IS_MNT_NEW(m))
                       continue;

               /* get_source() picks the tree to copy and the copy type for m. */
               source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);

               if (!(child = copy_tree(source, source->mnt.mnt_root, type))) {
                       ret = -ENOMEM;
                       /*
                        * Move what was already queued for the caller onto
                        * tmp_list so the cleanup below disposes of it too.
                        */
                       list_splice(tree_list, tmp_list.prev);
                       goto out;
               }

               if (is_subdir(dest_dentry, m->mnt.mnt_root)) {
                       /* dest_dentry lies under m's root: this copy will be mounted on m. */
                       mnt_set_mountpoint(m, dest_dentry, child);
                       list_add_tail(&child->mnt_hash, tree_list);
               } else {
                       /* Not under m's root: park the copy on tmp_list for disposal. */
                       list_add_tail(&child->mnt_hash, &tmp_list);
               }
               prev_dest_mnt = m;
               prev_src_mnt  = child;
       }
out:
       br_write_lock(vfsmount_lock);
       /*
        * Drain tmp_list.
        * NOTE(review): relies on umount_tree() taking child off tmp_list,
        * otherwise this loop would not terminate - confirm.
        */
       while (!list_empty(&tmp_list)) {
               child = list_first_entry(&tmp_list, struct mount, mnt_hash);
               umount_tree(child, 0, &umount_list);
       }
       br_write_unlock(vfsmount_lock);
       release_mounts(&umount_list);
       return ret;
}

/*
 * propagation_next - step to the mount that follows @m in the walk that
 * started at @origin.
 *
 * If @m is not flagged by IS_MNT_NEW() and has slaves, its first slave is
 * returned.  Otherwise the walk climbs through mnt_master links: at each
 * level whose master differs from @origin's master, the next slave on that
 * master's list is returned if there is one, else the climb continues from
 * the master.  Once a level with the same master as @origin is reached, the
 * next peer is returned, or NULL when that peer is @origin itself.
 */
static struct mount *propagation_next(struct mount *m,
                                        struct mount *origin)
{
       struct mount *master;
       struct mount *peer;

       if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
               return first_slave(m);

       for (master = m->mnt_master; master != origin->mnt_master;
            m = master, master = m->mnt_master) {
               /* Not the last entry on the master's slave list: take the next slave. */
               if (m->mnt_slave.next != &master->mnt_slave_list)
                       return next_slave(m);
       }

       /* Same master as origin: move along the peer list, stopping at origin. */
       peer = next_peer(m);
       return (peer != origin) ? peer : NULL;
}

/* Return the mount that follows @p on its mnt_share list. */
static inline struct mount *next_peer(struct mount *p)
{
       struct list_head *next_link = p->mnt_share.next;

       return list_entry(next_link, struct mount, mnt_share);
}

追記

まだ明確に読み切れておりません。