ディレクトリ(ext3)
ディレクトリは、配下に作成されるファイル等を格納する領域そのものではなく、ファイルシステム依存のstruct ext3_dir_entry_2(ファイル名とファイル属性)を並べたデータファイルに相当します。read()による読み込みは、.readコールバックのgeneric_read_dir()が-EISDIRを返すためできません。readdir()はシステムコールgetdentsから.readdirコールバックとして呼び出され、ext3_readdir()がstruct ext3_dir_entry_2の内容をfilldirコールバック経由でユーザ空間へ渡し、最終的にglibcのstruct direntとして取得されます。
ディレクトリ毎のファイル数に制限はありませんが、ディレクトリのサイズはブロックサイズ単位で確保され、新規に作成されたディレクトリのサイズは1ブロックです。デバイスを有効に利用するための実装として、親ディレクトリのリンク数(i_nlink)がEXT3_LINK_MAX=32000に達すると、ext3_mkdir()は-EMLINKを返し、それ以上サブディレクトリを作成できません。これはファイルシステムに依存し、ext2のext2_mkdir()にはこの制約の検査はありません。
ディレクトリ毎のファイル数に制限はありませんが、ディレクトリのサイズはブロックサイズ単位で確保され、新規に作成されたディレクトリのサイズは1ブロックです。デバイスを有効に利用するための実装として、親ディレクトリのリンク数(i_nlink)がEXT3_LINK_MAX=32000に達すると、ext3_mkdir()は-EMLINKを返し、それ以上サブディレクトリを作成できません。これはファイルシステムに依存し、ext2のext2_mkdir()にはこの制約の検査はありません。
#define EXT3_NAME_LEN 255 ファイル名長さ

/*
 * One directory entry as stored inside a directory block.
 * rec_len is converted with ext3_rec_len_from_disk()/ext3_rec_len_to_disk()
 * by the code below, and name_len gives the number of valid bytes in name[].
 */
struct ext3_dir_entry_2 {
	__le32	inode;			/* Inode number */
	__le16	rec_len;		/* Directory entry length */
	__u8	name_len;		/* Name length */
	__u8	file_type;		/* File type code (EXT3_FT_*) */
	char	name[EXT3_NAME_LEN];	/* File name */
};

/*
 * File operations installed on ext3 directory inodes.
 * .read points at generic_read_dir(), which always fails with -EISDIR;
 * directory contents are delivered through .readdir instead.
 */
const struct file_operations ext3_dir_operations = {
	.llseek		= ext3_dir_llseek,
	.read		= generic_read_dir,
	.readdir	= ext3_readdir,
	.unlocked_ioctl	= ext3_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext3_compat_ioctl,
#endif
	.fsync		= ext3_sync_file,
	.release	= ext3_release_dir,
};

/*
 * read() handler for directories: ignores every argument and
 * unconditionally returns -EISDIR.
 */
ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
{
	return -EISDIR;
}

/*
 * Inode operations installed on ext3 directory inodes (name-space
 * operations such as create, lookup, link, unlink, mkdir and rename).
 * The xattr callbacks are only present with CONFIG_EXT3_FS_XATTR.
 */
const struct inode_operations ext3_dir_inode_operations = {
	.create		= ext3_create,
	.lookup		= ext3_lookup,
	.link		= ext3_link,
	.unlink		= ext3_unlink,
	.symlink	= ext3_symlink,
	.mkdir		= ext3_mkdir,
	.rmdir		= ext3_rmdir,
	.mknod		= ext3_mknod,
	.rename		= ext3_rename,
	.setattr	= ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext3_listxattr,
	.removexattr	= generic_removexattr,
#endif
	.get_acl	= ext3_get_acl,
};

/*
 * .create callback: allocate a new inode for `dentry` inside `dir`, install
 * the regular-file operation tables on it and add its directory entry.
 *
 * Returns 0 on success or a negative errno.  `excl` is not used in this
 * function.  When the result is -ENOSPC and ext3_should_retry_alloc()
 * agrees, the whole sequence is restarted from a fresh handle.
 */
static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
{
	handle_t *handle;
	struct inode * inode;
	int err, retries = 0;

	dquot_initialize(dir);

retry:
	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	if (IS_DIRSYNC(dir))
		handle->h_sync = 1;

	inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
	/* err is only meaningful if inode is an error pointer; overwritten below otherwise */
	err = PTR_ERR(inode);
	if (!IS_ERR(inode)) {
		inode->i_op = &ext3_file_inode_operations;
		inode->i_fop = &ext3_file_operations;
		ext3_set_aops(inode);
		err = ext3_add_nondir(handle, dentry, inode);
	}
	ext3_journal_stop(handle);
	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
		goto retry;
	return err;
}

/*
 * Add the directory entry for a freshly allocated inode.
 *
 * On success the inode is marked dirty, unlocked and attached to the dentry
 * (returns 0).  On failure the inode's link count is dropped, the inode is
 * unlocked and released with iput(), and the error from ext3_add_entry()
 * is returned.
 */
static int ext3_add_nondir(handle_t *handle, struct dentry *dentry, struct inode *inode)
{
	int err = ext3_add_entry(handle, dentry, inode);
	if (!err) {
		ext3_mark_inode_dirty(handle, inode);
		unlock_new_inode(inode);
		d_instantiate(dentry, inode);
		return 0;
	}
	drop_nlink(inode);
	unlock_new_inode(inode);
	iput(inode);
	return err;
}

/*
 * Insert an entry named dentry->d_name, referring to `inode`, into the
 * parent directory of `dentry`.
 *
 * Order of attempts:
 *  1. If is_dx(dir), try ext3_dx_add_entry(); only ERR_BAD_DX_DIR falls
 *     through (after clearing EXT3_INDEX_FL) to the linear path below.
 *  2. Try every existing block of the directory with add_dirent_to_buf().
 *  3. Otherwise append a new block and place the entry there.
 *
 * Returns 0 on success or a negative errno (-EINVAL for an empty name).
 */
static int ext3_add_entry (handle_t *handle, struct dentry *dentry, struct inode *inode)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct buffer_head * bh;
	struct ext3_dir_entry_2 *de;
	struct super_block * sb;
	int retval;
	int dx_fallback=0;
	unsigned blocksize;
	u32 block, blocks;

	sb = dir->i_sb;
	blocksize = sb->s_blocksize;
	if (!dentry->d_name.len)
		return -EINVAL;
	if (is_dx(dir)) {
		retval = ext3_dx_add_entry(handle, dentry, inode);
		/* success, or any error other than ERR_BAD_DX_DIR, is final */
		if (!retval || (retval != ERR_BAD_DX_DIR))
			return retval;
		EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
		dx_fallback++;
		ext3_mark_inode_dirty(handle, dir);
	}
	/* number of blocks currently covered by the directory's i_size */
	blocks = dir->i_size >> sb->s_blocksize_bits;
	for (block = 0; block < blocks; block++) {
		if (!(bh = ext3_dir_bread(handle, dir, block, 0, &retval)))
			return retval;

		/* NULL de: let add_dirent_to_buf() search this block for room */
		retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
		if (retval != -ENOSPC)
			return retval;

		/*
		 * The only block is full: hand over to make_indexed_dir() when
		 * the DIR_INDEX feature is set and we did not just fall back
		 * from the indexed path.
		 */
		if (blocks == 1 && !dx_fallback &&
		    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
			return make_indexed_dir(handle, dentry, inode, bh);
		brelse(bh);
	}
	/* no existing block had room: append one */
	bh = ext3_append(handle, dir, &block, &retval);
	if (!bh)
		return retval;
	/* a single unused entry (inode == 0) whose rec_len spans the whole block */
	de = (struct ext3_dir_entry_2 *) bh->b_data;
	de->inode = 0;
	de->rec_len = ext3_rec_len_to_disk(blocksize);
	return add_dirent_to_buf(handle, dentry, inode, de, bh);
}

/*
 * Write one directory entry into the block held by `bh`.
 *
 * If `de` is NULL the block is scanned for an entry with enough spare room;
 * otherwise `de` is taken as the slot to use.
 *
 * Returns:
 *   0        entry written (bh released)
 *   -EIO     ext3_check_dir_entry() rejected an entry (bh released)
 *   -EEXIST  an entry with the same name already exists (bh released)
 *   -ENOSPC  no slot large enough in this block (bh is NOT released here)
 *   other    error from ext3_journal_get_write_access() (bh released)
 */
static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
			     struct inode *inode, struct ext3_dir_entry_2 *de,
			     struct buffer_head * bh)
{
	struct inode *dir = dentry->d_parent->d_inode;
	const char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	unsigned long offset = 0;
	unsigned short reclen;
	int nlen, rlen, err;
	char *top;

	/* record length required for the new name */
	reclen = EXT3_DIR_REC_LEN(namelen);
	if (!de) {
		de = (struct ext3_dir_entry_2 *)bh->b_data;
		/* last position in the block at which a reclen-sized record can start */
		top = bh->b_data + dir->i_sb->s_blocksize - reclen;
		while ((char *) de <= top) {
			if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
						  bh, offset)) {
				brelse (bh);
				return -EIO;
			}
			if (ext3_match (namelen, name, de)) {
				brelse (bh);
				return -EEXIST;
			}
			nlen = EXT3_DIR_REC_LEN(de->name_len);
			rlen = ext3_rec_len_from_disk(de->rec_len);
			/*
			 * In-use entry: only the slack beyond its own name
			 * (rlen - nlen) is available.  Unused entry
			 * (inode == 0): the whole record is available.
			 */
			if ((de->inode? rlen - nlen: rlen) >= reclen)
				break;
			de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
			offset += rlen;
		}
		if ((char *) de > top)
			return -ENOSPC;
	}
	BUFFER_TRACE(bh, "get_write_access");
	err = ext3_journal_get_write_access(handle, bh);
	if (err) {
		ext3_std_error(dir->i_sb, err);
		brelse(bh);
		return err;
	}

	nlen = EXT3_DIR_REC_LEN(de->name_len);
	rlen = ext3_rec_len_from_disk(de->rec_len);
	if (de->inode) {
		/*
		 * Split the in-use record: shrink it to nlen and start the
		 * new record (de1) right after it with the remaining length.
		 */
		struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
		de1->rec_len = ext3_rec_len_to_disk(rlen - nlen);
		de->rec_len = ext3_rec_len_to_disk(nlen);
		de = de1;
	}
	/* default type; ext3_set_de_type() below may replace it */
	de->file_type = EXT3_FT_UNKNOWN;
	if (inode) {
		de->inode = cpu_to_le32(inode->i_ino);
		ext3_set_de_type(dir->i_sb, de, inode->i_mode);
	} else
		de->inode = 0;
	de->name_len = namelen;
	/* copies exactly namelen bytes; no terminating NUL is written */
	memcpy (de->name, name, namelen);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
	ext3_update_dx_flag(dir);
	dir->i_version++;
	ext3_mark_inode_dirty(handle, dir);
	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
	err = ext3_journal_dirty_metadata(handle, bh);
	/* a failure here is only reported via ext3_std_error(); 0 is still returned */
	if (err)
		ext3_std_error(dir->i_sb, err);
	brelse(bh);
	return 0;
}
ext3_dir_entry_2の.file_typeはファイルシステム依存で、ext3_readdir()で取得される.file_typeのd_typeは、.file_typeをインデックスとするext3_filetype_table[]で、故にファイルシステムに関係なくDT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNKとなります。
/*
 * One directory entry as stored inside a directory block.
 * file_type holds an EXT3_FT_* code (see ext3_set_de_type() below).
 */
struct ext3_dir_entry_2 {
	__le32	inode;			/* Inode number */
	__le16	rec_len;		/* Directory entry length */
	__u8	name_len;		/* Name length */
	__u8	file_type;
	char	name[EXT3_NAME_LEN];	/* File name */
};

/*
 * Store the EXT3_FT_* code that corresponds to `mode` in de->file_type.
 * Does nothing unless the superblock has EXT3_FEATURE_INCOMPAT_FILETYPE.
 */
static inline void ext3_set_de_type(struct super_block *sb,
				struct ext3_dir_entry_2 *de,
				umode_t mode) {
	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE))
		de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
}

/*
 * d_type values handed to filldir.  Except for DT_UNKNOWN and DT_WHT, each
 * value equals the matching S_IF* constant below shifted right by S_SHIFT.
 */
#define DT_UNKNOWN	0
#define DT_FIFO		1
#define DT_CHR		2
#define DT_DIR		4
#define DT_BLK		6
#define DT_REG		8
#define DT_LNK		10
#define DT_SOCK		12
#define DT_WHT		14

/* Maps an on-disk EXT3_FT_* code (used as the index) to a DT_* value. */
static unsigned char ext3_filetype_table[] = {
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};

/* File-type bits of a mode value; S_SHIFT moves them down to a small index. */
#define S_SHIFT 12

#define S_IFMT  00170000
#define S_IFSOCK 0140000
#define S_IFLNK	 0120000
#define S_IFREG  0100000
#define S_IFBLK  0060000
#define S_IFDIR  0040000
#define S_IFCHR  0020000
#define S_IFIFO  0010000
#define S_ISUID  0004000
#define S_ISGID  0002000
#define S_ISVTX  0001000

/* File type codes stored in ext3_dir_entry_2.file_type. */
#define EXT3_FT_UNKNOWN		0
#define EXT3_FT_REG_FILE	1
#define EXT3_FT_DIR		2
#define EXT3_FT_CHRDEV		3
#define EXT3_FT_BLKDEV		4
#define EXT3_FT_FIFO		5
#define EXT3_FT_SOCK		6
#define EXT3_FT_SYMLINK		7

/*
 * Maps (mode & S_IFMT) >> S_SHIFT to an EXT3_FT_* code.  Slots without a
 * designated initializer are zero, i.e. EXT3_FT_UNKNOWN.
 */
static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= EXT3_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= EXT3_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= EXT3_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= EXT3_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= EXT3_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= EXT3_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= EXT3_FT_SYMLINK,
};

/*
 * .readdir callback: walk the directory starting at filp->f_pos and pass
 * every in-use entry (inode != 0) to `filldir`.
 *
 * Indexed directories are delegated to ext3_dx_readdir(); only when that
 * returns ERR_BAD_DX_DIR is EXT3_INDEX_FL cleared and the linear walk used.
 * The outer loop ends once a block has produced at least one entry
 * (`stored`), filldir reported an error, or f_pos reached i_size.
 *
 * Returns `ret`, which is 0 except when it carries the result of
 * ext3_dx_readdir() or, after a bad entry, the value of `stored`.
 */
static int ext3_readdir(struct file * filp,
			 void * dirent, filldir_t filldir)
{
	int error = 0;
	unsigned long offset;
	int i, stored;
	struct ext3_dir_entry_2 *de;
	int err;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	int ret = 0;
	int dir_has_error = 0;

	if (is_dx_dir(inode)) {
		err = ext3_dx_readdir(filp, dirent, filldir);
		if (err != ERR_BAD_DX_DIR) {
			ret = err;
			goto out;
		}
		EXT3_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
	}
	stored = 0;
	/* byte offset of f_pos inside its block */
	offset = filp->f_pos & (sb->s_blocksize - 1);

	while (!error && !stored && filp->f_pos < inode->i_size) {
		/* directory-relative block number that contains f_pos */
		unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb);
		struct buffer_head map_bh;
		struct buffer_head *bh = NULL;

		map_bh.b_state = 0;
		err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0);
		if (err > 0) {
			pgoff_t index = map_bh.b_blocknr >>
					(PAGE_CACHE_SHIFT - inode->i_blkbits);
			/* start readahead on the block device mapping if not already covered */
			if (!ra_has_index(&filp->f_ra, index))
				page_cache_sync_readahead(
					sb->s_bdev->bd_inode->i_mapping,
					&filp->f_ra, filp,
					index, 1);
			filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
			bh = ext3_bread(NULL, inode, blk, 0, &err);
		}

		if (!bh) {
			/* report the hole only once per call */
			if (!dir_has_error) {
				ext3_error(sb, __func__, "directory #%lu "
					"contains a hole at offset %lld",
					inode->i_ino, filp->f_pos);
				dir_has_error = 1;
			}
			/* corrupt size?  Maybe no more blocks to read */
			if (filp->f_pos > inode->i_blocks << 9)
				break;
			/* skip to the start of the next block */
			filp->f_pos += sb->s_blocksize - offset;
			continue;
		}

revalidate:
		/*
		 * The directory version differs from the one recorded in the
		 * file: re-walk the block from its start, following rec_len,
		 * and resume at the first record boundary at or after the old
		 * offset (or where a too-short rec_len stops the walk).
		 */
		if (filp->f_version != inode->i_version) {
			for (i = 0; i < sb->s_blocksize && i < offset; ) {
				de = (struct ext3_dir_entry_2 *)
					(bh->b_data + i);
				if (ext3_rec_len_from_disk(de->rec_len) <
						EXT3_DIR_REC_LEN(1))
					break;
				i += ext3_rec_len_from_disk(de->rec_len);
			}
			offset = i;
			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
				| offset;
			filp->f_version = inode->i_version;
		}

		while (!error && filp->f_pos < inode->i_size
		       && offset < sb->s_blocksize) {
			de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
			if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
						   bh, offset)) {
				/* bad entry: move f_pos to the next block and stop */
				filp->f_pos = (filp->f_pos |
						(sb->s_blocksize - 1)) + 1;
				brelse (bh);
				ret = stored;
				goto out;
			}
			offset += ext3_rec_len_from_disk(de->rec_len);
			if (le32_to_cpu(de->inode)) {
				/* snapshot to detect a version change across the filldir call */
				u64 version = filp->f_version;

				error = filldir(dirent, de->name,
						de->name_len,
						filp->f_pos,
						le32_to_cpu(de->inode),
						get_dtype(sb, de->file_type));
				if (error)
					break;
				if (version != filp->f_version)
					goto revalidate;
				stored ++;
			}
			filp->f_pos += ext3_rec_len_from_disk(de->rec_len);
		}
		offset = 0;
		brelse (bh);
	}
out:
	return ret;
}

/*
 * Translate an on-disk file type code into a DT_* value.
 * Returns DT_UNKNOWN when the filesystem lacks
 * EXT3_FEATURE_INCOMPAT_FILETYPE or when filetype >= EXT3_FT_MAX.
 */
static unsigned char get_dtype(struct super_block *sb, int filetype)
{
	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
	    (filetype >= EXT3_FT_MAX))
		return DT_UNKNOWN;

	return (ext3_filetype_table[filetype]);
}
検証
[root@localhost north]# cat dir.c #include <stdio.h> #include <stdlib.h> #include <dirent.h> void main(int argc, char *argv[]) { DIR *dir; struct dirent *dp; dir=opendir(argv[1]); printf ("%7s %10s %3s %4s %s\n", "inode", "off", "len", "type", "name"); while (1) { dp = readdir(dir); if (!dp) { break; } printf ("%07d %10d %03d %04d %s\n", dp->d_ino, dp->d_off, dp->d_reclen, dp->d_type, dp->d_name); free(dp); } } [root@localhost north]# touch babakaka/text [root@localhost north]# mkdir babakaka/dir [root@localhost north]# mkfifo babakaka/fifo [root@localhost north]# mknod babakaka/blockdev b 10 1 [root@localhost north]# mknod babakaka/chardev c 11 1 [root@localhost north]# ln -s babakaka/text babakaka/symlink [root@localhost north]# ln babakaka/text babakaka/hardlink [root@localhost north]# ./dir babakaka/ inode off len type name 1802243 698206621 016 0008 text DT_REG : 8 1802242 942110034 016 0004 dir DT_DIR : 4 0000002 1106706763 016 0004 .. DT_DIR : 4 1802247 1162905680 020 0010 symlink DT_LNK :10 1802245 1470500359 020 0006 blockdev DT_BLK : 6 1802243 1475777499 020 0008 hardlink DT_REG : 8 hardlinkはinodeを共有での実装です。 1802241 1852366747 016 0004 . DT_DIR : 4 1802246 1936624415 020 0002 chardev DT_CHR : 2 1802244 2147483647 016 0001 fifo DT_FIFO: 1 [root@localhost north]# ls -lai babakaka/ 合計 12 1802241 drwxr-xr-x 3 root root 4096 7月 7 19:46 . 2 drwxr-xr-x 5 root root 4096 7月 7 20:23 .. 1802245 brw-r--r-- 1 root root 10, 1 7月 7 19:17 blockdev 1802246 crw-r--r-- 1 root root 11, 1 7月 7 19:46 chardev 1802242 drwxr-xr-x 2 root root 4096 7月 7 19:15 dir 1802244 prw-r--r-- 1 root root 0 7月 7 19:16 fifo 1802243 -rw-r--r-- 2 root root 0 7月 7 19:15 hardlink 1802247 lrwxrwxrwx 1 root root 13 7月 7 19:18 symlink -> babakaka/text 1802243 -rw-r--r-- 2 root root 0 7月 7 19:15 text
/*
 * Record layout written to the user buffer by getdents.
 * d_name is declared with one element; records are variable-length
 * (d_reclen gives each record's size).
 */
struct linux_dirent {
	unsigned long	d_ino;
	unsigned long	d_off;
	unsigned short	d_reclen;
	char		d_name[1];
};

/*
 * getdents(fd, dirent, count): fill the user buffer `dirent` (at most
 * `count` bytes) with directory records by running vfs_readdir() with the
 * filldir callback.
 *
 * Returns the number of bytes used (count - buf.count) when at least one
 * record was stored; otherwise the result of vfs_readdir()/buf.error.
 * -EFAULT if the buffer is not writable or d_off of the last record cannot
 * be stored; -EBADF if fd has no file.
 */
SYSCALL_DEFINE3(getdents, unsigned int, fd,
		struct linux_dirent __user *, dirent, unsigned int, count)
{
	struct fd f;
	struct linux_dirent __user * lastdirent;
	struct getdents_callback buf;
	int error;

	if (!access_ok(VERIFY_WRITE, dirent, count))
		return -EFAULT;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	/* state shared with filldir: write cursor, last record, bytes left */
	buf.current_dir = dirent;
	buf.previous = NULL;
	buf.count = count;
	buf.error = 0;

	error = vfs_readdir(f.file, filldir, &buf);
	if (error >= 0)
		error = buf.error;
	lastdirent = buf.previous;
	if (lastdirent) {
		/* patch the last record's d_off with the current file position */
		if (put_user(f.file->f_pos, &lastdirent->d_off))
			error = -EFAULT;
		else
			error = count - buf.count;
	}
	fdput(f);
	return error;
}
作成できるディレクトリ下のファイルの制約はありませんが、ディレクトリはEXT3_LINK_MAX=32000までです。ディレクトリはファイルと異り、ブロックサイズの領域を有する故の実装かと。
/*
 * User-space directory entry.  The types of d_ino/d_off depend on
 * __USE_FILE_OFFSET64.
 */
struct dirent
  {
#ifndef __USE_FILE_OFFSET64
    __ino_t d_ino;
    __off_t d_off;
#else
    __ino64_t d_ino;
    __off64_t d_off;
#endif
    unsigned short int d_reclen;
    unsigned char d_type;
    char d_name[256];		/* We must not include limits.h!  */
  };

/* ext3_mkdir() refuses to create a directory once the parent's i_nlink reaches this. */
#define EXT3_LINK_MAX           32000

/*
 * .mkdir callback: create directory `dentry` inside `dir`.
 *
 * Allocates the inode, gives it a size of one block, writes the "." and
 * ".." entries into block 0, sets its link count to 2, adds the entry to
 * the parent and increments the parent's link count.
 *
 * Returns 0 on success, -EMLINK when dir->i_nlink >= EXT3_LINK_MAX, or
 * another negative errno.  An -ENOSPC result is retried when
 * ext3_should_retry_alloc() agrees.
 */
static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
{
	handle_t *handle;
	struct inode * inode;
	struct buffer_head * dir_block = NULL;
	struct ext3_dir_entry_2 * de;
	int err, retries = 0;

	if (dir->i_nlink >= EXT3_LINK_MAX)
		return -EMLINK;

	dquot_initialize(dir);

retry:
	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	if (IS_DIRSYNC(dir))
		handle->h_sync = 1;

	inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFDIR | mode);
	err = PTR_ERR(inode);
	if (IS_ERR(inode))
		goto out_stop;

	inode->i_op = &ext3_dir_inode_operations;
	inode->i_fop = &ext3_dir_operations;
	/* a new directory is exactly one block long */
	inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
	if (!(dir_block = ext3_dir_bread(handle, inode, 0, 1, &err)))
		goto out_clear_inode;

	BUFFER_TRACE(dir_block, "get_write_access");
	err = ext3_journal_get_write_access(handle, dir_block);
	if (err)
		goto out_clear_inode;

	/* first entry: "." pointing at the new directory itself */
	de = (struct ext3_dir_entry_2 *) dir_block->b_data;
	de->inode = cpu_to_le32(inode->i_ino);
	de->name_len = 1;
	de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
	strcpy (de->name, ".");
	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
	/* second entry: ".." pointing at the parent; its rec_len takes the rest of the block */
	de = ext3_next_entry(de);
	de->inode = cpu_to_le32(dir->i_ino);
	de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
					EXT3_DIR_REC_LEN(1));
	de->name_len = 2;
	strcpy (de->name, "..");
	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
	set_nlink(inode, 2);
	BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
	err = ext3_journal_dirty_metadata(handle, dir_block);
	if (err)
		goto out_clear_inode;

	err = ext3_mark_inode_dirty(handle, inode);
	if (!err)
		err = ext3_add_entry (handle, dentry, inode);

	if (err) {
		/* label sits inside the if-body so the earlier gotos share this cleanup */
out_clear_inode:
		clear_nlink(inode);
		unlock_new_inode(inode);
		ext3_mark_inode_dirty(handle, inode);
		iput (inode);
		goto out_stop;
	}
	/* the new directory adds one link to its parent */
	inc_nlink(dir);
	ext3_update_dx_flag(dir);
	err = ext3_mark_inode_dirty(handle, dir);
	if (err)
		goto out_clear_inode;

	unlock_new_inode(inode);
	d_instantiate(dentry, inode);
out_stop:
	brelse(dir_block);
	ext3_journal_stop(handle);
	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
		goto retry;
	return err;
}

/*
 * Increment the inode's link count (__i_nlink).
 * If the count was 0, WARN_ON fires and the superblock's s_remove_count is
 * decremented before the increment.
 */
void inc_nlink(struct inode *inode)
{
	if (WARN_ON(inode->i_nlink == 0))
		atomic_long_dec(&inode->i_sb->s_remove_count);

	inode->__i_nlink++;
}
cdコマンドはdirに変移するのでなく、current->fs->pwdにdirを設定して、パス捜査はcurrent->fs->pwdのdirファイルを読み込むことで運用されます。
/*
 * chdir(filename): resolve `filename` with user_path_dir(), check
 * MAY_EXEC | MAY_CHDIR permission on the resulting inode and, if allowed,
 * install the path as current->fs->pwd.
 *
 * Returns 0 on success or the error from the lookup / permission check.
 */
SYSCALL_DEFINE1(chdir, const char __user *, filename)
{
	struct path path;
	int error;

	error = user_path_dir(filename, &path);
	if (error)
		goto out;

	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
	if (error)
		goto dput_and_out;

	set_fs_pwd(current->fs, &path);

	/* reached on success too: drop the reference obtained by the lookup */
dput_and_out:
	path_put(&path);
out:
	return error;
}

/*
 * Replace fs->pwd with *path.
 * Takes a reference on the new path first, swaps the value while holding
 * fs->lock inside a fs->seq write section, and releases the previous pwd
 * after unlocking (only if it had a dentry).
 */
void set_fs_pwd(struct fs_struct *fs, struct path *path)
{
	struct path old_pwd;

	path_get(path);
	spin_lock(&fs->lock);
	write_seqcount_begin(&fs->seq);
	old_pwd = fs->pwd;
	fs->pwd = *path;
	write_seqcount_end(&fs->seq);
	spin_unlock(&fs->lock);

	if (old_pwd.dentry)
		path_put(&old_pwd);
}
追記
ext2ファイルシステムは、dir->i_nlinkかかる制約はありません。
/*
 * ext2 .mkdir callback: create directory `dentry` inside `dir`.
 *
 * The parent's link count is incremented up front and decremented again on
 * any failure.  No dir->i_nlink limit is checked inside this function.
 *
 * Returns 0 on success or a negative errno.
 */
static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
{
	struct inode * inode;
	int err;

	dquot_initialize(dir);

	/* account for the new directory in the parent before creating it */
	inode_inc_link_count(dir);

	inode = ext2_new_inode(dir, S_IFDIR | mode, &dentry->d_name);
	err = PTR_ERR(inode);
	if (IS_ERR(inode))
		goto out_dir;

	inode->i_op = &ext2_dir_inode_operations;
	inode->i_fop = &ext2_dir_operations;
	/* address-space operations depend on the NOBH mount option */
	if (test_opt(inode->i_sb, NOBH))
		inode->i_mapping->a_ops = &ext2_nobh_aops;
	else
		inode->i_mapping->a_ops = &ext2_aops;

	inode_inc_link_count(inode);

	err = ext2_make_empty(inode, dir);
	if (err)
		goto out_fail;

	err = ext2_add_link(dentry, inode);
	if (err)
		goto out_fail;

	unlock_new_inode(inode);
	d_instantiate(dentry, inode);
out:
	return err;

out_fail:
	/* two decrements of the new inode's link count before releasing it */
	inode_dec_link_count(inode);
	inode_dec_link_count(inode);
	unlock_new_inode(inode);
	iput(inode);
out_dir:
	/* undo the up-front increment on the parent */
	inode_dec_link_count(dir);
	goto out;
}