ディレクトリ(ext3)
Rev.1を表示中。最新版はこちら。
ディレクトリは作成されるファイル等の領域でなく、ファイルシステム依存のstruct ext3_dir_entry_2ファイル属性のデータテキストファイルに相当します。read()での読み込には .readコールバックで、generic_read_dir()は-EISDIRでできません。readdir()はシステムコールgetdentで.readdirコールされ、ext3_readdir()がstruct ext3_dir_entry_2のテキストファイルをgccのstruct direntに設定します。
ファイル数のディレクトリ毎の制限はありませんが、ディレクトリの係るサイズはブロックサイズ単位の領域で、新規に作成されたディレクトリサイズは1ブロックサイズで、デバイスの有効利用実装のため、ディレクトリとして管理するディレクトリ数はEXT3_LINK_MAX=32000です。これはファイルシステムに依存し、ext2ではディレクトリにかかる制約はありません。
#define EXT3_NAME_LEN 255 ファイル名長さ
struct ext3_dir_entry_2 {
__le32 inode; /* Inode number */
__le16 rec_len; /* Directory entry length */
__u8 name_len; /* Name length */
__u8 file_type;
char name[EXT3_NAME_LEN]; /* File name */
};
const struct file_operations ext3_dir_operations = {
.llseek = ext3_dir_llseek,
.read = generic_read_dir,
.readdir = ext3_readdir,
.unlocked_ioctl = ext3_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext3_compat_ioctl,
#endif
.fsync = ext3_sync_file,
.release = ext3_release_dir,
};
ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
{
return -EISDIR;
}
const struct inode_operations ext3_dir_inode_operations = {
.create = ext3_create,
.lookup = ext3_lookup,
.link = ext3_link,
.unlink = ext3_unlink,
.symlink = ext3_symlink,
.mkdir = ext3_mkdir,
.rmdir = ext3_rmdir,
.mknod = ext3_mknod,
.rename = ext3_rename,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
.get_acl = ext3_get_acl,
};
static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode,
bool excl)
{
handle_t *handle;
struct inode * inode;
int err, retries = 0;
dquot_initialize(dir);
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext3_file_inode_operations;
inode->i_fop = &ext3_file_operations;
ext3_set_aops(inode);
err = ext3_add_nondir(handle, dentry, inode);
}
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
}
static int ext3_add_nondir(handle_t *handle,
struct dentry *dentry, struct inode *inode)
{
int err = ext3_add_entry(handle, dentry, inode);
if (!err) {
ext3_mark_inode_dirty(handle, inode);
unlock_new_inode(inode);
d_instantiate(dentry, inode);
return 0;
}
drop_nlink(inode);
unlock_new_inode(inode);
iput(inode);
return err;
}
static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
struct inode *inode)
{
struct inode *dir = dentry->d_parent->d_inode;
struct buffer_head * bh;
struct ext3_dir_entry_2 *de;
struct super_block * sb;
int retval;
int dx_fallback=0;
unsigned blocksize;
u32 block, blocks;
sb = dir->i_sb;
blocksize = sb->s_blocksize;
if (!dentry->d_name.len)
return -EINVAL;
if (is_dx(dir)) {
retval = ext3_dx_add_entry(handle, dentry, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
return retval;
EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
dx_fallback++;
ext3_mark_inode_dirty(handle, dir);
}
blocks = dir->i_size >> sb->s_blocksize_bits;
for (block = 0; block < blocks; block++) {
if (!(bh = ext3_dir_bread(handle, dir, block, 0, &retval)))
return retval;
retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
if (retval != -ENOSPC)
return retval;
if (blocks == 1 && !dx_fallback &&
EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
return make_indexed_dir(handle, dentry, inode, bh);
brelse(bh);
}
bh = ext3_append(handle, dir, &block, &retval);
if (!bh)
return retval;
de = (struct ext3_dir_entry_2 *) bh->b_data;
de->inode = 0;
de->rec_len = ext3_rec_len_to_disk(blocksize);
return add_dirent_to_buf(handle, dentry, inode, de, bh);
}
static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
struct inode *inode, struct ext3_dir_entry_2 *de,
struct buffer_head * bh)
{
struct inode *dir = dentry->d_parent->d_inode;
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
unsigned long offset = 0;
unsigned short reclen;
int nlen, rlen, err;
char *top;
reclen = EXT3_DIR_REC_LEN(namelen);
if (!de) {
de = (struct ext3_dir_entry_2 *)bh->b_data;
top = bh->b_data + dir->i_sb->s_blocksize - reclen;
while ((char *) de <= top) {
if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
bh, offset)) {
brelse (bh);
return -EIO;
}
if (ext3_match (namelen, name, de)) {
brelse (bh);
return -EEXIST;
}
nlen = EXT3_DIR_REC_LEN(de->name_len);
rlen = ext3_rec_len_from_disk(de->rec_len);
if ((de->inode? rlen - nlen: rlen) >= reclen)
break;
de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
offset += rlen;
}
if ((char *) de > top)
return -ENOSPC;
}
BUFFER_TRACE(bh, "get_write_access");
err = ext3_journal_get_write_access(handle, bh);
if (err) {
ext3_std_error(dir->i_sb, err);
brelse(bh);
return err;
}
nlen = EXT3_DIR_REC_LEN(de->name_len);
rlen = ext3_rec_len_from_disk(de->rec_len);
if (de->inode) {
struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
de1->rec_len = ext3_rec_len_to_disk(rlen - nlen);
de->rec_len = ext3_rec_len_to_disk(nlen);
de = de1;
}
de->file_type = EXT3_FT_UNKNOWN;
if (inode) {
de->inode = cpu_to_le32(inode->i_ino);
ext3_set_de_type(dir->i_sb, de, inode->i_mode);
} else
de->inode = 0;
de->name_len = namelen;
memcpy (de->name, name, namelen);
dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
ext3_update_dx_flag(dir);
dir->i_version++;
ext3_mark_inode_dirty(handle, dir);
BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bh);
if (err)
ext3_std_error(dir->i_sb, err);
brelse(bh);
return 0;
}
ext3_dir_entry_2の.file_typeはファイルシステム依存で、ext3_readdir()で取得される.file_typeのd_typeは、.file_typeをインデックスとするext3_filetype_table[]で、故にファイルシステムに関係なくDT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNKとなります。
struct ext3_dir_entry_2 {
__le32 inode; /* Inode number */
__le16 rec_len; /* Directory entry length */
__u8 name_len; /* Name length */
__u8 file_type;
char name[EXT3_NAME_LEN]; /* File name */
};
static inline void ext3_set_de_type(struct super_block *sb,
struct ext3_dir_entry_2 *de,
umode_t mode) {
if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE))
de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
}
#define DT_UNKNOWN 0
#define DT_FIFO 1
#define DT_CHR 2
#define DT_DIR 4
#define DT_BLK 6
#define DT_REG 8
#define DT_LNK 10
#define DT_SOCK 12
#define DT_WHT 14
static unsigned char ext3_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
#define S_SHIFT 12
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
#define EXT3_FT_UNKNOWN 0
#define EXT3_FT_REG_FILE 1
#define EXT3_FT_DIR 2
#define EXT3_FT_CHRDEV 3
#define EXT3_FT_BLKDEV 4
#define EXT3_FT_FIFO 5
#define EXT3_FT_SOCK 6
#define EXT3_FT_SYMLINK 7
static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFREG >> S_SHIFT] = EXT3_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = EXT3_FT_DIR,
[S_IFCHR >> S_SHIFT] = EXT3_FT_CHRDEV,
[S_IFBLK >> S_SHIFT] = EXT3_FT_BLKDEV,
[S_IFIFO >> S_SHIFT] = EXT3_FT_FIFO,
[S_IFSOCK >> S_SHIFT] = EXT3_FT_SOCK,
[S_IFLNK >> S_SHIFT] = EXT3_FT_SYMLINK,
};
static int ext3_readdir(struct file * filp,
void * dirent, filldir_t filldir)
{
int error = 0;
unsigned long offset;
int i, stored;
struct ext3_dir_entry_2 *de;
int err;
struct inode *inode = filp->f_path.dentry->d_inode;
struct super_block *sb = inode->i_sb;
int ret = 0;
int dir_has_error = 0;
if (is_dx_dir(inode)) {
err = ext3_dx_readdir(filp, dirent, filldir);
if (err != ERR_BAD_DX_DIR) {
ret = err;
goto out;
}
EXT3_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
}
stored = 0;
offset = filp->f_pos & (sb->s_blocksize - 1);
while (!error && !stored && filp->f_pos < inode->i_size) {
unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb);
struct buffer_head map_bh;
struct buffer_head *bh = NULL;
map_bh.b_state = 0;
err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0);
if (err > 0) {
pgoff_t index = map_bh.b_blocknr >>
(PAGE_CACHE_SHIFT - inode->i_blkbits);
if (!ra_has_index(&filp->f_ra, index))
page_cache_sync_readahead(
sb->s_bdev->bd_inode->i_mapping,
&filp->f_ra, filp,
index, 1);
filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
bh = ext3_bread(NULL, inode, blk, 0, &err);
}
if (!bh) {
if (!dir_has_error) {
ext3_error(sb, __func__, "directory #%lu "
"contains a hole at offset %lld",
inode->i_ino, filp->f_pos);
dir_has_error = 1;
}
/* corrupt size? Maybe no more blocks to read */
if (filp->f_pos > inode->i_blocks << 9)
break;
filp->f_pos += sb->s_blocksize - offset;
continue;
}
revalidate:
if (filp->f_version != inode->i_version) {
for (i = 0; i < sb->s_blocksize && i < offset; ) {
de = (struct ext3_dir_entry_2 *)
(bh->b_data + i);
if (ext3_rec_len_from_disk(de->rec_len) <
EXT3_DIR_REC_LEN(1))
break;
i += ext3_rec_len_from_disk(de->rec_len);
}
offset = i;
filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
| offset;
filp->f_version = inode->i_version;
}
while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
bh, offset)) {
filp->f_pos = (filp->f_pos |
(sb->s_blocksize - 1)) + 1;
brelse (bh);
ret = stored;
goto out;
}
offset += ext3_rec_len_from_disk(de->rec_len);
if (le32_to_cpu(de->inode)) {
u64 version = filp->f_version;
error = filldir(dirent, de->name,
de->name_len,
filp->f_pos,
le32_to_cpu(de->inode),
get_dtype(sb, de->file_type));
if (error)
break;
if (version != filp->f_version)
goto revalidate;
stored ++;
}
filp->f_pos += ext3_rec_len_from_disk(de->rec_len);
}
offset = 0;
brelse (bh);
}
out:
return ret;
}
static unsigned char get_dtype(struct super_block *sb, int filetype)
{
if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
(filetype >= EXT3_FT_MAX))
return DT_UNKNOWN;
return (ext3_filetype_table[filetype]);
}
***検証
[root@localhost north]# cat dir.c
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
void main(int argc, char *argv[])
{
DIR *dir;
struct dirent *dp;
dir=opendir(argv[1]);
printf ("%7s %10s %3s %4s %s\n", "inode", "off", "len", "type", "name");
while (1) {
dp = readdir(dir);
if (!dp) {
break;
}
printf ("%07d %10d %03d %04d %s\n",
dp->d_ino,
dp->d_off,
dp->d_reclen,
dp->d_type,
dp->d_name);
free(dp);
}
}
[root@localhost north]# touch babakaka/text
[root@localhost north]# mkdir babakaka/dir
[root@localhost north]# mkfifo babakaka/fifo
[root@localhost north]# mknod babakaka/blockdev b 10 1
[root@localhost north]# mknod babakaka/chardev b 11 1
[root@localhost north]# ln -s babakaka/text babakaka/symlink
[root@localhost north]# ln babakaka/text babakaka/hardlink
[root@localhost north]# ./dir babakaka/
inode off len type name
1802243 698206621 016 0008 text
1802242 942110034 016 0004 dir
0000002 1106706763 016 0004 ..
1802247 1162905680 020 0010 symlink
1802245 1470500359 020 0006 blockdev
1802243 1475777499 020 0008 hardlink
1802241 1852366747 016 0004 .
1802246 1936624415 020 0002 chardev
1802244 2147483647 016 0001 fifo
[root@localhost north]# ls -lai babakaka/
合計 12
1802241 drwxr-xr-x 3 root root 4096 7月 7 19:46 .
2 drwxr-xr-x 5 root root 4096 7月 7 20:23 ..
1802245 brw-r--r-- 1 root root 10, 1 7月 7 19:17 blockdev
1802246 crw-r--r-- 1 root root 11, 1 7月 7 19:46 chardev
1802242 drwxr-xr-x 2 root root 4096 7月 7 19:15 dir
1802244 prw-r--r-- 1 root root 0 7月 7 19:16 fifo
1802243 -rw-r--r-- 2 root root 0 7月 7 19:15 hardlink
1802247 lrwxrwxrwx 1 root root 13 7月 7 19:18 symlink -> babakaka/text
1802243 -rw-r--r-- 2 root root 0 7月 7 19:15 text
struct linux_dirent {
unsigned long d_ino;
unsigned long d_off;
unsigned short d_reclen;
char d_name[1];
};
SYSCALL_DEFINE3(getdents, unsigned int, fd,
struct linux_dirent __user *, dirent, unsigned int, count)
{
struct fd f;
struct linux_dirent __user * lastdirent;
struct getdents_callback buf;
int error;
if (!access_ok(VERIFY_WRITE, dirent, count))
return -EFAULT;
f = fdget(fd);
if (!f.file)
return -EBADF;
buf.current_dir = dirent;
buf.previous = NULL;
buf.count = count;
buf.error = 0;
error = vfs_readdir(f.file, filldir, &buf);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
if (put_user(f.file->f_pos, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
}
fdput(f);
return error;
}
作成できるディレクトリ下のファイルの制約はありませんが、ディレクトリはEXT3_LINK_MAX=32000までです。ディレクトリはファイルと異り、ブロックサイズの領域を有する故の実装かと。
struct dirent
{
#ifndef __USE_FILE_OFFSET64
__ino_t d_ino;
__off_t d_off;
#else
__ino64_t d_ino;
__off64_t d_off;
#endif
unsigned short int d_reclen;
unsigned char d_type;
char d_name[256]; /* We must not include limits.h! */
};
#define EXT3_LINK_MAX 32000
static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
{
handle_t *handle;
struct inode * inode;
struct buffer_head * dir_block = NULL;
struct ext3_dir_entry_2 * de;
int err, retries = 0;
if (dir->i_nlink >= EXT3_LINK_MAX)
return -EMLINK;
dquot_initialize(dir);
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFDIR | mode);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
inode->i_op = &ext3_dir_inode_operations;
inode->i_fop = &ext3_dir_operations;
inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
if (!(dir_block = ext3_dir_bread(handle, inode, 0, 1, &err)))
goto out_clear_inode;
BUFFER_TRACE(dir_block, "get_write_access");
err = ext3_journal_get_write_access(handle, dir_block);
if (err)
goto out_clear_inode;
de = (struct ext3_dir_entry_2 *) dir_block->b_data;
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
de = ext3_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(1));
de->name_len = 2;
strcpy (de->name, "..");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
set_nlink(inode, 2);
BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, dir_block);
if (err)
goto out_clear_inode;
err = ext3_mark_inode_dirty(handle, inode);
if (!err)
err = ext3_add_entry (handle, dentry, inode);
if (err) {
out_clear_inode:
clear_nlink(inode);
unlock_new_inode(inode);
ext3_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
}
inc_nlink(dir);
ext3_update_dx_flag(dir);
err = ext3_mark_inode_dirty(handle, dir);
if (err)
goto out_clear_inode;
unlock_new_inode(inode);
d_instantiate(dentry, inode);
out_stop:
brelse(dir_block);
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
}
void inc_nlink(struct inode *inode)
{
if (WARN_ON(inode->i_nlink == 0))
atomic_long_dec(&inode->i_sb->s_remove_count);
inode->__i_nlink++;
}
cdコマンドはdirに変移するのでなく、current->fs->fs->pwdにdirを設定して、パス捜査はcurrent->fs->fs->pwdのdirファイルを読み込むことで運用されます。
SYSCALL_DEFINE1(chdir, const char __user *, filename)
{
struct path path;
int error;
error = user_path_dir(filename, &path);
if (error)
goto out;
error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
if (error)
goto dput_and_out;
set_fs_pwd(current->fs, &path);
dput_and_out:
path_put(&path);
out:
return error;
}
void set_fs_pwd(struct fs_struct *fs, struct path *path)
{
struct path old_pwd;
path_get(path);
spin_lock(&fs->lock);
write_seqcount_begin(&fs->seq);
old_pwd = fs->pwd;
fs->pwd = *path;
write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_pwd.dentry)
path_put(&old_pwd);
}
***追記
ext2ファイルシステムは、dir->i_nlinkかかる制約はありません。
static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
{
struct inode * inode;
int err;
dquot_initialize(dir);
inode_inc_link_count(dir);
inode = ext2_new_inode(dir, S_IFDIR | mode, &dentry->d_name);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_dir;
inode->i_op = &ext2_dir_inode_operations;
inode->i_fop = &ext2_dir_operations;
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
inode->i_mapping->a_ops = &ext2_aops;
inode_inc_link_count(inode);
err = ext2_make_empty(inode, dir);
if (err)
goto out_fail;
err = ext2_add_link(dentry, inode);
if (err)
goto out_fail;
unlock_new_inode(inode);
d_instantiate(dentry, inode);
out:
return err;
out_fail:
inode_dec_link_count(inode);
inode_dec_link_count(inode);
unlock_new_inode(inode);
iput(inode);
out_dir:
inode_dec_link_count(dir);
goto out;
}
***備考
ディレクトリ/ファイル等はスーパブロックのデバイスのinode領域から取得され、そもそもinodeが取得できなければディレクトリマターに依存せず作成できません。