@Author: Gordon Wu
@Time : 2014/12
0.摘要
mount是Linux很常见的命令,本文将从用户空间的命令行开始,一步一步切入到内核的源代码,解释一个文件系统是如果挂载的。本文基于linux 2.6.32
1.SYSCALL_DEFINE5, 系统调用
Linux kernel通过系统调用的方式为用户提供陷入到内核,mount的系统调用是SYSCALL_DEFINE5,位于fs/namespace.c:
主要完成把一些安装信息从用户空间拷贝到内核空间, 见代码的中文注释。
- SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
- char __user *, type, unsigned long, flags, void __user *, data)
- {
- int ret;
- char *kernel_type;
- char *kernel_dir;
- char *kernel_dev;
- unsigned long data_page;
-
- ret = copy_mount_string(type, &kernel_type);
- if (ret < 0)
- goto out_type;
-
- kernel_dir = getname(dir_name);
- if (IS_ERR(kernel_dir)) {
- ret = PTR_ERR(kernel_dir);
- goto out_dir;
- }
-
- ret = copy_mount_string(dev_name, &kernel_dev);
- if (ret < 0)
- goto out_dev;
-
- ret = copy_mount_options(data, &data_page);
- if (ret < 0)
- goto out_data;
-
- ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
- (void *) data_page);
-
- free_page(data_page);
- out_data:
- kfree(kernel_dev);
- out_dev:
- putname(kernel_dir);
- out_dir:
- kfree(kernel_type);
- out_type:
- return ret;
- }
2. do_mount
验证挂载的选项Flags,以及获取并填充相应挂载目录dir_name的路径Path结构。 再根据挂载选项Flags来判断,挂载的动作。
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- long do_mount(char *dev_name, char *dir_name, char *type_page,
- unsigned long flags, void *data_page)
- {
- struct path path;
- int retval = 0;
- int mnt_flags = 0;
-
-
- if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
- flags &= ~MS_MGC_MSK;
-
-
-
- if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
- return -EINVAL;
-
- if (data_page)
- ((char *)data_page)[PAGE_SIZE - 1] = 0;
-
-
- if (!(flags & MS_NOATIME))
- mnt_flags |= MNT_RELATIME;
-
-
- if (flags & MS_NOSUID)
- mnt_flags |= MNT_NOSUID;
- if (flags & MS_NODEV)
- mnt_flags |= MNT_NODEV;
- if (flags & MS_NOEXEC)
- mnt_flags |= MNT_NOEXEC;
- if (flags & MS_NOATIME)
- mnt_flags |= MNT_NOATIME;
- if (flags & MS_NODIRATIME)
- mnt_flags |= MNT_NODIRATIME;
- if (flags & MS_STRICTATIME)
- mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
- if (flags & MS_RDONLY)
- mnt_flags |= MNT_READONLY;
-
- flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
- MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
- MS_STRICTATIME);
-
-
- retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
- if (retval)
- return retval;
-
- retval = security_sb_mount(dev_name, &path,
- type_page, flags, data_page);
- if (retval)
- goto dput_out;
- <span style="color:#FF0000;"></span>
- if (flags & MS_REMOUNT)
- retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
- data_page);
- else if (flags & MS_BIND)
- retval = do_loopback(&path, dev_name, flags & MS_REC);
- else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
- retval = do_change_type(&path, flags);
- else if (flags & MS_MOVE)
- retval = do_move_mount(&path, dev_name);
- else
- retval = do_new_mount(&path, type_page, flags, mnt_flags,
- dev_name, data_page);
- dput_out:
- path_put(&path);
- return retval;
- }
3. do_new_mount
利用do_kern_mount为用户空间生成一个新的挂载,并do_add_mount把新安装加入到命名空间树上
-
-
-
-
- static int do_new_mount(struct path *path, char *type, int flags,
- int mnt_flags, char *name, void *data)
- {
- struct vfsmount *mnt;
-
- if (!type)
- return -EINVAL;
-
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
-
- lock_kernel();
- mnt = do_kern_mount(type, flags, name, data);
- unlock_kernel();
- if (IS_ERR(mnt))
- return PTR_ERR(mnt);
-
- return do_add_mount(mnt, path, mnt_flags, NULL);
- }
4. do_kern_mount
4.1 do_kern_mount函数首先会调用get_fs_type来查看内核是否注册了参数type所指的文件系统,对于内核源码下fs目录下的所有文件系统都会通过调用register_filesystem来注册这个文件系统,其实就是添加到内核文件系统链表中,get_fs_type会将参数type字符串跟内核链表中所有已经注册的文件系统结构体file_system_type的name成员向比较,如果找到,则说明内核已经注册了相应文件系统,并且返回相应文件系统注册的file_system_type结构体。后面的挂载过程需要使用到这个结构体中的成员。[1]
- struct vfsmount *
- do_kern_mount(const char *fstype, int flags, const char *name, void *data)
- {
- struct file_system_type *type = get_fs_type(fstype);
- struct vfsmount *mnt;
- if (!type)
- return ERR_PTR(-ENODEV);
- mnt = vfs_kern_mount(type, flags, name, data);
- if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
- !mnt->mnt_sb->s_subtype)
- mnt = fs_set_subtype(mnt, fstype);
- put_filesystem(type);
- return mnt;
- }
4.2 struct file_system_type结构体,下面的get_sb函数会在下面的挂载过程中用到,用来完成具体文件系统挂载的操作。
- struct file_system_type {
- const char *name;
- int fs_flags;
- int (*get_sb) (struct file_system_type *, int,
- const char *, void *, struct vfsmount *);
- void (*kill_sb) (struct super_block *);
- struct module *owner;
- struct file_system_type * next;
- struct list_head fs_supers;
-
- struct lock_class_key s_lock_key;
- struct lock_class_key s_umount_key;
-
- struct lock_class_key i_lock_key;
- struct lock_class_key i_mutex_key;
- struct lock_class_key i_mutex_dir_key;
- struct lock_class_key i_alloc_sem_key;
- };
4.3 get_fs_type用到的最主要的函数: 就是从已挂载文件系统队列file_systems中查找,返回相应name类型的file_system_type.
- static struct file_system_type **find_filesystem(const char *name, unsigned len)
- {
- struct file_system_type **p;
- for (p=&file_systems; *p; p=&(*p)->next)
- if (strlen((*p)->name) == len &&
- strncmp((*p)->name, name, len) == 0)
- break;
- return p;
- }
5. vfs_kern_mount
具体文件系统file_system_type的get_sb成员函数是关键,它填充vfsmnt结构体的super_block结构体,是写文件系统的第一步。
- struct vfsmount *
- vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
- {
- struct vfsmount *mnt;
- char *secdata = NULL;
- int error;
-
- if (!type)
- return ERR_PTR(-ENODEV);
-
- error = -ENOMEM;
- mnt = alloc_vfsmnt(name);
- if (!mnt)
- goto out;
-
- if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
- secdata = alloc_secdata();
- if (!secdata)
- goto out_mnt;
-
- error = security_sb_copy_data(data, secdata);
- if (error)
- goto out_free_secdata;
- }
-
- error = type->get_sb(type, flags, name, data, mnt);
- if (error < 0)
- goto out_free_secdata;
- BUG_ON(!mnt->mnt_sb);
-
- error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
- if (error)
- goto out_sb;
-
-
-
-
-
-
-
-
- WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
- "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
-
- mnt->mnt_mountpoint = mnt->mnt_root;
- mnt->mnt_parent = mnt;
- up_write(&mnt->mnt_sb->s_umount);
- free_secdata(secdata);
- return mnt;
- out_sb:
- dput(mnt->mnt_root);
- deactivate_locked_super(mnt->mnt_sb);
- out_free_secdata:
- free_secdata(secdata);
- out_mnt:
- free_vfsmnt(mnt);
- out:
- return ERR_PTR(error);
- }
5.2 ext2_get_sb, 以ext2文件系统为例,通过ext2_fill_super来填充super_block的信息。
- static int ext2_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
- {
- return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt);
- }
5.3 get_sb_bdev
获取相应块设备的super_block,通过ext2_fill_super来完成super_block信息的填充。
- int get_sb_bdev(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- int (*fill_super)(struct super_block *, void *, int),
- struct vfsmount *mnt)
- {
- struct block_device *bdev;
- struct super_block *s;
- fmode_t mode = FMODE_READ;
- int error = 0;
-
- if (!(flags & MS_RDONLY))
- mode |= FMODE_WRITE;
-
- bdev = open_bdev_exclusive(dev_name, mode, fs_type);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
-
-
-
-
-
-
- mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (bdev->bd_fsfreeze_count > 0) {
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- error = -EBUSY;
- goto error_bdev;
- }
- s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- if (IS_ERR(s))
- goto error_s;
-
- if (s->s_root) {
- if ((flags ^ s->s_flags) & MS_RDONLY) {
- deactivate_locked_super(s);
- error = -EBUSY;
- goto error_bdev;
- }
-
- close_bdev_exclusive(bdev, mode);
- } else {
- char b[BDEVNAME_SIZE];
-
- s->s_flags = flags;
- s->s_mode = mode;
- strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
- sb_set_blocksize(s, block_size(bdev));
- error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
- if (error) {
- deactivate_locked_super(s);
- goto error;
- }
-
- s->s_flags |= MS_ACTIVE;
- bdev->bd_super = s;
- }
-
- simple_set_mnt(mnt, s);
- return 0;
-
- error_s:
- error = PTR_ERR(s);
- error_bdev:
- close_bdev_exclusive(bdev, mode);
- error:
- return error;
- }
6.do_new_mount->do_add_mount
回到do_new_mount的do_add_mount函数,把mnt挂载到命名空间上。do_add_mount将新挂载的文件系统(由vfsmnt表示)添加到系统的命名空间结构体的已挂载文件系统链表中,命名空间是指系统中以挂载文件系统树,每个进程的PCB中都有namespace成员来表示该进程的命名空间,大多数的进程共享同一个命名空间,所以如果在一个进程中将磁盘挂载到系统中,在另一个进程也是可以看到的,这就是由命名空间来实现的[1]。
- *
- * add a mount into a namespace's mount tree
- * - provide the option of adding the new mount to an expiration list
- */
- int do_add_mount(struct vfsmount *newmnt, struct path *path,
- int mnt_flags, struct list_head *fslist)
- {
- int err;
-
- down_write(&namespace_sem);
-
- while (d_mountpoint(path->dentry) &&
- follow_down(path))
- ;
- err = -EINVAL;
- if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
- goto unlock;
-
-
- err = -EBUSY;
- if (path->mnt->mnt_sb == newmnt->mnt_sb &&
- path->mnt->mnt_root == path->dentry)
- goto unlock;
-
- err = -EINVAL;
- if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
- goto unlock;
-
- newmnt->mnt_flags = mnt_flags;
- if ((err = graft_tree(newmnt, path)))
- goto unlock;
-
- if (fslist)
- list_add_tail(&newmnt->mnt_expire, fslist);
-
- up_write(&namespace_sem);
- return 0;
-
- unlock:
- up_write(&namespace_sem);
- mntput(newmnt);
- return err;
- }
参考博文: [1]: http://blog.csdn.net/skyflying2012/article/details/9748133
版权声明:本文为博主原创文章,未经博主允许不得转载。