mount命令常用于挂载文件系统,它调用的内核函数是sys_mount,sys_mount主要功能是将用户的命令行参数从用户空间传递到内核空间,并调用do_mount解析参数,完成mount过程。其实现源码如下:
/*
 * mount(2) system call entry point.
 *
 * Copies the filesystem type, the mount point path, the device name and
 * the filesystem-specific option data from user space into kernel
 * buffers, hands them to do_mount(), and then releases every buffer in
 * reverse order of acquisition.
 *
 * @dev_name: user-space pointer to the device (source) name
 * @dir_name: user-space pointer to the mount point path
 * @type:     user-space pointer to the filesystem type name
 * @flags:    mount flags, passed to do_mount() unchanged
 * @data:     user-space pointer to filesystem-specific option data
 *
 * Returns the result of do_mount(), or the error of the first copy step
 * that failed.
 */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	int ret;
	char *kernel_type;
	char *kernel_dir;
	char *kernel_dev;
	unsigned long data_page;

	/* Copy the user-supplied arguments into kernel space. */
	ret = copy_mount_string(type, &kernel_type);
	if (ret < 0)
		goto out_type;

	kernel_dir = getname(dir_name);
	if (IS_ERR(kernel_dir)) {
		ret = PTR_ERR(kernel_dir);
		goto out_dir;
	}

	ret = copy_mount_string(dev_name, &kernel_dev);
	if (ret < 0)
		goto out_dev;

	ret = copy_mount_options(data, &data_page);
	if (ret < 0)
		goto out_data;

	/* Perform the actual mount operation. */
	ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
		(void *) data_page);

	/* Unwind: each label frees what was acquired before the failing step. */
	free_page(data_page);
out_data:
	kfree(kernel_dev);
out_dev:
	putname(kernel_dir);
out_dir:
	kfree(kernel_type);
out_type:
	return ret;
}
/* * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to * be given to the mount() call (ie: read-only, no-dev, no-suid etc). * * data is a (void *) that can point to any structure up to * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent * information (or be NULL). * * Pre-0.97 versions of mount() didn't have a flags word. * When the flags word was introduced its top half was required * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9. * Therefore, if this magic number is present, it carries no information * and must be discarded. */ long do_mount(char *dev_name, char *dir_name, char *type_page, unsigned long flags, void *data_page) { struct path path; int retval = 0; int mnt_flags = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; /* Basic sanity checks */ if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) return -EINVAL; if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; /* ... and get the mountpoint */ retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);//查找挂载到目录的path对象? 
if (retval) return retval; retval = security_sb_mount(dev_name, &path,//安全检查 type_page, flags, data_page); if (retval) goto dput_out; /* Default to relatime unless overriden */ if (!(flags & MS_NOATIME))//参数设置 mnt_flags |= MNT_RELATIME; /* Separate the per-mountpoint flags */ if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; if (flags & MS_NODEV) mnt_flags |= MNT_NODEV; if (flags & MS_NOEXEC) mnt_flags |= MNT_NOEXEC; if (flags & MS_NOATIME) mnt_flags |= MNT_NOATIME; if (flags & MS_NODIRATIME) mnt_flags |= MNT_NODIRATIME; if (flags & MS_STRICTATIME) mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); if (flags & MS_REMOUNT) retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, data_page); else if (flags & MS_BIND) retval = do_loopback(&path, dev_name, flags & MS_REC); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) retval = do_change_type(&path, flags); else if (flags & MS_MOVE) retval = do_move_mount(&path, dev_name); else retval = do_new_mount(&path, type_page, flags, mnt_flags,//一般是执行一个新的挂载操作 dev_name, data_page); dput_out: path_put(&path); return retval; }
/* * create a new mount for userspace and request it to be added into the * namespace's tree */ static int do_new_mount(struct path *path, char *type, int flags, int mnt_flags, char *name, void *data) { struct vfsmount *mnt; if (!type) return -EINVAL; /* we need capabilities... */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; lock_kernel(); mnt = do_kern_mount(type, flags, name, data);//执行真正的挂载操作 unlock_kernel(); if (IS_ERR(mnt)) return PTR_ERR(mnt); return do_add_mount(mnt, path, mnt_flags, NULL);//添加到挂载树 }
/*
 * Look up the filesystem type named @fstype and create a mount of it.
 *
 * @fstype: filesystem type name
 * @flags:  mount flags passed to vfs_kern_mount()
 * @name:   device (source) name
 * @data:   filesystem-specific option data
 *
 * Returns the new vfsmount, ERR_PTR(-ENODEV) when the type is unknown,
 * or whatever error pointer vfs_kern_mount()/fs_set_subtype() produced.
 */
struct vfsmount *
do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
	struct file_system_type *type = get_fs_type(fstype);
	struct vfsmount *mnt;

	if (!type)
		return ERR_PTR(-ENODEV);

	/* Allocate the vfsmount structure and fill it in. */
	mnt = vfs_kern_mount(type, flags, name, data);

	/*
	 * The type supports subtypes but the superblock has none yet:
	 * let fs_set_subtype() set it from @fstype.
	 */
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);

	/* Pairs with get_fs_type() above. */
	put_filesystem(type);
	return mnt;
}
do_new_mount的主要工作有二:一是通过do_kern_mount获得一个新的安装区域块(vfsmount结构),二是通过do_add_mount将该安装区域块加入安装系统链表。
看一下vfs_kern_mount:
struct vfsmount * vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { struct vfsmount *mnt; char *secdata = NULL; int error; if (!type) return ERR_PTR(-ENODEV); error = -ENOMEM; mnt = alloc_vfsmnt(name);//申请了一块该类型的内存空间(struct vfsmount *mnt),并初始化其部分成员变量 if (!mnt) goto out; if (flags & MS_KERNMOUNT) mnt->mnt_flags = MNT_INTERNAL; if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {//文件系统使用的二进制安装数据 secdata = alloc_secdata(); if (!secdata) goto out_mnt; error = security_sb_copy_data(data, secdata); if (error) goto out_free_secdata; } error = type->get_sb(type, flags, name, data, mnt);/*调用文件系统控制结构体的get_sb()*/ if (error < 0) goto out_free_secdata; BUG_ON(!mnt->mnt_sb); WARN_ON(!mnt->mnt_sb->s_bdi); mnt->mnt_sb->s_flags |= MS_BORN; error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); if (error) goto out_sb; /* * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE * but s_maxbytes was an unsigned long long for many releases. Throw * this warning for a little while to try and catch filesystems that * violate this rule. This warning should be either removed or * converted to a BUG() in 2.6.34. */ WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes); mnt->mnt_mountpoint = mnt->mnt_root;//设置挂载点的目录项 mnt->mnt_parent = mnt;// 把新的 vfsmount 结构赋给自身的 parent 这样可以 通过 parent 遍历出所有 mount 的文件系统 up_write(&mnt->mnt_sb->s_umount); free_secdata(secdata); return mnt; out_sb: dput(mnt->mnt_root); deactivate_locked_super(mnt->mnt_sb); out_free_secdata: free_secdata(secdata); out_mnt: free_vfsmnt(mnt); out: return ERR_PTR(error); }
回到do_new_mount函数,接着调用do_add_mount将新的vfsmount添加到命名空间的挂载树上去:
/* * add a mount into a namespace's mount tree * - provide the option of adding the new mount to an expiration list */ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags, struct list_head *fslist) { int err; mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); down_write(&namespace_sem); /* Something was mounted here while we slept */ while (d_mountpoint(path->dentry) && follow_down(path)) ; err = -EINVAL; if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) goto unlock; /* Refuse the same filesystem on the same mount point */ err = -EBUSY; if (path->mnt->mnt_sb == newmnt->mnt_sb && path->mnt->mnt_root == path->dentry) goto unlock; err = -EINVAL; if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode)) goto unlock; newmnt->mnt_flags = mnt_flags; if ((err = graft_tree(newmnt, path)))//主要是这个函数 goto unlock; if (fslist) /* add to the specified expiration list */ list_add_tail(&newmnt->mnt_expire, fslist); up_write(&namespace_sem); return 0; unlock: up_write(&namespace_sem); mntput(newmnt); return err; }
static int graft_tree(struct vfsmount *mnt, struct path *path) { int err; if (mnt->mnt_sb->s_flags & MS_NOUSER) return -EINVAL; if (S_ISDIR(path->dentry->d_inode->i_mode) != S_ISDIR(mnt->mnt_root->d_inode->i_mode)) return -ENOTDIR; err = -ENOENT; mutex_lock(&path->dentry->d_inode->i_mutex); if (cant_mount(path->dentry)) goto out_unlock; if (!d_unlinked(path->dentry)) err = attach_recursive_mnt(mnt, path, NULL);// out_unlock: mutex_unlock(&path->dentry->d_inode->i_mutex); return err; }
/*
 *  @source_mnt  : mount tree to be attached
 *  @path        : place the mount tree @source_mnt is attached
 *  @parent_path : if non-null, detach the source_mnt from its parent and
 *  		   store the parent mount and mountpoint dentry.
 *  		   (done when source_mnt is moved)
 *
 *  NOTE: in the table below explains the semantics when a source mount
 *  of a given type is attached to a destination mount of a given type.
 * ---------------------------------------------------------------------------
 * |         BIND MOUNT OPERATION                                            |
 * |**************************************************************************
 * | source-->| shared        |       private  |       slave    | unbindable |
 * | dest     |               |                |                |            |
 * |   |      |               |                |                |            |
 * |   v      |               |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (++)   |     shared (+) |     shared(+++)|  invalid   |
 * |          |               |                |                |            |
 * |non-shared| shared (+)    |      private   |      slave (*) |  invalid   |
 * ***************************************************************************
 * A bind operation clones the source mount and mounts the clone on the
 * destination mount.
 *
 * (++)  the cloned mount is propagated to all the mounts in the propagation
 * 	 tree of the destination mount and the cloned mount is added to
 * 	 the peer group of the source mount.
 * (+)   the cloned mount is created under the destination mount and is marked
 *       as shared. The cloned mount is added to the peer group of the source
 *       mount.
 * (+++) the mount is propagated to all the mounts in the propagation tree
 *       of the destination mount and the cloned mount is made slave
 *       of the same master as that of the source mount. The cloned mount
 *       is marked as 'shared and slave'.
 * (*)   the cloned mount is made a slave of the same master as that of the
 * 	 source mount.
 *
 * ---------------------------------------------------------------------------
 * |         		MOVE MOUNT OPERATION                                 |
 * |**************************************************************************
 * | source-->| shared        |       private  |       slave    | unbindable |
 * | dest     |               |                |                |            |
 * |   |      |               |                |                |            |
 * |   v      |               |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (+)    |     shared (+) |    shared(+++) |  invalid   |
 * |          |               |                |                |            |
 * |non-shared| shared (+*)   |      private   |    slave (*)   | unbindable |
 * ***************************************************************************
 *
 * (+)  the mount is moved to the destination. And is then propagated to
 * 	all the mounts in the propagation tree of the destination mount.
 * (+*)  the mount is moved to the destination.
 * (+++)  the mount is moved to the destination and is then propagated to
 * 	all the mounts belonging to the destination mount's propagation tree.
 * 	the mount is marked as 'shared and slave'.
 * (*)	the mount continues to be a slave at the new location.
 *
 * if the source mount is a tree, the operations explained above is
 * applied to each mount in the tree.
 * Must be called without spinlocks held, since this function can sleep
 * in allocations.
 *
 * Returns 0 on success, or the error from invent_group_ids() or
 * propagate_mnt().
 */
/* Attach the filesystem (mount tree) into the parent filesystem's namespace. */
static int attach_recursive_mnt(struct vfsmount *source_mnt,
			struct path *path, struct path *parent_path)
{
	LIST_HEAD(tree_list);
	struct vfsmount *dest_mnt = path->mnt;
	struct dentry *dest_dentry = path->dentry;
	struct vfsmount *child, *p;
	int err;

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
	}
	/* Mounts produced by propagation are collected on tree_list. */
	err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
	if (err)
		goto out_cleanup_ids;

	spin_lock(&vfsmount_lock);

	/* A shared destination makes every mount in the source tree shared. */
	if (IS_MNT_SHARED(dest_mnt)) {
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	}
	if (parent_path) {
		/* Move: unhook from the old parent, hook in at @path. */
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, path);
		touch_mnt_namespace(parent_path->mnt->mnt_ns);
	} else {
		/*
		 * Point the new vfsmount's mnt_parent at the parent
		 * filesystem's vfsmount and its mnt_mountpoint at the mount
		 * point's dentry in the parent filesystem.
		 */
		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
		commit_tree(source_mnt);
	}

	list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
		list_del_init(&child->mnt_hash);
		/*
		 * Add the mount to the global hash table and to the parent
		 * mount's list of child mounts.
		 */
		commit_tree(child);
	}
	spin_unlock(&vfsmount_lock);
	return 0;

 out_cleanup_ids:
	/* Undo invent_group_ids() from the top of the function. */
	if (IS_MNT_SHARED(dest_mnt))
		cleanup_group_ids(source_mnt, NULL);
 out:
	return err;
}
/*
 * Record where @child_mnt is mounted: @mnt becomes its parent and
 * @dentry its mount point.  Both are obtained through mntget()/dget(),
 * and the d_mounted counter of @dentry is bumped by one.
 */
void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
			struct vfsmount *child_mnt)
{
	struct vfsmount *parent = mntget(mnt);
	struct dentry *mountpoint = dget(dentry);

	child_mnt->mnt_parent = parent;
	child_mnt->mnt_mountpoint = mountpoint;
	dentry->d_mounted += 1;
}
再来看一下commit_tree:
static void commit_tree(struct vfsmount *mnt) { struct vfsmount *parent = mnt->mnt_parent; struct vfsmount *m; LIST_HEAD(head); struct mnt_namespace *n = parent->mnt_ns; BUG_ON(parent == mnt); list_add_tail(&head, &mnt->mnt_list); list_for_each_entry(m, &head, mnt_list) m->mnt_ns = n; list_splice(&head, n->list.prev); list_add_tail(&mnt->mnt_hash, mount_hashtable + hash(parent, mnt->mnt_mountpoint));//添加到父节点链表末 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);//添加到父节点mnt_mounts链表末 touch_mnt_namespace(n); }这里会把它添加到一个全局的mount_hashtable,这里的插入点是通过hash(parent, mnt->mnt_mountpoint),即挂载目录先前的vfsmount结构和dentry结构
到这里,文件系统就已经挂载完成了。下次再访问这个挂载点时,就会使用这个新安装的文件系统(即新的vfsmount结构)。
参考:
http://www.ibm.com/developerworks/cn/linux/l-vfs/