Bug #36

nnull.diff (see comments on kernel@)

Added by csaba.henk over 9 years ago. Updated over 8 years ago.

Status: Closed
Start date:
Priority: Low
Due date:
Assignee: -
% Done: 0%

Category: -
Target version: -

Description

# HG changeset patch
# User root@
# Node ID e65b90b999c3e7f71539c8affa762d61d3a5fd17
# Parent 758f5a725024e40c276e253651f9115aa4ba9fbe
patch queue: nnull

diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/Makefile
--- a/sys/vfs/nullfs/Makefile Mon Jan 2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/Makefile Mon Jan 2 12:21:05 2006 +0000
@@ -2,7 +2,7 @@
# $DragonFly: src/sys/vfs/nullfs/Makefile,v 1.4 2004/08/13 17:51:12 dillon Exp $

KMOD= null
-SRCS= null_subr.c null_vfsops.c null_vnops.c
+SRCS= null_vfsops.c null_vnops.c
NOMAN=

.include <bsd.kmod.mk>
diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null.h
--- a/sys/vfs/nullfs/null.h Mon Jan 2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/null.h Mon Jan 2 12:21:05 2006 +0000
@@ -49,36 +49,7 @@
};

#ifdef _KERNEL
-/*
- * A cache of vnode references
- */
-struct null_node {
- struct null_node *null_next; /* Hash list */
- struct vnode *null_lowervp; /* vrefed once */
- struct vnode *null_vnode; /* Back pointer */
-};
-
#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
-#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
-#define NULLTOV(xp) ((xp)->null_vnode)
-
-int nullfs_init(struct vfsconf *vfsp);
-int nullfs_uninit(struct vfsconf *vfsp);
-int null_node_add(struct null_node *np);
-void null_node_rem(struct null_node *np);
-int null_node_create(struct mount *mp, struct vnode *target, struct vnode **vpp);
-int null_bypass(struct vop_generic_args *ap);
-
-#ifdef DIAGNOSTIC
-struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno);
-#define NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)
-#else
-#define NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)
-#endif
-
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NULLFSNODE);
-#endif

#ifdef NULLFS_DEBUG
#define NULLFSDEBUG(format, args...) printf(format ,## args)
diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null_vfsops.c
--- a/sys/vfs/nullfs/null_vfsops.c Mon Jan 2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/null_vfsops.c Mon Jan 2 12:21:05 2006 +0000
@@ -59,8 +59,6 @@

static MALLOC_DEFINE(M_NULLFSMNT, "NULLFS mount", "NULLFS mount structure");

-static int nullfs_fhtovp(struct mount *mp, struct fid *fidp,
- struct vnode **vpp);
static int nullfs_checkexp(struct mount *mp, struct sockaddr *nam,
int *extflagsp, struct ucred **credanonp);
static int nullfs_mount(struct mount *mp, char *path, caddr_t data,
@@ -71,8 +69,6 @@
static int nullfs_statfs(struct mount *mp, struct statfs *sbp,
struct thread *td);
static int nullfs_unmount(struct mount *mp, int mntflags, struct thread *td);
-static int nullfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp);
-static int nullfs_vptofh(struct vnode *vp, struct fid *fhp);
static int nullfs_extattrctl(struct mount *mp, int cmd,
const char *attrname, caddr_t arg, struct thread *td);

@@ -84,11 +80,9 @@
{
int error = 0;
struct null_args args;
- struct vnode *lowerrootvp, *vp;
- struct vnode *nullm_rootvp;
+ struct vnode *rootvp;
struct null_mount *xmp;
u_int size;
- int isvnunlocked = 0;
struct nlookupdata nd;

NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
@@ -108,44 +102,15 @@
return (error);

/*
- * Unlock lower node to avoid deadlock.
- * (XXX) VOP_ISLOCKED is needed?
- */
- if ((mp->mnt_vnodecovered->v_tag == VT_NULL) &&
- VOP_ISLOCKED(mp->mnt_vnodecovered, NULL)) {
- VOP_UNLOCK(mp->mnt_vnodecovered, 0, td);
- isvnunlocked = 1;
- }
- /*
* Find lower node
*/
- lowerrootvp = NULL;
+ rootvp = NULL;
error = nlookup_init(&nd, args.target, UIO_USERSPACE, NLC_FOLLOW);
if (error == 0)
error = nlookup(&nd);
if (error == 0) {
error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE,
- &lowerrootvp);
- }
- nlookup_done(&nd);
-
- /*
- * Re-lock vnode.
- */
- if (isvnunlocked && !VOP_ISLOCKED(mp->mnt_vnodecovered, NULL))
- vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY, td);
- if (error)
- return (error);
-
- /*
- * Sanity check on lower vnode
- *
- * Check multi null mount to avoid `lock against myself' panic.
- */
- if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
- NULLFSDEBUG("nullfs_mount: multi null mount?\n");
- vput(lowerrootvp);
- return (EDEADLK);
+ &rootvp);
}

xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
@@ -154,37 +119,29 @@
/*
* Save reference to underlying FS
*/
- xmp->nullm_vfs = lowerrootvp->v_mount;
+ /*
+ * As lite stacking enters the scene, the old way of doing this
+ * -- via the vnode -- is not good enough anymore...
+ */
+ xmp->nullm_vfs = nd.nl_ncp->nc_mount;
+ nlookup_done(&nd);

vfs_add_vnodeops(mp, &mp->mnt_vn_norm_ops,
null_vnodeop_entries, 0);

- /*
- * Save reference. Each mount also holds
- * a reference on the root vnode.
- */
- error = null_node_create(mp, lowerrootvp, &vp);
- /*
- * Unlock the node (either the lower or the alias)
- */
- VOP_UNLOCK(vp, 0, td);
- /*
- * Make sure the node alias worked
- */
- if (error) {
- vrele(lowerrootvp);
- free(xmp, M_NULLFSMNT); /* XXX */
- return (error);
- }
+ VOP_UNLOCK(rootvp, 0, td);

/*
* Keep a held reference to the root vnode.
* It is vrele'd in nullfs_unmount.
*/
- nullm_rootvp = vp;
- nullm_rootvp->v_flag |= VROOT;
- xmp->nullm_rootvp = nullm_rootvp;
- if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+ xmp->nullm_rootvp = rootvp;
+ /*
+ * XXX What's the proper safety condition for querying
+ * the underlying mount? Is this flag tuning necessary
+ * at all?
+ */
+ if (xmp->nullm_vfs->mnt_flag & MNT_LOCAL)
mp->mnt_flag |= MNT_LOCAL;
mp->mnt_data = (qaddr_t) xmp;
vfs_getnewfsid(mp);
@@ -205,18 +162,12 @@
nullfs_unmount(struct mount *mp, int mntflags, struct thread *td)
{
void *mntdata;
- int error;
int flags = 0;

NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);

if (mntflags & MNT_FORCE)
flags |= FORCECLOSE;
-
- /* There is 1 extra root vnode reference (nullm_rootvp). */
- error = vflush(mp, 1, flags);
- if (error)
- return (error);

/*
* Finally, throw away the null_mount structure
@@ -233,9 +184,8 @@
struct thread *td = curthread; /* XXX */
struct vnode *vp;

- NULLFSDEBUG("nullfs_root(mp = %p, vp = %p->%p)\n", (void *)mp,
- (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
- (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+ NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", (void *)mp,
+ (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);

/*
* Return locked reference to root.
@@ -268,9 +218,8 @@
int error;
struct statfs mstat;

- NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
- (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
- (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+ NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p)\n", (void *)mp,
+ (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);

bzero(&mstat, sizeof(mstat));

@@ -296,32 +245,12 @@
}

static int
-nullfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
-{
-
- return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
-}
-
-static int
-nullfs_fhtovp(struct mount *mp, struct fid *fidp, struct vnode **vpp)
-{
-
- return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp);
-}
-
-static int
nullfs_checkexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
struct ucred **credanonp)
{

return VFS_CHECKEXP(MOUNTTONULLMOUNT(mp)->nullm_vfs, nam,
extflagsp, credanonp);
-}
-
-static int
-nullfs_vptofh(struct vnode *vp, struct fid *fhp)
-{
- return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhp);
}

static int
@@ -340,12 +269,7 @@
.vfs_quotactl = nullfs_quotactl,
.vfs_statfs = nullfs_statfs,
.vfs_sync = vfs_stdsync,
- .vfs_vget = nullfs_vget,
- .vfs_fhtovp = nullfs_fhtovp,
.vfs_checkexp = nullfs_checkexp,
- .vfs_vptofh = nullfs_vptofh,
- .vfs_init = nullfs_init,
- .vfs_uninit = nullfs_uninit,
.vfs_extattrctl = nullfs_extattrctl
};

diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null_vnops.c
--- a/sys/vfs/nullfs/null_vnops.c Mon Jan 2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/null_vnops.c Mon Jan 2 12:21:05 2006 +0000
@@ -83,98 +83,22 @@
*
* The null layer is the minimum file system layer,
* simply bypassing all possible operations to the lower layer
- * for processing there. The majority of its activity centers
- * on the bypass routine, through which nearly all vnode operations
- * pass.
- *
- * The bypass routine accepts arbitrary vnode operations for
- * handling by the lower layer. It begins by examing vnode
- * operation arguments and replacing any null-nodes by their
- * lower-layer equivlants. It then invokes the operation
- * on the lower layer. Finally, it replaces the null-nodes
- * in the arguments and, if a vnode is return by the operation,
- * stacks a null-node on top of the returned vnode.
- *
- * Although bypass handles most operations, vop_getattr, vop_lock,
- * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not
- * bypassed. Vop_getattr must change the fsid being returned.
- * Vop_lock and vop_unlock must handle any locking for the
- * current vnode as well as pass the lock request down.
- * Vop_inactive and vop_reclaim are not bypassed so that
- * they can handle freeing null-layer specific data. Vop_print
- * is not bypassed to avoid excessive debugging information.
- * Also, certain vnode operations change the locking state within
- * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
- * and symlink). Ideally these operations should not change the
- * lock state, but should be changed to let the caller of the
- * function unlock them. Otherwise all intermediate vnode layers
- * (such as union, umapfs, etc) must catch these functions to do
- * the necessary locking at their layer.
- *
- *
- * INSTANTIATING VNODE STACKS
- *
- * Mounting associates the null layer with a lower layer,
- * effect stacking two VFSes. Vnode stacks are instead
- * created on demand as files are accessed.
- *
- * The initial mount creates a single vnode stack for the
- * root of the new null layer. All other vnode stacks
- * are created as a result of vnode operations on
- * this or other null vnode stacks.
- *
- * New vnode stacks come into existance as a result of
- * an operation which returns a vnode.
- * The bypass routine stacks a null-node above the new
- * vnode before returning it to the caller.
- *
- * For example, imagine mounting a null layer with
- * "mount_null /usr/include /dev/layer/null".
- * Changing directory to /dev/layer/null will assign
- * the root null-node (which was created when the null layer was mounted).
- * Now consider opening "sys". A vop_old_lookup would be
- * done on the root null-node. This operation would bypass through
- * to the lower layer which would return a vnode representing
- * the UFS "sys". Null_bypass then builds a null-node
- * aliasing the UFS "sys" and returns this to the caller.
- * Later operations on the null-node "sys" will repeat this
- * process when constructing other vnode stacks.
- *
- *
- * CREATING OTHER FILE SYSTEM LAYERS
- *
- * One of the easiest ways to construct new file system layers is to make
- * a copy of the null layer, rename all files and variables, and
- * then begin modifing the copy. Sed can be used to easily rename
- * all variables.
- *
- * The umap layer is an example of a layer descended from the
- * null layer.
- *
- *
- * INVOKING OPERATIONS ON LOWER LAYERS
- *
- * There are two techniques to invoke operations on a lower layer
- * when the operation cannot be completely bypassed. Each method
- * is appropriate in different situations. In both cases,
- * it is the responsibility of the aliasing layer to make
- * the operation arguments "correct" for the lower layer
- * by mapping an vnode arguments to the lower layer.
- *
- * The first approach is to call the aliasing layer's bypass routine.
- * This method is most suitable when you wish to invoke the operation
- * currently being handled on the lower layer. It has the advantage
- * that the bypass routine already must do argument mapping.
- * An example of this is null_getattrs in the null layer.
- *
- * A second approach is to directly invoke vnode operations on
- * the lower layer with the VOP_OPERATIONNAME interface.
- * The advantage of this method is that it is easy to invoke
- * arbitrary operations on the lower layer. The disadvantage
- * is that vnode arguments must be manualy mapped.
- *
+ * for processing there. The majority of its activity used to center
+ * on a so-called bypass routine, through which nullfs vnodes
+ * passed operations on to their underlying peers.
+ *
+ * However, with the current implementation nullfs doesn't have any private
+ * vnodes; instead it relies on DragonFly's namecache API. That gives a much
+ * more lightweight null layer, as namecache structures are pure data, with
+ * no private operations, so there is no need for subtle dispatching routines.
+ *
+ * Unlike the old code, this implementation is not a general skeleton overlay
+ * filesystem: to get more comprehensive overlaying, like that of umapfs, we
+ * will need vnode operation dispatch. Other overlay filesystems, like unionfs,
+ * might be able to get by with a hybrid solution: overlay some vnodes, and rely
+ * on the namecache API for the rest.
*/
-
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -187,775 +111,114 @@
#include <sys/buf.h>
#include "null.h"

-static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
-SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
- &null_bug_bypass, 0, "");
-
static int null_nresolve(struct vop_nresolve_args *ap);
static int null_ncreate(struct vop_ncreate_args *ap);
static int null_nmkdir(struct vop_nmkdir_args *ap);
+static int null_nmknod(struct vop_nmknod_args *ap);
+static int null_nlink(struct vop_nlink_args *ap);
+static int null_nsymlink(struct vop_nsymlink_args *ap);
+static int null_nwhiteout(struct vop_nwhiteout_args *ap);
static int null_nremove(struct vop_nremove_args *ap);
static int null_nrmdir(struct vop_nrmdir_args *ap);
static int null_nrename(struct vop_nrename_args *ap);

-static int null_revoke(struct vop_revoke_args *ap);
-static int null_access(struct vop_access_args *ap);
-static int null_createvobject(struct vop_createvobject_args *ap);
-static int null_destroyvobject(struct vop_destroyvobject_args *ap);
-static int null_getattr(struct vop_getattr_args *ap);
-static int null_getvobject(struct vop_getvobject_args *ap);
-static int null_inactive(struct vop_inactive_args *ap);
-static int null_islocked(struct vop_islocked_args *ap);
-static int null_lock(struct vop_lock_args *ap);
-static int null_lookup(struct vop_old_lookup_args *ap);
-static int null_open(struct vop_open_args *ap);
-static int null_print(struct vop_print_args *ap);
-static int null_reclaim(struct vop_reclaim_args *ap);
-static int null_rename(struct vop_old_rename_args *ap);
-static int null_setattr(struct vop_setattr_args *ap);
-static int null_unlock(struct vop_unlock_args *ap);
-
-/*
- * This is the 10-Apr-92 bypass routine.
- * This version has been optimized for speed, throwing away some
- * safety checks. It should still always work, but it's not as
- * robust to programmer errors.
- *
- * In general, we map all vnodes going down and unmap them on the way back.
- * As an exception to this, vnodes can be marked "unmapped" by setting
- * the Nth bit in operation's vdesc_flags.
- *
- * Also, some BSD vnode operations have the side effect of vrele'ing
- * their arguments. With stacking, the reference counts are held
- * by the upper node, not the lower one, so we must handle these
- * side-effects here. This is not of concern in Sun-derived systems
- * since there are no such side-effects.
- *
- * This makes the following assumptions:
- * - only one returned vpp
- * - no INOUT vpp's (Sun's vop_open has one of these)
- * - the vnode operation vector of the first vnode should be used
- * to determine what implementation of the op should be invoked
- * - all mapped vnodes are of our vnode-type (NEEDSWORK:
- * problems on rmdir'ing mount points and renaming?)
- *
- * null_bypass(struct vnodeop_desc *a_desc, ...)
- */
-int
-null_bypass(struct vop_generic_args *ap)
-{
- struct vnode **this_vp_p;
- int error;
- struct vnode *old_vps[VDESC_MAX_VPS];
- struct vnode **vps_p[VDESC_MAX_VPS];
- struct vnode ***vppp;
- struct vnodeop_desc *descp = ap->a_desc;
- int reles, i, j;
-
- if (null_bug_bypass)
- printf ("null_bypass: %s\n", descp->vdesc_name);
-
-#ifdef DIAGNOSTIC
- /*
- * We require at least one vp.
- */
- if (descp->vdesc_vp_offsets == NULL ||
- descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
- panic ("null_bypass: no vp's in map");
-#endif
-
- /*
- * Map the vnodes going in.
- */
- reles = descp->vdesc_flags;
- for (i = 0; i < VDESC_MAX_VPS; ++i) {
- if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
- break; /* bail out at end of list */
- vps_p[i] = this_vp_p =
- VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
- /*
- * We're not guaranteed that any but the first vnode
- * are of our type. Check for and don't map any
- * that aren't. (We must always map first vp or vclean fails.)
- */
- if (i && (*this_vp_p == NULLVP ||
- (*this_vp_p)->v_tag != VT_NULL)) {
- old_vps[i] = NULLVP;
- } else {
- old_vps[i] = *this_vp_p;
- *this_vp_p = NULLVPTOLOWERVP(*this_vp_p);
- /*
- * Several operations have the side effect of vrele'ing
- * their vp's. We must account for that in the lower
- * vp we pass down.
- */
- if (reles & (VDESC_VP0_WILLRELE << i))
- vref(*this_vp_p);
- }
-
- }
-
- /*
- * Call the operation on the lower layer with the modified
- * argument structure. We have to adjust a_fm to point to the
- * lower vp's vop_ops structure.
- */
- if (vps_p[0] && *vps_p[0]) {
- ap->a_ops = *(*(vps_p[0]))->v_ops;
- error = vop_vnoperate_ap(ap);
- } else {
- printf("null_bypass: no map for %s\n", descp->vdesc_name);
- error = EINVAL;
- }
-
- /*
- * Maintain the illusion of call-by-value by restoring vnodes in the
- * argument structure to their original value.
- */
- reles = descp->vdesc_flags;
- for (i = 0; i < VDESC_MAX_VPS; ++i) {
- if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
- break; /* bail out at end of list */
- if (old_vps[i]) {
- *(vps_p[i]) = old_vps[i];
-
- /*
- * Since we operated on the lowervp's instead of the
- * null node vp's, we have to adjust the null node
- * vp's based on what the VOP did to the lower vp.
- *
- * Note: the unlock case only occurs with rename.
- * tdvp and tvp are both locked on call and must be
- * unlocked on return.
- *
- * Unlock semantics indicate that if two locked vp's
- * are passed and they are the same vp, they are only
- * actually locked once.
- */
- if (reles & (VDESC_VP0_WILLUNLOCK << i)) {
- VOP_UNLOCK(old_vps[i], LK_THISLAYER, curthread);
- for (j = i + 1; j < VDESC_MAX_VPS; ++j) {
- if (descp->vdesc_vp_offsets[j] == VDESC_NO_OFFSET)
- break;
- if (old_vps[i] == old_vps[j]) {
- reles &= ~(1 << (VDESC_VP0_WILLUNLOCK << j));
- }
- }
- }
-
- if (reles & (VDESC_VP0_WILLRELE << i))
- vrele(old_vps[i]);
- }
- }
-
- /*
- * Map the possible out-going vpp
- * (Assumes that the lower layer always returns
- * a vref'ed vpp unless it gets an error.)
- */
- if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
- !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
- !error) {
- /*
- * XXX - even though some ops have vpp returned vp's,
- * several ops actually vrele this before returning.
- * We must avoid these ops.
- * (This should go away when these ops are regularized.)
- */
- if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
- goto out;
- vppp = VOPARG_OFFSETTO(struct vnode***,
- descp->vdesc_vpp_offset,ap);
- if (*vppp)
- error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
- }
-
- out:
- return (error);
-}
-
-/*
- * We have to carry on the locking protocol on the null layer vnodes
- * as we progress through the tree. We also have to enforce read-only
- * if this layer is mounted read-only.
- *
- * null_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
- * struct componentname *a_cnp)
- */
-static int
-null_lookup(struct vop_old_lookup_args *ap)
-{
- struct componentname *cnp = ap->a_cnp;
- struct vnode *dvp = ap->a_dvp;
- struct thread *td = cnp->cn_td;
- int flags = cnp->cn_flags;
- struct vnode *vp, *ldvp, *lvp;
- int error;
-
- if ((dvp->v_mount->mnt_flag & MNT_RDONLY) &&
- (cnp->cn_nameiop == NAMEI_DELETE ||
- cnp->cn_nameiop == NAMEI_RENAME)) {
- return (EROFS);
- }
- ldvp = NULLVPTOLOWERVP(dvp);
-
- /*
- * If we are doing a ".." lookup we must release the lock on dvp
- * now, before we run a lookup in the underlying fs, or we may
- * deadlock. If we do this we must protect ldvp by ref'ing it.
- */
- if (flags & CNP_ISDOTDOT) {
- vref(ldvp);
- VOP_UNLOCK(dvp, LK_THISLAYER, td);
- }
-
- /*
- * Due to the non-deterministic nature of the handling of the
- * parent directory lock by lookup, we cannot call null_bypass()
- * here. We must make a direct call. It's faster to do a direct
- * call, anyway.
- */
- vp = lvp = NULL;
- error = VOP_LOOKUP(ldvp, &lvp, cnp);
- if (error == EJUSTRETURN &&
- (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
- (cnp->cn_nameiop == NAMEI_CREATE ||
- cnp->cn_nameiop == NAMEI_RENAME)) {
- error = EROFS;
- }
-
- if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
- if (ldvp == lvp) {
- *ap->a_vpp = dvp;
- vref(dvp);
- vrele(lvp);
- } else {
- error = null_node_create(dvp->v_mount, lvp, &vp);
- if (error == 0)
- *ap->a_vpp = vp;
- }
- }
-
- /*
- * The underlying fs will set PDIRUNLOCK if it unlocked the parent
- * directory, which means we have to follow suit in the nullfs layer.
- * Note that the parent directory may have already been unlocked due
- * to the ".." case. Note that use of cnp->cn_flags instead of flags.
- */
- if (flags & CNP_ISDOTDOT) {
- if ((cnp->cn_flags & CNP_PDIRUNLOCK) == 0)
- VOP_LOCK(dvp, LK_THISLAYER | LK_EXCLUSIVE, td);
- vrele(ldvp);
- } else if (cnp->cn_flags & CNP_PDIRUNLOCK) {
- VOP_UNLOCK(dvp, LK_THISLAYER, td);
- }
- return (error);
-}
-
-/*
- * Setattr call. Disallow write attempts if the layer is mounted read-only.
- *
- * null_setattr(struct vnodeop_desc *a_desc, struct vnode *a_vp,
- * struct vattr *a_vap, struct ucred *a_cred,
- * struct thread *a_td)
- */
-int
-null_setattr(struct vop_setattr_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct vattr *vap = ap->a_vap;
-
- if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
- vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
- vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
- (vp->v_mount->mnt_flag & MNT_RDONLY))
- return (EROFS);
- if (vap->va_size != VNOVAL) {
- switch (vp->v_type) {
- case VDIR:
- return (EISDIR);
- case VCHR:
- case VBLK:
- case VSOCK:
- case VFIFO:
- if (vap->va_flags != VNOVAL)
- return (EOPNOTSUPP);
- return (0);
- case VREG:
- case VLNK:
- default:
- /*
- * Disallow write attempts if the filesystem is
- * mounted read-only.
- */
- if (vp->v_mount->mnt_flag & MNT_RDONLY)
- return (EROFS);
- }
- }
-
- return (null_bypass(&ap->a_head));
-}
-
-/*
- * We handle getattr only to change the fsid.
- *
- * null_getattr(struct vnode *a_vp, struct vattr *a_vap, struct ucred *a_cred,
- * struct thread *a_td)
- */
-static int
-null_getattr(struct vop_getattr_args *ap)
-{
- int error;
-
- if ((error = null_bypass(&ap->a_head)) != 0)
- return (error);
-
- ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
- return (0);
-}
-
-/*
- * Resolve a locked ncp at the nullfs layer.
- */
static int
null_nresolve(struct vop_nresolve_args *ap)
{
- return(vop_compat_nresolve(ap));
-}
-
-/*
- * Create a file
- */
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+ return vop_nresolve_ap(ap);
+}
+
static int
null_ncreate(struct vop_ncreate_args *ap)
{
- return(vop_compat_ncreate(ap));
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+ return vop_ncreate_ap(ap);
}

static int
null_nmkdir(struct vop_nmkdir_args *ap)
{
- return(vop_compat_nmkdir(ap));
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+ return vop_nmkdir_ap(ap);
+}
+
+static int
+null_nmknod(struct vop_nmknod_args *ap)
+{
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+ return vop_nmknod_ap(ap);
+}
+
+static int
+null_nlink(struct vop_nlink_args *ap)
+{
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+ return vop_nlink_ap(ap);
+}
+
+static int
+null_nsymlink(struct vop_nsymlink_args *ap)
+{
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+ return vop_nsymlink_ap(ap);
+}
+
+static int
+null_nwhiteout(struct vop_nwhiteout_args *ap)
+{
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+ return vop_nwhiteout_ap(ap);
}

static int
null_nremove(struct vop_nremove_args *ap)
{
- return(vop_compat_nremove(ap));
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+ return vop_nremove_ap(ap);
}

static int
null_nrmdir(struct vop_nrmdir_args *ap)
{
- return(vop_compat_nrmdir(ap));
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+ return vop_nrmdir_ap(ap);
}

static int
null_nrename(struct vop_nrename_args *ap)
{
- return(vop_compat_nrename(ap));
-}
-
-/*
- * revoke is VX locked, we can't go through null_bypass
- */
-static int
-null_revoke(struct vop_revoke_args *ap)
-{
- struct null_node *np;
- struct vnode *lvp;
-
- np = VTONULL(ap->a_vp);
- vx_unlock(ap->a_vp);
- if ((lvp = np->null_lowervp) != NULL) {
- vx_get(lvp);
- VOP_REVOKE(lvp, ap->a_flags);
- vx_put(lvp);
- }
- vx_lock(ap->a_vp);
- vgone(ap->a_vp);
- return(0);
-}
-
-/*
- * Handle to disallow write access if mounted read-only.
- *
- * null_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
- * struct thread *a_td)
- */
-static int
-null_access(struct vop_access_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- mode_t mode = ap->a_mode;
-
- /*
- * Disallow write attempts on read-only layers;
- * unless the file is a socket, fifo, or a block or
- * character device resident on the file system.
- */
- if (mode & VWRITE) {
- switch (vp->v_type) {
- case VDIR:
- case VLNK:
- case VREG:
- if (vp->v_mount->mnt_flag & MNT_RDONLY)
- return (EROFS);
- break;
- default:
- break;
- }
- }
- return (null_bypass(&ap->a_head));
-}
-
-/*
- * We must handle open to be able to catch MNT_NODEV and friends.
- *
- * null_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
- * struct thread *a_td)
- */
-static int
-null_open(struct vop_open_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);
-
- if ((vp->v_mount->mnt_flag & MNT_NODEV) &&
- (lvp->v_type == VBLK || lvp->v_type == VCHR))
- return ENXIO;
-
- return (null_bypass(&ap->a_head));
-}
-
-/*
- * We handle this to eliminate null FS to lower FS
- * file moving. Don't know why we don't allow this,
- * possibly we should.
- *
- * null_rename(struct vnode *a_fdvp, struct vnode *a_fvp,
- * struct componentname *a_fcnp, struct vnode *a_tdvp,
- * struct vnode *a_tvp, struct componentname *a_tcnp)
- */
-static int
-null_rename(struct vop_old_rename_args *ap)
-{
- struct vnode *tdvp = ap->a_tdvp;
- struct vnode *fvp = ap->a_fvp;
- struct vnode *fdvp = ap->a_fdvp;
- struct vnode *tvp = ap->a_tvp;
-
- /* Check for cross-device rename. */
- if ((fvp->v_mount != tdvp->v_mount) ||
- (tvp && (fvp->v_mount != tvp->v_mount))) {
- if (tdvp == tvp)
- vrele(tdvp);
- else
- vput(tdvp);
- if (tvp)
- vput(tvp);
- vrele(fdvp);
- vrele(fvp);
- return (EXDEV);
- }
-
- return (null_bypass(&ap->a_head));
-}
-
-/*
- * A special flag, LK_THISLAYER, causes the locking function to operate
- * ONLY on the nullfs layer. Otherwise we are responsible for locking not
- * only our layer, but the lower layer as well.
- *
- * null_lock(struct vnode *a_vp, int a_flags, struct thread *a_td)
- */
-static int
-null_lock(struct vop_lock_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- int flags = ap->a_flags;
- struct null_node *np = VTONULL(vp);
- struct vnode *lvp;
- int error;
-
- /*
- * Lock the nullfs layer first, disposing of the interlock in the
- * process.
- */
- KKASSERT((flags & LK_INTERLOCK) == 0);
- error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER,
- NULL, ap->a_td);
-
- /*
- * If locking only the nullfs layer, or if there is no lower layer,
- * or if an error occured while attempting to lock the nullfs layer,
- * we are done.
- *
- * np can be NULL is the vnode is being recycled from a previous
- * hash collision.
- */
- if ((flags & LK_THISLAYER) || np == NULL ||
- np->null_lowervp == NULL || error) {
- return (error);
- }
-
- /*
- * Lock the underlying vnode. If we are draining we should not drain
- * the underlying vnode, since it is not being destroyed, but we do
- * lock it exclusively in that case. Note that any interlocks have
- * already been disposed of above.
- */
- lvp = np->null_lowervp;
- if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
- NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
- error = vn_lock(lvp, (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
- ap->a_td);
- } else {
- error = vn_lock(lvp, flags, ap->a_td);
- }
-
- /*
- * If an error occured we have to undo our nullfs lock, then return
- * the original error.
- */
- if (error)
- lockmgr(&vp->v_lock, LK_RELEASE, NULL, ap->a_td);
- return(error);
-}
-
-/*
- * A special flag, LK_THISLAYER, causes the unlocking function to operate
- * ONLY on the nullfs layer. Otherwise we are responsible for unlocking not
- * only our layer, but the lower layer as well.
- *
- * null_unlock(struct vnode *a_vp, int a_flags, struct thread *a_td)
- */
-static int
-null_unlock(struct vop_unlock_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- int flags = ap->a_flags;
- struct null_node *np = VTONULL(vp);
- struct vnode *lvp;
- int error;
-
- KKASSERT((flags & LK_INTERLOCK) == 0);
- /*
- * nullfs layer only
- */
- if (flags & LK_THISLAYER) {
- error = lockmgr(&vp->v_lock,
- (flags & ~LK_THISLAYER) | LK_RELEASE,
- NULL, ap->a_td);
- return (error);
- }
-
- /*
- * If there is no underlying vnode the lock operation occurs at
- * the nullfs layer. np can be NULL is the vnode is being recycled
- * from a previous hash collision.
- */
- if (np == NULL || (lvp = np->null_lowervp) == NULL) {
- error = lockmgr(&vp->v_lock, flags | LK_RELEASE,
- NULL, ap->a_td);
- return(error);
- }
-
- /*
- * Unlock the lower layer first, then our nullfs layer.
- */
- VOP_UNLOCK(lvp, flags, ap->a_td);
- error = lockmgr(&vp->v_lock, flags | LK_RELEASE, NULL, ap->a_td);
- return (error);
-}
-
-/*
- * null_islocked(struct vnode *a_vp, struct thread *a_td)
- *
- * If a lower layer exists return the lock status of the lower layer,
- * otherwise return the lock status of our nullfs layer.
- */
-static int
-null_islocked(struct vop_islocked_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct vnode *lvp;
- struct null_node *np = VTONULL(vp);
- int error;
-
- lvp = np->null_lowervp;
- if (lvp == NULL)
- error = lockstatus(&vp->v_lock, ap->a_td);
- else
- error = VOP_ISLOCKED(lvp, ap->a_td);
- return (error);
-}
-
-
-/*
- * The vnode is no longer active. However, the new VFS API may retain
- * the node in the vfs cache. There is no way to tell that someone issued
- * a remove/rmdir operation on the underlying filesystem (yet), but we can't
- * remove the lowervp reference here.
- *
- * null_inactive(struct vnode *a_vp, struct thread *a_td)
- */
-static int
-null_inactive(struct vop_inactive_args *ap)
-{
- /*struct vnode *vp = ap->a_vp;*/
- /*struct null_node *np = VTONULL(vp);*/
-
- /*
- * At the moment don't do anything here. All the rest of the code
- * assumes that lowervp will remain inact, and the inactive nullvp
- * may be reactivated at any time. XXX I'm not sure why the 4.x code
- * even worked.
- */
-
- /*
- * Now it is safe to release our nullfs layer vnode.
- */
- return (0);
-}
-
-/*
- * We can free memory in null_inactive, but we do this
- * here. (Possible to guard vp->v_data to point somewhere)
- *
- * null_reclaim(struct vnode *a_vp, struct thread *a_td)
- */
-static int
-null_reclaim(struct vop_reclaim_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct vnode *lowervp;
- struct null_node *np;
-
- np = VTONULL(vp);
- vp->v_data = NULL;
- /*
- * null_lowervp reference to lowervp. The lower vnode's
- * inactive routine may or may not be called when we do the
- * final vrele().
- */
- if (np) {
- null_node_rem(np);
- lowervp = np->null_lowervp;
- np->null_lowervp = NULLVP;
- if (lowervp)
- vrele(lowervp);
- free(np, M_NULLFSNODE);
- }
- return (0);
-}
-
-/*
- * null_print(struct vnode *a_vp)
- */
-static int
-null_print(struct vop_print_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct null_node *np = VTONULL(vp);
-
- if (np == NULL) {
- printf ("\ttag VT_NULLFS, vp=%p, NULL v_data!\n", vp);
- return(0);
- }
- printf ("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", vp, np->null_lowervp);
- if (np->null_lowervp != NULL) {
- printf("\tlowervp_lock: ");
- lockmgr_printinfo(&np->null_lowervp->v_lock);
- } else {
- printf("\tnull_lock: ");
- lockmgr_printinfo(&vp->v_lock);
- }
- printf("\n");
- return (0);
-}
-
-/*
- * Let an underlying filesystem do the work
- *
- * null_createvobject(struct vnode *vp, struct ucred *cred, struct proc *p)
- */
-static int
-null_createvobject(struct vop_createvobject_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
- int error;
-
- if (vp->v_type == VNON || lowervp == NULL)
- return 0;
- error = VOP_CREATEVOBJECT(lowervp, ap->a_td);
- if (error)
- return (error);
- vp->v_flag |= VOBJBUF;
- return (0);
-}
-
-/*
- * We have nothing to destroy and this operation shouldn't be bypassed.
- *
- * null_destroyvobject(struct vnode *vp)
- */
-static int
-null_destroyvobject(struct vop_destroyvobject_args *ap)
-{
- struct vnode *vp = ap->a_vp;
-
- vp->v_flag &= ~VOBJBUF;
- return (0);
-}
-
-/*
- * null_getvobject(struct vnode *vp, struct vm_object **objpp)
- *
- * Note that this can be called when a vnode is being recycled, and
- * v_data may be NULL in that case if nullfs had to recycle a vnode
- * due to a null_node collision.
- */
-static int
-null_getvobject(struct vop_getvobject_args *ap)
-{
- struct vnode *lvp;
-
- if (ap->a_vp->v_data == NULL)
- return EINVAL;
-
- lvp = NULLVPTOLOWERVP(ap->a_vp);
- if (lvp == NULL)
- return EINVAL;
- return (VOP_GETVOBJECT(lvp, ap->a_objpp));
+ struct mount *lmp;
+
+ lmp = MOUNTTONULLMOUNT(ap->a_fncp->nc_mount)->nullm_vfs;
+ if (lmp != MOUNTTONULLMOUNT(ap->a_tncp->nc_mount)->nullm_vfs)
+ return (EINVAL);
+
+ ap->a_head.a_ops = lmp->mnt_vn_norm_ops;
+
+ return vop_nrename_ap(ap);
}

/*
* Global vfs data structures
*/
struct vnodeopv_entry_desc null_vnodeop_entries[] = {
- { &vop_default_desc, (vnodeopv_entry_t) null_bypass },
- { &vop_access_desc, (vnodeopv_entry_t) null_access },
- { &vop_createvobject_desc, (vnodeopv_entry_t) null_createvobject },
- { &vop_destroyvobject_desc, (vnodeopv_entry_t) null_destroyvobject },
- { &vop_getattr_desc, (vnodeopv_entry_t) null_getattr },
- { &vop_getvobject_desc, (vnodeopv_entry_t) null_getvobject },
- { &vop_inactive_desc, (vnodeopv_entry_t) null_inactive },
- { &vop_islocked_desc, (vnodeopv_entry_t) null_islocked },
- { &vop_lock_desc, (vnodeopv_entry_t) null_lock },
- { &vop_old_lookup_desc, (vnodeopv_entry_t) null_lookup },
- { &vop_open_desc, (vnodeopv_entry_t) null_open },
- { &vop_print_desc, (vnodeopv_entry_t) null_print },
- { &vop_reclaim_desc, (vnodeopv_entry_t) null_reclaim },
- { &vop_old_rename_desc, (vnodeopv_entry_t) null_rename },
- { &vop_setattr_desc, (vnodeopv_entry_t) null_setattr },
- { &vop_unlock_desc, (vnodeopv_entry_t) null_unlock },
- { &vop_revoke_desc, (vnodeopv_entry_t) null_revoke },
-
{ &vop_nresolve_desc, (vnodeopv_entry_t) null_nresolve },
{ &vop_ncreate_desc, (vnodeopv_entry_t) null_ncreate },
{ &vop_nmkdir_desc, (vnodeopv_entry_t) null_nmkdir },
+ { &vop_nmknod_desc, (vnodeopv_entry_t) null_nmknod },
+ { &vop_nlink_desc, (vnodeopv_entry_t) null_nlink },
+ { &vop_nsymlink_desc, (vnodeopv_entry_t) null_nsymlink },
+ { &vop_nwhiteout_desc, (vnodeopv_entry_t) null_nwhiteout },
{ &vop_nremove_desc, (vnodeopv_entry_t) null_nremove },
{ &vop_nrmdir_desc, (vnodeopv_entry_t) null_nrmdir },
{ &vop_nrename_desc, (vnodeopv_entry_t) null_nrename },
diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null_subr.c
--- a/sys/vfs/nullfs/null_subr.c Mon Jan 2 11:42:05 2006 +0000
+++ /dev/null Thu Jan 1 00:00:00 1970 +0000
@@ -1,389 +0,0 @@
-/*
- * Copyright (c) 1992, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software donated to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)null_subr.c 8.7 (Berkeley) 5/14/95
- *
- * $FreeBSD: src/sys/miscfs/nullfs/null_subr.c,v 1.21.2.4 2001/06/26 04:20:09 bp Exp $
- * $DragonFly: src/sys/vfs/nullfs/null_subr.c,v 1.17 2004/12/17 00:18:30 dillon Exp $
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/proc.h>
-#include <sys/vnode.h>
-#include <sys/mount.h>
-#include <sys/malloc.h>
-#include "null.h"
-
-#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */
-
-/*
- * Null layer cache:
- * Each cache entry holds a reference to the lower vnode
- * along with a pointer to the alias vnode. When an
- * entry is added the lower vnode is vref'd. When the
- * alias is removed the lower vnode is vrele'd.
- */
-
-#define NULL_NHASH(vp) \
- (&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash])
-
-static struct null_node **null_node_hashtbl;
-static u_long null_node_hash;
-static struct lwkt_token null_ihash_token;
-
-static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");
-
-static int null_node_alloc(struct mount *mp, struct vnode *lowervp,
- struct vnode **vpp);
-static struct vnode *
- null_node_find(struct mount *mp, struct vnode *lowervp);
-
-/*
- * Initialise cache headers
- */
-int
-nullfs_init(struct vfsconf *vfsp)
-{
- NULLFSDEBUG("nullfs_init\n"); /* printed during system boot */
- null_node_hash = 16;
- while (null_node_hash < desiredvnodes)
- null_node_hash <<= 1;
- null_node_hashtbl = malloc(sizeof(void *) * null_node_hash,
- M_NULLFSHASH, M_WAITOK|M_ZERO);
- --null_node_hash;
- lwkt_token_init(&null_ihash_token);
- return (0);
-}
-
-int
-nullfs_uninit(struct vfsconf *vfsp)
-{
- if (null_node_hashtbl) {
- free(null_node_hashtbl, M_NULLFSHASH);
- null_node_hashtbl = NULL;
- }
- return (0);
-}
-
-/*
- * Return a vref'ed alias for lower vnode if already exists, else 0.
- * Lower vnode should be locked (but with no additional refs) on entry
- * and will be unlocked on return if the search was successful, and left
- * locked if the search was not successful.
- */
-static struct vnode *
-null_node_find(struct mount *mp, struct vnode *lowervp)
-{
- struct thread *td = curthread; /* XXX */
- struct null_node *np;
- struct null_node *xp;
- struct vnode *vp;
- lwkt_tokref ilock;
-
- lwkt_gettoken(&ilock, &null_ihash_token);
-loop:
- for (np = *NULL_NHASH(lowervp); np; np = np->null_next) {
- if (np->null_lowervp == lowervp && NULLTOV(np)->v_mount == mp) {
- vp = NULLTOV(np);
- if (vget(vp, LK_EXCLUSIVE | LK_CANRECURSE, td)) {
- printf ("null_node_find: vget failed.\n");
- goto loop;
- }
-
- /*
- * vget() might have blocked, we have to check that
- * our vnode is still valid.
- */
- xp = *NULL_NHASH(lowervp);
- while (xp) {
- if (xp == np && xp->null_lowervp == lowervp &&
- NULLTOV(xp) == vp &&
- NULLTOV(xp)->v_mount == mp) {
- break;
- }
- xp = xp->null_next;
- }
- if (xp == NULL) {
- printf ("null_node_find: node race, retry.\n");
- vput(vp);
- goto loop;
- }
- /*
- * SUCCESS! Returned the locked and referenced vp
- * and release the lock on lowervp.
- */
- VOP_UNLOCK(lowervp, 0, td);
- lwkt_reltoken(&ilock);
- return (vp);
- }
- }
-
- /*
- * Failure, leave lowervp locked on return.
- */
- lwkt_reltoken(&ilock);
- return(NULL);
-}
-
-int
-null_node_add(struct null_node *np)
-{
- struct null_node **npp;
- struct null_node *n2;
- lwkt_tokref ilock;
-
- lwkt_gettoken(&ilock, &null_ihash_token);
- npp = NULL_NHASH(np->null_lowervp);
- while ((n2 = *npp) != NULL) {
- if (n2->null_lowervp == np->null_lowervp &&
- n2->null_vnode->v_mount == np->null_vnode->v_mount) {
- lwkt_reltoken(&ilock);
- return(EBUSY);
- }
- npp = &n2->null_next;
- }
- np->null_next = NULL;
- *npp = np;
- lwkt_reltoken(&ilock);
- return(0);
-}
-
-void
-null_node_rem(struct null_node *np)
-{
- struct null_node **npp;
- struct null_node *n2;
- lwkt_tokref ilock;
-
- lwkt_gettoken(&ilock, &null_ihash_token);
- npp = NULL_NHASH(np->null_lowervp);
- while ((n2 = *npp) != NULL) {
- if (n2 == np)
- break;
- npp = &n2->null_next;
- }
- KKASSERT(np == n2);
- *npp = np->null_next;
- np->null_next = NULL;
- lwkt_reltoken(&ilock);
-}
-
-/*
- * Make a new null_node node. vp is the null mount vnode, lowervp is the
- * lower vnode. Maintain a reference to (lowervp). lowervp must be
- * locked on call.
- */
-static int
-null_node_alloc(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
-{
- struct null_node *np;
- struct thread *td;
- struct vnode *vp;
- int error;
-
- td = curthread;
-retry:
- /*
- * If we have already hashed the vp we can just return it.
- */
- *vpp = null_node_find(mp, lowervp);
- if (*vpp)
- return 0;
-
- /*
- * lowervp is locked but not referenced at this point.
- */
- MALLOC(np, struct null_node *, sizeof(struct null_node),
- M_NULLFSNODE, M_WAITOK);
-
- error = getnewvnode(VT_NULL, mp, vpp, 0, LK_CANRECURSE);
- if (error) {
- FREE(np, M_NULLFSNODE);
- return (error);
- }
- vp = *vpp;
-
- /*
- * Set up the np/vp relationship and set the lower vnode.
- *
- * XXX:
- * When nullfs encounters sockets or device nodes, it
- * has a hard time working with the normal vp union, probably
- * because the device has not yet been opened. Needs investigation.
- */
- vp->v_type = lowervp->v_type;
- if (vp->v_type == VCHR || vp->v_type == VBLK)
- addaliasu(vp, lowervp->v_udev);
- else
- vp->v_un = lowervp->v_un; /* XXX why this assignment? */
- np->null_vnode = vp;
- np->null_lowervp = lowervp;
-
- /*
- * Our new vnode is already VX locked (which is effective
- * LK_THISLAYER, which is what we want).
- */
-
- /*
- * Try to add our new node to the hash table. If a collision
- * occurs someone else beat us to it and we need to destroy the
- * vnode and retry.
- */
- if (null_node_add(np) != 0) {
- free(np, M_NULLFSNODE);
- vput(vp);
- goto retry;
- }
-
- /*
- * Finish up. Link the vnode and null_node together, ref lowervp
- * for the null node. lowervp is already locked so the lock state
- * is already properly synchronized.
- *
- * Set the vnode up to reclaim as quickly as possible
- */
- vp->v_data = np;
- vp->v_flag |= VAGE;
- vref(lowervp);
- return (0);
-}
-
-
-/*
- * Try to find an existing null_node vnode refering to the given underlying
- * vnode (which should be locked and referenced). If no vnode found, create
- * a new null_node vnode which contains a reference to the lower vnode.
- */
-int
-null_node_create(struct mount *mp, struct vnode *lowervp, struct vnode **newvpp)
-{
- struct vnode *aliasvp;
-
- aliasvp = null_node_find(mp, lowervp);
- if (aliasvp) {
- /*
- * null_node_find() has unlocked lowervp for us, so we just
- * have to get rid of the reference.
- */
- vrele(lowervp);
-#ifdef NULLFS_DEBUG
- vprint("null_node_create: exists", aliasvp);
-#endif
- } else {
- int error;
-
- /*
- * Get new vnode. Note that lowervp is locked and referenced
- * at this point (as it was passed to us).
- */
- NULLFSDEBUG("null_node_create: create new alias vnode\n");
-
- /*
- * Make new vnode reference the null_node.
- */
- error = null_node_alloc(mp, lowervp, &aliasvp);
- vrele(lowervp);
- if (error)
- return error;
-
- /*
- * aliasvp is already locked and ref'd by getnewvnode()
- */
- }
-
-#ifdef DIAGNOSTIC
- if (lowervp->v_usecount < 1) {
- /* Should never happen... */
- vprint ("null_node_create: alias ", aliasvp);
- vprint ("null_node_create: lower ", lowervp);
- panic ("null_node_create: lower has 0 usecount.");
- };
-#endif
-
-#ifdef NULLFS_DEBUG
- vprint("null_node_create: alias", aliasvp);
- vprint("null_node_create: lower", lowervp);
-#endif
-
- *newvpp = aliasvp;
- return (0);
-}
-
-#ifdef DIAGNOSTIC
-#include "opt_ddb.h"
-
-#ifdef DDB
-#define null_checkvp_barrier 1
-#else
-#define null_checkvp_barrier 0
-#endif
-
-struct vnode *
-null_checkvp(struct vnode *vp, char *fil, int lno)
-{
- struct null_node *a = VTONULL(vp);
- if (a->null_lowervp == NULLVP) {
- /* Should never happen */
- int i; u_long *p;
- printf("vp = %p, ZERO ptr\n", (void *)vp);
- for (p = (u_long *) a, i = 0; i < 8; i++)
- printf(" %lx", p[i]);
- printf("\n");
- /* wait for debugger */
- while (null_checkvp_barrier) /*WAIT*/ ;
- panic("null_checkvp");
- }
- if (a->null_lowervp->v_usecount < 1) {
- int i; u_long *p;
- printf("vp = %p, unref'ed lowervp\n", (void *)vp);
- for (p = (u_long *) a, i = 0; i < 8; i++)
- printf(" %lx", p[i]);
- printf("\n");
- /* wait for debugger */
- while (null_checkvp_barrier) /*WAIT*/ ;
- panic ("null with unref'ed lowervp");
- };
-#ifdef notyet
- printf("null %x/%d -> %x/%d [%s, %d]\n",
- NULLTOV(a), NULLTOV(a)->v_usecount,
- a->null_lowervp, a->null_lowervp->v_usecount,
- fil, lno);
-#endif
- return a->null_lowervp;
-}
-#endif

History

#1 Updated by corecode over 9 years ago

committed by dillon

Also available in: Atom PDF