Bug #36: nnull.diff (see comments on kernel@) - DragonFlyBSD - DragonFlyBSD bugtracker

Actions

Copy link

Bug #36

closed

nnull.diff (see comments on kernel@)

Added by csaba.henk over 20 years ago. Updated over 19 years ago.

Status:

Closed

Priority:

Low

Assignee:

Category:

Target version:

Start date:

Due date:

% Done:

Estimated time:

Description

HG changeset patch
User root@
Node ID e65b90b999c3e7f71539c8affa762d61d3a5fd17
Parent 758f5a725024e40c276e253651f9115aa4ba9fbe
patch queue: nnull

diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/Makefile
--- a/sys/vfs/nullfs/Makefile Mon Jan 2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/Makefile Mon Jan 2 12:21:05 2006 +0000
@@ -2,7 +2,7 @@
 # $DragonFly: src/sys/vfs/nullfs/Makefile,v 1.4 2004/08/13 17:51:12 dillon Exp $

KMOD=    null
-SRCS=    null_subr.c null_vfsops.c null_vnops.c
+SRCS=    null_vfsops.c null_vnops.c
 NOMAN=

.include <bsd.kmod.mk>
diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null.h
--- a/sys/vfs/nullfs/null.h    Mon Jan  2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/null.h    Mon Jan  2 12:21:05 2006 +0000
@@ -49,36 +49,7 @@
 };

#ifdef _KERNEL
/*
 * A cache of vnode references
- */
struct null_node {
    struct null_node    *null_next;    /* Hash list */
-    struct vnode            *null_lowervp;    /* vrefed once */
-    struct vnode        *null_vnode;    /* Back pointer */
};

 #define    MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
#define    VTONULL(vp) ((struct null_node *)(vp)->v_data)
#define    NULLTOV(xp) ((xp)->null_vnode)

-int nullfs_init(struct vfsconf *vfsp);
-int nullfs_uninit(struct vfsconf *vfsp);
-int null_node_add(struct null_node *np);
-void null_node_rem(struct null_node *np);
-int null_node_create(struct mount *mp, struct vnode *target, struct vnode **vpp);
-int null_bypass(struct vop_generic_args *ap);

#ifdef DIAGNOSTIC
-struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno);
#define    NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)
#else
#define    NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)
#endif

#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NULLFSNODE);
#endif

#ifdef NULLFS_DEBUG
 #define NULLFSDEBUG(format, args...) printf(format ,## args)
diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null_vfsops.c
--- a/sys/vfs/nullfs/null_vfsops.c    Mon Jan  2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/null_vfsops.c    Mon Jan  2 12:21:05 2006 +0000
@@ -59,8 +59,6 @@

static MALLOC_DEFINE(M_NULLFSMNT, "NULLFS mount", "NULLFS mount structure");

-static int nullfs_fhtovp(struct mount *mp, struct fid *fidp,
-                   struct vnode **vpp);
static int nullfs_checkexp(struct mount *mp, struct sockaddr *nam,
int *extflagsp, struct ucred **credanonp);
static int nullfs_mount(struct mount *mp, char *path, caddr_t data,
@ -71,8 +69,6 @
static int nullfs_statfs(struct mount *mp, struct statfs *sbp,
struct thread *td);
static int nullfs_unmount(struct mount *mp, int mntflags, struct thread *td);
-static int nullfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp);
-static int nullfs_vptofh(struct vnode *vp, struct fid *fhp);
static int nullfs_extattrctl(struct mount *mp, int cmd,
const char *attrname, caddr_t arg, struct thread *td);

@ -84,11 +80,9 @ {
int error = 0;
struct null_args args;
- struct vnode *lowerrootvp, *vp;
- struct vnode *nullm_rootvp;
+ struct vnode *rootvp;
struct null_mount *xmp;
u_int size;
- int isvnunlocked = 0;
struct nlookupdata nd;

NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
@ -108,44 +102,15 @
         return (error);

/*
-     * Unlock lower node to avoid deadlock.
-     * (XXX) VOP_ISLOCKED is needed?
-     */
-    if ((mp->mnt_vnodecovered->v_tag == VT_NULL) &&
-        VOP_ISLOCKED(mp->mnt_vnodecovered, NULL)) {
-        VOP_UNLOCK(mp->mnt_vnodecovered, 0, td);
-        isvnunlocked = 1;
-    }
-    /*
 * Find lower node
      */
-    lowerrootvp = NULL;
+    rootvp = NULL;
     error = nlookup_init(&nd, args.target, UIO_USERSPACE, NLC_FOLLOW);
     if (error == 0)
         error = nlookup(&nd);
     if (error == 0) {
         error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, 
-                    &lowerrootvp);
-    }
-    nlookup_done(&nd);

    /*
-     * Re-lock vnode.
-     */
-    if (isvnunlocked && !VOP_ISLOCKED(mp->mnt_vnodecovered, NULL))
-        vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY, td);
-    if (error)
-        return (error);
-        
-    /*
-     * Sanity check on lower vnode
-     *
-     * Check multi null mount to avoid `lock against myself' panic.
-     */
-    if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
-        NULLFSDEBUG("nullfs_mount: multi null mount?\n");
-        vput(lowerrootvp);
-        return (EDEADLK);
+                    &rootvp);
     }

xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
@ -154,37 +119,29 @
     /*
 * Save reference to underlying FS
      */
-    xmp->nullm_vfs = lowerrootvp->v_mount;
+        /*
+         * As lite stacking enters the scene, the old way of doing this
+     * -- via the vnode -- is not good enough anymore...
+     */
+    xmp->nullm_vfs = nd.nl_ncp->nc_mount;
+    nlookup_done(&nd);

vfs_add_vnodeops(mp, &mp->mnt_vn_norm_ops, 
              null_vnodeop_entries, 0);

- /*
- * Save reference. Each mount also holds
- * a reference on the root vnode.
- */
- error = null_node_create(mp, lowerrootvp, &vp);
- /*
- * Unlock the node (either the lower or the alias)
- */
- VOP_UNLOCK(vp, 0, td);
- /*
- * Make sure the node alias worked
- */
- if (error) {
- vrele(lowerrootvp);
- free(xmp, M_NULLFSMNT); /* XXX */
- return (error);
- }
+ VOP_UNLOCK(rootvp, 0, td);

/*
     * Keep a held reference to the root vnode.
     * It is vrele'd in nullfs_unmount.
      */
-    nullm_rootvp = vp;
-    nullm_rootvp->v_flag |= VROOT;
-    xmp->nullm_rootvp = nullm_rootvp;
-    if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+    xmp->nullm_rootvp = rootvp;
+    /*
+     * XXX What's the proper safety condition for querying
+     * the underlying mount? Is this flag tuning necessary
+     * at all?
+     */
+    if (xmp->nullm_vfs->mnt_flag & MNT_LOCAL)
         mp->mnt_flag |= MNT_LOCAL;
     mp->mnt_data = (qaddr_t) xmp;
     vfs_getnewfsid(mp);
@ -205,18 +162,12 @
 nullfs_unmount(struct mount *mp, int mntflags, struct thread *td)
{
     void *mntdata;
-    int error;
     int flags = 0;

NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);

if (mntflags & MNT_FORCE)
         flags |= FORCECLOSE;

    /* There is 1 extra root vnode reference (nullm_rootvp). */
-    error = vflush(mp, 1, flags);
-    if (error)
-        return (error);

/*
 * Finally, throw away the null_mount structure
@ -233,9 +184,8 @
     struct thread *td = curthread;    /* XXX */
     struct vnode *vp;

- NULLFSDEBUG("nullfs_root(mp = %p, vp = %p->%p)\n", (void *)mp,
- (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
- (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+ NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", (void *)mp,
+ (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);

/*
 * Return locked reference to root.
@ -268,9 +218,8 @
     int error;
     struct statfs mstat;

- NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
- (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
- (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+ NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p)\n", (void *)mp,
+ (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);

bzero(&mstat, sizeof(mstat));

@ -296,32 +245,12 @
}

static int
nullfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{

    return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
}

static int
-nullfs_fhtovp(struct mount *mp, struct fid *fidp, struct vnode **vpp)
{

    return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp);
}

-static int
 nullfs_checkexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
         struct ucred **credanonp)
{

return VFS_CHECKEXP(MOUNTTONULLMOUNT(mp)->nullm_vfs, nam, 
         extflagsp, credanonp);
}

static int
-nullfs_vptofh(struct vnode *vp, struct fid *fhp)
{
-    return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhp);
 }

static int                        
@ -340,12 +269,7 @
     .vfs_quotactl =       nullfs_quotactl,
     .vfs_statfs =        nullfs_statfs,
     .vfs_sync =         vfs_stdsync,
-    .vfs_vget =         nullfs_vget,
-    .vfs_fhtovp =       nullfs_fhtovp,
     .vfs_checkexp =      nullfs_checkexp,
-    .vfs_vptofh =       nullfs_vptofh,
-    .vfs_init =         nullfs_init,
-    .vfs_uninit =        nullfs_uninit,
     .vfs_extattrctl =      nullfs_extattrctl
 };

diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null_vnops.c
--- a/sys/vfs/nullfs/null_vnops.c Mon Jan 2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/null_vnops.c Mon Jan 2 12:21:05 2006 +0000
@@ -83,98 +83,22 @@
  *
  * The null layer is the minimum file system layer,
  * simply bypassing all possible operations to the lower layer
- * for processing there. The majority of its activity centers
- * on the bypass routine, through which nearly all vnode operations
- * pass.
- *
- * The bypass routine accepts arbitrary vnode operations for
- * handling by the lower layer. It begins by examing vnode
- * operation arguments and replacing any null-nodes by their
- * lower-layer equivlants. It then invokes the operation
- * on the lower layer. Finally, it replaces the null-nodes
- * in the arguments and, if a vnode is return by the operation,
- * stacks a null-node on top of the returned vnode.
- *
- * Although bypass handles most operations, vop_getattr, vop_lock,
- * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not
- * bypassed. Vop_getattr must change the fsid being returned.
- * Vop_lock and vop_unlock must handle any locking for the
- * current vnode as well as pass the lock request down.
- * Vop_inactive and vop_reclaim are not bypassed so that
- * they can handle freeing null-layer specific data. Vop_print
- * is not bypassed to avoid excessive debugging information.
- * Also, certain vnode operations change the locking state within
- * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
- * and symlink). Ideally these operations should not change the
- * lock state, but should be changed to let the caller of the
- * function unlock them. Otherwise all intermediate vnode layers
- * (such as union, umapfs, etc) must catch these functions to do
- * the necessary locking at their layer.
- *
- *
- * INSTANTIATING VNODE STACKS
- *
- * Mounting associates the null layer with a lower layer,
- * effect stacking two VFSes. Vnode stacks are instead
- * created on demand as files are accessed.
- *
- * The initial mount creates a single vnode stack for the
- * root of the new null layer. All other vnode stacks
- * are created as a result of vnode operations on
- * this or other null vnode stacks.
- *
- * New vnode stacks come into existance as a result of
- * an operation which returns a vnode.
- * The bypass routine stacks a null-node above the new
- * vnode before returning it to the caller.
- *
- * For example, imagine mounting a null layer with
- * "mount_null /usr/include /dev/layer/null".
- * Changing directory to /dev/layer/null will assign
- * the root null-node (which was created when the null layer was mounted).
- * Now consider opening "sys". A vop_old_lookup would be
- * done on the root null-node. This operation would bypass through
- * to the lower layer which would return a vnode representing
- * the UFS "sys". Null_bypass then builds a null-node
- * aliasing the UFS "sys" and returns this to the caller.
- * Later operations on the null-node "sys" will repeat this
- * process when constructing other vnode stacks.
- *
- *
- * CREATING OTHER FILE SYSTEM LAYERS
- *
- * One of the easiest ways to construct new file system layers is to make
- * a copy of the null layer, rename all files and variables, and
- * then begin modifing the copy. Sed can be used to easily rename
- * all variables.
- *
- * The umap layer is an example of a layer descended from the
- * null layer.
- *
- *
- * INVOKING OPERATIONS ON LOWER LAYERS
- *
- * There are two techniques to invoke operations on a lower layer
- * when the operation cannot be completely bypassed. Each method
- * is appropriate in different situations. In both cases,
- * it is the responsibility of the aliasing layer to make
- * the operation arguments "correct" for the lower layer
- * by mapping an vnode arguments to the lower layer.
- *
- * The first approach is to call the aliasing layer's bypass routine.
- * This method is most suitable when you wish to invoke the operation
- * currently being handled on the lower layer. It has the advantage
- * that the bypass routine already must do argument mapping.
- * An example of this is null_getattrs in the null layer.
- *
- * A second approach is to directly invoke vnode operations on
- * the lower layer with the VOP_OPERATIONNAME interface.
- * The advantage of this method is that it is easy to invoke
- * arbitrary operations on the lower layer. The disadvantage
- * is that vnode arguments must be manualy mapped.
- *
+ * for processing there. The majority of its activity used to center
+ * on a so-called bypass routine, through which nullfs vnodes
+ * passed on operation to their underlying peer.
+ *
+ * However, with the current implementation nullfs doesn't have any private
+ * vnodes, it rather relies on DragonFly's namecache API. That gives a much
+ * more lightweight null layer, as namecache structures are pure data, with
+ * no private operations, so there is no need of subtle dispatching routines.
+ *
+ * Unlike the old code, this implementation is not a general skeleton overlay
+ * filesystem: to get more comprehensive overlaying, like that of umapfs, we
+ * will need vnode operation dispatch. Other overlay filesystems, like unionfs
+ * might be able to get on with a hybrid solution: overlay some vnodes, and rely
+ * on namecache API for the rest.
*/
-
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@ -187,775 +111,114 @
#include <sys/buf.h>
#include "null.h"

static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
-SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
-
static int null_nresolve(struct vop_nresolve_args *ap);
static int null_ncreate(struct vop_ncreate_args *ap);
static int null_nmkdir(struct vop_nmkdir_args *ap);
+static int null_nmknod(struct vop_nmknod_args *ap);
+static int null_nlink(struct vop_nlink_args *ap);
+static int null_nsymlink(struct vop_nsymlink_args *ap);
+static int null_nwhiteout(struct vop_nwhiteout_args *ap);
static int null_nremove(struct vop_nremove_args *ap);
static int null_nrmdir(struct vop_nrmdir_args *ap);
static int null_nrename(struct vop_nrename_args *ap);

static int null_revoke(struct vop_revoke_args *ap);
-static int null_access(struct vop_access_args *ap);
-static int null_createvobject(struct vop_createvobject_args *ap);
-static int null_destroyvobject(struct vop_destroyvobject_args *ap);
-static int null_getattr(struct vop_getattr_args *ap);
-static int null_getvobject(struct vop_getvobject_args *ap);
-static int null_inactive(struct vop_inactive_args *ap);
-static int null_islocked(struct vop_islocked_args *ap);
-static int null_lock(struct vop_lock_args *ap);
-static int null_lookup(struct vop_old_lookup_args *ap);
-static int null_open(struct vop_open_args *ap);
-static int null_print(struct vop_print_args *ap);
-static int null_reclaim(struct vop_reclaim_args *ap);
-static int null_rename(struct vop_old_rename_args *ap);
-static int null_setattr(struct vop_setattr_args *ap);
-static int null_unlock(struct vop_unlock_args *ap);

/
* This is the 10-Apr-92 bypass routine.
- * This version has been optimized for speed, throwing away some
- * safety checks. It should still always work, but it's not as
- * robust to programmer errors.
-
- * In general, we map all vnodes going down and unmap them on the way back.
- * As an exception to this, vnodes can be marked "unmapped" by setting
- * the Nth bit in operation's vdesc_flags.
- *
- * Also, some BSD vnode operations have the side effect of vrele'ing
- * their arguments. With stacking, the reference counts are held
- * by the upper node, not the lower one, so we must handle these
- * side-effects here. This is not of concern in Sun-derived systems
- * since there are no such side-effects.
- *
- * This makes the following assumptions:
- * - only one returned vpp
- * - no INOUT vpp's (Sun's vop_open has one of these)
- * - the vnode operation vector of the first vnode should be used
- * to determine what implementation of the op should be invoked
- * - all mapped vnodes are of our vnode-type (NEEDSWORK:
- * problems on rmdir'ing mount points and renaming?)
- *
- * null_bypass(struct vnodeop_desc *a_desc, ...)
- */
int
-null_bypass(struct vop_generic_args *ap)
{
- struct vnode this_vp_p;
- int error;
- struct vnode *old_vps[VDESC_MAX_VPS];
- struct vnode **vps_p[VDESC_MAX_VPS];
- struct vnode **vppp;
- struct vnodeop_desc descp = ap->a_desc;
- int reles, i, j;

if (null_bug_bypass)
- printf ("null_bypass: %s\n", descp->vdesc_name);

#ifdef DIAGNOSTIC
- /*
- * We require at least one vp.
- */
- if (descp->vdesc_vp_offsets == NULL ||
- descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
- panic ("null_bypass: no vp's in map");
-#endif

- /
- * Map the vnodes going in.
- /
- reles = descp->vdesc_flags;
- for (i = 0; i < VDESC_MAX_VPS; ++i) {
- if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
- break; / bail out at end of list /
- vps_p[i] = this_vp_p =
- VOPARG_OFFSETTO(struct vnode,descp->vdesc_vp_offsets[i],ap);
- /*
- * We're not guaranteed that any but the first vnode
- * are of our type. Check for and don't map any
- * that aren't. (We must always map first vp or vclean fails.)
- */
- if (i && (*this_vp_p == NULLVP ||
- (*this_vp_p)->v_tag != VT_NULL)) {
- old_vps[i] = NULLVP;
- } else {
- old_vps[i] = *this_vp_p;
- *this_vp_p = NULLVPTOLOWERVP(*this_vp_p);
- /*
- * Several operations have the side effect of vrele'ing
- * their vp's. We must account for that in the lower
- * vp we pass down.
- */
- if (reles & (VDESC_VP0_WILLRELE << i))
- vref(*this_vp_p);
- }

}

/*
- * Call the operation on the lower layer with the modified
- * argument structure. We have to adjust a_fm to point to the
- * lower vp's vop_ops structure.
- */
- if (vps_p[0] && *vps_p[0]) {
- ap->a_ops = *(*(vps_p[0]))->v_ops;
- error = vop_vnoperate_ap(ap);
- } else {
- printf("null_bypass: no map for %s\n", descp->vdesc_name);
- error = EINVAL;
- }

/*
- * Maintain the illusion of call-by-value by restoring vnodes in the
- * argument structure to their original value.
- /
- reles = descp->vdesc_flags;
- for (i = 0; i < VDESC_MAX_VPS; ++i) {
- if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
- break; / bail out at end of list /
- if (old_vps[i]) {
- *(vps_p[i]) = old_vps[i];

/
- * Since we operated on the lowervp's instead of the
- * null node vp's, we have to adjust the null node
- * vp's based on what the VOP did to the lower vp.
- *
- * Note: the unlock case only occurs with rename.
- * tdvp and tvp are both locked on call and must be
- * unlocked on return.
-
- * Unlock semantics indicate that if two locked vp's
- * are passed and they are the same vp, they are only
- * actually locked once.
- */
- if (reles & (VDESC_VP0_WILLUNLOCK << i)) {
- VOP_UNLOCK(old_vps[i], LK_THISLAYER, curthread);
- for (j = i + 1; j < VDESC_MAX_VPS; ++j) {
- if (descp->vdesc_vp_offsets[j] == VDESC_NO_OFFSET)
- break;
- if (old_vps[i] == old_vps[j]) {
- reles &= ~(1 << (VDESC_VP0_WILLUNLOCK << j));
- }
- }
- }

if (reles & (VDESC_VP0_WILLRELE << i))
- vrele(old_vps[i]);
- }
- }

/
- * Map the possible out-going vpp
- * (Assumes that the lower layer always returns
- * a vref'ed vpp unless it gets an error.)
- /
- if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
- !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
- !error) {
- /
- * XXX - even though some ops have vpp returned vp's,
- * several ops actually vrele this before returning.
- * We must avoid these ops.
- * (This should go away when these ops are regularized.)
- /
- if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
- goto out;
- vppp = VOPARG_OFFSETTO(struct vnode**,
- descp->vdesc_vpp_offset,ap);
- if (vppp)
- error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
- }

out:
- return (error);
}

/
* We have to carry on the locking protocol on the null layer vnodes
- * as we progress through the tree. We also have to enforce read-only
- * if this layer is mounted read-only.
-
- * null_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
- * struct componentname *a_cnp)
- */
static int
-null_lookup(struct vop_old_lookup_args *ap)
{
- struct componentname *cnp = ap->a_cnp;
- struct vnode *dvp = ap->a_dvp;
- struct thread *td = cnp->cn_td;
- int flags = cnp->cn_flags;
- struct vnode *vp, *ldvp, *lvp;
- int error;

if ((dvp->v_mount->mnt_flag & MNT_RDONLY) &&
- (cnp->cn_nameiop == NAMEI_DELETE ||
- cnp->cn_nameiop == NAMEI_RENAME)) {
- return (EROFS);
- }
- ldvp = NULLVPTOLOWERVP(dvp);

/
- * If we are doing a ".." lookup we must release the lock on dvp
- * now, before we run a lookup in the underlying fs, or we may
- * deadlock. If we do this we must protect ldvp by ref'ing it.
- /
- if (flags & CNP_ISDOTDOT) {
- vref(ldvp);
- VOP_UNLOCK(dvp, LK_THISLAYER, td);
- }

/
- * Due to the non-deterministic nature of the handling of the
- * parent directory lock by lookup, we cannot call null_bypass()
- * here. We must make a direct call. It's faster to do a direct
- * call, anyway.
- /
- vp = lvp = NULL;
- error = VOP_LOOKUP(ldvp, &lvp, cnp);
- if (error == EJUSTRETURN &&
- (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
- (cnp->cn_nameiop == NAMEI_CREATE ||
- cnp->cn_nameiop == NAMEI_RENAME)) {
- error = EROFS;
- }

if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
- if (ldvp == lvp) {
- *ap->a_vpp = dvp;
- vref(dvp);
- vrele(lvp);
- } else {
- error = null_node_create(dvp->v_mount, lvp, &vp);
- if (error == 0)
- *ap->a_vpp = vp;
- }
- }

/
- * The underlying fs will set PDIRUNLOCK if it unlocked the parent
- * directory, which means we have to follow suit in the nullfs layer.
- * Note that the parent directory may have already been unlocked due
- * to the ".." case. Note that use of cnp->cn_flags instead of flags.
- /
- if (flags & CNP_ISDOTDOT) {
- if ((cnp->cn_flags & CNP_PDIRUNLOCK) == 0)
- VOP_LOCK(dvp, LK_THISLAYER | LK_EXCLUSIVE, td);
- vrele(ldvp);
- } else if (cnp->cn_flags & CNP_PDIRUNLOCK) {
- VOP_UNLOCK(dvp, LK_THISLAYER, td);
- }
- return (error);
}

/
* Setattr call. Disallow write attempts if the layer is mounted read-only.
-
- * null_setattr(struct vnodeop_desc *a_desc, struct vnode *a_vp,
- * struct vattr *a_vap, struct ucred *a_cred,
- * struct thread *a_td)
- */
int
-null_setattr(struct vop_setattr_args *ap)
{
- struct vnode *vp = ap->a_vp;
- struct vattr *vap = ap->a_vap;

if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
- vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
- vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
- (vp->v_mount->mnt_flag & MNT_RDONLY))
- return (EROFS);
- if (vap->va_size != VNOVAL) {
- switch (vp->v_type) {
- case VDIR:
- return (EISDIR);
- case VCHR:
- case VBLK:
- case VSOCK:
- case VFIFO:
- if (vap->va_flags != VNOVAL)
- return (EOPNOTSUPP);
- return (0);
- case VREG:
- case VLNK:
- default:
- /
- * Disallow write attempts if the filesystem is
- * mounted read-only.
- /
- if (vp->v_mount->mnt_flag & MNT_RDONLY)
- return (EROFS);
- }
- }

return (null_bypass(&ap->a_head));
}

/
* We handle getattr only to change the fsid.
-
- * null_getattr(struct vnode *a_vp, struct vattr *a_vap, struct ucred *a_cred,
- * struct thread *a_td)
- */
static int
-null_getattr(struct vop_getattr_args *ap)
{
- int error;

if ((error = null_bypass(&ap->a_head)) != 0)
- return (error);

ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
- return (0);
}

/
* Resolve a locked ncp at the nullfs layer.
- */
static int
null_nresolve(struct vop_nresolve_args *ap) {
- return(vop_compat_nresolve(ap));
}

/*
* Create a file
- */
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;

return vop_nresolve_ap(ap);
}

static int
null_ncreate(struct vop_ncreate_args *ap) {
- return(vop_compat_ncreate(ap));
+ ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;

return vop_ncreate_ap(ap);
}

static int
 null_nmkdir(struct vop_nmkdir_args *ap)
{
-    return(vop_compat_nmkdir(ap));
+    ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;

    return vop_nmkdir_ap(ap);
}

static int
+null_nmknod(struct vop_nmknod_args *ap)
{
+    ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;

    return vop_nmknod_ap(ap);
}

static int
+null_nlink(struct vop_nlink_args *ap)
{
+    ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;

    return vop_nlink_ap(ap);
}

static int
+null_nsymlink(struct vop_nsymlink_args *ap)
{
+    ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;

    return vop_nsymlink_ap(ap);
}

static int
+null_nwhiteout(struct vop_nwhiteout_args *ap)
{
+    ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;

    return vop_nwhiteout_ap(ap);
 }

static int
 null_nremove(struct vop_nremove_args *ap)
{
-    return(vop_compat_nremove(ap));
+    ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;

    return vop_nremove_ap(ap);
 }

static int
 null_nrmdir(struct vop_nrmdir_args *ap)
{
-    return(vop_compat_nrmdir(ap));
+    ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;

    return vop_nrmdir_ap(ap);
 }

static int
 null_nrename(struct vop_nrename_args *ap)
{
-    return(vop_compat_nrename(ap));
}

/*
 * revoke is VX locked, we can't go through null_bypass
- /
static int
-null_revoke(struct vop_revoke_args *ap)
{
-    struct null_node *np;
-    struct vnode *lvp;

    np = VTONULL(ap->a_vp);
-    vx_unlock(ap->a_vp);
-    if ((lvp = np->null_lowervp) != NULL) {
-        vx_get(lvp);
-        VOP_REVOKE(lvp, ap->a_flags);
-        vx_put(lvp);
-    }
-    vx_lock(ap->a_vp);
-    vgone(ap->a_vp);
-    return(0);
}

/
 * Handle to disallow write access if mounted read-only.
- 
- * null_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
- *        struct thread *a_td)
- */
static int
-null_access(struct vop_access_args *ap)
{
-    struct vnode *vp = ap->a_vp;
-    mode_t mode = ap->a_mode;

    /
-     * Disallow write attempts on read-only layers;
-     * unless the file is a socket, fifo, or a block or
-     * character device resident on the file system.
-     /
-    if (mode & VWRITE) {
-        switch (vp->v_type) {
-        case VDIR:
-        case VLNK:
-        case VREG:
-            if (vp->v_mount->mnt_flag & MNT_RDONLY)
-                return (EROFS);
-            break;
-        default:
-            break;
-        }
-    }
-    return (null_bypass(&ap->a_head));
}

/
 * We must handle open to be able to catch MNT_NODEV and friends.
- 
- * null_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
- *         struct thread *a_td)
- */
static int
-null_open(struct vop_open_args *ap)
{
-    struct vnode *vp = ap->a_vp;
-    struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);

    if ((vp->v_mount->mnt_flag & MNT_NODEV) &&
-        (lvp->v_type == VBLK || lvp->v_type == VCHR))
-        return ENXIO;

    return (null_bypass(&ap->a_head));
}

/
 * We handle this to eliminate null FS to lower FS
- * file moving. Don't know why we don't allow this,
- * possibly we should.
- 
- * null_rename(struct vnode *a_fdvp, struct vnode *a_fvp,
- *        struct componentname *a_fcnp, struct vnode *a_tdvp,
- *        struct vnode *a_tvp, struct componentname *a_tcnp)
- */
static int
-null_rename(struct vop_old_rename_args *ap)
{
-    struct vnode *tdvp = ap->a_tdvp;
-    struct vnode *fvp = ap->a_fvp;
-    struct vnode *fdvp = ap->a_fdvp;
-    struct vnode *tvp = ap->a_tvp;

    / Check for cross-device rename. /
-    if ((fvp->v_mount != tdvp->v_mount) ||
-        (tvp && (fvp->v_mount != tvp->v_mount))) {
-        if (tdvp == tvp)
-            vrele(tdvp);
-        else
-            vput(tdvp);
-        if (tvp)
-            vput(tvp);
-        vrele(fdvp);
-        vrele(fvp);
-        return (EXDEV);
-    }
-    
-    return (null_bypass(&ap->a_head));
}

/
 * A special flag, LK_THISLAYER, causes the locking function to operate
- * ONLY on the nullfs layer.  Otherwise we are responsible for locking not
- * only our layer, but the lower layer as well.
- 
- * null_lock(struct vnode *a_vp, int a_flags, struct thread *a_td)
- */
static int
-null_lock(struct vop_lock_args *ap)
{
-    struct vnode *vp = ap->a_vp;
-    int flags = ap->a_flags;
-    struct null_node *np = VTONULL(vp);
-    struct vnode *lvp;
-    int error;

    /
-     * Lock the nullfs layer first, disposing of the interlock in the
-     * process.
-     /
-    KKASSERT((flags & LK_INTERLOCK) == 0);
-    error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER,
-            NULL, ap->a_td);

    /
-     * If locking only the nullfs layer, or if there is no lower layer,
-     * or if an error occured while attempting to lock the nullfs layer,
-     * we are done.
-     
-     * np can be NULL is the vnode is being recycled from a previous
-     * hash collision.
-     */
-    if ((flags & LK_THISLAYER) || np == NULL ||
-        np->null_lowervp == NULL || error) {
-        return (error);
-    }

    /
-     * Lock the underlying vnode.  If we are draining we should not drain
-     * the underlying vnode, since it is not being destroyed, but we do
-     * lock it exclusively in that case.  Note that any interlocks have
-     * already been disposed of above.
-     /
-    lvp = np->null_lowervp;
-    if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
-        NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
-        error = vn_lock(lvp, (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
-                ap->a_td);
-    } else {
-        error = vn_lock(lvp, flags, ap->a_td);
-    }

    /
-     * If an error occured we have to undo our nullfs lock, then return
-     * the original error.
-     /
-    if (error)
-        lockmgr(&vp->v_lock, LK_RELEASE, NULL, ap->a_td);
-    return(error);
}

/
 * A special flag, LK_THISLAYER, causes the unlocking function to operate
- * ONLY on the nullfs layer.  Otherwise we are responsible for unlocking not
- * only our layer, but the lower layer as well.
- 
- * null_unlock(struct vnode *a_vp, int a_flags, struct thread *a_td)
- */
static int
-null_unlock(struct vop_unlock_args *ap)
{
-    struct vnode *vp = ap->a_vp;
-    int flags = ap->a_flags;
-    struct null_node *np = VTONULL(vp);
-    struct vnode *lvp;
-    int error;

    KKASSERT((flags & LK_INTERLOCK) == 0);
-    /
-     * nullfs layer only
-     /
-    if (flags & LK_THISLAYER) {
-        error = lockmgr(&vp->v_lock, 
-                (flags & ~LK_THISLAYER) | LK_RELEASE,
-                NULL, ap->a_td);
-        return (error);
-    }

    /
-     * If there is no underlying vnode the lock operation occurs at
-     * the nullfs layer.  np can be NULL is the vnode is being recycled
-     * from a previous hash collision.
-     /
-    if (np == NULL || (lvp = np->null_lowervp) == NULL) {
-        error = lockmgr(&vp->v_lock, flags | LK_RELEASE,
-                NULL, ap->a_td);
-        return(error);
-    }

    /
-     * Unlock the lower layer first, then our nullfs layer.
-     /
-    VOP_UNLOCK(lvp, flags, ap->a_td);
-    error = lockmgr(&vp->v_lock, flags | LK_RELEASE, NULL, ap->a_td);
-    return (error);
}

/
 * null_islocked(struct vnode a_vp, struct thread *a_td)
- *
- * If a lower layer exists return the lock status of the lower layer,
- * otherwise return the lock status of our nullfs layer.
- */
static int
-null_islocked(struct vop_islocked_args *ap)
{
-    struct vnode *vp = ap->a_vp;
-    struct vnode *lvp;
-    struct null_node *np = VTONULL(vp);
-    int error;

    lvp = np->null_lowervp;
-    if (lvp == NULL)
-        error = lockstatus(&vp->v_lock, ap->a_td);
-    else
-        error = VOP_ISLOCKED(lvp, ap->a_td);
-    return (error);
}


/
- * The vnode is no longer active.  However, the new VFS API may retain
- * the node in the vfs cache.  There is no way to tell that someone issued
- * a remove/rmdir operation on the underlying filesystem (yet), but we can't
- * remove the lowervp reference here.
- 
- * null_inactive(struct vnode *a_vp, struct thread *a_td)
- */
static int
-null_inactive(struct vop_inactive_args *ap)
{
-    /*struct vnode *vp = ap->a_vp;*/
-    /*struct null_node *np = VTONULL(vp);*/

    /*
-     * At the moment don't do anything here.  All the rest of the code
-     * assumes that lowervp will remain inact, and the inactive nullvp
-     * may be reactivated at any time.  XXX I'm not sure why the 4.x code
-     * even worked.
-     */

    /*
-     * Now it is safe to release our nullfs layer vnode.
-     */
-    return (0);
}

/*
 * We can free memory in null_inactive, but we do this
- * here. (Possible to guard vp->v_data to point somewhere)
- *
- * null_reclaim(struct vnode *a_vp, struct thread *a_td)
- */
static int
-null_reclaim(struct vop_reclaim_args *ap)
{
-    struct vnode *vp = ap->a_vp;
-    struct vnode *lowervp;
-    struct null_node *np;

    np = VTONULL(vp);
-    vp->v_data = NULL;
-    /*
-     * null_lowervp reference to lowervp.  The lower vnode's
-     * inactive routine may or may not be called when we do the
-     * final vrele().
-     */
-    if (np) {
-        null_node_rem(np);
-        lowervp = np->null_lowervp;
-        np->null_lowervp = NULLVP;
-        if (lowervp)
-            vrele(lowervp);
-        free(np, M_NULLFSNODE);
-    }
-    return (0);
}

/*
 * null_print(struct vnode *a_vp)
- */
static int
-null_print(struct vop_print_args *ap)
{
-    struct vnode *vp = ap->a_vp;
-    struct null_node *np = VTONULL(vp);

    if (np == NULL) {
-        printf ("\ttag VT_NULLFS, vp=%p, NULL v_data!\n", vp);
-        return(0);
-    }
-    printf ("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", vp, np->null_lowervp);
-    if (np->null_lowervp != NULL) {
-        printf("\tlowervp_lock: ");
-        lockmgr_printinfo(&np->null_lowervp->v_lock);
-    } else {
-        printf("\tnull_lock: ");
-        lockmgr_printinfo(&vp->v_lock);
-    }
-    printf("\n");
-    return (0);
}

/*
 * Let an underlying filesystem do the work
- *
- * null_createvobject(struct vnode *vp, struct ucred *cred, struct proc *p)
- */
static int
-null_createvobject(struct vop_createvobject_args *ap)
{
-    struct vnode *vp = ap->a_vp;
-    struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
-    int error;

    if (vp->v_type == VNON || lowervp == NULL)
-        return 0;
-    error = VOP_CREATEVOBJECT(lowervp, ap->a_td);
-    if (error)
-        return (error);
-    vp->v_flag |= VOBJBUF;
-    return (0);
}

/*
 * We have nothing to destroy and this operation shouldn't be bypassed.
- *
- * null_destroyvobject(struct vnode *vp)
- */
static int
-null_destroyvobject(struct vop_destroyvobject_args *ap)
{
-    struct vnode *vp = ap->a_vp;

    vp->v_flag &= ~VOBJBUF;
-    return (0);
}

/*
 * null_getvobject(struct vnode *vp, struct vm_object **objpp)
- *
- * Note that this can be called when a vnode is being recycled, and
- * v_data may be NULL in that case if nullfs had to recycle a vnode
- * due to a null_node collision.
- */
static int
-null_getvobject(struct vop_getvobject_args *ap)
{
-    struct vnode *lvp;

    if (ap->a_vp->v_data == NULL)
-        return EINVAL;

    lvp = NULLVPTOLOWERVP(ap->a_vp);
-    if (lvp == NULL)
-        return EINVAL;
-    return (VOP_GETVOBJECT(lvp, ap->a_objpp));
+    struct mount *lmp;

    lmp = MOUNTTONULLMOUNT(ap->a_fncp->nc_mount)->nullm_vfs;
+    if (lmp != MOUNTTONULLMOUNT(ap->a_tncp->nc_mount)->nullm_vfs)
+        return (EINVAL);

    ap->a_head.a_ops = lmp->mnt_vn_norm_ops;

    return vop_nrename_ap(ap);
 }

/*
 * Global vfs data structures
  */
 struct vnodeopv_entry_desc null_vnodeop_entries[] = {
-    { &vop_default_desc,        (vnodeopv_entry_t) null_bypass },
-    { &vop_access_desc,        (vnodeopv_entry_t) null_access },
-    { &vop_createvobject_desc,    (vnodeopv_entry_t) null_createvobject },
-    { &vop_destroyvobject_desc,    (vnodeopv_entry_t) null_destroyvobject },
-    { &vop_getattr_desc,        (vnodeopv_entry_t) null_getattr },
-    { &vop_getvobject_desc,        (vnodeopv_entry_t) null_getvobject },
-    { &vop_inactive_desc,        (vnodeopv_entry_t) null_inactive },
-    { &vop_islocked_desc,        (vnodeopv_entry_t) null_islocked },
-    { &vop_lock_desc,        (vnodeopv_entry_t) null_lock },
-    { &vop_old_lookup_desc,        (vnodeopv_entry_t) null_lookup },
-    { &vop_open_desc,        (vnodeopv_entry_t) null_open },
-    { &vop_print_desc,        (vnodeopv_entry_t) null_print },
-    { &vop_reclaim_desc,        (vnodeopv_entry_t) null_reclaim },
-    { &vop_old_rename_desc,        (vnodeopv_entry_t) null_rename },
-    { &vop_setattr_desc,        (vnodeopv_entry_t) null_setattr },
-    { &vop_unlock_desc,        (vnodeopv_entry_t) null_unlock },
-    { &vop_revoke_desc,        (vnodeopv_entry_t) null_revoke },
-
    { &vop_nresolve_desc,        (vnodeopv_entry_t) null_nresolve },
    { &vop_ncreate_desc,        (vnodeopv_entry_t) null_ncreate },
    { &vop_nmkdir_desc,        (vnodeopv_entry_t) null_nmkdir },
+    { &vop_nmknod_desc,        (vnodeopv_entry_t) null_nmknod },
+    { &vop_nlink_desc,        (vnodeopv_entry_t) null_nlink },
+    { &vop_nsymlink_desc,        (vnodeopv_entry_t) null_nsymlink },
+    { &vop_nwhiteout_desc,        (vnodeopv_entry_t) null_nwhiteout },
    { &vop_nremove_desc,        (vnodeopv_entry_t) null_nremove },
    { &vop_nrmdir_desc,        (vnodeopv_entry_t) null_nrmdir },
    { &vop_nrename_desc,        (vnodeopv_entry_t) null_nrename },
diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null_subr.c
--- a/sys/vfs/nullfs/null_subr.c    Mon Jan  2 11:42:05 2006 +0000
+++ /dev/null    Thu Jan  1 00:00:00 1970 +0000
@@ -1,389 +0,0 @@
/*
 * Copyright (c) 1992, 1993
- *    The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software donated to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    This product includes software developed by the University of
- *    California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *    @(#)null_subr.c    8.7 (Berkeley) 5/14/95
- *
- * $FreeBSD: src/sys/miscfs/nullfs/null_subr.c,v 1.21.2.4 2001/06/26 04:20:09 bp Exp $
- * $DragonFly: src/sys/vfs/nullfs/null_subr.c,v 1.17 2004/12/17 00:18:30 dillon Exp $
- */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include "null.h" 

#define LOG2_SIZEVNODE 7        /* log2(sizeof struct vnode) */

/*
 * Null layer cache:
- * Each cache entry holds a reference to the lower vnode
- * along with a pointer to the alias vnode.  When an
- * entry is added the lower vnode is vref'd.  When the
- * alias is removed the lower vnode is vrele'd.
- */

#define    NULL_NHASH(vp) \
-    (&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash])

-static struct null_node **null_node_hashtbl;
-static u_long null_node_hash;
-static struct lwkt_token null_ihash_token;

static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");

static int    null_node_alloc(struct mount *mp, struct vnode *lowervp,
                     struct vnode **vpp);
static struct vnode *
        null_node_find(struct mount *mp, struct vnode *lowervp);

/*
- * Initialise cache headers
- */
int
-nullfs_init(struct vfsconf *vfsp)
{
-    NULLFSDEBUG("nullfs_init\n");        /* printed during system boot */
-    null_node_hash = 16;
-    while (null_node_hash < desiredvnodes)
-        null_node_hash <<= 1;
-    null_node_hashtbl = malloc(sizeof(void *) * null_node_hash,
-                    M_NULLFSHASH, M_WAITOK|M_ZERO);
-    --null_node_hash;
-    lwkt_token_init(&null_ihash_token);
-    return (0);
}

int
-nullfs_uninit(struct vfsconf *vfsp)
{
-        if (null_node_hashtbl) {
-        free(null_node_hashtbl, M_NULLFSHASH);
-        null_node_hashtbl = NULL;
-    }
-    return (0);
}

/*
 * Return a vref'ed alias for lower vnode if already exists, else 0.
- * Lower vnode should be locked (but with no additional refs) on entry
- * and will be unlocked on return if the search was successful, and left
- * locked if the search was not successful.
- */
static struct vnode *
-null_node_find(struct mount *mp, struct vnode *lowervp)
{
-    struct thread *td = curthread;    /* XXX */
-    struct null_node *np;
-    struct null_node *xp;
-    struct vnode *vp;
-    lwkt_tokref ilock;

    lwkt_gettoken(&ilock, &null_ihash_token);
loop:
    for (np = *NULL_NHASH(lowervp); np; np = np->null_next) {
-        if (np->null_lowervp == lowervp && NULLTOV(np)->v_mount == mp) {
-            vp = NULLTOV(np);
-            if (vget(vp, LK_EXCLUSIVE | LK_CANRECURSE, td)) {
-                printf ("null_node_find: vget failed.\n");
-                goto loop;
-            }

            /*
-             * vget() might have blocked, we have to check that
-             * our vnode is still valid.
-             */
-            xp = *NULL_NHASH(lowervp);
-            while (xp) {
-                if (xp == np && xp->null_lowervp == lowervp &&
-                    NULLTOV(xp) == vp &&
-                    NULLTOV(xp)->v_mount == mp) {
-                    break;
-                }
-                xp = xp->null_next;
-            }
-            if (xp == NULL) {
-                printf ("null_node_find: node race, retry.\n");
-                vput(vp);
-                goto loop;
-            }
-            /*
-             * SUCCESS!  Returned the locked and referenced vp
-             * and release the lock on lowervp.
-             */
-            VOP_UNLOCK(lowervp, 0, td);
-            lwkt_reltoken(&ilock);
-            return (vp);
-        }
-    }

    /*
-     * Failure, leave lowervp locked on return.
-     */
-    lwkt_reltoken(&ilock);
-    return(NULL);
}

int
-null_node_add(struct null_node *np)
{
-    struct null_node **npp;
-    struct null_node *n2;
-    lwkt_tokref ilock;

    lwkt_gettoken(&ilock, &null_ihash_token);
-    npp = NULL_NHASH(np->null_lowervp);
-    while ((n2 = *npp) != NULL) {
-        if (n2->null_lowervp == np->null_lowervp &&
-            n2->null_vnode->v_mount == np->null_vnode->v_mount) {
-            lwkt_reltoken(&ilock);
-            return(EBUSY);
-        }
-        npp = &n2->null_next;
-    }
-    np->null_next = NULL;
-    *npp = np;
-    lwkt_reltoken(&ilock);
-    return(0);
}

void
-null_node_rem(struct null_node *np)
{
-    struct null_node **npp;
-    struct null_node *n2;
-    lwkt_tokref ilock;

    lwkt_gettoken(&ilock, &null_ihash_token);
-    npp = NULL_NHASH(np->null_lowervp);
-    while ((n2 = *npp) != NULL) {
-        if (n2 == np)
-            break;
-        npp = &n2->null_next;
-    }
-    KKASSERT(np == n2);
-    *npp = np->null_next;
-    np->null_next = NULL;
-    lwkt_reltoken(&ilock);
}

/*
 * Make a new null_node node.  vp is the null mount vnode, lowervp is the
- * lower vnode.  Maintain a reference to (lowervp).  lowervp must be
- * locked on call.
- */
static int
-null_node_alloc(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
{
-    struct null_node *np;
-    struct thread *td;
-    struct vnode *vp;
-    int error;

    td = curthread;
retry:
    /*
-     * If we have already hashed the vp we can just return it.
-     */
-    *vpp = null_node_find(mp, lowervp);
-    if (*vpp)
-        return 0;

    /*
-     * lowervp is locked but not referenced at this point.
-     */
-    MALLOC(np, struct null_node *, sizeof(struct null_node),
-           M_NULLFSNODE, M_WAITOK);

    error = getnewvnode(VT_NULL, mp, vpp, 0, LK_CANRECURSE);
-    if (error) {
-        FREE(np, M_NULLFSNODE);
-        return (error);
-    }
-    vp = *vpp;

    /*
-     * Set up the np/vp relationship and set the lower vnode.
-     *
-     * XXX:
-     * When nullfs encounters sockets or device nodes, it
-     * has a hard time working with the normal vp union, probably
-     * because the device has not yet been opened.  Needs investigation.
-     */
-    vp->v_type = lowervp->v_type;
-    if (vp->v_type == VCHR || vp->v_type == VBLK)
-        addaliasu(vp, lowervp->v_udev);
-    else
-        vp->v_un = lowervp->v_un;    /* XXX why this assignment? */
-    np->null_vnode = vp;
-    np->null_lowervp = lowervp;

    /*
-     * Our new vnode is already VX locked (which is effective
-     * LK_THISLAYER, which is what we want).
-     */

    /*
-     * Try to add our new node to the hash table.  If a collision
-     * occurs someone else beat us to it and we need to destroy the
-     * vnode and retry.
-     */
-    if (null_node_add(np) != 0) {
-        free(np, M_NULLFSNODE);
-        vput(vp);
-        goto retry;
-    }

    /*
-     * Finish up.  Link the vnode and null_node together, ref lowervp
-     * for the null node.  lowervp is already locked so the lock state
-     * is already properly synchronized.
-     *
-     * Set the vnode up to reclaim as quickly as possible
-     */
-    vp->v_data = np;
-    vp->v_flag |= VAGE;
-    vref(lowervp);
-    return (0);
}


/*
- * Try to find an existing null_node vnode refering to the given underlying
- * vnode (which should be locked and referenced). If no vnode found, create
- * a new null_node vnode which contains a reference to the lower vnode.
- */
int
-null_node_create(struct mount *mp, struct vnode *lowervp, struct vnode **newvpp)
{
-    struct vnode *aliasvp;

    aliasvp = null_node_find(mp, lowervp);
-    if (aliasvp) {
-        /*
-         * null_node_find() has unlocked lowervp for us, so we just
-         * have to get rid of the reference.
-         */
-        vrele(lowervp);
#ifdef NULLFS_DEBUG
        vprint("null_node_create: exists", aliasvp);
#endif
    } else {
-        int error;

        /*
-         * Get new vnode.  Note that lowervp is locked and referenced
-         * at this point (as it was passed to us).
-         */
-        NULLFSDEBUG("null_node_create: create new alias vnode\n");

        /*
-         * Make new vnode reference the null_node.
-         */
-        error = null_node_alloc(mp, lowervp, &aliasvp);
-        vrele(lowervp);
-        if (error)
-            return error;

        /*
-         * aliasvp is already locked and ref'd by getnewvnode()
-         */
-    }

#ifdef DIAGNOSTIC
-    if (lowervp->v_usecount < 1) {
-        /* Should never happen... */
-        vprint ("null_node_create: alias ", aliasvp);
-        vprint ("null_node_create: lower ", lowervp);
-        panic ("null_node_create: lower has 0 usecount.");
-    };
#endif

#ifdef NULLFS_DEBUG
    vprint("null_node_create: alias", aliasvp);
-    vprint("null_node_create: lower", lowervp);
#endif

-    *newvpp = aliasvp;
-    return (0);
}

#ifdef DIAGNOSTIC
#include "opt_ddb.h" 

#ifdef DDB
#define    null_checkvp_barrier    1
#else
#define    null_checkvp_barrier    0
#endif

-struct vnode *
-null_checkvp(struct vnode *vp, char *fil, int lno)
{
-    struct null_node *a = VTONULL(vp);
-    if (a->null_lowervp == NULLVP) {
-        /* Should never happen */
-        int i; u_long *p;
-        printf("vp = %p, ZERO ptr\n", (void *)vp);
-        for (p = (u_long *) a, i = 0; i < 8; i++)
-            printf(" %lx", p[i]);
-        printf("\n");
-        /* wait for debugger */
-        while (null_checkvp_barrier) /*WAIT*/ ;
-        panic("null_checkvp");
-    }
-    if (a->null_lowervp->v_usecount < 1) {
-        int i; u_long *p;
-        printf("vp = %p, unref'ed lowervp\n", (void *)vp);
-        for (p = (u_long *) a, i = 0; i < 8; i++)
-            printf(" %lx", p[i]);
-        printf("\n");
-        /* wait for debugger */
-        while (null_checkvp_barrier) /*WAIT*/ ;
-        panic ("null with unref'ed lowervp");
-    };
#ifdef notyet
    printf("null %x/%d -> %x/%d [%s, %d]\n",
            NULLTOV(a), NULLTOV(a)->v_usecount,
-        a->null_lowervp, a->null_lowervp->v_usecount,
-        fil, lno);
#endif
    return a->null_lowervp;
}
#endif

Actions

Copy link

Also available in: Atom PDF

Project

General

Profile

DragonFlyBSD

Bug #36

nnull.diff (see comments on kernel@)

Updated by corecode about 20 years ago