Bug #129 » ncc3.diff

csaba.henk, 03/29/2006 10:02 AM


sys/kern/vfs_cache.c Sun Mar 26 07:56:54 2006 +0000 → sys/kern/vfs_cache.c Wed Mar 29 10:23:26 2006 +0200
#define NCHHASH(hash) (&nchashtbl[(hash) & nchash])
#define MINNEG 1024
/* Modes for shadow group traversal */
#define SG_ALL 0 /* traverse whole group */
#define SG_SUBTREE 1 /* traverse only subtree */
MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
......
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
struct nchstats nchstats[SMP_MAXCPU];
static STAILQ_HEAD(, shadowinfo) shadowinfo_freeq;
static u_long numshadowinfo = 0;
STATNODE(CTLFLAG_RD, numshadowinfo, &numshadowinfo);
static long maxnumshadowinfo = -1;
SYSCTL_LONG(_vfs_cache, OID_AUTO, maxnumshadowinfo, CTLFLAG_RW,
&maxnumshadowinfo, 0, "");
MALLOC_DEFINE(M_SHADOWINFO, "shadowinfo", "VFS name cache shadowinfo");
/*
* Export VFS cache effectiveness statistics to user-land.
*
......
SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE|CTLFLAG_RD,
0, 0, sysctl_nchstats, "S,nchstats", "VFS cache effectiveness statistics");
/* XXX stubs for later MPSAFE work */
#define shadowinfo_freeq_lock()
#define shadowinfo_freeq_unlock()
static struct shadowinfo *
shadowinfo_fetch(void)
{
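/*
 * Opportunistic unlocked peek at the freelist; if it looks non-empty,
 * recheck and dequeue under the freeq lock (currently a stub, see the
 * XXX note above).
 */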
struct shadowinfo *shinf = STAILQ_FIRST(&shadowinfo_freeq);
if (! shinf)
goto alloc;
shadowinfo_freeq_lock();
if ((shinf = STAILQ_FIRST(&shadowinfo_freeq)))
STAILQ_REMOVE_HEAD(&shadowinfo_freeq, sh_entry);
shadowinfo_freeq_unlock();
if (shinf)
return (shinf);
alloc:
shinf = malloc(sizeof(*shinf), M_SHADOWINFO, M_WAITOK|M_ZERO);
numshadowinfo++;
return (shinf);
}
static __inline
struct shadowinfo*
shadowinfo_ref(struct shadowinfo *shinf)
{
shinf->sh_refs++;
return (shinf);
}
static void
shadowinfo_put(struct shadowinfo *shinf)
{
if (--shinf->sh_refs > 0)
return;
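/*
 * Last reference is gone: free the structure if we are over the
 * administrative limit, otherwise recycle it via the freelist.
 */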
if (maxnumshadowinfo >= 0 && numshadowinfo > maxnumshadowinfo) {
free(shinf, M_SHADOWINFO);
numshadowinfo--;
return;
}
shinf->sh_exlocks = 0;
shinf->sh_locktd = NULL;
shadowinfo_freeq_lock();
STAILQ_INSERT_TAIL(&shadowinfo_freeq, shinf, sh_entry);
shadowinfo_freeq_unlock();
}
static void cache_zap(struct namecache *ncp);
/*
......
(ncp->nc_flag & NCF_UNRESOLVED) &&
TAILQ_EMPTY(&ncp->nc_list)
) {
-KKASSERT(ncp->nc_exlocks == 0);
KKASSERT(ncp->nc_shadowinfo->sh_exlocks == 0);
cache_lock(ncp);
cache_zap(ncp);
} else {
......
ncp->nc_error = ENOTCONN; /* needs to be resolved */
ncp->nc_refs = 1;
ncp->nc_fsmid = 1;
ncp->nc_shadowinfo = &ncp->nc_shadowinfo_internal;
ncp->nc_shadowinfo_internal.sh_refs = 2;
ncp->nc_shadow_prev = NULL;
ncp->nc_shadow_next = NULL;
TAILQ_INIT(&ncp->nc_list);
cache_lock(ncp);
return(ncp);
......
static void
cache_free(struct namecache *ncp)
{
-KKASSERT(ncp->nc_refs == 1 && ncp->nc_exlocks == 1);
KKASSERT(ncp->nc_refs == 1 && ncp->nc_shadowinfo->sh_exlocks == 1);
if (ncp->nc_name)
free(ncp->nc_name, M_VFSCACHE);
free(ncp, M_VFSCACHE);
......
cache_drop(struct namecache *ncp)
{
_cache_drop(ncp);
}
/*
* Iterate an "updater" function over a shadow group.
* Both whole-group and subtree-only traversals are supported.
*/
static struct namecache *
cache_group_walk(struct namecache *ncp,
int (*updater)(struct namecache *xncp, void *param),
int flags, void *param)
{
struct namecache *xncp = ncp, *yncp;
for (;;) {
yncp = xncp->nc_shadow_next;
if (updater(xncp, param))
break;
if (! yncp || yncp == ncp ||
(flags & SG_SUBTREE &&
yncp->nc_shadowheight <= ncp->nc_shadowheight))
break;
xncp = yncp;
}
return(xncp);
}
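/*
 * Illustration (not part of the patch): a minimal updater that counts
 * the members of a shadow group. An updater returns nonzero to stop
 * the walk early; cache_group_walk() returns the entry it stopped at.
 */
static int
count_updater(struct namecache *xncp, void *param)
{
	++*(int *)param;	/* visit every member */
	return (0);		/* zero: keep walking */
}

/* hypothetical helper: number of entries in ncp's whole shadow group */
static int
shadow_group_size(struct namecache *ncp)
{
	int count = 0;

	cache_group_walk(ncp, &count_updater, SG_ALL, &count);
	return (count);
}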
struct migrate_param {
int heightdelta; /* adjustment to apply to nc_shadowheight */
int exlocks; /* new sh_exlocks value; -1 to leave it as is */
struct shadowinfo *shadowinfo; /* target shadowinfo; NULL selects the entry's internal one */
};
static int
migrate_updater(struct namecache *ncp, void *param)
{
struct migrate_param *mpm = param;
struct shadowinfo *shinf = mpm->shadowinfo;
struct shadowinfo *oldshinf = ncp->nc_shadowinfo;
if (! shinf)
shinf = &ncp->nc_shadowinfo_internal;
if (shinf == oldshinf)
goto out;
shinf->sh_locktd = oldshinf->sh_locktd;
ncp->nc_shadowinfo = shadowinfo_ref(shinf);
shadowinfo_put(oldshinf);
out:
ncp->nc_shadowheight += mpm->heightdelta;
if (mpm->exlocks >= 0)
shinf->sh_exlocks = mpm->exlocks;
return (0);
}
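/*
 * Group members are chained into a circular doubly-linked list via
 * nc_shadow_prev/nc_shadow_next; NULL links mean the entry is not in
 * any group.
 */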
static __inline
void
cache_shadow_link(struct namecache *sncp, struct namecache *ncp)
{
struct namecache *pncp;
struct namecache *nsncp;
pncp = ncp->nc_shadow_prev ?: ncp;
nsncp = sncp->nc_shadow_next ?: sncp;
pncp->nc_shadow_next = nsncp;
nsncp->nc_shadow_prev = pncp;
sncp->nc_shadow_next = ncp;
ncp->nc_shadow_prev = sncp;
}
static __inline
void
cache_shadow_unlink(struct namecache *ncp)
{
if (! ncp->nc_shadow_next)
return;
KKASSERT(ncp->nc_shadow_prev);
if (ncp->nc_shadow_prev == ncp->nc_shadow_next) {
ncp->nc_shadow_prev->nc_shadow_next = NULL;
ncp->nc_shadow_next->nc_shadow_prev = NULL;
} else {
ncp->nc_shadow_prev->nc_shadow_next = ncp->nc_shadow_next;
ncp->nc_shadow_next->nc_shadow_prev = ncp->nc_shadow_prev;
}
ncp->nc_shadow_prev = ncp->nc_shadow_next = NULL;
}
/*
* Join ncp into the shadow group of sncp.
*
* ncp must be unlocked on entry, while sncp must be locked on entry.
*
* The routine will fail and return ELOOP if the intended shadowing association
* doesn't make sense (currently this boils down to ncp being the same as
* sncp).
* It will fail with EEXIST if ncp gets resolved or acquires a shadow
* association from elsewhere during the attach attempt (this is possible
* because ncp is unlocked).
*
* - On success ncp will be a representative of the joint shadow group, which
* will then be locked.
* - On failure the namecache entries will exist separately just as they did
* before; both entries will be locked.
*/
int
cache_shadow_attach(struct namecache *ncp, struct namecache *sncp)
{
struct migrate_param mpm;
if (ncp == sncp)
return(ELOOP);
KKASSERT(ncp->nc_shadowinfo->sh_locktd != curthread);
KKASSERT(sncp->nc_shadowinfo->sh_locktd == curthread);
cache_lock_two(ncp, sncp);
if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 || ncp->nc_shadowheight != 0)
return(EEXIST);
if (sncp->nc_shadowinfo == &sncp->nc_shadowinfo_internal) {
mpm.heightdelta = 0;
mpm.shadowinfo = shadowinfo_fetch();
mpm.exlocks = sncp->nc_shadowinfo->sh_exlocks;
migrate_updater(sncp, &mpm);
}
mpm.heightdelta = sncp->nc_shadowheight + 1;
mpm.shadowinfo = sncp->nc_shadowinfo;
mpm.exlocks = -1;
cache_group_walk(ncp, &migrate_updater, SG_ALL, &mpm);
cache_shadow_link(sncp, ncp);
return(0);
}
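/*
 * Hypothetical sketch (not part of the patch): how a layered
 * filesystem might attach its upper-layer entry to the entry of the
 * layer below it, following the locking contract described above.
 * The function and variable names are illustrative only.
 */
static int
layerfs_shadow(struct namecache *upper_ncp, struct namecache *lower_ncp)
{
	int error;

	/* upper_ncp is unlocked, lower_ncp is locked, as required */
	error = cache_shadow_attach(upper_ncp, lower_ncp);
	if (error == 0) {
		/* upper_ncp now represents the locked joint group */
		return (0);
	}
	/*
	 * ELOOP: the entry would shadow itself; EEXIST: upper_ncp got
	 * resolved or attached elsewhere while it was unlocked. In
	 * both cases the two entries remain separate and locked.
	 */
	return (error);
}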
/*
* Take out namecache entry from its shadow group.
*
* The shadow group must be locked upon entry.
*
* On return both the entry and its former group remain locked.
*/
void
cache_shadow_detach(struct namecache *ncp)
{
struct namecache *pncp, *nncp;
struct migrate_param mpm;
mpm.shadowinfo = NULL;
again:
mpm.heightdelta = -ncp->nc_shadowheight;
mpm.exlocks = ncp->nc_shadowinfo->sh_exlocks;
pncp = ncp->nc_shadow_prev;
nncp = ncp->nc_shadow_next;
migrate_updater(ncp, &mpm);
cache_shadow_unlink(ncp);
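/*
 * If the group had exactly two members, the survivor is now a
 * singleton: run the migration on it too so that it falls back to its
 * private shadowinfo.
 */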
if (nncp && nncp == pncp) {
ncp = nncp;
goto again;
}
}
static int
vref_updater(struct namecache *ncp, void *param)
{
if (ncp->nc_vp)
*(int *)param > 0 ? vhold(ncp->nc_vp) : vdrop(ncp->nc_vp);
return(0);
}
/*
......
{
thread_t td;
int didwarn;
struct shadowinfo *shinf;
KKASSERT(ncp->nc_refs != 0);
didwarn = 0;
td = curthread;
for (;;) {
-if (ncp->nc_exlocks == 0) {
-ncp->nc_exlocks = 1;
-ncp->nc_locktd = td;
shinf = ncp->nc_shadowinfo;
KKASSERT(shinf);
KKASSERT(shinf->sh_refs != 0);
if (shinf->sh_exlocks == 0) {
int ref = 1;
shinf->sh_exlocks = 1;
shinf->sh_locktd = td;
/*
* The vp associated with a locked ncp must be held
* to prevent it from being recycled (which would
......
*
* XXX loop on race for later MPSAFE work.
*/
-if (ncp->nc_vp)
-vhold(ncp->nc_vp);
cache_group_walk(ncp, &vref_updater, SG_ALL, &ref);
break;
}
-if (ncp->nc_locktd == td) {
-++ncp->nc_exlocks;
if (shinf->sh_locktd == td) {
++shinf->sh_exlocks;
break;
}
-ncp->nc_flag |= NCF_LOCKREQ;
-if (tsleep(ncp, 0, "clock", nclockwarn) == EWOULDBLOCK) {
shinf->sh_lockreq = 1;
if (tsleep(shinf, 0, "clock", nclockwarn) == EWOULDBLOCK) {
if (didwarn)
continue;
didwarn = 1;
......
cache_lock_nonblock(struct namecache *ncp)
{
thread_t td;
struct shadowinfo *shinf = ncp->nc_shadowinfo;
KKASSERT(ncp->nc_refs != 0);
KKASSERT(shinf);
KKASSERT(shinf->sh_refs != 0);
td = curthread;
-if (ncp->nc_exlocks == 0) {
-ncp->nc_exlocks = 1;
-ncp->nc_locktd = td;
if (shinf->sh_exlocks == 0) {
int ref = 1;
shinf->sh_exlocks = 1;
shinf->sh_locktd = td;
/*
* The vp associated with a locked ncp must be held
* to prevent it from being recycled (which would
......
*
* XXX loop on race for later MPSAFE work.
*/
-if (ncp->nc_vp)
-vhold(ncp->nc_vp);
cache_group_walk(ncp, &vref_updater, SG_ALL, &ref);
return(0);
} else {
return(EWOULDBLOCK);
......
cache_unlock(struct namecache *ncp)
{
thread_t td = curthread;
struct shadowinfo *shinf = ncp->nc_shadowinfo;
KKASSERT(ncp->nc_refs > 0);
-KKASSERT(ncp->nc_exlocks > 0);
-KKASSERT(ncp->nc_locktd == td);
-if (--ncp->nc_exlocks == 0) {
-if (ncp->nc_vp)
-vdrop(ncp->nc_vp);
-ncp->nc_locktd = NULL;
-if (ncp->nc_flag & NCF_LOCKREQ) {
-ncp->nc_flag &= ~NCF_LOCKREQ;
-wakeup(ncp);
KKASSERT(shinf);
KKASSERT(shinf->sh_refs > 0);
KKASSERT(shinf->sh_exlocks > 0);
KKASSERT(shinf->sh_locktd == td);
if (shinf->sh_exlocks == 1) {
int ref = -1;
cache_group_walk(ncp, &vref_updater, SG_ALL, &ref);
}
if (--shinf->sh_exlocks == 0) {
shinf->sh_locktd = NULL;
if (shinf->sh_lockreq) {
shinf->sh_lockreq = 0;
wakeup(shinf);
}
}
}
/*
* Obtain locks on both uncp and lncp.
*
* On entry, uncp is assumed to be unlocked, and lncp is assumed to be
* locked.
*
* After this function returns, the caller is responsible for checking
* the state of lncp, which might have been temporarily unlocked.
*/
void
cache_lock_two(struct namecache *uncp, struct namecache *lncp)
{
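/*
 * Try the unlocked entry without blocking first. On contention, fall
 * back to taking the two locks in ascending address order, so that
 * concurrent callers cannot deadlock against each other.
 */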
if (cache_lock_nonblock(uncp) != 0) {
if (uncp > lncp)
cache_lock(uncp);
else {
cache_unlock(lncp);
cache_lock(uncp);
cache_lock(lncp);
}
}
}
......
cache_get_nonblock(struct namecache *ncp)
{
/* XXX MP */
-if (ncp->nc_exlocks == 0 || ncp->nc_locktd == curthread) {
if (ncp->nc_shadowinfo->sh_exlocks == 0 ||
ncp->nc_shadowinfo->sh_locktd == curthread) {
_cache_hold(ncp);
cache_lock(ncp);
return(0);
......
if (!TAILQ_EMPTY(&ncp->nc_list))
vhold(vp);
TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
-if (ncp->nc_exlocks)
if (ncp->nc_shadowinfo->sh_exlocks)
vhold(vp);
/*
......
ncp->nc_timeout = 1;
}
static int unresolver_updater(struct namecache *ncp, void *param);
/*
* Disassociate the vnode or negative-cache association and mark a
* namecache entry as unresolved again. Note that the ncp is still
......
void
cache_setunresolved(struct namecache *ncp)
{
struct namecache *nncp;
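/*
 * Unresolve the shadow subtree hanging off ncp first (the updater
 * skips the entry passed as param), then ncp itself; the latter
 * detaches ncp from its group, so hold the group successor across
 * the operation and release it afterwards.
 */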
cache_group_walk(ncp, &unresolver_updater, SG_SUBTREE, ncp);
nncp = ncp->nc_shadow_next;
if (nncp)
cache_hold(nncp);
unresolver_updater(ncp, NULL);
if (nncp)
cache_put(nncp);
}
static int
unresolver_updater(struct namecache *ncp, void *param)
{
struct vnode *vp;
if (ncp == param)
return(0);
if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
ncp->nc_flag |= NCF_UNRESOLVED;
......
*/
if (!TAILQ_EMPTY(&ncp->nc_list))
vdrop(vp);
-if (ncp->nc_exlocks)
if (ncp->nc_shadowinfo->sh_exlocks)
vdrop(vp);
} else {
TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
--numneg;
}
}
cache_shadow_detach(ncp);
}
if (ncp->nc_refs == 0) {
cache_hold(ncp);
cache_put(ncp);
}
return(0);
}
/*
......
struct namecache *nextkid;
int rcnt = 0;
-KKASSERT(ncp->nc_exlocks);
KKASSERT(ncp->nc_shadowinfo->sh_exlocks);
cache_setunresolved(ncp);
if (flags & CINV_DESTROY)
......
* XXX the disconnection could pose a problem, check code paths to make
* sure any code that blocks can handle the parent being changed out from
* under it. Maybe we should lock the children (watch out for deadlocks)?
* [UPDATE: attempt made to lock children, see in situ explanation]
*
* After we return the caller has the option of calling cache_setvp() if
* the vnode of the new target ncp is known.
......
cache_rename(struct namecache *fncp, struct namecache *tncp)
{
struct namecache *scan;
-int didwarn = 0;
int didwarn[] = { 0, 0 };
/* XXX should we rather assert non-equality here? */
if (fncp == tncp)
return;
again:
cache_setunresolved(fncp);
cache_setunresolved(tncp);
/*
* It seems we need to unlock fncp before calling cache_inval():
* cache_inval() does a lot of lock/unlock/relock-ing (with tncp
* and its children); keeping fncp locked through all that might
* lead to deadlock.
*/
cache_unlock(fncp);
while (cache_inval(tncp, CINV_CHILDREN) != 0) {
-if (didwarn++ % 10 == 0) {
-printf("Warning: cache_rename: race during "
if (didwarn[0]++ % 10 == 0) {
printf("Warning: cache_rename: race #1 during "
"rename %s->%s\n",
fncp->nc_name, tncp->nc_name);
}
tsleep(tncp, 0, "mvrace", hz / 10);
cache_setunresolved(tncp);
}
cache_unlock(tncp);
cache_lock(fncp);
while ((scan = TAILQ_FIRST(&fncp->nc_list)) != NULL) {
cache_hold(scan);
cache_unlock(fncp);
/*
* We have to lock fncp's kids in order to unresolve
* their shadow kids...
*/
cache_get(scan);
cache_unlink_parent(scan);
cache_group_walk(scan, &unresolver_updater, SG_SUBTREE, scan);
cache_link_parent(scan, tncp);
if (scan->nc_flag & NCF_HASHED)
cache_rehash(scan);
cache_drop(scan);
cache_put(scan);
cache_lock(fncp);
}
cache_lock_two(tncp, fncp);
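/*
 * Both entries must still be unresolved; if either got resolved
 * while we had it unlocked, redo the whole procedure.
 */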
if ((fncp->nc_flag & tncp->nc_flag & NCF_UNRESOLVED) == 0) {
if (didwarn[1]++ % 10 == 0) {
printf("Warning: cache_rename: race #2 during "
"rename %s->%s\n",
fncp->nc_name, tncp->nc_name);
}
goto again;
}
}
......
cache_drop(ncp);
return;
}
-KKASSERT(par->nc_exlocks == 0);
KKASSERT(par->nc_shadowinfo->sh_exlocks == 0);
cache_lock(ncp);
}
done:
......
if (ncp->nc_timeout &&
(int)(ncp->nc_timeout - ticks) < 0 &&
(ncp->nc_flag & NCF_UNRESOLVED) == 0 &&
-ncp->nc_exlocks == 0
ncp->nc_shadowinfo->sh_exlocks == 0
) {
cache_zap(cache_get(ncp));
goto restart;
......
gd->gd_nchstats = &nchstats[i];
}
TAILQ_INIT(&ncneglist);
STAILQ_INIT(&shadowinfo_freeq);
nchashtbl = hashinit(desiredvnodes*2, M_VFSCACHE, &nchash);
nclockwarn = 1 * hz;
}
sys/sys/namecache.h Sun Mar 26 07:56:54 2006 +0000 → sys/sys/namecache.h Wed Mar 29 10:23:26 2006 +0200
struct vnode;
/*
* Auxiliary structure for locking namecache entries,
* either on their own or grouped into "shadow groups".
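* All members of a shadow group share one refcounted shadowinfo, so
* locking any member locks the group as a whole; an entry not in any
* group uses the shadowinfo embedded in its own namecache structure.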
*/
struct shadowinfo {
STAILQ_ENTRY(shadowinfo) sh_entry; /* entry for free list */
int sh_exlocks; /* namespace locking */
struct thread *sh_locktd; /* namespace locking */
int sh_refs; /* reference count */
uint8_t sh_lockreq :1; /* lock intent flag */
};
TAILQ_HEAD(namecache_list, namecache);
LIST_HEAD(namecache_shadow_list, namecache);
/*
* The namecache structure is used to manage the filesystem namespace. Most
......
char *nc_name; /* Separately allocated seg name */
int nc_error;
int nc_timeout; /* compared against ticks, or 0 */
-int nc_exlocks; /* namespace locking */
-struct thread *nc_locktd; /* namespace locking */
struct shadowinfo *nc_shadowinfo; /* namespace locking */
struct shadowinfo nc_shadowinfo_internal; /* private locking information */
struct namecache *nc_shadow_prev; /* previous entry in shadow group */
struct namecache *nc_shadow_next; /* next entry in shadow group */
int nc_shadowheight; /* depth within shadow group */
struct namecache *nc_shadowed; /* lower layer entry in layered fs */
struct mount *nc_mount; /* associated mount for vopops */
int64_t nc_fsmid; /* filesystem modified id */
};
......
#define NCF_MOUNTPT 0x0008 /* mount point */
#define NCF_ROOT 0x0010 /* namecache root (static) */
#define NCF_HASHED 0x0020 /* namecache entry in hash table */
-#define NCF_LOCKREQ 0x0040
#define NCF_UNUSED040 0x0040
#define NCF_UNUSED080 0x0080
#define NCF_ISSYMLINK 0x0100 /* represents a symlink */
#define NCF_ISDIR 0x0200 /* represents a directory */
......
void cache_lock(struct namecache *ncp);
int cache_lock_nonblock(struct namecache *ncp);
void cache_unlock(struct namecache *ncp);
void cache_lock_two(struct namecache *uncp, struct namecache *lncp);
int cache_shadow_attach(struct namecache *ncp, struct namecache *sncp);
void cache_shadow_detach(struct namecache *ncp);
void cache_setvp(struct namecache *ncp, struct vnode *vp);
void cache_settimeout(struct namecache *ncp, int nticks);
void cache_setunresolved(struct namecache *ncp);