Index: dfly/src/sys/kern/sys_generic.c =================================================================== --- dfly.orig/src/sys/kern/sys_generic.c 2007-12-25 13:09:58.418932000 +0100 +++ dfly/src/sys/kern/sys_generic.c 2007-12-25 13:10:32.000000000 +0100 @@ -77,6 +77,8 @@ static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); MALLOC_DEFINE(M_IOV, "iov", "large iov's"); +static int doselect(int nd, fd_set *in, fd_set *ou, fd_set *ex, + struct timeval *tv, int *res); static int pollscan (struct proc *, struct pollfd *, u_int, int *); static int selscan (struct proc *, fd_mask **, fd_mask **, int, int *); @@ -760,6 +762,109 @@ int sys_select(struct select_args *uap) { + struct timeval ktv; + struct timeval *ktvp; + int error; + + /* + * Get timeout if any. + */ + if (uap->tv != NULL) { + error = copyin(uap->tv, &ktv, sizeof (ktv)); + if (error) + return (error); + error = itimerfix(&ktv); + if (error) + return (error); + ktvp = &ktv; + } else { + ktvp = NULL; + } + + + /* + * Do real work. + */ + error = doselect(uap->nd, uap->in, uap->ou, uap->ex, ktvp, + &uap->sysmsg_result); + + + /* + * XXX write back updated timeout value to userland if any. + */ + + return (error); +} + + +/* + * Pselect system call. + */ +int +sys_pselect(struct pselect_args *uap) +{ + struct timespec kts; + struct timeval ktv; + struct timeval *ktvp; + sigset_t sigmask; + sigset_t osigmask; + int error; + + /* + * Get timeout if any and convert it. + * Round up during conversion to avoid timeout going off early. + */ + if (uap->ts != NULL) { + error = copyin(uap->ts, &kts, sizeof (kts)); + if (error) + return (error); + ktv.tv_sec = kts.tv_sec; + ktv.tv_usec = (kts.tv_nsec + 999) / 1000; + error = itimerfix(&ktv); + if (error) + return (error); + ktvp = &ktv; + } else { + ktvp = NULL; + } + + /* + * Install temporary signal mask if any. + */ + if (uap->sigmask != NULL) { + error = copyin(uap->sigmask, &sigmask, sizeof(sigmask)); + if (error) + return (error); + (void) kern_sigprocmask(SIG_SETMASK, &sigmask, &osigmask); + } + + /* + * Do real job. + */ + error = doselect(uap->nd, uap->in, uap->ou, uap->ex, ktvp, + &uap->sysmsg_result); + + /* + * Restore previous signal mask if necessary. + */ + if (uap->sigmask != NULL) { + (void) kern_sigprocmask(SIG_SETMASK, &osigmask, NULL); + } + + return (error); +} + +/* + * Common code for sys_select() and sys_pselect(). + * + * in, out and ex are userland pointers. tv must point to validated + * kernel-side timeout value or NULL for infinite timeout. res must + * point to syscall return value. + */ +static int +doselect(int nd, fd_set *in, fd_set *ou, fd_set *ex, struct timeval *tv, + int *res) +{ struct lwp *lp = curthread->td_lwp; struct proc *p = curproc; @@ -775,23 +880,23 @@ int ncoll, error, timo; u_int nbufbytes, ncpbytes, nfdbits; - if (uap->nd < 0) + if (nd < 0) return (EINVAL); - if (uap->nd > p->p_fd->fd_nfiles) - uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ + if (nd > p->p_fd->fd_nfiles) + nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ /* * Allocate just enough bits for the non-null fd_sets. Use the * preallocated auto buffer if possible. */ - nfdbits = roundup(uap->nd, NFDBITS); + nfdbits = roundup(nd, NFDBITS); ncpbytes = nfdbits / NBBY; nbufbytes = 0; - if (uap->in != NULL) + if (in != NULL) nbufbytes += 2 * ncpbytes; - if (uap->ou != NULL) + if (ou != NULL) nbufbytes += 2 * ncpbytes; - if (uap->ex != NULL) + if (ex != NULL) nbufbytes += 2 * ncpbytes; if (nbufbytes <= sizeof s_selbits) selbits = &s_selbits[0]; @@ -806,13 +911,13 @@ sbp = selbits; #define getbits(name, x) \ do { \ - if (uap->name == NULL) \ + if (name == NULL) \ ibits[x] = NULL; \ else { \ ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ obits[x] = sbp; \ sbp += ncpbytes / sizeof *sbp; \ - error = copyin(uap->name, ibits[x], ncpbytes); \ + error = copyin(name, ibits[x], ncpbytes); \ if (error != 0) \ goto done; \ } \ @@ -824,15 +929,8 @@ if (nbufbytes != 0) bzero(selbits, nbufbytes / 2); - if (uap->tv) { - error = copyin((caddr_t)uap->tv, (caddr_t)&atv, - sizeof (atv)); - if (error) - goto done; - if (itimerfix(&atv)) { - error = EINVAL; - goto done; - } + if (tv != NULL) { + atv = *tv; getmicrouptime(&rtv); timevaladd(&atv, &rtv); } else { @@ -843,12 +941,12 @@ retry: ncoll = nselcoll; lp->lwp_flag |= LWP_SELECT; - error = selscan(p, ibits, obits, uap->nd, &uap->sysmsg_result); - if (error || uap->sysmsg_result) + error = selscan(p, ibits, obits, nd, res); + if (error || *res) goto done; if (atv.tv_sec || atv.tv_usec) { getmicrouptime(&rtv); - if (timevalcmp(&rtv, &atv, >=)) + if (timevalcmp(&rtv, &atv, >=)) goto done; ttv = atv; timevalsub(&ttv, &rtv); @@ -875,7 +973,7 @@ if (error == EWOULDBLOCK) error = 0; #define putbits(name, x) \ - if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ + if (name && (error2 = copyout(obits[x], name, ncpbytes))) \ error = error2; if (error == 0) { int error2; Index: dfly/src/sys/kern/syscalls.master =================================================================== --- dfly.orig/src/sys/kern/syscalls.master 2007-12-25 13:09:58.419032000 +0100 +++ dfly/src/sys/kern/syscalls.master 2007-12-25 13:10:32.000000000 +0100 @@ -681,3 +681,6 @@ 496 STD BSD { lwpid_t lwp_gettid(void); } 497 STD BSD { int lwp_kill(pid_t pid, lwpid_t tid, int signum); } 498 STD BSD { int lwp_rtprio(int function, pid_t pid, lwpid_t tid, struct rtprio *rtp); } +499 STD BSD { int pselect(int nd, fd_set *in, fd_set *ou, \ + fd_set *ex, const struct timespec *ts, \ + const sigset_t *sigmask); } Index: dfly/src/include/unistd.h =================================================================== --- dfly.orig/src/include/unistd.h 2007-12-25 13:09:58.419308000 +0100 +++ dfly/src/include/unistd.h 2007-12-25 13:10:32.000000000 +0100 @@ -195,6 +195,10 @@ struct timeval; /* select(2) */ int select(int, fd_set * __restrict, fd_set * __restrict, fd_set * __restrict, struct timeval * __restrict); +struct timespec; /* pselect(2) */ +int pselect(int, fd_set * __restrict, fd_set * __restrict, + fd_set * __restrict, const struct timespec * __restrict, + const sigset_t * __restrict); #endif int setdomainname(const char *, int); int setegid(gid_t); Index: dfly/src/sys/sys/select.h =================================================================== --- dfly.orig/src/sys/sys/select.h 2007-12-25 13:09:58.419178000 +0100 +++ dfly/src/sys/sys/select.h 2007-12-25 13:10:33.000000000 +0100 @@ -85,6 +85,10 @@ struct timeval; int select(int, fd_set * __restrict, fd_set * __restrict, fd_set * __restrict, struct timeval * __restrict); +struct timespec; +int pselect(int, fd_set * __restrict, fd_set * __restrict, + fd_set * __restrict, const struct timespec * __restrict, + const sigset_t * __restrict); #endif __END_DECLS Index: dfly/src/lib/libthread_xu/thread/thr_syscalls.c =================================================================== --- dfly.orig/src/lib/libthread_xu/thread/thr_syscalls.c 2007-12-25 13:09:58.419573000 +0100 +++ dfly/src/lib/libthread_xu/thread/thr_syscalls.c 2007-12-25 13:10:33.000000000 +0100 @@ -95,7 +95,7 @@ extern int __creat(const char *, mode_t); extern int __pause(void); -extern int __pselect(int, fd_set *, fd_set *, fd_set *, +extern int __sys_pselect(int, fd_set *, fd_set *, fd_set *, const struct timespec *, const sigset_t *); extern unsigned __sleep(unsigned int); extern int __system(const char *); @@ -382,10 +382,9 @@ } __strong_reference(__poll, poll); -#if 0 int -_pselect(int count, fd_set *rfds, fd_set *wfds, fd_set *efds, +__pselect(int count, fd_set *rfds, fd_set *wfds, fd_set *efds, const struct timespec *timo, const sigset_t *mask) { struct pthread *curthread = tls_get_curthread(); @@ -393,13 +392,12 @@ int ret; oldcancel = _thr_cancel_enter(curthread); - ret = __pselect(count, rfds, wfds, efds, timo, mask); + ret = __sys_pselect(count, rfds, wfds, efds, timo, mask); _thr_cancel_leave(curthread, oldcancel); return (ret); } -__strong_reference(_pselect, pselect); -#endif +__strong_reference(__pselect, pselect); int Index: dfly/src/lib/libc_r/uthread/uthread_select.c =================================================================== --- dfly.orig/src/lib/libc_r/uthread/uthread_select.c 2007-12-25 13:09:58.419792000 +0100 +++ dfly/src/lib/libc_r/uthread/uthread_select.c 2007-12-25 13:18:13.000000000 +0100 @@ -228,3 +228,48 @@ return ret; } + + +int +pselect(int numfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, + const struct timespec *timeout, const sigset_t *mask) +{ + sigset_t omask; + struct timeval tv; + struct timeval *tvp; + int ret; + + _thread_enter_cancellation_point(); + + if (timeout != NULL) { + tv.tv_sec = timeout->tv_sec; + tv.tv_usec = (timeout->tv_nsec + 999) / 1000; + tvp = &tv; + } else { + tvp = NULL; + } + + /* + * XXX The masking/select/unmasking sequence below is not atomic. See + * man page. + * + * The Right Thing would be to mask/unmask signals kernel-side. We do + * this for single-threaded and libthread_xu processes but this is far + * from trivial for libc_r because select() is actually a poll() + * wrapper there and not using poll() would involve complex changes in + * the user thread scheduler. We're deprecating libc_r in favor of + * libthread_xu so the usefulness of such a change is questionable. + */ + + if (mask != NULL) + (void) sigprocmask(SIG_SETMASK, mask, &omask); + + ret = _select(numfds, readfds, writefds, exceptfds, tvp); + + if (mask != NULL) + (void) sigprocmask(SIG_SETMASK, &omask, NULL); + + _thread_leave_cancellation_point(); + + return ret; +} Index: dfly/src/lib/libc/sys/Makefile.inc =================================================================== --- dfly.orig/src/lib/libc/sys/Makefile.inc 2007-12-25 13:09:58.420022000 +0100 +++ dfly/src/lib/libc/sys/Makefile.inc 2007-12-25 13:10:33.000000000 +0100 @@ -91,7 +91,9 @@ sigstack.2 sigsuspend.2 socket.2 socketpair.2 stat.2 statfs.2 \ swapon.2 symlink.2 sync.2 sysarch.2 syscall.2 syslink.2 \ truncate.2 tls.2 umask.2 umtx.2 undelete.2 \ - unlink.2 utimes.2 upc_register.2 usched_set.2 uuidgen.2 vfork.2 wait.2 write.2 + unlink.2 utimes.2 upc_register.2 usched_set.2 uuidgen.2 vfork.2 \ + wait.2 write.2 pselect.2 + .if !defined(NO_P1003_1B) MAN+= sched_get_priority_max.2 sched_setparam.2 \ sched_setscheduler.2 sched_yield.2 Index: dfly/src/lib/libc/sys/pselect.2 =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ dfly/src/lib/libc/sys/pselect.2 2007-12-25 13:13:39.000000000 +0100 @@ -0,0 +1,135 @@ +.\" +.\" Copyright 2002 Massachusetts Institute of Technology +.\" +.\" Permission to use, copy, modify, and distribute this software and +.\" its documentation for any purpose and without fee is hereby +.\" granted, provided that both the above copyright notice and this +.\" permission notice appear in all copies, that both the above +.\" copyright notice and this permission notice appear in all +.\" supporting documentation, and that the name of M.I.T. not be used +.\" in advertising or publicity pertaining to distribution of the +.\" software without specific, written prior permission. M.I.T. makes +.\" no representations about the suitability of this software for any +.\" purpose. It is provided "as is" without express or implied +.\" warranty. +.\" +.\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS +.\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, +.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT +.\" SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +.\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +.\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: src/lib/libc/gen/pselect.3,v 1.4 2002/12/18 10:13:54 ru Exp $ +.\" +.Dd December 24, 2007 +.Dt PSELECT 2 +.Os +.Sh NAME +.Nm pselect +.Nd synchronous I/O multiplexing a la POSIX.1g +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/select.h +.Ft int +.Fo pselect +.Fa "int nfds" +.Fa "fd_set * restrict readfds" +.Fa "fd_set * restrict writefds" +.Fa "fd_set * restrict exceptfds" +.Fa "const struct timespec * restrict timeout" +.Fa "const sigset_t * restrict newsigmask" +.Fc +.Sh DESCRIPTION +The +.Fn pselect +function was introduced by +.St -p1003.1g-2000 +as a slightly stronger version of +.Xr select 2 . +The +.Fa nfds , readfds , writefds , +and +.Fa exceptfds +arguments are all identical to the analogous arguments of +.Fn select . +The +.Fa timeout +argument in +.Fn pselect +points to a +.Vt "const struct timespec" +rather than the (modifiable) +.Vt "struct timeval" +used by +.Fn select ; +as in +.Fn select , +a null pointer may be passed to indicate that +.Fn pselect +should wait indefinitely. +Finally, +.Fa newsigmask +specifies a signal mask which is set while waiting for input. +When +.Fn pselect +returns, the original signal mask is restored. +.Pp +See +.Xr select 2 +for a more detailed discussion of the semantics of this interface, and +for macros used to manipulate the +.Vt "fd_set" +data type. +.Sh RETURN VALUES +The +.Fn pselect +function returns the same values and under the same conditions as +.Fn select . +.Sh ERRORS +The +.Fn pselect +function may fail for any of the reasons documented for +.Xr select 2 +and (if a signal mask is provided) +.Xr sigprocmask 2 . +.Sh SEE ALSO +.Xr kqueue 2 , +.Xr poll 2 , +.Xr select 2 , +.Xr sigprocmask 2 +.Sh STANDARDS +The +.Fn pselect +function conforms to +.St -p1003.1-2001 . +.Sh BUGS +The +.Fn pselect +implementation in libc_r (user multi-threading) does not mask signals, calls +.Fn select , +and restores the original signal mask atomically. A calling user thread +using +.Fn pselect +to block until either a signal occurs or some descriptor changes may therefore +block forever. +The +.Fn pselect +implementations in libthread_xu (kernel multi-threading) and libc +(single-threading) operate atomically and are therefore race-free. +.Sh HISTORY +The +.Fn pselect +function first appeared in +.Dx 1.11 . +.Sh AUTHORS +This manual page was originally written by +.An Garrett Wollman Aq wollman@FreeBSD.org +for FreeBSD and later modified for DragonFly. Index: dfly/src/lib/libbind/port_after.h =================================================================== --- dfly.orig/src/lib/libbind/port_after.h 2007-12-25 13:09:41.322480000 +0100 +++ dfly/src/lib/libbind/port_after.h 2007-12-25 13:11:50.000000000 +0100 @@ -13,7 +13,6 @@ #include #endif -#define NEED_PSELECT #define HAVE_SA_LEN 1 #define HAVE_MINIMUM_IFREQ 1 #undef NEED_DAEMON