diff --git a/etc/protocols b/etc/protocols index 4399da5..fc04f48 100644 --- a/etc/protocols +++ b/etc/protocols @@ -120,7 +120,7 @@ ipcomp 108 IPComp # IP Payload Compress snp 109 SNP # Sitara Networks Protocol compaq-peer 110 Compaq-Peer # Compaq Peer Protocol ipx-in-ip 111 IPX-in-IP # IPX in IP -vrrp 112 VRRP # Virtual Router Redundancy Protocol +carp 112 CARP vrrp # Common Address Redundancy Protocol pgm 113 PGM # PGM Reliable Transport Protocol # 114 # any 0-hop protocol l2tp 115 L2TP # Layer Two Tunneling Protocol diff --git a/sbin/ifconfig/Makefile b/sbin/ifconfig/Makefile index c6556e6..f15aaa7 100644 --- a/sbin/ifconfig/Makefile +++ b/sbin/ifconfig/Makefile @@ -24,7 +24,7 @@ SRCS+= ifmedia.c # SIOC[GS]IFMEDIA supp SRCS+= ifvlan.c # SIOC[GS]ETVLAN support SRCS+= ifieee80211.c # SIOC[GS]IEEE80211 support -#SRCS+= ifcarp.c # SIOC[GS]VH support +SRCS+= ifcarp.c # SIOC[GS]VH support #SRCS+= ifpfsync.c # pfsync(4) support SRCS+= ifbridge.c # bridge support diff --git a/sbin/ifconfig/ifcarp.c b/sbin/ifconfig/ifcarp.c new file mode 100644 index 0000000..2ab73f3 --- /dev/null +++ b/sbin/ifconfig/ifcarp.c @@ -0,0 +1,200 @@ +/* $Id$ */ +/* from $FreeBSD: src/sbin/ifconfig/ifcarp.c,v 1.2 2005/02/22 14:07:47 glebius Exp $ */ +/* from $OpenBSD: ifconfig.c,v 1.82 2003/10/19 05:43:35 mcbride Exp $ */ + +/* + * Copyright (c) 2002 Michael Shalayeff. All rights reserved. + * Copyright (c) 2003 Ryan McBride. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ifconfig.h" + +static const char *carp_states[] = { CARP_STATES }; + +void carp_status(int s); +void setcarp_advbase(const char *,int, int, const struct afswtch *rafp); +void setcarp_advskew(const char *, int, int, const struct afswtch *rafp); +void setcarp_passwd(const char *, int, int, const struct afswtch *rafp); +void setcarp_vhid(const char *, int, int, const struct afswtch *rafp); + +void +carp_status(int s) +{ + const char *state; + struct carpreq carpr; + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + return; + + if (carpr.carpr_vhid > 0) { + if (carpr.carpr_state > CARP_MAXSTATE) + state = ""; + else + state = carp_states[carpr.carpr_state]; + + printf("\tcarp: %s vhid %d advbase %d advskew %d\n", + state, carpr.carpr_vhid, carpr.carpr_advbase, + carpr.carpr_advskew); + } + + return; + +} + +void +setcarp_passwd(const char *val, int d, int s, const struct afswtch *afp) +{ + struct carpreq carpr; + + memset((char
*)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + memset(carpr.carpr_key, 0, sizeof(carpr.carpr_key)); /* SIOCGVH may have returned the old key; clear it so no stale bytes trail a shorter pass phrase */ + /* XXX Should hash the password into the key here, perhaps? */ + strlcpy(carpr.carpr_key, val, CARP_KEY_LEN); + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +void +setcarp_vhid(const char *val, int d, int s, const struct afswtch *afp) +{ + int vhid; + struct carpreq carpr; + + vhid = atoi(val); + + if (vhid <= 0) + errx(1, "vhid must be greater than 0"); + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + carpr.carpr_vhid = vhid; + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +void +setcarp_advskew(const char *val, int d, int s, const struct afswtch *afp) +{ + int advskew; + struct carpreq carpr; + + advskew = atoi(val); + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + carpr.carpr_advskew = advskew; + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +void +setcarp_advbase(const char *val, int d, int s, const struct afswtch *afp) +{ + int advbase; + struct carpreq carpr; + + advbase = atoi(val); + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + carpr.carpr_advbase = advbase; + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +static struct cmd carp_cmds[] = { + DEF_CMD_ARG("advbase", setcarp_advbase), + DEF_CMD_ARG("advskew", setcarp_advskew), + DEF_CMD_ARG("pass", setcarp_passwd), + DEF_CMD_ARG("vhid", setcarp_vhid), +}; +static struct afswtch af_carp = { + .af_name = "af_carp", + .af_af = AF_UNSPEC, + .af_other_status = carp_status, +}; + +static 
__constructor void +carp_ctor(void) +{ +#define N(a) (sizeof(a) / sizeof(a[0])) + int i; + + for (i = 0; i < N(carp_cmds); i++) + cmd_register(&carp_cmds[i]); + af_register(&af_carp); +#undef N +} diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index c832cc1..e9820fb 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -28,8 +28,9 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD: src/sbin/ifconfig/ifconfig.8,v 1.124 2006/10/10 09:44:08 ru Exp $ .\" $DragonFly: src/sbin/ifconfig/ifconfig.8,v 1.19 2007/04/09 21:20:37 swildner Exp $ + .\" -.Dd November 19, 2006 +.Dd April 8, 2007 .Dt IFCONFIG 8 .Os .Sh NAME @@ -1388,6 +1389,31 @@ The argument is useless and hence deprecated. .El .Pp +The following parameters are specific to +.Xr carp 4 +interfaces: +.Bl -tag -width indent +.It Cm advbase Ar seconds +Specifies the base of the advertisement interval in seconds. +The acceptable values are 1 to 255. +The default value is 1. +.\" The default value is +.\" .Dv CARP_DFLTINTV . +.It Cm advskew Ar interval +Specifies the skew to add to the base advertisement interval to +make one host advertise slower than another host. +It is specified in units of 1/256 of a second. +The acceptable values are 0 to 254. +The default value is 0. +.It Cm pass Ar phrase +Set the authentication key to +.Ar phrase . +.It Cm vhid Ar n +Set the virtual host ID. +This is a required setting. +Acceptable values are 1 to 255. +.El +.Pp The .Nm utility displays the current configuration for a network interface @@ -1460,6 +1486,7 @@ requested address is unknown, or the use tried to alter an interface's configuration. 
.Sh SEE ALSO .Xr netstat 1 , +.Xr carp 4 , .Xr ifmedia 4 , .Xr netintro 4 , .Xr polling 4 , diff --git a/sbin/ifconfig/ifmedia.c b/sbin/ifconfig/ifmedia.c index 978c79a..7876715 100644 --- a/sbin/ifconfig/ifmedia.c +++ b/sbin/ifconfig/ifmedia.c @@ -175,6 +175,12 @@ media_status(int s) else printf("no carrier"); break; + case IFM_CARP: + if (ifmr.ifm_status & IFM_ACTIVE) + printf("master"); + else + printf("backup"); + break; } putchar('\n'); } diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 4b9fcba..53cf3c8 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -34,6 +34,7 @@ MAN= aac.4 \ bridge.4 \ bt.4 \ cardbus.4 \ + carp.4 \ ccd.4 \ cd.4 \ ch.4 \ diff --git a/share/man/man4/carp.4 b/share/man/man4/carp.4 new file mode 100644 index 0000000..1a458a8 --- /dev/null +++ b/share/man/man4/carp.4 @@ -0,0 +1,263 @@ +.\" $OpenBSD: carp.4,v 1.16 2004/12/07 23:41:35 jmc Exp $ +.\" $FreeBSD: src/share/man/man4/carp.4,v 1.10 2006/06/07 10:26:51 glebius Exp $ +.\" $Id$ +.\" +.\" Copyright (c) 2003, Ryan McBride. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" +.Dd April 9, 2007 +.Dt CARP 4 +.Os +.Sh NAME +.Nm carp +.Nd Common Address Redundancy Protocol +.Sh SYNOPSIS +.Cd "pseudo-device carp" +.Sh DESCRIPTION +The +.Nm +interface is a pseudo-device that implements and controls the +CARP protocol. +CARP allows multiple hosts on the same local network to share a set of IP addresses. +Its primary purpose is to ensure that these +addresses are always available, but in some configurations +.Nm +can also provide load balancing functionality. +.Pp +A +.Nm +interface can be created at runtime using the +.Nm ifconfig Li carp Ns Ar N Cm create +command or by configuring +it via +.Va cloned_interfaces +in the +.Pa /etc/rc.conf +file. +.Pp +To use +.Nm , +the administrator needs to configure at minimum a common virtual host ID (VHID) +and virtual host IP address on each machine which is to take part in the virtual +group. +Additional parameters can also be set on a per-interface basis: +.Cm advbase +and +.Cm advskew , +which are used to control how frequently the host sends advertisements when it +is the master for a virtual host, and +.Cm pass +which is used to authenticate +.Nm +advertisements. +The +.Cm advbase +parameter stands for +.Dq "advertisement base" . +It is measured in seconds and specifies the base of the advertisement interval. +The +.Cm advskew +parameter stands for +.Dq "advertisement skew" . +It is measured in 1/256 of seconds. 
+It is added to the base advertisement interval to make one host advertise +a bit slower than the other does. +Both +.Cm advbase +and +.Cm advskew +are put inside CARP advertisements. +These configurations can be done using +.Xr ifconfig 8 , +or through the +.Dv SIOCSVH +.Xr ioctl 2 . +.Pp +Additionally, there are a number of global parameters which can be set using +.Xr sysctl 8 : +.Bl -tag -width ".Va net.inet.carp.arpbalance" +.It Va net.inet.carp.allow +Accept incoming +.Nm +packets. +Enabled by default. +.It Va net.inet.carp.preempt +Allow virtual hosts to preempt each other. +It is also used to failover +.Nm +interfaces as a group. +When the option is enabled and one of the +.Nm +enabled physical interfaces +goes down, +.Cm advskew +is changed to 240 on all +.Nm +interfaces. +See also the first example. +Disabled by default. +.It Va net.inet.carp.log +Value of 0 disables any logging. +Value of 1 enables logging of bad +.Nm +packets. +Values above 1 enable logging state changes of +.Nm +interfaces. +Default value is 1. +.It Va net.inet.carp.arpbalance +Balance local traffic using ARP (see below). +Disabled by default. +.It Va net.inet.carp.suppress_preempt +A read only value showing the status of preemption suppression. +Preemption can be suppressed if link on an interface is down +or when +.Xr pfsync 4 +interface is not synchronized. +Value of 0 means that preemption is not suppressed, since no +problems are detected. +Every problem increments the suppression counter. +.El +.Sh ARP level load balancing +The +.Nm +has limited abilities for load balancing the incoming connections +between hosts in an Ethernet network. +For load balancing operation, one needs several CARP interfaces that +are configured to the same IP address, but to different VHIDs. +Once an ARP request is received, the CARP protocol will use a hashing +function against the source IP address in the ARP request to determine +which VHID this request should belong to. 
+If the corresponding CARP interface is in master state, the ARP request +will be answered; otherwise it will be ignored. +See the +.Sx EXAMPLES +section for a practical example of load balancing. +.Pp +The ARP load balancing has some limitations. +First, ARP balancing only works on the local network segment. +It cannot balance traffic that crosses a router, because the +router itself will always be balanced to the same virtual host. +Second, ARP load balancing can lead to asymmetric routing +of incoming and outgoing traffic, and thus combining it with +.Xr pfsync 4 +is dangerous, because this creates a race condition between +balanced routers and a host they are serving. +Imagine an incoming packet creating state on the first router, being +forwarded to its destination, and the destination replying faster +than the state information is packed and synced with the second router. +If the reply is load balanced to the second router, it will be +dropped because that router has no matching state. +.Sh EXAMPLES +For firewalls and routers with multiple interfaces, it is desirable to +failover all of the +.Nm +interfaces together, when one of the physical interfaces goes down. +This is achieved by the preempt option. +Enable it on both host A and B: +.Pp +.Dl sysctl net.inet.carp.preempt=1 +.Pp +Assume that host A is the preferred master and 192.168.1.x/24 is +configured on one physical interface and 192.168.2.y/24 on another. 
+This is the setup for host A: +.Bd -literal -offset indent +ifconfig carp0 create +ifconfig carp0 vhid 1 pass mekmitasdigoat 192.168.1.1/24 +ifconfig carp1 create +ifconfig carp1 vhid 2 pass mekmitasdigoat 192.168.2.1/24 +.Ed +.Pp +The setup for host B is identical, but it has a higher +.Cm advskew : +.Bd -literal -offset indent +ifconfig carp0 create +ifconfig carp0 vhid 1 advskew 100 pass mekmitasdigoat 192.168.1.1/24 +ifconfig carp1 create +ifconfig carp1 vhid 2 advskew 100 pass mekmitasdigoat 192.168.2.1/24 +.Ed +.Pp +Because of the preempt option, when one of the physical interfaces of +host A fails, +.Cm advskew +is adjusted to 240 on all its +.Nm +interfaces. +This will cause host B to preempt on both interfaces instead of +just the failed one. +.Pp +In order to set up an ARP balanced virtual host, it is necessary to configure +one virtual host for each physical host which would respond to ARP requests +and thus handle the traffic. +In the following example, two virtual hosts are configured on two hosts to +provide balancing and failover for the IP address 192.168.1.10. +.Pp +First the +.Nm +interfaces on host A are configured. +The +.Cm advskew +of 100 on the second virtual host means that its advertisements will be sent +out slightly less frequently. +.Bd -literal -offset indent +ifconfig carp0 create +ifconfig carp0 vhid 1 pass mekmitasdigoat 192.168.1.10/24 +ifconfig carp1 create +ifconfig carp1 vhid 2 advskew 100 pass mekmitasdigoat 192.168.1.10/24 +.Ed +.Pp +The configuration for host B is identical, except the +.Cm advskew +is on virtual host 1 rather than virtual host 2. 
+.Bd -literal -offset indent +ifconfig carp0 create +ifconfig carp0 vhid 1 advskew 100 pass mekmitasdigoat 192.168.1.10/24 +ifconfig carp1 create +ifconfig carp1 vhid 2 pass mekmitasdigoat 192.168.1.10/24 +.Ed +.Pp +Finally, the ARP balancing feature must be enabled on both hosts: +.Pp +.Dl sysctl net.inet.carp.arpbalance=1 +.Pp +When the hosts receive an ARP request for 192.168.1.10, the source IP address +of the request is used to compute which virtual host should answer the request. +The host which is master of the selected virtual host will reply to the +request, the other(s) will ignore it. +.Pp +This way, locally connected systems will receive different ARP replies and +subsequent IP traffic will be balanced among the hosts. +If one of the hosts fails, the other will take over the virtual MAC address, +and begin answering ARP requests on its behalf. +.Sh SEE ALSO +.Xr inet 4 , +.Xr pfsync 4 , +.Xr rc.conf 5 , +.Xr ifconfig 8 , +.Xr sysctl 8 +.Sh HISTORY +The +.Nm +device first appeared in +.Ox 3.5 . diff --git a/share/man/man4/pfsync.4 b/share/man/man4/pfsync.4 index 349e56e..f518690 100644 --- a/share/man/man4/pfsync.4 +++ b/share/man/man4/pfsync.4 @@ -24,7 +24,7 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" -.Dd November 29, 2002 +.Dd April 9, 2007 .Dt PFSYNC 4 .Os .Sh NAME @@ -129,8 +129,76 @@ only the necessary information. # ifconfig pfsync0 up syncif fxp0 maxupd 64 # tcpdump -s1500 -evtni pfsync0 .Ed +.Sh USING PFSYNC WITH CARP +.Nm +and +.Xr carp 4 +can be used together to provide automatic failover of a pair of firewalls +configured in parallel. +One firewall handles all traffic \- if it dies or +is shut down, the second firewall takes over automatically. +.Pp +Both firewalls in this example have three +.Xr sis 4 +interfaces. 
+sis0 is the external interface, on the 10.0.0.0/24 subnet; sis1 is the +internal interface, on the 192.168.0.0/24 subnet; and sis2 is the +.Nm +interface, using the 192.168.254.0/24 subnet. +A crossover cable connects the two firewalls via their sis2 interfaces. +On all three interfaces, firewall A uses the .254 address, while firewall B +uses .253. +The interfaces are configured as follows (firewall A unless otherwise +indicated): +.Pp +Interfaces configuration in +.Pa /etc/rc.conf : +.Bd -literal -offset indent +network_interfaces="lo0 sis0 sis1 sis2" +cloned_interfaces="carp0 carp1" +ifconfig_sis0="10.0.0.254/24" +ifconfig_sis1="192.168.0.254/24" +ifconfig_sis2="192.168.254.254/24" +ifconfig_carp0="vhid 1 pass foo 10.0.0.1/24" +ifconfig_carp1="vhid 2 pass bar 192.168.0.1/24" +pfsync_enable="YES" +pfsync_syncdev="sis2" +.Ed +.Pp +.Xr pf 4 +must also be configured to allow +.Nm +and +.Xr carp 4 +traffic through. +The following should be added to the top of +.Pa /etc/pf.conf : +.Bd -literal -offset indent +pass quick on { sis2 } proto pfsync +pass quick on { sis0 sis1 } proto carp keep state +.Ed +.Pp +If it is preferable that one firewall handle the traffic, +the +.Ar advskew +on the backup firewall's +.Xr carp 4 +interfaces should be set to something higher than +the primary's. 
+For example, if firewall B is the backup, its +carp1 configuration would look like this: +.Bd -literal -offset indent +ifconfig_carp1="vhid 2 pass bar advskew 100 192.168.0.1/24" +.Ed +.Pp +The following must also be added to +.Pa /etc/sysctl.conf : +.Bd -literal -offset indent +net.inet.carp.preempt=1 +.Ed .Sh SEE ALSO .Xr tcpdump 1 , +.Xr carp 4 , .Xr bpf 4 , .Xr inet 4 , .Xr inet6 4 , diff --git a/sys/conf/files b/sys/conf/files index 92cd5b7..5222fe3 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -79,6 +79,7 @@ crypto/des/des_setkey.c optional ipsec crypto/rijndael/rijndael-alg-fst.c optional ipsec ipsec_esp crypto/rijndael/rijndael-api-fst.c optional ipsec ipsec_esp crypto/sha1.c optional ipsec +crypto/sha1.c optional carp crypto/sha2/sha2.c optional ipsec ddb/db_access.c optional ddb ddb/db_kld.c optional ddb @@ -887,6 +888,7 @@ netinet/in_gif.c optional gif inet netinet/igmp.c optional inet netinet/in.c optional inet netinet/in_cksum.c optional inet +netinet/ip_carp.c optional carp netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet netinet/in_pcb.c optional inet diff --git a/sys/conf/options b/sys/conf/options index 184ec98..bfeaaef 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -264,6 +264,7 @@ BOOTP_COMPAT opt_bootp.h BOOTP_NFSROOT opt_bootp.h BOOTP_NFSV3 opt_bootp.h BOOTP_WIRED_TO opt_bootp.h +CARP opt_carp.h ETHER_II opt_ef.h ETHER_8023 opt_ef.h ETHER_8022 opt_ef.h diff --git a/sys/config/LINT b/sys/config/LINT index 58721c7..92bc5ad 100644 --- a/sys/config/LINT +++ b/sys/config/LINT @@ -633,6 +633,10 @@ device pf device pfsync device pflog +#CARP +pseudo-device carp +options CARP + # The MBUF_STRESS_TEST option enables options which create # various random failures / extreme cases related to mbuf # functions. 
See the mbuf(9) manpage for a list of available diff --git a/sys/config/VKERNEL b/sys/config/VKERNEL index 6f2cfb6..151f2c8 100644 --- a/sys/config/VKERNEL +++ b/sys/config/VKERNEL @@ -73,6 +73,8 @@ options DDB options DDB_TRACE options INVARIANTS +options CARP + # Floating point support - do not disable. device npx0 at nexus? @@ -87,6 +89,8 @@ pseudo-device md # Memory "disks" pseudo-device gif # IPv6 and IPv4 tunneling pseudo-device faith 1 # IPv6-to-IPv4 relaying (translation) +pseudo-device carp + # The `bpf' pseudo-device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! pseudo-device bpf #Berkeley packet filter @@ -96,3 +100,4 @@ pseudo-device bpf #Berkeley packet filt device vn device vkd device vke + diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index d08ae24..7ed3ca4 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -40,6 +40,7 @@ #include "opt_inet6.h" #include "opt_ipx.h" #include "opt_netgraph.h" +#include "opt_carp.h" #include #include @@ -71,6 +72,10 @@ #include #endif +#ifdef CARP +#include +#endif + #ifdef IPX #include #include @@ -346,6 +351,12 @@ ether_output(struct ifnet *ifp, struct m } } +#ifdef CARP + if (ifp->if_carp && (error = carp_output(ifp, m, dst, NULL))) + goto bad; +#endif + + /* Handle ng_ether(4) processing, if any */ if (ng_ether_output_p != NULL) { if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) @@ -654,6 +665,20 @@ ether_demux(struct ifnet *ifp, struct et if (rule) /* packet is passing the second time */ goto post_stats; +#ifdef CARP + /* + * XXX: Okay, we need to call carp_forus() and - if it is for + * us jump over code that does the normal check + * "ac_enaddr == ether_dhost". The check sequence is a bit + * different from OpenBSD, so we jump over as few code as + * possible, to catch _all_ sanity checks. This needs + * evaluation, to see if the carp ether_dhost values break any + * of these checks! 
+ */ + if (ifp->if_carp && carp_forus(ifp->if_carp, eh->ether_dhost)) + goto pre_stats; +#endif + /* * Discard packet if upper layers shouldn't see it because * it was unicast to a different Ethernet address. If the @@ -666,6 +691,11 @@ ether_demux(struct ifnet *ifp, struct et m_freem(m); return; } + +#ifdef CARP +pre_stats: +#endif + /* Discard packet if interface is not up */ if (!(ifp->if_flags & IFF_UP)) { m_freem(m); diff --git a/sys/net/if_media.h b/sys/net/if_media.h index cac9959..1dec5f6 100644 --- a/sys/net/if_media.h +++ b/sys/net/if_media.h @@ -239,6 +239,11 @@ int ifmedia_baudrate(int); #define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */ /* + * CARP Common Address Redundancy Protocol + */ +#define IFM_CARP 0x000000c0 + +/* * Shared media sub-types */ #define IFM_AUTO 0 /* Autoselect best media */ @@ -316,6 +321,7 @@ struct ifmedia_description { { IFM_TOKEN, "Token ring" }, \ { IFM_FDDI, "FDDI" }, \ { IFM_IEEE80211, "IEEE 802.11 Wireless Ethernet" }, \ + { IFM_CARP, "Common Address Redundancy Protocol" }, \ { 0, NULL }, \ } diff --git a/sys/net/if_types.h b/sys/net/if_types.h index d2e6742..4039b0d 100644 --- a/sys/net/if_types.h +++ b/sys/net/if_types.h @@ -252,4 +252,5 @@ #define IFT_STF 0xf3 #define IFT_PFLOG 0xf5 /* Packet filter logging */ #define IFT_PFSYNC 0xf6 /* Packet filter state syncing */ +#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */ #endif /* !_NET_IF_TYPES_H_ */ diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 8dc1e40..1436a72 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -81,6 +81,7 @@ struct rtentry; struct rt_addrinfo; struct socket; struct ether_header; +struct carp_if; struct ucred; struct lwkt_serialize; @@ -178,6 +179,7 @@ struct ifnet { int if_dunit; /* unit or IF_DUNIT_NONE */ struct ifaddrhead if_addrhead; /* linked list of addresses per if */ int if_pcount; /* number of promiscuous listeners */ + struct carp_if *if_carp; /* carp interface structure */ struct bpf_if *if_bpf; /* packet 
filter structure */ u_short if_index; /* numeric abbreviation for this if */ short if_timer; /* time 'til if_watchdog called */ diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index b5c3410..1b7fc53 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -74,6 +74,7 @@ */ #include "opt_inet.h" +#include "opt_carp.h" #include #include @@ -102,6 +103,10 @@ #include #include +#ifdef CARP +#include +#endif + #define SIN(s) ((struct sockaddr_in *)s) #define SDL(s) ((struct sockaddr_dl *)s) @@ -143,6 +148,7 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUT SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW, &arp_proxyall, 0, ""); +void arprequest_acces(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip, u_char *enaddr); static void arp_rtrequest (int, struct rtentry *, struct rt_addrinfo *); static void arprequest (struct ifnet *, struct in_addr *, struct in_addr *, u_char *); @@ -736,6 +742,7 @@ in_arpinput(struct mbuf *m) #ifdef SMP struct netmsg_arp_update msg; #endif + u_int8_t *enaddr = NULL; int op; int req_len; @@ -770,6 +777,19 @@ in_arpinput(struct mbuf *m) if (ifp->if_bridge && ia->ia_ifp && ifp->if_bridge == ia->ia_ifp->if_bridge) goto match; + +#ifdef CARP + /* + * If the interface does not match, but the receiving interface + * is part of carp, we call carp_iamatch to see if this is a + * request for the virtual host ip. + * XXX: This is really ugly! + */ + if (ifp->if_carp != NULL && + carp_iamatch(ifp->if_carp, ia, &isaddr, &enaddr) && + itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) + goto match; +#endif } LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash) { /* Skip all ia's which don't match */ @@ -801,8 +821,10 @@ in_arpinput(struct mbuf *m) return; match: + if (!enaddr) + enaddr = (u_int8_t *)IF_LLADDR(ifp); myaddr = ia->ia_addr.sin_addr; - if (!bcmp(ar_sha(ah), IF_LLADDR(ifp), ifp->if_addrlen)) { + if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen)) { m_freem(m); /* it's from me, ignore it. 
*/ return; } @@ -839,7 +861,7 @@ reply: if (itaddr.s_addr == myaddr.s_addr) { /* I am the target */ memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); - memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln); + memcpy(ar_sha(ah), enaddr, ah->ar_hln); } else { struct llinfo_arp *la; @@ -873,7 +895,7 @@ reply: return; } memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); - memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln); + memcpy(ar_sha(ah), enaddr, ah->ar_hln); #ifdef DEBUG_PROXY kprintf("arp: proxying for %s\n", inet_ntoa(itaddr)); #endif @@ -1039,6 +1061,16 @@ arp_ifinit(struct ifnet *ifp, struct ifa ifa->ifa_flags |= RTF_CLONING; } +void +arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr) +{ + if (IA_SIN(ifa)->sin_addr.s_addr != INADDR_ANY) + arprequest(ifp, &IA_SIN(ifa)->sin_addr, &IA_SIN(ifa)->sin_addr, + enaddr); + ifa->ifa_rtrequest = arp_rtrequest; + ifa->ifa_flags |= RTF_CLONING; +} + static void arp_init(void) { diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h index 6a2a65a..97dce14 100644 --- a/sys/netinet/if_ether.h +++ b/sys/netinet/if_ether.h @@ -124,6 +124,7 @@ extern u_char ether_ipmulticast_max[ETHE int arpresolve (struct ifnet *, struct rtentry *, struct mbuf *, struct sockaddr *, u_char *); void arp_ifinit (struct ifnet *, struct ifaddr *); +void arp_ifinit2 (struct ifnet *, struct ifaddr *, u_char *); #endif #endif diff --git a/sys/netinet/in.h b/sys/netinet/in.h index c867488..5cdedfb 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -169,6 +169,7 @@ #define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */ /* 101-254: Partly Unassigned */ #define IPPROTO_PIM 103 /* Protocol Independent Mcast */ +#define IPPROTO_CARP 112 /* CARP */ #define IPPROTO_PGM 113 /* PGM */ #define IPPROTO_SCTP 132 /* SCTP */ #define IPPROTO_PFSYNC 240 /* PFSYNC */ @@ -297,6 +298,7 @@ struct in_addr { #define INADDR_UNSPEC_GROUP (u_int32_t)0xe0000000 /* 224.0.0.0 */ #define INADDR_ALLHOSTS_GROUP (u_int32_t)0xe0000001 /* 224.0.0.1 */ #define 
INADDR_ALLRTRS_GROUP (u_int32_t)0xe0000002 /* 224.0.0.2 */ +#define INADDR_CARP_GROUP (u_int32_t)0xe0000012 /* 224.0.0.18 */ #define INADDR_PFSYNC_GROUP (u_int32_t)0xe00000f0 /* 224.0.0.240 */ #define INADDR_MAX_LOCAL_GROUP (u_int32_t)0xe00000ff /* 224.0.0.255 */ diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index ab5bfca..5babd61 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -41,6 +41,7 @@ #include "opt_ipsec.h" #include "opt_inet6.h" #include "opt_sctp.h" +#include "opt_carp.h" #include #include @@ -105,6 +106,10 @@ #include /* for cpu0_soport */ +#ifdef CARP +#include +#endif + extern struct domain inetdomain; static struct pr_usrreqs nousrreqs; @@ -294,6 +299,16 @@ struct protosw inetsw[] = { rip_init, 0, 0, 0, &rip_usrreqs }, + +#ifdef CARP + { SOCK_RAW, &inetdomain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR, + carp_input, rip_output, 0, rip_ctloutput, + 0, + 0, 0, 0, 0, + &rip_usrreqs +}, + +#endif }; struct domain inetdomain = { @@ -332,3 +347,6 @@ SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, d #ifdef PIM SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM"); #endif +#ifdef CARP +SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); +#endif diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c new file mode 100644 index 0000000..00a40c4 --- /dev/null +++ b/sys/netinet/ip_carp.c @@ -0,0 +1,2215 @@ +/* $Id$ */ +/* from $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $ */ + +/* + * Copyright (c) 2002 Michael Shalayeff. All rights reserved. + * Copyright (c) 2003 Ryan McBride. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opt_carp.h" +/*#include "opt_bpf.h"*/ +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef INET +#include +#include +#include +#include +#include +#include +#include +#endif + +#ifdef INET6 +#include +#include +#include +#include +#include +#endif + +#include +#include +#include + +#define CARP_IFNAME "carp" +static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); +static MALLOC_DEFINE(M_IFNET, "IFNET", "IFNET CARP?"); +SYSCTL_DECL(_net_inet_carp); + +struct carp_softc { + struct ifnet *sc_ifp; /* Interface clue */ + struct ifnet *sc_carpdev; /* Pointer to parent interface */ + struct in_ifaddr *sc_ia; /* primary iface address */ + struct ip_moptions sc_imo; +#ifdef INET6 + struct in6_ifaddr *sc_ia6; /* 
primary iface address v6 */ + struct ip6_moptions sc_im6o; +#endif /* INET6 */ + TAILQ_ENTRY(carp_softc) sc_list; + + enum { INIT = 0, BACKUP, MASTER } sc_state; + + int sc_flags_backup; + int sc_suppress; + + int sc_sendad_errors; +#define CARP_SENDAD_MAX_ERRORS 3 + int sc_sendad_success; +#define CARP_SENDAD_MIN_SUCCESS 3 + + int sc_vhid; + int sc_advskew; + int sc_naddrs; + int sc_naddrs6; + int sc_advbase; /* seconds */ + int sc_init_counter; + u_int64_t sc_counter; + + /* authentication */ +#define CARP_HMAC_PAD 64 + unsigned char sc_key[CARP_KEY_LEN]; + unsigned char sc_pad[CARP_HMAC_PAD]; + SHA1_CTX sc_sha1; + + struct callout sc_ad_tmo; /* advertisement timeout */ + struct callout sc_md_tmo; /* master down timeout */ + struct callout sc_md6_tmo; /* master down timeout */ + + LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ +}; +#define SC2IFP(sc) ((sc)->sc_ifp) + +int carp_suppress_preempt = 0; +int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ +SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, + &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); +SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, + &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); +SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, + &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); +SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, + &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); +SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, + &carp_suppress_preempt, 0, "Preemption is suppressed"); + +struct carpstats carpstats; +SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, + &carpstats, carpstats, + "CARP statistics (struct carpstats, netinet/ip_carp.h)"); + +struct carp_if { + TAILQ_HEAD(, carp_softc) vhif_vrs; + int vhif_nvrs; + + struct ifnet *vhif_ifp; + struct lock vhif_lock; +}; + +/* Get carp_if from softc. 
Valid after carp_set_addr{,6}. */
+#define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
+
+/*
+ * Locking for a struct carp_if, built on its embedded vhif_lock.
+ * CARP_LOCK_DESTROY and the *_ASSERT macros expand to an empty statement.
+ * NOTE(review): every macro below already ends in ';', so a use placed
+ * directly before an "else" would not compile -- confirm before adding one.
+ */
+#define CARP_LOCK_INIT(cif) lockinit(&(cif)->vhif_lock, "carp_if", 0, LK_NOWAIT);
+#define CARP_LOCK_DESTROY(cif) ;
+#define CARP_LOCK_ASSERT(cif) ;
+#define CARP_LOCK(cif) lockmgr(&(cif)->vhif_lock, LK_EXCLUSIVE);
+#define CARP_UNLOCK(cif) lockmgr(&(cif)->vhif_lock, LK_RELEASE);
+
+/* Same lock, reached through a softc; needs sc->sc_carpdev to be set. */
+#define CARP_SCLOCK(sc) lockmgr(&SC2CIF(sc)->vhif_lock, LK_EXCLUSIVE);
+#define CARP_SCUNLOCK(sc) lockmgr(&SC2CIF(sc)->vhif_lock, LK_RELEASE);
+#define CARP_SCLOCK_ASSERT(sc) ;
+
+/* Emit a LOG_INFO message when carp_opts[CARPCTL_LOG] is above 0. */
+#define CARP_LOG(...) do { \
+	if (carp_opts[CARPCTL_LOG] > 0) \
+		log(LOG_INFO, __VA_ARGS__); \
+} while (0)
+
+/* Emit a LOG_DEBUG message when carp_opts[CARPCTL_LOG] is above 1. */
+#define CARP_DEBUG(...) do { \
+	if (carp_opts[CARPCTL_LOG] > 1) \
+		log(LOG_DEBUG, __VA_ARGS__); \
+} while (0)
+
+static void carp_hmac_prepare(struct carp_softc *);
+static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
+	unsigned char *);
+static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
+	unsigned char *);
+static void carp_setroute(struct carp_softc *, int);
+static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
+static int carp_clone_create(struct if_clone *, int);
+static void carp_clone_destroy(struct ifnet *);
+static void carpdetach(struct carp_softc *, int);
+static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
+	struct carp_header *);
+static void carp_send_ad_all(void);
+static void carp_send_ad(void *);
+static void carp_send_ad_locked(struct carp_softc *);
+static void carp_send_arp(struct carp_softc *);
+static void carp_master_down(void *);
+static void carp_master_down_locked(struct carp_softc *);
+static int carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
+static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+	struct rtentry *);
+static void carp_start(struct ifnet *);
+static void carp_setrun(struct carp_softc *, sa_family_t);
+static void carp_set_state(struct carp_softc *, int);
+static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
+/* Selector passed as the last argument of carp_addrcount(). */
+enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
+
+static void carp_multicast_cleanup(struct carp_softc *);
+static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
+static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
+static void carp_carpdev_state_locked(struct carp_if *);
+static void carp_sc_state_locked(struct carp_softc *);
+#ifdef INET6
+static void carp_send_na(struct carp_softc *);
+static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
+static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
+static void carp_multicast6_cleanup(struct carp_softc *);
+#endif
+
+/* Every carp softc created by carp_clone_create(), linked via sc_next. */
+static LIST_HEAD(, carp_softc) carpif_list;
+
+struct if_clone carp_cloner = IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 0, IF_MAXUNIT);
+
+static eventhandler_tag if_detach_event_tag;
+
+/* Checksum the first len bytes of the mbuf chain; thin wrapper over in_cksum(). */
+static __inline u_int16_t
+carp_cksum(struct mbuf *m, int len)
+{
+	return (in_cksum(m, len));
+}
+
+/*
+ * Precompute the part of the advertisement HMAC that does not depend on
+ * the per-packet counter.  sc_sha1 is seeded with the inner pad (key XOR
+ * 0x36), the CARP version, the advertisement type, the low 8 bits of the
+ * vhid and every AF_INET / AF_INET6 address found on the carp interface.
+ * On return sc_pad holds the outer pad (key XOR 0x5c) for
+ * carp_hmac_generate().  The parent-interface lock is taken only when
+ * sc_carpdev is set.
+ */
+static void
+carp_hmac_prepare(struct carp_softc *sc)
+{
+	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
+	u_int8_t vhid = sc->sc_vhid & 0xff;
+	struct ifaddr *ifa;
+	int i;
+#ifdef INET6
+	struct in6_addr in6;
+#endif
+
+	if (sc->sc_carpdev)
+		CARP_SCLOCK(sc);
+
+	/* XXX: possible race here */
+
+	/* compute ipad from key */
+	bzero(sc->sc_pad, sizeof(sc->sc_pad));
+	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
+	for (i = 0; i < sizeof(sc->sc_pad); i++)
+		sc->sc_pad[i] ^= 0x36;
+
+	/* precompute first part of inner hash */
+	SHA1Init(&sc->sc_sha1);
+	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
+	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
+	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
+	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
+#ifdef INET
+	TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+		if (ifa->ifa_addr->sa_family == AF_INET)
+			SHA1Update(&sc->sc_sha1,
+			    (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr,
+			    sizeof(struct in_addr));
+	}
+#endif /* INET */
+#ifdef INET6
+	TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+		if (ifa->ifa_addr->sa_family == AF_INET6) {
+			/* hash a copy with the scope cleared, not the stored address */
+			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
+			in6_clearscope(&in6);
+			SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
+		}
+	}
+#endif /* INET6 */
+
+	/* convert ipad to opad */
+	for (i = 0; i < sizeof(sc->sc_pad); i++)
+		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
+
+	if (sc->sc_carpdev)
+		CARP_SCUNLOCK(sc);
+}
+
+/*
+ * Finish the HMAC for one advertisement.  Starts from a copy of the
+ * precomputed sc_sha1 state, mixes in sizeof(sc->sc_counter) bytes of
+ * counter, then hashes sc_pad followed by that inner digest.  The 20-byte
+ * result is written to md.  sc itself is not modified.
+ */
+static void
+carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
+    unsigned char md[20])
+{
+	SHA1_CTX sha1ctx;
+
+	/* fetch first half of inner hash */
+	bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
+
+	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
+	SHA1Final(md, &sha1ctx);
+
+	/* outer hash */
+	SHA1Init(&sha1ctx);
+	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
+	SHA1Update(&sha1ctx, md, 20);
+	SHA1Final(md, &sha1ctx);
+}
+
+/*
+ * Recompute the HMAC for counter and compare it with md.
+ * Returns the bcmp() result: 0 when the digests match, non-zero otherwise.
+ * NOTE(review): bcmp() is presumably not a constant-time comparison.
+ */
+static int
+carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
+    unsigned char md[20])
+{
+	unsigned char md2[20];
+
+	CARP_SCLOCK_ASSERT(sc);
+
+	carp_hmac_generate(sc, counter, md2);
+
+	return (bcmp(md, md2, sizeof(md2)));
+}
+
+/*
+ * Add or delete (cmd is RTM_ADD or RTM_DELETE) the routes for the
+ * addresses on the carp interface.  For AF_INET the host route is only
+ * touched when this softc is the first MASTER holding the address
+ * (RTM_ADD, count == 1) or no MASTER holds it any more (RTM_DELETE,
+ * count == 0), and only while sc_carpdev is set.  For AF_INET6 the
+ * loopback route is added on RTM_ADD and removed for any other cmd.
+ */
+static void
+carp_setroute(struct carp_softc *sc, int cmd)
+{
+	struct ifaddr *ifa;
+
+	if (sc->sc_carpdev)
+		CARP_SCLOCK_ASSERT(sc);
+
+	crit_enter();
+	TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+		if (ifa->ifa_addr->sa_family == AF_INET &&
+		    sc->sc_carpdev != NULL) {
+			int count = carp_addrcount(
+			    (struct carp_if *)sc->sc_carpdev->if_carp,
+			    ifatoia(ifa), CARP_COUNT_MASTER);
+
+			if ((cmd == RTM_ADD && count == 1) ||
+			    (cmd == RTM_DELETE && count == 0))
+				rtinit(ifa, cmd, RTF_UP | RTF_HOST);
+		}
+#ifdef INET6
+		if (ifa->ifa_addr->sa_family == AF_INET6) {
+			if (cmd == RTM_ADD)
+				in6_ifaddloop(ifa);
+			else
+				in6_ifremloop(ifa);
+		}
+#endif /* INET6 */
+	}
+	crit_exit();
+
+}
+
+/*
+ * Cloner create hook: allocate a softc and a separately allocated
+ * struct ifnet (M_IFNET), set the protocol defaults, attach the interface
+ * and bpf, and link the softc onto carpif_list.
+ * Returns 0 on success, ENOSPC if the ifnet allocation returns NULL.
+ * The ifnet is owned by the softc and released in carp_clone_destroy().
+ */
+static int
+carp_clone_create(struct if_clone *ifc, int unit)
+{
+
+	struct carp_softc *sc;
+	struct ifnet *ifp;
+
+	MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
+	ifp = SC2IFP(sc) = kmalloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
+
+	/* NOTE(review): presumably unreachable with M_WAITOK; kept as a guard. */
+	if (ifp == NULL) {
+		FREE(sc, M_CARP);
+		return (ENOSPC);
+	}
+
+	sc->sc_flags_backup = 0;
+	sc->sc_suppress = 0;
+	sc->sc_advbase = CARP_DFLTINTV;
+	sc->sc_vhid = -1;	/* required setting */
+	sc->sc_advskew = 0;
+	sc->sc_init_counter = 1;
+	sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
+
+#ifdef INET6
+	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
+#endif
+
+/*	sc->sc_imo.imo_membership = kmalloc((sizeof(struct in_multi) * IP_MAX_MEMBERSHIPS), M_CARP,M_WAITOK);*/
+/*
+	sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
+	sc->sc_imo.imo_multicast_vif = -1;
+*/
+	callout_init(&sc->sc_ad_tmo);
+	callout_init(&sc->sc_md_tmo);
+	callout_init(&sc->sc_md6_tmo);
+
+	ifp->if_softc = sc;
+	if_initname(ifp, CARP_IFNAME, unit);
+	ifp->if_mtu = ETHERMTU;
+	ifp->if_flags = IFF_LOOPBACK;
+	ifp->if_ioctl = carp_ioctl;
+	ifp->if_output = carp_looutput;
+	ifp->if_start = carp_start;
+	ifp->if_type = IFT_CARP;
+	ifp->if_snd.ifq_maxlen = ifqmaxlen;
+	ifp->if_hdrlen = 0;
+	if_attach(ifp, NULL);
+	bpfattach(ifp, DLT_NULL, sizeof(u_int));
+
+	crit_enter();
+	LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
+	crit_exit();
+
+	return (0);
+}
+
+/*
+ * Cloner destroy hook: detach the softc from its parent interface (if
+ * any), unlink it from carpif_list, detach bpf and the interface, then
+ * release both allocations made by carp_clone_create().
+ */
+static void
+carp_clone_destroy(struct ifnet *ifp)
+{
+	struct carp_softc *sc = ifp->if_softc;
+
+	if (sc->sc_carpdev)
+		CARP_SCLOCK(sc);
+	carpdetach(sc, 1);	/* Returns unlocked. */
+
+	crit_enter();
+	LIST_REMOVE(sc, sc_next);
+	crit_exit();
+	bpfdetach(ifp);
+	if_detach(ifp);
+	/*
+	 * The ifnet was allocated on its own with M_IFNET in
+	 * carp_clone_create(); free it here or it leaks on every destroy.
+	 */
+	kfree(ifp, M_IFNET);
+/*	kfree(sc->sc_imo.imo_membership, M_CARP); */
+	kfree(sc, M_CARP);
+}
+
+/*
+ * This function can be called on CARP interface destroy path,
+ * and in case of the removal of the underlying interface as
+ * well. We differentiate these two cases. 
In the latter case
+ * we do not cleanup our multicast memberships, since they
+ * are already freed. Also, in the latter case we do not
+ * release the lock on return, because the function will be
+ * called once more, for another CARP instance on the same
+ * interface.
+ *
+ * sc is the instance to detach; unlock is non-zero on the destroy path
+ * and zero when the underlying interface is going away.  When the last
+ * instance is removed from the parent, the parent's carp_if is freed and
+ * its if_carp pointer is cleared, so callers must not touch the carp_if
+ * (including its lock) afterwards.
+ */
+static void
+carpdetach(struct carp_softc *sc, int unlock)
+{
+	struct carp_if *cif;
+
+	callout_stop(&sc->sc_ad_tmo);
+	callout_stop(&sc->sc_md_tmo);
+	callout_stop(&sc->sc_md6_tmo);
+
+	/* give back this instance's contributions to the global counter */
+	if (sc->sc_suppress)
+		carp_suppress_preempt--;
+	sc->sc_suppress = 0;
+
+	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
+		carp_suppress_preempt--;
+	sc->sc_sendad_errors = 0;
+
+	carp_set_state(sc, INIT);
+	SC2IFP(sc)->if_flags &= ~IFF_UP;
+	carp_setrun(sc, 0);
+	if (unlock)
+		carp_multicast_cleanup(sc);
+#ifdef INET6
+	/* NOTE(review): runs on both paths, unlike the IPv4 cleanup -- confirm. */
+	carp_multicast6_cleanup(sc);
+#endif
+
+	if (sc->sc_carpdev != NULL) {
+		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+		CARP_LOCK_ASSERT(cif);
+		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+		if (!--cif->vhif_nvrs) {
+			/* last instance on this parent: tear the carp_if down */
+			ifpromisc(sc->sc_carpdev, 0);
+			sc->sc_carpdev->if_carp = NULL;
+			CARP_LOCK_DESTROY(cif);
+			FREE(cif, M_IFADDR);
+		} else if (unlock)
+			CARP_UNLOCK(cif);
+		sc->sc_carpdev = NULL;
+	}
+}
+
+/*
+ * Detach an interface from the carp.
+ *
+ * Called with the underlying interface ifp; does nothing when ifp has no
+ * carp_if.  Otherwise every carp instance on it is detached with the
+ * lock held.
+ */
+static void
+carp_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
+	struct carp_softc *sc, *nextsc;
+
+	if (cif == NULL)
+		return;
+
+	/*
+	 * XXX: At the end of for() cycle the lock will be destroyed.
+	 */
+	CARP_LOCK(cif);
+	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
+		/* fetch the successor first: carpdetach() unlinks sc */
+		nextsc = TAILQ_NEXT(sc, sc_list);
+		carpdetach(sc, 0);
+	}
+	/*
+	 * carpdetach() frees cif and clears ifp->if_carp once the last
+	 * instance is gone; only release the lock if cif still exists.
+	 */
+	if (ifp->if_carp != NULL)
+		CARP_UNLOCK(cif);
+}
+
+/*
+ * process input packet.
+ * we have rearranged checks order compared to the rfc,
+ * but it seems more efficient this way or not possible otherwise. 
+ */ +void +carp_input(struct mbuf *m, int hlen) +{ + struct ip *ip = mtod(m, struct ip *); + struct carp_header *ch; + int iplen, len; + + carpstats.carps_ipackets++; + + if (!carp_opts[CARPCTL_ALLOW]) { + m_freem(m); + return; + } + + /* check if received on a valid carp interface */ + if (m->m_pkthdr.rcvif->if_carp == NULL) { + carpstats.carps_badif++; + CARP_LOG("carp_input: packet received on non-carp " + "interface: %s\n", + m->m_pkthdr.rcvif->if_xname); + m_freem(m); + return; + } + + /* verify that the IP TTL is 255. */ + if (ip->ip_ttl != CARP_DFLTTL) { + carpstats.carps_badttl++; + CARP_LOG("carp_input: received ttl %d != 255i on %s\n", + ip->ip_ttl, + m->m_pkthdr.rcvif->if_xname); + m_freem(m); + return; + } + + iplen = ip->ip_hl << 2; + + if (m->m_pkthdr.len < iplen + sizeof(*ch)) { + carpstats.carps_badlen++; + CARP_LOG("carp_input: received len %zd < " + "sizeof(struct carp_header)\n", + m->m_len - sizeof(struct ip)); + m_freem(m); + return; + } + + if (iplen + sizeof(*ch) < m->m_len) { + if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { + carpstats.carps_hdrops++; + CARP_LOG("carp_input: pullup failed\n"); + return; + } + ip = mtod(m, struct ip *); + } + ch = (struct carp_header *)((char *)ip + iplen); + + /* + * verify that the received packet length is + * equal to the CARP header + */ + len = iplen + sizeof(*ch); + if (len > m->m_pkthdr.len) { + carpstats.carps_badlen++; + CARP_LOG("carp_input: packet too short %d on %s\n", + m->m_pkthdr.len, + m->m_pkthdr.rcvif->if_xname); + m_freem(m); + return; + } + + if ((m = m_pullup(m, len)) == NULL) { + carpstats.carps_hdrops++; + return; + } + ip = mtod(m, struct ip *); + ch = (struct carp_header *)((char *)ip + iplen); + + /* verify the CARP checksum */ + m->m_data += iplen; + if (carp_cksum(m, len - iplen)) { + carpstats.carps_badsum++; + CARP_LOG("carp_input: checksum failed on %s\n", + m->m_pkthdr.rcvif->if_xname); + m_freem(m); + return; + } + m->m_data -= iplen; + + carp_input_c(m, ch, AF_INET); 
+} + +#ifdef INET6 +int +carp6_input(struct mbuf **mp, int *offp, int proto) +{ + struct mbuf *m = *mp; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct carp_header *ch; + u_int len; + + carpstats.carps_ipackets6++; + + if (!carp_opts[CARPCTL_ALLOW]) { + m_freem(m); + return (IPPROTO_DONE); + } + + /* check if received on a valid carp interface */ + if (m->m_pkthdr.rcvif->if_carp == NULL) { + carpstats.carps_badif++; + CARP_LOG("carp6_input: packet received on non-carp " + "interface: %s\n", + m->m_pkthdr.rcvif->if_xname); + m_freem(m); + return (IPPROTO_DONE); + } + + /* verify that the IP TTL is 255 */ + if (ip6->ip6_hlim != CARP_DFLTTL) { + carpstats.carps_badttl++; + CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", + ip6->ip6_hlim, + m->m_pkthdr.rcvif->if_xname); + m_freem(m); + return (IPPROTO_DONE); + } + + /* verify that we have a complete carp packet */ + len = m->m_len; + IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); + if (ch == NULL) { + carpstats.carps_badlen++; + CARP_LOG("carp6_input: packet size %u too small\n", len); + return (IPPROTO_DONE); + } + + + /* verify the CARP checksum */ + m->m_data += *offp; + if (carp_cksum(m, sizeof(*ch))) { + carpstats.carps_badsum++; + CARP_LOG("carp6_input: checksum failed, on %s\n", + m->m_pkthdr.rcvif->if_xname); + m_freem(m); + return (IPPROTO_DONE); + } + m->m_data -= *offp; + + carp_input_c(m, ch, AF_INET6); + return (IPPROTO_DONE); +} +#endif /* INET6 */ + +static void +carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct carp_softc *sc; + u_int64_t tmp_counter; + struct timeval sc_tv, ch_tv; + + /* verify that the VHID is valid on the receiving interface */ + CARP_LOCK(ifp->if_carp); + TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) + if (sc->sc_vhid == ch->carp_vhid) + break; + + if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_flags & IFF_RUNNING))) { + 
carpstats.carps_badvhid++; + CARP_UNLOCK(ifp->if_carp); + m_freem(m); + return; + } + + getmicrotime(&SC2IFP(sc)->if_lastchange); + SC2IFP(sc)->if_ipackets++; + SC2IFP(sc)->if_ibytes += m->m_pkthdr.len; + + if (SC2IFP(sc)->if_bpf) { + struct ip *ip = mtod(m, struct ip *); + + /* BPF wants net byte order */ + ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); + ip->ip_off = htons(ip->ip_off); + bpf_mtap(SC2IFP(sc)->if_bpf, m); + } + + /* verify the CARP version. */ + if (ch->carp_version != CARP_VERSION) { + carpstats.carps_badver++; + SC2IFP(sc)->if_ierrors++; + CARP_UNLOCK(ifp->if_carp); + CARP_LOG("%s; invalid version %d\n", + SC2IFP(sc)->if_xname, + ch->carp_version); + m_freem(m); + return; + } + + /* verify the hash */ + if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { + carpstats.carps_badauth++; + SC2IFP(sc)->if_ierrors++; + CARP_UNLOCK(ifp->if_carp); + CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname); + m_freem(m); + return; + } + + tmp_counter = ntohl(ch->carp_counter[0]); + tmp_counter = tmp_counter<<32; + tmp_counter += ntohl(ch->carp_counter[1]); + + /* XXX Replay protection goes here */ + + sc->sc_init_counter = 0; + sc->sc_counter = tmp_counter; + + sc_tv.tv_sec = sc->sc_advbase; + if (carp_suppress_preempt && sc->sc_advskew < 240) + sc_tv.tv_usec = 240 * 1000000 / 256; + else + sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; + ch_tv.tv_sec = ch->carp_advbase; + ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; + + switch (sc->sc_state) { + case INIT: + break; + case MASTER: + /* + * If we receive an advertisement from a master who's going to + * be more frequent than us, go into BACKUP state. 
+ */ + if (timevalcmp(&sc_tv, &ch_tv, >) || + timevalcmp(&sc_tv, &ch_tv, ==)) { + callout_stop(&sc->sc_ad_tmo); + CARP_DEBUG("%s: MASTER -> BACKUP " + "(more frequent advertisement received)\n", + SC2IFP(sc)->if_xname); + carp_set_state(sc, BACKUP); + carp_setrun(sc, 0); + carp_setroute(sc, RTM_DELETE); + } + break; + case BACKUP: + /* + * If we're pre-empting masters who advertise slower than us, + * and this one claims to be slower, treat him as down. + */ + if (carp_opts[CARPCTL_PREEMPT] && + timevalcmp(&sc_tv, &ch_tv, <)) { + CARP_DEBUG("%s: BACKUP -> MASTER " + "(preempting a slower master)\n", + SC2IFP(sc)->if_xname); + carp_master_down_locked(sc); + break; + } + + /* + * If the master is going to advertise at such a low frequency + * that he's guaranteed to time out, we'd might as well just + * treat him as timed out now. + */ + sc_tv.tv_sec = sc->sc_advbase * 3; + if (timevalcmp(&sc_tv, &ch_tv, <)) { + CARP_DEBUG("%s: BACKUP -> MASTER " + "(master timed out)\n", + SC2IFP(sc)->if_xname); + carp_master_down_locked(sc); + break; + } + + /* + * Otherwise, we reset the counter and wait for the next + * advertisement. 
+ */ + carp_setrun(sc, af); + break; + } + + CARP_UNLOCK(ifp->if_carp); + + m_freem(m); + return; +} + +static int +carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) +{ + struct m_tag *mtag; + struct ifnet *ifp = SC2IFP(sc); + + if (sc->sc_init_counter) { + /* this could also be seconds since unix epoch */ + sc->sc_counter = karc4random(); + sc->sc_counter = sc->sc_counter << 32; + sc->sc_counter += karc4random(); + } else + sc->sc_counter++; + + ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); + ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); + + carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); + + /* Tag packet for carp_output */ + mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + SC2IFP(sc)->if_oerrors++; + return (ENOMEM); + } + bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); + m_tag_prepend(m, mtag); + + return (0); +} + +static void +carp_send_ad_all(void) +{ + struct carp_softc *sc; + + LIST_FOREACH(sc, &carpif_list, sc_next) { + if (sc->sc_carpdev == NULL) + continue; + CARP_SCLOCK(sc); + if ((SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_flags & IFF_RUNNING) && + sc->sc_state == MASTER) + carp_send_ad_locked(sc); + CARP_SCUNLOCK(sc); + } +} + +static void +carp_send_ad(void *v) +{ + struct carp_softc *sc = v; + + CARP_SCLOCK(sc); + carp_send_ad_locked(sc); + CARP_SCUNLOCK(sc); +} + +static void +carp_send_ad_locked(struct carp_softc *sc) +{ + struct carp_header ch; + struct timeval tv; + struct carp_header *ch_ptr; + struct mbuf *m; + int len, advbase, advskew; + + + /* bow out if we've lost our UPness or RUNNINGuiness */ + if (!((SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_flags & IFF_RUNNING))) { + advbase = 255; + advskew = 255; + } else { + advbase = sc->sc_advbase; + if (!carp_suppress_preempt || sc->sc_advskew > 240) + advskew = sc->sc_advskew; + else + advskew = 240; + tv.tv_sec = advbase; + tv.tv_usec = advskew * 
1000000 / 256; + } + + ch.carp_version = CARP_VERSION; + ch.carp_type = CARP_ADVERTISEMENT; + ch.carp_vhid = sc->sc_vhid; + ch.carp_advbase = advbase; + ch.carp_advskew = advskew; + ch.carp_authlen = 7; /* XXX DEFINE */ + ch.carp_pad1 = 0; /* must be zero */ + ch.carp_cksum = 0; + +#ifdef INET + if (sc->sc_ia) { + struct ip *ip; + + MGETHDR(m, M_NOWAIT, MT_HEADER); + if (m == NULL) { + SC2IFP(sc)->if_oerrors++; + carpstats.carps_onomem++; + /* XXX maybe less ? */ + if (advbase != 255 || advskew != 255) + callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), + carp_send_ad, sc); + return; + } + len = sizeof(*ip) + sizeof(ch); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + MH_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = IPTOS_LOWDELAY; + ip->ip_len = len; + ip->ip_id = ip_newid(); + ip->ip_off = IP_DF; + ip->ip_ttl = CARP_DFLTTL; + ip->ip_p = IPPROTO_CARP; + ip->ip_sum = 0; + ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; + ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); + + ch_ptr = (struct carp_header *)(&ip[1]); + bcopy(&ch, ch_ptr, sizeof(ch)); + if (carp_prepare_ad(m, sc, ch_ptr)) + return; + + m->m_data += sizeof(*ip); + ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); + m->m_data -= sizeof(*ip); + + getmicrotime(&SC2IFP(sc)->if_lastchange); + SC2IFP(sc)->if_opackets++; + SC2IFP(sc)->if_obytes += len; + carpstats.carps_opackets++; + + if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { + SC2IFP(sc)->if_oerrors++; + if (sc->sc_sendad_errors < INT_MAX) + sc->sc_sendad_errors++; + if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { + carp_suppress_preempt++; + if (carp_suppress_preempt == 1) { + CARP_SCUNLOCK(sc); + carp_send_ad_all(); + CARP_SCLOCK(sc); + } + } + sc->sc_sendad_success = 0; + } else { + if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { + if (++sc->sc_sendad_success >= + CARP_SENDAD_MIN_SUCCESS) { + 
carp_suppress_preempt--; + sc->sc_sendad_errors = 0; + } + } else + sc->sc_sendad_errors = 0; + } + } +#endif /* INET */ +#ifdef INET6 + if (sc->sc_ia6) { + struct ip6_hdr *ip6; + + MGETHDR(m, M_NOWAIT, MT_HEADER); + if (m == NULL) { + SC2IFP(sc)->if_oerrors++; + carpstats.carps_onomem++; + /* XXX maybe less ? */ + if (advbase != 255 || advskew != 255) + callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), + carp_send_ad, sc); + return; + } + len = sizeof(*ip6) + sizeof(ch); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + MH_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip6 = mtod(m, struct ip6_hdr *); + bzero(ip6, sizeof(*ip6)); + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_hlim = CARP_DFLTTL; + ip6->ip6_nxt = IPPROTO_CARP; + bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, + sizeof(struct in6_addr)); + /* set the multicast destination */ + + ip6->ip6_dst.s6_addr16[0] = htons(0xff02); + ip6->ip6_dst.s6_addr8[15] = 0x12; + if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { + SC2IFP(sc)->if_oerrors++; + m_freem(m); + CARP_LOG("%s: in6_setscope failed\n", __func__); + return; + } + + ch_ptr = (struct carp_header *)(&ip6[1]); + bcopy(&ch, ch_ptr, sizeof(ch)); + if (carp_prepare_ad(m, sc, ch_ptr)) + return; + + m->m_data += sizeof(*ip6); + ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); + m->m_data -= sizeof(*ip6); + + getmicrotime(&SC2IFP(sc)->if_lastchange); + SC2IFP(sc)->if_opackets++; + SC2IFP(sc)->if_obytes += len; + carpstats.carps_opackets6++; + + if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { + SC2IFP(sc)->if_oerrors++; + if (sc->sc_sendad_errors < INT_MAX) + sc->sc_sendad_errors++; + if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { + carp_suppress_preempt++; + if (carp_suppress_preempt == 1) { + CARP_SCUNLOCK(sc); + carp_send_ad_all(); + CARP_SCLOCK(sc); + } + } + sc->sc_sendad_success = 0; + } else { + if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { + if (++sc->sc_sendad_success >= + 
CARP_SENDAD_MIN_SUCCESS) { + carp_suppress_preempt--; + sc->sc_sendad_errors = 0; + } + } else + sc->sc_sendad_errors = 0; + } + } +#endif /* INET6 */ + + if (advbase != 255 || advskew != 255) + callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), + carp_send_ad, sc); + +} + +/* + * Broadcast a gratuitous ARP request containing + * the virtual router MAC address for each IP address + * associated with the virtual router. + */ +static void +carp_send_arp(struct carp_softc *sc) +{ + struct ifaddr *ifa; + + TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { + + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + lwkt_serialize_enter(sc->sc_carpdev->if_serializer); + arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp)); + lwkt_serialize_exit(sc->sc_carpdev->if_serializer); + + DELAY(1000); /* XXX */ + } +} + +#ifdef INET6 +static void +carp_send_na(struct carp_softc *sc) +{ + struct ifaddr *ifa; + struct in6_addr *in6; + static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; + + TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { + + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + + in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; + nd6_na_output(sc->sc_carpdev, &mcast, in6, + ND_NA_FLAG_OVERRIDE, 1, NULL); + DELAY(1000); /* XXX */ + } +} +#endif /* INET6 */ + +static int +carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) +{ + struct carp_softc *vh; + struct ifaddr *ifa; + int count = 0; + + CARP_LOCK_ASSERT(cif); + + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((type == CARP_COUNT_RUNNING && + (SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING)) || + (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { + TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, + ifa_list) { + if (ifa->ifa_addr->sa_family == AF_INET && + ia->ia_addr.sin_addr.s_addr == + ifatoia(ifa)->ia_addr.sin_addr.s_addr) + count++; + } + } + } + return (count); +} + +int +carp_iamatch(void *v, struct in_ifaddr *ia, + struct in_addr *isaddr, 
u_int8_t **enaddr) +{ + struct carp_if *cif = v; + struct carp_softc *vh; + int index, count = 0; + struct ifaddr *ifa; + + CARP_LOCK(cif); + + if (carp_opts[CARPCTL_ARPBALANCE]) { + /* + * XXX proof of concept implementation. + * We use the source ip to decide which virtual host should + * handle the request. If we're master of that virtual host, + * then we respond, otherwise, just drop the arp packet on + * the floor. + */ + count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); + if (count == 0) { + /* should never reach this */ + CARP_UNLOCK(cif); + return (0); + } + + /* this should be a hash, like pf_hash() */ + index = ntohl(isaddr->s_addr) % count; + count = 0; + + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING)) { + TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, + ifa_list) { + if (ifa->ifa_addr->sa_family == + AF_INET && + ia->ia_addr.sin_addr.s_addr == + ifatoia(ifa)->ia_addr.sin_addr.s_addr) { + if (count == index) { + if (vh->sc_state == + MASTER) { + *enaddr = IF_LLADDR(vh->sc_ifp); + CARP_UNLOCK(cif); + return (1); + } else { + CARP_UNLOCK(cif); + return (0); + } + } + count++; + } + } + } + } + } else { + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING) && + vh->sc_state == MASTER) { + *enaddr = IF_LLADDR(vh->sc_ifp); + CARP_UNLOCK(cif); + return (1); + } + } + } + CARP_UNLOCK(cif); + return(0); +} + +#ifdef INET6 +struct ifaddr * +carp_iamatch6(void *v, struct in6_addr *taddr) +{ + struct carp_if *cif = v; + struct carp_softc *vh; + struct ifaddr *ifa; + + CARP_LOCK(cif); + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { + if (IN6_ARE_ADDR_EQUAL(taddr, + &ifatoia6(ifa)->ia_addr.sin6_addr) && + (SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING) && + vh->sc_state == MASTER) { + CARP_UNLOCK(cif); + return (ifa); + } + } + } + 
CARP_UNLOCK(cif); + + return (NULL); +} + +void * +carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) +{ + struct m_tag *mtag; + struct carp_if *cif = v; + struct carp_softc *sc; + struct ifaddr *ifa; + + CARP_LOCK(cif); + TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { + TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { + if (IN6_ARE_ADDR_EQUAL(taddr, + &ifatoia6(ifa)->ia_addr.sin6_addr) && + (SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_flags & IFF_RUNNING)) { + struct ifnet *ifp = SC2IFP(sc); + mtag = m_tag_get(PACKET_TAG_CARP, + sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) { + /* better a bit than nothing */ + CARP_UNLOCK(cif); + return (IF_LLADDR(sc->sc_ifp)); + } + bcopy(&ifp, (caddr_t)(mtag + 1), + sizeof(struct ifnet *)); + m_tag_prepend(m, mtag); + + CARP_UNLOCK(cif); + return (IF_LLADDR(sc->sc_ifp)); + } + } + } + CARP_UNLOCK(cif); + + return (NULL); +} +#endif + +struct ifnet * +carp_forus(void *v, void *dhost) +{ + struct carp_if *cif = v; + struct carp_softc *vh; + u_int8_t *ena = dhost; + + /** + * XXX: See here for check on MAC adr is not for virtual use + * + **/ + + if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) + { + return (NULL); + } + + CARP_LOCK(cif); + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) + if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING) && + vh->sc_state == MASTER && + !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) { + CARP_UNLOCK(cif); + return (SC2IFP(vh)); + } + + CARP_UNLOCK(cif); + return (NULL); +} + +static void +carp_master_down(void *v) +{ + struct carp_softc *sc = v; + + lwkt_serialize_enter(sc->sc_ifp->if_serializer); + carp_master_down_locked(sc); + lwkt_serialize_exit(sc->sc_ifp->if_serializer); +} + +static void +carp_master_down_locked(struct carp_softc *sc) +{ + if (sc->sc_carpdev) + CARP_SCLOCK_ASSERT(sc); + + switch (sc->sc_state) { + case INIT: + kprintf("%s: master_down event in INIT state\n", + SC2IFP(sc)->if_xname); + break; 
+ case MASTER: + break; + case BACKUP: + carp_set_state(sc, MASTER); + carp_send_ad_locked(sc); + carp_send_arp(sc); +#ifdef INET6 + carp_send_na(sc); +#endif /* INET6 */ + carp_setrun(sc, 0); + carp_setroute(sc, RTM_ADD); + break; + } +} + +/* + * When in backup state, af indicates whether to reset the master down timer + * for v4 or v6. If it's set to zero, reset the ones which are already pending. + */ +static void +carp_setrun(struct carp_softc *sc, sa_family_t af) +{ + struct timeval tv; + + if (sc->sc_carpdev == NULL) { + SC2IFP(sc)->if_flags &= ~IFF_RUNNING; + carp_set_state(sc, INIT); + return; + } + + if (SC2IFP(sc)->if_flags & IFF_UP && + sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) + SC2IFP(sc)->if_flags |= IFF_RUNNING; + else { + SC2IFP(sc)->if_flags &= ~IFF_RUNNING; + carp_setroute(sc, RTM_DELETE); + return; + } + + switch (sc->sc_state) { + case INIT: + if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { + carp_send_ad_locked(sc); + carp_send_arp(sc); +#ifdef INET6 + carp_send_na(sc); +#endif /* INET6 */ + CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", + SC2IFP(sc)->if_xname); + carp_set_state(sc, MASTER); + carp_setroute(sc, RTM_ADD); + } else { + CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname); + carp_set_state(sc, BACKUP); + carp_setroute(sc, RTM_DELETE); + carp_setrun(sc, 0); + } + break; + case BACKUP: + callout_stop(&sc->sc_ad_tmo); + tv.tv_sec = 3 * sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + switch (af) { +#ifdef INET + case AF_INET: + callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), + carp_master_down, sc); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), + carp_master_down, sc); + break; +#endif /* INET6 */ + default: + if (sc->sc_naddrs) + callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), + carp_master_down, sc); + if (sc->sc_naddrs6) + callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), + carp_master_down, sc); + break; + } + break; + 
case MASTER: + tv.tv_sec = sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), + carp_send_ad, sc); + break; + } +} + +static void +carp_multicast_cleanup(struct carp_softc *sc) +{ + struct ip_moptions *imo = &sc->sc_imo; + u_int16_t n = imo->imo_num_memberships; + + /* Clean up our own multicast memberships */ + while (n-- > 0) { + if (imo->imo_membership[n] != NULL) { + in_delmulti(imo->imo_membership[n]); + imo->imo_membership[n] = NULL; + } + } + imo->imo_num_memberships = 0; + imo->imo_multicast_ifp = NULL; +} + +#ifdef INET6 +static void +carp_multicast6_cleanup(struct carp_softc *sc) +{ + struct ip6_moptions *im6o = &sc->sc_im6o; + + while (!LIST_EMPTY(&im6o->im6o_memberships)) { + struct in6_multi_mship *imm = + LIST_FIRST(&im6o->im6o_memberships); + + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + im6o->im6o_multicast_ifp = NULL; +} +#endif + +static int +carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) +{ + struct ifnet *ifp; + struct carp_if *cif; + struct in_ifaddr *ia, *ia_if; + struct ip_moptions *imo = &sc->sc_imo; + struct in_addr addr; + u_long iaddr = htonl(sin->sin_addr.s_addr); + int own, error; + + if (sin->sin_addr.s_addr == 0) + { + if (!(SC2IFP(sc)->if_flags & IFF_UP)) + { + carp_set_state(sc, INIT); + } + if (sc->sc_naddrs) + { + SC2IFP(sc)->if_flags |= IFF_UP; + } + carp_setrun(sc, 0); + return (0); + } + /* we have to do it by hands to check we won't match on us */ + ia_if = NULL; own = 0; + TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { + /* and, yeah, we need a multicast-capable iface too */ + if (ia->ia_ifp != SC2IFP(sc) && + (ia->ia_ifp->if_flags & IFF_MULTICAST) && + (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { + if (!ia_if) + ia_if = ia; + if (sin->sin_addr.s_addr == + ia->ia_addr.sin_addr.s_addr) + own++; + } + } + + + if (!ia_if) + return (EADDRNOTAVAIL); + + ia = ia_if; + ifp = ia->ia_ifp; + + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) 
== 0 || + (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) + return (EADDRNOTAVAIL); + + if (imo->imo_num_memberships == 0) { + addr.s_addr = htonl(INADDR_CARP_GROUP); + if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) + return (ENOBUFS); + imo->imo_num_memberships++; + imo->imo_multicast_ifp = ifp; + imo->imo_multicast_ttl = CARP_DFLTTL; + imo->imo_multicast_loop = 0; + } + + if (!ifp->if_carp) { + + MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, + M_WAITOK|M_ZERO); + if (!cif) { + error = ENOBUFS; + goto cleanup; + } + if ((error = ifpromisc(ifp, 1))) { + FREE(cif, M_CARP); + goto cleanup; + } + + CARP_LOCK_INIT(cif); + CARP_LOCK(cif); + cif->vhif_ifp = ifp; + TAILQ_INIT(&cif->vhif_vrs); + ifp->if_carp = cif; + + } else { + struct carp_softc *vr; + + cif = (struct carp_if *)ifp->if_carp; + CARP_LOCK(cif); + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) + if (vr != sc && vr->sc_vhid == sc->sc_vhid) { + CARP_UNLOCK(cif); + error = EINVAL; + goto cleanup; + } + } + sc->sc_ia = ia; + sc->sc_carpdev = ifp; + + { /* XXX prevent endless loop if already in queue */ + struct carp_softc *vr, *after = NULL; + int myself = 0; + cif = (struct carp_if *)ifp->if_carp; + + /* XXX: cif should not change, right? 
So we still hold the lock */ + CARP_LOCK_ASSERT(cif); + + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { + if (vr == sc) + myself = 1; + if (vr->sc_vhid < sc->sc_vhid) + after = vr; + } + + if (!myself) { + /* We're trying to keep things in order */ + if (after == NULL) { + TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); + } else { + TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); + } + cif->vhif_nvrs++; + } + } + + sc->sc_naddrs++; + SC2IFP(sc)->if_flags |= IFF_UP; + if (own) + sc->sc_advskew = 0; + + + carp_sc_state_locked(sc); + carp_setrun(sc, 0); + + CARP_UNLOCK(cif); + + return (0); + +cleanup: + in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); + return (error); + +} + +static int +carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) +{ + int error = 0; + + if (!--sc->sc_naddrs) { + struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; + struct ip_moptions *imo = &sc->sc_imo; + + CARP_LOCK(cif); + callout_stop(&sc->sc_ad_tmo); + SC2IFP(sc)->if_flags &= ~IFF_UP; + SC2IFP(sc)->if_flags &= ~IFF_RUNNING; + sc->sc_vhid = -1; + in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); + imo->imo_multicast_ifp = NULL; + TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); + if (!--cif->vhif_nvrs) { + sc->sc_carpdev->if_carp = NULL; + CARP_LOCK_DESTROY(cif); + FREE(cif, M_IFADDR); + } else { + CARP_UNLOCK(cif); + } + } + + return (error); +} + +#ifdef INET6 +static int +carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) +{ + struct ifnet *ifp; + struct carp_if *cif; + struct in6_ifaddr *ia, *ia_if; + struct ip6_moptions *im6o = &sc->sc_im6o; + struct in6_multi_mship *imm; + struct in6_addr in6; + int own, error; + + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + if (!(SC2IFP(sc)->if_flags & IFF_UP)) + carp_set_state(sc, INIT); + if (sc->sc_naddrs6) + SC2IFP(sc)->if_flags |= IFF_UP; + carp_setrun(sc, 0); + return (0); + } + + /* we have to do it by hands to check we won't match on us */ + ia_if = NULL; own = 0; + for 
(ia = in6_ifaddr; ia; ia = ia->ia_next) { + int i; + + for (i = 0; i < 4; i++) { + if ((sin6->sin6_addr.s6_addr32[i] & + ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != + (ia->ia_addr.sin6_addr.s6_addr32[i] & + ia->ia_prefixmask.sin6_addr.s6_addr32[i])) + break; + } + /* and, yeah, we need a multicast-capable iface too */ + if (ia->ia_ifp != SC2IFP(sc) && + (ia->ia_ifp->if_flags & IFF_MULTICAST) && + (i == 4)) { + if (!ia_if) + ia_if = ia; + if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, + &ia->ia_addr.sin6_addr)) + own++; + } + } + + if (!ia_if) + return (EADDRNOTAVAIL); + ia = ia_if; + ifp = ia->ia_ifp; + + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || + (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) + return (EADDRNOTAVAIL); + + if (!sc->sc_naddrs6) { + im6o->im6o_multicast_ifp = ifp; + + /* join CARP multicast address */ + bzero(&in6, sizeof(in6)); + in6.s6_addr16[0] = htons(0xff02); + in6.s6_addr8[15] = 0x12; + if (in6_setscope(&in6, ifp, NULL) != 0) + goto cleanup; + if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) + goto cleanup; + LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); + + /* join solicited multicast address */ + bzero(&in6, sizeof(in6)); + in6.s6_addr16[0] = htons(0xff02); + in6.s6_addr32[1] = 0; + in6.s6_addr32[2] = htonl(1); + in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; + in6.s6_addr8[12] = 0xff; + if (in6_setscope(&in6, ifp, NULL) != 0) + goto cleanup; + if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) + goto cleanup; + LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); + } + + if (!ifp->if_carp) { + MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, + M_WAITOK|M_ZERO); + if (!cif) { + error = ENOBUFS; + goto cleanup; + } + if ((error = ifpromisc(ifp, 1))) { + FREE(cif, M_CARP); + goto cleanup; + } + + CARP_LOCK_INIT(cif); + CARP_LOCK(cif); + cif->vhif_ifp = ifp; + TAILQ_INIT(&cif->vhif_vrs); + ifp->if_carp = cif; + + } else { + struct carp_softc *vr; + + cif = (struct carp_if 
*)ifp->if_carp; + CARP_LOCK(cif); + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) + if (vr != sc && vr->sc_vhid == sc->sc_vhid) { + CARP_UNLOCK(cif); + error = EINVAL; + goto cleanup; + } + } + sc->sc_ia6 = ia; + sc->sc_carpdev = ifp; + + { /* XXX prevent endless loop if already in queue */ + struct carp_softc *vr, *after = NULL; + int myself = 0; + cif = (struct carp_if *)ifp->if_carp; + CARP_LOCK_ASSERT(cif); + + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { + if (vr == sc) + myself = 1; + if (vr->sc_vhid < sc->sc_vhid) + after = vr; + } + + if (!myself) { + /* We're trying to keep things in order */ + if (after == NULL) { + TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); + } else { + TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); + } + cif->vhif_nvrs++; + } + } + + sc->sc_naddrs6++; + SC2IFP(sc)->if_flags |= IFF_UP; + if (own) + sc->sc_advskew = 0; + carp_sc_state_locked(sc); + carp_setrun(sc, 0); + + CARP_UNLOCK(cif); + + return (0); + +cleanup: + /* clean up multicast memberships */ + if (!sc->sc_naddrs6) { + while (!LIST_EMPTY(&im6o->im6o_memberships)) { + imm = LIST_FIRST(&im6o->im6o_memberships); + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + } + return (error); +} + +static int +carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) +{ + int error = 0; + + if (!--sc->sc_naddrs6) { + struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; + struct ip6_moptions *im6o = &sc->sc_im6o; + + CARP_LOCK(cif); + callout_stop(&sc->sc_ad_tmo); + SC2IFP(sc)->if_flags &= ~IFF_UP; + SC2IFP(sc)->if_flags &= ~IFF_RUNNING; + sc->sc_vhid = -1; + while (!LIST_EMPTY(&im6o->im6o_memberships)) { + struct in6_multi_mship *imm = + LIST_FIRST(&im6o->im6o_memberships); + + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + im6o->im6o_multicast_ifp = NULL; + TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); + if (!--cif->vhif_nvrs) { + CARP_LOCK_DESTROY(cif); + sc->sc_carpdev->if_carp = NULL; + FREE(cif, M_IFADDR); + } else + CARP_UNLOCK(cif); + 
} + + return (error); +} +#endif /* INET6 */ + +static int +carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *creds) +{ + struct carp_softc *sc = ifp->if_softc, *vr; + struct carpreq carpr; + struct ifaddr *ifa; + struct ifreq *ifr; + struct ifaliasreq *ifra; + int locked = 0, error = 0; + + ifa = (struct ifaddr *)addr; + ifra = (struct ifaliasreq *)addr; + ifr = (struct ifreq *)addr; + + + switch (cmd) { + case SIOCSIFADDR: + switch (ifa->ifa_addr->sa_family) { +#ifdef INET + case AF_INET: + SC2IFP(sc)->if_flags |= IFF_UP; + bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, + sizeof(struct sockaddr)); + error = carp_set_addr(sc, satosin(ifa->ifa_addr)); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + SC2IFP(sc)->if_flags |= IFF_UP; + error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); + break; +#endif /* INET6 */ + default: + error = EAFNOSUPPORT; + break; + } + break; + + case SIOCAIFADDR: + switch (ifa->ifa_addr->sa_family) { +#ifdef INET + case AF_INET: + SC2IFP(sc)->if_flags |= IFF_UP; + bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, + sizeof(struct sockaddr)); + error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + SC2IFP(sc)->if_flags |= IFF_UP; + error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); + break; +#endif /* INET6 */ + default: + error = EAFNOSUPPORT; + break; + } + break; + + case SIOCDIFADDR: + switch (ifa->ifa_addr->sa_family) { +#ifdef INET + case AF_INET: + error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); + break; +#endif /* INET6 */ + default: + error = EAFNOSUPPORT; + break; + } + break; + + case SIOCSIFFLAGS: + if (sc->sc_carpdev) { + locked = 1; + CARP_SCLOCK(sc); + } + if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { + callout_stop(&sc->sc_ad_tmo); + callout_stop(&sc->sc_md_tmo); + callout_stop(&sc->sc_md6_tmo); + if (sc->sc_state 
== MASTER) + carp_send_ad_locked(sc); + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { + SC2IFP(sc)->if_flags |= IFF_UP; + carp_setrun(sc, 0); + } + break; + + case SIOCSVH: + error = suser(curthread); + if (error) + break; + if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) + break; + error = 1; + if (sc->sc_carpdev) { + locked = 1; + CARP_SCLOCK(sc); + } + if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { + switch (carpr.carpr_state) { + case BACKUP: + callout_stop(&sc->sc_ad_tmo); + carp_set_state(sc, BACKUP); + carp_setrun(sc, 0); + carp_setroute(sc, RTM_DELETE); + break; + case MASTER: + carp_master_down_locked(sc); + break; + default: + break; + } + } + if (carpr.carpr_vhid > 0) { + if (carpr.carpr_vhid > 255) { + error = EINVAL; + break; + } + if (sc->sc_carpdev) { + struct carp_if *cif; + cif = (struct carp_if *)sc->sc_carpdev->if_carp; + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) + if (vr != sc && + vr->sc_vhid == carpr.carpr_vhid) + return EEXIST; + } + sc->sc_vhid = carpr.carpr_vhid; + IF_LLADDR(sc->sc_ifp)[0] = 0; + IF_LLADDR(sc->sc_ifp)[1] = 0; + IF_LLADDR(sc->sc_ifp)[2] = 0x5e; + IF_LLADDR(sc->sc_ifp)[3] = 0; + IF_LLADDR(sc->sc_ifp)[4] = 1; + IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid; + error--; + } + if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { + if (carpr.carpr_advskew >= 255) { + error = EINVAL; + break; + } + if (carpr.carpr_advbase > 255) { + error = EINVAL; + break; + } + sc->sc_advbase = carpr.carpr_advbase; + sc->sc_advskew = carpr.carpr_advskew; + error--; + } + bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); + if (error > 0) + error = EINVAL; + else { + error = 0; + carp_setrun(sc, 0); + } + break; + + case SIOCGVH: + /* XXX: lockless read */ + bzero(&carpr, sizeof(carpr)); + carpr.carpr_state = sc->sc_state; + carpr.carpr_vhid = sc->sc_vhid; + carpr.carpr_advbase = sc->sc_advbase; + carpr.carpr_advskew = sc->sc_advskew; + error = 
suser(curthread); + if (error == 0) + bcopy(sc->sc_key, carpr.carpr_key, + sizeof(carpr.carpr_key)); + error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); + break; + + default: + error = EINVAL; + } + + if (locked) + CARP_SCUNLOCK(sc); + + carp_hmac_prepare(sc); + + return (error); +} + +/* + * XXX: this is looutput. We should eventually use it from there. + */ +static int +carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + u_int32_t af; + + M_ASSERTPKTHDR(m); /* check if we have the packet header */ + + if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { + m_freem(m); + return (rt->rt_flags & RTF_BLACKHOLE ? 0 : + rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + } + + ifp->if_opackets++; + ifp->if_obytes += m->m_pkthdr.len; + + /* BPF writes need to be handled specially. */ + if (dst->sa_family == AF_UNSPEC) { + bcopy(dst->sa_data, &af, sizeof(af)); + dst->sa_family = af; + } + +#if 1 /* XXX */ + switch (dst->sa_family) { + case AF_INET: + case AF_INET6: + case AF_IPX: + case AF_APPLETALK: + break; + default: + m_freem(m); + return (EAFNOSUPPORT); + } +#endif + return(if_simloop(ifp, m, dst->sa_family, 0)); +} + +/* + * Start output on carp interface. This function should never be called. 
+ */ +static void +carp_start(struct ifnet *ifp) +{ +#ifdef DEBUG + kprintf("%s: start called\n", ifp->if_xname); +#endif +} + +int +carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, + struct rtentry *rt) +{ + struct m_tag *mtag; + struct carp_softc *sc; + struct ifnet *carp_ifp; + + if (!sa) + return (0); + + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + break; +#endif /* INET6 */ + default: + return (0); + } + + mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); + if (mtag == NULL) + return (0); + + bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); + sc = carp_ifp->if_softc; + + /* Set the source MAC address to Virtual Router MAC Address */ + switch (ifp->if_type) { + case IFT_ETHER: + case IFT_L2VLAN: { + struct ether_header *eh; + + eh = mtod(m, struct ether_header *); + eh->ether_shost[0] = 0; + eh->ether_shost[1] = 0; + eh->ether_shost[2] = 0x5e; + eh->ether_shost[3] = 0; + eh->ether_shost[4] = 1; + eh->ether_shost[5] = sc->sc_vhid; + } + break; + case IFT_FDDI: { + struct fddi_header *fh; + + fh = mtod(m, struct fddi_header *); + fh->fddi_shost[0] = 0; + fh->fddi_shost[1] = 0; + fh->fddi_shost[2] = 0x5e; + fh->fddi_shost[3] = 0; + fh->fddi_shost[4] = 1; + fh->fddi_shost[5] = sc->sc_vhid; + } + break; + case IFT_ISO88025: { + struct iso88025_header *th; + th = mtod(m, struct iso88025_header *); + th->iso88025_shost[0] = 3; + th->iso88025_shost[1] = 0; + th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); + th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); + th->iso88025_shost[4] = 0; + th->iso88025_shost[5] = 0; + } + break; + default: + kprintf("%s: carp is not supported for this interface type\n", + ifp->if_xname); + return (EOPNOTSUPP); + } + + return (0); + +} + +static void +carp_set_state(struct carp_softc *sc, int state) +{ + + if (sc->sc_carpdev) + CARP_SCLOCK_ASSERT(sc); + + if (sc->sc_state == state) + return; + + sc->sc_state = state; + switch (state) { + case 
BACKUP: + SC2IFP(sc)->if_link_state = LINK_STATE_DOWN; + break; + case MASTER: + SC2IFP(sc)->if_link_state = LINK_STATE_UP; + break; + default: + SC2IFP(sc)->if_link_state = LINK_STATE_UNKNOWN; + break; + } + rt_ifmsg(SC2IFP(sc)); +} + +void +carp_carpdev_state(void *v) +{ + struct carp_if *cif = v; + + CARP_LOCK(cif); + carp_carpdev_state_locked(cif); + CARP_UNLOCK(cif); +} + +static void +carp_carpdev_state_locked(struct carp_if *cif) +{ + struct carp_softc *sc; + + TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) + carp_sc_state_locked(sc); +} + +static void +carp_sc_state_locked(struct carp_softc *sc) +{ + CARP_SCLOCK_ASSERT(sc); + + if ( !(sc->sc_carpdev->if_flags & IFF_UP)) { + sc->sc_flags_backup = SC2IFP(sc)->if_flags; + SC2IFP(sc)->if_flags &= ~IFF_UP; + SC2IFP(sc)->if_flags &= ~IFF_RUNNING; + callout_stop(&sc->sc_ad_tmo); + callout_stop(&sc->sc_md_tmo); + callout_stop(&sc->sc_md6_tmo); + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + if (!sc->sc_suppress) { + carp_suppress_preempt++; + if (carp_suppress_preempt == 1) { + CARP_SCUNLOCK(sc); + carp_send_ad_all(); + CARP_SCLOCK(sc); + } + } + sc->sc_suppress = 1; + } else { + SC2IFP(sc)->if_flags |= sc->sc_flags_backup; + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + if (sc->sc_suppress) + carp_suppress_preempt--; + sc->sc_suppress = 0; + } + + return; +} + +static int +carp_modevent(module_t mod, int type, void *data) +{ + switch (type) { + case MOD_LOAD: + if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, + carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY); + if (if_detach_event_tag == NULL) + return (ENOMEM); + + LIST_INIT(&carpif_list); + if_clone_attach(&carp_cloner); + break; + + case MOD_UNLOAD: + EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); + if_clone_detach(&carp_cloner); + break; + + default: + return (EINVAL); + } + + return (0); +} + +static moduledata_t carp_mod = { + "carp", + carp_modevent, + 0 +}; + +DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, 
SI_ORDER_ANY); diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h new file mode 100644 index 0000000..5d3b6d1 --- /dev/null +++ b/sys/netinet/ip_carp.h @@ -0,0 +1,167 @@ +/* $Id$ */ +/* from $FreeBSD: src/sys/netinet/ip_carp.h,v 1.3 2006/12/01 18:37:41 imp Exp $ */ +/* from $OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $ */ + +/* + * Copyright (c) 2002 Michael Shalayeff. All rights reserved. + * Copyright (c) 2003 Ryan McBride. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _IP_CARP_H +#define _IP_CARP_H + +/* + * The CARP header layout is as follows: + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Version| Type | VirtualHostID | AdvSkew | Auth Len | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | AdvBase | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Counter (1) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Counter (2) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (1) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (2) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (3) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (4) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (5) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +struct carp_header { +#if BYTE_ORDER == LITTLE_ENDIAN + u_int8_t carp_type:4, + carp_version:4; +#endif +#if BYTE_ORDER == BIG_ENDIAN + u_int8_t carp_version:4, + carp_type:4; +#endif + u_int8_t carp_vhid; /* virtual host id */ + u_int8_t carp_advskew; /* advertisement skew */ + u_int8_t carp_authlen; /* size of counter+md, 32bit chunks */ + u_int8_t carp_pad1; /* reserved */ + u_int8_t carp_advbase; /* advertisement interval */ + u_int16_t carp_cksum; + u_int32_t carp_counter[2]; + unsigned char carp_md[20]; /* SHA1 HMAC */ +} __packed; + +#ifdef CTASSERT +CTASSERT(sizeof(struct carp_header) == 36); +#endif + +#define CARP_DFLTTL 255 + +/* carp_version */ +#define CARP_VERSION 2 + +/* carp_type */ +#define CARP_ADVERTISEMENT 0x01 + +#define CARP_KEY_LEN 20 /* a sha1 hash of a passphrase */ + +/* carp_advbase */ +#define CARP_DFLTINTV 1 + +/* + 
* Statistics. + */ +struct carpstats { + uint64_t carps_ipackets; /* total input packets, IPv4 */ + uint64_t carps_ipackets6; /* total input packets, IPv6 */ + uint64_t carps_badif; /* wrong interface */ + uint64_t carps_badttl; /* TTL is not CARP_DFLTTL */ + uint64_t carps_hdrops; /* packets shorter than hdr */ + uint64_t carps_badsum; /* bad checksum */ + uint64_t carps_badver; /* bad (incl unsupp) version */ + uint64_t carps_badlen; /* data length does not match */ + uint64_t carps_badauth; /* bad authentication */ + uint64_t carps_badvhid; /* bad VHID */ + uint64_t carps_badaddrs; /* bad address list */ + + uint64_t carps_opackets; /* total output packets, IPv4 */ + uint64_t carps_opackets6; /* total output packets, IPv6 */ + uint64_t carps_onomem; /* no memory for an mbuf */ + uint64_t carps_ostates; /* total state updates sent */ + + uint64_t carps_preempt; /* if enabled, preemptions */ +}; + +/* + * Configuration structure for SIOCSVH SIOCGVH + */ +struct carpreq { + int carpr_state; +#define CARP_STATES "INIT", "BACKUP", "MASTER" +#define CARP_MAXSTATE 2 + int carpr_vhid; + int carpr_advskew; + int carpr_advbase; + unsigned char carpr_key[CARP_KEY_LEN]; +}; +#define SIOCSVH _IOWR('i', 245, struct ifreq) +#define SIOCGVH _IOWR('i', 246, struct ifreq) + +/* + * Names for CARP sysctl objects + */ +#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */ +#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */ +#define CARPCTL_LOG 3 /* log bad packets */ +#define CARPCTL_STATS 4 /* statistics (read-only) */ +#define CARPCTL_ARPBALANCE 5 /* balance arp responses */ +#define CARPCTL_MAXID 6 + +#define CARPCTL_NAMES { \ + { 0, 0 }, \ + { "allow", CTLTYPE_INT }, \ + { "preempt", CTLTYPE_INT }, \ + { "log", CTLTYPE_INT }, \ + { "stats", CTLTYPE_STRUCT }, \ + { "arpbalance", CTLTYPE_INT }, \ +} + +#ifdef _KERNEL +void carp_carpdev_state(void *); +void carp_input (struct mbuf *, int); +int carp6_input (struct mbuf **, int *, int); +int carp_output (struct 
ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +int carp_iamatch (void *, struct in_ifaddr *, struct in_addr *, + u_int8_t **); +struct ifaddr *carp_iamatch6(void *, struct in6_addr *); +void *carp_macmatch6(void *, struct mbuf *, const struct in6_addr *); +struct ifnet *carp_forus (void *, void *); +#endif +#endif /* _IP_CARP_H */ diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 6c2809f..31da470 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -115,6 +115,7 @@ struct ip_moptions { u_char imo_multicast_ttl; /* TTL for outgoing multicasts */ u_char imo_multicast_loop; /* 1 => hear sends if a member */ u_short imo_num_memberships; /* no. memberships this socket */ + u_short imo_max_memberships; /* max memberships this socket */ struct in_multi *imo_membership[IP_MAX_MEMBERSHIPS]; u_long imo_multicast_vif; /* vif num outgoing multicasts */ }; diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index ae76439..f84087b 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -231,7 +231,7 @@ done: * rely on the cloning mechanism from the corresponding interface route * any more. */ -static void +void in6_ifaddloop(struct ifaddr *ifa) { struct rtentry *rt; @@ -249,7 +249,7 @@ in6_ifaddloop(struct ifaddr *ifa) * Remove loopback rtentry of ownaddr generated by in6_ifaddloop(), * if it exists. 
*/ -static void +void in6_ifremloop(struct ifaddr *ifa) { struct in6_ifaddr *ia; @@ -1614,6 +1614,39 @@ in6_ifinit(struct ifnet *ifp, struct in6 return (error); } +struct in6_multi_mship * +in6_joingroup(ifp, addr, errorp) + struct ifnet *ifp; + struct in6_addr *addr; + int *errorp; +{ + struct in6_multi_mship *imm; + + imm = kmalloc(sizeof(*imm), M_IPMADDR, M_NOWAIT); + if (!imm) { + *errorp = ENOBUFS; + return NULL; + } + imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp); + if (!imm->i6mm_maddr) { + /* *errorp is already set */ + kfree(imm, M_IPMADDR); + return NULL; + } + return imm; +} + +int +in6_leavegroup(imm) + struct in6_multi_mship *imm; +{ + + if (imm->i6mm_maddr) + in6_delmulti(imm->i6mm_maddr); + kfree(imm, M_IPMADDR); + return 0; +} + /* * Add an address to the list of IP6 multicast addresses for a * given interface. diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index 1e0bd15..94833c6 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -714,6 +714,7 @@ in6_ifattach(struct ifnet *ifp, #endif case IFT_PFLOG: case IFT_PFSYNC: + case IFT_CARP: return; } diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index 1434cca..f2b430b 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -69,6 +69,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_carp.h" #include #include @@ -140,6 +141,11 @@ #include +#ifdef CARP +#include +#endif + + /* * TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/ @@ -247,6 +253,15 @@ struct ip6protosw inet6sw[] = { 0, 0, 0, 0, &rip6_usrreqs }, +#ifdef CARP +{ SOCK_RAW, &inet6domain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR, + carp6_input, rip6_output, 0, rip6_ctloutput, + 0, + 0, 0, 0, 0, + &rip6_usrreqs +}, +#endif /* CARP */ + /* raw wildcard */ { SOCK_RAW, &inet6domain, 0, PR_ATOMIC|PR_ADDR, rip6_input, rip6_output, 0, rip6_ctloutput, diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h index 6420634..cbfe2cb 100644 --- a/sys/netinet6/in6_var.h +++ b/sys/netinet6/in6_var.h @@ -585,6 +585,9 @@ do { \ struct in6_multi *in6_addmulti (struct in6_addr *, struct ifnet *, int *); void in6_delmulti (struct in6_multi *); +struct in6_multi_mship *in6_joingroup(struct ifnet *, struct in6_addr *, int *); +int in6_leavegroup(struct in6_multi_mship *); + extern int in6_ifindex2scopeid (int); extern int in6_mask2len (struct in6_addr *, u_char *); extern void in6_len2mask (struct in6_addr *, int); @@ -615,6 +618,8 @@ int in6_prefix_ioctl (struct socket *so, int in6_prefix_add_ifid (int iilen, struct in6_ifaddr *ia); void in6_prefix_remove_ifid (int iilen, struct in6_ifaddr *ia); void in6_purgeprefix (struct ifnet *); +void in6_ifremloop(struct ifaddr *); +void in6_ifaddloop(struct ifaddr *); int in6_is_addr_deprecated (struct sockaddr_in6 *); struct inpcb; diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 801ec0c..e295fa5 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -1994,6 +1994,9 @@ nd6_need_cache(struct ifnet *ifp) #ifdef IFT_IEEE80211 case IFT_IEEE80211: #endif +#ifdef IFT_CARP + case IFT_CARP: +#endif case IFT_GIF: /* XXX need more cases? 
*/ return (1); default: diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 04f051b..4c70301 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -34,6 +34,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_carp.h" #include #include @@ -71,6 +72,11 @@ #include +#ifdef CARP +#include +#endif + + #define SDL(s) ((struct sockaddr_dl *)s) struct dadq; @@ -102,7 +108,7 @@ nd6_ns_input(struct mbuf *m, int off, in struct in6_addr taddr6; struct in6_addr myaddr6; char *lladdr = NULL; - struct ifaddr *ifa; + struct ifaddr *ifa = NULL; int lladdrlen = 0; int anycast = 0, proxy = 0, tentative = 0; int tlladdr; @@ -201,7 +207,14 @@ nd6_ns_input(struct mbuf *m, int off, in * (3) "tentative" address on which DAD is being performed. */ /* (1) and (3) check. */ +#ifdef CARP + if (ifp->if_carp) + ifa = carp_iamatch6(ifp->if_carp, &taddr6); + if (!ifa) + ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); +#else ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); +#endif /* (2) check. */ if (!ifa) { @@ -895,9 +908,16 @@ nd6_na_output(struct ifnet *ifp, const s * lladdr in sdl0. If we are not proxying (sending NA for * my address) use lladdr configured for the interface. 
*/ - if (sdl0 == NULL) + if (sdl0 == NULL) { +#ifdef CARP + if (ifp->if_carp) + mac = carp_macmatch6(ifp->if_carp, m, taddr6); + if (mac == NULL) + mac = nd6_ifptomac(ifp); +#else mac = nd6_ifptomac(ifp); - else if (sdl0->sa_family == AF_LINK) { +#endif + } else if (sdl0->sa_family == AF_LINK) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)sdl0; if (sdl->sdl_alen == ifp->if_addrlen) @@ -949,6 +969,9 @@ nd6_ifptomac(struct ifnet *ifp) #ifdef IFT_IEEE80211 case IFT_IEEE80211: #endif +#ifdef IFT_CARP + case IFT_CARP: +#endif return ((caddr_t)(ifp + 1)); break; default: diff --git a/sys/netinet6/scope6.c b/sys/netinet6/scope6.c index 96b0891..2957c58 100644 --- a/sys/netinet6/scope6.c +++ b/sys/netinet6/scope6.c @@ -296,3 +296,76 @@ scope6_addr2default(struct in6_addr *add return (sid_default.s6id_list[in6_addrscope(addr)]); } + +/* + * Determine the appropriate scope zone ID for in6 and ifp. If ret_id is + * non NULL, it is set to the zone ID. If the zone ID needs to be embedded + * in the in6_addr structure, in6 will be modified. + */ +int +in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id) +{ + int scope; + u_int32_t zoneid = 0; + struct scope6_id *sid; + + lwkt_serialize_enter(ifp->if_serializer); + + sid = SID(ifp); + +#ifdef DIAGNOSTIC + if (sid == NULL) { /* should not happen */ + panic("in6_setscope: scope array is NULL"); + /* NOTREACHED */ + } +#endif + + /* + * special case: the loopback address can only belong to a loopback + * interface. 
+ */ + if (IN6_IS_ADDR_LOOPBACK(in6)) { + if (!(ifp->if_flags & IFF_LOOPBACK)) { + lwkt_serialize_exit(ifp->if_serializer); + return (EINVAL); + } else { + if (ret_id != NULL) + *ret_id = 0; /* there's no ambiguity */ + lwkt_serialize_exit(ifp->if_serializer); + return (0); + } + } + + scope = in6_addrscope(in6); + + switch (scope) { + case IPV6_ADDR_SCOPE_NODELOCAL: /* should be interface index */ + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_NODELOCAL]; + break; + + case IPV6_ADDR_SCOPE_LINKLOCAL: + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]; + break; + + case IPV6_ADDR_SCOPE_SITELOCAL: + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL]; + break; + + case IPV6_ADDR_SCOPE_ORGLOCAL: + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL]; + break; + + default: + zoneid = 0; /* XXX: treat as global. */ + break; + } + lwkt_serialize_exit(ifp->if_serializer); + + if (ret_id != NULL) + *ret_id = zoneid; + + if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_NODELOCAL(in6) ) + in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */ + + return (0); +} diff --git a/sys/netinet6/scope6_var.h b/sys/netinet6/scope6_var.h index 57c84c4..62427e3 100644 --- a/sys/netinet6/scope6_var.h +++ b/sys/netinet6/scope6_var.h @@ -65,6 +65,7 @@ void scope6_setdefault (struct ifnet *); int scope6_get_default (struct scope6_id *); u_int32_t scope6_in6_addrscope (struct in6_addr *); u_int32_t scope6_addr2default (struct in6_addr *); +int in6_setscope __P((struct in6_addr *, struct ifnet *, u_int32_t *)); #endif /* _KERNEL */ diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 6391553..0774545 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -560,6 +560,7 @@ m_getb(int len, int how, int type, int f /* struct ip6aux */ #define PACKET_TAG_IPFW_DIVERT 9 /* divert info */ /* uint16_t */ +#define PACKET_TAG_CARP 28 /* CARP info */ /* * As a temporary and low impact solution to replace the even uglier diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c index 1158082..47e76c7 100644 --- 
a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -47,6 +47,7 @@ #include #include #include +#include #ifdef INET6 #include #endif /* INET6 */ @@ -584,6 +585,50 @@ udp_stats(u_long off __unused, char *nam #undef p1a } +/* + * Dump CARP statistics structure. The counters are read with sysctlbyname("net.inet.carp.stats") and printed under the heading given by name; the off argument is not referenced. When zflag is set, a zero-filled carpstats is supplied as the new value in the same sysctl call. A counter whose value is zero is printed only when sflag <= 1. If the sysctl fails, a warning is issued and nothing is printed. + */ +void +carp_stats(u_long off, const char *name, int af1 __unused) +{ + struct carpstats carpstat, zerostat; + size_t len = sizeof(struct carpstats); + + if (zflag) + memset(&zerostat, 0, len); /* zerostat is only initialized, and only passed to sysctl, when zflag is set */ + if (sysctlbyname("net.inet.carp.stats", &carpstat, &len, + zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { + warn("sysctl: net.inet.carp.stats"); + return; + } + + printf("%s:\n", name); + +#define p(f, m) if (carpstat.f || sflag <= 1) \ + printf(m, (unsigned long long)carpstat.f, plural((int)carpstat.f)) +#define p2(f, m) if (carpstat.f || sflag <= 1) \ + printf(m, (unsigned long long)carpstat.f) + + p(carps_ipackets, "\t%llu packet%s received (IPv4)\n"); + p(carps_ipackets6, "\t%llu packet%s received (IPv6)\n"); + p(carps_badttl, "\t\t%llu packet%s discarded for wrong TTL\n"); + p(carps_hdrops, "\t\t%llu packet%s shorter than header\n"); + p(carps_badsum, "\t\t%llu discarded for bad checksum%s\n"); + p(carps_badver, "\t\t%llu discarded packet%s with a bad version\n"); + p2(carps_badlen, "\t\t%llu discarded because packet too short\n"); + p2(carps_badauth, "\t\t%llu discarded for bad authentication\n"); + p2(carps_badvhid, "\t\t%llu discarded for bad vhid\n"); + p2(carps_badaddrs, "\t\t%llu discarded because of a bad address list\n"); + p(carps_opackets, "\t%llu packet%s sent (IPv4)\n"); + p(carps_opackets6, "\t%llu packet%s sent (IPv6)\n"); + p2(carps_onomem, "\t\t%llu send failed due to mbuf memory error\n"); +#if notyet + p(carps_ostates, "\t\t%s state update%s sent\n"); /* NOTE(review): p() passes an unsigned long long first, but this format starts with %s; correct the format before enabling */ +#endif +#undef p +#undef p2 +} + /* * Dump IP statistics structure. 
*/ diff --git a/usr.bin/netstat/main.c b/usr.bin/netstat/main.c index b487811..2004d93 100644 --- a/usr.bin/netstat/main.c +++ b/usr.bin/netstat/main.c @@ -148,6 +148,8 @@ static struct nlist nl[] = { { "_rttrash" }, #define N_NCPUS 43 { "_ncpus" }, +#define N_CARPSTAT 44 + { "_carpstats" }, { "" }, }; @@ -179,6 +181,8 @@ struct protox { { -1, N_IPSECSTAT, 1, 0, ipsec_stats, NULL, "ipsec", 0}, #endif + { -1, N_CARPSTAT, 1, 0, + carp_stats, NULL, "carp", 0}, { -1, -1, 0, 0, 0, NULL, 0 } }; diff --git a/usr.bin/netstat/netstat.h b/usr.bin/netstat/netstat.h index f0329cd..b7cd79e 100644 --- a/usr.bin/netstat/netstat.h +++ b/usr.bin/netstat/netstat.h @@ -73,6 +73,7 @@ void ip_stats (u_long, char *, int); void icmp_stats (u_long, char *, int); void igmp_stats (u_long, char *, int); void pim_stats (u_long, char *, int); +void carp_stats (u_long, const char *, int); #ifdef IPSEC void ipsec_stats (u_long, char *, int); #endif