-
Notifications
You must be signed in to change notification settings - Fork 34
/
in_mcast.c
2954 lines (2590 loc) · 76 KB
/
in_mcast.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*-
* Copyright (c) 2007-2009 Bruce Simpson.
* Copyright (c) 2005 Robert N. M. Watson.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* IPv4 multicast socket, group, and socket option processing module.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/igmp_var.h>
#ifndef KTR_IGMPV3
#define KTR_IGMPV3 KTR_INET
#endif
/*
 * Overlay of the socket address layouts handled in this file, letting a
 * generic sockaddr be viewed as a sockaddr_in (or sockaddr_dl) through
 * a single type.  The __SOCKUNION_DECLARED guard keeps the union from
 * being declared twice if it has already been declared under that guard.
 */
#ifndef __SOCKUNION_DECLARED
union sockunion {
struct sockaddr_storage ss;
struct sockaddr sa;
struct sockaddr_dl sdl;
struct sockaddr_in sin;
};
typedef union sockunion sockunion_t;
#define __SOCKUNION_DECLARED
#endif /* __SOCKUNION_DECLARED */
/*
 * Kernel malloc types used to tag the allocations made by this file:
 * socket-layer source filters, in_multi group records, ip_moptions
 * vectors, and IGMP-layer source nodes respectively.
 */
static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
"IPv4 multicast PCB-layer source filter");
static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
"IPv4 multicast IGMP-layer source filter");
/*
* Locking:
* - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
* - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
* it can be taken by code in net/if.c also.
* - ip_moptions and in_mfilter are covered by the INP_WLOCK.
*
* struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
* any need for in_multi itself to be virtualized -- it is bound to an ifp
* anyway no matter what happens.
*/
/*
 * Default (MTX_DEF) mutex, initialized through MTX_SYSINIT.
 * NOTE(review): presumably this is the mutex behind the IN_MULTI_LOCK
 * macros used throughout this file -- confirm against in_var.h.
 */
struct mtx in_multi_mtx;
MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
/*
* Functions with non-static linkage defined in this file should be
* declared in in_var.h:
* imo_multi_filter()
* in_addmulti()
* in_delmulti()
* in_joingroup()
* in_joingroup_locked()
* in_leavegroup()
* in_leavegroup_locked()
* and ip_var.h:
* inp_freemoptions()
* inp_getmoptions()
* inp_setmoptions()
*
* XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
* and in_delmulti().
*/
/*
 * Forward declarations of the file-local helpers, so that definitions
 * below may reference each other regardless of their order.
 * Prefixes: imf_ = socket-layer filter set (struct in_mfilter),
 * imo_ = per-socket options (struct ip_moptions),
 * ims_/inm_ = IGMP-layer source / group (struct ip_msource, in_multi),
 * inp_ = protocol control block (struct inpcb) entry points.
 */
static void imf_commit(struct in_mfilter *);
static int imf_get_source(struct in_mfilter *imf,
const struct sockaddr_in *psin,
struct in_msource **);
static struct in_msource *
imf_graft(struct in_mfilter *, const uint8_t,
const struct sockaddr_in *);
static void imf_leave(struct in_mfilter *);
static int imf_prune(struct in_mfilter *, const struct sockaddr_in *);
static void imf_purge(struct in_mfilter *);
static void imf_rollback(struct in_mfilter *);
static void imf_reap(struct in_mfilter *);
static int imo_grow(struct ip_moptions *);
static size_t imo_match_group(const struct ip_moptions *,
const struct ifnet *, const struct sockaddr *);
static struct in_msource *
imo_match_source(const struct ip_moptions *, const size_t,
const struct sockaddr *);
static void ims_merge(struct ip_msource *ims,
const struct in_msource *lims, const int rollback);
static int in_getmulti(struct ifnet *, const struct in_addr *,
struct in_multi **);
static int inm_get_source(struct in_multi *inm, const in_addr_t haddr,
const int noalloc, struct ip_msource **pims);
static int inm_is_ifp_detached(const struct in_multi *);
static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
static void inm_purge(struct in_multi *);
static void inm_reap(struct in_multi *);
static struct ip_moptions *
inp_findmoptions(struct inpcb *);
static void inp_freemoptions_internal(struct ip_moptions *);
static void inp_gcmoptions(void *, int);
static int inp_get_source_filters(struct inpcb *, struct sockopt *);
static int inp_join_group(struct inpcb *, struct sockopt *);
static int inp_leave_group(struct inpcb *, struct sockopt *);
static struct ifnet *
inp_lookup_mcast_ifp(const struct inpcb *,
const struct sockaddr_in *, const struct in_addr);
static int inp_block_unblock_source(struct inpcb *, struct sockopt *);
static int inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int inp_set_source_filters(struct inpcb *, struct sockopt *);
static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
/* Root of the net.inet.ip.mcast sysctl subtree. */
static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0,
"IPv4 multicast");
/*
 * Upper bound on IGMP-layer source nodes per group; compared against
 * inm_nsrc before a new ip_msource is allocated.  Writable at run time
 * (CTLFLAG_RW) and also registered as a tunable.
 */
static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
"Max source filters per group");
TUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);
/*
 * Upper bound on socket-layer source entries per filter set; compared
 * against imf_nsrc before a new in_msource is allocated.  Writable at
 * run time (CTLFLAG_RW) and also registered as a tunable.
 */
static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
"Max source filters per socket");
TUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);
/* Default multicast loopback setting; non-static, so visible to other files. */
int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
&in_mcast_loop, 0, "Loopback multicast datagrams by default");
TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
/* Read-only node served by sysctl_ip_mcast_filters(). */
static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
"Per-interface stack-wide source filters");
/*
 * List of ip_moptions and the task whose handler is inp_gcmoptions().
 * NOTE(review): presumably used for deferred release of ip_moptions;
 * the producers and the handler body are not visible here -- confirm.
 */
static STAILQ_HEAD(, ip_moptions) imo_gc_list =
STAILQ_HEAD_INITIALIZER(imo_gc_list);
static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
/*
* Inline function which wraps assertions for a valid ifp.
* The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
* is detached.
*/
static int __inline
inm_is_ifp_detached(const struct in_multi *inm)
{
struct ifnet *ifp;
KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
ifp = inm->inm_ifma->ifma_ifp;
if (ifp != NULL) {
/*
* Sanity check that netinet's notion of ifp is the
* same as net's.
*/
KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
}
return (ifp == NULL);
}
/*
 * Reset a socket-layer filter set: zero the whole structure, give it
 * an empty source tree, and record the requested filter modes for
 * t0 (st0) and t1 (st1).
 */
static __inline void
imf_init(struct in_mfilter *imf, const int st0, const int st1)
{

	memset(imf, 0, sizeof(*imf));
	RB_INIT(&imf->imf_sources);
	imf->imf_st[1] = st1;
	imf->imf_st[0] = st0;
}
/*
 * Resize the ip_moptions vectors to the next power-of-two minus 1.
 * May be called with locks held; do not sleep (M_NOWAIT is used).
 *
 * Both the membership vector and the parallel filter vector are grown.
 * Returns 0 on success.  Returns ETOOMANYREFS if the new size would
 * exceed IP_MAX_MEMBERSHIPS or if either allocation fails; in that
 * case imo_max_memberships is left unchanged.
 *
 * A successful realloc() may move the array and release the old
 * storage, so each new pointer is stored back into imo as soon as it
 * is obtained.  If only one of the two allocations succeeds, imo keeps
 * the (larger) array that did succeed: it still holds all existing
 * entries, and a later call simply reallocates it again.  Freeing it,
 * as was done previously, would leave imo pointing at released memory.
 */
static int
imo_grow(struct ip_moptions *imo)
{
	struct in_multi		**nmships;
	struct in_mfilter	 *nmfilters;
	size_t			  idx;
	size_t			  newmax;
	size_t			  oldmax;

	oldmax = imo->imo_max_memberships;
	newmax = ((oldmax + 1) * 2) - 1;
	if (newmax > IP_MAX_MEMBERSHIPS)
		return (ETOOMANYREFS);

	nmships = realloc(imo->imo_membership,
	    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
	if (nmships != NULL)
		imo->imo_membership = nmships;

	nmfilters = realloc(imo->imo_mfilters,
	    sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
	if (nmfilters != NULL)
		imo->imo_mfilters = nmfilters;

	/* Do not advertise the new capacity unless both vectors grew. */
	if (nmships == NULL || nmfilters == NULL)
		return (ETOOMANYREFS);

	/* Initialize newly allocated source filter heads. */
	for (idx = oldmax; idx < newmax; idx++)
		imf_init(&nmfilters[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);

	imo->imo_max_memberships = newmax;
	return (0);
}
/*
 * Locate the membership slot of this ip_moptions instance that matches
 * the given group address and, when ifp is non-NULL, the given
 * interface as well.
 *
 * Returns the slot index, or -1 (converted to size_t) when the socket
 * has no matching membership.
 */
static size_t
imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
    const struct sockaddr *group)
{
	const struct sockaddr_in *gsin;
	struct in_multi *inm;
	int i;
	int count;

	gsin = (const struct sockaddr_in *)group;

	/* The imo_membership array may be lazy allocated. */
	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
		return (-1);

	count = imo->imo_num_memberships;
	for (i = 0; i < count; i++) {
		inm = imo->imo_membership[i];
		if (inm == NULL)
			continue;
		/* A NULL ifp acts as a wildcard for the interface. */
		if (ifp != NULL && inm->inm_ifp != ifp)
			continue;
		if (in_hosteq(inm->inm_addr, gsin->sin_addr))
			return (i);
	}
	return (-1);
}
/*
* Find an IPv4 multicast source entry for this imo which matches
* the given group index for this socket, and source address.
*
* NOTE: This does not check if the entry is in-mode, merely if
* it exists, which may not be the desired behaviour.
*/
static struct in_msource *
imo_match_source(const struct ip_moptions *imo, const size_t gidx,
const struct sockaddr *src)
{
struct ip_msource find;
struct in_mfilter *imf;
struct ip_msource *ims;
const sockunion_t *psa;
KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
("%s: invalid index %d\n", __func__, (int)gidx));
/* The imo_mfilters array may be lazy allocated. */
if (imo->imo_mfilters == NULL)
return (NULL);
imf = &imo->imo_mfilters[gidx];
/* Source trees are keyed in host byte order. */
psa = (const sockunion_t *)src;
find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
return ((struct in_msource *)ims);
}
/*
* Perform filtering for multicast datagrams on a socket by group and source.
*
* Returns 0 if a datagram should be allowed through, or various error codes
* if the socket was not a member of the group, or the source was muted, etc.
*/
int
imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
const struct sockaddr *group, const struct sockaddr *src)
{
size_t gidx;
struct in_msource *ims;
int mode;
KASSERT(ifp != NULL, ("%s: null ifp", __func__));
gidx = imo_match_group(imo, ifp, group);
if (gidx == -1)
return (MCAST_NOTGMEMBER);
/*
* Check if the source was included in an (S,G) join.
* Allow reception on exclusive memberships by default,
* reject reception on inclusive memberships by default.
* Exclude source only if an in-mode exclude filter exists.
* Include source only if an in-mode include filter exists.
* NOTE: We are comparing group state here at IGMP t1 (now)
* with socket-layer t0 (since last downcall).
*/
mode = imo->imo_mfilters[gidx].imf_st[1];
ims = imo_match_source(imo, gidx, src);
if ((ims == NULL && mode == MCAST_INCLUDE) ||
(ims != NULL && ims->imsl_st[0] != mode))
return (MCAST_NOTSMEMBER);
return (MCAST_PASS);
}
/*
 * Find and return a reference to an in_multi record for (ifp, group),
 * and bump its reference count.
 * If one does not exist, try to allocate it, and update link-layer
 * multicast filters on ifp to listen for group.
 *
 * ifp   - interface on which the group is being joined.
 * group - IPv4 group address.
 * pinm  - on success, receives the referenced in_multi record.
 *
 * Assumes the IN_MULTI lock is held across the call.
 * Return 0 if successful, otherwise the error from if_addmulti() or
 * ENOMEM if the in_multi record could not be allocated.
 */
static int
in_getmulti(struct ifnet *ifp, const struct in_addr *group,
    struct in_multi **pinm)
{
	struct sockaddr_in	 gsin;
	struct ifmultiaddr	*ifma;
	struct in_ifinfo	*ii;
	struct in_multi		*inm;
	int error;

	IN_MULTI_LOCK_ASSERT();

	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];

	inm = inm_lookup(ifp, *group);
	if (inm != NULL) {
		/*
		 * If we already joined this group, just bump the
		 * refcount and return it.
		 */
		KASSERT(inm->inm_refcount >= 1,
		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
		++inm->inm_refcount;
		*pinm = inm;
		return (0);
	}

	memset(&gsin, 0, sizeof(gsin));
	gsin.sin_family = AF_INET;
	gsin.sin_len = sizeof(struct sockaddr_in);
	gsin.sin_addr = *group;

	/*
	 * Check if a link-layer group is already associated
	 * with this network-layer group on the given ifnet.
	 */
	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
	if (error != 0)
		return (error);

	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
	IF_ADDR_WLOCK(ifp);

	/*
	 * If something other than netinet is occupying the link-layer
	 * group, print a meaningful error message and back out of
	 * the allocation.
	 * Otherwise, bump the refcount on the existing network-layer
	 * group association and return it.
	 */
	if (ifma->ifma_protospec != NULL) {
		inm = (struct in_multi *)ifma->ifma_protospec;
#ifdef INVARIANTS
		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
		    __func__));
		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
		    ("%s: ifma not AF_INET", __func__));
		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
		    !in_hosteq(inm->inm_addr, *group))
			panic("%s: ifma %p is inconsistent with %p (%s)",
			    __func__, ifma, inm, inet_ntoa(*group));
#endif
		++inm->inm_refcount;
		*pinm = inm;
		IF_ADDR_WUNLOCK(ifp);
		return (0);
	}

	IF_ADDR_WLOCK_ASSERT(ifp);

	/*
	 * A new in_multi record is needed; allocate and initialize it.
	 * We DO NOT perform an IGMP join as the in_ layer may need to
	 * push an initial source list down to IGMP to support SSM.
	 *
	 * The initial source filter state is INCLUDE, {} as per the RFC.
	 */
	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
	if (inm == NULL) {
		/*
		 * Drop the address lock before releasing the link-layer
		 * membership: if_delmulti_ifma() is called unlocked by
		 * inm_release_locked() and acquires the interface
		 * address lock on its own, so calling it with the write
		 * lock held would recurse on that lock.  The ifma stays
		 * valid across the unlock because we still own the
		 * reference obtained from if_addmulti().
		 */
		IF_ADDR_WUNLOCK(ifp);
		if_delmulti_ifma(ifma);
		return (ENOMEM);
	}
	inm->inm_addr = *group;
	inm->inm_ifp = ifp;
	inm->inm_igi = ii->ii_igmp;
	inm->inm_ifma = ifma;
	inm->inm_refcount = 1;
	inm->inm_state = IGMP_NOT_MEMBER;

	/*
	 * Pending state-changes per group are subject to a bounds check.
	 */
	IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);

	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
	RB_INIT(&inm->inm_srcs);

	/* Publish the record to the link layer while still locked. */
	ifma->ifma_protospec = inm;

	*pinm = inm;

	IF_ADDR_WUNLOCK(ifp);
	return (0);
}
/*
 * Give up one reference on an in_multi record.
 *
 * When the last reference goes away the record is detached from its
 * link-layer membership, its source state is purged, the record is
 * freed, and the link-layer membership itself is released.
 * The IN_MULTI lock must be held by the caller.
 */
void
inm_release_locked(struct in_multi *inm)
{
	struct ifmultiaddr *link_ma;

	IN_MULTI_LOCK_ASSERT();

	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);

	inm->inm_refcount--;
	if (inm->inm_refcount > 0) {
		CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
		    inm->inm_refcount);
		return;
	}

	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);

	/* Remember the ifma; inm is about to be freed. */
	link_ma = inm->inm_ifma;

	/* XXX this access is not covered by IF_ADDR_LOCK */
	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, link_ma);
	KASSERT(link_ma->ifma_protospec == inm,
	    ("%s: ifma_protospec != inm", __func__));
	link_ma->ifma_protospec = NULL;

	inm_purge(inm);
	free(inm, M_IPMADDR);

	if_delmulti_ifma(link_ma);
}
/*
 * Drop the "recorded" mark from every source of a group and bring the
 * group's t1 recorded-source counter back down accordingly.
 * Used by the IGMP code.  Caller must hold the IN_MULTI lock.
 * FIXME: Should reap.
 */
void
inm_clear_recorded(struct in_multi *inm)
{
	struct ip_msource *src;

	IN_MULTI_LOCK_ASSERT();

	RB_FOREACH(src, ip_msource_tree, &inm->inm_srcs) {
		if (src->ims_stp == 0)
			continue;
		src->ims_stp = 0;
		--inm->inm_st[1].iss_rec;
	}

	/* Every recorded source must have been accounted for. */
	KASSERT(inm->inm_st[1].iss_rec == 0,
	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
}
/*
 * Record a source as pending for a Source-Group IGMPv3 query.
 * This lives here as it modifies the shared tree.
 *
 * inm is the group descriptor.
 * naddr is the address of the source to record in network-byte order.
 *
 * If the source has no node in the group's tree yet, one is allocated
 * (without sleeping) and inserted, subject to the
 * net.inet.ip.mcast.maxgrpsrc limit.
 *
 * Caller must hold the IN_MULTI lock.
 *
 * Return 0 if the source was already marked as recorded.
 * Return 1 if the source was marked as recorded by this function.
 * Return <0 if any error occurred (negated errno code): -ENOSPC when
 * the per-group source limit has been reached, -ENOMEM when the node
 * could not be allocated.
 */
int
inm_record_source(struct in_multi *inm, const in_addr_t naddr)
{
	struct ip_msource find;
	struct ip_msource *ims;

	IN_MULTI_LOCK_ASSERT();

	/* The source tree is keyed in host byte order. */
	find.ims_haddr = ntohl(naddr);
	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
	if (ims != NULL && ims->ims_stp)
		return (0);

	if (ims == NULL) {
		/*
		 * Use >= rather than ==: the limit is writable at run
		 * time, so the current count may already exceed it.
		 */
		if (inm->inm_nsrc >= in_mcast_maxgrpsrc)
			return (-ENOSPC);
		ims = malloc(sizeof(*ims), M_IPMSOURCE, M_NOWAIT | M_ZERO);
		if (ims == NULL)
			return (-ENOMEM);
		ims->ims_haddr = find.ims_haddr;
		RB_INSERT(ip_msource_tree, &inm->inm_srcs, ims);
		++inm->inm_nsrc;
	}

	/*
	 * Mark the source as recorded and update the recorded
	 * source count.
	 */
	++ims->ims_stp;
	++inm->inm_st[1].iss_rec;

	return (1);
}
/*
 * Return a pointer to an in_msource owned by an in_mfilter,
 * given its source address.
 * Lazy-allocate if needed. If this is a new entry its filter state is
 * undefined at t0 (and, being zero-filled, zero at t1).
 *
 * imf is the filter set being modified.
 * psin holds the source address in *network* byte-order; it is
 * converted to host byte-order for use as the tree key.
 * *plims receives the entry on success and is not written on error.
 *
 * Return 0 on success, ENOSPC if the per-socket source limit
 * (net.inet.ip.mcast.maxsocksrc) has been reached, or ENOMEM if the
 * entry could not be allocated.
 *
 * SMPng: May be called with locks held; malloc must not block.
 */
static int
imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
    struct in_msource **plims)
{
	struct ip_msource find;
	struct ip_msource *nims;
	struct in_msource *lims;

	/* key is host byte order */
	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
	lims = (struct in_msource *)RB_FIND(ip_msource_tree,
	    &imf->imf_sources, &find);
	if (lims == NULL) {
		/*
		 * Use >= rather than ==: the limit is writable at run
		 * time, so the current count may already exceed it.
		 */
		if (imf->imf_nsrc >= in_mcast_maxsocksrc)
			return (ENOSPC);
		nims = malloc(sizeof(struct in_msource), M_INMFILTER,
		    M_NOWAIT | M_ZERO);
		if (nims == NULL)
			return (ENOMEM);
		lims = (struct in_msource *)nims;
		lims->ims_haddr = find.ims_haddr;
		lims->imsl_st[0] = MCAST_UNDEFINED;
		RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
		++imf->imf_nsrc;
	}

	*plims = lims;
	return (0);
}
/*
* Graft a source entry into an existing socket-layer filter set,
* maintaining any required invariants and checking allocations.
*
* The source is marked as being in the new filter mode at t1.
*
* Return the pointer to the new node, otherwise return NULL.
*/
static struct in_msource *
imf_graft(struct in_mfilter *imf, const uint8_t st1,
const struct sockaddr_in *psin)
{
struct ip_msource *nims;
struct in_msource *lims;
nims = malloc(sizeof(struct in_msource), M_INMFILTER,
M_NOWAIT | M_ZERO);
if (nims == NULL)
return (NULL);
lims = (struct in_msource *)nims;
lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
lims->imsl_st[0] = MCAST_UNDEFINED;
lims->imsl_st[1] = st1;
RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
++imf->imf_nsrc;
return (lims);
}
/*
* Prune a source entry from an existing socket-layer filter set,
* maintaining any required invariants and checking allocations.
*
* The source is marked as being left at t1, it is not freed.
*
* Return 0 if no error occurred, otherwise return an errno value.
*/
static int
imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
{
struct ip_msource find;
struct ip_msource *ims;
struct in_msource *lims;
/* key is host byte order */
find.ims_haddr = ntohl(psin->sin_addr.s_addr);
ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
if (ims == NULL)
return (ENOENT);
lims = (struct in_msource *)ims;
lims->imsl_st[1] = MCAST_UNDEFINED;
return (0);
}
/*
 * Undo all pending (t1) changes of a socket-layer filter set so that
 * it matches its committed (t0) state again.
 */
static void
imf_rollback(struct in_mfilter *imf)
{
	struct ip_msource *node, *next;
	struct in_msource *lims;

	RB_FOREACH_SAFE(node, ip_msource_tree, &imf->imf_sources, next) {
		lims = (struct in_msource *)node;

		/* no change at t1 */
		if (lims->imsl_st[1] == lims->imsl_st[0])
			continue;

		if (lims->imsl_st[0] == MCAST_UNDEFINED) {
			/* revert source added t1 */
			CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, node);
			RB_REMOVE(ip_msource_tree, &imf->imf_sources, node);
			free(node, M_INMFILTER);
			imf->imf_nsrc--;
			continue;
		}

		/* revert change to existing source at t1 */
		lims->imsl_st[1] = lims->imsl_st[0];
	}

	imf->imf_st[1] = imf->imf_st[0];
}
/*
* Mark socket-layer filter set as INCLUDE {} at t1.
*/
static void
imf_leave(struct in_mfilter *imf)
{
struct ip_msource *ims;
struct in_msource *lims;
RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
lims = (struct in_msource *)ims;
lims->imsl_st[1] = MCAST_UNDEFINED;
}
imf->imf_st[1] = MCAST_INCLUDE;
}
/*
 * Commit the pending changes of a socket-layer filter set: the t1
 * state of the filter mode and of every source entry is copied over
 * the t0 state.
 */
static void
imf_commit(struct in_mfilter *imf)
{
	struct ip_msource *node;
	struct in_msource *entry;

	imf->imf_st[0] = imf->imf_st[1];

	RB_FOREACH(node, ip_msource_tree, &imf->imf_sources) {
		entry = (struct in_msource *)node;
		entry->imsl_st[0] = entry->imsl_st[1];
	}
}
/*
 * Free every source entry of a socket-layer filter set whose state is
 * undefined at both t0 and t1, i.e. which is no longer referenced.
 */
static void
imf_reap(struct in_mfilter *imf)
{
	struct ip_msource *node, *next;
	struct in_msource *entry;

	RB_FOREACH_SAFE(node, ip_msource_tree, &imf->imf_sources, next) {
		entry = (struct in_msource *)node;

		/* Still in use at t0 or at t1: keep it. */
		if (entry->imsl_st[0] != MCAST_UNDEFINED ||
		    entry->imsl_st[1] != MCAST_UNDEFINED)
			continue;

		CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, node);
		RB_REMOVE(ip_msource_tree, &imf->imf_sources, node);
		free(node, M_INMFILTER);
		imf->imf_nsrc--;
	}
}
/*
* Purge socket-layer filter set.
*/
static void
imf_purge(struct in_mfilter *imf)
{
struct ip_msource *ims, *tims;
RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
free(ims, M_INMFILTER);
imf->imf_nsrc--;
}
imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
KASSERT(RB_EMPTY(&imf->imf_sources),
("%s: imf_sources not empty", __func__));
}
/*
 * Look up a source filter entry for a multicast group.
 *
 * inm is the group descriptor to work with.
 * haddr is the host-byte-order IPv4 address to look up.
 * noalloc may be non-zero to suppress allocation of sources.
 * *pims will be set to the address of the retrieved or allocated
 * source; it is set to NULL when the source does not exist and
 * noalloc is non-zero, and it is not written at all on error.
 *
 * SMPng: NOTE: may be called with locks held (allocation is M_NOWAIT).
 * Return 0 if successful, ENOSPC if the per-group source limit
 * (net.inet.ip.mcast.maxgrpsrc) has been reached, or ENOMEM if the
 * node could not be allocated.
 */
static int
inm_get_source(struct in_multi *inm, const in_addr_t haddr,
    const int noalloc, struct ip_msource **pims)
{
	struct ip_msource find;
	struct ip_msource *ims;
#ifdef KTR
	struct in_addr ia;
#endif

	find.ims_haddr = haddr;
	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
	if (ims == NULL && !noalloc) {
		/*
		 * Use >= rather than ==: the limit is writable at run
		 * time, so the current count may already exceed it.
		 */
		if (inm->inm_nsrc >= in_mcast_maxgrpsrc)
			return (ENOSPC);
		ims = malloc(sizeof(*ims), M_IPMSOURCE, M_NOWAIT | M_ZERO);
		if (ims == NULL)
			return (ENOMEM);
		ims->ims_haddr = haddr;
		RB_INSERT(ip_msource_tree, &inm->inm_srcs, ims);
		++inm->inm_nsrc;
#ifdef KTR
		ia.s_addr = htonl(haddr);
		CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
		    inet_ntoa(ia), ims);
#endif
	}

	*pims = ims;
	return (0);
}
/*
 * Fold the t0 -> t1 transition of one socket-layer source (lims) into
 * the t1 listener counters of the matching IGMP-layer source (ims).
 *
 * The counter for the mode held at t0 is decreased and the counter for
 * the mode held at t1 is increased.  With a non-zero rollback the
 * signs are flipped, undoing an earlier merge of the same transition.
 */
static void
ims_merge(struct ip_msource *ims, const struct in_msource *lims,
    const int rollback)
{
	int delta;
#ifdef KTR
	struct in_addr ia;

	ia.s_addr = htonl(ims->ims_haddr);
#endif

	delta = rollback ? -1 : 1;

	/* Withdraw the contribution made under the t0 mode. */
	switch (lims->imsl_st[0]) {
	case MCAST_EXCLUDE:
		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
		    __func__, delta, inet_ntoa(ia));
		ims->ims_st[1].ex -= delta;
		break;
	case MCAST_INCLUDE:
		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
		    __func__, delta, inet_ntoa(ia));
		ims->ims_st[1].in -= delta;
		break;
	default:
		break;
	}

	/* Apply the contribution made under the t1 mode. */
	switch (lims->imsl_st[1]) {
	case MCAST_EXCLUDE:
		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
		    __func__, delta, inet_ntoa(ia));
		ims->ims_st[1].ex += delta;
		break;
	case MCAST_INCLUDE:
		CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
		    __func__, delta, inet_ntoa(ia));
		ims->ims_st[1].in += delta;
		break;
	default:
		break;
	}
}
/*
* Atomically update the global in_multi state, when a membership's
* filter list is being updated in any way.
*
* imf is the per-inpcb-membership group filter pointer.
* A fake imf may be passed for in-kernel consumers.
*
* XXX This is a candidate for a set-symmetric-difference style loop
* which would eliminate the repeated lookup from root of ims nodes,
* as they share the same key space.
*
* If any error occurred this function will back out of refcounts
* and return a non-zero value.
*/
static int
inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
struct ip_msource *ims, *nims;
struct in_msource *lims;
int schanged, error;
int nsrc0, nsrc1;
schanged = 0;
error = 0;
nsrc1 = nsrc0 = 0;
/*
* Update the source filters first, as this may fail.
* Maintain count of in-mode filters at t0, t1. These are
* used to work out if we transition into ASM mode or not.
* Maintain a count of source filters whose state was
* actually modified by this operation.
*/
RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
lims = (struct in_msource *)ims;
if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
++schanged;
if (error)
break;
ims_merge(nims, lims, 0);
}
if (error) {
struct ip_msource *bims;
RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
lims = (struct in_msource *)ims;
if (lims->imsl_st[0] == lims->imsl_st[1])
continue;
(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
if (bims == NULL)
continue;
ims_merge(bims, lims, 1);
}
goto out_reap;
}
CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
__func__, nsrc0, nsrc1);
/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
if (imf->imf_st[0] == imf->imf_st[1] &&
imf->imf_st[1] == MCAST_INCLUDE) {
if (nsrc1 == 0) {
CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
--inm->inm_st[1].iss_in;
}
}
/* Handle filter mode transition on socket. */
if (imf->imf_st[0] != imf->imf_st[1]) {
CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
__func__, imf->imf_st[0], imf->imf_st[1]);
if (imf->imf_st[0] == MCAST_EXCLUDE) {
CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
--inm->inm_st[1].iss_ex;
} else if (imf->imf_st[0] == MCAST_INCLUDE) {
CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
--inm->inm_st[1].iss_in;
}
if (imf->imf_st[1] == MCAST_EXCLUDE) {
CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
inm->inm_st[1].iss_ex++;
} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
inm->inm_st[1].iss_in++;
}
}
/*
* Track inm filter state in terms of listener counts.
* If there are any exclusive listeners, stack-wide
* membership is exclusive.
* Otherwise, if only inclusive listeners, stack-wide is inclusive.
* If no listeners remain, state is undefined at t1,
* and the IGMP lifecycle for this group should finish.
*/
if (inm->inm_st[1].iss_ex > 0) {
CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);