Skip to content

Commit

Permalink
northd, controller: Flood ARP and NA packet on transit router.
Browse files Browse the repository at this point in the history
When packets go between AZs through a transit router for the first
time, there isn't any MAC binding for the remote port equivalent. The
TR will properly generate an ARP/ND NS packet that will arrive at the
remote AZ; however, the response would never leave the remote AZ. As a
consequence, the local AZ would never learn this MAC binding.

To prevent the described behavior add a new table that will contain
all remote chassis and corresponding encapsulations that allow us
to just flood all chassis with any packet that will be sent to this
table. At the same time add a new action that sends the packet to this
table.

In order to properly generate the MAC binding we need to redirect the
ARP into ingress instead of egress, which is the usual path for packets
received from tunnels. Add flows that match on ARP and ND NA in
combination with an outport of 0, which indicates that this is the
remote flood flow. The only exception is VXLAN, which doesn't have
enough space for outport encoding; in that case we need to send the
packet to both ingress and egress, as we cannot determine whether it was
part of the remote flood or a regular packet that arrived from another
chassis in the same AZ.

Signed-off-by: Ales Musil <[email protected]>
  • Loading branch information
almusil committed Nov 29, 2024
1 parent 7dbae80 commit 29deafd
Show file tree
Hide file tree
Showing 11 changed files with 292 additions and 24 deletions.
1 change: 1 addition & 0 deletions controller/lflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow,
.ct_nw_dst_load_table = OFTABLE_CT_ORIG_NW_DST_LOAD,
.ct_ip6_dst_load_table = OFTABLE_CT_ORIG_IP6_DST_LOAD,
.ct_tp_dst_load_table = OFTABLE_CT_ORIG_TP_DST_LOAD,
.flood_remote_table = OFTABLE_FLOOD_REMOTE_CHASSIS,
.ctrl_meter_id = ctrl_meter_id,
.common_nat_ct_zone = get_common_nat_zone(ldp),
};
Expand Down
4 changes: 4 additions & 0 deletions controller/lflow.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ struct uuid;
#define OFTABLE_CT_ORIG_NW_DST_LOAD 81
#define OFTABLE_CT_ORIG_IP6_DST_LOAD 82
#define OFTABLE_CT_ORIG_TP_DST_LOAD 83
#define OFTABLE_FLOOD_REMOTE_CHASSIS 84

/* Common defines shared between some controller components. */
#define CHASSIS_FLOOD_INDEX_START 0x8000


struct lflow_ctx_in {
Expand Down
188 changes: 170 additions & 18 deletions controller/physical.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,73 @@ put_encapsulation(enum mf_field_id mff_ovn_geneve,
}
}

static void
put_decapsulation(enum mf_field_id mff_ovn_geneve,
const struct chassis_tunnel *tun,
struct ofpbuf *ofpacts)
{
if (tun->type == GENEVE) {
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, ofpacts);
put_move(mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15, ofpacts);
put_move(mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16, ofpacts);
} else if (tun->type == STT) {
put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, ofpacts);
put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, ofpacts);
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, ofpacts);
} else if (tun->type == VXLAN) {
/* Add flows for non-VTEP tunnels. Split VNI into two 12-bit
* sections and use them for datapath and outport IDs. */
put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 12, ofpacts);
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, ofpacts);
} else {
OVS_NOT_REACHED();
}
}


/* Appends to 'ofpacts' the actions that fill in the tunnel header for a
 * packet that is about to be flooded to remote chassis over a tunnel of the
 * given 'type'.
 *
 * For GENEVE and STT the datapath key and the logical inport are copied into
 * the tunnel header and the 16-bit outport field is explicitly zeroed.  For
 * VXLAN the low 12 bits of the logical inport go into bits 12-23 of
 * MFF_TUN_ID and the low 12 bits of the datapath key into bits 0-11.
 * 'mff_ovn_geneve' is the field written for GENEVE.  Any other tunnel type
 * hits OVS_NOT_REACHED(). */
static void
put_remote_chassis_flood_encap(struct ofpbuf *ofpacts,
                               enum chassis_tunnel_type type,
                               enum mf_field_id mff_ovn_geneve)
{
    if (type == VXLAN) {
        put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 12, 12, ofpacts);
        put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 12, ofpacts);
        return;
    }

    if (type == STT) {
        put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 40, 15, ofpacts);
        /* Bits 24-39 hold the outport; clear them. */
        put_load(0, MFF_TUN_ID, 24, 16, ofpacts);
        put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 24, ofpacts);
        return;
    }

    if (type != GENEVE) {
        OVS_NOT_REACHED();
    }

    put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 24, ofpacts);
    /* Zero the whole 32-bit option first, which leaves the outport (bits
     * 0-15) at zero once the inport has been written into bits 16-30. */
    put_load(0, mff_ovn_geneve, 0, 32, ofpacts);
    put_move(MFF_LOG_INPORT, 0, mff_ovn_geneve, 16, 15, ofpacts);
}

static void
match_set_chassis_flood_outport(struct match *match,
enum chassis_tunnel_type type,
enum mf_field_id mff_ovn_geneve)
{
if (type == GENEVE) {
/* Outport occupies the lower half of tunnel metadata (0-15). */
union mf_value value, mask;
memset(&value, 0, sizeof value);
memset(&mask, 0, sizeof mask);

const struct mf_field *mf_ovn_geneve = mf_from_id(mff_ovn_geneve);
memset(&mask.tun_metadata[mf_ovn_geneve->n_bytes - 2], 0xff, 2);

tun_metadata_set_match(mf_ovn_geneve, &value, &mask, match, NULL);
} else if (type == STT) {
/* Outport occupies bits 24-39. */
match_set_tun_id_masked(match, 0, htonll(UINT64_C(0xffff) << 24));
}
}


static void
put_stack(enum mf_field_id field, struct ofpact_stack *stack)
{
Expand Down Expand Up @@ -2349,6 +2416,106 @@ consider_mc_group(const struct physical_ctx *ctx,
sset_destroy(&vtep_chassis);
}

#define CHASSIS_FLOOD_MAX_MSG_SIZE MC_OFPACTS_MAX_MSG_SIZE

static void
physical_eval_remote_chassis_flows(const struct physical_ctx *ctx,
struct ofpbuf *egress_ofpacts,
struct ovn_desired_flow_table *flow_table)
{
struct match match = MATCH_CATCHALL_INITIALIZER;
uint32_t index = CHASSIS_FLOOD_INDEX_START;
struct chassis_tunnel *prev = NULL;

uint8_t actions_stub[256];
struct ofpbuf ingress_ofpacts;
ofpbuf_use_stub(&ingress_ofpacts, actions_stub, sizeof(actions_stub));

ofpbuf_clear(egress_ofpacts);

const struct sbrec_chassis *chassis;
SBREC_CHASSIS_TABLE_FOR_EACH (chassis, ctx->chassis_table) {
if (!smap_get_bool(&chassis->other_config, "is-remote", false)) {
continue;
}

struct chassis_tunnel *tun =
chassis_tunnel_find(ctx->chassis_tunnels, chassis->name,
NULL, NULL);
if (!tun) {
continue;
}

if (!(prev && prev->type == tun->type)) {
put_remote_chassis_flood_encap(egress_ofpacts, tun->type,
ctx->mff_ovn_geneve);
}

ofpact_put_OUTPUT(egress_ofpacts)->port = tun->ofport;
prev = tun;

if (egress_ofpacts->size > CHASSIS_FLOOD_MAX_MSG_SIZE) {
match_init_catchall(&match);
match_set_reg(&match, MFF_REG6 - MFF_REG0, index++);

put_split_buf_function(index, 0, OFTABLE_FLOOD_REMOTE_CHASSIS,
egress_ofpacts);

ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0,
&match, egress_ofpacts, hc_uuid);

ofpbuf_clear(egress_ofpacts);
prev = NULL;
}


ofpbuf_clear(&ingress_ofpacts);
put_decapsulation(ctx->mff_ovn_geneve, tun, &ingress_ofpacts);
put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ingress_ofpacts);
if (tun->type == VXLAN) {
/* VXLAN doesn't carry the inport information, we cannot set
* the outport to 0 then and match on it. */
put_resubmit(OFTABLE_LOCAL_OUTPUT, &ingress_ofpacts);
}

/* Add match on ARP response coming from remote chassis. */
match_init_catchall(&match);
match_set_in_port(&match, tun->ofport);
match_set_dl_type(&match, htons(ETH_TYPE_ARP));
match_set_arp_opcode_masked(&match, 2, UINT8_MAX);
match_set_chassis_flood_outport(&match, tun->type,
ctx->mff_ovn_geneve);

ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120,
chassis->header_.uuid.parts[0],
&match, &ingress_ofpacts, hc_uuid);

/* Add match on ND NA coming from remote chassis. */
match_init_catchall(&match);
match_set_in_port(&match, tun->ofport);
match_set_dl_type(&match, htons(ETH_TYPE_IPV6));
match_set_nw_proto(&match, IPPROTO_ICMPV6);
match_set_icmp_type(&match, 136);
match_set_icmp_code(&match, 0);
match_set_chassis_flood_outport(&match, tun->type,
ctx->mff_ovn_geneve);

ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120,
chassis->header_.uuid.parts[0],
&match, &ingress_ofpacts, hc_uuid);
}

if (egress_ofpacts->size > 0) {
match_init_catchall(&match);
match_set_reg(&match, MFF_REG6 - MFF_REG0, index);

ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0,
&match, egress_ofpacts, hc_uuid);
}

ofpbuf_uninit(&ingress_ofpacts);
}

static void
physical_eval_port_binding(struct physical_ctx *p_ctx,
const struct sbrec_port_binding *pb,
Expand Down Expand Up @@ -2504,24 +2671,7 @@ physical_run(struct physical_ctx *p_ctx,
match_set_in_port(&match, tun->ofport);

ofpbuf_clear(&ofpacts);
if (tun->type == GENEVE) {
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
put_move(p_ctx->mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15,
&ofpacts);
put_move(p_ctx->mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16,
&ofpacts);
} else if (tun->type == STT) {
put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, &ofpacts);
put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, &ofpacts);
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
} else if (tun->type == VXLAN) {
/* Add flows for non-VTEP tunnels. Split VNI into two 12-bit
* sections and use them for datapath and outport IDs. */
put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 12, &ofpacts);
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, &ofpacts);
} else {
OVS_NOT_REACHED();
}
put_decapsulation(p_ctx->mff_ovn_geneve, tun, &ofpacts);

put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match,
Expand Down Expand Up @@ -2773,5 +2923,7 @@ physical_run(struct physical_ctx *p_ctx,
ofctrl_add_flow(flow_table, OFTABLE_CT_ORIG_IP6_DST_LOAD, 100, 0, &match,
&ofpacts, hc_uuid);

physical_eval_remote_chassis_flows(p_ctx, &ofpacts, flow_table);

ofpbuf_uninit(&ofpacts);
}
3 changes: 3 additions & 0 deletions include/ovn/actions.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ struct collector_set_ids;
OVNACT(CT_ORIG_NW_DST, ovnact_result) \
OVNACT(CT_ORIG_IP6_DST, ovnact_result) \
OVNACT(CT_ORIG_TP_DST, ovnact_result) \
OVNACT(FLOOD_REMOTE, ovnact_null) \

/* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
enum OVS_PACKED_ENUM ovnact_type {
Expand Down Expand Up @@ -945,6 +946,8 @@ struct ovnact_encode_params {
* to resubmit. */
uint32_t ct_tp_dst_load_table; /* OpenFlow table for 'ct_tp_dst'
* to resubmit. */
uint32_t flood_remote_table; /* OpenFlow table for 'flood_remote'
* to resubmit. */
};

void ovnacts_encode(const struct ovnact[], size_t ovnacts_len,
Expand Down
17 changes: 17 additions & 0 deletions lib/actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -5531,6 +5531,21 @@ format_CT_ORIG_TP_DST(const struct ovnact_result *res, struct ds *s)
ds_put_cstr(s, " = ct_tp_dst();");
}

/* Formats the 'flood_remote' action by appending the text "flood_remote;" to
 * 's'.  The action carries no arguments, so 'null' is not used. */
static void
format_FLOOD_REMOTE(const struct ovnact_null *null OVS_UNUSED, struct ds *s)
{
ds_put_cstr(s, "flood_remote;");
}

/* Encodes the 'flood_remote' action into 'ofpacts': loads
 * CHASSIS_FLOOD_INDEX_START into bits 0-31 of MFF_REG6 and then emits a
 * resubmit to the OpenFlow table given by 'ep->flood_remote_table'.  The
 * action carries no arguments, so 'null' is not used. */
static void
encode_FLOOD_REMOTE(const struct ovnact_null *null OVS_UNUSED,
const struct ovnact_encode_params *ep,
struct ofpbuf *ofpacts)
{
/* Note that this overwrites the previous content of reg6. */
put_load(CHASSIS_FLOOD_INDEX_START, MFF_REG6, 0, 32, ofpacts);
emit_resubmit(ofpacts, ep->flood_remote_table);
}

/* Parses an assignment or exchange or put_dhcp_opts action. */
static void
parse_set_action(struct action_context *ctx)
Expand Down Expand Up @@ -5758,6 +5773,8 @@ parse_action(struct action_context *ctx)
parse_sample(ctx);
} else if (lexer_match_id(ctx->lexer, "mac_cache_use")) {
ovnact_put_MAC_CACHE_USE(ctx->ovnacts);
} else if (lexer_match_id(ctx->lexer, "flood_remote")) {
ovnact_put_FLOOD_REMOTE(ctx->ovnacts);
} else {
lexer_syntax_error(ctx->lexer, "expecting action");
}
Expand Down
12 changes: 7 additions & 5 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -13398,21 +13398,22 @@ build_neigh_learning_flows_for_lrouter(
* */

/* Flows for LOOKUP_NEIGHBOR. */
const char *flood = od->is_transit_router ? "flood_remote; " : "";
bool learn_from_arp_request = smap_get_bool(&od->nbr->options,
"always_learn_from_arp_request", true);
ds_clear(actions);
ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
" = lookup_arp(inport, arp.spa, arp.sha); %snext;",
" = lookup_arp(inport, arp.spa, arp.sha); %s%snext;",
learn_from_arp_request ? "" :
REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ", flood);
ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
"arp.op == 2", ds_cstr(actions), lflow_ref);

ds_clear(actions);
ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
" = lookup_nd(inport, nd.target, nd.tll); %snext;",
" = lookup_nd(inport, nd.target, nd.tll); %s%snext;",
learn_from_arp_request ? "" :
REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ", flood);
ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na",
ds_cstr(actions), lflow_ref);

Expand All @@ -13428,7 +13429,8 @@ build_neigh_learning_flows_for_lrouter(
ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
" = lookup_nd(inport, nd.target, nd.tll); "
REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
" = lookup_nd_ip(inport, nd.target); next;");
" = lookup_nd_ip(inport, nd.target); %snext;",
flood);
ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 110,
"nd_na && ip6.src == fe80::/10 && ip6.dst == ff00::/8",
ds_cstr(actions), lflow_ref);
Expand Down
76 changes: 76 additions & 0 deletions tests/ovn-controller.at
Original file line number Diff line number Diff line change
Expand Up @@ -3511,3 +3511,79 @@ AT_CHECK([grep -c "cookie=$lr1_peer_cookie," log_to_phy_flows], [0], [dnl

OVN_CLEANUP([hv1])
AT_CLEANUP

AT_SETUP([Remote chassis flood flows])
ovn_start

net_add n1
sim_add hv1
as hv1
check ovs-vsctl add-br br-phys
ovn_attach n1 br-phys 192.168.0.11 24 geneve,vxlan,stt

check ovs-vsctl set open . external_ids:ovn-is-interconn=true

check ovn-sbctl chassis-add hv2 geneve 192.168.0.12 \
-- set chassis hv2 other_config:is-remote=true

check ovn-sbctl chassis-add hv3 stt 192.168.0.13 \
-- set chassis hv3 other_config:is-remote=true

check ovn-sbctl chassis-add hv4 vxlan 192.168.0.14 \
-- set chassis hv4 other_config:is-remote=true

check ovn-nbctl --wait=hv sync

# chassis_cookie NAME
#
# Prints the first dash-separated group of the _uuid column fetched for the
# chassis row with name=NAME, with any leading zeros and the trailing newline
# removed.  The checks below prefix the result with 0x and compare it with
# the cookie of the flows dumped from br-int.
chassis_cookie() {
name=$1
fetch_column chassis _uuid name=$name |\
cut -d '-' -f 1 | tr -d '\n' | sed 's/^0\{0,8\}//'
}

ovs-ofctl dump-flows --names --no-stats br-int table=OFTABLE_PHY_TO_LOG > phy_to_log_flows
ovs-ofctl dump-flows --names --no-stats br-int table=OFTABLE_FLOOD_REMOTE_CHASSIS > flood_flows

# Check that we have all encap + output actions one by one because the order can change
# Geneve
AT_CHECK([grep -c 'move:OXM_OF_METADATA\[[0..23\]]->NXM_NX_TUN_ID\[[0..23\]],set_field:0->tun_metadata0,move:NXM_NX_REG14\[[0..14\]]->NXM_NX_TUN_METADATA0\[[16..30\]],output:"ovn-hv2-0"' flood_flows], [0], [dnl
1
])

# STT
AT_CHECK([grep -c 'move:NXM_NX_REG14\[[0..14\]]->NXM_NX_TUN_ID\[[40..54\]],load:0->NXM_NX_TUN_ID\[[24..39\]],move:OXM_OF_METADATA\[[0..23\]]->NXM_NX_TUN_ID\[[0..23\]],output:"ovn-hv3-0"' flood_flows], [0], [dnl
1
])

# VXLAN
AT_CHECK([grep -c 'move:NXM_NX_REG14\[[0..11\]]->NXM_NX_TUN_ID\[[12..23\]],move:OXM_OF_METADATA\[[0..11\]]->NXM_NX_TUN_ID\[[0..11\]],output:"ovn-hv4-0"' flood_flows], [0], [dnl
1
])

AT_CHECK([grep -c "reg6=0x8000" flood_flows], [0], [dnl
1
])

# Check ingress flows for ARP and ND NA
# Geneve
hv2_cookie="0x$(chassis_cookie hv2)"
AT_CHECK_UNQUOTED([grep "cookie=$hv2_cookie," phy_to_log_flows], [0], [dnl
cookie=$hv2_cookie, priority=120,arp,tun_metadata0=0,in_port="ovn-hv2-0",arp_op=2 actions=move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],move:NXM_NX_TUN_METADATA0[[16..30]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_METADATA0[[0..15]]->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
cookie=$hv2_cookie, priority=120,icmp6,tun_metadata0=0,in_port="ovn-hv2-0",icmp_type=136,icmp_code=0 actions=move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],move:NXM_NX_TUN_METADATA0[[16..30]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_METADATA0[[0..15]]->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
])

# STT
hv3_cookie="0x$(chassis_cookie hv3)"
AT_CHECK_UNQUOTED([grep "cookie=$hv3_cookie," phy_to_log_flows], [0], [dnl
cookie=$hv3_cookie, priority=120,icmp6,tun_id=0/0xffff000000,in_port="ovn-hv3-0",icmp_type=136,icmp_code=0 actions=move:NXM_NX_TUN_ID[[40..54]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_ID[[24..39]]->NXM_NX_REG15[[0..15]],move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
cookie=$hv3_cookie, priority=120,arp,tun_id=0/0xffff000000,in_port="ovn-hv3-0",arp_op=2 actions=move:NXM_NX_TUN_ID[[40..54]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_ID[[24..39]]->NXM_NX_REG15[[0..15]],move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
])

# VXLAN
hv4_cookie="0x$(chassis_cookie hv4)"
AT_CHECK_UNQUOTED([grep "cookie=$hv4_cookie," phy_to_log_flows], [0], [dnl
cookie=$hv4_cookie, priority=120,icmp6,in_port="ovn-hv4-0",icmp_type=136,icmp_code=0 actions=move:NXM_NX_TUN_ID[[12..23]]->NXM_NX_REG15[[0..11]],move:NXM_NX_TUN_ID[[0..11]]->OXM_OF_METADATA[[0..11]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE),resubmit(,OFTABLE_LOCAL_OUTPUT)
cookie=$hv4_cookie, priority=120,arp,in_port="ovn-hv4-0",arp_op=2 actions=move:NXM_NX_TUN_ID[[12..23]]->NXM_NX_REG15[[0..11]],move:NXM_NX_TUN_ID[[0..11]]->OXM_OF_METADATA[[0..11]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE),resubmit(,OFTABLE_LOCAL_OUTPUT)
])

OVN_CLEANUP([hv1])
AT_CLEANUP
Loading

0 comments on commit 29deafd

Please sign in to comment.