Skip to content

Commit

Permalink
northd, controller: Flood ARP and NA packet on transit router.
Browse files Browse the repository at this point in the history
When packets go between AZs through a transit router for the first
time, there isn't any MAC binding for the remote port equivalent. The
TR will properly generate an ARP/ND NS packet that will arrive at the
remote AZ; however, the response would never leave the remote AZ. As a
consequence, the local AZ would never learn this MAC binding.

To prevent the described behavior, add a new table that contains
all remote chassis and their corresponding encapsulations, which allows
us to flood all remote chassis with any packet that is sent to this
table. At the same time, add a new action that sends the packet to this
table.

In order to properly generate the MAC binding we need to redirect the ARP
into ingress instead of egress, which is the usual destination for packets
received from tunnels. Add flows that match on ARP and ND NA combined with
an outport of 0, which indicates that the packet came from the remote
flood flow. The only exception is VXLAN, which doesn't have enough space
for the outport encoding; in that case we need to send the packet to both
ingress and egress, as we cannot determine whether it was part of the
remote flood or a regular packet that arrived from another chassis in the
same AZ.

Signed-off-by: Ales Musil <[email protected]>
  • Loading branch information
almusil committed Dec 2, 2024
1 parent 7dbae80 commit f583761
Show file tree
Hide file tree
Showing 13 changed files with 498 additions and 24 deletions.
1 change: 1 addition & 0 deletions controller/lflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow,
.ct_nw_dst_load_table = OFTABLE_CT_ORIG_NW_DST_LOAD,
.ct_ip6_dst_load_table = OFTABLE_CT_ORIG_IP6_DST_LOAD,
.ct_tp_dst_load_table = OFTABLE_CT_ORIG_TP_DST_LOAD,
.flood_remote_table = OFTABLE_FLOOD_REMOTE_CHASSIS,
.ctrl_meter_id = ctrl_meter_id,
.common_nat_ct_zone = get_common_nat_zone(ldp),
};
Expand Down
4 changes: 4 additions & 0 deletions controller/lflow.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ struct uuid;
#define OFTABLE_CT_ORIG_NW_DST_LOAD 81
#define OFTABLE_CT_ORIG_IP6_DST_LOAD 82
#define OFTABLE_CT_ORIG_TP_DST_LOAD 83
/* OpenFlow table that holds the flows flooding a packet to the remote
 * chassis; it is the resubmit target of the 'flood_remote' action. */
#define OFTABLE_FLOOD_REMOTE_CHASSIS 84

/* Common defines shared between some controller components. */
/* Initial value of the index that selects a flow in
 * OFTABLE_FLOOD_REMOTE_CHASSIS; 'flood_remote' loads it before the
 * resubmit and the flood flows are numbered starting from it. */
#define CHASSIS_FLOOD_INDEX_START 0x8000


struct lflow_ctx_in {
Expand Down
188 changes: 170 additions & 18 deletions controller/physical.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,73 @@ put_encapsulation(enum mf_field_id mff_ovn_geneve,
}
}

/* Appends to 'ofpacts' the actions that unpack the tunnel key of a packet
 * received over 'tun' into the logical datapath, inport and outport fields.
 *
 * 'mff_ovn_geneve' is the tunnel metadata field consulted for GENEVE
 * tunnels.  For VXLAN only the datapath and the outport are restored; no
 * inport is read from the tunnel key.  Any other tunnel type is a
 * programming error. */
static void
put_decapsulation(enum mf_field_id mff_ovn_geneve,
                  const struct chassis_tunnel *tun,
                  struct ofpbuf *ofpacts)
{
    switch (tun->type) {
    case GENEVE:
        /* Datapath comes from the VNI; inport and outport from the
         * upper and lower halves of the OVN tunnel metadata option. */
        put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, ofpacts);
        put_move(mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15, ofpacts);
        put_move(mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16, ofpacts);
        break;

    case STT:
        /* Everything is packed into the 64-bit tunnel ID. */
        put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, ofpacts);
        put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, ofpacts);
        put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, ofpacts);
        break;

    case VXLAN:
        /* Non-VTEP tunnels: the VNI is split into two 12-bit halves, the
         * upper one for the outport and the lower one for the datapath. */
        put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 12, ofpacts);
        put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, ofpacts);
        break;

    default:
        OVS_NOT_REACHED();
    }
}


/* Appends to 'ofpacts' the actions that fill in the tunnel key for a packet
 * that is flooded to remote chassis over a tunnel of the given 'type'.
 *
 * The logical datapath and inport are copied into the key.  For GENEVE and
 * STT the bits that normally carry the outport are explicitly zeroed; for
 * VXLAN the inport is written into bits 12-23 of the tunnel ID and the
 * datapath into bits 0-11.  'mff_ovn_geneve' is the tunnel metadata field
 * written for GENEVE.  Any other tunnel type is a programming error. */
static void
put_remote_chassis_flood_encap(struct ofpbuf *ofpacts,
                               enum chassis_tunnel_type type,
                               enum mf_field_id mff_ovn_geneve)
{
    switch (type) {
    case GENEVE:
        put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 24, ofpacts);
        /* Clear the whole 32-bit option first so the outport half is 0,
         * then store the inport in the upper half. */
        put_load(0, mff_ovn_geneve, 0, 32, ofpacts);
        put_move(MFF_LOG_INPORT, 0, mff_ovn_geneve, 16, 15, ofpacts);
        break;

    case STT:
        put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 40, 15, ofpacts);
        /* Bits 24-39 hold the outport; force them to 0. */
        put_load(0, MFF_TUN_ID, 24, 16, ofpacts);
        put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 24, ofpacts);
        break;

    case VXLAN:
        put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 12, 12, ofpacts);
        put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 12, ofpacts);
        break;

    default:
        OVS_NOT_REACHED();
    }
}

static void
match_set_chassis_flood_outport(struct match *match,
enum chassis_tunnel_type type,
enum mf_field_id mff_ovn_geneve)
{
if (type == GENEVE) {
/* Outport occupies the lower half of tunnel metadata (0-15). */
union mf_value value, mask;
memset(&value, 0, sizeof value);
memset(&mask, 0, sizeof mask);

const struct mf_field *mf_ovn_geneve = mf_from_id(mff_ovn_geneve);
memset(&mask.tun_metadata[mf_ovn_geneve->n_bytes - 2], 0xff, 2);

tun_metadata_set_match(mf_ovn_geneve, &value, &mask, match, NULL);
} else if (type == STT) {
/* Outport occupies bits 24-39. */
match_set_tun_id_masked(match, 0, htonll(UINT64_C(0xffff) << 24));
}
}


static void
put_stack(enum mf_field_id field, struct ofpact_stack *stack)
{
Expand Down Expand Up @@ -2349,6 +2416,106 @@ consider_mc_group(const struct physical_ctx *ctx,
sset_destroy(&vtep_chassis);
}

#define CHASSIS_FLOOD_MAX_MSG_SIZE MC_OFPACTS_MAX_MSG_SIZE

/* Adds to 'flow_table' the flows used to flood packets to remote chassis and
 * to receive the ARP replies / ND NAs that such chassis flood to us.
 *
 * A chassis is considered only if its 'other_config' has "is-remote" set to
 * true and a tunnel to it is found in 'ctx->chassis_tunnels'.
 *
 * Egress: the tunnel-key setup and one output action per remote tunnel are
 * accumulated in 'egress_ofpacts' and installed in
 * OFTABLE_FLOOD_REMOTE_CHASSIS at priority 100, matching REG6 against an
 * index that starts at CHASSIS_FLOOD_INDEX_START.  The tunnel-key setup is
 * emitted again only when the tunnel type differs from the previous one.
 * Once the accumulated actions exceed CHASSIS_FLOOD_MAX_MSG_SIZE the flow is
 * flushed and a new one is started under the next index;
 * put_split_buf_function() is called with that next index (presumably to
 * chain into the follow-up flow -- confirm against its definition).
 *
 * Ingress: for every remote tunnel two priority-120 flows are installed in
 * OFTABLE_PHY_TO_LOG, one for ARP replies and one for ND NA, both limited to
 * packets whose tunnel key carries outport 0 where the encapsulation allows
 * matching on it.  They decapsulate the key and resubmit to the logical
 * ingress pipeline; VXLAN packets are additionally resubmitted to
 * OFTABLE_LOCAL_OUTPUT.
 *
 * 'egress_ofpacts' is caller-provided scratch space: it is cleared on entry
 * and its content on return should not be relied upon. */
static void
physical_eval_remote_chassis_flows(const struct physical_ctx *ctx,
                                   struct ofpbuf *egress_ofpacts,
                                   struct ovn_desired_flow_table *flow_table)
{
    struct match match = MATCH_CATCHALL_INITIALIZER;
    uint32_t index = CHASSIS_FLOOD_INDEX_START;
    struct chassis_tunnel *prev = NULL;

    /* The stub is declared as uint64_t (not uint8_t) so that it is suitably
     * aligned for the ofpact structures that get built in place inside it.
     * The size in bytes is unchanged. */
    uint64_t actions_stub[256 / 8];
    struct ofpbuf ingress_ofpacts;
    ofpbuf_use_stub(&ingress_ofpacts, actions_stub, sizeof actions_stub);

    ofpbuf_clear(egress_ofpacts);

    const struct sbrec_chassis *chassis;
    SBREC_CHASSIS_TABLE_FOR_EACH (chassis, ctx->chassis_table) {
        if (!smap_get_bool(&chassis->other_config, "is-remote", false)) {
            continue;
        }

        struct chassis_tunnel *tun =
            chassis_tunnel_find(ctx->chassis_tunnels, chassis->name,
                                NULL, NULL);
        if (!tun) {
            continue;
        }

        /* The tunnel key only has to be (re)written when the encapsulation
         * type changes, or at the start of a fresh flow. */
        if (!(prev && prev->type == tun->type)) {
            put_remote_chassis_flood_encap(egress_ofpacts, tun->type,
                                           ctx->mff_ovn_geneve);
        }

        ofpact_put_OUTPUT(egress_ofpacts)->port = tun->ofport;
        prev = tun;

        if (egress_ofpacts->size > CHASSIS_FLOOD_MAX_MSG_SIZE) {
            /* Flush what we have under the current index and continue in a
             * new flow under the next one. */
            match_init_catchall(&match);
            match_set_reg(&match, MFF_REG6 - MFF_REG0, index++);

            put_split_buf_function(index, 0, OFTABLE_FLOOD_REMOTE_CHASSIS,
                                   egress_ofpacts);

            ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0,
                            &match, egress_ofpacts, hc_uuid);

            ofpbuf_clear(egress_ofpacts);
            prev = NULL;
        }

        /* Actions shared by both ingress flows of this tunnel. */
        ofpbuf_clear(&ingress_ofpacts);
        put_decapsulation(ctx->mff_ovn_geneve, tun, &ingress_ofpacts);
        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ingress_ofpacts);
        if (tun->type == VXLAN) {
            /* VXLAN doesn't carry the inport information, we cannot set
             * the outport to 0 then and match on it. */
            put_resubmit(OFTABLE_LOCAL_OUTPUT, &ingress_ofpacts);
        }

        /* Add match on ARP response coming from remote chassis. */
        match_init_catchall(&match);
        match_set_in_port(&match, tun->ofport);
        match_set_dl_type(&match, htons(ETH_TYPE_ARP));
        match_set_arp_opcode_masked(&match, 2, UINT8_MAX);
        match_set_chassis_flood_outport(&match, tun->type,
                                        ctx->mff_ovn_geneve);

        ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120,
                        chassis->header_.uuid.parts[0],
                        &match, &ingress_ofpacts, hc_uuid);

        /* Add match on ND NA coming from remote chassis. */
        match_init_catchall(&match);
        match_set_in_port(&match, tun->ofport);
        match_set_dl_type(&match, htons(ETH_TYPE_IPV6));
        match_set_nw_proto(&match, IPPROTO_ICMPV6);
        match_set_icmp_type(&match, 136);
        match_set_icmp_code(&match, 0);
        match_set_chassis_flood_outport(&match, tun->type,
                                        ctx->mff_ovn_geneve);

        ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120,
                        chassis->header_.uuid.parts[0],
                        &match, &ingress_ofpacts, hc_uuid);
    }

    /* Install whatever is left over from the last (or only) batch. */
    if (egress_ofpacts->size > 0) {
        match_init_catchall(&match);
        match_set_reg(&match, MFF_REG6 - MFF_REG0, index);

        ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0,
                        &match, egress_ofpacts, hc_uuid);
    }

    ofpbuf_uninit(&ingress_ofpacts);
}

static void
physical_eval_port_binding(struct physical_ctx *p_ctx,
const struct sbrec_port_binding *pb,
Expand Down Expand Up @@ -2504,24 +2671,7 @@ physical_run(struct physical_ctx *p_ctx,
match_set_in_port(&match, tun->ofport);

ofpbuf_clear(&ofpacts);
if (tun->type == GENEVE) {
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
put_move(p_ctx->mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15,
&ofpacts);
put_move(p_ctx->mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16,
&ofpacts);
} else if (tun->type == STT) {
put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, &ofpacts);
put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, &ofpacts);
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
} else if (tun->type == VXLAN) {
/* Add flows for non-VTEP tunnels. Split VNI into two 12-bit
* sections and use them for datapath and outport IDs. */
put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 12, &ofpacts);
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, &ofpacts);
} else {
OVS_NOT_REACHED();
}
put_decapsulation(p_ctx->mff_ovn_geneve, tun, &ofpacts);

put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match,
Expand Down Expand Up @@ -2773,5 +2923,7 @@ physical_run(struct physical_ctx *p_ctx,
ofctrl_add_flow(flow_table, OFTABLE_CT_ORIG_IP6_DST_LOAD, 100, 0, &match,
&ofpacts, hc_uuid);

physical_eval_remote_chassis_flows(p_ctx, &ofpacts, flow_table);

ofpbuf_uninit(&ofpacts);
}
3 changes: 3 additions & 0 deletions include/ovn/actions.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ struct collector_set_ids;
OVNACT(CT_ORIG_NW_DST, ovnact_result) \
OVNACT(CT_ORIG_IP6_DST, ovnact_result) \
OVNACT(CT_ORIG_TP_DST, ovnact_result) \
OVNACT(FLOOD_REMOTE, ovnact_null) \

/* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
enum OVS_PACKED_ENUM ovnact_type {
Expand Down Expand Up @@ -945,6 +946,8 @@ struct ovnact_encode_params {
* to resubmit. */
uint32_t ct_tp_dst_load_table; /* OpenFlow table for 'ct_tp_dst'
* to resubmit. */
uint32_t flood_remote_table; /* OpenFlow table for 'flood_remote'
                              * to resubmit. */
};

void ovnacts_encode(const struct ovnact[], size_t ovnacts_len,
Expand Down
17 changes: 17 additions & 0 deletions lib/actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -5531,6 +5531,21 @@ format_CT_ORIG_TP_DST(const struct ovnact_result *res, struct ds *s)
ds_put_cstr(s, " = ct_tp_dst();");
}

/* Appends the textual form of the 'flood_remote' action to 's'.  The action
 * has no arguments, so 'null' is not consulted. */
static void
format_FLOOD_REMOTE(const struct ovnact_null *null OVS_UNUSED, struct ds *s)
{
    static const char action_text[] = "flood_remote;";

    ds_put_cstr(s, action_text);
}

/* Encodes the 'flood_remote' action into 'ofpacts': REG6 is loaded with the
 * first flood index (CHASSIS_FLOOD_INDEX_START) and the packet is then
 * resubmitted to the table given by 'ep->flood_remote_table'.  'null' is
 * not consulted. */
static void
encode_FLOOD_REMOTE(const struct ovnact_null *null OVS_UNUSED,
                    const struct ovnact_encode_params *ep,
                    struct ofpbuf *ofpacts)
{
    uint32_t flood_table = ep->flood_remote_table;

    /* All 32 bits of REG6 are overwritten with the starting index. */
    put_load(CHASSIS_FLOOD_INDEX_START, MFF_REG6, 0, 32, ofpacts);
    emit_resubmit(ofpacts, flood_table);
}

/* Parses an assignment or exchange or put_dhcp_opts action. */
static void
parse_set_action(struct action_context *ctx)
Expand Down Expand Up @@ -5758,6 +5773,8 @@ parse_action(struct action_context *ctx)
parse_sample(ctx);
} else if (lexer_match_id(ctx->lexer, "mac_cache_use")) {
ovnact_put_MAC_CACHE_USE(ctx->ovnacts);
} else if (lexer_match_id(ctx->lexer, "flood_remote")) {
ovnact_put_FLOOD_REMOTE(ctx->ovnacts);
} else {
lexer_syntax_error(ctx->lexer, "expecting action");
}
Expand Down
12 changes: 7 additions & 5 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -13398,21 +13398,22 @@ build_neigh_learning_flows_for_lrouter(
* */

/* Flows for LOOKUP_NEIGHBOR. */
const char *flood = od->is_transit_router ? "flood_remote; " : "";
bool learn_from_arp_request = smap_get_bool(&od->nbr->options,
"always_learn_from_arp_request", true);
ds_clear(actions);
ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
" = lookup_arp(inport, arp.spa, arp.sha); %snext;",
" = lookup_arp(inport, arp.spa, arp.sha); %s%snext;",
learn_from_arp_request ? "" :
REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ", flood);
ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
"arp.op == 2", ds_cstr(actions), lflow_ref);

ds_clear(actions);
ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
" = lookup_nd(inport, nd.target, nd.tll); %snext;",
" = lookup_nd(inport, nd.target, nd.tll); %s%snext;",
learn_from_arp_request ? "" :
REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ", flood);
ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na",
ds_cstr(actions), lflow_ref);

Expand All @@ -13428,7 +13429,8 @@ build_neigh_learning_flows_for_lrouter(
ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
" = lookup_nd(inport, nd.target, nd.tll); "
REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
" = lookup_nd_ip(inport, nd.target); next;");
" = lookup_nd_ip(inport, nd.target); %snext;",
flood);
ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 110,
"nd_na && ip6.src == fe80::/10 && ip6.dst == ff00::/8",
ds_cstr(actions), lflow_ref);
Expand Down
47 changes: 47 additions & 0 deletions tests/multinode-macros.at
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,53 @@ cleanup_multinode_resources_by_nodes() {
done
}

# multinode_cleanup_northd NODE
#
# Stops a previously started northd on the specified node and removes the
# files matching /etc/ovn/*.db.
multinode_cleanup_northd() {
c=$1
# Cleanup existing one
m_as $c /usr/share/ovn/scripts/ovn-ctl stop_northd
# NOTE(review): the glob below is unquoted, so the calling shell tries to
# expand it before m_as runs; confirm that m_as evaluates its arguments
# through a shell on NODE, otherwise the pattern may reach rm literally.
m_as $c rm -f /etc/ovn/*.db
}

# multinode_setup_northd NODE
#
# Sets up northd on specified node: any previous instance is cleaned up
# first, northd is started, and the NB and SB connections are set to
# ptcp:6641 and ptcp:6642 respectively.
multinode_setup_northd() {
c=$1

# Start from a clean state (stops northd and removes the old databases).
multinode_cleanup_northd $c

m_as $c /usr/share/ovn/scripts/ovn-ctl start_northd
# Northbound database connection.
m_as $c ovn-nbctl set-connection ptcp:6641
# Southbound database connection.
m_as $c ovn-sbctl set-connection ptcp:6642
}

# multinode_setup_controller NODE ENCAP_IP REMOTE_IP [ENCAP_TYPE]
#
# Sets up controller on specified node: OVS and ovn-controller are stopped
# and started again, then the Open_vSwitch external-ids are set so that the
# controller uses ENCAP_IP with ENCAP_TYPE (default "geneve") for tunnels
# and connects to the southbound database at tcp:REMOTE_IP:6642.
#
# The OVS system-id is taken from $id when the caller's environment has it
# set; otherwise NODE is used.
multinode_setup_controller() {
    c=$1
    encap_ip=$2
    remote_ip=$3
    encap_type=${4:-"geneve"}

    # Cleanup existing one
    m_as $c /usr/share/openvswitch/scripts/ovs-ctl stop
    m_as $c /usr/share/ovn/scripts/ovn-ctl stop_controller

    # None of the arguments carries a system-id and this function never
    # assigns $id, so fall back to the node name instead of passing an empty
    # "--system-id=" when $id is unset or empty.
    m_as $c /usr/share/openvswitch/scripts/ovs-ctl start \
        --system-id=${id:-$c}
    m_as $c /usr/share/ovn/scripts/ovn-ctl start_controller

    m_as $c ovs-vsctl set open . external_ids:ovn-encap-ip=$encap_ip
    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=$encap_type
    m_as $c ovs-vsctl set open . external-ids:ovn-remote=tcp:$remote_ip:6642
    m_as $c ovs-vsctl set open . external-ids:ovn-openflow-probe-interval=60
    m_as $c ovs-vsctl set open . external-ids:ovn-remote-probe-interval=180000
    m_as $c ovs-vsctl set open . external-ids:ovn-bridge-datapath-type=system
}

# m_count_rows TABLE [CONDITION...]
#
# Prints the number of rows in TABLE (that satisfy CONDITION).
Expand Down
Loading

0 comments on commit f583761

Please sign in to comment.