From 3b85472adf15ce16fb2d43b71e219cc1f5750806 Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Tue, 3 Dec 2024 10:55:09 -0500 Subject: [PATCH] prov/lnx: Convert peer table to use buffer pools Convert peer table to use buffer pools in order to utilize the built-in capabilities of expanding the table as more peers are added dynamically. The peer table is protected by the domain's genlock. Signed-off-by: Amir Shehata --- prov/lnx/include/lnx.h | 19 ++---- prov/lnx/src/lnx_av.c | 142 +++++++++++++++-------------------------- prov/lnx/src/lnx_ops.c | 69 ++++++++++++-------- 3 files changed, 97 insertions(+), 133 deletions(-) diff --git a/prov/lnx/include/lnx.h b/prov/lnx/include/lnx.h index 450324d5d92..e6ed95f2efa 100644 --- a/prov/lnx/include/lnx.h +++ b/prov/lnx/include/lnx.h @@ -33,7 +33,6 @@ #ifndef LNX_H #define LNX_H -#define LNX_DEF_AV_SIZE 1024 #define LNX_MAX_LOCAL_EPS 16 #define LNX_IOV_LIMIT 4 @@ -180,6 +179,7 @@ struct lnx_peer_prov { struct lnx_peer { /* true if peer can be reached over shared memory, false otherwise */ bool lp_local; + fi_addr_t lp_fi_addr; /* Each provider that we can reach the peer on will have an entry * below. Each entry will contain all the local provider endpoints we @@ -200,10 +200,9 @@ struct lnx_peer { struct lnx_peer_table { struct util_av lpt_av; int lpt_max_count; - int lpt_count; struct lnx_domain *lpt_domain; - /* an array of peer entries */ - struct lnx_peer **lpt_entries; + /* an array of peer entries of type struct lnx_peer */ + struct ofi_bufpool *lpt_entries; }; struct lnx_ctx { @@ -293,6 +292,9 @@ int lnx_domain_open(struct fid_fabric *fabric, struct fi_info *info, int lnx_av_open(struct fid_domain *domain, struct fi_av_attr *attr, struct fid_av **av, void *context); +struct lnx_peer * +lnx_av_lookup_addr(struct lnx_peer_table *peer_tbl, fi_addr_t addr); + int lnx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, struct fid_cq **cq, void *context); @@ -314,15 +316,6 @@ void lnx_free_entry(struct fi_peer_rx_entry *entry); void lnx_foreach_unspec_addr(struct fid_peer_srx *srx, fi_addr_t (*get_addr)(struct fi_peer_rx_entry *)); -static inline struct lnx_peer * -lnx_get_peer(struct lnx_peer **peers, fi_addr_t addr) -{ - if (!peers || addr == FI_ADDR_UNSPEC) - return NULL; - - return peers[addr]; -} - static inline void lnx_get_core_desc(struct lnx_mem_desc *desc, void **mem_desc) { diff --git a/prov/lnx/src/lnx_av.c b/prov/lnx/src/lnx_av.c index f0b8d09fb86..60a26f1ea28 100644 --- a/prov/lnx/src/lnx_av.c +++ b/prov/lnx/src/lnx_av.c @@ -58,76 +58,25 @@ #include "rdma/fi_ext.h" #include "lnx.h" -static void lnx_free_peer(struct lnx_peer *lp) +struct lnx_peer * +lnx_av_lookup_addr(struct lnx_peer_table *peer_tbl, fi_addr_t addr) { - struct lnx_peer_prov *lpp; - struct dlist_entry *tmp, *tmp2; - struct lnx_local2peer_map *lpm; + struct lnx_peer *entry; - dlist_foreach_container_safe(&lp->lp_provs, - struct lnx_peer_prov, lpp, entry, tmp) { - dlist_foreach_container_safe(&lpp->lpp_map, - struct lnx_local2peer_map, lpm, entry, tmp2) { - dlist_remove(&lpm->entry); - free(lpm); - } - dlist_remove(&lpp->entry); - free(lpp); - } + if (addr == FI_ADDR_UNSPEC) + return NULL; - free(lp); -} - -#if ENABLE_DEBUG -static void lnx_print_peer(int idx, struct lnx_peer *lp) -{ - int k; - struct lnx_peer_prov *lpp; - struct lnx_local2peer_map *lpm; + ofi_genlock_lock(&peer_tbl->lpt_domain->ld_domain.lock); - FI_DBG(&lnx_prov, FI_LOG_CORE, - "%d: lnx_peer[%d] is %s\n", getpid(), idx, - (lp->lp_local) ? "local" : "remote"); - dlist_foreach_container(&lp->lp_provs, - struct lnx_peer_prov, lpp, entry) { - FI_DBG(&lnx_prov, FI_LOG_CORE, - "%d: peer[%p] provider %s\n", getpid(), lpp, - lpp->lpp_prov_name); - dlist_foreach_container(&lpp->lpp_map, - struct lnx_local2peer_map, lpm, entry) { - FI_DBG(&lnx_prov, FI_LOG_CORE, - " %d: peer has %d mapped addrs\n", - getpid(), lpm->addr_count); - for (k = 0; k < lpm->addr_count; k++) - FI_DBG(&lnx_prov, FI_LOG_CORE, - " %d: addr = %lu\n", - getpid(), lpm->peer_addrs[k]); - } - } -} -#endif /* ENABLE_DEBUG */ + entry = ofi_bufpool_get_ibuf(peer_tbl->lpt_entries, addr); -static int lnx_peer_insert(struct lnx_peer_table *tbl, - struct lnx_peer *lp) -{ - int i; + ofi_genlock_unlock(&peer_tbl->lpt_domain->ld_domain.lock); - if (tbl->lpt_max_count == 0 || - tbl->lpt_count >= tbl->lpt_max_count) - return -FI_ENOENT; - - for (i = 0; i < tbl->lpt_max_count; i++) { - if (!tbl->lpt_entries[i]) { - tbl->lpt_entries[i] = lp; -#if ENABLE_DEBUG - lnx_print_peer(i, lp); -#endif - tbl->lpt_count++; - return i; - } - } + if (!entry) + FI_WARN(&lnx_prov, FI_LOG_CORE, + "Invalid fi_addr %#lx\n", addr); - return -FI_ENOENT; + return entry; } static int lnx_peer_av_remove(struct lnx_peer *lp) @@ -160,19 +109,22 @@ static int lnx_peer_av_remove(struct lnx_peer *lp) return frc; } -static int lnx_peer_remove(struct lnx_peer_table *tbl, int idx) +static int lnx_peer_remove(struct lnx_peer_table *tbl, fi_addr_t addr) { - struct lnx_peer *lp = tbl->lpt_entries[idx]; + struct lnx_peer *lp = NULL; int rc = 0; + ofi_genlock_lock(&tbl->lpt_domain->ld_domain.lock); + lp = ofi_bufpool_get_ibuf(tbl->lpt_entries, addr); if (!lp) - return 0; + goto out; rc = lnx_peer_av_remove(lp); - tbl->lpt_entries[idx] = NULL; - tbl->lpt_count--; + ofi_ibuf_free(lp); +out: + ofi_genlock_unlock(&tbl->lpt_domain->ld_domain.lock); return rc; } @@ -193,7 +145,7 @@ static int lnx_cleanup_avs(struct local_prov *prov) static inline void lnx_free_peer_tbl(struct lnx_peer_table *peer_tbl) { - free(peer_tbl->lpt_entries); + ofi_bufpool_destroy(peer_tbl->lpt_entries); free(peer_tbl); } @@ -501,10 +453,14 @@ int lnx_av_insert(struct fid_av *av, const void *addr, size_t count, la->la_prov_count <= 0) return -FI_EPROTO; - /* this is a local peer */ - lp = calloc(sizeof(*lp), 1); - if (!lp) + ofi_genlock_lock(&peer_tbl->lpt_domain->ld_domain.lock); + lp = ofi_ibuf_alloc(peer_tbl->lpt_entries); + if (!lp) { + ofi_genlock_unlock(&peer_tbl->lpt_domain->ld_domain.lock); return -FI_ENOMEM; + } + idx = ofi_buf_index(lp); + ofi_genlock_unlock(&peer_tbl->lpt_domain->ld_domain.lock); dlist_init(&lp->lp_provs); @@ -521,20 +477,18 @@ int lnx_av_insert(struct fid_av *av, const void *addr, size_t count, rc = lnx_peer_map_addrs(prov_table, lp, la, flags, context); if (rc) { - free(lp); + ofi_genlock_lock(&peer_tbl->lpt_domain->ld_domain.lock); + ofi_ibuf_free(lp); + ofi_genlock_unlock(&peer_tbl->lpt_domain->ld_domain.lock); return rc; } - idx = lnx_peer_insert(peer_tbl, lp); - if (idx == -1) { - rc = lnx_peer_av_remove(lp); - lnx_free_peer(lp); - FI_INFO(&lnx_prov, FI_LOG_CORE, - "Peer table size exceeded. Removed = %d\n", rc); - return -FI_ENOENT; - } + if (flags & FI_AV_USER_ID) + lp->lp_fi_addr = fi_addr[i]; + else + lp->lp_fi_addr = idx; - fi_addr[i] = (fi_addr_t) idx; + fi_addr[i] = idx; la = next_peer(la); } @@ -622,8 +576,12 @@ int lnx_av_open(struct fid_domain *domain, struct fi_av_attr *attr, struct lnx_domain *lnx_domain; struct lnx_peer_table *peer_tbl; struct local_prov *entry; - size_t table_sz = LNX_DEF_AV_SIZE; + size_t table_sz; int rc = 0; + struct ofi_bufpool_attr pool_attr = { + .size = sizeof(struct lnx_peer), + .flags = OFI_BUFPOOL_NO_TRACK | OFI_BUFPOOL_INDEXED, + }; if (!attr) return -FI_EINVAL; @@ -634,24 +592,24 @@ int lnx_av_open(struct fid_domain *domain, struct fi_av_attr *attr, if (attr->type != FI_AV_TABLE) attr->type = FI_AV_TABLE; + lnx_domain = container_of(domain, struct lnx_domain, + ld_domain.domain_fid.fid); + fabric = lnx_domain->ld_fabric; + peer_tbl = calloc(sizeof(*peer_tbl), 1); if (!peer_tbl) return -FI_ENOMEM; - if (attr->count != 0) - table_sz = attr->count; + table_sz = attr->count ? attr->count : ofi_universe_size; + table_sz = roundup_power_of_two(table_sz); + pool_attr.chunk_cnt = table_sz; - peer_tbl->lpt_entries = - calloc(sizeof(struct lnx_peer *) * table_sz, 1); - if (!peer_tbl->lpt_entries) { + rc = ofi_bufpool_create_attr(&pool_attr, &peer_tbl->lpt_entries); + if (rc) { rc = -FI_ENOMEM; goto failed; } - lnx_domain = container_of(domain, struct lnx_domain, - ld_domain.domain_fid.fid); - fabric = lnx_domain->ld_fabric; - rc = ofi_av_init_lightweight(&lnx_domain->ld_domain, attr, &peer_tbl->lpt_av, context); if (rc) { diff --git a/prov/lnx/src/lnx_ops.c b/prov/lnx/src/lnx_ops.c index 7d94b7c9352..2c6b725c0ac 100644 --- a/prov/lnx/src/lnx_ops.c +++ b/prov/lnx/src/lnx_ops.c @@ -416,7 +416,7 @@ ssize_t lnx_trecv(struct fid_ep *ep, void *buf, size_t len, void *desc, * multiple endpoints. Each endpoint has its own fi_addr_t which is * core provider specific. */ - lp = lnx_get_peer(peer_tbl->lpt_entries, src_addr); + lp = lnx_av_lookup_addr(peer_tbl, src_addr); if (lp) { rc = lnx_select_recv_pathway(lp, lep->le_domain, desc, &cep, &core_addr, &iov, 1, &mre, &mem_desc); @@ -464,7 +464,7 @@ ssize_t lnx_trecvv(struct fid_ep *ep, const struct iovec *iov, void **desc, peer_tbl = lep->le_peer_tbl; lnx_get_core_desc(*desc, &mem_desc); - lp = lnx_get_peer(peer_tbl->lpt_entries, src_addr); + lp = lnx_av_lookup_addr(peer_tbl, src_addr); if (lp) { rc = lnx_select_recv_pathway(lp, lep->le_domain, *desc, &cep, &core_addr, iov, count, &mre, &mem_desc); @@ -509,7 +509,7 @@ ssize_t lnx_trecvmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg, peer_tbl = lep->le_peer_tbl; - lp = lnx_get_peer(peer_tbl->lpt_entries, msg->addr); + lp = lnx_av_lookup_addr(peer_tbl, msg->addr); if (lp) { rc = lnx_select_recv_pathway(lp, lep->le_domain, *msg->desc, &cep, &core_addr, msg->msg_iov, @@ -549,6 +549,7 @@ ssize_t lnx_tsend(struct fid_ep *ep, const void *buf, size_t len, void *desc, { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct local_prov_ep *cep; fi_addr_t core_addr; struct lnx_peer_table *peer_tbl; @@ -562,8 +563,8 @@ ssize_t lnx_tsend(struct fid_ep *ep, const void *buf, size_t len, void *desc, peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[dest_addr], - lep->le_domain, desc, &cep, + lp = lnx_av_lookup_addr(peer_tbl, dest_addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, desc, &cep, &core_addr, &iov, 1, &mre, &mem_desc, NULL); if (rc) return rc; @@ -585,6 +586,7 @@ ssize_t lnx_tsendv(struct fid_ep *ep, const struct iovec *iov, void **desc, { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct local_prov_ep *cep; fi_addr_t core_addr; struct lnx_peer_table *peer_tbl; @@ -597,8 +599,8 @@ ssize_t lnx_tsendv(struct fid_ep *ep, const struct iovec *iov, void **desc, peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[dest_addr], - lep->le_domain, (desc) ? *desc : NULL, &cep, + lp = lnx_av_lookup_addr(peer_tbl, dest_addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, (desc) ? *desc : NULL, &cep, &core_addr, iov, count, &mre, &mem_desc, NULL); if (rc) return rc; @@ -619,6 +621,7 @@ ssize_t lnx_tsendmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg, { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct local_prov_ep *cep; fi_addr_t core_addr; struct lnx_peer_table *peer_tbl; @@ -632,8 +635,8 @@ ssize_t lnx_tsendmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg, peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[msg->addr], - lep->le_domain, + lp = lnx_av_lookup_addr(peer_tbl, msg->addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, (msg->desc) ? *msg->desc : NULL, &cep, &core_addr, msg->msg_iov, msg->iov_count, &mre, &mem_desc, NULL); @@ -661,6 +664,7 @@ ssize_t lnx_tinject(struct fid_ep *ep, const void *buf, size_t len, { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct local_prov_ep *cep; fi_addr_t core_addr; struct lnx_peer_table *peer_tbl; @@ -672,8 +676,8 @@ ssize_t lnx_tinject(struct fid_ep *ep, const void *buf, size_t len, peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[dest_addr], - lep->le_domain, NULL, &cep, + lp = lnx_av_lookup_addr(peer_tbl, dest_addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, NULL, &cep, &core_addr, NULL, 0, &mre, NULL, NULL); if (rc) return rc; @@ -695,6 +699,7 @@ ssize_t lnx_tsenddata(struct fid_ep *ep, const void *buf, size_t len, void *desc { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct local_prov_ep *cep; fi_addr_t core_addr; struct lnx_peer_table *peer_tbl; @@ -708,8 +713,8 @@ ssize_t lnx_tsenddata(struct fid_ep *ep, const void *buf, size_t len, void *desc peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[dest_addr], - lep->le_domain, desc, &cep, + lp = lnx_av_lookup_addr(peer_tbl, dest_addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, desc, &cep, &core_addr, &iov, 1, &mre, &mem_desc, NULL); if (rc) return rc; @@ -732,6 +737,7 @@ ssize_t lnx_tinjectdata(struct fid_ep *ep, const void *buf, size_t len, { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct local_prov_ep *cep; fi_addr_t core_addr; struct lnx_peer_table *peer_tbl; @@ -743,8 +749,8 @@ ssize_t lnx_tinjectdata(struct fid_ep *ep, const void *buf, size_t len, peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[dest_addr], - lep->le_domain, NULL, &cep, + lp = lnx_av_lookup_addr(peer_tbl, dest_addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, NULL, &cep, &core_addr, NULL, 0, &mre, NULL, NULL); if (rc) return rc; @@ -767,6 +773,7 @@ lnx_rma_read(struct fid_ep *ep, void *buf, size_t len, void *desc, { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct fid_ep *core_ep; struct lnx_ctx *ctx; struct local_prov_ep *cep; @@ -783,8 +790,8 @@ lnx_rma_read(struct fid_ep *ep, void *buf, size_t len, void *desc, peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[src_addr], - lep->le_domain, desc, &cep, + lp = lnx_av_lookup_addr(peer_tbl, src_addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, desc, &cep, &core_addr, &iov, 1, &mre, &mem_desc, &rkey); if (rc) goto out; @@ -810,6 +817,7 @@ lnx_rma_write(struct fid_ep *ep, const void *buf, size_t len, void *desc, { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct fid_ep *core_ep; struct lnx_ctx *ctx; struct local_prov_ep *cep; @@ -826,9 +834,9 @@ lnx_rma_write(struct fid_ep *ep, const void *buf, size_t len, void *desc, peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[dest_addr], - lep->le_domain, desc, &cep, - &core_addr, &iov, 1, &mre, &mem_desc, &rkey); + lp = lnx_av_lookup_addr(peer_tbl, dest_addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, desc, &cep, + &core_addr, &iov, 1, &mre, &mem_desc, &rkey); if (rc) goto out; @@ -856,6 +864,7 @@ lnx_atomic_write(struct fid_ep *ep, { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct fid_ep *core_ep; struct lnx_ctx *ctx; struct local_prov_ep *cep; @@ -872,8 +881,8 @@ lnx_atomic_write(struct fid_ep *ep, peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[dest_addr], - lep->le_domain, desc, &cep, + lp = lnx_av_lookup_addr(peer_tbl, dest_addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, desc, &cep, &core_addr, &iov, 1, &mre, &mem_desc, &rkey); if (rc) goto out; @@ -902,6 +911,7 @@ lnx_atomic_readwrite(struct fid_ep *ep, { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct fid_ep *core_ep; struct lnx_ctx *ctx; struct local_prov_ep *cep; @@ -918,9 +928,10 @@ lnx_atomic_readwrite(struct fid_ep *ep, peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[dest_addr], - lep->le_domain, result_desc, &cep, &core_addr, &iov, 1, - &mre, &mem_desc, &rkey); + lp = lnx_av_lookup_addr(peer_tbl, dest_addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, result_desc, + &cep, &core_addr, &iov, 1, + &mre, &mem_desc, &rkey); if (rc) goto out; @@ -950,6 +961,7 @@ lnx_atomic_compwrite(struct fid_ep *ep, { int rc; struct lnx_ep *lep; + struct lnx_peer *lp; struct fid_ep *core_ep; struct lnx_ctx *ctx; struct local_prov_ep *cep; @@ -966,9 +978,10 @@ lnx_atomic_compwrite(struct fid_ep *ep, peer_tbl = lep->le_peer_tbl; - rc = lnx_select_send_pathway(peer_tbl->lpt_entries[dest_addr], - lep->le_domain, result_desc, &cep, &core_addr, &iov, 1, - &mre, &mem_desc, &rkey); + lp = lnx_av_lookup_addr(peer_tbl, dest_addr); + rc = lnx_select_send_pathway(lp, lep->le_domain, result_desc, &cep, + &core_addr, &iov, 1, + &mre, &mem_desc, &rkey); if (rc) goto out;