From 58bb9598ad6ce33bfbc67858c8298cae63a13984 Mon Sep 17 00:00:00 2001 From: OFIWG Bot Date: Thu, 21 Mar 2024 23:26:13 +0000 Subject: [PATCH] Updated nroff-generated man pages Signed-off-by: OFIWG Bot --- man/man3/fi_endpoint.3 | 4 ++-- man/man3/fi_errno.3 | 35 ++++++++++++++++++++++++++++++++++- man/man7/fi_cxi.7 | 32 +++++++++++++++++++++++++++++++- man/man7/fi_setup.7 | 6 +++++- 4 files changed, 72 insertions(+), 5 deletions(-) diff --git a/man/man3/fi_endpoint.3 b/man/man3/fi_endpoint.3 index 37a3df79cf8..5b890e2223e 100644 --- a/man/man3/fi_endpoint.3 +++ b/man/man3/fi_endpoint.3 @@ -1,6 +1,6 @@ .\" Automatically generated by Pandoc 2.9.2.1 .\" -.TH "fi_endpoint" "3" "2024\-03\-07" "Libfabric Programmer\[cq]s Manual" "#VERSION#" +.TH "fi_endpoint" "3" "2024\-03\-21" "Libfabric Programmer\[cq]s Manual" "#VERSION#" .hy .SH NAME .PP @@ -1779,7 +1779,7 @@ A resource domain was not bound to the endpoint or an attempt was made to bind multiple domains. .TP \f[I]-FI_ENOCQ\f[R] -The endpoint has not been configured with necessary event queue. +The endpoint has not been configured with necessary completion queue. .TP \f[I]-FI_EOPBADSTATE\f[R] The endpoint\[cq]s state does not permit the requested operation. 
diff --git a/man/man3/fi_errno.3 b/man/man3/fi_errno.3 index 92b8f3ebfc1..2312e84f6ff 100644 --- a/man/man3/fi_errno.3 +++ b/man/man3/fi_errno.3 @@ -1,6 +1,6 @@ .\" Automatically generated by Pandoc 2.9.2.1 .\" -.TH "fi_errno" "3" "2022\-12\-09" "Libfabric Programmer\[cq]s Manual" "#VERSION#" +.TH "fi_errno" "3" "2024\-03\-21" "Libfabric Programmer\[cq]s Manual" "#VERSION#" .hy .SH NAME .PP @@ -57,12 +57,18 @@ No space left on device \f[I]FI_ENOSYS\f[R] Function not implemented .TP +\f[I]FI_EWOULDBLOCK\f[R] +Operation would block +.TP \f[I]FI_ENOMSG\f[R] No message of desired type .TP \f[I]FI_ENODATA\f[R] No data available .TP +\f[I]FI_EOVERFLOW\f[R] +Value too large for defined data type +.TP \f[I]FI_EMSGSIZE\f[R] Message too long .TP @@ -90,6 +96,9 @@ Software caused connection abort \f[I]FI_ECONNRESET\f[R] Connection reset by peer .TP +\f[I]FI_ENOBUFS\f[R] +No buffer space available +.TP \f[I]FI_EISCONN\f[R] Transport endpoint is already connected .TP @@ -105,6 +114,9 @@ Operation timed out \f[I]FI_ECONNREFUSED\f[R] Connection refused .TP +\f[I]FI_EHOSTDOWN\f[R] +Host is down +.TP \f[I]FI_EHOSTUNREACH\f[R] No route to host .TP @@ -149,6 +161,27 @@ Invalid resource domain .TP \f[I]FI_ENOCQ\f[R] Missing or unavailable completion queue +.TP +\f[I]FI_ECRC\f[R] +CRC error +.TP +\f[I]FI_ETRUNC\f[R] +Truncation error +.TP +\f[I]FI_ENOKEY\f[R] +Required key not available +.TP +\f[I]FI_ENOAV\f[R] +Missing or unavailable address vector +.TP +\f[I]FI_EOVERRUN\f[R] +Queue has been overrun +.TP +\f[I]FI_ENORX\f[R] +Receiver not ready, no receive buffers available +.TP +\f[I]FI_ENOMR\f[R] +Memory registration limit exceeded .SH SEE ALSO .PP \f[C]fabric\f[R](7) diff --git a/man/man7/fi_cxi.7 b/man/man7/fi_cxi.7 index 14b838824cd..c0ad9d32a0d 100644 --- a/man/man7/fi_cxi.7 +++ b/man/man7/fi_cxi.7 @@ -1,7 +1,7 @@ .\"t .\" Automatically generated by Pandoc 2.9.2.1 .\" -.TH "fi_cxi" "7" "2024\-02\-01" "Libfabric Programmer\[cq]s Manual" "#VERSION#" +.TH "fi_cxi" "7" "2024\-03\-21" 
"Libfabric Programmer\[cq]s Manual" "#VERSION#" .hy .SH NAME .PP @@ -269,6 +269,19 @@ application processes. .IP "7." 3 Application processes select from the list of available service IDs and VNIs to form an authorization key to use for Endpoint allocation. +.SS Endpoint Protocols +.PP +The provider supports multiple endpoint protocols. +The default protocol is FI_PROTO_CXI and fully supports the messaging +requirements of parallel applications. +.PP +The FI_PROTO_CXI_RNR endpoint protocol is an optional protocol that +targets client/server environments where send-after-send ordering is not +required and messaging is generally to pre-posted buffers; FI_MULTI_RECV +is recommended. +It utilizes a receiver-not-ready implementation where +\f[I]FI_CXI_RNR_MAX_TIMEOUT_US\f[R] can be tuned to control the maximum +retry duration. .SS Address Vectors .PP The CXI provider supports both \f[I]FI_AV_TABLE\f[R] and @@ -514,6 +527,16 @@ Using Pinned mode avoids any overhead due to network page faults but requires all buffers to be backed by physical memory. Copy-on-write semantics are broken when using pinned memory. See the Fork section for more information. +.PP +The CXI provider supports DMABUF for device memory registration. +If the ROCR and CUDA libraries support it, the CXI provider will default +to use DMA-buf. +There may be situations with CUDA that may double the BAR consumption. +Until this is fixed in the CUDA stack, the environment variable +\f[I]FI_CXI_DISABLE_DMABUF_CUDA\f[R] can be used to fall back to the +nvidia peer-memory interface. +Also, \f[I]FI_CXI_DISABLE_DMABUF_ROCR\f[R] can be used to fall back to +the amdgpu peer-memory interface. .SS Translation Cache .PP Mapping a buffer for use by the NIC is an expensive operation. @@ -1300,6 +1323,13 @@ queue becomes empty. Default VNI value used only for service IDs where the VNI is not restricted. 
.TP +\f[I]FI_CXI_RNR_MAX_TIMEOUT_US\f[R] +When using the FI_PROTO_CXI_RNR endpoint protocol, this setting is used +to control the maximum time from the original posting of the message +that the message should be retried. +A value of 0 will return an error completion on the first RNR ack +status. +.TP \f[I]FI_CXI_EQ_ACK_BATCH_SIZE\f[R] Number of EQ events to process before writing an acknowledgement to HW. Batching ACKs amortizes the cost of event acknowledgement over multiple diff --git a/man/man7/fi_setup.7 b/man/man7/fi_setup.7 index faa80c4916e..1644147e363 100644 --- a/man/man7/fi_setup.7 +++ b/man/man7/fi_setup.7 @@ -1,6 +1,6 @@ .\" Automatically generated by Pandoc 2.9.2.1 .\" -.TH "fi_setup" "7" "2023\-01\-02" "Libfabric Programmer\[cq]s Manual" "#VERSION#" +.TH "fi_setup" "7" "2024\-03\-21" "Libfabric Programmer\[cq]s Manual" "#VERSION#" .hy .SH NAME .PP @@ -1118,8 +1118,12 @@ struct fi_cq_err_entry { /* Sample error handling */ struct fi_cq_msg_entry entry; struct fi_cq_err_entry err_entry; +char err_data[256]; int ret; +err_entry.err_data = err_data; +err_entry.err_data_size = 256; + ret = fi_cq_read(cq, &entry, 1); if (ret == -FI_EAVAIL) ret = fi_cq_readerr(cq, &err_entry, 0);