diff --git a/config/neoversen1/bli_cntx_init_neoversen1.c b/config/neoversen1/bli_cntx_init_neoversen1.c new file mode 100644 index 0000000000..a32a849e97 --- /dev/null +++ b/config/neoversen1/bli_cntx_init_neoversen1.c @@ -0,0 +1,77 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" +/* Initialize cntx for the neoversen1 configuration: reference defaults first, then the armv8a s/d gemm micro-kernels and their blocksizes. */ +void bli_cntx_init_neoversen1( cntx_t* cntx ) +{ + blksz_t blkszs[ BLIS_NUM_BLKSZS ]; + + // Set default kernel blocksizes and functions; the bli_cntx_set_*() calls below then update cntx. + bli_cntx_init_neoversen1_ref( cntx ); + + // ------------------------------------------------------------------------- + + // Update the context with optimized native gemm micro-kernels and + // their storage preferences (2 entries: float and double only). + bli_cntx_set_l3_nat_ukrs + ( + 2, + BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_8x12, FALSE, + BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_6x8, FALSE, + cntx + ); + + // Initialize level-3 blocksize objects with architecture-specific values; MR/NR match the 8x12 (s) and 6x8 (d) kernels above. + // s d c z + bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 6, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 8, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 120, 120, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 240, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, -1, -1 ); + + // Update the context with the current architecture's register and cache + // blocksizes (and multiples) for native execution. + bli_cntx_set_blkszs + ( + BLIS_NAT, 5, + BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, + BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, + BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, + BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, + BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, + cntx + ); +} + diff --git a/config/neoversen1/bli_family_neoversen1.h b/config/neoversen1/bli_family_neoversen1.h new file mode 100644 index 0000000000..8e8d954092 --- /dev/null +++ b/config/neoversen1/bli_family_neoversen1.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_FAMILY_H +//#define BLIS_FAMILY_H + + +// -- MEMORY ALLOCATION -------------------------------------------------------- + +#define BLIS_SIMD_ALIGN_SIZE 16 + diff --git a/config/neoversen1/make_defs.mk b/config/neoversen1/make_defs.mk new file mode 100644 index 0000000000..21f801cd06 --- /dev/null +++ b/config/neoversen1/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := neoversen1 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=neoverse-n1 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=neoverse-n1 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=neoverse-n1 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/config_registry b/config_registry index 93cff15233..2ea858226d 100644 --- a/config_registry +++ b/config_registry @@ -12,7 +12,7 @@ x86_64: intel64 amd64 amd64_legacy intel64: skx knl haswell sandybridge penryn generic amd64_legacy: excavator steamroller piledriver bulldozer generic amd64: zen3 zen2 zen generic -arm64: armsve firestorm thunderx2 cortexa57 cortexa53 generic +arm64: neoversen1 armsve firestorm thunderx2 cortexa57 cortexa53 generic arm32: cortexa15 cortexa9 generic # Intel architectures. @@ -32,6 +32,7 @@ piledriver: piledriver bulldozer: bulldozer # ARM architectures. +neoversen1: neoversen1/armv8a armsve: armsve/armsve a64fx: a64fx/armsve firestorm: firestorm/armv8a diff --git a/frame/base/bli_arch.c b/frame/base/bli_arch.c index 54aa64d42c..b87ea28ee9 100644 --- a/frame/base/bli_arch.c +++ b/frame/base/bli_arch.c @@ -194,6 +194,9 @@ void bli_arch_set_id( void ) #endif // ARM microarchitectures. + #ifdef BLIS_FAMILY_NEOVERSEN1 + id = BLIS_ARCH_NEOVERSEN1; + #endif #ifdef BLIS_FAMILY_ARMSVE id = BLIS_ARCH_ARMSVE; #endif @@ -270,6 +273,7 @@ static char* config_name[ BLIS_NUM_ARCHS ] = "piledriver", "bulldozer", + "neoversen1", "armsve", "a64fx", "firestorm", diff --git a/frame/base/bli_cpuid.c b/frame/base/bli_cpuid.c index ff0f386e65..8e65adc80f 100644 --- a/frame/base/bli_cpuid.c +++ b/frame/base/bli_cpuid.c @@ -1127,9 +1127,9 @@ static uint32_t get_coretype case 0xd03: // Cortex A53 return BLIS_ARCH_CORTEXA53; #endif -#ifdef BLIS_CONFIG_THUNDERX2 +#ifdef BLIS_CONFIG_NEOVERSEN1 case 0xd0c: // Neoverse N1 (and Graviton G2?) 
- return BLIS_ARCH_THUNDERX2; //placeholder for N1 + return BLIS_ARCH_NEOVERSEN1; // dedicated neoversen1 config #endif } break; diff --git a/frame/base/bli_cpuid.h b/frame/base/bli_cpuid.h index 3fea78e5a3..e0434628d2 100644 --- a/frame/base/bli_cpuid.h +++ b/frame/base/bli_cpuid.h @@ -70,6 +70,7 @@ bool bli_cpuid_is_piledriver( uint32_t family, uint32_t model, uint32_t features bool bli_cpuid_is_bulldozer( uint32_t family, uint32_t model, uint32_t features ); // ARM +bool bli_cpuid_is_neoversen1( uint32_t model, uint32_t part, uint32_t features ); bool bli_cpuid_is_thunderx2( uint32_t model, uint32_t part, uint32_t features ); bool bli_cpuid_is_cortexa57( uint32_t model, uint32_t part, uint32_t features ); bool bli_cpuid_is_cortexa53( uint32_t model, uint32_t part, uint32_t features ); diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index cc17b33ffb..84e931d594 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@ -134,6 +134,11 @@ void bli_gks_init( void ) #endif // ARM architectures +#ifdef BLIS_CONFIG_NEOVERSEN1 + bli_gks_register_cntx( BLIS_ARCH_NEOVERSEN1, bli_cntx_init_neoversen1, + bli_cntx_init_neoversen1_ref, + bli_cntx_init_neoversen1_ind ); +#endif #ifdef BLIS_CONFIG_A64FX bli_gks_register_cntx( BLIS_ARCH_A64FX, bli_cntx_init_a64fx, bli_cntx_init_a64fx_ref, diff --git a/frame/include/bli_arch_config.h b/frame/include/bli_arch_config.h index f804d30036..c4496a5f05 100644 --- a/frame/include/bli_arch_config.h +++ b/frame/include/bli_arch_config.h @@ -88,6 +88,9 @@ CNTX_INIT_PROTS( bulldozer ) // -- ARM architectures -- +#ifdef BLIS_CONFIG_NEOVERSEN1 +CNTX_INIT_PROTS( neoversen1 ) +#endif #ifdef BLIS_CONFIG_ARMSVE CNTX_INIT_PROTS( armsve ) #endif @@ -212,6 +215,9 @@ CNTX_INIT_PROTS( generic ) // -- ARM architectures -- +#ifdef BLIS_FAMILY_NEOVERSEN1 +#include "bli_family_neoversen1.h" +#endif #ifdef BLIS_FAMILY_ARMSVE #include "bli_family_armsve.h" #endif diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 
c66505bde8..9bf180842b 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -948,6 +948,7 @@ typedef enum BLIS_ARCH_BULLDOZER, // ARM + BLIS_ARCH_NEOVERSEN1, BLIS_ARCH_ARMSVE, BLIS_ARCH_A64FX, BLIS_ARCH_FIRESTORM,