C++17 parallelism #273

Open

wants to merge 182 commits into base: develop

182 commits
2543ff8
starting over with StdPar because git submodules are trash
jeffhammond Jul 7, 2022
a435877
starting over with StdPar because git submodules are trash
jeffhammond Jul 7, 2022
9cf1e4b
starting over with StdPar because git submodules are trash
jeffhammond Jul 7, 2022
6bcc445
fix MEM***
jeffhammond Jul 8, 2022
bb16c99
fix MEM*** more
jeffhammond Jul 8, 2022
7f784d1
add exec and alg to stdpar helper header
jeffhammond Jul 8, 2022
41d9cd2
more stdpar
jeffhammond Jul 8, 2022
710a084
add SCAN and NODAL
jeffhammond Jul 8, 2022
87e0111
add more stuff
jeffhammond Jul 8, 2022
4a674d3
index list
jeffhammond Jul 8, 2022
48152bc
reduce sum
jeffhammond Jul 8, 2022
f0ad8c2
cout fix
jeffhammond Jul 8, 2022
d55a2d5
cout fix
jeffhammond Jul 8, 2022
64a9768
cleanup
jeffhammond Jul 8, 2022
1cda0fd
README
jeffhammond Jul 8, 2022
a1470e1
fix unroll pragma
jeffhammond Jul 8, 2022
263821a
GCC
jeffhammond Jul 8, 2022
bf344cf
remove RAJA_StdPar
jeffhammond Jul 8, 2022
8ce9c41
fix the issue with running
jeffhammond Jul 8, 2022
27f8007
NVC note
jeffhammond Jul 8, 2022
e4f1604
nuke StdPar SORTPAIRS because it needs work
jeffhammond Jul 8, 2022
2e3bb2d
notes on NVC++ multicore issues
jeffhammond Jul 8, 2022
a85a5c6
NVC++ GPU fails here
jeffhammond Jul 8, 2022
fe8f916
more errata
jeffhammond Jul 8, 2022
f56c87a
all the erratum
jeffhammond Jul 8, 2022
ccbd4f0
pointer to atomic for GPU
jeffhammond Jul 8, 2022
51cbb35
update PI_ATOMIC
jeffhammond Jul 8, 2022
790d74e
fix this one - nested way faster on GPU
jeffhammond Jul 8, 2022
2cc38e7
fix this one - nested way faster on GPU
jeffhammond Jul 8, 2022
1a77c06
remove RAJA_StdPar
jeffhammond Jul 8, 2022
c986bf9
disable Lambda_StdPar; use par not par_unseq
jeffhammond Jul 12, 2022
563c969
enable par_unseq even though it is slower
jeffhammond Jul 12, 2022
ea4bccb
disable Lambda_StdPar
jeffhammond Jul 12, 2022
2d54a4b
move collapse choice here
jeffhammond Jul 12, 2022
d9a94c2
partially implement SORTPAIRS with StdPar (GPU issues)
jeffhammond Jul 12, 2022
6f1fbbd
partially implement SORTPAIRS with StdPar (GPU issues)
jeffhammond Jul 12, 2022
3f39c8b
bring USE_STDPAR_COLLAPSE into common header
jeffhammond Jul 12, 2022
6548d9c
add StdPar impl using std::reduce
jeffhammond Jul 12, 2022
33bb44e
add comment why GPU disabled
jeffhammond Jul 12, 2022
0cd37a3
add SCAN StdPar but wrong on GPU???
jeffhammond Jul 12, 2022
4f8e7ab
start working on INDEXLIST StdPar
jeffhammond Jul 12, 2022
c7c292b
implement DAXPY_ATOMIC StdPar using a variety of atomics, because C++…
jeffhammond Jul 12, 2022
d4f47a5
use std:: not RAJA min/max
jeffhammond Jul 12, 2022
16c2099
implement REDUCE_STRUCT Base_StdPar
jeffhammond Jul 12, 2022
7ece3bf
remove warning
jeffhammond Jul 12, 2022
7a58180
FIRST_MIN Lambda_StdPar unimplemented
jeffhammond Jul 12, 2022
5fa7743
s/RAJA_MAX/std::max/g
jeffhammond Jul 12, 2022
7ea72bc
CPU StdPar
jeffhammond Jul 12, 2022
c2147ee
Merge branch 'new-stdpar' of https://github.com/jeffhammond/RAJAPerf …
jeffhammond Jul 12, 2022
013c0ad
disable atomic_ref
jeffhammond Jul 12, 2022
e99f8c3
CPU info
jeffhammond Jul 12, 2022
88baaeb
Merge remote-tracking branch 'origin/new-stdpar' into new-stdpar
jeffhammond Jul 12, 2022
7213acc
fix no GPU StdPar in SCAN
jeffhammond Jul 12, 2022
e1791c7
Lambda_StdPar HEAT_3D added
jeffhammond Jul 12, 2022
ffb45e3
Merge branch 'LLNL:develop' into new-stdpar
jeffhammond Aug 2, 2022
d06f0ce
starting over with StdPar because git submodules are trash
jeffhammond Jul 7, 2022
4b6d7c9
starting over with StdPar because git submodules are trash
jeffhammond Jul 7, 2022
baae1b2
starting over with StdPar because git submodules are trash
jeffhammond Jul 7, 2022
0044ac9
fix MEM***
jeffhammond Jul 8, 2022
4a57467
add exec and alg to stdpar helper header
jeffhammond Jul 8, 2022
e0349b4
more stdpar
jeffhammond Jul 8, 2022
cbe5356
add SCAN and NODAL
jeffhammond Jul 8, 2022
43b9f8f
add more stuff
jeffhammond Jul 8, 2022
d7c7a68
index list
jeffhammond Jul 8, 2022
854871b
reduce sum
jeffhammond Jul 8, 2022
ab62562
cout fix
jeffhammond Jul 8, 2022
1f10cdc
cout fix
jeffhammond Jul 8, 2022
c3d1dbe
cleanup
jeffhammond Jul 8, 2022
4c3519e
README
jeffhammond Jul 8, 2022
8c34177
fix unroll pragma
jeffhammond Jul 8, 2022
a6d6bc4
GCC
jeffhammond Jul 8, 2022
c9a6ec5
remove RAJA_StdPar
jeffhammond Jul 8, 2022
1e3f624
fix the issue with running
jeffhammond Jul 8, 2022
21885f7
NVC note
jeffhammond Jul 8, 2022
6a26a5f
nuke StdPar SORTPAIRS because it needs work
jeffhammond Jul 8, 2022
de6a3d2
notes on NVC++ multicore issues
jeffhammond Jul 8, 2022
f07036b
NVC++ GPU fails here
jeffhammond Jul 8, 2022
00a107a
more errata
jeffhammond Jul 8, 2022
92f8d12
all the erratum
jeffhammond Jul 8, 2022
645bb4a
pointer to atomic for GPU
jeffhammond Jul 8, 2022
dda1d64
update PI_ATOMIC
jeffhammond Jul 8, 2022
8bbcf1a
fix this one - nested way faster on GPU
jeffhammond Jul 8, 2022
88547f2
fix this one - nested way faster on GPU
jeffhammond Jul 8, 2022
65963b6
remove RAJA_StdPar
jeffhammond Jul 8, 2022
bdf03ba
disable Lambda_StdPar; use par not par_unseq
jeffhammond Jul 12, 2022
eeb41c8
enable par_unseq even though it is slower
jeffhammond Jul 12, 2022
c9e981b
disable Lambda_StdPar
jeffhammond Jul 12, 2022
9a07e35
move collapse choice here
jeffhammond Jul 12, 2022
de7ab8b
partially implement SORTPAIRS with StdPar (GPU issues)
jeffhammond Jul 12, 2022
e4e06b0
partially implement SORTPAIRS with StdPar (GPU issues)
jeffhammond Jul 12, 2022
e2fb576
bring USE_STDPAR_COLLAPSE into common header
jeffhammond Jul 12, 2022
6bf1600
add StdPar impl using std::reduce
jeffhammond Jul 12, 2022
cc5bc50
add comment why GPU disabled
jeffhammond Jul 12, 2022
ac4a8f1
add SCAN StdPar but wrong on GPU???
jeffhammond Jul 12, 2022
252c98c
start working on INDEXLIST StdPar
jeffhammond Jul 12, 2022
dd4a5e0
implement DAXPY_ATOMIC StdPar using a variety of atomics, because C++…
jeffhammond Jul 12, 2022
2e4a4ef
use std:: not RAJA min/max
jeffhammond Jul 12, 2022
34b8324
implement REDUCE_STRUCT Base_StdPar
jeffhammond Jul 12, 2022
de072c1
remove warning
jeffhammond Jul 12, 2022
7b05b04
FIRST_MIN Lambda_StdPar unimplemented
jeffhammond Jul 12, 2022
97e5348
s/RAJA_MAX/std::max/g
jeffhammond Jul 12, 2022
c98264d
CPU StdPar
jeffhammond Jul 12, 2022
15ed8ac
disable atomic_ref
jeffhammond Jul 12, 2022
0d75450
CPU info
jeffhammond Jul 12, 2022
e25ec99
fix no GPU StdPar in SCAN
jeffhammond Jul 12, 2022
f842555
Lambda_StdPar HEAT_3D added
jeffhammond Jul 12, 2022
185d2c4
Merge branch 'new-stdpar' of https://github.com/jeffhammond/RAJAPerf …
jeffhammond Aug 31, 2022
7a96a08
fix correctness issues caused by emplace_back in parallel
jeffhammond Aug 31, 2022
a38b190
deal with exception mess
jeffhammond Aug 31, 2022
7b4039c
fix emplace_back and deal with exceptions
jeffhammond Aug 31, 2022
8f7e804
remove listed issues that have been fixed
jeffhammond Aug 31, 2022
d7716e5
no clue what to do with these
jeffhammond Aug 31, 2022
ea0feff
code compiles but is wrong, like Base
jeffhammond Aug 31, 2022
aea0bca
code compiles but is wrong, like Base
jeffhammond Aug 31, 2022
69ce57c
change output
jeffhammond Oct 11, 2022
e4248d6
addd comment
jeffhammond Oct 11, 2022
db398c2
collapse in StdPar is really important for GPU
jeffhammond Oct 11, 2022
274ad60
collapse in StdPar is really important for GPU
jeffhammond Oct 11, 2022
076abfb
fixed lambda collapse - need to do the base version and cleanup
jeffhammond Oct 11, 2022
a884cd1
fix atomic_ref
jeffhammond Oct 12, 2022
1456962
move openacc.h header to stdpar common header
jeffhammond Oct 12, 2022
00bd052
use for_each everywhere - correct with intel and nvhpc it seems
jeffhammond Oct 12, 2022
cfa64d9
debug FDTD_2D with GCC
jeffhammond Oct 12, 2022
2aeb29b
debug the GCC problem more
jeffhammond Oct 12, 2022
eef71d5
add a note about a bad idea
jeffhammond Oct 12, 2022
788e2b4
ugh atomics are such a pain right now
jeffhammond Oct 12, 2022
1f10a86
change atomics again
jeffhammond Oct 12, 2022
3c93637
solve lambda capture t issue a diff way
jeffhammond Oct 12, 2022
f64306d
switch to for_each_n in algorithm
jeffhammond Oct 12, 2022
50a661d
switch to for_each_n in stream
jeffhammond Oct 12, 2022
3c2368b
switch to for_each_n in basic
jeffhammond Oct 12, 2022
0b67f20
for_each_n
jeffhammond Oct 12, 2022
c18465d
for_each_n
jeffhammond Oct 12, 2022
1aa331f
fix collapsed case
jeffhammond Oct 12, 2022
27e59b3
for_each_n
jeffhammond Oct 12, 2022
456e722
for_each_n
jeffhammond Oct 13, 2022
a31da74
for_each_n
jeffhammond Oct 13, 2022
7fbae76
add failed experiment w xform red
jeffhammond Oct 13, 2022
6853072
for_each_n
jeffhammond Oct 13, 2022
4a82e0e
for_each_n
jeffhammond Oct 13, 2022
8207cb1
for_each_n
jeffhammond Oct 13, 2022
02d5466
for_each_n
jeffhammond Oct 13, 2022
e0a0331
for_each_n
jeffhammond Oct 13, 2022
7af9e3d
for_each_n
jeffhammond Oct 13, 2022
f70e3c9
for_each_n
jeffhammond Oct 13, 2022
45fc93b
fixed unused warnings
jeffhammond Oct 13, 2022
8074f41
disable this, since it requires SLM
jeffhammond Oct 13, 2022
eb3ecee
remove lambda version - will not bother with this
jeffhammond Oct 13, 2022
c1b4034
remove since not implementing this
jeffhammond Oct 13, 2022
9893a3a
remove since not implementing this
jeffhammond Oct 13, 2022
c59a972
need empty impl after all
jeffhammond Oct 13, 2022
83ccc76
need empty impl after all
jeffhammond Oct 13, 2022
1f85e58
enable INDEXLIST_3LOOP StdPar
jeffhammond Oct 13, 2022
1499974
fix for_each_n
jeffhammond Oct 13, 2022
6a91b3d
INDEXLIST_3LOOP validated manually; Intel busted
jeffhammond Oct 13, 2022
b406723
GCC broke too
jeffhammond Oct 13, 2022
e51cd8c
suppress misspelled
jeffhammond Oct 26, 2022
bc5976b
cleanup workaround for __throw_bad_array_new_length
jeffhammond Oct 26, 2022
72c1065
merge develop
jeffhammond Oct 27, 2022
85f65ff
rewrite to use transform not for_each_n
jeffhammond Oct 31, 2022
00e8690
rewrite to use transform not for_each_n
jeffhammond Oct 31, 2022
9d8d3e2
explicit seq because cannot find the implicit one for who knows why
jeffhammond Nov 3, 2022
eb81d25
Merge branch 'new-stdpar' of https://github.com/jeffhammond/RAJAPerf …
jeffhammond Nov 3, 2022
00a5498
use transform instead
jeffhammond Nov 3, 2022
25ca55d
use transform instead
jeffhammond Nov 3, 2022
c9f0295
dunno
jeffhammond Mar 28, 2023
f54afcd
fix all merge problems
jeffhammond Mar 28, 2023
c86144c
fix bad merge
jeffhammond Mar 28, 2023
834b519
add markdown
jeffhammond Mar 28, 2023
74c9fda
this was WIP - restore serial fallback
jeffhammond Mar 28, 2023
16b9f09
give up on std::atomic<double> for now
jeffhammond Mar 28, 2023
323d780
Merge branch 'develop' into new-stdpar
jeffhammond Mar 31, 2023
043bb5a
add RAJA_ENABLE_STDPAR guard
jeffhammond Mar 31, 2023
4c40ab0
add RAJA_ENABLE_STDPAR guard
jeffhammond Mar 31, 2023
75a6eb6
add RAJA_ENABLE_STDPAR guard
jeffhammond Mar 31, 2023
6770f11
add RAJA_ENABLE_STDPAR guard
jeffhammond Mar 31, 2023
8402b34
add RAJA_ENABLE_STDPAR guard
jeffhammond Mar 31, 2023
d4db911
N namespace oops
jeffhammond Mar 31, 2023
75b16b9
add RAJA_ENABLE_STDPAR guard
jeffhammond Mar 31, 2023
23d04fe
end namespace oops
jeffhammond Mar 31, 2023
bbe72dd
change guard name to BUILD_STDPAR
jeffhammond Mar 31, 2023
38bf9bd
Update ADD-StdPar.cpp
jeffhammond Mar 31, 2023
7 changes: 7 additions & 0 deletions CMakeLists.txt
@@ -30,6 +30,10 @@ endif()
if (ENABLE_KOKKOS)
  set(CMAKE_CXX_STANDARD 17)
  set(BLT_CXX_STD c++17)
elseif (ENABLE_STDPAR)
  set(CMAKE_CXX_STANDARD 20)
  set(BLT_CXX_STD c++14)
  add_definitions(-DBUILD_STDPAR)
else()
  set(CMAKE_CXX_STANDARD 14)
  set(BLT_CXX_STD c++14)
@@ -94,6 +98,9 @@ endif ()
if (ENABLE_OPENMP)
  add_definitions(-DRUN_OPENMP)
endif ()
if (ENABLE_STDPAR)
  add_definitions(-DRUN_STDPAR)
endif ()

set(RAJA_PERFSUITE_VERSION_MAJOR 2022)
set(RAJA_PERFSUITE_VERSION_MINOR 10)
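This CMake change introduces two preprocessor symbols: BUILD_STDPAR (defined in the ENABLE_STDPAR branch that also selects C++20) and RUN_STDPAR (defined alongside RUN_OPENMP). A minimal sketch of how a kernel translation unit might consume them, mirroring the pattern visible in MEMCPY-StdPar.cpp later in this diff; the function name here is illustrative only:

```
// Sketch only: BUILD_STDPAR gates whether the StdPar translation unit is
// compiled at all; RUN_STDPAR additionally gates the parallel-algorithm body.
#if defined(BUILD_STDPAR)

#include <algorithm>
#include <execution>

void copy_stdpar(double* y, const double* x, long n)
{
#if defined(RUN_STDPAR)
  std::copy_n(std::execution::par_unseq, x, n, y);
#endif
}

#endif // BUILD_STDPAR
```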
106 changes: 106 additions & 0 deletions README.stdpar
@@ -0,0 +1,106 @@
# GCC

```
cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j`nproc`
```

# NVC++

## Patches

```
$ diff /opt/nvidia/hpc_sdk/Linux_$(uname -m)/${V}/compilers/include/nvhpc/algorithm_execution.hpp
1066c1066
< _ASSERT_RANDOM_ACCESS(_FIt);
---
> //_ASSERT_RANDOM_ACCESS(_FIt);
```

```
$ diff /opt/nvidia/hpc_sdk/Linux_$(uname -m)/${V}/compilers/include/nvhpc/numeric_execution.hpp
386c386
< _ASSERT_RANDOM_ACCESS(_FIt);
---
> //_ASSERT_RANDOM_ACCESS(_FIt);
```

## OpenMP/OpenACC for atomics

```
cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=multicore -acc=multicore -mp=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8
```

```
cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8
```
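The `-acc`/`-mp` flags above are paired with `-stdpar` so that atomic updates inside parallel-algorithm lambdas can fall back on OpenACC or OpenMP atomics (the commit history notes that `std::atomic_ref` was still troublesome). A minimal sketch of that idea; the function and variable names are illustrative, not the suite's DAXPY_ATOMIC/PI_ATOMIC code:

```
#include <algorithm>
#include <execution>

// Illustrative only: accumulate into *sum using whichever atomic mechanism the
// compile flags provide (-acc defines _OPENACC, -mp/-fopenmp defines _OPENMP).
// std::execution::par is used rather than par_unseq because the element
// updates synchronize on a shared location.
void atomic_accumulate(double* sum, const double* x, long n)
{
  std::for_each_n(std::execution::par, x, n,
                  [=](const double& xi) {
#if defined(_OPENACC)
    #pragma acc atomic update
#elif defined(_OPENMP)
    #pragma omp atomic update
#endif
    *sum += xi;
  });
}
```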

## CPU

On the CPU, Lambda_StdPar produces NaN for MAT_MAT_SHARED, so just disable that lambda variant:

-------------------------------------------------------
Basic_MAT_MAT_SHARED
........................................................
Base_StdPar-default 1136.6199452543779141 0.0000000000000000000
Lambda_StdPar-default -nan -nan

The Base_StdPar result for PI_ATOMIC is wrong, probably because its update is not actually atomic:

-------------------------------------------------------
Basic_PI_ATOMIC
........................................................
Base_StdPar-default 0.55899274342205662602 2.5825999101679185666
Lambda_StdPar-default 3.1415926535899751926 0.0000000000000000000

Check these to make sure no unintended float<->double conversion is happening:

-------------------------------------------------------
Polybench_GEMVER
........................................................
Base_Seq-default 16695345.016927006001 0.0000000000000000000
Lambda_Seq-default 16695345.016927005882 1.1914380593225359917e-10
RAJA_Seq-default 16695345.016927006608 -6.0663296608254313469e-10
Base_StdPar-default 16695345.016927005745 2.5647750589996576309e-10
Lambda_StdPar-default 16695345.016927006608 -6.0663296608254313469e-10

-------------------------------------------------------
Polybench_MVT
........................................................
Base_Seq-default 6821556.1519041797419 0.0000000000000000000
Lambda_Seq-default 6821556.1519041797419 0.0000000000000000000
RAJA_Seq-default 6821556.1519041792999 4.4201442506164312363e-10
Base_StdPar-default 6821556.1519041792999 4.4201442506164312363e-10
Lambda_StdPar-default 6821556.1519041792999 4.4201442506164312363e-10

-------------------------------------------------------
Stream_DOT
........................................................
Base_Seq-default 39999973.379841431975 0.0000000000000000000
Lambda_Seq-default 39999973.379841439426 -7.4505805969238281250e-09
RAJA_Seq-default 39999973.379841662943 -2.3096799850463867188e-07
Base_StdPar-default 39999973.379841439426 -7.4505805969238281250e-09
Lambda_StdPar-default 39999973.379841439426 -7.4505805969238281250e-09

-------------------------------------------------------
Algorithm_REDUCE_SUM
........................................................
RAJA_Seq-default 268294.10758353886195 1.5483237802982330322e-08

## GPU

Lambda_Seq has the same bug, so just disable the Lambda variants:

-------------------------------------------------------
Basic_MAT_MAT_SHARED
........................................................
Base_Seq-default 1136.6199452543779141 0.0000000000000000000
Lambda_Seq-default -6.0464819976872759102e+32 6.0464819976872759102e+32
RAJA_Seq-default 1136.6199452543779141 0.0000000000000000000
Base_StdPar-default 1136.6199452543779141 0.0000000000000000000
Lambda_StdPar-default -6.0464819976872759102e+32 6.0464819976872759102e+32

# Intel

```
cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8
```
6 changes: 6 additions & 0 deletions src/algorithm/CMakeLists.txt
@@ -10,34 +10,40 @@ blt_add_library(
  NAME algorithm
  SOURCES SCAN.cpp
          SCAN-Seq.cpp
          SCAN-StdPar.cpp
          SCAN-Hip.cpp
          SCAN-Cuda.cpp
          SCAN-OMP.cpp
          SCAN-OMPTarget.cpp
          SORT.cpp
          SORT-Seq.cpp
          SORT-StdPar.cpp
          SORT-Hip.cpp
          SORT-Cuda.cpp
          SORT-OMP.cpp
          SORTPAIRS.cpp
          SORTPAIRS-Seq.cpp
          SORTPAIRS-StdPar.cpp
          SORTPAIRS-Hip.cpp
          SORTPAIRS-Cuda.cpp
          SORTPAIRS-OMP.cpp
          REDUCE_SUM.cpp
          REDUCE_SUM-Seq.cpp
          REDUCE_SUM-StdPar.cpp
          REDUCE_SUM-Hip.cpp
          REDUCE_SUM-Cuda.cpp
          REDUCE_SUM-OMP.cpp
          REDUCE_SUM-OMPTarget.cpp
          MEMSET.cpp
          MEMSET-Seq.cpp
          MEMSET-StdPar.cpp
          MEMSET-Hip.cpp
          MEMSET-Cuda.cpp
          MEMSET-OMP.cpp
          MEMSET-OMPTarget.cpp
          MEMCPY.cpp
          MEMCPY-Seq.cpp
          MEMCPY-StdPar.cpp
          MEMCPY-Hip.cpp
          MEMCPY-Cuda.cpp
          MEMCPY-OMP.cpp
154 changes: 154 additions & 0 deletions src/algorithm/MEMCPY-StdPar.cpp
@@ -0,0 +1,154 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
// and RAJA Performance Suite project contributors.
// See the RAJAPerf/LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include "MEMCPY.hpp"

#include "RAJA/RAJA.hpp"

#if defined(BUILD_STDPAR)

#include "common/StdParUtils.hpp"

#include <iostream>

namespace rajaperf
{
namespace algorithm
{


void MEMCPY::runStdParVariantLibrary(VariantID vid)
{
#if defined(RUN_STDPAR)
  const Index_type run_reps = getRunReps();
  const Index_type ibegin = 0;
  const Index_type iend = getActualProblemSize();

  MEMCPY_DATA_SETUP;

  switch ( vid ) {

    case Base_StdPar : {

      startTimer();
      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

        std::copy_n(std::execution::par_unseq,
                    x+ibegin, iend-ibegin, y+ibegin);

      }
      stopTimer();

      break;
    }

    default : {
      getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl;
    }

  }
#endif
}

void MEMCPY::runStdParVariantDefault(VariantID vid)
{
#if defined(RUN_STDPAR)
  const Index_type run_reps = getRunReps();
  const Index_type ibegin = 0;
  const Index_type iend = getActualProblemSize();

  MEMCPY_DATA_SETUP;

  switch ( vid ) {

    case Base_StdPar : {

      startTimer();
      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

        std::for_each_n( std::execution::par_unseq,
                         counting_iterator<Index_type>(ibegin), iend-ibegin,
                         [=](Index_type i) {
          MEMCPY_BODY;
        });

      }
      stopTimer();

      break;
    }

    case Lambda_StdPar : {

      auto memcpy_lambda = [=](Index_type i) {
                             MEMCPY_BODY;
                           };

      startTimer();
      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

        std::for_each_n( std::execution::par_unseq,
                         counting_iterator<Index_type>(ibegin), iend-ibegin,
                         [=](Index_type i) {
          memcpy_lambda(i);
        });

      }
      stopTimer();

      break;
    }

    default : {
      getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl;
    }

  }

#endif
}

void MEMCPY::runStdParVariant(VariantID vid, size_t tune_idx)
{
  size_t t = 0;

  if (vid == Base_StdPar) {

    if (tune_idx == t) {

      runStdParVariantLibrary(vid);

    }

    t += 1;

  }

  if (tune_idx == t) {

    runStdParVariantDefault(vid);

  }

  t += 1;
}

void MEMCPY::setStdParTuningDefinitions(VariantID vid)
{
  if (vid == Base_StdPar) {
    addVariantTuningName(vid, "library");
  }

  addVariantTuningName(vid, "default");
}

} // end namespace algorithm
} // end namespace rajaperf

#endif // BUILD_STDPAR
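The default StdPar variant above indexes with `counting_iterator<Index_type>` from `common/StdParUtils.hpp`, which is not part of this diff excerpt. Below is a minimal sketch of what such an index-producing iterator can look like; it is an illustrative stand-in, not the suite's implementation. Note that, as in most counting iterators, `operator*` returns the index by value even though the iterator is tagged random-access, which is likely why the NVHPC `_ASSERT_RANDOM_ACCESS` patch in README.stdpar is needed.

```
#include <cstddef>
#include <iterator>

template <typename T>
class counting_iterator
{
public:
  using value_type        = T;
  using difference_type   = std::ptrdiff_t;
  using pointer           = const T*;
  using reference         = T;   // yields the index by value
  using iterator_category = std::random_access_iterator_tag;

  counting_iterator() = default;
  explicit counting_iterator(T v) : value_(v) {}

  reference operator*() const { return value_; }
  reference operator[](difference_type n) const { return value_ + static_cast<T>(n); }

  counting_iterator& operator++()    { ++value_; return *this; }
  counting_iterator  operator++(int) { auto tmp = *this; ++value_; return tmp; }
  counting_iterator& operator--()    { --value_; return *this; }
  counting_iterator  operator--(int) { auto tmp = *this; --value_; return tmp; }

  counting_iterator& operator+=(difference_type n) { value_ += static_cast<T>(n); return *this; }
  counting_iterator& operator-=(difference_type n) { value_ -= static_cast<T>(n); return *this; }

  friend counting_iterator operator+(counting_iterator it, difference_type n) { return it += n; }
  friend counting_iterator operator+(difference_type n, counting_iterator it) { return it += n; }
  friend counting_iterator operator-(counting_iterator it, difference_type n) { return it -= n; }
  friend difference_type   operator-(const counting_iterator& a, const counting_iterator& b)
  { return static_cast<difference_type>(a.value_) - static_cast<difference_type>(b.value_); }

  friend bool operator==(const counting_iterator& a, const counting_iterator& b) { return a.value_ == b.value_; }
  friend bool operator!=(const counting_iterator& a, const counting_iterator& b) { return a.value_ != b.value_; }
  friend bool operator< (const counting_iterator& a, const counting_iterator& b) { return a.value_ <  b.value_; }
  friend bool operator<=(const counting_iterator& a, const counting_iterator& b) { return a.value_ <= b.value_; }
  friend bool operator> (const counting_iterator& a, const counting_iterator& b) { return a.value_ >  b.value_; }
  friend bool operator>=(const counting_iterator& a, const counting_iterator& b) { return a.value_ >= b.value_; }

private:
  T value_{};
};

// Usage in the Base_StdPar style shown above (the MEMCPY body is y[i] = x[i]):
//   std::for_each_n(std::execution::par_unseq,
//                   counting_iterator<Index_type>(ibegin), iend - ibegin,
//                   [=](Index_type i) { y[i] = x[i]; });
```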

3 changes: 3 additions & 0 deletions src/algorithm/MEMCPY.cpp
@@ -51,6 +51,9 @@ MEMCPY::MEMCPY(const RunParams& params)
  setVariantDefined( Base_HIP );
  setVariantDefined( Lambda_HIP );
  setVariantDefined( RAJA_HIP );

  setVariantDefined( Base_StdPar );
  setVariantDefined( Lambda_StdPar );
}

MEMCPY::~MEMCPY()
4 changes: 4 additions & 0 deletions src/algorithm/MEMCPY.hpp
@@ -54,12 +54,16 @@ class MEMCPY : public KernelBase
  void runCudaVariant(VariantID vid, size_t tune_idx);
  void runHipVariant(VariantID vid, size_t tune_idx);
  void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
  void runStdParVariant(VariantID vid, size_t tune_idx);

  void setSeqTuningDefinitions(VariantID vid);
  void setStdParTuningDefinitions(VariantID vid);
  void setCudaTuningDefinitions(VariantID vid);
  void setHipTuningDefinitions(VariantID vid);
  void runSeqVariantDefault(VariantID vid);
  void runSeqVariantLibrary(VariantID vid);
  void runStdParVariantDefault(VariantID vid);
  void runStdParVariantLibrary(VariantID vid);

  template < size_t block_size >
  void runCudaVariantBlock(VariantID vid);
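The new declarations above plug into the suite's variant/tuning dispatch: runStdParVariant tries tunings in the same order in which setStdParTuningDefinitions registers their names. A minimal, stand-alone sketch of that convention (not suite code; the enum and strings are illustrative):

```
#include <cstddef>
#include <iostream>

enum VariantID { Base_StdPar, Lambda_StdPar };

// Base_StdPar has two tunings ("library" = std::copy_n, "default" = std::for_each_n);
// Lambda_StdPar has only "default". tune_idx selects them in registration order.
void run_variant(VariantID vid, std::size_t tune_idx)
{
  std::size_t t = 0;
  if (vid == Base_StdPar) {
    if (tune_idx == t) { std::cout << "library tuning (std::copy_n)\n"; }
    t += 1;   // "library" occupies tune_idx 0 for Base_StdPar
  }
  if (tune_idx == t) { std::cout << "default tuning (std::for_each_n)\n"; }
}

int main()
{
  run_variant(Base_StdPar, 0);    // library
  run_variant(Base_StdPar, 1);    // default
  run_variant(Lambda_StdPar, 0);  // default
}
```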