25#if !defined(__HIPCC_RTC__)
26#include "amd_device_functions.h"
29#if __has_builtin(__hip_atomic_compare_exchange_strong)
// Minimal compile-time conditional (equivalent to std::conditional, which is
// avoided here so the header stays usable under hipRTC without <type_traits>).
// Cond_t<B, T, F>::type is T when B is true and F when B is false.  The
// primary template is intentionally left undefined so that any use with a
// non-constant selector fails to compile.
template<bool B, typename T, typename F>
struct Cond_t;

template<typename T, typename F>
struct Cond_t<true, T, F> { using type = T; };

template<typename T, typename F>
struct Cond_t<false, T, F> { using type = F; };
36#if !__HIP_DEVICE_COMPILE__
38#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
39#define __HIP_MEMORY_SCOPE_WAVEFRONT 2
40#define __HIP_MEMORY_SCOPE_WORKGROUP 3
41#define __HIP_MEMORY_SCOPE_AGENT 4
42#define __HIP_MEMORY_SCOPE_SYSTEM 5
45#if !defined(__HIPCC_RTC__)
46#include "amd_hip_unsafe_atomics.h"
51 int mem_order = __ATOMIC_SEQ_CST,
52 int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
57__attribute__((always_inline, device))
58T hip_cas_expander(T* p, T x, Op op, F f)
noexcept
60 using FP = __attribute__((address_space(0))) const
void*;
63 extern
bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
65 if (is_shared_workaround((FP)p))
68 using U = typename Cond_t<
69 sizeof(T) == sizeof(
unsigned int),
unsigned int,
unsigned long long>::type;
71 auto q = reinterpret_cast<U*>(p);
73 U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
78 op(
reinterpret_cast<T&
>(tmp1), x);
79 }
while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
80 mem_order, mem_scope));
82 return reinterpret_cast<const T&
>(tmp0);
86 int mem_order = __ATOMIC_SEQ_CST,
87 int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
92__attribute__((always_inline, device))
93T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f)
noexcept
95 using FP = __attribute__((address_space(0))) const
void*;
98 extern
bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
100 if (is_shared_workaround((FP)p))
103 using U = typename Cond_t<
104 sizeof(T) == sizeof(
unsigned int),
unsigned int,
unsigned long long>::type;
106 auto q = reinterpret_cast<U*>(p);
108 U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
109 while (cmp(x,
reinterpret_cast<const T&
>(tmp)) &&
110 !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
113 return reinterpret_cast<const T&
>(tmp);
118int atomicCAS(
int* address,
int compare,
int val) {
119 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
120 __HIP_MEMORY_SCOPE_AGENT);
126int atomicCAS_system(
int* address,
int compare,
int val) {
127 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
128 __HIP_MEMORY_SCOPE_SYSTEM);
134unsigned int atomicCAS(
unsigned int* address,
unsigned int compare,
unsigned int val) {
135 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
136 __HIP_MEMORY_SCOPE_AGENT);
142unsigned int atomicCAS_system(
unsigned int* address,
unsigned int compare,
unsigned int val) {
143 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
144 __HIP_MEMORY_SCOPE_SYSTEM);
150unsigned long atomicCAS(
unsigned long* address,
unsigned long compare,
unsigned long val) {
151 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
152 __HIP_MEMORY_SCOPE_AGENT);
158unsigned long atomicCAS_system(
unsigned long* address,
unsigned long compare,
unsigned long val) {
159 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
160 __HIP_MEMORY_SCOPE_SYSTEM);
166unsigned long long atomicCAS(
unsigned long long* address,
unsigned long long compare,
167 unsigned long long val) {
168 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
169 __HIP_MEMORY_SCOPE_AGENT);
175unsigned long long atomicCAS_system(
unsigned long long* address,
unsigned long long compare,
176 unsigned long long val) {
177 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
178 __HIP_MEMORY_SCOPE_SYSTEM);
184float atomicCAS(
float* address,
float compare,
float val) {
185 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
186 __HIP_MEMORY_SCOPE_AGENT);
192float atomicCAS_system(
float* address,
float compare,
float val) {
193 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
194 __HIP_MEMORY_SCOPE_SYSTEM);
200double atomicCAS(
double* address,
double compare,
double val) {
201 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
202 __HIP_MEMORY_SCOPE_AGENT);
208double atomicCAS_system(
double* address,
double compare,
double val) {
209 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
210 __HIP_MEMORY_SCOPE_SYSTEM);
216int atomicAdd(
int* address,
int val) {
217 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
222int atomicAdd_system(
int* address,
int val) {
223 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
228unsigned int atomicAdd(
unsigned int* address,
unsigned int val) {
229 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
234unsigned int atomicAdd_system(
unsigned int* address,
unsigned int val) {
235 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
240unsigned long atomicAdd(
unsigned long* address,
unsigned long val) {
241 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
246unsigned long atomicAdd_system(
unsigned long* address,
unsigned long val) {
247 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
252unsigned long long atomicAdd(
unsigned long long* address,
unsigned long long val) {
253 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
258unsigned long long atomicAdd_system(
unsigned long long* address,
unsigned long long val) {
259 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
264float atomicAdd(
float* address,
float val) {
265#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
266 return unsafeAtomicAdd(address, val);
268 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
274float atomicAdd_system(
float* address,
float val) {
275 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
278#if !defined(__HIPCC_RTC__)
279DEPRECATED(
"use atomicAdd instead")
283void atomicAddNoRet(
float* address,
float val)
285 __ockl_atomic_add_noret_f32(address, val);
290double atomicAdd(
double* address,
double val) {
291#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
292 return unsafeAtomicAdd(address, val);
294 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
300double atomicAdd_system(
double* address,
double val) {
301 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
306int atomicSub(
int* address,
int val) {
307 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
312int atomicSub_system(
int* address,
int val) {
313 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
318unsigned int atomicSub(
unsigned int* address,
unsigned int val) {
319 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
324unsigned int atomicSub_system(
unsigned int* address,
unsigned int val) {
325 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
330unsigned long atomicSub(
unsigned long* address,
unsigned long val) {
331 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
336unsigned long atomicSub_system(
unsigned long* address,
unsigned long val) {
337 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
342unsigned long long atomicSub(
unsigned long long* address,
unsigned long long val) {
343 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
348unsigned long long atomicSub_system(
unsigned long long* address,
unsigned long long val) {
349 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
354float atomicSub(
float* address,
float val) {
355#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
356 return unsafeAtomicAdd(address, -val);
358 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
364float atomicSub_system(
float* address,
float val) {
365 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
370double atomicSub(
double* address,
double val) {
371#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
372 return unsafeAtomicAdd(address, -val);
374 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
380double atomicSub_system(
double* address,
double val) {
381 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
386int atomicExch(
int* address,
int val) {
387 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
392int atomicExch_system(
int* address,
int val) {
393 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
398unsigned int atomicExch(
unsigned int* address,
unsigned int val) {
399 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
404unsigned int atomicExch_system(
unsigned int* address,
unsigned int val) {
405 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
410unsigned long atomicExch(
unsigned long* address,
unsigned long val) {
411 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
416unsigned long atomicExch_system(
unsigned long* address,
unsigned long val) {
417 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
422unsigned long long atomicExch(
unsigned long long* address,
unsigned long long val) {
423 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
428unsigned long long atomicExch_system(
unsigned long long* address,
unsigned long long val) {
429 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
434float atomicExch(
float* address,
float val) {
435 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
440float atomicExch_system(
float* address,
float val) {
441 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
446double atomicExch(
double* address,
double val) {
447 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
452double atomicExch_system(
double* address,
double val) {
453 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
458int atomicMin(
int* address,
int val) {
459#if defined(__gfx941__)
460 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
461 address, val, [](
int x,
int y) {
return x < y; }, [=]() {
462 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
463 __HIP_MEMORY_SCOPE_AGENT);
466 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
472int atomicMin_system(
int* address,
int val) {
473#if defined(__gfx941__)
474 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
475 address, val, [](
int x,
int y) {
return x < y; }, [=]() {
476 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
477 __HIP_MEMORY_SCOPE_SYSTEM);
480 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
486unsigned int atomicMin(
unsigned int* address,
unsigned int val) {
487#if defined(__gfx941__)
488 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
489 address, val, [](
unsigned int x,
unsigned int y) {
return x < y; }, [=]() {
490 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
491 __HIP_MEMORY_SCOPE_AGENT);
494 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
501unsigned int atomicMin_system(
unsigned int* address,
unsigned int val) {
502#if defined(__gfx941__)
503 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
504 address, val, [](
unsigned int x,
unsigned int y) {
return x < y; }, [=]() {
505 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
506 __HIP_MEMORY_SCOPE_SYSTEM);
509 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
515unsigned long long atomicMin(
unsigned long* address,
unsigned long val) {
516#if defined(__gfx941__)
517 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
520 [](
unsigned long x,
unsigned long y) {
return x < y; },
522 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
523 __HIP_MEMORY_SCOPE_AGENT);
526 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
532unsigned long atomicMin_system(
unsigned long* address,
unsigned long val) {
533#if defined(__gfx941__)
534 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
537 [](
unsigned long x,
unsigned long y) {
return x < y; },
539 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
540 __HIP_MEMORY_SCOPE_SYSTEM);
543 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
549unsigned long long atomicMin(
unsigned long long* address,
unsigned long long val) {
550#if defined(__gfx941__)
551 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
554 [](
unsigned long long x,
unsigned long long y) {
return x < y; },
556 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
557 __HIP_MEMORY_SCOPE_AGENT);
560 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
566unsigned long long atomicMin_system(
unsigned long long* address,
unsigned long long val) {
567#if defined(__gfx941__)
568 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
571 [](
unsigned long long x,
unsigned long long y) {
return x < y; },
573 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
574 __HIP_MEMORY_SCOPE_SYSTEM);
577 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
583long long atomicMin(
long long* address,
long long val) {
584#if defined(__gfx941__)
585 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
586 address, val, [](
long long x,
long long y) {
return x < y; },
588 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
591 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
597long long atomicMin_system(
long long* address,
long long val) {
598#if defined(__gfx941__)
599 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
600 address, val, [](
long long x,
long long y) {
return x < y; },
602 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
605 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
611float atomicMin(
float* addr,
float val) {
612#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
613 return unsafeAtomicMin(addr, val);
615 #if __has_builtin(__hip_atomic_load) && \
616 __has_builtin(__hip_atomic_compare_exchange_strong)
617 float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
619 while (!done && value > val) {
620 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
621 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
625 unsigned int *uaddr = (
unsigned int *)addr;
626 unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
628 while (!done && __uint_as_float(value) > val) {
629 done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
false,
630 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
632 return __uint_as_float(value);
639float atomicMin_system(
float* address,
float val) {
640 unsigned int* uaddr {
reinterpret_cast<unsigned int*
>(address) };
641 #if __has_builtin(__hip_atomic_load)
642 unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
644 unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
646 float value = __uint_as_float(tmp);
648 while (val < value) {
649 value = atomicCAS_system(address, value, val);
657double atomicMin(
double* addr,
double val) {
658#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
659 return unsafeAtomicMin(addr, val);
661 #if __has_builtin(__hip_atomic_load) && \
662 __has_builtin(__hip_atomic_compare_exchange_strong)
663 double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
665 while (!done && value > val) {
666 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
667 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
671 unsigned long long *uaddr = (
unsigned long long *)addr;
672 unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
674 while (!done && __longlong_as_double(value) > val) {
675 done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
false,
676 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
678 return __longlong_as_double(value);
685double atomicMin_system(
double* address,
double val) {
686 unsigned long long* uaddr {
reinterpret_cast<unsigned long long*
>(address) };
687 #if __has_builtin(__hip_atomic_load)
688 unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
690 unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
692 double value = __longlong_as_double(tmp);
694 while (val < value) {
695 value = atomicCAS_system(address, value, val);
703int atomicMax(
int* address,
int val) {
704#if defined(__gfx941__)
705 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
706 address, val, [](
int x,
int y) {
return y < x; }, [=]() {
707 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
708 __HIP_MEMORY_SCOPE_AGENT);
711 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
717int atomicMax_system(
int* address,
int val) {
718#if defined(__gfx941__)
719 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
720 address, val, [](
int x,
int y) {
return y < x; }, [=]() {
721 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
722 __HIP_MEMORY_SCOPE_SYSTEM);
725 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
731unsigned int atomicMax(
unsigned int* address,
unsigned int val) {
732#if defined(__gfx941__)
733 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
734 address, val, [](
unsigned int x,
unsigned int y) {
return y < x; }, [=]() {
735 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
736 __HIP_MEMORY_SCOPE_AGENT);
739 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
745unsigned int atomicMax_system(
unsigned int* address,
unsigned int val) {
746#if defined(__gfx941__)
747 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
748 address, val, [](
unsigned int x,
unsigned int y) {
return y < x; }, [=]() {
749 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
750 __HIP_MEMORY_SCOPE_SYSTEM);
753 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
759unsigned long atomicMax(
unsigned long* address,
unsigned long val) {
760#if defined(__gfx941__)
761 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
764 [](
unsigned long x,
unsigned long y) {
return y < x; },
766 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
767 __HIP_MEMORY_SCOPE_AGENT);
770 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
776unsigned long atomicMax_system(
unsigned long* address,
unsigned long val) {
777#if defined(__gfx941__)
778 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
781 [](
unsigned long x,
unsigned long y) {
return y < x; },
783 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
784 __HIP_MEMORY_SCOPE_SYSTEM);
787 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
793unsigned long long atomicMax(
unsigned long long* address,
unsigned long long val) {
794#if defined(__gfx941__)
795 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
798 [](
unsigned long long x,
unsigned long long y) {
return y < x; },
800 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
801 __HIP_MEMORY_SCOPE_AGENT);
804 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
810unsigned long long atomicMax_system(
unsigned long long* address,
unsigned long long val) {
811#if defined(__gfx941__)
812 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
815 [](
unsigned long long x,
unsigned long long y) {
return y < x; },
817 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
818 __HIP_MEMORY_SCOPE_SYSTEM);
821 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
827long long atomicMax(
long long* address,
long long val) {
828 #if defined(__gfx941__)
829 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
830 address, val, [](
long long x,
long long y) {
return y < x; },
832 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
835 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
841long long atomicMax_system(
long long* address,
long long val) {
842#if defined(__gfx941__)
843 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
844 address, val, [](
long long x,
long long y) {
return y < x; },
846 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
849 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
855float atomicMax(
float* addr,
float val) {
856#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
857 return unsafeAtomicMax(addr, val);
859 #if __has_builtin(__hip_atomic_load) && \
860 __has_builtin(__hip_atomic_compare_exchange_strong)
861 float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
863 while (!done && value < val) {
864 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
865 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
869 unsigned int *uaddr = (
unsigned int *)addr;
870 unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
872 while (!done && __uint_as_float(value) < val) {
873 done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
false,
874 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
876 return __uint_as_float(value);
883float atomicMax_system(
float* address,
float val) {
884 unsigned int* uaddr {
reinterpret_cast<unsigned int*
>(address) };
885 #if __has_builtin(__hip_atomic_load)
886 unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
888 unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
890 float value = __uint_as_float(tmp);
892 while (value < val) {
893 value = atomicCAS_system(address, value, val);
901double atomicMax(
double* addr,
double val) {
902#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
903 return unsafeAtomicMax(addr, val);
905 #if __has_builtin(__hip_atomic_load) && \
906 __has_builtin(__hip_atomic_compare_exchange_strong)
907 double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
909 while (!done && value < val) {
910 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
911 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
915 unsigned long long *uaddr = (
unsigned long long *)addr;
916 unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
918 while (!done && __longlong_as_double(value) < val) {
919 done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
false,
920 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
922 return __longlong_as_double(value);
929double atomicMax_system(
double* address,
double val) {
930 unsigned long long* uaddr {
reinterpret_cast<unsigned long long*
>(address) };
931 #if __has_builtin(__hip_atomic_load)
932 unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
934 unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
936 double value = __longlong_as_double(tmp);
938 while (value < val) {
939 value = atomicCAS_system(address, value, val);
947unsigned int atomicInc(
unsigned int* address,
unsigned int val)
949#if defined(__gfx941__)
950 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
953 [](
unsigned int& x,
unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
956 __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED,
"agent");
959 return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED,
"agent");
966unsigned int atomicDec(
unsigned int* address,
unsigned int val)
968#if defined(__gfx941__)
969 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
972 [](
unsigned int& x,
unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
975 __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED,
"agent");
978 return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED,
"agent");
985int atomicAnd(
int* address,
int val) {
986#if defined(__gfx941__)
987 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
988 address, val, [](
int& x,
int y) { x &= y; }, [=]() {
989 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
990 __HIP_MEMORY_SCOPE_AGENT);
993 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
999int atomicAnd_system(
int* address,
int val) {
1000#if defined(__gfx941__)
1001 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1002 address, val, [](
int& x,
int y) { x &= y; }, [=]() {
1003 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1004 __HIP_MEMORY_SCOPE_SYSTEM);
1007 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1013unsigned int atomicAnd(
unsigned int* address,
unsigned int val) {
1014#if defined(__gfx941__)
1015 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1016 address, val, [](
unsigned int& x,
unsigned int y) { x &= y; }, [=]() {
1017 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1018 __HIP_MEMORY_SCOPE_AGENT);
1021 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1027unsigned int atomicAnd_system(
unsigned int* address,
unsigned int val) {
1028#if defined(__gfx941__)
1029 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1030 address, val, [](
unsigned int& x,
unsigned int y) { x &= y; }, [=]() {
1031 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1032 __HIP_MEMORY_SCOPE_SYSTEM);
1035 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1041unsigned long atomicAnd(
unsigned long* address,
unsigned long val) {
1042#if defined(__gfx941__)
1043 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1044 address, val, [](
unsigned long& x,
unsigned long y) { x &= y; }, [=]() {
1045 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1046 __HIP_MEMORY_SCOPE_AGENT);
1049 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1055unsigned long atomicAnd_system(
unsigned long* address,
unsigned long val) {
1056#if defined(__gfx941__)
1057 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1058 address, val, [](
unsigned long& x,
unsigned long y) { x &= y; }, [=]() {
1059 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1060 __HIP_MEMORY_SCOPE_SYSTEM);
1063 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1069unsigned long long atomicAnd(
unsigned long long* address,
unsigned long long val) {
1070#if defined(__gfx941__)
1071 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1074 [](
unsigned long long& x,
unsigned long long y) { x &= y; },
1076 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1077 __HIP_MEMORY_SCOPE_AGENT);
1080 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1086unsigned long long atomicAnd_system(
unsigned long long* address,
unsigned long long val) {
1087#if defined(__gfx941__)
1088 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1091 [](
unsigned long long& x,
unsigned long long y) { x &= y; },
1093 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1094 __HIP_MEMORY_SCOPE_SYSTEM);
1097 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1103int atomicOr(
int* address,
int val) {
1104#if defined(__gfx941__)
1105 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1106 address, val, [](
int& x,
int y) { x |= y; }, [=]() {
1107 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1108 __HIP_MEMORY_SCOPE_AGENT);
1111 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1117int atomicOr_system(
int* address,
int val) {
1118#if defined(__gfx941__)
1119 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1120 address, val, [](
int& x,
int y) { x |= y; }, [=]() {
1121 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1122 __HIP_MEMORY_SCOPE_SYSTEM);
1125 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1131unsigned int atomicOr(
unsigned int* address,
unsigned int val) {
1132#if defined(__gfx941__)
1133 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1134 address, val, [](
unsigned int& x,
unsigned int y) { x |= y; }, [=]() {
1135 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1136 __HIP_MEMORY_SCOPE_AGENT);
1139 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1145unsigned int atomicOr_system(
unsigned int* address,
unsigned int val) {
1146#if defined(__gfx941__)
1147 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1148 address, val, [](
unsigned int& x,
unsigned int y) { x |= y; }, [=]() {
1149 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1150 __HIP_MEMORY_SCOPE_SYSTEM);
1153 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1159unsigned long atomicOr(
unsigned long* address,
unsigned long val) {
1160#if defined(__gfx941__)
1161 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1162 address, val, [](
unsigned long& x,
unsigned long y) { x |= y; }, [=]() {
1163 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1164 __HIP_MEMORY_SCOPE_AGENT);
1167 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1173unsigned long atomicOr_system(
unsigned long* address,
unsigned long val) {
1174#if defined(__gfx941__)
1175 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1176 address, val, [](
unsigned long& x,
unsigned long y) { x |= y; }, [=]() {
1177 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1178 __HIP_MEMORY_SCOPE_SYSTEM);
1181 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
// Atomic OR on unsigned long long at device (agent) scope, relaxed ordering,
// with the gfx941 CAS-expander workaround. The `address, val` arguments and
// the trailing `[=]() {` of the fallback lambda were lost in extraction and
// are reconstructed to match the pattern used by the other overloads.
__device__
inline
unsigned long long atomicOr(unsigned long long* address,
                            unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val,
      [](unsigned long long& x, unsigned long long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}
// Atomic OR on unsigned long long at system scope, relaxed ordering,
// with the gfx941 CAS-expander workaround.
__device__
inline
unsigned long long atomicOr_system(unsigned long long* address,
                                   unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val,
      [](unsigned long long& x, unsigned long long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// Atomic XOR on int at device (agent) scope, relaxed ordering,
// with the gfx941 CAS-expander workaround.
__device__
inline
int atomicXor(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int& x, int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}
// Atomic XOR on int at system scope, relaxed ordering,
// with the gfx941 CAS-expander workaround.
__device__
inline
int atomicXor_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int& x, int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// Atomic XOR on unsigned int at device (agent) scope, relaxed ordering,
// with the gfx941 CAS-expander workaround.
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}
// Atomic XOR on unsigned int at system scope, relaxed ordering,
// with the gfx941 CAS-expander workaround.
__device__
inline
unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// Atomic XOR on unsigned long at device (agent) scope, relaxed ordering,
// with the gfx941 CAS-expander workaround.
__device__
inline
unsigned long atomicXor(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}
// Atomic XOR on unsigned long at system scope, relaxed ordering,
// with the gfx941 CAS-expander workaround.
__device__
inline
unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// Atomic XOR on unsigned long long at device (agent) scope, relaxed ordering,
// with the gfx941 CAS-expander workaround. The `address, val` arguments and
// the fallback lambda's `[=]() {` were lost in extraction and are
// reconstructed to match the pattern of the other overloads.
__device__
inline
unsigned long long atomicXor(unsigned long long* address,
                             unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val,
      [](unsigned long long& x, unsigned long long y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}
// Atomic XOR on unsigned long long at system scope, relaxed ordering.
// NOTE(review): unlike the sibling overloads, no gfx941 branch is visible in
// the extracted text for this function — confirm against the original header.
__device__
inline
unsigned long long atomicXor_system(unsigned long long* address,
                                    unsigned long long val) {
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
// Atomic compare-and-swap on int, relaxed ordering.
// __atomic_compare_exchange_n writes the observed value into `compare` on
// failure, so returning `compare` yields the old value in either case,
// matching CUDA atomicCAS semantics. (`return compare;` reconstructed — the
// extraction dropped it, but the int return type requires it.)
__device__
inline
int atomicCAS(int* address, int compare, int val) {
  __atomic_compare_exchange_n(
      address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

  return compare;
}
// Atomic compare-and-swap on unsigned int, relaxed ordering; returns the
// value observed at *address (old value on success, current value on failure).
__device__
inline
unsigned int atomicCAS(unsigned int* address,
                       unsigned int compare,
                       unsigned int val) {
  __atomic_compare_exchange_n(
      address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

  return compare;
}
// Atomic compare-and-swap on unsigned long long, relaxed ordering; returns
// the value observed at *address.
__device__
inline
unsigned long long atomicCAS(unsigned long long* address,
                             unsigned long long compare,
                             unsigned long long val) {
  __atomic_compare_exchange_n(
      address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

  return compare;
}
// Atomic add on int, relaxed ordering; returns the previous value.
__device__
inline
int atomicAdd(int* address, int val) {
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Atomic add on unsigned int, relaxed ordering; returns the previous value.
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val) {
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Atomic add on unsigned long long, relaxed ordering; returns the previous
// value.
__device__
inline
unsigned long long atomicAdd(unsigned long long* address,
                             unsigned long long val) {
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Atomic add on float, relaxed ordering; returns the previous value.
// When __AMDGCN_UNSAFE_FP_ATOMICS__ is enabled (-munsafe-fp-atomics), uses
// the hardware unsafe FP atomic path from amd_hip_unsafe_atomics.h.
__device__
inline
float atomicAdd(float* address, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}
// Deprecated fire-and-forget float atomic add (no return value), implemented
// via the OCKL device library. The DEPRECATED annotation is host-side only,
// hence the __HIPCC_RTC__ guard.
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
__device__
inline
void atomicAddNoRet(float* address, float val) {
  __ockl_atomic_add_noret_f32(address, val);
}
// Atomic add on double, relaxed ordering; returns the previous value.
// Uses the unsafe hardware FP atomic path when -munsafe-fp-atomics is set.
__device__
inline
double atomicAdd(double* address, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}
// Atomic subtract on int, relaxed ordering; returns the previous value.
__device__
inline
int atomicSub(int* address, int val) {
  return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
// Atomic subtract on unsigned int, relaxed ordering; returns the previous
// value.
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val) {
  return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
// Atomic exchange on int, relaxed ordering; returns the previous value.
__device__
inline
int atomicExch(int* address, int val) {
  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Atomic exchange on unsigned int, relaxed ordering; returns the previous
// value.
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val) {
  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Atomic exchange on unsigned long long, relaxed ordering; returns the
// previous value.
__device__
inline
unsigned long long atomicExch(unsigned long long* address,
                              unsigned long long val) {
  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Atomic exchange on float, implemented bitwise on the same-sized unsigned
// int representation and converted back with __uint_as_float.
// NOTE(review): the memory-order argument was cut off in extraction; relaxed
// ordering reconstructed to match every other exchange in this file.
__device__
inline
float atomicExch(float* address, float val) {
  return __uint_as_float(__atomic_exchange_n(
      reinterpret_cast<unsigned int*>(address),
      __float_as_uint(val), __ATOMIC_RELAXED));
}
// Atomic minimum on int, relaxed ordering; returns the previous value.
__device__
inline
int atomicMin(int* address, int val) {
  return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
// Atomic minimum on unsigned int, relaxed ordering; returns the previous
// value.
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val) {
  return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
// Atomic minimum on unsigned long long via a CAS loop: re-read on observed
// interference (tmp1 != tmp) and retry; returns the previous value.
// NOTE(review): the loop condition was lost in extraction; `val < tmp`
// reconstructed — the CAS is only attempted while val would lower the value.
__device__
inline
unsigned long long atomicMin(unsigned long long* address,
                             unsigned long long val) {
  unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
  while (val < tmp) {
    const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

    if (tmp1 != tmp) { tmp = tmp1; continue; }

    tmp = atomicCAS(address, tmp, val);
  }

  return tmp;
}
// Atomic minimum on long long via a CAS loop; returns the previous value.
// NOTE(review): loop condition and interference re-check reconstructed from
// the unsigned long long overload — the extraction dropped those lines.
// The atomicCAS(address, ...) call here takes a long long*; assumes a
// matching overload is in scope — confirm against the full header.
__device__
inline
long long atomicMin(long long* address, long long val) {
  long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
  while (val < tmp) {
    const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

    if (tmp1 != tmp) { tmp = tmp1; continue; }

    tmp = atomicCAS(address, tmp, val);
  }

  return tmp;
}
// Atomic maximum on int, relaxed ordering; returns the previous value.
__device__
inline
int atomicMax(int* address, int val) {
  return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
// Atomic maximum on unsigned int, relaxed ordering; returns the previous
// value.
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val) {
  return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
// Atomic maximum on unsigned long long via a CAS loop: re-read on observed
// interference (tmp1 != tmp) and retry; returns the previous value.
// NOTE(review): the loop condition was lost in extraction; `tmp < val`
// reconstructed — the CAS is only attempted while val would raise the value.
__device__
inline
unsigned long long atomicMax(unsigned long long* address,
                             unsigned long long val) {
  unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
  while (tmp < val) {
    const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

    if (tmp1 != tmp) { tmp = tmp1; continue; }

    tmp = atomicCAS(address, tmp, val);
  }

  return tmp;
}
// Atomic maximum on long long via a CAS loop; returns the previous value.
// NOTE(review): loop condition and interference re-check reconstructed from
// the unsigned long long overload — the extraction dropped those lines.
// Assumes a long long atomicCAS overload is in scope — confirm against the
// full header.
__device__
inline
long long atomicMax(long long* address, long long val) {
  long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
  while (tmp < val) {
    const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

    if (tmp1 != tmp) { tmp = tmp1; continue; }

    tmp = atomicCAS(address, tmp, val);
  }

  return tmp;
}
// CUDA-style atomic increment-with-wrap: *address = (old >= val) ? 0 : old+1,
// lowered to the AMDGCN inc intrinsic at agent (device) scope, relaxed
// ordering; returns the previous value.
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val) {
  return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED,
                                       "agent");
}
// CUDA-style atomic decrement-with-wrap, lowered to the AMDGCN dec intrinsic
// at agent (device) scope, relaxed ordering; returns the previous value.
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val) {
  return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED,
                                       "agent");
}
// Atomic AND on int, relaxed ordering; returns the previous value.
__device__
inline
int atomicAnd(int* address, int val) {
  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Atomic AND on unsigned int, relaxed ordering; returns the previous value.
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val) {
  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Atomic AND on unsigned long long, relaxed ordering; returns the previous
// value.
__device__
inline
unsigned long long atomicAnd(unsigned long long* address,
                             unsigned long long val) {
  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Atomic OR on int, relaxed ordering; returns the previous value.
__device__
inline
int atomicOr(int* address, int val) {
  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Atomic OR on unsigned int, relaxed ordering; returns the previous value.
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val) {
  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Atomic OR on unsigned long long, relaxed ordering; returns the previous
// value.
__device__
inline
unsigned long long atomicOr(unsigned long long* address,
                            unsigned long long val) {
  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Atomic XOR on int, relaxed ordering; returns the previous value.
__device__
inline
int atomicXor(int* address, int val) {
  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// Atomic XOR on unsigned int, relaxed ordering; returns the previous value.
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val) {
  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// Atomic XOR on unsigned long long, relaxed ordering; returns the previous
// value.
__device__
inline
unsigned long long atomicXor(unsigned long long* address,
                             unsigned long long val) {
  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}