// -*- C++ -*-
//===--------------------------- atomic -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_STD_ATOMIC
#define _CUDA_STD_ATOMIC

// clang-format off

#include <cuda/std/detail/__config>

#ifdef _LIBCUDACXX_HAS_NO_THREADS
# error <atomic> is not supported on this single threaded system
#endif
#ifdef _LIBCUDACXX_HAS_NO_ATOMIC_HEADER
# error <atomic> is not implemented
#endif
#ifdef _LIBCUDACXX_UNSUPPORTED_THREAD_API
# error "<atomic> is not supported on this system"
#endif
#ifdef kill_dependency
# error C++ standard library is incompatible with <stdatomic.h>
#endif

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
#  pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
#  pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
#  pragma system_header
#endif // no system header

#include <cuda/std/__atomic/platform.h>
#include <cuda/std/__atomic/order.h>
#include <cuda/std/__atomic/scopes.h>
#include <cuda/std/__atomic/wait/polling.h>
#include <cuda/std/__atomic/wait/notify_wait.h>
#include <cuda/std/__atomic/api/owned.h>
#include <cuda/std/__atomic/api/reference.h>

#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/is_floating_point.h>
#include <cuda/std/__type_traits/is_integral.h>
#include <cuda/std/__type_traits/is_same.h>

// clang-format on

_CCCL_PUSH_MACROS

_LIBCUDACXX_BEGIN_NAMESPACE_STD

// Returns its argument `__y` unchanged; the value is taken and returned by value.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp kill_dependency(_Tp __y) noexcept
{
  return __y;
}

// atomic<T>
// Owning atomic wrapper for a value of type `_Tp`.
//
// The atomic operations themselves come from the `__atomic_impl<_Tp>` base;
// this class only supplies the constructors, assignment from a value, and the
// deleted copy operations.
template <class _Tp>
struct atomic : public __atomic_impl<_Tp>
{
  using value_type = _Tp;

  // Copy construction and copy assignment (plain and volatile) are disabled.
  atomic(const atomic&)                     = delete;
  atomic& operator=(const atomic&)          = delete;
  atomic& operator=(const atomic&) volatile = delete;

  // Default construction forwards to the base's default constructor.
  _LIBCUDACXX_HIDE_FROM_ABI constexpr atomic() noexcept
      : __atomic_impl<_Tp>()
  {}

  // Construction from a value forwards `__d` to the base.
  _LIBCUDACXX_HIDE_FROM_ABI constexpr atomic(_Tp __d) noexcept
      : __atomic_impl<_Tp>(__d)
  {}

  // Assignment from a value: calls `store(__d)` and hands back the assigned
  // value (by value, not a reference to `*this`).
  _LIBCUDACXX_HIDE_FROM_ABI _Tp operator=(_Tp __d) noexcept
  {
    (*this).store(__d);
    return __d;
  }
  // Same as above for volatile-qualified objects.
  _LIBCUDACXX_HIDE_FROM_ABI _Tp operator=(_Tp __d) volatile noexcept
  {
    (*this).store(__d);
    return __d;
  }
};

// atomic_ref<T>
// Non-owning atomic view over an existing object of type `_Tp`.
//
// The atomic operations come from the `__atomic_ref_impl<_Tp>` base; this class
// adds the public constants, the constructors, and assignment from a value.
template <class _Tp>
struct atomic_ref : public __atomic_ref_impl<_Tp>
{
  using value_type = _Tp;

  // Reported as always lock-free when `_Tp` occupies at most 8 bytes.
  static constexpr bool is_always_lock_free = sizeof(_Tp) <= 8;

  // The required alignment is defined here as the size of `_Tp`.
  static constexpr size_t required_alignment = sizeof(_Tp);

  // Binds the view to `__ref` by forwarding it to the base.
  _LIBCUDACXX_HIDE_FROM_ABI explicit atomic_ref(_Tp& __ref)
      : __atomic_ref_impl<_Tp>(__ref)
  {}

  // Views are copy-constructible but cannot be re-bound through copy assignment.
  _CCCL_HIDE_FROM_ABI atomic_ref(const atomic_ref&) noexcept = default;
  atomic_ref& operator=(const atomic_ref&)                   = delete;
  atomic_ref& operator=(const atomic_ref&) const             = delete;

  // Assignment from a value: calls `store(__v)` and returns the assigned value.
  // The operator is const-qualified, so it is usable on a const `atomic_ref`.
  _LIBCUDACXX_HIDE_FROM_ABI _Tp operator=(_Tp __v) const noexcept
  {
    (*this).store(__v);
    return __v;
  }
};

// atomic_is_lock_free

// Free-function form of `atomic<_Tp>::is_lock_free()` (volatile overload):
// returns whatever the member reports for `*__o`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_is_lock_free(const volatile atomic<_Tp>* __o) noexcept
{
  const volatile atomic<_Tp>& __self = *__o;
  return __self.is_lock_free();
}

// Free-function form of `atomic<_Tp>::is_lock_free()` (non-volatile overload):
// returns whatever the member reports for `*__o`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_is_lock_free(const atomic<_Tp>* __o) noexcept
{
  const atomic<_Tp>& __self = *__o;
  return __self.is_lock_free();
}

// atomic_init

// Initializes the storage member `__a` of the volatile atomic `*__o` with `__d`
// by handing its address to `__atomic_init_dispatch`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_init(volatile atomic<_Tp>* __o, _Tp __d) noexcept
{
  __atomic_init_dispatch(&(*__o).__a, __d);
}

// Initializes the storage member `__a` of the atomic `*__o` with `__d`
// by handing its address to `__atomic_init_dispatch`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_init(atomic<_Tp>* __o, _Tp __d) noexcept
{
  __atomic_init_dispatch(&(*__o).__a, __d);
}

// atomic_store

// Free-function form of `store` (volatile overload): stores `__d` into `*__o`.
// No memory order is passed, so the member's own default applies.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_store(volatile atomic<_Tp>* __o, _Tp __d) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  __self.store(__d);
}

// Free-function form of `store` (non-volatile overload): stores `__d` into `*__o`.
// No memory order is passed, so the member's own default applies.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_store(atomic<_Tp>* __o, _Tp __d) noexcept
{
  atomic<_Tp>& __self = *__o;
  __self.store(__d);
}

// atomic_store_explicit

// Free-function form of `store` with an explicit memory order (volatile overload).
// `__m` is additionally passed through `_LIBCUDACXX_CHECK_STORE_MEMORY_ORDER`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_store_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept
  _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m)
{
  volatile atomic<_Tp>& __self = *__o;
  __self.store(__d, __m);
}

// Free-function form of `store` with an explicit memory order (non-volatile overload).
// `__m` is additionally passed through `_LIBCUDACXX_CHECK_STORE_MEMORY_ORDER`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_store_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept
  _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m)
{
  atomic<_Tp>& __self = *__o;
  __self.store(__d, __m);
}

// atomic_load

// Free-function form of `load` (volatile overload): returns the result of
// `load()` on `*__o`, called without an explicit memory order.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp atomic_load(const volatile atomic<_Tp>* __o) noexcept
{
  const volatile atomic<_Tp>& __self = *__o;
  return __self.load();
}

// Free-function form of `load` (non-volatile overload): returns the result of
// `load()` on `*__o`, called without an explicit memory order.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp atomic_load(const atomic<_Tp>* __o) noexcept
{
  const atomic<_Tp>& __self = *__o;
  return __self.load();
}

// atomic_load_explicit

// Free-function form of `load` with an explicit memory order (volatile overload).
// `__m` is additionally passed through `_LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) noexcept
  _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m)
{
  const volatile atomic<_Tp>& __self = *__o;
  return __self.load(__m);
}

// Free-function form of `load` with an explicit memory order (non-volatile overload).
// `__m` is additionally passed through `_LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) noexcept
  _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m)
{
  const atomic<_Tp>& __self = *__o;
  return __self.load(__m);
}

// atomic_exchange

// Free-function form of `exchange` (volatile overload): passes `__d` to
// `exchange()` on `*__o` and returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp atomic_exchange(volatile atomic<_Tp>* __o, _Tp __d) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.exchange(__d);
}

// Free-function form of `exchange` (non-volatile overload): passes `__d` to
// `exchange()` on `*__o` and returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp atomic_exchange(atomic<_Tp>* __o, _Tp __d) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.exchange(__d);
}

// atomic_exchange_explicit

// Free-function form of `exchange` with an explicit memory order `__m`
// (volatile overload); returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp atomic_exchange_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.exchange(__d, __m);
}

// Free-function form of `exchange` with an explicit memory order `__m`
// (non-volatile overload); returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp atomic_exchange_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.exchange(__d, __m);
}

// atomic_compare_exchange_weak

// Free-function form of `compare_exchange_weak` (volatile overload).
// The expected value is supplied through the pointer `__e`, which is
// dereferenced and passed to the member by reference; `__d` is the desired value.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept
{
  _Tp& __expected = *__e;
  return (*__o).compare_exchange_weak(__expected, __d);
}

// Free-function form of `compare_exchange_weak` (non-volatile overload).
// The expected value is supplied through the pointer `__e`, which is
// dereferenced and passed to the member by reference; `__d` is the desired value.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_compare_exchange_weak(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept
{
  _Tp& __expected = *__e;
  return (*__o).compare_exchange_weak(__expected, __d);
}

// atomic_compare_exchange_strong

// Free-function form of `compare_exchange_strong` (volatile overload).
// The expected value is supplied through the pointer `__e`, which is
// dereferenced and passed to the member by reference; `__d` is the desired value.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept
{
  _Tp& __expected = *__e;
  return (*__o).compare_exchange_strong(__expected, __d);
}

// Free-function form of `compare_exchange_strong` (non-volatile overload).
// The expected value is supplied through the pointer `__e`, which is
// dereferenced and passed to the member by reference; `__d` is the desired value.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_compare_exchange_strong(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept
{
  _Tp& __expected = *__e;
  return (*__o).compare_exchange_strong(__expected, __d);
}

// atomic_compare_exchange_weak_explicit

// Free-function form of `compare_exchange_weak` with explicit memory orders
// (volatile overload). `__s` and `__f` are forwarded in that order and are also
// passed through `_LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_compare_exchange_weak_explicit(
  volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept
  _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
{
  _Tp& __expected = *__e;
  return (*__o).compare_exchange_weak(__expected, __d, __s, __f);
}

// Free-function form of `compare_exchange_weak` with explicit memory orders
// (non-volatile overload). `__s` and `__f` are forwarded in that order and are
// also passed through `_LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI bool
atomic_compare_exchange_weak_explicit(atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept
  _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
{
  _Tp& __expected = *__e;
  return (*__o).compare_exchange_weak(__expected, __d, __s, __f);
}

// atomic_compare_exchange_strong_explicit

// Free-function form of `compare_exchange_strong` with explicit memory orders
// (volatile overload). `__s` and `__f` are forwarded in that order and are also
// passed through `_LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_compare_exchange_strong_explicit(
  volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept
  _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
{
  _Tp& __expected = *__e;
  return (*__o).compare_exchange_strong(__expected, __d, __s, __f);
}

// Free-function form of `compare_exchange_strong` with explicit memory orders
// (non-volatile overload). `__s` and `__f` are forwarded in that order and are
// also passed through `_LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_compare_exchange_strong_explicit(
  atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept
  _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f)
{
  _Tp& __expected = *__e;
  return (*__o).compare_exchange_strong(__expected, __d, __s, __f);
}

// atomic_wait

// Free-function form of `wait` (volatile overload): calls `wait(__v)` on `*__o`
// without an explicit memory order.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void
atomic_wait(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) noexcept
{
  const volatile atomic<_Tp>& __self = *__o;
  __self.wait(__v);
}

// Free-function form of `wait` (non-volatile overload): calls `wait(__v)` on
// `*__o` without an explicit memory order.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_wait(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) noexcept
{
  const atomic<_Tp>& __self = *__o;
  __self.wait(__v);
}

// atomic_wait_explicit

// Free-function form of `wait` with an explicit memory order (volatile overload).
// `__m` is additionally passed through `_LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void
atomic_wait_explicit(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) noexcept
  _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m)
{
  const volatile atomic<_Tp>& __self = *__o;
  __self.wait(__v, __m);
}

// Free-function form of `wait` with an explicit memory order (non-volatile overload).
// `__m` is additionally passed through `_LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void
atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) noexcept
  _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m)
{
  const atomic<_Tp>& __self = *__o;
  __self.wait(__v, __m);
}

// atomic_notify_one

// Free-function form of `notify_one` (volatile overload): calls the member on `*__o`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_notify_one(volatile atomic<_Tp>* __o) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  __self.notify_one();
}
// Free-function form of `notify_one` (non-volatile overload): calls the member on `*__o`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_notify_one(atomic<_Tp>* __o) noexcept
{
  atomic<_Tp>& __self = *__o;
  __self.notify_one();
}

// atomic_notify_all

// Free-function form of `notify_all` (volatile overload): calls the member on `*__o`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_notify_all(volatile atomic<_Tp>* __o) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  __self.notify_all();
}
// Free-function form of `notify_all` (non-volatile overload): calls the member on `*__o`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI void atomic_notify_all(atomic<_Tp>* __o) noexcept
{
  atomic<_Tp>& __self = *__o;
  __self.notify_all();
}

// atomic_fetch_add

// Free-function form of `fetch_add` (volatile overload). Participates in
// overload resolution only for non-bool integral types and floating-point
// types; returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI
enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp>
atomic_fetch_add(volatile atomic<_Tp>* __o, _Tp __op) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.fetch_add(__op);
}

// Free-function form of `fetch_add` (non-volatile overload). Participates in
// overload resolution only for non-bool integral types and floating-point
// types; returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI
enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp>
atomic_fetch_add(atomic<_Tp>* __o, _Tp __op) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.fetch_add(__op);
}

// Free-function form of `fetch_add` for atomic pointers (volatile overload):
// the operand is a `ptrdiff_t`, and the member's `_Tp*` result is returned.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp* atomic_fetch_add(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept
{
  volatile atomic<_Tp*>& __self = *__o;
  return __self.fetch_add(__op);
}

// Free-function form of `fetch_add` for atomic pointers (non-volatile overload):
// the operand is a `ptrdiff_t`, and the member's `_Tp*` result is returned.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp* atomic_fetch_add(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept
{
  atomic<_Tp*>& __self = *__o;
  return __self.fetch_add(__op);
}

// atomic_fetch_add_explicit

// Free-function form of `fetch_add` with an explicit memory order `__m`
// (volatile overload). Enabled only for non-bool integral types and
// floating-point types.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI
enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp>
atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.fetch_add(__op, __m);
}

// Free-function form of `fetch_add` with an explicit memory order `__m`
// (non-volatile overload). Enabled only for non-bool integral types and
// floating-point types.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI
enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp>
atomic_fetch_add_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.fetch_add(__op, __m);
}

// Free-function form of `fetch_add` for atomic pointers with an explicit memory
// order `__m` (volatile overload); the operand is a `ptrdiff_t`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp*
atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept
{
  volatile atomic<_Tp*>& __self = *__o;
  return __self.fetch_add(__op, __m);
}

// Free-function form of `fetch_add` for atomic pointers with an explicit memory
// order `__m` (non-volatile overload); the operand is a `ptrdiff_t`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp* atomic_fetch_add_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept
{
  atomic<_Tp*>& __self = *__o;
  return __self.fetch_add(__op, __m);
}

// atomic_fetch_sub

// Free-function form of `fetch_sub` (volatile overload). Participates in
// overload resolution only for non-bool integral types and floating-point
// types; returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI
enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp>
atomic_fetch_sub(volatile atomic<_Tp>* __o, _Tp __op) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.fetch_sub(__op);
}

// Free-function form of `fetch_sub` (non-volatile overload). Participates in
// overload resolution only for non-bool integral types and floating-point
// types; returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI
enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp>
atomic_fetch_sub(atomic<_Tp>* __o, _Tp __op) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.fetch_sub(__op);
}

// Free-function form of `fetch_sub` for atomic pointers (volatile overload):
// the operand is a `ptrdiff_t`, and the member's `_Tp*` result is returned.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp* atomic_fetch_sub(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept
{
  volatile atomic<_Tp*>& __self = *__o;
  return __self.fetch_sub(__op);
}

// Free-function form of `fetch_sub` for atomic pointers (non-volatile overload):
// the operand is a `ptrdiff_t`, and the member's `_Tp*` result is returned.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp* atomic_fetch_sub(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept
{
  atomic<_Tp*>& __self = *__o;
  return __self.fetch_sub(__op);
}

// atomic_fetch_sub_explicit

// Free-function form of `fetch_sub` with an explicit memory order `__m`
// (volatile overload). Enabled only for non-bool integral types and
// floating-point types.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI
enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp>
atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.fetch_sub(__op, __m);
}

// Free-function form of `fetch_sub` with an explicit memory order `__m`
// (non-volatile overload). Enabled only for non-bool integral types and
// floating-point types.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI
enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp>
atomic_fetch_sub_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.fetch_sub(__op, __m);
}

// Free-function form of `fetch_sub` for atomic pointers with an explicit memory
// order `__m` (volatile overload); the operand is a `ptrdiff_t`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp*
atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept
{
  volatile atomic<_Tp*>& __self = *__o;
  return __self.fetch_sub(__op, __m);
}

// Free-function form of `fetch_sub` for atomic pointers with an explicit memory
// order `__m` (non-volatile overload); the operand is a `ptrdiff_t`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI _Tp* atomic_fetch_sub_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept
{
  atomic<_Tp*>& __self = *__o;
  return __self.fetch_sub(__op, __m);
}

// atomic_fetch_and

// Free-function form of `fetch_and` (volatile overload). Enabled only for
// integral types other than `bool`; returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_and(volatile atomic<_Tp>* __o, _Tp __op) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.fetch_and(__op);
}

// Free-function form of `fetch_and` (non-volatile overload). Enabled only for
// integral types other than `bool`; returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_and(atomic<_Tp>* __o, _Tp __op) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.fetch_and(__op);
}

// atomic_fetch_and_explicit

// Free-function form of `fetch_and` with an explicit memory order `__m`
// (volatile overload). Enabled only for integral types other than `bool`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_and_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.fetch_and(__op, __m);
}

// Free-function form of `fetch_and` with an explicit memory order `__m`
// (non-volatile overload). Enabled only for integral types other than `bool`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_and_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.fetch_and(__op, __m);
}

// atomic_fetch_or

// Free-function form of `fetch_or` (volatile overload). Enabled only for
// integral types other than `bool`; returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_or(volatile atomic<_Tp>* __o, _Tp __op) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.fetch_or(__op);
}

// Free-function form of `fetch_or` (non-volatile overload). Enabled only for
// integral types other than `bool`; returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_or(atomic<_Tp>* __o, _Tp __op) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.fetch_or(__op);
}

// atomic_fetch_or_explicit

// Free-function form of `fetch_or` with an explicit memory order `__m`
// (volatile overload). Enabled only for integral types other than `bool`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_or_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.fetch_or(__op, __m);
}

// Free-function form of `fetch_or` with an explicit memory order `__m`
// (non-volatile overload). Enabled only for integral types other than `bool`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_or_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.fetch_or(__op, __m);
}

// atomic_fetch_xor

// Free-function form of `fetch_xor` (volatile overload). Enabled only for
// integral types other than `bool`; returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_xor(volatile atomic<_Tp>* __o, _Tp __op) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.fetch_xor(__op);
}

// Free-function form of `fetch_xor` (non-volatile overload). Enabled only for
// integral types other than `bool`; returns the member's result.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_xor(atomic<_Tp>* __o, _Tp __op) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.fetch_xor(__op);
}

// atomic_fetch_xor_explicit

// Free-function form of `fetch_xor` with an explicit memory order `__m`
// (volatile overload). Enabled only for integral types other than `bool`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_xor_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept
{
  volatile atomic<_Tp>& __self = *__o;
  return __self.fetch_xor(__op, __m);
}

// Free-function form of `fetch_xor` with an explicit memory order `__m`
// (non-volatile overload). Enabled only for integral types other than `bool`.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI enable_if_t<is_integral<_Tp>::value && !is_same<_Tp, bool>::value, _Tp>
atomic_fetch_xor_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept
{
  atomic<_Tp>& __self = *__o;
  return __self.fetch_xor(__op, __m);
}

// flag type and operations

// Atomic flag whose state lives in `__a`, a storage wrapper around
// `_LIBCUDACXX_ATOMIC_FLAG_TYPE`.
//
// Every operation is issued through the `__atomic_*` helpers with
// `__thread_scope_system_tag`, and each is provided as a volatile /
// non-volatile pair. Operations that take a memory order default it to
// `memory_order_seq_cst`.
struct atomic_flag
{
  __atomic_storage_t<_LIBCUDACXX_ATOMIC_FLAG_TYPE> __a;

  _CCCL_HIDE_FROM_ABI atomic_flag() noexcept = default;

  // EXTENSION: construct the flag with `__b` as its initial value.
  _LIBCUDACXX_HIDE_FROM_ABI constexpr atomic_flag(bool __b) noexcept
      : __a(__b)
  {}

  // Flags can be neither copy-constructed nor copy-assigned.
  atomic_flag(const atomic_flag&)                     = delete;
  atomic_flag& operator=(const atomic_flag&)          = delete;
  atomic_flag& operator=(const atomic_flag&) volatile = delete;

  // test: loads the flag and reports whether it compares equal to the
  // "true" value of the flag type.
  _LIBCUDACXX_HIDE_FROM_ABI bool test(memory_order __m = memory_order_seq_cst) const volatile noexcept
  {
    return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true)
        == __atomic_load_dispatch(&this->__a, __m, __thread_scope_system_tag{});
  }
  _LIBCUDACXX_HIDE_FROM_ABI bool test(memory_order __m = memory_order_seq_cst) const noexcept
  {
    return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true)
        == __atomic_load_dispatch(&this->__a, __m, __thread_scope_system_tag{});
  }

  // test_and_set: exchanges the "true" value into the flag and returns the
  // exchange helper's result converted to bool.
  _LIBCUDACXX_HIDE_FROM_ABI bool test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept
  {
    return __atomic_exchange_dispatch(
      &this->__a, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m, __thread_scope_system_tag{});
  }
  _LIBCUDACXX_HIDE_FROM_ABI bool test_and_set(memory_order __m = memory_order_seq_cst) noexcept
  {
    return __atomic_exchange_dispatch(
      &this->__a, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m, __thread_scope_system_tag{});
  }

  // clear: stores the "false" value into the flag.
  _LIBCUDACXX_HIDE_FROM_ABI void clear(memory_order __m = memory_order_seq_cst) volatile noexcept
  {
    __atomic_store_dispatch(
      &this->__a, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m, __thread_scope_system_tag{});
  }
  _LIBCUDACXX_HIDE_FROM_ABI void clear(memory_order __m = memory_order_seq_cst) noexcept
  {
    __atomic_store_dispatch(
      &this->__a, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m, __thread_scope_system_tag{});
  }

  // wait: hands the flag storage, the value `__v` and the memory order to
  // `__atomic_wait`.
  _LIBCUDACXX_HIDE_FROM_ABI void wait(_LIBCUDACXX_ATOMIC_FLAG_TYPE __v, memory_order __m = memory_order_seq_cst) const
    volatile noexcept
  {
    __atomic_wait(&this->__a, __v, __m, __thread_scope_system_tag{});
  }
  _LIBCUDACXX_HIDE_FROM_ABI void
  wait(_LIBCUDACXX_ATOMIC_FLAG_TYPE __v, memory_order __m = memory_order_seq_cst) const noexcept
  {
    __atomic_wait(&this->__a, __v, __m, __thread_scope_system_tag{});
  }

  // notify_one / notify_all: forward the flag storage to the matching
  // `__atomic_notify_*` helper.
  _LIBCUDACXX_HIDE_FROM_ABI void notify_one() volatile noexcept
  {
    __atomic_notify_one(&this->__a, __thread_scope_system_tag{});
  }
  _LIBCUDACXX_HIDE_FROM_ABI void notify_one() noexcept
  {
    __atomic_notify_one(&this->__a, __thread_scope_system_tag{});
  }
  _LIBCUDACXX_HIDE_FROM_ABI void notify_all() volatile noexcept
  {
    __atomic_notify_all(&this->__a, __thread_scope_system_tag{});
  }
  _LIBCUDACXX_HIDE_FROM_ABI void notify_all() noexcept
  {
    __atomic_notify_all(&this->__a, __thread_scope_system_tag{});
  }
};

// Free-function form of `atomic_flag::test()` (volatile overload), using the
// member's default memory order.
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_flag_test(const volatile atomic_flag* __o) noexcept
{
  const volatile atomic_flag& __flag = *__o;
  return __flag.test();
}

// Free-function form of `atomic_flag::test()` (non-volatile overload), using
// the member's default memory order.
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_flag_test(const atomic_flag* __o) noexcept
{
  const atomic_flag& __flag = *__o;
  return __flag.test();
}

// Free-function form of `atomic_flag::test(__m)` (volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_flag_test_explicit(const volatile atomic_flag* __o, memory_order __m) noexcept
{
  const volatile atomic_flag& __flag = *__o;
  return __flag.test(__m);
}

// Free-function form of `atomic_flag::test(__m)` (non-volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_flag_test_explicit(const atomic_flag* __o, memory_order __m) noexcept
{
  const atomic_flag& __flag = *__o;
  return __flag.test(__m);
}

// Free-function form of `atomic_flag::test_and_set()` (volatile overload),
// using the member's default memory order.
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_flag_test_and_set(volatile atomic_flag* __o) noexcept
{
  volatile atomic_flag& __flag = *__o;
  return __flag.test_and_set();
}

// Free-function form of `atomic_flag::test_and_set()` (non-volatile overload),
// using the member's default memory order.
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_flag_test_and_set(atomic_flag* __o) noexcept
{
  atomic_flag& __flag = *__o;
  return __flag.test_and_set();
}

// Free-function form of `atomic_flag::test_and_set(__m)` (volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_flag_test_and_set_explicit(volatile atomic_flag* __o, memory_order __m) noexcept
{
  volatile atomic_flag& __flag = *__o;
  return __flag.test_and_set(__m);
}

// Free-function form of `atomic_flag::test_and_set(__m)` (non-volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI bool atomic_flag_test_and_set_explicit(atomic_flag* __o, memory_order __m) noexcept
{
  atomic_flag& __flag = *__o;
  return __flag.test_and_set(__m);
}

// Free-function form of `atomic_flag::clear()` (volatile overload), using the
// member's default memory order.
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_clear(volatile atomic_flag* __o) noexcept
{
  volatile atomic_flag& __flag = *__o;
  __flag.clear();
}

// Free-function form of `atomic_flag::clear()` (non-volatile overload), using
// the member's default memory order.
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_clear(atomic_flag* __o) noexcept
{
  atomic_flag& __flag = *__o;
  __flag.clear();
}

// Free-function form of `atomic_flag::clear(__m)` (volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_clear_explicit(volatile atomic_flag* __o, memory_order __m) noexcept
{
  volatile atomic_flag& __flag = *__o;
  __flag.clear(__m);
}

// Free-function form of `atomic_flag::clear(__m)` (non-volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_clear_explicit(atomic_flag* __o, memory_order __m) noexcept
{
  atomic_flag& __flag = *__o;
  __flag.clear(__m);
}

#if !defined(__CUDA_MINIMUM_ARCH__) || __CUDA_MINIMUM_ARCH__ >= 700

// Free-function form of `atomic_flag::wait(__v)` (volatile overload), using the
// member's default memory order.
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_wait(const volatile atomic_flag* __o, bool __v) noexcept
{
  const volatile atomic_flag& __flag = *__o;
  __flag.wait(__v);
}

// Free-function form of `atomic_flag::wait(__v)` (non-volatile overload), using
// the member's default memory order.
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_wait(const atomic_flag* __o, bool __v) noexcept
{
  const atomic_flag& __flag = *__o;
  __flag.wait(__v);
}

// Free-function form of `atomic_flag::wait(__v, __m)` (volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI void
atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) noexcept
{
  const volatile atomic_flag& __flag = *__o;
  __flag.wait(__v, __m);
}

// Free-function form of `atomic_flag::wait(__v, __m)` (non-volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) noexcept
{
  const atomic_flag& __flag = *__o;
  __flag.wait(__v, __m);
}

// Free-function form of `atomic_flag::notify_one()` (volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_notify_one(volatile atomic_flag* __o) noexcept
{
  volatile atomic_flag& __flag = *__o;
  __flag.notify_one();
}

// Free-function form of `atomic_flag::notify_one()` (non-volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_notify_one(atomic_flag* __o) noexcept
{
  atomic_flag& __flag = *__o;
  __flag.notify_one();
}

// Free-function form of `atomic_flag::notify_all()` (volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_notify_all(volatile atomic_flag* __o) noexcept
{
  volatile atomic_flag& __flag = *__o;
  __flag.notify_all();
}

// Free-function form of `atomic_flag::notify_all()` (non-volatile overload).
_LIBCUDACXX_HIDE_FROM_ABI void atomic_flag_notify_all(atomic_flag* __o) noexcept
{
  atomic_flag& __flag = *__o;
  __flag.notify_all();
}

#endif

// fences

// Issues a thread fence with memory order `__m` by forwarding it to
// `__atomic_thread_fence_dispatch`.
_LIBCUDACXX_HIDE_FROM_ABI void atomic_thread_fence(memory_order __m) noexcept
{
  __atomic_thread_fence_dispatch(__m);
}

// Issues a signal fence with memory order `__m` by forwarding it to
// `__atomic_signal_fence_dispatch`.
_LIBCUDACXX_HIDE_FROM_ABI void atomic_signal_fence(memory_order __m) noexcept
{
  __atomic_signal_fence_dispatch(__m);
}

// Atomics for standard typedef types

// Aliases for `atomic` over the built-in boolean, character and integer types.
using atomic_bool     = atomic<bool>;
using atomic_char     = atomic<char>;
using atomic_schar    = atomic<signed char>;
using atomic_uchar    = atomic<unsigned char>;
using atomic_short    = atomic<short>;
using atomic_ushort   = atomic<unsigned short>;
using atomic_int      = atomic<int>;
using atomic_uint     = atomic<unsigned int>;
using atomic_long     = atomic<long>;
using atomic_ulong    = atomic<unsigned long>;
using atomic_llong    = atomic<long long>;
using atomic_ullong   = atomic<unsigned long long>;
using atomic_char16_t = atomic<char16_t>;
using atomic_char32_t = atomic<char32_t>;
using atomic_wchar_t  = atomic<wchar_t>;

// Aliases over the `int_leastN_t` / `uint_leastN_t` typedefs.
using atomic_int_least8_t   = atomic<int_least8_t>;
using atomic_uint_least8_t  = atomic<uint_least8_t>;
using atomic_int_least16_t  = atomic<int_least16_t>;
using atomic_uint_least16_t = atomic<uint_least16_t>;
using atomic_int_least32_t  = atomic<int_least32_t>;
using atomic_uint_least32_t = atomic<uint_least32_t>;
using atomic_int_least64_t  = atomic<int_least64_t>;
using atomic_uint_least64_t = atomic<uint_least64_t>;

// Aliases over the `int_fastN_t` / `uint_fastN_t` typedefs.
using atomic_int_fast8_t   = atomic<int_fast8_t>;
using atomic_uint_fast8_t  = atomic<uint_fast8_t>;
using atomic_int_fast16_t  = atomic<int_fast16_t>;
using atomic_uint_fast16_t = atomic<uint_fast16_t>;
using atomic_int_fast32_t  = atomic<int_fast32_t>;
using atomic_uint_fast32_t = atomic<uint_fast32_t>;
using atomic_int_fast64_t  = atomic<int_fast64_t>;
using atomic_uint_fast64_t = atomic<uint_fast64_t>;

// Aliases over the exact-width `intN_t` / `uintN_t` typedefs.
using atomic_int8_t   = atomic<int8_t>;
using atomic_uint8_t  = atomic<uint8_t>;
using atomic_int16_t  = atomic<int16_t>;
using atomic_uint16_t = atomic<uint16_t>;
using atomic_int32_t  = atomic<int32_t>;
using atomic_uint32_t = atomic<uint32_t>;
using atomic_int64_t  = atomic<int64_t>;
using atomic_uint64_t = atomic<uint64_t>;

// Aliases over the pointer-sized, size, difference and maximum-width integer typedefs.
using atomic_intptr_t  = atomic<intptr_t>;
using atomic_uintptr_t = atomic<uintptr_t>;
using atomic_size_t    = atomic<size_t>;
using atomic_ptrdiff_t = atomic<ptrdiff_t>;
using atomic_intmax_t  = atomic<intmax_t>;
using atomic_uintmax_t = atomic<uintmax_t>;

// Compile-time guard: the lock-free aliases below are defined in terms of
// `int` / `unsigned`, so the build is rejected when
// `LIBCUDACXX_ATOMIC_INT_LOCK_FREE` evaluates to false.
static_assert(LIBCUDACXX_ATOMIC_INT_LOCK_FREE, "This library assumes atomic<int> is lock-free.");

using atomic_signed_lock_free   = atomic<int>;
using atomic_unsigned_lock_free = atomic<unsigned>;

// Brace initializers: `LIBCUDACXX_ATOMIC_FLAG_INIT` expands to `{false}` and
// `LIBCUDACXX_ATOMIC_VAR_INIT(__v)` expands to `{__v}`.
#define LIBCUDACXX_ATOMIC_FLAG_INIT     {false}
#define LIBCUDACXX_ATOMIC_VAR_INIT(__v) {__v}

_LIBCUDACXX_END_NAMESPACE_STD

_CCCL_POP_MACROS

#endif // _CUDA_STD_ATOMIC
