SKALE-4586 Added Thread Pool

f457b201 · kladko · ecde0240 · f457b201 · f457b201 · f457b201
Unverified Commit f457b201 authored Sep 09, 2021 by kladko
Showing 4 changed files with 1018 additions and 996 deletions

atomicops.h third_party/atomicops.h +385 -362

readerwriterqueue.h third_party/readerwriterqueue.h +617 -630

ZMQServer.cpp zmq_src/ZMQServer.cpp +12 -2

ZMQServer.h zmq_src/ZMQServer.h +4 -2

No files found.
--- a/third_party/atomicops.h
+++ b/third_party/atomicops.h
@@ -206,24 +206,48 @@ namespace moodycamel {
    AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN
    {
        switch (order) {
-            case memory_order_relaxed: break;
+            case memory_order_relaxed:
-            case memory_order_acquire: std::atomic_signal_fence(std::memory_order_acquire); break;
+                break;
-            case memory_order_release: std::atomic_signal_fence(std::memory_order_release); break;
+            case memory_order_acquire:
-            case memory_order_acq_rel: std::atomic_signal_fence(std::memory_order_acq_rel); break;
+                std::atomic_signal_fence(std::memory_order_acquire);
-            case memory_order_seq_cst: std::atomic_signal_fence(std::memory_order_seq_cst); break;
+                break;
-            default: assert(false);
+            case memory_order_release:
+                std::atomic_signal_fence(std::memory_order_release);
+                break;
+            case memory_order_acq_rel:
+                std::atomic_signal_fence(std::memory_order_acq_rel);
+                break;
+            case memory_order_seq_cst:
+                std::atomic_signal_fence(std::memory_order_seq_cst);
+                break;
+            default:
+                assert(false);
        }
    }
    AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN
    {
        switch (order) {
-            case memory_order_relaxed: break;
+            case memory_order_relaxed:
-            case memory_order_acquire: AE_TSAN_ANNOTATE_ACQUIRE(); std::atomic_thread_fence(std::memory_order_acquire); break;
+                break;
-            case memory_order_release: AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_release); break;
+            case memory_order_acquire:
-            case memory_order_acq_rel: AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_acq_rel); break;
+                AE_TSAN_ANNOTATE_ACQUIRE();
-            case memory_order_seq_cst: AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_seq_cst); break;
+                std::atomic_thread_fence(std::memory_order_acquire);
-            default: assert(false);
+                break;
+            case memory_order_release:
+                AE_TSAN_ANNOTATE_RELEASE();
+                std::atomic_thread_fence(std::memory_order_release);
+                break;
+            case memory_order_acq_rel:
+                AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE();
+                std::atomic_thread_fence(std::memory_order_acq_rel);
+                break;
+            case memory_order_seq_cst:
+                AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE();
+                std::atomic_thread_fence(std::memory_order_seq_cst);
+                break;
+            default:
+                assert(false);
        }
    }
@@ -237,8 +261,11 @@ namespace moodycamel {
 #endif
 #ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
 #include <atomic>
 #endif
 #include <utility>
 // WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY:
@@ -247,21 +274,26 @@ namespace moodycamel {
 // at the hardware level -- on most platforms this generally means aligned pointers and integers (only).
 namespace moodycamel {
    template<typename T>
-    class weak_atomic
+    class weak_atomic {
-    {
    public:
-        AE_NO_TSAN weak_atomic() : value() { }
+        AE_NO_TSAN weak_atomic() : value() {}
 #ifdef AE_VCPP
-        #pragma warning(push)
+#pragma warning(push)
 #pragma warning(disable: 4100)		// Get rid of (erroneous) 'unreferenced formal parameter' warning
 #endif
-        template<typename U> AE_NO_TSAN weak_atomic(U&& x) : value(std::forward<U>(x)) {  }
+        template<typename U>
+        AE_NO_TSAN weak_atomic(U &&x) : value(std::forward<U>(x)) {}
 #ifdef __cplusplus_cli
        // Work around bug with universal reference/nullptr combination that only appears when /clr is on
    AE_NO_TSAN weak_atomic(nullptr_t) : value(nullptr) {  }
 #endif
-        AE_NO_TSAN weak_atomic(weak_atomic const& other) : value(other.load()) {  }
+        AE_NO_TSAN weak_atomic(weak_atomic const &other) : value(other.load()) {}
-        AE_NO_TSAN weak_atomic(weak_atomic&& other) : value(std::move(other.load())) {  }
+        AE_NO_TSAN weak_atomic(weak_atomic &&other) : value(std::move(other.load())) {}
 #ifdef AE_VCPP
 #pragma warning(pop)
 #endif
@@ -303,14 +335,14 @@ namespace moodycamel {
        return value;
    }
 #else
        template<typename U>
-        AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN
+        AE_FORCEINLINE weak_atomic const &operator=(U &&x) AE_NO_TSAN {
-        {
            value.store(std::forward<U>(x), std::memory_order_relaxed);
            return *this;
        }
-        AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN
+        AE_FORCEINLINE weak_atomic const &operator=(weak_atomic const &other) AE_NO_TSAN
        {
            value.store(other.value.load(std::memory_order_relaxed), std::memory_order_relaxed);
            return *this;
@@ -327,6 +359,7 @@ namespace moodycamel {
        {
            return value.fetch_add(increment, std::memory_order_release);
        }
 #endif
@@ -336,7 +369,7 @@ namespace moodycamel {
    // `volatile` will make memory access slow, but is guaranteed to be reliable.
    volatile T value;
 #else
-        std::atomic<T> value;
+        std::atomic <T> value;
 #endif
    };
@@ -369,8 +402,7 @@ extern "C" {
 #include <task.h>
 #endif
-namespace moodycamel
+namespace moodycamel {
-{
    // Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
    // portable + lightweight semaphore implementations, originally from
    // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
@@ -392,8 +424,7 @@ namespace moodycamel
    // 2. Altered source versions must be plainly marked as such, and must not be
    //    misrepresented as being the original software.
    // 3. This notice may not be removed or altered from any source distribution.
-    namespace spsc_sema
+    namespace spsc_sema {
-    {
 #if defined(_WIN32)
        class Semaphore
        {
@@ -655,8 +686,7 @@ namespace moodycamel
        //---------------------------------------------------------
        // LightweightSemaphore
        //---------------------------------------------------------
-        class LightweightSemaphore
+        class LightweightSemaphore {
-        {
        public:
            typedef std::make_signed<std::size_t>::type ssize_t;
@@ -671,10 +701,8 @@ namespace moodycamel
                // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
                // as threads start hitting the kernel semaphore.
                int spin = 1024;
-                while (--spin >= 0)
+                while (--spin >= 0) {
-                {
+                    if (m_count.load() > 0) {
-                    if (m_count.load() > 0)
-                    {
                        m_count.fetch_add_acquire(-1);
                        return true;
                    }
@@ -683,8 +711,7 @@ namespace moodycamel
                oldCount = m_count.fetch_add_acquire(-1);
                if (oldCount > 0)
                    return true;
-                if (timeout_usecs < 0)
+                if (timeout_usecs < 0) {
-                {
                    if (m_sema.wait())
                        return true;
                }
@@ -695,8 +722,7 @@ namespace moodycamel
                // it. So we have to re-adjust the count, but only if the semaphore
                // wasn't signaled enough times for us too since then. If it was, we
                // need to release the semaphore too.
-                while (true)
+                while (true) {
-                {
                    oldCount = m_count.fetch_add_release(1);
                    if (oldCount < 0)
                        return false;    // successfully restored things to the way they were
@@ -708,15 +734,13 @@ namespace moodycamel
            }
        public:
-            AE_NO_TSAN LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount), m_sema()
+            AE_NO_TSAN LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount), m_sema() {
-            {
                assert(initialCount >= 0);
            }
            bool tryWait() AE_NO_TSAN
            {
-                if (m_count.load() > 0)
+                if (m_count.load() > 0) {
-                {
                    m_count.fetch_add_acquire(-1);
                    return true;
                }
@@ -738,8 +762,7 @@ namespace moodycamel
                assert(count >= 0);
                ssize_t oldCount = m_count.fetch_add_release(count);
                assert(oldCount >= -1);
-                if (oldCount < 0)
+                if (oldCount < 0) {
-                {
                    m_sema.signal(1);
                }
            }

--- a/third_party/readerwriterqueue.h
+++ b/third_party/readerwriterqueue.h
@@ -14,6 +14,7 @@
 #include <cstdint>
 #include <cstdlib>        // For malloc/free/abort & size_t
 #include <memory>
 #if __cplusplus > 199711L || _MSC_VER >= 1700 // C++11 or VS2012
 #include <chrono>
 #endif
@@ -73,8 +74,7 @@
 namespace moodycamel {
    template<typename T, size_t MAX_BLOCK_SIZE = 512>
-    class MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE ReaderWriterQueue
+    class MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE ReaderWriterQueue {
-{
        // Design: Based on a queue-of-queues. The low-level queues are just
        // circular buffers with front and tail indices indicating where the
        // next element to dequeue is and where the next element can be enqueued,
@@ -104,14 +104,13 @@ namespace moodycamel {
        // at least one extra buffer block).
        AE_NO_TSAN explicit ReaderWriterQueue(size_t size = 15)
 #ifndef NDEBUG
-    : enqueuing(false)
+                : enqueuing(false), dequeuing(false)
-    ,dequeuing(false)
 #endif
-{
+        {
            assert(MAX_BLOCK_SIZE == ceilToPow2(MAX_BLOCK_SIZE) && "MAX_BLOCK_SIZE must be a power of 2");
            assert(MAX_BLOCK_SIZE >= 2 && "MAX_BLOCK_SIZE must be at least 2");
-    Block* firstBlock = nullptr;
+            Block *firstBlock = nullptr;
            largestBlockSize = ceilToPow2(size + 1);        // We need a spare slot to fit size elements in the block
            if (largestBlockSize > MAX_BLOCK_SIZE * 2) {
@@ -122,7 +121,7 @@ namespace moodycamel {
                // number of blocks - 1. Solving for size and applying a ceiling to the division gives us (after simplifying):
                size_t initialBlockCount = (size + MAX_BLOCK_SIZE * 2 - 3) / (MAX_BLOCK_SIZE - 1);
                largestBlockSize = MAX_BLOCK_SIZE;
-    Block* lastBlock = nullptr;
+                Block *lastBlock = nullptr;
                for (size_t i = 0; i != initialBlockCount; ++i) {
                    auto block = make_block(largestBlockSize);
                    if (block == nullptr) {
@@ -131,93 +130,89 @@ namespace moodycamel {
 #else
                        abort();
 #endif
-}
+                    }
-if (firstBlock == nullptr) {
+                    if (firstBlock == nullptr) {
-firstBlock = block;
+                        firstBlock = block;
-}
+                    } else {
-else {
+                        lastBlock->next = block;
-lastBlock->next = block;
+                    }
-}
+                    lastBlock = block;
-lastBlock = block;
+                    block->next = firstBlock;
-block->next = firstBlock;
+                }
-}
+            } else {
-}
+                firstBlock = make_block(largestBlockSize);
-else {
+                if (firstBlock == nullptr) {
-firstBlock = make_block(largestBlockSize);
-if (firstBlock == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
-throw std::bad_alloc();
+                    throw std::bad_alloc();
 #else
-abort();
+                    abort();
 #endif
-}
+                }
-firstBlock->next = firstBlock;
+                firstBlock->next = firstBlock;
-}
+            }
-frontBlock = firstBlock;
+            frontBlock = firstBlock;
-tailBlock = firstBlock;
+            tailBlock = firstBlock;
 // Make sure the reader/writer threads will have the initialized memory setup above:
-fence(memory_order_sync);
+            fence(memory_order_sync);
-}
+        }
 // Note: The queue should not be accessed concurrently while it's
 // being moved. It's up to the user to synchronize this.
-AE_NO_TSAN ReaderWriterQueue(ReaderWriterQueue&& other)
+        AE_NO_TSAN ReaderWriterQueue(ReaderWriterQueue &&other)
-: frontBlock(other.frontBlock.load()),
+                : frontBlock(other.frontBlock.load()),
-tailBlock(other.tailBlock.load()),
+                  tailBlock(other.tailBlock.load()),
-largestBlockSize(other.largestBlockSize)
+                  largestBlockSize(other.largestBlockSize)
 #ifndef NDEBUG
-,enqueuing(false)
+                , enqueuing(false), dequeuing(false)
-,dequeuing(false)
 #endif
-{
+        {
-other.largestBlockSize = 32;
+            other.largestBlockSize = 32;
-Block* b = other.make_block(other.largestBlockSize);
+            Block *b = other.make_block(other.largestBlockSize);
-if (b == nullptr) {
+            if (b == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
-throw std::bad_alloc();
+                throw std::bad_alloc();
 #else
-abort();
+                abort();
 #endif
-}
+            }
-b->next = b;
+            b->next = b;
-other.frontBlock = b;
+            other.frontBlock = b;
-other.tailBlock = b;
+            other.tailBlock = b;
-}
+        }
 // Note: The queue should not be accessed concurrently while it's
 // being moved. It's up to the user to synchronize this.
-ReaderWriterQueue& operator=(ReaderWriterQueue&& other) AE_NO_TSAN
+        ReaderWriterQueue &operator=(ReaderWriterQueue &&other) AE_NO_TSAN
-{
+        {
-Block* b = frontBlock.load();
+            Block *b = frontBlock.load();
-frontBlock = other.frontBlock.load();
+            frontBlock = other.frontBlock.load();
-other.frontBlock = b;
+            other.frontBlock = b;
-b = tailBlock.load();
+            b = tailBlock.load();
-tailBlock = other.tailBlock.load();
+            tailBlock = other.tailBlock.load();
-other.tailBlock = b;
+            other.tailBlock = b;
-std::swap(largestBlockSize, other.largestBlockSize);
+            std::swap(largestBlockSize, other.largestBlockSize);
-return *this;
+            return *this;
-}
+        }
 // Note: The queue should not be accessed concurrently while it's
 // being deleted. It's up to the user to synchronize this.
-AE_NO_TSAN ~ReaderWriterQueue()
+        AE_NO_TSAN ~ReaderWriterQueue() {
-{
            // Make sure we get the latest version of all variables from other CPUs:
            fence(memory_order_sync);
            // Destroy any remaining objects in queue and free memory
-    Block* frontBlock_ = frontBlock;
+            Block *frontBlock_ = frontBlock;
-    Block* block = frontBlock_;
+            Block *block = frontBlock_;
            do {
-        Block* nextBlock = block->next;
+                Block *nextBlock = block->next;
                size_t blockFront = block->front;
                size_t blockTail = block->tail;
                for (size_t i = blockFront; i != blockTail; i = (i + 1) & block->sizeMask) {
-            auto element = reinterpret_cast<T*>(block->data + i * sizeof(T));
+                    auto element = reinterpret_cast<T *>(block->data + i * sizeof(T));
                    element->~T();
-            (void)element;
+                    (void) element;
                }
                auto rawBlock = block->rawThis;
@@ -225,67 +220,68 @@ AE_NO_TSAN ~ReaderWriterQueue()
                std::free(rawBlock);
                block = nextBlock;
            } while (block != frontBlock_);
-}
+        }
 // Enqueues a copy of element if there is room in the queue.
 // Returns true if the element was enqueued, false otherwise.
 // Does not allocate memory.
-AE_FORCEINLINE bool try_enqueue(T const& element) AE_NO_TSAN
+        AE_FORCEINLINE bool try_enqueue(T const &element) AE_NO_TSAN
-{
+        {
-return inner_enqueue<CannotAlloc>(element);
+            return inner_enqueue<CannotAlloc>(element);
-}
+        }
 // Enqueues a moved copy of element if there is room in the queue.
 // Returns true if the element was enqueued, false otherwise.
 // Does not allocate memory.
-AE_FORCEINLINE bool try_enqueue(T&& element) AE_NO_TSAN
+        AE_FORCEINLINE bool try_enqueue(T &&element) AE_NO_TSAN
-{
+        {
-return inner_enqueue<CannotAlloc>(std::forward<T>(element));
+            return inner_enqueue<CannotAlloc>(std::forward<T>(element));
-}
+        }
 #if MOODYCAMEL_HAS_EMPLACE
 // Like try_enqueue() but with emplace semantics (i.e. construct-in-place).
-template<typename... Args>
+        template<typename... Args>
-AE_FORCEINLINE bool try_emplace(Args&&... args) AE_NO_TSAN
+        AE_FORCEINLINE bool try_emplace(Args &&... args) AE_NO_TSAN {
-{
+            return inner_enqueue<CannotAlloc>(std::forward<Args>(args)...);
-return inner_enqueue<CannotAlloc>(std::forward<Args>(args)...);
+        }
-}
 #endif
 // Enqueues a copy of element on the queue.
 // Allocates an additional block of memory if needed.
 // Only fails (returns false) if memory allocation fails.
-AE_FORCEINLINE bool enqueue(T const& element) AE_NO_TSAN
+        AE_FORCEINLINE bool enqueue(T const &element) AE_NO_TSAN
-{
+        {
-return inner_enqueue<CanAlloc>(element);
+            return inner_enqueue<CanAlloc>(element);
-}
+        }
 // Enqueues a moved copy of element on the queue.
 // Allocates an additional block of memory if needed.
 // Only fails (returns false) if memory allocation fails.
-AE_FORCEINLINE bool enqueue(T&& element) AE_NO_TSAN
+        AE_FORCEINLINE bool enqueue(T &&element) AE_NO_TSAN
-{
+        {
-return inner_enqueue<CanAlloc>(std::forward<T>(element));
+            return inner_enqueue<CanAlloc>(std::forward<T>(element));
-}
+        }
 #if MOODYCAMEL_HAS_EMPLACE
 // Like enqueue() but with emplace semantics (i.e. construct-in-place).
-template<typename... Args>
+        template<typename... Args>
-AE_FORCEINLINE bool emplace(Args&&... args) AE_NO_TSAN
+        AE_FORCEINLINE bool emplace(Args &&... args) AE_NO_TSAN {
-{
+            return inner_enqueue<CanAlloc>(std::forward<Args>(args)...);
-return inner_enqueue<CanAlloc>(std::forward<Args>(args)...);
+        }
-}
 #endif
 // Attempts to dequeue an element; if the queue is empty,
 // returns false instead. If the queue has at least one element,
 // moves front to result using operator=, then returns true.
-template<typename U>
+        template<typename U>
-bool try_dequeue(U& result) AE_NO_TSAN
+        bool try_dequeue(U &result) AE_NO_TSAN {
-{
 #ifndef NDEBUG
-ReentrantGuard guard(this->dequeuing);
+            ReentrantGuard guard(this->dequeuing);
 #endif
 // High-level pseudocode:
@@ -305,75 +301,73 @@ ReentrantGuard guard(this->dequeuing);
 // then re-read the front block and check if it's not empty again, then check if the tail
 // block has advanced.
-Block* frontBlock_ = frontBlock.load();
+            Block *frontBlock_ = frontBlock.load();
-size_t blockTail = frontBlock_->localTail;
+            size_t blockTail = frontBlock_->localTail;
-size_t blockFront = frontBlock_->front.load();
+            size_t blockFront = frontBlock_->front.load();
-if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
+            if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
-fence(memory_order_acquire);
+                fence(memory_order_acquire);
-non_empty_front_block:
+                non_empty_front_block:
 // Front block not empty, dequeue from here
-auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
+                auto element = reinterpret_cast<T *>(frontBlock_->data + blockFront * sizeof(T));
-result = std::move(*element);
+                result = std::move(*element);
-element->~T();
+                element->~T();
-blockFront = (blockFront + 1) & frontBlock_->sizeMask;
+                blockFront = (blockFront + 1) & frontBlock_->sizeMask;
-fence(memory_order_release);
+                fence(memory_order_release);
-frontBlock_->front = blockFront;
+                frontBlock_->front = blockFront;
-}
+            } else if (frontBlock_ != tailBlock.load()) {
-else if (frontBlock_ != tailBlock.load()) {
+                fence(memory_order_acquire);
-fence(memory_order_acquire);
-frontBlock_ = frontBlock.load();
+                frontBlock_ = frontBlock.load();
-blockTail = frontBlock_->localTail = frontBlock_->tail.load();
+                blockTail = frontBlock_->localTail = frontBlock_->tail.load();
-blockFront = frontBlock_->front.load();
+                blockFront = frontBlock_->front.load();
-fence(memory_order_acquire);
+                fence(memory_order_acquire);
-if (blockFront != blockTail) {
+                if (blockFront != blockTail) {
 // Oh look, the front block isn't empty after all
-goto non_empty_front_block;
+                    goto non_empty_front_block;
-}
+                }
 // Front block is empty but there's another block ahead, advance to it
-Block* nextBlock = frontBlock_->next;
+                Block *nextBlock = frontBlock_->next;
 // Don't need an acquire fence here since next can only ever be set on the tailBlock,
 // and we're not the tailBlock, and we did an acquire earlier after reading tailBlock which
 // ensures next is up-to-date on this CPU in case we recently were at tailBlock.
-size_t nextBlockFront = nextBlock->front.load();
+                size_t nextBlockFront = nextBlock->front.load();
-size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
+                size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
-fence(memory_order_acquire);
+                fence(memory_order_acquire);
 // Since the tailBlock is only ever advanced after being written to,
 // we know there's for sure an element to dequeue on it
-assert(nextBlockFront != nextBlockTail);
+                assert(nextBlockFront != nextBlockTail);
-AE_UNUSED(nextBlockTail);
+                AE_UNUSED(nextBlockTail);
 // We're done with this block, let the producer use it if it needs
-fence(memory_order_release);		// Expose possibly pending changes to frontBlock->front from last dequeue
+                fence(memory_order_release);        // Expose possibly pending changes to frontBlock->front from last dequeue
-frontBlock = frontBlock_ = nextBlock;
+                frontBlock = frontBlock_ = nextBlock;
-compiler_fence(memory_order_release);	// Not strictly needed
+                compiler_fence(memory_order_release);    // Not strictly needed
-auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
+                auto element = reinterpret_cast<T *>(frontBlock_->data + nextBlockFront * sizeof(T));
-result = std::move(*element);
+                result = std::move(*element);
-element->~T();
+                element->~T();
-nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
+                nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
-fence(memory_order_release);
+                fence(memory_order_release);
-frontBlock_->front = nextBlockFront;
+                frontBlock_->front = nextBlockFront;
-}
+            } else {
-else {
 // No elements in current block and no other block to advance to
-return false;
+                return false;
-}
+            }
-return true;
+            return true;
-}
+        }
 // Returns a pointer to the front element in the queue (the one that
@@ -381,129 +375,126 @@ return true;
 // queue appears empty at the time the method is called, nullptr is
 // returned instead.
 // Must be called only from the consumer thread.
-T* peek() const AE_NO_TSAN
+        T *peek() const AE_NO_TSAN
-{
+        {
 #ifndef NDEBUG
-ReentrantGuard guard(this->dequeuing);
+            ReentrantGuard guard(this->dequeuing);
 #endif
 // See try_dequeue() for reasoning
-Block* frontBlock_ = frontBlock.load();
+            Block *frontBlock_ = frontBlock.load();
-size_t blockTail = frontBlock_->localTail;
+            size_t blockTail = frontBlock_->localTail;
-size_t blockFront = frontBlock_->front.load();
+            size_t blockFront = frontBlock_->front.load();
-if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
+            if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
-fence(memory_order_acquire);
+                fence(memory_order_acquire);
-non_empty_front_block:
+                non_empty_front_block:
-return reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
+                return reinterpret_cast<T *>(frontBlock_->data + blockFront * sizeof(T));
-}
+            } else if (frontBlock_ != tailBlock.load()) {
-else if (frontBlock_ != tailBlock.load()) {
+                fence(memory_order_acquire);
-fence(memory_order_acquire);
+                frontBlock_ = frontBlock.load();
-frontBlock_ = frontBlock.load();
+                blockTail = frontBlock_->localTail = frontBlock_->tail.load();
-blockTail = frontBlock_->localTail = frontBlock_->tail.load();
+                blockFront = frontBlock_->front.load();
-blockFront = frontBlock_->front.load();
+                fence(memory_order_acquire);
-fence(memory_order_acquire);
-if (blockFront != blockTail) {
+                if (blockFront != blockTail) {
-goto non_empty_front_block;
+                    goto non_empty_front_block;
-}
+                }
-Block* nextBlock = frontBlock_->next;
+                Block *nextBlock = frontBlock_->next;
-size_t nextBlockFront = nextBlock->front.load();
+                size_t nextBlockFront = nextBlock->front.load();
-fence(memory_order_acquire);
+                fence(memory_order_acquire);
-assert(nextBlockFront != nextBlock->tail.load());
+                assert(nextBlockFront != nextBlock->tail.load());
-return reinterpret_cast<T*>(nextBlock->data + nextBlockFront * sizeof(T));
+                return reinterpret_cast<T *>(nextBlock->data + nextBlockFront * sizeof(T));
-}
+            }
-return nullptr;
+            return nullptr;
-}
+        }
 // Removes the front element from the queue, if any, without returning it.
 // Returns true on success, or false if the queue appeared empty at the time
 // `pop` was called.
-bool pop() AE_NO_TSAN
+        bool pop() AE_NO_TSAN
-{
+        {
 #ifndef NDEBUG
-ReentrantGuard guard(this->dequeuing);
+            ReentrantGuard guard(this->dequeuing);
 #endif
 // See try_dequeue() for reasoning
-Block* frontBlock_ = frontBlock.load();
+            Block *frontBlock_ = frontBlock.load();
-size_t blockTail = frontBlock_->localTail;
+            size_t blockTail = frontBlock_->localTail;
-size_t blockFront = frontBlock_->front.load();
+            size_t blockFront = frontBlock_->front.load();
-if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
+            if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
-fence(memory_order_acquire);
+                fence(memory_order_acquire);
-non_empty_front_block:
+                non_empty_front_block:
-auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
+                auto element = reinterpret_cast<T *>(frontBlock_->data + blockFront * sizeof(T));
-element->~T();
+                element->~T();
-blockFront = (blockFront + 1) & frontBlock_->sizeMask;
+                blockFront = (blockFront + 1) & frontBlock_->sizeMask;
-fence(memory_order_release);
+                fence(memory_order_release);
-frontBlock_->front = blockFront;
+                frontBlock_->front = blockFront;
-}
+            } else if (frontBlock_ != tailBlock.load()) {
-else if (frontBlock_ != tailBlock.load()) {
+                fence(memory_order_acquire);
-fence(memory_order_acquire);
+                frontBlock_ = frontBlock.load();
-frontBlock_ = frontBlock.load();
+                blockTail = frontBlock_->localTail = frontBlock_->tail.load();
-blockTail = frontBlock_->localTail = frontBlock_->tail.load();
+                blockFront = frontBlock_->front.load();
-blockFront = frontBlock_->front.load();
+                fence(memory_order_acquire);
-fence(memory_order_acquire);
-if (blockFront != blockTail) {
+                if (blockFront != blockTail) {
-goto non_empty_front_block;
+                    goto non_empty_front_block;
-}
+                }
 // Front block is empty but there's another block ahead, advance to it
-Block* nextBlock = frontBlock_->next;
+                Block *nextBlock = frontBlock_->next;
-size_t nextBlockFront = nextBlock->front.load();
+                size_t nextBlockFront = nextBlock->front.load();
-size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
+                size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
-fence(memory_order_acquire);
+                fence(memory_order_acquire);
-assert(nextBlockFront != nextBlockTail);
+                assert(nextBlockFront != nextBlockTail);
-AE_UNUSED(nextBlockTail);
+                AE_UNUSED(nextBlockTail);
-fence(memory_order_release);
+                fence(memory_order_release);
-frontBlock = frontBlock_ = nextBlock;
+                frontBlock = frontBlock_ = nextBlock;
-compiler_fence(memory_order_release);
+                compiler_fence(memory_order_release);
-auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
+                auto element = reinterpret_cast<T *>(frontBlock_->data + nextBlockFront * sizeof(T));
-element->~T();
+                element->~T();
-nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
+                nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
-fence(memory_order_release);
+                fence(memory_order_release);
-frontBlock_->front = nextBlockFront;
+                frontBlock_->front = nextBlockFront;
-}
+            } else {
-else {
 // No elements in current block and no other block to advance to
-return false;
+                return false;
-}
+            }
-return true;
+            return true;
-}
+        }
 // Returns the approximate number of items currently in the queue.
 // Safe to call from both the producer and consumer threads.
-inline size_t size_approx() const AE_NO_TSAN
+        inline size_t size_approx() const AE_NO_TSAN
-{
+        {
-size_t result = 0;
+            size_t result = 0;
-Block* frontBlock_ = frontBlock.load();
+            Block *frontBlock_ = frontBlock.load();
-Block* block = frontBlock_;
+            Block *block = frontBlock_;
-do {
+            do {
-fence(memory_order_acquire);
+                fence(memory_order_acquire);
-size_t blockFront = block->front.load();
+                size_t blockFront = block->front.load();
-size_t blockTail = block->tail.load();
+                size_t blockTail = block->tail.load();
-result += (blockTail - blockFront) & block->sizeMask;
+                result += (blockTail - blockFront) & block->sizeMask;
-block = block->next.load();
+                block = block->next.load();
-} while (block != frontBlock_);
+            } while (block != frontBlock_);
-return result;
+            return result;
-}
+        }
 // Returns the total number of items that could be enqueued without incurring
 // an allocation when this queue is empty.
@@ -514,32 +505,35 @@ return result;
 //       the block the consumer is removing from until it's completely empty, except in
 //       the case where the producer was writing to the same block the consumer was
 //       reading from the whole time.
-inline size_t max_capacity() const {
+        inline size_t max_capacity() const {
            size_t result = 0;
-    Block* frontBlock_ = frontBlock.load();
+            Block *frontBlock_ = frontBlock.load();
-    Block* block = frontBlock_;
+            Block *block = frontBlock_;
            // Walk the circular block list exactly once, starting and ending at frontBlock.
            do {
                fence(memory_order_acquire);
                // sizeMask is (block size - 1), so each block contributes one slot fewer
                // than its size: inner_enqueue treats a block as full while one slot is
                // still unused.
                result += block->sizeMask;
                block = block->next.load();
            } while (block != frontBlock_);
            return result;
-}
+        }
-private:
+    private:
-enum AllocationMode { CanAlloc, CannotAlloc };
+        enum AllocationMode {
 // Template argument of inner_enqueue: with CanAlloc a new block is created when the
 // existing blocks are full; with CannotAlloc the enqueue returns false instead.
+            CanAlloc, CannotAlloc
+        };
 #if MOODYCAMEL_HAS_EMPLACE
-template<AllocationMode canAlloc, typename... Args>
-bool inner_enqueue(Args&&... args) AE_NO_TSAN
+        template<AllocationMode canAlloc, typename... Args>
+        bool inner_enqueue(Args &&... args) AE_NO_TSAN
 #else
-template<AllocationMode canAlloc, typename U>
+        template<AllocationMode canAlloc, typename U>
            bool inner_enqueue(U&& element) AE_NO_TSAN
 #endif
-{
+        {
 #ifndef NDEBUG
-ReentrantGuard guard(this->enqueuing);
+            ReentrantGuard guard(this->enqueuing);
 #endif
 // Constructs one element at the tail of the queue and returns true. Returns false
 // when a new block would be required but canAlloc is CannotAlloc, or when
 // make_block fails to allocate.
 // High-level pseudocode (assuming we're allowed to alloc a new block):
@@ -549,77 +543,75 @@ ReentrantGuard guard(this->enqueuing);
 //     Else create a new block and enqueue there
 //     Advance tail to the block we just enqueued to
-Block* tailBlock_ = tailBlock.load();
+            Block *tailBlock_ = tailBlock.load();
-size_t blockFront = tailBlock_->localFront;
+            size_t blockFront = tailBlock_->localFront;
-size_t blockTail = tailBlock_->tail.load();
+            size_t blockTail = tailBlock_->tail.load();
-size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask;
+            size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask;
 // The cached localFront is checked first; only when the block looks full is the
 // shared front re-read (and stored back into localFront) to confirm there is no room.
-if (nextBlockTail != blockFront || nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) {
+            if (nextBlockTail != blockFront || nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) {
-fence(memory_order_acquire);
+                fence(memory_order_acquire);
 // This block has room for at least one more element
-char* location = tailBlock_->data + blockTail * sizeof(T);
+                char *location = tailBlock_->data + blockTail * sizeof(T);
 #if MOODYCAMEL_HAS_EMPLACE
-new (location) T(std::forward<Args>(args)...);
+                new(location) T(std::forward<Args>(args)...);
 #else
-new (location) T(std::forward<U>(element));
+                new (location) T(std::forward<U>(element));
 #endif
-fence(memory_order_release);
+                fence(memory_order_release);
-tailBlock_->tail = nextBlockTail;
+                tailBlock_->tail = nextBlockTail;
-}
+            } else {
-else {
+                fence(memory_order_acquire);
-fence(memory_order_acquire);
+                if (tailBlock_->next.load() != frontBlock) {
-if (tailBlock_->next.load() != frontBlock) {
 // Note that the reason we can't advance to the frontBlock and start adding new entries there
 // is because if we did, then dequeue would stay in that block, eventually reading the new values,
 // instead of advancing to the next full block (whose values were enqueued first and so should be
 // consumed first).
-fence(memory_order_acquire);		// Ensure we get latest writes if we got the latest frontBlock
+                    fence(memory_order_acquire);        // Ensure we get latest writes if we got the latest frontBlock
 // tailBlock is full, but there's a free block ahead, use it
-Block* tailBlockNext = tailBlock_->next.load();
+                    Block *tailBlockNext = tailBlock_->next.load();
-size_t nextBlockFront = tailBlockNext->localFront = tailBlockNext->front.load();
+                    size_t nextBlockFront = tailBlockNext->localFront = tailBlockNext->front.load();
-nextBlockTail = tailBlockNext->tail.load();
+                    nextBlockTail = tailBlockNext->tail.load();
-fence(memory_order_acquire);
+                    fence(memory_order_acquire);
 // This block must be empty since it's not the head block and we
 // go through the blocks in a circle
-assert(nextBlockFront == nextBlockTail);
+                    assert(nextBlockFront == nextBlockTail);
-tailBlockNext->localFront = nextBlockFront;
+                    tailBlockNext->localFront = nextBlockFront;
-char* location = tailBlockNext->data + nextBlockTail * sizeof(T);
+                    char *location = tailBlockNext->data + nextBlockTail * sizeof(T);
 #if MOODYCAMEL_HAS_EMPLACE
-new (location) T(std::forward<Args>(args)...);
+                    new(location) T(std::forward<Args>(args)...);
 #else
-new (location) T(std::forward<U>(element));
+                    new (location) T(std::forward<U>(element));
 #endif
-tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask;
+                    tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask;
-fence(memory_order_release);
+                    fence(memory_order_release);
-tailBlock = tailBlockNext;
+                    tailBlock = tailBlockNext;
-}
+                } else if (canAlloc == CanAlloc) {
-else if (canAlloc == CanAlloc) {
 // tailBlock is full and there's no free block ahead; create a new block
 // The block size doubles as long as largestBlockSize is still below MAX_BLOCK_SIZE.
-auto newBlockSize = largestBlockSize >= MAX_BLOCK_SIZE ? largestBlockSize : largestBlockSize * 2;
+                    auto newBlockSize = largestBlockSize >= MAX_BLOCK_SIZE ? largestBlockSize : largestBlockSize * 2;
-auto newBlock = make_block(newBlockSize);
+                    auto newBlock = make_block(newBlockSize);
-if (newBlock == nullptr) {
+                    if (newBlock == nullptr) {
 // Could not allocate a block!
-return false;
+                        return false;
-}
+                    }
-largestBlockSize = newBlockSize;
+                    largestBlockSize = newBlockSize;
 #if MOODYCAMEL_HAS_EMPLACE
-new (newBlock->data) T(std::forward<Args>(args)...);
+                    new(newBlock->data) T(std::forward<Args>(args)...);
 #else
-new (newBlock->data) T(std::forward<U>(element));
+                    new (newBlock->data) T(std::forward<U>(element));
 #endif
 // The element was constructed in slot 0 of the new block, so its tail starts at 1.
-assert(newBlock->front == 0);
+                    assert(newBlock->front == 0);
-newBlock->tail = newBlock->localTail = 1;
+                    newBlock->tail = newBlock->localTail = 1;
 // Splice the new block into the circular list directly after the current tail block.
-newBlock->next = tailBlock_->next.load();
+                    newBlock->next = tailBlock_->next.load();
-tailBlock_->next = newBlock;
+                    tailBlock_->next = newBlock;
 // Might be possible for the dequeue thread to see the new tailBlock->next
 // *without* seeing the new tailBlock value, but this is OK since it can't
@@ -627,73 +619,70 @@ tailBlock_->next = newBlock;
 // case where it could try to read the next is if it's already at the tailBlock,
 // and it won't advance past tailBlock in any circumstance).
-fence(memory_order_release);
+                    fence(memory_order_release);
-tailBlock = newBlock;
+                    tailBlock = newBlock;
-}
+                } else if (canAlloc == CannotAlloc) {
-else if (canAlloc == CannotAlloc) {
 // Would have had to allocate a new block to enqueue, but not allowed
-return false;
+                    return false;
-}
+                } else {
 // canAlloc is a template argument; this branch only guards against an
 // unexpected enumerator value.
-else {
+                    assert(false && "Should be unreachable code");
-assert(false && "Should be unreachable code");
+                    return false;
-return false;
+                }
-}
+            }
-}
-return true;
+            return true;
-}
+        }
 // Disable copying
 // (declared in the private section, so it is inaccessible from outside the class)
-ReaderWriterQueue(ReaderWriterQueue const&) {  }
+        ReaderWriterQueue(ReaderWriterQueue const &) {}
 // Disable assignment
 // NOTE(review): the body is empty although the function returns a reference; if it
 // were ever invoked (only possible from inside the class), flowing off the end would
 // be undefined behaviour. Consider "= delete" or a declaration without a body.
-ReaderWriterQueue& operator=(ReaderWriterQueue const&) {  }
+        ReaderWriterQueue &operator=(ReaderWriterQueue const &) {}
-AE_FORCEINLINE static size_t ceilToPow2(size_t x)
+        AE_FORCEINLINE static size_t ceilToPow2(size_t x) {
-{
 // From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
 // Rounds x up to the nearest power of two; a value that already is a power of two
 // is returned unchanged. After the decrement, the shifts copy the highest set bit
 // into every lower bit position: 1, 2 and 4 explicitly, then 8, 16, ... bits in the
 // loop (i << 3 for i = 1, 2, 4, ... while i < sizeof(size_t)). The final increment
 // carries into the next power of two.
 // Note: x == 0 wraps around in the decrement and the function returns 0.
--x;
+            --x;
-x |= x >> 1;
+            x |= x >> 1;
-x |= x >> 2;
+            x |= x >> 2;
-x |= x >> 4;
+            x |= x >> 4;
-for (size_t i = 1; i < sizeof(size_t); i <<= 1) {
+            for (size_t i = 1; i < sizeof(size_t); i <<= 1) {
-x |= x >> (i << 3);
+                x |= x >> (i << 3);
-}
+            }
-++x;
+            ++x;
-return x;
+            return x;
-}
+        }
-template<typename U>
+        template<typename U>
-static AE_FORCEINLINE char* align_for(char* ptr) AE_NO_TSAN
+        static AE_FORCEINLINE char *align_for(char *ptr) AE_NO_TSAN {
 // Returns ptr advanced by the smallest offset (0 .. alignment - 1) that makes the
 // address a multiple of U's alignment. The outer "% alignment" keeps the offset at 0
 // when ptr is already aligned.
-{
+            const std::size_t alignment = std::alignment_of<U>::value;
-const std::size_t alignment = std::alignment_of<U>::value;
+            return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
-return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
+        }
-}
-private:
+    private:
 #ifndef NDEBUG
 // Scope guard compiled only when NDEBUG is not defined. The constructor asserts that
 // the referenced flag is not already set and then sets it; the destructor clears it
 // again. inner_enqueue creates one on the enqueuing flag for the duration of the call.
 // Holding a reference member, the guard declares a private assignment operator that
 // is never defined.
-struct ReentrantGuard
-{
+        struct ReentrantGuard {
-    AE_NO_TSAN ReentrantGuard(weak_atomic<bool>& _inSection)
+            AE_NO_TSAN ReentrantGuard(weak_atomic<bool> &_inSection)
-            : inSection(_inSection)
+                    : inSection(_inSection) {
-    {
+                assert(!inSection &&
-        assert(!inSection && "Concurrent (or re-entrant) enqueue or dequeue operation detected (only one thread at a time may hold the producer or consumer role)");
+                       "Concurrent (or re-entrant) enqueue or dequeue operation detected (only one thread at a time may hold the producer or consumer role)");
                inSection = true;
            }
            AE_NO_TSAN ~ReentrantGuard() { inSection = false; }
-private:
+        private:
-    ReentrantGuard& operator=(ReentrantGuard const&);
+            ReentrantGuard &operator=(ReentrantGuard const &);
+        private:
+            weak_atomic<bool> &inSection;
+        };
-private:
-    weak_atomic<bool>& inSection;
-};
 #endif
-struct Block
+        struct Block {
-{
            // Avoid false-sharing by putting highly contended variables on their own cache lines
            weak_atomic<size_t> front;    // (Atomic) Elements are read from here
            size_t localTail;            // An uncontended shadow copy of tail, owned by the consumer
@@ -702,75 +691,73 @@ struct Block
            weak_atomic<size_t> tail;    // (Atomic) Elements are enqueued here
            size_t localFront;
-    char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];	// next isn't very contended, but we don't want it on the same cache line as tail (which is)
+            char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) -
-    weak_atomic<Block*> next;	// (Atomic)
+                                  sizeof(size_t)];    // next isn't very contended, but we don't want it on the same cache line as tail (which is)
+            weak_atomic<Block *> next;    // (Atomic)
-    char* data;		// Contents (on heap) are aligned to T's alignment
+            char *data;        // Contents (on heap) are aligned to T's alignment
            const size_t sizeMask;
            // size must be a power of two (and greater than 0)
-    AE_NO_TSAN Block(size_t const& _size, char* _rawThis, char* _data)
+            AE_NO_TSAN Block(size_t const &_size, char *_rawThis, char *_data)
-            : front(0UL), localTail(0), tail(0UL), localFront(0), next(nullptr), data(_data), sizeMask(_size - 1), rawThis(_rawThis)
+                    : front(0UL), localTail(0), tail(0UL), localFront(0), next(nullptr), data(_data),
-    {
+                      sizeMask(_size - 1), rawThis(_rawThis) {
            }
-private:
+        private:
            // C4512 - Assignment operator could not be generated
-    Block& operator=(Block const&);
+            Block &operator=(Block const &);
-public:
+        public:
-    char* rawThis;
+            char *rawThis;
-};
+        };
-static Block* make_block(size_t capacity) AE_NO_TSAN
+        static Block *make_block(size_t capacity) AE_NO_TSAN
-{
+        {
 // Allocate enough memory for the block itself, as well as all the elements it will contain
 // The two "alignment - 1" terms are slack so that both the Block header and the element
 // array can be aligned inside the single malloc'd buffer.
 // NOTE(review): sizeof(T) * capacity is not checked for overflow here.
-auto size = sizeof(Block) + std::alignment_of<Block>::value - 1;
+            auto size = sizeof(Block) + std::alignment_of<Block>::value - 1;
-size += sizeof(T) * capacity + std::alignment_of<T>::value - 1;
+            size += sizeof(T) * capacity + std::alignment_of<T>::value - 1;
-auto newBlockRaw = static_cast<char*>(std::malloc(size));
+            auto newBlockRaw = static_cast<char *>(std::malloc(size));
 // Allocation failure is reported to the caller as nullptr.
-if (newBlockRaw == nullptr) {
+            if (newBlockRaw == nullptr) {
-return nullptr;
+                return nullptr;
-}
+            }
 // Layout inside the buffer: aligned Block header first, then the aligned element storage.
-auto newBlockAligned = align_for<Block>(newBlockRaw);
+            auto newBlockAligned = align_for<Block>(newBlockRaw);
-auto newBlockData = align_for<T>(newBlockAligned + sizeof(Block));
+            auto newBlockData = align_for<T>(newBlockAligned + sizeof(Block));
 // The Block is constructed in place; the unaligned malloc pointer is passed as _rawThis
 // and kept in rawThis - presumably so the allocation can be freed later (the freeing
 // code is outside this excerpt; confirm there).
-return new (newBlockAligned) Block(capacity, newBlockRaw, newBlockData);
+            return new(newBlockAligned) Block(capacity, newBlockRaw, newBlockData);
-}
+        }
-private:
+    private:
-weak_atomic<Block*> frontBlock;		// (Atomic) Elements are dequeued from this block
+        weak_atomic<Block *> frontBlock;        // (Atomic) Elements are dequeued from this block
-char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<Block*>)];
+        char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<Block *>)];
-weak_atomic<Block*> tailBlock;		// (Atomic) Elements are enqueued to this block
+        weak_atomic<Block *> tailBlock;        // (Atomic) Elements are enqueued to this block
-size_t largestBlockSize;
+        size_t largestBlockSize;
 #ifndef NDEBUG
-weak_atomic<bool> enqueuing;
+        weak_atomic<bool> enqueuing;
-mutable weak_atomic<bool> dequeuing;
+        mutable weak_atomic<bool> dequeuing;
 #endif
-};
+    };
 // Like ReaderWriterQueue, but also provides blocking operations
-template<typename T, size_t MAX_BLOCK_SIZE = 512>
+    template<typename T, size_t MAX_BLOCK_SIZE = 512>
-class BlockingReaderWriterQueue
+    class BlockingReaderWriterQueue {
-{
+    private:
-private:
        typedef ::moodycamel::ReaderWriterQueue<T, MAX_BLOCK_SIZE> ReaderWriterQueue;
-public:
+    public:
        explicit BlockingReaderWriterQueue(size_t size = 15) AE_NO_TSAN
-            : inner(size), sema(new spsc_sema::LightweightSemaphore())
+                : inner(size), sema(new spsc_sema::LightweightSemaphore()) {}
-    { }
-    BlockingReaderWriterQueue(BlockingReaderWriterQueue&& other) AE_NO_TSAN
+        BlockingReaderWriterQueue(BlockingReaderWriterQueue &&other) AE_NO_TSAN
-            : inner(std::move(other.inner)), sema(std::move(other.sema))
+                : inner(std::move(other.inner)), sema(std::move(other.sema)) {}
-    { }
-    BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue&& other) AE_NO_TSAN
+        BlockingReaderWriterQueue &operator=(BlockingReaderWriterQueue &&other) AE_NO_TSAN
        {
            std::swap(sema, other.sema);
            std::swap(inner, other.inner);
@@ -781,7 +768,7 @@ public:
        // Enqueues a copy of element if there is room in the queue.
        // Returns true if the element was enqueued, false otherwise.
        // Does not allocate memory.
-    AE_FORCEINLINE bool try_enqueue(T const& element) AE_NO_TSAN
+        AE_FORCEINLINE bool try_enqueue(T const &element) AE_NO_TSAN
        {
            if (inner.try_enqueue(element)) {
                sema->signal();
@@ -793,7 +780,7 @@ public:
        // Enqueues a moved copy of element if there is room in the queue.
        // Returns true if the element was enqueued, false otherwise.
        // Does not allocate memory.
-    AE_FORCEINLINE bool try_enqueue(T&& element) AE_NO_TSAN
+        AE_FORCEINLINE bool try_enqueue(T &&element) AE_NO_TSAN
        {
            if (inner.try_enqueue(std::forward<T>(element))) {
                sema->signal();
@@ -803,23 +790,24 @@ public:
        }
 #if MOODYCAMEL_HAS_EMPLACE
        // Like try_enqueue() but with emplace semantics (i.e. construct-in-place).
        template<typename... Args>
-    AE_FORCEINLINE bool try_emplace(Args&&... args) AE_NO_TSAN
+        AE_FORCEINLINE bool try_emplace(Args &&... args) AE_NO_TSAN {
-    {
            if (inner.try_emplace(std::forward<Args>(args)...)) {
                sema->signal();
                return true;
            }
            return false;
        }
 #endif
        // Enqueues a copy of element on the queue.
        // Allocates an additional block of memory if needed.
        // Only fails (returns false) if memory allocation fails.
-    AE_FORCEINLINE bool enqueue(T const& element) AE_NO_TSAN
+        AE_FORCEINLINE bool enqueue(T const &element) AE_NO_TSAN
        {
            if (inner.enqueue(element)) {
                sema->signal();
@@ -831,7 +819,7 @@ public:
        // Enqueues a moved copy of element on the queue.
        // Allocates an additional block of memory if needed.
        // Only fails (returns false) if memory allocation fails.
-    AE_FORCEINLINE bool enqueue(T&& element) AE_NO_TSAN
+        AE_FORCEINLINE bool enqueue(T &&element) AE_NO_TSAN
        {
            if (inner.enqueue(std::forward<T>(element))) {
                sema->signal();
@@ -841,16 +829,17 @@ public:
        }
 #if MOODYCAMEL_HAS_EMPLACE
        // Like enqueue() but with emplace semantics (i.e. construct-in-place).
        template<typename... Args>
-    AE_FORCEINLINE bool emplace(Args&&... args) AE_NO_TSAN
+        AE_FORCEINLINE bool emplace(Args &&... args) AE_NO_TSAN {
-    {
            if (inner.emplace(std::forward<Args>(args)...)) {
                sema->signal();
                return true;
            }
            return false;
        }
 #endif
@@ -858,8 +847,7 @@ public:
        // returns false instead. If the queue has at least one element,
        // moves front to result using operator=, then returns true.
        template<typename U>
-    bool try_dequeue(U& result) AE_NO_TSAN
+        bool try_dequeue(U &result) AE_NO_TSAN {
-    {
            if (sema->tryWait()) {
                bool success = inner.try_dequeue(result);
                assert(success);
@@ -873,8 +861,7 @@ public:
        // Attempts to dequeue an element; if the queue is empty,
        // waits until an element is available, then dequeues it.
        template<typename U>
-    void wait_dequeue(U& result) AE_NO_TSAN
+        void wait_dequeue(U &result) AE_NO_TSAN {
-    {
            while (!sema->wait());
            bool success = inner.try_dequeue(result);
            AE_UNUSED(result);
@@ -890,8 +877,7 @@ public:
        // Using a negative timeout indicates an indefinite timeout,
        // and is thus functionally equivalent to calling wait_dequeue.
        template<typename U>
-    bool wait_dequeue_timed(U& result, std::int64_t timeout_usecs) AE_NO_TSAN
+        bool wait_dequeue_timed(U &result, std::int64_t timeout_usecs) AE_NO_TSAN {
-    {
            if (!sema->wait(timeout_usecs)) {
                return false;
            }
@@ -923,7 +909,7 @@ public:
        // queue appears empty at the time the method is called, nullptr is
        // returned instead.
        // Must be called only from the consumer thread.
-    AE_FORCEINLINE T* peek() const AE_NO_TSAN
+        AE_FORCEINLINE T *peek() const AE_NO_TSAN
        {
            return inner.peek();
        }
@@ -962,15 +948,16 @@ public:
            return inner.max_capacity();
        }
-private:
+    private:
        // Disable copying & assignment
-    BlockingReaderWriterQueue(BlockingReaderWriterQueue const&) {  }
+        BlockingReaderWriterQueue(BlockingReaderWriterQueue const &) {}
-    BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue const&) {  }
+        BlockingReaderWriterQueue &operator=(BlockingReaderWriterQueue const &) {}
-private:
+    private:
        ReaderWriterQueue inner;
-    std::unique_ptr<spsc_sema::LightweightSemaphore> sema;
+        std::unique_ptr <spsc_sema::LightweightSemaphore> sema;
-};
+    };
 }    // end namespace moodycamel

--- a/zmq_src/ZMQServer.cpp
+++ b/zmq_src/ZMQServer.cpp
@@ -42,9 +42,11 @@ using namespace std;
 shared_ptr <ZMQServer> ZMQServer::zmqServer = nullptr;
 ZMQServer::ZMQServer(bool _checkSignature, bool _checkKeyOwnership, const string &_caCertFile)
-        : outgoingQueue(NUM_ZMQ_WORKER_THREADS), checkSignature(_checkSignature), checkKeyOwnership(_checkKeyOwnership),
+        : incomingQueue(NUM_ZMQ_WORKER_THREADS), checkSignature(_checkSignature), checkKeyOwnership(_checkKeyOwnership),
          caCertFile(_caCertFile), ctx(make_shared<zmq::context_t>(1)) {
+    CHECK_STATE(NUM_ZMQ_WORKER_THREADS > 1);
    socket = make_shared<zmq::socket_t>(*ctx, ZMQ_ROUTER);
    if (_checkSignature) {
@@ -277,12 +279,20 @@ void ZMQServer::doOneServerLoop() {
        CHECK_STATE2(msg, ZMQ_COULD_NOT_PARSE);
+        uint64_t index = 0;
        if ((dynamic_pointer_cast<BLSSignReqMessage>(msg)!= nullptr) ||
             dynamic_pointer_cast<ECDSASignReqMessage>(msg)) {
+            index = NUM_ZMQ_WORKER_THREADS - 1;
        } else {
+            index = 0;
        }
+        auto element = pair<shared_ptr<ZMQMessage>, shared_ptr<zmq::message_t>>(msg, identity);
+        incomingQueue.at(index).enqueue(element);
        result = msg->process();
    } catch (ExitRequestedException) {
        throw;

--- a/zmq_src/ZMQServer.h
+++ b/zmq_src/ZMQServer.h
@@ -34,6 +34,7 @@
 #include "Agent.h"
 #include "WorkerThreadPool.h"
+#include "ZMQMessage.h"
 using namespace moodycamel;
@@ -41,6 +42,7 @@ typedef enum {GOT_INCOMING_MSG = 0, GOT_OUTFOING_MSG = 1} PollResult;
 static const uint64_t NUM_ZMQ_WORKER_THREADS = 2;
 class ZMQServer : public Agent{
    uint64_t workerThreads;
@@ -48,9 +50,9 @@ class ZMQServer : public Agent{
    string caCertFile;
    string caCert;
-    ReaderWriterQueue<pair<string, shared_ptr<zmq_msg_t>>> outgoingQueue;
+    ReaderWriterQueue<pair<string, shared_ptr<zmq::message_t>>> outgoingQueue;
-    vector<ReaderWriterQueue<pair<string, shared_ptr<zmq_msg_t>>>> incomingQueue;
+    vector<ReaderWriterQueue<pair<shared_ptr<ZMQMessage>, shared_ptr<zmq::message_t>>>> incomingQueue;
    bool checkKeyOwnership = true;