SKALE-4586 Added Thread Pool

f457b201 · kladko · ecde0240 · f457b201 · f457b201 · f457b201
Unverified Commit f457b201 authored Sep 09, 2021 by kladko
Showing with 1018 additions and 996 deletions

atomicops.h third_party/atomicops.h +385 -362

readerwriterqueue.h third_party/readerwriterqueue.h +617 -630

ZMQServer.cpp zmq_src/ZMQServer.cpp +12 -2

ZMQServer.h zmq_src/ZMQServer.h +4 -2

No files found.
--- a/third_party/atomicops.h
+++ b/third_party/atomicops.h
@@ -206,24 +206,48 @@ namespace moodycamel {
    AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN
    {
        switch (order) {
-            case memory_order_relaxed: break;
-            case memory_order_acquire: std::atomic_signal_fence(std::memory_order_acquire); break;
-            case memory_order_release: std::atomic_signal_fence(std::memory_order_release); break;
-            case memory_order_acq_rel: std::atomic_signal_fence(std::memory_order_acq_rel); break;
-            case memory_order_seq_cst: std::atomic_signal_fence(std::memory_order_seq_cst); break;
-            default: assert(false);
+            case memory_order_relaxed:
+                break;
+            case memory_order_acquire:
+                std::atomic_signal_fence(std::memory_order_acquire);
+                break;
+            case memory_order_release:
+                std::atomic_signal_fence(std::memory_order_release);
+                break;
+            case memory_order_acq_rel:
+                std::atomic_signal_fence(std::memory_order_acq_rel);
+                break;
+            case memory_order_seq_cst:
+                std::atomic_signal_fence(std::memory_order_seq_cst);
+                break;
+            default:
+                assert(false);
        }
    }

    AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN
    {
        switch (order) {
-            case memory_order_relaxed: break;
-            case memory_order_acquire: AE_TSAN_ANNOTATE_ACQUIRE(); std::atomic_thread_fence(std::memory_order_acquire); break;
-            case memory_order_release: AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_release); break;
-            case memory_order_acq_rel: AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_acq_rel); break;
-            case memory_order_seq_cst: AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_seq_cst); break;
-            default: assert(false);
+            case memory_order_relaxed:
+                break;
+            case memory_order_acquire:
+                AE_TSAN_ANNOTATE_ACQUIRE();
+                std::atomic_thread_fence(std::memory_order_acquire);
+                break;
+            case memory_order_release:
+                AE_TSAN_ANNOTATE_RELEASE();
+                std::atomic_thread_fence(std::memory_order_release);
+                break;
+            case memory_order_acq_rel:
+                AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE();
+                std::atomic_thread_fence(std::memory_order_acq_rel);
+                break;
+            case memory_order_seq_cst:
+                AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE();
+                std::atomic_thread_fence(std::memory_order_seq_cst);
+                break;
+            default:
+                assert(false);
        }
    }

@@ -237,8 +261,11 @@ namespace moodycamel {
 #endif

 #ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
+
 #include <atomic>
+
 #endif
+
 #include <utility>

 // WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY:
@@ -247,21 +274,26 @@ namespace moodycamel {
 // at the hardware level -- on most platforms this generally means aligned pointers and integers (only).
 namespace moodycamel {
    template<typename T>
-    class weak_atomic
-    {
+    class weak_atomic {
    public:
-        AE_NO_TSAN weak_atomic() : value() { }
+        AE_NO_TSAN weak_atomic() : value() {}
+
 #ifdef AE_VCPP
-        #pragma warning(push)
+#pragma warning(push)
 #pragma warning(disable: 4100)		// Get rid of (erroneous) 'unreferenced formal parameter' warning
 #endif
-        template<typename U> AE_NO_TSAN weak_atomic(U&& x) : value(std::forward<U>(x)) {  }
+
+        template<typename U>
+        AE_NO_TSAN weak_atomic(U &&x) : value(std::forward<U>(x)) {}
+
 #ifdef __cplusplus_cli
        // Work around bug with universal reference/nullptr combination that only appears when /clr is on
    AE_NO_TSAN weak_atomic(nullptr_t) : value(nullptr) {  }
 #endif
-        AE_NO_TSAN weak_atomic(weak_atomic const& other) : value(other.load()) {  }
-        AE_NO_TSAN weak_atomic(weak_atomic&& other) : value(std::move(other.load())) {  }
+        AE_NO_TSAN weak_atomic(weak_atomic const &other) : value(other.load()) {}
+
+        AE_NO_TSAN weak_atomic(weak_atomic &&other) : value(std::move(other.load())) {}
+
 #ifdef AE_VCPP
 #pragma warning(pop)
 #endif
@@ -303,14 +335,14 @@ namespace moodycamel {
        return value;
    }
 #else
+
        template<typename U>
-        AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN
-        {
+        AE_FORCEINLINE weak_atomic const &operator=(U &&x) AE_NO_TSAN {
            value.store(std::forward<U>(x), std::memory_order_relaxed);
            return *this;
        }

-        AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN
+        AE_FORCEINLINE weak_atomic const &operator=(weak_atomic const &other) AE_NO_TSAN
        {
            value.store(other.value.load(std::memory_order_relaxed), std::memory_order_relaxed);
            return *this;
@@ -327,6 +359,7 @@ namespace moodycamel {
        {
            return value.fetch_add(increment, std::memory_order_release);
        }
+
 #endif


@@ -336,7 +369,7 @@ namespace moodycamel {
    // `volatile` will make memory access slow, but is guaranteed to be reliable.
    volatile T value;
 #else
-        std::atomic<T> value;
+        std::atomic <T> value;
 #endif
    };

@@ -369,8 +402,7 @@ extern "C" {
 #include <task.h>
 #endif

-namespace moodycamel
-{
+namespace moodycamel {
    // Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
    // portable + lightweight semaphore implementations, originally from
    // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
@@ -392,8 +424,7 @@ namespace moodycamel
    // 2. Altered source versions must be plainly marked as such, and must not be
    //    misrepresented as being the original software.
    // 3. This notice may not be removed or altered from any source distribution.
-    namespace spsc_sema
-    {
+    namespace spsc_sema {
 #if defined(_WIN32)
        class Semaphore
        {
@@ -655,8 +686,7 @@ namespace moodycamel
        //---------------------------------------------------------
        // LightweightSemaphore
        //---------------------------------------------------------
-        class LightweightSemaphore
-        {
+        class LightweightSemaphore {
        public:
            typedef std::make_signed<std::size_t>::type ssize_t;

@@ -671,10 +701,8 @@ namespace moodycamel
                // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
                // as threads start hitting the kernel semaphore.
                int spin = 1024;
-                while (--spin >= 0)
-                {
-                    if (m_count.load() > 0)
-                    {
+                while (--spin >= 0) {
+                    if (m_count.load() > 0) {
                        m_count.fetch_add_acquire(-1);
                        return true;
                    }
@@ -683,8 +711,7 @@ namespace moodycamel
                oldCount = m_count.fetch_add_acquire(-1);
                if (oldCount > 0)
                    return true;
-                if (timeout_usecs < 0)
-                {
+                if (timeout_usecs < 0) {
                    if (m_sema.wait())
                        return true;
                }
@@ -695,8 +722,7 @@ namespace moodycamel
                // it. So we have to re-adjust the count, but only if the semaphore
                // wasn't signaled enough times for us too since then. If it was, we
                // need to release the semaphore too.
-                while (true)
-                {
+                while (true) {
                    oldCount = m_count.fetch_add_release(1);
                    if (oldCount < 0)
                        return false;    // successfully restored things to the way they were
@@ -708,15 +734,13 @@ namespace moodycamel
            }

        public:
-            AE_NO_TSAN LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount), m_sema()
-            {
+            AE_NO_TSAN LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount), m_sema() {
                assert(initialCount >= 0);
            }

            bool tryWait() AE_NO_TSAN
            {
-                if (m_count.load() > 0)
-                {
+                if (m_count.load() > 0) {
                    m_count.fetch_add_acquire(-1);
                    return true;
                }
@@ -738,8 +762,7 @@ namespace moodycamel
                assert(count >= 0);
                ssize_t oldCount = m_count.fetch_add_release(count);
                assert(oldCount >= -1);
-                if (oldCount < 0)
-                {
+                if (oldCount < 0) {
                    m_sema.signal(1);
                }
            }

--- a/third_party/readerwriterqueue.h
+++ b/third_party/readerwriterqueue.h
@@ -14,6 +14,7 @@
 #include <cstdint>
 #include <cstdlib>        // For malloc/free/abort & size_t
 #include <memory>
+
 #if __cplusplus > 199711L || _MSC_VER >= 1700 // C++11 or VS2012
 #include <chrono>
 #endif
@@ -73,8 +74,7 @@
 namespace moodycamel {

    template<typename T, size_t MAX_BLOCK_SIZE = 512>
-    class MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE ReaderWriterQueue
-{
+    class MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE ReaderWriterQueue {
        // Design: Based on a queue-of-queues. The low-level queues are just
        // circular buffers with front and tail indices indicating where the
        // next element to dequeue is and where the next element can be enqueued,
@@ -104,14 +104,13 @@ namespace moodycamel {
        // at least one extra buffer block).
        AE_NO_TSAN explicit ReaderWriterQueue(size_t size = 15)
 #ifndef NDEBUG
-    : enqueuing(false)
-    ,dequeuing(false)
+                : enqueuing(false), dequeuing(false)
 #endif
-{
+        {
            assert(MAX_BLOCK_SIZE == ceilToPow2(MAX_BLOCK_SIZE) && "MAX_BLOCK_SIZE must be a power of 2");
            assert(MAX_BLOCK_SIZE >= 2 && "MAX_BLOCK_SIZE must be at least 2");

-    Block* firstBlock = nullptr;
+            Block *firstBlock = nullptr;

            largestBlockSize = ceilToPow2(size + 1);        // We need a spare slot to fit size elements in the block
            if (largestBlockSize > MAX_BLOCK_SIZE * 2) {
@@ -122,7 +121,7 @@ namespace moodycamel {
                // number of blocks - 1. Solving for size and applying a ceiling to the division gives us (after simplifying):
                size_t initialBlockCount = (size + MAX_BLOCK_SIZE * 2 - 3) / (MAX_BLOCK_SIZE - 1);
                largestBlockSize = MAX_BLOCK_SIZE;
-    Block* lastBlock = nullptr;
+                Block *lastBlock = nullptr;
                for (size_t i = 0; i != initialBlockCount; ++i) {
                    auto block = make_block(largestBlockSize);
                    if (block == nullptr) {
@@ -131,93 +130,89 @@ namespace moodycamel {
 #else
                        abort();
 #endif
-}
-if (firstBlock == nullptr) {
-firstBlock = block;
-}
-else {
-lastBlock->next = block;
-}
-lastBlock = block;
-block->next = firstBlock;
-}
-}
-else {
-firstBlock = make_block(largestBlockSize);
-if (firstBlock == nullptr) {
+                    }
+                    if (firstBlock == nullptr) {
+                        firstBlock = block;
+                    } else {
+                        lastBlock->next = block;
+                    }
+                    lastBlock = block;
+                    block->next = firstBlock;
+                }
+            } else {
+                firstBlock = make_block(largestBlockSize);
+                if (firstBlock == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
-throw std::bad_alloc();
+                    throw std::bad_alloc();
 #else
-abort();
+                    abort();
 #endif
-}
-firstBlock->next = firstBlock;
-}
-frontBlock = firstBlock;
-tailBlock = firstBlock;
+                }
+                firstBlock->next = firstBlock;
+            }
+            frontBlock = firstBlock;
+            tailBlock = firstBlock;

 // Make sure the reader/writer threads will have the initialized memory setup above:
-fence(memory_order_sync);
-}
+            fence(memory_order_sync);
+        }

 // Note: The queue should not be accessed concurrently while it's
 // being moved. It's up to the user to synchronize this.
-AE_NO_TSAN ReaderWriterQueue(ReaderWriterQueue&& other)
-: frontBlock(other.frontBlock.load()),
-tailBlock(other.tailBlock.load()),
-largestBlockSize(other.largestBlockSize)
+        AE_NO_TSAN ReaderWriterQueue(ReaderWriterQueue &&other)
+                : frontBlock(other.frontBlock.load()),
+                  tailBlock(other.tailBlock.load()),
+                  largestBlockSize(other.largestBlockSize)
 #ifndef NDEBUG
-,enqueuing(false)
-,dequeuing(false)
+                , enqueuing(false), dequeuing(false)
 #endif
-{
-other.largestBlockSize = 32;
-Block* b = other.make_block(other.largestBlockSize);
-if (b == nullptr) {
+        {
+            other.largestBlockSize = 32;
+            Block *b = other.make_block(other.largestBlockSize);
+            if (b == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
-throw std::bad_alloc();
+                throw std::bad_alloc();
 #else
-abort();
+                abort();
 #endif
-}
-b->next = b;
-other.frontBlock = b;
-other.tailBlock = b;
-}
+            }
+            b->next = b;
+            other.frontBlock = b;
+            other.tailBlock = b;
+        }

 // Note: The queue should not be accessed concurrently while it's
 // being moved. It's up to the user to synchronize this.
-ReaderWriterQueue& operator=(ReaderWriterQueue&& other) AE_NO_TSAN
-{
-Block* b = frontBlock.load();
-frontBlock = other.frontBlock.load();
-other.frontBlock = b;
-b = tailBlock.load();
-tailBlock = other.tailBlock.load();
-other.tailBlock = b;
-std::swap(largestBlockSize, other.largestBlockSize);
-return *this;
-}
+        ReaderWriterQueue &operator=(ReaderWriterQueue &&other) AE_NO_TSAN
+        {
+            Block *b = frontBlock.load();
+            frontBlock = other.frontBlock.load();
+            other.frontBlock = b;
+            b = tailBlock.load();
+            tailBlock = other.tailBlock.load();
+            other.tailBlock = b;
+            std::swap(largestBlockSize, other.largestBlockSize);
+            return *this;
+        }

 // Note: The queue should not be accessed concurrently while it's
 // being deleted. It's up to the user to synchronize this.
-AE_NO_TSAN ~ReaderWriterQueue()
-{
+        AE_NO_TSAN ~ReaderWriterQueue() {
            // Make sure we get the latest version of all variables from other CPUs:
            fence(memory_order_sync);

            // Destroy any remaining objects in queue and free memory
-    Block* frontBlock_ = frontBlock;
-    Block* block = frontBlock_;
+            Block *frontBlock_ = frontBlock;
+            Block *block = frontBlock_;
            do {
-        Block* nextBlock = block->next;
+                Block *nextBlock = block->next;
                size_t blockFront = block->front;
                size_t blockTail = block->tail;

                for (size_t i = blockFront; i != blockTail; i = (i + 1) & block->sizeMask) {
-            auto element = reinterpret_cast<T*>(block->data + i * sizeof(T));
+                    auto element = reinterpret_cast<T *>(block->data + i * sizeof(T));
                    element->~T();
-            (void)element;
+                    (void) element;
                }

                auto rawBlock = block->rawThis;
@@ -225,67 +220,68 @@ AE_NO_TSAN ~ReaderWriterQueue()
                std::free(rawBlock);
                block = nextBlock;
            } while (block != frontBlock_);
-}
+        }


 // Enqueues a copy of element if there is room in the queue.
 // Returns true if the element was enqueued, false otherwise.
 // Does not allocate memory.
-AE_FORCEINLINE bool try_enqueue(T const& element) AE_NO_TSAN
-{
-return inner_enqueue<CannotAlloc>(element);
-}
+        AE_FORCEINLINE bool try_enqueue(T const &element) AE_NO_TSAN
+        {
+            return inner_enqueue<CannotAlloc>(element);
+        }

 // Enqueues a moved copy of element if there is room in the queue.
 // Returns true if the element was enqueued, false otherwise.
 // Does not allocate memory.
-AE_FORCEINLINE bool try_enqueue(T&& element) AE_NO_TSAN
-{
-return inner_enqueue<CannotAlloc>(std::forward<T>(element));
-}
+        AE_FORCEINLINE bool try_enqueue(T &&element) AE_NO_TSAN
+        {
+            return inner_enqueue<CannotAlloc>(std::forward<T>(element));
+        }

 #if MOODYCAMEL_HAS_EMPLACE
+
 // Like try_enqueue() but with emplace semantics (i.e. construct-in-place).
-template<typename... Args>
-AE_FORCEINLINE bool try_emplace(Args&&... args) AE_NO_TSAN
-{
-return inner_enqueue<CannotAlloc>(std::forward<Args>(args)...);
-}
+        template<typename... Args>
+        AE_FORCEINLINE bool try_emplace(Args &&... args) AE_NO_TSAN {
+            return inner_enqueue<CannotAlloc>(std::forward<Args>(args)...);
+        }
+
 #endif

 // Enqueues a copy of element on the queue.
 // Allocates an additional block of memory if needed.
 // Only fails (returns false) if memory allocation fails.
-AE_FORCEINLINE bool enqueue(T const& element) AE_NO_TSAN
-{
-return inner_enqueue<CanAlloc>(element);
-}
+        AE_FORCEINLINE bool enqueue(T const &element) AE_NO_TSAN
+        {
+            return inner_enqueue<CanAlloc>(element);
+        }

 // Enqueues a moved copy of element on the queue.
 // Allocates an additional block of memory if needed.
 // Only fails (returns false) if memory allocation fails.
-AE_FORCEINLINE bool enqueue(T&& element) AE_NO_TSAN
-{
-return inner_enqueue<CanAlloc>(std::forward<T>(element));
-}
+        AE_FORCEINLINE bool enqueue(T &&element) AE_NO_TSAN
+        {
+            return inner_enqueue<CanAlloc>(std::forward<T>(element));
+        }

 #if MOODYCAMEL_HAS_EMPLACE
+
 // Like enqueue() but with emplace semantics (i.e. construct-in-place).
-template<typename... Args>
-AE_FORCEINLINE bool emplace(Args&&... args) AE_NO_TSAN
-{
-return inner_enqueue<CanAlloc>(std::forward<Args>(args)...);
-}
+        template<typename... Args>
+        AE_FORCEINLINE bool emplace(Args &&... args) AE_NO_TSAN {
+            return inner_enqueue<CanAlloc>(std::forward<Args>(args)...);
+        }
+
 #endif

 // Attempts to dequeue an element; if the queue is empty,
 // returns false instead. If the queue has at least one element,
 // moves front to result using operator=, then returns true.
-template<typename U>
-bool try_dequeue(U& result) AE_NO_TSAN
-{
+        template<typename U>
+        bool try_dequeue(U &result) AE_NO_TSAN {
 #ifndef NDEBUG
-ReentrantGuard guard(this->dequeuing);
+            ReentrantGuard guard(this->dequeuing);
 #endif

 // High-level pseudocode:
@@ -305,75 +301,73 @@ ReentrantGuard guard(this->dequeuing);
 // then re-read the front block and check if it's not empty again, then check if the tail
 // block has advanced.

-Block* frontBlock_ = frontBlock.load();
-size_t blockTail = frontBlock_->localTail;
-size_t blockFront = frontBlock_->front.load();
+            Block *frontBlock_ = frontBlock.load();
+            size_t blockTail = frontBlock_->localTail;
+            size_t blockFront = frontBlock_->front.load();

-if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
-fence(memory_order_acquire);
+            if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
+                fence(memory_order_acquire);

-non_empty_front_block:
+                non_empty_front_block:
 // Front block not empty, dequeue from here
-auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
-result = std::move(*element);
-element->~T();
+                auto element = reinterpret_cast<T *>(frontBlock_->data + blockFront * sizeof(T));
+                result = std::move(*element);
+                element->~T();

-blockFront = (blockFront + 1) & frontBlock_->sizeMask;
+                blockFront = (blockFront + 1) & frontBlock_->sizeMask;

-fence(memory_order_release);
-frontBlock_->front = blockFront;
-}
-else if (frontBlock_ != tailBlock.load()) {
-fence(memory_order_acquire);
+                fence(memory_order_release);
+                frontBlock_->front = blockFront;
+            } else if (frontBlock_ != tailBlock.load()) {
+                fence(memory_order_acquire);

-frontBlock_ = frontBlock.load();
-blockTail = frontBlock_->localTail = frontBlock_->tail.load();
-blockFront = frontBlock_->front.load();
-fence(memory_order_acquire);
+                frontBlock_ = frontBlock.load();
+                blockTail = frontBlock_->localTail = frontBlock_->tail.load();
+                blockFront = frontBlock_->front.load();
+                fence(memory_order_acquire);

-if (blockFront != blockTail) {
+                if (blockFront != blockTail) {
 // Oh look, the front block isn't empty after all
-goto non_empty_front_block;
-}
+                    goto non_empty_front_block;
+                }

 // Front block is empty but there's another block ahead, advance to it
-Block* nextBlock = frontBlock_->next;
+                Block *nextBlock = frontBlock_->next;
 // Don't need an acquire fence here since next can only ever be set on the tailBlock,
 // and we're not the tailBlock, and we did an acquire earlier after reading tailBlock which
 // ensures next is up-to-date on this CPU in case we recently were at tailBlock.

-size_t nextBlockFront = nextBlock->front.load();
-size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
-fence(memory_order_acquire);
+                size_t nextBlockFront = nextBlock->front.load();
+                size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
+                fence(memory_order_acquire);

 // Since the tailBlock is only ever advanced after being written to,
 // we know there's for sure an element to dequeue on it
-assert(nextBlockFront != nextBlockTail);
-AE_UNUSED(nextBlockTail);
+                assert(nextBlockFront != nextBlockTail);
+                AE_UNUSED(nextBlockTail);

 // We're done with this block, let the producer use it if it needs
-fence(memory_order_release);		// Expose possibly pending changes to frontBlock->front from last dequeue
-frontBlock = frontBlock_ = nextBlock;
+                fence(memory_order_release);        // Expose possibly pending changes to frontBlock->front from last dequeue
+                frontBlock = frontBlock_ = nextBlock;

-compiler_fence(memory_order_release);	// Not strictly needed
+                compiler_fence(memory_order_release);    // Not strictly needed

-auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
+                auto element = reinterpret_cast<T *>(frontBlock_->data + nextBlockFront * sizeof(T));

-result = std::move(*element);
-element->~T();
+                result = std::move(*element);
+                element->~T();

-nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
+                nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;

-fence(memory_order_release);
-frontBlock_->front = nextBlockFront;
-}
-else {
+                fence(memory_order_release);
+                frontBlock_->front = nextBlockFront;
+            } else {
 // No elements in current block and no other block to advance to
-return false;
-}
+                return false;
+            }

-return true;
-}
+            return true;
+        }


 // Returns a pointer to the front element in the queue (the one that
@@ -381,129 +375,126 @@ return true;
 // queue appears empty at the time the method is called, nullptr is
 // returned instead.
 // Must be called only from the consumer thread.
-T* peek() const AE_NO_TSAN
-{
+        T *peek() const AE_NO_TSAN
+        {
 #ifndef NDEBUG
-ReentrantGuard guard(this->dequeuing);
+            ReentrantGuard guard(this->dequeuing);
 #endif
 // See try_dequeue() for reasoning

-Block* frontBlock_ = frontBlock.load();
-size_t blockTail = frontBlock_->localTail;
-size_t blockFront = frontBlock_->front.load();
+            Block *frontBlock_ = frontBlock.load();
+            size_t blockTail = frontBlock_->localTail;
+            size_t blockFront = frontBlock_->front.load();

-if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
-fence(memory_order_acquire);
-non_empty_front_block:
-return reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
-}
-else if (frontBlock_ != tailBlock.load()) {
-fence(memory_order_acquire);
-frontBlock_ = frontBlock.load();
-blockTail = frontBlock_->localTail = frontBlock_->tail.load();
-blockFront = frontBlock_->front.load();
-fence(memory_order_acquire);
+            if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
+                fence(memory_order_acquire);
+                non_empty_front_block:
+                return reinterpret_cast<T *>(frontBlock_->data + blockFront * sizeof(T));
+            } else if (frontBlock_ != tailBlock.load()) {
+                fence(memory_order_acquire);
+                frontBlock_ = frontBlock.load();
+                blockTail = frontBlock_->localTail = frontBlock_->tail.load();
+                blockFront = frontBlock_->front.load();
+                fence(memory_order_acquire);

-if (blockFront != blockTail) {
-goto non_empty_front_block;
-}
+                if (blockFront != blockTail) {
+                    goto non_empty_front_block;
+                }

-Block* nextBlock = frontBlock_->next;
+                Block *nextBlock = frontBlock_->next;

-size_t nextBlockFront = nextBlock->front.load();
-fence(memory_order_acquire);
+                size_t nextBlockFront = nextBlock->front.load();
+                fence(memory_order_acquire);

-assert(nextBlockFront != nextBlock->tail.load());
-return reinterpret_cast<T*>(nextBlock->data + nextBlockFront * sizeof(T));
-}
+                assert(nextBlockFront != nextBlock->tail.load());
+                return reinterpret_cast<T *>(nextBlock->data + nextBlockFront * sizeof(T));
+            }

-return nullptr;
-}
+            return nullptr;
+        }

 // Removes the front element from the queue, if any, without returning it.
 // Returns true on success, or false if the queue appeared empty at the time
 // `pop` was called.
-bool pop() AE_NO_TSAN
-{
+        bool pop() AE_NO_TSAN
+        {
 #ifndef NDEBUG
-ReentrantGuard guard(this->dequeuing);
+            ReentrantGuard guard(this->dequeuing);
 #endif
 // See try_dequeue() for reasoning

-Block* frontBlock_ = frontBlock.load();
-size_t blockTail = frontBlock_->localTail;
-size_t blockFront = frontBlock_->front.load();
+            Block *frontBlock_ = frontBlock.load();
+            size_t blockTail = frontBlock_->localTail;
+            size_t blockFront = frontBlock_->front.load();

-if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
-fence(memory_order_acquire);
+            if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
+                fence(memory_order_acquire);

-non_empty_front_block:
-auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
-element->~T();
+                non_empty_front_block:
+                auto element = reinterpret_cast<T *>(frontBlock_->data + blockFront * sizeof(T));
+                element->~T();

-blockFront = (blockFront + 1) & frontBlock_->sizeMask;
+                blockFront = (blockFront + 1) & frontBlock_->sizeMask;

-fence(memory_order_release);
-frontBlock_->front = blockFront;
-}
-else if (frontBlock_ != tailBlock.load()) {
-fence(memory_order_acquire);
-frontBlock_ = frontBlock.load();
-blockTail = frontBlock_->localTail = frontBlock_->tail.load();
-blockFront = frontBlock_->front.load();
-fence(memory_order_acquire);
+                fence(memory_order_release);
+                frontBlock_->front = blockFront;
+            } else if (frontBlock_ != tailBlock.load()) {
+                fence(memory_order_acquire);
+                frontBlock_ = frontBlock.load();
+                blockTail = frontBlock_->localTail = frontBlock_->tail.load();
+                blockFront = frontBlock_->front.load();
+                fence(memory_order_acquire);

-if (blockFront != blockTail) {
-goto non_empty_front_block;
-}
+                if (blockFront != blockTail) {
+                    goto non_empty_front_block;
+                }

 // Front block is empty but there's another block ahead, advance to it
-Block* nextBlock = frontBlock_->next;
+                Block *nextBlock = frontBlock_->next;

-size_t nextBlockFront = nextBlock->front.load();
-size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
-fence(memory_order_acquire);
+                size_t nextBlockFront = nextBlock->front.load();
+                size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
+                fence(memory_order_acquire);

-assert(nextBlockFront != nextBlockTail);
-AE_UNUSED(nextBlockTail);
+                assert(nextBlockFront != nextBlockTail);
+                AE_UNUSED(nextBlockTail);

-fence(memory_order_release);
-frontBlock = frontBlock_ = nextBlock;
+                fence(memory_order_release);
+                frontBlock = frontBlock_ = nextBlock;

-compiler_fence(memory_order_release);
+                compiler_fence(memory_order_release);

-auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
-element->~T();
+                auto element = reinterpret_cast<T *>(frontBlock_->data + nextBlockFront * sizeof(T));
+                element->~T();

-nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
+                nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;

-fence(memory_order_release);
-frontBlock_->front = nextBlockFront;
-}
-else {
+                fence(memory_order_release);
+                frontBlock_->front = nextBlockFront;
+            } else {
 // No elements in current block and no other block to advance to
-return false;
-}
+                return false;
+            }

-return true;
-}
+            return true;
+        }

 // Returns the approximate number of items currently in the queue.
 // Safe to call from both the producer and consumer threads.
-inline size_t size_approx() const AE_NO_TSAN
-{
-size_t result = 0;
-Block* frontBlock_ = frontBlock.load();
-Block* block = frontBlock_;
-do {
-fence(memory_order_acquire);
-size_t blockFront = block->front.load();
-size_t blockTail = block->tail.load();
-result += (blockTail - blockFront) & block->sizeMask;
-block = block->next.load();
-} while (block != frontBlock_);
-return result;
-}
+        inline size_t size_approx() const AE_NO_TSAN
+        {
+            size_t result = 0;
+            Block *frontBlock_ = frontBlock.load();
+            Block *block = frontBlock_;
+            do {
+                fence(memory_order_acquire);
+                size_t blockFront = block->front.load();
+                size_t blockTail = block->tail.load();
+                result += (blockTail - blockFront) & block->sizeMask;
+                block = block->next.load();
+            } while (block != frontBlock_);
+            return result;
+        }

 // Returns the total number of items that could be enqueued without incurring
 // an allocation when this queue is empty.
@@ -514,32 +505,35 @@ return result;
 //       the block the consumer is removing from until it's completely empty, except in
 //       the case where the producer was writing to the same block the consumer was
 //       reading from the whole time.
-inline size_t max_capacity() const {
+        inline size_t max_capacity() const {
            size_t result = 0;
-    Block* frontBlock_ = frontBlock.load();
-    Block* block = frontBlock_;
+            Block *frontBlock_ = frontBlock.load();
+            Block *block = frontBlock_;
            do {
                fence(memory_order_acquire);
                result += block->sizeMask;
                block = block->next.load();
            } while (block != frontBlock_);
            return result;
-}
+        }


-private:
-enum AllocationMode { CanAlloc, CannotAlloc };
+    private:
+        enum AllocationMode {
+            CanAlloc, CannotAlloc
+        };

 #if MOODYCAMEL_HAS_EMPLACE
-template<AllocationMode canAlloc, typename... Args>
-bool inner_enqueue(Args&&... args) AE_NO_TSAN
+
+        template<AllocationMode canAlloc, typename... Args>
+        bool inner_enqueue(Args &&... args) AE_NO_TSAN
 #else
-template<AllocationMode canAlloc, typename U>
+        template<AllocationMode canAlloc, typename U>
            bool inner_enqueue(U&& element) AE_NO_TSAN
 #endif
-{
+        {
 #ifndef NDEBUG
-ReentrantGuard guard(this->enqueuing);
+            ReentrantGuard guard(this->enqueuing);
 #endif

 // High-level pseudocode (assuming we're allowed to alloc a new block):
@@ -549,77 +543,75 @@ ReentrantGuard guard(this->enqueuing);
 //     Else create a new block and enqueue there
 //     Advance tail to the block we just enqueued to

-Block* tailBlock_ = tailBlock.load();
-size_t blockFront = tailBlock_->localFront;
-size_t blockTail = tailBlock_->tail.load();
+            Block *tailBlock_ = tailBlock.load();
+            size_t blockFront = tailBlock_->localFront;
+            size_t blockTail = tailBlock_->tail.load();

-size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask;
-if (nextBlockTail != blockFront || nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) {
-fence(memory_order_acquire);
+            size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask;
+            if (nextBlockTail != blockFront || nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) {
+                fence(memory_order_acquire);
 // This block has room for at least one more element
-char* location = tailBlock_->data + blockTail * sizeof(T);
+                char *location = tailBlock_->data + blockTail * sizeof(T);
 #if MOODYCAMEL_HAS_EMPLACE
-new (location) T(std::forward<Args>(args)...);
+                new(location) T(std::forward<Args>(args)...);
 #else
-new (location) T(std::forward<U>(element));
+                new (location) T(std::forward<U>(element));
 #endif

-fence(memory_order_release);
-tailBlock_->tail = nextBlockTail;
-}
-else {
-fence(memory_order_acquire);
-if (tailBlock_->next.load() != frontBlock) {
+                fence(memory_order_release);
+                tailBlock_->tail = nextBlockTail;
+            } else {
+                fence(memory_order_acquire);
+                if (tailBlock_->next.load() != frontBlock) {
 // Note that the reason we can't advance to the frontBlock and start adding new entries there
 // is because if we did, then dequeue would stay in that block, eventually reading the new values,
 // instead of advancing to the next full block (whose values were enqueued first and so should be
 // consumed first).

-fence(memory_order_acquire);		// Ensure we get latest writes if we got the latest frontBlock
+                    fence(memory_order_acquire);        // Ensure we get latest writes if we got the latest frontBlock

 // tailBlock is full, but there's a free block ahead, use it
-Block* tailBlockNext = tailBlock_->next.load();
-size_t nextBlockFront = tailBlockNext->localFront = tailBlockNext->front.load();
-nextBlockTail = tailBlockNext->tail.load();
-fence(memory_order_acquire);
+                    Block *tailBlockNext = tailBlock_->next.load();
+                    size_t nextBlockFront = tailBlockNext->localFront = tailBlockNext->front.load();
+                    nextBlockTail = tailBlockNext->tail.load();
+                    fence(memory_order_acquire);

 // This block must be empty since it's not the head block and we
 // go through the blocks in a circle
-assert(nextBlockFront == nextBlockTail);
-tailBlockNext->localFront = nextBlockFront;
+                    assert(nextBlockFront == nextBlockTail);
+                    tailBlockNext->localFront = nextBlockFront;

-char* location = tailBlockNext->data + nextBlockTail * sizeof(T);
+                    char *location = tailBlockNext->data + nextBlockTail * sizeof(T);
 #if MOODYCAMEL_HAS_EMPLACE
-new (location) T(std::forward<Args>(args)...);
+                    new(location) T(std::forward<Args>(args)...);
 #else
-new (location) T(std::forward<U>(element));
+                    new (location) T(std::forward<U>(element));
 #endif

-tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask;
+                    tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask;

-fence(memory_order_release);
-tailBlock = tailBlockNext;
-}
-else if (canAlloc == CanAlloc) {
+                    fence(memory_order_release);
+                    tailBlock = tailBlockNext;
+                } else if (canAlloc == CanAlloc) {
 // tailBlock is full and there's no free block ahead; create a new block
-auto newBlockSize = largestBlockSize >= MAX_BLOCK_SIZE ? largestBlockSize : largestBlockSize * 2;
-auto newBlock = make_block(newBlockSize);
-if (newBlock == nullptr) {
+                    auto newBlockSize = largestBlockSize >= MAX_BLOCK_SIZE ? largestBlockSize : largestBlockSize * 2;
+                    auto newBlock = make_block(newBlockSize);
+                    if (newBlock == nullptr) {
 // Could not allocate a block!
-return false;
-}
-largestBlockSize = newBlockSize;
+                        return false;
+                    }
+                    largestBlockSize = newBlockSize;

 #if MOODYCAMEL_HAS_EMPLACE
-new (newBlock->data) T(std::forward<Args>(args)...);
+                    new(newBlock->data) T(std::forward<Args>(args)...);
 #else
-new (newBlock->data) T(std::forward<U>(element));
+                    new (newBlock->data) T(std::forward<U>(element));
 #endif
-assert(newBlock->front == 0);
-newBlock->tail = newBlock->localTail = 1;
+                    assert(newBlock->front == 0);
+                    newBlock->tail = newBlock->localTail = 1;

-newBlock->next = tailBlock_->next.load();
-tailBlock_->next = newBlock;
+                    newBlock->next = tailBlock_->next.load();
+                    tailBlock_->next = newBlock;

 // Might be possible for the dequeue thread to see the new tailBlock->next
 // *without* seeing the new tailBlock value, but this is OK since it can't
@@ -627,73 +619,70 @@ tailBlock_->next = newBlock;
 // case where it could try to read the next is if it's already at the tailBlock,
 // and it won't advance past tailBlock in any circumstance).

-fence(memory_order_release);
-tailBlock = newBlock;
-}
-else if (canAlloc == CannotAlloc) {
+                    fence(memory_order_release);
+                    tailBlock = newBlock;
+                } else if (canAlloc == CannotAlloc) {
 // Would have had to allocate a new block to enqueue, but not allowed
-return false;
-}
-else {
-assert(false && "Should be unreachable code");
-return false;
-}
-}
+                    return false;
+                } else {
+                    assert(false && "Should be unreachable code");
+                    return false;
+                }
+            }

-return true;
-}
+            return true;
+        }


 // Disable copying
-ReaderWriterQueue(ReaderWriterQueue const&) {  }
+        ReaderWriterQueue(ReaderWriterQueue const &) {}

 // Disable assignment
-ReaderWriterQueue& operator=(ReaderWriterQueue const&) {  }
+        ReaderWriterQueue &operator=(ReaderWriterQueue const &) {}


-AE_FORCEINLINE static size_t ceilToPow2(size_t x)
-{
+        AE_FORCEINLINE static size_t ceilToPow2(size_t x) {
 // From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
--x;
-x |= x >> 1;
-x |= x >> 2;
-x |= x >> 4;
-for (size_t i = 1; i < sizeof(size_t); i <<= 1) {
-x |= x >> (i << 3);
-}
-++x;
-return x;
-}
-
-template<typename U>
-static AE_FORCEINLINE char* align_for(char* ptr) AE_NO_TSAN
-{
-const std::size_t alignment = std::alignment_of<U>::value;
-return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
-}
-private:
+            --x;
+            x |= x >> 1;
+            x |= x >> 2;
+            x |= x >> 4;
+            for (size_t i = 1; i < sizeof(size_t); i <<= 1) {
+                x |= x >> (i << 3);
+            }
+            ++x;
+            return x;
+        }
+
+        template<typename U>
+        static AE_FORCEINLINE char *align_for(char *ptr) AE_NO_TSAN {
+            const std::size_t alignment = std::alignment_of<U>::value;
+            return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
+        }
+
+    private:
 #ifndef NDEBUG
-struct ReentrantGuard
-{
-    AE_NO_TSAN ReentrantGuard(weak_atomic<bool>& _inSection)
-            : inSection(_inSection)
-    {
-        assert(!inSection && "Concurrent (or re-entrant) enqueue or dequeue operation detected (only one thread at a time may hold the producer or consumer role)");
+
+        struct ReentrantGuard {
+            AE_NO_TSAN ReentrantGuard(weak_atomic<bool> &_inSection)
+                    : inSection(_inSection) {
+                assert(!inSection &&
+                       "Concurrent (or re-entrant) enqueue or dequeue operation detected (only one thread at a time may hold the producer or consumer role)");
                inSection = true;
            }

            AE_NO_TSAN ~ReentrantGuard() { inSection = false; }

-private:
-    ReentrantGuard& operator=(ReentrantGuard const&);
+        private:
+            ReentrantGuard &operator=(ReentrantGuard const &);
+
+        private:
+            weak_atomic<bool> &inSection;
+        };

-private:
-    weak_atomic<bool>& inSection;
-};
 #endif

-struct Block
-{
+        struct Block {
            // Avoid false-sharing by putting highly contended variables on their own cache lines
            weak_atomic<size_t> front;    // (Atomic) Elements are read from here
            size_t localTail;            // An uncontended shadow copy of tail, owned by the consumer
@@ -702,75 +691,73 @@ struct Block
            weak_atomic<size_t> tail;    // (Atomic) Elements are enqueued here
            size_t localFront;

-    char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];	// next isn't very contended, but we don't want it on the same cache line as tail (which is)
-    weak_atomic<Block*> next;	// (Atomic)
+            char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) -
+                                  sizeof(size_t)];    // next isn't very contended, but we don't want it on the same cache line as tail (which is)
+            weak_atomic<Block *> next;    // (Atomic)

-    char* data;		// Contents (on heap) are aligned to T's alignment
+            char *data;        // Contents (on heap) are aligned to T's alignment

            const size_t sizeMask;


            // size must be a power of two (and greater than 0)
-    AE_NO_TSAN Block(size_t const& _size, char* _rawThis, char* _data)
-            : front(0UL), localTail(0), tail(0UL), localFront(0), next(nullptr), data(_data), sizeMask(_size - 1), rawThis(_rawThis)
-    {
+            AE_NO_TSAN Block(size_t const &_size, char *_rawThis, char *_data)
+                    : front(0UL), localTail(0), tail(0UL), localFront(0), next(nullptr), data(_data),
+                      sizeMask(_size - 1), rawThis(_rawThis) {
            }

-private:
+        private:
            // C4512 - Assignment operator could not be generated
-    Block& operator=(Block const&);
+            Block &operator=(Block const &);

-public:
-    char* rawThis;
-};
+        public:
+            char *rawThis;
+        };


-static Block* make_block(size_t capacity) AE_NO_TSAN
-{
+        static Block *make_block(size_t capacity) AE_NO_TSAN
+        {
 // Allocate enough memory for the block itself, as well as all the elements it will contain
-auto size = sizeof(Block) + std::alignment_of<Block>::value - 1;
-size += sizeof(T) * capacity + std::alignment_of<T>::value - 1;
-auto newBlockRaw = static_cast<char*>(std::malloc(size));
-if (newBlockRaw == nullptr) {
-return nullptr;
-}
+            auto size = sizeof(Block) + std::alignment_of<Block>::value - 1;
+            size += sizeof(T) * capacity + std::alignment_of<T>::value - 1;
+            auto newBlockRaw = static_cast<char *>(std::malloc(size));
+            if (newBlockRaw == nullptr) {
+                return nullptr;
+            }

-auto newBlockAligned = align_for<Block>(newBlockRaw);
-auto newBlockData = align_for<T>(newBlockAligned + sizeof(Block));
-return new (newBlockAligned) Block(capacity, newBlockRaw, newBlockData);
-}
+            auto newBlockAligned = align_for<Block>(newBlockRaw);
+            auto newBlockData = align_for<T>(newBlockAligned + sizeof(Block));
+            return new(newBlockAligned) Block(capacity, newBlockRaw, newBlockData);
+        }

-private:
-weak_atomic<Block*> frontBlock;		// (Atomic) Elements are dequeued from this block
+    private:
+        weak_atomic<Block *> frontBlock;        // (Atomic) Elements are dequeued from this block

-char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<Block*>)];
-weak_atomic<Block*> tailBlock;		// (Atomic) Elements are enqueued to this block
+        char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<Block *>)];
+        weak_atomic<Block *> tailBlock;        // (Atomic) Elements are enqueued to this block

-size_t largestBlockSize;
+        size_t largestBlockSize;

 #ifndef NDEBUG
-weak_atomic<bool> enqueuing;
-mutable weak_atomic<bool> dequeuing;
+        weak_atomic<bool> enqueuing;
+        mutable weak_atomic<bool> dequeuing;
 #endif
-};
+    };

 // Like ReaderWriterQueue, but also provides blocking operations
-template<typename T, size_t MAX_BLOCK_SIZE = 512>
-class BlockingReaderWriterQueue
-{
-private:
+    template<typename T, size_t MAX_BLOCK_SIZE = 512>
+    class BlockingReaderWriterQueue {
+    private:
        typedef ::moodycamel::ReaderWriterQueue<T, MAX_BLOCK_SIZE> ReaderWriterQueue;

-public:
+    public:
        explicit BlockingReaderWriterQueue(size_t size = 15) AE_NO_TSAN
-            : inner(size), sema(new spsc_sema::LightweightSemaphore())
-    { }
+                : inner(size), sema(new spsc_sema::LightweightSemaphore()) {}

-    BlockingReaderWriterQueue(BlockingReaderWriterQueue&& other) AE_NO_TSAN
-            : inner(std::move(other.inner)), sema(std::move(other.sema))
-    { }
+        BlockingReaderWriterQueue(BlockingReaderWriterQueue &&other) AE_NO_TSAN
+                : inner(std::move(other.inner)), sema(std::move(other.sema)) {}

-    BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue&& other) AE_NO_TSAN
+        BlockingReaderWriterQueue &operator=(BlockingReaderWriterQueue &&other) AE_NO_TSAN
        {
            std::swap(sema, other.sema);
            std::swap(inner, other.inner);
@@ -781,7 +768,7 @@ public:
        // Enqueues a copy of element if there is room in the queue.
        // Returns true if the element was enqueued, false otherwise.
        // Does not allocate memory.
-    AE_FORCEINLINE bool try_enqueue(T const& element) AE_NO_TSAN
+        AE_FORCEINLINE bool try_enqueue(T const &element) AE_NO_TSAN
        {
            if (inner.try_enqueue(element)) {
                sema->signal();
@@ -793,7 +780,7 @@ public:
        // Enqueues a moved copy of element if there is room in the queue.
        // Returns true if the element was enqueued, false otherwise.
        // Does not allocate memory.
-    AE_FORCEINLINE bool try_enqueue(T&& element) AE_NO_TSAN
+        AE_FORCEINLINE bool try_enqueue(T &&element) AE_NO_TSAN
        {
            if (inner.try_enqueue(std::forward<T>(element))) {
                sema->signal();
@@ -803,23 +790,24 @@ public:
        }

 #if MOODYCAMEL_HAS_EMPLACE
+
        // Like try_enqueue() but with emplace semantics (i.e. construct-in-place).
        template<typename... Args>
-    AE_FORCEINLINE bool try_emplace(Args&&... args) AE_NO_TSAN
-    {
+        AE_FORCEINLINE bool try_emplace(Args &&... args) AE_NO_TSAN {
            if (inner.try_emplace(std::forward<Args>(args)...)) {
                sema->signal();
                return true;
            }
            return false;
        }
+
 #endif


        // Enqueues a copy of element on the queue.
        // Allocates an additional block of memory if needed.
        // Only fails (returns false) if memory allocation fails.
-    AE_FORCEINLINE bool enqueue(T const& element) AE_NO_TSAN
+        AE_FORCEINLINE bool enqueue(T const &element) AE_NO_TSAN
        {
            if (inner.enqueue(element)) {
                sema->signal();
@@ -831,7 +819,7 @@ public:
        // Enqueues a moved copy of element on the queue.
        // Allocates an additional block of memory if needed.
        // Only fails (returns false) if memory allocation fails.
-    AE_FORCEINLINE bool enqueue(T&& element) AE_NO_TSAN
+        AE_FORCEINLINE bool enqueue(T &&element) AE_NO_TSAN
        {
            if (inner.enqueue(std::forward<T>(element))) {
                sema->signal();
@@ -841,16 +829,17 @@ public:
        }

 #if MOODYCAMEL_HAS_EMPLACE
+
        // Like enqueue() but with emplace semantics (i.e. construct-in-place).
        template<typename... Args>
-    AE_FORCEINLINE bool emplace(Args&&... args) AE_NO_TSAN
-    {
+        AE_FORCEINLINE bool emplace(Args &&... args) AE_NO_TSAN {
            if (inner.emplace(std::forward<Args>(args)...)) {
                sema->signal();
                return true;
            }
            return false;
        }
+
 #endif


@@ -858,8 +847,7 @@ public:
        // returns false instead. If the queue has at least one element,
        // moves front to result using operator=, then returns true.
        template<typename U>
-    bool try_dequeue(U& result) AE_NO_TSAN
-    {
+        bool try_dequeue(U &result) AE_NO_TSAN {
            if (sema->tryWait()) {
                bool success = inner.try_dequeue(result);
                assert(success);
@@ -873,8 +861,7 @@ public:
        // Attempts to dequeue an element; if the queue is empty,
        // waits until an element is available, then dequeues it.
        template<typename U>
-    void wait_dequeue(U& result) AE_NO_TSAN
-    {
+        void wait_dequeue(U &result) AE_NO_TSAN {
            while (!sema->wait());
            bool success = inner.try_dequeue(result);
            AE_UNUSED(result);
@@ -890,8 +877,7 @@ public:
        // Using a negative timeout indicates an indefinite timeout,
        // and is thus functionally equivalent to calling wait_dequeue.
        template<typename U>
-    bool wait_dequeue_timed(U& result, std::int64_t timeout_usecs) AE_NO_TSAN
-    {
+        bool wait_dequeue_timed(U &result, std::int64_t timeout_usecs) AE_NO_TSAN {
            if (!sema->wait(timeout_usecs)) {
                return false;
            }
@@ -923,7 +909,7 @@ public:
        // queue appears empty at the time the method is called, nullptr is
        // returned instead.
        // Must be called only from the consumer thread.
-    AE_FORCEINLINE T* peek() const AE_NO_TSAN
+        AE_FORCEINLINE T *peek() const AE_NO_TSAN
        {
            return inner.peek();
        }
@@ -962,15 +948,16 @@ public:
            return inner.max_capacity();
        }

-private:
+    private:
        // Disable copying & assignment
-    BlockingReaderWriterQueue(BlockingReaderWriterQueue const&) {  }
-    BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue const&) {  }
+        BlockingReaderWriterQueue(BlockingReaderWriterQueue const &) {}
+
+        BlockingReaderWriterQueue &operator=(BlockingReaderWriterQueue const &) {}

-private:
+    private:
        ReaderWriterQueue inner;
-    std::unique_ptr<spsc_sema::LightweightSemaphore> sema;
-};
+        std::unique_ptr <spsc_sema::LightweightSemaphore> sema;
+    };

 }    // end namespace moodycamel


--- a/zmq_src/ZMQServer.cpp
+++ b/zmq_src/ZMQServer.cpp
@@ -42,9 +42,11 @@ using namespace std;
 shared_ptr <ZMQServer> ZMQServer::zmqServer = nullptr;

 ZMQServer::ZMQServer(bool _checkSignature, bool _checkKeyOwnership, const string &_caCertFile)
-        : outgoingQueue(NUM_ZMQ_WORKER_THREADS), checkSignature(_checkSignature), checkKeyOwnership(_checkKeyOwnership),
+        : incomingQueue(NUM_ZMQ_WORKER_THREADS), checkSignature(_checkSignature), checkKeyOwnership(_checkKeyOwnership),
          caCertFile(_caCertFile), ctx(make_shared<zmq::context_t>(1)) {

+    CHECK_STATE(NUM_ZMQ_WORKER_THREADS > 1);
+
    socket = make_shared<zmq::socket_t>(*ctx, ZMQ_ROUTER);

    if (_checkSignature) {
@@ -277,12 +279,20 @@ void ZMQServer::doOneServerLoop() {

        CHECK_STATE2(msg, ZMQ_COULD_NOT_PARSE);

+
+        uint64_t index = 0;
+
        if ((dynamic_pointer_cast<BLSSignReqMessage>(msg)!= nullptr) ||
             dynamic_pointer_cast<ECDSASignReqMessage>(msg)) {
+            index = NUM_ZMQ_WORKER_THREADS - 1;
        } else {
-
+            index = 0;
        }

+        auto element = pair<shared_ptr<ZMQMessage>, shared_ptr<zmq::message_t>>(msg, identity);
+
+        incomingQueue.at(index).enqueue(element);
+
        result = msg->process();
    } catch (ExitRequestedException) {
        throw;

--- a/zmq_src/ZMQServer.h
+++ b/zmq_src/ZMQServer.h
@@ -34,6 +34,7 @@

 #include "Agent.h"
 #include "WorkerThreadPool.h"
+#include "ZMQMessage.h"

 using namespace moodycamel;

@@ -41,6 +42,7 @@ typedef enum {GOT_INCOMING_MSG = 0, GOT_OUTFOING_MSG = 1} PollResult;

 static const uint64_t NUM_ZMQ_WORKER_THREADS = 2;

+
 class ZMQServer : public Agent{

    uint64_t workerThreads;
@@ -48,9 +50,9 @@ class ZMQServer : public Agent{
    string caCertFile;
    string caCert;

-    ReaderWriterQueue<pair<string, shared_ptr<zmq_msg_t>>> outgoingQueue;
+    ReaderWriterQueue<pair<string, shared_ptr<zmq::message_t>>> outgoingQueue;

-    vector<ReaderWriterQueue<pair<string, shared_ptr<zmq_msg_t>>>> incomingQueue;
+    vector<ReaderWriterQueue<pair<shared_ptr<ZMQMessage>, shared_ptr<zmq::message_t>>>> incomingQueue;

    bool checkKeyOwnership = true;