SKALE-4586 Added Thread Pool

f457b201 · kladko · ecde0240 · f457b201 · f457b201 · f457b201
Unverified Commit f457b201 authored Sep 09, 2021 by kladko
Showing with 1018 additions and 996 deletions

atomicops.h third_party/atomicops.h +385 -362

readerwriterqueue.h third_party/readerwriterqueue.h +617 -630

ZMQServer.cpp zmq_src/ZMQServer.cpp +12 -2

ZMQServer.h zmq_src/ZMQServer.h +4 -2

No files found.
--- a/third_party/atomicops.h
+++ b/third_party/atomicops.h
@@ -136,14 +136,14 @@ namespace moodycamel {

 AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN
 {
-	switch (order) {
-		case memory_order_relaxed: break;
-		case memory_order_acquire: _ReadBarrier(); break;
-		case memory_order_release: _WriteBarrier(); break;
-		case memory_order_acq_rel: _ReadWriteBarrier(); break;
-		case memory_order_seq_cst: _ReadWriteBarrier(); break;
-		default: assert(false);
-	}
+    switch (order) {
+        case memory_order_relaxed: break;
+        case memory_order_acquire: _ReadBarrier(); break;
+        case memory_order_release: _WriteBarrier(); break;
+        case memory_order_acq_rel: _ReadWriteBarrier(); break;
+        case memory_order_seq_cst: _ReadWriteBarrier(); break;
+        default: assert(false);
+    }
 }

 // x86/x64 have a strong memory model -- all loads and stores have
@@ -152,48 +152,48 @@ AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN
 #if defined(AE_ARCH_X86) || defined(AE_ARCH_X64)
 AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN
 {
-	switch (order) {
-		case memory_order_relaxed: break;
-		case memory_order_acquire: _ReadBarrier(); break;
-		case memory_order_release: _WriteBarrier(); break;
-		case memory_order_acq_rel: _ReadWriteBarrier(); break;
-		case memory_order_seq_cst:
-			_ReadWriteBarrier();
-			AeFullSync();
-			_ReadWriteBarrier();
-			break;
-		default: assert(false);
-	}
+    switch (order) {
+        case memory_order_relaxed: break;
+        case memory_order_acquire: _ReadBarrier(); break;
+        case memory_order_release: _WriteBarrier(); break;
+        case memory_order_acq_rel: _ReadWriteBarrier(); break;
+        case memory_order_seq_cst:
+            _ReadWriteBarrier();
+            AeFullSync();
+            _ReadWriteBarrier();
+            break;
+        default: assert(false);
+    }
 }
 #else
 AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN
 {
-	// Non-specialized arch, use heavier memory barriers everywhere just in case :-(
-	switch (order) {
-		case memory_order_relaxed:
-			break;
-		case memory_order_acquire:
-			_ReadBarrier();
-			AeLiteSync();
-			_ReadBarrier();
-			break;
-		case memory_order_release:
-			_WriteBarrier();
-			AeLiteSync();
-			_WriteBarrier();
-			break;
-		case memory_order_acq_rel:
-			_ReadWriteBarrier();
-			AeLiteSync();
-			_ReadWriteBarrier();
-			break;
-		case memory_order_seq_cst:
-			_ReadWriteBarrier();
-			AeFullSync();
-			_ReadWriteBarrier();
-			break;
-		default: assert(false);
-	}
+    // Non-specialized arch, use heavier memory barriers everywhere just in case :-(
+    switch (order) {
+        case memory_order_relaxed:
+            break;
+        case memory_order_acquire:
+            _ReadBarrier();
+            AeLiteSync();
+            _ReadBarrier();
+            break;
+        case memory_order_release:
+            _WriteBarrier();
+            AeLiteSync();
+            _WriteBarrier();
+            break;
+        case memory_order_acq_rel:
+            _ReadWriteBarrier();
+            AeLiteSync();
+            _ReadWriteBarrier();
+            break;
+        case memory_order_seq_cst:
+            _ReadWriteBarrier();
+            AeFullSync();
+            _ReadWriteBarrier();
+            break;
+        default: assert(false);
+    }
 }
 #endif
 }    // end namespace moodycamel
@@ -206,24 +206,48 @@ namespace moodycamel {
    AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN
    {
        switch (order) {
-            case memory_order_relaxed: break;
-            case memory_order_acquire: std::atomic_signal_fence(std::memory_order_acquire); break;
-            case memory_order_release: std::atomic_signal_fence(std::memory_order_release); break;
-            case memory_order_acq_rel: std::atomic_signal_fence(std::memory_order_acq_rel); break;
-            case memory_order_seq_cst: std::atomic_signal_fence(std::memory_order_seq_cst); break;
-            default: assert(false);
+            case memory_order_relaxed:
+                break;
+            case memory_order_acquire:
+                std::atomic_signal_fence(std::memory_order_acquire);
+                break;
+            case memory_order_release:
+                std::atomic_signal_fence(std::memory_order_release);
+                break;
+            case memory_order_acq_rel:
+                std::atomic_signal_fence(std::memory_order_acq_rel);
+                break;
+            case memory_order_seq_cst:
+                std::atomic_signal_fence(std::memory_order_seq_cst);
+                break;
+            default:
+                assert(false);
        }
    }

    AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN
    {
        switch (order) {
-            case memory_order_relaxed: break;
-            case memory_order_acquire: AE_TSAN_ANNOTATE_ACQUIRE(); std::atomic_thread_fence(std::memory_order_acquire); break;
-            case memory_order_release: AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_release); break;
-            case memory_order_acq_rel: AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_acq_rel); break;
-            case memory_order_seq_cst: AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE(); std::atomic_thread_fence(std::memory_order_seq_cst); break;
-            default: assert(false);
+            case memory_order_relaxed:
+                break;
+            case memory_order_acquire:
+                AE_TSAN_ANNOTATE_ACQUIRE();
+                std::atomic_thread_fence(std::memory_order_acquire);
+                break;
+            case memory_order_release:
+                AE_TSAN_ANNOTATE_RELEASE();
+                std::atomic_thread_fence(std::memory_order_release);
+                break;
+            case memory_order_acq_rel:
+                AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE();
+                std::atomic_thread_fence(std::memory_order_acq_rel);
+                break;
+            case memory_order_seq_cst:
+                AE_TSAN_ANNOTATE_ACQUIRE(); AE_TSAN_ANNOTATE_RELEASE();
+                std::atomic_thread_fence(std::memory_order_seq_cst);
+                break;
+            default:
+                assert(false);
        }
    }

@@ -237,8 +261,11 @@ namespace moodycamel {
 #endif

 #ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
+
 #include <atomic>
+
 #endif
+
 #include <utility>

 // WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY:
@@ -247,21 +274,26 @@ namespace moodycamel {
 // at the hardware level -- on most platforms this generally means aligned pointers and integers (only).
 namespace moodycamel {
    template<typename T>
-    class weak_atomic
-    {
+    class weak_atomic {
    public:
-        AE_NO_TSAN weak_atomic() : value() { }
+        AE_NO_TSAN weak_atomic() : value() {}
+
 #ifdef AE_VCPP
-        #pragma warning(push)
+#pragma warning(push)
 #pragma warning(disable: 4100)		// Get rid of (erroneous) 'unreferenced formal parameter' warning
 #endif
-        template<typename U> AE_NO_TSAN weak_atomic(U&& x) : value(std::forward<U>(x)) {  }
+
+        template<typename U>
+        AE_NO_TSAN weak_atomic(U &&x) : value(std::forward<U>(x)) {}
+
 #ifdef __cplusplus_cli
        // Work around bug with universal reference/nullptr combination that only appears when /clr is on
-	AE_NO_TSAN weak_atomic(nullptr_t) : value(nullptr) {  }
+    AE_NO_TSAN weak_atomic(nullptr_t) : value(nullptr) {  }
 #endif
-        AE_NO_TSAN weak_atomic(weak_atomic const& other) : value(other.load()) {  }
-        AE_NO_TSAN weak_atomic(weak_atomic&& other) : value(std::move(other.load())) {  }
+        AE_NO_TSAN weak_atomic(weak_atomic const &other) : value(other.load()) {}
+
+        AE_NO_TSAN weak_atomic(weak_atomic &&other) : value(std::move(other.load())) {}
+
 #ifdef AE_VCPP
 #pragma warning(pop)
 #endif
@@ -271,46 +303,46 @@ namespace moodycamel {

 #ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
        template<typename U> AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN { value = std::forward<U>(x); return *this; }
-	AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN { value = other.value; return *this; }
+    AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN { value = other.value; return *this; }

-	AE_FORCEINLINE T load() const AE_NO_TSAN { return value; }
+    AE_FORCEINLINE T load() const AE_NO_TSAN { return value; }

-	AE_FORCEINLINE T fetch_add_acquire(T increment) AE_NO_TSAN
-	{
+    AE_FORCEINLINE T fetch_add_acquire(T increment) AE_NO_TSAN
+    {
 #if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
-		if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
+        if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
 #if defined(_M_AMD64)
-		else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
+        else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
 #endif
 #else
 #error Unsupported platform
 #endif
-		assert(false && "T must be either a 32 or 64 bit type");
-		return value;
-	}
+        assert(false && "T must be either a 32 or 64 bit type");
+        return value;
+    }

-	AE_FORCEINLINE T fetch_add_release(T increment) AE_NO_TSAN
-	{
+    AE_FORCEINLINE T fetch_add_release(T increment) AE_NO_TSAN
+    {
 #if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
-		if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
+        if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
 #if defined(_M_AMD64)
-		else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
+        else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
 #endif
 #else
 #error Unsupported platform
 #endif
-		assert(false && "T must be either a 32 or 64 bit type");
-		return value;
-	}
+        assert(false && "T must be either a 32 or 64 bit type");
+        return value;
+    }
 #else
+
        template<typename U>
-        AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN
-        {
+        AE_FORCEINLINE weak_atomic const &operator=(U &&x) AE_NO_TSAN {
            value.store(std::forward<U>(x), std::memory_order_relaxed);
            return *this;
        }

-        AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN
+        AE_FORCEINLINE weak_atomic const &operator=(weak_atomic const &other) AE_NO_TSAN
        {
            value.store(other.value.load(std::memory_order_relaxed), std::memory_order_relaxed);
            return *this;
@@ -327,20 +359,21 @@ namespace moodycamel {
        {
            return value.fetch_add(increment, std::memory_order_release);
        }
+
 #endif


    private:
 #ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
        // No std::atomic support, but still need to circumvent compiler optimizations.
-	// `volatile` will make memory access slow, but is guaranteed to be reliable.
-	volatile T value;
+    // `volatile` will make memory access slow, but is guaranteed to be reliable.
+    volatile T value;
 #else
-        std::atomic<T> value;
+        std::atomic <T> value;
 #endif
    };

-}	// end namespace moodycamel
+}    // end namespace moodycamel



@@ -353,11 +386,11 @@ namespace moodycamel {
 // I know this is an ugly hack but it still beats polluting the global
 // namespace with thousands of generic names or adding a .cpp for nothing.
 extern "C" {
-	struct _SECURITY_ATTRIBUTES;
-	__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
-	__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
-	__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
-	__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
+    struct _SECURITY_ATTRIBUTES;
+    __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
+    __declspec(dllimport) int __stdcall CloseHandle(void* hObject);
+    __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
+    __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
 }
 #elif defined(__MACH__)
 #include <mach/mach.h>
@@ -369,8 +402,7 @@ extern "C" {
 #include <task.h>
 #endif

-namespace moodycamel
-{
+namespace moodycamel {
    // Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
    // portable + lightweight semaphore implementations, originally from
    // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
@@ -392,262 +424,261 @@ namespace moodycamel
    // 2. Altered source versions must be plainly marked as such, and must not be
    //    misrepresented as being the original software.
    // 3. This notice may not be removed or altered from any source distribution.
-    namespace spsc_sema
-    {
+    namespace spsc_sema {
 #if defined(_WIN32)
        class Semaphore
-		{
-		private:
-		    void* m_hSema;
-
-		    Semaphore(const Semaphore& other);
-		    Semaphore& operator=(const Semaphore& other);
-
-		public:
-		    AE_NO_TSAN Semaphore(int initialCount = 0) : m_hSema()
-		    {
-		        assert(initialCount >= 0);
-		        const long maxLong = 0x7fffffff;
-		        m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
-		        assert(m_hSema);
-		    }
-
-		    AE_NO_TSAN ~Semaphore()
-		    {
-		        CloseHandle(m_hSema);
-		    }
-
-		    bool wait() AE_NO_TSAN
-		    {
-		    	const unsigned long infinite = 0xffffffff;
-		        return WaitForSingleObject(m_hSema, infinite) == 0;
-		    }
-
-			bool try_wait() AE_NO_TSAN
-			{
-				return WaitForSingleObject(m_hSema, 0) == 0;
-			}
-
-			bool timed_wait(std::uint64_t usecs) AE_NO_TSAN
-			{
-				return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0;
-			}
-
-		    void signal(int count = 1) AE_NO_TSAN
-		    {
-		        while (!ReleaseSemaphore(m_hSema, count, nullptr));
-		    }
-		};
+        {
+        private:
+            void* m_hSema;
+
+            Semaphore(const Semaphore& other);
+            Semaphore& operator=(const Semaphore& other);
+
+        public:
+            AE_NO_TSAN Semaphore(int initialCount = 0) : m_hSema()
+            {
+                assert(initialCount >= 0);
+                const long maxLong = 0x7fffffff;
+                m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
+                assert(m_hSema);
+            }
+
+            AE_NO_TSAN ~Semaphore()
+            {
+                CloseHandle(m_hSema);
+            }
+
+            bool wait() AE_NO_TSAN
+            {
+                const unsigned long infinite = 0xffffffff;
+                return WaitForSingleObject(m_hSema, infinite) == 0;
+            }
+
+            bool try_wait() AE_NO_TSAN
+            {
+                return WaitForSingleObject(m_hSema, 0) == 0;
+            }
+
+            bool timed_wait(std::uint64_t usecs) AE_NO_TSAN
+            {
+                return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0;
+            }
+
+            void signal(int count = 1) AE_NO_TSAN
+            {
+                while (!ReleaseSemaphore(m_hSema, count, nullptr));
+            }
+        };
 #elif defined(__MACH__)
        //---------------------------------------------------------
-		// Semaphore (Apple iOS and OSX)
-		// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
-		//---------------------------------------------------------
-		class Semaphore
-		{
-		private:
-		    semaphore_t m_sema;
-
-		    Semaphore(const Semaphore& other);
-		    Semaphore& operator=(const Semaphore& other);
-
-		public:
-		    AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema()
-		    {
-		        assert(initialCount >= 0);
-		        kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
-		        assert(rc == KERN_SUCCESS);
-		        AE_UNUSED(rc);
-		    }
-
-		    AE_NO_TSAN ~Semaphore()
-		    {
-		        semaphore_destroy(mach_task_self(), m_sema);
-		    }
-
-		    bool wait() AE_NO_TSAN
-		    {
-		        return semaphore_wait(m_sema) == KERN_SUCCESS;
-		    }
-
-			bool try_wait() AE_NO_TSAN
-			{
-				return timed_wait(0);
-			}
-
-			bool timed_wait(std::uint64_t timeout_usecs) AE_NO_TSAN
-			{
-				mach_timespec_t ts;
-				ts.tv_sec = static_cast<unsigned int>(timeout_usecs / 1000000);
-				ts.tv_nsec = static_cast<int>((timeout_usecs % 1000000) * 1000);
-
-				// added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
-				kern_return_t rc = semaphore_timedwait(m_sema, ts);
-				return rc == KERN_SUCCESS;
-			}
-
-		    void signal() AE_NO_TSAN
-		    {
-		        while (semaphore_signal(m_sema) != KERN_SUCCESS);
-		    }
-
-		    void signal(int count) AE_NO_TSAN
-		    {
-		        while (count-- > 0)
-		        {
-		            while (semaphore_signal(m_sema) != KERN_SUCCESS);
-		        }
-		    }
-		};
+        // Semaphore (Apple iOS and OSX)
+        // Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
+        //---------------------------------------------------------
+        class Semaphore
+        {
+        private:
+            semaphore_t m_sema;
+
+            Semaphore(const Semaphore& other);
+            Semaphore& operator=(const Semaphore& other);
+
+        public:
+            AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema()
+            {
+                assert(initialCount >= 0);
+                kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
+                assert(rc == KERN_SUCCESS);
+                AE_UNUSED(rc);
+            }
+
+            AE_NO_TSAN ~Semaphore()
+            {
+                semaphore_destroy(mach_task_self(), m_sema);
+            }
+
+            bool wait() AE_NO_TSAN
+            {
+                return semaphore_wait(m_sema) == KERN_SUCCESS;
+            }
+
+            bool try_wait() AE_NO_TSAN
+            {
+                return timed_wait(0);
+            }
+
+            bool timed_wait(std::uint64_t timeout_usecs) AE_NO_TSAN
+            {
+                mach_timespec_t ts;
+                ts.tv_sec = static_cast<unsigned int>(timeout_usecs / 1000000);
+                ts.tv_nsec = static_cast<int>((timeout_usecs % 1000000) * 1000);
+
+                // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
+                kern_return_t rc = semaphore_timedwait(m_sema, ts);
+                return rc == KERN_SUCCESS;
+            }
+
+            void signal() AE_NO_TSAN
+            {
+                while (semaphore_signal(m_sema) != KERN_SUCCESS);
+            }
+
+            void signal(int count) AE_NO_TSAN
+            {
+                while (count-- > 0)
+                {
+                    while (semaphore_signal(m_sema) != KERN_SUCCESS);
+                }
+            }
+        };
 #elif defined(__unix__)
        //---------------------------------------------------------
-		// Semaphore (POSIX, Linux)
-		//---------------------------------------------------------
-		class Semaphore
-		{
-		private:
-		    sem_t m_sema;
-
-		    Semaphore(const Semaphore& other);
-		    Semaphore& operator=(const Semaphore& other);
-
-		public:
-		    AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema()
-		    {
-		        assert(initialCount >= 0);
-		        int rc = sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount));
-		        assert(rc == 0);
-		        AE_UNUSED(rc);
-		    }
-
-		    AE_NO_TSAN ~Semaphore()
-		    {
-		        sem_destroy(&m_sema);
-		    }
-
-		    bool wait() AE_NO_TSAN
-		    {
-		        // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
-		        int rc;
-		        do
-		        {
-		            rc = sem_wait(&m_sema);
-		        }
-		        while (rc == -1 && errno == EINTR);
-		        return rc == 0;
-		    }
-
-			bool try_wait() AE_NO_TSAN
-			{
-				int rc;
-				do {
-					rc = sem_trywait(&m_sema);
-				} while (rc == -1 && errno == EINTR);
-				return rc == 0;
-			}
-
-			bool timed_wait(std::uint64_t usecs) AE_NO_TSAN
-			{
-				struct timespec ts;
-				const int usecs_in_1_sec = 1000000;
-				const int nsecs_in_1_sec = 1000000000;
-				clock_gettime(CLOCK_REALTIME, &ts);
-				ts.tv_sec += static_cast<time_t>(usecs / usecs_in_1_sec);
-				ts.tv_nsec += static_cast<long>(usecs % usecs_in_1_sec) * 1000;
-				// sem_timedwait bombs if you have more than 1e9 in tv_nsec
-				// so we have to clean things up before passing it in
-				if (ts.tv_nsec >= nsecs_in_1_sec) {
-					ts.tv_nsec -= nsecs_in_1_sec;
-					++ts.tv_sec;
-				}
-
-				int rc;
-				do {
-					rc = sem_timedwait(&m_sema, &ts);
-				} while (rc == -1 && errno == EINTR);
-				return rc == 0;
-			}
-
-		    void signal() AE_NO_TSAN
-		    {
-		        while (sem_post(&m_sema) == -1);
-		    }
-
-		    void signal(int count) AE_NO_TSAN
-		    {
-		        while (count-- > 0)
-		        {
-		            while (sem_post(&m_sema) == -1);
-		        }
-		    }
-		};
+        // Semaphore (POSIX, Linux)
+        //---------------------------------------------------------
+        class Semaphore
+        {
+        private:
+            sem_t m_sema;
+
+            Semaphore(const Semaphore& other);
+            Semaphore& operator=(const Semaphore& other);
+
+        public:
+            AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema()
+            {
+                assert(initialCount >= 0);
+                int rc = sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount));
+                assert(rc == 0);
+                AE_UNUSED(rc);
+            }
+
+            AE_NO_TSAN ~Semaphore()
+            {
+                sem_destroy(&m_sema);
+            }
+
+            bool wait() AE_NO_TSAN
+            {
+                // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
+                int rc;
+                do
+                {
+                    rc = sem_wait(&m_sema);
+                }
+                while (rc == -1 && errno == EINTR);
+                return rc == 0;
+            }
+
+            bool try_wait() AE_NO_TSAN
+            {
+                int rc;
+                do {
+                    rc = sem_trywait(&m_sema);
+                } while (rc == -1 && errno == EINTR);
+                return rc == 0;
+            }
+
+            bool timed_wait(std::uint64_t usecs) AE_NO_TSAN
+            {
+                struct timespec ts;
+                const int usecs_in_1_sec = 1000000;
+                const int nsecs_in_1_sec = 1000000000;
+                clock_gettime(CLOCK_REALTIME, &ts);
+                ts.tv_sec += static_cast<time_t>(usecs / usecs_in_1_sec);
+                ts.tv_nsec += static_cast<long>(usecs % usecs_in_1_sec) * 1000;
+                // sem_timedwait bombs if you have more than 1e9 in tv_nsec
+                // so we have to clean things up before passing it in
+                if (ts.tv_nsec >= nsecs_in_1_sec) {
+                    ts.tv_nsec -= nsecs_in_1_sec;
+                    ++ts.tv_sec;
+                }
+
+                int rc;
+                do {
+                    rc = sem_timedwait(&m_sema, &ts);
+                } while (rc == -1 && errno == EINTR);
+                return rc == 0;
+            }
+
+            void signal() AE_NO_TSAN
+            {
+                while (sem_post(&m_sema) == -1);
+            }
+
+            void signal(int count) AE_NO_TSAN
+            {
+                while (count-- > 0)
+                {
+                    while (sem_post(&m_sema) == -1);
+                }
+            }
+        };
 #elif defined(FREERTOS)
        //---------------------------------------------------------
-		// Semaphore (FreeRTOS)
-		//---------------------------------------------------------
-		class Semaphore
-		{
-		private:
-			SemaphoreHandle_t m_sema;
-
-			Semaphore(const Semaphore& other);
-			Semaphore& operator=(const Semaphore& other);
-
-		public:
-			AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema()
-			{
-				assert(initialCount >= 0);
-				m_sema = xSemaphoreCreateCounting(static_cast<UBaseType_t>(~0ull), static_cast<UBaseType_t>(initialCount));
-				assert(m_sema);
-			}
-
-			AE_NO_TSAN ~Semaphore()
-			{
-				vSemaphoreDelete(m_sema);
-			}
-
-			bool wait() AE_NO_TSAN
-			{
-				return xSemaphoreTake(m_sema, portMAX_DELAY) == pdTRUE;
-			}
-
-			bool try_wait() AE_NO_TSAN
-			{
-				// Note: In an ISR context, if this causes a task to unblock,
-				// the caller won't know about it
-				if (xPortIsInsideInterrupt())
-					return xSemaphoreTakeFromISR(m_sema, NULL) == pdTRUE;
-				return xSemaphoreTake(m_sema, 0) == pdTRUE;
-			}
-
-			bool timed_wait(std::uint64_t usecs) AE_NO_TSAN
-			{
-				std::uint64_t msecs = usecs / 1000;
-				TickType_t ticks = static_cast<TickType_t>(msecs / portTICK_PERIOD_MS);
-				if (ticks == 0)
-					return try_wait();
-				return xSemaphoreTake(m_sema, ticks) == pdTRUE;
-			}
-
-			void signal() AE_NO_TSAN
-			{
-				// Note: In an ISR context, if this causes a task to unblock,
-				// the caller won't know about it
-				BaseType_t rc;
-				if (xPortIsInsideInterrupt())
-					rc = xSemaphoreGiveFromISR(m_sema, NULL);
-				else
-					rc = xSemaphoreGive(m_sema);
-				assert(rc == pdTRUE);
-				AE_UNUSED(rc);
-			}
-
-			void signal(int count) AE_NO_TSAN
-			{
-				while (count-- > 0)
-					signal();
-			}
-		};
+        // Semaphore (FreeRTOS)
+        //---------------------------------------------------------
+        class Semaphore
+        {
+        private:
+            SemaphoreHandle_t m_sema;
+
+            Semaphore(const Semaphore& other);
+            Semaphore& operator=(const Semaphore& other);
+
+        public:
+            AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema()
+            {
+                assert(initialCount >= 0);
+                m_sema = xSemaphoreCreateCounting(static_cast<UBaseType_t>(~0ull), static_cast<UBaseType_t>(initialCount));
+                assert(m_sema);
+            }
+
+            AE_NO_TSAN ~Semaphore()
+            {
+                vSemaphoreDelete(m_sema);
+            }
+
+            bool wait() AE_NO_TSAN
+            {
+                return xSemaphoreTake(m_sema, portMAX_DELAY) == pdTRUE;
+            }
+
+            bool try_wait() AE_NO_TSAN
+            {
+                // Note: In an ISR context, if this causes a task to unblock,
+                // the caller won't know about it
+                if (xPortIsInsideInterrupt())
+                    return xSemaphoreTakeFromISR(m_sema, NULL) == pdTRUE;
+                return xSemaphoreTake(m_sema, 0) == pdTRUE;
+            }
+
+            bool timed_wait(std::uint64_t usecs) AE_NO_TSAN
+            {
+                std::uint64_t msecs = usecs / 1000;
+                TickType_t ticks = static_cast<TickType_t>(msecs / portTICK_PERIOD_MS);
+                if (ticks == 0)
+                    return try_wait();
+                return xSemaphoreTake(m_sema, ticks) == pdTRUE;
+            }
+
+            void signal() AE_NO_TSAN
+            {
+                // Note: In an ISR context, if this causes a task to unblock,
+                // the caller won't know about it
+                BaseType_t rc;
+                if (xPortIsInsideInterrupt())
+                    rc = xSemaphoreGiveFromISR(m_sema, NULL);
+                else
+                    rc = xSemaphoreGive(m_sema);
+                assert(rc == pdTRUE);
+                AE_UNUSED(rc);
+            }
+
+            void signal(int count) AE_NO_TSAN
+            {
+                while (count-- > 0)
+                    signal();
+            }
+        };
 #else
 #error Unsupported platform! (No semaphore wrapper available)
 #endif
@@ -655,8 +686,7 @@ namespace moodycamel
        //---------------------------------------------------------
        // LightweightSemaphore
        //---------------------------------------------------------
-        class LightweightSemaphore
-        {
+        class LightweightSemaphore {
        public:
            typedef std::make_signed<std::size_t>::type ssize_t;

@@ -671,10 +701,8 @@ namespace moodycamel
                // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
                // as threads start hitting the kernel semaphore.
                int spin = 1024;
-                while (--spin >= 0)
-                {
-                    if (m_count.load() > 0)
-                    {
+                while (--spin >= 0) {
+                    if (m_count.load() > 0) {
                        m_count.fetch_add_acquire(-1);
                        return true;
                    }
@@ -683,8 +711,7 @@ namespace moodycamel
                oldCount = m_count.fetch_add_acquire(-1);
                if (oldCount > 0)
                    return true;
-                if (timeout_usecs < 0)
-                {
+                if (timeout_usecs < 0) {
                    if (m_sema.wait())
                        return true;
                }
@@ -695,8 +722,7 @@ namespace moodycamel
                // it. So we have to re-adjust the count, but only if the semaphore
                // wasn't signaled enough times for us too since then. If it was, we
                // need to release the semaphore too.
-                while (true)
-                {
+                while (true) {
                    oldCount = m_count.fetch_add_release(1);
                    if (oldCount < 0)
                        return false;    // successfully restored things to the way they were
@@ -708,15 +734,13 @@ namespace moodycamel
            }

        public:
-            AE_NO_TSAN LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount), m_sema()
-            {
+            AE_NO_TSAN LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount), m_sema() {
                assert(initialCount >= 0);
            }

            bool tryWait() AE_NO_TSAN
            {
-                if (m_count.load() > 0)
-                {
+                if (m_count.load() > 0) {
                    m_count.fetch_add_acquire(-1);
                    return true;
                }
@@ -738,8 +762,7 @@ namespace moodycamel
                assert(count >= 0);
                ssize_t oldCount = m_count.fetch_add_release(count);
                assert(oldCount >= -1);
-                if (oldCount < 0)
-                {
+                if (oldCount < 0) {
                    m_sema.signal(1);
                }
            }
@@ -750,8 +773,8 @@ namespace moodycamel
                return count > 0 ? static_cast<std::size_t>(count) : 0;
            }
        };
-    }	// end namespace spsc_sema
-}	// end namespace moodycamel
+    }    // end namespace spsc_sema
+}    // end namespace moodycamel

 #if defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))
 #pragma warning(pop)

--- a/third_party/readerwriterqueue.h
+++ b/third_party/readerwriterqueue.h
@@ -12,8 +12,9 @@
 #include <stdexcept>
 #include <new>
 #include <cstdint>
-#include <cstdlib>		// For malloc/free/abort & size_t
+#include <cstdlib>        // For malloc/free/abort & size_t
 #include <memory>
+
 #if __cplusplus > 199711L || _MSC_VER >= 1700 // C++11 or VS2012
 #include <chrono>
 #endif
@@ -73,219 +74,214 @@
 namespace moodycamel {

    template<typename T, size_t MAX_BLOCK_SIZE = 512>
-    class MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE ReaderWriterQueue
-{
-    // Design: Based on a queue-of-queues. The low-level queues are just
-    // circular buffers with front and tail indices indicating where the
-    // next element to dequeue is and where the next element can be enqueued,
-    // respectively. Each low-level queue is called a "block". Each block
-    // wastes exactly one element's worth of space to keep the design simple
-    // (if front == tail then the queue is empty, and can't be full).
-    // The high-level queue is a circular linked list of blocks; again there
-    // is a front and tail, but this time they are pointers to the blocks.
-    // The front block is where the next element to be dequeued is, provided
-    // the block is not empty. The back block is where elements are to be
-    // enqueued, provided the block is not full.
-    // The producer thread owns all the tail indices/pointers. The consumer
-    // thread owns all the front indices/pointers. Both threads read each
-    // other's variables, but only the owning thread updates them. E.g. After
-    // the consumer reads the producer's tail, the tail may change before the
-    // consumer is done dequeuing an object, but the consumer knows the tail
-    // will never go backwards, only forwards.
-    // If there is no room to enqueue an object, an additional block (of
-    // equal size to the last block) is added. Blocks are never removed.
+    class MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE ReaderWriterQueue {
+        // Design: Based on a queue-of-queues. The low-level queues are just
+        // circular buffers with front and tail indices indicating where the
+        // next element to dequeue is and where the next element can be enqueued,
+        // respectively. Each low-level queue is called a "block". Each block
+        // wastes exactly one element's worth of space to keep the design simple
+        // (if front == tail then the queue is empty, and can't be full).
+        // The high-level queue is a circular linked list of blocks; again there
+        // is a front and tail, but this time they are pointers to the blocks.
+        // The front block is where the next element to be dequeued is, provided
+        // the block is not empty. The back block is where elements are to be
+        // enqueued, provided the block is not full.
+        // The producer thread owns all the tail indices/pointers. The consumer
+        // thread owns all the front indices/pointers. Both threads read each
+        // other's variables, but only the owning thread updates them. E.g. After
+        // the consumer reads the producer's tail, the tail may change before the
+        // consumer is done dequeuing an object, but the consumer knows the tail
+        // will never go backwards, only forwards.
+        // If there is no room to enqueue an object, an additional block (of
+        // equal size to the last block) is added. Blocks are never removed.

    public:
-    typedef T value_type;
+        typedef T value_type;

-    // Constructs a queue that can hold at least `size` elements without further
-    // allocations. If more than MAX_BLOCK_SIZE elements are requested,
-    // then several blocks of MAX_BLOCK_SIZE each are reserved (including
-    // at least one extra buffer block).
-    AE_NO_TSAN explicit ReaderWriterQueue(size_t size = 15)
+        // Constructs a queue that can hold at least `size` elements without further
+        // allocations. If more than MAX_BLOCK_SIZE elements are requested,
+        // then several blocks of MAX_BLOCK_SIZE each are reserved (including
+        // at least one extra buffer block).
+        AE_NO_TSAN explicit ReaderWriterQueue(size_t size = 15)
 #ifndef NDEBUG
-    : enqueuing(false)
-    ,dequeuing(false)
+                : enqueuing(false), dequeuing(false) // debug-only flags passed to ReentrantGuard by the enqueue/dequeue paths
 #endif
-{
-    assert(MAX_BLOCK_SIZE == ceilToPow2(MAX_BLOCK_SIZE) && "MAX_BLOCK_SIZE must be a power of 2");
-    assert(MAX_BLOCK_SIZE >= 2 && "MAX_BLOCK_SIZE must be at least 2");
-
-    Block* firstBlock = nullptr;
-
-    largestBlockSize = ceilToPow2(size + 1);		// We need a spare slot to fit size elements in the block
-    if (largestBlockSize > MAX_BLOCK_SIZE * 2) {
-    // We need a spare block in case the producer is writing to a different block the consumer is reading from, and
-    // wants to enqueue the maximum number of elements. We also need a spare element in each block to avoid the ambiguity
-    // between front == tail meaning "empty" and "full".
-    // So the effective number of slots that are guaranteed to be usable at any time is the block size - 1 times the
-    // number of blocks - 1. Solving for size and applying a ceiling to the division gives us (after simplifying):
-    size_t initialBlockCount = (size + MAX_BLOCK_SIZE * 2 - 3) / (MAX_BLOCK_SIZE - 1);
-    largestBlockSize = MAX_BLOCK_SIZE;
-    Block* lastBlock = nullptr;
-    for (size_t i = 0; i != initialBlockCount; ++i) {
-    auto block = make_block(largestBlockSize);
-    if (block == nullptr) {
+        {
+            assert(MAX_BLOCK_SIZE == ceilToPow2(MAX_BLOCK_SIZE) && "MAX_BLOCK_SIZE must be a power of 2");
+            assert(MAX_BLOCK_SIZE >= 2 && "MAX_BLOCK_SIZE must be at least 2");
+
+            Block *firstBlock = nullptr; // first block of the circular list built below
+
+            largestBlockSize = ceilToPow2(size + 1);        // We need a spare slot to fit size elements in the block
+            if (largestBlockSize > MAX_BLOCK_SIZE * 2) {
+                // We need a spare block in case the producer is writing to a different block than the one the consumer is
+                // reading from, and wants to enqueue the maximum number of elements. We also need a spare element in each block to avoid the ambiguity
+                // between front == tail meaning "empty" and "full".
+                // So the effective number of slots that are guaranteed to be usable at any time is the block size - 1 times the
+                // number of blocks - 1. Solving for size and applying a ceiling to the division gives us (after simplifying):
+                size_t initialBlockCount = (size + MAX_BLOCK_SIZE * 2 - 3) / (MAX_BLOCK_SIZE - 1);
+                largestBlockSize = MAX_BLOCK_SIZE; // request too large for one block: chain several MAX_BLOCK_SIZE blocks instead
+                Block *lastBlock = nullptr; // most recently appended block
+                for (size_t i = 0; i != initialBlockCount; ++i) {
+                    auto block = make_block(largestBlockSize);
+                    if (block == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
-    throw std::bad_alloc();
+                        throw std::bad_alloc(); // make_block returned nullptr
 #else
-    abort();
+                        abort(); // exceptions disabled: a failed allocation terminates the process
 #endif
-}
-if (firstBlock == nullptr) {
-firstBlock = block;
-}
-else {
-lastBlock->next = block;
-}
-lastBlock = block;
-block->next = firstBlock;
-}
-}
-else {
-firstBlock = make_block(largestBlockSize);
-if (firstBlock == nullptr) {
+                    }
+                    if (firstBlock == nullptr) {
+                        firstBlock = block;
+                    } else {
+                        lastBlock->next = block; // append after the previous block
+                    }
+                    lastBlock = block;
+                    block->next = firstBlock; // newest block links back to the first, keeping the list circular
+                }
+            } else {
+                firstBlock = make_block(largestBlockSize); // a single block is enough
+                if (firstBlock == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
-throw std::bad_alloc();
+                    throw std::bad_alloc(); // make_block returned nullptr
 #else
-abort();
+                    abort(); // exceptions disabled: a failed allocation terminates the process
 #endif
-}
-firstBlock->next = firstBlock;
-}
-frontBlock = firstBlock;
-tailBlock = firstBlock;
+                }
+                firstBlock->next = firstBlock; // one-block ring: the block points to itself
+            }
+            frontBlock = firstBlock; // consumer side starts on the first block
+            tailBlock = firstBlock; // producer side starts on the same block

 // Make sure the reader/writer threads will have the initialized memory setup above:
-fence(memory_order_sync);
-}
+            fence(memory_order_sync);
+        }

 // Note: The queue should not be accessed concurrently while it's
 // being moved. It's up to the user to synchronize this.
-AE_NO_TSAN ReaderWriterQueue(ReaderWriterQueue&& other)
-: frontBlock(other.frontBlock.load()),
-tailBlock(other.tailBlock.load()),
-largestBlockSize(other.largestBlockSize)
+        AE_NO_TSAN ReaderWriterQueue(ReaderWriterQueue &&other) // move constructor: takes over other's blocks
+                : frontBlock(other.frontBlock.load()),
+                  tailBlock(other.tailBlock.load()),
+                  largestBlockSize(other.largestBlockSize)
 #ifndef NDEBUG
-,enqueuing(false)
-,dequeuing(false)
+                , enqueuing(false), dequeuing(false) // debug-only flags start cleared
 #endif
-{
-other.largestBlockSize = 32;
-Block* b = other.make_block(other.largestBlockSize);
-if (b == nullptr) {
+        {
+            other.largestBlockSize = 32; // the moved-from queue is reset to one fresh block requested with size 32
+            Block *b = other.make_block(other.largestBlockSize);
+            if (b == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
-throw std::bad_alloc();
+                throw std::bad_alloc(); // make_block returned nullptr
 #else
-abort();
+                abort(); // exceptions disabled: a failed allocation terminates the process
 #endif
-}
-b->next = b;
-other.frontBlock = b;
-other.tailBlock = b;
-}
+            }
+            b->next = b; // one-block ring
+            other.frontBlock = b; // other now points only at the new block
+            other.tailBlock = b;
+        }

 // Note: The queue should not be accessed concurrently while it's
 // being moved. It's up to the user to synchronize this.
-ReaderWriterQueue& operator=(ReaderWriterQueue&& other) AE_NO_TSAN
-{
-Block* b = frontBlock.load();
-frontBlock = other.frontBlock.load();
-other.frontBlock = b;
-b = tailBlock.load();
-tailBlock = other.tailBlock.load();
-other.tailBlock = b;
-std::swap(largestBlockSize, other.largestBlockSize);
-return *this;
-}
+        ReaderWriterQueue &operator=(ReaderWriterQueue &&other) AE_NO_TSAN // move assignment, implemented as a swap with other
+        {
+            Block *b = frontBlock.load(); // b is the temporary for exchanging the frontBlock pointers
+            frontBlock = other.frontBlock.load();
+            other.frontBlock = b;
+            b = tailBlock.load(); // same exchange for the tailBlock pointers
+            tailBlock = other.tailBlock.load();
+            other.tailBlock = b;
+            std::swap(largestBlockSize, other.largestBlockSize);
+            return *this; // other now holds the blocks this queue had before the call
+        }

 // Note: The queue should not be accessed concurrently while it's
 // being deleted. It's up to the user to synchronize this.
-AE_NO_TSAN ~ReaderWriterQueue()
-{
-    // Make sure we get the latest version of all variables from other CPUs:
-    fence(memory_order_sync);
-
-    // Destroy any remaining objects in queue and free memory
-    Block* frontBlock_ = frontBlock;
-    Block* block = frontBlock_;
-    do {
-        Block* nextBlock = block->next;
-        size_t blockFront = block->front;
-        size_t blockTail = block->tail;
-
-        for (size_t i = blockFront; i != blockTail; i = (i + 1) & block->sizeMask) {
-            auto element = reinterpret_cast<T*>(block->data + i * sizeof(T));
-            element->~T();
-            (void)element;
+        AE_NO_TSAN ~ReaderWriterQueue() {
+            // Make sure we get the latest version of all variables from other CPUs:
+            fence(memory_order_sync);
+
+            // Destroy any remaining objects in queue and free memory
+            Block *frontBlock_ = frontBlock; // starting block; the walk ends when it comes back around to it
+            Block *block = frontBlock_;
+            do {
+                Block *nextBlock = block->next; // read the link before this block is destroyed and freed
+                size_t blockFront = block->front;
+                size_t blockTail = block->tail;
+
+                for (size_t i = blockFront; i != blockTail; i = (i + 1) & block->sizeMask) { // visit every slot from front up to (not including) tail, wrapping via sizeMask
+                    auto element = reinterpret_cast<T *>(block->data + i * sizeof(T));
+                    element->~T(); // run the destructor of each element still in the queue
+                    (void) element;
+                }
+
+                auto rawBlock = block->rawThis; // pointer passed to std::free below; saved before ~Block() runs
+                block->~Block();
+                std::free(rawBlock);
+                block = nextBlock;
+            } while (block != frontBlock_);
         }

-        auto rawBlock = block->rawThis;
-        block->~Block();
-        std::free(rawBlock);
-        block = nextBlock;
-    } while (block != frontBlock_);
-}
-

 // Enqueues a copy of element if there is room in the queue.
 // Returns true if the element was enqueued, false otherwise.
 // Does not allocate memory.
-AE_FORCEINLINE bool try_enqueue(T const& element) AE_NO_TSAN
-{
-return inner_enqueue<CannotAlloc>(element);
-}
+        AE_FORCEINLINE bool try_enqueue(T const &element) AE_NO_TSAN
+        {
+            return inner_enqueue<CannotAlloc>(element); // copy overload; CannotAlloc selects the non-allocating mode
+        }

 // Enqueues a moved copy of element if there is room in the queue.
 // Returns true if the element was enqueued, false otherwise.
 // Does not allocate memory.
-AE_FORCEINLINE bool try_enqueue(T&& element) AE_NO_TSAN
-{
-return inner_enqueue<CannotAlloc>(std::forward<T>(element));
-}
+        AE_FORCEINLINE bool try_enqueue(T &&element) AE_NO_TSAN
+        {
+            return inner_enqueue<CannotAlloc>(std::forward<T>(element)); // rvalue overload; CannotAlloc selects the non-allocating mode
+        }

 #if MOODYCAMEL_HAS_EMPLACE
+
 // Like try_enqueue() but with emplace semantics (i.e. construct-in-place).
-template<typename... Args>
-AE_FORCEINLINE bool try_emplace(Args&&... args) AE_NO_TSAN
-{
-return inner_enqueue<CannotAlloc>(std::forward<Args>(args)...);
-}
+        template<typename... Args>
+        AE_FORCEINLINE bool try_emplace(Args &&... args) AE_NO_TSAN {
+            return inner_enqueue<CannotAlloc>(std::forward<Args>(args)...); // args are forwarded to inner_enqueue; CannotAlloc selects the non-allocating mode
+        }
+
 #endif

 // Enqueues a copy of element on the queue.
 // Allocates an additional block of memory if needed.
 // Only fails (returns false) if memory allocation fails.
-AE_FORCEINLINE bool enqueue(T const& element) AE_NO_TSAN
-{
-return inner_enqueue<CanAlloc>(element);
-}
+        AE_FORCEINLINE bool enqueue(T const &element) AE_NO_TSAN
+        {
+            return inner_enqueue<CanAlloc>(element); // copy overload; CanAlloc selects the allocating mode
+        }

 // Enqueues a moved copy of element on the queue.
 // Allocates an additional block of memory if needed.
 // Only fails (returns false) if memory allocation fails.
-AE_FORCEINLINE bool enqueue(T&& element) AE_NO_TSAN
-{
-return inner_enqueue<CanAlloc>(std::forward<T>(element));
-}
+        AE_FORCEINLINE bool enqueue(T &&element) AE_NO_TSAN
+        {
+            return inner_enqueue<CanAlloc>(std::forward<T>(element)); // rvalue overload; CanAlloc selects the allocating mode
+        }

 #if MOODYCAMEL_HAS_EMPLACE
+
 // Like enqueue() but with emplace semantics (i.e. construct-in-place).
-template<typename... Args>
-AE_FORCEINLINE bool emplace(Args&&... args) AE_NO_TSAN
-{
-return inner_enqueue<CanAlloc>(std::forward<Args>(args)...);
-}
+        template<typename... Args>
+        AE_FORCEINLINE bool emplace(Args &&... args) AE_NO_TSAN {
+            return inner_enqueue<CanAlloc>(std::forward<Args>(args)...); // args are forwarded to inner_enqueue; CanAlloc selects the allocating mode
+        }
+
 #endif

 // Attempts to dequeue an element; if the queue is empty,
 // returns false instead. If the queue has at least one element,
 // moves front to result using operator=, then returns true.
-template<typename U>
-bool try_dequeue(U& result) AE_NO_TSAN
-{
+        template<typename U>
+        bool try_dequeue(U &result) AE_NO_TSAN {
 #ifndef NDEBUG
-ReentrantGuard guard(this->dequeuing);
+            ReentrantGuard guard(this->dequeuing);
 #endif

 // High-level pseudocode:
@@ -305,75 +301,73 @@ ReentrantGuard guard(this->dequeuing);
 // then re-read the front block and check if it's not empty again, then check if the tail
 // block has advanced.

-Block* frontBlock_ = frontBlock.load();
-size_t blockTail = frontBlock_->localTail;
-size_t blockFront = frontBlock_->front.load();
+            Block *frontBlock_ = frontBlock.load();
+            size_t blockTail = frontBlock_->localTail;
+            size_t blockFront = frontBlock_->front.load();

-if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
-fence(memory_order_acquire);
+            if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
+                fence(memory_order_acquire);

-non_empty_front_block:
+                non_empty_front_block:
 // Front block not empty, dequeue from here
-auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
-result = std::move(*element);
-element->~T();
+                auto element = reinterpret_cast<T *>(frontBlock_->data + blockFront * sizeof(T));
+                result = std::move(*element);
+                element->~T();

-blockFront = (blockFront + 1) & frontBlock_->sizeMask;
+                blockFront = (blockFront + 1) & frontBlock_->sizeMask;

-fence(memory_order_release);
-frontBlock_->front = blockFront;
-}
-else if (frontBlock_ != tailBlock.load()) {
-fence(memory_order_acquire);
+                fence(memory_order_release);
+                frontBlock_->front = blockFront;
+            } else if (frontBlock_ != tailBlock.load()) {
+                fence(memory_order_acquire);

-frontBlock_ = frontBlock.load();
-blockTail = frontBlock_->localTail = frontBlock_->tail.load();
-blockFront = frontBlock_->front.load();
-fence(memory_order_acquire);
+                frontBlock_ = frontBlock.load();
+                blockTail = frontBlock_->localTail = frontBlock_->tail.load();
+                blockFront = frontBlock_->front.load();
+                fence(memory_order_acquire);

-if (blockFront != blockTail) {
+                if (blockFront != blockTail) {
 // Oh look, the front block isn't empty after all
-goto non_empty_front_block;
-}
+                    goto non_empty_front_block;
+                }

 // Front block is empty but there's another block ahead, advance to it
-Block* nextBlock = frontBlock_->next;
+                Block *nextBlock = frontBlock_->next;
 // Don't need an acquire fence here since next can only ever be set on the tailBlock,
 // and we're not the tailBlock, and we did an acquire earlier after reading tailBlock which
 // ensures next is up-to-date on this CPU in case we recently were at tailBlock.

-size_t nextBlockFront = nextBlock->front.load();
-size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
-fence(memory_order_acquire);
+                size_t nextBlockFront = nextBlock->front.load();
+                size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
+                fence(memory_order_acquire);

 // Since the tailBlock is only ever advanced after being written to,
 // we know there's for sure an element to dequeue on it
-assert(nextBlockFront != nextBlockTail);
-AE_UNUSED(nextBlockTail);
+                assert(nextBlockFront != nextBlockTail);
+                AE_UNUSED(nextBlockTail);

 // We're done with this block, let the producer use it if it needs
-fence(memory_order_release);		// Expose possibly pending changes to frontBlock->front from last dequeue
-frontBlock = frontBlock_ = nextBlock;
+                fence(memory_order_release);        // Expose possibly pending changes to frontBlock->front from last dequeue
+                frontBlock = frontBlock_ = nextBlock;

-compiler_fence(memory_order_release);	// Not strictly needed
+                compiler_fence(memory_order_release);    // Not strictly needed

-auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
+                auto element = reinterpret_cast<T *>(frontBlock_->data + nextBlockFront * sizeof(T));

-result = std::move(*element);
-element->~T();
+                result = std::move(*element);
+                element->~T();

-nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
+                nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;

-fence(memory_order_release);
-frontBlock_->front = nextBlockFront;
-}
-else {
+                fence(memory_order_release);
+                frontBlock_->front = nextBlockFront;
+            } else {
 // No elements in current block and no other block to advance to
-return false;
-}
+                return false;
+            }

-return true;
-}
+            return true;
+        }


 // Returns a pointer to the front element in the queue (the one that
@@ -381,129 +375,126 @@ return true;
 // queue appears empty at the time the method is called, nullptr is
 // returned instead.
 // Must be called only from the consumer thread.
-T* peek() const AE_NO_TSAN
-{
+        T *peek() const AE_NO_TSAN
+        {
 #ifndef NDEBUG
-ReentrantGuard guard(this->dequeuing);
+            ReentrantGuard guard(this->dequeuing); // debug-only guard on the dequeuing flag
 #endif
 // See try_dequeue() for reasoning

-Block* frontBlock_ = frontBlock.load();
-size_t blockTail = frontBlock_->localTail;
-size_t blockFront = frontBlock_->front.load();
+            Block *frontBlock_ = frontBlock.load();
+            size_t blockTail = frontBlock_->localTail; // cached tail value, tried first; tail itself is reloaded only if this suggests "empty"
+            size_t blockFront = frontBlock_->front.load();

-if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
-fence(memory_order_acquire);
-non_empty_front_block:
-return reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
-}
-else if (frontBlock_ != tailBlock.load()) {
-fence(memory_order_acquire);
-frontBlock_ = frontBlock.load();
-blockTail = frontBlock_->localTail = frontBlock_->tail.load();
-blockFront = frontBlock_->front.load();
-fence(memory_order_acquire);
+            if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) { // second operand refreshes localTail from tail
+                fence(memory_order_acquire);
+                non_empty_front_block:
+                return reinterpret_cast<T *>(frontBlock_->data + blockFront * sizeof(T)); // address of the front slot; nothing is removed or modified
+            } else if (frontBlock_ != tailBlock.load()) { // front block looks empty and the tail block is a different block
+                fence(memory_order_acquire);
+                frontBlock_ = frontBlock.load();
+                blockTail = frontBlock_->localTail = frontBlock_->tail.load();
+                blockFront = frontBlock_->front.load();
+                fence(memory_order_acquire);

-if (blockFront != blockTail) {
-goto non_empty_front_block;
-}
+                if (blockFront != blockTail) { // the re-read shows the front block is non-empty after all
+                    goto non_empty_front_block;
+                }

-Block* nextBlock = frontBlock_->next;
+                Block *nextBlock = frontBlock_->next; // front block is empty, so look at the next block

-size_t nextBlockFront = nextBlock->front.load();
-fence(memory_order_acquire);
+                size_t nextBlockFront = nextBlock->front.load();
+                fence(memory_order_acquire);

-assert(nextBlockFront != nextBlock->tail.load());
-return reinterpret_cast<T*>(nextBlock->data + nextBlockFront * sizeof(T));
-}
+                assert(nextBlockFront != nextBlock->tail.load()); // the next block is expected to be non-empty here
+                return reinterpret_cast<T *>(nextBlock->data + nextBlockFront * sizeof(T)); // unlike pop(), frontBlock is not advanced
+            }

-return nullptr;
-}
+            return nullptr; // front block empty and no other block to look in
+        }

 // Removes the front element from the queue, if any, without returning it.
 // Returns true on success, or false if the queue appeared empty at the time
 // `pop` was called.
-bool pop() AE_NO_TSAN
-{
+        bool pop() AE_NO_TSAN
+        {
 #ifndef NDEBUG
-ReentrantGuard guard(this->dequeuing);
+            ReentrantGuard guard(this->dequeuing); // debug-only guard on the dequeuing flag
 #endif
 // See try_dequeue() for reasoning

-Block* frontBlock_ = frontBlock.load();
-size_t blockTail = frontBlock_->localTail;
-size_t blockFront = frontBlock_->front.load();
+            Block *frontBlock_ = frontBlock.load();
+            size_t blockTail = frontBlock_->localTail; // cached tail value, tried first; tail itself is reloaded only if this suggests "empty"
+            size_t blockFront = frontBlock_->front.load();

-if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
-fence(memory_order_acquire);
+            if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) { // second operand refreshes localTail from tail
+                fence(memory_order_acquire);

-non_empty_front_block:
-auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
-element->~T();
+                non_empty_front_block:
+                auto element = reinterpret_cast<T *>(frontBlock_->data + blockFront * sizeof(T));
+                element->~T(); // destroy the front element in place; it is not returned

-blockFront = (blockFront + 1) & frontBlock_->sizeMask;
+                blockFront = (blockFront + 1) & frontBlock_->sizeMask; // advance the index, wrapping via sizeMask

-fence(memory_order_release);
-frontBlock_->front = blockFront;
-}
-else if (frontBlock_ != tailBlock.load()) {
-fence(memory_order_acquire);
-frontBlock_ = frontBlock.load();
-blockTail = frontBlock_->localTail = frontBlock_->tail.load();
-blockFront = frontBlock_->front.load();
-fence(memory_order_acquire);
+                fence(memory_order_release);
+                frontBlock_->front = blockFront; // store the new front index after the release fence
+            } else if (frontBlock_ != tailBlock.load()) { // front block looks empty and the tail block is a different block
+                fence(memory_order_acquire);
+                frontBlock_ = frontBlock.load();
+                blockTail = frontBlock_->localTail = frontBlock_->tail.load();
+                blockFront = frontBlock_->front.load();
+                fence(memory_order_acquire);

-if (blockFront != blockTail) {
-goto non_empty_front_block;
-}
+                if (blockFront != blockTail) { // the re-read shows the front block is non-empty after all
+                    goto non_empty_front_block;
+                }

 // Front block is empty but there's another block ahead, advance to it
-Block* nextBlock = frontBlock_->next;
+                Block *nextBlock = frontBlock_->next;

-size_t nextBlockFront = nextBlock->front.load();
-size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
-fence(memory_order_acquire);
+                size_t nextBlockFront = nextBlock->front.load();
+                size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
+                fence(memory_order_acquire);

-assert(nextBlockFront != nextBlockTail);
-AE_UNUSED(nextBlockTail);
+                assert(nextBlockFront != nextBlockTail); // the next block is expected to be non-empty here
+                AE_UNUSED(nextBlockTail); // only read by the assert above

-fence(memory_order_release);
-frontBlock = frontBlock_ = nextBlock;
+                fence(memory_order_release);
+                frontBlock = frontBlock_ = nextBlock; // advance the queue's front block to the next block

-compiler_fence(memory_order_release);
+                compiler_fence(memory_order_release);

-auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
-element->~T();
+                auto element = reinterpret_cast<T *>(frontBlock_->data + nextBlockFront * sizeof(T));
+                element->~T(); // destroy the front element of the new front block

-nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
+                nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask; // advance the index, wrapping via sizeMask

-fence(memory_order_release);
-frontBlock_->front = nextBlockFront;
-}
-else {
+                fence(memory_order_release);
+                frontBlock_->front = nextBlockFront; // store the new front index after the release fence
+            } else {
 // No elements in current block and no other block to advance to
-return false;
-}
+                return false;
+            }

-return true;
-}
+            return true; // one element was destroyed and removed
+        }

 // Returns the approximate number of items currently in the queue.
 // Safe to call from both the producer and consumer threads.
-inline size_t size_approx() const AE_NO_TSAN
-{
-size_t result = 0;
-Block* frontBlock_ = frontBlock.load();
-Block* block = frontBlock_;
-do {
-fence(memory_order_acquire);
-size_t blockFront = block->front.load();
-size_t blockTail = block->tail.load();
-result += (blockTail - blockFront) & block->sizeMask;
-block = block->next.load();
-} while (block != frontBlock_);
-return result;
-}
+        inline size_t size_approx() const AE_NO_TSAN
+        {
+            size_t result = 0;
+            Block *frontBlock_ = frontBlock.load(); // starting block; the walk ends when it comes back around to it
+            Block *block = frontBlock_;
+            do {
+                fence(memory_order_acquire);
+                size_t blockFront = block->front.load();
+                size_t blockTail = block->tail.load();
+                result += (blockTail - blockFront) & block->sizeMask; // distance from front to tail in this block; the mask handles wrap-around
+                block = block->next.load();
+            } while (block != frontBlock_);
+            return result; // sum over all blocks of indices read one at a time, hence only approximate
+        }

 // Returns the total number of items that could be enqueued without incurring
 // an allocation when this queue is empty.
@@ -514,32 +505,35 @@ return result;
 //       the block the consumer is removing from until it's completely empty, except in
 //       the case where the producer was writing to the same block the consumer was
 //       reading from the whole time.
-inline size_t max_capacity() const {
-    size_t result = 0;
-    Block* frontBlock_ = frontBlock.load();
-    Block* block = frontBlock_;
-    do {
-        fence(memory_order_acquire);
-        result += block->sizeMask;
-        block = block->next.load();
-    } while (block != frontBlock_);
-    return result;
-}
-
-
-private:
-enum AllocationMode { CanAlloc, CannotAlloc };
+        inline size_t max_capacity() const {
+            size_t result = 0;
+            Block *frontBlock_ = frontBlock.load(); // starting block; the walk ends when it comes back around to it
+            Block *block = frontBlock_;
+            do {
+                fence(memory_order_acquire);
+                result += block->sizeMask; // adds sizeMask per block (presumably block size - 1, i.e. one slot per block kept spare; confirm against Block)
+                block = block->next.load();
+            } while (block != frontBlock_);
+            return result;
+        }
+
+
+    private:
+        enum AllocationMode {
+            CanAlloc, CannotAlloc // template argument to inner_enqueue: enqueue()/emplace() pass CanAlloc, try_enqueue()/try_emplace() pass CannotAlloc
+        };

 #if MOODYCAMEL_HAS_EMPLACE
-template<AllocationMode canAlloc, typename... Args>
-bool inner_enqueue(Args&&... args) AE_NO_TSAN
+
+        template<AllocationMode canAlloc, typename... Args>
+        bool inner_enqueue(Args &&... args) AE_NO_TSAN
 #else
-template<AllocationMode canAlloc, typename U>
-	bool inner_enqueue(U&& element) AE_NO_TSAN
+        template<AllocationMode canAlloc, typename U>
+            bool inner_enqueue(U&& element) AE_NO_TSAN
 #endif
-{
+        {
 #ifndef NDEBUG
-ReentrantGuard guard(this->enqueuing);
+            ReentrantGuard guard(this->enqueuing);
 #endif

 // High-level pseudocode (assuming we're allowed to alloc a new block):
@@ -549,77 +543,75 @@ ReentrantGuard guard(this->enqueuing);
 //     Else create a new block and enqueue there
 //     Advance tail to the block we just enqueued to

-Block* tailBlock_ = tailBlock.load();
-size_t blockFront = tailBlock_->localFront;
-size_t blockTail = tailBlock_->tail.load();
+            Block *tailBlock_ = tailBlock.load();
+            size_t blockFront = tailBlock_->localFront;
+            size_t blockTail = tailBlock_->tail.load();

-size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask;
-if (nextBlockTail != blockFront || nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) {
-fence(memory_order_acquire);
+            size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask;
+            if (nextBlockTail != blockFront || nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) {
+                fence(memory_order_acquire);
 // This block has room for at least one more element
-char* location = tailBlock_->data + blockTail * sizeof(T);
+                char *location = tailBlock_->data + blockTail * sizeof(T);
 #if MOODYCAMEL_HAS_EMPLACE
-new (location) T(std::forward<Args>(args)...);
+                new(location) T(std::forward<Args>(args)...);
 #else
-new (location) T(std::forward<U>(element));
+                new (location) T(std::forward<U>(element));
 #endif

-fence(memory_order_release);
-tailBlock_->tail = nextBlockTail;
-}
-else {
-fence(memory_order_acquire);
-if (tailBlock_->next.load() != frontBlock) {
+                fence(memory_order_release);
+                tailBlock_->tail = nextBlockTail;
+            } else {
+                fence(memory_order_acquire);
+                if (tailBlock_->next.load() != frontBlock) {
 // Note that the reason we can't advance to the frontBlock and start adding new entries there
 // is because if we did, then dequeue would stay in that block, eventually reading the new values,
 // instead of advancing to the next full block (whose values were enqueued first and so should be
 // consumed first).

-fence(memory_order_acquire);		// Ensure we get latest writes if we got the latest frontBlock
+                    fence(memory_order_acquire);        // Ensure we get latest writes if we got the latest frontBlock

 // tailBlock is full, but there's a free block ahead, use it
-Block* tailBlockNext = tailBlock_->next.load();
-size_t nextBlockFront = tailBlockNext->localFront = tailBlockNext->front.load();
-nextBlockTail = tailBlockNext->tail.load();
-fence(memory_order_acquire);
+                    Block *tailBlockNext = tailBlock_->next.load();
+                    size_t nextBlockFront = tailBlockNext->localFront = tailBlockNext->front.load();
+                    nextBlockTail = tailBlockNext->tail.load();
+                    fence(memory_order_acquire);

 // This block must be empty since it's not the head block and we
 // go through the blocks in a circle
-assert(nextBlockFront == nextBlockTail);
-tailBlockNext->localFront = nextBlockFront;
+                    assert(nextBlockFront == nextBlockTail);
+                    tailBlockNext->localFront = nextBlockFront;

-char* location = tailBlockNext->data + nextBlockTail * sizeof(T);
+                    char *location = tailBlockNext->data + nextBlockTail * sizeof(T);
 #if MOODYCAMEL_HAS_EMPLACE
-new (location) T(std::forward<Args>(args)...);
+                    new(location) T(std::forward<Args>(args)...);
 #else
-new (location) T(std::forward<U>(element));
+                    new (location) T(std::forward<U>(element));
 #endif

-tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask;
+                    tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask;

-fence(memory_order_release);
-tailBlock = tailBlockNext;
-}
-else if (canAlloc == CanAlloc) {
+                    fence(memory_order_release);
+                    tailBlock = tailBlockNext;
+                } else if (canAlloc == CanAlloc) {
 // tailBlock is full and there's no free block ahead; create a new block
-auto newBlockSize = largestBlockSize >= MAX_BLOCK_SIZE ? largestBlockSize : largestBlockSize * 2;
-auto newBlock = make_block(newBlockSize);
-if (newBlock == nullptr) {
+                    auto newBlockSize = largestBlockSize >= MAX_BLOCK_SIZE ? largestBlockSize : largestBlockSize * 2;
+                    auto newBlock = make_block(newBlockSize);
+                    if (newBlock == nullptr) {
 // Could not allocate a block!
-return false;
-}
-largestBlockSize = newBlockSize;
+                        return false;
+                    }
+                    largestBlockSize = newBlockSize;

 #if MOODYCAMEL_HAS_EMPLACE
-new (newBlock->data) T(std::forward<Args>(args)...);
+                    new(newBlock->data) T(std::forward<Args>(args)...);
 #else
-new (newBlock->data) T(std::forward<U>(element));
+                    new (newBlock->data) T(std::forward<U>(element));
 #endif
-assert(newBlock->front == 0);
-newBlock->tail = newBlock->localTail = 1;
+                    assert(newBlock->front == 0);
+                    newBlock->tail = newBlock->localTail = 1;

-newBlock->next = tailBlock_->next.load();
-tailBlock_->next = newBlock;
+                    newBlock->next = tailBlock_->next.load();
+                    tailBlock_->next = newBlock;

 // Might be possible for the dequeue thread to see the new tailBlock->next
 // *without* seeing the new tailBlock value, but this is OK since it can't
@@ -627,350 +619,345 @@ tailBlock_->next = newBlock;
 // case where it could try to read the next is if it's already at the tailBlock,
 // and it won't advance past tailBlock in any circumstance).

-fence(memory_order_release);
-tailBlock = newBlock;
-}
-else if (canAlloc == CannotAlloc) {
+                    fence(memory_order_release);
+                    tailBlock = newBlock;
+                } else if (canAlloc == CannotAlloc) {
 // Would have had to allocate a new block to enqueue, but not allowed
-return false;
-}
-else {
-assert(false && "Should be unreachable code");
-return false;
-}
-}
+                    return false;
+                } else {
+                    assert(false && "Should be unreachable code");
+                    return false;
+                }
+            }

-return true;
-}
+            return true;
+        }


 // Disable copying
-ReaderWriterQueue(ReaderWriterQueue const&) {  }
+        // Empty-bodied copy constructor; it sits in the private section, so code
+        // outside the class cannot copy a queue.
+        ReaderWriterQueue(ReaderWriterQueue const &) {}

 // Disable assignment
-ReaderWriterQueue& operator=(ReaderWriterQueue const&) {  }
+        // Private copy assignment, kept only to block assignment from outside the
+        // class. It returns *this so that control never flows off the end of a
+        // function with a non-void return type (undefined behaviour if it ran,
+        // and a -Wreturn-type warning otherwise).
+        ReaderWriterQueue &operator=(ReaderWriterQueue const &) { return *this; }


-AE_FORCEINLINE static size_t ceilToPow2(size_t x)
-{
+        AE_FORCEINLINE static size_t ceilToPow2(size_t x) {
 // From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
--x;
-x |= x >> 1;
-x |= x >> 2;
-x |= x >> 4;
-for (size_t i = 1; i < sizeof(size_t); i <<= 1) {
-x |= x >> (i << 3);
-}
-++x;
-return x;
-}
-
-template<typename U>
-static AE_FORCEINLINE char* align_for(char* ptr) AE_NO_TSAN
-{
-const std::size_t alignment = std::alignment_of<U>::value;
-return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
-}
-private:
+            --x;
+            x |= x >> 1;
+            x |= x >> 2;
+            x |= x >> 4;
+            for (size_t i = 1; i < sizeof(size_t); i <<= 1) {
+                x |= x >> (i << 3);
+            }
+            ++x;
+            return x;
+        }
+
+        // Rounds ptr up to the next address that satisfies U's alignment and
+        // returns it; a pointer that is already aligned is returned unchanged.
+        template<typename U>
+        static AE_FORCEINLINE char *align_for(char *ptr) AE_NO_TSAN {
+            const std::size_t boundary = std::alignment_of<U>::value;
+            const std::size_t misalignment = reinterpret_cast<std::uintptr_t>(ptr) % boundary;
+            const std::size_t padding = (boundary - misalignment) % boundary;
+            return ptr + padding;
+        }
+
+    private:
 #ifndef NDEBUG
-struct ReentrantGuard
-{
-    AE_NO_TSAN ReentrantGuard(weak_atomic<bool>& _inSection)
-            : inSection(_inSection)
-    {
-        assert(!inSection && "Concurrent (or re-entrant) enqueue or dequeue operation detected (only one thread at a time may hold the producer or consumer role)");
-        inSection = true;
-    }
-
-    AE_NO_TSAN ~ReentrantGuard() { inSection = false; }
-
-private:
-    ReentrantGuard& operator=(ReentrantGuard const&);
-
-private:
-    weak_atomic<bool>& inSection;
-};
+
+        // Scope guard around a boolean flag: the constructor asserts that the
+        // flag is clear and then sets it, and the destructor clears it again.
+        // inner_enqueue creates one on this->enqueuing when NDEBUG is not defined.
+        struct ReentrantGuard {
+            AE_NO_TSAN ReentrantGuard(weak_atomic<bool> &_inSection)
+                    : inSection(_inSection) {
+                // A flag that is already set means another guard on it is still alive.
+                assert(!inSection &&
+                       "Concurrent (or re-entrant) enqueue or dequeue operation detected (only one thread at a time may hold the producer or consumer role)");
+                inSection = true;
+            }
+
+            AE_NO_TSAN ~ReentrantGuard() { inSection = false; }
+
+        private:
+            // Private copy assignment, declared without a body here.
+            ReentrantGuard &operator=(ReentrantGuard const &);
+
+        private:
+            // The guarded flag; held by reference, so it must outlive the guard.
+            weak_atomic<bool> &inSection;
+        };
+
 #endif

-struct Block
-{
-    // Avoid false-sharing by putting highly contended variables on their own cache lines
-    weak_atomic<size_t> front;	// (Atomic) Elements are read from here
-    size_t localTail;			// An uncontended shadow copy of tail, owned by the consumer
+        struct Block {
+            // Avoid false-sharing by putting highly contended variables on their own cache lines
+            weak_atomic<size_t> front;    // (Atomic) Elements are read from here
+            size_t localTail;            // An uncontended shadow copy of tail, owned by the consumer

-    char cachelineFiller0[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];
-    weak_atomic<size_t> tail;	// (Atomic) Elements are enqueued here
-    size_t localFront;
+            char cachelineFiller0[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];
+            weak_atomic<size_t> tail;    // (Atomic) Elements are enqueued here
+            size_t localFront;

-    char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];	// next isn't very contended, but we don't want it on the same cache line as tail (which is)
-    weak_atomic<Block*> next;	// (Atomic)
+            char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) -
+                                  sizeof(size_t)];    // next isn't very contended, but we don't want it on the same cache line as tail (which is)
+            weak_atomic<Block *> next;    // (Atomic)

-    char* data;		// Contents (on heap) are aligned to T's alignment
+            char *data;        // Contents (on heap) are aligned to T's alignment

-    const size_t sizeMask;
+            const size_t sizeMask;


-    // size must be a power of two (and greater than 0)
-    AE_NO_TSAN Block(size_t const& _size, char* _rawThis, char* _data)
-            : front(0UL), localTail(0), tail(0UL), localFront(0), next(nullptr), data(_data), sizeMask(_size - 1), rawThis(_rawThis)
-    {
-    }
+            // size must be a power of two (and greater than 0)
+            AE_NO_TSAN Block(size_t const &_size, char *_rawThis, char *_data)
+                    : front(0UL), localTail(0), tail(0UL), localFront(0), next(nullptr), data(_data),
+                      sizeMask(_size - 1), rawThis(_rawThis) {
+            }

-private:
-    // C4512 - Assignment operator could not be generated
-    Block& operator=(Block const&);
+        private:
+            // C4512 - Assignment operator could not be generated
+            Block &operator=(Block const &);

-public:
-    char* rawThis;
-};
+        public:
+            char *rawThis;
+        };


-static Block* make_block(size_t capacity) AE_NO_TSAN
-{
+        static Block *make_block(size_t capacity) AE_NO_TSAN
+        {
 // Allocate enough memory for the block itself, as well as all the elements it will contain
-auto size = sizeof(Block) + std::alignment_of<Block>::value - 1;
-size += sizeof(T) * capacity + std::alignment_of<T>::value - 1;
-auto newBlockRaw = static_cast<char*>(std::malloc(size));
-if (newBlockRaw == nullptr) {
-return nullptr;
-}
-
-auto newBlockAligned = align_for<Block>(newBlockRaw);
-auto newBlockData = align_for<T>(newBlockAligned + sizeof(Block));
-return new (newBlockAligned) Block(capacity, newBlockRaw, newBlockData);
-}
+            auto size = sizeof(Block) + std::alignment_of<Block>::value - 1;
+            size += sizeof(T) * capacity + std::alignment_of<T>::value - 1;
+            auto newBlockRaw = static_cast<char *>(std::malloc(size));
+            if (newBlockRaw == nullptr) {
+                return nullptr;
+            }
+
+            auto newBlockAligned = align_for<Block>(newBlockRaw);
+            auto newBlockData = align_for<T>(newBlockAligned + sizeof(Block));
+            return new(newBlockAligned) Block(capacity, newBlockRaw, newBlockData);
+        }

-private:
-weak_atomic<Block*> frontBlock;		// (Atomic) Elements are dequeued from this block
+    private:
+        weak_atomic<Block *> frontBlock;        // (Atomic) Elements are dequeued from this block

-char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<Block*>)];
-weak_atomic<Block*> tailBlock;		// (Atomic) Elements are enqueued to this block
+        char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<Block *>)];
+        weak_atomic<Block *> tailBlock;        // (Atomic) Elements are enqueued to this block

-size_t largestBlockSize;
+        size_t largestBlockSize;

 #ifndef NDEBUG
-weak_atomic<bool> enqueuing;
-mutable weak_atomic<bool> dequeuing;
+        weak_atomic<bool> enqueuing;
+        mutable weak_atomic<bool> dequeuing;
 #endif
-};
+    };

 // Like ReaderWriterQueue, but also provides blocking operations
-template<typename T, size_t MAX_BLOCK_SIZE = 512>
-class BlockingReaderWriterQueue
-{
-private:
-    typedef ::moodycamel::ReaderWriterQueue<T, MAX_BLOCK_SIZE> ReaderWriterQueue;
-
-public:
-    explicit BlockingReaderWriterQueue(size_t size = 15) AE_NO_TSAN
-            : inner(size), sema(new spsc_sema::LightweightSemaphore())
-    { }
-
-    BlockingReaderWriterQueue(BlockingReaderWriterQueue&& other) AE_NO_TSAN
-            : inner(std::move(other.inner)), sema(std::move(other.sema))
-    { }
-
-    BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue&& other) AE_NO_TSAN
-    {
-        std::swap(sema, other.sema);
-        std::swap(inner, other.inner);
-        return *this;
-    }
-
-
-    // Enqueues a copy of element if there is room in the queue.
-    // Returns true if the element was enqueued, false otherwise.
-    // Does not allocate memory.
-    AE_FORCEINLINE bool try_enqueue(T const& element) AE_NO_TSAN
-    {
-        if (inner.try_enqueue(element)) {
-            sema->signal();
-            return true;
+    template<typename T, size_t MAX_BLOCK_SIZE = 512>
+    class BlockingReaderWriterQueue {
+    private:
+        typedef ::moodycamel::ReaderWriterQueue<T, MAX_BLOCK_SIZE> ReaderWriterQueue;
+
+    public:
+        // Builds the inner queue with the requested size (15 when omitted) and
+        // allocates a new LightweightSemaphore that sema takes ownership of.
+        explicit BlockingReaderWriterQueue(size_t size = 15) AE_NO_TSAN
+                : inner(size), sema(new spsc_sema::LightweightSemaphore()) {}
+
+        // Move constructor: moves both the inner queue and the semaphore pointer
+        // out of other.
+        BlockingReaderWriterQueue(BlockingReaderWriterQueue &&other) AE_NO_TSAN
+                : inner(std::move(other.inner)), sema(std::move(other.sema)) {}
+
+        BlockingReaderWriterQueue &operator=(BlockingReaderWriterQueue &&other) AE_NO_TSAN
+        {
+            std::swap(sema, other.sema);
+            std::swap(inner, other.inner);
+            return *this;
        }
-        return false;
-    }

-    // Enqueues a moved copy of element if there is room in the queue.
-    // Returns true if the element was enqueued, false otherwise.
-    // Does not allocate memory.
-    AE_FORCEINLINE bool try_enqueue(T&& element) AE_NO_TSAN
-    {
-        if (inner.try_enqueue(std::forward<T>(element))) {
-            sema->signal();
-            return true;
+
+        // Tries to append a copy of element to the inner queue without
+        // allocating memory. When the inner queue accepts it, the semaphore is
+        // signalled once; the return value reports whether it was accepted.
+        AE_FORCEINLINE bool try_enqueue(T const &element) AE_NO_TSAN
+        {
+            const bool enqueued = inner.try_enqueue(element);
+            if (enqueued) {
+                sema->signal();
+            }
+            return enqueued;
+        }
+
+        // Enqueues a moved copy of element if there is room in the queue.
+        // Returns true if the element was enqueued, false otherwise.
+        // Does not allocate memory.
+        AE_FORCEINLINE bool try_enqueue(T &&element) AE_NO_TSAN
+        {
+            if (inner.try_enqueue(std::forward<T>(element))) {
+                sema->signal();
+                return true;
+            }
+            return false;
        }
-        return false;
-    }

 #if MOODYCAMEL_HAS_EMPLACE
-    // Like try_enqueue() but with emplace semantics (i.e. construct-in-place).
-    template<typename... Args>
-    AE_FORCEINLINE bool try_emplace(Args&&... args) AE_NO_TSAN
-    {
-        if (inner.try_emplace(std::forward<Args>(args)...)) {
-            sema->signal();
-            return true;
+
+        // Like try_enqueue() but with emplace semantics (i.e. construct-in-place).
+        template<typename... Args>
+        AE_FORCEINLINE bool try_emplace(Args &&... args) AE_NO_TSAN {
+            if (inner.try_emplace(std::forward<Args>(args)...)) {
+                sema->signal();
+                return true;
+            }
+            return false;
        }
-        return false;
-    }
+
 #endif


-    // Enqueues a copy of element on the queue.
-    // Allocates an additional block of memory if needed.
-    // Only fails (returns false) if memory allocation fails.
-    AE_FORCEINLINE bool enqueue(T const& element) AE_NO_TSAN
-    {
-        if (inner.enqueue(element)) {
-            sema->signal();
-            return true;
+        // Enqueues a copy of element on the queue.
+        // Allocates an additional block of memory if needed.
+        // Only fails (returns false) if memory allocation fails.
+        AE_FORCEINLINE bool enqueue(T const &element) AE_NO_TSAN
+        {
+            if (inner.enqueue(element)) {
+                sema->signal();
+                return true;
+            }
+            return false;
        }
-        return false;
-    }

-    // Enqueues a moved copy of element on the queue.
-    // Allocates an additional block of memory if needed.
-    // Only fails (returns false) if memory allocation fails.
-    AE_FORCEINLINE bool enqueue(T&& element) AE_NO_TSAN
-    {
-        if (inner.enqueue(std::forward<T>(element))) {
-            sema->signal();
-            return true;
+        // Enqueues a moved copy of element on the queue.
+        // Allocates an additional block of memory if needed.
+        // Only fails (returns false) if memory allocation fails.
+        AE_FORCEINLINE bool enqueue(T &&element) AE_NO_TSAN
+        {
+            if (inner.enqueue(std::forward<T>(element))) {
+                sema->signal();
+                return true;
+            }
+            return false;
        }
-        return false;
-    }

 #if MOODYCAMEL_HAS_EMPLACE
-    // Like enqueue() but with emplace semantics (i.e. construct-in-place).
-    template<typename... Args>
-    AE_FORCEINLINE bool emplace(Args&&... args) AE_NO_TSAN
-    {
-        if (inner.emplace(std::forward<Args>(args)...)) {
-            sema->signal();
-            return true;
+
+        // Like enqueue() but with emplace semantics (i.e. construct-in-place).
+        template<typename... Args>
+        AE_FORCEINLINE bool emplace(Args &&... args) AE_NO_TSAN {
+            if (inner.emplace(std::forward<Args>(args)...)) {
+                sema->signal();
+                return true;
+            }
+            return false;
        }
-        return false;
-    }
+
 #endif


-    // Attempts to dequeue an element; if the queue is empty,
-    // returns false instead. If the queue has at least one element,
-    // moves front to result using operator=, then returns true.
-    template<typename U>
-    bool try_dequeue(U& result) AE_NO_TSAN
-    {
-        if (sema->tryWait()) {
+        // Non-blocking dequeue. Returns false when the semaphore cannot be taken
+        // immediately; otherwise moves the front element into result via the
+        // inner queue's try_dequeue and returns true.
+        template<typename U>
+        bool try_dequeue(U &result) AE_NO_TSAN {
+            if (!sema->tryWait()) {
+                return false;
+            }
+            // The semaphore was taken, so the inner dequeue is expected to succeed.
+            bool dequeued = inner.try_dequeue(result);
+            assert(dequeued);
+            AE_UNUSED(dequeued);
+            return true;
+        }
+
+
+        // Attempts to dequeue an element; if the queue is empty,
+        // waits until an element is available, then dequeues it.
+        template<typename U>
+        void wait_dequeue(U &result) AE_NO_TSAN {
+            while (!sema->wait());
            bool success = inner.try_dequeue(result);
+            AE_UNUSED(result);
            assert(success);
            AE_UNUSED(success);
-            return true;
        }
-        return false;
-    }
-
-
-    // Attempts to dequeue an element; if the queue is empty,
-    // waits until an element is available, then dequeues it.
-    template<typename U>
-    void wait_dequeue(U& result) AE_NO_TSAN
-    {
-        while (!sema->wait());
-        bool success = inner.try_dequeue(result);
-        AE_UNUSED(result);
-        assert(success);
-        AE_UNUSED(success);
-    }
-
-
-    // Attempts to dequeue an element; if the queue is empty,
-    // waits until an element is available up to the specified timeout,
-    // then dequeues it and returns true, or returns false if the timeout
-    // expires before an element can be dequeued.
-    // Using a negative timeout indicates an indefinite timeout,
-    // and is thus functionally equivalent to calling wait_dequeue.
-    template<typename U>
-    bool wait_dequeue_timed(U& result, std::int64_t timeout_usecs) AE_NO_TSAN
-    {
-        if (!sema->wait(timeout_usecs)) {
-            return false;
+
+
+        // Attempts to dequeue an element; if the queue is empty,
+        // waits until an element is available up to the specified timeout,
+        // then dequeues it and returns true, or returns false if the timeout
+        // expires before an element can be dequeued.
+        // Using a negative timeout indicates an indefinite timeout,
+        // and is thus functionally equivalent to calling wait_dequeue.
+        template<typename U>
+        bool wait_dequeue_timed(U &result, std::int64_t timeout_usecs) AE_NO_TSAN {
+            if (!sema->wait(timeout_usecs)) {
+                return false;
+            }
+            bool success = inner.try_dequeue(result);
+            AE_UNUSED(result);
+            assert(success);
+            AE_UNUSED(success);
+            return true;
        }
-        bool success = inner.try_dequeue(result);
-        AE_UNUSED(result);
-        assert(success);
-        AE_UNUSED(success);
-        return true;
-    }


 #if __cplusplus > 199711L || _MSC_VER >= 1700
-    // Attempts to dequeue an element; if the queue is empty,
-	// waits until an element is available up to the specified timeout,
-	// then dequeues it and returns true, or returns false if the timeout
-	// expires before an element can be dequeued.
-	// Using a negative timeout indicates an indefinite timeout,
-	// and is thus functionally equivalent to calling wait_dequeue.
-	template<typename U, typename Rep, typename Period>
-	inline bool wait_dequeue_timed(U& result, std::chrono::duration<Rep, Period> const& timeout) AE_NO_TSAN
-	{
-        return wait_dequeue_timed(result, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
-	}
+        // std::chrono convenience overload of wait_dequeue_timed: converts the
+        // timeout to a whole number of microseconds and forwards to the overload
+        // that takes the timeout as an integer count. As documented for that
+        // overload, a negative timeout means waiting indefinitely.
+        // Returns true if an element was dequeued into result, false if the
+        // timeout expired first.
+        template<typename U, typename Rep, typename Period>
+        inline bool wait_dequeue_timed(U& result, std::chrono::duration<Rep, Period> const& timeout) AE_NO_TSAN
+        {
+            const auto timeoutUsecs = std::chrono::duration_cast<std::chrono::microseconds>(timeout).count();
+            return wait_dequeue_timed(result, timeoutUsecs);
+        }
 #endif


-    // Returns a pointer to the front element in the queue (the one that
-    // would be removed next by a call to `try_dequeue` or `pop`). If the
-    // queue appears empty at the time the method is called, nullptr is
-    // returned instead.
-    // Must be called only from the consumer thread.
-    AE_FORCEINLINE T* peek() const AE_NO_TSAN
-    {
-        return inner.peek();
-    }
-
-    // Removes the front element from the queue, if any, without returning it.
-    // Returns true on success, or false if the queue appeared empty at the time
-    // `pop` was called.
-    AE_FORCEINLINE bool pop() AE_NO_TSAN
-    {
-        if (sema->tryWait()) {
-            bool result = inner.pop();
-            assert(result);
-            AE_UNUSED(result);
-            return true;
+        // Returns a pointer to the front element in the queue (the one that
+        // would be removed next by a call to `try_dequeue` or `pop`). If the
+        // queue appears empty at the time the method is called, nullptr is
+        // returned instead.
+        // Must be called only from the consumer thread.
+        AE_FORCEINLINE T *peek() const AE_NO_TSAN
+        {
+            // Forwards to the inner queue; the semaphore is neither read nor changed.
+            return inner.peek();
+        }
+
+        // Discards the front element without handing it to the caller.
+        // Returns false when the semaphore cannot be taken immediately (the
+        // queue appeared empty); otherwise pops from the inner queue and
+        // returns true.
+        AE_FORCEINLINE bool pop() AE_NO_TSAN
+        {
+            if (!sema->tryWait()) {
+                return false;
+            }
+            // The semaphore was taken, so the inner pop is expected to succeed.
+            bool popped = inner.pop();
+            assert(popped);
+            AE_UNUSED(popped);
+            return true;
+        }
+
+        // Returns the approximate number of items currently in the queue.
+        // Safe to call from both the producer and consumer threads.
+        AE_FORCEINLINE size_t size_approx() const AE_NO_TSAN
+        {
+            return sema->availableApprox();
        }
-        return false;
-    }
-
-    // Returns the approximate number of items currently in the queue.
-    // Safe to call from both the producer and consumer threads.
-    AE_FORCEINLINE size_t size_approx() const AE_NO_TSAN
-    {
-        return sema->availableApprox();
-    }
-
-    // Returns the total number of items that could be enqueued without incurring
-    // an allocation when this queue is empty.
-    // Safe to call from both the producer and consumer threads.
-    //
-    // NOTE: The actual capacity during usage may be different depending on the consumer.
-    //       If the consumer is removing elements concurrently, the producer cannot add to
-    //       the block the consumer is removing from until it's completely empty, except in
-    //       the case where the producer was writing to the same block the consumer was
-    //       reading from the whole time.
-    AE_FORCEINLINE size_t max_capacity() const {
-        return inner.max_capacity();
-    }
-
-private:
-    // Disable copying & assignment
-    BlockingReaderWriterQueue(BlockingReaderWriterQueue const&) {  }
-    BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue const&) {  }
-
-private:
-    ReaderWriterQueue inner;
-    std::unique_ptr<spsc_sema::LightweightSemaphore> sema;
-};
+
+        // Returns the total number of items that could be enqueued without incurring
+        // an allocation when this queue is empty.
+        // Safe to call from both the producer and consumer threads.
+        //
+        // NOTE: The actual capacity during usage may be different depending on the consumer.
+        //       If the consumer is removing elements concurrently, the producer cannot add to
+        //       the block the consumer is removing from until it's completely empty, except in
+        //       the case where the producer was writing to the same block the consumer was
+        //       reading from the whole time.
+        AE_FORCEINLINE size_t max_capacity() const {
+            // Forwards to the inner queue; the semaphore is not involved.
+            return inner.max_capacity();
+        }
+
+    private:
+        // Disable copying & assignment: both operations are private, so code
+        // outside the class cannot copy-construct or copy-assign a queue.
+        BlockingReaderWriterQueue(BlockingReaderWriterQueue const &) {}
+
+        // Returns *this so that control never flows off the end of a function
+        // with a non-void return type (undefined behaviour if it ever ran).
+        BlockingReaderWriterQueue &operator=(BlockingReaderWriterQueue const &) { return *this; }
+
+    private:
+        ReaderWriterQueue inner;
+        std::unique_ptr <spsc_sema::LightweightSemaphore> sema;
+    };

 }    // end namespace moodycamel


--- a/zmq_src/ZMQServer.cpp
+++ b/zmq_src/ZMQServer.cpp
@@ -42,9 +42,11 @@ using namespace std;
 shared_ptr <ZMQServer> ZMQServer::zmqServer = nullptr;

 ZMQServer::ZMQServer(bool _checkSignature, bool _checkKeyOwnership, const string &_caCertFile)
-        : outgoingQueue(NUM_ZMQ_WORKER_THREADS), checkSignature(_checkSignature), checkKeyOwnership(_checkKeyOwnership),
+        : incomingQueue(NUM_ZMQ_WORKER_THREADS), checkSignature(_checkSignature), checkKeyOwnership(_checkKeyOwnership),
          caCertFile(_caCertFile), ctx(make_shared<zmq::context_t>(1)) {

+    CHECK_STATE(NUM_ZMQ_WORKER_THREADS > 1);
+
    socket = make_shared<zmq::socket_t>(*ctx, ZMQ_ROUTER);

    if (_checkSignature) {
@@ -277,12 +279,20 @@ void ZMQServer::doOneServerLoop() {

        CHECK_STATE2(msg, ZMQ_COULD_NOT_PARSE);

+
+        uint64_t index = 0;
+
        if ((dynamic_pointer_cast<BLSSignReqMessage>(msg)!= nullptr) ||
             dynamic_pointer_cast<ECDSASignReqMessage>(msg)) {
+            index = NUM_ZMQ_WORKER_THREADS - 1;
        } else {
-
+            index = 0;
        }

+        auto element = pair<shared_ptr<ZMQMessage>, shared_ptr<zmq::message_t>>(msg, identity);
+
+        incomingQueue.at(index).enqueue(element);
+
        result = msg->process();
    } catch (ExitRequestedException) {
        throw;

--- a/zmq_src/ZMQServer.h
+++ b/zmq_src/ZMQServer.h
@@ -34,6 +34,7 @@

 #include "Agent.h"
 #include "WorkerThreadPool.h"
+#include "ZMQMessage.h"

 using namespace moodycamel;

@@ -41,6 +42,7 @@ typedef enum {GOT_INCOMING_MSG = 0, GOT_OUTFOING_MSG = 1} PollResult;

 static const uint64_t NUM_ZMQ_WORKER_THREADS = 2;

+
 class ZMQServer : public Agent{

    uint64_t workerThreads;
@@ -48,9 +50,9 @@ class ZMQServer : public Agent{
    string caCertFile;
    string caCert;

-    ReaderWriterQueue<pair<string, shared_ptr<zmq_msg_t>>> outgoingQueue;
+    ReaderWriterQueue<pair<string, shared_ptr<zmq::message_t>>> outgoingQueue;

-    vector<ReaderWriterQueue<pair<string, shared_ptr<zmq_msg_t>>>> incomingQueue;
+    vector<ReaderWriterQueue<pair<shared_ptr<ZMQMessage>, shared_ptr<zmq::message_t>>>> incomingQueue;

    bool checkKeyOwnership = true;