/** @file ***************************************************************************** Implementation of functions for profiling code blocks. See profiling.hpp . ***************************************************************************** * @author This file is part of libff, developed by SCIPR Lab * and contributors (see AUTHORS). * @copyright MIT license (see LICENSE file) *****************************************************************************/ #include <cassert> #include <chrono> #include <cstdio> #include <ctime> #include <list> #include <stdexcept> #include <vector> #include <../SCIPR/libff/common/default_types/ec_pp.hpp> #include <../SCIPR/libff/common/profiling.hpp> #include <../SCIPR/libff/common/utils.hpp> #ifndef NO_PROCPS #include <proc/readproc.h> #endif namespace libff { long long get_nsec_time() { auto timepoint = std::chrono::high_resolution_clock::now(); return std::chrono::duration_cast<std::chrono::nanoseconds>(timepoint.time_since_epoch()).count(); } /* Return total CPU time consumsed by all threads of the process, in nanoseconds. */ long long get_nsec_cpu_time() { #if _MSC_VER return 0; #else ::timespec ts; if ( ::clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) ) throw ::std::runtime_error("clock_gettime(CLOCK_PROCESS_CPUTIME_ID) failed"); // If we expected this to work, don't silently ignore failures, because that would hide the problem and incur an unnecessarily system-call overhead. So if we ever observe this exception, we should probably add a suitable #ifdef . //TODO: clock_gettime(CLOCK_PROCESS_CPUTIME_ID) is not supported by native Windows. What about Cygwin? Should we #ifdef on CLOCK_PROCESS_CPUTIME_ID or on __linux__? return ts.tv_sec * 1000000000ll + ts.tv_nsec; #endif } long long start_time, last_time; long long start_cpu_time, last_cpu_time; void start_profiling() { printf("Reset time counters for profiling\n"); last_time = start_time = get_nsec_time(); last_cpu_time = start_cpu_time = get_nsec_cpu_time(); } std::map<std::string, size_t> invocation_counts; std::map<std::string, long long> enter_times; std::map<std::string, long long> last_times; std::map<std::string, long long> cumulative_times; //TODO: Instead of analogous maps for time and cpu_time, use a single struct-valued map std::map<std::string, long long> enter_cpu_times; std::map<std::string, long long> last_cpu_times; std::map<std::pair<std::string, std::string>, long long> op_counts; std::map<std::pair<std::string, std::string>, long long> cumulative_op_counts; // ((msg, data_point), value) // TODO: Convert op_counts and cumulative_op_counts from pair to structs size_t indentation = 0; std::vector<std::string> block_names; std::list<std::pair<std::string, long long*> > op_data_points = { #ifdef PROFILE_OP_COUNTS std::make_pair("Fradd", &Fr<default_ec_pp>::add_cnt), std::make_pair("Frsub", &Fr<default_ec_pp>::sub_cnt), std::make_pair("Frmul", &Fr<default_ec_pp>::mul_cnt), std::make_pair("Frinv", &Fr<default_ec_pp>::inv_cnt), std::make_pair("Fqadd", &Fq<default_ec_pp>::add_cnt), std::make_pair("Fqsub", &Fq<default_ec_pp>::sub_cnt), std::make_pair("Fqmul", &Fq<default_ec_pp>::mul_cnt), std::make_pair("Fqinv", &Fq<default_ec_pp>::inv_cnt), std::make_pair("G1add", &G1<default_ec_pp>::add_cnt), std::make_pair("G1dbl", &G1<default_ec_pp>::dbl_cnt), std::make_pair("G2add", &G2<default_ec_pp>::add_cnt), std::make_pair("G2dbl", &G2<default_ec_pp>::dbl_cnt) #endif }; bool inhibit_profiling_info = false; bool inhibit_profiling_counters = false; void clear_profiling_counters() { invocation_counts.clear(); last_times.clear(); last_cpu_times.clear(); cumulative_times.clear(); } void print_cumulative_time_entry(const std::string &key, const long long factor) { const double total_ms = (cumulative_times.at(key) * 1e-6); const size_t cnt = invocation_counts.at(key); const double avg_ms = total_ms / cnt; printf(" %-45s: %12.5fms = %lld * %0.5fms (%zu invocations, %0.5fms = %lld * %0.5fms per invocation)\n", key.c_str(), total_ms, factor, total_ms/factor, cnt, avg_ms, factor, avg_ms/factor); } void print_cumulative_times(const long long factor) { printf("Dumping times:\n"); for (auto& kv : cumulative_times) { print_cumulative_time_entry(kv.first, factor); } } void print_cumulative_op_counts(const bool only_fq) { #ifdef PROFILE_OP_COUNTS printf("Dumping operation counts:\n"); for (auto& msg : invocation_counts) { printf(" %-45s: ", msg.first.c_str()); bool first = true; for (auto& data_point : op_data_points) { if (only_fq && data_point.first.compare(0, 2, "Fq") != 0) { continue; } if (!first) { printf(", "); } printf("%-5s = %7.0f (%3zu)", data_point.first.c_str(), 1. * cumulative_op_counts[std::make_pair(msg.first, data_point.first)] / msg.second, msg.second); first = false; } printf("\n"); } #else UNUSED(only_fq); #endif } void print_op_profiling(const std::string &msg) { #ifdef PROFILE_OP_COUNTS printf("\n"); print_indent(); printf("(opcounts) = ("); bool first = true; for (std::pair<std::string, long long*> p : op_data_points) { if (!first) { printf(", "); } printf("%s=%lld", p.first.c_str(), *(p.second)-op_counts[std::make_pair(msg, p.first)]); first = false; } printf(")"); #else UNUSED(msg); #endif } static void print_times_from_last_and_start(long long now, long long last, long long cpu_now, long long cpu_last) { long long time_from_start = now - start_time; long long time_from_last = now - last; long long cpu_time_from_start = cpu_now - start_cpu_time; long long cpu_time_from_last = cpu_now - cpu_last; if (time_from_last != 0) { double parallelism_from_last = 1.0 * cpu_time_from_last / time_from_last; printf("[%0.4fs x%0.2f]", time_from_last * 1e-9, parallelism_from_last); } else { printf("[ ]"); } if (time_from_start != 0) { double parallelism_from_start = 1.0 * cpu_time_from_start / time_from_start; printf("\t(%0.4fs x%0.2f from start)", time_from_start * 1e-9, parallelism_from_start); } } void print_time(const char* msg) { if (inhibit_profiling_info) { return; } long long now = get_nsec_time(); long long cpu_now = get_nsec_cpu_time(); printf("%-35s\t", msg); print_times_from_last_and_start(now, last_time, cpu_now, last_cpu_time); #ifdef PROFILE_OP_COUNTS print_op_profiling(msg); #endif printf("\n"); fflush(stdout); last_time = now; last_cpu_time = cpu_now; } void print_header(const char *msg) { printf("\n================================================================================\n"); printf("%s\n", msg); printf("================================================================================\n\n"); } void print_indent() { for (size_t i = 0; i < indentation; ++i) { printf(" "); } } void op_profiling_enter(const std::string &msg) { for (std::pair<std::string, long long*> p : op_data_points) { op_counts[std::make_pair(msg, p.first)] = *(p.second); } } void enter_block(const std::string &msg, const bool indent) { if (inhibit_profiling_counters) { return; } block_names.emplace_back(msg); long long t = get_nsec_time(); enter_times[msg] = t; long long cpu_t = get_nsec_cpu_time(); enter_cpu_times[msg] = cpu_t; if (inhibit_profiling_info) { return; } #ifdef MULTICORE #pragma omp critical #endif { op_profiling_enter(msg); print_indent(); printf("(enter) %-35s\t", msg.c_str()); print_times_from_last_and_start(t, t, cpu_t, cpu_t); printf("\n"); fflush(stdout); if (indent) { ++indentation; } } } void leave_block(const std::string &msg, const bool indent) { if (inhibit_profiling_counters) { return; } #ifndef MULTICORE assert(*(--block_names.end()) == msg); #endif block_names.pop_back(); ++invocation_counts[msg]; long long t = get_nsec_time(); last_times[msg] = (t - enter_times[msg]); cumulative_times[msg] += (t - enter_times[msg]); long long cpu_t = get_nsec_cpu_time(); last_cpu_times[msg] = (cpu_t - enter_cpu_times[msg]); #ifdef PROFILE_OP_COUNTS for (std::pair<std::string, long long*> p : op_data_points) { cumulative_op_counts[std::make_pair(msg, p.first)] += *(p.second)-op_counts[std::make_pair(msg, p.first)]; } #endif if (inhibit_profiling_info) { return; } #ifdef MULTICORE #pragma omp critical #endif { if (indent) { --indentation; } print_indent(); printf("(leave) %-35s\t", msg.c_str()); print_times_from_last_and_start(t, enter_times[msg], cpu_t, enter_cpu_times[msg]); print_op_profiling(msg); printf("\n"); fflush(stdout); } } void print_mem(const std::string &s) { #ifndef NO_PROCPS struct proc_t usage; look_up_our_self(&usage); if (s.empty()) { printf("* Peak vsize (physical memory+swap) in mebibytes: %lu\n", usage.vsize >> 20); } else { printf("* Peak vsize (physical memory+swap) in mebibytes (%s): %lu\n", s.c_str(), usage.vsize >> 20); } #else printf("* Memory profiling not supported in NO_PROCPS mode\n"); #endif } void print_compilation_info() { #ifdef __GNUC__ printf("g++ version: %s\n", __VERSION__); printf("Compiled on %s %s\n", __DATE__, __TIME__); #endif #ifdef STATIC printf("STATIC: yes\n"); #else printf("STATIC: no\n"); #endif #ifdef MULTICORE printf("MULTICORE: yes\n"); #else printf("MULTICORE: no\n"); #endif #ifdef DEBUG printf("DEBUG: yes\n"); #else printf("DEBUG: no\n"); #endif #ifdef PROFILE_OP_COUNTS printf("PROFILE_OP_COUNTS: yes\n"); #else printf("PROFILE_OP_COUNTS: no\n"); #endif #ifdef _GLIBCXX_DEBUG printf("_GLIBCXX_DEBUG: yes\n"); #else printf("_GLIBCXX_DEBUG: no\n"); #endif } } // libff