rpcs3/rpcs3/util/cpu_stats.cpp

#include "util/types.hpp"
#include "util/cpu_stats.hpp"
#include "util/sysinfo.hpp"
#include "util/logs.hpp"
#include "Utilities/StrUtil.h"
#include <algorithm>

#ifdef _WIN32
#include "windows.h"
#include "tlhelp32.h"
#pragma comment(lib, "pdh.lib")
#else
#include "fstream"
#include "sstream"
#include "stdlib.h"
#include "sys/times.h"
#include "sys/types.h"
#include "unistd.h"
#endif

#ifdef __APPLE__
# include <mach/mach_init.h>
# include <mach/mach_port.h>
# include <mach/task.h>
# include <mach/vm_map.h>
#endif

#ifdef __linux__
# include <dirent.h>
#endif

// BSD family: the thread count is read from a kinfo_proc obtained via sysctl().
#if defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
# include <sys/sysctl.h>
# if defined(__DragonFly__) || defined(__FreeBSD__)
#  include <sys/user.h>
# endif

// NetBSD: remap KERN_PROC / kinfo_proc to their "2" variants so the code
// further down can use a single spelling on every BSD.
# if defined(__NetBSD__)
#  undef KERN_PROC
#  define KERN_PROC KERN_PROC2
#  define kinfo_proc kinfo_proc2
# endif

// KP_NLWP(kp): the per-platform kinfo_proc field that get_current_thread_count()
// returns as the thread count. Not defined for OpenBSD, which counts entries instead.
# if defined(__DragonFly__)
#  define KP_NLWP(kp) (kp.kp_nthreads)
# elif defined(__FreeBSD__)
#  define KP_NLWP(kp) (kp.ki_numthreads)
# elif defined(__NetBSD__)
#  define KP_NLWP(kp) (kp.p_nlwps)
# endif
#endif

// Log channel used for every diagnostic emitted by this file; created with the "PERF" tag.
LOG_CHANNEL(perf_log, "PERF");

namespace utils
{
#ifdef _WIN32
	/// Wraps a PDH status code into a fmt::win_error that also carries the pdh.dll module handle.
	/// NOTE(review): presumably the handle lets the formatter look up PDH message text — confirm in fmt::win_error.
	///
	/// @param status Status code returned by a Pdh* call.
	/// @return The status paired with the pdh.dll module handle (null if the library could not be loaded).
	fmt::win_error pdh_error(PDH_STATUS status)
	{
		// Resolve the module only once. LoadLibrary increments the module reference count on
		// every call, so loading it again for each reported error would pile up references
		// that are never released.
		static const HMODULE pdh_module = LoadLibrary(L"pdh.dll");

		return fmt::win_error{static_cast<unsigned long>(status), pdh_module};
	}
#endif

	/// Records the initial wall-clock, kernel and user time samples that the
	/// first get_usage() call is measured against.
	cpu_stats::cpu_stats()
	{
#ifdef _WIN32
		FILETIME wall_time, kernel_time, user_time;

		// Wall-clock reference point.
		GetSystemTimeAsFileTime(&wall_time);
		memcpy(&m_last_cpu, &wall_time, sizeof(FILETIME));

		// Creation and exit times are not needed, wall_time merely serves as scratch storage for both.
		GetProcessTimes(GetCurrentProcess(), &wall_time, &wall_time, &kernel_time, &user_time);
		memcpy(&m_usr_cpu, &user_time, sizeof(FILETIME));
		memcpy(&m_sys_cpu, &kernel_time, sizeof(FILETIME));
#else
		struct tms sample;
		const clock_t now = times(&sample);

		m_last_cpu = now;
		m_usr_cpu  = sample.tms_utime;
		m_sys_cpu  = sample.tms_stime;
#endif
	}

	/// Prepares the per-core usage query (Windows only, a no-op elsewhere).
	/// Opens a PDH query, attaches the "% Processor Time" counter for all processor
	/// instances and collects a first sample. Every failure is logged and aborts the setup.
	void cpu_stats::init_cpu_query()
	{
#ifdef _WIN32
		if (const PDH_STATUS open_status = PdhOpenQuery(nullptr, 0, &m_cpu_query); open_status != ERROR_SUCCESS)
		{
			perf_log.error("Failed to open cpu query for per core cpu usage: %s", pdh_error(open_status));
			return;
		}

		if (const PDH_STATUS add_status = PdhAddEnglishCounter(m_cpu_query, L"\\Processor(*)\\% Processor Time", 0, &m_cpu_cores); add_status != ERROR_SUCCESS)
		{
			perf_log.error("Failed to add processor time counter for per core cpu usage: %s", pdh_error(add_status));
			return;
		}

		// Take the first sample right away so that it is already available when
		// get_per_core_usage() collects the next one.
		if (const PDH_STATUS collect_status = PdhCollectQueryData(m_cpu_query); collect_status != ERROR_SUCCESS)
		{
			perf_log.error("Failed to collect per core cpu usage: %s", pdh_error(collect_status));
		}
#endif
	}

	void cpu_stats::get_per_core_usage(std::vector<double>& per_core_usage, double& total_usage)
	{
		total_usage = 0.0;

		per_core_usage.resize(utils::get_thread_count());
		std::fill(per_core_usage.begin(), per_core_usage.end(), 0.0);

#if defined(_WIN32) || defined(__linux__)
		const auto string_to_number = [](const std::string& str) -> std::pair<bool, size_t>
		{
			std::add_pointer_t<char> eval;
			const size_t number = std::strtol(str.c_str(), &eval, 10);

			if (str.c_str() + str.size() == eval)
			{
				return { true, number };
			}

			return { false, 0 };
		};

#ifdef _WIN32
		if (!m_cpu_cores || !m_cpu_query)
		{
			perf_log.warning("Can not collect per core cpu usage: The required API is not initialized.");
			return;
		}

		PDH_STATUS status = PdhCollectQueryData(m_cpu_query);
		if (ERROR_SUCCESS != status)
		{
			perf_log.error("Failed to collect per core cpu usage: %s", pdh_error(status));
			return;
		}

		PDH_FMT_COUNTERVALUE counterVal{};
		DWORD dwBufferSize = 0; // Size of the pItems buffer
		DWORD dwItemCount = 0;  // Number of items in the pItems buffer
		PDH_FMT_COUNTERVALUE_ITEM *pItems = NULL; // Array of PDH_FMT_COUNTERVALUE_ITEM structures

		status = PdhGetFormattedCounterArray(m_cpu_cores, PDH_FMT_DOUBLE, &dwBufferSize, &dwItemCount, pItems);
		if (PDH_MORE_DATA == status)
		{
			pItems = (PDH_FMT_COUNTERVALUE_ITEM*)malloc(dwBufferSize);
			if (pItems)
			{
				status = PdhGetFormattedCounterArray(m_cpu_cores, PDH_FMT_DOUBLE, &dwBufferSize, &dwItemCount, pItems);
				if (ERROR_SUCCESS == status)
				{
					ensure(dwItemCount > 0);
					ensure((dwItemCount - 1) == per_core_usage.size()); // Remove one for _Total

					// Loop through the array and get the instance name and percentage.
					for (DWORD i = 0; i < dwItemCount; i++)
					{
						const std::string token = wchar_to_utf8(pItems[i].szName);

						if (const std::string lower = fmt::to_lower(token); lower.find("total") != umax)
						{
							total_usage = pItems[i].FmtValue.doubleValue;
							continue;
						}

						if (const auto [success, cpu_index] = string_to_number(token); success && cpu_index < dwItemCount)
						{
							per_core_usage[cpu_index] = pItems[i].FmtValue.doubleValue;
						}
						else if (!success)
						{
							perf_log.error("Can not convert string to cpu index for per core cpu usage. (token='%s')", token);
						}
						else
						{
							perf_log.error("Invalid cpu index for per core cpu usage. (token='%s', cpu_index=%d, cores=%d)", token, cpu_index, dwItemCount);
						}
					}
				}
				else
				{
					perf_log.error("Failed to get per core cpu usage: %s", pdh_error(status));
				}
			}
			else
			{
				perf_log.error("Failed to allocate buffer for per core cpu usage.");
			}
		}
		if (pItems) free(pItems);

#elif __linux__

		m_previous_idle_times_per_cpu.resize(utils::get_thread_count(), 0.0);
		m_previous_total_times_per_cpu.resize(utils::get_thread_count(), 0.0);

		if (std::ifstream proc_stat("/proc/stat"); proc_stat.good())
		{
			std::stringstream content;
			content << proc_stat.rdbuf();
			proc_stat.close();

			const std::vector<std::string> lines = fmt::split(content.str(), {"\n"});
			if (lines.empty())
			{
				perf_log.error("/proc/stat is empty");
				return;
			}

			for (const std::string& line : lines)
			{
				const std::vector<std::string> tokens = fmt::split(line, {" "});
				if (tokens.size() < 5)
				{
					return;
				}

				const std::string& token = tokens[0];
				if (!token.starts_with("cpu"))
				{
					return;
				}

				// Get CPU index
				int cpu_index = -1; // -1 for total

				constexpr size_t size_of_cpu = 3;
				if (token.size() > size_of_cpu)
				{
					if (const auto [success, val] = string_to_number(token.substr(size_of_cpu)); success && val < per_core_usage.size())
					{
						cpu_index = val;
					}
					else if (!success)
					{
						perf_log.error("Can not convert string to cpu index for per core cpu usage. (token='%s', line='%s')", token, line);
						continue;
					}
					else
					{
						perf_log.error("Invalid cpu index for per core cpu usage. (cpu_index=%d, cores=%d, token='%s', line='%s')", cpu_index, per_core_usage.size(), token, line);
						continue;
					}
				}

				size_t idle_time = 0;
				size_t total_time = 0;

				for (size_t i = 1; i < tokens.size(); i++)
				{
					if (const auto [success, val] = string_to_number(tokens[i]); success)
					{
						if (i == 4)
						{
							idle_time = val;
						}

						total_time += val;
					}
					else
					{
						perf_log.error("Can not convert string to time for per core cpu usage. (i=%d, token='%s', line='%s')", i, tokens[i], line);
					}
				}

				if (cpu_index < 0)
				{
					const double idle_time_delta = idle_time - std::exchange(m_previous_idle_time_total, idle_time);
					const double total_time_delta = total_time - std::exchange(m_previous_total_time_total, total_time);
					total_usage = 100.0 * (1.0 - idle_time_delta / total_time_delta);
				}
				else
				{
					const double idle_time_delta = idle_time - std::exchange(m_previous_idle_times_per_cpu[cpu_index], idle_time);
					const double total_time_delta = total_time - std::exchange(m_previous_total_times_per_cpu[cpu_index], total_time);
					per_core_usage[cpu_index] = 100.0 * (1.0 - idle_time_delta / total_time_delta);
				}
			}
		}
		else
		{
			perf_log.error("Failed to open /proc/stat (%s)", strerror(errno));
		}
#endif
#else
		total_usage = get_usage();
#endif
	}

	/// Returns the CPU usage of this process since the previous sample.
	/// The consumed kernel + user time is divided by the elapsed time and by
	/// utils::get_thread_count(), then scaled to percent.
	///
	/// @return Usage in percent, clamped to [0, 100]. 0 if one of the counters did not advance as expected.
	double cpu_stats::get_usage()
	{
		double usage = 0.0;

#ifdef _WIN32
		FILETIME wall_ft, kernel_ft, user_ft;
		ULARGE_INTEGER wall, kernel, user;

		GetSystemTimeAsFileTime(&wall_ft);
		memcpy(&wall, &wall_ft, sizeof(FILETIME));

		// Creation and exit times are not needed, wall_ft is reused as scratch storage for both.
		GetProcessTimes(GetCurrentProcess(), &wall_ft, &wall_ft, &kernel_ft, &user_ft);
		memcpy(&kernel, &kernel_ft, sizeof(FILETIME));
		memcpy(&user, &user_ft, sizeof(FILETIME));

		// Overflow detection: the sample is skipped (usage stays 0) unless all counters moved forward.
		const bool counters_advanced = wall.QuadPart > m_last_cpu && kernel.QuadPart >= m_sys_cpu && user.QuadPart >= m_usr_cpu;

		if (counters_advanced)
		{
			const double busy_time    = (kernel.QuadPart - m_sys_cpu) + (user.QuadPart - m_usr_cpu);
			const double elapsed_time = (wall.QuadPart - m_last_cpu);

			// Let's assume the thread count is at least 1
			usage = busy_time / elapsed_time / utils::get_thread_count() * 100;
		}

		m_sys_cpu  = kernel.QuadPart;
		m_usr_cpu  = user.QuadPart;
		m_last_cpu = wall.QuadPart;
#else
		struct tms sample;
		const clock_t now = times(&sample);

		// Overflow detection: the sample is skipped (usage stays 0) unless all counters moved forward.
		const bool counters_advanced = now > static_cast<clock_t>(m_last_cpu) && sample.tms_stime >= static_cast<clock_t>(m_sys_cpu) && sample.tms_utime >= static_cast<clock_t>(m_usr_cpu);

		if (counters_advanced)
		{
			const double busy_time    = (sample.tms_stime - m_sys_cpu) + (sample.tms_utime - m_usr_cpu);
			const double elapsed_time = (now - m_last_cpu);

			usage = busy_time / elapsed_time / utils::get_thread_count() * 100;
		}

		m_sys_cpu  = sample.tms_stime;
		m_usr_cpu  = sample.tms_utime;
		m_last_cpu = now;
#endif

		return std::clamp(usage, 0.0, 100.0);
	}

	/// Returns the number of threads of the current process.
	///
	/// @return The thread count, or 0 if it could not be determined or the platform is not supported.
	u32 cpu_stats::get_current_thread_count() // static
	{
#ifdef _WIN32
		// first determine the id of the current process
		const DWORD own_pid = GetCurrentProcessId();

		// then get a process list snapshot.
		const HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
		if (snapshot == INVALID_HANDLE_VALUE)
		{
			// Nothing to iterate and nothing to close.
			return 0;
		}

		// initialize the process entry structure.
		PROCESSENTRY32 entry{};
		entry.dwSize = sizeof(entry);

		// walk the process list until our own entry shows up.
		BOOL found = Process32First(snapshot, &entry);
		while (found && entry.th32ProcessID != own_pid)
		{
			found = Process32Next(snapshot, &entry);
		}

		CloseHandle(snapshot);
		return found ? entry.cntThreads : 0;
#elif defined(__APPLE__)
		const task_t task = mach_task_self();
		mach_msg_type_number_t thread_count = 0;
		thread_act_array_t thread_list = nullptr;
		if (task_threads(task, &thread_list, &thread_count) != KERN_SUCCESS)
		{
			return 0;
		}

		// task_threads hands out a port right for each thread. Each of them has to be released
		// in addition to the array itself, otherwise every call leaks one right per thread.
		for (mach_msg_type_number_t i = 0; i < thread_count; i++)
		{
			mach_port_deallocate(task, thread_list[i]);
		}

		vm_deallocate(task, reinterpret_cast<vm_address_t>(thread_list),
			      sizeof(thread_t) * thread_count);
		return static_cast<u32>(thread_count);
#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__)
		int mib[] = {
			CTL_KERN,
			KERN_PROC,
			KERN_PROC_PID,
			getpid(),
#if defined(__NetBSD__)
			sizeof(struct kinfo_proc),
			1,
#endif
		};
		u_int miblen = std::size(mib);
		struct kinfo_proc info;
		usz size = sizeof(info);
		if (sysctl(mib, miblen, &info, &size, NULL, 0))
		{
			return 0;
		}
		return KP_NLWP(info);
#elif defined(__OpenBSD__)
		int mib[] = {
			CTL_KERN,
			KERN_PROC,
			KERN_PROC_PID | KERN_PROC_SHOW_THREADS,
			getpid(),
			sizeof(struct kinfo_proc),
			0,
		};
		u_int miblen = std::size(mib);

		// get number of structs
		usz size = 0;
		if (sysctl(mib, miblen, NULL, &size, NULL, 0))
		{
			return 0;
		}

		const usz capacity = size / sizeof(struct kinfo_proc);
		if (capacity == 0)
		{
			return 0;
		}
		mib[5] = static_cast<int>(capacity);

		// populate array of structs (heap storage instead of a non-standard variable length array)
		std::vector<struct kinfo_proc> info(capacity);
		if (sysctl(mib, miblen, info.data(), &size, NULL, 0))
		{
			return 0;
		}

		// The second call updates size. Never read more entries than were allocated.
		const usz returned = std::min(capacity, size / sizeof(struct kinfo_proc));

		// exclude empty members (entries whose thread id is -1)
		u32 thread_count{0};
		for (usz i = 0; i < returned; i++)
		{
			if (info[i].p_tid != -1)
				++thread_count;
		}
		return thread_count;
#elif defined(__linux__)
		u32 thread_count{0};

		if (DIR* task_dir = opendir("/proc/self/task"))
		{
			// proc available, every entry that does not start with a dot counts as one thread
			while (const struct dirent* entry = readdir(task_dir))
			{
				if (entry->d_name[0] != '.')
				{
					++thread_count;
				}
			}

			closedir(task_dir);
		}
		return thread_count;
#else
		// unimplemented
		return 0;
#endif
	}
}