/* Copyright (C) 2013-2025 by Arm Limited. All rights reserved. */

#include "CpuUtils.h"
#include "CpuUtils_Topology.h"
#include "Logging.h"
#include "OlyUtility.h"
#include "lib/File.h"
#include "lib/Span.h"
#include "linux/PerCoreIdentificationThread.h"

#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include <dirent.h>

namespace cpu_utils {
    /**
     * Determines the number of cores on the target by scanning /sys/devices/system/cpu
     * for entries named `cpu<N>`.
     *
     * If the directory cannot be opened, or no `cpu<N>` entry is found, an error is logged
     * and handleException() is called.
     *
     * @return One more than the highest `<N>` that was found
     */
    unsigned int getMaxCoreNum()
    {
        // NOTE(review): /sys/devices/system/cpu/kernel_max, or the highest value listed in
        // /sys/devices/system/cpu/possible, might be a simpler source for this number.
        DIR * const cpuDir = opendir("/sys/devices/system/cpu");
        if (cpuDir == nullptr) {
            LOG_ERROR("Unable to determine the number of cores on the target, opendir failed");
            handleException();
        }

        // holds (highest core number seen so far) + 1, or -1 while nothing has been seen
        long coreCount = -1;

        // NOLINTNEXTLINE(concurrency-mt-unsafe)
        for (struct dirent * entry = readdir(cpuDir); entry != nullptr; entry = readdir(cpuDir)) {
            // only entries that start with "cpu" are candidates
            if (strncmp(entry->d_name, "cpu", 3) != 0) {
                continue;
            }

            // the remainder of the name must be a decimal number (this rejects cpufreq, cpuidle, ...)
            long coreIndex;
            if (!stringToLong(&coreIndex, entry->d_name + 3, OlyBase::Decimal)) {
                continue;
            }

            if (coreIndex >= coreCount) {
                coreCount = coreIndex + 1;
            }
        }
        closedir(cpuDir);

        if (coreCount < 1) {
            LOG_ERROR("Unable to determine the number of cores on the target, no cpu# directories found");
            handleException();
        }

        return static_cast<unsigned int>(coreCount);
    }

    namespace {
        // Keys (the text to the left of the ':' separator) of the /proc/cpuinfo records that are of interest
        constexpr std::string_view CPU_ARCHITECTURE = "CPU architecture";
        constexpr std::string_view CPU_IMPLEMENTER = "CPU implementer";
        constexpr std::string_view CPU_PART = "CPU part";
        constexpr std::string_view CPU_REVISION = "CPU revision";
        constexpr std::string_view CPU_VARIANT = "CPU variant";
        constexpr std::string_view HARDWARE = "Hardware";
        constexpr std::string_view PROCESSOR = "processor";
        constexpr std::string_view MODEL_NAME = "model name";

        /**
         * Reads /proc/cpuinfo line by line and hands each record to @a consumer.
         *
         * - A blank line (which separates sections) is reported as `consumer("", "")`; the value
         *   returned from that call is ignored.
         * - A line containing a ':' is reported as `consumer(key, value)`, where `key` is the text
         *   before the first ':' with trailing whitespace removed, and `value` is the text starting
         *   two characters after that ':' (the ':' and the one character following it are skipped).
         *   Iteration stops as soon as such a call returns false.
         * - Lines without a ':', and lines that end immediately after the ':', are skipped.
         * - A final line that is not terminated by a line feed is discarded.
         *
         * The views passed to the consumer refer to an internal buffer and are only valid for the
         * duration of the call. `value.data()` is NUL terminated directly after the value, so it may be
         * handed to functions expecting a C string.
         *
         * If the file cannot be opened a warning is logged and the consumer is never called.
         *
         * @param consumer Callable taking `(std::string_view key, std::string_view value)` whose
         *                 result is convertible to bool
         */
        template<typename CpuInfoRecordConsumer>
        void iterate_cpuinfo(CpuInfoRecordConsumer && consumer)
        {
            // amount by which the line buffer grows each time a line does not fit into it
            constexpr std::size_t buffer_increment = 1024;

            FILE * f = lib::fopen_cloexec("/proc/cpuinfo", "r");
            if (f == nullptr) {
                LOG_WARNING("Error opening /proc/cpuinfo\n"
                            "The core name in the captured xml file will be 'unknown'.");
                return;
            }

            std::vector<char> temp(buffer_increment, '\0');

            // offset into `temp` at which the next chunk is stored; only non-zero while the
            // pieces of a line that is longer than the buffer are being accumulated
            std::size_t last_insert_pos = 0;

            // fgets must only be offered the space that remains *after* the insert position,
            // otherwise it can write beyond the end of the buffer
            while (fgets(temp.data() + last_insert_pos, static_cast<int>(temp.size() - last_insert_pos), f)
                   != nullptr) {
                // number of characters accumulated for the current line, including any line feed
                const std::size_t size =
                    last_insert_pos + strnlen(temp.data() + last_insert_pos, temp.size() - last_insert_pos);

                if ((size == 0) || (temp[size - 1] != '\n')) {
                    // haven't found the end of the line yet. grow the buffer and read the next piece
                    last_insert_pos = size;
                    temp.resize(temp.size() + buffer_increment);
                    continue;
                }

                // found the end of the line, so the next read starts at the front of the buffer again
                last_insert_pos = 0;

                // replace the line feed with a NUL, so that the line (and the value within it) is terminated
                temp[size - 1] = '\0';

                if (size == 1) {
                    // blank line; marks the end of a section
                    consumer("", "");
                    continue;
                }

                LOG_DEBUG("cpuinfo: %s", temp.data());

                // the line without its terminator
                const std::string_view record {temp.data(), size - 1};

                const auto split_pos = record.find(':');
                if ((split_pos == std::string_view::npos) || (split_pos + 2 > record.size())) {
                    continue;
                }

                // the value runs to the end of the line and is therefore followed by the NUL written above
                const std::string_view value_part = record.substr(split_pos + 2);

                // strip the whitespace between the key and the ':'
                std::string_view key_part = record.substr(0, split_pos);
                const auto last_key_char = key_part.find_last_not_of(" \n\t\r");
                key_part = key_part.substr(0, (last_key_char == std::string_view::npos) ? 0 : last_key_char + 1);

                if (!consumer(key_part, value_part)) {
                    break;
                }
            }

            if (fclose(f) == EOF) {
                LOG_WARNING("Failed to close /proc/cpuinfo");
            }
        }

        std::string parseProcCpuInfo(bool justGetHardwareName, lib::Span<midr_t> midrs)
        {
            std::string hardwareName;

            if (justGetHardwareName) {
                iterate_cpuinfo([&](std::string_view key, std::string_view value) {
                    if (key == HARDWARE) {
                        hardwareName = value;
                        return false;
                    }
                    return true;
                });
                return hardwareName;
            }

            bool foundCoreName = false;
            constexpr size_t UNKNOWN_PROCESSOR = -1;
            size_t processor = UNKNOWN_PROCESSOR;
            size_t minProcessor = midrs.size();
            size_t maxProcessor = 0;
            bool foundProcessorInSection = false;
            midr_t outOfPlaceCpuId {};
            bool invalidFormat = false;

            iterate_cpuinfo([&](std::string_view key, std::string_view value) -> bool {
                if (key == "") {
                    // New section, clear the processor. Streamline will not know the cpus if the pre Linux 3.8 format of cpuinfo is encountered but also that no incorrect information will be transmitted.
                    processor = UNKNOWN_PROCESSOR;
                    foundProcessorInSection = false;
                    return true;
                }

                if (key == HARDWARE && !foundCoreName) {
                    hardwareName = value;
                    foundCoreName = true;
                }
                else if (key == CPU_ARCHITECTURE) {
                    int architecture;
                    if (!stringToInt(&architecture, value.data())) {
                        // Do nothing
                    }
                    else {
                        LOG_DEBUG("   architecture = 0x%x", architecture);
                        if (processor != UNKNOWN_PROCESSOR) {

                            midrs[processor].set_architecture(architecture);
                        }
                        else {
                            outOfPlaceCpuId.set_architecture(architecture);
                            invalidFormat = true;
                        }
                    }
                }
                else if (key == CPU_IMPLEMENTER) {
                    int implementer;
                    if (!stringToInt(&implementer, value.data())) {
                        // Do nothing
                    }
                    else {
                        LOG_DEBUG("   implementer = 0x%x", implementer);
                        if (processor != UNKNOWN_PROCESSOR) {
                            midrs[processor].set_implementer(implementer);
                        }
                        else {
                            outOfPlaceCpuId.set_implementer(implementer);
                            invalidFormat = true;
                        }
                    }
                }
                else if (key == CPU_PART) {
                    int part_num;
                    if (!stringToInt(&part_num, value.data())) {
                        // Do nothing
                    }
                    else {
                        LOG_DEBUG("   part_num = 0x%x", part_num);
                        if (processor != UNKNOWN_PROCESSOR) {
                            midrs[processor].set_partnum(part_num);
                        }
                        else {
                            outOfPlaceCpuId.set_partnum(part_num);
                            invalidFormat = true;
                        }
                    }
                }
                else if (key == CPU_REVISION) {
                    int revision;
                    if (!stringToInt(&revision, value.data())) {
                        // Do nothing
                    }
                    else {
                        LOG_DEBUG("   revision = 0x%x", revision);
                        if (processor != UNKNOWN_PROCESSOR) {
                            midrs[processor].set_revision(revision);
                        }
                        else {
                            outOfPlaceCpuId.set_revision(revision);
                            invalidFormat = true;
                        }
                    }
                }
                else if (key == CPU_VARIANT) {
                    int variant;
                    if (!stringToInt(&variant, value.data())) {
                        // Do nothing
                    }
                    else {
                        LOG_DEBUG("   variant = 0x%x", variant);
                        if (processor != UNKNOWN_PROCESSOR) {
                            midrs[processor].set_variant(variant);
                        }
                        else {
                            outOfPlaceCpuId.set_variant(variant);
                            invalidFormat = true;
                        }
                    }
                }
                else if (key == PROCESSOR) {
                    int processorId = -1;
                    const bool converted = stringToInt(&processorId, value.data());

                    // update min and max processor ids
                    if (converted) {
                        minProcessor = (static_cast<size_t>(processorId) < minProcessor ? processorId : minProcessor);
                        maxProcessor = (static_cast<size_t>(processorId) > maxProcessor ? processorId : maxProcessor);
                    }

                    if (foundProcessorInSection) {
                        // Found a second processor in this section, ignore them all
                        processor = UNKNOWN_PROCESSOR;
                        invalidFormat = true;
                    }
                    else if (converted) {
                        LOG_DEBUG("   processorId = %d", processorId);

                        processor = processorId;
                        if (processor >= midrs.size()) {
                            LOG_ERROR("Found processor %zu but max is %zu", processor, midrs.size());
                            handleException();
                        }
                        foundProcessorInSection = true;
                    }
                }
                return true;
            });

            if (invalidFormat && (outOfPlaceCpuId.valid()) && (minProcessor <= maxProcessor)) {
                minProcessor = (minProcessor > 0 ? minProcessor : 0);
                maxProcessor = (maxProcessor < midrs.size() ? maxProcessor + 1 : midrs.size());

                for (size_t processor = minProcessor; processor < maxProcessor; ++processor) {
                    if (!midrs[processor].valid()) {
                        LOG_DEBUG("Setting global MIDR 0x%08x for processors %zu ",
                                  outOfPlaceCpuId.to_raw_value(),
                                  processor);
                        midrs[processor] = outOfPlaceCpuId;
                    }
                }
            }

            if (!foundCoreName) {
                LOG_FINE("Could not determine core name from /proc/cpuinfo\n"
                         "The core name in the captured xml file will be 'unknown'.");
            }

            return hardwareName;
        }
    }

    /**
     * Looks up the display name of a core from /proc/cpuinfo.
     *
     * @param core_idx The number of the core, as given by its "processor" record
     * @return The value of the first "model name" record that follows the "processor" record
     *         for @a core_idx (with no other "processor" record in between), or "Unknown CPU"
     *         if there is no such record
     */
    std::string readCpuDisplayName(unsigned int core_idx)
    {
        std::string displayName = "Unknown CPU";
        // true while the records being visited follow the "processor" record of the requested core
        bool found_core_idx = false;

        iterate_cpuinfo([&](std::string_view key, std::string_view value) -> bool {
            if (key == PROCESSOR) {
                int processor = -1;
                // a value that is not a number, or that is negative, can never identify the requested core
                const bool converted = stringToInt(&processor, value.data());
                found_core_idx = converted && (processor >= 0) && (static_cast<unsigned int>(processor) == core_idx);
                return true;
            }

            if (found_core_idx && (key == MODEL_NAME)) {
                displayName = value;
                // nothing more to find
                return false;
            }

            return true;
        });

        return displayName;
    }

    /**
     * Collects the per-core identification properties and converts them into topology maps.
     *
     * @param ignore_offline When false, one PerCoreIdentificationThread is started per CPU and this
     *                       function waits (for at most 10 seconds) until each has reported its
     *                       properties. When true, the properties are read one CPU at a time with
     *                       PerCoreIdentificationThread::detectFor.
     * @param max_cpu_number The number of CPUs to examine; CPUs `0 .. max_cpu_number - 1` are visited
     * @return The cpu-to-cluster, cpu-to-MIDR and cluster-to-MIDRs mappings for the properties that were read
     */
    topology_info_t read_cpu_topology(bool ignore_offline, std::size_t max_cpu_number)
    {
        topology_info_t topology;
        // first collect the detailed state using the identifier if available
        {
            // NB: perCoreThreads is declared after everything its callbacks refer to, so that it is
            // destroyed first, while those objects are still alive.
            std::mutex mutex;
            std::condition_variable cv;
            std::size_t identificationThreadCallbackCounter = 0;
            std::map<unsigned, PerCoreIdentificationThread::properties_t> collected_properties {};
            std::vector<std::unique_ptr<PerCoreIdentificationThread>> perCoreThreads {};

            // wake all cores; this ensures the contents of /proc/cpuinfo reflect the full range of cores in the system.
            // this works as follows:
            // - spawn one thread per core that is affined to each core
            // - once all cores are online and affined, *and* have read the data they are required to read, then they callback here to notify this method to continue
            // - the threads remain online until this function finishes (they are disposed of / terminated by destructor); this is so as
            //   to ensure that the cores remain online until cpuinfo is read
            if (!ignore_offline) {
                perCoreThreads.reserve(max_cpu_number);

                for (unsigned cpu = 0; cpu < max_cpu_number; ++cpu) {
                    // make_unique ensures that the thread object is owned from the moment it is created
                    perCoreThreads.push_back(std::make_unique<PerCoreIdentificationThread>(
                        false,
                        cpu,
                        [&mutex, &cv, &identificationThreadCallbackCounter, &collected_properties](
                            unsigned c,
                            PerCoreIdentificationThread::properties_t && properties) -> void {
                            std::lock_guard<std::mutex> const guard {mutex};

                            // store it for later processing
                            collected_properties.emplace(c, std::move(properties));

                            // update completed count
                            identificationThreadCallbackCounter += 1;
                            cv.notify_one();
                        }));
                }

                // wait until all threads are online
                std::unique_lock<std::mutex> wait_lock {mutex};
                const bool succeeded = cv.wait_for(wait_lock, std::chrono::seconds(10), [&] {
                    return identificationThreadCallbackCounter >= max_cpu_number;
                });
                if (!succeeded) {
                    LOG_WARNING("Could not identify all CPU cores within the timeout period. Activated %zu of %zu",
                                identificationThreadCallbackCounter,
                                max_cpu_number);
                }
            }
            //
            // when we don't care about onlining the cores, just read them directly, one by one, any that are offline will be ignored anyway
            //
            else {
                for (unsigned cpu = 0; cpu < max_cpu_number; ++cpu) {
                    collected_properties.emplace(cpu, PerCoreIdentificationThread::detectFor(cpu));
                }
            }

            // lock to prevent concurrent access to maps if one of the threads stalls
            std::lock_guard<std::mutex> const processing_lock(mutex);

            // process the collected properties
            for (auto const & [c, properties] : collected_properties) {
                const bool has_midr = (properties.midr_el1 != PerCoreIdentificationThread::INVALID_MIDR_EL1);

                // store the cluster / core mappings to allow us to fill in any gaps by assuming the same core type per cluster
                if (properties.physical_package_id != PerCoreIdentificationThread::INVALID_PACKAGE_ID) {
                    topology.cpu_to_cluster[c] = properties.physical_package_id;

                    // also map cluster to MIDR value if read
                    if (has_midr) {
                        topology.cluster_to_midrs[properties.physical_package_id].insert(
                            midr_t::from_raw(properties.midr_el1));
                    }

                    // siblings share the cluster, unless they already have a mapping of their own
                    for (int sibling : properties.core_siblings) {
                        const unsigned sibling_cpu = sibling;

                        if (topology.cpu_to_cluster.count(sibling_cpu) == 0) {
                            topology.cpu_to_cluster[sibling_cpu] = properties.physical_package_id;
                        }
                    }
                }

                // map cpu to MIDR value if read
                if (has_midr) {
                    topology.cpu_to_midr[c] = midr_t::from_raw(properties.midr_el1);
                }
            }
        }

        return topology;
    }

    /**
     * Fills in the MIDR values for all cores and, if required, reads the hardware name.
     *
     * The topology is read first; /proc/cpuinfo is only parsed when the hardware name is wanted, or
     * when not every MIDR is known and offline cores are not being ignored. Values derived from the
     * topology are applied last.
     *
     * @param ignoreOffline Passed on to read_cpu_topology
     * @param wantsHardwareName True if the hardware name should be read from /proc/cpuinfo
     * @param midrs The MIDR values to update, one entry per core
     * @return The hardware name read from /proc/cpuinfo, or an empty string if it was not read
     */
    std::string readCpuInfo(bool ignoreOffline, bool wantsHardwareName, lib::Span<midr_t> midrs)
    {
        auto topology = read_cpu_topology(ignoreOffline, midrs.size());

        // log what we learnt
        for (const auto & cpu_and_midr : topology.cpu_to_midr) {
            LOG_DEBUG("Read CPU %u MIDR_EL1 -> 0x%08x", cpu_and_midr.first, cpu_and_midr.second.to_raw_value());
        }
        for (const auto & cpu_and_cluster : topology.cpu_to_cluster) {
            LOG_DEBUG("Read CPU %u CLUSTER %u", cpu_and_cluster.first, cpu_and_cluster.second);
        }
        for (const auto & cluster_and_midrs : topology.cluster_to_midrs) {
            LOG_DEBUG("Read CLUSTER %u MIDRs:", cluster_and_midrs.first);
            for (auto const & midr : cluster_and_midrs.second) {
                LOG_DEBUG("    0x%08x", midr.to_raw_value());
            }
        }

        // did we successfully read all MIDR values from all cores?
        const bool knowAllMidrValues = (topology.cpu_to_midr.size() == midrs.size());

        // do we need to read /proc/cpuinfo
        const bool mustParseCpuInfo = wantsHardwareName || (!knowAllMidrValues && !ignoreOffline);

        std::string hardwareName;
        if (mustParseCpuInfo) {
            hardwareName = parseProcCpuInfo(/* justGetHardwareName = */ knowAllMidrValues, midrs);
        }

        // update/set known items from MIDR map and topology information. This will override anything read from /proc/cpuinfo
        updateCpuIdsFromTopologyInformation(midrs,
                                            topology.cpu_to_midr,
                                            topology.cpu_to_cluster,
                                            topology.cluster_to_midrs);

        return hardwareName;
    }
}
