# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.

import abc
import os
from os import PathLike
from pathlib import Path
from typing import Any, Generic, Mapping, Optional, TypeVar, Union

import nsysstats
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

from nsys_recipe.lib import export
from nsys_recipe.log import logger

# Export format used whenever the caller's hints do not specify 'format'.
DEFAULT_FORMAT = "parquetdir"

# Type of the handle returned by a loader's 'validate_table' and accepted by
# its 'read_table'.
HandleType = TypeVar("HandleType")


class TableNotFoundError(Exception):
    """Signals that a requested table is not present in the export."""

    def __init__(self, table: str):
        message = f"Table '{table}' does not exist."
        super().__init__(message)


class ColumnNotFoundError(Exception):
    """Signals that a requested column is not present in a table."""

    def __init__(self, column: str, table: str):
        message = f"Column '{column}' does not exist in table '{table}'."
        super().__init__(message)


class InvalidExportError(Exception):
    """Signals that an export file could not be validated."""

    def __init__(self, filename: Union[str, PathLike[str]]):
        message = f"Could not validate {filename}."
        super().__init__(message)


class _Loader(Generic[HandleType], abc.ABC):
    """Abstract base for objects that validate and read one export format.

    Concrete loaders set 'output_suffix' and implement 'validate_table',
    'read_table' and 'list_tables' for their format.
    """

    # Appended to the report path (with its extension stripped) to form the
    # default export path. Must be overridden with a non-empty value.
    output_suffix = ""

    def __init__(self, report_path: Union[str, PathLike[str]]):
        self._report_path = report_path

    def validate_export_time(self, path: Union[str, PathLike[str]]) -> bool:
        """Return True when 'path' has a later ctime than the report file."""
        report_ctime = os.path.getctime(self._report_path)
        export_ctime = os.path.getctime(path)
        return report_ctime < export_ctime

    @abc.abstractmethod
    def validate_table(
        self,
        path: Union[str, PathLike[str]],
        table: str,
        columns: Optional[list[str]] = None,
    ) -> HandleType:
        """Verify that the table and the requested columns exist in the export.

        Parameters
        ----------
        path : str or PathLike
            Location of the export file or directory.
        table : str
            Table whose presence is checked.
        columns : list of str, optional
            Columns whose presence is checked. When omitted, no individual
            column is checked.

        Returns
        -------
        handle : object
            Value to be passed to 'read_table' to actually load the data.
        """
        ...

    @abc.abstractmethod
    def read_table(
        self, handle: HandleType, table: str, columns: Optional[list[str]] = None
    ) -> pd.DataFrame:
        """Load a table from the export.

        Parameters
        ----------
        handle : object
            Value previously returned by 'validate_table'.
        table : str
            Table to load.
        columns : list of str, optional
            Columns to load. When omitted, every column is loaded.

        Returns
        -------
        result : dataframe
            The table contents.
        """
        ...

    @abc.abstractmethod
    def list_tables(self, path: Union[str, PathLike[str]]) -> list[str]:
        """Enumerate the tables found in the export.

        Parameters
        ----------
        path : str or PathLike
            Location of the export file.

        Returns
        -------
        tables : list of str
            Names of the available tables.
        """
        ...

    def get_export_path(self) -> str:
        """Build the default export path from the report path and the suffix.

        Raises
        ------
        NotImplementedError
            If 'output_suffix' is empty.
        """
        suffix = self.output_suffix
        if suffix:
            stem = Path(self._report_path).with_suffix("")
            return f"{stem}{suffix}"

        raise NotImplementedError("output_suffix must be set.")


class ParquetLoader(_Loader[pq.ParquetFile]):
    """Loader for exports stored as a directory with one Parquet file per table."""

    output_suffix = "_pqtdir"
    file_extension = ".parquet"

    def validate_table(
        self,
        path: Union[str, PathLike[str]],
        table: str,
        columns: Optional[list[str]] = None,
    ) -> pq.ParquetFile:
        """Open the table's Parquet file and check the requested columns.

        Parameters
        ----------
        path : str or PathLike
            Export directory holding the Parquet files.
        table : str
            Name of the table; the file looked up is '<table>.parquet'.
        columns : list of str, optional
            Columns that must be present in the file's schema.

        Returns
        -------
        handle : pq.ParquetFile
            Opened file, to be passed to 'read_table'.

        Raises
        ------
        TableNotFoundError
            If the table's file does not exist.
        InvalidExportError
            If the file is not newer than the report file.
        ColumnNotFoundError
            If a requested column is not in the schema.
        """
        table_file = Path(path) / f"{table}{self.file_extension}"
        file_path = str(table_file)

        if not table_file.exists():
            raise TableNotFoundError(table)

        if not self.validate_export_time(file_path):
            raise InvalidExportError(file_path)

        parquet_file = pq.ParquetFile(file_path)
        parquet_columns = parquet_file.schema.names

        for column in columns or []:
            if column not in parquet_columns:
                raise ColumnNotFoundError(column, table)

        return parquet_file

    def read_table(
        self, handle: pq.ParquetFile, table: str, columns: Optional[list[str]] = None
    ) -> pd.DataFrame:
        """Read the table behind 'handle' into a dataframe.

        An empty or missing 'columns' reads every column, which matches how
        'validate_table' and the Arrow and SQLite loaders treat an empty list.
        """
        # Passing [] to ParquetFile.read would select zero columns, so
        # normalise a falsy selection to None ("all columns").
        return handle.read(columns=columns or None).to_pandas()

    def list_tables(self, path: Union[str, PathLike[str]]) -> list[str]:
        """Return the stem of every '*.parquet' file in the export directory.

        Returns an empty list if the directory does not exist.
        """
        export_dir = Path(path)
        if not export_dir.exists():
            return []

        return [entry.stem for entry in export_dir.glob(f"*{self.file_extension}")]


class ArrowLoader(_Loader[pa.RecordBatchStreamReader]):
    """Loader for exports stored as a directory with one Arrow stream file per table."""

    output_suffix = "_arwdir"
    file_extension = ".arrow"

    def validate_table(
        self,
        path: Union[str, PathLike[str]],
        table: str,
        columns: Optional[list[str]] = None,
    ) -> pa.RecordBatchStreamReader:
        """Open the table's Arrow file and check the requested columns.

        Raises TableNotFoundError when '<table>.arrow' is absent,
        InvalidExportError when the file is not newer than the report, and
        ColumnNotFoundError for the first requested column missing from the
        schema. On success the opened reader is returned.
        """
        arrow_file = Path(path) / f"{table}{self.file_extension}"
        arrow_path = str(arrow_file)

        if not arrow_file.exists():
            raise TableNotFoundError(table)

        if not self.validate_export_time(arrow_path):
            raise InvalidExportError(arrow_path)

        # Neither the documentation nor the type hints allow passing a file path.
        # However, it works in practice and is efficient and concise.
        # It uses a memory-mapped file that is associated with the Reader,
        # and cleaned up when the Reader is garbage collected.
        stream_reader = pa.RecordBatchStreamReader(arrow_path)  # type: ignore
        available = stream_reader.schema.names

        for requested in columns or []:
            if requested not in available:
                raise ColumnNotFoundError(requested, table)

        return stream_reader

    def read_table(
        self,
        handle: pa.RecordBatchStreamReader,
        table: str,
        columns: Optional[list[str]] = None,
    ) -> pd.DataFrame:
        """Read the whole stream into a dataframe, then keep 'columns' if given."""
        frame = handle.read_pandas()  # type: ignore[misc] # issue in pyarrow-stubs
        if columns:
            return frame[columns]
        return frame

    def list_tables(self, path: Union[str, PathLike[str]]) -> list[str]:
        """Return the stem of every '*.arrow' file in the export directory.

        Returns an empty list if the directory does not exist.
        """
        export_dir = Path(path)
        if export_dir.exists():
            return [
                entry.stem for entry in export_dir.glob(f"*{self.file_extension}")
            ]
        return []


class SqliteLoader(_Loader[nsysstats.Report]):
    """Loader for exports stored as a single SQLite database file.

    The opened 'nsysstats.Report' is cached and reused for as long as the
    requested path and the file's ctime stay the same.
    """

    output_suffix = ".sqlite"
    file_extension = ".sqlite"

    def __init__(self, report_path: Union[str, PathLike[str]]):
        super().__init__(report_path)
        # Cached report object and the ctime the database file had when it
        # was opened; both are refreshed together in '_get_sql_report'.
        self._sql_report: Optional[nsysstats.Report] = None
        self._sqlite_report_ctime: Optional[float] = None

    def _get_sql_report(self, path: Union[str, PathLike[str]]) -> nsysstats.Report:
        """Return the cached report for 'path', reopening it when stale.

        The cache is considered stale when nothing is cached yet, when a
        different path is requested, or when the file's ctime has changed.
        """
        cached = self._sql_report
        stale = (
            cached is None
            or cached.dbfile != path
            or self._sqlite_report_ctime != os.path.getctime(path)
        )

        if stale:
            self._sql_report = nsysstats.Report(path)
            self._sqlite_report_ctime = os.path.getctime(path)

        return self._sql_report

    def validate_tables(
        self, path: Union[str, PathLike[str]], tables: Optional[list[str]]
    ) -> nsysstats.Report:
        """Check that the database is valid and contains every given table.

        Raises
        ------
        InvalidExportError
            If the file does not exist or is not newer than the report file.
        TableNotFoundError
            For the first table in 'tables' missing from the database.
        """
        if not Path(path).exists() or not self.validate_export_time(path):
            raise InvalidExportError(path)

        sql_report = self._get_sql_report(path)

        for table in tables or []:
            if not sql_report.table_exists(table):
                raise TableNotFoundError(table)

        return sql_report

    def validate_table(
        self,
        path: Union[str, PathLike[str]],
        table: str,
        columns: Optional[list[str]] = None,
    ) -> nsysstats.Report:
        """Check that the table and the requested columns exist.

        Raises the same errors as 'validate_tables', plus ColumnNotFoundError
        for the first requested column missing from the table.
        """
        sql_report = self.validate_tables(path, [table])

        for column in columns or []:
            if not sql_report.table_col_exists(table, column):
                raise ColumnNotFoundError(column, table)

        return sql_report

    def read_sql_query(self, handle: nsysstats.Report, query: str) -> pd.DataFrame:
        """Execute 'query' on the report's database connection."""
        return pd.read_sql(query, handle.dbcon)

    def read_table(
        self, handle: nsysstats.Report, table: str, columns: Optional[list[str]] = None
    ) -> pd.DataFrame:
        """Select the given columns (or all of them) from 'table'."""
        column_query = ",".join(columns) if columns else "*"
        query = f"SELECT {column_query} FROM {table}"
        return self.read_sql_query(handle, query)

    def list_tables(self, path: Union[str, PathLike[str]]) -> list[str]:
        """Return the tables of the database, or an empty list if unavailable."""
        # 'Path(path)' can never be None, so test for the file itself, the
        # same way the other loaders test for their export directory.
        if not Path(path).exists():
            return []

        try:
            return self._get_sql_report(path).tables
        except Exception:
            # Best effort: a database that cannot be opened or inspected is
            # reported as having no tables.
            return []


class ServiceFactory:
    """Creates loaders for a report file and hands out one per format."""

    def __init__(self, report_path: Union[str, PathLike[str]]):
        if not Path(report_path).exists():
            raise FileNotFoundError(f"{report_path} does not exist.")

        self._report_path = report_path
        # Loaders created so far, keyed by format name.
        self._service_instances: dict[str, _Loader] = {}

    def _create_service(self, format: str) -> _Loader:
        """Instantiate the loader class registered for 'format'.

        Raises NotImplementedError for an unknown format.
        """
        loader_map: dict[str, type[_Loader]] = {
            "parquetdir": ParquetLoader,
            "arrowdir": ArrowLoader,
            "sqlite": SqliteLoader,
        }

        loader_cls = loader_map.get(format)
        if loader_cls is None:
            raise NotImplementedError("Invalid format type.")

        return loader_cls(self._report_path)

    def get_service(self, format: str) -> _Loader:
        """Return the loader for 'format', creating it on first request."""
        service = self._service_instances.get(format)
        if service is None:
            service = self._create_service(format)
            self._service_instances[format] = service

        return service


class DataReader:
    """The DataReader class provides a high-level interface for exporting and
    reading data from Nsight Systems report files."""

    # Tables that cause the report to be flagged as outdated when they are
    # read back empty (see '_check_deprecation_for_recipes').
    _DEPRECATION_CHECK_TABLES = (
        "ANALYSIS_DETAILS",
        "TARGET_INFO_SESSION_START_TIME",
        "NIC_ID_MAP",
    )

    def __init__(self, report_path: Union[str, PathLike[str]]):
        self._service_factory = ServiceFactory(report_path)
        self._report_path = report_path

    def _handle_exceptions(self, e: Exception) -> None:
        """Log missing-table and missing-column errors; re-raise anything else."""
        if isinstance(e, TableNotFoundError):
            subject = "table"
        elif isinstance(e, ColumnNotFoundError):
            subject = "column"
        else:
            raise e

        logger.error(
            f"{self._report_path}: {e}"
            f" Please ensure the {subject} name is correct or re-try with a recent version of Nsight Systems."
        )

    def get_export_path(
        self, service: _Loader, path: Optional[Union[str, PathLike[str]]]
    ) -> str:
        """Return the export path for 'service'.

        When 'path' is given, the loader's default export name is placed
        inside that directory; otherwise the loader's default path is used.
        """
        export_path = service.get_export_path()

        if path is not None:
            export_path = str(Path(path) / Path(export_path).name)

        return export_path

    def _check_deprecation_for_recipes(
        self, result_dict: Optional[dict[str, pd.DataFrame]], hints: dict[str, Any]
    ) -> bool:
        """Return False (and log an error) if a deprecation-check table is empty.

        The check is skipped, returning True, when the 'check_deprecation'
        hint is falsy or when there is no result to inspect.
        """
        if not hints.get("check_deprecation", True) or result_dict is None:
            return True

        for table, df in result_dict.items():
            if df.empty and table in self._DEPRECATION_CHECK_TABLES:
                logger.error(
                    f"{self._report_path}: Report is outdated and does not contain '{table}'."
                    " Please generate a new report file using a recent version of Nsight Systems."
                )
                return False

        return True

    def _read_tables_and_report_missing(
        self,
        table_column_dict: Mapping[str, Optional[list[str]]],
        report_missing: bool,
        hints: dict[str, Any],
    ) -> tuple[
        Optional[dict[str, pd.DataFrame]], Optional[Mapping[str, Optional[list[str]]]]
    ]:
        """Read known tables and report any missing tables.

        Parameters
        ----------
        table_column_dict : dict
            Dictionary mapping table names to column names to be read.
        report_missing : bool
            If True, the first failure is passed to '_handle_exceptions'
            and (None, None) is returned. If False, failing tables are
            collected in 'missing_dict' instead.
        hints : dict
            Additional configurations ('format' and 'path' are used here).

        Returns
        -------
        result_dict : dict
            Dictionary mapping table names to dataframes.
        missing_dict : dict
            Dictionary mapping missing table names to lists of column names.
        """
        service = self._service_factory.get_service(hints.get("format", DEFAULT_FORMAT))
        export_path = self.get_export_path(service, hints.get("path"))

        result_dict: dict[str, pd.DataFrame] = {}
        missing_dict: dict[str, Optional[list[str]]] = {}

        for table, columns in table_column_dict.items():
            try:
                handle = service.validate_table(export_path, table, columns)
                result_dict[table] = service.read_table(handle, table, columns)
            except Exception as e:
                if report_missing:
                    self._handle_exceptions(e)
                    return None, None

                # Any failure (absent export, stale export, missing table or
                # column, read error) marks the table for a fresh export.
                missing_dict[table] = columns

        return result_dict, missing_dict

    def _read_sql_query(
        self,
        query: str,
        tables: Optional[list[str]],
        report_missing: bool,
        hints: dict[str, Any],
    ) -> Optional[pd.DataFrame]:
        """Run 'query' against the existing SQLite export.

        The 'overwrite' hint is not consulted here; 'read_sql_query' decides
        whether the pre-export read is attempted at all.

        Returns
        -------
        result : dataframe or None
            Result of the query, or None if validation or the query failed.
            With 'report_missing' set, the failure is first passed to
            '_handle_exceptions', which may re-raise it.
        """
        format_type = hints.get("format", "sqlite")
        assert format_type == "sqlite"

        service = self._service_factory.get_service("sqlite")
        assert isinstance(service, SqliteLoader)

        export_path = self.get_export_path(service, hints.get("path"))

        try:
            handle = service.validate_tables(export_path, tables)
            return service.read_sql_query(handle, query)
        except Exception as e:
            if report_missing:
                self._handle_exceptions(e)
            return None

    def export(
        self, tables: Optional[list[str]] = None, hints: Optional[dict[str, Any]] = None
    ) -> bool:
        """Export the given tables of the report file.

        Parameters
        ----------
        tables : list of str, optional
            Tables forwarded to 'export.export_file'.
        hints : dict, optional
            Additional configurations. 'format', 'path' and 'export_args'
            are used here.

        Returns
        -------
        result : bool
            Value returned by 'export.export_file'.
        """
        if hints is None:
            hints = {}

        format_type = hints.get("format", DEFAULT_FORMAT)
        service = self._service_factory.get_service(format_type)
        export_path = self.get_export_path(service, hints.get("path"))
        export_args = hints.get("export_args", None)

        return export.export_file(
            self._report_path, tables, format_type, export_path, export_args
        )

    def read_tables(
        self,
        table_column_dict: Mapping[str, Optional[list[str]]],
        hints: Optional[dict[str, Any]] = None,
    ) -> Optional[dict[str, pd.DataFrame]]:
        """Read tables into dataframes.

        Parameters
        ----------
        table_column_dict : dict
            Dictionary mapping table names to column names to be read.
        hints : dict, optional
            Additional configurations. The supported hints are:
            - 'format' (str): the export file format. Default is 'parquetdir'.
            - 'path' (str): the export file path. Default is in the same
                directory as the report file.
            - 'overwrite' (bool): whether to export afresh even though the
                existing file is valid. Default is False.
            - 'check_deprecation' (bool): whether to check if report file is
                deprecated for recipes. Default is True.
            - 'export_args' (list): a list of arguments to be passed when
                calling `nsys export`.

        Returns
        -------
        result : dict or None
            Dictionary containing the dataframes for each table, or None if
            there was an error reading at least one table.
        """
        if hints is None:
            hints = {}

        result_dict: Optional[dict[str, pd.DataFrame]]
        missing_dict: Optional[Mapping[str, Optional[list[str]]]]

        if hints.get("overwrite", False):
            # Skip the existing export entirely: everything gets re-exported.
            result_dict = {}
            missing_dict = table_column_dict
        else:
            result_dict, missing_dict = self._read_tables_and_report_missing(
                table_column_dict, False, hints
            )

        if missing_dict:
            if not self.export(list(missing_dict.keys()), hints):
                return None

            # Second pass: failures are now real errors and get reported.
            remaining_dict, _ = self._read_tables_and_report_missing(
                missing_dict, True, hints
            )

            if remaining_dict is None:
                return None

            assert result_dict is not None
            result_dict.update(remaining_dict)

        if not self._check_deprecation_for_recipes(result_dict, hints):
            return None

        return result_dict

    def read_table(
        self,
        table: str,
        columns: Optional[list[str]] = None,
        hints: Optional[dict[str, Any]] = None,
    ) -> Optional[pd.DataFrame]:
        """Read a single table into a dataframe.

        Parameters
        ----------
        table : str
            Name of the table to read.
        columns : list of str, optional
            List of columns to read. If not given, all columns will be read.
        hints : dict, optional
            Additional configurations. The supported hints are:
            - 'format' (str): the export file format. Default is 'parquetdir'.
            - 'path' (str): the export file path. Default is in the same
                directory as the report file.
            - 'overwrite' (bool): whether to export afresh even though the
                existing file is valid. Default is False.
            - 'check_deprecation' (bool): whether to check if report file is
                deprecated for recipes. Default is True.
            - 'export_args' (list): a list of arguments to be passed when
                calling `nsys export`.

        Returns
        -------
        result : dataframe or None
            Dataframe containing the table, or None if there was an error.
        """
        df_dict = self.read_tables({table: columns}, hints)
        if df_dict is None:
            return None

        return df_dict.get(table)

    def read_sql_query(
        self,
        query: str,
        tables: Optional[Union[list[str], str]] = None,
        hints: Optional[dict[str, Any]] = None,
    ) -> Optional[pd.DataFrame]:
        """Read the SQL query into a dataframe.

        Parameters
        ----------
        query : str
            SQL query to execute.
        tables : list of str or str, optional
            If specified, the function will export the tables before executing
            the query and check whether the table names are valid. If no
            tables are provided, all tables will be exported, and no checks
            will be made before executing the query.
        hints : dict, optional
            Additional configurations. The dictionary passed in is not
            modified. The supported hints are:
            - 'format' (str): the export file format. Default is 'sqlite'.
            - 'path' (str): the export file path. Default is in the same
                directory as the report file.
            - 'overwrite' (bool): whether to export afresh even though the
                existing file is valid. Default is False.
            - 'export_args' (list): a list of arguments to be passed when
                calling `nsys export`.

        Returns
        -------
        result : dataframe or None
            Result of the SQL query, or None if there was an error.

        Raises
        ------
        NotImplementedError
            If the 'format' hint is anything other than 'sqlite'.
        """
        # Work on a copy so the default format is not written into the
        # caller's dictionary.
        hints = dict(hints) if hints is not None else {}
        if hints.setdefault("format", "sqlite") != "sqlite":
            raise NotImplementedError("Invalid format type.")

        if isinstance(tables, str):
            tables = [tables]

        # 'overwrite' only suppresses the read of the pre-existing export;
        # the read that follows the fresh export must always run.
        if not hints.get("overwrite", False):
            df = self._read_sql_query(query, tables, False, hints)
            if df is not None:
                return df

        if not self.export(tables, hints):
            return None

        return self._read_sql_query(query, tables, True, hints)

    def list_tables(self, hints: Optional[dict[str, Any]] = None) -> list[str]:
        """List the tables available in the export of the report file.

        Only the 'format' and 'path' hints are used to locate the export.
        """
        if hints is None:
            hints = {}

        service = self._service_factory.get_service(hints.get("format", DEFAULT_FORMAT))
        export_path = self.get_export_path(service, hints.get("path"))

        return service.list_tables(export_path)
