import json
import re
import logging
from datetime import datetime, time
from typing import Any, Optional

import boto3
import pytz
from typeguard import typechecked

from psi_elt import get_logger

# String identifiers for the individual ELT job types.
EXTRACTOR_ELT_JOB_TYPE = "extractor"
LOADER_ELT_JOB_TYPE = "loader"
TRANSFORMER_ELT_JOB_TYPE = "transformer"

# Every job type that AbstractELTJob accepts for its `elt_job_type` argument.
ELT_JOB_TYPES = [
    EXTRACTOR_ELT_JOB_TYPE,
    LOADER_ELT_JOB_TYPE,
    TRANSFORMER_ELT_JOB_TYPE,
]


@typechecked
class AbstractELTJob:
    """Base class bundling the state and helpers shared by ELT jobs.

    An instance stores the job name, its type (one of ``ELT_JOB_TYPES``), a capture time
    normalised to UTC, and the 06:00 UTC "default capture time" of the capture day. It also
    offers small helpers for reading from S3 / SSM and for formatting and parsing dates.
    """

    def __init__(
        self,
        name: str,
        elt_job_type: str,
        capture_time: Optional[datetime] = None,
        aws_session: Optional[Any] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Initialize the job and derive its capture timestamps.

        Args:
            name: Non-empty name of the job.
            elt_job_type: One of ``ELT_JOB_TYPES``.
            capture_time: Capture time of the job. ``None`` means "now"; a naive value is
                taken to already be in UTC; an aware value is converted to UTC.
            aws_session: Object used to create AWS clients via ``.client(...)``. A new
                ``boto3.Session()`` is created when omitted.
            logger: Logger to use. Falls back to ``get_logger()`` when omitted.

        Raises:
            AssertionError: If ``name`` is empty or ``elt_job_type`` is not a known type.
        """
        assert name, "A name is required to initialize an AbstractELTJob"
        assert elt_job_type in ELT_JOB_TYPES, f"Invalid ELT job type: {elt_job_type}"
        self.__name = name
        self.__elt_job_type = elt_job_type
        self.__aws_session = aws_session or boto3.Session()
        # The logger must be set before the first use of `self.logger` below.
        self.__logger = logger or get_logger()

        # Normalise the capture time to an aware UTC datetime.
        if capture_time is None:
            self.logger.warning("No capture time provided, using now as the capture time")
            self.__capture_time = datetime.now(pytz.utc)
        elif capture_time.tzinfo is None:
            # Naive datetimes are taken to be UTC already: attach the zone without shifting.
            self.__capture_time = capture_time.replace(tzinfo=pytz.utc)
        else:
            self.__capture_time = capture_time.astimezone(pytz.utc)

        # Default capture time of 6am UTC on the (UTC) day of the capture time.
        self.__default_capture_time = datetime.combine(
            self.__capture_time.date(), time(hour=6, tzinfo=pytz.utc)
        )
        self.__default_capture_time_ms = self.dt_to_ms(self.__default_capture_time)
        self.logger.info(f"Initialized ELT job: {self}")

    @property
    def name(self) -> str:
        """Name given to the job at construction."""
        return self.__name

    @property
    def elt_job_type(self) -> str:
        """Job type; one of ``ELT_JOB_TYPES``."""
        return self.__elt_job_type

    @property
    def capture_time(self) -> datetime:
        """Capture time as an aware UTC datetime."""
        return self.__capture_time

    @property
    def default_capture_time(self) -> datetime:
        """06:00 UTC on the day of ``capture_time``."""
        return self.__default_capture_time

    @property
    def default_capture_time_ms(self) -> int:
        """``default_capture_time`` as milliseconds since the Unix epoch."""
        return self.__default_capture_time_ms

    @property
    def aws_session(self) -> Any:
        """Session object used to create AWS clients."""
        return self.__aws_session

    @property
    def logger(self) -> logging.Logger:
        """Logger used by this job."""
        return self.__logger

    def get_s3_object(self, s3_uri: str) -> Any:
        """Download an S3 object and return its body decoded as UTF-8 text.

        Args:
            s3_uri: Location such as ``s3://bucket/path/to/key``. The ``s3://`` prefix is
                optional.

        Returns:
            The content of the object as a ``str``.

        Raises:
            AssertionError: If ``s3_uri`` is empty.
            ValueError: If the URI does not contain both a bucket and a key.
            Exception: Any error raised while fetching or decoding the object is logged
                and re-raised unchanged.
        """
        assert s3_uri

        # Strip only a leading scheme so keys that themselves contain "s3://" stay intact,
        # then split on the first "/" so a URI without a key yields an empty key instead of
        # silently reusing the bucket name as the key.
        s3_path = s3_uri.removeprefix("s3://")
        bucket, _, key = s3_path.partition("/")

        try:
            if not bucket or not key:
                raise ValueError(f"Invalid S3 URI (expected s3://bucket/key): {s3_uri}")
            client = self.aws_session.client("s3")
            response = client.get_object(Bucket=bucket, Key=key)
            return response["Body"].read().decode("utf-8")
        except Exception as e:
            self.logger.error(f"Failed to get s3 object {s3_path} with exception: {e}")
            raise

    def get_parameter(self, parameter: str) -> str:
        """Fetch a (decrypted) parameter value through the session's ``ssm`` client.

        Args:
            parameter: Name of the parameter to read.

        Returns:
            The parameter's value.

        Raises:
            AssertionError: If ``parameter`` is empty.
            Exception: If the response carries no value, or if the client call fails. The
                error is logged and re-raised unchanged.
        """
        assert parameter
        try:
            client = self.aws_session.client("ssm")
            response = client.get_parameter(Name=parameter, WithDecryption=True)
            value = response.get("Parameter", {}).get("Value")
            if value is None:
                raise Exception("Parameter value not found in response")
            return value
        except Exception as e:
            self.logger.error(f"Failed to get parameter {parameter} with exception: {e}")
            raise

    def format_date(self, dt: datetime) -> str:
        """Formats the date as a string. If the provided datetime is naive (no timezone), this
        assumes UTC and returns a string like "2021-05-26T01:02:03Z". The same "Z" form is used
        for aware datetimes whose UTC offset is zero. If the provided datetime has a non-zero
        UTC offset, returns a string like "2021-05-26T01:02:03-05:00".
        """
        # Decide on the offset rather than on identity with pytz.UTC, so that other UTC
        # implementations (e.g. datetime.timezone.utc) are formatted with "Z" as well.
        # utcoffset() is None for naive datetimes and a falsy timedelta(0) for UTC.
        if dt.utcoffset():
            return dt.isoformat(timespec="seconds")
        return dt.strftime("%Y-%m-%dT%H:%M:%SZ")

    def parse_date(self, dt: str) -> datetime:
        """Parses the given string into a datetime. By default, expects UTC offset information in
        the given string (e.g. "2021-05-26T01:02:03-05:00") and assigns the appropriate time zone.
        If dt is a string like "2021-05-26T01:02:03Z", assumes the timezone is UTC.

        Raises:
            ValueError: If the string matches neither of the two supported formats.
        """
        try:
            if dt.endswith("Z"):
                parsed = datetime.strptime(dt, "%Y-%m-%dT%H:%M:%SZ")
                return pytz.timezone("UTC").localize(parsed)
            # Drop the last ":" so that an offset like "-05:00" becomes "-0500" for %z.
            return datetime.strptime("".join(dt.rsplit(":", 1)), "%Y-%m-%dT%H:%M:%S%z")
        except Exception as e:
            raise ValueError(
                'Datetime must be a string like "2021-05-26T01:02:03Z" for UTC times or "2021-05-26T01:02:03-05:00" for non-UTC times'
            ) from e

    def is_tappid_compliant(self, t_appid: str) -> bool:
        """Return True when ``t_appid`` is exactly "SVC" followed by five digits."""
        # fullmatch (unlike match with "$") also rejects a trailing newline.
        return re.fullmatch(r"SVC[0-9]{5}", t_appid) is not None

    def dt_to_ms(self, dt: Optional[datetime]) -> int:
        """Convert a datetime to integer milliseconds since the Unix epoch.

        Args:
            dt: Datetime to convert, or ``None``.

        Returns:
            The rounded millisecond timestamp, or ``-1`` when ``dt`` is ``None``.

        Note:
            ``datetime.timestamp()`` interprets naive datetimes as local time, unlike
            ``format_date`` which assumes UTC for naive values; pass aware datetimes to get
            results that do not depend on the host's time zone.
        """
        # The annotation is Optional so that the None branch stays reachable under the
        # class-level @typechecked decorator.
        if dt is None:
            return -1
        return int(round(dt.timestamp() * 1000))

    def remove_none_from_dict(self, d: Optional[dict]) -> dict:
        """Return a shallow copy of ``d`` without the entries whose value is ``None``.

        ``None`` or an empty mapping yields an empty dict. The input is not modified.
        """
        if not d:
            return {}
        return {k: v for k, v in d.items() if v is not None}

    def to_dict(self) -> dict[str, Any]:
        """Return the job's identifying fields as a JSON-serialisable dict."""
        return {
            "name": self.name,
            "elt_job_type": self.elt_job_type,
            "capture_time": str(self.capture_time),
            "default_capture_time": str(self.default_capture_time),
            "default_capture_time_ms": self.default_capture_time_ms,
        }

    def __repr__(self) -> str:
        """Return the JSON encoding of ``to_dict()``."""
        return json.dumps(self.to_dict())
