#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Script for building and pushing the HTCondor Docker containers.


Example usage:


    Build the latest version of the feature series for the default distro:
        %(prog)s

    Build the latest daily build for all distros that support daily builds
    and push to the default registry:
        %(prog)s --distro=all --push --version=daily

    Build version 9.11.0 for EL8 and push to the OSG registry:
        %(prog)s --distro=el8 --version=9.11.0 --push --registry=osg

    Build the latest version of the 9.x series for Ubuntu 20.04:
        %(prog)s --distro=ubu20.04 --series=9.x

    Build the latest version of the current feature series for the default distro,
    and push it to the `myuser` dockerhub user.

        %(prog)s --prefix=myuser/ --push --registry=dockerhub

    Same but with a prefix (naming an image "htcondor-execute" instead of "execute")
        %(prog)s --prefix=myuser/htcondor- --push --registry=dockerhub

"""
from collections import defaultdict
import locale
import re
import subprocess
import sys
import argparse
import time
import random
from typing import Dict, List, Set, Union

# mypy: no-strict-optional


# Distro keys that can be selected with --distro for release builds.
SUPPORTED_DISTROS = ["el7", "el8", "ubu20.04"]

# Distro keys that can be selected when --version=daily.
SUPPORTED_DAILY_DISTROS = ["el7", "el8", "ubu20.04"]

# Upstream image passed as the BASE_IMAGE build argument for each distro key.
DISTRO_IMAGE = {
    "el7": "centos:7",
    "el8": "almalinux:8",
    "ubu20.04": "ubuntu:20.04",
}


# Derived image types; each is built from the `base` image.
IMAGES = ["execute", "submit", "cm", "mini"]

# Default value of --distro, and the only distro whose images get the
# distro-less `latest` / `lts` tags.
DISTRO_FOR_LATEST_TAGS = "el8"


# Default value of --registry.
DEFAULT_REGISTRY = "docker.io"


def today() -> str:
    """Return the current local date as a YYYYMMDD string (used in tags)."""
    now = time.localtime()
    return time.strftime("%Y%m%d", now)


def parse_condor_version_str(condor_version_str: str) -> Dict[str, str]:
    """Extract various information from a CondorVersion string.

    The input is the text found between `$CondorVersion:` and the closing `$`
    in the output of `condor_version`.  Two layouts are understood; they
    differ only in how the build date is written.

    Current format (date already in YYYY-MM-DD form):

    >>> parse_condor_version_str('9.9.0 2022-05-09 BuildID: 586544 PackageID: 9.9.0-0.586544')
    {'version': '9.9.0', 'date': '2022-05-09', 'build_id': '586544', 'package_id': '9.9.0-0.586544'}

    Old format (date like `Jun 30 2020`; it is converted to YYYY-MM-DD):

    >>> parse_condor_version_str('8.9.8 Jun 30 2020 BuildID: 508520 PackageID: 8.9.8-0.508520')
    {'version': '8.9.8', 'date': '2020-06-30', 'build_id': '508520', 'package_id': '8.9.8-0.508520'}

    Returns a dict with the keys `version`, `date`, `build_id` and `package_id`.

    Raises RuntimeError if the string matches neither layout, or if an
    old-format date cannot be interpreted.

    """
    new_format = re.match(
        r"""
                     (?P<version>[0-9]+[.][0-9]+[.][0-9]+) \s+
                     (?P<date>\d+-\d+-\d+) \s+
                     BuildID:\s+(?P<build_id>\S+) \s+
                     PackageID:\s+(?P<package_id>[^\n]+)
                     $""",
        condor_version_str,
        re.X,
    )
    if new_format:
        return dict(new_format.groupdict())

    old_format = re.match(
        r"""
                     (?P<version>[0-9]+[.][0-9]+[.][0-9]+) \s+
                     (?P<date>\w+\s+\d+\s+\d+) \s+
                     BuildID:\s+(?P<build_id>\S+) \s+
                     PackageID:\s+(?P<package_id>[^\n]+)
                     $""",
        condor_version_str,
        re.X,
    )
    if not old_format:
        raise RuntimeError("Can't parse CondorVersion: %s" % condor_version_str)

    parsed = dict(old_format.groupdict())
    try:
        # %b is locale-dependent; main() pins the locale to "C" before any parsing happens.
        build_date = time.strptime(parsed["date"], "%b %d %Y")
    except ValueError as err:
        # The regex accepted the shape but the words/numbers aren't a real date;
        # report it the same way as any other unparseable CondorVersion.
        raise RuntimeError("Can't parse CondorVersion: %s" % condor_version_str) from err
    parsed["date"] = time.strftime("%Y-%m-%d", build_date)
    return parsed


def series_from_version(version: str) -> str:
    """Map a full version string onto the name of the series it belongs to.

    A zero minor number marks an LTS series, which keeps its ".0":
    "9.0.*" gives "9.0".  Every other minor number belongs to the feature
    series of that major number: "9.1.*", "9.2.*", "9.3.*" etc give "9.x".

    Raises ValueError if the string does not start with MAJOR.MINOR.PATCH.

    """
    parsed = re.match(r"([0-9]+)[.]([0-9]+)[.][0-9]+", version)
    if parsed is None:
        raise ValueError(f"{version} cannot be parsed as an HTCondor version")

    major = parsed.group(1)
    minor = parsed.group(2)
    series_suffix = "0" if minor == "0" else "x"
    return f"{major}.{series_suffix}"


class KnownVersions:
    """Contains information about known HTCondor versions and series, as extracted from Git tags.

    Versions older than 9.0.0 are ignored.

    Attributes (every list is sorted oldest to newest):
    - version_list: all known versions, e.g. `9.0.0`
    - series_list: all known series, LTS (`9.0`) and feature (`9.x`) alike
    - lts_series_list: the series whose minor number is 0
    - feature_series_list: the series whose minor number is not 0
    - version_lists_by_series: dict mapping a series to its list of versions

    """

    def __init__(self):
        # Tags look like V9_0_0 or v9.0.0; the glob is a coarse prefilter and
        # the regex below does the real validation.
        ret = subprocess.run(
            ["git", "tag", "--list", "[vV]*[._]*[._]*"],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            check=True,
        )
        version_regex = re.compile(r"^[vV]([0-9]+)[._]([0-9]+)[._]([0-9]+)$")

        versions_set = set()
        feature_series_set = set()
        lts_series_set = set()
        versions_by_series = defaultdict(set)

        for line in ret.stdout.split(b"\n"):
            match = version_regex.match(line.decode("latin-1"))
            if not match:
                continue
            version = ".".join(match.group(1, 2, 3))
            major, minor = [int(x) for x in match.group(1, 2)]
            if major < 9:  # skip pre-9.0 versions, where the stable/devel series were numbered differently
                continue
            versions_set.add(version)
            if minor == 0:
                ser = f"{major}.0"
                lts_series_set.add(ser)
            else:
                ser = f"{major}.x"
                feature_series_set.add(ser)
            versions_by_series[ser].add(version)

        assert versions_set, "We should have found some versions or errored out"

        def version_sorted(iterable):
            return sorted(iterable, key=self._version_key)

        # fmt:off
        self.version_lists_by_series = {ser: version_sorted(vers) for ser, vers in versions_by_series.items()}
        self.version_list            = version_sorted(versions_set)
        self.feature_series_list     = version_sorted(feature_series_set)
        self.lts_series_list         = version_sorted(lts_series_set)
        self.series_list             = version_sorted(feature_series_set.union(lts_series_set))
        # fmt:on

    @property
    def latest_version(self) -> str:
        """Return the latest version of any series.
        Usually the same as latest_feature_version except at the start of a new LTS series
        where we may not have a feature release yet.

        """
        return self.version_list[-1]

    @property
    def latest_series(self) -> str:
        """Return the latest series.  Usually the same as latest_feature_series
        except at the start of a new LTS series where we may not have a feature release yet.

        """
        return self.series_list[-1]

    @property
    def latest_lts_series(self) -> str:
        """Return the latest LTS series as a string."""
        return self.lts_series_list[-1]

    @property
    def latest_feature_series(self) -> str:
        """Return the latest feature series as a string."""
        return self.feature_series_list[-1]

    @property
    def latest_lts_version(self) -> str:
        """Return the latest LTS version as a string."""
        return self.version_lists_by_series[self.latest_lts_series][-1]

    @property
    def latest_feature_version(self) -> str:
        """Return the latest feature version as a string."""
        # Must index by the *feature* series; using the LTS series here would
        # just duplicate latest_lts_version.
        return self.version_lists_by_series[self.latest_feature_series][-1]

    @staticmethod
    def _version_key(version_str: str) -> List[int]:
        """Sorting key for a version string e.g. '9.0.0' or a series string e.g. '9.x'"""
        new_version_str = re.sub(r"\b[Xx]\b", "999", version_str)
        # ^^ hack: have e.g. '9.x' sort at the end of 9.whatever by turning it into '9.999'
        return [int(v) for v in new_version_str.split(".")]


class VersionInfo:
    """Holds what `condor_version` reports about an installed HTCondor.

    The two raw strings are stored as given; the CondorVersion string is also
    split by parse_condor_version_str() into `version`, `date`, `build_id`
    and `package_id` attributes.
    """

    def __init__(self, condor_version_str, condor_platform_str):
        # Raw values, kept verbatim for the image labels.
        self.condor_version_str = condor_version_str
        self.condor_platform_str = condor_platform_str

        # Individual fields pulled out of the CondorVersion string.
        fields = parse_condor_version_str(condor_version_str)
        self.version = fields["version"]
        self.date = fields["date"]
        self.build_id = fields["build_id"]
        self.package_id = fields["package_id"]


class Builder:
    """Handles all image builds for a single version

    Fields:
    - executable: `docker` or `podman`

    - series: str, a series like `9.0` or `9.x`

    - version: the desired version -- can be numbers (`9.11.0`), `daily`, or `latest`
    - want_latest_version: bool, `latest` was passed to the `version` argument of the constructor
    - want_daily: bool, `daily` was passed to the `version` argument of the constructor

    - prefix: goes before the image names;
        the image names are `base`, `execute`, `cm`, etc. so with a prefix of
        `htcondor/`, you get `htcondor/base` etc.

    - one_tag: bool, only create the most specific tag for each image
    - known_versions: a KnownVersions object, used to decide which release
        tags (series, `latest`, `lts`) an image qualifies for
    - no_cache: bool, pass `--no-cache` when building the base image
    - skip_git_tag_check: bool, for `latest` builds, tolerate an installed
        version that differs from the latest Git tag of the series

    series is not passed to the image build if an exact version was requested,
    but tags_to_make_release() still uses it to decide which extra tags apply.

    """

    def __init__(self, executable, series, version, prefix, one_tag: bool, known_versions: "KnownVersions",
                 no_cache: bool, skip_git_tag_check: bool):
        self.executable = executable
        self.series = series
        self.prefix = prefix
        self.version = version
        self.want_latest_version = version == "latest"
        self.want_daily = version == "daily"
        self.one_tag = one_tag
        self.known_versions = known_versions
        self.no_cache = no_cache
        self.skip_git_tag_check = skip_git_tag_check

    def docker(self, *args, **kwargs):
        """Runs a single docker/podman command with subprocess.run() and returns the result.

        Accepts all keyword arguments that subprocess.run() does.

        """
        return subprocess.run([self.executable] + list(args), **kwargs)

    def docker_tag(self, old_name, *new_names):
        """Create new tags for an existing image.  Old versions of docker could
        only create one new tag at a time.

        - old_name: the old tag
        - *new_names: one or more new tags, or a tuple/list of them;
            passing none at all is a no-op

        Raises subprocess.CalledProcessError if a tag command fails.

        """
        if new_names and isinstance(new_names[0], (tuple, list)):
            new_names = new_names[0]
        for new_name in new_names:
            # check=True: a failed tag would otherwise go unnoticed until a
            # later build or push trips over the missing image name.
            self.docker("tag", old_name, new_name, check=True)
            if self.executable == "podman":
                # Podman is silent when tagging so print something
                print(f"Tagged {old_name} as {new_name}")

    def get_condor_version_info(self, image_name: str) -> "VersionInfo":
        """Get the version information out of CondorVersion and CondorPlatform from a container by running `condor_version`.
        The output of `condor_version` looks like:

            $CondorVersion: 8.9.8 Jun 30 2020 BuildID: 508520 PackageID: 8.9.8-0.508520 $
            $CondorPlatform: x86_64_Fedora32 $

        Raises subprocess.CalledProcessError if the container command fails and
        RuntimeError if either line is missing from its output.

        """
        ret = self.docker(
            "run", "--rm", image_name, "condor_version",
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            check=True,
        )
        version_match = re.search(
            rb"^[$]CondorVersion:\s*([^\n]+)[$]$", ret.stdout, re.MULTILINE
        )
        platform_match = re.search(
            rb"^[$]CondorPlatform:\s*([^\n]+)[$]$", ret.stdout, re.MULTILINE
        )
        if version_match is None or platform_match is None:
            raise RuntimeError(
                "Couldn't find CondorVersion or CondorPlatform in condor_version output"
            )
        condor_version_str = version_match.group(1).rstrip().decode("latin-1")
        condor_platform_str = platform_match.group(1).rstrip().decode("latin-1")
        return VersionInfo(condor_version_str, condor_platform_str)

    def add_htcondor_labels(self, image_name: str, version_info: "VersionInfo"):
        """Take an existing docker image and add org.htcondor.* labels describing condor version information.

        The new image will overwrite the old one.

        - image_name: the name of the image to add labels to
        - version_info: a VersionInfo object containing the info obtained by
            running `condor_version` in the image

        """
        # The backslash-newlines below are string-literal continuations, so the
        # whole LABEL instruction ends up on a single line of the Dockerfile.
        dockerfile = """\
FROM %s
LABEL \
org.htcondor.condor-version="%s" \
org.htcondor.condor-platform="%s" \
org.htcondor.build-date="%s" \
org.htcondor.build-id="%s" \
org.htcondor.package-id="%s" \
org.htcondor.version="%s"
""" % (
            image_name,
            version_info.condor_version_str,
            version_info.condor_platform_str,
            version_info.date,
            version_info.build_id,
            version_info.package_id,
            version_info.version,
        )
        # "-f -" makes the build read the Dockerfile from stdin.
        subprocess.run(
            [self.executable, "build", "-t", image_name, "-f", "-", "."],
            input=dockerfile.encode(),
            check=True,
        )

    def tags_to_make_daily(self, distro: str, real_version: str, real_date: str) -> Set[str]:
        """The tags to make for a daily build.

        `:VERSION-DATE-DISTRO` is always listed.  The following are added if --one-tag is not specified:
        - `:MAJOR.MINOR-daily-DISTRO`
        - `:MAJOR.MINOR-DATE-DISTRO`

        VERSION and MAJOR.MINOR come from `real_version`, DATE from `real_date`
        with the dashes removed; build_base() passes in the values reported by
        `condor_version` inside the image.
        """
        tags = set()
        major_minor = ".".join(real_version.split(".")[0:2])
        real_date_no_dashes = real_date.replace("-", "")

        tags.add(f"{real_version}-{real_date_no_dashes}-{distro}")
        if self.one_tag:
            return tags
        tags.add(f"{major_minor}-{real_date_no_dashes}-{distro}")
        tags.add(f"{major_minor}-daily-{distro}")
        return tags

    def tags_to_make_release(self, distro: str, real_version: str) -> Set[str]:
        """The tags to make for a release (non-daily) build.

        `:VERSION-DISTRO` is always listed.  The following are added if --one-tag is not specified:
        - `:SERIES-DISTRO` (e.g. `9.x-el7`)
        - `:latest-DISTRO` or `:lts-DISTRO` if appropriate
        - `:DISTRO` as a synonym for `:latest-DISTRO` if appropriate
        - `:latest` or `:lts` for the default distro defined in `DISTRO_FOR_LATEST_TAGS`

        The extra tags are only given to the newest known version of the series.
        For a `latest` build whose installed version is not the newest Git tag
        of the series, RuntimeError is raised unless skip_git_tag_check is set.
        """
        tags = set()

        tags.add(f"{real_version}-{distro}")
        if self.one_tag:
            return tags

        latest_version_for_this_series = self.known_versions.version_lists_by_series[self.series][-1]
        if latest_version_for_this_series == real_version:
            tags.add(f"{self.series}-{distro}")
            if self.series == self.known_versions.latest_series:
                tags.add(f"latest-{distro}")
                tags.add(distro)
                if distro == DISTRO_FOR_LATEST_TAGS:
                    tags.add("latest")
            if self.series == self.known_versions.latest_lts_series:
                tags.add(f"lts-{distro}")
                if distro == DISTRO_FOR_LATEST_TAGS:
                    tags.add("lts")
        elif self.want_latest_version:
            # XXX how should we _really_ handle this?
            print(f"The installed version {real_version} does not match"
                  f" the latest Git tagged version for this series"
                  f" {latest_version_for_this_series}")
            if self.skip_git_tag_check:
                print("Not adding extra image tags due to the mismatch.")
            else:
                raise RuntimeError("Aborting due to version mismatch")

        return tags

    def build_base(self, distro: str) -> List[str]:
        """Build a base HTCondor image

        - distro: the distribution to build images for e.g. `el7`

        Returns a list of image names.

        Images are built from the files in the `base/` subdirectory.  One image
        is created but will have multiple tags.
        Assuming the default prefix of `htcondor/`, you will get
        `htcondor/base:TAG` for each TAG returned by `tags_to_make_daily()` or
        `tags_to_make_release()` for daily or non-daily builds, respectively.
        See those methods for the list.

        Raises RuntimeError if an exact version was requested but a different
        one ended up installed in the image.
        """
        args = [f"--build-arg=BASE_IMAGE={DISTRO_IMAGE[distro]}"]
        if self.want_latest_version:
            args.extend(
                [f"--build-arg=SERIES={self.series}", "--build-arg=VERSION=latest"]
            )
        elif self.want_daily:
            args.extend(
                [f"--build-arg=SERIES={self.series}", "--build-arg=VERSION=daily"]
            )
        else:
            args.append(f"--build-arg=VERSION={self.version}")

        # We don't know up front what the version of the installed software will be.
        # Give the image a temporary name (better than parsing `docker build` output
        # for the hash); we'll change the name once we know what's in there.
        tmp_image_name = f"{self.prefix}base:tmp{random.randint(10000,99999)}"
        args.extend(["-t", tmp_image_name, "-f", "base/Dockerfile", "base"])
        if self.no_cache:
            args.append("--no-cache")

        self.docker("build", *args, stdin=subprocess.DEVNULL, check=True)

        try:
            # Get the real version and series by running condor_version in the image.
            version_info = self.get_condor_version_info(tmp_image_name)
            real_version = version_info.version
            real_date = version_info.date

            # sanity check: if requesting a specific version, make sure that version got installed
            if not self.want_daily and not self.want_latest_version and real_version != self.version:
                raise RuntimeError(
                    f"condor_version version {real_version} does not match requested version {self.version}"
                )

            if self.want_daily:
                real_image_names = [f"{self.prefix}base:{tag}" for tag in self.tags_to_make_daily(distro, real_version, real_date)]
            else:
                real_image_names = [f"{self.prefix}base:{tag}" for tag in self.tags_to_make_release(distro, real_version)]

            self.add_htcondor_labels(tmp_image_name, version_info)
            self.docker_tag(tmp_image_name, *real_image_names)

            return real_image_names
        finally:
            # Best effort: drop the temporary name whether or not the steps above succeeded.
            self.docker("rmi", tmp_image_name, check=False)

    def build_derived(
        self, image: str, base_image: str, new_tags: List[str], distro: str
    ):
        """Build a single derived image.

        - image: the new type of image, e.g. `execute`; must be a subdirectory
            with a Dockerfile
        - base_image: the image name to be used in the FROM line
        - new_tags: a list of tags to be applied to the built image
        - distro: the distribution, e.g. `el7`.  Currently used for getting the
            right PACKAGE_LIST for the execute image

        """
        args = [f"--build-arg=BASE_IMAGE={base_image}"]
        if image == "execute":
            args.append(f"--build-arg=PACKAGE_LIST=packagelist-{distro}.txt")

        for new_tag in new_tags:
            args.extend(["-t", new_tag])
        args.extend(["-f", f"{image}/Dockerfile", image])

        self.docker("build", *args, stdin=subprocess.DEVNULL, check=True)

    def push_image(self, image: str, registry: str):
        """Push the named image to the given registry.

        - image: the full image name with tag, e.g. `htcondor/base:9.11.0-el7`
        - registry: the host:port (port optional) of the registry, e.g. `docker.io`

        Normally done by tagging the image with the registry, pushing the
        image, and removing the new tag.

        Docker (real docker, not podman) considers the 'docker.io' registry
        special so the full sequence is not necessary (and would cause the
        original image to be deleted as well).

        """

        if self.executable == "docker" and registry == "docker.io":
            self.docker("push", image, check=True)
        else:
            image_with_registry = f"{registry}/{image}"
            self.docker("tag", image, image_with_registry, check=True)
            self.docker("push", image_with_registry, check=True)
            self.docker("rmi", image_with_registry, check=False)


def parse_arguments(argv, known_versions):
    """Parse and validate the command line.

    - argv: the full argument vector, program name first (e.g. `sys.argv`)
    - known_versions: a KnownVersions object, used to resolve the `latest`,
        `feature` and `lts` series aliases and to validate explicit
        series/version values

    Returns a tuple `(args, distros, images)`:
    - args: the argparse namespace, with `series` resolved to a real series
        (derived from `version` when an exact version is given), `version`
        lowercased, and the `dockerhub`/`osg` registry aliases expanded
    - distros: list of distro keys to build for
    - images: list of derived image types to build

    Invalid input is reported through `parser.error()`, which exits the program.
    Raises RuntimeError if a daily build is requested for a distro without daily builds.

    """
    parser = argparse.ArgumentParser(
        prog=argv[0],
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--version",
        help="Version of HTCondor to install. "
        "Can be an exact version (e.g. '9.11.0'), 'daily', or 'latest' "
        "(default %(default)s)",
        default="latest",
    )
    parser.add_argument(
        "--distro",
        help="Distribution to build/push for (default '%(default)s')",
        choices=SUPPORTED_DISTROS + ["all"],
        default=DISTRO_FOR_LATEST_TAGS,
    )
    parser.add_argument(
        "--series",
        help="The release series for daily or latest builds (default '%(default)s'). "
        f"Either an explicit series (e.g. '9.0', '9.x'), 'latest' (which is '{known_versions.latest_series}'), "
        f"'feature' (which is '{known_versions.latest_feature_series}'), "
        f"or 'lts' (which is '{known_versions.latest_lts_series}'). "
        "Ignored if a specific version is provided with --version.",
        default="feature",
    )
    parser.add_argument(
        "--push", action="store_true", dest="push", help="Push resulting builds"
    )
    parser.add_argument(
        "--registry",
        help="Registry to push to (default %(default)s). Enter HOST[:PORT] "
        "or 'dockerhub' for Docker Hub, 'chtc' for the CHTC registry, "
        "or 'osg' for the OSG registry.",
        default=DEFAULT_REGISTRY,
    )
    parser.add_argument(
        "--no-mini", action="store_false", dest="mini", help="Do not build minicondor"
    )
    parser.add_argument(
        "--podman",
        action="store_const",
        const="podman",
        dest="executable",
        default="docker",
        help="Use podman",
    )
    parser.add_argument(
        "--no-cache",
        action="store_true",
        help="Tell docker build/podman build not to use cached layers and always start building from scratch",
    )
    parser.add_argument(
        "--base-only", action="store_true", help="Build htcondor/base only"
    )
    parser.add_argument(
        "--prefix",
        help="Prefix to use for the image names. Must include a '/' and must not "
        "include a ':'.  Default: '%(default)s'; use something like "
        "'myusername/htcondor-' to push images named 'htcondor-base', etc. "
        "to your own docker account.",
        default="htcondor/",
    )
    parser.add_argument(
        "--one-tag",
        action="store_true",
        help="Only create images with the most specific version tags (e.g. '9.11.0-el7' but not '9.x-el7' or '9.x')",
    )
    parser.add_argument(
        "--skip-git-tag-check",
        action="store_true",
        help="Don't error out if the latest version does not match the latest Git tag",
    )
    args = parser.parse_args(argv[1:])

    # Validate series, get the real series
    args.series = str(args.series).lower()
    if re.match(r"[0-9]+[.][0x]$", args.series):
        if args.series not in known_versions.series_list:
            parser.error(f"Unknown or unsupported HTCondor series '{args.series}'; supported series are: " +
                         ", ".join(known_versions.series_list))
    elif args.series == "lts":
        args.series = known_versions.latest_lts_series
    elif args.series == "feature":
        args.series = known_versions.latest_feature_series
    elif args.series == "latest":
        args.series = known_versions.latest_series
    else:
        parser.error(
            f"Bad series '{args.series}' -- should be like '9.x', 'lts', 'feature', or 'latest'"
        )

    # Validate version and series.  parser.error() never returns, so each
    # check below only runs if the previous ones passed.
    args.version = str(args.version).lower()
    version_match = re.match(r"([0-9]+)[.]([0-9]+)[.]([0-9]+)$", args.version)
    if version_match:
        if int(version_match.group(1)) < 9:
            parser.error(f"Bad version '{args.version}': versions before 9.0 are unsupported.")
        if args.version not in known_versions.version_list:
            parser.error(f"Unknown or unsupported HTCondor version '{args.version}'; supported versions are: " +
                         ", ".join(known_versions.version_list))
        # get the series; overwrites --series if it's specified.
        # Without this an exact version would be tagged as if it belonged to
        # whatever --series resolved to (the feature series by default).
        args.series = series_from_version(args.version)
    elif args.version not in ["daily", "latest"]:
        parser.error(
            f"Bad version '{args.version}' -- should be like '9.11.0', 'daily', or 'latest'"
        )

    if not re.fullmatch(r"[0-9a-z][.0-9a-z-]*/[.0-9a-z-]*", args.prefix):
        parser.error(f"Invalid prefix '{args.prefix}'")

    # Work on copies so the module-level lists are never modified.
    images = list(IMAGES)
    if not args.mini:
        images.remove("mini")

    if args.distro == "all":
        if args.version == "daily":
            distros = list(SUPPORTED_DAILY_DISTROS)
        else:
            distros = list(SUPPORTED_DISTROS)
    else:
        assert args.distro in SUPPORTED_DISTROS, "Should have been caught"
        if args.version == "daily":
            if args.distro not in SUPPORTED_DAILY_DISTROS:
                raise RuntimeError("Daily builds aren't available for this distro")
        distros = [args.distro]

    # Expand registry aliases; anything else is taken as HOST[:PORT].
    # NOTE(review): the --registry help text also advertises a 'chtc' alias,
    # but no mapping for it exists here -- confirm the hostname and add it.
    if args.registry.lower() == "dockerhub":
        args.registry = "docker.io"
    elif args.registry.lower() == "osg":
        args.registry = "hub.opensciencegrid.org"

    return args, distros, images


def main(argv) -> Union[int,str]:
    """Build the requested images and, with --push, push them.

    - argv: the full argument vector, program name first

    Every base and derived image is built before anything is pushed.
    Returns 0 on success; failures surface as exceptions (or as an argparse
    exit for bad command-line input).
    """
    locale.setlocale(locale.LC_ALL, "C")  # no parsing surprises please

    # Reads the Git tags of the current repository; errors are not caught here.
    known_versions = KnownVersions()
    args, distros, images = parse_arguments(argv, known_versions)

    builder = Builder(
        args.executable,
        args.series,
        args.version,
        args.prefix,
        one_tag=args.one_tag,
        known_versions=known_versions,
        no_cache=args.no_cache,
        skip_git_tag_check=args.skip_git_tag_check,
    )

    images_to_push = []
    for distro in distros:
        base_names = builder.build_base(distro)
        images_to_push += base_names
        if args.base_only:
            continue
        for image in images:
            # Same tags as the base image, under the derived image's name.
            derived_names = [re.sub(r"base:", f"{image}:", name) for name in base_names]
            builder.build_derived(image, base_names[0], derived_names, distro)
            images_to_push += derived_names

    if args.push:
        for name in images_to_push:
            builder.push_image(name, args.registry)

    return 0


# Script entry point: hand the raw command line to main() and use its
# return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
