#!/usr/bin/python3

# SPDX-License-Identifier: MPL-2.0
# SPDX-FileCopyrightText: 2020-2025 Collabora Ltd.
# SPDX-FileCopyrightText: 2020-2025 Walter Lozano <walter.lozano@collabora.com>
# SPDX-FileCopyrightText: 2021 Emanuele Aina <emanuele.aina@collabora.com>
# SPDX-FileCopyrightText: 2022 Vignesh Raman <vignesh.raman@collabora.com>
# SPDX-FileCopyrightText: 2022 Andre Moreira Magalhaes <andre.magalhaes@collabora.com>
# SPDX-FileCopyrightText: 2022-2024 Ryan Gonzalez <ryan.gonzalez@collabora.com>
# SPDX-FileCopyrightText: 2022-2025 Dylan Aïssi <dylan.aissi@collabora.com>
# SPDX-FileCopyrightText: 2022 Detlev Casanova <detlev.casanova@collabora.com>
# SPDX-FileCopyrightText: 2024 Andrej Shadura <andrew.shadura@collabora.co.uk>

import argparse
import gzip
import json
import os
import sys
from os.path import isdir, isfile, join

# Default locations used when -d/--dir and -s/--dpkg-status are not given.
DEFAULT_METADATA_DIR = "/usr/share/doc"
DEFAULT_DPKG_STATUS = "/var/lib/dpkg/status"

# Levels accepted by -v/--verbose. Per that option's help text:
# 0 = image, 1 = package, 2 = binary, 3 = source.
# NOTE(review): only VERBOSE_IMAGE is compared against in the code visible
# in this file; the higher levels currently behave like VERBOSE_PACKAGE.
VERBOSE_IMAGE = 0
VERBOSE_PACKAGE = 1
VERBOSE_INSTALLED_FILE = 2
VERBOSE_SOURCE = 3

# NOTE(review): neither of these two constants is referenced by the code
# visible in this file -- confirm whether other tooling relies on them
# before changing or removing them.
COPYRIGHT_LENGTH = 500
COPYRIGHT_CONTAINS_NONASCII_CHARS = "CopyrightContainsNonAsciiCharacters"

# Placeholder recorded as license/copyright when no information is available.
NO_INFO_FOUND = "NoInfoFound"


def open_potentially_gzipped(path, *args, **kw):
    """Open *path*, going through gzip when its name ends in ``.gz``.

    Any extra positional and keyword arguments are forwarded unchanged to
    whichever opener is selected (``gzip.open`` or the builtin ``open``);
    no mode or encoding is imposed here.

    Returns:
        The file object produced by the selected opener.
    """
    opener = gzip.open if str(path).endswith(".gz") else open
    return opener(path, *args, **kw)


class BomGenerator:
    """Build a license/copyright bill of materials (BOM) for an image.

    The generator walks the sub-directories of ``metadata_dir`` looking for
    files whose name contains ``_metadata_``, loads each one as JSON and
    merges its ``license`` and ``copyright`` entries into image-wide sets.
    Package names found in the dpkg status file that have no metadata file
    are reported as lacking information.
    """

    def __init__(self, metadata_dir, dpkg_status, verbose):
        """Store the scan configuration.

        Args:
            metadata_dir: Directory whose sub-directories hold the metadata
                files.
            dpkg_status: Path of the dpkg status file to read package names
                from.
            verbose: Integer level; any value above ``VERBOSE_IMAGE`` adds a
                per-package list to the generated BOM.
        """
        self.metadata_dir = metadata_dir
        self.dpkg_status = dpkg_status
        self.verbose = verbose

    def get_installed_packages(self):
        """Return the set of package names listed in the dpkg status file.

        Every ``Package:`` field is collected.
        NOTE(review): the stanza's ``Status:`` field is not inspected, so
        packages that are listed but not fully installed are included too.

        Raises:
            OSError: If the status file cannot be opened.
        """
        installed_packages = set()
        # Package names are ASCII; undecodable bytes elsewhere in the file
        # (e.g. in descriptions) must not abort the scan.
        with open(
            self.dpkg_status, encoding="utf-8", errors="replace"
        ) as dpkg_status:
            # Iterate lazily instead of loading the whole file at once.
            for line in dpkg_status:
                if not line.startswith("Package:"):
                    continue
                # Take everything after the colon so the amount of
                # whitespace following it does not matter.
                name = line.partition(":")[2].strip()
                if name:
                    installed_packages.add(name)

        return installed_packages

    def scan_metadata(self):
        """Scan the metadata directory and return the BOM as a dict.

        Returns:
            A dict with the keys ``license`` and ``copyright`` (lists of the
            distinct values found across all packages, in no particular
            order). When ``verbose`` is above ``VERBOSE_IMAGE`` it also has a
            ``packages`` key: a list of ``{"name", "license", "copyright"}``
            dicts, one per metadata file, followed by one placeholder entry
            per package that is in the dpkg status file but has no metadata
            file (sorted by name).

        Raises:
            OSError: If a directory or file cannot be read.
            json.JSONDecodeError: If a metadata file is not valid JSON.
        """
        image_licenses = set()
        image_copyright = set()
        packages = []
        processed_packages = set()
        installed_packages = self.get_installed_packages()
        per_package = self.verbose > VERBOSE_IMAGE

        for d in os.listdir(self.metadata_dir):
            dirpath = join(self.metadata_dir, d)
            if not isdir(dirpath):
                continue
            for f in os.listdir(dirpath):
                path = join(dirpath, f)
                if "_metadata_" not in f or not isfile(path):
                    continue

                # The package name is the part of the file name before the
                # first underscore.
                package_name = f.split("_")[0]

                with open_potentially_gzipped(path) as fm:
                    metadata = json.load(fm)
                package = {
                    "name": package_name,
                    "license": metadata.get("license", [NO_INFO_FOUND]),
                    "copyright": metadata.get("copyright", [NO_INFO_FOUND]),
                }

                processed_packages.add(package_name)
                if per_package:
                    packages.append(package)

                image_licenses.update(package["license"])
                image_copyright.update(package["copyright"])

        missing_packages = sorted(installed_packages - processed_packages)
        if missing_packages:
            print(
                "WARNING: there are packages without license information",
                file=sys.stderr,
            )
            if per_package:
                # Record a placeholder entry for each package without
                # metadata so it is visible in the per-package listing
                # (previously the entry was built but never stored).
                packages.extend(
                    {
                        "name": name,
                        "license": [NO_INFO_FOUND],
                        "copyright": [NO_INFO_FOUND],
                    }
                    for name in missing_packages
                )

            image_licenses.add(NO_INFO_FOUND)
            image_copyright.add(NO_INFO_FOUND)

        bom = {"license": list(image_licenses), "copyright": list(image_copyright)}
        if per_package:
            bom["packages"] = packages

        return bom


def main(argv):
    """Command-line entry point: generate the BOM and emit it as JSON.

    Args:
        argv: List of command-line arguments, excluding the program name.
            Passing ``None`` makes argparse fall back to ``sys.argv[1:]``.

    The JSON document is written to the file given with ``-o/--out`` or,
    when that option is absent, printed to standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--dir",
        default=DEFAULT_METADATA_DIR,
        help="directory to search for information",
    )
    parser.add_argument("-o", "--out", help="output file")
    parser.add_argument(
        "-s", "--dpkg-status", default=DEFAULT_DPKG_STATUS, help="dpkg status file"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        type=int,
        default=VERBOSE_IMAGE,
        help="verbose use in output 0: image, 1: package, 2: binary, 3: source",
    )

    # Parse the arguments handed to this function rather than silently
    # re-reading sys.argv, so callers can drive main() programmatically.
    args = parser.parse_args(argv)

    bom_generator = BomGenerator(args.dir, args.dpkg_status, args.verbose)

    bom = bom_generator.scan_metadata()

    if args.out:
        # The file is only written, so plain write mode is sufficient.
        with open(args.out, "w") as output:
            json.dump(bom, output)
    else:
        print(json.dumps(bom))


# Run the command-line interface only when executed as a script; the program
# name is stripped from the argument list before it is handed to main().
if __name__ == "__main__":
    main(sys.argv[1:])
