#!/usr/bin/perl -w

use strict;
use warnings;
use File::Find;
use File::Path qw(remove_tree);
use Cwd qw(getcwd);
use DateTime;
use Digest::MD5 qw(md5_hex);

# Top of the tree to process: the first command-line argument.
my $dir = $ARGV[0];
die "Usage: $0 <directory>\n" unless defined $dir;

# Flat list of (path, basename) pairs, one pair per EPUB we unpack;
# consumed pairwise again by zip_epubs() in step 7.
my @epubs = ();

# Helper functions: read and write a file

# Slurp the whole file $f and return its contents as one string.
# Dies if the file cannot be opened.
sub read_file {
    my $f = shift;

    # Locally undef the input record separator so <$fh> reads everything.
    local $/;
    open(my $fh, '<', $f) or die "Could not open file $f for reading: $!";
    my $c = <$fh>;
    close($fh);
    return $c;
}

# Write $content to file $f, replacing any previous contents.
# Dies if the file cannot be opened or the buffered data cannot be flushed.
sub write_file {
    my $f = shift;
    my $content = shift;

    open(my $fh, '>', $f) or die "Could not open file $f for writing: $!";
    print $fh $content;
    # Buffered write errors (e.g. disk full) only surface at close.
    close($fh) or die "Could not close file $f after writing: $!";
}

# Short content fingerprint: the first 8 hex digits of the
# upper-cased MD5 of the given string.
sub digest {
    my ($data) = @_;

    return uc substr(md5_hex($data), 0, 8);
}

#   Step 1: Unzip all EPUB files

# Walk the tree rooted at $root and unpack every *.epub found there
# (the per-file work happens in the check_epub callback).
sub unzip_epubs {
    my ($root) = @_;

    find(\&check_epub, $root);
}

# Unpack one EPUB (a ZIP archive): $path is the path from the search
# root, $f the basename relative to the current directory (File::Find
# chdirs into each directory it visits).  The archive is extracted
# into "$f.dir" and the (path, basename) pair recorded for step 7.
sub unzip_epub {
    my ($path, $f) = @_;
    print "Unzipping $path...\n";
    my $d = "$f.dir";
    push @epubs, $path, $f;
    # List-form pipe open: no shell ever sees the file name, so names
    # containing spaces, quotes or shell metacharacters cannot break
    # (or subvert) the command, unlike the previous backtick form.
    open(my $pipe, '-|', 'unzip', '-d', $d, $f)
        or die "Could not run unzip on $f: $!";
    my $res = do { local $/; <$pipe> };
    close($pipe);
    # close() on a pipe waits for the child and sets $?.
    die "unzip failed for $f (exit status " . ($? >> 8) . ")\n" if $?;
    #print $res;
}

# File::Find callback: $_ is the basename in the current directory,
# $File::Find::name the path from the search root.
sub check_epub {
    my $name = $_;

    return unless -f $name;
    return unless $name =~ /\.epub$/;
    return if $name eq ".build.epub";    # skip this build artifact

    unzip_epub($File::Find::name, $name);
}

# Step 2: Find and sort all html, xhtml and js files

# Every html/xhtml/js/.build file found under the search root.
my @all_files = ();

# Fill @all_files by walking $root, then sort the list so later
# processing (and the stable-id numbering) is deterministic.
sub collect_files {
    my ($root) = @_;

    find(\&add_file, $root);

    @all_files = sort @all_files;
}

# File::Find callback: record html, xhtml, js and .build files.
sub add_file {
    my $name = $_;

    return unless -f $name;
    if ($name =~ /\.(?:x?html|js|build)$/) {
        push @all_files, $File::Find::name;
    }
}

# Step 3: Create the map of stable IDs to replace the random ones
# and also the list of non-html files (js) with random ids in them

# Scan the given files for data-group-id="<id>-<seq>" attributes.
# Returns two array refs: the distinct numeric id prefixes in order
# of first appearance, and the non-(x)html files (i.e. JS) containing
# such ids -- those may later need renaming, because their names can
# embed a digest of their contents.
sub grep_data_group_ids {
    my @files = @_;
    my @data_group_ids = ();
    my @js_files_with_group_ids = ();
    my %seen_id = ();    # O(1) dedup instead of a grep per match

    foreach my $f (@files) {
        my $content = read_file($f);
        my $matched = 0;

        # The optional backslash before the quote accepts ids found
        # inside string literals with \"-escaped quotes (presumably in
        # the JS files -- matches the escaping handled in step 4).
        foreach my $m ($content =~ /data-group-id=\\?"(\d+)-\d+\\?"/g) {
            $matched = 1;
            push @data_group_ids, $m unless $seen_id{$m}++;
        }

        if ($matched && $f !~ /\.x?html$/) {
            push @js_files_with_group_ids, $f;
        }
    }

    return \@data_group_ids, \@js_files_with_group_ids;
}

# Number the ids sequentially in the order given, zero-padded to ten
# digits, and return the random-id => stable-id map as a flat hash.
sub create_map_for_ids {
    my @ids = @_;

    my %id_map;
    my $counter = 0;
    $id_map{$_} = sprintf('%010d', ++$counter) for @ids;

    return %id_map;
}

# Step 4: Replace the random IDs by stable ones

# Rewrite every random group id to its stable replacement in all the
# given files.  For the JS files that contain such ids, additionally
# compute a rename plan: if a file's name embeds the digest of its old
# contents, map it to the name embedding the digest of the new
# contents.  Returns that rename map (old path => new path) flat.
sub replace_group_ids {
    my ($rfiles, $ridmap, $rjs_files_with_group_ids) = @_;
    my @files = @$rfiles;
    my @js_files_with_group_ids = @$rjs_files_with_group_ids;
    my %idmap = %$ridmap;
    my %rename_map = ();

    foreach my $f (@files) {
        my $content = read_file($f);
        # /r yields a modified copy, leaving $content intact for the
        # digest/equality comparisons below.  $1 preserves an optional
        # backslash before the quote (ids inside \"-escaped string
        # literals -- presumably in the JS files; see step 3's regex).
        my $newcontent = $content =~ s/data-group-id=(\\?)"(\d+)-(\d+\\?)"/data-group-id=$1"$idmap{$2}-$3"/gr;

        if (grep { $_ eq $f} @js_files_with_group_ids) {
            # Digest of the file before and after the rewrite; if the
            # old digest appears in the file name, schedule a rename to
            # the corresponding new-digest name (applied in step 5).
            my $old = digest($content);
            my $new = digest($newcontent);
            if ($f =~ /$old/) {
                $rename_map{$f} = $f =~ s/$old/$new/r;
            }
        }

        # Only touch the file on disk when something actually changed.
        if ($newcontent ne $content) {
            write_file($f, $newcontent);
        }
    }

    return %rename_map;
}

# Step 5: Rename the files with digests in their names and replace links to them

# Apply the rename map produced by replace_group_ids(): first rewrite
# every reference to a renamed basename inside all files, then rename
# the files themselves.
sub rename_files_with_digests {
    my ($rfiles, $rrenmap) = @_;
    my @files = @$rfiles;
    my %renmap = %$rrenmap;

    # Basename-level map (old name => new name) for link rewriting.
    my %hmap = ();
    foreach my $key (keys %renmap) {
        my $from = $key =~ s/.*\///r;
        # Was "%renmap{$key}": a key/value hash slice abused in scalar
        # context; a plain element lookup is what is meant here.
        my $to = $renmap{$key} =~ s/.*\///r;
        $hmap{$from} = $to;
        print "$from -> $to\n";
    }

    foreach my $f (@files) {
        my $content = read_file($f);
        my $newcontent = $content;

        foreach my $key (keys %hmap) {
            # \Q..\E: basenames contain dots (regex metacharacters);
            # match them literally, not as "any character".
            $newcontent =~ s/\Q$key\E/$hmap{$key}/g;
        }

        if ($newcontent ne $content) {
            write_file($f, $newcontent);
        }
    }

    foreach my $key (keys %renmap) {
        rename $key, $renmap{$key} or die "Could not rename $key to $renmap{$key}: $!";
    }
}

# Step 6: Drop all the <dc:identifier/> elements and fix <meta property="dcterms:modified"/>

# Reproducible timestamp for the dcterms:modified metadata, rendered
# in UTC as ISO-8601 "YYYY-MM-DDTHH:MM:SSZ".  SOURCE_DATE_EPOCH (the
# reproducible-builds convention) wins when set; otherwise the current
# time is used.
my $datetime = "";

if (exists $ENV{SOURCE_DATE_EPOCH}) {
    $datetime = DateTime
        ->from_epoch(epoch => $ENV{SOURCE_DATE_EPOCH}, time_zone => 'UTC')
        ->strftime('%FT%TZ');
} else {
    # Current wall-clock time in UTC -- the same instant the original
    # localtime -> local DateTime -> set_time_zone('UTC') dance produced.
    $datetime = DateTime->now(time_zone => 'UTC')->strftime('%FT%TZ');
}

# Walk the tree rooted at $root and clean every OPF package document
# (the per-file work happens in the check_opf callback).
sub clean_opfs {
    my ($root) = @_;

    find(\&check_opf, $root);
}

# Clean one OPF package document: remove the <dc:identifier> elements
# (they carry per-build random ids) and pin the dcterms:modified
# timestamp to the reproducible $datetime computed above.
sub clean_opf {
    my ($path, $f) = @_;

    my $content = read_file($f);

    # /g added: the step-6 comment says to drop ALL identifier
    # elements, but without /g only the first one was removed.
    my $newcontent1 = $content =~ s/^\s*<dc:identifier id=".*">.*<\/dc:identifier>\s*$//mgr;
    # Non-greedy .*? so the match cannot run past the first </meta>.
    my $newcontent2 = $newcontent1 =~ s/<meta\s+property="dcterms:modified">.*?<\/meta>/<meta property="dcterms:modified">$datetime<\/meta>/mr;

    if ($newcontent2 ne $content) {
        write_file($f, $newcontent2);
    }
}

# File::Find callback: hand every *.opf file to clean_opf.
sub check_opf {
    my $name = $_;

    return unless -f $name;
    return unless $name =~ /\.opf$/;

    clean_opf($File::Find::name, $name);
}

# Step 7: Zip all the EPUB files back and remove the unzipped contents

# Re-pack every EPUB recorded by unzip_epub() and delete its work
# directory.  @epubs holds (path, basename) pairs appended flat, so
# they are consumed two at a time.
sub zip_epubs {
    my $curdir = getcwd();

    while (scalar(@epubs) > 0) {
        my $path = shift(@epubs);
        my $f = shift(@epubs);

        my $dirpath = "$path.dir";
        print "Zipping $path...\n";
        # An unchecked chdir followed by zip and remove_tree would
        # operate on the WRONG directory on failure -- fail hard.
        chdir($dirpath) or die "Could not chdir to $dirpath: $!";
        # List-form pipe open avoids the shell, so archive names with
        # spaces or metacharacters cannot break the command.
        open(my $pipe, '-|', 'zip', '-r', "../$f", '.')
            or die "Could not run zip for $f: $!";
        my $res = do { local $/; <$pipe> };
        close($pipe);
        die "zip failed for $f (exit status " . ($? >> 8) . ")\n" if $?;
        chdir($curdir) or die "Could not chdir back to $curdir: $!";
        #print $res;
        remove_tree($dirpath);
    }
}

# Actually do all the steps

# Step 1: unpack every EPUB under $dir into sibling "*.dir" folders.
print "Unzipping EPUB files...\n";
unzip_epubs($dir);

# Step 2: build a sorted (deterministic) list of files to rewrite.
print "Collecting and sorting file names...\n";
collect_files($dir);

# Step 3a: collect the distinct random group-id prefixes, plus the
# JS files that contain them.
print "Getting the list of data group ids...\n";
my ($rgroup_ids, $rjs_files_with_group_ids) = grep_data_group_ids(@all_files);

# Step 3b: random id -> zero-padded sequential stable id.
print "Creating the map of stable ids...\n";
my %id_map = create_map_for_ids(@$rgroup_ids);

# Step 4: rewrite the ids everywhere; get back the plan for renaming
# digest-named files whose contents changed.
print "Replacing random ids by stable ones...\n";
my %rename_map = replace_group_ids(\@all_files, \%id_map, $rjs_files_with_group_ids);

# Step 5: only needed when some rewritten file embeds a content
# digest in its name.
if (%rename_map) {
    #print join(", ", %rename_map), "\n";
    print("Renaming files with digests in their names and changed contents...\n");
    rename_files_with_digests(\@all_files, \%rename_map);
}

# Step 6: strip random dc:identifier elements, pin dcterms:modified.
print "Removing random dc identifiers and fixing modification dates...\n";
clean_opfs($dir);

# Step 7: re-zip the EPUBs and remove the unpacked trees.
print "Zipping EPUB files...\n";
zip_epubs();
