#!/opt/ddr-local/venv/ddrlocal/bin/python
#
# This file is part of ddr-cmdln/ddr
#
#  

# Help text shown by argparse: `description` appears above the option list,
# `epilog` below it (printed verbatim via RawDescriptionHelpFormatter).
description = """Copies binaries from a collection into a destination directory for publication."""

epilog = """
This command copies specified types of binaries from a collection to a
destination folder.  Destination files are in a very simple hierarchy (just
files within a single directory per collection) that is suitable for use by
ddr-public.

ddr-pubcopy produces a very simple file layout:

    $BASE/$COLLECTION_ID/$FILENAME

This command uses git-annex find to generate the list of files. Thus it will
only work with files that are physically present in the filesystem.

IMPORTANT: You are responsible for making sure that the destination base
directory is available and writable.

IMPORTANT: This command does not check whether files are published!
Use *after* filtering a collection.

EXAMPLES

     $ ddr-pubcopy -amM /var/www/media/ddr/ddr-test-123 /media/USBHARDDRIVE

ddr-public - ddr-pubcopy"""

import argparse
from datetime import datetime
import logging
import os
import sys

import envoy

from DDR import config
from DDR import identifier


def dtfmt(dt):
    """Format a datetime the same way everywhere in this script.

    Produces date and time joined by 'T', with microseconds,
    e.g. 2020-01-02T03:04:05.000678
    """
    date_part = '%Y-%m-%d'
    time_part = '%H:%M:%S.%f'
    return dt.strftime(date_part + 'T' + time_part)

def logprint(filename, msg):
    """Append a timestamped message to the log file and echo it to the console.

    filename: path of the log file; opened in append mode on every call.
    msg: value to log; it is rendered with %s formatting.
    """
    stamped = '%s - %s\n' % (dtfmt(datetime.now()), msg)
    with open(filename, 'a') as logfile:
        logfile.write(stamped)
    # The console copy drops the line terminator; print() adds its own.
    print(stamped.strip('\n'))

def logprint_nots(filename, msg):
    """Append a message to the log file and echo it to the console, without
    a timestamp.

    filename: path of the log file; opened in append mode on every call.
    msg: value to log; it is rendered with %s formatting.
    """
    line = '%s\n' % msg
    with open(filename, 'a') as logfile:
        logfile.write(line)
    # The console copy drops the line terminator; print() adds its own.
    print(line.strip('\n'))

def find_files(collection_path):
    """List files using git-annex-find.

    Changes the process working directory to collection_path and runs
    `git annex find` there.

    collection_path: path to the collection repository.
    Returns a list of paths, one per non-empty line of the command's
    standard output.
    """
    os.chdir(collection_path)
    # git annex find
    r0 = envoy.run('git annex find')
    # The output is newline-terminated, so a plain split leaves a trailing
    # empty string that is not a file path; drop empty entries.
    return [line for line in r0.std_out.split('\n') if line]

def filter_files(files, args):
    """Keep only the files whose type was requested.

    files: iterable of file paths.
    args: object with `access`, `mezzanine`, `transcript` and `master`
        attributes; a truthy attribute means that file type is wanted.
    Returns a list, in input order, of the paths whose type (as reported by
    identifier.identify_filepath) is one of the four above and is wanted.
    """
    known_types = ('access', 'mezzanine', 'transcript', 'master')

    def wanted(path):
        kind = identifier.identify_filepath(path)
        # The flag is only looked up for a recognized type.
        return kind in known_types and bool(getattr(args, kind))

    return [path for path in files if wanted(path)]

def rsync_files(to_copy, collection_path, destdir, LOG):
    """Copy each listed file into destdir with rsync, one rsync run per file.

    to_copy: iterable of file paths relative to collection_path.
    collection_path: path to the source collection repository.
    destdir: destination directory; every file is placed directly in it.
    LOG: path of the log file passed to logprint.

    Each command is logged before it runs. When rsync exits with a non-zero
    status the status, standard output and (if any) standard error are
    logged and the loop continues with the next file.
    """
    # Function-scope import keeps the file's import block untouched;
    # pipes.quote is the equivalent on interpreters without shlex.quote.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote
    # The command string is tokenized shell-style before execution, so both
    # paths are quoted to survive spaces and other special characters.
    # Paths without such characters are left unchanged by quote().
    dest_arg = quote('%s/' % destdir)
    for f in to_copy:
        src = os.path.join(collection_path, f)
        cmd = 'rsync --copy-links %s %s' % (quote(src), dest_arg)
        logprint(LOG, cmd)
        r1 = envoy.run(cmd)
        if r1.status_code:
            logprint(LOG, r1.status_code)
            logprint(LOG, r1.std_out)
            # rsync reports its errors on stderr.
            if r1.std_err:
                logprint(LOG, r1.std_err)


def main():
    """Command-line entry point.

    Parses the arguments, validates them, then lists the collection's files,
    keeps the requested types and copies them into
    <destbase>/<collection id>/. Progress is written to
    <destbase>/ddr-pubcopy.log and to the console.

    Exits with status 1 when source and destination are the same or when no
    file type option was given.
    """

    parser = argparse.ArgumentParser(description=description, epilog=epilog,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('collection', help='Absolute path to source collection repository.')
    parser.add_argument('destbase', help='Absolute path to destination base directory.')
    parser.add_argument('-a', '--access', action='store_const', const=1, help='git-annex copy access files.')
    parser.add_argument('-m', '--mezzanine', action='store_const', const=1, help='git-annex copy mezzanine files.')
    parser.add_argument('-t', '--transcript', action='store_const', const=1, help='git-annex copy transcript')
    parser.add_argument('-M', '--master', action='store_const', const=1, help='git-annex copy master files.')
    args = parser.parse_args()

    # check args
    if args.destbase == args.collection:
        print('ddr-pubcopy: error: Source and destination are the same!')
        sys.exit(1)
    # Every selectable type counts, including transcripts (-t on its own
    # is a valid request).
    if not (args.access or args.mezzanine or args.transcript or args.master):
        print('ddr-pubcopy: error: No file type(s) selected!')
        sys.exit(1)

    started = datetime.now()
    LOG = os.path.join(args.destbase, 'ddr-pubcopy.log')

    # Normalize first so a trailing slash on the collection path does not
    # produce an empty collection id (which would copy into destbase itself).
    cid = os.path.basename(os.path.normpath(args.collection))
    destdir = os.path.join(args.destbase, cid)
    logprint(LOG, 'Copying from %s' % args.collection)
    logprint(LOG, 'Copying to %s' % destdir)

    if args.access:    logprint(LOG, 'Copying access')
    if args.mezzanine: logprint(LOG, 'Copying mezzanine')
    if args.transcript: logprint(LOG, 'Copying transcript')
    if args.master:    logprint(LOG, 'Copying master')

    # if collection dir doesn't exist in destdir, mkdir
    if not os.path.exists(destdir):
        os.makedirs(destdir)

    files = find_files(args.collection)
    to_copy = filter_files(files, args)
    rsync_files(to_copy, args.collection, destdir, LOG)

    finished = datetime.now()
    elapsed = finished - started
    logprint(LOG, 'DONE!')
    logprint_nots(LOG, '%s elapsed' % elapsed)
    print('')

# Run the command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
