#!/usr/bin/env python

#
# ddr-export
#

# Short summary passed to argparse as the parser description in main().
description = """Exports a DDR collection's entities or files to CSV."""

# Extended help passed to argparse as the epilog.  main() uses
# RawDescriptionHelpFormatter, so the layout below is printed as written.
# NOTE(review): this text says the CSV argument may be omitted and that a
# COLLECTION-MODEL.csv file is written to /tmp/, but main() declares
# 'destination' as a required positional and make_path() appends a
# timestamp to the generated filename -- confirm and update the help text.
# NOTE(review): the sample pattern "ddr-test-123-*" is listed for both
# "all entities" and "all files" in a collection -- confirm the intended
# pattern for files.
epilog = """
If CSV is not specified a filename will be generated in the form
COLLECTION-MODEL.csv and written to /tmp/.  For example:
    /tmp/ddr-test-123-entity.csv

Sample ID formats:
    ddr-test-123-*            All entities in a collection
    ddr-test-123-1-*          All files in an entity
    ddr-test-123-*            All files in a collection
    ddr-test-123-[1-5,7-8,10] Ranges of entities

Examples:

    $ ddr-export entity /PATH/TO/ddr/ddr-testing-123 /tmp/ddr-test-123-entity.csv
    $ ddr-export file /PATH/TO/ddr/ddr-testing-123 /tmp/just-some-files-YYYYMMDD.csv

You can also print out blank CSV files with all fields:

    $ ddr-export -b file ...

And blank with only required fields:

    $ ddr-export -br entity ...

Please see "ddr-import --help" for information on importing CSV files.
---"""


import argparse
from datetime import datetime
import logging
import os
import re
import sys

from DDR import batch
from DDR import identifier
from DDR import util

# Send all log records (DEBUG and above) to stdout with a timestamped,
# level-tagged line format.
logging.basicConfig(
    stream=sys.stdout,
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.DEBUG,
)


def read_id_file(path):
    """Read file and return list of IDs
    
    Each line is stripped of surrounding whitespace.  Blank lines
    (including trailing ones) are skipped so they never become
    empty-string IDs; an empty file yields an empty list.
    
    @param path: str Absolute path to file.
    @returns: list of IDs
    """
    with open(path, 'r') as f:
        stripped = (line.strip() for line in f)
        # Consume the generator while the file is still open.
        return [object_id for object_id in stripped if object_id]

def make_paths(collection_path, ids):
    """Convert object IDs into metadata paths.
    
    For each ID an identifier.Identifier is built against the directory
    that contains the collection repository, and its path_abs('json')
    value is collected.
    
    @param collection_path: str Absolute path to collection repository
    @param ids: list of object ID strings
    @returns: list of paths, in the same order as ids
    """
    # Identifiers are resolved relative to the collection's parent directory.
    parent_dir = os.path.dirname(collection_path)
    json_paths = []
    for oid in ids:
        oi = identifier.Identifier(oid, parent_dir)
        json_paths.append(oi.path_abs('json'))
    return json_paths

def make_path(destdir, collection_path, model):
    """Build the default CSV output path inside a destination directory.
    
    The filename has the form COLLECTIONID-MODEL-YYYYmmddHHMM.csv, where
    the timestamp is the current local time.
    
    @param destdir: str Absolute path to destination dir
    @param collection_path: str Absolute path to collection repository
    @param model: str One of ['collection', 'entity', 'file']
    @returns: str Path of the CSV file within destdir
    """
    collection_id = identifier.Identifier(collection_path).id
    timestamp = datetime.now().strftime('%Y%m%d%H%M')
    csv_name = '%s-%s-%s.csv' % (collection_id, model, timestamp)
    return os.path.join(destdir, csv_name)

def filter_paths(paths, pattern, exclude=False):
    """Select paths according to whether they match a regular expression.
    
    The pattern is applied with re.search, so it may match anywhere
    in the path.
    
    @param paths: list
    @param pattern: str A regular expression
    @param exclude: boolean If true, keep only paths NOT matching pattern.
    @returns: list of paths, in their original order
    """
    matcher = re.compile(pattern)
    # Keep a path when "did it match?" agrees with "do we want matches?".
    want_match = not exclude
    return [
        candidate
        for candidate in paths
        if (matcher.search(candidate) is not None) == want_match
    ]

def all_paths(collection_path, model):
    """List every metadata file of the given model under a collection.
    
    Delegates to util.find_meta_files with recursive=1 and force_read=1.
    
    @param collection_path: str Absolute path to collection repo
    @param model: str One of ['collection', 'entity', 'file']
    @returns: whatever util.find_meta_files returns (main() treats it as a list of paths)
    """
    search_options = {
        'basedir': collection_path,
        'model': model,
        'recursive': 1,
        'force_read': 1,
    }
    return util.find_meta_files(**search_options)


def main():
    """Command-line entry point: select metadata paths and export them to CSV.
    
    Steps:
    - Parse arguments and check that the ID file (if given) and the
      collection exist and that the model is in identifier.MODELS.
    - Resolve the output file: if the destination is an existing
      directory a filename is generated with make_path(), otherwise
      the destination itself is used as the file path.
    - Collect paths: none for --blank, from the ID file for --idfile,
      regex-filtered for --include, otherwise all paths for the model.
    - Apply --exclude, then either log the paths (--dryrun) or call
      batch.Exporter.export().
    
    @raises Exception: on a missing ID file or collection, a bad model
        name, an unwritable destination directory, or when no metadata
        paths are found and --blank was not given.
    """
    parser = argparse.ArgumentParser(
        description=description,
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('-b', '--blank', action='store_true', help='Blank CSV, just headers.')
    parser.add_argument('-r', '--required', action='store_true', help='Required fields only.')
    parser.add_argument('-I', '--idfile', help='File containing list of IDs, one per line.')
    parser.add_argument('-i', '--include', help='ID(s) to include (see help for formatting).')
    parser.add_argument('-e', '--exclude', help='ID(s) to exclude (see help for formatting).')
    parser.add_argument('-d', '--dryrun', action='store_true', help="Print paths but don't export anything.")
    parser.add_argument('model', help="Model: 'entity' or 'file'.")
    parser.add_argument('collection', help='Absolute path to Collection.')
    parser.add_argument('destination', help='Absolute path to destination directory or file.')
    args = parser.parse_args()
    
    # ensure we have absolute paths (CWD+relpath)
    collection_path = os.path.abspath(args.collection)
    destination_path = os.path.abspath(args.destination)
    
    if args.idfile and not os.path.exists(args.idfile):
        raise Exception('IDs file does not exist: %s' % args.idfile)
    elif not os.path.exists(collection_path):
        raise Exception('Collection does not exist: %s' % collection_path)
    elif args.model not in identifier.MODELS:
        raise Exception("Bad model name: '%s'" % args.model)
    
    # abspath() strips trailing slashes, so the basename of a directory
    # path is never empty.  Test for a directory first; otherwise a
    # directory destination would be used as the output file itself.
    if os.path.isdir(destination_path):
        filename = make_path(destination_path, collection_path, args.model)
    else:
        filename = destination_path
    logging.info('Writing to %s', filename)
    if not os.access(os.path.dirname(filename), os.W_OK):
        raise Exception('Cannot write to %s.' % filename)
    
    start = datetime.now()
    
    paths = []
    if args.blank:
        logging.info('Blank: no paths')
    elif args.idfile:
        logging.info('Looking for paths in %s', args.idfile)
        paths = make_paths(collection_path, read_id_file(args.idfile))
    elif args.include:
        logging.info('Including paths: "%s"', args.include)
        paths = filter_paths(all_paths(collection_path, args.model), args.include)
    else:
        logging.info('All paths in %s', collection_path)
        paths = all_paths(collection_path, args.model)
    logging.info('found %s paths', len(paths))
    
    if args.exclude:
        logging.info('Excluding paths: "%s"', args.exclude)
        before = len(paths)
        paths = filter_paths(paths, args.exclude, exclude=True)
        # Filtering can only shrink the list, so this is never negative.
        num_excluded = before - len(paths)
        logging.info('excluded %s', num_excluded)
    
    # A blank export legitimately has no paths; anything else needs some.
    if not paths and not args.blank:
        raise Exception('ERROR: Could not find metadata paths.')
    logging.info('Exporting %s paths', len(paths))
    
    if args.dryrun:
        logging.info("Dry run -- no output!")
        total = len(paths)
        for n, path in enumerate(paths, start=1):
            logging.info('%s/%s %s', n, total, path)
    else:
        batch.Exporter.export(paths, args.model, filename, required_only=args.required)
    
    finish = datetime.now()
    elapsed = finish - start
    if args.dryrun:
        logging.info('DONE - (%s elapsed) (DRY RUN)', elapsed)
    else:
        logging.info('DONE - (%s elapsed) - %s', elapsed, filename)
    
    

# Run the exporter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
