#!/usr/bin/env python
#
# ddrdensho255fix
#
#

"""
ddr-densho-255

PROBLEM: files imported with inconsistent roles and extensions.
example: ddr-densho-255-1-Master-43199c8853.JPG

for entity in entities:
    load entity json
    for file in entity_json['files']:
        load file json
        git mv path_rel
        git mv access_rel
        confirm moved path
        confirm moved access
        change file_json['path_rel']
        change file_json['access_rel']
        write file_json
        git mv file_json
        confirm moved file_json
        update entity_json['files'][file]['path_rel']
    write entity_json
    git status, confirm changed and moved files
    if ok, commit
Do one entity at a time
one commit per entity
"""

import argparse
from datetime import datetime
import json
import logging
import os

import envoy


def dtfmt(dt):
    """Format a datetime the same way everywhere in this script.

    Example result: 2014-01-31T13:45:59.000123
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%f'
    return dt.strftime(timestamp_format)

def logprint(filename, msg):
    """Append a timestamped message to the log file and echo it to the console.

    filename: path of the log file; it is opened in append mode on each call.
    msg:      anything that can be rendered with %s.
    """
    stamped = '%s - %s\n' % (dtfmt(datetime.now()), msg)
    with open(filename, 'a') as logfile:
        logfile.write(stamped)
    # the console copy drops the newline that terminates the log line
    print(stamped.strip('\n'))

def logprint_nots(filename, msg):
    """Append a message to the log file and echo it to the console, without
    a timestamp.
    """
    line = '%s\n' % msg
    with open(filename, 'a') as logfile:
        logfile.write(line)
    # the console copy drops the newline that terminates the log line
    print(line.strip('\n'))

def find_files(entity_files_dir):
    """List the names in the entity files dir that look like file JSONs.

    The match is a plain substring test for '.json' anywhere in the name,
    not a regex or a real extension check.  Bare names (not full paths) are
    returned, in whatever order os.listdir yields them.
    """
    return [name for name in os.listdir(entity_files_dir) if '.json' in name]


def fix_filename( path ):
    """
    Return path with its final component normalized.
    At present "normalized" only means lowercased; directory components
    are left exactly as given.
    """
    head, tail = os.path.split(path)
    return os.path.join(head, tail.lower())

def write_json(data, path):
    r"""Dump data to path as pretty-printed JSON with a stable layout.
    
    Metadata files are kept under version control and are sometimes edited
    by hand, so the output uses 4-space indentation, one value per line and
    sorted keys.  That keeps each field in the same relative position from
    one commit to the next and makes diffs meaningful.
    
    >>> data = {'a':1, 'b':2}
    >>> path = '/tmp/ddrlocal.models.write_json.json'
    >>> write_json(data, path)
    >>> with open(path, 'r') as f:
    ...     print(f.readlines())
    ...
    ['{\n', '    "a": 1,\n', '    "b": 2\n', '}']
    """
    formatting = {
        'indent': 4,
        'separators': (',', ': '),
        'sort_keys': True,
    }
    # serialize first so a failure here leaves any existing file untouched
    text = json.dumps(data, **formatting)
    with open(path, 'w') as outfile:
        outfile.write(text)


def exists( path_abs ):
    """Return True if the path exists or is a symlink, even a dangling one.
    """
    # os.path.exists() is False for a symlink whose target is missing,
    # so symlinks are checked for separately
    if os.path.islink(path_abs):
        return True
    return os.path.exists(path_abs)

def git_mv( src_rel, dest_rel, LOG ):
    """Run `git mv src_rel dest_rel` and log the outcome.

    The command line is echoed to the console before it runs.  Afterwards
    the exit status and captured stdout are written to LOG.  The exit status
    is only logged, not checked, and nothing is returned.
    """
    command = 'git mv %s %s' % (src_rel, dest_rel)
    print(command)
    result = envoy.run(command)
    outcome = (result.status_code, result.std_out)
    logprint(LOG, 'git_mv: %s %s' % outcome)

def confirm_moved( src_rel, dest_rel ):
    """Return True only if dest_rel is present and src_rel is gone.
    """
    # a source that is still there means the move did not happen
    if exists(src_rel):
        return False
    return exists(dest_rel)

def confirm_file_metadata( json_abs_fixed, file_rel_fixed, accs_rel_fixed ):
    """Check that a file JSON records the expected path_rel and access_rel.

    The JSON document is iterated as a sequence of dicts (one field per
    dict, e.g. {"path_rel": "..."}); entries that are not dicts are ignored.

    json_abs_fixed: path of the file JSON to read.
    file_rel_fixed: value expected in the 'path_rel' field.
    accs_rel_fixed: value expected in the 'access_rel' field.

    Returns True only if both fields are present with exactly the expected
    values, otherwise False.  Raises whatever open() / json.loads() raise if
    the file is missing or is not valid JSON.
    """
    with open(json_abs_fixed, 'r') as f:
        data = json.loads(f.read())
    file_confirmed = False
    accs_confirmed = False
    for field in data:
        if not isinstance(field, dict):
            continue
        # Membership tests instead of field.keys()[0]: dict views cannot be
        # indexed on Python 3, and an empty dict has no first key at all.
        if 'path_rel' in field and field['path_rel'] == file_rel_fixed:
            file_confirmed = True
        if 'access_rel' in field and field['access_rel'] == accs_rel_fixed:
            accs_confirmed = True
    return file_confirmed and accs_confirmed

def git_add( rel, LOG ):
    """Run `git add` on one path, logging the command and its outcome.

    The command line is logged before it runs; the exit status and captured
    stdout are logged afterwards.  The exit status is not checked and nothing
    is returned.
    """
    command = 'git add %s' % rel
    logprint(LOG, 'git_add: %s' % command)
    result = envoy.run(command)
    outcome = (result.status_code, result.std_out)
    logprint(LOG, 'git_add: %s %s' % outcome)


def update_files( collection_abs, entity_files_rel, path_rel, LOG ):
    """Rename one file, its access copy and its file JSON, then fix the JSON.

    The access copy is assumed to be named '<stub>-a.jpg' and the file JSON
    '<stub>.json', where <stub> is path_rel minus its extension.  All three
    are `git mv`ed to the names produced by fix_filename().  Only when all
    three moves are confirmed is the file JSON rewritten so that its
    'path_rel' and 'access_rel' fields hold the new base filenames.  If the
    JSON then matches the new names it is staged with `git add`.

    git and confirm_moved() are handed repo-relative paths, so the current
    working directory must be the collection repository (main() chdirs there
    before calling this).

    collection_abs:   absolute path to the collection repository.
    entity_files_rel: the entity's files dir, relative to the repository root.
    path_rel:         base filename of the file, as recorded in entity.json.
    LOG:              path of the log file handed to logprint().

    Returns a dict with keys 'path_rel' (the fixed base filename) and the
    booleans 'file_moved', 'accs_moved', 'json_moved', 'json_updated' and
    'json_confirmed'.
    """
    # base filenames
    file_base = path_rel
    stub = os.path.splitext(file_base)[0]
    accs_base = '%s-a.jpg' % stub
    json_base = '%s.json' % stub
    # paths relative to repo root (git uses these)
    file_rel = os.path.join(entity_files_rel, file_base)
    accs_rel = os.path.join(entity_files_rel, accs_base)
    json_rel = os.path.join(entity_files_rel, json_base)
    # absolute paths
    file_abs = os.path.join(collection_abs, file_rel)
    accs_abs = os.path.join(collection_abs, accs_rel)
    json_abs = os.path.join(collection_abs, json_rel)
    # matches actual files in filesystem
    # or git-annex links to files that may or may not actually be there
    file_exists = exists(file_abs)
    accs_exists = exists(accs_abs)
    json_exists = exists(json_abs)
    # fixed filenames: base names go into metadata, relative paths go to git
    file_base_fixed = fix_filename(file_base)
    accs_base_fixed = fix_filename(accs_base)
    file_rel_fixed = fix_filename(file_rel)
    accs_rel_fixed = fix_filename(accs_rel)
    json_rel_fixed = fix_filename(json_rel)
    json_abs_fixed = os.path.join(collection_abs, json_rel_fixed)
    
    logprint(LOG, 'update: %s %s %s %s %s %s' % (file_rel, json_rel, accs_rel, file_exists, json_exists, accs_exists))
    
    # move the files
    git_mv(file_rel, file_rel_fixed, LOG)
    git_mv(accs_rel, accs_rel_fixed, LOG)
    git_mv(json_rel, json_rel_fixed, LOG)
    
    # confirm files were moved
    file_moved = confirm_moved(file_rel, file_rel_fixed)
    accs_moved = confirm_moved(accs_rel, accs_rel_fixed)
    json_moved = confirm_moved(json_rel, json_rel_fixed)
    
    def write_file_metadata( json_abs_fixed, file_base_fixed, accs_base_fixed ):
        """Point the file JSON's path_rel/access_rel at the new filenames."""
        with open(json_abs_fixed, 'r') as f2:
            filedata = json.loads(f2.read())
        for field in filedata:
            if not isinstance(field, dict):
                continue
            # membership tests instead of field.keys()[0], which cannot be
            # indexed on Python 3 and fails on an empty dict
            if 'path_rel' in field:
                field['path_rel'] = file_base_fixed
            if 'access_rel' in field:
                field['access_rel'] = accs_base_fixed
        write_json(filedata, json_abs_fixed)
    
    # update file metadata and write
    json_updated = False
    if file_moved and accs_moved and json_moved:
        write_file_metadata(json_abs_fixed, file_base_fixed, accs_base_fixed)
        json_updated = True
    
    # confirm that metadata matches new filenames.
    # If the JSON never made it to its new name there is nothing to open;
    # report that as "not confirmed" instead of dying on the open().
    json_confirmed = False
    if os.path.isfile(json_abs_fixed):
        json_confirmed = confirm_file_metadata(json_abs_fixed, file_base_fixed, accs_base_fixed)
    else:
        logprint(LOG, 'update: file JSON not found: %s' % json_rel_fixed)
    if json_confirmed:
        git_add(json_rel_fixed, LOG)
    
    status = {
        'path_rel':file_base_fixed,
        'file_moved':file_moved,
        'accs_moved':accs_moved,
        'json_moved':json_moved,
        'json_updated':json_updated,
        'json_confirmed':json_confirmed
        }
    return status

def update_entity( entity_json_abs, path_rel_orig, path_rel_fixed, LOG ):
    """Replace one file's path_rel in entity.json and rewrite the file.

    The entity JSON is iterated as a sequence of dicts.  Within every dict
    that has a 'files' key, each file entry whose 'path_rel' equals
    path_rel_orig is set to path_rel_fixed.  The document is always written
    back with write_json(), whether or not anything matched.

    entity_json_abs: path of the entity.json to read and rewrite.
    path_rel_orig:   filename currently recorded for the file.
    path_rel_fixed:  filename to record instead.
    LOG:             path of the log file handed to logprint().
    """
    logprint(LOG, 'update_entity: %s' % entity_json_abs)
    with open(entity_json_abs, 'r') as fr:
        data = json.loads(fr.read())
    for field in data:
        # membership test instead of field.keys()[0], which cannot be
        # indexed on Python 3 and fails on an empty dict
        if not (isinstance(field, dict) and 'files' in field):
            continue
        for entity_file in field['files'] or []:
            current = entity_file.get('path_rel', None)
            # entries with an empty or missing path_rel are never matched
            if current and (current == path_rel_orig):
                entity_file['path_rel'] = path_rel_fixed
    write_json(data, entity_json_abs)

def confirm_entity_metadata( entity_json_abs, path_rel_fixed, LOG ):
    """Return True if path_rel_fixed appears anywhere in the entity JSON text.

    This is a plain substring search, line by line, over the raw file; the
    JSON is not parsed.  LOG is accepted but not used.
    """
    print('looking for "%s"' % path_rel_fixed)
    with open(entity_json_abs, 'r') as entity_json:
        text_lines = entity_json.readlines()
    return any(path_rel_fixed in text for text in text_lines)


def main():
    """Command-line entry point: fix the filenames of one entity's files.

    With --dryrun, only the file JSONs found in the entity's files dir are
    listed.  Otherwise the process chdirs into the collection repository,
    reads the entity's entity.json and, for every file listed there, calls
    update_files() and then update_entity(); entity.json is staged with
    `git add` after each confirmed update.  Nothing is committed here.
    Progress is written to /tmp/ddrdensho255fix-<entityid>.log.
    """
    parser = argparse.ArgumentParser(description='no description here', epilog='',
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-c', '--collection', required=True, help='Absolute path to source collection repository.')
    parser.add_argument('-d', '--dryrun', action='store_true', help="Don't change anything, just list files that would be changed.")
    parser.add_argument('-e', '--entityid', required=True, help="Entity ID (ex. ddr-densho-255-1).")
    
    args = parser.parse_args()
    collection_abs = os.path.realpath(args.collection)
    entity_id = args.entityid
    entity_rel = os.path.join('files', entity_id)
    entity_json_rel = os.path.join(entity_rel, 'entity.json')
    entity_json_abs = os.path.join(collection_abs, entity_json_rel)
    entity_files_rel = os.path.join(entity_rel, 'files')
    entity_files_abs = os.path.join(collection_abs, entity_files_rel)
    
    LOG = os.path.join('/tmp', 'ddrdensho255fix-%s.log' % entity_id)
    
    started = datetime.now()
    if args.dryrun:
        logprint(LOG, 'DRY RUN (no files will be changed)')
    paths = find_files(entity_files_abs)
    num_files = len(paths)
    logprint(LOG, '%s files.' % num_files)
    if args.dryrun:
        for path in paths:
            logprint(LOG, path)
    else:
        # go into the collection repo directory;
        # git and the move checks below use repo-relative paths
        os.chdir(collection_abs)
        
        # load entity json
        with open(entity_json_abs, 'r') as f:
            entity = json.loads(f.read())
        
        # find the list of files; stays empty if the entity has no 'files'
        # field, so the loop below is simply skipped instead of crashing
        entity_files = []
        for field in entity:
            # membership test instead of field.keys()[0], which cannot be
            # indexed on Python 3 and fails on an empty dict
            if isinstance(field, dict) and 'files' in field:
                entity_files = field['files'] or []
        
        # update the actual files and file metadata
        for n,entity_file in enumerate(entity_files):
            logprint(LOG, '%s/%s --------------------------------------------------------------' % (n,num_files))
            
            # move files and update file metadata
            file_status = update_files(collection_abs, entity_files_rel, entity_file['path_rel'], LOG)
            logprint(LOG, file_status)
            
            # update entity metadata, but only once the file JSON is confirmed
            entity_confirmed = None
            if file_status.get('path_rel',None) and file_status.get('json_confirmed',None):
                update_entity(
                    entity_json_abs,
                    entity_file['path_rel'], # original filename
                    file_status['path_rel'], # new filename
                    LOG
                )
                entity_confirmed = confirm_entity_metadata( entity_json_abs, file_status['path_rel'], LOG )
            logprint(LOG, 'entity_confirmed: %s' % entity_confirmed)
            if entity_confirmed:
                git_add(entity_json_abs, LOG)

    finished = datetime.now()
    elapsed = finished - started
    logprint(LOG, 'DONE!')
    logprint_nots(LOG, '%s elapsed' % elapsed)
    print('Wrote log to %s' % LOG)
    print('')

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
