Source code for abcd.cli


from __future__ import print_function

import argparse
import getpass
import os
import io
import shlex
import sys
import tarfile
import time
from abcd import Direction
from ase.atoms import Atoms
from ase.db.core import convert_str_to_float_or_str
from ase.io import read as ase_read
from ase.io import write as ase_write
from .authentication import Credentials
from base64 import b64encode, b64decode
from .config import ConfigFile
from .query import translate
from random import randint
from .results import UpdateResult, InsertResult
from .structurebox import StructureBox
from .table import print_keys_table, print_rows, print_long_row

description = ''

examples = '''
    abcd --remote abcd@gc121mac1 db1.db --show   (display the database)
    abcd --remote abcd@gc121mac1 db1.db   (display information about available keys)
    abcd abcd@gc121mac1:db1.db \'energy<0.6 id>4 id<20 id!=10,11,12 elements~C elements~H,F,Cl\'   (querying; remote can be specified using a colon before the database name)
    abcd abcd@gc121mac1:db1.db --extract-original-files --path-prefix extracted/   (extract original files to the extracted/ folder)
    abcd abcd@gc121mac1:db1.db --write-to-file extr.xyz   (write selected configurations to the file extr.xyz)
    abcd db1.db \'energy>0.7\' --count   (count number of selected rows)
    abcd db1.db \'energy>0.8\' --remove   (remove selected configurations)
    abcd db1.db --store conf1.xyz conf2.xyz info.txt   (store original files in the database)
    abcd db1.db --store configs/   (store the whole directory in the database)
    abcd db1.db --keys 'user,id' --omit-keys --show  (show the database, but omit keys user and id)
    abcd db1.db --sort 'energy:A,age:D' --show  (sort by energy (ascending) and age (descending))
'''

[docs]def main(): sys_args = sys.argv[1:] if isinstance(sys_args, str): sys_args = sys_args.split(' ') config_file = ConfigFile('cli') if not config_file.exists(): defaults = {'abcd': { 'opts': '', 'backend_module': '', 'backend_name': ''}} config_file.initialise(defaults) # Load the options from the config file. Push them to the front of the list # so they will be overwritten on the command line. cfg_options = config_file.get('abcd', 'opts') # parse string as commadline options new_args = shlex.split(cfg_options) # Stick them at the front # TODO: will not work with subcommands sys_args = new_args + sys_args parser = argparse.ArgumentParser(usage = 'abcd [db-name] [selection] [options]', description = description, epilog = 'Examples: ' + examples, formatter_class=argparse.RawTextHelpFormatter) # Display usage if no arguments are supplied if len(sys_args) == 0: parser.print_usage() add = parser.add_argument add('database', nargs='?', help = 'Specify the database') add('query', nargs = '*', default = '', help = 'Query') add('-U', '--user', nargs='?', metavar='USER', default=None, const=[], help='User. Leave blank to input via stdin') add('-P', '--password', nargs='?', metavar='PASSWD', default=None, const=[], help='Password. Leave blank to input via stdin') add('-v', '--verbose', action='store_true', default=False) add('-q', '--quiet', action='store_true', default=False) add('--remote', help = 'Specify the remote') add('-l', '--list', action = 'store_true', help = 'Lists all the databases you have access to') add('-o', '--show', action='store_true', help='Show the database') add('-g', '--long', action='store_true', help='Show more informaion about one selected configuration') add('--pretty', action='store_true', default=True, help='Use pretty tables') add('--no-pretty', action='store_false', dest='pretty', help='Don\'t use pretty tables') add('-m', '--limit', type=int, default=0, metavar='N', help='Show only first N rows. Use 0 to show all (default).') add('-z', '--sort', metavar='COL1:[A/D],COL2[A/D]...', default='', help='Specify columns to sort the rows by (default direction is ascending). Multicolumn sorting might not be supported by all backends.') add('-c', '--count', action='store_true', help='Count number of selected rows.') add('-k', '--keys', metavar='K1,K2,...', help='Select only specified keys. "+" for all. See also --omit-keys.') add('-n', '--omit-keys', action='store_true', help='Omit keys specified with --keys argument') add('-t', '--add-keys', metavar='K1=V1,...', help='Add key-value pairs') add('--remove-keys', metavar='K1,K2,...', help='Remove keys') add('--remove', action='store_true', help='Remove selected rows.') add('-s', '--store', metavar='', nargs='+', help='Store a directory / list of files') add('-u', '--update', metavar='', nargs='+', help='Update the databse with a directory / list of files') add('--replace', action='store_true', default=False, help='Replace configurations with the same uid when using --update') add('--no-replace', action='store_false', dest='replace', help='Don\'t replace configurations with the same uid when using --update') add('--upsert', action='store_true', default=False, help='Insert configurations which are not yet in the database when using --update') add('--no-upsert', action='store_false', dest='upsert', help='Don\'t insert configurations which are not yet in the database when using --update') add('-x', '--extract-original-files', action='store_true', help='Extract original files stored with --store') add('--untar', action='store_true', default=True, help='Automatically untar files extracted with --extract-files') add('--no-untar', action='store_false', dest='untar', help='Don\'t automatically untar files extracted with --extract-files') add('-p', '--path-prefix', metavar='PREFIX', default='.', help='Path prefix for extracted files') add('-w', '--write-to-file', metavar='FILE', help='Write selected rows to file(s). Include format string for multiple \nfiles, e.g. file_%%03d.xyz') add('--ids', action='store_true', help='Print unique ids of selected configurations') args = parser.parse_args(sys_args) # Do some post-processing if args.database is not None and ':' in args.database: remote, database = args.database.split(':') if args.remote is not None: print('Error: Remote specified twice: "--remote {}" and "{}"'.format(args.remote, args.database), file=sys.stderr) sys.exit() args.remote = remote args.database = database # Calculate the verbosity # Verbosity can be either 0, 1 or 2 verbosity = 1 - args.quiet + args.verbose try: run(args, sys_args, verbosity) except Exception as x: if verbosity < 2: print('{0}: {1}'.format(x.__class__.__name__, x), file=sys.stderr) sys.exit(1) else: raise
[docs]def to_stderr(*args): '''Prints to stderr''' if args and any(not arg.isspace() for arg in args): print(*(arg.rstrip('\n') for arg in args), file=sys.stderr)
[docs]def untar_file(fileobj, path_prefix): try: tar = tarfile.open(fileobj=fileobj, mode='r') members = tar.getmembers() no_files = len(members) tar.extractall(path=path_prefix) return [os.path.join(path_prefix, m.name) for m in members] except Exception as e: to_stderr(str(e)) return None finally: tar.close()
[docs]def untar_and_delete(tar_files, path_prefix): # Untar individual tarballs for tarball in tar_files: with open(tarball, 'rb') as f: untar_file(f, path_prefix) os.remove(tarball)
[docs]def run(args, sys_args, verbosity): def out(*args): '''Prints information in accordance to verbosity''' if verbosity > 0 and args and any(not arg.isspace() for arg in args): print(*(arg.rstrip('\n') for arg in args)) # Get the query query = translate(args.query) if args.omit_keys and args.keys is None: print('Error: No keys to omit specified. Use --keys') sys.exit() omit_keys = args.omit_keys if args.keys is None or args.keys == '+': keys = None else: keys = args.keys.split(',') keys = [k for k in keys if k not in ('', ' ')] sort_list = args.sort.split(',') sort_list = [s for s in sort_list if s not in (None, '', ' ')] sort = {} for s in sort_list: if ':' in s: key, direction = s.split(':') if direction in ('A', 'a', 'ascending', 'Ascending', 'ASCENDING'): direction = Direction.ASCENDING elif direction in ('D', 'd', 'descending', 'Descending', 'DESCENDING'): direction = Direction.DESCENDING else: key = s direction = Direction.ASCENDING sort[key] = direction # Get kvp kvp = {} if args.add_keys: for pair in args.add_keys.split(','): k, sep, v = pair.partition('=') kvp[k] = convert_str_to_float_or_str(v) # Get keys to be removed remove_keys = [] if args.remove_keys: for key in args.remove_keys.split(','): remove_keys.append(key) remove_keys = [a for a in remove_keys if a not in (None, '', ' ')] # # # Backend initialisation and authentication cfg = ConfigFile('cli') backend_module = cfg.get('abcd', 'backend_module') backend_name = cfg.get('abcd', 'backend_name') # Quit if no backend was specified if not backend_module or not backend_name: print(' Please specify the backend in {}'.format(cfg.path)) sys.exit() # Import the backend Backend = getattr(__import__(backend_module, fromlist=[backend_name]), backend_name) # Initialise the backend box = StructureBox(Backend(database=args.database, remote=args.remote)) # Get the username and password if args.user == []: try: # PY2 compat user = raw_input('User: ') except NameError: user = input('User: ') else: user = args.user if args.password == []: password = getpass.getpass() else: password = args.password # Authenticate token = box.authenticate(Credentials(user)) # # # # Remove entries from a database if args.remove: result = box.remove(token, query, just_one=False) print(result.msg) # Extract a configuration from the database and write it # to the specified file. elif args.write_to_file: filename, display_format = os.path.splitext(args.write_to_file) if not display_format: display_format = 'xyz' elif display_format[0] == '.': display_format = display_format[1:] # displayed_format will appear in the file name if display_format == 'xyz': format = 'extxyz' else: format = display_format nrows = 0 list_of_atoms = [] # Make sure 'original_files' is omitted omit = omit_keys if keys is not None and omit: keys.append('original_files') elif keys is not None and 'original_files' in keys: keys.remove('original_files') elif keys is None and omit: # All keys will be omitted pass else: keys = ['original_files'] omit = True for atoms in box.find(auth_token=token, filter=query, sort=sort, limit=args.limit, keys=keys, omit_keys=omit): list_of_atoms.append(atoms) nrows += 1 if not list_of_atoms: to_stderr('No atoms selected') return if '%' not in filename: one_file = True else: one_file = False def add_atoms_to_tar(tar, atoms, name, format): # For some reason tarring doesn't work # if temp_s is used directly in the # tar.addfile function. For this reason # the contents of temp_s are transferred # to s. temp_s = io.StringIO() ase_write(temp_s, atoms, format=format) s = io.StringIO(temp_s.getvalue()) temp_s.close() info = tarfile.TarInfo(name=name) info.size = len(s.buf) tar.addfile(tarinfo=info, fileobj=s) s.close() def write_atoms_locally(atoms, name, format, path_prefix): if not os.path.exists(path_prefix): os.makedirs(path_prefix) ase_write(os.path.join(path_prefix, name), atoms, format=format) files_written = 0 if one_file: name = filename + '.' + display_format write_atoms_locally(list_of_atoms, name, format, args.path_prefix) files_written = 1 else: # Write extracted configurations into separate files for i, atoms in enumerate(list_of_atoms): name = filename % i + '.' + display_format write_atoms_locally(atoms, name, format, args.path_prefix) files_written += 1 out(' Writing {} file(s) to {}/'.format(files_written, args.path_prefix)) # Extract original file(s) from the database and write them # to the directory specified by the --path-prefix argument # (current directory by default), or print the file # to stdout. elif args.extract_original_files: # Extract original file contents from the atoms names = [] unique_ids = [] original_files =[] skipped_configs = [] nat = 0 for atoms in box.find(auth_token=token, filter=query, sort=sort, limit=args.limit, keys=['original_files', 'uid']): nat += 1 # Find the original file contents contents = '' if 'original_files' in atoms.info: contents = atoms.info['original_files'] elif 'original_files' in atoms.arrays: contents = atoms.arrays['original_files'] elif 'original_file_contents' in atoms.info: contents = atoms.info['original_file_contents'] elif 'original_file_contents' in atoms.arrays: contents = atoms.arrays['original_file_contents'] else: skipped_configs.append(nat) continue name = atoms.get_chemical_formula() if len(name) > 15: name = str[:15] names.append(name) # The Atoms object should have a uid, but if it doesn't # then use uid='0'. if 'uid' in atoms.info and atoms.info['uid'] is not None: unique_ids.append(atoms.info['uid']) else: unique_ids.append('0') original_files.append(contents) # Mangle the names for i, name in enumerate(names): names[i] += '-' + str(unique_ids[i])[-15:] extracted_paths = [] # Write the file locally for i in range(len(names)): path = os.path.join(args.path_prefix, names[i]+'.tar') if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) elif os.path.exists(path): out('{} already exists. Skipping write'.format(path)) with open(path, 'wb') as original_file: original_file.write(b64decode(original_files[i])) extracted_paths.append(path) msg = ' Extracted original files from {} configurations\n'.format(len(extracted_paths)) if skipped_configs: msg += ' No original files stored for configurations {}\n'.format(skipped_configs) # Untar individual tarballs if extracted_paths: if args.untar: untar_and_delete(extracted_paths, args.path_prefix) msg += ' Files were untarred to {}/'.format(args.path_prefix) else: msg += ' Files were written to {}/'.format(args.path_prefix) out(msg) elif (args.store or args.update): if args.store: to_store = args.store else: to_store = args.update def parse(f): try: return ase_read(f, index=slice(0, None, 1)) except: return None atoms_to_store = [] multiconfig_files = [] # Clasify each argument as either file or directory auxilary_files = [] parsable_files = [] dirs = [] for f in to_store: if os.path.isfile(f): atoms = parse(f) if atoms is not None: parsable_files.append({'atoms': atoms, 'path': f}) else: auxilary_files.append({'path': f, 'name': os.path.basename(f)}) elif os.path.isdir(f): dirs.append(f) else: raise IOError('No such file or directory: "{}"'.format(f)) def create_tarball(aux_files, atoms, atoms_fname, short_fnames=False): '''If short_fnames is True, only last portion of the filename is used. Returns b64encoded tarball (or an empty string)''' c = io.BytesIO() tar = tarfile.open(fileobj=c, mode='w') # Add auxilary files for aux_f in aux_files: arcname = aux_f['name'] tar.add(name=aux_f['path'], arcname=arcname) # Add the file containing the original configuration, # but only if it isn't a multi-config file. arcname = os.path.basename(atoms_fname) if short_fnames else atoms_fname if len(atoms) == 1: tar.add(name=atoms_fname, arcname=arcname) else: multiconfig_files.append(arcname) ret = '' if tar.getmembers(): ret = b64encode(c.getvalue()) tar.close() return ret def walk(tree, atoms_to_store, aux=[]): for parsed_dct in tree['parsable']: atoms = parsed_dct['atoms'] atoms_fname = parsed_dct['path'] aux_files = aux + tree['auxilary'] tar = create_tarball(aux_files, atoms, atoms_fname) # Attach the tar to the Atoms objects and add the Atoms objects to # atoms_to_store. for ats in atoms: if tar: ats.info['original_files'] = str(tar) atoms_to_store.append(ats) for subdir_name, subdir in tree['subdirs'].items(): walk(subdir, atoms_to_store, aux + tree['auxilary']) # Files were specified on the command line if parsable_files: # Iterate over parsed files for parsed_dct in parsable_files: atoms = parsed_dct['atoms'] atoms_fname = parsed_dct['path'] tar = create_tarball(auxilary_files, atoms, atoms_fname, short_fnames=True) for ats in atoms: if tar: ats.info['original_files'] = str(tar) atoms_to_store.append(ats) # At least one directory was specified on the command line. for d in dirs: # Convert directories to a tree d = d.rstrip(os.sep) parent_dir, dirname = os.path.split(d) # Change the directory to parent_dir so that all the names are relative to it if parent_dir: os.chdir(parent_dir) # If any additional auxilary files were specified, treat them as being in # the directory "dirname". additional_aux = [{'path': dct['path'], 'name': os.path.join(dirname, dct['name'])} for dct in auxilary_files] tree = {dirname: {'subdirs': {}, 'parsable': [], 'auxilary': additional_aux}} for root, dirs, files in os.walk(dirname): folders = list(root.split(os.sep)) if '' in folders: folders.remove('') current = tree for i, folder in enumerate(folders): if i == 0: current = current[folder] else: current = current['subdirs'][folder] for subdir in dirs: current['subdirs'][subdir] = {'subdirs': {}, 'parsable': [], 'auxilary': []} for f in files: fname = os.path.join(root, f) atoms = parse(fname) if atoms is None: current['auxilary'].append({'path': fname, 'name': fname}) else: current['parsable'].append({'atoms': atoms, 'path': fname}) # Now walk the tree to find all parsed files walk(tree[dirname], atoms_to_store) for atoms in atoms_to_store: # Check if the configuration has a uid. # If not, attach one. if not 'uid' in atoms.info or atoms.info['uid'] is None: atoms.info['uid'] = '%x' % randint(16**14, 16**15 - 1) # Add c_time, formula and n_atoms if not 'c_time' in atoms.info: atoms.info['c_time'] = int(time.time()) if not 'formula' in atoms.info: atoms.info['formula'] = atoms.get_chemical_formula() if not 'n_atoms' in atoms.info: atoms.info['n_atoms'] = len(atoms.numbers) # Store/update parsed atoms if args.store: result = box.insert(token, atoms_to_store) else: result = box.update(token, atoms_to_store, args.upsert, args.replace) print_result(result, multiconfig_files, args.database) elif args.add_keys: result = box.add_keys(token, query, kvp) print(result.msg) elif args.remove_keys: result = box.remove_keys(token, query, remove_keys) print(result.msg) # Count selected configurations elif args.count: if args.limit == 0: lim = 0 else: lim = args.limit + 1 atoms_it = box.find(auth_token=token, filter=query, sort=sort, limit=lim, keys=keys, omit_keys=omit_keys) count = atoms_it.count() if args.limit != 0 and count > args.limit: count = '{}+'.format(count-1) else: count = str(count) print('Found:', count) elif args.ids: atoms_it = box.find(auth_token=token, filter=query, sort=sort, limit=args.limit, keys=keys, omit_keys=omit_keys) for atoms in atoms_it: uid = atoms.info.get('uid') print(' ' + uid) # Show the database elif args.show: atoms_it = box.find(auth_token=token, filter=query, sort=sort, limit=args.limit, keys=keys, omit_keys=omit_keys) print_rows(atoms_it, border=args.pretty, truncate=args.pretty, show_keys=keys, omit_keys=omit_keys) elif args.long: atoms_it = box.find(auth_token=token, filter=query, sort=sort, limit=args.limit, keys=keys, omit_keys=omit_keys) try: atoms = next(atoms_it) except StopIteration: to_stderr('No matches') return try: next(atoms_it) to_stderr('\nWarning: more than one matches. Showing only one configuration.') except StopIteration: pass print_long_row(atoms) elif args.list or not args.database: dbs = box.list(token) if dbs: print('Hello. Databases you have access to:') for db in dbs: print(' {}'.format(db)) else: print('Hello. You don\'t have access to any databases.') # Print info about keys else: atoms_it = box.find(auth_token=token, filter=query, sort=sort, limit=args.limit, keys=keys, omit_keys=omit_keys) print_keys_table(atoms_it, border=args.pretty, truncate=args.pretty, show_keys=keys, omit_keys=omit_keys)