Source code for exa.editor

# -*- coding: utf-8 -*-
# Copyright (c) 2015-2016, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
'''
Editor
####################################
Text-editor-like functionality for programmatically manipulating raw text input
and output data and converting this data into container objects. This class
does not behave like a fully fledged text editor but does have some basic find,
replace, insert, etc. functionality.
'''
import os
import re
import sys
import pandas as pd
from io import StringIO, TextIOWrapper
from collections import OrderedDict


[docs]class Editor: ''' An editor is a representation of a text file on disk that can be programmatically manipulated. Text lines are stored in memory; no files remain open. This class does not strive to be a fully fledged text editor rather a base class for converting input and output data from text on disk to some type of (exa framework) container object (and vice versa). >>> template = "Hello World!\\nHello {user}" >>> editor = Editor(template) >>> editor[0] 'Hello World!' >>> len(editor) 2 >>> del editor[0] >>> len(editor) 1 >>> editor.write(fullpath=None, user='Alice') Hello Alice Tip: Editor line numbers use a 0 base index. To increase the number of lines displayed by the repr, increase the value of the **nprint** attribute. Warning: For large text with repeating strings be sure to use the **as_interned** argument. Attributes: name (str): Data/file/misc name description (str): Data/file/misc description meta (dict): Additional metadata as key, value pairs nrpint (int): Number of lines to display when printing cursor (int): Line number position of the cusor (see :func:`~exa.editor.Editor.find_next_any` and :func:`~exa.editor.Editor.find_next_string`) ''' _getter_prefix = 'parse' _fmt = '{0}: {1}\n'.format # Format for printing lines (see __repr__)
[docs] def write(self, path=None, *args, **kwargs): ''' Perform formatting and write the formatted string to a file or stdout. Optional arguments can be used to format the editor's contents. If no file path is given, prints to standard output. Args: path (str): Full file path (default None, prints to stdout) *args: Positional arguments to format the editor with **kwargs: Keyword arguments to format the editor with ''' if path is None: print(self.format(*args, **kwargs)) else: with open(path, 'w') as f: f.write(self.format(*args, **kwargs))
[docs] def format(self, *args, inplace=False, **kwargs): ''' Format the string representation of the editor. Args: inplace (bool): If True, overwrite editor's contents with formatted contents ''' if not inplace: return str(self).format(*args, **kwargs) self._lines = str(self).format(*args, **kwargs).splitlines()
[docs] def head(self, n=10): ''' Display the top of the file. Args: n (int): Number of lines to display ''' r = self.__repr__().split('\n') print('\n'.join(r[:n]), end=' ')
[docs] def tail(self, n=10): ''' Display the bottom of the file. Args: n (int): Number of lines to display ''' r = self.__repr__().split('\n') print('\n'.join(r[-n:]), end=' ')
[docs] def append(self, lines): ''' Args: lines (list): List of line strings to append to the end of the editor ''' if isinstance(lines, list): self._lines = self._lines + lines elif isinstance(lines, str): lines = lines.split('\n') self._lines = self._lines + lines else: raise TypeError('Unsupported type {0} for lines.'.format(type(lines)))
[docs] def prepend(self, lines): ''' Args: lines (list): List of line strings to insert at the beginning of the editor ''' if isinstance(lines, list): self._lines = lines + self._lines elif isinstance(lines, str): lines = lines.split('\n') self._lines = lines + self._lines else: raise TypeError('Unsupported type {0} for lines.'.format(type(lines)))
[docs] def insert(self, lines={}): ''' Insert lines into the editor. Note: To insert before the first line, use :func:`~exa.editor.Editor.preappend` (or key 0); to insert after the last line use :func:`~exa.editor.Editor.append`. Args: lines (dict): Dictionary of lines of form (lineno, string) pairs ''' for i, (key, line) in enumerate(lines.items()): n = key + i first_half = self._lines[:n] last_half = self._lines[n:] self._lines = first_half + [line] + last_half
[docs] def remove_blank_lines(self): '''Remove all blank lines (blank lines are those with zero characters).''' to_remove = [] for i, line in enumerate(self): ln = line.strip() if ln == '': to_remove.append(i) self.delete_lines(to_remove)
def _data(self, copy=False): ''' Get all data associated with the container as key value pairs. ''' data = {} for key, obj in self.__dict__.items(): if isinstance(obj, (pd.Series, pd.DataFrame, pd.SparseDataFrame)): if copy: data[key] = obj.copy() else: data[key] = obj return data
[docs] def delete_lines(self, lines): ''' Delete all lines with given line numbers. Args: lines (list): List of integers corresponding to line numbers to delete ''' for k, i in enumerate(lines): del self[i-k] # Accounts for the fact that len(self) decrease upon deletion
[docs] def find(self, *strings): ''' Search the entire editor for lines that match the string. Args: \*strings: Any number of strings to search for Returns: results (dict): Dictionary of string key, line values. ''' results = {string: OrderedDict() for string in strings} for i, line in enumerate(self): for string in strings: if string in line: results[string][i] = line return results
[docs] def find_next(self, string): ''' From the editor's current cursor position find the next instance of the given string. Args: string (str): String to search for from the current cursor position. reverse (bool): Search in reverse (default false) Returns: tup (tuple): Tuple of cursor position and line or None if not found Note: This function cycles the entire editor (i.e. cursor to length of editor to zero and back to cursor position). ''' for start, stop in [(self.cursor, len(self)), (0, self.cursor)]: for i in range(start, stop): if string in self[i]: tup = (i, self[i]) self.cursor = i + 1 return tup
[docs] def regex(self, *patterns, line=False): ''' Search the editor for lines matching the regular expression. Args: \*patterns: Regular expressions to search each line for line (bool): Return the whole line or the matched groups (groups default) Returns: results (dict): Dictionary of pattern keys, line values (or groups - default) ''' results = {pattern: OrderedDict() for pattern in patterns} for i, line in enumerate(self): for pattern in patterns: grps = re.search(pattern, line) if grps: grps = grps.groups() if grps: results[pattern][i] = grps else: results[pattern][i] = line return results
[docs] def replace(self, pattern, replacement): ''' Replace all instances of a pattern with a replacement. Args: pattern (str): Pattern to replace replacement (str): Text to insert ''' for i in range(len(self)): line = self[i] while pattern in line: line = line.replace(pattern, replacement) self[i] = line
[docs] def pandas_dataframe(self, start, stop, ncol): ''' Returns the result of tab-separated pandas.read_csv on a subset of the file. Args: start (int): line number where structured data starts stop (int): line number where structured data stops ncol (int or list): the number of columns in the structured data or a list of that length with column names Returns: pd.DataFrame: structured data ''' try: ncol = int(ncol) return pd.read_csv(StringIO('\n'.join(self[start:stop])), delim_whitespace=True, names=range(ncol)) except TypeError: try: ncol = list(ncol) return pd.read_csv(StringIO('\n'.join(self[start:stop])), delim_whitespace=True, names=ncol) except TypeError: print('Cannot pandas_dataframe if ncol is {}, must be int or list'.format(type(ncol)))
@property def variables(self): ''' Display a list of templatable variables present in the file. Templating is accomplished by creating a bracketed object in the same way that Python performs `string formatting`_. The editor is able to replace the placeholder value of the template. Integer templates are positional arguments. .. _string formatting: https://docs.python.org/3.6/library/string.html ''' string = str(self) constants = [match[1:-1] for match in re.findall('{{[A-z0-9]}}', string)] variables = re.findall('{[A-z0-9]*}', string) return sorted(set(variables).difference(constants)) @classmethod
[docs] def from_file(cls, path, **kwargs): '''Create an editor instance from a file on disk.''' lines = lines_from_file(path) if 'meta' not in kwargs: kwargs['meta'] = {} kwargs['meta']['filepath'] = path return cls(lines, **kwargs)
@classmethod
[docs] def from_stream(cls, f, **kwargs): '''Create an editor instance from a file stream.''' lines = lines_from_stream(f) if 'meta' not in kwargs: kwargs['meta'] = {} kwargs['meta']['filepath'] = f.name if hasattr(f, 'name') else None return cls(lines, **kwargs)
@classmethod
[docs] def from_string(cls, string, **kwargs): '''Create an editor instance from a string template.''' return cls(lines_from_string(string), **kwargs)
def __init__(self, path_stream_or_string, as_interned=False, nprint=30, name=None, description=None, meta={}): if len(path_stream_or_string) < 256 and os.path.exists(path_stream_or_string): self._lines = lines_from_file(path_stream_or_string, as_interned) elif isinstance(path_stream_or_string, list): self._lines = path_stream_or_string elif isinstance(path_stream_or_string, (TextIOWrapper, StringIO)): self._lines = lines_from_stream(path_stream_or_string, as_interned) elif isinstance(path_stream_or_string, str): self._lines = lines_from_string(path_stream_or_string, as_interned) else: raise TypeError('Unknown type for arg data: {}'.format(type(data))) self.name = name self.description = description self.meta = {} if meta is None else meta self.nprint = 30 self.cursor = 0 def __delitem__(self, line): del self._lines[line] # "line" is the line number minus one def __getitem__(self, key): if isinstance(key, str): return getattr(self, key) return self._lines[key] def __setitem__(self, line, value): self._lines[line] = value def __iter__(self): for line in self._lines: yield line def __len__(self): return len(self._lines) def __str__(self): return '\n'.join(self._lines) def __contains__(self, item): for obj in self: if item in obj: return True def __repr__(self): r = '' nn = len(self) n = len(str(nn)) if nn > self.nprint * 2: for i in range(self.nprint): ln = str(i).rjust(n, ' ') r += self._fmt(ln, self._lines[i]) r += '...\n'.rjust(n, ' ') for i in range(nn - self.nprint, nn): ln = str(i).rjust(n, ' ') r += self._fmt(ln, self._lines[i]) else: for i, line in enumerate(self): ln = str(i).rjust(n, ' ') r += self._fmt(ln, line) return r
def lines_from_file(path, as_interned=False):
    '''
    Read a file and return its content as a list of lines.

    Args:
        path (str): File path
        as_interned (bool): List of "interned" strings (default False)

    Returns:
        strings (list): File line list
    '''
    with open(path) as handle:
        content = handle.read()
    rows = content.splitlines()
    if not as_interned:
        return rows
    # Interning lets equal lines share a single string object.
    return list(map(sys.intern, rows))
def lines_from_stream(f, as_interned=False):
    '''
    Read the remainder of a text stream and return it as a list of lines.

    Args:
        f (:class:`~io.TextIOWrapper`): File stream
        as_interned (bool): List of "interned" strings (default False)

    Returns:
        strings (list): File line list
    '''
    rows = f.read().splitlines()
    # Interning lets equal lines share a single string object.
    return [sys.intern(row) for row in rows] if as_interned else rows
def lines_from_string(string, as_interned=False):
    '''
    Split a string into a list of lines.

    Args:
        string (str): File string
        as_interned (bool): List of "interned" strings (default False)

    Returns:
        strings (list): File line list
    '''
    pieces = string.splitlines()
    if as_interned:
        # Interning lets equal lines share a single string object.
        pieces = [sys.intern(piece) for piece in pieces]
    return pieces