# -*- coding: utf-8 -*-
# Copyright (c) 2015-2016, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
'''
Editor
####################################
Text-editor-like functionality for programmatically manipulating raw text input
and output data and converting this data into container objects. This class
does not behave like a fully fledged text editor but does have some basic find,
replace, insert, etc. functionality.
'''
import os
import re
import sys
import pandas as pd
from io import StringIO, TextIOWrapper
from collections import OrderedDict
class Editor:
    '''
    An editor is a representation of a text file on disk that can be
    programmatically manipulated.

    Text lines are stored in memory; no files remain open. This class does not
    strive to be a fully fledged text editor rather a base class for converting
    input and output data from text on disk to some type of (exa framework)
    container object (and vice versa).

    >>> template = "Hello World!\\nHello {user}"
    >>> editor = Editor(template)
    >>> editor[0]
    'Hello World!'
    >>> len(editor)
    2
    >>> del editor[0]
    >>> len(editor)
    1
    >>> editor.write(path=None, user='Alice')
    Hello Alice

    Tip:
        Editor line numbers use a 0 base index. To increase the number of lines
        displayed by the repr, increase the value of the **nprint** attribute.

    Warning:
        For large text with repeating strings be sure to use the **as_interned**
        argument.

    Attributes:
        name (str): Data/file/misc name
        description (str): Data/file/misc description
        meta (dict): Additional metadata as key, value pairs
        nprint (int): Number of lines to display when printing
        cursor (int): Line number position of the cursor (see :func:`~exa.editor.Editor.find_next`)
    '''
    _getter_prefix = 'parse'
    _fmt = '{0}: {1}\n'.format    # Format for printing lines (see __repr__)

    def format(self, *args, **kwargs):
        '''
        Fill the editor's template placeholders and return the resulting text.

        The editor itself is not modified.

        Args:
            *args: Positional arguments for ``str.format``
            **kwargs: Keyword arguments for ``str.format``

        Returns:
            text (str): The joined lines with placeholders substituted
        '''
        return str(self).format(*args, **kwargs)

    def write(self, path=None, *args, **kwargs):
        '''
        Perform formatting and write the formatted string to a file or stdout.

        Optional arguments can be used to format the editor's contents. If no
        file path is given, prints to standard output.

        Args:
            path (str): Full file path (default None, prints to stdout)
            *args: Positional arguments to format the editor with
            **kwargs: Keyword arguments to format the editor with
        '''
        text = self.format(*args, **kwargs)
        if path is None:
            print(text)
        else:
            with open(path, 'w') as f:
                f.write(text)

    def _numbered_rows(self, indices):
        '''
        Format the lines at the given indices as "lineno: text" rows.

        Line numbers are right-justified to the width of the largest line
        number; every row ends with a newline (see ``_fmt``).
        '''
        width = len(str(len(self)))
        return [self._fmt(str(i).rjust(width, ' '), self._lines[i]) for i in indices]

    def head(self, n=10):
        '''
        Display the top of the file.

        Args:
            n (int): Number of lines to display
        '''
        rows = self._numbered_rows(range(min(n, len(self))))
        # Drop the final newline so the output ends with the last row itself.
        print(''.join(rows)[:-1], end=' ')

    def tail(self, n=10):
        '''
        Display the bottom of the file.

        Args:
            n (int): Number of lines to display
        '''
        total = len(self)
        rows = self._numbered_rows(range(max(total - n, 0), total))
        # Drop the final newline so the output ends with the last row itself.
        print(''.join(rows)[:-1], end=' ')

    @staticmethod
    def _coerce_lines(lines):
        '''Return ``lines`` as a list of strings (a string is split on newlines).'''
        if isinstance(lines, list):
            return lines
        if isinstance(lines, str):
            return lines.split('\n')
        raise TypeError('Unsupported type {0} for lines.'.format(type(lines)))

    def append(self, lines):
        '''
        Add lines to the end of the editor.

        Args:
            lines (list or str): List of line strings (or a newline separated string) to append

        Raises:
            TypeError: If ``lines`` is neither a list nor a string
        '''
        self._lines = self._lines + self._coerce_lines(lines)

    def prepend(self, lines):
        '''
        Add lines to the beginning of the editor.

        Args:
            lines (list or str): List of line strings (or a newline separated string) to insert

        Raises:
            TypeError: If ``lines`` is neither a list nor a string
        '''
        self._lines = self._coerce_lines(lines) + self._lines

    def insert(self, lines=None):
        '''
        Insert lines into the editor.

        Line numbers refer to positions in the editor before any insertion;
        each new line is placed in front of the line currently at that number.

        Note:
            To insert before the first line, use :func:`~exa.editor.Editor.prepend`
            (or key 0); to insert after the last line use :func:`~exa.editor.Editor.append`.

        Args:
            lines (dict): Dictionary of lines of form (lineno, string) pairs
        '''
        if not lines:
            return
        merged = list(self._lines)
        # Process keys in ascending order so the running offset (the number of
        # lines already inserted) correctly shifts every later position.
        for offset, lineno in enumerate(sorted(lines)):
            merged.insert(lineno + offset, lines[lineno])
        self._lines = merged

    def remove_blank_lines(self):
        '''Remove all blank lines (lines that are empty or contain only whitespace).'''
        blank = [i for i, line in enumerate(self) if line.strip() == '']
        self.delete_lines(blank)

    def _data(self, copy=False):
        '''
        Get all pandas objects attached to this instance as key value pairs.

        Args:
            copy (bool): Return copies of the objects rather than the objects themselves

        Returns:
            data (dict): Attribute name keys, pandas object values
        '''
        # SparseDataFrame no longer exists in recent pandas releases, so only
        # check against the types the installed pandas actually provides.
        kinds = tuple(getattr(pd, kind)
                      for kind in ('Series', 'DataFrame', 'SparseDataFrame')
                      if hasattr(pd, kind))
        data = {}
        for key, obj in self.__dict__.items():
            if isinstance(obj, kinds):
                data[key] = obj.copy() if copy else obj
        return data

    def delete_lines(self, lines):
        '''
        Delete all lines with given line numbers.

        Line numbers may be given in any order and may repeat; each refers to
        the position of the line before any deletion takes place.

        Args:
            lines (list): List of integers corresponding to line numbers to delete
        '''
        total = len(self)
        targets = sorted({i + total if i < 0 else i for i in lines})
        for shift, index in enumerate(targets):
            del self[index - shift]    # Earlier deletions shift later lines up

    def find(self, *strings):
        '''
        Search the entire editor for lines that contain the given strings.

        Args:
            *strings: Any number of strings to search for

        Returns:
            results (dict): Dictionary of string keys, with (lineno, line) ordered dictionaries as values
        '''
        results = {string: OrderedDict() for string in strings}
        for i, line in enumerate(self):
            for string in strings:
                if string in line:
                    results[string][i] = line
        return results

    def find_next(self, string):
        '''
        From the editor's current cursor position find the next instance of the
        given string.

        On success the cursor is moved to the line after the match.

        Args:
            string (str): String to search for from the current cursor position.

        Returns:
            tup (tuple): Tuple of cursor position and line or None if not found

        Note:
            This function cycles the entire editor (i.e. cursor to length of
            editor to zero and back to cursor position).
        '''
        for start, stop in [(self.cursor, len(self)), (0, self.cursor)]:
            for i in range(start, stop):
                if string in self[i]:
                    self.cursor = i + 1
                    return (i, self[i])
        return None

    def regex(self, *patterns, line=False):
        '''
        Search the editor for lines matching the regular expression.

        Args:
            *patterns: Regular expressions to search each line for
            line (bool): Return the whole line or the matched groups (groups default)

        Returns:
            results (dict): Dictionary of pattern keys, line values (or groups - default)

        Note:
            A pattern without capture groups always yields the whole line.
        '''
        results = {pattern: OrderedDict() for pattern in patterns}
        compiled = [(pattern, re.compile(pattern)) for pattern in patterns]
        for i, text in enumerate(self):
            for pattern, matcher in compiled:
                match = matcher.search(text)
                if match is None:
                    continue
                groups = match.groups()
                results[pattern][i] = text if (line or not groups) else groups
        return results

    def replace(self, pattern, replacement):
        '''
        Replace all instances of a pattern with a replacement.

        Every line is processed in a single pass, so a replacement that itself
        contains the pattern is not expanded again.

        Args:
            pattern (str): Pattern to replace
            replacement (str): Text to insert

        Raises:
            ValueError: If ``pattern`` is empty
        '''
        if pattern == '':
            raise ValueError('pattern must be a non-empty string')
        for i, text in enumerate(self):
            if pattern in text:
                self[i] = text.replace(pattern, replacement)

    def pandas_dataframe(self, start, stop, ncol):
        '''
        Returns the result of whitespace-delimited pandas.read_csv on
        a subset of the file.

        Args:
            start (int): line number where structured data starts
            stop (int): line number where structured data stops
            ncol (int or list): the number of columns in the structured
                                data or a list of that length with column names

        Returns:
            pd.DataFrame: structured data (None, after printing a message, if
            ncol is neither integer-like nor iterable)
        '''
        try:
            names = range(int(ncol))
        except TypeError:
            try:
                names = list(ncol)
            except TypeError:
                print('Cannot pandas_dataframe if ncol is {}, must be int or list'.format(type(ncol)))
                return None
        text = StringIO('\n'.join(self[start:stop]))
        return pd.read_csv(text, sep=r'\s+', names=names)

    @property
    def variables(self):
        '''
        Display a list of templatable variables present in the file.

        Templating is accomplished by creating a bracketed object in the same
        way that Python performs `string formatting`_. The editor is able to
        replace the placeholder value of the template. Integer templates are
        positional arguments. Doubled braces are literal braces and are not
        reported.

        .. _string formatting: https://docs.python.org/3.6/library/string.html
        '''
        # Strip escaped (doubled) braces first so "{{name}}" is not mistaken
        # for the placeholder "{name}".
        text = str(self).replace('{{', '').replace('}}', '')
        return sorted(set(re.findall(r'\{[A-Za-z0-9_]*\}', text)))

    @classmethod
    def from_file(cls, path, **kwargs):
        '''Create an editor instance from a file on disk.'''
        lines = lines_from_file(path, kwargs.get('as_interned', False))
        meta = dict(kwargs.get('meta') or {})    # Copy: never mutate the caller's dict
        meta['filepath'] = path
        kwargs['meta'] = meta
        return cls(lines, **kwargs)

    @classmethod
    def from_stream(cls, f, **kwargs):
        '''Create an editor instance from a file stream.'''
        lines = lines_from_stream(f, kwargs.get('as_interned', False))
        meta = dict(kwargs.get('meta') or {})    # Copy: never mutate the caller's dict
        meta['filepath'] = getattr(f, 'name', None)
        kwargs['meta'] = meta
        return cls(lines, **kwargs)

    @classmethod
    def from_string(cls, string, **kwargs):
        '''Create an editor instance from a string template.'''
        lines = lines_from_string(string, kwargs.get('as_interned', False))
        return cls(lines, **kwargs)

    def __init__(self, path_stream_or_string, as_interned=False, nprint=30,
                 name=None, description=None, meta=None):
        '''
        Args:
            path_stream_or_string: File path, text stream, text, or list of lines
            as_interned (bool): Intern the line strings when reading (default False)
            nprint (int): Number of lines to display when printing
            name (str): Data/file/misc name
            description (str): Data/file/misc description
            meta (dict): Additional metadata as key, value pairs

        Raises:
            TypeError: If the first argument is of an unsupported type
        '''
        source = path_stream_or_string
        # Check the type first: len() and os.path.exists() are only meaningful
        # (and safe) for strings.
        if isinstance(source, list):
            self._lines = source
        elif isinstance(source, (TextIOWrapper, StringIO)):
            self._lines = lines_from_stream(source, as_interned)
        elif isinstance(source, str):
            # Only short strings are tested as candidate file paths.
            if len(source) < 256 and os.path.exists(source):
                self._lines = lines_from_file(source, as_interned)
            else:
                self._lines = lines_from_string(source, as_interned)
        else:
            raise TypeError('Unknown type for arg data: {}'.format(type(source)))
        self.name = name
        self.description = description
        self.meta = {} if meta is None else meta
        self.nprint = nprint
        self.cursor = 0

    def __delitem__(self, line):
        del self._lines[line]     # "line" is the 0-based line number

    def __getitem__(self, key):
        # String keys give attribute access; anything else indexes the lines.
        if isinstance(key, str):
            return getattr(self, key)
        return self._lines[key]

    def __setitem__(self, line, value):
        self._lines[line] = value

    def __iter__(self):
        return iter(self._lines)

    def __len__(self):
        return len(self._lines)

    def __str__(self):
        return '\n'.join(self._lines)

    def __contains__(self, item):
        return any(item in line for line in self)

    def __repr__(self):
        total = len(self)
        if total > self.nprint * 2:
            # Long text: show the first and last nprint lines around an ellipsis.
            width = len(str(total))
            top = self._numbered_rows(range(self.nprint))
            bottom = self._numbered_rows(range(total - self.nprint, total))
            return ''.join(top) + '...\n'.rjust(width, ' ') + ''.join(bottom)
        return ''.join(self._numbered_rows(range(total)))
def lines_from_file(path, as_interned=False):
    '''
    Create a list of file lines from a given filepath.

    The file is read in full and closed before returning; line terminators
    are not included in the returned strings.

    Args:
        path (str): File path
        as_interned (bool): List of "interned" strings (default False)

    Returns:
        strings (list): File line list
    '''
    with open(path) as f:
        lines = f.read().splitlines()
    if as_interned:
        # Interning lets equal lines share a single string object.
        return [sys.intern(line) for line in lines]
    return lines
def lines_from_stream(f, as_interned=False):
    '''
    Create a list of file lines from a given file stream.

    The stream is read from its current position to the end; it is not
    closed. Line terminators are not included in the returned strings.

    Args:
        f (:class:`~io.TextIOWrapper`): File stream
        as_interned (bool): List of "interned" strings (default False)

    Returns:
        strings (list): File line list
    '''
    lines = f.read().splitlines()
    if as_interned:
        # Interning lets equal lines share a single string object.
        return [sys.intern(line) for line in lines]
    return lines
def lines_from_string(string, as_interned=False):
    '''
    Create a list of file lines from a given string.

    Line terminators are not included in the returned strings.

    Args:
        string (str): File string
        as_interned (bool): List of "interned" strings (default False)

    Returns:
        strings (list): File line list
    '''
    lines = string.splitlines()
    if as_interned:
        # Interning lets equal lines share a single string object.
        return [sys.intern(line) for line in lines]
    return lines