Source code for celery.utils.saferepr

"""Streaming, truncating, non-recursive version of :func:`repr`.

Differences from regular :func:`repr`:

- Sets are represented the Python 3 way: ``{1, 2}`` vs ``set([1, 2])``.
- Unicode strings does not have the ``u'`` prefix, even on Python 2.
- Empty set formatted as ``set()`` (Python 3), not ``set([])`` (Python 2).
- Longs don't have the ``L`` suffix.

Very slow with no limits, super quick with limits.
"""
import traceback
from collections import deque, namedtuple
from decimal import Decimal
from itertools import chain
from numbers import Number
from pprint import _recursion

from .text import truncate

__all__ = ('saferepr', 'reprstream')

#: Node representing literal text.
#:   - .value: is the literal text value
#:   - .truncate: specifies if this text can be truncated, for things like
#:                LIT_DICT_END this will be False, as we always display
#:                the ending brackets, e.g:  [[[1, 2, 3, ...,], ..., ]]
#:   - .direction: If +1 the current level is increment by one,
#:                 if -1 the current level is decremented by one, and
#:                 if 0 the current level is unchanged.
_literal = namedtuple('_literal', ('value', 'truncate', 'direction'))

#: Node representing a dictionary key.
_key = namedtuple('_key', ('value',))

#: Node representing quoted text, e.g. a string value.
_quoted = namedtuple('_quoted', ('value',))


#: Recursion protection.
_dirty = namedtuple('_dirty', ('objid',))

#: Types that are repsented as chars.
chars_t = (bytes, str)

#: Types that are regarded as safe to call repr on.
safe_t = (Number,)

#: Set types.
set_t = (frozenset, set)

LIT_DICT_START = _literal('{', False, +1)
LIT_DICT_KVSEP = _literal(': ', True, 0)
LIT_DICT_END = _literal('}', False, -1)
LIT_LIST_START = _literal('[', False, +1)
LIT_LIST_END = _literal(']', False, -1)
LIT_LIST_SEP = _literal(', ', True, 0)
LIT_SET_START = _literal('{', False, +1)
LIT_SET_END = _literal('}', False, -1)
LIT_TUPLE_START = _literal('(', False, +1)
LIT_TUPLE_END = _literal(')', False, -1)
LIT_TUPLE_END_SV = _literal(',)', False, -1)


[docs]def saferepr(o, maxlen=None, maxlevels=3, seen=None): # type: (Any, int, int, Set) -> str """Safe version of :func:`repr`. Warning: Make sure you set the maxlen argument, or it will be very slow for recursive objects. With the maxlen set, it's often faster than built-in repr. """ return ''.join(_saferepr( o, maxlen=maxlen, maxlevels=maxlevels, seen=seen ))
def _chaindict(mapping, LIT_DICT_KVSEP=LIT_DICT_KVSEP, LIT_LIST_SEP=LIT_LIST_SEP): # type: (Dict, _literal, _literal) -> Iterator[Any] size = len(mapping) for i, (k, v) in enumerate(mapping.items()): yield _key(k) yield LIT_DICT_KVSEP yield v if i < (size - 1): yield LIT_LIST_SEP def _chainlist(it, LIT_LIST_SEP=LIT_LIST_SEP): # type: (List) -> Iterator[Any] size = len(it) for i, v in enumerate(it): yield v if i < (size - 1): yield LIT_LIST_SEP def _repr_empty_set(s): # type: (Set) -> str return f'{type(s).__name__}()' def _safetext(val): # type: (AnyStr) -> str if isinstance(val, bytes): try: val.encode('utf-8') except UnicodeDecodeError: # is bytes with unrepresentable characters, attempt # to convert back to unicode return val.decode('utf-8', errors='backslashreplace') return val def _format_binary_bytes(val, maxlen, ellipsis='...'): # type: (bytes, int, str) -> str if maxlen and len(val) > maxlen: # we don't want to copy all the data, just take what we need. chunk = memoryview(val)[:maxlen].tobytes() return _bytes_prefix(f"'{_repr_binary_bytes(chunk)}{ellipsis}'") return _bytes_prefix(f"'{_repr_binary_bytes(val)}'") def _bytes_prefix(s): return 'b' + s def _repr_binary_bytes(val): # type: (bytes) -> str try: return val.decode('utf-8') except UnicodeDecodeError: # possibly not unicode, but binary data so format as hex. try: ashex = val.hex except AttributeError: # pragma: no cover # Python 3.4 return val.decode('utf-8', errors='replace') else: # Python 3.5+ return ashex() def _format_chars(val, maxlen): # type: (AnyStr, int) -> str if isinstance(val, bytes): # pragma: no cover return _format_binary_bytes(val, maxlen) else: return "'{}'".format(truncate(val, maxlen).replace("'", "\\'")) def _repr(obj): # type: (Any) -> str try: return repr(obj) except Exception as exc: stack = '\n'.join(traceback.format_stack()) return f'<Unrepresentable {type(obj)!r}{id(obj):#x}: {exc!r} {stack!r}>' def _saferepr(o, maxlen=None, maxlevels=3, seen=None): # type: (Any, int, int, Set) -> str stack = deque([iter([o])]) for token, it in reprstream(stack, seen=seen, maxlevels=maxlevels): if maxlen is not None and maxlen <= 0: yield ', ...' # move rest back to stack, so that we can include # dangling parens. stack.append(it) break if isinstance(token, _literal): val = token.value elif isinstance(token, _key): val = saferepr(token.value, maxlen, maxlevels) elif isinstance(token, _quoted): val = _format_chars(token.value, maxlen) else: val = _safetext(truncate(token, maxlen)) yield val if maxlen is not None: maxlen -= len(val) for rest1 in stack: # maxlen exceeded, process any dangling parens. for rest2 in rest1: if isinstance(rest2, _literal) and not rest2.truncate: yield rest2.value def _reprseq(val, lit_start, lit_end, builtin_type, chainer): # type: (Sequence, _literal, _literal, Any, Any) -> Tuple[Any, ...] if type(val) is builtin_type: # noqa return lit_start, lit_end, chainer(val) return ( _literal(f'{type(val).__name__}({lit_start.value}', False, +1), _literal(f'{lit_end.value})', False, -1), chainer(val) )
[docs]def reprstream(stack, seen=None, maxlevels=3, level=0, isinstance=isinstance): """Streaming repr, yielding tokens.""" # type: (deque, Set, int, int, Callable) -> Iterator[Any] seen = seen or set() append = stack.append popleft = stack.popleft is_in_seen = seen.__contains__ discard_from_seen = seen.discard add_to_seen = seen.add while stack: lit_start = lit_end = None it = popleft() for val in it: orig = val if isinstance(val, _dirty): discard_from_seen(val.objid) continue elif isinstance(val, _literal): level += val.direction yield val, it elif isinstance(val, _key): yield val, it elif isinstance(val, Decimal): yield _repr(val), it elif isinstance(val, safe_t): yield str(val), it elif isinstance(val, chars_t): yield _quoted(val), it elif isinstance(val, range): # pragma: no cover yield _repr(val), it else: if isinstance(val, set_t): if not val: yield _repr_empty_set(val), it continue lit_start, lit_end, val = _reprseq( val, LIT_SET_START, LIT_SET_END, set, _chainlist, ) elif isinstance(val, tuple): lit_start, lit_end, val = ( LIT_TUPLE_START, LIT_TUPLE_END_SV if len(val) == 1 else LIT_TUPLE_END, _chainlist(val)) elif isinstance(val, dict): lit_start, lit_end, val = ( LIT_DICT_START, LIT_DICT_END, _chaindict(val)) elif isinstance(val, list): lit_start, lit_end, val = ( LIT_LIST_START, LIT_LIST_END, _chainlist(val)) else: # other type of object yield _repr(val), it continue if maxlevels and level >= maxlevels: yield f'{lit_start.value}...{lit_end.value}', it continue objid = id(orig) if is_in_seen(objid): yield _recursion(orig), it continue add_to_seen(objid) # Recurse into the new list/tuple/dict/etc by tacking # the rest of our iterable onto the new it: this way # it works similar to a linked list. append(chain([lit_start], val, [_dirty(objid), lit_end], it)) break
Back to Top