# git-arr/utils.py

"""
Miscellaneous utilities.

These are mostly used in templates, for presentation purposes.
"""

try:
    import pygments
    from pygments import highlight
    from pygments import lexers
    from pygments.formatters import HtmlFormatter
except ImportError:
    pygments = None

try:
    import markdown
except ImportError:
    markdown = None

import base64
import mimetypes
import string

def shorten(s, width = 60):
    """Return `s` truncated to at most `width` characters.

    Strings that already fit are returned unchanged. Longer strings are cut
    and terminated with "..." so that the result is exactly `width`
    characters long. When `width` is too small to hold the ellipsis, the
    string is simply cut to `width` characters.
    """
    ellipsis = "..."

    # A string of exactly `width` characters already fits; truncating it
    # would only lose information without making it any shorter.
    if len(s) <= width:
        return s

    # No room for the ellipsis: a plain cut is the best we can do.
    if width <= len(ellipsis):
        return s[:max(width, 0)]

    return s[:width - len(ellipsis)] + ellipsis

def can_colorize(s):
    """True if we can colorize the string, False otherwise.

    Returns False when pygments is not available, when `s` is larger than
    512 KiB, or when any of its first 5 lines is longer than 300 characters.
    """
    if pygments is None:
        return False

    # Pygments can take a huge amount of time with long files, or with very
    # long lines; these are heuristics to try to avoid those situations.
    if len(s) > (512 * 1024):
        return False

    # If any of the first 5 lines is over 300 characters long, don't colorize.
    start = 0
    for _ in range(5):
        pos = s.find('\n', start)
        if pos == -1:
            # The remaining text is a final line without a trailing newline
            # (e.g. a minified single-line file); it must be measured too,
            # otherwise one huge line would slip past the heuristic.
            return len(s) - start <= 300

        if pos - start > 300:
            return False
        start = pos + 1

    return True

def can_markdown(repo, fname):
    """Tell whether `fname` should be processed through markdown.

    That requires the markdown module to have been imported, the
    repository's `info.embed_markdown` setting to be truthy, and the file
    name to end in ".md".
    """
    # Short-circuits, so the repository is not consulted when the markdown
    # module is unavailable.
    enabled = markdown is not None and repo.info.embed_markdown
    if not enabled:
        return False

    return fname.endswith(".md")

def can_embed_image(repo, fname):
    """Tell whether `fname` is an image that may be embedded in the HTML.

    Embedding must be enabled via the repository's `info.embed_images`
    setting, and the file name must carry one of the known image
    extensions (compared case-insensitively).
    """
    if not repo.info.embed_images:
        return False

    # `dot` is empty when the name contains no '.' at all, in which case
    # there is no extension to look at.
    _, dot, extension = fname.rpartition('.')
    return bool(dot) and extension.lower() in ('jpg', 'jpeg', 'png', 'gif')

def colorize_diff(s):
    """Highlight the diff text `s` using pygments.

    The text is lexed with DiffLexer and rendered by an HtmlFormatter that
    uses the 'source_code' CSS class; both are configured for UTF-8. The
    result of pygments' highlight() is returned as is.
    """
    return highlight(
        s,
        lexers.DiffLexer(encoding = 'utf-8'),
        HtmlFormatter(cssclass = 'source_code', encoding = 'utf-8'),
    )

def colorize_blob(fname, s):
    """Highlight the file contents `s` using pygments.

    The lexer is picked from the file name `fname` (with the contents as a
    hint). If pygments knows none for it, plain text is used, unless the
    contents start with a shebang, in which case pygments gets to guess
    from the beginning of the contents. The output is rendered by an
    HtmlFormatter with a line number table and per-line anchors, and the
    result of pygments' highlight() is returned as is.
    """
    enc = 'utf-8'

    try:
        chosen = lexers.guess_lexer_for_filename(fname, s, encoding = enc)
    except lexers.ClassNotFound:
        # Plain text is the default when the file name tells us nothing.
        chosen = lexers.TextLexer(encoding = enc)

        # Guessing from the contents is only attempted for files that start
        # with a shebang: anything else is likely a text file, and
        # guess_lexer() is prone to make mistakes with those.
        if s.startswith('#!'):
            try:
                chosen = lexers.guess_lexer(s[:80], encoding = enc)
            except lexers.ClassNotFound:
                # Keep the plain text lexer.
                pass

    html_options = {
        'encoding': enc,
        'cssclass': 'source_code',
        'linenos': 'table',
        'anchorlinenos': True,
        'lineanchors': 'line',
    }
    return highlight(s, chosen, HtmlFormatter(**html_options))

def markdown_blob(s):
    """Process `s` through markdown and return the result.

    The fenced code blocks and tables extensions are enabled.
    """
    return markdown.markdown(s, extensions = [
        "markdown.extensions.fenced_code",
        "markdown.extensions.tables",
    ])

def embed_image_blob(fname, image_data):
    """Return an <img> tag embedding `image_data` as a base64 data URI.

    The MIME type is guessed from the file name `fname`; when it cannot be
    guessed, the generic 'application/octet-stream' is used. `image_data`
    is the raw (binary) contents of the image.
    """
    mimetype = mimetypes.guess_type(fname)[0] or 'application/octet-stream'

    # b64encode() returns bytes on Python 3; without decoding, format()
    # would embed the "b'...'" representation and break the data URI.
    # Base64 output is pure ASCII, so decoding it is always safe.
    encoded = base64.b64encode(image_data).decode('ascii')

    return '<img style="max-width:100%;" src="data:{0};base64,{1}" />'.format(
                                    mimetype, encoded)

def is_binary(s):
    """True if the blob contents `s` look binary, False otherwise.

    Accepts both text and byte strings.
    """
    # Git considers a blob binary if NUL in first ~8KB, so do the same.
    head = s[:8192]

    # The needle must match the type of the haystack: on Python 3, looking
    # for a text NUL inside bytes raises TypeError.
    nul = b'\0' if isinstance(head, (bytes, bytearray)) else '\0'
    return nul in head

def hexdump(s):
    """Generate a hex dump of `s`, 16 bytes (or characters) per row.

    Accepts both text and byte strings. For each row, yields a tuple
    (offset, first, second, text) where:
      - offset is the position of the row within `s`,
      - first and second are the space-separated two-digit hex values of
        the first and last 8 items of the row, respectively,
      - text is the row with everything that is not an ASCII letter, digit,
        punctuation or space replaced by '.'.
    """
    if not s:
        return

    printable = string.ascii_letters + string.digits + string.punctuation + ' '
    printable_codes = frozenset(ord(c) for c in printable)
    is_bytes = isinstance(s, (bytes, bytearray))

    # Walk by offset instead of repeatedly re-slicing the remainder, which
    # would copy the whole tail on every row (quadratic on large blobs).
    for offset in range(0, len(s), 16):
        chunk = s[offset:offset + 16]

        # Normalize the row to integer codes: iterating bytes yields ints on
        # Python 3 but 1-character strings on Python 2; bytearray() gives
        # ints on both.
        if is_bytes:
            codes = list(bytearray(chunk))
        else:
            codes = [ord(c) for c in chunk]

        hexvals = ['%.2x' % code for code in codes]
        text = ''.join(chr(code) if code in printable_codes else '.'
                       for code in codes)
        yield offset, ' '.join(hexvals[:8]), ' '.join(hexvals[8:]), text