git-arr/utils.py
Eric Sunshine 09c2f33f5a blob: render binary blob summary information rather than raw content
Binary blobs are currently rendered as raw data directly into the HTML
output, looking much like "line noise". This is rarely, if ever,
meaningful, and consumes considerable storage space since the entire raw
blob content is embedded in the generated HTML file.

Address this issue by instead emitting summary information about the
blob, such as its classification ("binary") and its size. Other
information can be added as needed.

As in Git itself, a blob is considered binary if a NUL is present in the
first ~8KB.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>
2015-01-13 19:51:44 +00:00

109 lines
3.0 KiB
Python

"""
Miscellaneous utilities.
These are mostly used in templates, for presentation purposes.
"""
try:
import pygments
from pygments import highlight
from pygments import lexers
from pygments.formatters import HtmlFormatter
except ImportError:
pygments = None
try:
import markdown
except ImportError:
markdown = None
import base64
import mimetypes
def shorten(s, width = 60):
if len(s) < 60:
return s
return s[:57] + "..."
def can_colorize(s):
"""True if we can colorize the string, False otherwise."""
if pygments is None:
return False
# Pygments can take a huge amount of time with long files, or with very
# long lines; these are heuristics to try to avoid those situations.
if len(s) > (512 * 1024):
return False
# If any of the first 5 lines is over 300 characters long, don't colorize.
start = 0
for i in range(5):
pos = s.find('\n', start)
if pos == -1:
break
if pos - start > 300:
return False
start = pos + 1
return True
def can_markdown(repo, fname):
"""True if we can process file through markdown, False otherwise."""
if markdown is None:
return False
if not repo.info.embed_markdown:
return False
return fname.endswith(".md")
def can_embed_image(repo, fname):
"""True if we can embed image file in HTML, False otherwise."""
if not repo.info.embed_images:
return False
return (('.' in fname) and
(fname.split('.')[-1].lower() in [ 'jpg', 'jpeg', 'png', 'gif' ]))
def colorize_diff(s):
lexer = lexers.DiffLexer(encoding = 'utf-8')
formatter = HtmlFormatter(encoding = 'utf-8',
cssclass = 'source_code')
return highlight(s, lexer, formatter)
def colorize_blob(fname, s):
try:
lexer = lexers.guess_lexer_for_filename(fname, s, encoding = 'utf-8')
except lexers.ClassNotFound:
# Only try to guess lexers if the file starts with a shebang,
# otherwise it's likely a text file and guess_lexer() is prone to
# make mistakes with those.
lexer = lexers.TextLexer(encoding = 'utf-8')
if s.startswith('#!'):
try:
lexer = lexers.guess_lexer(s[:80], encoding = 'utf-8')
except lexers.ClassNotFound:
pass
formatter = HtmlFormatter(encoding = 'utf-8',
cssclass = 'source_code',
linenos = 'table',
anchorlinenos = True,
lineanchors = 'line')
return highlight(s, lexer, formatter)
def markdown_blob(s):
return markdown.markdown(s)
def embed_image_blob(fname, image_data):
mimetype = mimetypes.guess_type(fname)[0]
return '<img style="max-width:100%;" src="data:{0};base64,{1}" />'.format( \
mimetype, base64.b64encode(image_data))
def is_binary(s):
# Git considers a blob binary if NUL in first ~8KB, so do the same.
return '\0' in s[:8192]