6f3942ce38
Raw binary blob content tends to look like "line noise" and is rarely, if ever, meaningful. A hexdump(1)-style rendering (specifically, "hexdump -C"), on the other hand, showing runs of hexadecimal byte values along with an ASCII representation of those bytes can sometimes reveal useful information about the data. (A subsequent patch will add the ability to cap the amount of data rendered in order to reduce storage space requirements.) Signed-off-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>
121 lines
3.4 KiB
Python
121 lines
3.4 KiB
Python
"""
|
|
Miscellaneous utilities.
|
|
|
|
These are mostly used in templates, for presentation purposes.
|
|
"""
|
|
|
|
try:
|
|
import pygments
|
|
from pygments import highlight
|
|
from pygments import lexers
|
|
from pygments.formatters import HtmlFormatter
|
|
except ImportError:
|
|
pygments = None
|
|
|
|
try:
|
|
import markdown
|
|
except ImportError:
|
|
markdown = None
|
|
|
|
import base64
|
|
import mimetypes
|
|
import string
|
|
|
|
def shorten(s, width=60):
    """Truncate `s` for display, marking the cut with a trailing "...".

    Strings shorter than `width` are returned unchanged.  Anything else is
    cut so that the kept prefix plus the three-character ellipsis is exactly
    `width` characters long.

    When `width` is smaller than 3 there is no room for any prefix, so the
    result is just "..." (which is then longer than `width`).
    """
    if len(s) < width:
        return s

    # Reserve three characters for the ellipsis.  Clamp at zero so a tiny
    # width does not become a negative, count-from-the-end slice index.
    keep = max(width - 3, 0)
    return s[:keep] + "..."
|
|
|
|
def can_colorize(s):
    """True if we can colorize the string, False otherwise.

    Colorizing is refused when pygments could not be imported, when `s` is
    longer than 512 KiB, or when any of its first 5 lines is over 300
    characters long.
    """
    if pygments is None:
        return False

    # Pygments can take a huge amount of time with long files, or with very
    # long lines; these are heuristics to try to avoid those situations.
    if len(s) > (512 * 1024):
        return False

    # If any of the first 5 lines is over 300 characters long, don't colorize.
    start = 0
    for _ in range(5):
        pos = s.find('\n', start)
        if pos == -1:
            # The last line has no trailing newline (e.g. a minified
            # single-line file); it still counts as a line, so measure it
            # up to the end of the string.
            return len(s) - start <= 300

        if pos - start > 300:
            return False
        start = pos + 1

    return True
|
|
|
|
def can_markdown(repo, fname):
    """Tell whether `fname` should be rendered through markdown.

    This requires the markdown module to have been imported successfully,
    the repo's `info.embed_markdown` setting to be truthy, and the file name
    to end in ".md".
    """
    # The module check comes first so the repo setting is only consulted
    # when markdown is actually available.
    rendering_enabled = markdown is not None and repo.info.embed_markdown
    if not rendering_enabled:
        return False

    return fname.endswith(".md")
|
|
|
|
def can_embed_image(repo, fname):
    """Tell whether `fname` is an image we can embed directly in the HTML.

    Embedding must be enabled via the repo's `info.embed_images` setting, and
    the file name must carry one of the known image extensions (compared
    case-insensitively).
    """
    if not repo.info.embed_images:
        return False

    # Split on the last dot; `dot` is empty when the name has no dot at all,
    # in which case there is no extension to look at.
    _, dot, extension = fname.rpartition('.')
    if not dot:
        return False

    return extension.lower() in ('jpg', 'jpeg', 'png', 'gif')
|
|
|
|
def colorize_diff(s):
    """Highlight the diff text `s` with pygments and return the HTML.

    Uses pygments' DiffLexer and an HtmlFormatter with the 'source_code'
    CSS class, both configured for UTF-8.
    """
    return highlight(
        s,
        lexers.DiffLexer(encoding='utf-8'),
        HtmlFormatter(encoding='utf-8', cssclass='source_code'),
    )
|
|
|
|
def colorize_blob(fname, s):
    """Highlight the file contents `s` with pygments and return the HTML.

    The lexer is picked from the file name and contents.  When pygments finds
    no match, plain text is used, unless the contents start with a shebang,
    in which case the first 80 characters are used to guess a lexer.

    The output is a line-numbered table with per-line anchors.
    """
    try:
        chosen = lexers.guess_lexer_for_filename(fname, s, encoding='utf-8')
    except lexers.ClassNotFound:
        # Content-based guessing is only attempted for files that start with
        # a shebang; anything else is most likely plain text, and
        # guess_lexer() is prone to mistakes on those.
        chosen = lexers.TextLexer(encoding='utf-8')
        has_shebang = s.startswith('#!')
        if has_shebang:
            try:
                chosen = lexers.guess_lexer(s[:80], encoding='utf-8')
            except lexers.ClassNotFound:
                # No guess either: stay with the plain text lexer.
                pass

    html_options = dict(
        encoding='utf-8',
        cssclass='source_code',
        linenos='table',
        anchorlinenos=True,
        lineanchors='line',
    )
    return highlight(s, chosen, HtmlFormatter(**html_options))
|
|
|
|
def markdown_blob(s):
    """Run `s` through markdown.markdown() and return what it produces."""
    rendered = markdown.markdown(s)
    return rendered
|
|
|
|
def embed_image_blob(fname, image_data):
    """Return an <img> tag that embeds `image_data` as a base64 data URI.

    The MIME type placed in the URI is guessed from `fname` using the
    mimetypes module.
    """
    mimetype = mimetypes.guess_type(fname)[0]

    # b64encode() returns bytes on Python 3; formatting those directly would
    # put the "b'...'" repr into the URI.  The base64 alphabet is pure ASCII,
    # so decoding it to text is always safe.
    encoded = base64.b64encode(image_data).decode('ascii')

    return '<img style="max-width:100%;" src="data:{0};base64,{1}" />'.format(
        mimetype, encoded)
|
|
|
|
def is_binary(s):
    """True if the blob contents `s` look binary, False otherwise.

    Accepts both text and bytes-like contents.
    """
    # Git considers a blob binary if NUL in first ~8KB, so do the same.
    head = s[:8192]

    # The needle has to match the type of the haystack: looking for a text
    # NUL inside bytes raises TypeError on Python 3.
    if isinstance(head, (bytes, bytearray)):
        return b'\0' in head
    return u'\0' in head
|
|
|
|
def hexdump(s):
    """Generate the rows of a "hexdump -C"-style rendering of `s`.

    `s` may be text or bytes-like.  Each row covers up to 16 characters and
    is yielded as a 4-tuple:

        (offset, hex of the first 8 values, hex of the remaining values, text)

    where the hex values are space-separated and `text` shows ASCII letters,
    digits, punctuation and spaces as themselves and everything else as '.'.
    Nothing is yielded for empty input.
    """
    graph = string.ascii_letters + string.digits + string.punctuation + ' '
    # Work on numeric codes so text and bytes go through the same path; a
    # set also makes the per-character membership test constant time.
    printable = frozenset(ord(c) for c in graph)

    # Walk by offset instead of repeatedly re-slicing the remainder, which
    # would copy the whole tail of the data once per row.
    for offset in range(0, len(s), 16):
        chunk = s[offset:offset + 16]
        if isinstance(chunk, (bytes, bytearray)):
            # bytearray iterates as integers on every Python version.
            codes = list(bytearray(chunk))
        else:
            codes = [ord(c) for c in chunk]

        hexvals = ['%.2x' % code for code in codes]
        # chr() is only reached for codes known to be printable ASCII.
        text = ''.join(chr(code) if code in printable else '.'
                       for code in codes)
        yield offset, ' '.join(hexvals[:8]), ' '.join(hexvals[8:]), text
|