blob: render hexdump(1)-style binary blob content

Raw binary blob content tends to look like "line noise" and is rarely,
if ever, meaningful. A hexdump(1)-style rendering (specifically,
"hexdump -C"), on the other hand, showing runs of hexadecimal byte
values along with an ASCII representation of those bytes can sometimes
reveal useful information about the data.

(A subsequent patch will add the ability to cap the amount of data
rendered in order to reduce storage space requirements.)

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>
This commit is contained in:
Eric Sunshine 2015-01-13 04:57:14 -05:00 committed by Alberto Bertogli
parent 09c2f33f5a
commit 6f3942ce38
4 changed files with 35 additions and 2 deletions

@ -188,6 +188,7 @@ def with_utils(f):
'can_embed_image': utils.can_embed_image, 'can_embed_image': utils.can_embed_image,
'embed_image_blob': utils.embed_image_blob, 'embed_image_blob': utils.embed_image_blob,
'is_binary': utils.is_binary, 'is_binary': utils.is_binary,
'hexdump': utils.hexdump,
'abort': bottle.abort, 'abort': bottle.abort,
'smstr': git.smstr, 'smstr': git.smstr,
} }

@ -159,6 +159,18 @@ pre.blob-body {
font-size: medium; font-size: medium;
} }
/* Zero out padding and margin on <pre> elements inside table.blob-binary. */
table.blob-binary pre {
padding: 0;
margin: 0;
}
/* Elements with class "offset" inside table.blob-binary: right-aligned,
   extra-small dark-gray text, separated from what follows by a thin
   light-gray rule on the right edge. */
table.blob-binary .offset {
text-align: right;
font-size: x-small;
color: darkgray;
border-right: 1px solid #eee;
}
/* Pygments overrides. */ /* Pygments overrides. */
div.linenodiv { div.linenodiv {
padding-right: 0.5em; padding-right: 0.5em;

@ -19,6 +19,7 @@ except ImportError:
import base64 import base64
import mimetypes import mimetypes
import string
def shorten(s, width = 60): def shorten(s, width = 60):
if len(s) < 60: if len(s) < 60:
@ -106,3 +107,14 @@ def embed_image_blob(fname, image_data):
def is_binary(s): def is_binary(s):
# Git considers a blob binary if NUL in first ~8KB, so do the same. # Git considers a blob binary if NUL in first ~8KB, so do the same.
return '\0' in s[:8192] return '\0' in s[:8192]
def hexdump(s):
    """Yield "hexdump -C"-style rows describing s, 16 characters per row.

    Each yielded item is a 4-tuple (offset, hex1, hex2, text):

    offset -- index within s of the first character of the row (0, 16, ...)
    hex1   -- space-separated two-digit hex values of the row's first
              (up to) 8 characters
    hex2   -- the same for the row's remaining (up to) 8 characters; the
              empty string when the row holds 8 characters or fewer
    text   -- the row's characters, with anything that is not an ASCII
              letter, digit, punctuation mark or space replaced by '.'

    Nothing is yielded when s is empty.
    """
    # A set gives constant-time membership tests for the per-character
    # printable check below.
    graph = frozenset(
        string.ascii_letters + string.digits + string.punctuation + ' ')
    # Walk s by offset instead of repeatedly re-slicing the remainder
    # (s = s[16:]), which copies the whole tail on every row and makes
    # rendering quadratic in the size of the blob.
    size = len(s)
    offset = 0
    while offset < size:
        t = s[offset:offset + 16]
        hexvals = ['%.2x' % ord(c) for c in t]
        text = ''.join(c if c in graph else '.' for c in t)
        yield offset, ' '.join(hexvals[:8]), ' '.join(hexvals[8:]), text
        offset += 16

@ -42,12 +42,20 @@
% if can_embed_image(repo, fname.unicode): % if can_embed_image(repo, fname.unicode):
{{!embed_image_blob(fname.raw, blob.raw_content)}} {{!embed_image_blob(fname.raw, blob.raw_content)}}
% elif is_binary(blob.raw_content): % elif is_binary(blob.raw_content):
<table class="nice"> <table class="nice blob-binary">
<tr> <tr>
<td> <td colspan="4">
binary &mdash; {{'{:,}'.format(len(blob.raw_content))}} bytes binary &mdash; {{'{:,}'.format(len(blob.raw_content))}} bytes
</td> </td>
</tr> </tr>
% for offset, hex1, hex2, text in hexdump(blob.raw_content):
<tr>
<td class="offset">{{offset}}</td>
<td><pre>{{hex1}}</pre></td>
<td><pre>{{hex2}}</pre></td>
<td><pre>{{text}}</pre></td>
</tr>
% end
</table> </table>
% elif can_markdown(repo, fname.unicode): % elif can_markdown(repo, fname.unicode):
{{!markdown_blob(blob.utf8_content)}} {{!markdown_blob(blob.utf8_content)}}