Simplify smstr

With the Python 2 to 3 migration and the type checking, we can be
fairly confident that smstr instances are always constructed from
strings, not bytes.

This allows the code to be simplified, as we no longer need to carry
the dual raw/unicode representation.
This commit is contained in:
Alberto Bertogli 2020-05-24 15:52:11 +01:00
parent 20b99ee568
commit aee18d0edd
3 changed files with 15 additions and 28 deletions

29
git.py

@ -125,28 +125,15 @@ class SimpleNamespace(object):
class smstr:
"""A "smart" string, containing many representations for ease of use.
"""A "smart" string, containing many representations for ease of use."""
This is a string class that contains:
.raw -> raw string, authoritative source.
.unicode -> unicode representation, may not be perfect if .raw is not
proper utf8 but should be good enough to show.
.url -> escaped for safe embedding in URLs, can be not quite
readable.
.html -> an HTML-embeddable representation.
"""
raw: str # string, probably utf8-encoded, good enough to show.
url: str # escaped for safe embedding in URLs (not human-readable).
html: str # HTML-embeddable representation.
def __init__(self, raw: str):
if not isinstance(raw, (str, bytes)):
raise TypeError(
"The raw string must be instance of 'str', not %s" % type(raw)
)
self.raw = raw
if isinstance(raw, bytes):
self.unicode: str = raw.decode("utf8", errors="backslashreplace")
else:
self.unicode = raw
self.url = urllib.request.pathname2url(raw)
def __init__(self, s: str):
self.raw = s
self.url = urllib.request.pathname2url(s)
self.html = self._to_html()
def __cmp__(self, other):
@ -173,7 +160,7 @@ class smstr:
def _to_html(self):
"""Returns an html representation of the unicode string."""
html = ""
for c in escape(self.unicode):
for c in escape(self.raw):
if c in "\t\r\n\r\f\a\b\v\0":
esc_c = c.encode("unicode-escape").decode("utf8")
html += '<span class="ctrlchr">%s</span>' % esc_c

@ -10,7 +10,7 @@
% relroot = reltree + '../' * (len(branch.split('/')) - 1)
<title>git &raquo; {{repo.name}} &raquo;
{{branch}} &raquo; {{dirname.unicode}}{{fname.unicode}}</title>
{{branch}} &raquo; {{dirname.raw}}{{fname.raw}}</title>
<link rel="stylesheet" type="text/css"
href="{{relroot}}../../../../../static/git-arr.css"/>
<link rel="stylesheet" type="text/css"
@ -33,7 +33,7 @@
% if not c.raw:
% continue
% end
<a href="{{base.url}}{{c.url}}/">{{c.unicode}}</a> /
<a href="{{base.url}}{{c.url}}/">{{c.raw}}</a> /
% base += c + '/'
% end
<a href="">{{!fname.html}}</a>
@ -45,7 +45,7 @@
<td>empty &mdash; 0 bytes</td>
</tr>
</table>
% elif can_embed_image(repo, fname.unicode):
% elif can_embed_image(repo, fname.raw):
{{!embed_image_blob(fname.raw, blob.raw_content)}}
% elif is_binary(blob.raw_content):
<table class="nice blob-binary">
@ -72,12 +72,12 @@
</tr>
% end
</table>
% elif can_markdown(repo, fname.unicode):
% elif can_markdown(repo, fname.raw):
<div class="markdown">
{{!markdown_blob(blob.utf8_content)}}
</div>
% elif can_colorize(blob.utf8_content):
{{!colorize_blob(fname.unicode, blob.utf8_content)}}
{{!colorize_blob(fname.raw, blob.utf8_content)}}
% else:
<pre class="blob-body">
{{blob.utf8_content}}

@ -10,7 +10,7 @@
% relroot = reltree + '../' * (len(branch.split('/')) - 1)
<title>git &raquo; {{repo.name}} &raquo;
{{branch}} &raquo; {{dirname.unicode if dirname.unicode else '/'}}</title>
{{branch}} &raquo; {{dirname.raw if dirname.raw else '/'}}</title>
<link rel="stylesheet" type="text/css"
href="{{relroot}}../../../../../static/git-arr.css"/>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
@ -31,7 +31,7 @@
% if not c.raw:
% continue
% end
<a href="{{base.url}}{{c.url}}/">{{c.unicode}}</a> /
<a href="{{base.url}}{{c.url}}/">{{c.raw}}</a> /
% base += c + '/'
% end
</h3>