diff --git a/.gitignore b/.gitignore
index faf410c..94fab83 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
*.pyc
__pycache__
-.*.swp
+.*
+!.gitignore
diff --git a/git-arr b/git-arr
index 5c4e7db..98a6bc7 100755
--- a/git-arr
+++ b/git-arr
@@ -1,21 +1,15 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
"""
git-arr: A git web html generator.
"""
-from __future__ import print_function
-
+import configparser
import math
import optparse
import os
import re
import sys
-try:
- import configparser
-except ImportError:
- import ConfigParser as configparser
-
import bottle
import git
@@ -64,7 +58,7 @@ def load_config(path):
'generate_patch': 'yes',
}
- config = configparser.SafeConfigParser(defaults)
+ config = configparser.ConfigParser(defaults)
config.read(path)
# Do a first pass for general sanity checking and recursive expansion.
@@ -118,7 +112,7 @@ def load_config(path):
r.info.commits_per_page = config.getint(s, 'commits_per_page')
r.info.max_pages = config.getint(s, 'max_pages')
if r.info.max_pages <= 0:
- r.info.max_pages = sys.maxint
+ r.info.max_pages = sys.maxsize
r.info.generate_tree = config.getboolean(s, 'tree')
r.info.root_diff = config.getboolean(s, 'rootdiff')
r.info.generate_patch = config.getboolean(s, 'generate_patch')
@@ -263,6 +257,10 @@ def blob(repo, bname, fname, dirname = ''):
fname = git.smstr.from_url(fname)
path = dirname.raw + fname.raw
+ # Handle backslash-escaped characters, which are not utf8.
+ # This matches the generated links from git.unquote().
+ path = path.encode("utf8").decode("unicode-escape").encode("latin1")
+
content = repo.blob(path, bname)
if content is None:
bottle.abort(404, "File %r not found in branch %s" % (path, bname))
@@ -339,7 +337,7 @@ def generate(output, only = None):
else:
# Otherwise, be lazy if we were given a function to run, or write
# always if they gave us a string.
- if isinstance(func_or_str, (str, unicode)):
+ if isinstance(func_or_str, str):
print(path)
s = func_or_str
else:
@@ -348,7 +346,7 @@ def generate(output, only = None):
print(path)
s = func_or_str(*args)
- open(path, 'w').write(s.encode('utf8', errors = 'xmlcharrefreplace'))
+ open(path, 'w').write(s)
if mtime:
os.utime(path, (mtime, mtime))
@@ -398,7 +396,7 @@ def generate(output, only = None):
write_to('static/syntax.css', read_f, [static_path + '/syntax.css'],
os.stat(static_path + '/syntax.css').st_mtime)
- rs = sorted(repos.values(), key = lambda r: r.name)
+ rs = sorted(list(repos.values()), key = lambda r: r.name)
if only:
rs = [r for r in rs if r.name in only]
diff --git a/git.py b/git.py
index 2240175..09ccd37 100644
--- a/git.py
+++ b/git.py
@@ -12,35 +12,13 @@ import subprocess
from collections import defaultdict
import email.utils
import datetime
-import urllib
-from cgi import escape
+import urllib.request, urllib.parse, urllib.error
+from html import escape
# Path to the git binary.
GIT_BIN = "git"
-class EncodeWrapper:
- """File-like wrapper that returns data utf8 encoded."""
- def __init__(self, fd, encoding = 'utf8', errors = 'replace'):
- self.fd = fd
- self.encoding = encoding
- self.errors = errors
-
- def __iter__(self):
- for line in self.fd:
- yield line.decode(self.encoding, errors = self.errors)
-
- def read(self):
- """Returns the whole content."""
- s = self.fd.read()
- return s.decode(self.encoding, errors = self.errors)
-
- def readline(self):
- """Returns a single line."""
- s = self.fd.readline()
- return s.decode(self.encoding, errors = self.errors)
-
-
def run_git(repo_path, params, stdin = None, silent_stderr = False, raw = False):
"""Invokes git with the given parameters.
@@ -66,13 +44,8 @@ def run_git(repo_path, params, stdin = None, silent_stderr = False, raw = False)
if raw:
return p.stdout
- # We need to wrap stdout if we want to decode it as utf8, subprocess
- # doesn't support us telling it the encoding.
- if sys.version_info.major == 3:
- return io.TextIOWrapper(p.stdout, encoding = 'utf8',
- errors = 'replace')
- else:
- return EncodeWrapper(p.stdout)
+ return io.TextIOWrapper(p.stdout, encoding = 'utf8',
+ errors = 'backslashreplace')
class GitCommand (object):
@@ -109,6 +82,8 @@ class GitCommand (object):
def stdin(self, s):
"""Sets the contents we will send in stdin."""
self._override = True
+ if isinstance(s, str):
+ s = s.encode("utf8")
self._stdin_buf = s
self._override = False
@@ -116,7 +91,7 @@ class GitCommand (object):
"""Runs the git command."""
params = [self._cmd]
- for k, v in self._kwargs.items():
+ for k, v in list(self._kwargs.items()):
dash = '--' if len(k) > 1 else '-'
if v is None:
params.append('%s%s' % (dash, k))
@@ -146,11 +121,16 @@ class smstr:
.html -> an HTML-embeddable representation.
"""
def __init__(self, raw):
- if not isinstance(raw, str):
- raise TypeError("The raw string must be instance of 'str'")
+ if not isinstance(raw, (str, bytes)):
+ raise TypeError(
+ "The raw string must be instance of 'str', not %s" %
+ type(raw))
self.raw = raw
- self.unicode = raw.decode('utf8', errors = 'replace')
- self.url = urllib.pathname2url(raw)
+ if isinstance(raw, bytes):
+ self.unicode = raw.decode('utf8', errors = 'backslashreplace')
+ else:
+ self.unicode = raw
+ self.url = urllib.request.pathname2url(raw)
self.html = self._to_html()
def __cmp__(self, other):
@@ -163,7 +143,7 @@ class smstr:
@staticmethod
def from_url(url):
"""Returns an smstr() instance from an url-encoded string."""
- return smstr(urllib.url2pathname(url))
+ return smstr(urllib.request.url2pathname(url))
def split(self, sep):
"""Like str.split()."""
@@ -176,10 +156,10 @@ class smstr:
def _to_html(self):
"""Returns an html representation of the unicode string."""
- html = u''
+ html = ''
for c in escape(self.unicode):
if c in '\t\r\n\r\f\a\b\v\0':
- esc_c = c.encode('ascii').encode('string_escape')
+ esc_c = c.encode("unicode-escape").decode("utf8")
html += '%s' % esc_c
else:
html += c
@@ -190,14 +170,23 @@ class smstr:
def unquote(s):
"""Git can return quoted file names, unquote them. Always return a str."""
if not (s[0] == '"' and s[-1] == '"'):
- # Unquoted strings are always safe, no need to mess with them; just
- # make sure we return str.
- s = s.encode('ascii')
+ # Unquoted strings are always safe, no need to mess with them
return s
- # Get rid of the quotes, we never want them in the output, and convert to
- # a raw string, un-escaping the backslashes.
- s = s[1:-1].decode('string-escape')
+ # The string will be of the form `"<str>"`, where <str> is a
+ # backslash-escaped representation of the name of the file.
+ # Examples: "with\ttwo\ttabs", "\303\261aca-utf8", "\361aca-latin1"
+
+ # Get rid of the quotes, we never want them in the output.
+ s = s[1:-1]
+
+ # Un-escape the backslashes.
+ # latin1 is ok to use here because in Python it just maps the code points
+ # 0-255 to the bytes 0x00-0xff, which is what we expect.
+ s = s.encode("latin1").decode("unicode-escape")
+
+ # Convert to utf8.
+ s = s.encode("latin1").decode("utf8", errors='backslashreplace')
return s
@@ -337,13 +326,13 @@ class Repo:
cmd.raw(True)
cmd.batch = '%(objectsize)'
- if isinstance(ref, unicode):
- ref = ref.encode('utf8')
- cmd.stdin('%s:%s' % (ref, path))
+ # Format: <ref>:<path>
+ # Construct it in binary since the path might not be utf8.
+ cmd.stdin(ref.encode("utf8") + b":" + path)
out = cmd.run()
head = out.readline()
- if not head or head.strip().endswith('missing'):
+ if not head or head.strip().endswith(b'missing'):
return None
return Blob(out.read()[:int(head)])
diff --git a/utils.py b/utils.py
index ada9c7e..4e12b0d 100644
--- a/utils.py
+++ b/utils.py
@@ -108,15 +108,17 @@ def markdown_blob(s):
def embed_image_blob(fname, image_data):
mimetype = mimetypes.guess_type(fname)[0]
+ b64img = base64.b64encode(image_data).decode("ascii")
return ''.format( \
- mimetype, base64.b64encode(image_data))
+ mimetype, b64img)
def is_binary(s):
# Git considers a blob binary if NUL in first ~8KB, so do the same.
- return '\0' in s[:8192]
+ return b'\0' in s[:8192]
def hexdump(s):
graph = string.ascii_letters + string.digits + string.punctuation + ' '
+ s = s.decode("latin1")
offset = 0
while s:
t = s[:16]
diff --git a/views/tree-list.html b/views/tree-list.html
index 70f032a..ce5b0d6 100644
--- a/views/tree-list.html
+++ b/views/tree-list.html
@@ -1,5 +1,5 @@
]
-% key_func = lambda (t, n, s): (t != 'tree', n.raw)
+% key_func = lambda x: (x[0] != 'tree', x[1].raw)
% for type, name, size in sorted(tree.ls(dirname.raw), key = key_func):
% if type == "blob":