Only guess the lexer if the file starts with "#!"

Content-based lexer guessing is often wrong; to minimize the chances of a
wrong guess, we only use it on files that start with "#!", for which the
guesser usually has smarter rules.

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>
This commit is contained in:
Alberto Bertogli 2012-11-26 23:29:08 +00:00
parent 36db9cc0ee
commit 9ec2bde5c4

@@ -52,10 +52,15 @@ def colorize_blob(fname, s):
try:
lexer = lexers.guess_lexer_for_filename(fname, s, encoding = 'utf-8')
except lexers.ClassNotFound:
try:
lexer = lexers.guess_lexer(s[:200], encoding = 'utf-8')
except lexers.ClassNotFound:
lexer = lexers.TextLexer(encoding = 'utf-8')
# Only try to guess lexers if the file starts with a shebang,
# otherwise it's likely a text file and guess_lexer() is prone to
# make mistakes with those.
lexer = lexers.TextLexer(encoding = 'utf-8')
if s.startswith('#!'):
try:
lexer = lexers.guess_lexer(s[:80], encoding = 'utf-8')
except lexers.ClassNotFound:
pass
formatter = HtmlFormatter(encoding = 'utf-8',
cssclass = 'source_code',