From 9ec2bde5c45c64f7fac432dbd3f23a1883d2b594 Mon Sep 17 00:00:00 2001 From: Alberto Bertogli Date: Mon, 26 Nov 2012 23:29:08 +0000 Subject: [PATCH] Only guess the lexer if the file starts with "#!" The lexer guesser based on content is often wrong; to minimize the chances of that happening, we only use it on files that start with "#!", for which it usually has smarter rules. Signed-off-by: Alberto Bertogli --- utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/utils.py b/utils.py index 039d02b..801580e 100644 --- a/utils.py +++ b/utils.py @@ -52,10 +52,15 @@ def colorize_blob(fname, s): try: lexer = lexers.guess_lexer_for_filename(fname, s, encoding = 'utf-8') except lexers.ClassNotFound: - try: - lexer = lexers.guess_lexer(s[:200], encoding = 'utf-8') - except lexers.ClassNotFound: - lexer = lexers.TextLexer(encoding = 'utf-8') + # Only try to guess lexers if the file starts with a shebang, + # otherwise it's likely a text file and guess_lexer() is prone to + # make mistakes with those. + lexer = lexers.TextLexer(encoding = 'utf-8') + if s.startswith('#!'): + try: + lexer = lexers.guess_lexer(s[:80], encoding = 'utf-8') + except lexers.ClassNotFound: + pass formatter = HtmlFormatter(encoding = 'utf-8', cssclass = 'source_code',