From 9ec2bde5c45c64f7fac432dbd3f23a1883d2b594 Mon Sep 17 00:00:00 2001
From: Alberto Bertogli <albertito@blitiri.com.ar>
Date: Mon, 26 Nov 2012 23:29:08 +0000
Subject: [PATCH] Only guess the lexer if the file starts with "#!"

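Content-based lexer guessing is often wrong. To minimize the chances of
that happening, only use it on files that start with "#!", for which it
usually has smarter rules. All other files for which no lexer is found by
filename fall back to the plain text lexer.

While at it, pass only the first 80 characters (instead of 200) to the
content-based guesser.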

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>
---
 utils.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/utils.py b/utils.py
index 039d02b..801580e 100644
--- a/utils.py
+++ b/utils.py
@@ -52,10 +52,15 @@ def colorize_blob(fname, s):
     try:
         lexer = lexers.guess_lexer_for_filename(fname, s, encoding = 'utf-8')
     except lexers.ClassNotFound:
-        try:
-            lexer = lexers.guess_lexer(s[:200], encoding = 'utf-8')
-        except lexers.ClassNotFound:
-            lexer = lexers.TextLexer(encoding = 'utf-8')
+        # Only try to guess lexers if the file starts with a shebang;
+        # otherwise it's likely a text file, and guess_lexer() is prone to
+        # making mistakes with those.
+        lexer = lexers.TextLexer(encoding = 'utf-8')
+        if s.startswith('#!'):
+            try:
+                lexer = lexers.guess_lexer(s[:80], encoding = 'utf-8')
+            except lexers.ClassNotFound:
+                pass
 
     formatter = HtmlFormatter(encoding = 'utf-8',
                     cssclass = 'source_code',