pygmentize.cgi is a CGI script for HTML-based syntax highlighting with Pygments. It exposes almost all of Pygments’ functionality while keeping usage simple for the client and setup simple for the host.
It is designed for a server that has Python and Pygments installed, and for clients that have neither.
Note that it has no caching mechanism, because it is designed to be simply bundled with other products such as a WordPress plugin. As a result it may be very slow and may use a lot of CPU and memory. Don’t use it on a site with a huge number of page views without your own caching mechanism. Developers of products such as WordPress plugins should implement a cache for the sake of their users’ experience.
An example of using it with Ajax, pygmentize_cgi_example.html, is available.
HTML:
<select id="lexer"/>
script:
// Fill the #lexer pulldown with the lexers reported by pygmentize.cgi.
jQuery(function($) {
    var syntaxService = "http://path/to/yourserver/cgi-bin/pygmentize.cgi";
    $(document).ready(function() {
        $.ajax({
            url: syntaxService,
            type: "POST", // MUST BE POST
            data: JSON.stringify({
                command: "lexers",
                params: {
                    fields: ["name", "aliases"]
                }
            }),
            success: function(data) {
                // Each row holds the requested fields in order: [name, aliases].
                // All working variables are declared locally so nothing leaks
                // into the global scope (and the code also works in strict mode).
                var options = [];
                var i, fullname, name, option;
                // Indexed loop: for-in on an array would also visit any
                // enumerable properties added to Array.prototype.
                for (i = 0; i < data.length; i++) {
                    fullname = data[i][0];
                    name = data[i][1][0]; // first alias is used as the option value
                    // Build the element through the DOM API so that special
                    // characters in names cannot break the markup.
                    option = $('<option>').val(name).text(fullname);
                    if (name == 'pycon') {
                        option.attr('selected', 'selected');
                    }
                    options.push(option[0]);
                }
                $('#lexer').empty().append(options);
            },
            contentType: "application/json",
            headers: {'Accept': 'application/json'},
            dataType: "json" // no trailing comma: old IE rejects it
        });
    });
});
HTML:
<textarea id="source" rows="12" cols="120"></textarea>
<div id="result"/>
script:
// Re-highlight the #source text with the lexer chosen in #lexer whenever
// either of them changes, and show the returned HTML in #result.
jQuery(function($) {
    var syntaxService = "http://path/to/yourserver/cgi-bin/pygmentize.cgi";

    // POST a "format" command to pygmentize.cgi.
    //   lang: lexer alias, code: source text to highlight,
    //   fn:   success callback, passed to $.ajax as-is (receives the HTML).
    function call_formatter(lang, code, fn)
    {
        $.ajax({
            url: syntaxService,
            type: "POST", // MUST BE POST
            data: JSON.stringify({
                command: "format",
                params: {
                    lang: lang,
                    code: code
                }
            }),
            contentType: "application/json",
            headers: {'Accept': 'text/html'},
            success: fn,
            dataType: "html" // no trailing comma: old IE rejects it
        });
    }

    function highlight() {
        // Declared with var so it does not become an implicit global.
        var code = $('#source').val();
        call_formatter($('#lexer').val(), code, function(result) {
            $('#result').html(result);
        });
    }

    $(document).ready(function() {
        // ...
        $('#source').on('change', function() { highlight(); });
        $('#lexer').on('change', function() { highlight(); });
    });
});
<?php
/**
 * Call pygmentize.cgi as a web service (HTTP POST with a JSON body).
 *
 * @param array  $params  Request structure; it is JSON-encoded and sent as
 *                        the POST body.
 * @param string $outtype "json" sends "Accept: application/json"; any other
 *                        value sends "Accept: text/html".
 * @param bool   $error   Set to TRUE when the request fails. It is left
 *                        untouched on success, so initialise it to FALSE
 *                        before calling.
 * @return string|false   The response body, or FALSE on failure.
 */
function call_pygmentize_cgi_webservice($params, $outtype, &$error) {
    // $cgipath (URL of pygmentize.cgi) is expected to be defined at global
    // scope by the embedding code. Without this declaration it would be an
    // undefined local variable here and cURL would get an empty URL.
    global $cgipath;

    $curl = curl_init();
    // User Agent should be set against for WAF (Web Application Firewall)
    // or some.htaccess settings.
    $ua = '...';
    $verbose_stream = tmpfile(); // receiver stream for cURL verbose output
    curl_setopt_array($curl, array(
        CURLOPT_HEADER => FALSE,
        CURLOPT_RETURNTRANSFER => TRUE,
        CURLOPT_VERBOSE => TRUE,
        CURLOPT_STDERR => $verbose_stream,
        CURLOPT_FAILONERROR => TRUE, // fail on error code >= 400
        //CURLOPT_USERAGENT => $ua, // recommended
        CURLOPT_REFERER => home_url('/'),
        CURLOPT_ENCODING => "gzip,deflate", // if you want
    ));
    $header = array(
        "Accept-Charset: utf-8",
        "Content-Type: application/json",
    );
    if ($outtype == "json") {
        $header[] = "Accept: application/json";
    } else {
        $header[] = "Accept: text/html";
    }
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_URL, $cgipath); // URL of pygmentize.cgi
    curl_setopt($curl, CURLOPT_POST, 1); // MUST BE POST
    $post_data = json_encode($params);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $post_data);
    $output = curl_exec($curl);
    if ($output === FALSE) {
        // any code what you want
        // (e.g. rewind($verbose_stream) and read it to log the cURL trace)
        $error = TRUE;
    }
    curl_close($curl);
    // Release the temporary file that received the verbose output.
    if (is_resource($verbose_stream)) {
        fclose($verbose_stream);
    }
    return $output;
}
<?php
/**
 * Build the <option> elements for a lexer pulldown.
 *
 * Asks pygmentize.cgi for the "name" and "aliases" of every lexer and
 * renders one <option> per lexer, using the first alias as the value and
 * the name as the label. The 'php' lexer is pre-selected.
 *
 * @return string HTML of the <option> elements, or "" when the request
 *                fails or the response is not a JSON array.
 */
function get_pygments_lexers() {
    // The callee only ever sets this to TRUE, so it must start out FALSE.
    $error = FALSE;
    $service_params = array(
        "command" => "lexers",
        "params" => array(
            "fields" => array("name", "aliases")
        )
    );
    $output = call_pygmentize_cgi_webservice(
        $service_params, "json", $error);
    if ($error || $output === FALSE) {
        return "";
    }
    $rows = json_decode($output);
    if (!is_array($rows)) {
        // Not valid JSON, or not the expected list of rows.
        return "";
    }
    $result = "";
    foreach ($rows as $row) {
        // Each row is [name, aliases]; skip rows without a usable alias.
        if (!isset($row[0], $row[1][0])) {
            continue;
        }
        $fullname = $row[0];
        $name = $row[1][0];
        // Escape both values so special characters cannot break the markup.
        $result .= '<option value="';
        $result .= htmlspecialchars($name, ENT_QUOTES, 'UTF-8');
        $result .= '"';
        if ($name == 'php') {
            $result .= ' selected';
        }
        $result .= ">";
        $result .= htmlspecialchars($fullname, ENT_QUOTES, 'UTF-8');
        $result .= "</option>";
    }
    return $result;
}
$lexers = get_pygments_lexers();
?>
<select id="lang-pulldown"><?php echo $lexers ?></select>
<?php
/**
 * Decode the HTML entities in a string into UTF-8 text.
 *
 * @param string $str   Text that may contain HTML entities.
 * @param int    $flags Flags passed through to html_entity_decode().
 * @return string       The decoded text.
 */
function entities_to_unicode($str, $flags) {
    $decoded = html_entity_decode($str, $flags, 'UTF-8');
    // Second pass: convert any decimal numeric references (&#NNN;) that are
    // still present after html_entity_decode().
    $convert_reference = function($match) {
        return mb_convert_encoding($match[1], "UTF-8", "HTML-ENTITIES");
    };
    return preg_replace_callback(
        "/(&#[0-9]+;)/", $convert_reference, $decoded);
}
// ...
$content = "<?php echo 'blah blah blah' ?>";
// ...
if (TRUE) { // if you want
    // mainly, the purpose of this code block is to
    // become friendly with visual editor.
    $flags = ENT_QUOTES | ENT_COMPAT | ENT_HTML401;
    // Turn the entities a visual editor inserts back into plain characters
    // before decoding the rest: &nbsp; becomes an ordinary space (not
    // U+00A0) and &#039; becomes an apostrophe.
    // NOTE(review): the two patterns were reconstructed; the published
    // listing had lost the entity text, leaving '/ /' and the invalid
    // '/'/'. Confirm against the original source.
    $content = entities_to_unicode(
        preg_replace(
            array('/&nbsp;/', '/&#039;/'),
            array(' ', "'"),
            $content), $flags);
}
/* ------------------------------ */
$error = FALSE;
$service_params = array(
    "command" => "format",
    "params" => array(
        'code' => $content,
        'lang' => $lang,
    )
);
$output = call_pygmentize_cgi_webservice(
    $service_params, "html", $error);
Because Pygments itself is written in Python, calling the CGI from Python may make little sense to many Pythonistas, but it is very useful for troubleshooting.
>>> URL = "http://path/to/yourserver/cgi-bin/pygmentize.cgi"
>>> try:
... import urllib2
... from StringIO import StringIO as BytesIO
... except ImportError: # Python 3.x
... import urllib.request as urllib2
... from io import BytesIO
...
>>> import json
>>> import sys
>>> import zlib
>>> import gzip
>>> if sys.platform == "win32":
... import os, msvcrt
... msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
... msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY)
...
16384
16384
>>> req = urllib2.Request(URL, data=json.dumps({
... "command": "lexers",
... "params": {"fields": ["name", "aliases", "filenames", "mimetypes"]},
... }))
>>> req.add_header("Accept", "application/json")
>>> req.add_header("Accept-Encoding", "gzip")
>>> req.add_header("Content-Type", "application/json")
>>> handler = urllib2.HTTPHandler()
>>> opener = urllib2.build_opener(handler)
>>> f = opener.open(req)
>>> print(f.headers)
Date: Wed, 08 Jul 2015 10:05:52 GMT
Server: Apache
Content-Encoding: gzip
Vary: Accept-Encoding,User-Agent
Content-Length: 7106
Cache-Control: max-age=31536000
Expires: Thu, 07 Jul 2016 10:05:52 GMT
Content-Type: application/json
Connection: close
>>> out = BytesIO()
>>> out.write(f.read())
>>> out.seek(0)
>>> gzf = gzip.GzipFile(fileobj=out, mode='r')
>>> d = gzf.read()
>>> json.loads(d)[0]
[u'ABAP', [u'abap'], [u'*.abap'], [u'text/x-abap']]
>>> json.loads(d)[1]
[u'ANTLR', [u'antlr'], [], []]
>>> json.loads(d)[2]
[u'ANTLR With ActionScript Target', [u'antlr-as', u'antlr-actionscript'], [u'*.G', u'*.g'], []]
>>> URL = "http://path/to/yourserver/cgi-bin/pygmentize.cgi"
>>> try:
... import urllib2
... from StringIO import StringIO as BytesIO
... except ImportError: # Python 3.x
... import urllib.request as urllib2
... from io import BytesIO
...
>>> import json
>>> import sys
>>> import zlib
>>> import gzip
>>> if sys.platform == "win32":
... import os, msvcrt
... msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
... msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY)
...
16384
16384
>>>
>>> code = """
... >>> import os
... >>> os.listdir(".") # list current dir
... ['hoge.cpp', 'hoge.pyx', 'hoge.pxd', 'hoge.so']
... """
>>> req = urllib2.Request(URL, data=json.dumps({
... "command": "format",
... "params": {
... "code": code,
... "lang": "pycon",
... "linenos": "inline",
... }}))
>>> req.add_header("Accept", "text/html")
>>> req.add_header("Accept-Encoding", "deflate")
>>> req.add_header("Content-Type", "application/json")
>>> handler = urllib2.HTTPHandler()
>>> opener = urllib2.build_opener(handler)
>>> f = opener.open(req)
>>> print(f.headers)
Date: Wed, 08 Jul 2015 10:40:02 GMT
Server: Apache
Content-Encoding: deflate
Content-Length: 203
Cache-Control: max-age=3600
Expires: Wed, 08 Jul 2015 11:40:02 GMT
Vary: User-Agent
Content-Type: text/html; charset=iso-8859-1
Connection: close
>>> d = zlib.decompress(f.read())
>>> d
'<div class="highlight"><pre><span class="lineno">1 </span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">os</span>\n<span class="lineno">2 </span><span class="gp
">>>> </span><span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="s">"."</span><span class="p">)</span> <span class="
c1"># list current dir</span>\n<span class="lineno">3 </span><span class="go">['hoge.cpp', 'hoge.pyx', 'hoge.pxd', 'hoge.so']</span>\n</pre></div>\n'
>>>
- probably Python 2.7 or later
- or Python 3.x
An HTTP server capable of executing Bourne shell scripts as CGI.
distribution files of pygmentize.cgi
Simply, you just need:
Optionally, if you want to respond with the documentation page (this page) when a client requests pygmentize.cgi with HTTP GET:
If you are lucky, just deploy the distribution files of pygmentize.cgi to the proper location (commonly /cgi-bin/) and add execute permission to pygmentize.cgi; that’s all.
Please see your server’s documentation for how to use CGI on your server; topics such as .htaccess are not explained here.
On a rental (shared hosting) server, where multiple Python versions may coexist, Python’s name or path is sometimes unusual, like /usr/opt/bin/python2.7.
If so, please edit pygmentize.cgi.cfg:
python=/usr/opt/bin/python2.7
PYTHONPATH=
If Pygments is installed in a non-standard location, or you want to use a specific version, and pygmentize.cgi therefore can’t find it, edit pygmentize.cgi.cfg as well.
python=/usr/opt/bin/python2.7
PYTHONPATH=/home/youraccount/lib/python/site-packages
If you have system-wide access rights, i.e. you have the root account, just install it. Ordinarily you can do it like this:
youraccount@yourserver: youraccount$ su
password: ********
root@yourserver: youraccount# pip install pygments
Or if you have no pip, you can install manually from source tree:
youraccount@yourserver: youraccount$ ls
Pygments-2.0.2.tar.gz
youraccount@yourserver: youraccount$ tar zxvf Pygments-2.0.2.tar.gz
...
youraccount@yourserver: youraccount$ cd Pygments-2.0.2
youraccount@yourserver: Pygments-2.0.2$ python setup.py build
...
youraccount@yourserver: Pygments-2.0.2$ su
password: ********
root@yourserver: Pygments-2.0.2# python setup.py install
...
If you can’t write to system locations like /usr, you can deploy the Pygments packages and modules to a location of your choice, and then edit pygmentize.cgi.cfg as well.
python=/usr/opt/bin/python2.7
PYTHONPATH=/home/youraccount/lib/python/site-packages
BTW, if you have downloaded Pygments source tree, and you want to upload to your server, you don’t need to upload whole tree but just need to upload pygments subdir.
The query parameters for pygmentize.cgi are structured as JSON:
{
"command": "format",
"params": {
}
}
command is either format or lexers. params differs between format and lexers; see below in this document.
You can control the behaviour of the CGI via some HTTP headers, for example (in Python):
>>> req = urllib2.Request(URL, data=json.dumps({
... "command": "lexers",
... "params": {"fields": ["name", "aliases", "filenames", "mimetypes"]},
... }))
>>> # we need compress with gzip
...
>>> req.add_header("Accept-Encoding", "gzip")
The only params for "command": "lexers" is fields.
Internally, this functionality is implemented by a call like this:
import pygments
from pygments.lexers import get_all_lexers
for name, aliases, filenames, mimetypes in get_all_lexers():
# ...
i.e., the fields parameter corresponds directly to the elements of these tuples.
You can choose fields from name, aliases, filenames, and mimetypes:
>>> req = urllib2.Request(URL, data=json.dumps({
... "command": "lexers",
... "params": {"fields": ["name", "aliases", "filenames", "mimetypes"]},
... }))
In most cases you probably just want to build a pulldown for selecting a lexer; if so, you only need name and aliases.
The params for "command": "format" are almost the same as the options of pygments.formatters.html.HtmlFormatter (see the Pygments source for details).
The next section is the documentation of pygments.formatters.html.HtmlFormatter, not of pygmentize.cgi. The differences are few and small, and they are described afterwards.
Format tokens as HTML 4 <span> tags within a <pre> tag, wrapped in a <div> tag. The <div>'s CSS class can be set by the cssclass option.
If the linenos option is set to "table", the <pre> is additionally wrapped inside a <table> which has one row and two cells: one containing the line numbers and one containing the code. Example:
<div class="highlight" >
<table><tr>
<td class="linenos" title="click to toggle"
onclick="with (this.firstChild.style)
{ display = (display == '') ? 'none' : '' }">
<pre>1
2</pre>
</td>
<td class="code">
<pre><span class="Ke">def </span><span class="NaFu">foo</span>(bar):
<span class="Ke">pass</span>
</pre>
</td>
</tr></table></div>
(whitespace added to improve clarity).
Wrapping can be disabled using the nowrap option.
A list of lines can be specified using the hl_lines option to make these lines highlighted (as of Pygments 0.11).
With the full option, a complete HTML 4 document is output, including the style definitions inside a <style> tag, or in a separate file if the cssfile option is given.
When tagsfile is set to the path of a ctags index file, it is used to generate hyperlinks from names to their definition. You must enable anchorlines and run ctags with the -n option for this to work. The python-ctags module from PyPI must be installed to use this feature; otherwise a RuntimeError will be raised.
The get_style_defs(arg='') method of a HtmlFormatter returns a string containing CSS rules for the CSS classes used by the formatter. The argument arg can be used to specify additional CSS selectors that are prepended to the classes. A call fmter.get_style_defs('td .code') would result in the following CSS classes:
td .code .kw { font-weight: bold; color: #00FF00 }
td .code .cm { color: #999999 }
...
If you have Pygments 0.6 or higher, you can also pass a list or tuple to the get_style_defs() method to request multiple prefixes for the tokens:
formatter.get_style_defs(['div.syntax pre', 'pre.syntax'])
The output would then look like this:
div.syntax pre .kw,
pre.syntax .kw { font-weight: bold; color: #00FF00 }
div.syntax pre .cm,
pre.syntax .cm { color: #999999 }
...
Additional options accepted:
CSS class for the wrapping <div> tag (default: 'highlight'). If you set this option, the default selector for get_style_defs() will be this class.
New in version 0.9: If you select the 'table' line numbers, the wrapping table will have a CSS class of this string plus 'table', the default is accordingly 'highlighttable'.
Inline CSS styles for the <pre> tag (default: '').
New in version 0.11.
If the full option is true and this option is given, it must be the name of an external file. If the filename does not include an absolute path, the file’s path will be assumed to be relative to the main output file’s path, if the latter can be found. The stylesheet is then written to this file instead of the HTML file.
New in version 0.6.
If cssfile is given and the specified file exists, the css file will not be overwritten. This allows the use of the full option in combination with a user specified css file. Default is False.
New in version 1.1.
If set to 'table', output line numbers as a table with two cells, one containing the line numbers, the other the whole code. This is copy-and-paste-friendly, but may cause alignment problems with some browsers or fonts. If set to 'inline', the line numbers will be integrated in the <pre> tag that contains the code (that setting is new in Pygments 0.8).
For compatibility with Pygments 0.7 and earlier, every true value except 'inline' means the same as 'table' (in particular, that means also True).
The default value is False, which means no line numbers at all.
Note: with the default (“table”) line number mechanism, the line numbers and code can have different line heights in Internet Explorer unless you give the enclosing <pre> tags an explicit line-height CSS property (you get the default line spacing with line-height: 125%).
Specify a list of lines to be highlighted.
New in version 0.11.
If set to True, the formatter won’t output the background color for the wrapping element (this automatically defaults to False when there is no wrapping element [e.g. no argument for the get_style_defs method given]) (default: False).
New in version 0.6.
This string is output between lines of code. It defaults to "\n", which is enough to break a line inside <pre> tags, but you can e.g. set it to "<br>" to get HTML line breaks.
New in version 0.7.
If set to a nonempty string, e.g. foo, the formatter will wrap each output line in an anchor tag with a name of foo-linenumber. This allows easy linking to certain lines.
New in version 0.9.
If set to a nonempty string, e.g. foo, the formatter will wrap each output line in a span tag with an id of foo-linenumber. This allows easy access to lines via javascript.
New in version 1.6.
If set to the path of a ctags file, wrap names in anchor tags that link to their definitions. lineanchors should be used, and the tags file should specify line numbers (see the -n option to ctags).
New in version 1.6.
A string formatting pattern used to generate links to ctags definitions. Available variables are %(path)s, %(fname)s and %(fext)s. Defaults to an empty string, resulting in just #prefix-number links.
New in version 1.6.
Some of the params make no sense for a web-based formatter, so pygmentize.cgi doesn’t support them. These are:
You can specify hl_lines with special format:
>>> req = urllib2.Request(URL, data=json.dumps({
... "command": "format",
... "params": {
... "code": code,
... "lang": "pycon",
... "hl_lines": "1-10, 20, 30-40, range(40, 50, 5)",
... }}))
Modern browsers such as Google Chrome always send Accept-Encoding: gzip, deflate, and you can’t change this from an Ajax library, so you don’t need to worry about it when you are using Ajax.
However, if you call pygmentize.cgi from a programming language such as PHP or Python, this header is not added automatically.
pygmentize.cgi currently accepts only gzip and deflate (and, of course, identity).
You can choose the Content-Type of the response: either application/json or text/html.
In the "command": "format" case, an application/json response makes little sense (it is just a JSON string).
In “command”: “lexers” case,
When Accept is not application/json, you can change the charset of the HTML document (default: utf-8). If Accept is application/json, this is ignored (the response is always encoded as utf-8).