xed/plugins/externaltools/tools/linkparsing.py

232 lines
7.0 KiB
Python
Raw Normal View History

2011-11-07 13:46:58 -06:00
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2010 Per Arneng <per.arneng@anyplanet.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
import re
class Link:
"""
This class represents a file link from within a string given by the
output of some software tool. A link contains a reference to a file, the
line number within the file and the boundaries within the given output
string that should be marked as a link.
"""
def __init__(self, path, line_nr, start, end):
"""
path -- the path of the file (that could be extracted)
line_nr -- the line nr of the specified file
start -- the index within the string that the link starts at
end -- the index within the string where the link ends at
"""
self.path = path
self.line_nr = int(line_nr)
self.start = start
self.end = end
def __repr__(self):
return "%s[%s](%s:%s)" % (self.path, self.line_nr,
self.start, self.end)
class LinkParser:
"""
Parses a text using different parsing providers with the goal of finding one
or more file links within the text. A typical example could be the output
from a compiler that specifies an error in a specific file. The path of the
file, the line nr and some more info is then returned so that it can be used
to be able to navigate from the error output in to the specific file.
The actual work of parsing the text is done by instances of classes that
inherits from AbstractLinkParser or by regular expressions. To add a new
parser just create a class that inherits from AbstractLinkParser and then
register in this class cunstructor using the method add_parser. If you want
to add a regular expression then just call add_regexp in this class
constructor and provide your regexp string as argument.
"""
def __init__(self):
self._providers = []
self.add_regexp(REGEXP_STANDARD)
self.add_regexp(REGEXP_PYTHON)
self.add_regexp(REGEXP_VALAC)
self.add_regexp(REGEXP_BASH)
self.add_regexp(REGEXP_RUBY)
self.add_regexp(REGEXP_PERL)
self.add_regexp(REGEXP_MCS)
def add_parser(self, parser):
self._providers.append(parser)
def add_regexp(self, regexp):
"""
Adds a regular expression string that should match a link using
re.MULTILINE and re.VERBOSE regexp. The area marked as a link should
be captured by a group named lnk. The path of the link should be
captured by a group named pth. The line number should be captured by
a group named ln. To read more about this look at the documentation
for the RegexpLinkParser constructor.
"""
self.add_parser(RegexpLinkParser(regexp))
def parse(self, text):
"""
Parses the given text and returns a list of links that are parsed from
the text. This method delegates to parser providers that can parse
output from different kinds of formats. If no links are found then an
empty list is returned.
text -- the text to scan for file links. 'text' can not be None.
"""
if text is None:
raise ValueError("text can not be None")
links = []
for provider in self._providers:
links.extend(provider.parse(text))
return links
class AbstractLinkParser(object):
"""The "abstract" base class for link parses"""
def parse(self, text):
"""
This method should be implemented by subclasses. It takes a text as
argument (never None) and then returns a list of Link objects. If no
links are found then an empty list is expected. The Link class is
defined in this module. If you do not override this method then a
NotImplementedError will be thrown.
text -- the text to parse. This argument is never None.
"""
raise NotImplementedError("need to implement a parse method")
class RegexpLinkParser(AbstractLinkParser):
"""
A class that represents parsers that only use one single regular expression.
It can be used by subclasses or by itself. See the constructor documentation
for details about the rules surrouning the regexp.
"""
def __init__(self, regex):
"""
Creates a new RegexpLinkParser based on the given regular expression.
The regular expression is multiline and verbose (se python docs on
compilation flags). The regular expression should contain three named
capturing groups 'lnk', 'pth' and 'ln'. 'lnk' represents the area wich
should be marked as a link in the text. 'pth' is the path that should
be looked for and 'ln' is the line number in that file.
"""
self.re = re.compile(regex, re.MULTILINE | re.VERBOSE)
def parse(self, text):
links = []
for m in re.finditer(self.re, text):
path = m.group("pth")
line_nr = m.group("ln")
start = m.start("lnk")
end = m.end("lnk")
link = Link(path, line_nr, start, end)
links.append(link)
return links
# gcc 'test.c:13: warning: ...'
# javac 'Test.java:13: ...'
# ruby 'test.rb:5: ...'
# scalac 'Test.scala:5: ...'
# 6g (go) 'test.go:9: ...'
REGEXP_STANDARD = r"""
^
(?P<lnk>
(?P<pth> .*[a-z0-9] )
\:
(?P<ln> \d+)
)
\:\s"""
# python ' File "test.py", line 13'
REGEXP_PYTHON = r"""
^\s\sFile\s
(?P<lnk>
\"
(?P<pth> [^\"]+ )
\",\sline\s
(?P<ln> \d+ )
),"""
# python 'test.sh: line 5:'
REGEXP_BASH = r"""
^(?P<lnk>
(?P<pth> .* )
\:\sline\s
(?P<ln> \d+ )
)\:"""
# valac 'Test.vala:13.1-13.3: ...'
REGEXP_VALAC = r"""
^(?P<lnk>
(?P<pth>
.*vala
)
\:
(?P<ln>
\d+
)
\.\d+-\d+\.\d+
)\: """
#ruby
#test.rb:5: ...
# from test.rb:3:in `each'
# fist line parsed by REGEXP_STANDARD
REGEXP_RUBY = r"""
^\s+from\s
(?P<lnk>
(?P<pth>
.*
)
\:
(?P<ln>
\d+
)
)"""
# perl 'syntax error at test.pl line 88, near "$fake_var'
REGEXP_PERL = r"""
\sat\s
(?P<lnk>
(?P<pth> .* )
\sline\s
(?P<ln> \d+ )
)"""
# mcs (C#) 'Test.cs(12,7): error CS0103: The name `fakeMethod'
REGEXP_MCS = r"""
^
(?P<lnk>
(?P<pth> .*\.[cC][sS] )
\(
(?P<ln> \d+ )
,\d+\)
)
\:\s
"""
# ex:ts=4:et: