Cython has moved to github.
cython-devel
view Cython/Compiler/Scanning.py @ 3068:f9baa86d0372
support 'from __future__ import print_function' in Py2.6+
| author | Stefan Behnel <scoder@users.berlios.de> |
|---|---|
| date | Wed Mar 10 08:46:58 2010 +0100 (2 years ago) |
| parents | 640a4fc806c3 |
| children | edf737ca860f |
line source
1 #
2 # Cython Scanner
3 #
5 import os
6 import platform
7 import stat
8 import sys
9 import codecs
10 from time import time
12 import cython
13 cython.declare(EncodedString=object, string_prefixes=object, raw_prefixes=object, IDENT=object,
14 print_function=object)
16 from Cython import Plex, Utils
17 from Cython.Plex.Scanners import Scanner
18 from Cython.Plex.Errors import UnrecognizedInput
19 from Errors import CompileError, error
20 from Lexicon import string_prefixes, raw_prefixes, make_lexicon, IDENT
21 from Future import print_function
23 from StringEncoding import EncodedString
# Module-level scanner switches.
# debug_scanner is only mentioned in a comment beside a disabled
# "if False:" block in PyrexScanner.next().
debug_scanner = 0
# Copied into self.trace by PyrexScanner.__init__.
trace_scanner = 0
# NOTE(review): the next two names are not referenced anywhere in the
# visible part of this file -- confirm whether other modules use them.
scanner_debug_flags = 0
scanner_dump_file = None
# Cache for the lexicon; filled on first use by get_lexicon().
lexicon = None
def get_lexicon():
    """Return the module-wide lexicon, building it on first use.

    The result of make_lexicon() is stored in the global ``lexicon`` so
    later calls reuse it for as long as the stored value is truthy.
    """
    global lexicon
    if lexicon:
        return lexicon
    lexicon = make_lexicon()
    return lexicon
38 #------------------------------------------------------------------
# Identifiers that the scanner treats as keywords.  build_resword_dict()
# turns this list into the lookup table used by PyrexScanner.next().
# Each word is listed exactly once ("in" used to appear twice).
reserved_words = [
    "global", "include", "ctypedef", "cdef", "def", "class",
    "print", "del", "pass", "break", "continue", "return",
    "raise", "import", "exec", "try", "except", "finally",
    "while", "if", "elif", "else", "for", "in", "assert",
    "and", "or", "not", "is", "lambda", "from", "yield",
    "cimport", "by", "with", "cpdef", "DEF", "IF", "ELIF", "ELSE"
]
class Method(object):
    """Callable that forwards a (stream, text) call to a named stream method."""

    def __init__(self, name):
        # __name__ duplicates name so that Plex tracing can display it.
        self.name = self.__name__ = name

    def __call__(self, stream, text):
        """Look up ``self.name`` on *stream* and call it with *text*."""
        handler = getattr(stream, self.name)
        return handler(text)
58 #------------------------------------------------------------------
def build_resword_dict():
    """Return a dict mapping every entry of reserved_words to 1."""
    return dict.fromkeys(reserved_words, 1)
# Lookup table of reserved words, built once at import time.
# PyrexScanner.next() tests identifiers against it with "in".
# NOTE(review): cython.declare presumably types the name as a Python
# object when this module is compiled -- confirm against the Cython docs.
cython.declare(resword_dict=object)
resword_dict = build_resword_dict()
69 #------------------------------------------------------------------
class CompileTimeScope(object):
    """Name -> value table with an optional enclosing (outer) scope."""

    def __init__(self, outer=None):
        self.entries = {}
        self.outer = outer

    def declare(self, name, value):
        """Bind *name* to *value* in this scope, replacing any old binding."""
        self.entries[name] = value

    def lookup_here(self, name):
        """Return the value bound in this scope only; KeyError if absent."""
        return self.entries[name]

    def __contains__(self, name):
        # Membership looks at this scope only, not at outer scopes.
        return name in self.entries

    def lookup(self, name):
        """Return the value for *name*, searching outer scopes as needed.

        The KeyError from the local lookup is re-raised when there is no
        outer scope to delegate to.
        """
        try:
            return self.lookup_here(name)
        except KeyError:
            parent = self.outer
            if not parent:
                raise
            return parent.lookup(name)
def initial_compile_time_env():
    """Build the default environment for compile-time evaluation.

    Returns an empty CompileTimeScope whose outer scope holds the
    UNAME_* values taken from platform.uname() and a fixed selection of
    builtins.  Builtins missing from the running interpreter are skipped.
    """
    try:
        import __builtin__ as builtins
    except ImportError:
        import builtins

    builtin_scope = CompileTimeScope()

    uname_keys = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
                  'UNAME_VERSION', 'UNAME_MACHINE')
    # zip() stops at the shorter sequence, so extra uname fields are ignored.
    for key, field in zip(uname_keys, platform.uname()):
        builtin_scope.declare(key, field)

    builtin_names = (
        'False', 'True',
        'abs', 'bool', 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate',
        'float', 'hash', 'hex', 'int', 'len', 'list', 'long', 'map', 'max', 'min',
        'oct', 'ord', 'pow', 'range', 'reduce', 'repr', 'round', 'slice', 'str',
        'sum', 'tuple', 'xrange', 'zip')
    for builtin_name in builtin_names:
        try:
            obj = getattr(builtins, builtin_name)
        except AttributeError:
            # ignore, likely Py3
            continue
        builtin_scope.declare(builtin_name, obj)

    return CompileTimeScope(builtin_scope)
120 #------------------------------------------------------------------
class SourceDescriptor(object):
    """
    Base class for source descriptors; instances should be treated as
    immutable.
    """
    _escaped_description = None
    _cmp_name = ''

    def __str__(self):
        # Trips wherever a descriptor is used directly as a filename.
        assert False

    def get_escaped_description(self):
        """Return get_description() with non-ASCII characters replaced.

        The result is computed once and cached on the instance.
        """
        cached = self._escaped_description
        if cached is None:
            raw = self.get_description()
            cached = raw.encode('ASCII', 'replace').decode("ASCII")
            self._escaped_description = cached
        return cached

    # The comparisons below only exist to give descriptors some ordering.
    # Objects without a _cmp_name attribute compare as False.

    def __gt__(self, other):
        try:
            outcome = self._cmp_name > other._cmp_name
        except AttributeError:
            outcome = False
        return outcome

    def __lt__(self, other):
        try:
            outcome = self._cmp_name < other._cmp_name
        except AttributeError:
            outcome = False
        return outcome

    def __le__(self, other):
        try:
            outcome = self._cmp_name <= other._cmp_name
        except AttributeError:
            outcome = False
        return outcome
class FileSourceDescriptor(SourceDescriptor):
    """
    Descriptor for source code that lives in a file.

    A code source is a more general notion than a "filename", since code
    does not always come from a file.  Code source instances are handed
    to Scanner.__init__ as the optional name argument and are returned
    again as part of the position() tuple.
    """

    def __init__(self, filename):
        # The file name doubles as the ordering key.
        self.filename = self._cmp_name = filename

    def get_lines(self, encoding=None, error_handling=None):
        """Open the file and return the resulting file object.

        With an *encoding*, the file is opened through codecs.open using
        *error_handling* as the error policy; otherwise
        Utils.open_source_file is used.
        """
        if encoding:
            return codecs.open(self.filename, "rU", encoding=encoding,
                               errors=error_handling)
        return Utils.open_source_file(self.filename)

    def get_description(self):
        return self.filename

    def get_filenametable_entry(self):
        return self.filename

    def __eq__(self, other):
        # Equal only to another file descriptor naming the same file.
        if not isinstance(other, FileSourceDescriptor):
            return False
        return self.filename == other.filename

    def __hash__(self):
        return hash(self.filename)

    def __repr__(self):
        return "<FileSourceDescriptor:%s>" % self.filename
class StringSourceDescriptor(SourceDescriptor):
    """
    Descriptor to use in place of a filename when the code comes from a
    string object.
    """

    def __init__(self, name, code):
        self.name = self._cmp_name = name
        # Every piece produced by the split gets a trailing newline.
        self.codelines = [piece + "\n" for piece in code.split("\n")]

    def get_lines(self, encoding=None, error_handling=None):
        """Return the stored lines.

        With an *encoding*, each line is round-tripped through that
        encoding using *error_handling* as the encode error policy.
        """
        if not encoding:
            return self.codelines
        recoded = []
        for line in self.codelines:
            recoded.append(line.encode(encoding, error_handling).decode(encoding))
        return recoded

    def get_description(self):
        return self.name

    def get_filenametable_entry(self):
        return "stringsource"

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        # Equal only to another string descriptor with the same name.
        if not isinstance(other, StringSourceDescriptor):
            return False
        return self.name == other.name

    def __repr__(self):
        return "<StringSourceDescriptor:%s>" % self.name
224 #------------------------------------------------------------------
class PyrexScanner(Scanner):
    """Plex Scanner subclass that tracks indentation levels, bracket
    nesting and compile-time evaluation state while producing tokens."""
    #  context            Context  Compilation context
    #  included_files     [string] Files included with 'include' statement
    #  compile_time_env   dict     Environment for conditional compilation
    #  compile_time_eval  boolean  In a true conditional compilation context
    #  compile_time_expr  boolean  In a compile-time expression context

    def __init__(self, file, filename, parent_scanner=None,
                 scope=None, context=None, source_encoding=None,
                 parse_comments=True, initial_pos=None):
        """Set up the scanner and read the first token.

        With a *parent_scanner*, its context, included-file list and
        compile-time state are shared.  Otherwise *context* is stored,
        the included-file list is taken from *scope* and a fresh
        compile-time environment is created.
        """
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
        if not parent_scanner:
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
        else:
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr
        self.parse_comments = parse_comments
        self.source_encoding = source_encoding
        self.trace = trace_scanner
        self.indentation_stack = [0]
        self.indentation_char = None
        self.bracket_nesting_level = 0
        self.begin('INDENT')
        # self.sy must exist before next() runs, because error() reads it.
        self.sy = ''
        self.next()

    def commentline(self, text):
        """Produce a 'commentline' token unless comment parsing is off."""
        if not self.parse_comments:
            return
        self.produce('commentline', text)

    def current_level(self):
        """Return the innermost indentation level on the stack."""
        return self.indentation_stack[-1]

    def open_bracket_action(self, text):
        self.bracket_nesting_level += 1
        return text

    def close_bracket_action(self, text):
        self.bracket_nesting_level -= 1
        return text

    def newline_action(self, text):
        """Produce NEWLINE and switch to the INDENT state, but only
        outside brackets."""
        if self.bracket_nesting_level != 0:
            return
        self.begin('INDENT')
        self.produce('NEWLINE', '')

    # Opening quote sequence -> scanner state for the string body.
    string_states = {
        "'":   'SQ_STRING',
        '"':   'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING'
    }

    def begin_string_action(self, text):
        """Strip an optional string prefix and then an optional raw
        prefix, enter the state for the remaining quotes, and produce
        BEGIN_STRING."""
        for prefixes in (string_prefixes, raw_prefixes):
            if text[:1] in prefixes:
                text = text[1:]
        self.begin(self.string_states[text])
        self.produce('BEGIN_STRING')

    def end_string_action(self, text):
        self.begin('')
        self.produce('END_STRING')

    def unclosed_string_action(self, text):
        # Close the string state first, then report the error.
        self.end_string_action(text)
        self.error("Unclosed string literal")

    def indentation_action(self, text):
        """Handle the leading whitespace *text* of a line.

        Checks that a single indentation character is used consistently,
        then produces INDENT or DEDENT tokens according to how len(text)
        compares with the indentation stack.
        """
        self.begin('')
        # Indentation within brackets should be ignored.
        #if self.bracket_nesting_level > 0:
        #    return
        # Check that tabs and spaces are being used consistently.
        if text:
            indent_char = text[0]
            if self.indentation_char is None:
                self.indentation_char = indent_char
            elif self.indentation_char != indent_char:
                self.error("Mixed use of tabs and spaces")
            if text.replace(indent_char, ""):
                self.error("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do
        previous_level = self.current_level()
        target_level = len(text)
        if target_level > previous_level:
            self.indentation_stack.append(target_level)
            self.produce('INDENT', '')
        elif target_level < previous_level:
            while target_level < self.current_level():
                self.indentation_stack.pop()
                self.produce('DEDENT', '')
            # The dedent must land exactly on an enclosing level.
            if target_level != self.current_level():
                self.error("Inconsistent indentation")

    def eof_action(self, text):
        """Produce a DEDENT for every still-open level, then EOF."""
        stack = self.indentation_stack
        while len(stack) > 1:
            self.produce('DEDENT', '')
            stack.pop()
        self.produce('EOF', '')

    def next(self):
        """Read the next token into self.sy / self.systring.

        An identifier that is a reserved word becomes its own token
        type, except for 'print' when print_function is among the
        context's future directives.  All other identifiers have their
        text wrapped in EncodedString.
        """
        try:
            sy, systring = self.read()
        except UnrecognizedInput:
            self.error("Unrecognized character")
        if sy == IDENT:
            is_keyword = systring in resword_dict
            if is_keyword and systring == 'print' and \
                   print_function in self.context.future_directives:
                is_keyword = False
            if is_keyword:
                sy = systring
            else:
                systring = EncodedString(systring)
        self.sy = sy
        self.systring = systring
        if False: # debug_scanner:
            _, line, col = self.position()
            if self.systring and self.sy != self.systring:
                t = "%s %s" % (self.sy, self.systring)
            else:
                t = self.sy
            print("--- %3d %2d %s" % (line, col, t))

    def peek(self):
        """Return the next (sy, systring) pair, leaving the current
        token in place."""
        current = self.sy, self.systring
        self.next()
        upcoming = self.sy, self.systring
        self.unread(*upcoming)
        self.sy, self.systring = current
        return upcoming

    def put_back(self, sy, systring):
        """Requeue the current token and make (sy, systring) current."""
        self.unread(self.sy, self.systring)
        self.sy, self.systring = sy, systring

    def unread(self, token, value):
        # This method should be added to Plex
        self.queue.insert(0, (token, value))

    def error(self, message, pos=None, fatal=True):
        """Report *message* at *pos* (default: the current position).

        When the current token is INDENT, an extra hint about
        indentation is reported first.  The object returned by the
        module-level error() is raised when *fatal* is true.
        """
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        if fatal:
            raise err

    def expect(self, what, message=None):
        """Advance past token type *what*, or report that it was expected."""
        if self.sy != what:
            self.expected(what, message)
        else:
            self.next()

    def expect_keyword(self, what, message=None):
        """Advance past the identifier *what*, or report that it was
        expected."""
        if self.sy == IDENT and self.systring == what:
            self.next()
        else:
            self.expected(what, message)

    def expected(self, what, message=None):
        """Raise an error using *message*, or a default naming *what*."""
        if not message:
            message = "Expected '%s'" % what
        self.error(message)

    def expect_indent(self):
        self.expect('INDENT',
            "Expected an increase in indentation level")

    def expect_dedent(self):
        self.expect('DEDENT',
            "Expected a decrease in indentation level")

    def expect_newline(self, message="Expected a newline"):
        # Expect either a newline or end of file
        if self.sy == 'EOF':
            return
        self.expect('NEWLINE', message)
