Cython has moved to GitHub.

cython-devel

view Cython/Compiler/Lexicon.py @ 3484:b254c4dbc089

support for some Python 3 (or 2.6+) syntax features (found by test_grammar.py in Py3.1.1):
- oct/bin notation: 0o12345, 0b10101
- function annotations (only pure syntax support, not currently used)
also: allow decorators on inner functions
author Stefan Behnel <scoder@users.berlios.de>
date Thu Jan 28 23:05:39 2010 +0100 (2 years ago)
parents 5404cf15e29e
children 3e25233bbcc7
line source
1 #
2 # Pyrex Scanner - Lexical Definitions
3 #
4 # Changing anything in this file will cause Lexicon.pickle
5 # to be rebuilt next time pyrexc is run.
6 #
# Characters accepted as the "raw" marker in front of a string literal.
raw_prefixes = "rR"

# Characters accepted as a kind prefix in front of a string literal.
string_prefixes = "cCuUbB"

# Token name emitted by the lexicon for identifiers.
IDENT = "IDENT"
def make_lexicon():
    """Build and return the Plex ``Lexicon`` used by the scanner.

    The lexicon consists of a default state (identifiers, numeric
    literals, punctuation, brackets, line terminators, string openers,
    comments and whitespace) plus five named states: ``INDENT`` and one
    state per string quoting style (``SQ_STRING``, ``DQ_STRING``,
    ``TSQ_STRING``, ``TDQ_STRING``).

    Actions given as ``Method('name')`` refer to handlers by name only;
    the handlers themselves are defined outside this file.

    Returns:
        A ``Cython.Plex.Lexicon`` instance.
    """
    # Both imports are deliberately kept inside the function, as before.
    from Cython.Plex import (
        Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof,
        TEXT, IGNORE, State, Lexicon)
    # Absolute import: the bare ``from Scanning import ...`` form only
    # resolves under Python 2 implicit-relative-import semantics.
    from Cython.Compiler.Scanning import Method

    # --- character classes -------------------------------------------
    letter = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
    digit = Any("0123456789")
    bindigit = Any("01")
    octdigit = Any("01234567")
    hexdigit = Any("0123456789ABCDEFabcdef")
    indentation = Bol + Rep(Any(" \t"))

    # --- numeric literals --------------------------------------------
    decimal = Rep1(digit)
    dot = Str(".")
    exponent = Any("Ee") + Opt(Any("+-")) + decimal
    decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)

    name = letter + Rep(letter | digit)
    # Plain decimal, or a "0"-prefixed hex / octal / binary literal.
    intconst = decimal | (Str("0") + ((Any("Xx") + Rep1(hexdigit)) |
                                      (Any("Oo") + Rep1(octdigit)) |
                                      (Any("Bb") + Rep1(bindigit))))
    # Optional suffix: U followed by up to two L's, or up to two L's
    # followed by U.
    intsuffix = ((Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) |
                 (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu"))))
    intliteral = intconst + intsuffix
    fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
    imagconst = (intconst | fltconst) + Any("jJ")

    # --- strings -----------------------------------------------------
    # Only the opening of a string literal is matched in the default
    # state; the body is tokenised by the per-quote-style states below.
    beginstring = (Opt(Any(string_prefixes)) + Opt(Any(raw_prefixes)) +
                   (Str("'") | Str('"') | Str("'''") | Str('"""')))
    two_oct = octdigit + octdigit
    three_oct = octdigit + octdigit + octdigit
    two_hex = hexdigit + hexdigit
    four_hex = two_hex + two_hex
    # Parentheses only make the existing "+ binds tighter than |"
    # grouping explicit.
    escapeseq = Str("\\") + (two_oct | three_oct |
                             (Str('u') + four_hex) |
                             (Str('x') + two_hex) |
                             (Str('U') + four_hex + four_hex) |
                             AnyChar)

    # --- operators, brackets, whitespace -----------------------------
    deco = Str("@")
    bra = Any("([{")
    ket = Any(")]}")
    punct = Any(":,;+-*/|&<>=.%`~^?")
    diphthong = Str("==", "<>", "!=", "<=", ">=", "<<", ">>", "**", "//",
                    "+=", "-=", "*=", "/=", "%=", "|=", "^=", "&=",
                    "<<=", ">>=", "**=", "//=", "->")
    spaces = Rep1(Any(" \t\f"))
    escaped_newline = Str("\\\n")
    lineterm = Eol + Opt(Str("\n"))

    comment = Str("#") + Rep(AnyBut("\n"))

    # NOTE: the order of the rules below is preserved exactly.
    return Lexicon([
        (name, IDENT),
        (intliteral, 'INT'),
        (fltconst, 'FLOAT'),
        (imagconst, 'IMAG'),
        (deco, 'DECORATOR'),
        (punct | diphthong, TEXT),

        (bra, Method('open_bracket_action')),
        (ket, Method('close_bracket_action')),
        (lineterm, Method('newline_action')),

        (beginstring, Method('begin_string_action')),

        (comment, IGNORE),
        (spaces, IGNORE),
        (escaped_newline, IGNORE),

        State('INDENT', [
            (comment + lineterm, Method('commentline')),
            (Opt(spaces) + Opt(comment) + lineterm, IGNORE),
            (indentation, Method('indentation_action')),
            (Eof, Method('eof_action'))
        ]),

        State('SQ_STRING', [
            (escapeseq, 'ESCAPE'),
            (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
            (Str('"'), 'CHARS'),
            (Str("\n"), Method('unclosed_string_action')),
            (Str("'"), Method('end_string_action')),
            (Eof, 'EOF')
        ]),

        State('DQ_STRING', [
            (escapeseq, 'ESCAPE'),
            (Rep1(AnyBut('"\n\\')), 'CHARS'),
            (Str("'"), 'CHARS'),
            (Str("\n"), Method('unclosed_string_action')),
            (Str('"'), Method('end_string_action')),
            (Eof, 'EOF')
        ]),

        State('TSQ_STRING', [
            (escapeseq, 'ESCAPE'),
            (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
            (Any("'\""), 'CHARS'),
            (Str("\n"), 'NEWLINE'),
            (Str("'''"), Method('end_string_action')),
            (Eof, 'EOF')
        ]),

        State('TDQ_STRING', [
            (escapeseq, 'ESCAPE'),
            (Rep1(AnyBut('"\'\n\\')), 'CHARS'),
            (Any("'\""), 'CHARS'),
            (Str("\n"), 'NEWLINE'),
            (Str('"""'), Method('end_string_action')),
            (Eof, 'EOF')
        ]),

        (Eof, Method('eof_action'))
        ],

        # FIXME: Plex 1.9 needs different args here from Plex 1.1.4
        #debug_flags = scanner_debug_flags,
        #debug_file = scanner_dump_file
        )