Cython has moved to github.
cython-devel
view Cython/Compiler/Lexicon.py @ 834:4695bbb3a785
Better integer literal parsing.
Now accepts U and LL suffixes, and large integer literals are longs rather than being truncated as Python objects.
| author | Robert Bradshaw <robertwb@math.washington.edu> |
|---|---|
| date | Thu Jul 31 00:55:14 2008 -0700 (3 years ago) |
| parents | a320e6c04422 |
| children | 89c892fe7c5b e90beaabe9fe |
line source
1 #
2 # Pyrex Scanner - Lexical Definitions
3 #
4 # Changing anything in this file will cause Lexicon.pickle
5 # to be rebuilt next time pyrexc is run.
6 #
# Prefix characters that may precede a string literal's opening quote.
# make_lexicon() uses both sets when building its `beginstring` pattern:
# an optional character from string_prefixes, then an optional character
# from raw_prefixes.
raw_prefixes = "rR"
string_prefixes = "cCuUbB"
def make_lexicon():
    """Build and return the Plex ``Lexicon`` used to tokenise source text.

    The lexicon has a default state (identifiers, numeric literals,
    punctuation, brackets, line terminators, string openers, comments and
    whitespace) plus five named states:

    * ``INDENT`` -- entered at the start of a line to measure indentation
      and skip blank/comment-only lines.
    * ``SQ_STRING`` / ``DQ_STRING`` -- inside a single-line string opened
      with ``'`` or ``"``.
    * ``TSQ_STRING`` / ``TDQ_STRING`` -- inside a triple-quoted string.

    Actions written as ``Method('name')`` come from the ``Scanning`` module;
    presumably they dispatch to the scanner method of that name (confirm
    against ``Scanning.Method``).

    The order of the entries in the token table is significant to Plex when
    several patterns match the same text, so it must not be rearranged.
    """
    from Cython.Plex import \
        Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof, \
        TEXT, IGNORE, State, Lexicon
    from Scanning import Method

    # --- character classes -------------------------------------------------
    letter = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
    digit = Any("0123456789")
    octdigit = Any("01234567")
    hexdigit = Any("0123456789ABCDEFabcdef")
    indentation = Bol + Rep(Any(" \t"))

    # --- numeric literals --------------------------------------------------
    decimal = Rep1(digit)
    dot = Str(".")
    exponent = Any("Ee") + Opt(Any("+-")) + decimal
    decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)

    name = letter + Rep(letter | digit)
    # NOTE(review): only the lowercase "0x" hex prefix is accepted here;
    # "0X..." would lex as INT "0" followed by an identifier.  Left as-is
    # because the code that converts INT tokens is not visible from here.
    intconst = decimal | (Str("0x") + Rep1(hexdigit))
    # C-style suffixes: an optional U and up to two Ls, in either order
    # (U first or U last).
    intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu")))
    intliteral = intconst + intsuffix
    fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
    imagconst = (intconst | fltconst) + Any("jJ")

    # --- whole-string patterns ---------------------------------------------
    # These four patterns (and their non_sq / non_dq helpers) are not
    # referenced by the token table below; strings are scanned piecewise via
    # `beginstring` and the *_STRING states instead.
    sq_string = (
        Str("'") +
        Rep(AnyBut("\\\n'") | (Str("\\") + AnyChar)) +
        Str("'")
    )

    dq_string = (
        Str('"') +
        Rep(AnyBut('\\\n"') | (Str("\\") + AnyChar)) +
        Str('"')
    )

    non_sq = AnyBut("'") | (Str('\\') + AnyChar)
    tsq_string = (
        Str("'''")
        + Rep(non_sq | (Str("'") + non_sq) | (Str("''") + non_sq))
        + Str("'''")
    )

    non_dq = AnyBut('"') | (Str('\\') + AnyChar)
    tdq_string = (
        Str('"""')
        + Rep(non_dq | (Str('"') + non_dq) | (Str('""') + non_dq))
        + Str('"""')
    )

    # --- piecewise string scanning -----------------------------------------
    beginstring = Opt(Any(string_prefixes)) + Opt(Any(raw_prefixes)) + (Str("'") | Str('"') | Str("'''") | Str('"""'))
    two_oct = octdigit + octdigit
    three_oct = octdigit + octdigit + octdigit
    two_hex = hexdigit + hexdigit
    four_hex = two_hex + two_hex
    # A backslash followed by two bare hex digits is *not* an escape in
    # Python or C, so it must not be an alternative here: with longest-match
    # scanning it would turn e.g. "\fa" into one three-character ESCAPE
    # token instead of the escape "\f" followed by the literal "a".  Hex
    # escapes are only recognised in their prefixed forms (\xHH, \uHHHH).
    escapeseq = Str("\\") + (two_oct | three_oct |
                             Str('u') + four_hex | Str('x') + two_hex | AnyChar)

    # --- operators, brackets, whitespace -----------------------------------
    deco = Str("@")
    bra = Any("([{")
    ket = Any(")]}")
    punct = Any(":,;+-*/|&<>=.%`~^?")
    diphthong = Str("==", "<>", "!=", "<=", ">=", "<<", ">>", "**", "//",
                    "+=", "-=", "*=", "/=", "%=", "|=", "^=", "&=",
                    "<<=", ">>=", "**=", "//=")
    spaces = Rep1(Any(" \t\f"))
    comment = Str("#") + Rep(AnyBut("\n"))
    escaped_newline = Str("\\\n")
    lineterm = Eol + Opt(Str("\n"))

    return Lexicon([
        (name, 'IDENT'),
        (intliteral, 'INT'),
        (fltconst, 'FLOAT'),
        (imagconst, 'IMAG'),
        (deco, 'DECORATOR'),
        (punct | diphthong, TEXT),

        (bra, Method('open_bracket_action')),
        (ket, Method('close_bracket_action')),
        (lineterm, Method('newline_action')),

        #(stringlit, 'STRING'),
        (beginstring, Method('begin_string_action')),

        (comment, IGNORE),
        (spaces, IGNORE),
        (escaped_newline, IGNORE),

        State('INDENT', [
            # Blank or comment-only lines carry no indentation information.
            (Opt(spaces) + Opt(comment) + lineterm, IGNORE),
            (indentation, Method('indentation_action')),
            (Eof, Method('eof_action'))
        ]),

        State('SQ_STRING', [
            (escapeseq, 'ESCAPE'),
            (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
            # The other quote character is ordinary text inside this string.
            (Str('"'), 'CHARS'),
            (Str("\n"), Method('unclosed_string_action')),
            (Str("'"), Method('end_string_action')),
            (Eof, 'EOF')
        ]),

        State('DQ_STRING', [
            (escapeseq, 'ESCAPE'),
            (Rep1(AnyBut('"\n\\')), 'CHARS'),
            (Str("'"), 'CHARS'),
            (Str("\n"), Method('unclosed_string_action')),
            (Str('"'), Method('end_string_action')),
            (Eof, 'EOF')
        ]),

        State('TSQ_STRING', [
            (escapeseq, 'ESCAPE'),
            (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
            # A lone quote of either kind is text; only ''' ends the string.
            (Any("'\""), 'CHARS'),
            (Str("\n"), 'NEWLINE'),
            (Str("'''"), Method('end_string_action')),
            (Eof, 'EOF')
        ]),

        State('TDQ_STRING', [
            (escapeseq, 'ESCAPE'),
            (Rep1(AnyBut('"\'\n\\')), 'CHARS'),
            (Any("'\""), 'CHARS'),
            (Str("\n"), 'NEWLINE'),
            (Str('"""'), Method('end_string_action')),
            (Eof, 'EOF')
        ]),

        (Eof, Method('eof_action'))
        ],

        # FIXME: Plex 1.9 needs different args here from Plex 1.1.4
        #debug_flags = scanner_debug_flags,
        #debug_file = scanner_dump_file
        )
