cython-devel
changeset 2507:8d8cc4c9b91b
Fix bug 412 (str/char comparison); refactor to move the comparison coercions closer together in the code
| author | Stefan Behnel <scoder@users.berlios.de> |
|---|---|
| date | Sat Oct 17 22:34:28 2009 +0200 (3 years ago) |
| parents | 236f6c2f1b26 |
| children | 936826f5af34 |
| files | Cython/Compiler/ExprNodes.py tests/run/str_char_coercion_T412.pyx |
line diff
1.1 --- a/Cython/Compiler/ExprNodes.py Sat Oct 17 14:33:20 2009 +0200
1.2 +++ b/Cython/Compiler/ExprNodes.py Sat Oct 17 22:34:28 2009 +0200
1.3 @@ -13,7 +13,8 @@
1.4 from Nodes import Node
1.5 import PyrexTypes
1.6 from PyrexTypes import py_object_type, c_long_type, typecast, error_type, unspecified_type
1.7 -from Builtin import list_type, tuple_type, set_type, dict_type, unicode_type, bytes_type, type_type
1.8 +from Builtin import list_type, tuple_type, set_type, dict_type, \
1.9 + unicode_type, str_type, bytes_type, type_type
1.10 import Builtin
1.11 import Symtab
1.12 import Options
1.13 @@ -821,6 +822,9 @@
1.14 if isinstance(sizeof_node, SizeofTypeNode):
1.15 return sizeof_node.arg_type
1.16
1.17 + def can_coerce_to_char_literal(self):
1.18 + return len(self.value) == 1
1.19 +
1.20 def coerce_to(self, dst_type, env):
1.21 if dst_type == PyrexTypes.c_char_ptr_type:
1.22 self.type = PyrexTypes.c_char_ptr_type
1.23 @@ -830,7 +834,7 @@
1.24 return CastNode(self, PyrexTypes.c_uchar_ptr_type)
1.25
1.26 if dst_type.is_int:
1.27 - if len(self.value) > 1:
1.28 + if not self.can_coerce_to_char_literal():
1.29 error(self.pos, "Only single-character strings can be coerced into ints.")
1.30 return self
1.31 return CharNode(self.pos, value=self.value)
1.32 @@ -905,11 +909,11 @@
1.33 # value BytesLiteral or EncodedString
1.34 # is_identifier boolean
1.35
1.36 - type = Builtin.str_type
1.37 + type = str_type
1.38 is_identifier = False
1.39
1.40 def coerce_to(self, dst_type, env):
1.41 - if dst_type is not py_object_type and dst_type is not Builtin.str_type:
1.42 + if dst_type is not py_object_type and dst_type is not str_type:
1.43 # if dst_type is Builtin.bytes_type:
1.44 # # special case: bytes = 'str literal'
1.45 # return BytesNode(self.pos, value=self.value)
1.46 @@ -927,6 +931,9 @@
1.47
1.48 return self
1.49
1.50 + def can_coerce_to_char_literal(self):
1.51 + return not self.is_identifier and len(self.value) == 1
1.52 +
1.53 def generate_evaluation_code(self, code):
1.54 self.result_code = code.get_py_string_const(
1.55 self.value, identifier=self.is_identifier, is_str=True)
1.56 @@ -5065,6 +5072,73 @@
1.57 result = result and cascade.compile_time_value(operand2, denv)
1.58 return result
1.59
1.60 + def try_coerce_to_int_cmp(self, env, op, operand1, operand2):
1.61 + # type1 != type2 and at least one of the types is not a C int
1.62 + type1 = operand1.type
1.63 + type2 = operand2.type
1.64 + type1_can_be_int = False
1.65 + type2_can_be_int = False
1.66 +
1.67 + if isinstance(operand1, (StringNode, BytesNode)) \
1.68 + and operand1.can_coerce_to_char_literal():
1.69 + type1_can_be_int = True
1.70 + if isinstance(operand2, (StringNode, BytesNode)) \
1.71 + and operand2.can_coerce_to_char_literal():
1.72 + type2_can_be_int = True
1.73 +
1.74 + if type1.is_int:
1.75 + if type2_can_be_int:
1.76 + operand2 = operand2.coerce_to(type1, env)
1.77 + elif type2.is_int:
1.78 + if type1_can_be_int:
1.79 + operand1 = operand1.coerce_to(type2, env)
1.80 + elif type1_can_be_int:
1.81 + if type2_can_be_int:
1.82 + operand1 = operand1.coerce_to(PyrexTypes.c_uchar_type, env)
1.83 + operand2 = operand2.coerce_to(PyrexTypes.c_uchar_type, env)
1.84 +
1.85 + return operand1, operand2
1.86 +
1.87 + def coerce_operands(self, env, op, operand1, common_type=None):
1.88 + operand2 = self.operand2
1.89 + type1 = operand1.type
1.90 + type2 = operand2.type
1.91 +
1.92 + if type1 == str_type and (type2.is_string or type2 in (bytes_type, unicode_type)) or \
1.93 + type2 == str_type and (type1.is_string or type1 in (bytes_type, unicode_type)):
1.94 + error(self.pos, "Comparisons between bytes/unicode and str are not portable to Python 3")
1.95 +
1.96 + elif operand1.type.is_complex or operand2.type.is_complex:
1.97 + if op not in ('==', '!='):
1.98 + error(self.pos, "complex types unordered")
1.99 + if operand1.type.is_pyobject:
1.100 + operand2 = operand2.coerce_to(operand2.type, env)
1.101 + elif operand2.type.is_pyobject:
1.102 + operand1 = operand1.coerce_to(operand2.type, env)
1.103 + else:
1.104 + common_type = PyrexTypes.widest_numeric_type(type1, type2)
1.105 + operand1 = operand1.coerce_to(common_type, env)
1.106 + operand2 = operand2.coerce_to(common_type, env)
1.107 +
1.108 + elif common_type is None or not common_type.is_pyobject:
1.109 + if not type1.is_int or not type2.is_int:
1.110 + operand1, operand2 = self.try_coerce_to_int_cmp(env, op, operand1, operand2)
1.111 +
1.112 + if operand1.type.is_pyobject or operand2.type.is_pyobject:
1.113 + # we could do a lot better by splitting the comparison
1.114 + # into a non-Python part and a Python part, but this is
1.115 + # safer for now
1.116 + if operand1.type == operand2.type:
1.117 + common_type = operand1.type
1.118 + else:
1.119 + common_type = py_object_type
1.120 +
1.121 + if self.cascade:
1.122 + operand2 = self.cascade.coerce_operands(env, self.operator, operand2, common_type)
1.123 +
1.124 + self.operand2 = operand2
1.125 + return operand1
1.126 +
1.127 def is_python_comparison(self):
1.128 return (self.has_python_operands()
1.129 or (self.cascade and self.cascade.is_python_comparison())
1.130 @@ -5075,13 +5149,7 @@
1.131 or (self.cascade and self.cascade.is_python_result()))
1.132
1.133 def check_types(self, env, operand1, op, operand2):
1.134 - if operand1.type.is_complex or operand2.type.is_complex:
1.135 - if op not in ('==', '!='):
1.136 - error(self.pos, "complex types unordered")
1.137 - common_type = PyrexTypes.widest_numeric_type(operand1.type, operand2.type)
1.138 - self.operand1 = operand1.coerce_to(common_type, env)
1.139 - self.operand2 = operand2.coerce_to(common_type, env)
1.140 - elif not self.types_okay(operand1, op, operand2):
1.141 + if not self.types_okay(operand1, op, operand2):
1.142 error(self.pos, "Invalid types for '%s' (%s, %s)" %
1.143 (self.operator, operand1.type, operand2.type))
1.144
1.145 @@ -5225,11 +5293,10 @@
1.146 self.operand2.analyse_types(env)
1.147 if self.cascade:
1.148 self.cascade.analyse_types(env, self.operand2)
1.149 + self.operand1 = self.coerce_operands(env, self.operator, self.operand1)
1.150 self.is_pycmp = self.is_python_comparison()
1.151 if self.is_pycmp:
1.152 self.coerce_operands_to_pyobjects(env)
1.153 - if self.has_int_operands():
1.154 - self.coerce_chars_to_ints(env)
1.155 if self.cascade:
1.156 self.operand2 = self.operand2.coerce_to_simple(env)
1.157 self.cascade.coerce_cascaded_operands_to_temp(env)
1.158 @@ -5260,19 +5327,6 @@
1.159 self.operand2 = self.operand2.coerce_to_pyobject(env)
1.160 if self.cascade:
1.161 self.cascade.coerce_operands_to_pyobjects(env)
1.162 -
1.163 - def has_int_operands(self):
1.164 - return (self.operand1.type.is_int or self.operand2.type.is_int) \
1.165 - or (self.cascade and self.cascade.has_int_operands())
1.166 -
1.167 - def coerce_chars_to_ints(self, env):
1.168 - # coerce literal single-char strings to c chars
1.169 - if self.operand1.type.is_string and isinstance(self.operand1, BytesNode):
1.170 - self.operand1 = self.operand1.coerce_to(PyrexTypes.c_uchar_type, env)
1.171 - if self.operand2.type.is_string and isinstance(self.operand2, BytesNode):
1.172 - self.operand2 = self.operand2.coerce_to(PyrexTypes.c_uchar_type, env)
1.173 - if self.cascade:
1.174 - self.cascade.coerce_chars_to_ints(env)
1.175
1.176 def check_const(self):
1.177 self.operand1.check_const()
1.178 @@ -5372,13 +5426,6 @@
1.179 if self.cascade:
1.180 self.cascade.coerce_operands_to_pyobjects(env)
1.181
1.182 - def has_int_operands(self):
1.183 - return self.operand2.type.is_int
1.184 -
1.185 - def coerce_chars_to_ints(self, env):
1.186 - if self.operand2.type.is_string and isinstance(self.operand2, BytesNode):
1.187 - self.operand2 = self.operand2.coerce_to(PyrexTypes.c_uchar_type, env)
1.188 -
1.189 def coerce_cascaded_operands_to_temp(self, env):
1.190 if self.cascade:
1.191 #self.operand2 = self.operand2.coerce_to_temp(env) #CTT
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/tests/run/str_char_coercion_T412.pyx Sat Oct 17 22:34:28 2009 +0200
2.3 @@ -0,0 +1,75 @@
2.4 +__doc__ = u"""
2.5 +>>> test_eq()
2.6 +True
2.7 +True
2.8 +True
2.9 +True
2.10 +
2.11 +>>> test_cascaded_eq()
2.12 +True
2.13 +True
2.14 +True
2.15 +True
2.16 +True
2.17 +True
2.18 +True
2.19 +True
2.20 +
2.21 +>>> test_cascaded_ineq()
2.22 +True
2.23 +True
2.24 +True
2.25 +True
2.26 +True
2.27 +True
2.28 +True
2.29 +True
2.30 +
2.31 +>>> test_long_ineq()
2.32 +True
2.33 +
2.34 +>>> test_long_ineq_py()
2.35 +True
2.36 +True
2.37 +"""
2.38 +
2.39 +cdef int i = 'x'
2.40 +cdef char c = 'x'
2.41 +cdef char* s = 'x'
2.42 +
2.43 +def test_eq():
2.44 + print i == 'x'
2.45 + print i == c'x'
2.46 + print c == 'x'
2.47 + print c == c'x'
2.48 +# print s == 'x' # error
2.49 +# print s == c'x' # error
2.50 +
2.51 +def test_cascaded_eq():
2.52 + print 'x' == i == 'x'
2.53 + print 'x' == i == c'x'
2.54 + print c'x' == i == 'x'
2.55 + print c'x' == i == c'x'
2.56 +
2.57 + print 'x' == c == 'x'
2.58 + print 'x' == c == c'x'
2.59 + print c'x' == c == 'x'
2.60 + print c'x' == c == c'x'
2.61 +
2.62 +def test_cascaded_ineq():
2.63 + print 'a' <= i <= 'z'
2.64 + print 'a' <= i <= c'z'
2.65 + print c'a' <= i <= 'z'
2.66 + print c'a' <= i <= c'z'
2.67 +
2.68 + print 'a' <= c <= 'z'
2.69 + print 'a' <= c <= c'z'
2.70 + print c'a' <= c <= 'z'
2.71 + print c'a' <= c <= c'z'
2.72 +
2.73 +def test_long_ineq():
2.74 + print 'a' < 'b' < 'c' < 'd' < c < 'y' < 'z'
2.75 +
2.76 +def test_long_ineq_py():
2.77 + print 'abcdef' < 'b' < 'c' < 'd' < 'y' < 'z'
2.78 + print 'a' < 'b' < 'cde' < 'd' < 'y' < 'z'
