cython-devel

changeset 2507:8d8cc4c9b91b

fix bug 412: str char comparison, refactoring to move comparison coercions closer in the code
author Stefan Behnel <scoder@users.berlios.de>
date Sat Oct 17 22:34:28 2009 +0200 (3 years ago)
parents 236f6c2f1b26
children 936826f5af34
files Cython/Compiler/ExprNodes.py tests/run/str_char_coercion_T412.pyx
line diff
1.1 --- a/Cython/Compiler/ExprNodes.py Sat Oct 17 14:33:20 2009 +0200 1.2 +++ b/Cython/Compiler/ExprNodes.py Sat Oct 17 22:34:28 2009 +0200 1.3 @@ -13,7 +13,8 @@ 1.4 from Nodes import Node 1.5 import PyrexTypes 1.6 from PyrexTypes import py_object_type, c_long_type, typecast, error_type, unspecified_type 1.7 -from Builtin import list_type, tuple_type, set_type, dict_type, unicode_type, bytes_type, type_type 1.8 +from Builtin import list_type, tuple_type, set_type, dict_type, \ 1.9 + unicode_type, str_type, bytes_type, type_type 1.10 import Builtin 1.11 import Symtab 1.12 import Options 1.13 @@ -821,6 +822,9 @@ 1.14 if isinstance(sizeof_node, SizeofTypeNode): 1.15 return sizeof_node.arg_type 1.16 1.17 + def can_coerce_to_char_literal(self): 1.18 + return len(self.value) == 1 1.19 + 1.20 def coerce_to(self, dst_type, env): 1.21 if dst_type == PyrexTypes.c_char_ptr_type: 1.22 self.type = PyrexTypes.c_char_ptr_type 1.23 @@ -830,7 +834,7 @@ 1.24 return CastNode(self, PyrexTypes.c_uchar_ptr_type) 1.25 1.26 if dst_type.is_int: 1.27 - if len(self.value) > 1: 1.28 + if not self.can_coerce_to_char_literal(): 1.29 error(self.pos, "Only single-character strings can be coerced into ints.") 1.30 return self 1.31 return CharNode(self.pos, value=self.value) 1.32 @@ -905,11 +909,11 @@ 1.33 # value BytesLiteral or EncodedString 1.34 # is_identifier boolean 1.35 1.36 - type = Builtin.str_type 1.37 + type = str_type 1.38 is_identifier = False 1.39 1.40 def coerce_to(self, dst_type, env): 1.41 - if dst_type is not py_object_type and dst_type is not Builtin.str_type: 1.42 + if dst_type is not py_object_type and dst_type is not str_type: 1.43 # if dst_type is Builtin.bytes_type: 1.44 # # special case: bytes = 'str literal' 1.45 # return BytesNode(self.pos, value=self.value) 1.46 @@ -927,6 +931,9 @@ 1.47 1.48 return self 1.49 1.50 + def can_coerce_to_char_literal(self): 1.51 + return not self.is_identifier and len(self.value) == 1 1.52 + 1.53 def generate_evaluation_code(self, code): 1.54 
self.result_code = code.get_py_string_const( 1.55 self.value, identifier=self.is_identifier, is_str=True) 1.56 @@ -5065,6 +5072,73 @@ 1.57 result = result and cascade.compile_time_value(operand2, denv) 1.58 return result 1.59 1.60 + def try_coerce_to_int_cmp(self, env, op, operand1, operand2): 1.61 + # type1 != type2 and at least one of the types is not a C int 1.62 + type1 = operand1.type 1.63 + type2 = operand2.type 1.64 + type1_can_be_int = False 1.65 + type2_can_be_int = False 1.66 + 1.67 + if isinstance(operand1, (StringNode, BytesNode)) \ 1.68 + and operand1.can_coerce_to_char_literal(): 1.69 + type1_can_be_int = True 1.70 + if isinstance(operand2, (StringNode, BytesNode)) \ 1.71 + and operand2.can_coerce_to_char_literal(): 1.72 + type2_can_be_int = True 1.73 + 1.74 + if type1.is_int: 1.75 + if type2_can_be_int: 1.76 + operand2 = operand2.coerce_to(type1, env) 1.77 + elif type2.is_int: 1.78 + if type1_can_be_int: 1.79 + operand1 = operand1.coerce_to(type2, env) 1.80 + elif type1_can_be_int: 1.81 + if type2_can_be_int: 1.82 + operand1 = operand1.coerce_to(PyrexTypes.c_uchar_type, env) 1.83 + operand2 = operand2.coerce_to(PyrexTypes.c_uchar_type, env) 1.84 + 1.85 + return operand1, operand2 1.86 + 1.87 + def coerce_operands(self, env, op, operand1, common_type=None): 1.88 + operand2 = self.operand2 1.89 + type1 = operand1.type 1.90 + type2 = operand2.type 1.91 + 1.92 + if type1 == str_type and (type2.is_string or type2 in (bytes_type, unicode_type)) or \ 1.93 + type2 == str_type and (type1.is_string or type1 in (bytes_type, unicode_type)): 1.94 + error(self.pos, "Comparisons between bytes/unicode and str are not portable to Python 3") 1.95 + 1.96 + elif operand1.type.is_complex or operand2.type.is_complex: 1.97 + if op not in ('==', '!='): 1.98 + error(self.pos, "complex types unordered") 1.99 + if operand1.type.is_pyobject: 1.100 + operand2 = operand2.coerce_to(operand2.type, env) 1.101 + elif operand2.type.is_pyobject: 1.102 + operand1 = 
operand1.coerce_to(operand2.type, env) 1.103 + else: 1.104 + common_type = PyrexTypes.widest_numeric_type(type1, type2) 1.105 + operand1 = operand1.coerce_to(common_type, env) 1.106 + operand2 = operand2.coerce_to(common_type, env) 1.107 + 1.108 + elif common_type is None or not common_type.is_pyobject: 1.109 + if not type1.is_int or not type2.is_int: 1.110 + operand1, operand2 = self.try_coerce_to_int_cmp(env, op, operand1, operand2) 1.111 + 1.112 + if operand1.type.is_pyobject or operand2.type.is_pyobject: 1.113 + # we could do a lot better by splitting the comparison 1.114 + # into a non-Python part and a Python part, but this is 1.115 + # safer for now 1.116 + if operand1.type == operand2.type: 1.117 + common_type = operand1.type 1.118 + else: 1.119 + common_type = py_object_type 1.120 + 1.121 + if self.cascade: 1.122 + operand2 = self.cascade.coerce_operands(env, self.operator, operand2, common_type) 1.123 + 1.124 + self.operand2 = operand2 1.125 + return operand1 1.126 + 1.127 def is_python_comparison(self): 1.128 return (self.has_python_operands() 1.129 or (self.cascade and self.cascade.is_python_comparison()) 1.130 @@ -5075,13 +5149,7 @@ 1.131 or (self.cascade and self.cascade.is_python_result())) 1.132 1.133 def check_types(self, env, operand1, op, operand2): 1.134 - if operand1.type.is_complex or operand2.type.is_complex: 1.135 - if op not in ('==', '!='): 1.136 - error(self.pos, "complex types unordered") 1.137 - common_type = PyrexTypes.widest_numeric_type(operand1.type, operand2.type) 1.138 - self.operand1 = operand1.coerce_to(common_type, env) 1.139 - self.operand2 = operand2.coerce_to(common_type, env) 1.140 - elif not self.types_okay(operand1, op, operand2): 1.141 + if not self.types_okay(operand1, op, operand2): 1.142 error(self.pos, "Invalid types for '%s' (%s, %s)" % 1.143 (self.operator, operand1.type, operand2.type)) 1.144 1.145 @@ -5225,11 +5293,10 @@ 1.146 self.operand2.analyse_types(env) 1.147 if self.cascade: 1.148 
self.cascade.analyse_types(env, self.operand2) 1.149 + self.operand1 = self.coerce_operands(env, self.operator, self.operand1) 1.150 self.is_pycmp = self.is_python_comparison() 1.151 if self.is_pycmp: 1.152 self.coerce_operands_to_pyobjects(env) 1.153 - if self.has_int_operands(): 1.154 - self.coerce_chars_to_ints(env) 1.155 if self.cascade: 1.156 self.operand2 = self.operand2.coerce_to_simple(env) 1.157 self.cascade.coerce_cascaded_operands_to_temp(env) 1.158 @@ -5260,19 +5327,6 @@ 1.159 self.operand2 = self.operand2.coerce_to_pyobject(env) 1.160 if self.cascade: 1.161 self.cascade.coerce_operands_to_pyobjects(env) 1.162 - 1.163 - def has_int_operands(self): 1.164 - return (self.operand1.type.is_int or self.operand2.type.is_int) \ 1.165 - or (self.cascade and self.cascade.has_int_operands()) 1.166 - 1.167 - def coerce_chars_to_ints(self, env): 1.168 - # coerce literal single-char strings to c chars 1.169 - if self.operand1.type.is_string and isinstance(self.operand1, BytesNode): 1.170 - self.operand1 = self.operand1.coerce_to(PyrexTypes.c_uchar_type, env) 1.171 - if self.operand2.type.is_string and isinstance(self.operand2, BytesNode): 1.172 - self.operand2 = self.operand2.coerce_to(PyrexTypes.c_uchar_type, env) 1.173 - if self.cascade: 1.174 - self.cascade.coerce_chars_to_ints(env) 1.175 1.176 def check_const(self): 1.177 self.operand1.check_const() 1.178 @@ -5372,13 +5426,6 @@ 1.179 if self.cascade: 1.180 self.cascade.coerce_operands_to_pyobjects(env) 1.181 1.182 - def has_int_operands(self): 1.183 - return self.operand2.type.is_int 1.184 - 1.185 - def coerce_chars_to_ints(self, env): 1.186 - if self.operand2.type.is_string and isinstance(self.operand2, BytesNode): 1.187 - self.operand2 = self.operand2.coerce_to(PyrexTypes.c_uchar_type, env) 1.188 - 1.189 def coerce_cascaded_operands_to_temp(self, env): 1.190 if self.cascade: 1.191 #self.operand2 = self.operand2.coerce_to_temp(env) #CTT
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/tests/run/str_char_coercion_T412.pyx Sat Oct 17 22:34:28 2009 +0200
2.3 @@ -0,0 +1,75 @@
2.4 +__doc__ = u"""
2.5 +>>> test_eq()
2.6 +True
2.7 +True
2.8 +True
2.9 +True
2.10 +
2.11 +>>> test_cascaded_eq()
2.12 +True
2.13 +True
2.14 +True
2.15 +True
2.16 +True
2.17 +True
2.18 +True
2.19 +True
2.20 +
2.21 +>>> test_cascaded_ineq()
2.22 +True
2.23 +True
2.24 +True
2.25 +True
2.26 +True
2.27 +True
2.28 +True
2.29 +True
2.30 +
2.31 +>>> test_long_ineq()
2.32 +True
2.33 +
2.34 +>>> test_long_ineq_py()
2.35 +True
2.36 +True
2.37 +"""
2.38 +
2.39 +cdef int i = 'x'
2.40 +cdef char c = 'x'
2.41 +cdef char* s = 'x'
2.42 +
2.43 +def test_eq():
2.44 +    print i == 'x'
2.45 +    print i == c'x'
2.46 +    print c == 'x'
2.47 +    print c == c'x'
2.48 +# print s == 'x' # error
2.49 +# print s == c'x' # error
2.50 +
2.51 +def test_cascaded_eq():
2.52 +    print 'x' == i == 'x'
2.53 +    print 'x' == i == c'x'
2.54 +    print c'x' == i == 'x'
2.55 +    print c'x' == i == c'x'
2.56 +
2.57 +    print 'x' == c == 'x'
2.58 +    print 'x' == c == c'x'
2.59 +    print c'x' == c == 'x'
2.60 +    print c'x' == c == c'x'
2.61 +
2.62 +def test_cascaded_ineq():
2.63 +    print 'a' <= i <= 'z'
2.64 +    print 'a' <= i <= c'z'
2.65 +    print c'a' <= i <= 'z'
2.66 +    print c'a' <= i <= c'z'
2.67 +
2.68 +    print 'a' <= c <= 'z'
2.69 +    print 'a' <= c <= c'z'
2.70 +    print c'a' <= c <= 'z'
2.71 +    print c'a' <= c <= c'z'
2.72 +
2.73 +def test_long_ineq():
2.74 +    print 'a' < 'b' < 'c' < 'd' < c < 'y' < 'z'
2.75 +
2.76 +def test_long_ineq_py():
2.77 +    print 'abcdef' < 'b' < 'c' < 'd' < 'y' < 'z'
2.78 +    print 'a' < 'b' < 'cde' < 'd' < 'y' < 'z'