cython-devel

changeset 3156:a92b70b5624e

optimise unicode.split() and unicode.splitlines()
author Stefan Behnel <scoder@users.berlios.de>
date Sun Mar 21 07:57:00 2010 +0100 (23 months ago)
parents 751bdd38b55c
children bb090cf72455
files Cython/Compiler/Optimize.py
line diff
1.1 --- a/Cython/Compiler/Optimize.py Sat Mar 20 19:51:51 2010 +0100 1.2 +++ b/Cython/Compiler/Optimize.py Sun Mar 21 07:57:00 2010 +0100 1.3 @@ -1405,8 +1405,7 @@ 1.4 PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None), 1.5 PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None), 1.6 PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None), 1.7 - ], 1.8 - exception_value = "NULL") 1.9 + ]) 1.10 1.11 def _handle_simple_method_dict_get(self, node, args, is_unbound_method): 1.12 """Replace dict.get() by a call to PyDict_GetItem(). 1.13 @@ -1422,19 +1421,68 @@ 1.14 'get', is_unbound_method, args, 1.15 utility_code = dict_getitem_default_utility_code) 1.16 1.17 + PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType( 1.18 + Builtin.list_type, [ 1.19 + PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None), 1.20 + PyrexTypes.CFuncTypeArg("keepends", PyrexTypes.c_bint_type, None), 1.21 + ]) 1.22 + 1.23 + def _handle_simple_method_unicode_splitlines(self, node, args, is_unbound_method): 1.24 + """Replace unicode.splitlines(...) by a direct call to the 1.25 + corresponding C-API function. 1.26 + """ 1.27 + if len(args) not in (1,2): 1.28 + self._error_wrong_arg_count('unicode.splitlines', node, args, "1 or 2") 1.29 + return node 1.30 + if len(args) < 2: 1.31 + args.append(ExprNodes.BoolNode(node.pos, value=False)) 1.32 + else: 1.33 + args[1] = args[1].coerce_to(PyrexTypes.c_bint_type, 1.34 + self.env_stack[-1]) 1.35 + 1.36 + return self._substitute_method_call( 1.37 + node, "PyUnicode_Splitlines", self.PyUnicode_Splitlines_func_type, 1.38 + 'splitlines', is_unbound_method, args) 1.39 + 1.40 + PyUnicode_Split_func_type = PyrexTypes.CFuncType( 1.41 + Builtin.list_type, [ 1.42 + PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None), 1.43 + PyrexTypes.CFuncTypeArg("sep", PyrexTypes.py_object_type, None), 1.44 + PyrexTypes.CFuncTypeArg("maxsplit", PyrexTypes.c_py_ssize_t_type, None), 1.45 + ] 1.46 + ) 1.47 + 1.48 + def _handle_simple_method_unicode_split(self, node, args, is_unbound_method): 1.49 + """Replace unicode.split(...) by a direct call to the 1.50 + corresponding C-API function. 1.51 + """ 1.52 + if len(args) not in (1,2,3): 1.53 + self._error_wrong_arg_count('unicode.split', node, args, "1-3") 1.54 + return node 1.55 + if len(args) < 2: 1.56 + args.append(ExprNodes.NullNode(node.pos)) 1.57 + if len(args) < 3: 1.58 + args.append(ExprNodes.IntNode( 1.59 + node.pos, value="-1", type=PyrexTypes.c_py_ssize_t_type)) 1.60 + else: 1.61 + args[2] = args[2].coerce_to(PyrexTypes.c_py_ssize_t_type, 1.62 + self.env_stack[-1]) 1.63 + 1.64 + return self._substitute_method_call( 1.65 + node, "PyUnicode_Split", self.PyUnicode_Split_func_type, 1.66 + 'split', is_unbound_method, args) 1.67 + 1.68 PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType( 1.69 Builtin.bytes_type, [ 1.70 PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None), 1.71 PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None), 1.72 PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None), 1.73 - ], 1.74 - exception_value = "NULL") 1.75 + ]) 1.76 1.77 PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType( 1.78 Builtin.bytes_type, [ 1.79 PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None), 1.80 - ], 1.81 - exception_value = "NULL") 1.82 + ]) 1.83 1.84 _special_encodings = ['UTF8', 'UTF16', 'Latin1', 'ASCII', 1.85 'unicode_escape', 'raw_unicode_escape'] 1.86 @@ -1498,8 +1546,7 @@ 1.87 PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_char_ptr_type, None), 1.88 PyrexTypes.CFuncTypeArg("size", PyrexTypes.c_py_ssize_t_type, None), 1.89 PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None), 1.90 - ], 1.91 - exception_value = "NULL") 1.92 + ]) 1.93 1.94 PyUnicode_Decode_func_type = PyrexTypes.CFuncType( 1.95 Builtin.unicode_type, [ 1.96 @@ -1507,8 +1554,7 @@ 1.97 PyrexTypes.CFuncTypeArg("size", PyrexTypes.c_py_ssize_t_type, None), 1.98 PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None), 1.99 PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None), 1.100 - ], 1.101 - exception_value = "NULL") 1.102 + ]) 1.103 1.104 def _handle_simple_method_bytes_decode(self, node, args, is_unbound_method): 1.105 """Replace char*.decode() by a direct C-API call to the