Python Str.translate Vs Str.replace
Why is replace ~1.5x quicker than translate in Python? In [188]: s = '1 a 2' In [189]: s.replace(' ','') Out[189]: '1a2' In [190]: s.translate(None,' ') Out[190]: '1a2' In [191
Solution 1:
translate will likely be faster as N and M increase, where N is the number of unique character replacement maps, and M is the length of the string that is being translated.
import random
import string
import timeit
import re
def do_translation(N,M):
    """Translate a random lowercase string using ``str.translate``.

    Builds a random map of N source letters to N target letters, generates
    a random string of M lowercase ASCII letters, and returns that string
    with the map applied in a single ``translate`` pass.

    Args:
        N: number of letter-to-letter replacements in the map; sampled
            without replacement from the 26 lowercase ASCII letters.
        M: length of the random string to translate.

    Returns:
        The translated string (always M characters long).
    """
    sources = random.sample(string.ascii_lowercase, N)
    targets = random.sample(string.ascii_lowercase, N)
    # string.maketrans only exists on Python 2; on Python 3 the equivalent
    # for text strings is str.maketrans. Prefer the latter when available so
    # the benchmark runs on both.
    make_table = getattr(str, "maketrans", None) or string.maketrans
    trans_tab = make_table("".join(sources), "".join(targets))
    s = "".join(random.choice(string.ascii_lowercase) for _ in range(M))
    return s.translate(trans_tab)
def do_resub(N,M):
    """Translate a random lowercase string using ``re.sub`` with a callback.

    Builds a random map of N source letters to N target letters, generates
    a random string of M lowercase ASCII letters, and replaces every
    occurrence of a source letter with its target in one regex pass.

    Args:
        N: number of letter-to-letter replacements in the map; sampled
            without replacement from the 26 lowercase ASCII letters.
        M: length of the random string to translate.

    Returns:
        The translated string (always M characters long).
    """
    sources = random.sample(string.ascii_lowercase, N)
    targets = random.sample(string.ascii_lowercase, N)
    trans_tab = dict(zip(sources, targets))
    s = "".join(random.choice(string.ascii_lowercase) for _ in range(M))
    if not sources:
        # With no source letters the pattern would be "([])", which is not a
        # valid regex; an empty map leaves the string unchanged.
        return s
    # The sources are plain lowercase letters, so they need no escaping
    # inside the character class.
    pattern = "([%s])"%("".join(sources),)
    return re.sub(pattern,lambda m:trans_tab.get(m.group(0),m.group(0)),s)
def do_replace(N,M):
    """Translate a random lowercase string using chained ``str.replace`` calls.

    Draws N source letters and N target letters at random, generates a
    random string of M lowercase ASCII letters, then applies one
    ``replace`` call per (source, target) pair in order.

    Note that the replacements are applied sequentially, so a letter
    produced by an earlier replacement can be rewritten again by a later
    one; the result is not necessarily the same as a one-pass mapping.

    Args:
        N: number of (source, target) letter pairs to apply.
        M: length of the random string to process.

    Returns:
        The string after all N replacements (always M characters long).
    """
    alphabet = string.ascii_lowercase
    old_letters = random.sample(alphabet, N)
    new_letters = random.sample(alphabet, N)
    text = "".join(random.choice(alphabet) for _ in range(M))
    for old, new in zip(old_letters, new_letters):
        text = text.replace(old, new)
    return text
# Benchmark driver: for each map size i (2..18, step 2) and string length j
# (10..190, step 10), time 100 calls of each approach and store the three
# totals under the key (i, j).
data = {}
for i in range(2,20,2):
    for j in range(10,200,10):
        # The setup strings import from __main__, so this only works when the
        # file is executed as a script rather than imported as a module.
        data[(i,j)] = {
            "translate":timeit.timeit("do_translation(%s,%s)"%(i,j),"from __main__ import do_translation,string,random",number=100),
            "re.sub":timeit.timeit("do_resub(%s,%s)"%(i,j),"from __main__ import do_resub,re,random",number=100),
            "replace":timeit.timeit("do_replace(%s,%s)"%(i,j),"from __main__ import do_replace,random",number=100)}
# The statement form `print data` is a SyntaxError on Python 3; the call form
# with a single argument prints the same output on Python 2 and 3.
print(data)
Running this will show you several different timings, including that translate can be faster in several of these cases. (I considered adding some plots here, but I've already invested more time in this question than I really should have :P)
Post a Comment for "Python Str.translate Vs Str.replace"