#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Sample corpus for the benchmarks below: filler text split on whitespace, so
# punctuation stays attached to the words and many words repeat.
masterlist = (
    "Lorem ipsum dolor sit amet, agam assum nemore id vis, decore commune oporteat in has. "
    "Ne sit malis instructior, no mazim ancillae moderatius pri. "
    "Admodum albucius pri cu, duo nihil nonumy appellantur id, essent alienum invenire ea duo. "
    "Ius cu justo intellegebat, vix integre suscipiantur et, vix in error aliquam periculis. "
    "Solum deleniti accusamus ea pri, ex saperet honestatis eos. "
    "Ut vero justo eruditi nam. "
    "Quot vide minim his no, at eam graeco complectitur. "
    "Nemore expetenda nam in. "
    "Vel torquatos consetetur concludaturque et, ad duo dicam ponderum, ad eos populo suscipit. "
    "An per enim erant, mei et quot invenire abhorreant, et qui soleat comprehensam. "
    "Audire menandri conceptam eu ius, at eum vitae dolorem erroribus. "
    "Ei his soluta invidunt, cu sit vide disputationi. "
    "Diam alienum elaboraret per id. "
    "Eam novum nobis pertinax cu, ad sed justo possit. "
    "Idque quando partiendo vix ut. "
    "Nominavi inimicus similique et est. "
    "Eam quas idque deseruisse eu, sea ut appetere invenire definitionem. "
    "Efficiendi ullamcorper has ei, an has liber ignota. "
    "Est an oblique appetere volutpat, ad lobortis democritum sea. "
    "An nec soluta corpora, eu lorem vidisse sea, id mutat facilisi insolens ius. "
    "Consul tamquam qui id, ex alienum interpretaris eum, te tota posse eam. "
    "Cum lobortis sententiae ei, nam et feugiat recusabo persequeris. "
    "Sit choro voluptatibus ne. "
    "Ea mel quaeque vivendo. "
    "Eam case hendrerit no, eu harum ceteros invenire sed."
).split()
print(len(masterlist))
print(len(set(masterlist)))


# # `O(n)`

# In[2]:


def counter_sub_non_mut(mylist):
    """Return a copy of ``mylist`` in which every repeated item is numbered.

    An item that occurs more than once gets its 1-based occurrence number
    appended, e.g. ``["a", "b", "a"]`` becomes ``["a1", "b", "a2"]``. Items
    that occur once are copied unchanged. The suffix is added with ``+=``
    and ``str``, so items are expected to be strings.

    Args:
        mylist: List of hashable string items. It is left unmodified.

    Returns:
        A new list of the same length holding the numbered items.
    """
    from collections import Counter

    # not modifying original list
    counts = {k: v for k, v in Counter(mylist).items() if v > 1}
    newlist = mylist[:]
    # Walk from the end: the count still stored for an item is then exactly
    # the occurrence number of the element being visited.
    for i in reversed(range(len(mylist))):
        item = mylist[i]
        if item in counts and counts[item]:
            newlist[i] += str(counts[item])
            counts[item] -= 1
    return newlist


# In[3]:


print(counter_sub_non_mut(masterlist[:])[:10])
get_ipython().run_line_magic('timeit', 'counter_sub_non_mut(masterlist[:])')


# In[4]:


print(counter_sub_non_mut(["a"]*10**4)[:10])
get_ipython().run_line_magic('timeit', 'counter_sub_non_mut(["a"]*10**4)')


# In[5]:


print(counter_sub_non_mut(list(map(str, range(10**4))))[:10])
get_ipython().run_line_magic('timeit', 'counter_sub_non_mut(list(map(str, range(10**4))))')


# In[6]:


def counter_sub_mut(mylist):
    """Number every repeated item of ``mylist`` in place and return the list.

    An item that occurs more than once gets its 1-based occurrence number
    appended, e.g. ``["a", "b", "a"]`` becomes ``["a1", "b", "a2"]``. Items
    that occur once are left unchanged. The suffix is added with ``+=``
    and ``str``, so items are expected to be strings.

    Args:
        mylist: List of hashable string items. It is modified in place.

    Returns:
        The same list object that was passed in.
    """
    from collections import Counter

    # modifying original list
    counts = {k: v for k, v in Counter(mylist).items() if v > 1}
    # Walk from the end: the count still stored for an item is then exactly
    # the occurrence number of the element being visited.
    for i in reversed(range(len(mylist))):
        item = mylist[i]
        if item in counts and counts[item]:
            mylist[i] += str(counts[item])
            counts[item] -= 1
    return mylist


# In[7]:


print(counter_sub_mut(masterlist[:])[:10])
get_ipython().run_line_magic('timeit', 'counter_sub_mut(masterlist[:])')


# In[8]:


print(counter_sub_mut(["a"]*10**4)[:10])
get_ipython().run_line_magic('timeit', 'counter_sub_mut(["a"]*10**4)')


# In[9]:


print(counter_sub_mut(list(map(str, range(10**4))))[:10])
get_ipython().run_line_magic('timeit', 'counter_sub_mut(list(map(str, range(10**4))))')


# # `O(n^2)`

# http://stackoverflow.com/a/30651963/635411
#
# `O(n^2)` using `.index()`
#
# This has excellent performance when all the items in the list are different since it doesn't do any index lookups.

# In[10]:


def counter_with_index(mylist):
    """Number every repeated item of ``mylist`` in place using ``list.index``.

    For each distinct item that occurs more than once, the occurrences are
    renamed one at a time to ``item + "1"``, ``item + "2"``, ... Each
    occurrence is located with ``mylist.index(item)``, which returns the
    first element still equal to the unsuffixed item. Items that occur once
    are never searched for.

    NOTE(review): renamed values are written into the same list that is
    searched. If a renamed value equals another item that is itself
    duplicated and handled later (e.g. ``["a", "a", "a1", "a1"]``), that later
    search can match the renamed element instead. Confirm whether inputs can
    contain such collisions.

    Args:
        mylist: List of hashable string items. It is modified in place.

    Returns:
        The same list object that was passed in.
    """
    from collections import Counter

    # The Counter is a snapshot taken before any renaming happens.
    for value, occurrences in Counter(mylist).items():
        if occurrences <= 1:
            continue  # unique items keep their original text
        for offset in range(occurrences):
            # Suffixes start at 1; each pass renames the first untouched copy.
            mylist[mylist.index(value)] = value + str(offset + 1)
    return mylist


# In[11]:


print(counter_with_index(masterlist[:])[:10])
get_ipython().run_line_magic('timeit', 'counter_with_index(masterlist[:])')


# In[12]:


print(counter_with_index(["a"]*10**4)[:10])
get_ipython().run_line_magic('timeit', 'counter_with_index(["a"]*10**4)')


# In[13]:


print(counter_with_index(list(map(str, range(10**4))))[:10])
get_ipython().run_line_magic('timeit', 'counter_with_index(list(map(str, range(10**4))))')


# http://stackoverflow.com/a/30650847/635411
#
# `O(n^2)` using `.count()`

# In[14]:


def one_liner_count(mylist):
    """Return a new list in which every repeated item of ``mylist`` is numbered.

    An item is treated as repeated when ``mylist.count(item)`` exceeds one.
    Its suffix is one more than the number of equal items in the slice of
    ``mylist`` before its position. Other items are passed through
    unchanged. ``mylist`` itself is not modified.

    Args:
        mylist: List of string items.

    Returns:
        A new list of the same length holding the numbered items.
    """
    return [
        item + str(mylist[:position].count(item) + 1)
        if mylist.count(item) > 1
        else item
        for position, item in enumerate(mylist)
    ]


# In[15]:


print(one_liner_count(masterlist[:])[:10])
get_ipython().run_line_magic('timeit', 'one_liner_count(masterlist[:])')


# In[16]:


print(one_liner_count(["a"]*10**4)[:10])
get_ipython().run_line_magic('timeit', 'one_liner_count(["a"]*10**4)')


# In[17]:


print(one_liner_count(list(map(str, range(10**4))))[:10])
get_ipython().run_line_magic('timeit', 'one_liner_count(list(map(str, range(10**4))))')