"""Benchmark several ways of applying a Python function to two pandas columns."""

import timeit

import numpy as np
import pandas as pd


def f1(a1, a2):
    """Count the cross-membership hits between two iterables.

    Every element of ``a1`` that is contained in ``a2`` adds one, and every
    element of ``a2`` that is contained in ``a1`` adds one.  Repeated
    elements are counted each time they occur.  For ``str`` arguments the
    elements are single characters.

    Args:
        a1: First iterable; must also support the ``in`` operator.
        a2: Second iterable; must also support the ``in`` operator.

    Returns:
        The total number of hits as an ``int``.
    """
    # Deliberately kept as plain loops: this function is the workload whose
    # call overhead is being compared by the benchmark below.
    n = 0
    for i in a1:
        if i in a2:
            n += 1
    for i in a2:
        if i in a1:
            n += 1
    return n


def f3(a1, a2):
    """Apply :func:`f1` pairwise to two iterables and return the results.

    Args:
        a1: Iterable supplying the first argument of each ``f1`` call.
        a2: Iterable supplying the second argument of each ``f1`` call.

    Returns:
        A ``list`` with one ``f1`` result per pair.  As with ``map``, the
        result stops at the shorter of the two inputs.
    """
    # ``map`` accepts several iterables directly, so no wrapping lambda
    # (and no placeholder assignment) is needed.
    return list(map(f1, a1, a2))


def _make_data(n_rows):
    """Return an ``n_rows`` x 2 frame (columns ``A``, ``B``) of random values.

    The values are drawn uniformly from [1, 9) and the frame is created with
    ``dtype=str``, so ``f1`` ends up comparing characters.
    """
    return pd.DataFrame(
        np.random.uniform(1, 9, (n_rows, 2)), columns=['A', 'B'], dtype=str
    )


def _strategies(data):
    """Return the benchmarked ways of computing ``f1`` for each row of ``data``.

    The mapping preserves the order in which the reference timings at the
    bottom of this file were recorded.
    """
    return {
        "Series.combine": lambda: data['A'].combine(data['B'], f1),
        "DataFrame.apply(axis=1)": lambda: data.apply(
            lambda x: f1(x['A'], x['B']), axis=1
        ),
        "f3 on Series": lambda: f3(data['A'], data['B']),
        "list comprehension over zip": lambda: [
            f1(x, y) for x, y in zip(data['A'], data['B'])
        ],
        "f3 on .values": lambda: f3(data['A'].values, data['B'].values),
    }


def _time_per_loop(func, repeat=7):
    """Time ``func`` and return ``(mean, std, loops)`` in seconds per call.

    The loop count is chosen by ``timeit.Timer.autorange`` and the
    measurement is repeated ``repeat`` times, similar to IPython's
    ``%timeit`` report.
    """
    timer = timeit.Timer(func)
    loops, _ = timer.autorange()
    per_loop = np.array(timer.repeat(repeat=repeat, number=loops)) / loops
    return per_loop.mean(), per_loop.std(), loops


def run_benchmarks(sizes=(5, 500, 50000), repeat=7):
    """Print per-call timings of every strategy for each row count in ``sizes``.

    Args:
        sizes: Row counts of the random test frames.
        repeat: Number of timing repetitions per strategy.
    """
    for n_rows in sizes:
        data = _make_data(n_rows)
        print(f"N = {n_rows}")
        for label, func in _strategies(data).items():
            mean, std, loops = _time_per_loop(func, repeat)
            print(
                f"{label:<28} {mean * 1e3:12.4f} ms ± {std * 1e3:.4f} ms "
                f"per loop (mean ± std. dev. of {repeat} runs, "
                f"{loops} loops each)"
            )


if __name__ == "__main__":
    run_benchmarks()

# Reference timings recorded with IPython's %timeit, listed in the same
# order as the strategies above.  Re-run the benchmark for numbers that
# match your own machine and library versions.
#
# N = 5
# 218 µs ± 4.19 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# 698 µs ± 1.07 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# 37 µs ± 20.5 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# 36.5 µs ± 26.5 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# 16.9 µs ± 15.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
#
# N = 500
# 13 ms ± 11.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# 11.2 ms ± 18.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# 1.02 ms ± 1.47 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# 982 µs ± 733 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# 1.01 ms ± 1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
#
# N = 50000 (only four timings were recorded for this size)
# 1.29 s ± 3.74 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# 1.04 s ± 2.83 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# 99.6 ms ± 58.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# 96.7 ms ± 121 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
显然,`.apply` 方法(以及 `.combine`)的效率并不高:在上述三种数据规模下,它们都比基于 `map` 或列表推导式的写法慢了数倍到十几倍。
