"""Benchmark ``DataFrame.query`` against boolean-mask row filtering.

For several frame sizes, a two-column frame of uniform random floats is
built and the rows with ``A > 4`` are selected in two ways:

* ``data.query('A > 4')``
* ``data[data['A'] > 4]``

Each approach is timed several times and the best wall-clock time is
reported, so that one-off overhead on the first call does not dominate the
small-frame measurements.
"""
import timeit

import numpy as np
import pandas as pd

# Row counts exercised by the benchmark, smallest to largest.
SIZES = (10, 1000, 100000, 10000000)

# Timings recorded by the original author with single IPython ``%time`` runs
# (listed in statement order: ``query`` first, boolean mask second).
#
# N = 10
#   query: CPU times: user 9.29 ms, sys: 3.84 ms, total: 13.1 ms / Wall time: 11.7 ms
#   mask:  CPU times: user 681 µs, sys: 0 ns, total: 681 µs / Wall time: 637 µs
# N = 1000
#   query: CPU times: user 988 µs, sys: 2.53 ms, total: 3.52 ms / Wall time: 2.64 ms
#   mask:  CPU times: user 1.64 ms, sys: 0 ns, total: 1.64 ms / Wall time: 1.18 ms
# N = 100000
#   query: CPU times: user 5.78 ms, sys: 2.95 ms, total: 8.73 ms / Wall time: 8.24 ms
#   mask:  CPU times: user 3.11 ms, sys: 781 µs, total: 3.89 ms / Wall time: 3.45 ms
# N = 10000000
#   query: CPU times: user 448 ms, sys: 416 ms, total: 864 ms / Wall time: 568 ms
#   mask:  CPU times: user 218 ms, sys: 173 ms, total: 391 ms / Wall time: 390 ms
#
# NOTE(review): the N = 10 ``query`` run was slower than the N = 1000 one,
# which suggests one-off overhead in single-shot timing; hence the
# best-of-several measurement used below.


def make_frame(n: int, seed: int = 0) -> pd.DataFrame:
    """Return an ``n``-row frame with columns ``A`` and ``B`` drawn from U[1, 9).

    A fixed ``seed`` keeps the benchmark input reproducible between runs.
    """
    rng = np.random.default_rng(seed)
    return pd.DataFrame(rng.uniform(1, 9, (n, 2)), columns=['A', 'B'])


def filter_with_query(data: pd.DataFrame) -> pd.DataFrame:
    """Select the rows where ``A > 4`` using ``DataFrame.query``."""
    return data.query('A > 4')


def filter_with_mask(data: pd.DataFrame) -> pd.DataFrame:
    """Select the rows where ``A > 4`` using a boolean mask."""
    return data[data['A'] > 4]


def best_time(func, data: pd.DataFrame, repeat: int = 5) -> float:
    """Return the fastest wall-clock time in seconds of ``func(data)``.

    ``func`` is called once per measurement and ``repeat`` measurements are
    taken; the minimum is the least disturbed by unrelated system activity.
    """
    return min(timeit.repeat(lambda: func(data), number=1, repeat=repeat))


def compare(n: int, repeat: int = 5, seed: int = 0) -> tuple:
    """Return ``(query_seconds, mask_seconds)`` for an ``n``-row frame."""
    frame = make_frame(n, seed)
    return (
        best_time(filter_with_query, frame, repeat),
        best_time(filter_with_mask, frame, repeat),
    )


if __name__ == "__main__":
    for N in SIZES:
        data = make_frame(N)
        query_seconds = best_time(filter_with_query, data)
        mask_seconds = best_time(filter_with_mask, data)

        # Keep the filtered results bound as data1/data2 and make sure the
        # two approaches being compared really select the same rows.
        data1 = filter_with_query(data)
        data2 = filter_with_mask(data)
        if not data1.equals(data2):
            raise RuntimeError(f"query and mask results differ for N={N}")

        print(
            f"N={N:>10,}  "
            f"query: {query_seconds * 1e3:10.3f} ms  "
            f"mask: {mask_seconds * 1e3:10.3f} ms"
        )
显然，data[data['A'] > 4] 相较于 data.query('A > 4') 更有优势：在上述各个数据规模下，布尔索引的耗时都更短。
