Note
Click here to download the full example code
Timestamp Sorting Efficiency
Investigating whether it is faster to sort timestamps before or after converting from string to pandas datetimes.

Out:
Text(31.388408135308154, 0.5, 'Sort Time [s]')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
# Benchmark: time Series.sort_index on the same shuffled timestamps when the
# index holds tz-aware datetimes ('dt'), their string form ('str'), or their
# 64-bit integer form ('int'), for several index sizes.
np.random.seed(0)  # fixed seed so every run shuffles identically

timings = []
for N in np.logspace(4, 6, 4).astype(int):
    # N timestamps at 15-minute spacing, shuffled in place.
    # 'min' is the current spelling of the deprecated 'T' offset alias.
    times = pd.date_range('2019-01-01', periods=N, freq='15min').values
    np.random.shuffle(times)

    # Three representations of the same shuffled timestamps.
    index_dt = pd.DatetimeIndex(times).tz_localize('Etc/GMT+7')
    index_str = index_dt.astype(str)
    # Explicit 64-bit dtype: the width of builtin `int` is platform-dependent.
    index_int = index_dt.astype('int64')

    series_dt = pd.Series(1, index=index_dt)
    series_str = pd.Series(1, index=index_str)
    series_int = pd.Series(1, index=index_int)

    # One row of results per index size; keys become DataFrame columns.
    data = {'Index Size': N}
    for label, series in (('dt', series_dt),
                          ('str', series_str),
                          ('int', series_int)):
        # perf_counter is monotonic and high-resolution, unlike time.time,
        # so short sorts are measured reliably.
        st = time.perf_counter()
        series.sort_index(inplace=True)
        ed = time.perf_counter()
        data[label] = ed - st
    timings.append(data)

# Sort time versus index size on log-log axes, one line per representation.
df = pd.DataFrame(timings)
df.plot('Index Size', ['str', 'dt', 'int'], logx=True, logy=True)
plt.ylabel('Sort Time [s]')
Total running time of the script: ( 0 minutes 21.270 seconds)