LZ4 Compression

Benchmarking the effect of lz4’s “acceleration factor” on compression ratio and runtime speed in high-compression mode.

import subprocess
import pvlib
import re
import matplotlib.pyplot as plt
import pandas as pd
import os

# Regex applied to lz4's benchmark report: captures the number in parentheses
# as "ratio" and the following "<number> MB/s" figure as "speed".
pattern = r'\((?P<ratio>\d*\.\d*)\), *(?P<speed>\d*\.\d*) MB/s'

# Values 3 through 12, each passed to lz4 as ``-b<N>``; see lz4 -H
factors = [*range(3, 13)]

# Files to benchmark: the source files of three pvlib modules.
input_files = [
    module.__file__
    for module in (pvlib.irradiance, pvlib.spa, pvlib.shading)
]

# One dict per (input file, factor) benchmark run is appended here.
data = []

# Run ``lz4 -b<factor>`` once per (input file, factor) pair and record the
# compression ratio and speed parsed from its report.
for input_file in input_files:
    for factor in factors:
        cmd = ['lz4', f'-b{factor}', input_file]
        result = subprocess.run(cmd, capture_output=True)
        # The benchmark report is read from stderr rather than stdout.
        # Decode manually instead of using text=True, which would translate
        # the '\r' separators relied on below.
        text = result.stderr.decode()
        if result.returncode != 0:
            raise RuntimeError(
                f'{cmd} exited with status {result.returncode}: '
                f'{text.strip()!r}'
            )

        # The report is split on carriage returns (presumably lz4 rewrites a
        # progress line in place -- confirm against the lz4 version in use);
        # the second-to-last segment is the one holding the final figures.
        segments = text.split('\r')
        if len(segments) < 2:
            raise RuntimeError(
                f'unexpected output from {cmd}: {text.strip()!r}'
            )
        last_line = segments[-2]

        m = re.search(pattern, last_line)
        if m is None:
            # Fail with the offending text instead of an AttributeError on
            # ``None.groupdict()``.
            raise RuntimeError(
                f'could not parse ratio/speed from {cmd} output: '
                f'{last_line!r}'
            )

        info = m.groupdict()
        info['input'] = input_file
        info['factor'] = factor
        data.append(info)

# Collect the benchmark records into a table; the regex captures are strings,
# so convert the two numeric columns to floats before plotting.
df = pd.DataFrame(data)
for column in ('ratio', 'speed'):
    df[column] = df[column].astype(float)

fig, ax = plt.subplots()

# One line per input file (in order of first appearance), labelled with the
# file's base name.
for input_file, subset in df.groupby('input', sort=False):
    file_label = os.path.basename(input_file)
    subset.plot(x='ratio', y='speed', ax=ax, label=file_label)

ax.set_xlabel('Compression Ratio')
ax.set_ylabel('Compresion Speed [MB/s]')
lz4 compression

Out:

Text(38.347222222222214, 0.5, 'Compresion Speed [MB/s]')

Total running time of the script: ( 3 minutes 16.171 seconds)

Gallery generated by Sphinx-Gallery