Note
Click here to download the full example code
dd block size
The dd
POSIX utility is useful for copying bytes between files. The bytes
are read and written in blocks whose size is controlled by the bs
parameter.
The default value of bs
is 512 bytes, presumably to match the historical
customary block size of hard drives. Is it a good default on modern computers?
import subprocess
import tempfile
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Benchmark: time how long ``dd`` needs to copy a fixed number of bytes for a
# range of block sizes, and draw one scatter plot per input/output pair.

# number of timed repetitions per block size
reps = 10
# dd reports the elapsed time on stderr as "<number> s". Accept any number of
# integer digits, an optional fraction and an optional exponent so that
# values such as "12.5 s", "1 s" or "6.1e-05 s" are parsed correctly.
pattern = r'(?P<time>\d+(?:\.\d*)?(?:[eE][-+]?\d+)?) s'
# block sizes to try: 2**10, 2**12, ..., 2**26 bytes
bss = (2**np.arange(10, 27, 2)).astype(int)
timings = []

# set up dummy files to copy bytes to and from; the ``with`` statement closes
# (and thereby deletes) them even if one of the dd runs fails
with tempfile.NamedTemporaryFile('w') as tmp1, \
        tempfile.NamedTemporaryFile('r') as tmp2:
    io_files = [
        ('/dev/zero', '/dev/null'),
        (tmp1.name, tmp2.name),
    ]
    # fill the input file with 64 MiB, i.e. bss[-1] bytes, so that every
    # timed run below can read the full amount from it
    subprocess.run(
        ['dd', 'if=/dev/zero', f'of={tmp1.name}', 'bs=1M', 'count=64'],
        check=True,
    )

    for input_file, output_file in io_files:
        # start from an empty list so each figure only shows its own pair
        timings = []
        for bs in bss:
            # always copy bss[-1] bytes in total, whatever the block size
            count = int(bss[-1] // bs)
            cmd = [
                'dd',
                f'if={input_file}',
                f'of={output_file}',
                f'bs={bs}',
                f'count={count}',
            ]
            for _ in range(reps):
                # dd prints its transfer statistics on stderr
                text = subprocess.run(
                    cmd, capture_output=True, check=True
                ).stderr.decode()
                m = re.search(pattern, text)
                if m is None:
                    raise RuntimeError(
                        f'could not find the elapsed time in dd output: {text!r}'
                    )
                timings.append({
                    'bs': bs,
                    'time': float(m.group('time')),
                })

        data = pd.DataFrame(timings)
        data.plot.scatter('bs', 'time', logx=True, logy=True)
        plt.ylabel('Copy Time [s]')
        plt.xlabel('Block Size [byte]')
        plt.title(rf'{input_file} $\rightarrow$ {output_file}')
Total running time of the script: ( 0 minutes 26.728 seconds)