CEC Over Time

A rough view of how the efficiency and Voc of Mono-Si PV modules on the market have evolved over time.

import pandas as pd
import matplotlib.pyplot as plt
import subprocess
import urllib

Find one SAM commit hash for each calendar quarter:

# List every commit reachable from HEAD as "<full hash> <ISO-8601 committer date>".
# NOTE(review): SAM_dir is not defined in this section; presumably it is the
# path to a local clone of the SAM repository -- confirm it is set beforehand.
cmd = ["git", "log", "--pretty=%H %cI"]
# check=True makes a failed git call raise CalledProcessError right here,
# rather than failing later with an obscure DataFrame construction error.
result = subprocess.run(cmd, capture_output=True, cwd=SAM_dir, check=True)
values = [line.split() for line in result.stdout.decode().strip().split('\n')]
all_commits = pd.DataFrame(values, columns=['hash', 'date'])
# not sure why but I only get a proper datetime index when I drop the times
# (presumably because the timestamps carry differing UTC offsets -- unverified)
all_commits['date'] = pd.to_datetime(all_commits['date'].str.split('T').str[0])
all_commits = all_commits.set_index('date')
# keep a single commit hash per calendar quarter (bins start on quarter start)
quarterly_commits = all_commits.resample('QS').first()
# the early commits don't have any useful info
quarterly_commits = quarterly_commits.loc['2014':]

Fetch historical CEC module db files from SAM’s GitHub:

# Raw-file URL of the CEC module database at a given SAM commit.
url_template = 'https://raw.githubusercontent.com/NREL/SAM/{longhash}/deploy/libraries/CEC%20Modules.csv'

# Accumulates, quarter by quarter, the Mono-c-Si modules not seen in any
# earlier quarter; each row is tagged with the quarter it first appeared in.
df = pd.DataFrame({'Name': []})
for date, longhash in quarterly_commits['hash'].items():
    url = url_template.format(longhash=longhash)
    try:
        aux = pd.read_csv(url)
    except urllib.error.HTTPError:
        # the file could not be downloaded for this commit; skip the quarter
        continue
    # drop the first two data rows (presumably extra header rows such as
    # units / variable names -- confirm against the CSV layout)
    aux = aux.drop(index=[0, 1])
    aux['date'] = date
    # only store new mods -- not perfect because some mods change names between
    # versions, but a simple filter should mostly work...?
    previously_seen_modules = set(df['Name'])
    aux = aux.loc[aux['Technology'] == 'Mono-c-Si', :]
    aux = aux.loc[~aux['Name'].isin(previously_seen_modules), :]
    if aux.empty:
        # need a dummy entry so the quarter is still represented
        aux = pd.DataFrame({'date': [date]})
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    # Re-concatenating every iteration is inefficient but there is only one
    # frame per quarter.
    df = pd.concat([df, aux])

# Columns needed for the per-cell Voc and efficiency calculations; cast them
# to float (presumably they are read in as strings -- confirm if it matters).
numerics = ['V_oc_ref', 'N_s', 'V_mp_ref', 'I_mp_ref', 'A_c']
df = df.astype({column: float for column in numerics})


def plot(y, ylabel):
    """Draw a box plot of column ``y`` of the global ``df``, grouped by date.

    Outliers and the grid are hidden. The x tick labels are thinned so that
    only ticks whose label falls in January show text, and that text is the
    year alone.

    Parameters
    ----------
    y : str
        Name of the ``df`` column to plot.
    ylabel : str
        Text for the y-axis label.
    """
    df.boxplot(y, by='date', showfliers=False, grid=False)
    ticks, labels = plt.xticks()
    # cut everything after the last '-' (assumes labels look like
    # "YYYY-MM-DD..."), leaving "YYYY-MM"
    labels = [tl.get_text().rsplit('-', 1)[0] for tl in labels]
    # keep just the year on "-01" (January) ticks and blank out the rest
    labels = [tl.split('-')[0] if tl.endswith('-01') else '' for tl in labels]
    plt.xticks(ticks=ticks, labels=labels)
    # the ylabel argument was previously accepted but never applied
    plt.ylabel(ylabel)

# Per-cell open-circuit voltage: module V_oc_ref divided by N_s
# (presumably the number of cells in series -- confirm).
df['Voc_per_cell'] = df['V_oc_ref'].div(df['N_s'])
plot('Voc_per_cell', 'Cell STC $V_{oc}$ [V]')

# Old files have no Pstc column, so power is taken as Vmp * Imp.
# Efficiency in percent = 100 * power / area / 1000 (the 1000 is presumably
# the STC irradiance in W/m^2 with A_c in m^2 -- confirm).
df['P_per_area'] = (
    df['V_mp_ref'].mul(100).mul(df['I_mp_ref']).div(df['A_c']).div(1000)
)
plot('P_per_area', 'Module STC Efficiency [%]')
