# blog-contributions/opsec/openwebuilocalllms/gen_quant_graph.py

import matplotlib.pyplot as plt

# Benchmark table: quantization level -> (memory usage, accuracy).
# Insertion order is the left-to-right order of the categories on the chart.
data = {
    "Q2_K":   (3032, 74.29),
    "Q3_K_S": (3495, 82.19),
    "Q3_K_M": (3833, 93.29),
    "Q4_0":   (4460, 96.09),
    "Q4_K_S": (4476, 97.38),
    "Q4_K_M": (4693, 97.67),
    "Q4_1":   (4893, 97.18),
    "Q5_0":   (5354, 98.98),
    "Q5_K_S": (5340, 99.08),
    "Q5_K_M": (5468, 99.00),
    "Q5_1":   (5788, 99.16),
    "Q6_K":   (6291, 99.58),
    "Q8_0":   (8146, 99.93),
}

# Split the table into three parallel lists: the level names, and the two
# value columns (transposed in a single pass with zip).
labels = [*data]
memory_usage, accuracy = map(list, zip(*data.values()))

# Plot setup using a dark theme
plt.style.use('dark_background')

fig, ax1 = plt.subplots()
ax1.set_title('Quantization Levels of llama 3.1 8B')

# Primary y-axis (left): memory usage drawn as bars
color_memory = 'tab:cyan'
ax1.set_xlabel('Quantization Level')
ax1.set_ylabel('Memory Usage (MB)', color=color_memory)
ax1.bar(labels, memory_usage, color=color_memory, alpha=0.8, label='Memory Usage')
ax1.tick_params(axis='y', labelcolor=color_memory)
# One tick label per category does not fit horizontally at the default
# figure width, so tilt the labels to keep them from overlapping.
ax1.tick_params(axis='x', labelrotation=45)

# Secondary y-axis (right): accuracy drawn as a line sharing the same x-axis
ax2 = ax1.twinx()
color_accuracy = 'tab:orange'
ax2.set_ylabel('Accuracy (%)', color=color_accuracy)
ax2.plot(labels, accuracy, color=color_accuracy, marker='o', linestyle='-', linewidth=2, markersize=8, label='Accuracy')
ax2.tick_params(axis='y', labelcolor=color_accuracy)

# Single combined legend for both axes. It is attached to ax2 because the
# twin axes are drawn after ax1; a legend owned by ax1 could be painted over
# by the accuracy line.
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(lines1 + lines2, labels1 + labels2, loc='upper left')

# Fit the layout last, once the rotated tick labels and both y-axis labels
# exist, so nothing gets clipped at the figure edges.
fig.tight_layout()

# Show plot
plt.show()