diff --git a/README.md b/README.md
index 94f2def..3c39eb9 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,129 @@
-# gpu_scoring
+# GPU Scoring
-Data and methods for scoring GPUs for use in pricing models and other comparative rankings
\ No newline at end of file
+A small, opinionated toolkit to score GPUs based on memory capacity, memory bandwidth, FP16 compute, and high‑bandwidth interconnect capability. It outputs both a human‑readable table and JSON for downstream automation.
+
+## What this does
+- Loads GPU specifications from `gpu_data.json`
+- Computes a composite score per GPU on a 0–1 scale with a configurable minimum floor so no score is exactly 0 (useful when scores are later used as multipliers)
+- Prints a sorted table and a JSON array of `{ name, score }`
+
+## Project layout
+- `gpu_rankings.py`: scoring logic and CLI entry point
+- `gpu_data.json`: GPU specification dataset consumed by the scorer
+- `README.md`: this document
+
+## Data schema (`gpu_data.json`)
+Each top‑level key is a GPU name. Required fields per GPU:
+- `MEMORY_GB` (number): Total memory capacity in GB
+- `FP16_TFLOPS` (number): FP16 performance (or BF16 if that’s what the vendor exposes)
+- `MEMORY_BW_GBPS` (number): Sustained memory bandwidth in GB/s
+- `HIGH_BW_INTERCONNECT_EXISTS` (0 or 1): 1 if NVLink/SXM or an equivalent high‑bandwidth interconnect is supported; otherwise 0
+
+Example:
+```json
+{
+  "H100-80G-SXM5": {
+    "MEMORY_GB": 80,
+    "FP16_TFLOPS": 1979,
+    "MEMORY_BW_GBPS": 3360,
+    "HIGH_BW_INTERCONNECT_EXISTS": 1
+  }
+}
+```
+
+Notes:
+- If a field has the same value for every GPU, the scorer normalizes gracefully (that component evaluates to 1.0 for all GPUs instead of dividing by zero).
+- All four fields must be present for every GPU; a missing value produces a NaN score for that GPU.
+- Extra fields in JSON are ignored by the scorer.
+
+## Scoring method (high level)
+For each GPU:
+1) Normalize memory capacity to [0, 1]: `mem_score`
+2) Normalize memory bandwidth to [0, 1]: `bw_score`
+3) Apply a moderate multiplicative bandwidth boost to memory:
+   `bandwidth_weighted_memory = mem_score * (1 + bandwidth_bonus_weight * bw_score)`
+4) Normalize FP16 TFLOPS to [0, 1]: `compute_score`
+5) Add an interconnect bonus: `interconnect_bonus = interconnect_weight * HIGH_BW_INTERCONNECT_EXISTS`
+6) Combine:
+   `combined = memory_weight * bandwidth_weighted_memory + compute_weight * compute_score + interconnect_bonus`
+7) Min–max normalize across all GPUs and apply a floor epsilon `min_floor`:
+   `score = ((combined - min) / (max - min)) * (1 - min_floor) + min_floor`
+
+Why the floor? To avoid exact zeros when scores are later used as multiplicative factors; every device remains comparable but strictly > 0.
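+
+Worked example (using the `main()` defaults listed in the next section): a hypothetical GPU that leads on every metric has `mem_score = bw_score = compute_score = 1` and a high‑bandwidth interconnect, so `bandwidth_weighted_memory = 1 * (1 + 0.4 * 1) = 1.4` and `combined = 0.6 * 1.4 + 0.4 * 1 + 0.1 = 1.34`. Whichever GPU ends up with the highest `combined` value maps to 1.0 after normalization; the lowest maps to `min_floor` (0.05).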
+ +## Default weights (tunable) +Defaults used in `main()`: +- `memory_weight`: 0.6 +- `compute_weight`: 0.4 +- `bandwidth_bonus_weight`: 0.4 (max +40% boost to the memory component at highest bandwidth) +- `interconnect_weight`: 0.1 +- `min_floor`: 0.05 (final normalized scores lie in [0.05, 1]) + +Tuning guidance: +- Increase `bandwidth_bonus_weight` to value memory speed more +- Increase `compute_weight` when FP16 compute is more critical +- Increase `interconnect_weight` when NVLink/SXM‑class fabrics are required +- Adjust `min_floor` (e.g., 0.02–0.1) to avoid zeros while preserving rank contrast + +## Requirements +- Python 3.10+ +- Packages: `pandas`, `numpy` + +Install: +```bash +pip install pandas numpy +``` + +## Running +From the `gpu_scoring` directory: +```bash +python gpu_rankings.py +``` + +You’ll see: +- A table sorted by `score` (descending) +- A JSON array printed after the table: +```json +[ + { "name": "H100-80G-SXM5", "score": 0.995 }, + { "name": "A100-80G-SXM4", "score": 0.872 } +] +``` + +## Customizing weights +Edit the call to `gpu_score(...)` in `gpu_rankings.py` `main()`: +```python +df["score"] = gpu_score( + df, + memory_weight=0.6, + compute_weight=0.4, + bandwidth_bonus_weight=0.4, + interconnect_weight=0.1, + min_floor=0.05, +) +``` + +## Library usage (import in your own code) +```python +from gpu_rankings import load_gpu_data, build_df, gpu_score + +gpu_dict = load_gpu_data() # or load_gpu_data("/path/to/gpu_data.json") +df = build_df(gpu_dict) +df["score"] = gpu_score( + df, + memory_weight=0.6, + compute_weight=0.4, + bandwidth_bonus_weight=0.4, + interconnect_weight=0.1, + min_floor=0.05, +) +records = df[["name", "score"]].sort_values("score", ascending=False).to_dict(orient="records") +``` + +## Updating the dataset +Edit `gpu_data.json` to add or modify GPUs. Keep field names consistent: +- `MEMORY_GB`, `FP16_TFLOPS`, `MEMORY_BW_GBPS`, `HIGH_BW_INTERCONNECT_EXISTS` + +## Limitations and notes +- Scoring is single‑GPU and spec‑based; it does not model workload‑specific behavior (e.g., comms‑bound vs compute‑bound) or cluster‑level scaling. +- FP16 figures may be provided by vendors with different caveats (e.g., sparsity). Use consistent, non‑sparse figures where possible. +- Interconnect bonus is a coarse indicator (0/1); adjust the weight or extend the data if you need gradations. 
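+
+## Validating dataset edits (optional)
+A minimal stand‑alone sketch (not part of the toolkit) for checking that every entry in `gpu_data.json` carries the four required fields before running the scorer; it assumes you run it from the repository root, and the field list simply mirrors the schema above:
+```python
+import json
+
+REQUIRED = {"MEMORY_GB", "FP16_TFLOPS", "MEMORY_BW_GBPS", "HIGH_BW_INTERCONNECT_EXISTS"}
+
+# Load the dataset exactly as the scorer would
+with open("gpu_data.json") as f:
+    gpus = json.load(f)
+
+# Fail fast on the first GPU that is missing a required field
+for name, specs in gpus.items():
+    missing = REQUIRED - specs.keys()
+    if missing:
+        raise ValueError(f"{name} is missing required fields: {sorted(missing)}")
+
+print(f"OK: {len(gpus)} GPUs have all required fields")
+```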
\ No newline at end of file
diff --git a/gpu_data.json b/gpu_data.json
new file mode 100644
index 0000000..b0a9c18
--- /dev/null
+++ b/gpu_data.json
@@ -0,0 +1,75 @@
+{
+  "RTX-A4000": {
+    "MEMORY_GB": 16,
+    "FP16_TFLOPS": 19.7,
+    "MEMORY_BW_GBPS": 448,
+    "HIGH_BW_INTERCONNECT_EXISTS": 0
+  },
+  "RTX-A6000": {
+    "MEMORY_GB": 48,
+    "FP16_TFLOPS": 38.71,
+    "MEMORY_BW_GBPS": 768,
+    "HIGH_BW_INTERCONNECT_EXISTS": 0
+  },
+  "A100-80G-PCIe": {
+    "MEMORY_GB": 80,
+    "FP16_TFLOPS": 311.84,
+    "MEMORY_BW_GBPS": 1935,
+    "HIGH_BW_INTERCONNECT_EXISTS": 0
+  },
+  "A100-80G-PCIe-NVLink": {
+    "MEMORY_GB": 80,
+    "FP16_TFLOPS": 311.84,
+    "MEMORY_BW_GBPS": 1935,
+    "HIGH_BW_INTERCONNECT_EXISTS": 1
+  },
+  "A100-80G-SXM4": {
+    "MEMORY_GB": 80,
+    "FP16_TFLOPS": 311.84,
+    "MEMORY_BW_GBPS": 2039,
+    "RELEVANT_TFLOPS": 311.84,
+    "HIGH_BW_INTERCONNECT_EXISTS": 1
+  },
+  "L40": {
+    "MEMORY_GB": 48,
+    "FP16_TFLOPS": 90.52,
+    "MEMORY_BW_GBPS": 864,
+    "HIGH_BW_INTERCONNECT_EXISTS": 0
+  },
+  "H100-80G-PCIe": {
+    "MEMORY_GB": 80,
+    "FP16_TFLOPS": 1671,
+    "MEMORY_BW_GBPS": 2040,
+    "HIGH_BW_INTERCONNECT_EXISTS": 0
+  },
+  "H100-80G-PCIe-NVLink": {
+    "MEMORY_GB": 80,
+    "FP16_TFLOPS": 1671,
+    "MEMORY_BW_GBPS": 2040,
+    "HIGH_BW_INTERCONNECT_EXISTS": 1
+  },
+  "H100-80G-SXM5": {
+    "MEMORY_GB": 80,
+    "FP16_TFLOPS": 1979,
+    "MEMORY_BW_GBPS": 3360,
+    "HIGH_BW_INTERCONNECT_EXISTS": 1
+  },
+  "RTX-4090": {
+    "MEMORY_GB": 24,
+    "FP16_TFLOPS": 82.58,
+    "MEMORY_BW_GBPS": 1008,
+    "HIGH_BW_INTERCONNECT_EXISTS": 0
+  },
+  "RTX-5090": {
+    "MEMORY_GB": 32,
+    "FP16_TFLOPS": 104.8,
+    "MEMORY_BW_GBPS": 1792,
+    "HIGH_BW_INTERCONNECT_EXISTS": 0
+  },
+  "RTX-PRO6000-SE": {
+    "MEMORY_GB": 96,
+    "FP16_TFLOPS": 125.0,
+    "MEMORY_BW_GBPS": 1792,
+    "HIGH_BW_INTERCONNECT_EXISTS": 0
+  }
+}
diff --git a/gpu_rankings.py b/gpu_rankings.py
new file mode 100644
index 0000000..fe134d7
--- /dev/null
+++ b/gpu_rankings.py
@@ -0,0 +1,135 @@
+import pandas as pd
+import numpy as np
+import json
+from pathlib import Path
+
+
+# GPU specification data is stored in the external JSON file gpu_data.json.
+
+# Notes:
+# FP16_TFLOPS is either FP16 perf or BF16 if available
+# High BW Interconnect refers to the ability to use NVLink or SXM interconnects to connect multiple GPUs together; this is either 0 or 1
+
+def load_gpu_data(json_path: str | None = None):
+    """Load the GPU specification dictionary from gpu_data.json (or an explicit JSON path)."""
+    base_dir = Path(__file__).parent
+    path = Path(json_path) if json_path else (base_dir / "gpu_data.json")
+    with path.open("r") as f:
+        return json.load(f)
+
+
+def build_df(gpu_dict):
+    """Convert the nested GPU dictionary to a DataFrame with one row per GPU."""
+    data = []
+    for name, specs in gpu_dict.items():
+        row = {"name": name}
+        row.update(specs)
+        data.append(row)
+
+    df = pd.DataFrame(data)
+    return df
+
+
+def gpu_score(
+    df,
+    memory_weight=0.7,
+    compute_weight=0.3,
+    bandwidth_bonus_weight=0.3,
+    interconnect_weight=0.3,
+    min_floor=0.05,
+):
+    """
+    GPU score calculation using:
+    - Memory capacity (0-1), moderately boosted by memory bandwidth
+    - FP16 TFLOPS (0-1) as a separate, tunable weight
+    - Optional high-bandwidth interconnect bonus (set interconnect_weight=0 to disable)
+
+    Args:
+        df: DataFrame with MEMORY_GB, MEMORY_BW_GBPS, FP16_TFLOPS, HIGH_BW_INTERCONNECT_EXISTS
+        memory_weight: Weight for memory component (0-1)
+        compute_weight: Weight for FP16 compute component (0-1)
+        bandwidth_bonus_weight: Scales bandwidth effect on memory (e.g., 0.3 => up to +30% boost)
+        interconnect_weight: Bonus applied when HIGH_BW_INTERCONNECT_EXISTS is 1 (0 disables)
+        min_floor: Minimum normalized value (>0 ensures no exact zeros); result scaled to [min_floor, 1].
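+
+    Returns:
+        pandas.Series of floats indexed like df, lying in [min_floor, 1] (all 1.0 if there is no variation).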
+
+    Notes:
+    - To make bandwidth influence more/less, adjust bandwidth_bonus_weight.
+    - To favor compute vs memory, adjust compute_weight vs memory_weight.
+    - The final combined score is min-max normalized across GPUs, then scaled to [min_floor, 1].
+    """
+    # Normalize memory capacity
+    mem = df["MEMORY_GB"].astype(float)
+    mem_min, mem_max = mem.min(), mem.max()
+    mem_score = pd.Series(1.0, index=df.index) if mem_max == mem_min else (mem - mem_min) / (mem_max - mem_min)
+
+    # Normalize memory bandwidth
+    bw = df["MEMORY_BW_GBPS"].astype(float)
+    bw_min, bw_max = bw.min(), bw.max()
+    bw_score = pd.Series(1.0, index=df.index) if bw_max == bw_min else (bw - bw_min) / (bw_max - bw_min)
+
+    # Apply a moderate multiplicative bonus to memory based on bandwidth
+    # Example: with bandwidth_bonus_weight=0.3, highest-bandwidth memory gets up to +30% boost
+    bandwidth_weighted_memory = mem_score * (1.0 + bandwidth_bonus_weight * bw_score)
+
+    # Normalize FP16 TFLOPS
+    fp16 = df["FP16_TFLOPS"].astype(float)
+    fp16_min, fp16_max = fp16.min(), fp16.max()
+    compute_score = pd.Series(1.0, index=df.index) if fp16_max == fp16_min else (fp16 - fp16_min) / (fp16_max - fp16_min)
+
+    # Optional interconnect bonus
+    interconnect_bonus = df["HIGH_BW_INTERCONNECT_EXISTS"].astype(float) * interconnect_weight
+
+    # Combine components
+    combined = (memory_weight * bandwidth_weighted_memory) + (compute_weight * compute_score) + interconnect_bonus
+
+    # Normalize across GPUs and scale to [min_floor, 1]
+    cmin, cmax = combined.min(), combined.max()
+    if cmax == cmin:
+        return pd.Series(1.0, index=df.index)
+    combined01 = (combined - cmin) / (cmax - cmin)
+    return (combined01 * (1.0 - min_floor)) + min_floor
+
+
+def main():
+    """Run GPU score calculation and display results in a table"""
+    # Build dataframe
+    gpu_data = load_gpu_data()
+    df = build_df(gpu_data)
+
+    # Weights for the default ranking: memory 0.6 / compute 0.4, moderate bandwidth bonus,
+    # small interconnect bonus; min_floor keeps its 0.05 default
+    df["score"] = gpu_score(
+        df,
+        memory_weight=0.6,
+        compute_weight=0.4,
+        bandwidth_bonus_weight=0.4,
+        interconnect_weight=0.1,
+    )
+
+    # Create results table with GPU names and scores
+    results = df[["name", "score"]].copy()
+
+    # Sort by score (descending) for better readability
+    results = results.sort_values("score", ascending=False)
+
+    # Format scores to 3 decimal places for cleaner display
+    results["score"] = results["score"].round(3)
+
+    # Print table
+    print("\nGPU Ranking Results (0-1 scale, higher is better)\n")
+    print("=" * 80)
+    print(f"{'GPU Name':<30} {'Score':<12}")
+    print("=" * 80)
+
+    for _, row in results.iterrows():
+        print(f"{row['name']:<30} {row['score']:<12.3f}")
+
+    print("=" * 80)
+
+    # Also output JSON (list of {name, score})
+    json_payload = results.to_dict(orient="records")
+    print("\nJSON Results:\n")
+    print(json.dumps(json_payload, indent=2))
+
+
+if __name__ == "__main__":
+    main()