{"rows":[{"id":"cmpdzs3fq00s8pd01aobvkld4","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":82.93,"tokSOut":576.85,"tokSPrefill":754.54,"tokSTotal":null,"peakVramGb":7.43,"createdAt":"2026-05-20T11:42:06.183Z","notes":"hipfire @ 4840f0b6 (master, post-sync 2026-05-20)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\ndecode mode: DFlash speculative\n  drafter: qwen35-9b-dflash-mq4.hf4 (z-lab Qwen3.5-DFlash native head, MQ4 weights)\n  target: qwen3.5-9b.mq4\nkv_cache: q8\nprompt_normalize: true (default)\nτ: 13.1818   accept_rate: 0.8788  (byte-identical to canonical bench)\noutput: 256 tokens emitted, coherent merge_sort code\nruns: 3 (median reported); per-run tok/s: [577.48, 572.66, 576.85], spread 0.84%\nAR baseline (same binary same hardware, --ar-baseline): 122.88 tok/s — DFlash speedup 4.70x\nHardware: Sapphire Nitro+ RX 7900 XTX, ROCm 7.2.x, k9lin host\nRefresh of existing leaderboard row 2 (575.24 tok/s on 0.1.20-alpha+71896daa) — parity with current master.","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-9b.mq4 --draft ~/.hipfire/models/qwen35-9b-dflash-mq4.hf4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":1,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+4840f0b6","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":1,"reactionCounts":{},"myEmoji":null},{"id":"cmofzk74w0002ij042txvvkx5","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":75.53,"tokSOut":575.25,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":8.07421875,"createdAt":"2026-04-26T16:31:47.744Z","notes":"hipfire @ e659452 (master post-PR #51 + #52 series)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  chatml-wrapped + explicit empty <think></think> for thinking-off\n  prompt file: benchmarks/prompts/merge_sort_thinking_off.txt\nruns: 3 (median reported); range 571.2–576.0\n  per-run tok/s: [571.23, 575.25, 576.04]\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 13.077\naccept_rate (median): 0.872\nprefill: 31.1ms (868.3 tok/s)\nttft (excl warmup): 75.5ms = prefill + first cycle\nvram: 8268 MB used / 24560 MB total\nnatural EOS at 184 tokens — production-shape bounded code (no loop)","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.5-9b.mq4 --draft qwen35-9b-dflash-mq4.hfq --prompt $(cat benchmarks/prompts/merge_sort_thinking_off.txt) --max 256 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+e659452","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":2,"reactionCounts":{},"myEmoji":null},{"id":"cmp8fqdz200ypo4014i9empwm","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":78.93,"tokSOut":575.24,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":7.95,"createdAt":"2026-05-16T14:22:03.326Z","notes":"hipfire @ 71896daa (feat/tbq, sFWHT KV family + multi-GPU)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  benchmarks/prompts/merge_sort_thinking_off.txt (27 input tokens)\ndecode mode: DFlash speculative (adaptive block_size 8..16, mean B=16)\n  drafter: qwen35-9b-dflash-mq4.hf4 (z-lab Qwen3.5-DFlash native head, MQ4 weights)\n  target: qwen3.5-9b.mq4 (MQ4 weights, KV q8 filtered: 16/64 layers carry KV)\nkv_cache: q8 — chosen as winner from {q8,asym3,fwht3,fwht4} sweep (τ identical across all modes at this prompt; q8 has lowest KV-write overhead)\nprompt_normalize: true (default since 2026-04-26)\nτ: 13.1818   accept_rate: 0.8788\noutput: 256 tokens emitted\nAR baseline (same binary same hardware, --ar-baseline): 123.11 tok/s — DFlash speedup 4.67x\nHardware: Sapphire Nitro+ RX 7900 XTX, ROCm 7.2.x, k9lin host\nGPU coordination: gpu-tcas exclusive lease per cell","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-9b.mq4 --draft ~/.hipfire/models/qwen35-9b-dflash-mq4.hf4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.20-alpha+71896daa","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":3,"reactionCounts":{},"myEmoji":null},{"id":"cmpdzu8ig00sbpd01dsdrf3zr","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":63.2,"tokSOut":371.8,"tokSPrefill":1160.07,"tokSTotal":null,"peakVramGb":7.65,"createdAt":"2026-05-20T11:43:46.072Z","notes":"hipfire @ 4840f0b6 (master post-sync 2026-05-20)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  benchmarks/prompts/merge_sort_thinking_off.txt (27 input tokens)\ndecode mode: DFlash speculative (adaptive block_size, mean B=16)\n  drafter: qwen35-9b-dflash-mq4.hfq (z-lab Qwen3.5-DFlash native head, MQ4 weights)\n  target: qwen3.5-9b.mq4 (MQ4 weights)\nkv_cache: q8\nprompt_normalize: true (default since 2026-04-26)\nτ: 13.1818   accept_rate: 0.8788\noutput: 256 tokens emitted\nAR baseline (same binary same hardware, --ar-baseline): 99.39 tok/s — DFlash speedup 3.74x\nHardware: single R9700, ROCm 7.x, hiptrx host (HIP_VISIBLE_DEVICES=0)\nGPU coordination: dedicated host (hiptrx), no concurrent GPU load","engineFlags":{"commandSnippet":"HIP_VISIBLE_DEVICES=0 ./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-9b.mq4 --draft ~/.hipfire/models/qwen35-9b-dflash-mq4.hfq --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":1,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"Radeon AI Pro R9700","gpuCount":1,"vramGb":32,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":"i5 8500","os":"Ubuntu 24.04","isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+4840f0b6","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Radeon AI Pro R9700","hardwareGroupKey":"DISCRETE_GPU:radeon ai pro r9700","rank":4,"reactionCounts":{},"myEmoji":null},{"id":"cmofdp1ve0003i904tw2yf6bs","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":null,"tokSOut":336.98,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":null,"createdAt":"2026-04-26T06:19:42.650Z","notes":"hipfire @ f16eceb (master post-perf-recovery, PR #47)\nprompt: LRU cache PEP-8 strict (md5=df5dedc8040c, see benchmarks/prompts/lru_cache_pep8_strict.txt)\nruns: 3 (median reported)\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 7.5\naccept_rate (median): 0.5\nruns all: [336.98, 343.32, 314.93]","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.5-9b.mq4 --draft qwen35-9b-dflash-mq4.hfq --prompt $(cat benchmarks/prompts/lru_cache_pep8_strict.txt) --max 120 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":5,"reactionCounts":{},"myEmoji":null},{"id":"cmofkl1gm0006l4045txzjtoc","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":null,"tokSOut":322.6,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":null,"createdAt":"2026-04-26T09:32:32.806Z","notes":"hipfire @ f16eceb (master post-perf-recovery PR #47)\nprompt: LRU cache PEP-8 strict (md5=df5dedc8040c, see benchmarks/prompts/lru_cache_pep8_strict.txt)\nruns: 3 (median reported); range 322.5–346.0\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 7.067\naccept_rate (median): 0.471\nall runs tok/s: [346.0, 322.47, 322.6]","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.5-9b.mq4 --draft qwen35-9b-dflash-mq4.hfq --prompt $(cat benchmarks/prompts/lru_cache_pep8_strict.txt) --max 120 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":6,"reactionCounts":{},"myEmoji":null},{"id":"cmppftgt8003ipl016cm2gqpb","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":110.63,"tokSOut":286.64,"tokSPrefill":435.71,"tokSTotal":null,"peakVramGb":17.505859375,"createdAt":"2026-05-28T11:56:31.964Z","notes":"Qwen3.5-27B dense AWQ DFlash merge-sort bench on hiptrx.\nTarget path: /home/kaden/.hipfire/models/qwen3-27b-3.5.mq4-awq\nTarget md5: e1c9480a3fa54ad0a0f25ca18105510a\nTarget sha256: ea615949ddf6a180eee03ff6fde39f7e51148f153b1b05f82258b9953088576e\nDraft path: /home/kaden/.hipfire/models/qwen35-27b-dflash-mq4.hfq\nDraft md5: 7b6df2a4ee1c8d933f0a52e187d1860b\nDraft sha256: 3d428b97c1911a9ad815cc52fbee080306852c1dafad6b1b17bb70bd68010301\nPrompt: benchmarks/prompts/merge_sort_thinking_off.txt\nPrompt md5: 253c7ac50857fe6d0e10fb0d2c5e35c0\nBinary md5: a86f47db76bf9c25360e95422768e49f\nBinary sha256: a6047b9e23e3d37face8492edd0a7833383708f63b308425175ded325661d2a2\nGit: feat/dense-dflash-perfmaxx 3730b58bd3b5380eb1de672ec032b24016905458\n3 fresh-process runs tok/s: 286.64, 286.98, 286.61; median 286.64\nPer-run metrics: all runs tau=13.0000, accept_rate=0.8667, cycles=11, committed=165, accepted=143, output_tokens=155, eos=y, output_md5=f8ae27b7925cfde51ba446eaab547b2e\nMedian auxiliary metrics: ttft_ms=110.63, prefill_tok_s=435.71, peak_vram_gb=17.51\nHardware: hiptrx, AMD Radeon AI PRO R9700, gfx1201, GPU dev 0, 34.2 GB VRAM, HIP 7.2, ROCm driver 7.0.0-15-generic; host reports 4x R9700 gfx1201 but benchmark uses one GPU\nCoherence/eyeball: decoded output is fluent Python merge_sort code and terminates with <|im_end|>. No AR baseline rerun for this publish.","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo \\\n  --target /home/kaden/.hipfire/models/qwen3-27b-3.5.mq4-awq \\\n  --draft /home/kaden/.hipfire/models/qwen35-27b-dflash-mq4.hfq \\\n  --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt \\\n  --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":1,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-27B","displayName":"Qwen3.5-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"Radeon AI Pro R9700","gpuCount":1,"vramGb":34.2,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":"Ubuntu Linux 7.0.0-15-generic on hiptrx","isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+3730b58","quantization":"MQ4-AWQ","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Radeon AI Pro R9700","hardwareGroupKey":"DISCRETE_GPU:radeon ai pro r9700","rank":7,"reactionCounts":{},"myEmoji":null},{"id":"cmppmb47l0051pl01lyaup9mc","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":77,"tokSOut":259.53,"tokSPrefill":1093.7,"tokSTotal":null,"peakVramGb":27.8179,"createdAt":"2026-05-28T14:58:13.137Z","notes":"A3B MoE MTP crown dry run on hiptrx. Prompt is HumanEval/26 remove_duplicates from /home/kaden/lucebox-hub/dflash/eval/humaneval_plus/humanevalplus.jsonl.\nTarget path: /home/kaden/.hipfire/models/qwen3.6-35b-a3b.mq4-awq-mi300x\nTarget md5: edde51ec1dac0f2bd42cff5ef1cb8944\nTarget sha256: 1dc1c7964de415e0040a540a4300b9518e11b00c13d99c23f576f2b9fe1e8bca\nMTP head path: /home/kaden/.hipfire/models/qwen3.6-35b-a3b.moe-mtp-mq4-cvs16384.mtp\nMTP head md5: 51076bfb5b489832f2f6c4191b9799b0\nMTP head sha256: 1e11a06d1946e1e5711d6692894e917a61d5f360d4f3508c8372d49e97c912c1\nPrompt file: .codeinsight+research/dense-dflash-perfmaxx/a3b-mtp-humaneval26-crown-validate-20260528-144250/prompt.txt\nPrompt raw file md5: 90a10f2b41763aacc80a3e4db5e0bb62\nPrompt normalized md5 reported by mtp_only_demo: e04fc67ed919bec41bfd7426a0b75d58\nPrompt sha256: 7823eea9be9599563c786fa16e792f3da2482016607d75ee06ca40b2d33c7dca\nBinary md5: 8c148158f2edd7e03bc78b2b76b73242\nBinary sha256: 2097a2b651af12e20ef7a30a932051ad4f5a30f0fc0fc5e33a975cdf48e5ea57\nGit: feat/dense-dflash-perfmaxx 3730b58bd3b5380eb1de672ec032b24016905458\n5 fresh-process runs tok/s: 258.64, 259.77, 259.34, 260.06, 259.53; median 259.53\nPer-run metrics: all runs tau=5.1250, cycles=8, committed=42, accepted_mtp=33, bonus=7, replay_skipped=5/8 cycles, eos=y, decoded_output_md5=5a1ba0994ef05da65ecfb9025f51743a\nMedian auxiliary metrics: ttft_ms=77.0, prefill_tok_s=1093.70, decode_secs=0.162, peak_vram_gb=27.8179\nHardware: hiptrx, AMD Radeon AI PRO R9700, gfx1201, GPU dev 0, 34.2 GB VRAM, HIP 7.2, ROCm driver 7.0.0-15-generic; host reports 4x R9700 gfx1201 but benchmark uses one GPU\nCoherence/eyeball: decoded output is a coherent remove_duplicates Python body and terminates with <|endoftext|>. No AR baseline rerun for this dry run.\nPositioning: 259.53 tok/s exceeds the current noted 254 tok/s first-place threshold for Qwen3.6-35B-A3B.","engineFlags":{"commandSnippet":"./target/release/examples/mtp_only_demo \\\n  --target /home/kaden/.hipfire/models/qwen3.6-35b-a3b.mq4-awq-mi300x \\\n  --mtp-head /home/kaden/.hipfire/models/qwen3.6-35b-a3b.moe-mtp-mq4-cvs16384.mtp \\\n  --prompt-file .codeinsight+research/dense-dflash-perfmaxx/a3b-mtp-humaneval26-crown-validate-20260528-144250/prompt.txt \\\n  --max-n 5 --trunk-spine --kv-mode q8 --temp 0.0 --max 256 --no-chatml","tensorParallel":1,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":true},"model":{"hfId":"Qwen/Qwen3.6-35B-A3B","displayName":"Qwen3.6-35B-A3B","family":"Qwen","params":36,"isMoE":true,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"Radeon AI Pro R9700","gpuCount":1,"vramGb":34.2,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":"Ubuntu Linux 7.0.0-15-generic on hiptrx","isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+3730b58","quantization":"MQ4-AWQ","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Radeon AI Pro R9700","hardwareGroupKey":"DISCRETE_GPU:radeon ai pro r9700","rank":8,"reactionCounts":{},"myEmoji":null},{"id":"cmpdzx93g00sppd01d7e6lzvm","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":123.68,"tokSOut":255.83,"tokSPrefill":405.91,"tokSTotal":null,"peakVramGb":7.73,"createdAt":"2026-05-20T11:46:06.796Z","notes":"hipfire @ 1a378379 (master 4840f0b6 + 3 hipx-local ROCm 7.2.x compiler shims rebased)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\ndecode mode: DFlash speculative\n  drafter: qwen35-9b-dflash-mq4.hfq\n  target: qwen3.5-9b.mq4\nkv_cache: q8\nprompt_normalize: true (default)\nτ: 13.1818   accept_rate: 0.8788\noutput: 256 tokens emitted, coherent merge_sort code\nruns: 3 (median reported); per-run tok/s: [255.83, 255.13, 256.32], spread 0.47%\nAR baseline (same binary same hardware, --ar-baseline): 45.65 tok/s — DFlash speedup 5.60x\nHardware: AMD RYZEN AI MAX+ 395 APU (Strix Halo), ROCm 7.2.x, hipx host (HIP_VISIBLE_DEVICES=1)\nFirst DFlash row for gfx1151 on localmaxxing.","engineFlags":{"commandSnippet":"HIP_VISIBLE_DEVICES=1 ./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-9b.mq4 --draft ~/.hipfire/models/qwen35-9b-dflash-mq4.hfq --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"UNIFIED","gpuName":null,"gpuCount":1,"vramGb":null,"chipVendor":"AMD","chipFamily":"Strix Halo","chipVariant":"Radeon 8060S Graphics","unifiedMemoryGb":103,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+1a378379","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Strix Halo Radeon 8060S","hardwareGroupKey":"UNIFIED:strix halo radeon 8060s","rank":9,"reactionCounts":{},"myEmoji":null},{"id":"cmppfddi3002xpl01z00xj2yk","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":110.87,"tokSOut":254.83,"tokSPrefill":435.19,"tokSTotal":null,"peakVramGb":17.505859375,"createdAt":"2026-05-28T11:44:01.179Z","notes":"Corrected AWQ trunk; not the dev lmhead-a100 .hfq.\nTarget path used for command: /home/kaden/.hipfire/models/qwen3.6-27b-awq.mq4\nTarget md5: e42a489edf0be1b144307f2869b73b93\nTarget sha256 / HF x-linked-etag: 86a5f80fd29d545abb1093dead242725ced6d68b8607c6d566d897b1a82442dc\nHF source: schuttdev/hipfire-qwen3.6-27b qwen3.6-27b.mq4 @ f9b326a657f14cbc400e384ff84a4b9b4b726ba2\nDraft path: /home/kaden/.hipfire/models/qwen36-27b-dflash-mq4.hf4\nDraft md5: 204c4c4ceab30cb9ebc118fa9d59a446\nDraft sha256: bd8c4f07ae80fe1385bf2606af9a7ba0daa18ca8daec50916f2a489054c44e70\nPrompt: benchmarks/prompts/merge_sort_thinking_off.txt\nPrompt md5: 253c7ac50857fe6d0e10fb0d2c5e35c0\nBinary md5: a86f47db76bf9c25360e95422768e49f\nBinary sha256: a6047b9e23e3d37face8492edd0a7833383708f63b308425175ded325661d2a2\nGit: feat/dense-dflash-perfmaxx 3730b58bd3b5380eb1de672ec032b24016905458\n3 fresh-process runs tok/s: 254.83, 255.10, 253.97; median 254.83\nPer-run metrics: all runs tau=11.3846, accept_rate=0.7590, cycles=13, committed=174, accepted=148, output_tokens=162, eos=y, output_md5=4d3a55e4d1daff05ee0265b3055325bd\nMedian auxiliary metrics: ttft_ms=110.87, prefill_tok_s=435.19, peak_vram_gb=17.51\nHardware: hiptrx, AMD Radeon AI PRO R9700, gfx1201, GPU dev 0, 34.2 GB VRAM, HIP 7.2, ROCm driver 7.0.0-15-generic; host reports 4x R9700 gfx1201 but benchmark uses one GPU\nCoherence/eyeball: decoded output is fluent Python merge_sort code and terminates with <|im_end|> then <|endoftext|>. No AR baseline rerun for this publish.","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo \\\n  --target /home/kaden/.hipfire/models/qwen3.6-27b-awq.mq4 \\\n  --draft /home/kaden/.hipfire/models/qwen36-27b-dflash-mq4.hf4 \\\n  --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt \\\n  --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":1,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"Radeon AI Pro R9700","gpuCount":1,"vramGb":34.2,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":"Ubuntu Linux 7.0.0-15-generic on hiptrx","isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+3730b58","quantization":"MQ4-AWQ","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Radeon AI Pro R9700","hardwareGroupKey":"DISCRETE_GPU:radeon ai pro r9700","rank":10,"reactionCounts":{},"myEmoji":null},{"id":"cmp8fozao00xqo401nzsa1w68","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":153.93,"tokSOut":254.4,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":18.72,"createdAt":"2026-05-16T14:20:57.648Z","notes":"hipfire @ 71896daa (feat/tbq, sFWHT KV family + multi-GPU)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  benchmarks/prompts/merge_sort_thinking_off.txt (27 input tokens)\ndecode mode: DFlash speculative (adaptive block_size 8..16, mean B=16)\n  drafter: qwen35-27b-dflash.mq4 (z-lab Qwen3.5-DFlash native head, MQ4 weights)\n  target: qwen3.5-27b.mq4 (MQ4 weights, KV q8 filtered: 16/64 layers carry KV)\nkv_cache: q8 — chosen as winner from {q8,asym3,fwht3,fwht4} sweep (τ identical across all modes at this prompt; q8 has lowest KV-write overhead)\nprompt_normalize: true (default since 2026-04-26)\nτ: 13.1818   accept_rate: 0.8788\noutput: 256 tokens emitted\nAR baseline (same binary same hardware, --ar-baseline): 44.60 tok/s — DFlash speedup 5.70x\nHardware: Sapphire Nitro+ RX 7900 XTX, ROCm 7.2.x, k9lin host\nGPU coordination: gpu-tcas exclusive lease per cell","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-27b.mq4 --draft ~/.hipfire/models/qwen35-27b-dflash.mq4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-27B","displayName":"Qwen3.5-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.20-alpha+71896daa","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":11,"reactionCounts":{"fire":1},"myEmoji":null},{"id":"cmofyatrj0002jv04r8064iqg","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":154.7,"tokSOut":250.25,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":18.2890625,"createdAt":"2026-04-26T15:56:30.896Z","notes":"hipfire @ 3945bb2 (master post-PR #51 loop-break + ngram_block series)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  chatml-wrapped + explicit empty <think></think> for thinking-off\n  prompt file: benchmarks/prompts/merge_sort_thinking_off.txt\nruns: 3 (median reported); range 249.6–251.0\n  per-run tok/s: [249.56, 250.25, 251.02]\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 13.182\naccept_rate (median): 0.879\nprefill: 81.3ms (332.0 tok/s)\nttft (excl warmup): 154.7ms = prefill + first cycle\nvram: 18728 MB used / 24560 MB total\nnatural EOS at 157 tokens — production-shape bounded code (no loop)","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.5-27b.mq4 --draft qwen35-27b-dflash.mq4 --prompt $(cat benchmarks/prompts/merge_sort_thinking_off.txt) --max 256 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-27B","displayName":"Qwen3.5-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+3945bb2","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":12,"reactionCounts":{},"myEmoji":null},{"id":"cmpruarq600bqo3017mie18ph","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":194.1,"tokSOut":241.1,"tokSPrefill":242.1,"tokSTotal":null,"peakVramGb":5.43,"createdAt":"2026-05-30T04:17:26.239Z","notes":"hipfire @ d38a269d (branch lfm2moe/impl, arch_id 11) — native Rust HIP/ROCm engine.\nmodel: LFM2.5-8B-A1B — hybrid 18 short-conv (LIV double-gated) + 6 GQA-attn layers;\n  FFN = 2 dense SwiGLU + 22 sparse top-4 MoE (32 experts).\nquant: default 'mq4' = experts MQ4G256 (FWHT 4-bit) + Q8 projections/embeddings.\nhw: 1x AMD Radeon AI PRO R9700 (gfx1201/RDNA4), ROCm 7.2.2.\n  (box has 4 cards; this is a single-card single-stream decode, pinned HIP_VISIBLE_DEVICES=0.)\n\nDECODE THROUGHPUT (matched, integrity-checked):\n  greedy max=256, FULL-LENGTH — all 5/5 runs emitted 256 tok (NO EOS truncation).\n  fresh process per run, 2 discarded warmups (DPM + kernel-cache), median reported.\n  decode runs: [241.2, 241.1, 240.3, 241.3, 241.1] tok/s  ->  median 241.1\n  range 240.3-241.3 (spread 0.41%, DPM warm).\n  prompt 46 tok (md5=427ebaf94017faf5056215891980888f); prefill 242 tok/s.\n  binary md5=3b00b57fb0c493d824a9154aede19032.  measured via examples/infer_lfm2moe (decode rate is path-independent).\n\nCOHERENCE (validated separately via the daemon's chat path, same binary):\n  coherence_probe self-check 11/11 detectors; 3 daemon/ChatFrame probes\n  (reasoning + 2 loop-prone code prompts) all verdict OK (0 hard / 0 soft) —\n  no attractor/loop/special-leak. coherence-gate lfm2 rows correct\n  (Paris / 80 km/h) at ~244 tok/s short-form. Forward also validated to\n  per-layer cosine >=0.999 vs HF Lfm2MoeForCausalLM (tiny oracle).\n  A daemon full-256 chat run (lru_cache) decoded 238.6 tok/s — within ~1% of the headline.","engineFlags":{"commandSnippet":"HIP_VISIBLE_DEVICES=0 HIPFIRE_DPM_WARMUP_SECS=10 ./target/release/examples/infer_lfm2moe --model ~/.hipfire/models/lfm2.5-8b-a1b.mq4 --prompt <250-char prompt> --max 256  # prompt md5=427ebaf94017faf5056215891980888f, full-256-tok, median of 5 fresh-process runs","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":"q8_0","attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"LiquidAI/LFM2.5-8B-A1B","displayName":"LFM2.5-8B-A1B","family":null,"params":8,"isMoE":false,"baseModel":{"hfId":"LiquidAI/LFM2.5-8B-A1B-Base","displayName":"LFM2.5-8B-A1B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"Radeon AI Pro R9700","gpuCount":1,"vramGb":34.2,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":"Ubuntu Linux 7.0.0-15-generic on hiptrx","isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+d38a269d","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Radeon AI Pro R9700","hardwareGroupKey":"DISCRETE_GPU:radeon ai pro r9700","rank":13,"reactionCounts":{},"myEmoji":null},{"id":"cmppe779e002mpl014npvq9st","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":154.23,"tokSOut":232.14,"tokSPrefill":334.59,"tokSTotal":null,"peakVramGb":17.46875,"createdAt":"2026-05-28T11:11:13.539Z","notes":"hipfire @ ca30ca21 (dense-dflash-gfx11-perfmaxx, origin/feat/dense-dflash-perfmaxx base)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\ndecode mode: DFlash speculative, adaptive B stayed at B=16\ntarget: qwen3.6-27b-awq.mq4 (md5=e42a489edf0be1b144307f2869b73b93) -- corrected AWQ trunk\ndraft: qwen36-27b-dflash-mq4.hf4 (md5=204c4c4ceab30cb9ebc118fa9d59a446)\nbinary: dflash_spec_demo md5=5c7585b5523a6abc0ce9829413a81bec\nkv_cache: q8; ctx=4096; temp=0.0; no-chatml; prompt_normalize: true/default\nruns: 3 fresh processes; per-run tok/s: [232.28, 231.81, 232.14]; median=232.14; spread=0.20%\nmedian details: cycles=13 committed=174 accepted=148 tau=11.3846 accept_rate=0.7590\nprefill median: 80.7ms / 334.59 tok/s; ttft median: 154.23ms; peak VRAM: 17888 MB\nnatural EOS at 162 emitted tokens; decoded output eyeballed as fluent merge_sort code\nThis row supersedes invalid scratch runs that used qwen3.6-27b-dev/qwen3.6-27b.mq4-awq-gptq-f2-lmhead-a100.hfq as trunk.\nNo AR baseline rerun in this correction pass.\nHardware: Sapphire Nitro+ RX 7900 XTX / gfx1100 on k9lin, HIP 7.2, Ubuntu 24.04.","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target /home/kaden/.hipfire/models/qwen3.6-27b-awq.mq4 --draft /home/kaden/.hipfire/models/qwen36-27b-dflash-mq4.hf4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":1,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+ca30ca21","quantization":"MQ4-AWQ","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":14,"reactionCounts":{},"myEmoji":null},{"id":"cmpe01n9200szpd017ctsjoiw","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":125.87,"tokSOut":221.97,"tokSPrefill":479.38,"tokSTotal":null,"peakVramGb":7.43,"createdAt":"2026-05-20T11:49:31.766Z","notes":"hipfire @ 1a378379 (master 4840f0b6 + 3 hipx-local ROCm 7.2.x compiler shims rebased)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\ndecode mode: DFlash speculative\n  drafter: qwen35-9b-dflash-mq4.hfq (z-lab Qwen3.5-DFlash native head, MQ4 weights)\n  target: qwen3.5-9b.mq4\nkv_cache: q8\nprompt_normalize: true (default)\nτ: 13.1818   accept_rate: 0.8788  (byte-identical to canonical bench)\noutput: 256 tokens emitted\nruns: 3 (median reported); per-run tok/s: [221.97, 219.99, 222.23], spread 1.02%\nAR baseline (same binary same hardware, --ar-baseline): 75.07 tok/s — DFlash speedup 2.96x\nHardware: AMD RX 6950 XT (RDNA2 gfx1030, 16 GB GDDR6), ROCm 7.2.x, hipx host (HIP_VISIBLE_DEVICES=2)\nFirst-ever RDNA2 / gfx1030 row on localmaxxing for hipfire. Per-chip dp4a + fdot2 kernel family hand-tuned for RDNA2.","engineFlags":{"commandSnippet":"HIP_VISIBLE_DEVICES=2 ./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-9b.mq4 --draft ~/.hipfire/models/qwen35-9b-dflash-mq4.hfq --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":1,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 6950 XT","gpuCount":1,"vramGb":16,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+1a378379","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 6950 XT","hardwareGroupKey":"DISCRETE_GPU:rx 6950 xt","rank":15,"reactionCounts":{},"myEmoji":null},{"id":"cmp8fw36n00zno401goz8qnyv","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":155.98,"tokSOut":216.69,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":18.72,"createdAt":"2026-05-16T14:26:29.280Z","notes":"hipfire @ 71896daa (feat/tbq, sFWHT KV family + multi-GPU)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  benchmarks/prompts/merge_sort_thinking_off.txt (27 input tokens)\ndecode mode: DFlash speculative (adaptive block_size 8..16, mean B=16)\n  drafter: qwen36-27b-dflash-mq4.hf4 (z-lab Qwen3.6-DFlash native head, MQ4 weights)\n  target: qwen3.6-27b.mq4 (MQ4 weights, KV q8 filtered: 16/64 layers carry KV)\nkv_cache: q8 — chosen as winner from {q8,asym3,fwht3,fwht4} sweep (τ identical across all modes at this prompt; q8 has lowest KV-write overhead)\nprompt_normalize: true (default since 2026-04-26)\nτ: 10.9286   accept_rate: 0.7286\noutput: 256 tokens emitted\nAR baseline (same binary same hardware, --ar-baseline): 44.79 tok/s — DFlash speedup 4.84x\nHardware: Sapphire Nitro+ RX 7900 XTX, ROCm 7.2.x, k9lin host\nGPU coordination: gpu-tcas exclusive lease per cell","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.6-27b.mq4 --draft ~/.hipfire/models/qwen36-27b-dflash-mq4.hf4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.20-alpha+71896daa","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":16,"reactionCounts":{},"myEmoji":null},{"id":"cmp8fnkw000xbo401r5mi3m0i","contextLength":1024,"prefillTokens":null,"batchSize":1,"ttftMs":155.82,"tokSOut":216.59,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":17.69,"createdAt":"2026-05-16T14:19:52.321Z","notes":"hipfire @ 71896daa (feat/tbq, sFWHT KV family + multi-GPU)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  benchmarks/prompts/merge_sort_thinking_off.txt (27 input tokens)\ndecode mode: DFlash speculative (adaptive block_size 8..16, mean B=16)\n  drafter: qwen36-27b-dflash-mq4.hf4 (z-lab Qwen3.6-DFlash native head, MQ4 weights)\n  target: qwen3.6-27b.mq4 (MQ4 weights, KV q8 filtered: 16/64 layers carry KV)\nkv_cache: q8 — chosen as winner from {q8,asym3,fwht3,fwht4} sweep (τ identical across all modes at this prompt; q8 has lowest KV-write overhead)\nprompt_normalize: true (default since 2026-04-26)\nτ: 10.9286   accept_rate: 0.7286\noutput: 256 tokens emitted\nAR baseline (same binary same hardware, --ar-baseline): 44.65 tok/s — DFlash speedup 4.85x\nHardware: Sapphire Nitro+ RX 7900 XTX, ROCm 7.2.x, k9lin host\nGPU coordination: gpu-tcas exclusive lease per cell","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.6-27b.mq4 --draft ~/.hipfire/models/qwen36-27b-dflash-mq4.hf4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 1024","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.20-alpha+71896daa","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":17,"reactionCounts":{},"myEmoji":null},{"id":"cmofkrpp0000hic04ymyzumfg","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":null,"tokSOut":201.11,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":null,"createdAt":"2026-04-26T09:37:44.148Z","notes":"hipfire @ f16eceb (master post-perf-recovery PR #47)\nprompt: LRU cache PEP-8 strict (md5=df5dedc8040c, see benchmarks/prompts/lru_cache_pep8_strict.txt)\nruns: 3 (median reported); range 201.0–201.1\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 10.545\naccept_rate (median): 0.703\nall runs tok/s: [201.04, 201.11, 201.11]","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.5-27b.mq4 --draft qwen35-27b-dflash.mq4 --prompt $(cat benchmarks/prompts/lru_cache_pep8_strict.txt) --max 120 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-27B","displayName":"Qwen3.5-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":18,"reactionCounts":{},"myEmoji":null},{"id":"cmpdzvqpx00sipd01t74sf3wk","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":133.04,"tokSOut":196.23,"tokSPrefill":438.49,"tokSTotal":null,"peakVramGb":17.92,"createdAt":"2026-05-20T11:44:56.326Z","notes":"hipfire @ 4840f0b6 (master post-sync 2026-05-20)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  benchmarks/prompts/merge_sort_thinking_off.txt (27 input tokens)\ndecode mode: DFlash speculative (adaptive block_size, mean B=16)\n  drafter: qwen35-27b-dflash-mq4.hfq (z-lab Qwen3.5-DFlash native head, MQ4 weights)\n  target: qwen3.5-27b.mq4 (MQ4 weights)\nkv_cache: q8\nprompt_normalize: true (default)\nτ: 13.1818   accept_rate: 0.8788\noutput: 256 tokens emitted\nruns: 3 (median reported); per-run tok/s: [196.53, 196.23, 196.09], spread 0.22%\nAR baseline (same binary same hardware, --ar-baseline): 35.41 tok/s — DFlash speedup 5.54x\nHardware: single R9700 (32 GB), ROCm 7.x, hiptrx host (HIP_VISIBLE_DEVICES=0)","engineFlags":{"commandSnippet":"HIP_VISIBLE_DEVICES=0 ./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-27b.mq4 --draft ~/.hipfire/models/qwen35-27b-dflash-mq4.hfq --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":1,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-27B","displayName":"Qwen3.5-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"Radeon AI Pro R9700","gpuCount":1,"vramGb":32,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":"i5 8500","os":"Ubuntu 24.04","isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+4840f0b6","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Radeon AI Pro R9700","hardwareGroupKey":"DISCRETE_GPU:radeon ai pro r9700","rank":19,"reactionCounts":{},"myEmoji":null},{"id":"cmofe2efg000kla04k5h7rto0","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":null,"tokSOut":181.96,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":null,"createdAt":"2026-04-26T06:30:05.453Z","notes":"hipfire @ f16eceb (master post-perf-recovery, PR #47)\nprompt: LRU cache PEP-8 strict (md5=df5dedc8040c, see benchmarks/prompts/lru_cache_pep8_strict.txt)\nruns: 3 (median reported)\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 9.417\naccept_rate (median): 0.628\nruns all: [171.34, 181.96, 200.56]","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.5-27b.mq4 --draft qwen35-27b-dflash.mq4 --prompt $(cat benchmarks/prompts/lru_cache_pep8_strict.txt) --max 120 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-27B","displayName":"Qwen3.5-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":20,"reactionCounts":{},"myEmoji":null},{"id":"cmofkye0g0009l404ap3uadyl","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":null,"tokSOut":154.62,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":null,"createdAt":"2026-04-26T09:42:55.600Z","notes":"hipfire @ f16eceb (master post-perf-recovery PR #47)\nprompt: LRU cache PEP-8 strict (md5=df5dedc8040c, see benchmarks/prompts/lru_cache_pep8_strict.txt)\nruns: 3 (median reported); range 66.4–155.9\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 5.611\naccept_rate (median): 0.374\nall runs tok/s: [66.38, 154.62, 155.92]","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.5-35b-a3b.mq4 --draft qwen35-35b-a3b-dflash-mq4.hfq --prompt $(cat benchmarks/prompts/lru_cache_pep8_strict.txt) --max 120 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-35B-A3B","displayName":"Qwen3.5-35B-A3B","family":"Qwen","params":35,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-35B-A3B-Base","displayName":"Qwen3.5-35B-A3B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":21,"reactionCounts":{},"myEmoji":null},{"id":"cmofefr0j000ula04chaak8ou","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":null,"tokSOut":140.59,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":null,"createdAt":"2026-04-26T06:40:28.291Z","notes":"hipfire @ f16eceb (master post-perf-recovery, PR #47)\nprompt: LRU cache PEP-8 strict (md5=df5dedc8040c, see benchmarks/prompts/lru_cache_pep8_strict.txt)\nruns: 3 (median reported)\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 4.905\naccept_rate (median): 0.327\nruns all: [74.91, 160.26, 140.59]","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.5-35b-a3b.mq4 --draft qwen35-35b-a3b-dflash-mq4.hfq --prompt $(cat benchmarks/prompts/lru_cache_pep8_strict.txt) --max 120 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-35B-A3B","displayName":"Qwen3.5-35B-A3B","family":"Qwen","params":35,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-35B-A3B-Base","displayName":"Qwen3.5-35B-A3B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":22,"reactionCounts":{},"myEmoji":null},{"id":"cmofe92tt000pla04ivnjv4jd","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":75.4,"tokSOut":135.9,"tokSPrefill":null,"tokSTotal":125.9,"peakVramGb":22.12890625,"createdAt":"2026-04-26T06:35:17.010Z","notes":"hipfire @ f16eceb (master post-perf-recovery)\nprompt: \"Explain the theory of general relativity in simple terms.\"\nruns: 3\ndecode mode: AR\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\npp128: 326.1 tok/s\npp512: 322.5 tok/s\npp1024: 320.8 tok/s\npp2048: 317.5 tok/s\nvram_loaded: 22660 MB","engineFlags":{"commandSnippet":"hipfire bench qwen3.5:35b-a3b --runs 3 \"Explain the theory of general relativity in simple terms.\"","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-35B-A3B","displayName":"Qwen3.5-35B-A3B","family":"Qwen","params":35,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-35B-A3B-Base","displayName":"Qwen3.5-35B-A3B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":23,"reactionCounts":{},"myEmoji":null},{"id":"cmofezrun000cie04z5y0b83e","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":75.5,"tokSOut":135.3,"tokSPrefill":null,"tokSTotal":125.3,"peakVramGb":22.12890625,"createdAt":"2026-04-26T06:56:02.495Z","notes":"hipfire @ f16eceb (master post-perf-recovery)\nprompt: \"Explain the theory of general relativity in simple terms.\"\nruns: 3\ndecode mode: AR\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\npp128: 325.4 tok/s\npp512: 322.0 tok/s\npp1024: 320.4 tok/s\npp2048: 316.8 tok/s\nvram_loaded: 22660 MB","engineFlags":{"commandSnippet":"hipfire bench qwen3.6:35b-a3b --runs 3 \"Explain the theory of general relativity in simple terms.\"","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-35B-A3B","displayName":"Qwen3.6-35B-A3B","family":"Qwen","params":36,"isMoE":true,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":24,"reactionCounts":{},"myEmoji":null},{"id":"cmofgd9iz000ml4055xsdgvuy","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":75.6,"tokSOut":134,"tokSPrefill":null,"tokSTotal":124.2,"peakVramGb":22.13,"createdAt":"2026-04-26T07:34:31.547Z","notes":"hipfire @ 0.1.8-alpha+f16eceb (master post-perf-recovery, PR #47)\nprompt: \"Explain the theory of general relativity in simple terms.\"\nruns: 3 (median reported)\ndecode mode: AR (autoregressive, no spec-decode)\nkv_cache: asym3 (3-bit rotated K + Q8 V; 5.5x vs fp32)\nprompt_normalize: true (default since 2026-04-26)\npp128: 328.1 tok/s\npp512: 324.6 tok/s\npp1024: 323.1 tok/s\npp2048: 318.9 tok/s\nvram_loaded: 22660 MB\nQuantized in-house: hipfire-quantize MQ4 (FWHT-rotated 4-bit, group=256)","engineFlags":{"commandSnippet":"hipfire bench ornstein-3.6-35b-a3b --runs 3 \"Explain the theory of general relativity in simple terms.\"","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"DJLougen/Ornstein3.6-35B-A3B","displayName":"Ornstein3.6-35B-A3B","family":"Qwen","params":35,"isMoE":true,"baseModel":{"hfId":"unsloth/Qwen3.6-35B-A3B","displayName":"Qwen3.6-35B-A3B"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":25,"reactionCounts":{},"myEmoji":null},{"id":"cmp8ft6rt00z5o401l2omxie9","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":38.14,"tokSOut":123.11,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":5.83,"createdAt":"2026-05-16T14:24:13.961Z","notes":"hipfire @ 71896daa (feat/tbq) — AR baseline (--ar-baseline, no DFlash)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  benchmarks/prompts/merge_sort_thinking_off.txt (27 input tokens)\ndecode mode: AR (pure greedy target decode, no drafter)\ntarget: qwen3.5-9b.mq4 (MQ4 weights, KV q8)\noutput: 156 tokens (natural EOS)\nhipGraph captured 475 blobs (enabled)\npeak VRAM: target-only (drafter loaded by bench binary but unused; deducted)\nHardware: Sapphire Nitro+ RX 7900 XTX, ROCm 7.2.x, k9lin host","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-9b.mq4 --draft ~/.hipfire/models/qwen35-9b-dflash-mq4.hf4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096 --ar-baseline","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.20-alpha+71896daa","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":26,"reactionCounts":{},"myEmoji":null},{"id":"cmofdidib0002ju04tsntnze1","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":48.1,"tokSOut":122.3,"tokSPrefill":null,"tokSTotal":116.9,"peakVramGb":5.662109375,"createdAt":"2026-04-26T06:14:31.139Z","notes":"hipfire @ f16eceb (master post-perf-recovery)\nprompt: \"Explain the theory of general relativity in simple terms.\"\nruns: 3\ndecode mode: AR\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\npp128: 602.8 tok/s\npp512: 591.4 tok/s\npp1024: 587.8 tok/s\npp2048: 580.0 tok/s\nvram_loaded: 5798 MB","engineFlags":{"commandSnippet":"hipfire bench qwen3.5:9b --runs 3 \"Explain the theory of general relativity in simple terms.\"","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":27,"reactionCounts":{},"myEmoji":null},{"id":"cmofet3j80005l2042r92gywi","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":null,"tokSOut":118.16,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":null,"createdAt":"2026-04-26T06:50:51.045Z","notes":"hipfire @ f16eceb (master post-perf-recovery, PR #47)\nprompt: LRU cache PEP-8 strict (md5=df5dedc8040c, see benchmarks/prompts/lru_cache_pep8_strict.txt)\nruns: 3 (median reported)\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 5.632\naccept_rate (median): 0.375\nruns all: [118.19, 118.14, 118.16]","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.6-27b.mq4 --draft qwen36-27b-dflash-mq4.hfq --prompt $(cat benchmarks/prompts/lru_cache_pep8_strict.txt) --max 120 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":28,"reactionCounts":{},"myEmoji":null},{"id":"cmofl528t000nl4045c6nx68j","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":null,"tokSOut":118.14,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":null,"createdAt":"2026-04-26T09:48:06.941Z","notes":"hipfire @ f16eceb (master post-perf-recovery PR #47)\nprompt: LRU cache PEP-8 strict (md5=df5dedc8040c, see benchmarks/prompts/lru_cache_pep8_strict.txt)\nruns: 3 (median reported); range 117.5–118.4\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 5.632\naccept_rate (median): 0.375\nall runs tok/s: [118.14, 117.48, 118.44]","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.6-27b.mq4 --draft qwen36-27b-dflash-mq4.hfq --prompt $(cat benchmarks/prompts/lru_cache_pep8_strict.txt) --max 120 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":29,"reactionCounts":{},"myEmoji":null},{"id":"cmpe035k300t6pd01ytbe6t1x","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":338.04,"tokSOut":104.51,"tokSPrefill":135.01,"tokSTotal":null,"peakVramGb":18,"createdAt":"2026-05-20T11:50:42.147Z","notes":"hipfire @ 1a378379 (master 4840f0b6 + 3 hipx-local ROCm 7.2.x compiler shims rebased)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\ndecode mode: DFlash speculative\n  drafter: qwen35-27b-dflash-mq4.hfq (z-lab Qwen3.5-DFlash native head, MQ4 weights)\n  target: qwen3.5-27b.mq4\nkv_cache: q8\nprompt_normalize: true (default)\nτ: 13.1818   accept_rate: 0.8788  (byte-identical to canonical bench)\noutput: 256 tokens emitted\nruns: 3 (median reported); per-run tok/s: [104.46, 104.83, 104.51], spread 0.35%\nAR baseline (same binary same hardware, --ar-baseline): TBD — see separate row pair\nHardware: AMD RYZEN AI MAX+ 395 APU (Strix Halo gfx1151, 103 GB unified RAM), ROCm 7.2.x, hipx host (HIP_VISIBLE_DEVICES=1)\nFirst 27B DFlash row for gfx1151 on localmaxxing.","engineFlags":{"commandSnippet":"HIP_VISIBLE_DEVICES=1 ./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-27b.mq4 --draft ~/.hipfire/models/qwen35-27b-dflash-mq4.hfq --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-27B","displayName":"Qwen3.5-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"UNIFIED","gpuName":null,"gpuCount":1,"vramGb":null,"chipVendor":"AMD","chipFamily":"Strix Halo","chipVariant":"Radeon 8060S Graphics","unifiedMemoryGb":103,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+1a378379","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Strix Halo Radeon 8060S","hardwareGroupKey":"UNIFIED:strix halo radeon 8060s","rank":30,"reactionCounts":{},"myEmoji":null},{"id":"cmpe04nu500tnpd010msqcvbw","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":337.39,"tokSOut":88.3,"tokSPrefill":134.37,"tokSTotal":null,"peakVramGb":18,"createdAt":"2026-05-20T11:51:52.494Z","notes":"hipfire @ 1a378379 (master 4840f0b6 + 3 hipx-local ROCm 7.2.x compiler shims rebased)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\ndecode mode: DFlash speculative\n  drafter: qwen36-27b-dflash-mq4.hf4 (z-lab Qwen3.6-DFlash native head, MQ4 weights)\n  target: qwen3.6-27b.mq4\nkv_cache: q8\nprompt_normalize: true (default)\nτ: 10.9286   accept_rate: 0.7286\noutput: 256 tokens emitted\nruns: 3 (median reported)\nAR baseline (same binary same hardware, --ar-baseline): 14.81 tok/s — DFlash speedup 5.96x\nHardware: AMD RYZEN AI MAX+ 395 APU (Strix Halo gfx1151), ROCm 7.2.x, hipx host (HIP_VISIBLE_DEVICES=1)\nFirst Qwen 3.6 row on gfx1151 on localmaxxing.","engineFlags":{"commandSnippet":"HIP_VISIBLE_DEVICES=1 ./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.6-27b.mq4 --draft ~/.hipfire/models/qwen36-27b-dflash-mq4.hf4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"UNIFIED","gpuName":null,"gpuCount":1,"vramGb":null,"chipVendor":"AMD","chipFamily":"Strix Halo","chipVariant":"Radeon 8060S Graphics","unifiedMemoryGb":103,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+1a378379","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Strix Halo Radeon 8060S","hardwareGroupKey":"UNIFIED:strix halo radeon 8060s","rank":31,"reactionCounts":{},"myEmoji":null},{"id":"cmoff6g560019la04hewfjrll","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":null,"tokSOut":68.6,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":null,"createdAt":"2026-04-26T07:01:13.915Z","notes":"hipfire @ f16eceb (master post-perf-recovery, PR #47)\nprompt: LRU cache PEP-8 strict (md5=df5dedc8040c, see benchmarks/prompts/lru_cache_pep8_strict.txt)\nruns: 3 (median reported)\ndecode mode: DFlash speculative (block_size=16)\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\nτ (median): 1.222\naccept_rate (median): 0.081\nruns all: [48.08, 68.6, 68.65]","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target qwen3.6-35b-a3b.mq4 --draft qwen36-35b-a3b-dflash-mq4.hfq --prompt $(cat benchmarks/prompts/lru_cache_pep8_strict.txt) --max 120 --no-chatml --kv-mode asym3","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-35B-A3B","displayName":"Qwen3.6-35B-A3B","family":"Qwen","params":36,"isMoE":true,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":32,"reactionCounts":{},"myEmoji":null},{"id":"cmpdzyrb700svpd01klzbe8zq","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":147.7,"tokSOut":61.6,"tokSPrefill":209.91,"tokSTotal":null,"peakVramGb":7.42,"createdAt":"2026-05-20T11:47:17.059Z","notes":"hipfire @ 1a378379 (master 4840f0b6 + 3 hipx-local ROCm 7.2.x compiler shims rebased)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\ndecode mode: AR baseline (--ar-baseline, no speculative)\n  target: qwen3.5-9b.mq4 (MQ4 weights, ~5.5 GB)\nkv_cache: q8 (fits in 8 GB with headroom)\nprompt_normalize: true (default)\noutput: 256 tokens emitted, coherent merge_sort code\nruns: 3 (median reported); per-run tok/s: [61.72, 61.47, 61.60], spread 0.41%\nHardware: AMD RX 5700 XT (RDNA1 gfx1010, 8 GB GDDR6), ROCm 7.2.x, hipx host (HIP_VISIBLE_DEVICES=0)\nFirst-ever RDNA1 / gfx1010 row on localmaxxing for hipfire. Notable: AMD doesn't officially support ROCm on RDNA1; hipfire compiles & runs natively without HSA_OVERRIDE_GFX_VERSION.","engineFlags":{"commandSnippet":"HIP_VISIBLE_DEVICES=0 ./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-9b.mq4 --draft ~/.hipfire/models/qwen35-9b-dflash-mq4.hfq --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096 --ar-baseline","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":"q8","attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-9B","displayName":"Qwen3.5-9B","family":"Qwen","params":9,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.5-9B-Base","displayName":"Qwen3.5-9B-Base"}},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 5700 XT","gpuCount":1,"vramGb":8,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+1a378379","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 5700 XT","hardwareGroupKey":"DISCRETE_GPU:rx 5700 xt","rank":33,"reactionCounts":{},"myEmoji":null},{"id":"cmp8frsd700ywo401juntko0l","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":102.35,"tokSOut":44.79,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":15.75,"createdAt":"2026-05-16T14:23:08.636Z","notes":"hipfire @ 71896daa (feat/tbq) — AR baseline (--ar-baseline, no DFlash)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  benchmarks/prompts/merge_sort_thinking_off.txt (27 input tokens)\ndecode mode: AR (pure greedy target decode, no drafter)\ntarget: qwen3.6-27b.mq4 (MQ4 weights, KV q8)\noutput: 166 tokens (natural EOS)\nhipGraph captured 475 blobs (enabled)\npeak VRAM: target-only (drafter loaded by bench binary but unused; deducted)\nHardware: Sapphire Nitro+ RX 7900 XTX, ROCm 7.2.x, k9lin host","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.6-27b.mq4 --draft ~/.hipfire/models/qwen36-27b-dflash-mq4.hf4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096 --ar-baseline","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.20-alpha+71896daa","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":34,"reactionCounts":{},"myEmoji":null},{"id":"cmp8ful7100zgo40177q9g4f9","contextLength":1024,"prefillTokens":null,"batchSize":1,"ttftMs":102.41,"tokSOut":44.65,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":15.58,"createdAt":"2026-05-16T14:25:19.309Z","notes":"hipfire @ 71896daa (feat/tbq) — AR baseline (--ar-baseline, no DFlash)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  benchmarks/prompts/merge_sort_thinking_off.txt (27 input tokens)\ndecode mode: AR (pure greedy target decode, no drafter)\ntarget: qwen3.6-27b.mq4 (MQ4 weights, KV q8)\noutput: 166 tokens (natural EOS)\nhipGraph captured 475 blobs (enabled)\npeak VRAM: target-only (drafter loaded by bench binary but unused; deducted)\nHardware: Sapphire Nitro+ RX 7900 XTX, ROCm 7.2.x, k9lin host","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.6-27b.mq4 --draft ~/.hipfire/models/qwen36-27b-dflash-mq4.hf4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 1024 --ar-baseline","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.20-alpha+71896daa","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":35,"reactionCounts":{},"myEmoji":null},{"id":"cmp8fl9rq00x0o401mf7mnqjc","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":102.44,"tokSOut":44.6,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":15.75,"createdAt":"2026-05-16T14:18:04.598Z","notes":"hipfire @ 71896daa (feat/tbq) — AR baseline (--ar-baseline, no DFlash)\nprompt: merge_sort thinking-OFF (md5=253c7ac50857fe6d0e10fb0d2c5e35c0)\n  benchmarks/prompts/merge_sort_thinking_off.txt (27 input tokens)\ndecode mode: AR (pure greedy target decode, no drafter)\ntarget: qwen3.5-27b.mq4 (MQ4 weights, KV q8)\noutput: 156 tokens (natural EOS)\nhipGraph captured 475 blobs (enabled)\npeak VRAM: target-only (drafter loaded by bench binary but unused; deducted)\nHardware: Sapphire Nitro+ RX 7900 XTX, ROCm 7.2.x, k9lin host","engineFlags":{"commandSnippet":"./target/release/examples/dflash_spec_demo --target ~/.hipfire/models/qwen3.5-27b.mq4 --draft ~/.hipfire/models/qwen35-27b-dflash.mq4 --prompt-file benchmarks/prompts/merge_sort_thinking_off.txt --max 256 --temp 0.0 --no-chatml --kv-mode q8 --ctx 4096 --ar-baseline","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-27B","displayName":"Qwen3.5-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.20-alpha+71896daa","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":36,"reactionCounts":{},"myEmoji":null},{"id":"cmofdvq7c0001l40461v8g3z7","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":117.8,"tokSOut":43.7,"tokSPrefill":null,"tokSTotal":42,"peakVramGb":15.10546875,"createdAt":"2026-04-26T06:24:54.120Z","notes":"hipfire @ f16eceb (master post-perf-recovery)\nprompt: \"Explain the theory of general relativity in simple terms.\"\nruns: 3\ndecode mode: AR\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\npp128: 223.5 tok/s\npp512: 215.7 tok/s\npp1024: 213.3 tok/s\npp2048: 210.9 tok/s\nvram_loaded: 15468 MB","engineFlags":{"commandSnippet":"hipfire bench qwen3.5:27b --runs 3 \"Explain the theory of general relativity in simple terms.\"","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-27B","displayName":"Qwen3.5-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":37,"reactionCounts":{},"myEmoji":null},{"id":"cmofemf840014la04lhmues5d","contextLength":4096,"prefillTokens":null,"batchSize":1,"ttftMs":117.6,"tokSOut":43.6,"tokSPrefill":null,"tokSTotal":41.9,"peakVramGb":15.10546875,"createdAt":"2026-04-26T06:45:39.604Z","notes":"hipfire @ f16eceb (master post-perf-recovery)\nprompt: \"Explain the theory of general relativity in simple terms.\"\nruns: 3\ndecode mode: AR\nkv_cache: asym3\nprompt_normalize: true (default since 2026-04-26)\npp128: 224.2 tok/s\npp512: 216.5 tok/s\npp1024: 214.3 tok/s\npp2048: 211.1 tok/s\nvram_loaded: 15468 MB","engineFlags":{"commandSnippet":"hipfire bench qwen3.6:27b --runs 3 \"Explain the theory of general relativity in simple terms.\"","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"RX 7900 XTX","gpuCount":1,"vramGb":24,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha+f16eceb","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"RX 7900 XTX","hardwareGroupKey":"DISCRETE_GPU:rx 7900 xtx","rank":38,"reactionCounts":{},"myEmoji":null},{"id":"cmpqeql2m00d6pl01ytmtzkml","contextLength":4096,"prefillTokens":2048,"batchSize":1,"ttftMs":659,"tokSOut":18.98884405411821,"tokSPrefill":146.4,"tokSTotal":16.76445934618608,"peakVramGb":83,"createdAt":"2026-05-29T04:14:04.078Z","notes":"hipfire @ eef416fc on deepseek/lmhead-batch-verify; PR #355 DeepSeek4 Flash perf config.\nhardware ID: Ryzen AI Max 395; GPU: Radeon 8060S gfx1151; HIP 7.2; HIP_VISIBLE_DEVICES=1.\nmodel: /home/kaden/.hipfire/models/deepseek-v4-flash.mq2lloyd + MTP sidecar deepseek-v4-flash-mtp.mq2lloyd.\nprompt md5: 9604bed9b1921b111a338949e6b3718e; daemon md5: a7efa4620fed82bb0ed338750d77cd94; profile_prefill md5: 08118fc3cc48ad255f4722835a1767d5.\ndecode: warmed daemon, 1 warmup + 3 measured, max_tokens=80, temperature=0, repeat_penalty=1.0.\nMTP K=2 median decode tok/s: 18.989; per-run [18.984, 18.989, 19.011].\nmedian spec_accept_pct: 80.952; spec_windows: 42; finish_reason=length.\nprefill small prompt median: 561.0ms; first committed token +98ms; TTFT submitted as prefill+first-token.\n2048-token synthetic prefill, PP_BATCH=2048, no profiler: median 146.4 tok/s; per-run [146.4, 146.5, 146.1].\nPREFILL_CHECK identical across all three prefill runs: argmax=102 logit_sum=-16994.6827 logit_max=23.570824 n=129280.\nraw logs: /tmp/deepseek4_lmx_20260529T040111Z","engineFlags":{"commandSnippet":"HIP_VISIBLE_DEVICES=1 HIPFIRE_DEEPSEEK4_PP_BATCH=2048 HIPFIRE_DEEPSEEK4_SPEC_DECODE=1 HIPFIRE_DEEPSEEK4_SPEC_K=2 HIPFIRE_DEEPSEEK4_MTP_ADDON=$HOME/.hipfire/models/deepseek-v4-flash-mtp.mq2lloyd ./target/release/examples/daemon","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":true,"mtpEnabled":false},"model":{"hfId":"deepseek-ai/DeepSeek-V4-Flash","displayName":"DeepSeek-V4-Flash","family":"Deepseek","params":158,"isMoE":true,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"Radeon 8060S","gpuCount":1,"vramGb":103.1,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":"AMD RYZEN AI MAX+ 395 w/ Radeon 8060S","os":"Ubuntu Linux 7.0.0-15-generic on hipx","isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.2.0+eef416fc","quantization":"MQ2-Lloyd + Q8 KV + MTP sidecar","backend":"rocm"},"user":{"id":"cmoeye1gq0000le04ie2kqs58","username":"schuttdev","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Radeon 8060S","hardwareGroupKey":"DISCRETE_GPU:radeon 8060s","rank":39,"reactionCounts":{},"myEmoji":null},{"id":"cmokadzkk000bjl04vse1q64t","contextLength":16384,"prefillTokens":null,"batchSize":1,"ttftMs":null,"tokSOut":17,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":18,"createdAt":"2026-04-29T16:45:58.485Z","notes":"Hipfire (Kaden-Schutt/hipfire, alpha) on Strix Halo gfx1151 with native MQ4 (FWHT-pre-rotated 4-bit) format. Decode bandwidth-bound at ~17 tok/s ceiling: 256 GB/s LPDDR5X-8533 / ~15 GB MQ4 weight = ~17 tok/s theoretical, measured 16-22. Decode parity with llama.cpp expected; no decode-side optimization headroom on this hardware.","engineFlags":{"commandSnippet":"","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"unsloth/Qwen3.6-27B","displayName":"Qwen3.6-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":{"hfId":"Qwen/Qwen3.6-27B","displayName":"Qwen3.6-27B"}},"hardware":{"hwClass":"UNIFIED","gpuName":null,"gpuCount":1,"vramGb":null,"chipVendor":"AMD","chipFamily":"Strix Halo","chipVariant":"Ryzen AI Max+ 395","unifiedMemoryGb":128,"cpu":null,"os":"Ubuntu 24.04","isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"v0.1.8-alpha.2","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmojgsim70000l1043eaus0fz","username":"micahchoo","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Ryzen AI Max 395","hardwareGroupKey":"UNIFIED:ryzen ai max 395","rank":40,"reactionCounts":{},"myEmoji":null},{"id":"cmoknenfb0003l504ea1cci9l","contextLength":65536,"prefillTokens":null,"batchSize":1,"ttftMs":480,"tokSOut":14.79,"tokSPrefill":null,"tokSTotal":null,"peakVramGb":16,"createdAt":"2026-04-29T22:50:24.408Z","notes":"Strix Halo APU with 128 GB unified LPDDR5x at 256 GB/s; reported VRAM is the GPU-allocated portion of the unified pool. Hipfire pre-quantized MQ4 weights from the schuttdev/hipfire-qwen3.6-27b HuggingFace repo (FWHT-rotated 4-bit, ~15 GB on disk). Model is dense 27B with hybrid Full + Linear (DeltaNet) attention (Qwen 3.5 architecture, vocab 248320). Streaming SSE decode probe; warm steady-state on a 12-token prompt with thinking disabled and AR-only decode (no speculative draft). Decode rate is bandwidth-bound at the LPDDR5x ceiling.","engineFlags":{"commandSnippet":"hipfire serve  # default config: kv_cache=q8, attn_flash=auto, prefill_max_batch=2048, AR-only decode, thinking disabled per request via chat_template_kwargs.enable_thinking=false","tensorParallel":null,"gpuLayers":null,"kvCacheDtype":null,"attentionBackend":null,"flashAttn":null,"specDecoding":false,"mtpEnabled":false},"model":{"hfId":"Qwen/Qwen3.5-27B","displayName":"Qwen3.5-27B","family":"Qwen","params":28,"isMoE":false,"baseModel":null},"hardware":{"hwClass":"DISCRETE_GPU","gpuName":"Radeon 8060S","gpuCount":1,"vramGb":96,"chipVendor":null,"chipFamily":null,"chipVariant":null,"unifiedMemoryGb":null,"cpu":null,"os":null,"isHeterogeneousGpu":false,"gpuSlots":[]},"engine":{"engineName":"hipfire","engineVersion":"0.1.8-alpha","quantization":"MQ4","backend":"rocm"},"user":{"id":"cmojgsim70000l1043eaus0fz","username":"micahchoo","verified":false,"verifiedAt":null,"pro":false},"hardwareGroupLabel":"Radeon 8060S","hardwareGroupKey":"DISCRETE_GPU:radeon 8060s","rank":41,"reactionCounts":{},"myEmoji":null}],"total":41,"limit":50,"offset":0}