<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://labs.scale.com/papers</loc>
<lastmod>2026-04-03T17:48:57.630Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/defensive-refusal-bias</loc>
<lastmod>2026-03-12T02:35:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/vero</loc>
<lastmod>2026-02-25T18:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/lhaw</loc>
<lastmod>2026-02-12T20:23:04.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/scipredict</loc>
<lastmod>2026-01-15T21:20:54.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/agenticrubrics</loc>
<lastmod>2026-01-06T21:24:35.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/morebench</loc>
<lastmod>2025-12-22T17:08:50.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/mcpatlas</loc>
<lastmod>2025-12-18T17:58:33.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/audiomc</loc>
<lastmod>2025-12-17T18:07:23.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/propensitybench</loc>
<lastmod>2025-11-25T15:39:17.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/prbench</loc>
<lastmod>2025-11-13T15:51:09.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/researchrubrics</loc>
<lastmod>2025-11-10T21:14:40.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/bioriskeval</loc>
<lastmod>2025-11-05T02:03:36.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/rli</loc>
<lastmod>2025-10-28T15:08:08.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/reasoning_gym</loc>
<lastmod>2025-10-20T16:16:42.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/vtb</loc>
<lastmod>2025-10-15T15:29:41.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/onlinerubrics</loc>
<lastmod>2025-10-08T18:36:27.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/rubric_reward_modeling</loc>
<lastmod>2025-09-25T17:47:31.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/progres_over_points</loc>
<lastmod>2025-09-23T16:20:16.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/swe_bench_pro</loc>
<lastmod>2025-09-19T21:43:03.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/tutorbench</loc>
<lastmod>2025-09-11T18:46:28.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/mrt</loc>
<lastmod>2025-08-26T13:33:41.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/stc</loc>
<lastmod>2025-08-13T16:02:22.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/multinrc</loc>
<lastmod>2025-07-23T22:26:15.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/rubrics_as_rewards</loc>
<lastmod>2025-07-23T15:47:27.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/webguard</loc>
<lastmod>2025-07-21T17:43:39.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/cotm</loc>
<lastmod>2025-07-15T20:37:46.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/verbalize_cot_reasoning</loc>
<lastmod>2025-06-28T18:58:49.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/fortress</loc>
<lastmod>2025-06-18T15:26:23.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/guide</loc>
<lastmod>2025-06-16T16:50:55.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/agent_rlvr</loc>
<lastmod>2025-06-13T15:44:21.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/red_teaming_roadmap</loc>
<lastmod>2025-06-05T15:41:49.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/robustness_spurious_correlations</loc>
<lastmod>2025-05-09T15:15:04.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/rerank_beyond_relevance</loc>
<lastmod>2025-03-14T18:34:53.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/cfpd</loc>
<lastmod>2025-03-08T15:32:42.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/mask</loc>
<lastmod>2025-03-05T17:38:32.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/enigma_eval</loc>
<lastmod>2025-02-13T15:22:49.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/j2</loc>
<lastmod>2025-02-11T01:51:06.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/projecttest</loc>
<lastmod>2025-02-10T20:07:01.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/multichallenge</loc>
<lastmod>2025-01-29T18:41:05.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/humanitys-last-exam</loc>
<lastmod>2025-01-23T21:50:12.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/toolcomp-a-multi-tool-reasoning-and-process-supervision-benchmark</loc>
<lastmod>2025-01-02T19:39:22.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/browser-art</loc>
<lastmod>2024-10-11T20:53:09.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/balancing-cost-and-effectiveness-of-synthetic-data-generation-strategies-for-fine-tuning-llms</loc>
<lastmod>2024-09-29T20:50:44.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/revisiting-the-superficial-alignment-hypothesis</loc>
<lastmod>2024-09-27T20:51:15.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/planning-natural-language-llm-search-code-generation</loc>
<lastmod>2024-09-05T20:52:01.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/pre-training-multimodal-hallucination-detectors-with-corrupted-grounding-data</loc>
<lastmod>2024-08-30T18:19:02.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/mhj</loc>
<lastmod>2024-08-27T20:52:45.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/learning-goalconditioned-representations-large-rewards-models</loc>
<lastmod>2024-07-18T20:51:39.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/llm-performance-grade-school-arithmetic</loc>
<lastmod>2024-05-01T20:52:23.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/the-wmdp-benchmark-measuring-and-reducing-malicious-use-with-unlearning</loc>
<lastmod>2024-03-05T19:39:05.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/out-of-distribution-detection-&amp;-applications-with-ablated-learned-temperature-energy</loc>
<lastmod>2024-01-22T19:36:54.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/a-baseline-analysis-of-reward-models-ability-to-accurately-analyze-foundation-models-under-distribution-shift</loc>
<lastmod>2023-11-21T19:37:07.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/a-holistic-approach-for-test-and-evaluation-of-large-language-models</loc>
<lastmod>2023-10-05T18:38:16.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/on-the-performance-of-multimodal-language-models</loc>
<lastmod>2023-10-04T18:36:32.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/empirical-analysis-of-the-strengths-and-weaknesses-of-peft-techniques-for-llms</loc>
<lastmod>2023-04-28T18:34:53.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/detecting-and-preventing-hallucinations-in-large-vision-language-models</loc>
<lastmod>2023-04-11T18:35:40.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/enabling-calibration-in-the-zero-shot-inference-of-large-vision-language-models</loc>
<lastmod>2023-03-11T19:35:24.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/improving-the-accuracy-robustness-trade-off-of-classifiers-via-adaptive-smoothing</loc>
<lastmod>2023-01-29T19:34:15.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/glidenet-global-local-and-intrinsic-based-dense-embedding-network-for-multi-category-attributes-prediction</loc>
<lastmod>2022-03-07T19:29:50.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/car-cityscapes-attributes-recognition-a-multi-category-attributes-dataset-for-autonomous-vehicles</loc>
<lastmod>2021-11-16T19:29:31.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/natural-adversarial-objects</loc>
<lastmod>2021-11-07T19:32:46.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/debagreement:-a-comment-reply-dataset-for-disagreement-detection-in-online-debates</loc>
<lastmod>2021-10-11T18:33:31.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/on-the-state-of-data-in-computer-vision-human-annotations-remain-indispensable-for-developing-deep-learning-models</loc>
<lastmod>2021-07-31T18:23:36.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/evaluating-deep-neural-networks-trained-on-clinical-images-in-dermatology-with-the-fitzpatrick-17k-dataset</loc>
<lastmod>2021-04-20T18:31:01.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://labs.scale.com/papers/a-survey-of-deep-learning-approaches-for-ocr-and-document-understanding</loc>
<lastmod>2020-11-27T19:22:44.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
</urlset>
