From 6b95c3cdf2e4eeb53f3180a01203a01952a14408 Mon Sep 17 00:00:00 2001 From: ganler Date: Fri, 31 May 2024 22:49:35 -0500 Subject: [PATCH] fix: qwen 72b result --- results/repoqa/COMBINED-RESULTS.json | 4454 +++++++++++++------------- 1 file changed, 2227 insertions(+), 2227 deletions(-) diff --git a/results/repoqa/COMBINED-RESULTS.json b/results/repoqa/COMBINED-RESULTS.json index c341677..ed3e969 100644 --- a/results/repoqa/COMBINED-RESULTS.json +++ b/results/repoqa/COMBINED-RESULTS.json @@ -1,253 +1,253 @@ { - "mistralai/Mixtral-8x22B-Instruct-v0.1": { - "eval_date": "2024-05-13 01:39:30.147044", - "train_size": "64k", + "gpt-4-turbo-2024-04-09": { + "eval_date": "2024-05-13 01:38:51.141924", + "train_size": "128k", "scores": { "all": { - "0.0": { "pass@1": 0.926 }, - "0.1": { "pass@1": 0.872 }, - "0.2": { "pass@1": 0.844 }, - "0.3": { "pass@1": 0.812 }, - "0.4": { "pass@1": 0.794 }, - "0.5": { "pass@1": 0.774 }, - "0.6": { "pass@1": 0.744 }, - "0.7": { "pass@1": 0.714 }, - "0.8": { "pass@1": 0.678 }, - "0.9": { "pass@1": 0.622 }, - "1.0": { "pass@1": 0.502 } + "0.0": { "pass@1": 0.878 }, + "0.1": { "pass@1": 0.828 }, + "0.2": { "pass@1": 0.814 }, + "0.3": { "pass@1": 0.802 }, + "0.4": { "pass@1": 0.796 }, + "0.5": { "pass@1": 0.786 }, + "0.6": { "pass@1": 0.776 }, + "0.7": { "pass@1": 0.772 }, + "0.8": { "pass@1": 0.764 }, + "0.9": { "pass@1": 0.744 }, + "1.0": { "pass@1": 0.668 } }, "python": { - "0.0": { "pass@1": 0.89 }, - "0.1": { "pass@1": 0.8 }, - "0.2": { "pass@1": 0.79 }, - "0.3": { "pass@1": 0.76 }, - "0.4": { "pass@1": 0.74 }, - "0.5": { "pass@1": 0.73 }, - "0.6": { "pass@1": 0.69 }, - "0.7": { "pass@1": 0.64 }, - "0.8": { "pass@1": 0.6 }, - "0.9": { "pass@1": 0.55 }, - "1.0": { "pass@1": 0.44 } + "0.0": { "pass@1": 0.97 }, + "0.1": { "pass@1": 0.91 }, + "0.2": { "pass@1": 0.89 }, + "0.3": { "pass@1": 0.88 }, + "0.4": { "pass@1": 0.87 }, + "0.5": { "pass@1": 0.85 }, + "0.6": { "pass@1": 0.84 }, + "0.7": { "pass@1": 0.84 }, + "0.8": { "pass@1": 0.84 }, + "0.9": { "pass@1": 0.83 }, + "1.0": { "pass@1": 0.82 } }, "cpp": { "0.0": { "pass@1": 0.92 }, - "0.1": { "pass@1": 0.86 }, - "0.2": { "pass@1": 0.8 }, - "0.3": { "pass@1": 0.78 }, - "0.4": { "pass@1": 0.76 }, - "0.5": { "pass@1": 0.75 }, - "0.6": { "pass@1": 0.73 }, - "0.7": { "pass@1": 0.7 }, - "0.8": { "pass@1": 0.67 }, - "0.9": { "pass@1": 0.62 }, - "1.0": { "pass@1": 0.51 } + "0.1": { "pass@1": 0.85 }, + "0.2": { "pass@1": 0.82 }, + "0.3": { "pass@1": 0.82 }, + "0.4": { "pass@1": 0.82 }, + "0.5": { "pass@1": 0.82 }, + "0.6": { "pass@1": 0.8 }, + "0.7": { "pass@1": 0.8 }, + "0.8": { "pass@1": 0.79 }, + "0.9": { "pass@1": 0.77 }, + "1.0": { "pass@1": 0.7 } }, "java": { "0.0": { "pass@1": 0.97 }, - "0.1": { "pass@1": 0.96 }, - "0.2": { "pass@1": 0.94 }, - "0.3": { "pass@1": 0.9 }, - "0.4": { "pass@1": 0.89 }, - "0.5": { "pass@1": 0.87 }, - "0.6": { "pass@1": 0.86 }, - "0.7": { "pass@1": 0.86 }, - "0.8": { "pass@1": 0.83 }, - "0.9": { "pass@1": 0.77 }, - "1.0": { "pass@1": 0.69 } + "0.1": { "pass@1": 0.94 }, + "0.2": { "pass@1": 0.93 }, + "0.3": { "pass@1": 0.91 }, + "0.4": { "pass@1": 0.9 }, + "0.5": { "pass@1": 0.9 }, + "0.6": { "pass@1": 0.9 }, + "0.7": { "pass@1": 0.9 }, + "0.8": { "pass@1": 0.89 }, + "0.9": { "pass@1": 0.86 }, + "1.0": { "pass@1": 0.83 } }, "typescript": { - "0.0": { "pass@1": 0.95 }, - "0.1": { "pass@1": 0.9 }, - "0.2": { "pass@1": 0.88 }, - "0.3": { "pass@1": 0.82 }, - "0.4": { "pass@1": 0.78 }, - "0.5": { "pass@1": 0.74 }, - "0.6": { "pass@1": 0.69 }, - "0.7": { "pass@1": 0.63 }, + "0.0": { "pass@1": 0.69 }, + "0.1": { "pass@1": 0.63 }, + "0.2": { "pass@1": 0.63 }, + "0.3": { "pass@1": 0.62 }, + "0.4": { "pass@1": 0.61 }, + "0.5": { "pass@1": 0.59 }, + "0.6": { "pass@1": 0.58 }, + "0.7": { "pass@1": 0.57 }, "0.8": { "pass@1": 0.55 }, - "0.9": { "pass@1": 0.43 }, - "1.0": { "pass@1": 0.18 } + "0.9": { "pass@1": 0.51 }, + "1.0": { "pass@1": 0.25 } }, "rust": { - "0.0": { "pass@1": 0.9 }, - "0.1": { "pass@1": 0.84 }, - "0.2": { "pass@1": 0.81 }, - "0.3": { "pass@1": 0.8 }, - "0.4": { "pass@1": 0.8 }, - "0.5": { "pass@1": 0.78 }, - "0.6": { "pass@1": 0.75 }, - "0.7": { "pass@1": 0.74 }, - "0.8": { "pass@1": 0.74 }, - "0.9": { "pass@1": 0.74 }, - "1.0": { "pass@1": 0.69 } + "0.0": { "pass@1": 0.84 }, + "0.1": { "pass@1": 0.81 }, + "0.2": { "pass@1": 0.8 }, + "0.3": { "pass@1": 0.78 }, + "0.4": { "pass@1": 0.78 }, + "0.5": { "pass@1": 0.77 }, + "0.6": { "pass@1": 0.76 }, + "0.7": { "pass@1": 0.75 }, + "0.8": { "pass@1": 0.75 }, + "0.9": { "pass@1": 0.75 }, + "1.0": { "pass@1": 0.74 } } } }, - "claude-3-opus-20240229": { - "eval_date": "2024-05-13 01:37:48.414103", - "train_size": "200k", + "codellama/CodeLlama-7b-Instruct-hf": { + "eval_date": "2024-05-13 01:38:12.022860", + "train_size": "16k", "scores": { "all": { - "0.0": { "pass@1": 0.968 }, - "0.1": { "pass@1": 0.946 }, - "0.2": { "pass@1": 0.936 }, - "0.3": { "pass@1": 0.93 }, - "0.4": { "pass@1": 0.928 }, - "0.5": { "pass@1": 0.926 }, - "0.6": { "pass@1": 0.91 }, - "0.7": { "pass@1": 0.908 }, - "0.8": { "pass@1": 0.906 }, - "0.9": { "pass@1": 0.892 }, - "1.0": { "pass@1": 0.814 } + "0.0": { "pass@1": 0.712 }, + "0.1": { "pass@1": 0.524 }, + "0.2": { "pass@1": 0.446 }, + "0.3": { "pass@1": 0.414 }, + "0.4": { "pass@1": 0.37 }, + "0.5": { "pass@1": 0.348 }, + "0.6": { "pass@1": 0.326 }, + "0.7": { "pass@1": 0.312 }, + "0.8": { "pass@1": 0.282 }, + "0.9": { "pass@1": 0.234 }, + "1.0": { "pass@1": 0.15 } }, "python": { - "0.0": { "pass@1": 0.98 }, - "0.1": { "pass@1": 0.97 }, - "0.2": { "pass@1": 0.97 }, - "0.3": { "pass@1": 0.96 }, - "0.4": { "pass@1": 0.96 }, - "0.5": { "pass@1": 0.95 }, - "0.6": { "pass@1": 0.93 }, - "0.7": { "pass@1": 0.93 }, - "0.8": { "pass@1": 0.93 }, - "0.9": { "pass@1": 0.93 }, - "1.0": { "pass@1": 0.93 } + "0.0": { "pass@1": 0.8 }, + "0.1": { "pass@1": 0.59 }, + "0.2": { "pass@1": 0.46 }, + "0.3": { "pass@1": 0.43 }, + "0.4": { "pass@1": 0.35 }, + "0.5": { "pass@1": 0.32 }, + "0.6": { "pass@1": 0.27 }, + "0.7": { "pass@1": 0.25 }, + "0.8": { "pass@1": 0.2 }, + "0.9": { "pass@1": 0.18 }, + "1.0": { "pass@1": 0.14 } }, "cpp": { - "0.0": { "pass@1": 0.89 }, - "0.1": { "pass@1": 0.86 }, - "0.2": { "pass@1": 0.86 }, - "0.3": { "pass@1": 0.86 }, - "0.4": { "pass@1": 0.86 }, - "0.5": { "pass@1": 0.86 }, - "0.6": { "pass@1": 0.83 }, - "0.7": { "pass@1": 0.83 }, - "0.8": { "pass@1": 0.83 }, - "0.9": { "pass@1": 0.83 }, - "1.0": { "pass@1": 0.79 } + "0.0": { "pass@1": 0.73 }, + "0.1": { "pass@1": 0.55 }, + "0.2": { "pass@1": 0.49 }, + "0.3": { "pass@1": 0.49 }, + "0.4": { "pass@1": 0.48 }, + "0.5": { "pass@1": 0.46 }, + "0.6": { "pass@1": 0.45 }, + "0.7": { "pass@1": 0.43 }, + "0.8": { "pass@1": 0.41 }, + "0.9": { "pass@1": 0.35 }, + "1.0": { "pass@1": 0.21 } }, "java": { - "0.0": { "pass@1": 0.98 }, - "0.1": { "pass@1": 0.98 }, - "0.2": { "pass@1": 0.96 }, - "0.3": { "pass@1": 0.96 }, - "0.4": { "pass@1": 0.96 }, - "0.5": { "pass@1": 0.96 }, - "0.6": { "pass@1": 0.95 }, - "0.7": { "pass@1": 0.95 }, - "0.8": { "pass@1": 0.95 }, - "0.9": { "pass@1": 0.94 }, - "1.0": { "pass@1": 0.94 } - }, - "typescript": { - "0.0": { "pass@1": 1.0 }, - "0.1": { "pass@1": 0.98 }, - "0.2": { "pass@1": 0.97 }, - "0.3": { "pass@1": 0.96 }, - "0.4": { "pass@1": 0.96 }, - "0.5": { "pass@1": 0.96 }, - "0.6": { "pass@1": 0.95 }, - "0.7": { "pass@1": 0.95 }, - "0.8": { "pass@1": 0.94 }, - "0.9": { "pass@1": 0.88 }, - "1.0": { "pass@1": 0.53 } + "0.0": { "pass@1": 0.65 }, + "0.1": { "pass@1": 0.44 }, + "0.2": { "pass@1": 0.4 }, + "0.3": { "pass@1": 0.38 }, + "0.4": { "pass@1": 0.31 }, + "0.5": { "pass@1": 0.3 }, + "0.6": { "pass@1": 0.28 }, + "0.7": { "pass@1": 0.28 }, + "0.8": { "pass@1": 0.25 }, + "0.9": { "pass@1": 0.17 }, + "1.0": { "pass@1": 0.11 } }, "rust": { - "0.0": { "pass@1": 0.99 }, - "0.1": { "pass@1": 0.94 }, - "0.2": { "pass@1": 0.92 }, - "0.3": { "pass@1": 0.91 }, - "0.4": { "pass@1": 0.9 }, - "0.5": { "pass@1": 0.9 }, - "0.6": { "pass@1": 0.89 }, - "0.7": { "pass@1": 0.88 }, - "0.8": { "pass@1": 0.88 }, - "0.9": { "pass@1": 0.88 }, - "1.0": { "pass@1": 0.88 } - } - } - }, - "microsoft/Phi-3-medium-128k-instruct": { - "eval_date": "2024-05-22 22:30:02.520421", - "train_size": "128k", + "0.0": { "pass@1": 0.57 }, + "0.1": { "pass@1": 0.33 }, + "0.2": { "pass@1": 0.31 }, + "0.3": { "pass@1": 0.27 }, + "0.4": { "pass@1": 0.25 }, + "0.5": { "pass@1": 0.25 }, + "0.6": { "pass@1": 0.25 }, + "0.7": { "pass@1": 0.24 }, + "0.8": { "pass@1": 0.22 }, + "0.9": { "pass@1": 0.2 }, + "1.0": { "pass@1": 0.18 } + }, + "typescript": { + "0.0": { "pass@1": 0.81 }, + "0.1": { "pass@1": 0.71 }, + "0.2": { "pass@1": 0.57 }, + "0.3": { "pass@1": 0.5 }, + "0.4": { "pass@1": 0.46 }, + "0.5": { "pass@1": 0.41 }, + "0.6": { "pass@1": 0.38 }, + "0.7": { "pass@1": 0.36 }, + "0.8": { "pass@1": 0.33 }, + "0.9": { "pass@1": 0.27 }, + "1.0": { "pass@1": 0.11 } + } + } + }, + "gemini-1.5-flash-latest": { + "eval_date": "2024-05-19 04:32:12.200298", + "train_size": "1000k", "scores": { "all": { - "0.0": { "pass@1": 0.88 }, - "0.1": { "pass@1": 0.804 }, - "0.2": { "pass@1": 0.76 }, - "0.3": { "pass@1": 0.736 }, - "0.4": { "pass@1": 0.726 }, - "0.5": { "pass@1": 0.708 }, - "0.6": { "pass@1": 0.678 }, - "0.7": { "pass@1": 0.662 }, - "0.8": { "pass@1": 0.632 }, - "0.9": { "pass@1": 0.562 }, - "1.0": { "pass@1": 0.412 } + "0.0": { "pass@1": 0.938 }, + "0.1": { "pass@1": 0.926 }, + "0.2": { "pass@1": 0.916 }, + "0.3": { "pass@1": 0.914 }, + "0.4": { "pass@1": 0.912 }, + "0.5": { "pass@1": 0.912 }, + "0.6": { "pass@1": 0.904 }, + "0.7": { "pass@1": 0.904 }, + "0.8": { "pass@1": 0.9 }, + "0.9": { "pass@1": 0.886 }, + "1.0": { "pass@1": 0.8 } }, "python": { - "0.0": { "pass@1": 0.95 }, - "0.1": { "pass@1": 0.82 }, - "0.2": { "pass@1": 0.79 }, - "0.3": { "pass@1": 0.75 }, - "0.4": { "pass@1": 0.75 }, - "0.5": { "pass@1": 0.71 }, - "0.6": { "pass@1": 0.64 }, - "0.7": { "pass@1": 0.61 }, - "0.8": { "pass@1": 0.57 }, - "0.9": { "pass@1": 0.51 }, - "1.0": { "pass@1": 0.4 } + "0.0": { "pass@1": 0.93 }, + "0.1": { "pass@1": 0.93 }, + "0.2": { "pass@1": 0.93 }, + "0.3": { "pass@1": 0.93 }, + "0.4": { "pass@1": 0.93 }, + "0.5": { "pass@1": 0.93 }, + "0.6": { "pass@1": 0.93 }, + "0.7": { "pass@1": 0.93 }, + "0.8": { "pass@1": 0.93 }, + "0.9": { "pass@1": 0.92 }, + "1.0": { "pass@1": 0.91 } }, "cpp": { - "0.0": { "pass@1": 0.81 }, - "0.1": { "pass@1": 0.72 }, - "0.2": { "pass@1": 0.68 }, - "0.3": { "pass@1": 0.66 }, - "0.4": { "pass@1": 0.64 }, - "0.5": { "pass@1": 0.64 }, - "0.6": { "pass@1": 0.61 }, - "0.7": { "pass@1": 0.58 }, - "0.8": { "pass@1": 0.54 }, - "0.9": { "pass@1": 0.51 }, - "1.0": { "pass@1": 0.37 } + "0.0": { "pass@1": 0.87 }, + "0.1": { "pass@1": 0.85 }, + "0.2": { "pass@1": 0.81 }, + "0.3": { "pass@1": 0.81 }, + "0.4": { "pass@1": 0.81 }, + "0.5": { "pass@1": 0.81 }, + "0.6": { "pass@1": 0.79 }, + "0.7": { "pass@1": 0.79 }, + "0.8": { "pass@1": 0.79 }, + "0.9": { "pass@1": 0.79 }, + "1.0": { "pass@1": 0.76 } }, "java": { - "0.0": { "pass@1": 0.85 }, - "0.1": { "pass@1": 0.8 }, - "0.2": { "pass@1": 0.75 }, - "0.3": { "pass@1": 0.72 }, - "0.4": { "pass@1": 0.72 }, - "0.5": { "pass@1": 0.71 }, - "0.6": { "pass@1": 0.7 }, - "0.7": { "pass@1": 0.69 }, - "0.8": { "pass@1": 0.69 }, - "0.9": { "pass@1": 0.59 }, - "1.0": { "pass@1": 0.5 } + "0.0": { "pass@1": 0.98 }, + "0.1": { "pass@1": 0.98 }, + "0.2": { "pass@1": 0.97 }, + "0.3": { "pass@1": 0.97 }, + "0.4": { "pass@1": 0.97 }, + "0.5": { "pass@1": 0.97 }, + "0.6": { "pass@1": 0.96 }, + "0.7": { "pass@1": 0.96 }, + "0.8": { "pass@1": 0.94 }, + "0.9": { "pass@1": 0.94 }, + "1.0": { "pass@1": 0.94 } }, "typescript": { - "0.0": { "pass@1": 0.96 }, - "0.1": { "pass@1": 0.93 }, - "0.2": { "pass@1": 0.88 }, - "0.3": { "pass@1": 0.86 }, - "0.4": { "pass@1": 0.84 }, - "0.5": { "pass@1": 0.81 }, - "0.6": { "pass@1": 0.79 }, - "0.7": { "pass@1": 0.79 }, - "0.8": { "pass@1": 0.74 }, - "0.9": { "pass@1": 0.64 }, - "1.0": { "pass@1": 0.3 } + "0.0": { "pass@1": 0.97 }, + "0.1": { "pass@1": 0.97 }, + "0.2": { "pass@1": 0.97 }, + "0.3": { "pass@1": 0.97 }, + "0.4": { "pass@1": 0.97 }, + "0.5": { "pass@1": 0.97 }, + "0.6": { "pass@1": 0.97 }, + "0.7": { "pass@1": 0.97 }, + "0.8": { "pass@1": 0.97 }, + "0.9": { "pass@1": 0.93 }, + "1.0": { "pass@1": 0.56 } }, "rust": { - "0.0": { "pass@1": 0.83 }, - "0.1": { "pass@1": 0.75 }, - "0.2": { "pass@1": 0.7 }, - "0.3": { "pass@1": 0.69 }, - "0.4": { "pass@1": 0.68 }, - "0.5": { "pass@1": 0.67 }, - "0.6": { "pass@1": 0.65 }, - "0.7": { "pass@1": 0.64 }, - "0.8": { "pass@1": 0.62 }, - "0.9": { "pass@1": 0.56 }, - "1.0": { "pass@1": 0.49 } + "0.0": { "pass@1": 0.94 }, + "0.1": { "pass@1": 0.9 }, + "0.2": { "pass@1": 0.9 }, + "0.3": { "pass@1": 0.89 }, + "0.4": { "pass@1": 0.88 }, + "0.5": { "pass@1": 0.88 }, + "0.6": { "pass@1": 0.87 }, + "0.7": { "pass@1": 0.87 }, + "0.8": { "pass@1": 0.87 }, + "0.9": { "pass@1": 0.85 }, + "1.0": { "pass@1": 0.83 } } } }, @@ -335,2439 +335,2439 @@ } } }, - "gpt-4-turbo-2024-04-09": { - "eval_date": "2024-05-13 01:38:51.141924", - "train_size": "128k", + "google/codegemma-7b-it": { + "eval_date": "2024-05-13 01:38:40.273310", + "train_size": "8k", "scores": { "all": { - "0.0": { "pass@1": 0.878 }, - "0.1": { "pass@1": 0.828 }, - "0.2": { "pass@1": 0.814 }, - "0.3": { "pass@1": 0.802 }, - "0.4": { "pass@1": 0.796 }, - "0.5": { "pass@1": 0.786 }, - "0.6": { "pass@1": 0.776 }, - "0.7": { "pass@1": 0.772 }, - "0.8": { "pass@1": 0.764 }, - "0.9": { "pass@1": 0.744 }, - "1.0": { "pass@1": 0.668 } + "0.0": { "pass@1": 0.074 }, + "0.1": { "pass@1": 0.028 }, + "0.2": { "pass@1": 0.026 }, + "0.3": { "pass@1": 0.024 }, + "0.4": { "pass@1": 0.024 }, + "0.5": { "pass@1": 0.024 }, + "0.6": { "pass@1": 0.022 }, + "0.7": { "pass@1": 0.022 }, + "0.8": { "pass@1": 0.022 }, + "0.9": { "pass@1": 0.018 }, + "1.0": { "pass@1": 0.016 } }, "python": { - "0.0": { "pass@1": 0.97 }, - "0.1": { "pass@1": 0.91 }, - "0.2": { "pass@1": 0.89 }, - "0.3": { "pass@1": 0.88 }, - "0.4": { "pass@1": 0.87 }, - "0.5": { "pass@1": 0.85 }, - "0.6": { "pass@1": 0.84 }, - "0.7": { "pass@1": 0.84 }, - "0.8": { "pass@1": 0.84 }, - "0.9": { "pass@1": 0.83 }, - "1.0": { "pass@1": 0.82 } + "0.0": { "pass@1": 0.07 }, + "0.1": { "pass@1": 0.03 }, + "0.2": { "pass@1": 0.03 }, + "0.3": { "pass@1": 0.03 }, + "0.4": { "pass@1": 0.03 }, + "0.5": { "pass@1": 0.03 }, + "0.6": { "pass@1": 0.03 }, + "0.7": { "pass@1": 0.03 }, + "0.8": { "pass@1": 0.03 }, + "0.9": { "pass@1": 0.03 }, + "1.0": { "pass@1": 0.03 } }, "cpp": { - "0.0": { "pass@1": 0.92 }, - "0.1": { "pass@1": 0.85 }, - "0.2": { "pass@1": 0.82 }, - "0.3": { "pass@1": 0.82 }, - "0.4": { "pass@1": 0.82 }, - "0.5": { "pass@1": 0.82 }, - "0.6": { "pass@1": 0.8 }, - "0.7": { "pass@1": 0.8 }, - "0.8": { "pass@1": 0.79 }, - "0.9": { "pass@1": 0.77 }, - "1.0": { "pass@1": 0.7 } + "0.0": { "pass@1": 0.07 }, + "0.1": { "pass@1": 0.03 }, + "0.2": { "pass@1": 0.03 }, + "0.3": { "pass@1": 0.03 }, + "0.4": { "pass@1": 0.03 }, + "0.5": { "pass@1": 0.03 }, + "0.6": { "pass@1": 0.02 }, + "0.7": { "pass@1": 0.02 }, + "0.8": { "pass@1": 0.02 }, + "0.9": { "pass@1": 0.02 }, + "1.0": { "pass@1": 0.02 } }, "java": { - "0.0": { "pass@1": 0.97 }, - "0.1": { "pass@1": 0.94 }, - "0.2": { "pass@1": 0.93 }, - "0.3": { "pass@1": 0.91 }, - "0.4": { "pass@1": 0.9 }, - "0.5": { "pass@1": 0.9 }, - "0.6": { "pass@1": 0.9 }, - "0.7": { "pass@1": 0.9 }, - "0.8": { "pass@1": 0.89 }, - "0.9": { "pass@1": 0.86 }, - "1.0": { "pass@1": 0.83 } - }, - "typescript": { - "0.0": { "pass@1": 0.69 }, - "0.1": { "pass@1": 0.63 }, - "0.2": { "pass@1": 0.63 }, - "0.3": { "pass@1": 0.62 }, - "0.4": { "pass@1": 0.61 }, - "0.5": { "pass@1": 0.59 }, - "0.6": { "pass@1": 0.58 }, - "0.7": { "pass@1": 0.57 }, - "0.8": { "pass@1": 0.55 }, - "0.9": { "pass@1": 0.51 }, - "1.0": { "pass@1": 0.25 } + "0.0": { "pass@1": 0.07 }, + "0.1": { "pass@1": 0.03 }, + "0.2": { "pass@1": 0.02 }, + "0.3": { "pass@1": 0.01 }, + "0.4": { "pass@1": 0.01 }, + "0.5": { "pass@1": 0.01 }, + "0.6": { "pass@1": 0.01 }, + "0.7": { "pass@1": 0.01 }, + "0.8": { "pass@1": 0.01 }, + "0.9": { "pass@1": 0.01 }, + "1.0": { "pass@1": 0.01 } }, "rust": { - "0.0": { "pass@1": 0.84 }, - "0.1": { "pass@1": 0.81 }, - "0.2": { "pass@1": 0.8 }, - "0.3": { "pass@1": 0.78 }, - "0.4": { "pass@1": 0.78 }, - "0.5": { "pass@1": 0.77 }, - "0.6": { "pass@1": 0.76 }, - "0.7": { "pass@1": 0.75 }, - "0.8": { "pass@1": 0.75 }, - "0.9": { "pass@1": 0.75 }, - "1.0": { "pass@1": 0.74 } + "0.0": { "pass@1": 0.08 }, + "0.1": { "pass@1": 0.01 }, + "0.2": { "pass@1": 0.01 }, + "0.3": { "pass@1": 0.01 }, + "0.4": { "pass@1": 0.01 }, + "0.5": { "pass@1": 0.01 }, + "0.6": { "pass@1": 0.01 }, + "0.7": { "pass@1": 0.01 }, + "0.8": { "pass@1": 0.01 }, + "0.9": { "pass@1": 0.01 }, + "1.0": { "pass@1": 0.0 } + }, + "typescript": { + "0.0": { "pass@1": 0.08 }, + "0.1": { "pass@1": 0.04 }, + "0.2": { "pass@1": 0.04 }, + "0.3": { "pass@1": 0.04 }, + "0.4": { "pass@1": 0.04 }, + "0.5": { "pass@1": 0.04 }, + "0.6": { "pass@1": 0.04 }, + "0.7": { "pass@1": 0.04 }, + "0.8": { "pass@1": 0.04 }, + "0.9": { "pass@1": 0.02 }, + "1.0": { "pass@1": 0.02 } } } }, - "Qwen/CodeQwen1.5-7B-Chat": { - "eval_date": "2024-05-13 02:01:09.406097", - "train_size": "64k", + "microsoft/Phi-3-small-128k-instruct": { + "eval_date": "2024-05-23 02:31:23.338528", + "train_size": "128k", "scores": { "all": { - "0.0": { "pass@1": 0.848 }, - "0.1": { "pass@1": 0.772 }, - "0.2": { "pass@1": 0.74 }, - "0.3": { "pass@1": 0.724 }, - "0.4": { "pass@1": 0.698 }, - "0.5": { "pass@1": 0.686 }, - "0.6": { "pass@1": 0.66 }, - "0.7": { "pass@1": 0.648 }, - "0.8": { "pass@1": 0.628 }, - "0.9": { "pass@1": 0.6 }, - "1.0": { "pass@1": 0.51 } + "0.0": { "pass@1": 0.792 }, + "0.1": { "pass@1": 0.662 }, + "0.2": { "pass@1": 0.612 }, + "0.3": { "pass@1": 0.564 }, + "0.4": { "pass@1": 0.526 }, + "0.5": { "pass@1": 0.49 }, + "0.6": { "pass@1": 0.466 }, + "0.7": { "pass@1": 0.444 }, + "0.8": { "pass@1": 0.396 }, + "0.9": { "pass@1": 0.322 }, + "1.0": { "pass@1": 0.242 } }, "python": { - "0.0": { "pass@1": 0.92 }, - "0.1": { "pass@1": 0.81 }, - "0.2": { "pass@1": 0.77 }, - "0.3": { "pass@1": 0.77 }, - "0.4": { "pass@1": 0.76 }, - "0.5": { "pass@1": 0.75 }, - "0.6": { "pass@1": 0.71 }, - "0.7": { "pass@1": 0.69 }, - "0.8": { "pass@1": 0.69 }, - "0.9": { "pass@1": 0.67 }, - "1.0": { "pass@1": 0.63 } + "0.0": { "pass@1": 0.74 }, + "0.1": { "pass@1": 0.57 }, + "0.2": { "pass@1": 0.52 }, + "0.3": { "pass@1": 0.46 }, + "0.4": { "pass@1": 0.41 }, + "0.5": { "pass@1": 0.36 }, + "0.6": { "pass@1": 0.34 }, + "0.7": { "pass@1": 0.29 }, + "0.8": { "pass@1": 0.25 }, + "0.9": { "pass@1": 0.23 }, + "1.0": { "pass@1": 0.21 } }, "cpp": { - "0.0": { "pass@1": 0.69 }, - "0.1": { "pass@1": 0.6 }, - "0.2": { "pass@1": 0.58 }, + "0.0": { "pass@1": 0.83 }, + "0.1": { "pass@1": 0.68 }, + "0.2": { "pass@1": 0.62 }, "0.3": { "pass@1": 0.57 }, - "0.4": { "pass@1": 0.54 }, + "0.4": { "pass@1": 0.55 }, "0.5": { "pass@1": 0.53 }, "0.6": { "pass@1": 0.52 }, "0.7": { "pass@1": 0.5 }, - "0.8": { "pass@1": 0.47 }, - "0.9": { "pass@1": 0.46 }, - "1.0": { "pass@1": 0.4 } + "0.8": { "pass@1": 0.48 }, + "0.9": { "pass@1": 0.4 }, + "1.0": { "pass@1": 0.35 } }, "java": { - "0.0": { "pass@1": 0.93 }, - "0.1": { "pass@1": 0.87 }, - "0.2": { "pass@1": 0.85 }, - "0.3": { "pass@1": 0.84 }, - "0.4": { "pass@1": 0.81 }, - "0.5": { "pass@1": 0.8 }, - "0.6": { "pass@1": 0.78 }, - "0.7": { "pass@1": 0.77 }, - "0.8": { "pass@1": 0.74 }, - "0.9": { "pass@1": 0.71 }, - "1.0": { "pass@1": 0.68 } + "0.0": { "pass@1": 0.83 }, + "0.1": { "pass@1": 0.74 }, + "0.2": { "pass@1": 0.69 }, + "0.3": { "pass@1": 0.67 }, + "0.4": { "pass@1": 0.6 }, + "0.5": { "pass@1": 0.58 }, + "0.6": { "pass@1": 0.55 }, + "0.7": { "pass@1": 0.53 }, + "0.8": { "pass@1": 0.46 }, + "0.9": { "pass@1": 0.39 }, + "1.0": { "pass@1": 0.3 } }, "typescript": { - "0.0": { "pass@1": 0.87 }, - "0.1": { "pass@1": 0.84 }, - "0.2": { "pass@1": 0.8 }, - "0.3": { "pass@1": 0.76 }, - "0.4": { "pass@1": 0.75 }, - "0.5": { "pass@1": 0.73 }, - "0.6": { "pass@1": 0.72 }, - "0.7": { "pass@1": 0.71 }, - "0.8": { "pass@1": 0.67 }, - "0.9": { "pass@1": 0.6 }, - "1.0": { "pass@1": 0.32 } + "0.0": { "pass@1": 0.9 }, + "0.1": { "pass@1": 0.81 }, + "0.2": { "pass@1": 0.78 }, + "0.3": { "pass@1": 0.7 }, + "0.4": { "pass@1": 0.68 }, + "0.5": { "pass@1": 0.62 }, + "0.6": { "pass@1": 0.58 }, + "0.7": { "pass@1": 0.56 }, + "0.8": { "pass@1": 0.49 }, + "0.9": { "pass@1": 0.34 }, + "1.0": { "pass@1": 0.14 } }, "rust": { - "0.0": { "pass@1": 0.83 }, - "0.1": { "pass@1": 0.74 }, - "0.2": { "pass@1": 0.7 }, - "0.3": { "pass@1": 0.68 }, - "0.4": { "pass@1": 0.63 }, - "0.5": { "pass@1": 0.62 }, - "0.6": { "pass@1": 0.57 }, - "0.7": { "pass@1": 0.57 }, - "0.8": { "pass@1": 0.57 }, - "0.9": { "pass@1": 0.56 }, - "1.0": { "pass@1": 0.52 } + "0.0": { "pass@1": 0.66 }, + "0.1": { "pass@1": 0.51 }, + "0.2": { "pass@1": 0.45 }, + "0.3": { "pass@1": 0.42 }, + "0.4": { "pass@1": 0.39 }, + "0.5": { "pass@1": 0.36 }, + "0.6": { "pass@1": 0.34 }, + "0.7": { "pass@1": 0.34 }, + "0.8": { "pass@1": 0.3 }, + "0.9": { "pass@1": 0.25 }, + "1.0": { "pass@1": 0.21 } } } }, - "microsoft/Phi-3-mini-128k-instruct": { - "eval_date": "2024-05-13 01:26:47.347692", - "train_size": "128k", + "mistralai/Mixtral-8x22B-Instruct-v0.1": { + "eval_date": "2024-05-13 01:39:30.147044", + "train_size": "64k", "scores": { "all": { - "0.0": { "pass@1": 0.744 }, - "0.1": { "pass@1": 0.59 }, - "0.2": { "pass@1": 0.502 }, - "0.3": { "pass@1": 0.454 }, - "0.4": { "pass@1": 0.4 }, - "0.5": { "pass@1": 0.366 }, - "0.6": { "pass@1": 0.324 }, - "0.7": { "pass@1": 0.29 }, - "0.8": { "pass@1": 0.224 }, - "0.9": { "pass@1": 0.164 }, - "1.0": { "pass@1": 0.104 } + "0.0": { "pass@1": 0.926 }, + "0.1": { "pass@1": 0.872 }, + "0.2": { "pass@1": 0.844 }, + "0.3": { "pass@1": 0.812 }, + "0.4": { "pass@1": 0.794 }, + "0.5": { "pass@1": 0.774 }, + "0.6": { "pass@1": 0.744 }, + "0.7": { "pass@1": 0.714 }, + "0.8": { "pass@1": 0.678 }, + "0.9": { "pass@1": 0.622 }, + "1.0": { "pass@1": 0.502 } }, "python": { - "0.0": { "pass@1": 0.81 }, - "0.1": { "pass@1": 0.63 }, - "0.2": { "pass@1": 0.55 }, - "0.3": { "pass@1": 0.48 }, - "0.4": { "pass@1": 0.38 }, - "0.5": { "pass@1": 0.31 }, - "0.6": { "pass@1": 0.25 }, - "0.7": { "pass@1": 0.2 }, - "0.8": { "pass@1": 0.19 }, - "0.9": { "pass@1": 0.15 }, - "1.0": { "pass@1": 0.13 } + "0.0": { "pass@1": 0.89 }, + "0.1": { "pass@1": 0.8 }, + "0.2": { "pass@1": 0.79 }, + "0.3": { "pass@1": 0.76 }, + "0.4": { "pass@1": 0.74 }, + "0.5": { "pass@1": 0.73 }, + "0.6": { "pass@1": 0.69 }, + "0.7": { "pass@1": 0.64 }, + "0.8": { "pass@1": 0.6 }, + "0.9": { "pass@1": 0.55 }, + "1.0": { "pass@1": 0.44 } }, "cpp": { - "0.0": { "pass@1": 0.67 }, - "0.1": { "pass@1": 0.43 }, - "0.2": { "pass@1": 0.41 }, - "0.3": { "pass@1": 0.38 }, - "0.4": { "pass@1": 0.35 }, - "0.5": { "pass@1": 0.34 }, - "0.6": { "pass@1": 0.3 }, - "0.7": { "pass@1": 0.3 }, - "0.8": { "pass@1": 0.25 }, - "0.9": { "pass@1": 0.18 }, - "1.0": { "pass@1": 0.14 } + "0.0": { "pass@1": 0.92 }, + "0.1": { "pass@1": 0.86 }, + "0.2": { "pass@1": 0.8 }, + "0.3": { "pass@1": 0.78 }, + "0.4": { "pass@1": 0.76 }, + "0.5": { "pass@1": 0.75 }, + "0.6": { "pass@1": 0.73 }, + "0.7": { "pass@1": 0.7 }, + "0.8": { "pass@1": 0.67 }, + "0.9": { "pass@1": 0.62 }, + "1.0": { "pass@1": 0.51 } }, "java": { - "0.0": { "pass@1": 0.71 }, - "0.1": { "pass@1": 0.6 }, - "0.2": { "pass@1": 0.49 }, - "0.3": { "pass@1": 0.45 }, - "0.4": { "pass@1": 0.42 }, - "0.5": { "pass@1": 0.39 }, - "0.6": { "pass@1": 0.34 }, - "0.7": { "pass@1": 0.31 }, - "0.8": { "pass@1": 0.21 }, - "0.9": { "pass@1": 0.15 }, - "1.0": { "pass@1": 0.1 } + "0.0": { "pass@1": 0.97 }, + "0.1": { "pass@1": 0.96 }, + "0.2": { "pass@1": 0.94 }, + "0.3": { "pass@1": 0.9 }, + "0.4": { "pass@1": 0.89 }, + "0.5": { "pass@1": 0.87 }, + "0.6": { "pass@1": 0.86 }, + "0.7": { "pass@1": 0.86 }, + "0.8": { "pass@1": 0.83 }, + "0.9": { "pass@1": 0.77 }, + "1.0": { "pass@1": 0.69 } }, "typescript": { - "0.0": { "pass@1": 0.78 }, - "0.1": { "pass@1": 0.67 }, - "0.2": { "pass@1": 0.57 }, - "0.3": { "pass@1": 0.49 }, - "0.4": { "pass@1": 0.45 }, - "0.5": { "pass@1": 0.41 }, - "0.6": { "pass@1": 0.37 }, - "0.7": { "pass@1": 0.33 }, - "0.8": { "pass@1": 0.22 }, - "0.9": { "pass@1": 0.14 }, - "1.0": { "pass@1": 0.03 } + "0.0": { "pass@1": 0.95 }, + "0.1": { "pass@1": 0.9 }, + "0.2": { "pass@1": 0.88 }, + "0.3": { "pass@1": 0.82 }, + "0.4": { "pass@1": 0.78 }, + "0.5": { "pass@1": 0.74 }, + "0.6": { "pass@1": 0.69 }, + "0.7": { "pass@1": 0.63 }, + "0.8": { "pass@1": 0.55 }, + "0.9": { "pass@1": 0.43 }, + "1.0": { "pass@1": 0.18 } }, "rust": { - "0.0": { "pass@1": 0.75 }, - "0.1": { "pass@1": 0.62 }, - "0.2": { "pass@1": 0.49 }, - "0.3": { "pass@1": 0.47 }, - "0.4": { "pass@1": 0.4 }, - "0.5": { "pass@1": 0.38 }, - "0.6": { "pass@1": 0.36 }, - "0.7": { "pass@1": 0.31 }, - "0.8": { "pass@1": 0.25 }, - "0.9": { "pass@1": 0.2 }, - "1.0": { "pass@1": 0.12 } + "0.0": { "pass@1": 0.9 }, + "0.1": { "pass@1": 0.84 }, + "0.2": { "pass@1": 0.81 }, + "0.3": { "pass@1": 0.8 }, + "0.4": { "pass@1": 0.8 }, + "0.5": { "pass@1": 0.78 }, + "0.6": { "pass@1": 0.75 }, + "0.7": { "pass@1": 0.74 }, + "0.8": { "pass@1": 0.74 }, + "0.9": { "pass@1": 0.74 }, + "1.0": { "pass@1": 0.69 } } } }, - "Qwen/Qwen1.5-7B-Chat": { - "eval_date": "2024-05-13 01:40:02.854316", - "train_size": "32k", + "gpt-4o-2024-05-13": { + "eval_date": "2024-05-18 21:37:11.078575", + "train_size": "128k", "scores": { "all": { - "0.0": { "pass@1": 0.41 }, - "0.1": { "pass@1": 0.218 }, - "0.2": { "pass@1": 0.15 }, - "0.3": { "pass@1": 0.114 }, - "0.4": { "pass@1": 0.082 }, - "0.5": { "pass@1": 0.06 }, - "0.6": { "pass@1": 0.046 }, - "0.7": { "pass@1": 0.036 }, - "0.8": { "pass@1": 0.028 }, - "0.9": { "pass@1": 0.014 }, - "1.0": { "pass@1": 0.012 } + "0.0": { "pass@1": 0.952 }, + "0.1": { "pass@1": 0.93 }, + "0.2": { "pass@1": 0.922 }, + "0.3": { "pass@1": 0.92 }, + "0.4": { "pass@1": 0.92 }, + "0.5": { "pass@1": 0.92 }, + "0.6": { "pass@1": 0.908 }, + "0.7": { "pass@1": 0.908 }, + "0.8": { "pass@1": 0.906 }, + "0.9": { "pass@1": 0.894 }, + "1.0": { "pass@1": 0.812 } }, "python": { - "0.0": { "pass@1": 0.36 }, - "0.1": { "pass@1": 0.13 }, - "0.2": { "pass@1": 0.08 }, - "0.3": { "pass@1": 0.06 }, - "0.4": { "pass@1": 0.04 }, - "0.5": { "pass@1": 0.04 }, - "0.6": { "pass@1": 0.03 }, - "0.7": { "pass@1": 0.02 }, - "0.8": { "pass@1": 0.01 }, - "0.9": { "pass@1": 0.0 }, - "1.0": { "pass@1": 0.0 } + "0.0": { "pass@1": 0.96 }, + "0.1": { "pass@1": 0.96 }, + "0.2": { "pass@1": 0.96 }, + "0.3": { "pass@1": 0.96 }, + "0.4": { "pass@1": 0.96 }, + "0.5": { "pass@1": 0.96 }, + "0.6": { "pass@1": 0.95 }, + "0.7": { "pass@1": 0.95 }, + "0.8": { "pass@1": 0.95 }, + "0.9": { "pass@1": 0.94 }, + "1.0": { "pass@1": 0.94 } }, "cpp": { - "0.0": { "pass@1": 0.51 }, - "0.1": { "pass@1": 0.31 }, - "0.2": { "pass@1": 0.23 }, - "0.3": { "pass@1": 0.17 }, - "0.4": { "pass@1": 0.12 }, - "0.5": { "pass@1": 0.09 }, - "0.6": { "pass@1": 0.06 }, - "0.7": { "pass@1": 0.06 }, - "0.8": { "pass@1": 0.06 }, - "0.9": { "pass@1": 0.04 }, - "1.0": { "pass@1": 0.03 } + "0.0": { "pass@1": 0.89 }, + "0.1": { "pass@1": 0.85 }, + "0.2": { "pass@1": 0.83 }, + "0.3": { "pass@1": 0.83 }, + "0.4": { "pass@1": 0.83 }, + "0.5": { "pass@1": 0.83 }, + "0.6": { "pass@1": 0.8 }, + "0.7": { "pass@1": 0.8 }, + "0.8": { "pass@1": 0.8 }, + "0.9": { "pass@1": 0.79 }, + "1.0": { "pass@1": 0.75 } }, "java": { - "0.0": { "pass@1": 0.35 }, - "0.1": { "pass@1": 0.15 }, - "0.2": { "pass@1": 0.11 }, - "0.3": { "pass@1": 0.08 }, - "0.4": { "pass@1": 0.06 }, - "0.5": { "pass@1": 0.04 }, - "0.6": { "pass@1": 0.04 }, - "0.7": { "pass@1": 0.03 }, - "0.8": { "pass@1": 0.02 }, - "0.9": { "pass@1": 0.02 }, - "1.0": { "pass@1": 0.02 } - }, - "rust": { - "0.0": { "pass@1": 0.38 }, - "0.1": { "pass@1": 0.2 }, - "0.2": { "pass@1": 0.12 }, - "0.3": { "pass@1": 0.09 }, - "0.4": { "pass@1": 0.06 }, - "0.5": { "pass@1": 0.04 }, - "0.6": { "pass@1": 0.03 }, - "0.7": { "pass@1": 0.02 }, - "0.8": { "pass@1": 0.02 }, - "0.9": { "pass@1": 0.0 }, - "1.0": { "pass@1": 0.0 } + "0.0": { "pass@1": 0.98 }, + "0.1": { "pass@1": 0.98 }, + "0.2": { "pass@1": 0.97 }, + "0.3": { "pass@1": 0.97 }, + "0.4": { "pass@1": 0.97 }, + "0.5": { "pass@1": 0.97 }, + "0.6": { "pass@1": 0.96 }, + "0.7": { "pass@1": 0.96 }, + "0.8": { "pass@1": 0.96 }, + "0.9": { "pass@1": 0.96 }, + "1.0": { "pass@1": 0.96 } }, "typescript": { - "0.0": { "pass@1": 0.45 }, - "0.1": { "pass@1": 0.3 }, - "0.2": { "pass@1": 0.21 }, - "0.3": { "pass@1": 0.17 }, - "0.4": { "pass@1": 0.13 }, - "0.5": { "pass@1": 0.09 }, - "0.6": { "pass@1": 0.07 }, - "0.7": { "pass@1": 0.05 }, - "0.8": { "pass@1": 0.03 }, - "0.9": { "pass@1": 0.01 }, - "1.0": { "pass@1": 0.01 } + "0.0": { "pass@1": 1.0 }, + "0.1": { "pass@1": 0.98 }, + "0.2": { "pass@1": 0.98 }, + "0.3": { "pass@1": 0.98 }, + "0.4": { "pass@1": 0.98 }, + "0.5": { "pass@1": 0.98 }, + "0.6": { "pass@1": 0.98 }, + "0.7": { "pass@1": 0.98 }, + "0.8": { "pass@1": 0.97 }, + "0.9": { "pass@1": 0.93 }, + "1.0": { "pass@1": 0.56 } + }, + "rust": { + "0.0": { "pass@1": 0.93 }, + "0.1": { "pass@1": 0.88 }, + "0.2": { "pass@1": 0.87 }, + "0.3": { "pass@1": 0.86 }, + "0.4": { "pass@1": 0.86 }, + "0.5": { "pass@1": 0.86 }, + "0.6": { "pass@1": 0.85 }, + "0.7": { "pass@1": 0.85 }, + "0.8": { "pass@1": 0.85 }, + "0.9": { "pass@1": 0.85 }, + "1.0": { "pass@1": 0.85 } } } }, - "codellama/CodeLlama-13b-Instruct-hf": { - "eval_date": "2024-05-13 01:37:59.615784", - "train_size": "16k", + "deepseek-ai/DeepSeek-V2-Chat": { + "eval_date": "2024-05-19 13:56:08.965716", + "train_size": "128k", "scores": { "all": { - "0.0": { "pass@1": 0.812 }, - "0.1": { "pass@1": 0.666 }, - "0.2": { "pass@1": 0.602 }, - "0.3": { "pass@1": 0.558 }, - "0.4": { "pass@1": 0.528 }, - "0.5": { "pass@1": 0.502 }, - "0.6": { "pass@1": 0.474 }, - "0.7": { "pass@1": 0.454 }, - "0.8": { "pass@1": 0.426 }, - "0.9": { "pass@1": 0.35 }, - "1.0": { "pass@1": 0.18 } + "0.0": { "pass@1": 0.95 }, + "0.1": { "pass@1": 0.912 }, + "0.2": { "pass@1": 0.892 }, + "0.3": { "pass@1": 0.882 }, + "0.4": { "pass@1": 0.876 }, + "0.5": { "pass@1": 0.866 }, + "0.6": { "pass@1": 0.848 }, + "0.7": { "pass@1": 0.838 }, + "0.8": { "pass@1": 0.834 }, + "0.9": { "pass@1": 0.806 }, + "1.0": { "pass@1": 0.718 } }, "python": { - "0.0": { "pass@1": 0.89 }, - "0.1": { "pass@1": 0.77 }, - "0.2": { "pass@1": 0.71 }, - "0.3": { "pass@1": 0.67 }, - "0.4": { "pass@1": 0.64 }, - "0.5": { "pass@1": 0.61 }, - "0.6": { "pass@1": 0.55 }, - "0.7": { "pass@1": 0.51 }, - "0.8": { "pass@1": 0.45 }, - "0.9": { "pass@1": 0.37 }, - "1.0": { "pass@1": 0.27 } + "0.0": { "pass@1": 0.97 }, + "0.1": { "pass@1": 0.94 }, + "0.2": { "pass@1": 0.93 }, + "0.3": { "pass@1": 0.92 }, + "0.4": { "pass@1": 0.92 }, + "0.5": { "pass@1": 0.92 }, + "0.6": { "pass@1": 0.9 }, + "0.7": { "pass@1": 0.9 }, + "0.8": { "pass@1": 0.9 }, + "0.9": { "pass@1": 0.86 }, + "1.0": { "pass@1": 0.84 } }, "cpp": { - "0.0": { "pass@1": 0.68 }, - "0.1": { "pass@1": 0.49 }, - "0.2": { "pass@1": 0.41 }, - "0.3": { "pass@1": 0.34 }, - "0.4": { "pass@1": 0.34 }, - "0.5": { "pass@1": 0.34 }, - "0.6": { "pass@1": 0.32 }, - "0.7": { "pass@1": 0.31 }, - "0.8": { "pass@1": 0.3 }, - "0.9": { "pass@1": 0.23 }, - "1.0": { "pass@1": 0.13 } + "0.0": { "pass@1": 0.9 }, + "0.1": { "pass@1": 0.85 }, + "0.2": { "pass@1": 0.83 }, + "0.3": { "pass@1": 0.83 }, + "0.4": { "pass@1": 0.82 }, + "0.5": { "pass@1": 0.82 }, + "0.6": { "pass@1": 0.79 }, + "0.7": { "pass@1": 0.76 }, + "0.8": { "pass@1": 0.76 }, + "0.9": { "pass@1": 0.75 }, + "1.0": { "pass@1": 0.69 } }, "java": { - "0.0": { "pass@1": 0.89 }, - "0.1": { "pass@1": 0.66 }, - "0.2": { "pass@1": 0.6 }, - "0.3": { "pass@1": 0.57 }, - "0.4": { "pass@1": 0.55 }, - "0.5": { "pass@1": 0.54 }, - "0.6": { "pass@1": 0.53 }, - "0.7": { "pass@1": 0.52 }, - "0.8": { "pass@1": 0.5 }, - "0.9": { "pass@1": 0.41 }, - "1.0": { "pass@1": 0.14 } + "0.0": { "pass@1": 0.97 }, + "0.1": { "pass@1": 0.96 }, + "0.2": { "pass@1": 0.93 }, + "0.3": { "pass@1": 0.92 }, + "0.4": { "pass@1": 0.92 }, + "0.5": { "pass@1": 0.92 }, + "0.6": { "pass@1": 0.91 }, + "0.7": { "pass@1": 0.91 }, + "0.8": { "pass@1": 0.91 }, + "0.9": { "pass@1": 0.89 }, + "1.0": { "pass@1": 0.87 } }, "typescript": { - "0.0": { "pass@1": 0.9 }, - "0.1": { "pass@1": 0.84 }, - "0.2": { "pass@1": 0.78 }, - "0.3": { "pass@1": 0.73 }, - "0.4": { "pass@1": 0.7 }, - "0.5": { "pass@1": 0.66 }, - "0.6": { "pass@1": 0.62 }, - "0.7": { "pass@1": 0.6 }, - "0.8": { "pass@1": 0.57 }, - "0.9": { "pass@1": 0.46 }, - "1.0": { "pass@1": 0.21 } + "0.0": { "pass@1": 0.99 }, + "0.1": { "pass@1": 0.96 }, + "0.2": { "pass@1": 0.94 }, + "0.3": { "pass@1": 0.92 }, + "0.4": { "pass@1": 0.9 }, + "0.5": { "pass@1": 0.87 }, + "0.6": { "pass@1": 0.86 }, + "0.7": { "pass@1": 0.85 }, + "0.8": { "pass@1": 0.83 }, + "0.9": { "pass@1": 0.76 }, + "1.0": { "pass@1": 0.44 } }, "rust": { - "0.0": { "pass@1": 0.7 }, - "0.1": { "pass@1": 0.57 }, - "0.2": { "pass@1": 0.51 }, - "0.3": { "pass@1": 0.48 }, - "0.4": { "pass@1": 0.41 }, - "0.5": { "pass@1": 0.36 }, - "0.6": { "pass@1": 0.35 }, - "0.7": { "pass@1": 0.33 }, - "0.8": { "pass@1": 0.31 }, - "0.9": { "pass@1": 0.28 }, - "1.0": { "pass@1": 0.15 } + "0.0": { "pass@1": 0.92 }, + "0.1": { "pass@1": 0.85 }, + "0.2": { "pass@1": 0.83 }, + "0.3": { "pass@1": 0.82 }, + "0.4": { "pass@1": 0.82 }, + "0.5": { "pass@1": 0.8 }, + "0.6": { "pass@1": 0.78 }, + "0.7": { "pass@1": 0.77 }, + "0.8": { "pass@1": 0.77 }, + "0.9": { "pass@1": 0.77 }, + "1.0": { "pass@1": 0.75 } } } }, - "codellama/CodeLlama-34b-Instruct-hf": { - "eval_date": "2024-05-13 01:38:05.832801", - "train_size": "16k", + "deepseek-ai/DeepSeek-V2-Lite-Chat": { + "eval_date": "2024-05-19 08:01:09.773213", + "train_size": "32k", "scores": { "all": { - "0.0": { "pass@1": 0.728 }, - "0.1": { "pass@1": 0.582 }, - "0.2": { "pass@1": 0.524 }, - "0.3": { "pass@1": 0.496 }, - "0.4": { "pass@1": 0.48 }, - "0.5": { "pass@1": 0.454 }, - "0.6": { "pass@1": 0.438 }, - "0.7": { "pass@1": 0.43 }, + "0.0": { "pass@1": 0.786 }, + "0.1": { "pass@1": 0.666 }, + "0.2": { "pass@1": 0.624 }, + "0.3": { "pass@1": 0.574 }, + "0.4": { "pass@1": 0.544 }, + "0.5": { "pass@1": 0.522 }, + "0.6": { "pass@1": 0.476 }, + "0.7": { "pass@1": 0.446 }, "0.8": { "pass@1": 0.416 }, - "0.9": { "pass@1": 0.372 }, - "1.0": { "pass@1": 0.28 } + "0.9": { "pass@1": 0.346 }, + "1.0": { "pass@1": 0.256 } }, "python": { - "0.0": { "pass@1": 0.76 }, - "0.1": { "pass@1": 0.59 }, - "0.2": { "pass@1": 0.52 }, - "0.3": { "pass@1": 0.47 }, - "0.4": { "pass@1": 0.46 }, - "0.5": { "pass@1": 0.44 }, - "0.6": { "pass@1": 0.43 }, - "0.7": { "pass@1": 0.42 }, - "0.8": { "pass@1": 0.41 }, - "0.9": { "pass@1": 0.4 }, - "1.0": { "pass@1": 0.36 } + "0.0": { "pass@1": 0.83 }, + "0.1": { "pass@1": 0.65 }, + "0.2": { "pass@1": 0.6 }, + "0.3": { "pass@1": 0.58 }, + "0.4": { "pass@1": 0.51 }, + "0.5": { "pass@1": 0.51 }, + "0.6": { "pass@1": 0.44 }, + "0.7": { "pass@1": 0.41 }, + "0.8": { "pass@1": 0.39 }, + "0.9": { "pass@1": 0.33 }, + "1.0": { "pass@1": 0.25 } }, "cpp": { - "0.0": { "pass@1": 0.64 }, - "0.1": { "pass@1": 0.45 }, - "0.2": { "pass@1": 0.41 }, - "0.3": { "pass@1": 0.37 }, - "0.4": { "pass@1": 0.37 }, - "0.5": { "pass@1": 0.36 }, - "0.6": { "pass@1": 0.35 }, - "0.7": { "pass@1": 0.32 }, - "0.8": { "pass@1": 0.31 }, - "0.9": { "pass@1": 0.29 }, - "1.0": { "pass@1": 0.26 } - }, - "java": { - "0.0": { "pass@1": 0.7 }, + "0.0": { "pass@1": 0.75 }, "0.1": { "pass@1": 0.59 }, - "0.2": { "pass@1": 0.52 }, - "0.3": { "pass@1": 0.52 }, - "0.4": { "pass@1": 0.47 }, - "0.5": { "pass@1": 0.45 }, + "0.2": { "pass@1": 0.53 }, + "0.3": { "pass@1": 0.5 }, + "0.4": { "pass@1": 0.49 }, + "0.5": { "pass@1": 0.46 }, "0.6": { "pass@1": 0.42 }, - "0.7": { "pass@1": 0.42 }, - "0.8": { "pass@1": 0.4 }, - "0.9": { "pass@1": 0.33 }, - "1.0": { "pass@1": 0.22 } - }, - "rust": { - "0.0": { "pass@1": 0.77 }, - "0.1": { "pass@1": 0.61 }, - "0.2": { "pass@1": 0.58 }, - "0.3": { "pass@1": 0.58 }, - "0.4": { "pass@1": 0.58 }, - "0.5": { "pass@1": 0.55 }, - "0.6": { "pass@1": 0.54 }, - "0.7": { "pass@1": 0.54 }, - "0.8": { "pass@1": 0.53 }, - "0.9": { "pass@1": 0.47 }, - "1.0": { "pass@1": 0.35 } + "0.7": { "pass@1": 0.38 }, + "0.8": { "pass@1": 0.37 }, + "0.9": { "pass@1": 0.3 }, + "1.0": { "pass@1": 0.23 } }, - "typescript": { - "0.0": { "pass@1": 0.77 }, - "0.1": { "pass@1": 0.67 }, - "0.2": { "pass@1": 0.59 }, + "java": { + "0.0": { "pass@1": 0.79 }, + "0.1": { "pass@1": 0.65 }, + "0.2": { "pass@1": 0.64 }, "0.3": { "pass@1": 0.54 }, "0.4": { "pass@1": 0.52 }, - "0.5": { "pass@1": 0.47 }, - "0.6": { "pass@1": 0.45 }, - "0.7": { "pass@1": 0.45 }, - "0.8": { "pass@1": 0.43 }, - "0.9": { "pass@1": 0.37 }, - "1.0": { "pass@1": 0.21 } + "0.5": { "pass@1": 0.52 }, + "0.6": { "pass@1": 0.48 }, + "0.7": { "pass@1": 0.46 }, + "0.8": { "pass@1": 0.41 }, + "0.9": { "pass@1": 0.38 }, + "1.0": { "pass@1": 0.3 } + }, + "typescript": { + "0.0": { "pass@1": 0.84 }, + "0.1": { "pass@1": 0.79 }, + "0.2": { "pass@1": 0.75 }, + "0.3": { "pass@1": 0.69 }, + "0.4": { "pass@1": 0.66 }, + "0.5": { "pass@1": 0.6 }, + "0.6": { "pass@1": 0.53 }, + "0.7": { "pass@1": 0.5 }, + "0.8": { "pass@1": 0.46 }, + "0.9": { "pass@1": 0.34 }, + "1.0": { "pass@1": 0.17 } + }, + "rust": { + "0.0": { "pass@1": 0.72 }, + "0.1": { "pass@1": 0.65 }, + "0.2": { "pass@1": 0.6 }, + "0.3": { "pass@1": 0.56 }, + "0.4": { "pass@1": 0.54 }, + "0.5": { "pass@1": 0.52 }, + "0.6": { "pass@1": 0.51 }, + "0.7": { "pass@1": 0.48 }, + "0.8": { "pass@1": 0.45 }, + "0.9": { "pass@1": 0.38 }, + "1.0": { "pass@1": 0.33 } } } }, - "mistralai/Mixtral-8x7B-Instruct-v0.1": { - "eval_date": "2024-05-13 01:39:35.376129", + "Qwen/Qwen1.5-7B-Chat": { + "eval_date": "2024-05-13 01:40:02.854316", "train_size": "32k", "scores": { "all": { - "0.0": { "pass@1": 0.904 }, - "0.1": { "pass@1": 0.834 }, - "0.2": { "pass@1": 0.802 }, - "0.3": { "pass@1": 0.766 }, - "0.4": { "pass@1": 0.762 }, - "0.5": { "pass@1": 0.75 }, - "0.6": { "pass@1": 0.726 }, - "0.7": { "pass@1": 0.702 }, - "0.8": { "pass@1": 0.68 }, - "0.9": { "pass@1": 0.626 }, - "1.0": { "pass@1": 0.52 } + "0.0": { "pass@1": 0.41 }, + "0.1": { "pass@1": 0.218 }, + "0.2": { "pass@1": 0.15 }, + "0.3": { "pass@1": 0.114 }, + "0.4": { "pass@1": 0.082 }, + "0.5": { "pass@1": 0.06 }, + "0.6": { "pass@1": 0.046 }, + "0.7": { "pass@1": 0.036 }, + "0.8": { "pass@1": 0.028 }, + "0.9": { "pass@1": 0.014 }, + "1.0": { "pass@1": 0.012 } }, "python": { - "0.0": { "pass@1": 0.89 }, - "0.1": { "pass@1": 0.82 }, - "0.2": { "pass@1": 0.81 }, - "0.3": { "pass@1": 0.76 }, - "0.4": { "pass@1": 0.74 }, - "0.5": { "pass@1": 0.74 }, - "0.6": { "pass@1": 0.71 }, - "0.7": { "pass@1": 0.68 }, - "0.8": { "pass@1": 0.66 }, - "0.9": { "pass@1": 0.62 }, - "1.0": { "pass@1": 0.57 } + "0.0": { "pass@1": 0.36 }, + "0.1": { "pass@1": 0.13 }, + "0.2": { "pass@1": 0.08 }, + "0.3": { "pass@1": 0.06 }, + "0.4": { "pass@1": 0.04 }, + "0.5": { "pass@1": 0.04 }, + "0.6": { "pass@1": 0.03 }, + "0.7": { "pass@1": 0.02 }, + "0.8": { "pass@1": 0.01 }, + "0.9": { "pass@1": 0.0 }, + "1.0": { "pass@1": 0.0 } }, "cpp": { - "0.0": { "pass@1": 0.87 }, - "0.1": { "pass@1": 0.77 }, - "0.2": { "pass@1": 0.74 }, - "0.3": { "pass@1": 0.72 }, - "0.4": { "pass@1": 0.72 }, - "0.5": { "pass@1": 0.71 }, - "0.6": { "pass@1": 0.68 }, - "0.7": { "pass@1": 0.67 }, - "0.8": { "pass@1": 0.65 }, - "0.9": { "pass@1": 0.58 }, - "1.0": { "pass@1": 0.47 } + "0.0": { "pass@1": 0.51 }, + "0.1": { "pass@1": 0.31 }, + "0.2": { "pass@1": 0.23 }, + "0.3": { "pass@1": 0.17 }, + "0.4": { "pass@1": 0.12 }, + "0.5": { "pass@1": 0.09 }, + "0.6": { "pass@1": 0.06 }, + "0.7": { "pass@1": 0.06 }, + "0.8": { "pass@1": 0.06 }, + "0.9": { "pass@1": 0.04 }, + "1.0": { "pass@1": 0.03 } }, "java": { - "0.0": { "pass@1": 0.93 }, - "0.1": { "pass@1": 0.89 }, - "0.2": { "pass@1": 0.82 }, - "0.3": { "pass@1": 0.78 }, - "0.4": { "pass@1": 0.78 }, - "0.5": { "pass@1": 0.77 }, - "0.6": { "pass@1": 0.75 }, - "0.7": { "pass@1": 0.74 }, - "0.8": { "pass@1": 0.71 }, - "0.9": { "pass@1": 0.65 }, - "1.0": { "pass@1": 0.6 } + "0.0": { "pass@1": 0.35 }, + "0.1": { "pass@1": 0.15 }, + "0.2": { "pass@1": 0.11 }, + "0.3": { "pass@1": 0.08 }, + "0.4": { "pass@1": 0.06 }, + "0.5": { "pass@1": 0.04 }, + "0.6": { "pass@1": 0.04 }, + "0.7": { "pass@1": 0.03 }, + "0.8": { "pass@1": 0.02 }, + "0.9": { "pass@1": 0.02 }, + "1.0": { "pass@1": 0.02 } }, "rust": { - "0.0": { "pass@1": 0.87 }, - "0.1": { "pass@1": 0.81 }, - "0.2": { "pass@1": 0.77 }, - "0.3": { "pass@1": 0.72 }, - "0.4": { "pass@1": 0.72 }, - "0.5": { "pass@1": 0.69 }, - "0.6": { "pass@1": 0.67 }, - "0.7": { "pass@1": 0.66 }, - "0.8": { "pass@1": 0.64 }, - "0.9": { "pass@1": 0.62 }, - "1.0": { "pass@1": 0.56 } + "0.0": { "pass@1": 0.38 }, + "0.1": { "pass@1": 0.2 }, + "0.2": { "pass@1": 0.12 }, + "0.3": { "pass@1": 0.09 }, + "0.4": { "pass@1": 0.06 }, + "0.5": { "pass@1": 0.04 }, + "0.6": { "pass@1": 0.03 }, + "0.7": { "pass@1": 0.02 }, + "0.8": { "pass@1": 0.02 }, + "0.9": { "pass@1": 0.0 }, + "1.0": { "pass@1": 0.0 } }, "typescript": { - "0.0": { "pass@1": 0.96 }, - "0.1": { "pass@1": 0.88 }, - "0.2": { "pass@1": 0.87 }, - "0.3": { "pass@1": 0.85 }, - "0.4": { "pass@1": 0.85 }, - "0.5": { "pass@1": 0.84 }, - "0.6": { "pass@1": 0.82 }, - "0.7": { "pass@1": 0.76 }, - "0.8": { "pass@1": 0.74 }, - "0.9": { "pass@1": 0.66 }, - "1.0": { "pass@1": 0.4 } + "0.0": { "pass@1": 0.45 }, + "0.1": { "pass@1": 0.3 }, + "0.2": { "pass@1": 0.21 }, + "0.3": { "pass@1": 0.17 }, + "0.4": { "pass@1": 0.13 }, + "0.5": { "pass@1": 0.09 }, + "0.6": { "pass@1": 0.07 }, + "0.7": { "pass@1": 0.05 }, + "0.8": { "pass@1": 0.03 }, + "0.9": { "pass@1": 0.01 }, + "1.0": { "pass@1": 0.01 } } } }, - "mistralai/Mistral-7B-Instruct-v0.2": { - "eval_date": "2024-05-13 01:30:36.208632", + "mistralai/Mistral-7B-Instruct-v0.1": { + "eval_date": "2024-05-13 01:39:19.607214", "train_size": "32k", "scores": { "all": { - "0.0": { "pass@1": 0.852 }, - "0.1": { "pass@1": 0.766 }, - "0.2": { "pass@1": 0.708 }, - "0.3": { "pass@1": 0.674 }, - "0.4": { "pass@1": 0.638 }, - "0.5": { "pass@1": 0.598 }, - "0.6": { "pass@1": 0.56 }, - "0.7": { "pass@1": 0.526 }, - "0.8": { "pass@1": 0.474 }, - "0.9": { "pass@1": 0.424 }, - "1.0": { "pass@1": 0.294 } + "0.0": { "pass@1": 0.576 }, + "0.1": { "pass@1": 0.286 }, + "0.2": { "pass@1": 0.214 }, + "0.3": { "pass@1": 0.172 }, + "0.4": { "pass@1": 0.162 }, + "0.5": { "pass@1": 0.148 }, + "0.6": { "pass@1": 0.132 }, + "0.7": { "pass@1": 0.12 }, + "0.8": { "pass@1": 0.11 }, + "0.9": { "pass@1": 0.094 }, + "1.0": { "pass@1": 0.068 } }, "python": { - "0.0": { "pass@1": 0.89 }, - "0.1": { "pass@1": 0.77 }, - "0.2": { "pass@1": 0.7 }, - "0.3": { "pass@1": 0.65 }, - "0.4": { "pass@1": 0.6 }, - "0.5": { "pass@1": 0.53 }, - "0.6": { "pass@1": 0.48 }, - "0.7": { "pass@1": 0.43 }, - "0.8": { "pass@1": 0.38 }, - "0.9": { "pass@1": 0.31 }, - "1.0": { "pass@1": 0.23 } + "0.0": { "pass@1": 0.61 }, + "0.1": { "pass@1": 0.25 }, + "0.2": { "pass@1": 0.18 }, + "0.3": { "pass@1": 0.14 }, + "0.4": { "pass@1": 0.14 }, + "0.5": { "pass@1": 0.14 }, + "0.6": { "pass@1": 0.11 }, + "0.7": { "pass@1": 0.1 }, + "0.8": { "pass@1": 0.1 }, + "0.9": { "pass@1": 0.1 }, + "1.0": { "pass@1": 0.07 } }, "cpp": { - "0.0": { "pass@1": 0.79 }, - "0.1": { "pass@1": 0.68 }, - "0.2": { "pass@1": 0.64 }, - "0.3": { "pass@1": 0.61 }, - "0.4": { "pass@1": 0.61 }, - "0.5": { "pass@1": 0.57 }, - "0.6": { "pass@1": 0.56 }, - "0.7": { "pass@1": 0.53 }, - "0.8": { "pass@1": 0.5 }, - "0.9": { "pass@1": 0.44 }, - "1.0": { "pass@1": 0.31 } + "0.0": { "pass@1": 0.43 }, + "0.1": { "pass@1": 0.21 }, + "0.2": { "pass@1": 0.13 }, + "0.3": { "pass@1": 0.12 }, + "0.4": { "pass@1": 0.11 }, + "0.5": { "pass@1": 0.1 }, + "0.6": { "pass@1": 0.1 }, + "0.7": { "pass@1": 0.1 }, + "0.8": { "pass@1": 0.09 }, + "0.9": { "pass@1": 0.09 }, + "1.0": { "pass@1": 0.09 } }, "java": { - "0.0": { "pass@1": 0.87 }, - "0.1": { "pass@1": 0.8 }, - "0.2": { "pass@1": 0.71 }, - "0.3": { "pass@1": 0.66 }, - "0.4": { "pass@1": 0.65 }, - "0.5": { "pass@1": 0.63 }, - "0.6": { "pass@1": 0.57 }, - "0.7": { "pass@1": 0.54 }, - "0.8": { "pass@1": 0.45 }, - "0.9": { "pass@1": 0.43 }, - "1.0": { "pass@1": 0.34 } - }, - "typescript": { - "0.0": { "pass@1": 0.93 }, - "0.1": { "pass@1": 0.91 }, - "0.2": { "pass@1": 0.87 }, - "0.3": { "pass@1": 0.87 }, - "0.4": { "pass@1": 0.81 }, - "0.5": { "pass@1": 0.76 }, - "0.6": { "pass@1": 0.72 }, - "0.7": { "pass@1": 0.67 }, - "0.8": { "pass@1": 0.6 }, - "0.9": { "pass@1": 0.52 }, - "1.0": { "pass@1": 0.25 } + "0.0": { "pass@1": 0.64 }, + "0.1": { "pass@1": 0.39 }, + "0.2": { "pass@1": 0.3 }, + "0.3": { "pass@1": 0.22 }, + "0.4": { "pass@1": 0.2 }, + "0.5": { "pass@1": 0.17 }, + "0.6": { "pass@1": 0.16 }, + "0.7": { "pass@1": 0.13 }, + "0.8": { "pass@1": 0.11 }, + "0.9": { "pass@1": 0.08 }, + "1.0": { "pass@1": 0.05 } }, "rust": { - "0.0": { "pass@1": 0.78 }, - "0.1": { "pass@1": 0.67 }, - "0.2": { "pass@1": 0.62 }, - "0.3": { "pass@1": 0.58 }, - "0.4": { "pass@1": 0.52 }, - "0.5": { "pass@1": 0.5 }, - "0.6": { "pass@1": 0.47 }, - "0.7": { "pass@1": 0.46 }, - "0.8": { "pass@1": 0.44 }, - "0.9": { "pass@1": 0.42 }, - "1.0": { "pass@1": 0.34 } + "0.0": { "pass@1": 0.54 }, + "0.1": { "pass@1": 0.21 }, + "0.2": { "pass@1": 0.19 }, + "0.3": { "pass@1": 0.16 }, + "0.4": { "pass@1": 0.16 }, + "0.5": { "pass@1": 0.15 }, + "0.6": { "pass@1": 0.14 }, + "0.7": { "pass@1": 0.12 }, + "0.8": { "pass@1": 0.1 }, + "0.9": { "pass@1": 0.08 }, + "1.0": { "pass@1": 0.08 } + }, + "typescript": { + "0.0": { "pass@1": 0.66 }, + "0.1": { "pass@1": 0.37 }, + "0.2": { "pass@1": 0.27 }, + "0.3": { "pass@1": 0.22 }, + "0.4": { "pass@1": 0.2 }, + "0.5": { "pass@1": 0.18 }, + "0.6": { "pass@1": 0.15 }, + "0.7": { "pass@1": 0.15 }, + "0.8": { "pass@1": 0.15 }, + "0.9": { "pass@1": 0.12 }, + "1.0": { "pass@1": 0.05 } } } }, - "deepseek-ai/DeepSeek-V2-Chat": { - "eval_date": "2024-05-19 13:56:08.965716", - "train_size": "128k", + "codellama/CodeLlama-13b-Instruct-hf": { + "eval_date": "2024-05-13 01:37:59.615784", + "train_size": "16k", "scores": { "all": { - "0.0": { "pass@1": 0.95 }, - "0.1": { "pass@1": 0.912 }, - "0.2": { "pass@1": 0.892 }, - "0.3": { "pass@1": 0.882 }, - "0.4": { "pass@1": 0.876 }, - "0.5": { "pass@1": 0.866 }, - "0.6": { "pass@1": 0.848 }, - "0.7": { "pass@1": 0.838 }, - "0.8": { "pass@1": 0.834 }, - "0.9": { "pass@1": 0.806 }, - "1.0": { "pass@1": 0.718 } - }, - "python": { - "0.0": { "pass@1": 0.97 }, - "0.1": { "pass@1": 0.94 }, - "0.2": { "pass@1": 0.93 }, - "0.3": { "pass@1": 0.92 }, - "0.4": { "pass@1": 0.92 }, - "0.5": { "pass@1": 0.92 }, - "0.6": { "pass@1": 0.9 }, - "0.7": { "pass@1": 0.9 }, - "0.8": { "pass@1": 0.9 }, - "0.9": { "pass@1": 0.86 }, - "1.0": { "pass@1": 0.84 } - }, - "cpp": { - "0.0": { "pass@1": 0.9 }, - "0.1": { "pass@1": 0.85 }, - "0.2": { "pass@1": 0.83 }, - "0.3": { "pass@1": 0.83 }, - "0.4": { "pass@1": 0.82 }, - "0.5": { "pass@1": 0.82 }, - "0.6": { "pass@1": 0.79 }, - "0.7": { "pass@1": 0.76 }, - "0.8": { "pass@1": 0.76 }, - "0.9": { "pass@1": 0.75 }, - "1.0": { "pass@1": 0.69 } - }, - "java": { - "0.0": { "pass@1": 0.97 }, - "0.1": { "pass@1": 0.96 }, - "0.2": { "pass@1": 0.93 }, - "0.3": { "pass@1": 0.92 }, - "0.4": { "pass@1": 0.92 }, - "0.5": { "pass@1": 0.92 }, - "0.6": { "pass@1": 0.91 }, - "0.7": { "pass@1": 0.91 }, - "0.8": { "pass@1": 0.91 }, - "0.9": { "pass@1": 0.89 }, - "1.0": { "pass@1": 0.87 } - }, - "typescript": { - "0.0": { "pass@1": 0.99 }, - "0.1": { "pass@1": 0.96 }, - "0.2": { "pass@1": 0.94 }, - "0.3": { "pass@1": 0.92 }, - "0.4": { "pass@1": 0.9 }, - "0.5": { "pass@1": 0.87 }, - "0.6": { "pass@1": 0.86 }, - "0.7": { "pass@1": 0.85 }, - "0.8": { "pass@1": 0.83 }, - "0.9": { "pass@1": 0.76 }, - "1.0": { "pass@1": 0.44 } - }, - "rust": { - "0.0": { "pass@1": 0.92 }, - "0.1": { "pass@1": 0.85 }, - "0.2": { "pass@1": 0.83 }, - "0.3": { "pass@1": 0.82 }, - "0.4": { "pass@1": 0.82 }, - "0.5": { "pass@1": 0.8 }, - "0.6": { "pass@1": 0.78 }, - "0.7": { "pass@1": 0.77 }, - "0.8": { "pass@1": 0.77 }, - "0.9": { "pass@1": 0.77 }, - "1.0": { "pass@1": 0.75 } - } - } - }, - "meta-llama/Meta-Llama-3-8B-Instruct": { - "eval_date": "2024-05-13 01:39:08.064606", - "train_size": "8k", - "scores": { - "all": { - "0.0": { "pass@1": 0.788 }, - "0.1": { "pass@1": 0.712 }, - "0.2": { "pass@1": 0.688 }, - "0.3": { "pass@1": 0.672 }, - "0.4": { "pass@1": 0.646 }, - "0.5": { "pass@1": 0.63 }, - "0.6": { "pass@1": 0.614 }, - "0.7": { "pass@1": 0.59 }, - "0.8": { "pass@1": 0.536 }, - "0.9": { "pass@1": 0.458 }, - "1.0": { "pass@1": 0.302 } + "0.0": { "pass@1": 0.812 }, + "0.1": { "pass@1": 0.666 }, + "0.2": { "pass@1": 0.602 }, + "0.3": { "pass@1": 0.558 }, + "0.4": { "pass@1": 0.528 }, + "0.5": { "pass@1": 0.502 }, + "0.6": { "pass@1": 0.474 }, + "0.7": { "pass@1": 0.454 }, + "0.8": { "pass@1": 0.426 }, + "0.9": { "pass@1": 0.35 }, + "1.0": { "pass@1": 0.18 } }, "python": { - "0.0": { "pass@1": 0.86 }, + "0.0": { "pass@1": 0.89 }, "0.1": { "pass@1": 0.77 }, - "0.2": { "pass@1": 0.76 }, - "0.3": { "pass@1": 0.74 }, - "0.4": { "pass@1": 0.69 }, - "0.5": { "pass@1": 0.66 }, - "0.6": { "pass@1": 0.66 }, - "0.7": { "pass@1": 0.63 }, - "0.8": { "pass@1": 0.54 }, - "0.9": { "pass@1": 0.48 }, - "1.0": { "pass@1": 0.3 } - }, - "cpp": { - "0.0": { "pass@1": 0.7 }, - "0.1": { "pass@1": 0.65 }, - "0.2": { "pass@1": 0.59 }, - "0.3": { "pass@1": 0.58 }, - "0.4": { "pass@1": 0.57 }, - "0.5": { "pass@1": 0.56 }, + "0.2": { "pass@1": 0.71 }, + "0.3": { "pass@1": 0.67 }, + "0.4": { "pass@1": 0.64 }, + "0.5": { "pass@1": 0.61 }, "0.6": { "pass@1": 0.55 }, "0.7": { "pass@1": 0.51 }, - "0.8": { "pass@1": 0.48 }, - "0.9": { "pass@1": 0.43 }, - "1.0": { "pass@1": 0.34 } + "0.8": { "pass@1": 0.45 }, + "0.9": { "pass@1": 0.37 }, + "1.0": { "pass@1": 0.27 } + }, + "cpp": { + "0.0": { "pass@1": 0.68 }, + "0.1": { "pass@1": 0.49 }, + "0.2": { "pass@1": 0.41 }, + "0.3": { "pass@1": 0.34 }, + "0.4": { "pass@1": 0.34 }, + "0.5": { "pass@1": 0.34 }, + "0.6": { "pass@1": 0.32 }, + "0.7": { "pass@1": 0.31 }, + "0.8": { "pass@1": 0.3 }, + "0.9": { "pass@1": 0.23 }, + "1.0": { "pass@1": 0.13 } }, "java": { - "0.0": { "pass@1": 0.77 }, + "0.0": { "pass@1": 0.89 }, "0.1": { "pass@1": 0.66 }, - "0.2": { "pass@1": 0.64 }, - "0.3": { "pass@1": 0.63 }, - "0.4": { "pass@1": 0.63 }, - "0.5": { "pass@1": 0.61 }, - "0.6": { "pass@1": 0.6 }, - "0.7": { "pass@1": 0.58 }, - "0.8": { "pass@1": 0.53 }, - "0.9": { "pass@1": 0.49 }, - "1.0": { "pass@1": 0.38 } + "0.2": { "pass@1": 0.6 }, + "0.3": { "pass@1": 0.57 }, + "0.4": { "pass@1": 0.55 }, + "0.5": { "pass@1": 0.54 }, + "0.6": { "pass@1": 0.53 }, + "0.7": { "pass@1": 0.52 }, + "0.8": { "pass@1": 0.5 }, + "0.9": { "pass@1": 0.41 }, + "1.0": { "pass@1": 0.14 } }, "typescript": { "0.0": { "pass@1": 0.9 }, - "0.1": { "pass@1": 0.86 }, - "0.2": { "pass@1": 0.84 }, - "0.3": { "pass@1": 0.81 }, - "0.4": { "pass@1": 0.76 }, - "0.5": { "pass@1": 0.75 }, - "0.6": { "pass@1": 0.71 }, - "0.7": { "pass@1": 0.69 }, - "0.8": { "pass@1": 0.62 }, - "0.9": { "pass@1": 0.45 }, - "1.0": { "pass@1": 0.16 } + "0.1": { "pass@1": 0.84 }, + "0.2": { "pass@1": 0.78 }, + "0.3": { "pass@1": 0.73 }, + "0.4": { "pass@1": 0.7 }, + "0.5": { "pass@1": 0.66 }, + "0.6": { "pass@1": 0.62 }, + "0.7": { "pass@1": 0.6 }, + "0.8": { "pass@1": 0.57 }, + "0.9": { "pass@1": 0.46 }, + "1.0": { "pass@1": 0.21 } }, "rust": { - "0.0": { "pass@1": 0.71 }, - "0.1": { "pass@1": 0.62 }, - "0.2": { "pass@1": 0.61 }, - "0.3": { "pass@1": 0.6 }, - "0.4": { "pass@1": 0.58 }, - "0.5": { "pass@1": 0.57 }, - "0.6": { "pass@1": 0.55 }, - "0.7": { "pass@1": 0.54 }, - "0.8": { "pass@1": 0.51 }, - "0.9": { "pass@1": 0.44 }, - "1.0": { "pass@1": 0.33 } + "0.0": { "pass@1": 0.7 }, + "0.1": { "pass@1": 0.57 }, + "0.2": { "pass@1": 0.51 }, + "0.3": { "pass@1": 0.48 }, + "0.4": { "pass@1": 0.41 }, + "0.5": { "pass@1": 0.36 }, + "0.6": { "pass@1": 0.35 }, + "0.7": { "pass@1": 0.33 }, + "0.8": { "pass@1": 0.31 }, + "0.9": { "pass@1": 0.28 }, + "1.0": { "pass@1": 0.15 } } } }, - "Qwen/Qwen1.5-14B-Chat": { - "eval_date": "2024-05-13 01:39:46.042567", + "mistralai/Mixtral-8x7B-Instruct-v0.1": { + "eval_date": "2024-05-13 01:39:35.376129", "train_size": "32k", "scores": { "all": { - "0.0": { "pass@1": 0.734 }, - "0.1": { "pass@1": 0.616 }, - "0.2": { "pass@1": 0.53 }, - "0.3": { "pass@1": 0.454 }, - "0.4": { "pass@1": 0.384 }, - "0.5": { "pass@1": 0.342 }, - "0.6": { "pass@1": 0.314 }, - "0.7": { "pass@1": 0.28 }, - "0.8": { "pass@1": 0.26 }, - "0.9": { "pass@1": 0.212 }, - "1.0": { "pass@1": 0.15 } + "0.0": { "pass@1": 0.904 }, + "0.1": { "pass@1": 0.834 }, + "0.2": { "pass@1": 0.802 }, + "0.3": { "pass@1": 0.766 }, + "0.4": { "pass@1": 0.762 }, + "0.5": { "pass@1": 0.75 }, + "0.6": { "pass@1": 0.726 }, + "0.7": { "pass@1": 0.702 }, + "0.8": { "pass@1": 0.68 }, + "0.9": { "pass@1": 0.626 }, + "1.0": { "pass@1": 0.52 } }, "python": { - "0.0": { "pass@1": 0.64 }, - "0.1": { "pass@1": 0.52 }, - "0.2": { "pass@1": 0.36 }, - "0.3": { "pass@1": 0.22 }, - "0.4": { "pass@1": 0.1 }, - "0.5": { "pass@1": 0.1 }, - "0.6": { "pass@1": 0.09 }, - "0.7": { "pass@1": 0.04 }, - "0.8": { "pass@1": 0.04 }, - "0.9": { "pass@1": 0.04 }, - "1.0": { "pass@1": 0.04 } + "0.0": { "pass@1": 0.89 }, + "0.1": { "pass@1": 0.82 }, + "0.2": { "pass@1": 0.81 }, + "0.3": { "pass@1": 0.76 }, + "0.4": { "pass@1": 0.74 }, + "0.5": { "pass@1": 0.74 }, + "0.6": { "pass@1": 0.71 }, + "0.7": { "pass@1": 0.68 }, + "0.8": { "pass@1": 0.66 }, + "0.9": { "pass@1": 0.62 }, + "1.0": { "pass@1": 0.57 } }, "cpp": { - "0.0": { "pass@1": 0.69 }, - "0.1": { "pass@1": 0.59 }, - "0.2": { "pass@1": 0.54 }, - "0.3": { "pass@1": 0.51 }, - "0.4": { "pass@1": 0.48 }, - "0.5": { "pass@1": 0.41 }, - "0.6": { "pass@1": 0.36 }, - "0.7": { "pass@1": 0.32 }, - "0.8": { "pass@1": 0.3 }, - "0.9": { "pass@1": 0.25 }, - "1.0": { "pass@1": 0.17 } + "0.0": { "pass@1": 0.87 }, + "0.1": { "pass@1": 0.77 }, + "0.2": { "pass@1": 0.74 }, + "0.3": { "pass@1": 0.72 }, + "0.4": { "pass@1": 0.72 }, + "0.5": { "pass@1": 0.71 }, + "0.6": { "pass@1": 0.68 }, + "0.7": { "pass@1": 0.67 }, + "0.8": { "pass@1": 0.65 }, + "0.9": { "pass@1": 0.58 }, + "1.0": { "pass@1": 0.47 } }, "java": { - "0.0": { "pass@1": 0.84 }, - "0.1": { "pass@1": 0.68 }, - "0.2": { "pass@1": 0.61 }, - "0.3": { "pass@1": 0.56 }, - "0.4": { "pass@1": 0.47 }, - "0.5": { "pass@1": 0.43 }, - "0.6": { "pass@1": 0.4 }, - "0.7": { "pass@1": 0.38 }, - "0.8": { "pass@1": 0.36 }, - "0.9": { "pass@1": 0.29 }, - "1.0": { "pass@1": 0.23 } + "0.0": { "pass@1": 0.93 }, + "0.1": { "pass@1": 0.89 }, + "0.2": { "pass@1": 0.82 }, + "0.3": { "pass@1": 0.78 }, + "0.4": { "pass@1": 0.78 }, + "0.5": { "pass@1": 0.77 }, + "0.6": { "pass@1": 0.75 }, + "0.7": { "pass@1": 0.74 }, + "0.8": { "pass@1": 0.71 }, + "0.9": { "pass@1": 0.65 }, + "1.0": { "pass@1": 0.6 } }, "rust": { - "0.0": { "pass@1": 0.75 }, - "0.1": { "pass@1": 0.65 }, - "0.2": { "pass@1": 0.54 }, - "0.3": { "pass@1": 0.44 }, - "0.4": { "pass@1": 0.39 }, - "0.5": { "pass@1": 0.34 }, - "0.6": { "pass@1": 0.32 }, - "0.7": { "pass@1": 0.28 }, - "0.8": { "pass@1": 0.26 }, - "0.9": { "pass@1": 0.24 }, - "1.0": { "pass@1": 0.22 } + "0.0": { "pass@1": 0.87 }, + "0.1": { "pass@1": 0.81 }, + "0.2": { "pass@1": 0.77 }, + "0.3": { "pass@1": 0.72 }, + "0.4": { "pass@1": 0.72 }, + "0.5": { "pass@1": 0.69 }, + "0.6": { "pass@1": 0.67 }, + "0.7": { "pass@1": 0.66 }, + "0.8": { "pass@1": 0.64 }, + "0.9": { "pass@1": 0.62 }, + "1.0": { "pass@1": 0.56 } }, "typescript": { - "0.0": { "pass@1": 0.75 }, - "0.1": { "pass@1": 0.64 }, - "0.2": { "pass@1": 0.6 }, - "0.3": { "pass@1": 0.54 }, - "0.4": { "pass@1": 0.48 }, - "0.5": { "pass@1": 0.43 }, - "0.6": { "pass@1": 0.4 }, - "0.7": { "pass@1": 0.38 }, - "0.8": { "pass@1": 0.34 }, - "0.9": { "pass@1": 0.24 }, - "1.0": { "pass@1": 0.09 } + "0.0": { "pass@1": 0.96 }, + "0.1": { "pass@1": 0.88 }, + "0.2": { "pass@1": 0.87 }, + "0.3": { "pass@1": 0.85 }, + "0.4": { "pass@1": 0.85 }, + "0.5": { "pass@1": 0.84 }, + "0.6": { "pass@1": 0.82 }, + "0.7": { "pass@1": 0.76 }, + "0.8": { "pass@1": 0.74 }, + "0.9": { "pass@1": 0.66 }, + "1.0": { "pass@1": 0.4 } } } }, - "mistralai/Mistral-7B-Instruct-v0.3": { - "eval_date": "2024-05-22 20:31:38.617825", - "train_size": "32k", + "gpt-3.5-turbo-0125": { + "eval_date": "2024-05-13 01:38:45.762205", + "train_size": "16k", "scores": { "all": { - "0.0": { "pass@1": 0.86 }, - "0.1": { "pass@1": 0.758 }, - "0.2": { "pass@1": 0.716 }, - "0.3": { "pass@1": 0.696 }, - "0.4": { "pass@1": 0.686 }, - "0.5": { "pass@1": 0.67 }, - "0.6": { "pass@1": 0.656 }, - "0.7": { "pass@1": 0.64 }, - "0.8": { "pass@1": 0.616 }, - "0.9": { "pass@1": 0.558 }, - "1.0": { "pass@1": 0.438 } + "0.0": { "pass@1": 0.878 }, + "0.1": { "pass@1": 0.79 }, + "0.2": { "pass@1": 0.752 }, + "0.3": { "pass@1": 0.742 }, + "0.4": { "pass@1": 0.73 }, + "0.5": { "pass@1": 0.706 }, + "0.6": { "pass@1": 0.674 }, + "0.7": { "pass@1": 0.642 }, + "0.8": { "pass@1": 0.604 }, + "0.9": { "pass@1": 0.562 }, + "1.0": { "pass@1": 0.47 } }, "python": { - "0.0": { "pass@1": 0.92 }, - "0.1": { "pass@1": 0.75 }, - "0.2": { "pass@1": 0.73 }, - "0.3": { "pass@1": 0.7 }, - "0.4": { "pass@1": 0.68 }, - "0.5": { "pass@1": 0.66 }, - "0.6": { "pass@1": 0.66 }, - "0.7": { "pass@1": 0.64 }, - "0.8": { "pass@1": 0.62 }, - "0.9": { "pass@1": 0.58 }, - "1.0": { "pass@1": 0.51 } + "0.0": { "pass@1": 0.77 }, + "0.1": { "pass@1": 0.59 }, + "0.2": { "pass@1": 0.57 }, + "0.3": { "pass@1": 0.57 }, + "0.4": { "pass@1": 0.57 }, + "0.5": { "pass@1": 0.53 }, + "0.6": { "pass@1": 0.52 }, + "0.7": { "pass@1": 0.46 }, + "0.8": { "pass@1": 0.43 }, + "0.9": { "pass@1": 0.41 }, + "1.0": { "pass@1": 0.35 } }, "cpp": { - "0.0": { "pass@1": 0.78 }, - "0.1": { "pass@1": 0.65 }, - "0.2": { "pass@1": 0.62 }, - "0.3": { "pass@1": 0.61 }, - "0.4": { "pass@1": 0.61 }, - "0.5": { "pass@1": 0.59 }, - "0.6": { "pass@1": 0.57 }, - "0.7": { "pass@1": 0.56 }, - "0.8": { "pass@1": 0.56 }, - "0.9": { "pass@1": 0.51 }, - "1.0": { "pass@1": 0.41 } + "0.0": { "pass@1": 0.87 }, + "0.1": { "pass@1": 0.8 }, + "0.2": { "pass@1": 0.76 }, + "0.3": { "pass@1": 0.75 }, + "0.4": { "pass@1": 0.75 }, + "0.5": { "pass@1": 0.73 }, + "0.6": { "pass@1": 0.67 }, + "0.7": { "pass@1": 0.65 }, + "0.8": { "pass@1": 0.65 }, + "0.9": { "pass@1": 0.6 }, + "1.0": { "pass@1": 0.5 } }, "java": { - "0.0": { "pass@1": 0.85 }, - "0.1": { "pass@1": 0.77 }, - "0.2": { "pass@1": 0.75 }, - "0.3": { "pass@1": 0.7 }, - "0.4": { "pass@1": 0.69 }, - "0.5": { "pass@1": 0.68 }, - "0.6": { "pass@1": 0.66 }, - "0.7": { "pass@1": 0.62 }, - "0.8": { "pass@1": 0.6 }, - "0.9": { "pass@1": 0.57 }, - "1.0": { "pass@1": 0.52 } + "0.0": { "pass@1": 0.95 }, + "0.1": { "pass@1": 0.9 }, + "0.2": { "pass@1": 0.86 }, + "0.3": { "pass@1": 0.84 }, + "0.4": { "pass@1": 0.83 }, + "0.5": { "pass@1": 0.82 }, + "0.6": { "pass@1": 0.8 }, + "0.7": { "pass@1": 0.79 }, + "0.8": { "pass@1": 0.76 }, + "0.9": { "pass@1": 0.75 }, + "1.0": { "pass@1": 0.72 } }, "typescript": { - "0.0": { "pass@1": 0.98 }, - "0.1": { "pass@1": 0.94 }, - "0.2": { "pass@1": 0.89 }, - "0.3": { "pass@1": 0.89 }, - "0.4": { "pass@1": 0.89 }, - "0.5": { "pass@1": 0.87 }, - "0.6": { "pass@1": 0.86 }, - "0.7": { "pass@1": 0.85 }, - "0.8": { "pass@1": 0.8 }, - "0.9": { "pass@1": 0.67 }, - "1.0": { "pass@1": 0.35 } + "0.0": { "pass@1": 0.94 }, + "0.1": { "pass@1": 0.9 }, + "0.2": { "pass@1": 0.85 }, + "0.3": { "pass@1": 0.83 }, + "0.4": { "pass@1": 0.82 }, + "0.5": { "pass@1": 0.78 }, + "0.6": { "pass@1": 0.72 }, + "0.7": { "pass@1": 0.67 }, + "0.8": { "pass@1": 0.58 }, + "0.9": { "pass@1": 0.48 }, + "1.0": { "pass@1": 0.26 } }, "rust": { - "0.0": { "pass@1": 0.77 }, - "0.1": { "pass@1": 0.68 }, - "0.2": { "pass@1": 0.59 }, - "0.3": { "pass@1": 0.58 }, - "0.4": { "pass@1": 0.56 }, - "0.5": { "pass@1": 0.55 }, - "0.6": { "pass@1": 0.53 }, - "0.7": { "pass@1": 0.53 }, - "0.8": { "pass@1": 0.5 }, - "0.9": { "pass@1": 0.46 }, - "1.0": { "pass@1": 0.4 } + "0.0": { "pass@1": 0.86 }, + "0.1": { "pass@1": 0.76 }, + "0.2": { "pass@1": 0.72 }, + "0.3": { "pass@1": 0.72 }, + "0.4": { "pass@1": 0.68 }, + "0.5": { "pass@1": 0.67 }, + "0.6": { "pass@1": 0.66 }, + "0.7": { "pass@1": 0.64 }, + "0.8": { "pass@1": 0.6 }, + "0.9": { "pass@1": 0.57 }, + "1.0": { "pass@1": 0.52 } } } }, - "deepseek-ai/deepseek-coder-6.7b-instruct": { - "eval_date": "2024-05-13 01:38:29.998744", - "train_size": "16k", + "microsoft/Phi-3-medium-128k-instruct": { + "eval_date": "2024-05-22 22:30:02.520421", + "train_size": "128k", "scores": { "all": { - "0.0": { "pass@1": 0.27 }, - "0.1": { "pass@1": 0.144 }, - "0.2": { "pass@1": 0.132 }, - "0.3": { "pass@1": 0.122 }, - "0.4": { "pass@1": 0.118 }, - "0.5": { "pass@1": 0.112 }, - "0.6": { "pass@1": 0.112 }, - "0.7": { "pass@1": 0.11 }, - "0.8": { "pass@1": 0.106 }, - "0.9": { "pass@1": 0.098 }, - "1.0": { "pass@1": 0.074 } + "0.0": { "pass@1": 0.88 }, + "0.1": { "pass@1": 0.804 }, + "0.2": { "pass@1": 0.76 }, + "0.3": { "pass@1": 0.736 }, + "0.4": { "pass@1": 0.726 }, + "0.5": { "pass@1": 0.708 }, + "0.6": { "pass@1": 0.678 }, + "0.7": { "pass@1": 0.662 }, + "0.8": { "pass@1": 0.632 }, + "0.9": { "pass@1": 0.562 }, + "1.0": { "pass@1": 0.412 } }, "python": { - "0.0": { "pass@1": 0.39 }, - "0.1": { "pass@1": 0.22 }, - "0.2": { "pass@1": 0.19 }, - "0.3": { "pass@1": 0.15 }, - "0.4": { "pass@1": 0.14 }, - "0.5": { "pass@1": 0.13 }, - "0.6": { "pass@1": 0.13 }, - "0.7": { "pass@1": 0.12 }, - "0.8": { "pass@1": 0.11 }, - "0.9": { "pass@1": 0.1 }, - "1.0": { "pass@1": 0.08 } - }, - "cpp": { - "0.0": { "pass@1": 0.36 }, - "0.1": { "pass@1": 0.25 }, - "0.2": { "pass@1": 0.23 }, - "0.3": { "pass@1": 0.23 }, - "0.4": { "pass@1": 0.23 }, - "0.5": { "pass@1": 0.22 }, - "0.6": { "pass@1": 0.22 }, - "0.7": { "pass@1": 0.22 }, - "0.8": { "pass@1": 0.21 }, - "0.9": { "pass@1": 0.19 }, - "1.0": { "pass@1": 0.15 } - }, - "java": { - "0.0": { "pass@1": 0.22 }, - "0.1": { "pass@1": 0.05 }, - "0.2": { "pass@1": 0.04 }, - "0.3": { "pass@1": 0.03 }, - "0.4": { "pass@1": 0.03 }, - "0.5": { "pass@1": 0.03 }, - "0.6": { "pass@1": 0.03 }, - "0.7": { "pass@1": 0.03 }, - "0.8": { "pass@1": 0.03 }, - "0.9": { "pass@1": 0.03 }, - "1.0": { "pass@1": 0.03 } - }, - "rust": { - "0.0": { "pass@1": 0.07 }, - "0.1": { "pass@1": 0.03 }, - "0.2": { "pass@1": 0.03 }, - "0.3": { "pass@1": 0.03 }, - "0.4": { "pass@1": 0.03 }, - "0.5": { "pass@1": 0.02 }, - "0.6": { "pass@1": 0.02 }, - "0.7": { "pass@1": 0.02 }, - "0.8": { "pass@1": 0.02 }, - "0.9": { "pass@1": 0.02 }, - "1.0": { "pass@1": 0.01 } - }, - "typescript": { - "0.0": { "pass@1": 0.31 }, - "0.1": { "pass@1": 0.17 }, - "0.2": { "pass@1": 0.17 }, - "0.3": { "pass@1": 0.17 }, - "0.4": { "pass@1": 0.16 }, - "0.5": { "pass@1": 0.16 }, - "0.6": { "pass@1": 0.16 }, - "0.7": { "pass@1": 0.16 }, - "0.8": { "pass@1": 0.16 }, - "0.9": { "pass@1": 0.15 }, - "1.0": { "pass@1": 0.1 } - } - } - }, - "CohereForAI/c4ai-command-r-plus": { - "eval_date": "2024-05-13 01:38:17.550466", - "train_size": "128k", - "scores": { - "all": { - "0.0": { "pass@1": 0.93 }, - "0.1": { "pass@1": 0.874 }, - "0.2": { "pass@1": 0.862 }, - "0.3": { "pass@1": 0.846 }, - "0.4": { "pass@1": 0.838 }, - "0.5": { "pass@1": 0.828 }, - "0.6": { "pass@1": 0.808 }, - "0.7": { "pass@1": 0.796 }, - "0.8": { "pass@1": 0.784 }, - "0.9": { "pass@1": 0.75 }, - "1.0": { "pass@1": 0.624 } - }, - "python": { - "0.0": { "pass@1": 0.94 }, - "0.1": { "pass@1": 0.86 }, - "0.2": { "pass@1": 0.85 }, - "0.3": { "pass@1": 0.84 }, - "0.4": { "pass@1": 0.84 }, - "0.5": { "pass@1": 0.84 }, - "0.6": { "pass@1": 0.82 }, - "0.7": { "pass@1": 0.82 }, - "0.8": { "pass@1": 0.81 }, - "0.9": { "pass@1": 0.79 }, - "1.0": { "pass@1": 0.73 } + "0.0": { "pass@1": 0.95 }, + "0.1": { "pass@1": 0.82 }, + "0.2": { "pass@1": 0.79 }, + "0.3": { "pass@1": 0.75 }, + "0.4": { "pass@1": 0.75 }, + "0.5": { "pass@1": 0.71 }, + "0.6": { "pass@1": 0.64 }, + "0.7": { "pass@1": 0.61 }, + "0.8": { "pass@1": 0.57 }, + "0.9": { "pass@1": 0.51 }, + "1.0": { "pass@1": 0.4 } }, "cpp": { - "0.0": { "pass@1": 0.86 }, - "0.1": { "pass@1": 0.8 }, - "0.2": { "pass@1": 0.79 }, - "0.3": { "pass@1": 0.77 }, - "0.4": { "pass@1": 0.77 }, - "0.5": { "pass@1": 0.77 }, - "0.6": { "pass@1": 0.75 }, - "0.7": { "pass@1": 0.74 }, - "0.8": { "pass@1": 0.74 }, - "0.9": { "pass@1": 0.71 }, - "1.0": { "pass@1": 0.61 } + "0.0": { "pass@1": 0.81 }, + "0.1": { "pass@1": 0.72 }, + "0.2": { "pass@1": 0.68 }, + "0.3": { "pass@1": 0.66 }, + "0.4": { "pass@1": 0.64 }, + "0.5": { "pass@1": 0.64 }, + "0.6": { "pass@1": 0.61 }, + "0.7": { "pass@1": 0.58 }, + "0.8": { "pass@1": 0.54 }, + "0.9": { "pass@1": 0.51 }, + "1.0": { "pass@1": 0.37 } }, "java": { - "0.0": { "pass@1": 0.96 }, - "0.1": { "pass@1": 0.93 }, - "0.2": { "pass@1": 0.9 }, - "0.3": { "pass@1": 0.89 }, - "0.4": { "pass@1": 0.88 }, - "0.5": { "pass@1": 0.87 }, - "0.6": { "pass@1": 0.87 }, - "0.7": { "pass@1": 0.87 }, - "0.8": { "pass@1": 0.84 }, - "0.9": { "pass@1": 0.79 }, - "1.0": { "pass@1": 0.74 } + "0.0": { "pass@1": 0.85 }, + "0.1": { "pass@1": 0.8 }, + "0.2": { "pass@1": 0.75 }, + "0.3": { "pass@1": 0.72 }, + "0.4": { "pass@1": 0.72 }, + "0.5": { "pass@1": 0.71 }, + "0.6": { "pass@1": 0.7 }, + "0.7": { "pass@1": 0.69 }, + "0.8": { "pass@1": 0.69 }, + "0.9": { "pass@1": 0.59 }, + "1.0": { "pass@1": 0.5 } }, "typescript": { - "0.0": { "pass@1": 1.0 }, - "0.1": { "pass@1": 0.96 }, - "0.2": { "pass@1": 0.95 }, - "0.3": { "pass@1": 0.92 }, - "0.4": { "pass@1": 0.9 }, - "0.5": { "pass@1": 0.87 }, - "0.6": { "pass@1": 0.83 }, - "0.7": { "pass@1": 0.78 }, - "0.8": { "pass@1": 0.77 }, - "0.9": { "pass@1": 0.71 }, - "1.0": { "pass@1": 0.36 } + "0.0": { "pass@1": 0.96 }, + "0.1": { "pass@1": 0.93 }, + "0.2": { "pass@1": 0.88 }, + "0.3": { "pass@1": 0.86 }, + "0.4": { "pass@1": 0.84 }, + "0.5": { "pass@1": 0.81 }, + "0.6": { "pass@1": 0.79 }, + "0.7": { "pass@1": 0.79 }, + "0.8": { "pass@1": 0.74 }, + "0.9": { "pass@1": 0.64 }, + "1.0": { "pass@1": 0.3 } }, "rust": { - "0.0": { "pass@1": 0.89 }, - "0.1": { "pass@1": 0.82 }, - "0.2": { "pass@1": 0.82 }, - "0.3": { "pass@1": 0.81 }, - "0.4": { "pass@1": 0.8 }, - "0.5": { "pass@1": 0.79 }, - "0.6": { "pass@1": 0.77 }, - "0.7": { "pass@1": 0.77 }, - "0.8": { "pass@1": 0.76 }, - "0.9": { "pass@1": 0.75 }, - "1.0": { "pass@1": 0.68 } + "0.0": { "pass@1": 0.83 }, + "0.1": { "pass@1": 0.75 }, + "0.2": { "pass@1": 0.7 }, + "0.3": { "pass@1": 0.69 }, + "0.4": { "pass@1": 0.68 }, + "0.5": { "pass@1": 0.67 }, + "0.6": { "pass@1": 0.65 }, + "0.7": { "pass@1": 0.64 }, + "0.8": { "pass@1": 0.62 }, + "0.9": { "pass@1": 0.56 }, + "1.0": { "pass@1": 0.49 } } } }, - "gemini-1.5-pro-latest": { - "eval_date": "2024-05-19 07:16:46.769202", - "train_size": "1000k", + "claude-3-sonnet-20240229": { + "eval_date": "2024-05-13 01:37:53.983726", + "train_size": "200k", "scores": { "all": { - "0.0": { "pass@1": 0.942 }, - "0.1": { "pass@1": 0.93 }, - "0.2": { "pass@1": 0.924 }, - "0.3": { "pass@1": 0.92 }, - "0.4": { "pass@1": 0.918 }, - "0.5": { "pass@1": 0.918 }, - "0.6": { "pass@1": 0.91 }, - "0.7": { "pass@1": 0.906 }, - "0.8": { "pass@1": 0.906 }, - "0.9": { "pass@1": 0.894 }, - "1.0": { "pass@1": 0.814 } + "0.0": { "pass@1": 0.95 }, + "0.1": { "pass@1": 0.92 }, + "0.2": { "pass@1": 0.908 }, + "0.3": { "pass@1": 0.9 }, + "0.4": { "pass@1": 0.9 }, + "0.5": { "pass@1": 0.898 }, + "0.6": { "pass@1": 0.882 }, + "0.7": { "pass@1": 0.88 }, + "0.8": { "pass@1": 0.874 }, + "0.9": { "pass@1": 0.86 }, + "1.0": { "pass@1": 0.774 } }, "python": { - "0.0": { "pass@1": 0.94 }, + "0.0": { "pass@1": 0.98 }, "0.1": { "pass@1": 0.93 }, "0.2": { "pass@1": 0.93 }, "0.3": { "pass@1": 0.93 }, - "0.4": { "pass@1": 0.92 }, + "0.4": { "pass@1": 0.93 }, "0.5": { "pass@1": 0.92 }, - "0.6": { "pass@1": 0.92 }, - "0.7": { "pass@1": 0.91 }, - "0.8": { "pass@1": 0.91 }, - "0.9": { "pass@1": 0.91 }, - "1.0": { "pass@1": 0.9 } + "0.6": { "pass@1": 0.9 }, + "0.7": { "pass@1": 0.9 }, + "0.8": { "pass@1": 0.88 }, + "0.9": { "pass@1": 0.87 }, + "1.0": { "pass@1": 0.87 } }, "cpp": { - "0.0": { "pass@1": 0.87 }, - "0.1": { "pass@1": 0.86 }, - "0.2": { "pass@1": 0.84 }, - "0.3": { "pass@1": 0.84 }, - "0.4": { "pass@1": 0.84 }, - "0.5": { "pass@1": 0.84 }, - "0.6": { "pass@1": 0.82 }, + "0.0": { "pass@1": 0.89 }, + "0.1": { "pass@1": 0.83 }, + "0.2": { "pass@1": 0.82 }, + "0.3": { "pass@1": 0.82 }, + "0.4": { "pass@1": 0.82 }, + "0.5": { "pass@1": 0.82 }, + "0.6": { "pass@1": 0.81 }, "0.7": { "pass@1": 0.81 }, "0.8": { "pass@1": 0.81 }, "0.9": { "pass@1": 0.8 }, - "1.0": { "pass@1": 0.77 } + "1.0": { "pass@1": 0.74 } }, "java": { - "0.0": { "pass@1": 0.97 }, - "0.1": { "pass@1": 0.96 }, - "0.2": { "pass@1": 0.95 }, - "0.3": { "pass@1": 0.95 }, - "0.4": { "pass@1": 0.95 }, - "0.5": { "pass@1": 0.95 }, - "0.6": { "pass@1": 0.94 }, - "0.7": { "pass@1": 0.94 }, - "0.8": { "pass@1": 0.94 }, - "0.9": { "pass@1": 0.94 }, - "1.0": { "pass@1": 0.94 } + "0.0": { "pass@1": 0.96 }, + "0.1": { "pass@1": 0.94 }, + "0.2": { "pass@1": 0.94 }, + "0.3": { "pass@1": 0.93 }, + "0.4": { "pass@1": 0.93 }, + "0.5": { "pass@1": 0.93 }, + "0.6": { "pass@1": 0.92 }, + "0.7": { "pass@1": 0.92 }, + "0.8": { "pass@1": 0.92 }, + "0.9": { "pass@1": 0.9 }, + "1.0": { "pass@1": 0.9 } }, "typescript": { - "0.0": { "pass@1": 0.96 }, - "0.1": { "pass@1": 0.96 }, + "0.0": { "pass@1": 0.99 }, + "0.1": { "pass@1": 0.99 }, "0.2": { "pass@1": 0.96 }, - "0.3": { "pass@1": 0.96 }, - "0.4": { "pass@1": 0.96 }, - "0.5": { "pass@1": 0.96 }, - "0.6": { "pass@1": 0.96 }, - "0.7": { "pass@1": 0.96 }, - "0.8": { "pass@1": 0.96 }, - "0.9": { "pass@1": 0.92 }, - "1.0": { "pass@1": 0.56 } + "0.3": { "pass@1": 0.94 }, + "0.4": { "pass@1": 0.94 }, + "0.5": { "pass@1": 0.94 }, + "0.6": { "pass@1": 0.92 }, + "0.7": { "pass@1": 0.92 }, + "0.8": { "pass@1": 0.91 }, + "0.9": { "pass@1": 0.88 }, + "1.0": { "pass@1": 0.52 } }, "rust": { - "0.0": { "pass@1": 0.97 }, - "0.1": { "pass@1": 0.94 }, - "0.2": { "pass@1": 0.94 }, - "0.3": { "pass@1": 0.92 }, - "0.4": { "pass@1": 0.92 }, - "0.5": { "pass@1": 0.92 }, - "0.6": { "pass@1": 0.91 }, - "0.7": { "pass@1": 0.91 }, - "0.8": { "pass@1": 0.91 }, - "0.9": { "pass@1": 0.9 }, - "1.0": { "pass@1": 0.9 } + "0.0": { "pass@1": 0.93 }, + "0.1": { "pass@1": 0.91 }, + "0.2": { "pass@1": 0.89 }, + "0.3": { "pass@1": 0.88 }, + "0.4": { "pass@1": 0.88 }, + "0.5": { "pass@1": 0.88 }, + "0.6": { "pass@1": 0.86 }, + "0.7": { "pass@1": 0.85 }, + "0.8": { "pass@1": 0.85 }, + "0.9": { "pass@1": 0.85 }, + "1.0": { "pass@1": 0.84 } } } }, - "gpt-4o-2024-05-13": { - "eval_date": "2024-05-18 21:37:11.078575", - "train_size": "128k", + "mistralai/Mistral-7B-Instruct-v0.2": { + "eval_date": "2024-05-13 01:30:36.208632", + "train_size": "32k", "scores": { "all": { - "0.0": { "pass@1": 0.952 }, - "0.1": { "pass@1": 0.93 }, - "0.2": { "pass@1": 0.922 }, - "0.3": { "pass@1": 0.92 }, - "0.4": { "pass@1": 0.92 }, - "0.5": { "pass@1": 0.92 }, - "0.6": { "pass@1": 0.908 }, - "0.7": { "pass@1": 0.908 }, - "0.8": { "pass@1": 0.906 }, - "0.9": { "pass@1": 0.894 }, - "1.0": { "pass@1": 0.812 } + "0.0": { "pass@1": 0.852 }, + "0.1": { "pass@1": 0.766 }, + "0.2": { "pass@1": 0.708 }, + "0.3": { "pass@1": 0.674 }, + "0.4": { "pass@1": 0.638 }, + "0.5": { "pass@1": 0.598 }, + "0.6": { "pass@1": 0.56 }, + "0.7": { "pass@1": 0.526 }, + "0.8": { "pass@1": 0.474 }, + "0.9": { "pass@1": 0.424 }, + "1.0": { "pass@1": 0.294 } }, "python": { - "0.0": { "pass@1": 0.96 }, - "0.1": { "pass@1": 0.96 }, - "0.2": { "pass@1": 0.96 }, - "0.3": { "pass@1": 0.96 }, - "0.4": { "pass@1": 0.96 }, - "0.5": { "pass@1": 0.96 }, - "0.6": { "pass@1": 0.95 }, - "0.7": { "pass@1": 0.95 }, - "0.8": { "pass@1": 0.95 }, - "0.9": { "pass@1": 0.94 }, - "1.0": { "pass@1": 0.94 } + "0.0": { "pass@1": 0.89 }, + "0.1": { "pass@1": 0.77 }, + "0.2": { "pass@1": 0.7 }, + "0.3": { "pass@1": 0.65 }, + "0.4": { "pass@1": 0.6 }, + "0.5": { "pass@1": 0.53 }, + "0.6": { "pass@1": 0.48 }, + "0.7": { "pass@1": 0.43 }, + "0.8": { "pass@1": 0.38 }, + "0.9": { "pass@1": 0.31 }, + "1.0": { "pass@1": 0.23 } }, "cpp": { - "0.0": { "pass@1": 0.89 }, - "0.1": { "pass@1": 0.85 }, - "0.2": { "pass@1": 0.83 }, - "0.3": { "pass@1": 0.83 }, - "0.4": { "pass@1": 0.83 }, - "0.5": { "pass@1": 0.83 }, - "0.6": { "pass@1": 0.8 }, - "0.7": { "pass@1": 0.8 }, - "0.8": { "pass@1": 0.8 }, - "0.9": { "pass@1": 0.79 }, - "1.0": { "pass@1": 0.75 } + "0.0": { "pass@1": 0.79 }, + "0.1": { "pass@1": 0.68 }, + "0.2": { "pass@1": 0.64 }, + "0.3": { "pass@1": 0.61 }, + "0.4": { "pass@1": 0.61 }, + "0.5": { "pass@1": 0.57 }, + "0.6": { "pass@1": 0.56 }, + "0.7": { "pass@1": 0.53 }, + "0.8": { "pass@1": 0.5 }, + "0.9": { "pass@1": 0.44 }, + "1.0": { "pass@1": 0.31 } }, "java": { - "0.0": { "pass@1": 0.98 }, - "0.1": { "pass@1": 0.98 }, - "0.2": { "pass@1": 0.97 }, - "0.3": { "pass@1": 0.97 }, - "0.4": { "pass@1": 0.97 }, - "0.5": { "pass@1": 0.97 }, - "0.6": { "pass@1": 0.96 }, - "0.7": { "pass@1": 0.96 }, - "0.8": { "pass@1": 0.96 }, - "0.9": { "pass@1": 0.96 }, - "1.0": { "pass@1": 0.96 } + "0.0": { "pass@1": 0.87 }, + "0.1": { "pass@1": 0.8 }, + "0.2": { "pass@1": 0.71 }, + "0.3": { "pass@1": 0.66 }, + "0.4": { "pass@1": 0.65 }, + "0.5": { "pass@1": 0.63 }, + "0.6": { "pass@1": 0.57 }, + "0.7": { "pass@1": 0.54 }, + "0.8": { "pass@1": 0.45 }, + "0.9": { "pass@1": 0.43 }, + "1.0": { "pass@1": 0.34 } }, "typescript": { - "0.0": { "pass@1": 1.0 }, - "0.1": { "pass@1": 0.98 }, - "0.2": { "pass@1": 0.98 }, - "0.3": { "pass@1": 0.98 }, - "0.4": { "pass@1": 0.98 }, - "0.5": { "pass@1": 0.98 }, - "0.6": { "pass@1": 0.98 }, - "0.7": { "pass@1": 0.98 }, - "0.8": { "pass@1": 0.97 }, - "0.9": { "pass@1": 0.93 }, - "1.0": { "pass@1": 0.56 } - }, - "rust": { "0.0": { "pass@1": 0.93 }, - "0.1": { "pass@1": 0.88 }, + "0.1": { "pass@1": 0.91 }, "0.2": { "pass@1": 0.87 }, - "0.3": { "pass@1": 0.86 }, - "0.4": { "pass@1": 0.86 }, - "0.5": { "pass@1": 0.86 }, - "0.6": { "pass@1": 0.85 }, - "0.7": { "pass@1": 0.85 }, - "0.8": { "pass@1": 0.85 }, - "0.9": { "pass@1": 0.85 }, - "1.0": { "pass@1": 0.85 } + "0.3": { "pass@1": 0.87 }, + "0.4": { "pass@1": 0.81 }, + "0.5": { "pass@1": 0.76 }, + "0.6": { "pass@1": 0.72 }, + "0.7": { "pass@1": 0.67 }, + "0.8": { "pass@1": 0.6 }, + "0.9": { "pass@1": 0.52 }, + "1.0": { "pass@1": 0.25 } + }, + "rust": { + "0.0": { "pass@1": 0.78 }, + "0.1": { "pass@1": 0.67 }, + "0.2": { "pass@1": 0.62 }, + "0.3": { "pass@1": 0.58 }, + "0.4": { "pass@1": 0.52 }, + "0.5": { "pass@1": 0.5 }, + "0.6": { "pass@1": 0.47 }, + "0.7": { "pass@1": 0.46 }, + "0.8": { "pass@1": 0.44 }, + "0.9": { "pass@1": 0.42 }, + "1.0": { "pass@1": 0.34 } } } }, - "google/codegemma-7b-it": { - "eval_date": "2024-05-13 01:38:40.273310", - "train_size": "8k", + "microsoft/Phi-3-mini-128k-instruct": { + "eval_date": "2024-05-13 01:26:47.347692", + "train_size": "128k", "scores": { "all": { - "0.0": { "pass@1": 0.074 }, - "0.1": { "pass@1": 0.028 }, - "0.2": { "pass@1": 0.026 }, - "0.3": { "pass@1": 0.024 }, - "0.4": { "pass@1": 0.024 }, - "0.5": { "pass@1": 0.024 }, - "0.6": { "pass@1": 0.022 }, - "0.7": { "pass@1": 0.022 }, - "0.8": { "pass@1": 0.022 }, - "0.9": { "pass@1": 0.018 }, - "1.0": { "pass@1": 0.016 } + "0.0": { "pass@1": 0.744 }, + "0.1": { "pass@1": 0.59 }, + "0.2": { "pass@1": 0.502 }, + "0.3": { "pass@1": 0.454 }, + "0.4": { "pass@1": 0.4 }, + "0.5": { "pass@1": 0.366 }, + "0.6": { "pass@1": 0.324 }, + "0.7": { "pass@1": 0.29 }, + "0.8": { "pass@1": 0.224 }, + "0.9": { "pass@1": 0.164 }, + "1.0": { "pass@1": 0.104 } }, "python": { - "0.0": { "pass@1": 0.07 }, - "0.1": { "pass@1": 0.03 }, - "0.2": { "pass@1": 0.03 }, - "0.3": { "pass@1": 0.03 }, - "0.4": { "pass@1": 0.03 }, - "0.5": { "pass@1": 0.03 }, - "0.6": { "pass@1": 0.03 }, - "0.7": { "pass@1": 0.03 }, - "0.8": { "pass@1": 0.03 }, - "0.9": { "pass@1": 0.03 }, - "1.0": { "pass@1": 0.03 } + "0.0": { "pass@1": 0.81 }, + "0.1": { "pass@1": 0.63 }, + "0.2": { "pass@1": 0.55 }, + "0.3": { "pass@1": 0.48 }, + "0.4": { "pass@1": 0.38 }, + "0.5": { "pass@1": 0.31 }, + "0.6": { "pass@1": 0.25 }, + "0.7": { "pass@1": 0.2 }, + "0.8": { "pass@1": 0.19 }, + "0.9": { "pass@1": 0.15 }, + "1.0": { "pass@1": 0.13 } }, "cpp": { - "0.0": { "pass@1": 0.07 }, - "0.1": { "pass@1": 0.03 }, - "0.2": { "pass@1": 0.03 }, - "0.3": { "pass@1": 0.03 }, - "0.4": { "pass@1": 0.03 }, - "0.5": { "pass@1": 0.03 }, - "0.6": { "pass@1": 0.02 }, - "0.7": { "pass@1": 0.02 }, - "0.8": { "pass@1": 0.02 }, - "0.9": { "pass@1": 0.02 }, - "1.0": { "pass@1": 0.02 } + "0.0": { "pass@1": 0.67 }, + "0.1": { "pass@1": 0.43 }, + "0.2": { "pass@1": 0.41 }, + "0.3": { "pass@1": 0.38 }, + "0.4": { "pass@1": 0.35 }, + "0.5": { "pass@1": 0.34 }, + "0.6": { "pass@1": 0.3 }, + "0.7": { "pass@1": 0.3 }, + "0.8": { "pass@1": 0.25 }, + "0.9": { "pass@1": 0.18 }, + "1.0": { "pass@1": 0.14 } }, "java": { - "0.0": { "pass@1": 0.07 }, - "0.1": { "pass@1": 0.03 }, - "0.2": { "pass@1": 0.02 }, - "0.3": { "pass@1": 0.01 }, - "0.4": { "pass@1": 0.01 }, - "0.5": { "pass@1": 0.01 }, - "0.6": { "pass@1": 0.01 }, - "0.7": { "pass@1": 0.01 }, - "0.8": { "pass@1": 0.01 }, - "0.9": { "pass@1": 0.01 }, - "1.0": { "pass@1": 0.01 } - }, - "rust": { - "0.0": { "pass@1": 0.08 }, - "0.1": { "pass@1": 0.01 }, - "0.2": { "pass@1": 0.01 }, - "0.3": { "pass@1": 0.01 }, - "0.4": { "pass@1": 0.01 }, - "0.5": { "pass@1": 0.01 }, - "0.6": { "pass@1": 0.01 }, - "0.7": { "pass@1": 0.01 }, - "0.8": { "pass@1": 0.01 }, - "0.9": { "pass@1": 0.01 }, - "1.0": { "pass@1": 0.0 } + "0.0": { "pass@1": 0.71 }, + "0.1": { "pass@1": 0.6 }, + "0.2": { "pass@1": 0.49 }, + "0.3": { "pass@1": 0.45 }, + "0.4": { "pass@1": 0.42 }, + "0.5": { "pass@1": 0.39 }, + "0.6": { "pass@1": 0.34 }, + "0.7": { "pass@1": 0.31 }, + "0.8": { "pass@1": 0.21 }, + "0.9": { "pass@1": 0.15 }, + "1.0": { "pass@1": 0.1 } }, "typescript": { - "0.0": { "pass@1": 0.08 }, - "0.1": { "pass@1": 0.04 }, - "0.2": { "pass@1": 0.04 }, - "0.3": { "pass@1": 0.04 }, - "0.4": { "pass@1": 0.04 }, - "0.5": { "pass@1": 0.04 }, - "0.6": { "pass@1": 0.04 }, - "0.7": { "pass@1": 0.04 }, - "0.8": { "pass@1": 0.04 }, - "0.9": { "pass@1": 0.02 }, - "1.0": { "pass@1": 0.02 } + "0.0": { "pass@1": 0.78 }, + "0.1": { "pass@1": 0.67 }, + "0.2": { "pass@1": 0.57 }, + "0.3": { "pass@1": 0.49 }, + "0.4": { "pass@1": 0.45 }, + "0.5": { "pass@1": 0.41 }, + "0.6": { "pass@1": 0.37 }, + "0.7": { "pass@1": 0.33 }, + "0.8": { "pass@1": 0.22 }, + "0.9": { "pass@1": 0.14 }, + "1.0": { "pass@1": 0.03 } + }, + "rust": { + "0.0": { "pass@1": 0.75 }, + "0.1": { "pass@1": 0.62 }, + "0.2": { "pass@1": 0.49 }, + "0.3": { "pass@1": 0.47 }, + "0.4": { "pass@1": 0.4 }, + "0.5": { "pass@1": 0.38 }, + "0.6": { "pass@1": 0.36 }, + "0.7": { "pass@1": 0.31 }, + "0.8": { "pass@1": 0.25 }, + "0.9": { "pass@1": 0.2 }, + "1.0": { "pass@1": 0.12 } } } }, - "gpt-3.5-turbo-0125": { - "eval_date": "2024-05-13 01:38:45.762205", + "deepseek-ai/deepseek-coder-6.7b-instruct": { + "eval_date": "2024-05-13 01:38:29.998744", "train_size": "16k", "scores": { "all": { - "0.0": { "pass@1": 0.878 }, - "0.1": { "pass@1": 0.79 }, - "0.2": { "pass@1": 0.752 }, - "0.3": { "pass@1": 0.742 }, - "0.4": { "pass@1": 0.73 }, - "0.5": { "pass@1": 0.706 }, - "0.6": { "pass@1": 0.674 }, - "0.7": { "pass@1": 0.642 }, - "0.8": { "pass@1": 0.604 }, - "0.9": { "pass@1": 0.562 }, - "1.0": { "pass@1": 0.47 } + "0.0": { "pass@1": 0.27 }, + "0.1": { "pass@1": 0.144 }, + "0.2": { "pass@1": 0.132 }, + "0.3": { "pass@1": 0.122 }, + "0.4": { "pass@1": 0.118 }, + "0.5": { "pass@1": 0.112 }, + "0.6": { "pass@1": 0.112 }, + "0.7": { "pass@1": 0.11 }, + "0.8": { "pass@1": 0.106 }, + "0.9": { "pass@1": 0.098 }, + "1.0": { "pass@1": 0.074 } }, "python": { - "0.0": { "pass@1": 0.77 }, - "0.1": { "pass@1": 0.59 }, - "0.2": { "pass@1": 0.57 }, - "0.3": { "pass@1": 0.57 }, - "0.4": { "pass@1": 0.57 }, - "0.5": { "pass@1": 0.53 }, - "0.6": { "pass@1": 0.52 }, - "0.7": { "pass@1": 0.46 }, - "0.8": { "pass@1": 0.43 }, - "0.9": { "pass@1": 0.41 }, - "1.0": { "pass@1": 0.35 } + "0.0": { "pass@1": 0.39 }, + "0.1": { "pass@1": 0.22 }, + "0.2": { "pass@1": 0.19 }, + "0.3": { "pass@1": 0.15 }, + "0.4": { "pass@1": 0.14 }, + "0.5": { "pass@1": 0.13 }, + "0.6": { "pass@1": 0.13 }, + "0.7": { "pass@1": 0.12 }, + "0.8": { "pass@1": 0.11 }, + "0.9": { "pass@1": 0.1 }, + "1.0": { "pass@1": 0.08 } }, "cpp": { - "0.0": { "pass@1": 0.87 }, - "0.1": { "pass@1": 0.8 }, - "0.2": { "pass@1": 0.76 }, - "0.3": { "pass@1": 0.75 }, - "0.4": { "pass@1": 0.75 }, - "0.5": { "pass@1": 0.73 }, - "0.6": { "pass@1": 0.67 }, - "0.7": { "pass@1": 0.65 }, - "0.8": { "pass@1": 0.65 }, - "0.9": { "pass@1": 0.6 }, - "1.0": { "pass@1": 0.5 } + "0.0": { "pass@1": 0.36 }, + "0.1": { "pass@1": 0.25 }, + "0.2": { "pass@1": 0.23 }, + "0.3": { "pass@1": 0.23 }, + "0.4": { "pass@1": 0.23 }, + "0.5": { "pass@1": 0.22 }, + "0.6": { "pass@1": 0.22 }, + "0.7": { "pass@1": 0.22 }, + "0.8": { "pass@1": 0.21 }, + "0.9": { "pass@1": 0.19 }, + "1.0": { "pass@1": 0.15 } }, "java": { - "0.0": { "pass@1": 0.95 }, - "0.1": { "pass@1": 0.9 }, - "0.2": { "pass@1": 0.86 }, - "0.3": { "pass@1": 0.84 }, - "0.4": { "pass@1": 0.83 }, - "0.5": { "pass@1": 0.82 }, - "0.6": { "pass@1": 0.8 }, - "0.7": { "pass@1": 0.79 }, - "0.8": { "pass@1": 0.76 }, - "0.9": { "pass@1": 0.75 }, - "1.0": { "pass@1": 0.72 } - }, - "typescript": { - "0.0": { "pass@1": 0.94 }, - "0.1": { "pass@1": 0.9 }, - "0.2": { "pass@1": 0.85 }, - "0.3": { "pass@1": 0.83 }, - "0.4": { "pass@1": 0.82 }, - "0.5": { "pass@1": 0.78 }, - "0.6": { "pass@1": 0.72 }, - "0.7": { "pass@1": 0.67 }, - "0.8": { "pass@1": 0.58 }, - "0.9": { "pass@1": 0.48 }, - "1.0": { "pass@1": 0.26 } + "0.0": { "pass@1": 0.22 }, + "0.1": { "pass@1": 0.05 }, + "0.2": { "pass@1": 0.04 }, + "0.3": { "pass@1": 0.03 }, + "0.4": { "pass@1": 0.03 }, + "0.5": { "pass@1": 0.03 }, + "0.6": { "pass@1": 0.03 }, + "0.7": { "pass@1": 0.03 }, + "0.8": { "pass@1": 0.03 }, + "0.9": { "pass@1": 0.03 }, + "1.0": { "pass@1": 0.03 } }, "rust": { - "0.0": { "pass@1": 0.86 }, - "0.1": { "pass@1": 0.76 }, - "0.2": { "pass@1": 0.72 }, - "0.3": { "pass@1": 0.72 }, - "0.4": { "pass@1": 0.68 }, - "0.5": { "pass@1": 0.67 }, - "0.6": { "pass@1": 0.66 }, - "0.7": { "pass@1": 0.64 }, - "0.8": { "pass@1": 0.6 }, - "0.9": { "pass@1": 0.57 }, - "1.0": { "pass@1": 0.52 } + "0.0": { "pass@1": 0.07 }, + "0.1": { "pass@1": 0.03 }, + "0.2": { "pass@1": 0.03 }, + "0.3": { "pass@1": 0.03 }, + "0.4": { "pass@1": 0.03 }, + "0.5": { "pass@1": 0.02 }, + "0.6": { "pass@1": 0.02 }, + "0.7": { "pass@1": 0.02 }, + "0.8": { "pass@1": 0.02 }, + "0.9": { "pass@1": 0.02 }, + "1.0": { "pass@1": 0.01 } + }, + "typescript": { + "0.0": { "pass@1": 0.31 }, + "0.1": { "pass@1": 0.17 }, + "0.2": { "pass@1": 0.17 }, + "0.3": { "pass@1": 0.17 }, + "0.4": { "pass@1": 0.16 }, + "0.5": { "pass@1": 0.16 }, + "0.6": { "pass@1": 0.16 }, + "0.7": { "pass@1": 0.16 }, + "0.8": { "pass@1": 0.16 }, + "0.9": { "pass@1": 0.15 }, + "1.0": { "pass@1": 0.1 } } } }, - "Qwen/Qwen1.5-72B-Chat": { - "eval_date": "2024-05-13 01:39:56.886925", + "mistralai/Mistral-7B-Instruct-v0.3": { + "eval_date": "2024-05-22 21:40:16.998989", "train_size": "32k", "scores": { "all": { - "0.0": { "pass@1": 0.862 }, + "0.0": { "pass@1": 0.864 }, "0.1": { "pass@1": 0.774 }, - "0.2": { "pass@1": 0.714 }, - "0.3": { "pass@1": 0.684 }, - "0.4": { "pass@1": 0.644 }, - "0.5": { "pass@1": 0.61 }, - "0.6": { "pass@1": 0.57 }, - "0.7": { "pass@1": 0.54 }, - "0.8": { "pass@1": 0.5 }, - "0.9": { "pass@1": 0.432 }, - "1.0": { "pass@1": 0.328 } + "0.2": { "pass@1": 0.73 }, + "0.3": { "pass@1": 0.712 }, + "0.4": { "pass@1": 0.694 }, + "0.5": { "pass@1": 0.68 }, + "0.6": { "pass@1": 0.666 }, + "0.7": { "pass@1": 0.648 }, + "0.8": { "pass@1": 0.62 }, + "0.9": { "pass@1": 0.566 }, + "1.0": { "pass@1": 0.44 } }, "python": { - "0.0": { "pass@1": 0.89 }, - "0.1": { "pass@1": 0.75 }, - "0.2": { "pass@1": 0.69 }, - "0.3": { "pass@1": 0.66 }, - "0.4": { "pass@1": 0.6 }, - "0.5": { "pass@1": 0.57 }, - "0.6": { "pass@1": 0.54 }, - "0.7": { "pass@1": 0.5 }, - "0.8": { "pass@1": 0.47 }, - "0.9": { "pass@1": 0.39 }, - "1.0": { "pass@1": 0.31 } - }, - "cpp": { - "0.0": { "pass@1": 0.74 }, - "0.1": { "pass@1": 0.64 }, - "0.2": { "pass@1": 0.57 }, - "0.3": { "pass@1": 0.54 }, - "0.4": { "pass@1": 0.51 }, - "0.5": { "pass@1": 0.51 }, - "0.6": { "pass@1": 0.47 }, - "0.7": { "pass@1": 0.46 }, - "0.8": { "pass@1": 0.45 }, - "0.9": { "pass@1": 0.4 }, - "1.0": { "pass@1": 0.26 } - }, - "java": { - "0.0": { "pass@1": 0.91 }, - "0.1": { "pass@1": 0.87 }, - "0.2": { "pass@1": 0.8 }, - "0.3": { "pass@1": 0.79 }, - "0.4": { "pass@1": 0.74 }, - "0.5": { "pass@1": 0.7 }, + "0.0": { "pass@1": 0.93 }, + "0.1": { "pass@1": 0.77 }, + "0.2": { "pass@1": 0.74 }, + "0.3": { "pass@1": 0.72 }, + "0.4": { "pass@1": 0.68 }, + "0.5": { "pass@1": 0.67 }, "0.6": { "pass@1": 0.67 }, "0.7": { "pass@1": 0.64 }, - "0.8": { "pass@1": 0.58 }, - "0.9": { "pass@1": 0.49 }, - "1.0": { "pass@1": 0.4 } + "0.8": { "pass@1": 0.61 }, + "0.9": { "pass@1": 0.57 }, + "1.0": { "pass@1": 0.49 } }, - "typescript": { - "0.0": { "pass@1": 0.92 }, - "0.1": { "pass@1": 0.86 }, - "0.2": { "pass@1": 0.81 }, - "0.3": { "pass@1": 0.76 }, - "0.4": { "pass@1": 0.72 }, - "0.5": { "pass@1": 0.64 }, + "cpp": { + "0.0": { "pass@1": 0.77 }, + "0.1": { "pass@1": 0.66 }, + "0.2": { "pass@1": 0.63 }, + "0.3": { "pass@1": 0.62 }, + "0.4": { "pass@1": 0.62 }, + "0.5": { "pass@1": 0.6 }, "0.6": { "pass@1": 0.58 }, - "0.7": { "pass@1": 0.53 }, - "0.8": { "pass@1": 0.45 }, - "0.9": { "pass@1": 0.39 }, - "1.0": { "pass@1": 0.21 } + "0.7": { "pass@1": 0.57 }, + "0.8": { "pass@1": 0.57 }, + "0.9": { "pass@1": 0.52 }, + "1.0": { "pass@1": 0.42 } }, - "rust": { + "java": { "0.0": { "pass@1": 0.85 }, - "0.1": { "pass@1": 0.75 }, - "0.2": { "pass@1": 0.7 }, - "0.3": { "pass@1": 0.67 }, - "0.4": { "pass@1": 0.65 }, - "0.5": { "pass@1": 0.63 }, - "0.6": { "pass@1": 0.59 }, - "0.7": { "pass@1": 0.57 }, - "0.8": { "pass@1": 0.55 }, - "0.9": { "pass@1": 0.49 }, - "1.0": { "pass@1": 0.46 } + "0.1": { "pass@1": 0.78 }, + "0.2": { "pass@1": 0.76 }, + "0.3": { "pass@1": 0.71 }, + "0.4": { "pass@1": 0.7 }, + "0.5": { "pass@1": 0.69 }, + "0.6": { "pass@1": 0.67 }, + "0.7": { "pass@1": 0.64 }, + "0.8": { "pass@1": 0.61 }, + "0.9": { "pass@1": 0.59 }, + "1.0": { "pass@1": 0.52 } + }, + "typescript": { + "0.0": { "pass@1": 0.98 }, + "0.1": { "pass@1": 0.94 }, + "0.2": { "pass@1": 0.9 }, + "0.3": { "pass@1": 0.9 }, + "0.4": { "pass@1": 0.89 }, + "0.5": { "pass@1": 0.87 }, + "0.6": { "pass@1": 0.86 }, + "0.7": { "pass@1": 0.85 }, + "0.8": { "pass@1": 0.8 }, + "0.9": { "pass@1": 0.67 }, + "1.0": { "pass@1": 0.35 } + }, + "rust": { + "0.0": { "pass@1": 0.79 }, + "0.1": { "pass@1": 0.72 }, + "0.2": { "pass@1": 0.62 }, + "0.3": { "pass@1": 0.61 }, + "0.4": { "pass@1": 0.58 }, + "0.5": { "pass@1": 0.57 }, + "0.6": { "pass@1": 0.55 }, + "0.7": { "pass@1": 0.54 }, + "0.8": { "pass@1": 0.51 }, + "0.9": { "pass@1": 0.48 }, + "1.0": { "pass@1": 0.42 } } } }, - "ise-uiuc/Magicoder-S-DS-6.7B": { - "eval_date": "2024-05-13 01:38:57.693249", - "train_size": "16k", + "gemini-1.5-pro-latest": { + "eval_date": "2024-05-19 07:16:46.769202", + "train_size": "1000k", "scores": { "all": { - "0.0": { "pass@1": 0.5 }, - "0.1": { "pass@1": 0.35 }, - "0.2": { "pass@1": 0.332 }, - "0.3": { "pass@1": 0.298 }, - "0.4": { "pass@1": 0.282 }, - "0.5": { "pass@1": 0.27 }, - "0.6": { "pass@1": 0.262 }, - "0.7": { "pass@1": 0.254 }, - "0.8": { "pass@1": 0.232 }, - "0.9": { "pass@1": 0.198 }, - "1.0": { "pass@1": 0.146 } + "0.0": { "pass@1": 0.942 }, + "0.1": { "pass@1": 0.93 }, + "0.2": { "pass@1": 0.924 }, + "0.3": { "pass@1": 0.92 }, + "0.4": { "pass@1": 0.918 }, + "0.5": { "pass@1": 0.918 }, + "0.6": { "pass@1": 0.91 }, + "0.7": { "pass@1": 0.906 }, + "0.8": { "pass@1": 0.906 }, + "0.9": { "pass@1": 0.894 }, + "1.0": { "pass@1": 0.814 } }, "python": { - "0.0": { "pass@1": 0.63 }, - "0.1": { "pass@1": 0.45 }, - "0.2": { "pass@1": 0.44 }, - "0.3": { "pass@1": 0.37 }, - "0.4": { "pass@1": 0.35 }, - "0.5": { "pass@1": 0.32 }, - "0.6": { "pass@1": 0.31 }, - "0.7": { "pass@1": 0.3 }, - "0.8": { "pass@1": 0.27 }, - "0.9": { "pass@1": 0.26 }, - "1.0": { "pass@1": 0.21 } + "0.0": { "pass@1": 0.94 }, + "0.1": { "pass@1": 0.93 }, + "0.2": { "pass@1": 0.93 }, + "0.3": { "pass@1": 0.93 }, + "0.4": { "pass@1": 0.92 }, + "0.5": { "pass@1": 0.92 }, + "0.6": { "pass@1": 0.92 }, + "0.7": { "pass@1": 0.91 }, + "0.8": { "pass@1": 0.91 }, + "0.9": { "pass@1": 0.91 }, + "1.0": { "pass@1": 0.9 } }, "cpp": { - "0.0": { "pass@1": 0.48 }, - "0.1": { "pass@1": 0.34 }, - "0.2": { "pass@1": 0.32 }, - "0.3": { "pass@1": 0.31 }, - "0.4": { "pass@1": 0.28 }, - "0.5": { "pass@1": 0.27 }, - "0.6": { "pass@1": 0.25 }, - "0.7": { "pass@1": 0.24 }, - "0.8": { "pass@1": 0.21 }, - "0.9": { "pass@1": 0.17 }, - "1.0": { "pass@1": 0.14 } + "0.0": { "pass@1": 0.87 }, + "0.1": { "pass@1": 0.86 }, + "0.2": { "pass@1": 0.84 }, + "0.3": { "pass@1": 0.84 }, + "0.4": { "pass@1": 0.84 }, + "0.5": { "pass@1": 0.84 }, + "0.6": { "pass@1": 0.82 }, + "0.7": { "pass@1": 0.81 }, + "0.8": { "pass@1": 0.81 }, + "0.9": { "pass@1": 0.8 }, + "1.0": { "pass@1": 0.77 } }, "java": { - "0.0": { "pass@1": 0.51 }, - "0.1": { "pass@1": 0.39 }, - "0.2": { "pass@1": 0.35 }, - "0.3": { "pass@1": 0.3 }, - "0.4": { "pass@1": 0.29 }, - "0.5": { "pass@1": 0.28 }, - "0.6": { "pass@1": 0.28 }, - "0.7": { "pass@1": 0.27 }, - "0.8": { "pass@1": 0.25 }, - "0.9": { "pass@1": 0.22 }, - "1.0": { "pass@1": 0.17 } + "0.0": { "pass@1": 0.97 }, + "0.1": { "pass@1": 0.96 }, + "0.2": { "pass@1": 0.95 }, + "0.3": { "pass@1": 0.95 }, + "0.4": { "pass@1": 0.95 }, + "0.5": { "pass@1": 0.95 }, + "0.6": { "pass@1": 0.94 }, + "0.7": { "pass@1": 0.94 }, + "0.8": { "pass@1": 0.94 }, + "0.9": { "pass@1": 0.94 }, + "1.0": { "pass@1": 0.94 } }, "typescript": { - "0.0": { "pass@1": 0.61 }, - "0.1": { "pass@1": 0.48 }, - "0.2": { "pass@1": 0.47 }, - "0.3": { "pass@1": 0.43 }, - "0.4": { "pass@1": 0.41 }, - "0.5": { "pass@1": 0.41 }, - "0.6": { "pass@1": 0.4 }, - "0.7": { "pass@1": 0.39 }, - "0.8": { "pass@1": 0.36 }, - "0.9": { "pass@1": 0.28 }, - "1.0": { "pass@1": 0.15 } + "0.0": { "pass@1": 0.96 }, + "0.1": { "pass@1": 0.96 }, + "0.2": { "pass@1": 0.96 }, + "0.3": { "pass@1": 0.96 }, + "0.4": { "pass@1": 0.96 }, + "0.5": { "pass@1": 0.96 }, + "0.6": { "pass@1": 0.96 }, + "0.7": { "pass@1": 0.96 }, + "0.8": { "pass@1": 0.96 }, + "0.9": { "pass@1": 0.92 }, + "1.0": { "pass@1": 0.56 } }, "rust": { - "0.0": { "pass@1": 0.27 }, - "0.1": { "pass@1": 0.09 }, - "0.2": { "pass@1": 0.08 }, - "0.3": { "pass@1": 0.08 }, - "0.4": { "pass@1": 0.08 }, - "0.5": { "pass@1": 0.07 }, - "0.6": { "pass@1": 0.07 }, - "0.7": { "pass@1": 0.07 }, - "0.8": { "pass@1": 0.07 }, - "0.9": { "pass@1": 0.06 }, - "1.0": { "pass@1": 0.06 } + "0.0": { "pass@1": 0.97 }, + "0.1": { "pass@1": 0.94 }, + "0.2": { "pass@1": 0.94 }, + "0.3": { "pass@1": 0.92 }, + "0.4": { "pass@1": 0.92 }, + "0.5": { "pass@1": 0.92 }, + "0.6": { "pass@1": 0.91 }, + "0.7": { "pass@1": 0.91 }, + "0.8": { "pass@1": 0.91 }, + "0.9": { "pass@1": 0.9 }, + "1.0": { "pass@1": 0.9 } } } }, - "microsoft/Phi-3-small-128k-instruct": { - "eval_date": "2024-05-23 02:31:23.338528", - "train_size": "128k", + "deepseek-ai/deepseek-coder-33b-instruct": { + "eval_date": "2024-05-13 01:38:24.023742", + "train_size": "16k", "scores": { "all": { - "0.0": { "pass@1": 0.792 }, - "0.1": { "pass@1": 0.662 }, - "0.2": { "pass@1": 0.612 }, - "0.3": { "pass@1": 0.564 }, - "0.4": { "pass@1": 0.526 }, - "0.5": { "pass@1": 0.49 }, - "0.6": { "pass@1": 0.466 }, - "0.7": { "pass@1": 0.444 }, - "0.8": { "pass@1": 0.396 }, - "0.9": { "pass@1": 0.322 }, - "1.0": { "pass@1": 0.242 } + "0.0": { "pass@1": 0.68 }, + "0.1": { "pass@1": 0.58 }, + "0.2": { "pass@1": 0.568 }, + "0.3": { "pass@1": 0.552 }, + "0.4": { "pass@1": 0.542 }, + "0.5": { "pass@1": 0.528 }, + "0.6": { "pass@1": 0.512 }, + "0.7": { "pass@1": 0.5 }, + "0.8": { "pass@1": 0.484 }, + "0.9": { "pass@1": 0.438 }, + "1.0": { "pass@1": 0.356 } }, "python": { - "0.0": { "pass@1": 0.74 }, - "0.1": { "pass@1": 0.57 }, - "0.2": { "pass@1": 0.52 }, - "0.3": { "pass@1": 0.46 }, - "0.4": { "pass@1": 0.41 }, - "0.5": { "pass@1": 0.36 }, - "0.6": { "pass@1": 0.34 }, - "0.7": { "pass@1": 0.29 }, - "0.8": { "pass@1": 0.25 }, - "0.9": { "pass@1": 0.23 }, - "1.0": { "pass@1": 0.21 } + "0.0": { "pass@1": 0.76 }, + "0.1": { "pass@1": 0.63 }, + "0.2": { "pass@1": 0.62 }, + "0.3": { "pass@1": 0.62 }, + "0.4": { "pass@1": 0.62 }, + "0.5": { "pass@1": 0.62 }, + "0.6": { "pass@1": 0.61 }, + "0.7": { "pass@1": 0.6 }, + "0.8": { "pass@1": 0.59 }, + "0.9": { "pass@1": 0.57 }, + "1.0": { "pass@1": 0.54 } }, "cpp": { - "0.0": { "pass@1": 0.83 }, - "0.1": { "pass@1": 0.68 }, - "0.2": { "pass@1": 0.62 }, - "0.3": { "pass@1": 0.57 }, - "0.4": { "pass@1": 0.55 }, - "0.5": { "pass@1": 0.53 }, - "0.6": { "pass@1": 0.52 }, - "0.7": { "pass@1": 0.5 }, - "0.8": { "pass@1": 0.48 }, - "0.9": { "pass@1": 0.4 }, - "1.0": { "pass@1": 0.35 } + "0.0": { "pass@1": 0.65 }, + "0.1": { "pass@1": 0.54 }, + "0.2": { "pass@1": 0.51 }, + "0.3": { "pass@1": 0.49 }, + "0.4": { "pass@1": 0.49 }, + "0.5": { "pass@1": 0.48 }, + "0.6": { "pass@1": 0.47 }, + "0.7": { "pass@1": 0.47 }, + "0.8": { "pass@1": 0.44 }, + "0.9": { "pass@1": 0.36 }, + "1.0": { "pass@1": 0.32 } }, "java": { - "0.0": { "pass@1": 0.83 }, - "0.1": { "pass@1": 0.74 }, - "0.2": { "pass@1": 0.69 }, - "0.3": { "pass@1": 0.67 }, - "0.4": { "pass@1": 0.6 }, - "0.5": { "pass@1": 0.58 }, - "0.6": { "pass@1": 0.55 }, - "0.7": { "pass@1": 0.53 }, - "0.8": { "pass@1": 0.46 }, - "0.9": { "pass@1": 0.39 }, - "1.0": { "pass@1": 0.3 } - }, - "typescript": { - "0.0": { "pass@1": 0.9 }, - "0.1": { "pass@1": 0.81 }, - "0.2": { "pass@1": 0.78 }, - "0.3": { "pass@1": 0.7 }, - "0.4": { "pass@1": 0.68 }, - "0.5": { "pass@1": 0.62 }, - "0.6": { "pass@1": 0.58 }, - "0.7": { "pass@1": 0.56 }, - "0.8": { "pass@1": 0.49 }, - "0.9": { "pass@1": 0.34 }, - "1.0": { "pass@1": 0.14 } + "0.0": { "pass@1": 0.7 }, + "0.1": { "pass@1": 0.65 }, + "0.2": { "pass@1": 0.63 }, + "0.3": { "pass@1": 0.62 }, + "0.4": { "pass@1": 0.61 }, + "0.5": { "pass@1": 0.61 }, + "0.6": { "pass@1": 0.59 }, + "0.7": { "pass@1": 0.57 }, + "0.8": { "pass@1": 0.53 }, + "0.9": { "pass@1": 0.47 }, + "1.0": { "pass@1": 0.4 } }, "rust": { - "0.0": { "pass@1": 0.66 }, - "0.1": { "pass@1": 0.51 }, - "0.2": { "pass@1": 0.45 }, - "0.3": { "pass@1": 0.42 }, - "0.4": { "pass@1": 0.39 }, - "0.5": { "pass@1": 0.36 }, - "0.6": { "pass@1": 0.34 }, - "0.7": { "pass@1": 0.34 }, - "0.8": { "pass@1": 0.3 }, - "0.9": { "pass@1": 0.25 }, - "1.0": { "pass@1": 0.21 } + "0.0": { "pass@1": 0.45 }, + "0.1": { "pass@1": 0.31 }, + "0.2": { "pass@1": 0.31 }, + "0.3": { "pass@1": 0.28 }, + "0.4": { "pass@1": 0.27 }, + "0.5": { "pass@1": 0.27 }, + "0.6": { "pass@1": 0.26 }, + "0.7": { "pass@1": 0.23 }, + "0.8": { "pass@1": 0.23 }, + "0.9": { "pass@1": 0.21 }, + "1.0": { "pass@1": 0.18 } + }, + "typescript": { + "0.0": { "pass@1": 0.84 }, + "0.1": { "pass@1": 0.77 }, + "0.2": { "pass@1": 0.77 }, + "0.3": { "pass@1": 0.75 }, + "0.4": { "pass@1": 0.72 }, + "0.5": { "pass@1": 0.66 }, + "0.6": { "pass@1": 0.63 }, + "0.7": { "pass@1": 0.63 }, + "0.8": { "pass@1": 0.63 }, + "0.9": { "pass@1": 0.58 }, + "1.0": { "pass@1": 0.34 } } } }, - "gemini-1.5-flash-latest": { - "eval_date": "2024-05-19 04:32:12.200298", - "train_size": "1000k", + "meta-llama/Meta-Llama-3-70B-Instruct": { + "eval_date": "2024-05-13 01:39:02.863882", + "train_size": "8k", "scores": { "all": { - "0.0": { "pass@1": 0.938 }, - "0.1": { "pass@1": 0.926 }, - "0.2": { "pass@1": 0.916 }, - "0.3": { "pass@1": 0.914 }, - "0.4": { "pass@1": 0.912 }, - "0.5": { "pass@1": 0.912 }, - "0.6": { "pass@1": 0.904 }, - "0.7": { "pass@1": 0.904 }, - "0.8": { "pass@1": 0.9 }, - "0.9": { "pass@1": 0.886 }, - "1.0": { "pass@1": 0.8 } + "0.0": { "pass@1": 0.922 }, + "0.1": { "pass@1": 0.894 }, + "0.2": { "pass@1": 0.876 }, + "0.3": { "pass@1": 0.86 }, + "0.4": { "pass@1": 0.85 }, + "0.5": { "pass@1": 0.846 }, + "0.6": { "pass@1": 0.838 }, + "0.7": { "pass@1": 0.83 }, + "0.8": { "pass@1": 0.822 }, + "0.9": { "pass@1": 0.762 }, + "1.0": { "pass@1": 0.576 } }, "python": { "0.0": { "pass@1": 0.93 }, - "0.1": { "pass@1": 0.93 }, - "0.2": { "pass@1": 0.93 }, - "0.3": { "pass@1": 0.93 }, - "0.4": { "pass@1": 0.93 }, - "0.5": { "pass@1": 0.93 }, - "0.6": { "pass@1": 0.93 }, - "0.7": { "pass@1": 0.93 }, - "0.8": { "pass@1": 0.93 }, - "0.9": { "pass@1": 0.92 }, - "1.0": { "pass@1": 0.91 } + "0.1": { "pass@1": 0.89 }, + "0.2": { "pass@1": 0.88 }, + "0.3": { "pass@1": 0.88 }, + "0.4": { "pass@1": 0.86 }, + "0.5": { "pass@1": 0.86 }, + "0.6": { "pass@1": 0.86 }, + "0.7": { "pass@1": 0.85 }, + "0.8": { "pass@1": 0.83 }, + "0.9": { "pass@1": 0.76 }, + "1.0": { "pass@1": 0.61 } }, "cpp": { - "0.0": { "pass@1": 0.87 }, - "0.1": { "pass@1": 0.85 }, - "0.2": { "pass@1": 0.81 }, - "0.3": { "pass@1": 0.81 }, - "0.4": { "pass@1": 0.81 }, - "0.5": { "pass@1": 0.81 }, - "0.6": { "pass@1": 0.79 }, - "0.7": { "pass@1": 0.79 }, - "0.8": { "pass@1": 0.79 }, - "0.9": { "pass@1": 0.79 }, - "1.0": { "pass@1": 0.76 } + "0.0": { "pass@1": 0.82 }, + "0.1": { "pass@1": 0.78 }, + "0.2": { "pass@1": 0.75 }, + "0.3": { "pass@1": 0.72 }, + "0.4": { "pass@1": 0.72 }, + "0.5": { "pass@1": 0.71 }, + "0.6": { "pass@1": 0.71 }, + "0.7": { "pass@1": 0.7 }, + "0.8": { "pass@1": 0.7 }, + "0.9": { "pass@1": 0.62 }, + "1.0": { "pass@1": 0.51 } }, "java": { - "0.0": { "pass@1": 0.98 }, - "0.1": { "pass@1": 0.98 }, - "0.2": { "pass@1": 0.97 }, - "0.3": { "pass@1": 0.97 }, - "0.4": { "pass@1": 0.97 }, - "0.5": { "pass@1": 0.97 }, - "0.6": { "pass@1": 0.96 }, - "0.7": { "pass@1": 0.96 }, - "0.8": { "pass@1": 0.94 }, - "0.9": { "pass@1": 0.94 }, - "1.0": { "pass@1": 0.94 } - }, - "typescript": { "0.0": { "pass@1": 0.97 }, "0.1": { "pass@1": 0.97 }, - "0.2": { "pass@1": 0.97 }, - "0.3": { "pass@1": 0.97 }, - "0.4": { "pass@1": 0.97 }, - "0.5": { "pass@1": 0.97 }, - "0.6": { "pass@1": 0.97 }, - "0.7": { "pass@1": 0.97 }, - "0.8": { "pass@1": 0.97 }, - "0.9": { "pass@1": 0.93 }, - "1.0": { "pass@1": 0.56 } + "0.2": { "pass@1": 0.94 }, + "0.3": { "pass@1": 0.93 }, + "0.4": { "pass@1": 0.92 }, + "0.5": { "pass@1": 0.91 }, + "0.6": { "pass@1": 0.89 }, + "0.7": { "pass@1": 0.88 }, + "0.8": { "pass@1": 0.86 }, + "0.9": { "pass@1": 0.81 }, + "1.0": { "pass@1": 0.72 } + }, + "typescript": { + "0.0": { "pass@1": 0.99 }, + "0.1": { "pass@1": 0.96 }, + "0.2": { "pass@1": 0.95 }, + "0.3": { "pass@1": 0.95 }, + "0.4": { "pass@1": 0.93 }, + "0.5": { "pass@1": 0.93 }, + "0.6": { "pass@1": 0.92 }, + "0.7": { "pass@1": 0.91 }, + "0.8": { "pass@1": 0.91 }, + "0.9": { "pass@1": 0.83 }, + "1.0": { "pass@1": 0.38 } }, "rust": { - "0.0": { "pass@1": 0.94 }, - "0.1": { "pass@1": 0.9 }, - "0.2": { "pass@1": 0.9 }, - "0.3": { "pass@1": 0.89 }, - "0.4": { "pass@1": 0.88 }, - "0.5": { "pass@1": 0.88 }, - "0.6": { "pass@1": 0.87 }, - "0.7": { "pass@1": 0.87 }, - "0.8": { "pass@1": 0.87 }, - "0.9": { "pass@1": 0.85 }, - "1.0": { "pass@1": 0.83 } + "0.0": { "pass@1": 0.9 }, + "0.1": { "pass@1": 0.87 }, + "0.2": { "pass@1": 0.86 }, + "0.3": { "pass@1": 0.82 }, + "0.4": { "pass@1": 0.82 }, + "0.5": { "pass@1": 0.82 }, + "0.6": { "pass@1": 0.81 }, + "0.7": { "pass@1": 0.81 }, + "0.8": { "pass@1": 0.81 }, + "0.9": { "pass@1": 0.79 }, + "1.0": { "pass@1": 0.66 } } } }, - "codellama/CodeLlama-7b-Instruct-hf": { - "eval_date": "2024-05-13 01:38:12.022860", + "codellama/CodeLlama-34b-Instruct-hf": { + "eval_date": "2024-05-13 01:38:05.832801", "train_size": "16k", "scores": { "all": { - "0.0": { "pass@1": 0.712 }, - "0.1": { "pass@1": 0.524 }, - "0.2": { "pass@1": 0.446 }, - "0.3": { "pass@1": 0.414 }, - "0.4": { "pass@1": 0.37 }, - "0.5": { "pass@1": 0.348 }, - "0.6": { "pass@1": 0.326 }, - "0.7": { "pass@1": 0.312 }, - "0.8": { "pass@1": 0.282 }, - "0.9": { "pass@1": 0.234 }, - "1.0": { "pass@1": 0.15 } + "0.0": { "pass@1": 0.728 }, + "0.1": { "pass@1": 0.582 }, + "0.2": { "pass@1": 0.524 }, + "0.3": { "pass@1": 0.496 }, + "0.4": { "pass@1": 0.48 }, + "0.5": { "pass@1": 0.454 }, + "0.6": { "pass@1": 0.438 }, + "0.7": { "pass@1": 0.43 }, + "0.8": { "pass@1": 0.416 }, + "0.9": { "pass@1": 0.372 }, + "1.0": { "pass@1": 0.28 } }, "python": { - "0.0": { "pass@1": 0.8 }, + "0.0": { "pass@1": 0.76 }, "0.1": { "pass@1": 0.59 }, - "0.2": { "pass@1": 0.46 }, - "0.3": { "pass@1": 0.43 }, - "0.4": { "pass@1": 0.35 }, - "0.5": { "pass@1": 0.32 }, - "0.6": { "pass@1": 0.27 }, - "0.7": { "pass@1": 0.25 }, - "0.8": { "pass@1": 0.2 }, - "0.9": { "pass@1": 0.18 }, - "1.0": { "pass@1": 0.14 } + "0.2": { "pass@1": 0.52 }, + "0.3": { "pass@1": 0.47 }, + "0.4": { "pass@1": 0.46 }, + "0.5": { "pass@1": 0.44 }, + "0.6": { "pass@1": 0.43 }, + "0.7": { "pass@1": 0.42 }, + "0.8": { "pass@1": 0.41 }, + "0.9": { "pass@1": 0.4 }, + "1.0": { "pass@1": 0.36 } }, "cpp": { - "0.0": { "pass@1": 0.73 }, - "0.1": { "pass@1": 0.55 }, - "0.2": { "pass@1": 0.49 }, - "0.3": { "pass@1": 0.49 }, - "0.4": { "pass@1": 0.48 }, - "0.5": { "pass@1": 0.46 }, - "0.6": { "pass@1": 0.45 }, - "0.7": { "pass@1": 0.43 }, - "0.8": { "pass@1": 0.41 }, - "0.9": { "pass@1": 0.35 }, - "1.0": { "pass@1": 0.21 } + "0.0": { "pass@1": 0.64 }, + "0.1": { "pass@1": 0.45 }, + "0.2": { "pass@1": 0.41 }, + "0.3": { "pass@1": 0.37 }, + "0.4": { "pass@1": 0.37 }, + "0.5": { "pass@1": 0.36 }, + "0.6": { "pass@1": 0.35 }, + "0.7": { "pass@1": 0.32 }, + "0.8": { "pass@1": 0.31 }, + "0.9": { "pass@1": 0.29 }, + "1.0": { "pass@1": 0.26 } }, "java": { - "0.0": { "pass@1": 0.65 }, - "0.1": { "pass@1": 0.44 }, - "0.2": { "pass@1": 0.4 }, - "0.3": { "pass@1": 0.38 }, - "0.4": { "pass@1": 0.31 }, - "0.5": { "pass@1": 0.3 }, - "0.6": { "pass@1": 0.28 }, - "0.7": { "pass@1": 0.28 }, - "0.8": { "pass@1": 0.25 }, - "0.9": { "pass@1": 0.17 }, - "1.0": { "pass@1": 0.11 } + "0.0": { "pass@1": 0.7 }, + "0.1": { "pass@1": 0.59 }, + "0.2": { "pass@1": 0.52 }, + "0.3": { "pass@1": 0.52 }, + "0.4": { "pass@1": 0.47 }, + "0.5": { "pass@1": 0.45 }, + "0.6": { "pass@1": 0.42 }, + "0.7": { "pass@1": 0.42 }, + "0.8": { "pass@1": 0.4 }, + "0.9": { "pass@1": 0.33 }, + "1.0": { "pass@1": 0.22 } }, "rust": { - "0.0": { "pass@1": 0.57 }, - "0.1": { "pass@1": 0.33 }, - "0.2": { "pass@1": 0.31 }, - "0.3": { "pass@1": 0.27 }, - "0.4": { "pass@1": 0.25 }, - "0.5": { "pass@1": 0.25 }, - "0.6": { "pass@1": 0.25 }, - "0.7": { "pass@1": 0.24 }, - "0.8": { "pass@1": 0.22 }, - "0.9": { "pass@1": 0.2 }, - "1.0": { "pass@1": 0.18 } + "0.0": { "pass@1": 0.77 }, + "0.1": { "pass@1": 0.61 }, + "0.2": { "pass@1": 0.58 }, + "0.3": { "pass@1": 0.58 }, + "0.4": { "pass@1": 0.58 }, + "0.5": { "pass@1": 0.55 }, + "0.6": { "pass@1": 0.54 }, + "0.7": { "pass@1": 0.54 }, + "0.8": { "pass@1": 0.53 }, + "0.9": { "pass@1": 0.47 }, + "1.0": { "pass@1": 0.35 } }, "typescript": { - "0.0": { "pass@1": 0.81 }, - "0.1": { "pass@1": 0.71 }, - "0.2": { "pass@1": 0.57 }, - "0.3": { "pass@1": 0.5 }, - "0.4": { "pass@1": 0.46 }, - "0.5": { "pass@1": 0.41 }, - "0.6": { "pass@1": 0.38 }, - "0.7": { "pass@1": 0.36 }, - "0.8": { "pass@1": 0.33 }, - "0.9": { "pass@1": 0.27 }, - "1.0": { "pass@1": 0.11 } + "0.0": { "pass@1": 0.77 }, + "0.1": { "pass@1": 0.67 }, + "0.2": { "pass@1": 0.59 }, + "0.3": { "pass@1": 0.54 }, + "0.4": { "pass@1": 0.52 }, + "0.5": { "pass@1": 0.47 }, + "0.6": { "pass@1": 0.45 }, + "0.7": { "pass@1": 0.45 }, + "0.8": { "pass@1": 0.43 }, + "0.9": { "pass@1": 0.37 }, + "1.0": { "pass@1": 0.21 } } } }, - "claude-3-haiku-20240307": { - "eval_date": "2024-05-13 01:37:42.931545", - "train_size": "200k", + "Qwen/Qwen1.5-72B-Chat": { + "eval_date": "2024-05-31 22:41:54.978671", + "train_size": "32k", "scores": { "all": { - "0.0": { "pass@1": 0.926 }, - "0.1": { "pass@1": 0.876 }, - "0.2": { "pass@1": 0.858 }, - "0.3": { "pass@1": 0.854 }, - "0.4": { "pass@1": 0.846 }, - "0.5": { "pass@1": 0.84 }, - "0.6": { "pass@1": 0.826 }, - "0.7": { "pass@1": 0.822 }, - "0.8": { "pass@1": 0.818 }, - "0.9": { "pass@1": 0.798 }, - "1.0": { "pass@1": 0.706 } + "0.0": { "pass@1": 0.896 }, + "0.1": { "pass@1": 0.838 }, + "0.2": { "pass@1": 0.808 }, + "0.3": { "pass@1": 0.786 }, + "0.4": { "pass@1": 0.77 }, + "0.5": { "pass@1": 0.746 }, + "0.6": { "pass@1": 0.724 }, + "0.7": { "pass@1": 0.692 }, + "0.8": { "pass@1": 0.67 }, + "0.9": { "pass@1": 0.626 }, + "1.0": { "pass@1": 0.516 } }, "python": { - "0.0": { "pass@1": 0.94 }, - "0.1": { "pass@1": 0.9 }, - "0.2": { "pass@1": 0.87 }, - "0.3": { "pass@1": 0.87 }, - "0.4": { "pass@1": 0.85 }, - "0.5": { "pass@1": 0.83 }, - "0.6": { "pass@1": 0.82 }, - "0.7": { "pass@1": 0.81 }, - "0.8": { "pass@1": 0.8 }, - "0.9": { "pass@1": 0.78 }, - "1.0": { "pass@1": 0.75 } + "0.0": { "pass@1": 0.91 }, + "0.1": { "pass@1": 0.81 }, + "0.2": { "pass@1": 0.78 }, + "0.3": { "pass@1": 0.77 }, + "0.4": { "pass@1": 0.73 }, + "0.5": { "pass@1": 0.7 }, + "0.6": { "pass@1": 0.67 }, + "0.7": { "pass@1": 0.64 }, + "0.8": { "pass@1": 0.62 }, + "0.9": { "pass@1": 0.58 }, + "1.0": { "pass@1": 0.5 } }, "cpp": { - "0.0": { "pass@1": 0.88 }, - "0.1": { "pass@1": 0.82 }, - "0.2": { "pass@1": 0.81 }, - "0.3": { "pass@1": 0.81 }, - "0.4": { "pass@1": 0.8 }, - "0.5": { "pass@1": 0.79 }, - "0.6": { "pass@1": 0.76 }, - "0.7": { "pass@1": 0.75 }, - "0.8": { "pass@1": 0.75 }, - "0.9": { "pass@1": 0.73 }, - "1.0": { "pass@1": 0.7 } + "0.0": { "pass@1": 0.84 }, + "0.1": { "pass@1": 0.76 }, + "0.2": { "pass@1": 0.71 }, + "0.3": { "pass@1": 0.68 }, + "0.4": { "pass@1": 0.66 }, + "0.5": { "pass@1": 0.65 }, + "0.6": { "pass@1": 0.63 }, + "0.7": { "pass@1": 0.6 }, + "0.8": { "pass@1": 0.6 }, + "0.9": { "pass@1": 0.55 }, + "1.0": { "pass@1": 0.46 } }, "java": { - "0.0": { "pass@1": 0.94 }, - "0.1": { "pass@1": 0.93 }, - "0.2": { "pass@1": 0.92 }, - "0.3": { "pass@1": 0.92 }, - "0.4": { "pass@1": 0.92 }, - "0.5": { "pass@1": 0.92 }, - "0.6": { "pass@1": 0.91 }, - "0.7": { "pass@1": 0.91 }, - "0.8": { "pass@1": 0.9 }, - "0.9": { "pass@1": 0.89 }, - "1.0": { "pass@1": 0.87 } + "0.0": { "pass@1": 0.92 }, + "0.1": { "pass@1": 0.9 }, + "0.2": { "pass@1": 0.88 }, + "0.3": { "pass@1": 0.87 }, + "0.4": { "pass@1": 0.86 }, + "0.5": { "pass@1": 0.85 }, + "0.6": { "pass@1": 0.83 }, + "0.7": { "pass@1": 0.79 }, + "0.8": { "pass@1": 0.75 }, + "0.9": { "pass@1": 0.74 }, + "1.0": { "pass@1": 0.67 } }, "typescript": { - "0.0": { "pass@1": 0.99 }, - "0.1": { "pass@1": 0.94 }, - "0.2": { "pass@1": 0.91 }, - "0.3": { "pass@1": 0.9 }, - "0.4": { "pass@1": 0.9 }, - "0.5": { "pass@1": 0.9 }, - "0.6": { "pass@1": 0.9 }, - "0.7": { "pass@1": 0.9 }, - "0.8": { "pass@1": 0.9 }, - "0.9": { "pass@1": 0.86 }, - "1.0": { "pass@1": 0.5 } + "0.0": { "pass@1": 0.91 }, + "0.1": { "pass@1": 0.86 }, + "0.2": { "pass@1": 0.85 }, + "0.3": { "pass@1": 0.82 }, + "0.4": { "pass@1": 0.81 }, + "0.5": { "pass@1": 0.76 }, + "0.6": { "pass@1": 0.75 }, + "0.7": { "pass@1": 0.72 }, + "0.8": { "pass@1": 0.7 }, + "0.9": { "pass@1": 0.61 }, + "1.0": { "pass@1": 0.35 } }, "rust": { - "0.0": { "pass@1": 0.88 }, - "0.1": { "pass@1": 0.79 }, - "0.2": { "pass@1": 0.78 }, - "0.3": { "pass@1": 0.77 }, - "0.4": { "pass@1": 0.76 }, - "0.5": { "pass@1": 0.76 }, + "0.0": { "pass@1": 0.9 }, + "0.1": { "pass@1": 0.86 }, + "0.2": { "pass@1": 0.82 }, + "0.3": { "pass@1": 0.79 }, + "0.4": { "pass@1": 0.79 }, + "0.5": { "pass@1": 0.77 }, "0.6": { "pass@1": 0.74 }, - "0.7": { "pass@1": 0.74 }, - "0.8": { "pass@1": 0.74 }, - "0.9": { "pass@1": 0.73 }, - "1.0": { "pass@1": 0.71 } + "0.7": { "pass@1": 0.71 }, + "0.8": { "pass@1": 0.68 }, + "0.9": { "pass@1": 0.65 }, + "1.0": { "pass@1": 0.6 } } } }, - "deepseek-ai/DeepSeek-V2-Lite-Chat": { - "eval_date": "2024-05-19 08:01:09.773213", - "train_size": "32k", + "CohereForAI/c4ai-command-r-plus": { + "eval_date": "2024-05-13 01:38:17.550466", + "train_size": "128k", "scores": { "all": { - "0.0": { "pass@1": 0.786 }, - "0.1": { "pass@1": 0.666 }, - "0.2": { "pass@1": 0.624 }, - "0.3": { "pass@1": 0.574 }, - "0.4": { "pass@1": 0.544 }, - "0.5": { "pass@1": 0.522 }, - "0.6": { "pass@1": 0.476 }, - "0.7": { "pass@1": 0.446 }, - "0.8": { "pass@1": 0.416 }, - "0.9": { "pass@1": 0.346 }, - "1.0": { "pass@1": 0.256 } + "0.0": { "pass@1": 0.93 }, + "0.1": { "pass@1": 0.874 }, + "0.2": { "pass@1": 0.862 }, + "0.3": { "pass@1": 0.846 }, + "0.4": { "pass@1": 0.838 }, + "0.5": { "pass@1": 0.828 }, + "0.6": { "pass@1": 0.808 }, + "0.7": { "pass@1": 0.796 }, + "0.8": { "pass@1": 0.784 }, + "0.9": { "pass@1": 0.75 }, + "1.0": { "pass@1": 0.624 } }, "python": { - "0.0": { "pass@1": 0.83 }, - "0.1": { "pass@1": 0.65 }, - "0.2": { "pass@1": 0.6 }, - "0.3": { "pass@1": 0.58 }, - "0.4": { "pass@1": 0.51 }, - "0.5": { "pass@1": 0.51 }, - "0.6": { "pass@1": 0.44 }, - "0.7": { "pass@1": 0.41 }, - "0.8": { "pass@1": 0.39 }, - "0.9": { "pass@1": 0.33 }, - "1.0": { "pass@1": 0.25 } + "0.0": { "pass@1": 0.94 }, + "0.1": { "pass@1": 0.86 }, + "0.2": { "pass@1": 0.85 }, + "0.3": { "pass@1": 0.84 }, + "0.4": { "pass@1": 0.84 }, + "0.5": { "pass@1": 0.84 }, + "0.6": { "pass@1": 0.82 }, + "0.7": { "pass@1": 0.82 }, + "0.8": { "pass@1": 0.81 }, + "0.9": { "pass@1": 0.79 }, + "1.0": { "pass@1": 0.73 } }, "cpp": { - "0.0": { "pass@1": 0.75 }, - "0.1": { "pass@1": 0.59 }, - "0.2": { "pass@1": 0.53 }, - "0.3": { "pass@1": 0.5 }, - "0.4": { "pass@1": 0.49 }, - "0.5": { "pass@1": 0.46 }, - "0.6": { "pass@1": 0.42 }, - "0.7": { "pass@1": 0.38 }, - "0.8": { "pass@1": 0.37 }, - "0.9": { "pass@1": 0.3 }, - "1.0": { "pass@1": 0.23 } + "0.0": { "pass@1": 0.86 }, + "0.1": { "pass@1": 0.8 }, + "0.2": { "pass@1": 0.79 }, + "0.3": { "pass@1": 0.77 }, + "0.4": { "pass@1": 0.77 }, + "0.5": { "pass@1": 0.77 }, + "0.6": { "pass@1": 0.75 }, + "0.7": { "pass@1": 0.74 }, + "0.8": { "pass@1": 0.74 }, + "0.9": { "pass@1": 0.71 }, + "1.0": { "pass@1": 0.61 } }, "java": { - "0.0": { "pass@1": 0.79 }, - "0.1": { "pass@1": 0.65 }, - "0.2": { "pass@1": 0.64 }, - "0.3": { "pass@1": 0.54 }, - "0.4": { "pass@1": 0.52 }, - "0.5": { "pass@1": 0.52 }, - "0.6": { "pass@1": 0.48 }, - "0.7": { "pass@1": 0.46 }, - "0.8": { "pass@1": 0.41 }, - "0.9": { "pass@1": 0.38 }, - "1.0": { "pass@1": 0.3 } + "0.0": { "pass@1": 0.96 }, + "0.1": { "pass@1": 0.93 }, + "0.2": { "pass@1": 0.9 }, + "0.3": { "pass@1": 0.89 }, + "0.4": { "pass@1": 0.88 }, + "0.5": { "pass@1": 0.87 }, + "0.6": { "pass@1": 0.87 }, + "0.7": { "pass@1": 0.87 }, + "0.8": { "pass@1": 0.84 }, + "0.9": { "pass@1": 0.79 }, + "1.0": { "pass@1": 0.74 } }, "typescript": { - "0.0": { "pass@1": 0.84 }, - "0.1": { "pass@1": 0.79 }, - "0.2": { "pass@1": 0.75 }, - "0.3": { "pass@1": 0.69 }, - "0.4": { "pass@1": 0.66 }, - "0.5": { "pass@1": 0.6 }, - "0.6": { "pass@1": 0.53 }, - "0.7": { "pass@1": 0.5 }, - "0.8": { "pass@1": 0.46 }, - "0.9": { "pass@1": 0.34 }, - "1.0": { "pass@1": 0.17 } + "0.0": { "pass@1": 1.0 }, + "0.1": { "pass@1": 0.96 }, + "0.2": { "pass@1": 0.95 }, + "0.3": { "pass@1": 0.92 }, + "0.4": { "pass@1": 0.9 }, + "0.5": { "pass@1": 0.87 }, + "0.6": { "pass@1": 0.83 }, + "0.7": { "pass@1": 0.78 }, + "0.8": { "pass@1": 0.77 }, + "0.9": { "pass@1": 0.71 }, + "1.0": { "pass@1": 0.36 } }, "rust": { - "0.0": { "pass@1": 0.72 }, - "0.1": { "pass@1": 0.65 }, - "0.2": { "pass@1": 0.6 }, - "0.3": { "pass@1": 0.56 }, - "0.4": { "pass@1": 0.54 }, - "0.5": { "pass@1": 0.52 }, - "0.6": { "pass@1": 0.51 }, - "0.7": { "pass@1": 0.48 }, - "0.8": { "pass@1": 0.45 }, - "0.9": { "pass@1": 0.38 }, - "1.0": { "pass@1": 0.33 } + "0.0": { "pass@1": 0.89 }, + "0.1": { "pass@1": 0.82 }, + "0.2": { "pass@1": 0.82 }, + "0.3": { "pass@1": 0.81 }, + "0.4": { "pass@1": 0.8 }, + "0.5": { "pass@1": 0.79 }, + "0.6": { "pass@1": 0.77 }, + "0.7": { "pass@1": 0.77 }, + "0.8": { "pass@1": 0.76 }, + "0.9": { "pass@1": 0.75 }, + "1.0": { "pass@1": 0.68 } } } }, - "meta-llama/Meta-Llama-3-70B-Instruct": { - "eval_date": "2024-05-13 01:39:02.863882", + "meta-llama/Meta-Llama-3-8B-Instruct": { + "eval_date": "2024-05-13 01:39:08.064606", "train_size": "8k", "scores": { "all": { - "0.0": { "pass@1": 0.922 }, - "0.1": { "pass@1": 0.894 }, - "0.2": { "pass@1": 0.876 }, - "0.3": { "pass@1": 0.86 }, - "0.4": { "pass@1": 0.85 }, - "0.5": { "pass@1": 0.846 }, - "0.6": { "pass@1": 0.838 }, - "0.7": { "pass@1": 0.83 }, - "0.8": { "pass@1": 0.822 }, - "0.9": { "pass@1": 0.762 }, - "1.0": { "pass@1": 0.576 } + "0.0": { "pass@1": 0.788 }, + "0.1": { "pass@1": 0.712 }, + "0.2": { "pass@1": 0.688 }, + "0.3": { "pass@1": 0.672 }, + "0.4": { "pass@1": 0.646 }, + "0.5": { "pass@1": 0.63 }, + "0.6": { "pass@1": 0.614 }, + "0.7": { "pass@1": 0.59 }, + "0.8": { "pass@1": 0.536 }, + "0.9": { "pass@1": 0.458 }, + "1.0": { "pass@1": 0.302 } }, "python": { - "0.0": { "pass@1": 0.93 }, - "0.1": { "pass@1": 0.89 }, - "0.2": { "pass@1": 0.88 }, - "0.3": { "pass@1": 0.88 }, - "0.4": { "pass@1": 0.86 }, - "0.5": { "pass@1": 0.86 }, - "0.6": { "pass@1": 0.86 }, - "0.7": { "pass@1": 0.85 }, - "0.8": { "pass@1": 0.83 }, - "0.9": { "pass@1": 0.76 }, - "1.0": { "pass@1": 0.61 } + "0.0": { "pass@1": 0.86 }, + "0.1": { "pass@1": 0.77 }, + "0.2": { "pass@1": 0.76 }, + "0.3": { "pass@1": 0.74 }, + "0.4": { "pass@1": 0.69 }, + "0.5": { "pass@1": 0.66 }, + "0.6": { "pass@1": 0.66 }, + "0.7": { "pass@1": 0.63 }, + "0.8": { "pass@1": 0.54 }, + "0.9": { "pass@1": 0.48 }, + "1.0": { "pass@1": 0.3 } }, "cpp": { - "0.0": { "pass@1": 0.82 }, - "0.1": { "pass@1": 0.78 }, - "0.2": { "pass@1": 0.75 }, - "0.3": { "pass@1": 0.72 }, - "0.4": { "pass@1": 0.72 }, - "0.5": { "pass@1": 0.71 }, - "0.6": { "pass@1": 0.71 }, - "0.7": { "pass@1": 0.7 }, - "0.8": { "pass@1": 0.7 }, - "0.9": { "pass@1": 0.62 }, - "1.0": { "pass@1": 0.51 } + "0.0": { "pass@1": 0.7 }, + "0.1": { "pass@1": 0.65 }, + "0.2": { "pass@1": 0.59 }, + "0.3": { "pass@1": 0.58 }, + "0.4": { "pass@1": 0.57 }, + "0.5": { "pass@1": 0.56 }, + "0.6": { "pass@1": 0.55 }, + "0.7": { "pass@1": 0.51 }, + "0.8": { "pass@1": 0.48 }, + "0.9": { "pass@1": 0.43 }, + "1.0": { "pass@1": 0.34 } }, "java": { - "0.0": { "pass@1": 0.97 }, - "0.1": { "pass@1": 0.97 }, - "0.2": { "pass@1": 0.94 }, - "0.3": { "pass@1": 0.93 }, - "0.4": { "pass@1": 0.92 }, - "0.5": { "pass@1": 0.91 }, - "0.6": { "pass@1": 0.89 }, - "0.7": { "pass@1": 0.88 }, - "0.8": { "pass@1": 0.86 }, - "0.9": { "pass@1": 0.81 }, - "1.0": { "pass@1": 0.72 } + "0.0": { "pass@1": 0.77 }, + "0.1": { "pass@1": 0.66 }, + "0.2": { "pass@1": 0.64 }, + "0.3": { "pass@1": 0.63 }, + "0.4": { "pass@1": 0.63 }, + "0.5": { "pass@1": 0.61 }, + "0.6": { "pass@1": 0.6 }, + "0.7": { "pass@1": 0.58 }, + "0.8": { "pass@1": 0.53 }, + "0.9": { "pass@1": 0.49 }, + "1.0": { "pass@1": 0.38 } }, "typescript": { - "0.0": { "pass@1": 0.99 }, - "0.1": { "pass@1": 0.96 }, - "0.2": { "pass@1": 0.95 }, - "0.3": { "pass@1": 0.95 }, - "0.4": { "pass@1": 0.93 }, - "0.5": { "pass@1": 0.93 }, - "0.6": { "pass@1": 0.92 }, - "0.7": { "pass@1": 0.91 }, - "0.8": { "pass@1": 0.91 }, - "0.9": { "pass@1": 0.83 }, - "1.0": { "pass@1": 0.38 } + "0.0": { "pass@1": 0.9 }, + "0.1": { "pass@1": 0.86 }, + "0.2": { "pass@1": 0.84 }, + "0.3": { "pass@1": 0.81 }, + "0.4": { "pass@1": 0.76 }, + "0.5": { "pass@1": 0.75 }, + "0.6": { "pass@1": 0.71 }, + "0.7": { "pass@1": 0.69 }, + "0.8": { "pass@1": 0.62 }, + "0.9": { "pass@1": 0.45 }, + "1.0": { "pass@1": 0.16 } }, "rust": { - "0.0": { "pass@1": 0.9 }, - "0.1": { "pass@1": 0.87 }, - "0.2": { "pass@1": 0.86 }, - "0.3": { "pass@1": 0.82 }, - "0.4": { "pass@1": 0.82 }, - "0.5": { "pass@1": 0.82 }, - "0.6": { "pass@1": 0.81 }, - "0.7": { "pass@1": 0.81 }, - "0.8": { "pass@1": 0.81 }, - "0.9": { "pass@1": 0.79 }, - "1.0": { "pass@1": 0.66 } + "0.0": { "pass@1": 0.71 }, + "0.1": { "pass@1": 0.62 }, + "0.2": { "pass@1": 0.61 }, + "0.3": { "pass@1": 0.6 }, + "0.4": { "pass@1": 0.58 }, + "0.5": { "pass@1": 0.57 }, + "0.6": { "pass@1": 0.55 }, + "0.7": { "pass@1": 0.54 }, + "0.8": { "pass@1": 0.51 }, + "0.9": { "pass@1": 0.44 }, + "1.0": { "pass@1": 0.33 } } } }, - "claude-3-sonnet-20240229": { - "eval_date": "2024-05-13 01:37:53.983726", - "train_size": "200k", + "Qwen/Qwen1.5-14B-Chat": { + "eval_date": "2024-05-13 01:39:46.042567", + "train_size": "32k", "scores": { "all": { - "0.0": { "pass@1": 0.95 }, - "0.1": { "pass@1": 0.92 }, - "0.2": { "pass@1": 0.908 }, - "0.3": { "pass@1": 0.9 }, - "0.4": { "pass@1": 0.9 }, - "0.5": { "pass@1": 0.898 }, - "0.6": { "pass@1": 0.882 }, - "0.7": { "pass@1": 0.88 }, - "0.8": { "pass@1": 0.874 }, - "0.9": { "pass@1": 0.86 }, - "1.0": { "pass@1": 0.774 } + "0.0": { "pass@1": 0.734 }, + "0.1": { "pass@1": 0.616 }, + "0.2": { "pass@1": 0.53 }, + "0.3": { "pass@1": 0.454 }, + "0.4": { "pass@1": 0.384 }, + "0.5": { "pass@1": 0.342 }, + "0.6": { "pass@1": 0.314 }, + "0.7": { "pass@1": 0.28 }, + "0.8": { "pass@1": 0.26 }, + "0.9": { "pass@1": 0.212 }, + "1.0": { "pass@1": 0.15 } }, "python": { - "0.0": { "pass@1": 0.98 }, + "0.0": { "pass@1": 0.64 }, + "0.1": { "pass@1": 0.52 }, + "0.2": { "pass@1": 0.36 }, + "0.3": { "pass@1": 0.22 }, + "0.4": { "pass@1": 0.1 }, + "0.5": { "pass@1": 0.1 }, + "0.6": { "pass@1": 0.09 }, + "0.7": { "pass@1": 0.04 }, + "0.8": { "pass@1": 0.04 }, + "0.9": { "pass@1": 0.04 }, + "1.0": { "pass@1": 0.04 } + }, + "cpp": { + "0.0": { "pass@1": 0.69 }, + "0.1": { "pass@1": 0.59 }, + "0.2": { "pass@1": 0.54 }, + "0.3": { "pass@1": 0.51 }, + "0.4": { "pass@1": 0.48 }, + "0.5": { "pass@1": 0.41 }, + "0.6": { "pass@1": 0.36 }, + "0.7": { "pass@1": 0.32 }, + "0.8": { "pass@1": 0.3 }, + "0.9": { "pass@1": 0.25 }, + "1.0": { "pass@1": 0.17 } + }, + "java": { + "0.0": { "pass@1": 0.84 }, + "0.1": { "pass@1": 0.68 }, + "0.2": { "pass@1": 0.61 }, + "0.3": { "pass@1": 0.56 }, + "0.4": { "pass@1": 0.47 }, + "0.5": { "pass@1": 0.43 }, + "0.6": { "pass@1": 0.4 }, + "0.7": { "pass@1": 0.38 }, + "0.8": { "pass@1": 0.36 }, + "0.9": { "pass@1": 0.29 }, + "1.0": { "pass@1": 0.23 } + }, + "rust": { + "0.0": { "pass@1": 0.75 }, + "0.1": { "pass@1": 0.65 }, + "0.2": { "pass@1": 0.54 }, + "0.3": { "pass@1": 0.44 }, + "0.4": { "pass@1": 0.39 }, + "0.5": { "pass@1": 0.34 }, + "0.6": { "pass@1": 0.32 }, + "0.7": { "pass@1": 0.28 }, + "0.8": { "pass@1": 0.26 }, + "0.9": { "pass@1": 0.24 }, + "1.0": { "pass@1": 0.22 } + }, + "typescript": { + "0.0": { "pass@1": 0.75 }, + "0.1": { "pass@1": 0.64 }, + "0.2": { "pass@1": 0.6 }, + "0.3": { "pass@1": 0.54 }, + "0.4": { "pass@1": 0.48 }, + "0.5": { "pass@1": 0.43 }, + "0.6": { "pass@1": 0.4 }, + "0.7": { "pass@1": 0.38 }, + "0.8": { "pass@1": 0.34 }, + "0.9": { "pass@1": 0.24 }, + "1.0": { "pass@1": 0.09 } + } + } + }, + "claude-3-haiku-20240307": { + "eval_date": "2024-05-13 01:37:42.931545", + "train_size": "200k", + "scores": { + "all": { + "0.0": { "pass@1": 0.926 }, + "0.1": { "pass@1": 0.876 }, + "0.2": { "pass@1": 0.858 }, + "0.3": { "pass@1": 0.854 }, + "0.4": { "pass@1": 0.846 }, + "0.5": { "pass@1": 0.84 }, + "0.6": { "pass@1": 0.826 }, + "0.7": { "pass@1": 0.822 }, + "0.8": { "pass@1": 0.818 }, + "0.9": { "pass@1": 0.798 }, + "1.0": { "pass@1": 0.706 } + }, + "python": { + "0.0": { "pass@1": 0.94 }, + "0.1": { "pass@1": 0.9 }, + "0.2": { "pass@1": 0.87 }, + "0.3": { "pass@1": 0.87 }, + "0.4": { "pass@1": 0.85 }, + "0.5": { "pass@1": 0.83 }, + "0.6": { "pass@1": 0.82 }, + "0.7": { "pass@1": 0.81 }, + "0.8": { "pass@1": 0.8 }, + "0.9": { "pass@1": 0.78 }, + "1.0": { "pass@1": 0.75 } + }, + "cpp": { + "0.0": { "pass@1": 0.88 }, + "0.1": { "pass@1": 0.82 }, + "0.2": { "pass@1": 0.81 }, + "0.3": { "pass@1": 0.81 }, + "0.4": { "pass@1": 0.8 }, + "0.5": { "pass@1": 0.79 }, + "0.6": { "pass@1": 0.76 }, + "0.7": { "pass@1": 0.75 }, + "0.8": { "pass@1": 0.75 }, + "0.9": { "pass@1": 0.73 }, + "1.0": { "pass@1": 0.7 } + }, + "java": { + "0.0": { "pass@1": 0.94 }, "0.1": { "pass@1": 0.93 }, - "0.2": { "pass@1": 0.93 }, - "0.3": { "pass@1": 0.93 }, - "0.4": { "pass@1": 0.93 }, + "0.2": { "pass@1": 0.92 }, + "0.3": { "pass@1": 0.92 }, + "0.4": { "pass@1": 0.92 }, "0.5": { "pass@1": 0.92 }, + "0.6": { "pass@1": 0.91 }, + "0.7": { "pass@1": 0.91 }, + "0.8": { "pass@1": 0.9 }, + "0.9": { "pass@1": 0.89 }, + "1.0": { "pass@1": 0.87 } + }, + "typescript": { + "0.0": { "pass@1": 0.99 }, + "0.1": { "pass@1": 0.94 }, + "0.2": { "pass@1": 0.91 }, + "0.3": { "pass@1": 0.9 }, + "0.4": { "pass@1": 0.9 }, + "0.5": { "pass@1": 0.9 }, "0.6": { "pass@1": 0.9 }, "0.7": { "pass@1": 0.9 }, - "0.8": { "pass@1": 0.88 }, - "0.9": { "pass@1": 0.87 }, - "1.0": { "pass@1": 0.87 } + "0.8": { "pass@1": 0.9 }, + "0.9": { "pass@1": 0.86 }, + "1.0": { "pass@1": 0.5 } + }, + "rust": { + "0.0": { "pass@1": 0.88 }, + "0.1": { "pass@1": 0.79 }, + "0.2": { "pass@1": 0.78 }, + "0.3": { "pass@1": 0.77 }, + "0.4": { "pass@1": 0.76 }, + "0.5": { "pass@1": 0.76 }, + "0.6": { "pass@1": 0.74 }, + "0.7": { "pass@1": 0.74 }, + "0.8": { "pass@1": 0.74 }, + "0.9": { "pass@1": 0.73 }, + "1.0": { "pass@1": 0.71 } + } + } + }, + "Qwen/CodeQwen1.5-7B-Chat": { + "eval_date": "2024-05-13 02:01:09.406097", + "train_size": "64k", + "scores": { + "all": { + "0.0": { "pass@1": 0.848 }, + "0.1": { "pass@1": 0.772 }, + "0.2": { "pass@1": 0.74 }, + "0.3": { "pass@1": 0.724 }, + "0.4": { "pass@1": 0.698 }, + "0.5": { "pass@1": 0.686 }, + "0.6": { "pass@1": 0.66 }, + "0.7": { "pass@1": 0.648 }, + "0.8": { "pass@1": 0.628 }, + "0.9": { "pass@1": 0.6 }, + "1.0": { "pass@1": 0.51 } + }, + "python": { + "0.0": { "pass@1": 0.92 }, + "0.1": { "pass@1": 0.81 }, + "0.2": { "pass@1": 0.77 }, + "0.3": { "pass@1": 0.77 }, + "0.4": { "pass@1": 0.76 }, + "0.5": { "pass@1": 0.75 }, + "0.6": { "pass@1": 0.71 }, + "0.7": { "pass@1": 0.69 }, + "0.8": { "pass@1": 0.69 }, + "0.9": { "pass@1": 0.67 }, + "1.0": { "pass@1": 0.63 } }, "cpp": { - "0.0": { "pass@1": 0.89 }, - "0.1": { "pass@1": 0.83 }, - "0.2": { "pass@1": 0.82 }, - "0.3": { "pass@1": 0.82 }, - "0.4": { "pass@1": 0.82 }, - "0.5": { "pass@1": 0.82 }, - "0.6": { "pass@1": 0.81 }, - "0.7": { "pass@1": 0.81 }, - "0.8": { "pass@1": 0.81 }, - "0.9": { "pass@1": 0.8 }, - "1.0": { "pass@1": 0.74 } + "0.0": { "pass@1": 0.69 }, + "0.1": { "pass@1": 0.6 }, + "0.2": { "pass@1": 0.58 }, + "0.3": { "pass@1": 0.57 }, + "0.4": { "pass@1": 0.54 }, + "0.5": { "pass@1": 0.53 }, + "0.6": { "pass@1": 0.52 }, + "0.7": { "pass@1": 0.5 }, + "0.8": { "pass@1": 0.47 }, + "0.9": { "pass@1": 0.46 }, + "1.0": { "pass@1": 0.4 } }, "java": { - "0.0": { "pass@1": 0.96 }, - "0.1": { "pass@1": 0.94 }, - "0.2": { "pass@1": 0.94 }, - "0.3": { "pass@1": 0.93 }, - "0.4": { "pass@1": 0.93 }, - "0.5": { "pass@1": 0.93 }, - "0.6": { "pass@1": 0.92 }, - "0.7": { "pass@1": 0.92 }, - "0.8": { "pass@1": 0.92 }, - "0.9": { "pass@1": 0.9 }, - "1.0": { "pass@1": 0.9 } + "0.0": { "pass@1": 0.93 }, + "0.1": { "pass@1": 0.87 }, + "0.2": { "pass@1": 0.85 }, + "0.3": { "pass@1": 0.84 }, + "0.4": { "pass@1": 0.81 }, + "0.5": { "pass@1": 0.8 }, + "0.6": { "pass@1": 0.78 }, + "0.7": { "pass@1": 0.77 }, + "0.8": { "pass@1": 0.74 }, + "0.9": { "pass@1": 0.71 }, + "1.0": { "pass@1": 0.68 } }, "typescript": { - "0.0": { "pass@1": 0.99 }, - "0.1": { "pass@1": 0.99 }, - "0.2": { "pass@1": 0.96 }, - "0.3": { "pass@1": 0.94 }, - "0.4": { "pass@1": 0.94 }, - "0.5": { "pass@1": 0.94 }, - "0.6": { "pass@1": 0.92 }, - "0.7": { "pass@1": 0.92 }, - "0.8": { "pass@1": 0.91 }, - "0.9": { "pass@1": 0.88 }, - "1.0": { "pass@1": 0.52 } + "0.0": { "pass@1": 0.87 }, + "0.1": { "pass@1": 0.84 }, + "0.2": { "pass@1": 0.8 }, + "0.3": { "pass@1": 0.76 }, + "0.4": { "pass@1": 0.75 }, + "0.5": { "pass@1": 0.73 }, + "0.6": { "pass@1": 0.72 }, + "0.7": { "pass@1": 0.71 }, + "0.8": { "pass@1": 0.67 }, + "0.9": { "pass@1": 0.6 }, + "1.0": { "pass@1": 0.32 } }, "rust": { - "0.0": { "pass@1": 0.93 }, - "0.1": { "pass@1": 0.91 }, - "0.2": { "pass@1": 0.89 }, - "0.3": { "pass@1": 0.88 }, - "0.4": { "pass@1": 0.88 }, - "0.5": { "pass@1": 0.88 }, - "0.6": { "pass@1": 0.86 }, - "0.7": { "pass@1": 0.85 }, - "0.8": { "pass@1": 0.85 }, - "0.9": { "pass@1": 0.85 }, - "1.0": { "pass@1": 0.84 } + "0.0": { "pass@1": 0.83 }, + "0.1": { "pass@1": 0.74 }, + "0.2": { "pass@1": 0.7 }, + "0.3": { "pass@1": 0.68 }, + "0.4": { "pass@1": 0.63 }, + "0.5": { "pass@1": 0.62 }, + "0.6": { "pass@1": 0.57 }, + "0.7": { "pass@1": 0.57 }, + "0.8": { "pass@1": 0.57 }, + "0.9": { "pass@1": 0.56 }, + "1.0": { "pass@1": 0.52 } } } }, - "deepseek-ai/deepseek-coder-33b-instruct": { - "eval_date": "2024-05-13 01:38:24.023742", - "train_size": "16k", + "claude-3-opus-20240229": { + "eval_date": "2024-05-13 01:37:48.414103", + "train_size": "200k", "scores": { "all": { - "0.0": { "pass@1": 0.68 }, - "0.1": { "pass@1": 0.58 }, - "0.2": { "pass@1": 0.568 }, - "0.3": { "pass@1": 0.552 }, - "0.4": { "pass@1": 0.542 }, - "0.5": { "pass@1": 0.528 }, - "0.6": { "pass@1": 0.512 }, - "0.7": { "pass@1": 0.5 }, - "0.8": { "pass@1": 0.484 }, - "0.9": { "pass@1": 0.438 }, - "1.0": { "pass@1": 0.356 } + "0.0": { "pass@1": 0.968 }, + "0.1": { "pass@1": 0.946 }, + "0.2": { "pass@1": 0.936 }, + "0.3": { "pass@1": 0.93 }, + "0.4": { "pass@1": 0.928 }, + "0.5": { "pass@1": 0.926 }, + "0.6": { "pass@1": 0.91 }, + "0.7": { "pass@1": 0.908 }, + "0.8": { "pass@1": 0.906 }, + "0.9": { "pass@1": 0.892 }, + "1.0": { "pass@1": 0.814 } }, "python": { - "0.0": { "pass@1": 0.76 }, - "0.1": { "pass@1": 0.63 }, - "0.2": { "pass@1": 0.62 }, - "0.3": { "pass@1": 0.62 }, - "0.4": { "pass@1": 0.62 }, - "0.5": { "pass@1": 0.62 }, - "0.6": { "pass@1": 0.61 }, - "0.7": { "pass@1": 0.6 }, - "0.8": { "pass@1": 0.59 }, - "0.9": { "pass@1": 0.57 }, - "1.0": { "pass@1": 0.54 } + "0.0": { "pass@1": 0.98 }, + "0.1": { "pass@1": 0.97 }, + "0.2": { "pass@1": 0.97 }, + "0.3": { "pass@1": 0.96 }, + "0.4": { "pass@1": 0.96 }, + "0.5": { "pass@1": 0.95 }, + "0.6": { "pass@1": 0.93 }, + "0.7": { "pass@1": 0.93 }, + "0.8": { "pass@1": 0.93 }, + "0.9": { "pass@1": 0.93 }, + "1.0": { "pass@1": 0.93 } }, "cpp": { - "0.0": { "pass@1": 0.65 }, - "0.1": { "pass@1": 0.54 }, - "0.2": { "pass@1": 0.51 }, - "0.3": { "pass@1": 0.49 }, - "0.4": { "pass@1": 0.49 }, - "0.5": { "pass@1": 0.48 }, - "0.6": { "pass@1": 0.47 }, - "0.7": { "pass@1": 0.47 }, - "0.8": { "pass@1": 0.44 }, - "0.9": { "pass@1": 0.36 }, - "1.0": { "pass@1": 0.32 } + "0.0": { "pass@1": 0.89 }, + "0.1": { "pass@1": 0.86 }, + "0.2": { "pass@1": 0.86 }, + "0.3": { "pass@1": 0.86 }, + "0.4": { "pass@1": 0.86 }, + "0.5": { "pass@1": 0.86 }, + "0.6": { "pass@1": 0.83 }, + "0.7": { "pass@1": 0.83 }, + "0.8": { "pass@1": 0.83 }, + "0.9": { "pass@1": 0.83 }, + "1.0": { "pass@1": 0.79 } }, "java": { - "0.0": { "pass@1": 0.7 }, - "0.1": { "pass@1": 0.65 }, - "0.2": { "pass@1": 0.63 }, - "0.3": { "pass@1": 0.62 }, - "0.4": { "pass@1": 0.61 }, - "0.5": { "pass@1": 0.61 }, - "0.6": { "pass@1": 0.59 }, - "0.7": { "pass@1": 0.57 }, - "0.8": { "pass@1": 0.53 }, - "0.9": { "pass@1": 0.47 }, - "1.0": { "pass@1": 0.4 } - }, - "rust": { - "0.0": { "pass@1": 0.45 }, - "0.1": { "pass@1": 0.31 }, - "0.2": { "pass@1": 0.31 }, - "0.3": { "pass@1": 0.28 }, - "0.4": { "pass@1": 0.27 }, - "0.5": { "pass@1": 0.27 }, - "0.6": { "pass@1": 0.26 }, - "0.7": { "pass@1": 0.23 }, - "0.8": { "pass@1": 0.23 }, - "0.9": { "pass@1": 0.21 }, - "1.0": { "pass@1": 0.18 } + "0.0": { "pass@1": 0.98 }, + "0.1": { "pass@1": 0.98 }, + "0.2": { "pass@1": 0.96 }, + "0.3": { "pass@1": 0.96 }, + "0.4": { "pass@1": 0.96 }, + "0.5": { "pass@1": 0.96 }, + "0.6": { "pass@1": 0.95 }, + "0.7": { "pass@1": 0.95 }, + "0.8": { "pass@1": 0.95 }, + "0.9": { "pass@1": 0.94 }, + "1.0": { "pass@1": 0.94 } }, "typescript": { - "0.0": { "pass@1": 0.84 }, - "0.1": { "pass@1": 0.77 }, - "0.2": { "pass@1": 0.77 }, - "0.3": { "pass@1": 0.75 }, - "0.4": { "pass@1": 0.72 }, - "0.5": { "pass@1": 0.66 }, - "0.6": { "pass@1": 0.63 }, - "0.7": { "pass@1": 0.63 }, - "0.8": { "pass@1": 0.63 }, - "0.9": { "pass@1": 0.58 }, - "1.0": { "pass@1": 0.34 } + "0.0": { "pass@1": 1.0 }, + "0.1": { "pass@1": 0.98 }, + "0.2": { "pass@1": 0.97 }, + "0.3": { "pass@1": 0.96 }, + "0.4": { "pass@1": 0.96 }, + "0.5": { "pass@1": 0.96 }, + "0.6": { "pass@1": 0.95 }, + "0.7": { "pass@1": 0.95 }, + "0.8": { "pass@1": 0.94 }, + "0.9": { "pass@1": 0.88 }, + "1.0": { "pass@1": 0.53 } + }, + "rust": { + "0.0": { "pass@1": 0.99 }, + "0.1": { "pass@1": 0.94 }, + "0.2": { "pass@1": 0.92 }, + "0.3": { "pass@1": 0.91 }, + "0.4": { "pass@1": 0.9 }, + "0.5": { "pass@1": 0.9 }, + "0.6": { "pass@1": 0.89 }, + "0.7": { "pass@1": 0.88 }, + "0.8": { "pass@1": 0.88 }, + "0.9": { "pass@1": 0.88 }, + "1.0": { "pass@1": 0.88 } } } }, - "mistralai/Mistral-7B-Instruct-v0.1": { - "eval_date": "2024-05-13 01:39:19.607214", - "train_size": "32k", + "ise-uiuc/Magicoder-S-DS-6.7B": { + "eval_date": "2024-05-13 01:38:57.693249", + "train_size": "16k", "scores": { "all": { - "0.0": { "pass@1": 0.576 }, - "0.1": { "pass@1": 0.286 }, - "0.2": { "pass@1": 0.214 }, - "0.3": { "pass@1": 0.172 }, - "0.4": { "pass@1": 0.162 }, - "0.5": { "pass@1": 0.148 }, - "0.6": { "pass@1": 0.132 }, - "0.7": { "pass@1": 0.12 }, - "0.8": { "pass@1": 0.11 }, - "0.9": { "pass@1": 0.094 }, - "1.0": { "pass@1": 0.068 } + "0.0": { "pass@1": 0.5 }, + "0.1": { "pass@1": 0.35 }, + "0.2": { "pass@1": 0.332 }, + "0.3": { "pass@1": 0.298 }, + "0.4": { "pass@1": 0.282 }, + "0.5": { "pass@1": 0.27 }, + "0.6": { "pass@1": 0.262 }, + "0.7": { "pass@1": 0.254 }, + "0.8": { "pass@1": 0.232 }, + "0.9": { "pass@1": 0.198 }, + "1.0": { "pass@1": 0.146 } }, "python": { - "0.0": { "pass@1": 0.61 }, - "0.1": { "pass@1": 0.25 }, - "0.2": { "pass@1": 0.18 }, - "0.3": { "pass@1": 0.14 }, - "0.4": { "pass@1": 0.14 }, - "0.5": { "pass@1": 0.14 }, - "0.6": { "pass@1": 0.11 }, - "0.7": { "pass@1": 0.1 }, - "0.8": { "pass@1": 0.1 }, - "0.9": { "pass@1": 0.1 }, - "1.0": { "pass@1": 0.07 } + "0.0": { "pass@1": 0.63 }, + "0.1": { "pass@1": 0.45 }, + "0.2": { "pass@1": 0.44 }, + "0.3": { "pass@1": 0.37 }, + "0.4": { "pass@1": 0.35 }, + "0.5": { "pass@1": 0.32 }, + "0.6": { "pass@1": 0.31 }, + "0.7": { "pass@1": 0.3 }, + "0.8": { "pass@1": 0.27 }, + "0.9": { "pass@1": 0.26 }, + "1.0": { "pass@1": 0.21 } }, "cpp": { - "0.0": { "pass@1": 0.43 }, - "0.1": { "pass@1": 0.21 }, - "0.2": { "pass@1": 0.13 }, - "0.3": { "pass@1": 0.12 }, - "0.4": { "pass@1": 0.11 }, - "0.5": { "pass@1": 0.1 }, - "0.6": { "pass@1": 0.1 }, - "0.7": { "pass@1": 0.1 }, - "0.8": { "pass@1": 0.09 }, - "0.9": { "pass@1": 0.09 }, - "1.0": { "pass@1": 0.09 } + "0.0": { "pass@1": 0.48 }, + "0.1": { "pass@1": 0.34 }, + "0.2": { "pass@1": 0.32 }, + "0.3": { "pass@1": 0.31 }, + "0.4": { "pass@1": 0.28 }, + "0.5": { "pass@1": 0.27 }, + "0.6": { "pass@1": 0.25 }, + "0.7": { "pass@1": 0.24 }, + "0.8": { "pass@1": 0.21 }, + "0.9": { "pass@1": 0.17 }, + "1.0": { "pass@1": 0.14 } }, "java": { - "0.0": { "pass@1": 0.64 }, + "0.0": { "pass@1": 0.51 }, "0.1": { "pass@1": 0.39 }, - "0.2": { "pass@1": 0.3 }, - "0.3": { "pass@1": 0.22 }, - "0.4": { "pass@1": 0.2 }, - "0.5": { "pass@1": 0.17 }, - "0.6": { "pass@1": 0.16 }, - "0.7": { "pass@1": 0.13 }, - "0.8": { "pass@1": 0.11 }, - "0.9": { "pass@1": 0.08 }, - "1.0": { "pass@1": 0.05 } - }, - "rust": { - "0.0": { "pass@1": 0.54 }, - "0.1": { "pass@1": 0.21 }, - "0.2": { "pass@1": 0.19 }, - "0.3": { "pass@1": 0.16 }, - "0.4": { "pass@1": 0.16 }, - "0.5": { "pass@1": 0.15 }, - "0.6": { "pass@1": 0.14 }, - "0.7": { "pass@1": 0.12 }, - "0.8": { "pass@1": 0.1 }, - "0.9": { "pass@1": 0.08 }, - "1.0": { "pass@1": 0.08 } + "0.2": { "pass@1": 0.35 }, + "0.3": { "pass@1": 0.3 }, + "0.4": { "pass@1": 0.29 }, + "0.5": { "pass@1": 0.28 }, + "0.6": { "pass@1": 0.28 }, + "0.7": { "pass@1": 0.27 }, + "0.8": { "pass@1": 0.25 }, + "0.9": { "pass@1": 0.22 }, + "1.0": { "pass@1": 0.17 } }, "typescript": { - "0.0": { "pass@1": 0.66 }, - "0.1": { "pass@1": 0.37 }, - "0.2": { "pass@1": 0.27 }, - "0.3": { "pass@1": 0.22 }, - "0.4": { "pass@1": 0.2 }, - "0.5": { "pass@1": 0.18 }, - "0.6": { "pass@1": 0.15 }, - "0.7": { "pass@1": 0.15 }, - "0.8": { "pass@1": 0.15 }, - "0.9": { "pass@1": 0.12 }, - "1.0": { "pass@1": 0.05 } + "0.0": { "pass@1": 0.61 }, + "0.1": { "pass@1": 0.48 }, + "0.2": { "pass@1": 0.47 }, + "0.3": { "pass@1": 0.43 }, + "0.4": { "pass@1": 0.41 }, + "0.5": { "pass@1": 0.41 }, + "0.6": { "pass@1": 0.4 }, + "0.7": { "pass@1": 0.39 }, + "0.8": { "pass@1": 0.36 }, + "0.9": { "pass@1": 0.28 }, + "1.0": { "pass@1": 0.15 } + }, + "rust": { + "0.0": { "pass@1": 0.27 }, + "0.1": { "pass@1": 0.09 }, + "0.2": { "pass@1": 0.08 }, + "0.3": { "pass@1": 0.08 }, + "0.4": { "pass@1": 0.08 }, + "0.5": { "pass@1": 0.07 }, + "0.6": { "pass@1": 0.07 }, + "0.7": { "pass@1": 0.07 }, + "0.8": { "pass@1": 0.07 }, + "0.9": { "pass@1": 0.06 }, + "1.0": { "pass@1": 0.06 } } } }