diff --git a/.changeset/good-falcons-applaud.md b/.changeset/good-falcons-applaud.md
new file mode 100644
index 00000000..7b431a5c
--- /dev/null
+++ b/.changeset/good-falcons-applaud.md
@@ -0,0 +1,5 @@
+---
+"@empiricalrun/scorer": patch
+---
+
+feat: py-script scorer has a simpler return type
diff --git a/docs/scoring/python.mdx b/docs/scoring/python.mdx
index b10e69ce..0e4ff4fd 100644
--- a/docs/scoring/python.mdx
+++ b/docs/scoring/python.mdx
@@ -10,7 +10,8 @@ the `scorers` section of the configuration. The `path` key should be the path to
   "scorers": [
     {
       "type": "py-script",
-      "path": "eval.py"
+      "path": "score.py",
+      "name": "my-custom-scorer"
     }
   ]
 ```
@@ -21,17 +22,32 @@ In the script, you need to define an `evaluate` method, with the following signa
   - output: dict with key `value` to get the output value (string) and key `metadata` to get metadata (dict)
   - inputs: dict of key-value pairs from the dataset sample
 - **Returns**
-  - List of results: each result is dict with score (0 or 1), message (string) and name (string)
+  - List of results: each result is a dict with score (number between 0 and 1), message (optional, string) and name (optional, string)
 
-```python
+```python score.py
 def evaluate(output, inputs):
-    # ...
+    model_response = output["value"]
+    metadata = output["metadata"]
+    # ... score the model response
+    return {
+        "score": 1,
+        "message": "Optional reasoning for this score"
+    }
+```
+
+## Multiple scores
+
+It is possible for the Python script to return an array of scores. Use `name` to distinguish
+between them.
+
+```python score.py
+def evaluate(output, inputs):
+    model_response = output["value"]
+    metadata = output["metadata"]
+    # ... score the model response
     return [
-        {
-            "score": 1,
-            "message": "Reason for this score",
-            "name": "name-for-this-scorer"
-        }
+        { "score": 1, "name": "syntax-score" },
+        { "score": 0, "name": "semantic-score", "message": "failure reason"}
     ]
 ```
diff --git a/examples/humaneval/score.py b/examples/humaneval/score.py
index 7891c303..1c1bb374 100644
--- a/examples/humaneval/score.py
+++ b/examples/humaneval/score.py
@@ -22,4 +22,4 @@ def evaluate(output, inputs):
     except Exception as e:
         passed, reason = 0, repr(e)
 
-    return [{"score": passed, "message": reason, "name": "unit-tests"}]
+    return {"score": passed, "message": reason}
diff --git a/packages/scorer/src/provider/deterministic/script.test.ts b/packages/scorer/src/provider/deterministic/script.test.ts
index ccc9b22c..4f84efc6 100644
--- a/packages/scorer/src/provider/deterministic/script.test.ts
+++ b/packages/scorer/src/provider/deterministic/script.test.ts
@@ -1,8 +1,9 @@
-import { DatasetSample } from "@empiricalrun/types";
+import { DatasetSample, RunOutput, Scorer } from "@empiricalrun/types";
 import { expect, test } from "vitest";
 
 import { scoreWithPythonScript } from "./script";
+import score from "../../index";
 
-const humanEval = {
+const humanEvalSample = {
   output: "def truncate_number(number):\n    integer_part = int(number)\n    decimal_part = number - integer_part\n    return decimal_part",
   test: "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate(3.5) == 0.5\n    assert abs(candidate(1.33) - 0.33) < 1e-6\n    assert abs(candidate(123.456) - 0.456) < 1e-6\n",
@@ -11,94 +12,88 @@
 // Using relative path to use the python script from HumanEval example
 // Tests run out of the $root/packages/evals directory
-const scriptPath = "../../examples/humaneval/score.py";
+const humanEvalScriptPath =
+  "../../examples/humaneval/score.py";
 
-test("script scorer works for a correct humaneval output", async () => {
+test("py-script scorer works for a correct humaneval output", async () => {
   const sample: DatasetSample = {
     id: "1",
     inputs: {
-      test: humanEval.test,
-      entry_point: humanEval.funcName,
+      test: humanEvalSample.test,
+      entry_point: humanEvalSample.funcName,
     },
   };
-
   expect(
     await scoreWithPythonScript({
       sample,
       output: {
-        value: humanEval.output,
+        value: humanEvalSample.output,
       },
       config: {
         type: "py-script",
-        path: scriptPath,
+        name: "unit-tests",
+        path: humanEvalScriptPath,
       },
     }),
-  ).toStrictEqual([
-    {
-      score: 1,
-      name: "unit-tests",
-      message: "Tests passed",
-    },
-  ]);
+  ).toStrictEqual({
+    score: 1,
+    name: "unit-tests",
+    message: "Tests passed",
+  });
 });
 
-test("script scorer works for a incorrect humaneval output", async () => {
+test("py-script scorer works for a incorrect humaneval output", async () => {
   const sample: DatasetSample = {
     id: "1",
     inputs: {
-      test: humanEval.test,
-      entry_point: humanEval.funcName + "123", // wrong function name
+      test: humanEvalSample.test,
+      entry_point: humanEvalSample.funcName + "123", // wrong function name
    },
   };
-
   expect(
     await scoreWithPythonScript({
       sample,
-      output: { value: humanEval.output },
+      output: { value: humanEvalSample.output },
      config: {
         type: "py-script",
-        path: scriptPath,
+        name: "unit-tests",
+        path: humanEvalScriptPath,
      },
    }),
-  ).toStrictEqual([
-    {
-      score: 0,
-      name: "unit-tests",
-      message: "NameError(\"name 'truncate_number123' is not defined\")",
-    },
-  ]);
+  ).toStrictEqual({
+    score: 0,
+    name: "unit-tests",
+    message: "NameError(\"name 'truncate_number123' is not defined\")",
+  });
 });
 
-test("script scorer works for a humaneval output that has backticks", async () => {
+test("py-script scorer works for a humaneval output that has backticks", async () => {
   const sample: DatasetSample = {
     id: "1",
     inputs: {
-      test: humanEval.test,
-      entry_point: humanEval.funcName,
+      test: humanEvalSample.test,
+      entry_point: humanEvalSample.funcName,
     },
   };
-
   expect(
     await scoreWithPythonScript({
       sample,
       output: {
-        value: "```python\n" + humanEval.output + "\n```",
+        value: "```python\n" + humanEvalSample.output + "\n```",
       },
       config: {
         type: "py-script",
-        path: scriptPath,
+        name: "unit-tests",
+        path: humanEvalScriptPath,
       },
     }),
-  ).toStrictEqual([
-    {
-      score: 1,
-      name: "unit-tests",
-      message: "Tests passed",
-    },
-  ]);
+  ).toStrictEqual({
+    score: 1,
+    name: "unit-tests",
+    message: "Tests passed",
+  });
 });
 
-test("script scorer times out a long running script", async () => {
+test("py-script scorer times out a long running script", async () => {
   const sample: DatasetSample = {
     id: "0",
     inputs: {},
@@ -122,7 +117,7 @@ test("script scorer times out a long running script", async () => {
   ]);
 }, 21000);
 
-test("script scorer works with a python script that throws", async () => {
+test("py-script scorer works with a python script that throws", async () => {
   const sample: DatasetSample = {
     id: "0",
     inputs: {},
@@ -136,7 +131,6 @@ test("script scorer works with a python script that throws", async () => {
       path: scriptWithError,
     },
   });
-
   expect(score).toEqual(
     expect.objectContaining({
       score: 0,
@@ -144,3 +138,46 @@ message: expect.stringContaining("SyntaxError"),
     }),
   );
 });
+
+test("py-script scorer works when returning array of scores", async () => {
+  const sample: DatasetSample = {
+    id: "0",
+    inputs: {},
+  };
+  const output: RunOutput = {
+    value: "output",
+  };
+  const scorer: Scorer = {
+    type: "py-script",
+    path: __dirname + "/test-assets/returns_array_of_scores.py",
+    name: "score-name",
+  };
+  const result = await score({ sample, output, scorers: [scorer] });
+  expect(result.length).toBe(2);
+  expect(result[0].score).toBe(1);
+  expect(result[0].name).toBe("score_1");
+  expect(result[0].message).toBe(undefined);
+  expect(result[1].score).toBe(0);
+  expect(result[1].name).toBe("score_2");
+  expect(result[1].message).toBe("why this failed");
+});
+
+test("py-script scorer works when returning single score without name", async () => {
+  const sample: DatasetSample = {
+    id: "0",
+    inputs: {},
+  };
+  const output: RunOutput = {
+    value: "output",
+  };
+  const scorer: Scorer = {
+    type: "py-script",
+    path: __dirname + "/test-assets/returns_single_score.py",
+    name: "single-score",
+  };
+  const result = await score({ sample, output, scorers: [scorer] });
+  expect(result.length).toBe(1);
+  expect(result[0].score).toBe(1);
+  expect(result[0].message).toBe(undefined);
+  expect(result[0].name).toBe("single-score");
+});
diff --git a/packages/scorer/src/provider/deterministic/script.ts b/packages/scorer/src/provider/deterministic/script.ts
index e3bdafaf..5b022c21 100644
--- a/packages/scorer/src/provider/deterministic/script.ts
+++ b/packages/scorer/src/provider/deterministic/script.ts
@@ -92,6 +92,13 @@ export const scoreWithPythonScript: ScoringFn = async ({
     });
   });
 
-  const result = runOutput[runOutput.length - 1];
-  return JSON.parse(result!);
+  const rawResult = runOutput[runOutput.length - 1];
+  let result = JSON.parse(rawResult!);
+  if (!Array.isArray(result)) {
+    result = {
+      name: config.name || name,
+      ...result,
+    };
+  }
+  return result;
 };
diff --git a/packages/scorer/src/provider/deterministic/test-assets/returns_array_of_scores.py b/packages/scorer/src/provider/deterministic/test-assets/returns_array_of_scores.py
new file mode 100644
index 00000000..0ef1dd89
--- /dev/null
+++ b/packages/scorer/src/provider/deterministic/test-assets/returns_array_of_scores.py
@@ -0,0 +1,5 @@
+def evaluate(output, inputs):
+    return [
+        {"score": 1, "name": "score_1"},
+        {"score": 0, "name": "score_2", "message": "why this failed"},
+    ]
diff --git a/packages/scorer/src/provider/deterministic/test-assets/returns_single_score.py b/packages/scorer/src/provider/deterministic/test-assets/returns_single_score.py
new file mode 100644
index 00000000..3331cea3
--- /dev/null
+++ b/packages/scorer/src/provider/deterministic/test-assets/returns_single_score.py
@@ -0,0 +1,2 @@
+def evaluate(output, inputs):
+    return { "score": 1 }
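
For reference, a minimal standalone `score.py` sketch that exercises the simplified return contract documented above (a single dict with `score` and an optional `message`) could look like the following; the `expected` input key and the substring check are illustrative assumptions, not part of this change:

```python
# Hypothetical scorer script following the documented evaluate(output, inputs) contract.
# "expected" is an assumed dataset input key, used here only for illustration.
def evaluate(output, inputs):
    model_response = output["value"]
    expected = inputs.get("expected", "")
    if expected and expected in model_response:
        # Single score as a plain dict; the runner fills in the scorer's configured name.
        return {"score": 1}
    return {"score": 0, "message": f"expected substring {expected!r} not found"}


if __name__ == "__main__":
    # Quick local check of the scoring logic.
    print(evaluate({"value": "hello world", "metadata": {}}, {"expected": "hello"}))
```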