diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
index 5451efbe6..bf6400117 100644
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@@ -65,6 +65,11 @@ make run-ci V=1
 
 ## Running Tests
 
+To install the libraries required for testing:
+```bash
+pip install -e ".[test]"
+```
+
 To run the test suite:
 
 ```bash
diff --git a/src/ragas/evaluation.py b/src/ragas/evaluation.py
index cfef3e7d4..bb59def68 100644
--- a/src/ragas/evaluation.py
+++ b/src/ragas/evaluation.py
@@ -154,6 +154,17 @@ def evaluate(
     if dataset is None:
         raise ValueError("Provide dataset!")
 
+    # Validate `metrics`: None (use defaults) or a list of Metric instances.
+    # `None | list` is avoided: it raises TypeError before Python 3.10.
+    if metrics is not None and not isinstance(metrics, list):
+        raise TypeError(
+            "Metrics should be provided in a list, e.g: metrics=[BleuScore()]"
+        )
+    if isinstance(metrics, list) and any(not isinstance(m, Metric) for m in metrics):
+        raise TypeError(
+            "All metrics must be initialised metric objects, e.g: metrics=[BleuScore(), AspectCritic()]"
+        )
+
     # default metrics
     if metrics is None:
         from ragas.metrics import (