From 6f01285c8ecfc1136fdef28181cf381fcbd0a0ae Mon Sep 17 00:00:00 2001
From: SwayamInSync <hawkempire007@gmail.com>
Date: Thu, 8 Jan 2026 15:24:04 +0000
Subject: [PATCH 1/4] opus wrote the content

---
 quaddtype/docs/api/constants_api.md      | 144 +++++++++++++
 quaddtype/docs/api/core.md               | 195 +++++++++++++++++
 quaddtype/docs/api/functions.md          | 215 ++++++++++++++++++
 quaddtype/docs/api/index.md              |  12 ++
 quaddtype/docs/api/utilities.md          | 137 ++++++++++++
 quaddtype/docs/api_docs.rst              |   7 -
 quaddtype/docs/changelog.md              |  56 +++++
 quaddtype/docs/conf.py                   |  68 ++++++
 quaddtype/docs/contributing.md           | 159 ++++++++++++++
 quaddtype/docs/index.md                  | 108 +++++++++-
 quaddtype/docs/installation.md           | 153 +++++++++++++
 quaddtype/docs/user_guide/arrays.md      | 230 ++++++++++++++++++++
 quaddtype/docs/user_guide/backends.md    | 150 +++++++++++++
 quaddtype/docs/user_guide/constants.md   | 139 ++++++++++++
 quaddtype/docs/user_guide/functions.md   | 251 +++++++++++++++++++++
 quaddtype/docs/user_guide/index.md       |  15 ++
 quaddtype/docs/user_guide/performance.md | 263 +++++++++++++++++++++++
 quaddtype/docs/user_guide/precision.md   | 128 +++++++++++
 quaddtype/docs/user_guide/threading.md   | 195 +++++++++++++++++
 quaddtype/pyproject.toml                 |   2 +
 20 files changed, 2618 insertions(+), 9 deletions(-)
 create mode 100644 quaddtype/docs/api/constants_api.md
 create mode 100644 quaddtype/docs/api/core.md
 create mode 100644 quaddtype/docs/api/functions.md
 create mode 100644 quaddtype/docs/api/index.md
 create mode 100644 quaddtype/docs/api/utilities.md
 delete mode 100644 quaddtype/docs/api_docs.rst
 create mode 100644 quaddtype/docs/changelog.md
 create mode 100644 quaddtype/docs/contributing.md
 create mode 100644 quaddtype/docs/installation.md
 create mode 100644 quaddtype/docs/user_guide/arrays.md
 create mode 100644 quaddtype/docs/user_guide/backends.md
 create mode 100644 quaddtype/docs/user_guide/constants.md
 create mode 100644 quaddtype/docs/user_guide/functions.md
 create mode 100644 quaddtype/docs/user_guide/index.md
 create mode 100644 quaddtype/docs/user_guide/performance.md
 create mode 100644 quaddtype/docs/user_guide/precision.md
 create mode 100644 quaddtype/docs/user_guide/threading.md

diff --git a/quaddtype/docs/api/constants_api.md b/quaddtype/docs/api/constants_api.md
new file mode 100644
index 00000000..cc82753c
--- /dev/null
+++ b/quaddtype/docs/api/constants_api.md
@@ -0,0 +1,144 @@
+# Constants Reference
+
+Pre-defined mathematical constants with quad precision accuracy.
+
+## Mathematical Constants
+
+```{eval-rst}
+.. data:: numpy_quaddtype.pi
+
+   The mathematical constant π (pi).
+   
+   Value: 3.14159265358979323846264338327950288...
+   
+   :type: QuadPrecision
+
+.. data:: numpy_quaddtype.e
+
+   Euler's number, the base of natural logarithms.
+   
+   Value: 2.71828182845904523536028747135266249...
+   
+   :type: QuadPrecision
+
+.. data:: numpy_quaddtype.log2e
+
+   The base-2 logarithm of e: log₂(e).
+   
+   Value: 1.44269504088896340735992468100189213...
+   
+   :type: QuadPrecision
+
+.. data:: numpy_quaddtype.log10e
+
+   The base-10 logarithm of e: log₁₀(e).
+   
+   Value: 0.43429448190325182765112891891660508...
+   
+   :type: QuadPrecision
+
+.. data:: numpy_quaddtype.ln2
+
+   The natural logarithm of 2: ln(2).
+   
+   Value: 0.69314718055994530941723212145817656...
+   
+   :type: QuadPrecision
+
+.. data:: numpy_quaddtype.ln10
+
+   The natural logarithm of 10: ln(10).
+   
+   Value: 2.30258509299404568401799145468436420...
+   
+   :type: QuadPrecision
+```
+
+## Type Limits
+
+```{eval-rst}
+.. data:: numpy_quaddtype.epsilon
+
+   Machine epsilon: the gap between 1.0 and the next larger representable value (2⁻¹¹²).
+   
+   Approximately 1.93 × 10⁻³⁴.
+   
+   :type: QuadPrecision
+
+.. data:: numpy_quaddtype.max_value
+
+   The largest representable finite quad-precision value.
+   
+   Approximately 1.19 × 10⁴⁹³².
+   
+   :type: QuadPrecision
+
+.. data:: numpy_quaddtype.smallest_normal
+
+   The smallest positive normalized quad-precision value.
+   
+   Approximately 3.36 × 10⁻⁴⁹³².
+   
+   :type: QuadPrecision
+
+.. data:: numpy_quaddtype.smallest_subnormal
+
+   The smallest positive subnormal (denormalized) quad-precision value.
+   
+   :type: QuadPrecision
+
+.. data:: numpy_quaddtype.resolution
+
+   The approximate decimal resolution of quad precision.
+   
+   :type: QuadPrecision
+```
+
+## Type Information
+
+```{eval-rst}
+.. data:: numpy_quaddtype.bits
+
+   Total number of bits in quad precision representation.
+   
+   :value: 128
+   :type: int
+
+.. data:: numpy_quaddtype.precision
+
+   Approximate number of significant decimal digits.
+   
+   :value: 33
+   :type: int
+```
+
+## Example Usage
+
+```python
+from numpy_quaddtype import (
+    pi, e, log2e, log10e, ln2, ln10,
+    epsilon, max_value, smallest_normal,
+    bits, precision
+)
+
+# Mathematical constants
+print(f"π = {pi}")
+print(f"e = {e}")
+
+# Verify relationships
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# e^(ln2) should equal 2
+two = np.exp(np.array([ln2], dtype=QuadPrecDType()))[0]
+print(f"e^(ln2) = {two}")
+
+# log2(e) * ln(2) should equal 1
+one = log2e * ln2
+print(f"log2(e) × ln(2) = {one}")
+
+# Type limits
+print(f"\nQuad precision uses {bits} bits")
+print(f"Approximately {precision} decimal digits of precision")
+print(f"Machine epsilon: {epsilon}")
+```
diff --git a/quaddtype/docs/api/core.md b/quaddtype/docs/api/core.md
new file mode 100644
index 00000000..713a84de
--- /dev/null
+++ b/quaddtype/docs/api/core.md
@@ -0,0 +1,195 @@
+# Core Types
+
+The fundamental types provided by NumPy QuadDType.
+
+## QuadPrecision
+
+```{eval-rst}
+.. class:: numpy_quaddtype.QuadPrecision(value, backend="sleef")
+
+   A quad-precision (128-bit) floating-point scalar.
+   
+   QuadPrecision is a NumPy scalar type that provides IEEE 754 binary128
+   floating-point arithmetic. It can be used standalone or as elements
+   of NumPy arrays.
+   
+   :param value: The value to convert to quad precision. Can be:
+       
+       - ``float`` or ``int``: Python numeric types
+       - ``str``: String representation for maximum precision
+       - ``bytes``: Raw 16-byte representation
+       - ``numpy.floating`` or ``numpy.integer``: NumPy numeric types
+       - ``QuadPrecision``: Another QuadPrecision value
+       
+   :type value: float, int, str, bytes, numpy scalar, or QuadPrecision
+   :param backend: Computation backend to use. Either ``"sleef"`` (default) 
+       or ``"longdouble"``.
+   :type backend: str, optional
+   
+   **Examples**
+   
+   Create from different input types::
+   
+       >>> from numpy_quaddtype import QuadPrecision
+       >>> QuadPrecision(3.14)
+       QuadPrecision('3.14000000000000012434...')
+       >>> QuadPrecision("3.14159265358979323846264338327950288")
+       QuadPrecision('3.14159265358979323846264338327950288')
+       >>> QuadPrecision(42)
+       QuadPrecision('42.0')
+   
+   Arithmetic operations::
+   
+       >>> x = QuadPrecision("1.5")
+       >>> y = QuadPrecision("2.5")
+       >>> x + y
+       QuadPrecision('4.0')
+       >>> x * y
+       QuadPrecision('3.75')
+   
+   .. attribute:: dtype
+      :type: QuadPrecDType
+      
+      The NumPy dtype for this scalar.
+   
+   .. attribute:: real
+      :type: QuadPrecision
+      
+      The real part (returns self for real numbers).
+   
+   .. attribute:: imag
+      :type: QuadPrecision
+      
+      The imaginary part (always zero for QuadPrecision).
+```
+
+## QuadPrecDType
+
+```{eval-rst}
+.. class:: numpy_quaddtype.QuadPrecDType(backend="sleef")
+
+   NumPy dtype for quad-precision floating-point arrays.
+   
+   QuadPrecDType is a custom NumPy dtype that enables creation and
+   manipulation of arrays containing quad-precision values.
+   
+   :param backend: Computation backend. Either ``"sleef"`` (default) or
+       ``"longdouble"``.
+   :type backend: str, optional
+   
+   **Examples**
+   
+   Create arrays with QuadPrecDType::
+   
+       >>> import numpy as np
+       >>> from numpy_quaddtype import QuadPrecDType
+       >>> arr = np.array([1, 2, 3], dtype=QuadPrecDType())
+       >>> arr.dtype
+       QuadPrecDType128
+       >>> np.zeros(5, dtype=QuadPrecDType())
+       array([0.0, 0.0, 0.0, 0.0, 0.0], dtype=QuadPrecDType128)
+   
+   .. attribute:: backend
+      :type: QuadBackend
+      
+      The computation backend (SLEEF or LONGDOUBLE).
+   
+   .. attribute:: itemsize
+      :type: int
+      
+      Size of each element in bytes (always 16).
+   
+   .. attribute:: alignment
+      :type: int
+      
+      Memory alignment in bytes (always 16).
+   
+   .. attribute:: name
+      :type: str
+      
+      String name of the dtype (``"QuadPrecDType128"``).
+```
+
+## QuadBackend
+
+```{eval-rst}
+.. class:: numpy_quaddtype.QuadBackend
+
+   Enumeration of available computation backends.
+   
+   .. attribute:: SLEEF
+      :value: 0
+      
+      SLEEF library backend (default). Provides true IEEE 754 binary128
+      quad precision with SIMD optimization.
+   
+   .. attribute:: LONGDOUBLE
+      :value: 1
+      
+      Platform's native long double backend. Precision varies by platform.
+   
+   **Example**
+   
+   ::
+   
+       >>> from numpy_quaddtype import QuadPrecDType, QuadBackend
+       >>> dtype = QuadPrecDType()
+       >>> dtype.backend == QuadBackend.SLEEF
+       True
+```
+
+## Convenience Functions
+
+### SleefQuadPrecision
+
+```{eval-rst}
+.. function:: numpy_quaddtype.SleefQuadPrecision(value)
+
+   Create a QuadPrecision scalar using the SLEEF backend.
+   
+   Equivalent to ``QuadPrecision(value, backend="sleef")``.
+   
+   :param value: Value to convert to quad precision.
+   :return: Quad precision scalar using SLEEF backend.
+   :rtype: QuadPrecision
+```
+
+### LongDoubleQuadPrecision
+
+```{eval-rst}
+.. function:: numpy_quaddtype.LongDoubleQuadPrecision(value)
+
+   Create a QuadPrecision scalar using the longdouble backend.
+   
+   Equivalent to ``QuadPrecision(value, backend="longdouble")``.
+   
+   :param value: Value to convert to quad precision.
+   :return: Quad precision scalar using longdouble backend.
+   :rtype: QuadPrecision
+```
+
+### SleefQuadPrecDType
+
+```{eval-rst}
+.. function:: numpy_quaddtype.SleefQuadPrecDType()
+
+   Create a QuadPrecDType using the SLEEF backend.
+   
+   Equivalent to ``QuadPrecDType(backend="sleef")``.
+   
+   :return: Dtype for SLEEF-backed quad precision arrays.
+   :rtype: QuadPrecDType
+```
+
+### LongDoubleQuadPrecDType
+
+```{eval-rst}
+.. function:: numpy_quaddtype.LongDoubleQuadPrecDType()
+
+   Create a QuadPrecDType using the longdouble backend.
+   
+   Equivalent to ``QuadPrecDType(backend="longdouble")``.
+   
+   :return: Dtype for longdouble-backed quad precision arrays.
+   :rtype: QuadPrecDType
+```
diff --git a/quaddtype/docs/api/functions.md b/quaddtype/docs/api/functions.md
new file mode 100644
index 00000000..ef352702
--- /dev/null
+++ b/quaddtype/docs/api/functions.md
@@ -0,0 +1,215 @@
+# Supported NumPy Functions
+
+NumPy QuadDType supports a comprehensive set of NumPy universal functions (ufuncs) and array functions.
+
+## Arithmetic Operations
+
+### Binary Arithmetic
+
+| Function | Operator | Description |
+|----------|----------|-------------|
+| `np.add` | `+` | Element-wise addition |
+| `np.subtract` | `-` | Element-wise subtraction |
+| `np.multiply` | `*` | Element-wise multiplication |
+| `np.divide` | `/` | Element-wise division |
+| `np.true_divide` | `/` | Element-wise true division |
+| `np.floor_divide` | `//` | Element-wise floor division |
+| `np.mod` | `%` | Element-wise modulo |
+| `np.power` | `**` | Element-wise power |
+
+### Unary Arithmetic
+
+| Function | Operator | Description |
+|----------|----------|-------------|
+| `np.negative` | `-x` | Numerical negative |
+| `np.positive` | `+x` | Numerical positive |
+| `np.absolute` | `abs(x)` | Absolute value |
+| `np.sign` | - | Sign indicator |
+
+## Trigonometric Functions
+
+### Standard Trigonometric
+
+| Function | Description |
+|----------|-------------|
+| `np.sin` | Sine |
+| `np.cos` | Cosine |
+| `np.tan` | Tangent |
+
+### Inverse Trigonometric
+
+| Function | Description |
+|----------|-------------|
+| `np.arcsin` | Inverse sine |
+| `np.arccos` | Inverse cosine |
+| `np.arctan` | Inverse tangent |
+| `np.arctan2` | Two-argument inverse tangent |
+
+### Hyperbolic Functions
+
+| Function | Description |
+|----------|-------------|
+| `np.sinh` | Hyperbolic sine |
+| `np.cosh` | Hyperbolic cosine |
+| `np.tanh` | Hyperbolic tangent |
+| `np.arcsinh` | Inverse hyperbolic sine |
+| `np.arccosh` | Inverse hyperbolic cosine |
+| `np.arctanh` | Inverse hyperbolic tangent |
+
+## Exponential and Logarithmic
+
+### Exponential
+
+| Function | Description |
+|----------|-------------|
+| `np.exp` | Exponential (e^x) |
+| `np.exp2` | Base-2 exponential (2^x) |
+| `np.expm1` | exp(x) - 1 (accurate for small x) |
+
+### Logarithmic
+
+| Function | Description |
+|----------|-------------|
+| `np.log` | Natural logarithm |
+| `np.log2` | Base-2 logarithm |
+| `np.log10` | Base-10 logarithm |
+| `np.log1p` | log(1 + x) (accurate for small x) |
+
+## Power and Root Functions
+
+| Function | Description |
+|----------|-------------|
+| `np.sqrt` | Square root |
+| `np.cbrt` | Cube root |
+| `np.square` | Square (x²) |
+| `np.hypot` | Hypotenuse (√(x² + y²)) |
+
+## Comparison Functions
+
+### Element-wise Comparison
+
+| Function | Operator | Description |
+|----------|----------|-------------|
+| `np.equal` | `==` | Equal |
+| `np.not_equal` | `!=` | Not equal |
+| `np.less` | `<` | Less than |
+| `np.less_equal` | `<=` | Less than or equal |
+| `np.greater` | `>` | Greater than |
+| `np.greater_equal` | `>=` | Greater than or equal |
+
+### Min/Max
+
+| Function | Description |
+|----------|-------------|
+| `np.minimum` | Element-wise minimum |
+| `np.maximum` | Element-wise maximum |
+| `np.fmin` | Element-wise minimum (ignores NaN) |
+| `np.fmax` | Element-wise maximum (ignores NaN) |
+
+## Rounding Functions
+
+| Function | Description |
+|----------|-------------|
+| `np.floor` | Floor (round down) |
+| `np.ceil` | Ceiling (round up) |
+| `np.trunc` | Truncate toward zero |
+| `np.rint` | Round to nearest integer |
+
+## Special Value Functions
+
+| Function | Description |
+|----------|-------------|
+| `np.isfinite` | Test for finite values |
+| `np.isinf` | Test for infinity |
+| `np.isnan` | Test for NaN |
+| `np.signbit` | Test for negative sign bit |
+| `np.copysign` | Copy sign of second to first |
+
+## Reduction Functions
+
+| Function | Description |
+|----------|-------------|
+| `np.sum` | Sum of elements |
+| `np.prod` | Product of elements |
+| `np.mean` | Arithmetic mean |
+| `np.min` / `np.amin` | Minimum value |
+| `np.max` / `np.amax` | Maximum value |
+
+## Array Creation and Manipulation
+
+### Creation
+
+| Function | Description |
+|----------|-------------|
+| `np.zeros` | Array of zeros |
+| `np.ones` | Array of ones |
+| `np.empty` | Uninitialized array |
+| `np.full` | Array filled with value |
+| `np.arange` | Range of values |
+| `np.linspace` | Linearly spaced values |
+
+### Manipulation
+
+| Function | Description |
+|----------|-------------|
+| `np.reshape` | Reshape array |
+| `np.transpose` | Transpose array |
+| `np.concatenate` | Join arrays |
+| `np.stack` | Stack arrays |
+| `np.split` | Split array |
+
+## Linear Algebra (via QuadBLAS)
+
+When QuadBLAS is available (not on Windows):
+
+| Function | Description |
+|----------|-------------|
+| `np.dot` | Dot product |
+| `np.matmul` / `@` | Matrix multiplication |
+
+## Type Conversion
+
+| Function | Description |
+|----------|-------------|
+| `ndarray.astype` | Convert array dtype (the `np.astype` function form needs NumPy ≥ 2.1) |
+| `np.array` | Create array from data |
+
+## Usage Examples
+
+### Trigonometric
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType, pi
+
+# Keep the angles in quad precision: float(pi) would truncate them to double precision
+x = np.array([0, pi/6, pi/4, pi/3, pi/2], dtype=QuadPrecDType())
+
+print("sin(x):", np.sin(x))
+print("cos(x):", np.cos(x))
+```
+
+### Exponential and Logarithmic
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+x = np.array([1, 2, 3], dtype=QuadPrecDType())
+
+print("exp(x):", np.exp(x))
+print("log(exp(x)):", np.log(np.exp(x)))  # Should return x
+```
+
+### Reductions
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+arr = np.arange(1, 11, dtype=QuadPrecDType())
+
+print("Sum:", np.sum(arr))        # 55
+print("Product:", np.prod(arr))   # 3628800 (10!)
+print("Mean:", np.mean(arr))      # 5.5
+```
diff --git a/quaddtype/docs/api/index.md b/quaddtype/docs/api/index.md
new file mode 100644
index 00000000..90c27e15
--- /dev/null
+++ b/quaddtype/docs/api/index.md
@@ -0,0 +1,12 @@
+# API Reference
+
+Complete API documentation for NumPy QuadDType.
+
+```{toctree}
+:maxdepth: 2
+
+core
+functions
+constants_api
+utilities
+```
diff --git a/quaddtype/docs/api/utilities.md b/quaddtype/docs/api/utilities.md
new file mode 100644
index 00000000..e47f0846
--- /dev/null
+++ b/quaddtype/docs/api/utilities.md
@@ -0,0 +1,137 @@
+# Utility Functions
+
+Helper functions for platform detection and threading control.
+
+## Platform Detection
+
+```{eval-rst}
+.. function:: numpy_quaddtype.is_longdouble_128()
+
+   Check if the platform's ``long double`` type is 128-bit.
+   
+   This is useful for determining whether the longdouble backend provides
+   true quad precision on the current platform.
+   
+   :return: ``True`` if ``long double`` is 128-bit, ``False`` otherwise.
+   :rtype: bool
+   
+   **Platform behavior:**
+   
+   - Linux x86_64: Returns ``False`` (80-bit extended precision)
+   - Linux aarch64: Returns ``True`` (128-bit quad precision)
+   - macOS: Returns ``False`` (80-bit extended precision on x86_64, 64-bit double precision on arm64)
+   - Windows (all): Returns ``False`` (64-bit double precision)
+   
+   **Example**
+   
+   ::
+   
+       >>> from numpy_quaddtype import is_longdouble_128
+       >>> if is_longdouble_128():
+       ...     print("Native quad precision available via longdouble")
+       ... else:
+       ...     print("Use SLEEF backend for quad precision")
+```
+
+## Threading Control
+
+These functions control the number of threads used by QuadBLAS for parallel operations.
+
+```{eval-rst}
+.. function:: numpy_quaddtype.set_num_threads(n)
+
+   Set the number of threads used by QuadBLAS operations.
+   
+   :param n: Number of threads to use. Must be a positive integer.
+   :type n: int
+   :raises ValueError: If n is not a positive integer.
+   
+   **Example**
+   
+   ::
+   
+       >>> from numpy_quaddtype import set_num_threads, get_num_threads
+       >>> set_num_threads(4)
+       >>> get_num_threads()
+       4
+   
+   .. note::
+      
+      This function has no effect if QuadBLAS is disabled (e.g., on Windows).
+
+.. function:: numpy_quaddtype.get_num_threads()
+
+   Get the current number of threads used by QuadBLAS.
+   
+   :return: Current thread count for QuadBLAS operations.
+   :rtype: int
+   
+   **Example**
+   
+   ::
+   
+       >>> from numpy_quaddtype import get_num_threads
+       >>> get_num_threads()
+       4
+
+.. function:: numpy_quaddtype.get_quadblas_version()
+
+   Get the QuadBLAS library version string.
+   
+   :return: Version string if QuadBLAS is available, ``None`` otherwise.
+   :rtype: str or None
+   
+   **Example**
+   
+   ::
+   
+       >>> from numpy_quaddtype import get_quadblas_version
+       >>> version = get_quadblas_version()
+       >>> if version:
+       ...     print(f"QuadBLAS version: {version}")
+       ... else:
+       ...     print("QuadBLAS not available")
+   
+   .. note::
+      
+      QuadBLAS is automatically disabled on Windows builds due to MSVC
+      compatibility issues. In this case, the function returns ``None``.
+```
+
+## Example: Optimizing Thread Usage
+
+```python
+import numpy as np
+from numpy_quaddtype import (
+    QuadPrecDType, 
+    set_num_threads, 
+    get_num_threads,
+    get_quadblas_version
+)
+
+# Check QuadBLAS availability
+version = get_quadblas_version()
+if version:
+    print(f"QuadBLAS {version} available")
+    
+    # Get current threads
+    print(f"Default threads: {get_num_threads()}")
+    
+    # Create test array
+    arr = np.random.randn(100000).astype(QuadPrecDType())
+    
+    # Benchmark with different thread counts
+    import time
+    
+    for threads in [1, 2, 4, 8]:
+        set_num_threads(threads)
+        
+        start = time.time()
+        for _ in range(10):
+            result = np.dot(arr, arr)
+        elapsed = time.time() - start
+        
+        print(f"  {threads} threads: {elapsed:.3f}s")
+else:
+    print("QuadBLAS not available - single-threaded operations only")
+```
diff --git a/quaddtype/docs/api_docs.rst b/quaddtype/docs/api_docs.rst
deleted file mode 100644
index b167a85a..00000000
--- a/quaddtype/docs/api_docs.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-NumPy QuadDType API Documentation
-=================================
-
-.. automodule:: numpy_quaddtype
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/quaddtype/docs/changelog.md b/quaddtype/docs/changelog.md
new file mode 100644
index 00000000..a599b071
--- /dev/null
+++ b/quaddtype/docs/changelog.md
@@ -0,0 +1,56 @@
+# Changelog
+
+All notable changes to NumPy QuadDType will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.2.0] - 2025
+
+### Added
+
+- Full type stub support (`.pyi` files) for static type checking
+- Type-safe API with mypy and pyright compatibility
+- `QuadBackend` enum for backend type checking
+- Pre-defined mathematical constants (`pi`, `e`, `log2e`, etc.)
+- Type limit constants (`epsilon`, `max_value`, `smallest_normal`, etc.)
+- QuadBLAS threading control functions
+- Windows support (with QuadBLAS disabled)
+- Free-threading (GIL-free) support for Python 3.13+
+- Comprehensive test suite with thread safety tests
+
+### Changed
+
+- Improved string representation of QuadPrecision values
+- Better error messages for invalid operations
+- Enhanced documentation
+
+### Fixed
+
+- Memory alignment issues on certain platforms
+- Thread safety in scalar operations
+
+## [0.1.0] - 2024
+
+### Added
+
+- Initial release
+- `QuadPrecision` scalar type
+- `QuadPrecDType` NumPy dtype
+- SLEEF backend for cross-platform quad precision
+- Longdouble backend for native support
+- Basic arithmetic operations
+- Trigonometric functions (sin, cos, tan, etc.)
+- Exponential and logarithmic functions
+- Comparison operations
+- Array broadcasting support
+- Linux and macOS wheel builds
+
+## Unreleased
+
+### Planned
+
+- Complex quad precision support
+- Additional linear algebra functions
+- GPU acceleration exploration
+- Improved performance for small arrays
diff --git a/quaddtype/docs/conf.py b/quaddtype/docs/conf.py
index 2f6243ab..02819c8a 100644
--- a/quaddtype/docs/conf.py
+++ b/quaddtype/docs/conf.py
@@ -9,6 +9,8 @@
 project = 'NumPy QuadDType'
 copyright = '2025, NumPy Community'
 author = 'NumPy Community'
+release = '0.2.0'
+version = '0.2.0'
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
@@ -16,14 +18,80 @@
 extensions = [
     'sphinx.ext.autodoc',
     'sphinx.ext.napoleon',
+    'sphinx.ext.viewcode',
+    'sphinx.ext.intersphinx',
     'myst_parser',
+    'sphinx_design',
+    'sphinx_copybutton',
 ]
 
 templates_path = ['_templates']
 exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 
+# -- MyST configuration ------------------------------------------------------
+myst_enable_extensions = [
+    "colon_fence",
+    "deflist",
+    "fieldlist",
+]
+
+# -- Intersphinx configuration -----------------------------------------------
+intersphinx_mapping = {
+    'python': ('https://docs.python.org/3', None),
+    'numpy': ('https://numpy.org/doc/stable/', None),
+}
+
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
 html_theme = 'pydata_sphinx_theme'
 
+html_theme_options = {
+    "github_url": "https://github.com/numpy/numpy-user-dtypes",
+    "show_toc_level": 2,
+    "navbar_align": "left",
+    "navbar_end": ["theme-switcher", "navbar-icon-links"],
+    "icon_links": [
+        {
+            "name": "PyPI",
+            "url": "https://pypi.org/project/numpy-quaddtype/",
+            "icon": "fa-brands fa-python",
+        },
+    ],
+    "logo": {
+        "text": "NumPy QuadDType",
+    },
+    "footer_start": ["copyright"],
+    "footer_end": ["theme-version"],
+    "secondary_sidebar_items": ["page-toc", "edit-this-page"],
+    "pygments_light_style": "default",
+    "pygments_dark_style": "monokai",
+}
+
+html_context = {
+    "github_user": "numpy",
+    "github_repo": "numpy-user-dtypes",
+    "github_version": "main",
+    "doc_path": "quaddtype/docs",
+}
+
+html_sidebars = {
+    "**": ["sidebar-nav-bs", "sidebar-ethical-ads"],
+}
+
+# -- Copy button configuration -----------------------------------------------
+copybutton_prompt_text = r">>> |\.\.\. |\$ "
+copybutton_prompt_is_regexp = True
+
+# -- Autodoc configuration ---------------------------------------------------
+autodoc_default_options = {
+    'members': True,
+    'undoc-members': True,
+    'show-inheritance': True,
+}
+
+# -- Napoleon configuration --------------------------------------------------
+napoleon_google_docstring = True
+napoleon_numpy_docstring = True
+napoleon_include_init_with_doc = True
+
diff --git a/quaddtype/docs/contributing.md b/quaddtype/docs/contributing.md
new file mode 100644
index 00000000..4ad4d09a
--- /dev/null
+++ b/quaddtype/docs/contributing.md
@@ -0,0 +1,159 @@
+# Contributing
+
+We welcome contributions to NumPy QuadDType! This guide will help you get started.
+
+## Development Setup
+
+### Prerequisites
+
+- Python 3.11+
+- GCC or Clang compiler
+- CMake ≥ 3.15
+- Git
+
+### Setting Up the Development Environment
+
+```bash
+# Clone the repository
+git clone https://github.com/numpy/numpy-user-dtypes.git
+cd numpy-user-dtypes/quaddtype
+
+# Create virtual environment
+python -m venv venv
+source venv/bin/activate  # or `venv\Scripts\activate` on Windows
+
+# Install NumPy (development version)
+pip install "numpy @ git+https://github.com/numpy/numpy.git"
+
+# Install development dependencies
+pip install -e ".[test,docs]" -v --no-build-isolation
+```
+
+## Running Tests
+
+```bash
+# Run all tests
+pytest tests/
+
+# Run with verbose output
+pytest -v tests/
+
+# Run specific test file
+pytest tests/test_basic.py
+
+# Run with parallel execution
+pytest -n auto tests/  # requires pytest-xdist
+```
+
+## Code Style
+
+We follow standard Python conventions:
+
+- **PEP 8** for Python code style
+- **Type hints** for public APIs
+- **Docstrings** for all public functions and classes
+
+### Type Checking
+
+```bash
+# Run mypy
+mypy numpy_quaddtype/
+
+# Run pyright
+pyright numpy_quaddtype/
+```
+
+## Building Documentation
+
+```bash
+# Install documentation dependencies
+pip install ".[docs]"
+
+# Build HTML documentation
+cd docs/
+make html
+
+# View locally
+python -m http.server --directory _build/html
+```
+
+## Making Changes
+
+### 1. Create a Branch
+
+```bash
+git checkout -b feature/my-new-feature
+```
+
+### 2. Make Your Changes
+
+- Write code with tests
+- Add docstrings
+- Update documentation if needed
+
+### 3. Run Tests
+
+```bash
+pytest tests/
+```
+
+### 4. Submit a Pull Request
+
+- Push your branch to GitHub
+- Open a pull request against `main`
+- Fill out the PR template
+- Wait for review
+
+## Project Structure
+
+```
+quaddtype/
+├── docs/               # Documentation (Sphinx)
+├── numpy_quaddtype/    # Python package
+│   ├── __init__.py     # Public API
+│   ├── __init__.pyi    # Type stubs
+│   ├── _quaddtype_main.pyi  # C extension stubs
+│   └── src/            # C source files
+├── tests/              # Test suite
+├── subprojects/        # Meson subprojects (SLEEF)
+├── meson.build         # Build configuration
+└── pyproject.toml      # Package metadata
+```
+
+## C Extension Development
+
+The core functionality is implemented in C. Key files:
+
+- `numpy_quaddtype/src/quaddtype_main.c` - Main extension module
+- `numpy_quaddtype/src/scalar.c` - QuadPrecision scalar implementation
+- `numpy_quaddtype/src/dtype.c` - QuadPrecDType implementation
+- `numpy_quaddtype/src/umath.c` - Universal function implementations
+
+### Building the C Extension
+
+```bash
+# Rebuild after C changes
+pip install . -v --no-build-isolation
+
+# With debug symbols
+CFLAGS="-g -O0" pip install . -v --no-build-isolation
+```
+
+## Reporting Issues
+
+When reporting bugs, please include:
+
+1. Operating system and version
+2. Python version
+3. NumPy version
+4. NumPy-QuadDType version
+5. Minimal code to reproduce the issue
+6. Full error traceback
+
+## Code of Conduct
+
+This project follows the [NumPy Code of Conduct](https://numpy.org/code-of-conduct/).
+
+## License
+
+By contributing to NumPy QuadDType, you agree that your contributions will be licensed under the BSD-3-Clause License.
diff --git a/quaddtype/docs/index.md b/quaddtype/docs/index.md
index c19eba45..16678697 100644
--- a/quaddtype/docs/index.md
+++ b/quaddtype/docs/index.md
@@ -1,8 +1,112 @@
-```{include} ../README.md
+# NumPy QuadDType
+
+```{image} https://img.shields.io/pypi/v/numpy-quaddtype.svg
+:target: https://pypi.org/project/numpy-quaddtype/
+:alt: PyPI version
+```
+```{image} https://img.shields.io/pypi/pyversions/numpy-quaddtype.svg
+:alt: Python versions
+```
+
+**A cross-platform 128-bit (quadruple precision) floating-point data type for NumPy.**
+
+NumPy QuadDType provides IEEE 754 quadruple-precision (binary128) floating-point arithmetic as a first-class NumPy dtype, enabling high-precision numerical computations that go beyond the standard 64-bit double precision.
+
+## Key Features
+
+::::{grid} 1 1 2 3
+:gutter: 2
+
+:::{grid-item-card} 🎯 True Quad Precision
+:link: user_guide/precision
+:link-type: doc
+
+128-bit floating point with ~34 decimal digits of precision, compared to ~15-16 for float64.
+:::
+
+:::{grid-item-card} 🔌 NumPy Integration
+:link: user_guide/arrays
+:link-type: doc
+
+Works seamlessly with NumPy arrays, ufuncs, and broadcasting.
+:::
+
+:::{grid-item-card} ⚡ SIMD Optimized
+:link: user_guide/performance
+:link-type: doc
+
+Powered by the SLEEF library for vectorized transcendental functions.
+:::
+
+:::{grid-item-card} 🧮 Mathematical Functions
+:link: api/functions
+:link-type: doc
+
+Full suite of math functions: trigonometric, exponential, logarithmic, and more.
+:::
+
+:::{grid-item-card} 🔀 Dual Backend
+:link: user_guide/backends
+:link-type: doc
+
+Choose between SLEEF (default) or longdouble backends.
+:::
+
+:::{grid-item-card} 🧵 Thread-Safe
+:link: user_guide/threading
+:link-type: doc
+
+Full support for Python's free-threading (GIL-free) mode.
+:::
+
+::::
+
+## Quick Start
+
+### Installation
+
+```bash
+pip install numpy-quaddtype
 ```
 
+### Basic Usage
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecision, QuadPrecDType
+
+# Create a quad-precision scalar
+x = QuadPrecision("3.14159265358979323846264338327950288")
+
+# Create a quad-precision array
+arr = np.array([1, 2, 3], dtype=QuadPrecDType())
+
+# Use NumPy functions
+result = np.sin(arr)
+print(result)
+```
+
+### Why Quad Precision?
+
+Standard double precision (float64) provides approximately 15-16 significant decimal digits. While sufficient for most applications, some scenarios require higher precision:
+
+- **Numerical Analysis**: Ill-conditioned problems, iterative algorithms
+- **Scientific Computing**: Astronomy, physics simulations requiring extreme accuracy
+- **Financial Calculations**: High-precision arithmetic for regulatory compliance
+- **Validation**: Checking accuracy of lower-precision implementations
+
 ```{toctree}
+:maxdepth: 2
 :hidden:
 
-api_docs.rst
+installation
+user_guide/index
+api/index
+contributing
+changelog
 ```
+
+## Indices and tables
+
+- {ref}`genindex`
+- {ref}`search`
diff --git a/quaddtype/docs/installation.md b/quaddtype/docs/installation.md
new file mode 100644
index 00000000..b0fe58f6
--- /dev/null
+++ b/quaddtype/docs/installation.md
@@ -0,0 +1,153 @@
+# Installation
+
+## Quick Install
+
+The simplest way to install NumPy QuadDType is via pip:
+
+```bash
+pip install numpy-quaddtype
+```
+
+```{note}
+NumPy QuadDType requires **NumPy 2.0 or later** and **Python 3.11+**.
+```
+
+## Requirements
+
+| Requirement | Version |
+|-------------|---------|
+| Python | ≥ 3.11 |
+| NumPy | ≥ 2.0 |
+
+## Platform Support
+
+NumPy QuadDType provides pre-built wheels for:
+
+| Platform | Architectures |
+|----------|---------------|
+| Linux | x86_64, aarch64 |
+| macOS | x86_64, arm64 (Apple Silicon) |
+| Windows | x64 |
+
+## Installing from Source
+
+For development or if pre-built wheels aren't available for your platform:
+
+### Prerequisites
+
+- **C/C++ Compiler**: GCC or Clang
+- **CMake**: ≥ 3.15
+- **Python**: 3.11+
+- **Git**
+
+### Linux/macOS
+
+```bash
+# Create and activate virtual environment
+python3 -m venv venv
+source venv/bin/activate
+
+# Install NumPy (development version required for NumPy 2.x features)
+pip install "numpy @ git+https://github.com/numpy/numpy.git"
+
+# Install build dependencies
+pip install meson meson-python ninja pytest
+
+# Clone and install
+git clone https://github.com/numpy/numpy-user-dtypes.git
+cd numpy-user-dtypes/quaddtype
+pip install . -v --no-build-isolation
+```
+
+### Windows
+
+```{warning}
+On Windows, QuadBLAS optimization is automatically disabled due to MSVC compatibility issues.
+```
+
+1. Open **Developer Command Prompt for VS** or **Developer PowerShell for VS**
+
+2. Setup environment:
+   ```powershell
+   python -m venv venv
+   .\venv\Scripts\Activate.ps1
+   
+   pip install -U pip
+   pip install numpy pytest ninja meson meson-python
+   ```
+
+3. Set compiler flags:
+   ```powershell
+   $env:CFLAGS = "/DDISABLE_QUADBLAS"
+   $env:CXXFLAGS = "/DDISABLE_QUADBLAS"
+   ```
+
+4. Build and install:
+   ```powershell
+   pip install . -v --no-build-isolation
+   ```
+
+## Verifying Installation
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecision, QuadPrecDType
+
+# Check version
+import numpy_quaddtype
+print(f"numpy-quaddtype version: {numpy_quaddtype.__version__}")
+
+# Create a quad precision value
+x = QuadPrecision("3.141592653589793238462643383279502884197")
+print(f"π in quad precision: {x}")
+
+# Create an array
+arr = np.array([1, 2, 3], dtype=QuadPrecDType())
+print(f"Array dtype: {arr.dtype}")
+```
+
+## Optional: Development Installation
+
+For contributing to NumPy QuadDType:
+
+```bash
+# Clone the repository
+git clone https://github.com/numpy/numpy-user-dtypes.git
+cd numpy-user-dtypes/quaddtype
+
+# Install in editable mode with test dependencies
+pip install -e ".[test,docs]" -v --no-build-isolation
+```
+
+## Troubleshooting
+
+### CMake Not Found
+
+If you get a CMake error, install it:
+
+```bash
+# Linux (Ubuntu/Debian)
+sudo apt-get install cmake
+
+# macOS
+brew install cmake
+
+# Windows
+# Download from https://cmake.org/download/
+```
+
+### NumPy Version Error
+
+NumPy QuadDType requires NumPy 2.0+. If you have an older version:
+
+```bash
+pip install --upgrade "numpy>=2.0"
+```
+
+### Compiler Issues on macOS
+
+If you encounter compiler issues on macOS, ensure you have Xcode command-line tools:
+
+```bash
+xcode-select --install
+```
diff --git a/quaddtype/docs/user_guide/arrays.md b/quaddtype/docs/user_guide/arrays.md
new file mode 100644
index 00000000..bedd6402
--- /dev/null
+++ b/quaddtype/docs/user_guide/arrays.md
@@ -0,0 +1,230 @@
+# Working with Arrays
+
+NumPy QuadDType integrates seamlessly with NumPy arrays, providing the full power of NumPy's array operations with quad precision arithmetic.
+
+## Creating Arrays
+
+### From Python Lists
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# Create an array from a list
+arr = np.array([1.0, 2.0, 3.0], dtype=QuadPrecDType())
+print(arr)
+print(f"dtype: {arr.dtype}")
+```
+
+### From String Values (High Precision)
+
+For maximum precision, create arrays from string representations:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# String input preserves all significant digits
+high_precision = np.array([
+    "3.14159265358979323846264338327950288",
+    "2.71828182845904523536028747135266249",
+    "1.41421356237309504880168872420969807"
+], dtype=QuadPrecDType())
+
+print(high_precision)
+```
+
+### Using `zeros`, `ones`, `empty`
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# Create arrays with standard NumPy functions
+zeros = np.zeros(5, dtype=QuadPrecDType())
+ones = np.ones((3, 3), dtype=QuadPrecDType())
+empty = np.empty(10, dtype=QuadPrecDType())
+
+print(f"Zeros shape: {zeros.shape}")
+print(f"Ones shape: {ones.shape}")
+```
+
+### Using `arange` and `linspace`
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# Create ranges
+arr = np.arange(0, 10, dtype=QuadPrecDType())
+print(f"arange: {arr}")
+
+# Linear spacing
+lin = np.linspace(0, 1, 11, dtype=QuadPrecDType())
+print(f"linspace: {lin}")
+```
+
+## Array Operations
+
+### Element-wise Arithmetic
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+a = np.array([1, 2, 3], dtype=QuadPrecDType())
+b = np.array([4, 5, 6], dtype=QuadPrecDType())
+
+print(f"a + b = {a + b}")
+print(f"a - b = {a - b}")
+print(f"a * b = {a * b}")
+print(f"a / b = {a / b}")
+print(f"a ** 2 = {a ** 2}")
+```
+
+### Reductions
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+arr = np.array([1, 2, 3, 4, 5], dtype=QuadPrecDType())
+
+print(f"Sum: {np.sum(arr)}")
+print(f"Product: {np.prod(arr)}")
+print(f"Mean: {np.mean(arr)}")
+print(f"Min: {np.min(arr)}")
+print(f"Max: {np.max(arr)}")
+```
+
+### Broadcasting
+
+QuadPrecDType fully supports NumPy broadcasting:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# 2D array
+matrix = np.array([[1, 2, 3], [4, 5, 6]], dtype=QuadPrecDType())
+
+# 1D array - broadcasts across rows
+row_scale = np.array([10, 100, 1000], dtype=QuadPrecDType())
+
+result = matrix * row_scale
+print(result)
+```
+
+## Mathematical Functions
+
+All standard NumPy ufuncs work with QuadPrecDType arrays:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+x = np.linspace(0, 2 * np.pi, 5, dtype=QuadPrecDType())
+
+# Trigonometric functions
+print(f"sin(x): {np.sin(x)}")
+print(f"cos(x): {np.cos(x)}")
+
+# Exponential and logarithmic
+y = np.array([1, 2, 3], dtype=QuadPrecDType())
+print(f"exp(y): {np.exp(y)}")
+print(f"log(y): {np.log(y)}")
+
+# Square root
+print(f"sqrt(y): {np.sqrt(y)}")
+```
+
+## Indexing and Slicing
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+arr = np.arange(10, dtype=QuadPrecDType())
+
+# Basic indexing
+print(f"arr[0]: {arr[0]}")
+print(f"arr[-1]: {arr[-1]}")
+
+# Slicing
+print(f"arr[2:5]: {arr[2:5]}")
+print(f"arr[::2]: {arr[::2]}")
+
+# Boolean indexing
+mask = arr > 5
+print(f"arr[arr > 5]: {arr[mask]}")
+```
+
+## Reshaping and Stacking
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+arr = np.arange(12, dtype=QuadPrecDType())
+
+# Reshape
+reshaped = arr.reshape(3, 4)
+print(f"Reshaped:\n{reshaped}")
+
+# Stack arrays
+a = np.array([1, 2, 3], dtype=QuadPrecDType())
+b = np.array([4, 5, 6], dtype=QuadPrecDType())
+
+stacked = np.stack([a, b])
+print(f"Stacked:\n{stacked}")
+
+concatenated = np.concatenate([a, b])
+print(f"Concatenated: {concatenated}")
+```
+
+## Type Conversion
+
+### Converting to QuadPrecDType
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# From float64
+float64_arr = np.array([1.1, 2.2, 3.3], dtype=np.float64)
+quad_arr = float64_arr.astype(QuadPrecDType())
+
+# From integer
+int_arr = np.array([1, 2, 3], dtype=np.int64)
+quad_from_int = int_arr.astype(QuadPrecDType())
+```
+
+### Converting from QuadPrecDType
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+quad_arr = np.array([1.5, 2.5, 3.5], dtype=QuadPrecDType())
+
+# To float64 (loses precision)
+float64_arr = quad_arr.astype(np.float64)
+print(f"As float64: {float64_arr}")
+```
+
+## Memory Considerations
+
+QuadPrecDType arrays use 16 bytes per element (compared to 8 bytes for float64):
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+n = 1000000
+
+float64_arr = np.zeros(n, dtype=np.float64)
+quad_arr = np.zeros(n, dtype=QuadPrecDType())
+
+print(f"float64 size: {float64_arr.nbytes / 1e6:.1f} MB")
+print(f"quad size: {quad_arr.nbytes / 1e6:.1f} MB")
+```
diff --git a/quaddtype/docs/user_guide/backends.md b/quaddtype/docs/user_guide/backends.md
new file mode 100644
index 00000000..240e1191
--- /dev/null
+++ b/quaddtype/docs/user_guide/backends.md
@@ -0,0 +1,150 @@
+# Backends
+
+NumPy QuadDType supports two computational backends for quad-precision arithmetic. Understanding the differences helps you choose the right one for your use case.
+
+## Available Backends
+
+### SLEEF (Default)
+
+**SLEEF** (SIMD Library for Evaluating Elementary Functions) is the default and recommended backend.
+
+```python
+from numpy_quaddtype import QuadPrecDType, QuadPrecision
+
+# Explicit SLEEF backend
+dtype = QuadPrecDType("sleef")
+scalar = QuadPrecision(3.14, backend="sleef")
+
+# Or simply use defaults
+dtype = QuadPrecDType()  # SLEEF is default
+```
+
+**Advantages:**
+- ✅ True IEEE 754 binary128 quad precision
+- ✅ SIMD-optimized for performance
+- ✅ Consistent behavior across all platforms
+- ✅ Full suite of mathematical functions
+
+**Considerations:**
+- Uses the SLEEF library (bundled with the package)
+
+### Long Double
+
+The **longdouble** backend uses your platform's native `long double` type.
+
+```python
+from numpy_quaddtype import QuadPrecDType, QuadPrecision, is_longdouble_128
+
+# Check if your platform has 128-bit long double
+print(f"Is long double 128-bit? {is_longdouble_128()}")
+
+# Use longdouble backend
+dtype = QuadPrecDType("longdouble")
+scalar = QuadPrecision(3.14, backend="longdouble")
+```
+
+**Advantages:**
+- ✅ Uses native CPU instructions (when available)
+- ✅ No external library dependency
+
+**Considerations:**
+- ⚠️ Precision varies by platform (see table below)
+- ⚠️ Not true quad precision on most platforms
+
+## Platform-Specific Long Double Precision
+
+| Platform | Architecture | Long Double Size | Precision |
+|----------|--------------|------------------|-----------|
+| Linux | x86_64 | 80-bit (stored as 128) | ~18-19 decimal digits |
+| Linux | aarch64 | 128-bit | ~33-34 decimal digits |
+| macOS | x86_64 | 80-bit (stored as 128) | ~18-19 decimal digits |
+| macOS | arm64 | 64-bit | Same as double |
+| Windows | x64 | 64-bit | Same as double |
+
+```{warning}
+On Apple Silicon macOS and on Windows, `long double` is the same as `double` (64-bit),
+providing no precision benefit. Use the SLEEF backend for true quad precision
+on these platforms.
+```
+
+## Checking Backend Support
+
+```python
+from numpy_quaddtype import is_longdouble_128
+
+if is_longdouble_128():
+    print("Your platform supports 128-bit long double!")
+    print("Both backends will provide similar precision.")
+else:
+    print("Long double is NOT 128-bit on your platform.")
+    print("Use SLEEF backend for true quad precision.")
+```
+
+## Convenience Functions
+
+For cleaner code, use the pre-defined helper functions:
+
+```python
+from numpy_quaddtype import (
+    SleefQuadPrecDType, 
+    SleefQuadPrecision,
+    LongDoubleQuadPrecDType, 
+    LongDoubleQuadPrecision
+)
+
+# SLEEF backend
+sleef_dtype = SleefQuadPrecDType()
+sleef_scalar = SleefQuadPrecision("3.14159265358979323846")
+
+# Long double backend  
+ld_dtype = LongDoubleQuadPrecDType()
+ld_scalar = LongDoubleQuadPrecision(3.14)
+```
+
+## Checking Which Backend is in Use
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType, QuadBackend
+
+dtype = QuadPrecDType("sleef")
+print(f"Backend: {dtype.backend}")  # QuadBackend.SLEEF
+
+# Compare backends
+if dtype.backend == QuadBackend.SLEEF:
+    print("Using SLEEF backend")
+elif dtype.backend == QuadBackend.LONGDOUBLE:
+    print("Using longdouble backend")
+```
+
+## Mixing Backends
+
+```{warning}
+Arrays with different backends cannot be directly combined in operations.
+```
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# These have different backends
+sleef_arr = np.array([1, 2, 3], dtype=QuadPrecDType("sleef"))
+ld_arr = np.array([4, 5, 6], dtype=QuadPrecDType("longdouble"))
+
+# This will raise an error:
+# result = sleef_arr + ld_arr  # Error!
+
+# Convert to same backend first:
+ld_arr_converted = ld_arr.astype(QuadPrecDType("sleef"))
+result = sleef_arr + ld_arr_converted  # Works!
+```
+
+## Recommendations
+
+| Use Case | Recommended Backend |
+|----------|---------------------|
+| Cross-platform consistency | SLEEF |
+| Maximum precision needed | SLEEF |
+| Linux aarch64 with native support | Either (SLEEF preferred) |
+| Performance-critical on x86_64 | SLEEF |
+| Debugging/comparison | Both (for validation) |
diff --git a/quaddtype/docs/user_guide/constants.md b/quaddtype/docs/user_guide/constants.md
new file mode 100644
index 00000000..ba888a93
--- /dev/null
+++ b/quaddtype/docs/user_guide/constants.md
@@ -0,0 +1,139 @@
+# Mathematical Constants
+
+NumPy QuadDType provides pre-defined mathematical constants with full quad precision accuracy.
+
+## Available Constants
+
+```python
+from numpy_quaddtype import (
+    pi, e, log2e, log10e, ln2, ln10,
+    max_value, epsilon, smallest_normal, smallest_subnormal,
+    bits, precision, resolution
+)
+```
+
+## Mathematical Constants
+
+### π (Pi)
+
+The ratio of a circle's circumference to its diameter.
+
+```python
+from numpy_quaddtype import pi
+print(f"π = {pi}")
+# 3.14159265358979323846264338327950288...
+```
+
+### e (Euler's Number)
+
+The base of the natural logarithm.
+
+```python
+from numpy_quaddtype import e
+print(f"e = {e}")
+# 2.71828182845904523536028747135266249...
+```
+
+### Logarithmic Constants
+
+```python
+from numpy_quaddtype import log2e, log10e, ln2, ln10
+
+print(f"log₂(e) = {log2e}")   # 1.44269504088896340735992468100189213...
+print(f"log₁₀(e) = {log10e}") # 0.43429448190325182765112891891660508...
+print(f"ln(2) = {ln2}")       # 0.69314718055994530941723212145817656...
+print(f"ln(10) = {ln10}")     # 2.30258509299404568401799145468436420...
+```
+
+## Type Limits
+
+### Machine Epsilon
+
+The gap between `1.0` and the next larger representable quad-precision value, so `1.0 + epsilon != 1.0`.
+
+```python
+from numpy_quaddtype import epsilon, QuadPrecision
+
+print(f"ε = {epsilon}")
+
+# Demonstration
+one = QuadPrecision(1.0)
+print(f"1 + ε == 1: {one + epsilon == one}")           # False
+print(f"1 + ε/2 == 1: {one + epsilon/2 == one}")       # True
+```
+
+### Value Ranges
+
+```python
+from numpy_quaddtype import max_value, smallest_normal, smallest_subnormal
+
+print(f"Maximum value:       {max_value}")
+print(f"Smallest normal:     {smallest_normal}")
+print(f"Smallest subnormal:  {smallest_subnormal}")
+```
+
+## Type Information
+
+```python
+from numpy_quaddtype import bits, precision, resolution
+
+print(f"Total bits: {bits}")           # 128
+print(f"Decimal precision: {precision}")  # 33-34 significant decimal digits
+print(f"Resolution: {resolution}")     # Smallest distinguishable difference
+```
+
+## Using Constants in Calculations
+
+```python
+import numpy as np
+from numpy_quaddtype import pi, e, QuadPrecDType
+
+# Calculate e^(iπ) + 1 ≈ 0 (Euler's identity, real part)
+# We'll compute cos(π) + 1 which should be 0
+result = np.cos(np.array([pi]))[0] + 1
+print(f"cos(π) + 1 = {result}")
+
+# Area of a circle with radius 1
+radius = np.array([1], dtype=QuadPrecDType())
+area = pi * radius ** 2
+print(f"Area of unit circle: {area[0]}")
+
+# Natural exponential
+x = np.array([1], dtype=QuadPrecDType())
+exp_1 = np.exp(x)
+print(f"e¹ = {exp_1[0]}")
+print(f"e constant = {e}")
+```
+
+## Comparison with NumPy Constants
+
+```python
+import numpy as np
+from numpy_quaddtype import pi as quad_pi, e as quad_e
+
+print("Pi comparison:")
+print(f"  NumPy float64: {np.pi}")
+print(f"  QuadPrecision: {quad_pi}")
+
+print("\ne comparison:")
+print(f"  NumPy float64: {np.e}")
+print(f"  QuadPrecision: {quad_e}")
+```
+
+The quad precision constants provide approximately 33-34 significant decimal digits, compared to 15-16 for float64.
+
+## Constant Reference Table
+
+| Constant | Symbol | Approximate Value |
+|----------|--------|-------------------|
+| `pi` | π | 3.14159265358979323846... |
+| `e` | e | 2.71828182845904523536... |
+| `log2e` | log₂(e) | 1.44269504088896340735... |
+| `log10e` | log₁₀(e) | 0.43429448190325182765... |
+| `ln2` | ln(2) | 0.69314718055994530941... |
+| `ln10` | ln(10) | 2.30258509299404568401... |
+| `epsilon` | ε | ~1.93×10⁻³⁴ |
+| `max_value` | - | ~1.19×10⁴⁹³² |
+| `smallest_normal` | - | ~3.36×10⁻⁴⁹³² |
+| `bits` | - | 128 |
+| `precision` | - | 33 |
diff --git a/quaddtype/docs/user_guide/functions.md b/quaddtype/docs/user_guide/functions.md
new file mode 100644
index 00000000..b7819b0b
--- /dev/null
+++ b/quaddtype/docs/user_guide/functions.md
@@ -0,0 +1,251 @@
+# Mathematical Functions
+
+NumPy QuadDType provides a comprehensive set of mathematical functions through NumPy's universal function (ufunc) system. All functions work seamlessly with both scalars and arrays.
+
+## Basic Arithmetic
+
+### Binary Operations
+
+| Operation | Operator | NumPy Function |
+|-----------|----------|----------------|
+| Addition | `a + b` | `np.add(a, b)` |
+| Subtraction | `a - b` | `np.subtract(a, b)` |
+| Multiplication | `a * b` | `np.multiply(a, b)` |
+| Division | `a / b` | `np.divide(a, b)` |
+| Floor Division | `a // b` | `np.floor_divide(a, b)` |
+| Modulo | `a % b` | `np.mod(a, b)` |
+| Power | `a ** b` | `np.power(a, b)` |
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+a = np.array([1, 2, 3], dtype=QuadPrecDType())
+b = np.array([4, 5, 6], dtype=QuadPrecDType())
+
+print(f"a + b = {a + b}")
+print(f"a * b = {a * b}")
+print(f"a / b = {a / b}")
+print(f"a ** 2 = {a ** 2}")
+```
+
+### Unary Operations
+
+| Operation | Operator | NumPy Function |
+|-----------|----------|----------------|
+| Negation | `-a` | `np.negative(a)` |
+| Absolute | `abs(a)` | `np.abs(a)` |
+| Positive | `+a` | `np.positive(a)` |
+
+## Trigonometric Functions
+
+### Standard Trigonometric
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType, pi
+
+x = np.linspace(0, float(pi)/2, 5, dtype=QuadPrecDType())
+
+# Basic trig functions
+print(f"sin(x): {np.sin(x)}")
+print(f"cos(x): {np.cos(x)}")
+print(f"tan(x): {np.tan(x)}")
+```
+
+### Inverse Trigonometric
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+x = np.array([0, 0.5, 1.0], dtype=QuadPrecDType())
+
+print(f"arcsin(x): {np.arcsin(x)}")
+print(f"arccos(x): {np.arccos(x)}")
+
+y = np.array([0, 1, 10], dtype=QuadPrecDType())
+print(f"arctan(y): {np.arctan(y)}")
+```
+
+### Two-Argument Arctangent
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+y = np.array([1, 1, -1, -1], dtype=QuadPrecDType())
+x = np.array([1, -1, 1, -1], dtype=QuadPrecDType())
+
+# atan2 gives the angle in the correct quadrant
+angles = np.arctan2(y, x)
+print(f"arctan2(y, x): {angles}")
+```
+
+## Hyperbolic Functions
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+x = np.array([0, 0.5, 1.0, 2.0], dtype=QuadPrecDType())
+
+# Hyperbolic functions
+print(f"sinh(x): {np.sinh(x)}")
+print(f"cosh(x): {np.cosh(x)}")
+print(f"tanh(x): {np.tanh(x)}")
+
+# Inverse hyperbolic
+print(f"arcsinh(x): {np.arcsinh(x)}")
+print(f"arccosh(x+1): {np.arccosh(x + 1)}")  # arccosh requires x >= 1
+print(f"arctanh(x/3): {np.arctanh(x / 3)}")  # arctanh requires |x| < 1
+```
+
+## Exponential and Logarithmic
+
+### Exponential Functions
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+x = np.array([0, 1, 2, 3], dtype=QuadPrecDType())
+
+print(f"exp(x): {np.exp(x)}")
+print(f"exp2(x): {np.exp2(x)}")        # 2^x
+print(f"expm1(x): {np.expm1(x)}")      # exp(x) - 1, accurate for small x
+```
+
+### Logarithmic Functions
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+x = np.array([1, 2, 10, 100], dtype=QuadPrecDType())
+
+print(f"log(x): {np.log(x)}")          # Natural log
+print(f"log2(x): {np.log2(x)}")        # Base-2 log
+print(f"log10(x): {np.log10(x)}")      # Base-10 log
+print(f"log1p(x-1): {np.log1p(x - 1)}")  # log(1+x), accurate for small x
+```
+
+## Power and Root Functions
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+x = np.array([1, 4, 9, 16], dtype=QuadPrecDType())
+
+print(f"sqrt(x): {np.sqrt(x)}")
+print(f"cbrt(x): {np.cbrt(x)}")        # Cube root
+
+# Hypotenuse (sqrt(a^2 + b^2))
+a = np.array([3, 5, 8], dtype=QuadPrecDType())
+b = np.array([4, 12, 15], dtype=QuadPrecDType())
+print(f"hypot(a, b): {np.hypot(a, b)}")
+```
+
+## Rounding Functions
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+x = np.array([1.2, 2.5, 3.7, -1.5], dtype=QuadPrecDType())
+
+print(f"floor(x): {np.floor(x)}")
+print(f"ceil(x): {np.ceil(x)}")
+print(f"trunc(x): {np.trunc(x)}")
+print(f"rint(x): {np.rint(x)}")  # Round to nearest integer
+```
+
+## Comparison Functions
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+a = np.array([1, 5, 3], dtype=QuadPrecDType())
+b = np.array([2, 4, 3], dtype=QuadPrecDType())
+
+print(f"minimum(a, b): {np.minimum(a, b)}")
+print(f"maximum(a, b): {np.maximum(a, b)}")
+
+# Comparison operators
+print(f"a < b: {a < b}")
+print(f"a == b: {a == b}")
+print(f"a >= b: {a >= b}")
+```
+
+## Special Value Functions
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecision, QuadPrecDType
+
+# Create array with special values
+arr = np.array([
+    QuadPrecision(1.0),
+    QuadPrecision("inf"),
+    QuadPrecision("-inf"),
+    QuadPrecision("nan")
+])
+
+print(f"isfinite: {np.isfinite(arr)}")
+print(f"isinf: {np.isinf(arr)}")
+print(f"isnan: {np.isnan(arr)}")
+```
+
+## Sign and Absolute Value
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+x = np.array([-3, -1, 0, 1, 3], dtype=QuadPrecDType())
+
+print(f"abs(x): {np.abs(x)}")
+print(f"sign(x): {np.sign(x)}")
+print(f"copysign(1, x): {np.copysign(1, x)}")
+```
+
+## Reduction Functions
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+arr = np.array([1, 2, 3, 4, 5], dtype=QuadPrecDType())
+
+print(f"sum: {np.sum(arr)}")
+print(f"prod: {np.prod(arr)}")
+print(f"mean: {np.mean(arr)}")
+print(f"min: {np.min(arr)}")
+print(f"max: {np.max(arr)}")
+```
+
+## Precision Demonstration
+
+The advantage of quad precision is evident in calculations that lose precision in float64:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# Computing 1 - cos(x) for small x loses precision in float64
+x_small = 1e-8
+
+# Float64
+result_f64 = 1 - np.cos(np.float64(x_small))
+print(f"1 - cos(1e-8) [float64]: {result_f64}")
+
+# Quad precision
+x_quad = np.array([x_small], dtype=QuadPrecDType())
+result_quad = 1 - np.cos(x_quad)
+print(f"1 - cos(1e-8) [quad]:    {result_quad[0]}")
+
+# Theoretical value: x^2/2 ≈ 5e-17
+print(f"Theoretical (x²/2):      5e-17")
+```
diff --git a/quaddtype/docs/user_guide/index.md b/quaddtype/docs/user_guide/index.md
new file mode 100644
index 00000000..7333b0b6
--- /dev/null
+++ b/quaddtype/docs/user_guide/index.md
@@ -0,0 +1,15 @@
+# User Guide
+
+This guide covers the core functionality and usage patterns of NumPy QuadDType.
+
+```{toctree}
+:maxdepth: 2
+
+precision
+arrays
+backends
+functions
+constants
+threading
+performance
+```
diff --git a/quaddtype/docs/user_guide/performance.md b/quaddtype/docs/user_guide/performance.md
new file mode 100644
index 00000000..fb9564b7
--- /dev/null
+++ b/quaddtype/docs/user_guide/performance.md
@@ -0,0 +1,263 @@
+# Performance Guide
+
+Quad precision arithmetic is inherently slower than double precision due to the increased complexity of 128-bit operations. This guide helps you maximize performance while maintaining precision.
+
+## Performance Overview
+
+### Relative Performance
+
+As a general guideline, quad precision operations are slower than their float64 equivalents by roughly the following factors:
+
+| Operation Type | Slowdown vs float64 |
+|----------------|---------------------|
+| Basic arithmetic (+, -, *, /) | 5-20× |
+| Transcendental (sin, exp, log) | 10-50× |
+| Array reductions (sum, mean) | 5-15× |
+| Memory operations | 2× (due to size) |
+
+```{note}
+Actual performance varies significantly based on hardware, compiler optimizations, 
+and the specific operations being performed.
+```
+
+## Optimization Strategies
+
+### 1. Use Vectorized Operations
+
+Always prefer NumPy's vectorized operations over Python loops:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+import time
+
+arr = np.arange(10000, dtype=QuadPrecDType())
+
+# ❌ Slow: Python loop
+def slow_sum(arr):
+    total = arr[0]
+    for x in arr[1:]:
+        total = total + x
+    return total
+
+# ✅ Fast: Vectorized
+def fast_sum(arr):
+    return np.sum(arr)
+
+# Benchmark
+start = time.time()
+slow_result = slow_sum(arr)
+slow_time = time.time() - start
+
+start = time.time()
+fast_result = fast_sum(arr)
+fast_time = time.time() - start
+
+print(f"Loop time: {slow_time:.4f}s")
+print(f"Vectorized time: {fast_time:.4f}s")
+print(f"Speedup: {slow_time/fast_time:.1f}×")
+```
+
+### 2. Minimize Type Conversions
+
+Avoid repeated conversions between precisions:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# ❌ Avoid: Repeated conversions
+def bad_approach(float64_arr):
+    results = []
+    for x in float64_arr:
+        quad_x = np.array([x], dtype=QuadPrecDType())
+        results.append(np.sin(quad_x)[0])
+    return results
+
+# ✅ Better: Convert once
+def good_approach(float64_arr):
+    quad_arr = float64_arr.astype(QuadPrecDType())
+    return np.sin(quad_arr)
+```
+
+### 3. Use In-Place Operations When Possible
+
+In-place operations avoid memory allocation:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+arr = np.ones(10000, dtype=QuadPrecDType())
+
+# ❌ Creates new array
+arr = arr * 2
+
+# ✅ In-place modification (when supported)
+np.multiply(arr, 2, out=arr)
+```
+
+### 4. Control Threading
+
+Adjust thread count based on workload:
+
+```python
+from numpy_quaddtype import set_num_threads, get_num_threads
+
+# For small arrays, single thread may be faster (less overhead)
+set_num_threads(1)
+
+# For large arrays, use multiple threads
+set_num_threads(4)
+```
+
+### 5. Consider Mixed Precision
+
+Use quad precision only where needed:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+def mixed_precision_calculation(data):
+    """Use quad precision only for sensitive calculations."""
+    
+    # Rough computation in float64 (fast)
+    rough_result = np.sum(data)
+    
+    # Precise refinement in quad (slower, but only for final step)
+    quad_data = data.astype(QuadPrecDType())
+    precise_result = np.sum(quad_data)
+    
+    return precise_result
+```
+
+## Memory Considerations
+
+### Memory Usage
+
+QuadPrecDType uses 16 bytes per element:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+n = 1_000_000
+
+f64_arr = np.zeros(n, dtype=np.float64)
+quad_arr = np.zeros(n, dtype=QuadPrecDType())
+
+print(f"float64: {f64_arr.nbytes / 1e6:.1f} MB")
+print(f"quad:    {quad_arr.nbytes / 1e6:.1f} MB")
+```
+
+### Memory-Efficient Patterns
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+
+# Process data in chunks to limit memory usage
+def process_large_dataset(data, chunk_size=100000):
+    results = []
+    for i in range(0, len(data), chunk_size):
+        chunk = data[i:i+chunk_size].astype(QuadPrecDType())
+        result = np.sum(np.sin(chunk))
+        results.append(result)
+    return np.sum(results)
+```
+
+## Benchmarking Your Code
+
+### Simple Timing
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+import time
+
+def benchmark(func, arr, iterations=10):
+    """Benchmark a function."""
+    # Warmup
+    func(arr)
+    
+    start = time.time()
+    for _ in range(iterations):
+        func(arr)
+    elapsed = time.time() - start
+    
+    return elapsed / iterations
+
+arr = np.random.randn(100000).astype(QuadPrecDType())
+
+funcs = [
+    ("sum", lambda x: np.sum(x)),
+    ("sin", lambda x: np.sin(x)),
+    ("exp", lambda x: np.exp(x / 100)),
+    ("dot", lambda x: np.dot(x, x)),
+]
+
+for name, func in funcs:
+    avg_time = benchmark(func, arr)
+    print(f"{name}: {avg_time*1000:.2f} ms")
+```
+
+### Comparison with float64
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+import time
+
+n = 100000
+iterations = 100
+
+# Create test data
+f64_arr = np.random.randn(n)
+quad_arr = f64_arr.astype(QuadPrecDType())
+
+operations = [
+    ("Addition", lambda x: x + x),
+    ("Multiplication", lambda x: x * x),
+    ("Division", lambda x: x / (x + 1)),
+    ("Sin", lambda x: np.sin(x)),
+    ("Exp", lambda x: np.exp(x / n)),
+    ("Sum", lambda x: np.sum(x)),
+]
+
+print(f"{'Operation':<15} {'float64 (ms)':<15} {'quad (ms)':<15} {'Slowdown':<10}")
+print("-" * 55)
+
+for name, op in operations:
+    # float64 timing
+    start = time.time()
+    for _ in range(iterations):
+        op(f64_arr)
+    f64_time = (time.time() - start) / iterations * 1000
+    
+    # quad timing
+    start = time.time()
+    for _ in range(iterations):
+        op(quad_arr)
+    quad_time = (time.time() - start) / iterations * 1000
+    
+    slowdown = quad_time / f64_time
+    print(f"{name:<15} {f64_time:<15.3f} {quad_time:<15.3f} {slowdown:<10.1f}×")
+```
+
+## When to Use Quad Precision
+
+### Use Quad Precision For:
+
+- ✅ Final validation of numerical algorithms
+- ✅ Ill-conditioned linear algebra problems
+- ✅ High-precision requirements (regulatory, scientific)
+- ✅ Accumulating many small values (Kahan summation alternative)
+- ✅ Reference implementations
+
+### Consider Alternatives For:
+
+- ⚠️ Real-time applications
+- ⚠️ Processing very large datasets
+- ⚠️ When float64 precision is sufficient
+- ⚠️ GPU computations (no quad support)
diff --git a/quaddtype/docs/user_guide/precision.md b/quaddtype/docs/user_guide/precision.md
new file mode 100644
index 00000000..feb58702
--- /dev/null
+++ b/quaddtype/docs/user_guide/precision.md
@@ -0,0 +1,128 @@
+# Understanding Quad Precision
+
+## What is Quad Precision?
+
+Quad precision (also known as quadruple precision or binary128) is a floating-point format defined by the IEEE 754 standard. It provides significantly higher precision than the commonly used double precision (float64).
+
+## Precision Comparison
+
+| Format | Bits | Sign | Exponent | Mantissa | Decimal Digits |
+|--------|------|------|----------|----------|----------------|
+| Single (float32) | 32 | 1 | 8 | 23 | ~7 |
+| Double (float64) | 64 | 1 | 11 | 52 | ~15-16 |
+| **Quad (float128)** | **128** | **1** | **15** | **112** | **~33-34** |
+
+## Demonstrating the Precision Difference
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecision, pi
+
+# Standard double precision π
+pi_float64 = np.float64(np.pi)
+print(f"float64 π: {pi_float64}")
+
+# Quad precision π
+print(f"quad    π: {pi}")
+
+# The actual value of π to 50 decimal places:
+# 3.14159265358979323846264338327950288419716939937510...
+```
+
+### Practical Example: Computing e
+
+Let's compute Euler's number using the series expansion $e = \sum_{n=0}^{\infty} \frac{1}{n!}$:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecision, QuadPrecDType
+
+def compute_e_quad(terms=50):
+    """Compute e using quad precision."""
+    result = QuadPrecision(0)
+    factorial = QuadPrecision(1)
+    
+    for n in range(terms):
+        if n > 0:
+            factorial = factorial * n
+        result = result + QuadPrecision(1) / factorial
+    
+    return result
+
+def compute_e_float64(terms=50):
+    """Compute e using float64."""
+    result = np.float64(0)
+    factorial = np.float64(1)
+    
+    for n in range(terms):
+        if n > 0:
+            factorial = factorial * n
+        result = result + 1.0 / factorial
+    
+    return result
+
+e_quad = compute_e_quad(30)
+e_float64 = compute_e_float64(30)
+
+print(f"e (quad):    {e_quad}")
+print(f"e (float64): {e_float64}")
+```
+
+## When to Use Quad Precision
+
+### ✅ Good Use Cases
+
+1. **Ill-conditioned Problems**: When numerical instability affects results
+2. **Reference Implementations**: Validating lower-precision algorithms
+3. **Financial Calculations**: When regulatory compliance requires high precision
+4. **Scientific Research**: Astronomy, physics simulations
+5. **Cryptographic Applications**: Where precision is critical
+
+### ⚠️ Consider Alternatives
+
+1. **Performance-Critical Code**: Quad precision is slower than float64
+2. **Large Datasets**: Memory usage is 2x compared to float64
+3. **Simple Calculations**: When float64 precision is sufficient
+
+## Memory Layout
+
+QuadPrecision values are stored as 128-bit (16 bytes) values in memory:
+
+```
+┌─────────┬─────────────────┬────────────────────────────────────────────────────┐
+│  Sign   │    Exponent     │                     Mantissa                        │
+│  1 bit  │    15 bits      │                    112 bits                         │
+└─────────┴─────────────────┴────────────────────────────────────────────────────┘
+```
+
+## Special Values
+
+QuadPrecision supports all IEEE 754 special values:
+
+```python
+from numpy_quaddtype import QuadPrecision
+import numpy as np
+
+# Infinity
+pos_inf = QuadPrecision("inf")
+neg_inf = QuadPrecision("-inf")
+
+# NaN (Not a Number)
+nan = QuadPrecision("nan")
+
+# Check special values
+print(f"Is inf: {np.isinf(pos_inf)}")
+print(f"Is nan: {np.isnan(nan)}")
+print(f"Is finite: {np.isfinite(QuadPrecision(1.0))}")
+```
+
+## Precision Limits
+
+```python
+from numpy_quaddtype import epsilon, smallest_normal, smallest_subnormal, max_value
+
+print(f"Machine epsilon:      {epsilon}")
+print(f"Smallest normal:      {smallest_normal}")
+print(f"Smallest subnormal:   {smallest_subnormal}")
+print(f"Maximum value:        {max_value}")
+```
diff --git a/quaddtype/docs/user_guide/threading.md b/quaddtype/docs/user_guide/threading.md
new file mode 100644
index 00000000..cb1b40ce
--- /dev/null
+++ b/quaddtype/docs/user_guide/threading.md
@@ -0,0 +1,195 @@
+# Threading and Parallelism
+
+NumPy QuadDType is designed to be thread-safe and supports Python's free-threading (GIL-free) mode introduced in Python 3.13.
+
+## Thread Safety
+
+All QuadDType operations are thread-safe:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+from concurrent.futures import ThreadPoolExecutor
+
+def compute_sum(arr):
+    """Thread-safe computation."""
+    return np.sum(arr)
+
+# Create shared array
+arr = np.arange(1000, dtype=QuadPrecDType())
+
+# Run computations in parallel
+with ThreadPoolExecutor(max_workers=4) as executor:
+    # Split array and compute sums in parallel
+    chunks = np.array_split(arr, 4)
+    futures = [executor.submit(compute_sum, chunk) for chunk in chunks]
+    results = [f.result() for f in futures]
+    
+total = sum(results)
+print(f"Total sum: {total}")
+```
+
+## QuadBLAS Threading Control
+
+NumPy QuadDType uses QuadBLAS for optimized linear algebra operations. You can control the number of threads used:
+
+```python
+from numpy_quaddtype import set_num_threads, get_num_threads, get_quadblas_version
+
+# Check QuadBLAS version
+version = get_quadblas_version()
+if version:
+    print(f"QuadBLAS version: {version}")
+else:
+    print("QuadBLAS not available (DISABLE_QUADBLAS was set)")
+
+# Get current thread count
+current_threads = get_num_threads()
+print(f"Current threads: {current_threads}")
+
+# Set thread count
+set_num_threads(4)
+print(f"Threads after setting: {get_num_threads()}")
+
+# Use single thread for reproducibility
+set_num_threads(1)
+```
+
+```{note}
+QuadBLAS is disabled on Windows builds due to MSVC compatibility issues.
+Use `get_quadblas_version()` to check if it's available.
+```
+
+## Free-Threading Support (Python 3.13+)
+
+NumPy QuadDType fully supports Python's experimental free-threading mode (GIL-free Python).
+
+### Checking Free-Threading Mode
+
+```python
+import sys
+
+if hasattr(sys, '_is_gil_enabled'):
+    if sys._is_gil_enabled():
+        print("Running with GIL enabled")
+    else:
+        print("Running in free-threaded mode (no GIL)")
+else:
+    print("Free-threading not available (Python < 3.13)")
+```
+
+### Using Free-Threading
+
+When running with free-threading enabled, true parallel execution is possible:
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType
+import threading
+
+results = []
+lock = threading.Lock()
+
+def parallel_compute(arr):
+    """Compute in parallel."""
+    result = np.sum(np.sin(arr))
+    with lock:
+        results.append(result)
+
+# Create arrays for parallel processing
+arrays = [np.arange(i * 1000, (i + 1) * 1000, dtype=QuadPrecDType()) 
+          for i in range(4)]
+
+# Run in parallel threads
+threads = [threading.Thread(target=parallel_compute, args=(arr,)) 
+           for arr in arrays]
+
+for t in threads:
+    t.start()
+for t in threads:
+    t.join()
+
+print(f"Results from {len(results)} threads: {results}")
+```
+
+## Building with Thread Sanitizer
+
+For development and testing thread safety, you can build with Thread Sanitizer (TSan):
+
+### Prerequisites
+
+```bash
+# Use clang compiler
+export CC=clang
+export CXX=clang++
+```
+
+### Build Steps
+
+1. Build CPython with TSan support (see [Python Free-Threading Guide](https://py-free-threading.github.io/thread_sanitizer/))
+
+2. Build NumPy with TSan:
+   ```bash
+   pip install "numpy @ git+https://github.com/numpy/numpy" \
+       -C'setup-args=-Db_sanitize=thread'
+   ```
+
+3. Build SLEEF with TSan:
+   ```bash
+   cmake \
+       -DCMAKE_C_COMPILER=clang \
+       -DCMAKE_C_FLAGS="-fsanitize=thread -g -O1" \
+       -DSLEEF_BUILD_QUAD=ON \
+       -S sleef -B sleef/build
+   
+   cmake --build sleef/build -j
+   sudo cmake --install sleef/build --prefix=/usr/local
+   ```
+
+4. Build numpy-quaddtype with TSan:
+   ```bash
+   export CFLAGS="-fsanitize=thread -g -O0"
+   export CXXFLAGS="-fsanitize=thread -g -O0"
+   export LDFLAGS="-fsanitize=thread"
+   
+   pip install . -vv --no-build-isolation \
+       -Csetup-args=-Db_sanitize=thread
+   ```
+
+## Best Practices
+
+### Do's
+
+- ✅ Use `set_num_threads()` to control parallelism
+- ✅ Use thread-local storage for intermediate results when needed
+- ✅ Test with TSan during development
+- ✅ Use proper synchronization for shared mutable state
+
+### Don'ts
+
+- ❌ Don't assume operations are atomic
+- ❌ Don't modify arrays while other threads are reading them
+- ❌ Don't ignore thread sanitizer warnings
+
+## Performance Considerations
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType, set_num_threads
+import time
+
+arr = np.random.randn(100000).astype(QuadPrecDType())
+
+# Benchmark with different thread counts
+for threads in [1, 2, 4, 8]:
+    set_num_threads(threads)
+    
+    start = time.time()
+    for _ in range(10):
+        result = np.sum(arr)
+    elapsed = time.time() - start
+    
+    print(f"Threads: {threads}, Time: {elapsed:.3f}s")
+```
+
+The optimal thread count depends on your specific workload and hardware.
diff --git a/quaddtype/pyproject.toml b/quaddtype/pyproject.toml
index feca2441..dbfad0e0 100644
--- a/quaddtype/pyproject.toml
+++ b/quaddtype/pyproject.toml
@@ -41,6 +41,8 @@ docs = [
     "sphinx",
     "pydata-sphinx-theme",
     "myst-parser",
+    "sphinx-design",
+    "sphinx-copybutton",
 ]
 
 [project.urls]

From 5615340f46bcf98f08a5ad0a9f0db736381f3fa7 Mon Sep 17 00:00:00 2001
From: SwayamInSync <hawkempire007@gmail.com>
Date: Thu, 8 Jan 2026 16:11:59 +0000
Subject: [PATCH 2/4] some fix

---
 quaddtype/docs/index.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/quaddtype/docs/index.md b/quaddtype/docs/index.md
index 16678697..182aaba3 100644
--- a/quaddtype/docs/index.md
+++ b/quaddtype/docs/index.md
@@ -4,7 +4,7 @@
 :target: https://pypi.org/project/numpy-quaddtype/
 :alt: PyPI version
 ```
-```{image} https://img.shields.io/pypi/pyversions/numpy-quaddtype.svg
+```{image} https://img.shields.io/badge/python-3.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue.svg
 :alt: Python versions
 ```
 
@@ -21,7 +21,7 @@ NumPy QuadDType provides IEEE 754 quadruple-precision (binary128) floating-point
 :link: user_guide/precision
 :link-type: doc
 
-128-bit floating point with ~34 decimal digits of precision, compared to ~15-16 for float64.
+128-bit floating point with ~34 decimal digits of precision
 :::
 
 :::{grid-item-card} 🔌 NumPy Integration
@@ -35,7 +35,7 @@ Works seamlessly with NumPy arrays, ufuncs, and broadcasting.
 :link: user_guide/performance
 :link-type: doc
 
-Powered by SLEEF library for vectorized transcendental functions.
+Vectorization-friendly design that can leverage SIMD acceleration where supported.
 :::
 
 :::{grid-item-card} 🧮 Mathematical Functions

From 2fb33b03371ea1a55e20bcbb7d3a186553d23e4e Mon Sep 17 00:00:00 2001
From: Juniper Tyree <juniper.tyree@helsinki.fi>
Date: Fri, 16 Jan 2026 10:13:59 +0200
Subject: [PATCH 3/4] Cleanup API docs and CHANGELOG

---
 quaddtype/docs/api/constants_api.md      |  82 +++----
 quaddtype/docs/api/core.md               | 102 ++++-----
 quaddtype/docs/api/functions.md          | 117 +++++-----
 quaddtype/docs/api/index.md              |   2 +-
 quaddtype/docs/api/utilities.md          |  62 +++---
 quaddtype/docs/changelog.md              |  54 ++++-
 quaddtype/docs/conf.py                   |   2 +-
 quaddtype/docs/contributing.md           | 159 --------------
 quaddtype/docs/index.md                  |   5 +-
 quaddtype/docs/installation.md           | 153 -------------
 quaddtype/docs/user_guide/arrays.md      | 230 --------------------
 quaddtype/docs/user_guide/backends.md    | 150 -------------
 quaddtype/docs/user_guide/constants.md   | 139 ------------
 quaddtype/docs/user_guide/functions.md   | 251 ---------------------
 quaddtype/docs/user_guide/index.md       |  15 --
 quaddtype/docs/user_guide/performance.md | 263 -----------------------
 quaddtype/docs/user_guide/precision.md   | 128 -----------
 quaddtype/docs/user_guide/threading.md   | 195 -----------------
 quaddtype/pyproject.toml                 |   1 +
 19 files changed, 228 insertions(+), 1882 deletions(-)
 delete mode 100644 quaddtype/docs/contributing.md
 delete mode 100644 quaddtype/docs/installation.md
 delete mode 100644 quaddtype/docs/user_guide/arrays.md
 delete mode 100644 quaddtype/docs/user_guide/backends.md
 delete mode 100644 quaddtype/docs/user_guide/constants.md
 delete mode 100644 quaddtype/docs/user_guide/functions.md
 delete mode 100644 quaddtype/docs/user_guide/index.md
 delete mode 100644 quaddtype/docs/user_guide/performance.md
 delete mode 100644 quaddtype/docs/user_guide/precision.md
 delete mode 100644 quaddtype/docs/user_guide/threading.md

diff --git a/quaddtype/docs/api/constants_api.md b/quaddtype/docs/api/constants_api.md
index cc82753c..d97f4ed0 100644
--- a/quaddtype/docs/api/constants_api.md
+++ b/quaddtype/docs/api/constants_api.md
@@ -7,50 +7,50 @@ Pre-defined mathematical constants with quad precision accuracy.
 ```{eval-rst}
 .. data:: numpy_quaddtype.pi
 
-   The mathematical constant π (pi).
-   
+   The mathematical constant :math:`\pi` (pi).
+
    Value: 3.14159265358979323846264338327950288...
-   
+
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.e
 
-   Euler's number, the base of natural logarithms.
-   
+   Euler's number :math:`e`, the base of natural logarithms.
+
    Value: 2.71828182845904523536028747135266249...
-   
+
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.log2e
 
-   The base-2 logarithm of e: log₂(e).
-   
+   The base-2 logarithm of :math:`e`: :math:`\log_{2}{e}`.
+
    Value: 1.44269504088896340735992468100189213...
-   
+
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.log10e
 
-   The base-10 logarithm of e: log₁₀(e).
-   
+   The base-10 logarithm of :math:`e`: :math:`\log_{10}{e}`.
+
    Value: 0.43429448190325182765112891891660508...
-   
+
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.ln2
 
-   The natural logarithm of 2: ln(2).
-   
+   The natural logarithm of 2: :math:`\ln(2)`.
+
    Value: 0.69314718055994530941723212145817656...
-   
+
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.ln10
 
-   The natural logarithm of 10: ln(10).
-   
+   The natural logarithm of 10: :math:`\ln(10)`.
+
    Value: 2.30258509299404568401799145468436420...
-   
+
    :type: QuadPrecision
 ```
 
@@ -59,38 +59,44 @@ Pre-defined mathematical constants with quad precision accuracy.
 ```{eval-rst}
 .. data:: numpy_quaddtype.epsilon
 
-   Machine epsilon: the smallest positive number such that 1.0 + epsilon ≠ 1.0.
-   
-   Approximately 1.93 × 10⁻³⁴.
-   
+   Machine epsilon: the smallest positive number such that :math:`1.0 + \epsilon \neq 1.0`.
+
+   :math:`2^{-112}` or approximately :math:`1.93 \cdot 10^{-34}`.
+
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.max_value
 
    The largest representable finite quad-precision value.
-   
-   Approximately 1.19 × 10⁴⁹³².
-   
+
+   :math:`2^{16383} \cdot (2 - 2^{-112})` or approximately :math:`1.19 \cdot 10^{4932}`.
+
+   The most negative representable finite quad-precision value is ``-numpy_quaddtype.max_value``.
+
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.smallest_normal
 
-   The smallest positive normalized quad-precision value.
-   
-   Approximately 3.36 × 10⁻⁴⁹³².
-   
+   The smallest positive normal (normalized, mantissa has a leading 1 bit) quad-precision value.
+
+   :math:`2^{-16382}` or approximately :math:`3.36 \cdot 10^{-4932}`.
+
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.smallest_subnormal
 
-   The smallest positive subnormal (denormalized) quad-precision value.
-   
+   The smallest positive subnormal (denormalized, mantissa has a leading 0 bit) quad-precision value.
+
+   :math:`2^{-16494}` or approximately :math:`6.48 \cdot 10^{-4966}`.
+
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.resolution
 
-   The approximate decimal resolution of quad precision.
-   
+   The approximate decimal resolution of quad precision, i.e. ``10 ** (-precision)``.
+
+   :math:`10^{-33}`.
+
    :type: QuadPrecision
 ```
 
@@ -99,15 +105,15 @@ Pre-defined mathematical constants with quad precision accuracy.
 ```{eval-rst}
 .. data:: numpy_quaddtype.bits
 
-   Total number of bits in quad precision representation.
-   
+   The total number of bits in quad precision representation.
+
    :value: 128
    :type: int
 
 .. data:: numpy_quaddtype.precision
 
-   Approximate number of significant decimal digits.
-   
+   The approximate number of significant decimal digits.
+
    :value: 33
    :type: int
 ```
@@ -130,7 +136,7 @@ import numpy as np
 from numpy_quaddtype import QuadPrecDType
 
 # e^(ln2) should equal 2
-two = np.exp(np.array([ln2]))[0]
+two = np.exp(np.array(ln2))
 print(f"e^(ln2) = {two}")
 
 # log2(e) * ln(2) should equal 1
diff --git a/quaddtype/docs/api/core.md b/quaddtype/docs/api/core.md
index 713a84de..12c540a2 100644
--- a/quaddtype/docs/api/core.md
+++ b/quaddtype/docs/api/core.md
@@ -8,28 +8,28 @@ The fundamental types provided by NumPy QuadDType.
 .. class:: numpy_quaddtype.QuadPrecision(value, backend="sleef")
 
    A quad-precision (128-bit) floating-point scalar.
-   
+
    QuadPrecision is a NumPy scalar type that provides IEEE 754 binary128
    floating-point arithmetic. It can be used standalone or as elements
    of NumPy arrays.
-   
-   :param value: The value to convert to quad precision. Can be:
-       
+
+   :param value: The value to convert to quad precision. It can be:
+
        - ``float`` or ``int``: Python numeric types
        - ``str``: String representation for maximum precision
        - ``bytes``: Raw 16-byte representation
        - ``numpy.floating`` or ``numpy.integer``: NumPy numeric types
        - ``QuadPrecision``: Another QuadPrecision value
-       
    :type value: float, int, str, bytes, numpy scalar, or QuadPrecision
+
    :param backend: Computation backend to use. Either ``"sleef"`` (default) 
        or ``"longdouble"``.
    :type backend: str, optional
-   
+
    **Examples**
-   
+
    Create from different input types::
-   
+
        >>> from numpy_quaddtype import QuadPrecision
        >>> QuadPrecision(3.14)
        QuadPrecision('3.14000000000000012434...')
@@ -37,29 +37,29 @@ The fundamental types provided by NumPy QuadDType.
        QuadPrecision('3.14159265358979323846264338327950288')
        >>> QuadPrecision(42)
        QuadPrecision('42.0')
-   
+
    Arithmetic operations::
-   
+
        >>> x = QuadPrecision("1.5")
        >>> y = QuadPrecision("2.5")
        >>> x + y
        QuadPrecision('4.0')
        >>> x * y
        QuadPrecision('3.75')
-   
+
    .. attribute:: dtype
       :type: QuadPrecDType
-      
+
       The NumPy dtype for this scalar.
-   
+
    .. attribute:: real
       :type: QuadPrecision
-      
-      The real part (returns self for real numbers).
-   
+
+      The real part (always self for QuadPrecision).
+
    .. attribute:: imag
       :type: QuadPrecision
-      
+
       The imaginary part (always zero for QuadPrecision).
 ```
 
@@ -69,18 +69,18 @@ The fundamental types provided by NumPy QuadDType.
 .. class:: numpy_quaddtype.QuadPrecDType(backend="sleef")
 
    NumPy dtype for quad-precision floating-point arrays.
-   
-   QuadPrecDType is a custom NumPy dtype that enables creation and
+
+   QuadPrecDType is a custom NumPy dtype that enables the creation and
    manipulation of arrays containing quad-precision values.
-   
+
    :param backend: Computation backend. Either ``"sleef"`` (default) or
        ``"longdouble"``.
    :type backend: str, optional
-   
+
    **Examples**
-   
+
    Create arrays with QuadPrecDType::
-   
+
        >>> import numpy as np
        >>> from numpy_quaddtype import QuadPrecDType
        >>> arr = np.array([1, 2, 3], dtype=QuadPrecDType())
@@ -88,26 +88,26 @@ The fundamental types provided by NumPy QuadDType.
        QuadPrecDType128
        >>> np.zeros(5, dtype=QuadPrecDType())
        array([0.0, 0.0, 0.0, 0.0, 0.0], dtype=QuadPrecDType128)
-   
+
    .. attribute:: backend
       :type: QuadBackend
-      
-      The computation backend (SLEEF or LONGDOUBLE).
-   
+
+      The computation backend (``QuadBackend.SLEEF`` or ``QuadBackend.LONGDOUBLE``).
+
    .. attribute:: itemsize
       :type: int
-      
-      Size of each element in bytes (always 16).
-   
+
+      The size of each element in bytes (always 16).
+
    .. attribute:: alignment
       :type: int
-      
-      Memory alignment in bytes (always 16).
-   
+
+      The memory alignment in bytes (always 16).
+
    .. attribute:: name
       :type: str
-      
-      String name of the dtype (``"QuadPrecDType128"``).
+
+      The string name of the dtype (``"QuadPrecDType128"``).
 ```
 
 ## QuadBackend
@@ -116,22 +116,22 @@ The fundamental types provided by NumPy QuadDType.
 .. class:: numpy_quaddtype.QuadBackend
 
    Enumeration of available computation backends.
-   
+
    .. attribute:: SLEEF
       :value: 0
-      
+
       SLEEF library backend (default). Provides true IEEE 754 binary128
       quad precision with SIMD optimization.
-   
+
    .. attribute:: LONGDOUBLE
       :value: 1
-      
-      Platform's native long double backend. Precision varies by platform.
-   
+
+      The platform's native long double backend. The precision varies by platform.
+
    **Example**
-   
+
    ::
-   
+
        >>> from numpy_quaddtype import QuadPrecDType, QuadBackend
        >>> dtype = QuadPrecDType()
        >>> dtype.backend == QuadBackend.SLEEF
@@ -146,9 +146,9 @@ The fundamental types provided by NumPy QuadDType.
 .. function:: numpy_quaddtype.SleefQuadPrecision(value)
 
    Create a QuadPrecision scalar using the SLEEF backend.
-   
+
    Equivalent to ``QuadPrecision(value, backend="sleef")``.
-   
+
    :param value: Value to convert to quad precision.
    :return: Quad precision scalar using SLEEF backend.
    :rtype: QuadPrecision
@@ -160,9 +160,9 @@ The fundamental types provided by NumPy QuadDType.
 .. function:: numpy_quaddtype.LongDoubleQuadPrecision(value)
 
    Create a QuadPrecision scalar using the longdouble backend.
-   
+
    Equivalent to ``QuadPrecision(value, backend="longdouble")``.
-   
+
    :param value: Value to convert to quad precision.
    :return: Quad precision scalar using longdouble backend.
    :rtype: QuadPrecision
@@ -174,9 +174,9 @@ The fundamental types provided by NumPy QuadDType.
 .. function:: numpy_quaddtype.SleefQuadPrecDType()
 
    Create a QuadPrecDType using the SLEEF backend.
-   
+
    Equivalent to ``QuadPrecDType(backend="sleef")``.
-   
+
    :return: Dtype for SLEEF-backed quad precision arrays.
    :rtype: QuadPrecDType
 ```
@@ -187,9 +187,9 @@ The fundamental types provided by NumPy QuadDType.
 .. function:: numpy_quaddtype.LongDoubleQuadPrecDType()
 
    Create a QuadPrecDType using the longdouble backend.
-   
+
    Equivalent to ``QuadPrecDType(backend="longdouble")``.
-   
+
    :return: Dtype for longdouble-backed quad precision arrays.
    :rtype: QuadPrecDType
 ```
diff --git a/quaddtype/docs/api/functions.md b/quaddtype/docs/api/functions.md
index ef352702..c6fa93e8 100644
--- a/quaddtype/docs/api/functions.md
+++ b/quaddtype/docs/api/functions.md
@@ -2,20 +2,7 @@
 
 NumPy QuadDType supports a comprehensive set of NumPy universal functions (ufuncs) and array functions.
 
-## Arithmetic Operations
-
-### Binary Arithmetic
-
-| Function | Operator | Description |
-|----------|----------|-------------|
-| `np.add` | `+` | Element-wise addition |
-| `np.subtract` | `-` | Element-wise subtraction |
-| `np.multiply` | `*` | Element-wise multiplication |
-| `np.divide` | `/` | Element-wise division |
-| `np.true_divide` | `/` | Element-wise true division |
-| `np.floor_divide` | `//` | Element-wise floor division |
-| `np.mod` | `%` | Element-wise modulo |
-| `np.power` | `**` | Element-wise power |
+## Element-wise Arithmetic Operations
 
 ### Unary Arithmetic
 
@@ -24,28 +11,41 @@ NumPy QuadDType supports a comprehensive set of NumPy universal functions (ufunc
 | `np.negative` | `-x` | Numerical negative |
 | `np.positive` | `+x` | Numerical positive |
 | `np.absolute` | `abs(x)` | Absolute value |
-| `np.sign` | - | Sign indicator |
 
-## Trigonometric Functions
+### Binary Arithmetic
+
+| Function | Operator | Description |
+|----------|----------|-------------|
+| `np.add` | `+` | Addition |
+| `np.subtract` | `-` | Subtraction |
+| `np.multiply` | `*` | Multiplication |
+| `np.divide` | `/` | Division |
+| `np.true_divide` | `/` | True division |
+| `np.floor_divide` | `//` | Floor division |
+| `np.mod` | `%` | Modulo |
+| `np.power` | `**` | Power |
 
-### Standard Trigonometric
+## Element-wise Sign Functions
 
 | Function | Description |
 |----------|-------------|
-| `np.sin` | Sine |
-| `np.cos` | Cosine |
-| `np.tan` | Tangent |
+| `np.sign` | Sign indicator |
+| `np.signbit` | Test for negative sign bit (works with NaN) |
+| `np.copysign` | Copy sign of second to first |
 
-### Inverse Trigonometric
+## Element-wise Trigonometric Functions
 
 | Function | Description |
 |----------|-------------|
+| `np.sin` | Sine |
+| `np.cos` | Cosine |
+| `np.tan` | Tangent |
 | `np.arcsin` | Inverse sine |
 | `np.arccos` | Inverse cosine |
 | `np.arctan` | Inverse tangent |
 | `np.arctan2` | Two-argument inverse tangent |
 
-### Hyperbolic Functions
+### Element-wise Hyperbolic Functions
 
 | Function | Description |
 |----------|-------------|
@@ -56,37 +56,50 @@ NumPy QuadDType supports a comprehensive set of NumPy universal functions (ufunc
 | `np.arccosh` | Inverse hyperbolic cosine |
 | `np.arctanh` | Inverse hyperbolic tangent |
 
-## Exponential and Logarithmic
-
-### Exponential
+## Element-wise Exponential Functions
 
 | Function | Description |
 |----------|-------------|
-| `np.exp` | Exponential (e^x) |
-| `np.exp2` | Base-2 exponential (2^x) |
-| `np.expm1` | exp(x) - 1 (accurate for small x) |
+| `np.exp` | Exponential ({math}`e^x`) |
+| `np.exp2` | Base-2 exponential ({math}`2^x`) |
+| `np.expm1` | `exp(x) - 1` (accurate for small x) |
 
-### Logarithmic
+## Element-wise Logarithmic Functions
 
 | Function | Description |
 |----------|-------------|
 | `np.log` | Natural logarithm |
 | `np.log2` | Base-2 logarithm |
 | `np.log10` | Base-10 logarithm |
-| `np.log1p` | log(1 + x) (accurate for small x) |
+| `np.log1p` | `log(1 + x)` (accurate for small x) |
 
-## Power and Root Functions
+## Element-wise Power and Root Functions
 
 | Function | Description |
 |----------|-------------|
+| `np.square` | Square ({math}`x^2`) |
 | `np.sqrt` | Square root |
 | `np.cbrt` | Cube root |
-| `np.square` | Square (x²) |
-| `np.hypot` | Hypotenuse (√(x² + y²)) |
+| `np.hypot` | Hypotenuse ({math}`\sqrt{x^2 + y^2}`) |
+
+## Element-wise Rounding Functions
 
-## Comparison Functions
+| Function | Description |
+|----------|-------------|
+| `np.floor` | Floor (round down) |
+| `np.ceil` | Ceiling (round up) |
+| `np.trunc` | Truncate toward zero |
+| `np.rint` | Round to nearest integer (ties to even) |
 
-### Element-wise Comparison
+## Element-wise Classification Functions
+
+| Function | Description |
+|----------|-------------|
+| `np.isfinite` | Test for finite values |
+| `np.isinf` | Test for infinity |
+| `np.isnan` | Test for NaN |
+
+## Element-wise Comparison Functions
 
 | Function | Operator | Description |
 |----------|----------|-------------|
@@ -97,33 +110,14 @@ NumPy QuadDType supports a comprehensive set of NumPy universal functions (ufunc
 | `np.greater` | `>` | Greater than |
 | `np.greater_equal` | `>=` | Greater than or equal |
 
-### Min/Max
-
-| Function | Description |
-|----------|-------------|
-| `np.minimum` | Element-wise minimum |
-| `np.maximum` | Element-wise maximum |
-| `np.fmin` | Element-wise minimum (ignores NaN) |
-| `np.fmax` | Element-wise maximum (ignores NaN) |
-
-## Rounding Functions
+### Element-wise Minimum/Maximum
 
 | Function | Description |
 |----------|-------------|
-| `np.floor` | Floor (round down) |
-| `np.ceil` | Ceiling (round up) |
-| `np.trunc` | Truncate toward zero |
-| `np.rint` | Round to nearest integer |
-
-## Special Value Functions
-
-| Function | Description |
-|----------|-------------|
-| `np.isfinite` | Test for finite values |
-| `np.isinf` | Test for infinity |
-| `np.isnan` | Test for NaN |
-| `np.signbit` | Test for negative sign bit |
-| `np.copysign` | Copy sign of second to first |
+| `np.minimum` | Minimum |
+| `np.maximum` | Maximum |
+| `np.fmin` | Minimum (ignores NaN) |
+| `np.fmax` | Maximum (ignores NaN) |
 
 ## Reduction Functions
 
@@ -144,7 +138,7 @@ NumPy QuadDType supports a comprehensive set of NumPy universal functions (ufunc
 | `np.zeros` | Array of zeros |
 | `np.ones` | Array of ones |
 | `np.empty` | Uninitialized array |
-| `np.full` | Array filled with value |
+| `np.full` | Array filled with given value |
 | `np.arange` | Range of values |
 | `np.linspace` | Linearly spaced values |
 
@@ -182,8 +176,7 @@ When QuadBLAS is available (not on Windows):
 import numpy as np
 from numpy_quaddtype import QuadPrecDType, pi
 
-x = np.array([0, float(pi)/6, float(pi)/4, float(pi)/3, float(pi)/2], 
-             dtype=QuadPrecDType())
+x = np.array([0, pi/6, pi/4, pi/3, pi/2, pi], dtype=QuadPrecDType())
 
 print("sin(x):", np.sin(x))
 print("cos(x):", np.cos(x))
diff --git a/quaddtype/docs/api/index.md b/quaddtype/docs/api/index.md
index 90c27e15..2505f0f7 100644
--- a/quaddtype/docs/api/index.md
+++ b/quaddtype/docs/api/index.md
@@ -1,6 +1,6 @@
 # API Reference
 
-Complete API documentation for NumPy QuadDType.
+API documentation for NumPy QuadDType.
 
 ```{toctree}
 :maxdepth: 2
diff --git a/quaddtype/docs/api/utilities.md b/quaddtype/docs/api/utilities.md
index e47f0846..a744b93d 100644
--- a/quaddtype/docs/api/utilities.md
+++ b/quaddtype/docs/api/utilities.md
@@ -1,31 +1,31 @@
 # Utility Functions
 
-Helper functions for platform detection and threading control.
+Helper functions for platform precision detection and threading control.
 
-## Platform Detection
+## Platform Precision Detection
 
 ```{eval-rst}
 .. function:: numpy_quaddtype.is_longdouble_128()
 
    Check if the platform's ``long double`` type is 128-bit.
-   
+
    This is useful for determining whether the longdouble backend provides
    true quad precision on the current platform.
-   
+
    :return: ``True`` if ``long double`` is 128-bit, ``False`` otherwise.
    :rtype: bool
-   
+
    **Platform behavior:**
-   
+
    - Linux x86_64: Returns ``False`` (80-bit extended precision)
    - Linux aarch64: Returns ``True`` (128-bit quad precision)
    - macOS (all): Returns ``False`` (64-bit double precision)
    - Windows (all): Returns ``False`` (64-bit double precision)
-   
+
    **Example**
-   
+
    ::
-   
+
        >>> from numpy_quaddtype import is_longdouble_128
        >>> if is_longdouble_128():
        ...     print("Native quad precision available via longdouble")
@@ -41,35 +41,35 @@ These functions control the number of threads used by QuadBLAS for parallel oper
 .. function:: numpy_quaddtype.set_num_threads(n)
 
    Set the number of threads used by QuadBLAS operations.
-   
+
    :param n: Number of threads to use. Must be a positive integer.
    :type n: int
    :raises ValueError: If n is not a positive integer.
-   
+
    **Example**
-   
+
    ::
-   
+
        >>> from numpy_quaddtype import set_num_threads, get_num_threads
        >>> set_num_threads(4)
        >>> get_num_threads()
        4
-   
+
    .. note::
-      
+
       This function has no effect if QuadBLAS is disabled (e.g., on Windows).
 
 .. function:: numpy_quaddtype.get_num_threads()
 
    Get the current number of threads used by QuadBLAS.
-   
+
    :return: Current thread count for QuadBLAS operations.
    :rtype: int
-   
+
    **Example**
-   
+
    ::
-   
+
        >>> from numpy_quaddtype import get_num_threads
        >>> get_num_threads()
        4
@@ -77,23 +77,23 @@ These functions control the number of threads used by QuadBLAS for parallel oper
 .. function:: numpy_quaddtype.get_quadblas_version()
 
    Get the QuadBLAS library version string.
-   
+
    :return: Version string if QuadBLAS is available, ``None`` otherwise.
    :rtype: str or None
-   
+
    **Example**
-   
+
    ::
-   
+
        >>> from numpy_quaddtype import get_quadblas_version
        >>> version = get_quadblas_version()
        >>> if version:
        ...     print(f"QuadBLAS version: {version}")
        ... else:
        ...     print("QuadBLAS not available")
-   
+
    .. note::
-      
+
       QuadBLAS is automatically disabled on Windows builds due to MSVC
       compatibility issues. In this case, the function returns ``None``.
 ```
@@ -113,24 +113,24 @@ from numpy_quaddtype import (
 version = get_quadblas_version()
 if version:
     print(f"QuadBLAS {version} available")
-    
+
     # Get current threads
     print(f"Default threads: {get_num_threads()}")
-    
+
     # Create test array
     arr = np.random.randn(100000).astype(QuadPrecDType())
-    
+
     # Benchmark with different thread counts
     import time
-    
+
     for threads in [1, 2, 4, 8]:
         set_num_threads(threads)
-        
+
         start = time.time()
         for _ in range(10):
             result = np.dot(arr, arr)
         elapsed = time.time() - start
-        
+
         print(f"  {threads} threads: {elapsed:.3f}s")
 else:
     print("QuadBLAS not available - single-threaded operations only")
diff --git a/quaddtype/docs/changelog.md b/quaddtype/docs/changelog.md
index a599b071..aed7d7ff 100644
--- a/quaddtype/docs/changelog.md
+++ b/quaddtype/docs/changelog.md
@@ -5,7 +5,7 @@ All notable changes to NumPy QuadDType will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.2.0] - 2025
+## Unreleased
 
 ### Added
 
@@ -30,7 +30,48 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Memory alignment issues on certain platforms
 - Thread safety in scalar operations
 
-## [0.1.0] - 2024
+## [0.2.2] - 2025-10-13
+
+### Changed
+
+- Prioritise system-wide dependencies over the meson wrap fallback
+
+## [0.2.1] - 2025-10-11
+
+### Fixed
+
+- Initialization of multiple copies of the OpenMP runtime
+- Null pointer dereference
+
+## [0.2.0] - 2025-09-12
+
+### Added
+
+- Cast for ubyte and half dtypes
+
+### Changed
+
+- Bundle SLEEF and submodules using meson wrap (sdist compatible)
+
+### Fixed
+
+- smallest_subnormal constant
+
+## [0.1.0] - 2025-09-03
+
+### Added
+
+- Support for Python 3.13 and 3.14
+- Support for ufuncs: copysign, sign, signbit, isfinite, isinf, isnan, fmin, fmax, reciprocal, matmul, sinh, cosh, tanh, arcsinh, arccosh, arctanh
+- Constants: smallest_subnormal, bits, precision, resolution
+
+### Fixed
+
+- NaN comparisons
+- mod ufunc
+- rint ufunc for near-halfway cases
+
+## [0.0.1] - 2025-07-02
 
 ### Added
 
@@ -45,12 +86,3 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Comparison operations
 - Array broadcasting support
 - Linux and macOS wheel builds
-
-## Unreleased
-
-### Planned
-
-- Complex quad precision support
-- Additional linear algebra functions
-- GPU acceleration exploration
-- Improved performance for small arrays
diff --git a/quaddtype/docs/conf.py b/quaddtype/docs/conf.py
index 02819c8a..03d11c71 100644
--- a/quaddtype/docs/conf.py
+++ b/quaddtype/docs/conf.py
@@ -23,6 +23,7 @@
     'myst_parser',
     'sphinx_design',
     'sphinx_copybutton',
+    'sphinxcontrib.katex',
 ]
 
 templates_path = ['_templates']
@@ -94,4 +95,3 @@
 napoleon_google_docstring = True
 napoleon_numpy_docstring = True
 napoleon_include_init_with_doc = True
-
diff --git a/quaddtype/docs/contributing.md b/quaddtype/docs/contributing.md
deleted file mode 100644
index 4ad4d09a..00000000
--- a/quaddtype/docs/contributing.md
+++ /dev/null
@@ -1,159 +0,0 @@
-# Contributing
-
-We welcome contributions to NumPy QuadDType! This guide will help you get started.
-
-## Development Setup
-
-### Prerequisites
-
-- Python 3.11+
-- GCC or Clang compiler
-- CMake ≥ 3.15
-- Git
-
-### Setting Up the Development Environment
-
-```bash
-# Clone the repository
-git clone https://github.com/numpy/numpy-user-dtypes.git
-cd numpy-user-dtypes/quaddtype
-
-# Create virtual environment
-python -m venv venv
-source venv/bin/activate  # or `venv\Scripts\activate` on Windows
-
-# Install NumPy (development version)
-pip install "numpy @ git+https://github.com/numpy/numpy.git"
-
-# Install development dependencies
-pip install -e ".[test,docs]" -v --no-build-isolation
-```
-
-## Running Tests
-
-```bash
-# Run all tests
-pytest tests/
-
-# Run with verbose output
-pytest -v tests/
-
-# Run specific test file
-pytest tests/test_basic.py
-
-# Run with parallel execution
-pytest -n auto tests/  # requires pytest-xdist
-```
-
-## Code Style
-
-We follow standard Python conventions:
-
-- **PEP 8** for Python code style
-- **Type hints** for public APIs
-- **Docstrings** for all public functions and classes
-
-### Type Checking
-
-```bash
-# Run mypy
-mypy numpy_quaddtype/
-
-# Run pyright
-pyright numpy_quaddtype/
-```
-
-## Building Documentation
-
-```bash
-# Install documentation dependencies
-pip install ".[docs]"
-
-# Build HTML documentation
-cd docs/
-make html
-
-# View locally
-python -m http.server --directory _build/html
-```
-
-## Making Changes
-
-### 1. Create a Branch
-
-```bash
-git checkout -b feature/my-new-feature
-```
-
-### 2. Make Your Changes
-
-- Write code with tests
-- Add docstrings
-- Update documentation if needed
-
-### 3. Run Tests
-
-```bash
-pytest tests/
-```
-
-### 4. Submit a Pull Request
-
-- Push your branch to GitHub
-- Open a pull request against `main`
-- Fill out the PR template
-- Wait for review
-
-## Project Structure
-
-```
-quaddtype/
-├── docs/               # Documentation (Sphinx)
-├── numpy_quaddtype/    # Python package
-│   ├── __init__.py     # Public API
-│   ├── __init__.pyi    # Type stubs
-│   ├── _quaddtype_main.pyi  # C extension stubs
-│   └── src/            # C source files
-├── tests/              # Test suite
-├── subprojects/        # Meson subprojects (SLEEF)
-├── meson.build         # Build configuration
-└── pyproject.toml      # Package metadata
-```
-
-## C Extension Development
-
-The core functionality is implemented in C. Key files:
-
-- `numpy_quaddtype/src/quaddtype_main.c` - Main extension module
-- `numpy_quaddtype/src/scalar.c` - QuadPrecision scalar implementation
-- `numpy_quaddtype/src/dtype.c` - QuadPrecDType implementation
-- `numpy_quaddtype/src/umath.c` - Universal function implementations
-
-### Building the C Extension
-
-```bash
-# Rebuild after C changes
-pip install . -v --no-build-isolation
-
-# With debug symbols
-CFLAGS="-g -O0" pip install . -v --no-build-isolation
-```
-
-## Reporting Issues
-
-When reporting bugs, please include:
-
-1. Operating system and version
-2. Python version
-3. NumPy version
-4. NumPy-QuadDType version
-5. Minimal code to reproduce the issue
-6. Full error traceback
-
-## Code of Conduct
-
-This project follows the [NumPy Code of Conduct](https://numpy.org/code-of-conduct/).
-
-## License
-
-By contributing to NumPy QuadDType, you agree that your contributions will be licensed under the BSD-3-Clause License.
diff --git a/quaddtype/docs/index.md b/quaddtype/docs/index.md
index 182aaba3..9df8ffad 100644
--- a/quaddtype/docs/index.md
+++ b/quaddtype/docs/index.md
@@ -49,7 +49,7 @@ Full suite of math functions: trigonometric, exponential, logarithmic, and more.
 :link: user_guide/backends
 :link-type: doc
 
-Choose between SLEEF (default) or longdouble backends.
+Choose between the SLEEF (default) and native longdouble backends.
 :::
 
 :::{grid-item-card} 🧵 Thread-Safe
@@ -99,10 +99,7 @@ Standard double precision (float64) provides approximately 15-16 significant dec
 :maxdepth: 2
 :hidden:
 
-installation
-user_guide/index
 api/index
-contributing
 changelog
 ```
 
diff --git a/quaddtype/docs/installation.md b/quaddtype/docs/installation.md
deleted file mode 100644
index b0fe58f6..00000000
--- a/quaddtype/docs/installation.md
+++ /dev/null
@@ -1,153 +0,0 @@
-# Installation
-
-## Quick Install
-
-The simplest way to install NumPy QuadDType is via pip:
-
-```bash
-pip install numpy-quaddtype
-```
-
-```{note}
-NumPy QuadDType requires **NumPy 2.0 or later** and **Python 3.11+**.
-```
-
-## Requirements
-
-| Requirement | Version |
-|-------------|---------|
-| Python | ≥ 3.11 |
-| NumPy | ≥ 2.0 |
-
-## Platform Support
-
-NumPy QuadDType provides pre-built wheels for:
-
-| Platform | Architectures |
-|----------|---------------|
-| Linux | x86_64, aarch64 |
-| macOS | x86_64, arm64 (Apple Silicon) |
-| Windows | x64 |
-
-## Installing from Source
-
-For development or if pre-built wheels aren't available for your platform:
-
-### Prerequisites
-
-- **C/C++ Compiler**: GCC or Clang
-- **CMake**: ≥ 3.15
-- **Python**: 3.11+
-- **Git**
-
-### Linux/macOS
-
-```bash
-# Create and activate virtual environment
-python3 -m venv venv
-source venv/bin/activate
-
-# Install NumPy (development version required for NumPy 2.x features)
-pip install "numpy @ git+https://github.com/numpy/numpy.git"
-
-# Install build dependencies
-pip install meson meson-python ninja pytest
-
-# Clone and install
-git clone https://github.com/numpy/numpy-user-dtypes.git
-cd numpy-user-dtypes/quaddtype
-pip install . -v --no-build-isolation
-```
-
-### Windows
-
-```{warning}
-On Windows, QuadBLAS optimization is automatically disabled due to MSVC compatibility issues.
-```
-
-1. Open **Developer Command Prompt for VS** or **Developer PowerShell for VS**
-
-2. Setup environment:
-   ```powershell
-   python -m venv venv
-   .\venv\Scripts\Activate.ps1
-   
-   pip install -U pip
-   pip install numpy pytest ninja meson meson-python
-   ```
-
-3. Set compiler flags:
-   ```powershell
-   $env:CFLAGS = "/DDISABLE_QUADBLAS"
-   $env:CXXFLAGS = "/DDISABLE_QUADBLAS"
-   ```
-
-4. Build and install:
-   ```powershell
-   pip install . -v --no-build-isolation
-   ```
-
-## Verifying Installation
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecision, QuadPrecDType
-
-# Check version
-import numpy_quaddtype
-print(f"numpy-quaddtype version: {numpy_quaddtype.__version__}")
-
-# Create a quad precision value
-x = QuadPrecision("3.141592653589793238462643383279502884197")
-print(f"π in quad precision: {x}")
-
-# Create an array
-arr = np.array([1, 2, 3], dtype=QuadPrecDType())
-print(f"Array dtype: {arr.dtype}")
-```
-
-## Optional: Development Installation
-
-For contributing to NumPy QuadDType:
-
-```bash
-# Clone the repository
-git clone https://github.com/numpy/numpy-user-dtypes.git
-cd numpy-user-dtypes/quaddtype
-
-# Install in editable mode with test dependencies
-pip install -e ".[test,docs]" -v --no-build-isolation
-```
-
-## Troubleshooting
-
-### CMake Not Found
-
-If you get a CMake error, install it:
-
-```bash
-# Linux (Ubuntu/Debian)
-sudo apt-get install cmake
-
-# macOS
-brew install cmake
-
-# Windows
-# Download from https://cmake.org/download/
-```
-
-### NumPy Version Error
-
-NumPy QuadDType requires NumPy 2.0+. If you have an older version:
-
-```bash
-pip install --upgrade numpy>=2.0
-```
-
-### Compiler Issues on macOS
-
-If you encounter compiler issues on macOS, ensure you have Xcode command-line tools:
-
-```bash
-xcode-select --install
-```
diff --git a/quaddtype/docs/user_guide/arrays.md b/quaddtype/docs/user_guide/arrays.md
deleted file mode 100644
index bedd6402..00000000
--- a/quaddtype/docs/user_guide/arrays.md
+++ /dev/null
@@ -1,230 +0,0 @@
-# Working with Arrays
-
-NumPy QuadDType integrates seamlessly with NumPy arrays, providing the full power of NumPy's array operations with quad precision arithmetic.
-
-## Creating Arrays
-
-### From Python Lists
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-# Create an array from a list
-arr = np.array([1.0, 2.0, 3.0], dtype=QuadPrecDType())
-print(arr)
-print(f"dtype: {arr.dtype}")
-```
-
-### From String Values (High Precision)
-
-For maximum precision, create arrays from string representations:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-# String input preserves all significant digits
-high_precision = np.array([
-    "3.14159265358979323846264338327950288",
-    "2.71828182845904523536028747135266249",
-    "1.41421356237309504880168872420969807"
-], dtype=QuadPrecDType())
-
-print(high_precision)
-```
-
-### Using `zeros`, `ones`, `empty`
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-# Create arrays with standard NumPy functions
-zeros = np.zeros(5, dtype=QuadPrecDType())
-ones = np.ones((3, 3), dtype=QuadPrecDType())
-empty = np.empty(10, dtype=QuadPrecDType())
-
-print(f"Zeros shape: {zeros.shape}")
-print(f"Ones shape: {ones.shape}")
-```
-
-### Using `arange` and `linspace`
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-# Create ranges
-arr = np.arange(0, 10, dtype=QuadPrecDType())
-print(f"arange: {arr}")
-
-# Linear spacing
-lin = np.linspace(0, 1, 11, dtype=QuadPrecDType())
-print(f"linspace: {lin}")
-```
-
-## Array Operations
-
-### Element-wise Arithmetic
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-a = np.array([1, 2, 3], dtype=QuadPrecDType())
-b = np.array([4, 5, 6], dtype=QuadPrecDType())
-
-print(f"a + b = {a + b}")
-print(f"a - b = {a - b}")
-print(f"a * b = {a * b}")
-print(f"a / b = {a / b}")
-print(f"a ** 2 = {a ** 2}")
-```
-
-### Reductions
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-arr = np.array([1, 2, 3, 4, 5], dtype=QuadPrecDType())
-
-print(f"Sum: {np.sum(arr)}")
-print(f"Product: {np.prod(arr)}")
-print(f"Mean: {np.mean(arr)}")
-print(f"Min: {np.min(arr)}")
-print(f"Max: {np.max(arr)}")
-```
-
-### Broadcasting
-
-QuadPrecDType fully supports NumPy broadcasting:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-# 2D array
-matrix = np.array([[1, 2, 3], [4, 5, 6]], dtype=QuadPrecDType())
-
-# 1D array - broadcasts across rows
-row_scale = np.array([10, 100, 1000], dtype=QuadPrecDType())
-
-result = matrix * row_scale
-print(result)
-```
-
-## Mathematical Functions
-
-All standard NumPy ufuncs work with QuadPrecDType arrays:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-x = np.linspace(0, 2 * np.pi, 5, dtype=QuadPrecDType())
-
-# Trigonometric functions
-print(f"sin(x): {np.sin(x)}")
-print(f"cos(x): {np.cos(x)}")
-
-# Exponential and logarithmic
-y = np.array([1, 2, 3], dtype=QuadPrecDType())
-print(f"exp(y): {np.exp(y)}")
-print(f"log(y): {np.log(y)}")
-
-# Square root
-print(f"sqrt(y): {np.sqrt(y)}")
-```
-
-## Indexing and Slicing
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-arr = np.arange(10, dtype=QuadPrecDType())
-
-# Basic indexing
-print(f"arr[0]: {arr[0]}")
-print(f"arr[-1]: {arr[-1]}")
-
-# Slicing
-print(f"arr[2:5]: {arr[2:5]}")
-print(f"arr[::2]: {arr[::2]}")
-
-# Boolean indexing
-mask = arr > 5
-print(f"arr[arr > 5]: {arr[mask]}")
-```
-
-## Reshaping and Stacking
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-arr = np.arange(12, dtype=QuadPrecDType())
-
-# Reshape
-reshaped = arr.reshape(3, 4)
-print(f"Reshaped:\n{reshaped}")
-
-# Stack arrays
-a = np.array([1, 2, 3], dtype=QuadPrecDType())
-b = np.array([4, 5, 6], dtype=QuadPrecDType())
-
-stacked = np.stack([a, b])
-print(f"Stacked:\n{stacked}")
-
-concatenated = np.concatenate([a, b])
-print(f"Concatenated: {concatenated}")
-```
-
-## Type Conversion
-
-### Converting to QuadPrecDType
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-# From float64
-float64_arr = np.array([1.1, 2.2, 3.3], dtype=np.float64)
-quad_arr = float64_arr.astype(QuadPrecDType())
-
-# From integer
-int_arr = np.array([1, 2, 3], dtype=np.int64)
-quad_from_int = int_arr.astype(QuadPrecDType())
-```
-
-### Converting from QuadPrecDType
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-quad_arr = np.array([1.5, 2.5, 3.5], dtype=QuadPrecDType())
-
-# To float64 (loses precision)
-float64_arr = quad_arr.astype(np.float64)
-print(f"As float64: {float64_arr}")
-```
-
-## Memory Considerations
-
-QuadPrecDType arrays use 16 bytes per element (compared to 8 bytes for float64):
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-n = 1000000
-
-float64_arr = np.zeros(n, dtype=np.float64)
-quad_arr = np.zeros(n, dtype=QuadPrecDType())
-
-print(f"float64 size: {float64_arr.nbytes / 1e6:.1f} MB")
-print(f"quad size: {quad_arr.nbytes / 1e6:.1f} MB")
-```
diff --git a/quaddtype/docs/user_guide/backends.md b/quaddtype/docs/user_guide/backends.md
deleted file mode 100644
index 240e1191..00000000
--- a/quaddtype/docs/user_guide/backends.md
+++ /dev/null
@@ -1,150 +0,0 @@
-# Backends
-
-NumPy QuadDType supports two computational backends for quad-precision arithmetic. Understanding the differences helps you choose the right one for your use case.
-
-## Available Backends
-
-### SLEEF (Default)
-
-**SLEEF** (SIMD Library for Evaluating Elementary Functions) is the default and recommended backend.
-
-```python
-from numpy_quaddtype import QuadPrecDType, QuadPrecision
-
-# Explicit SLEEF backend
-dtype = QuadPrecDType("sleef")
-scalar = QuadPrecision(3.14, backend="sleef")
-
-# Or simply use defaults
-dtype = QuadPrecDType()  # SLEEF is default
-```
-
-**Advantages:**
-- ✅ True IEEE 754 binary128 quad precision
-- ✅ SIMD-optimized for performance
-- ✅ Consistent behavior across all platforms
-- ✅ Full suite of mathematical functions
-
-**Considerations:**
-- Uses the SLEEF library (bundled with the package)
-
-### Long Double
-
-The **longdouble** backend uses your platform's native `long double` type.
-
-```python
-from numpy_quaddtype import QuadPrecDType, QuadPrecision, is_longdouble_128
-
-# Check if your platform has 128-bit long double
-print(f"Is long double 128-bit? {is_longdouble_128()}")
-
-# Use longdouble backend
-dtype = QuadPrecDType("longdouble")
-scalar = QuadPrecision(3.14, backend="longdouble")
-```
-
-**Advantages:**
-- ✅ Uses native CPU instructions (when available)
-- ✅ No external library dependency
-
-**Considerations:**
-- ⚠️ Precision varies by platform (see table below)
-- ⚠️ Not true quad precision on most platforms
-
-## Platform-Specific Long Double Precision
-
-| Platform | Architecture | Long Double Size | Precision |
-|----------|--------------|------------------|-----------|
-| Linux | x86_64 | 80-bit (stored as 128) | ~18-19 decimal digits |
-| Linux | aarch64 | 128-bit | ~33-34 decimal digits |
-| macOS | x86_64 | 64-bit | Same as double |
-| macOS | arm64 | 64-bit | Same as double |
-| Windows | x64 | 64-bit | Same as double |
-
-```{warning}
-On macOS and Windows, `long double` is typically the same as `double` (64-bit), 
-providing no precision benefit. Use the SLEEF backend for true quad precision 
-on these platforms.
-```
-
-## Checking Backend Support
-
-```python
-from numpy_quaddtype import is_longdouble_128
-
-if is_longdouble_128():
-    print("Your platform supports 128-bit long double!")
-    print("Both backends will provide similar precision.")
-else:
-    print("Long double is NOT 128-bit on your platform.")
-    print("Use SLEEF backend for true quad precision.")
-```
-
-## Convenience Functions
-
-For cleaner code, use the pre-defined helper functions:
-
-```python
-from numpy_quaddtype import (
-    SleefQuadPrecDType, 
-    SleefQuadPrecision,
-    LongDoubleQuadPrecDType, 
-    LongDoubleQuadPrecision
-)
-
-# SLEEF backend
-sleef_dtype = SleefQuadPrecDType()
-sleef_scalar = SleefQuadPrecision("3.14159265358979323846")
-
-# Long double backend  
-ld_dtype = LongDoubleQuadPrecDType()
-ld_scalar = LongDoubleQuadPrecision(3.14)
-```
-
-## Checking Which Backend is in Use
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType, QuadBackend
-
-dtype = QuadPrecDType("sleef")
-print(f"Backend: {dtype.backend}")  # QuadBackend.SLEEF
-
-# Compare backends
-if dtype.backend == QuadBackend.SLEEF:
-    print("Using SLEEF backend")
-elif dtype.backend == QuadBackend.LONGDOUBLE:
-    print("Using longdouble backend")
-```
-
-## Mixing Backends
-
-```{warning}
-Arrays with different backends cannot be directly combined in operations.
-```
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-# These have different backends
-sleef_arr = np.array([1, 2, 3], dtype=QuadPrecDType("sleef"))
-ld_arr = np.array([4, 5, 6], dtype=QuadPrecDType("longdouble"))
-
-# This will raise an error:
-# result = sleef_arr + ld_arr  # Error!
-
-# Convert to same backend first:
-ld_arr_converted = ld_arr.astype(QuadPrecDType("sleef"))
-result = sleef_arr + ld_arr_converted  # Works!
-```
-
-## Recommendations
-
-| Use Case | Recommended Backend |
-|----------|---------------------|
-| Cross-platform consistency | SLEEF |
-| Maximum precision needed | SLEEF |
-| Linux aarch64 with native support | Either (SLEEF preferred) |
-| Performance-critical on x86_64 | SLEEF |
-| Debugging/comparison | Both (for validation) |
diff --git a/quaddtype/docs/user_guide/constants.md b/quaddtype/docs/user_guide/constants.md
deleted file mode 100644
index ba888a93..00000000
--- a/quaddtype/docs/user_guide/constants.md
+++ /dev/null
@@ -1,139 +0,0 @@
-# Mathematical Constants
-
-NumPy QuadDType provides pre-defined mathematical constants with full quad precision accuracy.
-
-## Available Constants
-
-```python
-from numpy_quaddtype import (
-    pi, e, log2e, log10e, ln2, ln10,
-    max_value, epsilon, smallest_normal, smallest_subnormal,
-    bits, precision, resolution
-)
-```
-
-## Mathematical Constants
-
-### π (Pi)
-
-The ratio of a circle's circumference to its diameter.
-
-```python
-from numpy_quaddtype import pi
-print(f"π = {pi}")
-# 3.14159265358979323846264338327950288...
-```
-
-### e (Euler's Number)
-
-The base of the natural logarithm.
-
-```python
-from numpy_quaddtype import e
-print(f"e = {e}")
-# 2.71828182845904523536028747135266249...
-```
-
-### Logarithmic Constants
-
-```python
-from numpy_quaddtype import log2e, log10e, ln2, ln10
-
-print(f"log₂(e) = {log2e}")   # 1.44269504088896340735992468100189213...
-print(f"log₁₀(e) = {log10e}") # 0.43429448190325182765112891891660508...
-print(f"ln(2) = {ln2}")       # 0.69314718055994530941723212145817656...
-print(f"ln(10) = {ln10}")     # 2.30258509299404568401799145468436420...
-```
-
-## Type Limits
-
-### Machine Epsilon
-
-The smallest positive number such that `1.0 + epsilon != 1.0`.
-
-```python
-from numpy_quaddtype import epsilon, QuadPrecision
-
-print(f"ε = {epsilon}")
-
-# Demonstration
-one = QuadPrecision(1.0)
-print(f"1 + ε == 1: {one + epsilon == one}")           # False
-print(f"1 + ε/2 == 1: {one + epsilon/2 == one}")       # True
-```
-
-### Value Ranges
-
-```python
-from numpy_quaddtype import max_value, smallest_normal, smallest_subnormal
-
-print(f"Maximum value:       {max_value}")
-print(f"Smallest normal:     {smallest_normal}")
-print(f"Smallest subnormal:  {smallest_subnormal}")
-```
-
-## Type Information
-
-```python
-from numpy_quaddtype import bits, precision, resolution
-
-print(f"Total bits: {bits}")           # 128
-print(f"Decimal precision: {precision}")  # 33-34 significant decimal digits
-print(f"Resolution: {resolution}")     # Smallest distinguishable difference
-```
-
-## Using Constants in Calculations
-
-```python
-import numpy as np
-from numpy_quaddtype import pi, e, QuadPrecDType
-
-# Calculate e^(iπ) + 1 ≈ 0 (Euler's identity, real part)
-# We'll compute cos(π) + 1 which should be 0
-result = np.cos(np.array([pi]))[0] + 1
-print(f"cos(π) + 1 = {result}")
-
-# Area of a circle with radius 1
-radius = np.array([1], dtype=QuadPrecDType())
-area = pi * radius ** 2
-print(f"Area of unit circle: {area[0]}")
-
-# Natural exponential
-x = np.array([1], dtype=QuadPrecDType())
-exp_1 = np.exp(x)
-print(f"e¹ = {exp_1[0]}")
-print(f"e constant = {e}")
-```
-
-## Comparison with NumPy Constants
-
-```python
-import numpy as np
-from numpy_quaddtype import pi as quad_pi, e as quad_e
-
-print("Pi comparison:")
-print(f"  NumPy float64: {np.pi}")
-print(f"  QuadPrecision: {quad_pi}")
-
-print("\ne comparison:")
-print(f"  NumPy float64: {np.e}")
-print(f"  QuadPrecision: {quad_e}")
-```
-
-The quad precision constants provide approximately 33-34 significant decimal digits, compared to 15-16 for float64.
-
-## Constant Reference Table
-
-| Constant | Symbol | Approximate Value |
-|----------|--------|-------------------|
-| `pi` | π | 3.14159265358979323846... |
-| `e` | e | 2.71828182845904523536... |
-| `log2e` | log₂(e) | 1.44269504088896340735... |
-| `log10e` | log₁₀(e) | 0.43429448190325182765... |
-| `ln2` | ln(2) | 0.69314718055994530941... |
-| `ln10` | ln(10) | 2.30258509299404568401... |
-| `epsilon` | ε | ~1.93×10⁻³⁴ |
-| `max_value` | - | ~1.19×10⁴⁹³² |
-| `smallest_normal` | - | ~3.36×10⁻⁴⁹³² |
-| `bits` | - | 128 |
-| `precision` | - | 33 |
diff --git a/quaddtype/docs/user_guide/functions.md b/quaddtype/docs/user_guide/functions.md
deleted file mode 100644
index b7819b0b..00000000
--- a/quaddtype/docs/user_guide/functions.md
+++ /dev/null
@@ -1,251 +0,0 @@
-# Mathematical Functions
-
-NumPy QuadDType provides a comprehensive set of mathematical functions through NumPy's universal function (ufunc) system. All functions work seamlessly with both scalars and arrays.
-
-## Basic Arithmetic
-
-### Binary Operations
-
-| Operation | Operator | NumPy Function |
-|-----------|----------|----------------|
-| Addition | `a + b` | `np.add(a, b)` |
-| Subtraction | `a - b` | `np.subtract(a, b)` |
-| Multiplication | `a * b` | `np.multiply(a, b)` |
-| Division | `a / b` | `np.divide(a, b)` |
-| Floor Division | `a // b` | `np.floor_divide(a, b)` |
-| Modulo | `a % b` | `np.mod(a, b)` |
-| Power | `a ** b` | `np.power(a, b)` |
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-a = np.array([1, 2, 3], dtype=QuadPrecDType())
-b = np.array([4, 5, 6], dtype=QuadPrecDType())
-
-print(f"a + b = {a + b}")
-print(f"a * b = {a * b}")
-print(f"a / b = {a / b}")
-print(f"a ** 2 = {a ** 2}")
-```
-
-### Unary Operations
-
-| Operation | Operator | NumPy Function |
-|-----------|----------|----------------|
-| Negation | `-a` | `np.negative(a)` |
-| Absolute | `abs(a)` | `np.abs(a)` |
-| Positive | `+a` | `np.positive(a)` |
-
-## Trigonometric Functions
-
-### Standard Trigonometric
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType, pi
-
-x = np.linspace(0, float(pi)/2, 5, dtype=QuadPrecDType())
-
-# Basic trig functions
-print(f"sin(x): {np.sin(x)}")
-print(f"cos(x): {np.cos(x)}")
-print(f"tan(x): {np.tan(x)}")
-```
-
-### Inverse Trigonometric
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-x = np.array([0, 0.5, 1.0], dtype=QuadPrecDType())
-
-print(f"arcsin(x): {np.arcsin(x)}")
-print(f"arccos(x): {np.arccos(x)}")
-
-y = np.array([0, 1, 10], dtype=QuadPrecDType())
-print(f"arctan(y): {np.arctan(y)}")
-```
-
-### Two-Argument Arctangent
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-y = np.array([1, 1, -1, -1], dtype=QuadPrecDType())
-x = np.array([1, -1, 1, -1], dtype=QuadPrecDType())
-
-# atan2 gives the angle in the correct quadrant
-angles = np.arctan2(y, x)
-print(f"arctan2(y, x): {angles}")
-```
-
-## Hyperbolic Functions
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-x = np.array([0, 0.5, 1.0, 2.0], dtype=QuadPrecDType())
-
-# Hyperbolic functions
-print(f"sinh(x): {np.sinh(x)}")
-print(f"cosh(x): {np.cosh(x)}")
-print(f"tanh(x): {np.tanh(x)}")
-
-# Inverse hyperbolic
-print(f"arcsinh(x): {np.arcsinh(x)}")
-print(f"arccosh(x+1): {np.arccosh(x + 1)}")  # arccosh requires x >= 1
-print(f"arctanh(x/3): {np.arctanh(x / 3)}")  # arctanh requires |x| < 1
-```
-
-## Exponential and Logarithmic
-
-### Exponential Functions
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-x = np.array([0, 1, 2, 3], dtype=QuadPrecDType())
-
-print(f"exp(x): {np.exp(x)}")
-print(f"exp2(x): {np.exp2(x)}")        # 2^x
-print(f"expm1(x): {np.expm1(x)}")      # exp(x) - 1, accurate for small x
-```
-
-### Logarithmic Functions
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-x = np.array([1, 2, 10, 100], dtype=QuadPrecDType())
-
-print(f"log(x): {np.log(x)}")          # Natural log
-print(f"log2(x): {np.log2(x)}")        # Base-2 log
-print(f"log10(x): {np.log10(x)}")      # Base-10 log
-print(f"log1p(x-1): {np.log1p(x - 1)}")  # log(1+x), accurate for small x
-```
-
-## Power and Root Functions
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-x = np.array([1, 4, 9, 16], dtype=QuadPrecDType())
-
-print(f"sqrt(x): {np.sqrt(x)}")
-print(f"cbrt(x): {np.cbrt(x)}")        # Cube root
-
-# Hypotenuse (sqrt(a^2 + b^2))
-a = np.array([3, 5, 8], dtype=QuadPrecDType())
-b = np.array([4, 12, 15], dtype=QuadPrecDType())
-print(f"hypot(a, b): {np.hypot(a, b)}")
-```
-
-## Rounding Functions
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-x = np.array([1.2, 2.5, 3.7, -1.5], dtype=QuadPrecDType())
-
-print(f"floor(x): {np.floor(x)}")
-print(f"ceil(x): {np.ceil(x)}")
-print(f"trunc(x): {np.trunc(x)}")
-print(f"rint(x): {np.rint(x)}")  # Round to nearest integer
-```
-
-## Comparison Functions
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-a = np.array([1, 5, 3], dtype=QuadPrecDType())
-b = np.array([2, 4, 3], dtype=QuadPrecDType())
-
-print(f"minimum(a, b): {np.minimum(a, b)}")
-print(f"maximum(a, b): {np.maximum(a, b)}")
-
-# Comparison operators
-print(f"a < b: {a < b}")
-print(f"a == b: {a == b}")
-print(f"a >= b: {a >= b}")
-```
-
-## Special Value Functions
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecision, QuadPrecDType
-
-# Create array with special values
-arr = np.array([
-    QuadPrecision(1.0),
-    QuadPrecision("inf"),
-    QuadPrecision("-inf"),
-    QuadPrecision("nan")
-])
-
-print(f"isfinite: {np.isfinite(arr)}")
-print(f"isinf: {np.isinf(arr)}")
-print(f"isnan: {np.isnan(arr)}")
-```
-
-## Sign and Absolute Value
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-x = np.array([-3, -1, 0, 1, 3], dtype=QuadPrecDType())
-
-print(f"abs(x): {np.abs(x)}")
-print(f"sign(x): {np.sign(x)}")
-print(f"copysign(1, x): {np.copysign(1, x)}")
-```
-
-## Reduction Functions
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-arr = np.array([1, 2, 3, 4, 5], dtype=QuadPrecDType())
-
-print(f"sum: {np.sum(arr)}")
-print(f"prod: {np.prod(arr)}")
-print(f"mean: {np.mean(arr)}")
-print(f"min: {np.min(arr)}")
-print(f"max: {np.max(arr)}")
-```
-
-## Precision Demonstration
-
-The advantage of quad precision is evident in calculations that lose precision in float64:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-# Computing 1 - cos(x) for small x loses precision in float64
-x_small = 1e-8
-
-# Float64
-result_f64 = 1 - np.cos(np.float64(x_small))
-print(f"1 - cos(1e-8) [float64]: {result_f64}")
-
-# Quad precision
-x_quad = np.array([x_small], dtype=QuadPrecDType())
-result_quad = 1 - np.cos(x_quad)
-print(f"1 - cos(1e-8) [quad]:    {result_quad[0]}")
-
-# Theoretical value: x^2/2 ≈ 5e-17
-print(f"Theoretical (x²/2):      5e-17")
-```
diff --git a/quaddtype/docs/user_guide/index.md b/quaddtype/docs/user_guide/index.md
deleted file mode 100644
index 7333b0b6..00000000
--- a/quaddtype/docs/user_guide/index.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# User Guide
-
-This guide covers the core functionality and usage patterns of NumPy QuadDType.
-
-```{toctree}
-:maxdepth: 2
-
-precision
-arrays
-backends
-functions
-constants
-threading
-performance
-```
diff --git a/quaddtype/docs/user_guide/performance.md b/quaddtype/docs/user_guide/performance.md
deleted file mode 100644
index fb9564b7..00000000
--- a/quaddtype/docs/user_guide/performance.md
+++ /dev/null
@@ -1,263 +0,0 @@
-# Performance Guide
-
-Quad precision arithmetic is inherently slower than double precision due to the increased complexity of 128-bit operations. This guide helps you maximize performance while maintaining precision.
-
-## Performance Overview
-
-### Relative Performance
-
-As a general guideline, quad precision operations are approximately:
-
-| Operation Type | Slowdown vs float64 |
-|----------------|---------------------|
-| Basic arithmetic (+, -, *, /) | 5-20× |
-| Transcendental (sin, exp, log) | 10-50× |
-| Array reductions (sum, mean) | 5-15× |
-| Memory operations | 2× (due to size) |
-
-```{note}
-Actual performance varies significantly based on hardware, compiler optimizations, 
-and the specific operations being performed.
-```
-
-## Optimization Strategies
-
-### 1. Use Vectorized Operations
-
-Always prefer NumPy's vectorized operations over Python loops:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-import time
-
-arr = np.arange(10000, dtype=QuadPrecDType())
-
-# ❌ Slow: Python loop
-def slow_sum(arr):
-    total = arr[0]
-    for x in arr[1:]:
-        total = total + x
-    return total
-
-# ✅ Fast: Vectorized
-def fast_sum(arr):
-    return np.sum(arr)
-
-# Benchmark
-start = time.time()
-slow_result = slow_sum(arr)
-slow_time = time.time() - start
-
-start = time.time()
-fast_result = fast_sum(arr)
-fast_time = time.time() - start
-
-print(f"Loop time: {slow_time:.4f}s")
-print(f"Vectorized time: {fast_time:.4f}s")
-print(f"Speedup: {slow_time/fast_time:.1f}×")
-```
-
-### 2. Minimize Type Conversions
-
-Avoid repeated conversions between precisions:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-# ❌ Avoid: Repeated conversions
-def bad_approach(float64_arr):
-    results = []
-    for x in float64_arr:
-        quad_x = np.array([x], dtype=QuadPrecDType())
-        results.append(np.sin(quad_x)[0])
-    return results
-
-# ✅ Better: Convert once
-def good_approach(float64_arr):
-    quad_arr = float64_arr.astype(QuadPrecDType())
-    return np.sin(quad_arr)
-```
-
-### 3. Use In-Place Operations When Possible
-
-In-place operations avoid memory allocation:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-arr = np.ones(10000, dtype=QuadPrecDType())
-
-# ❌ Creates new array
-arr = arr * 2
-
-# ✅ In-place modification (when supported)
-np.multiply(arr, 2, out=arr)
-```
-
-### 4. Control Threading
-
-Adjust thread count based on workload:
-
-```python
-from numpy_quaddtype import set_num_threads, get_num_threads
-
-# For small arrays, single thread may be faster (less overhead)
-set_num_threads(1)
-
-# For large arrays, use multiple threads
-set_num_threads(4)
-```
-
-### 5. Consider Mixed Precision
-
-Use quad precision only where needed:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-def mixed_precision_calculation(data):
-    """Use quad precision only for sensitive calculations."""
-    
-    # Rough computation in float64 (fast)
-    rough_result = np.sum(data)
-    
-    # Precise refinement in quad (slower, but only for final step)
-    quad_data = data.astype(QuadPrecDType())
-    precise_result = np.sum(quad_data)
-    
-    return precise_result
-```
-
-## Memory Considerations
-
-### Memory Usage
-
-QuadPrecDType uses 16 bytes per element:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-n = 1_000_000
-
-f64_arr = np.zeros(n, dtype=np.float64)
-quad_arr = np.zeros(n, dtype=QuadPrecDType())
-
-print(f"float64: {f64_arr.nbytes / 1e6:.1f} MB")
-print(f"quad:    {quad_arr.nbytes / 1e6:.1f} MB")
-```
-
-### Memory-Efficient Patterns
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-
-# Process data in chunks to limit memory usage
-def process_large_dataset(data, chunk_size=100000):
-    results = []
-    for i in range(0, len(data), chunk_size):
-        chunk = data[i:i+chunk_size].astype(QuadPrecDType())
-        result = np.sum(np.sin(chunk))
-        results.append(result)
-    return np.sum(results)
-```
-
-## Benchmarking Your Code
-
-### Simple Timing
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-import time
-
-def benchmark(func, arr, iterations=10):
-    """Benchmark a function."""
-    # Warmup
-    func(arr)
-    
-    start = time.time()
-    for _ in range(iterations):
-        func(arr)
-    elapsed = time.time() - start
-    
-    return elapsed / iterations
-
-arr = np.random.randn(100000).astype(QuadPrecDType())
-
-funcs = [
-    ("sum", lambda x: np.sum(x)),
-    ("sin", lambda x: np.sin(x)),
-    ("exp", lambda x: np.exp(x / 100)),
-    ("dot", lambda x: np.dot(x, x)),
-]
-
-for name, func in funcs:
-    avg_time = benchmark(func, arr)
-    print(f"{name}: {avg_time*1000:.2f} ms")
-```
-
-### Comparison with float64
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-import time
-
-n = 100000
-iterations = 100
-
-# Create test data
-f64_arr = np.random.randn(n)
-quad_arr = f64_arr.astype(QuadPrecDType())
-
-operations = [
-    ("Addition", lambda x: x + x),
-    ("Multiplication", lambda x: x * x),
-    ("Division", lambda x: x / (x + 1)),
-    ("Sin", lambda x: np.sin(x)),
-    ("Exp", lambda x: np.exp(x / n)),
-    ("Sum", lambda x: np.sum(x)),
-]
-
-print(f"{'Operation':<15} {'float64 (ms)':<15} {'quad (ms)':<15} {'Slowdown':<10}")
-print("-" * 55)
-
-for name, op in operations:
-    # float64 timing
-    start = time.time()
-    for _ in range(iterations):
-        op(f64_arr)
-    f64_time = (time.time() - start) / iterations * 1000
-    
-    # quad timing
-    start = time.time()
-    for _ in range(iterations):
-        op(quad_arr)
-    quad_time = (time.time() - start) / iterations * 1000
-    
-    slowdown = quad_time / f64_time
-    print(f"{name:<15} {f64_time:<15.3f} {quad_time:<15.3f} {slowdown:<10.1f}×")
-```
-
-## When to Use Quad Precision
-
-### Use Quad Precision For:
-
-- ✅ Final validation of numerical algorithms
-- ✅ Ill-conditioned linear algebra problems
-- ✅ High-precision requirements (regulatory, scientific)
-- ✅ Accumulating many small values (Kahan summation alternative)
-- ✅ Reference implementations
-
-### Consider Alternatives For:
-
-- ⚠️ Real-time applications
-- ⚠️ Processing very large datasets
-- ⚠️ When float64 precision is sufficient
-- ⚠️ GPU computations (no quad support)
diff --git a/quaddtype/docs/user_guide/precision.md b/quaddtype/docs/user_guide/precision.md
deleted file mode 100644
index feb58702..00000000
--- a/quaddtype/docs/user_guide/precision.md
+++ /dev/null
@@ -1,128 +0,0 @@
-# Understanding Quad Precision
-
-## What is Quad Precision?
-
-Quad precision (also known as quadruple precision or binary128) is a floating-point format defined by the IEEE 754 standard. It provides significantly higher precision than the commonly used double precision (float64).
-
-## Precision Comparison
-
-| Format | Bits | Sign | Exponent | Mantissa | Decimal Digits |
-|--------|------|------|----------|----------|----------------|
-| Single (float32) | 32 | 1 | 8 | 23 | ~7 |
-| Double (float64) | 64 | 1 | 11 | 52 | ~15-16 |
-| **Quad (float128)** | **128** | **1** | **15** | **112** | **~33-34** |
-
-## Demonstrating the Precision Difference
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecision, pi
-
-# Standard double precision π
-pi_float64 = np.float64(np.pi)
-print(f"float64 π: {pi_float64}")
-
-# Quad precision π
-print(f"quad    π: {pi}")
-
-# The actual value of π to 50 decimal places:
-# 3.14159265358979323846264338327950288419716939937510...
-```
-
-### Practical Example: Computing e
-
-Let's compute Euler's number using the series expansion $e = \sum_{n=0}^{\infty} \frac{1}{n!}$:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecision, QuadPrecDType
-
-def compute_e_quad(terms=50):
-    """Compute e using quad precision."""
-    result = QuadPrecision(0)
-    factorial = QuadPrecision(1)
-    
-    for n in range(terms):
-        if n > 0:
-            factorial = factorial * n
-        result = result + QuadPrecision(1) / factorial
-    
-    return result
-
-def compute_e_float64(terms=50):
-    """Compute e using float64."""
-    result = np.float64(0)
-    factorial = np.float64(1)
-    
-    for n in range(terms):
-        if n > 0:
-            factorial = factorial * n
-        result = result + 1.0 / factorial
-    
-    return result
-
-e_quad = compute_e_quad(30)
-e_float64 = compute_e_float64(30)
-
-print(f"e (quad):    {e_quad}")
-print(f"e (float64): {e_float64}")
-```
-
-## When to Use Quad Precision
-
-### ✅ Good Use Cases
-
-1. **Ill-conditioned Problems**: When numerical instability affects results
-2. **Reference Implementations**: Validating lower-precision algorithms
-3. **Financial Calculations**: When regulatory compliance requires high precision
-4. **Scientific Research**: Astronomy, physics simulations
-5. **Cryptographic Applications**: Where precision is critical
-
-### ⚠️ Consider Alternatives
-
-1. **Performance-Critical Code**: Quad precision is slower than float64
-2. **Large Datasets**: Memory usage is 2x compared to float64
-3. **Simple Calculations**: When float64 precision is sufficient
-
-## Memory Layout
-
-QuadPrecision values are stored as 128-bit (16 bytes) values in memory:
-
-```
-┌─────────┬─────────────────┬────────────────────────────────────────────────────┐
-│  Sign   │    Exponent     │                     Mantissa                        │
-│  1 bit  │    15 bits      │                    112 bits                         │
-└─────────┴─────────────────┴────────────────────────────────────────────────────┘
-```
-
-## Special Values
-
-QuadPrecision supports all IEEE 754 special values:
-
-```python
-from numpy_quaddtype import QuadPrecision
-import numpy as np
-
-# Infinity
-pos_inf = QuadPrecision("inf")
-neg_inf = QuadPrecision("-inf")
-
-# NaN (Not a Number)
-nan = QuadPrecision("nan")
-
-# Check special values
-print(f"Is inf: {np.isinf(pos_inf)}")
-print(f"Is nan: {np.isnan(nan)}")
-print(f"Is finite: {np.isfinite(QuadPrecision(1.0))}")
-```
-
-## Precision Limits
-
-```python
-from numpy_quaddtype import epsilon, smallest_normal, smallest_subnormal, max_value
-
-print(f"Machine epsilon:      {epsilon}")
-print(f"Smallest normal:      {smallest_normal}")
-print(f"Smallest subnormal:   {smallest_subnormal}")
-print(f"Maximum value:        {max_value}")
-```
diff --git a/quaddtype/docs/user_guide/threading.md b/quaddtype/docs/user_guide/threading.md
deleted file mode 100644
index cb1b40ce..00000000
--- a/quaddtype/docs/user_guide/threading.md
+++ /dev/null
@@ -1,195 +0,0 @@
-# Threading and Parallelism
-
-NumPy QuadDType is designed to be thread-safe and supports Python's free-threading (GIL-free) mode introduced in Python 3.13.
-
-## Thread Safety
-
-All QuadDType operations are thread-safe:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-from concurrent.futures import ThreadPoolExecutor
-
-def compute_sum(arr):
-    """Thread-safe computation."""
-    return np.sum(arr)
-
-# Create shared array
-arr = np.arange(1000, dtype=QuadPrecDType())
-
-# Run computations in parallel
-with ThreadPoolExecutor(max_workers=4) as executor:
-    # Split array and compute sums in parallel
-    chunks = np.array_split(arr, 4)
-    futures = [executor.submit(compute_sum, chunk) for chunk in chunks]
-    results = [f.result() for f in futures]
-    
-total = sum(results)
-print(f"Total sum: {total}")
-```
-
-## QuadBLAS Threading Control
-
-NumPy QuadDType uses QuadBLAS for optimized linear algebra operations. You can control the number of threads used:
-
-```python
-from numpy_quaddtype import set_num_threads, get_num_threads, get_quadblas_version
-
-# Check QuadBLAS version
-version = get_quadblas_version()
-if version:
-    print(f"QuadBLAS version: {version}")
-else:
-    print("QuadBLAS not available (DISABLE_QUADBLAS was set)")
-
-# Get current thread count
-current_threads = get_num_threads()
-print(f"Current threads: {current_threads}")
-
-# Set thread count
-set_num_threads(4)
-print(f"Threads after setting: {get_num_threads()}")
-
-# Use single thread for reproducibility
-set_num_threads(1)
-```
-
-```{note}
-QuadBLAS is disabled on Windows builds due to MSVC compatibility issues.
-Use `get_quadblas_version()` to check if it's available.
-```
-
-## Free-Threading Support (Python 3.13+)
-
-NumPy QuadDType fully supports Python's experimental free-threading mode (GIL-free Python).
-
-### Checking Free-Threading Mode
-
-```python
-import sys
-
-if hasattr(sys, '_is_gil_enabled'):
-    if sys._is_gil_enabled():
-        print("Running with GIL enabled")
-    else:
-        print("Running in free-threaded mode (no GIL)")
-else:
-    print("Free-threading not available (Python < 3.13)")
-```
-
-### Using Free-Threading
-
-When running with free-threading enabled, true parallel execution is possible:
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType
-import threading
-
-results = []
-lock = threading.Lock()
-
-def parallel_compute(arr):
-    """Compute in parallel."""
-    result = np.sum(np.sin(arr))
-    with lock:
-        results.append(result)
-
-# Create arrays for parallel processing
-arrays = [np.arange(i * 1000, (i + 1) * 1000, dtype=QuadPrecDType()) 
-          for i in range(4)]
-
-# Run in parallel threads
-threads = [threading.Thread(target=parallel_compute, args=(arr,)) 
-           for arr in arrays]
-
-for t in threads:
-    t.start()
-for t in threads:
-    t.join()
-
-print(f"Results from {len(results)} threads: {results}")
-```
-
-## Building with Thread Sanitizer
-
-For development and testing thread safety, you can build with Thread Sanitizer (TSan):
-
-### Prerequisites
-
-```bash
-# Use clang compiler
-export CC=clang
-export CXX=clang++
-```
-
-### Build Steps
-
-1. Build CPython with TSan support (see [Python Free-Threading Guide](https://py-free-threading.github.io/thread_sanitizer/))
-
-2. Build NumPy with TSan:
-   ```bash
-   pip install "numpy @ git+https://github.com/numpy/numpy" \
-       -C'setup-args=-Db_sanitize=thread'
-   ```
-
-3. Build SLEEF with TSan:
-   ```bash
-   cmake \
-       -DCMAKE_C_COMPILER=clang \
-       -DCMAKE_C_FLAGS="-fsanitize=thread -g -O1" \
-       -DSLEEF_BUILD_QUAD=ON \
-       -S sleef -B sleef/build
-   
-   cmake --build sleef/build -j
-   sudo cmake --install sleef/build --prefix=/usr/local
-   ```
-
-4. Build numpy-quaddtype with TSan:
-   ```bash
-   export CFLAGS="-fsanitize=thread -g -O0"
-   export CXXFLAGS="-fsanitize=thread -g -O0"
-   export LDFLAGS="-fsanitize=thread"
-   
-   pip install . -vv --no-build-isolation \
-       -Csetup-args=-Db_sanitize=thread
-   ```
-
-## Best Practices
-
-### Do's
-
-- ✅ Use `set_num_threads()` to control parallelism
-- ✅ Use thread-local storage for intermediate results when needed
-- ✅ Test with TSan during development
-- ✅ Use proper synchronization for shared mutable state
-
-### Don'ts
-
-- ❌ Don't assume operations are atomic
-- ❌ Don't modify arrays while other threads are reading them
-- ❌ Don't ignore thread sanitizer warnings
-
-## Performance Considerations
-
-```python
-import numpy as np
-from numpy_quaddtype import QuadPrecDType, set_num_threads
-import time
-
-arr = np.random.randn(100000).astype(QuadPrecDType())
-
-# Benchmark with different thread counts
-for threads in [1, 2, 4, 8]:
-    set_num_threads(threads)
-    
-    start = time.time()
-    for _ in range(10):
-        result = np.sum(arr)
-    elapsed = time.time() - start
-    
-    print(f"Threads: {threads}, Time: {elapsed:.3f}s")
-```
-
-The optimal thread count depends on your specific workload and hardware.
diff --git a/quaddtype/pyproject.toml b/quaddtype/pyproject.toml
index 31c6dd40..c942deda 100644
--- a/quaddtype/pyproject.toml
+++ b/quaddtype/pyproject.toml
@@ -44,6 +44,7 @@ docs = [
     "myst-parser",
     "sphinx-design",
     "sphinx-copybutton",
+    "sphinxcontrib-katex",
 ]
 
 [project.urls]

From 6bd5d2fb4036c4408fac97af4d0be3422f17cbfd Mon Sep 17 00:00:00 2001
From: Juniper Tyree <juniper.tyree@helsinki.fi>
Date: Fri, 16 Jan 2026 10:44:47 +0200
Subject: [PATCH 4/4] fixups

---
 quaddtype/docs/api/constants_api.md | 30 ++++++++++++++---------------
 quaddtype/docs/api/core.md          | 16 +++------------
 quaddtype/docs/api/functions.md     |  8 ++++----
 quaddtype/docs/index.md             | 20 +++++++++----------
 4 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/quaddtype/docs/api/constants_api.md b/quaddtype/docs/api/constants_api.md
index d97f4ed0..351674d9 100644
--- a/quaddtype/docs/api/constants_api.md
+++ b/quaddtype/docs/api/constants_api.md
@@ -7,9 +7,9 @@ Pre-defined mathematical constants with quad precision accuracy.
 ```{eval-rst}
 .. data:: numpy_quaddtype.pi
 
-   The mathematical constant :math:`pi` (pi).
+   The mathematical constant :math:`\pi` (pi).
 
-   Value: 3.14159265358979323846264338327950288...
+   :value: 3.14159265358979323846264338327950288...
 
    :type: QuadPrecision
 
@@ -17,7 +17,7 @@ Pre-defined mathematical constants with quad precision accuracy.
 
    Euler's number :math:`e`, the base of natural logarithms.
 
-   Value: 2.71828182845904523536028747135266249...
+   :value: 2.71828182845904523536028747135266249...
 
    :type: QuadPrecision
 
@@ -25,7 +25,7 @@ Pre-defined mathematical constants with quad precision accuracy.
 
    The base-2 logarithm of :math:`e`: :math:`\log_{2}{e}`.
 
-   Value: 1.44269504088896340735992468100189213...
+   :value: 1.44269504088896340735992468100189213...
 
    :type: QuadPrecision
 
@@ -33,23 +33,23 @@ Pre-defined mathematical constants with quad precision accuracy.
 
    The base-10 logarithm of :math:`e`: :math:`\log_{10}{e}`.
 
-   Value: 0.43429448190325182765112891891660508...
+   :value: 0.43429448190325182765112891891660508...
 
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.ln2
 
-   The natural logarithm of 2: :math:`\ln(2)`.
+   The natural logarithm of 2: :math:`\log_{e}{2}`.
 
-   Value: 0.69314718055994530941723212145817656...
+   :value: 0.69314718055994530941723212145817656...
 
    :type: QuadPrecision
 
 .. data:: numpy_quaddtype.ln10
 
-   The natural logarithm of 10: :math:`\ln(10)`.
+   The natural logarithm of 10: :math:`\log_{e}{10}`.
 
-   Value: 2.30258509299404568401799145468436420...
+   :value: 2.30258509299404568401799145468436420...
 
    :type: QuadPrecision
 ```
@@ -61,7 +61,7 @@ Pre-defined mathematical constants with quad precision accuracy.
 
    Machine epsilon: the smallest positive number such that :math:`1.0 + \epsilon \neq 1.0`.
 
-   :math:`2^{-112}` or approximately :math:`1.93 \cdot 10^{-34}`.
+   :value: :math:`2^{-112}` or approximately :math:`1.93 \cdot 10^{-34}`
 
    :type: QuadPrecision
 
@@ -69,9 +69,9 @@ Pre-defined mathematical constants with quad precision accuracy.
 
    The largest representable finite quad-precision value.
 
-   :math:`216383 \cdot (2 - 2^{-112})` or approximately :math:`1.19 \cdot 10^{4932}`.
+   The most negative representable finite quad-precision value is ``-numpy_quaddtype.max_value``.
 
-   The largest negative representable finite quad-precision value is `-numpy_quaddtype.max_value`.
+   :value: :math:`2^{16383} \cdot (2 - 2^{-112})` or approximately :math:`1.19 \cdot 10^{4932}`
 
    :type: QuadPrecision
 
@@ -79,7 +79,7 @@ Pre-defined mathematical constants with quad precision accuracy.
 
    The smallest positive normal (normalized, mantissa has a leading 1 bit) quad-precision value.
 
-   :math:`2^{-16382} \cdot (1 - 2^{-112})` or approximately :math:`3.36 \cdot 10^{-4932}`.
+   :value: :math:`2^{-16382}` or approximately :math:`3.36 \cdot 10^{-4932}`
 
    :type: QuadPrecision
 
@@ -87,7 +87,7 @@ Pre-defined mathematical constants with quad precision accuracy.
 
    The smallest positive subnormal (denormalized, mantissa has a leading 0 bit) quad-precision value.
 
-   :math:`2^{-16494}` or approximately :math:`6.48 \cdot 10^{-4966}`.
+   :value: :math:`2^{-16494}` or approximately :math:`6.48 \cdot 10^{-4966}`
 
    :type: QuadPrecision
 
@@ -95,7 +95,7 @@ Pre-defined mathematical constants with quad precision accuracy.
 
    The approximate decimal resolution of quad precision, i.e. `10 ** (-precision)`.
 
-   :math:`10^{-33}`.
+   :value: :math:`10^{-33}`
 
    :type: QuadPrecision
 ```
diff --git a/quaddtype/docs/api/core.md b/quaddtype/docs/api/core.md
index 12c540a2..8fa6107f 100644
--- a/quaddtype/docs/api/core.md
+++ b/quaddtype/docs/api/core.md
@@ -2,7 +2,7 @@
 
 The fundamental types provided by NumPy QuadDType.
 
-## QuadPrecision
+## Quad Precision Value
 
 ```{eval-rst}
 .. class:: numpy_quaddtype.QuadPrecision(value, backend="sleef")
@@ -63,7 +63,7 @@ The fundamental types provided by NumPy QuadDType.
       The imaginary part (always zero for QuadPrecision).
 ```
 
-## QuadPrecDType
+## Quad Precision DType
 
 ```{eval-rst}
 .. class:: numpy_quaddtype.QuadPrecDType(backend="sleef")
@@ -92,7 +92,7 @@ The fundamental types provided by NumPy QuadDType.
    .. attribute:: backend
       :type: QuadBackend
 
-      The computation backend (``QuadBackend.SLEEF`` or ``QuadBackend.LONGDOUBLE``).
+      The computation backend (``SLEEF`` or ``LONGDOUBLE``).
 
    .. attribute:: itemsize
       :type: int
@@ -110,8 +110,6 @@ The fundamental types provided by NumPy QuadDType.
       The string name of the dtype (``"QuadPrecDType128"``).
 ```
 
-## QuadBackend
-
 ```{eval-rst}
 .. class:: numpy_quaddtype.QuadBackend
 
@@ -140,8 +138,6 @@ The fundamental types provided by NumPy QuadDType.
 
 ## Convenience Functions
 
-### SleefQuadPrecision
-
 ```{eval-rst}
 .. function:: numpy_quaddtype.SleefQuadPrecision(value)
 
@@ -154,8 +150,6 @@ The fundamental types provided by NumPy QuadDType.
    :rtype: QuadPrecision
 ```
 
-### LongDoubleQuadPrecision
-
 ```{eval-rst}
 .. function:: numpy_quaddtype.LongDoubleQuadPrecision(value)
 
@@ -168,8 +162,6 @@ The fundamental types provided by NumPy QuadDType.
    :rtype: QuadPrecision
 ```
 
-### SleefQuadPrecDType
-
 ```{eval-rst}
 .. function:: numpy_quaddtype.SleefQuadPrecDType()
 
@@ -181,8 +173,6 @@ The fundamental types provided by NumPy QuadDType.
    :rtype: QuadPrecDType
 ```
 
-### LongDoubleQuadPrecDType
-
 ```{eval-rst}
 .. function:: numpy_quaddtype.LongDoubleQuadPrecDType()
 
diff --git a/quaddtype/docs/api/functions.md b/quaddtype/docs/api/functions.md
index c6fa93e8..c5329558 100644
--- a/quaddtype/docs/api/functions.md
+++ b/quaddtype/docs/api/functions.md
@@ -60,8 +60,8 @@ NumPy QuadDType supports a comprehensive set of NumPy universal functions (ufunc
 
 | Function | Description |
 |----------|-------------|
-| `np.exp` | Exponential (:math:`e^x`) |
-| `np.exp2` | Base-2 exponential (:math:`2^x`) |
+| `np.exp` | Exponential ({math}`e^x`) |
+| `np.exp2` | Base-2 exponential ({math}`2^x`) |
 | `np.expm1` | `exp(x) - 1` (accurate for small x) |
 
 ## Element-wise Logarithmic Functions
@@ -77,10 +77,10 @@ NumPy QuadDType supports a comprehensive set of NumPy universal functions (ufunc
 
 | Function | Description |
 |----------|-------------|
-| `np.square` | Square (:math:`x^2`) |
+| `np.square` | Square ({math}`x^2`) |
 | `np.sqrt` | Square root |
 | `np.cbrt` | Cube root |
-| `np.hypot` | Hypotenuse (:math:`\sqrt{x^2 + y^2}`) |
+| `np.hypot` | Hypotenuse ({math}`\sqrt{x^2 + y^2}`) |
 
 ## Element-wise Rounding Functions
 
diff --git a/quaddtype/docs/index.md b/quaddtype/docs/index.md
index 9df8ffad..e097fff9 100644
--- a/quaddtype/docs/index.md
+++ b/quaddtype/docs/index.md
@@ -18,22 +18,22 @@ NumPy QuadDType provides IEEE 754 quadruple-precision (binary128) floating-point
 :gutter: 2
 
 :::{grid-item-card} 🎯 True Quad Precision
-:link: user_guide/precision
-:link-type: doc
+<!--- :link: user_guide/precision
+:link-type: doc  -->
 
 128-bit floating point with ~34 decimal digits of precision
 :::
 
 :::{grid-item-card} 🔌 NumPy Integration
-:link: user_guide/arrays
-:link-type: doc
+<!--- :link: user_guide/arrays
+:link-type: doc  -->
 
 Works seamlessly with NumPy arrays, ufuncs, and broadcasting.
 :::
 
 :::{grid-item-card} ⚡ SIMD Optimized
-:link: user_guide/performance
-:link-type: doc
+<!--- :link: user_guide/performance
+:link-type: doc  -->
 
 Vectorization-friendly design that can leverage SIMD acceleration where supported.
 :::
@@ -46,15 +46,15 @@ Full suite of math functions: trigonometric, exponential, logarithmic, and more.
 :::
 
 :::{grid-item-card} 🔀 Dual Backend
-:link: user_guide/backends
-:link-type: doc
+<!--- :link: user_guide/backends
+:link-type: doc  -->
 
 Choose between SLEEF (default) or native longdouble backends.
 :::
 
 :::{grid-item-card} 🧵 Thread-Safe
-:link: user_guide/threading
-:link-type: doc
+<!--- :link: user_guide/threading
+:link-type: doc  -->
 
 Full support for Python's free-threading (GIL-free) mode.
 :::