Merge pull request #1 from shawndeggans/feature/setup

Checking in for first test
shawndeggans · Sep 23, 2024 · 3e51a99 · 3e51a99
2 parents f33f1df + b7e7c02
commit 3e51a99
Show file tree

Hide file tree

Showing 5 changed files with 216 additions and 0 deletions.
diff --git a/.github/workflows/docs_generation.yml b/.github/workflows/docs_generation.yml
@@ -0,0 +1,48 @@
+name: Generate Docs  # converts notebooks/ to markdown under docs/ and pushes the result
+
+on:
+  push:
+    branches: [main]
+    paths: ['notebooks/**']  # only pushes that touch a notebook trigger a rebuild
+
+jobs:
+  convert-and-publish:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write  # the final step runs `git push`; a read-only default GITHUB_TOKEN would be rejected
+    steps:
+    - uses: actions/checkout@v4  # v2 targets a deprecated Node runtime
+
+    - name: Set up Python
+      uses: actions/setup-python@v5  # v2 targets a deprecated Node runtime
+      with:
+        python-version: '3.x'  # quoted so the version is always read as a string
+
+    - name: Install dependencies  # refresh pip, then install what requirements.txt lists
+      run: |
+        python -m pip install -U pip
+        pip install --requirement requirements.txt
+
+    - name: Convert notebooks to markdown
+      run: |
+        # nullglob: with no notebooks the loop is skipped instead of handing nbconvert the literal pattern.
+        shopt -s nullglob
+        for notebook in notebooks/*.ipynb; do jupyter nbconvert --to markdown "$notebook" --output-dir docs/; done
+
+    - name: Update index page
+      run: |
+        sed -i '/^## Available Documentation/,$d' docs/index.md  # drop the old generated section so reruns do not duplicate the list
+        printf '## Available Documentation:\n\n' >> docs/index.md
+        for file in docs/*.md; do
+          if [ "$(basename "$file")" != "index.md" ]; then
+            echo "- [$(basename "$file" .md)]($(basename "$file"))" >> docs/index.md
+          fi
+        done
+
+    - name: Commit and push changes
+      run: |
+        git config --local user.email "action@github.com"
+        git config --local user.name "GitHub Action"
+        git add docs
+        git diff --cached --quiet && { echo "No changes to commit"; exit 0; }  # skip only when nothing is staged
+        git commit -m "Update documentation" && git push  # a real commit or push failure now fails the step
diff --git a/docs/_config.yml b/docs/_config.yml
@@ -0,0 +1,3 @@
+title: 'notebook_docs'  # site title
+description: 'a repo to demonstrate how to create GitHub pages documentation from Jupyter notebooks'
+theme: 'jekyll-theme-cayman'  # Jekyll theme name
diff --git a/docs/index.md b/docs/index.md
@@ -0,0 +1,12 @@
+---
+layout: default
+title: Home
+---
+
+## Welcome to Notebook Documentation
+
+This site contains documentation for our Jupyter notebooks.
+
+## Available Documentation
+
+(This list will be automatically populated)
diff --git a/notebooks/testnb.ipynb b/notebooks/testnb.ipynb
@@ -0,0 +1,151 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Import cleaning process\n",
+    "\n",
+    "The following notebook is part of our import cleaning process.\n",
+    "This notebook accomplishes the following:\n",
+    "- Imports a CSV file\n",
+    "- Removes extra columns\n",
+    "- Converts strings to correct data types\n",
+    "- Saves in the cleansed directory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pip install\n",
+    "%pip install pandas\n",
+    "\n",
+    "# Here we would import libraries\n",
+    "import sys\n",
+    "import pandas as pd\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import file from raw data folder\n",
+    "Here we import the file from the raw folder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pseudo code for opening a file, importing a CSV, and loading it into pandas\n",
+    "\n",
+    "# Define the file path\n",
+    "file_path = 'path/to/your/csvfile.csv'\n",
+    "\n",
+    "# Use pandas to read the CSV file\n",
+    "df = pd.read_csv(file_path)\n",
+    "\n",
+    "# Display the first few rows of the dataframe\n",
+    "print(df.head())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Removes extra columns\n",
+    "Removing the address and phone fields"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# BEGIN: Remove extra columns\n",
+    "\n",
+    "# List of columns to remove\n",
+    "columns_to_remove = ['address', 'phone']\n",
+    "\n",
+    "# Remove the specified columns\n",
+    "df_cleaned = df.drop(columns=columns_to_remove)\n",
+    "\n",
+    "# Display the first few rows of the cleaned dataframe\n",
+    "print(df_cleaned.head())\n",
+    "\n",
+    "# END: Remove extra columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set data types\n",
+    "Sets the correct datatypes for date and identity fields."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pseudo code for setting data types\n",
+    "\n",
+    "# Convert the 'date_field' to datetime\n",
+    "df_cleaned['date_field'] = pd.to_datetime(df_cleaned['date_field'])\n",
+    "\n",
+    "# Convert the 'identity_field' to numeric (values that cannot be parsed become NaN)\n",
+    "df_cleaned['identity_field'] = pd.to_numeric(df_cleaned['identity_field'], errors='coerce')\n",
+    "\n",
+    "# Display the data types of the dataframe to verify changes\n",
+    "print(df_cleaned.dtypes)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Save new data to a cleansed directory\n",
+    "Write the cleansed data from Pandas to a new CSV file in the cleansed folder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pseudo code to write the cleansed data to a new CSV file in the cleansed folder\n",
+    "\n",
+    "# Define the output file path\n",
+    "output_file_path = 'path/to/cleansed/folder/cleansed_data.csv'\n",
+    "\n",
+    "# Use pandas to write the dataframe to a CSV file\n",
+    "df_cleaned.to_csv(output_file_path, index=False)\n",
+    "\n",
+    "# Confirm the file has been written\n",
+    "print(f\"Cleansed data has been written to {output_file_path}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.12.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+nbconvert
+jupyter