# move_bases.py

#!/usr/bin/env python3

# Purpose:
# Move Airtable bases from one workspace to another workspace
# Bases are loaded from a CSV file that was generated from the Airtable web UI.

import logging
# Script-wide logger: the root logger, with logging's default handler
# installed and its threshold lowered to DEBUG.
log = logging.getLogger()
logging.basicConfig()
log.setLevel(logging.DEBUG)

from typing import List, Mapping, Optional, Tuple, Generator, Dict
import requests
import json
import csv
from os import environ
from sys import argv
from pathlib import Path

# The Airtable API token comes from the environment; without it the script
# prints a hint and stops with exit status 1.
api_token = environ.get("AIRTABLE_API_TOKEN")
if not api_token:
  print("Please set your Airtable API token by typing 'export AIRTABLE_API_TOKEN=MYTOKEN'")
  exit(1)

# Bearer-token header passed along with the API request.
headers = dict(Authorization=f"Bearer {api_token}")


class AirtableBase():
  """
  One Airtable base (database), populated from a single row of the CSV
  export generated by Airtable. A column missing from the row leaves the
  matching attribute set to None.
  """
  # (attribute name, CSV column header) pairs, read in this order.
  _COLUMNS = (
    ("id", "Base ID"),
    ("name", "Base name"),
    ("collaborator_count", "Collaborator count"),
    ("collaborator_ids", "Collaborator IDs"),
    ("collaborator_emails", "Collaborator emails"),
    ("record_count", "Record count"),
    ("workspace_id", "Workspace ID"),  # Workspace currently holding it
    ("workspace_name", "Workspace name"),
    ("created", "Created (UTC)"),
  )

  def __init__(self, row: Mapping) -> None:
    # Values are stored exactly as found in the row; nothing is converted.
    for attribute, column in self._COLUMNS:
      setattr(self, attribute, row.get(column))

  def __repr__(self) -> str:
    return "<{}> {}".format(__class__.__name__, self.id)


def move_base_to_workspace(
  base: AirtableBase,
  from_workspace_id: str,
  to_workspace_id: str
) -> None:
  """
  Use the Airtable API to move a base from one workspace to another.

  Args:
    base: AirtableBase. The base to move; only its id is sent to the API.
    from_workspace_id: str. Id of the workspace the base is moved out of.
    to_workspace_id: str. Id of the workspace the base is moved into.
  Raises:
    requests.HTTPError: if the API answers with an error status.
    Exception: any other error raised while sending the request. Every
      error is logged before being re-raised unchanged.
  """
  url = f"https://api.airtable.com/v0/meta/workspaces/{from_workspace_id}/moveBase"
  body = {
    "baseId": base.id,
    "targetWorkspaceId": to_workspace_id,
  }
  log.info(
    f"Moving {base} from workspace {from_workspace_id} "
    f"to workspace {to_workspace_id}..."
  )
  try:
    response = requests.post(url, headers=headers, json=body)

    if response.status_code != 200:
      log.info(f"Status: {response.status_code}. Headers: {response.headers}")

    # Logging the body is best-effort: a response that is not valid JSON must
    # not hide the status check below. Only the decoding error is ignored, so
    # KeyboardInterrupt/SystemExit are no longer swallowed here.
    try:
      json_response = response.json()
    except ValueError:
      json_response = None
    if json_response:
      log.info(f"Got response: {json.dumps(json_response, indent=2)}")

    response.raise_for_status()

  except Exception as e:
    log.error(f"Error sending the request: {e}")
    raise

# File recording the ids of bases that were already moved. For now it is
# looked up relative to the current working directory:
PROCESSED_IDS_FILE = Path("processed.txt")


def load_cached_processed() -> List[str]:
  """
  Return the base ids recorded in PROCESSED_IDS_FILE, one per line, with
  trailing whitespace removed. An empty list is returned when the file does
  not exist.
  """
  # TODO ask user if they want to load or delete this file
  if not PROCESSED_IDS_FILE.exists():
    return []

  print(f"{PROCESSED_IDS_FILE.name} exists, loading its base ids to avoid processing those again...")
  processed_ids = []
  with open(PROCESSED_IDS_FILE, 'r') as cache_file:
    for entry in cache_file:
      processed_ids.append(entry.rstrip())
  return processed_ids


def yield_from_CSV(csv_file_path: Path, delimiter: str = ',') -> Generator[Dict[str, str], None, None]:
  """
  Yield each data row of a CSV file as a mapping of column header to value.

  Args:
    csv_file_path: Path. CSV file to read; its first row is the header.
    delimiter: str. Field separator, a comma by default.
  Raises:
    ValueError: if the file has no header row (for instance, it is empty).
      As this is a generator, the error surfaces on the first iteration.
  """
  # newline='' leaves newline handling to the csv module, which its
  # documentation asks for so quoted fields with line breaks parse correctly.
  with open(csv_file_path, 'r', newline='') as f:
    csv_reader = csv.DictReader(f, delimiter=delimiter)
    # A DictReader object is always truthy, so an empty input has to be
    # detected through the header row it parsed (None or [] when absent).
    if not csv_reader.fieldnames:
      raise ValueError(f"No data found in input file {csv_file_path.name}.")

    yield from csv_reader


def move_bases_from_csv(
  csv_path: Path,
  mode: str,
  target_ws: str
) -> None:
  """
  Read a CSV file (exported from Airtable's web UI) listing Airtable bases,
  and depending on the mode, attempt to move each of these bases to or from
  a target workspace.

  Rows are skipped when the base id is already listed in the processed-ids
  file, when the base contains records, or when the row has no usable
  origin workspace id. The id of each base that was moved is appended to the
  processed-ids file immediately, so a later run skips it.

  Args:
    csv_path: Path. CSV file listing bases to move.
    mode: str. 'to' or 'from'
    target_ws: str. Id of the workspace to move to/from.
  Raises:
    ValueError: if mode is neither 'to' nor 'from', or if a row's record
      count is not an integer.
    TypeError: if a row has no record count at all.
    requests.HTTPError: if the API answers with status 422; the run stops.
  """
  # Reject an unknown mode up front: otherwise no request would be sent and
  # every base would still be recorded as successfully moved.
  if mode not in ("to", "from"):
    raise ValueError(f"Invalid mode {mode!r}: expected 'to' or 'from'.")

  # A set keeps the per-row membership test cheap.
  cached_processed = set(load_cached_processed())
  moved_bases = []
  failed = []

  for row in yield_from_CSV(csv_path):
    base = AirtableBase(row)

    if base.id in cached_processed:
      # We have processed this one before and successfully moved it
      continue

    # Only bases whose record count is zero are moved.
    if int(base.record_count):
      print(f"Warning: found {base.record_count} records in base {base.id}.")
      continue

    if not base.workspace_id or not isinstance(base.workspace_id, str):
      print(f"Invalid origin workspace id: {base.workspace_id}. Skipping.")
      continue

    # 'to' moves the base out of the workspace listed in the CSV and into the
    # target; 'from' moves it out of the target and back into the listed one.
    if mode == "to":
      origin_ws, destination_ws = base.workspace_id, target_ws
    else:
      origin_ws, destination_ws = target_ws, base.workspace_id

    try:
      move_base_to_workspace(
        base=base,
        from_workspace_id=origin_ws,
        to_workspace_id=destination_ws
      )
    except requests.HTTPError as e:
      # The HTTP status lives on the response, not on the exception's errno.
      status_code = e.response.status_code if e.response is not None else None
      if status_code == 422:
        print(f"Got 422 error. Headers: {e.response.headers}")
        # FIXME crash for now, perhaps the response headers might indicate time before retry?
        raise
      # Any other HTTP error is reported like every other failure instead of
      # being dropped silently.
      print(f"Error while processing {base}: {e}")
      failed.append(base)
    except Exception as e:
      print(f"Error while processing {base}: {e}")
      failed.append(base)
    else:
      moved_bases.append(base)
      with open(PROCESSED_IDS_FILE, 'a') as fp:
        fp.write(f"{base.id}\n")

  print(f"Sucessfully processed {len(moved_bases)} entries from {csv_path.name}.")
  if failed:
    print(f"Failed to process {len(failed)} base ids: {failed}.")


if __name__ == "__main__":
  # Expected command line: <path to CSV file> <to|from> <workspace id>.
  # All three arguments are required, so anything but exactly four argv
  # entries (script name included) gets the usage text.
  if len(argv) != 4:
    print(
      f"Usage: python3 {Path(__file__).name} \"path/to/file.csv\" [from|to] \"target_workspace_ID\"\n"
      "Using 'from' and a workspace ID means we'll move bases from that workspace "
      "to each workspace ID in the 'Workspace ID' column found in the CSV file.\n"
      "Using 'to' workspace ID means we'll move each entry to that workspace."
    )
    exit(1)

  # to/from
  mode = argv[2]
  modes = ("to", "from")
  if mode not in modes:
    print(f"Only '{modes[0]}' and '{modes[1]}' are valid second arguments.")
    exit(1)

  # TODO argparse
  # First argument should be path to CSV file to read
  input_csv_file = Path(argv[1])
  if not input_csv_file.exists():
    print(f"Input file {input_csv_file.name} was not found.")
    exit(1)

  # This is either the workspace to which we'll move the bases, OR the workspace
  # from which we'll move the bases (back into their original workspace).
  target_ws = argv[3]

  move_bases_from_csv(csv_path=input_csv_file, mode=mode, target_ws=target_ws)