Skip to content

Commit

Permalink
Merge pull request #19 from kaaveland/add-hints
Browse files Browse the repository at this point in the history
Output some hints for common migration issues
  • Loading branch information
kaaveland authored May 5, 2024
2 parents 4e0f668 + f36609f commit ecfb596
Show file tree
Hide file tree
Showing 4 changed files with 312 additions and 140 deletions.
135 changes: 135 additions & 0 deletions src/hints.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
use crate::output::FullSqlStatementLockTrace;

#[derive(Debug, Eq, PartialEq, Clone)]
pub struct Hint {
pub code: &'static str,
pub name: &'static str,
pub help: String,
}

fn add_new_valid_constraint(sql_statement_trace: &FullSqlStatementLockTrace) -> Option<Hint> {
let cons = sql_statement_trace
.new_constraints
.iter()
.find(|constraint| constraint.valid);

cons.map(|constraint| Hint {
name: "Validating table with a new constraint",
code: "validate_constraint_with_lock",
help: format!(
"A new constraint `{}` was added to the table `{}`. \
The constraint is of type `{}` and is valid. The statement blocks until all rows \
in the table are validated for the constraint. It is safer to add constraints as `NOT VALID` \
and validate them later, to avoid holding dangerous locks for a long time. Constraints that are `NOT VALID` \
affect all new inserts and updates, but not existing data. Adding the constraint initially as `NOT VALID`, then \
validating with `ALTER TABLE ... VALIDATE CONSTRAINT ...` in a later transaction minimizes time spent holding \
dangerous locks.",
constraint.name,
constraint.table_name,
constraint.constraint_type,
),
})
}

fn make_column_not_nullable(sql_statement_trace: &FullSqlStatementLockTrace) -> Option<Hint> {
let columns = sql_statement_trace
.altered_columns
.iter()
.find(|column| !column.new.nullable && column.old.nullable);

columns.map(|column| Hint {
name: "Validating table with a new `NOT NULL` column",
code: "make_column_not_nullable_with_lock",
help: format!(
"The column `{}` in the table `{}.{}` was changed to `NOT NULL`. \
The statement blocks until all rows in the table are validated to be `NOT NULL`, unless a \
`CHECK ({} IS NOT NULL)` constraint exists, in which case it is safe. \
Split this type of change into steps:\n\n \
1. Add a `CHECK ({} IS NOT NULL) NOT VALID;` constraint.\n\
2. Validate the constraint in a later transaction, with `ALTER TABLE ... VALIDATE CONSTRAINT`.\n\
3. Make the column `NOT NULL`\n",
column.new.column_name,
column.new.schema_name,
column.new.table_name,
column.new.column_name,
column.new.column_name,
),
})
}

fn add_json_column(sql_statement_trace: &FullSqlStatementLockTrace) -> Option<Hint> {
let columns = sql_statement_trace
.new_columns
.iter()
.find(|column| column.data_type == "json");

columns.map(|column| Hint {
name: "Validating table with a new JSON column",
code: "add_json_column",
help: format!(
"A new column `{}` of type `json` was added to the table `{}.{}`. The `json` type does not \
support the equality operator, so this can break all `SELECT DISTINCT` queries on the table. \
Use the `jsonb` type instead.",
column.column_name,
column.schema_name,
column.table_name,
),
})
}

fn running_statement_while_holding_access_exclusive(
sql_statement_trace: &FullSqlStatementLockTrace,
) -> Option<Hint> {
sql_statement_trace
.locks_at_start
.iter()
.find(|lock| lock.mode == "AccessExclusiveLock")
.map(|lock| Hint {
name: "Running more statements after taking `AccessExclusiveLock`",
code: "holding_access_exclusive",
help: format!(
"The statement is running while holding an `AccessExclusiveLock` on the {} `{}{}`, \
blocking all other transactions from accessing it. \
Once holding `AccessExclusiveLock` we should immediately commit the transaction. \
Any extra steps necessary are better done in a separate transaction.",
lock.relkind, lock.schema, lock.object_name,
),
})
}

fn type_change_requires_table_rewrite(
sql_statement_trace: &FullSqlStatementLockTrace,
) -> Option<Hint> {
sql_statement_trace
.altered_columns
.iter()
// TODO: This is not true for all type changes, eg. cidr -> inet is safe
// TODO: The check is also not sufficient, since varchar(10) -> varchar(20) is safe, but the opposite isn't
.find(|column| column.new.data_type != column.old.data_type)
.map(|column| Hint {
name: "Type change requiring table rewrite",
code: "type_change_requires_table_rewrite",
help: format!(
"The column `{}` in the table `{}.{}` was changed from type `{}` to `{}`. \
This change requires a full table rewrite, which can be slow on large tables. \
Consider adding a new column with the new type, updating it in batches, and then dropping the old column.",
column.new.column_name,
column.new.schema_name,
column.new.table_name,
column.old.data_type,
column.new.data_type,
),
})
}

pub type HintCheck = Box<dyn Fn(&FullSqlStatementLockTrace) -> Option<Hint>>;

pub fn checks() -> Vec<HintCheck> {
vec![
Box::new(add_new_valid_constraint),
Box::new(make_column_not_nullable),
Box::new(add_json_column),
Box::new(running_statement_while_holding_access_exclusive),
Box::new(type_change_requires_table_rewrite),
]
}
9 changes: 8 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,21 @@ use postgres::{Client, NoTls};
use crate::sqltext::{read_sql_statements, resolve_placeholders, sql_statements};
use crate::tracing::{trace_transaction, TxLockTracer};

/// Hints that can help avoid dangerous migrations, by minimizing time spent holding dangerous locks.
pub mod hints;
/// Generate output structures for lock traces and static data like lock modes.
/// This module is used by the binary to generate output in various formats is currently
/// the best documentation of output format, and can be considered a public api
/// for the library.
pub mod output;
/// Types that directly translate to postgres concepts like lock modes and relkinds.
pub mod pg_types;
/// Parse the postgres PGPASS file format.
pub mod pgpass;
/// Read and parse simple SQL scripts, resolve placeholders and break down into statements.
pub mod sqltext;
/// Trace locks taken by SQL statements.
/// Trace locks taken by SQL statements. Structures and data from here should be considered
/// internal to the crate, their fields are not part of the public API.
pub mod tracing;

/// Connection settings for connecting to a PostgreSQL database.
Expand Down
165 changes: 26 additions & 139 deletions src/output.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
use chrono::{DateTime, Local};
use serde::Serialize;

pub use output_format::{
Column, Constraint, FullSqlStatementLockTrace, FullTraceData, ModifiedColumn,
ModifiedConstraint, TracedLock,
};

use crate::output::markdown_helpers::{theader, trow};
use crate::pg_types::lock_modes::LockMode;
use crate::pg_types::locks::Lock;
use crate::tracing::tracer::ColumnMetadata;
use crate::tracing::{SqlStatementTrace, TxLockTracer};

/// Output types for the lock tracing library, exportable to JSON and public API.
///
/// The intention is to provide serialization and eventually deserialization for lock traces
/// using these record types.
pub mod output_format;

#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub struct Settings {
only_dangerous_locks: bool,
Expand Down Expand Up @@ -34,129 +43,14 @@ struct OutputContext {
duration_millis_so_far: u64,
}

#[derive(Debug, Eq, PartialEq, Clone, Serialize)]
struct TracedLock {
schema: String,
object_name: String,
mode: String,
relkind: &'static str,
oid: u32,
maybe_dangerous: bool,
blocked_queries: Vec<&'static str>,
blocked_ddl: Vec<&'static str>,
}

fn traced_lock_from(lock: &Lock) -> TracedLock {
TracedLock {
schema: lock.target().schema.to_string(),
object_name: lock.target().object_name.to_string(),
mode: lock.mode.to_db_str().to_string(),
relkind: lock.target().rel_kind.as_str(),
oid: lock.target().oid,
maybe_dangerous: lock.mode.dangerous(),
blocked_queries: lock.blocked_queries(),
blocked_ddl: lock.blocked_ddl(),
}
}

#[derive(Debug, Eq, PartialEq, Clone, Serialize)]
struct Column {
schema_name: String,
table_name: String,
column_name: String,
data_type: String,
nullable: bool,
}

impl From<&ColumnMetadata> for Column {
fn from(meta: &ColumnMetadata) -> Self {
Column {
schema_name: meta.schema_name.clone(),
table_name: meta.table_name.clone(),
column_name: meta.column_name.clone(),
data_type: meta.typename.clone(),
nullable: meta.nullable,
}
}
}

#[derive(Debug, Eq, PartialEq, Clone, Serialize)]
struct ModifiedColumn {
old: Column,
new: Column,
}

impl From<&crate::tracing::tracer::ModifiedColumn> for ModifiedColumn {
fn from(meta: &crate::tracing::tracer::ModifiedColumn) -> Self {
ModifiedColumn {
old: Column::from(&meta.old),
new: Column::from(&meta.new),
}
}
}

#[derive(Debug, Eq, PartialEq, Clone, Serialize)]
pub struct Constraint {
schema_name: String,
table_name: String,
name: String,
constraint_name: String,
constraint_type: &'static str,
valid: bool,
definition: Option<String>,
}

impl From<&crate::tracing::tracer::Constraint> for Constraint {
fn from(constraint: &crate::tracing::tracer::Constraint) -> Self {
Constraint {
schema_name: constraint.schema_name.clone(),
table_name: constraint.table_name.clone(),
name: constraint.name.clone(),
constraint_name: constraint.name.clone(),
constraint_type: constraint.constraint_type.to_display(),
valid: constraint.valid,
definition: constraint.expression.clone(),
}
}
}

#[derive(Debug, Eq, PartialEq, Clone, Serialize)]
pub struct ModifiedConstraint {
old: Constraint,
new: Constraint,
}

impl From<&crate::tracing::tracer::ModifiedConstraint> for ModifiedConstraint {
fn from(meta: &crate::tracing::tracer::ModifiedConstraint) -> Self {
ModifiedConstraint {
old: Constraint::from(&meta.old),
new: Constraint::from(&meta.new),
}
}
}

#[derive(Debug, Eq, PartialEq, Clone, Serialize)]
struct FullSqlStatementLockTrace {
statement_number_in_transaction: usize,
sql: String,
duration_millis: u64,
start_time_millis: u64,
locks_at_start: Vec<TracedLock>,
new_locks_taken: Vec<TracedLock>,
new_columns: Vec<Column>,
altered_columns: Vec<ModifiedColumn>,
new_constraints: Vec<Constraint>,
altered_constraints: Vec<ModifiedConstraint>,
}

impl OutputContext {
fn output_statement(&mut self, statement: &SqlStatementTrace) -> FullSqlStatementLockTrace {
let locks_at_start = self.held_locks_context.clone();
let new_locks_taken: Vec<_> = statement
.locks_taken
.iter()
.filter(|lock| !self.hide_lock(lock))
.map(traced_lock_from)
.map(TracedLock::from)
.filter(|lock| !locks_at_start.contains(lock))
.collect();
let result = FullSqlStatementLockTrace {
Expand Down Expand Up @@ -206,16 +100,6 @@ impl OutputContext {
}
}

#[derive(Debug, Eq, PartialEq, Clone, Serialize)]
pub struct FullTraceData {
name: Option<String>,
#[serde(with = "datefmt")]
start_time: DateTime<Local>,
total_duration_millis: u64,
all_locks_acquired: Vec<TracedLock>,
statements: Vec<FullSqlStatementLockTrace>,
}

pub fn full_trace_data(trace: &TxLockTracer, output_settings: Settings) -> FullTraceData {
let mut context = OutputContext::new(output_settings);
let mut statements = vec![];
Expand Down Expand Up @@ -344,6 +228,20 @@ impl FullTraceData {
}
}
result.push('\n');

let hints = crate::hints::checks()
.iter()
.filter_map(|check| check(statement))
.collect::<Vec<_>>();
if !hints.is_empty() {
result.push_str("### Hints\n\n");
for hint in hints {
result.push_str(&format!(
"#### {}\n\nID: `{}`\n\n{}\n\n",
hint.name, hint.code, hint.help
));
}
}
result
}

Expand Down Expand Up @@ -443,17 +341,6 @@ mod markdown_helpers {
}
}

mod datefmt {
use chrono::{DateTime, Local};

pub fn serialize<S>(date: &DateTime<Local>, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_str(&date.to_rfc3339())
}
}

#[derive(Serialize, Debug, Eq, PartialEq)]
pub struct TerseLockMode<'a> {
lock_mode: &'a str,
Expand Down
Loading

0 comments on commit ecfb596

Please sign in to comment.