Skip to content

Commit

Permalink
update feed_info and other tables separately
Browse files Browse the repository at this point in the history
Add validity checks
  • Loading branch information
fitnr committed Dec 30, 2021
1 parent 5af3751 commit fd6245c
Show file tree
Hide file tree
Showing 19 changed files with 372 additions and 289 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
GTFS: tests/data/${{ matrix.gtfs }}.zip
- run: make add_constraints || echo "could not add constraints"
- name: Run pgTAP tests
run: prove -f --exec 'psql -A -t -v schema=gtfs -f' tests/test-*.sql
run: make test
- run: psql -c "TABLE gtfs.feed_info"
- run: psql -c "SELECT feed_index, trip_id, stop_id, stop_sequence, shape_dist_traveled FROM gtfs.stop_times LIMIT 10"
- run: psql -c "SELECT feed_index, shape_id, length FROM gtfs.shape_geoms LIMIT 10"
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) 2017 Neil Freeman
Copyright (c) 2017-2021 Neil Freeman
Copyright (c) 2010 Colin Bick, Robert Damphousse

Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down
31 changes: 20 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ TABLES = stop_times trips routes \

SCHEMA = gtfs

psql = $(strip psql -v schema=$(SCHEMA) $(PSQLFLAGS))
psql = $(strip psql -v schema=$(SCHEMA))

.PHONY: all load vacuum init clean \
test check truncate \
drop_constraints add_constraints \
drop_indices add_indices \
add_triggers drop_triggers
add_triggers drop_triggers \
$(addprefix load-,$(TABLES))

all:

Expand All @@ -24,13 +26,14 @@ add_constraints add_indices add_triggers: add_%: sql/%.sql
drop_indices drop_constraints drop_triggers: drop_%: sql/drop_%.sql
$(psql) -f $<

check:
$(psql) -f sql/violations.sql
load: $(addprefix load-,$(TABLES))

load: $(GTFS)
[[ -z "$$(psql -Atc "select feed_index from $(SCHEMA).feed_info where feed_file = '$(GTFS)'")" ]] && \
$(SHELL) src/load.sh $(GTFS) $(SCHEMA)
@$(psql) -F' ' -tAc "SELECT 'loaded feed with index: ', feed_index FROM $(SCHEMA).feed_info WHERE feed_file = '$(GTFS)'"
$(filter-out load-feed_info,$(addprefix load-,$(TABLES))): load-%: load-feed_info | $(GTFS)
$(SHELL) src/load.sh $| $(SCHEMA) $*
@$(psql) -t -A -c "SELECT 'loaded $(SCHEMA).$* with feed index: ' || feed_index::text FROM $(SCHEMA).feed_info WHERE feed_file = '$|'"

load-feed_info: | $(GTFS) ## Insert row into feed_index, if necessary
$(SHELL) ./src/load_feed_info.sh $| $(SCHEMA)

vacuum: ; $(psql) -c "VACUUM ANALYZE"

Expand All @@ -43,12 +46,18 @@ else
$(error "make clean" requires FEED_INDEX)
endif

# Run the validity scripts in tests/validity against a single feed.
# FEED_INDEX is passed to psql as the feed_index variable and is required:
# without it the target stops with an explicit error rather than running
# no checks at all (same convention as the "clean" target).
ifdef FEED_INDEX
check: ; prove -v --exec 'psql -qAt -v schema=$(SCHEMA) -v feed_index=$(FEED_INDEX) -f' $(wildcard tests/validity/*.sql)
else
# $(error) sits in the recipe, so it only fires when "check" is actually run.
check: ; $(error "make check" requires FEED_INDEX)
endif

test: ; prove -j5 -f --exec 'psql -qAt -v schema=$(SCHEMA) -f' $(wildcard tests/test-*.sql)

truncate:
for t in $(TABLES); do \
echo "TRUNCATE TABLE $(SCHEMA).$$t RESTART IDENTITY CASCADE;"; done \
| $(psql) -1

init: sql/schema.sql
$(psql) -f $<
$(psql) -c "\copy $(SCHEMA).route_types FROM 'data/route_types.txt'"
$(psql) -f sql/constraints.sql
$(psql) -v ON_ERROR_STOP=on -f $<
$(psql) -v ON_ERROR_STOP=on -c "\copy $(SCHEMA).route_types FROM 'data/route_types.txt'"
$(psql) -v ON_ERROR_STOP=on -f sql/constraints.sql
21 changes: 17 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Import GTFS data into a PostgreSQL database. Includes all the constraints in the

## Requirements

* Postgresql database (9.5+) with a PostGIS (2.2+) extension
* Postgresql database (10+) with a PostGIS (2.2+) extension

## Links

Expand Down Expand Up @@ -60,14 +60,27 @@ GTFS data is regularly updated, and it's reasonable to want to include multiple
Most GTFS data has errors in it, so you may encounter an error when running the step above.
Common errors include missing `service_id`s, which cause foreign key errors. To load data despite these violations, remove constraints with `make drop_constraints`. Then load the data and try to repair it. When you're ready, restore the constraints with `make add_constraints`.

### General violation checking
### General validity checking

The `check` task will run the script `sql/violations.sql`, which will perform several queries looking for rows that violate foreign key constraints and bad geometries in the `shapes` table.
The `check` task will run the scripts in `tests/validity`, which will perform several queries looking for rows that violate foreign key constraints and bad geometries in the `shapes` table. These tests require `prove`, a very common Perl testing program, and [pgTAP](https://pgtap.org), a PostgreSQL testing suite. Install pgTAP in a new `tap` schema with:
```bash
wget https://api.pgxn.org/dist/pgtap/1.2.0/pgtap-1.2.0.zip
unzip pgtap-1.2.0.zip
make -C pgtap-1.2.0 sql/pgtap.sql
PGOPTIONS=--search_path=tap,public psql -c "CREATE SCHEMA tap" -f pgtap-1.2.0/sql/pgtap.sql
```
make check
Then run the check task, giving the index of the feed to check:
```
make check FEED_INDEX=1
```

The resulting report will tell you which tables have constraint violations, and what the errors are. You may wish to manually add missing values to your tables.

If you don't have `prove` available, try another [TAP consumer](http://testanything.org/consumers.html). Failing that, you can run the tests with:
```bash
find tests -name '*.sql' -print -exec psql -Aqt -v schema=gtfs -f {} \;
```

### Extra columns

The loading script checks for extra columns in a GTFS table and adds them to database as `text` columns. You may wish to alter or remove these columns.
Expand Down
4 changes: 2 additions & 2 deletions sql/constraints.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ ALTER TABLE routes
REFERENCES route_types (route_type);

ALTER TABLE routes
ADD CONSTRAINT routes_fkey
ADD CONSTRAINT routes_agency_id_fkey
FOREIGN KEY (feed_index, agency_id)
REFERENCES agency (feed_index, agency_id);

-- calendar_dates

ALTER TABLE calendar_dates
ADD CONSTRAINT calendar_fkey
ADD CONSTRAINT calendar_dates_service_id_fkey
FOREIGN KEY (feed_index, service_id)
REFERENCES calendar (feed_index, service_id);

Expand Down
4 changes: 2 additions & 2 deletions sql/drop_constraints.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ALTER TABLE stops
-- :schema.routes

ALTER TABLE :schema.routes
DROP CONSTRAINT routes_fkey CASCADE;
DROP CONSTRAINT routes_agency_id_fkey CASCADE;
ALTER TABLE :schema.routes
DROP CONSTRAINT route_types_fkey CASCADE;

Expand All @@ -20,7 +20,7 @@ ALTER TABLE :schema.fare_attributes
-- :schema.calendar_dates

ALTER TABLE :schema.calendar_dates
DROP CONSTRAINT calendar_fkey CASCADE;
DROP CONSTRAINT calendar_dates_service_id_fkey CASCADE;

-- :schema.fare_rules

Expand Down
29 changes: 27 additions & 2 deletions sql/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,31 @@ CREATE TABLE calendar (
);
CREATE INDEX calendar_service_id ON calendar (service_id);

-- Trigger function: fill in feed_info.feed_start_date / feed_end_date from
-- the rows exposed in the transition table "inserted". A date that is already
-- set is left untouched; only NULL dates are filled.
CREATE OR REPLACE FUNCTION feed_date_update()
    RETURNS TRIGGER AS $$
BEGIN
    UPDATE feed_info AS fi
    SET feed_start_date = COALESCE(fi.feed_start_date, bounds.start_date),
        feed_end_date = COALESCE(fi.feed_end_date, bounds.end_date)
    FROM (
        -- One row per feed: earliest start and latest end among inserted rows.
        SELECT ins.feed_index,
               MIN(ins.start_date) AS start_date,
               MAX(ins.end_date) AS end_date
        FROM inserted AS ins
        GROUP BY ins.feed_index
    ) AS bounds
    WHERE fi.feed_index = bounds.feed_index
      -- Skip feeds whose dates are both already populated.
      AND (fi.feed_start_date IS NULL OR fi.feed_end_date IS NULL);
    -- The function returns NULL unconditionally.
    RETURN NULL;
END;
$$ LANGUAGE plpgsql
SET search_path = :schema, public;

-- Catalog description of feed_date_update(), which calendar_trigger runs
-- AFTER INSERT ON calendar.
COMMENT ON FUNCTION feed_date_update IS
'Update start/end dates in feed_info after inserting into calendar. Do not overwrite existing dates';

-- Run feed_date_update() once per INSERT statement on calendar, exposing the
-- newly inserted rows to it as the transition table "inserted".
CREATE TRIGGER calendar_trigger
    AFTER INSERT ON calendar
    REFERENCING NEW TABLE AS inserted
    FOR EACH STATEMENT
    EXECUTE PROCEDURE feed_date_update();

CREATE TABLE stops (
feed_index int NOT NULL REFERENCES feed_info (feed_index),
stop_id text,
Expand Down Expand Up @@ -157,7 +182,7 @@ CREATE TABLE routes (
route_color text,
route_text_color text,
route_sort_order integer default null,
-- CONSTRAINT routes_fkey FOREIGN KEY (feed_index, agency_id)
-- CONSTRAINT routes_agency_id_fkey FOREIGN KEY (feed_index, agency_id)
-- REFERENCES agency (feed_index, agency_id),
CONSTRAINT routes_pkey PRIMARY KEY (feed_index, route_id)
);
Expand All @@ -167,7 +192,7 @@ CREATE TABLE calendar_dates (
service_id text,
date date not null,
exception_type int REFERENCES exception_types(exception_type),
-- CONSTRAINT calendar_fkey FOREIGN KEY (feed_index, service_id)
-- CONSTRAINT calendar_dates_service_id_fkey FOREIGN KEY (feed_index, service_id)
-- REFERENCES calendar (feed_index, service_id),
CONSTRAINT calendar_dates_pkey PRIMARY KEY (feed_index, service_id, date)
);
Expand Down
Loading

0 comments on commit fd6245c

Please sign in to comment.