Skip to content

Commit

Permalink
Update current_catches query and test data
Browse files Browse the repository at this point in the history
  • Loading branch information
VincentAntoine committed Dec 11, 2024
1 parent bd9f3f3 commit 5eaf3e9
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 35 deletions.
148 changes: 114 additions & 34 deletions datascience/src/pipeline/queries/monitorfish/current_catches.sql
Original file line number Diff line number Diff line change
@@ -1,33 +1,82 @@
WITH deleted_or_corrected_messages AS (
WITH acknowledged_messages AS (
SELECT referenced_report_id
FROM public.logbook_reports
WHERE operation_type IN ('DEL', 'COR')
AND operation_datetime_utc > CURRENT_TIMESTAMP - INTERVAL '6 months'
-- exclude VisioCapture (which is not real time but has several months of delay) from current_catches
AND (software IS NULL OR software NOT LIKE '%VISIOCaptures%')
FROM logbook_reports
WHERE
operation_datetime_utc >= CURRENT_TIMESTAMP - INTERVAL '3 months'
AND operation_datetime_utc < CURRENT_TIMESTAMP + INTERVAL '6 hours'
AND operation_type ='RET'
AND value->>'returnStatus' = '000'
),

deleted_messages AS (
SELECT
operation_number,
referenced_report_id
FROM logbook_reports
WHERE
operation_datetime_utc >= CURRENT_TIMESTAMP - INTERVAL '3 months'
AND operation_datetime_utc < CURRENT_TIMESTAMP + INTERVAL '6 hours'
AND operation_type ='DEL'
),

acknowledged_deleted_messages AS (
SELECT referenced_report_id
FROM deleted_messages
WHERE
operation_number IN (SELECT referenced_report_id FROM acknowledged_messages)
),

corrected_messages AS (
SELECT
referenced_report_id
FROM logbook_reports
WHERE
operation_datetime_utc >= CURRENT_TIMESTAMP - INTERVAL '3 months'
AND operation_datetime_utc < CURRENT_TIMESTAMP + INTERVAL '6 hours'
AND operation_type ='COR'
AND (
flag_state NOT IN ('FRA', 'GUF', 'VEN') -- Flag states for which we receive RET
OR report_id IN (SELECT referenced_report_id FROM acknowledged_messages)
)
),

ordered_deps AS (
SELECT
cfr,
ircs,
external_identification,
external_identification AS external_immatriculation,
trip_number,
(value->>'departureDatetimeUtc')::timestamptz AS departure_datetime_utc,
ROW_NUMBER() OVER(PARTITION BY cfr ORDER BY (value->>'departureDatetimeUtc')::timestamptz DESC) as rk
activity_datetime_utc AS departure_datetime_utc,
ROW_NUMBER() OVER (PARTITION BY cfr ORDER BY activity_datetime_utc DESC) as rk
FROM public.logbook_reports
WHERE log_type = 'DEP'
AND operation_datetime_utc > CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - INTERVAL '6 months'
AND (value->>'departureDatetimeUtc')::timestamptz < CURRENT_TIMESTAMP AT TIME ZONE 'UTC' + INTERVAL '24 hours'
AND report_id NOT IN (SELECT referenced_report_id FROM deleted_or_corrected_messages)
AND (software IS NULL OR software NOT LIKE '%VISIOCaptures%')
WHERE
log_type = 'DEP'
AND operation_datetime_utc > CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - INTERVAL '3 months'
AND activity_datetime_utc < CURRENT_TIMESTAMP AT TIME ZONE 'UTC' + INTERVAL '24 hours'
AND report_id NOT IN (SELECT referenced_report_id FROM corrected_messages)
AND NOT (
report_id IN (SELECT referenced_report_id FROM acknowledged_deleted_messages)
OR (
report_id IN (SELECT referenced_report_id FROM deleted_messages)
AND flag_state NOT IN ('FRA', 'GUF', 'VEN')
)
)
AND (
flag_state NOT IN ('FRA', 'GUF', 'VEN') -- Flag states for which we receive RET
OR report_id IN (SELECT referenced_report_id FROM acknowledged_messages)
)
-- Exclude data that is not real-time electronic logbook data
AND (
software IS NULL -- Non french vessels
OR software NOT LIKE '%VISIOCaptures%'
)
),

last_deps AS (
SELECT
cfr,
ircs,
external_identification,
external_immatriculation,
departure_datetime_utc,
trip_number
FROM ordered_deps
Expand All @@ -40,7 +89,7 @@ last_logbook_reports AS (
MAX(report_datetime_utc) AS last_logbook_message_datetime_utc
FROM public.logbook_reports
WHERE operation_type IN ('DAT', 'COR')
AND operation_datetime_utc > CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - INTERVAL '6 months'
AND operation_datetime_utc > CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - INTERVAL '3 months'
AND report_datetime_utc < CURRENT_TIMESTAMP AT TIME ZONE 'UTC' + INTERVAL '24 hours'
AND (software IS NULL OR software NOT LIKE '%VISIOCaptures%')
GROUP BY cfr
Expand All @@ -60,11 +109,22 @@ catches AS (
JOIN last_deps d
ON r.cfr = d.cfr
AND r.trip_number = d.trip_number
WHERE log_type = 'FAR'
AND operation_type IN ('DAT', 'COR')
AND operation_datetime_utc > CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - INTERVAL '6 months'
AND operation_number NOT IN (SELECT referenced_report_id FROM deleted_or_corrected_messages)
AND (software IS NULL OR software NOT LIKE '%VISIOCaptures%')
WHERE
log_type = 'FAR'
AND operation_datetime_utc > CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - INTERVAL '3 months'
AND report_id NOT IN (SELECT referenced_report_id FROM corrected_messages)
AND NOT (
report_id IN (SELECT referenced_report_id FROM acknowledged_deleted_messages)
OR (
report_id IN (SELECT referenced_report_id FROM deleted_messages)
AND r.flag_state NOT IN ('FRA', 'GUF', 'VEN')
)
)
AND (
flag_state NOT IN ('FRA', 'GUF', 'VEN') -- Flag states for which we receive RET
OR report_id IN (SELECT referenced_report_id FROM acknowledged_messages)
)
AND (software IS NULL OR software NOT LIKE '%VISIOCaptures%')
),

summed_catches AS (
Expand All @@ -73,9 +133,10 @@ summed_catches AS (
species,
gear,
fao_area,
mesh,
SUM(weight) as weight
FROM catches
GROUP BY cfr, species, gear, fao_area
GROUP BY cfr, species, gear, fao_area, mesh
),

gear_onboard AS (
Expand All @@ -93,21 +154,40 @@ gear_onboard AS (
)

SELECT
COALESCE(last_logbook_reports.cfr, last_deps.cfr) AS cfr,
ROW_NUMBER() OVER (ORDER BY COALESCE(l.cfr, last_deps.cfr), species) AS catch_id,
COALESCE(l.cfr, last_deps.cfr) AS cfr,
COALESCE(
v_cfr.id,
v_ircs.id,
v_ext.id
) AS vessel_id,
EXTRACT(YEAR from CURRENT_TIMESTAMP AT TIME ZONE 'UTC') AS year,
fao_area,
gear,
mesh,
species,
s.species_name,
weight,
COALESCE(
v_cfr.vessel_type,
v_ircs.vessel_type,
v_ext.vessel_type
) AS vessel_type,
last_deps.ircs,
last_deps.external_identification AS external_immatriculation,
last_logbook_reports.last_logbook_message_datetime_utc,
last_deps.external_immatriculation AS external_immatriculation,
l.last_logbook_message_datetime_utc,
departure_datetime_utc,
trip_number,
go.gear_onboard,
species,
gear,
fao_area,
weight
FROM last_logbook_reports
go.gear_onboard
FROM last_logbook_reports l
FULL OUTER JOIN last_deps
ON last_logbook_reports.cfr = last_deps.cfr
ON l.cfr = last_deps.cfr
LEFT JOIN summed_catches
ON last_logbook_reports.cfr = summed_catches.cfr
ON l.cfr = summed_catches.cfr
LEFT JOIN gear_onboard go
ON last_logbook_reports.cfr = go.cfr
ON l.cfr = go.cfr
LEFT JOIN species s
ON s.species_code = summed_catches.species
LEFT JOIN vessels v_cfr ON v_cfr.cfr = COALESCE(l.cfr, last_deps.cfr)
LEFT JOIN vessels v_ircs ON v_ircs.ircs = last_deps.ircs AND v_ircs.cfr IS NULL
LEFT JOIN vessels v_ext ON v_ext.external_immatriculation = last_deps.external_immatriculation AND v_ext.cfr IS NULL
Original file line number Diff line number Diff line change
Expand Up @@ -351,4 +351,27 @@ SET activity_datetime_utc = CASE
WHEN log_type = 'RTP' THEN (value->>'returnDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
ELSE NULL
END
WHERE log_type IS NOT NULL;
WHERE log_type IS NOT NULL;

-- Insert more RET
INSERT INTO logbook_raw_messages (operation_number, xml_message) VALUES
('23', '<ERS>Message RET xml</ERS>'),
('24', '<ERS>Message RET xml</ERS>'),
('25', '<ERS>Message RET xml</ERS>'),
('26', '<ERS>Message RET xml</ERS>'),
('27', '<ERS>Message RET xml</ERS>'),
('28', '<ERS>Message RET xml</ERS>'),
('29', '<ERS>Message RET xml</ERS>'),
('30', '<ERS>Message RET xml</ERS>');

INSERT INTO logbook_reports (
operation_number, operation_country, operation_datetime_utc, operation_type, referenced_report_id, value, integration_datetime_utc, transmission_format)
VALUES
( '23', 'OOF', ((now() AT TIME ZONE 'UTC') - INTERVAL '2 days')::TIMESTAMP, 'RET', '1', '{"returnStatus": "000"}', ((now() AT TIME ZONE 'UTC') - INTERVAL '2 days')::TIMESTAMP, 'ERS'),
( '24', 'OOF', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 day 6 hours')::TIMESTAMP, 'RET', '2', '{"returnStatus": "000"}', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 day 6 hours')::TIMESTAMP, 'ERS'),
( '25', 'OOF', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 month 5 days')::TIMESTAMP, 'RET', '3', '{"returnStatus": "000"}', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 month 5 days')::TIMESTAMP, 'ERS'),
( '26', 'OOF', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 month 4 days')::TIMESTAMP, 'RET', '4', '{"returnStatus": "000"}', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 month 4 days')::TIMESTAMP, 'ERS'),
( '27', 'OOF', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 week 5 days')::TIMESTAMP, 'RET', '5', '{"returnStatus": "000"}', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 week 5 days')::TIMESTAMP, 'ERS'),
( '28', 'OOF', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 week 4 days')::TIMESTAMP, 'RET', '6', '{"returnStatus": "000"}', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 week 4 days')::TIMESTAMP, 'ERS'),
( '29', 'OOF', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 week 3 days')::TIMESTAMP, 'RET', '7', '{"returnStatus": "000"}', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 week 3 days')::TIMESTAMP, 'ERS'),
( '30', 'OOF', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 year 6 days')::TIMESTAMP, 'RET', '9', '{"returnStatus": "000"}', ((now() AT TIME ZONE 'UTC') - INTERVAL '1 year 6 days')::TIMESTAMP, 'ERS');

0 comments on commit 5eaf3e9

Please sign in to comment.