Skip to content

Commit

Permalink
Clean up type errors, minor bugs, edge cases (#95)
Browse files Browse the repository at this point in the history
* feat: Standardize app errors somewhat

* feat: Simplify error logic in users api

now handled automatically by error handler

* fix: Prevent unbound variable error

* lint: Add type guards to help checker

* refactor: Tweak db code to avoid typing errors

* feat: Add type hints to some json db cols

* feat: Make implicit db data munging explicit

* feat: Add more type guards, lint cleanups

* fix: Catch more type / sa v2 bugs

* feat: Handle db integ error w/ diff handler

* feat: Remove special db integ err handling in api

* fix: Type in db query, woof

* fix: The same typo, elsewhere

* fix: Give two db cols optional annots

i hope this is right ....
  • Loading branch information
bdewilde authored Dec 21, 2023
1 parent 569a4f2 commit 8ca9d4b
Show file tree
Hide file tree
Showing 17 changed files with 170 additions and 134 deletions.
12 changes: 3 additions & 9 deletions colandr/apis/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import flask_jwt_extended as jwtext
import sqlalchemy as sa
import sqlalchemy.exc as sa_exc
from flask import current_app, render_template, url_for
from flask_restx import Namespace, Resource
from marshmallow import fields as ma_fields
Expand All @@ -12,7 +13,7 @@
from .. import tasks
from ..extensions import db, jwt
from ..models import User
from .errors import db_integrity_error, forbidden_error, not_found_error
from .errors import forbidden_error, not_found_error
from .schemas import UserSchema
from .swagger import login_model, user_model

Expand Down Expand Up @@ -151,14 +152,6 @@ def post(self, args):
"password":"PASSWORD" \
}'
"""
existing_user = db.session.execute(
sa.select(User).filter_by(email=args["email"])
).scalar_one_or_none()
if existing_user is not None:
return db_integrity_error(
f"email={args['email']} already assigned to user in database"
)

user = User(**args)
db.session.add(user)
db.session.commit()
Expand Down Expand Up @@ -333,6 +326,7 @@ def user_lookup_callback(_jwt_header, jwt_data: dict) -> User:
"""
identity = jwt_data[current_app.config["JWT_IDENTITY_CLAIM"]]
user = db.session.get(User, identity)
assert user is not None # type guard
return user


Expand Down
6 changes: 1 addition & 5 deletions colandr/apis/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,12 @@ def not_found_error(error):

@api_v1.errorhandler(exceptions.InternalServerError)
def internal_server_error(error):
"""See also: :func:`colandr.errors.handle_error()"""
message = str(error)
current_app.logger.exception(message)
return _make_error_response(500, message)


def db_integrity_error(message):
current_app.logger.error(message)
return _make_error_response(422, message)


@webargs.flaskparser.parser.error_handler
def validation_error(error, req, schema, *, error_status_code, error_headers):
"""
Expand Down
4 changes: 2 additions & 2 deletions colandr/apis/health.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import redis
import redis.client
import sqlalchemy as sa
from celery import current_app as current_celery_app
from flask_restx import Namespace, Resource
Expand All @@ -17,7 +17,7 @@
class HealthResource(Resource):
@ns.doc(responses={200: "api is healthy"})
def get(self):
redis_conn = current_celery_app.backend.client
redis_conn = current_celery_app.backend.client # type: ignore
assert isinstance(redis_conn, redis.client.Redis) # type guard
redis_conn.ping()
_ = db.session.execute(sa.text("SELECT 1")).scalar()
Expand Down
11 changes: 7 additions & 4 deletions colandr/apis/resources/citation_screenings.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ def get(self, citation_id, user_id, review_id, status_counts):

if status_counts is True:
stmt = stmt.group_by(CitationScreening.status)
return dict(db.session.execute(stmt).all())
return {row.status: row.count for row in db.session.execute(stmt)}
else:
results = db.session.execute(stmt)
return ScreeningSchema(partial=True, many=True).dump(results)
Expand Down Expand Up @@ -451,13 +451,16 @@ def post(self, args, review_id, user_id):
db.session.commit()
current_app.logger.info("inserted %s fulltexts", len(fulltexts_to_insert))
# now update include/exclude counts on review
status_counts = db.session.execute(
status_counts_stmt = (
sa.select(Study.citation_status, db.func.count(1))
.filter_by(review_id=review_id, dedupe_status="not_duplicate")
.filter(Study.citation_status.in_(["included", "excluded"]))
.group_by(Study.citation_status)
).all()
status_counts = dict(status_counts)
)
status_counts: dict[str, int] = {
row.citation_status: row.count
for row in db.session.execute(status_counts_stmt)
} # type: ignore
n_included = status_counts.get("included", 0)
n_excluded = status_counts.get("excluded", 0)
review.num_citations_included = n_included
Expand Down
7 changes: 6 additions & 1 deletion colandr/apis/resources/citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,11 @@ def post(self, args, review_id, source_type, source_name, source_url, status):

# add the study
study = Study(
user_id=current_user.id, review_id=review_id, data_source_id=data_source.id
**{
"user_id": current_user.id,
"review_id": review_id,
"data_source_id": data_source.id,
}
)
if status is not None:
study.citation_status = status
Expand All @@ -268,6 +272,7 @@ def post(self, args, review_id, source_type, source_name, source_url, status):
citation = args
citation["review_id"] = review_id
citation = CitationSchema().load(citation) # this sanitizes the data
assert isinstance(citation, dict) # type guard
citation = Citation(study.id, **citation)
db.session.add(citation)
db.session.commit()
Expand Down
5 changes: 4 additions & 1 deletion colandr/apis/resources/data_extractions.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def delete(self, id, labels):
# in case of "full" deletion, update study's data_extraction_status
if not extracted_data.extracted_items:
study = db.session.get(Study, id)
assert study is not None # type guard
study.data_extraction_status = "not_started"
db.session.commit()
current_app.logger.info("deleted contents of %s", extracted_data)
Expand Down Expand Up @@ -150,6 +151,7 @@ def put(self, args, id):
current_user = jwtext.get_current_user()
# check current user authorization
extracted_data = db.session.get(DataExtraction, id)
assert extracted_data is not None # type guard
review_id = extracted_data.review_id
if not extracted_data:
return not_found_error(f"<DataExtraction(study_id={id})> not found")
Expand All @@ -161,6 +163,7 @@ def put(self, args, id):
f"{current_user} forbidden to modify extracted data for this study"
)
study = db.session.get(Study, id)
assert study is not None # type guard
if study.data_extraction_status == "finished":
return forbidden_error(
f'{extracted_data} already "finished", so can\'t be modified'
Expand All @@ -173,7 +176,7 @@ def put(self, args, id):
f"<ReviewPlan({review_id})> does not have a data extraction form"
)
labels_map = {
item["label"]: (item["field_type"], set(item.get("allowed_values", [])))
item["label"]: (item["field_type"], set(item.get("allowed_values", []))) # type: ignore
for item in data_extraction_form[0]
}
# manually validate inputs, given data extraction form specification
Expand Down
4 changes: 4 additions & 0 deletions colandr/apis/resources/exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def get(self, review_id, content_type):
export_data = fileio.tabular.write_stream(
fieldnames, rows, quoting=csv.QUOTE_NONNUMERIC
)
else:
raise NotImplementedError("only 'text/csv' content type is available")

response = make_response(export_data, 200)
response.headers["Content-type"] = content_type
Expand Down Expand Up @@ -234,6 +236,8 @@ def get(self, review_id, content_type):
export_data = fileio.tabular.write_stream(
fieldnames, rows, quoting=csv.QUOTE_NONNUMERIC
)
else:
raise NotImplementedError("only 'text/csv' content type is available")

response = make_response(export_data, 200)
response.headers["Content-type"] = content_type
Expand Down
20 changes: 6 additions & 14 deletions colandr/apis/resources/fulltext_screenings.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,24 +450,16 @@ def post(self, args, review_id, user_id):
"inserted %s data extractions", len(data_extractions_to_insert)
)
# now update include/exclude counts on review
status_counts = db.session.execute(
status_counts_stmt = (
sa.select(Study.fulltext_status, db.func.count(1))
.filter_by(review_id=review_id)
.filter(Study.fulltext_status.in_(["included", "excluded"]))
.group_by(Study.fulltext_status)
).all()
status_counts = dict(status_counts)
review.num_fulltexts_included = status_counts.get("included", 0)
review.num_fulltexts_excluded = status_counts.get("excluded", 0)
db.session.commit()
# now update include/exclude counts on review
status_counts = db.session.execute(
sa.select(Study.fulltext_status, db.func.count(1))
.filter_by(review_id=review_id)
.filter(Study.fulltext_status.in_(["included", "excluded"]))
.group_by(Study.fulltext_status)
).all()
status_counts = dict(status_counts)
)
status_counts: dict[str, int] = {
row.fulltext_status: row.count
for row in db.session.execute(status_counts_stmt)
} # type: ignore
review.num_fulltexts_included = status_counts.get("included", 0)
review.num_fulltexts_excluded = status_counts.get("excluded", 0)
db.session.commit()
6 changes: 6 additions & 0 deletions colandr/apis/resources/fulltext_uploads.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class FulltextUploadResource(Resource):
def get(self, id, review_id):
"""get fulltext content file for a single fulltext by id"""
current_user = jwtext.get_current_user()
upload_dir = None
filename = None
if review_id is None:
for dirname, _, filenames in os.walk(
Expand Down Expand Up @@ -104,6 +105,8 @@ def get(self, id, review_id):
break
if not filename:
return not_found_error(f"no uploaded file for <Fulltext(id={id})> found")

assert upload_dir is not None # type guard
return send_from_directory(upload_dir, filename)

@ns.doc(
Expand Down Expand Up @@ -177,6 +180,9 @@ def post(self, id, uploaded_file):
# stderr=subprocess.STDOUT,
# )
text_content = fileio.pdf.read(filepath).encode("utf-8")
else:
raise ValueError(f"filepath '{filepath}' suffix '{ext} is not .txt or .pdf")

fulltext.text_content = ftfy.fix_text(text_content.decode(errors="ignore"))
db.session.commit()
current_app.logger.info(
Expand Down
53 changes: 30 additions & 23 deletions colandr/apis/resources/review_exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,18 @@ def get(self, id):
):
return forbidden_error(f"{current_user} forbidden to get this review")
# get counts by step, i.e. prisma
n_studies_by_source = dict(
db.session.execute(
sa.select(DataSource.source_type, db.func.sum(Import.num_records))
.filter(Import.data_source_id == DataSource.id)
.filter(Import.review_id == id)
.group_by(DataSource.source_type)
).all()
n_studies_by_source_stmt = (
sa.select(DataSource.source_type, db.func.sum(Import.num_records))
.filter(Import.data_source_id == DataSource.id)
.filter(Import.review_id == id)
.group_by(DataSource.source_type)
)

n_studies_by_source = {
row.source_type: row.sum
for row in db.session.execute(n_studies_by_source_stmt)
}

n_unique_studies = db.session.execute(
sa.select(sa.func.count()).select_from(
sa.select(Study)
Expand All @@ -78,26 +81,30 @@ def get(self, id):
)
).scalar_one()

n_citations_by_status = dict(
db.session.execute(
sa.select(Study.citation_status, db.func.count(1))
.filter(Study.review_id == id)
.filter(Study.citation_status.in_(["included", "excluded"]))
.group_by(Study.citation_status)
).all()
n_citations_by_status_stmt = (
sa.select(Study.citation_status, db.func.count(1))
.filter(Study.review_id == id)
.filter(Study.citation_status.in_(["included", "excluded"]))
.group_by(Study.citation_status)
)
n_citations_screened = sum(n_citations_by_status.values())
n_citations_by_status = {
row.citation_status: row.count
for row in db.session.execute(n_citations_by_status_stmt)
}
n_citations_screened = sum(n_citations_by_status.values()) # type: ignore
n_citations_excluded = n_citations_by_status.get("excluded", 0)

n_fulltexts_by_status = dict(
db.session.execute(
sa.select(Study.fulltext_status, db.func.count(1))
.filter(Study.review_id == id)
.filter(Study.fulltext_status.in_(["included", "excluded"]))
.group_by(Study.fulltext_status)
).all()
n_fulltexts_by_status_stmt = (
sa.select(Study.fulltext_status, db.func.count(1))
.filter(Study.review_id == id)
.filter(Study.fulltext_status.in_(["included", "excluded"]))
.group_by(Study.fulltext_status)
)
n_fulltexts_screened = sum(n_fulltexts_by_status.values())
n_fulltexts_by_status = {
row.fulltext_status: row.count
for row in db.session.execute(n_fulltexts_by_status_stmt)
}
n_fulltexts_screened = sum(n_fulltexts_by_status.values()) # type: ignore
n_fulltexts_excluded = n_fulltexts_by_status.get("excluded", 0)

results = db.session.execute(
Expand Down
Loading

0 comments on commit 8ca9d4b

Please sign in to comment.