Skip to content

Commit

Permalink
Add MediaExcerpt Justification bases (#425)
Browse files Browse the repository at this point in the history
* Add MediaExcerpt-based Justifications
* Add text fragment to MediaExcerpt links
* Fix URL substring validation

---------

Signed-off-by: Carl Gieringer <78054+carlgieringer@users.noreply.github.com>
  • Loading branch information
carlgieringer authored Jun 21, 2023
1 parent c6a9c66 commit 9ef3e45
Show file tree
Hide file tree
Showing 41 changed files with 785 additions and 200 deletions.
118 changes: 117 additions & 1 deletion howdju-client-common/lib/location.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
import { getCanonicalUrl } from "./location";
import {
brandedParse,
UrlLocatorRef,
UrlLocatorView,
UrlRef,
utcNow,
} from "howdju-common";
import { getCanonicalUrl, toUrlWithFragment } from "./location";

describe("getCanonicalUrl", () => {
it("should return the canonical URL by link rel=canonical", () => {
Expand All @@ -14,3 +21,112 @@ describe("getCanonicalUrl", () => {
expect(getCanonicalUrl()).toBe("https://example.com/canonical");
});
});

describe("toUrlWithFragment", () => {
  /** Fields that are the same for every anchor fixture in this suite. */
  const anchorCommon = () => ({
    urlLocatorId: "url-locator-id",
    created: utcNow(),
    creatorUserId: "creator-user-id",
  });

  /** The anchor used by every test case. */
  const firstAnchor = () => ({
    exactText: "the exact text",
    prefixText: "the prefix text",
    suffixText: "the suffix text",
    startOffset: 0,
    endOffset: 1,
    ...anchorCommon(),
  });

  /** The additional anchor used by the multiple-anchor case. */
  const secondAnchor = () => ({
    exactText: "the exact text 2",
    prefixText: "the prefix text 2",
    suffixText: "the suffix text 2",
    startOffset: 2,
    endOffset: 3,
    ...anchorCommon(),
  });

  type AnchorFixture = ReturnType<typeof firstAnchor>;

  /** Builds a UrlLocator fixture pointing at `url` with the given anchors. */
  function makeUrlLocator(
    url: string,
    anchors: AnchorFixture[]
  ): UrlLocatorView {
    return brandedParse(UrlLocatorRef, {
      id: "url-locator-id",
      url: brandedParse(UrlRef, {
        id: "url-id",
        url,
      }),
      anchors,
    });
  }

  it("should return the URL with the fragment", () => {
    const urlLocator = makeUrlLocator("https://example.com", [firstAnchor()]);

    const actual = toUrlWithFragment(urlLocator);

    expect(actual).toBe("https://example.com/#:~:text=the%20exact%20text");
  });

  it("is compatible with an existing document fragment", () => {
    const urlLocator = makeUrlLocator("https://example.com#some-heading", [
      firstAnchor(),
    ]);

    const actual = toUrlWithFragment(urlLocator);

    expect(actual).toBe(
      "https://example.com/#some-heading:~:text=the%20exact%20text"
    );
  });

  it("supports multiple anchors", () => {
    const urlLocator = makeUrlLocator("https://example.com", [
      firstAnchor(),
      secondAnchor(),
    ]);

    const actual = toUrlWithFragment(urlLocator);

    expect(actual).toBe(
      "https://example.com/#:~:text=the%20exact%20text&text=the%20exact%20text%202"
    );
  });

  it("overwrites an existing text fragment", () => {
    const urlLocator = makeUrlLocator(
      "https://example.com#:~:text=some%20previous%20fragment",
      [firstAnchor()]
    );

    const actual = toUrlWithFragment(urlLocator);

    expect(actual).toBe("https://example.com/#:~:text=the%20exact%20text");
  });
});
39 changes: 39 additions & 0 deletions howdju-client-common/lib/location.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { logger, UrlLocatorView } from "howdju-common";
import { isUndefined } from "lodash";

export function urlEquivalent(
Expand Down Expand Up @@ -47,3 +48,41 @@ export function getCurrentUrl() {
/**
 * Returns the value of `getCanonicalUrl()` when it is truthy; otherwise falls back to
 * `getCurrentUrl()`.
 */
export function getCanonicalOrCurrentUrl() {
  const canonicalUrl = getCanonicalUrl();
  if (canonicalUrl) {
    return canonicalUrl;
  }
  return getCurrentUrl();
}

/**
 * Builds a URL that targets the anchors of a UrlLocator using a text fragment directive.
 *
 * The result has the shape `https://example.com#[elementFragment]:~:text=...&text=...`, with one
 * `text=` directive per anchor. When the locator has no anchors, the URL is returned with only
 * its element fragment (if any).
 *
 * If the locator's URL already carries a fragment directive (`:~:`), an error is logged and that
 * directive is dropped; the element fragment preceding it (e.g. `some-heading` in
 * `#some-heading:~:text=old`) is preserved.
 *
 * @param urlLocator the locator providing the base URL and the anchors.
 * @param useContext whether to include each anchor's prefix/suffix text as context terms.
 * @returns the serialized URL.
 * @throws whatever `new URL` throws if the locator's URL cannot be parsed.
 */
export function toUrlWithFragment(
  urlLocator: UrlLocatorView,
  // TODO(427) fix prefix/suffix to be Chrome-compatible.
  useContext = false
) {
  // https://example.com#:~:text=[prefix-,]textStart[,textEnd][,-suffix]&...
  const urlObj = new URL(urlLocator.url.url);
  const directiveDelimiter = ":~:";
  const rawHash = urlObj.hash.replace(/^#/, "");
  const directiveIndex = rawHash.indexOf(directiveDelimiter);
  const hasDirective = directiveIndex >= 0;
  // TODO(38) what to do if the hash already contains a fragment? Overwrite it? We should probably
  // remove fragments from the URL before saving it to the database.
  if (hasDirective) {
    logger.error(`URL ${urlLocator.url.url} already contains a fragment.`);
  }
  // Drop only the pre-existing directive; anything before `:~:` is the element fragment (e.g. a
  // heading id) and is unrelated to the text directives we are about to write.
  const hash = hasDirective ? rawHash.slice(0, directiveIndex) : rawHash;

  const textFragments = urlLocator.anchors?.map((a) => {
    const parts: string[] = [];
    if (useContext && a.prefixText) {
      // A trailing dash marks a term as the prefix context.
      parts.push(cleanTextFragmentPart(a.prefixText) + "-");
    }
    parts.push(cleanTextFragmentPart(a.exactText));
    if (useContext && a.suffixText) {
      // A leading dash marks a term as the suffix context.
      parts.push("-" + cleanTextFragmentPart(a.suffixText));
    }
    return `text=${parts.join(",")}`;
  });

  let fragmentHash = "";
  if (textFragments?.length) {
    fragmentHash = `#${hash}${directiveDelimiter}${textFragments.join("&")}`;
  } else if (hash) {
    fragmentHash = `#${hash}`;
  }
  urlObj.hash = fragmentHash;
  return urlObj.toString();
}

/**
 * Percent-encodes text for use as a single term of a `text=` fragment directive.
 *
 * Newlines are removed before encoding. Text directive syntax uses `&`, `,` and `-` as
 * delimiters, so all three must be percent-encoded when they occur inside a term.
 * `encodeURIComponent` already escapes `&` and `,` but leaves `-` untouched, so `-` is escaped
 * explicitly as `%2D`.
 *
 * @param fragmentPart the raw text of the term.
 * @returns the encoded term, safe to join with the directive delimiters.
 */
function cleanTextFragmentPart(fragmentPart: string) {
  const withoutNewlines = fragmentPart.replace(/\n/g, "");
  return encodeURIComponent(withoutNewlines).replace(/-/g, "%2D");
}
3 changes: 3 additions & 0 deletions howdju-common/lib/apiModels.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ export const ExternalJustificationSearchFilters = [
"writId",
// Justifications based on this PropositionCompound
"propositionCompoundId",
"mediaExcerptId",
"sourceExcerptParaphraseId",
// Justifications based on this proposition in a PropositionCompound
"propositionId",
Expand All @@ -144,3 +145,5 @@ export interface SortDescription {
}

/**
 * `Persorg` wrapped in `Persisted`.
 *
 * NOTE(review): presumably the shape the API returns for a stored Persorg (per the `Out`
 * suffix) — confirm against `Persisted`'s definition.
 */
export type PersorgOut = Persisted<Persorg>;

/**
 * `Tag` wrapped in `Persisted`.
 *
 * NOTE(review): presumably the shape the API returns for a stored Tag (per the `Out` suffix) —
 * confirm against `Persisted`'s definition.
 */
export type TagOut = Persisted<Tag>;
3 changes: 2 additions & 1 deletion howdju-common/lib/contextTrails.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,9 @@ export function areValidTargetAndConnectingEntity(
prev.entity.basis.entity.atoms,
(a) => a.entity.id === id
);
case "SOURCE_EXCERPT":
case "WRIT_QUOTE":
// TODO(20): when we add Appearances, connect them to MediaExcerpts here.
case "MEDIA_EXCERPT":
return false;
}
}
Expand Down
1 change: 1 addition & 0 deletions howdju-common/lib/enums.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export const JustificationBasisSourceTypes = {
PROPOSITION: "PROPOSITION",
/** @deprecated TODO(215) */
SOURCE_EXCERPT_PARAPHRASE: "SOURCE_EXCERPT_PARAPHRASE",
MEDIA_EXCERPT: "MEDIA_EXCERPT",
} as const;
/**
 * Union of the value types of `JustificationBasisSourceTypes`. Because that object is declared
 * `as const`, this is a union of its string literal values.
 */
export type JustificationBasisSourceType =
typeof JustificationBasisSourceTypes[keyof typeof JustificationBasisSourceTypes];
Expand Down
20 changes: 20 additions & 0 deletions howdju-common/lib/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,13 @@ export const hasQuote = (j: Justification) =>
/**
 * Whether the justification's basis type is "PROPOSITION_COMPOUND". A falsy justification
 * yields false.
 */
export const isPropositionCompoundBased = (
  j: Justification | CreateJustification | CreateJustificationInput
) => {
  if (!j) {
    return false;
  }
  return j.basis.type === "PROPOSITION_COMPOUND";
};

/**
 * Whether the justification's basis type is "MEDIA_EXCERPT".
 *
 * Like the sibling predicates `isPropositionCompoundBased` and `isWritQuoteBased`, a falsy
 * justification yields false instead of throwing.
 *
 * @param j the justification to inspect.
 * @returns true if and only if `j` is truthy and its basis type is "MEDIA_EXCERPT".
 */
export function isMediaExcerptBased(
  j: Justification | CreateJustification | CreateJustificationInput
) {
  return j ? j.basis.type === "MEDIA_EXCERPT" : false;
}

/**
 * Whether the justification's basis type is "WRIT_QUOTE". A falsy justification yields false.
 */
export const isWritQuoteBased = (
  j: Justification | CreateJustification | CreateJustificationInput
) => {
  if (!j) {
    return false;
  }
  return j.basis.type === "WRIT_QUOTE";
};
Expand Down Expand Up @@ -483,6 +490,11 @@ const muxCreateJustificationBasisErrors = (
_errors: errors._errors,
propositionCompound: errors.entity,
};
case "MEDIA_EXCERPT":
return {
_errors: errors._errors,
mediaExcerpt: errors.entity,
};
case "WRIT_QUOTE":
return {
_errors: errors._errors,
Expand Down Expand Up @@ -533,6 +545,14 @@ const demuxCreateJustificationInputBasis = (
type: "PROPOSITION_COMPOUND",
entity: basis.propositionCompound,
};
case "MEDIA_EXCERPT":
if (!basis.mediaExcerpt) {
throw newImpossibleError("Media excerpt must be defined.");
}
return {
type: "MEDIA_EXCERPT",
entity: basis.mediaExcerpt,
};
case "WRIT_QUOTE":
// TODO(201) WritQuote bases are temporarily supported until we support SourceExcerpt bases.
return {
Expand Down
18 changes: 0 additions & 18 deletions howdju-common/lib/urls.js

This file was deleted.

31 changes: 31 additions & 0 deletions howdju-common/lib/urls.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { extractDomain } from "./urls";

describe("urls", () => {
  describe("extractDomain", () => {
    // Inputs that cannot yield a hostname.
    it("should return undefined if url is undefined", () => {
      expect(extractDomain(undefined)).toBeUndefined();
    });

    it("should return undefined if url is empty", () => {
      expect(extractDomain("")).toBeUndefined();
    });

    it("should return undefined if url is not a valid url", () => {
      expect(extractDomain("not a valid url")).toBeUndefined();
    });

    // Parseable URLs: only the hostname is returned, never the port.
    it("should return the domain if url is a valid url", () => {
      expect(extractDomain("https://www.google.com")).toBe("www.google.com");
    });

    it("should return the domain if url is a valid url with a port", () => {
      expect(extractDomain("https://www.google.com:8080")).toBe(
        "www.google.com"
      );
    });
  });
});
11 changes: 11 additions & 0 deletions howdju-common/lib/urls.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Extracts the hostname from a URL string.
 *
 * @param url the URL to parse; may be undefined or empty.
 * @returns the URL's hostname (without port), or undefined when `url` is falsy or cannot be
 *   parsed by `new URL`.
 */
export function extractDomain(url: string | undefined) {
  let hostname: string | undefined = undefined;
  if (url) {
    try {
      hostname = new URL(url).hostname;
    } catch {
      // Unparseable input: leave hostname undefined.
    }
  }
  return hostname;
}
44 changes: 42 additions & 2 deletions howdju-common/lib/viewModels.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
import { JustificationOut, PropositionOut, StatementOut } from "./apiModels";
import {
JustificationOut,
MediaExcerptOut,
PropositionCompoundOut,
PropositionOut,
StatementOut,
WritQuoteOut,
} from "./apiModels";

/** A JustificationOut that has been joined with its root target in the client */
export type JustificationView = Omit<
JustificationOut,
"rootTarget" | "rootTargetType" | "target"
"rootTarget" | "rootTargetType" | "target" | "basis"
> &
(
| {
Expand All @@ -28,4 +35,37 @@ export type JustificationView = Omit<
type: "JUSTIFICATION";
entity: JustificationView;
};
} & {
basis:
| {
type: "PROPOSITION_COMPOUND";
entity: PropositionCompoundOut;
}
| {
type: "MEDIA_EXCERPT";
entity: MediaExcerptView;
}
| {
type: "WRIT_QUOTE";
entity: WritQuoteOut;
};
};

/**
 * The element type of `MediaExcerptOut["locators"]["urlLocators"]`, extended with a `key`
 * property.
 */
export type UrlLocatorView =
MediaExcerptOut["locators"]["urlLocators"][number] & {
/** A key uniquely identifying a url locator relative to others. */
key: string;
};
/**
 * A `MediaExcerptOut` whose citations, URL locators, and speakers each additionally carry a
 * `key` property.
 */
export interface MediaExcerptView extends MediaExcerptOut {
// Each citation of MediaExcerptOut plus a key.
citations: (MediaExcerptOut["citations"][number] & {
/** A key uniquely identifying a citation relative to others. */
key: string;
})[];
// Same as MediaExcerptOut's locators, but with urlLocators narrowed to the keyed view type.
locators: MediaExcerptOut["locators"] & {
urlLocators: UrlLocatorView[];
};
// Each speaker of MediaExcerptOut plus a key.
speakers: (MediaExcerptOut["speakers"][number] & {
/** A key uniquely identifying a persorg relative to others. */
key: string;
})[];
}
2 changes: 1 addition & 1 deletion howdju-common/lib/zodRefinements.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ const urlRefinement =

if (domainPattern) {
const domain = extractDomain(val);
if (!domainPattern.test(domain)) {
if (!domain || !domainPattern.test(domain)) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: `URL domain must match: ${domainPattern}.`,
Expand Down
9 changes: 8 additions & 1 deletion howdju-common/lib/zodSchemaTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@ export type PersistedJustificationWithRootRef = Omit<
type: "PROPOSITION_COMPOUND";
entity: Persisted<PropositionCompound>;
}
| { type: "SOURCE_EXCERPT"; entity: Persisted<SourceExcerpt> }
| {
type: "MEDIA_EXCERPT";
entity: Persisted<MediaExcerpt>;
}
| { type: "WRIT_QUOTE"; entity: Persisted<WritQuote> };
};

Expand Down Expand Up @@ -121,6 +124,10 @@ export type BasedJustificationWithRootRef = Omit<
type: "PROPOSITION_COMPOUND";
entity: Persisted<PropositionCompound>;
}
| {
type: "MEDIA_EXCERPT";
entity: Persisted<MediaExcerpt>;
}
| { type: "SOURCE_EXCERPT"; entity: Persisted<SourceExcerpt> }
| { type: "WRIT_QUOTE"; entity: Persisted<WritQuote> };
};
Expand Down
Loading

0 comments on commit 9ef3e45

Please sign in to comment.