Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make licenses searchable and facetable #10204

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions conf/solr/9.3.0/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,8 @@

<!-- incomplete datasets issue 8822 -->
<field name="datasetValid" type="boolean" stored="true" indexed="true" multiValued="false"/>

<field name="license" type="string" stored="true" indexed="true" multiValued="false"/>
luddaniel marked this conversation as resolved.
Show resolved Hide resolved

<!--
METADATA SCHEMA FIELDS
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
### Search by License

A browse/search facet called "License" has been added and will be displayed as long as there is more than one license in datasets and datafiles in browse/search results. This facet allows you to filter by license such as CC0, etc.
Also, the Search API now supports license filtering via the `fq` parameter, for example: `/api/search?q=*&fq=license%3A%22CC0+1.0%22` for CC0 1.0. See PR #10204.


Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ private List<String> getSolrFilterQueries(boolean totalCountsOnly){
//fq=publicationStatus:"Unpublished"&fq=publicationStatus:"Draft"

// -----------------------------------------------------------------
// (4) FQ by dataset metadata vlidity
// (4) FQ by dataset metadata validity
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ha, good catch.

// -----------------------------------------------------------------
filterQueries.add(this.filterParams.getSolrFragmentForDatasetValidity());
//fq=datasetValid:(true OR false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
Expand Down Expand Up @@ -784,6 +783,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
solrInputDocument.addField(SearchFields.DATASET_PERSISTENT_ID, dataset.getGlobalId().toString());
solrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
solrInputDocument.addField(SearchFields.TYPE, "datasets");

boolean valid;
if (!indexableDataset.getDatasetVersion().isDraft()) {
valid = true;
Expand Down Expand Up @@ -855,6 +855,8 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
String parentDatasetTitle = "TBD";
if (datasetVersion != null) {

addLicenseToSolrDoc(solrInputDocument, datasetVersion);

solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));
Expand Down Expand Up @@ -1242,6 +1244,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameCompleteFinal);

datafileSolrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
addLicenseToSolrDoc(datafileSolrInputDocument, datasetVersion);

/**
* for rules on sorting files see
Expand Down Expand Up @@ -1611,6 +1614,16 @@ private List<String> getDataversePathsFromSegments(List<String> dataversePathSeg
return subtrees;
}

/**
 * Adds the license name of a dataset version to a Solr document under
 * {@code SearchFields.DATASET_LICENSE}.
 * <p>
 * Nothing is added when the version or its terms of use and access are
 * {@code null}. When the terms carry no license, the literal
 * "Custom Terms" is indexed instead of a license name.
 *
 * @param solrInputDocument the Solr document that receives the license field
 * @param datasetVersion    the dataset version whose license is read; may be {@code null}
 */
private void addLicenseToSolrDoc(SolrInputDocument solrInputDocument, DatasetVersion datasetVersion) {
    // Guard clause: without a version or its terms there is no license to index.
    if (datasetVersion == null || datasetVersion.getTermsOfUseAndAccess() == null) {
        return;
    }

    final boolean hasLicense = datasetVersion.getTermsOfUseAndAccess().getLicense() != null;
    // Fall back to the fixed label when the terms do not reference a license.
    final String licenseName = hasLicense
            ? datasetVersion.getTermsOfUseAndAccess().getLicense().getName()
            : "Custom Terms";

    solrInputDocument.addField(SearchFields.DATASET_LICENSE, licenseName);
}

private void addDataverseReleaseDateToSolrDoc(SolrInputDocument solrInputDocument, Dataverse dataverse) {
if (dataverse.getPublicationDate() != null) {
Calendar calendar = Calendar.getInstance();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,4 +276,6 @@ more targeted results for just datasets. The format is YYYY (i.e.

public static final String DATASET_VALID = "datasetValid";

public static final String DATASET_LICENSE = "license";

}
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,12 @@ public List<String> getFriendlyNamesFromFilterQuery(String filterQuery) {
friendlyNames.add(friendlyName.get());
return friendlyNames;
}
} else if (key.equals(SearchFields.DATASET_LICENSE)) {
try {
friendlyNames.add(BundleUtil.getStringFromPropertyFile("license." + valueWithoutQuotes.toLowerCase().replace(" ","_") + ".name", "License"));
} catch (Exception e) {
logger.fine(String.format("action=getFriendlyNamesFromFilterQuery cannot find friendlyName for key=%s value=%s", key, value));
}
}

friendlyNames.add(valueWithoutQuotes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ public SolrQueryResponse search(
solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY);
solrQuery.addFacetField(SearchFields.METADATA_SOURCE);
solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR);
solrQuery.addFacetField(SearchFields.DATASET_LICENSE);
/**
* @todo when a new method on datasetFieldService is available
* (retrieveFacetsByDataverse?) only show the facets that the
Expand Down Expand Up @@ -712,10 +713,12 @@ public SolrQueryResponse search(
boolean unpublishedAvailable = false;
boolean deaccessionedAvailable = false;
boolean hideMetadataSourceFacet = true;
boolean hideLicenseFacet = true;
for (FacetField facetField : queryResponse.getFacetFields()) {
FacetCategory facetCategory = new FacetCategory();
List<FacetLabel> facetLabelList = new ArrayList<>();
int numMetadataSources = 0;
int numLicenses = 0;
String metadataBlockName = "";
String datasetFieldName = "";
/**
Expand All @@ -741,23 +744,29 @@ public SolrQueryResponse search(
// logger.info("field: " + facetField.getName() + " " + facetFieldCount.getName() + " (" + facetFieldCount.getCount() + ")");
String localefriendlyName = null;
if (facetFieldCount.getCount() > 0) {
if(metadataBlockName.length() > 0 ) {
localefriendlyName = getLocaleTitle(datasetFieldName,facetFieldCount.getName(), metadataBlockName);
if(metadataBlockName.length() > 0 ) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and below it looks like we're fixing some whitespace problems. Only three spaces or something.

localefriendlyName = getLocaleTitle(datasetFieldName,facetFieldCount.getName(), metadataBlockName);
} else if (facetField.getName().equals(SearchFields.METADATA_TYPES)) {
Optional<DataverseMetadataBlockFacet> metadataBlockFacet = metadataBlockFacets.stream().filter(blockFacet -> blockFacet.getMetadataBlock().getName().equals(facetFieldCount.getName())).findFirst();
if (metadataBlockFacet.isEmpty()) {
Optional<DataverseMetadataBlockFacet> metadataBlockFacet = metadataBlockFacets.stream().filter(blockFacet -> blockFacet.getMetadataBlock().getName().equals(facetFieldCount.getName())).findFirst();
if (metadataBlockFacet.isEmpty()) {
// metadata block facet is not configured to be displayed => ignore
continue;
}
}

localefriendlyName = metadataBlockFacet.get().getMetadataBlock().getLocaleDisplayFacet();
} else {
try {
localefriendlyName = metadataBlockFacet.get().getMetadataBlock().getLocaleDisplayFacet();
} else if (facetField.getName().equals(SearchFields.DATASET_LICENSE)) {
try {
localefriendlyName = BundleUtil.getStringFromPropertyFile("license." + facetFieldCount.getName().toLowerCase().replace(" ","_") + ".name", "License");
} catch (Exception e) {
localefriendlyName = facetFieldCount.getName();
}
} else {
try {
localefriendlyName = BundleUtil.getStringFromPropertyFile(facetFieldCount.getName(), "Bundle");
} catch (Exception e) {
} catch (Exception e) {
localefriendlyName = facetFieldCount.getName();
}
}
}
}
FacetLabel facetLabel = new FacetLabel(localefriendlyName, facetFieldCount.getCount());
// quote field facets
facetLabel.setFilterQuery(facetField.getName() + ":\"" + facetFieldCount.getName() + "\"");
Expand All @@ -770,15 +779,19 @@ public SolrQueryResponse search(
} else if (facetFieldCount.getName().equals(IndexServiceBean.getDEACCESSIONED_STRING())) {
deaccessionedAvailable = true;
}
}
if (facetField.getName().equals(SearchFields.METADATA_SOURCE)) {
} else if (facetField.getName().equals(SearchFields.METADATA_SOURCE)) {
numMetadataSources++;
} else if (facetField.getName().equals(SearchFields.DATASET_LICENSE)) {
numLicenses++;
}
}
}
if (numMetadataSources > 1) {
hideMetadataSourceFacet = false;
}
if (numLicenses > 1) {
hideLicenseFacet = false;
}
facetCategory.setName(facetField.getName());
// hopefully people will never see the raw facetField.getName() because it may well have an _s at the end
facetCategory.setFriendlyName(facetField.getName());
Expand Down Expand Up @@ -855,6 +868,10 @@ public SolrQueryResponse search(
if (!hideMetadataSourceFacet) {
facetCategoryList.add(facetCategory);
}
} else if (facetCategory.getName().equals(SearchFields.DATASET_LICENSE)) {
if (!hideLicenseFacet) {
facetCategoryList.add(facetCategory);
}
} else {
facetCategoryList.add(facetCategory);
}
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/propertyFiles/License.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ license.cc0_1.0.description=Creative Commons CC0 1.0 Universal Public Domain Ded
license.cc_by_4.0.description=Creative Commons Attribution 4.0 International License.
license.cc0_1.0.name=CC0 1.0
license.cc_by_4.0.name=CC BY 4.0
license.custom_terms.name=Custom Terms

1 change: 1 addition & 0 deletions src/main/java/propertyFiles/staticSearchFields.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ staticSearchFields.metadata_type_ss=Dataset Feature
staticSearchFields.dvCategory=Dataverse Category
staticSearchFields.metadataSource=Metadata Source
staticSearchFields.publicationDate=Publication Year
staticSearchFields.license=License
staticSearchFields.fileTypeGroupFacet=File Type
staticSearchFields.dvObjectType=Type
staticSearchFields.fileTag=File Tag
Expand Down
Loading