Skip to content

Commit

Permalink
Create IsIndexedIterable
Browse files Browse the repository at this point in the history
  • Loading branch information
hmiguim committed Dec 12, 2023
1 parent 3b6453e commit 9669402
Show file tree
Hide file tree
Showing 4 changed files with 279 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.Map;
import java.util.Set;

import com.databasepreservation.common.server.index.utils.IterableDatabaseResult;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
Expand Down Expand Up @@ -226,6 +227,11 @@ public IterableNestedIndexResult findAllRows(String databaseUUID, SolrQuery quer
return new IterableNestedIndexResult(client, databaseUUID, query, sorter);
}

public <T extends IsIndexed> IterableDatabaseResult<T> findAll(Class<T> classToReturn, Filter filter, Sorter sorter,
List<String> fieldsToReturn) {
return new IterableDatabaseResult<>(client, classToReturn, filter, sorter, fieldsToReturn);
}

public IndexResult<ViewerRow> findRows(String databaseUUID, List<SolrQuery> queryList)
throws GenericException, RequestNotValidException {
return SolrUtils.findRowsWithSubQuery(client, databaseUUID, queryList);
Expand Down Expand Up @@ -501,14 +507,15 @@ private final void updateValidationFields(String databaseUUID, Pair<String, ?>..
}
}

public void updateDatabasePermissions(String databaseUUID, Set<String> permissions) throws GenericException, ViewerException {
public void updateDatabasePermissions(String databaseUUID, Set<String> permissions)
throws GenericException, ViewerException {
LOGGER.debug("Starting to update database permissions ({})", databaseUUID);
// create document to update this DB
SolrInputDocument doc = new SolrInputDocument();
doc.addField(ViewerConstants.INDEX_ID, databaseUUID);

try {
doc.addField(ViewerConstants.SOLR_DATABASES_PERMISSIONS,SolrUtils.asValueUpdate(permissions));
doc.addField(ViewerConstants.SOLR_DATABASES_PERMISSIONS, SolrUtils.asValueUpdate(permissions));
insertDocument(ViewerConstants.SOLR_INDEX_DATABASES_COLLECTION_NAME, doc);
} catch (ViewerException e) {
LOGGER.error("Could not update database metadata ({})", databaseUUID, e);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package com.databasepreservation.common.server.index.utils;

import com.databasepreservation.common.client.index.IndexResult;
import com.databasepreservation.common.client.index.IsIndexed;
import com.databasepreservation.common.client.index.filter.Filter;
import com.databasepreservation.common.client.index.sort.Sorter;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.common.params.CursorMarkParams;
import org.roda.core.data.exceptions.GenericException;
import org.roda.core.data.exceptions.RequestNotValidException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

/**
* @author Miguel Guimarães <mguimaraes@keep.pt>
*/
public class DatabaseResultIterator<T extends IsIndexed> implements Iterator<T> {

public static final int DEFAULT_PAGE_SIZE = 1000;
public static final int DEFAULT_RETRIES = 100;
public static final int DEFAULT_SLEEP_BETWEEN_RETRIES = 10000;
private static final Logger LOGGER = LoggerFactory.getLogger(DatabaseResultIterator.class);
private final SolrClient index;
private final Class<T> classToRetrieve;
private final Filter filter;
private final Sorter sorter;
private final List<String> fieldsToReturn;
private int pageSize = DEFAULT_PAGE_SIZE;
private int retries = DEFAULT_RETRIES;
private int sleepBetweenRetries = DEFAULT_SLEEP_BETWEEN_RETRIES;
private IndexResult<T> result = null;
private int indexInResult = 0;
private String cursorMark = CursorMarkParams.CURSOR_MARK_START;
private String nextCursorMark = CursorMarkParams.CURSOR_MARK_START;
private T next = null;

public DatabaseResultIterator(SolrClient index, Class<T> classToRetrieve, Filter filter, Sorter sorter,
List<String> fieldsToReturn) {
this.index = index;
this.filter = filter;
this.classToRetrieve = classToRetrieve;
this.sorter = sorter;
this.fieldsToReturn = fieldsToReturn;

getCurrentAndPrepareNext();
}

private T getCurrentAndPrepareNext() {
T current = next;

// ensure index result is renewed
if (result == null || result.getResults().size() == indexInResult) {
indexInResult = 0;

cursorMark = nextCursorMark;
result = null;
nextCursorMark = null;
int availableRetries = retries;

do {
try {
Pair<IndexResult<T>, String> page = SolrUtils.find(index, classToRetrieve, filter, sorter, pageSize,
cursorMark, fieldsToReturn, new HashMap<>());
result = page.getFirst();
nextCursorMark = page.getSecond();

} catch (GenericException | RequestNotValidException e) {
if (availableRetries > 0) {
availableRetries--;
LOGGER.warn("Error getting next page from Solr, retrying in {}ms...", sleepBetweenRetries);
try {
Thread.sleep(sleepBetweenRetries);
} catch (InterruptedException e1) {
// do nothing
}
} else {
LOGGER.error("Error getting next page from Solr, no more retries.", e);
throw new NoSuchElementException("Error getting next item in list: " + e.getMessage());
}
}
} while (result == null);
}

if (indexInResult < result.getResults().size()) {
this.next = result.getResults().get(indexInResult++);
} else {
this.next = null;
}

return current;
}

@Override
public boolean hasNext() {
return next != null;
}

@Override
public T next() {
return getCurrentAndPrepareNext();
}

/**
* @param pageSize
* the pageSize to set
*/
public void setPageSize(int pageSize) {
this.pageSize = pageSize;
}

/**
* @return the retries
*/
public int getRetries() {
return retries;
}

/**
* @param retries
* the retries to set
*/
public void setRetries(int retries) {
this.retries = retries;
}

/**
* @return the sleepBetweenRetries
*/
public int getSleepBetweenRetries() {
return sleepBetweenRetries;
}

/**
* @param sleepBetweenRetries
* the sleepBetweenRetries to set
*/
public void setSleepBetweenRetries(int sleepBetweenRetries) {
this.sleepBetweenRetries = sleepBetweenRetries;
}

/**
* Gets the total count of objects as reported by underlying Solr requests.
*
* @return
*/
public long getTotalCount() {
return result != null ? result.getTotalCount() : -1;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package com.databasepreservation.common.server.index.utils;

import com.databasepreservation.common.client.index.IsIndexed;
import com.databasepreservation.common.client.index.filter.Filter;
import com.databasepreservation.common.client.index.sort.Sorter;
import com.databasepreservation.common.utils.CloseableIterable;
import org.apache.solr.client.solrj.SolrClient;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

/**
* @author Miguel Guimarães <mguimaraes@keep.pt>
*/
public class IterableDatabaseResult<T extends IsIndexed> implements CloseableIterable<T> {

private static int PAGE_SIZE = -1;
private static int RETRIES = -1;
private static int SLEEP_BETWEEN_RETRIES = -1;

private final DatabaseResultIterator<T> iterator;

public IterableDatabaseResult(final SolrClient solrClient, final Class<T> returnClass, final Filter filter,
final Sorter sorter, final List<String> fieldsToReturn) {
iterator = new DatabaseResultIterator<>(solrClient, returnClass, filter, sorter, fieldsToReturn);

if (PAGE_SIZE > 0) {
iterator.setPageSize(PAGE_SIZE);
}

if (RETRIES > 0) {
iterator.setRetries(RETRIES);
}

if (SLEEP_BETWEEN_RETRIES > 0) {
iterator.setSleepBetweenRetries(SLEEP_BETWEEN_RETRIES);
}
}

public static void injectSearchPageSize(int pageSize) {
PAGE_SIZE = pageSize;
}

public static void injectNumberOfRetries(int retries) {
RETRIES = retries;
}

public static void injectSleepBetweenRetries(int sleepTime) {
SLEEP_BETWEEN_RETRIES = sleepTime;
}

@Override
public Iterator<T> iterator() {
return iterator;
}

@Override
public void close() throws IOException {
// do nothing
}

/**
* @see IndexResultIterator#getTotalCount()
*/
public long getTotalCount() {
return iterator.getTotalCount();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.TimeZone;
import java.util.UUID;

import com.databasepreservation.common.server.index.schema.SolrDefaultCollectionRegistry;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
Expand Down Expand Up @@ -85,17 +86,15 @@
* @author Bruno Ferreira <bferreira@keep.pt>
*/
public class SolrUtils {
public static final String COMMON = "common";
public static final String CONF = "conf";
public static final String SCHEMA = "managed-schema";
private static final Logger LOGGER = LoggerFactory.getLogger(SolrUtils.class);
private static final String DEFAULT_QUERY_PARSER_OPERATOR = "AND";
private static final Set<String> NON_REPEATABLE_FIELDS = new HashSet<>(Arrays.asList(RodaConstants.AIP_TITLE,
RodaConstants.AIP_LEVEL, RodaConstants.AIP_DATE_INITIAL, RodaConstants.AIP_DATE_FINAL));

private static Map<String, List<String>> liteFieldsForEachClass = new HashMap<>();

public static final String COMMON = "common";
public static final String CONF = "conf";
public static final String SCHEMA = "managed-schema";

private SolrUtils() {
// do nothing
}
Expand Down Expand Up @@ -200,6 +199,48 @@ public static Pair<IndexResult<ViewerRow>, String> findRows(SolrClient index, St
new HashMap<>());
}

public static <T extends IsIndexed> Pair<IndexResult<T>, String> find(SolrClient index, Class<T> classToRetrieve,
Filter filter, Sorter sorter, int pageSize, String cursorMark, List<String> fieldsToReturn,
Map<String, String> extraParameters) throws RequestNotValidException, GenericException {
Pair<IndexResult<T>, String> ret;
SolrQuery query = new SolrQuery();
query.setParam("q.op", DEFAULT_QUERY_PARSER_OPERATOR);
query.setQuery(parseFilter(filter));

query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
query.setRows(pageSize);
final List<SolrQuery.SortClause> sortClauses = parseSorter(sorter);
sortClauses.add(SolrQuery.SortClause.asc(RodaConstants.INDEX_UUID));
query.setSorts(sortClauses);

if (!extraParameters.isEmpty()) {
List<String> extraFields = new ArrayList<>();
for (Map.Entry<String, String> entry : extraParameters.entrySet()) {
query.setParam(entry.getKey(), entry.getValue());
extraFields.add(entry.getKey());
}
}

if (!fieldsToReturn.isEmpty()) {
query.setFields(fieldsToReturn.toArray(new String[0]));
}

try {
QueryResponse response = index.query(SolrDefaultCollectionRegistry.get(classToRetrieve).getIndexName(), query);
IndexResult<T> result = queryResponseToIndexResult(response, SolrDefaultCollectionRegistry.get(classToRetrieve),
Facets.NONE);
ret = Pair.of(result, response.getNextCursorMark());
} catch (SolrServerException | IOException e) {
throw new GenericException("Could not query index", e);
} catch (SolrException e) {
throw new RequestNotValidException(e);
} catch (RuntimeException e) {
throw new GenericException("Unexpected exception while querying index", e);
}

return ret;
}

public static Pair<IndexResult<ViewerRow>, String> findRows(SolrClient index, String databaseUUID, Filter filter,
Sorter sorter, int pageSize, String cursorMark, List<String> fieldsToReturn, Map<String, String> extraParameters)
throws GenericException, RequestNotValidException {
Expand Down

0 comments on commit 9669402

Please sign in to comment.