Skip to content

Deleting many rows from Cassandra

Turbo87 edited this page Oct 11, 2014 · 4 revisions

If you need to delete many rows from Cassandra and cannot simply drop and re-create your column family (CF), you can leverage the MutationBatch object. Here is an example of deleting many rows from a CF. We use a fixed batch size and submit the resulting MutationBatch tasks to a fixed-size thread pool.

/**
 * Deletes every row whose key is produced by {@code rowKeys} from the "MyCF" column family.
 *
 * Row deletions are accumulated into a {@code MutationBatch} of at most {@code batchSize}
 * rows; each full batch (and the final, possibly partial, one) is handed to a fixed-size
 * thread pool for execution.
 *
 * NOTE: the Futures returned by {@code submit} are discarded, so a failing batch is not
 * reported to the caller. Keep and inspect them if failures must be detected.
 */
private void deleteManyRows() {
	
	final ColumnFamily<String, Long> cf = 
			new ColumnFamily<String, Long>("MyCF", StringSerializer.get(), LongSerializer.get());
	
	final Keyspace ks = null;   // Assume you have this
	final Iterator<String> rowKeys = null;  // Assume you have this built
	
	final int batchSize = 1000;  // maximum number of row deletions per MutationBatch
	final int nThreads = 10;     // number of worker threads executing batches
	final ExecutorService threadPool = Executors.newFixedThreadPool(nThreads);
	
	try {
		MutationBatch currentBatch = ks.prepareMutationBatch();
		int currentBatchSize = 0;
		
		while (rowKeys.hasNext()) {
			
			String rowKey = rowKeys.next();
			currentBatch.withRow(cf, rowKey).delete();
			currentBatchSize++;
			
			// Flush when the batch is full (>=, so a batch never exceeds batchSize rows)
			// or when the iterator is exhausted, so the trailing partial batch is sent too.
			if (!rowKeys.hasNext() || (currentBatchSize >= batchSize)) {
				threadPool.submit(new MutationBatchExec(currentBatch));
				currentBatch = ks.prepareMutationBatch();
				currentBatchSize = 0;
			}
		}
	} finally {
		// Stop accepting new tasks; batches already submitted still run to completion.
		// Without this the pool's worker threads are never released.
		threadPool.shutdown();
	}
}
	
/**
 * Callable wrapper that runs a single {@link MutationBatch} when invoked,
 * so that the batch can be submitted to an executor.
 */
private class MutationBatchExec implements Callable<Void> {

	/** The batch this task executes. */
	private final MutationBatch pendingBatch;

	private MutationBatchExec(MutationBatch batch) {
		this.pendingBatch = batch;
	}

	/**
	 * Executes the wrapped batch; the value returned by {@code execute()} is discarded.
	 *
	 * @return always {@code null}
	 * @throws Exception if executing the batch throws
	 */
	@Override
	public Void call() throws Exception {
		this.pendingBatch.execute();
		return null;
	}
}
Clone this wiki locally