Skip to content

Commit

Permalink
ReadFromKafkaDoFn: handle failed seek
Browse files Browse the repository at this point in the history
  • Loading branch information
Dippatel98 committed Sep 13, 2024
1 parent 1c30708 commit 9f1041f
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,11 @@ public ProcessContinuation processElement(
return ProcessContinuation.resume();
}
for (ConsumerRecord<byte[], byte[]> rawRecord : rawRecords) {
// If the Kafka consumer returns a record with an offset that is already processed
// the record can be safely skipped.
if (rawRecord.offset() < startOffset) {
continue;
}
if (!tracker.tryClaim(rawRecord.offset())) {
return ProcessContinuation.stop();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ public class ReadFromKafkaDoFnTest {
private final ReadFromKafkaDoFn<String, String> exceptionDofnInstance =
ReadFromKafkaDoFn.create(makeReadSourceDescriptor(exceptionConsumer), RECORDS);

private final SimpleMockKafkaConsumerWithBrokenSeek consumerWithBrokenSeek =
new SimpleMockKafkaConsumerWithBrokenSeek(OffsetResetStrategy.NONE, topicPartition);

private final ReadFromKafkaDoFn<String, String> dofnInstanceWithBrokenSeek =
ReadFromKafkaDoFn.create(makeReadSourceDescriptor(consumerWithBrokenSeek), RECORDS);

private ReadSourceDescriptors<String, String> makeReadSourceDescriptor(
Consumer<byte[], byte[]> kafkaMockConsumer) {
return ReadSourceDescriptors.<String, String>read()
Expand Down Expand Up @@ -290,6 +296,17 @@ public synchronized long position(TopicPartition partition) {
}
}

/**
 * A {@link SimpleMockKafkaConsumer} whose {@link #seek} is a no-op, simulating a consumer whose
 * seek silently fails: polling continues from the consumer's current position instead of the
 * requested offset. Used to verify that the DoFn skips records whose offsets precede the
 * restriction's start offset.
 */
private static class SimpleMockKafkaConsumerWithBrokenSeek extends SimpleMockKafkaConsumer {

public SimpleMockKafkaConsumerWithBrokenSeek(
OffsetResetStrategy offsetResetStrategy, TopicPartition topicPartition) {
super(offsetResetStrategy, topicPartition);
}

// Intentionally ignore the requested position so polls keep returning records from the
// consumer's current position (set via setCurrentPos), not from `offset`.
@Override
public synchronized void seek(TopicPartition partition, long offset) {}
}

private static class MockMultiOutputReceiver implements MultiOutputReceiver {

MockOutputReceiver<KV<KafkaSourceDescriptor, KafkaRecord<String, String>>> mockOutputReceiver =
Expand Down Expand Up @@ -372,6 +389,7 @@ private List<KV<KafkaSourceDescriptor, KafkaRecord<String, String>>> createExpec
// Initializes every DoFn instance under test and restores the shared mock consumer to a
// clean state before each test case.
public void setUp() throws Exception {
dofnInstance.setup();
exceptionDofnInstance.setup();
dofnInstanceWithBrokenSeek.setup();
// NOTE(review): only `consumer` is reset here — `consumerWithBrokenSeek` is configured
// explicitly inside the tests that use it; confirm no state leaks between those tests.
consumer.reset();
}

Expand Down Expand Up @@ -470,6 +488,24 @@ public void testProcessElement() throws Exception {
receiver.getGoodRecords());
}

@Test
public void testProcessElementWithEarlierOffset() throws Exception {
  // The broken-seek consumer ignores seek(), so polling starts from the position set below
  // (offset 0) even though the restriction starts later.
  final long startOffset = 3L;
  consumerWithBrokenSeek.setNumOfRecordsPerPoll(6L);
  consumerWithBrokenSeek.setCurrentPos(0L);

  OffsetRange restriction = new OffsetRange(startOffset, startOffset + 3);
  OffsetRangeTracker tracker = new OffsetRangeTracker(restriction);
  KafkaSourceDescriptor descriptor =
      KafkaSourceDescriptor.of(topicPartition, null, null, null, null, null);
  MockMultiOutputReceiver outputReceiver = new MockMultiOutputReceiver();

  ProcessContinuation continuation =
      dofnInstanceWithBrokenSeek.processElement(descriptor, tracker, null, outputReceiver);

  // Claiming an offset past the end of the restriction fails, so processing stops.
  assertEquals(ProcessContinuation.stop(), continuation);
  // Records with offsets earlier than startOffset must be skipped rather than re-emitted;
  // only the 3 records inside the restriction are expected.
  assertEquals(
      createExpectedRecords(descriptor, startOffset, 3, "key", "value"),
      outputReceiver.getGoodRecords());
}

@Test
public void testRawSizeMetric() throws Exception {
final int numElements = 1000;
Expand Down

0 comments on commit 9f1041f

Please sign in to comment.