From 0df10b43c1891931da6956f5c236006fb41b0f76 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 12:13:00 -0400 Subject: [PATCH 01/18] Remove Phobos references --- .rubocop_todo.yml | 17 -- .tool-versions | 1 + deimos-ruby.gemspec | 3 +- lib/deimos.rb | 11 +- lib/deimos/backends/kafka.rb | 9 - lib/deimos/backends/kafka_async.rb | 9 - lib/deimos/config/configuration.rb | 7 - lib/deimos/config/phobos_config.rb | 164 ----------- lib/deimos/consume/batch_consumption.rb | 1 - lib/deimos/consume/message_consumption.rb | 1 - lib/deimos/kafka_message.rb | 4 +- lib/deimos/monkey_patches/phobos_cli.rb | 35 --- lib/deimos/producer.rb | 2 +- lib/tasks/deimos.rake | 4 +- spec/backends/kafka_async_spec.rb | 4 +- spec/backends/kafka_spec.rb | 4 +- spec/config/configuration_spec.rb | 329 ---------------------- 17 files changed, 9 insertions(+), 596 deletions(-) create mode 100644 .tool-versions delete mode 100644 lib/deimos/config/phobos_config.rb delete mode 100644 lib/deimos/monkey_patches/phobos_cli.rb delete mode 100644 spec/config/configuration_spec.rb diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 0adce443..621b68fb 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -146,7 +146,6 @@ Lint/UselessAssignment: Metrics/AbcSize: Exclude: - 'lib/deimos/active_record_consume/message_consumption.rb' - - 'lib/deimos/config/phobos_config.rb' - 'lib/deimos/instrumentation.rb' - 'lib/deimos/kafka_source.rb' - 'lib/deimos/kafka_topic_info.rb' @@ -159,12 +158,6 @@ Metrics/AbcSize: - 'lib/deimos/utils/schema_controller_mixin.rb' - 'lib/generators/deimos/schema_class_generator.rb' -# Offense count: 1 -# Configuration parameters: CountComments, Max, CountAsOne, ExcludedMethods. -Metrics/MethodLength: - Exclude: - - 'lib/deimos/config/phobos_config.rb' - # Offense count: 5 # Configuration parameters: CountComments, Max, CountAsOne. Metrics/ModuleLength: @@ -179,7 +172,6 @@ Metrics/ModuleLength: # Configuration parameters: IgnoredMethods, Max. Metrics/PerceivedComplexity: Exclude: - - 'lib/deimos/config/phobos_config.rb' - 'lib/deimos/consume/batch_consumption.rb' - 'lib/deimos/kafka_source.rb' - 'lib/deimos/schema_backends/avro_schema_coercer.rb' @@ -253,7 +245,6 @@ Style/FrozenStringLiteralComment: Style/GlobalStdStream: Exclude: - 'lib/deimos/config/configuration.rb' - - 'lib/deimos/config/phobos_config.rb' - 'lib/deimos/metrics/mock.rb' - 'lib/deimos/test_helpers.rb' - 'lib/deimos/tracing/mock.rb' @@ -329,14 +320,6 @@ Style/StringLiterals: - 'spec/schemas/my_namespace/my_schema_with_complex_type.rb' - 'spec/spec_helper.rb' -# Offense count: 1 -# Cop supports --auto-correct. -# Configuration parameters: EnforcedStyle, AllowSafeAssignment. -# SupportedStyles: require_parentheses, require_no_parentheses, require_parentheses_when_complex -Style/TernaryParentheses: - Exclude: - - 'lib/deimos/config/phobos_config.rb' - # Offense count: 21 # Cop supports --auto-correct. 
Style/TrailingBodyOnModule: diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 00000000..f2a971aa --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +ruby 3.2.2 diff --git a/deimos-ruby.gemspec b/deimos-ruby.gemspec index 376fc8a0..8379ee51 100644 --- a/deimos-ruby.gemspec +++ b/deimos-ruby.gemspec @@ -19,9 +19,8 @@ Gem::Specification.new do |spec| spec.require_paths = ['lib'] spec.add_runtime_dependency('avro_turf', '>= 1.4', '< 2') + spec.add_runtime_dependency('karafka', '~> 2.0') spec.add_runtime_dependency('fig_tree', '~> 0.0.2') - spec.add_runtime_dependency('phobos', '>= 1.9', '< 3.0') - spec.add_runtime_dependency('ruby-kafka', '< 2') spec.add_runtime_dependency('sigurd', '>= 0.1.0', '< 1.0') spec.add_development_dependency('activerecord-import') diff --git a/lib/deimos.rb b/lib/deimos.rb index e278b622..39235b67 100644 --- a/lib/deimos.rb +++ b/lib/deimos.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true require 'active_support' +require 'karafka' -require 'phobos' require 'deimos/version' require 'deimos/config/configuration' require 'deimos/producer' @@ -23,7 +23,6 @@ require 'deimos/schema_class/enum' require 'deimos/schema_class/record' -require 'deimos/monkey_patches/phobos_cli' require 'deimos/railtie' if defined?(Rails) require 'deimos/utils/schema_controller_mixin' if defined?(ActionController) @@ -117,13 +116,5 @@ def start_db_backend!(thread_count: 1) end end -at_exit do - begin - Deimos::Backends::KafkaAsync.shutdown_producer - Deimos::Backends::Kafka.shutdown_producer - rescue StandardError => e - Deimos.config.logger.error( - "Error closing producer on shutdown: #{e.message} #{e.backtrace.join("\n")}" - ) end end diff --git a/lib/deimos/backends/kafka.rb b/lib/deimos/backends/kafka.rb index e8f329cb..4bcc4444 100644 --- a/lib/deimos/backends/kafka.rb +++ b/lib/deimos/backends/kafka.rb @@ -4,15 +4,6 @@ module Deimos module Backends # Default backend to produce to Kafka. class Kafka < Base - include Phobos::Producer - - # Shut down the producer if necessary. - # @return [void] - def self.shutdown_producer - producer.sync_producer_shutdown if producer.respond_to?(:sync_producer_shutdown) - producer.kafka_client&.close - end - # :nodoc: def self.execute(producer_class:, messages:) Deimos.instrument( diff --git a/lib/deimos/backends/kafka_async.rb b/lib/deimos/backends/kafka_async.rb index ae0c345b..a5699d73 100644 --- a/lib/deimos/backends/kafka_async.rb +++ b/lib/deimos/backends/kafka_async.rb @@ -4,15 +4,6 @@ module Deimos module Backends # Backend which produces to Kafka via an async producer. class KafkaAsync < Base - include Phobos::Producer - - # Shut down the producer cleanly. 
- # @return [void] - def self.shutdown_producer - producer.async_producer_shutdown - producer.kafka_client&.close - end - # :nodoc: def self.execute(producer_class:, messages:) Deimos.instrument( diff --git a/lib/deimos/config/configuration.rb b/lib/deimos/config/configuration.rb index ac73e6b0..d16ebb0b 100644 --- a/lib/deimos/config/configuration.rb +++ b/lib/deimos/config/configuration.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require 'fig_tree' -require_relative 'phobos_config' require_relative '../metrics/mock' require_relative '../tracing/mock' require 'active_support/core_ext/numeric' @@ -10,14 +9,8 @@ module Deimos # rubocop:disable Metrics/ModuleLength include FigTree - # :nodoc: - class FigTree::ConfigStruct - include Deimos::PhobosConfig - end - # :nodoc: after_configure do - Phobos.configure(self.config.phobos_config) if self.config.schema.use_schema_classes load_generated_schema_classes end diff --git a/lib/deimos/config/phobos_config.rb b/lib/deimos/config/phobos_config.rb deleted file mode 100644 index 8e739bf2..00000000 --- a/lib/deimos/config/phobos_config.rb +++ /dev/null @@ -1,164 +0,0 @@ -# frozen_string_literal: true - -require 'active_support/core_ext/array' - -module Deimos - # Module to handle phobos.yml as well as outputting the configuration to save - # to Phobos itself. - module PhobosConfig - extend ActiveSupport::Concern - - # @return [Hash] - def to_h - (FIELDS + [:handler]).map { |f| - val = self.send(f) - if f == :backoff && val - [:backoff, _backoff(val)] - elsif val.present? - [f, val] - end - }.to_h - end - - # @return [void] - def reset! - super - Phobos.configure(self.phobos_config) - end - - # Create a hash representing the config that Phobos expects. - # @return [Hash] - def phobos_config - p_config = { - logger: Logger.new(STDOUT), - custom_logger: self.phobos_logger, - custom_kafka_logger: self.kafka.logger, - kafka: { - client_id: self.kafka.client_id, - connect_timeout: self.kafka.connect_timeout, - socket_timeout: self.kafka.socket_timeout, - ssl_verify_hostname: self.kafka.ssl.verify_hostname, - ssl_ca_certs_from_system: self.kafka.ssl.ca_certs_from_system, - seed_brokers: Array.wrap(self.kafka.seed_brokers) - }, - producer: { - ack_timeout: self.producers.ack_timeout, - required_acks: self.producers.required_acks, - max_retries: self.producers.max_retries, - retry_backoff: self.producers.retry_backoff, - max_buffer_size: self.producers.max_buffer_size, - max_buffer_bytesize: self.producers.max_buffer_bytesize, - compression_codec: self.producers.compression_codec, - compression_threshold: self.producers.compression_threshold, - max_queue_size: self.producers.max_queue_size, - delivery_threshold: self.producers.delivery_threshold, - delivery_interval: self.producers.delivery_interval, - persistent_connections: self.producers.persistent_connections - }, - consumer: { - session_timeout: self.consumers.session_timeout, - offset_commit_interval: self.consumers.offset_commit_interval, - offset_commit_threshold: self.consumers.offset_commit_threshold, - heartbeat_interval: self.consumers.heartbeat_interval - }, - backoff: _backoff(self.consumers.backoff.to_a) - } - - p_config[:listeners] = self.consumer_objects.map do |consumer| - next nil if consumer.disabled - - hash = consumer.to_h.reject do |k, _| - %i(class_name schema namespace key_config backoff disabled replace_associations - bulk_import_id_column).include?(k) - end - hash = hash.map { |k, v| [k, v.is_a?(Symbol) ? 
v.to_s : v] }.to_h - hash[:handler] = consumer.class_name - if consumer.backoff - hash[:backoff] = _backoff(consumer.backoff.to_a) - end - hash - end - p_config[:listeners].compact! - - if self.kafka.ssl.enabled - %w(ca_cert client_cert client_cert_key).each do |key| - next if self.kafka.ssl.send(key).blank? - - p_config[:kafka]["ssl_#{key}".to_sym] = ssl_var_contents(self.kafka.ssl.send(key)) - end - end - - if self.kafka.sasl.enabled - p_config[:kafka][:sasl_over_ssl] = self.kafka.sasl.enforce_ssl - %w( - gssapi_principal - gssapi_keytab - plain_authzid - plain_username - plain_password - scram_username - scram_password - scram_mechanism - oauth_token_provider - ).each do |key| - value = self.kafka.sasl.send(key) - next if value.blank? - - p_config[:kafka]["sasl_#{key}".to_sym] = value - end - end - p_config - end - - # @param key [String] - # @return [String] - def ssl_var_contents(key) - File.exist?(key) ? File.read(key) : key - end - - # Legacy method to parse Phobos config file - # @!visibility private - def phobos_config_file=(file) - pconfig = YAML.load(ERB.new(File.read(File.expand_path(file))).result). # rubocop:disable Security/YAMLLoad - with_indifferent_access - self.logger&.warn('phobos.yml is deprecated - use direct configuration instead.') - pconfig[:kafka].each do |k, v| - if k.starts_with?('ssl') - k = k.sub('ssl_', '') - self.kafka.ssl.send("#{k}=", v) - elsif k.starts_with?('sasl') - k = (k == 'sasl_over_ssl') ? 'enforce_ssl' : k.sub('sasl_', '') - self.kafka.sasl.send("#{k}=", v) - else - self.kafka.send("#{k}=", v) - end - end - pconfig[:producer].each do |k, v| - self.producers.send("#{k}=", v) - end - pconfig[:consumer].each do |k, v| - self.consumers.send("#{k}=", v) - end - self.consumers.backoff = pconfig[:backoff][:min_ms]..pconfig[:backoff][:max_ms] - pconfig[:listeners].each do |listener_hash| - self.consumer do - listener_hash.each do |k, v| - k = 'class_name' if k == 'handler' - send(k, v) - end - end - end - end - - private - - # @param values [Array] - # @return [Hash] - def _backoff(values) - { - min_ms: values[0], - max_ms: values[-1] - } - end - end -end diff --git a/lib/deimos/consume/batch_consumption.rb b/lib/deimos/consume/batch_consumption.rb index 3a3db801..d2462097 100644 --- a/lib/deimos/consume/batch_consumption.rb +++ b/lib/deimos/consume/batch_consumption.rb @@ -7,7 +7,6 @@ module Consume # of messages to be handled at once module BatchConsumption extend ActiveSupport::Concern - include Phobos::BatchHandler # @param batch [Array] # @param metadata [Hash] diff --git a/lib/deimos/consume/message_consumption.rb b/lib/deimos/consume/message_consumption.rb index de6f2eb8..5b2a925f 100644 --- a/lib/deimos/consume/message_consumption.rb +++ b/lib/deimos/consume/message_consumption.rb @@ -6,7 +6,6 @@ module Consume # are invoked for every individual message. 
module MessageConsumption extend ActiveSupport::Concern - include Phobos::Handler # @param payload [String] # @param metadata [Hash] diff --git a/lib/deimos/kafka_message.rb b/lib/deimos/kafka_message.rb index 1bdc275b..0a791294 100644 --- a/lib/deimos/kafka_message.rb +++ b/lib/deimos/kafka_message.rb @@ -49,8 +49,7 @@ def self.decoded(messages=[]) end end - # @return [Hash] - def phobos_message + def karafka_message { payload: self.message, partition_key: self.partition_key, @@ -58,5 +57,6 @@ def phobos_message topic: self.topic } end + end end diff --git a/lib/deimos/monkey_patches/phobos_cli.rb b/lib/deimos/monkey_patches/phobos_cli.rb deleted file mode 100644 index 41bf4036..00000000 --- a/lib/deimos/monkey_patches/phobos_cli.rb +++ /dev/null @@ -1,35 +0,0 @@ -# frozen_string_literal: true - -require 'phobos/cli/start' - -#@!visibility private -module Phobos - # :nodoc: - module CLI - # :nodoc: - class Start - # :nodoc: - def validate_listeners! - Phobos.config.listeners.each do |listener| - handler = listener.handler - begin - handler.constantize - rescue NameError - error_exit("Handler '#{handler}' not defined") - end - - delivery = listener.delivery - if delivery.nil? - Phobos::CLI.logger.warn do - Hash(message: "Delivery option should be specified, defaulting to 'batch'"\ - ' - specify this option to silence this message') - end - elsif !Listener::DELIVERY_OPTS.include?(delivery) - error_exit("Invalid delivery option '#{delivery}'. Please specify one of: "\ - "#{Listener::DELIVERY_OPTS.join(', ')}") - end - end - end - end - end -end diff --git a/lib/deimos/producer.rb b/lib/deimos/producer.rb index 646fb333..3e56b8f7 100644 --- a/lib/deimos/producer.rb +++ b/lib/deimos/producer.rb @@ -2,12 +2,12 @@ require 'deimos/message' require 'deimos/shared_config' -require 'phobos/producer' require 'active_support/notifications' # :nodoc: module Deimos class << self + # Run a block without allowing any messages to be produced to Kafka. # Optionally add a list of producer classes to limit the disabling to those # classes. 
diff --git a/lib/tasks/deimos.rake b/lib/tasks/deimos.rake index b89ed819..86e937e7 100644 --- a/lib/tasks/deimos.rake +++ b/lib/tasks/deimos.rake @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'phobos' -require 'phobos/cli' require 'generators/deimos/schema_class_generator' require 'optparse' @@ -15,7 +13,7 @@ namespace :deimos do ENV['DEIMOS_TASK_NAME'] = 'consumer' STDOUT.sync = true Rails.logger.info('Running deimos:start rake task.') - Phobos::CLI::Commands.start(%w(start --skip_config)) + Karafka::Server.run end desc 'Starts the Deimos database producer' diff --git a/spec/backends/kafka_async_spec.rb b/spec/backends/kafka_async_spec.rb index 36be4672..99fc5a1e 100644 --- a/spec/backends/kafka_async_spec.rb +++ b/spec/backends/kafka_async_spec.rb @@ -3,9 +3,7 @@ RSpec.describe Deimos::Backends::KafkaAsync do include_context 'with publish_backend' it 'should publish to Kafka asynchronously' do - producer = instance_double(Phobos::Producer::ClassMethods::PublicAPI) - expect(producer).to receive(:async_publish_list).with(messages.map(&:encoded_hash)) - expect(described_class).to receive(:producer).and_return(producer) + expect(Karafka.producer).to receive(:produce_many_async).with(messages) described_class.publish(producer_class: MyProducer, messages: messages) end end diff --git a/spec/backends/kafka_spec.rb b/spec/backends/kafka_spec.rb index 34d3e4e8..e7c3c3ad 100644 --- a/spec/backends/kafka_spec.rb +++ b/spec/backends/kafka_spec.rb @@ -3,9 +3,7 @@ RSpec.describe Deimos::Backends::Kafka do include_context 'with publish_backend' it 'should publish to Kafka synchronously' do - producer = instance_double(Phobos::Producer::ClassMethods::PublicAPI) - expect(producer).to receive(:publish_list).with(messages.map(&:encoded_hash)) - expect(described_class).to receive(:producer).and_return(producer) + expect(Karafka.producer).to receive(:produce_many_sync).with(messages) described_class.publish(producer_class: MyProducer, messages: messages) end end diff --git a/spec/config/configuration_spec.rb b/spec/config/configuration_spec.rb deleted file mode 100644 index 0eab6522..00000000 --- a/spec/config/configuration_spec.rb +++ /dev/null @@ -1,329 +0,0 @@ -# frozen_string_literal: true - -# Mock consumer -class MyConfigConsumer < Deimos::Consumer - # :no-doc: - def consume - end -end - -# Mock consumer 2 -class MyConfigConsumer2 < Deimos::Consumer - # :no-doc: - def consume - end -end - -describe Deimos, 'configuration' do - it 'should configure with deprecated fields' do - logger = Logger.new(nil) - described_class.configure do - kafka_logger logger - reraise_consumer_errors true - schema_registry_url 'http://schema.registry' - schema.use_schema_classes false - seed_broker 'whatever' - schema_path 'some_path' - producer_schema_namespace 'namespace' - producer_topic_prefix 'prefix' - disable_producers true - ssl_enabled true - ssl_ca_cert 'cert' - ssl_client_cert 'cert' - ssl_client_cert_key 'key' - publish_backend 'db' - report_lag true - end - - expect(described_class.config.kafka.logger).to eq(logger) - expect(described_class.config.consumers.reraise_errors).to eq(true) - expect(described_class.config.schema.registry_url).to eq('http://schema.registry') - expect(described_class.config.schema.use_schema_classes).to eq(false) - expect(described_class.config.kafka.seed_brokers).to eq('whatever') - expect(described_class.config.producers.schema_namespace).to eq('namespace') - expect(described_class.config.producers.topic_prefix).to eq('prefix') - 
expect(described_class.config.producers.disabled).to eq(true) - expect(described_class.config.kafka.ssl.enabled).to eq(true) - expect(described_class.config.kafka.ssl.ca_cert).to eq('cert') - expect(described_class.config.kafka.ssl.client_cert).to eq('cert') - expect(described_class.config.kafka.ssl.client_cert_key).to eq('key') - expect(described_class.config.producers.backend).to eq('db') - expect(described_class.config.consumers.report_lag).to eq(true) - end - - it 'reads existing Phobos config YML files' do - described_class.config.reset! - described_class.configure { |c| c.phobos_config_file = File.join(File.dirname(__FILE__), '..', 'phobos.yml') } - expect(described_class.config.phobos_config).to match( - logger: an_instance_of(Logger), - backoff: { min_ms: 1000, max_ms: 60_000 }, - consumer: { - session_timeout: 300, - offset_commit_interval: 10, - offset_commit_threshold: 0, - heartbeat_interval: 10 - }, - custom_kafka_logger: an_instance_of(Logger), - custom_logger: an_instance_of(Logger), - kafka: { - client_id: 'phobos', - connect_timeout: 15, - socket_timeout: 15, - ssl_verify_hostname: true, - ssl_ca_certs_from_system: false, - seed_brokers: ['localhost:9092'] - }, - listeners: [ - { - topic: 'my_consume_topic', - group_id: 'my_group_id', - max_concurrency: 1, - start_from_beginning: true, - max_bytes_per_partition: 524_288, - min_bytes: 1, - max_wait_time: 5, - force_encoding: nil, - delivery: 'batch', - session_timeout: 300, - offset_commit_interval: 10, - offset_commit_threshold: 0, - offset_retention_time: nil, - heartbeat_interval: 10, - handler: 'ConsumerTest::MyConsumer', - use_schema_classes: nil, - max_db_batch_size: nil, - bulk_import_id_generator: nil, - save_associations_first: false - }, { - topic: 'my_batch_consume_topic', - group_id: 'my_batch_group_id', - max_concurrency: 1, - start_from_beginning: true, - max_bytes_per_partition: 500.kilobytes, - min_bytes: 1, - max_wait_time: 5, - force_encoding: nil, - delivery: 'inline_batch', - session_timeout: 300, - offset_commit_interval: 10, - offset_commit_threshold: 0, - offset_retention_time: nil, - heartbeat_interval: 10, - handler: 'ConsumerTest::MyBatchConsumer', - use_schema_classes: nil, - max_db_batch_size: nil, - bulk_import_id_generator: nil, - save_associations_first: false - } - ], - producer: { - ack_timeout: 5, - required_acks: :all, - max_retries: 2, - retry_backoff: 1, - max_buffer_size: 10_000, - max_buffer_bytesize: 10_000_000, - compression_codec: nil, - compression_threshold: 1, - max_queue_size: 10_000, - delivery_threshold: 0, - delivery_interval: 0, - persistent_connections: false - } - ) - end - - specify '#phobos_config' do - logger1 = Logger.new(nil) - logger2 = Logger.new(nil) - described_class.config.reset! 
- described_class.configure do - phobos_logger logger1 - kafka do - logger logger2 - seed_brokers 'my-seed-brokers' - client_id 'phobos2' - connect_timeout 30 - socket_timeout 30 - ssl.enabled(true) - ssl.ca_certs_from_system(true) - ssl.ca_cert('cert') - ssl.client_cert('cert') - ssl.client_cert_key('key') - ssl.verify_hostname(false) - sasl.enabled true - sasl.gssapi_principal 'gssapi_principal' - sasl.gssapi_keytab 'gssapi_keytab' - sasl.plain_authzid 'plain_authzid' - sasl.plain_username 'plain_username' - sasl.plain_password 'plain_password' - sasl.scram_username 'scram_username' - sasl.scram_password 'scram_password' - sasl.scram_mechanism 'scram_mechanism' - sasl.enforce_ssl true - sasl.oauth_token_provider 'oauth_token_provider' - end - consumers do - session_timeout 30 - offset_commit_interval 5 - offset_commit_threshold 0 - heartbeat_interval 5 - backoff 5..10 - end - producers do - ack_timeout 3 - required_acks 1 - max_retries 1 - retry_backoff 2 - max_buffer_size 5 - max_buffer_bytesize 5 - compression_codec :snappy - compression_threshold 2 - max_queue_size 10 - delivery_threshold 1 - delivery_interval 1 - persistent_connections true - end - consumer do - class_name 'MyConfigConsumer' - schema 'blah' - topic 'blah' - group_id 'myconsumerid' - max_concurrency 1 - start_from_beginning true - max_bytes_per_partition 10 - min_bytes 5 - max_wait_time 5 - force_encoding true - delivery :message - backoff 100..200 - session_timeout 10 - offset_commit_interval 13 - offset_commit_threshold 13 - offset_retention_time 13 - heartbeat_interval 13 - use_schema_classes false - end - consumer do - disabled true - class_name 'MyConfigConsumer2' - schema 'blah2' - topic 'blah2' - group_id 'myconsumerid2' - use_schema_classes false - end - end - - expect(described_class.config.phobos_config). 
- to match( - logger: an_instance_of(Logger), - backoff: { min_ms: 5, max_ms: 10 }, - consumer: { - session_timeout: 30, - offset_commit_interval: 5, - offset_commit_threshold: 0, - heartbeat_interval: 5 - }, - custom_kafka_logger: logger2, - custom_logger: logger1, - kafka: { - client_id: 'phobos2', - connect_timeout: 30, - socket_timeout: 30, - ssl_ca_certs_from_system: true, - ssl_ca_cert: 'cert', - ssl_client_cert: 'cert', - ssl_client_cert_key: 'key', - ssl_verify_hostname: false, - seed_brokers: ['my-seed-brokers'], - sasl_gssapi_principal: 'gssapi_principal', - sasl_gssapi_keytab: 'gssapi_keytab', - sasl_plain_authzid: 'plain_authzid', - sasl_plain_username: 'plain_username', - sasl_plain_password: 'plain_password', - sasl_scram_username: 'scram_username', - sasl_scram_password: 'scram_password', - sasl_scram_mechanism: 'scram_mechanism', - sasl_over_ssl: true, - sasl_oauth_token_provider: 'oauth_token_provider', - }, - listeners: [ - { - topic: 'blah', - group_id: 'myconsumerid', - max_concurrency: 1, - start_from_beginning: true, - max_bytes_per_partition: 10, - min_bytes: 5, - max_wait_time: 5, - force_encoding: true, - delivery: 'message', - backoff: { min_ms: 100, max_ms: 200 }, - session_timeout: 10, - offset_commit_interval: 13, - offset_commit_threshold: 13, - offset_retention_time: 13, - heartbeat_interval: 13, - handler: 'MyConfigConsumer', - use_schema_classes: false, - max_db_batch_size: nil, - bulk_import_id_generator: nil, - save_associations_first: false - } - ], - producer: { - ack_timeout: 3, - required_acks: 1, - max_retries: 1, - retry_backoff: 2, - max_buffer_size: 5, - max_buffer_bytesize: 5, - compression_codec: :snappy, - compression_threshold: 2, - max_queue_size: 10, - delivery_threshold: 1, - delivery_interval: 1, - persistent_connections: true - } - ) - end - - it 'should override global configurations' do - described_class.configure do - consumers.bulk_import_id_generator(-> { 'global' }) - consumers.replace_associations true - - consumer do - class_name 'MyConfigConsumer' - schema 'blah' - topic 'blah' - group_id 'myconsumerid' - bulk_import_id_generator(-> { 'consumer' }) - replace_associations false - save_associations_first true - end - - consumer do - class_name 'MyConfigConsumer2' - schema 'blah' - topic 'blah' - group_id 'myconsumerid' - end - end - - consumers = described_class.config.consumers - expect(consumers.replace_associations).to eq(true) - expect(consumers.bulk_import_id_generator.call).to eq('global') - - custom = MyConfigConsumer.config - expect(custom[:replace_associations]).to eq(false) - expect(custom[:bulk_import_id_generator].call).to eq('consumer') - expect(custom[:save_associations_first]).to eq(true) - - default = MyConfigConsumer2.config - expect(default[:replace_associations]).to eq(true) - expect(default[:bulk_import_id_generator].call).to eq('global') - expect(default[:save_associations_first]).to eq(false) - - end -end From 61660458b735a085e6cbb445c61712712aba9dbc Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 13:38:54 -0400 Subject: [PATCH 02/18] Add Deimos::Logging --- lib/deimos.rb | 1 + lib/deimos/backends/base.rb | 35 +-------- lib/deimos/consume/batch_consumption.rb | 2 +- lib/deimos/consume/message_consumption.rb | 4 +- lib/deimos/logging.rb | 71 +++++++++++++++++++ .../schema_backends/avro_schema_registry.rb | 2 +- lib/deimos/utils/db_poller.rb | 2 +- lib/deimos/utils/db_poller/base.rb | 14 ++-- lib/deimos/utils/db_poller/state_based.rb | 6 +- lib/deimos/utils/db_poller/time_based.rb | 6 +- 
lib/deimos/utils/deadlock_retry.rb | 2 +- spec/backends/base_spec.rb | 23 ------ spec/batch_consumer_spec.rb | 17 ++--- spec/logging_spec.rb | 25 +++++++ 14 files changed, 123 insertions(+), 87 deletions(-) create mode 100644 lib/deimos/logging.rb create mode 100644 spec/logging_spec.rb diff --git a/lib/deimos.rb b/lib/deimos.rb index 39235b67..467f4174 100644 --- a/lib/deimos.rb +++ b/lib/deimos.rb @@ -4,6 +4,7 @@ require 'karafka' require 'deimos/version' +require 'deimos/logging' require 'deimos/config/configuration' require 'deimos/producer' require 'deimos/active_record_producer' diff --git a/lib/deimos/backends/base.rb b/lib/deimos/backends/base.rb index 00f9da6b..aa88ae54 100644 --- a/lib/deimos/backends/base.rb +++ b/lib/deimos/backends/base.rb @@ -9,7 +9,8 @@ class << self # @param messages [Array] # @return [void] def publish(producer_class:, messages:) - Deimos.config.logger.info(log_message(messages)) + message = ::Deimos::Logging.messages_log_text(producer_class.karafka_config.payload_log, messages) + Deimos::Logging.log_info({message: 'Publishing Messages:'}.merge(message)) execute(producer_class: producer_class, messages: messages) end @@ -22,38 +23,6 @@ def execute(producer_class:, messages:) private - def log_message(messages) - log_message = { - message: 'Publishing messages', - topic: messages.first&.topic - } - - case Deimos.config.payload_log - when :keys - log_message.merge!( - payload_keys: messages.map(&:key) - ) - when :count - log_message.merge!( - payloads_count: messages.count - ) - when :headers - log_message.merge!( - payload_headers: messages.map(&:headers) - ) - else - log_message.merge!( - payloads: messages.map do |message| - { - payload: message.payload, - key: message.key - } - end - ) - end - - log_message - end end end end diff --git a/lib/deimos/consume/batch_consumption.rb b/lib/deimos/consume/batch_consumption.rb index d2462097..dc5f6261 100644 --- a/lib/deimos/consume/batch_consumption.rb +++ b/lib/deimos/consume/batch_consumption.rb @@ -84,7 +84,7 @@ def _handle_batch_error(exception, payloads, metadata) status:batch_error topic:#{metadata[:topic]} )) - Deimos.config.logger.warn( + Deimos::Logging.log_warn( message: 'Error consuming message batch', handler: self.class.name, metadata: metadata.except(:keys), diff --git a/lib/deimos/consume/message_consumption.rb b/lib/deimos/consume/message_consumption.rb index 5b2a925f..5d6816ec 100644 --- a/lib/deimos/consume/message_consumption.rb +++ b/lib/deimos/consume/message_consumption.rb @@ -37,7 +37,7 @@ def consume(_payload, _metadata) private def _received_message(payload, metadata) - Deimos.config.logger.info( + Deimos::Logging.log_info( message: 'Got Kafka event', payload: payload, metadata: metadata @@ -84,7 +84,7 @@ def _handle_success(time_taken, payload, metadata) status:success topic:#{metadata[:topic]} )) - Deimos.config.logger.info( + Deimos::Logging.log_info( message: 'Finished processing Kafka event', payload: payload, time_elapsed: time_taken, diff --git a/lib/deimos/logging.rb b/lib/deimos/logging.rb new file mode 100644 index 00000000..7d06cc24 --- /dev/null +++ b/lib/deimos/logging.rb @@ -0,0 +1,71 @@ +module Deimos + module Logging + class << self + + def log_add(method, msg) + Karafka.logger.tagged('Deimos') do |logger| + logger.send(method, msg.to_json) + end + + end + + def log_info(*args) + log_add(:info, *args) + end + + def log_debug(*args) + log_add(:debug, *args) + end + + def log_error(*args) + log_add(:error, *args) + end + + def log_warn(*args) + log_add(:warn, *args) + end + 
+ def metadata_log_text(metadata) + metadata.to_h.slice(:timestamp, :offset, :first_offset, :last_offset, :partition, :topic, :size) + end + + def _payloads(messages) + + end + + def messages_log_text(payload_log, messages) + log_message = {} + + case payload_log + when :keys + keys = messages.map do |m| + m.respond_to?(:payload) ? m.key || m.payload['message_id'] : m[:key] || m[:payload]['message_id'] + end + log_message.merge!( + payload_keys: keys + ) + when :count + log_message.merge!( + payloads_count: messages.count + ) + when :headers + log_message.merge!( + payload_headers: messages.map { |m| m.respond_to?(:headers) ? m.headers : m[:headers] } + ) + else + log_message.merge!( + payloads: messages.map do |m| + { + payload: m.respond_to?(:payload) ? m.payload : m[:payload], + key: m.respond_to?(:payload) ? m.key : m[:key] + } + end + ) + end + + log_message + end + + end + end +end diff --git a/lib/deimos/schema_backends/avro_schema_registry.rb b/lib/deimos/schema_backends/avro_schema_registry.rb index e05d62bd..e67a6952 100644 --- a/lib/deimos/schema_backends/avro_schema_registry.rb +++ b/lib/deimos/schema_backends/avro_schema_registry.rb @@ -29,7 +29,7 @@ def avro_turf_messaging user: Deimos.config.schema.user, password: Deimos.config.schema.password, namespace: @namespace, - logger: Deimos.config.logger + logger: Karafka.logger ) end end diff --git a/lib/deimos/utils/db_poller.rb b/lib/deimos/utils/db_poller.rb index 531a94e7..2e71e0f4 100644 --- a/lib/deimos/utils/db_poller.rb +++ b/lib/deimos/utils/db_poller.rb @@ -16,7 +16,7 @@ def self.start! end executor = Sigurd::Executor.new(pollers, sleep_seconds: 5, - logger: Deimos.config.logger) + logger: Karafka.logger) signal_handler = Sigurd::SignalHandler.new(executor) signal_handler.run! end diff --git a/lib/deimos/utils/db_poller/base.rb b/lib/deimos/utils/db_poller/base.rb index a126dc9e..aaa7e446 100644 --- a/lib/deimos/utils/db_poller/base.rb +++ b/lib/deimos/utils/db_poller/base.rb @@ -58,14 +58,14 @@ def start if Deimos.config.producers.backend == :kafka_async Deimos.config.producers.backend = :kafka end - Deimos.config.logger.info('Starting...') + Deimos::Logging.log_info('Starting...') @signal_to_stop = false ActiveRecord::Base.connection.reconnect! unless ActiveRecord::Base.connection.open_transactions.positive? retrieve_poll_info loop do if @signal_to_stop - Deimos.config.logger.info('Shutting down') + Deimos::Logging.log_info('Shutting down') break end process_updates if should_run? @@ -95,7 +95,7 @@ def should_run? # Stop the poll. 
# @return [void] def stop - Deimos.config.logger.info('Received signal to stop') + Deimos::Logging.log_info('Received signal to stop') @signal_to_stop = true end @@ -111,9 +111,9 @@ def process_updates # @param span [Object] # @return [Boolean] def handle_message_too_large(exception, batch, status, span) - Deimos.config.logger.error("Error publishing through DB Poller: #{exception.message}") + Deimos::Logging.log_error("Error publishing through DB Poller: #{exception.message}") if @config.skip_too_large_messages - Deimos.config.logger.error("Skipping messages #{batch.map(&:id).join(', ')} since they are too large") + Deimos::Logging.log_error("Skipping messages #{batch.map(&:id).join(', ')} since they are too large") Deimos.config.tracer&.set_error(span, exception) status.batches_errored += 1 true @@ -145,13 +145,13 @@ def process_batch_with_span(batch, status) sleep(0.5) retry rescue StandardError => e - Deimos.config.logger.error("Error publishing through DB poller: #{e.message}}") + Deimos::Logging.log_error("Error publishing through DB poller: #{e.message}}") if @config.retries.nil? || retries < @config.retries retries += 1 sleep(0.5) retry else - Deimos.config.logger.error('Retries exceeded, moving on to next batch') + Deimos::Logging.log_error('Retries exceeded, moving on to next batch') Deimos.config.tracer&.set_error(span, e) status.batches_errored += 1 return false diff --git a/lib/deimos/utils/db_poller/state_based.rb b/lib/deimos/utils/db_poller/state_based.rb index d6ebf5cb..1085cd58 100644 --- a/lib/deimos/utils/db_poller/state_based.rb +++ b/lib/deimos/utils/db_poller/state_based.rb @@ -10,14 +10,14 @@ class StateBased < Base # Send messages for updated data. # @return [void] def process_updates - Deimos.config.logger.info("Polling #{log_identifier}") + Deimos::Logging.log_info("Polling #{log_identifier}") status = PollStatus.new(0, 0, 0) first_batch = true # poll_query gets all the relevant data from the database, as defined # by the producer itself. loop do - Deimos.config.logger.debug("Polling #{log_identifier}, batch #{status.current_batch}") + Deimos::Logging.log_debug("Polling #{log_identifier}, batch #{status.current_batch}") batch = fetch_results.to_a break if batch.empty? @@ -29,7 +29,7 @@ def process_updates # If there were no results at all, we update last_sent so that we still get a wait # before the next poll. @info.touch(:last_sent) if first_batch - Deimos.config.logger.info("Poll #{log_identifier} complete (#{status.report}") + Deimos::Logging.log_info("Poll #{log_identifier} complete (#{status.report}") end # @return [ActiveRecord::Relation] diff --git a/lib/deimos/utils/db_poller/time_based.rb b/lib/deimos/utils/db_poller/time_based.rb index 7834d310..6b116498 100644 --- a/lib/deimos/utils/db_poller/time_based.rb +++ b/lib/deimos/utils/db_poller/time_based.rb @@ -28,14 +28,14 @@ def process_and_touch_info(batch, status) def process_updates time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone time_to = Time.zone.now - @config.delay_time - Deimos.config.logger.info("Polling #{log_identifier} from #{time_from} to #{time_to}") + Deimos::Logging.log_info("Polling #{log_identifier} from #{time_from} to #{time_to}") status = PollStatus.new(0, 0, 0) first_batch = true # poll_query gets all the relevant data from the database, as defined # by the producer itself. 
loop do - Deimos.config.logger.debug("Polling #{log_identifier}, batch #{status.current_batch}") + Deimos::Logging.log_debug("Polling #{log_identifier}, batch #{status.current_batch}") batch = fetch_results(time_from, time_to).to_a break if batch.empty? @@ -47,7 +47,7 @@ def process_updates # If there were no results at all, we update last_sent so that we still get a wait # before the next poll. @info.touch(:last_sent) if first_batch - Deimos.config.logger.info("Poll #{log_identifier} complete at #{time_to} (#{status.report})") + Deimos::Logging.log_info("Poll #{log_identifier} complete at #{time_to} (#{status.report})") end # @param time_from [ActiveSupport::TimeWithZone] diff --git a/lib/deimos/utils/deadlock_retry.rb b/lib/deimos/utils/deadlock_retry.rb index d1a6405b..f980e0f6 100644 --- a/lib/deimos/utils/deadlock_retry.rb +++ b/lib/deimos/utils/deadlock_retry.rb @@ -45,7 +45,7 @@ def wrap(tags=[]) # Reraise if all retries exhausted raise if count <= 0 - Deimos.config.logger.warn( + Deimos::Logging.log_warn( message: 'Deadlock encountered when trying to execute query. '\ "Retrying. #{count} attempt(s) remaining", tags: tags diff --git a/spec/backends/base_spec.rb b/spec/backends/base_spec.rb index ac742d89..bbe108bc 100644 --- a/spec/backends/base_spec.rb +++ b/spec/backends/base_spec.rb @@ -8,27 +8,4 @@ described_class.publish(producer_class: MyProducer, messages: messages) end - describe 'payload_log method' do - it 'should return whole payload (default behavior)' do - log_message = described_class.send(:log_message, messages) - expect(log_message[:payloads].count).to eq(3) - expect(log_message[:payloads].first[:payload]).to eq({ 'foo' => 1 }) - expect(log_message[:payloads].first[:key]).to eq('foo1') - end - - it 'should return only keys of messages' do - Deimos.config.payload_log = :keys - log_message = described_class.send(:log_message, messages) - expect(log_message[:payload_keys].count).to eq(3) - expect(log_message[:payload_keys]).to be_a(Array) - expect(log_message[:payload_keys].first).to eq('foo1') - end - - it 'should return only messages count' do - Deimos.config.payload_log = :count - log_message = described_class.send(:log_message, messages) - expect(log_message[:payloads_count]).to be_a(Integer) - expect(log_message[:payloads_count]).to eq(3) - end - end end diff --git a/spec/batch_consumer_spec.rb b/spec/batch_consumer_spec.rb index b742915a..3d8a9fe0 100644 --- a/spec/batch_consumer_spec.rb +++ b/spec/batch_consumer_spec.rb @@ -251,18 +251,11 @@ def consume_batch(_payloads, _metadata) 'timestamp' => 2.minutes.ago.to_s, 'message_id' => 'two' } ] - allow(Deimos.config.logger). - to receive(:info) - - expect(Deimos.config.logger). - to receive(:info). - with(hash_including( - message_ids: [ - { key: 1, message_id: 'one' }, - { key: 2, message_id: 'two' } - ] - )). - twice + allow(Deimos::Logging).to receive(:log_info) + + expect(Deimos::Logging). + to receive(:log_info). 
+ with(hash_including(payload_keys: ["1", "2"])) test_consume_batch('my_batch_consume_topic', batch_with_message_id, keys: [1, 2]) end diff --git a/spec/logging_spec.rb b/spec/logging_spec.rb new file mode 100644 index 00000000..b9267151 --- /dev/null +++ b/spec/logging_spec.rb @@ -0,0 +1,25 @@ +RSpec.describe Deimos::Logging do + include_context 'with publish_backend' + describe '#messages_log_text' do + it 'should return whole payload (default behavior)' do + log_message = described_class.messages_log_text(:payloads, messages) + expect(log_message[:payloads].count).to eq(3) + expect(log_message[:payloads].first[:payload]).to eq({ some_int: 1, test_id: 'foo1' }) + expect(log_message[:payloads].first[:key]).to eq('foo1') + end + + it 'should return only keys of messages' do + log_message = described_class.messages_log_text(:keys, messages) + expect(log_message[:payload_keys].count).to eq(3) + expect(log_message[:payload_keys]).to be_a(Array) + expect(log_message[:payload_keys].first).to eq('foo1') + end + + it 'should return only messages count' do + log_message = described_class.messages_log_text(:count, messages) + expect(log_message[:payloads_count]).to be_a(Integer) + expect(log_message[:payloads_count]).to eq(3) + end + end + +end From dfbf84bea49bb8eb300e1312575f42af59a4f85b Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 13:44:00 -0400 Subject: [PATCH 03/18] Remove features no longer used --- lib/deimos.rb | 4 - lib/deimos/batch_consumer.rb | 7 - lib/deimos/config/configuration.rb | 15 -- lib/deimos/utils/inline_consumer.rb | 158 --------------- lib/deimos/utils/lag_reporter.rb | 186 ------------------ lib/deimos/utils/schema_controller_mixin.rb | 129 ------------ spec/utils/inline_consumer_spec.rb | 31 --- spec/utils/lag_reporter_spec.rb | 76 ------- spec/utils/platform_schema_validation_spec.rb | 0 spec/utils/schema_controller_mixin_spec.rb | 84 -------- 10 files changed, 690 deletions(-) delete mode 100644 lib/deimos/batch_consumer.rb delete mode 100644 lib/deimos/utils/inline_consumer.rb delete mode 100644 lib/deimos/utils/lag_reporter.rb delete mode 100644 lib/deimos/utils/schema_controller_mixin.rb delete mode 100644 spec/utils/inline_consumer_spec.rb delete mode 100644 spec/utils/lag_reporter_spec.rb delete mode 100644 spec/utils/platform_schema_validation_spec.rb delete mode 100644 spec/utils/schema_controller_mixin_spec.rb diff --git a/lib/deimos.rb b/lib/deimos.rb index 467f4174..272854ed 100644 --- a/lib/deimos.rb +++ b/lib/deimos.rb @@ -10,9 +10,7 @@ require 'deimos/active_record_producer' require 'deimos/active_record_consumer' require 'deimos/consumer' -require 'deimos/batch_consumer' require 'deimos/instrumentation' -require 'deimos/utils/lag_reporter' require 'deimos/backends/base' require 'deimos/backends/kafka' @@ -26,7 +24,6 @@ require 'deimos/railtie' if defined?(Rails) -require 'deimos/utils/schema_controller_mixin' if defined?(ActionController) if defined?(ActiveRecord) require 'deimos/kafka_source' @@ -37,7 +34,6 @@ require 'deimos/utils/db_poller' end -require 'deimos/utils/inline_consumer' require 'yaml' require 'erb' diff --git a/lib/deimos/batch_consumer.rb b/lib/deimos/batch_consumer.rb deleted file mode 100644 index e9f3bb30..00000000 --- a/lib/deimos/batch_consumer.rb +++ /dev/null @@ -1,7 +0,0 @@ -# frozen_string_literal: true - -module Deimos - # @deprecated Use Deimos::Consumer with `delivery: inline_batch` configured instead - class BatchConsumer < Consumer - end -end diff --git a/lib/deimos/config/configuration.rb 
b/lib/deimos/config/configuration.rb index d16ebb0b..2deb167f 100644 --- a/lib/deimos/config/configuration.rb +++ b/lib/deimos/config/configuration.rb @@ -541,20 +541,5 @@ def self.configure_producer_or_consumer(kafka_config) setting :poller_class, nil end - deprecate 'kafka_logger', 'kafka.logger' - deprecate 'reraise_consumer_errors', 'consumers.reraise_errors' - deprecate 'schema_registry_url', 'schema.registry_url' - deprecate 'seed_broker', 'kafka.seed_brokers' - deprecate 'schema_path', 'schema.path' - deprecate 'producer_schema_namespace', 'producers.schema_namespace' - deprecate 'producer_topic_prefix', 'producers.topic_prefix' - deprecate 'disable_producers', 'producers.disabled' - deprecate 'ssl_enabled', 'kafka.ssl.enabled' - deprecate 'ssl_ca_cert', 'kafka.ssl.ca_cert' - deprecate 'ssl_client_cert', 'kafka.ssl.client_cert' - deprecate 'ssl_client_cert_key', 'kafka.ssl.client_cert_key' - deprecate 'publish_backend', 'producers.backend' - deprecate 'report_lag', 'consumers.report_lag' - end end diff --git a/lib/deimos/utils/inline_consumer.rb b/lib/deimos/utils/inline_consumer.rb deleted file mode 100644 index 9e40e4de..00000000 --- a/lib/deimos/utils/inline_consumer.rb +++ /dev/null @@ -1,158 +0,0 @@ -# frozen_string_literal: true - -# Class to consume messages. Can be used with integration testing frameworks. -# Assumes that you have a topic with only one partition. -module Deimos - module Utils - # Listener that can seek to get the last X messages in a topic. - class SeekListener < Phobos::Listener - # @return [Integer] - MAX_SEEK_RETRIES = 3 - # @return [Integer] - attr_accessor :num_messages - - # @return [void] - def start_listener - @num_messages ||= 10 - @consumer = create_kafka_consumer - @consumer.subscribe(topic, @subscribe_opts) - attempt = 0 - - begin - attempt += 1 - last_offset = @kafka_client.last_offset_for(topic, 0) - offset = last_offset - num_messages - if offset.positive? - Deimos.config.logger.info("Seeking to #{offset}") - @consumer.seek(topic, 0, offset) - end - rescue StandardError => e - if attempt < MAX_SEEK_RETRIES - sleep(1.seconds * attempt) - retry - end - log_error("Could not seek to offset: #{e.message} after #{MAX_SEEK_RETRIES} retries", listener_metadata) - end - - instrument('listener.start_handler', listener_metadata) do - @handler_class.start(@kafka_client) - end - log_info('Listener started', listener_metadata) - end - end - - # Class to return the messages consumed. - class MessageBankHandler < Deimos::Consumer - include Phobos::Handler - - cattr_accessor :total_messages - - # @param klass [Class] - # @return [void] - def self.config_class=(klass) - self.config.merge!(klass.config) - end - - # @param _kafka_client [Kafka::Client] - # @return [void] - def self.start(_kafka_client) - self.total_messages = [] - end - - # @param payload [Hash] - # @param metadata [Hash] - def consume(payload, metadata) - self.class.total_messages << { - key: metadata[:key], - payload: payload - } - end - end - - # Class which can process/consume messages inline. - class InlineConsumer - # @return [Integer] - MAX_MESSAGE_WAIT_TIME = 1.second - # @return [Integer] - MAX_TOPIC_WAIT_TIME = 10.seconds - - # Get the last X messages from a topic. You can specify a subclass of - # Deimos::Consumer or Deimos::Producer, or provide the - # schema, namespace and key_config directly. 
- # @param topic [String] - # @param config_class [Class,Class] - # @param schema [String] - # @param namespace [String] - # @param key_config [Hash] - # @param num_messages [Integer] - # @return [Array] - def self.get_messages_for(topic:, schema: nil, namespace: nil, key_config: nil, - config_class: nil, num_messages: 10) - if config_class - MessageBankHandler.config_class = config_class - elsif schema.nil? || key_config.nil? - raise 'You must specify either a config_class or a schema, namespace and key_config!' - else - MessageBankHandler.class_eval do - schema schema - namespace namespace - key_config key_config - @decoder = nil - @key_decoder = nil - end - end - self.consume(topic: topic, - frk_consumer: MessageBankHandler, - num_messages: num_messages) - messages = MessageBankHandler.total_messages - messages.size <= num_messages ? messages : messages[-num_messages..-1] - end - - # Consume the last X messages from a topic. - # @param topic [String] - # @param frk_consumer [Class] - # @param num_messages [Integer] If this number is >= the number - # of messages in the topic, all messages will be consumed. - # @return [void] - def self.consume(topic:, frk_consumer:, num_messages: 10) - listener = SeekListener.new( - handler: frk_consumer, - group_id: SecureRandom.hex, - topic: topic, - heartbeat_interval: 1 - ) - listener.num_messages = num_messages - - # Add the start_time and last_message_time attributes to the - # consumer class so we can kill it if it's gone on too long - class << frk_consumer - attr_accessor :start_time, :last_message_time - end - - subscribers = [] - subscribers << ActiveSupport::Notifications. - subscribe('phobos.listener.process_message') do - frk_consumer.last_message_time = Time.zone.now - end - subscribers << ActiveSupport::Notifications. - subscribe('phobos.listener.start_handler') do - frk_consumer.start_time = Time.zone.now - frk_consumer.last_message_time = nil - end - subscribers << ActiveSupport::Notifications. - subscribe('heartbeat.consumer.kafka') do - if frk_consumer.last_message_time - if Time.zone.now - frk_consumer.last_message_time > MAX_MESSAGE_WAIT_TIME - raise Phobos::AbortError - end - elsif Time.zone.now - frk_consumer.start_time > MAX_TOPIC_WAIT_TIME - Deimos.config.logger.error('Aborting - initial wait too long') - raise Phobos::AbortError - end - end - listener.start - subscribers.each { |s| ActiveSupport::Notifications.unsubscribe(s) } - end - end - end -end diff --git a/lib/deimos/utils/lag_reporter.rb b/lib/deimos/utils/lag_reporter.rb deleted file mode 100644 index 931c0b83..00000000 --- a/lib/deimos/utils/lag_reporter.rb +++ /dev/null @@ -1,186 +0,0 @@ -# frozen_string_literal: true - -require 'mutex_m' - -# :nodoc: -module Deimos - module Utils - # Class that manages reporting lag. 
- class LagReporter - extend Mutex_m - - # Class that has a list of topics - class ConsumerGroup - # @return [Hash] - attr_accessor :topics - # @return [String] - attr_accessor :id - - # @param id [String] - def initialize(id) - self.id = id - self.topics = {} - end - - # @param topic [String] - # @param partition [Integer] - # @return [void] - def report_lag(topic, partition) - self.topics[topic.to_s] ||= Topic.new(topic, self) - self.topics[topic.to_s].report_lag(partition) - end - - # @param topic [String] - # @param partition [Integer] - # @param offset [Integer] - # @return [void] - def assign_current_offset(topic, partition, offset) - self.topics[topic.to_s] ||= Topic.new(topic, self) - self.topics[topic.to_s].assign_current_offset(partition, offset) - end - end - - # Topic which has a hash of partition => last known current offsets - class Topic - # @return [String] - attr_accessor :topic_name - # @return [Hash] - attr_accessor :partition_current_offsets - # @return [ConsumerGroup] - attr_accessor :consumer_group - - # @param topic_name [String] - # @param group [ConsumerGroup] - def initialize(topic_name, group) - self.topic_name = topic_name - self.consumer_group = group - self.partition_current_offsets = {} - end - - # @param partition [Integer] - # @param offset [Integer] - # @return [void] - def assign_current_offset(partition, offset) - self.partition_current_offsets[partition.to_i] = offset - end - - # @param partition [Integer] - # @param offset [Integer] - # @return [Integer] - def compute_lag(partition, offset) - begin - client = Phobos.create_kafka_client - last_offset = client.last_offset_for(self.topic_name, partition) - lag = last_offset - offset - rescue StandardError # don't do anything, just wait - Deimos.config.logger. - debug("Error computing lag for #{self.topic_name}, will retry") - end - lag || 0 - end - - # @param partition [Integer] - # @return [void] - def report_lag(partition) - current_offset = self.partition_current_offsets[partition.to_i] - return unless current_offset - - lag = compute_lag(partition, current_offset) - group = self.consumer_group.id - Deimos.config.logger. - debug("Sending lag: #{group}/#{partition}: #{lag}") - Deimos.config.metrics&.gauge('consumer_lag', lag, tags: %W( - consumer_group:#{group} - partition:#{partition} - topic:#{self.topic_name} - )) - end - end - - @groups = {} - - class << self - # Reset all group information. 
- # @return [void] - def reset - @groups = {} - end - - # offset_lag = event.payload.fetch(:offset_lag) - # group_id = event.payload.fetch(:group_id) - # topic = event.payload.fetch(:topic) - # partition = event.payload.fetch(:partition) - # @param payload [Hash] - # @return [void] - def message_processed(payload) - offset = payload[:offset] || payload[:last_offset] - topic = payload[:topic] - group = payload[:group_id] - partition = payload[:partition] - - synchronize do - @groups[group.to_s] ||= ConsumerGroup.new(group) - @groups[group.to_s].assign_current_offset(topic, partition, offset) - end - end - - # @param payload [Hash] - # @return [void] - def offset_seek(payload) - offset = payload[:offset] - topic = payload[:topic] - group = payload[:group_id] - partition = payload[:partition] - - synchronize do - @groups[group.to_s] ||= ConsumerGroup.new(group) - @groups[group.to_s].assign_current_offset(topic, partition, offset) - end - end - - # @param payload [Hash] - # @return [void] - def heartbeat(payload) - group = payload[:group_id] - synchronize do - @groups[group.to_s] ||= ConsumerGroup.new(group) - consumer_group = @groups[group.to_s] - payload[:topic_partitions].each do |topic, partitions| - partitions.each do |partition| - consumer_group.report_lag(topic, partition) - end - end - end - end - end - end - end - - ActiveSupport::Notifications.subscribe('start_process_message.consumer.kafka') do |*args| - next unless Deimos.config.consumers.report_lag - - event = ActiveSupport::Notifications::Event.new(*args) - Deimos::Utils::LagReporter.message_processed(event.payload) - end - - ActiveSupport::Notifications.subscribe('start_process_batch.consumer.kafka') do |*args| - next unless Deimos.config.consumers.report_lag - - event = ActiveSupport::Notifications::Event.new(*args) - Deimos::Utils::LagReporter.message_processed(event.payload) - end - - ActiveSupport::Notifications.subscribe('seek.consumer.kafka') do |*args| - next unless Deimos.config.consumers.report_lag - - event = ActiveSupport::Notifications::Event.new(*args) - Deimos::Utils::LagReporter.offset_seek(event.payload) - end - - ActiveSupport::Notifications.subscribe('heartbeat.consumer.kafka') do |*args| - next unless Deimos.config.consumers.report_lag - - event = ActiveSupport::Notifications::Event.new(*args) - Deimos::Utils::LagReporter.heartbeat(event.payload) - end -end diff --git a/lib/deimos/utils/schema_controller_mixin.rb b/lib/deimos/utils/schema_controller_mixin.rb deleted file mode 100644 index 23f3ead1..00000000 --- a/lib/deimos/utils/schema_controller_mixin.rb +++ /dev/null @@ -1,129 +0,0 @@ -# frozen_string_literal: true - -module Deimos - module Utils - # Mixin to automatically decode schema-encoded payloads when given the correct content type, - # and provide the `render_schema` method to encode the payload for responses. - module SchemaControllerMixin - extend ActiveSupport::Concern - - included do - Mime::Type.register('avro/binary', :avro) - - attr_accessor :payload - - if respond_to?(:before_filter) - before_filter(:decode_schema, if: :schema_format?) - else - before_action(:decode_schema, if: :schema_format?) - end - end - - # :nodoc: - module ClassMethods - # @return [Hash>] - def schema_mapping - @schema_mapping ||= {} - end - - # Indicate which schemas should be assigned to actions. 
- # @param actions [Symbol] - # @param kwactions [String] - # @param request [String] - # @param response [String] - # @return [void] - def schemas(*actions, request: nil, response: nil, **kwactions) - actions.each do |action| - request ||= action.to_s.titleize - response ||= action.to_s.titleize - schema_mapping[action.to_s] = { request: request, response: response } - end - kwactions.each do |key, val| - schema_mapping[key.to_s] = { request: val, response: val } - end - end - - # @return [Hash] - def namespaces - @namespaces ||= {} - end - - # Set the namespace for both requests and responses. - # @param name [String] - # @return [void] - def namespace(name) - request_namespace(name) - response_namespace(name) - end - - # Set the namespace for requests. - # @param name [String] - # @return [void] - def request_namespace(name) - namespaces[:request] = name - end - - # Set the namespace for repsonses. - # @param name [String] - # @return [void] - def response_namespace(name) - namespaces[:response] = name - end - end - - # @return [Boolean] - def schema_format? - request.content_type == Deimos.schema_backend_class.content_type - end - - # Get the namespace from either an existing instance variable, or tease it out of the schema. - # @param type [Symbol] :request or :response - # @return [Array] the namespace and schema. - def parse_namespace(type) - namespace = self.class.namespaces[type] - schema = self.class.schema_mapping[params['action']][type] - if schema.nil? - raise "No #{type} schema defined for #{params[:controller]}##{params[:action]}!" - end - - if namespace.nil? - last_period = schema.rindex('.') - namespace, schema = schema.split(last_period) - end - if namespace.nil? || schema.nil? - raise "No request namespace defined for #{params[:controller]}##{params[:action]}!" - end - - [namespace, schema] - end - - # Decode the payload with the parameters. - # @return [void] - def decode_schema - namespace, schema = parse_namespace(:request) - decoder = Deimos.schema_backend(schema: schema, namespace: namespace) - @payload = decoder.decode(request.body.read).with_indifferent_access - @payload.each do |key, value| - Deimos.config.tracer&.set_tag("body.#{key}", value) - end - if Deimos.config.schema.use_schema_classes - @payload = Utils::SchemaClass.instance(@payload, schema, namespace) - end - request.body.rewind if request.body.respond_to?(:rewind) - end - - # Render a hash into a payload as specified by the configured schema and namespace. 
- # @param payload [Hash] - # @param schema [String] - # @param namespace [String] - # @return [void] - def render_schema(payload, schema: nil, namespace: nil) - namespace, schema = parse_namespace(:response) if !schema && !namespace - encoder = Deimos.schema_backend(schema: schema, namespace: namespace) - encoded = encoder.encode(payload.to_h, topic: "#{namespace}.#{schema}") - response.headers['Content-Type'] = encoder.class.content_type - send_data(encoded) - end - end - end -end diff --git a/spec/utils/inline_consumer_spec.rb b/spec/utils/inline_consumer_spec.rb deleted file mode 100644 index 9b722715..00000000 --- a/spec/utils/inline_consumer_spec.rb +++ /dev/null @@ -1,31 +0,0 @@ -# frozen_string_literal: true - -describe Deimos::Utils::SeekListener do - - describe '#start_listener' do - let(:consumer) { instance_double(Kafka::Consumer) } - let(:handler) { class_double(Deimos::Utils::MessageBankHandler) } - - before(:each) do - allow(handler).to receive(:start) - allow(consumer).to receive(:subscribe) - allow_any_instance_of(Phobos::Listener).to receive(:create_kafka_consumer).and_return(consumer) - allow_any_instance_of(Kafka::Client).to receive(:last_offset_for).and_return(100) - stub_const('Deimos::Utils::SeekListener::MAX_SEEK_RETRIES', 2) - end - - it 'should seek offset' do - allow(consumer).to receive(:seek) - expect(consumer).to receive(:seek).once - seek_listener = described_class.new(handler: handler, group_id: 999, topic: 'test_topic') - seek_listener.start_listener - end - - it 'should retry on errors when seeking offset' do - allow(consumer).to receive(:seek).and_raise(StandardError) - expect(consumer).to receive(:seek).twice - seek_listener = described_class.new(handler: handler, group_id: 999, topic: 'test_topic') - seek_listener.start_listener - end - end -end diff --git a/spec/utils/lag_reporter_spec.rb b/spec/utils/lag_reporter_spec.rb deleted file mode 100644 index 3ced6982..00000000 --- a/spec/utils/lag_reporter_spec.rb +++ /dev/null @@ -1,76 +0,0 @@ -# frozen_string_literal: true - -describe Deimos::Utils::LagReporter do - - let(:kafka_client) { instance_double(Kafka::Client) } - let(:partition1_tags) { %w(consumer_group:group1 partition:1 topic:my-topic) } - let(:partition2_tags) { %w(consumer_group:group1 partition:2 topic:my-topic) } - - before(:each) do - allow(kafka_client).to receive(:last_offset_for).and_return(100) - allow(Phobos).to receive(:create_kafka_client).and_return(kafka_client) - Deimos.configure { |c| c.consumers.report_lag = true } - end - - after(:each) do - described_class.reset - Deimos.configure { |c| c.consumers.report_lag = false } - end - - it 'should not report lag before ready' do - expect(Deimos.config.metrics).not_to receive(:gauge) - ActiveSupport::Notifications.instrument( - 'heartbeat.consumer.kafka', - group_id: 'group1', topic_partitions: { 'my-topic': [1] } - ) - end - - it 'should report lag' do - expect(Deimos.config.metrics).to receive(:gauge).ordered.twice. - with('consumer_lag', 95, tags: partition1_tags) - expect(Deimos.config.metrics).to receive(:gauge).ordered.once. - with('consumer_lag', 80, tags: partition2_tags) - expect(Deimos.config.metrics).to receive(:gauge).ordered.once. 
- with('consumer_lag', 0, tags: partition2_tags) - ActiveSupport::Notifications.instrument( - 'seek.consumer.kafka', - offset: 5, topic: 'my-topic', group_id: 'group1', partition: 1 - ) - ActiveSupport::Notifications.instrument( - 'start_process_message.consumer.kafka', - offset: 20, topic: 'my-topic', group_id: 'group1', partition: 2 - ) - ActiveSupport::Notifications.instrument( - 'heartbeat.consumer.kafka', - group_id: 'group1', topic_partitions: { 'my-topic': [1, 2] } - ) - ActiveSupport::Notifications.instrument( - 'start_process_batch.consumer.kafka', - last_offset: 100, topic: 'my-topic', group_id: 'group1', partition: 2 - ) - ActiveSupport::Notifications.instrument( - 'heartbeat.consumer.kafka', - group_id: 'group1', topic_partitions: { 'my-topic': [1, 2] } - ) - end - - it 'should update lag after heartbeat' do - expect(Deimos.config.metrics).to receive(:gauge).ordered.once. - with('consumer_lag', 94, tags: partition2_tags) - expect(Deimos.config.metrics).to receive(:gauge).ordered.once. - with('consumer_lag', 95, tags: partition2_tags) - ActiveSupport::Notifications.instrument( - 'seek.consumer.kafka', - offset: 6, topic: 'my-topic', group_id: 'group1', partition: 2 - ) - ActiveSupport::Notifications.instrument( - 'heartbeat.consumer.kafka', - group_id: 'group1', topic_partitions: { 'my-topic': [1, 2] } - ) - allow(kafka_client).to receive(:last_offset_for).and_return(101) - ActiveSupport::Notifications.instrument( - 'heartbeat.consumer.kafka', - group_id: 'group1', topic_partitions: { 'my-topic': [1, 2] } - ) - end -end diff --git a/spec/utils/platform_schema_validation_spec.rb b/spec/utils/platform_schema_validation_spec.rb deleted file mode 100644 index e69de29b..00000000 diff --git a/spec/utils/schema_controller_mixin_spec.rb b/spec/utils/schema_controller_mixin_spec.rb deleted file mode 100644 index 4ae6385b..00000000 --- a/spec/utils/schema_controller_mixin_spec.rb +++ /dev/null @@ -1,84 +0,0 @@ -# frozen_string_literal: true - -require 'deimos/utils/schema_controller_mixin' -require 'deimos/schema_backends/avro_local' - -RSpec.describe Deimos::Utils::SchemaControllerMixin, type: :controller do - - before(:each) do - Deimos.configure do - schema.backend(:avro_local) - end - end - - controller(ActionController::Base) do - include Deimos::Utils::SchemaControllerMixin # rubocop:disable RSpec/DescribedClass - - request_namespace 'com.my-namespace.request' - response_namespace 'com.my-namespace.response' - schemas :index, :show - schemas create: 'CreateTopic' - schemas :update, request: 'UpdateRequest', response: 'UpdateResponse' - - # :nodoc: - def index - render_schema({ 'response_id' => payload[:request_id] + ' mom' }) - end - - # :nodoc: - def show - render_schema({ 'response_id' => payload[:request_id] + ' dad' }) - end - - # :nodoc: - def create - render_schema({ 'response_id' => payload[:request_id] + ' bro' }) - end - - # :nodoc: - def update - render_schema({ 'update_response_id' => payload[:update_request_id] + ' sis' }) - end - end - - it 'should render the correct response for index' do - request_backend = Deimos.schema_backend(schema: 'Index', - namespace: 'com.my-namespace.request') - response_backend = Deimos.schema_backend(schema: 'Index', - namespace: 'com.my-namespace.response') - request.content_type = 'avro/binary' - get :index, body: request_backend.encode({ 'request_id' => 'hi' }) - expect(response_backend.decode(response.body)).to eq({ 'response_id' => 'hi mom' }) - end - - it 'should render the correct response for show' do - request_backend = 
Deimos.schema_backend(schema: 'Index', - namespace: 'com.my-namespace.request') - response_backend = Deimos.schema_backend(schema: 'Index', - namespace: 'com.my-namespace.response') - request.content_type = 'avro/binary' - get :show, params: { id: 1 }, body: request_backend.encode({ 'request_id' => 'hi' }) - expect(response_backend.decode(response.body)).to eq({ 'response_id' => 'hi dad' }) - end - - it 'should render the correct response for update' do - request_backend = Deimos.schema_backend(schema: 'UpdateRequest', - namespace: 'com.my-namespace.request') - response_backend = Deimos.schema_backend(schema: 'UpdateResponse', - namespace: 'com.my-namespace.response') - request.content_type = 'avro/binary' - post :update, params: { id: 1 }, body: request_backend.encode({ 'update_request_id' => 'hi' }) - expect(response_backend.decode(response.body)).to eq({ 'update_response_id' => 'hi sis' }) - end - - it 'should render the correct response for create' do - request_backend = Deimos.schema_backend(schema: 'CreateTopic', - namespace: 'com.my-namespace.request') - response_backend = Deimos.schema_backend(schema: 'CreateTopic', - namespace: 'com.my-namespace.response') - request.content_type = 'avro/binary' - post :create, params: { id: 1 }, body: request_backend.encode({ 'request_id' => 'hi' }) - expect(response_backend.decode(response.body)).to eq({ 'response_id' => 'hi bro' }) - end - -end From 7b9dbbabe730966c576482c20794fec64ce08767 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:00:02 -0400 Subject: [PATCH 04/18] Introduce Karafka routing features and update configuration --- lib/deimos.rb | 28 ++++++ .../batch_consumption.rb | 12 +-- .../message_consumption.rb | 3 + lib/deimos/active_record_consumer.rb | 62 +++++++------ lib/deimos/active_record_producer.rb | 25 ++++-- lib/deimos/config/configuration.rb | 34 +------ lib/deimos/ext/consumer_route.rb | 35 ++++++++ lib/deimos/ext/producer_route.rb | 22 +++++ lib/deimos/ext/schema_route.rb | 70 +++++++++++++++ lib/deimos/producer.rb | 28 +++--- lib/deimos/transcoder.rb | 88 +++++++++++++++++++ lib/deimos/utils/db_poller/time_based.rb | 2 +- 12 files changed, 318 insertions(+), 91 deletions(-) create mode 100644 lib/deimos/ext/consumer_route.rb create mode 100644 lib/deimos/ext/producer_route.rb create mode 100644 lib/deimos/ext/schema_route.rb create mode 100644 lib/deimos/transcoder.rb diff --git a/lib/deimos.rb b/lib/deimos.rb index 272854ed..5880f4e5 100644 --- a/lib/deimos.rb +++ b/lib/deimos.rb @@ -22,6 +22,9 @@ require 'deimos/schema_class/enum' require 'deimos/schema_class/record' +require 'deimos/ext/schema_route' +require 'deimos/ext/consumer_route' +require 'deimos/ext/producer_route' require 'deimos/railtie' if defined?(Rails) @@ -112,6 +115,31 @@ def start_db_backend!(thread_count: 1) end end end + # @return [Array key } else - { self.class.config[:key_field] => key } + { self.topic.key_config[:field].to_s => key } end end @@ -205,14 +205,14 @@ def build_records(messages) attrs = attrs.merge(record_key(m.key)) next unless attrs - col = if @klass.column_names.include?(self.class.bulk_import_id_column.to_s) - self.class.bulk_import_id_column + col = if @klass.column_names.include?(self.bulk_import_id_column.to_s) + self.bulk_import_id_column end BatchRecord.new(klass: @klass, attributes: attrs, bulk_import_column: col, - bulk_import_id_generator: self.class.bulk_import_id_generator) + bulk_import_id_generator: self.bulk_import_id_generator) end BatchRecordList.new(records.compact) end diff --git 
a/lib/deimos/active_record_consume/message_consumption.rb b/lib/deimos/active_record_consume/message_consumption.rb index 29820193..9d58ff47 100644 --- a/lib/deimos/active_record_consume/message_consumption.rb +++ b/lib/deimos/active_record_consume/message_consumption.rb @@ -1,10 +1,13 @@ # frozen_string_literal: true +require 'deimos/consume/message_consumption' + module Deimos module ActiveRecordConsume # Methods for consuming individual messages and saving them to the database # as ActiveRecord instances. module MessageConsumption + include Deimos::Consume::MessageConsumption # Find the record specified by the given payload and key. # Default is to use the primary key column and the value of the first # field in the key. diff --git a/lib/deimos/active_record_consumer.rb b/lib/deimos/active_record_consumer.rb index b45e2d99..63bfd72b 100644 --- a/lib/deimos/active_record_consumer.rb +++ b/lib/deimos/active_record_consumer.rb @@ -30,26 +30,6 @@ def record_class(klass) config[:record_class] = klass end - # @return [String,nil] - def bulk_import_id_column - config[:bulk_import_id_column] - end - - # @return [Proc] - def bulk_import_id_generator - config[:bulk_import_id_generator] - end - - # @return [Boolean] - def replace_associations - config[:replace_associations] - end - - # @return [Boolean] - def save_associations_first - config[:save_associations_first] - end - # @param val [Boolean] Turn pre-compaction of the batch on or off. If true, # only the last message for each unique key in a batch is processed. # @return [void] @@ -62,18 +42,48 @@ def compacted(val) def max_db_batch_size(limit) config[:max_db_batch_size] = limit end + + end + + # @return [Boolean] + def replace_associations + self.topic.replace_associations + end + + # @return [String,nil] + def bulk_import_id_column + self.topic.bulk_import_id_column + end + + # @return [Proc] + def bulk_import_id_generator + topic.bulk_import_id_generator + end + + # @return [Boolean] + def save_associations_first + topic.save_associations_first + end + + def key_decoder + self.topic.serializers[:key]&.backend end # Setup def initialize @klass = self.class.config[:record_class] - @converter = ActiveRecordConsume::SchemaModelConverter.new(self.class.decoder, @klass) + @compacted = self.class.config[:compacted] != false + end - if self.class.config[:key_schema] - @key_converter = ActiveRecordConsume::SchemaModelConverter.new(self.class.key_decoder, @klass) - end + def converter + decoder = self.topic.deserializers[:payload].backend + @converter ||= ActiveRecordConsume::SchemaModelConverter.new(decoder, @klass) + end - @compacted = self.class.config[:compacted] != false + def key_converter + decoder = self.topic.deserializers[:key]&.backend + return nil if decoder.nil? + @key_converter ||= ActiveRecordConsume::SchemaModelConverter.new(decoder, @klass) end # Override this method (with `super`) if you want to add/change the default @@ -82,7 +92,7 @@ def initialize # @param _key [String] # @return [Hash] def record_attributes(payload, _key=nil) - @converter.convert(payload) + self.converter.convert(payload) end # Override this message to conditionally save records diff --git a/lib/deimos/active_record_producer.rb b/lib/deimos/active_record_producer.rb index ea285c12..b35fa569 100644 --- a/lib/deimos/active_record_producer.rb +++ b/lib/deimos/active_record_producer.rb @@ -18,9 +18,11 @@ class << self # a record object, refetch the record to pass into the `generate_payload` # method. 
# @return [void] - def record_class(klass, refetch: true) - config[:record_class] = klass - config[:refetch_record] = refetch + def record_class(klass=nil, refetch: true) + return @record_class if klass.nil? + + @record_class = klass + @refetch_record = refetch end # @param record [ActiveRecord::Base] @@ -34,14 +36,14 @@ def send_event(record, force_send: false) # @param force_send [Boolean] # @return [void] def send_events(records, force_send: false) - primary_key = config[:record_class]&.primary_key + primary_key = @record_class&.primary_key messages = records.map do |record| if record.respond_to?(:attributes) attrs = record.attributes.with_indifferent_access else attrs = record.with_indifferent_access - if config[:refetch_record] && attrs[primary_key] - record = config[:record_class].find(attrs[primary_key]) + if @refetch_record && attrs[primary_key] + record = @record_class.find(attrs[primary_key]) end end generate_payload(attrs, record).with_indifferent_access @@ -50,6 +52,15 @@ def send_events(records, force_send: false) self.post_process(records) end + def config + Deimos.karafka_configs.find { |t| t.producer_class == self } + end + + def encoder + raise "No schema or namespace configured for #{self.name}" if config.nil? + config.deserializers[:payload].backend + end + # Generate the payload, given a list of attributes or a record.. # Can be overridden or added to by subclasses. # @param attributes [Hash] @@ -76,7 +87,7 @@ def generate_payload(attributes, _record) # than this value). # @return [ActiveRecord::Relation] def poll_query(time_from:, time_to:, column_name: :updated_at, min_id:) - klass = config[:record_class] + klass = @record_class table = ActiveRecord::Base.connection.quote_table_name(klass.table_name) column = ActiveRecord::Base.connection.quote_column_name(column_name) primary = ActiveRecord::Base.connection.quote_column_name(klass.primary_key) diff --git a/lib/deimos/config/configuration.rb b/lib/deimos/config/configuration.rb index 2deb167f..efb778be 100644 --- a/lib/deimos/config/configuration.rb +++ b/lib/deimos/config/configuration.rb @@ -3,7 +3,7 @@ require 'fig_tree' require_relative '../metrics/mock' require_relative '../tracing/mock' -require 'active_support/core_ext/numeric' +require 'active_support/core_ext/object' # :nodoc: module Deimos # rubocop:disable Metrics/ModuleLength @@ -14,12 +14,6 @@ module Deimos # rubocop:disable Metrics/ModuleLength if self.config.schema.use_schema_classes load_generated_schema_classes end - self.config.producer_objects.each do |producer| - configure_producer_or_consumer(producer) - end - self.config.consumer_objects.each do |consumer| - configure_producer_or_consumer(consumer) - end validate_consumers validate_db_backend if self.config.producers.backend == :db end @@ -70,32 +64,6 @@ def self.validate_consumers end end - # @!visibility private - # @param kafka_config [FigTree::ConfigStruct] - # rubocop:disable Metrics/PerceivedComplexity, Metrics/AbcSize - def self.configure_producer_or_consumer(kafka_config) - klass = kafka_config.class_name.constantize - klass.class_eval do - topic(kafka_config.topic) if kafka_config.topic.present? && klass.respond_to?(:topic) - schema(kafka_config.schema) if kafka_config.schema.present? - namespace(kafka_config.namespace) if kafka_config.namespace.present? - key_config(**kafka_config.key_config) if kafka_config.key_config.present? - schema_class_config(kafka_config.use_schema_classes) if kafka_config.use_schema_classes.present? 
- if kafka_config.respond_to?(:bulk_import_id_column) # consumer - klass.config.merge!( - bulk_import_id_column: kafka_config.bulk_import_id_column, - replace_associations: if kafka_config.replace_associations.nil? - Deimos.config.consumers.replace_associations - else - kafka_config.replace_associations - end, - bulk_import_id_generator: kafka_config.bulk_import_id_generator || - Deimos.config.consumers.bulk_import_id_generator, - save_associations_first: kafka_config.save_associations_first - ) - end - end - end # rubocop:enable Metrics/PerceivedComplexity, Metrics/AbcSize define_settings do diff --git a/lib/deimos/ext/consumer_route.rb b/lib/deimos/ext/consumer_route.rb new file mode 100644 index 00000000..d6d44bf1 --- /dev/null +++ b/lib/deimos/ext/consumer_route.rb @@ -0,0 +1,35 @@ +module Deimos + class ConsumerRoute < Karafka::Routing::Features::Base + module Topic + + FIELDS = %i(max_db_batch_size + bulk_import_id_column + replace_associations + bulk_import_id_generator + each_message + reraise_errors + fatal_error + save_associations_first + ) + Config = Struct.new(*FIELDS, keyword_init: true) + + FIELDS.each do |field| + define_method(field) do |val=Karafka::Routing::Default.new(nil)| + @deimos_config ||= Config.new( + bulk_import_id_column: :bulk_import_id, + replace_associations: true, + each_message: false, + bulk_import_id_generator: proc { SecureRandom.uuid }, + fatal_error: proc { false } + ) + unless val.is_a?(Karafka::Routing::Default) + @deimos_config.public_send("#{field}=", val) + end + @deimos_config[field] + end + end + end + end +end + +Deimos::ConsumerRoute.activate diff --git a/lib/deimos/ext/producer_route.rb b/lib/deimos/ext/producer_route.rb new file mode 100644 index 00000000..7d74da3d --- /dev/null +++ b/lib/deimos/ext/producer_route.rb @@ -0,0 +1,22 @@ +module Deimos + class ProducerRoute < Karafka::Routing::Features::Base + FIELDS = %i(producer_class payload_log disabled) + + Config = Struct.new(*FIELDS, keyword_init: true) + module Topic + FIELDS.each do |field| + define_method(field) do |val=Karafka::Routing::Default.new(nil)| + active(false) if field == :producer_class + @deimos_producer_config ||= Config.new + unless val.is_a?(Karafka::Routing::Default) + @deimos_producer_config.public_send("#{field}=", val) + _deimos_setup_transcoders if schema && namespace + end + @deimos_producer_config[field] + end + end + end + end +end + +Deimos::ProducerRoute.activate diff --git a/lib/deimos/ext/schema_route.rb b/lib/deimos/ext/schema_route.rb new file mode 100644 index 00000000..f48228d3 --- /dev/null +++ b/lib/deimos/ext/schema_route.rb @@ -0,0 +1,70 @@ +require "deimos/transcoder" +require "deimos/ext/producer_middleware" +require "deimos/schema_backends/plain" + +module Deimos + class SchemaRoute < Karafka::Routing::Features::Base + + module Topic + { + schema: nil, + namespace: nil, + key_config: {none: true}, + use_schema_classes: Deimos.config.schema.use_schema_classes + }.each do |field, default| + define_method(field) do |val=Karafka::Routing::Default.new(nil)| + @_deimos_config ||= {} + @_deimos_config[:schema] ||= {} + unless val.is_a?(Karafka::Routing::Default) + @_deimos_config[:schema][field] = val + _deimos_setup_transcoders if schema && namespace + end + @_deimos_config[:schema][field] || default + end + end + def _deimos_setup_transcoders + payload = Transcoder.new( + schema: schema, + namespace: namespace, + use_schema_classes: use_schema_classes, + topic: name + ) + + key = nil + + if key_config[:plain] + key = Transcoder.new( + schema: schema, 
+ namespace: namespace, + use_schema_classes: use_schema_classes, + topic: name + ) + key.backend = Deimos::SchemaBackends::Plain.new(schema: nil, namespace: nil) + elsif !key_config[:none] + if key_config[:field] + key = Transcoder.new( + schema: schema, + namespace: namespace, + use_schema_classes: use_schema_classes, + key_field: key_config[:field].to_s, + topic: name + ) + elsif key_config[:schema] + key = Transcoder.new( + schema: key_config[:schema] || schema, + namespace: namespace, + use_schema_classes: use_schema_classes, + topic: self.name + ) + else + raise 'No key config given - if you are not encoding keys, please use `key_config plain: true`' + end + end + deserializers.payload = payload + deserializers.key = key if key + end + end + end +end + +Deimos::SchemaRoute.activate diff --git a/lib/deimos/producer.rb b/lib/deimos/producer.rb index 3e56b8f7..46a19ae0 100644 --- a/lib/deimos/producer.rb +++ b/lib/deimos/producer.rb @@ -50,6 +50,8 @@ def _disable_producer_classes(producer_classes) # @param producer_class [Class] # @return [Boolean] def producers_disabled?(producer_class=nil) + return true if Deimos.config.producers.disabled + Thread.current[:frk_disable_all_producers] || Thread.current[:frk_disabled_producers]&.include?(producer_class) end @@ -90,12 +92,6 @@ def partition_key(_payload) nil end - # @param size [Integer] Override the default batch size for publishing. - # @return [void] - def max_batch_size(size) - config[:max_batch_size] = size - end - # Publish the payload to the topic. # @param payload [Hash, SchemaClass::Record] with an optional payload_key hash key. # @param topic [String] if specifying the topic @@ -136,6 +132,14 @@ def publish_list(payloads, sync: nil, force_send: false, topic: self.topic, head end end + def karafka_config + Deimos.karafka_configs.find { |topic| topic.producer_class == self } + end + + def topic + karafka_config.name + end + # @param sync [Boolean] # @param force_send [Boolean] # @return [Class] @@ -161,18 +165,6 @@ def produce_batch(backend, batch) backend.publish(producer_class: self, messages: batch) end - # @return [Deimos::SchemaBackends::Base] - def encoder - @encoder ||= Deimos.schema_backend(schema: config[:schema], - namespace: config[:namespace]) - end - - # @return [Deimos::SchemaBackends::Base] - def key_encoder - @key_encoder ||= Deimos.schema_backend(schema: config[:key_schema], - namespace: config[:namespace]) - end - # Override this in active record producers to add # non-schema fields to check for updates # @return [Array] fields to check for updates diff --git a/lib/deimos/transcoder.rb b/lib/deimos/transcoder.rb new file mode 100644 index 00000000..50c6cab3 --- /dev/null +++ b/lib/deimos/transcoder.rb @@ -0,0 +1,88 @@ +module Deimos + class Transcoder + + attr_accessor :key_field, :backend + + # @param schema [String] + # @param namespace [String] + # @param key_field [Symbol] + # @param use_schema_classes [Boolean] + # @param topic [String] + def initialize(schema:, namespace:, key_field: nil, use_schema_classes: nil, topic: nil) + @schema = schema + @namespace = namespace + self.key_field = key_field + @use_schema_classes = use_schema_classes + @topic = topic + end + + # @return [Class < Deimos::SchemaBackends::Base] + def backend + @backend ||= Deimos.schema_backend(schema: @schema, namespace: @namespace) + end + + # for use in test helpers + # @param key [Object] + # @return [String] + def encode_key(key) + if self.key_field + self.backend.encode_key(self.key_field, key, topic: @topic) + else + 
self.backend.encode(key, topic: @topic) + end + end + + # @param key [String] + # @return [Object] + def decode_key(key) + return nil if key.nil? || self.key_field.nil? + + decoded_key = self.backend.decode_key(key, self.key_field) + return decoded_key unless @use_schema_classes + + Utils::SchemaClass.instance(decoded_key, + "#{@schema}_key", + @namespace) + end + + # @param payload [String] + # @return [Object] + def decode_message(payload) + return nil if payload.nil? + + decoded_payload = self.backend.decode(payload) + return decoded_payload unless @use_schema_classes + + Utils::SchemaClass.instance(decoded_payload, + @schema, + @namespace) + end + + # @param payload [Object] + # @return [String] + def encode(payload) + return nil if payload.nil? + + self.backend.encode(payload) + end + + # @param message [Karafka::Messages::Message] + # @return [Object] + def call(message) + if self.key_field + decode_key(message.raw_key) + elsif message.respond_to?(:raw_payload) + decode_message(message.raw_payload) + else + decode_message(message.raw_key) + end + end + + # @param payload [String] + # @return [Object] + def decode_message_hash(payload) + self.key_field ? decode_key(payload) : decode_message(payload) + end + + end +end diff --git a/lib/deimos/utils/db_poller/time_based.rb b/lib/deimos/utils/db_poller/time_based.rb index 6b116498..2e92b407 100644 --- a/lib/deimos/utils/db_poller/time_based.rb +++ b/lib/deimos/utils/db_poller/time_based.rb @@ -54,7 +54,7 @@ def process_updates # @param time_to [ActiveSupport::TimeWithZone] # @return [ActiveRecord::Relation] def fetch_results(time_from, time_to) - id = self.producer_classes.first.config[:record_class].primary_key + id = self.producer_classes.first.record_class.primary_key quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column) quoted_id = ActiveRecord::Base.connection.quote_column_name(id) @resource_class.poll_query(time_from: time_from, From 0384a358a7224eebc102704d97125164abd93bec Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:04:03 -0400 Subject: [PATCH 05/18] Use MissingImplementationError instead of NotImplementedError --- lib/deimos/backends/base.rb | 2 +- lib/deimos/consume/batch_consumption.rb | 2 +- lib/deimos/consume/message_consumption.rb | 2 +- lib/deimos/kafka_source.rb | 2 +- lib/deimos/metrics/provider.rb | 8 ++++---- lib/deimos/schema_backends/base.rb | 22 +++++++++++----------- lib/deimos/schema_class/base.rb | 4 ++-- lib/deimos/schema_class/enum.rb | 2 +- lib/deimos/schema_class/record.rb | 4 ++-- lib/deimos/tracing/provider.rb | 12 ++++++------ 10 files changed, 30 insertions(+), 30 deletions(-) diff --git a/lib/deimos/backends/base.rb b/lib/deimos/backends/base.rb index aa88ae54..cd68d9e6 100644 --- a/lib/deimos/backends/base.rb +++ b/lib/deimos/backends/base.rb @@ -18,7 +18,7 @@ def publish(producer_class:, messages:) # @param messages [Array] # @return [void] def execute(producer_class:, messages:) - raise NotImplementedError + raise MissingImplementationError end private diff --git a/lib/deimos/consume/batch_consumption.rb b/lib/deimos/consume/batch_consumption.rb index dc5f6261..5f0c5fd8 100644 --- a/lib/deimos/consume/batch_consumption.rb +++ b/lib/deimos/consume/batch_consumption.rb @@ -39,7 +39,7 @@ def around_consume_batch(batch, metadata) # @param _metadata [Hash] # @return [void] def consume_batch(_payloads, _metadata) - raise NotImplementedError + raise MissingImplementationError end protected diff --git a/lib/deimos/consume/message_consumption.rb 
b/lib/deimos/consume/message_consumption.rb index 5d6816ec..e853f2ba 100644 --- a/lib/deimos/consume/message_consumption.rb +++ b/lib/deimos/consume/message_consumption.rb @@ -31,7 +31,7 @@ def around_consume(payload, metadata) # @param _metadata [Hash] # @return [void] def consume(_payload, _metadata) - raise NotImplementedError + raise MissingImplementationError end private diff --git a/lib/deimos/kafka_source.rb b/lib/deimos/kafka_source.rb index be2360b3..f1e92561 100644 --- a/lib/deimos/kafka_source.rb +++ b/lib/deimos/kafka_source.rb @@ -76,7 +76,7 @@ def kafka_producers return [self.kafka_producer] end - raise NotImplementedError + raise MissingImplementationError end # This is an internal method, part of the activerecord_import gem. It's diff --git a/lib/deimos/metrics/provider.rb b/lib/deimos/metrics/provider.rb index 4f527a1e..ae99a149 100644 --- a/lib/deimos/metrics/provider.rb +++ b/lib/deimos/metrics/provider.rb @@ -9,7 +9,7 @@ class Provider # @param options [Hash] Any additional options, e.g. :tags # @return [void] def increment(metric_name, options={}) - raise NotImplementedError + raise MissingImplementationError end # Send an counter increment metric @@ -18,7 +18,7 @@ def increment(metric_name, options={}) # @param options [Hash] Any additional options, e.g. :tags # @return [void] def gauge(metric_name, count, options={}) - raise NotImplementedError + raise MissingImplementationError end # Send an counter increment metric @@ -27,7 +27,7 @@ def gauge(metric_name, count, options={}) # @param options [Hash] Any additional options, e.g. :tags # @return [void] def histogram(metric_name, count, options={}) - raise NotImplementedError + raise MissingImplementationError end # Time a yielded block, and send a timer metric @@ -35,7 +35,7 @@ def histogram(metric_name, count, options={}) # @param options [Hash] Any additional options, e.g. :tags # @return [void] def time(metric_name, options={}) - raise NotImplementedError + raise MissingImplementationError end end end diff --git a/lib/deimos/schema_backends/base.rb b/lib/deimos/schema_backends/base.rb index 8d9ea848..54a2fad7 100644 --- a/lib/deimos/schema_backends/base.rb +++ b/lib/deimos/schema_backends/base.rb @@ -90,7 +90,7 @@ def self.mock_backend # The content type to use when encoding / decoding requests over HTTP via ActionController. # @return [String] def self.content_type - raise NotImplementedError + raise MissingImplementationError end # Converts your schema to String form for generated YARD docs. @@ -98,7 +98,7 @@ def self.content_type # @param schema [Object] # @return [String] A string representation of the Type def self.field_type(schema) - raise NotImplementedError + raise MissingImplementationError end # Encode a payload. To be defined by subclass. @@ -107,7 +107,7 @@ def self.field_type(schema) # @param topic [String] # @return [String] def encode_payload(payload, schema:, topic: nil) - raise NotImplementedError + raise MissingImplementationError end # Decode a payload. To be defined by subclass. @@ -115,7 +115,7 @@ def encode_payload(payload, schema:, topic: nil) # @param schema [String,Symbol] # @return [Hash] def decode_payload(payload, schema:) - raise NotImplementedError + raise MissingImplementationError end # Validate that a payload matches the schema. To be defined by subclass. 
@@ -123,13 +123,13 @@ def decode_payload(payload, schema:) # @param schema [String,Symbol] # @return [void] def validate(payload, schema:) - raise NotImplementedError + raise MissingImplementationError end # List of field names belonging to the schema. To be defined by subclass. # @return [Array] def schema_fields - raise NotImplementedError + raise MissingImplementationError end # Given a value and a field definition (as defined by whatever the @@ -139,7 +139,7 @@ def schema_fields # @param value [Object] # @return [Object] def coerce_field(field, value) - raise NotImplementedError + raise MissingImplementationError end # Given a field definition, return the SQL type that might be used in @@ -150,7 +150,7 @@ def coerce_field(field, value) # @param field [SchemaField] # @return [Symbol] def sql_type(field) - raise NotImplementedError + raise MissingImplementationError end # Encode a message key. To be defined by subclass. @@ -159,7 +159,7 @@ def sql_type(field) # @param topic [String] # @return [String] def encode_key(key, key_id, topic: nil) - raise NotImplementedError + raise MissingImplementationError end # Decode a message key. To be defined by subclass. @@ -167,13 +167,13 @@ def encode_key(key, key_id, topic: nil) # @param key_id [String,Symbol] the field in the message to decode. # @return [String] def decode_key(payload, key_id) - raise NotImplementedError + raise MissingImplementationError end # Forcefully loads the schema into memory. # @return [Object] The schema that is of use. def load_schema - raise NotImplementedError + raise MissingImplementationError end end end diff --git a/lib/deimos/schema_class/base.rb b/lib/deimos/schema_class/base.rb index 6cb1c697..a565623f 100644 --- a/lib/deimos/schema_class/base.rb +++ b/lib/deimos/schema_class/base.rb @@ -15,7 +15,7 @@ def initialize(*_args) # @param _opts [Hash] # @return [Hash] a hash representation of the payload def as_json(_opts={}) - raise NotImplementedError + raise MissingImplementationError end # @param key [String,Symbol] @@ -46,7 +46,7 @@ def inspect # @param value [Object] # @return [SchemaClass::Base] def self.initialize_from_value(value) - raise NotImplementedError + raise MissingImplementationError end protected diff --git a/lib/deimos/schema_class/enum.rb b/lib/deimos/schema_class/enum.rb index 5e0d1b20..9d51b625 100644 --- a/lib/deimos/schema_class/enum.rb +++ b/lib/deimos/schema_class/enum.rb @@ -30,7 +30,7 @@ def initialize(value) # Returns all the valid symbols for this enum. # @return [Array] def symbols - raise NotImplementedError + raise MissingImplementationError end # @return [String] diff --git a/lib/deimos/schema_class/record.rb b/lib/deimos/schema_class/record.rb index 1189f791..bc3c9a4f 100644 --- a/lib/deimos/schema_class/record.rb +++ b/lib/deimos/schema_class/record.rb @@ -46,13 +46,13 @@ def with_indifferent_access # Returns the schema name of the inheriting class. # @return [String] def schema - raise NotImplementedError + raise MissingImplementationError end # Returns the namespace for the schema of the inheriting class. # @return [String] def namespace - raise NotImplementedError + raise MissingImplementationError end # Returns the full schema name of the inheriting class. 
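These abstract hooks now raise MissingImplementationError instead of NotImplementedError, but the contract for subclasses is unchanged: they must supply the pieces the base classes leave blank. As a rough, hand-written illustration (generated classes normally come from `rake deimos:generate_schema_models`; the class name, schema and namespace below are made up):

require 'deimos'

class Widget < Deimos::SchemaClass::Record
  attr_accessor :id, :name

  # Without these overrides, the base class raises MissingImplementationError
  # as soon as the schema, namespace or JSON representation is needed.
  def schema
    'Widget'
  end

  def namespace
    'com.my-namespace'
  end

  def as_json(_opts={})
    { 'id' => id, 'name' => name }
  end
end
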
diff --git a/lib/deimos/tracing/provider.rb b/lib/deimos/tracing/provider.rb index f1791a47..911d4ed8 100644 --- a/lib/deimos/tracing/provider.rb +++ b/lib/deimos/tracing/provider.rb @@ -9,14 +9,14 @@ class Provider # @param options [Hash] Options for the span # @return [Object] The span object def start(span_name, options={}) - raise NotImplementedError + raise MissingImplementationError end # Finishes the trace on the span object. # @param span [Object] The span to finish trace on # @return [void] def finish(span) - raise NotImplementedError + raise MissingImplementationError end # Set an error on the span. @@ -24,13 +24,13 @@ def finish(span) # @param exception [Exception] The exception that occurred # @return [void] def set_error(span, exception) - raise NotImplementedError + raise MissingImplementationError end # Get the currently activated span. # @return [Object] def active_span - raise NotImplementedError + raise MissingImplementationError end # Set a tag to a span. Use the currently active span if not given. @@ -39,13 +39,13 @@ def active_span # @param span [Object] # @return [void] def set_tag(tag, value, span=nil) - raise NotImplementedError + raise MissingImplementationError end # Get a tag from a span with the specified tag. # @param tag [String] def get_tag(tag) - raise NotImplementedError + raise MissingImplementationError end end From 7e84e8abc38d0431e038b0b8057537d9d6687efb Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:12:07 -0400 Subject: [PATCH 06/18] Move instrumentation to Karafka --- lib/deimos.rb | 16 +- .../batch_consumption.rb | 18 +- lib/deimos/backends/kafka.rb | 7 - lib/deimos/backends/kafka_async.rb | 7 - lib/deimos/instrumentation.rb | 95 ------- lib/deimos/utils/db_producer.rb | 260 ------------------ 6 files changed, 24 insertions(+), 379 deletions(-) delete mode 100644 lib/deimos/instrumentation.rb delete mode 100644 lib/deimos/utils/db_producer.rb diff --git a/lib/deimos.rb b/lib/deimos.rb index 5880f4e5..e3e48f7a 100644 --- a/lib/deimos.rb +++ b/lib/deimos.rb @@ -10,7 +10,6 @@ require 'deimos/active_record_producer' require 'deimos/active_record_consumer' require 'deimos/consumer' -require 'deimos/instrumentation' require 'deimos/backends/base' require 'deimos/backends/kafka' @@ -42,7 +41,16 @@ # Parent module. module Deimos + EVENT_TYPES = %w( + deimos.ar_consumer.consume_batch + deimos.encode_message + deimos.batch_consumption.invalid_records + deimos.batch_consumption.valid_records + deimos.outbox.produce + ) + class << self + # @return [Class] def schema_backend_class backend = Deimos.config.schema.backend.to_s @@ -115,6 +123,12 @@ def start_db_backend!(thread_count: 1) end end end + + def setup_karafka + EVENT_TYPES.each { |type| Karafka.monitor.notifications_bus.register_event(type) } + + end + # @return [Array] - def retrieve_topics - KafkaMessage.select('distinct topic').map(&:topic).uniq - end - - # @param topic [String] - # @return [String, nil] the topic that was locked, or nil if none were. - def process_topic(topic) - # If the topic is already locked, another producer is currently - # working on it. Move on to the next one. 
- unless KafkaTopicInfo.lock(topic, @id) - @logger.debug("Could not lock topic #{topic} - continuing") - return - end - @current_topic = topic - - loop { break unless process_topic_batch } - - KafkaTopicInfo.clear_lock(@current_topic, @id) - rescue StandardError => e - @logger.error("Error processing messages for topic #{@current_topic}: #{e.class.name}: #{e.message} #{e.backtrace.join("\n")}") - KafkaTopicInfo.register_error(@current_topic, @id) - shutdown_producer - end - - # Process a single batch in a topic. - # @return [void] - def process_topic_batch - messages = retrieve_messages - return false if messages.empty? - - batch_size = messages.size - compacted_messages = compact_messages(messages) - log_messages(compacted_messages) - Deimos.instrument('db_producer.produce', topic: @current_topic, messages: compacted_messages) do - begin - produce_messages(compacted_messages.map(&:phobos_message)) - rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge, Kafka::RecordListTooLarge => e - delete_messages(messages) - @logger.error('Message batch too large, deleting...') - begin - @logger.error(Deimos::KafkaMessage.decoded(messages)) - rescue StandardError => logging_exception # rubocop:disable Naming/RescuedExceptionsVariableName - @logger.error("Large message details logging failure: #{logging_exception.message}") - ensure - raise e - end - end - end - delete_messages(messages) - Deimos.config.metrics&.increment( - 'db_producer.process', - tags: %W(topic:#{@current_topic}), - by: messages.size - ) - return false if batch_size < BATCH_SIZE - - KafkaTopicInfo.heartbeat(@current_topic, @id) # keep alive - send_pending_metrics - true - end - - # @param messages [Array] - # @return [void] - def delete_messages(messages) - attempts = 1 - begin - messages.in_groups_of(DELETE_BATCH_SIZE, false).each do |batch| - Deimos::KafkaMessage.where(topic: batch.first.topic, - id: batch.map(&:id)). - delete_all - end - rescue StandardError => e - if (e.message =~ /Lock wait/i || e.message =~ /Lost connection/i) && - attempts <= MAX_DELETE_ATTEMPTS - attempts += 1 - ActiveRecord::Base.connection.verify! - sleep(1) - retry - end - raise - end - end - - # @return [Array] - def retrieve_messages - KafkaMessage.where(topic: @current_topic).order(:id).limit(BATCH_SIZE) - end - - # @param messages [Array] - # @return [void] - def log_messages(messages) - return if config.log_topics != :all && !config.log_topics.include?(@current_topic) - - @logger.debug do - decoded_messages = Deimos::KafkaMessage.decoded(messages) - "DB producer: Topic #{@current_topic} Producing messages: #{decoded_messages}}" - end - end - - # Send metrics related to pending messages. - # @return [void] - def send_pending_metrics - metrics = Deimos.config.metrics - return unless metrics - - topics = KafkaTopicInfo.select(%w(topic last_processed_at)) - messages = Deimos::KafkaMessage. - select('count(*) as num_messages, min(created_at) as earliest, topic'). - group(:topic). - index_by(&:topic) - topics.each do |record| - message_record = messages[record.topic] - # We want to record the last time we saw any activity, meaning either - # the oldest message, or the last time we processed, whichever comes - # last. 
- if message_record - record_earliest = message_record.earliest - # SQLite gives a string here - if record_earliest.is_a?(String) - record_earliest = Time.zone.parse(record_earliest) - end - - earliest = [record.last_processed_at, record_earliest].max - time_diff = Time.zone.now - earliest - metrics.gauge('pending_db_messages_max_wait', time_diff, - tags: ["topic:#{record.topic}"]) - else - # no messages waiting - metrics.gauge('pending_db_messages_max_wait', 0, - tags: ["topic:#{record.topic}"]) - end - metrics.gauge('pending_db_messages_count', message_record&.num_messages || 0, - tags: ["topic:#{record.topic}"]) - end - end - - # Shut down the sync producer if we have to. Phobos will automatically - # create a new one. We should call this if the producer can be in a bad - # state and e.g. we need to clear the buffer. - # @return [void] - def shutdown_producer - if self.class.producer.respond_to?(:sync_producer_shutdown) # Phobos 1.8.3 - self.class.producer.sync_producer_shutdown - end - end - - # Produce messages in batches, reducing the size 1/10 if the batch is too - # large. Does not retry batches of messages that have already been sent. - # @param batch [Array] - # @return [void] - def produce_messages(batch) - batch_size = batch.size - current_index = 0 - begin - batch[current_index..-1].in_groups_of(batch_size, false).each do |group| - @logger.debug("Publishing #{group.size} messages to #{@current_topic}") - producer.publish_list(group) - Deimos.config.metrics&.increment( - 'publish', - tags: %W(status:success topic:#{@current_topic}), - by: group.size - ) - current_index += group.size - @logger.info("Sent #{group.size} messages to #{@current_topic}") - end - rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge, - Kafka::RecordListTooLarge => e - if batch_size == 1 - shutdown_producer - raise - end - - @logger.error("Got error #{e.class.name} when publishing #{batch.size} in groups of #{batch_size}, retrying...") - batch_size = if batch_size < 10 - 1 - else - (batch_size / 10).to_i - end - shutdown_producer - retry - end - end - - # @param batch [Array] - # @return [Array] - def compact_messages(batch) - return batch if batch.first&.key.blank? - - topic = batch.first.topic - return batch if config.compact_topics != :all && - !config.compact_topics.include?(topic) - - batch.reverse.uniq(&:key).reverse! 
- end - end - end -end From 250df19dad4136b238c258fc9fa4350878941737 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:24:18 -0400 Subject: [PATCH 07/18] Additional schema-related changes --- lib/deimos.rb | 15 +++++- .../batch_consumption.rb | 3 ++ lib/deimos/active_record_producer.rb | 4 +- lib/deimos/config/configuration.rb | 34 ++++++++++---- lib/deimos/consumer.rb | 44 ----------------- lib/deimos/producer.rb | 43 ----------------- lib/deimos/schema_backends/avro_base.rb | 42 ++++++++--------- .../schema_backends/avro_schema_registry.rb | 1 - lib/deimos/schema_backends/avro_validation.rb | 4 +- lib/deimos/schema_backends/base.rb | 9 +++- lib/deimos/schema_backends/mock.rb | 7 ++- lib/deimos/schema_backends/plain.rb | 47 +++++++++++++++++++ lib/deimos/utils/schema_class.rb | 7 --- .../deimos/schema_class_generator.rb | 1 - 14 files changed, 127 insertions(+), 134 deletions(-) create mode 100644 lib/deimos/schema_backends/plain.rb diff --git a/lib/deimos.rb b/lib/deimos.rb index e3e48f7a..97189fb1 100644 --- a/lib/deimos.rb +++ b/lib/deimos.rb @@ -64,7 +64,7 @@ def schema_backend_class # @param namespace [String] # @return [Deimos::SchemaBackends::Base] def schema_backend(schema:, namespace:) - if Utils::SchemaClass.use?(config.to_h) + if config.schema.use_schema_classes # Initialize an instance of the provided schema # in the event the schema class is an override, the inherited # schema and namespace will be applied @@ -98,6 +98,19 @@ def decode(schema:, namespace:, payload:) self.schema_backend(schema: schema, namespace: namespace).decode(payload) end + # @param message [Hash] a Karafka message with keys :payload, :key and :topic + def decode_message(message) + topic = message[:topic] + if Deimos.config.producers.topic_prefix + topic = topic.sub(Deimos.config.producers.topic_prefix, '') + end + config = karafka_config_for(topic: topic) + message[:payload] = config.deserializers[:payload].decode_message_hash(message[:payload]) + if message[:key] && config.deserializers[:key].respond_to?(:decode_message_hash) + message[:key] = config.deserializers[:key].decode_message_hash(message[:key]) + end + end + # Start the DB producers to send Kafka messages. # @param thread_count [Integer] the number of threads to start. # @return [void] diff --git a/lib/deimos/active_record_consume/batch_consumption.rb b/lib/deimos/active_record_consume/batch_consumption.rb index ceac8601..7907f5a1 100644 --- a/lib/deimos/active_record_consume/batch_consumption.rb +++ b/lib/deimos/active_record_consume/batch_consumption.rb @@ -4,6 +4,7 @@ require 'deimos/active_record_consume/batch_record' require 'deimos/active_record_consume/batch_record_list' require 'deimos/active_record_consume/mass_updater' +require 'deimos/consume/batch_consumption' require 'deimos/utils/deadlock_retry' require 'deimos/message' @@ -14,6 +15,8 @@ module ActiveRecordConsume # Methods for consuming batches of messages and saving them to the database # in bulk ActiveRecord operations. module BatchConsumption + include Deimos::Consume::BatchConsumption + # Handle a batch of Kafka messages. Batches are split into "slices", # which are groups of independent messages that can be processed together # in a single database operation. 
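The Deimos.decode_message helper introduced above decodes a raw message hash in place, using the deserializers registered for the message's topic (after stripping the producer topic prefix, if one is configured). A small usage sketch, assuming the topic has already been declared through the routing DSL so deserializers exist; the topic name and encoded strings are placeholders:

raw_key     = '...' # encoded key bytes as read off the wire
raw_payload = '...' # encoded payload bytes

message = { topic: 'my-topic', key: raw_key, payload: raw_payload }
Deimos.decode_message(message)
message[:payload] # => decoded hash, or a schema class instance when use_schema_classes is on
message[:key]     # => decoded as well, when the key deserializer supports decode_message_hash
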
diff --git a/lib/deimos/active_record_producer.rb b/lib/deimos/active_record_producer.rb index b35fa569..63004824 100644 --- a/lib/deimos/active_record_producer.rb +++ b/lib/deimos/active_record_producer.rb @@ -73,9 +73,9 @@ def generate_payload(attributes, _record) payload.delete_if do |k, _| k.to_sym != :payload_key && !fields.map(&:name).include?(k) end - return payload unless Utils::SchemaClass.use?(config.to_h) + return payload unless self.config.use_schema_classes - Utils::SchemaClass.instance(payload, config[:schema], config[:namespace]) + Utils::SchemaClass.instance(payload, encoder.schema, encoder.namespace) end # Query to use when polling the database with the DbPoller. Add diff --git a/lib/deimos/config/configuration.rb b/lib/deimos/config/configuration.rb index efb778be..1d10cb42 100644 --- a/lib/deimos/config/configuration.rb +++ b/lib/deimos/config/configuration.rb @@ -16,19 +16,14 @@ module Deimos # rubocop:disable Metrics/ModuleLength end validate_consumers validate_db_backend if self.config.producers.backend == :db + generate_key_schemas end - # Loads generated classes - # @return [void] - def self.load_generated_schema_classes - if Deimos.config.schema.generated_class_path.nil? - raise 'Cannot use schema classes without schema.generated_class_path. Please provide a directory.' - end + class << self - Dir["./#{Deimos.config.schema.generated_class_path}/**/*.rb"].sort.each { |f| require f } - rescue LoadError - raise 'Cannot load schema classes. Please regenerate classes with rake deimos:generate_schema_models.' - end + def generate_key_schemas + Deimos.karafka_configs.each do |config| + transcoder = config.deserializers[:key] # Ensure everything is set up correctly for the DB backend. # @!visibility private @@ -57,11 +52,30 @@ def self.validate_consumers if delivery == 'inline_batch' if handler_class.instance_method(:consume_batch).owner == Deimos::Consume::BatchConsumption raise "BatchConsumer #{listener.handler} does not implement `consume_batch`" + if transcoder.respond_to?(:key_field) && transcoder.key_field + transcoder.backend = Deimos.schema_backend(schema: config.schema, + namespace: config.namespace) + transcoder.backend.generate_key_schema(transcoder.key_field) end elsif handler_class.instance_method(:consume).owner == Deimos::Consume::MessageConsumption raise "Non-batch Consumer #{listener.handler} does not implement `consume`" end end + + # Loads generated classes + # @return [void] + def load_generated_schema_classes + if Deimos.config.schema.generated_class_path.nil? + raise 'Cannot use schema classes without schema.generated_class_path. Please provide a directory.' + end + + Dir["./#{Deimos.config.schema.generated_class_path}/**/*.rb"].sort.each { |f| require f } + rescue LoadError + raise 'Cannot load schema classes. Please regenerate classes with rake deimos:generate_schema_models.' 
+ end + + end + end end # rubocop:enable Metrics/PerceivedComplexity, Metrics/AbcSize diff --git a/lib/deimos/consumer.rb b/lib/deimos/consumer.rb index f9932bc3..99661c66 100644 --- a/lib/deimos/consumer.rb +++ b/lib/deimos/consumer.rb @@ -16,51 +16,7 @@ class Consumer include Consume::BatchConsumption include SharedConfig - class << self - # @return [Deimos::SchemaBackends::Base] - def decoder - @decoder ||= Deimos.schema_backend(schema: config[:schema], - namespace: config[:namespace]) end - - # @return [Deimos::SchemaBackends::Base] - def key_decoder - @key_decoder ||= Deimos.schema_backend(schema: config[:key_schema], - namespace: config[:namespace]) - end - end - - # Helper method to decode an encoded key. - # @param key [String] - # @return [Object] the decoded key. - def decode_key(key) - return nil if key.nil? - - config = self.class.config - unless config[:key_configured] - raise 'No key config given - if you are not decoding keys, please use '\ - '`key_config plain: true`' - end - - if config[:key_field] - self.class.decoder.decode_key(key, config[:key_field]) - elsif config[:key_schema] - self.class.key_decoder.decode(key, schema: config[:key_schema]) - else # no encoding - key - end - end - - # Helper method to decode an encoded message. - # @param payload [Object] - # @return [Object] the decoded message. - def decode_message(payload) - decoded_payload = payload.nil? ? nil : self.class.decoder.decode(payload) - return decoded_payload unless Utils::SchemaClass.use?(self.class.config.to_h) - - Utils::SchemaClass.instance(decoded_payload, - self.class.config[:schema], - self.class.config[:namespace]) end private diff --git a/lib/deimos/producer.rb b/lib/deimos/producer.rb index 46a19ae0..8d477e95 100644 --- a/lib/deimos/producer.rb +++ b/lib/deimos/producer.rb @@ -63,27 +63,6 @@ class Producer class << self - # @return [Hash] - def config - @config ||= { - encode_key: true, - namespace: Deimos.config.producers.schema_namespace, - max_batch_size: Deimos.config.producers.max_batch_size - } - end - - # Set the topic. - # @param topic [String] - # @return [String] the current topic if no argument given. - def topic(topic=nil) - if topic - config[:topic] = topic - return - end - # accessor - "#{Deimos.config.producers.topic_prefix}#{config[:topic]}" - end - # Override the default partition key (which is the payload key). # @param _payload [Hash] the payload being passed into the produce method. # Will include `payload_key` if it is part of the original payload. @@ -197,28 +176,6 @@ def _process_message(message, topic) end end - # @param key [Object] - # @return [String|Object] - def _encode_key(key) - if key.nil? - return nil if config[:no_keys] # no key is fine, otherwise it's a problem - - raise 'No key given but a key is required! Use `key_config none: true` to avoid using keys.' - end - if config[:encode_key] && config[:key_field].nil? && - config[:key_schema].nil? 
- raise 'No key config given - if you are not encoding keys, please use `key_config plain: true`' - end - - if config[:key_field] - encoder.encode_key(config[:key_field], key, topic: "#{Deimos.config.producers.topic_prefix}#{config[:topic]}-key") - elsif config[:key_schema] - key_encoder.encode(key, topic: "#{Deimos.config.producers.topic_prefix}#{config[:topic]}-key") - else - key - end - end - # @param payload [Hash] # @return [String] def _retrieve_key(payload) diff --git a/lib/deimos/schema_backends/avro_base.rb b/lib/deimos/schema_backends/avro_base.rb index 7169429d..0c9704c6 100644 --- a/lib/deimos/schema_backends/avro_base.rb +++ b/lib/deimos/schema_backends/avro_base.rb @@ -20,15 +20,19 @@ def initialize(schema:, namespace:) # @override def encode_key(key_id, key, topic: nil) - @key_schema ||= _generate_key_schema(key_id) + begin + @key_schema ||= @schema_store.find("#{@schema}_key") + rescue AvroTurf::SchemaNotFoundError + @key_schema = generate_key_schema(key_id) + end field_name = _field_name_from_schema(@key_schema) - payload = { field_name => key } + payload = key.is_a?(Hash) ? key : { field_name => key } encode(payload, schema: @key_schema['name'], topic: topic) end # @override def decode_key(payload, key_id) - @key_schema ||= _generate_key_schema(key_id) + @key_schema ||= generate_key_schema(key_id) field_name = _field_name_from_schema(@key_schema) decode(payload, schema: @key_schema['name'])[field_name] end @@ -85,7 +89,7 @@ def load_schema # @override def self.mock_backend - :avro_validation + :avro_local end # @override @@ -146,21 +150,8 @@ def self.schema_base_class(schema) end end - private - - # @param schema [String] - # @return [Avro::Schema] - def avro_schema(schema=nil) - schema ||= @schema - @schema_store.find(schema, @namespace) - end - - # Generate a key schema from the given value schema and key ID. This - # is used when encoding or decoding keys from an existing value schema. 
- # @param key_id [Symbol] - # @return [Hash] - def _generate_key_schema(key_id) - key_field = avro_schema.fields.find { |f| f.name == key_id.to_s } + def generate_key_schema(field_name) + key_field = avro_schema.fields.find { |f| f.name == field_name.to_s } name = _key_schema_name(@schema) key_schema = { 'type' => 'record', @@ -169,13 +160,22 @@ def _generate_key_schema(key_id) 'doc' => "Key for #{@namespace}.#{@schema} - autogenerated by Deimos", 'fields' => [ { - 'name' => key_id, + 'name' => field_name, 'type' => key_field.type.type_sym.to_s } ] } @schema_store.add_schema(key_schema) - key_schema + @key_schema = key_schema + end + + private + + # @param schema [String] + # @return [Avro::Schema] + def avro_schema(schema=nil) + schema ||= @schema + @schema_store.find(schema, @namespace) end # @param value_schema [Hash] diff --git a/lib/deimos/schema_backends/avro_schema_registry.rb b/lib/deimos/schema_backends/avro_schema_registry.rb index e67a6952..374e628e 100644 --- a/lib/deimos/schema_backends/avro_schema_registry.rb +++ b/lib/deimos/schema_backends/avro_schema_registry.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require_relative 'avro_base' -require_relative 'avro_validation' require 'avro_turf/messaging' module Deimos diff --git a/lib/deimos/schema_backends/avro_validation.rb b/lib/deimos/schema_backends/avro_validation.rb index 2842d23f..3f9f3dcb 100644 --- a/lib/deimos/schema_backends/avro_validation.rb +++ b/lib/deimos/schema_backends/avro_validation.rb @@ -9,12 +9,12 @@ module SchemaBackends class AvroValidation < AvroBase # @override def decode_payload(payload, schema: nil) - payload.with_indifferent_access + JSON.parse(payload) end # @override def encode_payload(payload, schema: nil, topic: nil) - payload.with_indifferent_access + payload.to_h.with_indifferent_access.to_json end end end diff --git a/lib/deimos/schema_backends/base.rb b/lib/deimos/schema_backends/base.rb index 54a2fad7..aa62c656 100644 --- a/lib/deimos/schema_backends/base.rb +++ b/lib/deimos/schema_backends/base.rb @@ -79,7 +79,7 @@ def coerce(payload) # Indicate a class which should act as a mocked version of this backend. # This class should perform all validations but not actually do any # encoding. - # Note that the "mock" version (e.g. avro_validation) should return + # Note that the "mock" version should return # its own symbol when this is called, since it may be called multiple # times depending on the order of RSpec helpers. # @return [Symbol] @@ -153,6 +153,13 @@ def sql_type(field) raise MissingImplementationError end + # Generate a key schema from the given value schema and key ID. This + # is used when encoding or decoding keys from an existing value schema. + # @param field_name [Symbol] + def generate_key_schema(field_name) + raise MissingImplementationError + end + # Encode a message key. To be defined by subclass. # @param key [String,Hash] the value to use as the key. # @param key_id [String,Symbol] the field name of the key. diff --git a/lib/deimos/schema_backends/mock.rb b/lib/deimos/schema_backends/mock.rb index 0b5003d3..a666bda9 100644 --- a/lib/deimos/schema_backends/mock.rb +++ b/lib/deimos/schema_backends/mock.rb @@ -4,6 +4,11 @@ module Deimos module SchemaBackends # Mock implementation of a schema backend that does no encoding or validation. class Mock < Base + + # @override + def generate_key_schema(field_name) + end + # @override def decode_payload(payload, schema:) payload.is_a?(String) ? 
'payload-decoded' : payload.map { |k, v| [k, "decoded-#{v}"] } @@ -11,7 +16,7 @@ def decode_payload(payload, schema:) # @override def encode_payload(payload, schema:, topic: nil) - payload.is_a?(String) ? 'payload-encoded' : payload.map { |k, v| [k, "encoded-#{v}"] } + payload.is_a?(String) ? 'payload-encoded' : payload.map { |k, v| [k, "encoded-#{v}"] }.to_json end # @override diff --git a/lib/deimos/schema_backends/plain.rb b/lib/deimos/schema_backends/plain.rb new file mode 100644 index 00000000..df7fafd9 --- /dev/null +++ b/lib/deimos/schema_backends/plain.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +module Deimos + module SchemaBackends + # Schema backend that passes through as a basic string. + class Plain < Base + + # @override + def generate_key_schema(field_name) + end + + # @override + def decode_payload(payload, schema:) + payload + end + + # @override + def encode_payload(payload, schema:, topic: nil) + payload.to_s + end + + # @override + def validate(payload, schema:) + end + + # @override + def schema_fields + [] + end + + # @override + def coerce_field(_field, value) + value + end + + # @override + def encode_key(key_id, key, topic: nil) + key + end + + # @override + def decode_key(payload, key_id) + payload[key_id] + end + end + end +end diff --git a/lib/deimos/utils/schema_class.rb b/lib/deimos/utils/schema_class.rb index 45093a46..0649a39b 100644 --- a/lib/deimos/utils/schema_class.rb +++ b/lib/deimos/utils/schema_class.rb @@ -55,13 +55,6 @@ def klass(schema, namespace) constants.join('::').safe_constantize end - # @param config [Hash] Producer or Consumer config - # @return [Boolean] - def use?(config) - use_schema_classes = config[:use_schema_classes] - use_schema_classes.present? ? use_schema_classes : Deimos.config.schema.use_schema_classes - end - end end end diff --git a/lib/generators/deimos/schema_class_generator.rb b/lib/generators/deimos/schema_class_generator.rb index 93b58567..30a18b90 100644 --- a/lib/generators/deimos/schema_class_generator.rb +++ b/lib/generators/deimos/schema_class_generator.rb @@ -3,7 +3,6 @@ require 'rails/generators' require 'deimos' require 'deimos/schema_backends/avro_base' -require 'deimos/config/configuration' # Generates new schema classes. 
module Deimos From b339b58e653900ce27838ab75f588053e2d533e2 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:30:49 -0400 Subject: [PATCH 08/18] Metrics changes --- lib/deimos/backends/kafka.rb | 10 +++--- lib/deimos/backends/kafka_async.rb | 11 +++--- lib/deimos/consume/batch_consumption.rb | 41 +++++------------------ lib/deimos/consume/message_consumption.rb | 28 +++------------- lib/deimos/consumer.rb | 17 ---------- lib/deimos/metrics/datadog.rb | 41 ++++++++++++++++++++++- lib/deimos/producer.rb | 7 ---- 7 files changed, 63 insertions(+), 92 deletions(-) diff --git a/lib/deimos/backends/kafka.rb b/lib/deimos/backends/kafka.rb index 510e8435..84347ad4 100644 --- a/lib/deimos/backends/kafka.rb +++ b/lib/deimos/backends/kafka.rb @@ -7,11 +7,11 @@ class Kafka < Base # :nodoc: def self.execute(producer_class:, messages:) producer.publish_list(messages.map(&:encoded_hash)) - Deimos.config.metrics&.increment( - 'publish', - tags: %W(status:success topic:#{producer_class.topic}), - by: messages.size - ) + Deimos.config.metrics&.increment( + 'publish', + tags: %W(status:success topic:#{messages.first[:topic]}), + by: messages.size + ) end end end diff --git a/lib/deimos/backends/kafka_async.rb b/lib/deimos/backends/kafka_async.rb index 76b4710e..06ad1b2e 100644 --- a/lib/deimos/backends/kafka_async.rb +++ b/lib/deimos/backends/kafka_async.rb @@ -7,11 +7,12 @@ class KafkaAsync < Base # :nodoc: def self.execute(producer_class:, messages:) producer.async_publish_list(messages.map(&:encoded_hash)) - Deimos.config.metrics&.increment( - 'publish', - tags: %W(status:success topic:#{producer_class.topic}), - by: messages.size - ) + Karafka.producer.produce_many_async(messages) + Deimos.config.metrics&.increment( + 'publish', + tags: %W(status:success topic:#{messages.first[:topic]}), + by: messages.size + ) end end end diff --git a/lib/deimos/consume/batch_consumption.rb b/lib/deimos/consume/batch_consumption.rb index 5f0c5fd8..8e81af12 100644 --- a/lib/deimos/consume/batch_consumption.rb +++ b/lib/deimos/consume/batch_consumption.rb @@ -78,17 +78,11 @@ def _received_batch(payloads, metadata) # @param payloads [Array] # @param metadata [Hash] def _handle_batch_error(exception, payloads, metadata) - Deimos.config.metrics&.increment( - 'handler', - tags: %W( - status:batch_error - topic:#{metadata[:topic]} - )) Deimos::Logging.log_warn( message: 'Error consuming message batch', handler: self.class.name, - metadata: metadata.except(:keys), - message_ids: _payload_identifiers(payloads, metadata), + metadata: Deimos::Logging.metadata_log_text(messages.metadata), + messages: Deimos::Logging.messages_log_text(self.topic.payload_log, messages), error_message: exception.message, error: exception.backtrace ) @@ -100,31 +94,12 @@ def _handle_batch_error(exception, payloads, metadata) # @param payloads [Array] # @param metadata [Hash] def _handle_batch_success(time_taken, payloads, metadata) - Deimos.config.metrics&.histogram('handler', - time_taken, - tags: %W( - time:consume_batch - topic:#{metadata[:topic]} - )) - Deimos.config.metrics&.increment( - 'handler', - tags: %W( - status:batch_success - topic:#{metadata[:topic]} - )) - Deimos.config.metrics&.increment( - 'handler', - by: metadata[:batch_size], - tags: %W( - status:success - topic:#{metadata[:topic]} - )) - Deimos.config.logger.info( - message: 'Finished processing Kafka batch event', - message_ids: _payload_identifiers(payloads, metadata), - time_elapsed: time_taken, - metadata: metadata.except(:keys) - ) + 
Deimos::Logging.log_info( + { + message: 'Finished processing Kafka batch event', + time_elapsed: time_taken, + metadata: Deimos::Logging.metadata_log_text(messages.metadata) + }.merge(Deimos::Logging.messages_log_text(self.topic.payload_log, messages))) end # @!visibility private diff --git a/lib/deimos/consume/message_consumption.rb b/lib/deimos/consume/message_consumption.rb index e853f2ba..5c253a56 100644 --- a/lib/deimos/consume/message_consumption.rb +++ b/lib/deimos/consume/message_consumption.rb @@ -40,31 +40,19 @@ def _received_message(payload, metadata) Deimos::Logging.log_info( message: 'Got Kafka event', payload: payload, - metadata: metadata + metadata: Deimos::Logging.metadata_log_text(message.metadata) ) - Deimos.config.metrics&.increment('handler', tags: %W( - status:received - topic:#{metadata[:topic]} - )) - _report_time_delayed(payload, metadata) end # @param exception [Throwable] # @param payload [Hash] # @param metadata [Hash] def _handle_error(exception, payload, metadata) - Deimos.config.metrics&.increment( - 'handler', - tags: %W( - status:error - topic:#{metadata[:topic]} - ) - ) - Deimos.config.logger.warn( + Deimos::Logging.log_warn( message: 'Error consuming message', handler: self.class.name, - metadata: metadata, data: payload, + metadata: Deimos::Logging.metadata_log_text(message.metadata), error_message: exception.message, error: exception.backtrace ) @@ -76,19 +64,11 @@ def _handle_error(exception, payload, metadata) # @param payload [Hash] # @param metadata [Hash] def _handle_success(time_taken, payload, metadata) - Deimos.config.metrics&.histogram('handler', time_taken, tags: %W( - time:consume - topic:#{metadata[:topic]} - )) - Deimos.config.metrics&.increment('handler', tags: %W( - status:success - topic:#{metadata[:topic]} - )) Deimos::Logging.log_info( message: 'Finished processing Kafka event', payload: payload, time_elapsed: time_taken, - metadata: metadata + metadata: Deimos::Logging.metadata_log_text(message.metadata) ) end end diff --git a/lib/deimos/consumer.rb b/lib/deimos/consumer.rb index 99661c66..76d77848 100644 --- a/lib/deimos/consumer.rb +++ b/lib/deimos/consumer.rb @@ -31,23 +31,6 @@ def _with_span Deimos.config.tracer&.finish(@span) end - def _report_time_delayed(payload, metadata) - return if payload.nil? || payload['timestamp'].blank? - - begin - time_delayed = Time.now.in_time_zone - payload['timestamp'].to_datetime - rescue ArgumentError - Deimos.config.logger.info( - message: "Error parsing timestamp! #{payload['timestamp']}" - ) - return - end - Deimos.config.metrics&.histogram('handler', time_delayed, tags: %W( - time:time_delayed - topic:#{metadata[:topic]} - )) - end - # Overrideable method to determine if a given error should be considered # "fatal" and always be reraised. # @param _error [Exception] diff --git a/lib/deimos/metrics/datadog.rb b/lib/deimos/metrics/datadog.rb index 25e7980d..803f5102 100644 --- a/lib/deimos/metrics/datadog.rb +++ b/lib/deimos/metrics/datadog.rb @@ -1,12 +1,15 @@ # frozen_string_literal: true require 'deimos/metrics/provider' +require 'karafka/instrumentation/vendors/datadog/metrics_listener' +require 'waterdrop/instrumentation/vendors/datadog/metrics_listener' module Deimos module Metrics # A Metrics wrapper class for Datadog. class Datadog < Metrics::Provider - # @param config [Hash] + + # @param config [Hash] a hash of both client and Karakfa MetricsListener configs. # @param logger [Logger] def initialize(config, logger) raise 'Metrics config must specify host_ip' if config[:host_ip].nil? 
@@ -14,12 +17,48 @@ def initialize(config, logger) raise 'Metrics config must specify namespace' if config[:namespace].nil? logger.info("DatadogMetricsProvider configured with: #{config}") + @client = ::Datadog::Statsd.new( config[:host_ip], config[:host_port], tags: config[:tags], namespace: config[:namespace] ) + setup_karafka(config) + setup_waterdrop(config) + end + + def setup_karafka(config={}) + karafka_listener = ::Karafka::Instrumentation::Vendors::Datadog::MetricsListener.new do |karafka_config| + karafka_config.client = @client + if config[:karafka_namespace] + karafka_config.namespace = config[:karafka_namespace] + end + if config[:karafka_distribution_mode] + karafka_config.distribution_mode = config[:karafka_distribution_mode] + end + if config[:rd_kafka_metrics] + karafka_config.rd_kafka_metrics = config[:rd_kafka_metrics] + end + end + Karafka.monitor.subscribe(karafka_listener) + end + + def setup_waterdrop(config) + waterdrop_listener = ::WaterDrop::Instrumentation::Vendors::Datadog::MetricsListener.new do |waterdrop_config| + waterdrop_config.client = @client + if config[:karafka_namespace] + waterdrop_config.namespace = config[:karafka_namespace] + end + if config[:karafka_distribution_mode] + waterdrop_config.distribution_mode = config[:karafka_distribution_mode] + end + if config[:rd_kafka_metrics] + karafka_config.rd_kafka_metrics = [] # handled in Karafka + end + end + Karafka::Setup::Config.setup if Karafka.producer.nil? + Karafka.producer.monitor.subscribe(waterdrop_listener) end # :nodoc: diff --git a/lib/deimos/producer.rb b/lib/deimos/producer.rb index 8d477e95..fa710e9f 100644 --- a/lib/deimos/producer.rb +++ b/lib/deimos/producer.rb @@ -97,17 +97,10 @@ def publish_list(payloads, sync: nil, force_send: false, topic: self.topic, head raise 'Topic not specified. Please specify the topic.' if topic.blank? backend_class = determine_backend_class(sync, force_send) - Deimos.instrument( - 'encode_messages', - producer: self, - topic: topic, - payloads: payloads - ) do messages = Array(payloads).map { |p| Deimos::Message.new(p.to_h, self, headers: headers) } messages.each { |m| _process_message(m, topic) } messages.in_groups_of(self.config[:max_batch_size], false) do |batch| self.produce_batch(backend_class, batch) - end end end From c33168ee6fc982e25466e98fdad2fb6e0e5a57ef Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:32:24 -0400 Subject: [PATCH 09/18] Remaining consumer flow --- .../batch_consumption.rb | 24 ++--- .../message_consumption.rb | 33 +++--- lib/deimos/config/configuration.rb | 17 --- lib/deimos/consume/batch_consumption.rb | 102 +++--------------- lib/deimos/consume/message_consumption.rb | 64 +++++------ lib/deimos/consumer.rb | 30 +++--- 6 files changed, 81 insertions(+), 189 deletions(-) diff --git a/lib/deimos/active_record_consume/batch_consumption.rb b/lib/deimos/active_record_consume/batch_consumption.rb index 7907f5a1..d5454636 100644 --- a/lib/deimos/active_record_consume/batch_consumption.rb +++ b/lib/deimos/active_record_consume/batch_consumption.rb @@ -23,22 +23,18 @@ module BatchConsumption # If two messages in a batch have the same key, we cannot process them # in the same operation as they would interfere with each other. Thus # they are split - # @param payloads [Array] Decoded payloads - # @param metadata [Hash] Information about batch, including keys. # @return [void] - def consume_batch(payloads, metadata) - messages = payloads. - zip(metadata[:keys]). 
- map { |p, k| Deimos::Message.new(p, nil, key: k) } + def consume_batch + deimos_messages = messages.map { |p| Deimos::Message.new(p.payload, key: p.key) } - tag = metadata[:topic] + tag = topic.name Deimos.config.tracer.active_span.set_tag('topic', tag) - if @compacted || self.class.config[:no_keys] - update_database(compact_messages(messages)) Karafka.monitor.instrument('deimos.ar_consumer.consume_batch', {topic: tag}) do + if @compacted && deimos_messages.map(&:key).compact.any? + update_database(compact_messages(deimos_messages)) else - uncompacted_update(messages) + uncompacted_update(deimos_messages) end end end @@ -170,10 +166,10 @@ def upsert_records(messages) updater = MassUpdater.new(@klass, key_col_proc: key_col_proc, col_proc: col_proc, - replace_associations: self.class.replace_associations, - bulk_import_id_generator: self.class.bulk_import_id_generator, - save_associations_first: self.class.save_associations_first, - bulk_import_id_column: self.class.bulk_import_id_column) + replace_associations: self.replace_associations, + bulk_import_id_generator: self.bulk_import_id_generator, + save_associations_first: self.save_associations_first, + bulk_import_id_column: self.bulk_import_id_column) Karafka.monitor.instrument('deimos.batch_consumption.valid_records', { records: updater.mass_update(record_list), consumer: self.class diff --git a/lib/deimos/active_record_consume/message_consumption.rb b/lib/deimos/active_record_consume/message_consumption.rb index 9d58ff47..f01b297f 100644 --- a/lib/deimos/active_record_consume/message_consumption.rb +++ b/lib/deimos/active_record_consume/message_consumption.rb @@ -29,38 +29,29 @@ def assign_key(record, _payload, key) record[record.class.primary_key] = key end - # @param payload [Hash,Deimos::SchemaClass::Record] Decoded payloads - # @param metadata [Hash] Information about batch, including keys. - # @return [void] - def consume(payload, metadata) - unless self.process_message?(payload) - Deimos.config.logger.debug( - message: 'Skipping processing of message', - payload: payload, - metadata: metadata - ) + # @param message [Karafka::Messages::Message] + def consume_message(message) + unless self.process_message?(message) + Deimos::Logging.log_debug( + message: 'Skipping processing of message', + payload: message.payload.to_h, + metadata: Deimos::Logging.metadata_log_text(message.metadata) + ) return end - key = metadata.with_indifferent_access[:key] klass = self.class.config[:record_class] - record = fetch_record(klass, (payload || {}).with_indifferent_access, key) - if payload.nil? + record = fetch_record(klass, message.payload.to_h.with_indifferent_access, message.key) + if message.payload.nil? destroy_record(record) return end if record.blank? 
record = klass.new - assign_key(record, payload, key) + assign_key(record, message.payload, message.key) end - # for backwards compatibility - # TODO next major release we should deprecate this - attrs = if self.method(:record_attributes).parameters.size == 2 - record_attributes(payload.with_indifferent_access, key) - else - record_attributes(payload.with_indifferent_access) - end + attrs = record_attributes((message.payload || {}).with_indifferent_access, message.key) # don't use attributes= - bypass Rails < 5 attr_protected attrs.each do |k, v| record.send("#{k}=", v) diff --git a/lib/deimos/config/configuration.rb b/lib/deimos/config/configuration.rb index 1d10cb42..c2fa5a1e 100644 --- a/lib/deimos/config/configuration.rb +++ b/lib/deimos/config/configuration.rb @@ -14,7 +14,6 @@ module Deimos # rubocop:disable Metrics/ModuleLength if self.config.schema.use_schema_classes load_generated_schema_classes end - validate_consumers validate_db_backend if self.config.producers.backend == :db generate_key_schemas end @@ -38,27 +37,11 @@ def self.validate_db_backend end end - # Validate that consumers are configured correctly, including their - # delivery mode. - # @!visibility private - def self.validate_consumers - Phobos.config.listeners.each do |listener| - handler_class = listener.handler.constantize - delivery = listener.delivery - - next unless handler_class < Deimos::Consumer - - # Validate that each consumer implements the correct method for its type - if delivery == 'inline_batch' - if handler_class.instance_method(:consume_batch).owner == Deimos::Consume::BatchConsumption - raise "BatchConsumer #{listener.handler} does not implement `consume_batch`" if transcoder.respond_to?(:key_field) && transcoder.key_field transcoder.backend = Deimos.schema_backend(schema: config.schema, namespace: config.namespace) transcoder.backend.generate_key_schema(transcoder.key_field) end - elsif handler_class.instance_method(:consume).owner == Deimos::Consume::MessageConsumption - raise "Non-batch Consumer #{listener.handler} does not implement `consume`" end end diff --git a/lib/deimos/consume/batch_consumption.rb b/lib/deimos/consume/batch_consumption.rb index 8e81af12..ffbfdee9 100644 --- a/lib/deimos/consume/batch_consumption.rb +++ b/lib/deimos/consume/batch_consumption.rb @@ -8,76 +8,28 @@ module Consume module BatchConsumption extend ActiveSupport::Concern - # @param batch [Array] - # @param metadata [Hash] - # @return [void] - def around_consume_batch(batch, metadata) - payloads = [] - _with_span do - benchmark = Benchmark.measure do - if self.class.config[:key_configured] - metadata[:keys] = batch.map do |message| - decode_key(message.key) - end - end - metadata[:first_offset] = batch.first&.offset - - payloads = batch.map do |message| - decode_message(message.payload) - end - _received_batch(payloads, metadata) - yield(payloads, metadata) - end - _handle_batch_success(benchmark.real, payloads, metadata) - end - rescue StandardError => e - _handle_batch_error(e, payloads, metadata) - end - - # Consume a batch of incoming messages. 
- # @param _payloads [Array] - # @param _metadata [Hash] - # @return [void] - def consume_batch(_payloads, _metadata) + def consume_batch raise MissingImplementationError end protected - # @!visibility private - def _received_batch(payloads, metadata) - Deimos.config.logger.info( - message: 'Got Kafka batch event', - message_ids: _payload_identifiers(payloads, metadata), - metadata: metadata.except(:keys) - ) - Deimos.config.logger.debug( - message: 'Kafka batch event payloads', - payloads: payloads - ) - Deimos.config.metrics&.increment( - 'handler', - tags: %W( - status:batch_received - topic:#{metadata[:topic]} - )) - Deimos.config.metrics&.increment( - 'handler', - by: metadata[:batch_size], - tags: %W( - status:received - topic:#{metadata[:topic]} - )) - if payloads.present? - payloads.each { |payload| _report_time_delayed(payload, metadata) } + def _consume_batch + _with_span do + begin + benchmark = Benchmark.measure do + consume_batch + end + _handle_batch_success(benchmark.real) + rescue StandardError => e + _handle_batch_error(e) + end end end # @!visibility private # @param exception [Throwable] - # @param payloads [Array] - # @param metadata [Hash] - def _handle_batch_error(exception, payloads, metadata) + def _handle_batch_error(exception) Deimos::Logging.log_warn( message: 'Error consuming message batch', handler: self.class.name, @@ -86,14 +38,12 @@ def _handle_batch_error(exception, payloads, metadata) error_message: exception.message, error: exception.backtrace ) - _error(exception, payloads, metadata) + _error(exception, messages) end # @!visibility private # @param time_taken [Float] - # @param payloads [Array] - # @param metadata [Hash] - def _handle_batch_success(time_taken, payloads, metadata) + def _handle_batch_success(time_taken) Deimos::Logging.log_info( { message: 'Finished processing Kafka batch event', @@ -102,30 +52,6 @@ def _handle_batch_success(time_taken, payloads, metadata) }.merge(Deimos::Logging.messages_log_text(self.topic.payload_log, messages))) end - # @!visibility private - # Get payload identifiers (key and message_id if present) for logging. - # @param payloads [Array] - # @param metadata [Hash] - # @return [Array] the identifiers. - def _payload_identifiers(payloads, metadata) - message_ids = payloads&.map do |payload| - if payload.is_a?(Hash) && payload.key?('message_id') - payload['message_id'] - end - end - - # Payloads may be nil if preprocessing failed - messages = payloads || metadata[:keys] || [] - - messages.zip(metadata[:keys] || [], message_ids || []).map do |_, k, m_id| - ids = {} - - ids[:key] = k if k.present? - ids[:message_id] = m_id if m_id.present? - - ids - end - end end end end diff --git a/lib/deimos/consume/message_consumption.rb b/lib/deimos/consume/message_consumption.rb index 5c253a56..e2613358 100644 --- a/lib/deimos/consume/message_consumption.rb +++ b/lib/deimos/consume/message_consumption.rb @@ -7,67 +7,61 @@ module Consume module MessageConsumption extend ActiveSupport::Concern - # @param payload [String] - # @param metadata [Hash] - # @return [void] - def around_consume(payload, metadata) - decoded_payload = payload.nil? ? 
nil : payload.dup - new_metadata = metadata.dup - benchmark = Benchmark.measure do - _with_span do - new_metadata[:key] = decode_key(metadata[:key]) if self.class.config[:key_configured] - decoded_payload = decode_message(payload) - _received_message(decoded_payload, new_metadata) - yield(decoded_payload, new_metadata) - end - end - _handle_success(benchmark.real, decoded_payload, new_metadata) - rescue StandardError => e - _handle_error(e, decoded_payload, new_metadata) - end - # Consume incoming messages. - # @param _payload [String] - # @param _metadata [Hash] + # @param _message [Karafka::Messages::Message] # @return [void] - def consume(_payload, _metadata) + def consume_message(_message) raise MissingImplementationError end private - def _received_message(payload, metadata) + def _consume_messages + messages.each do |message| + begin + _with_span do + _received_message(message) + benchmark = Benchmark.measure do + consume_message(message) + end + _handle_success(message, benchmark.real) + rescue StandardError => e + _handle_message_error(e, message) + end + end + end + end + + def _received_message(message) Deimos::Logging.log_info( message: 'Got Kafka event', - payload: payload, + payload: message.payload, metadata: Deimos::Logging.metadata_log_text(message.metadata) ) end # @param exception [Throwable] - # @param payload [Hash] - # @param metadata [Hash] - def _handle_error(exception, payload, metadata) + # @param message [Karafka::Messages::Message] + def _handle_message_error(exception, message) Deimos::Logging.log_warn( message: 'Error consuming message', handler: self.class.name, - data: payload, metadata: Deimos::Logging.metadata_log_text(message.metadata), + key: message.key, + data: message.payload, error_message: exception.message, error: exception.backtrace ) - _error(exception, payload, metadata) + _error(exception, Karafka::Messages::Messages.new([message], messages.metadata)) end - # @param time_taken [Float] - # @param payload [Hash] - # @param metadata [Hash] - def _handle_success(time_taken, payload, metadata) + def _handle_success(message, benchmark) + mark_as_consumed(message) Deimos::Logging.log_info( message: 'Finished processing Kafka event', - payload: payload, - time_elapsed: time_taken, + payload: message.payload, + time_elapsed: benchmark, metadata: Deimos::Logging.metadata_log_text(message.metadata) ) end diff --git a/lib/deimos/consumer.rb b/lib/deimos/consumer.rb index 76d77848..64be46c3 100644 --- a/lib/deimos/consumer.rb +++ b/lib/deimos/consumer.rb @@ -7,15 +7,18 @@ # Note: According to the docs, instances of your handler will be created # for every incoming message/batch. This class should be lightweight. module Deimos - # Basic consumer class. Inherit from this class and override either consume - # or consume_batch, depending on the delivery mode of your listener. - # `consume` -> use `delivery :message` or `delivery :batch` - # `consume_batch` -> use `delivery :inline_batch` - class Consumer + # Basic consumer class. Inherit from this class and override either consume_message + # or consume_batch, depending on the `:batch` config setting. + class Consumer < Karafka::BaseConsumer include Consume::MessageConsumption include Consume::BatchConsumption include SharedConfig + def consume + if self.topic.each_message + _consume_messages + else + _consume_batch end end @@ -34,22 +37,21 @@ def _with_span # Overrideable method to determine if a given error should be considered # "fatal" and always be reraised. 
# @param _error [Exception] - # @param _payload [Hash] - # @param _metadata [Hash] + # @param _messages [Array] # @return [Boolean] - def fatal_error?(_error, _payload, _metadata) + def fatal_error?(_error, _messages) false end # @param exception [Exception] - # @param payload [Hash] - # @param metadata [Hash] - def _error(exception, payload, metadata) + # @param messages [Array] + def _error(exception, messages) Deimos.config.tracer&.set_error(@span, exception) - raise if Deimos.config.consumers.reraise_errors || - Deimos.config.consumers.fatal_error&.call(exception, payload, metadata) || - fatal_error?(exception, payload, metadata) + raise if self.topic.reraise_errors || + Deimos.config.consumers.fatal_error&.call(exception, messages) || + fatal_error?(exception, messages) end + end end From eb34830eb3d479287a14085516730a5aba3e3c89 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:33:39 -0400 Subject: [PATCH 10/18] Remaining producer flow --- lib/deimos.rb | 10 +++ lib/deimos/active_record_producer.rb | 8 +++ lib/deimos/backends/base.rb | 2 +- lib/deimos/backends/kafka.rb | 2 +- lib/deimos/backends/kafka_async.rb | 1 - lib/deimos/ext/producer_middleware.rb | 94 +++++++++++++++++++++++++++ lib/deimos/kafka_source.rb | 7 +- lib/deimos/message.rb | 13 +--- lib/deimos/producer.rb | 85 +++++++++--------------- 9 files changed, 148 insertions(+), 74 deletions(-) create mode 100644 lib/deimos/ext/producer_middleware.rb diff --git a/lib/deimos.rb b/lib/deimos.rb index 97189fb1..687bd451 100644 --- a/lib/deimos.rb +++ b/lib/deimos.rb @@ -24,6 +24,7 @@ require 'deimos/ext/schema_route' require 'deimos/ext/consumer_route' require 'deimos/ext/producer_route' +require 'deimos/ext/producer_middleware' require 'deimos/railtie' if defined?(Rails) @@ -138,8 +139,17 @@ def start_db_backend!(thread_count: 1) end def setup_karafka + Karafka.producer.middleware.append(Deimos::ProducerMiddleware) EVENT_TYPES.each { |type| Karafka.monitor.notifications_bus.register_event(type) } + Karafka.producer.monitor.subscribe('error.occurred') do |event| + if event.payload.key?(:messages) + topic = event[:messages].first[:topic] + config = Deimos.karafka_config_for(topic: topic) + message = Deimos::Logging.messages_log_text(config&.payload_log, event[:messages]) + Karafka.logger.error("Error producing messages: #{event[:error].message} #{message.to_json}") + end + end end # @return [Array] fields to check for updates + def watched_attributes(_record) + self.encoder.schema_fields.map(&:name) + end + end end end diff --git a/lib/deimos/backends/base.rb b/lib/deimos/backends/base.rb index cd68d9e6..ddf83e60 100644 --- a/lib/deimos/backends/base.rb +++ b/lib/deimos/backends/base.rb @@ -6,7 +6,7 @@ module Backends class Base class << self # @param producer_class [Class] - # @param messages [Array] + # @param messages [Array] # @return [void] def publish(producer_class:, messages:) message = ::Deimos::Logging.messages_log_text(producer_class.karafka_config.payload_log, messages) diff --git a/lib/deimos/backends/kafka.rb b/lib/deimos/backends/kafka.rb index 84347ad4..90793889 100644 --- a/lib/deimos/backends/kafka.rb +++ b/lib/deimos/backends/kafka.rb @@ -6,7 +6,7 @@ module Backends class Kafka < Base # :nodoc: def self.execute(producer_class:, messages:) - producer.publish_list(messages.map(&:encoded_hash)) + Karafka.producer.produce_many_sync(messages) Deimos.config.metrics&.increment( 'publish', tags: %W(status:success topic:#{messages.first[:topic]}), diff --git a/lib/deimos/backends/kafka_async.rb 
b/lib/deimos/backends/kafka_async.rb index 06ad1b2e..1bd558ab 100644 --- a/lib/deimos/backends/kafka_async.rb +++ b/lib/deimos/backends/kafka_async.rb @@ -6,7 +6,6 @@ module Backends class KafkaAsync < Base # :nodoc: def self.execute(producer_class:, messages:) - producer.async_publish_list(messages.map(&:encoded_hash)) Karafka.producer.produce_many_async(messages) Deimos.config.metrics&.increment( 'publish', diff --git a/lib/deimos/ext/producer_middleware.rb b/lib/deimos/ext/producer_middleware.rb new file mode 100644 index 00000000..6c960e70 --- /dev/null +++ b/lib/deimos/ext/producer_middleware.rb @@ -0,0 +1,94 @@ +module Deimos + + module ProducerMiddleware + class << self + + def call(message) + Karafka.monitor.instrument( + 'deimos.encode_message', + producer: self, + message: message + ) do + config = Deimos.karafka_config_for(topic: message[:topic]) + return message if config.nil? + return if message[:payload] && !message[:payload].is_a?(Hash) && !message[:payload].is_a?(SchemaClass::Record) + + m = Deimos::Message.new(message[:payload].to_h, + headers: message[:headers], + partition_key: message[:partition_key]) + _process_message(m, message, config) + message[:payload] = m.encoded_payload + message[:key] = m.encoded_key + message[:partition_key] = if m.partition_key + m.partition_key.to_s + elsif m.key + m.key.to_s + else + nil + end + message[:topic] = "#{Deimos.config.producers.topic_prefix}#{config.name}" + + validate_key_config(config, message) + + message + end + end + + def validate_key_config(config, message) + if message[:key].nil? && config.deserializers[:key].is_a?(Deimos::Transcoder) + raise 'No key given but a key is required! Use `key_config none: true` to avoid using keys.' + end + end + + # @param message [Deimos::Message] + # @param karafka_message [Hash] + # @param config [Deimos::ProducerConfig] + def _process_message(message, karafka_message, config) + encoder = config.deserializers[:payload].backend + key_transcoder = config.deserializers[:key] + # this violates the Law of Demeter but it has to happen in a very + # specific order and requires a bunch of methods on the producer + # to work correctly. + message.add_fields(encoder.schema_fields.map(&:name)) + message.key = karafka_message[:key] || _retrieve_key(message.payload, key_transcoder) + # need to do this before _coerce_fields because that might result + # in an empty payload which is an *error* whereas this is intended. + message.payload = nil if message.payload.blank? + message.coerce_fields(encoder) + message.encoded_key = _encode_key(message.key, config) + message.topic = config.name + message.encoded_payload = if message.payload.nil? + nil + else + encoder.encode(message.payload, + topic: "#{Deimos.config.producers.topic_prefix}#{config.name}-value") + end + end + + # @param key [Object] + # @param config [ProducerConfig] + # @return [String|Object] + def _encode_key(key, config) + return nil if key.nil? + + if config.deserializers[:key].respond_to?(:encode_key) + config.deserializers[:key].encode_key(key) + elsif key + config.deserializers[:payload].encode(key) + else + key + end + end + + # @param payload [Hash] + # @param key_transcoder [Deimos::Transcoder] + # @return [String] + def _retrieve_key(payload, key_transcoder) + key = payload.delete(:payload_key) + return key if key || !key_transcoder.respond_to?(:key_field) + + key_transcoder.key_field ? 
payload[key_transcoder.key_field] : nil + end + end + end +end diff --git a/lib/deimos/kafka_source.rb b/lib/deimos/kafka_source.rb index f1e92561..63ced2ef 100644 --- a/lib/deimos/kafka_source.rb +++ b/lib/deimos/kafka_source.rb @@ -31,7 +31,7 @@ def send_kafka_event_on_update return unless self.class.kafka_config[:update] producers = self.class.kafka_producers - fields = producers.flat_map(&:watched_attributes).uniq + fields = producers.flat_map { |p| p.watched_attributes(self) }.uniq fields -= ['updated_at'] # Only send an event if a field we care about was changed. any_changes = fields.any? do |field| @@ -71,11 +71,6 @@ def kafka_config # @return [Array] the producers to run. def kafka_producers - if self.respond_to?(:kafka_producer) - Deimos.config.logger.warn(message: DEPRECATION_WARNING) - return [self.kafka_producer] - end - raise MissingImplementationError end diff --git a/lib/deimos/message.rb b/lib/deimos/message.rb index 2e5cf1f8..1fd47c40 100644 --- a/lib/deimos/message.rb +++ b/lib/deimos/message.rb @@ -17,17 +17,13 @@ class Message attr_accessor :encoded_payload # @return [String] attr_accessor :topic - # @return [String] - attr_accessor :producer_name # @param payload [Hash] - # @param producer [Class] # @param topic [String] # @param key [String, Integer, Hash] # @param partition_key [Integer] - def initialize(payload, producer, topic: nil, key: nil, headers: nil, partition_key: nil) + def initialize(payload, topic: nil, key: nil, headers: nil, partition_key: nil) @payload = payload&.with_indifferent_access - @producer_name = producer&.name @topic = topic @key = key @headers = headers&.with_indifferent_access @@ -64,11 +60,7 @@ def encoded_hash key: @encoded_key, headers: @headers, partition_key: @partition_key || @encoded_key, - payload: @encoded_payload, - metadata: { - decoded_payload: @payload, - producer_name: @producer_name - } + payload: @encoded_payload }.delete_if { |k, v| k == :headers && v.nil? } end @@ -82,7 +74,6 @@ def to_h payload: @payload, metadata: { decoded_payload: @payload, - producer_name: @producer_name } }.delete_if { |k, v| k == :headers && v.nil? } end diff --git a/lib/deimos/producer.rb b/lib/deimos/producer.rb index fa710e9f..993c33bb 100644 --- a/lib/deimos/producer.rb +++ b/lib/deimos/producer.rb @@ -61,6 +61,9 @@ def producers_disabled?(producer_class=nil) class Producer include SharedConfig + # @return [Integer] + MAX_BATCH_SIZE = 500 + class << self # Override the default partition key (which is the payload key). @@ -77,7 +80,22 @@ def partition_key(_payload) # @param headers [Hash] if specifying headers # @return [void] def publish(payload, topic: self.topic, headers: nil) - publish_list([payload], topic: topic, headers: headers) + produce([{payload: payload, topic: topic, headers: headers}]) + end + + # Produce a list of messages in WaterDrop message hash format. + # @param messages [Array] + # @param backend [Class < Deimos::Backend] + def produce(messages, backend: determine_backend_class) + return if Deimos.producers_disabled?(self) + + messages.each do |m| + m[:label] = m + m[:partition_key] ||= self.partition_key(m[:payload]) + end + messages.in_groups_of(MAX_BATCH_SIZE, false) do |batch| + self.produce_batch(backend, batch) + end end # Publish a list of messages. 
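To illustrate the new producer surface introduced here (a hedged sketch; MyProducer, the topic name and the payload fields are placeholders, and the producer is assumed to be wired to a topic and schema in the Karafka routing so the middleware can encode it): produce takes WaterDrop-style message hashes and splits them into groups of MAX_BATCH_SIZE, while publish and publish_list remain thin wrappers that build those hashes.

    class MyProducer < Deimos::Producer
    end

    # New hash-based API: each element is a WaterDrop-style message hash.
    # partition_key defaults to self.partition_key(payload) when not given.
    MyProducer.produce([
      { topic: 'my-topic', payload: { test_id: 'abc', some_int: 3 } }
    ])

    # Existing entry points, now implemented on top of produce:
    MyProducer.publish({ test_id: 'abc', some_int: 3 }, topic: 'my-topic')
    MyProducer.publish_list([{ test_id: 'abc', some_int: 3 }], sync: true, topic: 'my-topic')
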
@@ -90,18 +108,17 @@ def publish(payload, topic: self.topic, headers: nil) # @param headers [Hash] if specifying headers # @return [void] def publish_list(payloads, sync: nil, force_send: false, topic: self.topic, headers: nil) - return if Deimos.config.kafka.seed_brokers.blank? || - Deimos.config.producers.disabled || - Deimos.producers_disabled?(self) - - raise 'Topic not specified. Please specify the topic.' if topic.blank? - - backend_class = determine_backend_class(sync, force_send) - messages = Array(payloads).map { |p| Deimos::Message.new(p.to_h, self, headers: headers) } - messages.each { |m| _process_message(m, topic) } - messages.in_groups_of(self.config[:max_batch_size], false) do |batch| - self.produce_batch(backend_class, batch) + backend = determine_backend_class(sync, force_send) + + messages = Array(payloads).map do |p| + { + payload: p&.to_h, + headers: headers, + topic: topic, + partition_key: self.partition_key(p) + } end + self.produce(messages, backend: backend) end def karafka_config @@ -115,7 +132,7 @@ def topic # @param sync [Boolean] # @param force_send [Boolean] # @return [Class] - def determine_backend_class(sync, force_send) + def determine_backend_class(sync=false, force_send=false) backend = if force_send :kafka else @@ -131,52 +148,12 @@ def determine_backend_class(sync, force_send) # Send a batch to the backend. # @param backend [Class] - # @param batch [Array] + # @param batch [Array] # @return [void] def produce_batch(backend, batch) backend.publish(producer_class: self, messages: batch) end - # Override this in active record producers to add - # non-schema fields to check for updates - # @return [Array] fields to check for updates - def watched_attributes - self.encoder.schema_fields.map(&:name) - end - - private - - # @param message [Message] - # @param topic [String] - def _process_message(message, topic) - # this violates the Law of Demeter but it has to happen in a very - # specific order and requires a bunch of methods on the producer - # to work correctly. - message.add_fields(encoder.schema_fields.map(&:name)) - message.partition_key = self.partition_key(message.payload) - message.key = _retrieve_key(message.payload) - # need to do this before _coerce_fields because that might result - # in an empty payload which is an *error* whereas this is intended. - message.payload = nil if message.payload.blank? - message.coerce_fields(encoder) - message.encoded_key = _encode_key(message.key) - message.topic = topic - message.encoded_payload = if message.payload.nil? - nil - else - encoder.encode(message.payload, - topic: "#{Deimos.config.producers.topic_prefix}#{config[:topic]}-value") - end - end - - # @param payload [Hash] - # @return [String] - def _retrieve_key(payload) - key = payload.delete(:payload_key) - return key if key - - config[:key_field] ? 
payload[config[:key_field]] : nil - end end end end From 51b9571958bd4225a4ce2ee7ad06f871240e04f0 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:34:35 -0400 Subject: [PATCH 11/18] Rename DB backend to Outbox backend --- lib/deimos.rb | 18 +- lib/deimos/backends/{db.rb => outbox.rb} | 22 +- lib/deimos/config/configuration.rb | 22 +- lib/deimos/kafka_topic_info.rb | 2 +- lib/deimos/utils/outbox_producer.rb | 229 ++++++++++++++++++ .../templates/migration | 0 .../templates/rails3_migration | 0 ...nerator.rb => outbox_backend_generator.rb} | 8 +- lib/tasks/deimos.rake | 8 +- 9 files changed, 267 insertions(+), 42 deletions(-) rename lib/deimos/backends/{db.rb => outbox.rb} (65%) create mode 100644 lib/deimos/utils/outbox_producer.rb rename lib/generators/deimos/{db_backend => outbox_backend}/templates/migration (100%) rename lib/generators/deimos/{db_backend => outbox_backend}/templates/rails3_migration (100%) rename lib/generators/deimos/{db_backend_generator.rb => outbox_backend_generator.rb} (80%) diff --git a/lib/deimos.rb b/lib/deimos.rb index 687bd451..b336c805 100644 --- a/lib/deimos.rb +++ b/lib/deimos.rb @@ -31,9 +31,9 @@ if defined?(ActiveRecord) require 'deimos/kafka_source' require 'deimos/kafka_topic_info' - require 'deimos/backends/db' + require 'deimos/backends/outbox' require 'sigurd' - require 'deimos/utils/db_producer' + require 'deimos/utils/outbox_producer' require 'deimos/utils/db_poller' end @@ -115,10 +115,10 @@ def decode_message(message) # Start the DB producers to send Kafka messages. # @param thread_count [Integer] the number of threads to start. # @return [void] - def start_db_backend!(thread_count: 1) + def start_outbox_backend!(thread_count: 1) Sigurd.exit_on_signal = true - if self.config.producers.backend != :db - raise('Publish backend is not set to :db, exiting') + if self.config.producers.backend != :outbox + raise('Publish backend is not set to :outbox, exiting') end if thread_count.nil? || thread_count.zero? @@ -126,17 +126,15 @@ def start_db_backend!(thread_count: 1) end producers = (1..thread_count).map do - Deimos::Utils::DbProducer. - new(self.config.db_producer.logger || self.config.logger) + Deimos::Utils::OutboxProducer. + new(self.config.outbox.logger || Karafka.logger) end executor = Sigurd::Executor.new(producers, sleep_seconds: 5, - logger: self.config.logger) + logger: Karafka.logger) signal_handler = Sigurd::SignalHandler.new(executor) signal_handler.run! end - end -end def setup_karafka Karafka.producer.middleware.append(Deimos::ProducerMiddleware) diff --git a/lib/deimos/backends/db.rb b/lib/deimos/backends/outbox.rb similarity index 65% rename from lib/deimos/backends/db.rb rename to lib/deimos/backends/outbox.rb index 2561a5bf..793b370e 100644 --- a/lib/deimos/backends/db.rb +++ b/lib/deimos/backends/outbox.rb @@ -6,22 +6,23 @@ module Deimos module Backends # Backend which saves messages to the database instead of immediately # sending them. - class Db < Base + class Outbox < Base class << self # :nodoc: def execute(producer_class:, messages:) records = messages.map do |m| + Deimos::ProducerMiddleware.call(m) message = Deimos::KafkaMessage.new( - message: m.encoded_payload ? m.encoded_payload.to_s.b : nil, - topic: m.topic, + message: m[:payload] ? 
m[:payload].to_s.b : nil, + topic: m[:topic], partition_key: partition_key_for(m) ) - message.key = m.encoded_key.to_s.b unless producer_class.config[:no_keys] + message.key = m[:key].to_s.b if m[:key] message end Deimos::KafkaMessage.import(records) Deimos.config.metrics&.increment( - 'db_producer.insert', + 'outbox.insert', tags: %W(topic:#{producer_class.topic}), by: records.size ) @@ -30,10 +31,13 @@ def execute(producer_class:, messages:) # @param message [Deimos::Message] # @return [String] the partition key to use for this message def partition_key_for(message) - return message.partition_key if message.partition_key.present? - return message.key unless message.key.is_a?(Hash) - - message.key.to_yaml + if message[:partition_key].present? + message[:partition_key] + elsif message[:key].present? + message[:key].to_s.b + else + nil + end end end end diff --git a/lib/deimos/config/configuration.rb b/lib/deimos/config/configuration.rb index c2fa5a1e..173591ec 100644 --- a/lib/deimos/config/configuration.rb +++ b/lib/deimos/config/configuration.rb @@ -14,8 +14,8 @@ module Deimos # rubocop:disable Metrics/ModuleLength if self.config.schema.use_schema_classes load_generated_schema_classes end - validate_db_backend if self.config.producers.backend == :db generate_key_schemas + validate_outbox_backend if self.config.producers.backend == :outbox end class << self @@ -24,19 +24,6 @@ def generate_key_schemas Deimos.karafka_configs.each do |config| transcoder = config.deserializers[:key] - # Ensure everything is set up correctly for the DB backend. - # @!visibility private - def self.validate_db_backend - begin - require 'activerecord-import' - rescue LoadError - raise 'Cannot set producers.backend to :db without activerecord-import! Please add it to your Gemfile.' - end - if Deimos.config.producers.required_acks != :all - raise 'Cannot set producers.backend to :db unless producers.required_acks is set to ":all"!' - end - end - if transcoder.respond_to?(:key_field) && transcoder.key_field transcoder.backend = Deimos.schema_backend(schema: config.schema, namespace: config.namespace) @@ -57,6 +44,13 @@ def load_generated_schema_classes raise 'Cannot load schema classes. Please regenerate classes with rake deimos:generate_schema_models.' end + # Ensure everything is set up correctly for the DB backend. + # @!visibility private + def validate_outbox_backend + begin + require 'activerecord-import' + rescue LoadError + raise 'Cannot set producers.backend to :outbox without activerecord-import! Please add it to your Gemfile.' end end end diff --git a/lib/deimos/kafka_topic_info.rb b/lib/deimos/kafka_topic_info.rb index 7697742f..7da971ad 100644 --- a/lib/deimos/kafka_topic_info.rb +++ b/lib/deimos/kafka_topic_info.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module Deimos - # Record that keeps track of which topics are being worked on by DbProducers. + # Record that keeps track of which topics are being worked on by OutboxProducers. class KafkaTopicInfo < ActiveRecord::Base self.table_name = 'kafka_topic_info' diff --git a/lib/deimos/utils/outbox_producer.rb b/lib/deimos/utils/outbox_producer.rb new file mode 100644 index 00000000..83f4e9d8 --- /dev/null +++ b/lib/deimos/utils/outbox_producer.rb @@ -0,0 +1,229 @@ +# frozen_string_literal: true + +module Deimos + module Utils + # Class which continually polls the kafka_messages table + # in the database and sends Kafka messages. 
+ class OutboxProducer + attr_accessor :id, :current_topic + + # @return [Integer] + BATCH_SIZE = 1000 + # @return [Integer] + DELETE_BATCH_SIZE = 10 + # @return [Integer] + MAX_DELETE_ATTEMPTS = 3 + # @return [Array] + FATAL_CODES = %i(invalid_msg_size msg_size_too_large) + + # @param logger [Logger] + def initialize(logger=Logger.new(STDOUT)) + @id = SecureRandom.uuid + @logger = logger + @logger.push_tags("OutboxProducer #{@id}") if @logger.respond_to?(:push_tags) + end + + # @return [FigTree] + def config + Deimos.config.outbox + end + + # Start the poll. + # @return [void] + def start + @logger.info('Starting...') + @signal_to_stop = false + ActiveRecord::Base.connection.reconnect! + loop do + if @signal_to_stop + @logger.info('Shutting down') + break + end + send_pending_metrics + process_next_messages + end + end + + # Stop the poll. + # @return [void] + def stop + @logger.info('Received signal to stop') + @signal_to_stop = true + end + + # Complete one loop of processing all messages in the DB. + # @return [void] + def process_next_messages + topics = retrieve_topics + @logger.info("Found topics: #{topics}") + topics.each(&method(:process_topic)) + KafkaTopicInfo.ping_empty_topics(topics) + sleep(0.5) + end + + # @return [Array] + def retrieve_topics + KafkaMessage.select('distinct topic').map(&:topic).uniq + end + + # @param topic [String] + # @return [String, nil] the topic that was locked, or nil if none were. + def process_topic(topic) + # If the topic is already locked, another producer is currently + # working on it. Move on to the next one. + unless KafkaTopicInfo.lock(topic, @id) + @logger.debug("Could not lock topic #{topic} - continuing") + return + end + @current_topic = topic + + loop { break unless process_topic_batch } + + KafkaTopicInfo.clear_lock(@current_topic, @id) + rescue StandardError => e + @logger.error("Error processing messages for topic #{@current_topic}: #{e.class.name}: #{e.message} #{e.backtrace.join("\n")}") + KafkaTopicInfo.register_error(@current_topic, @id) + end + + # Process a single batch in a topic. + # @return [void] + def process_topic_batch + messages = retrieve_messages + return false if messages.empty? + + batch_size = messages.size + compacted_messages = compact_messages(messages) + log_messages(compacted_messages) + Karafka.monitor.instrument('deimos.outbox.produce', topic: @current_topic, messages: compacted_messages) do + begin + produce_messages(compacted_messages.map(&:karafka_message)) + rescue WaterDrop::Errors::ProduceManyError => e + if FATAL_CODES.include?(e.cause.try(:code)) + @logger.error('Message batch too large, deleting...') + delete_messages(messages) + raise e + else + Deimos.log_error("Got error #{e.cause.class.name} when publishing #{batch_size} messages, retrying...") + retry + end + end + end + delete_messages(messages) + Deimos.config.metrics&.increment( + 'outbox.process', + tags: %W(topic:#{@current_topic}), + by: messages.size + ) + return false if batch_size < BATCH_SIZE + + KafkaTopicInfo.heartbeat(@current_topic, @id) # keep alive + send_pending_metrics + true + end + + # @param messages [Array] + # @return [void] + def delete_messages(messages) + attempts = 1 + begin + messages.in_groups_of(DELETE_BATCH_SIZE, false).each do |batch| + Deimos::KafkaMessage.where(topic: batch.first.topic, + id: batch.map(&:id)). 
+ delete_all + end + rescue StandardError => e + if (e.message =~ /Lock wait/i || e.message =~ /Lost connection/i) && + attempts <= MAX_DELETE_ATTEMPTS + attempts += 1 + ActiveRecord::Base.connection.verify! + sleep(1) + retry + end + raise + end + end + + # @return [Array] + def retrieve_messages + KafkaMessage.where(topic: @current_topic).order(:id).limit(BATCH_SIZE) + end + + # @param messages [Array] + # @return [void] + def log_messages(messages) + return if config.log_topics != :all && !config.log_topics.include?(@current_topic) + + @logger.debug do + decoded_messages = Deimos::KafkaMessage.decoded(messages) + "DB producer: Topic #{@current_topic} Producing messages: #{decoded_messages}}" + end + end + + # Send metrics related to pending messages. + # @return [void] + def send_pending_metrics + metrics = Deimos.config.metrics + return unless metrics + + topics = KafkaTopicInfo.select(%w(topic last_processed_at)) + messages = Deimos::KafkaMessage. + select('count(*) as num_messages, min(created_at) as earliest, topic'). + group(:topic). + index_by(&:topic) + topics.each do |record| + message_record = messages[record.topic] + # We want to record the last time we saw any activity, meaning either + # the oldest message, or the last time we processed, whichever comes + # last. + if message_record + record_earliest = message_record.earliest + # SQLite gives a string here + if record_earliest.is_a?(String) + record_earliest = Time.zone.parse(record_earliest) + end + + earliest = [record.last_processed_at, record_earliest].max + time_diff = Time.zone.now - earliest + metrics.gauge('pending_db_messages_max_wait', time_diff, + tags: ["topic:#{record.topic}"]) + else + # no messages waiting + metrics.gauge('pending_db_messages_max_wait', 0, + tags: ["topic:#{record.topic}"]) + end + metrics.gauge('pending_db_messages_count', message_record&.num_messages || 0, + tags: ["topic:#{record.topic}"]) + end + end + + # Produce messages in batches, reducing the size 1/10 if the batch is too + # large. Does not retry batches of messages that have already been sent. + # @param batch [Array] + # @return [void] + def produce_messages(batch) + batch_size = batch.size + current_index = 0 + begin + batch[current_index..-1].in_groups_of(batch_size, false).each do |group| + @logger.debug("Publishing #{group.size} messages to #{@current_topic}") + Karafka.producer.produce_many_sync(group) + current_index += group.size + @logger.info("Sent #{group.size} messages to #{@current_topic}") + end + end + end + + # @param batch [Array] + # @return [Array] + def compact_messages(batch) + return batch if batch.first&.key.blank? + + topic = batch.first.topic + return batch if config.compact_topics != :all && + !config.compact_topics.include?(topic) + + batch.reverse.uniq(&:key).reverse! 
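A short usage sketch for the outbox flow shown above, assuming only the configuration keys introduced in this patch (producers.backend, outbox.log_topics, outbox.compact_topics) and the start_outbox_backend! entry point:

    Deimos.configure do |config|
      config.producers.backend = :outbox        # stage messages in the kafka_messages table
      config.outbox.log_topics = ['my-topic']   # optional: log decoded messages for these topics
      config.outbox.compact_topics = :all       # optional: keep only the newest message per key
    end

    # Drain the table from a separate process (equivalent to `rake deimos:outbox`):
    Deimos.start_outbox_backend!(thread_count: 2)
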
+ end + end + end +end diff --git a/lib/generators/deimos/db_backend/templates/migration b/lib/generators/deimos/outbox_backend/templates/migration similarity index 100% rename from lib/generators/deimos/db_backend/templates/migration rename to lib/generators/deimos/outbox_backend/templates/migration diff --git a/lib/generators/deimos/db_backend/templates/rails3_migration b/lib/generators/deimos/outbox_backend/templates/rails3_migration similarity index 100% rename from lib/generators/deimos/db_backend/templates/rails3_migration rename to lib/generators/deimos/outbox_backend/templates/rails3_migration diff --git a/lib/generators/deimos/db_backend_generator.rb b/lib/generators/deimos/outbox_backend_generator.rb similarity index 80% rename from lib/generators/deimos/db_backend_generator.rb rename to lib/generators/deimos/outbox_backend_generator.rb index b16a1a0f..2db58b29 100644 --- a/lib/generators/deimos/db_backend_generator.rb +++ b/lib/generators/deimos/outbox_backend_generator.rb @@ -6,14 +6,14 @@ module Deimos module Generators # Generate the database backend migration. - class DbBackendGenerator < Rails::Generators::Base + class OutboxBackendGenerator < Rails::Generators::Base include Rails::Generators::Migration if Rails.version < '4' extend(ActiveRecord::Generators::Migration) else include ActiveRecord::Generators::Migration end - source_root File.expand_path('db_backend/templates', __dir__) + source_root File.expand_path('outbox_backend/templates', __dir__) desc 'Add migrations for the database backend' # @return [String] @@ -38,10 +38,10 @@ def db_migrate_path def generate if Rails.version < '4' migration_template('rails3_migration', - "#{db_migrate_path}/create_db_backend.rb") + "#{db_migrate_path}/create_outbox_backend.rb") else migration_template('migration', - "#{db_migrate_path}/create_db_backend.rb") + "#{db_migrate_path}/create_outbox_backend.rb") end end end diff --git a/lib/tasks/deimos.rake b/lib/tasks/deimos.rake index 86e937e7..36782433 100644 --- a/lib/tasks/deimos.rake +++ b/lib/tasks/deimos.rake @@ -17,13 +17,13 @@ namespace :deimos do end desc 'Starts the Deimos database producer' - task db_producer: :environment do + task outbox: :environment do ENV['DEIMOS_RAKE_TASK'] = 'true' - ENV['DEIMOS_TASK_NAME'] = 'db_producer' + ENV['DEIMOS_TASK_NAME'] = 'outbox' STDOUT.sync = true - Rails.logger.info('Running deimos:db_producer rake task.') + Rails.logger.info('Running deimos:outbox rake task.') thread_count = ENV['THREAD_COUNT'].to_i.zero? ? 
1 : ENV['THREAD_COUNT'].to_i - Deimos.start_db_backend!(thread_count: thread_count) + Deimos.start_outbox_backend!(thread_count: thread_count) end task db_poller: :environment do From ee9906f1468b45ab4735f4651ae12d299347cab2 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:35:04 -0400 Subject: [PATCH 12/18] Deprecate remaining config --- lib/deimos/config/configuration.rb | 385 +++++++---------------------- 1 file changed, 86 insertions(+), 299 deletions(-) diff --git a/lib/deimos/config/configuration.rb b/lib/deimos/config/configuration.rb index 173591ec..39e090a6 100644 --- a/lib/deimos/config/configuration.rb +++ b/lib/deimos/config/configuration.rb @@ -58,227 +58,64 @@ def validate_outbox_backend # rubocop:enable Metrics/PerceivedComplexity, Metrics/AbcSize define_settings do - - # @return [Logger] - setting :logger, Logger.new(STDOUT) - - # @return [Symbol] - setting :payload_log, :full - - # @return [Logger] - setting :phobos_logger, default_proc: proc { Deimos.config.logger.clone } + setting :logger, removed: 'Use "logger" in Karafka setup block.' + setting :payload_log, removed: 'Use topic.payload_log in Karafka settings' + setting :phobos_logger, removed: 'Separate logger for Phobos is no longer supported' setting :kafka do - - # @return [Logger] - setting :logger, default_proc: proc { Deimos.config.logger.clone } - - # URL of the seed broker. - # @return [Array] - setting :seed_brokers, ['localhost:9092'] - - # Identifier for this application. - # @return [String] - setting :client_id, 'phobos' - - # The socket timeout for connecting to the broker, in seconds. - # @return [Integer] - setting :connect_timeout, 15 - - # The socket timeout for reading and writing to the broker, in seconds. - # @return [Integer] - setting :socket_timeout, 15 + setting :logger, removed: "Karafka uses Rails logger by default" + setting :seed_brokers, ['localhost:9092'], removed: 'Use kafka(bootstrap.servers) in Karafka settings' + setting :client_id, 'phobos', removed: 'Use client_id in Karafka setup block.' + setting :connect_timeout, 15, removed: 'Use kafka(socket.connection.setup.timeout.ms) in Karafka settings' + setting :socket_timeout, 15, removed: 'Use kafka(socket.timeout.ms) in Karafka settings' setting :ssl do - # Whether SSL is enabled on the brokers. - # @return [Boolean] - setting :enabled - - # a PEM encoded CA cert, a file path to the cert, or an Array of certs, - # to use with an SSL connection. - # @return [String|Array] - setting :ca_cert - - # a PEM encoded client cert to use with an SSL connection, or a file path - # to the cert. - # @return [String] - setting :client_cert - - # a PEM encoded client cert key to use with an SSL connection. - # @return [String] - setting :client_cert_key - - # Verify certificate hostname if supported (ruby >= 2.4.0) - setting :verify_hostname, true - - # Use CA certs from system. This is useful to have enabled for Confluent Cloud - # @return [Boolean] - setting :ca_certs_from_system, false + setting :enabled, removed: 'Use kafka(security.protocol=ssl) in Karafka settings' + setting :ca_cert, removed: 'Use kafka(ssl.ca.pem) in Karafka settings' + setting :client_cert, removed: 'Use kafka(ssl.certificate.pem) in Karafka settings' + setting :client_cert_key, removed: 'Use kafka(ssl.key.pem) in Karafka settings' + setting :verify_hostname, removed: 'Use kafka(ssl.endpoint.identification.algorithm=https) in Karafka settings' + setting :ca_certs_from_system, removed: 'Should not be necessary with librdkafka.' 
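As a hedged sketch of where the removed kafka.* settings now live, the rdkafka keys below mirror the removal messages above; the broker list, client id and cert path are placeholders:

    class KarafkaApp < Karafka::App
      setup do |config|
        config.client_id = 'my-app'
        config.kafka = {
          'bootstrap.servers': 'localhost:9092',
          'security.protocol': 'ssl',
          'ssl.ca.pem': File.read('ca.pem')
        }
      end
    end
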
end setting :sasl do - # Whether SASL is enabled on the brokers. - # @return [Boolean] - setting :enabled - - # A KRB5 principal. - # @return [String] - setting :gssapi_principal - - # A KRB5 keytab filepath. - # @return [String] - setting :gssapi_keytab - - # Plain authorization ID. It needs to default to '' in order for it to work. - # This is because Phobos expects it to be truthy for using plain SASL. - # @return [String] - setting :plain_authzid, '' - - # Plain username. - # @return [String] - setting :plain_username - - # Plain password. - # @return [String] - setting :plain_password - - # SCRAM username. - # @return [String] - setting :scram_username - - # SCRAM password. - # @return [String] - setting :scram_password - - # Scram mechanism, either "sha256" or "sha512". - # @return [String] - setting :scram_mechanism - - # Whether to enforce SSL with SASL. - # @return [Boolean] - setting :enforce_ssl - - # OAuthBearer Token Provider instance that implements - # method token. See {Sasl::OAuth#initialize}. - # @return [Object] - setting :oauth_token_provider + setting :enabled, removed: 'Use kafka(security.protocol=sasl_ssl or sasl_plaintext) in Karafka settings' + setting :gssapi_principal, removed: 'Use kafka(sasl.kerberos.principal) in Karafka settings' + setting :gssapi_keytab, removed: 'Use kafka(sasl.kerberos.keytab) in Karafka settings' + setting :plain_authzid, removed: 'No longer needed with rdkafka' + setting :plain_username, removed: 'Use kafka(sasl.username) in Karafka settings' + setting :plain_password, removed: 'Use kafka(sasl.password) in Karafka settings' + setting :scram_username, removed: 'Use kafka(sasl.username) in Karafka settings' + setting :scram_password, removed: 'Use kafka(sasl.password) in Karafka settings' + setting :scram_mechanism, removed: 'Use kafka(sasl.mechanisms) in Karafka settings' + setting :enforce_ssl, removed: 'Use kafka(security.protocol=sasl_ssl) in Karafka settings' + setting :oauth_token_provider, removed: 'See rdkafka configs for details' end end setting :consumers do - - # Number of seconds after which, if a client hasn't contacted the Kafka cluster, - # it will be kicked out of the group. - # @return [Integer] - setting :session_timeout, 300 - - # Interval between offset commits, in seconds. - # @return [Integer] - setting :offset_commit_interval, 10 - - # Number of messages that can be processed before their offsets are committed. - # If zero, offset commits are not triggered by message processing - # @return [Integer] - setting :offset_commit_threshold, 0 - - # Interval between heartbeats; must be less than the session window. - # @return [Integer] - setting :heartbeat_interval, 10 - - # Minimum and maximum number of milliseconds to back off after a consumer - # error. - setting :backoff, (1000..60_000) - - # By default, consumer errors will be consumed and logged to - # the metrics provider. - # Set this to true to force the error to be raised. - # @return [Boolean] - setting :reraise_errors - - # @return [Boolean] - setting :report_lag - - # Block taking an exception, payload and metadata and returning - # true if this should be considered a fatal error and false otherwise. - # Not needed if reraise_errors is set to true. - # @return [Block] - setting(:fatal_error, proc { false }) - - # The default function to generate a bulk ID for bulk consumers - # @return [Block] - setting(:bulk_import_id_generator, proc { SecureRandom.uuid }) - - # If true, multi-table consumers will blow away associations rather than appending to them. 
- # Applies to all consumers unless specified otherwise - # @return [Boolean] - setting :replace_associations, true + setting :reraise_errors, removed: 'Use topic.reraise_errors in Karafka settings' + setting :report_lag, removed: "Use Karafka's built in lag reporting" + setting(:fatal_error, removed: "Use topic.fatal_error in Karafka settings") + setting(:bulk_import_id_generator, removed: "Use topic.bulk_import_id_generator in Karafka settings") + setting :save_associations_first, removed: "Use topic.save_associations_first" + setting :replace_associations, removed: "Use topic.replace_associations in Karafka settings" end setting :producers do - # Number of seconds a broker can wait for replicas to acknowledge - # a write before responding with a timeout. - # @return [Integer] - setting :ack_timeout, 5 - - # Number of replicas that must acknowledge a write, or `:all` - # if all in-sync replicas must acknowledge. - # @return [Integer|Symbol] - setting :required_acks, 1 - - # Number of retries that should be attempted before giving up sending - # messages to the cluster. Does not include the original attempt. - # @return [Integer] - setting :max_retries, 2 - - # Number of seconds to wait between retries. - # @return [Integer] - setting :retry_backoff, 1 - - # Number of messages allowed in the buffer before new writes will - # raise {BufferOverflow} exceptions. - # @return [Integer] - setting :max_buffer_size, 10_000 - - # Maximum size of the buffer in bytes. Attempting to produce messages - # when the buffer reaches this size will result in {BufferOverflow} being raised. - # @return [Integer] - setting :max_buffer_bytesize, 10_000_000 - - # Name of the compression codec to use, or nil if no compression should be performed. - # Valid codecs: `:snappy` and `:gzip` - # @return [Symbol] - setting :compression_codec - - # Number of messages that needs to be in a message set before it should be compressed. - # Note that message sets are per-partition rather than per-topic or per-producer. - # @return [Integer] - setting :compression_threshold, 1 - - # Maximum number of messages allowed in the queue. Only used for async_producer. - # @return [Integer] - setting :max_queue_size, 10_000 - - # If greater than zero, the number of buffered messages that will automatically - # trigger a delivery. Only used for async_producer. - # @return [Integer] - setting :delivery_threshold, 0 - - # if greater than zero, the number of seconds between automatic message - # deliveries. Only used for async_producer. - # @return [Integer] - setting :delivery_interval, 0 - - # Set this to true to keep the producer connection between publish calls. - # This can speed up subsequent messages by around 30%, but it does mean - # that you need to manually call sync_producer_shutdown before exiting, - # similar to async_producer_shutdown. - # @return [Boolean] - setting :persistent_connections, false - - # Default namespace for all producers. Can remain nil. Individual - # producers can override. - # @return [String] - setting :schema_namespace + setting :ack_timeout, removed: "Not supported in rdkafka" + setting :required_acks, 1, removed: "Use kafka(request.required.acks) in Karafka settings" + setting :max_retries, removed: "Use kafka(message.send.max.retries) in Karafka settings" + setting :retry_backoff, removed: "Use kafka(retry.backoff.ms) in Karafka settings" + setting :max_buffer_size, removed: "Not relevant with Karafka. You may want to see the queue.buffering.max.messages setting." 
+ setting :max_buffer_bytesize, removed: "Not relevant with Karafka." + setting :compression_codec, removed: "Use kafka(compression.codec) in Karafka settings" + setting :compression_threshold, removed: "Not supported in Karafka." + setting :max_queue_size, removed: "Not relevant to Karafka." + setting :delivery_threshold, removed: "Not relevant to Karafka." + setting :delivery_interval, removed: "Not relevant to Karafka." + setting :persistent_connections, removed: "Karafka connections are always persistent." + setting :schema_namespace, removed: "Use topic.namespace in Karafka settings" # Add a prefix to all topic names. This can be useful if you're using # the same Kafka broker for different environments that are producing @@ -296,10 +133,6 @@ def validate_outbox_backend # sync in your consumers or delayed workers. # @return [Symbol] setting :backend, :kafka_async - - # Maximum publishing batch size. Individual producers can override. - # @return [Integer] - setting :max_batch_size, 500 end setting :schema do @@ -327,9 +160,9 @@ def validate_outbox_backend # @return [String] setting :generated_class_path, 'app/lib/schema_classes' - # Set to true to use the generated schema classes in your application + # Set to true to use the generated schema classes in your application. # @return [Boolean] - setting :use_schema_classes, false + setting :use_schema_classes # Set to false to generate child schemas as their own files. # @return [Boolean] @@ -354,10 +187,10 @@ def validate_outbox_backend # @return [Tracing::Provider] setting :tracer, default_proc: proc { Tracing::Mock.new } - setting :db_producer do + setting :outbox do # @return [Logger] - setting :logger, default_proc: proc { Deimos.config.logger } + setting :logger, default_proc: proc { Karafka.logger } # @return [Symbol|Array] A list of topics to log all messages, or # :all to log all topics. @@ -369,94 +202,48 @@ def validate_outbox_backend end + setting :db_producer do + setting :logger, removed: "Use outbox.logger" + setting :log_topics, removed: "Use outbox.log_topics" + setting :compact_topics, removed: "Use outbox.compact_topics" + end + setting_object :producer do - # Producer class. - # @return [String] - setting :class_name - # Topic to produce to. - # @return [String] - setting :topic - # Schema of the data in the topic. - # @return [String] - setting :schema - # Optional namespace to access the schema. - # @return [String] - setting :namespace - # Key configuration (see docs). - # @return [Hash] - setting :key_config - # Configure the usage of generated schema classes for this producer - # @return [Boolean] - setting :use_schema_classes - # If true, and using the multi-table feature of ActiveRecordConsumers, replace associations - # instead of appending to them. - # @return [Boolean] - setting :replace_associations - # Maximum publishing batch size for this producer. - # @return [Integer] - setting :max_batch_size + setting :class_name, removed: "Use topic.producer_class in Karafka settings." + setting :topic, removed: "Use Karafka settings." + setting :schema, removed: "Use topic.schema(schema:) in Karafka settings." + setting :namespace, removed: "Use topic.schema(namespace:) in Karafka settings." + setting :key_config, removed: "Use topic.schema(key_config:) in Karafka settings." + setting :use_schema_classes, removed: "Use topic.schema(use_schema_classes:) in Karafka settings." end setting_object :consumer do - # Consumer class. - # @return [String] - setting :class_name - # Topic to read from. 
- # @return [String] - setting :topic - # Schema of the data in the topic. - # @return [String] - setting :schema - # Optional namespace to access the schema. - # @return [String] - setting :namespace - # Key configuration (see docs). - # @return [Hash] - setting :key_config - # Set to true to ignore the consumer in the Phobos config and not actually start up a - # listener. - # @return [Boolean] - setting :disabled, false - # Configure the usage of generated schema classes for this consumer - # @return [Boolean] - setting :use_schema_classes - # Optional maximum limit for batching database calls to reduce the load on the db. - # @return [Integer] - setting :max_db_batch_size - # Column to use for bulk imports, for multi-table feature. - # @return [String] - setting :bulk_import_id_column, :bulk_import_id - # If true, multi-table consumers will blow away associations rather than appending to them. - # @return [Boolean] - setting :replace_associations, nil - - # The default function to generate a bulk ID for this consumer - # Uses the consumers proc defined in the consumers config by default unless - # specified for individual consumers - # @return [Block] - setting :bulk_import_id_generator, nil - - # If enabled save associated records prior to saving the main record class - # This will also set foreign keys for associated records - # @return [Boolean] - setting :save_associations_first, false - - # These are the phobos "listener" configs. See CONFIGURATION.md for more - # info. - setting :group_id - setting :max_concurrency, 1 - setting :start_from_beginning, true - setting :max_bytes_per_partition, 500.kilobytes - setting :min_bytes, 1 - setting :max_wait_time, 5 - setting :force_encoding - setting :delivery, :batch - setting :backoff - setting :session_timeout, 300 - setting :offset_commit_interval, 10 - setting :offset_commit_threshold, 0 - setting :offset_retention_time - setting :heartbeat_interval, 10 + setting :class_name, removed: "Use topic.consumer in Karafka settings." + setting :topic, removed: "Use Karafka settings." + setting :schema, removed: "Use topic.schema(schema:) in Karafka settings." + setting :namespace, removed: "Use topic.schema(namespace:) in Karafka settings." + setting :key_config, removed: "Use topic.schema(key_config:) in Karafka settings." + setting :disabled, removed: "Use topic.active in Karafka settings." + setting :use_schema_classes, removed: "Use topic.use_schema_classes in Karafka settings." + setting :max_db_batch_size, removed: "Use topic.max_db_batch_size in Karafka settings." + setting :bulk_import_id_column, removed: "Use topic.bulk_import_id_column in Karafka settings." + setting :replace_associations, removed: "Use topic.replace_associations in Karafka settings." + setting :bulk_import_id_generator, removed: "Use topic.bulk_import_id_generator in Karafka settings." + setting :save_associations_first, removed: "Use topic.save_associations_first" + setting :group_id, removed: "Use kafka(group.id) in Karafka settings." + setting :max_concurrency, removed: "Use Karafka's 'config.concurrency' in the setup block." + setting :start_from_beginning, removed: "Use initial_offset in the setup block, or kafka(auto.offset.reset) in topic settings." + setting :max_bytes_per_partition, removed: "Use max_messages in the setup block." + setting :min_bytes, removed: "Not supported in Karafka." + setting :max_wait_time, removed: "Use max_wait_time in the setup block." + setting :force_encoding, removed: "Not supported with Karafka." 
+ setting :delivery, :batch, removed: "Use batch: true/false in Karafka topic configs." + setting :backoff, removed: "Use kafka(retry.backoff.ms) and retry.backoff.max.ms in Karafka settings." + setting :session_timeout, removed: "Use kafka(session.timeout.ms) in Karafka settings." + setting :offset_commit_interval, removed: "Use kafka(auto.commit.interval.ms) in Karafka settings." + setting :offset_commit_threshold, removed: "Not supported with Karafka." + setting :offset_retention_time, removed: "Not supported with Karafka." + setting :heartbeat_interval, removed: "Use kafka(heartbeat.interval.ms) in Karafka settings." end setting_object :db_poller do From 1d6ebdbb957e9c2229a8be4a1df68929cfea66ce Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:37:12 -0400 Subject: [PATCH 13/18] Revamped test helpers and basic specs --- lib/deimos/test_helpers.rb | 415 +++++++--------------------- lib/deimos/utils/db_poller/base.rb | 16 +- spec/deimos_spec.rb | 136 +-------- spec/karafka/karafka.rb | 69 +++++ spec/karafka_config/karafka_spec.rb | 97 +++++++ spec/phobos.bad_db.yml | 73 ----- spec/phobos.yml | 77 ------ spec/rake_spec.rb | 4 +- spec/spec_helper.rb | 88 ++++-- 9 files changed, 341 insertions(+), 634 deletions(-) create mode 100644 spec/karafka/karafka.rb create mode 100644 spec/karafka_config/karafka_spec.rb delete mode 100644 spec/phobos.bad_db.yml delete mode 100644 spec/phobos.yml diff --git a/lib/deimos/test_helpers.rb b/lib/deimos/test_helpers.rb index 4e41a8c4..6df15316 100644 --- a/lib/deimos/test_helpers.rb +++ b/lib/deimos/test_helpers.rb @@ -4,6 +4,7 @@ require 'active_support/core_ext' require 'deimos/tracing/mock' require 'deimos/metrics/mock' +require 'karafka/testing/rspec/helpers' module Deimos # Include this module in your RSpec spec_helper @@ -11,122 +12,79 @@ module Deimos # and add methods to use to test encoding/decoding. module TestHelpers extend ActiveSupport::Concern + def self.included(base) + super + base.include Karafka::Testing::RSpec::Helpers + end + + # @return [Array] + def sent_messages + self.class.sent_messages + end class << self - # for backwards compatibility # @return [Array] def sent_messages - Deimos::Backends::Test.sent_messages + Karafka.producer.client.messages.map do |m| + produced_message = m.except(:label).deep_dup + Deimos.decode_message(produced_message) + produced_message[:payload] = Deimos::TestHelpers.normalize_message(produced_message[:payload]) + produced_message[:key] = Deimos::TestHelpers.normalize_message(produced_message[:key]) + produced_message + end end # Set the config to the right settings for a unit test # @return [void] def unit_test! - Deimos.configure do |deimos_config| - deimos_config.logger = Logger.new(STDOUT) - deimos_config.consumers.reraise_errors = true - deimos_config.kafka.seed_brokers ||= ['test_broker'] - deimos_config.schema.backend = Deimos.schema_backend_class.mock_backend - deimos_config.producers.backend = :test - deimos_config.tracer = Deimos::Tracing::Mock.new - end - end - - # Kafka test config with avro schema registry - # @return [void] - def full_integration_test! - Deimos.configure do |deimos_config| - deimos_config.producers.backend = :kafka - deimos_config.schema.backend = :avro_schema_registry - end - end - - # Set the config to the right settings for a kafka test - # @return [void] - def kafka_test! 
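A minimal sketch of where the settings removed above now live in Karafka, assuming the topic DSL extensions added elsewhere in this PR (schema, namespace, key_config, consumer/producer_class). This block is illustrative only and not part of the patch; the topic name, MyConsumer, and the broker address are placeholders:

    # karafka.rb (illustrative sketch only)
    class KarafkaApp < Karafka::App
      setup do |config|
        config.kafka = {
          'bootstrap.servers': '127.0.0.1:9092',
          'request.required.acks': -1,     # replaces producers.required_acks
          'compression.codec': 'snappy',   # replaces producers.compression_codec
          'session.timeout.ms': 300_000    # replaces consumers.session_timeout (now in ms)
        }
      end

      routes.draw do
        topic 'my-topic' do
          consumer MyConsumer              # replaces consumer.class_name
          schema 'MySchema'                # replaces consumer.schema
          namespace 'com.my-namespace'     # replaces consumer.namespace
          key_config field: :test_id       # replaces consumer.key_config
        end
      end
    end
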
- Deimos.configure do |deimos_config| - deimos_config.producers.backend = :kafka - deimos_config.schema.backend = :avro_validation - end + Deimos.config.schema.backend = :avro_validation + warn "unit_test! is deprecated and can be replaced by setting Deimos's schema backend to `:avro_validation`. All other test behavior is provided by Karafka." end end - included do - - RSpec.configure do |config| - config.prepend_before(:each) do - client = double('client').as_null_object - allow(client).to receive(:time) do |*_args, &block| - block.call - end - Deimos::Backends::Test.sent_messages.clear - end - end - - end - - # @deprecated - # @!visibility private - def stub_producers_and_consumers! - warn('stub_producers_and_consumers! is no longer necessary and this method will be removed in 3.0') - end - - # @deprecated - # @!visibility private - def stub_producer(_klass) - warn('Stubbing producers is no longer necessary and this method will be removed in 3.0') - end - - # @deprecated - # @!visibility private - def stub_consumer(_klass) - warn('Stubbing consumers is no longer necessary and this method will be removed in 3.0') - end - - # @deprecated - # @!visibility private - def stub_batch_consumer(_klass) - warn('Stubbing batch consumers is no longer necessary and this method will be removed in 3.0') - end - # get the difference of 2 hashes. - # @param hash1 [Hash] - # @param hash2 [Hash] + # @param hash1 [Hash, nil] + # @param hash2 [Hash, nil] # @!visibility private def _hash_diff(hash1, hash2) - if hash1.nil? || !hash1.is_a?(Hash) - hash2 - elsif hash2.nil? || !hash2.is_a?(Hash) - hash1 + h1 = Deimos::TestHelpers.normalize_message(hash1) + h2 = Deimos::TestHelpers.normalize_message(hash2) + if h1.nil? || !h1.is_a?(Hash) + h2 + elsif h2.nil? || !h2.is_a?(Hash) + h1 else - hash1.dup. - delete_if { |k, v| hash2[k] == v }. - merge!(hash2.dup.delete_if { |k, _v| hash1.key?(k) }) + h1.dup. + delete_if { |k, v| h2[k] == v }. + merge!(h2.dup.delete_if { |k, _v| h1.key?(k) }) + end + end + + def self.normalize_message(m) + return nil if m.nil? + + if m.respond_to?(:to_h) + m = m.to_h end + if m.respond_to?(:with_indifferent_access) + m = m.with_indifferent_access + end + m end # @!visibility private def _frk_failure_message(topic, message, key=nil, partition_key=nil, was_negated=false) - messages = Deimos::Backends::Test.sent_messages. - select { |m| m[:topic] == topic }. - map { |m| m.except(:topic) } + messages = Deimos::TestHelpers.sent_messages.select { |m| m[:topic] == topic } message_string = '' diff = nil min_hash_diff = nil + message = Deimos::TestHelpers.normalize_message(message) if messages.any? - message_string = messages.map(&:inspect).join("\n") - min_hash_diff = messages.min_by { |m| _hash_diff(m, message).keys.size } - diff = RSpec::Expectations.differ. - diff_as_object(message, min_hash_diff[:payload]) + message_string = messages.map { |m| m[:payload].inspect}.join("\n") + min_hash_diff = messages.min_by { |m| _hash_diff(m, message)&.keys&.size } + diff = RSpec::Expectations.differ.diff_as_object(message, min_hash_diff[:payload]) end - description = if message.respond_to?(:description) - message.description - elsif message.nil? 
- 'nil' - else - message - end - str = "Expected #{topic} #{'not ' if was_negated}to have sent #{description}" + str = "Expected #{topic} #{'not ' if was_negated}to have sent #{message.try(:to_h) || message}" str += " with key #{key}" if key str += " with partition key #{partition_key}" if partition_key str += "\nClosest message received: #{min_hash_diff}" if min_hash_diff @@ -135,23 +93,18 @@ def _frk_failure_message(topic, message, key=nil, partition_key=nil, was_negated end RSpec::Matchers.define :have_sent do |msg, key=nil, partition_key=nil, headers=nil| - message = if msg.respond_to?(:with_indifferent_access) - msg.with_indifferent_access - else - msg - end + message = Deimos::TestHelpers.normalize_message(msg) match do |topic| - Deimos::Backends::Test.sent_messages.any? do |m| - hash_matcher = RSpec::Matchers::BuiltIn::Match.new(message) - hash_matcher.send(:match, - message&.respond_to?(:to_h) ? message.to_h : message, - m[:payload]&.with_indifferent_access) && + message_key = Deimos::TestHelpers.normalize_message(key) + hash_matcher = RSpec::Matchers::BuiltIn::Match.new(message) + Deimos::TestHelpers.sent_messages.any? do |m| + hash_matcher.send(:match, message, m[:payload]) && topic == m[:topic] && - (key.present? ? key == m[:key] : true) && + (key.present? ? message_key == m[:key] : true) && (partition_key.present? ? partition_key == m[:partition_key] : true) && if headers.present? hash_matcher.send(:match, - headers&.with_indifferent_access, + headers.with_indifferent_access, m[:headers]&.with_indifferent_access) else true @@ -159,20 +112,11 @@ def _frk_failure_message(topic, message, key=nil, partition_key=nil, was_negated end end - if respond_to?(:failure_message) - failure_message do |topic| - _frk_failure_message(topic, message, key, partition_key) - end - failure_message_when_negated do |topic| - _frk_failure_message(topic, message, key, partition_key, true) - end - else - failure_message_for_should do |topic| - _frk_failure_message(topic, message, key, partition_key) - end - failure_message_for_should_not do |topic| - _frk_failure_message(topic, message, key, partition_key, true) - end + failure_message do |topic| + _frk_failure_message(topic, message, key, partition_key) + end + failure_message_when_negated do |topic| + _frk_failure_message(topic, message, key, partition_key, true) end end @@ -180,7 +124,8 @@ def _frk_failure_message(topic, message, key=nil, partition_key=nil, was_negated # particular messages were sent or not sent after a point in time. # @return [void] def clear_kafka_messages! - Deimos::Backends::Test.sent_messages.clear + puts "[Deprecated] clear_kafka_messages! can be replaced with `karafka.produced_messages.clear`" + karafka.produced_messages.clear end # Test that a given handler will consume a given payload correctly, i.e. @@ -190,65 +135,19 @@ def clear_kafka_messages! # @param handler_class_or_topic [Class, String] Class which inherits from # Deimos::Consumer or the topic as a string # @param payload [Hash] the payload to consume - # @param call_original [Boolean] if true, allow the consume handler - # to continue as normal. Not compatible with a block. - # @param skip_expectation [Boolean] Set to true to not place any - # expectations on the consumer. Primarily used internally to Deimos. # @param key [Object] the key to use. + # @param call_original [Symbol] legacy parameter. # @param partition_key [Object] the partition key to use. 
# @return [void] def test_consume_message(handler_class_or_topic, payload, - call_original: false, key: nil, - partition_key: nil, - skip_expectation: false, - &block) - raise 'Cannot have both call_original and be given a block!' if call_original && block_given? - - payload.stringify_keys! if payload.respond_to?(:stringify_keys!) - handler_class = if handler_class_or_topic.is_a?(String) - _get_handler_class_from_topic(handler_class_or_topic) - else - handler_class_or_topic - end - handler = handler_class.new - allow(handler_class).to receive(:new).and_return(handler) - listener = double('listener', - handler_class: handler_class, - encoding: nil) - key ||= _key_from_consumer(handler_class) - message = double('message', - 'key' => key, - 'partition_key' => partition_key, - 'partition' => 1, - 'offset' => 1, - 'headers' => {}, - 'value' => payload) - - unless skip_expectation - _handler_expectation(:consume, - payload, - handler, - call_original, - &block) + call_original: Karafka::Routing::Default.new(nil), + partition_key: nil) + unless call_original.is_a?(Karafka::Routing::Default) + puts "test_consume_message(call_original: true) is deprecated and will be removed in the future. You can remove the call_original parameter." end - Phobos::Actions::ProcessMessage.new( - listener: listener, - message: message, - listener_metadata: { topic: 'my-topic' } - ).send(:process_message, payload) - end - - # Check to see that a given message will fail due to validation errors. - # @param handler_class [Class] - # @param payload [Hash] - # @return [void] - def test_consume_invalid_message(handler_class, payload) - expect { - handler_class.decoder.validate(payload, - schema: handler_class.decoder.schema) - }.to raise_error(Avro::SchemaValidator::ValidationError) + test_consume_batch(handler_class_or_topic, [payload], keys: [key], partition_keys: [partition_key], single: true) end # Test that a given handler will consume a given batch payload correctly, @@ -258,165 +157,41 @@ def test_consume_invalid_message(handler_class, payload) # @param handler_class_or_topic [Class, String] Class which inherits from # Deimos::Consumer or the topic as a string # @param payloads [Array] the payload to consume - # @param keys [Array] - # @param partition_keys [Array] - # @param call_original [Boolean] - # @param skip_expectation [Boolean] + # @param call_original [Symbol] legacy parameter. + # @param keys [Array] + # @param partition_keys [Array] + # @param single [Boolean] used internally. # @return [void] def test_consume_batch(handler_class_or_topic, payloads, keys: [], - partition_keys: [], - call_original: false, - skip_expectation: false, - &block) - if call_original && block_given? - raise 'Cannot have both call_original and be given a block!' 
- end - - topic_name = 'my-topic' - handler_class = if handler_class_or_topic.is_a?(String) - _get_handler_class_from_topic(handler_class_or_topic) - else - handler_class_or_topic - end - handler = handler_class.new - allow(handler_class).to receive(:new).and_return(handler) - listener = double('listener', - handler_class: handler_class, - encoding: nil) - batch_messages = payloads.zip(keys, partition_keys).map do |payload, key, partition_key| - key ||= _key_from_consumer(handler_class) - - double('message', - 'key' => key, - 'partition_key' => partition_key, - 'partition' => 1, - 'offset' => 1, - 'headers' => {}, - 'value' => payload) - end - batch = double('fetched_batch', - 'messages' => batch_messages, - 'topic' => topic_name, - 'partition' => 1, - 'offset_lag' => 0) - unless skip_expectation - _handler_expectation(:consume_batch, - payloads, - handler, - call_original, - &block) - end - action = Phobos::Actions::ProcessBatchInline.new( - listener: listener, - batch: batch, - metadata: { topic: topic_name } - ) - allow(action).to receive(:backoff_interval).and_return(0) - allow(action).to receive(:handle_error) { |e| raise e } - action.send(:execute) - end - - # Check to see that a given message will fail due to validation errors. - # @param handler_class [Class] - # @param payloads [Array] - # @return [void] - def test_consume_batch_invalid_message(handler_class, payloads) - topic_name = 'my-topic' - handler = handler_class.new - allow(handler_class).to receive(:new).and_return(handler) - listener = double('listener', - handler_class: handler_class, - encoding: nil) - batch_messages = payloads.map do |payload| - key ||= _key_from_consumer(handler_class) - - double('message', - 'key' => key, - 'partition' => 1, - 'offset' => 1, - 'value' => payload) - end - batch = double('fetched_batch', - 'messages' => batch_messages, - 'topic' => topic_name, - 'partition' => 1, - 'offset_lag' => 0) - - action = Phobos::Actions::ProcessBatchInline.new( - listener: listener, - batch: batch, - metadata: { topic: topic_name } - ) - allow(action).to receive(:backoff_interval).and_return(0) - allow(action).to receive(:handle_error) { |e| raise e } - - expect { action.send(:execute) }. - to raise_error - end - - private - - def _key_from_consumer(consumer) - if consumer.config[:key_field] - { consumer.config[:key_field] => 1 } - elsif consumer.config[:key_schema] - backend = consumer.decoder - old_schema = backend.schema - backend.schema = consumer.config[:key_schema] - key = backend.schema_fields.map { |field| [field.name, 1] }.to_h - backend.schema = old_schema - key - elsif consumer.config[:no_keys] - nil + call_original: Karafka::Routing::Default.new(nil), + single: false, + partition_keys: []) + unless call_original.is_a?(Karafka::Routing::Default) + puts "test_consume_batch(call_original: true) is deprecated and will be removed in the future. You can remove the call_original parameter." + end + consumer = nil + topic_name = nil + if handler_class_or_topic.is_a?(String) + topic_name = handler_class_or_topic + consumer = karafka.consumer_for(topic_name) else - 1 + topic_name = Deimos.topic_for_consumer(handler_class_or_topic) + consumer = karafka.consumer_for(topic_name) end - end - - # @param topic [String] - # @return [Class] - def _get_handler_class_from_topic(topic) - listeners = Phobos.config['listeners'] - handler = listeners.find { |l| l.topic == topic } - raise "No consumer found in Phobos configuration for topic #{topic}!" if handler.nil? 
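A short usage sketch of the revamped helpers (illustrative only; it assumes a topic registered with schema 'MySchema' and key_config field: :test_id, mirroring the specs added later in this patch):

    # In an RSpec example with Deimos::TestHelpers included:
    test_consume_message('MyTopic', { test_id: 'id1', some_int: 5 }, key: 'id1')

    MyProducer.publish({ test_id: 'id1', some_int: 5 })
    expect('MyTopic').to have_sent({ test_id: 'id1', some_int: 5 }, 'id1')
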
- - handler.handler.constantize - end - - # Test that a given handler will execute a `method` on an `input` correctly, - # If a block is given, that block will be executed when `method` is called. - # Otherwise it will just confirm that `method` is called at all. - # @param method [Symbol] - # @param input [Object] - # @param handler [Deimos::Consumer] - # @param call_original [Boolean] - def _handler_expectation(method, - input, - handler, - call_original, - &block) - schema_class = handler.class.config[:schema] - namespace = handler.class.config[:namespace] - expected = input.dup - config = handler.class.config - use_schema_classes = config[:use_schema_classes] - use_schema_classes = use_schema_classes.present? ? use_schema_classes : Deimos.config.schema.use_schema_classes + Deimos.karafka_config_for(topic: topic_name).each_message(single) - if use_schema_classes && schema_class.present? - expected = if input.is_a?(Array) - input.map do |payload| - Utils::SchemaClass.instance(payload, schema_class, namespace) - end - else - Utils::SchemaClass.instance(input, schema_class, namespace) - end + payloads.each_with_index do |payload, i| + karafka.produce(payload, {key: keys[i], partition_key: partition_keys[i], topic: consumer.topic.name}) + end + if block_given? + allow_any_instance_of(consumer_class).to receive(:consume_batch) do + yield + end + end + consumer.consume end - - expectation = expect(handler).to receive(method).with(expected, anything, &block) - expectation.and_call_original if call_original - end end end diff --git a/lib/deimos/utils/db_poller/base.rb b/lib/deimos/utils/db_poller/base.rb index aaa7e446..aafed419 100644 --- a/lib/deimos/utils/db_poller/base.rb +++ b/lib/deimos/utils/db_poller/base.rb @@ -11,6 +11,7 @@ module DbPoller # Base poller class for retrieving and publishing messages. class Base + FATAL_CODES = %i(invalid_msg_size msg_size_too_large) # @return [Integer] BATCH_SIZE = 1000 @@ -137,13 +138,14 @@ def process_batch_with_span(batch, status) process_batch(batch) Deimos.config.tracer&.finish(span) status.batches_processed += 1 - rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge, - Kafka::RecordListTooLarge => e - retry unless handle_message_too_large(e, batch, status, span) - rescue Kafka::Error => e # keep trying till it fixes itself - Deimos.config.logger.error("Error publishing through DB Poller: #{e.message}") - sleep(0.5) - retry + rescue WaterDrop::Errors::ProduceManyError => e + if FATAL_CODES.include?(e.cause.try(:code)) + retry unless handle_message_too_large(e, batch, status, span) + else + Deimos::Logging.log_error("Error publishing through DB Poller: #{e.message}") + sleep(0.5) + retry + end rescue StandardError => e Deimos::Logging.log_error("Error publishing through DB poller: #{e.message}}") if @config.retries.nil? 
|| retries < @config.retries diff --git a/spec/deimos_spec.rb b/spec/deimos_spec.rb index 0058991c..5712ac00 100644 --- a/spec/deimos_spec.rb +++ b/spec/deimos_spec.rb @@ -2,72 +2,12 @@ describe Deimos do - let(:phobos_configuration) do - { 'logger' => - { 'file' => 'log/phobos.log', - 'stdout_json' => false, - 'level' => 'debug', - 'ruby_kafka' => - { 'level' => 'debug' } }, - 'kafka' => - { 'client_id' => 'phobos', - 'connect_timeout' => 15, - 'socket_timeout' => 15, - 'seed_brokers' => 'my_seed_broker.com', - 'ssl_ca_cert' => 'my_ssl_ca_cert', - 'ssl_client_cert' => 'my_ssl_client_cert', - 'ssl_client_cert_key' => 'my_ssl_client_cert_key' }, - 'producer' => - { 'ack_timeout' => 5, - 'required_acks' => :all, - 'max_retries' => 2, - 'retry_backoff' => 1, - 'max_buffer_size' => 10_000, - 'max_buffer_bytesize' => 10_000_000, - 'compression_codec' => nil, - 'compression_threshold' => 1, - 'max_queue_size' => 10_000, - 'delivery_threshold' => 0, - 'delivery_interval' => 0 }, - 'consumer' => - { 'session_timeout' => 300, - 'offset_commit_interval' => 10, - 'offset_commit_threshold' => 0, - 'heartbeat_interval' => 10 }, - 'backoff' => - { 'min_ms' => 1000, - 'max_ms' => 60_000 }, - 'listeners' => [ - { 'handler' => 'ConsumerTest::MyConsumer', - 'topic' => 'my_consume_topic', - 'group_id' => 'my_group_id', - 'max_bytes_per_partition' => 524_288 }, - { 'handler' => 'ConsumerTest::MyBatchConsumer', - 'topic' => 'my_batch_consume_topic', - 'group_id' => 'my_batch_group_id', - 'delivery' => 'inline_batch' } - ], - 'custom_logger' => nil, - 'custom_kafka_logger' => nil } - end - - let(:config_path) { File.join(File.dirname(__FILE__), 'phobos.yml') } - it 'should have a version number' do expect(Deimos::VERSION).not_to be_nil end - it 'should error if required_acks is not all' do - expect { - described_class.configure do |config| - config.producers.backend = :db - config.phobos_config_file = File.join(File.dirname(__FILE__), 'phobos.bad_db.yml') - end - }.to raise_error('Cannot set producers.backend to :db unless producers.required_acks is set to ":all"!') - end - - describe '#start_db_backend!' do - it 'should start if backend is db and thread_count is > 0' do + describe '#start_outbox_backend!' do + it 'should start if backend is outbox and thread_count is > 0' do signal_handler = instance_double(Sigurd::SignalHandler) allow(signal_handler).to receive(:run!) expect(Sigurd::Executor).to receive(:new). @@ -77,9 +17,9 @@ signal_handler end described_class.configure do |config| - config.producers.backend = :db + config.producers.backend = :outbox end - described_class.start_db_backend!(thread_count: 2) + described_class.start_outbox_backend!(thread_count: 2) end it 'should not start if backend is not db' do @@ -87,83 +27,27 @@ described_class.configure do |config| config.producers.backend = :kafka end - expect { described_class.start_db_backend!(thread_count: 2) }. - to raise_error('Publish backend is not set to :db, exiting') + expect { described_class.start_outbox_backend!(thread_count: 2) }. + to raise_error('Publish backend is not set to :outbox, exiting') end it 'should not start if thread_count is nil' do expect(Sigurd::SignalHandler).not_to receive(:new) described_class.configure do |config| - config.producers.backend = :db + config.producers.backend = :outbox end - expect { described_class.start_db_backend!(thread_count: nil) }. + expect { described_class.start_outbox_backend!(thread_count: nil) }. 
to raise_error('Thread count is not given or set to zero, exiting') end it 'should not start if thread_count is 0' do expect(Sigurd::SignalHandler).not_to receive(:new) described_class.configure do |config| - config.producers.backend = :db + config.producers.backend = :outbox end - expect { described_class.start_db_backend!(thread_count: 0) }. + expect { described_class.start_outbox_backend!(thread_count: 0) }. to raise_error('Thread count is not given or set to zero, exiting') end end - describe 'delivery configuration' do - before(:each) do - allow(YAML).to receive(:load).and_return(phobos_configuration) - end - - it 'should not raise an error with properly configured handlers' do - expect { - described_class.configure do - consumer do - class_name 'ConsumerTest::MyConsumer' - delivery :message - end - consumer do - class_name 'ConsumerTest::MyConsumer' - delivery :batch - end - consumer do - class_name 'ConsumerTest::MyBatchConsumer' - delivery :inline_batch - end - end - }.not_to raise_error - end - - it 'should raise an error if inline_batch listeners do not implement consume_batch' do - expect { - described_class.configure do - consumer do - class_name 'ConsumerTest::MyConsumer' - delivery :inline_batch - end - end - }.to raise_error('BatchConsumer ConsumerTest::MyConsumer does not implement `consume_batch`') - end - - it 'should raise an error if Consumers do not have message or batch delivery' do - expect { - described_class.configure do - consumer do - class_name 'ConsumerTest::MyBatchConsumer' - delivery :message - end - end - }.to raise_error('Non-batch Consumer ConsumerTest::MyBatchConsumer does not implement `consume`') - end - - it 'should treat nil as `batch`' do - expect { - described_class.configure do - consumer do - class_name 'ConsumerTest::MyConsumer' - end - end - }.not_to raise_error - end - end end diff --git a/spec/karafka/karafka.rb b/spec/karafka/karafka.rb new file mode 100644 index 00000000..ac54f8c2 --- /dev/null +++ b/spec/karafka/karafka.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true +class KarafkaApp < Karafka::App + setup do |config| + config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' } + config.client_id = 'example_app' + # Recreate consumers with each batch. This will allow Rails code reload to work in the + # development mode. Otherwise Karafka process would not be aware of code changes + config.consumer_persistence = !Rails.env.development? + end + + # Comment out this part if you are not using instrumentation and/or you are not + # interested in logging events for certain environments. Since instrumentation + # notifications add extra boilerplate, if you want to achieve max performance, + # listen to only what you really need for given environment. + Karafka.monitor.subscribe(Karafka::Instrumentation::LoggerListener.new) + # Karafka.monitor.subscribe(Karafka::Instrumentation::ProctitleListener.new) + + # This logger prints the producer development info using the Karafka logger. + # It is similar to the consumer logger listener but producer oriented. 
+ Karafka.producer.monitor.subscribe( + WaterDrop::Instrumentation::LoggerListener.new( + # Log producer operations using the Karafka logger + Karafka.logger, + # If you set this to true, logs will contain each message details + # Please note, that this can be extensive + log_messages: false + ) + ) + + # You can subscribe to all consumer related errors and record/track then that way + # + # Karafka.monitor.subscribe 'error.occurred' do |event| + # type = event[:type] + # error = event[:error] + # details = (error.backtrace || []).join("\n") + # ErrorTracker.send_error(error, type, details) + # end + + # You can subscribe to all producer related errors and record/track then that way + # Please note, that producer and consumer have their own notifications pipeline so you need to + # setup error tracking independently for each of them + # + # Karafka.producer.monitor.subscribe('error.occurred') do |event| + # type = event[:type] + # error = event[:error] + # details = (error.backtrace || []).join("\n") + # ErrorTracker.send_error(error, type, details) + # end + + routes.draw do + # Uncomment this if you use Karafka with ActiveJob + # You need to define the topic per each queue name you use + # active_job_topic :default + # topic :example do + # Uncomment this if you want Karafka to manage your topics configuration + # Managing topics configuration via routing will allow you to ensure config consistency + # across multiple environments + # + # config(partitions: 2, 'cleanup.policy': 'compact') + # consumer ExampleConsumer + # end + end +end + +# Karafka now features a Web UI! +# Visit the setup documentation to get started and enhance your experience. +# +# https://karafka.io/docs/Web-UI-Getting-Started +Deimos.setup_karafka diff --git a/spec/karafka_config/karafka_spec.rb b/spec/karafka_config/karafka_spec.rb new file mode 100644 index 00000000..aebe8b80 --- /dev/null +++ b/spec/karafka_config/karafka_spec.rb @@ -0,0 +1,97 @@ +RSpec.describe 'Karafka configs' do + before(:each) do + KarafkaApp.routes.clear + $found_stuff = nil + end + + let(:consumer_class) do + Class.new(Deimos::Consumer) do + def consume_message(message) + $found_stuff = message.payload + end + end + end + + let(:producer_class) do + Class.new(Deimos::Producer) do + end + end + + describe 'producers' do + before(:each) do + stub_const('MyProducer', producer_class) + end + + it 'should work with key none' do + KarafkaApp.routes.draw do + topic 'MyTopic' do + producer_class MyProducer + schema 'MySchema' + namespace 'com.my-namespace' + key_config(none: true) + end + end + producer_class.publish({test_id: "id1", some_int: 5}) + expect('MyTopic').to have_sent({test_id: "id1", some_int: 5}) + end + + it 'should work with key plain' do + KarafkaApp.routes.draw do + topic 'MyTopic' do + producer_class MyProducer + schema 'MySchema' + namespace 'com.my-namespace' + key_config({plain: true}) + end + end + producer_class.publish({test_id: "id1", some_int: 5, payload_key: 'key'}) + expect('MyTopic').to have_sent({test_id: "id1", some_int: 5}, 'key') + end + + it 'should work with key field' do + KarafkaApp.routes.draw do + topic 'MyTopic' do + producer_class MyProducer + schema 'MySchema' + namespace 'com.my-namespace' + key_config({field: :test_id}) + end + end + producer_class.publish({test_id: "id1", some_int: 5}) + expect('MyTopic').to have_sent({test_id: "id1", some_int: 5}, 'id1') + end + + it 'should work with key schema' do + KarafkaApp.routes.draw do + topic 'MyTopic' do + producer_class MyProducer + schema 'MySchema' + 
namespace 'com.my-namespace' + key_config({schema: 'MySchema_key'}) + end + end + producer_class.publish({test_id: "id1", some_int: 5, payload_key: {test_id: 'id3'}}) + expect('MyTopic').to have_sent({test_id: "id1", some_int: 5}, { test_id: 'id3'}) + end + + end + + it 'should be able to pick up a consumer' do + stub_const('MyConsumer', consumer_class) + KarafkaApp.routes.draw do + topic 'MyTopic' do + consumer MyConsumer + schema 'MySchema' + namespace 'com.my-namespace' + key_config({field: :test_id}) + end + end + + test_consume_message('MyTopic', {test_id: "id1", some_int: 5}, key: "id1") + expect($found_stuff).to eq({'test_id' => "id1", 'some_int' => 5}) + $found_stuff = nil + test_consume_message(MyConsumer, {test_id: "id1", some_int: 5}, key: "id1") + expect($found_stuff).to eq({'test_id' => "id1", 'some_int' => 5}) + end + +end diff --git a/spec/phobos.bad_db.yml b/spec/phobos.bad_db.yml deleted file mode 100644 index 9337cf46..00000000 --- a/spec/phobos.bad_db.yml +++ /dev/null @@ -1,73 +0,0 @@ -logger: - # Optional log file, set to false or remove to disable it - file: log/phobos.log - # Optional output format for stdout, default is false (human readable). - # Set to true to enable json output. - stdout_json: false - level: debug - # Comment the block to disable ruby-kafka logs - ruby_kafka: - level: debug - -kafka: - # identifier for this application - client_id: phobos - # timeout setting for connecting to brokers - connect_timeout: 15 - # timeout setting for socket connections - socket_timeout: 15 - -producer: - # number of seconds a broker can wait for replicas to acknowledge - # a write before responding with a timeout - ack_timeout: 5 - # number of replicas that must acknowledge a write, or `:all` - # if all in-sync replicas must acknowledge - required_acks: 1 - # number of retries that should be attempted before giving up sending - # messages to the cluster. Does not include the original attempt - max_retries: 2 - # number of seconds to wait between retries - retry_backoff: 1 - # number of messages allowed in the buffer before new writes will - # raise {BufferOverflow} exceptions - max_buffer_size: 10000 - # maximum size of the buffer in bytes. Attempting to produce messages - # when the buffer reaches this size will result in {BufferOverflow} being raised - max_buffer_bytesize: 10000000 - # name of the compression codec to use, or nil if no compression should be performed. - # Valid codecs: `:snappy` and `:gzip` - compression_codec: - # number of messages that needs to be in a message set before it should be compressed. - # Note that message sets are per-partition rather than per-topic or per-producer - compression_threshold: 1 - # maximum number of messages allowed in the queue. Only used for async_producer - max_queue_size: 10000 - # if greater than zero, the number of buffered messages that will automatically - # trigger a delivery. Only used for async_producer - delivery_threshold: 0 - # if greater than zero, the number of seconds between automatic message - # deliveries. Only used for async_producer - delivery_interval: 0 - -consumer: - # number of seconds after which, if a client hasn't contacted the Kafka cluster, - # it will be kicked out of the group - session_timeout: 300 - # interval between offset commits, in seconds - offset_commit_interval: 10 - # number of messages that can be processed before their offsets are committed. 
- # If zero, offset commits are not triggered by message processing - offset_commit_threshold: 0 - # interval between heartbeats; must be less than the session window - heartbeat_interval: 10 - -backoff: - min_ms: 1000 - max_ms: 60000 - -listeners: - - handler: ConsumerTest::MyConsumer - topic: my_consume_topic - group_id: my_group_id - max_bytes_per_partition: 524288 # 512 KB diff --git a/spec/phobos.yml b/spec/phobos.yml deleted file mode 100644 index 9eb88fd2..00000000 --- a/spec/phobos.yml +++ /dev/null @@ -1,77 +0,0 @@ -logger: - # Optional log file, set to false or remove to disable it - file: log/phobos.log - # Optional output format for stdout, default is false (human readable). - # Set to true to enable json output. - stdout_json: false - level: debug - # Comment the block to disable ruby-kafka logs - ruby_kafka: - level: debug - -kafka: - # identifier for this application - client_id: phobos - # timeout setting for connecting to brokers - connect_timeout: 15 - # timeout setting for socket connections - socket_timeout: 15 - -producer: - # number of seconds a broker can wait for replicas to acknowledge - # a write before responding with a timeout - ack_timeout: 5 - # number of replicas that must acknowledge a write, or `:all` - # if all in-sync replicas must acknowledge - required_acks: :all - # number of retries that should be attempted before giving up sending - # messages to the cluster. Does not include the original attempt - max_retries: 2 - # number of seconds to wait between retries - retry_backoff: 1 - # number of messages allowed in the buffer before new writes will - # raise {BufferOverflow} exceptions - max_buffer_size: 10000 - # maximum size of the buffer in bytes. Attempting to produce messages - # when the buffer reaches this size will result in {BufferOverflow} being raised - max_buffer_bytesize: 10000000 - # name of the compression codec to use, or nil if no compression should be performed. - # Valid codecs: `:snappy` and `:gzip` - compression_codec: - # number of messages that needs to be in a message set before it should be compressed. - # Note that message sets are per-partition rather than per-topic or per-producer - compression_threshold: 1 - # maximum number of messages allowed in the queue. Only used for async_producer - max_queue_size: 10000 - # if greater than zero, the number of buffered messages that will automatically - # trigger a delivery. Only used for async_producer - delivery_threshold: 0 - # if greater than zero, the number of seconds between automatic message - # deliveries. Only used for async_producer - delivery_interval: 0 - -consumer: - # number of seconds after which, if a client hasn't contacted the Kafka cluster, - # it will be kicked out of the group - session_timeout: 300 - # interval between offset commits, in seconds - offset_commit_interval: 10 - # number of messages that can be processed before their offsets are committed. 
- # If zero, offset commits are not triggered by message processing - offset_commit_threshold: 0 - # interval between heartbeats; must be less than the session window - heartbeat_interval: 10 - -backoff: - min_ms: 1000 - max_ms: 60000 - -listeners: - - handler: ConsumerTest::MyConsumer - topic: my_consume_topic - group_id: my_group_id - max_bytes_per_partition: 524288 # 512 KB - - handler: ConsumerTest::MyBatchConsumer - topic: my_batch_consume_topic - group_id: my_batch_group_id - delivery: inline_batch diff --git a/spec/rake_spec.rb b/spec/rake_spec.rb index e6884a67..ae4c83c2 100644 --- a/spec/rake_spec.rb +++ b/spec/rake_spec.rb @@ -11,9 +11,7 @@ describe 'Rakefile' do it 'should start listeners' do - runner = instance_double(Phobos::CLI::Runner) - expect(Phobos::CLI::Runner).to receive(:new).and_return(runner) - expect(runner).to receive(:run!) + expect(Karafka::Server).to receive(:run) Rake::Task['deimos:start'].invoke end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index acf29369..1f60aedc 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -14,6 +14,7 @@ require 'handlers/my_consumer' require 'rspec/rails' require 'rspec/snapshot' +require 'karafka/testing/rspec/helpers' require "trilogy_adapter/connection" ActiveRecord::Base.public_send :extend, TrilogyAdapter::Connection Dir['./spec/schemas/**/*.rb'].sort.each { |f| require f } @@ -23,9 +24,33 @@ class DeimosApp < Rails::Application end +DeimosApp.initializer("setup_root_dir", before: "karafka.require_karafka_boot_file") do + ENV['KARAFKA_ROOT_DIR'] = "#{Rails.root}/spec/karafka" +end DeimosApp.initialize! -# Helpers for Executor/DbProducer +module Helpers + + def set_karafka_config(method, val) + Deimos.karafka_configs.each { |c| c.send(method.to_sym, val) } + end + + def register_consumer(klass, schema, namespace='com.my-namespace', key_config:{none: true}, configs: {}) + Karafka::App.routes.redraw do + topic 'my-topic' do + consumer klass + schema schema + namespace namespace + key_config key_config + configs.each do |k, v| + public_send(k, v) + end + end + end + end +end + +# Helpers for Executor/OutboxProducer module TestRunners # Execute a block until it stops failing. 
This is helpful for testing threads # where we need to wait for them to continue but don't want to rely on @@ -73,11 +98,7 @@ module DbConfigs # @param topic [String] # @param key [String] def build_message(payload, topic, key) - message = Deimos::Message.new(payload, Deimos::Producer, - topic: topic, key: key) - message.encoded_payload = message.payload - message.encoded_key = message.key - message + { payload: payload, topic: topic, key: key} end DB_OPTIONS = [ @@ -85,7 +106,7 @@ def build_message(payload, topic, key) adapter: 'postgresql', port: 5432, username: 'postgres', - password: 'root', + password: 'password', database: 'postgres', host: ENV['PG_HOST'] || 'localhost' }, @@ -123,14 +144,14 @@ def each_db_config(subject, &block) end # :nodoc: - def run_db_backend_migration - migration_class_name = 'DbBackendMigration' + def run_outbox_backend_migration + migration_class_name = 'OutboxBackendMigration' migration_version = '[5.2]' migration = ERB.new( - File.read('lib/generators/deimos/db_backend/templates/migration') + File.read('lib/generators/deimos/outbox_backend/templates/migration') ).result(binding) eval(migration) # rubocop:disable Security/Eval - ActiveRecord::Migration.new.run(DbBackendMigration, direction: :up) + ActiveRecord::Migration.new.run(OutboxBackendMigration, direction: :up) end # :nodoc: @@ -147,7 +168,7 @@ def run_db_poller_migration # Set up the given database. def setup_db(options) ActiveRecord::Base.establish_connection(options) - run_db_backend_migration + run_outbox_backend_migration run_db_poller_migration ActiveRecord::Base.descendants.each do |klass| @@ -163,7 +184,10 @@ def setup_db(options) RSpec.configure do |config| config.extend(DbConfigs) include DbConfigs + config.include Karafka::Testing::RSpec::Helpers + config.include TestRunners + config.include Helpers config.full_backtrace = true config.snapshot_dir = "spec/snapshots" @@ -199,13 +223,11 @@ def setup_db(options) config.before(:each) do Deimos.config.reset! 
Deimos.configure do |deimos_config| - deimos_config.producers.backend = :test + deimos_config.producers.backend = :kafka deimos_config.schema.nest_child_schemas = true - deimos_config.phobos_config_file = File.join(File.dirname(__FILE__), 'phobos.yml') deimos_config.schema.path = File.join(File.expand_path(__dir__), 'schemas') deimos_config.consumers.reraise_errors = true deimos_config.schema.registry_url = ENV['SCHEMA_REGISTRY'] || 'http://localhost:8081' - deimos_config.kafka.seed_brokers = ENV['KAFKA_SEED_BROKER'] || 'localhost:9092' deimos_config.logger = Logger.new('/dev/null') deimos_config.logger.level = Logger::INFO deimos_config.schema.backend = :avro_validation @@ -213,6 +235,10 @@ def setup_db(options) end end + config.after(:each) do + Deimos::EVENT_TYPES.each { |type| Karafka.monitor.notifications_bus.clear(type) } + end + config.around(:each) do |example| use_cleaner = !example.metadata[:integration] @@ -262,26 +288,32 @@ def generated_id RSpec.shared_context('with publish_backend') do before(:each) do - producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic' - key_config field: 'test_id' - end + producer_class = Class.new(Deimos::Producer) stub_const('MyProducer', producer_class) - producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic' - key_config none: true - end + producer_class_no_key = Class.new(Deimos::Producer) stub_const('MyNoKeyProducer', producer_class) + + Karafka::App.routes.redraw do + topic 'my-topic-no-key' do + schema 'MySchema' + namespace 'com.my-namespace' + key_config none: true + producer_class producer_class_no_key + end + topic 'my-topic' do + schema 'MySchema' + namespace 'com.my-namespace' + key_config field: 'test_id' + producer_class producer_class + end + end + end let(:messages) do (1..3).map do |i| - build_message({ foo: i }, 'my-topic', "foo#{i}") + build_message({ test_id: "foo#{i}", some_int: i }, 'my-topic', "foo#{i}") end end end From 4d252577fae370102d87a8e3eefa1c30f48699e0 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:41:15 -0400 Subject: [PATCH 14/18] Consumer specs --- ..._record_batch_consumer_association_spec.rb | 35 +-- spec/active_record_batch_consumer_spec.rb | 149 +++++++------ .../batch_consumption_spec.rb | 20 +- .../batch_slicer_spec.rb | 24 +-- spec/active_record_consumer_spec.rb | 42 ++-- spec/batch_consumer_spec.rb | 167 ++++++--------- spec/consumer_spec.rb | 200 +++++------------- spec/message_spec.rb | 18 +- .../com/my-namespace/MySchemaWithTitle.avsc | 22 ++ spec/snapshots/consumers-no-nest.snap | 49 +++++ spec/snapshots/consumers.snap | 49 +++++ .../consumers_and_producers-no-nest.snap | 49 +++++ spec/snapshots/consumers_and_producers.snap | 49 +++++ .../snapshots/consumers_circular-no-nest.snap | 49 +++++ spec/snapshots/consumers_circular.snap | 49 +++++ .../consumers_complex_types-no-nest.snap | 49 +++++ spec/snapshots/consumers_complex_types.snap | 49 +++++ spec/snapshots/consumers_nested-no-nest.snap | 49 +++++ spec/snapshots/consumers_nested.snap | 49 +++++ spec/snapshots/namespace_folders.snap | 49 +++++ spec/snapshots/namespace_map.snap | 49 +++++ .../snapshots/producers_with_key-no-nest.snap | 49 +++++ spec/snapshots/producers_with_key.snap | 49 +++++ 23 files changed, 989 insertions(+), 374 deletions(-) create mode 100644 spec/schemas/com/my-namespace/MySchemaWithTitle.avsc diff --git a/spec/active_record_batch_consumer_association_spec.rb 
b/spec/active_record_batch_consumer_association_spec.rb index 99b08aa1..038bfc91 100644 --- a/spec/active_record_batch_consumer_association_spec.rb +++ b/spec/active_record_batch_consumer_association_spec.rb @@ -74,9 +74,21 @@ class Widget < ActiveRecord::Base end prepend_before(:each) do - consumer_class.config[:bulk_import_id_column] = :bulk_import_id stub_const('MyBatchConsumer', consumer_class) stub_const('ConsumerTest::MyBatchConsumer', consumer_class) + klass = consumer_class + col = bulk_import_id_column + rep = replace_associations + Karafka::App.routes.redraw do + topic 'my-topic' do + consumer klass + schema 'MySchemaWithTitle' + namespace 'com.my-namespace' + key_config plain: true + bulk_import_id_column col + replace_associations rep + end + end end # Helper to publish a list of messages and call the consumer @@ -84,16 +96,16 @@ def publish_batch(messages) keys = messages.map { |m| m[:key] } payloads = messages.map { |m| m[:payload] } - test_consume_batch(MyBatchConsumer, payloads, keys: keys, call_original: true) + test_consume_batch(MyBatchConsumer, payloads, keys: keys) end + let(:bulk_import_id_column) { :bulk_import_id } + let(:replace_associations) { true } + let(:consumer_class) do klass = Class.new(described_class) do cattr_accessor :record_attributes_proc cattr_accessor :should_consume_proc - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true record_class Widget def should_consume?(record, associations) @@ -154,7 +166,6 @@ def columns(record_class) context 'when association configured in consumer without model changes' do before(:each) do - consumer_class.config[:bulk_import_id_column] = :bulk_import_id ActiveRecord::Base.connection.remove_column(:widgets, :bulk_import_id) Widget.reset_column_information end @@ -164,6 +175,7 @@ def columns(record_class) end it 'should raise error when bulk_import_id is not found' do + set_karafka_config(:reraise_errors, true) expect { publish_batch([{ key: 2, payload: { test_id: 'xyz', some_int: 5, title: 'Widget Title' } }]) @@ -173,10 +185,8 @@ def columns(record_class) end context 'with one-to-one relation in association and custom bulk_import_id' do - before(:each) do - consumer_class.config[:bulk_import_id_column] = :custom_id - consumer_class.config[:replace_associations] = false - end + let(:bulk_import_id_column) { :custom_id } + let(:replace_associations) { false } before(:all) do ActiveRecord::Base.connection.add_column(:widgets, :custom_id, :string, if_not_exists: true) @@ -193,8 +203,8 @@ def columns(record_class) end context 'with one-to-many relationship in association and default bulk_import_id' do + let(:replace_associations) { false } before(:each) do - consumer_class.config[:replace_associations] = false consumer_class.record_attributes_proc = proc do |payload| { test_id: payload['test_id'], @@ -214,7 +224,6 @@ def columns(record_class) end it 'should save item to widget and associated details' do - consumer_class.config[:replace_associations] = false publish_batch([{ key: 2, payload: { test_id: 'xyz', some_int: 5, title: 'Widget Title' } }]) expect(Widget.count).to eq(2) @@ -233,8 +242,8 @@ def columns(record_class) end context 'with replace_associations on' do + let(:replace_associations) { true } before(:each) do - consumer_class.config[:replace_associations] = true consumer_class.record_attributes_proc = proc do |payload| { test_id: payload['test_id'], diff --git a/spec/active_record_batch_consumer_spec.rb b/spec/active_record_batch_consumer_spec.rb index 6c5fb43c..40979e8e 100644 --- 
a/spec/active_record_batch_consumer_spec.rb +++ b/spec/active_record_batch_consumer_spec.rb @@ -34,7 +34,11 @@ class Widget < ActiveRecord::Base prepend_before(:each) do stub_const('MyBatchConsumer', consumer_class) stub_const('ConsumerTest::MyBatchConsumer', consumer_class) - consumer_class.config[:bulk_import_id_column] = :bulk_import_id # default + register_consumer(MyBatchConsumer, + 'MySchema', + key_config: {plain: true}, + configs: {bulk_import_id_column: :bulk_import_id}) + Widget.delete_all end around(:each) do |ex| @@ -50,9 +54,6 @@ class Widget < ActiveRecord::Base # Basic uncompacted consumer let(:consumer_class) do Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true record_class Widget compacted false end @@ -68,7 +69,7 @@ def publish_batch(messages) keys = messages.map { |m| m[:key] } payloads = messages.map { |m| m[:payload] } - test_consume_batch(MyBatchConsumer, payloads, keys: keys, call_original: true) + test_consume_batch(MyBatchConsumer, payloads, keys: keys) end describe 'consume_batch' do @@ -82,10 +83,6 @@ def publish_batch(messages) end end - it 'should handle an empty batch' do - expect { publish_batch([]) }.not_to raise_error - end - it 'should create records from a batch' do publish_batch( [ @@ -257,12 +254,16 @@ def publish_batch(messages) end describe 'compacted mode' do + before(:each) do + register_consumer(consumer_class, + 'MySchema', + key_config: {plain: true}) + + end + # Create a compacted consumer let(:consumer_class) do Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true record_class Widget # :no-doc: @@ -302,11 +303,14 @@ def deleted_query(_records) end describe 'compound keys' do + before(:each) do + register_consumer(consumer_class, + 'MySchema', + key_config: {schema: 'MySchemaCompound_key'}) + end + let(:consumer_class) do Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config schema: 'MySchemaCompound_key' record_class Widget compacted false @@ -353,13 +357,10 @@ def deleted_query(records) end describe 'no keys' do - let(:consumer_class) do - Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config none: true - record_class Widget - end + before(:each) do + register_consumer(consumer_class, + 'MySchema', + key_config: {none: true}) end it 'should handle unkeyed topics' do @@ -385,11 +386,13 @@ def deleted_query(records) end describe 'soft deletion' do + before(:each) do + register_consumer(consumer_class, + 'MySchema', + key_config: {plain: true}) + end let(:consumer_class) do Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true record_class Widget compacted false @@ -451,11 +454,13 @@ def record_attributes(payload, key) end describe 'skipping records' do + before(:each) do + register_consumer(consumer_class, + 'MySchema', + key_config: {plain: true}) + end let(:consumer_class) do Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true record_class Widget # Sample customization: Skipping records @@ -471,9 +476,9 @@ def record_attributes(payload, key) publish_batch( [ { key: 1, # Record that consumer can decide to skip - payload: { test_id: 'skipme' } }, + payload: { test_id: 'skipme', some_int: 3 } }, { key: 2, - payload: { test_id: 'abc123' } } + payload: { test_id: 'abc123', some_int: 3 } } ] ) @@ -484,11 +489,13 @@ def record_attributes(payload, key) describe 
'pre processing' do context 'with uncompacted messages' do + before(:each) do + register_consumer(consumer_class, + 'MySchema', + key_config: {plain: true}) + end let(:consumer_class) do Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true record_class Widget compacted false @@ -527,9 +534,11 @@ def pre_process(messages) context 'with a global bulk_import_id_generator' do before(:each) do - Deimos.configure do - consumers.bulk_import_id_generator(proc { 'global' }) - end + register_consumer(consumer_class, + 'MySchema', + key_config: {plain: true}, + configs: {bulk_import_id_generator: proc { 'global' }} + ) end it 'should call the default bulk_import_id_generator proc' do @@ -559,10 +568,17 @@ def pre_process(messages) context 'with a class defined bulk_import_id_generator' do before(:each) do - Deimos.configure do - consumers.bulk_import_id_generator(proc { 'global' }) + Karafka::App.routes.clear + Karafka::App.routes.draw do + defaults do + bulk_import_id_generator(proc { 'global'}) + end end - consumer_class.config[:bulk_import_id_generator] = proc { 'custom' } + register_consumer(consumer_class, + 'MySchema', + key_config: {plain: true}, + configs: {bulk_import_id_generator: proc { 'custom' }} + ) end it 'should call the default bulk_import_id_generator proc' do @@ -593,11 +609,13 @@ def pre_process(messages) describe 'should_consume?' do + before(:each) do + register_consumer(consumer_class, + 'MySchema', + key_config: {plain: true}) + end let(:consumer_class) do Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true record_class Widget compacted false @@ -609,9 +627,8 @@ def self.process_invalid_records(_) nil end - ActiveSupport::Notifications.subscribe('batch_consumption.invalid_records') do |*args| - payload = ActiveSupport::Notifications::Event.new(*args).payload - payload[:consumer].process_invalid_records(payload[:records]) + Karafka.monitor.subscribe('deimos.batch_consumption.invalid_records') do |event| + event[:consumer].process_invalid_records(event[:records]) end end @@ -649,11 +666,13 @@ def self.process_invalid_records(_) describe 'post processing' do context 'with uncompacted messages' do + before(:each) do + register_consumer(consumer_class, + 'MySchema', + key_config: {plain: true}) + end let(:consumer_class) do Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true record_class Widget compacted false @@ -673,13 +692,11 @@ def self.process_invalid_records(invalid) Widget.find_by(id: attrs['id'], test_id: attrs['test_id']).update!(some_int: attrs['some_int']) end - ActiveSupport::Notifications.subscribe('batch_consumption.invalid_records') do |*args| - payload = ActiveSupport::Notifications::Event.new(*args).payload + Karafka.monitor.subscribe('deimos.batch_consumption.invalid_records') do |payload| payload[:consumer].process_invalid_records(payload[:records]) end - ActiveSupport::Notifications.subscribe('batch_consumption.valid_records') do |*args| - payload = ActiveSupport::Notifications::Event.new(*args).payload + Karafka.monitor.subscribe('deimos.batch_consumption.valid_records') do |payload| payload[:consumer].process_valid_records(payload[:records]) end @@ -707,11 +724,13 @@ def self.process_invalid_records(invalid) end context 'with compacted messages' do + before(:each) do + register_consumer(consumer_class, + 'MySchema', + key_config: {plain: true}) + end let(:consumer_class) do Class.new(described_class) 
do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true record_class Widget compacted true @@ -731,13 +750,11 @@ def self.process_invalid_records(invalid) Widget.find_by(id: attrs['id'], test_id: attrs['test_id']).update!(some_int: attrs['some_int']) end - ActiveSupport::Notifications.subscribe('batch_consumption.invalid_records') do |*args| - payload = ActiveSupport::Notifications::Event.new(*args).payload + Karafka.monitor.subscribe('deimos.batch_consumption.invalid_records') do |payload| payload[:consumer].process_invalid_records(payload[:records]) end - ActiveSupport::Notifications.subscribe('batch_consumption.valid_records') do |*args| - payload = ActiveSupport::Notifications::Event.new(*args).payload + Karafka.monitor.subscribe('deimos.batch_consumption.valid_records') do |payload| payload[:consumer].process_valid_records(payload[:records]) end @@ -765,11 +782,13 @@ def self.process_invalid_records(invalid) end context 'with post processing errors' do + before(:each) do + register_consumer(consumer_class, + 'MySchema', + key_config: {plain: true}) + end let(:consumer_class) do Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true record_class Widget compacted false @@ -777,15 +796,15 @@ def self.process_valid_records(_) raise StandardError, 'Something went wrong' end - ActiveSupport::Notifications.subscribe('batch_consumption.valid_records') do |*args| - payload = ActiveSupport::Notifications::Event.new(*args).payload - payload[:consumer].process_valid_records(payload[:records]) + Karafka.monitor.subscribe('deimos.batch_consumption.valid_records') do |event| + event[:consumer].process_valid_records(event[:records]) end end end it 'should save records if an exception occurs in post processing' do + set_karafka_config(:reraise_errors, true) Widget.create!(id: 1, test_id: 'abc', some_int: 1) Widget.create!(id: 2, test_id: 'def', some_int: 2) diff --git a/spec/active_record_consume/batch_consumption_spec.rb b/spec/active_record_consume/batch_consumption_spec.rb index e1e89562..6634eebe 100644 --- a/spec/active_record_consume/batch_consumption_spec.rb +++ b/spec/active_record_consume/batch_consumption_spec.rb @@ -9,11 +9,11 @@ describe 'upsert_records' do let(:records) do [ - Deimos::Message.new({ v: 1 }, nil, key: 1), - Deimos::Message.new({ v: 2 }, nil, key: 2), - Deimos::Message.new({ v: 3 }, nil, key: 3), - Deimos::Message.new({ v: 4 }, nil, key: 4), - Deimos::Message.new({ v: 5 }, nil, key: 5) + Deimos::Message.new({ v: 1 }, key: 1), + Deimos::Message.new({ v: 2 }, key: 2), + Deimos::Message.new({ v: 3 }, key: 3), + Deimos::Message.new({ v: 4 }, key: 4), + Deimos::Message.new({ v: 5 }, key: 5) ] end @@ -56,11 +56,11 @@ describe 'remove_records' do let(:records) do [ - Deimos::Message.new(nil, nil, key: 1), - Deimos::Message.new(nil, nil, key: 2), - Deimos::Message.new(nil, nil, key: 3), - Deimos::Message.new(nil, nil, key: 4), - Deimos::Message.new(nil, nil, key: 5) + Deimos::Message.new(nil, key: 1), + Deimos::Message.new(nil, key: 2), + Deimos::Message.new(nil, key: 3), + Deimos::Message.new(nil, key: 4), + Deimos::Message.new(nil, key: 5) ] end diff --git a/spec/active_record_consume/batch_slicer_spec.rb b/spec/active_record_consume/batch_slicer_spec.rb index 0333d3d9..ba1f68e1 100644 --- a/spec/active_record_consume/batch_slicer_spec.rb +++ b/spec/active_record_consume/batch_slicer_spec.rb @@ -4,12 +4,12 @@ describe '#slice' do let(:batch) do [ - Deimos::Message.new({ v: 1 }, nil, key: 'C'), - 
Deimos::Message.new({ v: 123 }, nil, key: 'A'), - Deimos::Message.new({ v: 999 }, nil, key: 'B'), - Deimos::Message.new({ v: 456 }, nil, key: 'A'), - Deimos::Message.new({ v: 2 }, nil, key: 'C'), - Deimos::Message.new({ v: 3 }, nil, key: 'C') + Deimos::Message.new({ v: 1 }, key: 'C'), + Deimos::Message.new({ v: 123 }, key: 'A'), + Deimos::Message.new({ v: 999 }, key: 'B'), + Deimos::Message.new({ v: 456 }, key: 'A'), + Deimos::Message.new({ v: 2 }, key: 'C'), + Deimos::Message.new({ v: 3 }, key: 'C') ] end @@ -19,16 +19,16 @@ expect(slices). to match([ match_array([ - Deimos::Message.new({ v: 1 }, nil, key: 'C'), - Deimos::Message.new({ v: 123 }, nil, key: 'A'), - Deimos::Message.new({ v: 999 }, nil, key: 'B') + Deimos::Message.new({ v: 1 }, key: 'C'), + Deimos::Message.new({ v: 123 }, key: 'A'), + Deimos::Message.new({ v: 999 }, key: 'B') ]), match_array([ - Deimos::Message.new({ v: 456 }, nil, key: 'A'), - Deimos::Message.new({ v: 2 }, nil, key: 'C') + Deimos::Message.new({ v: 456 }, key: 'A'), + Deimos::Message.new({ v: 2 }, key: 'C') ]), match_array([ - Deimos::Message.new({ v: 3 }, nil, key: 'C') + Deimos::Message.new({ v: 3 }, key: 'C') ]) ]) end diff --git a/spec/active_record_consumer_spec.rb b/spec/active_record_consumer_spec.rb index 7605b833..7139f037 100644 --- a/spec/active_record_consumer_spec.rb +++ b/spec/active_record_consumer_spec.rb @@ -33,25 +33,16 @@ class Widget < ActiveRecord::Base prepend_before(:each) do consumer_class = Class.new(Deimos::ActiveRecordConsumer) do - schema 'MySchemaWithDateTimes' - namespace 'com.my-namespace' - key_config plain: true record_class Widget end stub_const('MyConsumer', consumer_class) consumer_class = Class.new(Deimos::ActiveRecordConsumer) do - schema 'MySchemaWithDateTimes' - namespace 'com.my-namespace' - key_config schema: 'MySchemaId_key' record_class Widget end stub_const('MyConsumerWithKey', consumer_class) consumer_class = Class.new(Deimos::ActiveRecordConsumer) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config none: true record_class Widget # :nodoc: @@ -135,14 +126,39 @@ def as_json(_opts={}) end end stub_const('Schemas::MySchemaWithDateTimes', schema_datetime_class) + + Karafka::App.routes.redraw do + topic "my-topic" do + consumer MyConsumer + schema 'MySchemaWithDateTimes' + namespace 'com.my-namespace' + key_config plain: true + end + topic "my-topic2" do + consumer MyConsumerWithKey + schema 'MySchemaWithDateTimes' + namespace 'com.my-namespace' + key_config schema: 'MySchemaId_key' + end + topic "my-topic3" do + consumer MyCustomFetchConsumer + schema 'MySchema' + namespace 'com.my-namespace' + key_config none: true + end + end end describe 'consume' do SCHEMA_CLASS_SETTINGS.each do |setting, use_schema_classes| context "with Schema Class consumption #{setting}" do before(:each) do + Karafka::App.routes.draw do + defaults do + use_schema_classes use_schema_classes + end + end Deimos.configure do |config| - config.schema.use_schema_classes = use_schema_classes config.schema.use_full_namespace = true end end @@ -200,7 +216,7 @@ def as_json(_opts={}) test_consume_message(MyCustomFetchConsumer, { test_id: 'id1', some_int: 3 - }, call_original: true) + }) expect(widget1.reload.updated_at.in_time_zone). 
to eq(Time.local(2020, 5, 6, 5, 5, 5)) travel_back @@ -225,13 +241,13 @@ def as_json(_opts={}) test_consume_message(MyCustomFetchConsumer, { test_id: 'id1', some_int: 3 - }, call_original: true) + }) expect(widget1.reload.some_int).to eq(3) expect(Widget.count).to eq(1) test_consume_message(MyCustomFetchConsumer, { test_id: 'id2', some_int: 4 - }, call_original: true) + }) expect(Widget.count).to eq(2) expect(Widget.find_by_test_id('id1').some_int).to eq(3) expect(Widget.find_by_test_id('id2').some_int).to eq(4) diff --git a/spec/batch_consumer_spec.rb b/spec/batch_consumer_spec.rb index 3d8a9fe0..1c365ca0 100644 --- a/spec/batch_consumer_spec.rb +++ b/spec/batch_consumer_spec.rb @@ -3,19 +3,34 @@ # :nodoc: module ConsumerTest describe Deimos::Consumer, 'Batch Consumer' do - prepend_before(:each) do - # :nodoc: - consumer_class = Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config field: 'test_id' - + let(:schema) { 'MySchema' } + let(:use_schema_classes) { false } + let(:reraise_errors) { false } + let(:key_config) { { field: 'test_id' } } + let(:consumer_class) do + Class.new(described_class) do # :nodoc: - def consume_batch(_payloads, _metadata) - raise 'This should not be called unless call_original is set' + def consume_batch end end + end + before(:each) do + # :nodoc: + stub_const('MyBatchConsumer', consumer_class) stub_const('ConsumerTest::MyBatchConsumer', consumer_class) + klass = consumer_class + route_schema = schema + route_key = key_config + route_use_classes = use_schema_classes + Karafka::App.routes.redraw do + topic 'my-topic' do + consumer klass + schema route_schema + namespace 'com.my-namespace' + key_config route_key + use_schema_classes route_use_classes + end + end end let(:batch) do @@ -38,41 +53,23 @@ def consume_batch(_payloads, _metadata) Deimos::Utils::SchemaClass.instance(p, 'MySchema', 'com.my-namespace') end end + let(:use_schema_classes) { true } before(:each) do Deimos.configure do |config| - config.schema.use_schema_classes = use_schema_classes config.schema.use_full_namespace = true end end - it 'should provide backwards compatibility for BatchConsumer class' do - consumer_class = Class.new(Deimos::BatchConsumer) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config field: 'test_id' - - # :nodoc: - def consume_batch(_payloads, _metadata) - raise 'This should not be called unless call_original is set' - end - end - stub_const('ConsumerTest::MyOldBatchConsumer', consumer_class) - - test_consume_batch(MyOldBatchConsumer, schema_class_batch) do |received, _metadata| - expect(received).to eq(schema_class_batch) - end - end - it 'should consume a batch of messages' do - test_consume_batch(MyBatchConsumer, schema_class_batch) do |received, _metadata| - expect(received).to eq(schema_class_batch) + test_consume_batch(MyBatchConsumer, schema_class_batch) do |received| + expect(received.payloads).to eq(schema_class_batch) end end it 'should consume a message on a topic' do - test_consume_batch('my_batch_consume_topic', schema_class_batch) do |received, _metadata| - expect(received).to eq(schema_class_batch) + test_consume_batch('my-topic', schema_class_batch) do |received| + expect(received.payloads).to eq(schema_class_batch) end end end @@ -80,29 +77,19 @@ def consume_batch(_payloads, _metadata) end describe 'when reraising errors is disabled' do - before(:each) do - Deimos.configure { |config| config.consumers.reraise_errors = false } - end + let(:reraise_errors) { false } it 'should not fail when 
before_consume_batch fails' do expect { test_consume_batch( MyBatchConsumer, - batch, - skip_expectation: true - ) { raise 'OH NOES' } + batch + ) do + raise 'OH NOES' + end }.not_to raise_error end - it 'should not fail when consume_batch fails' do - expect { - test_consume_batch( - MyBatchConsumer, - invalid_payloads, - skip_expectation: true - ) - }.not_to raise_error - end end describe 'decoding' do @@ -111,53 +98,32 @@ def consume_batch(_payloads, _metadata) end it 'should decode payloads for all messages in the batch' do - test_consume_batch('my_batch_consume_topic', batch) do |received, _metadata| + test_consume_batch('my-topic', batch) do # Mock decoder simply returns the payload - expect(received).to eq(batch) + expect(messages.payloads).to eq(batch) end end it 'should decode keys for all messages in the batch' do - expect_any_instance_of(ConsumerTest::MyBatchConsumer). - to receive(:decode_key).with(keys[0]).and_call_original - expect_any_instance_of(ConsumerTest::MyBatchConsumer). - to receive(:decode_key).with(keys[1]).and_call_original - - test_consume_batch('my_batch_consume_topic', batch, keys: keys) do |_received, metadata| - # Mock decode_key extracts the value of the first field as the key - expect(metadata[:keys]).to eq(%w(foo bar)) - expect(metadata[:first_offset]).to eq(1) + test_consume_batch('my-topic', batch, keys: keys) do + expect(messages.map(&:key)).to eq([{'test_id' => 'foo'}, {'test_id' => 'bar'}]) end end - it 'should decode plain keys for all messages in the batch' do - consumer_class = Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true - end - stub_const('ConsumerTest::MyBatchConsumer', consumer_class) - - test_consume_batch('my_batch_consume_topic', batch, keys: [1, 2]) do |_received, metadata| - expect(metadata[:keys]).to eq([1, 2]) + context 'with plain keys' do + let(:key_config) { { plain: true } } + it 'should decode plain keys for all messages in the batch' do + test_consume_batch('my-topic', batch, keys: [1, 2]) do |_received, metadata| + expect(metadata[:keys]).to eq([1, 2]) + end end end end describe 'timestamps' do + let(:schema) { 'MySchemaWithDateTimes' } + let(:key_config) { { none: true } } before(:each) do - # :nodoc: - consumer_class = Class.new(described_class) do - schema 'MySchemaWithDateTimes' - namespace 'com.my-namespace' - key_config plain: true - - # :nodoc: - def consume_batch(_payloads, _metadata) - raise 'This should not be called unless call_original is set' - end - end - stub_const('ConsumerTest::MyBatchConsumer', consumer_class) allow(Deimos.config.metrics).to receive(:histogram) end @@ -202,45 +168,36 @@ def consume_batch(_payloads, _metadata) end it 'should consume a batch' do - expect(Deimos.config.metrics). - to receive(:histogram).with('handler', - a_kind_of(Numeric), - tags: %w(time:time_delayed topic:my-topic)).twice + # expect(Deimos.config.metrics). + # to receive(:histogram).with('handler', + # a_kind_of(Numeric), + # tags: %w(time:time_delayed topic:my-topic)).twice - test_consume_batch('my_batch_consume_topic', batch_with_time) do |received, _metadata| - expect(received).to eq(batch_with_time) + test_consume_batch('my-topic', batch_with_time) do + expect(messages.payloads).to eq(batch_with_time) end end it 'should fail nicely and ignore timestamps with the wrong format' do batch = invalid_times.concat(batch_with_time) - expect(Deimos.config.metrics). 
- to receive(:histogram).with('handler', - a_kind_of(Numeric), - tags: %w(time:time_delayed topic:my-topic)).twice + # expect(Deimos.config.metrics). + # to receive(:histogram).with('handler', + # a_kind_of(Numeric), + # tags: %w(time:time_delayed topic:my-topic)).twice - test_consume_batch('my_batch_consume_topic', batch) do |received, _metadata| - expect(received).to eq(batch) + test_consume_batch('my-topic', batch) do + expect(messages.payloads).to eq(batch) end end end describe 'logging' do + let(:schema) { 'MySchemaWithUniqueId' } + let(:key_config) { { plain: true } } before(:each) do - # :nodoc: - consumer_class = Class.new(described_class) do - schema 'MySchemaWithUniqueId' - namespace 'com.my-namespace' - key_config plain: true - - # :nodoc: - def consume_batch(_payloads, _metadata) - raise 'This should not be called unless call_original is set' - end - end - stub_const('ConsumerTest::MyBatchConsumer', consumer_class) allow(Deimos.config.metrics).to receive(:histogram) + set_karafka_config(:payload_log, :keys) end it 'should log message identifiers' do @@ -257,7 +214,7 @@ def consume_batch(_payloads, _metadata) to receive(:log_info). with(hash_including(payload_keys: ["1", "2"])) - test_consume_batch('my_batch_consume_topic', batch_with_message_id, keys: [1, 2]) + test_consume_batch('my-topic', batch_with_message_id, keys: [1, 2]) end end end diff --git a/spec/consumer_spec.rb b/spec/consumer_spec.rb index 4db04253..66466072 100644 --- a/spec/consumer_spec.rb +++ b/spec/consumer_spec.rb @@ -4,33 +4,44 @@ # rubocop:disable Metrics/ModuleLength module ConsumerTest describe Deimos::Consumer, 'Message Consumer' do + let(:use_schema_classes) { false } + let(:reraise_errors) { false } prepend_before(:each) do # :nodoc: consumer_class = Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config field: 'test_id' # :nodoc: - def fatal_error?(_exception, payload, _metadata) - payload.to_s == 'fatal' + def fatal_error?(_exception, messages) + messages.payloads.first&.dig(:test_id) == ['fatal'] end # :nodoc: - def consume(_payload, _metadata) - raise 'This should not be called unless call_original is set' + def consume_message(message) + message.payload end end stub_const('ConsumerTest::MyConsumer', consumer_class) + route_usc = use_schema_classes + route_rre = reraise_errors + Karafka::App.routes.redraw do + topic 'my_consume_topic' do + schema 'MySchema' + namespace 'com.my-namespace' + key_config field: 'test_id' + consumer consumer_class + use_schema_classes route_usc + reraise_errors route_rre + end + end end describe 'consume' do SCHEMA_CLASS_SETTINGS.each do |setting, use_schema_classes| + let(:use_schema_classes) { use_schema_classes } context "with Schema Class consumption #{setting}" do before(:each) do Deimos.configure do |config| - config.schema.use_schema_classes = use_schema_classes config.schema.use_full_namespace = true end end @@ -45,26 +56,9 @@ def consume(_payload, _metadata) end it 'should consume a nil message' do - test_consume_message(MyConsumer, nil) do |payload, _metadata| - expect(payload).to be_nil - end - end - - it 'should consume a message idempotently' do - # testing for a crash and re-consuming the same message/metadata - key = { 'test_id' => 'foo' } - test_metadata = { key: key } - allow_any_instance_of(MyConsumer).to(receive(:decode_key)) do |_instance, k| - k['test_id'] + test_consume_message(MyConsumer, nil, key: 'foo') do + expect(messages).to be_empty end - MyConsumer.new.around_consume({ 'test_id' => 'foo', - 'some_int' 
=> 123 }, test_metadata) do |_payload, metadata| - expect(metadata[:key]).to eq('foo') - end - MyConsumer.new.around_consume({ 'test_id' => 'foo', - 'some_int' => 123 }, test_metadata) do |_payload, metadata| - expect(metadata[:key]).to eq('foo') - end end it 'should consume a message on a topic' do @@ -77,83 +71,82 @@ def consume(_payload, _metadata) end it 'should fail on invalid message' do - test_consume_invalid_message(MyConsumer, { 'invalid' => 'key' }) + expect { test_consume_message(MyConsumer, { 'invalid' => 'key' }) }. + to raise_error(Avro::SchemaValidator::ValidationError) end it 'should fail if reraise is false but fatal_error is true' do - Deimos.configure { |config| config.consumers.reraise_errors = false } - test_consume_invalid_message(MyConsumer, 'fatal') + expect { test_consume_message(MyConsumer, {test_id: 'fatal'}) }. + to raise_error(Avro::SchemaValidator::ValidationError) end it 'should fail if fatal_error is true globally' do - Deimos.configure do |config| - config.consumers.fatal_error = proc { true } - config.consumers.reraise_errors = false - end - test_consume_invalid_message(MyConsumer, { 'invalid' => 'key' }) + set_karafka_config(:fatal_error, proc { true }) + expect { test_consume_message(MyConsumer, { 'invalid' => 'key' }) }. + to raise_error(Avro::SchemaValidator::ValidationError) end it 'should fail on message with extra fields' do - test_consume_invalid_message(MyConsumer, + allow_any_instance_of(Deimos::SchemaBackends::AvroValidation). + to receive(:coerce) { |_, m| m.with_indifferent_access } + expect { test_consume_message(MyConsumer, { 'test_id' => 'foo', 'some_int' => 123, - 'extra_field' => 'field name' }) + 'extra_field' => 'field name' }) }. + to raise_error(Avro::SchemaValidator::ValidationError) end it 'should not fail when before_consume fails without reraising errors' do - Deimos.configure { |config| config.consumers.reraise_errors = false } + set_karafka_config(:reraise_errors, false) expect { test_consume_message( MyConsumer, { 'test_id' => 'foo', - 'some_int' => 123 }, - skip_expectation: true - ) { raise 'OH NOES' } + 'some_int' => 123 }) { raise 'OH NOES' } }.not_to raise_error end it 'should not fail when consume fails without reraising errors' do - Deimos.configure { |config| config.consumers.reraise_errors = false } + set_karafka_config(:reraise_errors, false) + allow(Deimos::ProducerMiddleware).to receive(:call) { |m| m[:payload] = m[:payload].to_json; m } expect { test_consume_message( MyConsumer, - { 'invalid' => 'key' }, - skip_expectation: true - ) + { 'invalid' => 'key' }) }.not_to raise_error end - - it 'should call original' do - expect { - test_consume_message(MyConsumer, - { 'test_id' => 'foo', 'some_int' => 123 }, - call_original: true) - }.to raise_error('This should not be called unless call_original is set') - end end end context 'with overriden schema classes' do before(:each) do + set_karafka_config(:use_schema_classes, true) Deimos.configure do |config| - config.schema.use_schema_classes = true config.schema.use_full_namespace = true end end prepend_before(:each) do consumer_class = Class.new(described_class) do - schema 'MyUpdatedSchema' - namespace 'com.my-namespace' - key_config field: 'test_id' - # :nodoc: - def consume(_payload, _metadata) - raise 'This should not be called unless call_original is set' + def consume_message(message) + message.payload end end stub_const('ConsumerTest::MyConsumer', consumer_class) + Deimos.config.schema.use_schema_classes = true + Karafka::App.routes.redraw do + topic 
'my_consume_topic' do + schema 'MyUpdatedSchema' + namespace 'com.my-namespace' + key_config field: 'test_id' + consumer consumer_class + end + end + end + after(:each) do + Karafka::App.routes.clear end it 'should consume messages' do @@ -169,93 +162,6 @@ def consume(_payload, _metadata) end end - describe 'decode_key' do - - it 'should use the key field in the value if set' do - # actual decoding is disabled in test - expect(MyConsumer.new.decode_key('test_id' => '123')).to eq('123') - expect { MyConsumer.new.decode_key(123) }.to raise_error(NoMethodError) - end - - it 'should use the key schema if set' do - consumer_class = Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config schema: 'MySchema_key' - end - stub_const('ConsumerTest::MySchemaConsumer', consumer_class) - expect(MyConsumer.new.decode_key('test_id' => '123')).to eq('123') - expect { MyConsumer.new.decode_key(123) }.to raise_error(NoMethodError) - end - - it 'should not decode if plain is set' do - consumer_class = Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - key_config plain: true - end - stub_const('ConsumerTest::MyNonEncodedConsumer', consumer_class) - expect(MyNonEncodedConsumer.new.decode_key('123')).to eq('123') - end - - it 'should error with nothing set' do - consumer_class = Class.new(described_class) do - schema 'MySchema' - namespace 'com.my-namespace' - end - stub_const('ConsumerTest::MyErrorConsumer', consumer_class) - expect { MyErrorConsumer.new.decode_key('123') }. - to raise_error('No key config given - if you are not decoding keys, please use `key_config plain: true`') - end - - end - - describe 'timestamps' do - before(:each) do - # :nodoc: - consumer_class = Class.new(described_class) do - schema 'MySchemaWithDateTimes' - namespace 'com.my-namespace' - key_config plain: true - - # :nodoc: - def consume(_payload, _metadata) - raise 'This should not be called unless call_original is set' - end - end - stub_const('ConsumerTest::MyConsumer', consumer_class) - end - - it 'should consume a message' do - expect(Deimos.config.metrics).to receive(:histogram).twice - test_consume_message('my_consume_topic', - { 'test_id' => 'foo', - 'some_int' => 123, - 'updated_at' => Time.now.to_i, - 'timestamp' => 2.minutes.ago.to_s }) do |payload, _metadata| - expect(payload['test_id']).to eq('foo') - end - end - - it 'should fail nicely when timestamp wrong format' do - expect(Deimos.config.metrics).to receive(:histogram).twice - test_consume_message('my_consume_topic', - { 'test_id' => 'foo', - 'some_int' => 123, - 'updated_at' => Time.now.to_i, - 'timestamp' => 'dffdf' }) do |payload, _metadata| - expect(payload['test_id']).to eq('foo') - end - test_consume_message('my_consume_topic', - { 'test_id' => 'foo', - 'some_int' => 123, - 'updated_at' => Time.now.to_i, - 'timestamp' => '' }) do |payload, _metadata| - expect(payload['test_id']).to eq('foo') - end - end - - end end end # rubocop:enable Metrics/ModuleLength diff --git a/spec/message_spec.rb b/spec/message_spec.rb index 0ef89cc7..1e90551c 100644 --- a/spec/message_spec.rb +++ b/spec/message_spec.rb @@ -2,37 +2,37 @@ RSpec.describe(Deimos::Message) do it 'should detect tombstones' do - expect(described_class.new(nil, nil, key: 'key1')). + expect(described_class.new(nil, key: 'key1')). to be_tombstone - expect(described_class.new({ v: 'val1' }, nil, key: 'key1')). + expect(described_class.new({ v: 'val1' }, key: 'key1')). 
not_to be_tombstone - expect(described_class.new({ v: '' }, nil, key: 'key1')). + expect(described_class.new({ v: '' }, key: 'key1')). not_to be_tombstone - expect(described_class.new({ v: 'val1' }, nil, key: nil)). + expect(described_class.new({ v: 'val1' }, key: nil)). not_to be_tombstone end it 'can support complex keys/values' do - expect { described_class.new({ a: 1, b: 2 }, nil, key: { c: 3, d: 4 }) }. + expect { described_class.new({ a: 1, b: 2 }, key: { c: 3, d: 4 }) }. not_to raise_exception end describe 'headers' do it 'returns nil when not set' do - expect(described_class.new({ v: 'val1' }, nil, key: 'key1')). + expect(described_class.new({ v: 'val1' }, key: 'key1')). to have_attributes(headers: nil) end it 'can set and get headers' do - expect(described_class.new({ v: 'val1' }, nil, key: 'key1', headers: { a: 1 })). + expect(described_class.new({ v: 'val1' }, key: 'key1', headers: { a: 1 })). to have_attributes(headers: { a: 1 }) end it 'includes headers when converting to Hash' do - expect(described_class.new({ v: 'val1' }, nil, key: 'key1', headers: { a: 1 }).to_h). + expect(described_class.new({ v: 'val1' }, key: 'key1', headers: { a: 1 }).to_h). to include(headers: { a: 1 }) - expect(described_class.new({ v: 'val1' }, nil, key: 'key1', headers: { a: 1 }).encoded_hash). + expect(described_class.new({ v: 'val1' }, key: 'key1', headers: { a: 1 }).encoded_hash). to include(headers: { a: 1 }) end end diff --git a/spec/schemas/com/my-namespace/MySchemaWithTitle.avsc b/spec/schemas/com/my-namespace/MySchemaWithTitle.avsc new file mode 100644 index 00000000..27d61308 --- /dev/null +++ b/spec/schemas/com/my-namespace/MySchemaWithTitle.avsc @@ -0,0 +1,22 @@ +{ + "namespace": "com.my-namespace", + "name": "MySchemaWithTitle", + "type": "record", + "doc": "Test schema", + "fields": [ + { + "name": "test_id", + "type": "string", + "doc": "test string" + }, + { + "name": "some_int", + "type": "int", + "doc": "test int" + }, + { + "name": "title", + "type": "string" + } + ] +} diff --git a/spec/snapshots/consumers-no-nest.snap b/spec/snapshots/consumers-no-nest.snap index e7c75ca9..5791229f 100644 --- a/spec/snapshots/consumers-no-nest.snap +++ b/spec/snapshots/consumers-no-nest.snap @@ -950,6 +950,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/consumers.snap b/spec/snapshots/consumers.snap index ecd85dc9..9dfcd6ca 100644 --- a/spec/snapshots/consumers.snap +++ b/spec/snapshots/consumers.snap @@ -1010,6 +1010,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# 
frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/consumers_and_producers-no-nest.snap b/spec/snapshots/consumers_and_producers-no-nest.snap index cec26bc1..94525256 100644 --- a/spec/snapshots/consumers_and_producers-no-nest.snap +++ b/spec/snapshots/consumers_and_producers-no-nest.snap @@ -977,6 +977,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/consumers_and_producers.snap b/spec/snapshots/consumers_and_producers.snap index 12985cd7..17f6996d 100644 --- a/spec/snapshots/consumers_and_producers.snap +++ b/spec/snapshots/consumers_and_producers.snap @@ -1037,6 +1037,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git 
a/spec/snapshots/consumers_circular-no-nest.snap b/spec/snapshots/consumers_circular-no-nest.snap index 1c35c61b..03d35b9b 100644 --- a/spec/snapshots/consumers_circular-no-nest.snap +++ b/spec/snapshots/consumers_circular-no-nest.snap @@ -950,6 +950,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/consumers_circular.snap b/spec/snapshots/consumers_circular.snap index fae0ae7f..1e139b70 100644 --- a/spec/snapshots/consumers_circular.snap +++ b/spec/snapshots/consumers_circular.snap @@ -1010,6 +1010,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/consumers_complex_types-no-nest.snap b/spec/snapshots/consumers_complex_types-no-nest.snap index a5800f77..7b18b9ed 100644 --- a/spec/snapshots/consumers_complex_types-no-nest.snap +++ b/spec/snapshots/consumers_complex_types-no-nest.snap @@ -950,6 +950,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # 
@override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/consumers_complex_types.snap b/spec/snapshots/consumers_complex_types.snap index 34dacd8c..70c8c26c 100644 --- a/spec/snapshots/consumers_complex_types.snap +++ b/spec/snapshots/consumers_complex_types.snap @@ -1010,6 +1010,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/consumers_nested-no-nest.snap b/spec/snapshots/consumers_nested-no-nest.snap index c3cf93bc..39086073 100644 --- a/spec/snapshots/consumers_nested-no-nest.snap +++ b/spec/snapshots/consumers_nested-no-nest.snap @@ -950,6 +950,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/consumers_nested.snap b/spec/snapshots/consumers_nested.snap index 3d778f33..d7b04d99 100644 --- a/spec/snapshots/consumers_nested.snap +++ b/spec/snapshots/consumers_nested.snap @@ -1010,6 +1010,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + 
attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/namespace_folders.snap b/spec/snapshots/namespace_folders.snap index c547bea3..9d5fa823 100644 --- a/spec/snapshots/namespace_folders.snap +++ b/spec/snapshots/namespace_folders.snap @@ -915,6 +915,55 @@ module Schemas; module Com; module MyNamespace end; end; end +spec/app/lib/schema_classes/schemas/com/my_namespace/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas; module Com; module MyNamespace + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end; end; end + + spec/app/lib/schema_classes/schemas/com/my_namespace/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/namespace_map.snap b/spec/snapshots/namespace_map.snap index 1a1b3a10..53fc5226 100644 --- a/spec/snapshots/namespace_map.snap +++ b/spec/snapshots/namespace_map.snap @@ -959,6 +959,55 @@ module Schemas; module MyNamespace end; end +spec/app/lib/schema_classes/schemas/my_namespace/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas; module MyNamespace + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end; end + + spec/app/lib/schema_classes/schemas/my_namespace/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/producers_with_key-no-nest.snap b/spec/snapshots/producers_with_key-no-nest.snap index 3a6b5771..62752c1b 100644 --- a/spec/snapshots/producers_with_key-no-nest.snap +++ b/spec/snapshots/producers_with_key-no-nest.snap @@ -963,6 +963,55 @@ module Schemas end 
+spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true diff --git a/spec/snapshots/producers_with_key.snap b/spec/snapshots/producers_with_key.snap index 246eda67..e29eddaf 100644 --- a/spec/snapshots/producers_with_key.snap +++ b/spec/snapshots/producers_with_key.snap @@ -1023,6 +1023,55 @@ module Schemas end +spec/app/lib/schema_classes/my_schema_with_title.rb: +# frozen_string_literal: true + +# This file is autogenerated by Deimos, Do NOT modify +module Schemas + ### Primary Schema Class ### + # Autogenerated Schema for Record at com.my-namespace.MySchemaWithTitle + class MySchemaWithTitle < Deimos::SchemaClass::Record + + ### Attribute Accessors ### + # @return [String] + attr_accessor :test_id + # @return [Integer] + attr_accessor :some_int + # @return [String] + attr_accessor :title + + # @override + def initialize(test_id: nil, + some_int: nil, + title: nil) + super + self.test_id = test_id + self.some_int = some_int + self.title = title + end + + # @override + def schema + 'MySchemaWithTitle' + end + + # @override + def namespace + 'com.my-namespace' + end + + # @override + def as_json(_opts={}) + { + 'test_id' => @test_id, + 'some_int' => @some_int, + 'title' => @title + } + end + end +end + + spec/app/lib/schema_classes/my_schema_with_unique_id.rb: # frozen_string_literal: true From 190bd8d0b2d94639a9ecd378c8bbeefc21474e06 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:41:42 -0400 Subject: [PATCH 15/18] Producer and remaining specs --- spec/active_record_producer_spec.rb | 62 +-- spec/backends/{db_spec.rb => outbox_spec.rb} | 34 +- spec/kafka_listener_spec.rb | 55 --- spec/kafka_source_spec.rb | 71 +--- spec/producer_spec.rb | 399 +++++------------- spec/schema_backends/avro_validation_spec.rb | 2 +- spec/utils/db_poller_spec.rb | 88 ++-- ...oducer_spec.rb => outbox_producer_spec.rb} | 201 +-------- 8 files changed, 248 insertions(+), 664 deletions(-) rename spec/backends/{db_spec.rb => outbox_spec.rb} (59%) delete mode 100644 spec/kafka_listener_spec.rb rename spec/utils/{db_producer_spec.rb => outbox_producer_spec.rb} (66%) diff --git a/spec/active_record_producer_spec.rb b/spec/active_record_producer_spec.rb index ae732c72..5e430b13 100644 --- a/spec/active_record_producer_spec.rb +++ b/spec/active_record_producer_spec.rb @@ -5,28 +5,13 @@ include_context 'with widgets' prepend_before(:each) do - - producer_class = Class.new(Deimos::ActiveRecordProducer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic' - key_config none: true - end + producer_class = Class.new(Deimos::ActiveRecordProducer) 
stub_const('MyProducer', producer_class) - producer_class = Class.new(Deimos::ActiveRecordProducer) do - schema 'MySchemaWithBooleans' - namespace 'com.my-namespace' - topic 'my-topic-with-boolean' - key_config none: true - end + producer_class = Class.new(Deimos::ActiveRecordProducer) stub_const('MyBooleanProducer', producer_class) producer_class = Class.new(Deimos::ActiveRecordProducer) do - schema 'MySchemaWithId' - namespace 'com.my-namespace' - topic 'my-topic-with-id' - key_config none: true record_class Widget # :nodoc: @@ -38,19 +23,11 @@ def self.generate_payload(attrs, widget) stub_const('MyProducerWithID', producer_class) producer_class = Class.new(Deimos::ActiveRecordProducer) do - schema 'MySchemaWithUniqueId' - namespace 'com.my-namespace' - topic 'my-topic-with-unique-id' - key_config field: :id record_class Widget end stub_const('MyProducerWithUniqueID', producer_class) producer_class = Class.new(Deimos::ActiveRecordProducer) do - schema 'MySchemaWithUniqueId' - namespace 'com.my-namespace' - topic 'my-topic-with-unique-id' - key_config field: :id record_class Widget # :nodoc: @@ -63,6 +40,39 @@ def self.post_process(batch) end stub_const('MyProducerWithPostProcess', producer_class) + Karafka::App.routes.redraw do + topic 'my-topic' do + schema 'MySchema' + namespace 'com.my-namespace' + key_config none: true + producer_class MyProducer + end + topic 'my-topic-with-boolean' do + producer_class MyBooleanProducer + schema 'MySchemaWithBooleans' + namespace 'com.my-namespace' + key_config none: true + end + topic 'my-topic-with-id' do + schema 'MySchemaWithId' + namespace 'com.my-namespace' + key_config none: true + producer_class MyProducerWithID + end + topic 'my-topic-with-unique-id' do + schema 'MySchemaWithUniqueId' + namespace 'com.my-namespace' + key_config field: :id + producer_class MyProducerWithUniqueID + end + topic 'my-topic-with-post-process' do + schema 'MySchemaWithUniqueId' + namespace 'com.my-namespace' + key_config none: true + producer_class MyProducerWithPostProcess + end + end + end describe 'produce' do @@ -117,7 +127,7 @@ def self.post_process(batch) end specify '#watched_attributes' do - expect(MyProducer.watched_attributes).to eq(%w(test_id some_int)) + expect(MyProducer.watched_attributes(nil)).to eq(%w(test_id some_int)) end end diff --git a/spec/backends/db_spec.rb b/spec/backends/outbox_spec.rb similarity index 59% rename from spec/backends/db_spec.rb rename to spec/backends/outbox_spec.rb index fa961f59..1b6645b2 100644 --- a/spec/backends/db_spec.rb +++ b/spec/backends/outbox_spec.rb @@ -1,31 +1,31 @@ # frozen_string_literal: true -each_db_config(Deimos::Backends::Db) do +each_db_config(Deimos::Backends::Outbox) do include_context 'with publish_backend' it 'should save to the database' do - expect(Deimos.config.metrics).to receive(:increment).with( - 'db_producer.insert', - tags: %w(topic:my-topic), - by: 3 - ) + # expect(Deimos.config.metrics).to receive(:increment).with( + # 'outbox.insert', + # tags: %w(topic:my-topic), + # by: 3 + # ) described_class.publish(producer_class: MyProducer, messages: messages) records = Deimos::KafkaMessage.all expect(records.size).to eq(3) expect(records[0].attributes.to_h).to include( - 'message' => '{"foo"=>1}', + 'message' => '{"test_id":"foo1","some_int":1}', 'topic' => 'my-topic', - 'key' => 'foo1' + 'key' => '{"test_id":"foo1"}' ) expect(records[1].attributes.to_h).to include( - 'message' => '{"foo"=>2}', + 'message' => '{"test_id":"foo2","some_int":2}', 'topic' => 'my-topic', - 'key' => 'foo2' + 'key' => 
'{"test_id":"foo2"}' ) expect(records[2].attributes.to_h).to include( - 'message' => '{"foo"=>3}', + 'message' => '{"test_id":"foo3","some_int":3}', 'topic' => 'my-topic', - 'key' => 'foo3' + 'key' => '{"test_id":"foo3"}' ) end @@ -37,18 +37,12 @@ end it 'should add to non-keyed messages' do + orig_messages = messages.deep_dup described_class.publish(producer_class: MyNoKeyProducer, messages: messages) expect(Deimos::KafkaMessage.count).to eq(3) described_class.publish(producer_class: MyNoKeyProducer, - messages: [messages.first]) + messages: [orig_messages.first]) expect(Deimos::KafkaMessage.count).to eq(4) end - - it 'should add messages with Hash keys with JSON encoding' do - described_class.publish(producer_class: MyProducer, - messages: [build_message({ foo: 0 }, 'my-topic', { 'test_id' => 0 })]) - expect(Deimos::KafkaMessage.count).to eq(1) - expect(Deimos::KafkaMessage.last.partition_key).to eq(%(---\ntest_id: 0\n)) - end end diff --git a/spec/kafka_listener_spec.rb b/spec/kafka_listener_spec.rb deleted file mode 100644 index 56ab8b4d..00000000 --- a/spec/kafka_listener_spec.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -describe Deimos::KafkaListener do - include_context 'with widgets' - - prepend_before(:each) do - producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic' - key_config none: true - end - stub_const('MyProducer', producer_class) - end - - before(:each) do - Deimos.configure do |c| - c.producers.backend = :kafka - c.schema.backend = :avro_local - end - allow_any_instance_of(Kafka::Cluster).to receive(:add_target_topics) - allow_any_instance_of(Kafka::Cluster).to receive(:partitions_for). - and_raise(Kafka::Error) - end - - describe '.send_produce_error' do - let(:payloads) do - [{ 'test_id' => 'foo', 'some_int' => 123 }, - { 'test_id' => 'bar', 'some_int' => 124 }] - end - - it 'should listen to publishing errors and republish as Deimos events' do - allow(Deimos::Producer).to receive(:descendants).and_return([MyProducer]) - Deimos.subscribe('produce_error') do |event| - expect(event.payload).to include( - producer: MyProducer, - topic: 'my-topic', - payloads: payloads - ) - end - expect(Deimos.config.metrics).to receive(:increment). - with('publish_error', tags: %w(topic:my-topic), by: 2) - expect { MyProducer.publish_list(payloads) }.to raise_error(Kafka::DeliveryFailed) - end - - it 'should not send any notifications when producer is not found' do - Deimos.subscribe('produce_error') do |_| - raise 'OH NOES' - end - allow(Deimos::Producer).to receive(:descendants).and_return([]) - expect(Deimos.config.metrics).not_to receive(:increment).with('publish_error', anything) - expect { MyProducer.publish_list(payloads) }.to raise_error(Kafka::DeliveryFailed) - end - end -end diff --git a/spec/kafka_source_spec.rb b/spec/kafka_source_spec.rb index 52f35eaa..d8c364a3 100644 --- a/spec/kafka_source_spec.rb +++ b/spec/kafka_source_spec.rb @@ -17,18 +17,10 @@ module KafkaSourceSpec # Dummy producer which mimicks the behavior of a real producer class WidgetProducer < Deimos::ActiveRecordProducer - topic 'my-topic' - namespace 'com.my-namespace' - schema 'Widget' - key_config field: :id end # Dummy producer which mimicks the behavior of a real producer class WidgetProducerTheSecond < Deimos::ActiveRecordProducer - topic 'my-topic-the-second' - namespace 'com.my-namespace' - schema 'WidgetTheSecond' - key_config field: :id end # Dummy class we can include the mixin in. 
Has a backing table created @@ -51,6 +43,22 @@ def self.kafka_producers before(:each) do Widget.delete_all + Karafka::App.routes.redraw do + topic 'my-topic' do + namespace 'com.my-namespace' + schema 'Widget' + key_config field: :id + producer_class WidgetProducer + end + + topic 'my-topic-the-second' do + namespace 'com.my-namespace' + schema 'WidgetTheSecond' + key_config field: :id + producer_class WidgetProducerTheSecond + end + + end end it 'should send events on creation, update, and deletion' do @@ -206,10 +214,9 @@ def self.kafka_producers context 'with DB backend' do before(:each) do Deimos.configure do |config| - config.producers.backend = :db + config.producers.backend = :outbox end setup_db(DB_OPTIONS.last) # sqlite - allow(Deimos::Producer).to receive(:produce_batch).and_call_original end it 'should save to the DB' do @@ -309,46 +316,6 @@ def self.kafka_producers end end - context 'with AR models that implement the kafka_producer interface' do - before(:each) do - # Dummy class we can include the mixin in. Has a backing table created - # earlier and has the import hook disabled - deprecated_class = Class.new(ActiveRecord::Base) do - include Deimos::KafkaSource - self.table_name = 'widgets' - - # :nodoc: - def self.kafka_config - { - update: true, - delete: true, - import: false, - create: true - } - end - - # :nodoc: - def self.kafka_producer - WidgetProducer - end - end - stub_const('WidgetDeprecated', deprecated_class) - WidgetDeprecated.reset_column_information - end - - it 'logs a warning and sends the message as usual' do - expect(Deimos.config.logger).to receive(:warn).with({ message: WidgetDeprecated::DEPRECATION_WARNING }) - widget = WidgetDeprecated.create(widget_id: 1, name: 'Widget 1') - expect('my-topic').to have_sent({ - widget_id: 1, - name: 'Widget 1', - id: widget.id, - created_at: anything, - updated_at: anything - }, widget.id) - end - end - context 'with AR models that do not implement any producer interface' do before(:each) do # Dummy class we can include the mixin in. Has a backing table created @@ -371,10 +338,10 @@ def self.kafka_config WidgetBuggy.reset_column_information end - it 'raises a NotImplementedError exception' do + it 'raises a MissingImplementationError exception' do expect { WidgetBuggy.create(widget_id: 1, name: 'Widget 1') - }.to raise_error(NotImplementedError) + }.to raise_error(Deimos::MissingImplementationError) end end end diff --git a/spec/producer_spec.rb b/spec/producer_spec.rb index a841d3be..b2f0909a 100644 --- a/spec/producer_spec.rb +++ b/spec/producer_spec.rb @@ -5,27 +5,13 @@ module ProducerTest describe Deimos::Producer do prepend_before(:each) do - producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic' - key_config field: 'test_id' - end + producer_class = Class.new(Deimos::Producer) stub_const('MyProducer', producer_class) - producer_class = Class.new(Deimos::Producer) do - schema 'MySchemaWithId' - namespace 'com.my-namespace' - topic 'my-topic' - key_config plain: true - end + producer_class = Class.new(Deimos::Producer) stub_const('MyProducerWithID', producer_class) producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic' - key_config plain: true # :nodoc: def self.partition_key(payload) payload[:payload_key] ? 
payload[:payload_key] + '1' : nil @@ -33,79 +19,90 @@ def self.partition_key(payload) end stub_const('MyNonEncodedProducer', producer_class) - producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic2' - key_config none: true - end + producer_class = Class.new(Deimos::Producer) stub_const('MyNoKeyProducer', producer_class) - producer_class = Class.new(Deimos::Producer) do - schema 'MyNestedSchema' - namespace 'com.my-namespace' - topic 'my-topic' - key_config field: 'test_id' - end + producer_class = Class.new(Deimos::Producer) stub_const('MyNestedSchemaProducer', producer_class) - producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic2' - key_config schema: 'MySchema_key' - end + producer_class = Class.new(Deimos::Producer) stub_const('MySchemaProducer', producer_class) - producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic' - end + producer_class = Class.new(Deimos::Producer) stub_const('MyErrorProducer', producer_class) - producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic nil - key_config none: true + Karafka::App.routes.redraw do + topic 'my-topic' do + producer_class MyProducer + schema 'MySchema' + namespace 'com.my-namespace' + key_config field: 'test_id' + end + topic 'a-new-topic' do + producer_class MyProducer + schema 'MySchema' + namespace 'com.my-namespace' + key_config field: 'test_id' + end + topic 'my-topic-with-id' do + producer_class MyProducerWithID + schema 'MySchemaWithId' + namespace 'com.my-namespace' + key_config plain: true + end + topic 'my-topic-non-encoded' do + producer_class MyNonEncodedProducer + schema 'MySchema' + namespace 'com.my-namespace' + key_config plain: true + end + topic 'my-topic-no-key' do + producer_class MyNoKeyProducer + schema 'MySchema' + namespace 'com.my-namespace' + key_config none: true + end + topic 'my-topic-nested-schema' do + producer_class MyNestedSchemaProducer + schema 'MyNestedSchema' + namespace 'com.my-namespace' + key_config field: 'test_id' + end + topic 'my-topic-schema' do + producer_class MySchemaProducer + schema 'MySchema' + namespace 'com.my-namespace' + key_config schema: 'MySchema_key' + end + topic 'my-topic-error' do + schema 'MySchema' + namespace 'com.my-namespace' + producer_class MyErrorProducer + end end - stub_const('MyNoTopicProducer', producer_class) - producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic' - key_config field: 'test_id' - max_batch_size 1 - end - stub_const('MySmallBatchProducer', producer_class) end - it 'should fail on invalid message with error handler' do - subscriber = Deimos.subscribe('produce') do |event| - expect(event.payload[:payloads]).to eq([{ 'invalid' => 'key' }]) - end - expect(MyProducer.encoder).to receive(:validate).and_raise('OH NOES') + it 'should fail on invalid message' do + expect(Deimos::ProducerMiddleware).to receive(:call).and_raise('OH NOES') expect { MyProducer.publish({ 'invalid' => 'key', :payload_key => 'key' }) }. 
to raise_error('OH NOES') - Deimos.unsubscribe(subscriber) end it 'should produce a message' do - expect(described_class).to receive(:produce_batch).once.with( - Deimos::Backends::Test, + expect(MyProducer).to receive(:produce_batch).once.with( + Deimos::Backends::Kafka, [ - Deimos::Message.new({ 'test_id' => 'foo', 'some_int' => 123 }, - MyProducer, - topic: 'my-topic', - partition_key: 'foo', - key: 'foo'), - Deimos::Message.new({ 'test_id' => 'bar', 'some_int' => 124 }, - MyProducer, - topic: 'my-topic', - partition_key: 'bar', - key: 'bar') + hash_including({ + payload: { 'test_id' => 'foo', 'some_int' => 123 }, + topic: 'my-topic', + partition_key: nil + }), + hash_including({ + payload: { 'test_id' => 'bar', 'some_int' => 124 }, + topic: 'my-topic', + partition_key: nil + }) ] ).and_call_original @@ -113,27 +110,27 @@ def self.partition_key(payload) [{ 'test_id' => 'foo', 'some_int' => 123 }, { 'test_id' => 'bar', 'some_int' => 124 }] ) - expect('my-topic').to have_sent('test_id' => 'foo', 'some_int' => 123) + expect('my-topic').to have_sent({'test_id' => 'foo', 'some_int' => 123}, 'foo', 'foo') expect('your-topic').not_to have_sent('test_id' => 'foo', 'some_int' => 123) expect('my-topic').not_to have_sent('test_id' => 'foo2', 'some_int' => 123) end it 'should allow setting the topic and headers from publish_list' do - expect(described_class).to receive(:produce_batch).once.with( - Deimos::Backends::Test, + expect(MyProducer).to receive(:produce_batch).once.with( + Deimos::Backends::Kafka, [ - Deimos::Message.new({ 'test_id' => 'foo', 'some_int' => 123 }, - MyProducer, - topic: 'a-new-topic', - headers: { 'foo' => 'bar' }, - partition_key: 'foo', - key: 'foo'), - Deimos::Message.new({ 'test_id' => 'bar', 'some_int' => 124 }, - MyProducer, - topic: 'a-new-topic', - headers: { 'foo' => 'bar' }, - partition_key: 'bar', - key: 'bar') + hash_including({ + payload: { 'test_id' => 'foo', 'some_int' => 123 }, + topic: 'a-new-topic', + headers: { 'foo' => 'bar' }, + partition_key: nil + }), + hash_including({ + payload: { 'test_id' => 'bar', 'some_int' => 124 }, + topic: 'a-new-topic', + headers: { 'foo' => 'bar' }, + partition_key: nil + }) ] ).and_call_original @@ -153,27 +150,14 @@ def self.partition_key(payload) 'some_int' => 123, 'message_id' => a_kind_of(String), 'timestamp' => a_kind_of(String) } - expect(described_class).to receive(:produce_batch).once do |_, messages| - expect(messages.size).to eq(1) - expect(messages[0].to_h). - to match( - payload: payload, - topic: 'my-topic', - partition_key: 'key', - metadata: { - producer_name: 'MyProducerWithID', - decoded_payload: payload - }, - key: 'key' - ) - end MyProducerWithID.publish_list( [{ 'test_id' => 'foo', 'some_int' => 123, :payload_key => 'key' }] ) + expect(MyProducerWithID.topic).to have_sent(payload, 'key', 'key') + end it 'should not publish if publish disabled' do - expect(described_class).not_to receive(:produce_batch) Deimos.configure { |c| c.producers.disabled = true } MyProducer.publish_list( [{ 'test_id' => 'foo', 'some_int' => 123 }, @@ -209,102 +193,22 @@ def self.partition_key(payload) it 'should produce to a prefixed topic' do Deimos.configure { |c| c.producers.topic_prefix = 'prefix.' } payload = { 'test_id' => 'foo', 'some_int' => 123 } - expect(described_class).to receive(:produce_batch).once do |_, messages| - expect(messages.size).to eq(1) - expect(messages[0].to_h). 
- to eq( - payload: payload, - topic: 'prefix.my-topic', - partition_key: 'foo', - metadata: { - producer_name: 'MyProducer', - decoded_payload: payload - }, - key: 'foo' - ) - end MyProducer.publish_list([payload]) - Deimos.configure { |c| c.producers.topic_prefix = nil } - expect(described_class).to receive(:produce_batch).once do |_, messages| - expect(messages.size).to eq(1) - expect(messages[0].to_h). - to eq( - payload: payload, - topic: 'my-topic', - partition_key: 'foo', - metadata: { - producer_name: 'MyProducer', - decoded_payload: payload - }, - key: 'foo' - ) - end + expect('prefix.my-topic').to have_sent(payload, 'foo', 'foo') + expect(karafka.produced_messages.size).to eq(1) + karafka.produced_messages.clear - MyProducer.publish_list( - [{ 'test_id' => 'foo', 'some_int' => 123 }] - ) - end - - it 'should encode the key' do Deimos.configure { |c| c.producers.topic_prefix = nil } - expect(MyProducer.encoder).to receive(:encode_key).with('test_id', 'foo', topic: 'my-topic-key') - expect(MyProducer.encoder).to receive(:encode_key).with('test_id', 'bar', topic: 'my-topic-key') - expect(MyProducer.encoder).to receive(:encode).with({ - 'test_id' => 'foo', - 'some_int' => 123 - }, { topic: 'my-topic-value' }) - expect(MyProducer.encoder).to receive(:encode).with({ - 'test_id' => 'bar', - 'some_int' => 124 - }, { topic: 'my-topic-value' }) MyProducer.publish_list( - [{ 'test_id' => 'foo', 'some_int' => 123 }, - { 'test_id' => 'bar', 'some_int' => 124 }] - ) - end - - it 'should encode the key with topic prefix' do - Deimos.configure { |c| c.producers.topic_prefix = 'prefix.' } - expect(MyProducer.encoder).to receive(:encode_key).with('test_id', 'foo', topic: 'prefix.my-topic-key') - expect(MyProducer.encoder).to receive(:encode_key).with('test_id', 'bar', topic: 'prefix.my-topic-key') - expect(MyProducer.encoder).to receive(:encode).with({ 'test_id' => 'foo', - 'some_int' => 123 }, - { topic: 'prefix.my-topic-value' }) - expect(MyProducer.encoder).to receive(:encode).with({ 'test_id' => 'bar', - 'some_int' => 124 }, - { topic: 'prefix.my-topic-value' }) - - MyProducer.publish_list([{ 'test_id' => 'foo', 'some_int' => 123 }, - { 'test_id' => 'bar', 'some_int' => 124 }]) - end - - it 'should not encode with plaintext key' do - expect(MyNonEncodedProducer.key_encoder).not_to receive(:encode_key) - - MyNonEncodedProducer.publish_list( - [{ 'test_id' => 'foo', 'some_int' => 123, :payload_key => 'foo_key' }, - { 'test_id' => 'bar', 'some_int' => 124, :payload_key => 'bar_key' }] - ) - end - - it 'should encode with a schema' do - expect(MySchemaProducer.key_encoder).to receive(:encode).with({ 'test_id' => 'foo_key' }, - { topic: 'my-topic2-key' }) - expect(MySchemaProducer.key_encoder).to receive(:encode).with({ 'test_id' => 'bar_key' }, - { topic: 'my-topic2-key' }) - - MySchemaProducer.publish_list( - [{ 'test_id' => 'foo', 'some_int' => 123, - :payload_key => { 'test_id' => 'foo_key' } }, - { 'test_id' => 'bar', 'some_int' => 124, - :payload_key => { 'test_id' => 'bar_key' } }] + [{ 'test_id' => 'foo', 'some_int' => 123 }] ) + expect('my-topic').to have_sent(payload, 'foo', 'foo') + expect(karafka.produced_messages.size).to eq(1) end it 'should properly encode and coerce values with a nested record' do - expect(MyNestedSchemaProducer.encoder).to receive(:encode_key).with('test_id', 'foo', topic: 'my-topic-key') MyNestedSchemaProducer.publish({ 'test_id' => 'foo', 'test_float' => BigDecimal('123.456'), @@ -331,39 +235,6 @@ def self.partition_key(payload) ) end - it 'should raise error if 
blank topic is passed in explicitly' do - expect { - MyProducer.publish_list( - [{ 'test_id' => 'foo', - 'some_int' => 123 }, - { 'test_id' => 'bar', - 'some_int' => 124 }], - topic: '' - ) - }.to raise_error(RuntimeError, - 'Topic not specified. Please specify the topic.') - end - - it 'should raise error if the producer has not been initialized with a topic' do - expect { - MyNoTopicProducer.publish_list( - [{ 'test_id' => 'foo', - 'some_int' => 123 }, - { 'test_id' => 'bar', - 'some_int' => 124 }] - ) - }.to raise_error(RuntimeError, - 'Topic not specified. Please specify the topic.') - end - - it 'should error with nothing set' do - expect { - MyErrorProducer.publish_list( - [{ 'test_id' => 'foo', 'some_int' => 123, :payload_key => '123' }] - ) - }.to raise_error('No key config given - if you are not encoding keys, please use `key_config plain: true`') - end - it 'should error if no key given and none is not the config' do expect { MyNonEncodedProducer.publish_list( @@ -403,30 +274,25 @@ def self.partition_key(payload) context 'with Schema Class payloads' do it 'should fail on invalid message with error handler' do - subscriber = Deimos.subscribe('produce') do |event| - expect(event.payload[:payloads]).to eq([{ 'invalid' => 'key' }]) - end - expect(MyProducer.encoder).to receive(:validate).and_raise('OH NOES') - expect { - MyProducer.publish(Schemas::MyNamespace::MySchema.new(test_id: 'foo', some_int: 'invalid')) - }.to raise_error('OH NOES') - Deimos.unsubscribe(subscriber) + expect(Deimos::ProducerMiddleware).to receive(:call).and_raise('OH NOES') + expect { MyProducer.publish(Schemas::MyNamespace::MySchema.new(test_id: 'foo', some_int: 'invalid')) }. + to raise_error('OH NOES') end it 'should produce a message' do - expect(described_class).to receive(:produce_batch).once.with( - Deimos::Backends::Test, + expect(MyProducer).to receive(:produce_batch).once.with( + Deimos::Backends::Kafka, [ - Deimos::Message.new({ 'test_id' => 'foo', 'some_int' => 123 }, - MyProducer, - topic: 'my-topic', - partition_key: 'foo', - key: 'foo'), - Deimos::Message.new({ 'test_id' => 'bar', 'some_int' => 124 }, - MyProducer, - topic: 'my-topic', - partition_key: 'bar', - key: 'bar') + hash_including({ + payload: { 'test_id' => 'foo', 'some_int' => 123, 'payload_key' => nil }, + topic: 'my-topic', + partition_key: nil, + }), + hash_including({ + payload: { 'test_id' => 'bar', 'some_int' => 124, 'payload_key' => nil }, + topic: 'my-topic', + partition_key: nil + }) ] ).and_call_original @@ -440,7 +306,7 @@ def self.partition_key(payload) end it 'should not publish if publish disabled' do - expect(described_class).not_to receive(:produce_batch) + expect(MyProducer).not_to receive(:produce_batch) Deimos.configure { |c| c.producers.disabled = true } MyProducer.publish_list( [Schemas::MyNamespace::MySchema.new(test_id: 'foo', some_int: 123), @@ -451,16 +317,6 @@ def self.partition_key(payload) it 'should encode the key' do Deimos.configure { |c| c.producers.topic_prefix = nil } - expect(MyProducer.encoder).to receive(:encode_key).with('test_id', 'foo', topic: 'my-topic-key') - expect(MyProducer.encoder).to receive(:encode_key).with('test_id', 'bar', topic: 'my-topic-key') - expect(MyProducer.encoder).to receive(:encode).with({ - 'test_id' => 'foo', - 'some_int' => 123 - }, { topic: 'my-topic-value' }) - expect(MyProducer.encoder).to receive(:encode).with({ - 'test_id' => 'bar', - 'some_int' => 124 - }, { topic: 'my-topic-value' }) MyProducer.publish_list( [Schemas::MyNamespace::MySchema.new(test_id: 'foo', 
some_int: 123), @@ -469,11 +325,6 @@ def self.partition_key(payload) end it 'should encode with a schema' do - expect(MySchemaProducer.key_encoder).to receive(:encode).with({ 'test_id' => 'foo_key' }, - { topic: 'my-topic2-key' }) - expect(MySchemaProducer.key_encoder).to receive(:encode).with({ 'test_id' => 'bar_key' }, - { topic: 'my-topic2-key' }) - MySchemaProducer.publish_list( [Schemas::MyNamespace::MySchema.new(test_id: 'foo', some_int: 123, payload_key: { 'test_id' => 'foo_key' }), Schemas::MyNamespace::MySchema.new(test_id: 'bar', some_int: 124, payload_key: { 'test_id' => 'bar_key' })] @@ -481,7 +332,6 @@ def self.partition_key(payload) end it 'should properly encode and coerce values with a nested record' do - expect(MyNestedSchemaProducer.encoder).to receive(:encode_key).with('test_id', 'foo', topic: 'my-topic-key') MyNestedSchemaProducer.publish( Schemas::MyNamespace::MyNestedSchema.new( test_id: 'foo', @@ -532,7 +382,7 @@ def self.partition_key(payload) MyProducerWithID.publish({ 'test_id' => 'foo', 'some_int' => 123, :payload_key => 123 }) - expect('my-topic'). + expect('my-topic-with-id'). to have_sent('test_id' => 'foo', 'some_int' => 123, 'message_id' => anything, 'timestamp' => anything) expect(Deimos).not_to be_producers_disabled @@ -552,7 +402,7 @@ def self.partition_key(payload) :payload_key => '123' }) expect('my-topic').not_to have_sent(anything) - expect('my-topic2').to have_sent('test_id' => 'foo', 'some_int' => 123) + expect('my-topic-schema').to have_sent('test_id' => 'foo', 'some_int' => 123) expect(Deimos).not_to be_producers_disabled expect(Deimos).to be_producers_disabled(MyProducer) expect(Deimos).not_to be_producers_disabled(MySchemaProducer) @@ -590,11 +440,11 @@ def self.partition_key(payload) end it 'should return db if db is set' do - Deimos.configure { producers.backend = :db } + Deimos.configure { producers.backend = :outbox } expect(described_class.determine_backend_class(true, false)). - to eq(Deimos::Backends::Db) + to eq(Deimos::Backends::Outbox) expect(described_class.determine_backend_class(false, false)). 
- to eq(Deimos::Backends::Db) + to eq(Deimos::Backends::Outbox) end it 'should return kafka if force_send is true' do @@ -615,30 +465,5 @@ def self.partition_key(payload) end end - describe "max_batch_size" do - it 'should use top-level default value if max_batch_size is not defined by the producer' do - expect(MyProducer.config[:max_batch_size]).to eq(500) - end - - it 'should call produce_batch multiple times when max_batch_size < records size' do - Deimos::Message.new({ 'test_id' => 'foo', 'some_int' => 123 }, - MySmallBatchProducer, - topic: 'my-topic', - partition_key: 'foo', - key: 'foo') - Deimos::Message.new({ 'test_id' => 'bar', 'some_int' => 124 }, - MySmallBatchProducer, - topic: 'my-topic', - partition_key: 'bar', - key: 'bar') - expect(described_class).to receive(:produce_batch).twice - - MySmallBatchProducer.publish_list( - [{ 'test_id' => 'foo', 'some_int' => 123 }, - { 'test_id' => 'bar', 'some_int' => 124 }] - ) - end - end - end end diff --git a/spec/schema_backends/avro_validation_spec.rb b/spec/schema_backends/avro_validation_spec.rb index 586ab602..d1988404 100644 --- a/spec/schema_backends/avro_validation_spec.rb +++ b/spec/schema_backends/avro_validation_spec.rb @@ -16,7 +16,7 @@ it 'should encode and decode correctly' do results = backend.encode(payload) - expect(results).to eq(payload) + expect(results).to eq(payload.to_json) results = backend.decode(results) expect(results).to eq(payload) end diff --git a/spec/utils/db_poller_spec.rb b/spec/utils/db_poller_spec.rb index 5f5b2dc3..b4cb8c3f 100644 --- a/spec/utils/db_poller_spec.rb +++ b/spec/utils/db_poller_spec.rb @@ -20,21 +20,27 @@ def time_value(secs: 0, mins: 0) describe '#start!' do before(:each) do - producer_class = Class.new(Deimos::Producer) do - schema 'MySchema' - namespace 'com.my-namespace' - topic 'my-topic' - key_config field: 'test_id' - end + producer_class = Class.new(Deimos::Producer) stub_const('MyProducer', producer_class) - producer_class = Class.new(Deimos::Producer) do - schema 'MySchemaWithId' - namespace 'com.my-namespace' - topic 'my-topic' - key_config plain: true - end + producer_class = Class.new(Deimos::Producer) stub_const('MyProducerWithID', producer_class) + + Karafka::App.routes.redraw do + topic 'my-topic' do + schema 'MySchema' + namespace 'com.my-namespace' + key_config field: 'test_id' + producer_class MyProducer + end + topic 'my-topic-with-id' do + schema 'MySchemaWithId' + namespace 'com.my-namespace' + key_config plain: true + producer_class MyProducerWithID + end + end + end it 'should raise an error if no pollers configured' do @@ -76,12 +82,7 @@ def time_value(secs: 0, mins: 0) let(:config) { Deimos.config.db_poller_objects.first.dup } before(:each) do - Widget.delete_all producer_class = Class.new(Deimos::ActiveRecordProducer) do - schema 'MySchemaWithId' - namespace 'com.my-namespace' - topic 'my-topic-with-id' - key_config none: true record_class Widget # :nodoc: @@ -91,6 +92,16 @@ def self.generate_payload(attrs, widget) end stub_const('MyProducer', producer_class) + Widget.delete_all + Karafka::App.routes.redraw do + topic 'my-topic-with-id' do + schema 'MySchemaWithId' + namespace 'com.my-namespace' + key_config none: true + producer_class MyProducer + end + end + Deimos.configure do db_poller do producer_class 'MyProducer' @@ -195,7 +206,9 @@ def self.generate_payload(attrs, widget) before(:each) { config.skip_too_large_messages = true } it 'should skip and move on' do - error = Kafka::MessageSizeTooLarge.new('OH NOES') + rdkafka_error = 
instance_double(Rdkafka::RdkafkaError, code: :msg_size_too_large) + error = WaterDrop::Errors::ProduceManyError.new(nil, nil) + allow(error).to receive(:cause).and_return(rdkafka_error) allow(poller).to receive(:sleep) allow(poller).to receive(:process_batch) do raise error @@ -331,7 +344,7 @@ def self.generate_payload(attrs, widget) end it 'should send events across multiple batches' do - allow(Deimos.config.logger).to receive(:info) + allow(Deimos::Logging).to receive(:log_info) allow(MyProducer).to receive(:poll_query).and_call_original expect(poller).to receive(:process_and_touch_info).ordered. with([widgets[0], widgets[1], widgets[2]], anything).and_call_original @@ -376,7 +389,7 @@ def self.generate_payload(attrs, widget) time_to: time_value(secs: 120), # yes this is weird but it's because of travel_to column_name: :updated_at, min_id: last_widget.id) - expect(Deimos.config.logger).to have_received(:info). + expect(Deimos::Logging).to have_received(:log_info). with('Poll MyProducer: ["my-topic-with-id"] complete at 2015-05-05 00:59:58 -0400 (3 batches, 0 errored batches, 7 processed messages)') end @@ -398,7 +411,7 @@ def self.generate_payload(attrs, widget) describe 'errors' do before(:each) do poller.config.retries = 0 - allow(Deimos.config.logger).to receive(:info) + allow(Deimos::Logging).to receive(:log_info) end after(:each) do @@ -428,7 +441,7 @@ def self.generate_payload(attrs, widget) info = Deimos::PollInfo.last expect(info.last_sent.in_time_zone).to eq(time_value(mins: -61, secs: 30)) expect(info.last_sent_id).to eq(widgets[6].id) - expect(Deimos.config.logger).to have_received(:info). + expect(Deimos::Logging).to have_received(:log_info). with('Poll MyProducer: ["my-topic-with-id"] complete at 2015-05-05 00:59:58 -0400 (2 batches, 1 errored batches, 7 processed messages)') end end @@ -449,10 +462,6 @@ def self.generate_payload(attrs, widget) before(:each) do Widget.delete_all producer_class = Class.new(Deimos::ActiveRecordProducer) do - schema 'MySchemaWithId' - namespace 'com.my-namespace' - topic 'my-topic-with-id' - key_config none: true record_class Widget # :nodoc: @@ -461,20 +470,22 @@ def self.generate_payload(attrs, widget) end end stub_const('ProducerOne', producer_class) + stub_const('ProducerTwo', producer_class) - producer_class = Class.new(Deimos::ActiveRecordProducer) do - schema 'MySchemaWithId' - namespace 'com.my-namespace' - topic 'my-topic-with-id' - key_config none: true - record_class Widget - - # :nodoc: - def self.generate_payload(attrs, widget) - super.merge(message_id: widget.generated_id) + Karafka::App.routes.redraw do + topic 'my-topic-with-id' do + schema 'MySchemaWithId' + namespace 'com.my-namespace' + key_config none: true + producer_class ProducerOne + end + topic 'my-topic-with-id2' do + schema 'MySchemaWithId' + namespace 'com.my-namespace' + key_config none: true + producer_class ProducerTwo end end - stub_const('ProducerTwo', producer_class) poller_class = Class.new(Deimos::Utils::DbPoller::StateBased) do def self.producers @@ -513,8 +524,7 @@ def self.poll_query(*) expect(Deimos::Utils::DbPoller::MultiProducerPoller).to receive(:poll_query).at_least(:once) poller.process_updates - expect(ProducerOne).to have_received(:send_events).with(widgets) - expect(ProducerTwo).to have_received(:send_events).with(widgets) + expect(ProducerOne).to have_received(:send_events).twice.with(widgets) expect(widgets.map(&:reload).map(&:publish_status)).to eq(%w(PUBLISHED PUBLISHED PUBLISHED)) end diff --git a/spec/utils/db_producer_spec.rb 
b/spec/utils/outbox_producer_spec.rb similarity index 66% rename from spec/utils/db_producer_spec.rb rename to spec/utils/outbox_producer_spec.rb index 72ddcc14..f997e90c 100644 --- a/spec/utils/db_producer_spec.rb +++ b/spec/utils/outbox_producer_spec.rb @@ -1,23 +1,17 @@ # frozen_string_literal: true -each_db_config(Deimos::Utils::DbProducer) do +each_db_config(Deimos::Utils::OutboxProducer) do let(:producer) do producer = described_class.new(logger) allow(producer).to receive(:sleep) - allow(producer).to receive(:producer).and_return(phobos_producer) producer end - let(:logger) { nil } - let(:phobos_producer) do - pp = instance_double(Phobos::Producer::PublicAPI) - allow(pp).to receive(:publish_list) - pp - end + let(:logger) { instance_double(Logger, error: nil, info: nil, debug: nil )} before(:each) do - stub_const('Deimos::Utils::DbProducer::BATCH_SIZE', 2) - stub_const('Deimos::Utils::DbProducer::DELETE_BATCH_SIZE', 1) + stub_const('Deimos::Utils::OutboxProducer::BATCH_SIZE', 2) + stub_const('Deimos::Utils::OutboxProducer::DELETE_BATCH_SIZE', 1) end specify '#process_next_messages' do @@ -47,7 +41,7 @@ message: 'blah', key: "key#{i}") end - stub_const('Deimos::Utils::DbProducer::BATCH_SIZE', 5) + stub_const('Deimos::Utils::OutboxProducer::BATCH_SIZE', 5) producer.current_topic = 'topic1' messages = producer.retrieve_messages expect(messages.size).to eq(3) @@ -58,71 +52,8 @@ it 'should produce normally' do batch = ['A'] * 1000 - expect(phobos_producer).to receive(:publish_list).with(batch).once - expect(Deimos.config.metrics).to receive(:increment).with('publish', - tags: %w(status:success topic:), - by: 1000).once - producer.produce_messages(batch) - end - - it 'should split the batch size on buffer overflow' do - class_producer = double(Phobos::Producer::ClassMethods::PublicAPI, # rubocop:disable RSpec/VerifiedDoubles - sync_producer_shutdown: nil) - allow(producer.class).to receive(:producer).and_return(class_producer) - expect(class_producer).to receive(:sync_producer_shutdown).twice - count = 0 - allow(phobos_producer).to receive(:publish_list) do - count += 1 - raise Kafka::BufferOverflow if count < 3 - end - allow(Deimos.config.metrics).to receive(:increment) - batch = ['A'] * 1000 + expect(Karafka.producer).to receive(:produce_many_sync).with(batch).once producer.produce_messages(batch) - expect(phobos_producer).to have_received(:publish_list).with(batch) - expect(phobos_producer).to have_received(:publish_list).with(['A'] * 100) - expect(phobos_producer).to have_received(:publish_list).with(['A'] * 10).exactly(100).times - expect(Deimos.config.metrics).to have_received(:increment).with('publish', - tags: %w(status:success topic:), - by: 10).exactly(100).times - end - - it "should raise an error if it can't split any more" do - allow(phobos_producer).to receive(:publish_list) do - raise Kafka::BufferOverflow - end - expect(Deimos.config.metrics).not_to receive(:increment) - batch = ['A'] * 1000 - expect { producer.produce_messages(batch) }.to raise_error(Kafka::BufferOverflow) - expect(phobos_producer).to have_received(:publish_list).with(batch) - expect(phobos_producer).to have_received(:publish_list).with(['A'] * 100).once - expect(phobos_producer).to have_received(:publish_list).with(['A'] * 10).once - expect(phobos_producer).to have_received(:publish_list).with(['A']).once - end - - it 'should not resend batches of sent messages' do - allow(phobos_producer).to receive(:publish_list) do |group| - raise Kafka::BufferOverflow if group.any?('A') && group.size >= 1000 - raise 
Kafka::BufferOverflow if group.any?('BIG') && group.size >= 10 - end - allow(Deimos.config.metrics).to receive(:increment) - batch = ['A'] * 450 + ['BIG'] * 550 - producer.produce_messages(batch) - - expect(phobos_producer).to have_received(:publish_list).with(batch) - expect(phobos_producer).to have_received(:publish_list).with(['A'] * 100).exactly(4).times - expect(phobos_producer).to have_received(:publish_list).with(['A'] * 50 + ['BIG'] * 50) - expect(phobos_producer).to have_received(:publish_list).with(['A'] * 10).exactly(5).times - expect(phobos_producer).to have_received(:publish_list).with(['BIG'] * 1).exactly(550).times - - expect(Deimos.config.metrics).to have_received(:increment).with('publish', - tags: %w(status:success topic:), - by: 100).exactly(4).times - expect(Deimos.config.metrics).to have_received(:increment).with('publish', - tags: %w(status:success topic:), - by: 10).exactly(5).times - expect(Deimos.config.metrics).to have_received(:increment).with('publish', - tags: %w(status:success topic:), - by: 1).exactly(550).times end describe '#compact_messages' do @@ -149,17 +80,17 @@ let(:deduped_batch) { batch[1..2] } it 'should dedupe messages when :all is set' do - Deimos.configure { |c| c.db_producer.compact_topics = :all } + Deimos.configure { |c| c.outbox.compact_topics = :all } expect(producer.compact_messages(batch)).to eq(deduped_batch) end it 'should dedupe messages when topic is included' do - Deimos.configure { |c| c.db_producer.compact_topics = %w(my-topic my-topic2) } + Deimos.configure { |c| c.outbox.compact_topics = %w(my-topic my-topic2) } expect(producer.compact_messages(batch)).to eq(deduped_batch) end it 'should not dedupe messages when topic is not included' do - Deimos.configure { |c| c.db_producer.compact_topics = %w(my-topic3 my-topic2) } + Deimos.configure { |c| c.outbox.compact_topics = %w(my-topic3 my-topic2) } expect(producer.compact_messages(batch)).to eq(batch) end @@ -176,13 +107,13 @@ message: 'BBB' } ].map { |h| Deimos::KafkaMessage.create!(h) } - Deimos.configure { |c| c.db_producer.compact_topics = :all } + Deimos.configure { |c| c.outbox.compact_topics = :all } expect(producer.compact_messages(unkeyed_batch)).to eq(unkeyed_batch) - Deimos.configure { |c| c.db_producer.compact_topics = [] } + Deimos.configure { |c| c.outbox.compact_topics = [] } end it 'should compact messages when all messages are unique' do - Deimos.configure { |c| c.db_producer.compact_topics = %w(my-topic my-topic2) } + Deimos.configure { |c| c.outbox.compact_topics = %w(my-topic my-topic2) } expect(producer.compact_messages(deduped_batch)).to eq(deduped_batch) end end @@ -228,7 +159,7 @@ } ]) expect(Deimos.config.metrics).to receive(:increment).ordered.with( - 'db_producer.process', + 'outbox.process', tags: %w(topic:my-topic), by: 2 ) @@ -249,7 +180,7 @@ } ]) expect(Deimos.config.metrics).to receive(:increment).ordered.with( - 'db_producer.process', + 'outbox.process', tags: %w(topic:my-topic), by: 2 ) @@ -263,13 +194,11 @@ end it 'should register an error if it gets an error' do - allow(producer).to receive(:shutdown_producer) expect(producer).to receive(:retrieve_messages).and_raise('OH NOES') expect(Deimos::KafkaTopicInfo).to receive(:register_error). 
with('my-topic', 'abc') expect(producer).not_to receive(:produce_messages) producer.process_topic('my-topic') - expect(producer).to have_received(:shutdown_producer) end it 'should move on if it gets a partial batch' do @@ -299,14 +228,14 @@ expect(Deimos::KafkaTopicInfo).to receive(:register_error) expect(Deimos::KafkaMessage.count).to eq(4) - subscriber = Deimos.subscribe('db_producer.produce') do |event| + Karafka.monitor.subscribe('deimos.outbox.produce') do |event| expect(event.payload[:exception_object].message).to eq('OH NOES') expect(event.payload[:messages]).to eq(messages) end producer.process_topic('my-topic') # don't delete for regular errors expect(Deimos::KafkaMessage.count).to eq(4) - Deimos.unsubscribe(subscriber) + Karafka.monitor.notifications_bus.clear('deimos.outbox.produce') end it 'should retry deletes and not re-publish' do @@ -340,7 +269,7 @@ with('my-topic', 'abc').and_return(true) expect(producer).to receive(:retrieve_messages).ordered.and_return(messages) expect(producer).to receive(:retrieve_messages).ordered.and_return([]) - expect(phobos_producer).to receive(:publish_list).once.with(messages.map(&:phobos_message)) + expect(Karafka.producer).to receive(:produce_many_sync).once.with(messages.map(&:karafka_message)) expect(Deimos::KafkaMessage.count).to eq(8) producer.process_topic('my-topic') @@ -360,102 +289,6 @@ expect { producer.delete_messages(messages) }.to raise_exception('OH NOES') end - context 'with buffer overflow exception' do - let(:messages) do - (1..4).map do |i| - Deimos::KafkaMessage.create!( - id: i, - key: i, - topic: 'my-topic', - message: { message: "mess#{i}" }, - partition_key: "key#{i}" - ) - end - end - let(:logger) do - logger = instance_double(Logger) - allow(logger).to receive(:error) - logger - end - let(:message_producer) do - Deimos.config.schema.backend = :mock - Deimos::ActiveRecordProducer.topic('my-topic') - Deimos::ActiveRecordProducer.key_config - Deimos::ActiveRecordProducer - end - - around(:each) do |example| - config = Deimos::ActiveRecordProducer.config.clone - backend = Deimos.config.schema.backend - - example.run - ensure - Deimos::ActiveRecordProducer.instance_variable_set(:@config, config) - Deimos.config.schema.backend = backend - end - - before(:each) do - message_producer - (5..8).each do |i| - Deimos::KafkaMessage.create!( - id: i, - topic: 'my-topic2', - message: "mess#{i}", - partition_key: "key#{i}" - ) - end - allow(Deimos::KafkaTopicInfo).to receive(:lock). 
- with('my-topic', 'abc').and_return(true) - allow(producer).to receive(:produce_messages).and_raise(Kafka::BufferOverflow) - allow(producer).to receive(:retrieve_messages).and_return(messages) - allow(Deimos::KafkaTopicInfo).to receive(:register_error) - end - - it 'should delete messages on buffer overflow' do - expect(Deimos::KafkaMessage.count).to eq(8) - producer.process_topic('my-topic') - expect(Deimos::KafkaMessage.count).to eq(4) - end - - it 'should notify on buffer overflow' do - subscriber = Deimos.subscribe('db_producer.produce') do |event| - expect(event.payload[:exception_object].message).to eq('Kafka::BufferOverflow') - expect(event.payload[:messages]).to eq(messages) - end - producer.process_topic('my-topic') - Deimos.unsubscribe(subscriber) - expect(logger).to have_received(:error).with('Message batch too large, deleting...') - expect(logger).to have_received(:error).with( - [ - { key: '1', payload: 'payload-decoded' }, - { key: '2', payload: 'payload-decoded' }, - { key: '3', payload: 'payload-decoded' }, - { key: '4', payload: 'payload-decoded' } - ] - ) - end - - context 'with exception on error logging attempt' do - let(:message_producer) do - Deimos::ActiveRecordProducer.topic('my-topic') - Deimos::ActiveRecordProducer - end - - it 'should notify on buffer overflow disregarding decoding exception' do - subscriber = Deimos.subscribe('db_producer.produce') do |event| - expect(event.payload[:exception_object].message).to eq('Kafka::BufferOverflow') - expect(event.payload[:messages]).to eq(messages) - end - producer.process_topic('my-topic') - Deimos.unsubscribe(subscriber) - expect(logger).to have_received(:error).with('Message batch too large, deleting...') - expect(logger).to have_received(:error).with( - 'Large message details logging failure: '\ - 'No key config given - if you are not decoding keys, please use `key_config plain: true`' - ) - end - end - end end describe '#send_pending_metrics' do From bb4d3691dab2c0fd6e7b9aeb16eb23cbfc13a642 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:42:02 -0400 Subject: [PATCH 16/18] V2 generator --- lib/deimos/railtie.rb | 6 + lib/deimos/version.rb | 2 +- .../deimos/v2/templates/karafka.rb.tt | 149 ++++++++++++++ lib/generators/deimos/v2_generator.rb | 193 ++++++++++++++++++ 4 files changed, 349 insertions(+), 1 deletion(-) create mode 100644 lib/generators/deimos/v2/templates/karafka.rb.tt create mode 100644 lib/generators/deimos/v2_generator.rb diff --git a/lib/deimos/railtie.rb b/lib/deimos/railtie.rb index 1bfc09e5..2d129c60 100644 --- a/lib/deimos/railtie.rb +++ b/lib/deimos/railtie.rb @@ -2,6 +2,12 @@ # Add rake task to Rails. 
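+# The deimos:v2 generator reads v1 settings that no longer exist in v2, so the hook below
+# keeps those removed configs around while the generator runs.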
class Deimos::Railtie < Rails::Railtie + config.before_initialize do + if ARGV[0] == "deimos:v2" + FigTree.keep_removed_configs = true + end + end + rake_tasks do load 'tasks/deimos.rake' end diff --git a/lib/deimos/version.rb b/lib/deimos/version.rb index e2f055f6..55460ebe 100644 --- a/lib/deimos/version.rb +++ b/lib/deimos/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Deimos - VERSION = '1.24.3' + VERSION = '2.0.0-alpha1' end diff --git a/lib/generators/deimos/v2/templates/karafka.rb.tt b/lib/generators/deimos/v2/templates/karafka.rb.tt new file mode 100644 index 00000000..4ea447ff --- /dev/null +++ b/lib/generators/deimos/v2/templates/karafka.rb.tt @@ -0,0 +1,149 @@ +# frozen_string_literal: true + +require 'deimos' +require 'karafka' + +Deimos.configure do +<%- deimos_configs.each do |group, settings| -%> + <%= group -%> do + <%- settings.each do |k, v| -%> + <%= k.to_s %> <%= v.inspect %> + <%- end -%> + end + <%- end -%> + <%- deimos_config.db_poller_objects.each do |poller| -%> + db_poller do + <%- poller.non_default_settings! -%> + <%- poller.to_h.each do |k, v| -%> + <%= k.to_s %> <%= v.inspect %> + <%- end -%> + <%- end -%> + end +end + +class KarafkaApp < Karafka::App + setup do |config| + <%- setup_configs.each do |k, v| -%> + config.<%= k %> = <%= v.inspect %> + <%- end -%> + config.kafka = { + <%- default_kafka_configs.compact.each do |k, v| -%> + "<%= k.to_s %>": <%= v.inspect %>, + <%- end -%> + } + # Recreate consumers with each batch. This will allow Rails code reload to work in the + # development mode. Otherwise Karafka process would not be aware of code changes + config.consumer_persistence = !Rails.env.development? + end + + # Comment out this part if you are not using instrumentation and/or you are not + # interested in logging events for certain environments. Since instrumentation + # notifications add extra boilerplate, if you want to achieve max performance, + # listen to only what you really need for given environment. + Karafka.monitor.subscribe(Karafka::Instrumentation::LoggerListener.new) + # Karafka.monitor.subscribe(Karafka::Instrumentation::ProctitleListener.new) + + # This logger prints the producer development info using the Karafka logger. + # It is similar to the consumer logger listener but producer oriented. 
+ Karafka.producer.monitor.subscribe( + WaterDrop::Instrumentation::LoggerListener.new( + # Log producer operations using the Karafka logger + Karafka.logger, + # If you set this to true, logs will contain each message details + # Please note, that this can be extensive + log_messages: false + ) + ) + + # You can subscribe to all consumer related errors and record/track then that way + # + # Karafka.monitor.subscribe 'error.occurred' do |event| + # type = event[:type] + # error = event[:error] + # details = (error.backtrace || []).join("\n") + # ErrorTracker.send_error(error, type, details) + # end + + # You can subscribe to all producer related errors and record/track then that way + # Please note, that producer and consumer have their own notifications pipeline so you need to + # setup error tracking independently for each of them + # + # Karafka.producer.monitor.subscribe('error.occurred') do |event| + # type = event[:type] + # error = event[:error] + # details = (error.backtrace || []).join("\n") + # ErrorTracker.send_error(error, type, details) + # end + + routes.draw do + defaults do + <%- default_configs.each do |k, v| -%> + <%= k.to_s %> <%= v.inspect %> + <%- end -%> + end + + <%- producer_configs.each do |producer| -%> + topic "<%= producer[:topic] %>" do + <%- producer.except(:topic).each do |k, v| -%> + <%- if k.to_sym == :key_config -%> + <%= k.to_s %>(<%= v.inspect %>) + <%- else -%> + <%= k.to_s %> <%= v.inspect %> + <%- end -%> + <%- end -%> + end + <%- end -%> + + <%- consumer_configs.each do |group_id, topics| -%> + <%- if consumer_configs.length > 1 -%> + consumer_group :<%= group_id %> do<%- end -%> + <%- topics.each do |consumer| %> + topic "<%= consumer[:topic] -%>" do + <%- if consumer[:kafka].present? -%> + kafka( + <%- consumer[:kafka].each do |k, v| -%> + "<%= k.to_s %>": <%= v.inspect %>, + <%- end -%> + ) + <%- end -%> + <%- consumer.except(:topic, :kafka).each do |k, v| -%> + <%- if k.to_sym == :key_config -%> + <%= k.to_s %>(<%= v.inspect %>) + <%- else -%> + <%= k.to_s %> <%= v.inspect %> + <%- end -%> + <%- end -%> + end + <%- end -%> + <%- if consumer_configs.length > 1 -%> + end<%- end %> + <%- end -%> + + # Uncomment this if you use Karafka with ActiveJob + # You need to define the topic per each queue name you use + # active_job_topic :default + # topic :example do + # Uncomment this if you want Karafka to manage your topics configuration + # Managing topics configuration via routing will allow you to ensure config consistency + # across multiple environments + # + # config(partitions: 2, 'cleanup.policy': 'compact') + # consumer ExampleConsumer + # end + end +end + +Deimos.setup_karafka + +# Karafka now features a Web UI! +# Visit the setup documentation to get started and enhance your experience. +# +# https://karafka.io/docs/Web-UI-Getting-Started + +# Karafka::Web.setup do |config| +# # You may want to set it per ENV. This value was randomly generated. +# config.ui.sessions.secret = '<%= SecureRandom.hex %>' +# end + +# Karafka::Web.enable! + diff --git a/lib/generators/deimos/v2_generator.rb b/lib/generators/deimos/v2_generator.rb new file mode 100644 index 00000000..89df4d85 --- /dev/null +++ b/lib/generators/deimos/v2_generator.rb @@ -0,0 +1,193 @@ +# frozen_string_literal: true + +# THINGS TO REMEMBER +# logger +# fatal_error +# bulk_import_id_generator + +require 'rails/generators' +require 'rails/version' + +# Generates a new consumer. +module Deimos + module Generators + # Generator for ActiveRecord model and migration. 
+ class V2Generator < Rails::Generators::Base + + class ProcString < String + def inspect + self.to_s + end + end + + source_root File.expand_path('v2/templates', __dir__) + + no_commands do + def deimos_config + Deimos.config + end + + def deimos_configs + configs = { + producers: %i(topic_prefix disabled backend), + schema: %i(backend registry_url user password path generated_class_path use_schema_classes + nest_child_schemas use_full_namespace schema_namespace_map), + db_producer: %i(log_topics compact_topics), + } + + response = {} + configs.each do |group, settings| + group_setting = deimos_config.send(group) + next if settings.all? { |s| group_setting.default_value?(s)} + + response[group] = {} + settings.each do |field| + unless group_setting.default_value?(field.to_sym) + response[group][field.to_s] = group_setting.send(field.to_sym) + end + end + end + response + end + + def setup_configs + configs = {} + configs[:client_id] = if deimos_config.kafka.client_id && deimos_config.kafka.client_id != 'phobos' + deimos_config.kafka.client_id + else + Rails::Application.subclasses.first&.name&.gsub('::Application', '')&.underscore + end + if deimos_config.consumer_objects.any? { |c| c.max_concurrency.present? } + configs[:concurrency] = deimos_config.consumer_objects.map(&:max_concurrency).compact.max + end + if deimos_config.consumer_objects.any? { |c| c.max_wait_time.present? } + configs[:max_wait_time] = deimos_config.consumer_objects.map(&:max_wait_time).compact.max + end + configs.compact + end + + def default_kafka_configs + configs = {} + configs["bootstrap.servers"] = deimos_config.kafka.seed_brokers.join(',') + configs["socket.connection.setup.timeout.ms"] = deimos_config.kafka.connect_timeout * 1000 + configs["socket.timeout.ms"] = deimos_config.kafka.socket_timeout * 1000 + configs["security.protocol"] = if deimos_config.kafka.ssl.enabled + "ssl" + elsif deimos_config.kafka.sasl.enabled + if deimos_config.kafka.sasl.enforce_ssl + "sasl_ssl" + else + "sasl_plain" + end + end + configs["ssl.ca.pem"] = deimos_config.kafka.ssl.ca_cert + configs["ssl.certificate.pem"] = deimos_config.kafka.ssl.client_cert + configs["ssl.key.pem"] = deimos_config.kafka.ssl.client_cert_key + configs["ssl.endpoint.identification.algorithm"] = "https" if deimos_config.kafka.ssl.verify_hostname + configs["sasl.kerberos.principal"] = deimos_config.kafka.sasl.gssapi_principal + configs["sasl.kerberos.keytab"] = deimos_config.kafka.sasl.gssapi_keytab + configs["sasl.username"] = deimos_config.kafka.sasl.plain_username || deimos_config.kafka.sasl.scram_username + configs["sasl.password"] = deimos_config.kafka.sasl.plain_password || deimos_config.kafka.sasl.scram_password + configs["sasl.mechanisms"] = deimos_config.kafka.sasl.scram_mechanism + configs["request.required.acks"] = deimos_config.producers.required_acks + configs["message.send.max.retries"] = deimos_config.producers.max_retries + configs["retry.backoff.ms"] = deimos_config.producers.retry_backoff * 1000 if deimos_config.producers.retry_backoff + configs["compression.codec"] = deimos_config.producers.compression_codec + configs.compact + end + + def default_configs + { + payload_log: deimos_config.payload_log, + reraise_errors: deimos_config.consumers.reraise_errors, + replace_associations: deimos_config.consumers.replace_associations, + namespace: deimos_config.producers.schema_namespace, + use_schema_classes: deimos_config.schema.use_schema_classes + }.compact + end + + def consumer_configs + 
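+          # Builds the v2 consumer routing from the legacy Deimos consumer objects: consumers are
+          # grouped by group_id, second-based timeouts are converted to milliseconds, and settings
+          # left at their defaults are omitted from the generated karafka.rb.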
deimos_config.consumer_objects.group_by(&:group_id).map do |group_id, consumers| + [group_id, consumers.map do |consumer| + kafka_configs = {} + kafka_configs["auto.offset.reset"] = consumer.start_from_beginning ? 'earliest' : 'latest' + kafka_configs["session.timeout.ms"] = consumer.session_timeout * 1000 unless consumer.default_value?(:session_timeout) + kafka_configs["auto.commit.interval.ms"] = consumer.offset_commit_interval * 1000 unless consumer.default_value?(:offset_commit_interval) + kafka_configs["heartbeat.interval.ms"] = consumer.heartbeat_interval * 1000 unless consumer.default_value?(:heartbeat_interval) + configs = { + kafka: kafka_configs.compact, + topic: consumer.topic, + consumer: ProcString.new(consumer.class_name), + schema: consumer.schema, + namespace: consumer.namespace, + key_config: consumer.key_config, + } + configs[:use_schema_classes] = consumer.use_schema_classes unless consumer.default_value?(:use_schema_classes) + configs[:max_db_batch_size] = consumer.max_db_batch_size unless consumer.default_value?(:max_db_batch_size) + configs[:bulk_import_id_column] = consumer.bulk_import_id_column unless consumer.default_value?(:bulk_import_id_column) + configs[:replace_associations] = consumer.replace_associations unless consumer.default_value?(:replace_associations) + configs[:active] = false if consumer.disabled + configs[:each_message] = true unless consumer.delivery.to_s == 'inline_batch' + configs + end] + end.to_h + end + + def producer_configs + deimos_config.producer_objects.map do |producer| + { + topic: producer.topic, + producer_class: ProcString.new(producer.class_name), + schema: producer.schema, + namespace: producer.namespace || deimos_config.producers.schema_namespace, + key_config: producer.key_config, + use_schema_classes: producer.use_schema_classes + }.compact + end + end + + def rename_consumer_methods + deimos_config.consumer_objects.each do |consumer| + consumer.class_name.constantize + file = Object.const_source_location(consumer.class_name)[0] + if file.to_s.include?(Rails.root.to_s) + gsub_file(file, /([\t ]+)def consume\((\w+)(, *(\w+)?)\)/, + "\\1def consume_message(message)\n\\1 \\2 = message.payload\n\\1 \\4 = message.metadata") + gsub_file(file, /([\t ]+)def consume_batch\((\w+)(, *(\w+)?)\)/, + "\\1def consume_batch\n\\1 \\2 = messages.payloads\n\\1 \\4 = messages.metadata") + gsub_file(file, /def record_attributes\((\w+)\)/, + "def record_attributes(\\1, key)") + end + end + end + + def fix_specs + Dir["*/**/*_spec.rb"].each do |file| + gsub_file(file, /,\s*call_original: true/, "") + gsub_file(file, 'Deimos::Backends::Test.sent_messages', "Deimos::TestHelpers.sent_messages") + end + end + + def process_all_files + template('karafka.rb.tt', "karafka.rb", force: true) + rename_consumer_methods + fix_specs + end + + end + + desc 'Generate and update app files for version 2.0' + # @return [void] + def generate + process_all_files + say "Generation complete! 
You are safe to remove the existing initializer that configures Deimos.", :green + say "Note: The following settings cannot be determined by the generator:", :yellow + say "* logger / phobos_logger (dynamic object, cannot be printed out)", :yellow + say "* kafka.sasl.oauth_token_provider", :yellow + say "* producers.max_buffer_size", :yellow + say "* consumer.backoff (only handles minimum, not maximum)", :yellow + say "For more information, see https://github.com/flipp-oss/deimos/blob/master/docs/UPGRADING.md", :yellow + end + end + end +end From b6449db5c3dc1bba17858abed3d557e8d712e49b Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 14:42:11 -0400 Subject: [PATCH 17/18] Docs --- README.md | 785 +++++++++++++++--------------------------- docs/CONFIGURATION.md | 362 +++++++------------ docs/UPGRADING.md | 233 +++++++++++++ 3 files changed, 653 insertions(+), 727 deletions(-) diff --git a/README.md b/README.md index f44c7c2a..eb3b61c9 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,10 @@ A Ruby framework for marrying Kafka, a schema definition like Avro, and/or ActiveRecord and provide a useful toolbox of goodies for Ruby-based Kafka development. -Built on Phobos and hence Ruby-Kafka. +Built on [Karafka](https://karafka.io/). + +[!IMPORTANT] +Deimos 2.x is a major rewrite from 1.x. Please see the [Upgrading Guide](./docs/UPGRADING.md) for information on the changes and how to upgrade. * [Additional Documentation](#additional-documentation) @@ -23,15 +26,15 @@ Built on Phobos and hence Ruby-Kafka. * [Kafka Message Keys](#kafka-message-keys) * [Consumers](#consumers) * [Rails Integration](#rails-integration) - * [Controller Mixin](#controller-mixin) - * [Database Backend](#database-backend) + * [Producing](#rails-producing) + * [Consuming](#rails-consuming) + * [Generating Tables and Models](#generating-tables-and-models) + * [Outbox Backend](#outbox-backend) * [Database Poller](#database-poller) * [Running Consumers](#running-consumers) * [Generated Schema Classes](#generated-schema-classes) * [Metrics](#metrics) * [Testing](#testing) - * [Test Helpers](#test-helpers) - * [Integration Test Helpers](#integration-test-helpers) * [Utilities](#utilities) * [Contributing](#contributing) @@ -70,7 +73,7 @@ are for bugfixes or new functionality which does not affect existing code. You should be locking your Gemfile to the minor version: ```ruby -gem 'deimos-ruby', '~> 1.1' +gem 'deimos-ruby', '~> 1.1.0' ``` # Configuration @@ -100,7 +103,15 @@ To create a new schema backend, please see the existing examples [here](lib/deim # Producers -Producers will look like this: +With the correct [configuration](./docs/CONFIGURATION.md), you do not need to use a Deimos producer class in order to send schema-encoded messages to Kafka. You can simply use `Karafka.producer.produce()` (see [here](https://karafka.io/docs/Producing-messages/)). There are a few features that Deimos producers provide: + +* Using an instance method to determine partition key based on the provided payload +* Allowing global disabling of producers (or a particular producer class) +* Usage of the [Outbox](#outbox) producer backend. + +Producer classes in general are a handy way to coerce some object into a hash or [schema class](#generated-schema-classes) that represents the payload. 
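+
+As a rough illustration (a sketch, not an API guarantee), producing without a Deimos producer
+class might look like the following, assuming a `my-topic` route with a schema and key config
+has already been defined in `karafka.rb`:
+
+```ruby
+# Deimos's producer middleware is assumed to encode the hash against the topic's configured schema.
+Karafka.producer.produce_sync(
+  topic: 'my-topic',
+  payload: { 'test_id' => 'foo', 'some_int' => 123 }
+)
+```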
+ +A Deimos producer could look like this: ```ruby class MyProducer < Deimos::Producer @@ -113,27 +124,22 @@ class MyProducer < Deimos::Producer payload[:my_id] end - # You can call publish / publish_list directly, or create new methods - # wrapping them. + # You can call produce directly, or create new methods wrapping it. def send_some_message(an_object) payload = { 'some-key' => an_object.foo, 'some-key2' => an_object.bar } - # You can also publish an array with self.publish_list(payloads) - # You may specify the topic here with self.publish(payload, topic: 'my-topic') - # You may also specify the headers here with self.publish(payload, headers: { 'foo' => 'bar' }) - self.publish(payload) + self.produce([{payload: payload}]) + # additional keys can be added - see https://karafka.io/docs/WaterDrop-Usage/ + self.produce([{payload: payload, topic: "other-topic", key: "some-key", partition_key: "some-key2"}]) end - end - - end ``` -### Auto-added Fields +## Auto-added Fields If your schema has a field called `message_id`, and the payload you give your producer doesn't have this set, Deimos will auto-generate @@ -143,7 +149,7 @@ so that you can track each sent message via logging. You can also provide a field in your schema called `timestamp` which will be auto-filled with the current timestamp if not provided. -### Coerced Values +## Coerced Values Deimos will do some simple coercions if you pass values that don't exactly match the schema. @@ -155,60 +161,28 @@ representing a number, will be parsed to Float. * If the schema is :string, if the value implements its own `to_s` method, this will be called on it. This includes hashes, symbols, numbers, dates, etc. -### Instrumentation - -Deimos will send ActiveSupport Notifications. -You can listen to these notifications e.g. as follows: +## Disabling Producers +You can disable producers globally or inside a block. Globally: ```ruby - Deimos.subscribe('produce') do |event| - # event is an ActiveSupport::Notifications::Event - # you can access time, duration, and transaction_id - # payload contains :producer, :topic, and :payloads - data = event.payload - end -``` +Deimos.config.producers.disabled = true +``` -The following events are produced (in addition to the ones already -produced by Phobos and RubyKafka): +For the duration of a block: +```ruby +Deimos.disable_producers do + # code goes here +end +``` -* `produce_error` - sent when an error occurs when producing a message. - * producer - the class that produced the message - * topic - * exception_object - * payloads - the unencoded payloads -* `encode_messages` - sent when messages are being schema-encoded. - * producer - the class that produced the message - * topic - * payloads - the unencoded payloads -* `db_producer.produce` - sent when the DB producer sends messages for the - DB backend. Messages that are too large will be caught with this - notification - they will be deleted from the table and this notification - will be fired with an exception object. - * topic - * exception_object - * messages - the batch of messages (in the form of `Deimos::KafkaMessage`s) - that failed - this should have only a single message in the batch. -* `batch_consumption.valid_records` - sent when the consumer has successfully upserted records. Limited by `max_db_batch_size`. 
- * consumer: class of the consumer that upserted these records - * records: Records upserted into the DB (of type `ActiveRecord::Base`) -* `batch_consumption.invalid_records` - sent when the consumer has rejected records returned from `filtered_records`. Limited by `max_db_batch_size`. - * consumer: class of the consumer that rejected these records - * records: Rejected records (of type `Deimos::ActiveRecordConsume::BatchRecord`) - -Similarly: +For specific producers only: ```ruby - Deimos.subscribe('produce_error') do |event| - data = event.payloads - Mail.send("Got an error #{event.exception_object.message} on topic #{data[:topic]} with payloads #{data[:payloads]}") - end - - Deimos.subscribe('encode_messages') do |event| - # ... - end -``` +Deimos.disable_producers(Producer1, Producer2) do + # code goes here +end +``` -### Kafka Message Keys +## Kafka Message Keys Topics representing events rather than domain data don't need keys. However, best practice for domain messages is to schema-encode message keys @@ -291,6 +265,40 @@ it will be encoded first against the schema). So your payload would look like Remember that if you're using `schema`, the `payload_key` must be a *hash*, not a plain value. +## Instrumentation + +Deimos will send events through the [Karafka instrumentation monitor](https://karafka.io/docs/Monitoring-and-Logging/#subscribing-to-the-instrumentation-events). +You can listen to these notifications e.g. as follows: + +```ruby + Karafka.monitor.subscribe('deimos.encode_message') do |event| + # event is a Karafka Event. You can use [] to access keys in the payload. + messages = event[:messages] + end +``` + +The following events are produced (in addition to the ones already +produced by Phobos and RubyKafka): + +* `deimos.encode_message` - sent when messages are being schema-encoded. + * producer - the class that produced the message + * topic + * payloads - the unencoded payloads +* `outbox.produce` - sent when the outbox producer sends messages for the + outbox backend. Messages that are too large will be caught with this + notification - they will be deleted from the table and this notification + will be fired with an exception object. + * topic + * exception_object + * messages - the batch of messages (in the form of `Deimos::KafkaMessage`s) + that failed - this should have only a single message in the batch. +* `deimos.batch_consumption.valid_records` - sent when the consumer has successfully upserted records. Limited by `max_db_batch_size`. + * consumer: class of the consumer that upserted these records + * records: Records upserted into the DB (of type `ActiveRecord::Base`) +* `deimos.batch_consumption.invalid_records` - sent when the consumer has rejected records returned from `filtered_records`. Limited by `max_db_batch_size`. + * consumer: class of the consumer that rejected these records + * records: Rejected records (of type `Deimos::ActiveRecordConsume::BatchRecord`) + # Consumers Here is a sample consumer: @@ -305,18 +313,16 @@ class MyConsumer < Deimos::Consumer exception.is_a?(MyBadError) end - def consume(payload, metadata) - # Same method as Phobos consumers. - # payload is an schema-decoded hash. - # metadata is a hash that contains information like :key and :topic. - # In general, your key should be included in the payload itself. 
However, - # if you need to access it separately from the payload, you can use - # metadata[:key] + def consume_batch + # messages is a Karafka Messages - see https://github.com/karafka/karafka/blob/master/lib/karafka/messages/messages.rb + messages.payloads.each do |payload| + puts payload + end end end ``` -### Fatal Errors +## Fatal Errors The recommended configuration is for consumers *not* to raise errors they encounter while consuming messages. Errors can be come from @@ -330,95 +336,31 @@ can use instrumentation to handle errors you receive. You can also specify "fatal errors" either via global configuration (`config.fatal_error`) or via overriding a method on an individual consumer (`def fatal_error`). -### Batch Consumption +## Per-Message Consumption -Instead of consuming messages one at a time, consumers can receive a batch of -messages as an array and then process them together. This can improve -consumer throughput, depending on the use case. Batch consumers behave like -other consumers in regards to key and payload decoding, etc. +Instead of consuming messages in a batch, consumers can process one message at a time. This is +helpful if the logic involved in each message is independent and you don't want to treat the whole +batch as a single unit. -To enable batch consumption, ensure that the `delivery` property of your -consumer is set to `inline_batch`. +To enable message consumption, ensure that the `each_message` property of your +consumer is set to `true`. -Batch consumers will invoke the `consume_batch` method instead of `consume` +Per-message consumers will invoke the `consume_message` method instead of `consume_batch` as in this example: ```ruby -class MyBatchConsumer < Deimos::Consumer - - def consume_batch(payloads, metadata) - # payloads is an array of schema-decoded hashes. - # metadata is a hash that contains information like :keys, :topic, - # and :first_offset. - # Keys are automatically decoded and available as an array with - # the same cardinality as the payloads. If you need to iterate - # over payloads and keys together, you can use something like this: - - payloads.zip(metadata[:keys]) do |_payload, _key| - # Do something - end - end -end -``` -#### Saving data to Multiple Database tables - -> This feature is implemented and tested with MySQL database ONLY. - -Sometimes, the Kafka message needs to be saved to multiple database tables. For example, if a `User` topic provides you metadata and profile image for users, we might want to save it to multiple tables: `User` and `Image`. - -- Return associations as keys in `record_attributes` to enable this feature. -- The `bulk_import_id_column` config allows you to specify column_name on `record_class` which can be used to retrieve IDs after save. Defaults to `bulk_import_id`. This config is *required* if you have associations but optional if you do not. - -You must override the `record_attributes` (and optionally `column` and `key_columns`) methods on your consumer class for this feature to work. -- `record_attributes` - This method is required to map Kafka messages to ActiveRecord model objects. -- `columns(klass)` - Should return an array of column names that should be used by ActiveRecord klass during SQL insert operation. -- `key_columns(messages, klass)` - Should return an array of column name(s) that makes a row unique. 
-```ruby -class User < ApplicationRecord - has_many :images -end - -class MyBatchConsumer < Deimos::ActiveRecordConsumer - - record_class User +class MyMessageConsumer < Deimos::Consumer - def record_attributes(payload, _key) - { - first_name: payload.first_name, - images: [ - { - attr1: payload.image_url - }, - { - attr2: payload.other_image_url - } - ] - } - end - - def key_columns(klass) - case klass - when User - nil # use default - when Image - ["image_url", "image_name"] - end - end - - def columns(klass) - case klass - when User - nil # use default - when Image - klass.columns.map(&:name) - [:created_at, :updated_at, :id] - end + def consume_message(message) + # message is a Karafka Message object + puts message.payload end end ``` # Rails Integration -### Producing +## Producing Deimos comes with an ActiveRecordProducer. This takes a single or list of ActiveRecord objects or hashes and maps it to the given schema. @@ -439,7 +381,7 @@ class MyProducer < Deimos::ActiveRecordProducer # Optionally override this if you want the message to be # sent even if fields that aren't in the schema are changed. - def watched_attributes + def watched_attributes(_record) super + ['a_non_schema_attribute'] end @@ -458,28 +400,7 @@ MyProducer.send_events([Widget.new(foo: 1), Widget.new(foo: 2)]) MyProducer.send_events([{foo: 1}, {foo: 2}]) ``` -#### Disabling Producers - -You can disable producers globally or inside a block. Globally: -```ruby -Deimos.config.producers.disabled = true -``` - -For the duration of a block: -```ruby -Deimos.disable_producers do - # code goes here -end -``` - -For specific producers only: -```ruby -Deimos.disable_producers(Producer1, Producer2) do - # code goes here -end -``` - -#### KafkaSource +### KafkaSource There is a special mixin which can be added to any ActiveRecord class. This will create callbacks which will automatically send messages to Kafka whenever @@ -491,7 +412,7 @@ will not fire if using pure SQL or Arel. Note that these messages are sent *during the transaction*, i.e. using `after_create`, `after_update` and `after_destroy`. If there are questions of consistency between the database and Kafka, it is recommended -to switch to using the DB backend (see next section) to avoid these issues. +to switch to using the outbox backend (see next section) to avoid these issues. When the object is destroyed, an empty payload with a payload key consisting of the record's primary key is sent to the producer. If your topic's key is @@ -525,120 +446,7 @@ class Widget < ActiveRecord::Base end ``` -### Controller Mixin - -Deimos comes with a mixin for `ActionController` which automatically encodes and decodes schema -payloads. There are some advantages to encoding your data in e.g. Avro rather than straight JSON, -particularly if your service is talking to another backend service rather than the front-end -browser: - -* It enforces a contract between services. Solutions like [OpenAPI](https://swagger.io/specification/) - do this as well, but in order for the client to know the contract, usually some kind of code - generation has to happen. Using schemas ensures both sides know the contract without having to change code. - In addition, OpenAPI is now a huge and confusing format, and using simpler schema formats - can be beneficial. -* Using Avro or Protobuf ensures both forwards and backwards compatibility, - which reduces the need for versioning since both sides can simply ignore fields they aren't aware - of. 
-* Encoding and decoding using Avro or Protobuf is generally faster than straight JSON, and - results in smaller payloads and therefore less network traffic. - -To use the mixin, add the following to your `WhateverController`: - -```ruby -class WhateverController < ApplicationController - include Deimos::Utils::SchemaControllerMixin - - request_namespace 'my.namespace.requests' - response_namespace 'my.namespace.responses' - - # Add a "schemas" line for all routes that should encode/decode schemas. - # Default is to match the schema name to the route name. - schemas :index - # will look for: my.namespace.requests.Index.avsc - # my.namespace.responses.Index.avsc - - # Can use mapping to change the schema but keep the namespaces, - # i.e. use the same schema name across the two namespaces - schemas create: 'CreateTopic' - # will look for: my.namespace.requests.CreateTopic.avsc - # my.namespace.responses.CreateTopic.avsc - - # If all routes use the default, you can add them all at once - schemas :index, :show, :update - - # Different schemas can be specified as well - schemas :index, :show, request: 'IndexRequest', response: 'IndexResponse' - - # To access the encoded data, use the `payload` helper method, and to render it back, - # use the `render_schema` method. - - def index - response = { 'response_id' => payload['request_id'] + 'hi mom' } - render_schema(response) - end -end -``` - -To make use of this feature, your requests and responses need to have the correct content type. -For Avro content, this is the `avro/binary` content type. - -# Database Backend - -Deimos provides a way to allow Kafka messages to be created inside a -database transaction, and send them asynchronously. This ensures that your -database transactions and Kafka messages related to those transactions -are always in sync. Essentially, it separates the message logic so that a -message is first validated, encoded, and saved in the database, and then sent -on a separate thread. This means if you have to roll back your transaction, -it also rolls back your Kafka messages. - -This is also known as the [Transactional Outbox pattern](https://microservices.io/patterns/data/transactional-outbox.html). - -To enable this, first generate the migration to create the relevant tables: - - rails g deimos:db_backend - -You can now set the following configuration: - - config.producers.backend = :db - -This will save all your Kafka messages to the `kafka_messages` table instead -of immediately sending to Kafka. Now, you just need to call - - Deimos.start_db_backend! - -You can do this inside a thread or fork block. -If using Rails, you can use a Rake task to do this: - - rails deimos:db_producer - -This creates one or more threads dedicated to scanning and publishing these -messages by using the `kafka_topics` table in a manner similar to -[Delayed Job](https://github.com/collectiveidea/delayed_job). -You can pass in a number of threads to the method: - - Deimos.start_db_backend!(thread_count: 2) # OR - THREAD_COUNT=5 rails deimos:db_producer - -If you want to force a message to send immediately, just call the `publish_list` -method with `force_send: true`. You can also pass `force_send` into any of the -other methods that publish events, like `send_event` in `ActiveRecordProducer`. - -A couple of gotchas when using this feature: -* This may result in high throughput depending on your scale. If you're - using Rails < 5.1, you should add a migration to change the `id` column - to `BIGINT`. Rails >= 5.1 sets it to BIGINT by default. 
-* This table is high throughput but should generally be empty. Make sure - you optimize/vacuum this table regularly to reclaim the disk space. -* Currently, threads allow you to scale the *number* of topics but not - a single large topic with lots of messages. There is an [issue](https://github.com/flipp-oss/deimos/issues/23) - opened that would help with this case. - -For more information on how the database backend works and why it was -implemented, please see [Database Backends](docs/DATABASE_BACKEND.md). - -### Consuming +## Consuming Deimos provides an ActiveRecordConsumer which will take a payload and automatically save it to a provided model. It will take the intersection @@ -702,42 +510,19 @@ class MyConsumer < Deimos::ActiveRecordConsumer end ``` -#### Generating Tables and Models - -Deimos provides a generator that takes an existing schema and generates a -database table based on its fields. By default, any complex sub-types (such as -records or arrays) are turned into JSON (if supported) or string columns. - -Before running this migration, you must first copy the schema into your repo -in the correct path (in the example above, you would need to have a file -`{SCHEMA_ROOT}/com/my-namespace/MySchema.avsc`). - -To generate a model and migration, run the following: - - rails g deimos:active_record TABLE_NAME FULL_SCHEMA_NAME - -Example: - - rails g deimos:active_record my_table com.my-namespace.MySchema - -...would generate: - - db/migrate/1234_create_my_table.rb - app/models/my_table.rb - -#### Batch Consumers +### Batch Consuming Deimos also provides a batch consumption mode for `ActiveRecordConsumer` which processes groups of messages at once using the ActiveRecord backend. -Batch ActiveRecord consumers make use of the +Batch ActiveRecord consumers make use of [activerecord-import](https://github.com/zdennis/activerecord-import) to insert or update multiple records in bulk SQL statements. This reduces processing time at the cost of skipping ActiveRecord callbacks for individual records. Deleted records (tombstones) are grouped into `delete_all` calls and thus also skip `destroy` callbacks. -Batch consumption is used when the `delivery` setting for your consumer is set to `inline_batch`. +Batch consumption is used when the `each_message` setting for your consumer is set to `false` (the default). **Note**: Currently, batch consumption only supports only primary keys as identifiers out of the box. See [the specs](spec/active_record_batch_consumer_spec.rb) for an example of how to use compound keys. @@ -750,8 +535,6 @@ A sample batch consumer would look as follows: ```ruby class MyConsumer < Deimos::ActiveRecordConsumer - schema 'MySchema' - key_config field: 'my_field' record_class Widget # Controls whether the batch is compacted before consuming. @@ -760,7 +543,7 @@ class MyConsumer < Deimos::ActiveRecordConsumer # If false, messages will be grouped into "slices" of independent keys # and each slice will be imported separately. # - # compacted false + compacted false # Optional override of the default behavior, which is to call `delete_all` @@ -778,7 +561,141 @@ class MyConsumer < Deimos::ActiveRecordConsumer end ``` -## Database Poller +### Saving data to Multiple Database tables + +> This feature is implemented and tested with MySQL ONLY. + +Sometimes, a Kafka message needs to be saved to multiple database tables. For example, if a `User` topic provides you metadata and profile image for users, we might want to save it to multiple tables: `User` and `Image`. 
+ +- Return associations as keys in `record_attributes` to enable this feature. +- The `bulk_import_id_column` config allows you to specify column_name on `record_class` which can be used to retrieve IDs after save. Defaults to `bulk_import_id`. This config is *required* if you have associations but optional if you do not. + +You must override the `record_attributes` (and optionally `column` and `key_columns`) methods on your consumer class for this feature to work. +- `record_attributes` - This method is required to map Kafka messages to ActiveRecord model objects. +- `columns(klass)` - Should return an array of column names that should be used by ActiveRecord klass during SQL insert operation. +- `key_columns(messages, klass)` - Should return an array of column name(s) that makes a row unique. + +```ruby +class User < ApplicationRecord + has_many :images +end + +class MyConsumer < Deimos::ActiveRecordConsumer + + record_class User + + def record_attributes(payload, _key) + { + first_name: payload.first_name, + images: [ + { + attr1: payload.image_url + }, + { + attr2: payload.other_image_url + } + ] + } + end + + def key_columns(klass) + case klass + when User + nil # use default + when Image + ["image_url", "image_name"] + end + end + + def columns(klass) + case klass + when User + nil # use default + when Image + klass.columns.map(&:name) - [:created_at, :updated_at, :id] + end + end +end +``` + +## Generating Tables and Models + +Deimos provides a generator that takes an existing schema and generates a +database table based on its fields. By default, any complex sub-types (such as +records or arrays) are turned into JSON (if supported) or string columns. + +Before running this migration, you must first copy the schema into your repo +in the correct path (in the example above, you would need to have a file +`{SCHEMA_ROOT}/com/my-namespace/MySchema.avsc`). + +To generate a model and migration, run the following: + + rails g deimos:active_record TABLE_NAME FULL_SCHEMA_NAME + +Example: + + rails g deimos:active_record my_table com.my-namespace.MySchema + +...would generate: + + db/migrate/1234_create_my_table.rb + app/models/my_table.rb + +# Outbox Backend + +Deimos provides a way to allow Kafka messages to be created inside a +database transaction, and send them asynchronously. This ensures that your +database transactions and Kafka messages related to those transactions +are always in sync. Essentially, it separates the message logic so that a +message is first validated, encoded, and saved in the database, and then sent +on a separate thread. This means if you have to roll back your transaction, +it also rolls back your Kafka messages. + +This is also known as the [Transactional Outbox pattern](https://microservices.io/patterns/data/transactional-outbox.html). + +To enable this, first generate the migration to create the relevant tables: + + rails g deimos:outbox + +You can now set the following configuration: + + config.producers.backend = :outbox + +This will save all your Kafka messages to the `kafka_messages` table instead +of immediately sending to Kafka. Now, you just need to call + + Deimos.start_outbox_backend! + +You can do this inside a thread or fork block. +If using Rails, you can use a Rake task to do this: + + rails deimos:outbox + +This creates one or more threads dedicated to scanning and publishing these +messages by using the `kafka_topics` table in a manner similar to +[Delayed Job](https://github.com/collectiveidea/delayed_job). 
+You can pass in a number of threads to the method: + + Deimos.start_outbox_backend!(thread_count: 2) # OR + THREAD_COUNT=5 rails deimos:outbox + +If you want to force a message to send immediately, just call the `produce` +method with `backend: kafka`. + +A couple of gotchas when using this feature: +* This may result in high throughput depending on your scale. If you're + using Rails < 5.1, you should add a migration to change the `id` column + to `BIGINT`. Rails >= 5.1 sets it to BIGINT by default. +* This table is high throughput but should generally be empty. Make sure + you optimize/vacuum this table regularly to reclaim the disk space. +* Currently, threads allow you to scale the *number* of topics but not + a single large topic with lots of messages. There is an [issue](https://github.com/flipp-oss/deimos/issues/23) + opened that would help with this case. + +For more information on how the database backend works and why it was +implemented, please see [Database Backends](docs/DATABASE_BACKEND.md). + +# Database Poller Another method of fetching updates from the database to Kafka is by polling the database (a process popularized by [Kafka Connect](https://docs.confluent.io/current/connect/index.html)). @@ -825,7 +742,7 @@ define one additional method on the producer: ```ruby class MyProducer < Deimos::ActiveRecordProducer - ... + # ... def poll_query(time_from:, time_to:, column_name:, min_id:) # Default is to use the timestamp `column_name` to find all records # between time_from and time_to, or records where `updated_at` is equal to @@ -834,6 +751,12 @@ class MyProducer < Deimos::ActiveRecordProducer # middle of a timestamp, we won't miss any records. # You can override or change this behavior if necessary. end + + # You can define this method if you need to do some extra actions with + # the collection of elements you just sent to Kafka + def post_process(batch) + # write some code here + end end ``` @@ -847,25 +770,10 @@ have one process running at a time. If a particular poll takes longer than the poll interval (i.e. interval is set at 1 minute but it takes 75 seconds) the next poll will begin immediately following the first one completing. -To Post-Process records that are sent to Kafka: - -You need to define one additional method in your producer class to post-process the messages sent to Kafka. - -```ruby -class MyProducer < Deimos::ActiveRecordProducer - ... - def post_process(batch) - # If you need to do some extra actions with - # the collection of elements you just sent to Kafka - # write some code here - end -end -``` - Note that the poller will retry infinitely if it encounters a Kafka-related error such as a communication failure. For all other errors, it will retry once by default. -### State-based pollers +## State-based pollers By default, pollers use timestamps and IDs to determine the records to publish. However, you can set a different mode whereby it will include all records that match your query, and when done, @@ -884,7 +792,7 @@ db_poller do end ``` -## Running consumers +# Running consumers Deimos includes a rake task. Once it's in your gemfile, just run @@ -895,7 +803,7 @@ which can be useful if you want to figure out if you're inside the task as opposed to running your Rails server or console. E.g. you could start your DB backend only when your rake task is running. -## Generated Schema Classes +# Generated Schema Classes Deimos offers a way to generate classes from Avro schemas. 
These classes are documented with YARD to aid in IDE auto-complete, and will help to move errors closer to the code. @@ -925,7 +833,7 @@ One additional configuration option indicates whether nested records should be g You can generate a tombstone message (with only a key and no value) by calling the `YourSchemaClass.tombstone(key)` method. If you're using a `:field` key config, you can pass in just the key scalar value. If using a key schema, you can pass it in as a hash or as another schema class. -### Consumer +## Consumer The consumer interface uses the `decode_message` method to turn JSON hash into the Schemas generated Class and provides it to the `consume`/`consume_batch` methods for their use. @@ -933,13 +841,13 @@ generated Class and provides it to the `consume`/`consume_batch` methods for the Examples of consumers would look like this: ```ruby class MyConsumer < Deimos::Consumer - def consume(payload, metadata) - # Same method as Phobos consumers but payload is now an instance of Deimos::SchemaClass::Record - # rather than a hash. metadata is still a hash that contains information like :key and :topic. + def consume_message(message) + # Same method as before but message.payload is now an instance of Deimos::SchemaClass::Record + # rather than a hash. # You can interact with the schema class instance in the following way: - do_something(payload.test_id, payload.some_int) + do_something(message.payload.test_id, message.payload.some_int) # The original behaviour was as follows: - do_something(payload[:test_id], payload[:some_int]) + do_something(message.payload[:test_id], message.payload[:some_int]) end end ``` @@ -958,9 +866,10 @@ class MyActiveRecordConsumer < Deimos::ActiveRecordConsumer end ``` -### Producer +## Producer + Similarly to the consumer interface, the producer interface for using Schema Classes in your app -relies on the `publish`/`publish_list` methods to convert a _provided_ instance of a Schema Class +relies on the `produce` method to convert a _provided_ instance of a Schema Class into a hash that can be used freely by the Kafka client. Examples of producers would look like this: @@ -976,8 +885,7 @@ class MyProducer < Deimos::Producer test_id: test_id, some_int: some_int ) - self.publish(message) - self.publish_list([message]) + self.produce({payload: message}) end end end @@ -986,8 +894,9 @@ end ```ruby class MyActiveRecordProducer < Deimos::ActiveRecordProducer record_class Widget - # @param payload [Deimos::SchemaClass::Record] + # @param attributes [Hash] # @param _record [Widget] + # @return [Deimos::SchemaClass::Record] def self.generate_payload(attributes, _record) # This method converts your ActiveRecord into a Deimos::SchemaClass::Record. You will be able to use super # as an instance of Schemas::MySchema and set values that are not on your ActiveRecord schema. @@ -1000,51 +909,26 @@ end # Metrics -Deimos includes some metrics reporting out the box. It ships with DataDog support, but you can add custom metric providers as well. +Deimos includes some metrics reporting out of the box. It adds to the existing [Karafka DataDog support](https://karafka.io/docs/Monitoring-and-Logging/#datadog-and-statsd-integration). It ships with DataDog support, but you can add custom metric providers as well. The following metrics are reported: -* `consumer_lag` - for each partition, the number of messages - it's behind the tail of the partition (a gauge). This is only sent if - `config.consumers.report_lag` is set to true. 
-* `handler` - a count of the number of messages received. Tagged - with the following: - * `topic:{topic_name}` - * `status:received` - * `status:success` - * `status:error` - * `time:consume` (histogram) - * Amount of time spent executing handler for each message - * Batch Consumers - report counts by number of batches - * `status:batch_received` - * `status:batch_success` - * `status:batch_error` - * `time:consume_batch` (histogram) - * Amount of time spent executing handler for entire batch - * `time:time_delayed` (histogram) - * Indicates the amount of time between the `timestamp` property of each - payload (if present) and the time that the consumer started processing - the message. -* `publish` - a count of the number of messages received. Tagged - with `topic:{topic_name}` -* `publish_error` - a count of the number of messages which failed - to publish. Tagged with `topic:{topic_name}` -* `pending_db_messages_max_wait` - the number of seconds which the +* `deimos.pending_db_messages_max_wait` - the number of seconds which the oldest KafkaMessage in the database has been waiting for, for use with the database backend. Tagged with the topic that is waiting. Will send a value of 0 with no topics tagged if there are no messages waiting. -* `db_producer.insert` - the number of messages inserted into the database +* `deimos.outbox.publish` - the number of messages inserted into the database for publishing. Tagged with `topic:{topic_name}` -* `db_producer.process` - the number of DB messages processed. Note that this +* `deimos.outbox.process` - the number of DB messages processed. Note that this is *not* the same as the number of messages *published* if those messages are compacted. Tagged with `topic:{topic_name}` -### Configuring Metrics Providers +## Configuring Metrics Providers See the `metrics` field under [Configuration](#configuration). View all available Metrics Providers [here](lib/deimos/metrics) -### Custom Metrics Providers +## Custom Metrics Providers Using the above configuration, it is possible to pass in any generic Metrics Provider class as long as it exposes the methods and definitions expected by @@ -1059,17 +943,18 @@ Also see [deimos.rb](lib/deimos.rb) under `Configure metrics` to see how the met # Tracing Deimos also includes some tracing for kafka consumers. It ships with -DataDog support, but you can add custom tracing providers as well. +DataDog support, but you can add custom tracing providers as well. (It does not use the built-in Karafka +tracers so that it can support per-message tracing, which Karafka does not provide for.) Trace spans are used for when incoming messages are schema-decoded, and a separate span for message consume logic. -### Configuring Tracing Providers +## Configuring Tracing Providers See the `tracing` field under [Configuration](#configuration). View all available Tracing Providers [here](lib/deimos/tracing) -### Custom Tracing Providers +## Custom Tracing Providers Using the above configuration, it is possible to pass in any generic Tracing Provider class as long as it exposes the methods and definitions expected by @@ -1083,7 +968,9 @@ Also see [deimos.rb](lib/deimos.rb) under `Configure tracing` to see how the tra # Testing -Deimos comes with a test helper class which provides useful methods for testing consumers. +Deimos comes with a test helper class which provides useful methods for testing consumers. 
This is built on top of +Karafka's [testing library](https://karafka.io/docs/Testing/) and is primarily helpful because it can decode +the sent messages for comparison (Karafka only decodes the messages once they have been consumed). In `spec_helper.rb`: ```ruby @@ -1097,55 +984,34 @@ end ```ruby # The following can be added to a rpsec file so that each unit # test can have the same settings every time it is run -around(:each) do |example| - Deimos::TestHelpers.unit_test! - example.run - Deimos.config.reset! -end - -# Similarly you can use the Kafka test helper -around(:each) do |example| - Deimos::TestHelpers.kafka_test! - example.run - Deimos.config.reset! -end - -# Kakfa test helper using schema registry -around(:each) do |example| - Deimos::TestHelpers.full_integration_test! - example.run +after(:each) do Deimos.config.reset! + Deimos.config.schema.backend = :avro_validation end ``` -With the help of these helper methods, rspec examples can be written without having to tinker with Deimos settings. -This also prevents Deimos setting changes from leaking in to other examples. - -This does not take away the ability to configure Deimos manually in individual examples. Deimos can still be configured like so: +With the help of these helper methods, RSpec examples can be written without having to tinker with Deimos settings. +This also prevents Deimos setting changes from leaking in to other examples. You can make these changes on an individual test level and ensure that it resets back to where it needs to go: ```ruby it 'should not fail this random test' do Deimos.configure do |config| config.consumers.fatal_error = proc { true } - config.consumers.reraise_errors = false end ... expect(some_object).to be_truthy - ... - Deimos.config.reset! end ``` -If you are using one of the test helpers in an `around(:each)` block and want to override few settings for one example, -you can do it like in the example shown above. These settings would only apply to that specific example and the Deimos config should -reset once the example has finished running. ## Test Usage -In your tests, you now have the following methods available: +You can use `karafka.produce()` and `consumer.consume` in your tests without having to go through +Deimos TestHelpers. However, there are some useful abilities that Deimos gives you: + ```ruby -# Pass a consumer class (not instance) to validate a payload against it. -# This will fail if the payload does not match the schema the consumer -# is set up to consume. +# Pass a consumer class (not instance) to validate a payload against it. This takes either a class +# or a topic (Karafka only supports topics in its test helpers). This will validate the payload +# and execute the consumer logic. 
test_consume_message(MyConsumer, { 'some-payload' => 'some-value' }) do |payload, metadata| # do some expectation handling here @@ -1158,15 +1024,6 @@ test_consume_message('my-topic-name', # do some expectation handling here end -# Alternatively, you can test the actual consume logic: -test_consume_message(MyConsumer, - { 'some-payload' => 'some-value' }, - call_original: true) - -# Test that a given payload is invalid against the schema: -test_consume_invalid_message(MyConsumer, - { 'some-invalid-payload' => 'some-value' }) - # For batch consumers, there are similar methods such as: test_consume_batch(MyBatchConsumer, [{ 'some-payload' => 'some-value' }, @@ -1181,7 +1038,7 @@ end expect(topic_name).to have_sent(payload, key=nil, partition_key=nil, headers=nil) # Inspect sent messages -message = Deimos::Backends::Test.sent_messages[0] +message = Deimos::TestHelpers.sent_messages[0] expect(message).to eq({ message: {'some-key' => 'some-value'}, topic: 'my-topic', @@ -1190,75 +1047,7 @@ expect(message).to eq({ }) ``` -### Test Utilities - -There is also a helper method that will let you test if an existing schema -would be compatible with a new version of it. You can use this in your -Ruby console but it would likely not be part of your RSpec test: - -```ruby -require 'deimos/test_helpers' -# Can pass a file path, a string or a hash into this: -Deimos::TestHelpers.schemas_compatible?(schema1, schema2) -``` - -You can use the `InlineConsumer` class to help with integration testing, -with a full external Kafka running. - -If you have a consumer you want to test against messages in a Kafka topic, -use the `consume` method: -```ruby -Deimos::Utils::InlineConsumer.consume( - topic: 'my-topic', - frk_consumer: MyConsumerClass, - num_messages: 5 - ) -``` - -This is a _synchronous_ call which will run the consumer against the -last 5 messages in the topic. You can set `num_messages` to a number -like `1_000_000` to always consume all the messages. Once the last -message is retrieved, the process will wait 1 second to make sure -they're all done, then continue execution. - -If you just want to retrieve the contents of a topic, you can use -the `get_messages_for` method: - -```ruby -Deimos::Utils::InlineConsumer.get_messages_for( - topic: 'my-topic', - schema: 'my-schema', - namespace: 'my.namespace', - key_config: { field: 'id' }, - num_messages: 5 -) -``` - -This will run the process and simply return the last 5 messages on the -topic, as hashes, once it's done. The format of the messages will simply -be -```ruby -{ - payload: { key: value }, # payload hash here - key: "some_value" # key value or hash here -} -``` - -Both payload and key will be schema-decoded as necessary according to the -key config. - -You can also just pass an existing producer or consumer class into the method, -and it will extract the necessary configuration from it: - -```ruby -Deimos::Utils::InlineConsumer.get_messages_for( - topic: 'my-topic', - config_class: MyProducerClass, - num_messages: 5 -) -``` - -## Utilities +# Utilities You can use your configured schema backend directly if you want to encode and decode payloads outside of the context of sending messages. 
@@ -1272,14 +1061,14 @@ backend.validate(my_payload) # throws an error if not valid fields = backend.schema_fields # list of fields defined in the schema ``` -You can also do an even faster encode/decode: +You can also do an even more concise encode/decode: ```ruby encoded = Deimos.encode(schema: 'MySchema', namespace: 'com.my-namespace', payload: my_payload) decoded = Deimos.decode(schema: 'MySchema', namespace: 'com.my-namespace', payload: my_encoded_payload) ``` -## Contributing +# Contributing Bug reports and pull requests are welcome on GitHub at https://github.com/flipp-oss/deimos . @@ -1289,15 +1078,15 @@ You can/should re-generate RBS types when methods or classes change by running t rbs collection update bundle exec sord --hide-private --no-sord-comments sig/defs.rbs --tags 'override:Override' -### Linting +## Linting Deimos uses Rubocop to lint the code. Please run Rubocop on your code before submitting a PR. The GitHub CI will also run rubocop on your pull request. --- -

+

Sponsored by
- + Flipp logo

diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 85098edb..2cdf0a53 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -1,111 +1,78 @@ # Configuration -Deimos supports a succinct, readable syntax which uses -pure Ruby to allow flexible configuration. +Deimos has two methods of configuration: + +* Main Deimos configuration, which uses the [FigTree](https://github.com/flipp-oss/fig_tree) gem for its own settings. +* Karafka routing configuration, which adds extensions to existing [Karafka routes](https://karafka.io/docs/Routing/). + +The majority of application configuration, including Kafka and `librdkafka` settings, are part of existing [Karafka configuration](https://karafka.io/docs/Configuration/). + +## Main Configuration You can access any configuration value via a simple `Deimos.config.whatever`. -Nested configuration is denoted in simple dot notation: -`kafka.ssl.enabled`. Headings below will follow the nested -configurations. +Nested configuration is denoted in simple dot notation: `schema.path`. Headings below will follow the nested configurations. -## Base Configuration -Config name| Default |Description ------------|-----------------------------|----------- -logger| `Logger.new(STDOUT)` |The logger that Deimos will use. -payload_log| `:full` |Determines how much data is logged per payload.
`:full` - all keys and payloads are logged.
`:keys` - only keys are logged.
`:count` - only the total count of messages are logged. -phobos_logger| `Deimos.config.logger` |The logger passed to Phobos. -metrics| `Deimos::Metrics::Mock.new` |The metrics backend use for reporting. -tracer| `Deimos::Tracing::Mock.new` |The tracer backend used for debugging. +### Configuration Syntax -## Defining Producers +Sample: -You can define a new producer thusly: ```ruby Deimos.configure do - producer do - class_name 'MyProducer' - topic 'MyTopic' - schema 'MyTopicSchema' - namespace 'my.namespace' - key_config field: :id - - # If config.schema.path is app/schemas, assumes there is a file in - # app/schemas/my/namespace/MyTopicSchema.avsc + metrics { Deimos::Metrics::Datadog.new({host: 'localhost'}) } + schema.path "#{Rails.root}/app/schemas" + + # Multiple nested config fields via block + consumers do + session_timeout 30 + offset_commit_interval 10 end end ``` -You can have as many `producer` blocks as you like to define more producers. +### Base Configuration -Config name|Default|Description ------------|-------|----------- -class_name|nil|Class name of the producer class (subclass of `Deimos::Producer`.) -topic|nil|Topic to produce to. -schema|nil|Name of the schema to use to encode data before producing. -namespace|nil|Namespace of the schema to use when finding it locally. -key_config|nil|Configuration hash for message keys. See [Kafka Message Keys](../README.md#installation) -use_schema_classes|nil|Set to true or false to enable or disable using the producers schema classes. See [Generated Schema Classes](../README.md#generated-schema-classes) -max_batch_size|500|Maximum publishing batch size. Defaults to top-level configuration of 500. +| Config name | Default | Description | +|-------------|-----------------------------|----------------------------------------| +| metrics | `Deimos::Metrics::Mock.new` | The metrics backend use for reporting. | +| tracer | `Deimos::Tracing::Mock.new` | The tracer backend used for debugging. | -## Defining Consumers +Note that all blocks are evaluated in the context of the configuration object. +If you're calling this inside another class or method, you'll need to save +things you need to reference into local variables before calling `configure`. -Consumers are defined almost identically to producers: +### Producer Configuration -```ruby -Deimos.configure do - consumer do - class_name 'MyConsumer' - topic 'MyTopic' - schema 'MyTopicSchema' - namespace 'my.namespace' - key_config field: :id +| Config name | Default | Description | +|------------------------|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| producers.topic_prefix | nil | Add a prefix to all topic names. This can be useful if you're using the same Kafka broker for different environments that are producing the same topics. | +| producers.disabled | false | Disable all actual message producing. Generally more useful to use the `disable_producers` method instead. | +| producers.backend | `:kafka_async` | Currently can be set to `:db`, `:kafka`, or `:kafka_async`. If using Kafka directly, a good pattern is to set to async in your user-facing app, and sync in your consumers or delayed workers. | - # Setting to :inline_batch will invoke consume_batch instead of consume - # for each batch of messages. 
- delivery :batch +### Schema Configuration - # If config.schema.path is app/schemas, assumes there is a file in - # app/schemas/my/namespace/MyTopicSchema.avsc - end -end -``` +| Config name | Default | Description | +|-----------------------------|--------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| +| schema.backend | `:mock` | Backend representing the schema encoder/decoder. You can see a full list [here](../lib/deimos/schema_backends). | +| schema.registry_url | `http://localhost:8081` | URL of the Confluent schema registry. | +| schema.user | nil | Basic auth user. | +| schema.password | nil | Basic auth password. | +| schema.path | nil | Local path to find your schemas. | +| schema.use_schema_classes | false | Set this to true to use generated schema classes in your application. | +| schema.generated_class_path | `app/lib/schema_classes` | Local path to generated schema classes. | +| schema.nest_child_schemas | false | Set to true to nest subschemas within the generated class for the parent schema. | +| schema.use_full_namespace | false | Set to true to generate folders for schemas matching the full namespace. | +| schema.schema_namespace_map | {} | A map of namespace prefixes to base module name(s). Example: { 'com.mycompany.suborg' => ['SchemaClasses'] }. Requires `use_full_namespace` to be true. | + +### Outbox Configuration + +| Config name | Default | Description | +|-----------------------|------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| outbox.logger | `Deimos.config.logger` | Logger to use inside the DB producer. | +| outbox.log_topics | `[]` | List of topics to print full messages for, or `:all` to print all topics. This can introduce slowdown since it needs to decode each message using the schema registry. | +| outbox.compact_topics | `[]` | List of topics to compact before sending, i.e. only send the last message with any given key in a batch. This is an optimization which mirrors what Kafka itself will do with compaction turned on but only within a single batch. You can also specify `:all` to compact all topics. | -In addition to the producer configs, you can define a number of overrides -to the basic consumer configuration for each consumer. This is analogous to -the `listener` config in `phobos.yml`. - -Config name|Default|Description ------------|-------|----------- -class_name|nil|Class name of the consumer class (subclass of `Deimos::Consumer`.) -topic|nil|Topic to produce to. -schema|nil|This is optional but strongly recommended for testing purposes; this will validate against a local schema file used as the reader schema, as well as being able to write tests against this schema. This is recommended since it ensures you are always getting the values you expect. -namespace|nil|Namespace of the schema to use when finding it locally. -key_config|nil|Configuration hash for message keys. See [Kafka Message Keys](../README.md#installation) -disabled|false|Set to true to skip starting an actual listener for this consumer on startup. -group_id|nil|ID of the consumer group. -use_schema_classes|nil|Set to true or false to enable or disable using the consumers schema classes. 
See [Generated Schema Classes](../README.md#generated-schema-classes) -bulk_import_id_column|:bulk_import_id|Name of the column to use for multi-table imports. -replace_associations|true|If false, append to associations in multi-table imports rather than replacing them. -max_db_batch_size|nil|Maximum limit for batching database calls to reduce the load on the db. -max_concurrency|1|Number of threads created for this listener. Each thread will behave as an independent consumer. They don't share any state. -start_from_beginning|true|Once the consumer group has checkpointed its progress in the topic's partitions, the consumers will always start from the checkpointed offsets, regardless of config. As such, this setting only applies when the consumer initially starts consuming from a topic -max_bytes_per_partition|512.kilobytes|Maximum amount of data fetched from a single partition at a time. -min_bytes|1|Minimum number of bytes to read before returning messages from the server; if `max_wait_time` is reached, this is ignored. -max_wait_time|5|Maximum duration of time to wait before returning messages from the server, in seconds. -force_encoding|nil|Apply this encoding to the message payload. If blank it uses the original encoding. This property accepts values defined by the ruby Encoding class (https://ruby-doc.org/core-2.3.0/Encoding.html). Ex: UTF_8, ASCII_8BIT, etc. -delivery|`:batch`|The delivery mode for the consumer. Possible values: `:message, :batch, :inline_batch`. See Phobos documentation for more details. -session_timeout|300|Number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group. -offset_commit_interval|10|Interval between offset commits, in seconds. -offset_commit_threshold|0|Number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing -offset_retention_time|nil|The time period that committed offsets will be retained, in seconds. Defaults to the broker setting. -heartbeat_interval|10|Interval between heartbeats; must be less than the session window. -backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error. -replace_associations|nil| Whether to delete existing associations for records during bulk consumption for this consumer. If no value is specified the provided/default value from the `consumers` configuration will be used. -bulk_import_id_generator|nil| Block to determine the `bulk_import_id` generated during bulk consumption. If no block is specified the provided/default block from the `consumers` configuration will be used. -save_associations_first|false|Whether to save associated records of primary class prior to upserting primary records. Foreign key of associated records are assigned to the record class prior to saving the record class - -## Defining Database Pollers +### Defining Database Pollers These are used when polling the database via `rake deimos:db_poller`. You can create a number of pollers, one per topic. @@ -119,157 +86,94 @@ Deimos.configure do end ``` -Config name|Default|Description ------------|-------|----------- -producer_class|nil|ActiveRecordProducer class to use for sending messages. -mode|:time_based|Whether to use time-based polling or state-based polling. -run_every|60|Amount of time in seconds to wait between runs. -timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column! 
-delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish. -retries|1|The number of times to retry for a *non-Kafka* error. -full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables. Time-based only. -start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller. Time-based only. -state_column|nil|If set, this represents the DB column to use to update publishing status. State-based only. -publish_timestamp_column|nil|If set, this represents the DB column to use to update when publishing is done. State-based only. -published_state|nil|If set, the poller will update the `state_column` to this value when publishing succeeds. State-based only. -failed_state|nil|If set, the poller will update the `state_column` to this value when publishing fails. State-based only. -poller_class|nil|Inherited poller class name to use for publishing to multiple kafka topics from a single poller. - -## Kafka Configuration - -Config name|Default|Description ------------|-------|----------- -kafka.logger|`Deimos.config.logger`|Logger passed to RubyKafka. -kafka.seed_brokers|`['localhost:9092']`|URL for the Kafka brokers. -kafka.client_id|`phobos`|Identifier for this application. -kafka.connect_timeout|15|The socket timeout for connecting to the broker, in seconds. -kafka.socket_timeout|15|The socket timeout for reading and writing to the broker, in seconds. -kafka.ssl.enabled|false|Whether SSL is enabled on the brokers. -kafka.ssl.ca_certs_from_system|false|Use CA certs from system. -kafka.ssl.ca_cert|nil| A PEM encoded CA cert, a file path to the cert, or an Array of certs to use with an SSL connection. -kafka.ssl.client_cert|nil|A PEM encoded client cert to use with an SSL connection, or a file path to the cert. -kafka.ssl.client_cert_key|nil|A PEM encoded client cert key to use with an SSL connection. -kafka.sasl.enabled|false|Whether SASL is enabled on the brokers. -kafka.sasl.gssapi_principal|nil|A KRB5 principal. -kafka.sasl.gssapi_keytab|nil|A KRB5 keytab filepath. -kafka.sasl.plain_authzid|nil|Plain authorization ID. -kafka.sasl.plain_username|nil|Plain username. -kafka.sasl.plain_password|nil|Plain password. -kafka.sasl.scram_username|nil|SCRAM username. -kafka.sasl.scram_password|nil|SCRAM password. -kafka.sasl.scram_mechanism|nil|Scram mechanism, either "sha256" or "sha512". -kafka.sasl.enforce_ssl|nil|Whether to enforce SSL with SASL. -kafka.sasl.oauth_token_provider|nil|OAuthBearer Token Provider instance that implements method token. See {Sasl::OAuth#initialize}. - -## Consumer Configuration - -These are top-level configuration settings, but they can be overridden -by individual consumers. - -Config name|Default|Description ------------|-------|----------- -consumers.session_timeout|300|Number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group. -consumers.offset_commit_interval|10|Interval between offset commits, in seconds. -consumers.offset_commit_threshold|0|Number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing -consumers.heartbeat_interval|10|Interval between heartbeats; must be less than the session window. -consumers.backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error. 
-consumers.reraise_errors|false|Default behavior is to swallow uncaught exceptions and log to the metrics provider. Set this to true to instead raise all errors. Note that raising an error will ensure that the message cannot be processed - if there is a bad message which will always raise that error, your consumer will not be able to proceed past it and will be stuck forever until you fix your code. See also the `fatal_error` configuration. This is automatically set to true when using the `TestHelpers` module in RSpec. -consumers.report_lag|false|Whether to send the `consumer_lag` metric. This requires an extra thread per consumer. -consumers.fatal_error|`proc { false }`|Block taking an exception, payload and metadata and returning true if this should be considered a fatal error and false otherwise. E.g. you can use this to always fail if the database is available. Not needed if reraise_errors is set to true. -consumers.replace_associations|true|Whether to delete existing associations for records during bulk consumption prior to inserting new associated records -consumers.bulk_import_id_generator|`proc { SecureRandom.uuid }`| Block to determine the `bulk_import_id` generated during bulk consumption. Block will be used for all bulk consumers unless explicitly set for individual consumers - -## Producer Configuration - -Config name|Default|Description ------------|-------|----------- -producers.ack_timeout|5|Number of seconds a broker can wait for replicas to acknowledge a write before responding with a timeout. -producers.required_acks|1|Number of replicas that must acknowledge a write, or `:all` if all in-sync replicas must acknowledge. -producers.max_retries|2|Number of retries that should be attempted before giving up sending messages to the cluster. Does not include the original attempt. -producers.retry_backoff|1|Number of seconds to wait between retries. -producers.max_buffer_size|10_000|Number of messages allowed in the buffer before new writes will raise `BufferOverflow` exceptions. -producers.max_buffer_bytesize|10_000_000|Maximum size of the buffer in bytes. Attempting to produce messages when the buffer reaches this size will result in `BufferOverflow` being raised. -producers.compression_codec|nil|Name of the compression codec to use, or nil if no compression should be performed. Valid codecs: `:snappy` and `:gzip` -producers.compression_threshold|1|Number of messages that needs to be in a message set before it should be compressed. Note that message sets are per-partition rather than per-topic or per-producer. -producers.max_queue_size|10_000|Maximum number of messages allowed in the queue. Only used for async_producer. -producers.delivery_threshold|0|If greater than zero, the number of buffered messages that will automatically trigger a delivery. Only used for async_producer. -producers.delivery_interval|0|if greater than zero, the number of seconds between automatic message deliveries. Only used for async_producer. -producers.persistent_connections|false|Set this to true to keep the producer connection between publish calls. This can speed up subsequent messages by around 30%, but it does mean that you need to manually call sync_producer_shutdown before exiting, similar to async_producer_shutdown. -producers.schema_namespace|nil|Default namespace for all producers. Can remain nil. Individual producers can override. -producers.topic_prefix|nil|Add a prefix to all topic names. 
This can be useful if you're using the same Kafka broker for different environments that are producing the same topics. -producers.disabled|false|Disable all actual message producing. Generally more useful to use the `disable_producers` method instead. -producers.backend|`:kafka_async`|Currently can be set to `:db`, `:kafka`, or `:kafka_async`. If using Kafka directly, a good pattern is to set to async in your user-facing app, and sync in your consumers or delayed workers. -producers.max_batch_size|500|Maximum batch size for publishing. Individual producers can override. - -## Schema Configuration - -Config name|Default|Description ------------|-------|----------- -schema.backend|`:mock`|Backend representing the schema encoder/decoder. You can see a full list [here](../lib/deimos/schema_backends). -schema.registry_url|`http://localhost:8081`|URL of the Confluent schema registry. -schema.user|nil|Basic auth user. -schema.password|nil|Basic auth password. -schema.path|nil|Local path to find your schemas. -schema.use_schema_classes|false|Set this to true to use generated schema classes in your application. -schema.generated_class_path|`app/lib/schema_classes`|Local path to generated schema classes. -schema.nest_child_schemas|false|Set to true to nest subschemas within the generated class for the parent schema. -schema.use_full_namespace|false|Set to true to generate folders for schemas matching the full namespace. -schema.schema_namespace_map|{}|A map of namespace prefixes to base module name(s). Example: { 'com.mycompany.suborg' => ['SchemaClasses'] }. Requires `use_full_namespace` to be true. - -## Database Producer Configuration - -Config name|Default|Description ------------|-------|----------- -db_producer.logger|`Deimos.config.logger`|Logger to use inside the DB producer. -db_producer.log_topics|`[]`|List of topics to print full messages for, or `:all` to print all topics. This can introduce slowdown since it needs to decode each message using the schema registry. -db_producer.compact_topics|`[]`|List of topics to compact before sending, i.e. only send the last message with any given key in a batch. This is an optimization which mirrors what Kafka itself will do with compaction turned on but only within a single batch. You can also specify `:all` to compact all topics. - -## Configuration Syntax - -Sample: - +| Config name | Default | Description | +|--------------------------|---------------|---------------------------------------------------------------------------------------------------------------------------------------| +| producer_class | nil | ActiveRecordProducer class to use for sending messages. | +| mode | :time_based | Whether to use time-based polling or state-based polling. | +| run_every | 60 | Amount of time in seconds to wait between runs. | +| timestamp_column | `:updated_at` | Name of the column to query. Remember to add an index to this column! | +| delay_time | 2 | Amount of time in seconds to wait before picking up records, to allow for transactions to finish. | +| retries | 1 | The number of times to retry for a *non-Kafka* error. | +| full_table | false | If set to true, do a full table dump to Kafka each run. Good for very small tables. Time-based only. | +| start_from_beginning | true | If false, start from the current time instead of the beginning of time if this is the first time running the poller. Time-based only. | +| state_column | nil | If set, this represents the DB column to use to update publishing status. State-based only. 
| +| publish_timestamp_column | nil | If set, this represents the DB column to use to update when publishing is done. State-based only. | +| published_state | nil | If set, the poller will update the `state_column` to this value when publishing succeeds. State-based only. | +| failed_state | nil | If set, the poller will update the `state_column` to this value when publishing fails. State-based only. | +| poller_class | nil | Poller subclass name to use for publishing to multiple kafka topics from a single poller. | + +## Karafka Routing + +The following are additional settings that can be added to the `topic` block in Karafka routes, or to `defaults` blocks. + +### Shared Settings + +| Config name | Default | Description | +|--------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| payload_log | :full | Determines how much data is logged per payload.
`:full` - all keys and payloads are logged.
`:keys` - only keys are logged.
`:count` - only the total count of messages is logged. |
+| schema             | nil     | Name of the schema to use to encode data before producing.                                                                                            |
+| namespace          | nil     | Namespace of the schema to use when finding it locally.                                                                                               |
+| key_config         | nil     | Configuration hash for message keys. See [Kafka Message Keys](../README.md#kafka-message-keys).                                                       |
+| use_schema_classes | nil     | Set to true or false to enable or disable using the producer's schema classes. See [Generated Schema Classes](../README.md#generated-schema-classes). |
+
+### Consumer Settings
+
+| Config name              | Default           | Description                                                                                                                                                                                                                                            |
+|--------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| each_message             | false             | If true, use `consume_message` for each message rather than `consume_batch` for the full batch.                                                                                                                                                       |
+| reraise_errors           | false             | Default behavior is to swallow uncaught exceptions and log to the metrics provider. Set this to true to instead raise all errors. Note that raising an error will ensure that the message cannot be processed - if there is a bad message which will always raise that error, your consumer will not be able to proceed past it and will be stuck forever until you fix your code. See also the fatal_error configuration. |
+| fatal_error              | `proc { false }`  | Block taking an exception, payload and metadata and returning true if this should be considered a fatal error and false otherwise. E.g. you can use this to always fail if the database is unavailable. Not needed if reraise_errors is set to true.  |
+| max_db_batch_size        | nil               | Maximum limit for batching database calls to reduce the load on the db.                                                                                                                                                                               |
+| bulk_import_id_column    | `:bulk_import_id` | Name of the column to use for multi-table imports.                                                                                                                                                                                                    |
+| replace_associations     | true              | If false, append to associations in multi-table imports rather than replacing them.                                                                                                                                                                   |
+| bulk_import_id_generator | nil               | Block to determine the bulk_import_id generated during bulk consumption. If no block is specified, the provided/default block from the consumers configuration will be used.                                                                          |
+| save_associations_first  | false             | Whether to save associated records of primary class prior to upserting primary records.
Foreign key of associated records are assigned to the record class prior to saving the record class + +### Defining Consumers + +An example consumer: ```ruby -Deimos.configure do - logger Logger.new(STDOUT) - # Nested config field - kafka.seed_brokers ['my.kafka.broker:9092'] - - # Multiple nested config fields via block - consumers do - session_timeout 30 - offset_commit_interval 10 +Karafka::App.routes.draw do + defaults do + payload_log :keys end - - # Define a new producer - producer do - class_name 'MyProducer' - topic 'MyTopic' + + topic 'MyTopic' do + namespace 'my-namespace' + consumer MyConsumer schema 'MyTopicSchema' key_config field: :id - end - # Define another new producer - producer do - class_name 'AnotherProducer' - topic 'AnotherTopic' - schema 'AnotherSchema' - key_config plain: true + # If config.schema.path is app/schemas, assumes there is a file in + # app/schemas/my/namespace/MyTopicSchema.avsc end +end +``` - # Define a consumer - consumer do - class_name 'MyConsumer' - topic 'TopicToConsume' - schema 'ConsumerSchema' - key_config plain: true - # include Phobos / RubyKafka configs - start_from_beginning true - heartbeat_interval 10 +### Producer Settings + +| Config name | Default | Description | +|----------------|---------|------------------------------------------------------------------------------------------------------------| +| producer_class | nil | Class of the producer to use for the current topic. | +| disabled | false | Disable all actual message producing. Generally more useful to use the `disable_producers` method instead. | + +## Defining Producers + +You can define a new producer almost identically to consumers: +```ruby +Karafka::App.routes.draw do + defaults do + namespace 'my.namespace' end + topic 'MyTopic' do + producer_class MyProducer + schema 'MyTopicSchema' + key_config field: :id + payload_log :count + # If config.schema.path is app/schemas, assumes there is a file in + # app/schemas/my/namespace/MyTopicSchema.avsc + end end ``` -Note that all blocks are evaluated in the context of the configuration object. -If you're calling this inside another class or method, you'll need to save -things you need to reference into local variables before calling `configure`. diff --git a/docs/UPGRADING.md b/docs/UPGRADING.md index 083fea65..9defe7b7 100644 --- a/docs/UPGRADING.md +++ b/docs/UPGRADING.md @@ -1,5 +1,238 @@ # Upgrading Deimos +## Upgrading to 2.x + +2.x is a major rewrite from 1.0. The underlying library has been changed from [Phobos](https://github.com/phobos/phobos) to [Karafka](https://karafka.io/). This change has given us an opportunity to fix some issues and deprecated code paths from version 1.0 as well as provide much more functionality by integrating more fully with the Karafka ecosystem. + +For a deeper dive into the internal changes, please see [...](). + +There are a number of breaking changes. We provide a `v2` generator to attempt to auto-fix many of these breaking changes automatically. 
To run the generator:
+
+    KARAFKA_BOOT_FILE=false rails g deimos:v2
+
+### Configuration
+
+In V1, Deimos configuration was all done in a single `Deimos.configure` block, including Kafka configs, consumers and producers:
+
+```ruby
+Deimos.configure do
+  producers.schema_namespace 'com.my-namespace'
+  kafka.seed_brokers ['my-broker:9092']
+
+  consumer do
+    class_name 'MyConsumer'
+    topic 'MyTopic'
+    session_timeout 30
+    schema 'MySchema'
+    key_config field: :id
+    namespace 'com.my-namespace'
+  end
+
+  producer do
+    class_name 'MyProducer'
+    topic 'MyTopic2'
+    schema 'MySchema2'
+    key_config none: true
+  end
+end
+```
+
+In V2, the `Deimos.configure` block now only takes Deimos-specific settings, and is **not** used to configure producers and consumers. Kafka settings now go in the Karafka `kafka` setting method, and producers and consumers use Karafka [routing](https://karafka.io/docs/Routing/). There are Deimos-specific extensions to routing that apply to consumers and producers, either via a `defaults` block (applying to all consumers and producers) or in individual `topic` blocks:
+
+```ruby
+Deimos.configure do
+  producers.schema_namespace 'com.my-namespace'
+end
+
+class KarafkaApp < Karafka::App
+  setup do |config|
+    config.kafka = {
+      "bootstrap.servers": "my-broker:9092"
+    }
+  end
+
+  routes.draw do
+    defaults do
+      namespace "com.my-namespace"
+    end
+
+    topic "MyTopic" do
+      # Karafka settings
+      consumer MyConsumer
+      kafka({"session.timeout.ms": 30_000})
+      # Deimos settings
+      schema "MySchema"
+      key_config({field: :id})
+    end
+
+    topic "MyTopic2" do
+      # these are all Deimos settings since Karafka doesn't actually do per-topic producer configs
+      producer_class MyProducer
+      schema 'MySchema2'
+      key_config none: true
+    end
+  end
+end
+```
+
+This configuration must be in a file called `karafka.rb` at the root of your application. The V2 generator will generate this file for you. Without the generator, if you have this file and start up your app with the old `Deimos.configure` code, you will get notifications of the correct places to put these settings.
+
+
+### Removed deprecations
+
+The following were deprecated in version 1.x and are removed in 2.0.
+
+* The `kafka_producer` method for KafkaSource is no longer supported. Please use `kafka_producers`. (This is not addressed by the V2 generator.)
+
+```ruby
+# before:
+class MyRecord < ApplicationRecord
+  def kafka_producer
+    MyProducer
+  end
+end
+
+# after:
+class MyRecord < ApplicationRecord
+  def kafka_producers
+    [MyProducer]
+  end
+end
+```
+
+* The `record_attributes` method for ActiveRecordConsumer now must take two parameters, not one. (The V2 generator can fix this.)
+
+```ruby
+# before:
+class MyConsumer < Deimos::ActiveRecordConsumer
+  def record_attributes(payload)
+    # ...
+  end
+end
+
+# after:
+class MyConsumer < Deimos::ActiveRecordConsumer
+  def record_attributes(payload, key)
+    # ...
+  end
+end
+```
+
+* The `BatchConsumer` class has been removed. Please use the `Consumer` class (see the sketch after this list).
+* You can no longer configure your application using a `phobos.yml` file. The V2 generator will not be able to work on apps using this approach.
+* Removed `test_consume_invalid_message` and `test_consume_batch_invalid_message` test helpers. These did not serve a useful purpose.
+* The following deprecated testing functions have been removed: `stub_producers_and_consumers!`, `stub_producer`, `stub_consumer`, `stub_batch_consumer`. These have not done anything in a long time.
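+
+As a minimal sketch of the `BatchConsumer` change above (using a hypothetical `MyBatchConsumer` class), the migration is usually just a superclass swap; the new consume methods themselves are covered under "Major breaking changes" below:
+
+```ruby
+# before (1.x):
+class MyBatchConsumer < Deimos::BatchConsumer
+end
+
+# after (2.x):
+class MyBatchConsumer < Deimos::Consumer
+end
+```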
+ +### Major breaking changes +* Since Karafka only supports Ruby >= 3.0, that means Deimos also only supports those versions. +* Deimos no longer supports a separate logger from Karafka. When you configure a Karafka logger, Deimos will use that logger for all its logging. (Deimos logs will be prefixed with a `[Deimos]` tag.) +* The `:db` backend has been renamed to `:outbox`. All associated classes (like `DbProducer`) have likewise been renamed. The Rake task has also been renamed to `rake deimos:outbox`. +* The `SchemaControllerMixin` has been removed as there was no serious usage for it. +* `InlineConsumer` has been removed - Karafka Pro has an [Iterator API](https://karafka.io/docs/Pro-Iterator-API/) that does the same thing. There also has been no evidence that it was used (and was probably pretty buggy). +* The `:test` backend has been removed and the `Deimos::TestHelpers` module is now largely powered by [karafka-testing](https://github.com/karafka/karafka-testing/). This means that you can no longer use `Deimos::Backends::Test.sent_messages` - you need to use `Deimos::TestHelpers.sent_messages`. (The V2 generator should fix this.) +* Individual consumer and producer settings now live within Karafka route configuration. This means you can no longer call e.g. `consumer.schema` to retrieve this information, as settings are no longer stored directly on the consumer and producer objects (it is still available, but via different methods). +* Consumers should no longer define a `consume` method, as the semantics have changed with Karafka. Instead, you can define a `consume_message` or `consume_batch` method. Both of these methods now take Karafka `Message` objects instead of hashes. The V2 generator can handle translating this for you, but if you create new consumers, you should take advantage of the Karafka functionality and use it first-class. +* Phobos `delivery_method` is no longer relevant. Instead, specify an `each_message` setting for your consumer. If set to true, you should define a `consume_message` method. Otherwise, you should define a `consume_batch` method. (Note that this is the reverse from the previous default, which assumed `delivery_method: message`.) The V2 generator will create the correct setting for each consumer. + +```ruby +# before: +class MyConsumer < Deimos::Consumer + def consume(payload, metadata) + # payload and metadata are both hashes + end + + # OR with delivery_method: inline_batch + def batch_consume(payloads, metadata) + # payloads is an array of hashes, metadata is a hash + end +end + +# now: +class MyConsumer < Deimos::Consumer + def consume_batch + payloads = messages.payloads # messages is an instance method and `payloads` will return the decoded hashes + end + + # OR with batch(false) + def consume_message(message) + # message is a Karafka Message object + payload = message.payload + key = message.key # etc. + end +end +``` + +### Metrics + +The following metrics have been **removed** in favor of Karafka's more robust [DataDog metrics](https://karafka.io/docs/Monitoring-and-Logging/#datadog-and-statsd-integration) and WaterDrop's [DataDog metrics](https://karafka.io/docs/WaterDrop-Monitoring-and-Logging/#datadog-and-statsd-integration): +* `consumer_lag` (use `consumer.lags`) +* `handler` (use `consumer.consumed.time_taken`) +* `publish` (use `produced_sync` and `produced_async`) +* `publish_error` (use `deliver.errors`) + +You will need to manually add the DataDog MetricsListener as shown in the above pages. 
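+
+As a rough consumer-side sketch based on the Karafka documentation (the StatsD host/port and where you place this code are assumptions for your own setup):
+
+```ruby
+# e.g. in karafka.rb or an initializer
+require 'datadog/statsd'
+require 'karafka/instrumentation/vendors/datadog/metrics_listener'
+
+listener = Karafka::Instrumentation::Vendors::Datadog::MetricsListener.new do |config|
+  # Point this at your own Datadog/StatsD agent
+  config.client = Datadog::Statsd.new('localhost', 8125)
+end
+
+Karafka.monitor.subscribe(listener)
+```
+
+WaterDrop has an analogous producer-side listener; see the WaterDrop link above for its setup.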
+ +The following metrics have been **renamed**: + +* `db_producer.insert` -> `outbox.insert` +* `db_producer.process` -> `outbox.process` + +### Instrumentation + +Deimos's own instrumentation layer has been removed in favor of Karafka's. You can still subscribe to Deimos notifications - you simply do it via Karafka's monitor instead of Deimos's. + +```ruby +# before: +Deimos.subscribe('encode_messages') do |event| + # ... +end + +# after: +Karafka.monitor.subscribe('deimos.encode_messages') do |event| + # ... +end +``` + +Note that Karafka's monitors do not support the legacy "splatted" subscribe: +```ruby +Deimos.subscribe("event") do |*args| + payload = ActiveSupport::Notifications::Event.new(*args).payload +end +``` + +The following instrumentation events have been **removed** in favor of Karafka's [events](https://karafka.io/docs/Monitoring-and-Logging/#subscribing-to-the-instrumentation-events): + +* `produce_error` (use `error.occurred`) + +The following events have been **renamed**: +* `encode_messages` -> `deimos.encode_message` (**note that only one message is instrumented at a time now**) +* `db_producer.produce` -> `deimos.outbox.produce` +* `batch_consumption.valid_records` -> `deimos.batch_consumption.valid_records` +* `batch_consumption.invalid_records` -> `deimos.batch_consumption.invalid_records` + +### Additional breaking changes +* `key_config` now defaults to `{none: true}` instead of erroring out if not set. +* `fatal_error?` now receives a Karafka `messages` object instead of a payload hash or array of hashes. +* `watched_attributes` has been moved from the corresponding ActiveRecord class to the ActiveRecordProducer class. The object being watched is passed into the method. +* Removed `TestHelpers.full_integration_test!` and `kafka_test!` as Karafka does not currently support these use cases. If we need them back, we will need to put in changes to the testing library to support them. +* `test_consume_message` and `test_consume_batch` used to not fully validate schemas when using the `:avro_validation` backend. Now these are fully validated, which may cause test errors when upgrading. + +### New functionality + +* When setting up a Datadog metrics client, you can pass `:karafka_namespace`, `:karafka_distribution_mode`, or `:rd_kafka_metrics` tags to specify the Karafka settings for Datadog metrics. +- The `payload_log` setting now works for consumers as well as producers, as it is now a topic setting. +- You can publish messages **without a Deimos Producer class**. Karafka producers take a hash with `:message`, `:topic`, `:key`, `:headers` and `:partition_key` keys. As long as the topic is configured in `karafka.rb`, you don't need a special class to send the message. You can simply call `Karafka.producer.produce()`. +- The only features that are now available on the bare Producer (as opposed to ActiveRecordProducer) class are: + - Outbox backend + - Instance method to determine partition key (rather than passing it in) + - Using `Deimos.disable_producers` +- If you need these features, you must continue using a `Deimos::Producer`. +- You can now call `.produce(messages)` directly on a `Deimos::Producer` which allows for use of these features while still passing a Karafka message hash. This removes the need to add a `payload_key` key into your payload. This is now the recommended method to use in a Deimos Producer. + +### New deprecations +* For testing, you no longer have to call `unit_test!` to get the right settings. It is handled automatically by Karafka. 
The only thing this method now does is set the schema backend to `:avro_validation`, and you can do that in a single line. +* The `skip_expectation` and `call_original` arguments to `test_consume_message` and `test_consume_batch` have been deprecated and no longer need to be provided. The assumption is that `call_original` is always true. + ## Upgrading from < 1.5.0 to >= 1.5.0 If you are using Confluent's schema registry to Avro-encode your From b559f941d1ae59ae2e9ebd18d9cd8a8d8bb9293f Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 31 May 2024 15:01:05 -0400 Subject: [PATCH 18/18] bump fig_tree version --- deimos-ruby.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deimos-ruby.gemspec b/deimos-ruby.gemspec index 8379ee51..3bfe2498 100644 --- a/deimos-ruby.gemspec +++ b/deimos-ruby.gemspec @@ -20,7 +20,7 @@ Gem::Specification.new do |spec| spec.add_runtime_dependency('avro_turf', '>= 1.4', '< 2') spec.add_runtime_dependency('karafka', '~> 2.0') - spec.add_runtime_dependency('fig_tree', '~> 0.0.2') + spec.add_runtime_dependency('fig_tree', '~> 0.2.0') spec.add_runtime_dependency('sigurd', '>= 0.1.0', '< 1.0') spec.add_development_dependency('activerecord-import')