Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix dcc ingestor #1023

Merged
merged 1 commit into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions lib/ingestors/dcc_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,24 @@ def read(url)
private

def process_dcc(url)
event_page = Nokogiri::HTML5.parse(open_url(url.to_s, raise: true)).css("div[class='archive__content grid']")[0].css("div[class='column span-4-sm span-8-md span-6-lg']")
# event_page = Nokogiri::HTML5.parse(open_url(url.to_s, raise: true)).css('main > .archive__content.grid > .column.span-4-sm.span-4-md.span-6-lg')
event_page = Nokogiri::HTML5.parse(open_url(url.to_s, raise: true)).css('main > article > .archive__grid > .column > .archive__content > .column')
event_page.each do |event_data|
event = OpenStruct.new

event.url = event_data.css("h2[class='post-item__title h5']")[0].css("a")[0].get_attribute('href')
event.title = event_data.css("h2[class='post-item__title h5']")[0].css("a")[0].text.strip
event.url = event_data.css('h2.post-item__title > a')[0].get_attribute('href')
event.title = event_data.css('h2.post-item__title > a')[0].text.strip

start_str = event_data.css("ul[class='post-item__meta']")[0].css("li")[0].text.strip.split('—')
event.start = Time.zone.parse(start_str[0])
event.end = Time.zone.parse(start_str[0]).beginning_of_day + Time.zone.parse(start_str[1]).seconds_since_midnight.seconds
start_str = event_data.css('ul.post-item__meta > li')[0].text.strip.split('—')
if start_str[1].include?(':')
event.start = Time.zone.parse(start_str[0])
event.end = Time.zone.parse(start_str[0]).beginning_of_day + Time.zone.parse(start_str[1]).seconds_since_midnight.seconds
else
event.start = Time.zone.parse(start_str[0])
event.end = Time.zone.parse(start_str[1])
end

event.venue = event_data.css("ul[class='post-item__meta']")[0].css("li")[1].text.strip
event.venue = event_data.css('ul.post-item__meta > li')[1].text.strip

event.source = 'DCC'
event.timezone = 'Amsterdam'
Expand Down
16 changes: 8 additions & 8 deletions test/unit/ingestors/dcc_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,23 @@ class DccIngestorTest < ActiveSupport::TestCase
ingestor = Ingestors::DccIngestor.new

# check event doesn't exist yet
new_title = "DCC-PO dag"
new_url = 'https://dcc-po.nl/agenda/dcc-po-dag/'
new_title = 'Training FAIR data management'
new_url = 'https://dcc-po.nl/agenda/training-fair-data-management/'
refute Event.where(title: new_title, url: new_url).any?

# run task
assert_difference 'Event.count', 1 do
assert_difference 'Event.count', 2 do
freeze_time(2019) do
VCR.use_cassette("ingestors/dcc") do
VCR.use_cassette('ingestors/dcc') do
ingestor.read(source.url)
ingestor.write(@user, @content_provider)
end
end
end

assert_equal 1, ingestor.events.count
assert_equal 2, ingestor.events.count
assert ingestor.materials.empty?
assert_equal 1, ingestor.stats[:events][:added]
assert_equal 2, ingestor.stats[:events][:added]
assert_equal 0, ingestor.stats[:events][:updated]
assert_equal 0, ingestor.stats[:events][:rejected]

Expand All @@ -51,8 +51,8 @@ class DccIngestorTest < ActiveSupport::TestCase
# check other fields
assert_equal 'DCC', event.source
assert_equal 'Amsterdam', event.timezone
assert_equal Time.zone.parse('Mon, 09 Oct 2019 10:00:00.000000000 UTC +00:00'), event.start
assert_equal Time.zone.parse('Mon, 09 Oct 2019 16:30:00.000000000 UTC +00:00'), event.end
assert_equal Time.zone.parse('Fri, 30 Sep 2019 09:00:00.000000000 UTC +00:00'), event.start
assert_equal Time.zone.parse('Sat, 14 Oct 2019 17:00:00.000000000 UTC +00:00'), event.end
assert_equal 'Domstad, Utrecht', event.venue
end
end
26 changes: 16 additions & 10 deletions test/vcr_cassettes/ingestors/dcc.yml

Large diffs are not rendered by default.

Loading