Skip to content

Commit

Permalink
Merge pull request #825 from security-force-monitor/hcg/mm-import
Browse files Browse the repository at this point in the history
Remove redundant import steps for easier debugging
  • Loading branch information
smcalilly committed Feb 28, 2024
2 parents c8a00b4 + c3bc6f9 commit b8f0638
Show file tree
Hide file tree
Showing 14 changed files with 2,395 additions and 1,789 deletions.
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ PG_HOST=localhost
PG_USER=datamade
PG_PASSWORD=

SOURCE_DATA_PATH=sfm_pc/management/commands/country_data/countries
DATA_ARCHIVE_PATH=data/wwic_download/countries

.PHONY : import_directory import_db flush_db recreate_db

Expand Down Expand Up @@ -43,4 +45,4 @@ recreate_db : import_directory flush_db import_docket_import data_archive
clean :
rm auth_models.json *errors.csv

include docket.mk
include docket.mk download.mk
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ appropriate directory, and fire the recipe to build a fresh database:
```
tmux new -s fresh-import
sudo su - datamade
workon sfm
source ~/.virtualenvs/sfm/bin/activate
cd ~/sfm-importer
make recreate_db
```
Expand All @@ -376,7 +376,7 @@ Finally, switch the `sfm` and `importer` databases:

```
# Renames the databases in a transaction -- the app doesn't need to stop
psql postgres < sfm_pc/management/commands/flush/rename.sql
psql -U postgres < sfm_pc/management/commands/flush/rename.sql
```

Presto! A fresh import, with no server downtime.
6 changes: 3 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ services:
- .:/app
environment:
- IMPORT_DIRECTORY=${IMPORT_DIRECTORY}
- PG_HOST=${PG_HOST}
- PG_USER=${PG_USER}
- PG_PASSWORD=${PG_PASSWORD}
- PG_HOST=postgres
- PG_USER=sfm
- PG_PASSWORD=postgres
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
command: python manage.py runserver 0.0.0.0:8000
Expand Down
66 changes: 11 additions & 55 deletions docket.mk
Original file line number Diff line number Diff line change
@@ -1,67 +1,23 @@
# Path variable for the source data
SOURCE_DATA_PATH=sfm_pc/management/commands/country_data/countries
# Remove previously downloaded source data so the next import starts clean.
# SOURCE_DATA_PATH is defined outside this file; abort if it is empty so the
# recipe can never expand to `rm -rf /*`.
clean_import :
	$(if $(strip $(SOURCE_DATA_PATH)),,$(error SOURCE_DATA_PATH is empty - refusing to run rm -rf))
	rm -rf $(SOURCE_DATA_PATH)/*

# Variables for the archive data
DATA_ARCHIVE_PATH=data/wwic_download/countries
COUNTRY_NAMES=$(shell perl -pe "s/,/ /g" import_docket.csv | cut -d' ' -f5)
ENTITIES=units.csv persons.csv incidents.csv locations.csv locations.geojson sources.csv
.PHONY: $(SOURCE_DATA_PATH) source_import clean_import

.PHONY: $(SOURCE_DATA_PATH) data/wwic_download/countries data_archive wwic_download.zip directories data/wwic_download/metadata/sfm_research_handbook.pdf


# Create the data archive and upload it to S3
data_archive : wwic_download.zip
aws s3 cp $< s3://$(shell cat configs/s3_config.json | jq -r '.data_archive_bucket')/
rm $<

wwic_download.zip : filtered_data data/wwic_download/metadata/sfm_research_handbook.pdf
cd data/wwic_download && zip -r ../../$@ .

filtered_data: directories $(SOURCE_DATA_PATH) $(foreach country,$(COUNTRY_NAMES),$(patsubst %,$(country)_%,$(ENTITIES)))
echo "filtered csvs for entities"

directories :
mkdir -p $(foreach country,$(COUNTRY_NAMES),$(DATA_ARCHIVE_PATH)/$(country))

define filter_entity_data
$(shell csvgrep --columns $(1):status:admin --match 3 $< | \
python data/processors/blank_columns.py --entity $(1) > $(DATA_ARCHIVE_PATH)/$*/$@)
endef

%_units.csv : $(SOURCE_DATA_PATH)/%/units.csv
$(call filter_entity_data,unit)

%_persons.csv : $(SOURCE_DATA_PATH)/%/persons.csv
$(call filter_entity_data,person)

%_incidents.csv : $(SOURCE_DATA_PATH)/%/incidents.csv
$(call filter_entity_data,incident)

%_sources.csv : $(SOURCE_DATA_PATH)/%/sources.csv
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

%_locations.csv : $(SOURCE_DATA_PATH)/%/locations.csv
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

%_locations.geojson : $(SOURCE_DATA_PATH)/%/locations.geojson
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

data/wwic_download/metadata/sfm_research_handbook.pdf :
curl -o $@ https://help.securityforcemonitor.org/_/downloads/en/latest/pdf/


# Download the source data and load it into the database
%_import : %.csv $(SOURCE_DATA_PATH)
%_import : %.csv $(SOURCE_DATA_PATH) source_import
perl -pe "s/,/ /g" $< | \
xargs -L1 bash -c ' \
echo "Loading data for country code $$3" && (\
echo "Loading data for country code $$3 from $(SOURCE_DATA_PATH)/$$4" && (\
python -u manage.py import_country_data \
--country_code $$3 \
--country_path $(word 2, $^)/$$4 \
--sources_path $(word 2, $^)/sources.csv || \
--country_path $(SOURCE_DATA_PATH)/$$4 || \
exit 255 \
)'

# Run the import_source_data management command against
# $(SOURCE_DATA_PATH)/sources.csv. The %_import rule lists this target as a
# prerequisite.
source_import : $(SOURCE_DATA_PATH)
echo "Loading source data" && \
python -u manage.py import_source_data \
--sources_path $(SOURCE_DATA_PATH)/sources.csv

$(SOURCE_DATA_PATH) : import_docket.csv
perl -pe "s/,/ /g" $< | \
xargs -L1 bash -c ' \
Expand Down
49 changes: 49 additions & 0 deletions download.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Variables for the archive data
# COUNTRY_NAMES is the fifth field of each import_docket.csv line once commas
# have been replaced by spaces. It is assigned with := so the perl/cut pipeline
# runs once when the makefile is parsed instead of on every expansion.
COUNTRY_NAMES := $(shell perl -pe "s/,/ /g" import_docket.csv | cut -d' ' -f5)
ENTITIES := units.csv persons.csv incidents.csv locations.csv locations.geojson sources.csv

# Remove everything under the archive directory.
# DATA_ARCHIVE_PATH is defined outside this file; abort if it is empty so the
# recipe can never expand to `rm -rf /*`.
clean_archive :
	$(if $(strip $(DATA_ARCHIVE_PATH)),,$(error DATA_ARCHIVE_PATH is empty - refusing to run rm -rf))
	rm -rf $(DATA_ARCHIVE_PATH)/*

.PHONY : $(DATA_ARCHIVE_PATH) data_archive wwic_download.zip directories \
data/wwic_download/metadata/sfm_research_handbook.pdf clean_archive

# Create the data archive and upload it to S3
# The bucket name is read from the `data_archive_bucket` key of
# configs/s3_config.json. $< is wwic_download.zip, which is removed locally
# once the copy command has run.
data_archive : wwic_download.zip
aws s3 cp $< s3://$(shell cat configs/s3_config.json | jq -r '.data_archive_bucket')/
rm $<

# Zip the contents of data/wwic_download. The archive is written two levels
# up from that directory (../../$@), i.e. into the directory make runs in.
wwic_download.zip : filtered_data data/wwic_download/metadata/sfm_research_handbook.pdf
cd data/wwic_download && zip -r ../../$@ .

# Aggregate target: requires the per-country archive directories, the source
# data, and one <country>_<entity> target for every combination of
# COUNTRY_NAMES and ENTITIES.
filtered_data: directories $(SOURCE_DATA_PATH) $(foreach country,$(COUNTRY_NAMES),$(patsubst %,$(country)_%,$(ENTITIES)))
echo "filtered csvs for entities"

# Create one directory per entry of COUNTRY_NAMES under $(DATA_ARCHIVE_PATH).
directories :
mkdir -p $(foreach country,$(COUNTRY_NAMES),$(DATA_ARCHIVE_PATH)/$(country))

# Recipe template used by the units/persons/incidents pattern rules.
#   $(1) - entity name (unit, person, incident)
# Runs csvgrep on the first prerequisite ($<) against the
# `$(1):status:admin` column with `--match 3`, pipes the result through
# data/processors/blank_columns.py, and writes it to
# $(DATA_ARCHIVE_PATH)/<stem>/<target>.
# The pipeline is emitted as ordinary recipe text rather than wrapped in
# $(shell ...): $(shell) discards the exit status, so a failed filter would
# leave an empty or partial file without stopping make.
define filter_entity_data
csvgrep --columns $(1):status:admin --match 3 $< | python data/processors/blank_columns.py --entity $(1) > $(DATA_ARCHIVE_PATH)/$*/$@
endef

# Per-country, per-entity rules. The stem (%) is the country directory name
# under $(SOURCE_DATA_PATH). Units, persons and incidents go through
# filter_entity_data; sources and locations files are copied unchanged.
# Every recipe writes to $(DATA_ARCHIVE_PATH)/<stem>/<target> rather than to a
# file named after the target itself.
%_units.csv : $(SOURCE_DATA_PATH)/%/units.csv
$(call filter_entity_data,unit)

%_persons.csv : $(SOURCE_DATA_PATH)/%/persons.csv
$(call filter_entity_data,person)

%_incidents.csv : $(SOURCE_DATA_PATH)/%/incidents.csv
$(call filter_entity_data,incident)

%_sources.csv : $(SOURCE_DATA_PATH)/%/sources.csv
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

%_locations.csv : $(SOURCE_DATA_PATH)/%/locations.csv
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

%_locations.geojson : $(SOURCE_DATA_PATH)/%/locations.geojson
cp $< $(DATA_ARCHIVE_PATH)/$*/$@

# Download the research handbook PDF that is bundled into the archive.
# mkdir -p creates the metadata directory when it does not exist yet.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the PDF; --location follows redirects to the actual file.
data/wwic_download/metadata/sfm_research_handbook.pdf :
	mkdir -p $(@D)
	curl --fail --location -o $@ https://help.securityforcemonitor.org/_/downloads/en/latest/pdf/
26 changes: 1 addition & 25 deletions fixtures/import_docket.csv
Original file line number Diff line number Diff line change
@@ -1,26 +1,2 @@
source_document_id,location_document_id,entitity_document_id,sfm:iso,sfm:country_name
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1ztOfGaQT3WDrq-BOjT0x5VErzgrWQ0Ku,1Ck11zLFVP6iJZFAR0_Xsq0UaeEJrmFl7ysbFX9mGu7c,ae,united-arab-emirates
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1HpIjYaH_iMCRQD1jP159VGz-2NL4nB5p,1EqAi59wjE1v-bYX3cC1qdl6zkThpWJ8YcvSPUC-RGHc,bd,bangladesh
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1j8KgLnpjlnLy6bN4ozkwnBpkHUI6i3si,1wBmSuTkoEhosDzfHtyvZqd9SKez-sWoPoJ9oPonWsSo,bf,burkina-faso
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1mjTLMZ1la3zyiVQxLZ56sW497Sp8Lh5m,1c0O2XlwSpTAtB0AdhkkdgevWbsBUxvsmsETUwPPVIlk,bh,bahrain
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qZoQciglG1DOeEa3hh5iUvF7q4_bKOQl,1cZVy2PUAzeq2xOoLRLwL9z9mqbry32zv_XY7sjEih2c,eg,egypt
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1eZHw6k7xM7Z_ZNMnF0Wc5cjJuVyuOop3,1CKkNsXwRdwXDiOldwT-6baw9DayXA2Vsn4ttpwP9SuM,jo,jordan
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1vnWgeTkq5TUyF7555F4renMJnl4WiFNy,1Y6-9-9kai-YyK1pXvcv_W6fqUn9lORltUhuFc2YUu1I,kw,kuwait
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,13XnZOF7U0uwL3EP_QpdTVd1FUh3A3cwi,1aGbMvFHzGn9ZlKKcFhiQ2c9egsoGDH11QBgyqmhS-IM,lr,liberia
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1fnTq_ONVxzVBbCyQy_-s6ngmadA6st70,16962grIJlisFbh2Zp9kBAhv6jVnZz6bHgb6RGBUHd3o,ma,morocco
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1syUOihIFKzk6QsLXd7XNUZIwzZZfAqEH,1UcgoJ_ytS-WSWl2_5OuV9h92wSCBWRFBoDtr4Ztqt14,ml,mali
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1dU24WM8MAnqApFcBoYCiKPzPevebal6_,1vwb7ENaOeVRJIc5iCDBbF8K0Oql4SscENmLEdUT77Hg,mm,myanmar
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,17Zqej6mrCT3BBBKcKj7949qHyRCa-9SJ,1cUtCEUuZRMqcxlRqFyoEM9eAdiDdWy2DUocroYivCx4,mr,mauritania
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nVWV5_1kGDwyWJ3PPqExKfchs3sAlEuh,168KuHwUr9565zWaQVZ5au3qtGOb-qyJx_WOwNzqt_Eo,mx,mexico
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qlHquI9EDz2lteBcjz_MheNLspg3mp_q,1_Pj5BryFXUPQPmMigII8G2HBUrpsnkK5V-Zu_9LCdGw,ne,niger
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1boFMPurqyxbfYBvfisRiROmzr8TuPI8j,1f3W3qJATCzVjZGw239Wy3D25THs8ThnvoC24aUFaGZQ,ng,nigeria
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nMXXtFwJ3TqeynpKSW11uYAzihSMV8So,1Uc5eZswLB6mrwQLhd_OYQm7v7ThH99N0eb7RbTtD5iY,np,nepal
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,19o8a1zwxUEYFxvZkqs2AwCyIm0oe_CPF,1h1a0S5aVv9Z3wucgKsYXmg5Z_CWzsKfjJSfJFcXxPSY,ph,philippines
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1x4LjK_UWdxZm5EyNCupj7ikv7E-WMrkd,1UGOxjmJdJ9Dzj8cX3mZkgXAzT_ap_EMD2OqLjzDeGeE,qa,qatar
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1wSzKP9bsXB9w4U8frj4Y5kHrfV3C6Vi8,1QAgVpj0bf_A0HGFzHgwxBbZqgIFurfH4h7u1MnfKzJc,rw,rwanda
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1FLls5iHQD71Omy4VEzXYQ4HacMubzg8v,1a9XRXK5rG4_n0Afw7tIDkIbAmdydqKcU8J8zx5pLnVU,sa,saudi-arabia
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1tNS4yJYlh265zDy9rQnjAZSqSmBZxrRh,11dEjFSe56YdmJfVeKhRZpQKSgRb6mfM1DWKoNFxYg9Y,sd,sudan
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1l3NE8P8Xi-1qGwqZcdVdvV3Hn1h4Bwjv,1YxRrB39ItO_kEPTrMQ9FJlvMEp1Fjby0vchHiwW3C_I,sl,sierra-leone
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1yPQVFwbQ4edUfBhgAbL2o9DAOljZigTF,15cnbBqIlp4LzEXrs2z2L4_RTnY5e1GMrGV150JV615Q,td,chad
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1cyrCvMKVRHJtpQtcbTpoboJc9iNZ-oHy,1WlN4Hbv3JKE76hnNYkr80HU9oNJwjjOnj9nt7mm9ddw,ug,uganda
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1Ii31JX8y2InKt-FnHK-6kaqVK41XBOzY,1r62axKA5xgvJAiSiHrKgHZSATwSkKB-K15fdmLbn3zo,ye,yemen
1d2FIMxqeL7Oa1hQrnbuFuzNr2lVLwfD7hknTbX8E-Dw,12O-PyMp4CN7O8ZdnZpCNm8Rs3lzLfMvo,1uVz_9edm0ejSGOHCRV2BWZoPPKOX64XpBORjm47hopU,mm,myanmar
101 changes: 62 additions & 39 deletions location/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,21 @@


class LocationManager(models.Manager):

def from_humane_id(self, humane_id):
if isinstance(humane_id, str):
return self.get(**{'sfm__location:humane_id:admin': humane_id})
return self.get(
**{
"sfm__location:humane_id:admin": humane_id,
"geometry__isnull": False,
}
)

return self.filter(**{'sfm__location:humane_id:admin__in': humane_id})
return self.filter(
**{
"sfm__location:humane_id:admin__in": humane_id,
"geometry__isnull": False,
}
)


class Location(models.Model):
Expand All @@ -21,16 +30,20 @@ class Location(models.Model):
feature_type = models.TextField(blank=True, null=True)
tags = models.JSONField(blank=True, null=True)
sfm = models.JSONField(blank=True, null=True)
adminlevel1 = models.ForeignKey('self',
related_name='area_locations',
on_delete=models.CASCADE,
null=True,
blank=True)
adminlevel2 = models.ForeignKey('self',
related_name='place_locations',
on_delete=models.CASCADE,
null=True,
blank=True)
adminlevel1 = models.ForeignKey(
"self",
related_name="area_locations",
on_delete=models.CASCADE,
null=True,
blank=True,
)
adminlevel2 = models.ForeignKey(
"self",
related_name="place_locations",
on_delete=models.CASCADE,
null=True,
blank=True,
)
adminlevel = models.CharField(max_length=50, null=True, blank=True)
geometry = GeometryField(blank=True, null=True)

Expand Down Expand Up @@ -60,42 +73,52 @@ def related_entities(self):
for associationarea in self.associationarea_set.all():
association = associationarea.object_ref
organization = association.organization.get_value().value
related_entities.append({
'name': organization.name.get_value().value,
'entity_type': _('Organization'),
'start_date': association.startdate.get_value(),
'end_date': association.enddate.get_value(),
'open_ended': association.open_ended.get_value(),
'url': reverse('view-organization', kwargs={'slug': organization.uuid}),
})
related_entities.append(
{
"name": organization.name.get_value().value,
"entity_type": _("Organization"),
"start_date": association.startdate.get_value(),
"end_date": association.enddate.get_value(),
"open_ended": association.open_ended.get_value(),
"url": reverse(
"view-organization", kwargs={"slug": organization.uuid}
),
}
)

for emplacementsite in self.emplacementsite_set.all():
emplacement = emplacementsite.object_ref
organization = emplacement.organization.get_value().value
related_entities.append({
'name': organization.name.get_value().value,
'entity_type': _('Organization'),
'start_date': emplacement.startdate.get_value(),
'end_date': emplacement.enddate.get_value(),
'open_ended': emplacement.open_ended.get_value(),
'url': reverse('view-organization', kwargs={'slug': organization.uuid}),
})
related_entities.append(
{
"name": organization.name.get_value().value,
"entity_type": _("Organization"),
"start_date": emplacement.startdate.get_value(),
"end_date": emplacement.enddate.get_value(),
"open_ended": emplacement.open_ended.get_value(),
"url": reverse(
"view-organization", kwargs={"slug": organization.uuid}
),
}
)

for violationlocation in self.violationlocation_set.all():
violation = violationlocation.object_ref
related_entities.append({
'name': truncatewords(violation.description.get_value(), 10),
'entity_type': _('Violation'),
'start_date': violation.startdate.get_value(),
'end_date': violation.enddate.get_value(),
'open_ended': '',
'url': reverse('view-violation', kwargs={'slug': violation.uuid}),
})
related_entities.append(
{
"name": truncatewords(violation.description.get_value(), 10),
"entity_type": _("Violation"),
"start_date": violation.startdate.get_value(),
"end_date": violation.enddate.get_value(),
"open_ended": "",
"url": reverse("view-violation", kwargs={"slug": violation.uuid}),
}
)

return related_entities

@property
def osm_feature_type(self):
if self.feature_type == 'boundary':
return 'relation'
if self.feature_type == "boundary":
return "relation"
return self.feature_type
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ django-countries-plus==1.3.2
django-bootstrap-pagination==1.6.4
django-date-extensions==3.1.1
django-leaflet==0.28.2
psycopg2==2.8.6
psycopg2-binary==2.8.6
django-rosetta==0.9.8
django-queryset-csv==1.1.0
boto3==1.24.21
Expand Down
Loading

0 comments on commit b8f0638

Please sign in to comment.