diff --git a/cartography/data/indexes.cypher b/cartography/data/indexes.cypher index 072599f71..085f84e27 100644 --- a/cartography/data/indexes.cypher +++ b/cartography/data/indexes.cypher @@ -321,10 +321,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:SpotlightVulnerability) ON (n.host_info_local_ CREATE INDEX IF NOT EXISTS FOR (n:SpotlightVulnerability) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:SQSQueue) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:SQSQueue) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:SSMInstanceInformation) ON (n.id); -CREATE INDEX IF NOT EXISTS FOR (n:SSMInstanceInformation) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:SSMInstancePatch) ON (n.id); -CREATE INDEX IF NOT EXISTS FOR (n:SSMInstancePatch) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:User) ON (n.arn); CREATE INDEX IF NOT EXISTS FOR (n:User) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:AzureTenant) ON (n.id); diff --git a/cartography/data/jobs/cleanup/aws_import_ssm_cleanup.json b/cartography/data/jobs/cleanup/aws_import_ssm_cleanup.json deleted file mode 100644 index 7f77a3c84..000000000 --- a/cartography/data/jobs/cleanup/aws_import_ssm_cleanup.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "statements": [ - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(n:SSMInstanceInformation) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:EC2Instance)-[r:HAS_INFORMATION]->(:SSMInstanceInformation) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(n:SSMInstancePatch) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:EC2Instance)-[r:HAS_PATCH]->(:SSMInstancePatch) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - } - ], - "name": "cleanup SSM" -} diff --git a/cartography/intel/aws/ssm.py b/cartography/intel/aws/ssm.py index cd69499d6..937cba52b 100644 --- a/cartography/intel/aws/ssm.py +++ b/cartography/intel/aws/ssm.py @@ -6,9 +6,12 @@ import boto3 import neo4j +from cartography.client.core.tx import load +from cartography.graph.job import GraphJob +from cartography.models.aws.ssm.instance_information import SSMInstanceInformationSchema +from cartography.models.aws.ssm.instance_patch import SSMInstancePatchSchema from cartography.util import aws_handle_regions from cartography.util import dict_date_to_epoch -from cartography.util import run_cleanup_job from cartography.util import timeit logger = logging.getLogger(__name__) @@ -31,7 +34,9 @@ def get_instance_ids(neo4j_session: neo4j.Session, region: str, current_aws_acco @timeit @aws_handle_regions def get_instance_information( - boto3_session: boto3.session.Session, region: str, instance_ids: List[str], + boto3_session: boto3.session.Session, + region: str, + instance_ids: List[str], ) -> List[Dict[str, Any]]: client = boto3_session.client('ssm', region_name=region) instance_information: List[Dict[str, Any]] = [] @@ -46,10 +51,21 @@ def get_instance_information( return instance_information +def transform_instance_information(data_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + for ii in data_list: + ii["LastPingDateTime"] = dict_date_to_epoch(ii, "LastPingDateTime") + ii["RegistrationDate"] = dict_date_to_epoch(ii, "RegistrationDate") + ii["LastAssociationExecutionDate"] = dict_date_to_epoch(ii, "LastAssociationExecutionDate") + ii["LastSuccessfulAssociationExecutionDate"] = dict_date_to_epoch(ii, "LastSuccessfulAssociationExecutionDate") + return data_list + + @timeit @aws_handle_regions def get_instance_patches( - boto3_session: boto3.session.Session, region: str, instance_ids: List[str], + boto3_session: boto3.session.Session, + region: str, + instance_ids: List[str], ) -> List[Dict[str, Any]]: client = boto3_session.client('ssm', region_name=region) instance_patches: List[Dict[str, Any]] = [] @@ -65,6 +81,16 @@ def get_instance_patches( return instance_patches +def transform_instance_patches(data_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + for p in data_list: + p["Id"] = f"{p['_instance_id']}-{p['Title']}" + p["InstalledTime"] = dict_date_to_epoch(p, "InstalledTime") + # Split the comma separated CVEIds, if they exist, and strip + # the empty string from the list if not. + p["CVEIds"] = list(filter(None, p.get("CVEIds", "").split(","))) + return data_list + + @timeit def load_instance_information( neo4j_session: neo4j.Session, @@ -73,55 +99,13 @@ def load_instance_information( current_aws_account_id: str, aws_update_tag: int, ) -> None: - ingest_query = """ - UNWIND $InstanceInformation AS instance - MERGE (i:SSMInstanceInformation{id: instance.InstanceId}) - ON CREATE SET i.firstseen = timestamp() - SET i.instance_id = instance.InstanceId, - i.ping_status = instance.PingStatus, - i.last_ping_date_time = instance.LastPingDateTime, - i.agent_version = instance.AgentVersion, - i.is_latest_version = instance.IsLatestVersion, - i.platform_type = instance.PlatformType, - i.platform_name = instance.PlatformName, - i.platform_version = instance.PlatformVersion, - i.activation_id = instance.ActivationId, - i.iam_role = instance.IamRole, - i.registration_date = instance.RegistrationDate, - i.resource_type = instance.ResourceType, - i.name = instance.Name, - i.ip_address = instance.IPAddress, - i.computer_name = instance.ComputerName, - i.association_status = instance.AssociationStatus, - i.last_association_execution_date = instance.LastAssociationExecutionDate, - i.last_successful_association_execution_date = instance.LastSuccessfulAssociationExecutionDate, - i.source_id = instance.SourceId, - i.source_type = instance.SourceType, - i.region = $Region, - i.lastupdated = $aws_update_tag - WITH i - MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID}) - MERGE (owner)-[r:RESOURCE]->(i) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $aws_update_tag - WITH i - MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{id: i.instance_id}) - MERGE (ec2_instance)-[r2:HAS_INFORMATION]->(i) - ON CREATE SET r2.firstseen = timestamp() - SET r2.lastupdated = $aws_update_tag - """ - for ii in data: - ii["LastPingDateTime"] = dict_date_to_epoch(ii, "LastPingDateTime") - ii["RegistrationDate"] = dict_date_to_epoch(ii, "RegistrationDate") - ii["LastAssociationExecutionDate"] = dict_date_to_epoch(ii, "LastAssociationExecutionDate") - ii["LastSuccessfulAssociationExecutionDate"] = dict_date_to_epoch(ii, "LastSuccessfulAssociationExecutionDate") - - neo4j_session.run( - ingest_query, - InstanceInformation=data, + load( + neo4j_session, + SSMInstanceInformationSchema(), + data, Region=region, - AWS_ACCOUNT_ID=current_aws_account_id, - aws_update_tag=aws_update_tag, + AWS_ID=current_aws_account_id, + lastupdated=aws_update_tag, ) @@ -133,61 +117,40 @@ def load_instance_patches( current_aws_account_id: str, aws_update_tag: int, ) -> None: - ingest_query = """ - UNWIND $InstancePatch AS patch - MERGE (p:SSMInstancePatch{id: patch._instance_id + "-" + patch.Title}) - ON CREATE SET p.firstseen = timestamp() - SET p.instance_id = patch._instance_id, - p.title = patch.Title, - p.kb_id = patch.KBId, - p.classification = patch.Classification, - p.severity = patch.Severity, - p.state = patch.State, - p.installed_time = patch.InstalledTime, - p.cve_ids = patch.CVEIds, - p.region = $Region, - p.lastupdated = $aws_update_tag - WITH p - MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID}) - MERGE (owner)-[r:RESOURCE]->(p) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $aws_update_tag - WITH p - MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{id: p.instance_id}) - MERGE (ec2_instance)-[r2:HAS_PATCH]->(p) - ON CREATE SET r2.firstseen = timestamp() - SET r2.lastupdated = $aws_update_tag - """ - for p in data: - p["InstalledTime"] = dict_date_to_epoch(p, "InstalledTime") - # Split the comma separated CVEIds, if they exist, and strip - # the empty string from the list if not. - p["CVEIds"] = list(filter(None, p.get("CVEIds", "").split(","))) - - neo4j_session.run( - ingest_query, - InstancePatch=data, + load( + neo4j_session, + SSMInstancePatchSchema(), + data, Region=region, - AWS_ACCOUNT_ID=current_aws_account_id, - aws_update_tag=aws_update_tag, + AWS_ID=current_aws_account_id, + lastupdated=aws_update_tag, ) @timeit -def cleanup_ssm(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: - run_cleanup_job('aws_import_ssm_cleanup.json', neo4j_session, common_job_parameters) +def cleanup_ssm(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None: + logger.info("Running SSM cleanup") + GraphJob.from_node_schema(SSMInstanceInformationSchema(), common_job_parameters).run(neo4j_session) + GraphJob.from_node_schema(SSMInstancePatchSchema(), common_job_parameters).run(neo4j_session) @timeit def sync( - neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str], current_aws_account_id: str, - update_tag: int, common_job_parameters: Dict, + neo4j_session: neo4j.Session, + boto3_session: boto3.session.Session, + regions: List[str], + current_aws_account_id: str, + update_tag: int, + common_job_parameters: Dict[str, Any], ) -> None: for region in regions: logger.info("Syncing SSM for region '%s' in account '%s'.", region, current_aws_account_id) instance_ids = get_instance_ids(neo4j_session, region, current_aws_account_id) data = get_instance_information(boto3_session, region, instance_ids) + data = transform_instance_information(data) load_instance_information(neo4j_session, data, region, current_aws_account_id, update_tag) + data = get_instance_patches(boto3_session, region, instance_ids) + data = transform_instance_patches(data) load_instance_patches(neo4j_session, data, region, current_aws_account_id, update_tag) cleanup_ssm(neo4j_session, common_job_parameters) diff --git a/cartography/models/aws/ssm/__init__.py b/cartography/models/aws/ssm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cartography/models/aws/ssm/instance_information.py b/cartography/models/aws/ssm/instance_information.py new file mode 100644 index 000000000..b678ebe30 --- /dev/null +++ b/cartography/models/aws/ssm/instance_information.py @@ -0,0 +1,82 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import OtherRelationships +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class SSMInstanceInformationNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('InstanceId') + instance_id: PropertyRef = PropertyRef('InstanceId', extra_index=True) + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + ping_status: PropertyRef = PropertyRef('PingStatus') + last_ping_date_time: PropertyRef = PropertyRef('LastPingDateTime') + agent_version: PropertyRef = PropertyRef('AgentVersion') + is_latest_version: PropertyRef = PropertyRef('IsLatestVersion') + platform_type: PropertyRef = PropertyRef('PlatformType') + platform_name: PropertyRef = PropertyRef('PlatformName') + platform_version: PropertyRef = PropertyRef('PlatformVersion') + activation_id: PropertyRef = PropertyRef('ActivationId') + iam_role: PropertyRef = PropertyRef('IamRole') + registration_date: PropertyRef = PropertyRef('RegistrationDate') + resource_type: PropertyRef = PropertyRef('ResourceType') + name: PropertyRef = PropertyRef('Name') + ip_address: PropertyRef = PropertyRef('IPAddress') + computer_name: PropertyRef = PropertyRef('ComputerName') + association_status: PropertyRef = PropertyRef('AssociationStatus') + last_association_execution_date: PropertyRef = PropertyRef('LastAssociationExecutionDate') + last_successful_association_execution_date: PropertyRef = PropertyRef('LastSuccessfulAssociationExecutionDate') + source_id: PropertyRef = PropertyRef('SourceId') + source_type: PropertyRef = PropertyRef('SourceType') + + +@dataclass(frozen=True) +class SSMInstanceInformationToAWSAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class SSMInstanceInformationToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: SSMInstanceInformationToAWSAccountRelProperties = SSMInstanceInformationToAWSAccountRelProperties() + + +@dataclass(frozen=True) +class SSMInstanceInformationToEC2InstanceRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class SSMInstanceInformationToEC2Instance(CartographyRelSchema): + target_node_label: str = 'EC2Instance' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('InstanceId')}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "HAS_INFORMATION" + properties: SSMInstanceInformationToEC2InstanceRelProperties = SSMInstanceInformationToEC2InstanceRelProperties() + + +@dataclass(frozen=True) +class SSMInstanceInformationSchema(CartographyNodeSchema): + label: str = 'SSMInstanceInformation' + properties: SSMInstanceInformationNodeProperties = SSMInstanceInformationNodeProperties() + sub_resource_relationship: SSMInstanceInformationToAWSAccount = SSMInstanceInformationToAWSAccount() + other_relationships: OtherRelationships = OtherRelationships( + [ + SSMInstanceInformationToEC2Instance(), + ], + ) diff --git a/cartography/models/aws/ssm/instance_patch.py b/cartography/models/aws/ssm/instance_patch.py new file mode 100644 index 000000000..25686f5b8 --- /dev/null +++ b/cartography/models/aws/ssm/instance_patch.py @@ -0,0 +1,70 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import OtherRelationships +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class SSMInstancePatchNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('Id') + instance_id: PropertyRef = PropertyRef('_instance_id', extra_index=True) + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + title: PropertyRef = PropertyRef('Title', extra_index=True) + kb_id: PropertyRef = PropertyRef('KBId', extra_index=True) + classification: PropertyRef = PropertyRef('Classification') + severity: PropertyRef = PropertyRef('Severity') + state: PropertyRef = PropertyRef('State') + installed_time: PropertyRef = PropertyRef('InstalledTime') + cve_ids: PropertyRef = PropertyRef('CVEIds') + + +@dataclass(frozen=True) +class SSMInstancePatchToAWSAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class SSMInstancePatchToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: SSMInstancePatchToAWSAccountRelProperties = SSMInstancePatchToAWSAccountRelProperties() + + +@dataclass(frozen=True) +class SSMInstancePatchToEC2InstanceRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class SSMInstancePatchToEC2Instance(CartographyRelSchema): + target_node_label: str = 'EC2Instance' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('_instance_id')}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "HAS_PATCH" + properties: SSMInstancePatchToEC2InstanceRelProperties = SSMInstancePatchToEC2InstanceRelProperties() + + +@dataclass(frozen=True) +class SSMInstancePatchSchema(CartographyNodeSchema): + label: str = 'SSMInstancePatch' + properties: SSMInstancePatchNodeProperties = SSMInstancePatchNodeProperties() + sub_resource_relationship: SSMInstancePatchToAWSAccount = SSMInstancePatchToAWSAccount() + other_relationships: OtherRelationships = OtherRelationships( + [ + SSMInstancePatchToEC2Instance(), + ], + ) diff --git a/tests/integration/cartography/intel/aws/test_ssm.py b/tests/integration/cartography/intel/aws/test_ssm.py index a27e1a3f4..8b31471e7 100644 --- a/tests/integration/cartography/intel/aws/test_ssm.py +++ b/tests/integration/cartography/intel/aws/test_ssm.py @@ -28,13 +28,16 @@ def _ensure_load_instances(neo4j_session): @patch.object(cartography.intel.aws.ec2.instances, 'get_ec2_instances', return_value=DESCRIBE_INSTANCES['Reservations']) def test_load_instance_information(mock_get_instances, neo4j_session): + # Arrange # load account and instances, to be able to test relationships create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) _ensure_load_instances(neo4j_session) + # Act + data_list = cartography.intel.aws.ssm.transform_instance_information(tests.data.aws.ssm.INSTANCE_INFORMATION) cartography.intel.aws.ssm.load_instance_information( neo4j_session, - tests.data.aws.ssm.INSTANCE_INFORMATION, + data_list, TEST_REGION, TEST_ACCOUNT_ID, TEST_UPDATE_TAG, @@ -84,15 +87,17 @@ def test_load_instance_information(mock_get_instances, neo4j_session): assert actual_nodes == {"i-02"} -def test_load_instance_patches(neo4j_session): +@patch.object(cartography.intel.aws.ec2.instances, 'get_ec2_instances', return_value=DESCRIBE_INSTANCES['Reservations']) +def test_load_instance_patches(mock_get_instances, neo4j_session): # Arrange: load account and instances, to be able to test relationships create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) _ensure_load_instances(neo4j_session) # Act + data_list = cartography.intel.aws.ssm.transform_instance_patches(tests.data.aws.ssm.INSTANCE_PATCHES) cartography.intel.aws.ssm.load_instance_patches( neo4j_session, - tests.data.aws.ssm.INSTANCE_PATCHES, + data_list, TEST_REGION, TEST_ACCOUNT_ID, TEST_UPDATE_TAG,