Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1216: AWS SSM->new data model #1217

Merged
merged 5 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions cartography/data/indexes.cypher
Original file line number Diff line number Diff line change
Expand Up @@ -321,10 +321,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:SpotlightVulnerability) ON (n.host_info_local_
CREATE INDEX IF NOT EXISTS FOR (n:SpotlightVulnerability) ON (n.lastupdated);
CREATE INDEX IF NOT EXISTS FOR (n:SQSQueue) ON (n.id);
CREATE INDEX IF NOT EXISTS FOR (n:SQSQueue) ON (n.lastupdated);
CREATE INDEX IF NOT EXISTS FOR (n:SSMInstanceInformation) ON (n.id);
Copy link
Contributor Author

@achantavy achantavy Jul 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No longer need this in the file since these are now automatically handled

CREATE INDEX IF NOT EXISTS FOR (n:SSMInstanceInformation) ON (n.lastupdated);
CREATE INDEX IF NOT EXISTS FOR (n:SSMInstancePatch) ON (n.id);
CREATE INDEX IF NOT EXISTS FOR (n:SSMInstancePatch) ON (n.lastupdated);
CREATE INDEX IF NOT EXISTS FOR (n:User) ON (n.arn);
CREATE INDEX IF NOT EXISTS FOR (n:User) ON (n.lastupdated);
CREATE INDEX IF NOT EXISTS FOR (n:AzureTenant) ON (n.id);
Expand Down
25 changes: 0 additions & 25 deletions cartography/data/jobs/cleanup/aws_import_ssm_cleanup.json

This file was deleted.

145 changes: 54 additions & 91 deletions cartography/intel/aws/ssm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
import boto3
import neo4j

from cartography.client.core.tx import load
from cartography.graph.job import GraphJob
from cartography.models.aws.ssm.instance_information import SSMInstanceInformationSchema
from cartography.models.aws.ssm.instance_patch import SSMInstancePatchSchema
from cartography.util import aws_handle_regions
from cartography.util import dict_date_to_epoch
from cartography.util import run_cleanup_job
from cartography.util import timeit

logger = logging.getLogger(__name__)
Expand All @@ -31,7 +34,9 @@ def get_instance_ids(neo4j_session: neo4j.Session, region: str, current_aws_acco
@timeit
@aws_handle_regions
def get_instance_information(
boto3_session: boto3.session.Session, region: str, instance_ids: List[str],
boto3_session: boto3.session.Session,
region: str,
instance_ids: List[str],
) -> List[Dict[str, Any]]:
client = boto3_session.client('ssm', region_name=region)
instance_information: List[Dict[str, Any]] = []
Expand All @@ -46,10 +51,21 @@ def get_instance_information(
return instance_information


def transform_instance_information(data_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
for ii in data_list:
ii["LastPingDateTime"] = dict_date_to_epoch(ii, "LastPingDateTime")
ii["RegistrationDate"] = dict_date_to_epoch(ii, "RegistrationDate")
ii["LastAssociationExecutionDate"] = dict_date_to_epoch(ii, "LastAssociationExecutionDate")
ii["LastSuccessfulAssociationExecutionDate"] = dict_date_to_epoch(ii, "LastSuccessfulAssociationExecutionDate")
return data_list


@timeit
@aws_handle_regions
def get_instance_patches(
boto3_session: boto3.session.Session, region: str, instance_ids: List[str],
boto3_session: boto3.session.Session,
region: str,
instance_ids: List[str],
) -> List[Dict[str, Any]]:
client = boto3_session.client('ssm', region_name=region)
instance_patches: List[Dict[str, Any]] = []
Expand All @@ -65,6 +81,16 @@ def get_instance_patches(
return instance_patches


def transform_instance_patches(data_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
for p in data_list:
p["Id"] = f"{p['_instance_id']}-{p['Title']}"
Copy link
Contributor

@ramonpetgrave64 ramonpetgrave64 Jul 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nonblocker:
The title can have hyphen and spaces. I think we should use the kbid instead.

Suggested change
p["Id"] = f"{p['_instance_id']}-{p['Title']}"
p["Id"] = f"{p['_instance_id']}/{p['KBId']}"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-blocker: we could move the somewhat out-of-band transformation on line 79 to a loop within sync().

  • patch["_instance_id"] = instance_id

to something like

...
for instance_id in instance_ids:
  data = transform_instance_patches(instance_id, data)
  load_instance_patches(neo4j_session, data, region, current_aws_account_id, update_tag)
...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The old way of forming an ID used this way. Let's keep it the same so that this is a refactor that doesn't change something important like an id.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the out-of-band thing, good idea, we can include that later on

p["InstalledTime"] = dict_date_to_epoch(p, "InstalledTime")
# Split the comma separated CVEIds, if they exist, and strip
# the empty string from the list if not.
p["CVEIds"] = list(filter(None, p.get("CVEIds", "").split(",")))
return data_list


@timeit
def load_instance_information(
neo4j_session: neo4j.Session,
Expand All @@ -73,55 +99,13 @@ def load_instance_information(
current_aws_account_id: str,
aws_update_tag: int,
) -> None:
ingest_query = """
UNWIND $InstanceInformation AS instance
MERGE (i:SSMInstanceInformation{id: instance.InstanceId})
ON CREATE SET i.firstseen = timestamp()
SET i.instance_id = instance.InstanceId,
i.ping_status = instance.PingStatus,
i.last_ping_date_time = instance.LastPingDateTime,
i.agent_version = instance.AgentVersion,
i.is_latest_version = instance.IsLatestVersion,
i.platform_type = instance.PlatformType,
i.platform_name = instance.PlatformName,
i.platform_version = instance.PlatformVersion,
i.activation_id = instance.ActivationId,
i.iam_role = instance.IamRole,
i.registration_date = instance.RegistrationDate,
i.resource_type = instance.ResourceType,
i.name = instance.Name,
i.ip_address = instance.IPAddress,
i.computer_name = instance.ComputerName,
i.association_status = instance.AssociationStatus,
i.last_association_execution_date = instance.LastAssociationExecutionDate,
i.last_successful_association_execution_date = instance.LastSuccessfulAssociationExecutionDate,
i.source_id = instance.SourceId,
i.source_type = instance.SourceType,
i.region = $Region,
i.lastupdated = $aws_update_tag
WITH i
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
MERGE (owner)-[r:RESOURCE]->(i)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
WITH i
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{id: i.instance_id})
MERGE (ec2_instance)-[r2:HAS_INFORMATION]->(i)
ON CREATE SET r2.firstseen = timestamp()
SET r2.lastupdated = $aws_update_tag
"""
for ii in data:
ii["LastPingDateTime"] = dict_date_to_epoch(ii, "LastPingDateTime")
ii["RegistrationDate"] = dict_date_to_epoch(ii, "RegistrationDate")
ii["LastAssociationExecutionDate"] = dict_date_to_epoch(ii, "LastAssociationExecutionDate")
ii["LastSuccessfulAssociationExecutionDate"] = dict_date_to_epoch(ii, "LastSuccessfulAssociationExecutionDate")

neo4j_session.run(
ingest_query,
InstanceInformation=data,
load(
neo4j_session,
SSMInstanceInformationSchema(),
data,
Region=region,
AWS_ACCOUNT_ID=current_aws_account_id,
aws_update_tag=aws_update_tag,
AWS_ID=current_aws_account_id,
lastupdated=aws_update_tag,
)


Expand All @@ -133,61 +117,40 @@ def load_instance_patches(
current_aws_account_id: str,
aws_update_tag: int,
) -> None:
ingest_query = """
UNWIND $InstancePatch AS patch
MERGE (p:SSMInstancePatch{id: patch._instance_id + "-" + patch.Title})
ON CREATE SET p.firstseen = timestamp()
SET p.instance_id = patch._instance_id,
p.title = patch.Title,
p.kb_id = patch.KBId,
p.classification = patch.Classification,
p.severity = patch.Severity,
p.state = patch.State,
p.installed_time = patch.InstalledTime,
p.cve_ids = patch.CVEIds,
p.region = $Region,
p.lastupdated = $aws_update_tag
WITH p
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
MERGE (owner)-[r:RESOURCE]->(p)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
WITH p
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{id: p.instance_id})
MERGE (ec2_instance)-[r2:HAS_PATCH]->(p)
ON CREATE SET r2.firstseen = timestamp()
SET r2.lastupdated = $aws_update_tag
"""
for p in data:
p["InstalledTime"] = dict_date_to_epoch(p, "InstalledTime")
# Split the comma separated CVEIds, if they exist, and strip
# the empty string from the list if not.
p["CVEIds"] = list(filter(None, p.get("CVEIds", "").split(",")))

neo4j_session.run(
ingest_query,
InstancePatch=data,
load(
neo4j_session,
SSMInstancePatchSchema(),
data,
Region=region,
AWS_ACCOUNT_ID=current_aws_account_id,
aws_update_tag=aws_update_tag,
AWS_ID=current_aws_account_id,
lastupdated=aws_update_tag,
)


@timeit
def cleanup_ssm(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
run_cleanup_job('aws_import_ssm_cleanup.json', neo4j_session, common_job_parameters)
def cleanup_ssm(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None:
logger.info("Running SSM cleanup")
GraphJob.from_node_schema(SSMInstanceInformationSchema(), common_job_parameters).run(neo4j_session)
GraphJob.from_node_schema(SSMInstancePatchSchema(), common_job_parameters).run(neo4j_session)


@timeit
def sync(
neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str], current_aws_account_id: str,
update_tag: int, common_job_parameters: Dict,
neo4j_session: neo4j.Session,
boto3_session: boto3.session.Session,
regions: List[str],
current_aws_account_id: str,
update_tag: int,
common_job_parameters: Dict[str, Any],
) -> None:
for region in regions:
logger.info("Syncing SSM for region '%s' in account '%s'.", region, current_aws_account_id)
instance_ids = get_instance_ids(neo4j_session, region, current_aws_account_id)
data = get_instance_information(boto3_session, region, instance_ids)
data = transform_instance_information(data)
load_instance_information(neo4j_session, data, region, current_aws_account_id, update_tag)

data = get_instance_patches(boto3_session, region, instance_ids)
data = transform_instance_patches(data)
load_instance_patches(neo4j_session, data, region, current_aws_account_id, update_tag)
cleanup_ssm(neo4j_session, common_job_parameters)
Empty file.
82 changes: 82 additions & 0 deletions cartography/models/aws/ssm/instance_information.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from dataclasses import dataclass

from cartography.models.core.common import PropertyRef
from cartography.models.core.nodes import CartographyNodeProperties
from cartography.models.core.nodes import CartographyNodeSchema
from cartography.models.core.relationships import CartographyRelProperties
from cartography.models.core.relationships import CartographyRelSchema
from cartography.models.core.relationships import LinkDirection
from cartography.models.core.relationships import make_target_node_matcher
from cartography.models.core.relationships import OtherRelationships
from cartography.models.core.relationships import TargetNodeMatcher


@dataclass(frozen=True)
class SSMInstanceInformationNodeProperties(CartographyNodeProperties):
id: PropertyRef = PropertyRef('InstanceId')
instance_id: PropertyRef = PropertyRef('InstanceId', extra_index=True)
region: PropertyRef = PropertyRef('Region', set_in_kwargs=True)
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
ping_status: PropertyRef = PropertyRef('PingStatus')
last_ping_date_time: PropertyRef = PropertyRef('LastPingDateTime')
agent_version: PropertyRef = PropertyRef('AgentVersion')
is_latest_version: PropertyRef = PropertyRef('IsLatestVersion')
platform_type: PropertyRef = PropertyRef('PlatformType')
platform_name: PropertyRef = PropertyRef('PlatformName')
platform_version: PropertyRef = PropertyRef('PlatformVersion')
activation_id: PropertyRef = PropertyRef('ActivationId')
iam_role: PropertyRef = PropertyRef('IamRole')
registration_date: PropertyRef = PropertyRef('RegistrationDate')
resource_type: PropertyRef = PropertyRef('ResourceType')
name: PropertyRef = PropertyRef('Name')
ip_address: PropertyRef = PropertyRef('IPAddress')
computer_name: PropertyRef = PropertyRef('ComputerName')
association_status: PropertyRef = PropertyRef('AssociationStatus')
last_association_execution_date: PropertyRef = PropertyRef('LastAssociationExecutionDate')
last_successful_association_execution_date: PropertyRef = PropertyRef('LastSuccessfulAssociationExecutionDate')
source_id: PropertyRef = PropertyRef('SourceId')
source_type: PropertyRef = PropertyRef('SourceType')


@dataclass(frozen=True)
class SSMInstanceInformationToAWSAccountRelProperties(CartographyRelProperties):
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)


@dataclass(frozen=True)
class SSMInstanceInformationToAWSAccount(CartographyRelSchema):
target_node_label: str = 'AWSAccount'
target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
{'id': PropertyRef('AWS_ID', set_in_kwargs=True)},
)
direction: LinkDirection = LinkDirection.INWARD
rel_label: str = "RESOURCE"
properties: SSMInstanceInformationToAWSAccountRelProperties = SSMInstanceInformationToAWSAccountRelProperties()


@dataclass(frozen=True)
class SSMInstanceInformationToEC2InstanceRelProperties(CartographyRelProperties):
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)


@dataclass(frozen=True)
class SSMInstanceInformationToEC2Instance(CartographyRelSchema):
target_node_label: str = 'EC2Instance'
target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
{'id': PropertyRef('InstanceId')},
)
direction: LinkDirection = LinkDirection.INWARD
rel_label: str = "HAS_INFORMATION"
properties: SSMInstanceInformationToEC2InstanceRelProperties = SSMInstanceInformationToEC2InstanceRelProperties()


@dataclass(frozen=True)
class SSMInstanceInformationSchema(CartographyNodeSchema):
label: str = 'SSMInstanceInformation'
properties: SSMInstanceInformationNodeProperties = SSMInstanceInformationNodeProperties()
sub_resource_relationship: SSMInstanceInformationToAWSAccount = SSMInstanceInformationToAWSAccount()
other_relationships: OtherRelationships = OtherRelationships(
[
SSMInstanceInformationToEC2Instance(),
],
)
70 changes: 70 additions & 0 deletions cartography/models/aws/ssm/instance_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from dataclasses import dataclass

from cartography.models.core.common import PropertyRef
from cartography.models.core.nodes import CartographyNodeProperties
from cartography.models.core.nodes import CartographyNodeSchema
from cartography.models.core.relationships import CartographyRelProperties
from cartography.models.core.relationships import CartographyRelSchema
from cartography.models.core.relationships import LinkDirection
from cartography.models.core.relationships import make_target_node_matcher
from cartography.models.core.relationships import OtherRelationships
from cartography.models.core.relationships import TargetNodeMatcher


@dataclass(frozen=True)
class SSMInstancePatchNodeProperties(CartographyNodeProperties):
id: PropertyRef = PropertyRef('Id')
instance_id: PropertyRef = PropertyRef('_instance_id', extra_index=True)
region: PropertyRef = PropertyRef('Region', set_in_kwargs=True)
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
title: PropertyRef = PropertyRef('Title', extra_index=True)
kb_id: PropertyRef = PropertyRef('KBId', extra_index=True)
classification: PropertyRef = PropertyRef('Classification')
severity: PropertyRef = PropertyRef('Severity')
state: PropertyRef = PropertyRef('State')
installed_time: PropertyRef = PropertyRef('InstalledTime')
cve_ids: PropertyRef = PropertyRef('CVEIds')


@dataclass(frozen=True)
class SSMInstancePatchToAWSAccountRelProperties(CartographyRelProperties):
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)


@dataclass(frozen=True)
class SSMInstancePatchToAWSAccount(CartographyRelSchema):
target_node_label: str = 'AWSAccount'
target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
{'id': PropertyRef('AWS_ID', set_in_kwargs=True)},
)
direction: LinkDirection = LinkDirection.INWARD
rel_label: str = "RESOURCE"
properties: SSMInstancePatchToAWSAccountRelProperties = SSMInstancePatchToAWSAccountRelProperties()


@dataclass(frozen=True)
class SSMInstancePatchToEC2InstanceRelProperties(CartographyRelProperties):
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)


@dataclass(frozen=True)
class SSMInstancePatchToEC2Instance(CartographyRelSchema):
target_node_label: str = 'EC2Instance'
target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
{'id': PropertyRef('_instance_id')},
)
direction: LinkDirection = LinkDirection.INWARD
rel_label: str = "HAS_PATCH"
properties: SSMInstancePatchToEC2InstanceRelProperties = SSMInstancePatchToEC2InstanceRelProperties()


@dataclass(frozen=True)
class SSMInstancePatchSchema(CartographyNodeSchema):
label: str = 'SSMInstancePatch'
properties: SSMInstancePatchNodeProperties = SSMInstancePatchNodeProperties()
sub_resource_relationship: SSMInstancePatchToAWSAccount = SSMInstancePatchToAWSAccount()
other_relationships: OtherRelationships = OtherRelationships(
[
SSMInstancePatchToEC2Instance(),
],
)
Loading
Loading