Skip to content

Commit

Permalink
Added PCIe transaction check for all peripherals on the bus (#331)
Browse files Browse the repository at this point in the history
* Added PCIe transaction check for all peripherals on the bus

This is to determine if there are any unreachable devices on the PCIe bus.
The gold list of PCI peripherals is a static file that varies depending on the platform. BDF information is parsed from this YAML file. Transcations are then carried out to determine the health of each peripheral.

* Modified code to read pcie devices from the pre-configured YAML file
Modified cmd to be executed to setpci (does pci transaction)
Added exception handling

* Formalized syslog error messages
Added specific check for ASIC missing to differentiate from device mismatch
Added yaml support

* Made changes to the way setpci command is called per SA review comments

* Added comments per prgeor review comments

* Added relevant comments per prgeor review comments

* Fixed a bug that was causing UnboundLocalError.

* Added unit tests for the check_pcie_devices method
  • Loading branch information
assrinivasan committed Jul 12, 2023
1 parent 432602a commit d73808c
Show file tree
Hide file tree
Showing 2 changed files with 184 additions and 7 deletions.
53 changes: 53 additions & 0 deletions sonic-pcied/scripts/pcied
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import os
import signal
import sys
import threading
import subprocess
import yaml

from sonic_py_common import daemon_base, device_info, logger
from swsscommon import swsscommon
Expand All @@ -32,6 +34,7 @@ PCIED_MAIN_THREAD_SLEEP_SECS = 60

PCIEUTIL_CONF_FILE_ERROR = 1
PCIEUTIL_LOAD_ERROR = 2
PLATFORM_PCIE_YAML_FILE = "/usr/share/sonic/platform/pcie.yaml"

platform_pcieutil = None

Expand Down Expand Up @@ -157,6 +160,56 @@ class DaemonPcied(daemon_base.DaemonBase):
if self.resultInfo is None:
return

# This block reads the PCIE YAML file, iterates through each device and
# for each device runs a transaction check to verify that the device ID
# matches the one on file. In the event of an invalid device ID such as
# '0xffff', it marks the device as missing as that is the only scenario
# in which such a value is returned.

# Default, invalid BDF values to begin
bus = None
device = None
func = None

try:
stream = open(PLATFORM_PCIE_YAML_FILE, 'r')

# Load contents of the PCIe YAML file into a dictionary object
dictionary = yaml.safe_load(stream)

# Iterate through each element in dict; extract the BDF information and device ID for that element
for elem in dictionary:
bus = int(elem.get('bus'), 16)
device = int(elem.get('dev'), 16)
func = int(elem.get('fn'), 16)
pcieDeviceID = elem.get('id')

# Form the PCI device address from the BDF information above
pcie_device = ("{:02x}:{:02x}.{:01d}".format(bus, device, func))

# Form and run the command to read the device ID from the PCI device
cmd = ["setpci", "-s", pcie_device, "2.w"]
output = subprocess.check_output(cmd)
pcieDeviceQueryResult = output.decode('utf8').rstrip()

# verify that the queried device ID matches what we have on file for the current PCI device
# If not, take appropriate action based on the queried value
if pcieDeviceQueryResult is None or "No devices selected" in pcieDeviceQueryResult:
self.log_error("Invalid PCIe Peripheral {:02x}:{:02x}.{:01d} - {}".format(bus, device, func, pcieDeviceQueryResult))
elif pcieDeviceQueryResult != pcieDeviceID:
if pcieDeviceQueryResult == 'ffff':
self.log_error("PCIe device {:02x}:{:02x}.{:01d} missing.".format(bus, device, func))
else:
self.log_error("PCIe device {:02x}:{:02x}.{:01d} ID mismatch. Expected {}, received {}".format(bus, device, func, pcieDeviceID, pcieDeviceQueryResult))

except Exception as ex:
# Verify that none of the BDF objects are None. If condition evals to False, do not mention BDF information in error log
if not [i for i in (bus, device, func) if i is None]:
self.log_error("PCIe Exception occurred for PCIe device {:02x}:{:02x}.{:01d}: {}".format(bus, device, func, ex))
else:
self.log_error("PCIe Exception occurred: {}".format(ex))
pass

for result in self.resultInfo:
if result["result"] == "Failed":
self.log_warning("PCIe Device: " + result["name"] + " Not Found")
Expand Down
138 changes: 131 additions & 7 deletions sonic-pcied/tests/test_DaemonPcied.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,23 @@

pcie_check_result_no = []

pcie_check_result_pass = \
"""
[{'bus': '00', 'dev': '01', 'fn': '0', 'id': '1f10', 'name': 'PCI A', 'result': 'Passed'},
pcie_check_result_pass = [{'bus': '00', 'dev': '01', 'fn': '0', 'id': '1f10', 'name': 'PCI A', 'result': 'Passed'},
{'bus': '00', 'dev': '02', 'fn': '0', 'id': '1f11', 'name': 'PCI B', 'result': 'Passed'},
{'bus': '00', 'dev': '03', 'fn': '0', 'id': '1f12', 'name': 'PCI C', 'result': 'Passed'}]
"""

pcie_check_result_fail = \
"""
[{'bus': '00', 'dev': '01', 'fn': '0', 'id': '1f10', 'name': 'PCI A', 'result': 'Passed'},

pcie_check_result_fail = [{'bus': '00', 'dev': '01', 'fn': '0', 'id': '1f10', 'name': 'PCI A', 'result': 'Passed'},
{'bus': '00', 'dev': '02', 'fn': '0', 'id': '1f11', 'name': 'PCI B', 'result': 'Passed'},
{'bus': '00', 'dev': '03', 'fn': '0', 'id': '1f12', 'name': 'PCI C', 'result': 'Failed'}]


TEST_PLATFORM_PCIE_YAML_FILE = \
"""
- bus: '00'
dev: '01'
fn: '0'
id: '9170'
name: 'PCI A'
"""

class TestDaemonPcied(object):
Expand Down Expand Up @@ -143,6 +148,125 @@ def test_run(self):
daemon_pcied.run()
assert daemon_pcied.check_pcie_devices.call_count == 1

@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices_yaml_file_open_error(self):
daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER)
pcied.platform_pcieutil.get_pcie_check = mock.MagicMock()

daemon_pcied.check_pcie_devices()

assert pcied.platform_pcieutil.get_pcie_check.called


@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices_result_fail(self):
daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER)
pcied.platform_pcieutil.get_pcie_check = mock.MagicMock(return_value=pcie_check_result_fail)

daemon_pcied.check_pcie_devices()

assert pcied.platform_pcieutil.get_pcie_check.called

@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices_result_pass(self):
daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER)
pcied.platform_pcieutil.get_pcie_check = mock.MagicMock(return_value=pcie_check_result_pass)

daemon_pcied.check_pcie_devices()

assert pcied.platform_pcieutil.get_pcie_check.called

@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices_result_none(self):
daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER)
pcied.platform_pcieutil.get_pcie_check = mock.MagicMock(return_value=None)

daemon_pcied.check_pcie_devices()

assert pcied.platform_pcieutil.get_pcie_check.called

@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices_load_yaml_happy(self):
daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER)

with mock.patch('builtins.open', new_callable=mock.mock_open, read_data=TEST_PLATFORM_PCIE_YAML_FILE) as mock_fd:

class MockOutput:
def decode(self, encodingType):
return self
def rstrip(self):
return "9170"

mock_output = MockOutput()
with mock.patch('subprocess.check_output', mock.MagicMock()) as mock_check_output:
mock_check_output.return_value = mock_output

daemon_pcied.check_pcie_devices()

assert mock_check_output.called

@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices_load_yaml_mismatch(self):
daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER)

with mock.patch('builtins.open', new_callable=mock.mock_open, read_data=TEST_PLATFORM_PCIE_YAML_FILE) as mock_fd:

class MockOutput:
def decode(self, encodingType):
return self
def rstrip(self):
return "0123"

mock_output = MockOutput()
with mock.patch('subprocess.check_output', mock.MagicMock()) as mock_check_output:
mock_check_output.return_value = mock_output

daemon_pcied.check_pcie_devices()

assert mock_check_output.called

@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices_load_yaml_missing_device(self):
daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER)

with mock.patch('builtins.open', new_callable=mock.mock_open, read_data=TEST_PLATFORM_PCIE_YAML_FILE) as mock_fd:

class MockOutput:
def decode(self, encodingType):
return self
def rstrip(self):
return "ffff"

mock_output = MockOutput()
with mock.patch('subprocess.check_output', mock.MagicMock()) as mock_check_output:
mock_check_output.return_value = mock_output

daemon_pcied.check_pcie_devices()

assert mock_check_output.called

@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices_load_yaml_invalid_device(self):
daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER)

with mock.patch('builtins.open', new_callable=mock.mock_open, read_data=TEST_PLATFORM_PCIE_YAML_FILE) as mock_fd:

class MockOutput:
def decode(self, encodingType):
return self
def rstrip(self):
return "No devices selected"

mock_output = MockOutput()
with mock.patch('subprocess.check_output', mock.MagicMock()) as mock_check_output:
mock_check_output.return_value = mock_output

daemon_pcied.check_pcie_devices()

assert mock_check_output.called



@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock())
def test_check_pcie_devices(self):
daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER)
Expand Down

0 comments on commit d73808c

Please sign in to comment.