Iblsort #811

Open: wants to merge 30 commits into base: develop
Commits (30)
8b8e8b2  remove deprecated spike sorting task (oliche, Jul 16, 2024)
856c79e  flake (oliche, Jul 16, 2024)
4d1f1cd  add pointer to the lock file when the task fails (oliche, Jul 16, 2024)
9944e25  spike sorting loader uses iblsorter as default collection (oliche, Jul 17, 2024)
ca975bd  set the waveform extraction chunk size to 30_000 (oliche, Jul 17, 2024)
b6cf506  bugfix: reflect changes in arguments of waveform extraction (oliche, Jul 19, 2024)
d7b9add  assertion reversed fixed (oliche, Jul 19, 2024)
2a018a6  change entrypoint for spike sorting script (oliche, Jul 19, 2024)
b07f438  remove ephys cell qc task from pipeline as it is part of spike sorting (oliche, Jul 19, 2024)
431db81  add iblsort environment to the spike sorting task (oliche, Jul 19, 2024)
d1aac3f  typo (Jul 26, 2024)
696ff01  test forcing subprocess for large_jobs (Jul 29, 2024)
81481d0  Revert "test forcing subprocess for large_jobs" (Jul 30, 2024)
b803a3e  Merge branch 'aggregate_training' into iblsort (mayofaulkner, Sep 10, 2024)
40f0b8b  Merge branch 'aggregate_training' into iblsort (mayofaulkner, Sep 10, 2024)
b9651e2  label probe qc if ONE instance (mayofaulkner, Sep 11, 2024)
12b722c  add passingSpikes.pqt to spike sorting job - update task signature (oliche, Sep 12, 2024)
1099989  configure task to decompress cbin beforehand (oliche, Sep 17, 2024)
dbc3143  Merge branch 'aggregate_training' into iblsort (mayofaulkner, Sep 18, 2024)
b9ac1c4  ensure camera times have correct length for plotting (mayofaulkner, Sep 24, 2024)
080b148  ensure qc is updated on alyx (mayofaulkner, Sep 24, 2024)
b07f792  populate qc with string (mayofaulkner, Sep 24, 2024)
598aeeb  account for tuple qc results (mayofaulkner, Sep 24, 2024)
6470b5e  account for no camera times (mayofaulkner, Sep 24, 2024)
87ccc96  pass in output_files to assert_expected (mayofaulkner, Sep 24, 2024)
29ca0c0  update wiring signature (mayofaulkner, Sep 25, 2024)
77f2e97  add meta file (mayofaulkner, Sep 25, 2024)
7dc070f  Merge remote-tracking branch 'origin/develop' into iblsort (oliche, Sep 26, 2024)
637c1fb  add motion energy and wheel to postdlc signatures (mayofaulkner, Sep 26, 2024)
067c1e7  Merge branch 'camera_fixes' into iblsort (mayofaulkner, Sep 30, 2024)
11 changes: 8 additions & 3 deletions brainbox/io/one.py
@@ -866,13 +866,18 @@ def _get_attributes(dataset_types):
         waveform_attributes = list(set(WAVEFORMS_ATTRIBUTES + waveform_attributes))
         return {'spikes': spike_attributes, 'clusters': cluster_attributes, 'waveforms': waveform_attributes}

-    def _get_spike_sorting_collection(self, spike_sorter='pykilosort'):
+    def _get_spike_sorting_collection(self, spike_sorter=None):
         """
         Filters a list or array of collections to get the relevant spike sorting dataset
         if there is a pykilosort, load it
         """
-        collection = next(filter(lambda c: c == f'alf/{self.pname}/{spike_sorter}', self.collections), None)
-        # otherwise, prefers the shortest
+        for sorter in list([spike_sorter, 'iblsorter', 'pykilosort']):
+            if sorter is None:
+                continue
+            collection = next(filter(lambda c: c == f'alf/{self.pname}/{sorter}', self.collections), None)
+            if collection is not None:
+                return collection
+        # if none is found amongst the defaults, prefers the shortest
         collection = collection or next(iter(sorted(filter(lambda c: f'alf/{self.pname}' in c, self.collections), key=len)), None)
         _logger.debug(f"selecting: {collection} to load amongst candidates: {self.collections}")
         return collection
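For context, the loader now tries the requested sorter first, then falls back to 'iblsorter', then 'pykilosort', and finally the shortest matching collection. A minimal self-contained sketch of that selection order, assuming a hypothetical collection list and probe name (not the brainbox API itself):

```python
# Sketch of the new fallback order; pname and collections are hypothetical inputs.
pname = 'probe00'
collections = ['alf/probe00', 'alf/probe00/pykilosort', 'alf/probe00/iblsorter']

def pick_collection(spike_sorter=None):
    # try the requested sorter, then the defaults, in order
    for sorter in (spike_sorter, 'iblsorter', 'pykilosort'):
        if sorter is None:
            continue
        match = next((c for c in collections if c == f'alf/{pname}/{sorter}'), None)
        if match is not None:
            return match
    # if none is found amongst the defaults, prefer the shortest matching collection
    return next(iter(sorted((c for c in collections if f'alf/{pname}' in c), key=len)), None)

assert pick_collection() == 'alf/probe00/iblsorter'            # default now resolves to iblsorter
assert pick_collection('pykilosort') == 'alf/probe00/pykilosort'
```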
2 changes: 1 addition & 1 deletion ibllib/ephys/sync_probes.py
@@ -47,7 +47,7 @@ def sync(ses_path, **kwargs):
         return version3B(ses_path, **kwargs)


-def version3A(ses_path, display=True, type='smooth', tol=2.1):
+def version3A(ses_path, display=True, type='smooth', tol=2.1, probe_names=None):
     """
     From a session path with _spikeglx_sync arrays extracted, locate ephys files for 3A and
     outputs one sync.timestamps.probeN.npy file per acquired probe. By convention the reference
9 changes: 8 additions & 1 deletion ibllib/io/extractors/video_motion.py
@@ -442,7 +442,6 @@ def fix_keys(alf_object):
         # Compute wheel velocity
         self.wheel_vel, _ = wh.velocity_filtered(wheel_pos, 1000)
         # Load in original camera times
-        self.camera_times = alfio.load_file_content(next(alf_path.rglob(f'_ibl_{self.label}Camera.times*.npy')))
         self.camera_path = str(next(self.session_path.joinpath('raw_video_data').glob(f'_iblrig_{self.label}Camera.raw*.mp4')))
         self.camera_meta = vidio.get_video_meta(self.camera_path)

@@ -461,17 +460,25 @@ def fix_keys(alf_object):
         # Check if the ttl and video sizes match up
         self.tdiff = self.ttls.size - self.camera_meta['length']

+        # Load in original camera times if available otherwise set to ttls
+        camera_times = next(alf_path.rglob(f'_ibl_{self.label}Camera.times*.npy'), None)
+        self.camera_times = alfio.load_file_content(camera_times) if camera_times else self.ttls
+
         if self.tdiff < 0:
             # In this case there are fewer ttls than camera frames. This is not ideal, for now we pad the ttls with
             # nans but if this is too many we reject the wheel alignment based on the qc
             self.ttl_times = self.ttls
             self.times = np.r_[self.ttl_times, np.full((np.abs(self.tdiff)), np.nan)]
+            if self.camera_times.size != self.camera_meta['length']:
+                self.camera_times = np.r_[self.camera_times, np.full((np.abs(self.tdiff)), np.nan)]
             self.short_flag = True
         elif self.tdiff > 0:
             # In this case there are more ttls than camera frames. This happens often, for now we remove the first
             # tdiff ttls from the ttls
             self.ttl_times = self.ttls[self.tdiff:]
             self.times = self.ttls[self.tdiff:]
+            if self.camera_times.size != self.camera_meta['length']:
+                self.camera_times = self.camera_times[self.tdiff:]
             self.short_flag = False

         # Compute the frame rate of the camera
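The reconciliation logic above is self-contained enough to illustrate with toy arrays; a sketch assuming hypothetical sizes (5 ttls against 7 reported video frames):

```python
import numpy as np

# Toy illustration of the ttl/frame-count reconciliation above (hypothetical sizes).
ttls = np.arange(5, dtype=float)        # 5 ttl pulses detected on the sync line
n_frames = 7                            # camera metadata reports 7 frames
camera_times = np.arange(5, dtype=float)

tdiff = ttls.size - n_frames            # -2: fewer ttls than camera frames
if tdiff < 0:
    # pad both vectors with nans so they match the video length
    times = np.r_[ttls, np.full(abs(tdiff), np.nan)]
    if camera_times.size != n_frames:
        camera_times = np.r_[camera_times, np.full(abs(tdiff), np.nan)]
elif tdiff > 0:
    # drop the first tdiff ttls
    times = ttls[tdiff:]
    if camera_times.size != n_frames:
        camera_times = camera_times[tdiff:]

assert times.size == camera_times.size == n_frames
```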
2 changes: 0 additions & 2 deletions ibllib/pipes/dynamic_pipeline.py
@@ -490,8 +490,6 @@ def make_pipeline(session_path, **pkwargs):

         tasks[f'RawEphysQC_{pname}'] = type(f'RawEphysQC_{pname}', (etasks.RawEphysQC,), {})(
             **kwargs, **ephys_kwargs, pname=pname, parents=register_task)
-        tasks[f'EphysCellQC_{pname}'] = type(f'EphysCellQC_{pname}', (etasks.EphysCellsQc,), {})(
-            **kwargs, **ephys_kwargs, pname=pname, parents=[tasks[f'Spikesorting_{pname}']])

     # Video tasks
     if 'cameras' in devices:
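As background, the pipeline builds one task class per probe on the fly with type(); a toy sketch of that pattern, using a stand-in base class rather than the ibllib Task hierarchy:

```python
# Toy illustration of the dynamic per-probe task creation used above.
class BaseTask:
    def __init__(self, pname=None, parents=None):
        self.pname, self.parents = pname, parents or []

tasks = {}
for pname in ('probe00', 'probe01'):
    # create a subclass named after the probe, then instantiate it
    tasks[f'RawEphysQC_{pname}'] = type(f'RawEphysQC_{pname}', (BaseTask,), {})(pname=pname)

assert type(tasks['RawEphysQC_probe00']).__name__ == 'RawEphysQC_probe00'
```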
210 changes: 125 additions & 85 deletions ibllib/pipes/ephys_tasks.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions ibllib/pipes/tasks.py
@@ -222,7 +222,7 @@ def run(self, **kwargs):
         if self.gpu >= 1:
             if not self._creates_lock():
                 self.status = -2
-                _logger.info(f'Job {self.__class__} exited as a lock was found')
+                _logger.info(f'Job {self.__class__} exited as a lock was found at {self._lock_file_path()}')
                 new_log = log_capture_string.getvalue()
                 self.log = new_log if self.clobber else self.log + new_log
                 _logger.removeHandler(ch)
@@ -425,7 +425,7 @@ def assert_expected_outputs(self, raise_error=True):

         return everything_is_fine, files

-    def assert_expected_inputs(self, raise_error=True):
+    def assert_expected_inputs(self, raise_error=True, raise_ambiguous=False):
         """
         Check that all the files necessary to run the task are present on disk.

@@ -460,7 +460,7 @@ def assert_expected_inputs(self, raise_error=True):
                          for k, v in variant_datasets.items() if any(v)}
             _logger.error('Ambiguous input datasets found: %s', ambiguous)

-            if raise_error or self.location == 'sdsc':  # take no chances on SDSC
+            if raise_ambiguous or self.location == 'sdsc':  # take no chances on SDSC
                 # This could be mitigated if loading with data OneSDSC
                 raise NotImplementedError(
                     'Multiple variant datasets found. Loading for these is undefined.')
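A sketch of how the new keyword changes behaviour, using a stand-in class (not ibllib.pipes.tasks.Task) with the ambiguity hard-coded:

```python
import logging

_logger = logging.getLogger(__name__)

# Stand-in class with one ambiguous dataset hard-coded, to show the effect of the
# new raise_ambiguous keyword: ambiguity is now logged by default, not raised.
class Task:
    location = 'local'

    def assert_expected_inputs(self, raise_error=True, raise_ambiguous=False):
        ambiguous = {'spikes.times.npy': ['alf/probe00', 'alf/probe00/iblsorter']}
        if ambiguous:
            _logger.error('Ambiguous input datasets found: %s', ambiguous)
            if raise_ambiguous or self.location == 'sdsc':  # take no chances on SDSC
                raise NotImplementedError('Multiple variant datasets found.')
        return not ambiguous

assert Task().assert_expected_inputs() is False          # logged, not raised (new default)
try:
    Task().assert_expected_inputs(raise_ambiguous=True)  # opt back in to raising
except NotImplementedError:
    pass
```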
18 changes: 12 additions & 6 deletions ibllib/pipes/video_tasks.py
@@ -190,7 +190,8 @@ def run_qc(self, camera_data=None, update=True):
         if camera_data is None:
             camera_data, _ = self.extract_camera(save=False)
         qc = run_camera_qc(
-            self.session_path, self.cameras, one=self.one, camlog=True, sync_collection=self.sync_collection, sync_type=self.sync)
+            self.session_path, self.cameras, one=self.one, camlog=True, sync_collection=self.sync_collection, sync_type=self.sync,
+            update=update)
         return qc

     def _run(self, update=True, **kwargs):
@@ -284,7 +285,8 @@ def signature(self):
             [(f'_{self.sync_namespace}_sync.channels.npy', self.sync_collection, True),
              (f'_{self.sync_namespace}_sync.polarities.npy', self.sync_collection, True),
              (f'_{self.sync_namespace}_sync.times.npy', self.sync_collection, True),
-             ('*.wiring.json', self.sync_collection, True),
+             (f'_{self.sync_namespace}_*.wiring.json', self.sync_collection, False),
+             (f'_{self.sync_namespace}_*.meta', self.sync_collection, True),
              ('*wheel.position.npy', 'alf', False),
              ('*wheel.timestamps.npy', 'alf', False),
              ('*experiment.description*', '', False)],
@@ -308,7 +310,8 @@ def run_qc(self, camera_data=None, update=True):
         if camera_data is None:
             camera_data, _ = self.extract_camera(save=False)
         qc = run_camera_qc(
-            self.session_path, self.cameras, one=self.one, sync_collection=self.sync_collection, sync_type=self.sync)
+            self.session_path, self.cameras, one=self.one, sync_collection=self.sync_collection, sync_type=self.sync,
+            update=update)
         return qc

     def _run(self, update=True, **kwargs):
@@ -347,7 +350,7 @@ def signature(self):
             'input_files': [(f'_iblrig_{cam}Camera.raw.mp4', self.device_collection, True) for cam in self.cameras],
             'output_files': [(f'_ibl_{cam}Camera.dlc.pqt', 'alf', True) for cam in self.cameras] +
                             [(f'{cam}Camera.ROIMotionEnergy.npy', 'alf', True) for cam in self.cameras] +
-                            [(f'{cam}ROIMotionEnergy.position.npy', 'alf', True)for cam in self.cameras]
+                            [(f'{cam}ROIMotionEnergy.position.npy', 'alf', True) for cam in self.cameras]
         }

         return signature
@@ -504,8 +507,11 @@ def signature(self):
             # In particular the raw videos don't need to be downloaded as they can be streamed
             [(f'_iblrig_{cam}Camera.raw.mp4', self.device_collection, True) for cam in self.cameras] +
             [(f'{cam}ROIMotionEnergy.position.npy', 'alf', False) for cam in self.cameras] +
+            [(f'{cam}Camera.ROIMotionEnergy.npy', 'alf', False) for cam in self.cameras] +
             # The trials table is used in the DLC QC, however this is not an essential dataset
-            [('_ibl_trials.table.pqt', self.trials_collection, False)],
+            [('_ibl_trials.table.pqt', self.trials_collection, False),
+             ('_ibl_wheel.position.npy', self.trials_collection, False),
+             ('_ibl_wheel.timestamps.npy', self.trials_collection, False)],
             'output_files': [(f'_ibl_{cam}Camera.features.pqt', 'alf', True) for cam in self.cameras] +
                             [('licks.times.npy', 'alf', True)]
         }
@@ -522,7 +528,7 @@ def _run(self, overwrite=True, run_qc=True, plot_qc=True):

         """
         # Check if output files exist locally
-        exist, output_files = self.assert_expected(self.signature['output_files'], silent=True)
+        exist, output_files = self.assert_expected(self.output_files, silent=True)
         if exist and not overwrite:
             _logger.warning('EphysPostDLC outputs exist and overwrite=False, skipping computations of outputs.')
         else:
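Throughout these signatures each entry reads as a (filename pattern, collection, required) tuple; a minimal illustrative sketch under that assumption (the entries here are hypothetical examples, not the library's):

```python
# Illustrative task signature following the (filename pattern, collection, required)
# convention visible in the diffs above.
signature = {
    'input_files': [
        ('_iblrig_leftCamera.raw.mp4', 'raw_video_data', True),  # required input
        ('_ibl_trials.table.pqt', 'alf', False),                 # optional: used by QC only
        ('_ibl_wheel.position.npy', 'alf', False),               # optional wheel data
    ],
    'output_files': [
        ('_ibl_leftCamera.features.pqt', 'alf', True),           # must exist after the run
    ],
}

# e.g. list the datasets that must be present before the task runs
required_inputs = [f for f, _, required in signature['input_files'] if required]
assert required_inputs == ['_iblrig_leftCamera.raw.mp4']
```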
13 changes: 9 additions & 4 deletions ibllib/qc/camera.py
@@ -420,10 +420,15 @@ def is_metric(x):
         outcome = max(map(spec.QC.validate, values))

         if update:
-            extended = {
-                k: spec.QC.NOT_SET if v is None else v
-                for k, v in self.metrics.items()
-            }
+            extended = dict()
+            for k, v in self.metrics.items():
+                if v is None:
+                    extended[k] = spec.QC.NOT_SET.name
+                elif isinstance(v, tuple):
+                    extended[k] = tuple(i.name if isinstance(i, spec.QC) else i for i in v)
+                else:
+                    extended[k] = v.name
+
             self.update_extended_qc(extended)
             self.update(outcome, namespace)
         return outcome, self.metrics
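The change posts enum names (strings) rather than enum members to the extended QC; a small runnable sketch with a stand-in QC enum mirroring the loop above:

```python
from enum import IntEnum

class QC(IntEnum):  # stand-in for one.alf.spec.QC, with hypothetical values
    NOT_SET = -1
    PASS = 0
    FAIL = 40

metrics = {'a': QC.PASS, 'b': None, 'c': (QC.FAIL, 0.3)}

# serialize enum members to their string names, handling None and tuple results
extended = {}
for k, v in metrics.items():
    if v is None:
        extended[k] = QC.NOT_SET.name
    elif isinstance(v, tuple):
        extended[k] = tuple(i.name if isinstance(i, QC) else i for i in v)
    else:
        extended[k] = v.name

assert extended == {'a': 'PASS', 'b': 'NOT_SET', 'c': ('FAIL', 0.3)}
```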