diff --git a/dfdx-core/Cargo.toml b/dfdx-core/Cargo.toml
index f15a4d91..0eeac1f4 100644
--- a/dfdx-core/Cargo.toml
+++ b/dfdx-core/Cargo.toml
@@ -38,6 +38,9 @@ half = { version = "2.3.1", optional = true, features = ["num-traits", "rand_distr"] }
 gemm = { version = "0.16.14", default-features = false, optional = true, features = ["rayon"] }
 rayon = { version = "1.7.0", optional = true }
 libm = { workspace = true }
+wgpu = { version = "0.18.0", optional = true }
+futures-lite = { version = "2.0.1", optional = true }
+thingbuf = { version = "0.1.4", optional = true }
 
 [dev-dependencies]
 tempfile = "3.3.0"
@@ -59,6 +62,7 @@ fast-alloc = ["std"]
 
 cuda = ["dep:cudarc", "dep:glob"]
 cudnn = ["cuda", "cudarc?/cudnn"]
+webgpu = ["dep:wgpu", "dep:futures-lite", "dep:thingbuf", "wgpu/expose-ids"]
 
 f16 = ["dep:half", "cudarc?/f16", "gemm?/f16"]
 
diff --git a/dfdx-core/src/tensor/error.rs b/dfdx-core/src/tensor/error.rs
index f6b43c32..906c474c 100644
--- a/dfdx-core/src/tensor/error.rs
+++ b/dfdx-core/src/tensor/error.rs
@@ -16,6 +16,12 @@ pub enum Error {
 
     #[cfg(feature = "cudnn")]
     CudnnError(cudarc::cudnn::CudnnError),
+
+    #[cfg(feature = "webgpu")]
+    WebgpuAdapterNotFound,
+
+    #[cfg(feature = "webgpu")]
+    WebgpuRequestDeviceError(wgpu::RequestDeviceError),
 }
 
 impl std::fmt::Display for Error {
diff --git a/dfdx-core/src/tensor/mod.rs b/dfdx-core/src/tensor/mod.rs
index 2a7f9db3..acc4074a 100644
--- a/dfdx-core/src/tensor/mod.rs
+++ b/dfdx-core/src/tensor/mod.rs
@@ -145,6 +145,8 @@ mod gradients;
 mod masks;
 #[cfg(feature = "numpy")]
 pub(crate) mod numpy;
+#[cfg(feature = "webgpu")]
+pub(crate) mod webgpu;
 #[cfg(feature = "numpy")]
 pub use numpy::NumpyDtype;
 mod error;
@@ -162,7 +164,7 @@ pub(crate) use storage_traits::{OneFillStorage, ZeroFillStorage};
 pub use tensorlike::Tensorlike;
 
 pub use cpu::Cpu;
-#[cfg(not(feature = "cuda"))]
+#[cfg(not(any(feature = "cuda", feature = "webgpu")))]
 pub type AutoDevice = Cpu;
 
 #[cfg(feature = "cuda")]
@@ -172,6 +174,11 @@ pub use cuda::Cuda;
 #[cfg(feature = "cuda")]
 pub type AutoDevice = Cuda;
 
+#[cfg(feature = "webgpu")]
+pub use webgpu::Webgpu;
+#[cfg(feature = "webgpu")]
+pub type AutoDevice = Webgpu;
+
 pub use storage_traits::{AsArray, CopySlice, TensorFrom, TensorFromVec, TensorToArray};
 pub use storage_traits::{Cache, RandomU64, Storage, Synchronize};
 pub use storage_traits::{OnesTensor, SampleTensor, TriangleTensor, ZerosTensor};
diff --git a/dfdx-core/src/tensor/webgpu/allocate.rs b/dfdx-core/src/tensor/webgpu/allocate.rs
new file mode 100644
index 00000000..49162381
--- /dev/null
+++ b/dfdx-core/src/tensor/webgpu/allocate.rs
@@ -0,0 +1,221 @@
+#![allow(clippy::needless_range_loop)]
+
+use crate::{
+    shapes::*,
+    tensor::{masks::triangle_mask, storage_traits::*, unique_id, Cpu, Error, NoneTape, Tensor},
+};
+
+use super::{device::CachableBuffer, Buffer, Webgpu};
+
+use core::marker::PhantomData;
+use rand::Rng;
+use std::{sync::Arc, vec::Vec};
+use wgpu::COPY_BUFFER_ALIGNMENT;
+
+pub(crate) fn round_to_buffer_alignment(size: u64) -> u64 {
+    (size + (COPY_BUFFER_ALIGNMENT - 1)) / COPY_BUFFER_ALIGNMENT * COPY_BUFFER_ALIGNMENT
+}
+
+impl Webgpu {
+    fn tensor_from_host_buf<S: Shape, E: Unit>(
+        &self,
+        shape: S,
+        buf: Vec<E>,
+    ) -> Result<Tensor<S, E, Self>, Error> {
+        let buffer = unsafe { self.alloc_empty::<E>(buf.len()) }?;
+        buffer.copy_to_device::<E>(&self.dev, &self.queue, &buf);
+
+        Ok(self.build_tensor(shape, shape.strides(), buffer))
+    }
+
+    pub(crate) fn build_tensor<S: Shape, E: Unit>(
+        &self,
+        shape: S,
+        strides: S::Concrete,
+        buffer: Buffer,
+    ) -> Tensor<S, E, Self> {
+        let data = CachableBuffer {
+            dev: self.dev.clone(),
+            queue: self.queue.clone(),
+            data: buffer,
+            cache: self.cache.clone(),
+            _phantom: PhantomData,
+        };
+        Tensor {
+            id: unique_id(),
+            data: Arc::new(data),
+            shape,
+            strides,
+            device: self.clone(),
+            tape: Default::default(),
+        }
+    }
+}
+
+impl<E: Unit> ZerosTensor<E> for Webgpu {
+    fn try_zeros_like<S: HasShape>(&self, src: &S) -> Result<Tensor<S::Shape, E, Self>, Error> {
+        let shape = *src.shape();
+        let strides = shape.strides();
+        let data = unsafe { self.alloc_empty::<E>(shape.num_elements()) }?;
+        data.copy_to_device(&self.dev, &self.queue, &vec![0u8; data.size()]);
+
+        Ok(self.build_tensor(shape, strides, data))
+    }
+}
+
+impl<E: Unit> ZeroFillStorage<E> for Webgpu {
+    fn try_fill_with_zeros(&self, storage: &mut Self::Vec) -> Result<(), Error> {
+        storage.copy_to_device(&self.dev, &self.queue, &vec![0u8; storage.size()]);
+
+        Ok(())
+    }
+}
+
+impl<E: Unit> OnesTensor<E> for Webgpu {
+    fn try_ones_like<S: HasShape>(&self, src: &S) -> Result<Tensor<S::Shape, E, Self>, Error> {
+        let shape = *src.shape();
+        let buf = vec![E::ONE; shape.num_elements()];
+        self.tensor_from_host_buf(shape, buf)
+    }
+}
+
+impl<E: Unit> TriangleTensor<E> for Webgpu
+where
+    Cpu: TriangleTensor<E>,
+{
+    fn try_upper_tri_like<S: HasShape>(
+        &self,
+        src: &S,
+        val: E,
+        diagonal: impl Into<Option<isize>>,
+    ) -> Result<Tensor<S::Shape, E, Self>, Error> {
+        let shape = *src.shape();
+        let mut data = vec![val; shape.num_elements()];
+        let offset = diagonal.into().unwrap_or(0);
+        triangle_mask(&mut data, &shape, true, offset);
+        self.tensor_from_host_buf(shape, data)
+    }
+
+    fn try_lower_tri_like<S: HasShape>(
+        &self,
+        src: &S,
+        val: E,
+        diagonal: impl Into<Option<isize>>,
+    ) -> Result<Tensor<S::Shape, E, Self>, Error> {
+        let shape = *src.shape();
+        let mut data = vec![val; shape.num_elements()];
+        let offset = diagonal.into().unwrap_or(0);
+        triangle_mask(&mut data, &shape, false, offset);
+        self.tensor_from_host_buf(shape, data)
+    }
+}
+
+impl<E: Unit> OneFillStorage<E> for Webgpu {
+    fn try_fill_with_ones(&self, storage: &mut Self::Vec) -> Result<(), Error> {
+        let len = storage.size() as usize / std::mem::size_of::<E>();
+        let buf = vec![E::ONE; len];
+        storage
+            .data
+            .copy_to_device::<E>(&self.dev, &self.queue, &buf);
+
+        Ok(())
+    }
+}
+
+impl<E: Unit> SampleTensor<E> for Webgpu
+where
+    Cpu: SampleTensor<E>,
+{
+    fn try_sample_like<S: HasShape, D: rand::distributions::Distribution<E>>(
+        &self,
+        src: &S,
+        distr: D,
+    ) -> Result<Tensor<S::Shape, E, Self>, Error> {
+        let shape = *src.shape();
+        let mut buf = Vec::with_capacity(shape.num_elements());
+        {
+            #[cfg(not(feature = "no-std"))]
+            let mut rng = self.cpu.rng.lock().unwrap();
+            #[cfg(feature = "no-std")]
+            let mut rng = self.cpu.rng.lock();
+            buf.resize_with(shape.num_elements(), || rng.sample(&distr));
+        }
+        self.tensor_from_host_buf::<S::Shape, E>(shape, buf)
+    }
+
+    fn try_fill_with_distr<D: rand::distributions::Distribution<E>>(
+        &self,
+        storage: &mut Self::Vec,
+        distr: D,
+    ) -> Result<(), Error> {
+        let len = storage.size() as usize / std::mem::size_of::<E>();
+        let mut buf = Vec::with_capacity(len);
+        {
+            #[cfg(not(feature = "no-std"))]
+            let mut rng = self.cpu.rng.lock().unwrap();
+            #[cfg(feature = "no-std")]
+            let mut rng = self.cpu.rng.lock();
+            buf.resize_with(len, || rng.sample(&distr));
+        }
+        unsafe {
+            std::ptr::copy_nonoverlapping(
+                buf.as_ptr(),
+                storage.data.slice(..).get_mapped_range_mut().as_mut_ptr() as *mut E,
+                len,
+            )
+        };
+        Ok(())
+    }
+}
+
+impl<E: Unit> CopySlice<E> for Webgpu {
+    fn copy_from<S: Shape, T>(dst: &mut Tensor<S, E, Self, T>, src: &[E]) {
+        assert_eq!(
+            dst.data.size() as usize,
+            src.len() * std::mem::size_of::<E>(),
+            "Slices must have same number of elements as *physical* Storage of tensors."
+        );
+        dst.data
+            .data
+            .copy_to_device(&dst.device.dev, &dst.device.queue, src);
+    }
+
+    fn copy_into<S: Shape, T>(src: &Tensor<S, E, Self, T>, dst: &mut [E]) {
+        assert_eq!(
+            src.data.size() as usize,
+            dst.len() * std::mem::size_of::<E>(),
+            "Slices must have same number of elements as *physical* Storage of tensors."
+        );
+        src.data
+            .data
+            .copy_to_host(&src.device.dev, &src.device.queue, dst);
+    }
+}
+
+impl<E: Unit> TensorFromVec<E> for Webgpu {
+    fn try_tensor_from_vec<S: Shape>(
+        &self,
+        src: Vec<E>,
+        shape: S,
+    ) -> Result<Tensor<S, E, Self>, Error> {
+        let num_elements = shape.num_elements();
+
+        if src.len() != num_elements {
+            Err(Error::WrongNumElements)
+        } else {
+            self.tensor_from_host_buf(shape, src)
+        }
+    }
+}
+
+impl<S: Shape, E: Unit> TensorToArray<S, E> for Webgpu
+where
+    Cpu: TensorToArray<S, E> + Storage<E>,
+{
+    type Array = <Cpu as TensorToArray<S, E>>::Array;
+    fn tensor_to_array<T>(&self, tensor: &Tensor<S, E, Self, T>) -> Self::Array {
+        let buf = tensor.as_vec();
+        let cpu_tensor = self.cpu.tensor_from_vec(buf, tensor.shape);
+        self.cpu.tensor_to_array::<NoneTape>(&cpu_tensor)
+    }
+}
diff --git a/dfdx-core/src/tensor/webgpu/device.rs b/dfdx-core/src/tensor/webgpu/device.rs
new file mode 100644
index 00000000..3cba06c7
--- /dev/null
+++ b/dfdx-core/src/tensor/webgpu/device.rs
@@ -0,0 +1,349 @@
+use wgpu::{
+    Adapter, BufferDescriptor, BufferUsages, Device, Instance, InstanceDescriptor, Maintain, Queue,
+    RequestDeviceError,
+};
+
+use crate::{
+    shapes::{Shape, Unit},
+    tensor::{
+        cache::TensorCache, cpu::Cpu, Cache, Error, NoneTape, RandomU64, Storage, Synchronize,
+        Tensor,
+    },
+};
+
+#[cfg(feature = "no-std")]
+use spin::Mutex;
+
+#[cfg(not(feature = "no-std"))]
+use std::sync::Mutex;
+
+use std::{marker::PhantomData, sync::Arc, vec::Vec};
+
+use super::allocate::round_to_buffer_alignment;
+
+#[derive(Debug)]
+pub struct Buffer {
+    pub(crate) data: wgpu::Buffer,
+    pub(crate) size: usize,
+}
+
+impl core::ops::Deref for Buffer {
+    type Target = wgpu::Buffer;
+
+    fn deref(&self) -> &Self::Target {
+        &self.data
+    }
+}
+
+impl Buffer {
+    pub(crate) fn size(&self) -> usize {
+        self.size
+    }
+
+    #[allow(unused)]
+    pub(crate) fn capacity(&self) -> usize {
+        self.data.size() as usize
+    }
+
+    pub(crate) fn copy_to_device<E>(&self, dev: &Device, queue: &Queue, slice: &[E]) {
+        let slice = unsafe {
+            std::slice::from_raw_parts(
+                slice.as_ptr() as *const u8,
+                slice.len() * std::mem::size_of::<E>(),
+            )
+        };
+        queue.write_buffer(&self.data, 0, slice);
+        queue.submit(std::iter::empty());
+        dev.poll(Maintain::Wait);
+    }
+
+    pub(crate) fn copy_to_host<E>(&self, dev: &Device, queue: &Queue, buf: &mut [E]) {
+        let (sender, receiver) = thingbuf::mpsc::channel(1);
+        let buffer = dev.create_buffer(&BufferDescriptor {
+            label: None,
+            size: self.size() as u64,
+            usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+        {
+            let mut encoder = dev.create_command_encoder(&Default::default());
+            encoder.copy_buffer_to_buffer(&self.data, 0, &buffer, 0, self.size() as u64);
+            queue.submit(Some(encoder.finish()));
+        }
+        let slice = buffer.slice(..self.size() as u64);
+        slice.map_async(wgpu::MapMode::Read, move |_| {
+            futures_lite::future::block_on(sender.send(())).unwrap();
+        });
+        dev.poll(Maintain::Wait);
+
+        let _ = futures_lite::future::block_on(receiver.recv());
+        let data = slice.get_mapped_range();
+        // TODO: How are we sure this is safe?
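+        // A sketch of why we believe this holds (still worth auditing, hence
+        // the TODO): the staging `buffer` holds exactly `self.size()` bytes
+        // copied from storage that was only ever written from host slices of
+        // `E` (or zero-filled), `E` is a plain-old-data scalar, and the mapped
+        // range spans the whole buffer. The remaining assumption is that the
+        // mapped pointer is sufficiently aligned for `E`; wgpu mapping works
+        // at `MAP_ALIGNMENT` (8-byte) granularity, which covers every scalar
+        // dtype used here.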
+        let slice = unsafe {
+            std::slice::from_raw_parts(
+                data.as_ptr() as *const E,
+                self.size() / std::mem::size_of::<E>(),
+            )
+        };
+        buf.copy_from_slice(slice);
+        drop(data);
+        buffer.unmap();
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct Webgpu {
+    pub(crate) cpu: Cpu,
+    #[allow(unused)]
+    pub(crate) instance: Arc<Instance>,
+    #[allow(unused)]
+    pub(crate) adapter: Arc<Adapter>,
+    pub(crate) dev: Arc<Device>,
+    pub(crate) queue: Arc<Queue>,
+
+    pub(crate) cache: Arc<TensorCache<Buffer>>,
+}
+
+impl From<RequestDeviceError> for Error {
+    fn from(e: RequestDeviceError) -> Self {
+        Error::WebgpuRequestDeviceError(e)
+    }
+}
+
+impl Default for Webgpu {
+    fn default() -> Self {
+        Self::seed_from_u64(0)
+    }
+}
+
+static CONSTRUCTOR_MUTEX: Mutex<()> = Mutex::new(());
+
+impl Webgpu {
+    pub fn seed_from_u64(seed: u64) -> Self {
+        Self::try_build(seed).unwrap()
+    }
+
+    pub fn try_build(seed: u64) -> Result<Self, Error> {
+        #[cfg(feature = "no-std")]
+        let _lock = { CONSTRUCTOR_MUTEX.lock() };
+        #[cfg(not(feature = "no-std"))]
+        let _lock = { CONSTRUCTOR_MUTEX.lock().unwrap() };
+
+        let cpu = Cpu::seed_from_u64(seed);
+        let instance = Arc::new(Instance::new(InstanceDescriptor::default()));
+        let adapter = futures_lite::future::block_on(instance.request_adapter(&Default::default()))
+            .ok_or(Error::WebgpuAdapterNotFound)?;
+        let adapter = Arc::new(adapter);
+        let (dev, queue) =
+            futures_lite::future::block_on(adapter.request_device(&Default::default(), None))?;
+        let dev = Arc::new(dev);
+        let queue = Arc::new(queue);
+
+        Ok(Self {
+            cpu,
+            instance,
+            adapter,
+            dev,
+            queue,
+
+            cache: Default::default(),
+        })
+    }
+}
+
+impl Webgpu {
+    pub(crate) unsafe fn alloc_empty<E>(&self, len: usize) -> Result<Buffer, Error> {
+        let data = self.cache.try_pop::<E>(len).map_or_else(
+            || Buffer {
+                data: self.dev.create_buffer(&BufferDescriptor {
+                    label: None,
+                    size: round_to_buffer_alignment((len * std::mem::size_of::<E>()) as u64),
+                    usage: BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
+                    mapped_at_creation: false,
+                }),
+                size: len * std::mem::size_of::<E>(),
+            },
+            |bfr| bfr,
+        );
+        Ok(data)
+    }
+
+    // #[allow(unused)]
+    // pub(crate) unsafe fn get_workspace<E>(&self, len: usize) -> Result<MutexGuard<Buffer>, Error> {
+    //     let num_bytes_required = len * std::mem::size_of::<E>();
+    //     let mut workspace = self.workspace.as_ref().lock().unwrap();
+
+    //     // re-allocate a larger workspace
+    //     if (workspace.size() as usize) < num_bytes_required {
+    //         *workspace = self.dev.create_buffer(&BufferDescriptor {
+    //             label: None,
+    //             size: (num_bytes_required) as u64,
+    //             usage: BufferUsages::all(),
+    //             mapped_at_creation: true,
+    //         });
+    //     }
+
+    //     Ok(workspace)
+    // }
+}
+
+#[derive(Debug)]
+pub struct CachableBuffer<E> {
+    pub(crate) dev: Arc<Device>,
+    pub(crate) queue: Arc<Queue>,
+    pub(crate) data: Buffer,
+    pub(crate) cache: Arc<TensorCache<Buffer>>,
+    pub(crate) _phantom: PhantomData<E>,
+}
+
+impl<E> Clone for CachableBuffer<E> {
+    fn clone(&self) -> Self {
+        let len = self.data.size() as usize / std::mem::size_of::<E>();
+        let (encoder, data) = self.cache.try_pop::<E>(len).map_or_else(
+            || {
+                let mut encoder = self.dev.create_command_encoder(&Default::default());
+                let bfr = self.dev.create_buffer(&BufferDescriptor {
+                    label: None,
+                    size: round_to_buffer_alignment(self.data.size() as u64),
+                    usage: BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
+                    mapped_at_creation: false,
+                });
+                encoder.copy_buffer_to_buffer(&self.data, 0, &bfr, 0, self.data.size() as u64);
+                (
+                    encoder,
+                    Buffer {
+                        data: bfr,
+                        size: self.data.size as usize,
+                    },
+                )
+            },
+            |bfr| {
+                let mut encoder = self.dev.create_command_encoder(&Default::default());
+                encoder.copy_buffer_to_buffer(&self.data, 0, &bfr, 0, self.data.size() as u64);
+                (encoder, bfr)
+            },
+        );
+        self.queue.submit(Some(encoder.finish()));
+        Self {
+            dev: self.dev.clone(),
+            queue: self.queue.clone(),
+            data,
+            cache: self.cache.clone(),
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<E> std::ops::Deref for CachableBuffer<E> {
+    type Target = Buffer;
+
+    fn deref(&self) -> &Self::Target {
+        &self.data
+    }
+}
+
+impl<E> std::ops::DerefMut for CachableBuffer<E> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.data
+    }
+}
+
+impl<E> Drop for CachableBuffer<E> {
+    fn drop(&mut self) {
+        if self.cache.is_enabled() {
+            let data = std::mem::replace(
+                &mut self.data,
+                Buffer {
+                    data: self.dev.create_buffer(&BufferDescriptor {
+                        label: None,
+                        size: 0,
+                        usage: BufferUsages::MAP_READ,
+                        mapped_at_creation: false,
+                    }),
+                    size: 0,
+                },
+            );
+            let len = data.size() as usize / std::mem::size_of::<E>();
+            self.cache.insert::<E>(len, data);
+        }
+    }
+}
+
+impl RandomU64 for Webgpu {
+    fn random_u64(&self) -> u64 {
+        self.cpu.random_u64()
+    }
+}
+
+impl Cache for Webgpu {
+    fn try_enable_cache(&self) -> Result<(), Error> {
+        self.cache.enable();
+        Ok(())
+    }
+
+    fn try_disable_cache(&self) -> Result<(), Error> {
+        self.cache.disable();
+        self.try_empty_cache()
+    }
+
+    fn try_empty_cache(&self) -> Result<(), Error> {
+        #[cfg(not(feature = "no-std"))]
+        let mut cache = self.cache.allocations.write().unwrap();
+        #[cfg(feature = "no-std")]
+        let mut cache = self.cache.allocations.write();
+        for (&_key, allocations) in cache.iter_mut() {
+            for alloc in allocations.drain(..) {
+                drop(alloc);
+            }
+        }
+        cache.clear();
+        Ok(())
+    }
+}
+
+impl Synchronize for Webgpu {
+    fn try_synchronize(&self) -> Result<(), Error> {
+        self.dev.poll(wgpu::MaintainBase::Wait);
+        Ok(())
+    }
+}
+
+impl<E: Unit> Storage<E> for Webgpu {
+    type Vec = CachableBuffer<E>;
+
+    fn try_alloc_len(&self, len: usize) -> Result<Self::Vec, Error> {
+        let data = unsafe { self.alloc_empty::<E>(len) }?;
+        Ok(CachableBuffer {
+            dev: self.dev.clone(),
+            queue: self.queue.clone(),
+            data,
+            cache: self.cache.clone(),
+            _phantom: PhantomData,
+        })
+    }
+
+    fn len(&self, v: &Self::Vec) -> usize {
+        v.size() as usize / std::mem::size_of::<E>()
+    }
+
+    fn tensor_to_vec<S: Shape, T>(&self, tensor: &Tensor<S, E, Self, T>) -> Vec<E> {
+        let buf = self
+            .cpu
+            .try_alloc_elem::<E>(
+                tensor.data.data.size() as usize / std::mem::size_of::<E>(),
+                Default::default(),
+            )
+            .unwrap();
+        let mut cpu_tensor = Tensor {
+            id: tensor.id,
+            data: Arc::new(buf),
+            shape: tensor.shape,
+            strides: tensor.strides,
+            device: self.cpu.clone(),
+            tape: NoneTape,
+        };
+        let buf = Arc::get_mut(&mut cpu_tensor.data).unwrap();
+        tensor.data.copy_to_host::<E>(&self.dev, &self.queue, buf);
+        self.cpu.tensor_to_vec::<S, NoneTape>(&cpu_tensor)
+    }
+}
diff --git a/dfdx-core/src/tensor/webgpu/mod.rs b/dfdx-core/src/tensor/webgpu/mod.rs
new file mode 100644
index 00000000..666ce53e
--- /dev/null
+++ b/dfdx-core/src/tensor/webgpu/mod.rs
@@ -0,0 +1,135 @@
+mod allocate;
+mod device;
+
+pub use device::Buffer;
+pub use device::Webgpu;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::{shapes::*, tensor::*};
+
+    #[test]
+    fn test_empty_cache() {
+        let dev: Webgpu = Default::default();
+        dev.enable_cache();
+        let tensor: Tensor<Rank2<2, 3>, f32, _> = dev.zeros();
+        drop(tensor); // insert allocation into cache
+        assert_eq!(dev.cache.len(), 1);
+        dev.empty_cache();
+        assert_eq!(dev.cache.len(), 0);
+    }
+
+    #[test]
+    fn test_disabling_cache_empties_it() {
+        let dev: Webgpu = Default::default();
+        dev.enable_cache();
+        let tensor: Tensor<Rank2<2, 3>, f32, _> = dev.zeros();
+        drop(tensor); // insert allocation into cache
+        assert_eq!(dev.cache.len(), 1);
+        dev.disable_cache();
+        assert_eq!(dev.cache.len(), 0);
+    }
+
+    #[test]
+    fn test_reuse_allocation_on_new_tensor() {
+        let dev: Webgpu = Default::default();
+        dev.enable_cache();
+        let tensor: Tensor<Rank2<2, 3>, f32, _> = dev.zeros();
+        let id = tensor.data.data.global_id();
+        drop(tensor); // insert allocation into cache
+        assert_eq!(dev.cache.len(), 1);
+        let other: Tensor<Rank2<2, 3>, f64, _> = dev.zeros();
+        assert_eq!(dev.cache.len(), 1);
+        let tensor: Tensor<Rank2<2, 3>, f32, _> = dev.zeros();
+        assert_eq!(dev.cache.len(), 0);
+        assert_eq!(tensor.data.data.global_id(), id);
+        drop(other);
+    }
+
+    #[test]
+    fn test_reuse_allocation_on_clone_tensor() {
+        let dev: Webgpu = Default::default();
+        dev.enable_cache();
+        let a: Tensor<Rank2<2, 3>, f32, _> = dev.zeros();
+        let b: Tensor<Rank2<2, 3>, f32, _> = dev.zeros();
+        drop(b); // insert allocation into cache
+        assert_eq!(dev.cache.len(), 1);
+        let mut b = a.clone();
+        assert_eq!(dev.cache.len(), 1);
+        // will actually clone the data - should reuse allocation from cache
+        std::sync::Arc::make_mut(&mut b.data);
+        assert_eq!(dev.cache.len(), 0);
+    }
+
+    #[test]
+    fn test_new_allocation_on_clone_tensor() {
+        let dev: Webgpu = Default::default();
+        dev.enable_cache();
+        let a: Tensor<Rank2<2, 3>, f32, _> = dev.zeros();
+        let mut b = a.clone();
+        assert_eq!(dev.cache.len(), 0);
+        // will actually clone the data - should create new allocation
+        std::sync::Arc::make_mut(&mut b.data);
+        assert_eq!(dev.cache.len(), 0);
+    }
+
+    #[test]
+    fn test_ones_like() {
+        let dev: Webgpu = Default::default();
+        let a: Tensor<Rank2<2, 3>, f32, _> = dev.ones();
+        let b: Tensor<Rank2<2, 3>, f32, _> = dev.ones_like(&a);
+        assert_eq!(a.array(), [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]);
+        assert_eq!(a.array(), b.array());
+    }
+
+    #[test]
+    fn test_copy() {
+        let dev: Webgpu = Default::default();
+        let mut b: Tensor<Rank2<2, 3>, f32, _> = dev.zeros();
+        b.copy_from(&[1.0; 6]);
+        assert_eq!(b.array(), [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]);
+        let mut slice = [0.0; 6];
+        b.copy_into(&mut slice);
+        assert_eq!(slice, [1.0; 6]);
+    }
+
+    #[test]
+    fn test_fill_zeros() {
+        let dev: Webgpu = Default::default();
+        let mut b: Tensor<Rank2<2, 3>, f32, _> = dev.ones();
+        assert_eq!(b.array(), [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]);
+        b.fill_with_zeros();
+        assert_eq!(b.array(), [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]);
+    }
+
+    #[test]
+    fn test_fill_ones() {
+        let dev: Webgpu = Default::default();
+        let mut b: Tensor<Rank2<2, 3>, f32, _> = dev.zeros();
+        assert_eq!(b.array(), [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]);
+        b.fill_with_ones();
+        assert_eq!(b.array(), [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]);
+    }
+
+    #[test]
+    fn test_sample() {
+        let dev: Webgpu = Default::default();
+        let b: Tensor<Rank2<2, 3>, f32, _> = dev.sample_uniform();
+        assert_eq!(
+            b.array(),
+            [
+                [0.80145925, 0.7311134, 0.55528885],
+                [0.77346015, 0.809342, 0.025844634]
+            ]
+        );
+    }
+
+    #[test]
+    fn test_from_vec() {
+        let dev: Webgpu = Default::default();
+        let b: Tensor<Rank2<2, 3>, f32, _> =
+            dev.tensor_from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], (Const::<2>, Const::<3>));
+        assert_eq!(b.array(), [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/abs/mod.rs b/dfdx-core/src/tensor_ops/abs/mod.rs
index f7ac117a..45c7794d 100644
--- a/dfdx-core/src/tensor_ops/abs/mod.rs
+++ b/dfdx-core/src/tensor_ops/abs/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
diff --git a/dfdx-core/src/tensor_ops/abs/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/abs/webgpu_kernel.rs
new file mode 100644
index 00000000..c993ee91
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/abs/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::AbsKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::AbsKernelOp,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::AbsKernelOp,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/accurate_gelu/mod.rs b/dfdx-core/src/tensor_ops/accurate_gelu/mod.rs
index f409954e..396c7fa2 100644
--- a/dfdx-core/src/tensor_ops/accurate_gelu/mod.rs
+++ b/dfdx-core/src/tensor_ops/accurate_gelu/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
diff --git a/dfdx-core/src/tensor_ops/accurate_gelu/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/accurate_gelu/webgpu_kernel.rs
new file mode 100644
index 00000000..080a857d
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/accurate_gelu/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::AccurateGeLUKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::AccurateGeLUKernelOp,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::AccurateGeLUKernelOp,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/adam/mod.rs b/dfdx-core/src/tensor_ops/adam/mod.rs
index b9a30723..9b2372e4 100644
--- a/dfdx-core/src/tensor_ops/adam/mod.rs
+++ b/dfdx-core/src/tensor_ops/adam/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use crate::{
     shapes::{Dtype, Shape},
     tensor::{Error, Storage, Tensor},
diff --git a/dfdx-core/src/tensor_ops/adam/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/adam/webgpu_kernel.rs
new file mode 100644
index 00000000..1ab9cbde
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/adam/webgpu_kernel.rs
@@ -0,0 +1,15 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::AdamKernel<E> for Webgpu {
+    fn adam_kernel(
+        &self,
+        t: i32,
+        cfg: &crate::prelude::AdamConfig,
+        param: &mut Self::Vec,
+        moment1: &mut Self::Vec,
+        moment2: &mut Self::Vec,
+        grad: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/add/mod.rs b/dfdx-core/src/tensor_ops/add/mod.rs
index 29010e6f..33c27184 100644
--- a/dfdx-core/src/tensor_ops/add/mod.rs
+++ b/dfdx-core/src/tensor_ops/add/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::*;
 use crate::{
     shapes::*,
diff --git a/dfdx-core/src/tensor_ops/add/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/add/webgpu_kernel.rs
new file mode 100644
index 00000000..91becc55
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/add/webgpu_kernel.rs
@@ -0,0 +1,56 @@
+use std::borrow::Cow;
+
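+// NOTE: every webgpu kernel in this change follows the same pattern as this
+// file: the impls satisfy the kernel trait bounds so the `Webgpu` device
+// compiles against the rest of the crate, and each body is `todo!()` until a
+// WGSL compute shader backs it. A plausible shape for a real `forward` (an
+// assumption, not implemented anywhere yet): allocate an output `Buffer`,
+// create a `wgpu::ComputePipeline` for the op's shader, bind input and output
+// as storage buffers, dispatch, and wrap the result via `build_tensor`.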
+use crate::prelude::{
+    ops::{BinaryKernel, UnaryKernel},
+    Dtype, Webgpu,
+};
+
+impl<E: Dtype> UnaryKernel<super::ScalarAddKernelOp<E>, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = true;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::ScalarAddKernelOp<E>,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::ScalarAddKernelOp<E>,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> BinaryKernel<super::BinaryAddKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_DATA: bool = true;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::BinaryAddKernelOp,
+        lhs: Cow<crate::prelude::Tensor<S, E, Self>>,
+        rhs: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::BinaryAddKernelOp,
+        lhs: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_lhs: &mut Self::Vec,
+        rhs: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_rhs: &mut Self::Vec,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/axpy/mod.rs b/dfdx-core/src/tensor_ops/axpy/mod.rs
index 45e6f465..e0b1e66a 100644
--- a/dfdx-core/src/tensor_ops/axpy/mod.rs
+++ b/dfdx-core/src/tensor_ops/axpy/mod.rs
@@ -7,6 +7,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 /// Elementwise `a * alpha + b * beta`.
 ///
 /// See [Tensor::axpy] for in place version.
diff --git a/dfdx-core/src/tensor_ops/axpy/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/axpy/webgpu_kernel.rs
new file mode 100644
index 00000000..b820483e
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/axpy/webgpu_kernel.rs
@@ -0,0 +1,13 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::AxpyKernel<E> for Webgpu {
+    fn forward(
+        &self,
+        a: &mut Self::Vec,
+        alpha: E,
+        b: &Self::Vec,
+        beta: E,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/bce/mod.rs b/dfdx-core/src/tensor_ops/bce/mod.rs
index 0df0bfeb..48735e68 100644
--- a/dfdx-core/src/tensor_ops/bce/mod.rs
+++ b/dfdx-core/src/tensor_ops/bce/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_binary_op, BinaryKernel};
 use crate::{shapes::*, tensor::*};
diff --git a/dfdx-core/src/tensor_ops/bce/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/bce/webgpu_kernel.rs
new file mode 100644
index 00000000..02b7f3cf
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/bce/webgpu_kernel.rs
@@ -0,0 +1,27 @@
+use crate::prelude::{ops::BinaryKernel, Dtype, Webgpu};
+use std::borrow::Cow;
+
+impl<E: Dtype> BinaryKernel<super::BCEKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::BCEKernelOp,
+        lhs: Cow<crate::prelude::Tensor<S, E, Self>>,
+        rhs: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::BCEKernelOp,
+        lhs: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_lhs: &mut Self::Vec,
+        rhs: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_rhs: &mut Self::Vec,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/boolean/mod.rs b/dfdx-core/src/tensor_ops/boolean/mod.rs
index bb8cedf4..e86c4d16 100644
--- a/dfdx-core/src/tensor_ops/boolean/mod.rs
+++ b/dfdx-core/src/tensor_ops/boolean/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernels;
 #[cfg(feature = "cuda")]
 mod cuda_kernels;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernels;
+
 use crate::{
     prelude::{OnesTensor, Tensor, ZerosTensor},
     shapes::*,
diff --git a/dfdx-core/src/tensor_ops/boolean/webgpu_kernels.rs b/dfdx-core/src/tensor_ops/boolean/webgpu_kernels.rs
new file mode 100644
index 00000000..d98ad772
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/boolean/webgpu_kernels.rs
@@ -0,0 +1,34 @@
+use crate::prelude::Webgpu;
+
+impl super::BooleanKernel for Webgpu {
+    fn not<S: crate::prelude::Shape>(
+        &self,
+        inp: &crate::prelude::Tensor<S, bool, Self>,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn and<S: crate::prelude::Shape>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, bool, Self>,
+        rhs: &crate::prelude::Tensor<S, bool, Self>,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn or<S: crate::prelude::Shape>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, bool, Self>,
+        rhs: &crate::prelude::Tensor<S, bool, Self>,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn xor<S: crate::prelude::Shape>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, bool, Self>,
+        rhs: &crate::prelude::Tensor<S, bool, Self>,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/choose/mod.rs b/dfdx-core/src/tensor_ops/choose/mod.rs
index a82a5c79..f391bd75 100644
--- a/dfdx-core/src/tensor_ops/choose/mod.rs
+++ b/dfdx-core/src/tensor_ops/choose/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use crate::{
     shapes::{Dtype, HasShape, Shape},
     tensor::{Error, Merge, PutTape, SplitTape, Storage, Tape, Tensor},
diff --git a/dfdx-core/src/tensor_ops/choose/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/choose/webgpu_kernel.rs
new file mode 100644
index 00000000..ac0bcf8a
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/choose/webgpu_kernel.rs
@@ -0,0 +1,24 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::ChooseKernel<E> for Webgpu {
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        cond: &crate::prelude::Tensor<S, bool, Self>,
+        lhs: &crate::prelude::Tensor<S, E, Self>,
+        rhs: &crate::prelude::Tensor<S, E, Self>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        cond: &crate::prelude::Tensor<S, bool, Self>,
+        lhs: &crate::prelude::Tensor<S, E, Self>,
+        grad_lhs: &mut <Self as crate::prelude::Storage<E>>::Vec,
+        rhs: &crate::prelude::Tensor<S, E, Self>,
+        grad_rhs: &mut <Self as crate::prelude::Storage<E>>::Vec,
+        grad_out: &<Self as crate::prelude::Storage<E>>::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/clamp/mod.rs b/dfdx-core/src/tensor_ops/clamp/mod.rs
index 1054d0ff..88d246bc 100644
--- a/dfdx-core/src/tensor_ops/clamp/mod.rs
+++ b/dfdx-core/src/tensor_ops/clamp/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
diff --git a/dfdx-core/src/tensor_ops/clamp/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/clamp/webgpu_kernel.rs
new file mode 100644
index 00000000..df700d20
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/clamp/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::ClampKernelOp<E>, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::ClampKernelOp<E>,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::ClampKernelOp<E>,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/cmp/mod.rs b/dfdx-core/src/tensor_ops/cmp/mod.rs
index cf5feed3..b82d7e27 100644
--- a/dfdx-core/src/tensor_ops/cmp/mod.rs
+++ b/dfdx-core/src/tensor_ops/cmp/mod.rs
@@ -7,6 +7,9 @@ mod cpu_kernels;
 #[cfg(feature = "cuda")]
 mod cuda_kernels;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernels;
+
 pub trait CmpKernel<Op, E: Dtype>: Storage<E> + Storage<bool> {
     fn forward<S: Shape, T>(
         &self,
diff --git a/dfdx-core/src/tensor_ops/cmp/webgpu_kernels.rs b/dfdx-core/src/tensor_ops/cmp/webgpu_kernels.rs
new file mode 100644
index 00000000..c265636b
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/cmp/webgpu_kernels.rs
@@ -0,0 +1,121 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::CmpKernel<super::EqKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: &crate::prelude::Tensor<S, E, Self, T>,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::CmpKernel<super::NeKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: &crate::prelude::Tensor<S, E, Self, T>,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::CmpKernel<super::GtKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: &crate::prelude::Tensor<S, E, Self, T>,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::CmpKernel<super::GeKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: &crate::prelude::Tensor<S, E, Self, T>,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::CmpKernel<super::LtKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: &crate::prelude::Tensor<S, E, Self, T>,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::CmpKernel<super::LeKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: &crate::prelude::Tensor<S, E, Self, T>,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::ScalarCmpKernel<super::EqKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: E,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::ScalarCmpKernel<super::NeKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: E,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::ScalarCmpKernel<super::GtKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: E,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::ScalarCmpKernel<super::GeKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: E,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::ScalarCmpKernel<super::LtKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: E,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::ScalarCmpKernel<super::LeKernelOp, E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, T>(
+        &self,
+        lhs: &crate::prelude::Tensor<S, E, Self, T>,
+        rhs: E,
+    ) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/concat/mod.rs b/dfdx-core/src/tensor_ops/concat/mod.rs
index da6cdfee..1e719fc5 100644
--- a/dfdx-core/src/tensor_ops/concat/mod.rs
+++ b/dfdx-core/src/tensor_ops/concat/mod.rs
@@ -4,6 +4,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 /// Concatenate two tensors along the first dimension.
 ///
 /// **Pytorch equivalent** `torch.concat`.
diff --git a/dfdx-core/src/tensor_ops/concat/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/concat/webgpu_kernel.rs
new file mode 100644
index 00000000..2a36ad69
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/concat/webgpu_kernel.rs
@@ -0,0 +1,25 @@
+use crate::{shapes::*, tensor::*};
+
+use super::ConcatShape;
+
+impl<E: Dtype> super::ConcatKernel<E> for Webgpu {
+    fn forward<A: Shape, B: Shape>(
+        &self,
+        a: &Tensor<A, E, Self>,
+        b: &Tensor<B, E, Self>,
+    ) -> Result<Tensor<A::Catted, E, Self>, Error>
+    where
+        A: ConcatShape<B>,
+    {
+        todo!()
+    }
+
+    fn backward(
+        &self,
+        grad_a: &mut Self::Vec,
+        grad_b: &mut Self::Vec,
+        grad_out: &Self::Vec,
+    ) -> Result<(), Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/concat_along/mod.rs b/dfdx-core/src/tensor_ops/concat_along/mod.rs
index 0c796d6e..92816692 100644
--- a/dfdx-core/src/tensor_ops/concat_along/mod.rs
+++ b/dfdx-core/src/tensor_ops/concat_along/mod.rs
@@ -3,6 +3,8 @@ use crate::{shapes::*, tensor::*};
 mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
 
 /// Concatenate two tensors along a given axis.
 ///
diff --git a/dfdx-core/src/tensor_ops/concat_along/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/concat_along/webgpu_kernel.rs
new file mode 100644
index 00000000..db789f77
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/concat_along/webgpu_kernel.rs
@@ -0,0 +1,25 @@
+use crate::{shapes::*, tensor::*};
+
+impl<E: Dtype> super::ConcatAlongKernel<E> for Webgpu {
+    fn forward<A: Shape, B: Shape, C: Shape>(
+        &self,
+        ax: usize,
+        a: &Tensor<A, E, Self>,
+        b: &Tensor<B, E, Self>,
+        c: &mut Tensor<C, E, Self>,
+    ) -> Result<(), Error> {
+        todo!()
+    }
+
+    fn backward<A: Shape, B: Shape>(
+        &self,
+        ax: usize,
+        a: &GhostTensor<A, E, Self>,
+        grad_a: &mut Self::Vec,
+        b: &GhostTensor<B, E, Self>,
+        grad_b: &mut Self::Vec,
+        grad_out: &Self::Vec,
+    ) -> Result<(), Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/cos/mod.rs b/dfdx-core/src/tensor_ops/cos/mod.rs
index a18be20b..434b1db8 100644
--- a/dfdx-core/src/tensor_ops/cos/mod.rs
+++ b/dfdx-core/src/tensor_ops/cos/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
diff --git a/dfdx-core/src/tensor_ops/cos/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/cos/webgpu_kernel.rs
new file mode 100644
index 00000000..a59bb5c8
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/cos/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::CosKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::CosKernelOp,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::CosKernelOp,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/div/mod.rs b/dfdx-core/src/tensor_ops/div/mod.rs
index 41b0fe58..7aa56063 100644
--- a/dfdx-core/src/tensor_ops/div/mod.rs
+++ b/dfdx-core/src/tensor_ops/div/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::*;
 use crate::{shapes::*, tensor::*};
diff --git a/dfdx-core/src/tensor_ops/div/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/div/webgpu_kernel.rs
new file mode 100644
index 00000000..3a15ef7e
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/div/webgpu_kernel.rs
@@ -0,0 +1,56 @@
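+// Note for the eventual implementation: unlike addition, division's
+// derivatives depend on operand values (d/dx (x/y) = 1/y, d/dy (x/y) =
+// -x/y^2), while the scalar variant's derivative is the constant 1/scalar,
+// which `ScalarDivKernelOp<E>` already carries in the op itself.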
+use std::borrow::Cow; + +use crate::prelude::{ + ops::{BinaryKernel, UnaryKernel}, + Dtype, Webgpu, +}; + +impl UnaryKernel, E> for Webgpu { + const BACKWARD_WITHOUT_INP: bool = false; + + const BACKWARD_WITHOUT_DATA: bool = true; + + fn forward( + &self, + op: super::ScalarDivKernelOp, + inp: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::ScalarDivKernelOp, + inp: &impl crate::prelude::Tensorlike, + grad_inp: &mut Self::Vec, + out: &impl crate::prelude::Tensorlike, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} + +impl BinaryKernel for Webgpu { + const BACKWARD_WITHOUT_DATA: bool = true; + + fn forward( + &self, + op: super::BinaryDivKernelOp, + lhs: Cow>, + rhs: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::BinaryDivKernelOp, + lhs: &impl crate::prelude::Tensorlike, + grad_lhs: &mut Self::Vec, + rhs: &impl crate::prelude::Tensorlike, + grad_rhs: &mut Self::Vec, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/dropout/mod.rs b/dfdx-core/src/tensor_ops/dropout/mod.rs index 0ef0a10f..9277669a 100644 --- a/dfdx-core/src/tensor_ops/dropout/mod.rs +++ b/dfdx-core/src/tensor_ops/dropout/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use crate::{shapes::*, tensor::*}; #[repr(C)] diff --git a/dfdx-core/src/tensor_ops/dropout/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/dropout/webgpu_kernel.rs new file mode 100644 index 00000000..af8c9247 --- /dev/null +++ b/dfdx-core/src/tensor_ops/dropout/webgpu_kernel.rs @@ -0,0 +1,21 @@ +use crate::prelude::{Dtype, Webgpu}; + +impl super::DropoutKernel for Webgpu { + fn forward( + &self, + op: super::DropoutKernelOp, + inp: &crate::prelude::Tensor, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::DropoutKernelOp, + inp: &crate::prelude::Tensor, + grad_inp: &mut Self::Vec, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/exp/mod.rs b/dfdx-core/src/tensor_ops/exp/mod.rs index 3d04959d..5d1066f3 100644 --- a/dfdx-core/src/tensor_ops/exp/mod.rs +++ b/dfdx-core/src/tensor_ops/exp/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use super::ops::{try_unary_op, UnaryKernel}; use crate::{shapes::*, tensor::*}; diff --git a/dfdx-core/src/tensor_ops/exp/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/exp/webgpu_kernel.rs new file mode 100644 index 00000000..4f552b49 --- /dev/null +++ b/dfdx-core/src/tensor_ops/exp/webgpu_kernel.rs @@ -0,0 +1,28 @@ +use std::borrow::Cow; + +use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu}; + +impl UnaryKernel for Webgpu { + const BACKWARD_WITHOUT_INP: bool = false; + + const BACKWARD_WITHOUT_DATA: bool = false; + + fn forward( + &self, + op: super::ExpKernelOp, + inp: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::ExpKernelOp, + inp: &impl crate::prelude::Tensorlike, + grad_inp: &mut Self::Vec, + out: &impl crate::prelude::Tensorlike, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/fast_gelu/mod.rs b/dfdx-core/src/tensor_ops/fast_gelu/mod.rs index 6a4b46df..45c7dad6 100644 --- a/dfdx-core/src/tensor_ops/fast_gelu/mod.rs +++ 
b/dfdx-core/src/tensor_ops/fast_gelu/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use super::ops::{try_unary_op, UnaryKernel}; use crate::{shapes::*, tensor::*}; diff --git a/dfdx-core/src/tensor_ops/fast_gelu/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/fast_gelu/webgpu_kernel.rs new file mode 100644 index 00000000..cbdce3d9 --- /dev/null +++ b/dfdx-core/src/tensor_ops/fast_gelu/webgpu_kernel.rs @@ -0,0 +1,28 @@ +use std::borrow::Cow; + +use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu}; + +impl UnaryKernel for Webgpu { + const BACKWARD_WITHOUT_INP: bool = false; + + const BACKWARD_WITHOUT_DATA: bool = false; + + fn forward( + &self, + op: super::FastGeLUKernelOp, + inp: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::FastGeLUKernelOp, + inp: &impl crate::prelude::Tensorlike, + grad_inp: &mut Self::Vec, + out: &impl crate::prelude::Tensorlike, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/huber_error/mod.rs b/dfdx-core/src/tensor_ops/huber_error/mod.rs index 38bd27b9..fb7df26e 100644 --- a/dfdx-core/src/tensor_ops/huber_error/mod.rs +++ b/dfdx-core/src/tensor_ops/huber_error/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use super::{ops::try_binary_op, Device}; use crate::{shapes::*, tensor::*}; diff --git a/dfdx-core/src/tensor_ops/huber_error/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/huber_error/webgpu_kernel.rs new file mode 100644 index 00000000..b66b7d1e --- /dev/null +++ b/dfdx-core/src/tensor_ops/huber_error/webgpu_kernel.rs @@ -0,0 +1,27 @@ +use crate::prelude::{ops::BinaryKernel, Dtype, Webgpu}; +use std::borrow::Cow; + +impl BinaryKernel, E> for Webgpu { + const BACKWARD_WITHOUT_DATA: bool = false; + + fn forward( + &self, + op: super::HuberErrorKernelOp, + lhs: Cow>, + rhs: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::HuberErrorKernelOp, + lhs: &impl crate::prelude::Tensorlike, + grad_lhs: &mut Self::Vec, + rhs: &impl crate::prelude::Tensorlike, + grad_rhs: &mut Self::Vec, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/ln/mod.rs b/dfdx-core/src/tensor_ops/ln/mod.rs index 2e1ae067..51bc001f 100644 --- a/dfdx-core/src/tensor_ops/ln/mod.rs +++ b/dfdx-core/src/tensor_ops/ln/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use super::ops::{try_unary_op, UnaryKernel}; use crate::{shapes::*, tensor::*}; diff --git a/dfdx-core/src/tensor_ops/ln/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/ln/webgpu_kernel.rs new file mode 100644 index 00000000..64694c6f --- /dev/null +++ b/dfdx-core/src/tensor_ops/ln/webgpu_kernel.rs @@ -0,0 +1,28 @@ +use std::borrow::Cow; + +use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu}; + +impl UnaryKernel for Webgpu { + const BACKWARD_WITHOUT_INP: bool = false; + + const BACKWARD_WITHOUT_DATA: bool = false; + + fn forward( + &self, + op: super::LnKernelOp, + inp: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::LnKernelOp, + inp: &impl crate::prelude::Tensorlike, + grad_inp: &mut Self::Vec, + out: &impl crate::prelude::Tensorlike, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() 
+ } +} diff --git a/dfdx-core/src/tensor_ops/matmul/mod.rs b/dfdx-core/src/tensor_ops/matmul/mod.rs index b5ece54b..5e4d03b3 100644 --- a/dfdx-core/src/tensor_ops/matmul/mod.rs +++ b/dfdx-core/src/tensor_ops/matmul/mod.rs @@ -5,6 +5,9 @@ pub(super) mod cpu_kernel; #[cfg(feature = "cuda")] pub(super) mod cuda_kernel; +#[cfg(feature = "webgpu")] +pub(super) mod webgpu_kernel; + use crate::{ shapes::{Const, Dim, Dtype, Shape}, tensor::{Error, Merge, PutTape, SplitTape, Storage, Tape, Tensor}, diff --git a/dfdx-core/src/tensor_ops/matmul/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/matmul/webgpu_kernel.rs new file mode 100644 index 00000000..58ef7586 --- /dev/null +++ b/dfdx-core/src/tensor_ops/matmul/webgpu_kernel.rs @@ -0,0 +1,117 @@ +use crate::prelude::{Dtype, Webgpu}; + +impl super::MatMatKernel for Webgpu { + fn forward( + &self, + lhs: &crate::prelude::Tensor<(M, K), E, Self>, + rhs: &crate::prelude::Tensor<(K, N), E, Self>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + lhs: &crate::prelude::Tensor<(M, K), E, Self>, + grad_lhs: &mut Self::Vec, + rhs: &crate::prelude::Tensor<(K, N), E, Self>, + grad_rhs: &mut Self::Vec, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} + +impl super::MatMatBrKernel for Webgpu { + fn forward< + B: crate::prelude::Dim, + M: crate::prelude::Dim, + K: crate::prelude::Dim, + N: crate::prelude::Dim, + >( + &self, + lhs: &crate::prelude::Tensor<(B, M, K), E, Self>, + rhs: &crate::prelude::Tensor<(K, N), E, Self>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward< + B: crate::prelude::Dim, + M: crate::prelude::Dim, + K: crate::prelude::Dim, + N: crate::prelude::Dim, + >( + &self, + lhs: &crate::prelude::Tensor<(B, M, K), E, Self>, + grad_lhs: &mut Self::Vec, + rhs: &crate::prelude::Tensor<(K, N), E, Self>, + grad_rhs: &mut Self::Vec, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} + +impl super::MatMatBatch3Kernel for Webgpu { + fn forward< + B: crate::prelude::Dim, + M: crate::prelude::Dim, + K: crate::prelude::Dim, + N: crate::prelude::Dim, + >( + &self, + lhs: &crate::prelude::Tensor<(B, M, K), E, Self>, + rhs: &crate::prelude::Tensor<(B, K, N), E, Self>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward< + B: crate::prelude::Dim, + M: crate::prelude::Dim, + K: crate::prelude::Dim, + N: crate::prelude::Dim, + >( + &self, + lhs: &crate::prelude::Tensor<(B, M, K), E, Self>, + grad_lhs: &mut Self::Vec, + rhs: &crate::prelude::Tensor<(B, K, N), E, Self>, + grad_rhs: &mut Self::Vec, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} + +impl super::MatMatBatch4Kernel for Webgpu { + fn forward< + B: crate::prelude::Dim, + S: crate::prelude::Dim, + M: crate::prelude::Dim, + K: crate::prelude::Dim, + N: crate::prelude::Dim, + >( + &self, + lhs: &crate::prelude::Tensor<(B, S, M, K), E, Self>, + rhs: &crate::prelude::Tensor<(B, S, K, N), E, Self>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward< + B: crate::prelude::Dim, + S: crate::prelude::Dim, + M: crate::prelude::Dim, + K: crate::prelude::Dim, + N: crate::prelude::Dim, + >( + &self, + lhs: &crate::prelude::Tensor<(B, S, M, K), E, Self>, + grad_lhs: &mut Self::Vec, + rhs: &crate::prelude::Tensor<(B, S, K, N), E, Self>, + grad_rhs: &mut Self::Vec, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/max_to/mod.rs b/dfdx-core/src/tensor_ops/max_to/mod.rs 
index 462c6e56..e00ba600 100644 --- a/dfdx-core/src/tensor_ops/max_to/mod.rs +++ b/dfdx-core/src/tensor_ops/max_to/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use crate::{shapes::*, tensor::*}; pub trait MaxReduceKernel: Storage { diff --git a/dfdx-core/src/tensor_ops/max_to/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/max_to/webgpu_kernel.rs new file mode 100644 index 00000000..f82e16f0 --- /dev/null +++ b/dfdx-core/src/tensor_ops/max_to/webgpu_kernel.rs @@ -0,0 +1,27 @@ +use crate::prelude::{Dtype, Webgpu}; + +impl super::MaxReduceKernel for Webgpu { + fn forward( + &self, + dst: Dst, + inp: &crate::prelude::Tensor, + ) -> Result, crate::prelude::Error> + where + Src: crate::prelude::ReduceShapeTo, + { + todo!() + } + + fn backward( + &self, + inp: &crate::prelude::Tensor, + grad_inp: &mut Self::Vec, + out: &crate::prelude::Tensor, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> + where + Src: crate::prelude::ReduceShapeTo, + { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/maximum/mod.rs b/dfdx-core/src/tensor_ops/maximum/mod.rs index 3ef18077..e1d1a89a 100644 --- a/dfdx-core/src/tensor_ops/maximum/mod.rs +++ b/dfdx-core/src/tensor_ops/maximum/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use super::{ops::try_binary_op, Device}; use crate::{shapes::*, tensor::*}; diff --git a/dfdx-core/src/tensor_ops/maximum/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/maximum/webgpu_kernel.rs new file mode 100644 index 00000000..690e2471 --- /dev/null +++ b/dfdx-core/src/tensor_ops/maximum/webgpu_kernel.rs @@ -0,0 +1,27 @@ +use crate::prelude::{ops::BinaryKernel, Dtype, Webgpu}; +use std::borrow::Cow; + +impl BinaryKernel for Webgpu { + const BACKWARD_WITHOUT_DATA: bool = false; + + fn forward( + &self, + op: super::MaximumKernelOp, + lhs: Cow>, + rhs: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::MaximumKernelOp, + lhs: &impl crate::prelude::Tensorlike, + grad_lhs: &mut Self::Vec, + rhs: &impl crate::prelude::Tensorlike, + grad_rhs: &mut Self::Vec, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/min_to/mod.rs b/dfdx-core/src/tensor_ops/min_to/mod.rs index 9cef2da5..38166f6f 100644 --- a/dfdx-core/src/tensor_ops/min_to/mod.rs +++ b/dfdx-core/src/tensor_ops/min_to/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use crate::{shapes::*, tensor::*}; pub trait MinReduceKernel: Storage { diff --git a/dfdx-core/src/tensor_ops/min_to/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/min_to/webgpu_kernel.rs new file mode 100644 index 00000000..fe2e516c --- /dev/null +++ b/dfdx-core/src/tensor_ops/min_to/webgpu_kernel.rs @@ -0,0 +1,27 @@ +use crate::prelude::{Dtype, Webgpu}; + +impl super::MinReduceKernel for Webgpu { + fn forward( + &self, + dst: Dst, + inp: &crate::prelude::Tensor, + ) -> Result, crate::prelude::Error> + where + Src: crate::prelude::ReduceShapeTo, + { + todo!() + } + + fn backward( + &self, + inp: &crate::prelude::Tensor, + grad_inp: &mut Self::Vec, + out: &crate::prelude::Tensor, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> + where + Src: crate::prelude::ReduceShapeTo, + { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/minimum/mod.rs b/dfdx-core/src/tensor_ops/minimum/mod.rs 
index adcc6dfb..f6b9b6e1 100644 --- a/dfdx-core/src/tensor_ops/minimum/mod.rs +++ b/dfdx-core/src/tensor_ops/minimum/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use super::{ops::try_binary_op, Device}; use crate::{shapes::*, tensor::*}; diff --git a/dfdx-core/src/tensor_ops/minimum/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/minimum/webgpu_kernel.rs new file mode 100644 index 00000000..5ebcc561 --- /dev/null +++ b/dfdx-core/src/tensor_ops/minimum/webgpu_kernel.rs @@ -0,0 +1,27 @@ +use crate::prelude::{ops::BinaryKernel, Dtype, Webgpu}; +use std::borrow::Cow; + +impl BinaryKernel for Webgpu { + const BACKWARD_WITHOUT_DATA: bool = false; + + fn forward( + &self, + op: super::MinimumKernelOp, + lhs: Cow>, + rhs: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::MinimumKernelOp, + lhs: &impl crate::prelude::Tensorlike, + grad_lhs: &mut Self::Vec, + rhs: &impl crate::prelude::Tensorlike, + grad_rhs: &mut Self::Vec, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/mul/mod.rs b/dfdx-core/src/tensor_ops/mul/mod.rs index 8179509d..0dccebd8 100644 --- a/dfdx-core/src/tensor_ops/mul/mod.rs +++ b/dfdx-core/src/tensor_ops/mul/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use super::ops::*; use crate::{shapes::*, tensor::*}; diff --git a/dfdx-core/src/tensor_ops/mul/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/mul/webgpu_kernel.rs new file mode 100644 index 00000000..240ba571 --- /dev/null +++ b/dfdx-core/src/tensor_ops/mul/webgpu_kernel.rs @@ -0,0 +1,56 @@ +use std::borrow::Cow; + +use crate::prelude::{ + ops::{BinaryKernel, UnaryKernel}, + Dtype, Webgpu, +}; + +impl UnaryKernel, E> for Webgpu { + const BACKWARD_WITHOUT_INP: bool = false; + + const BACKWARD_WITHOUT_DATA: bool = true; + + fn forward( + &self, + op: super::ScalarMulKernelOp, + inp: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::ScalarMulKernelOp, + inp: &impl crate::prelude::Tensorlike, + grad_inp: &mut Self::Vec, + out: &impl crate::prelude::Tensorlike, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} + +impl BinaryKernel for Webgpu { + const BACKWARD_WITHOUT_DATA: bool = true; + + fn forward( + &self, + op: super::BinaryMulKernelOp, + lhs: Cow>, + rhs: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::BinaryMulKernelOp, + lhs: &impl crate::prelude::Tensorlike, + grad_lhs: &mut Self::Vec, + rhs: &impl crate::prelude::Tensorlike, + grad_rhs: &mut Self::Vec, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/nans_to/mod.rs b/dfdx-core/src/tensor_ops/nans_to/mod.rs index 176bb1c5..f3ade77e 100644 --- a/dfdx-core/src/tensor_ops/nans_to/mod.rs +++ b/dfdx-core/src/tensor_ops/nans_to/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use super::ops::{try_unary_op, UnaryKernel}; use crate::{shapes::*, tensor::*}; diff --git a/dfdx-core/src/tensor_ops/nans_to/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/nans_to/webgpu_kernel.rs new file mode 100644 index 00000000..58cc8c36 --- /dev/null +++ b/dfdx-core/src/tensor_ops/nans_to/webgpu_kernel.rs @@ -0,0 +1,28 @@ +use 
std::borrow::Cow; + +use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu}; + +impl UnaryKernel, E> for Webgpu { + const BACKWARD_WITHOUT_INP: bool = false; + + const BACKWARD_WITHOUT_DATA: bool = false; + + fn forward( + &self, + op: super::NansToKernelOp, + inp: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::NansToKernelOp, + inp: &impl crate::prelude::Tensorlike, + grad_inp: &mut Self::Vec, + out: &impl crate::prelude::Tensorlike, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/negate/mod.rs b/dfdx-core/src/tensor_ops/negate/mod.rs index f6ad27db..f6dfa820 100644 --- a/dfdx-core/src/tensor_ops/negate/mod.rs +++ b/dfdx-core/src/tensor_ops/negate/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use super::ops::{try_unary_op, UnaryKernel}; use crate::{shapes::*, tensor::*}; diff --git a/dfdx-core/src/tensor_ops/negate/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/negate/webgpu_kernel.rs new file mode 100644 index 00000000..4794d906 --- /dev/null +++ b/dfdx-core/src/tensor_ops/negate/webgpu_kernel.rs @@ -0,0 +1,28 @@ +use std::borrow::Cow; + +use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu}; + +impl UnaryKernel for Webgpu { + const BACKWARD_WITHOUT_INP: bool = false; + + const BACKWARD_WITHOUT_DATA: bool = false; + + fn forward( + &self, + op: super::NegateKernelOp, + inp: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::NegateKernelOp, + inp: &impl crate::prelude::Tensorlike, + grad_inp: &mut Self::Vec, + out: &impl crate::prelude::Tensorlike, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git a/dfdx-core/src/tensor_ops/pow/mod.rs b/dfdx-core/src/tensor_ops/pow/mod.rs index bce8cff1..83f12f4b 100644 --- a/dfdx-core/src/tensor_ops/pow/mod.rs +++ b/dfdx-core/src/tensor_ops/pow/mod.rs @@ -3,6 +3,9 @@ mod cpu_kernel; #[cfg(feature = "cuda")] mod cuda_kernel; +#[cfg(feature = "webgpu")] +mod webgpu_kernel; + use super::ops::{try_unary_op, UnaryKernel}; use crate::{shapes::*, tensor::*}; diff --git a/dfdx-core/src/tensor_ops/pow/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/pow/webgpu_kernel.rs new file mode 100644 index 00000000..0cf6b43d --- /dev/null +++ b/dfdx-core/src/tensor_ops/pow/webgpu_kernel.rs @@ -0,0 +1,53 @@ +use std::borrow::Cow; + +use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu}; + +impl UnaryKernel, E> for Webgpu { + const BACKWARD_WITHOUT_INP: bool = false; + + const BACKWARD_WITHOUT_DATA: bool = false; + + fn forward( + &self, + op: super::PowfKernelOp, + inp: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::PowfKernelOp, + inp: &impl crate::prelude::Tensorlike, + grad_inp: &mut Self::Vec, + out: &impl crate::prelude::Tensorlike, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} + +impl UnaryKernel for Webgpu { + const BACKWARD_WITHOUT_INP: bool = false; + + const BACKWARD_WITHOUT_DATA: bool = false; + + fn forward( + &self, + op: super::PowiKernelOp, + inp: Cow>, + ) -> Result, crate::prelude::Error> { + todo!() + } + + fn backward( + &self, + op: super::PowiKernelOp, + inp: &impl crate::prelude::Tensorlike, + grad_inp: &mut Self::Vec, + out: &impl crate::prelude::Tensorlike, + grad_out: &Self::Vec, + ) -> Result<(), crate::prelude::Error> { + todo!() + } +} diff --git 
diff --git a/dfdx-core/src/tensor_ops/recip/mod.rs b/dfdx-core/src/tensor_ops/recip/mod.rs
index 35922633..57b76b26 100644
--- a/dfdx-core/src/tensor_ops/recip/mod.rs
+++ b/dfdx-core/src/tensor_ops/recip/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
 
diff --git a/dfdx-core/src/tensor_ops/recip/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/recip/webgpu_kernel.rs
new file mode 100644
index 00000000..ca8fd312
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/recip/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::RecipKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::RecipKernelOp,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::RecipKernelOp,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/relu/mod.rs b/dfdx-core/src/tensor_ops/relu/mod.rs
index 1b8dc8b1..31496368 100644
--- a/dfdx-core/src/tensor_ops/relu/mod.rs
+++ b/dfdx-core/src/tensor_ops/relu/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
 
diff --git a/dfdx-core/src/tensor_ops/relu/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/relu/webgpu_kernel.rs
new file mode 100644
index 00000000..6da7d6b9
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/relu/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::ReLUKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::ReLUKernelOp,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::ReLUKernelOp,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/reshape_to/mod.rs b/dfdx-core/src/tensor_ops/reshape_to/mod.rs
index 2d32ef46..8778de14 100644
--- a/dfdx-core/src/tensor_ops/reshape_to/mod.rs
+++ b/dfdx-core/src/tensor_ops/reshape_to/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use crate::{shapes::*, tensor::*};
 
 pub trait ReshapeKernel<E: Dtype>: Storage<E> {
diff --git a/dfdx-core/src/tensor_ops/reshape_to/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/reshape_to/webgpu_kernel.rs
new file mode 100644
index 00000000..87b12ddb
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/reshape_to/webgpu_kernel.rs
@@ -0,0 +1,21 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::ReshapeKernel<E> for Webgpu {
+    fn forward<Src: crate::prelude::Shape, Dst: crate::prelude::Shape>(
+        &self,
+        dst: &Dst,
+        inp: &crate::prelude::Tensor<Src, E, Self>,
+    ) -> Result<crate::prelude::Tensor<Dst, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<Src: crate::prelude::Shape, Dst: crate::prelude::Shape>(
+        &self,
+        dst: &Dst,
+        inp: &crate::prelude::Tensor<Src, E, Self>,
+        grad_inp: &mut Self::Vec,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
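reshape_to is the odd one out here: on contiguous storage the forward pass is a plain buffer copy, and real index math is only needed for non-contiguous inputs. That index math, mapping a flat row-major index through explicit dims/strides, is the same helper most of the kernels above will want for broadcast or strided operands. A CPU reference for the invariant a shader would have to reproduce:

/// Map a flat row-major index into a strided (possibly broadcast) buffer.
/// `dims` and `strides` would travel to the shader in a small uniform
/// buffer; a broadcast axis is simply an axis with stride 0.
fn strided_index(flat: usize, dims: &[usize], strides: &[usize]) -> usize {
    let mut rem = flat;
    let mut offset = 0;
    // Walk axes from innermost to outermost, peeling off one coordinate
    // per axis and accumulating its strided contribution.
    for (&dim, &stride) in dims.iter().zip(strides.iter()).rev() {
        offset += (rem % dim) * stride;
        rem /= dim;
    }
    offset
}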
diff --git a/dfdx-core/src/tensor_ops/rmsprop/mod.rs b/dfdx-core/src/tensor_ops/rmsprop/mod.rs
index 0ccddad3..cc031546 100644
--- a/dfdx-core/src/tensor_ops/rmsprop/mod.rs
+++ b/dfdx-core/src/tensor_ops/rmsprop/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use crate::{
     shapes::{Dtype, Shape},
     tensor::*,
diff --git a/dfdx-core/src/tensor_ops/rmsprop/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/rmsprop/webgpu_kernel.rs
new file mode 100644
index 00000000..215adc18
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/rmsprop/webgpu_kernel.rs
@@ -0,0 +1,15 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::RMSpropKernel<E> for Webgpu {
+    fn rmsprop_kernel(
+        &self,
+        cfg: &crate::prelude::RMSpropConfig,
+        param: &mut Self::Vec,
+        momentum: &mut Self::Vec,
+        square_avg: &mut Self::Vec,
+        grad_avg: &mut Self::Vec,
+        grad: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/roll/mod.rs b/dfdx-core/src/tensor_ops/roll/mod.rs
index 40a3ac5e..b0f1237d 100644
--- a/dfdx-core/src/tensor_ops/roll/mod.rs
+++ b/dfdx-core/src/tensor_ops/roll/mod.rs
@@ -7,6 +7,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
 pub struct RollOp {
diff --git a/dfdx-core/src/tensor_ops/roll/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/roll/webgpu_kernel.rs
new file mode 100644
index 00000000..c1e28871
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/roll/webgpu_kernel.rs
@@ -0,0 +1,21 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::RollKernel<E> for Webgpu {
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::RollOp,
+        inp: &crate::prelude::Tensor<S, E, Self>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::RollOp,
+        inp: &crate::prelude::Tensor<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
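For roll, every output element reads the input at the same multi-dimensional index shifted by `amount` along `op.axis`, wrapping at the ends. The 1-D case pins down the semantics the eventual shader has to reproduce:

// CPU reference for roll along a single contiguous axis: out[i] takes the
// value that sat `amount` slots earlier, wrapping around the ends.
fn roll_1d<T: Copy>(inp: &[T], amount: usize) -> Vec<T> {
    let n = inp.len();
    (0..n).map(|i| inp[(i + n - amount % n) % n]).collect()
}
// roll_1d(&[1, 2, 3, 4], 1) == [4, 1, 2, 3]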
diff --git a/dfdx-core/src/tensor_ops/select_and_gather/mod.rs b/dfdx-core/src/tensor_ops/select_and_gather/mod.rs
index 2a56f1ac..b5ccebb0 100644
--- a/dfdx-core/src/tensor_ops/select_and_gather/mod.rs
+++ b/dfdx-core/src/tensor_ops/select_and_gather/mod.rs
@@ -5,6 +5,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use crate::{shapes::*, tensor::*};
 
 pub trait ReplaceDimKernel<E: Dtype>: Storage<E> + Storage<usize> {
diff --git a/dfdx-core/src/tensor_ops/select_and_gather/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/select_and_gather/webgpu_kernel.rs
new file mode 100644
index 00000000..39d867dd
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/select_and_gather/webgpu_kernel.rs
@@ -0,0 +1,63 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::ReplaceDimKernel<E> for Webgpu {
+    fn forward<Src: crate::prelude::Shape, Dst: crate::prelude::Shape, Idx: crate::prelude::Shape>(
+        &self,
+        inp: &crate::prelude::Tensor<Src, E, Self>,
+        idx: &crate::prelude::Tensor<Idx, usize, Self>,
+    ) -> Result<crate::prelude::Tensor<Dst, E, Self>, crate::prelude::Error>
+    where
+        Src: crate::prelude::ReplaceDimTo<Dst, Idx>,
+    {
+        todo!()
+    }
+
+    fn backward<
+        Src: crate::prelude::Shape,
+        Dst: crate::prelude::Shape,
+        Idx: crate::prelude::Shape,
+    >(
+        &self,
+        inp: &crate::prelude::Tensor<Src, E, Self>,
+        grad_inp: &mut <Self as crate::prelude::Storage<E>>::Vec,
+        idx: &crate::prelude::Tensor<Idx, usize, Self>,
+        out: &crate::prelude::Tensor<Dst, E, Self>,
+        grad_out: &<Self as crate::prelude::Storage<E>>::Vec,
+    ) -> Result<(), crate::prelude::Error>
+    where
+        Src: crate::prelude::ReplaceDimTo<Dst, Idx>,
+    {
+        todo!()
+    }
+}
+
+impl<E: Dtype> super::RemoveDimKernel<E> for Webgpu {
+    fn forward<Src: crate::prelude::Shape, Dst: crate::prelude::Shape, Idx: crate::prelude::Shape>(
+        &self,
+        inp: &crate::prelude::Tensor<Src, E, Self>,
+        idx: &crate::prelude::Tensor<Idx, usize, Self>,
+    ) -> Result<crate::prelude::Tensor<Dst, E, Self>, crate::prelude::Error>
+    where
+        Src: crate::prelude::RemoveDimTo<Dst, Idx>,
+    {
+        todo!()
+    }
+
+    fn backward<
+        Src: crate::prelude::Shape,
+        Dst: crate::prelude::Shape,
+        Idx: crate::prelude::Shape,
+    >(
+        &self,
+        inp: &crate::prelude::Tensor<Src, E, Self>,
+        grad_inp: &mut <Self as crate::prelude::Storage<E>>::Vec,
+        idx: &crate::prelude::Tensor<Idx, usize, Self>,
+        out: &crate::prelude::Tensor<Dst, E, Self>,
+        grad_out: &<Self as crate::prelude::Storage<E>>::Vec,
+    ) -> Result<(), crate::prelude::Error>
+    where
+        Src: crate::prelude::RemoveDimTo<Dst, Idx>,
+    {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/sgd/mod.rs b/dfdx-core/src/tensor_ops/sgd/mod.rs
index 16737b2c..3cc28c05 100644
--- a/dfdx-core/src/tensor_ops/sgd/mod.rs
+++ b/dfdx-core/src/tensor_ops/sgd/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use crate::{
     shapes::{Dtype, Shape},
     tensor::{Error, Storage, Tensor},
diff --git a/dfdx-core/src/tensor_ops/sgd/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/sgd/webgpu_kernel.rs
new file mode 100644
index 00000000..d5f5cee3
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/sgd/webgpu_kernel.rs
@@ -0,0 +1,13 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::SgdKernel<E> for Webgpu {
+    fn sgd_kernel(
+        &self,
+        cfg: &crate::prelude::SgdConfig,
+        param: &mut Self::Vec,
+        velocity: &mut Self::Vec,
+        grad: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
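The sgd stub already has an exact CPU reference in-tree (its cpu_kernel.rs). For orientation only, the classic-momentum case reduces to the loop below; this is illustrative semantics rather than the trait impl, and it ignores `weight_decay` and Nesterov momentum, both of which the real kernel must honor from `SgdConfig`:

// Illustrative only: classic momentum, no weight decay.
fn sgd_step(param: &mut [f32], velocity: &mut [f32], grad: &[f32], lr: f32, momentum: f32) {
    for ((p, v), g) in param.iter_mut().zip(velocity.iter_mut()).zip(grad) {
        *v = momentum * *v + *g; // v <- m * v + g
        *p -= lr * *v; // p <- p - lr * v
    }
}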
diff --git a/dfdx-core/src/tensor_ops/sigmoid/mod.rs b/dfdx-core/src/tensor_ops/sigmoid/mod.rs
index ff41f64f..d2fdfe5e 100644
--- a/dfdx-core/src/tensor_ops/sigmoid/mod.rs
+++ b/dfdx-core/src/tensor_ops/sigmoid/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
 
diff --git a/dfdx-core/src/tensor_ops/sigmoid/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/sigmoid/webgpu_kernel.rs
new file mode 100644
index 00000000..f6e5c742
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/sigmoid/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::SigmoidKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::SigmoidKernelOp,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::SigmoidKernelOp,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/sin/mod.rs b/dfdx-core/src/tensor_ops/sin/mod.rs
index 035da953..033e2cb3 100644
--- a/dfdx-core/src/tensor_ops/sin/mod.rs
+++ b/dfdx-core/src/tensor_ops/sin/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
 
diff --git a/dfdx-core/src/tensor_ops/sin/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/sin/webgpu_kernel.rs
new file mode 100644
index 00000000..024c1382
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/sin/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::SinKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::SinKernelOp,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::SinKernelOp,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/slice/mod.rs b/dfdx-core/src/tensor_ops/slice/mod.rs
index db3a6854..c69511a5 100644
--- a/dfdx-core/src/tensor_ops/slice/mod.rs
+++ b/dfdx-core/src/tensor_ops/slice/mod.rs
@@ -4,6 +4,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 pub trait SliceKernel<E: Dtype>: Storage<E> {
     fn forward<Src: Shape + SliceShape<Slice>, Slice>(
         &self,
diff --git a/dfdx-core/src/tensor_ops/slice/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/slice/webgpu_kernel.rs
new file mode 100644
index 00000000..73a5fe0c
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/slice/webgpu_kernel.rs
@@ -0,0 +1,21 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::SliceKernel<E> for Webgpu {
+    fn forward<Src: crate::prelude::Shape + crate::prelude::SliceShape<Slice>, Slice>(
+        &self,
+        inp: &crate::prelude::Tensor<Src, E, Self>,
+        slice: &Slice,
+    ) -> Result<crate::prelude::Tensor<Src::Sliced, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<Src: crate::prelude::Shape + crate::prelude::SliceShape<Slice>, Slice>(
+        &self,
+        inp: &crate::prelude::Tensor<Src, E, Self>,
+        grad_inp: &mut Self::Vec,
+        grad_out: &Self::Vec,
+        slice: &Slice,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
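A slice along the leading axis of a contiguous tensor is one contiguous range of the flat buffer, so that forward pass can be a single offset copy (on the device, wgpu's `CommandEncoder::copy_buffer_to_buffer`); general strided slices need a gather pass built on the index helper sketched earlier. CPU reference for the easy case, with hypothetical `start`/`len` element counts derived from the `Slice` bounds:

// Real code would work in bytes and respect wgpu's copy alignment.
fn slice_leading_axis(inp: &[f32], start: usize, len: usize) -> Vec<f32> {
    inp[start..start + len].to_vec()
}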
diff --git a/dfdx-core/src/tensor_ops/sqrt/mod.rs b/dfdx-core/src/tensor_ops/sqrt/mod.rs
index b4703946..4138348e 100644
--- a/dfdx-core/src/tensor_ops/sqrt/mod.rs
+++ b/dfdx-core/src/tensor_ops/sqrt/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
 
diff --git a/dfdx-core/src/tensor_ops/sqrt/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/sqrt/webgpu_kernel.rs
new file mode 100644
index 00000000..0701ee08
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/sqrt/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::SqrtKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::SqrtKernelOp,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::SqrtKernelOp,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/square/mod.rs b/dfdx-core/src/tensor_ops/square/mod.rs
index ae9ae0da..e4d26c94 100644
--- a/dfdx-core/src/tensor_ops/square/mod.rs
+++ b/dfdx-core/src/tensor_ops/square/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
 
diff --git a/dfdx-core/src/tensor_ops/square/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/square/webgpu_kernel.rs
new file mode 100644
index 00000000..522eae17
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/square/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::SquareKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::SquareKernelOp,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::SquareKernelOp,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/stack/mod.rs b/dfdx-core/src/tensor_ops/stack/mod.rs
index f8e13d59..b30e7e98 100644
--- a/dfdx-core/src/tensor_ops/stack/mod.rs
+++ b/dfdx-core/src/tensor_ops/stack/mod.rs
@@ -6,6 +6,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 /// Stack an array or vec of tensors together along a new dimension.
 ///
 /// An array of tensors will be turned into a [Const] dim, and
diff --git a/dfdx-core/src/tensor_ops/stack/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/stack/webgpu_kernel.rs
new file mode 100644
index 00000000..113aae1a
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/stack/webgpu_kernel.rs
@@ -0,0 +1,23 @@
+use crate::{shapes::*, tensor::Webgpu};
+use std::vec::Vec;
+
+impl<E: Dtype> super::StackKernel<E> for Webgpu {
+    fn forward<S: crate::prelude::Shape, Num: crate::prelude::Dim>(
+        &self,
+        num: Num,
+        inp: &[crate::prelude::Tensor<S, E, Self>],
+    ) -> Result<crate::prelude::Tensor<S::Larger, E, Self>, crate::prelude::Error>
+    where
+        S: crate::prelude::AddDim<Num>,
+    {
+        todo!()
+    }
+
+    fn backward(
+        &self,
+        grad_inp: Vec<&mut Self::Vec>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
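stack's forward is pure data movement: the output buffer is each input's flat buffer in order, so input j lands at element offset j * each. On the device that is one buffer-to-buffer copy per input into a freshly allocated output. A CPU reference for the layout:

fn stack_forward(inps: &[&[f32]]) -> Vec<f32> {
    let each = inps.first().map_or(0, |b| b.len());
    let mut out = Vec::with_capacity(each * inps.len());
    for buf in inps {
        assert_eq!(buf.len(), each, "stacked tensors must share a shape");
        out.extend_from_slice(buf);
    }
    out
}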
diff --git a/dfdx-core/src/tensor_ops/sub/mod.rs b/dfdx-core/src/tensor_ops/sub/mod.rs
index a7b82759..9c798cda 100644
--- a/dfdx-core/src/tensor_ops/sub/mod.rs
+++ b/dfdx-core/src/tensor_ops/sub/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::*;
 use crate::{shapes::*, tensor::*};
 
diff --git a/dfdx-core/src/tensor_ops/sub/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/sub/webgpu_kernel.rs
new file mode 100644
index 00000000..8d5e943e
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/sub/webgpu_kernel.rs
@@ -0,0 +1,56 @@
+use std::borrow::Cow;
+
+use crate::prelude::{
+    ops::{BinaryKernel, UnaryKernel},
+    Dtype, Webgpu,
+};
+
+impl<E: Dtype> UnaryKernel<super::ScalarSubKernelOp<E>, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = true;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::ScalarSubKernelOp<E>,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::ScalarSubKernelOp<E>,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
+
+impl<E: Dtype> BinaryKernel<super::BinarySubKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_DATA: bool = true;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::BinarySubKernelOp,
+        lhs: Cow<crate::prelude::Tensor<S, E, Self>>,
+        rhs: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::BinarySubKernelOp,
+        lhs: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_lhs: &mut Self::Vec,
+        rhs: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_rhs: &mut Self::Vec,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/sum_to/mod.rs b/dfdx-core/src/tensor_ops/sum_to/mod.rs
index e99c3710..ad149df7 100644
--- a/dfdx-core/src/tensor_ops/sum_to/mod.rs
+++ b/dfdx-core/src/tensor_ops/sum_to/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use crate::{shapes::*, tensor::*};
 
 pub trait SumKernel<E: Dtype>: Storage<E> {
diff --git a/dfdx-core/src/tensor_ops/sum_to/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/sum_to/webgpu_kernel.rs
new file mode 100644
index 00000000..29247ea7
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/sum_to/webgpu_kernel.rs
@@ -0,0 +1,27 @@
+use crate::prelude::{Dtype, Webgpu};
+
+impl<E: Dtype> super::SumKernel<E> for Webgpu {
+    fn forward<Src: crate::prelude::Shape, Dst: crate::prelude::Shape, Ax: crate::prelude::Axes>(
+        &self,
+        dst: Dst,
+        inp: &crate::prelude::Tensor<Src, E, Self>,
+    ) -> Result<crate::prelude::Tensor<Dst, E, Self>, crate::prelude::Error>
+    where
+        Src: crate::prelude::ReduceShapeTo<Dst, Ax>,
+    {
+        todo!()
+    }
+
+    fn backward<Src: crate::prelude::Shape, Dst: crate::prelude::Shape, Ax: crate::prelude::Axes>(
+        &self,
+        dst: Dst,
+        inp: &impl crate::prelude::Tensorlike<Src, E, Self>,
+        grad_inp: &mut Self::Vec,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error>
+    where
+        Src: crate::prelude::ReduceShapeTo<Dst, Ax>,
+    {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/tanh/mod.rs b/dfdx-core/src/tensor_ops/tanh/mod.rs
index 2c5b9606..65340a87 100644
--- a/dfdx-core/src/tensor_ops/tanh/mod.rs
+++ b/dfdx-core/src/tensor_ops/tanh/mod.rs
@@ -3,6 +3,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use super::ops::{try_unary_op, UnaryKernel};
 use crate::{shapes::*, tensor::*};
 
diff --git a/dfdx-core/src/tensor_ops/tanh/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/tanh/webgpu_kernel.rs
new file mode 100644
index 00000000..51e661b2
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/tanh/webgpu_kernel.rs
@@ -0,0 +1,28 @@
+use std::borrow::Cow;
+
+use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};
+
+impl<E: Dtype> UnaryKernel<super::TanhKernelOp, E> for Webgpu {
+    const BACKWARD_WITHOUT_INP: bool = false;
+
+    const BACKWARD_WITHOUT_DATA: bool = false;
+
+    fn forward<S: crate::prelude::Shape>(
+        &self,
+        op: super::TanhKernelOp,
+        inp: Cow<crate::prelude::Tensor<S, E, Self>>,
+    ) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
+        todo!()
+    }
+
+    fn backward<S: crate::prelude::Shape>(
+        &self,
+        op: super::TanhKernelOp,
+        inp: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_inp: &mut Self::Vec,
+        out: &impl crate::prelude::Tensorlike<S, E, Self>,
+        grad_out: &Self::Vec,
+    ) -> Result<(), crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/to_dtype/mod.rs b/dfdx-core/src/tensor_ops/to_dtype/mod.rs
index 5f0b7c99..08f69a7f 100644
--- a/dfdx-core/src/tensor_ops/to_dtype/mod.rs
+++ b/dfdx-core/src/tensor_ops/to_dtype/mod.rs
@@ -2,6 +2,9 @@ mod cpu_kernel;
 #[cfg(feature = "cuda")]
 mod cuda_kernel;
 
+#[cfg(feature = "webgpu")]
+mod webgpu_kernel;
+
 use crate::prelude::{Error, Shape, Storage, Tensor, Unit};
 
 pub trait ToDtypeKernel<E1: Unit, E2: Unit>: Storage<E1> + Storage<E2> {
diff --git a/dfdx-core/src/tensor_ops/to_dtype/webgpu_kernel.rs b/dfdx-core/src/tensor_ops/to_dtype/webgpu_kernel.rs
new file mode 100644
index 00000000..111b930e
--- /dev/null
+++ b/dfdx-core/src/tensor_ops/to_dtype/webgpu_kernel.rs
@@ -0,0 +1,9 @@
+use crate::prelude::{Unit, Webgpu};
+
+impl<E1: Unit, E2: Unit> super::ToDtypeKernel<E1, E2> for Webgpu {
+    fn forward<S: crate::prelude::Shape>(
+        inp: crate::prelude::Tensor<S, E1, Self>,
+    ) -> Result<crate::prelude::Tensor<S, E2, Self>, crate::prelude::Error> {
+        todo!()
+    }
+}
diff --git a/dfdx-core/src/tensor_ops/utilities/device.rs b/dfdx-core/src/tensor_ops/utilities/device.rs
index 2504185f..277be7a6 100644
--- a/dfdx-core/src/tensor_ops/utilities/device.rs
+++ b/dfdx-core/src/tensor_ops/utilities/device.rs
@@ -130,3 +130,12 @@ impl Device<crate::dtypes::AMP<half::f16>> for crate::tensor::Cuda {}
 impl Device<f32> for crate::tensor::Cuda {}
 #[cfg(feature = "cuda")]
 impl Device<f64> for crate::tensor::Cuda {}
+
+#[cfg(all(feature = "webgpu", feature = "f16"))]
+impl Device<half::f16> for crate::tensor::Webgpu {}
+#[cfg(all(feature = "webgpu", feature = "f16"))]
+impl Device<crate::dtypes::AMP<half::f16>> for crate::tensor::Webgpu {}
+#[cfg(feature = "webgpu")]
+impl Device<f32> for crate::tensor::Webgpu {}
+#[cfg(feature = "webgpu")]
+impl Device<f64> for crate::tensor::Webgpu {}
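One caveat on the Device matrix just added: WGSL has no 64-bit float type, so the `Device<f64>` impl implies emulation or a host fallback rather than native shaders. f16 is real but gated twice, in WGSL (shaders must declare `enable f16;`) and at device creation, which is presumably why it stays behind the separate `f16` cargo feature. A sketch of the negotiation with the wgpu 0.18 API (hypothetical helper, not in the patch):

// Request SHADER_F16 only when the adapter actually offers it.
fn request_device_with_f16(
    adapter: &wgpu::Adapter,
) -> Result<(wgpu::Device, wgpu::Queue), wgpu::RequestDeviceError> {
    futures_lite::future::block_on(adapter.request_device(
        &wgpu::DeviceDescriptor {
            label: Some("dfdx-webgpu"),
            features: wgpu::Features::SHADER_F16 & adapter.features(),
            limits: wgpu::Limits::default(),
        },
        None,
    ))
}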
diff --git a/dfdx/Cargo.toml b/dfdx/Cargo.toml
index a79e81c4..73791a45 100644
--- a/dfdx/Cargo.toml
+++ b/dfdx/Cargo.toml
@@ -49,6 +49,7 @@ fast-alloc = ["dfdx-core/fast-alloc"]
 
 cuda = ["dfdx-core/cuda"]
 cudnn = ["dfdx-core/cudnn"]
+webgpu = ["dfdx-core/webgpu"]
 f16 = ["dfdx-core/f16"]
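With the feature plumbed through the workspace, downstream crates opt in via `cargo build --features webgpu`, which also flips `AutoDevice` to `Webgpu`. A minimal smoke test, assuming `Webgpu` ends up exposing the same `Default` construction as `Cpu` and `Cuda`; note that only allocation and fill ops can work while the kernels above are `todo!()`:

use dfdx::prelude::*;

fn main() {
    // Under `--features webgpu`, AutoDevice is the Webgpu device.
    let dev: AutoDevice = Default::default();
    // ZerosTensor is implemented for Webgpu, so this allocates on the GPU.
    let x: Tensor<Rank2<2, 3>, f32, _> = dev.zeros();
    println!("{:?}", x.shape());
}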