Skip to content

Commit

Permalink
Finished up with template tensor_ops
Browse files Browse the repository at this point in the history
  • Loading branch information
favilo committed Nov 27, 2023
1 parent 20925cf commit cadc2d2
Show file tree
Hide file tree
Showing 57 changed files with 906 additions and 16 deletions.
16 changes: 8 additions & 8 deletions dfdx-core/src/tensor/webgpu/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ impl core::ops::Deref for Buffer {
}

impl Buffer {
pub(crate) fn size(&self) -> u64 {
self.size as u64
pub(crate) fn size(&self) -> usize {
self.size
}

#[allow(unused)]
Expand All @@ -57,16 +57,16 @@ impl Buffer {
let (sender, receiver) = std::sync::mpsc::channel();
let buffer = dev.create_buffer(&BufferDescriptor {
label: None,
size: self.size(),
size: self.size() as u64,
usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
mapped_at_creation: false,
});
{
let mut encoder = dev.create_command_encoder(&Default::default());
encoder.copy_buffer_to_buffer(&self.data, 0, &buffer, 0, self.size());
encoder.copy_buffer_to_buffer(&self.data, 0, &buffer, 0, self.size() as u64);
queue.submit(Some(encoder.finish()));
}
let slice = buffer.slice(..self.size());
let slice = buffer.slice(..self.size() as u64);
slice.map_async(wgpu::MapMode::Read, move |_| {
sender.send(()).unwrap();
});
Expand Down Expand Up @@ -190,11 +190,11 @@ impl<E> Clone for CachableBuffer<E> {
let mut encoder = self.dev.create_command_encoder(&Default::default());
let bfr = self.dev.create_buffer(&BufferDescriptor {
label: None,
size: round_to_buffer_alignment(self.data.size()),
size: round_to_buffer_alignment(self.data.size() as u64),
usage: BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
mapped_at_creation: false,
});
encoder.copy_buffer_to_buffer(&self.data, 0, &bfr, 0, self.data.size());
encoder.copy_buffer_to_buffer(&self.data, 0, &bfr, 0, self.data.size() as u64);
(
encoder,
Buffer {
Expand All @@ -205,7 +205,7 @@ impl<E> Clone for CachableBuffer<E> {
},
|bfr| {
let mut encoder = self.dev.create_command_encoder(&Default::default());
encoder.copy_buffer_to_buffer(&self.data, 0, &bfr, 0, self.data.size());
encoder.copy_buffer_to_buffer(&self.data, 0, &bfr, 0, self.data.size() as u64);
(encoder, bfr)
},
);
Expand Down
3 changes: 3 additions & 0 deletions dfdx-core/src/tensor_ops/abs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ mod cpu_kernel;
#[cfg(feature = "cuda")]
mod cuda_kernel;

#[cfg(feature = "webgpu")]
mod webgpu_kernel;

use super::ops::{try_unary_op, UnaryKernel};
use crate::{shapes::*, tensor::*};

Expand Down
28 changes: 28 additions & 0 deletions dfdx-core/src/tensor_ops/abs/webgpu_kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use std::borrow::Cow;

use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};

/// Template (stub) implementation of the unary kernel for `AbsKernelOp` on
/// the [`Webgpu`] device.
///
/// Neither `forward` nor `backward` is implemented yet: both bodies are
/// `todo!()`, so calling either one panics at runtime.
impl<E: Dtype> UnaryKernel<super::AbsKernelOp, E> for Webgpu {
// NOTE(review): both flags are `false`; presumably they tell the generic
// unary-op driver which tensors `backward` needs - confirm against the
// `UnaryKernel` trait definition before filling in the kernel.
const BACKWARD_WITHOUT_INP: bool = false;

const BACKWARD_WITHOUT_DATA: bool = false;

/// Forward pass placeholder. Currently ignores `op` and `inp` and panics
/// via `todo!()`.
fn forward<S: crate::prelude::Shape>(
&self,
op: super::AbsKernelOp,
inp: Cow<crate::prelude::Tensor<S, E, Self>>,
) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}

/// Backward pass placeholder. Currently ignores every argument
/// (including the `grad_inp` output buffer) and panics via `todo!()`.
fn backward<S: crate::prelude::Shape>(
&self,
op: super::AbsKernelOp,
inp: &impl crate::prelude::Tensorlike<S, E, Self>,
grad_inp: &mut Self::Vec,
out: &impl crate::prelude::Tensorlike<S, E, Self>,
grad_out: &Self::Vec,
) -> Result<(), crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}
}
3 changes: 3 additions & 0 deletions dfdx-core/src/tensor_ops/accurate_gelu/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ mod cpu_kernel;
#[cfg(feature = "cuda")]
mod cuda_kernel;

#[cfg(feature = "webgpu")]
mod webgpu_kernel;

use super::ops::{try_unary_op, UnaryKernel};
use crate::{shapes::*, tensor::*};

Expand Down
28 changes: 28 additions & 0 deletions dfdx-core/src/tensor_ops/accurate_gelu/webgpu_kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use std::borrow::Cow;

use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};

/// Template (stub) implementation of the unary kernel for
/// `AccurateGeLUKernelOp` on the [`Webgpu`] device.
///
/// Neither `forward` nor `backward` is implemented yet: both bodies are
/// `todo!()`, so calling either one panics at runtime.
impl<E: Dtype> UnaryKernel<super::AccurateGeLUKernelOp, E> for Webgpu {
// NOTE(review): both flags are `false`; presumably they tell the generic
// unary-op driver which tensors `backward` needs - confirm against the
// `UnaryKernel` trait definition before filling in the kernel.
const BACKWARD_WITHOUT_INP: bool = false;

const BACKWARD_WITHOUT_DATA: bool = false;

/// Forward pass placeholder. Currently ignores `op` and `inp` and panics
/// via `todo!()`.
fn forward<S: crate::prelude::Shape>(
&self,
op: super::AccurateGeLUKernelOp,
inp: Cow<crate::prelude::Tensor<S, E, Self>>,
) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}

/// Backward pass placeholder. Currently ignores every argument
/// (including the `grad_inp` output buffer) and panics via `todo!()`.
fn backward<S: crate::prelude::Shape>(
&self,
op: super::AccurateGeLUKernelOp,
inp: &impl crate::prelude::Tensorlike<S, E, Self>,
grad_inp: &mut Self::Vec,
out: &impl crate::prelude::Tensorlike<S, E, Self>,
grad_out: &Self::Vec,
) -> Result<(), crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}
}
3 changes: 1 addition & 2 deletions dfdx-core/src/tensor_ops/add/webgpu_kernel.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
extern crate alloc;
use alloc::borrow::Cow;
use std::borrow::Cow;

use crate::prelude::{
ops::{BinaryKernel, UnaryKernel},
Expand Down
3 changes: 3 additions & 0 deletions dfdx-core/src/tensor_ops/axpy/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ mod cpu_kernel;
#[cfg(feature = "cuda")]
mod cuda_kernel;

#[cfg(feature = "webgpu")]
mod webgpu_kernel;

/// Elementwise `a * alpha + b * beta`.
///
/// See [Tensor::axpy] for in place version.
Expand Down
13 changes: 13 additions & 0 deletions dfdx-core/src/tensor_ops/axpy/webgpu_kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use crate::prelude::{Dtype, Webgpu};

/// Template (stub) implementation of `AxpyKernel` on the [`Webgpu`] device.
///
/// The kernel is not implemented yet: `forward` is `todo!()` and panics when
/// called.
impl<E: Dtype> super::AxpyKernel<E> for Webgpu {
/// Axpy placeholder. Currently ignores all arguments and panics via
/// `todo!()`.
///
/// NOTE(review): `a` is the only mutable buffer, so presumably the
/// finished kernel writes `a * alpha + b * beta` back into `a` - confirm
/// against the CPU/CUDA kernels.
fn forward(
&self,
a: &mut Self::Vec,
alpha: E,
b: &Self::Vec,
beta: E,
) -> Result<(), crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}
}
3 changes: 3 additions & 0 deletions dfdx-core/src/tensor_ops/bce/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ mod cpu_kernel;
#[cfg(feature = "cuda")]
mod cuda_kernel;

#[cfg(feature = "webgpu")]
mod webgpu_kernel;

use super::ops::{try_binary_op, BinaryKernel};
use crate::{shapes::*, tensor::*};

Expand Down
27 changes: 27 additions & 0 deletions dfdx-core/src/tensor_ops/bce/webgpu_kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use crate::prelude::{ops::BinaryKernel, Dtype, Webgpu};
use std::borrow::Cow;

/// Template (stub) implementation of the binary kernel for `BCEKernelOp` on
/// the [`Webgpu`] device.
///
/// Neither `forward` nor `backward` is implemented yet: both bodies are
/// `todo!()`, so calling either one panics at runtime.
impl<E: Dtype> BinaryKernel<super::BCEKernelOp, E> for Webgpu {
// NOTE(review): set to `false`; presumably this tells the generic
// binary-op driver that `backward` needs the input data - confirm against
// the `BinaryKernel` trait definition before filling in the kernel.
const BACKWARD_WITHOUT_DATA: bool = false;

/// Forward pass placeholder. Currently ignores `op`, `lhs` and `rhs` and
/// panics via `todo!()`.
fn forward<S: crate::prelude::Shape>(
&self,
op: super::BCEKernelOp,
lhs: Cow<crate::prelude::Tensor<S, E, Self>>,
rhs: Cow<crate::prelude::Tensor<S, E, Self>>,
) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}

/// Backward pass placeholder. Currently ignores every argument
/// (including the `grad_lhs` / `grad_rhs` output buffers) and panics via
/// `todo!()`.
fn backward<S: crate::prelude::Shape>(
&self,
op: super::BCEKernelOp,
lhs: &impl crate::prelude::Tensorlike<S, E, Self>,
grad_lhs: &mut Self::Vec,
rhs: &impl crate::prelude::Tensorlike<S, E, Self>,
grad_rhs: &mut Self::Vec,
grad_out: &Self::Vec,
) -> Result<(), crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}
}
3 changes: 3 additions & 0 deletions dfdx-core/src/tensor_ops/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ mod cpu_kernels;
#[cfg(feature = "cuda")]
mod cuda_kernels;

#[cfg(feature = "webgpu")]
mod webgpu_kernels;

use crate::{
prelude::{OnesTensor, Tensor, ZerosTensor},
shapes::*,
Expand Down
34 changes: 34 additions & 0 deletions dfdx-core/src/tensor_ops/boolean/webgpu_kernels.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use crate::prelude::Webgpu;

/// Template (stub) implementation of `BooleanKernel` on the [`Webgpu`]
/// device.
///
/// None of the four operations (`not`, `and`, `or`, `xor`) is implemented
/// yet: every body is `todo!()`, so calling any of them panics at runtime.
impl super::BooleanKernel for Webgpu {
/// `not` placeholder over a `bool` tensor. Currently ignores `inp` and
/// panics via `todo!()`.
fn not<S: crate::prelude::Shape>(
&self,
inp: &crate::prelude::Tensor<S, bool, Self>,
) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}

/// `and` placeholder over two `bool` tensors of the same shape type `S`.
/// Currently ignores `lhs` and `rhs` and panics via `todo!()`.
fn and<S: crate::prelude::Shape>(
&self,
lhs: &crate::prelude::Tensor<S, bool, Self>,
rhs: &crate::prelude::Tensor<S, bool, Self>,
) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}

/// `or` placeholder over two `bool` tensors of the same shape type `S`.
/// Currently ignores `lhs` and `rhs` and panics via `todo!()`.
fn or<S: crate::prelude::Shape>(
&self,
lhs: &crate::prelude::Tensor<S, bool, Self>,
rhs: &crate::prelude::Tensor<S, bool, Self>,
) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}

/// `xor` placeholder over two `bool` tensors of the same shape type `S`.
/// Currently ignores `lhs` and `rhs` and panics via `todo!()`.
fn xor<S: crate::prelude::Shape>(
&self,
lhs: &crate::prelude::Tensor<S, bool, Self>,
rhs: &crate::prelude::Tensor<S, bool, Self>,
) -> Result<crate::prelude::Tensor<S, bool, Self>, crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}
}
3 changes: 3 additions & 0 deletions dfdx-core/src/tensor_ops/clamp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ mod cpu_kernel;
#[cfg(feature = "cuda")]
mod cuda_kernel;

#[cfg(feature = "webgpu")]
mod webgpu_kernel;

use super::ops::{try_unary_op, UnaryKernel};
use crate::{shapes::*, tensor::*};

Expand Down
28 changes: 28 additions & 0 deletions dfdx-core/src/tensor_ops/clamp/webgpu_kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use std::borrow::Cow;

use crate::prelude::{ops::UnaryKernel, Dtype, Webgpu};

/// Template (stub) implementation of the unary kernel for `ClampKernelOp<E>`
/// on the [`Webgpu`] device.
///
/// Neither `forward` nor `backward` is implemented yet: both bodies are
/// `todo!()`, so calling either one panics at runtime.
impl<E: Dtype> UnaryKernel<super::ClampKernelOp<E>, E> for Webgpu {
// NOTE(review): both flags are `false`; presumably they tell the generic
// unary-op driver which tensors `backward` needs - confirm against the
// `UnaryKernel` trait definition before filling in the kernel.
const BACKWARD_WITHOUT_INP: bool = false;

const BACKWARD_WITHOUT_DATA: bool = false;

/// Forward pass placeholder. Currently ignores `op` (the clamp
/// parameters) and `inp` and panics via `todo!()`.
fn forward<S: crate::prelude::Shape>(
&self,
op: super::ClampKernelOp<E>,
inp: Cow<crate::prelude::Tensor<S, E, Self>>,
) -> Result<crate::prelude::Tensor<S, E, Self>, crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}

/// Backward pass placeholder. Currently ignores every argument
/// (including the `grad_inp` output buffer) and panics via `todo!()`.
fn backward<S: crate::prelude::Shape>(
&self,
op: super::ClampKernelOp<E>,
inp: &impl crate::prelude::Tensorlike<S, E, Self>,
grad_inp: &mut Self::Vec,
out: &impl crate::prelude::Tensorlike<S, E, Self>,
grad_out: &Self::Vec,
) -> Result<(), crate::prelude::Error> {
// Not implemented yet: panics instead of returning an `Err`.
todo!()
}
}
3 changes: 3 additions & 0 deletions dfdx-core/src/tensor_ops/cmp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ mod cpu_kernels;
#[cfg(feature = "cuda")]
mod cuda_kernels;

#[cfg(feature = "webgpu")]
mod webgpu_kernels;

pub trait CmpKernel<Op, E>: Storage<E> + Storage<bool> {
fn forward<S: Shape, T>(
&self,
Expand Down
Loading

0 comments on commit cadc2d2

Please sign in to comment.