From ffa8cfaa9f51ce5b40a63a46ad5af927a563d7b8 Mon Sep 17 00:00:00 2001 From: Vassil Keremidchiev Date: Mon, 29 May 2017 15:42:53 +0300 Subject: [PATCH 01/11] Stack LTS-8.15 (GHC 8.0.2) support --- stack.yaml | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 stack.yaml diff --git a/stack.yaml b/stack.yaml new file mode 100644 index 00000000..62d2cb8a --- /dev/null +++ b/stack.yaml @@ -0,0 +1,76 @@ +# This file was automatically generated by 'stack init' +# +# Some commonly used options have been documented as comments in this file. +# For advanced use and comprehensive documentation of the format, please see: +# http://docs.haskellstack.org/en/stable/yaml_configuration/ + +# A warning or info to be displayed to the user on config load. +user-message: ! 'Warning (added by new or init): Specified resolver could not satisfy + all dependencies. Some external packages have been added as dependencies. + + You can suppress this message by removing it from stack.yaml + +' + +# Resolver to choose a 'specific' stackage snapshot or a compiler version. +# A snapshot resolver dictates the compiler version and the set of packages +# to be used for project dependencies. For example: +# +# resolver: lts-3.5 +# resolver: nightly-2015-09-21 +# resolver: ghc-7.10.2 +# resolver: ghcjs-0.1.0_ghc-7.10.2 +# resolver: +# name: custom-snapshot +# location: "./custom-snapshot.yaml" +resolver: lts-8.15 + +# User packages to be built. +# Various formats can be used as shown in the example below. +# +# packages: +# - some-directory +# - https://example.com/foo/bar/baz-0.0.2.tar.gz +# - location: +# git: https://github.com/commercialhaskell/stack.git +# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# extra-dep: true +# subdirs: +# - auto-update +# - wai +# +# A package marked 'extra-dep: true' will only be built if demanded by a +# non-dependency (i.e. a user package), and its test suites and benchmarks +# will not be run. This is useful for tweaking upstream packages. +packages: +- '.' +- examples +# Dependency packages to be pulled from upstream that are not in the resolver +# (e.g., acme-missiles-0.3) +extra-deps: +- hedgehog-0.1 + +# Override default flag values for local packages and extra-deps +flags: {} + +# Extra package databases containing global packages +extra-package-dbs: [] + +# Control whether we use the GHC we find on the path +# system-ghc: true +# +# Require a specific version of stack, using version ranges +# require-stack-version: -any # Default +# require-stack-version: ">=1.4" +# +# Override the architecture used by stack, especially useful on Windows +# arch: i386 +# arch: x86_64 +# +# Extra directories used by stack for building +# extra-include-dirs: [/path/to/dir] +# extra-lib-dirs: [/path/to/dir] +# +# Allow a newer minor version of GHC than the snapshot specifies +# compiler-check: newer-minor \ No newline at end of file From 4c673474a6cffe43e19bc10b5ab2b032bdce80a2 Mon Sep 17 00:00:00 2001 From: Vassil Keremidchiev Date: Mon, 29 May 2017 15:48:16 +0300 Subject: [PATCH 02/11] Added Windows build instructions using Stack tool --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index 4243345a..403e142d 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,31 @@ and the tests run using: Grenade builds with ghc 7.10 and 8.0. 
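+As a quick sanity check once a build succeeds, a tiny network can be run in
+GHCi. This is only a sketch: it assumes the `randomNetwork` and `runNet`
+helpers from Grenade's core modules, which this patch series does not touch.
+
+```haskell
+{-# LANGUAGE DataKinds           #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+import Grenade
+
+-- A toy two-layer network: 2 inputs to 3 logistic outputs.
+type TinyNet = Network '[ FullyConnected 2 3, Logit ]
+                       '[ 'D1 2, 'D1 3, 'D1 3 ]
+
+smokeTest :: IO (S ('D1 3))
+smokeTest = do
+  net   :: TinyNet   <- randomNetwork -- randomly initialised weights
+  input :: S ('D1 2) <- randomOfShape -- random input vector
+  return (runNet net input)
+```
+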
+Windows build
+-------------
+
+This recipe is for Stack 1.4.0 - tested and working.
+
+1)
+
+ > stack setup
+
+2) Download OpenBLAS from http://www.openblas.net/ and unzip it somewhere.
+
+3) In Stack's MSYS2 console, i.e.: C:\Users\{User}\AppData\Local\Programs\stack\x86_64-windows\msys2-{version}\msys2_shell.bat
+
+ > cd /.../OpenBLAS
+ > pacman -Sy
+ > pacman -S make perl gcc-fortran
+ > make clean
+ > make
+ > make install
+
+4) Then, in a normal Windows console (fill in your user name and versions, and check whether the paths differ on your machine):
+
+ > stack install --flag hmatrix:openblas --extra-include-dirs=C:\Users\{User}\AppData\Local\Programs\stack\x86_64-windows\msys2-20150512\opt\OpenBLAS\include --extra-lib-dirs=C:\Users\{User}\AppData\Local\Programs\stack\x86_64-windows\msys2-20150512\opt\OpenBLAS\bin --extra-lib-dirs=C:\Users\{User}\AppData\Local\Programs\stack\x86_64-windows\msys2-20150512\usr\lib\gcc\x86_64-pc-msys\6.3.0\
+
 Thanks
 ------
 Writing a library like this has been on my mind for a while now, but a big shout

From 1210c178086ff562087ede9d5b66bd03b5574517 Mon Sep 17 00:00:00 2001
From: Calen Pennington
Date: Wed, 9 Aug 2017 08:56:34 -0400
Subject: [PATCH 03/11] Update hedgehog version in stack.yaml, and add a
 section on stack to the README

---
 README.md  | 28 +++++++++++++++++++++++++-
 stack.yaml |  2 +-
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 403e142d..8fe79726 100644
--- a/README.md
+++ b/README.md
@@ -143,6 +143,10 @@ elu, tanh, and fully connected.
 
 Build Instructions
 ------------------
+
+Mafia
+~~~~~
+
 Grenade is most easily built with the [mafia](https://github.com/ambiata/mafia)
 script that is located in the repository. You will also need the `lapack` and
 `blas` libraries and development tools. Once you have all that, Grenade can be
@@ -160,12 +164,34 @@ and the tests run using:
 
 Grenade builds with ghc 7.10 and 8.0.
 
+Stack
+~~~~~
+
+Grenade also supports [stack](https://docs.haskellstack.org). You can build
+the whole project with
+
+```
+stack build
+```
+
+and run the tests using:
+
+```
+stack test grenade
+```
+
+and run the benchmarks using:
+
+```
+stack bench grenade
+```
+
 Windows build
 -------------
 
 This recipe is for Stack 1.4.0 - tested and working.
 
-1) +1) > stack setup diff --git a/stack.yaml b/stack.yaml index 62d2cb8a..c2b610da 100644 --- a/stack.yaml +++ b/stack.yaml @@ -49,7 +49,7 @@ packages: # Dependency packages to be pulled from upstream that are not in the resolver # (e.g., acme-missiles-0.3) extra-deps: -- hedgehog-0.1 +- hedgehog-0.4.1 # Override default flag values for local packages and extra-deps flags: {} From ae14bc25184ef967182d8cd2fa9fb165d6aad077 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Wed, 9 Aug 2017 09:01:35 -0400 Subject: [PATCH 04/11] Update the README file to use a more nesting-amenable header format --- README.md | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 8fe79726..fe42c5a8 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -Grenade -======= +# Grenade [![Build Status](https://api.travis-ci.org/HuwCampbell/grenade.svg?branch=master)](https://travis-ci.org/HuwCampbell/grenade) [![Hackage page (downloads and API reference)][hackage-png]][hackage] @@ -47,8 +46,7 @@ type Shakespeare '[ 'D1 40, 'D1 80, 'D1 40, 'D1 40, 'D1 40 ] ``` -Design ------- +## Design Networks in Grenade can be thought of as a heterogeneous lists of layers, where their type includes not only the layers of the network, but also the shapes of @@ -78,8 +76,7 @@ outputs a three dimensional (`D3`) 24x24x10 image. The last item in the list is one dimensional (`D1`) with 10 values, representing the categories of the MNIST data. -Usage ------ +## Usage To perform back propagation, one can call the eponymous function ```haskell @@ -102,8 +99,7 @@ easy in downstream code. If the shapes of a network are not specified correctly and a layer can not sensibly perform the operation between two shapes, then it will result in a compile time error. -Composition ------------ +## Composition Networks and Layers in Grenade are easily composed at the type level. As a `Network` is an instance of `Layer`, one can use a trained Network as a small component in a @@ -125,27 +121,23 @@ See the [MNIST](https://github.com/HuwCampbell/grenade/blob/master/examples/main example, which has been overengineered to contain both residual style learning as well as inception style convolutions. -Generative Adversarial Networks -------------------------------- +## Generative Adversarial Networks As Grenade is purely functional, one can compose its training functions in flexible ways. [GAN-MNIST](https://github.com/HuwCampbell/grenade/blob/master/examples/main/gan-mnist.hs) example displays an interesting, type safe way of writing a generative adversarial training function in 10 lines of code. -Layer Zoo ---------- +## Layer Zoo Grenade layers are normal haskell data types which are an instance of `Layer`, so it's easy to build one's own downstream code. We do however provide a decent set of layers, including convolution, deconvolution, pooling, pad, crop, logit, relu, elu, tanh, and fully connected. -Build Instructions ------------------- +## Build Instructions -Mafia -~~~~~ +### Mafia Grenade is most easily built with the [mafia](https://github.com/ambiata/mafia) script that is located in the repository. You will also need the `lapack` and @@ -164,8 +156,7 @@ and the tests run using: Grenade builds with ghc 7.10 and 8.0. -Stack -~~~~~ +### Stack Grenade also supports [stack](https://docs.haskellstack.org). 
You can build
 the whole project with
 
 ```
 stack build
 ```
 
 and run the tests using:
 
 ```
 stack test grenade
 ```
 
 and run the benchmarks using:
 
 ```
 stack bench grenade
 ```
 
-Windows build
--------------
+## Windows build
 
 This recipe is for Stack 1.4.0 - tested and working.
 
@@ -211,16 +201,14 @@ This recipe is for Stack 1.4.0 - tested and working.
 
  > stack install --flag hmatrix:openblas --extra-include-dirs=C:\Users\{User}\AppData\Local\Programs\stack\x86_64-windows\msys2-20150512\opt\OpenBLAS\include --extra-lib-dirs=C:\Users\{User}\AppData\Local\Programs\stack\x86_64-windows\msys2-20150512\opt\OpenBLAS\bin --extra-lib-dirs=C:\Users\{User}\AppData\Local\Programs\stack\x86_64-windows\msys2-20150512\usr\lib\gcc\x86_64-pc-msys\6.3.0\
 
-Thanks
-------
+## Thanks
 Writing a library like this has been on my mind for a while now, but a big shout
 out must go to [Justin Le](https://github.com/mstksg), whose
 [dependently typed fully connected network](https://blog.jle.im/entry/practical-dependent-types-in-haskell-1.html)
 inspired me to get cracking, gave many ideas for the type level tools I needed,
 and was a great starting point for writing this library.
 
-Performance
------------
+## Performance
 Grenade is backed by hmatrix, BLAS, and LAPACK, with critical functions
 optimised in C. Using the im2col trick popularised by Caffe, it should be
 sufficient for many problems.
 
 Being purely functional, it should also be easy to run batches in parallel,
 which would be appropriate for larger networks; my current examples, however,
 are single threaded. Training 15 generations over Kaggle's 41000 sample MNIST
 training set on a single core took around 12 minutes, achieving 1.5% error
 rate on a 1000 sample holdout set.
 
-Contributing
-------------
+## Contributing
 Contributions are welcome.
 
 [hackage]: http://hackage.haskell.org/package/grenade

From 266dc31103dbb01d3736843bc8d9011ad250a632 Mon Sep 17 00:00:00 2001
From: Calen Pennington
Date: Fri, 11 Aug 2017 22:15:21 -0400
Subject: [PATCH 05/11] Implement im2col and col2im in Accelerate, with tests
 and benchmarks

---
 bench/bench.hs                                | 74 ++++++++++-----
 grenade.cabal                                 |  9 ++
 src/Grenade/Accelerate.hs                     |  1 +
 .../Layers/Internal/Convolution/Accelerate.hs | 89 +++++++++++++++++++
 .../Layers/Internal/Update/Accelerate.hs      | 24 +++++
 stack.yaml                                    |  7 ++
 .../Layers/Internal/Convolution/Accelerate.hs | 89 +++++++++++++++++++
 test/test.hs                                  |  2 +
 8 files changed, 273 insertions(+), 22 deletions(-)
 create mode 100644 src/Grenade/Accelerate.hs
 create mode 100644 src/Grenade/Layers/Internal/Convolution/Accelerate.hs
 create mode 100644 src/Grenade/Layers/Internal/Update/Accelerate.hs
 create mode 100644 test/Test/Grenade/Layers/Internal/Convolution/Accelerate.hs

diff --git a/bench/bench.hs b/bench/bench.hs
index 6d1a5e1a..cf9787c9 100644
--- a/bench/bench.hs
+++ b/bench/bench.hs
@@ -3,9 +3,18 @@
 import Criterion.Main
 
 import Grenade
+import Grenade.Accelerate as GA
 import Grenade.Layers.Internal.Convolution
+import qualified Grenade.Layers.Internal.Convolution.Accelerate as CA
 import Grenade.Layers.Internal.Pooling
+import qualified Grenade.Layers.Internal.Pooling.Accelerate as PA
+
+import qualified Data.Array.Accelerate as A
+import Data.Array.Accelerate (Z(..), (:.)(..))
+import Data.Array.Accelerate.Interpreter as I
+import Data.Array.Accelerate.LLVM.Native as LN
+--import Data.Array.Accelerate.LLVM.PTX as LP
 
 import Numeric.LinearAlgebra
 
@@ -14,28 +23,49 @@ main = do
   x :: S ('D2 60 60 ) <- randomOfShape
   y :: S ('D3 60 60 1) <- randomOfShape
 
  defaultMain [
    bgroup "im2col" [ bench "im2col 3x4"     $ whnf (im2col 2 2 1 1) ((3><4) [1..])
                    , bench "im2col 28x28"   $ whnf (im2col 5 5 1 1) ((28><28) [1..])
                    , bench "im2col 100x100" $ whnf (im2col 10 10 1 1) ((100><100) [1..])
                    ]
  , bgroup 
"col2im" [ bench "col2im 3x4" $ whnf (col2im 2 2 1 1 3 4) ((6><4) [1..]) - , bench "col2im 28x28" $ whnf (col2im 5 5 1 1 28 28) ((576><25) [1..]) - , bench "col2im 100x100" $ whnf (col2im 10 10 1 1 100 100) ((8281><100) [1..]) - ] - , bgroup "poolfw" [ bench "poolforwards 3x4" $ whnf (poolForward 1 3 4 2 2 1 1) ((3><4) [1..]) - , bench "poolforwards 28x28" $ whnf (poolForward 1 28 28 5 5 1 1) ((28><28) [1..]) - , bench "poolforwards 100x100" $ whnf (poolForward 1 100 100 10 10 1 1) ((100><100) [1..]) - ] - , bgroup "poolbw" [ bench "poolbackwards 3x4" $ whnf (poolBackward 1 3 4 2 2 1 1 ((3><4) [1..])) ((2><3) [1..]) - , bench "poolbackwards 28x28" $ whnf (poolBackward 1 28 28 5 5 1 1 ((28><28) [1..])) ((24><24) [1..]) - , bench "poolbackwards 100x100" $ whnf (poolBackward 1 100 100 10 10 1 1 ((100><100) [1..])) ((91><91) [1..]) - ] - , bgroup "padcrop" [ bench "pad 2D 60x60" $ whnf (testRun2D Pad) x - , bench "pad 3D 60x60" $ whnf (testRun3D Pad) y - , bench "crop 2D 60x60" $ whnf (testRun2D' Crop) x - , bench "crop 3D 60x60" $ whnf (testRun3D' Crop) y - ] + defaultMain + [ bgroup "linear algebra" + [ bgroup "im2col" + [ bench "im2col 3x4" $ nf (im2col 2 2 1 1) ((3><4) [1..]) + , bench "im2col 28x28" $ nf (im2col 5 5 1 1) ((28><28) [1..]) + , bench "im2col 100x100" $ nf (im2col 10 10 1 1) ((100><100) [1..]) + ] + , bgroup "col2im" + [ bench "col2im 3x4" $ nf (col2im 2 2 1 1 3 4) ((6><4) [1..]) + , bench "col2im 28x28" $ nf (col2im 5 5 1 1 28 28) ((576><25) [1..]) + , bench "col2im 100x100" $ nf (col2im 10 10 1 1 100 100) ((8281><100) [1..]) + ] + , bgroup "poolfw" + [ bench "poolforwards 3x4" $ nf (poolForward 1 3 4 2 2 1 1) ((3><4) [1..]) + , bench "poolforwards 28x28" $ nf (poolForward 1 28 28 5 5 1 1) ((28><28) [1..]) + , bench "poolforwards 100x100" $ nf (poolForward 1 100 100 10 10 1 1) ((100><100) [1..]) + ] + , bgroup "poolbw" + [ bench "poolbackwards 3x4" $ nf (poolBackward 1 3 4 2 2 1 1 ((3><4) [1..])) ((2><3) [1..]) + , bench "poolbackwards 28x28" $ nf (poolBackward 1 28 28 5 5 1 1 ((28><28) [1..])) ((24><24) [1..]) + , bench "poolbackwards 100x100" $ nf (poolBackward 1 100 100 10 10 1 1 ((100><100) [1..])) ((91><91) [1..]) + ] + , bgroup "padcrop" + [ bench "pad 2D 60x60" $ nf (testRun2D Pad) x + , bench "pad 3D 60x60" $ nf (testRun3D Pad) y + , bench "crop 2D 60x60" $ nf (testRun2D' Crop) x + , bench "crop 3D 60x60" $ nf (testRun3D' Crop) y + ] + ] + , bgroup "accelerate" + [ bgroup name + [ bgroup "im2col" + [ bench "im2col 3x4" $ nf (run . CA.im2col 2 2 1 1) (A.use $ A.fromList (Z :. 3 :. 4) [1..]) + , bench "im2col 28x28" $ nf (run . CA.im2col 5 5 1 1) (A.use $ A.fromList (Z :. 28 :. 28) [1..]) + , bench "im2col 100x100" $ nf (run . CA.im2col 10 10 1 1) (A.use $ A.fromList (Z :. 100 :. 
100) [1..]) + ] + ] + | (name, run) <- + [ ("interpreter", I.run) + , ("llvm-native", LN.run) + --, ("llvm-ptx", LP.run1) + ] + ] ] diff --git a/grenade.cabal b/grenade.cabal index 7cc8efce..4ca53808 100644 --- a/grenade.cabal +++ b/grenade.cabal @@ -50,6 +50,7 @@ library , text == 1.2.* , singletons >= 2.1 && < 2.4 , vector >= 0.11 && < 0.13 + , accelerate == 1.0.* ghc-options: -Wall @@ -62,6 +63,7 @@ library exposed-modules: Grenade + Grenade.Accelerate Grenade.Core Grenade.Core.Layer Grenade.Core.LearningParameters @@ -89,8 +91,10 @@ library Grenade.Layers.Trivial Grenade.Layers.Internal.Convolution + Grenade.Layers.Internal.Convolution.Accelerate Grenade.Layers.Internal.Pad Grenade.Layers.Internal.Pooling + Grenade.Layers.Internal.Pooling.Accelerate Grenade.Layers.Internal.Update Grenade.Recurrent @@ -138,6 +142,7 @@ test-suite test Test.Grenade.Layers.PadCrop Test.Grenade.Layers.Pooling Test.Grenade.Layers.Internal.Convolution + Test.Grenade.Layers.Internal.Convolution.Accelerate Test.Grenade.Layers.Internal.Pooling Test.Grenade.Layers.Internal.Reference @@ -163,6 +168,7 @@ test-suite test , ad , reflection , vector + , accelerate benchmark bench @@ -181,6 +187,9 @@ benchmark bench , criterion == 1.1.* , grenade , hmatrix + , accelerate + , accelerate-llvm-native + --, accelerate-llvm-ptx benchmark bench-lstm type: exitcode-stdio-1.0 diff --git a/src/Grenade/Accelerate.hs b/src/Grenade/Accelerate.hs new file mode 100644 index 00000000..38eca2ff --- /dev/null +++ b/src/Grenade/Accelerate.hs @@ -0,0 +1 @@ +module Grenade.Accelerate where diff --git a/src/Grenade/Layers/Internal/Convolution/Accelerate.hs b/src/Grenade/Layers/Internal/Convolution/Accelerate.hs new file mode 100644 index 00000000..3e9ec897 --- /dev/null +++ b/src/Grenade/Layers/Internal/Convolution/Accelerate.hs @@ -0,0 +1,89 @@ +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module Grenade.Layers.Internal.Convolution.Accelerate where + +import qualified Prelude as P +import Data.Array.Accelerate + +type Matrix e = Array DIM2 e + +transpose :: Elt e => Acc (Array DIM2 e) -> Acc (Array DIM2 e) +transpose mat = + let swap = lift1 $ \(Z:.x:.y) -> Z:.y:.x :: Z :. Exp Int :. Exp Int + in backpermute (swap $ shape mat) swap mat + + +im2colShape :: (P.Num a, P.Integral a) => a -> a -> a -> a -> (Z :. a :. a) -> Z :. a :. a +im2colShape kRs kCs sRs sCs (Z :. height :. width) = + let + rowOut = (height - kRs) `div` sRs + 1 + colOut = (width - kCs) `div` sCs + 1 + kernelSize = (kRs * kCs) + numPatches = rowOut * colOut + in Z :. numPatches :. kernelSize + +colIx2imIx + :: (P.Num a, P.Integral a) + => a -> a + -> a -> a + -> (Z :. a :. a) + -> (Z :. a :. a) + -> (Z :. a :. a) +colIx2imIx kRs kCs sRs sCs (Z :. height :. width) (Z :. y' :. x') = + let + kX = x' `mod` kCs + kY = x' `div` kCs + rowOut = (height - kRs) `div` sRs + 1 + colOut = (width - kCs) `div` sCs + 1 + sX = y' `mod` colOut + sY = y' `div` colOut + in Z :. (sY * sRs + kY) :. (sX * sCs + kX) + +im2col :: Int -> Int -> Int -> Int -> Acc (Matrix Double) -> Acc (Matrix Double) +im2col kernelRows kernelColumns strideRows strideColumns dataIm = dataCol + where + imSh :: Exp DIM2 + imSh = shape dataIm + + colSh :: Exp DIM2 + colSh = lift1 (im2colShape (lift kernelRows) (lift kernelColumns) (lift strideRows) (lift strideColumns) :: Z :. Exp Int :. Exp Int -> Z :. Exp Int :. 
Exp Int) imSh + + mapIxs :: Exp DIM2 -> Exp DIM2 -> Exp DIM2 + mapIxs = lift2 $ colIx2imIx (lift kernelRows :: Exp Int) (lift kernelColumns) (lift strideRows) (lift strideColumns) + + dataCol :: Acc (Matrix Double) + dataCol = backpermute colSh (mapIxs imSh) dataIm + +imIx2colIx + :: (P.Num a, P.Integral a) + => a + -> a + -> a + -> a + -> Z :. a :. a + -> Z :. a :. a + -> Z :. a :. a +imIx2colIx kRs kCs sRs sCs (Z :. height :. width) (Z :. y :. x) = + let + rowOut = (height - kRs) `div` sRs + 1 + colOut = (width - kCs) `div` sCs + 1 + sX = P.min (x `div` sCs) (colOut - 1) + kX = x - sX * sCs + sY = P.min (y `div` sRs) (rowOut - 1) + kY = y - sY * sRs + x' = kY * kCs + kX + y' = sY * colOut + sX + in Z :. y' :. x' + +col2im :: Int -> Int -> Int -> Int -> Int -> Int -> Acc (Matrix Double) -> Acc (Matrix Double) +col2im kernelRows kernelColumns strideRows strideColumns height width dataCol = dataIm + where + imSh :: DIM2 + imSh = Z :. height :. width + + mapIxs :: Exp DIM2 -> Exp DIM2 -> Exp DIM2 + mapIxs = lift2 $ colIx2imIx (lift kernelRows :: Exp Int) (lift kernelColumns) (lift strideRows) (lift strideColumns) + + dataIm :: Acc (Matrix Double) + dataIm = permute (+) (fill (constant imSh) 0) (mapIxs $ lift imSh) dataCol diff --git a/src/Grenade/Layers/Internal/Update/Accelerate.hs b/src/Grenade/Layers/Internal/Update/Accelerate.hs new file mode 100644 index 00000000..d9252326 --- /dev/null +++ b/src/Grenade/Layers/Internal/Update/Accelerate.hs @@ -0,0 +1,24 @@ +module Grenade.Layers.Internal.Update.Accelerate ( + descend + ) where + +import qualified Prelude as P +import Data.Array.Accelerate +import Grenade.Core.LearningParameters + +descend + :: Shape sh + => AccelLearningParameters + -> Acc (Array sh Double) + -> Acc (Array sh Double) + -> Acc (Array sh Double) + -> Acc (Array sh Double, Array sh Double) +descend params weights gradient lastUpdate = + let + rate = params !! 0 + momentum = params !! 1 + regulariser = params !! 
2 + outMomentum = zipWith (-) (map (momentum *) lastUpdate) (map (rate *) gradient) + outWeights = zipWith (-) (zipWith (*) weights outMomentum) (map ((rate * regulariser) *) weights) + in + lift (outWeights, outMomentum) \ No newline at end of file diff --git a/stack.yaml b/stack.yaml index c2b610da..a3b35f66 100644 --- a/stack.yaml +++ b/stack.yaml @@ -49,7 +49,14 @@ packages: # Dependency packages to be pulled from upstream that are not in the resolver # (e.g., acme-missiles-0.3) extra-deps: +- accelerate-1.0.0.0 +- accelerate-llvm-1.0.0.0 +- accelerate-llvm-native-1.0.0.0 +#- accelerate-llvm-ptx-1.0.0.1 +- chaselev-deque-0.5.0.5 +#- cuda-0.7.5.3 - hedgehog-0.4.1 +- llvm-hs-4.0.1.0 # Override default flag values for local packages and extra-deps flags: {} diff --git a/test/Test/Grenade/Layers/Internal/Convolution/Accelerate.hs b/test/Test/Grenade/Layers/Internal/Convolution/Accelerate.hs new file mode 100644 index 00000000..2ee9e074 --- /dev/null +++ b/test/Test/Grenade/Layers/Internal/Convolution/Accelerate.hs @@ -0,0 +1,89 @@ +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE TypeOperators #-} + +{-# OPTIONS_GHC -fno-warn-missing-signatures #-} +module Test.Grenade.Layers.Internal.Convolution.Accelerate where + +import Grenade.Layers.Internal.Convolution.Accelerate +import Numeric.LinearAlgebra hiding (uniformSample, konst, (===)) + +import Hedgehog +import qualified Hedgehog.Gen as Gen +import qualified Hedgehog.Range as Range + +import qualified Test.Grenade.Layers.Internal.Reference as Reference +import Test.Hedgehog.Compat + +import Data.Array.Accelerate.Interpreter +import qualified Data.Array.Accelerate as A +import Data.Array.Accelerate (Z(..), (:.)(..)) + +prop_im2col_col2im_indexes_symmetrical_with_kernel_stride = + let factors n = [x | x <- [1..n], n `mod` x == 0] + in property $ do + height <- forAll $ choose 2 100 + width <- forAll $ choose 2 100 + kernel_h <- forAll $ (height `div`) <$> Gen.element (factors height) + kernel_w <- forAll $ (width `div`) <$> Gen.element (factors width) + + let stride_h = kernel_h + stride_w = kernel_w + size = Z :. height :. width + _imIx2colIx = imIx2colIx kernel_h kernel_w stride_h stride_w size + _colIx2imIx = colIx2imIx kernel_h kernel_w stride_h stride_w size + + x <- forAll $ choose 0 (width - 1) + y <- forAll $ choose 0 (height - 1) + let input = Z :. y :. x + + let out = (_colIx2imIx . _imIx2colIx) input + input === out + +prop_im2col_col2im_symmetrical_with_kernel_stride = + let factors n = [x | x <- [1..n], n `mod` x == 0] + in property $ do + height <- forAll $ choose 2 100 + width <- forAll $ choose 2 100 + kernel_h <- forAll $ (height `div`) <$> Gen.element (factors height) + kernel_w <- forAll $ (width `div`) <$> Gen.element (factors width) + let imageShape = (Z :. height :. width) + input <- forAll $ A.fromList imageShape <$> Gen.list (Range.singleton $ height * width) (Gen.realFloat $ Range.linearFracFrom 0 (-100) 100) + + let stride_h = kernel_h + stride_w = kernel_w + _col2im = col2im kernel_h kernel_w stride_h stride_w height width + _im2col = im2col kernel_h kernel_w stride_h stride_w + + let out = run1 (_col2im . 
_im2col) input + input === out + +prop_im2col_col2im_behaves_as_reference = + let ok extent kernel = [stride | stride <- [1..extent], (extent - kernel) `mod` stride == 0] + in property $ do + height <- forAll $ choose 2 100 + width <- forAll $ choose 2 100 + kernel_h <- forAll $ choose 2 (height - 1) + kernel_w <- forAll $ choose 2 (width - 1) + stride_h <- forAll $ Gen.element (ok height kernel_h) + stride_w <- forAll $ Gen.element (ok width kernel_w) + let contents = [0..] + + let inputRef = (height >< width) contents + imageShape = (Z :. height :. width) + input = A.fromList imageShape contents + + outFast = run1 (im2col kernel_h kernel_w stride_h stride_w) input + retFast = run1 (col2im kernel_h kernel_w stride_h stride_w height width) outFast + + outReference = Reference.im2col kernel_h kernel_w stride_h stride_w inputRef + retReference = Reference.col2im kernel_h kernel_w stride_h stride_w height width outReference + + (A.toList outFast) === (concat $ toLists outReference) + (A.toList retFast) === (concat $ toLists retReference) + +tests :: IO Bool +tests = checkParallel $$(discover) diff --git a/test/test.hs b/test/test.hs index 5c725bb1..aabebffb 100644 --- a/test/test.hs +++ b/test/test.hs @@ -9,6 +9,7 @@ import qualified Test.Grenade.Layers.Nonlinear import qualified Test.Grenade.Layers.PadCrop import qualified Test.Grenade.Layers.Internal.Convolution +import qualified Test.Grenade.Layers.Internal.Convolution.Accelerate import qualified Test.Grenade.Layers.Internal.Pooling import qualified Test.Grenade.Recurrent.Layers.LSTM @@ -28,6 +29,7 @@ main = , Test.Grenade.Layers.PadCrop.tests , Test.Grenade.Layers.Internal.Convolution.tests + , Test.Grenade.Layers.Internal.Convolution.Accelerate.tests , Test.Grenade.Layers.Internal.Pooling.tests , Test.Grenade.Recurrent.Layers.LSTM.tests From d5442aed77fef8fd6fd8ae0e89a23a9381422b1b Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Wed, 16 Aug 2017 09:35:39 -0400 Subject: [PATCH 06/11] Implement FullyConnected via Accelerate --- grenade.cabal | 9 ++ src/Grenade/Core/Accelerate.hs | 15 +++ src/Grenade/Core/Layer/Accelerate.hs | 85 +++++++++++++ .../Core/LearningParameters/Accelerate.hs | 23 ++++ src/Grenade/Core/Matrix/Accelerate.hs | 49 ++++++++ src/Grenade/Core/Shape/Accelerate.hs | 25 ++++ src/Grenade/Layers/FullyConnected.hs | 47 +++++++- .../Layers/Internal/Update/Accelerate.hs | 14 +-- stack.yaml | 3 +- .../Layers/FullyConnected/Accelerate.hs | 113 ++++++++++++++++++ test/Test/Hedgehog/Accelerate.hs | 16 +++ test/test.hs | 3 +- 12 files changed, 391 insertions(+), 11 deletions(-) create mode 100644 src/Grenade/Core/Accelerate.hs create mode 100644 src/Grenade/Core/Layer/Accelerate.hs create mode 100644 src/Grenade/Core/LearningParameters/Accelerate.hs create mode 100644 src/Grenade/Core/Matrix/Accelerate.hs create mode 100644 src/Grenade/Core/Shape/Accelerate.hs create mode 100644 test/Test/Grenade/Layers/FullyConnected/Accelerate.hs create mode 100644 test/Test/Hedgehog/Accelerate.hs diff --git a/grenade.cabal b/grenade.cabal index 4ca53808..6034c1ce 100644 --- a/grenade.cabal +++ b/grenade.cabal @@ -51,6 +51,7 @@ library , singletons >= 2.1 && < 2.4 , vector >= 0.11 && < 0.13 , accelerate == 1.0.* + , accelerate-io == 1.0.* ghc-options: -Wall @@ -65,11 +66,16 @@ library Grenade Grenade.Accelerate Grenade.Core + Grenade.Core.Accelerate Grenade.Core.Layer + Grenade.Core.Layer.Accelerate Grenade.Core.LearningParameters + Grenade.Core.LearningParameters.Accelerate + Grenade.Core.Matrix.Accelerate Grenade.Core.Network 
     Grenade.Core.Runner
     Grenade.Core.Shape
+    Grenade.Core.Shape.Accelerate
 
     Grenade.Layers
     Grenade.Layers.Concat
@@ -96,6 +102,7 @@ library
     Grenade.Layers.Internal.Pooling
     Grenade.Layers.Internal.Pooling.Accelerate
     Grenade.Layers.Internal.Update
+    Grenade.Layers.Internal.Update.Accelerate
 
     Grenade.Recurrent
 
@@ -133,11 +140,13 @@ test-suite test
   other-modules:
     Test.Hedgehog.Compat
     Test.Hedgehog.Hmatrix
+    Test.Hedgehog.Accelerate
    Test.Hedgehog.TypeLits
 
     Test.Grenade.Network
     Test.Grenade.Layers.Convolution
     Test.Grenade.Layers.FullyConnected
+    Test.Grenade.Layers.FullyConnected.Accelerate
     Test.Grenade.Layers.Nonlinear
     Test.Grenade.Layers.PadCrop
     Test.Grenade.Layers.Pooling
diff --git a/src/Grenade/Core/Accelerate.hs b/src/Grenade/Core/Accelerate.hs
new file mode 100644
index 00000000..bf23f7f7
--- /dev/null
+++ b/src/Grenade/Core/Accelerate.hs
@@ -0,0 +1,15 @@
+module Grenade.Core.Accelerate (
+    module Grenade.Core.Layer.Accelerate
+  , module Grenade.Core.LearningParameters
+  , module Grenade.Core.Network
+  , module Grenade.Core.Runner
+  , module Grenade.Core.Shape.Accelerate
+  , module Grenade.Core.Matrix.Accelerate
+  ) where
+
+import Grenade.Core.Layer.Accelerate
+import Grenade.Core.LearningParameters
+import Grenade.Core.Network
+import Grenade.Core.Runner
+import Grenade.Core.Shape.Accelerate
+import Grenade.Core.Matrix.Accelerate
diff --git a/src/Grenade/Core/Layer/Accelerate.hs b/src/Grenade/Core/Layer/Accelerate.hs
new file mode 100644
index 00000000..e6dd3ffe
--- /dev/null
+++ b/src/Grenade/Core/Layer/Accelerate.hs
@@ -0,0 +1,85 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE TypeOperators #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FunctionalDependencies #-}
+{-|
+Module      : Grenade.Core.Layer.Accelerate
+Description : Defines the Layer Classes required for the Accelerate backend
+Copyright   : (c) Huw Campbell, 2016-2017
+License     : BSD2
+Stability   : experimental
+
+This module defines what a Layer is in a Grenade
+neural network.
+
+There are two classes of interest: `UpdateLayer` and `Layer`.
+
+`UpdateLayer` is required for all types which are used as a layer
+in a network. Having no shape information, this class is agnostic
+to the input and output data of the layer.
+
+An instance of `Layer` on the other hand is required for usage in
+a neural network, but also specifies the shapes of data that the
+network can transform. Multiple instances of `Layer` are permitted
+for a single type, to transform different shapes. The `Reshape` layer
+for example can act as a flattening layer, and its inverse, projecting
+a 1D shape up to 2 or 3 dimensions.
+
+Instances of `Layer` should be as strict as possible, and not emit
+runtime errors.
+-}
+module Grenade.Core.Layer.Accelerate (
+    Layer (..)
+  , UpdateLayer (..)
+  ) where
+
+import Data.List ( foldl' )
+import Data.Array.Accelerate
+
+import Grenade.Core.Shape.Accelerate
+import Grenade.Core.LearningParameters.Accelerate
+import Grenade.Core.Matrix.Accelerate
+
+-- | Class for updating a layer. All layers implement this, as it
+-- describes how to create and update the layer.
+--
+class Accelerable l a => UpdateLayer l a | a -> l where
+  -- | The type for the gradient for this layer.
+  -- Unit if there isn't a gradient to pass back.
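+  -- As a concrete example, the fully connected layer later in this
+  -- series instantiates this with a pair of bias and weight-matrix
+  -- gradients:
+  --
+  -- > type Gradient (AFullyConnected i o) = (Acc (Vector Double), Acc (Array DIM2 Double))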
+ type Gradient a :: * + + -- | Update a layer with its gradient and learning parameters + runUpdate :: Acc LearningParameters -> a -> Gradient a -> a + + -- | Update a layer with many Gradients + runUpdates :: Acc LearningParameters -> a -> [Gradient a] -> a + runUpdates rate = foldl' (runUpdate rate) + + {-# MINIMAL runUpdate #-} + +-- | Class for a layer. All layers implement this, however, they don't +-- need to implement it for all shapes, only ones which are +-- appropriate. +-- +class UpdateLayer l a => Layer l a i o | a -> l i o where + -- | The Wengert tape for this layer. Includes all that is required + -- to generate the back propagated gradients efficiently. As a + -- default, `S i` is fine. + type Tape a i o :: * + + -- | Used in training and scoring. Take the input from the previous + -- layer, and give the output from this layer. + runForwards :: a -> Acc (S i) -> (Tape a i o, Acc (S o)) + + -- | Back propagate a step. Takes the current layer, the input that + -- the layer gave from the input and the back propagated derivatives + -- from the layer above. + -- + -- Returns the gradient layer and the derivatives to push back + -- further. + runBackwards :: a -> Tape a i o -> Acc (S o) -> (Gradient a, Acc (S i)) diff --git a/src/Grenade/Core/LearningParameters/Accelerate.hs b/src/Grenade/Core/LearningParameters/Accelerate.hs new file mode 100644 index 00000000..6c80780a --- /dev/null +++ b/src/Grenade/Core/LearningParameters/Accelerate.hs @@ -0,0 +1,23 @@ +{-| +Module : Grenade.Core.LearningParameters +Description : Stochastic gradient descent learning parameters +Copyright : (c) Huw Campbell, 2016-2017 +License : BSD2 +Stability : experimental +-} +module Grenade.Core.LearningParameters.Accelerate ( + -- | This module contains learning algorithm specific + -- code. Currently, this module should be considered + -- unstable, due to issue #26. + + LearningParameters + ) where + +import Data.Array.Accelerate + +-- | Learning parameters for stochastic gradient descent. +type LearningParameters = + ( Scalar Double -- learningRate + , Scalar Double -- learningMomentum + , Scalar Double -- learningRegulariser + ) diff --git a/src/Grenade/Core/Matrix/Accelerate.hs b/src/Grenade/Core/Matrix/Accelerate.hs new file mode 100644 index 00000000..172a0f55 --- /dev/null +++ b/src/Grenade/Core/Matrix/Accelerate.hs @@ -0,0 +1,49 @@ +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FunctionalDependencies #-} + +module Grenade.Core.Matrix.Accelerate where + +import qualified Prelude as P +import Data.Proxy +import GHC.TypeLits + +import Data.Array.Accelerate hiding (flatten, size) +import Data.Array.Accelerate.IO +import Numeric.LinearAlgebra.Static (R, L, unwrap, size) +import Numeric.LinearAlgebra (flatten) + +class Accelerable g a | a -> g where + -- | Accelerate a Grenade type + toAccel :: g -> a + + +outer :: (P.Num (Exp e), Elt e) => Acc (Vector e) -> Acc (Vector e) -> Acc (Array DIM2 e) +outer a b = zipWith (*) aRepl bRepl + where + (Z :. aN) = unlift $ shape a :: Z :. Exp Int + (Z :. bN) = unlift $ shape b :: Z :. Exp Int + aRepl = replicate (lift $ Z :. All :. bN) a + bRepl = replicate (lift $ Z :. aN :. All) b + +(#>) :: (P.Num (Exp e), Elt e) => Acc (Array DIM2 e) -> Acc (Vector e) -> Acc (Vector e) +m #> v = fold (+) 0 mul + where + (Z :. vN) = unlift $ shape v :: Z :. Exp Int + (Z :. mN :. nN) = unlift $ shape m :: Z :. Exp Int :. 
Exp Int + + vRepl = replicate (lift $ Z :. mN :. All) v + mul = zipWith (*) m vRepl + +fromMatrix :: (KnownNat m, KnownNat n) => L m n -> Array DIM2 Double +fromMatrix mat = (fromVectors (Z :. rows :. cols)) $ flatten $ unwrap mat + where + (rows, cols) = size mat + +fromVector :: (KnownNat n) => R n -> Vector Double +fromVector vec = (fromVectors (Z :. len)) $ unwrap vec + where + len = size vec diff --git a/src/Grenade/Core/Shape/Accelerate.hs b/src/Grenade/Core/Shape/Accelerate.hs new file mode 100644 index 00000000..13ab35a8 --- /dev/null +++ b/src/Grenade/Core/Shape/Accelerate.hs @@ -0,0 +1,25 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE RankNTypes #-} +{-| +Module : Grenade.Core.Shape +Description : Dependently typed shapes of data which are passed between layers of a network +Copyright : (c) Huw Campbell, 2016-2017 +License : BSD2 +Stability : experimental + + +-} +module Grenade.Core.Shape.Accelerate + ( S + ) where + +import Data.Array.Accelerate + +type S sh = Array sh Double diff --git a/src/Grenade/Layers/FullyConnected.hs b/src/Grenade/Layers/FullyConnected.hs index 1d8798a1..e5bef4a5 100644 --- a/src/Grenade/Layers/FullyConnected.hs +++ b/src/Grenade/Layers/FullyConnected.hs @@ -4,24 +4,30 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE FlexibleInstances #-} module Grenade.Layers.FullyConnected ( FullyConnected (..) , FullyConnected' (..) + , AFullyConnected (..) , randomFullyConnected ) where -import Control.Monad.Random hiding (fromList) +import Control.Monad.Random hiding (fromList, lift) import Data.Proxy import Data.Serialize import Data.Singletons.TypeLits +import Data.Array.Accelerate hiding (Shape, fromIntegral) import qualified Numeric.LinearAlgebra as LA import Numeric.LinearAlgebra.Static import Grenade.Core +import qualified Grenade.Core.Accelerate as A import Grenade.Layers.Internal.Update +import Grenade.Layers.Internal.Update.Accelerate -- | A basic fully connected (or inner product) neural network layer. 
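+-- The accelerated instances added below mirror the hmatrix
+-- implementation above: the forward pass computes @b + w #> x@, and the
+-- backward pass yields the bias gradient @dEdy@, the weight gradient
+-- @dEdy `outer` x@, and pushes @transpose w #> dEdy@ back to the
+-- previous layer.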
 data FullyConnected i o = FullyConnected
@@ -81,3 +87,42 @@ randomFullyConnected = do
         bm = konst 0
         mm = konst 0
     return $ FullyConnected (FullyConnected' wB wN) (FullyConnected' bm mm)
+
+data AFullyConnected (i :: Nat) (o :: Nat) = AFullyConnected
+  (Acc (Vector Double))
+  (Acc (Array DIM2 Double))
+  (Acc (Vector Double))
+  (Acc (Array DIM2 Double))
+
+instance (KnownNat i, KnownNat o) => A.Accelerable (FullyConnected i o) (AFullyConnected i o) where
+  toAccel (FullyConnected (FullyConnected' b a) (FullyConnected' bM m)) =
+    AFullyConnected
+      (use $ A.fromVector b)
+      (use $ A.fromMatrix a)
+      (use $ A.fromVector bM)
+      (use $ A.fromMatrix m)
+
+instance (KnownNat i, KnownNat o) => A.UpdateLayer (FullyConnected i o) (AFullyConnected i o) where
+
+  type Gradient (AFullyConnected i o) = (Acc (Vector Double), Acc (Array DIM2 Double))
+
+  runUpdate
+      params
+      (AFullyConnected oldBias oldActivations oldBiasMomentum oldMomentum)
+      (biasGradient, activationGradient) =
+    let (newBias, newBiasMomentum) :: (Acc (Array DIM1 Double), Acc (Array DIM1 Double)) = unlift $ descend params oldBias biasGradient oldBiasMomentum
+        (newActivations, newMomentum) :: (Acc (Array DIM2 Double), Acc (Array DIM2 Double)) = unlift $ descend params oldActivations activationGradient oldMomentum
+    in AFullyConnected newBias newActivations newBiasMomentum newMomentum
+
+
+instance (KnownNat i, KnownNat o) => A.Layer (FullyConnected i o) (AFullyConnected i o) DIM1 DIM1 where
+
+  type Tape (AFullyConnected i o) DIM1 DIM1 = Acc (Vector Double)
+
+  runForwards (AFullyConnected wB wN _ _) v = (v, Data.Array.Accelerate.zipWith (+) wB (wN A.#> v))
+  runBackwards (AFullyConnected _ wN _ _) x dEdy =
+    let wB' = dEdy
+        mm' = dEdy `A.outer` x
+        -- calculate derivatives for next step
+        dWs = transpose wN A.#> dEdy
+    in ((wB', mm'), dWs)
diff --git a/src/Grenade/Layers/Internal/Update/Accelerate.hs b/src/Grenade/Layers/Internal/Update/Accelerate.hs
index d9252326..b1c1a817 100644
--- a/src/Grenade/Layers/Internal/Update/Accelerate.hs
+++ b/src/Grenade/Layers/Internal/Update/Accelerate.hs
@@ -4,21 +4,19 @@ module Grenade.Layers.Internal.Update.Accelerate (
 
 import qualified Prelude as P
 import Data.Array.Accelerate
-import Grenade.Core.LearningParameters
+import Grenade.Core.LearningParameters.Accelerate
 
 descend
   :: Shape sh
-  => AccelLearningParameters
+  => Acc LearningParameters
  -> Acc (Array sh Double)
  -> Acc (Array sh Double)
  -> Acc (Array sh Double)
  -> Acc (Array sh Double, Array sh Double)
 descend params weights gradient lastUpdate =
   let
-    rate = params !! 0
-    momentum = params !! 1
-    regulariser = params !! 
2 - outMomentum = zipWith (-) (map (momentum *) lastUpdate) (map (rate *) gradient) - outWeights = zipWith (-) (zipWith (*) weights outMomentum) (map ((rate * regulariser) *) weights) + (rate, momentum, regulariser) = unlift params + outMomentum = zipWith (-) (map ((the momentum) *) lastUpdate) (map ((the rate) *) gradient) + outWeights = zipWith (-) (zipWith (*) weights outMomentum) (map (((the rate) * (the regulariser)) *) weights) in - lift (outWeights, outMomentum) \ No newline at end of file + lift (outWeights, outMomentum) diff --git a/stack.yaml b/stack.yaml index a3b35f66..78bdc215 100644 --- a/stack.yaml +++ b/stack.yaml @@ -50,6 +50,7 @@ packages: # (e.g., acme-missiles-0.3) extra-deps: - accelerate-1.0.0.0 +- accelerate-io-1.0.0.0 - accelerate-llvm-1.0.0.0 - accelerate-llvm-native-1.0.0.0 #- accelerate-llvm-ptx-1.0.0.1 @@ -80,4 +81,4 @@ extra-package-dbs: [] # extra-lib-dirs: [/path/to/dir] # # Allow a newer minor version of GHC than the snapshot specifies -# compiler-check: newer-minor \ No newline at end of file +# compiler-check: newer-minor diff --git a/test/Test/Grenade/Layers/FullyConnected/Accelerate.hs b/test/Test/Grenade/Layers/FullyConnected/Accelerate.hs new file mode 100644 index 00000000..5be20c04 --- /dev/null +++ b/test/Test/Grenade/Layers/FullyConnected/Accelerate.hs @@ -0,0 +1,113 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleContexts #-} +{-# OPTIONS_GHC -fno-warn-missing-signatures #-} +module Test.Grenade.Layers.FullyConnected.Accelerate where + +import qualified Prelude as P +import Prelude (Show, Maybe(..), IO, Integer, Monad, show, return, seq, (<$>)) +import Data.Proxy +import Data.Singletons () +import Data.Array.Accelerate +import Data.Array.Accelerate.Interpreter + +import GHC.TypeLits + +import qualified Grenade.Core as G +import Grenade.Core.Accelerate as A +import Grenade.Layers.FullyConnected + +import Hedgehog + +import Test.Hedgehog.Compat +import Test.Hedgehog.Hmatrix +import Test.Hedgehog.Accelerate + +data OpaqueFullyConnected :: * where + OpaqueFullyConnected :: (KnownNat i, KnownNat o) => FullyConnected i o -> OpaqueFullyConnected + +instance Show OpaqueFullyConnected where + show (OpaqueFullyConnected n) = show n + +genOpaqueFullyConnected :: Monad m => Gen m OpaqueFullyConnected +genOpaqueFullyConnected = do + input :: Integer <- choose 2 100 + output :: Integer <- choose 1 100 + let Just input' = someNatVal input + let Just output' = someNatVal output + case (input', output') of + (SomeNat (Proxy :: Proxy i'), SomeNat (Proxy :: Proxy o')) -> do + wB <- randomVector + bM <- randomVector + wN <- uniformSample + kM <- uniformSample + return . OpaqueFullyConnected $ (FullyConnected (FullyConnected' wB wN) (FullyConnected' bM kM) :: FullyConnected i' o') + +prop_fully_connected_forwards :: Property +prop_fully_connected_forwards = property $ do + OpaqueFullyConnected (fclayer :: FullyConnected i o) <- blindForAll genOpaqueFullyConnected + input :: S DIM1 <- blindForAll (randomArray (Z :. 
(P.fromIntegral $ natVal (Proxy :: Proxy i)))) + let (fclayer' :: AFullyConnected i o) = toAccel fclayer + (tape, output :: Acc (S DIM1)) = runForwards fclayer' (use input) + backed :: (Gradient (AFullyConnected i o), Acc (S DIM1)) + = runBackwards fclayer' tape output + (run $ lift backed) `seq` success + +(~===) :: (P.Num (Exp e), P.Fractional (Exp e), RealFrac e, Monad m, P.Eq sh, P.Eq e, Elt e, Shape sh, FromIntegral Int e) => Array sh e -> Array sh e -> Test m () +a ~=== b = fuzzy a === fuzzy b + where + fuzzy :: (P.Num (Exp e), P.Fractional (Exp e), RealFrac e, Shape sh, Elt e, FromIntegral Int e) => Array sh e -> Array sh e + fuzzy = run1 $ map $ \x -> + let + scaledUp :: Exp Int + scaledUp = round $ x * 1e7 + in (fromIntegral scaledUp) / 1e7 + +prop_fully_connected_forwards_equals_reference :: Property +prop_fully_connected_forwards_equals_reference = property $ do + OpaqueFullyConnected (fclayer :: FullyConnected i o) <- blindForAll genOpaqueFullyConnected + input :: G.S ('G.D1 i) <- blindForAll (G.S1D <$> randomVector) + + + let + input' = case input of + G.S1D v -> fromVector v + output :: G.S ('G.D1 o) + inputGrad :: G.S ('G.D1 i) + (tape, output) = G.runForwards fclayer input + (gradient, inputGrad) = G.runBackwards fclayer tape output + + tapeV :: Vector Double + tapeV = case tape of + G.S1D v -> fromVector v + + outputV :: Vector Double + outputV = case output of + G.S1D v -> fromVector v + + inputGradV :: Vector Double + inputGradV = case inputGrad of + G.S1D v -> fromVector v + + biasGradV :: Vector Double + actGradV :: Array DIM2 Double + (biasGradV, actGradV) = case gradient of + FullyConnected' b a -> (fromVector b, fromMatrix a) + + (fclayer' :: AFullyConnected i o) = toAccel fclayer + (tape', output') = runForwards fclayer' (use input') + (gradient', inputGrad') = runBackwards fclayer' tape' output' + + (tapeV', outputV', (biasGradV', actGradV'), inputGradV') = run $ lift (tape', output', gradient', inputGrad') + tapeV ~=== tapeV' + outputV ~=== outputV' + biasGradV ~=== biasGradV' + actGradV ~=== actGradV' + inputGradV ~=== inputGradV' + +tests :: IO Bool +tests = checkParallel $$(discover) diff --git a/test/Test/Hedgehog/Accelerate.hs b/test/Test/Hedgehog/Accelerate.hs new file mode 100644 index 00000000..103d7360 --- /dev/null +++ b/test/Test/Hedgehog/Accelerate.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE GADTs #-} + +module Test.Hedgehog.Accelerate where + +import Data.Singletons +import Data.Singletons.TypeLits +import Data.Array.Accelerate + +import qualified Hedgehog.Gen as Gen +import qualified Hedgehog.Range as Range + +randomArray :: ( Monad m, Shape sh ) => sh -> Gen.Gen m (Array sh Double) +randomArray sh = fromList sh <$> Gen.list (Range.singleton $ arraySize sh) (Gen.realFloat $ Range.linearFracFrom 0 (-1) 1) diff --git a/test/test.hs b/test/test.hs index aabebffb..0eeaf6ca 100644 --- a/test/test.hs +++ b/test/test.hs @@ -5,6 +5,7 @@ import qualified Test.Grenade.Network import qualified Test.Grenade.Layers.Pooling import qualified Test.Grenade.Layers.Convolution import qualified Test.Grenade.Layers.FullyConnected +import qualified Test.Grenade.Layers.FullyConnected.Accelerate import qualified Test.Grenade.Layers.Nonlinear import qualified Test.Grenade.Layers.PadCrop @@ -25,6 +26,7 @@ main = , Test.Grenade.Layers.Pooling.tests , Test.Grenade.Layers.Convolution.tests , Test.Grenade.Layers.FullyConnected.tests + , Test.Grenade.Layers.FullyConnected.Accelerate.tests , 
Test.Grenade.Layers.Nonlinear.tests , Test.Grenade.Layers.PadCrop.tests @@ -33,7 +35,6 @@ main = , Test.Grenade.Layers.Internal.Pooling.tests , Test.Grenade.Recurrent.Layers.LSTM.tests - ] disorderMain :: [IO Bool] -> IO () From 385375ccde61f414b8f441282b6c5e03c8eca6c0 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Thu, 17 Aug 2017 23:17:11 -0400 Subject: [PATCH 07/11] Make Network Accelerable N.B. the hedgehog tests of accelerated networks still suffer from type issues when trying to compare the network results. --- grenade.cabal | 1 + src/Grenade/Core/Accelerate.hs | 10 +- src/Grenade/Core/Layer/Accelerate.hs | 18 +- src/Grenade/Core/Matrix/Accelerate.hs | 9 +- src/Grenade/Core/Network/Accelerate.hs | 201 ++++++++++++++++++ src/Grenade/Core/Shape.hs | 17 ++ src/Grenade/Layers/FullyConnected.hs | 28 +-- .../Layers/Internal/Pooling/Accelerate.hs | 1 + .../Recurrent/Core/Network/Accelerate.hs | 101 +++++++++ test/Test/Grenade/Layers/FullyConnected.hs | 12 +- .../Layers/FullyConnected/Accelerate.hs | 32 ++- test/Test/Grenade/Network.hs | 1 + test/Test/Grenade/Network/Accelerate.hs | 177 +++++++++++++++ test/Test/Hedgehog/Accelerate.hs | 17 +- test/test.hs | 2 + 15 files changed, 567 insertions(+), 60 deletions(-) create mode 100644 src/Grenade/Core/Network/Accelerate.hs create mode 100644 src/Grenade/Layers/Internal/Pooling/Accelerate.hs create mode 100644 src/Grenade/Recurrent/Core/Network/Accelerate.hs create mode 100644 test/Test/Grenade/Network/Accelerate.hs diff --git a/grenade.cabal b/grenade.cabal index 6034c1ce..2a04a04c 100644 --- a/grenade.cabal +++ b/grenade.cabal @@ -73,6 +73,7 @@ library Grenade.Core.LearningParameters.Accelerate Grenade.Core.Matrix.Accelerate Grenade.Core.Network + Grenade.Core.Network.Accelerate Grenade.Core.Runner Grenade.Core.Shape Grenade.Core.Shape.Accelerate diff --git a/src/Grenade/Core/Accelerate.hs b/src/Grenade/Core/Accelerate.hs index bf23f7f7..04419204 100644 --- a/src/Grenade/Core/Accelerate.hs +++ b/src/Grenade/Core/Accelerate.hs @@ -1,15 +1,13 @@ module Grenade.Core.Accelerate ( module Grenade.Core.Layer.Accelerate - , module Grenade.Core.LearningParameters - , module Grenade.Core.Network - , module Grenade.Core.Runner + , module Grenade.Core.LearningParameters.Accelerate + , module Grenade.Core.Network.Accelerate , module Grenade.Core.Shape.Accelerate , module Grenade.Core.Matrix.Accelerate ) where import Grenade.Core.Layer.Accelerate -import Grenade.Core.LearningParameters -import Grenade.Core.Network -import Grenade.Core.Runner +import Grenade.Core.LearningParameters.Accelerate +import Grenade.Core.Network.Accelerate import Grenade.Core.Shape.Accelerate import Grenade.Core.Matrix.Accelerate diff --git a/src/Grenade/Core/Layer/Accelerate.hs b/src/Grenade/Core/Layer/Accelerate.hs index e6dd3ffe..eeb4bfca 100644 --- a/src/Grenade/Core/Layer/Accelerate.hs +++ b/src/Grenade/Core/Layer/Accelerate.hs @@ -41,23 +41,23 @@ module Grenade.Core.Layer.Accelerate ( import Data.List ( foldl' ) import Data.Array.Accelerate -import Grenade.Core.Shape.Accelerate +import qualified Grenade.Core.Shape as GS import Grenade.Core.LearningParameters.Accelerate import Grenade.Core.Matrix.Accelerate -- | Class for updating a layer. All layers implement this, as it -- describes how to create and update the layer. -- -class Accelerable l a => UpdateLayer l a | a -> l where +class Accelerable l => UpdateLayer l where -- | The type for the gradient for this layer. -- Unit if there isn't a gradient to pass back. 
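+  -- (From this patch onwards the gradient is indexed by the grenade
+  -- layer type itself; its accelerated representation is recovered
+  -- through the 'Accelerated' data family on 'Accelerable'.)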
- type Gradient a :: * + type Gradient l :: * -- | Update a layer with its gradient and learning parameters - runUpdate :: Acc LearningParameters -> a -> Gradient a -> a + runUpdate :: Acc LearningParameters -> (Accelerated l) -> Gradient l -> (Accelerated l) -- | Update a layer with many Gradients - runUpdates :: Acc LearningParameters -> a -> [Gradient a] -> a + runUpdates :: Acc LearningParameters -> (Accelerated l) -> [Gradient l] -> (Accelerated l) runUpdates rate = foldl' (runUpdate rate) {-# MINIMAL runUpdate #-} @@ -66,15 +66,15 @@ class Accelerable l a => UpdateLayer l a | a -> l where -- need to implement it for all shapes, only ones which are -- appropriate. -- -class UpdateLayer l a => Layer l a i o | a -> l i o where +class UpdateLayer l => Layer l (i :: GS.Shape) (o :: GS.Shape) where -- | The Wengert tape for this layer. Includes all that is required -- to generate the back propagated gradients efficiently. As a -- default, `S i` is fine. - type Tape a i o :: * + type Tape l i o :: * -- | Used in training and scoring. Take the input from the previous -- layer, and give the output from this layer. - runForwards :: a -> Acc (S i) -> (Tape a i o, Acc (S o)) + runForwards :: (Accelerated l) -> Accelerated (GS.S i) -> (Tape l i o, (Accelerated (GS.S o))) -- | Back propagate a step. Takes the current layer, the input that -- the layer gave from the input and the back propagated derivatives @@ -82,4 +82,4 @@ class UpdateLayer l a => Layer l a i o | a -> l i o where -- -- Returns the gradient layer and the derivatives to push back -- further. - runBackwards :: a -> Tape a i o -> Acc (S o) -> (Gradient a, Acc (S i)) + runBackwards :: (Accelerated l) -> Tape l i o -> (Accelerated (GS.S o)) -> (Gradient l, Accelerated (GS.S i)) diff --git a/src/Grenade/Core/Matrix/Accelerate.hs b/src/Grenade/Core/Matrix/Accelerate.hs index 172a0f55..14bc5c49 100644 --- a/src/Grenade/Core/Matrix/Accelerate.hs +++ b/src/Grenade/Core/Matrix/Accelerate.hs @@ -8,7 +8,6 @@ module Grenade.Core.Matrix.Accelerate where import qualified Prelude as P -import Data.Proxy import GHC.TypeLits import Data.Array.Accelerate hiding (flatten, size) @@ -16,9 +15,10 @@ import Data.Array.Accelerate.IO import Numeric.LinearAlgebra.Static (R, L, unwrap, size) import Numeric.LinearAlgebra (flatten) -class Accelerable g a | a -> g where +class Accelerable g where + data Accelerated g :: * -- | Accelerate a Grenade type - toAccel :: g -> a + toAccel :: g -> Accelerated g outer :: (P.Num (Exp e), Elt e) => Acc (Vector e) -> Acc (Vector e) -> Acc (Array DIM2 e) @@ -32,8 +32,7 @@ outer a b = zipWith (*) aRepl bRepl (#>) :: (P.Num (Exp e), Elt e) => Acc (Array DIM2 e) -> Acc (Vector e) -> Acc (Vector e) m #> v = fold (+) 0 mul where - (Z :. vN) = unlift $ shape v :: Z :. Exp Int - (Z :. mN :. nN) = unlift $ shape m :: Z :. Exp Int :. Exp Int + (Z :. mN :. _) = unlift $ shape m :: Z :. Exp Int :. Exp Int vRepl = replicate (lift $ Z :. mN :. 
All) v mul = zipWith (*) m vRepl diff --git a/src/Grenade/Core/Network/Accelerate.hs b/src/Grenade/Core/Network/Accelerate.hs new file mode 100644 index 00000000..cfcf5380 --- /dev/null +++ b/src/Grenade/Core/Network/Accelerate.hs @@ -0,0 +1,201 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} + +-- I'm not sure if I need this, but couldn't figure out how to avoid using it +{-# LANGUAGE UndecidableInstances #-} + +{-| +Module : Grenade.Core.Network +Description : Core definition of a Neural Network +Copyright : (c) Huw Campbell, 2016-2017 +License : BSD2 +Stability : experimental + +This module defines the core data types and functions +for non-recurrent neural networks. +-} + +module Grenade.Core.Network.Accelerate ( + {-# Network (..) + , #-} Gradients (..) + , Tapes (..) + + , runNetwork + , runGradient + -- , applyUpdate + + -- , randomNetwork + ) where + +import Data.Array.Accelerate +import Data.Singletons +import Data.Singletons.Prelude.List +import Data.Singletons.TypeLits + +import Grenade.Core.Layer.Accelerate +import Grenade.Core.LearningParameters.Accelerate +import qualified Grenade.Core as G +import Grenade.Core.Shape.Accelerate +import Grenade.Core.Shape (Shape) +import Grenade.Core.Matrix.Accelerate + +-- | Type of a network. +-- +-- The @[*]@ type specifies the types of the layers. +-- +-- The @[Shape]@ type specifies the shapes of data passed between the layers. +-- +-- Can be considered to be a heterogeneous list of layers which are able to +-- transform the data shapes of the network. +-- data Network :: [*] -> [G.Shape] -> * where +-- NNil :: SingI i +-- => Network '[] '[i] + +-- (:~>) :: (SingI i, SingI h, G.Layer l i h, Accelerable l) +-- => !(Accelerated l) +-- -> !(Accelerated (G.Network ls (h ': hs))) +-- -> Accelerated (G.Network (l ': ls) (i ': h ': hs)) +-- infixr 5 :~> + +-- instance Show (Network n) where +-- show NNil = "NNil" +-- instance (Show x, Show (Network xs rs)) => Show (Network (x ': xs) (i ': rs)) where +-- show (x :~> xs) = show x ++ "\n~>\n" ++ show xs + + +instance Accelerable (G.Network '[] '[i]) where + data Accelerated (G.Network '[] '[i]) = ANil + toAccel G.NNil = ANil + +instance + ( Accelerable l + , Accelerable (G.S i) + , Accelerable (G.S h) + , Accelerable (G.Network ls (h ': hs)) + ) + => Accelerable + (G.Network (l ': ls) (i ': h ': hs)) where + + data Accelerated (G.Network (l ': ls) (i ': h ': hs)) = + ANetwork (Accelerated l) (Accelerated (G.Network ls (h ': hs))) + + toAccel (x G.:~> xs) = ANetwork (toAccel x) (toAccel xs) + + +-- | Gradient of a network. +-- +-- Parameterised on the layers of the network. +data Gradients :: [*] -> * where + GNil :: Gradients '[] + + (:/>) :: UpdateLayer l + => Gradient l + -> Gradients ls + -> Gradients (l ': ls) + +-- | Wegnert Tape of a network. +-- +-- Parameterised on the layers and shapes of the network. +data Tapes :: [*] -> [G.Shape] -> * where + TNil :: SingI i + => Tapes '[] '[i] + + (:\>) :: (SingI i, SingI h, Layer l i h, Accelerable l) + => !(Tape l i h) + -> !(Tapes ls (h ': hs)) + -> Tapes (l ': ls) (i ': h ': hs) + + +-- | Running a network forwards with some input data. +-- +-- This gives the output, and the Wengert tape required for back +-- propagation. +runNetwork :: forall layers shapes h l. 
+ ( Accelerable (G.S (Head shapes)) + , Accelerable (G.S (Last shapes)) + ) + => Accelerated (G.Network layers shapes) + -> Accelerated (G.S (Head shapes)) + -> (Tapes layers shapes, Accelerated (G.S (Last shapes))) +runNetwork = undefined +-- go +-- where +-- go :: forall js ss. (Last js ~ Last shapes) +-- => Network ss js +-- -> S (Head js) +-- -> (Tapes ss js, S (Last js)) +-- go (layer :~> n) !x = +-- let (tape, forward) = runForwards layer x +-- (tapes, answer) = go n forward +-- in (tape :\> tapes, answer) + +-- go NNil !x +-- = (TNil, x) + + +-- | Running a loss gradient back through the network. +-- +-- This requires a Wengert tape, generated with the appropriate input +-- for the loss. +-- +-- Gives the gradients for the layer, and the gradient across the +-- input (which may not be required). +runGradient :: forall layers shapes h l. + ( Accelerable (G.S (Head shapes)) + , Accelerable (G.S (Last shapes)) + ) + => Accelerated (G.Network layers shapes) + -> Tapes layers shapes + -> Accelerated (G.S (Last shapes)) + -> (Gradients layers, Accelerated (G.S (Head shapes))) +runGradient net tapes o = undefined +-- go net tapes +-- where +-- go :: forall js ss. (Last js ~ Last shapes) +-- => Network ss js +-- -> Tapes ss js +-- -> (Gradients ss, S (Head js)) +-- go (layer :~> n) (tape :\> nt) = +-- let (gradients, feed) = go n nt +-- (layer', backGrad) = runBackwards layer tape feed +-- in (layer' :/> gradients, backGrad) + +-- go NNil TNil +-- = (GNil, o) + + +-- -- | Apply one step of stochastic gradient decent across the network. +-- applyUpdate :: LearningParameters +-- -> Network layers shapes +-- -> Gradients layers +-- -> Network layers shapes +-- applyUpdate rate (layer :~> rest) (gradient :/> grest) +-- = runUpdate rate layer gradient :~> applyUpdate rate rest grest + +-- applyUpdate _ NNil GNil +-- = NNil + + +-- -- | Ultimate composition. +-- -- +-- -- This allows a complete network to be treated as a layer in a larger network. +-- instance UpdateLayer (GN.Network sublayers subshapes) (Network asublayers asubshapes) where +-- type Gradient (Network asublayers asubshapes) = Gradients asublayers +-- runUpdate = applyUpdate + +-- -- | Ultimate composition. +-- -- +-- -- This allows a complete network to be treated as a layer in a larger network. +-- instance (i ~ (Head subshapes), o ~ (Last subshapes)) +-- -- => Layer (GN.Network sublayers subshapes) (Network asublayers asubshapes) i o where + +-- type Tape (Network asublayers asubshapes) i o = Tapes asublayers asubshapes +-- runForwards = runNetwork +-- runBackwards = runGradient diff --git a/src/Grenade/Core/Shape.hs b/src/Grenade/Core/Shape.hs index e8775150..f4e98879 100644 --- a/src/Grenade/Core/Shape.hs +++ b/src/Grenade/Core/Shape.hs @@ -7,6 +7,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE FlexibleInstances #-} {-| Module : Grenade.Core.Shape Description : Dependently typed shapes of data which are passed between layers of a network @@ -20,6 +21,7 @@ module Grenade.Core.Shape ( Shape (..) , S (..) , Sing (..) + , Accelerated(..) , randomOfShape , fromStorable @@ -41,6 +43,9 @@ import qualified Numeric.LinearAlgebra.Static as H import Numeric.LinearAlgebra.Static import qualified Numeric.LinearAlgebra as NLA +import Grenade.Core.Matrix.Accelerate +import Data.Array.Accelerate (DIM1, DIM2, Array, Acc, use) + -- | The current shapes we accept. -- at the moment this is just one, two, and three dimensional -- Vectors/Matricies. 
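To make the new shape instances concrete: the following sketch (not part of
the patch; `demoForward` is a hypothetical helper) shows how they compose
with the accelerated `FullyConnected` instance from later in this patch:

```haskell
{-# LANGUAGE DataKinds #-}
import qualified Grenade.Core            as G
import qualified Grenade.Core.Accelerate as A
import           Grenade.Layers.FullyConnected (FullyConnected)
import           GHC.TypeLits (KnownNat)

-- Accelerate a layer and an input shape with 'toAccel', run the
-- accelerated forward pass, and drop the Wengert tape with 'snd'.
demoForward :: (KnownNat i, KnownNat o)
            => FullyConnected i o
            -> G.S ('G.D1 i)
            -> A.Accelerated (G.S ('G.D1 o))
demoForward layer input =
  snd (A.runForwards (A.toAccel layer) (A.toAccel input))
```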
@@ -76,6 +81,18 @@ data S (n :: Shape) where deriving instance Show (S n) +instance Accelerable (S ('D1 len)) where + data Accelerated (S ('D1 len)) = AS1D (Acc (Array DIM1 Double)) + + toAccel v = case v of + S1D v' -> AS1D $ use $ fromVector v' + +instance Accelerable (S ('D2 w h)) where + data Accelerated (S ('D2 w h)) = AS2D (Acc (Array DIM2 Double)) + + toAccel v = case v of + S2D v' -> AS2D $ use $ fromMatrix v' + -- Singleton instances. -- -- These could probably be derived with template haskell, but this seems diff --git a/src/Grenade/Layers/FullyConnected.hs b/src/Grenade/Layers/FullyConnected.hs index e5bef4a5..5bbcb86f 100644 --- a/src/Grenade/Layers/FullyConnected.hs +++ b/src/Grenade/Layers/FullyConnected.hs @@ -9,7 +9,7 @@ module Grenade.Layers.FullyConnected ( FullyConnected (..) , FullyConnected' (..) - , AFullyConnected (..) + , Accelerated (..) , randomFullyConnected ) where @@ -88,13 +88,13 @@ randomFullyConnected = do mm = konst 0 return $ FullyConnected (FullyConnected' wB wN) (FullyConnected' bm mm) -data AFullyConnected (i :: Nat) (o :: Nat) = AFullyConnected - (Acc (Vector Double)) - (Acc (Array DIM2 Double)) - (Acc (Vector Double)) - (Acc (Array DIM2 Double)) +instance (KnownNat i, KnownNat o) => A.Accelerable (FullyConnected i o) where + data Accelerated (FullyConnected i o) = AFullyConnected + (Acc (Vector Double)) + (Acc (Array DIM2 Double)) + (Acc (Vector Double)) + (Acc (Array DIM2 Double)) -instance (KnownNat i, KnownNat o) => A.Accelerable (FullyConnected i o) (AFullyConnected i o) where toAccel (FullyConnected (FullyConnected' b a) (FullyConnected' bM m)) = AFullyConnected (use $ A.fromVector b) @@ -102,9 +102,9 @@ instance (KnownNat i, KnownNat o) => A.Accelerable (FullyConnected i o) (AFullyC (use $ A.fromVector bM) (use $ A.fromMatrix m) -instance (KnownNat i, KnownNat o) => A.UpdateLayer (FullyConnected i o) (AFullyConnected i o) where +instance (KnownNat i, KnownNat o) => A.UpdateLayer (FullyConnected i o) where - type Gradient (AFullyConnected i o) = (Acc (Vector Double), Acc (Array DIM2 Double)) + type Gradient (FullyConnected i o) = (Acc (Vector Double), Acc (Array DIM2 Double)) runUpdate params @@ -115,14 +115,14 @@ instance (KnownNat i, KnownNat o) => A.UpdateLayer (FullyConnected i o) (AFullyC in AFullyConnected newBias newActivations newBiasMomentum newMomentum -instance (KnownNat i, KnownNat o) => A.Layer (FullyConnected i o) (AFullyConnected i o) DIM1 DIM1 where +instance (KnownNat i, KnownNat o) => A.Layer (FullyConnected i o) ('D1 i) ('D1 o) where - type Tape (AFullyConnected i o) DIM1 DIM1 = Acc (Vector Double) + type Tape (FullyConnected i o) ('D1 i) ('D1 o) = Acc (Vector Double) - runForwards (AFullyConnected wB wN _ _) v = (v, Data.Array.Accelerate.zipWith (+) wB (wN A.#> v)) - runBackwards (AFullyConnected _ wN _ _) x dEdy = + runForwards (AFullyConnected wB wN _ _) (AS1D v) = (v, AS1D $ Data.Array.Accelerate.zipWith (+) wB (wN A.#> v)) + runBackwards (AFullyConnected _ wN _ _) x (AS1D dEdy) = let wB' = dEdy mm' = dEdy `A.outer` x -- calcluate derivatives for next step dWs = transpose wN A.#> dEdy - in ((wB', mm'), dWs) + in ((wB', mm'), AS1D dWs) diff --git a/src/Grenade/Layers/Internal/Pooling/Accelerate.hs b/src/Grenade/Layers/Internal/Pooling/Accelerate.hs new file mode 100644 index 00000000..79c93ceb --- /dev/null +++ b/src/Grenade/Layers/Internal/Pooling/Accelerate.hs @@ -0,0 +1 @@ +module Grenade.Layers.Internal.Pooling.Accelerate where \ No newline at end of file diff --git a/src/Grenade/Recurrent/Core/Network/Accelerate.hs 
b/src/Grenade/Recurrent/Core/Network/Accelerate.hs
new file mode 100644
index 00000000..155d4f24
--- /dev/null
+++ b/src/Grenade/Recurrent/Core/Network/Accelerate.hs
@@ -0,0 +1,101 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE KindSignatures #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeOperators #-}
+
+module Grenade.Recurrent.Core.Network.Accelerate where
+
+-- NOTE: the pragmas and imports here are a best guess at what the
+-- definitions below need; this module is still a sketch.
+import Data.Singletons
+import Data.Singletons.Prelude.List
+
+import Grenade.Core
+import Grenade.Recurrent.Core.Layer
+
+-- | Type of a recurrent neural network.
+--
+-- The [*] type specifies the types of the layers.
+--
+-- The [Shape] type specifies the shapes of data passed between the layers.
+--
+-- The definition is similar to a Network, but every layer in the
+-- type is tagged by whether it's a FeedForward layer or a Recurrent layer.
+--
+-- Often, to make the definitions more concise, one will use a type alias
+-- for these empty data types.
+data RecurrentNetwork :: [*] -> [Shape] -> * where
+  RNil   :: SingI i
+         => RecurrentNetwork '[] '[i]
+
+  (:~~>) :: (SingI i, Layer x i h)
+         => !x
+         -> !(RecurrentNetwork xs (h ': hs))
+         -> RecurrentNetwork (FeedForward x ': xs) (i ': h ': hs)
+
+  (:~@>) :: (SingI i, RecurrentLayer x i h)
+         => !x
+         -> !(RecurrentNetwork xs (h ': hs))
+         -> RecurrentNetwork (Recurrent x ': xs) (i ': h ': hs)
+infixr 5 :~~>
+infixr 5 :~@>
+
+-- | Gradient of a network.
+--
+-- Parameterised on the layers of the network.
+data RecurrentGradient :: [*] -> * where
+  RGNil  :: RecurrentGradient '[]
+
+  (://>) :: UpdateLayer x
+         => Gradient x
+         -> RecurrentGradient xs
+         -> RecurrentGradient (phantom x ': xs)
+
+-- | Recurrent inputs (sideways shapes on an imaginary unrolled graph)
+--   Parameterised on the layers of a Network.
+data RecurrentInputs :: [*] -> * where
+  RINil   :: RecurrentInputs '[]
+
+  (:~~+>) :: (UpdateLayer x, Fractional (RecurrentInputs xs))
+          => () -> !(RecurrentInputs xs) -> RecurrentInputs (FeedForward x ': xs)
+
+  (:~@+>) :: (Fractional (RecurrentShape x), Fractional (RecurrentInputs xs), RecurrentUpdateLayer x)
+          => !(RecurrentShape x) -> !(RecurrentInputs xs) -> RecurrentInputs (Recurrent x ': xs)
+
+-- | All the information required to backpropagate
+--   through time safely.
+--
+--   We index on the time step length as well, to ensure
+--   that all Tape lengths are the same.
+data RecurrentTape :: [*] -> [Shape] -> * where
+  TRNil  :: SingI i
+         => RecurrentTape '[] '[i]
+
+  (:\~>) :: Tape x i h
+         -> !(RecurrentTape xs (h ': hs))
+         -> RecurrentTape (FeedForward x ': xs) (i ': h ': hs)
+
+  (:\@>) :: RecTape x i h
+         -> !(RecurrentTape xs (h ': hs))
+         -> RecurrentTape (Recurrent x ': xs) (i ': h ': hs)
+
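+-- A rough sketch of how these pieces line up (the layer names are
+-- placeholders, not types defined in this module): for a network of type
+--
+-- > RecurrentNetwork '[ FeedForward f, Recurrent r ] '[ i, h, o ]
+--
+-- the matching recurrent state is built as
+--
+-- > () :~~+> (recShape :~@+> RINil)
+--
+-- so feed-forward layers contribute only a unit placeholder, while each
+-- recurrent layer threads its RecurrentShape sideways into the next
+-- time step.
+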
+runRecurrent
+  :: forall shapes layers.
+     RecurrentNetwork layers shapes
+  -> RecurrentInputs layers
+  -> S (Head shapes)
+  -> (RecurrentTape layers shapes, RecurrentInputs layers, S (Last shapes))
+runRecurrent =
+  go
+  where
+    go :: forall js sublayers. (Last js ~ Last shapes)
+       => RecurrentNetwork sublayers js
+       -> RecurrentInputs sublayers
+       -> S (Head js)
+       -> (RecurrentTape sublayers js, RecurrentInputs sublayers, S (Last js))
+    go (!layer :~~> n) (() :~~+> nIn) !x
+      = let (!tape, !forwards) = runForwards layer x
+
+            -- recursively run the rest of the network, and get the gradients from above.
+            (!newFN, !ig, !answer) = go n nIn forwards
+        in (tape :\~> newFN, () :~~+> ig, answer)
+
+    -- This is a recurrent layer, so we need to do a scan, first input to last, providing
+    -- the recurrent shape output to the next layer.
+    go (layer :~@> n) (recIn :~@+> nIn) !x
+      = let (tape, shape, forwards) = runRecurrentForwards layer recIn x
+            (newFN, ig, answer) = go n nIn forwards
+        in (tape :\@> newFN, shape :~@+> ig, answer)
+
+    -- Handle the output layer, bouncing the derivatives back down.
+    -- We may not have a target for each example, so when we don't, use a 0 gradient.
+    go RNil RINil !x
+      = (TRNil, RINil, x)
diff --git a/test/Test/Grenade/Layers/FullyConnected.hs b/test/Test/Grenade/Layers/FullyConnected.hs
index 5c3af83e..bf5abe6f 100644
--- a/test/Test/Grenade/Layers/FullyConnected.hs
+++ b/test/Test/Grenade/Layers/FullyConnected.hs
@@ -33,12 +33,12 @@ genOpaqueFullyConnected = do
   let Just input' = someNatVal input
   let Just output' = someNatVal output
   case (input', output') of
-      (SomeNat (Proxy :: Proxy i'), SomeNat (Proxy :: Proxy o')) -> do
-         wB <- randomVector
-         bM <- randomVector
-         wN <- uniformSample
-         kM <- uniformSample
-         return . OpaqueFullyConnected $ (FullyConnected (FullyConnected' wB wN) (FullyConnected' bM kM) :: FullyConnected i' o')
+    (SomeNat (Proxy :: Proxy i'), SomeNat (Proxy :: Proxy o')) -> do
+      wB <- randomVector
+      bM <- randomVector
+      wN <- uniformSample
+      kM <- uniformSample
+      return . OpaqueFullyConnected $ (FullyConnected (FullyConnected' wB wN) (FullyConnected' bM kM) :: FullyConnected i' o')
 
 prop_fully_connected_forwards :: Property
 prop_fully_connected_forwards = property $ do
diff --git a/test/Test/Grenade/Layers/FullyConnected/Accelerate.hs b/test/Test/Grenade/Layers/FullyConnected/Accelerate.hs
index 5be20c04..53569ed6 100644
--- a/test/Test/Grenade/Layers/FullyConnected/Accelerate.hs
+++ b/test/Test/Grenade/Layers/FullyConnected/Accelerate.hs
@@ -51,21 +51,15 @@ prop_fully_connected_forwards :: Property
 prop_fully_connected_forwards = property $ do
   OpaqueFullyConnected (fclayer :: FullyConnected i o) <- blindForAll genOpaqueFullyConnected
   input :: S DIM1 <- blindForAll (randomArray (Z :. (P.fromIntegral $ natVal (Proxy :: Proxy i))))
-  let (fclayer' :: AFullyConnected i o) = toAccel fclayer
-      (tape, output :: Acc (S DIM1)) = runForwards fclayer' (use input)
-      backed :: (Gradient (AFullyConnected i o), Acc (S DIM1))
-             = runBackwards fclayer' tape output
-  (run $ lift backed) `seq` success
-
-(~===) :: (P.Num (Exp e), P.Fractional (Exp e), RealFrac e, Monad m, P.Eq sh, P.Eq e, Elt e, Shape sh, FromIntegral Int e) => Array sh e -> Array sh e -> Test m ()
-a ~=== b = fuzzy a === fuzzy b
-  where
-    fuzzy :: (P.Num (Exp e), P.Fractional (Exp e), RealFrac e, Shape sh, Elt e, FromIntegral Int e) => Array sh e -> Array sh e
-    fuzzy = run1 $ map $ \x ->
-      let
-        scaledUp :: Exp Int
-        scaledUp = round $ x * 1e7
-      in (fromIntegral scaledUp) / 1e7
+  let fclayer' :: Accelerated (FullyConnected i o)
+      fclayer' = toAccel fclayer
+      tape :: Tape (FullyConnected i o) ('G.D1 i) ('G.D1 o)
+      output :: Accelerated (G.S ('G.D1 o))
+      (tape, output) = runForwards fclayer' (AS1D $ use input :: Accelerated (G.S ('G.D1 i)))
+      grad :: Gradient (FullyConnected i o)
+      inputGrad :: Acc (Vector Double)
+      (grad, AS1D inputGrad :: Accelerated (G.S ('G.D1 i))) = runBackwards fclayer' tape output
+  (run $ lift (grad, inputGrad)) `seq` success
 
 prop_fully_connected_forwards_equals_reference :: Property
 prop_fully_connected_forwards_equals_reference = property $ do
@@ -98,9 +92,11 @@ prop_fully_connected_forwards_equals_reference = property $ do
       (biasGradV, actGradV) = case gradient of
         FullyConnected' b a -> (fromVector b, fromMatrix a)
 
-      (fclayer' :: AFullyConnected i o) = toAccel fclayer
-      (tape', output') = runForwards fclayer' (use input')
-      (gradient', inputGrad') = runBackwards fclayer' tape' output'
+      fclayer' :: Accelerated (FullyConnected i o)
+      fclayer' = toAccel fclayer
+      tape' :: Tape (FullyConnected i o) ('G.D1 i) ('G.D1 o)
+      (tape', AS1D output' :: Accelerated (G.S 
('G.D1 o))) = runForwards fclayer' (AS1D $ use input' :: Accelerated (G.S ('G.D1 i))) + (gradient', AS1D inputGrad' :: Accelerated (G.S ('G.D1 i))) = runBackwards fclayer' tape' (AS1D output' :: Accelerated (G.S ('G.D1 o))) (tapeV', outputV', (biasGradV', actGradV'), inputGradV') = run $ lift (tape', output', gradient', inputGrad') tapeV ~=== tapeV' diff --git a/test/Test/Grenade/Network.hs b/test/Test/Grenade/Network.hs index b168a761..024d2dd3 100644 --- a/test/Test/Grenade/Network.hs +++ b/test/Test/Grenade/Network.hs @@ -49,6 +49,7 @@ data SomeNetwork :: * where instance Show SomeNetwork where show (SomeNetwork net) = show net + -- | Generate a random network of a random type -- -- This is slightly insane for a few reasons. Everything must be wrapped up diff --git a/test/Test/Grenade/Network/Accelerate.hs b/test/Test/Grenade/Network/Accelerate.hs new file mode 100644 index 00000000..d354fb08 --- /dev/null +++ b/test/Test/Grenade/Network/Accelerate.hs @@ -0,0 +1,177 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE FlexibleContexts #-} + +module Test.Grenade.Network.Accelerate where + +import qualified Prelude as P +import Prelude hiding (zipWith, replicate, length) + +import Data.Constraint +#if __GLASGOW_HASKELL__ < 800 +import Data.Proxy +#endif +import Data.Singletons.Prelude.List +import Data.Singletons +import Hedgehog +import qualified Hedgehog.Gen as Gen + +import Data.Array.Accelerate +import Data.Array.Accelerate.Interpreter + +import Grenade as G +import Grenade.Core.Accelerate as GA +import Test.Grenade.Network ((~~~), maxVal, oneUp) + +-- import Data.Type.Equality + +import GHC.TypeLits +import GHC.TypeLits.Witnesses + +import Test.Hedgehog.Compat +import Test.Hedgehog.Hmatrix +import Test.Hedgehog.TypeLits +import Unsafe.Coerce + + +data SomeNetwork :: * where + SomeNetwork :: + ( SingI shapes + , SingI (Head shapes) + , SingI (Last shapes) + , Show (G.Network layers shapes) + , Accelerable (G.Network layers shapes) + , Accelerable (G.S (Head shapes)) + , Accelerable (G.S (Last shapes)) + ) => G.Network layers shapes -> SomeNetwork + +instance Show SomeNetwork where + show (SomeNetwork net) = show net + + +-- | Generate a random network of a random type +-- +-- This is slightly insane for a few reasons. Everything must be wrapped up +-- in a SomeNetwork. 
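+--
+-- A minimal sketch of how it is consumed (mirroring prop_auto_diff
+-- below; the property name here is illustrative only):
+--
+-- > prop_example = property $ do
+-- >   SomeNetwork (net :: G.Network layers shapes) <- forAll genNetwork
+-- >   (input :: G.S (Head shapes)) <- forAllWith nice genOfShape
+-- >   let (tapes, output) = G.runNetwork net input
+-- >   ...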
+genNetwork :: Monad m => Gen.Gen m SomeNetwork +genNetwork = + Gen.recursive Gen.choice [ + do + output :: Integer <- choose 1 100 + let Just output' = someNatVal output + case (output') of + (SomeNat (Proxy :: Proxy o')) -> do + pure (SomeNetwork (G.NNil :: G.Network '[] '[ 'D1 o' ] )) + ] [ + do + SomeNetwork ( rest :: G.Network layers shapes ) <- genNetwork + case ( sing :: Sing shapes ) of + SNil -> Gen.discard -- Can't occur + SCons ( h :: Sing h ) ( _ :: Sing hs ) -> + withSingI h $ + case h of + D1Sing l@SNat -> do -- Reshape to two dimensions + let divisors n = 1 : [x | x <- [2..(n-1)], n `rem` x Prelude.== 0] + let len = natVal l + rs <- Gen.element $ divisors len + let cs = len `quot` rs + case ( someNatVal rs, someNatVal cs, someNatVal len ) of + ( Just (SomeNat (rs' :: Proxy inRows)), Just (SomeNat (cs' :: Proxy inCols)), Just (SomeNat (_ :: Proxy outLen ) )) -> + let p1 = natDict rs' + p2 = natDict cs' + in case ( p1 %* p2, unsafeCoerce (Dict :: Dict ()) :: Dict ((inRows * inCols) ~ outLen), unsafeCoerce (Dict :: Dict ()) :: Dict (( 'D1 outLen ) ~ h )) of + ( Dict, Dict, Dict ) -> do + wB <- randomVector + bM <- randomVector + wN <- uniformSample + kM <- uniformSample + let layer = FullyConnected (FullyConnected' wB wN) (FullyConnected' bM kM) + return (SomeNetwork (layer :~> rest :: G.Network ( FullyConnected inRows outLen ': layers ) ( ('D1 inRows) ': h ': hs ))) + _ -> Gen.discard -- Doesn't occur + D2Sing r@SNat c@SNat -> Gen.discard + D3Sing r@SNat c@SNat f@SNat -> Gen.discard + ] + + +type AH s = Accelerated (G.S (Head s)) +type AL s = Accelerated (G.S (Last s)) + + +-- | Test a partial derivative numerically for a random network and input +-- +-- This is the most important test. +prop_auto_diff :: Property +prop_auto_diff = withDiscards 1000 . withTests 10000 . 
property $ do + SomeNetwork (network :: G.Network layers shapes) <- forAll genNetwork + (input :: G.S (Head shapes)) <- forAllWith nice genOfShape + (target :: G.S (Last shapes)) <- forAllWith nice oneUp + (tested :: G.S (Head shapes)) <- forAllWith nice oneUp + + let + (!tapes, !output) = G.runNetwork network input + (_, !backgrad) = G.runGradient network tapes target + inputDiff = input + tested * 0.00001 + expected = maxVal ( backgrad * tested ) + (_, !outputDiff) = G.runNetwork network inputDiff + result = maxVal ( outputDiff * target - output * target ) / 0.00001 + + networkA = toAccel network + + inputA :: AH shapes + inputA = toAccel input + + targetA :: AL shapes + targetA = toAccel target + + testedA :: AH shapes + testedA = toAccel tested + + tapesA :: GA.Tapes layers shapes + outputA :: AL shapes + (!tapesA, !outputA) = GA.runNetwork networkA inputA + + backgradA :: AH shapes + (_, !backgradA) = GA.runGradient networkA tapesA targetA + + sA :: Exp DIM1 + sA = case inputA of + AS1D v -> shape v + + inputDiffA :: AH shapes + inputDiffA = _diffInputs inputA testedA sA + + outputDiffA :: AL shapes + (_, !outputDiffA) = GA.runNetwork networkA inputDiffA + + (result', expected') = run $ _results backgradA testedA targetA outputA outputDiffA + + result ~~~ expected + result' ~~~ expected' + +_diffInputs (AS1D inputV) (AS1D testedV) sA = AS1D $ zipWith (+) inputV (zipWith (*) testedV (fill sA 0.00001)) + +_results + :: (Accelerated (G.S ('D1 i))) + -> (Accelerated (G.S ('D1 i))) + -> (Accelerated (G.S ('D1 o))) + -> (Accelerated (G.S ('D1 o))) + -> (Accelerated (G.S ('D1 o))) + -> Acc (Double, Double) +_results (AS1D backgradV) (AS1D testedV) (AS1D targetV) (AS1D outputV) (AS1D outputDiffV) = + let + expectedA = maxVal' (zipWith (*) backgradV testedV) + resultA = maxVal' (zipWith (-) (zipWith (*) outputDiffV targetV) (zipWith (*) outputV targetV)) / 0.00001 + in lift (resultA, expectedA) + +maxVal' :: Acc (Array sh Double) -> Exp Double +maxVal' x = undefined + +tests :: IO Bool +tests = checkParallel $$(discover) diff --git a/test/Test/Hedgehog/Accelerate.hs b/test/Test/Hedgehog/Accelerate.hs index 103d7360..80cb1e5a 100644 --- a/test/Test/Hedgehog/Accelerate.hs +++ b/test/Test/Hedgehog/Accelerate.hs @@ -2,15 +2,28 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE GADTs #-} +{-# LANGUAGE FlexibleContexts #-} module Test.Hedgehog.Accelerate where -import Data.Singletons -import Data.Singletons.TypeLits +import qualified Prelude as P +import Prelude (Monad, (<$>)) import Data.Array.Accelerate +import Data.Array.Accelerate.Interpreter +import Hedgehog import qualified Hedgehog.Gen as Gen import qualified Hedgehog.Range as Range randomArray :: ( Monad m, Shape sh ) => sh -> Gen.Gen m (Array sh Double) randomArray sh = fromList sh <$> Gen.list (Range.singleton $ arraySize sh) (Gen.realFloat $ Range.linearFracFrom 0 (-1) 1) + +(~===) :: (P.Num (Exp e), P.Fractional (Exp e), RealFrac e, Monad m, P.Eq sh, P.Eq e, Elt e, Shape sh, FromIntegral Int e) => Array sh e -> Array sh e -> Test m () +a ~=== b = fuzzy a === fuzzy b + where + fuzzy :: (P.Num (Exp e), P.Fractional (Exp e), RealFrac e, Shape sh, Elt e, FromIntegral Int e) => Array sh e -> Array sh e + fuzzy = run1 $ map $ \x -> + let + scaledUp :: Exp Int + scaledUp = round $ x * 1e7 + in (fromIntegral scaledUp) / 1e7 diff --git a/test/test.hs b/test/test.hs index 0eeaf6ca..9dabe794 100644 --- a/test/test.hs +++ b/test/test.hs @@ -1,6 +1,7 @@ import Control.Monad import qualified Test.Grenade.Network +--import 
qualified Test.Grenade.Network.Accelerate import qualified Test.Grenade.Layers.Pooling import qualified Test.Grenade.Layers.Convolution @@ -22,6 +23,7 @@ main :: IO () main = disorderMain [ Test.Grenade.Network.tests + --, Test.Grenade.Network.Accelerate.tests , Test.Grenade.Layers.Pooling.tests , Test.Grenade.Layers.Convolution.tests From e83275a4e0d71e616fee7919caccaa66968f7bcd Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Thu, 24 Aug 2017 07:00:03 -0400 Subject: [PATCH 08/11] Install LLVM 4 when building on travis --- .travis.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.travis.yml b/.travis.yml index 9d63c047..4f2f498a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,14 @@ # NB: don't set `language: haskell` here +dist: trusty + +addons: + apt: + sources: + - llvm-toolchain-trusty-4.0 + packages: + - llvm-4.0 + # The following enables several GHC versions to be tested; often it's enough to test only against the last release in a major GHC version. Feel free to omit lines listings versions you don't need/want testing for. env: - CABALVER=1.22 GHCVER=7.10.3 From ed63ef19027e81e97fd479cc5e456ddf3b186ad1 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Thu, 24 Aug 2017 07:51:09 -0400 Subject: [PATCH 09/11] fixup! Install LLVM 4 when building on travis --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index 4f2f498a..76db72f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,12 @@ addons: sources: - llvm-toolchain-trusty-4.0 packages: + - libllvm4.0 + - libllvm4.0-dbg + - lldb-4.0 - llvm-4.0 + - llvm-4.0-dev + - llvm-4.0-runtime # The following enables several GHC versions to be tested; often it's enough to test only against the last release in a major GHC version. Feel free to omit lines listings versions you don't need/want testing for. env: From bdcf32eb379d28ac8cf9a3a1ded3bed782e1f44b Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 5 Sep 2017 16:16:25 -0400 Subject: [PATCH 10/11] fixup! Make Network Accelerable --- src/Grenade/Core/Network/Accelerate.hs | 73 ++++++++++--------------- test/Test/Grenade/Network/Accelerate.hs | 7 +-- 2 files changed, 32 insertions(+), 48 deletions(-) diff --git a/src/Grenade/Core/Network/Accelerate.hs b/src/Grenade/Core/Network/Accelerate.hs index cfcf5380..31c2381e 100644 --- a/src/Grenade/Core/Network/Accelerate.hs +++ b/src/Grenade/Core/Network/Accelerate.hs @@ -26,6 +26,7 @@ module Grenade.Core.Network.Accelerate ( {-# Network (..) , #-} Gradients (..) , Tapes (..) + , Accelerated (..) , runNetwork , runGradient @@ -52,41 +53,35 @@ import Grenade.Core.Matrix.Accelerate -- -- The @[Shape]@ type specifies the shapes of data passed between the layers. -- --- Can be considered to be a heterogeneous list of layers which are able to --- transform the data shapes of the network. --- data Network :: [*] -> [G.Shape] -> * where --- NNil :: SingI i --- => Network '[] '[i] - --- (:~>) :: (SingI i, SingI h, G.Layer l i h, Accelerable l) --- => !(Accelerated l) --- -> !(Accelerated (G.Network ls (h ': hs))) --- -> Accelerated (G.Network (l ': ls) (i ': h ': hs)) --- infixr 5 :~> +-- Can be considered to be a heterogeneous list of layers which are able to +-- transform the data shapes of the network. 
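+--
+-- The accelerated network now gets its own GADT, holding every layer
+-- in its Accelerated form, so the Accelerable instance below only has
+-- to wrap and unwrap it. As a sketch (with l1 and l2 standing for any
+-- Accelerable layers), toAccel on a two layer network produces
+--
+-- > ANetwork (toAccel l1 :~> ANetwork (toAccel l2 :~> ANetwork NNil))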
+data Network :: [*] -> [G.Shape] -> * where + NNil :: SingI i + => Network '[] '[i] + + (:~>) :: (SingI i, SingI h, G.Layer l i h, Layer l i h, Accelerable l) + => !(Accelerated l) + -> !(Accelerated (G.Network ls (h ': hs))) + -> Network (l ': ls) (i ': h ': hs) +infixr 5 :~> -- instance Show (Network n) where -- show NNil = "NNil" -- instance (Show x, Show (Network xs rs)) => Show (Network (x ': xs) (i ': rs)) where -- show (x :~> xs) = show x ++ "\n~>\n" ++ show xs - -instance Accelerable (G.Network '[] '[i]) where - data Accelerated (G.Network '[] '[i]) = ANil - toAccel G.NNil = ANil - instance - ( Accelerable l - , Accelerable (G.S i) - , Accelerable (G.S h) - , Accelerable (G.Network ls (h ': hs)) - ) - => Accelerable - (G.Network (l ': ls) (i ': h ': hs)) where + ( Accelerable (Head layers) + , Accelerable (G.Network (Tail layers) (Tail shapes)) + , Accelerable (G.S (Head (Tail shapes))) + , Layer (Head layers) (Head shapes) (Head (Tail shapes)) + ) => Accelerable (G.Network layers shapes) where - data Accelerated (G.Network (l ': ls) (i ': h ': hs)) = - ANetwork (Accelerated l) (Accelerated (G.Network ls (h ': hs))) + data Accelerated (G.Network layers shapes) = + ANetwork (Network layers shapes) - toAccel (x G.:~> xs) = ANetwork (toAccel x) (toAccel xs) + toAccel (x G.:~> xs) = ANetwork ((toAccel x) :~> (toAccel xs)) + toAccel G.NNil = ANetwork NNil -- | Gradient of a network. @@ -117,27 +112,19 @@ data Tapes :: [*] -> [G.Shape] -> * where -- -- This gives the output, and the Wengert tape required for back -- propagation. -runNetwork :: forall layers shapes h l. - ( Accelerable (G.S (Head shapes)) - , Accelerable (G.S (Last shapes)) +runNetwork :: forall layers shapes. + ( Accelerable (G.S (Last shapes)) ) => Accelerated (G.Network layers shapes) -> Accelerated (G.S (Head shapes)) -> (Tapes layers shapes, Accelerated (G.S (Last shapes))) -runNetwork = undefined --- go --- where --- go :: forall js ss. (Last js ~ Last shapes) --- => Network ss js --- -> S (Head js) --- -> (Tapes ss js, S (Last js)) --- go (layer :~> n) !x = --- let (tape, forward) = runForwards layer x --- (tapes, answer) = go n forward --- in (tape :\> tapes, answer) - --- go NNil !x --- = (TNil, x) +runNetwork (ANetwork (layer :~> n)) !x = + let + (tape, forward) = runForwards layer x + (tapes, answer) = runNetwork n forward + in (tape :\> tapes, answer) + +runNetwork (ANetwork NNil) !x = (TNil, x) -- | Running a loss gradient back through the network. diff --git a/test/Test/Grenade/Network/Accelerate.hs b/test/Test/Grenade/Network/Accelerate.hs index d354fb08..8a15dda8 100644 --- a/test/Test/Grenade/Network/Accelerate.hs +++ b/test/Test/Grenade/Network/Accelerate.hs @@ -166,12 +166,9 @@ _results -> Acc (Double, Double) _results (AS1D backgradV) (AS1D testedV) (AS1D targetV) (AS1D outputV) (AS1D outputDiffV) = let - expectedA = maxVal' (zipWith (*) backgradV testedV) - resultA = maxVal' (zipWith (-) (zipWith (*) outputDiffV targetV) (zipWith (*) outputV targetV)) / 0.00001 + expectedA = maximum (zipWith (*) backgradV testedV) + resultA = maximum (zipWith (-) (zipWith (*) outputDiffV targetV) (zipWith (*) outputV targetV)) / 0.00001 in lift (resultA, expectedA) -maxVal' :: Acc (Array sh Double) -> Exp Double -maxVal' x = undefined - tests :: IO Bool tests = checkParallel $$(discover) From 5c6828884e7248eba0bd91ea7afd60f129d3b893 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 5 Sep 2017 16:53:02 -0400 Subject: [PATCH 11/11] fixup! 
Make Network Accelerable --- test/Test/Grenade/Network/Accelerate.hs | 5 +++-- test/test.hs | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/Test/Grenade/Network/Accelerate.hs b/test/Test/Grenade/Network/Accelerate.hs index 8a15dda8..352cdee5 100644 --- a/test/Test/Grenade/Network/Accelerate.hs +++ b/test/Test/Grenade/Network/Accelerate.hs @@ -12,7 +12,7 @@ module Test.Grenade.Network.Accelerate where import qualified Prelude as P -import Prelude hiding (zipWith, replicate, length) +import Prelude hiding (zipWith, replicate, length, maximum) import Data.Constraint #if __GLASGOW_HASKELL__ < 800 @@ -43,7 +43,8 @@ import Unsafe.Coerce data SomeNetwork :: * where SomeNetwork :: - ( SingI shapes + ( + SingI shapes , SingI (Head shapes) , SingI (Last shapes) , Show (G.Network layers shapes) diff --git a/test/test.hs b/test/test.hs index 9dabe794..c868c5cc 100644 --- a/test/test.hs +++ b/test/test.hs @@ -1,7 +1,7 @@ import Control.Monad import qualified Test.Grenade.Network ---import qualified Test.Grenade.Network.Accelerate +import qualified Test.Grenade.Network.Accelerate import qualified Test.Grenade.Layers.Pooling import qualified Test.Grenade.Layers.Convolution @@ -23,7 +23,7 @@ main :: IO () main = disorderMain [ Test.Grenade.Network.tests - --, Test.Grenade.Network.Accelerate.tests + , Test.Grenade.Network.Accelerate.tests , Test.Grenade.Layers.Pooling.tests , Test.Grenade.Layers.Convolution.tests