diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 4122397..104ed4d 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -10,28 +10,34 @@ on:
       - main
     paths-ignore:
       - 'docs/**'
+  schedule:
+    - cron: '41 0 * * 5'
 jobs:
   test:
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        group:
+          - Core
+        version:
+          - '1'
+        os:
+          - ubuntu-latest
+          - macos-latest
+          - windows-latest
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v1
         with:
-          version: 1
-      - uses: actions/cache@v3
-        env:
-          cache-name: cache-artifacts
+          version: ${{ matrix.version }}
+      - uses: julia-actions/cache@v1
         with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-test-${{ env.cache-name }}-
-            ${{ runner.os }}-test-
-            ${{ runner.os }}-
+          token: ${{ secrets.GITHUB_TOKEN }}
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
-        env:
-          GROUP: ${{ matrix.group }}
+        with:
+          depwarn: error
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v3
         with:
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 8084891..414eda9 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -6,7 +6,8 @@ on:
       - main
     tags: '*'
   pull_request:
-
+  schedule:
+    - cron: '41 0 * * 5'
 jobs:
   build:
     runs-on: ubuntu-latest
@@ -14,7 +15,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@latest
         with:
-          version: '1.6'
+          version: '1'
       - name: Install dependencies
        run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
       - name: Build and deploy
diff --git a/Project.toml b/Project.toml
index 83edf41..37245f4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -7,7 +7,6 @@ version = "1.2.1"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
-ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -15,17 +14,29 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
+Aqua = "0.8"
 CUDA = "3, 5"
 DiffEqBase = "6"
 DocStringExtensions = "0.8, 0.9"
-ExprTools = "0.1"
 Flux = "0.13, 0.14"
 Functors = "0.2, 0.3, 0.4"
+LinearAlgebra = "1"
+Random = "1"
 Reexport = "1"
+SafeTestsets = "0.1"
+SparseArrays = "1"
 Statistics = "1"
+Test = "1"
 Zygote = "0.6"
-julia = "1.6,1.7"
+julia = "1.10"
+
+[extras]
+Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Aqua", "Test", "SafeTestsets"]
diff --git a/src/DeepBSDE.jl b/src/DeepBSDE.jl
index f27b51a..18c49e1 100644
--- a/src/DeepBSDE.jl
+++ b/src/DeepBSDE.jl
@@ -1,14 +1,14 @@
 """
 ```julia
-NNPDENS(u0,σᵀ∇u;opt=Flux.ADAM(0.1))
+NNPDENS(u0,σᵀ∇u;opt=Flux.Optimise.Adam(0.1))
 ```
-Uses a neural stochastic differential equation, which is then solved by the methods available in DifferentialEquations.jl. 
-The alg keyword is required for specifying the SDE solver algorithm that will be used on the internal SDE. All of the other 
+Uses a neural stochastic differential equation, which is then solved by the methods available in DifferentialEquations.jl.
+The alg keyword is required for specifying the SDE solver algorithm that will be used on the internal SDE. All of the other
 keyword arguments are passed to the SDE solver.
 ## Arguments
 - `u0`: a Flux.jl `Chain` for the initial condition guess.
 - `σᵀ∇u`: a Flux.jl `Chain` for the BSDE value guess.
-- `opt`: the optimization algorithm to be used to optimize the neural networks. Defaults to `ADAM`.
+- `opt`: the optimization algorithm to be used to optimize the neural networks. Defaults to `Flux.Optimise.Adam`.
 """
 struct NNPDENS{C1,C2,O} <: NeuralPDEAlgorithm
     u0::C1
@@ -16,7 +16,7 @@ struct NNPDENS{C1,C2,O} <: NeuralPDEAlgorithm
     opt::O
 end
 
-NNPDENS(u0,σᵀ∇u;opt=Flux.ADAM(0.1)) = NNPDENS(u0,σᵀ∇u,opt)
+NNPDENS(u0,σᵀ∇u;opt=Flux.Optimise.Adam(0.1)) = NNPDENS(u0,σᵀ∇u,opt)
 
 function DiffEqBase.solve(
     prob::TerminalPDEProblem,
@@ -165,7 +165,7 @@ function DiffEqBase.solve(
             l < 1e-6 && Flux.stop()
         end
         dataS = Iterators.repeated((), maxiters_upper)
-        Flux.train!(loss_, ps, dataS, ADAM(0.01); cb = cb)
+        Flux.train!(loss_, ps, dataS, Flux.Optimise.Adam(0.01); cb = cb)
         u_high = loss_()
         # Function to precalculate the f values over the domain
         function give_f_matrix(X,urange,σᵀ∇u,p,t)
diff --git a/src/DeepSplitting.jl b/src/DeepSplitting.jl
index 4bb4058..c518f2d 100644
--- a/src/DeepSplitting.jl
+++ b/src/DeepSplitting.jl
@@ -1,17 +1,18 @@
-Base.copy(t::Tuple) = t # required for below
-function Base.copy(opt::O) where O<:Flux.Optimise.AbstractOptimiser
-    return O([copy(getfield(opt,f)) for f in fieldnames(typeof(opt))]...)
+_copy(t::Tuple) = t
+_copy(t) = t
+function _copy(opt::O) where O<:Flux.Optimise.AbstractOptimiser
+    return O([_copy(getfield(opt,f)) for f in fieldnames(typeof(opt))]...)
 end
 
 """
-    DeepSplitting(nn, K=1, opt = ADAM(0.01), λs = nothing, mc_sample = NoSampling())
+    DeepSplitting(nn, K=1, opt = Flux.Optimise.Adam(0.01), λs = nothing, mc_sample = NoSampling())
 
 Deep splitting algorithm.
 
 # Arguments
 * `nn`: a [Flux.Chain](https://fluxml.ai/Flux.jl/stable/models/layers/#Flux.Chain), or more generally a [functor](https://github.com/FluxML/Functors.jl).
 * `K`: the number of Monte Carlo integrations.
-* `opt`: optimizer to be used. By default, `Flux.ADAM(0.01)`.
+* `opt`: optimizer to be used. By default, `Flux.Optimise.Adam(0.01)`.
 * `λs`: the learning rates, used sequentially. Defaults to a single value taken from `opt`.
 * `mc_sample::MCSampling` : sampling method for Monte Carlo integrations of the non-local term. Can be `UniformSampling(a,b)`, `NormalSampling(σ_sampling, shifted)`, or `NoSampling` (by default).
 
@@ -25,7 +26,7 @@
 nn = Flux.Chain(Dense(d, hls, tanh),
         Dense(hls,hls,tanh),
         Dense(hls, 1, x->x^2))
-alg = DeepSplitting(nn, K=10, opt = ADAM(), λs = [5e-3,1e-3],
+alg = DeepSplitting(nn, K=10, opt = Flux.Optimise.Adam(), λs = [5e-3,1e-3],
         mc_sample = UniformSampling(zeros(d), ones(d)) )
 ```
 """
@@ -39,7 +40,7 @@ end
 
 function DeepSplitting(nn;
                        K=1,
-                       opt::O = ADAM(0.01),
+                       opt::O = Flux.Optimise.Adam(0.01),
                        λs::L = nothing,
                        mc_sample::MCSampling = NoSampling()) where {O <: Flux.Optimise.AbstractOptimiser, L <: Union{Nothing,Vector{N}} where N <: Number}
     isnothing(λs) ? λs = [opt.eta] : nothing
@@ -175,7 +176,7 @@ function solve(
         _maxiters = length(maxiters) > 1 ? maxiters[min(net,2)] : maxiters[]
 
         for λ in λs
-            opt_net = copy(opt) # starting with a new optimiser state at each time step
+            opt_net = _copy(opt) # starting with a new optimiser state at each time step
             opt_net.eta = λ
             verbose && println("Training started with ", typeof(opt_net), " and λ :", opt_net.eta)
             for epoch in 1:_maxiters
diff --git a/test/DeepSplitting.jl b/test/DeepSplitting.jl
index 8a85d46..a35eb72 100644
--- a/test/DeepSplitting.jl
+++ b/test/DeepSplitting.jl
@@ -44,7 +44,7 @@ end
                 Dense(hls,hls,relu),
                 Dense(hls,1)) # Neural network used by the scheme
 
-    opt = ADAM(0.01) #optimiser
+    opt = Flux.Optimise.Adam(0.01) #optimiser
     alg = DeepSplitting(nn, opt = opt)
 
     f(y, z, v_y, v_z, ∇v_y, ∇v_z, p, t) = 0f0 .* v_y
@@ -88,7 +88,7 @@ end
                 Dense(hls,hls,relu),
                 Dense(hls,1)) # Neural network used by the scheme
 
-    opt = ADAM(0.01) #optimiser
+    opt = Flux.Optimise.Adam(0.01) #optimiser
     alg = DeepSplitting(nn, opt = opt)
 
     f(y, z, v_y, v_z, ∇v_y, ∇v_z, p, t) = 0f0 .* v_y #TODO: this fix is not nice
@@ -134,19 +134,19 @@ end
                 Dense(hls,hls,relu),
                 Dense(hls,1)) # Neural network used by the scheme
 
-    opt = ADAM(0.01) #optimiser
+    opt = Flux.Optimise.Adam(0.01) #optimiser
     alg = DeepSplitting(nn, opt = opt)
 
     f(y, z, v_y, v_z, ∇v_y, ∇v_z, p, t) = 0f0 .* v_y #TODO: this fix is not nice
 
     # defining the problem
-    prob = PIDEProblem(g, f, μ, σ, x0, tspan, 
+    prob = PIDEProblem(g, f, μ, σ, x0, tspan,
                        x0_sample = x0_sample,
                        neumann_bc = [-∂, ∂]
                        )
     # solving
-    sol = solve(prob, alg, dt, 
-                verbose = false, 
+    sol = solve(prob, alg, dt,
+                verbose = false,
                 use_cuda = use_cuda,
                 maxiters = 1000,
                 batch_size = batch_size,
@@ -194,23 +194,23 @@ end
                 Dense(hls,hls,relu),
                 Dense(hls,1)) # Neural network used by the scheme
 
-    opt = ADAM(0.01) #optimiser
+    opt = Flux.Optimise.Adam(0.01) #optimiser
     alg = DeepSplitting(nn, opt = opt)
 
     f(y, z, v_y, v_z, ∇v_y, ∇v_z, p, t) = r * v_y #TODO: this fix is not nice
 
     # defining the problem
-    prob = PIDEProblem(g, f, μ, σ, x0, tspan, 
+    prob = PIDEProblem(g, f, μ, σ, x0, tspan,
                        x0_sample = x0_sample,
                        )
     # solving
-    sol = solve(prob, alg, dt, 
-                verbose = false, 
+    sol = solve(prob, alg, dt,
+                verbose = false,
                 use_cuda = use_cuda,
                 maxiters = 1000,
                 batch_size = batch_size,
                 cuda_device = cuda_device)
-    
+
     xs = x0_sample(repeat(x0,1,batch_size))
     u1 = [sol.ufuns[end](x)[] for x in eachcol(xs)]
     u1_anal = [u_anal(x, tspan[end]) for x in eachcol(xs) ]
@@ -239,7 +239,7 @@ end
                 Dense(hls,hls,tanh),
                 Dense(hls,1)) # Neural network used by the scheme
 
-        opt = ADAM(1e-3) #optimiser
+        opt = Flux.Optimise.Adam(1e-3) #optimiser
         alg = DeepSplitting(nn, opt = opt )
 
 
@@ -251,10 +251,10 @@ end
         # defining the problem
         prob = PIDEProblem(g, f, μ, σ, X0, tspan,)
        # solving
-        @time sol = solve(prob, 
-                    alg, 
-                    dt, 
-                    verbose = false, 
+        @time sol = solve(prob,
+                    alg,
+                    dt,
+                    verbose = false,
                     # abstol = 1e-5,
                     use_cuda = use_cuda,
                     maxiters = train_steps,
@@ -263,7 +263,7 @@ end
         u1 = sol.us[end]
         # value coming from \cite{Beck2017a}
         e_l2 = rel_error_l2(u1, 0.30879)
-        @test e_l2 < 0.5 # this is quite high as a relative error. 
+        @test e_l2 < 0.5 # this is quite high as a relative error.
         println("d = $d, rel_error_l2 = $e_l2")
     end
 end
@@ -287,7 +287,7 @@ end
                 Dense(hls,hls,relu),
                 Dense(hls,1)) # Neural network used by the scheme
 
-        opt = ADAM(1e-2) #optimiser
+        opt = Flux.Optimise.Adam(1e-2) #optimiser
         alg = DeepSplitting(nn, opt = opt )
 
 
@@ -299,10 +299,10 @@ end
         # defining the problem
         prob = PIDEProblem(g, f, μ, σ, X0, tspan, neumann_bc = [-∂, ∂] )
        # solving
-        @time sol = solve(prob, 
-                    alg, 
-                    dt, 
-                    verbose = false, 
+        @time sol = solve(prob,
+                    alg,
+                    dt,
+                    verbose = false,
                     abstol = 1e-5,
                     use_cuda = use_cuda,
                     maxiters = train_steps,
@@ -337,7 +337,7 @@ if false
                 Dense(hls,hls,tanh),
                 Dense(hls,1)) # Neural network used by the scheme
 
-        opt = ADAM(1e-3) #optimiser
+        opt = Flux.Optimise.Adam(1e-3) #optimiser
         alg = DeepSplitting(nn, opt = opt )
 
         X0 = repeat([1.0f0, 0.5f0], div(d,2)) # initial point
@@ -347,10 +347,10 @@ if false
         # defining the problem
         prob = PIDEProblem(g, f, μ, σ, X0, tspan)
        # solving
-        @time xs,ts,sol = solve(prob, 
-                    alg, 
-                    dt, 
-                    verbose = true, 
+        @time xs,ts,sol = solve(prob,
+                    alg,
+                    dt,
+                    verbose = true,
                     # abstol = 1e-5,
                     use_cuda = use_cuda,
                     maxiters = train_steps,
@@ -388,7 +388,7 @@ if false
                 Dense(hls,hls,tanh),
                 Dense(hls,1)) # Neural network used by the scheme
 
-        opt = ADAM(1e-3) #optimiser
+        opt = Flux.Optimise.Adam(1e-3) #optimiser
         alg = DeepSplitting(nn, opt = opt )
 
         X0 = fill(0.0f0,d) # initial point
@@ -398,10 +398,10 @@ if false
         # defining the problem
         prob = PIDEProblem(g, f, μ, σ, X0, tspan)
        # solving
-        @time sol = solve(prob, 
-                    alg, 
-                    dt, 
-                    verbose = true, 
+        @time sol = solve(prob,
+                    alg,
+                    dt,
+                    verbose = true,
                     # abstol = 1e-5,
                     use_cuda = false,
                     maxiters = train_steps,
@@ -439,7 +439,7 @@ end
                 Dense(hls,hls,tanh),
                 Dense(hls,1)) # Neural network used by the scheme
 
-        opt = ADAM()
+        opt = Flux.Optimise.Adam()
         alg = DeepSplitting(nn, opt = opt, λs = [1e-2,1e-3] )
 
         X0 = fill(100f0,d) # initial point
@@ -463,10 +463,10 @@ end
         # defining the problem
         prob = PIDEProblem(g, f, μ, σ, X0, tspan, )
        # solving
-        @time sol = solve(prob, 
-                    alg, 
-                    dt, 
-                    verbose = true, 
+        @time sol = solve(prob,
+                    alg,
+                    dt,
+                    verbose = true,
                     # abstol = 1e-5,
                     use_cuda = use_cuda,
                     maxiters = train_steps,
@@ -525,7 +525,7 @@ end
                         # BatchNorm(hls, affine = true, dim = 1),
                         Dense(hls, 1, x->x^2)) # positive function
 
-    opt = ADAM(1e-2)#optimiser
+    opt = Flux.Optimise.Adam(1e-2)#optimiser
     alg = DeepSplitting(nn_batch, K=K, opt = opt, mc_sample = x0_sample)
 
     g(x) = Float32((2*π)^(-d/2)) * ss0^(- Float32(d) * 5f-1) * exp.(-5f-1 *sum(x .^2f0 / ss0, dims = 1)) # initial condition
@@ -534,14 +534,14 @@ end
     f(y, z, v_y, v_z, ∇v_y, ∇v_z, p, t) = max.(v_y, 0f0) .* (m(y) .- vol * max.(v_z, 0f0) .* m(z)) # nonlocal nonlinear part of the
 
     # defining the problem
-    prob = PIDEProblem(g, f, μ, σ, x0, tspan, 
+    prob = PIDEProblem(g, f, μ, σ, x0, tspan,
                        x0_sample = x0_sample
                        )
     # solving
-    sol = solve(prob, 
-                alg, 
-                dt, 
-                verbose = false, 
+    sol = solve(prob,
+                alg,
+                dt,
+                verbose = false,
                 abstol = 1f-3,
                 maxiters = train_steps,
                 batch_size = batch_size,
@@ -568,7 +568,7 @@ end
 
     μ(x, p, t) = 0f0 # advection coefficients
     σ(x, p, t) = 1f-1 #1f-1 # diffusion coefficients
-    
+
     for d in [1,2,5]
         u1s = []
         for _ in 1:2
@@ -579,7 +579,7 @@ end
                 Dense(hls,hls,tanh),
                 Dense(hls,1)) # Neural network used by the scheme
 
-            opt = ADAM(1e-2) #optimiser
+            opt = Flux.Optimise.Adam(1e-2) #optimiser
             alg = DeepSplitting(nn, K=K, opt = opt, mc_sample = UniformSampling(-∂, ∂) )
 
             x0 = fill(0f0,d) # initial point
@@ -590,10 +590,10 @@ end
            # defining the problem
            prob = PIDEProblem(g, f, μ, σ, x0, tspan, neumann_bc = [-∂, ∂])
            # solving
-            @time sol = solve(prob, 
-                        alg, 
-                        dt, 
-                        # verbose = true, 
+            @time sol = solve(prob,
+                        alg,
+                        dt,
+                        # verbose = true,
                         # abstol=1e-5,
                         use_cuda = use_cuda,
                         cuda_device = cuda_device,
@@ -608,4 +608,3 @@ end
         @test e_l2 < 0.1
     end
 end
-
diff --git a/test/qa.jl b/test/qa.jl
new file mode 100644
index 0000000..4ab460f
--- /dev/null
+++ b/test/qa.jl
@@ -0,0 +1,11 @@
+using HighDimPDE, Aqua
+@testset "Aqua" begin
+    Aqua.find_persistent_tasks_deps(HighDimPDE)
+    Aqua.test_ambiguities(HighDimPDE, recursive = false)
+    Aqua.test_deps_compat(HighDimPDE)
+    Aqua.test_piracies(HighDimPDE)
+    Aqua.test_project_extras(HighDimPDE)
+    Aqua.test_stale_deps(HighDimPDE)
+    Aqua.test_unbound_args(HighDimPDE)
+    Aqua.test_undefined_exports(HighDimPDE)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 806f7c3..145ffe2 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,9 +1,8 @@
-using Test: include
-using HighDimPDE, Test
-
+using SafeTestsets, Test
 @testset "HighDimPDE" begin
-    include("reflect.jl")
-    include("MLP.jl")
-    include("DeepSplitting.jl")
-    include("MCSample.jl")
-end
\ No newline at end of file
+    @time @safetestset "Quality Assurance" include("qa.jl")
+    @time @safetestset "reflect" include("reflect.jl")
+    @time @safetestset "MLP" include("MLP.jl")
+    @time @safetestset "Deep Splitting" include("DeepSplitting.jl")
+    @time @safetestset "MC Sample" include("MCSample.jl")
+end
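
The change applied throughout the Julia sources above is the rename of the deprecated `Flux.ADAM` constructor to `Flux.Optimise.Adam`. The snippet below is a minimal sketch, not part of the patch, showing that the implicit-parameters training loop used in `DeepBSDE.jl` works unchanged with the new name; the toy network, data, and learning rate are invented for illustration, assuming Flux 0.13/0.14.

```julia
using Flux

# Toy network and data, only to exercise the renamed optimiser.
nn = Chain(Dense(2 => 8, tanh), Dense(8 => 1))
x, y = rand(Float32, 2, 16), rand(Float32, 1, 16)

# Old spelling removed by this patch: Flux.ADAM(0.01)
opt = Flux.Optimise.Adam(0.01)   # new spelling used throughout the package

ps = Flux.params(nn)             # implicit-parameters API, as in DeepBSDE.jl
loss() = Flux.mse(nn(x), y)
Flux.train!(loss, ps, Iterators.repeated((), 100), opt)
@show loss()
```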
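
With `Test`, `SafeTestsets`, and `Aqua` declared under `[extras]`/`[targets]`, the whole suite, including the new Aqua quality-assurance set, runs through the standard Pkg test target. A minimal way to exercise it from a local clone (the path below is a placeholder, not taken from the patch):

```julia
using Pkg
Pkg.activate("path/to/HighDimPDE.jl")  # placeholder: local clone of the package
Pkg.instantiate()
Pkg.test()  # runs test/runtests.jl: qa.jl, reflect.jl, MLP.jl, DeepSplitting.jl, MCSample.jl
```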