diff --git a/Project.toml b/Project.toml
index 026a29ba7..a514f9d23 100644
--- a/Project.toml
+++ b/Project.toml
@@ -42,45 +42,48 @@ Adapt = "4"
 AdvancedHMC = "0.6.1"
 Aqua = "0.8"
 ArrayInterface = "7.9"
-CUDA = "5.3"
+CUDA = "5.3.2"
 ChainRulesCore = "1.24"
-ComponentArrays = "0.15.14"
+ComponentArrays = "0.15.16"
 Cubature = "1.5"
 DiffEqNoiseProcess = "5.20"
 Distributions = "0.25.107"
 DocStringExtensions = "0.9.3"
 DomainSets = "0.6, 0.7"
-Flux = "0.14.11"
+Flux = "0.14.17"
 ForwardDiff = "0.10.36"
-Functors = "0.4.10"
+Functors = "0.4.12"
 Integrals = "4.4"
 LineSearches = "7.2"
-LinearAlgebra = "1"
+LinearAlgebra = "1.10"
 LogDensityProblems = "2"
-Lux = "0.5.58"
+Lux = "0.5.65"
 LuxCUDA = "0.3.2"
+LuxCore = "0.1.24"
+LuxLib = "0.3.48"
 MCMCChains = "6"
 MethodOfLines = "0.11"
 ModelingToolkit = "9.9"
 MonteCarloMeasurements = "1.1"
 Optim = "1.7.8"
-Optimization = "3.24, 4"
+Optimization = "3.25"
 OptimizationOptimJL = "0.2.1"
-OptimizationOptimisers = "0.2.1, 0.3"
-OrdinaryDiffEq = "6.74"
-Pkg = "1"
+OptimizationOptimisers = "0.2.1"
+OrdinaryDiffEq = "6.87"
+Pkg = "1.10"
+Preferences = "1.4.3"
 QuasiMonteCarlo = "0.3.2"
 Random = "1"
 Reexport = "1.2"
 RuntimeGeneratedFunctions = "0.5.12"
 SafeTestsets = "0.1"
-SciMLBase = "2.28"
+SciMLBase = "2.34"
 Statistics = "1.10"
 SymbolicUtils = "1.5, 2, 3"
 Symbolics = "5.27.1, 6"
-Test = "1"
+Test = "1.10"
 UnPack = "1"
-Zygote = "0.6.69"
+Zygote = "0.6.70"
 julia = "1.10"
 
 [extras]
@@ -89,12 +92,15 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
 LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
+LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
+LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11"
 MethodOfLines = "94925ecb-adb7-4558-8ed8-f975c56a0bf4"
 OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "Test", "CUDA", "SafeTestsets", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "LineSearches", "LuxCUDA", "Flux", "MethodOfLines"]
+test = ["Aqua", "CUDA", "Flux", "LineSearches", "LuxCUDA", "LuxCore", "LuxLib", "MethodOfLines", "OptimizationOptimJL", "OrdinaryDiffEq", "Pkg", "Preferences", "SafeTestsets", "Test"]
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 7105346aa..116478a51 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -109,7 +109,7 @@ function L2loss2(Tar::LogTargetDensity, θ)
     # parameter estimation chosen or not
     if Tar.extraparams > 0
         autodiff = Tar.autodiff
-        # Timepoints to enforce Physics
+        # Timepoints to enforce Physics
         t = Tar.dataset[end]
         u1 = Tar.dataset[2]
         û = Tar.dataset[1]
@@ -122,22 +122,23 @@ function L2loss2(Tar::LogTargetDensity, θ)
 
         if length(Tar.prob.u0) == 1
             physsol = [f(û[i],
-                ode_params,
-                t[i])
+                          ode_params,
+                          t[i])
                        for i in 1:length(û[:, 1])]
         else
             physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
+                          ode_params,
+                          t[i])
                        for i in 1:length(û)]
         end
-        #form of NN output matrix output dim x n
+        #form of NN output matrix output dim x n
         deri_physsol = reduce(hcat, physsol)
-
+
         physlogprob = 0
         for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i]
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+            # can add phystd[i] for u[i]
+            physlogprob += logpdf(
+                MvNormal(deri_physsol[i, :],
                     LinearAlgebra.Diagonal(map(abs2,
                         (Tar.l2std[i] * 4.0) .*
                         ones(length(nnsol[i, :]))))),
@@ -353,7 +354,10 @@ NN OUTPUT AT t,θ ~ phi(t,θ).
 function (f::LogTargetDensity{C, S})(t::AbstractVector,
         θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
+    θ_ = ComponentArrays.getdata(θ)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, t'))
+    y, st = f.chain(t_, θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
 end
@@ -361,7 +365,10 @@ end
 function (f::LogTargetDensity{C, S})(t::Number,
         θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
+    θ_ = ComponentArrays.getdata(θ)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, [t]))
+    y, st = f.chain(t_, θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y
 end
@@ -574,7 +581,8 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
         @info("Current Prior Log-likelihood : ", priorweights(ℓπ, initial_θ))
         @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
         if estim_collocate
-            @info("Current gradient loss against dataset Log-likelihood : ", L2loss2(ℓπ, initial_θ))
+            @info("Current gradient loss against dataset Log-likelihood : ",
+                L2loss2(ℓπ, initial_θ))
         end
 
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
@@ -628,7 +636,8 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
         @info("Current Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
         @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, samples[end]))
         if estim_collocate
-            @info("Current gradient loss against dataset Log-likelihood : ", L2loss2(ℓπ, samples[end]))
+            @info("Current gradient loss against dataset Log-likelihood : ",
+                L2loss2(ℓπ, samples[end]))
         end
 
     # return a chain(basic chain),samples and stats
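Throughout this patch, the one-liner `adapt(parameterless_type(ComponentArrays.getdata(θ)), x)` becomes a two-step pattern: `adapt` still moves the input onto the parameters' container type (e.g. `Array` vs. a GPU array), and the added broadcast `convert` also matches the parameters' element type, which is exactly what the `eltype_mismatch_handling` check enabled in `test/runtests.jl` below flags. A minimal CPU-only sketch of the idea, assuming `parameterless_type` behaves like ArrayInterface's function of that name (ArrayInterface is already a dependency above):

```julia
using Adapt, ArrayInterface, ComponentArrays

θ = ComponentArray(weight = rand(Float32, 4))    # Float32 network parameters
θ_ = ComponentArrays.getdata(θ)                  # underlying Vector{Float32}
eltypeθ = eltype(θ_)                             # Float32
typeθ = ArrayInterface.parameterless_type(θ_)    # Array (container without parameters)

t = [0.0, 0.5, 1.0]                              # Float64 time points from the user
t_ = convert.(eltypeθ, adapt(typeθ, t'))         # 1×3 row matrix with eltype Float32

@assert eltype(t_) == eltype(θ_)   # no Float32/Float64 mixing inside the chain
```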
diff --git a/src/discretize.jl b/src/discretize.jl
index 9a40e0fe8..8acd77533 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -217,21 +217,21 @@ function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::D
 
     bcs_train_sets = map(bound_args) do bt
         span = map(b -> get(dict_var_span, b, b), bt)
-        _set = adapt(eltypeθ,
-            hcat(vec(map(points -> collect(points), Iterators.product(span...)))...))
+        _set = hcat(vec(map(points -> collect(points), Iterators.product(span...)))...)
+        x = convert.(eltypeθ, adapt(eltypeθ, _set))
     end
 
     pde_vars = get_variables(eqs, dict_indvars, dict_depvars)
     pde_args = get_argument(eqs, dict_indvars, dict_depvars)
 
-    pde_train_set = adapt(eltypeθ,
-        hcat(vec(map(points -> collect(points),
-            Iterators.product(bc_data...)))...))
+    # pde_train_set = adapt(eltypeθ,
+    #     hcat(vec(map(points -> collect(points),
+    #         Iterators.product(bc_data...)))...))
 
     pde_train_sets = map(pde_args) do bt
         span = map(b -> get(dict_var_span_, b, b), bt)
-        _set = adapt(eltypeθ,
-            hcat(vec(map(points -> collect(points), Iterators.product(span...)))...))
+        _set = hcat(vec(map(points -> collect(points), Iterators.product(span...)))...)
+        x = convert.(eltypeθ, adapt(eltypeθ, _set))
     end
     [pde_train_sets, bcs_train_sets]
 end
@@ -266,11 +266,11 @@ function get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars,
 
     pde_lower_bounds = map(pde_args) do pd
         span = map(p -> get(dict_lower_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s) + cbrt(eps(eltypeθ)), span)
+        map(s -> convert(eltypeθ, adapt(eltypeθ, s)) + cbrt(eps(eltypeθ)), span)
     end
     pde_upper_bounds = map(pde_args) do pd
         span = map(p -> get(dict_upper_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s) - cbrt(eps(eltypeθ)), span)
+        map(s -> convert(eltypeθ, adapt(eltypeθ, s)) - cbrt(eps(eltypeθ)), span)
     end
     pde_bounds = [pde_lower_bounds, pde_upper_bounds]
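For reference, the `hcat(vec(map(collect, Iterators.product(...)))...)` idiom that these hunks unpack from `adapt` builds the training set as a matrix with one column per grid point; the extra `convert.` pass then pins down its element type. A standalone illustration with arbitrarily chosen spans:

```julia
# Cartesian product of two coordinate spans, x ∈ 0:0.5:1 and y ∈ 0:1:1
spans = [0.0:0.5:1.0, 0.0:1.0:1.0]
grid = hcat(vec(map(points -> collect(points), Iterators.product(spans...)))...)
# 2×6 Matrix{Float64}, each column one training point:
#  0.0  0.5  1.0  0.0  0.5  1.0
#  0.0  0.0  0.0  1.0  1.0  1.0

grid32 = convert.(Float32, grid)   # the convert.(eltypeθ, ...) step for Float32 params
```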
diff --git a/src/neural_adapter.jl b/src/neural_adapter.jl
index e54c6e818..ee69249e0 100644
--- a/src/neural_adapter.jl
+++ b/src/neural_adapter.jl
@@ -5,8 +5,8 @@ function generate_training_sets(domains, dx, eqs, eltypeθ)
         dxs = fill(dx, length(domains))
     end
     spans = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, dxs)]
-    train_set = adapt(eltypeθ,
-        hcat(vec(map(points -> collect(points), Iterators.product(spans...)))...))
+    train_set = hcat(vec(map(points -> collect(points), Iterators.product(spans...)))...)
+    x = convert.(eltypeθ, adapt(eltypeθ, train_set))
 end
 
 function get_loss_function_(loss, init_params, pde_system, strategy::GridTraining)
@@ -30,7 +30,7 @@ function get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, strateg
 
     bounds = first(map(args) do pd
         span = map(p -> get(dict_span, p, p), pd)
-        map(s -> adapt(eltypeθ, s), span)
+        map(s -> convert(eltypeθ, adapt(eltypeθ, s)), span)
     end)
     bounds = [getindex.(bounds, 1), getindex.(bounds, 2)]
     return bounds
@@ -75,11 +75,11 @@ function get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars,
 
     lower_bounds = map(args) do pd
         span = map(p -> get(dict_lower_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s), span)
+        map(s -> convert(eltypeθ, adapt(eltypeθ, s)), span)
    end
     upper_bounds = map(args) do pd
         span = map(p -> get(dict_upper_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s), span)
+        map(s -> convert(eltypeθ, adapt(eltypeθ, s)), span)
     end
     bound = lower_bounds, upper_bounds
 end
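The scalar domain bounds in `get_bounds_` above and `get_bounds` in `src/discretize.jl` get the same element-type treatment. This matters because the `cbrt(eps(eltypeθ))` offset used in `src/discretize.jl`, which keeps sampled points strictly inside the domain, scales with the working precision:

```julia
# The interior nudge is much wider in single precision:
cbrt(eps(Float64))   # ≈ 6.1e-6
cbrt(eps(Float32))   # ≈ 4.9e-3

# So a bound is first brought to the parameters' eltype, then nudged inward:
eltypeθ = Float32
lo = convert(eltypeθ, 0.0) + cbrt(eps(eltypeθ))   # ≈ 0.0049f0, a Float32
```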
diff --git a/src/ode_solve.jl b/src/ode_solve.jl
index bcf9c68eb..2b3ac4dfd 100644
--- a/src/ode_solve.jl
+++ b/src/ode_solve.jl
@@ -118,8 +118,10 @@ end
 
 function (f::ODEPhi{C, T, U})(t::Number,
         θ) where {C <: Lux.AbstractExplicitLayer, T, U <: Number}
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), [t]), θ.depvar, f.st)
+    θ_ = ComponentArrays.getdata(θ.depvar)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, [t]))
+    y, st = f.chain(t_, θ.depvar, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.u0 + (t - f.t0) * first(y)
 end
@@ -127,15 +129,19 @@ end
 function (f::ODEPhi{C, T, U})(t::AbstractVector,
         θ) where {C <: Lux.AbstractExplicitLayer, T, U <: Number}
     # Batch via data as row vectors
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), t'), θ.depvar, f.st)
+    θ_ = ComponentArrays.getdata(θ.depvar)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, t'))
+    y, st = f.chain(t_, θ.depvar, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.u0 .+ (t' .- f.t0) .* y
 end
 
 function (f::ODEPhi{C, T, U})(t::Number, θ) where {C <: Lux.AbstractExplicitLayer, T, U}
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), [t]), θ.depvar, f.st)
+    θ_ = ComponentArrays.getdata(θ.depvar)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, [t]))
+    y, st = f.chain(t_, θ.depvar, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.u0 .+ (t .- f.t0) .* y
 end
@@ -143,8 +149,10 @@ end
 function (f::ODEPhi{C, T, U})(t::AbstractVector,
         θ) where {C <: Lux.AbstractExplicitLayer, T, U}
     # Batch via data as row vectors
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), t'), θ.depvar, f.st)
+    θ_ = ComponentArrays.getdata(θ.depvar)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, t'))
+    y, st = f.chain(t_, θ.depvar, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.u0 .+ (t' .- f.t0) .* y
 end
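All four `ODEPhi` methods feed the converted input through the same trial-solution ansatz, `u_θ(t) = u0 + (t - t0) * NN_θ(t)`, which satisfies the initial condition by construction for every parameter value. A self-contained sketch (the layer sizes are illustrative, not what the solver builds):

```julia
using Lux, Random, ComponentArrays

chain = Lux.Chain(Lux.Dense(1, 8, tanh), Lux.Dense(8, 1))
ps, st = Lux.setup(Random.default_rng(), chain)
ps = ComponentArray(ps)

u0, t0 = 1.0f0, 0.0f0
# Trial solution: network output rescaled so that u(t0) == u0 exactly
phi(t, θ) = u0 + (t - t0) * first(first(chain([t], θ, st)))

phi(t0, ps) == u0   # true for any θ: the network term vanishes at t0
```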
diff --git a/src/pinn_types.jl b/src/pinn_types.jl
index 59480d8a6..713e67e73 100644
--- a/src/pinn_types.jl
+++ b/src/pinn_types.jl
@@ -505,13 +505,19 @@ mutable struct Phi{C, S}
 end
 
 function (f::Phi{<:Lux.AbstractExplicitLayer})(x::Number, θ)
-    y, st = f.f(adapt(parameterless_type(ComponentArrays.getdata(θ)), [x]), θ, f.st)
+    θ_ = ComponentArrays.getdata(θ)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    x_ = convert.(eltypeθ, adapt(typeθ, [x]))
+    y, st = f.f(x_, θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     y
 end
 
 function (f::Phi{<:Lux.AbstractExplicitLayer})(x::AbstractArray, θ)
-    y, st = f.f(adapt(parameterless_type(ComponentArrays.getdata(θ)), x), θ, f.st)
+    θ_ = ComponentArrays.getdata(θ)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    x_ = convert.(eltypeθ, adapt(typeθ, x))
+    y, st = f.f(x_, θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     y
 end
@@ -522,13 +528,14 @@ end
 
 # the method to calculate the derivative
 function numeric_derivative(phi, u, x, εs, order, θ)
-    _type = parameterless_type(ComponentArrays.getdata(θ))
+    θ_ = ComponentArrays.getdata(θ)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
 
     ε = εs[order]
     _epsilon = inv(first(ε[ε .!= zero(ε)]))
 
-    ε = adapt(_type, ε)
-    x = adapt(_type, x)
+    ε = convert.(eltypeθ, adapt(typeθ, ε))
+    x = convert.(eltypeθ, adapt(typeθ, x))
 
     # any(x->x!=εs[1],εs) # εs is the epsilon for each order, if they are all the same then we use a fancy formula
diff --git a/test/NNPDE_tests.jl b/test/NNPDE_tests.jl
index 7236ac041..155b300d8 100644
--- a/test/NNPDE_tests.jl
+++ b/test/NNPDE_tests.jl
@@ -110,17 +110,17 @@ end
         Lux.Chain(Lux.Dense(2, 12, Lux.tanh),
             Lux.Dense(12, 12, Lux.tanh),
             Lux.Dense(12, 1))]
-    grid_strategy = NeuralPDE.GridTraining(0.1)
-    quadrature_strategy = NeuralPDE.QuadratureTraining(quadrature_alg = CubatureJLh(),
+    grid_strategy = GridTraining(0.1)
+    quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(),
         reltol = 1e-3, abstol = 1e-3,
         maxiters = 50, batch = 100)
 
-    discretization = NeuralPDE.PhysicsInformedNN(chain, grid_strategy)
+    discretization = PhysicsInformedNN(chain, grid_strategy)
 
     @named pde_system = PDESystem(eqs, bcs, domains, [x, y, z],
         [u(x, y, z), v(y, x), h(z), p(x, z)])
 
-    prob = NeuralPDE.discretize(pde_system, discretization)
+    prob = discretize(pde_system, discretization)
 
     callback = function (p, l)
         println("Current loss is: $l")
diff --git a/test/runtests.jl b/test/runtests.jl
index e6248eae6..a874e66e9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,14 @@
 using Pkg
 using SafeTestsets
 
+# Run Lux in "safe-mode" with aggressive error checking
+using Preferences
+
+# TODO: `warn` is for debugging purposes. Change to `error` for best performance.
+Preferences.set_preferences!("Lux", "eltype_mismatch_handling" => "warn")
+Preferences.set_preferences!("LuxLib", "instability_check" => "warn")
+Preferences.set_preferences!("LuxCore", "instability_check" => "warn")
+
 const GROUP = get(ENV, "GROUP", "All")
 
 const is_APPVEYOR = Sys.iswindows() && haskey(ENV, "APPVEYOR")
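These `set_preferences!` calls persist to a `LocalPreferences.toml` next to the active `Project.toml` (hence `Preferences`, `LuxCore`, and `LuxLib` joining the test target), and they take effect when the Lux packages are next precompiled. They can be read back with `load_preference`; a small sketch using the LuxLib UUID from the `[extras]` table above:

```julia
using Preferences, UUIDs

# load_preference also accepts a loaded Module in place of a UUID
luxlib = UUID("82251201-b29d-42c6-8e01-566dec8acb11")
load_preference(luxlib, "instability_check")   # -> "warn" once runtests.jl has run
```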