Skip to content

Commit

Permalink
Add new scheduler darts (Data-Aware Reactive Task Scheduling)
Browse files Browse the repository at this point in the history
  • Loading branch information
nfurmento committed Sep 25, 2023
1 parent 977a22d commit 0481d90
Show file tree
Hide file tree
Showing 24 changed files with 11,438 additions and 2 deletions.
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ New features:
* Add starpu_data_register_victim_selector to let schedulers select eviction
victims.
* Add bus performance model for HIP driver.
* New scheduler darts (Data-Aware Reactive Task Scheduling)

Small features:
* Add FXT option -use-task-color to propagate the specified task
Expand Down
33 changes: 33 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -4160,6 +4160,39 @@ AC_SUBST(STARPU_LIB_PATH, $(eval echo ${prefix}/lib))
AC_SUBST(STARPU_MODULE_LIBS, "$module_libs")
AC_SUBST(STARPU_OPTION_LIBS, "$option_libs")

###############################################################################
# #
# DARTS settings #
# #
###############################################################################

# --enable-darts-verbose (off by default): when set to yes, defines
# STARPU_DARTS_VERBOSE to 1 so that DARTS verbose debug messages are displayed.
AC_MSG_CHECKING(whether DARTS debug messages should be displayed)
AC_ARG_ENABLE(darts-verbose, [AS_HELP_STRING([--enable-darts-verbose],
[display DARTS verbose debug messages])],
enable_darts_verbose=$enableval, enable_darts_verbose=no)
AC_MSG_RESULT($enable_darts_verbose)
if test x$enable_darts_verbose = xyes; then
AC_DEFINE(STARPU_DARTS_VERBOSE, [1], [display DARTS verbose debug messages])
fi

# --enable-darts-stats (off by default): when set to yes, defines
# STARPU_DARTS_STATS to 1 to enable DARTS statistics.
AC_MSG_CHECKING(whether DARTS statistics should be enabled)
AC_ARG_ENABLE(darts-stats, [AS_HELP_STRING([--enable-darts-stats],
[enable DARTS statistics])],
enable_darts_stats=$enableval, enable_darts_stats=no)
AC_MSG_RESULT($enable_darts_stats)
if test x$enable_darts_stats = xyes; then
AC_DEFINE(STARPU_DARTS_STATS, [1], [enable DARTS statistics])
fi

# --enable-darts-linear-mutex (off by default): when set to yes, defines
# STARPU_DARTS_LINEAR_MUTEX to 1.
# NOTE(review): what the linear mutex changes inside the scheduler is not
# visible from this file; see the DARTS sources.
AC_MSG_CHECKING(whether DARTS linear mutex should be used)
AC_ARG_ENABLE(darts-linear-mutex, [AS_HELP_STRING([--enable-darts-linear-mutex],
[enable DARTS linear mutex])],
enable_darts_linear_mutex=$enableval, enable_darts_linear_mutex=no)
AC_MSG_RESULT($enable_darts_linear_mutex)
if test x$enable_darts_linear_mutex = xyes; then
AC_DEFINE(STARPU_DARTS_LINEAR_MUTEX, [1], [enable DARTS linear mutex])
fi

###############################################################################
# #
# Final settings #
Expand Down
1 change: 1 addition & 0 deletions doc/doxygen/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ chapters = \
chapters/starpu_extensions/socl_opencl_extensions.doxy \
chapters/starpu_extensions/bubble.doxy \
chapters/starpu_extensions/parallel_worker.doxy \
chapters/starpu_extensions/darts.doxy \
chapters/starpu_extensions/interoperability.doxy \
chapters/starpu_extensions/scheduling_policy_definition.doxy \
chapters/starpu_extensions/simgrid.doxy \
Expand Down
140 changes: 140 additions & 0 deletions doc/doxygen/chapters/starpu_extensions/darts.doxy
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/* StarPU --- Runtime system for heterogeneous multicore architectures.
*
* Copyright (C) 2009-2023 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
*
* StarPU is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or (at
* your option) any later version.
*
* StarPU is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See the GNU Lesser General Public License in COPYING.LGPL for more details.
*/

/*! \page DARTS Data-aware Scheduler and Visualization Tool

\section DARTS_Scheduling_Policy Overview

DARTS is a research scheduler designed to address memory constraints.
A study of its results is available as a conference paper: https://ieeexplore.ieee.org/abstract/document/9820704
A further study is available as a pre-publication: https://inria.hal.science/hal-04146714v1

\subsection darts_purpose Purpose

DARTS (for Data-Aware Reactive Task Scheduling) is a scheduling policy that aims to achieve good performance under memory constraints.
DARTS looks for the "best" data, that is, the data that has the smallest ratio of transfer time to computation made available without additional data load.
DARTS computes all tasks using this "best" data and the data already loaded into memory.
If no data allows at least one task to be computed without additional load, the highest priority task is scheduled next.
DARTS can be used with or without a memory constraint.

\subsection darts_features Features

DARTS has been tested on the outer product, GEMM, the Cholesky and LU factorizations.
These applications are typically used as follows:
\verbatim
./examples/cholesky/cholesky_implicit -size $((block_size*N)) -nblocks $((N)) -niter 1
./examples/mult/sgemm -xy $((block_size*N)) -nblocks $((N)) -iter 1
./examples/mult/sgemm -xyz $((block_size*N)) -nblocks $((N)) -nblocksz $((N)) -iter 1
./examples/lu/lu_implicit_example_float -size $((block_size*N)) -nblocks $((N)) -iter 1
\endverbatim
In theory, DARTS can be used for any task-based application.

\section darts_best_practices Best Practices

It is highly recommended to use only GPUs for best performance.
It is therefore recommended to set the variables \ref STARPU_NOPENCL and \ref STARPU_NCPU to 0.

If the application does not use dependencies (such as the outer product), use the following environment variables:
\verbatim
STARPU_DARTS_DEPENDANCES=0
STARPU_DARTS_PRIO=0
\endverbatim

For example, a set of parameters for DARTS that achieves the best performance is
\verbatim
STARPU_SCHED_READY=1 STARPU_SIMGRID_CUDA_MALLOC_COST=0 STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK=0 STARPU_SCHED=darts STARPU_NTASKS_THRESHOLD=30 STARPU_CUDA_PIPELINE=5 STARPU_MINIMUM_CLEAN_BUFFERS=0 STARPU_TARGET_CLEAN_BUFFERS=0 STARPU_NCPU=0 STARPU_NCUDA=$((NGPU)) STARPU_NOPENCL=0 ${APPLICATION}
\endverbatim

\section DARTS_Building_Visualizations Building Visualizations

DARTS also comes with a visualization tool that plots the order in which a processing unit processes the task set of a matrix multiplication or a Cholesky factorization.
The files that make up the visualization are located in the directory \c tools/darts/.
The visualizations only work for Gemm, the outer product, and the Cholesky factorization when using only GPUs.

\subsection darts_visu_Configuration Configuration

The required configure options are: <c>--enable-darts-stats --enable-darts-verbose</c>.

\subsection darts_visu_launch Launch Options

Add the following environment variables when launching the application:

<ul>
<li>
<c>PRINT_N=$((N))</c> where <c>N</c> is the side of the matrix used in the application.
</li>
<li>
<c>PRINT_IN_TERMINAL=1</c>.
</li>
<li>
<c>STARPU_SCHED_OUTPUT=path_to_output</c> to specify where the output file will be stored.
</li>
</ul>

If your target application is Cholesky, use <c>-niter 1</c>. If your target application is Gemm or the outer product, use <c>-iter 1</c>.

An example of launch options for the outer product is:
\verbatim
STARPU_SCHED_OUTPUT=${OUTPUT_PATH} STARPU_SCHED=darts PRINT_IN_TERMINAL=1 PRINT_N=$((N)) STARPU_NTASKS_THRESHOLD=30 STARPU_CUDA_PIPELINE=5 STARPU_SIMGRID_CUDA_MALLOC_COST=0 STARPU_MINIMUM_CLEAN_BUFFERS=0 STARPU_TARGET_CLEAN_BUFFERS=0 STARPU_NCPU=0 STARPU_NCUDA=$((NGPU)) STARPU_NOPENCL=0 ./examples/mult/sgemm -xy $((block_size*N)) -nblocks $((N)) -iter 1
python3 ./tools/darts/visualization_darts.py ${N} darts ${NGPU} Matrice_ligne 1 0 ${block_size} ${OUTPUT_PATH}
\endverbatim

A full example of the command used to build the visualization is available in \c tools/darts/example_script_visualization_darts.sh.

The output visualization is stored in the current folder.

\section More_Scheduler More research schedulers about memory-aware scheduling

Other memory-constrained schedulers are also available for experimental purposes. Note that they only work with GPUs, and only on GEMM and the outer product.

<ul>
<li>
\c HFP (Hierarchical Fair Packing) groups tasks that share data into
packages whose maximum size is the size of the processing units' memory.
It should be used with the following command line with one GPU:
\verbatim
STARPU_SCHED=HFP MULTIGPU=6 TASK_STEALING=3 STARPU_SCHED_READY=1 BELADY=1 ORDER_U=1 STARPU_MINIMUM_CLEAN_BUFFERS=0 STARPU_TARGET_CLEAN_BUFFERS=0 STARPU_NCPU=0 STARPU_NCUDA=$((NGPU)) STARPU_NOPENCL=0 ./examples/mult/sgemm -xy $((block_size*N)) -nblocks $((N))
\endverbatim
With multiple GPUs it should be used with:
</li>
<li>
\c cuthillmckee for Cuthill-McKee is an algorithm that transforms a
sparse matrix into a minimum band matrix. The algorithm is adapted to
task-based scheduling by considering vertices as tasks and edges as
data shares. It should be used as follows:
\verbatim
STARPU_SCHED=cuthillmckee REVERSE=1 STARPU_MINIMUM_CLEAN_BUFFERS=0 STARPU_TARGET_CLEAN_BUFFERS=0 STARPU_NCPU=0 STARPU_NCUDA=$((NGPU)) STARPU_NOPENCL=0 ./examples/mult/sgemm -xy $((block_size*N)) -nblocks $((N))
\endverbatim
</li>
<li>
\c MST for Maximum Spanning Tree (mst) follows Prim's algorithm to add
vertices to a spanning tree with maximum weights. Vertices are tasks
and weighted edges are the number of data shared between two tasks. It
should be used as follows:
\verbatim
STARPU_SCHED=mst STARPU_MINIMUM_CLEAN_BUFFERS=0 STARPU_TARGET_CLEAN_BUFFERS=0 STARPU_NCPU=0 STARPU_NCUDA=$((NGPU)) STARPU_NOPENCL=0 ./examples/mult/sgemm -xy $((block_size*N)) -nblocks $((N))
\endverbatim
</li>
<li>
\c random_order returns a set of tasks in a randomized order. It
should be used with the command line:
\verbatim
STARPU_SCHED=random_order STARPU_MINIMUM_CLEAN_BUFFERS=0 STARPU_TARGET_CLEAN_BUFFERS=0 STARPU_NCPU=0 STARPU_NCUDA=$((NGPU)) STARPU_NOPENCL=0 ./examples/mult/sgemm -xy $((block_size*N)) -nblocks $((N))
\endverbatim
</li>
</ul>

*/
1 change: 1 addition & 0 deletions doc/doxygen/doxygen-config.cfg.in
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_introduction/i
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/bubble.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/darts.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/interoperability.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/simgrid.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy \
Expand Down
5 changes: 5 additions & 0 deletions doc/doxygen/refman.tex
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,11 @@ \chapter{Parallel Workers}
\hypertarget{ParallelWorker}{}
\input{ParallelWorker}

\chapter{Data-aware Scheduler and Visualization Tool}
\label{Darts}
\hypertarget{Darts}{}
\input{Darts}

\chapter{Interoperability Support}
\label{InteropSupport}
\hypertarget{InteropSupport}{}
Expand Down
1 change: 1 addition & 0 deletions doc/doxygen_web_extensions/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ chapters = \
../doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \
../doxygen/chapters/starpu_extensions/bubble.doxy \
../doxygen/chapters/starpu_extensions/parallel_worker.doxy \
../doxygen/chapters/starpu_extensions/darts.doxy \
../doxygen/chapters/starpu_extensions/interoperability.doxy \
../doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy \
../doxygen/chapters/starpu_extensions/simgrid.doxy \
Expand Down
1 change: 1 addition & 0 deletions doc/doxygen_web_extensions/doxygen-config.cfg.in
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_extensions/ext
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/bubble.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/darts.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/interoperability.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/simgrid.doxy \
@top_srcdir@/doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy \
Expand Down
5 changes: 5 additions & 0 deletions doc/doxygen_web_extensions/refman.tex
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ \chapter{Parallel Workers}
\hypertarget{ParallelWorker}{}
\input{ParallelWorker}

\chapter{Data-aware Scheduler and Visualization Tool}
\label{Darts}
\hypertarget{Darts}{}
\input{Darts}

\chapter{Interoperability Support}
\label{InteropSupport}
\hypertarget{InteropSupport}{}
Expand Down
11 changes: 10 additions & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,10 @@ noinst_HEADERS = \
util/starpu_task_insert_utils.h \
util/starpu_data_cpy.h \
sched_policies/prio_deque.h \
sched_policies/sched_component.h
sched_policies/sched_component.h \
sched_policies/darts.h \
sched_policies/HFP.h \
sched_policies/sched_visu.h

libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \
common/barrier.c \
Expand Down Expand Up @@ -314,6 +317,12 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \
sched_policies/modular_eager_prio.c \
sched_policies/modular_eager_prefetching.c \
sched_policies/modular_gemm.c \
sched_policies/random_order.c \
sched_policies/mst_policy.c \
sched_policies/HFP.c \
sched_policies/sched_visu.c \
sched_policies/darts.c \
sched_policies/cuthillmckee_policy.c \
sched_policies/modular_prio.c \
sched_policies/modular_prio_prefetching.c \
sched_policies/modular_random.c \
Expand Down
15 changes: 14 additions & 1 deletion src/core/sched_policy.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <common/barrier.h>
#include <core/debug.h>
#include <core/task.h>
#include <sched_policies/sched_visu.h>

#ifdef HAVE_DLOPEN
#include <dlfcn.h>
Expand All @@ -45,6 +46,7 @@ static const char *sched_lib = NULL;

void _starpu_sched_init(void)
{
_starpu_visu_init();
_starpu_task_break_on_push = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_PUSH", -1);
_starpu_task_break_on_sched = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_SCHED", -1);
_starpu_task_break_on_pop = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_POP", -1);
Expand Down Expand Up @@ -101,6 +103,12 @@ static struct starpu_sched_policy *predefined_policies[] =

/* NULL-terminated array of built-in scheduling policies that are kept apart
 * from predefined_policies[].
 * NOTE(review): judging by the name, these are presumably available on
 * request but never picked as the default policy -- confirm against the
 * policy lookup code. */
static struct starpu_sched_policy *predefined_policies_non_default[] =
{
&_starpu_sched_darts_policy,
&_starpu_sched_random_order_policy,
&_starpu_sched_HFP_policy,
&_starpu_sched_modular_heft_HFP_policy,
&_starpu_sched_mst_policy,
&_starpu_sched_cuthillmckee_policy,
NULL
};

Expand Down Expand Up @@ -1194,14 +1202,19 @@ struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
task->prologue_callback_pop_func(task->prologue_callback_pop_arg);
_starpu_set_current_task(NULL);
}


_sched_visu_pop_ready_task(task);

return task;
}

void _starpu_sched_pre_exec_hook(struct starpu_task *task)
{
unsigned sched_ctx_id = starpu_sched_ctx_get_ctx_for_task(task);
struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);

_sched_visu_get_current_tasks_for_visualization(task, sched_ctx_id);

if (sched_ctx->sched_policy && sched_ctx->sched_policy->pre_exec_hook)
{
_STARPU_SCHED_BEGIN;
Expand Down
6 changes: 6 additions & 0 deletions src/core/sched_policy.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ extern struct starpu_sched_policy _starpu_sched_modular_eager_policy;
extern struct starpu_sched_policy _starpu_sched_modular_eager_prefetching_policy;
extern struct starpu_sched_policy _starpu_sched_modular_eager_prio_policy;
extern struct starpu_sched_policy _starpu_sched_modular_gemm_policy;
extern struct starpu_sched_policy _starpu_sched_darts_policy;
extern struct starpu_sched_policy _starpu_sched_random_order_policy;
extern struct starpu_sched_policy _starpu_sched_HFP_policy;
extern struct starpu_sched_policy _starpu_sched_modular_heft_HFP_policy;
extern struct starpu_sched_policy _starpu_sched_mst_policy;
extern struct starpu_sched_policy _starpu_sched_cuthillmckee_policy;
extern struct starpu_sched_policy _starpu_sched_modular_prio_policy;
extern struct starpu_sched_policy _starpu_sched_modular_prio_prefetching_policy;
extern struct starpu_sched_policy _starpu_sched_modular_random_policy;
Expand Down
Loading

0 comments on commit 0481d90

Please sign in to comment.