class generator_impl

Declaration

class generator_impl { /* full declaration omitted */ };

Member Variables

instruction_graph* m_idag
const task_manager* m_tm
size_t m_num_nodes
node_id m_local_nid
system_info m_system
instruction_graph_generator::delegate* m_delegate
instruction_recorder* m_recorder
instruction_graph_generator::policy_set m_policy
instruction_id m_next_instruction_id = 0
message_id m_next_message_id = 0
instruction* m_last_horizon = nullptr
instruction* m_last_epoch = nullptr
std::unordered_set<instruction_id> m_execution_front: The set of all instructions that are not yet depended upon by other instructions. These are collected by collapse_execution_front_to() as part of horizon / epoch generation.
dense_map<memory_id, memory_state> m_memories
std::unordered_map<buffer_id, buffer_state> m_buffers
std::unordered_map<host_object_id, host_object_state> m_host_objects
std::unordered_map<collective_group_id, collective_group_state> m_collective_groups
std::vector<allocation_id> m_unreferenced_user_allocations: The instruction executor maintains a mapping of allocation_id -> USM pointer. For IDAG-managed memory, these entries are deleted after executing a `free_instruction`, but since user allocations are not deallocated by us, we notify the executor on each horizon or epoch via the `instruction_garbage` struct about entries that will no longer be used and can therefore be collected. We include user allocations for buffer fences immediately after emitting the fence, and buffer host-initialization user allocations after the buffer has been destroyed.
static const box<3> scalar_reduction_box = {zeros, ones}

Member Function Overview

compile(const abstract_command & cmd) → void
generator_impl(const task_manager & tm, size_t num_nodes, node_id local_nid, const system_info & system, instruction_graph & idag, instruction_graph_generator::delegate * dlg, instruction_recorder * recorder, const instruction_graph_generator::policy_set & policy)
notify_buffer_created(buffer_id bid, const range<3> & range, size_t elem_size, size_t elem_align, allocation_id user_aid = null_allocation_id) → void
notify_buffer_debug_name_changed(buffer_id bid, const std::string & name) → void
notify_buffer_destroyed(buffer_id bid) → void
notify_host_object_created(host_object_id hoid, bool owns_instance) → void
notify_host_object_destroyed(host_object_id hoid) → void
add_dependencies_on_last_concurrent_accesses(instruction * accessing_instruction, buffer_allocation_state & allocation, const region<3> & region, instruction_dependency_origin origin_for_read_write_front) → void
add_dependencies_on_last_writers(instruction * accessing_instruction, buffer_allocation_state & allocation, const region<3> & region) → void
add_dependency(instruction * from, instruction * to, instruction_dependency_origin record_origin) → void
allocate_contiguously(batch & batch, buffer_id bid, memory_id mid, box_vector<3> && required_contiguous_boxes) → void
apply_epoch(instruction * epoch) → void
collapse_execution_front_to(instruction * horizon_or_epoch) → void
commit_pending_region_receive_to_host_memory(batch & batch, buffer_id bid, const buffer_state::region_receive & receives, const std::vector<region<3>> & concurrent_reads) → void
compile_epoch_command(batch & batch, const epoch_command & ecmd) → void
compile_execution_command(batch & batch, const execution_command & ecmd) → void
compile_fence_command(batch & batch, const fence_command & fcmd) → void
compile_horizon_command(batch & batch, const horizon_command & hcmd) → void
compile_push_command(batch & batch, const push_command & pcmd) → void
compile_reduction_command(batch & batch, const reduction_command & rcmd) → void
template <typename Instruction, typename... CtorParamsAndRecordWithFn>
create(batch & batch, CtorParamsAndRecordWithFn &&... ctor_args_and_record_with) → Instruction *
template <typename Instruction, typename... CtorParamsAndRecordWithFn, size_t... CtorParamIndices, size_t RecordWithFnIndex>
create_internal(batch & batch, const std::tuple<CtorParamsAndRecordWithFn...> & ctor_args_and_record_with, std::index_sequence<CtorParamIndices...>, std::index_sequence<RecordWithFnIndex>) → Instruction *
create_outbound_pilot(batch & batch, node_id target, const transfer_id & trid, const box<3> & box) → message_id
create_task_collective_groups(batch & command_batch, const task & tsk) → void
defer_await_push_command(const await_push_command & apcmd) → void
establish_coherence_between_buffer_memories(batch & batch, buffer_id bid, memory_id dest_mid, const std::vector<region<3>> & concurrent_reads) → void
finish_task_local_reduction(batch & command_batch, const local_reduction & red, const reduction_info & rinfo, const execution_command & ecmd, const task & tsk, const std::vector<localized_chunk> & concurrent_chunks) → void
flush_batch(batch && batch) → void
is_recording() const → bool
launch_task_kernel(batch & command_batch, const execution_command & ecmd, const task & tsk, const localized_chunk & chunk) → instruction *
new_allocation_id(memory_id mid) → allocation_id
perform_atomic_write_to_allocation(instruction * writing_instruction, buffer_allocation_state & allocation, const region<3> & region) → void
perform_concurrent_read_from_allocation(instruction * reading_instruction, buffer_allocation_state & allocation, const region<3> & region) → void
perform_task_buffer_accesses(const task & tsk, const std::vector<localized_chunk> & concurrent_chunks, const std::vector<instruction *> & command_instructions) → void
perform_task_collective_operations(const task & tsk, const std::vector<localized_chunk> & concurrent_chunks, const std::vector<instruction *> & command_instructions) → void
perform_task_side_effects(const task & tsk, const std::vector<localized_chunk> & concurrent_chunks, const std::vector<instruction *> & command_instructions) → void
prepare_task_local_reduction(batch & command_batch, const reduction_info & rinfo, const execution_command & ecmd, const task & tsk, size_t num_concurrent_chunks) → local_reduction
print_buffer_debug_label(buffer_id bid) const → std::string
report_task_overlapping_writes(const task & tsk, const std::vector<localized_chunk> & concurrent_chunks) const → void
satisfy_task_buffer_requirements(batch & batch, buffer_id bid, const task & tsk, const subrange<3> & local_execution_range, bool is_reduction_initializer, const std::vector<localized_chunk> & concurrent_chunks_after_split) → void
split_task_execution_range(const execution_command & ecmd, const task & tsk) → std::vector<localized_chunk>

Member Functions

¶void compile(const abstract_command& cmd)

Parameters

const abstract_command& cmd

¶generator_impl(
    const task_manager& tm,
    size_t num_nodes,
    node_id local_nid,
    const system_info& system,
    instruction_graph& idag,
    instruction_graph_generator::delegate* dlg,
    instruction_recorder* recorder,
    const instruction_graph_generator::policy_set&
        policy)

Parameters

const task_manager& tm
size_t num_nodes
node_id local_nid
const system_info& system
instruction_graph& idag
instruction_graph_generator::delegate* dlg
instruction_recorder* recorder
const instruction_graph_generator::policy_set& policy

¶void notify_buffer_created(
    buffer_id bid,
    const range<3>& range,
    size_t elem_size,
    size_t elem_align,
    allocation_id user_aid = null_allocation_id)

Parameters

buffer_id bid
const range<3>& range
size_t elem_size
size_t elem_align
allocation_id user_aid = null_allocation_id

¶void notify_buffer_debug_name_changed(
    buffer_id bid,
    const std::string& name)

Parameters

buffer_id bid
const std::string& name

¶void notify_buffer_destroyed(buffer_id bid)

Parameters

buffer_id bid

¶void notify_host_object_created(
    host_object_id hoid,
    bool owns_instance)

Parameters

host_object_id hoid
bool owns_instance

¶void notify_host_object_destroyed(
    host_object_id hoid)

Parameters

host_object_id hoid

¶void add_dependencies_on_last_concurrent_accesses(
    instruction* accessing_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region,
    instruction_dependency_origin
        origin_for_read_write_front)

Parameters

instruction* accessing_instruction
buffer_allocation_state& allocation
const region<3>& region
instruction_dependency_origin origin_for_read_write_front

¶void add_dependencies_on_last_writers(
    instruction* accessing_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region)

Parameters

instruction* accessing_instruction
buffer_allocation_state& allocation
const region<3>& region

¶void add_dependency(
    instruction* from,
    instruction* to,
    instruction_dependency_origin record_origin)

Description

Inserts a graph dependency and removes `to` from the execution front (if present). The record_origin is debug information.

Parameters

instruction* from
instruction* to
instruction_dependency_origin record_origin

¶void allocate_contiguously(
    batch& batch,
    buffer_id bid,
    memory_id mid,
    box_vector<3>&& required_contiguous_boxes)

Description

Ensure that all boxes in required_contiguous_boxes have a contiguous allocation on mid. Because re-allocation of one buffer on one memory never interacts with other buffers or other memories backing the same buffer, this function can be called in any order of allocation requirements without generating additional dependencies.

Parameters

batch& batch
buffer_id bid
memory_id mid
box_vector<3>&& required_contiguous_boxes

¶void apply_epoch(instruction* epoch)

Description

Replace all tracked instructions that are older than epoch with epoch.

Parameters

instruction* epoch

¶void collapse_execution_front_to(
    instruction* horizon_or_epoch)

Description

Add dependencies from horizon_or_epoch to all instructions in m_execution_front and clear the set.

Parameters

instruction* horizon_or_epoch

¶void commit_pending_region_receive_to_host_memory(
    batch& batch,
    buffer_id bid,
    const buffer_state::region_receive& receives,
    const std::vector<region<3>>&
        concurrent_reads)

Description

Insert one or more receive instructions in order to fulfil a pending receive, making the received data available in host_memory_id. This may entail receiving a region that is larger than the union of all regions read.

Parameters

batch& batch
buffer_id bid
const buffer_state::region_receive& receives
const std::vector<region<3>>& concurrent_reads

¶void compile_epoch_command(
    batch& batch,
    const epoch_command& ecmd)

Parameters

batch& batch
const epoch_command& ecmd

¶void compile_execution_command(
    batch& batch,
    const execution_command& ecmd)

Parameters

batch& batch
const execution_command& ecmd

¶void compile_fence_command(
    batch& batch,
    const fence_command& fcmd)

Parameters

batch& batch
const fence_command& fcmd

¶void compile_horizon_command(
    batch& batch,
    const horizon_command& hcmd)

Parameters

batch& batch
const horizon_command& hcmd

¶void compile_push_command(
    batch& batch,
    const push_command& pcmd)

Parameters

batch& batch
const push_command& pcmd

¶void compile_reduction_command(
    batch& batch,
    const reduction_command& rcmd)

Parameters

batch& batch
const reduction_command& rcmd

¶template <typename Instruction,
          typename... CtorParamsAndRecordWithFn>
Instruction* create(
    batch& batch,
    CtorParamsAndRecordWithFn&&... ctor_args_and_record_with)

Description

Create an instruction, insert it into the IDAG and the current execution front, and record it if a recorder is present. Invoke as `create<instruction-type>(instruction-ctor-params..., [&](const auto record_debug_info) { return record_debug_info(instruction-record-additional-ctor-params)})`

Template Parameters

Instruction
CtorParamsAndRecordWithFn

Parameters

batch& batch
CtorParamsAndRecordWithFn&&... ctor_args_and_record_with

¶template <typename Instruction,
          typename... CtorParamsAndRecordWithFn,
          size_t... CtorParamIndices,
          size_t RecordWithFnIndex>
Instruction* create_internal(
    batch& batch,
    const std::tuple<
        CtorParamsAndRecordWithFn...>&
        ctor_args_and_record_with,
    std::index_sequence<CtorParamIndices...>,
    std::index_sequence<RecordWithFnIndex>)

Template Parameters

Instruction
CtorParamsAndRecordWithFn
size_t CtorParamIndices
size_t RecordWithFnIndex

Parameters

batch& batch
const std::tuple<CtorParamsAndRecordWithFn...>& ctor_args_and_record_with
std::index_sequence<CtorParamIndices...>
std::index_sequence<RecordWithFnIndex>

¶message_id create_outbound_pilot(
    batch& batch,
    node_id target,
    const transfer_id& trid,
    const box<3>& box)

Parameters

batch& batch
node_id target
const transfer_id& trid
const box<3>& box

¶void create_task_collective_groups(
    batch& command_batch,
    const task& tsk)

Description

Issue instructions to create any collective group required by a task.

Parameters

batch& command_batch
const task& tsk

¶void defer_await_push_command(
    const await_push_command& apcmd)

Parameters

const await_push_command& apcmd

¶void establish_coherence_between_buffer_memories(
    batch& batch,
    buffer_id bid,
    memory_id dest_mid,
    const std::vector<region<3>>&
        concurrent_reads)

Description

Insert coherence copy instructions where necessary to make dest_mid coherent for all concurrent_reads. Requires the necessary allocations in dest_mid to already be present. We deliberately allow overlapping read-regions to avoid aggregated copies introducing synchronization points between otherwise independent instructions.

Parameters

batch& batch
buffer_id bid
memory_id dest_mid
const std::vector<region<3>>& concurrent_reads

¶void finish_task_local_reduction(
    batch& command_batch,
    const local_reduction& red,
    const reduction_info& rinfo,
    const execution_command& ecmd,
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks)

Description

Combine any partial reduction results computed by local chunks and write the combined result to buffer host memory.

Parameters

batch& command_batch
const local_reduction& red
const reduction_info& rinfo
const execution_command& ecmd
const task& tsk
const std::vector<localized_chunk>& concurrent_chunks

¶void flush_batch(batch&& batch)

Description

Passes all instructions and outbound pilots that have been accumulated in batch to the delegate (if any). Called after compiling a command, creating or destroying a buffer or host object, and also in our constructor for the creation of the initial epoch.

Parameters

batch&& batch

¶bool is_recording() const

Description

True if a recorder is present and create() will call the record_with lambda passed as its last parameter.

¶instruction* launch_task_kernel(
    batch& command_batch,
    const execution_command& ecmd,
    const task& tsk,
    const localized_chunk& chunk)

Description

Launch a device kernel for each local chunk of a task, passing the relevant buffer allocations in place of accessors and reduction descriptors.

Parameters

batch& command_batch
const execution_command& ecmd
const task& tsk
const localized_chunk& chunk

¶allocation_id new_allocation_id(memory_id mid)

Parameters

memory_id mid

¶void perform_atomic_write_to_allocation(
    instruction* writing_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region)

Description

Add dependencies to the last concurrent accesses of a region, and track the instruction as the new last (unique) writer.

Parameters

instruction* writing_instruction
buffer_allocation_state& allocation
const region<3>& region

¶void perform_concurrent_read_from_allocation(
    instruction* reading_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region)

Description

Add dependencies to the last writer of a region, and track the instruction as the new last (concurrent) reader.

Parameters

instruction* reading_instruction
buffer_allocation_state& allocation
const region<3>& region

¶void perform_task_buffer_accesses(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks,
    const std::vector<instruction*>&
        command_instructions)

Description

Add dependencies for all buffer accesses and reductions of a task, then update tracking structures accordingly.

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks
const std::vector<instruction*>& command_instructions

¶void perform_task_collective_operations(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks,
    const std::vector<instruction*>&
        command_instructions)

Description

If a task is part of a collective group, serialize it with respect to the last host task in that group.

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks
const std::vector<instruction*>& command_instructions

¶void perform_task_side_effects(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks,
    const std::vector<instruction*>&
        command_instructions)

Description

If a task has side effects, serialize it with respect to the last task that shares a host object.

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks
const std::vector<instruction*>& command_instructions

¶local_reduction prepare_task_local_reduction(
    batch& command_batch,
    const reduction_info& rinfo,
    const execution_command& ecmd,
    const task& tsk,
    size_t num_concurrent_chunks)

Description

Create a gather allocation and optionally save the current buffer value before creating partial reduction results in any kernel.

Parameters

batch& command_batch
const reduction_info& rinfo
const execution_command& ecmd
const task& tsk
size_t num_concurrent_chunks

¶std::string print_buffer_debug_label(
    buffer_id bid) const

Parameters

buffer_id bid

¶void report_task_overlapping_writes(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks) const

Description

Detect overlapping writes between local chunks of a task and report it according to m_policy.

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks

¶void satisfy_task_buffer_requirements(
    batch& batch,
    buffer_id bid,
    const task& tsk,
    const subrange<3>& local_execution_range,
    bool is_reduction_initializer,
    const std::vector<localized_chunk>&
        concurrent_chunks_after_split)

Description

Allocate memory, apply any pending receives, and issue resize- and coherence copies to prepare all buffer memories for a task's execution.

Parameters

batch& batch
buffer_id bid
const task& tsk
const subrange<3>& local_execution_range
bool is_reduction_initializer
const std::vector<localized_chunk>& concurrent_chunks_after_split

¶std::vector<localized_chunk>
split_task_execution_range(
    const execution_command& ecmd,
    const task& tsk)

Description

Split a task's local execution range (given by execution_command) into chunks according to device configuration and a possible oversubscription hint.

Parameters

const execution_command& ecmd
const task& tsk

class generator_impl

Declaration

Member Variables

Member Function Overview

Member Functions

¶void compile(const abstract_command& cmd)

Parameters

¶generator_impl( const task_manager& tm, size_t num_nodes, node_id local_nid, const system_info& system, instruction_graph& idag, instruction_graph_generator::delegate* dlg, instruction_recorder* recorder, const instruction_graph_generator::policy_set& policy)

Parameters

¶void notify_buffer_created( buffer_id bid, const range<3>& range, size_t elem_size, size_t elem_align, allocation_id user_aid = null_allocation_id)

Parameters

¶void notify_buffer_debug_name_changed( buffer_id bid, const std::string& name)

Parameters

¶void notify_buffer_destroyed(buffer_id bid)

Parameters

¶void notify_host_object_created( host_object_id hoid, bool owns_instance)

Parameters

¶void notify_host_object_destroyed( host_object_id hoid)

Parameters

¶void add_dependencies_on_last_concurrent_accesses( instruction* accessing_instruction, buffer_allocation_state& allocation, const region<3>& region, instruction_dependency_origin origin_for_read_write_front)

Parameters

¶void add_dependencies_on_last_writers( instruction* accessing_instruction, buffer_allocation_state& allocation, const region<3>& region)

Parameters

¶void add_dependency( instruction* from, instruction* to, instruction_dependency_origin record_origin)

Description

Parameters

¶void allocate_contiguously( batch& batch, buffer_id bid, memory_id mid, box_vector<3>&& required_contiguous_boxes)

Description

Parameters

¶void apply_epoch(instruction* epoch)

Description

Parameters

¶void collapse_execution_front_to( instruction* horizon_or_epoch)

Description

Parameters

¶void commit_pending_region_receive_to_host_memory( batch& batch, buffer_id bid, const buffer_state::region_receive& receives, const std::vector<region<3>>& concurrent_reads)

Description

Parameters

¶void compile_epoch_command( batch& batch, const epoch_command& ecmd)

Parameters

¶void compile_execution_command( batch& batch, const execution_command& ecmd)

Parameters

¶void compile_fence_command( batch& batch, const fence_command& fcmd)

Parameters

¶void compile_horizon_command( batch& batch, const horizon_command& hcmd)

Parameters

¶void compile_push_command( batch& batch, const push_command& pcmd)

Parameters

¶void compile_reduction_command( batch& batch, const reduction_command& rcmd)

Parameters

¶template <typename Instruction, typename... CtorParamsAndRecordWithFn> Instruction* create( batch& batch, CtorParamsAndRecordWithFn&&... ctor_args_and_record_with)

Description

Template Parameters

Parameters

Template Parameters

Parameters

¶message_id create_outbound_pilot( batch& batch, node_id target, const transfer_id& trid, const box<3>& box)

Parameters

¶void create_task_collective_groups( batch& command_batch, const task& tsk)

Description

Parameters

¶void defer_await_push_command( const await_push_command& apcmd)

Parameters

¶void establish_coherence_between_buffer_memories( batch& batch, buffer_id bid, memory_id dest_mid, const std::vector<region<3>>& concurrent_reads)

Description

Parameters

¶void finish_task_local_reduction( batch& command_batch, const local_reduction& red, const reduction_info& rinfo, const execution_command& ecmd, const task& tsk, const std::vector<localized_chunk>& concurrent_chunks)

Description

Parameters

¶void flush_batch(batch&& batch)

Description

Parameters

¶bool is_recording() const

Description

¶instruction* launch_task_kernel( batch& command_batch, const execution_command& ecmd, const task& tsk, const localized_chunk& chunk)

Description

Parameters

¶allocation_id new_allocation_id(memory_id mid)

Parameters

¶void perform_atomic_write_to_allocation( instruction* writing_instruction, buffer_allocation_state& allocation, const region<3>& region)

¶`void compile(const abstract_command& cmd)`

¶`generator_impl( const task_manager& tm, size_t num_nodes, node_id local_nid, const system_info& system, instruction_graph& idag, instruction_graph_generator::delegate* dlg, instruction_recorder* recorder, const instruction_graph_generator::policy_set& policy)`

¶`void notify_buffer_created( buffer_id bid, const range<3>& range, size_t elem_size, size_t elem_align, allocation_id user_aid = null_allocation_id)`

¶`void notify_buffer_debug_name_changed( buffer_id bid, const std::string& name)`

¶`void notify_buffer_destroyed(buffer_id bid)`

¶`void notify_host_object_created( host_object_id hoid, bool owns_instance)`

¶`void notify_host_object_destroyed( host_object_id hoid)`

¶`void add_dependencies_on_last_concurrent_accesses( instruction* accessing_instruction, buffer_allocation_state& allocation, const region<3>& region, instruction_dependency_origin origin_for_read_write_front)`

¶`void add_dependencies_on_last_writers( instruction* accessing_instruction, buffer_allocation_state& allocation, const region<3>& region)`

¶`void add_dependency( instruction* from, instruction* to, instruction_dependency_origin record_origin)`

¶`void allocate_contiguously( batch& batch, buffer_id bid, memory_id mid, box_vector<3>&& required_contiguous_boxes)`

¶`void apply_epoch(instruction* epoch)`

¶`void collapse_execution_front_to( instruction* horizon_or_epoch)`

¶`void commit_pending_region_receive_to_host_memory( batch& batch, buffer_id bid, const buffer_state::region_receive& receives, const std::vector<region<3>>& concurrent_reads)`

¶`void compile_epoch_command( batch& batch, const epoch_command& ecmd)`

¶`void compile_execution_command( batch& batch, const execution_command& ecmd)`

¶`void compile_fence_command( batch& batch, const fence_command& fcmd)`

¶`void compile_horizon_command( batch& batch, const horizon_command& hcmd)`

¶`void compile_push_command( batch& batch, const push_command& pcmd)`

¶`void compile_reduction_command( batch& batch, const reduction_command& rcmd)`

¶`template <typename Instruction, typename... CtorParamsAndRecordWithFn> Instruction* create( batch& batch, CtorParamsAndRecordWithFn&&... ctor_args_and_record_with)`

¶`message_id create_outbound_pilot( batch& batch, node_id target, const transfer_id& trid, const box<3>& box)`

¶`void create_task_collective_groups( batch& command_batch, const task& tsk)`

¶`void defer_await_push_command( const await_push_command& apcmd)`

¶`void establish_coherence_between_buffer_memories( batch& batch, buffer_id bid, memory_id dest_mid, const std::vector<region<3>>& concurrent_reads)`

¶`void finish_task_local_reduction( batch& command_batch, const local_reduction& red, const reduction_info& rinfo, const execution_command& ecmd, const task& tsk, const std::vector<localized_chunk>& concurrent_chunks)`

¶`void flush_batch(batch&& batch)`

¶`bool is_recording() const`

¶`instruction* launch_task_kernel( batch& command_batch, const execution_command& ecmd, const task& tsk, const localized_chunk& chunk)`

¶`allocation_id new_allocation_id(memory_id mid)`

¶`void perform_atomic_write_to_allocation( instruction* writing_instruction, buffer_allocation_state& allocation, const region<3>& region)`

¶`void perform_concurrent_read_from_allocation( instruction* reading_instruction, buffer_allocation_state& allocation, const region<3>& region)`

¶`void perform_task_buffer_accesses( const task& tsk, const std::vector<localized_chunk>& concurrent_chunks, const std::vector<instruction*>& command_instructions)`

¶`void perform_task_collective_operations( const task& tsk, const std::vector<localized_chunk>& concurrent_chunks, const std::vector<instruction*>& command_instructions)`

¶`void perform_task_side_effects( const task& tsk, const std::vector<localized_chunk>& concurrent_chunks, const std::vector<instruction*>& command_instructions)`

¶`local_reduction prepare_task_local_reduction( batch& command_batch, const reduction_info& rinfo, const execution_command& ecmd, const task& tsk, size_t num_concurrent_chunks)`

¶`std::string print_buffer_debug_label( buffer_id bid) const`

¶`void report_task_overlapping_writes( const task& tsk, const std::vector<localized_chunk>& concurrent_chunks) const`

¶`void satisfy_task_buffer_requirements( batch& batch, buffer_id bid, const task& tsk, const subrange<3>& local_execution_range, bool is_reduction_initializer, const std::vector<localized_chunk>& concurrent_chunks_after_split)`

¶`std::vector<localized_chunk> split_task_execution_range( const execution_command& ecmd, const task& tsk)`