Skip to main content

class generator_impl

Declaration

class generator_impl { /* full declaration omitted */ };

Member Variables

instruction_graph* m_idag
const task_manager* m_tm
size_t m_num_nodes
node_id m_local_nid
system_info m_system
instruction_graph_generator::delegate* m_delegate
instruction_recorder* m_recorder
instruction_graph_generator::policy_set m_policy
instruction_id m_next_instruction_id = 0
message_id m_next_message_id = 0
instruction* m_last_horizon = nullptr
instruction* m_last_epoch = nullptr
std::unordered_set<instruction_id> m_execution_front
The set of all instructions that are not yet depended upon by other instructions. These are collected by collapse_execution_front_to() as part of horizon / epoch generation.
dense_map<memory_id, memory_state> m_memories
std::unordered_map<buffer_id, buffer_state> m_buffers
std::unordered_map<host_object_id, host_object_state> m_host_objects
std::unordered_map<collective_group_id, collective_group_state> m_collective_groups
std::vector<allocation_id> m_unreferenced_user_allocations
The instruction executor maintains a mapping of allocation_id -> USM pointer. For IDAG-managed memory, these entries are deleted after executing a `free_instruction`, but since user allocations are not deallocated by us, we notify the executor on each horizon or epoch via the `instruction_garbage` struct about entries that will no longer be used and can therefore be collected. We include user allocations for buffer fences immediately after emitting the fence, and buffer host-initialization user allocations after the buffer has been destroyed.
static const box<3> scalar_reduction_box = {zeros, ones}

Member Function Overview

Member Functions

void compile(const abstract_command& cmd)

Parameters

const abstract_command& cmd

generator_impl(
    const task_manager& tm,
    size_t num_nodes,
    node_id local_nid,
    const system_info& system,
    instruction_graph& idag,
    instruction_graph_generator::delegate* dlg,
    instruction_recorder* recorder,
    const instruction_graph_generator::policy_set&
        policy)

Parameters

const task_manager& tm
size_t num_nodes
node_id local_nid
const system_info& system
instruction_graph& idag
instruction_graph_generator::delegate* dlg
instruction_recorder* recorder
const instruction_graph_generator::policy_set& policy

void notify_buffer_created(
    buffer_id bid,
    const range<3>& range,
    size_t elem_size,
    size_t elem_align,
    allocation_id user_aid = null_allocation_id)

Parameters

buffer_id bid
const range<3>& range
size_t elem_size
size_t elem_align
allocation_id user_aid = null_allocation_id

void notify_buffer_debug_name_changed(
    buffer_id bid,
    const std::string& name)

Parameters

buffer_id bid
const std::string& name

void notify_buffer_destroyed(buffer_id bid)

Parameters

buffer_id bid

void notify_host_object_created(
    host_object_id hoid,
    bool owns_instance)

Parameters

host_object_id hoid
bool owns_instance

void notify_host_object_destroyed(
    host_object_id hoid)

Parameters

host_object_id hoid

void add_dependencies_on_last_concurrent_accesses(
    instruction* accessing_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region,
    instruction_dependency_origin
        origin_for_read_write_front)

Parameters

instruction* accessing_instruction
buffer_allocation_state& allocation
const region<3>& region
instruction_dependency_origin origin_for_read_write_front

void add_dependencies_on_last_writers(
    instruction* accessing_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region)

Parameters

instruction* accessing_instruction
buffer_allocation_state& allocation
const region<3>& region

void add_dependency(
    instruction* from,
    instruction* to,
    instruction_dependency_origin record_origin)

Description

Inserts a graph dependency and removes `to` from the execution front (if present). The `record_origin` is debug information.

Parameters

instruction* from
instruction* to
instruction_dependency_origin record_origin

void allocate_contiguously(
    batch& batch,
    buffer_id bid,
    memory_id mid,
    box_vector<3>&& required_contiguous_boxes)

Description

Ensure that all boxes in required_contiguous_boxes have a contiguous allocation on mid. Because re-allocation of one buffer on one memory never interacts with other buffers or with other memories backing the same buffer, this function can be called in any order of allocation requirements without generating additional dependencies.

Parameters

batch& batch
buffer_id bid
memory_id mid
box_vector<3>&& required_contiguous_boxes

void apply_epoch(instruction* epoch)

Description

Replace all tracked instructions that are older than epoch with epoch.

Parameters

instruction* epoch

void collapse_execution_front_to(
    instruction* horizon_or_epoch)

Description

Add dependencies from horizon_or_epoch to all instructions in m_execution_front and clear the set.

Parameters

instruction* horizon_or_epoch

void commit_pending_region_receive_to_host_memory(
    batch& batch,
    buffer_id bid,
    const buffer_state::region_receive& receives,
    const std::vector<region<3>>&
        concurrent_reads)

Description

Insert one or more receive instructions in order to fulfil a pending receive, making the received data available in host_memory_id. This may entail receiving a region that is larger than the union of all regions read.

Parameters

batch& batch
buffer_id bid
const buffer_state::region_receive& receives
const std::vector<region<3>>& concurrent_reads

void compile_epoch_command(
    batch& batch,
    const epoch_command& ecmd)

Parameters

batch& batch
const epoch_command& ecmd

void compile_execution_command(
    batch& batch,
    const execution_command& ecmd)

Parameters

batch& batch
const execution_command& ecmd

void compile_fence_command(
    batch& batch,
    const fence_command& fcmd)

Parameters

batch& batch
const fence_command& fcmd

void compile_horizon_command(
    batch& batch,
    const horizon_command& hcmd)

Parameters

batch& batch
const horizon_command& hcmd

void compile_push_command(
    batch& batch,
    const push_command& pcmd)

Parameters

batch& batch
const push_command& pcmd

void compile_reduction_command(
    batch& batch,
    const reduction_command& rcmd)

Parameters

batch& batch
const reduction_command& rcmd

template <typename Instruction,
          typename... CtorParamsAndRecordWithFn>
Instruction* create(
    batch& batch,
    CtorParamsAndRecordWithFn&&... ctor_args_and_record_with)

Description

Create an instruction, insert it into the IDAG and the current execution front, and record it if a recorder is present. Invoke as `create<instruction-type>(instruction-ctor-params..., [&](const auto record_debug_info) { return record_debug_info(instruction-record-additional-ctor-params); })`.

Template Parameters

Instruction
CtorParamsAndRecordWithFn

Parameters

batch& batch
CtorParamsAndRecordWithFn&&... ctor_args_and_record_with

template <typename Instruction,
          typename... CtorParamsAndRecordWithFn,
          size_t... CtorParamIndices,
          size_t RecordWithFnIndex>
Instruction* create_internal(
    batch& batch,
    const std::tuple<
        CtorParamsAndRecordWithFn...>&
        ctor_args_and_record_with,
    std::index_sequence<CtorParamIndices...>,
    std::index_sequence<RecordWithFnIndex>)

Template Parameters

Instruction
CtorParamsAndRecordWithFn
size_t CtorParamIndices
size_t RecordWithFnIndex

Parameters

batch& batch
const std::tuple<CtorParamsAndRecordWithFn...>& ctor_args_and_record_with
std::index_sequence<CtorParamIndices...>
std::index_sequence<RecordWithFnIndex>

message_id create_outbound_pilot(
    batch& batch,
    node_id target,
    const transfer_id& trid,
    const box<3>& box)

Parameters

batch& batch
node_id target
const transfer_id& trid
const box<3>& box

void create_task_collective_groups(
    batch& command_batch,
    const task& tsk)

Description

Issue instructions to create any collective group required by a task.

Parameters

batch& command_batch
const task& tsk

void defer_await_push_command(
    const await_push_command& apcmd)

Parameters

const await_push_command& apcmd

void establish_coherence_between_buffer_memories(
    batch& batch,
    buffer_id bid,
    memory_id dest_mid,
    const std::vector<region<3>>&
        concurrent_reads)

Description

Insert coherence copy instructions where necessary to make dest_mid coherent for all concurrent_reads. Requires the necessary allocations in dest_mid to already be present. We deliberately allow overlapping read-regions to avoid aggregated copies introducing synchronization points between otherwise independent instructions.

Parameters

batch& batch
buffer_id bid
memory_id dest_mid
const std::vector<region<3>>& concurrent_reads

void finish_task_local_reduction(
    batch& command_batch,
    const local_reduction& red,
    const reduction_info& rinfo,
    const execution_command& ecmd,
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks)

Description

Combine any partial reduction results computed by local chunks and write the combined result to buffer host memory.

Parameters

batch& command_batch
const local_reduction& red
const reduction_info& rinfo
const execution_command& ecmd
const task& tsk
const std::vector<localized_chunk>& concurrent_chunks

void flush_batch(batch&& batch)

Description

Passes all instructions and outbound pilots that have been accumulated in batch to the delegate (if any). Called after compiling a command, creating or destroying a buffer or host object, and also in our constructor for the creation of the initial epoch.

Parameters

batch&& batch

bool is_recording() const

Description

True if a recorder is present and create() will call the record_with lambda passed as its last parameter.


instruction* launch_task_kernel(
    batch& command_batch,
    const execution_command& ecmd,
    const task& tsk,
    const localized_chunk& chunk)

Description

Launch a device kernel for each local chunk of a task, passing the relevant buffer allocations in place of accessors and reduction descriptors.

Parameters

batch& command_batch
const execution_command& ecmd
const task& tsk
const localized_chunk& chunk

allocation_id new_allocation_id(memory_id mid)

Parameters

memory_id mid

void perform_atomic_write_to_allocation(
    instruction* writing_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region)

Description

Add dependencies to the last concurrent accesses of a region, and track the instruction as the new last (unique) writer.

Parameters

instruction* writing_instruction
buffer_allocation_state& allocation
const region<3>& region

void perform_concurrent_read_from_allocation(
    instruction* reading_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region)

Description

Add dependencies to the last writer of a region, and track the instruction as the new last (concurrent) reader.

Parameters

instruction* reading_instruction
buffer_allocation_state& allocation
const region<3>& region

void perform_task_buffer_accesses(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks,
    const std::vector<instruction*>&
        command_instructions)

Description

Add dependencies for all buffer accesses and reductions of a task, then update tracking structures accordingly.

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks
const std::vector<instruction*>& command_instructions

void perform_task_collective_operations(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks,
    const std::vector<instruction*>&
        command_instructions)

Description

If a task is part of a collective group, serialize it with respect to the last host task in that group.

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks
const std::vector<instruction*>& command_instructions

void perform_task_side_effects(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks,
    const std::vector<instruction*>&
        command_instructions)

Description

If a task has side effects, serialize it with respect to the last task that shares a host object.

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks
const std::vector<instruction*>& command_instructions

local_reduction prepare_task_local_reduction(
    batch& command_batch,
    const reduction_info& rinfo,
    const execution_command& ecmd,
    const task& tsk,
    size_t num_concurrent_chunks)

Description

Create a gather allocation and optionally save the current buffer value before creating partial reduction results in any kernel.

Parameters

batch& command_batch
const reduction_info& rinfo
const execution_command& ecmd
const task& tsk
size_t num_concurrent_chunks

std::string print_buffer_debug_label(
    buffer_id bid) const

Parameters

buffer_id bid

void report_task_overlapping_writes(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks) const

Description

Detect overlapping writes between local chunks of a task and report them according to m_policy.

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks

void satisfy_task_buffer_requirements(
    batch& batch,
    buffer_id bid,
    const task& tsk,
    const subrange<3>& local_execution_range,
    bool is_reduction_initializer,
    const std::vector<localized_chunk>&
        concurrent_chunks_after_split)

Description

Allocate memory, apply any pending receives, and issue resize- and coherence copies to prepare all buffer memories for a task's execution.

Parameters

batch& batch
buffer_id bid
const task& tsk
const subrange<3>& local_execution_range
bool is_reduction_initializer
const std::vector<localized_chunk>& concurrent_chunks_after_split

std::vector<localized_chunk>
split_task_execution_range(
    const execution_command& ecmd,
    const task& tsk)

Description

Split a task's local execution range (given by execution_command) into chunks according to device configuration and a possible oversubscription hint.

Parameters

const execution_command& ecmd
const task& tsk