Skip to main content

class generator_impl

Declaration

class generator_impl { /* full declaration omitted */ };

Declared at: src/instruction_graph_generator.cc:587

Member Variables

instruction_graph* m_idag
size_t m_num_nodes
node_id m_local_nid
system_info m_system
instruction_graph_generator::delegate* m_delegate
instruction_recorder* m_recorder
instruction_graph_generator::policy_set m_policy
instruction_id m_next_instruction_id = 0
message_id m_next_message_id = 0
instruction* m_last_horizon = nullptr
instruction* m_last_epoch = nullptr
std::unordered_set<instruction_id> m_execution_front
The set of all instructions that are not yet depended upon by other instructions. These are collected by collapse_execution_front_to() as part of horizon / epoch generation.
dense_map<memory_id, memory_state> m_memories
std::unordered_map<buffer_id, buffer_state> m_buffers
std::unordered_map<host_object_id, host_object_state> m_host_objects
std::unordered_map<collective_group_id, collective_group_state> m_collective_groups
std::vector<allocation_id> m_unreferenced_user_allocations
The instruction executor maintains a mapping of allocation_id -> USM pointer. For IDAG-managed memory, these entries are deleted after executing a `free_instruction`, but since user allocations are not deallocated by us, we notify the executor on each horizon or epoch via the `instruction_garbage` struct about entries that will no longer be used and can therefore be collected. We include user allocations for buffer fences immediately after emitting the fence, and buffer host-initialization user allocations after the buffer has been destroyed.
static const box<3> scalar_reduction_box = {zeros, ones}

Member Function Overview

Member Functions

instruction_graph_generator::scheduling_hint
anticipate(const command& cmd)

Declared at: src/instruction_graph_generator.cc:597

Parameters

const command& cmd

void compile(const command& cmd)

Declared at: src/instruction_graph_generator.cc:598

Parameters

const command& cmd

generator_impl(
    size_t num_nodes,
    node_id local_nid,
    const system_info& system,
    instruction_graph& idag,
    instruction_graph_generator::delegate* dlg,
    instruction_recorder* recorder,
    const instruction_graph_generator::policy_set&
        policy)

Declared at: src/instruction_graph_generator.cc:589

Parameters

size_t num_nodes
node_id local_nid
const system_info& system
instruction_graph& idag
instruction_graph_generator::delegate* dlg
instruction_recorder* recorder
const instruction_graph_generator::policy_set& policy

void notify_buffer_created(
    buffer_id bid,
    const range<3>& range,
    size_t elem_size,
    size_t elem_align,
    allocation_id user_aid = null_allocation_id)

Declared at: src/instruction_graph_generator.cc:592

Parameters

buffer_id bid
const range<3>& range
size_t elem_size
size_t elem_align
allocation_id user_aid = null_allocation_id

void notify_buffer_debug_name_changed(
    buffer_id bid,
    const std::string& name)

Declared at: src/instruction_graph_generator.cc:593

Parameters

buffer_id bid
const std::string& name

void notify_buffer_destroyed(buffer_id bid)

Declared at: src/instruction_graph_generator.cc:594

Parameters

buffer_id bid

void notify_host_object_created(
    host_object_id hoid,
    bool owns_instance)

Declared at: src/instruction_graph_generator.cc:595

Parameters

host_object_id hoid
bool owns_instance

void notify_host_object_destroyed(
    host_object_id hoid)

Declared at: src/instruction_graph_generator.cc:596

Parameters

host_object_id hoid

staging_allocation& acquire_staging_allocation(
    batch& current_batch,
    memory_id mid,
    size_t size_bytes,
    size_t align_bytes)

Description

Create a new host allocation for copy staging, or re-use a cached staging allocation whose last access is older than the current epoch.

Declared at: src/instruction_graph_generator.cc:678

Parameters

batch& current_batch
memory_id mid
size_t size_bytes
size_t align_bytes

void add_dependencies_on_access_front(
    instruction* accessing_instruction,
    const access_front& front,
    instruction_dependency_origin
        origin_for_read_write_front)

Declared at: src/instruction_graph_generator.cc:657

Parameters

instruction* accessing_instruction
const access_front& front
instruction_dependency_origin origin_for_read_write_front

void add_dependencies_on_last_concurrent_accesses(
    instruction* accessing_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region,
    instruction_dependency_origin
        origin_for_read_write_front)

Declared at: src/instruction_graph_generator.cc:665

Parameters

instruction* accessing_instruction
buffer_allocation_state& allocation
const region<3>& region
instruction_dependency_origin origin_for_read_write_front

void add_dependencies_on_last_writers(
    instruction* accessing_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region)

Declared at: src/instruction_graph_generator.cc:660

Parameters

instruction* accessing_instruction
buffer_allocation_state& allocation
const region<3>& region

void add_dependency(
    instruction* from,
    instruction* to,
    instruction_dependency_origin record_origin)

Description

Inserts a graph dependency and removes `to` from the execution front (if present). The `record_origin` is debug information.

Declared at: src/instruction_graph_generator.cc:655

Parameters

instruction* from
instruction* to
instruction_dependency_origin record_origin

void allocate_contiguously(
    batch& batch,
    buffer_id bid,
    memory_id mid,
    box_vector<3>&& required_contiguous_boxes)

Description

Ensure that all boxes in required_contiguous_boxes have a contiguous allocation on mid. Re-allocation of one buffer on one memory never interacts with other buffers or other memories backing the same buffer, so this function can be called in any order of allocation requirements without generating additional dependencies.

Declared at: src/instruction_graph_generator.cc:686

Parameters

batch& batch
buffer_id bid
memory_id mid
box_vector<3>&& required_contiguous_boxes

void apply_epoch(instruction* epoch)

Description

Replace all tracked instructions that are older than epoch with epoch.

Declared at: src/instruction_graph_generator.cc:672

Parameters

instruction* epoch

void collapse_execution_front_to(
    instruction* horizon_or_epoch)

Description

Add dependencies from horizon_or_epoch to all instructions in m_execution_front and clear the set.

Declared at: src/instruction_graph_generator.cc:675

Parameters

instruction* horizon_or_epoch

void commit_pending_region_receive_to_host_memory(
    batch& batch,
    buffer_id bid,
    const buffer_state::region_receive& receives,
    const std::vector<region<3>>&
        concurrent_reads)

Description

Insert one or more receive instructions in order to fulfil a pending receive, making the received data available in host_memory_id. This may entail receiving a region that is larger than the union of all regions read.

Declared at: src/instruction_graph_generator.cc:690

Parameters

batch& batch
buffer_id bid
const buffer_state::region_receive& receives
const std::vector<region<3>>& concurrent_reads

void compile_epoch_command(
    batch& batch,
    const epoch_command& ecmd)

Declared at: src/instruction_graph_generator.cc:741

Parameters

batch& batch
const epoch_command& ecmd

void compile_execution_command(
    batch& batch,
    const execution_command& ecmd)

Declared at: src/instruction_graph_generator.cc:735

Parameters

batch& batch
const execution_command& ecmd

void compile_fence_command(
    batch& batch,
    const fence_command& fcmd)

Declared at: src/instruction_graph_generator.cc:739

Parameters

batch& batch
const fence_command& fcmd

void compile_horizon_command(
    batch& batch,
    const horizon_command& hcmd)

Declared at: src/instruction_graph_generator.cc:740

Parameters

batch& batch
const horizon_command& hcmd

void compile_push_command(
    batch& batch,
    const push_command& pcmd)

Declared at: src/instruction_graph_generator.cc:736

Parameters

batch& batch
const push_command& pcmd

void compile_reduction_command(
    batch& batch,
    const reduction_command& rcmd)

Declared at: src/instruction_graph_generator.cc:738

Parameters

batch& batch
const reduction_command& rcmd

template <typename Instruction,
          typename... CtorParamsAndRecordWithFn>
Instruction* create(
    batch& batch,
    CtorParamsAndRecordWithFn&&... ctor_args_and_record_with)

Description

Create an instruction, insert it into the IDAG and the current execution front, and record it if a recorder is present. Invoke as `create<instruction-type>(instruction-ctor-params..., [&](const auto record_debug_info) { return record_debug_info(instruction-record-additional-ctor-params); })`

Declared at: src/instruction_graph_generator.cc:650

Template Parameters

Instruction
CtorParamsAndRecordWithFn

Parameters

batch& batch
CtorParamsAndRecordWithFn&&... ctor_args_and_record_with

template <typename Instruction,
          typename... CtorParamsAndRecordWithFn,
          size_t... CtorParamIndices,
          size_t RecordWithFnIndex>
Instruction* create_internal(
    batch& batch,
    const std::tuple<
        CtorParamsAndRecordWithFn...>&
        ctor_args_and_record_with,
    std::index_sequence<CtorParamIndices...>,
    std::index_sequence<RecordWithFnIndex>)

Declared at: src/instruction_graph_generator.cc:639

Template Parameters

Instruction
CtorParamsAndRecordWithFn
size_t CtorParamIndices
size_t RecordWithFnIndex

Parameters

batch& batch
const std::tuple<CtorParamsAndRecordWithFn...>& ctor_args_and_record_with
std::index_sequence<CtorParamIndices...>
std::index_sequence<RecordWithFnIndex>

message_id create_outbound_pilot(
    batch& batch,
    node_id target,
    const transfer_id& trid,
    const box<3>& box)

Declared at: src/instruction_graph_generator.cc:652

Parameters

batch& batch
node_id target
const transfer_id& trid
const box<3>& box

void create_task_collective_groups(
    batch& command_batch,
    const task& tsk)

Description

Issue instructions to create any collective group required by a task.

Declared at: src/instruction_graph_generator.cc:700

Parameters

batch& command_batch
const task& tsk

void defer_await_push_command(
    const await_push_command& apcmd)

Declared at: src/instruction_graph_generator.cc:737

Parameters

const await_push_command& apcmd

void establish_coherence_between_buffer_memories(
    batch& current_batch,
    buffer_id bid,
    dense_map<memory_id, std::vector<region<3>>>&
        concurrent_reads_from_memory)

Description

Insert coherence copy instructions where necessary to make all specified regions coherent on their respective memories. Requires the necessary allocations in dest_mid to already be present. We deliberately allow overlapping read-regions to avoid aggregated copies introducing synchronization points between otherwise independent instructions.

Declared at: src/instruction_graph_generator.cc:696

Parameters

batch& current_batch
buffer_id bid
dense_map<memory_id, std::vector<region<3>>>& concurrent_reads_from_memory

void finish_task_local_reduction(
    batch& command_batch,
    const local_reduction& red,
    const reduction_info& rinfo,
    const execution_command& ecmd,
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks)

Description

Combine any partial reduction results computed by local chunks and write the combined result to buffer host memory.

Declared at: src/instruction_graph_generator.cc:717

Parameters

batch& command_batch
const local_reduction& red
const reduction_info& rinfo
const execution_command& ecmd
const task& tsk
const std::vector<localized_chunk>& concurrent_chunks

void flush_batch(batch&& batch)

Description

Passes all instructions and outbound pilots that have been accumulated in batch to the delegate (if any). Called after compiling a command, creating or destroying a buffer or host object, and also in our constructor for the creation of the initial epoch.

Declared at: src/instruction_graph_generator.cc:745

Parameters

batch&& batch

void free_all_staging_allocations(
    batch& current_batch)

Description

Free all cached staging allocations allocated so far.

Declared at: src/instruction_graph_generator.cc:681

Parameters

batch& current_batch

bool is_recording() const

Description

True if a recorder is present and create() will call the record_with lambda passed as its last parameter.

Declared at: src/instruction_graph_generator.cc:634


instruction* launch_task_kernel(
    batch& command_batch,
    const execution_command& ecmd,
    const task& tsk,
    const localized_chunk& chunk)

Description

Launch a device kernel for each local chunk of a task, passing the relevant buffer allocations in place of accessors and reduction descriptors.

Declared at: src/instruction_graph_generator.cc:721

Parameters

batch& command_batch
const execution_command& ecmd
const task& tsk
const localized_chunk& chunk

allocation_id new_allocation_id(memory_id mid)

Declared at: src/instruction_graph_generator.cc:636

Parameters

memory_id mid

void perform_atomic_write_to_allocation(
    instruction* writing_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region)

Description

Add dependencies to the last concurrent accesses of a region, and track the instruction as the new last (unique) writer.

Declared at: src/instruction_graph_generator.cc:669

Parameters

instruction* writing_instruction
buffer_allocation_state& allocation
const region<3>& region

void perform_concurrent_read_from_allocation(
    instruction* reading_instruction,
    buffer_allocation_state& allocation,
    const region<3>& region)

Description

Add dependencies to the last writer of a region, and track the instruction as the new last (concurrent) reader.

Declared at: src/instruction_graph_generator.cc:663

Parameters

instruction* reading_instruction
buffer_allocation_state& allocation
const region<3>& region

void perform_task_buffer_accesses(
    buffer_id bid,
    const execution_command& ecmd,
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks,
    const std::vector<instruction*>&
        command_instructions)

Description

Add dependencies for all buffer accesses and reductions of a task, then update tracking structures accordingly.

Declared at: src/instruction_graph_generator.cc:724

Parameters

buffer_id bid
const execution_command& ecmd
const task& tsk
const std::vector<localized_chunk>& concurrent_chunks
const std::vector<instruction*>& command_instructions

void perform_task_collective_operations(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks,
    const std::vector<instruction*>&
        command_instructions)

Description

If a task is part of a collective group, serialize it with respect to the last host task in that group.

Declared at: src/instruction_graph_generator.cc:732

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks
const std::vector<instruction*>& command_instructions

void perform_task_side_effects(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks,
    const std::vector<instruction*>&
        command_instructions)

Description

If a task has side effects, serialize it with respect to the last task that shares a host object.

Declared at: src/instruction_graph_generator.cc:728

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks
const std::vector<instruction*>& command_instructions

local_reduction prepare_task_local_reduction(
    batch& command_batch,
    const reduction_info& rinfo,
    const execution_command& ecmd,
    const task& tsk,
    size_t num_concurrent_chunks)

Description

Create a gather allocation and optionally save the current buffer value before creating partial reduction results in any kernel.

Declared at: src/instruction_graph_generator.cc:713

Parameters

batch& command_batch
const reduction_info& rinfo
const execution_command& ecmd
const task& tsk
size_t num_concurrent_chunks

std::string print_buffer_debug_label(
    buffer_id bid) const

Declared at: src/instruction_graph_generator.cc:747

Parameters

buffer_id bid

void report_task_overlapping_writes(
    const task& tsk,
    const std::vector<localized_chunk>&
        concurrent_chunks) const

Description

Detect overlapping writes between local chunks of a task and report them according to m_policy.

Declared at: src/instruction_graph_generator.cc:706

Parameters

const task& tsk
const std::vector<localized_chunk>& concurrent_chunks

void satisfy_task_buffer_requirements(
    batch& batch,
    buffer_id bid,
    const task& tsk,
    const subrange<3>& local_execution_range,
    bool is_reduction_initializer,
    const std::vector<localized_chunk>&
        concurrent_chunks_after_split)

Description

Allocate memory, apply any pending receives, and issue resize- and coherence copies to prepare all buffer memories for a task's execution.

Declared at: src/instruction_graph_generator.cc:709

Parameters

batch& batch
buffer_id bid
const task& tsk
const subrange<3>& local_execution_range
bool is_reduction_initializer
const std::vector<localized_chunk>& concurrent_chunks_after_split

std::vector<localized_chunk>
split_task_execution_range(
    const execution_command& ecmd,
    const task& tsk)

Description

Split a task's local execution range (given by execution_command) into chunks according to device configuration and a possible oversubscription hint.

Declared at: src/instruction_graph_generator.cc:703

Parameters

const execution_command& ecmd
const task& tsk