class generator_impl
Declaration
class generator_impl { /* full declaration omitted */ };
Declared at: src/instruction_graph_generator.cc:587
Member Variables
- instruction_graph* m_idag
- size_t m_num_nodes
- node_id m_local_nid
- system_info m_system
- instruction_graph_generator::delegate* m_delegate
- instruction_recorder* m_recorder
- instruction_graph_generator::policy_set m_policy
- instruction_id m_next_instruction_id = 0
- message_id m_next_message_id = 0
- instruction* m_last_horizon = nullptr
- instruction* m_last_epoch = nullptr
- std::unordered_set<instruction_id> m_execution_front
- The set of all instructions that are not yet depended upon by other instructions. These are collected by collapse_execution_front_to() as part of horizon / epoch generation.
- dense_map<memory_id, memory_state> m_memories
- std::unordered_map<buffer_id, buffer_state> m_buffers
- std::unordered_map<host_object_id, host_object_state> m_host_objects
- std::unordered_map<collective_group_id, collective_group_state> m_collective_groups
- std::vector<allocation_id> m_unreferenced_user_allocations
- The instruction executor maintains a mapping of allocation_id -> USM pointer. For IDAG-managed memory, these entries are deleted after executing a `free_instruction`, but since user allocations are not deallocated by us, we notify the executor on each horizon or epoch via the `instruction_garbage` struct about entries that will no longer be used and can therefore be collected. We include user allocations for buffer fences immediately after emitting the fence, and buffer host-initialization user allocations after the buffer has been destroyed.
- static const box<3> scalar_reduction_box = {zeros, ones}
Member Function Overview
- anticipate(const command & cmd) → instruction_graph_generator::scheduling_hint
- compile(const command & cmd) → void
- generator_impl(size_t num_nodes, node_id local_nid, const system_info & system, instruction_graph & idag, instruction_graph_generator::delegate * dlg, instruction_recorder * recorder, const instruction_graph_generator::policy_set & policy)
- notify_buffer_created(buffer_id bid, const range<3> & range, size_t elem_size, size_t elem_align, allocation_id user_aid = null_allocation_id) → void
- notify_buffer_debug_name_changed(buffer_id bid, const std::string & name) → void
- notify_buffer_destroyed(buffer_id bid) → void
- notify_host_object_created(host_object_id hoid, bool owns_instance) → void
- notify_host_object_destroyed(host_object_id hoid) → void
- acquire_staging_allocation(batch & current_batch, memory_id mid, size_t size_bytes, size_t align_bytes) → staging_allocation &
- add_dependencies_on_access_front(instruction * accessing_instruction, const access_front & front, instruction_dependency_origin origin_for_read_write_front) → void
- add_dependencies_on_last_concurrent_accesses(instruction * accessing_instruction, buffer_allocation_state & allocation, const region<3> & region, instruction_dependency_origin origin_for_read_write_front) → void
- add_dependencies_on_last_writers(instruction * accessing_instruction, buffer_allocation_state & allocation, const region<3> & region) → void
- add_dependency(instruction * from, instruction * to, instruction_dependency_origin record_origin) → void
- allocate_contiguously(batch & batch, buffer_id bid, memory_id mid, box_vector<3> && required_contiguous_boxes) → void
- apply_epoch(instruction * epoch) → void
- collapse_execution_front_to(instruction * horizon_or_epoch) → void
- commit_pending_region_receive_to_host_memory(batch & batch, buffer_id bid, const buffer_state::region_receive & receives, const std::vector<region<3>> & concurrent_reads) → void
- compile_epoch_command(batch & batch, const epoch_command & ecmd) → void
- compile_execution_command(batch & batch, const execution_command & ecmd) → void
- compile_fence_command(batch & batch, const fence_command & fcmd) → void
- compile_horizon_command(batch & batch, const horizon_command & hcmd) → void
- compile_push_command(batch & batch, const push_command & pcmd) → void
- compile_reduction_command(batch & batch, const reduction_command & rcmd) → void
- template <typename Instruction, typename... CtorParamsAndRecordWithFn> create(batch & batch, CtorParamsAndRecordWithFn &&... ctor_args_and_record_with) → Instruction *
- template <typename Instruction, typename... CtorParamsAndRecordWithFn, size_t... CtorParamIndices, size_t RecordWithFnIndex> create_internal(batch & batch, const std::tuple<CtorParamsAndRecordWithFn...> & ctor_args_and_record_with, std::index_sequence<CtorParamIndices...>, std::index_sequence<RecordWithFnIndex>) → Instruction *
- create_outbound_pilot(batch & batch, node_id target, const transfer_id & trid, const box<3> & box) → message_id
- create_task_collective_groups(batch & command_batch, const task & tsk) → void
- defer_await_push_command(const await_push_command & apcmd) → void
- establish_coherence_between_buffer_memories(batch & current_batch, buffer_id bid, dense_map<memory_id, std::vector<region<3>>> & concurrent_reads_from_memory) → void
- finish_task_local_reduction(batch & command_batch, const local_reduction & red, const reduction_info & rinfo, const execution_command & ecmd, const task & tsk, const std::vector<localized_chunk> & concurrent_chunks) → void
- flush_batch(batch && batch) → void
- free_all_staging_allocations(batch & current_batch) → void
- is_recording() const → bool
- launch_task_kernel(batch & command_batch, const execution_command & ecmd, const task & tsk, const localized_chunk & chunk) → instruction *
- new_allocation_id(memory_id mid) → allocation_id
- perform_atomic_write_to_allocation(instruction * writing_instruction, buffer_allocation_state & allocation, const region<3> & region) → void
- perform_concurrent_read_from_allocation(instruction * reading_instruction, buffer_allocation_state & allocation, const region<3> & region) → void
- perform_task_buffer_accesses(buffer_id bid, const execution_command & ecmd, const task & tsk, const std::vector<localized_chunk> & concurrent_chunks, const std::vector<instruction *> & command_instructions) → void
- perform_task_collective_operations(const task & tsk, const std::vector<localized_chunk> & concurrent_chunks, const std::vector<instruction *> & command_instructions) → void
- perform_task_side_effects(const task & tsk, const std::vector<localized_chunk> & concurrent_chunks, const std::vector<instruction *> & command_instructions) → void
- prepare_task_local_reduction(batch & command_batch, const reduction_info & rinfo, const execution_command & ecmd, const task & tsk, size_t num_concurrent_chunks) → local_reduction
- print_buffer_debug_label(buffer_id bid) const → std::string
- report_task_overlapping_writes(const task & tsk, const std::vector<localized_chunk> & concurrent_chunks) const → void
- satisfy_task_buffer_requirements(batch & batch, buffer_id bid, const task & tsk, const subrange<3> & local_execution_range, bool is_reduction_initializer, const std::vector<localized_chunk> & concurrent_chunks_after_split) → void
- split_task_execution_range(const execution_command & ecmd, const task & tsk) → std::vector<localized_chunk>
Member Functions
¶instruction_graph_generator::scheduling_hint
anticipate(const command& cmd)
instruction_graph_generator::scheduling_hint
anticipate(const command& cmd)
Declared at: src/instruction_graph_generator.cc:597
Parameters
- const command& cmd
¶void compile(const command& cmd)
void compile(const command& cmd)
Declared at: src/instruction_graph_generator.cc:598
Parameters
- const command& cmd
¶generator_impl(
size_t num_nodes,
node_id local_nid,
const system_info& system,
instruction_graph& idag,
instruction_graph_generator::delegate* dlg,
instruction_recorder* recorder,
const instruction_graph_generator::policy_set&
policy)
generator_impl(
size_t num_nodes,
node_id local_nid,
const system_info& system,
instruction_graph& idag,
instruction_graph_generator::delegate* dlg,
instruction_recorder* recorder,
const instruction_graph_generator::policy_set&
policy)
Declared at: src/instruction_graph_generator.cc:589
Parameters
- size_t num_nodes
- node_id local_nid
- const system_info& system
- instruction_graph& idag
- instruction_graph_generator::delegate* dlg
- instruction_recorder* recorder
- const instruction_graph_generator::policy_set& policy
¶void notify_buffer_created(
buffer_id bid,
const range<3>& range,
size_t elem_size,
size_t elem_align,
allocation_id user_aid = null_allocation_id)
void notify_buffer_created(
buffer_id bid,
const range<3>& range,
size_t elem_size,
size_t elem_align,
allocation_id user_aid = null_allocation_id)
Declared at: src/instruction_graph_generator.cc:592
Parameters
- buffer_id bid
- const range<3>& range
- size_t elem_size
- size_t elem_align
- allocation_id user_aid = null_allocation_id
¶void notify_buffer_debug_name_changed(
buffer_id bid,
const std::string& name)
void notify_buffer_debug_name_changed(
buffer_id bid,
const std::string& name)
Declared at: src/instruction_graph_generator.cc:593
Parameters
- buffer_id bid
- const std::string& name
¶void notify_buffer_destroyed(buffer_id bid)
void notify_buffer_destroyed(buffer_id bid)
Declared at: src/instruction_graph_generator.cc:594
Parameters
- buffer_id bid
¶void notify_host_object_created(
host_object_id hoid,
bool owns_instance)
void notify_host_object_created(
host_object_id hoid,
bool owns_instance)
Declared at: src/instruction_graph_generator.cc:595
Parameters
- host_object_id hoid
- bool owns_instance
¶void notify_host_object_destroyed(
host_object_id hoid)
void notify_host_object_destroyed(
host_object_id hoid)
Declared at: src/instruction_graph_generator.cc:596
Parameters
- host_object_id hoid
¶staging_allocation& acquire_staging_allocation(
batch& current_batch,
memory_id mid,
size_t size_bytes,
size_t align_bytes)
staging_allocation& acquire_staging_allocation(
batch& current_batch,
memory_id mid,
size_t size_bytes,
size_t align_bytes)
Description
Create a new host allocation for copy staging, or re-use a cached staging allocation whose last access is older than the current epoch.
Declared at: src/instruction_graph_generator.cc:678
Parameters
- batch& current_batch
- memory_id mid
- size_t size_bytes
- size_t align_bytes
¶void add_dependencies_on_access_front(
instruction* accessing_instruction,
const access_front& front,
instruction_dependency_origin
origin_for_read_write_front)
void add_dependencies_on_access_front(
instruction* accessing_instruction,
const access_front& front,
instruction_dependency_origin
origin_for_read_write_front)
Declared at: src/instruction_graph_generator.cc:657
Parameters
- instruction* accessing_instruction
- const access_front& front
- instruction_dependency_origin origin_for_read_write_front
¶void add_dependencies_on_last_concurrent_accesses(
instruction* accessing_instruction,
buffer_allocation_state& allocation,
const region<3>& region,
instruction_dependency_origin
origin_for_read_write_front)
void add_dependencies_on_last_concurrent_accesses(
instruction* accessing_instruction,
buffer_allocation_state& allocation,
const region<3>& region,
instruction_dependency_origin
origin_for_read_write_front)
Declared at: src/instruction_graph_generator.cc:665
Parameters
- instruction* accessing_instruction
- buffer_allocation_state& allocation
- const region<3>& region
- instruction_dependency_origin origin_for_read_write_front
¶void add_dependencies_on_last_writers(
instruction* accessing_instruction,
buffer_allocation_state& allocation,
const region<3>& region)
void add_dependencies_on_last_writers(
instruction* accessing_instruction,
buffer_allocation_state& allocation,
const region<3>& region)
Declared at: src/instruction_graph_generator.cc:660
Parameters
- instruction* accessing_instruction
- buffer_allocation_state& allocation
- const region<3>& region
¶void add_dependency(
instruction* from,
instruction* to,
instruction_dependency_origin record_origin)
void add_dependency(
instruction* from,
instruction* to,
instruction_dependency_origin record_origin)
Description
Inserts a graph dependency and removes `to` from the execution front (if present). The `record_origin` is debug information.
Declared at: src/instruction_graph_generator.cc:655
Parameters
- instruction* from
- instruction* to
- instruction_dependency_origin record_origin
¶void allocate_contiguously(
batch& batch,
buffer_id bid,
memory_id mid,
box_vector<3>&& required_contiguous_boxes)
void allocate_contiguously(
batch& batch,
buffer_id bid,
memory_id mid,
box_vector<3>&& required_contiguous_boxes)
Description
Ensure that all boxes in `required_contiguous_boxes` have a contiguous allocation on `mid`. Because re-allocation of one buffer on one memory never interacts with other buffers or other memories backing the same buffer, this function can be called in any order of allocation requirements without generating additional dependencies.
Declared at: src/instruction_graph_generator.cc:686
Parameters
- batch& batch
- buffer_id bid
- memory_id mid
- box_vector<3>&& required_contiguous_boxes
¶void apply_epoch(instruction* epoch)
void apply_epoch(instruction* epoch)
Description
Replace all tracked instructions that are older than `epoch` with `epoch`.
Declared at: src/instruction_graph_generator.cc:672
Parameters
- instruction* epoch
¶void collapse_execution_front_to(
instruction* horizon_or_epoch)
void collapse_execution_front_to(
instruction* horizon_or_epoch)
Description
Add dependencies from `horizon_or_epoch` to all instructions in `m_execution_front` and clear the set.
Declared at: src/instruction_graph_generator.cc:675
Parameters
- instruction* horizon_or_epoch
¶void commit_pending_region_receive_to_host_memory(
batch& batch,
buffer_id bid,
const buffer_state::region_receive& receives,
const std::vector<region<3>>&
concurrent_reads)
void commit_pending_region_receive_to_host_memory(
batch& batch,
buffer_id bid,
const buffer_state::region_receive& receives,
const std::vector<region<3>>&
concurrent_reads)
Description
Insert one or more receive instructions in order to fulfil a pending receive, making the received data available in host_memory_id. This may entail receiving a region that is larger than the union of all regions read.
Declared at: src/instruction_graph_generator.cc:690
Parameters
- batch& batch
- buffer_id bid
- const buffer_state::region_receive& receives
- const std::vector<region<3>>& concurrent_reads
¶void compile_epoch_command(
batch& batch,
const epoch_command& ecmd)
void compile_epoch_command(
batch& batch,
const epoch_command& ecmd)
Declared at: src/instruction_graph_generator.cc:741
Parameters
- batch& batch
- const epoch_command& ecmd
¶void compile_execution_command(
batch& batch,
const execution_command& ecmd)
void compile_execution_command(
batch& batch,
const execution_command& ecmd)
Declared at: src/instruction_graph_generator.cc:735
Parameters
- batch& batch
- const execution_command& ecmd
¶void compile_fence_command(
batch& batch,
const fence_command& fcmd)
void compile_fence_command(
batch& batch,
const fence_command& fcmd)
Declared at: src/instruction_graph_generator.cc:739
Parameters
- batch& batch
- const fence_command& fcmd
¶void compile_horizon_command(
batch& batch,
const horizon_command& hcmd)
void compile_horizon_command(
batch& batch,
const horizon_command& hcmd)
Declared at: src/instruction_graph_generator.cc:740
Parameters
- batch& batch
- const horizon_command& hcmd
¶void compile_push_command(
batch& batch,
const push_command& pcmd)
void compile_push_command(
batch& batch,
const push_command& pcmd)
Declared at: src/instruction_graph_generator.cc:736
Parameters
- batch& batch
- const push_command& pcmd
¶void compile_reduction_command(
batch& batch,
const reduction_command& rcmd)
void compile_reduction_command(
batch& batch,
const reduction_command& rcmd)
Declared at: src/instruction_graph_generator.cc:738
Parameters
- batch& batch
- const reduction_command& rcmd
¶template <typename Instruction,
typename... CtorParamsAndRecordWithFn>
Instruction* create(
batch& batch,
CtorParamsAndRecordWithFn&&... ctor_args_and_record_with)
template <typename Instruction,
typename... CtorParamsAndRecordWithFn>
Instruction* create(
batch& batch,
CtorParamsAndRecordWithFn&&... ctor_args_and_record_with)
Description
Create an instruction, insert it into the IDAG and the current execution front, and record it if a recorder is present. Invoke as `create<instruction-type>(instruction-ctor-params..., [&](const auto record_debug_info) { return record_debug_info(instruction-record-additional-ctor-params); })`.
Declared at: src/instruction_graph_generator.cc:650
Template Parameters
- Instruction
- CtorParamsAndRecordWithFn
Parameters
- batch& batch
- CtorParamsAndRecordWithFn&&... ctor_args_and_record_with
¶template <typename Instruction,
typename... CtorParamsAndRecordWithFn,
size_t... CtorParamIndices,
size_t RecordWithFnIndex>
Instruction* create_internal(
batch& batch,
const std::tuple<
CtorParamsAndRecordWithFn...>&
ctor_args_and_record_with,
std::index_sequence<CtorParamIndices...>,
std::index_sequence<RecordWithFnIndex>)
template <typename Instruction,
typename... CtorParamsAndRecordWithFn,
size_t... CtorParamIndices,
size_t RecordWithFnIndex>
Instruction* create_internal(
batch& batch,
const std::tuple<
CtorParamsAndRecordWithFn...>&
ctor_args_and_record_with,
std::index_sequence<CtorParamIndices...>,
std::index_sequence<RecordWithFnIndex>)
Declared at: src/instruction_graph_generator.cc:639
Template Parameters
- Instruction
- CtorParamsAndRecordWithFn
- size_t CtorParamIndices
- size_t RecordWithFnIndex
Parameters
- batch& batch
- const std::tuple<CtorParamsAndRecordWithFn...>& ctor_args_and_record_with
- std::index_sequence<CtorParamIndices...>
- std::index_sequence<RecordWithFnIndex>
¶message_id create_outbound_pilot(
batch& batch,
node_id target,
const transfer_id& trid,
const box<3>& box)
message_id create_outbound_pilot(
batch& batch,
node_id target,
const transfer_id& trid,
const box<3>& box)
Declared at: src/instruction_graph_generator.cc:652
Parameters
- batch& batch
- node_id target
- const transfer_id& trid
- const box<3>& box
¶void create_task_collective_groups(
batch& command_batch,
const task& tsk)
void create_task_collective_groups(
batch& command_batch,
const task& tsk)
Description
Issue instructions to create any collective group required by a task.
Declared at: src/instruction_graph_generator.cc:700
Parameters
- batch& command_batch
- const task& tsk
¶void defer_await_push_command(
const await_push_command& apcmd)
void defer_await_push_command(
const await_push_command& apcmd)
Declared at: src/instruction_graph_generator.cc:737
Parameters
- const await_push_command& apcmd
¶void establish_coherence_between_buffer_memories(
batch& current_batch,
buffer_id bid,
dense_map<memory_id, std::vector<region<3>>>&
concurrent_reads_from_memory)
void establish_coherence_between_buffer_memories(
batch& current_batch,
buffer_id bid,
dense_map<memory_id, std::vector<region<3>>>&
concurrent_reads_from_memory)
Description
Insert coherence copy instructions where necessary to make all specified regions coherent on their respective memories. Requires the necessary allocations in dest_mid
to already be present. We deliberately allow overlapping read-regions to avoid aggregated copies introducing synchronization points between otherwise independent instructions.
Declared at: src/instruction_graph_generator.cc:696
Parameters
- batch& current_batch
- buffer_id bid
- dense_map<memory_id, std::vector<region<3>>>& concurrent_reads_from_memory
¶void finish_task_local_reduction(
batch& command_batch,
const local_reduction& red,
const reduction_info& rinfo,
const execution_command& ecmd,
const task& tsk,
const std::vector<localized_chunk>&
concurrent_chunks)
void finish_task_local_reduction(
batch& command_batch,
const local_reduction& red,
const reduction_info& rinfo,
const execution_command& ecmd,
const task& tsk,
const std::vector<localized_chunk>&
concurrent_chunks)
Description
Combine any partial reduction results computed by local chunks and write the combined result to buffer host memory.
Declared at: src/instruction_graph_generator.cc:717
Parameters
- batch& command_batch
- const local_reduction& red
- const reduction_info& rinfo
- const execution_command& ecmd
- const task& tsk
- const std::vector<localized_chunk>& concurrent_chunks
¶void flush_batch(batch&& batch)
void flush_batch(batch&& batch)
Description
Passes all instructions and outbound pilots that have been accumulated in batch
to the delegate (if any). Called after compiling a command, creating or destroying a buffer or host object, and also in our constructor for the creation of the initial epoch.
Declared at: src/instruction_graph_generator.cc:745
Parameters
- batch&& batch
¶void free_all_staging_allocations(
batch& current_batch)
void free_all_staging_allocations(
batch& current_batch)
Description
Free all cached staging allocations allocated so far.
Declared at: src/instruction_graph_generator.cc:681
Parameters
- batch& current_batch
¶bool is_recording() const
bool is_recording() const
Description
True if a recorder is present and create() will call the record_with
lambda passed as its last parameter.
Declared at: src/instruction_graph_generator.cc:634
¶instruction* launch_task_kernel(
batch& command_batch,
const execution_command& ecmd,
const task& tsk,
const localized_chunk& chunk)
instruction* launch_task_kernel(
batch& command_batch,
const execution_command& ecmd,
const task& tsk,
const localized_chunk& chunk)
Description
Launch a device kernel for each local chunk of a task, passing the relevant buffer allocations in place of accessors and reduction descriptors.
Declared at: src/instruction_graph_generator.cc:721
Parameters
- batch& command_batch
- const execution_command& ecmd
- const task& tsk
- const localized_chunk& chunk
¶allocation_id new_allocation_id(memory_id mid)
allocation_id new_allocation_id(memory_id mid)
Declared at: src/instruction_graph_generator.cc:636
Parameters
- memory_id mid
¶void perform_atomic_write_to_allocation(
instruction* writing_instruction,
buffer_allocation_state& allocation,
const region<3>& region)
void perform_atomic_write_to_allocation(
instruction* writing_instruction,
buffer_allocation_state& allocation,
const region<3>& region)
Description
Add dependencies to the last concurrent accesses of a region, and track the instruction as the new last (unique) writer.
Declared at: src/instruction_graph_generator.cc:669
Parameters
- instruction* writing_instruction
- buffer_allocation_state& allocation
- const region<3>& region
¶void perform_concurrent_read_from_allocation(
instruction* reading_instruction,
buffer_allocation_state& allocation,
const region<3>& region)
void perform_concurrent_read_from_allocation(
instruction* reading_instruction,
buffer_allocation_state& allocation,
const region<3>& region)
Description
Add dependencies to the last writer of a region, and track the instruction as the new last (concurrent) reader.
Declared at: src/instruction_graph_generator.cc:663
Parameters
- instruction* reading_instruction
- buffer_allocation_state& allocation
- const region<3>& region
¶void perform_task_buffer_accesses(
buffer_id bid,
const execution_command& ecmd,
const task& tsk,
const std::vector<localized_chunk>&
concurrent_chunks,
const std::vector<instruction*>&
command_instructions)
void perform_task_buffer_accesses(
buffer_id bid,
const execution_command& ecmd,
const task& tsk,
const std::vector<localized_chunk>&
concurrent_chunks,
const std::vector<instruction*>&
command_instructions)
Description
Add dependencies for all buffer accesses and reductions of a task, then update tracking structures accordingly.
Declared at: src/instruction_graph_generator.cc:724
Parameters
- buffer_id bid
- const execution_command& ecmd
- const task& tsk
- const std::vector<localized_chunk>& concurrent_chunks
- const std::vector<instruction*>& command_instructions
¶void perform_task_collective_operations(
const task& tsk,
const std::vector<localized_chunk>&
concurrent_chunks,
const std::vector<instruction*>&
command_instructions)
void perform_task_collective_operations(
const task& tsk,
const std::vector<localized_chunk>&
concurrent_chunks,
const std::vector<instruction*>&
command_instructions)
Description
If a task is part of a collective group, serialize it with respect to the last host task in that group.
Declared at: src/instruction_graph_generator.cc:732
Parameters
- const task& tsk
- const std::vector<localized_chunk>& concurrent_chunks
- const std::vector<instruction*>& command_instructions
¶void perform_task_side_effects(
const task& tsk,
const std::vector<localized_chunk>&
concurrent_chunks,
const std::vector<instruction*>&
command_instructions)
void perform_task_side_effects(
const task& tsk,
const std::vector<localized_chunk>&
concurrent_chunks,
const std::vector<instruction*>&
command_instructions)
Description
If a task has side effects, serialize it with respect to the last task that shares a host object.
Declared at: src/instruction_graph_generator.cc:728
Parameters
- const task& tsk
- const std::vector<localized_chunk>& concurrent_chunks
- const std::vector<instruction*>& command_instructions
¶local_reduction prepare_task_local_reduction(
batch& command_batch,
const reduction_info& rinfo,
const execution_command& ecmd,
const task& tsk,
size_t num_concurrent_chunks)
local_reduction prepare_task_local_reduction(
batch& command_batch,
const reduction_info& rinfo,
const execution_command& ecmd,
const task& tsk,
size_t num_concurrent_chunks)
Description
Create a gather allocation and optionally save the current buffer value before creating partial reduction results in any kernel.
Declared at: src/instruction_graph_generator.cc:713
Parameters
- batch& command_batch
- const reduction_info& rinfo
- const execution_command& ecmd
- const task& tsk
- size_t num_concurrent_chunks
¶std::string print_buffer_debug_label(
buffer_id bid) const
std::string print_buffer_debug_label(
buffer_id bid) const
Declared at: src/instruction_graph_generator.cc:747
Parameters
- buffer_id bid
¶void report_task_overlapping_writes(
const task& tsk,
const std::vector<localized_chunk>&
concurrent_chunks) const
void report_task_overlapping_writes(
const task& tsk,
const std::vector<localized_chunk>&
concurrent_chunks) const
Description
Detect overlapping writes between local chunks of a task and report them according to m_policy.
Declared at: src/instruction_graph_generator.cc:706
Parameters
- const task& tsk
- const std::vector<localized_chunk>& concurrent_chunks
¶void satisfy_task_buffer_requirements(
batch& batch,
buffer_id bid,
const task& tsk,
const subrange<3>& local_execution_range,
bool is_reduction_initializer,
const std::vector<localized_chunk>&
concurrent_chunks_after_split)
void satisfy_task_buffer_requirements(
batch& batch,
buffer_id bid,
const task& tsk,
const subrange<3>& local_execution_range,
bool is_reduction_initializer,
const std::vector<localized_chunk>&
concurrent_chunks_after_split)
Description
Allocate memory, apply any pending receives, and issue resize- and coherence copies to prepare all buffer memories for a task's execution.
Declared at: src/instruction_graph_generator.cc:709
Parameters
- batch& batch
- buffer_id bid
- const task& tsk
- const subrange<3>& local_execution_range
- bool is_reduction_initializer
- const std::vector<localized_chunk>& concurrent_chunks_after_split
¶std::vector<localized_chunk>
split_task_execution_range(
const execution_command& ecmd,
const task& tsk)
std::vector<localized_chunk>
split_task_execution_range(
const execution_command& ecmd,
const task& tsk)
Description
Split a task's local execution range (given by execution_command) into chunks according to device configuration and a possible oversubscription hint.
Declared at: src/instruction_graph_generator.cc:703
Parameters
- const execution_command& ecmd
- const task& tsk