RTL codegen "line" error #1403


Merged
merged 7 commits into from Oct 19, 2023
17 changes: 16 additions & 1 deletion dace/codegen/targets/rtl.py
@@ -1,8 +1,8 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.

import itertools

from typing import List, Tuple, Dict
import warnings

from dace import dtypes, config, registry, symbolic, nodes, sdfg, data
from dace.sdfg import graph, state, find_input_arraynode, find_output_arraynode
@@ -102,6 +102,21 @@ def copy_memory(self, sdfg: sdfg.SDFG, dfg: state.StateSubgraphView, state_id: i
elif isinstance(arr, data.Scalar):
line: str = "{} {} = {};".format(dst_node.in_connectors[edge.dst_conn].ctype, edge.dst_conn,
edge.src.data)
elif isinstance(arr, data.Stream):
# TODO Streams are currently unsupported, as the proper
# behaviour has to be implemented to avoid deadlocking. It
# is only a warning, as the RTL backend is partially used
# by the Xilinx backend, which may hit this case, but will
# discard the erroneous code.
warnings.warn(
'Streams are currently unsupported by the RTL backend. ' \
'This may produce errors or deadlocks in the generated code.'
)
line: str = "// WARNING: Unsupported read from ({}) variable '{}' from stream '{}'." \
" This may lead to a deadlock if used in code.\n".format(
dst_node.in_connectors[edge.dst_conn].ctype, edge.dst_conn, edge.src_conn)
line += "{} {} = {}.pop();".format(
dst_node.in_connectors[edge.dst_conn].ctype, edge.dst_conn, edge.src.data)
elif isinstance(edge.src, nodes.MapEntry) and isinstance(edge.dst, nodes.Tasklet):
rtl_name = self.unique_name(edge.dst, sdfg.nodes()[state_id], sdfg)
self.n_unrolled[rtl_name] = symbolic.evaluate(edge.src.map.range[0][1] + 1, sdfg.constants)
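For reference, the new data.Stream branch emits a warning comment followed by a blocking pop. A standalone sketch of the strings it builds; the connector and container names below are placeholders for illustration, not taken from the PR:

# Minimal sketch of the code emitted by the new data.Stream branch (placeholder names).
ctype, dst_conn = 'int', 'a'              # destination connector ctype and name (hypothetical)
src_conn, src_data = 'A_in', 'A_stream'   # source stream connector and container (hypothetical)

line = ("// WARNING: Unsupported read from ({}) variable '{}' from stream '{}'."
        " This may lead to a deadlock if used in code.\n".format(ctype, dst_conn, src_conn))
line += "{} {} = {}.pop();".format(ctype, dst_conn, src_data)
print(line)
# Prints:
# // WARNING: Unsupported read from (int) variable 'a' from stream 'A_in'. This may lead to a deadlock if used in code.
# int a = A_stream.pop();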
39 changes: 20 additions & 19 deletions samples/fpga/rtl/add_fortytwo.py
@@ -1,8 +1,9 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
#
# This sample shows adding a constant integer value to a stream of integers.
#
# It is intended for running hardware_emulation or hardware xilinx targets.
# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
"""
This sample shows adding a constant integer value to a stream of integers.

It is intended for running hardware_emulation or hardware xilinx targets.
"""

import dace
import numpy as np
@@ -116,21 +117,21 @@
######################################################################

if __name__ == '__main__':
with dace.config.set_temporary('compiler', 'xilinx', 'mode', value='hardware_emulation'):
# init data structures
N.set(8192)
a = np.random.randint(0, 100, N.get()).astype(np.int32)
b = np.zeros((N.get(), )).astype(np.int32)

# init data structures
N.set(8192)
a = np.random.randint(0, 100, N.get()).astype(np.int32)
b = np.zeros((N.get(), )).astype(np.int32)

# show initial values
print("a={}, b={}".format(a, b))
# show initial values
print("a={}, b={}".format(a, b))

# call program
sdfg(A=a, B=b, N=N)
# call program
sdfg(A=a, B=b, N=N)

# show result
print("a={}, b={}".format(a, b))
# show result
print("a={}, b={}".format(a, b))

# check result
for i in range(N.get()):
assert b[i] == a[i] + 42
# check result
for i in range(N.get()):
assert b[i] == a[i] + 42
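The sample drivers are now wrapped in dace.config.set_temporary, which scopes the Xilinx mode override to the with block and restores the previous value on exit. A minimal sketch of that pattern; the default mode shown in the comments is an assumption, not taken from this PR:

import dace

# set_temporary overrides a config entry only for the duration of the with block,
# so the sample forces hardware_emulation while it compiles and runs the SDFG.
print(dace.Config.get('compiler', 'xilinx', 'mode'))      # e.g. 'simulation' (assumed default)
with dace.config.set_temporary('compiler', 'xilinx', 'mode', value='hardware_emulation'):
    print(dace.Config.get('compiler', 'xilinx', 'mode'))  # 'hardware_emulation'
print(dace.Config.get('compiler', 'xilinx', 'mode'))      # previous value restored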
13 changes: 8 additions & 5 deletions samples/fpga/rtl/axpy.py
@@ -1,7 +1,10 @@
# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved.
#
# This sample shows the AXPY BLAS routine. It is implemented through Xilinx IPs in order to utilize floating point
# operations. It is intended for running hardware_emulation or hardware xilinx targets.
# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
"""
This sample shows the AXPY BLAS routine. It is implemented through Xilinx IPs in order to utilize floating point
operations.

It is intended for running hardware_emulation or hardware xilinx targets.
"""

import dace
import numpy as np
@@ -259,4 +262,4 @@ def make_sdfg(veclen=2):
expected = a * x + y
diff = np.linalg.norm(expected - result) / N.get()
print("Difference:", diff)
exit(0 if diff <= 1e-5 else 1)
assert diff <= 1e-5
143 changes: 72 additions & 71 deletions samples/fpga/rtl/axpy_double_pump.py
@@ -1,73 +1,74 @@
# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved.
#
# This sample shows the AXPY BLAS routine. It is implemented through Xilinx
# IPs in order to utilize double pumping, which doubles the performance per
# consumed FPGA resource. The double pumping operation is "inwards", which
# means that the internal vectorization width of the core computation is half
# that of the external vectorization width. This translates into utilizing half
# the amount of internal computing resources, compared to a regular vectorized
# implementetation. The block diagram of the design for a 32-bit floating-point
# implementation using vectorization width 2 is:
#
# ap_aclk s_axis_y_in s_axis_x_in a
# │ │ │ │
# │ │ │ │
# │ │ │ │
# ┌───────┼─────────┬────────┼─────────┐ │ │
# │ │ │ │ │ │ │
# │ │ │ ▼ │ ▼ │
# │ │ │ ┌────────────┐ │ ┌────────────┐ │
# │ │ └─►│ │ └─►│ │ │
# │ │ │ Clock sync │ │ Clock sync │ │
# │ │ ┌─►│ │ ┌─►│ │ │
# │ ▼ 300 MHz │ └─────┬──────┘ │ └─────┬──────┘ │
# │ ┌────────────┐ │ │ │ │ │
# │ │ Clock │ │ │ │ │ │
# │ │ │ ├────────┼─────────┤ │ │
# │ │ Multiplier │ │ │ │ │ │
# │ └─────┬──────┘ │ ▼ 64 bit │ ▼ 64 bit │
# │ │ 600 MHz │ ┌────────────┐ │ ┌────────────┐ │
# │ │ │ │ │ │ │ │ │
# │ └─────────┼─►│ Data issue │ └─►│ Data issue │ │
# │ │ │ │ │ │ │
# │ │ └─────┬──────┘ └─────┬──────┘ │
# │ │ │ 32 bit │ 32 bit │
# │ │ │ │ │
# │ │ │ │ │
# │ │ │ ▼ ▼
# │ │ │ ┌────────────┐
# │ │ │ │ │
# │ ├────────┼────────────────►│ Multiplier │
# │ │ │ │ │
# │ │ │ └─────┬──────┘
# │ │ │ │
# │ │ │ ┌──────────────┘
# │ │ │ │
# │ │ ▼ ▼
# │ │ ┌────────────┐
# │ │ │ │
# │ ├─────►│ Adder │
# │ │ │ │
# │ │ └─────┬──────┘
# │ │ │
# │ │ ▼ 32 bit
# │ │ ┌─────────────┐
# │ │ │ │
# │ ├─────►│ Data packer │
# │ │ │ │
# │ │ └─────┬───────┘
# │ │ │ 64 bit
# │ │ ▼
# │ │ ┌────────────┐
# │ └─────►│ │
# │ │ Clock sync │
# └───────────────────────►│ │
# └─────┬──────┘
# │
# ▼
# m_axis_result_out
#
# It is intended for running hardware_emulation or hardware xilinx targets.
# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
"""
This sample shows the AXPY BLAS routine. It is implemented through Xilinx
IPs in order to utilize double pumping, which doubles the performance per
consumed FPGA resource. The double pumping operation is "inwards", which
means that the internal vectorization width of the core computation is half
that of the external vectorization width. This translates into utilizing half
the amount of internal computing resources, compared to a regular vectorized
implementation. The block diagram of the design for a 32-bit floating-point
implementation using vectorization width 2 is:

ap_aclk s_axis_y_in s_axis_x_in a
│ │ │ │
│ │ │ │
│ │ │ │
┌───────┼─────────┬────────┼─────────┐ │ │
│ │ │ │ │ │ │
│ │ │ ▼ │ ▼ │
│ │ │ ┌────────────┐ │ ┌────────────┐ │
│ │ └─►│ │ └─►│ │ │
│ │ │ Clock sync │ │ Clock sync │ │
│ │ ┌─►│ │ ┌─►│ │ │
│ ▼ 300 MHz │ └─────┬──────┘ │ └─────┬──────┘ │
│ ┌────────────┐ │ │ │ │ │
│ │ Clock │ │ │ │ │ │
│ │ │ ├────────┼─────────┤ │ │
│ │ Multiplier │ │ │ │ │ │
│ └─────┬──────┘ │ ▼ 64 bit │ ▼ 64 bit │
│ │ 600 MHz │ ┌────────────┐ │ ┌────────────┐ │
│ │ │ │ │ │ │ │ │
│ └─────────┼─►│ Data issue │ └─►│ Data issue │ │
│ │ │ │ │ │ │
│ │ └─────┬──────┘ └─────┬──────┘ │
│ │ │ 32 bit │ 32 bit │
│ │ │ │ │
│ │ │ │ │
│ │ │ ▼ ▼
│ │ │ ┌────────────┐
│ │ │ │ │
│ ├────────┼────────────────►│ Multiplier │
│ │ │ │ │
│ │ │ └─────┬──────┘
│ │ │ │
│ │ │ ┌──────────────┘
│ │ │ │
│ │ ▼ ▼
│ │ ┌────────────┐
│ │ │ │
│ ├─────►│ Adder │
│ │ │ │
│ │ └─────┬──────┘
│ │ │
│ │ ▼ 32 bit
│ │ ┌─────────────┐
│ │ │ │
│ ├─────►│ Data packer │
│ │ │ │
│ │ └─────┬───────┘
│ │ │ 64 bit
│ │ ▼
│ │ ┌────────────┐
│ └─────►│ │
│ │ Clock sync │
└───────────────────────►│ │
└─────┬──────┘
│
▼
m_axis_result_out

It is intended for running hardware_emulation or hardware xilinx targets.
"""

import dace
import numpy as np
@@ -452,4 +453,4 @@ def make_sdfg(veclen=2):
diff = np.linalg.norm(expected - result) / N.get()
print("Difference:", diff)

exit(0 if diff <= 1e-5 else 1)
assert diff <= 1e-5
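As the docstring above explains, inward double pumping halves the vector width seen by the compute core while doubling its clock, so throughput is preserved with roughly half the compute resources. A back-of-the-envelope sketch of that relationship, using the 300/600 MHz clocks and vectorization width 2 from the diagram (illustrative only, not code from the sample):

# Illustrative arithmetic for inward double pumping (not part of the sample).
external_veclen = 2                      # elements per cycle on the slow side
slow_clock_mhz = 300
fast_clock_mhz = 2 * slow_clock_mhz      # 600 MHz core clock, as in the diagram

internal_veclen = external_veclen // 2   # core datapath is half as wide...
external_rate = external_veclen * slow_clock_mhz   # elements per microsecond entering
internal_rate = internal_veclen * fast_clock_mhz   # elements per microsecond processed
assert external_rate == internal_rate    # ...but runs twice as fast, so throughput matches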
17 changes: 9 additions & 8 deletions samples/fpga/rtl/fladd.py
@@ -1,10 +1,11 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
#
# This sample shows how to utilize an IP core in an RTL tasklet. This is done
# through the vector add problem, which adds two floating point vectors
# together.
#
# It is intended for running hardware_emulation or hardware xilinx targets.
# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
"""
This sample shows how to utilize an IP core in an RTL tasklet. This is done
through the vector add problem, which adds two floating point vectors
together.

It is intended for running hardware_emulation or hardware xilinx targets.
"""

import dace
import numpy as np
@@ -190,4 +191,4 @@
expected = a + b
diff = np.linalg.norm(expected - c) / N.get()
print("Difference:", diff)
exit(0 if diff <= 1e-5 else 1)
assert diff <= 1e-5
41 changes: 21 additions & 20 deletions samples/fpga/rtl/pipeline.py
@@ -1,9 +1,10 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
#
# This sample shows a DEPTH deep pipeline, where each stage adds 1 to the
# integer input stream.
#
# It is intended for running hardware_emulation or hardware xilinx targets.
# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
"""
This sample shows a DEPTH deep pipeline, where each stage adds 1 to the
integer input stream.

It is intended for running hardware_emulation or hardware xilinx targets.
"""

import dace
import numpy as np
@@ -151,21 +152,21 @@
######################################################################

if __name__ == '__main__':
with dace.config.set_temporary('compiler', 'xilinx', 'mode', value='hardware_emulation'):
# init data structures
N.set(8192)
a = np.random.randint(0, 100, N.get()).astype(np.int32)
b = np.zeros((N.get(), )).astype(np.int32)

# init data structures
N.set(8192)
a = np.random.randint(0, 100, N.get()).astype(np.int32)
b = np.zeros((N.get(), )).astype(np.int32)

# show initial values
print("a={}, b={}".format(a, b))
# show initial values
print("a={}, b={}".format(a, b))

# call program
sdfg(A=a, B=b, N=N)
# call program
sdfg(A=a, B=b, N=N)

# show result
print("a={}, b={}".format(a, b))
# show result
print("a={}, b={}".format(a, b))

# check result
for i in range(N.get()):
assert b[i] == a[i] + depth
# check result
for i in range(N.get()):
assert b[i] == a[i] + depth