-- ojosynariz
-- Company: CEI
-- Engineer: David Aledo
-- Create Date: 12:41:19 06/10/2013
-- Design Name: Configurable ANN
-- Module Name: layerSP_top - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: neuron layer top for artificial neural networks. Serial input and
-- parallel output.
-- Dependencies:
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
library IEEE;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Deprecated XPS library:
--library proc_common_v3_00_a;
--use proc_common_v3_00_a.proc_common_pkg.all; -- Only for simulation ( pad_power2() )
-- Top level of one neuron layer with serial data input and parallel data output.
-- Besides the data path it exposes an external memory interface (m_en, b_sel,
-- m_we, wdata, addr, rdata) used to write and read back the weight and bias
-- memories of the layer.
entity layerSP_top is
   generic
   (
      NumN    : natural := 8;  ------- Number of neurons of the layer
      NumIn   : natural := 64; ------- Number of inputs of each neuron
      NbitIn  : natural := 8;  ------- Bit width of the input data
      NbitW   : natural := 8;  ------- Bit width of weights and biases
      NbitOut : natural := 12; ------- Bit width of the output data
      lra_l   : natural := 10; ------- Layer RAM address length. It should equal log2(NumN)+log2(NumIn)
      wra_l   : natural := 6;  ------- Weight RAM address length. It should equal log2(NumIn)
      bra_l   : natural := 3;  ------- Bias RAM address length. It should equal log2(NumN)
      LSbit   : natural := 4   ------- Least significant bit of the outputs
   );
   port
   (
      -- Input ports
      reset   : in  std_logic;
      clk     : in  std_logic;
      run_in  : in  std_logic; -- Start and input data validation
      m_en    : in  std_logic; -- Memory enable (external interface)
      b_sel   : in  std_logic; -- Bias memory select
      m_we    : in  std_logic_vector(((NbitW+7)/8)-1 downto 0); -- Memory write enable, one bit per byte (external interface)
      inputs  : in  std_logic_vector(NbitIn-1 downto 0); -- Input data (serial)
      wdata   : in  std_logic_vector(NbitW-1 downto 0);  -- Write data of weight and bias memories
      addr    : in  std_logic_vector(lra_l-1 downto 0);  -- Address of weight and bias memories
      -- Output ports
      run_out : out std_logic; -- Output data validation, run_in for the next layer
      rdata   : out std_logic_vector(NbitW-1 downto 0);  -- Read data of weight and bias memories
      outputs : out std_logic_vector((NbitOut*NumN)-1 downto 0) -- Output data (parallel)
   );
end layerSP_top;
architecture Behavioral of layerSP_top is

   -- Alternative declarations padding the memories to a power of two
   -- (they need pad_power2() from the deprecated proc_common package):
   --type ramd_type is array (pad_power2(NumIn)-1 downto 0) of std_logic_vector(NbitW-1 downto 0); -- Optimal: 32 or 64 spaces
   --type layer_ram is array (pad_power2(NumN)-1 downto 0) of ramd_type;
   type ramd_type is array (NumIn-1 downto 0) of std_logic_vector(NbitW-1 downto 0); -- One weight word per neuron input. Optimal: 32 or 64 spaces
   type layer_ram is array (NumN-1 downto 0) of ramd_type; -- One ramd_type memory per neuron
   type outm_type is array (NumN-1 downto 0) of std_logic_vector(NbitW-1 downto 0); -- One word per neuron

   signal lram  : layer_ram; -- Layer RAM. One RAM per neuron. It stores the weights
   signal breg  : outm_type; -- Bias registers. They can not be RAM because they are accessed simultaneously
   signal outm  : outm_type; -- RAM outputs to be multiplexed into rdata
   signal m_sel : std_logic_vector(NumN-1 downto 0);           -- RAM select
   signal Wyb   : std_logic_vector((NbitW*NumN)-1 downto 0);   -- Weight vectors
   signal bias  : std_logic_vector((NbitW*NumN)-1 downto 0);   -- Bias vector
   signal Nouts : std_logic_vector((NbitOut*NumN)-1 downto 0); -- Outputs from neurons
   signal uaddr : unsigned(lra_l-1 downto 0); -- Unsigned address of weight and bias memories
   -- Input data register. The inputs are registered in the same cycle in which
   -- en1 is registered from run_in; en1 is delayed 1 cycle in order to insert
   -- a register for Wyb.
   signal inreg : std_logic_vector(NbitIn-1 downto 0);

   -- Control signals
   signal cont     : integer range 0 to NumIn-1; -- Input counter
   signal en1      : std_logic; -- First stage enable (multiplication of MAC)
   signal en2      : std_logic; -- Second stage enable (accumulation of MAC)
   signal en3      : std_logic; -- Shift register enable
   signal a0       : std_logic; -- Signal to load accumulators with the multiplication result
   signal aux_en3  : std_logic; -- Auxiliary signal to delay en3 two cycles
   signal aux_a0   : std_logic; -- Auxiliary signal, registered one cycle before a0
   signal aux2_en3 : std_logic; -- Auxiliary signal, first stage of the en3 delay chain

begin
-- Instance of the layer core (entity work.layerSP). It receives the registered
-- input word, the per-neuron weight and bias buses and the enables produced by
-- the control process of this architecture, and drives the neuron outputs.
layerSP_inst: entity work.layerSP
   generic map
   (
      NumN    => NumN,
      NumIn   => NumIn,
      NbitIn  => NbitIn,
      NbitW   => NbitW,
      NbitOut => NbitOut,
      LSbit   => LSbit
   )
   port map
   (
      -- Input ports
      reset   => reset,
      clk     => clk,
      en      => en1,   -- First stage enable
      en2     => en2,   -- Second stage enable
      en_r    => en3,   -- Shift register enable
      a0      => a0,
      inputs  => inreg, -- Registered input data
      Wyb     => Wyb,
      bias    => bias,
      -- Output ports
      outputs => Nouts
   );

-- Unsigned view of the external address, used for slicing and integer indexing
uaddr <= unsigned(addr);

-- Select RAM: combinational decoder that raises exactly one bit of m_sel, the
-- one matching the top part of the address, while the bias memory is not selected.
-- NOTE(review): the body of this process was missing from the file as received.
-- It has been reconstructed from the sensitivity list, from the declaration of
-- m_sel ("RAM select") and from the use of b_sel = '1' as bias select in the
-- other processes. Confirm against the original design.
process (uaddr(lra_l-1 downto wra_l),b_sel) -- Top part of memory address and b_sel
begin
   for i in (NumN-1) downto 0 loop
      -- The top part of the memory address selects which RAM
      if ( (to_integer(uaddr(lra_l-1 downto wra_l)) = i) and (b_sel = '0') ) then
         m_sel(i) <= '1'; -- Enables the selected RAM
      else
         m_sel(i) <= '0'; -- Disables the rest of the RAMs
      end if;
   end loop;
end process;

-- RAM generation: one weight memory per neuron of the layer.
-- NOTE(review): the generate header, the label, the head of the write process
-- and the first branch of the byte-write loop were missing from the file as
-- received. They have been reconstructed from the surviving tail of the
-- process and from the bias write process, which has the same structure.
-- Confirm the label name and the write condition against the original design.
rams:
for i in (NumN-1) downto 0 generate

   -- Synchronous write port of weight RAM i (external interface)
   process (clk)
      variable d : std_logic_vector(NbitW-1 downto 0); -- Beware of elements whose length is not a multiple of 8
   begin
      if (clk'event and clk = '1') then
         if ( (m_en = '1') and (m_sel(i) = '1') ) then
            for j in ((NbitW+7)/8)-1 downto 0 loop -- Write enable, byte by byte
               if (m_we(j) = '1') then
                  -- Byte j is taken from the write data
                  d((8*(j+1))-1 downto 8*j) := wdata((8*(j+1))-1 downto 8*j);
               else
                  -- Byte j keeps the stored value
                  d((8*(j+1))-1 downto 8*j) := lram(i)(to_integer(uaddr(wra_l-1 downto 0)))((8*(j+1))-1 downto 8*j);
               end if;
            end loop;
            -- The bottom part of the layer RAM address selects the weight
            lram(i)(to_integer(uaddr(wra_l-1 downto 0))) <= d;
         end if;
      end if;
   end process;

   -- Outputs are read in parallel, resulting in a bus of weights:
   --Wyb((NbitW*(i+1))-1 downto NbitW*i) <= lram(i)(cont); -- Asynchronous read (forces distributed RAM)
   process (clk) -- Synchronous read
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            --Wyb((NbitW*(i+1))-1 downto NbitW*i) <= (others => '0');
            null; -- The read register is deliberately not cleared; it just holds its value
         else
            -- The weight addressed by the input counter goes to slice i of the bus
            Wyb((NbitW*(i+1))-1 downto NbitW*i) <= lram(i)(cont);
         end if;
      end if;
   end process;

   -- Combinational read through the external address, multiplexed later into rdata
   outm(i) <= lram(i)(to_integer(uaddr(wra_l-1 downto 0))); -- Read all RAM

end generate;

-- Synchronous read including breg: registered read port of the external
-- memory interface. rdata is only updated while m_en = '1'; otherwise it
-- keeps its previous value.
process (clk)
begin
   if (clk'event and clk = '1') then
      if (m_en = '1') then
         if (b_sel = '1') then
            -- The bottom bra_l bits of the address select the bias register
            rdata <= breg(to_integer(uaddr(bra_l-1 downto 0))); -- Bias registers selected
         else -- Other RAM selected:
            -- The top part of the address selects which RAM output is returned
            rdata <= outm(to_integer(uaddr(lra_l-1 downto wra_l))); -- Multiplexes RAM outputs
            -- May be safer if accesses to a top address greater than NumN-1 are avoided
         end if;
      end if;
   end if;
end process;

-- Bias registers write: synchronous write port of the bias registers
-- (external interface). A write happens when both m_en and b_sel are '1'.
-- Each bit of m_we enables one byte of the word; bytes that are not enabled
-- keep the value already stored in the addressed register.
process (clk)
   variable d : std_logic_vector(NbitW-1 downto 0); -- Beware of elements whose length is not a multiple of 8
begin
   if (clk'event and clk = '1') then
      if ( (m_en = '1') and (b_sel = '1') ) then
         for i in ((NbitW+7)/8)-1 downto 0 loop -- Write enable, byte by byte
            if (m_we(i) = '1') then
               -- Byte i is taken from the write data
               d((8*(i+1))-1 downto 8*i) := wdata((8*(i+1))-1 downto 8*i);
            else
               -- Byte i keeps the stored value
               d((8*(i+1))-1 downto 8*i) := breg(to_integer(uaddr(bra_l-1 downto 0)))((8*(i+1))-1 downto 8*i);
            end if;
         end loop;
         -- The bottom part (reduced) of layer RAM address selects the bias
         breg(to_integer(uaddr(bra_l-1 downto 0))) <= d;
      end if;
   end if;
end process;

-- Bias read: every bias register is copied, through one register stage, into
-- its slice of the bias bus, so all biases are available in parallel.
-- NOTE(review): a generate statement requires a label and none was present in
-- the file as received; "bias_read" was chosen here. Confirm the original name
-- if hierarchical paths depend on it.
bias_read:
for i in (NumN-1) downto 0 generate
   --bias((NbitW*(i+1))-1 downto NbitW*i) <= breg(i); -- Asynchronous read of all biases in parallel
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            --bias((NbitW*(i+1))-1 downto NbitW*i) <= (others => '0');
            null; -- The read register is deliberately not cleared; it just holds its value
         else
            bias((NbitW*(i+1))-1 downto NbitW*i) <= breg(i); -- Synchronous read of all biases in parallel
         end if;
      end if;
   end process;
end generate;

-- The parallel output port is driven directly by the neuron outputs
outputs <= Nouts;

-- Control: counts the serial inputs and generates the enables of the layer.
--   * en1 is run_in registered once and en2 is en1 registered once.
--   * aux_a0 is set when an input is accepted while cont = 0; a0 follows it
--     one cycle later.
--   * aux2_en3 pulses when the input with cont = NumIn-1 is accepted; it is
--     then shifted through aux_en3, en3 and finally run_out, one cycle each.
-- Synchronous reset clears the counter, the input register and all the enables.
process (clk)
begin
   if (clk'event and clk = '1') then
      if (reset = '1') then
         cont     <= 0;
         en1      <= '0';
         en2      <= '0';
         en3      <= '0';
         a0       <= '0';
         run_out  <= '0';
         aux_en3  <= '0';
         aux2_en3 <= '0';
         aux_a0   <= '0';
         inreg    <= (others => '0');
      else
         en1   <= run_in; -- en1 is delayed 1 cycle in order to insert a register for Wyb
         inreg <= inputs; -- The input data is registered in the same cycle as en1
         -- Default:
         aux2_en3 <= '0';
         if (run_in = '1') then
            if (cont = NumIn-1) then
               cont <= 0; -- Restarts input counter
               aux2_en3 <= '1'; -- Last input accepted: starts the en3 delay chain
            else
               cont <= cont +1;
            end if;
         end if;
         en2 <= en1;
         if (cont = 0 and run_in = '1') then
            aux_a0 <= '1'; -- At the count beginning
         else
            aux_a0 <= '0';
         end if;
         a0 <= aux_a0;
         aux_en3 <= aux2_en3;
         en3 <= aux_en3;
         run_out <= en3; -- It lasts for 1 cycle, just after the output enable of the layer (when all outputs have just updated)
      end if;
   end if;
end process;

end Behavioral;