| 1 |
3 |
ojosynariz |
----------------------------------------------------------------------------------
-- Company: CEI
-- Engineer: David Aledo
--
-- Create Date: 11:24:24 05/28/2013
-- Design Name: Configurable ANN
-- Module Name: layerSP - arq
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: Basic, parametrizable neuron layer for hardware artificial
--              neural networks. Serial input and parallel output.
--              Implemented with MACs (multiply-accumulate units).
--
-- Dependencies: work.layers_pkg, work.mac
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------

-- NOTE: To optimize the MAC, the inputs should be registered, and it should be
--       checked that this register is implemented as a DSP input register.

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

use work.layers_pkg.all;


-- layerSP: neuron layer with serial input and parallel output.
-- One input sample is presented per step on `inputs`; the weights and biases
-- of all NumN neurons are supplied side by side on `Wyb` and `bias`
-- (NbitW bits per neuron), and the NumN results are delivered side by side
-- on `outputs` (NbitOut bits per neuron).
entity layerSP is

   generic
   (
      NumN    : natural := 8;  -- Number of neurons of the layer
      NumIn   : natural := 64; -- Number of inputs of each neuron (samples accumulated before the accumulator restarts)
      NbitIn  : natural := 8;  -- Bit width of the input data
      NbitW   : natural := 8;  -- Bit width of weights and biases
      NbitOut : natural := 12; -- Bit width of the output data
      LSbit   : natural := 4   -- Least significant bit of the result window taken as output
   );

   port
   (
      -- Input ports
      reset   : in  std_logic; -- Synchronous reset, active high
      clk     : in  std_logic;
      en      : in  std_logic; -- First stage enable (multiplication of MAC)
      en2     : in  std_logic; -- Second stage enable (accumulation of MAC)
      en_r    : in  std_logic; -- Output register enable
      a0      : in  std_logic; -- Signal to load accumulators with the multiplication result
      inputs  : in  std_logic_vector(NbitIn-1 downto 0);        -- Input data (serial)
      Wyb     : in  std_logic_vector((NbitW*NumN)-1 downto 0);  -- Weight vector (one NbitW-wide weight per neuron)
      bias    : in  std_logic_vector((NbitW*NumN)-1 downto 0);  -- Bias vector (one NbitW-wide bias per neuron)

      -- Output ports
      outputs : out std_logic_vector((NbitOut*NumN)-1 downto 0) -- Output data (parallel)
   );
end layerSP;



-- Architecture arq: NumN MAC instances share the serial input; each one uses
-- its own NbitW-wide slice of Wyb and bias. When en_r is asserted, every MAC
-- result is clipped (saturated) to the NbitOut-bit window that starts at bit
-- LSbit and stored in an output register, which drives `outputs`.
architecture arq of layerSP is

   -- Extra bits in the accumulator to avoid overflow.
   -- NOTE(review): log2 is not declared in this file; presumably it comes
   -- from work.layers_pkg -- confirm its rounding behaviour there.
   constant NbOvrf : natural := log2(NumIn);

   -- Saturation thresholds, expressed at full MAC-result width.
   -- The output window is res(LSbit+NbitOut-1 downto LSbit), so this requires
   -- LSbit+NbitOut-1 <= NbitIn+NbitW+NbOvrf.
   -- sat_max: zeros from the MSB down to the window sign bit, ones below. E.g. "0001111"
   constant sat_max : signed(NbitIn+NbitW+NbOvrf downto 0) := (NbitIn+NbitW+NbOvrf downto LSbit+NbitOut-1 => '0') & (LSbit+NbitOut-2 downto 0 => '1');
   -- sat_min: ones from the MSB down to the window sign bit, zeros below. E.g. "1110000"
   constant sat_min : signed(NbitIn+NbitW+NbOvrf downto 0) := (NbitIn+NbitW+NbOvrf downto LSbit+NbitOut-1 => '1') & (LSbit+NbitOut-2 downto 0 => '0');

   type v_res is array(NumN-1 downto 0) of std_logic_vector(NbitIn+NbitW+NbOvrf downto 0); -- Array type for MAC results
   type v_reg is array(NumN-1 downto 0) of std_logic_vector(NbitOut-1 downto 0);           -- Array type for the output register

   signal res : v_res;                                -- MAC results
   signal reg : v_reg := (others => (others => '0')); -- Output register

begin

   macs: -- Instantiates NumN MACs, one per neuron
      for i in (NumN-1) downto 0 generate
         mac_i: entity work.mac
            generic map
            (
               dirload => FALSE, -- NOTE(review): meaning defined by work.mac; not visible here
               NbOvrf  => NbOvrf,
               NbitIn  => NbitIn,
               NbitC   => NbitW
            )
            port map
            (
               CLK  => clk,
               RST  => reset,
               A    => inputs,                                -- Same serial input for every neuron
               B    => Wyb((NbitW*(i+1))-1 downto NbitW*i),   -- Weight slice of neuron i
               C    => bias((NbitW*(i+1))-1 downto NbitW*i),  -- Bias slice of neuron i
               P    => res(i),
               CE1  => en,
               CE2  => en2,
               LOAD => a0
            );
      end generate;

   -- Output register with clipping: on en_r, each MAC result is saturated to
   -- the configured output window and stored.
   process(clk)
   begin
      if rising_edge(clk) then
         if reset = '1' then -- Synchronous reset, active high
            reg <= (others => (others => '0'));
         else

            if en_r = '1' then -- Output register enable (clipping)

               for i in 0 to NumN-1 loop -- All NumN results are loaded in parallel

                  if signed(res(i)) > sat_max then
                     -- Saturating result to the maximum value:
                     reg(i) <= '0' & (NbitOut-2 downto 0 => '1');
                  elsif signed(res(i)) < sat_min then
                     -- Saturating result to the minimum value:
                     reg(i) <= '1' & (NbitOut-2 downto 0 => '0');
                  else
                     -- The configured window of result bits is assigned to the output:
                     reg(i) <= res(i)(LSbit+NbitOut-1 downto LSbit);
                  end if;

               end loop;

            end if;
         end if;

      end if;
   end process;

   -- Assigns output registers to the output data port
   -- (neuron i occupies bits NbitOut*(i+1)-1 downto NbitOut*i):
   process (reg)
   begin
      for i in 0 to NumN-1 loop
         outputs((NbitOut*(i+1))-1 downto NbitOut*i) <= reg(i);
      end loop;
   end process;

end arq;