Browse Source

Integrated the valid bits into the cache block RAM. Reset now iteratively clears the entire cache, which allows for much lower FPGA resource usage and better timing. It should now be possible to greatly increase the cache size, as there is an abundance of BRAM.

bart 1 year ago
parent
commit
2287e54c6b
4 changed files with 105 additions and 102 deletions
  1. BIN
      Quartus/FPGC.qws
  2. 41 46
      Quartus/modules/Memory/L2cache.v
  3. 41 46
      Verilog/modules/Memory/L2cache.v
  4. 23 10
      Verilog/output/FPGC.gtkw

BIN
Quartus/FPGC.qws


+ 41 - 46
Quartus/modules/Memory/L2cache.v

@@ -31,7 +31,7 @@ assign cache_reset = 1'b0;
 parameter cache_size = 1024;                // cache size in words. 8129*4bytes = 32KiB
 parameter cache_size = 1024;                // cache size in words. 8129*4bytes = 32KiB
 parameter index_size = 10;                  // index size: log2(cache_size)
 parameter index_size = 10;                  // index size: log2(cache_size)
 parameter tag_size = 14;                    // mem_add_bits-index_size = 24-13 = 11
 parameter tag_size = 14;                    // mem_add_bits-index_size = 24-13 = 11
-parameter cache_line_size = tag_size+32;    // tag + word
+parameter cache_line_size = tag_size+32+1;  // tag + word + valid bit
 
 
 reg [cache_line_size-1:0] cache [0:cache_size-1];   // cache memory
 reg [cache_line_size-1:0] cache [0:cache_size-1];   // cache memory
 
 
@@ -41,14 +41,14 @@ initial
 begin
 begin
     for (i = 0; i < cache_size; i = i + 1)
     for (i = 0; i < cache_size; i = i + 1)
     begin
     begin
-        cache[i] = 46'd0;
+        cache[i] = 47'd0;
     end
     end
 end
 end
 
 
 reg [index_size-1:0]        cache_addr = 10'd0;
 reg [index_size-1:0]        cache_addr = 10'd0;
-reg [cache_line_size-1:0]   cache_d = 46'd0;
+reg [cache_line_size-1:0]   cache_d = 47'd0;
 reg                         cache_we = 1'b0;
 reg                         cache_we = 1'b0;
-reg [cache_line_size-1:0]   cache_q = 46'd0;
+reg [cache_line_size-1:0]   cache_q = 47'd0;
 always @(posedge clk) 
 always @(posedge clk) 
 begin
 begin
     cache_q <= cache[cache_addr];
     cache_q <= cache[cache_addr];
@@ -77,42 +77,18 @@ parameter state_check_cache     = 3'd3;
 parameter state_miss_read_ram   = 3'd4;
 parameter state_miss_read_ram   = 3'd4;
 parameter state_delay_cache     = 3'd5;
 parameter state_delay_cache     = 3'd5;
 parameter state_done_high       = 3'd6;
 parameter state_done_high       = 3'd6;
+parameter state_clear_cache     = 3'd7;
 
 
-//wire cache_hit = valid_bits[cache_addr] && l2_addr[23:index_size] == cache_q[42:32];
+reg [31:0] addr_prev = 32'd0;
 
 
-// uninferrable valid bit memory
-reg [cache_size-1:0] valid_bits = 1024'd0;
-reg [9:0] valid_a = 10'd0;
-reg valid_d = 1'b0;
-reg valid_q = 1'b0;
-reg valid_we = 1'b0;
-always @(posedge clk) 
-begin
-    if (reset | cache_reset)
-    begin
-        valid_bits <= 1024'd0;
-    end
-    else
-    begin
-        valid_q <= valid_bits[valid_a];
-        if (valid_we)
-        begin
-            valid_bits[valid_a] <= valid_d;
-            $display("%d: wrote valid bit l2", $time);
-        end
-    end
-end
+reg [15:0] clear_cache_counter = 16'd0; // 64k max
 
 
-reg [31:0] addr_prev = 32'd0;
+reg start_registered = 1'b0;
 
 
 always @(posedge clk) 
 always @(posedge clk) 
 begin
 begin
     if (reset)
     if (reset)
     begin
     begin
-        valid_a <= 10'd0;
-        valid_d <= 1'b0;
-        valid_we <= 1'b0;
-
         l2_q_reg <= 32'd0;
         l2_q_reg <= 32'd0;
         l2_done_reg <= 1'b0;
         l2_done_reg <= 1'b0;
         sdc_addr_reg <= 24'd0;
         sdc_addr_reg <= 24'd0;
@@ -124,7 +100,11 @@ begin
         
         
         // Make sure the next cycle a new request can be detected!
         // Make sure the next cycle a new request can be detected!
         start_prev <= 1'b0;
         start_prev <= 1'b0;
-        state <= state_idle;
+        state <= state_clear_cache;
+
+        clear_cache_counter <= 16'd0;
+
+        start_registered <= 1'b0;
     end
     end
     else
     else
     begin
     begin
@@ -132,9 +112,6 @@ begin
         start_prev <= l2_start;
         start_prev <= l2_start;
         l2_done_reg <= 1'b0;
         l2_done_reg <= 1'b0;
         cache_we <= 1'b0;
         cache_we <= 1'b0;
-
-        valid_d <= 1'b0;
-        valid_we <= 1'b0;
         
         
 
 
         // NOTE: make sure to use latched l2_addr from rising start to make sure all addresses are correct!
         // NOTE: make sure to use latched l2_addr from rising start to make sure all addresses are correct!
@@ -143,16 +120,38 @@ begin
         case(state)
         case(state)
             state_init: 
             state_init: 
             begin
             begin
-                state <= state_idle;
+                state <= state_clear_cache;
+            end
+
+            state_clear_cache:
+            begin
+                if (l2_addr < 27'h800000 && ( (l2_start && !start_prev) || addr_prev >= 27'h800000 && l2_start) )
+                begin
+                    start_registered <= 1'b1;
+                end
+
+                if (clear_cache_counter == cache_size)
+                begin
+                    clear_cache_counter <= 16'd0;
+                    state <= state_idle;
+                end
+                else
+                begin
+                    clear_cache_counter <= clear_cache_counter + 1'b1;
+                    cache_we <= 1'b1;
+                    cache_d <= 47'd0;
+                    cache_addr <= clear_cache_counter;
+                end
+                
             end
             end
 
 
             state_idle: 
             state_idle: 
             begin
             begin
-                valid_a <= l2_addr[index_size-1:0];
                 if (l2_addr < 27'h800000)
                 if (l2_addr < 27'h800000)
                 begin
                 begin
-                    if ( (l2_start && !start_prev) || addr_prev >= 27'h800000 && l2_start)
+                    if ( ( (l2_start && !start_prev) || addr_prev >= 27'h800000 && l2_start) || start_registered)
                     begin
                     begin
+                        start_registered <= 1'b0;
                         if (l2_we)
                         if (l2_we)
                         begin
                         begin
                             // update cache and write SDRAM
                             // update cache and write SDRAM
@@ -162,7 +161,7 @@ begin
                             sdc_start_reg <= 1'b1;
                             sdc_start_reg <= 1'b1;
                             sdc_data_reg <= l2_data;
                             sdc_data_reg <= l2_data;
 
 
-                            cache_d <= {l2_addr[23:index_size], l2_data}; // tag + data
+                            cache_d <= {1'b1, l2_addr[23:index_size], l2_data}; // tag + data
                             cache_addr <= l2_addr[index_size-1:0];
                             cache_addr <= l2_addr[index_size-1:0];
                             
                             
                         end
                         end
@@ -197,8 +196,6 @@ begin
                     sdc_data_reg <= 32'd0;
                     sdc_data_reg <= 32'd0;
 
 
                     cache_we <= 1'b1;
                     cache_we <= 1'b1;
-                    valid_d <= 1'b1;
-                    valid_we <= 1'b1;
 
 
                     l2_done_reg <= 1'b1;
                     l2_done_reg <= 1'b1;
                 end
                 end
@@ -207,7 +204,7 @@ begin
             state_check_cache: 
             state_check_cache: 
             begin
             begin
                 // check cache. if hit, return cached item
                 // check cache. if hit, return cached item
-                if (valid_q && sdc_addr_reg[23:index_size] == cache_q[45:32]) // valid and tag check
+                if (cache_q[46] && sdc_addr_reg[23:index_size] == cache_q[45:32]) // valid and tag check
                 begin
                 begin
                     state <= state_done_high;
                     state <= state_done_high;
 
 
@@ -234,10 +231,8 @@ begin
                     sdc_start_reg <= 1'b0;
                     sdc_start_reg <= 1'b0;
 
 
                     cache_we <= 1'b1;
                     cache_we <= 1'b1;
-                    cache_d <= {sdc_addr_reg[23:index_size], sdc_q}; // tag + data
+                    cache_d <= {1'b1, sdc_addr_reg[23:index_size], sdc_q}; // tag + data
                     
                     
-                    valid_d <= 1'b1;
-                    valid_we <= 1'b1;
 
 
                     l2_done_reg <= 1'b1;
                     l2_done_reg <= 1'b1;
                     l2_q_reg <= sdc_q;
                     l2_q_reg <= sdc_q;

+ 41 - 46
Verilog/modules/Memory/L2cache.v

@@ -31,7 +31,7 @@ assign cache_reset = 1'b0;
 parameter cache_size = 1024;                // cache size in words. 8129*4bytes = 32KiB
 parameter cache_size = 1024;                // cache size in words. 8129*4bytes = 32KiB
 parameter index_size = 10;                  // index size: log2(cache_size)
 parameter index_size = 10;                  // index size: log2(cache_size)
 parameter tag_size = 14;                    // mem_add_bits-index_size = 24-13 = 11
 parameter tag_size = 14;                    // mem_add_bits-index_size = 24-13 = 11
-parameter cache_line_size = tag_size+32;    // tag + word
+parameter cache_line_size = tag_size+32+1;  // tag + word + valid bit
 
 
 reg [cache_line_size-1:0] cache [0:cache_size-1];   // cache memory
 reg [cache_line_size-1:0] cache [0:cache_size-1];   // cache memory
 
 
@@ -41,14 +41,14 @@ initial
 begin
 begin
     for (i = 0; i < cache_size; i = i + 1)
     for (i = 0; i < cache_size; i = i + 1)
     begin
     begin
-        cache[i] = 46'd0;
+        cache[i] = 47'd0;
     end
     end
 end
 end
 
 
 reg [index_size-1:0]        cache_addr = 10'd0;
 reg [index_size-1:0]        cache_addr = 10'd0;
-reg [cache_line_size-1:0]   cache_d = 46'd0;
+reg [cache_line_size-1:0]   cache_d = 47'd0;
 reg                         cache_we = 1'b0;
 reg                         cache_we = 1'b0;
-reg [cache_line_size-1:0]   cache_q = 46'd0;
+reg [cache_line_size-1:0]   cache_q = 47'd0;
 always @(posedge clk) 
 always @(posedge clk) 
 begin
 begin
     cache_q <= cache[cache_addr];
     cache_q <= cache[cache_addr];
@@ -77,42 +77,18 @@ parameter state_check_cache     = 3'd3;
 parameter state_miss_read_ram   = 3'd4;
 parameter state_miss_read_ram   = 3'd4;
 parameter state_delay_cache     = 3'd5;
 parameter state_delay_cache     = 3'd5;
 parameter state_done_high       = 3'd6;
 parameter state_done_high       = 3'd6;
+parameter state_clear_cache     = 3'd7;
 
 
-//wire cache_hit = valid_bits[cache_addr] && l2_addr[23:index_size] == cache_q[42:32];
+reg [31:0] addr_prev = 32'd0;
 
 
-// uninferrable valid bit memory
-reg [cache_size-1:0] valid_bits = 1024'd0;
-reg [9:0] valid_a = 10'd0;
-reg valid_d = 1'b0;
-reg valid_q = 1'b0;
-reg valid_we = 1'b0;
-always @(posedge clk) 
-begin
-    if (reset | cache_reset)
-    begin
-        valid_bits <= 1024'd0;
-    end
-    else
-    begin
-        valid_q <= valid_bits[valid_a];
-        if (valid_we)
-        begin
-            valid_bits[valid_a] <= valid_d;
-            $display("%d: wrote valid bit l2", $time);
-        end
-    end
-end
+reg [15:0] clear_cache_counter = 16'd0; // 64k max
 
 
-reg [31:0] addr_prev = 32'd0;
+reg start_registered = 1'b0;
 
 
 always @(posedge clk) 
 always @(posedge clk) 
 begin
 begin
     if (reset)
     if (reset)
     begin
     begin
-        valid_a <= 10'd0;
-        valid_d <= 1'b0;
-        valid_we <= 1'b0;
-
         l2_q_reg <= 32'd0;
         l2_q_reg <= 32'd0;
         l2_done_reg <= 1'b0;
         l2_done_reg <= 1'b0;
         sdc_addr_reg <= 24'd0;
         sdc_addr_reg <= 24'd0;
@@ -124,7 +100,11 @@ begin
         
         
         // Make sure the next cycle a new request can be detected!
         // Make sure the next cycle a new request can be detected!
         start_prev <= 1'b0;
         start_prev <= 1'b0;
-        state <= state_idle;
+        state <= state_clear_cache;
+
+        clear_cache_counter <= 16'd0;
+
+        start_registered <= 1'b0;
     end
     end
     else
     else
     begin
     begin
@@ -132,9 +112,6 @@ begin
         start_prev <= l2_start;
         start_prev <= l2_start;
         l2_done_reg <= 1'b0;
         l2_done_reg <= 1'b0;
         cache_we <= 1'b0;
         cache_we <= 1'b0;
-
-        valid_d <= 1'b0;
-        valid_we <= 1'b0;
         
         
 
 
         // NOTE: make sure to use latched l2_addr from rising start to make sure all addresses are correct!
         // NOTE: make sure to use latched l2_addr from rising start to make sure all addresses are correct!
@@ -143,16 +120,38 @@ begin
         case(state)
         case(state)
             state_init: 
             state_init: 
             begin
             begin
-                state <= state_idle;
+                state <= state_clear_cache;
+            end
+
+            state_clear_cache:
+            begin
+                if (l2_addr < 27'h800000 && ( (l2_start && !start_prev) || addr_prev >= 27'h800000 && l2_start) )
+                begin
+                    start_registered <= 1'b1;
+                end
+
+                if (clear_cache_counter == cache_size)
+                begin
+                    clear_cache_counter <= 16'd0;
+                    state <= state_idle;
+                end
+                else
+                begin
+                    clear_cache_counter <= clear_cache_counter + 1'b1;
+                    cache_we <= 1'b1;
+                    cache_d <= 47'd0;
+                    cache_addr <= clear_cache_counter;
+                end
+                
             end
             end
 
 
             state_idle: 
             state_idle: 
             begin
             begin
-                valid_a <= l2_addr[index_size-1:0];
                 if (l2_addr < 27'h800000)
                 if (l2_addr < 27'h800000)
                 begin
                 begin
-                    if ( (l2_start && !start_prev) || addr_prev >= 27'h800000 && l2_start)
+                    if ( ( (l2_start && !start_prev) || addr_prev >= 27'h800000 && l2_start) || start_registered)
                     begin
                     begin
+                        start_registered <= 1'b0;
                         if (l2_we)
                         if (l2_we)
                         begin
                         begin
                             // update cache and write SDRAM
                             // update cache and write SDRAM
@@ -162,7 +161,7 @@ begin
                             sdc_start_reg <= 1'b1;
                             sdc_start_reg <= 1'b1;
                             sdc_data_reg <= l2_data;
                             sdc_data_reg <= l2_data;
 
 
-                            cache_d <= {l2_addr[23:index_size], l2_data}; // tag + data
+                            cache_d <= {1'b1, l2_addr[23:index_size], l2_data}; // tag + data
                             cache_addr <= l2_addr[index_size-1:0];
                             cache_addr <= l2_addr[index_size-1:0];
                             
                             
                         end
                         end
@@ -197,8 +196,6 @@ begin
                     sdc_data_reg <= 32'd0;
                     sdc_data_reg <= 32'd0;
 
 
                     cache_we <= 1'b1;
                     cache_we <= 1'b1;
-                    valid_d <= 1'b1;
-                    valid_we <= 1'b1;
 
 
                     l2_done_reg <= 1'b1;
                     l2_done_reg <= 1'b1;
                 end
                 end
@@ -207,7 +204,7 @@ begin
             state_check_cache: 
             state_check_cache: 
             begin
             begin
                 // check cache. if hit, return cached item
                 // check cache. if hit, return cached item
-                if (valid_q && sdc_addr_reg[23:index_size] == cache_q[45:32]) // valid and tag check
+                if (cache_q[46] && sdc_addr_reg[23:index_size] == cache_q[45:32]) // valid and tag check
                 begin
                 begin
                     state <= state_done_high;
                     state <= state_done_high;
 
 
@@ -234,10 +231,8 @@ begin
                     sdc_start_reg <= 1'b0;
                     sdc_start_reg <= 1'b0;
 
 
                     cache_we <= 1'b1;
                     cache_we <= 1'b1;
-                    cache_d <= {sdc_addr_reg[23:index_size], sdc_q}; // tag + data
+                    cache_d <= {1'b1, sdc_addr_reg[23:index_size], sdc_q}; // tag + data
                     
                     
-                    valid_d <= 1'b1;
-                    valid_we <= 1'b1;
 
 
                     l2_done_reg <= 1'b1;
                     l2_done_reg <= 1'b1;
                     l2_q_reg <= sdc_q;
                     l2_q_reg <= sdc_q;

+ 23 - 10
Verilog/output/FPGC.gtkw

@@ -1,23 +1,22 @@
 [*]
 [*]
 [*] GTKWave Analyzer v3.3.107 (w)1999-2020 BSI
 [*] GTKWave Analyzer v3.3.107 (w)1999-2020 BSI
-[*] Sun Aug 27 13:38:25 2023
+[*] Sat Sep  2 15:51:57 2023
 [*]
 [*]
 [dumpfile] "/home/bart/Documents/FPGA/FPGC6/Verilog/output/wave.vcd"
 [dumpfile] "/home/bart/Documents/FPGA/FPGC6/Verilog/output/wave.vcd"
-[dumpfile_mtime] "Sun Aug 27 13:36:03 2023"
-[dumpfile_size] 16357434
+[dumpfile_mtime] "Sat Sep  2 15:42:56 2023"
+[dumpfile_size] 48021003
 [savefile] "/home/bart/Documents/FPGA/FPGC6/Verilog/output/FPGC.gtkw"
 [savefile] "/home/bart/Documents/FPGA/FPGC6/Verilog/output/FPGC.gtkw"
-[timestart] 204900
-[size] 2560 1387
+[timestart] 0
+[size] 1920 1054
 [pos] -1 -1
 [pos] -1 -1
-*-16.666576 525000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+*-26.666576 407400000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 [treeopen] FPGC_tb.
 [treeopen] FPGC_tb.
 [treeopen] FPGC_tb.fpgc.
 [treeopen] FPGC_tb.fpgc.
-[treeopen] FPGC_tb.fpgc.cpu.
 [treeopen] FPGC_tb.fpgc.cpu.arbiter.
 [treeopen] FPGC_tb.fpgc.cpu.arbiter.
 [sst_width] 227
 [sst_width] 227
-[signals_width] 713
+[signals_width] 366
 [sst_expanded] 1
 [sst_expanded] 1
-[sst_vpaned_height] 985
+[sst_vpaned_height] 495
 @28
 @28
 FPGC_tb.fpgc.clk
 FPGC_tb.fpgc.clk
 @22
 @22
@@ -50,7 +49,6 @@ FPGC_tb.fpgc.cpu.arbiter.done_b
 FPGC_tb.fpgc.cpu.arbiter.q[31:0]
 FPGC_tb.fpgc.cpu.arbiter.q[31:0]
 @200
 @200
 -
 -
-@201
 -
 -
 @22
 @22
 FPGC_tb.fpgc.cpu.arbiter.bus_addr[26:0]
 FPGC_tb.fpgc.cpu.arbiter.bus_addr[26:0]
@@ -293,7 +291,22 @@ FPGC_tb.fpgc.cpu.clearCache_MEM
 @200
 @200
 -
 -
 -L2Cache
 -L2Cache
+@24
+FPGC_tb.fpgc.l2cache.state[2:0]
+FPGC_tb.fpgc.l2cache.clear_cache_counter[15:0]
+@200
 -
 -
+@29
+FPGC_tb.fpgc.l2cache.start_registered
+@24
+FPGC_tb.fpgc.l2cache.cache_addr[9:0]
+@22
+FPGC_tb.fpgc.l2cache.cache_d[46:0]
+@28
+FPGC_tb.fpgc.l2cache.cache_we
+@22
+FPGC_tb.fpgc.l2cache.cache_q[46:0]
+@200
 -
 -
 -
 -
 -
 -