Browse Source

The L2 cache, now running at 100 MHz, greatly increases performance. I attempted to create separate L1I and L1D caches, but they become unstable and the issue is difficult to simulate or replicate. Therefore, all L1 caching is now bypassed; the unstable code can be found in L1IcacheUnstable.v and L1DcacheUnstable.v. Since there is no L1 cache anymore, I removed some ccache statements to increase performance, as this instruction currently does nothing.

bart 1 year ago
parent
commit
add43b75da

+ 1 - 1
BCC/BDOS/lib/fs.c

@@ -142,7 +142,7 @@ word FS_spiTransfer(word dataByte)
 {
     word retval = 0;
     asm(
-        "ccache\n"
+        //"ccache\n"
         "load32 FS_SPI1_ADDR r2             ; r2 = FS_SPI1_ADDR\n"
         "write 0 r2 r4                      ; write r4 over SPI1\n"
         "read 0 r2 r2                       ; read return value\n"

+ 1 - 1
BCC/BDOS/lib/wiz5500.c

@@ -167,7 +167,7 @@ word WizSpiTransfer(word dataByte)
 {
     word retval = 0;
     asm(
-        "ccache\n"
+        //"ccache\n"
         "load32 W5500_SPI3_ADDR r2          ; r2 = W5500_SPI3_ADDR\n"
         "write 0 r2 r4                      ; write r4 over SPI3\n"
         "read 0 r2 r2                       ; read return value\n"

+ 1 - 1
BCC/FPGCbuildTools/asm/lib/fs.c

@@ -128,7 +128,7 @@ word FS_spiTransfer(word dataByte)
 {
     word retval = 0;
     asm(
-        "ccache\n"
+        //"ccache\n"
         "load32 0xC0272B r2             ; r2 = 0xC0272B\n"
         "write 0 r2 r4                      ; write r4 over SPI1\n"
         "read 0 r2 r2                       ; read return value\n"

+ 1 - 1
BCC/FPGCbuildTools/bcc/lib/fs.c

@@ -128,7 +128,7 @@ word FS_spiTransfer(word dataByte)
 {
     word retval = 0;
     asm(
-        "ccache\n"
+        //"ccache\n"
         "load32 0xC0272B r2             ; r2 = 0xC0272B\n"
         "write 0 r2 r4                      ; write r4 over SPI1\n"
         "read 0 r2 r2                       ; read return value\n"

+ 1 - 1
BCC/userBDOS/LIB/FS.C

@@ -128,7 +128,7 @@ word FS_spiTransfer(word dataByte)
 {
   word retval = 0;
   asm(
-    "ccache\n"
+    //"ccache\n"
     "load32 0xC0272B r2       ; r2 = 0xC0272B\n"
     "write 0 r2 r4            ; write r4 over SPI1\n"
     "read 0 r2 r2             ; read return value\n"

+ 1 - 1
BCC/userBDOS/LIB/SYS.C

@@ -39,7 +39,7 @@ word* syscall(word ID)
     "push r13\n"
     "push r14\n"
     "push r15\n"
-    "ccache\n"
+    //"ccache\n"
     "savpc r1\n"
     "push r1\n"
     "jump 4\n"

+ 1 - 1
BCC/userBDOS/LIB/WIZ5500.C

@@ -158,7 +158,7 @@ word WizSpiTransfer(word dataByte)
 {
   word retval = 0;
   asm(
-      "ccache\n"
+      //"ccache\n"
       "load32 0xC02731 r2                 ; r2 = 0xC02731\n"
       "write 0 r2 r4                      ; write r4 over SPI3\n"
       "read 0 r2 r2                       ; read return value\n"

+ 2 - 1
Quartus/FPGC.qsf

@@ -226,7 +226,8 @@ set_instance_assignment -name WEAK_PULL_UP_RESISTOR ON -to GPI[3]
 set_global_assignment -name SDC_FILE FPGC.sdc
 set_global_assignment -name QIP_FILE clkMux/synthesis/clkMux.qip
 set_global_assignment -name VERILOG_FILE modules/FPGC.v
-set_global_assignment -name VERILOG_FILE modules/Memory/L1cache.v
+set_global_assignment -name VERILOG_FILE modules/Memory/L1Icache.v
+set_global_assignment -name VERILOG_FILE modules/Memory/L1Dcache.v
 set_global_assignment -name VERILOG_FILE modules/Memory/L2cache.v
 set_global_assignment -name VERILOG_FILE Source/Test02_project_key.v
 set_global_assignment -name VERILOG_FILE modules/GPU/NTSC/RGBtoYPhaseAmpl.v

BIN
Quartus/FPGC.qws


+ 2 - 2
Quartus/modules/CPU/CPU.v

@@ -226,7 +226,7 @@ wire             l1i_start; // start trigger
 wire [31:0]      l1i_q;     // memory output
 wire             l1i_done;  // output ready
 
-L1cache l1icache(
+L1Icache l1icache(
 .clk            (clk),
 .reset          (reset),
 .cache_reset    (clearCache_EX | clearCache_MEM),
@@ -661,7 +661,7 @@ wire             l1d_start; // start trigger
 wire [31:0]      l1d_q;     // memory output
 wire             l1d_done;  // output ready
 
-L1cache l1dcache(
+L1Dcache l1dcache(
 .clk            (clk),
 .reset          (reset),
 .cache_reset    (clearCache_EX | clearCache_MEM),

+ 2 - 10
Quartus/modules/FPGC.v

@@ -670,16 +670,8 @@ wire             l2_start; // start trigger
 wire [31:0]      l2_q;     // memory output
 wire             l2_done;  // output ready
 
-// Currently, L2 cache decreases performance, so we skip it
-assign sdc_addr =   l2_addr;
-assign sdc_data =  l2_data;
-assign sdc_we =    l2_we;
-assign sdc_start =  l2_start;
-assign l2_q =        sdc_q;
-assign l2_done =   sdc_done;
-/*
 L2cache l2cache(
-.clk            (clk),
+.clk            (clk_SDRAM),
 .reset          (reset),
 
 // CPU bus
@@ -698,7 +690,7 @@ L2cache l2cache(
 .sdc_q          (sdc_q),
 .sdc_done       (sdc_done)
 );
-*/
+
 
 //---------------CPU----------------
 // CPU I/O

+ 37 - 0
Quartus/modules/Memory/L1Dcache.v

@@ -0,0 +1,37 @@
+/*
+* L1 Data Cache (bypass stub: no storage, no state machine)
+* Sits between Datamem and arbiter
+* Currently skipped because of stability issues: all bus signals are wired straight through
+*/
+module L1Dcache(
+    // clock/reset inputs (declared for interface compatibility; not referenced in this bypass body)
+    input               clk,
+    input               reset,
+    input               cache_reset,
+
+    // requester-side bus (named l2_* / "CPU bus"; presumably driven by Datamem - TODO confirm)
+    input [31:0]        l2_addr,
+    input [31:0]        l2_data,
+    input               l2_we,
+    input               l2_start,
+    output [31:0]       l2_q,
+    output              l2_done,
+
+    // memory-side bus (named sdc_* / "SDRAM controller bus"; presumably connected to the arbiter - TODO confirm)
+    output [31:0]       sdc_addr,
+    output [31:0]       sdc_data,
+    output              sdc_we,
+    output              sdc_start,
+    input [31:0]        sdc_q,
+    input               sdc_done
+);
+
+// passthrough: forward the request to the memory side and the response back, unchanged and unregistered
+assign sdc_addr =  l2_addr;
+assign sdc_data = l2_data;
+assign sdc_we =   l2_we;
+assign sdc_start = l2_start;
+assign l2_q =       sdc_q;     // read data returned as-is
+assign l2_done =  sdc_done;    // completion flag returned as-is
+
+endmodule

+ 37 - 0
Quartus/modules/Memory/L1Icache.v

@@ -0,0 +1,37 @@
+/*
+* L1 Instruction Cache (bypass stub: no storage, no state machine)
+* Sits between Instrmem and arbiter
+* Currently skipped because of stability issues: all bus signals are wired straight through
+*/
+module L1Icache(
+    // clock/reset inputs (declared for interface compatibility; not referenced in this bypass body)
+    input               clk,
+    input               reset,
+    input               cache_reset,
+
+    // requester-side bus (named l2_* / "CPU bus"; presumably driven by Instrmem - TODO confirm)
+    input [31:0]        l2_addr,
+    input [31:0]        l2_data,
+    input               l2_we,
+    input               l2_start,
+    output [31:0]       l2_q,
+    output              l2_done,
+
+    // memory-side bus (named sdc_* / "SDRAM controller bus"; presumably connected to the arbiter - TODO confirm)
+    output [31:0]       sdc_addr,
+    output [31:0]       sdc_data,
+    output              sdc_we,
+    output              sdc_start,
+    input [31:0]        sdc_q,
+    input               sdc_done
+);
+
+// passthrough: forward the request to the memory side and the response back, unchanged and unregistered
+assign sdc_addr =  l2_addr;
+assign sdc_data = l2_data;
+assign sdc_we =   l2_we;
+assign sdc_start = l2_start;
+assign l2_q =       sdc_q;     // fetched word returned as-is
+assign l2_done =  sdc_done;    // completion flag returned as-is
+
+endmodule

+ 36 - 23
Quartus/modules/Memory/L2cache.v

@@ -1,6 +1,7 @@
 /*
 * L2 Cache
 * Sits between CPU and SDRAM controller
+* Made to run at 100MHz
 */
 module L2cache(
     // clock/reset inputs
@@ -34,6 +35,15 @@ parameter cache_line_size = tag_size+32;    // tag + word
 
 reg [cache_line_size-1:0] cache [0:cache_size-1];   // cache memory
 
+integer i;
+// init cache to all zeros
+initial
+begin
+    for (i = 0; i < cache_size; i = i + 1)
+    begin
+        cache[i] = 46'd0;
+    end
+end
 
 reg [index_size-1:0]        cache_addr = 10'd0;
 reg [cache_line_size-1:0]   cache_d = 46'd0;
@@ -45,7 +55,7 @@ begin
     if (cache_we)
     begin
         cache[cache_addr] <= cache_d;
-        //$display("%d: wrote to l2 cache", $time);
+        $display("%d: wrote to l2 cache", $time);
     end
 end
 
@@ -66,6 +76,7 @@ parameter state_writing         = 3'd2;
 parameter state_check_cache     = 3'd3;
 parameter state_miss_read_ram   = 3'd4;
 parameter state_delay_cache     = 3'd5;
+parameter state_done_high       = 3'd6;
 
 //wire cache_hit = valid_bits[cache_addr] && l2_addr[23:index_size] == cache_q[42:32];
 
@@ -77,7 +88,7 @@ reg valid_q = 1'b0;
 reg valid_we = 1'b0;
 always @(posedge clk) 
 begin
-    if (reset)
+    if (reset | cache_reset)
     begin
         valid_bits <= 1024'd0;
     end
@@ -87,11 +98,12 @@ begin
         if (valid_we)
         begin
             valid_bits[valid_a] <= valid_d;
-            //$display("%d: wrote valid bit", $time);
+            $display("%d: wrote valid bit l2", $time);
         end
     end
 end
 
+reg [31:0] addr_prev = 32'd0;
 
 always @(posedge clk) 
 begin
@@ -108,18 +120,19 @@ begin
         sdc_we_reg <= 1'b0;
         sdc_start_reg <= 1'b0;
 
+        addr_prev <= 32'd0;
+        
+        // Make sure the next cycle a new request can be detected!
         start_prev <= 1'b0;
-
-        state <= state_init;
+        state <= state_idle;
     end
     else
     begin
-
+        addr_prev <= l2_addr;
         start_prev <= l2_start;
         l2_done_reg <= 1'b0;
         cache_we <= 1'b0;
 
-        valid_a <= 10'd0;
         valid_d <= 1'b0;
         valid_we <= 1'b0;
         
@@ -138,7 +151,7 @@ begin
                 valid_a <= l2_addr[index_size-1:0];
                 if (l2_addr < 27'h800000)
                 begin
-                    if (l2_start && !start_prev)
+                    if ( (l2_start && !start_prev) || addr_prev >= 27'h800000 && l2_start)
                     begin
                         if (l2_we)
                         begin
@@ -149,12 +162,9 @@ begin
                             sdc_start_reg <= 1'b1;
                             sdc_data_reg <= l2_data;
 
-                            cache_we <= 1'b1;
                             cache_d <= {l2_addr[23:index_size], l2_data}; // tag + data
                             cache_addr <= l2_addr[index_size-1:0];
                             
-                            valid_d <= 1'b1;
-                            valid_we <= 1'b1;
                         end
                         else
                         begin
@@ -173,21 +183,22 @@ begin
             state_delay_cache:
             begin
                 state <= state_check_cache;
-                //valid_a <= cache_addr;
             end
 
-            state_writing: 
+            state_writing: // Currently disabled setting valid bit to 0. Otherwise causes crashes after a few seconds in L1D cache.
             begin
                 if (sdc_done)
                 begin
-                    state <= state_idle;
+                    state <= state_done_high;
 
                     sdc_addr_reg <= 24'd0;
                     sdc_we_reg <= 1'b0;
                     sdc_start_reg <= 1'b0;
                     sdc_data_reg <= 32'd0;
 
-                    cache_we <= 1'b0;
+                    cache_we <= 1'b1; // as long as valid_d <= 0, this does not matter
+                    valid_d <= 1'b1;
+                    valid_we <= 1'b1;
 
                     l2_done_reg <= 1'b1;
                 end
@@ -198,7 +209,7 @@ begin
                 // check cache. if hit, return cached item
                 if (valid_q && sdc_addr_reg[23:index_size] == cache_q[45:32]) // valid and tag check
                 begin
-                    state <= state_idle;
+                    state <= state_done_high;
 
                     l2_done_reg <= 1'b1;
                     l2_q_reg <= cache_q[31:0];
@@ -216,7 +227,7 @@ begin
             begin
                 if (sdc_done)
                 begin
-                    state <= state_idle;
+                    state <= state_done_high;
 
                     // we received item from ram, now write to cache and return
                     sdc_addr_reg <= 24'd0;
@@ -225,8 +236,6 @@ begin
                     cache_we <= 1'b1;
                     cache_d <= {sdc_addr_reg[23:index_size], sdc_q}; // tag + data
                     
-                    //valid_bits[cache_addr] <= 1'b1;
-                    valid_a <= cache_addr;
                     valid_d <= 1'b1;
                     valid_we <= 1'b1;
 
@@ -235,14 +244,18 @@ begin
                 end
             end
 
+            state_done_high:
+            begin
+                // keep done high for one cycle as we run on double clock speed from CPU
+                l2_done_reg <= 1'b1;
+                state <= state_idle;
+            end
+
         endcase
     end
 end
 
-
-
-
-
+// passthrough when above SDRAM memory range
 assign sdc_addr =   (l2_addr < 27'h800000) ? sdc_addr_reg   : l2_addr;
 assign sdc_data =   (l2_addr < 27'h800000) ? sdc_data_reg   : l2_data;
 assign sdc_we =     (l2_addr < 27'h800000) ? sdc_we_reg     : l2_we;

BIN
Quartus/output_files/output_file.jic


BIN
Verilog/memory/code.bin


File diff suppressed because it is too large
+ 0 - 0
Verilog/memory/spi.txt


+ 2 - 2
Verilog/modules/CPU/CPU.v

@@ -226,7 +226,7 @@ wire             l1i_start; // start trigger
 wire [31:0]      l1i_q;     // memory output
 wire             l1i_done;  // output ready
 
-L1cache l1icache(
+L1Icache l1icache(
 .clk            (clk),
 .reset          (reset),
 .cache_reset    (clearCache_EX | clearCache_MEM),
@@ -661,7 +661,7 @@ wire             l1d_start; // start trigger
 wire [31:0]      l1d_q;     // memory output
 wire             l1d_done;  // output ready
 
-L1cache l1dcache(
+L1Dcache l1dcache(
 .clk            (clk),
 .reset          (reset),
 .cache_reset    (clearCache_EX | clearCache_MEM),

+ 1 - 10
Verilog/modules/FPGC6.v

@@ -635,16 +635,8 @@ wire             l2_start; // start trigger
 wire [31:0]      l2_q;     // memory output
 wire             l2_done;  // output ready
 
-// Currently, L2 cache decreases performance, so we skip it
-assign sdc_addr =   l2_addr;
-assign sdc_data =  l2_data;
-assign sdc_we =    l2_we;
-assign sdc_start =  l2_start;
-assign l2_q =        sdc_q;
-assign l2_done =   sdc_done;
-/*
 L2cache l2cache(
-.clk            (clk),
+.clk            (clk_SDRAM),
 .reset          (reset),
 
 // CPU bus
@@ -663,7 +655,6 @@ L2cache l2cache(
 .sdc_q          (sdc_q),
 .sdc_done       (sdc_done)
 );
-*/
 
 
 //---------------CPU----------------

+ 37 - 0
Verilog/modules/Memory/L1Dcache.v

@@ -0,0 +1,37 @@
+/*
+* L1 Data Cache (bypass stub: no storage, no state machine)
+* Sits between Datamem and arbiter
+* Currently skipped because of stability issues: all bus signals are wired straight through
+*/
+module L1Dcache(
+    // clock/reset inputs (declared for interface compatibility; not referenced in this bypass body)
+    input               clk,
+    input               reset,
+    input               cache_reset,
+
+    // requester-side bus (named l2_* / "CPU bus"; presumably driven by Datamem - TODO confirm)
+    input [31:0]        l2_addr,
+    input [31:0]        l2_data,
+    input               l2_we,
+    input               l2_start,
+    output [31:0]       l2_q,
+    output              l2_done,
+
+    // memory-side bus (named sdc_* / "SDRAM controller bus"; presumably connected to the arbiter - TODO confirm)
+    output [31:0]       sdc_addr,
+    output [31:0]       sdc_data,
+    output              sdc_we,
+    output              sdc_start,
+    input [31:0]        sdc_q,
+    input               sdc_done
+);
+
+// passthrough: forward the request to the memory side and the response back, unchanged and unregistered
+assign sdc_addr =  l2_addr;
+assign sdc_data = l2_data;
+assign sdc_we =   l2_we;
+assign sdc_start = l2_start;
+assign l2_q =       sdc_q;     // read data returned as-is
+assign l2_done =  sdc_done;    // completion flag returned as-is
+
+endmodule

+ 1 - 1
Verilog/modules/Memory/L1cache.v → Verilog/modules/Memory/L1DcacheUnstable.v

@@ -3,7 +3,7 @@
 * Sits between Datamem or Instrmem and arbiter
 * Current problem: crashes unless cache item is invalidated after a WRITE
 */
-module L1cache(
+module L1Dcache(
     // clock/reset inputs
     input               clk,
     input               reset,

+ 37 - 0
Verilog/modules/Memory/L1Icache.v

@@ -0,0 +1,37 @@
+/*
+* L1 Instruction Cache (bypass stub: no storage, no state machine)
+* Sits between Instrmem and arbiter
+* Currently skipped because of stability issues: all bus signals are wired straight through
+*/
+module L1Icache(
+    // clock/reset inputs (declared for interface compatibility; not referenced in this bypass body)
+    input               clk,
+    input               reset,
+    input               cache_reset,
+
+    // requester-side bus (named l2_* / "CPU bus"; presumably driven by Instrmem - TODO confirm)
+    input [31:0]        l2_addr,
+    input [31:0]        l2_data,
+    input               l2_we,
+    input               l2_start,
+    output [31:0]       l2_q,
+    output              l2_done,
+
+    // memory-side bus (named sdc_* / "SDRAM controller bus"; presumably connected to the arbiter - TODO confirm)
+    output [31:0]       sdc_addr,
+    output [31:0]       sdc_data,
+    output              sdc_we,
+    output              sdc_start,
+    input [31:0]        sdc_q,
+    input               sdc_done
+);
+
+// passthrough: forward the request to the memory side and the response back, unchanged and unregistered
+assign sdc_addr =  l2_addr;
+assign sdc_data = l2_data;
+assign sdc_we =   l2_we;
+assign sdc_start = l2_start;
+assign l2_q =       sdc_q;     // fetched word returned as-is
+assign l2_done =  sdc_done;    // completion flag returned as-is
+
+endmodule

+ 23 - 48
Quartus/modules/Memory/L1cache.v → Verilog/modules/Memory/L1IcacheUnstable.v

@@ -1,9 +1,9 @@
 /*
-* L1 Cache
-* Sits between Datamem or Instrmem and arbiter
-* Current problem: crashes unless cache item is invalidated after a WRITE
+* L1 Instruction Cache
+* Sits between Instrmem and arbiter
+* Current problem: requires clear cache when spi transfer loop (no idea why)
 */
-module L1cache(
+module L1Icache(
     // clock/reset inputs
     input               clk,
     input               reset,
@@ -33,6 +33,15 @@ parameter cache_line_size = tag_size+32;    // tag + word
 
 reg [cache_line_size-1:0] cache [0:cache_size-1];   // cache memory
 
+integer i;
+// init cache to all zeros
+initial
+begin
+    for (i = 0; i < cache_size; i = i + 1)
+    begin
+        cache[i] = 46'd0;
+    end
+end
 
 reg [index_size-1:0]        cache_addr = 10'd0;
 reg [cache_line_size-1:0]   cache_d = 46'd0;
@@ -43,8 +52,9 @@ begin
     cache_q <= cache[cache_addr];
     if (cache_we)
     begin
+        cache_q <= cache_d;
         cache[cache_addr] <= cache_d;
-        //$display("%d: wrote to l1 cache", $time);
+        $display("%d: wrote to l1i cache", $time);
     end
 end
 
@@ -86,7 +96,7 @@ begin
         if (valid_we)
         begin
             valid_bits[valid_a] <= valid_d;
-            //$display("%d: wrote valid bit l1", $time);
+            $display("%d: wrote valid bit l1", $time);
         end
     end
 end
@@ -141,29 +151,13 @@ begin
                 begin
                     if ( (l2_start && !start_prev) || addr_prev >= 27'h800000 && l2_start)
                     begin
-                        if (l2_we)
-                        begin
-                            // update cache and write SDRAM
-                            state <= state_writing;
-                            sdc_addr_reg <= l2_addr;
-                            sdc_we_reg <= 1'b1;
-                            sdc_start_reg <= 1'b1;
-                            sdc_data_reg <= l2_data;
-
-                            cache_d <= {l2_addr[23:index_size], l2_data}; // tag + data
-                            cache_addr <= l2_addr[index_size-1:0];
-                            
-                        end
-                        else
-                        begin
-                            // wait a cycle for cache to be read
-                            cache_addr <= l2_addr[index_size-1:0];
-                            state <= state_delay_cache;
-
-                            // just in case we have a cache miss in the next cycle, prepare address on sdram controller bus
-                            sdc_addr_reg <= l2_addr;
-                            sdc_we_reg <= 1'b0;
-                        end
+                        // wait a cycle for cache to be read
+                        cache_addr <= l2_addr[index_size-1:0];
+                        state <= state_delay_cache;
+
+                        // just in case we have a cache miss in the next cycle, prepare address on sdram controller bus
+                        sdc_addr_reg <= l2_addr;
+                        sdc_we_reg <= 1'b0;
                     end
                 end
             end
@@ -173,25 +167,6 @@ begin
                 state <= state_check_cache;
             end
 
-            state_writing: // Currently disabled setting valid bit to 0. Otherwise causes crashes after a few seconds in L1D cache.
-            begin
-                if (sdc_done)
-                begin
-                    state <= state_idle;
-
-                    sdc_addr_reg <= 24'd0;
-                    sdc_we_reg <= 1'b0;
-                    sdc_start_reg <= 1'b0;
-                    sdc_data_reg <= 32'd0;
-
-                    cache_we <= 1'b1; // as long as valid_d <= 0, this does not matter
-                    valid_d <= 1'b0;
-                    valid_we <= 1'b1;
-
-                    l2_done_reg <= 1'b1;
-                end
-            end
-
             state_check_cache: 
             begin
                 // check cache. if hit, return cached item

+ 26 - 22
Verilog/modules/Memory/L2cache.v

@@ -1,6 +1,7 @@
 /*
 * L2 Cache
 * Sits between CPU and SDRAM controller
+* Made to run at 100MHz
 */
 module L2cache(
     // clock/reset inputs
@@ -75,6 +76,7 @@ parameter state_writing         = 3'd2;
 parameter state_check_cache     = 3'd3;
 parameter state_miss_read_ram   = 3'd4;
 parameter state_delay_cache     = 3'd5;
+parameter state_done_high       = 3'd6;
 
 //wire cache_hit = valid_bits[cache_addr] && l2_addr[23:index_size] == cache_q[42:32];
 
@@ -86,7 +88,7 @@ reg valid_q = 1'b0;
 reg valid_we = 1'b0;
 always @(posedge clk) 
 begin
-    if (reset)
+    if (reset | cache_reset)
     begin
         valid_bits <= 1024'd0;
     end
@@ -96,11 +98,12 @@ begin
         if (valid_we)
         begin
             valid_bits[valid_a] <= valid_d;
-            $display("%d: wrote valid bit", $time);
+            $display("%d: wrote valid bit l2", $time);
         end
     end
 end
 
+reg [31:0] addr_prev = 32'd0;
 
 always @(posedge clk) 
 begin
@@ -117,18 +120,19 @@ begin
         sdc_we_reg <= 1'b0;
         sdc_start_reg <= 1'b0;
 
+        addr_prev <= 32'd0;
+        
+        // Make sure the next cycle a new request can be detected!
         start_prev <= 1'b0;
-
-        state <= state_init;
+        state <= state_idle;
     end
     else
     begin
-
+        addr_prev <= l2_addr;
         start_prev <= l2_start;
         l2_done_reg <= 1'b0;
         cache_we <= 1'b0;
 
-        valid_a <= 10'd0;
         valid_d <= 1'b0;
         valid_we <= 1'b0;
         
@@ -147,7 +151,7 @@ begin
                 valid_a <= l2_addr[index_size-1:0];
                 if (l2_addr < 27'h800000)
                 begin
-                    if (l2_start && !start_prev)
+                    if ( (l2_start && !start_prev) || addr_prev >= 27'h800000 && l2_start)
                     begin
                         if (l2_we)
                         begin
@@ -158,12 +162,9 @@ begin
                             sdc_start_reg <= 1'b1;
                             sdc_data_reg <= l2_data;
 
-                            cache_we <= 1'b1;
                             cache_d <= {l2_addr[23:index_size], l2_data}; // tag + data
                             cache_addr <= l2_addr[index_size-1:0];
                             
-                            valid_d <= 1'b1;
-                            valid_we <= 1'b1;
                         end
                         else
                         begin
@@ -182,21 +183,22 @@ begin
             state_delay_cache:
             begin
                 state <= state_check_cache;
-                //valid_a <= cache_addr;
             end
 
-            state_writing: 
+            state_writing: // Currently disabled setting valid bit to 0. Otherwise causes crashes after a few seconds in L1D cache.
             begin
                 if (sdc_done)
                 begin
-                    state <= state_idle;
+                    state <= state_done_high;
 
                     sdc_addr_reg <= 24'd0;
                     sdc_we_reg <= 1'b0;
                     sdc_start_reg <= 1'b0;
                     sdc_data_reg <= 32'd0;
 
-                    cache_we <= 1'b0;
+                    cache_we <= 1'b1; // as long as valid_d <= 0, this does not matter
+                    valid_d <= 1'b1;
+                    valid_we <= 1'b1;
 
                     l2_done_reg <= 1'b1;
                 end
@@ -207,7 +209,7 @@ begin
                 // check cache. if hit, return cached item
                 if (valid_q && sdc_addr_reg[23:index_size] == cache_q[45:32]) // valid and tag check
                 begin
-                    state <= state_idle;
+                    state <= state_done_high;
 
                     l2_done_reg <= 1'b1;
                     l2_q_reg <= cache_q[31:0];
@@ -225,7 +227,7 @@ begin
             begin
                 if (sdc_done)
                 begin
-                    state <= state_idle;
+                    state <= state_done_high;
 
                     // we received item from ram, now write to cache and return
                     sdc_addr_reg <= 24'd0;
@@ -234,8 +236,6 @@ begin
                     cache_we <= 1'b1;
                     cache_d <= {sdc_addr_reg[23:index_size], sdc_q}; // tag + data
                     
-                    //valid_bits[cache_addr] <= 1'b1;
-                    valid_a <= cache_addr;
                     valid_d <= 1'b1;
                     valid_we <= 1'b1;
 
@@ -244,14 +244,18 @@ begin
                 end
             end
 
+            state_done_high:
+            begin
+                // keep done high for one cycle as we run on double clock speed from CPU
+                l2_done_reg <= 1'b1;
+                state <= state_idle;
+            end
+
         endcase
     end
 end
 
-
-
-
-
+// passthrough when above SDRAM memory range
 assign sdc_addr =   (l2_addr < 27'h800000) ? sdc_addr_reg   : l2_addr;
 assign sdc_data =   (l2_addr < 27'h800000) ? sdc_data_reg   : l2_data;
 assign sdc_we =     (l2_addr < 27'h800000) ? sdc_we_reg     : l2_we;

+ 20 - 72
Verilog/output/FPGC.gtkw

@@ -1,15 +1,15 @@
 [*]
 [*] GTKWave Analyzer v3.3.107 (w)1999-2020 BSI
-[*] Thu Aug 24 20:42:17 2023
+[*] Sun Aug 27 09:16:09 2023
 [*]
 [dumpfile] "/home/bart/Documents/FPGA/FPGC6/Verilog/output/wave.vcd"
-[dumpfile_mtime] "Thu Aug 24 20:17:33 2023"
-[dumpfile_size] 28102944
+[dumpfile_mtime] "Sun Aug 27 09:15:50 2023"
+[dumpfile_size] 30040315
 [savefile] "/home/bart/Documents/FPGA/FPGC6/Verilog/output/FPGC.gtkw"
-[timestart] 267700000
+[timestart] 310055000
 [size] 2560 1387
 [pos] -1 -1
-*-22.666576 315845000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+*-17.666576 310851000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 [treeopen] FPGC_tb.
 [treeopen] FPGC_tb.fpgc.
 [treeopen] FPGC_tb.fpgc.cpu.
@@ -17,7 +17,7 @@
 [sst_width] 227
 [signals_width] 713
 [sst_expanded] 1
-[sst_vpaned_height] 775
+[sst_vpaned_height] 985
 @28
 FPGC_tb.fpgc.clk
 @22
@@ -83,11 +83,14 @@ FPGC_tb.fpgc.cpu.sdc_done
 @200
 -
 -SDRAMcontroller
+@24
+FPGC_tb.fpgc.sdramcontroller.sdc_addr[23:0]
 @22
 FPGC_tb.fpgc.sdramcontroller.sdc_data[31:0]
 @28
 FPGC_tb.fpgc.sdramcontroller.sdc_we
 FPGC_tb.fpgc.sdramcontroller.sdc_start
+FPGC_tb.fpgc.sdramcontroller.sdc_done
 @24
 FPGC_tb.fpgc.sdramcontroller.state[4:0]
 @28
@@ -203,8 +206,9 @@ FPGC_tb.fpgc.cpu.interruptValid
 FPGC_tb.fpgc.cpu.instrMem.clk
 @200
 -
-@22
+@25
 FPGC_tb.fpgc.cpu.instrMem.addr[31:0]
+@22
 FPGC_tb.fpgc.cpu.instrMem.q[31:0]
 @28
 FPGC_tb.fpgc.cpu.instrMem.ignoreNext
@@ -231,7 +235,7 @@ FPGC_tb.fpgc.cpu.l1icache.valid_a[9:0]
 FPGC_tb.fpgc.cpu.l1icache.valid_d
 FPGC_tb.fpgc.cpu.l1icache.valid_q
 FPGC_tb.fpgc.cpu.l1icache.valid_we
-@a3
+@a2
 FPGC_tb.fpgc.cpu.l1icache.valid_bits[1023:0]
 @200
 -
@@ -247,6 +251,14 @@ FPGC_tb.fpgc.cpu.l1icache.state[2:0]
 FPGC_tb.fpgc.cpu.l1icache.addr_prev[31:0]
 @200
 -
+@22
+FPGC_tb.fpgc.cpu.l1icache.cache_addr[9:0]
+FPGC_tb.fpgc.cpu.l1icache.cache_d[45:0]
+@28
+FPGC_tb.fpgc.cpu.l1icache.cache_we
+@22
+FPGC_tb.fpgc.cpu.l1icache.cache_q[45:0]
+@200
 -
 -DataMem
 @22
@@ -297,73 +309,9 @@ FPGC_tb.fpgc.cpu.clearCache_MEM
 @200
 -
 -L2Cache
-@28
-FPGC_tb.fpgc.l2cache.clk
-FPGC_tb.fpgc.l2cache.reset
-@200
 -
-@24
-FPGC_tb.fpgc.l2cache.l2_addr[23:0]
-@28
-FPGC_tb.fpgc.l2cache.l2_start
-@22
-FPGC_tb.fpgc.l2cache.l2_data[31:0]
-@28
-FPGC_tb.fpgc.l2cache.l2_we
-@22
-FPGC_tb.fpgc.l2cache.l2_q[31:0]
-@28
-FPGC_tb.fpgc.l2cache.l2_done
-@200
 -
-@22
-FPGC_tb.fpgc.l2cache.sdc_addr[23:0]
-FPGC_tb.fpgc.l2cache.sdc_data[31:0]
-@28
-FPGC_tb.fpgc.l2cache.sdc_done
-@22
-FPGC_tb.fpgc.l2cache.sdc_q[31:0]
-@28
-FPGC_tb.fpgc.l2cache.sdc_start
-FPGC_tb.fpgc.l2cache.sdc_we
-@200
--
-@22
-FPGC_tb.fpgc.l2cache.sdc_addr_reg[23:0]
-FPGC_tb.fpgc.l2cache.sdc_data_reg[31:0]
-@28
-FPGC_tb.fpgc.l2cache.sdc_start_reg
-FPGC_tb.fpgc.l2cache.sdc_we_reg
-FPGC_tb.fpgc.l2cache.l2_done_reg
-@22
-FPGC_tb.fpgc.l2cache.l2_q_reg[31:0]
-@200
--
-@28
-FPGC_tb.fpgc.l2cache.l2_start
-FPGC_tb.fpgc.l2cache.start_prev
-@c00024
-FPGC_tb.fpgc.l2cache.state[2:0]
-@28
-(0)FPGC_tb.fpgc.l2cache.state[2:0]
-(1)FPGC_tb.fpgc.l2cache.state[2:0]
-(2)FPGC_tb.fpgc.l2cache.state[2:0]
-@1401200
--group_end
-@200
 -
-@22
-FPGC_tb.fpgc.l2cache.cache_reset
-FPGC_tb.fpgc.l2cache.cache_we
-@200
--
-@28
-FPGC_tb.fpgc.l2cache.valid_d
-FPGC_tb.fpgc.l2cache.valid_we
-FPGC_tb.fpgc.l2cache.valid_q
-@22
-FPGC_tb.fpgc.l2cache.valid_bits[1023:0]
-@200
 -
 [pattern_trace] 1
 [pattern_trace] 0

+ 4 - 2
Verilog/testbench/FPGC_tb.v

@@ -35,7 +35,8 @@
 `include "/home/bart/Documents/FPGA/FPGC6/Verilog/modules/Memory/ROM.v"
 `include "/home/bart/Documents/FPGA/FPGC6/Verilog/modules/Memory/MemoryUnit.v"
 `include "/home/bart/Documents/FPGA/FPGC6/Verilog/modules/Memory/L2cache.v"
-`include "/home/bart/Documents/FPGA/FPGC6/Verilog/modules/Memory/L1cache.v"
+`include "/home/bart/Documents/FPGA/FPGC6/Verilog/modules/Memory/L1Icache.v"
+`include "/home/bart/Documents/FPGA/FPGC6/Verilog/modules/Memory/L1Dcache.v"
 
 // io
 `include "/home/bart/Documents/FPGA/FPGC6/Verilog/modules/IO/Keyboard.v"
@@ -135,6 +136,7 @@ wire SPI1_clk;
 wire SPI1_cs;
 wire SPI1_mosi;
 wire SPI1_miso;
+assign SPI1_miso = 1'b1;
 wire SPI1_rst;
 reg  SPI1_nint;
 
@@ -330,7 +332,7 @@ begin
         #5 clk_SDRAM = ~clk_SDRAM; //100MHz
     end
 
-    repeat(2000)
+    repeat(20000)
     begin
         #5 clk_SDRAM = ~clk_SDRAM; clk = ~clk; //50MHz
         #5 clk_SDRAM = ~clk_SDRAM; //100MHz

Some files were not shown because too many files changed in this diff