فهرست منبع

New arbiter that further improves performance by removing latency for instruction memory accesses. This change made the design unstable again, which I traced to bad interrupt timing. Fixed by changing the interruptValid wire to use branch_MEM for pipeline alignment. I do not know why this fixes all of the problems, as the issue is very hard to reproduce in simulation. L1 cache should be possible again!

bart 1 سال پیش
والد
کامیت
88681ec5d0

+ 59 - 43
Quartus/modules/CPU/Arbiter.v

@@ -1,7 +1,9 @@
 /*
 * Arbiter
 * Regulates access to the CPU memory bus from both Instruction and Data memory
-* In case of two requests at the same time, Data memory is granted first
+* Port a (instruction memory) will directly access the bus (no latency)
+* When port b (data memory) requests an access, it will stop port a when it is finished and give access to port b
+* This will give port b quite some latency, but as port b requests are more rare, this should be okay (and can be reduced by l1d cache)
 */
 
 module Arbiter(
@@ -26,67 +28,81 @@ module Arbiter(
     output [31:0] q,
 
     // bus
-    output reg [26:0] bus_addr = 27'd0,
-    output reg [31:0] bus_data = 32'd0,
-    output reg       bus_we = 1'b0,
-    output       bus_start,
+    output [26:0] bus_addr,
+    output [31:0] bus_data,
+    output        bus_we ,
+    output        bus_start,
     input [31:0]  bus_q,
     input         bus_done
 );
 
 assign q = bus_q;
 
-assign done_a = busy_a && !busy_b && bus_done;
-assign done_b = busy_b && bus_done;
+assign bus_addr     = (!port_b_access) ? addr_a   : bus_addr_reg;
+assign bus_data     = (!port_b_access) ? data_a   : bus_data_reg;
+assign bus_we       = (!port_b_access) ? we_a     : bus_we_reg;
+assign bus_start    = (!port_b_access) ? start_a  : (bus_start_reg && !bus_done);
 
-reg busy_a = 1'b0;
-reg busy_b = 1'b0;
+assign done_a       = (!port_b_access) && bus_done;
+assign done_b       = (state == state_wait_b_done) && bus_done;
 
-reg bus_start_reg = 1'b0;
-assign bus_start = (bus_start_reg) && (!bus_done);
+
+reg port_b_access = 1'b0;
+
+reg [26:0] bus_addr_reg = 27'd0;
+reg [31:0] bus_data_reg = 32'd0;
+reg bus_we_reg          = 1'b0;
+reg bus_start_reg       = 1'b0;
+
+// state machine
+reg [2:0] state = 3'd0; // 0-7 states limit
+parameter state_idle            = 3'd0;
+parameter state_wait_b_done     = 3'd1;
 
 always @(posedge clk) 
 begin
     if (reset)
     begin
-        bus_start_reg <= 1'b0;
-        bus_addr <= 27'd0;
-        bus_data <= 32'd0;
-        bus_we <= 1'b0;
-        busy_b <= 1'b0;
-        busy_a <= 1'b0;
+        port_b_access   <= 1'b0;
+        
+        bus_addr_reg    <= 27'd0;
+        bus_data_reg    <= 32'd0;
+        bus_we_reg      <= 1'b0;
+        bus_start_reg   <= 1'b0;
+
+        state           <= state_idle;
     end
     else
     begin
-        if (start_b && (!busy_a || bus_done))
-        begin
-            bus_start_reg <= 1'b1;
-            bus_addr <= addr_b;
-            bus_data <= data_b;
-            bus_we <= we_b;
-            busy_b <= 1'b1;
-        end
-        else if (start_a && (!busy_b || bus_done))
-        begin
-            bus_start_reg <= 1'b1;
-            bus_addr <= addr_a;
-            bus_data <= data_a;
-            bus_we <= we_a;
-            busy_a <= 1'b1;
-        end
-        
-        if (bus_done)
-        begin
-            if (!busy_b)
+        case(state)
+            state_idle: 
             begin
-                busy_a <= 1'b0;
-            end
+                // if port b is requested and port a is just finished
+                if (!start_a && bus_done && start_b)
+                begin
+                    // give access to port b before a starts a new request
+                    port_b_access   <= 1'b1;
+
+                    bus_addr_reg    <= addr_b;
+                    bus_data_reg    <= data_b;
+                    bus_we_reg      <= we_b;
+                    bus_start_reg   <= 1'b1;
 
-            busy_b <= 1'b0;
-            bus_start_reg <= 1'b0;
-        end
+                    state <= state_wait_b_done;
+                end
+                
+            end
+            state_wait_b_done:
+            begin
+                if (bus_done)
+                begin
+                    // return access to port a
+                    state           <= state_idle;
+                    port_b_access   <= 1'b0;
+                end
+            end
+        endcase
     end
 end
 
-
 endmodule

+ 7 - 3
Quartus/modules/CPU/CPU.v

@@ -174,9 +174,13 @@ assign PC = pc_FE;
 wire [31:0] PC_backup_current;
 assign PC_backup_current = pc4_EX - PCincrease;
 
-wire interruptValid;
-// instr_hit_FE to align with the pipeline
-assign interruptValid = (intCPU && !intDisabled && instr_hit_FE && PC_backup_current < PCstart);
+// branch_MEM (for some reason) aligns interrupt with pipeline, removing all instability since the addition of caching
+assign interruptValid = (
+    intCPU && 
+    !intDisabled && 
+    PC_backup_current < PCstart && 
+    branch_MEM
+);
 
 always @(posedge clk) 
 begin

BIN
Quartus/output_files/output_file.jic


BIN
Verilog/memory/code.bin


تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 0 - 0
Verilog/memory/spi.txt


+ 59 - 43
Verilog/modules/CPU/Arbiter.v

@@ -1,7 +1,9 @@
 /*
 * Arbiter
 * Regulates access to the CPU memory bus from both Instruction and Data memory
-* In case of two requests at the same time, Data memory is granted first
+* Port a (instruction memory) will directly access the bus (no latency)
+* When port b (data memory) requests an access, it will stop port a when it is finished and give access to port b
+* This will give port b quite some latency, but as port b requests are more rare, this should be okay (and can be reduced by l1d cache)
 */
 
 module Arbiter(
@@ -26,67 +28,81 @@ module Arbiter(
     output [31:0] q,
 
     // bus
-    output reg [26:0] bus_addr = 27'd0,
-    output reg [31:0] bus_data = 32'd0,
-    output reg       bus_we = 1'b0,
-    output       bus_start,
+    output [26:0] bus_addr,
+    output [31:0] bus_data,
+    output        bus_we ,
+    output        bus_start,
     input [31:0]  bus_q,
     input         bus_done
 );
 
 assign q = bus_q;
 
-assign done_a = busy_a && !busy_b && bus_done;
-assign done_b = busy_b && bus_done;
+assign bus_addr     = (!port_b_access) ? addr_a   : bus_addr_reg;
+assign bus_data     = (!port_b_access) ? data_a   : bus_data_reg;
+assign bus_we       = (!port_b_access) ? we_a     : bus_we_reg;
+assign bus_start    = (!port_b_access) ? start_a  : (bus_start_reg && !bus_done);
 
-reg busy_a = 1'b0;
-reg busy_b = 1'b0;
+assign done_a       = (!port_b_access) && bus_done;
+assign done_b       = (state == state_wait_b_done) && bus_done;
 
-reg bus_start_reg = 1'b0;
-assign bus_start = (bus_start_reg) && (!bus_done);
+
+reg port_b_access = 1'b0;
+
+reg [26:0] bus_addr_reg = 27'd0;
+reg [31:0] bus_data_reg = 32'd0;
+reg bus_we_reg          = 1'b0;
+reg bus_start_reg       = 1'b0;
+
+// state machine
+reg [2:0] state = 3'd0; // 0-7 states limit
+parameter state_idle            = 3'd0;
+parameter state_wait_b_done     = 3'd1;
 
 always @(posedge clk) 
 begin
     if (reset)
     begin
-        bus_start_reg <= 1'b0;
-        bus_addr <= 27'd0;
-        bus_data <= 32'd0;
-        bus_we <= 1'b0;
-        busy_b <= 1'b0;
-        busy_a <= 1'b0;
+        port_b_access   <= 1'b0;
+        
+        bus_addr_reg    <= 27'd0;
+        bus_data_reg    <= 32'd0;
+        bus_we_reg      <= 1'b0;
+        bus_start_reg   <= 1'b0;
+
+        state           <= state_idle;
     end
     else
     begin
-        if (start_b && (!busy_a || bus_done))
-        begin
-            bus_start_reg <= 1'b1;
-            bus_addr <= addr_b;
-            bus_data <= data_b;
-            bus_we <= we_b;
-            busy_b <= 1'b1;
-        end
-        else if (start_a && (!busy_b || bus_done))
-        begin
-            bus_start_reg <= 1'b1;
-            bus_addr <= addr_a;
-            bus_data <= data_a;
-            bus_we <= we_a;
-            busy_a <= 1'b1;
-        end
-        
-        if (bus_done)
-        begin
-            if (!busy_b)
+        case(state)
+            state_idle: 
             begin
-                busy_a <= 1'b0;
-            end
+                // if port b is requested and port a is just finished
+                if (!start_a && bus_done && start_b)
+                begin
+                    // give access to port b before a starts a new request
+                    port_b_access   <= 1'b1;
+
+                    bus_addr_reg    <= addr_b;
+                    bus_data_reg    <= data_b;
+                    bus_we_reg      <= we_b;
+                    bus_start_reg   <= 1'b1;
 
-            busy_b <= 1'b0;
-            bus_start_reg <= 1'b0;
-        end
+                    state <= state_wait_b_done;
+                end
+                
+            end
+            state_wait_b_done:
+            begin
+                if (bus_done)
+                begin
+                    // return access to port a
+                    state           <= state_idle;
+                    port_b_access   <= 1'b0;
+                end
+            end
+        endcase
     end
 end
 
-
 endmodule

+ 7 - 3
Verilog/modules/CPU/CPU.v

@@ -174,9 +174,13 @@ assign PC = pc_FE;
 wire [31:0] PC_backup_current;
 assign PC_backup_current = pc4_EX - PCincrease;
 
-wire interruptValid;
-// instr_hit_FE to align with the pipeline
-assign interruptValid = (intCPU && !intDisabled && instr_hit_FE && PC_backup_current < PCstart);
+// branch_MEM (for some reason) aligns interrupt with pipeline, removing all instability since the addition of caching
+assign interruptValid = (
+    intCPU && 
+    !intDisabled && 
+    PC_backup_current < PCstart && 
+    branch_MEM
+);
 
 always @(posedge clk) 
 begin

+ 37 - 53
Verilog/output/FPGC.gtkw

@@ -1,15 +1,15 @@
 [*]
 [*] GTKWave Analyzer v3.3.107 (w)1999-2020 BSI
-[*] Sun Aug 27 09:16:09 2023
+[*] Sun Aug 27 13:38:25 2023
 [*]
 [dumpfile] "/home/bart/Documents/FPGA/FPGC6/Verilog/output/wave.vcd"
-[dumpfile_mtime] "Sun Aug 27 09:15:50 2023"
-[dumpfile_size] 30040315
+[dumpfile_mtime] "Sun Aug 27 13:36:03 2023"
+[dumpfile_size] 16357434
 [savefile] "/home/bart/Documents/FPGA/FPGC6/Verilog/output/FPGC.gtkw"
-[timestart] 310055000
+[timestart] 204900
 [size] 2560 1387
 [pos] -1 -1
-*-17.666576 310851000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+*-16.666576 525000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 [treeopen] FPGC_tb.
 [treeopen] FPGC_tb.fpgc.
 [treeopen] FPGC_tb.fpgc.cpu.
@@ -26,26 +26,51 @@ FPGC_tb.fpgc.cpu.PC[26:0]
 -
 -
 -Arbiter
+-
 @22
 FPGC_tb.fpgc.cpu.arbiter.addr_a[31:0]
 FPGC_tb.fpgc.cpu.arbiter.data_a[31:0]
 @28
-FPGC_tb.fpgc.cpu.arbiter.done_a
-FPGC_tb.fpgc.cpu.arbiter.start_a
 FPGC_tb.fpgc.cpu.arbiter.we_a
-FPGC_tb.fpgc.cpu.arbiter.busy_a
+FPGC_tb.fpgc.cpu.arbiter.start_a
+FPGC_tb.fpgc.cpu.arbiter.done_a
+@22
+FPGC_tb.fpgc.cpu.arbiter.q[31:0]
 @200
 -
+-
 @22
 FPGC_tb.fpgc.cpu.arbiter.addr_b[31:0]
 FPGC_tb.fpgc.cpu.arbiter.data_b[31:0]
 @28
-FPGC_tb.fpgc.cpu.arbiter.done_b
-FPGC_tb.fpgc.cpu.arbiter.start_b
 FPGC_tb.fpgc.cpu.arbiter.we_b
-FPGC_tb.fpgc.cpu.arbiter.busy_b
+FPGC_tb.fpgc.cpu.arbiter.start_b
+FPGC_tb.fpgc.cpu.arbiter.done_b
+@22
+FPGC_tb.fpgc.cpu.arbiter.q[31:0]
 @200
 -
+@201
+-
+@22
+FPGC_tb.fpgc.cpu.arbiter.bus_addr[26:0]
+FPGC_tb.fpgc.cpu.arbiter.bus_data[31:0]
+@28
+FPGC_tb.fpgc.cpu.arbiter.bus_we
+FPGC_tb.fpgc.cpu.arbiter.bus_start
+FPGC_tb.fpgc.cpu.arbiter.bus_done
+@200
+-
+-
+@24
+FPGC_tb.fpgc.cpu.arbiter.state[2:0]
+@200
+-
+-
+-
+-
+-
+-
 @22
 FPGC_tb.fpgc.cpu.arbiter_bus_addr[26:0]
 FPGC_tb.fpgc.cpu.arbiter_bus_data[31:0]
@@ -206,7 +231,7 @@ FPGC_tb.fpgc.cpu.interruptValid
 FPGC_tb.fpgc.cpu.instrMem.clk
 @200
 -
-@25
+@24
 FPGC_tb.fpgc.cpu.instrMem.addr[31:0]
 @22
 FPGC_tb.fpgc.cpu.instrMem.q[31:0]
@@ -229,15 +254,6 @@ FPGC_tb.fpgc.cpu.instrMem.bus_start
 FPGC_tb.fpgc.cpu.instrMem.bus_we
 @200
 -
-@24
-FPGC_tb.fpgc.cpu.l1icache.valid_a[9:0]
-@28
-FPGC_tb.fpgc.cpu.l1icache.valid_d
-FPGC_tb.fpgc.cpu.l1icache.valid_q
-FPGC_tb.fpgc.cpu.l1icache.valid_we
-@a2
-FPGC_tb.fpgc.cpu.l1icache.valid_bits[1023:0]
-@200
 -
 @28
 FPGC_tb.fpgc.cpu.clearCache_DE
@@ -245,20 +261,7 @@ FPGC_tb.fpgc.cpu.clearCache_EX
 FPGC_tb.fpgc.cpu.clearCache_MEM
 @200
 -
-@24
-FPGC_tb.fpgc.cpu.l1icache.state[2:0]
-@22
-FPGC_tb.fpgc.cpu.l1icache.addr_prev[31:0]
-@200
 -
-@22
-FPGC_tb.fpgc.cpu.l1icache.cache_addr[9:0]
-FPGC_tb.fpgc.cpu.l1icache.cache_d[45:0]
-@28
-FPGC_tb.fpgc.cpu.l1icache.cache_we
-@22
-FPGC_tb.fpgc.cpu.l1icache.cache_q[45:0]
-@200
 -
 -DataMem
 @22
@@ -280,27 +283,8 @@ FPGC_tb.fpgc.cpu.dataMem.qreg[31:0]
 FPGC_tb.fpgc.cpu.dataMem.q[31:0]
 @200
 -
-@22
-FPGC_tb.fpgc.cpu.l1dcache.cache_addr[9:0]
-FPGC_tb.fpgc.cpu.l1dcache.cache_d[45:0]
-FPGC_tb.fpgc.cpu.l1dcache.cache_q[45:0]
-@28
-FPGC_tb.fpgc.cpu.l1dcache.cache_we
-@200
 -
-@22
-FPGC_tb.fpgc.cpu.l1dcache.valid_a[9:0]
-@28
-FPGC_tb.fpgc.cpu.l1dcache.valid_q
-FPGC_tb.fpgc.cpu.l1dcache.valid_d
-FPGC_tb.fpgc.cpu.l1dcache.valid_we
-@a2
-FPGC_tb.fpgc.cpu.l1dcache.valid_bits[1023:0]
-@200
 -
-@24
-FPGC_tb.fpgc.cpu.l1dcache.state[2:0]
-@200
 -
 @28
 FPGC_tb.fpgc.cpu.clearCache_DE

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است