// CPU.v
/*
* B32P CPU
*/
/* Features:
- 5 stage pipeline
    - fetch         FE  (1)
    - decode        DE  (2)
    - execute       EX  (3)
    - memory        MEM (4)
    - write back    WB  (5)
- Hazard detection:
    - flush
    - stall (MEM to reg)
    - forward
- Extendable number of interrupts
    - higher priority for lower interrupt numbers
- Variable delay support from InstrMem and DataMem:
    - NOTE/BUG: the instruction after a READ or WRITE was skipped if there is a DataMem delay but no InstrMem delay.
      This might still be a problem when caching is implemented.
*/
  21. module CPU(
  22. input clk, reset,
  23. output [26:0] bus_addr,
  24. output [31:0] bus_data,
  25. output bus_we,
  26. output bus_start,
  27. input [31:0] bus_q,
  28. input bus_done,
  29. // sdram bus
  30. output [23:0] sdc_addr, // bus_addr
  31. output [31:0] sdc_data, // bus_data
  32. output sdc_we, // bus_we
  33. output sdc_start, // bus_start
  34. input [31:0] sdc_q, // bus_q
  35. input sdc_done, // bus_done
  36. input int1, int2, int3, int4, int5, int6, int7, int8, int9, int10,
  37. output [26:0] PC
  38. );
  39. parameter PCstart = 27'hC02522; // internal ROM addr 0 //27'hC02522;
  40. parameter PCincrease = 1'b1; // number of addresses to increase the PC with after each instruction
  41. parameter InterruptJumpAddr = 27'd1;
  42. /*
  43. * CPU BUS
  44. */
  45. wire [31:0] arbiter_q;
  46. wire [31:0] addr_a;
  47. wire [31:0] data_a;
  48. wire we_a;
  49. wire start_a;
  50. wire done_a;
  51. wire [31:0] addr_b;
  52. wire [31:0] data_b;
  53. wire we_b;
  54. wire start_b;
  55. wire done_b;
  56. wire [26:0] arbiter_bus_addr; // bus_addr
  57. wire [31:0] arbiter_bus_data; // bus_data
  58. wire arbiter_bus_we; // bus_we
  59. wire arbiter_bus_start; // bus_start
  60. wire [31:0] arbiter_bus_q; // bus_q
  61. wire arbiter_bus_done; // bus_done
  62. // bus splitter
  63. assign sdc_addr = (arbiter_bus_addr < 27'h800000) ? arbiter_bus_addr: 24'd0;
  64. assign sdc_data = (arbiter_bus_addr < 27'h800000) ? arbiter_bus_data: 32'd0;
  65. assign sdc_we = (arbiter_bus_addr < 27'h800000) ? arbiter_bus_we: 1'b0;
  66. assign sdc_start = (arbiter_bus_addr < 27'h800000) ? arbiter_bus_start: 1'b0;
  67. assign bus_addr = (arbiter_bus_addr < 27'h800000) ? 27'd0: arbiter_bus_addr;
  68. assign bus_data = (arbiter_bus_addr < 27'h800000) ? 32'd0: arbiter_bus_data;
  69. assign bus_we = (arbiter_bus_addr < 27'h800000) ? 1'b0: arbiter_bus_we;
  70. assign bus_start = (arbiter_bus_addr < 27'h800000) ? 1'b0: arbiter_bus_start;
  71. assign arbiter_bus_q = (arbiter_bus_addr < 27'h800000) ? sdc_q: bus_q;
  72. assign arbiter_bus_done = (arbiter_bus_addr < 27'h800000) ? sdc_done: bus_done;
  73. Arbiter arbiter (
  74. .clk(clk),
  75. .reset(reset),
  76. // port a (Instr)
  77. .addr_a(addr_a),
  78. .data_a(data_a),
  79. .we_a(we_a),
  80. .start_a(start_a),
  81. .done_a(done_a),
  82. // port b (Data)
  83. .addr_b(addr_b),
  84. .data_b(data_b),
  85. .we_b(we_b),
  86. .start_b(start_b),
  87. .done_b(done_b),
  88. // output (both ports)
  89. .q(arbiter_q),
  90. // bus
  91. .bus_addr(arbiter_bus_addr),
  92. .bus_data(arbiter_bus_data),
  93. .bus_we(arbiter_bus_we),
  94. .bus_start(arbiter_bus_start),
  95. .bus_q(arbiter_bus_q),
  96. .bus_done(arbiter_bus_done)
  97. );
  98. /*
  99. * Interrupts
  100. */
  101. reg intDisabled = 1'b0;
  102. wire intCPU;
  103. wire [7:0] intID;
  104. IntController intController(
  105. .clk(clk),
  106. .reset(reset),
  107. .int1(int1),
  108. .int2(int2),
  109. .int3(int3),
  110. .int4(int4),
  111. .int5(int5),
  112. .int6(int6),
  113. .int7(int7),
  114. .int8(int8),
  115. .int9(int9),
  116. .int10(int10),
  117. .intDisabled(intDisabled),
  118. .intCPU(intCPU),
  119. .intID(intID)
  120. );
  121. // Registers for flush, stall and forwarding
  122. reg flush_FE, flush_DE, flush_EX, flush_MEM, flush_WB;
  123. reg stall_FE, stall_DE, stall_EX, stall_MEM, stall_WB;
  124. reg [1:0] forward_a, forward_b;
  125. // Cache delays
  126. wire instr_hit_FE;
  127. wire datamem_busy_MEM;
  128. /*
  129. * FETCH (FE)
  130. */
  131. // Program Counter, start at ROM[0]
  132. reg [31:0] pc_FE = PCstart;
  133. reg [31:0] pc_FE_backup = 32'd0;
  134. wire [31:0] pc4_FE;
  135. assign pc4_FE = pc_FE + 1'b1;
  136. assign PC = pc_FE;
  137. wire [31:0] PC_backup_current;
  138. assign PC_backup_current = pc4_EX - PCincrease;
  139. // branch/jump/halt properly aligns interrupt with pipeline, as if it was a normal jump
  140. // this fixed all instability since the addition of caching (because this decreased the time to obtain instructions)
  141. assign interruptValid = (
  142. intCPU &&
  143. !intDisabled &&
  144. PC_backup_current < PCstart &&
  145. (
  146. branch_MEM || jumpr_MEM || jumpc_MEM || halt_MEM
  147. )
  148. );
  149. always @(posedge clk)
  150. begin
  151. if (reset)
  152. begin
  153. pc_FE <= PCstart;
  154. pc_FE_backup <= 32'd0;
  155. intDisabled <= 1'b0;
  156. end
  157. else
  158. begin
  159. // interrupt has highest priority
  160. if (interruptValid)
  161. begin
  162. intDisabled <= 1'b1;
  163. pc_FE_backup <= PC_backup_current;
  164. pc_FE <= InterruptJumpAddr;
  165. end
  166. else if (reti_MEM)
  167. begin
  168. intDisabled <= 1'b0;
  169. pc_FE <= pc_FE_backup;
  170. end
  171. // jump has priority over instruction cache stalls
  172. else if (jumpc_MEM || jumpr_MEM || halt_MEM || (branch_MEM && branch_passed_MEM))
  173. begin
  174. pc_FE <= jump_addr_MEM;
  175. end
  176. else if (stall_FE || (!instr_hit_FE) )
  177. begin
  178. pc_FE <= pc_FE;
  179. end
  180. else
  181. begin
  182. pc_FE <= pc4_FE;
  183. end
  184. end
  185. end
  186. //------------L1i Cache--------------
  187. //CPU bus
  188. wire [31:0] l1i_addr; // address to write or to start reading from
  189. wire [31:0] l1i_data; // data to write
  190. wire l1i_we; // write enable
  191. wire l1i_start; // start trigger
  192. wire [31:0] l1i_q; // memory output
  193. wire l1i_done; // output ready
  194. L1Icache l1icache(
  195. .clk (clk),
  196. .reset (reset),
  197. .cache_reset (clearCache_EX | clearCache_MEM),
  198. // CPU bus
  199. .l2_addr (l1i_addr),
  200. .l2_data (l1i_data),
  201. .l2_we (l1i_we),
  202. .l2_start (l1i_start),
  203. .l2_q (l1i_q),
  204. .l2_done (l1i_done),
  205. // sdram bus
  206. .sdc_addr (addr_a),
  207. .sdc_data (data_a),
  208. .sdc_we (we_a),
  209. .sdc_start (start_a),
  210. .sdc_q (arbiter_q),
  211. .sdc_done (done_a)
  212. );
  213. // Instruction Memory
  214. // should eventually become a memory with variable latency
  215. // writes directly to next stage
  216. wire [31:0] instr_DE;
  217. InstrMem instrMem(
  218. .clk(clk),
  219. .reset(reset),
  220. .addr(pc_FE),
  221. .q(instr_DE),
  222. .hit(instr_hit_FE),
  223. // bus
  224. .bus_addr(l1i_addr),
  225. .bus_data(l1i_data),
  226. .bus_we(l1i_we),
  227. .bus_start(l1i_start),
  228. .bus_q(l1i_q),
  229. .bus_done(l1i_done),
  230. .hold(stall_FE),
  231. .clear(flush_FE)
  232. );
  233. // Pass data from FE to DE
  234. wire [31:0] pc4_DE;
  235. Regr #(.N(32)) regr_pc4_FE_DE(
  236. .clk(clk),
  237. .hold(stall_FE),
  238. .clear(reset||flush_FE),
  239. .in(pc4_FE),
  240. .out(pc4_DE)
  241. );
  242. /*
  243. * DECODE (DE)
  244. */
  245. // Instruction Decoder
  246. wire [3:0] areg_DE, breg_DE, instrOP_DE;
  247. wire he_DE, oe_DE, sig_DE;
  248. InstructionDecoder instrDec_DE(
  249. .instr(instr_DE),
  250. .instrOP(instrOP_DE),
  251. .aluOP(),
  252. .constAlu(),
  253. .const16(),
  254. .const27(),
  255. .areg(areg_DE),
  256. .breg(breg_DE),
  257. .dreg(),
  258. .he(he_DE),
  259. .oe(oe_DE),
  260. .sig(sig_DE)
  261. );
  262. // Control Unit
  263. wire alu_use_const_DE;
  264. wire push_DE, pop_DE;
  265. wire dreg_we_DE;
  266. wire mem_write_DE, mem_read_DE;
  267. wire jumpc_DE, jumpr_DE, branch_DE, halt_DE, reti_DE, clearCache_DE;
  268. wire getIntID_DE, getPC_DE;
  269. ControlUnit controlUnit(
  270. // in
  271. .instrOP (instrOP_DE),
  272. .he (he_DE),
  273. // out
  274. .alu_use_const (alu_use_const_DE),
  275. .push (push_DE),
  276. .pop (pop_DE),
  277. .dreg_we (dreg_we_DE),
  278. .mem_write (mem_write_DE),
  279. .mem_read (mem_read_DE),
  280. .jumpc (jumpc_DE),
  281. .jumpr (jumpr_DE),
  282. .halt (halt_DE),
  283. .reti (reti_DE),
  284. .branch (branch_DE),
  285. .getIntID (getIntID_DE),
  286. .getPC (getPC_DE),
  287. .clearCache (clearCache_DE)
  288. );
  289. // Register Bank
  290. // writes directly to next stage
  291. wire [31:0] data_a_EX, data_b_EX;
  292. wire [3:0] dreg_WB;
  293. wire dreg_we_WB;
  294. reg [31:0] data_d_WB;
  295. Regbank regbank(
  296. .clk(clk),
  297. .reset(reset),
  298. .addr_a(areg_DE),
  299. .addr_b(breg_DE),
  300. .data_a(data_a_EX),
  301. .data_b(data_b_EX),
  302. // from WB stage
  303. .addr_d(dreg_WB),
  304. .data_d(data_d_WB),
  305. .we(dreg_we_WB),
  306. .hold(stall_DE),
  307. .clear(flush_DE)
  308. );
  309. // Pass data from DE to EX
  310. // Set to 0 during stall (bubble)
  311. wire [31:0] instr_EX;
  312. Regr #(.N(32)) regr_instr_DE_EX(
  313. .clk(clk),
  314. .hold(stall_DE),
  315. .clear(reset||flush_DE || stall_DE),
  316. .in(instr_DE),
  317. .out(instr_EX)
  318. );
  319. wire [31:0] pc4_EX;
  320. Regr #(.N(32)) regr_pc4_DE_EX(
  321. .clk(clk),
  322. .hold(stall_DE),
  323. .clear(reset||flush_DE),
  324. .in(pc4_DE),
  325. .out(pc4_EX)
  326. );
  327. // Set to 0 during stall (bubble)
  328. wire alu_use_const_EX;
  329. wire push_EX, pop_EX;
  330. wire dreg_we_EX;
  331. wire mem_write_EX, mem_read_EX;
  332. wire jumpc_EX, jumpr_EX, halt_EX, reti_EX, branch_EX, clearCache_EX;
  333. wire getIntID_EX, getPC_EX;
  334. Regr #(.N(14)) regr_cuflags_DE_EX(
  335. .clk (clk),
  336. .hold (stall_DE),
  337. .clear (reset||flush_DE || stall_DE),
  338. .in ({alu_use_const_DE, push_DE, pop_DE, dreg_we_DE, mem_write_DE, mem_read_DE, jumpc_DE, jumpr_DE, halt_DE, reti_DE, branch_DE, getIntID_DE, getPC_DE, clearCache_DE}),
  339. .out ({alu_use_const_EX, push_EX, pop_EX, dreg_we_EX, mem_write_EX, mem_read_EX, jumpc_EX, jumpr_EX, halt_EX, reti_EX, branch_EX, getIntID_EX, getPC_EX, clearCache_EX})
  340. );
  341. /*
  342. * EXECUTE (EX)
  343. */
  344. // Instruction Decoder
  345. wire [31:0] alu_const16_EX, alu_const16u_EX;
  346. wire [3:0] aluOP_EX;
  347. wire [3:0] areg_EX, breg_EX, dreg_EX;
  348. InstructionDecoder instrDec_EX(
  349. .instr(instr_EX),
  350. .instrOP(),
  351. .aluOP(aluOP_EX),
  352. .constAlu(alu_const16_EX),
  353. .constAluu(alu_const16u_EX),
  354. .const16(),
  355. .const27(),
  356. .areg(areg_EX),
  357. .breg(breg_EX),
  358. .dreg(dreg_EX),
  359. .he(),
  360. .oe(),
  361. .sig()
  362. );
  363. // ALU
  364. wire [31:0] alu_result_EX;
  365. // select constant or register for input b
  366. wire[31:0] alu_input_b_EX;
  367. assign alu_input_b_EX = (alu_use_const_EX && aluOP_EX[3:1] == 3'b110) ? alu_const16u_EX : // unsigned const for load(hi) instruction
  368. (alu_use_const_EX) ? alu_const16_EX :
  369. data_b_EX;
  370. // if forwarding, select forwarded data instead for input a of ALU
  371. reg [31:0] fw_data_a_EX;
  372. always @(*)
  373. begin
  374. case (forward_a)
  375. 2'd1: fw_data_a_EX <= alu_result_MEM;
  376. 2'd2: fw_data_a_EX <= data_d_WB;
  377. default: fw_data_a_EX <= data_a_EX;
  378. endcase
  379. end
  380. // if forwarding, select forwarded data instead for input b of ALU
  381. reg [31:0] fw_data_b_EX;
  382. always @(*)
  383. begin
  384. case (forward_b)
  385. 2'd1: fw_data_b_EX <= alu_result_MEM;
  386. 2'd2: fw_data_b_EX <= data_d_WB;
  387. default: fw_data_b_EX <= alu_input_b_EX;
  388. endcase
  389. end
  390. ALU alu(
  391. .opcode(aluOP_EX),
  392. .a(fw_data_a_EX),
  393. .b(fw_data_b_EX),
  394. .y(alu_result_EX)
  395. );
  396. // for special instructions, pass other data than alu result
  397. wire [31:0] execute_result_EX;
  398. assign execute_result_EX = (getPC_EX) ? pc4_EX - 1'b1:
  399. (getIntID_EX) ? intID:
  400. alu_result_EX;
  401. // Pass data from EX to MEM
  402. wire [31:0] instr_MEM;
  403. Regr #(.N(32)) regr_instr_EX_MEM(
  404. .clk(clk),
  405. .hold(stall_EX),
  406. .clear(reset||flush_EX),
  407. .in(instr_EX),
  408. .out(instr_MEM)
  409. );
  410. wire [31:0] data_a_MEM, data_b_MEM;
  411. Regr #(.N(64)) regr_regdata_EX_MEM(
  412. .clk(clk),
  413. .hold(stall_EX),
  414. .clear(reset||flush_EX),
  415. .in({fw_data_a_EX, fw_data_b_EX}), // forwarded data
  416. .out({data_a_MEM, data_b_MEM})
  417. );
  418. wire [31:0] pc4_MEM;
  419. Regr #(.N(32)) regr_pc4_EX_MEM(
  420. .clk(clk),
  421. .hold(stall_EX),
  422. .clear(reset||flush_EX),
  423. .in(pc4_EX),
  424. .out(pc4_MEM)
  425. );
  426. wire push_MEM, pop_MEM;
  427. wire dreg_we_MEM;
  428. wire mem_write_MEM, mem_read_MEM;
  429. wire jumpc_MEM, jumpr_MEM, halt_MEM, reti_MEM, branch_MEM, clearCache_MEM;
  430. Regr #(.N(11)) regr_cuflags_EX_MEM(
  431. .clk (clk),
  432. .hold (stall_EX),
  433. .clear (reset||flush_EX),
  434. .in ({push_EX, pop_EX, dreg_we_EX, mem_write_EX, mem_read_EX, jumpc_EX, jumpr_EX, halt_EX, reti_EX, branch_EX, clearCache_EX}),
  435. .out ({push_MEM, pop_MEM, dreg_we_MEM, mem_write_MEM, mem_read_MEM, jumpc_MEM, jumpr_MEM, halt_MEM, reti_MEM, branch_MEM, clearCache_MEM})
  436. );
  437. wire [31:0] alu_result_MEM;
  438. Regr #(.N(32)) regr_alu_result_EX_MEM(
  439. .clk(clk),
  440. .hold(stall_EX),
  441. .clear(reset||flush_EX),
  442. .in(execute_result_EX), // other data in case of special instructions
  443. .out(alu_result_MEM)
  444. );
  445. /*
  446. * MEMORY (MEM)
  447. */
  448. // Instruction Decoder
  449. wire [31:0] const16_MEM;
  450. wire [26:0] const27_MEM;
  451. wire [2:0] branchOP_MEM;
  452. wire oe_MEM, sig_MEM;
  453. wire [3:0] dreg_MEM;
  454. InstructionDecoder instrDec_MEM(
  455. .instr(instr_MEM),
  456. .instrOP(),
  457. .aluOP(),
  458. .branchOP(branchOP_MEM),
  459. .constAlu(),
  460. .const16(const16_MEM),
  461. .const27(const27_MEM),
  462. .areg(),
  463. .breg(),
  464. .dreg(dreg_MEM),
  465. .he(),
  466. .oe(oe_MEM),
  467. .sig(sig_MEM)
  468. );
  469. reg [31:0] jump_addr_MEM;
  470. always @(*)
  471. begin
  472. jump_addr_MEM <= 32'd0;
  473. if (jumpc_MEM)
  474. begin
  475. if (oe_MEM)
  476. begin
  477. // add sign extended to allow negative offsets
  478. jump_addr_MEM <= (pc4_MEM - 1'b1) + {{5{const27_MEM[26]}}, const27_MEM[26:0]};
  479. end
  480. else
  481. begin
  482. jump_addr_MEM <= {5'd0, const27_MEM};
  483. end
  484. end
  485. else if (jumpr_MEM)
  486. begin
  487. if (oe_MEM)
  488. begin
  489. jump_addr_MEM <= (pc4_MEM - 1'b1) + (data_b_MEM + const16_MEM);
  490. end
  491. else
  492. begin
  493. jump_addr_MEM <= data_b_MEM + const16_MEM;
  494. end
  495. end
  496. else if (branch_MEM)
  497. begin
  498. jump_addr_MEM <= (pc4_MEM - 1'b1) + const16_MEM;
  499. end
  500. else if (halt_MEM)
  501. begin
  502. // jump to same address to keep halting
  503. jump_addr_MEM <= pc4_MEM - 1'b1;
  504. end
  505. end
  506. // Opcodes
  507. localparam
  508. BRANCH_OP_BEQ = 3'b000, // A == B
  509. BRANCH_OP_BGT = 3'b001, // A > B
  510. BRANCH_OP_BGE = 3'b010, // A >= B
  511. BRANCH_OP_U1 = 3'b011, // Unimplemented 1
  512. BRANCH_OP_BNE = 3'b100, // A != B
  513. BRANCH_OP_BLT = 3'b101, // A < B
  514. BRANCH_OP_BLE = 3'b110, // A <= B
  515. BRANCH_OP_U2 = 3'b111; // Unimplemented 2
  516. reg branch_passed_MEM;
  517. always @(*)
  518. begin
  519. branch_passed_MEM <= 1'b0;
  520. case (branchOP_MEM)
  521. BRANCH_OP_BEQ:
  522. begin
  523. branch_passed_MEM <= (data_a_MEM == data_b_MEM);
  524. end
  525. BRANCH_OP_BGT:
  526. begin
  527. branch_passed_MEM <= (sig_MEM) ? ($signed(data_a_MEM) > $signed(data_b_MEM)) : (data_a_MEM > data_b_MEM);
  528. end
  529. BRANCH_OP_BGE:
  530. begin
  531. branch_passed_MEM <= (sig_MEM) ? ($signed(data_a_MEM) >= $signed(data_b_MEM)) : (data_a_MEM >= data_b_MEM);
  532. end
  533. BRANCH_OP_BNE:
  534. begin
  535. branch_passed_MEM <= (data_a_MEM != data_b_MEM);
  536. end
  537. BRANCH_OP_BLT:
  538. begin
  539. branch_passed_MEM <= (sig_MEM) ? ($signed(data_a_MEM) < $signed(data_b_MEM)) : (data_a_MEM < data_b_MEM);
  540. end
  541. BRANCH_OP_BLE:
  542. begin
  543. branch_passed_MEM <= (sig_MEM) ? ($signed(data_a_MEM) <= $signed(data_b_MEM)) : (data_a_MEM <= data_b_MEM);
  544. end
  545. endcase
  546. end
  547. //------------L1d Cache--------------
  548. //CPU bus
  549. wire [31:0] l1d_addr; // address to write or to start reading from
  550. wire [31:0] l1d_data; // data to write
  551. wire l1d_we; // write enable
  552. wire l1d_start; // start trigger
  553. wire [31:0] l1d_q; // memory output
  554. wire l1d_done; // output ready
  555. L1Dcache l1dcache(
  556. .clk (clk),
  557. .reset (reset),
  558. .cache_reset (clearCache_EX | clearCache_MEM),
  559. // CPU bus
  560. .l2_addr (l1d_addr),
  561. .l2_data (l1d_data),
  562. .l2_we (l1d_we),
  563. .l2_start (l1d_start),
  564. .l2_q (l1d_q),
  565. .l2_done (l1d_done),
  566. // sdram bus
  567. .sdc_addr (addr_b),
  568. .sdc_data (data_b),
  569. .sdc_we (we_b),
  570. .sdc_start (start_b),
  571. .sdc_q (arbiter_q),
  572. .sdc_done (done_b)
  573. );
  574. // Data Memory
  575. // should eventually become a memory with variable latency
  576. // writes directly to the next stage
  577. wire [31:0] dataMem_q_WB;
  578. wire [31:0] dataMem_addr_MEM;
  579. assign dataMem_addr_MEM = data_a_MEM + const16_MEM;
  580. DataMem dataMem(
  581. .clk(clk),
  582. .reset(reset),
  583. .addr(dataMem_addr_MEM),
  584. .we(mem_write_MEM),
  585. .re(mem_read_MEM),
  586. .data(data_b_MEM),
  587. .q(dataMem_q_WB),
  588. .busy(datamem_busy_MEM),
  589. // bus
  590. .bus_addr(l1d_addr),
  591. .bus_data(l1d_data),
  592. .bus_we(l1d_we),
  593. .bus_start(l1d_start),
  594. .bus_q(l1d_q),
  595. .bus_done(l1d_done),
  596. .hold(stall_MEM),
  597. .clear(flush_MEM)
  598. );
  599. // Stack
  600. // writes directly to the next stage
  601. wire [31:0] stack_q_WB;
  602. Stack stack(
  603. .clk(clk),
  604. .reset(reset),
  605. .q(stack_q_WB),
  606. .d(data_b_MEM),
  607. .push(push_MEM),
  608. .pop(pop_MEM),
  609. .hold(stall_MEM),
  610. .clear(flush_MEM)
  611. );
  612. // Pass data from MEM to WB
  613. wire [31:0] instr_WB;
  614. Regr #(.N(32)) regr_instr_MEM_WB(
  615. .clk(clk),
  616. .hold(stall_MEM),
  617. .clear(reset||flush_MEM),
  618. .in(instr_MEM),
  619. .out(instr_WB)
  620. );
  621. wire [31:0] alu_result_WB;
  622. Regr #(.N(32)) regr_alu_result_MEM_WB(
  623. .clk(clk),
  624. .hold(stall_MEM),
  625. .clear(reset||flush_MEM),
  626. .in(alu_result_MEM),
  627. .out(alu_result_WB)
  628. );
  629. wire [31:0] pc4_WB;
  630. Regr #(.N(32)) regr_pc4_MEM_WB(
  631. .clk(clk),
  632. .hold(stall_MEM),
  633. .clear(reset||flush_MEM),
  634. .in(pc4_MEM),
  635. .out(pc4_WB)
  636. );
  637. wire pop_WB, mem_read_WB;
  638. //wire dreg_we_WB;
  639. Regr #(.N(3)) regr_cuflags_MEM_WB(
  640. .clk (clk),
  641. .hold (stall_MEM),
  642. .clear (reset||flush_MEM),
  643. .in ({pop_MEM, dreg_we_MEM, mem_read_MEM}),
  644. .out ({pop_WB, dreg_we_WB, mem_read_WB})
  645. );
  646. /*
  647. * WRITE BACK (WB)
  648. */
  649. wire [15:0] const16u_WB;
  650. InstructionDecoder instrDec_WB(
  651. .instr(instr_WB),
  652. .instrOP(),
  653. .aluOP(),
  654. .constAlu(),
  655. .const16(),
  656. .const16u(const16u_WB),
  657. .const27(),
  658. .areg(),
  659. .breg(),
  660. .dreg(dreg_WB),
  661. .he(),
  662. .oe(),
  663. .sig()
  664. );
  665. always @(*)
  666. begin
  667. case (1'b1)
  668. pop_WB:
  669. begin
  670. data_d_WB <= stack_q_WB;
  671. end
  672. mem_read_WB:
  673. begin
  674. data_d_WB <= dataMem_q_WB;
  675. end
  676. default: // (ALU, savPC, IntID)
  677. begin
  678. data_d_WB <= alu_result_WB;
  679. end
  680. endcase
  681. end
  682. /*
  683. * FLUSH
  684. */
  685. always @(*)
  686. begin
  687. flush_FE <= 1'b0;
  688. flush_DE <= 1'b0;
  689. flush_EX <= 1'b0;
  690. flush_MEM <= 1'b0;
  691. flush_WB <= 1'b0;
  692. // flush on jumps or interrupts
  693. if (jumpc_MEM || jumpr_MEM || halt_MEM || (branch_MEM && branch_passed_MEM) || reti_MEM || interruptValid)
  694. begin
  695. flush_FE <= 1'b1;
  696. flush_DE <= 1'b1;
  697. flush_EX <= 1'b1;
  698. end
  699. // flush MEM when busy, causing a bubble
  700. if ((mem_read_MEM || mem_write_MEM) && datamem_busy_MEM)
  701. begin
  702. flush_MEM <= 1'b1;
  703. end
  704. end
  705. /*
  706. * STALL
  707. */
  708. always @(*)
  709. begin
  710. stall_FE <= 1'b0;
  711. stall_DE <= 1'b0;
  712. stall_EX <= 1'b0;
  713. stall_MEM <= 1'b0;
  714. stall_WB <= 1'b0;
  715. // stall if an instruction in EX uses the result of a some operation in MEM (dreg_mem)
  716. if ((mem_read_EX || pop_EX) && ( (dreg_EX == areg_DE) || (dreg_EX == breg_DE)) )
  717. begin
  718. stall_FE <= 1'b1;
  719. stall_DE <= 1'b1;
  720. end
  721. // stall if read or write in data MEM causes the busy flag to be set
  722. if ((mem_read_MEM || mem_write_MEM) && datamem_busy_MEM)
  723. begin
  724. stall_FE <= 1'b1;
  725. stall_DE <= 1'b1;
  726. stall_EX <= 1'b1;
  727. end
  728. end
  729. /*
  730. * FORWARDING
  731. */
  732. // MEM (4) -> EX (3)
  733. // WB (5) -> EX (3)
  734. always @(*)
  735. begin
  736. // input a of ALU
  737. forward_a <= 2'd0; // default to no forwarding
  738. if (dreg_we_MEM && (dreg_MEM == areg_EX) && (areg_EX != 4'd0))
  739. begin
  740. forward_a <= 2'd1; // priority 1: forward from MEM to EX
  741. end
  742. else if (dreg_we_WB && (dreg_WB == areg_EX) && (areg_EX != 4'd0))
  743. begin
  744. forward_a <= 2'd2; // priority 2: forward from WB to EX
  745. end
  746. // input b of ALU
  747. forward_b <= 2'd0; // default to no forwarding
  748. if (dreg_we_MEM && (dreg_MEM == breg_EX) && (breg_EX != 4'd0))
  749. begin
  750. forward_b <= 2'd1; // priority 1: forward from MEM to EX
  751. end
  752. else if (dreg_we_WB && (dreg_WB == breg_EX) && (breg_EX != 4'd0))
  753. begin
  754. forward_b <= 2'd2; // priority 2: forward from WB to EX
  755. end
  756. end
  757. endmodule