// CPU.v
  /*
   * B32P CPU
   */

  /* Features:
   * - 5-stage pipeline:
   *     - fetch      FE  (1)
   *     - decode     DE  (2)
   *     - execute    EX  (3)
   *     - memory     MEM (4)
   *     - write back WB  (5)
   * - Hazard detection:
   *     - flush
   *     - stall (MEM to reg)
   *     - forward
   * - Extendable number of interrupts:
   *     - lower interrupt numbers have higher priority
   * - Variable delay support from InstrMem and DataMem:
   *     - NOTE/BUG: the instruction after a READ or WRITE was skipped if there
   *       was a DataMem delay but no InstrMem delay.
   *       This might still be a problem when caching is implemented.
   */
  /*
   * CPU: top level of the five-stage pipeline (FE, DE, EX, MEM, WB).
   *
   * Instruction fetches (port a) and data accesses (port b) go through their
   * own L1 cache into a shared Arbiter. The arbiter output is then split by
   * address between the sdram bus (sdc_*) and the general bus (bus_*).
   *
   * Parameters:
   *   PCstart           - value loaded into the program counter on reset.
   *   PCincrease        - number of addresses the PC advances per instruction.
   *   InterruptJumpAddr - address jumped to when an interrupt is taken.
   */
  module CPU(
      input         clk, reset,

      // general bus: carries every arbiter access whose address is >= 27'h800000
      output [26:0] bus_addr,
      output [31:0] bus_data,
      output        bus_we,
      output        bus_start,
      input  [31:0] bus_q,
      input         bus_done,

      // sdram bus: carries every arbiter access whose address is < 27'h800000
      output [23:0] sdc_addr,   // bus_addr
      output [31:0] sdc_data,   // bus_data
      output        sdc_we,     // bus_we
      output        sdc_start,  // bus_start
      input  [31:0] sdc_q,      // bus_q
      input         sdc_done,   // bus_done

      // interrupt request lines, passed straight to the IntController
      input         int1, int2, int3, int4, int5, int6, int7, int8, int9, int10,

      // lower 27 bits of the fetch-stage program counter
      output [26:0] PC
  );

      parameter PCstart           = 27'hC02522; // internal ROM addr 0 //27'hC02522;
      parameter PCincrease        = 1'b1;       // number of addresses to increase the PC with after each instruction
      parameter InterruptJumpAddr = 27'd1;

      // First arbiter address that is NOT routed to the sdram bus.
      localparam [26:0] SDRAM_ADDR_LIMIT = 27'h800000;

      /*
       * CPU BUS
       */
      wire [31:0] arbiter_q;

      // port a (Instr)
      wire [31:0] addr_a;
      wire [31:0] data_a;
      wire        we_a;
      wire        start_a;
      wire        done_a;

      // port b (Data)
      wire [31:0] addr_b;
      wire [31:0] data_b;
      wire        we_b;
      wire        start_b;
      wire        done_b;

      wire [26:0] arbiter_bus_addr;   // bus_addr
      wire [31:0] arbiter_bus_data;   // bus_data
      wire        arbiter_bus_we;     // bus_we
      wire        arbiter_bus_start;  // bus_start
      wire [31:0] arbiter_bus_q;      // bus_q
      wire        arbiter_bus_done;   // bus_done

      // bus splitter: one select decides which of the two external buses is
      // driven; the bus that is not selected is driven with zeros.
      wire sdram_selected;
      assign sdram_selected = (arbiter_bus_addr < SDRAM_ADDR_LIMIT);

      // A selected sdram address is always below 27'h800000, so taking the
      // lower 24 bits loses nothing.
      assign sdc_addr  = sdram_selected ? arbiter_bus_addr[23:0] : 24'd0;
      assign sdc_data  = sdram_selected ? arbiter_bus_data       : 32'd0;
      assign sdc_we    = sdram_selected ? arbiter_bus_we         : 1'b0;
      assign sdc_start = sdram_selected ? arbiter_bus_start      : 1'b0;

      assign bus_addr  = sdram_selected ? 27'd0 : arbiter_bus_addr;
      assign bus_data  = sdram_selected ? 32'd0 : arbiter_bus_data;
      assign bus_we    = sdram_selected ? 1'b0  : arbiter_bus_we;
      assign bus_start = sdram_selected ? 1'b0  : arbiter_bus_start;

      assign arbiter_bus_q    = sdram_selected ? sdc_q    : bus_q;
      assign arbiter_bus_done = sdram_selected ? sdc_done : bus_done;

      Arbiter arbiter (
          .clk       (clk),
          .reset     (reset),

          // port a (Instr)
          .addr_a    (addr_a),
          .data_a    (data_a),
          .we_a      (we_a),
          .start_a   (start_a),
          .done_a    (done_a),

          // port b (Data)
          .addr_b    (addr_b),
          .data_b    (data_b),
          .we_b      (we_b),
          .start_b   (start_b),
          .done_b    (done_b),

          // output (both ports)
          .q         (arbiter_q),

          // bus
          .bus_addr  (arbiter_bus_addr),
          .bus_data  (arbiter_bus_data),
          .bus_we    (arbiter_bus_we),
          .bus_start (arbiter_bus_start),
          .bus_q     (arbiter_bus_q),
          .bus_done  (arbiter_bus_done)
      );

      /*
       * Interrupts
       */
      reg        intDisabled = 1'b0; // set while an interrupt is being handled, cleared by reti
      wire       intCPU;
      wire [7:0] intID;

      IntController intController(
          .clk         (clk),
          .reset       (reset),
          .int1        (int1),
          .int2        (int2),
          .int3        (int3),
          .int4        (int4),
          .int5        (int5),
          .int6        (int6),
          .int7        (int7),
          .int8        (int8),
          .int9        (int9),
          .int10       (int10),
          .intDisabled (intDisabled),
          .intCPU      (intCPU),
          .intID       (intID)
      );

      // Registers for flush, stall and forwarding
      // (declared as reg because they are driven from always @(*) blocks)
      reg flush_FE, flush_DE, flush_EX, flush_MEM, flush_WB;
      reg stall_FE, stall_DE, stall_EX, stall_MEM, stall_WB;
      reg [1:0] forward_a, forward_b;

      // Cache delays
      wire instr_hit_FE;
      wire datamem_busy_MEM;

      // Signals that are produced by later pipeline stages but consumed by
      // earlier ones. They are declared here so that every identifier is
      // declared before its first use and nothing relies on implicit nets.
      wire [31:0] pc4_EX;
      wire        clearCache_EX, clearCache_MEM;
      wire        jumpc_MEM, jumpr_MEM, halt_MEM, reti_MEM, branch_MEM;
      wire [31:0] alu_result_MEM;
      reg  [31:0] jump_addr_MEM;
      reg         branch_passed_MEM;


      /*
       * FETCH (FE)
       */

      // Program Counter, start at ROM[0]
      reg  [31:0] pc_FE        = PCstart;
      reg  [31:0] pc_FE_backup = 32'd0;   // return address used by reti

      // address of the next sequential instruction
      wire [31:0] pc4_FE;
      assign pc4_FE = pc_FE + PCincrease;

      assign PC = pc_FE[26:0];

      // address of the instruction currently in EX; saved as the return
      // address when an interrupt is taken
      wire [31:0] PC_backup_current;
      assign PC_backup_current = pc4_EX - PCincrease;

      // branch/jump/halt properly aligns interrupt with pipeline, as if it was a normal jump
      // this fixed all instability since the addition of caching (because this decreased the time to obtain instructions)
      // NOTE(review): the PC_backup_current < PCstart term presumably keeps code
      // running at or above PCstart (internal ROM) from being interrupted - confirm.
      wire interruptValid;
      assign interruptValid =
          intCPU &&
          !intDisabled &&
          (PC_backup_current < PCstart) &&
          (branch_MEM || jumpr_MEM || jumpc_MEM || halt_MEM);

      // Program counter update. Priority, highest first:
      //   reset > interrupt > reti > taken jump/branch/halt > stall or miss > increment
      always @(posedge clk)
      begin
          if (reset)
          begin
              pc_FE        <= PCstart;
              pc_FE_backup <= 32'd0;
              intDisabled  <= 1'b0;
          end
          else
          begin
              // interrupt has highest priority
              if (interruptValid)
              begin
                  intDisabled  <= 1'b1;
                  pc_FE_backup <= PC_backup_current;
                  pc_FE        <= InterruptJumpAddr;
              end
              else if (reti_MEM)
              begin
                  intDisabled <= 1'b0;
                  pc_FE       <= pc_FE_backup;
              end
              // jump has priority over instruction cache stalls
              else if (jumpc_MEM || jumpr_MEM || halt_MEM || (branch_MEM && branch_passed_MEM))
              begin
                  pc_FE <= jump_addr_MEM;
              end
              else if (stall_FE || (!instr_hit_FE))
              begin
                  // hold the current address until the stall/miss is over
                  pc_FE <= pc_FE;
              end
              else
              begin
                  pc_FE <= pc4_FE;
              end
          end
      end

      //------------L1i Cache--------------
      //CPU bus
      wire [31:0] l1i_addr;   // address to write or to start reading from
      wire [31:0] l1i_data;   // data to write
      wire        l1i_we;     // write enable
      wire        l1i_start;  // start trigger
      wire [31:0] l1i_q;      // memory output
      wire        l1i_done;   // output ready

      L1Icache l1icache(
          .clk         (clk),
          .reset       (reset),
          .cache_reset (clearCache_EX | clearCache_MEM),

          // CPU bus
          .l2_addr     (l1i_addr),
          .l2_data     (l1i_data),
          .l2_we       (l1i_we),
          .l2_start    (l1i_start),
          .l2_q        (l1i_q),
          .l2_done     (l1i_done),

          // sdram bus
          .sdc_addr    (addr_a),
          .sdc_data    (data_a),
          .sdc_we      (we_a),
          .sdc_start   (start_a),
          .sdc_q       (arbiter_q),
          .sdc_done    (done_a)
      );

      // Instruction Memory
      // should eventually become a memory with variable latency
      // writes directly to next stage
      wire [31:0] instr_DE;

      InstrMem instrMem(
          .clk       (clk),
          .reset     (reset),
          .addr      (pc_FE),
          .q         (instr_DE),
          .hit       (instr_hit_FE),

          // bus
          .bus_addr  (l1i_addr),
          .bus_data  (l1i_data),
          .bus_we    (l1i_we),
          .bus_start (l1i_start),
          .bus_q     (l1i_q),
          .bus_done  (l1i_done),

          .hold      (stall_FE),
          .clear     (flush_FE)
      );

      // Pass data from FE to DE
      wire [31:0] pc4_DE;
      Regr #(.N(32)) regr_pc4_FE_DE(
          .clk   (clk),
          .hold  (stall_FE),
          .clear (reset || flush_FE),
          .in    (pc4_FE),
          .out   (pc4_DE)
      );


      /*
       * DECODE (DE)
       */

      // Instruction Decoder
      wire [3:0] areg_DE, breg_DE, instrOP_DE;
      wire       he_DE, oe_DE, sig_DE;

      InstructionDecoder instrDec_DE(
          .instr    (instr_DE),
          .instrOP  (instrOP_DE),
          .aluOP    (),
          .constAlu (),
          .const16  (),
          .const27  (),
          .areg     (areg_DE),
          .breg     (breg_DE),
          .dreg     (),
          .he       (he_DE),
          .oe       (oe_DE),
          .sig      (sig_DE)
      );

      // Control Unit
      wire alu_use_const_DE;
      wire push_DE, pop_DE;
      wire dreg_we_DE;
      wire mem_write_DE, mem_read_DE;
      wire jumpc_DE, jumpr_DE, branch_DE, halt_DE, reti_DE, clearCache_DE;
      wire getIntID_DE, getPC_DE;

      ControlUnit controlUnit(
          // in
          .instrOP       (instrOP_DE),
          .he            (he_DE),

          // out
          .alu_use_const (alu_use_const_DE),
          .push          (push_DE),
          .pop           (pop_DE),
          .dreg_we       (dreg_we_DE),
          .mem_write     (mem_write_DE),
          .mem_read      (mem_read_DE),
          .jumpc         (jumpc_DE),
          .jumpr         (jumpr_DE),
          .halt          (halt_DE),
          .reti          (reti_DE),
          .branch        (branch_DE),
          .getIntID      (getIntID_DE),
          .getPC         (getPC_DE),
          .clearCache    (clearCache_DE)
      );

      // Register Bank
      // writes directly to next stage
      wire [31:0] data_a_EX, data_b_EX;
      wire [3:0]  dreg_WB;
      wire        dreg_we_WB;
      reg  [31:0] data_d_WB;

      Regbank regbank(
          .clk    (clk),
          .reset  (reset),

          .addr_a (areg_DE),
          .addr_b (breg_DE),
          .data_a (data_a_EX),
          .data_b (data_b_EX),

          // from WB stage
          .addr_d (dreg_WB),
          .data_d (data_d_WB),
          .we     (dreg_we_WB),

          .hold   (stall_DE),
          .clear  (flush_DE)
      );

      // Pass data from DE to EX
      // Set to 0 during stall (bubble)
      wire [31:0] instr_EX;
      Regr #(.N(32)) regr_instr_DE_EX(
          .clk   (clk),
          .hold  (stall_DE),
          .clear (reset || flush_DE || stall_DE),
          .in    (instr_DE),
          .out   (instr_EX)
      );

      // pc4_EX is declared near the top of the module; unlike the instruction
      // and flag registers it is not cleared by stall_DE.
      Regr #(.N(32)) regr_pc4_DE_EX(
          .clk   (clk),
          .hold  (stall_DE),
          .clear (reset || flush_DE),
          .in    (pc4_DE),
          .out   (pc4_EX)
      );

      // Set to 0 during stall (bubble)
      wire alu_use_const_EX;
      wire push_EX, pop_EX;
      wire dreg_we_EX;
      wire mem_write_EX, mem_read_EX;
      wire jumpc_EX, jumpr_EX, halt_EX, reti_EX, branch_EX; // clearCache_EX is declared near the top
      wire getIntID_EX, getPC_EX;

      Regr #(.N(14)) regr_cuflags_DE_EX(
          .clk   (clk),
          .hold  (stall_DE),
          .clear (reset || flush_DE || stall_DE),
          .in    ({alu_use_const_DE, push_DE, pop_DE, dreg_we_DE, mem_write_DE, mem_read_DE, jumpc_DE, jumpr_DE, halt_DE, reti_DE, branch_DE, getIntID_DE, getPC_DE, clearCache_DE}),
          .out   ({alu_use_const_EX, push_EX, pop_EX, dreg_we_EX, mem_write_EX, mem_read_EX, jumpc_EX, jumpr_EX, halt_EX, reti_EX, branch_EX, getIntID_EX, getPC_EX, clearCache_EX})
      );


      /*
       * EXECUTE (EX)
       */

      // Instruction Decoder
      wire [31:0] alu_const16_EX, alu_const16u_EX;
      wire [3:0]  aluOP_EX;
      wire [3:0]  areg_EX, breg_EX, dreg_EX;

      InstructionDecoder instrDec_EX(
          .instr     (instr_EX),
          .instrOP   (),
          .aluOP     (aluOP_EX),
          .constAlu  (alu_const16_EX),
          .constAluu (alu_const16u_EX),
          .const16   (),
          .const27   (),
          .areg      (areg_EX),
          .breg      (breg_EX),
          .dreg      (dreg_EX),
          .he        (),
          .oe        (),
          .sig       ()
      );

      // ALU
      wire [31:0] alu_result_EX;

      // select constant or register for input b
      wire [31:0] alu_input_b_EX;
      assign alu_input_b_EX =
          (alu_use_const_EX && aluOP_EX[3:1] == 3'b110) ? alu_const16u_EX : // unsigned const for load(hi) instruction
          (alu_use_const_EX)                            ? alu_const16_EX  :
                                                          data_b_EX;

      // if forwarding, select forwarded data instead for input a of ALU
      reg [31:0] fw_data_a_EX;
      always @(*)
      begin
          case (forward_a)
              2'd1:    fw_data_a_EX = alu_result_MEM;
              2'd2:    fw_data_a_EX = data_d_WB;
              default: fw_data_a_EX = data_a_EX;
          endcase
      end

      // if forwarding, select forwarded data instead for input b of ALU
      // NOTE(review): a non-zero forward_b also replaces the constant chosen in
      // alu_input_b_EX. That is only safe if breg_EX can never match a pending
      // destination register for constant-operand instructions, which depends
      // on InstructionDecoder - confirm.
      reg [31:0] fw_data_b_EX;
      always @(*)
      begin
          case (forward_b)
              2'd1:    fw_data_b_EX = alu_result_MEM;
              2'd2:    fw_data_b_EX = data_d_WB;
              default: fw_data_b_EX = alu_input_b_EX;
          endcase
      end

      ALU alu(
          .clk    (clk),
          .opcode (aluOP_EX),
          .ax     (fw_data_a_EX),
          .bx     (fw_data_b_EX),
          .y      (alu_result_EX)
      );

      // for special instructions, pass other data than alu result
      wire [31:0] execute_result_EX;
      assign execute_result_EX =
          (getPC_EX)    ? pc4_EX - PCincrease : // address of the instruction in EX
          (getIntID_EX) ? intID               :
                          alu_result_EX;

      // Pass data from EX to MEM
      wire [31:0] instr_MEM;
      Regr #(.N(32)) regr_instr_EX_MEM(
          .clk   (clk),
          .hold  (stall_EX),
          .clear (reset || flush_EX),
          .in    (instr_EX),
          .out   (instr_MEM)
      );

      wire [31:0] data_a_MEM, data_b_MEM;
      Regr #(.N(64)) regr_regdata_EX_MEM(
          .clk   (clk),
          .hold  (stall_EX),
          .clear (reset || flush_EX),
          .in    ({fw_data_a_EX, fw_data_b_EX}), // forwarded data
          .out   ({data_a_MEM, data_b_MEM})
      );

      wire [31:0] pc4_MEM;
      Regr #(.N(32)) regr_pc4_EX_MEM(
          .clk   (clk),
          .hold  (stall_EX),
          .clear (reset || flush_EX),
          .in    (pc4_EX),
          .out   (pc4_MEM)
      );

      // jumpc/jumpr/halt/reti/branch/clearCache _MEM are declared near the top
      wire push_MEM, pop_MEM;
      wire dreg_we_MEM;
      wire mem_write_MEM, mem_read_MEM;

      Regr #(.N(11)) regr_cuflags_EX_MEM(
          .clk   (clk),
          .hold  (stall_EX),
          .clear (reset || flush_EX),
          .in    ({push_EX, pop_EX, dreg_we_EX, mem_write_EX, mem_read_EX, jumpc_EX, jumpr_EX, halt_EX, reti_EX, branch_EX, clearCache_EX}),
          .out   ({push_MEM, pop_MEM, dreg_we_MEM, mem_write_MEM, mem_read_MEM, jumpc_MEM, jumpr_MEM, halt_MEM, reti_MEM, branch_MEM, clearCache_MEM})
      );

      // alu_result_MEM is declared near the top of the module
      Regr #(.N(32)) regr_alu_result_EX_MEM(
          .clk   (clk),
          .hold  (stall_EX),
          .clear (reset || flush_EX),
          .in    (execute_result_EX), // other data in case of special instructions
          .out   (alu_result_MEM)
      );


      /*
       * MEMORY (MEM)
       */

      // Instruction Decoder
      wire [31:0] const16_MEM;
      wire [26:0] const27_MEM;
      wire [2:0]  branchOP_MEM;
      wire        oe_MEM, sig_MEM;
      wire [3:0]  dreg_MEM;

      InstructionDecoder instrDec_MEM(
          .instr    (instr_MEM),
          .instrOP  (),
          .aluOP    (),
          .branchOP (branchOP_MEM),
          .constAlu (),
          .const16  (const16_MEM),
          .const27  (const27_MEM),
          .areg     (),
          .breg     (),
          .dreg     (dreg_MEM),
          .he       (),
          .oe       (oe_MEM),
          .sig      (sig_MEM)
      );

      // address of the instruction currently in MEM; base for relative jumps
      wire [31:0] pc_MEM;
      assign pc_MEM = pc4_MEM - PCincrease;

      // Jump target selection (jump_addr_MEM is declared near the top).
      // Defaults to 0 when the instruction in MEM is not a jump/branch/halt.
      always @(*)
      begin
          jump_addr_MEM = 32'd0;

          if (jumpc_MEM)
          begin
              if (oe_MEM)
              begin
                  // add sign extended to allow negative offsets
                  jump_addr_MEM = pc_MEM + {{5{const27_MEM[26]}}, const27_MEM[26:0]};
              end
              else
              begin
                  jump_addr_MEM = {5'd0, const27_MEM};
              end
          end
          else if (jumpr_MEM)
          begin
              if (oe_MEM)
              begin
                  jump_addr_MEM = pc_MEM + (data_b_MEM + const16_MEM);
              end
              else
              begin
                  jump_addr_MEM = data_b_MEM + const16_MEM;
              end
          end
          else if (branch_MEM)
          begin
              jump_addr_MEM = pc_MEM + const16_MEM;
          end
          else if (halt_MEM)
          begin
              // jump to same address to keep halting
              jump_addr_MEM = pc_MEM;
          end
      end

      // Opcodes
      localparam
          BRANCH_OP_BEQ = 3'b000, // A == B
          BRANCH_OP_BGT = 3'b001, // A >  B
          BRANCH_OP_BGE = 3'b010, // A >= B
          BRANCH_OP_U1  = 3'b011, // Unimplemented 1
          BRANCH_OP_BNE = 3'b100, // A != B
          BRANCH_OP_BLT = 3'b101, // A <  B
          BRANCH_OP_BLE = 3'b110, // A <= B
          BRANCH_OP_U2  = 3'b111; // Unimplemented 2

      // Branch condition evaluation (branch_passed_MEM is declared near the
      // top). sig_MEM selects signed instead of unsigned ordering compares.
      always @(*)
      begin
          branch_passed_MEM = 1'b0;

          case (branchOP_MEM)
              BRANCH_OP_BEQ:
                  branch_passed_MEM = (data_a_MEM == data_b_MEM);

              BRANCH_OP_BGT:
                  branch_passed_MEM = (sig_MEM) ? ($signed(data_a_MEM) > $signed(data_b_MEM)) : (data_a_MEM > data_b_MEM);

              BRANCH_OP_BGE:
                  branch_passed_MEM = (sig_MEM) ? ($signed(data_a_MEM) >= $signed(data_b_MEM)) : (data_a_MEM >= data_b_MEM);

              BRANCH_OP_BNE:
                  branch_passed_MEM = (data_a_MEM != data_b_MEM);

              BRANCH_OP_BLT:
                  branch_passed_MEM = (sig_MEM) ? ($signed(data_a_MEM) < $signed(data_b_MEM)) : (data_a_MEM < data_b_MEM);

              BRANCH_OP_BLE:
                  branch_passed_MEM = (sig_MEM) ? ($signed(data_a_MEM) <= $signed(data_b_MEM)) : (data_a_MEM <= data_b_MEM);

              // unimplemented opcodes never take the branch
              default:
                  branch_passed_MEM = 1'b0;
          endcase
      end

      //------------L1d Cache--------------
      //CPU bus
      wire [31:0] l1d_addr;   // address to write or to start reading from
      wire [31:0] l1d_data;   // data to write
      wire        l1d_we;     // write enable
      wire        l1d_start;  // start trigger
      wire [31:0] l1d_q;      // memory output
      wire        l1d_done;   // output ready

      L1Dcache l1dcache(
          .clk         (clk),
          .reset       (reset),
          .cache_reset (clearCache_EX | clearCache_MEM),

          // CPU bus
          .l2_addr     (l1d_addr),
          .l2_data     (l1d_data),
          .l2_we       (l1d_we),
          .l2_start    (l1d_start),
          .l2_q        (l1d_q),
          .l2_done     (l1d_done),

          // sdram bus
          .sdc_addr    (addr_b),
          .sdc_data    (data_b),
          .sdc_we      (we_b),
          .sdc_start   (start_b),
          .sdc_q       (arbiter_q),
          .sdc_done    (done_b)
      );

      // Data Memory
      // should eventually become a memory with variable latency
      // writes directly to the next stage
      wire [31:0] dataMem_q_WB;
      wire [31:0] dataMem_addr_MEM;
      assign dataMem_addr_MEM = data_a_MEM + const16_MEM;

      DataMem dataMem(
          .clk       (clk),
          .reset     (reset),
          .addr      (dataMem_addr_MEM),
          .we        (mem_write_MEM),
          .re        (mem_read_MEM),
          .data      (data_b_MEM),
          .q         (dataMem_q_WB),
          .busy      (datamem_busy_MEM),

          // bus
          .bus_addr  (l1d_addr),
          .bus_data  (l1d_data),
          .bus_we    (l1d_we),
          .bus_start (l1d_start),
          .bus_q     (l1d_q),
          .bus_done  (l1d_done),

          .hold      (stall_MEM),
          .clear     (flush_MEM)
      );

      // Stack
      // writes directly to the next stage
      wire [31:0] stack_q_WB;

      Stack stack(
          .clk   (clk),
          .reset (reset),
          .q     (stack_q_WB),
          .d     (data_b_MEM),
          .push  (push_MEM),
          .pop   (pop_MEM),
          .hold  (stall_MEM),
          .clear (flush_MEM)
      );

      // Pass data from MEM to WB
      wire [31:0] instr_WB;
      Regr #(.N(32)) regr_instr_MEM_WB(
          .clk   (clk),
          .hold  (stall_MEM),
          .clear (reset || flush_MEM),
          .in    (instr_MEM),
          .out   (instr_WB)
      );

      wire [31:0] alu_result_WB;
      Regr #(.N(32)) regr_alu_result_MEM_WB(
          .clk   (clk),
          .hold  (stall_MEM),
          .clear (reset || flush_MEM),
          .in    (alu_result_MEM),
          .out   (alu_result_WB)
      );

      wire [31:0] pc4_WB;
      Regr #(.N(32)) regr_pc4_MEM_WB(
          .clk   (clk),
          .hold  (stall_MEM),
          .clear (reset || flush_MEM),
          .in    (pc4_MEM),
          .out   (pc4_WB)
      );

      wire pop_WB, mem_read_WB;
      // dreg_we_WB is declared together with the register bank
      Regr #(.N(3)) regr_cuflags_MEM_WB(
          .clk   (clk),
          .hold  (stall_MEM),
          .clear (reset || flush_MEM),
          .in    ({pop_MEM, dreg_we_MEM, mem_read_MEM}),
          .out   ({pop_WB, dreg_we_WB, mem_read_WB})
      );


      /*
       * WRITE BACK (WB)
       */

      wire [15:0] const16u_WB; // connected but not read anywhere in this module

      InstructionDecoder instrDec_WB(
          .instr    (instr_WB),
          .instrOP  (),
          .aluOP    (),
          .constAlu (),
          .const16  (),
          .const16u (const16u_WB),
          .const27  (),
          .areg     (),
          .breg     (),
          .dreg     (dreg_WB),
          .he       (),
          .oe       (),
          .sig      ()
      );

      // Select the value written to the register bank.
      // Priority: stack pop > data memory read > ALU result.
      always @(*)
      begin
          if (pop_WB)
          begin
              data_d_WB = stack_q_WB;
          end
          else if (mem_read_WB)
          begin
              data_d_WB = dataMem_q_WB;
          end
          else // (ALU, savPC, IntID)
          begin
              data_d_WB = alu_result_WB;
          end
      end


      /*
       * FLUSH
       */
      always @(*)
      begin
          flush_FE  = 1'b0;
          flush_DE  = 1'b0;
          flush_EX  = 1'b0;
          flush_MEM = 1'b0;
          flush_WB  = 1'b0;

          // flush on jumps or interrupts
          if (jumpc_MEM || jumpr_MEM || halt_MEM || (branch_MEM && branch_passed_MEM) || reti_MEM || interruptValid)
          begin
              flush_FE = 1'b1;
              flush_DE = 1'b1;
              flush_EX = 1'b1;
          end

          // flush MEM when busy, causing a bubble
          if ((mem_read_MEM || mem_write_MEM) && datamem_busy_MEM)
          begin
              flush_MEM = 1'b1;
          end
      end


      /*
       * STALL
       */
      always @(*)
      begin
          stall_FE  = 1'b0;
          stall_DE  = 1'b0;
          stall_EX  = 1'b0;
          stall_MEM = 1'b0;
          stall_WB  = 1'b0;

          // load-use hazard: the instruction in EX is a memory read or a pop
          // and the instruction in DE reads its destination register. Hold FE
          // and DE; the DE->EX instruction and flag registers are cleared
          // while stall_DE is set, which inserts the bubble.
          if ((mem_read_EX || pop_EX) && ((dreg_EX == areg_DE) || (dreg_EX == breg_DE)))
          begin
              stall_FE = 1'b1;
              stall_DE = 1'b1;
          end

          // stall if read or write in data MEM causes the busy flag to be set
          if ((mem_read_MEM || mem_write_MEM) && datamem_busy_MEM)
          begin
              stall_FE = 1'b1;
              stall_DE = 1'b1;
              stall_EX = 1'b1;
          end
      end


      /*
       * FORWARDING
       */
      // MEM (4) -> EX (3)
      // WB  (5) -> EX (3)
      // Register 0 is never forwarded.
      always @(*)
      begin
          // input a of ALU
          forward_a = 2'd0; // default to no forwarding
          if (dreg_we_MEM && (dreg_MEM == areg_EX) && (areg_EX != 4'd0))
          begin
              forward_a = 2'd1; // priority 1: forward from MEM to EX
          end
          else if (dreg_we_WB && (dreg_WB == areg_EX) && (areg_EX != 4'd0))
          begin
              forward_a = 2'd2; // priority 2: forward from WB to EX
          end

          // input b of ALU
          forward_b = 2'd0; // default to no forwarding
          if (dreg_we_MEM && (dreg_MEM == breg_EX) && (breg_EX != 4'd0))
          begin
              forward_b = 2'd1; // priority 1: forward from MEM to EX
          end
          else if (dreg_we_WB && (dreg_WB == breg_EX) && (breg_EX != 4'd0))
          begin
              forward_b = 2'd2; // priority 2: forward from WB to EX
          end
      end

  endmodule