Assembler.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. #!/usr/bin/env python3
  2. import sys
  3. import CompileInstruction
# Names of libraries that were already inserted through an `include line.
# Prevents multiple insertions of the same library.
# Module-level so that the recursive insertLibraries() calls share one list.
libraryList = []

# True when the program being assembled is the BDOS operating system itself
# (main() sets this when the first command line argument is "os").
BDOSos = False

# True when the program has to be assembled as a BDOS user program
# (main() sets this when the first command line argument is "bdos").
BDOSprogram = False

# Global offset of the program in memory; redoLineNumbering() adds it to
# every line number.
programOffset = 0

# Remove unreachable code (main() enables this when the last argument is "-O").
optimizeSize = False
  16. def removeFunctionFromCode(parsedLines, toRemove):
  17. returnList = []
  18. start = -1
  19. end = -1
  20. belowCodeSection = False
  21. for idx, line in enumerate(parsedLines):
  22. if line[1][0] == "Int:":
  23. belowCodeSection = True
  24. if start == -1:
  25. if line[1][0] == toRemove + ":":
  26. start = idx
  27. #print (start)
  28. #print(parsedLines[start])
  29. elif end == -1:
  30. # continue until next function found
  31. if (line[1][0][-1] == ':'):
  32. if not belowCodeSection:
  33. if ("Label_" not in line[1][0]):
  34. end = idx
  35. #print (end)
  36. #print(parsedLines[end])
  37. # when we are below the code section, stop at any new label
  38. else:
  39. end = idx
  40. returnList = returnList + parsedLines[0:start]
  41. returnList = returnList + parsedLines[end:]
  42. return returnList
  43. def removeUnreachebleCode(parsedLines):
  44. #print("orig len:", len(parsedLines))
  45. returnList = parsedLines
  46. asm = [x[1] for x in parsedLines]
  47. functionNames = []
  48. jumps = []
  49. for x in asm:
  50. if len(x) > 0:
  51. if (x[0][-1] == ':'):
  52. if ("Label_" not in x[0]):
  53. functionNames.append(x[0][:-1])
  54. if (x[0] == "addr2reg"):
  55. if ("Label_" not in x[1]):
  56. jumps.append(x[1])
  57. if (x[0] == "jump"):
  58. if ("Label_" not in x[1]):
  59. jumps.append(x[1])
  60. #for f in functionNames:
  61. # print(f)
  62. #for j in jumps:
  63. # print(j)
  64. unusedFunctions = list((set(functionNames).difference(jumps)).difference(["Main", "Int", "Syscall"]))
  65. foundUnusedFunctions = len(unusedFunctions)
  66. for u in unusedFunctions:
  67. #print(u)
  68. returnList = removeFunctionFromCode(returnList, u)
  69. # recursive check
  70. if foundUnusedFunctions > 0:
  71. returnList = removeUnreachebleCode(returnList)
  72. #print("after len:", len(returnList))
  73. return returnList
  74. def parseLines(fileName):
  75. parsedLines = []
  76. with open(fileName, 'r') as f:
  77. for i, line in enumerate(f, start=1):
  78. # do something special in case of a .ds instruction
  79. if (len(line) > 4 and line.split(" ",maxsplit=1)[0] == ".ds"):
  80. parsedLines.append((i, ['.ds', line.split(" ",maxsplit=1)[1].rstrip('\n')]))
  81. else:
  82. parsedLine = line.strip().split(";",maxsplit=1)[0].split()
  83. if (parsedLine != []):
  84. parsedLines.append((i, parsedLine))
  85. parsedLines.append((0, ['.EOF'])) # add end of file token
  86. return parsedLines
  87. def moveDataDown(parsedLines):
  88. for idx, line in enumerate(parsedLines):
  89. if (line[1][0] == ".EOF"): # return when gone through entire file
  90. return parsedLines
  91. if (line[1][0] == ".data"): # when we found the start of a .data segment
  92. while (parsedLines[idx][1][0] != ".code" and parsedLines[idx][1][0] != ".rdata" and parsedLines[idx][1][0] != ".bss" and parsedLines[idx][1][0] != ".EOF"): # move all lines to the end until .code, .rdata or .EOF
  93. parsedLines.append(parsedLines.pop(idx))
  94. # should not get here
  95. print("SHOULD NOT GET HERE")
  96. sys.exit(1)
  97. return None
  98. def moveRDataDown(parsedLines):
  99. for idx, line in enumerate(parsedLines):
  100. if (line[1][0] == ".EOF"): # return when gone through entire file
  101. return parsedLines
  102. if (line[1][0] == ".rdata"): # when we found the start of a .rdata segment
  103. while (parsedLines[idx][1][0] != ".code" and parsedLines[idx][1][0] != ".data" and parsedLines[idx][1][0] != ".bss" and parsedLines[idx][1][0] != ".EOF"): # move all lines to the end until .code, .data or .EOF
  104. parsedLines.append(parsedLines.pop(idx))
  105. # should not get here
  106. print("SHOULD NOT GET HERE")
  107. sys.exit(1)
  108. return None
  109. def moveBssDown(parsedLines):
  110. for idx, line in enumerate(parsedLines):
  111. if (line[1][0] == ".EOF"): # return when gone through entire file
  112. return parsedLines
  113. if (line[1][0] == ".bss"): # when we found the start of a .rdata segment
  114. while (parsedLines[idx][1][0] != ".code" and parsedLines[idx][1][0] != ".data" and parsedLines[idx][1][0] != ".rdata" and parsedLines[idx][1][0] != ".EOF"): # move all lines to the end until .code, .data or .EOF
  115. parsedLines.append(parsedLines.pop(idx))
  116. # should not get here
  117. print("SHOULD NOT GET HERE")
  118. sys.exit(1)
  119. return None
  120. def removeAssemblerDirectives(parsedLines):
  121. return [line for line in parsedLines if line[1][0] not in [".code", ".rdata", ".data", ".bss", ".EOF"]]
  122. def insertLibraries(parsedLines):
  123. returnList = []
  124. returnList.extend(parsedLines)
  125. for line in parsedLines:
  126. if (len(line[1]) == 2):
  127. if (line[1][0]) == "`include":
  128. if (line[1][1] not in libraryList):
  129. libraryList.append(line[1][1])
  130. insertList = insertLibraries(parseLines(line[1][1])) #recursion to include libraries within libraries
  131. for i in range(len(insertList)):
  132. returnList.insert(i, insertList[i])
  133. return returnList
  134. def compileLine(line):
  135. compiledLine = ""
  136. #check what kind of instruction this line is
  137. switch = {
  138. "halt" : CompileInstruction.compileHalt,
  139. "read" : CompileInstruction.compileRead,
  140. "write" : CompileInstruction.compileWrite,
  141. "readintid" : CompileInstruction.compileIntID,
  142. "push" : CompileInstruction.compilePush,
  143. "pop" : CompileInstruction.compilePop,
  144. "jump" : CompileInstruction.compileJump,
  145. "jumpo" : CompileInstruction.compileJumpo,
  146. "jumpr" : CompileInstruction.compileJumpr,
  147. "jumpro" : CompileInstruction.compileJumpro,
  148. "beq" : CompileInstruction.compileBEQ,
  149. "bgt" : CompileInstruction.compileBGT,
  150. "bgts" : CompileInstruction.compileBGTS,
  151. "bge" : CompileInstruction.compileBGE,
  152. "bges" : CompileInstruction.compileBGES,
  153. "bne" : CompileInstruction.compileBNE,
  154. "blt" : CompileInstruction.compileBLT,
  155. "blts" : CompileInstruction.compileBLTS,
  156. "ble" : CompileInstruction.compileBLE,
  157. "bles" : CompileInstruction.compileBLES,
  158. "savpc" : CompileInstruction.compileSavPC,
  159. "reti" : CompileInstruction.compileReti,
  160. "ccache" : CompileInstruction.compileCcache,
  161. "or" : CompileInstruction.compileOR,
  162. "and" : CompileInstruction.compileAND,
  163. "xor" : CompileInstruction.compileXOR,
  164. "add" : CompileInstruction.compileADD,
  165. "sub" : CompileInstruction.compileSUB,
  166. "shiftl" : CompileInstruction.compileSHIFTL,
  167. "shiftr" : CompileInstruction.compileSHIFTR,
  168. "shiftrs" : CompileInstruction.compileSHIFTRS,
  169. "not" : CompileInstruction.compileNOT,
  170. "mults" : CompileInstruction.compileMULTS,
  171. "multu" : CompileInstruction.compileMULTU,
  172. "multfp" : CompileInstruction.compileMULTFP,
  173. "slt" : CompileInstruction.compileSLT,
  174. "sltu" : CompileInstruction.compileSLTU,
  175. "load" : CompileInstruction.compileLoad,
  176. "loadhi" : CompileInstruction.compileLoadHi,
  177. "addr2reg" : CompileInstruction.compileAddr2reg,
  178. "load32" : CompileInstruction.compileLoad32,
  179. "nop" : CompileInstruction.compileNop,
  180. ".dw" : CompileInstruction.compileDw,
  181. ".dd" : CompileInstruction.compileDd,
  182. ".db" : CompileInstruction.compileDb,
  183. ".ds" : CompileInstruction.compileDs,
  184. ".dl" : CompileInstruction.compileDl,
  185. "loadlabellow" : CompileInstruction.compileLoadLabelLow,
  186. "loadlabelhigh" : CompileInstruction.compileLoadLabelHigh,
  187. "`include" : CompileInstruction.compileNothing,
  188. ".eof" : CompileInstruction.compileNothing
  189. }
  190. try:
  191. compiledLine = switch[line[0].lower()](line)
  192. #print errors
  193. except KeyError:
  194. #check if line is a label
  195. if len(line) == 1 and line[0][-1] == ':':
  196. compiledLine = "Label " + str(line[0])
  197. #if not a label, raise error
  198. else:
  199. raise Exception("Unknown instruction '" + str(line[0]) + "'" )
  200. return compiledLine
  201. #compiles lines that can be compiled directly
  202. def passOne(parsedLines):
  203. passOneResult = []
  204. for line in parsedLines:
  205. try:
  206. compiledLine = compileLine(line[1])
  207. #fix instructions that have multiple lines
  208. if compiledLine.split()[0] == "loadBoth":
  209. passOneResult.append((line[0], compileLine(["load", compiledLine.split()[2], compiledLine.split()[3]])))
  210. compiledLine = compileLine(["loadhi", compiledLine.split()[1], compiledLine.split()[3]])
  211. if compiledLine.split()[0] == "loadLabelHigh":
  212. passOneResult.append((line[0], "loadLabelLow " + " ".join(compiledLine.split()[1:])))
  213. if compiledLine.split()[0] == "data":
  214. for i in compiledLine.split():
  215. if i != "data":
  216. passOneResult.append((line[0], i + " //data"))
  217. else:
  218. if (compiledLine != "ignore"):
  219. passOneResult.append((line[0], compiledLine))
  220. except Exception as e:
  221. print("Error in line " + str(line[0]) + ": " + " ".join(line[1]))
  222. print("The error is: {0}".format(e))
  223. print("Assembler will now exit")
  224. sys.exit(1)
  225. return passOneResult
  226. #reads and removes define statements, stores them into dictionary
  227. def obtainDefines(content):
  228. defines = {} #list of definitions with their value
  229. contentWithoutDefines = [] #lines without defines
  230. defineLines = [] #lines with defines
  231. #seperate defines from other lines
  232. for line in content:
  233. if line[1][0].lower() == "define":
  234. #do error checking
  235. if len(line[1]) != 4 or line[1][2] != "=":
  236. print("Error in line " + str(line[0]) + ": " + " ".join(line[1]))
  237. print("Invalid define statement")
  238. print("Assembler will now exit")
  239. sys.exit(1)
  240. defineLines.append(line)
  241. else:
  242. contentWithoutDefines.append(line)
  243. #parse the lines with defines
  244. for line in defineLines:
  245. if (line[1][1] in defines):
  246. print("Error: define " + line[1][1] + " is already defined")
  247. print("Assembler will now exit")
  248. sys.exit(1)
  249. defines.update({line[1][1]:line[1][3]})
  250. return defines, contentWithoutDefines
  251. #replace defined words with their value
  252. def processDefines(defines, content):
  253. replacedContent = [] #lines where defined words have been replaced
  254. #for each line, replace the words with their corresponding value if defined
  255. for line in content:
  256. replacedContent.append((line[0], [defines.get(word, word) for word in line[1]]))
  257. return replacedContent
  258. #adds interrupts, program length placeholder and jump to main
  259. #skip program length placeholder in case of BDOS program
  260. #add jump to syscall if BDOS os
  261. #NOTE: because of a unknown bug in B32P (probably related to return address of interrupt directly after jumping to SDRAM from ROM,
  262. # the 4th instruction needs to be jump Main as well
  263. def addHeaderCode(parsedLines):
  264. if BDOSprogram:
  265. header = [(0,"jump Main"),(0,"jump Int"), (0,"jump Main"), (0,"jump Main")]
  266. elif BDOSos:
  267. header = [(0,"jump Main"),(0,"jump Int"), (0,"LengthOfProgram"), (0,"jump Main"), (0,"jump Syscall")]
  268. else:
  269. header = [(0,"jump Main"),(0,"jump Int"), (0,"LengthOfProgram"), (0,"jump Main")]
  270. return header + parsedLines
  271. #move labels to the next line
  272. def moveLabels(parsedLines):
  273. returnList = []
  274. #move to next line
  275. # (old iteration) for idx, line in enumerate(parsedLines):
  276. idx = 0;
  277. while idx < len(parsedLines):
  278. line = parsedLines[idx]
  279. if line[1].lower().split()[0] == "label":
  280. if idx < len(parsedLines) - 1:
  281. if parsedLines[idx+1][1].lower().split()[0] == "label":
  282. # (OLD) if we have a label directly below, insert a nop as a quick fix
  283. #parsedLines.insert(idx+1, (0, "$*" + line[1].split()[1] + "*$ " +"00000000000000000000000000000000 //NOP to quickfix double labels"))
  284. # if we have a label directly below, insert the label in the first non-label line
  285. i = 2
  286. labelDone = False
  287. while idx+i < len(parsedLines) - 1 and not labelDone:
  288. if parsedLines[idx+i][1].lower().split()[0] != "label":
  289. labelDone = True
  290. parsedLines[idx+i] = (parsedLines[idx+i][0], "$*" + line[1].split()[1] + "*$ " + parsedLines[idx+i][1])
  291. # add label in comments, but only if the line does not need to have a second pass
  292. # TODO implement this!
  293. #if parsedLines[idx+i][1].split()[1][0] == "0" or parsedLines[idx+i][1].split()[1][0] == "1":
  294. # parsedLines[idx+i][1] = parsedLines[idx+i][1] + " @" + line[1].split()[1][:-1]
  295. i+=1
  296. else:
  297. parsedLines[idx+1] = (parsedLines[idx+1][0], "$*" + line[1].split()[1] + "*$ " + parsedLines[idx+1][1])
  298. # add label in comments, but only if the line does not need to have a second pass
  299. # TODO implement this!
  300. #if parsedLines[idx+1][1].split()[1][0] == "0" or parsedLines[idx+1][1].split()[1][0] == "1":
  301. # parsedLines[idx+1][1] = parsedLines[idx+1][1] + " @" + line[1].split()[1][:-1]
  302. else:
  303. print("Error: label " + line[1].split()[1] + " has no instructions below it")
  304. print("Assembler will now exit")
  305. sys.exit(1)
  306. idx += 1
  307. #remove original labels
  308. for line in parsedLines:
  309. if line[1].lower().split()[0] != "label":
  310. returnList.append(line)
  311. return returnList
  312. #renumbers each line
  313. def redoLineNumbering(parsedLines):
  314. returnList = []
  315. for idx, line in enumerate(parsedLines):
  316. returnList.append((idx + programOffset, line[1]))
  317. return returnList
  318. #removes label prefix and returns a map of labels to line numbers
  319. #assumes that $* does not occur somewhere else, and that labels are seperated by space
  320. def getLabelMap(parsedLines):
  321. labelMap = {}
  322. returnList = []
  323. for line in parsedLines:
  324. numberOfLabels = line[1].count("$*")
  325. for i in range(numberOfLabels):
  326. if line[1].split()[i][:2] == "$*" and line[1].split()[i][-3:] == ":*$":
  327. if (line[1].split()[i][2:-3] in labelMap):
  328. print("Error: label " + line[1].split()[i][2:-3] + " is already defined")
  329. print("Assembler will now exit")
  330. sys.exit(1)
  331. labelMap[line[1].split()[i][2:-3]] = line[0]
  332. if line[1].split()[0][:2] == "$*" and line[1].split()[0][-2:] == "*$":
  333. returnList.append((line[0], line[1].split("*$ ")[-1]))
  334. else:
  335. returnList.append(line)
  336. return returnList, labelMap
  337. #compiles all labels
  338. def passTwo(parsedLines, labelMap):
  339. #lines that start with these names should be compiled
  340. toCompileList = ["jump", "beq", "bgt", "bgts", "bge", "bges", "bne", "blt", "blts", "ble", "bles", "loadlabellow" ,"loadlabelhigh", ".dl"]
  341. for idx, line in enumerate(parsedLines):
  342. if line[1].lower().split()[0] in toCompileList:
  343. for idx2, word in enumerate(line[1].split()):
  344. if word in labelMap:
  345. x = line[1].split()
  346. x[idx2] = str(labelMap.get(word))
  347. y = compileLine(x)
  348. parsedLines[idx] = (parsedLines[idx][0], y)
  349. return parsedLines
  350. #check if all labels are compiled
  351. def checkNoLabels(parsedLines):
  352. toCompileList = ["jump", "beq", "bgt", "bgts", "bge", "bges", "bne", "blt", "blts", "ble", "bles", "loadlabellow" ,"loadlabelhigh", ".dl"]
  353. for idx, line in enumerate(parsedLines):
  354. if line[1].lower().split()[0] in toCompileList:
  355. labelPos = 0
  356. if line[1].lower().split()[0] in ["jump", "loadlabellow", "loadlabelhigh", ".dl"]:
  357. labelPos = 1
  358. if line[1].lower().split()[0] in ["beq", "bgt", "bgts", "bge", "bges", "bne", "blt", "blts", "ble", "bles"]:
  359. labelPos = 3
  360. print("Error: label " + line[1].split()[labelPos] + " is undefined")
  361. print("Assembler will now exit")
  362. sys.exit(1)
  363. if line[1].lower().split()[0] == "label":
  364. print("Error: label " + line[1].split()[1] + " is used directly after another label")
  365. print("Assembler will now exit")
  366. sys.exit(1)
  367. def main():
  368. #check assemble mode and offset
  369. global BDOSos
  370. global BDOSprogram
  371. global programOffset
  372. global optimizeSize
  373. if len(sys.argv) >= 3:
  374. BDOSprogram = (sys.argv[1].lower() == "bdos")
  375. if BDOSprogram:
  376. programOffset = CompileInstruction.getNumber(sys.argv[2])
  377. if len(sys.argv) >= 2:
  378. BDOSos = (sys.argv[1].lower() == "os")
  379. if sys.argv[len(sys.argv)-1] == "-O":
  380. optimizeSize = True
  381. #parse lines from file
  382. parsedLines = parseLines("code.asm")
  383. #move .data sections down
  384. parsedLines = moveDataDown(parsedLines)
  385. #move .rdata sections down
  386. parsedLines = moveRDataDown(parsedLines)
  387. #move .bss sections down
  388. parsedLines = moveBssDown(parsedLines)
  389. #remove all .code, .data, .rdata, .bss and .EOF lines
  390. parsedLines = removeAssemblerDirectives(parsedLines)
  391. #insert libraries
  392. parsedLines = insertLibraries(parsedLines)
  393. if optimizeSize:
  394. parsedLines = removeUnreachebleCode(parsedLines)
  395. #obtain and remove the define statements
  396. defines, parsedLines = obtainDefines(parsedLines)
  397. #replace defined words with their value
  398. parsedLines = processDefines(defines, parsedLines)
  399. #do pass one
  400. passOneResult = passOne(parsedLines)
  401. #add interrupt code and jumps
  402. passOneResult = addHeaderCode(passOneResult)
  403. #move labels to the next line
  404. passOneResult = moveLabels(passOneResult)
  405. #redo line numbers for jump addressing
  406. #from this point no line should become multiple lines in the final code!
  407. #also no shifting in line numbers!
  408. passOneResult = redoLineNumbering(passOneResult)
  409. #removes label prefixes and creates mapping from label to line
  410. passOneResult, labelMap = getLabelMap(passOneResult)
  411. #do pass two
  412. passTwoResult = passTwo(passOneResult, labelMap)
  413. #check if all labels are processed
  414. checkNoLabels(passTwoResult)
  415. #only add length of program if not BDOS user program
  416. if not BDOSprogram:
  417. lenString = '{0:032b}'.format(len(passTwoResult)) + " //Length of program"
  418. #calculate length of program
  419. passTwoResult[2] = (2, lenString)
  420. #print result without line numbers
  421. for line in passTwoResult:
  422. print(line[1])
# Run the assembler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()