Assembler.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. #!/usr/bin/env python3
  2. import sys
  3. import CompileInstruction
# Paths of libraries that have already been inserted through an `include line.
# Used to prevent inserting the same library more than once.
# Module-level so that the recursive insertLibraries calls all share one list.
libraryList = []

# True when the program being assembled is the BDOS operating system itself
BDOSos = False

# True when the program has to be assembled as a BDOS user program
BDOSprogram = False

# Global offset of the program in memory (added to every line number)
programOffset = 0

# When True, unreachable code is removed before assembling
optimizeSize = False
  16. def removeFunctionFromCode(parsedLines, toRemove):
  17. returnList = []
  18. start = -1
  19. end = -1
  20. belowCodeSection = False
  21. for idx, line in enumerate(parsedLines):
  22. if line[1][0] == "Int:":
  23. belowCodeSection = True
  24. if start == -1:
  25. if line[1][0] == toRemove + ":":
  26. start = idx
  27. #print (start)
  28. #print(parsedLines[start])
  29. elif end == -1:
  30. # continue until next function found
  31. if (line[1][0][-1] == ':'):
  32. if not belowCodeSection:
  33. if ("Label_" not in line[1][0]):
  34. end = idx
  35. #print (end)
  36. #print(parsedLines[end])
  37. # when we are below the code section, stop at any new label
  38. else:
  39. end = idx
  40. returnList = returnList + parsedLines[0:start]
  41. returnList = returnList + parsedLines[end:]
  42. return returnList
  43. def removeUnreachebleCode(parsedLines):
  44. #print("orig len:", len(parsedLines))
  45. returnList = parsedLines
  46. asm = [x[1] for x in parsedLines]
  47. functionNames = []
  48. jumps = []
  49. for x in asm:
  50. if len(x) > 0:
  51. if (x[0][-1] == ':'):
  52. if ("Label_" not in x[0]):
  53. functionNames.append(x[0][:-1])
  54. if (x[0] == "addr2reg"):
  55. if ("Label_" not in x[1]):
  56. jumps.append(x[1])
  57. if (x[0] == "jump"):
  58. if ("Label_" not in x[1]):
  59. jumps.append(x[1])
  60. #for f in functionNames:
  61. # print(f)
  62. #for j in jumps:
  63. # print(j)
  64. unusedFunctions = list((set(functionNames).difference(jumps)).difference(["Main", "Int", "Syscall"]))
  65. foundUnusedFunctions = len(unusedFunctions)
  66. for u in unusedFunctions:
  67. #print(u)
  68. returnList = removeFunctionFromCode(returnList, u)
  69. # recursive check
  70. if foundUnusedFunctions > 0:
  71. returnList = removeUnreachebleCode(returnList)
  72. #print("after len:", len(returnList))
  73. return returnList
  74. def parseLines(fileName):
  75. parsedLines = []
  76. with open(fileName, 'r') as f:
  77. for i, line in enumerate(f, start=1):
  78. # do something special in case of a .ds instruction
  79. if (len(line) > 4 and line.split(" ",maxsplit=1)[0] == ".ds"):
  80. parsedLines.append((i, ['.ds', line.split(" ",maxsplit=1)[1].rstrip('\n')]))
  81. else:
  82. parsedLine = line.strip().split(";",maxsplit=1)[0].split()
  83. if (parsedLine != []):
  84. parsedLines.append((i, parsedLine))
  85. parsedLines.append((0, ['.EOF'])) # add end of file token
  86. return parsedLines
  87. def moveDataDown(parsedLines):
  88. for idx, line in enumerate(parsedLines):
  89. if (line[1][0] == ".EOF"): # return when gone through entire file
  90. return parsedLines
  91. if (line[1][0] == ".data"): # when we found the start of a .data segment
  92. while (parsedLines[idx][1][0] != ".code" and parsedLines[idx][1][0] != ".rdata" and parsedLines[idx][1][0] != ".bss" and parsedLines[idx][1][0] != ".EOF"): # move all lines to the end until .code, .rdata or .EOF
  93. parsedLines.append(parsedLines.pop(idx))
  94. # should not get here
  95. print("SHOULD NOT GET HERE")
  96. sys.exit(1)
  97. return None
  98. def moveRDataDown(parsedLines):
  99. for idx, line in enumerate(parsedLines):
  100. if (line[1][0] == ".EOF"): # return when gone through entire file
  101. return parsedLines
  102. if (line[1][0] == ".rdata"): # when we found the start of a .rdata segment
  103. while (parsedLines[idx][1][0] != ".code" and parsedLines[idx][1][0] != ".data" and parsedLines[idx][1][0] != ".bss" and parsedLines[idx][1][0] != ".EOF"): # move all lines to the end until .code, .data or .EOF
  104. parsedLines.append(parsedLines.pop(idx))
  105. # should not get here
  106. print("SHOULD NOT GET HERE")
  107. sys.exit(1)
  108. return None
  109. def moveBssDown(parsedLines):
  110. for idx, line in enumerate(parsedLines):
  111. if (line[1][0] == ".EOF"): # return when gone through entire file
  112. return parsedLines
  113. if (line[1][0] == ".bss"): # when we found the start of a .rdata segment
  114. while (parsedLines[idx][1][0] != ".code" and parsedLines[idx][1][0] != ".data" and parsedLines[idx][1][0] != ".rdata" and parsedLines[idx][1][0] != ".EOF"): # move all lines to the end until .code, .data or .EOF
  115. parsedLines.append(parsedLines.pop(idx))
  116. # should not get here
  117. print("SHOULD NOT GET HERE")
  118. sys.exit(1)
  119. return None
  120. def removeAssemblerDirectives(parsedLines):
  121. return [line for line in parsedLines if line[1][0] not in [".code", ".rdata", ".data", ".bss", ".EOF"]]
  122. def insertLibraries(parsedLines):
  123. returnList = []
  124. returnList.extend(parsedLines)
  125. for line in parsedLines:
  126. if (len(line[1]) == 2):
  127. if (line[1][0]) == "`include":
  128. if (line[1][1] not in libraryList):
  129. libraryList.append(line[1][1])
  130. insertList = insertLibraries(parseLines(line[1][1])) #recursion to include libraries within libraries
  131. for i in range(len(insertList)):
  132. returnList.insert(i, insertList[i])
  133. return returnList
  134. def compileLine(line):
  135. compiledLine = ""
  136. #check what kind of instruction this line is
  137. switch = {
  138. "halt" : CompileInstruction.compileHalt,
  139. "read" : CompileInstruction.compileRead,
  140. "write" : CompileInstruction.compileWrite,
  141. "readintid" : CompileInstruction.compileIntID,
  142. "push" : CompileInstruction.compilePush,
  143. "pop" : CompileInstruction.compilePop,
  144. "jump" : CompileInstruction.compileJump,
  145. "jumpo" : CompileInstruction.compileJumpo,
  146. "jumpr" : CompileInstruction.compileJumpr,
  147. "jumpro" : CompileInstruction.compileJumpro,
  148. "beq" : CompileInstruction.compileBEQ,
  149. "bgt" : CompileInstruction.compileBGT,
  150. "bgts" : CompileInstruction.compileBGTS,
  151. "bge" : CompileInstruction.compileBGE,
  152. "bges" : CompileInstruction.compileBGES,
  153. "bne" : CompileInstruction.compileBNE,
  154. "blt" : CompileInstruction.compileBLT,
  155. "blts" : CompileInstruction.compileBLTS,
  156. "ble" : CompileInstruction.compileBLE,
  157. "bles" : CompileInstruction.compileBLES,
  158. "savpc" : CompileInstruction.compileSavPC,
  159. "reti" : CompileInstruction.compileReti,
  160. "or" : CompileInstruction.compileOR,
  161. "and" : CompileInstruction.compileAND,
  162. "xor" : CompileInstruction.compileXOR,
  163. "add" : CompileInstruction.compileADD,
  164. "sub" : CompileInstruction.compileSUB,
  165. "shiftl" : CompileInstruction.compileSHIFTL,
  166. "shiftr" : CompileInstruction.compileSHIFTR,
  167. "not" : CompileInstruction.compileNOT,
  168. "mults" : CompileInstruction.compileMULTS,
  169. "multu" : CompileInstruction.compileMULTU,
  170. "slt" : CompileInstruction.compileSLT,
  171. "sltu" : CompileInstruction.compileSLTU,
  172. "load" : CompileInstruction.compileLoad,
  173. "loadhi" : CompileInstruction.compileLoadHi,
  174. "addr2reg" : CompileInstruction.compileAddr2reg,
  175. "load32" : CompileInstruction.compileLoad32,
  176. "nop" : CompileInstruction.compileNop,
  177. ".dw" : CompileInstruction.compileDw,
  178. ".dd" : CompileInstruction.compileDd,
  179. ".db" : CompileInstruction.compileDb,
  180. ".ds" : CompileInstruction.compileDs,
  181. ".dl" : CompileInstruction.compileDl,
  182. "loadlabellow" : CompileInstruction.compileLoadLabelLow,
  183. "loadlabelhigh" : CompileInstruction.compileLoadLabelHigh,
  184. "`include" : CompileInstruction.compileNothing,
  185. ".eof" : CompileInstruction.compileNothing
  186. }
  187. try:
  188. compiledLine = switch[line[0].lower()](line)
  189. #print errors
  190. except KeyError:
  191. #check if line is a label
  192. if len(line) == 1 and line[0][-1] == ':':
  193. compiledLine = "Label " + str(line[0])
  194. #if not a label, raise error
  195. else:
  196. raise Exception("Unknown instruction '" + str(line[0]) + "'" )
  197. return compiledLine
  198. #compiles lines that can be compiled directly
  199. def passOne(parsedLines):
  200. passOneResult = []
  201. for line in parsedLines:
  202. try:
  203. compiledLine = compileLine(line[1])
  204. #fix instructions that have multiple lines
  205. if compiledLine.split()[0] == "loadBoth":
  206. passOneResult.append((line[0], compileLine(["load", compiledLine.split()[2], compiledLine.split()[3]])))
  207. compiledLine = compileLine(["loadhi", compiledLine.split()[1], compiledLine.split()[3]])
  208. if compiledLine.split()[0] == "loadLabelHigh":
  209. passOneResult.append((line[0], "loadLabelLow " + " ".join(compiledLine.split()[1:])))
  210. if compiledLine.split()[0] == "data":
  211. for i in compiledLine.split():
  212. if i != "data":
  213. passOneResult.append((line[0], i + " //data"))
  214. else:
  215. if (compiledLine != "ignore"):
  216. passOneResult.append((line[0], compiledLine))
  217. except Exception as e:
  218. print("Error in line " + str(line[0]) + ": " + " ".join(line[1]))
  219. print("The error is: {0}".format(e))
  220. print("Assembler will now exit")
  221. sys.exit(1)
  222. return passOneResult
  223. #reads and removes define statements, stores them into dictionary
  224. def obtainDefines(content):
  225. defines = {} #list of definitions with their value
  226. contentWithoutDefines = [] #lines without defines
  227. defineLines = [] #lines with defines
  228. #seperate defines from other lines
  229. for line in content:
  230. if line[1][0].lower() == "define":
  231. #do error checking
  232. if len(line[1]) != 4 or line[1][2] != "=":
  233. print("Error in line " + str(line[0]) + ": " + " ".join(line[1]))
  234. print("Invalid define statement")
  235. print("Assembler will now exit")
  236. sys.exit(1)
  237. defineLines.append(line)
  238. else:
  239. contentWithoutDefines.append(line)
  240. #parse the lines with defines
  241. for line in defineLines:
  242. if (line[1][1] in defines):
  243. print("Error: define " + line[1][1] + " is already defined")
  244. print("Assembler will now exit")
  245. sys.exit(1)
  246. defines.update({line[1][1]:line[1][3]})
  247. return defines, contentWithoutDefines
  248. #replace defined words with their value
  249. def processDefines(defines, content):
  250. replacedContent = [] #lines where defined words have been replaced
  251. #for each line, replace the words with their corresponding value if defined
  252. for line in content:
  253. replacedContent.append((line[0], [defines.get(word, word) for word in line[1]]))
  254. return replacedContent
  255. #adds interrupts, program length placeholder and jump to main
  256. #skip program length placeholder in case of BDOS program
  257. #add jump to syscall if BDOS os
  258. #NOTE: because of a unknown bug in B32P (probably related to return address of interrupt directly after jumping to SDRAM from ROM,
  259. # the 4th instruction needs to be jump Main as well
  260. def addHeaderCode(parsedLines):
  261. if BDOSprogram:
  262. header = [(0,"jump Main"),(0,"jump Int"), (0,"jump Main"), (0,"jump Main")]
  263. elif BDOSos:
  264. header = [(0,"jump Main"),(0,"jump Int"), (0,"LengthOfProgram"), (0,"jump Main"), (0,"jump Syscall")]
  265. else:
  266. header = [(0,"jump Main"),(0,"jump Int"), (0,"LengthOfProgram"), (0,"jump Main")]
  267. return header + parsedLines
  268. #move labels to the next line
  269. def moveLabels(parsedLines):
  270. returnList = []
  271. #move to next line
  272. # (old iteration) for idx, line in enumerate(parsedLines):
  273. idx = 0;
  274. while idx < len(parsedLines):
  275. line = parsedLines[idx]
  276. if line[1].lower().split()[0] == "label":
  277. if idx < len(parsedLines) - 1:
  278. if parsedLines[idx+1][1].lower().split()[0] == "label":
  279. # (OLD) if we have a label directly below, insert a nop as a quick fix
  280. #parsedLines.insert(idx+1, (0, "$*" + line[1].split()[1] + "*$ " +"00000000000000000000000000000000 //NOP to quickfix double labels"))
  281. # if we have a label directly below, insert the label in the first non-label line
  282. i = 2
  283. labelDone = False
  284. while idx+i < len(parsedLines) - 1 and not labelDone:
  285. if parsedLines[idx+i][1].lower().split()[0] != "label":
  286. labelDone = True
  287. parsedLines[idx+i] = (parsedLines[idx+i][0], "$*" + line[1].split()[1] + "*$ " + parsedLines[idx+i][1])
  288. # add label in comments, but only if the line does not need to have a second pass
  289. # TODO implement this!
  290. #if parsedLines[idx+i][1].split()[1][0] == "0" or parsedLines[idx+i][1].split()[1][0] == "1":
  291. # parsedLines[idx+i][1] = parsedLines[idx+i][1] + " @" + line[1].split()[1][:-1]
  292. i+=1
  293. else:
  294. parsedLines[idx+1] = (parsedLines[idx+1][0], "$*" + line[1].split()[1] + "*$ " + parsedLines[idx+1][1])
  295. # add label in comments, but only if the line does not need to have a second pass
  296. # TODO implement this!
  297. #if parsedLines[idx+1][1].split()[1][0] == "0" or parsedLines[idx+1][1].split()[1][0] == "1":
  298. # parsedLines[idx+1][1] = parsedLines[idx+1][1] + " @" + line[1].split()[1][:-1]
  299. else:
  300. print("Error: label " + line[1].split()[1] + " has no instructions below it")
  301. print("Assembler will now exit")
  302. sys.exit(1)
  303. idx += 1
  304. #remove original labels
  305. for line in parsedLines:
  306. if line[1].lower().split()[0] != "label":
  307. returnList.append(line)
  308. return returnList
  309. #renumbers each line
  310. def redoLineNumbering(parsedLines):
  311. returnList = []
  312. for idx, line in enumerate(parsedLines):
  313. returnList.append((idx + programOffset, line[1]))
  314. return returnList
  315. #removes label prefix and returns a map of labels to line numbers
  316. #assumes that $* does not occur somewhere else, and that labels are seperated by space
  317. def getLabelMap(parsedLines):
  318. labelMap = {}
  319. returnList = []
  320. for line in parsedLines:
  321. numberOfLabels = line[1].count("$*")
  322. for i in range(numberOfLabels):
  323. if line[1].split()[i][:2] == "$*" and line[1].split()[i][-3:] == ":*$":
  324. if (line[1].split()[i][2:-3] in labelMap):
  325. print("Error: label " + line[1].split()[i][2:-3] + " is already defined")
  326. print("Assembler will now exit")
  327. sys.exit(1)
  328. labelMap[line[1].split()[i][2:-3]] = line[0]
  329. if line[1].split()[0][:2] == "$*" and line[1].split()[0][-2:] == "*$":
  330. returnList.append((line[0], line[1].split("*$ ")[-1]))
  331. else:
  332. returnList.append(line)
  333. return returnList, labelMap
  334. #compiles all labels
  335. def passTwo(parsedLines, labelMap):
  336. #lines that start with these names should be compiled
  337. toCompileList = ["jump", "beq", "bgt", "bgts", "bge", "bges", "bne", "blt", "blts", "ble", "bles", "loadlabellow" ,"loadlabelhigh", ".dl"]
  338. for idx, line in enumerate(parsedLines):
  339. if line[1].lower().split()[0] in toCompileList:
  340. for idx2, word in enumerate(line[1].split()):
  341. if word in labelMap:
  342. x = line[1].split()
  343. x[idx2] = str(labelMap.get(word))
  344. y = compileLine(x)
  345. parsedLines[idx] = (parsedLines[idx][0], y)
  346. return parsedLines
  347. #check if all labels are compiled
  348. def checkNoLabels(parsedLines):
  349. toCompileList = ["jump", "beq", "bgt", "bgts", "bge", "bges", "bne", "blt", "blts", "ble", "bles", "loadlabellow" ,"loadlabelhigh", ".dl"]
  350. for idx, line in enumerate(parsedLines):
  351. if line[1].lower().split()[0] in toCompileList:
  352. labelPos = 0
  353. if line[1].lower().split()[0] in ["jump", "loadlabellow", "loadlabelhigh", ".dl"]:
  354. labelPos = 1
  355. if line[1].lower().split()[0] in ["beq", "bgt", "bgts", "bge", "bges", "bne", "blt", "blts", "ble", "bles"]:
  356. labelPos = 3
  357. print("Error: label " + line[1].split()[labelPos] + " is undefined")
  358. print("Assembler will now exit")
  359. sys.exit(1)
  360. if line[1].lower().split()[0] == "label":
  361. print("Error: label " + line[1].split()[1] + " is used directly after another label")
  362. print("Assembler will now exit")
  363. sys.exit(1)
  364. def main():
  365. #check assemble mode and offset
  366. global BDOSos
  367. global BDOSprogram
  368. global programOffset
  369. global optimizeSize
  370. if len(sys.argv) >= 3:
  371. BDOSprogram = (sys.argv[1].lower() == "bdos")
  372. if BDOSprogram:
  373. programOffset = CompileInstruction.getNumber(sys.argv[2])
  374. if len(sys.argv) >= 2:
  375. BDOSos = (sys.argv[1].lower() == "os")
  376. if sys.argv[len(sys.argv)-1] == "-O":
  377. optimizeSize = True
  378. #parse lines from file
  379. parsedLines = parseLines("code.asm")
  380. #move .data sections down
  381. parsedLines = moveDataDown(parsedLines)
  382. #move .rdata sections down
  383. parsedLines = moveRDataDown(parsedLines)
  384. #move .bss sections down
  385. parsedLines = moveBssDown(parsedLines)
  386. #remove all .code, .data, .rdata, .bss and .EOF lines
  387. parsedLines = removeAssemblerDirectives(parsedLines)
  388. #insert libraries
  389. parsedLines = insertLibraries(parsedLines)
  390. if optimizeSize:
  391. parsedLines = removeUnreachebleCode(parsedLines)
  392. #obtain and remove the define statements
  393. defines, parsedLines = obtainDefines(parsedLines)
  394. #replace defined words with their value
  395. parsedLines = processDefines(defines, parsedLines)
  396. #do pass one
  397. passOneResult = passOne(parsedLines)
  398. #add interrupt code and jumps
  399. passOneResult = addHeaderCode(passOneResult)
  400. #move labels to the next line
  401. passOneResult = moveLabels(passOneResult)
  402. #redo line numbers for jump addressing
  403. #from this point no line should become multiple lines in the final code!
  404. #also no shifting in line numbers!
  405. passOneResult = redoLineNumbering(passOneResult)
  406. #removes label prefixes and creates mapping from label to line
  407. passOneResult, labelMap = getLabelMap(passOneResult)
  408. #do pass two
  409. passTwoResult = passTwo(passOneResult, labelMap)
  410. #check if all labels are processed
  411. checkNoLabels(passTwoResult)
  412. #only add length of program if not BDOS user program
  413. if not BDOSprogram:
  414. lenString = '{0:032b}'.format(len(passTwoResult)) + " //Length of program"
  415. #calculate length of program
  416. passTwoResult[2] = (2, lenString)
  417. #print result without line numbers
  418. for line in passTwoResult:
  419. print(line[1])
  420. if __name__ == '__main__':
  421. main()