1
0

Assembler.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. #!/usr/bin/env python3
  2. import sys
  3. import CompileInstruction
# Paths of libraries that have already been included.
# insertLibraries() checks and appends to this list so the same library is
# never inserted twice; it is module-level so the recursive calls share it.
libraryList = []

# True when the program being assembled is the BDOS operating system
# (addHeaderCode() then appends an extra "jump Syscall" header entry).
BDOSos = False

# True when the program has to be assembled as a BDOS user program
# (no program-length placeholder is emitted in the header).
BDOSprogram = False

# Global offset of the program in memory; redoLineNumbering() adds it to
# every line index.
programOffset = 0

# When True, main() runs removeUnreachebleCode() to strip unreferenced code.
optimizeSize = False
  16. def removeFunctionFromCode(parsedLines, toRemove):
  17. returnList = []
  18. start = -1
  19. end = -1
  20. belowCodeSection = False
  21. for idx, line in enumerate(parsedLines):
  22. if line[1][0] == "Int:":
  23. belowCodeSection = True
  24. if start == -1:
  25. if line[1][0] == toRemove + ":":
  26. start = idx
  27. #print (start)
  28. #print(parsedLines[start])
  29. elif end == -1:
  30. # continue until next function found
  31. if (line[1][0][-1] == ':'):
  32. if not belowCodeSection:
  33. if ("Label_" not in line[1][0]):
  34. end = idx
  35. #print (end)
  36. #print(parsedLines[end])
  37. # when we are below the code section, stop at any new label
  38. else:
  39. end = idx
  40. returnList = returnList + parsedLines[0:start]
  41. returnList = returnList + parsedLines[end:]
  42. return returnList
  43. def removeUnreachebleCode(parsedLines):
  44. #print("orig len:", len(parsedLines))
  45. returnList = parsedLines
  46. asm = [x[1] for x in parsedLines]
  47. functionNames = []
  48. jumps = []
  49. for x in asm:
  50. if len(x) > 0:
  51. if (x[0][-1] == ':'):
  52. if ("Label_" not in x[0]):
  53. functionNames.append(x[0][:-1])
  54. if (x[0] == "addr2reg"):
  55. if ("Label_" not in x[1]):
  56. jumps.append(x[1])
  57. if (x[0] == "jump"):
  58. if ("Label_" not in x[1]):
  59. jumps.append(x[1])
  60. #for f in functionNames:
  61. # print(f)
  62. #for j in jumps:
  63. # print(j)
  64. unusedFunctions = list((set(functionNames).difference(jumps)).difference(["Main", "Int", "Syscall"]))
  65. foundUnusedFunctions = len(unusedFunctions)
  66. for u in unusedFunctions:
  67. #print(u)
  68. returnList = removeFunctionFromCode(returnList, u)
  69. # recursive check
  70. if foundUnusedFunctions > 0:
  71. returnList = removeUnreachebleCode(returnList)
  72. #print("after len:", len(returnList))
  73. return returnList
  74. def parseLines(fileName):
  75. parsedLines = []
  76. with open(fileName, 'r') as f:
  77. for i, line in enumerate(f, start=1):
  78. # do something special in case of a .ds instruction
  79. if (len(line) > 4 and line.split(" ",maxsplit=1)[0] == ".ds"):
  80. parsedLines.append((i, ['.ds', line.split(" ",maxsplit=1)[1].rstrip('\n')]))
  81. else:
  82. parsedLine = line.strip().split(";",maxsplit=1)[0].split()
  83. if (parsedLine != []):
  84. parsedLines.append((i, parsedLine))
  85. parsedLines.append((0, ['.EOF'])) # add end of file token
  86. return parsedLines
  87. def moveDataDown(parsedLines):
  88. for idx, line in enumerate(parsedLines):
  89. if (line[1][0] == ".EOF"): # return when gone through entire file
  90. return parsedLines
  91. if (line[1][0] == ".data"): # when we found the start of a .data segment
  92. while (parsedLines[idx][1][0] != ".code" and parsedLines[idx][1][0] != ".rdata" and parsedLines[idx][1][0] != ".bss" and parsedLines[idx][1][0] != ".EOF"): # move all lines to the end until .code, .rdata or .EOF
  93. parsedLines.append(parsedLines.pop(idx))
  94. # should not get here
  95. print("SHOULD NOT GET HERE")
  96. sys.exit(1)
  97. return None
  98. def moveRDataDown(parsedLines):
  99. for idx, line in enumerate(parsedLines):
  100. if (line[1][0] == ".EOF"): # return when gone through entire file
  101. return parsedLines
  102. if (line[1][0] == ".rdata"): # when we found the start of a .rdata segment
  103. while (parsedLines[idx][1][0] != ".code" and parsedLines[idx][1][0] != ".data" and parsedLines[idx][1][0] != ".bss" and parsedLines[idx][1][0] != ".EOF"): # move all lines to the end until .code, .data or .EOF
  104. parsedLines.append(parsedLines.pop(idx))
  105. # should not get here
  106. print("SHOULD NOT GET HERE")
  107. sys.exit(1)
  108. return None
  109. def moveBssDown(parsedLines):
  110. for idx, line in enumerate(parsedLines):
  111. if (line[1][0] == ".EOF"): # return when gone through entire file
  112. return parsedLines
  113. if (line[1][0] == ".bss"): # when we found the start of a .rdata segment
  114. while (parsedLines[idx][1][0] != ".code" and parsedLines[idx][1][0] != ".data" and parsedLines[idx][1][0] != ".rdata" and parsedLines[idx][1][0] != ".EOF"): # move all lines to the end until .code, .data or .EOF
  115. parsedLines.append(parsedLines.pop(idx))
  116. # should not get here
  117. print("SHOULD NOT GET HERE")
  118. sys.exit(1)
  119. return None
  120. def removeAssemblerDirectives(parsedLines):
  121. return [line for line in parsedLines if line[1][0] not in [".code", ".rdata", ".data", ".bss", ".EOF"]]
  122. def insertLibraries(parsedLines):
  123. returnList = []
  124. returnList.extend(parsedLines)
  125. for line in parsedLines:
  126. if (len(line[1]) == 2):
  127. if (line[1][0]) == "`include":
  128. if (line[1][1] not in libraryList):
  129. libraryList.append(line[1][1])
  130. insertList = insertLibraries(parseLines(line[1][1])) #recursion to include libraries within libraries
  131. for i in range(len(insertList)):
  132. returnList.insert(i, insertList[i])
  133. return returnList
  134. def compileLine(line):
  135. compiledLine = ""
  136. #check what kind of instruction this line is
  137. switch = {
  138. "halt" : CompileInstruction.compileHalt,
  139. "read" : CompileInstruction.compileRead,
  140. "write" : CompileInstruction.compileWrite,
  141. "readintid" : CompileInstruction.compileIntID,
  142. "push" : CompileInstruction.compilePush,
  143. "pop" : CompileInstruction.compilePop,
  144. "jump" : CompileInstruction.compileJump,
  145. "jumpo" : CompileInstruction.compileJumpo,
  146. "jumpr" : CompileInstruction.compileJumpr,
  147. "jumpro" : CompileInstruction.compileJumpro,
  148. "beq" : CompileInstruction.compileBEQ,
  149. "bgt" : CompileInstruction.compileBGT,
  150. "bgts" : CompileInstruction.compileBGTS,
  151. "bge" : CompileInstruction.compileBGE,
  152. "bges" : CompileInstruction.compileBGES,
  153. "bne" : CompileInstruction.compileBNE,
  154. "blt" : CompileInstruction.compileBLT,
  155. "blts" : CompileInstruction.compileBLTS,
  156. "ble" : CompileInstruction.compileBLE,
  157. "bles" : CompileInstruction.compileBLES,
  158. "savpc" : CompileInstruction.compileSavPC,
  159. "reti" : CompileInstruction.compileReti,
  160. "ccache" : CompileInstruction.compileCcache,
  161. "or" : CompileInstruction.compileOR,
  162. "and" : CompileInstruction.compileAND,
  163. "xor" : CompileInstruction.compileXOR,
  164. "add" : CompileInstruction.compileADD,
  165. "sub" : CompileInstruction.compileSUB,
  166. "shiftl" : CompileInstruction.compileSHIFTL,
  167. "shiftr" : CompileInstruction.compileSHIFTR,
  168. "shiftrs" : CompileInstruction.compileSHIFTRS,
  169. "not" : CompileInstruction.compileNOT,
  170. "mults" : CompileInstruction.compileMULTS,
  171. "multu" : CompileInstruction.compileMULTU,
  172. "slt" : CompileInstruction.compileSLT,
  173. "sltu" : CompileInstruction.compileSLTU,
  174. "load" : CompileInstruction.compileLoad,
  175. "loadhi" : CompileInstruction.compileLoadHi,
  176. "addr2reg" : CompileInstruction.compileAddr2reg,
  177. "load32" : CompileInstruction.compileLoad32,
  178. "nop" : CompileInstruction.compileNop,
  179. ".dw" : CompileInstruction.compileDw,
  180. ".dd" : CompileInstruction.compileDd,
  181. ".db" : CompileInstruction.compileDb,
  182. ".ds" : CompileInstruction.compileDs,
  183. ".dl" : CompileInstruction.compileDl,
  184. "loadlabellow" : CompileInstruction.compileLoadLabelLow,
  185. "loadlabelhigh" : CompileInstruction.compileLoadLabelHigh,
  186. "`include" : CompileInstruction.compileNothing,
  187. ".eof" : CompileInstruction.compileNothing
  188. }
  189. try:
  190. compiledLine = switch[line[0].lower()](line)
  191. #print errors
  192. except KeyError:
  193. #check if line is a label
  194. if len(line) == 1 and line[0][-1] == ':':
  195. compiledLine = "Label " + str(line[0])
  196. #if not a label, raise error
  197. else:
  198. raise Exception("Unknown instruction '" + str(line[0]) + "'" )
  199. return compiledLine
  200. #compiles lines that can be compiled directly
  201. def passOne(parsedLines):
  202. passOneResult = []
  203. for line in parsedLines:
  204. try:
  205. compiledLine = compileLine(line[1])
  206. #fix instructions that have multiple lines
  207. if compiledLine.split()[0] == "loadBoth":
  208. passOneResult.append((line[0], compileLine(["load", compiledLine.split()[2], compiledLine.split()[3]])))
  209. compiledLine = compileLine(["loadhi", compiledLine.split()[1], compiledLine.split()[3]])
  210. if compiledLine.split()[0] == "loadLabelHigh":
  211. passOneResult.append((line[0], "loadLabelLow " + " ".join(compiledLine.split()[1:])))
  212. if compiledLine.split()[0] == "data":
  213. for i in compiledLine.split():
  214. if i != "data":
  215. passOneResult.append((line[0], i + " //data"))
  216. else:
  217. if (compiledLine != "ignore"):
  218. passOneResult.append((line[0], compiledLine))
  219. except Exception as e:
  220. print("Error in line " + str(line[0]) + ": " + " ".join(line[1]))
  221. print("The error is: {0}".format(e))
  222. print("Assembler will now exit")
  223. sys.exit(1)
  224. return passOneResult
  225. #reads and removes define statements, stores them into dictionary
  226. def obtainDefines(content):
  227. defines = {} #list of definitions with their value
  228. contentWithoutDefines = [] #lines without defines
  229. defineLines = [] #lines with defines
  230. #seperate defines from other lines
  231. for line in content:
  232. if line[1][0].lower() == "define":
  233. #do error checking
  234. if len(line[1]) != 4 or line[1][2] != "=":
  235. print("Error in line " + str(line[0]) + ": " + " ".join(line[1]))
  236. print("Invalid define statement")
  237. print("Assembler will now exit")
  238. sys.exit(1)
  239. defineLines.append(line)
  240. else:
  241. contentWithoutDefines.append(line)
  242. #parse the lines with defines
  243. for line in defineLines:
  244. if (line[1][1] in defines):
  245. print("Error: define " + line[1][1] + " is already defined")
  246. print("Assembler will now exit")
  247. sys.exit(1)
  248. defines.update({line[1][1]:line[1][3]})
  249. return defines, contentWithoutDefines
  250. #replace defined words with their value
  251. def processDefines(defines, content):
  252. replacedContent = [] #lines where defined words have been replaced
  253. #for each line, replace the words with their corresponding value if defined
  254. for line in content:
  255. replacedContent.append((line[0], [defines.get(word, word) for word in line[1]]))
  256. return replacedContent
  257. #adds interrupts, program length placeholder and jump to main
  258. #skip program length placeholder in case of BDOS program
  259. #add jump to syscall if BDOS os
  260. #NOTE: because of a unknown bug in B32P (probably related to return address of interrupt directly after jumping to SDRAM from ROM,
  261. # the 4th instruction needs to be jump Main as well
  262. def addHeaderCode(parsedLines):
  263. if BDOSprogram:
  264. header = [(0,"jump Main"),(0,"jump Int"), (0,"jump Main"), (0,"jump Main")]
  265. elif BDOSos:
  266. header = [(0,"jump Main"),(0,"jump Int"), (0,"LengthOfProgram"), (0,"jump Main"), (0,"jump Syscall")]
  267. else:
  268. header = [(0,"jump Main"),(0,"jump Int"), (0,"LengthOfProgram"), (0,"jump Main")]
  269. return header + parsedLines
  270. #move labels to the next line
  271. def moveLabels(parsedLines):
  272. returnList = []
  273. #move to next line
  274. # (old iteration) for idx, line in enumerate(parsedLines):
  275. idx = 0;
  276. while idx < len(parsedLines):
  277. line = parsedLines[idx]
  278. if line[1].lower().split()[0] == "label":
  279. if idx < len(parsedLines) - 1:
  280. if parsedLines[idx+1][1].lower().split()[0] == "label":
  281. # (OLD) if we have a label directly below, insert a nop as a quick fix
  282. #parsedLines.insert(idx+1, (0, "$*" + line[1].split()[1] + "*$ " +"00000000000000000000000000000000 //NOP to quickfix double labels"))
  283. # if we have a label directly below, insert the label in the first non-label line
  284. i = 2
  285. labelDone = False
  286. while idx+i < len(parsedLines) - 1 and not labelDone:
  287. if parsedLines[idx+i][1].lower().split()[0] != "label":
  288. labelDone = True
  289. parsedLines[idx+i] = (parsedLines[idx+i][0], "$*" + line[1].split()[1] + "*$ " + parsedLines[idx+i][1])
  290. # add label in comments, but only if the line does not need to have a second pass
  291. # TODO implement this!
  292. #if parsedLines[idx+i][1].split()[1][0] == "0" or parsedLines[idx+i][1].split()[1][0] == "1":
  293. # parsedLines[idx+i][1] = parsedLines[idx+i][1] + " @" + line[1].split()[1][:-1]
  294. i+=1
  295. else:
  296. parsedLines[idx+1] = (parsedLines[idx+1][0], "$*" + line[1].split()[1] + "*$ " + parsedLines[idx+1][1])
  297. # add label in comments, but only if the line does not need to have a second pass
  298. # TODO implement this!
  299. #if parsedLines[idx+1][1].split()[1][0] == "0" or parsedLines[idx+1][1].split()[1][0] == "1":
  300. # parsedLines[idx+1][1] = parsedLines[idx+1][1] + " @" + line[1].split()[1][:-1]
  301. else:
  302. print("Error: label " + line[1].split()[1] + " has no instructions below it")
  303. print("Assembler will now exit")
  304. sys.exit(1)
  305. idx += 1
  306. #remove original labels
  307. for line in parsedLines:
  308. if line[1].lower().split()[0] != "label":
  309. returnList.append(line)
  310. return returnList
  311. #renumbers each line
  312. def redoLineNumbering(parsedLines):
  313. returnList = []
  314. for idx, line in enumerate(parsedLines):
  315. returnList.append((idx + programOffset, line[1]))
  316. return returnList
  317. #removes label prefix and returns a map of labels to line numbers
  318. #assumes that $* does not occur somewhere else, and that labels are seperated by space
  319. def getLabelMap(parsedLines):
  320. labelMap = {}
  321. returnList = []
  322. for line in parsedLines:
  323. numberOfLabels = line[1].count("$*")
  324. for i in range(numberOfLabels):
  325. if line[1].split()[i][:2] == "$*" and line[1].split()[i][-3:] == ":*$":
  326. if (line[1].split()[i][2:-3] in labelMap):
  327. print("Error: label " + line[1].split()[i][2:-3] + " is already defined")
  328. print("Assembler will now exit")
  329. sys.exit(1)
  330. labelMap[line[1].split()[i][2:-3]] = line[0]
  331. if line[1].split()[0][:2] == "$*" and line[1].split()[0][-2:] == "*$":
  332. returnList.append((line[0], line[1].split("*$ ")[-1]))
  333. else:
  334. returnList.append(line)
  335. return returnList, labelMap
  336. #compiles all labels
  337. def passTwo(parsedLines, labelMap):
  338. #lines that start with these names should be compiled
  339. toCompileList = ["jump", "beq", "bgt", "bgts", "bge", "bges", "bne", "blt", "blts", "ble", "bles", "loadlabellow" ,"loadlabelhigh", ".dl"]
  340. for idx, line in enumerate(parsedLines):
  341. if line[1].lower().split()[0] in toCompileList:
  342. for idx2, word in enumerate(line[1].split()):
  343. if word in labelMap:
  344. x = line[1].split()
  345. x[idx2] = str(labelMap.get(word))
  346. y = compileLine(x)
  347. parsedLines[idx] = (parsedLines[idx][0], y)
  348. return parsedLines
  349. #check if all labels are compiled
  350. def checkNoLabels(parsedLines):
  351. toCompileList = ["jump", "beq", "bgt", "bgts", "bge", "bges", "bne", "blt", "blts", "ble", "bles", "loadlabellow" ,"loadlabelhigh", ".dl"]
  352. for idx, line in enumerate(parsedLines):
  353. if line[1].lower().split()[0] in toCompileList:
  354. labelPos = 0
  355. if line[1].lower().split()[0] in ["jump", "loadlabellow", "loadlabelhigh", ".dl"]:
  356. labelPos = 1
  357. if line[1].lower().split()[0] in ["beq", "bgt", "bgts", "bge", "bges", "bne", "blt", "blts", "ble", "bles"]:
  358. labelPos = 3
  359. print("Error: label " + line[1].split()[labelPos] + " is undefined")
  360. print("Assembler will now exit")
  361. sys.exit(1)
  362. if line[1].lower().split()[0] == "label":
  363. print("Error: label " + line[1].split()[1] + " is used directly after another label")
  364. print("Assembler will now exit")
  365. sys.exit(1)
  366. def main():
  367. #check assemble mode and offset
  368. global BDOSos
  369. global BDOSprogram
  370. global programOffset
  371. global optimizeSize
  372. if len(sys.argv) >= 3:
  373. BDOSprogram = (sys.argv[1].lower() == "bdos")
  374. if BDOSprogram:
  375. programOffset = CompileInstruction.getNumber(sys.argv[2])
  376. if len(sys.argv) >= 2:
  377. BDOSos = (sys.argv[1].lower() == "os")
  378. if sys.argv[len(sys.argv)-1] == "-O":
  379. optimizeSize = True
  380. #parse lines from file
  381. parsedLines = parseLines("code.asm")
  382. #move .data sections down
  383. parsedLines = moveDataDown(parsedLines)
  384. #move .rdata sections down
  385. parsedLines = moveRDataDown(parsedLines)
  386. #move .bss sections down
  387. parsedLines = moveBssDown(parsedLines)
  388. #remove all .code, .data, .rdata, .bss and .EOF lines
  389. parsedLines = removeAssemblerDirectives(parsedLines)
  390. #insert libraries
  391. parsedLines = insertLibraries(parsedLines)
  392. if optimizeSize:
  393. parsedLines = removeUnreachebleCode(parsedLines)
  394. #obtain and remove the define statements
  395. defines, parsedLines = obtainDefines(parsedLines)
  396. #replace defined words with their value
  397. parsedLines = processDefines(defines, parsedLines)
  398. #do pass one
  399. passOneResult = passOne(parsedLines)
  400. #add interrupt code and jumps
  401. passOneResult = addHeaderCode(passOneResult)
  402. #move labels to the next line
  403. passOneResult = moveLabels(passOneResult)
  404. #redo line numbers for jump addressing
  405. #from this point no line should become multiple lines in the final code!
  406. #also no shifting in line numbers!
  407. passOneResult = redoLineNumbering(passOneResult)
  408. #removes label prefixes and creates mapping from label to line
  409. passOneResult, labelMap = getLabelMap(passOneResult)
  410. #do pass two
  411. passTwoResult = passTwo(passOneResult, labelMap)
  412. #check if all labels are processed
  413. checkNoLabels(passTwoResult)
  414. #only add length of program if not BDOS user program
  415. if not BDOSprogram:
  416. lenString = '{0:032b}'.format(len(passTwoResult)) + " //Length of program"
  417. #calculate length of program
  418. passTwoResult[2] = (2, lenString)
  419. #print result without line numbers
  420. for line in passTwoResult:
  421. print(line[1])
  422. if __name__ == '__main__':
  423. main()