#! /usr/bin/env perl
# MIPS INSTRUCTION CHECKER
#
# Searches through STDIN to verify that the file is entirely made up of valid
# instructions (and not pseudo-instruction).It reads each line, removes
# comments, collapses whitespace, removes labels, discards blank lines
# (including lines that *became* blank when everything else was removed), and
# then verifies that what is left is a valid instruction.
#
# There are several formats that must be recognized:
#R-format (3 operand)
#2 register and integer (addi, sll, etc.)
#BEQ / BNE
#J / JAL
#JR
#LA
#SYSCALL
#.data / .text
#.globl
#.word / .half / .byte
#.asciiz / .ascii
# CONFIG OPTIONS
#
# As the semester progresses, we will turn on support for new instructions,
# which are not allowed earlier in the semester.
$allow_unsigned_addsub = 1;
$allow_muldiv= 0;
$allow_lui = 1;
$allow_jr= 1;
$allow_mfhilo= 0;
foreach $line (
{
# get rid of any comment that might exist.The s/foo/bar/g regular
# expression form is for substitution; in this case, we replace
# anything that looks like a comment with an empty string.
$line =~ s/#.*//g;
# remove leading and trailing whitespace, includine newlines.
# Thanks to
# https://perlmaven.com/trim
# for the elegant regexp.
$line =~ s/^s+|s+$//g;
# collapse all whitespace down to single spaces.
$line =~ s/s+/ /g;
# remove labels from the front of any line.
$line =~ s/^s*[a-zA-Z0-9_]+s*:s*//;
# accept blank lines
if ($line eq )
{
next;
}
# accept lines which are simple assembler directives
if ($line =~ m/^[.](text|data)/)
{
next;
}
if ($line =~ m/^[.]globls+[a-zA-Z0-9_]+$/)
{
next;
}
if ($line =~ m/^[.](word|half|byte)s+(-?[0-9]+|0x[0-9a-fA-F]+|[a-zA-Z0-9_]+|'([^]|\.))$/)
{
next;
}
# UPDATE: MARS doesnt require a space before the leading double-quote
# in an .asciiz line.So I made it optional.
if ($line =~ m/^[.]asciiz? ?.*$/)
{
next;
}
# This pattern matches all R-format instructions (except for shift)
if ($line =~ m/^([a-z]+) $[a-z0-9]+ ?, ?$[a-z0-9]+ ?, ?$[a-z0-9]+$/)
{
# is the instruction one of the ones we recognize?
$inst = $1;
if ($inst =~ m/^(add|sub|slt|and|or|nor|xor)$/)
{
next;
}
if ($allow_unsigned_addsub && $inst =~ m/^(addu|subu)$/)
{
next;
}
if ($allow_muldiv && $inst eq mul)
{
next;
}
}
# recognize all of the conditional branch instructions
if ($line =~ m/^(beq|bne) $[a-z0-9]+ ?, ?$[a-z0-9]+ ?, ?[a-zA-Z0-9_]+$/)
{
next;
}
# recognize all of the I-format (immediate value) instructions, PLUS
# the shift instructions.
if ($line =~ m/^(addiu?|slti|andi|ori|xori|sll|srl|sra) $[a-z0-9]+ ?, ?$[a-z0-9]+ ?, ?(-?[0-9]{1,5}|0x[0-9a-fA-F]{1,4}|'([^]|\.))/)
{
next;
}
# recognize the LA instruction here since its a real oddball
if ($line =~ m/^la $[a-z0-9]+ ?, ?[a-zA-Z0-9_]+$/)
{
next;
}
# so is lui, if were hoenst!
if ($allow_lui && $line =~ m/^lui $[a-z0-9]+ ?, ?(-?[0-9]{1,5}|0x[0-9a-fA-F]{1,4})/)
{
next;
}
# weve added support for mult/div; well handle mfhi/mflo later,
# along with jr
if ($allow_muldiv && $line =~ m/^(mult|div) $[a-z0-9]+ ?, ?$[a-z0-9]+/)
{
next;
}
# recognize all load/store instructions.
if ($line =~ m/^[ls][whb] $[a-z0-9]+ ?, ?(-?[0-9]+|0x[0-9a-fA-F]+) ?( ?$[a-z0-9]+ ?)$/)
{
next;
}
# recognize all of the j-format instructions
if ($line =~ m/^(j|jal) [a-zA-Z0-9_]+$/)
{
next;
}
# recognize JR and also MFHI,MFLO all of which only take a single
# register parameter.
if ($line =~ m/^([a-z]+) $[a-z0-9]+$/)
{
$inst = $1;
if ($allow_jr && $inst eq jr)
{
next;
}
if ($allow_mfhilo && $inst =~ m/^(mfhi|mflo)$/)
{
next;
}
}
# recognize syscall the only non-parameterized instruction
if ($line eq syscall)
{
next;
}
print ERROR: UNRECOGNIZED LINE: $line
;
# handle very common mixed-up formats:
#LW
#LA
#LW
if ($line =~ m/^[ls][whb] $[a-z0-9]+ ?, ?[a-zA-Z0-9_]+$/)
{
print (You used a load/store instruction, but gave the proper parameters for an LA instruction.)
;
print LA load the address of a label into a register
;
print LW read a value from memory
;
}
if ($line =~ m/^la $[a-z0-9]+ ?, ?(-?[0-9]+|0x[0-9a-fA-F]+) ?( ?$[a-z0-9]+ ?)$/)
{
print (You used the LA instruction, but gave the proper parameters for an load/store instruction.)
;
print LA load the address of a label into a register
;
print LW read a value from memory
;
}
if ($line =~ m/^[ls][whb] ?(-?[0-9]+|0x[0-9a-fA-F]+) ?( ?$[a-z0-9]+ ?)$/)
{
print (All load/store operations require an offset, as well as the base register.)
;
print EXAMPLE:lw$t0, 0($t3)
;
}
}
Reviews
There are no reviews yet.