CPU-Z80-Disassembler

 view release on metacpan or  search on metacpan

lib/CPU/Z80/Disassembler.pm  view on Meta::CPAN

handle calls to that routine on the next iteration.

The C<analyse> function can be called just before dumping the output to try to find 
higher level constructs in the assembly listing. For example, it transforms the
sequence C<ld b,h:ld c,l> into C<ld bc,hl>.

The C<write_asm> dumps an assembly listing that can be re-assembled to obtain the
starting binary file. All the unknown region bytes are disassembled as C<defb> 
instructions, and a map is shown at the end of the file with the code regions (C<C>),
byte regions (C<B>), word regions (C<W>) and unknown regions (C<->).

=head1 FUNCTIONS

=head2 new

Creates the object.

=head2 memory

L<CPU::Z80::Disassembler::Memory|CPU::Z80::Disassembler::Memory> object
containing the memory being analysed.

=head2 instr

Reference to an array that contains all the disassembled instructions
as L<CPU::Z80::Disassembler::Intruction|CPU::Z80::Disassembler::Intruction>
objects, indexed 
by the address of the instruction. The entry is C<undef> if there is no
disassembled instruction at that address (either not known, or pointing to the second,
etc, bytes of a multi-byte instruction).

=head2 labels

Returns the L<CPU::Z80::Disassembler::Labels|CPU::Z80::Disassembler::Labels>
object that contains all the defined labels.

=head2 header, footer

Attributes containing blocks of text to dump before and after the assembly listing.
They are used by C<write_asm>.

=head2 ix_base, iy_base

Base addess for (IX+DIS) and (IY+DIS) instructions, if constant in all the code.
Causes the disassembly to dump:

  IY0 equ 0xHHHH                ; 0xHHHH is iy_base
      ...
      ld  a,(iy+0xHHHH-IY0)     ; 0xHHHH is the absolute address

=cut

#------------------------------------------------------------------------------
# Hold a disassembly session
use base 'Class::Accessor';
__PACKAGE__->mk_accessors(
		'memory',		# memory to disassemble
		'_type',		# identified type of each memory address, TYPE_xxx
		'instr',		# array of Instruction objects at each address
		'labels',		# all defined labels
		'_call_instr',	# hash of all call instructions where we are blocked
		'_can_call',	# hash of all subroutines we may call:
						# 1 	 : can be called, no stack impact
						# 0      : has stack impact, needs to be checked manually
						# sub {} : call sub->($self, $next_addr) to handle 
						#		   stack impact and return next code addresses
						#          to continue disassembly after call
		'_block_comments',	
						# array of block comment string at each address, printed before
						# the address
		'header', 'footer',
						# header and footer sections of disassembled file
		'ix_base', 'iy_base',
						# base addess for (IX+DIS) and (IY+DIS)
);

use constant TYPE_UNKNOWN	=> '-';
use constant TYPE_CODE		=> 'C';
use constant TYPE_BYTE		=> 'B';
use constant TYPE_WORD		=> 'W';
my $TYPES_RE = qr/^[-CBW]$/;

use Exporter 'import';
our @EXPORT = qw( TYPE_UNKNOWN TYPE_CODE TYPE_BYTE TYPE_WORD );


sub new {
	my($class) = @_;
	my $memory = CPU::Z80::Disassembler::Memory->new;
	my $type   = CPU::Z80::Disassembler::Memory->new;
	my $labels = CPU::Z80::Disassembler::Labels->new;
	return bless {	memory 			=> $memory, 
					_type 			=> $type, 
					instr 			=> [],
					labels			=> $labels,
					_call_instr		=> {},
					_can_call		=> {},
					_block_comments	=> [],
				}, $class;
}
#------------------------------------------------------------------------------

=head2 write_dump

Outputs a disassembly dump on the given file, or standard output if no file
provided.

The disassembly dump shows the address and bytes of each instruction with 
the disassembled instruction.

=cut

#------------------------------------------------------------------------------

sub write_dump {
	my($self, $file) = @_;

	my $fh = _opt_output_fh($file);
	
	my $it = $self->memory->loaded_iter;
	my $instr;

lib/CPU/Z80/Disassembler.pm  view on Meta::CPAN

	# read from cache or disassemble
	$self->instr->[$addr] ||= 
			CPU::Z80::Disassembler::Instruction->disassemble($self->memory, $addr);
}
	
sub code {
	my($self, $addr, $label) = @_;

	defined($label) and $self->labels->add($addr, $label);
	
	my @stack = ($addr);						# all addresses to investigate
	
	# check calls
	while (@stack) {
		# follow all streams of code
		while (@stack) {
			my $addr = pop @stack;
			
			# if address is not loaded, assume a ROM entry point
			if (!defined $self->memory->peek($addr)) {
				if (!$self->labels->search_addr($addr)) {
					my $instr = $self->labels->add($addr);
				}
				next;
			}
			
			# skip if already checked
			next if $self->get_type($addr) eq TYPE_CODE;
			
			# get instruction and mark as code
			my $instr = $self->_get_instr($addr);
			$self->set_type_code($addr, $instr->size);
			
			# create labels for branches (jump or call)
			if ($instr->is_branch) {
				my $branch_addr = $instr->NN;
				my $label = $self->labels->add($branch_addr, undef, $addr);
				$instr->format->{NN} = sub { $label->name };
			}
			
			# check call / rst addresses
			if ($instr->is_call) {
				my $call_addr = $instr->NN;
				my $can_call = $self->_can_call->{$call_addr};
				if (! defined $can_call) {
					$self->_call_instr->{$addr}++;		# mark road block
				}
				elsif (ref $can_call) {
					push @stack, $can_call->($self, $instr->next_addr);
														# call sub to handle impact
				}
				elsif ($can_call) {
					push @stack, $instr->next_addr;		# can continue
				}
			}
			
			# continue on next addresses
			push @stack, $instr->next_code;
		}
	
		# check if we can unwind any blocked calls, after all paths without calls are
		# exhausted
		push @stack, $self->_check_call_instr;
	}
}

#------------------------------------------------------------------------------
sub _check_call_instr {
	my($self) = @_;

	my @stack;
	
	# check simple call instructions where we blocked
	for my $addr (keys %{$self->_call_instr}) {
		my $instr = $self->_get_instr($addr);
		my $call_addr = $instr->NN;
		
		if (	# if any of the calls is conditional, then _can_call
				$instr->opcode =~ /call \w+,NN/
			||	
				# if address after the call is CODE, then _can_call
				$self->get_type($instr->next_addr) eq TYPE_CODE
			) {
			
			# mark for later; do not call code() directly because we are 
			# iterating over _call_instr that might be changed by code()
			$self->_can_call->{$call_addr} = 1;
			push @stack, $instr->next_addr;				# code from here
			delete $self->_call_instr->{$addr};			# processed
		}
	}
	
	# check remaining by following code flow
	for my $addr (keys %{$self->_call_instr}) {
		my $instr = $self->_get_instr($addr);
		my $call_addr = $instr->NN;
		
		# if call flow in called subroutine 
		# does not pop return address, than _can_call
		my $can_call = $self->_check_call($call_addr);
		if (defined $can_call) {
			$self->_can_call->{$call_addr} = $can_call;
			push @stack, $addr;							# re-check call to call can_call 
			$self->_set_type(TYPE_UNKNOWN, $addr, $instr->size);
														# allow recheck to happen
			delete $self->_call_instr->{$addr};			# processed
		}
	}
	
	return @stack;
}

#------------------------------------------------------------------------------
sub _check_call {
	my($self, $call_addr) = @_;
	
	my %seen;									# addresses we have checked
	my($addr, $sp_level) = ($call_addr, 0);
	my @stack = ([$addr, $sp_level]);			# all addresses to investigate
	
	# follow code
	while (@stack) {
		($addr, $sp_level) = @{pop @stack};
		next if $seen{$addr}++;					# prevent loops
		
		# run into some known code
		my $can_call = $self->_can_call->{$addr};
		if (defined $can_call) {
			return $can_call if $sp_level == 0;
		}

		# if address is not loaded, return "dont know"
		if (!defined $self->memory->peek($addr)) {



( run in 0.955 second using v1.01-cache-2.11-cpan-39bf76dae61 )