#! /usr/bin/perl

# Memory references fixer and other NASM ports script
#  2023 to 2024 by E. C. Masloch, Public Domain

use warnings;
use strict;

use Data::Dumper;

sub addbrackets {
  my $par_no_ptr = shift;
  my $par_ptr = "";
  if ($par_no_ptr =~ /^((?:byte|word|dword)\s+ptr\s+)(.*)$/i) {
    $par_no_ptr = $2;
    $par_ptr = $1;
  };
  return $par_ptr."[".$par_no_ptr."]";
}

sub handle_variable_definition {
  our ($debug, %size, %labels, %labelsnorm, %letterstosize);
  $_ = shift;
  if (/^\s*([\$A-Za-z0-9\._]+)(?::|\s+)\s*d([bwd])\b/i) {
    if ($debug) { print STDERR $1." ".$2."\n"; };
    if (exists $size{$1}) {
      if ($size{$1} ne $letterstosize{$2}) {
        print STDERR "Differing size for symbol \"${1}\"\n";
      };
      if ($debug) { print STDERR "Repeated size for symbol \"${1}\"\n"; };
    } else {
      $labelsnorm{uc($1)} = $1;	# normalised = exact
      $labels{$1} = $1;		# exact = exact
      $size{$1} = $letterstosize{$2};
    };
  };
  return $_;
};

sub addequate {
  our (%equates, %equatesnorm);
  my $usedequate = shift;
  my $eol = shift;
  if (exists $equates{$usedequate}) {
    return "";
  }
  if (not defined $eol or $eol eq '') {
    $eol = "\n";
  }
  my $normalisedequate = uc($usedequate);
  my $canonicalequate = $equatesnorm{$normalisedequate};
  $equates{$usedequate} = $canonicalequate;
  return "$usedequate equ $canonicalequate\t; NASM port equate$eol";
}

sub addlabel {
  our (%labels, %labelsnorm);
  my $usedlabel = shift;
  my $eol = shift;
  if (exists $labels{$usedlabel}) {
    return "";
  }
  if (not defined $eol or $eol eq '') {
    $eol = "\n";
  }
  my $normalisedlabel = uc($usedlabel);
  my $canonicallabel = $labelsnorm{$normalisedlabel};
  $labels{$usedlabel} = $canonicallabel;
  return "$usedlabel equ $canonicallabel\t; NASM port label$eol";
}

our $debug = 0;
our %letterstosize =
	( b => "byte", B => "byte",
	  w => "word", W => "word",
	  d => "dword", D => "dword" );
my %sizetoletters =
	( "byte" => "b",
	  "word" => "w",
	  "dword" => "d" );
our %size;
our %equates = ( "DGROUP" => "DGROUP", "dg" => "dg" );
our %equatesnorm = ( "DGROUP" => "DGROUP", "DG" => "dg" );
our %labels = ();
our %labelsnorm = ();
my $commentmode = 0;
my $commentletter;
my $strucmode = 0;
my $strucname;
my %strucs;
my $new = 1;

LINE:
while (<<>>) {
  if ($new) {
    unlink($ARGV);
    open(ARGVOUT, ">$ARGV");
    select(ARGVOUT);
    $new = 0;
  }
  s/^(\s*)(IFN?DEF|ELSE|ENDIF)\b/$1\%$2/i;
  s/^(\s*)(IF)\s+(N)OT\b/$1\%$2$3/i;
  s/^(\s*)(IF\b)/$1\%$2/i;

  s/^(\s*\.model\s|
    \s*\.stack\s|
    \s*title\s|
    \s*page\s|
    \s*dosseg\b|
    \s*subttl\b|
    \s*subtitle\b)
    /;$1/xi;

  if (/^\s*([\$A-Za-z0-9\._]+)\s+STRUC\s*(?:;.*)?$/i) {
    if ($strucmode) {
      print STDERR "Error: Already in struc mode\n";
    }
    $strucmode = 1;
    $strucname = $1;
    $strucs{$strucname} = [];
    if ($debug) { print STDERR "struc \"$strucname\" entered\n"; }
  } elsif ($strucmode and /^\s*([\$A-Za-z0-9\._]+)(?::|\s+)\s*ENDS\s*(?:;.*)?$/i) {
    $strucmode = 0;
    if ($debug) { print STDERR "struc \"$strucname\" exited\n"; }
  } elsif ($strucmode and /^\s*([\$A-Za-z0-9\._]+)(?::|\s+)(?:\s*d([bwd]\b))/i) {
    my $struclabel = $1;
    my $strucsize = lc($2);
    push (@{ $strucs{$strucname} }, { $struclabel => $strucsize });
    if ($debug) { print STDERR "struc \"$strucname\" label \"$struclabel\" size \"$strucsize\"\n"; }
  } elsif ($strucmode and /^\s*([\$A-Za-z0-9\._]+)(?::|\s+|$)/i) {
    my $struclabel = $1;
    my $strucsize = "unknown";
    push (@{ $strucs{$strucname} }, { $struclabel => $strucsize });
    if ($debug) { print STDERR "struc \"$strucname\" label \"$struclabel\" size \"$strucsize\"\n"; }
  }

  if (/^\s*([\$A-Za-z0-9\._]+):?\s+([\$A-Za-z0-9\._]+)\s*([^\<\'\";,]*\<.*)/) {
    if (not exists $strucs{$2}) {
      print STDERR "Structure instance of unknown structure \"${2}\"\n";
    } else {
      if ($debug) { print STDERR "istruc \"$2\" contents \"${3}\"\n"; }
      my $repeats = 0;
      my $label = $1;
      my $strucname = $2;
      my $contents = $3;
      /([\r\n]*)$/;
      my $linebreak = $1;
      if ($1 eq "") { $linebreak = "\n" };
      if ($contents =~ s/^([0-9][0-9A-FX]*)\s*DUP\s*//) {
        $repeats = $1;
        if ($repeats <= 0) {
          $repeats = 1;
          print STDERR "Invalid structure instance repetition number \"$1\n";
        }
        $repeats -= 1;
      }
      if (not $contents =~ s/^\s*\<(.*)\>\s*$/$1/
          and not $contents =~ s/^\s*\(\s*\<(.*)\>\s*\)\s*$/$1/) {
        print STDERR "Invalid structure instance initialisation data \"$contents\"\n";
        $contents = "";
      }
      my $repeats_are_zero = 1;
      my @items;
      foreach my $item (split ",", $contents) {
        $item =~ s/^\s+|\s+$//g;
        if ($debug) { print STDERR "item $item\n"; }
        push (@items, $item);
        if (not $item =~ /^0*$/) {
          $repeats_are_zero = 0;
        }
      }
      my $fullrepeats = 1;
      my $emptyrepeats = $repeats;
      if (not $repeats_are_zero) {
        $fullrepeats += $repeats;
        $emptyrepeats = 0;
      }
      print ($label.":".$linebreak);
      for (my $ii = 0; $ii < $fullrepeats; ++ $ii) {
        print ("istruc ".$strucname.$linebreak);
        foreach my $strucitem (@{ $strucs{$strucname} }) {
          my %strucitem = %{ $strucitem };
          my $struclabel = (keys %strucitem)[0];
          # my $strucsize = $strucitem{$struclabel};
          my $strucsize = (values %strucitem)[0];
          my $item = shift @items;
          print "at ".$struclabel.$linebreak;
          my $line = "";
          if ($ii == 0) {
            $line .= $label.".".$struclabel.":";
          }
          if (not defined $item or $item eq "") {
            if ($ii == 0) {
              $line .= $linebreak;
            }
          } else {
            $line .= "\td".$strucsize." ".$item.$linebreak;
          }
          $line = handle_variable_definition($line);
          print $line;
        }
        print ("iend".$linebreak);
      }
      if ($emptyrepeats) {
        print ("\%rep ".$emptyrepeats.$linebreak);
        print ("istruc ".$strucname.$linebreak);
        print ("iend".$linebreak);
        print ("\%endrep".$linebreak);
      }
      $_ = "";
    }
  }

  if (/^\s*istruc_equates
      \s+([\$A-Za-z0-9\._]+)\s*,
      \s*([\$A-Za-z0-9\._]+)\s*(?:;.*)?$
      /ix) {
    if ($debug) { print STDERR "$2\n"; };
    if (not exists $strucs{$2}) {
      print STDERR "Structure equates instance of unknown structure \"${2}\"\n";
    } else {
      if ($debug) { print STDERR "istruc equates \"${2}\"\n"; }
      my $label = $1;
      my $strucname = $2;
      /([\r\n]*)$/;
      my $linebreak = $1;
      if ($1 eq "") { $linebreak = "\n" };
      print ";$_";
      {
        foreach my $strucitem (@{ $strucs{$strucname} }) {
          my %strucitem = %{ $strucitem };
          my $struclabel = (keys %strucitem)[0];
          my $strucsize = (values %strucitem)[0];
          my $line = "";
          $line .= $label.".".$struclabel.":";
          $line .= "\td".$strucsize." "."0".$linebreak;
          $line = handle_variable_definition($line);
          # discard result, above is only used to learn size
          $line = "";
          $line .= $label.".".$struclabel.":";
          $line .= "\tequ ".$label." + ".$struclabel.$linebreak;
          print $line;
        }
      }
      $_ = "";
    }
  }

  if (/^([^;]+)SIZE\s+([^\)\( \t\r\n;]+)/) {
    if (not exists $strucs{$2}) {
      print STDERR "Structure size of unknown structure \"${2}\"\n";
    } else {
      s/^([^;]+)SIZE\s+([^\)\( \t\r\n;]+)/$1$2_size/;
    }
  }

  if (/^\s*[\$A-Za-z0-9\._]+\s*=\s*/) {
    s/^(\s*[\$A-Za-z0-9\._]+\s+)=(\s+)/$1equ$2/;
    s/^(\s*[\$A-Za-z0-9\._]+\s+)=/$1equ /;
    s/^(\s*[\$A-Za-z0-9\._]+)=(\s+)/$1 equ$2/;
    s/^(\s*[\$A-Za-z0-9\._]+)=/$1 equ /;
  }

  if (/^((?:\s*[^:;]+(?::|\s+))?\s*[a-zA-Z0-9_]+\s+[^,]+\s*,\s*)
       ([^;]*[\"\'][^;\r\n]*\s*?)(;.*?)?([\r\n]*)$/x) {
    my $prefix = $1;
    my $suffix = $2;
    my $comment = $3;
    my $eol = $4;
    if (not defined $eol or $eol eq '') {
      $eol = "\n";
    }
    if (not defined $comment) {
      $comment = "";
    }
    if (not $comment =~ /NASM port swapped text literals/) {
      if ($suffix =~ s/\'([^\'])([^\'])\'/\'$2$1\'/g
          or $suffix =~ s/\"([^\"])([^\"])\"/\"$2$1\"/g) {
        $_ = $prefix.$suffix.$comment."\t; NASM port swapped text literals".$eol;
      }
    }
  }

  if (/^((?:\s*[^:;\"\'\s]+(?::|\s+))?\s*
       (?:db\b|dw\b|dd\b|
        [a-zA-Z0-9_]+\s+[^;,\"\']+\s*,\s*|[A-Za-z_][0-9A-Za-z_\.]*\s+equ\s+)
       )
       ([^;]*\b(?:shl|shr|and|or|not|xor)\b[^;]*\s*[\r\n]*)(;.*[\r\n]*)?$/ix) {
    my $prefix = $1;
    my $suffix = $2;
    my $comment = $3;
    if (not defined $comment) {
      $comment = "";
    }
    $suffix =~ s/^([^\"\']*)\bshl\b/$1<</g;
    $suffix =~ s/^([^\"\']*)\bshr\b/$1>>/g;
    $suffix =~ s/^([^\"\']*)\band\b/$1&/g;
    $suffix =~ s/^([^\"\']*)\bor\b/$1|/g;
    $suffix =~ s/^([^\"\']*)\bnot\b/$1~/g;
    $suffix =~ s/^([^\"\']*)\bxor\b/$1^/g;
    $_ = $prefix.$suffix.$comment;
  }

  if (/^(\s*)INCLUDE\s+([\$A-Za-z0-9\._]+)(\s*(?:;.*)?[\r\n]*)$/i) {
    my $includename = lc($2);
    my $first = $1;
    my $third = $3;
    $includename =~ s/\.inc$/.mac/;
    $_ = $first."%include \"".$includename."\"".$third;
  }

  if (/^(\s*(?:[\$A-Za-z0-9\._]+):?\s*)=(\s*\$\s*(?:;.*)?[\r\n]*)/i) {
    if ($debug) { print STDERR "$_\n"; };
    my $first = $1;
    my $second = $2;
    if (not $first =~ /\s$/) {
      $first .= " ";
    }
    if (not $second =~ /^\s/) {
      $second = " " . $second;
    }
    $_ = $first."equ".$second;
  }
  while (/^(.*)\b(DGROUP|_TEXT|DG):
          ([\$A-Za-z0-9\._]+)\b([^\],;]*?)(\s*(?:[\],;].*)?[\r\n]*)$
          /xi) {
    if ($debug) { print STDERR "$_\n"; };
    $_ = $1.$3.$4." wrt ".$2.$5;
  }
  while (/^(.*)\b(DGROUP|_TEXT|DG):
          \(
          ([\$A-Za-z0-9\._]+)\b([^\],;]*?)(\s*(?:[\],;].*)?[\r\n]*)$
          /xi) {
    if ($debug) { print STDERR "$_\n"; };
    $_ = $1."(".$3.$4." wrt ".$2.$5;
    if ($debug) { print STDERR "$_ 1>$1< 2>$2< 3>$3< 4>$4< 5>$5<\n"; };
  }

  $_ = handle_variable_definition($_);

  if (/^\s*([\$A-Za-z0-9\._]+)(?::|\s+)\s*equ\b\s+([^;]+?)\s*?(;.*?)?([\r\n]*)$/i) {
    my $equatename = uc($1);
    my $equatenameexact = $1;
    my $equatecalc = $2;
    my $comment = $3;
    if (not defined $comment) {
      $comment = "";
    }
    my $eol = $4;
    if ($comment =~ /NASM port equate/) {
      if (exists $equatesnorm{uc($equatecalc)}) {
        $equates{$equatenameexact} = $equatecalc;
      }
    } elsif ($comment =~ /NASM port label/) {
      if (exists $labelsnorm{uc($equatecalc)}) {
        $labels{$equatenameexact} = $equatecalc;
      }
    } elsif (not /^\s*([\$A-Za-z0-9\._]+)(?::|\s+)\s*equ\b\s+\$\s*(?:;.*)?$/i) {
      $equatesnorm{$equatename} = $equatenameexact;
      $equates{$equatenameexact} = $equatenameexact;
      if ($debug) {
        my $whole = $_;
        $whole =~ s/\s+$//;
        if ($debug) { print STDERR "equate $equatename $whole\n"; };
      };
      if ($equatecalc =~ /^(?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*([A-Za-z_][A-Za-z_0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*$/ix
          and exists $equatesnorm{uc($1)}) { print addequate($1, $eol); };
    } elsif ($debug) {
      my $whole = $_;
      $whole =~ s/\s+$//;
      if ($debug) { print STDERR "equatedollar $equatename $whole\n"; };
    };
  } elsif (/^\s*([\$A-Za-z0-9\._]+):/) {
    $labelsnorm{uc($1)} = $1;	# normalised = exact
    $labels{$1} = $1;		# exact = exact
  }

  if (/^\s*EXTRN\s*([^;]+)/i) {
    foreach my $item (split ",", $1) {
      $item =~ s/^\s+|\s+$//g;
      if ($debug) { print STDERR $item."\n"; };
      if ($item =~ /^(.*):(byte|word|dword)$/i) {
        if ($debug) { print STDERR "$1:::$2\n"; };
        if (exists $size{$1}) {
          if ($size{$1} ne lc($2)) {
            print STDERR "Differing size for symbol \"${1}\"\n";
          };
          if ($debug) { print STDERR "Repeated size for symbol \"${1}\"\n"; };
        } else {
          $size{$1} = lc($2);
        };
      } elsif ($item =~ /^(.*):(abs)$/i) {
        my $equatename = uc($1);
        my $equatenameexact = $1;
        $equatesnorm{$equatename} = $equatenameexact;
        $equates{$equatenameexact} = $equatenameexact;
      };
    };
  };


  if (/^\s*(?:[\$A-Za-z0-9\._]+(?::|\s+))?
      \s*(?:db|dw|dd)\b\s+([^;]+?)\s*?(?:;.*?)?([\r\n]*)$/xi) {
    my $calc = $1;
    my $eol = $2;
    if ($calc =~ /^(?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*([A-Za-z_][A-Za-z_0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*$/ix
          and exists $equatesnorm{uc($1)}) { print addequate($1, $eol); };
  }

  s/^(\s*|[^;\'\"\$]*)\b(Wait|default)\b/$1\$$2/i;

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:jmp|call)\s+)
      (DWORD\s+PTR\s+)([^][;]+?)
      (\s*(?:;.*)?)?([\r\n]*)$/xi) {
    if ($debug) { print STDERR "$_"; };
    $_ = $1."far [".$3."]".$4;
    if ($debug) { print STDERR "$_"; };
  }

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:jmp|call)\s+)
      (DWORD\s+PTR\s+)((?:[cdesfg]s:)?)(\[[^][;]+?\])
      (\s*(?:;.*)?)?([\r\n]*)$/xi) {
    if ($debug) { print STDERR "$_"; };
    my $par1 = $1;
    my $par2 = $2;
    my $par3 = $3;
    my $par4 = $4;
    my $par5 = $5;
    $par4 =~ s/[][]//g;
    $_ = $par1."far [".$par3.$par4."]".$par5;
    if ($debug) { print STDERR "$_"; };
  }

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:jmp|call)\s+)
      (NEAR\s+PTR\s+)((?:[cdesfg]s:)?)(\[[^][;]+?\])
      (\s*(?:;.*)?)?([\r\n]*)$/xi) {
    if ($debug) { print STDERR "$_"; };
    my $par1 = $1;
    my $par2 = $2;
    my $par3 = $3;
    my $par4 = $4;
    my $par5 = $5;
    $par4 =~ s/[][]//g;
    $_ = $par1."near [".$par3.$par4."]".$par5;
    if ($debug) { print STDERR "$_"; };
  }

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:mov|add|adc|sub|sbb|xor|and|or|test|cmp
      |rol|ror|rcl|rcr|shl|sal|shr|sar|xchg|lds|les|lss|lfs|lgs)\s+)
      ([^][,;]+?)(\s*,\s*)([^][;]+?)
      (\s*?(?:;.*?)?)?([\r\n]*)$/xi) {
    my $reg1 = 0;
    my $reg2 = 0;
    my $equ1 = 0;
    my $equ2 = 0;
    my $num1 = 0;
    my $num2 = 0;
    my $par1 = $1;
    my $par2 = $2;
    my $par3 = $3;
    my $par4 = $4;
    my $par5 = $5;
    my $par6 = $6;
    if ($par2 =~ /^(?:[abcd][hlx]|si|di|bp|sp|[dsec]s)$/i) { $reg1 = 1; };
    if ($par4 =~ /^(?:[abcd][hlx]|si|di|bp|sp|[dsec]s)$/i) { $reg2 = 1; };
    my $iscl = ($par4 =~ /^cl$/i);
    my $isrot = ($par1 =~ /\b(?:rol|ror|rcl|rcr|shl|sal|shr|sar)\s*$/i);
    my $isclrot = ($iscl and $isrot);
    if (exists $equatesnorm{uc($par2)}) { $equ1 = 1; print addequate($par2, $par6); };
    if (exists $equatesnorm{uc($par4)}) { $equ2 = 1; print addequate($par4, $par6); };
    if ($par2 =~ /^(?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*([A-Za-z_][A-Za-z_0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*$/ix
        and exists $equatesnorm{uc($1)}) { $equ1 = 1; print addequate($1, $par6); };
    if ($par4 =~ /^(?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*([A-Za-z_][A-Za-z_0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*$/ix
        and exists $equatesnorm{uc($1)}) { $equ2 = 1; print addequate($1, $par6); };
    if ($par2 =~ /^(?:(?:byte|word|dword)\s*(?:ptr\s*))?
                  (?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*
                  ([0-9][A-Za-z0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*$/ix)
      { $num1 = 1; };
    if ($par4 =~ /^(?:(?:byte|word|dword)\s*(?:ptr\s*))?
                  (?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*
                  ([0-9][A-Za-z0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*$/ix)
      { $num2 = 1; };
    if ($par2 =~ /[\"\']/i) { $num1 = 1; };
    if ($par4 =~ /[\"\']/i) { $num2 = 1; };
    if ($par2 =~ /\boffset\b/i) { $num1 = 1; };
    if ($par4 =~ /\boffset\b/i) { $num2 = 1; };
    my $whole = $_;
    $whole =~ s/\s+$//;
    if ($reg1 != $reg2 and not $isclrot) {
      if ($reg1 and not $num2 and not $equ2) {
        if ($debug) { print STDERR "reg1nnum2 secondbr $whole"."\n"; };
        $_ = $par1.$par2.$par3.addbrackets($par4).$par5.$par6;
      } elsif ($reg2 and not $num1) {
        if ($debug) { print STDERR "reg2nnum1 firstbr $whole"."\n"; };
        $_ = $par1.addbrackets($par2).$par3.$par4.$par5.$par6;
      } elsif ($reg1 and $num2) {
        if ($debug) { print STDERR "reg1num2 $whole"."\n"; };
      } elsif ($reg2 and $num1) {
        if ($debug) { print STDERR "reg2num1 $whole"."\n"; };
      } else {
        if ($debug) { print STDERR "confuse $whole"."\n"; };
      };
    } elsif ($reg1 == 0 and ($reg2 == 0 or $isclrot)) {
      if (not $num1) {
        if ($debug) { print STDERR "regnnumn1 firstbr+size $whole"."\n"; };
        my $seg = "";
        if ($par2 =~ /^([descfg]s:)(.*)$/i) {
          $seg = $1;
          $par2 = $2;
        }
        if (exists $size{$par2}) {
          $_ = $par1.$size{$par2}." [".$seg.$par2."]".$par3.$par4.$par5.$par6;
        } else {
          if ($par2 =~ /^(byte|word|dword)\s+ptr\s+(.*)$/i) {
            $_ = $par1.$1." [".$seg.$2."]".$par3.$par4.$par5.$par6;
          } else {
            print STDERR "Size for symbol \"$par2\" not found!\n";
          };
        };
      } elsif ($num1 and not $num2 and not $equ2) {
        if ($debug) { print STDERR "regnnumn2 secondbr+size $whole"."\n"; };
        my $seg = "";
        if ($par4 =~ /^([descfg]s:)(.*)$/i) {
          $seg = $1;
          $par4 = $2;
        }
        if (exists $size{$par4}) {
          $_ = $par1.$par2.$par3.$size{$par4}." [".$seg.$par4."]".$par5.$par6;
        } else {
          if ($par4 =~ /^(byte|word|dword)\s+ptr\s+(.*)$/i) {
            $_ = $par1.$par2.$par3.$1." [".$seg.$2."]".$par5.$par6;
          } else {
            print STDERR "Size for symbol \"$par4\" not found!\n";
          };
        };
      } elsif ($num1 and $num2) {
        if ($debug) { print STDERR "regnnumb $whole"."\n"; };
      } else {
        if ($debug) { print STDERR "confuse $whole"."\n"; };
      };
    } else {
      if ($debug) { print STDERR "regb $whole"."\n"; };
    };
  };

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:mov|add|adc|sub|sbb|xor|and|or|test|cmp
      |rol|ror|rcl|rcr|shl|sal|shr|sar|xchg|lds|les|lss|lfs|lgs)\s+)
      ((?:byte|word|dword)(?:\s+ptr)?\s*)?
      \[([^][,;]+?)\](\s*,\s*)([^][;]+?)
      (\s*?(?:;.*?)?)?([\r\n]*)$/xi) {
    my $par1 = $1;
    my $parsize = $2;
    my $par2 = $3;
    my $par3 = $4;
    my $par4 = $5;
    my $par5 = $6;
    my $par6 = $7;
    my $iscl = ($par4 =~ /^cl$/i);
    my $isrot = ($par1 =~ /\b(?:rol|ror|rcl|rcr|shl|sal|shr|sar)\s*$/i);
    my $isclrot = ($iscl and $isrot);
    $par2 =~ s/^\[.*\]$//;
    my $whole = $_;
    $whole =~ s/\s+$//;
    if ($par2 =~ /^(?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*([A-Za-z_][A-Za-z_0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*$/ix
        and exists $equatesnorm{uc($1)}) { print addequate($1, $par6); };
    if ($par4 =~ /^(?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*([A-Za-z_][A-Za-z_0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*$/ix
        and exists $equatesnorm{uc($1)}) { print addequate($1, $par6); };
    if ($isclrot or not $par4 =~ /^(?:[abcd][hlx]|si|di|bp|sp|[dsec]s)$/i) {
      {
        if ($debug) { print STDERR "regnnumn1 firstbr+size $whole"."\n"; };
        my $seg = "";
        if ($par2 =~ /^([descfg]s:)(.*)$/i) {
          $seg = $1;
          $par2 = $2;
        }
        if (defined $parsize and $parsize ne '') {
          $parsize =~ s/\s+$//g;
          $_ = $par1.$parsize." [".$seg.$par2."]".$par3.$par4.$par5.$par6;
        } elsif (exists $size{$par2}) {
          $_ = $par1.$size{$par2}." [".$seg.$par2."]".$par3.$par4.$par5.$par6;
        } else {
          if ($par2 =~ /^(byte|word|dword)\s+ptr\s+(.*)$/i) {
            $_ = $par1.$1." [".$seg.$2."]".$par3.$par4.$par5.$par6;
          } else {
            print STDERR "Size for symbol \"$par2\" not found!\n";
          };
        };
      };
    };
  };

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:mov|add|adc|sub|sbb|xor|and|or|test|cmp
      |rol|ror|rcl|rcr|shl|sal|shr|sar|xchg|lds|les|lss|lfs|lgs
      |lea)\s+)		# $1
      ([^,;]+?)		# $2
      (\s*,\s*)		# $3
      ([^;]+?)		# $4
      (\s*?(?:;.*?)?)?	# $5
      ([\r\n]*)		# $6
      $/xi) {
    my $par1 = $1;
    my $par2 = $2;
    my $par3 = $3;
    my $par4 = $4;
    my $par5 = $5;
    my $par6 = $6;
    $par2 =~ s/^\s*\b(byte|word|dword)\s*(ptr\b\s*)?//i;
    $par2 =~ s/^\s*\b[descfg]s:\s*//;
    $par2 =~ s/[\]\[]+/ /g;
    if ($par2 =~ /^\s*(?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*([A-Za-z_][A-Za-z_0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*\s*$/ix) {
      my $nameexact = $1;
      my $namenormalised = uc($1);
      if (exists $equatesnorm{$namenormalised}) {
        print addequate($nameexact, $par6);
      }
      if (exists $labelsnorm{$namenormalised}) {
        print addlabel($nameexact, $par6);
      }
    };
    $par4 =~ s/^\s*\b(byte|word|dword)\s*(ptr\b\s*)?//i;
    $par4 =~ s/^\s*\b[descfg]s:\s*//;
    $par4 =~ s/[\]\[]+/ /g;
    if ($par4 =~ /^\s*(?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*([A-Za-z_][A-Za-z_0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*\s*$/ix) {
      my $nameexact = $1;
      my $namenormalised = uc($1);
      if (exists $equatesnorm{$namenormalised}) {
        print addequate($nameexact, $par6);
      }
      if (exists $labelsnorm{$namenormalised}) {
        print addlabel($nameexact, $par6);
      }
    };
  };

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:call|jmp|je|jne|jz|jnz|jb|jnb|jc|jnc
      |ja|jna|jae|jnae|jbe|jnbe
      |jo|jno|jp|jnp|jpo|jnpo|jpe|jnpe
      |js|jns|jl|jnl|jg|jng|jle|jnle|jge|jnge
      |jcxz|jecxz|loop|loopz|loope|loopnz|loopne
      )\s+)		# $1
      ([^,;]+?)		# $2
      (\s*?(?:;.*?)?)?	# $3
      ([\r\n]*)		# $4
      $/xi) {
    my $par1 = $1;
    my $par2 = $2;
    my $par3 = $3;
    my $par4 = $4;
    $par2 =~ s/^\s*\b(near|far|short|byte|word|dword)\s*(ptr\b\s*)?//i;
    $par2 =~ s/^\s*\b[descfg]s:\s*//;
    $par2 =~ s/[\]\[]+/ /g;
    if ($par2 =~ /^\s*(?:\s*\(+\s*|(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*\s*
                  (?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*)*
                  (?:[-\+]*\s*)*([A-Za-z_][A-Za-z_0-9]*)
                  (?:\s*\)+\s*|\s*(?:[-\+\*\/]
                  |\bshl\b|\bshr\b|\bnot\b|\band\b|\bor\b|\bxor\b
                  |\blt\b|\bgt\b|\blte\b|\bgte\b
                  |<<|>>|~|\&|\||\^)
                  \s*(?:[-\+]*\s*)*[0-9][0-9A-Za-z]*)*\s*$/ix) {
      my $nameexact = $1;
      my $namenormalised = uc($1);
      if (exists $equatesnorm{$namenormalised}) {
        print addequate($nameexact, $par4);
      }
      if (exists $labelsnorm{$namenormalised}) {
        print addlabel($nameexact, $par4);
      }
    };
  };

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:not|neg|dec|inc|mul|imul|div|idiv|push|pop)\s+)
      ([^][;]+?)
      (\s*(?:;.*)?)?([\r\n]*)$/xi) {
    my $reg1 = 0;
    my $par1 = $1;
    my $par2 = $2;
    my $par3 = $3;
    my $par4 = $4;
    if ($par2 =~ /^(?:[abcd][hlx]|si|di|bp|sp|[dsec]s)$/i) { $reg1 = 1; };
    my $whole = $_;
    $whole =~ s/\s+$//;
    if ($reg1 == 0) {
      if ($debug) { print STDERR "singregn firstbr+size $whole"."\n"; };
      my $seg = "";
      if ($par2 =~ /^([descfg]s:)(.*)$/i) {
        $seg = $1;
        $par2 = $2;
      }
      if (exists $size{$par2}) {
        $_ = $par1.$size{$par2}." [".$seg.$par2."]".$par3.$par4;
      } else {
        if ($par2 =~ /^(byte|word|dword)\s+ptr\s+(.*)$/i) {
          $_ = $par1.$1." [".$seg.$2."]".$par3.$par4;
        } else {
          print STDERR "Size for symbol \"$par2\" not found!\n";
        };
      };
    } else {
      if ($debug) { print STDERR "singreg $whole"."\n"; };
    };
  };

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:not|neg|dec|inc|mul|imul|div|idiv|push|pop)\s+)
      \[([^][;]+?)\]
      (\s*(?:;.*)?)?([\r\n]*)$/xi) {
    my $par1 = $1;
    my $par2 = $2;
    my $par3 = $3;
    my $par4 = $4;
    my $whole = $_;
    $whole =~ s/\s+$//;
    {
      if ($debug) { print STDERR "singregn firstbr+size $whole"."\n"; };
      my $seg = "";
      if ($par2 =~ /^([descfg]s:)(.*)$/i) {
        $seg = $1;
        $par2 = $2;
      }
      if (exists $size{$par2}) {
        $_ = $par1.$size{$par2}." [".$seg.$par2."]".$par3.$par4;
      } else {
        if ($par2 =~ /^(byte|word|dword)\s+ptr\s+(.*)$/i) {
          $_ = $par1.$1." [".$seg.$2."]".$par3.$par4;
        } else {
          if ($par1 =~ /push\s*$|pop\s*$/i) {
            $_ = $par1."word"." [".$seg.$par2."]".$par3.$par4;
          } else {
            print STDERR "Size for symbol \"$par2\" not found!\n";
          };
        };
      };
    };
  };

  s/([descfg]s:)\[/\[$1/i;

  s/([descfg]s:)(\s*(?:BYTE|WORD|DWORD)\s*(?:PTR\s*)?)\[/$2\[$1/i;

  s/\[([descfg]s:)
    \s*\(\s*((?:BYTE|WORD|DWORD)\s*(?:PTR\s*)?)
    ([^\]\[;\'\"]+)\s*\)\](\s*(?:,.*)?(?:;.*)?[\r\n]*)$/$2\[$1$3\]$4/xi;

  s/([descfg]s:)
    \s*\(\s*((?:BYTE|WORD|DWORD)\s*(?:PTR\s*)?)
    ([^\]\[;\'\"]+)\s*\)(\s*(?:,.*)?(?:;.*)?[\r\n]*)$/$2\[$1$3\]$4/xi;

  s/([descfg]s:)
    \s*\(\s*((?:BYTE|WORD|DWORD)\s*(?:PTR\s*)?)
    \s*\[\s*([^\]\[;\'\"]+)\s*\]\s*\)(\s*(?:,.*)?(?:;.*)?[\r\n]*)
    $/$2\[$1$3\]$4/xi;

  s/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:lods|stos|scas|movs|cmps)\s+
      (?:BYTE|WORD|DWORD)(?:\s*PTR)?\s*)
      ((?:[descfg]s:)?[^][;,]+?)
      (\s*(?:;.*)?)?([\r\n]*)$
      /$1\[$2\]$3$4/xi;

  s/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
    (?:push|pop)\s+)
    \[
    /$1word \[/xi;

  if (s/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:mov|add|adc|sub|sbb|xor|and|or|test|cmp
      |not|neg|inc|dec|mul|div|imul|idiv
      |push|pop|jmp|call|lea
      |rol|ror|rcl|rcr|shl|sal|shr|sar|xchg|lds|les|lss|lfs|lgs)\s+)
    ([^\]\[,;]*\[[^\]\[,;]+)\]\.([A-Za-z_\.\$]+)
    /$1$2 \+ $3\]/xi) {
    my $par = $3;
    if (not /(byte|word|dword)(?:\s+ptr)?\s*\[/i) {
      if (exists $size{$par}) {
        s/\[/$size{$par} \[/;
      }
    }
  }

  s/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:mov|add|adc|sub|sbb|xor|and|or|test|cmp
      |not|neg|inc|dec|mul|div|imul|idiv
      |push|pop|jmp|call|lea
      |rol|ror|rcl|rcr|shl|sal|shr|sar|xchg|lds|les|lss|lfs|lgs)\s+)
    ([^;]*),
    ([^\]\[,;]*\[[^\]\[,;]+)\]\.([A-Za-z_\.\$]+)
    /$1$2,$3 \+ $4\]/xi;

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?)
      (lods|stos|scas|movs|cmps)\s+
      (BYTE|WORD|DWORD)(?:\s*PTR)?\s*(?:\[((?:[descfg]s:)?)[^][;,]+\])
      (\s*(?:;.*)?)?([\r\n]*)$/xi) {
    my $parlabel = $1;
    my $parinst = $2;
    my $parsize = $3;
    my $parseg = $4;
    my $partrail = $5;
    my $pareol = $6;
    $parseg =~ s/:$//;
    if ($parseg ne "") { $parseg .= " "; };
    if ($debug) { print STDERR "$_\n"; };
    $_ = $parlabel.$parseg.$parinst
      .$sizetoletters{lc($parsize)}.$partrail.$pareol;
    if ($debug) { print STDERR "$_\n"; };
  };

  if (/^([^,;]*,\s*)((?:(?:BYTE|WORD|DWORD)\s*(?:PTR\s*)?)?\[[^;]*\bds:[^;]+)(\s*(?:;.*)?[\r\n]*)$/i) {
    my $par1 = $1;
    my $par2 = $2;
    my $par3 = $3;
    if (not $par2 =~ /\bbp\b/i) {
      $par2 =~ s/\bds://i;
      $_ = $par1.$par2.$par3;
    }
  }

  if (/^([^,;\[]*)(\[[^;]*\bds:[^,;]+)((?:,[^;]+)?\s*(?:;.*)?[\r\n]*)$/i) {
    my $par1 = $1;
    my $par2 = $2;
    my $par3 = $3;
    if (not $par2 =~ /\bbp\b/i) {
      $par2 =~ s/\bds://i;
      $_ = $par1.$par2.$par3;
    }
  }

  if (/^(\s*(?:[\$A-Za-z0-9\._]+:\s*|[\$A-Za-z0-9\._]+\s+)?
      (?:
      lds|les|lss|lfs|lgs)\s+)			# $1
      ([^;,]+?)					# $2
      (\s*,\s*)					# $3
      ((?:byte|word|dword)(?:\s+ptr)?\s*)?	# $4
      ([^,;]+?)					# $5
      (\s*(?:;.*)?)?				# $6
      ([\r\n]*)					# $7
      $/xi) {
    my $par1 = $1;
    my $par2 = $2;
    my $par3 = $3;
    my $par4 = $4;
    my $par5 = $5;
    my $par6 = $6;
    my $par7 = $7;
    $_ = $par1.$par2.$par3 . $par5.$par6.$par7;
  }

  if (/^\s*COMMENT\s+(\S+)(.*)$/i) {
    if ($commentmode) {
      print STDERR "Error: Already in comment mode\n";
    }
    $commentletter = $1;
    if ($2 =~ /\Q$commentletter\E/) {
      $commentmode = 0;
      $_ = ";".$_;
    } else {
      $commentmode = 1;
      $_ = "\%if 0 ;".$_;
    };
  } elsif ($commentmode and /\Q$commentletter\E/) {
    $commentmode = 0;
    /([\r\n]*)$/;
    my $linebreaktrail = $1;
    if ($1 eq "") { $linebreaktrail = "\n" };
    $_ .= "\%endif".$linebreaktrail;
  }

  print $_;
} continue {
  if (eof(ARGV)) {
    $new = 1;
  }
}
select(STDOUT);
if ($debug) { print STDERR Dumper(\%size); };
if ($debug) { print STDERR Dumper(\%strucs); };
