
%if 0

lDOS COMLOADER - load embedded programs
 by E. C. Masloch, 2018--2025

Usage of the works is permitted provided that this
instrument is retained with the works, so that any entity
that uses the works is notified of this instrument.

DISCLAIMER: THE WORKS ARE WITHOUT WARRANTY.

Parts copied from msdos4/src/CMD/DEBUG/DEBCOM2.ASM and
msdos4/src/CMD/DEBUG/DEBCOM3.ASM (under MIT license).

%endif

	cpu 8086

%include "lmacros3.mac"
%include "mzheader.mac"


	numdef MZEXESUPPORT, 1
				; read below as _MZEXESUPPORT: nonzero assembles
				;  the MZ executable load path and movp
	strdef DEVICEPAYLOAD, "srdmulti.sys"
				; read below as _DEVICEPAYLOAD: name of the file
				;  embedded into the DEVICE section
		; NOTE(review): numdef and strdef are not defined in this
		;  file; presumably they come from lmacros3.mac and allow
		;  overriding these defaults at build time -- confirm.

addsection DEVICE
		; The device payload file, embedded verbatim.
		; NOTE(review): nothing in the visible source reads this
		;  section at run time; presumably a consumer outside of
		;  this file uses it -- confirm.
device:
	incbin _DEVICEPAYLOAD
	align 16, db 38		; pad to a paragraph boundary with bytes of value 38
	endarea device		; device_size is read below

		; Report the section size in the build output.
%assign DEVICESIZE device_size
%warning devicesize=%[DEVICESIZE]


addsection PAYLOAD, align=16
		; The program that the loader moves behind the PSP and
		;  starts. The loader addresses it as segment PAYLOAD
		;  with a length of payload_size_p paragraphs.
payload:
	incbin "srdiskc.exe"
	align 16, db 38		; pad to a paragraph boundary with bytes of value 38
	endarea payload		; payload_size and payload_size_p are read below

		; Report the section size in the build output.
%assign PAYLOADSIZE payload_size
%warning payloadsize=%[PAYLOADSIZE]


addsection COMLOADER, align=16
..start:
COMLOADER_start:
		; Entrypoint of the loader.
		;
		; INP:	ds = es = PSP
		;	cs:ip = COMLOADER segment : 0
		;	ss:sp -> stack behind COMLOADER
		;	ax = value that is passed through to the payload
%if _MZEXESUPPORT
	jmp @F			; skip the relocation thunk on entry

		; Relocation thunk. The MZ load path jumps here after
		;  setting up a backwards copy of the entire loader.
		;
		; INP:	ds:si -> last word of the loader (source)
		;	es:di -> last word of the loader (destination)
		;	cx = amount of words to move
		;	DF = 1 (DN)
		; OUT:	execution continues at donerelocatedcomloader
		;	 with cs = es
		;
		; NOTE(review): this label (like several others below)
		;  is defined with equ, presumably so that it does not
		;  start a new local label scope and names such as .retf
		;  and .error_exit stay local to COMLOADER_start --
		;  confirm against the NASM documentation.
relocatecomloader: equ $
	rep movsw		; move downwards
	push es
	call .retf		; relocate
				; (call pushes the offset of the jmp below,
				;  retf then pops that offset and cs = es)
	jmp donerelocatedcomloader
				; continue at relocated comloader
.retf:
	retf

		; NOTE(review): the size limit presumably exists because
		;  the destination is at least one paragraph above the
		;  source, so only the first 16 bytes of the source are
		;  certain not to be overwritten while the thunk runs.
		;  The check measures from relocatecomloader, not from
		;  COMLOADER_start, so the jmp above is not counted --
		;  confirm that this is intended.
%if ($ - relocatecomloader) > 16
 %error Relocate comloader is too large
%endif


@@:
	push ax			; pass through ax
%endif
	; Load the payload behind the PSP.
	;
	; With _MZEXESUPPORT: the payload is moved to PSP + 10h using movp.
	;  If it starts with an MZ signature then its relocations are
	;  applied, the image is moved down over its header, the loader
	;  moves itself out of the way if the program's minimum allocation
	;  reaches it, and a small stub placed on the program's stack
	;  resizes the memory block and enters the program. Otherwise
	;  control continues behind .load_com.
	; Without _MZEXESUPPORT: an MZ signature is rejected, else the
	;  payload is copied to PSP:100h with a single rep movsw.
	;
	; INP:	es => PSP
	;	with _MZEXESUPPORT: entrypoint's ax is on top of the stack
	;	without _MZEXESUPPORT: ax = entrypoint's ax
	;	DF = 0 (UP)
	; OUT:	MZ payload: does not return, program is entered
	;	otherwise: payload is behind the PSP, es => PSP,
	;	 stack holds what .to_reloc pops
%if _MZEXESUPPORT
	mov cx, payload_size_p
	mov ax, PAYLOAD

	mov dx, es
	add dx, 10h		; => behind PSP
	call movp

	mov ds, dx		; => at MZ exe header if any
	xor bx, bx		; -> at header
	cmp word [bx + exeSignature], "MZ"
	je .load_exe
	cmp word [bx + exeSignature], "ZM"
	jne .load_com
.load_exe:
	push es
	mov cx, word [bx + exeRelocItems]
	push ds
	jcxz .donereloc
	mov si, word [bx + exeRelocTable]
				; ds:si -> reloc table
				;  (may overflow 64 KiB but starts within first)
	mov bp, dx
	add bp, word [bx + exeHeaderSize]
				; bp => exe image (after header)
.loopreloc:
		; Normalise ds:si on every iteration so that si stays
		;  below 16 and the two lodsw cannot wrap the offset.
	mov ax, si		; preserve upper 12 bits
	and si, 15		; isolate low 4 bits
	shr ax, 1
	shr ax, 1
	shr ax, 1
	shr ax, 1		; divide by 16
	mov di, ds
	add di, ax		; => normalised relocation table entry
	mov ds, di
	lodsw			; load offset
	xchg di, ax		; di = offset
	lodsw			; load segment
	add ax, bp		; => relocation entry in exe image (after header)
	mov es, ax		; es:di -> relocation entry
	add word [es:di], dx	; relocate (with dx => behind PSP)
	loop .loopreloc		; loop for all relocations
.donereloc:
	pop ds			; => exe header
	mov bp, word [bx + exePages]
	mov cl, 5
	shl bp, cl		; = how many paragraphs in header + image
	mov ax, word [bx + exeHeaderSize]
	sub bp, ax		; = how many paragraphs in image
	jbe error_format
		; lMS-DOS doesn't ever use exeExtraBytes.
		;  we can ignore it likewise.
	mov cx, bp
	add ax, dx		; => exe image
	jc error_format
	pop es			; => PSP
	add word [bx + exeInitCS], dx
	add word [bx + exeInitSS], dx
		; on stack: original ax
		; The header fields are saved on the stack now because
		;  the movp call below overwrites the header.
	push word [bx + exeInitCS]
	push word [bx + exeInitIP]
	push word [bx + exeInitSS]
	push word [bx + exeInitSP]
	push word [bx + exeMinAlloc]
	push word [bx + exeMaxAlloc]
	call movp		; move image down to behind PSP

	add dx, cx		; => behind exe image at destination
	jc error_format
	pop bx			; max alloc
	pop ax			; min alloc
	cmp ax, bx		; minimum > maximum ?
	ja error_format
	test bx, bx		; maximum == 0 ?
	jz error_format		; not supported -->
	add bx, cx		; bx = maximum allocation size (maxalloc + image)
	jc .unlimited
	add bx, 10h		; account for PSP
	jnc @F
.unlimited:
	mov bx, -1		; if maximum requested, pass along bx = 0FFFFh
@@:
	add dx, ax		; dx => behind minimum allocation
	jc error_format

		; If the loader starts below the end of the minimum
		;  allocation, move it up to directly below the stack
		;  segment before the program's stack is written to.
	push es			; preserve => PSP
	mov ax, cs
	cmp ax, dx
	jae .noreloc
	mov cx, ss
	sub cx, paras(COMLOADER_end - COMLOADER_start)
	cmp ax, cx
	jae error_memory	; destination not above current position -->
	cmp cx, dx
	jb error_format		; destination still within allocation -->
	mov es, cx		; => comloader destination
	push cs
	pop ds			; => comloader source
	mov cx, words(COMLOADER_end - COMLOADER_start)
	mov si, cx		; = amount words
	add si, si		; = amount bytes
	mov di, si		; -> after last byte to move
	std
	cmpsw			; -> at last word to move
				; (used only to step si and di down by 2)
	jmp relocatecomloader	; do rep movsw and far branch to relocated

donerelocatedcomloader: equ $	; returns control flow here from relocator
	cld			; UP !
.noreloc:
	pop ax			; ax => PSP
				; bx = requested maximum allocation size

	pop di
	pop bp			; bp:di = stack

	push cs
	pop ds
	mov si, .enterstub	; -> stub source
	pop word [si + .enterstub_ip]
	pop word [si + .enterstub_cs]
				; set cs:ip
	pop word [si + .enterstub_ax]
				; set ax (from our entrypoint)

	mov es, bp		; => stack destination
%if 0
	test di, di		; sp = 0 ?
	jnz @F
	dec di
	dec di			; yes, allocate 1 word on the stack
		; This cures a crash on dosemu2 that occurs if
		;  the retf 000Ch instruction is placed directly
		;  at the end of a segment (starting on offset
		;  0FFFDh extending below 10000h). Refer to
		;  https://github.com/dosemu2/dosemu2/issues/2575
@@:
%endif
	mov cx, .enterstub_size_w * 2
	sub di, cx		; -> space for stub
	shr cx, 1		; = amount words
	 push di
	rep movsw		; place stub
	 pop di			; bp:di -> stub
	mov es, ax		; es => PSP
	mov ds, ax		; ds => PSP
	cli
	mov ss, bp
	mov sp, di		; relocate stack
	sti
	lea ax, [di + .enterstub_entry]	; ss:ax -> stub entry
	push ss
	push ax			; -> stub's entry on stack
	mov di, 2		; es:di -> PSP word at [2]
	mov ah, 4Ah		; ah = 4Ah
	retf			; branch to there

		; Stub that is copied to the top of the program's stack.
		;  The three words in front are filled in above and are
		;  popped by the stub's code.
	align 2, nop
.enterstub:
.enterstub_ax: equ $ - .enterstub
	dw 0
.enterstub_ip: equ $ - .enterstub
	dw 0
.enterstub_cs: equ $ - .enterstub
	dw 0

		; INP:	es = ds => PSP
		;	bx = maximum allocation size
		;	MCB already >= minimum allocation size
		;	ah = 4Ah
		;	di = 2
		;	ss:sp -> stack with ax, ip, cs, stub
.enterstub_entry: equ $ - .enterstub
	int 21h			; resize memory block es to bx paragraphs
	mov ax, es		; => PSP
	add ax, bx		; => behind allocation
	stosw			; store in word [PSP:2]
	pop ax			; pass through ax
	retf .enterstub_size - 6; branch to entry and pop off the enterstub
				; (6 = the ax, ip, cs words popped already)
	nop	; Better cure for https://github.com/dosemu2/dosemu2/issues/2575
	align 2, nop
	endarea .enterstub

.load_com:
%else
	push ax			; pass through ax (popped by .to_reloc)
	mov cx, words(fromparas(payload_size_p))
				; = amount words to move
	mov ax, PAYLOAD
	mov ds, ax
	xor si, si		; ds:si -> source

%if 01
	cmp word [si + exeSignature], "MZ"
	je error_mz_exe_not_supported
	cmp word [si + exeSignature], "ZM"
	je error_mz_exe_not_supported
%endif

	mov di, 100h		; es:di -> destination behind PSP
	push di			; entry offset (popped by .to_reloc)
	rep movsw		; move to behind PSP
%endif

	; Flat (non-MZ) payload: decide from where to run .to_reloc.
	;  .to_reloc sets ss = PSP and sp = 0, so the 64 KiB starting at
	;  the PSP get used by the payload and its stack. The code that
	;  switches to the payload therefore has to be at or above
	;  PSP + 1000h, either in place or as a copy below the stack segment.
	mov di, es
	add di, 1000h		; => behind area to be used by stack
	mov ax, cs		; => COMLOADER segment
	cmp ax, di		; COMLOADER starts above-or-equal stack end ?
	jae @F			; yes, run .to_reloc in place -->

	mov ax, ss
	sub ax, .to_reloc_size_p
				; ax => candidate destination for .to_reloc
	cmp ax, di		; destination above-or-equal stack end ?
	jae .reloc		; yes, copy .to_reloc there -->

		; Error entrypoints. Each loads dx with the offset of
		;  a message and branches to the common exit code.
error_memory: equ $
	mov dx, msg.internal_error_memory
	jmp .error_exit

%if _MZEXESUPPORT
error_format: equ $
	mov dx, msg.internal_error_format
	jmp .error_exit
%elif 01
error_mz_exe_not_supported: equ $
	mov dx, msg.internal_error_mz
	jmp .error_exit
%endif

@@:
.to_reloc:
		; Switch to the payload's environment and enter it.
		;  This area is position independent; .reloc may copy
		;  it to another segment and run the copy at offset 0.
		;
		; INP:	es => PSP, payload is behind the PSP
		;	on stack: (without _MZEXESUPPORT: entry offset,)
		;	 then the ax value to pass to the payload
		; OUT:	does not return
		;	payload entered at PSP:di with ds = es = ss = PSP,
		;	 sp = 0FFFEh, word [ss:sp] = 0, si = 0, ax as passed,
		;	 cx = code segment this area ran in
%if _MZEXESUPPORT
	mov di, 100h
%else
	pop di
%endif
	pop ax			; preserve original ax

	push es
	cli
	pop ss
	xor sp, sp		; full 64 KiB's stack
	sti
	xor si, si		; si = 0
	push si			; 0 word on stack
				; (a near return by the payload pops offset 0)

	 push es
	 pop ds			; ds = es = ss = cs = PSP

	mov cx, cs		; => name table
		; NOTE(review): the visible source defines no name table
		;  and cs is the loader's or the copy's segment here, not
		;  the PSP; what the payload expects in cx could not be
		;  determined from this file -- confirm.

	push es
	push di			; -> PSP : 256
	retf
	endarea .to_reloc


.reloc:
		; Copy the .to_reloc area to another segment and
		;  continue execution in the copy.
		;
		; INP:	ax => destination segment
		;	es => PSP
		;	stack as expected by .to_reloc
		;	DF = 0 (UP)
		; OUT:	does not return here
		;	copy of .to_reloc entered at ax:0
		;	es => PSP, ds = old cs, cx = 0, stack unchanged
		;
		; si is not set up for the copy here because .to_reloc
		;  zeroes si itself before it is read again.
	push es			; preserve => PSP
	mov es, ax
	xor di, di		; es:di -> destination
	mov cx, words(fromparas(.to_reloc_size_p))
				; = amount words, whole paragraphs
	push cs
	pop ds
	mov si, .to_reloc	; ds:si -> source
	rep movsw		; leaves cx = 0
	pop es			; => PSP
	push ax
	push cx			; on stack: far pointer ax:0
	retf			; branch to the copy


.error_exit:
		; Display an error message and terminate the process.
		;
		; INP:	dx -> message within the loader's code segment,
		;	 terminated by a dollar sign (byte value 36)
		; OUT:	does not return
	push cs
	pop ds			; ds:dx -> message
	mov ah, 09h
	int 21h			; DOS: display string
	mov ax, 4C01h
	int 21h			; DOS: terminate process, return code 1


%if _MZEXESUPPORT
		; Move paragraphs
		;
		; INP:	ax:0-> source
		;	dx:0-> destination
		;	cx = number of paragraphs
		;	DF = 0 (UP), relied upon by the forwards move
		; CHG:	- (registers; flags are modified, DF = 0 on return)
		; Note:	Doesn't work correctly on HMA; doesn't always wrap to LMA either.
		;	Do not provide a wrapped/HMA source or destination!
		;
		; The move direction is chosen so that overlapping areas
		;  are handled: forwards if the source is above the
		;  destination or if the areas do not overlap, backwards
		;  if the destination lies within the source area.
		;  Both directions move in chunks of up to 64 KiB.
movp:
	push cx
	push ds
	push si
	push es
	push di

	cmp ax, dx		; source above destination ?
	ja .up			; yes, move up (forwards) -->
	je .return		; same, no need to move -->
	push ax
	add ax, cx		; (expected not to carry)
	cmp ax, dx		; end of source is above destination ?
	pop ax			; (pop leaves the flags of the cmp intact)
	ja .down		; yes, move from top down -->
	; Here, the end of source is below-or-equal the destination,
	;  so they do not overlap. In this case we prefer moving up.

.up:
	push ax
	push dx			; preserve, stepped by 1000h in the loop
.uploop:
	mov ds, ax
	mov es, dx
	xor di, di
	xor si, si		; -> start of segment
	sub cx, 1000h		; 64 KiB left ?
	jbe .uplast		; no -->
				; (exactly 64 KiB left is also moved by .uplast)
	push cx
	mov cx, 10000h /2
	rep movsw		; move 64 KiB
	pop cx
	add ax, 1000h
	add dx, 1000h		; -> next segment
	jmp short .uploop	; proceed for more -->
.uplast:
	add cx, 1000h		; restore counter
	shl cx, 1
	shl cx, 1
	shl cx, 1		; *8, paragraphs to words
	rep movsw		; move last part
	pop dx
	pop ax
	jmp short .return

.down:
	std			; _AMD_ERRATUM_109_WORKAROUND as below
.dnloop:
	sub cx, 1000h		; 64 KiB left ?
	jbe .dnlast		; no -->
				; cx = paragraphs below the topmost unmoved chunk
	push ax
	push dx
	add ax, cx
	add dx, cx
	mov ds, ax		; -> 64 KiB not yet moved
	mov es, dx
	pop dx
	pop ax
	mov di, -2
	mov si, di		; moved from last word down
	push cx
	mov cx, 10000h /2
	rep movsw		; move 64 KiB
	pop cx
	jmp short .dnloop	; proceed for more -->
.dnlast:
	add cx, 1000h		; restore counter
	shl cx, 1
	shl cx, 1
	shl cx, 1		; *8, paragraphs to words
	mov di, cx
	dec di
	shl di, 1		; words to offset, -> last word
	mov si, di
	mov ds, ax
	mov es, dx		; first segment correct


	numdef AMD_ERRATUM_109_WORKAROUND, 1
		; Refer to comment in init.asm init_movp.

%if _AMD_ERRATUM_109_WORKAROUND
		; Counts from 1 to 20 words are moved with single movsw
		;  instructions; this leaves cx = 0 so that the rep movsw
		;  below moves nothing more.
	jcxz @FF
	cmp cx, 20
	ja @FF
@@:
	movsw
	loop @B
@@:
%endif
	rep movsw		; move first part
	cld			; back to UP
.return:
	pop di
	pop es
	pop si
	pop ds
	pop cx
	retn
%endif


		; Error messages displayed by .error_exit. Each one ends
		;  with byte value 36 (dollar sign), the terminator of
		;  the DOS display string function used there.
msg:
.internal_error_memory:
	db 13,10,"Internal error, lacking memory for nonrelocated stack."
	db 13,10,36
%if _MZEXESUPPORT
.internal_error_format:
	db 13,10,"Internal error, MZ exe has invalid header or too large."
	db 13,10,36
%elif 01
.internal_error_mz:
	db 13,10,"Internal error, MZ exe not supported."
	db 13,10,36
%endif

		; Pad the loader to a whole number of paragraphs; the
		;  loader's own relocation computes its length from
		;  COMLOADER_end - COMLOADER_start.
	align 16, db 38
COMLOADER_end:

		; Report the loader size in the build output.
%assign COMLOADERSIZE (COMLOADER_end - COMLOADER_start)
%warning comloadersize=%[COMLOADERSIZE]
