	.TITLE	QLZW -- Quick LZW record compression
;
;	Written: 12-Apr-1991 by John Osudar, Argonne National Laboratory
;
;	These routines implement record-level data compression, using the
;	LZW (Lempel-Ziv-Welch) algorithm that is in common use for file
;	compression.  The major differences here are:
;
;		(1) This code is designed to compress one record at a time
;			(maximum 65535 bytes in)
;
;		(2) This code uses a variation on direct table lookup for
;			the code (string) table, to achieve better speed than
;			typical hash-based implementations while requiring less
;			virtual memory than full direct-table implementations.
;
;	The table lookup technique allocates a page of memory for a table "row"
;	the first time a code is to be inserted into that row.  An index list
;	of row pointers is maintained to track the allocated pages.  Two lists
;	of locations that have been modified are kept, so that the table may
;	be zeroed for each record without zeroing megabytes of memory at a
;	time (the vast majority of which will already be zero anyway).
;
;	The following two entry points are supplied:
;
;	STATUS=QLZW_CMP(INBUF,INLEN,OUTBUF,OUTLEN,OUTSIZE)
;
;	Compress record INBUF (length INLEN bytes) into OUTBUF, and return
;	resulting length in OUTLEN.  OUTBUF is limited to OUTSIZE bytes.
;
;	All arguments passed by reference
;	INLEN, OUTLEN, OUTSIZE are longwords
;	OUTLEN is set to the number of bytes in the output if status is success
;
;	returns 1 if record successfully compressed,
;		2 if compression would produce longer record,
;		fatal status code (severity 4) in case of error
;
;	STATUS=QLZW_DCM(INBUF,INLEN,OUTBUF,OUTLEN,OUTSIZE)
;
;	Decompress record INBUF (length INLEN bytes) into OUTBUF, and return
;	resulting length in OUTLEN.  OUTBUF is limited to OUTSIZE bytes.
;
;	All arguments passed by reference
;	INLEN, OUTLEN, OUTSIZE are longwords
;	OUTLEN is set to the number of bytes in the output if status is success
;
;	returns 1 if record successfully decompressed,
;		SS$_ABORT if OUTBUF is too small to contain record
;
;===============================================================================
;
;	Parameters that control the page allocation; AREAPAGES are obtained
;	from the system (via LIB$GET_VM_PAGE) at a time, and are given out via
;	an internal allocation routine one page at a time.  Once pages are
;	obtained they are not given back (e.g. via LIB$FREE_VM_PAGE), but are
;	kept in a list and reused for each record.
;
AREAPAGES=256			; Pages in area
AREAMAX=37120/AREAPAGES		; Number of areas we can have
;
;	Pure data PSECT
;
	.PSECT	QLZW_PURE_DATA		PIC,CON,REL,LCL,SHR,NOEXE,RD,NOWRT,QUAD
;
AREASIZE:
	.LONG	AREAPAGES	; Area size, in pages
ARGLIST:			; Argument list for LIB$GET_VM_PAGE
	.LONG		2
	.ADDRESS	AREASIZE
	.ADDRESS	WHERE
;
;	Impure data, structured as a "COMMON block" to facilitate external
;	instrumentation
;
	.PSECT	QLZW_COMMON_DATA	PIC,OVR,REL,GBL,SHR,NOEXE,RD,WRT,PAGE
;
BEGIN_COMMON=.
;
;	The first two tables provide addresses of location to zero for each call
;
TBLLOC:	.BLKL	37120		; Addresses of nonzero entries in ROWADR
REFLOC:	.BLKL	37120		; Addresses of references to codes 256+ in tbls
ZRFLOC=REFLOC-1024		; Label to pretend REFLOC has space for 0-255
;
;	The next table contains the index of row addresses for each code
;
ROWADR:	.BLKL	37120		; Addresses of row tables for codes 0-37119
;
;	The list of memory areas obtained from LIB$GET_VM_PAGE follows
;
AREAS:	.BLKL	AREAMAX		; List of addresses of page areas
;
;	Various pointers, limits, counters and bitmasks follow
;
;	TBLIDX and MAXCOD must occur together in that order
;
TBLIDX:	.LONG	0		; Index to last TBLLOC entry used
MAXCOD:	.LONG	0		; Maximum code in use
;
;	WHERE and NXTIDX must occur together in that order
;
WHERE:	.BLKL	1		; Temporary storage for LIB$GET_VM_PAGE
NXTIDX:	.LONG	0		; Index of next area pointer to use in AREAS
;
;	FREEPG and NOMORE must occur together in that order
;
FREEPG:	.LONG	0		; Address of next free row page
NOMORE:	.LONG	0		; Address of location after end of area
;
FMASK:	.BLKL	1		; Bit field mask value
END_COMMON=.
;
;	Executable code PSECT
;
	.PSECT	QLZW_CODE	PIC,CON,REL,LCL,SHR,EXE,RD,NOWRT,QUAD
;
	.ENTRY	QLZW_CMP,^M<R2,R3,R4,R5,R6,R7,R8,R9,R10,R11>
;
	MOVQ	#9,R2		; Initial field size is 9 bits, R3=FPOS is zero
	MOVZWL	#^X1FF,FMASK	; Initial field mask is nine bits
	CLRL	NXTIDX		; Clear next area pointer
	CLRQ	FREEPG		; Clear free row page pointers
	MOVL	4(AP),R6	; R6 = input pointer
	MOVL	@8(AP),R7	; R7 = input bytes left
	MOVL	12(AP),R8	; R8 = output pointer
	ASHL	#3,@20(AP),R9	; R9 = length of output buffer in bits
;
;	Clear the reference locations and corresponding row addrs, if were used
;
	MOVZBL	#255,R5		; Initialize max code register
	SUBL3	R5,MAXCOD,R0	; Get index minus single-character high code
	BLEQ	19$		; If none longer than single, skip loop
	MOVAL	REFLOC,R1	; Pointer
1$:	CLRW	@(R1)+		; Clear reference location, advance pointer
	SOBGTR	R0,1$		; Loop for all of them
19$:	MOVL	TBLIDX,R0	; Get count of TBLLOC entries used
	BLEQ	29$		; If none, skip it
	MOVAL	TBLLOC,R1	; Pointer
2$:	CLRL	@(R1)+		; Clear one of them, advance pointer
	SOBGTR	R0,2$		; Loop for all of them
29$:	CLRL	R4		; Clear TBLLOC index register
;
;	(1) Initialize string table with single characters (pretend to do it)
;	(2) Read the first character into W (the prefix code register R10)
;
	MOVZBL	(R6)+,R10	; Get first code for prefix
;
;	(3) Read next input character K (into R11)
;	(4) If at end of file, output code(W) and exit
;
3$:	DECL	R7		; See if any left
	BGTR	4$		; Yes, some left; go get it
	INSV	R10,R3,R2,(R8)	; Output code(W): insert bit field
	ADDL	R2,R3		; Advance bit pointer by field width
	MOVL	#1,R0		; Assume successful compression
	CMPL	R3,R9		; Reached/exceeded output length?
	BLSS	37$		; No, return success
	INCL	R0		; Else set status to 2 (indicating no compress)
;
;	Finish up and return
;
37$:	MOVQ	R4,TBLIDX	; Store TBLLOC index and max code used
	ADDL	#7,R3		; Round up to next whole byte
	ASHL	#-3,R3,@16(AP)	; Compute and store length of output
	RET			; Return
39$:	MOVL	#^X2C,R0	; Indicate failure
	BRB	37$		; Return
;
;	Get next character (K) from input
;
4$:	MOVZBL	(R6)+,R11	; Get next character
;
;	(5a) Is W+K in string table?
;
;		See if W has a row table; if not, W+K can't be in table
;
	MOVL	ROWADR[R10],R0	; Row address nonzero?
	BEQL	6$		; Nope, no row there
;
;		W has a row table; check word K for nonzero
;
	MOVZWL	(R0)[R11],R1	; Get code for W+K
	BEQL	7$		; Zero there, no code for W+K
;
;	(5b) If W+K is in string table, set W to W+K and go to (3)
;
	MOVL	R1,R10		; W set to W+K's code
	BRB	3$		; Loop
;
;	(6a) W has no row table, so allocate it one and fill in table entries
;
6$:	MOVQ	FREEPG,R0	; Get FREEPG into R0, NOMORE into R1
	CMPL	R0,R1		; FREEPG < NOMORE ?
	BLSS	63$		; Yes, allocate a page directly
	MOVL	NXTIDX,R1	; Get next index
	MOVL	AREAS[R1],R0	; Get area address
	BNEQ	62$		; If nonzero, already allocated one
	CALLG	ARGLIST,G^LIB$GET_VM_PAGE	; Get contiguous area of pages
	BLBC	R0,39$		; Abort on error
	MOVQ	WHERE,R0	; Get address, next index
	MOVL	R0,AREAS[R1]	; Store area address
62$:	INCL	NXTIDX		; Advance index
	MOVAB	AREAPAGES*512(R0),NOMORE	; Store limit address
63$:	ADDL3	#512,R0,FREEPG	; Set new FREEPG value
	MOVAL	ROWADR[R10],R1	; Get address of vector entry for row table
	MOVL	R0,(R1)		; Store row table address into vector
	MOVL	R1,TBLLOC[R4]	; Store address into TBLLOC slot
	INCL	R4		; Increment TBLLOC index
;
;	(6b) W has a row table (address is in R0), so use it;
;		W+K is not in table; output code(W),
;		put W+K into string table, set W to K and go to (3)
;
7$:	INSV	R10,R3,R2,(R8)	; Output code(W): insert bit field
	ADDL	R2,R3		; Advance bit pointer by field width
	CMPL	R3,R9		; Reached/exceeded output length?
	BLSS	73$		; No, proceed
	MOVL	#2,R0		; Yes, don't go any further; no compression
	BRW	37$		; Get out now
73$:	INCL	R5		; Advance max code to get next one
	CMPL	R5,FMASK	; Is max code now larger than max field?
	BLEQ	77$		; No, don't update FLDSIZ
	ADDL	R5,FMASK	; Else advance FMASK
	INCL	R2		; And advance field size
77$:	MOVAW	(R0)[R11],R1	; Get address of code word
	MOVW	R5,(R1)		; Put the code number into that word
	MOVL	R1,ZRFLOC[R5]	; Put address of that word into REFLOC
	MOVL	R11,R10		; Set W to K
	BRW	3$		; Go to step 3
;
	.PSECT	QLZW_COMMON_DATA	PIC,OVR,REL,GBL,SHR,NOEXE,RD,WRT,PAGE
;
;	Redefine beginning parts of common area for decompression routine
;
.=BEGIN_COMMON
PREFIX:	.BLKW	36864		; Prefix part of codes 256-37120
VALLEN:	.BLKW	36864		; Length of final resulting string for code
SUFFIX:	.BLKB	36864		; Suffix character for code
.=END_COMMON
;
	.PSECT	QLZW_CODE	PIC,CON,REL,LCL,SHR,EXE,RD,NOWRT,QUAD
;
	.ALIGN	QUAD
	.ENTRY	QLZW_DCM,^M<R2,R3,R4,R5,R6,R7,R8,R9,R10,R11>
;
;	First, clear the reference locations and corresponding row addrs, if 
;	were used by last QLZW_CMP call
;
	MOVZBL	#255,R5		; Initialize max code register
	SUBL3	R5,MAXCOD,R0	; Get index minus single-character high code
	BLEQ	111$		; If none longer than single, skip loop
	MOVAL	REFLOC,R1	; Pointer
110$:	CLRW	@(R1)+		; Clear reference location, advance pointer
	SOBGTR	R0,110$		; Loop for all of them
	CLRL	MAXCOD		; Zap it so we don't do this again!
111$:	MOVL	TBLIDX,R0	; Get count of TBLLOC entries used
	BLEQ	113$		; If none, skip it
	MOVAL	TBLLOC,R1	; Pointer
112$:	CLRL	@(R1)+		; Clear one of them, advance pointer
	SOBGTR	R0,112$		; Loop for all of them
	CLRL	TBLIDX		; Zap this one too
113$:	MOVL	4(AP),R6	; Get input buffer pointer
	ASHL	#3,@8(AP),R7	; Get length of input buffer in bits
	SUBL	#7,R7		; Knock down seven bits in case not full byte
	MOVL	12(AP),R8	; Get output buffer pointer
	ADDL3	@20(AP),R8,R9	; Compute address of first byte after output
	MOVQ	#9,R2		; Initial field size is 9 bits, R3=FPOS is zero
	MOVZWL	#^X1FF,FMASK	; Initial field mask is nine bits
;
;	(1a) Read first input code into CODE (R10) and OLDCODE (R11)
;
	EXTZV	R3,R2,(R6),R10	; Extract bit field
	MOVL	R10,R11		; Copy it to OLDCODE
	ADDL	R2,R3		; Advance bit pointer by field width
;
;	(1b) Output K=uncode(CODE) and set FINCHAR (R4) to K
;
	CMPL	R8,R9		; Exceeding output buffer size?
	BEQL	555$		; Yes, quit with error
	MOVB	R10,(R8)+	; Put byte into buffer
	MOVL	R10,R4		; Set FINCHAR
;
;	(2) Read next code to CODE; set INCODE to CODE; exit if EOF
;
22$:	CMPL	R7,R3		; Done?
	BGTR	3$		; No, proceed
	MOVL	#1,R0		; Successful completion
	SUBL3	12(AP),R8,R9	; Compute size of output
29$:	MOVL	R9,@16(AP)	; Store output length
	RET			; Return!
;
;	See if we need to increase bit field size first
;
3$:	CMPL	R5,FMASK	; Current highest code equal to max field?
	BNEQ	33$		; No, skip
	ADDL	R5,FMASK	; Double FMASK
	INCL	FMASK		; Add one more
	INCL	R2		; Advance field size
33$:	EXTZV	R3,R2,(R6),R10	; Extract bit field
	MOVL	R10,R1		; Copy it to INCODE
	ADDL	R2,R3		; Advance bit pointer by field width
;
;	(3) If code is not in string table (special case), then:
;		output (FINCHAR); CODE = OLDCODE;
;
	CMPL	R10,R5		; Compare CODE to maximum known code
	BLEQ	7$		; If less or equal, it's in table
	MOVL	R11,R10		; Copy OLDCODE to CODE
	CMPL	#255,R10	; Is it a single character?
	BLSS	6$		; No, do full code
	MOVL	#1,R0		; Else length of prefix is 1
	MOVB	R4,1(R8)	; Store FINCHAR
	PUSHL	#2		; Push actual length
	BRB	8$		; Jump into loop
;
;	This code has to go somewhere -- this is a convenient spot for BRB's
;
555$:	MOVL	#^X2C,R0	; Report abort
	CLRL	R9		; No data
	BRB	29$		;
6$:	MOVZWL	VALLEN-512[R10],R0	; Get length of result - 1
	MOVB	R4,(R8)[R0]	; Store FINCHAR
	ADDL3	#1,R0,-(SP)	; Push actual length
	BRB	8$		; Jump into loop
;
;	(4a) See if CODE has a prefix string (i.e. >256) or not
;
7$:	CMPL	#255,R10	; Single character or not?
	BGEQ	10$		; Single character
;
;	(4b) CODE has a prefix string; put its last character at length
;		position, change code to prefix code, and repeat until
;		prefix <256
;
	MOVZWL	VALLEN-512[R10],R0	; Get length of result
	PUSHL	R0			; Save the length
	ADDL	R8,R0			; Figure out how far we're going
	CMPL	R0,R9			; Too far?
	BGTR	555$			; Yes, quit with error
	MOVL	(SP),R0			; Restore length
8$:	DECL	R0			; Back it off one
	BEQL	9$			; When down to single character, done
	MOVB	SUFFIX-256[R10],(R8)[R0]; Store into buffer
	MOVZWL	PREFIX-512[R10],R10	; Get prefix code
	BRB	8$			; Loop
9$:	MOVB	R10,(R8)		; Store final character
	MOVL	(SP)+,R0		; Restore length
	ADDL	R0,R8			; Advance output pointer
	MOVL	R10,R4			; Set FINCHAR to final character
	BRB	11$			; Skip single-character case
;
;	(5) Output single character, and (6) empty stack (not needed here!)
;
10$:	CMPL	R8,R9		; Exceeding output buffer size?
	BEQL	555$		; Yes, quit with error
	MOVB	R10,(R8)+	; Store character
	MOVL	R10,R4		; Set FINCHAR to CODE
;
;	(7) Add (OLDCODE,K) to string table, set OLDCODE = INCODE, goto (2)
;
11$:	INCL	R5			; Advance to next code
	MOVW	R11,PREFIX-512[R5]	; Store prefix (OLDCODE)
	MOVB	R10,SUFFIX-256[R5]	; Store suffix
	CMPL	R11,#255		; Was OLDCODE single character?
	BGTR	12$			; No, get its length
	MOVL	#1,R0			; Length of single character is 1
	BRB	13$			; Merge
12$:	MOVZWL	VALLEN-512[R11],R0	; Get OLDCODE length
13$:	ADDW3	#1,R0,VALLEN-512[R5]	; Store length of new code
	MOVL	R1,R11			; Copy INCODE to OLDCODE
	BRW	22$			; Go to (2)
;
	.END
