samtools · Jan 27, 2023
diff --git a/‎CRAMcodecs.tex
+285-219 b/‎CRAMcodecs.tex
+285-219
@@ -134,6 +134,11 @@ \subsection{Pseudocode introduction}
 have many models.  Here we use an object oriented way of describing
 the problem with $instance$.\textsc{Function} notation.
 
+Note some functions may return multiple items, such as
+\texttt{return (}\textit{value, length}\texttt{)}, but the calling
+code may assign a single variable to this result.  In this case the first
+value \textit{value} will be used and \textit{length} will be discarded.
+
 \subsection{Mathematical operators}
 
 \begin{tabular}{rl}
@@ -154,9 +159,9 @@ \subsection{Mathematical operators}
 $a \bitor b$  & Bit-wise OR operator, joining values $a$, $b$\\
 $a \logor b$  & Logical OR operator, joining expressions $a$, $b$\\
 $a \logand b$ & Logical AND operator, joining expressions $a$, $b$\\
-$a \concat b$ & String concatenation of $a$ and $b$: $ab$.\\
-$V_i$         & Element $i$ of vector $V$.\\
-              & The entire vector $V$ may be passed into a function.\\
+$a \concat b$ & String concatenation of $a$ and $b$: $ab$\\
+$V_i$         & Element $i$ of vector $V$\\
+              & The entire vector $V$ may be passed into a function\\
 $W_{i,j}$     & Element $i,j$ of two-dimensional vector $W$.\\
               & The entire vector $W$ or a one dimensional slice $W_i$ (of size $j$) may be passed into a function.\\
 \hline
@@ -199,10 +204,12 @@ \subsection{Implicit functions}
 
 \subsection{Other basic functions}
 
+7-bit integer encoding stores values 7-bits at a time with the top bit
+set if further bytes are required.
+
 \begin{algorithmic}[1]
 \Statex
-\Statex \textit{Read a variable sized unsigned integer 7-bits at a time.}
-\Statex \textit{Returns the value and number of bytes read, but caller is permitted to only use value in assignments.}
+\Statex (Read a variable sized unsigned integer 7-bits at a time.  Returns the value.)
 \Function{ReadUint7}{$source$} \Comment{If $source$ is unspecified then it is the default input stream}
   \State $value \gets 0$
   \State $length \gets 0$
@@ -211,18 +218,51 @@ \subsection{Other basic functions}
     \State $value \gets (value \shiftl 7) + (c \bitand 127)$
     \State $length \gets length + 1$
   \Until{$c < 128$}
-  \State \Return ($value$, $length$) \Comment{or just $value$ if caller uses only that}
+  \State \Return $value$
+  \EndFunction
+\end{algorithmic}
+
+ITF8 integer encoding stores the additional number of bytes needed in
+the count of the top bits set in the initial byte (ending with a zero
+bit), followed by any subsequent whole bytes.  See the main CRAM
+specification for more details.
+
+\begin{algorithmic}[1]
+\Statex
+\Statex (Read a variable sized unsigned integer with ITF8 encoding.  Returns the value.)
+\Function{ReadITF8}{$source$} \Comment{If $source$ is unspecified then it is the default input stream}
+  \State $v \gets$ \Call{ReadUint8}{}
+  \If{$v >= \mathtt{0xf0}$}\Comment{1111xxxx => +4 bytes}
+    \State $v \gets (v\ \bitand \mathtt{0x0f}) \shiftl 28$
+    \State $v \gets v + ($ \Call{ReadUint8}{} $\shiftl 20)$
+    \State $v \gets v + ($ \Call{ReadUint8}{} $\shiftl 12)$
+    \State $v \gets v + ($ \Call{ReadUint8}{} $\shiftl  4)$
+    \State $v \gets v + ($ \Call{ReadUint8}{} $\bitand \mathtt{0x0f})$
+  \ElsIf{$v >= \mathtt{0xe0}$}\Comment{1110xxxx => +3 bytes}
+    \State $v \gets (v\ \bitand \mathtt{0x0f}) \shiftl 24$
+    \State $v \gets v + ($ \Call{ReadUint8}{} $\shiftl 16)$
+    \State $v \gets v + ($ \Call{ReadUint8}{} $\shiftl 8)$
+    \State $v \gets v + $ \Call{ReadUint8}{}
+  \ElsIf{$v >= \mathtt{0xc0}$}\Comment{110xxxxx => +2 bytes}
+    \State $v \gets (v\ \bitand \mathtt{0x1f}) \shiftl 16$
+    \State $v \gets v + ($ \Call{ReadUint8}{} $\shiftl 8)$
+    \State $v \gets v + $ \Call{ReadUint8}{}
+  \ElsIf{$v >= \mathtt{0x80}$}\Comment{10xxxxxx => +1 bytes}
+    \State $v \gets (v\ \bitand \mathtt{0x3f}) \shiftl 8$
+    \State $v \gets v + $ \Call{ReadUint8}{}
+  \EndIf
+  \State \Return $v$
   \EndFunction
 \end{algorithmic}
 
-\section{rANS 4x8 - Asymmetric Numeral System}
+\section{rANS 4x8 - Asymmetric Numeral Systems}
 
 % Lifted over from CRAMv3.tex
 
 This is the rANS format first defined in CRAM v3.0.
 
 rANS is the range-coder variant of the Asymmetric Numeral
-System\footnote{J. Duda, \textit{Asymmetric numeral systems: entropy
+Systems\footnote{J. Duda, \textit{Asymmetric numeral systems: entropy
     coding combining speed of Huffman coding with compression rate of
     arithmetic coding}, \url{http://arxiv.org/abs/1311.2540}}.
 
@@ -257,9 +297,9 @@ \subsubsection*{\textbf{rANS 4x8 compressed data structure}}
 \hline
 byte & order & the order of the codec, either 0 or 1\tabularnewline
 \hline
-int & compressed size & the size in bytes of frequency table and compressed blob\tabularnewline
+uint32 & compressed size & the size in bytes of frequency table and compressed blob\tabularnewline
 \hline
-int & data size & raw or uncompressed data size in bytes\tabularnewline
+uint32 & data size & raw or uncompressed data size in bytes\tabularnewline
 \hline
 byte[] & frequency table & byte frequencies of input data written using RLE\tabularnewline
 \hline
@@ -269,12 +309,20 @@ \subsubsection*{\textbf{rANS 4x8 compressed data structure}}
 
 \subsection{\textbf{Frequency table}}
 
-The alphabet used here is simply byte values, so a maximum of 256
-symbols as some values may not be present.
+The alphabet used here has a maximum of 256 possible symbols (all byte
+values), but alphabets with fewer symbols are permitted too.
 
 The symbol frequency table indicates which symbols are present and
 what their relative frequencies are.  The total sum of symbol
-frequencies are normalised to add up to 4095.
+frequencies are normalised to add up to 4095\footnote{While the maths
+  work fine up to 4096, for historical reasons this has always been
+  documented as having a limit of 4095.  Implementations may wish to
+  validate decoding on $<= 4096$, but we recommend they use a limit of
+  4095 in their encoding output.}.  Given rounding differences when
+renormalising to a fixed sum, it is up to the encoder to decide how to
+distribute any remainder or remove excess frequencies.  The normalised
+frequency tables below are examples and not prescriptive of a specific
+normalisation strategy.
 
 Formally, this is an ordered alphabet $\mathbb{A}$ containing symbols $s$ where
 $s_{i}$ with the $i$-th symbol in $\mathbb{A}$, occurring with the frequency $freq_{i}$.
@@ -431,9 +479,9 @@ \subsubsection*{Order-1 encoding}
 used for each rANS state, which will not be used as a context.  In
 extreme cases this may even be the only time that symbol occurs
 anywhere.  While these scenarios represent unnecessary data to store,
-their presence does not invalidate the data format and it may be
-simpler to use a more naive algorithm when producing the frequency
-tables.
+and these frequency entries can be safely omitted, their presence does
+not invalidate the data format and it may be simpler to use a more
+naive algorithm when producing the frequency tables.
 
 
 The above tables are encoded as:
@@ -468,10 +516,9 @@ \subsubsection*{Order-1 encoding}
 0x00                 # end of contexts
 \end{verbatim}
 
-\newpage
 \subsection{rANS entropy encoding}
 
-The encoder takes a symbol $s$ and a current state $x$ (initially zero) to
+The encoder takes a symbol $s$ and a current state $x$ (initially $L$ below) to
 produce a new state $x'$ with function $C$.
 
 {
@@ -543,7 +590,7 @@ \subsection{rANS entropy encoding}
 }
 
 
-The $x' = C(s,x)$ function used for the ${i}-th symbol ${s} is:
+The $x' = C(s,x)$ function used for the $i$-th symbol $s$ is:
 
 {
 \setlength{\parindent}{1cm}
@@ -568,8 +615,8 @@ \subsection{rANS entropy encoding}
 at a time (encoded and decoded in reverse order).
 
 Before every encode $C(s,x)$ we renormalise $x$, shifting out the bottom 8
-bits of $x$ until $x < \mathtt{0x80000} \times freq_i$.  After finishing encoding we
-flush 4 more bytes (lowest 8-bits first) from $x$.
+bits of $x$ until $x < \mathtt{0x80000} \times freq_i$.  After
+finishing all encoding we flush 4 more bytes (lowest 8-bits first) from $x$.
 
 After every decoded $D(x')$ we renormalise $x'$, shifting in the bottom 8
 bits until $x \geq \mathtt{0x800000}$.
@@ -585,16 +632,16 @@ \subsubsection*{Interleaving}
 (so the output bytes get interleaved).
 
 For the Order-1 codec we cannot do this as we need to know the
-previous byte value as the context for the next byte.  Therefore split
+previous byte value as the context for the next byte.  We therefore split
 the input data into 4 approximately equal sized
 fragments\footnote{This was why the `\textbackslash0' $\to$ `a'
   context in the example above had a frequency of 4 instead of 1.}
 starting at $0$, $\lfloor{}len/4\rfloor{}$,
 $\lfloor{}len/4\rfloor{}\times2$ and $\lfloor{}len/4\rfloor{}\times 3$.  Each
 Order-1 codec operates in a cyclic fashion as with Order-0, all
-starting with 0 as their state and sharing the same output buffer. Any
-remainder, when the input buffer is not divisible by 4, is processed at
-the end by the 4th rANS state.
+starting with 0 as their state and sharing the same compressed output
+buffer. Any remainder, when the input buffer is not divisible by 4, is
+processed at the end by the 4th rANS state.
 
 We do not permit Order-1 encoding of data streams smaller than 4
 bytes.
@@ -603,7 +650,7 @@ \subsection{rANS decode pseudocode}
 
 A na\"ive implementation of a rANS decoder follows.
 This pseudocode is for clarity only and is not expected to be performant and we would normally rewrite this to use lookup tables for maximum efficiency.
-The function \textsc{ReadUint8} below is undefined, but is expected to fetch the next single unsigned byte from an unspecified input source.  Similarly for \textsc{ReadITF8} (variable size inetger) and \textsc{ReadUint32} (32-bit unsigned integer in little endian format).
+The function \textsc{ReadUint8} fetches the next single unsigned byte from an unspecified input source.  Similarly for \textsc{ReadITF8} (variable size integer) and \textsc{ReadUint32} (32-bit unsigned integer in little endian format).
 
 \vskip 0.5cm
 
@@ -630,16 +677,15 @@ \subsubsection*{rANS order-0}
 \vskip 0.5cm
 
 \begin{algorithmic}[1]
-\Statex (Reads a table of Order-0 symbol frequencies $F_i$
+\Statex (Reads a table of Order-0 symbol frequencies $F_i$)
 \Statex (and sets the cumulative frequency table $C_{i+1} = C_i+F_i$)
-\Procedure{ReadFrequencies0}{$F, C$}
+\Procedure{ReadFrequencies0}{$F,\ C$}
 \State $s \gets$ \Call{ReadUint8}{}\Comment{Next alphabet symbol}
 \State $last\_sym \gets s$
 \State $rle \gets 0$
 \Repeat
-  \State $f \gets$ \Call{ReadITF8}{}
   \settowidth{\maxwidth}{$C_s$}
-  \State \algalign{F_s}{\gets} $f$
+  \State \algalign{F_s}{\gets} \Call{ReadITF8}{}
   \If{$rle > 0$}
     \settowidth{\maxwidth}{rle\ }
     \State \algalign{rle}{\gets} $rle-1$
@@ -690,7 +736,7 @@ \subsubsection*{rANS order-0}
 \Statex
 \Procedure{RansDecode0}{$output$, $nbytes$}
   \State \Call{ReadFrequencies0}{$F, C$}
-  \For{$j\gets 0 \algorithmicto 3$}\Comment{4 interleaved streams}
+  \For{$j\gets 0 \algorithmicto 3$}\Comment{Initialise the 4 interleaved streams}
     \State $R_j \gets$ \Call{ReadUint32}{}\Comment{Unsigned 32-bit little endian}
   \EndFor
   \For{$i\gets 0 \algorithmicto nbytes-1$}
@@ -707,7 +753,7 @@ \subsubsection*{rANS order-0}
 \subsubsection*{rANS order-1}
 
 As described above, the decode logic is very similar to rANS Order-0 except we have a two dimensional array of frequencies to read and the decode uses the last character as the context for decoding the next one.
-In the pseudocode we demonstrate this by using two dimensional vectors $C_{i,j}$ and $F_{i,j}$.
+In the pseudocode we illustrate this by using two dimensional vectors $C_{i,j}$ and $F_{i,j}$.
 For simplicity, we reuse the Order-0 code by referring to $C_i$ and $F_i$ of the 2D vectors to get a single dimensional vector that operates in the same manner as the Order-0 code.
 This is not necessarily the most efficient implementation.
 
@@ -717,9 +763,9 @@ \subsubsection*{rANS order-1}
 \vskip 0.5cm
 
 \begin{algorithmic}[1]
-\Statex (Reads a table of Order-1 symbol frequencies $F_{i,j}$
+\Statex (Reads a table of Order-1 symbol frequencies $F_{i,j}$)
 \Statex (and sets the cumulative frequency table $C_{i,j+1} = C_{i,j}+F_{i,j}$)
-\Procedure{ReadFrequencies1}{$F, C$}
+\Procedure{ReadFrequencies1}{$F,\ C$}
 \State $sym \gets$ \Call{ReadUint8}{}\Comment{Next alphabet symbol}
 \State $last\_sym \gets sym$
 \State $rle \gets 0$
@@ -742,7 +788,7 @@ \subsubsection*{rANS order-1}
 \Statex
 \Procedure{RansDecode1}{$output$, $nbytes$}
   \State \Call{ReadFrequencies1}{$F, C$}
-  \For{$j\gets 0 \algorithmicto 3$}\Comment{4 interleaved streams}
+  \For{$j\gets 0 \algorithmicto 3$}\Comment{Initialise 4 interleaved streams}
     \State $R_j \gets$ \Call{ReadUint32}{}\Comment{Unsigned 32-bit little endian}
     \State $L_j \gets 0$\Comment{Last symbol}
   \EndFor
@@ -791,15 +837,59 @@ \section{rANS Nx16}
 Frequencies are now stored using uint7 format instead of ITF8.  The
 tables are also stored differently, separating the list of symbols
 present in the alphabet (those with frequency greater than zero) from
-the frequencies themselves.  The symbol list must be stored in
-ascending ASCII order, with their frequency values in the same
-ordering as their corresponding symbols.  For the Order-1 frequency
-table this list of symbols is those used in any context, thus we only
-have one alphabet recorded for all contexts.  This means in some
-contexts some (potentially many) symbols will have zero frequency.  To
-reduce the Order-1 table size an additional zero run-length encoding
-step is used.  Finally the Order-1 frequencies may optionally be
-compressed using the Order-0 rANS Nx16 codec.
+the frequencies themselves.
+
+Finally transformations may be applied to the data prior to
+compression (or after decompression).  These consist of stripe, for
+structured data where every Nth byte is sent to one of N separate
+compression streams, Run Length Encoding replacing repeated strings of
+symbols with a symbol and count, and bit-packing where reduced
+alphabets can combine multiple symbols into a byte prior to entropy
+encoding.
+
+The initial ``Order'' byte is expanded with additional bits to list
+the transformations to be applied.  The specifics of each sub-format
+are listed below, in the order they are applied.
+
+\begin{itemize}
+\item{\textbf{\textsc{Stripe}}:}
+rANS Nx16 with multi-way interleaving (see Section~\ref{sec:ransstripe}).
+
+\item{\textbf{\textsc{NoSize}}:}
+Do not store the size of the uncompressed data stream.
+This information is not required when the data stream is one of the sub-streams in the \textsc{Stripe} format.
+
+\item{\textbf{\textsc{Cat}}:}
+If present, the order bit flag is ignored.
+
+The uncompressed data stream is the same as the compressed stream.
+This is useful for very short data where the overheads of compressing are too high.
+
+\item{\textbf{\textsc{N32}}:}
+Flag indicating whether to interleave 4 or 32 rANS states.
+
+\item{\textbf{\textsc{Order}}:}
+Bit field defining order-0 (unset) or order-1 (set) entropy encoding, as described above by the \textsc{RansDecodeNx16\_0} and \textsc{RansDecodeNx16\_1} functions.
+
+\item{\textbf{\textsc{RLE}}:}
+Bit field defining whether Run Length Encoding has been applied to the data.  If set, the reverse transform will be applied using \textsc{DecodeRLE} after Order-0 or Order-1 uncompression (see Section~\ref{sec:ransRLE}).
+
+\item{\textbf{\textsc{Pack}}:}
+Bit field indicating the data was packed prior to compression (see Section~\ref{sec:ranspack}).  If set, unpack the bits after any RLE decoding has been applied (if required) using the \textsc{DecodePack} function.
+\end{itemize}
+
+\subsection{Frequency tables}
+
+Frequency tables in rANS Nx16 separate the list of symbols from their
+frequencies.  The symbol list must be stored in ascending ASCII order,
+with their frequency values in the same ordering as their
+corresponding symbols.  For the Order-1 frequency table this list of
+symbols is those used in any context, thus we only have one alphabet
+recorded for all contexts.  This means in some contexts some
+(potentially many) symbols will have zero frequency.  To reduce the
+Order-1 table size an additional zero run-length encoding step is
+used.  Finally the Order-1 frequencies may optionally be compressed
+using the Order-0 rANS Nx16 codec.
 
 Frequencies must always add up to a power of 2, but do not necessarily
 have to match the final power of two used in the Order-0 (4096) and
@@ -808,8 +898,6 @@ \section{rANS Nx16}
 This is required as the Order-1 frequencies may be scaled differently
 for each context.
 
-\subsection{Frequency tables}
-
 \begin{algorithmic}[1]
 \Statex (Reads a set of symbols $A$ used in our alphabet)
 \Function{ReadAlphabet}{}
@@ -837,9 +925,9 @@ \subsection{Frequency tables}
 \vskip 0.5cm
 
 \begin{algorithmic}[1]
-\Statex (Reads a table of Order-0 symbol frequencies $F_i$
+\Statex (Reads a table of Order-0 symbol frequencies $F_i$)
 \Statex (and sets the cumulative frequency table $C_{i+1} = C_i+F_i$)
-\Procedure{ReadFrequenciesNx16\_0}{$F, C$}
+\Procedure{ReadFrequenciesNx16\_0}{$F,\ C$}
 \State $F \gets (0,\ ...)$ \Comment{Set to zero for all $i \in \{0, 1,
   ..., 255\}$}
 \State $A \gets$ \Call{ReadAlphabet}{}
@@ -858,7 +946,7 @@ \subsection{Frequency tables}
 
 \begin{algorithmic}[1]
 \Statex (Normalises a table of frequencies $F_i$ to sum to a specified power of 2.)
-\Procedure{NormaliseFrequenciesNx16\_0}{$F$, $bits$}
+\Procedure{NormaliseFrequenciesNx16\_0}{$F,\ bits$}
 \State $tot \gets 0$
 \For{$i\gets 0 \algorithmicto 255$}
   \State $tot \gets tot + F_i$
@@ -900,9 +988,9 @@ \subsection{Frequency tables}
 % unspecified inputs or specified ones?
 
 \begin{algorithmic}[1]
-\Statex (Reads a table of Order-1 symbol frequencies $F_{i,j}$
+\Statex (Reads a table of Order-1 symbol frequencies $F_{i,j}$)
 \Statex (and sets the cumulative frequency table $C_{i,j+1} = C_{i,j}+F_{i,j}$)
-\Procedure{ReadFrequenciesNx16\_1}{$F, C, bits$}
+\Procedure{ReadFrequenciesNx16\_1}{$F,\ C,\ bits$}
 \State $comp \gets$ \Call{ReadUint8}{}
 \State $bits \gets comp \shiftr 4$
 \If{$(comp \logand 1) \ne 0$}
@@ -948,16 +1036,17 @@ \subsection{rANS Nx16 Order-0}
 interleave to different amounts.
 
 \begin{algorithmic}[1]
-\Function{RansGetCumulativeFreqNx16}{$R, bits$}
+\Function{RansGetCumulativeFreqNx16}{$R,\ bits$}
   \State \Return $R\ \bitand ((1 \shiftl bits) -1)$
 \EndFunction
+\Statex
 \Function{RansAdvanceStepNx16}{$R, c, f, bits$}
   \State \Return $f \times (R \shiftr bits) + (R\ \bitand ((1 \shiftl bits) -1)) - c$
 \EndFunction
 \Statex
-\Function{RansRenormNx16}{$source$, $R$}
+\Function{RansRenormNx16}{$R$}
   \If{$R < (1 \shiftl 15)$}
-    \State $R \gets (R \shiftl 16) +$\ \Call{ReadUint16}{$source$}
+    \State $R \gets (R \shiftl 16) +$\ \Call{ReadUint16}{}
   \EndIf
   \State \Return $R$
 \EndFunction
@@ -993,7 +1082,7 @@ \subsection{rANS Nx16 Order-1}
 more complex too.
 
 \begin{algorithmic}[1]
-\Function{RansDecodeNx16\_1}{$len, N$}
+\Function{RansDecodeNx16\_1}{$len,\ N$}
   \State \Call{ReadFrequenciesNx16\_1}{$F$, $C$, $bits$}
   \For{$j \gets 0 \algorithmicto N-1$}
     \State $R_j \gets$ \Call{ReadUint32}{}
@@ -1028,11 +1117,11 @@ \subsection{rANS Nx16 Run Length Encoding}
 
 For symbols that occur many times in succession, we can replace them
 with a single symbol and a count.  In this specification, run lengths
-are always provided for certain symbol values (even if the run length
+are always provided for certain symbols (even if the run length
 is 1) and never for the other symbols (even if many are consecutive).
 
-The data stream is split into two: meta-data holding run-lengths
-and the run-removed data itself.
+The data stream is split into two parts: the meta-data holding
+run-lengths and the run-removed data itself.
 
 \begin{table}[h]
 \centering
@@ -1056,14 +1145,17 @@ \subsection{rANS Nx16 Run Length Encoding}
 \Statex (Reads and optionally uncompresses the blob of run-lengths and the array $L$)
 \Statex (indicating which symbols have associated run-lengths.)
 \Function{DecodeRLEMeta}{$N$}
+  \State $L \gets (0,\ ...)$ \Comment{Set to zero for all $i \in \{0, 1,
+  ..., 255\}$}
   \State $rle\_meta\_len \gets $\Call{ReadUint7}{}
   \State $len \gets $\Call{ReadUint7}{} \Comment{Length of uncompressed O0/O1 data, pre-expansion}
   \If{$rle\_meta\_len \bitand 1$}
     \State $rle\_meta \gets $\Call{ReadData}{$\lfloor{}rle\_meta\_len/2\rfloor{}$}
   \Else
     \State $comp\_meta\_len \gets $\Call{ReadUint7}{}
     \State $rle\_meta \gets $\Call{ReadData}{$comp\_meta\_len$}
-    \State $rle\_meta \gets $\Call{RansDecodeNx16\_0}{$rle\_meta\_len/2$, $source = rle\_meta$, $N$}
+    \State $rle\_meta \gets
+    $\Call{RansDecodeNx16\_0}{$rle\_meta\_len/2$, $N$, $source = rle\_meta$}
     \EndIf
 
   \Statex
@@ -1085,7 +1177,7 @@ \subsection{rANS Nx16 Run Length Encoding}
 
 \begin{algorithmic}[1]
 \Statex (Expands data ($in$) using run-length metadata)
-\Function{DecodeRLE}{$in$, $L$, $metadata$, $in\_len$}
+\Function{DecodeRLE}{$in,\ L,\ metadata,\ in\_len$}
   \State $j \gets 0$
   \For{$i \gets 0 \algorithmicto in\_len - 1$}
     \State $sym \gets$ \Call{ReadUint8}{$in$}
@@ -1125,7 +1217,7 @@ \subsection{rANS Nx16 Bit Packing}
 \hline
 1      & byte   & $nsym$ & Number of distinct symbols\\
 $nsym$ & byte[] & $P$    & Symbol map \\
--?     & uint7  & $len$  & Length of packed data
+?      & uint7  & $len$  & Length of packed data
 \end{tabular}
 \end{table}
 
@@ -1157,7 +1249,7 @@ \subsection{rANS Nx16 Bit Packing}
 data as described above.
 
 \begin{algorithmic}[1]
-\Function{DecodePack}{$data$, $P$, $nsym$, $len$}
+\Function{DecodePack}{$data,\ P,\ nsym,\ len$}
   \State $j \gets 0$ \Comment{Index into $data$; $i$ is index into output}
   \If{$nsym \le 1$} \Comment{Constant value}
     \For{$i \gets 0$ to $len-1$}
@@ -1206,33 +1298,33 @@ \subsection{rANS Nx16 Bit Packing}
 \subsection{Striped rANS Nx16}
 \label{sec:ransstripe}
 
-If we have a series of 32-bit values, we can get better compression by
+If we have a series of 32-bit values, we can often get better compression by
 treating it as a series of 4 8-bit values representing the first to
 last bytes in each 32-bit word, than we can by simply processing it as
 a stream of 8-bit values.
 Each $4^{th}$ byte is sent to its own stream producing 4 interleaved streams, so the $1^{st}$ stream will hold data from byte 0, 4, 8, etc while the $2^{nd}$ stream will hold data from byte 1, 5, 9, etc.
 Each of those four streams is then itself compressed using this compression format.
 
-For example an input block of small unsigned 32-bit little-endian numbers may use RLE for the first three streams as they are mostly zero, and a non-RLE Order-0 entropy encoder of the last stream.
+For example an input block of small unsigned 32-bit little-endian numbers may use RLE for the first three streams as they are mostly zero, and a non-RLE Order-0 entropy encoder for the last stream.
 
-In the general case we describe this as $X$-way interleaved streams.
+In the general case we describe this as $N$-way interleaved streams.
 We can consider this interleaving process to be equivalent to a table
-transpose of $Y$ rows by $X$ columns to $X$ rows by $Y$ columns,
-followed by compressing each $X$ row independently.
+transpose of $M$ rows by $N$ columns to $N$ rows by $M$ columns,
+followed by compressing each $N$ row independently.
 
 The byte stream consists of a 7-bit encoded uncompressed combined
-length, a byte holding the value of $X$, followed by $X$ compressed
+length, a byte holding the value of $N$, followed by $N$ compressed
 lengths also 7-bit encoded.  Finally the data sub-streams themselves,
 each a valid $cdata$ stream, follow.
 
 Normally our $cdata$ format will include the decoded size, but with
 \textsc{Stripe} we can omit this from the internal compressed sub-streams
-as given the total length we know how to compute the sub-lengths.
+(using the \textsc{NoSize} flag) as given the total length we know how to compute the sub-lengths.
 
-Reproducing the original uncompressed data involves decoding the $X$
+Reproducing the original uncompressed data involves decoding the $N$
 sub-streams and interleaving them together again (reversing the table
 transpose).  The uncompressed data length may not necessarily be an exact
-multiple of $X$, in which case the latter uncompressed sub-streams may
+multiple of $N$, in which case the latter uncompressed sub-streams may
 be 1 byte shorter.
 
 As an example starting with input data $D$ we define the transposed data $T$ as:
@@ -1243,18 +1335,18 @@ \subsection{Striped rANS Nx16}
 \hspace{1cm}
 $T = [\ abcde,\ ABCD,\ \underline{A}\underline{B}\underline{C}\underline{D}\ ]$
 
-Note our example data is not a multiple of $X$ long, missing
+Note our example data is not a multiple of $N$ long, missing
 $E\underline{E}$, which gives $T$ fragments of length [5, 4, 4].
 
 If $D_i$ is the $i^{th}$ character in $D$ and $T_{j,i}$ is the
 $i^{th}$ character of the $j^{th}$ substring in $T$, transformations
 between $D$ and $T$ are defined as:
 
 \hspace{1cm}
-$T_{j,i} = D_{i X +j}$
+$T_{j,i} = D_{i N +j}$
 
 \hspace{1cm}
-$D_i = T_{(i \bmod X),\ (i \bdiv X)}$
+$D_i = T_{(i \bmod N),\ (i \bdiv N)}$
 
 
 % Example:
@@ -1329,24 +1421,24 @@ \subsection{Striped rANS Nx16}
 \vskip 0.5cm
 
 \begin{algorithmic}[1]
-\Function{RansDecodeStripe}{$len, N$}
-  \State $X \gets $\Call{ReadUint8}{}
-  \For{$j \gets 0$ to $X$} \Comment{Fetch X compressed lengths}
+\Function{RansDecodeStripe}{$len$}
+  \State $N \gets $\Call{ReadUint8}{}
+  \For{$j \gets 0$ to $N - 1$} \Comment{Fetch N compressed lengths}
     \State $clen_j \gets $\Call{ReadUint7}{}
   \EndFor
   \Statex
-  \For{$j \gets 0$ to $X$} \Comment{Decode X streams}
-    \State $ulen_j \gets (len \bdiv X) + ((len \bmod X) > j)$
+  \For{$j \gets 0$ to $N - 1$} \Comment{Decode N streams}
+    \State $ulen_j \gets (len \bdiv N) + ((len \bmod N) > j)$
 \Comment{$(x > y)$ expression being 1 if true, 0 if false}
     \State $T_j \gets $\Call{RansDecodeNx16}{$ulen_j$}
   \EndFor
   \Statex
 %  \For{$i \gets 0$ to $len - 1$} \Comment{Interleave}
-%    \State $out_i \gets T_{(i \bmod X),\ (i \bdiv X)}$
+%    \State $out_i \gets T_{(i \bmod N),\ (i \bdiv N)}$
 %  \EndFor
-  \For{$j \gets 0$ to $X - 1$} \Comment{Stripe}
+  \For{$j \gets 0$ to $N - 1$} \Comment{Stripe}
     \For{$i \gets 0$ to $ulen_j - 1$}
-      \State $out_{i \times X + j} \gets T_{j,i}$
+      \State $out_{i \times N + j} \gets T_{j,i}$
     \EndFor
   \EndFor
   \State \Return $out$
@@ -1387,9 +1479,9 @@ \subsection{Combined rANS Nx16 Format}
 \multicolumn{6}{|l|}{}\\[-0.3em]
 \multicolumn{6}{|l|}{\textit{If \textsc{Stripe} flag is set:} } \\
 \cline{2-5}
-& ? & uint8 & X & Number of sub-streams & \\
-& ? & uint7[] & clen[] & X copies of compressed sub-block length & \\
-& ? & uint8[] & cdata[] & X copies of Compressed data sub-block (recurse) & \\
+& 8 & uint8 & N & Number of sub-streams & \\
+& ? & uint7[] & clen[] & N copies of compressed sub-block length & \\
+& ? & uint8[] & cdata[] & N copies of Compressed data sub-block (recurse) & \\
 \cline{2-5}
 
 \multicolumn{6}{|l|}{}\\[-0.7em]
@@ -1407,7 +1499,7 @@ \subsection{Combined rANS Nx16 Format}
 \multicolumn{6}{|l|}{}\\[-0.7em]
 \multicolumn{6}{|l|}{\textit{If \textsc{RLE} flag is set (and neither \textsc{Stripe} or \textsc{Cat} flags are set):} } \\
 \cline{2-5}
-& ? & uint8[] & rle\_meta & RLE meta-data.\\
+& ? & uint8[] & rle\_meta & RLE meta-data\\
 \cline{2-5}
 
 \multicolumn{6}{|l|}{}\\[-0.7em]
@@ -1428,19 +1520,18 @@ \subsection{Combined rANS Nx16 Format}
 \hline
 \textbf{Bit AND value} & \textbf{Code} & \textbf{Description} \\
 \hline
-1 & \textsc{Order} & Order-0 or Order-1 entropy coding. \\
+1 & \textsc{Order} & Order-0 or Order-1 entropy coding\\
 2 & reserved & Reserved (for possible order-2/3)\\
 4 & \textsc{N32} & Interleave $N=32$ rANS states (else $N=4$)\\
-8 & \textsc{Stripe}\tnote{\textbf{a}} & multi-way interleaving of byte streams.\\
+8 & \textsc{Stripe}\tnote{\textbf{$*$}} & multi-way interleaving of byte streams\\
 16 & \textsc{NoSize} & original size is not recorded (for use by \textsc{Stripe})\\
 32 & \textsc{Cat} & Data is uncompressed\\
 64 & \textsc{RLE} & Run length encoding, with runs and literals encoded separately\\
-128 & \textsc{Pack} & Pack 2, 4, 8 or infinite symbols per byte.\\
+128 & \textsc{Pack} & Pack 2, 4, 8 or infinite symbols per byte\\
 \hline
 \end{tabular}
 \begin{tablenotes}
-\item{\textbf{a.}} Not to be used in conjunction with other bit-field
-  values except \textsc{NoSize}.
+\item{\footnotesize{($*$)}} \footnotesize{Not to be used in conjunction with other bit-field values except \textsc{NoSize}.}
 \end{tablenotes}
 \end{threeparttable}
 
@@ -1499,41 +1590,11 @@ \subsection{Combined rANS Nx16 Format}
   \EndFunction
 \end{algorithmic}
 
-The specifics of each sub-format are described below, in the order (minus meta-data specific shuffling) they are applied.
-
-\begin{itemize}
-\item{\textbf{\textsc{Stripe}}:}
-rANS Nx16 with multi-way interleaving (see Section~\ref{sec:ransstripe}).
-
-\item{\textbf{\textsc{NoSize}}:}
-Do not store the size of the uncompressed data stream.
-This information is not required when the data stream is one of the four sub-streams in the \textsc{Stripe} format.
-
-\item{\textbf{\textsc{Cat}}:}
-If present, the order bit flag is ignored.
-
-The uncompressed data stream is the same as the compressed stream.
-This is useful for very short data where the overheads of compressing are too high.
-
-\item{\textbf{\textsc{N32}}:}
-Flag indicating whether to interleave 4 or 32 rANS states.
-
-\item{\textbf{\textsc{Order}}:}
-Bit field defining order-0 (unset) or order-1 (set) entropy encoding, as described above by the \textsc{RansDecodeNx16\_0} and \textsc{RansDecodeNx16\_1} functions.
-
-\item{\textbf{\textsc{RLE}}:}
-Bit field defining whether Run Length Encoding has been applied to the data.  If set, the reverse transorm will be applied using \textsc{DecodeRLE} after Order-0 or Order-1 uncompression (see Section~\ref{sec:ransRLE}).
-
-\item{\textbf{\textsc{Pack}}:}
-Bit field indicating the data was packed prior to compression (see Section~\ref{sec:ranspack}).  If set, unpack the bits after any RLE decoding has been applied (if required) using the \textsc{DecodePack} function.
-
-\end{itemize}
-
 \section{Range coding}
 
 The range coder is a byte-wise arithmetic coder that operates by
 repeatedly reducing a probability range (for example 0.0 to 1.0) one
-symbol (byte) at a time with the complete compressed data can be
+symbol (byte) at a time, with the complete compressed data being
 represented by any value within the final range.
 
 This is easiest demonstrated with a worked example, so let us imagine
@@ -1563,27 +1624,25 @@ \section{Range coding}
 table footnotes below for the worked mathematics.
 
 \begin{threeparttable}[t]
-\begin{tabular}{rrrrr}
+\begin{tabular}{rrrrrrr}
 \hline
-\textbf{Range low} & \textbf{Range high} & \textbf{Symbol} & \textbf{Symbol low} & \textbf{Symbol high}\\
+\textbf{Range low} & \textbf{/ high} & \textbf{Symbol} & \textbf{Sym. low} & \textbf{/ high} & \textbf{New range low} & \textbf{New range high}\\
 \hline
-0.000 & 1.000 & c & 0.2 & 0.5\\
-0.200 & 0.500 & a & 0.8 & 1.0\\
-0.440\tnote{\textbf{a}} & 0.500\tnote{\textbf{a}} & t & 0.0 & 0.2\\
+0.000 & 1.000 & c & 0.2 & 0.5 & $0+(1-0)\times.2$ & $0+(1-0)\times.5$\\
+0.200 & 0.500 & a & 0.8 & 1.0 & $.2+(.5-.2)\times.8$& $.2+(.5-.2)\times 1$\\
+0.440 & 0.500 & t & 0.0 & 0.2 & $.44+(.5-.44)\times 0$ & $.44+(.5-.44)\times .2$\\
 0.440 & 0.452 & <end>\\
 \hline
 \end{tabular}
-\begin{tablenotes}
-\item{\textbf{a.}} Old range 0.2 to 0.5 plus symbol range 0.8 to 1.0 gives an updated range of 0.44 to 0.5:\\
- $0.2 + 0.8\times(0.5-0.2) = 0.44$\\
-$0.2 + 1.0\times(0.5-0.2) = 0.50$
-\end{tablenotes}
 \end{threeparttable}
 
 Our final range is 0.44 to 0.452 with any value in that range representing
 ``cat'', thus 0.45 would suffice.  A pictorial example of this process is below.
 
+\begin{figure}[h]
 \includegraphics[height=250pt, keepaspectratio=true]{img/range_code.png}
+\caption{A pictorial demonstration of range reduction.}
+\end{figure}
 
 Decoding is simply the reverse of this.  In the above picture we can see that 0.45 would read off `c', `a' and `t' by repeatedly comparing the symbol ranges to the current range and using those to identify the symbol and produce a new range.
 
@@ -1640,7 +1699,7 @@ \section{Range coding}
 \end{algorithmic}
 
 \begin{algorithmic}[1]
-\Procedure{RangeDecode}{$sym\_low, sym\_freq, tot\_freq$}
+\Procedure{RangeDecode}{$sym\_low,\ sym\_freq,\ tot\_freq$}
   \settowidth{\maxwidth}{range\ }
   \State \algalign{code}{\gets} $code - sym\_low \times range$
   \State \algalign{range}{\gets} $range \times sym\_freq$
@@ -1660,7 +1719,7 @@ \section{Range coding}
 The \textsc{RangeEncode} function is a straightforward reversal of \textsc{RangeDecode}, with the exception of the special code for shifting the top byte out of the $low$ variable.
 
 \begin{algorithmic}[1]
-\Procedure{RangeEncode}{$sym\_low, sym\_freq, tot\_freq$}
+\Procedure{RangeEncode}{$sym\_low,\ sym\_freq,\ tot\_freq$}
   \settowidth{\maxwidth}{old\_low\ }
   \State \algalign{old\_low}{\gets} $low$
   \State \algalign{range}{\gets} $range \bdiv tot\_freq$
@@ -1730,7 +1789,7 @@ \section{Range coding}
 \end{algorithmic}
 
 
-\subsection{Statistical Modelling}
+\subsection{Adaptive Modelling}
 
 The probabilities passed to the range coder may be fixed for all scenarios (as we had in the ``cat'' example), or they may be adaptive and context aware.
 For example the letter `u' occurs around 3\% of the time in English text, but if the previous letter was `q' it is close to 100\% and if the previous letter was `u' it is close to 0\%.
@@ -1861,10 +1920,9 @@ \subsection{RLE with Order-0 and Order-1 Encoding}
 (if $\ge 4$) and 257 for any further continuation runs.  Thus encoding
 10 `A' characters would first store symbol `A' followed by run length
 3 (with context `A'), length 3 (context 256), length 3 (context
-257), and length 1 (context 258).
+257), and length 1 (context 257).
 
-For example, if we have the string ``RRRRUNN'' we will decode
-symbol `R' run 3, symbol `U' run 0, symbol `N' run 1.
+For example, if we have the string ``ABBCCCCDDDDD'' we will record ``A''<0> ``B''<1> ``C''<3,0> and ``D''<3,1>.
 
 \begin{algorithmic}[1]
 \Function{DecodeRLE0}{$len$}
@@ -1962,7 +2020,7 @@ \subsection{RLE with Order-0 and Order-1 Encoding}
 \multicolumn{6}{|l|}{}\\[-0.3em]
 \multicolumn{6}{|l|}{\textit{If \textsc{Stripe} flag is set:} } \\
 \cline{2-5}
-& ? & uint8 & X & Number of sub-streams & \\
+& 8 & uint8 & N & Number of sub-streams & \\
 & ? & uint7[] & clen[] & N copies of compressed sub-block length & \\
 & ? & uint8[] & cdata[] & N copies of Compressed data sub-block (recurse) & \\
 \cline{2-5}
@@ -1997,20 +2055,19 @@ \subsection{RLE with Order-0 and Order-1 Encoding}
 \hline
 \textbf{Bit AND value} & \textbf{Code} & \textbf{Description} \\
 \hline
-1 & \textsc{Order}\tnote{\textbf{a}} & Order-0 or Order-1 entropy coding. \\
+1 & \textsc{Order}\tnote{\textbf{$*$}} & Order-0 or Order-1 entropy coding\\
 2 & reserved & Reserved (for possible order-2/3)\\
 4 & \textsc{Ext} & ``External'' compression via bzip2\\
-8 & \textsc{Stripe}\tnote{\textbf{b}} & N-way interleaving of byte streams.\\
-16 & \textsc{NoSize} & original size is not recorded (used by \textsc{Stripe})\\
-32 & \textsc{Cat}\tnote{\textbf{b}} & Data is uncompressed\\
-64 & \textsc{RLE}\tnote{\textbf{a}} & Run length encoding, with runs and literals encoded separately\\
-128 & \textsc{Pack} & Pack 2, 4, 8 or infinite symbols per byte.\\
+8 & \textsc{Stripe}\tnote{\textbf{\dag}} & N-way interleaving of byte streams\\
+16 & \textsc{NoSize} & Original size is not recorded (used by \textsc{Stripe})\\
+32 & \textsc{Cat}\tnote{\textbf{\dag}} & Data is uncompressed\\
+64 & \textsc{RLE}\tnote{\textbf{$*$}} & Run length encoding, with runs and literals encoded separately\\
+128 & \textsc{Pack} & Pack 2, 4, 8 or infinite symbols per byte\\
 \hline
 \end{tabular}
 \begin{tablenotes}
-\item{\textbf{a.}} Has no effect when \textsc{Ext} flag is set.
-\item{\textbf{b.}} Not to be used in conjunction with other flags
-  except \textsc{Pack} and \textsc{NoSize}.
+\item{\footnotesize{($*$)}} \footnotesize{Has no effect when \textsc{Ext} flag is set.}
+\item{\footnotesize{(\dag)}} \footnotesize{Not to be used in conjunction with other flags except \textsc{Pack} and \textsc{NoSize}.}
 \end{tablenotes}
 \end{threeparttable}
 
@@ -2031,37 +2088,37 @@ \subsection{RLE with Order-0 and Order-1 Encoding}
 \begin{algorithmic}[1]
 \Function{ArithDecode}{$len$}
   \State $flags \gets $\Call{ReadUint8}{}
-  \If{$flags \bitand$ \textsc{NoSize} $\ne 0$}
+  \If{$flags\ \bitand$ \textsc{NoSize} $\ne 0$}
     \State $len \gets$\Call{ReadUint7}{}
   \EndIf
-  \If{$flags \bitand$ \textsc{Stripe}}
-    \State $data \gets $\Call{DecodeSTRIPE}{$len$}
+  \If{$flags\ \bitand$ \textsc{Stripe}}
+    \State $data \gets $\Call{DecodeStripe}{$len$}
     \State \Return $data$
   \EndIf{}
-  \If{$flags \bitand$ \textsc{Pack}}
+  \If{$flags\ \bitand$ \textsc{Pack}}
     \State $pack\_len \gets len$
     \State $(P,\ nsym,\ len) \gets $\Call{DecodePackMeta}{}
   \EndIf
   \Statex \Comment{Entropy Decoding}
-  \If{$flags \bitand$ \textsc{Cat}}
+  \If{$flags\ \bitand$ \textsc{Cat}}
     \State $data \gets $\Call{ReadData}{$len$}
-  \ElsIf{$flags \bitand$ \textsc{Ext}}
+  \ElsIf{$flags\ \bitand$ \textsc{Ext}}
     \State $data \gets $\Call{DecodeEXT}{$len$}
-  \ElsIf{$flags \bitand$ \textsc{RLE}}
-    \If{$flags \bitand$ \textsc{Order}}
+  \ElsIf{$flags\ \bitand$ \textsc{RLE}}
+    \If{$flags\ \bitand$ \textsc{Order}}
       \State $data \gets $\Call{DecodeRLE1}{$len$}
     \Else
       \State $data \gets $\Call{DecodeRLE0}{$len$}
     \EndIf
   \Else
-    \If{$flags \bitand$ \textsc{Order}}
+    \If{$flags\ \bitand$ \textsc{Order}}
       \State $data \gets $\Call{DecodeOrder1}{$len$}
     \Else
       \State $data \gets $\Call{DecodeOrder0}{$len$}
     \EndIf
   \EndIf
   \Statex \Comment{Apply data transformations}
-  \If{$flags \bitand$ \textsc{Pack}}
+  \If{$flags\ \bitand$ \textsc{Pack}}
     \State $data \gets $\Call{DecodePack}{$data$, $P$, $nsym$, $pack\_len$}
   \EndIf
   \State \Return $data$
@@ -2086,7 +2143,7 @@ \subsection{RLE with Order-0 and Order-1 Encoding}
 their own algorithm.
 
 \begin{algorithmic}[1]
-\Function{RansDecodeStripe}{$len$}
+\Function{DecodeStripe}{$len$}
   \State $N \gets $\Call{ReadUint8}{}
   \For{$j \gets 0$ to $N - 1$} \Comment{Fetch N compressed lengths}
     \State $clen_j \gets $\Call{ReadUint7}{}
@@ -2101,7 +2158,7 @@ \subsection{RLE with Order-0 and Order-1 Encoding}
 %  \For{$i \gets 0$ to $len - 1$} \Comment{Interleave}
 %    \State $out_i \gets T_{(i \bmod N),\ (i \bdiv N)}$
 %  \EndFor
-  \For{$j \gets 0$ to $N - 1$} \Comment{Interleave}
+  \For{$j \gets 0$ to $N - 1$} \Comment{Stripe}
     \For{$i \gets 0$ to $ulen_j - 1$}
       \State $out_{i \times N + j} \gets T_{j,i}$
     \EndFor
@@ -2161,7 +2218,7 @@ \subsection{RLE with Order-0 and Order-1 Encoding}
 \section{Name tokenisation codec}
 
 Sequence names (identifiers) typically follow a structured pattern and compression based on columns within those structures usually leads to smaller sizes.
-The sequence name (identifier) tokenisation relies heavily on the General Purpose Entropy Encoder described above.
+The sequence name (identifier) tokenisation relies heavily on the rANS Nx16 and Adaptive arithmetic coders described above.
 
 As an example, take a series of names:
 
@@ -2191,17 +2248,17 @@ \section{Name tokenisation codec}
 \hline
 \textbf{ID} & \textbf{Type} & \textbf{Value} & \textbf{Description}\\
 \hline
- 0 & TYPE    & Type    & Used to determine the type of token at a given position. \\
+ 0 & TYPE    & Type    & Used to determine the type of token at a given position \\
 \hline
- 5 & DUP     & Integer (distance) & The entire name is a duplicate of an earlier one.  Used in position 0 only.\\
- 6 & DIFF    & Integer (distance) & The entire name is differs to earlier ones.  Used in position 0 only.\\
+ 5 & DUP     & Integer (distance) & The entire name is a duplicate of an earlier one.  Used in position 0 only\\
+ 6 & DIFF    & Integer (distance) & The entire name differs to earlier ones.  Used in position 0 only\\
 \hline
  1 & STRING  & String  & A nul-terminated string of characters \\
  2 & CHAR    & Byte    & A single character \\
  7 & DIGITS  & $0 \le$ Int $< 2^{32}$ & A numerical value, not containing a leading zero \\
  3 & DIGITS0 & $0 \le$ Int $< 2^{32}$ & A numerical value possibly starting with leading zeros \\
- 4 & DZLEN   & Int length & Length of associated DIGITS0 token.\\
- 8 & DELTA  & $0 \le$ Int $< 256$   & A numeric value being stored as the difference to the numeric value of this token on the previous name. \\
+ 4 & DZLEN   & Int length & Length of associated DIGITS0 token\\
+ 8 & DELTA  & $0 \le$ Int $< 256$   & A numeric value being stored as the difference to the numeric value of this token on the previous name \\
  9 & DELTA0 & $0 \le$ Int $< 256$ & As DELTA, but for numeric values starting with leading zeros \\
 10 & MATCH   & (none) & This token is identical type and value to the same position in the previous name (NB: not permitted for DELTA/DELTA0)\\
 11 & NOP & (none) & Does nothing\\
@@ -2238,7 +2295,7 @@ \section{Name tokenisation codec}
 These types are fetched for position 0, at the start of each new
 identifier.  The value is an integer value describing how many reads
 before this (with 1 being the immediately previous name) we are
-comparing against.  When we refer to ``previous name'' below, we
+comparing against.  When we subsequently refer to ``previous name'' below, we
 always mean the one indicated by the DIFF field and not the one
 immediately prior to the current name.
 
@@ -2427,18 +2484,18 @@ \section{Name tokenisation codec}
 \cline{3-6}
 & \multicolumn{5}{l|}{\textit{else if not duplicate}}\\
 \cline{3-6}
-& & ? & i7 & $clen$ & compressed length (7-bit encoding)\\
-& & $clen$ & $cdata$ & stream & compressed data stream\\
+& & ? & uint7 & $clen$ & compressed length\\
+& & $clen$ & uint8[] & $cdata$ & compressed data stream\\
 \hline
 \end{tabular}
 
 A few tricks are used to remove some byte streams.  In addition to the explicit marking of duplicate byte streams, if a byte stream of token types is entirely MATCH apart from the very first value it is discarded.  It is possible to regenerate this during decode by observing the other byte streams.  For example if we have a byte stream $B_{5,DIGITS}$ but no $B_{5,TYPE}$ then we assume the contents of $B_{5,TYPE}$ consist of one DIGITS type followed by as many MATCH types as are needed.
 
-The $cdata$ stream itself is as described in the General Purpose Entropy Encoder section above, with the \textsc{ArithDecode} function.
+The $cdata$ stream itself is as described in the relevant entropy encoder section above (rANS or arithmetic coding).
 
 \begin{algorithmic}[1]
 \Statex
-\Statex \textit{(Decodes and uncompressed the serialised token byte streams)}
+\Statex \textit{(Decodes and uncompresses the serialised token byte streams)}
 \Function{DecodeTokenByteStreams}{$use\_arith$}
   \State $sz \gets 0$
   \State $t \gets -1$
@@ -2462,9 +2519,9 @@ \section{Name tokenisation codec}
       \State $clen \gets$ \Call{ReadUint7}{}
       \State $data \gets$ \Call{ReadData}{$clen$}
       \If{$use\_arith$}
-        \State $B_{t,type} \gets$ \Call{ArithDecode}{}
+        \State $B_{t,type} \gets$ \Call{ArithDecode}{$clen,\ source=data$}
       \Else
-        \State $B_{t,type} \gets$ \Call{RansDecodeNx16}{}
+        \State $B_{t,type} \gets$ \Call{RansDecodeNx16}{$clen,\ source=data$}
       \EndIf
     \EndIf
   \Until{\Call{EOF}{}}
@@ -2474,7 +2531,7 @@ \section{Name tokenisation codec}
 
 \begin{algorithmic}[1]
 \Statex
-\Statex \textit{(Decodes the $n^{th}$ name, returning $N_n$ and updating $N_n$ and $T_n$)}
+\Statex \textit{(Decodes all names, returning $N$)}
 \Function{DecodeNames}{}
   \State $ulen \gets$ \Call{ReadUint32}{}
   \State $nnames \gets$ \Call{ReadUint32}{}
@@ -2527,12 +2584,13 @@ \section{FQZComp quality codec}
 sequence is the second in a pair, and a running total of number of
 times the quality has changed in this sequence).
 
-For each quality value, the models produce probabilities for all
-possible next quality values, which are passed into an arithmetic
-entropy encoder to encode or decode the actual next quality value.
-The models are then updated based on the actual next quality in order
-to learn the statistical properties of the quality data stream.  This
-step wise update process is identical for both encoding and decoding.
+For each position along the sequence, the models produce probabilities
+for all possible next quality values, which are passed into an
+arithmetic entropy encoder to encode or decode the actual next quality
+value.  The models are then updated based on the actual next quality
+in order to learn the statistical properties of the quality data
+stream.  This stepwise update process is identical for both encoding
+and decoding.
 
 The algorithm is a generalisation of the original fqzcomp program,
 described in \textit{Compression of FASTQ and SAM Format Sequencing
@@ -2552,7 +2610,7 @@ \subsection{FQZComp Models}
 
 The parameter selector model also has no context associated with it
 and encodes $max\_sel$ distinct values.  The selector value may be
-quantised further using $stab$ to reduce the selector to fewer
+quantised further using $stab$ (Selector Table) to reduce the selector to fewer
 sets of parameters.  This is useful if we wish to use the selector
 bits directly in the context using the same parameters.  The selector
 is arbitrary and may be used for distinguishing READ1 from READ2, as
@@ -2569,7 +2627,31 @@ \subsection{FQZComp Models}
 There are 4 read length models each having $max\_sym$ of 256.  Each
 model is used for the 4 successive bytes in a 32-bit length value.
 
-\begin{center}
+The entropy encoder used is shared between all models, so the bit
+streams are multiplexed together.
+
+The 16-bit quality value context is constructed by adding sub-contexts
+together consisting of previous quality values, position along the
+current record, a running count (per record) of how many times the
+quality value has differed to the previous one (delta), and an
+arbitrary stored selector value, each shifted to a defined location
+within the combined context value ($qloc$, $ploc$, $dloc$ and
+$sloc$ respectively).  The qual, pos and delta sub-contexts are
+computed from the previous data while the selector, if used, is read
+directly from the compressed data stream.  The selector may be used to
+switch parameter sets, or simply to group quality strings into
+arbitrary user-defined sub-sets.  The numeric values for each of these
+components can be passed through lookup tables ($qtab$ for quality,
+$ptab$ for positions, $dtab$ for running delta and $stab$ for turning
+the selector $s$ into a parameter index $x$).  These all convert the
+monotonically increasing range 0$\rightarrow$M to a (usually smaller)
+monotonically increasing 0$\rightarrow$N.  For example if we wish to
+use the approximate position along a 100 byte string, we may uniformly
+map 0$\rightarrow$127 to 0$\rightarrow$15 to utilise 4 bits of our
+16-bit combined context.
+
+\begin{figure}[h]
+\centering
 \begin{tikzpicture}[
   boxed/.style={rectangle, draw=black, text width=1cm},
   boxed1/.style={rectangle, draw=black},
@@ -2646,42 +2728,20 @@ \subsection{FQZComp Models}
 \node[below right,boxed1] (d3) at (d2.south west) {\footnotesize{3}};
 
 \end{tikzpicture}
-\end{center}
-
-The entropy encoder used is shared between all models, so the bit
-streams are multiplexed together.
-
-The 16-bit quality value context is constructed by adding sub-contexts
-together consisting of previous quality values, position along the
-current record, a running count (per record) of how many times the
-quality value has differed to the previous one (delta), and an
-arbitrary stored selector value, each shifted to a defined location
-within the combined context value ($qloc$, $ploc$, $dloc$ and
-$sloc$ respectively).  The qual, pos and delta sub-contexts are
-computed from the previous data while the selector, if used, is read
-directly from the compressed data stream.  The selector may be used to
-switch parameter sets, or simply to group quality strings into
-arbitrary user-defined sub-sets.  The numeric values for each of these
-components can be passed through lookup tables ($qtab$ for quality,
-$ptab$ for positions, $dtab$ for running delta and $stab$ for turning
-the selector $s$ into a parameter index $x$).  These all convert the
-monotonically increasing range 0$\rightarrow$M to a (usually smaller)
-monotonically increasing 0$\rightarrow$N.  For example if we wish to
-use the approximate position along a 100 byte string, we may uniformly
-map 0$\rightarrow$127 to 0$\rightarrow$15 to utilise 4 bits of our
-16-bit combined context.
+\caption{An example FQZComp configuration.}
+\end{figure}
 
 As some sequencing instruments produce binned qualities, e.g.\ 0, 10, 25,
 35, these values are squashed to incremental values from 0 to
 $max\_sym-1$ where $max\_sym$ is the maximum number of distinct
 quality values observed.  If this transform is required, the flag
 $have\_qmap$ will be set and a mapping table ($qmap$) will hold the
-original quality values.  The decoded qualities will be the smaller
+original quality values.  The encoded qualities will be the smaller
 mapped range.
 
 The quality sub-context is constructed by shifting left the previous
 quality sub-context by $qshift$ bits and adding the current quality
-after passing through the $qmap$ squashing process and if defined
+after passing through the $qmap$ transform and if defined
 through the $qtab$ lookup table.  The quality context is limited to
 $qbits$ long and is added to the combined context starting at bit
 $qloc$.  The quality sub-context is reset to zero at the start of each
@@ -2718,7 +2778,9 @@ \subsection{FQZComp Models}
 $pos$, $delta$, $prevq$, $qctx$ and $sel$ parameters referred are global and updateable.
 
 \begin{algorithmic}[1]
-\Function{FQZUpdateContext}{$params, q$}
+\Statex
+\Statex (Add quality $q$ to produce and return a new context $ctx$)
+\Function{FQZUpdateContext}{$params,\ q$}
   \State $ctx \gets params.context$ \Comment{Also the initial value}
   \State $qctx \gets (qctx \shiftl params.qshift) + qtab_q$
   \State $ctx   \gets ctx + ((qctx \bitand (2^{params.qbits}-1)) \shiftl params.qloc)$
@@ -2762,7 +2824,7 @@ \subsection{FQZComp Data Stream}
 The start of an FQZComp data stream consists of the parameters used by
 the decoder. The data layout is as follows.
 
-\begin{table}
+\begin{table}[H]
 \centering
 \begin{tabular}{|r|r|r|r|r|p{8cm}|l|l|}
 \hline
@@ -2773,7 +2835,7 @@ \subsection{FQZComp Data Stream}
 \multicolumn{3}{|r|}{8}                                & uint8          & $gflags$                       & \multicolumn{3}{p{8.8cm}|}{Global FQZcomp bit-flags. From lowest bit to highest:}\\
 \multicolumn{3}{|r|}{}                                 &                &                                & \multicolumn{3}{p{8.8cm}|}{1: $multi\_param$: indicates more than one parameter block is present.  Otherwise set $nparam = 1$} \\
 \multicolumn{3}{|r|}{}                                 &                &                                & \multicolumn{3}{p{8.8cm}|}{2: $have\_stab$: indicates the parameter selector is mapped through $stab$.  Otherwise set $stab_i = i$} \\
-\multicolumn{3}{|r|}{}                                 &                &                                & \multicolumn{3}{p{8.8cm}|}{4: $do\_rev$: $model\_revcomp$ will be used. (CRAM v3.1)} \\
+\multicolumn{3}{|r|}{}                                 &                &                                & \multicolumn{3}{p{8.8cm}|}{4: $do\_rev$: $model\_revcomp$ will be used (CRAM v3.1)} \\
 \hline
 
 \multicolumn{8}{|l|}{}\\[-0.7em]
@@ -2799,8 +2861,8 @@ \subsection{FQZComp Data Stream}
                        & \multicolumn{2}{r|}{8}        & uint8          & $pflags$                       & \multicolumn{2}{p{8.4cm}|}{Per-parameter block bit-flags. From lowest bit to highest:} & \\
                        & \multicolumn{2}{r|}{}         &                &                                & \multicolumn{2}{p{8.4cm}|}{1: Reserved} & \\
                        & \multicolumn{2}{r|}{}         &                &                                & \multicolumn{2}{p{8.4cm}|}{2: $do\_dedup$: model\_dup will be used} & \\
-                       & \multicolumn{2}{r|}{}         &                &                                & \multicolumn{2}{p{8.4cm}|}{4: $do\_len$: model\_len will be used for every record.} & \\
-                       & \multicolumn{2}{r|}{}         &                &                                & \multicolumn{2}{p{8.4cm}|}{8: $do\_sel$: model\_sel will be used.} & \\
+                       & \multicolumn{2}{r|}{}         &                &                                & \multicolumn{2}{p{8.4cm}|}{4: $do\_len$: model\_len will be used for every record} & \\
+                       & \multicolumn{2}{r|}{}         &                &                                & \multicolumn{2}{p{8.4cm}|}{8: $do\_sel$: model\_sel will be used} & \\
                        & \multicolumn{2}{r|}{}         &                &                                & \multicolumn{2}{p{8.4cm}|}{16: $have\_qmap$: indicates quality map is present} & \\
                        & \multicolumn{2}{r|}{}         &                &                                & \multicolumn{2}{p{8.4cm}|}{32: $have\_ptab$: Load $ptab$, otherwise position contexts are unused} & \\
                        & \multicolumn{2}{r|}{}         &                &                                & \multicolumn{2}{p{8.4cm}|}{64: $have\_dtab$: Load $dtab$, otherwise delta contexts are unused} & \\
@@ -2878,11 +2940,12 @@ \subsection{FQZComp Data Stream}
 % FIXME: our worked example should include actual bytes for qmap, ptab
 % and dtab too.
 
-
+\pagebreak
 \textsc{FQZDecodeParams} below describes the pseudocode for reading
 the parameter block.
 
 \begin{algorithmic}[1]
+\Statex
 \Procedure{FQZDecodeParams}{}
   \State $vers \gets $\Call{ReadUint8}{}
   \If{$vers \ne 5$}
@@ -2911,6 +2974,7 @@ \subsection{FQZComp Data Stream}
 \end{algorithmic}
 
 \begin{algorithmic}[1]
+\Statex
 \Function{FQZDecodeSingleParam}{}
   \settowidth{\maxwidth}{p.have\_qtab\ }
   \State \algalign{p.context}{\gets} \Call{ReadUint16}{}
@@ -3010,6 +3074,7 @@ \subsection{FQZComp Data Stream}
 then $A$.  The following pseudocode demonstrates this process.
 
 \begin{algorithmic}[1]
+\Statex
 \Function{ReadArray}{$n$}
 \State $i,j,z \gets 0$
 \State $last \gets -1$
@@ -3048,7 +3113,7 @@ \subsection{FQZComp Data Stream}
 \EndFunction
 \end{algorithmic}
 
-The main loop decodes data in the following order per read:  read
+The FQZComp main loop decodes data in the following order per read:  read
 length (if not fixed), the flag for whether this is read 2 (if
 needed), a bit flag to indicate if the quality is duplicated (if
 needed), followed by record length number of quality values using
@@ -3068,6 +3133,7 @@ \subsection{FQZComp Data Stream}
 \algnewcommand{\Label}{\State\unskip}
 
 \begin{algorithmic}[1]
+\Statex
 \Function{FQZNewRecord}{}
   \State $sel \gets 0$
   \State $x \gets 0$
@@ -3083,9 +3149,7 @@ \subsection{FQZComp Data Stream}
   \If{$param.do\_len \logor param.first\_len$} \Comment{Decode read length}
     \State $rec\_len \gets $\Call{DecodeLength}{$rc$}
     \State $param.last\_len \gets rec\_len$
-    \If{$param.do\_len = 0$}
-      \State $param.first\_len = 0$
-    \EndIf
+    \State $param.first\_len \gets 0$
   \Else
     \State $rec\_len \gets param.last\_len$
   \EndIf
@@ -3160,7 +3224,8 @@ \subsection{FQZComp Data Stream}
 Read lengths are encoded as four 8-bit bytes, each having its own model.
 
 \begin{algorithmic}[1]
-\Function{DecodeLength}{rc}
+\Statex
+\Function{DecodeLength}{$rc$}
 \State $rec\_len \gets model\_len_0.$\Call{ModelDecode}{$rc$}
 \State $rec\_len \gets rec\_len + (model\_len_1.$\Call{ModelDecode}{$rc$}$ \shiftl 8)$
 \State $rec\_len \gets rec\_len + (model\_len_2.$\Call{ModelDecode}{$rc$}$ \shiftl 16)$
@@ -3176,6 +3241,7 @@ \subsection{FQZComp Data Stream}
 \textsc{ReverseQualities} procedure called below after decoding.
 
 \begin{algorithmic}[1]
+\Statex
 \Procedure{ReverseQualities}{$qual,\ qual\_len,\ rev,\ len$}
 \State $rec \gets 0$
 \State $i \gets 0$