001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math3.linear;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math3.Field;
023    import org.apache.commons.math3.FieldElement;
024    import org.apache.commons.math3.exception.NoDataException;
025    import org.apache.commons.math3.exception.DimensionMismatchException;
026    import org.apache.commons.math3.exception.NotStrictlyPositiveException;
027    import org.apache.commons.math3.exception.NullArgumentException;
028    import org.apache.commons.math3.exception.NumberIsTooSmallException;
029    import org.apache.commons.math3.exception.OutOfRangeException;
030    import org.apache.commons.math3.exception.util.LocalizedFormats;
031    import org.apache.commons.math3.util.FastMath;
032    import org.apache.commons.math3.util.MathUtils;
033    
034    /**
035     * Cache-friendly implementation of FieldMatrix using a flat arrays to store
036     * square blocks of the matrix.
037     * <p>
038     * This implementation is specially designed to be cache-friendly. Square blocks are
039     * stored as small arrays and allow efficient traversal of data both in row major direction
040     * and columns major direction, one block at a time. This greatly increases performances
041     * for algorithms that use crossed directions loops like multiplication or transposition.
042     * </p>
043     * <p>
044     * The size of square blocks is a static parameter. It may be tuned according to the cache
045     * size of the target computer processor. As a rule of thumbs, it should be the largest
046     * value that allows three blocks to be simultaneously cached (this is necessary for example
047     * for matrix multiplication). The default value is to use 36x36 blocks.
048     * </p>
049     * <p>
050     * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
051     * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
052     * blocks are flattened in row major order in single dimension arrays which are therefore
053     * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
054     * organized in row major order.
055     * </p>
056     * <p>
057     * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
058     * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
059     * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
060     * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
061     * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
062     * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
063     * rectangle.
064     * </p>
065     * <p>
066     * The layout complexity overhead versus simple mapping of matrices to java
067     * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
068     * to up to 3-fold improvements for matrices of moderate to large size.
069     * </p>
070     * @param <T> the type of the field elements
071     * @version $Id: BlockFieldMatrix.java 1416643 2012-12-03 19:37:14Z tn $
072     * @since 2.0
073     */
074    public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
075        /** Block size. */
076        public static final int BLOCK_SIZE = 36;
077        /** Serializable version identifier. */
078        private static final long serialVersionUID = -4602336630143123183L;
079        /** Blocks of matrix entries. */
080        private final T blocks[][];
081        /** Number of rows of the matrix. */
082        private final int rows;
083        /** Number of columns of the matrix. */
084        private final int columns;
085        /** Number of block rows of the matrix. */
086        private final int blockRows;
087        /** Number of block columns of the matrix. */
088        private final int blockColumns;
089    
090        /**
091         * Create a new matrix with the supplied row and column dimensions.
092         *
093         * @param field Field to which the elements belong.
094         * @param rows Number of rows in the new matrix.
095         * @param columns Number of columns in the new matrix.
096         * @throws NotStrictlyPositiveException if row or column dimension is not
097         * positive.
098         */
099        public BlockFieldMatrix(final Field<T> field, final int rows,
100                                final int columns)
101            throws NotStrictlyPositiveException {
102            super(field, rows, columns);
103            this.rows    = rows;
104            this.columns = columns;
105    
106            // number of blocks
107            blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
108            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
109    
110            // allocate storage blocks, taking care of smaller ones at right and bottom
111            blocks = createBlocksLayout(field, rows, columns);
112        }
113    
114        /**
115         * Create a new dense matrix copying entries from raw layout data.
116         * <p>The input array <em>must</em> already be in raw layout.</p>
117         * <p>Calling this constructor is equivalent to call:
118         * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
119         *                                   toBlocksLayout(rawData), false);</pre>
120         * </p>
121         *
122         * @param rawData Data for the new matrix, in raw layout.
123         * @throws DimensionMismatchException if the {@code blockData} shape is
124         * inconsistent with block layout.
125         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
126         */
127        public BlockFieldMatrix(final T[][] rawData)
128            throws DimensionMismatchException {
129            this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
130        }
131    
132        /**
133         * Create a new dense matrix copying entries from block layout data.
134         * <p>The input array <em>must</em> already be in blocks layout.</p>
135         * @param rows  the number of rows in the new matrix
136         * @param columns  the number of columns in the new matrix
137         * @param blockData data for new matrix
138         * @param copyArray if true, the input array will be copied, otherwise
139         * it will be referenced
140         *
141         * @throws DimensionMismatchException if the {@code blockData} shape is
142         * inconsistent with block layout.
143         * @throws NotStrictlyPositiveException if row or column dimension is not
144         * positive.
145         * @see #createBlocksLayout(Field, int, int)
146         * @see #toBlocksLayout(FieldElement[][])
147         * @see #BlockFieldMatrix(FieldElement[][])
148         */
149        public BlockFieldMatrix(final int rows, final int columns,
150                                final T[][] blockData, final boolean copyArray)
151            throws DimensionMismatchException, NotStrictlyPositiveException {
152            super(extractField(blockData), rows, columns);
153            this.rows    = rows;
154            this.columns = columns;
155    
156            // number of blocks
157            blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
158            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
159    
160            if (copyArray) {
161                // allocate storage blocks, taking care of smaller ones at right and bottom
162                blocks = buildArray(getField(), blockRows * blockColumns, -1);
163            } else {
164                // reference existing array
165                blocks = blockData;
166            }
167    
168            int index = 0;
169            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
170                final int iHeight = blockHeight(iBlock);
171                for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
172                    if (blockData[index].length != iHeight * blockWidth(jBlock)) {
173                        throw new DimensionMismatchException(blockData[index].length,
174                                                             iHeight * blockWidth(jBlock));
175                    }
176                    if (copyArray) {
177                        blocks[index] = blockData[index].clone();
178                    }
179                }
180            }
181        }
182    
183        /**
184         * Convert a data array from raw layout to blocks layout.
185         * <p>
186         * Raw layout is the straightforward layout where element at row i and
187         * column j is in array element <code>rawData[i][j]</code>. Blocks layout
188         * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
189         * is split in square blocks (except at right and bottom side where blocks may
190         * be rectangular to fit matrix size) and each block is stored in a flattened
191         * one-dimensional array.
192         * </p>
193         * <p>
194         * This method creates an array in blocks layout from an input array in raw layout.
195         * It can be used to provide the array argument of the {@link
196         * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
197         * constructor.
198         * </p>
199         * @param <T> Type of the field elements.
200         * @param rawData Data array in raw layout.
201         * @return a new data array containing the same entries but in blocks layout
202         * @throws DimensionMismatchException if {@code rawData} is not rectangular
203         *  (not all rows have the same length).
204         * @see #createBlocksLayout(Field, int, int)
205         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
206         */
207        public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
208            throws DimensionMismatchException {
209    
210            final int rows         = rawData.length;
211            final int columns      = rawData[0].length;
212            final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
213            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
214    
215            // safety checks
216            for (int i = 0; i < rawData.length; ++i) {
217                final int length = rawData[i].length;
218                if (length != columns) {
219                    throw new DimensionMismatchException(columns, length);
220                }
221            }
222    
223            // convert array
224            final Field<T> field = extractField(rawData);
225            final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
226            int blockIndex = 0;
227            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
228                final int pStart  = iBlock * BLOCK_SIZE;
229                final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
230                final int iHeight = pEnd - pStart;
231                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
232                    final int qStart = jBlock * BLOCK_SIZE;
233                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
234                    final int jWidth = qEnd - qStart;
235    
236                    // allocate new block
237                    final T[] block = buildArray(field, iHeight * jWidth);
238                    blocks[blockIndex] = block;
239    
240                    // copy data
241                    int index = 0;
242                    for (int p = pStart; p < pEnd; ++p) {
243                        System.arraycopy(rawData[p], qStart, block, index, jWidth);
244                        index += jWidth;
245                    }
246    
247                    ++blockIndex;
248                }
249            }
250    
251            return blocks;
252        }
253    
254        /**
255         * Create a data array in blocks layout.
256         * <p>
257         * This method can be used to create the array argument of the {@link
258         * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
259         * constructor.
260         * </p>
261         * @param <T> Type of the field elements.
262         * @param field Field to which the elements belong.
263         * @param rows Number of rows in the new matrix.
264         * @param columns Number of columns in the new matrix.
265         * @return a new data array in blocks layout.
266         * @see #toBlocksLayout(FieldElement[][])
267         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
268         */
269        public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
270                                                                           final int rows, final int columns) {
271            final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
272            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
273    
274            final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
275            int blockIndex = 0;
276            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
277                final int pStart  = iBlock * BLOCK_SIZE;
278                final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
279                final int iHeight = pEnd - pStart;
280                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
281                    final int qStart = jBlock * BLOCK_SIZE;
282                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
283                    final int jWidth = qEnd - qStart;
284                    blocks[blockIndex] = buildArray(field, iHeight * jWidth);
285                    ++blockIndex;
286                }
287            }
288    
289            return blocks;
290        }
291    
292        /** {@inheritDoc} */
293        @Override
294        public FieldMatrix<T> createMatrix(final int rowDimension,
295                                           final int columnDimension)
296            throws NotStrictlyPositiveException {
297            return new BlockFieldMatrix<T>(getField(), rowDimension,
298                                           columnDimension);
299        }
300    
301        /** {@inheritDoc} */
302        @Override
303        public FieldMatrix<T> copy() {
304    
305            // create an empty matrix
306            BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
307    
308            // copy the blocks
309            for (int i = 0; i < blocks.length; ++i) {
310                System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
311            }
312    
313            return copied;
314        }
315    
316        /** {@inheritDoc} */
317        @Override
318        public FieldMatrix<T> add(final FieldMatrix<T> m)
319            throws MatrixDimensionMismatchException {
320            try {
321                return add((BlockFieldMatrix<T>) m);
322            } catch (ClassCastException cce) {
323    
324                // safety check
325                checkAdditionCompatible(m);
326    
327                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
328    
329                // perform addition block-wise, to ensure good cache behavior
330                int blockIndex = 0;
331                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
332                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
333    
334                        // perform addition on the current block
335                        final T[] outBlock = out.blocks[blockIndex];
336                        final T[] tBlock   = blocks[blockIndex];
337                        final int      pStart   = iBlock * BLOCK_SIZE;
338                        final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
339                        final int      qStart   = jBlock * BLOCK_SIZE;
340                        final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
341                        int k = 0;
342                        for (int p = pStart; p < pEnd; ++p) {
343                            for (int q = qStart; q < qEnd; ++q) {
344                                outBlock[k] = tBlock[k].add(m.getEntry(p, q));
345                                ++k;
346                            }
347                        }
348    
349                        // go to next block
350                        ++blockIndex;
351    
352                    }
353                }
354    
355                return out;
356            }
357        }
358    
359        /**
360         * Compute the sum of {@code this} and {@code m}.
361         *
362         * @param m matrix to be added
363         * @return {@code this + m}
364         * @throws MatrixDimensionMismatchException if {@code m} is not the same
365         * size as {@code this}
366         */
367        public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
368            throws MatrixDimensionMismatchException {
369    
370            // safety check
371            checkAdditionCompatible(m);
372    
373            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
374    
375            // perform addition block-wise, to ensure good cache behavior
376            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
377                final T[] outBlock = out.blocks[blockIndex];
378                final T[] tBlock   = blocks[blockIndex];
379                final T[] mBlock   = m.blocks[blockIndex];
380                for (int k = 0; k < outBlock.length; ++k) {
381                    outBlock[k] = tBlock[k].add(mBlock[k]);
382                }
383            }
384    
385            return out;
386        }
387    
388        /** {@inheritDoc} */
389        @Override
390        public FieldMatrix<T> subtract(final FieldMatrix<T> m)
391            throws MatrixDimensionMismatchException {
392            try {
393                return subtract((BlockFieldMatrix<T>) m);
394            } catch (ClassCastException cce) {
395    
396                // safety check
397                checkSubtractionCompatible(m);
398    
399                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
400    
401                // perform subtraction block-wise, to ensure good cache behavior
402                int blockIndex = 0;
403                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
404                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
405    
406                        // perform subtraction on the current block
407                        final T[] outBlock = out.blocks[blockIndex];
408                        final T[] tBlock   = blocks[blockIndex];
409                        final int      pStart   = iBlock * BLOCK_SIZE;
410                        final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
411                        final int      qStart   = jBlock * BLOCK_SIZE;
412                        final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
413                        int k = 0;
414                        for (int p = pStart; p < pEnd; ++p) {
415                            for (int q = qStart; q < qEnd; ++q) {
416                                outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
417                                ++k;
418                            }
419                        }
420    
421                        // go to next block
422                        ++blockIndex;
423    
424                    }
425                }
426    
427                return out;
428            }
429        }
430    
431        /**
432         * Compute {@code this - m}.
433         *
434         * @param m matrix to be subtracted
435         * @return {@code this - m}
436         * @throws MatrixDimensionMismatchException if {@code m} is not the same
437         * size as {@code this}
438         */
439        public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m) throws MatrixDimensionMismatchException {
440            // safety check
441            checkSubtractionCompatible(m);
442    
443            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
444    
445            // perform subtraction block-wise, to ensure good cache behavior
446            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
447                final T[] outBlock = out.blocks[blockIndex];
448                final T[] tBlock   = blocks[blockIndex];
449                final T[] mBlock   = m.blocks[blockIndex];
450                for (int k = 0; k < outBlock.length; ++k) {
451                    outBlock[k] = tBlock[k].subtract(mBlock[k]);
452                }
453            }
454    
455            return out;
456        }
457    
458        /** {@inheritDoc} */
459        @Override
460        public FieldMatrix<T> scalarAdd(final T d) {
461            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
462    
463            // perform subtraction block-wise, to ensure good cache behavior
464            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
465                final T[] outBlock = out.blocks[blockIndex];
466                final T[] tBlock   = blocks[blockIndex];
467                for (int k = 0; k < outBlock.length; ++k) {
468                    outBlock[k] = tBlock[k].add(d);
469                }
470            }
471    
472            return out;
473        }
474    
475        /** {@inheritDoc} */
476        @Override
477        public FieldMatrix<T> scalarMultiply(final T d) {
478    
479            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
480    
481            // perform subtraction block-wise, to ensure good cache behavior
482            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
483                final T[] outBlock = out.blocks[blockIndex];
484                final T[] tBlock   = blocks[blockIndex];
485                for (int k = 0; k < outBlock.length; ++k) {
486                    outBlock[k] = tBlock[k].multiply(d);
487                }
488            }
489    
490            return out;
491        }
492    
493        /** {@inheritDoc} */
494        @Override
495        public FieldMatrix<T> multiply(final FieldMatrix<T> m)
496            throws DimensionMismatchException {
497            try {
498                return multiply((BlockFieldMatrix<T>) m);
499            } catch (ClassCastException cce) {
500    
501                // safety check
502                checkMultiplicationCompatible(m);
503    
504                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
505                final T zero = getField().getZero();
506    
507                // perform multiplication block-wise, to ensure good cache behavior
508                int blockIndex = 0;
509                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
510    
511                    final int pStart = iBlock * BLOCK_SIZE;
512                    final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
513    
514                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
515    
516                        final int qStart = jBlock * BLOCK_SIZE;
517                        final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
518    
519                        // select current block
520                        final T[] outBlock = out.blocks[blockIndex];
521    
522                        // perform multiplication on current block
523                        for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
524                            final int kWidth      = blockWidth(kBlock);
525                            final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
526                            final int rStart      = kBlock * BLOCK_SIZE;
527                            int k = 0;
528                            for (int p = pStart; p < pEnd; ++p) {
529                                final int lStart = (p - pStart) * kWidth;
530                                final int lEnd   = lStart + kWidth;
531                                for (int q = qStart; q < qEnd; ++q) {
532                                    T sum = zero;
533                                    int r = rStart;
534                                    for (int l = lStart; l < lEnd; ++l) {
535                                        sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
536                                        ++r;
537                                    }
538                                    outBlock[k] = outBlock[k].add(sum);
539                                    ++k;
540                                }
541                            }
542                        }
543    
544                        // go to next block
545                        ++blockIndex;
546    
547                    }
548                }
549    
550                return out;
551            }
552        }
553    
554        /**
555         * Returns the result of postmultiplying {@code this} by {@code m}.
556         *
557         * @param m matrix to postmultiply by
558         * @return {@code this * m}
559         * @throws DimensionMismatchException if the matrices are not compatible.
560         */
561        public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m)
562            throws DimensionMismatchException {
563    
564            // safety check
565            checkMultiplicationCompatible(m);
566    
567            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
568            final T zero = getField().getZero();
569    
570            // perform multiplication block-wise, to ensure good cache behavior
571            int blockIndex = 0;
572            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
573    
574                final int pStart = iBlock * BLOCK_SIZE;
575                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
576    
577                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
578                    final int jWidth = out.blockWidth(jBlock);
579                    final int jWidth2 = jWidth  + jWidth;
580                    final int jWidth3 = jWidth2 + jWidth;
581                    final int jWidth4 = jWidth3 + jWidth;
582    
583                    // select current block
584                    final T[] outBlock = out.blocks[blockIndex];
585    
586                    // perform multiplication on current block
587                    for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
588                        final int kWidth = blockWidth(kBlock);
589                        final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
590                        final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
591                        int k = 0;
592                        for (int p = pStart; p < pEnd; ++p) {
593                            final int lStart = (p - pStart) * kWidth;
594                            final int lEnd   = lStart + kWidth;
595                            for (int nStart = 0; nStart < jWidth; ++nStart) {
596                                T sum = zero;
597                                int l = lStart;
598                                int n = nStart;
599                                while (l < lEnd - 3) {
600                                    sum = sum.
601                                          add(tBlock[l].multiply(mBlock[n])).
602                                          add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
603                                          add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
604                                          add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
605                                    l += 4;
606                                    n += jWidth4;
607                                }
608                                while (l < lEnd) {
609                                    sum = sum.add(tBlock[l++].multiply(mBlock[n]));
610                                    n += jWidth;
611                                }
612                                outBlock[k] = outBlock[k].add(sum);
613                                ++k;
614                            }
615                        }
616                    }
617    
618                    // go to next block
619                    ++blockIndex;
620                }
621            }
622    
623            return out;
624        }
625    
626        /** {@inheritDoc} */
627        @Override
628        public T[][] getData() {
629    
630            final T[][] data = buildArray(getField(), getRowDimension(), getColumnDimension());
631            final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
632    
633            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
634                final int pStart = iBlock * BLOCK_SIZE;
635                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
636                int regularPos   = 0;
637                int lastPos      = 0;
638                for (int p = pStart; p < pEnd; ++p) {
639                    final T[] dataP = data[p];
640                    int blockIndex = iBlock * blockColumns;
641                    int dataPos    = 0;
642                    for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
643                        System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
644                        dataPos += BLOCK_SIZE;
645                    }
646                    System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
647                    regularPos += BLOCK_SIZE;
648                    lastPos    += lastColumns;
649                }
650            }
651    
652            return data;
653        }
654    
655        /** {@inheritDoc} */
656        @Override
657        public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
658                                           final int startColumn,
659                                           final int endColumn)
660            throws OutOfRangeException, NumberIsTooSmallException {
661            // safety checks
662            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
663    
664            // create the output matrix
665            final BlockFieldMatrix<T> out =
666                new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
667    
668            // compute blocks shifts
669            final int blockStartRow    = startRow    / BLOCK_SIZE;
670            final int rowsShift        = startRow    % BLOCK_SIZE;
671            final int blockStartColumn = startColumn / BLOCK_SIZE;
672            final int columnsShift     = startColumn % BLOCK_SIZE;
673    
674            // perform extraction block-wise, to ensure good cache behavior
675            int pBlock = blockStartRow;
676            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
677                final int iHeight = out.blockHeight(iBlock);
678                int qBlock = blockStartColumn;
679                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
680                    final int jWidth = out.blockWidth(jBlock);
681    
682                    // handle one block of the output matrix
683                    final int      outIndex = iBlock * out.blockColumns + jBlock;
684                    final T[] outBlock = out.blocks[outIndex];
685                    final int      index    = pBlock * blockColumns + qBlock;
686                    final int      width    = blockWidth(qBlock);
687    
688                    final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
689                    final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
690                    if (heightExcess > 0) {
691                        // the submatrix block spans on two blocks rows from the original matrix
692                        if (widthExcess > 0) {
693                            // the submatrix block spans on two blocks columns from the original matrix
694                            final int width2 = blockWidth(qBlock + 1);
695                            copyBlockPart(blocks[index], width,
696                                          rowsShift, BLOCK_SIZE,
697                                          columnsShift, BLOCK_SIZE,
698                                          outBlock, jWidth, 0, 0);
699                            copyBlockPart(blocks[index + 1], width2,
700                                          rowsShift, BLOCK_SIZE,
701                                          0, widthExcess,
702                                          outBlock, jWidth, 0, jWidth - widthExcess);
703                            copyBlockPart(blocks[index + blockColumns], width,
704                                          0, heightExcess,
705                                          columnsShift, BLOCK_SIZE,
706                                          outBlock, jWidth, iHeight - heightExcess, 0);
707                            copyBlockPart(blocks[index + blockColumns + 1], width2,
708                                          0, heightExcess,
709                                          0, widthExcess,
710                                          outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
711                        } else {
712                            // the submatrix block spans on one block column from the original matrix
713                            copyBlockPart(blocks[index], width,
714                                          rowsShift, BLOCK_SIZE,
715                                          columnsShift, jWidth + columnsShift,
716                                          outBlock, jWidth, 0, 0);
717                            copyBlockPart(blocks[index + blockColumns], width,
718                                          0, heightExcess,
719                                          columnsShift, jWidth + columnsShift,
720                                          outBlock, jWidth, iHeight - heightExcess, 0);
721                        }
722                    } else {
723                        // the submatrix block spans on one block row from the original matrix
724                        if (widthExcess > 0) {
725                            // the submatrix block spans on two blocks columns from the original matrix
726                            final int width2 = blockWidth(qBlock + 1);
727                            copyBlockPart(blocks[index], width,
728                                          rowsShift, iHeight + rowsShift,
729                                          columnsShift, BLOCK_SIZE,
730                                          outBlock, jWidth, 0, 0);
731                            copyBlockPart(blocks[index + 1], width2,
732                                          rowsShift, iHeight + rowsShift,
733                                          0, widthExcess,
734                                          outBlock, jWidth, 0, jWidth - widthExcess);
735                        } else {
736                            // the submatrix block spans on one block column from the original matrix
737                            copyBlockPart(blocks[index], width,
738                                          rowsShift, iHeight + rowsShift,
739                                          columnsShift, jWidth + columnsShift,
740                                          outBlock, jWidth, 0, 0);
741                        }
742                   }
743                    ++qBlock;
744                }
745                ++pBlock;
746            }
747    
748            return out;
749        }
750    
751        /**
752         * Copy a part of a block into another one
753         * <p>This method can be called only when the specified part fits in both
754         * blocks, no verification is done here.</p>
755         * @param srcBlock source block
756         * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
757         * @param srcStartRow start row in the source block
758         * @param srcEndRow end row (exclusive) in the source block
759         * @param srcStartColumn start column in the source block
760         * @param srcEndColumn end column (exclusive) in the source block
761         * @param dstBlock destination block
762         * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
763         * @param dstStartRow start row in the destination block
764         * @param dstStartColumn start column in the destination block
765         */
766        private void copyBlockPart(final T[] srcBlock, final int srcWidth,
767                                   final int srcStartRow, final int srcEndRow,
768                                   final int srcStartColumn, final int srcEndColumn,
769                                   final T[] dstBlock, final int dstWidth,
770                                   final int dstStartRow, final int dstStartColumn) {
771            final int length = srcEndColumn - srcStartColumn;
772            int srcPos = srcStartRow * srcWidth + srcStartColumn;
773            int dstPos = dstStartRow * dstWidth + dstStartColumn;
774            for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
775                System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
776                srcPos += srcWidth;
777                dstPos += dstWidth;
778            }
779        }
780    
781        /** {@inheritDoc} */
782        @Override
783        public void setSubMatrix(final T[][] subMatrix, final int row,
784                                 final int column)
785            throws DimensionMismatchException, OutOfRangeException,
786            NoDataException, NullArgumentException {
787            // safety checks
788            MathUtils.checkNotNull(subMatrix);
789            final int refLength = subMatrix[0].length;
790            if (refLength == 0) {
791                throw new NoDataException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
792            }
793            final int endRow    = row + subMatrix.length - 1;
794            final int endColumn = column + refLength - 1;
795            checkSubMatrixIndex(row, endRow, column, endColumn);
796            for (final T[] subRow : subMatrix) {
797                if (subRow.length != refLength) {
798                    throw new DimensionMismatchException(refLength, subRow.length);
799                }
800            }
801    
802            // compute blocks bounds
803            final int blockStartRow    = row / BLOCK_SIZE;
804            final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
805            final int blockStartColumn = column / BLOCK_SIZE;
806            final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
807    
808            // perform copy block-wise, to ensure good cache behavior
809            for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
810                final int iHeight  = blockHeight(iBlock);
811                final int firstRow = iBlock * BLOCK_SIZE;
812                final int iStart   = FastMath.max(row,    firstRow);
813                final int iEnd     = FastMath.min(endRow + 1, firstRow + iHeight);
814    
815                for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
816                    final int jWidth      = blockWidth(jBlock);
817                    final int firstColumn = jBlock * BLOCK_SIZE;
818                    final int jStart      = FastMath.max(column,    firstColumn);
819                    final int jEnd        = FastMath.min(endColumn + 1, firstColumn + jWidth);
820                    final int jLength     = jEnd - jStart;
821    
822                    // handle one block, row by row
823                    final T[] block = blocks[iBlock * blockColumns + jBlock];
824                    for (int i = iStart; i < iEnd; ++i) {
825                        System.arraycopy(subMatrix[i - row], jStart - column,
826                                         block, (i - firstRow) * jWidth + (jStart - firstColumn),
827                                         jLength);
828                    }
829    
830                }
831            }
832        }
833    
834        /** {@inheritDoc} */
835        @Override
836        public FieldMatrix<T> getRowMatrix(final int row)
837            throws OutOfRangeException {
838            checkRowIndex(row);
839            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
840    
841            // perform copy block-wise, to ensure good cache behavior
842            final int iBlock  = row / BLOCK_SIZE;
843            final int iRow    = row - iBlock * BLOCK_SIZE;
844            int outBlockIndex = 0;
845            int outIndex      = 0;
846            T[] outBlock = out.blocks[outBlockIndex];
847            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
848                final int jWidth     = blockWidth(jBlock);
849                final T[] block = blocks[iBlock * blockColumns + jBlock];
850                final int available  = outBlock.length - outIndex;
851                if (jWidth > available) {
852                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
853                    outBlock = out.blocks[++outBlockIndex];
854                    System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
855                    outIndex = jWidth - available;
856                } else {
857                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
858                    outIndex += jWidth;
859                }
860            }
861    
862            return out;
863        }
864    
865        /** {@inheritDoc} */
866        @Override
867        public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
868            throws MatrixDimensionMismatchException, OutOfRangeException {
869            try {
870                setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
871            } catch (ClassCastException cce) {
872                super.setRowMatrix(row, matrix);
873            }
874        }
875    
876        /**
877         * Sets the entries in row number <code>row</code>
878         * as a row matrix.  Row indices start at 0.
879         *
880         * @param row the row to be set
881         * @param matrix row matrix (must have one row and the same number of columns
882         * as the instance)
883         * @throws MatrixDimensionMismatchException if the matrix dimensions do
884         * not match one instance row.
885         * @throws OutOfRangeException if the specified row index is invalid.
886         */
887        public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
888            throws MatrixDimensionMismatchException, OutOfRangeException {
889            checkRowIndex(row);
890            final int nCols = getColumnDimension();
891            if ((matrix.getRowDimension() != 1) ||
892                (matrix.getColumnDimension() != nCols)) {
893                throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
894                                                           matrix.getColumnDimension(),
895                                                           1, nCols);
896            }
897    
898            // perform copy block-wise, to ensure good cache behavior
899            final int iBlock = row / BLOCK_SIZE;
900            final int iRow   = row - iBlock * BLOCK_SIZE;
901            int mBlockIndex  = 0;
902            int mIndex       = 0;
903            T[] mBlock  = matrix.blocks[mBlockIndex];
904            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
905                final int jWidth     = blockWidth(jBlock);
906                final T[] block = blocks[iBlock * blockColumns + jBlock];
907                final int available  = mBlock.length - mIndex;
908                if (jWidth > available) {
909                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
910                    mBlock = matrix.blocks[++mBlockIndex];
911                    System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
912                    mIndex = jWidth - available;
913                } else {
914                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
915                    mIndex += jWidth;
916               }
917            }
918        }
919    
920        /** {@inheritDoc} */
921        @Override
922        public FieldMatrix<T> getColumnMatrix(final int column)
923            throws OutOfRangeException {
924            checkColumnIndex(column);
925            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
926    
927            // perform copy block-wise, to ensure good cache behavior
928            final int jBlock  = column / BLOCK_SIZE;
929            final int jColumn = column - jBlock * BLOCK_SIZE;
930            final int jWidth  = blockWidth(jBlock);
931            int outBlockIndex = 0;
932            int outIndex      = 0;
933            T[] outBlock = out.blocks[outBlockIndex];
934            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
935                final int iHeight = blockHeight(iBlock);
936                final T[] block = blocks[iBlock * blockColumns + jBlock];
937                for (int i = 0; i < iHeight; ++i) {
938                    if (outIndex >= outBlock.length) {
939                        outBlock = out.blocks[++outBlockIndex];
940                        outIndex = 0;
941                    }
942                    outBlock[outIndex++] = block[i * jWidth + jColumn];
943                }
944            }
945    
946            return out;
947        }
948    
949        /** {@inheritDoc} */
950        @Override
951        public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
952            throws MatrixDimensionMismatchException, OutOfRangeException {
953            try {
954                setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
955            } catch (ClassCastException cce) {
956                super.setColumnMatrix(column, matrix);
957            }
958        }
959    
960        /**
961         * Sets the entries in column number {@code column}
962         * as a column matrix.  Column indices start at 0.
963         *
964         * @param column Column to be set.
965         * @param matrix Column matrix (must have one column and the same number of rows
966         * as the instance).
967         * @throws MatrixDimensionMismatchException if the matrix dimensions do
968         * not match one instance column.
969         * @throws OutOfRangeException if the specified column index is invalid.
970         */
971        void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
972            throws MatrixDimensionMismatchException, OutOfRangeException {
973            checkColumnIndex(column);
974            final int nRows = getRowDimension();
975            if ((matrix.getRowDimension() != nRows) ||
976                (matrix.getColumnDimension() != 1)) {
977                throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
978                                                           matrix.getColumnDimension(),
979                                                           nRows, 1);
980            }
981    
982            // perform copy block-wise, to ensure good cache behavior
983            final int jBlock  = column / BLOCK_SIZE;
984            final int jColumn = column - jBlock * BLOCK_SIZE;
985            final int jWidth  = blockWidth(jBlock);
986            int mBlockIndex = 0;
987            int mIndex      = 0;
988            T[] mBlock = matrix.blocks[mBlockIndex];
989            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
990                final int iHeight = blockHeight(iBlock);
991                final T[] block = blocks[iBlock * blockColumns + jBlock];
992                for (int i = 0; i < iHeight; ++i) {
993                    if (mIndex >= mBlock.length) {
994                        mBlock = matrix.blocks[++mBlockIndex];
995                        mIndex = 0;
996                    }
997                    block[i * jWidth + jColumn] = mBlock[mIndex++];
998                }
999            }
1000        }
1001    
1002        /** {@inheritDoc} */
1003        @Override
1004        public FieldVector<T> getRowVector(final int row)
1005            throws OutOfRangeException {
1006            checkRowIndex(row);
1007            final T[] outData = buildArray(getField(), columns);
1008    
1009            // perform copy block-wise, to ensure good cache behavior
1010            final int iBlock  = row / BLOCK_SIZE;
1011            final int iRow    = row - iBlock * BLOCK_SIZE;
1012            int outIndex      = 0;
1013            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1014                final int jWidth     = blockWidth(jBlock);
1015                final T[] block = blocks[iBlock * blockColumns + jBlock];
1016                System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1017                outIndex += jWidth;
1018            }
1019    
1020            return new ArrayFieldVector<T>(getField(), outData, false);
1021        }
1022    
1023        /** {@inheritDoc} */
1024        @Override
1025        public void setRowVector(final int row, final FieldVector<T> vector)
1026            throws MatrixDimensionMismatchException, OutOfRangeException {
1027            try {
1028                setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1029            } catch (ClassCastException cce) {
1030                super.setRowVector(row, vector);
1031            }
1032        }
1033    
1034        /** {@inheritDoc} */
1035        @Override
1036        public FieldVector<T> getColumnVector(final int column)
1037            throws OutOfRangeException {
1038            checkColumnIndex(column);
1039            final T[] outData = buildArray(getField(), rows);
1040    
1041            // perform copy block-wise, to ensure good cache behavior
1042            final int jBlock  = column / BLOCK_SIZE;
1043            final int jColumn = column - jBlock * BLOCK_SIZE;
1044            final int jWidth  = blockWidth(jBlock);
1045            int outIndex      = 0;
1046            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1047                final int iHeight = blockHeight(iBlock);
1048                final T[] block = blocks[iBlock * blockColumns + jBlock];
1049                for (int i = 0; i < iHeight; ++i) {
1050                    outData[outIndex++] = block[i * jWidth + jColumn];
1051                }
1052            }
1053    
1054            return new ArrayFieldVector<T>(getField(), outData, false);
1055        }
1056    
1057        /** {@inheritDoc} */
1058        @Override
1059        public void setColumnVector(final int column, final FieldVector<T> vector)
1060            throws OutOfRangeException, MatrixDimensionMismatchException {
1061            try {
1062                setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1063            } catch (ClassCastException cce) {
1064                super.setColumnVector(column, vector);
1065            }
1066        }
1067    
1068        /** {@inheritDoc} */
1069        @Override
1070        public T[] getRow(final int row) throws OutOfRangeException {
1071            checkRowIndex(row);
1072            final T[] out = buildArray(getField(), columns);
1073    
1074            // perform copy block-wise, to ensure good cache behavior
1075            final int iBlock  = row / BLOCK_SIZE;
1076            final int iRow    = row - iBlock * BLOCK_SIZE;
1077            int outIndex      = 0;
1078            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1079                final int jWidth     = blockWidth(jBlock);
1080                final T[] block = blocks[iBlock * blockColumns + jBlock];
1081                System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1082                outIndex += jWidth;
1083            }
1084    
1085            return out;
1086        }
1087    
1088        /** {@inheritDoc} */
1089        @Override
1090        public void setRow(final int row, final T[] array)
1091            throws OutOfRangeException, MatrixDimensionMismatchException {
1092            checkRowIndex(row);
1093            final int nCols = getColumnDimension();
1094            if (array.length != nCols) {
1095                throw new MatrixDimensionMismatchException(1, array.length, 1, nCols);
1096            }
1097    
1098            // perform copy block-wise, to ensure good cache behavior
1099            final int iBlock  = row / BLOCK_SIZE;
1100            final int iRow    = row - iBlock * BLOCK_SIZE;
1101            int outIndex      = 0;
1102            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1103                final int jWidth     = blockWidth(jBlock);
1104                final T[] block = blocks[iBlock * blockColumns + jBlock];
1105                System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1106                outIndex += jWidth;
1107            }
1108        }
1109    
1110        /** {@inheritDoc} */
1111        @Override
1112        public T[] getColumn(final int column) throws OutOfRangeException {
1113            checkColumnIndex(column);
1114            final T[] out = buildArray(getField(), rows);
1115    
1116            // perform copy block-wise, to ensure good cache behavior
1117            final int jBlock  = column / BLOCK_SIZE;
1118            final int jColumn = column - jBlock * BLOCK_SIZE;
1119            final int jWidth  = blockWidth(jBlock);
1120            int outIndex      = 0;
1121            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1122                final int iHeight = blockHeight(iBlock);
1123                final T[] block = blocks[iBlock * blockColumns + jBlock];
1124                for (int i = 0; i < iHeight; ++i) {
1125                    out[outIndex++] = block[i * jWidth + jColumn];
1126                }
1127            }
1128    
1129            return out;
1130        }
1131    
1132        /** {@inheritDoc} */
1133        @Override
1134        public void setColumn(final int column, final T[] array)
1135            throws MatrixDimensionMismatchException, OutOfRangeException {
1136            checkColumnIndex(column);
1137            final int nRows = getRowDimension();
1138            if (array.length != nRows) {
1139                throw new MatrixDimensionMismatchException(array.length, 1, nRows, 1);
1140            }
1141    
1142            // perform copy block-wise, to ensure good cache behavior
1143            final int jBlock  = column / BLOCK_SIZE;
1144            final int jColumn = column - jBlock * BLOCK_SIZE;
1145            final int jWidth  = blockWidth(jBlock);
1146            int outIndex      = 0;
1147            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1148                final int iHeight = blockHeight(iBlock);
1149                final T[] block = blocks[iBlock * blockColumns + jBlock];
1150                for (int i = 0; i < iHeight; ++i) {
1151                    block[i * jWidth + jColumn] = array[outIndex++];
1152                }
1153            }
1154        }
1155    
1156        /** {@inheritDoc} */
1157        @Override
1158        public T getEntry(final int row, final int column)
1159            throws OutOfRangeException {
1160            checkRowIndex(row);
1161            checkColumnIndex(column);
1162    
1163            final int iBlock = row    / BLOCK_SIZE;
1164            final int jBlock = column / BLOCK_SIZE;
1165            final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1166                (column - jBlock * BLOCK_SIZE);
1167    
1168            return blocks[iBlock * blockColumns + jBlock][k];
1169        }
1170    
1171        /** {@inheritDoc} */
1172        @Override
1173        public void setEntry(final int row, final int column, final T value)
1174            throws OutOfRangeException {
1175            checkRowIndex(row);
1176            checkColumnIndex(column);
1177    
1178            final int iBlock = row    / BLOCK_SIZE;
1179            final int jBlock = column / BLOCK_SIZE;
1180            final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1181                (column - jBlock * BLOCK_SIZE);
1182    
1183            blocks[iBlock * blockColumns + jBlock][k] = value;
1184        }
1185    
1186        /** {@inheritDoc} */
1187        @Override
1188        public void addToEntry(final int row, final int column, final T increment)
1189            throws OutOfRangeException {
1190            checkRowIndex(row);
1191            checkColumnIndex(column);
1192    
1193            final int iBlock = row    / BLOCK_SIZE;
1194            final int jBlock = column / BLOCK_SIZE;
1195            final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1196                (column - jBlock * BLOCK_SIZE);
1197            final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1198    
1199            blockIJ[k] = blockIJ[k].add(increment);
1200        }
1201    
1202        /** {@inheritDoc} */
1203        @Override
1204        public void multiplyEntry(final int row, final int column, final T factor)
1205            throws OutOfRangeException {
1206            checkRowIndex(row);
1207            checkColumnIndex(column);
1208    
1209            final int iBlock = row    / BLOCK_SIZE;
1210            final int jBlock = column / BLOCK_SIZE;
1211            final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1212                (column - jBlock * BLOCK_SIZE);
1213            final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1214    
1215            blockIJ[k] = blockIJ[k].multiply(factor);
1216        }
1217    
1218        /** {@inheritDoc} */
1219        @Override
1220        public FieldMatrix<T> transpose() {
1221            final int nRows = getRowDimension();
1222            final int nCols = getColumnDimension();
1223            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1224    
1225            // perform transpose block-wise, to ensure good cache behavior
1226            int blockIndex = 0;
1227            for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1228                for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1229    
1230                    // transpose current block
1231                    final T[] outBlock = out.blocks[blockIndex];
1232                    final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1233                    final int      pStart   = iBlock * BLOCK_SIZE;
1234                    final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, columns);
1235                    final int      qStart   = jBlock * BLOCK_SIZE;
1236                    final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, rows);
1237                    int k = 0;
1238                    for (int p = pStart; p < pEnd; ++p) {
1239                        final int lInc = pEnd - pStart;
1240                        int l = p - pStart;
1241                        for (int q = qStart; q < qEnd; ++q) {
1242                            outBlock[k] = tBlock[l];
1243                            ++k;
1244                            l+= lInc;
1245                        }
1246                    }
1247    
1248                    // go to next block
1249                    ++blockIndex;
1250    
1251                }
1252            }
1253    
1254            return out;
1255        }
1256    
1257        /** {@inheritDoc} */
1258        @Override
1259        public int getRowDimension() {
1260            return rows;
1261        }
1262    
1263        /** {@inheritDoc} */
1264        @Override
1265        public int getColumnDimension() {
1266            return columns;
1267        }
1268    
1269        /** {@inheritDoc} */
1270        @Override
1271        public T[] operate(final T[] v) throws DimensionMismatchException {
1272            if (v.length != columns) {
1273                throw new DimensionMismatchException(v.length, columns);
1274            }
1275            final T[] out = buildArray(getField(), rows);
1276            final T zero = getField().getZero();
1277    
1278            // perform multiplication block-wise, to ensure good cache behavior
1279            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1280                final int pStart = iBlock * BLOCK_SIZE;
1281                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1282                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1283                    final T[] block  = blocks[iBlock * blockColumns + jBlock];
1284                    final int      qStart = jBlock * BLOCK_SIZE;
1285                    final int      qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1286                    int k = 0;
1287                    for (int p = pStart; p < pEnd; ++p) {
1288                        T sum = zero;
1289                        int q = qStart;
1290                        while (q < qEnd - 3) {
1291                            sum = sum.
1292                                  add(block[k].multiply(v[q])).
1293                                  add(block[k + 1].multiply(v[q + 1])).
1294                                  add(block[k + 2].multiply(v[q + 2])).
1295                                  add(block[k + 3].multiply(v[q + 3]));
1296                            k += 4;
1297                            q += 4;
1298                        }
1299                        while (q < qEnd) {
1300                            sum = sum.add(block[k++].multiply(v[q++]));
1301                        }
1302                        out[p] = out[p].add(sum);
1303                    }
1304                }
1305            }
1306    
1307            return out;
1308        }
1309    
1310        /** {@inheritDoc} */
1311        @Override
1312        public T[] preMultiply(final T[] v) throws DimensionMismatchException {
1313    
1314            if (v.length != rows) {
1315                throw new DimensionMismatchException(v.length, rows);
1316            }
1317            final T[] out = buildArray(getField(), columns);
1318            final T zero = getField().getZero();
1319    
1320            // perform multiplication block-wise, to ensure good cache behavior
1321            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1322                final int jWidth  = blockWidth(jBlock);
1323                final int jWidth2 = jWidth  + jWidth;
1324                final int jWidth3 = jWidth2 + jWidth;
1325                final int jWidth4 = jWidth3 + jWidth;
1326                final int qStart = jBlock * BLOCK_SIZE;
1327                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1328                for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1329                    final T[] block  = blocks[iBlock * blockColumns + jBlock];
1330                    final int      pStart = iBlock * BLOCK_SIZE;
1331                    final int      pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1332                    for (int q = qStart; q < qEnd; ++q) {
1333                        int k = q - qStart;
1334                        T sum = zero;
1335                        int p = pStart;
1336                        while (p < pEnd - 3) {
1337                            sum = sum.
1338                                  add(block[k].multiply(v[p])).
1339                                  add(block[k + jWidth].multiply(v[p + 1])).
1340                                  add(block[k + jWidth2].multiply(v[p + 2])).
1341                                  add(block[k + jWidth3].multiply(v[p + 3]));
1342                            k += jWidth4;
1343                            p += 4;
1344                        }
1345                        while (p < pEnd) {
1346                            sum = sum.add(block[k].multiply(v[p++]));
1347                            k += jWidth;
1348                        }
1349                        out[q] = out[q].add(sum);
1350                    }
1351                }
1352            }
1353    
1354            return out;
1355        }
1356    
1357        /** {@inheritDoc} */
1358        @Override
1359        public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor) {
1360            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1361            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1362                final int pStart = iBlock * BLOCK_SIZE;
1363                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1364                for (int p = pStart; p < pEnd; ++p) {
1365                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1366                        final int jWidth = blockWidth(jBlock);
1367                        final int qStart = jBlock * BLOCK_SIZE;
1368                        final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1369                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1370                        int k = (p - pStart) * jWidth;
1371                        for (int q = qStart; q < qEnd; ++q) {
1372                            block[k] = visitor.visit(p, q, block[k]);
1373                            ++k;
1374                        }
1375                    }
1376                 }
1377            }
1378            return visitor.end();
1379        }
1380    
1381        /** {@inheritDoc} */
1382        @Override
1383        public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1384            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1385            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1386                final int pStart = iBlock * BLOCK_SIZE;
1387                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1388                for (int p = pStart; p < pEnd; ++p) {
1389                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1390                        final int jWidth = blockWidth(jBlock);
1391                        final int qStart = jBlock * BLOCK_SIZE;
1392                        final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1393                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1394                        int k = (p - pStart) * jWidth;
1395                        for (int q = qStart; q < qEnd; ++q) {
1396                            visitor.visit(p, q, block[k]);
1397                            ++k;
1398                        }
1399                    }
1400                 }
1401            }
1402            return visitor.end();
1403        }
1404    
1405        /** {@inheritDoc} */
1406        @Override
1407        public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1408                                final int startRow, final int endRow,
1409                                final int startColumn, final int endColumn)
1410            throws OutOfRangeException, NumberIsTooSmallException {
1411            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1412            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1413            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1414                final int p0     = iBlock * BLOCK_SIZE;
1415                final int pStart = FastMath.max(startRow, p0);
1416                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1417                for (int p = pStart; p < pEnd; ++p) {
1418                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1419                        final int jWidth = blockWidth(jBlock);
1420                        final int q0     = jBlock * BLOCK_SIZE;
1421                        final int qStart = FastMath.max(startColumn, q0);
1422                        final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1423                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1424                        int k = (p - p0) * jWidth + qStart - q0;
1425                        for (int q = qStart; q < qEnd; ++q) {
1426                            block[k] = visitor.visit(p, q, block[k]);
1427                            ++k;
1428                        }
1429                    }
1430                 }
1431            }
1432            return visitor.end();
1433        }
1434    
1435        /** {@inheritDoc} */
1436        @Override
1437        public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1438                                final int startRow, final int endRow,
1439                                final int startColumn, final int endColumn)
1440            throws OutOfRangeException, NumberIsTooSmallException {
1441            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1442            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1443            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1444                final int p0     = iBlock * BLOCK_SIZE;
1445                final int pStart = FastMath.max(startRow, p0);
1446                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1447                for (int p = pStart; p < pEnd; ++p) {
1448                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1449                        final int jWidth = blockWidth(jBlock);
1450                        final int q0     = jBlock * BLOCK_SIZE;
1451                        final int qStart = FastMath.max(startColumn, q0);
1452                        final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1453                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1454                        int k = (p - p0) * jWidth + qStart - q0;
1455                        for (int q = qStart; q < qEnd; ++q) {
1456                            visitor.visit(p, q, block[k]);
1457                            ++k;
1458                        }
1459                    }
1460                 }
1461            }
1462            return visitor.end();
1463        }
1464    
1465        /** {@inheritDoc} */
1466        @Override
1467        public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor) {
1468            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1469            int blockIndex = 0;
1470            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1471                final int pStart = iBlock * BLOCK_SIZE;
1472                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1473                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1474                    final int qStart = jBlock * BLOCK_SIZE;
1475                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1476                    final T[] block = blocks[blockIndex];
1477                    int k = 0;
1478                    for (int p = pStart; p < pEnd; ++p) {
1479                        for (int q = qStart; q < qEnd; ++q) {
1480                            block[k] = visitor.visit(p, q, block[k]);
1481                            ++k;
1482                        }
1483                    }
1484                    ++blockIndex;
1485                }
1486            }
1487            return visitor.end();
1488        }
1489    
1490        /** {@inheritDoc} */
1491        @Override
1492        public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1493            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1494            int blockIndex = 0;
1495            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1496                final int pStart = iBlock * BLOCK_SIZE;
1497                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1498                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1499                    final int qStart = jBlock * BLOCK_SIZE;
1500                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1501                    final T[] block = blocks[blockIndex];
1502                    int k = 0;
1503                    for (int p = pStart; p < pEnd; ++p) {
1504                        for (int q = qStart; q < qEnd; ++q) {
1505                            visitor.visit(p, q, block[k]);
1506                            ++k;
1507                        }
1508                    }
1509                    ++blockIndex;
1510                }
1511            }
1512            return visitor.end();
1513        }
1514    
1515        /** {@inheritDoc} */
1516        @Override
1517        public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1518                                      final int startRow, final int endRow,
1519                                      final int startColumn, final int endColumn)
1520            throws OutOfRangeException, NumberIsTooSmallException {
1521            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1522            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1523            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1524                final int p0     = iBlock * BLOCK_SIZE;
1525                final int pStart = FastMath.max(startRow, p0);
1526                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1527                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1528                    final int jWidth = blockWidth(jBlock);
1529                    final int q0     = jBlock * BLOCK_SIZE;
1530                    final int qStart = FastMath.max(startColumn, q0);
1531                    final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1532                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1533                    for (int p = pStart; p < pEnd; ++p) {
1534                        int k = (p - p0) * jWidth + qStart - q0;
1535                        for (int q = qStart; q < qEnd; ++q) {
1536                            block[k] = visitor.visit(p, q, block[k]);
1537                            ++k;
1538                        }
1539                    }
1540                }
1541            }
1542            return visitor.end();
1543        }
1544    
1545        /** {@inheritDoc} */
1546        @Override
1547        public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1548                                      final int startRow, final int endRow,
1549                                      final int startColumn, final int endColumn)
1550            throws OutOfRangeException, NumberIsTooSmallException {
1551            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1552            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1553            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1554                final int p0     = iBlock * BLOCK_SIZE;
1555                final int pStart = FastMath.max(startRow, p0);
1556                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1557                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1558                    final int jWidth = blockWidth(jBlock);
1559                    final int q0     = jBlock * BLOCK_SIZE;
1560                    final int qStart = FastMath.max(startColumn, q0);
1561                    final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1562                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1563                    for (int p = pStart; p < pEnd; ++p) {
1564                        int k = (p - p0) * jWidth + qStart - q0;
1565                        for (int q = qStart; q < qEnd; ++q) {
1566                            visitor.visit(p, q, block[k]);
1567                            ++k;
1568                        }
1569                    }
1570                }
1571            }
1572            return visitor.end();
1573        }
1574    
1575        /**
1576         * Get the height of a block.
1577         * @param blockRow row index (in block sense) of the block
1578         * @return height (number of rows) of the block
1579         */
1580        private int blockHeight(final int blockRow) {
1581            return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1582        }
1583    
1584        /**
1585         * Get the width of a block.
1586         * @param blockColumn column index (in block sense) of the block
1587         * @return width (number of columns) of the block
1588         */
1589        private int blockWidth(final int blockColumn) {
1590            return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1591        }
1592    }