ConvertDouble.java
/***** BEGIN LICENSE BLOCK *****
 * Version: EPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Eclipse Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/epl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2010 Thomas E Enebo (tom.enebo@gmail.com)
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the EPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the EPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/
package org.jruby.util;
public class ConvertDouble {
    /**
     * Converts supplied ByteList into a double.  strict-mode will not like
     * extra text non-numeric text or multiple sequention underscores.
     */
    public static double byteListToDouble(ByteList bytes, boolean strict) {
        return new DoubleConverter().parse(bytes, strict, false);
    }
    /**
     * Converts supplied ByteList into a double.  This is almost exactly the
     * same as byteListToDouble, but leading underscores are no longer skipped
     * in 1.9 (e.g. __1 == 0.0 in 1.9 while 1.8 parses it as 1.0).
     */
    public static double byteListToDouble19(ByteList bytes, boolean strict) {
        return new DoubleConverter().parse(bytes, strict, true);
    }
    public static class DoubleConverter {
        private byte[] bytes;
        private int index;
        private int endIndex;
        private boolean isStrict;
        private boolean is19;
        private char[] chars; // result string we use to parse.
        private int charsIndex;
        private int significantDigitsProcessed;
        private int adjustExponent;
        private boolean wroteExponent;
        private double result;
        // To match MRI so that we end up rounding on values greater than being storable in a 64
        // bit value in the same fashion we are using 30.  Based on wikipedia a note recommends 
        //17 + 3 (or 20), but I don't know why that is recommended and we would rather be compatible 
        //behaviorally with MRI.
        private final static int SIGNIFICANT_DIGITS_LIMIT = 30;
        // Maximal exponent is +308, minimal -308, -324 with subnormal float
        private final static int EXPONENT_DIGITS_LIMIT = 3;
        private final static int MAX_EXPONENT = (int) (Math.pow(10, EXPONENT_DIGITS_LIMIT)) - 1;
        // Max length is '-' + significant_digits + '.' + 'E' + '-' + exponent_digits
        // + 1 (first digit before point) + 1 (first decimal digit).
        private final static int MAX_LENGTH = SIGNIFICANT_DIGITS_LIMIT + EXPONENT_DIGITS_LIMIT + 6;
        public DoubleConverter() {}
        public void init(ByteList list, boolean isStrict, boolean is19) {
            bytes = list.getUnsafeBytes();
            index = list.begin();
            endIndex = index + list.length();
            this.isStrict = isStrict;
            this.is19 = is19;
            // +2 for added exponent: E...
            // The algorithm trades digits for inc/dec exponent.
            // Worse case is adding E-1 when no exponent,
            // it trades one digit for 3 chars.
            chars = new char[Math.min(list.length()+2, MAX_LENGTH)];
            charsIndex = 0;
            significantDigitsProcessed = 0;
            adjustExponent = 0;
            wroteExponent = false;
            result = -1.0; // -1.0 is "parse String"
        }
        private byte next() {
            return bytes[index++];
        }
        /**
         * Shift back to previous character in the incoming bytes
         * @return false to indicate we are not in an EOS condition
         */
        private boolean previous() {
            index--;
            return false;
        }
        private boolean isEOS() {
            return index >= endIndex;
        }
        private boolean stopParsing() {
            index = endIndex;
            return true;
        }
        private boolean isDigit(byte b) {
            return b >= '0' && b <= '9';
        }
        private boolean isExponent(byte b) {
            return b == 'e' || b == 'E';
        }
        private boolean isWhitespace(byte b) {
            return b == ' ' || (b <= 13 && b >= 9 && b != 11);
        }
        private void addToResult(byte b) {
            chars[charsIndex++] = (char) b;
        }
        private void addExponentToResult(int exponent) {
            String exp = Integer.toString(exponent);
            for (int i = 0; i < exp.length(); i++) {
                addToResult((byte) exp.charAt(i));
            }
        }
        private boolean eatUnderscores() {
            while (!isEOS()) {
                byte value = next();
                if (value != '_') {
                    previous();
                    return isEOS();
                } else if (isStrict) {
                    strictError();
                }
            }
            return true;
        }
        private double completeCalculation() {
            if (charsIndex == 0 || (charsIndex == 1 && chars[0] == '-')) { // "" or "-"
                strictError(); // Strict requires at least one digit.
                return 0.0; // Treated as 0.0 (not -0.0) in non-strict.
            } else if (isExponent((byte) chars[charsIndex - 1])) { // Covers 12.0efrog
                strictError();
                addExponentToResult(adjustExponent);
            } else if (isStrict && !isEOS()) {
                strictError(); // We know it is not whitespace at this point
            } else if (!wroteExponent && adjustExponent != 0) {
                addToResult((byte) 'E');
                addExponentToResult(adjustExponent);
            }
            return SafeDoubleParser.parseDouble(new String(chars, 0, charsIndex));
        }
        
        private boolean strictError() {
            if (isStrict) {
                throw new NumberFormatException("does not meet strict criteria");
            } else {
                return true; // means EOS for non-strict
            }
        }
        public double parse(ByteList list, boolean strict, boolean is19) {
            init(list, strict, is19);
            if (skipWhitespace()) return completeCalculation();
            if (parseOptionalSign()) return completeCalculation();
            parseDigits();
            if (result != -1.0) { // abnormal result
                return result;
            }
            return completeCalculation();
        }
        /**
         * Consume initial whitespace and underscores so that next character
         * examined is not whitespace.  1.9 and strict do not allow leading
         * underscores.  Returns whether next position is at the end of the
         * string or not.
         *
         * Trivia: " _ _ _ _ 1".to_f == 1.0 in Ruby 1.8
         */
        private boolean skipWhitespace() {
            while (!isEOS()) {
                byte value = next();
                if (isWhitespace(value)) continue;
                if (value != '_' || isStrict || is19) return previous();
            }
            return true;
        }
        private boolean parseOptionalSign() {
            byte sign = next();
            if (sign == '-') {
                addToResult(sign);
            } else if (sign != '+') {
                previous();  // backup...not a sign-char
            }
            return isEOS();
        }
        private boolean parseDigits() {
            if (!isEOS()) {
                byte value = next();
                if (isDigit(value)) {
                    // Always add the first digit, in case it is the only digit
                    if (value != '0') {
                        significantDigitsProcessed++;
                    }
                    addToResult(value);
                } else if (value == '.') {
                    addToResult(value);
                    return parseDecimalDigits();
                } else {
                    return isEOS();
                }
            } else {
                strictError();
            }
            
            while (!isEOS()) {
                byte value = next();
                if (isDigit(value)) {
                    if (significantDigitsProcessed < SIGNIFICANT_DIGITS_LIMIT) {
                        // Ignore leading 0s
                        if (value != '0' || significantDigitsProcessed > 0) {
                            significantDigitsProcessed++;
                            addToResult(value);
                        }
                    } else {
                        adjustExponent++;
                    }
                } else if (value == '.') {
                    addToResult(value);
                    return parseDecimalDigits();
                } else if (value == '_') {
                    verifyNumberAfterUnderscore();
                } else if (isExponent(value)) {
                    addToResult(value);
                    return parseExponent();
                } else if (isWhitespace(value)) {
                    return skipWhitespace();
                } else {
                    return strictError();
                }
 
            }
            return true;
        }
        private boolean parseDecimalDigits() {
            if (isEOS()) return strictError();
            
            byte value = next();
            // Wonky, but 12._2e2 is 1200.0 and 12.__e2 is 12.0
            if (value == '_') {
                strictError();
                if (isEOS()) return strictError();
                value = next();
            }
            if (isDigit(value)) {
                if (significantDigitsProcessed < SIGNIFICANT_DIGITS_LIMIT) {
                    if (!(value == '0' && significantDigitsProcessed == 0)) { // not a leading 0
                        significantDigitsProcessed++;
                    }
                } else {
                    value = '0';
                }
                // Always add a digit after the .
                addToResult(value);
            } else {
                return strictError();
            } 
            while (!isEOS()) {
                value = next();
                if (isDigit(value)) {
                    if (significantDigitsProcessed < SIGNIFICANT_DIGITS_LIMIT) {
                        if (value == '0' && significantDigitsProcessed == 0) { // leading 0
                            adjustExponent--;
                        } else {
                            significantDigitsProcessed++;
                            addToResult(value);
                        }
                    } // otherwise ignore it
                } else if (isExponent(value)) {
                    addToResult(value);
                    return parseExponent();
                } else if (value == '_') {
                    verifyNumberAfterUnderscore();
                } else if (isWhitespace(value)) {
                    return skipWhitespace();
                } else {
                    return strictError();
                }
            }
            return true;
        }
        private boolean parseExponent() {
            if (eatUnderscores()) return isEOS();
            byte value = next();
            int exponent = 0;
            int digits = 0;
            boolean negative = false;
            if (value == '-') {
                negative = true;
            } else if (value != '+') {
                previous(); // backup...not a sign-char
            }
            while (!isEOS()) {
                value = next();
                if (isDigit(value)) {
                    if (digits < EXPONENT_DIGITS_LIMIT) {
                        // Ignore leading 0s
                        if (value != '0' || digits > 0) {
                            digits++;
                            exponent = 10 * exponent + (value - '0');
                        }
                    } else {
                        return tooLargeExponent(chars[0] == '-', negative);
                    }
                } else if (isWhitespace(value)) {
                    skipWhitespace();
                    break;
                } else if (value == '_') {
                    verifyNumberAfterUnderscore();
                } else {
                    strictError();
                    stopParsing();
                    break;
                }
            } 
            if (negative) {
                exponent = -exponent;
            }
            exponent += adjustExponent;
            if (-MAX_EXPONENT <= exponent && exponent <= MAX_EXPONENT) {
                addExponentToResult(exponent);
                wroteExponent = true;
                return isEOS(); // Exponent end of double...let's finish.
            } else {
                return tooLargeExponent(chars[0] == '-', negative);
            }
        }
        private boolean tooLargeExponent(boolean negativeFloat, boolean negativeExponent) {
            if (negativeExponent) {
                if (negativeFloat) {
                    result = -0.0;
                } else {
                    result = 0.0;
                }
            } else {
                if (negativeFloat) {
                    result = Double.NEGATIVE_INFINITY;
                } else {
                    result = Double.POSITIVE_INFINITY;
                }
            }
            return stopParsing();
        }
        private void verifyNumberAfterUnderscore() {
            if (isStrict && (isEOS() || !isDigit(bytes[index]))) {
                strictError();
            }
        }
    }
}