RegexpOptions.java

  1. /*
  2.  * To change this template, choose Tools | Templates
  3.  * and open the template in the editor.
  4.  */

  5. package org.jruby.util;

  6. import org.jcodings.Encoding;
  7. import org.jcodings.specific.ASCIIEncoding;
  8. import org.jcodings.specific.EUCJPEncoding;
  9. import org.jcodings.specific.UTF8Encoding;
  10. import org.jruby.Ruby;
  11. import org.jruby.RubyRegexp;

  12. public class RegexpOptions implements Cloneable {
  13.     private static ByteList WINDOWS31J = new ByteList(new byte[] {'W', 'i', 'n', 'd', 'o', 'w', 's', '-', '3', '1', 'J'});    
  14.     public static final RegexpOptions NULL_OPTIONS = new RegexpOptions(KCode.NONE, true);
  15.    
  16.     public RegexpOptions() {
  17.         this(KCode.NONE, true);
  18.     }
  19.    
  20.     public RegexpOptions(KCode kcode, boolean isKCodeDefault) {
  21.         this.kcode = kcode;
  22.         this.kcodeDefault = isKCodeDefault;
  23.        
  24.         assert kcode != null : "kcode must always be set to something";
  25.     }

  26.     public boolean isExtended() {
  27.         return extended;
  28.     }

  29.     public void setExtended(boolean extended) {
  30.         this.extended = extended;
  31.     }

  32.     public boolean isIgnorecase() {
  33.         return ignorecase;
  34.     }

  35.     public void setIgnorecase(boolean ignorecase) {
  36.         this.ignorecase = ignorecase;
  37.     }

  38.     public boolean isFixed() {
  39.         return fixed;
  40.     }

  41.     public void setFixed(boolean fixed) {
  42.         this.fixed = fixed;
  43.     }

  44.     public KCode getKCode() {
  45.         return kcode;
  46.     }
  47.    
  48.     public String getKCodeName() {
  49.         return isKcodeDefault() ? null : getKCode().name().toLowerCase();
  50.     }    

  51.     /**
  52.      * This regexp has an explicit encoding flag or 'nesu' letter associated
  53.      * with it.
  54.      *
  55.      * @param kcode to be set
  56.      */
  57.     public void setExplicitKCode(KCode kcode) {
  58.         this.kcode = kcode;
  59.         kcodeDefault = false;
  60.     }
  61.    
  62.     private KCode getExplicitKCode() {
  63.         if (kcodeDefault == true) return null;
  64.        
  65.         return kcode;
  66.     }

  67.     /**
  68.      * Whether the kcode associated with this regexp is implicit (aka
  69.      * default) or is specified explicitly (via 'nesu' syntax postscript or
  70.      * flags to Regexp.new.
  71.      */
  72.     public boolean isKcodeDefault() {
  73.         return kcodeDefault;
  74.     }

  75.     public boolean isMultiline() {
  76.         return multiline;
  77.     }

  78.     public void setMultiline(boolean multiline) {
  79.         this.multiline = multiline;
  80.     }

  81.     public boolean isOnce() {
  82.         return once;
  83.     }

  84.     public void setOnce(boolean once) {
  85.         this.once = once;
  86.     }

  87.     public boolean isJava() {
  88.         return java;
  89.     }

  90.     public void setJava(boolean java) {
  91.         this.java = java;
  92.     }

  93.     public boolean isEncodingNone() {
  94.         return encodingNone;
  95.     }

  96.     public void setEncodingNone(boolean encodingNone) {
  97.         this.encodingNone = encodingNone;
  98.     }

  99.     public boolean isLiteral() {
  100.         return literal;
  101.     }

  102.     public void setLiteral(boolean literal) {
  103.         this.literal = literal;
  104.     }

  105.     public boolean isEmbeddable() {
  106.         return multiline && ignorecase && extended;
  107.     }
  108.    
  109.    
  110.     /**
  111.      * Calculate the encoding based on kcode option set via 'nesu'.  Also as
  112.      * side-effects:
  113.      * 1.set whether this marks the soon to be made regexp as  'fixed'.
  114.      * 2.kcode.none will set 'none' option
  115.      * @return null if no explicit encoding is specified.
  116.      */
  117.     public Encoding setup(Ruby runtime) {
  118.         KCode explicitKCode = getExplicitKCode();
  119.        
  120.         // None will not set fixed
  121.         if (explicitKCode == KCode.NONE) {
  122.             setEncodingNone(true);
  123.             return ASCIIEncoding.INSTANCE;
  124.         }
  125.        
  126.         if (explicitKCode == KCode.EUC) {
  127.             setFixed(true);
  128.             return EUCJPEncoding.INSTANCE;
  129.         } else if (explicitKCode == KCode.SJIS) {
  130.             setFixed(true);
  131.             return runtime.getEncodingService().loadEncoding(WINDOWS31J);
  132.         } else if (explicitKCode == KCode.UTF8) {
  133.             setFixed(true);
  134.             return UTF8Encoding.INSTANCE;
  135.         }
  136.        
  137.         return null;
  138.     }
  139.    
  140.     /**
  141.      * This int value can be used by compiler or any place where we want
  142.      * an integer representation of the state of this object.
  143.      *
  144.      * Note: This is for full representation of state in the JIT.  It is not
  145.      * to be confused with state of marshalled regexp data.
  146.      */
  147.     public int toEmbeddedOptions() {
  148.         int options = toJoniOptions();

  149.         if (once) options |= RubyRegexp.RE_OPTION_ONCE;
  150.         if (literal) options |= RubyRegexp.RE_LITERAL;
  151.         if (kcodeDefault) options |= RubyRegexp.RE_DEFAULT;
  152.         if (fixed) options |= RubyRegexp.RE_FIXED;

  153.         return options;
  154.     }

  155.     /**
  156.      * This int value is meant to only be used when dealing directly with
  157.      * the joni regular expression library.  It differs from embeddedOptions
  158.      * in that it only contains bit values which Joni cares about.
  159.      */
  160.     public int toJoniOptions() {
  161.         int options = 0;
  162.         // Note: once is not an option that is pertinent to Joni so we exclude it.
  163.         if (multiline) options |= RubyRegexp.RE_OPTION_MULTILINE;
  164.         if (ignorecase) options |= RubyRegexp.RE_OPTION_IGNORECASE;
  165.         if (extended) options |= RubyRegexp.RE_OPTION_EXTENDED;
  166.         if (!isKcodeDefault()) options |= kcode.bits();
  167.         return options;
  168.     }
  169.    
  170.     /**
  171.      * This int value is used by Regex#options
  172.      */
  173.     public int toOptions() {
  174.         int options = 0;
  175.         if (multiline) options |= RubyRegexp.RE_OPTION_MULTILINE;
  176.         if (ignorecase) options |= RubyRegexp.RE_OPTION_IGNORECASE;
  177.         if (extended) options |= RubyRegexp.RE_OPTION_EXTENDED;
  178.         if (fixed) options |= RubyRegexp.RE_FIXED;
  179.         if (encodingNone) options |= RubyRegexp.ARG_ENCODING_NONE;
  180.         return options;
  181.     }

  182.     public static RegexpOptions fromEmbeddedOptions(int embeddedOptions) {
  183.         RegexpOptions options = fromJoniOptions(embeddedOptions);

  184.         options.kcodeDefault = (embeddedOptions & RubyRegexp.RE_DEFAULT) != 0;        
  185.         options.setOnce((embeddedOptions & RubyRegexp.RE_OPTION_ONCE) != 0);
  186.         options.setLiteral((embeddedOptions & RubyRegexp.RE_LITERAL) != 0);
  187.         options.setFixed((embeddedOptions & RubyRegexp.RE_FIXED) != 0);        
  188.        
  189.         return options;
  190.     }

  191.     public static RegexpOptions fromJoniOptions(int joniOptions) {
  192.         KCode kcode = KCode.fromBits(joniOptions);
  193.         RegexpOptions options = new RegexpOptions(kcode, kcode == KCode.NONE);
  194.         options.setMultiline((joniOptions & RubyRegexp.RE_OPTION_MULTILINE) != 0);
  195.         options.setIgnorecase((joniOptions & RubyRegexp.RE_OPTION_IGNORECASE) != 0);
  196.         options.setExtended((joniOptions & RubyRegexp.RE_OPTION_EXTENDED) != 0);
  197.         options.setFixed((joniOptions & RubyRegexp.RE_FIXED) != 0);
  198.         options.setOnce((joniOptions & RubyRegexp.RE_OPTION_ONCE) != 0);

  199.         return options;
  200.     }

  201.     public RegexpOptions withoutOnce() {
  202.         RegexpOptions options = (RegexpOptions)clone();
  203.         options.setOnce(false);
  204.         return options;
  205.     }

  206.     @Override
  207.     public int hashCode() {
  208.         int hash = 7;
  209.         hash = 11 * hash + (this.kcode != null ? this.kcode.hashCode() : 0);
  210.         hash = 11 * hash + (this.fixed ? 1 : 0);
  211.         hash = 11 * hash + (this.once ? 1 : 0);
  212.         hash = 11 * hash + (this.extended ? 1 : 0);
  213.         hash = 11 * hash + (this.multiline ? 1 : 0);
  214.         hash = 11 * hash + (this.ignorecase ? 1 : 0);
  215.         hash = 11 * hash + (this.java ? 1 : 0);
  216.         hash = 11 * hash + (this.encodingNone ? 1 : 0);
  217.         hash = 11 * hash + (this.kcodeDefault ? 1 : 0);
  218.         hash = 11 * hash + (this.literal ? 1 : 0);
  219.         return hash;
  220.     }

  221.     @Override
  222.     public Object clone() {
  223.         try {
  224.             return super.clone();
  225.         } catch (CloneNotSupportedException cnse) {throw new RuntimeException(cnse);}
  226.     }

  227.     @Override
  228.     public boolean equals(Object other) {
  229.         if (!(other instanceof RegexpOptions)) return false;

  230.         // Note: literal and once can be different in this object but for the
  231.         // sake of equality we ignore those two fields since those flags do
  232.         // not affect Ruby equality.
  233.         RegexpOptions o = (RegexpOptions)other;
  234.         boolean equality = o.extended == extended &&
  235.                            o.fixed == fixed &&
  236.                            o.ignorecase == ignorecase &&
  237.                            o.java == java &&
  238.                            o.multiline == multiline;
  239.         if(encodingNone || o.encodingNone) {
  240.             return equality && o.kcode == kcode;
  241.         } else {
  242.             return equality &&
  243.                     o.encodingNone == encodingNone &&
  244.                     o.kcode == kcode &&
  245.                     o.kcodeDefault == kcodeDefault;
  246.         }
  247.     }
  248.    
  249.     @Override
  250.     public String toString() {
  251.         return "RegexpOptions(kcode: " + kcode +
  252.                 (encodingNone == true ? ", encodingNone" : "") +
  253.                 (extended == true ? ", extended" : "") +
  254.                 (fixed == true ? ", fixed" : "") +
  255.                 (ignorecase == true ? ", ignorecase" : "") +
  256.                 (java == true ? ", java" : "") +
  257.                 (kcodeDefault == true ? ", kcodeDefault" : "") +
  258.                 (literal == true ? ", literal" : "") +
  259.                 (multiline == true ? ", multiline" : "") +
  260.                 (once == true ? ", once" : "") +                
  261.                 ")";
  262.     }
  263.    
  264.     private KCode kcode;
  265.     private boolean fixed;
  266.     private boolean once;
  267.     private boolean extended;
  268.     private boolean multiline;
  269.     private boolean ignorecase;
  270.     private boolean java;
  271.     private boolean encodingNone;
  272.     private boolean kcodeDefault;
  273.     private boolean literal;
  274. }