package hirondelle.web4j.security;

import hirondelle.web4j.BuildImpl;
import hirondelle.web4j.model.ModelUtil;
import hirondelle.web4j.util.Consts;
import hirondelle.web4j.util.EscapeChars;
import hirondelle.web4j.util.Util;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.logging.Logger;

/**
 Models free-form text entered by the user, and 
 protects your application from 
 <a href='http://www.owasp.org/index.php/Cross_Site_Scripting'>Cross Site Scripting</a> (XSS). 
 
 <P>Free-form text refers to text entered by the end user. 
 It differs from other data in that its content is not tightly 
 constrained. Examples of free-form text might include a user name, a description 
 of something, a comment, and so on. If you model free-form text as a simple 
 <tt>String</tt>, then when presenting that text in a web page, you must take 
 special precautions against Cross Site Scripting attacks, by escaping 
 special characters. When modeling such data as <tt>SafeText</tt>, 
 however, such special steps are not needed, since the escaping is built
 directly into its {@link #toString} method. 
 
 <P>It is worth noting that there are two defects with JSTL's handling of this problem :
 <ul>
 <li>the {@code <c:out>} tag <em>escapes only 5 of the 12 special characters</em> identified
 by the Open Web App Security Project as being a concern.
 <li>used in a JSP, the Expression Language allows pleasingly concise presentation, but 
 <em>does not escape special characters in any way</em>. Even when one is aware of this, 
 it is easy to forget to take precautions against Cross Site Scripting attacks.
 </ul>
 
 <P>Using <tt>SafeText</tt> will protect you from both of these defects.
 Since the correct escaping is built into {@link #toString}, you may freely use JSP 
 Expression Language, without needing to do any escaping in the view. Note that if you use 
 {@code <c:out>} with <tt>SafeText</tt> (not recommended), then you must 
 use <tt>escapeXml='false'</tt> to avoid double-escaping of special characters.
 
 <P>There are various ways of presenting text :
 <ul>
 <li>as HTML (most common) - use {@link #toString()} to escape a large number of 
  special characters. 
 <li>as XML - use {@link #getXmlSafe()} to escape 5 special characters.
 <li>as JavaScript Object Notation (JSON) - use {@link #getJsonSafe()} to escape 
  a number of special characters 
  <li>as plain text - use {@link #getRawString()} to do no escaping at all.
 </ul> 
 
 <h4>Checking For Vulnerabilities Upon Startup</h4>
 WEB4J will perform checks for Cross-Site Scripting vulnerabilities 
 upon startup, by scanning your application's classes for <tt>public</tt> Model Objects 
 having <tt>public getXXX</tt> methods that return a <tt>String</tt>. It will log such  
 occurrences to encourage you to investigate them further.
 
 <P><em>Design Notes :</em><br>
 This class is <tt>final</tt>, immutable, {@link Serializable}, 
 and {@link Comparable}, in imitation of the other building block classes 
 such as {@link String}, {@link Integer}, and so on.
 
 <P>The reason why protection against Cross-Site Scripting is not implemented as a 
 Servlet Filter is because a filter would have no means of distinguishing between safe and 
 unsafe markup. 
 
 <P>One might object to escaping special characters in the Model, instead of in the View. 
 However, from a practical point of view, it seems more likely that the programmer will  
 remember to use <tt>SafeText</tt> once in the Model, than remember to do the 
 escaping repeatedly in the View. 
*/
public final class SafeText implements  Serializable, Comparable<SafeText> {

  /**
   Returns <tt>true</tt> only if the given character is always escaped by
   {@link #toString()}. For the list of characters, see {@link EscapeChars#forHTML(String)}. 
   
   <P>Recommended that your implementation of {@link PermittedCharacters}
   use this method. This will allow you to accept many special characters in your 
   list of permissible characters. 
   
   @param aCodePoint the Unicode code point to look up.
   @return <tt>true</tt> if the code point appears in this class's list of escaped characters.
  */
  public static boolean isEscaped(int aCodePoint){
    return ESCAPED_CODE_POINTS.contains(aCodePoint);
  }
  
  /**
   Constructor.
   
   @param aText free-form text input by the end user, which may contain 
   Cross Site Scripting attacks.  Non-null. The text is trimmed by this 
   constructor.
   @throws NullPointerException if the trimmed text is <tt>null</tt>.
   @throws IllegalArgumentException if the trimmed text contains at least one code point 
   rejected by the configured {@link PermittedCharacters}.
  */
  public SafeText(String aText) {
    fText = Util.trimPossiblyNull(aText);  
    validateState();
  }
  
  /**
   Factory method.
   
   Simply a slightly more compact way of building an object, as opposed to 'new'.
   
   @param aText passed as is to {@link #SafeText(String)}.
   @return a new <tt>SafeText</tt> built from the given text.
  */
  public static SafeText from(String aText){
    return new SafeText(aText);
  }
  
  /** 
   Return the text in a form safe for an HTML document.
   
   Passes the raw text through {@link EscapeChars#forHTML(String)}. The result is 
   computed on first use and then reused.
  */
  @Override public String toString(){
    // A plain null check (instead of a 'has content' test) lets an empty result be cached too.
    if( fEscapedForHTML == null ){
      fEscapedForHTML = EscapeChars.forHTML(fText);
    }
    return fEscapedForHTML;
  }
  
  /** Return the (trimmed) text passed to the constructor.  */
  public String getRawString(){
    return fText;
  }
  
  /** 
   Return the text in a form safe for an XML element.
  
   <P>Arbitrary text can be rendered safely in an XML document in two ways :
   <ul>
   <li>using a <tt>CDATA</tt> block
   <li>escaping special characters {@code &, <, >, ", '}.
   </ul>
   
   <P>This method will escape the above five special characters, and replace them with 
   character entities, using {@link EscapeChars#forXML(String)}
  */
  public String getXmlSafe(){
    return EscapeChars.forXML(fText);
  }
  
  /**
   Return the text in a form safe for <a href='http://www.json.org/'>JSON</a> (JavaScript Object Notation) data.
   
   <P>This method is intended for the <i>data</i> elements of JSON. 
   It is intended for <i>values</i> of things, not for their <i>names</i>.
   Typically, only the values will come from end user input, while the names will 
   be hard-coded.
  */
  public String getJsonSafe(){
    return EscapeChars.forJSON(fText);
  }
  
  /**
   Equality is based only on the (trimmed) raw text.
   
   @param aThat the object to compare with this one.
  */
  @Override public boolean equals(Object aThat){
    // quickEquals yields null when it cannot decide; only then are the fields compared.
    Boolean result = ModelUtil.quickEquals(this, aThat);
    if ( result == null ){
      SafeText that = (SafeText)aThat;
      result = ModelUtil.equalsFor(this.getSignificantFields(), that.getSignificantFields());
    }
    return result;
  }
  
  /** Hash code derived from the same field as {@link #equals(Object)}; cached after first use. */
  @Override public int hashCode(){
    // 0 doubles as the 'not yet computed' marker.
    if ( fHashCode == 0){
      fHashCode = ModelUtil.hashCodeFor(getSignificantFields());
    }
    return fHashCode;
  }
  
  /**
   Orders objects by their raw text, using {@link String#compareTo(String)}.
   
   @param aThat the object to compare with this one. Non-null.
  */
  public int compareTo(SafeText aThat){
    if ( this == aThat ) {
      return 0;
    }
    return this.fText.compareTo(aThat.fText);
  }
  
  // PRIVATE //
  
  /** @serial   */
  private final String fText;
  /** The return value of toString, cached like fHashCode.  */
  private String fEscapedForHTML;
  private int fHashCode;
  private static final Logger fLogger = Util.getLogger(SafeText.class);
  
  private Object[] getSignificantFields(){
    return new Object[] {fText};
  }
  
  /**
   Full validation, used by the constructor: the text must be non-null, and must contain 
   only characters accepted by the configured {@link PermittedCharacters}.
   
   <P>During deserialization, this method cannot be called, since the implementation of PermittedChars is null. 
  */
  private void validateState() {
    if (fText == null){
      throw new NullPointerException("Free form text cannot be null.");
    }
    String badCharacters = findBadCharacters(fText);
    // Test the length directly: a forbidden character that is itself whitespace or a 
    // control character must still cause rejection.
    if( badCharacters.length() > 0 ) {
      throw new IllegalArgumentException("Unpermitted character(s) in text: " + Util.quote(badCharacters) );
    }
  }
  
  /**
   Scan the given text, code point by code point, and collect every code point that the 
   configured {@link PermittedCharacters} rejects. Each rejected code point is logged.
   
   @param aArbitraryText the text to scan. Non-null.
   @return the rejected characters, in order of appearance (with repeats); an empty 
   <tt>String</tt> if every code point is permitted.
  */
  private String findBadCharacters(String aArbitraryText){
    StringBuilder badCharacters = new StringBuilder();
    PermittedCharacters whitelist = getPermittedChars();
    
    int idx = 0;
    int length = aArbitraryText.length();
    while ( idx < length ) {
      int codePoint = aArbitraryText.codePointAt(idx);
      if( ! whitelist.isPermitted(codePoint) ) {
        fLogger.severe("Bad Code Point : " + codePoint);
        badCharacters.appendCodePoint(codePoint);
      }
      // advance by 1 or 2 chars, depending on whether the code point is supplementary
      idx = idx  + Character.charCount(codePoint);
    }
    
    // Decide on the number of collected characters, not on whether they 'look' non-blank; 
    // otherwise a rejected whitespace/control character could go unreported.
    if( badCharacters.length() == 0 ) {
      return Consts.EMPTY_STRING;
    }
    String result = badCharacters.toString();
    fLogger.severe("Bad Characters found in request, disallowed by PermittedCharacters implementation: " + result);
    return result;
  }
  
  private PermittedCharacters getPermittedChars(){
    return BuildImpl.forPermittedCharacters();
  }
  
  /**
   For evolution of this class, see Sun guidelines : 
   http://java.sun.com/j2se/1.5.0/docs/guide/serialization/spec/version.html#6678 
  */
  private static final long serialVersionUID = 7526472295633676147L;

  /**
   Partially validate the state of the de-serialized object.
   
   <P>Only the non-null check on the text is performed here; the check against 
   {@link PermittedCharacters} done by the constructor is not repeated. The cached 
   values for {@link #toString()} and {@link #hashCode()} are discarded, so that they 
   are always recomputed from the text itself and never taken from the stream.
  */
  private void readObject(ObjectInputStream aInputStream) throws ClassNotFoundException, IOException {
     aInputStream.defaultReadObject();
     //partial validation only, without looking for 'bad' characters (BuildImpl not available):
     if (fText == null){
       throw new NullPointerException("Free form text cannot be null.");
     }
     //do not trust derived values found in the stream; they are rebuilt lazily from fText:
     fEscapedForHTML = null;
     fHashCode = 0;
  }
  
  /**
   This is the default implementation of writeObject.
   Customise if necessary.
  */
  private void writeObject(ObjectOutputStream aOutputStream) throws IOException {
    aOutputStream.defaultWriteObject();
  }
  
  /** 
   List of characters that this class will always escape.  
   NOTE(review): presumably mirrors the characters handled by EscapeChars.forHTML - confirm 
   the two stay in sync. 
  */
  private static final List<Character> ESCAPED = Arrays.asList(
      '<', 
      '>' ,
      '&' ,
      '"' ,
      '\t' ,
      '!' ,
      '#' ,
      '$' ,
      '%' ,
      '\'' ,
      '(' ,
      ')' ,
      '*' ,
      '+' ,
      ',' ,
      '-' ,
      '.' ,
      '/' ,
      ':' ,
      ';' ,
      '=' ,
      '?' ,
      '@' ,
      '[' ,
      '\\' ,
      ']' ,
      '^' ,
      '_' ,
      '`' ,
      '{' ,
      '|' ,
      '}' ,
      '~'
   );

  /** As above, but translated into a form that uses code points. */
  private static final List<Integer> ESCAPED_CODE_POINTS = new ArrayList<Integer>();
  static {
    for (Character character : ESCAPED){
      ESCAPED_CODE_POINTS.add(Character.toString(character).codePointAt(0));
    }
  }
}
