001    package hirondelle.web4j.security;
002    
003    import java.io.IOException;
004    import java.io.ObjectInputStream;
005    import java.io.ObjectOutputStream;
006    import java.io.Serializable;
007    import java.util.*;
008    import hirondelle.web4j.util.Util;
009    import hirondelle.web4j.model.ModelUtil;
010    import hirondelle.web4j.util.EscapeChars;
011    import hirondelle.web4j.util.Consts;
012    import hirondelle.web4j.BuildImpl;
013    import hirondelle.web4j.TESTAll;
014    import java.util.logging.Logger;
015    
016    /**
017     Models free-form text entered by the user, and 
018     protects your application from 
019     <a href='http://www.owasp.org/index.php/Cross_Site_Scripting'>Cross Site Scripting</a> (XSS). 
020     
021     <P>Free-form text refers to text entered by the end user. 
022     It differs from other data in that its content is not tightly 
023     constrained. Examples of free-form text might include a user name, a description 
024     of something, a comment, and so on. If you model free-form text as a simple 
025     <tt>String</tt>, then when presenting that text in a web page, you must take 
026     special precautions against Cross Site Scripting attacks, by escaping 
027     special characters. When modeling such data as <tt>SafeText</tt>, 
028     however, such special steps are not needed, since the escaping is built
029     directly into its {@link #toString} method. 
030     
031     <P>It is worth noting that there are two defects with JSTL's handling of this problem :
032     <ul>
033     <li>the {@code <c:out>} tag <em>escapes only 5 of the 12 special characters</em> identified
034     by the Open Web App Security Project as being a concern.
035     <li>used in a JSP, the Expression Language allows pleasingly concise presentation, but 
036     <em>does not escape special characters in any way</em>. Even when one is aware of this, 
037     it is easy to forget to take precautions against Cross Site Scripting attacks.
038     </ul>
039     
040     <P>Using <tt>SafeText</tt> will protect you from both of these defects.
041     Since the correct escaping is built into {@link #toString}, you may freely use JSP 
042     Expression Language, without needing to do any escaping in the view. Note that if you use 
043     {@code <c:out>} with <tt>SafeText</tt> (not recommended), then you must 
044     use <tt>escapeXml='false'</tt> to avoid double-escaping of special characters.
045     
046     <P>There are various ways of presenting text :
047     <ul>
048     <li>as HTML (most common) - use {@link #toString()} to escape a large number of 
049      special characters. 
050     <li>as XML - use {@link #getXmlSafe()} to escape 5 special characters.
051     <li>as JavaScript Object Notation (JSON) - use {@link #getJsonSafe()} to escape 
052      a number of special characters 
053      <li>as plain text - use {@link #getRawString()} to do no escaping at all.
054     </ul> 
055     
056     <h4>Checking For Vulnerabilities Upon Startup</h4>
057     WEB4J will perform checks for Cross-Site Scripting vulnerabilities 
058     upon startup, by scanning your application's classes for <tt>public</tt> Model Objects 
059     having <tt>public getXXX</tt> methods that return a <tt>String</tt>. It will log such  
060     occurrences to encourage you to investigate them further.
061     
062     <P><em>Design Notes :</em><br>
063     This class is <tt>final</tt>, immutable, {@link Serializable}, 
064     and {@link Comparable}, in imitation of the other building block classes 
065     such as {@link String}, {@link Integer}, and so on.
066     
067     <P>The reason why protection against Cross-Site Scripting is not implemented as a 
068     Servlet Filter is because a filter would have no means of distinguishing between safe and 
069     unsafe markup. 
070     
071     <P>One might object to escaping special characters in the Model, instead of in the View. 
072     However, from a practical point of view, it seems more likely that the programmer will  
073     remember to use <tt>SafeText</tt> once in the Model, than remember to do the 
074     escaping repeatedly in the View. 
075    */
076    public final class SafeText implements  Serializable, Comparable<SafeText> {
077    
078      /**
079       Returns <tt>true</tt> only if the given character is always escaped by
080       {@link #toString()}. For the list of characters, see {@link EscapeChars#forHTML(String)}. 
081       
082       <P>Recommended that your implementation of {@link PermittedCharacters}
083       use this method. This will allow you to accept many special characters in your 
084       list of permissible characters. 
085      */
086      public static boolean isEscaped(int aCodePoint){
087        return ESCAPED_CODE_POINTS.contains(aCodePoint);
088      }
089      
090      /**
091       Constructor.
092       
093       @param aText free-form text input by the end user, which may contain 
094       Cross Site Scripting attacks.  Non-null. The text is trimmed by this 
095       constructor.
096      */
097      public SafeText(String aText) {
098        fText = Util.trimPossiblyNull(aText);  
099        validateState();
100      }
101      
102      /**
103       Factory method.
104       
105       Simply a slightly more compact way of building an object, as opposed to 'new'.
106      */
107      public static SafeText from(String aText){
108        return new SafeText(aText);
109      }
110      
111      /** 
112       Return the text in a form safe for an HTML document.
113       
114       Passes the raw text through {@link EscapeChars#forHTML(String)}.  
115      */
116      @Override public String toString(){
117        if( ! Util.textHasContent(fEscapedForHTML) ){
118          fEscapedForHTML = EscapeChars.forHTML(fText);
119        }
120        return fEscapedForHTML;
121      }
122      
123      /** Return the (trimmed) text passed to the constructor.  */
124      public String getRawString(){
125        return fText;
126      }
127      
128      /** 
129       Return the text in a form safe for an XML element.
130      
131       <P>Arbitrary text can be rendered safely in an XML document in two ways :
132       <ul>
133       <li>using a <tt>CDATA</tt> block
134       <li>escaping special characters {@code &, <, >, ", '}.
135       </ul>
136       
137       <P>This method will escape the above five special characters, and replace them with 
138       character entities, using {@link EscapeChars#forXML(String)}
139      */
140      public String getXmlSafe(){
141        return EscapeChars.forXML(fText);
142      }
143      
144      /**
145       Return the text in a form safe for <a href='http://www.json.org/'>JSON</a> (JavaScript Object Notation) data.
146       
147       <P>This method is intended for the <i>data</i> elements of JSON. 
148       It is intended for <i>values</i> of things, not for their <i>names</i>.
149       Typically, only the values will come from end user input, while the names will 
150       be hard-coded.
151      */
152      public String getJsonSafe(){
153        return EscapeChars.forJSON(fText);
154      }
155      
156      @Override public boolean equals(Object aThat){
157        Boolean result = ModelUtil.quickEquals(this, aThat);
158        if ( result == null ){
159          SafeText that = (SafeText)aThat;
160          result = ModelUtil.equalsFor(this.getSignificantFields(), that.getSignificantFields());
161        }
162        return result;
163      }
164      
165      @Override public int hashCode(){
166        if ( fHashCode == 0){
167          fHashCode = ModelUtil.hashCodeFor(getSignificantFields());
168        }
169        return fHashCode;
170      }
171      
172      public int compareTo(SafeText aThat){
173        final int EQUAL = 0;
174        if ( this == aThat ) return EQUAL;
175        
176        int comparison = this.fText.compareTo(aThat.fText);
177        if ( comparison != EQUAL ) return comparison;
178        
179        return EQUAL;
180      }
181      
182      // PRIVATE //
183      
184      /** @serial   */
185      private final String fText;
186      /** The return value of toString, cached like fHashCode.  */
187      private String fEscapedForHTML;
188      private int fHashCode;
189      private static final Logger fLogger = Util.getLogger(SafeText.class);
190      
191      private Object[] getSignificantFields(){
192        return new Object[] {fText};
193      }
194      
195      /** During deserialization, this method cannot be called, since the implementation of PermittedChars is null. */
196      private void validateState() {
197        if (fText == null){
198          throw new NullPointerException("Free form text cannot be null.");
199        }
200        String badCharacters = findBadCharacters(fText);
201        if( Util.textHasContent(badCharacters) ) {
202          throw new IllegalArgumentException("Unpermitted character(s) in text: " + Util.quote(badCharacters) );
203        }
204      }
205      
206      private String findBadCharacters(String aArbitraryText){
207        String result = Consts.EMPTY_STRING; //default
208        StringBuilder badCharacters = new StringBuilder();
209        PermittedCharacters whitelist = getPermittedChars();
210        
211        int idx = 0;
212        int length = aArbitraryText.length();
213        while ( idx < length ) {
214          int codePoint = aArbitraryText.codePointAt(idx);
215          if( ! whitelist.isPermitted(codePoint) ) {
216            fLogger.severe("Bad Code Point : " + codePoint);
217            char[] badChar = Character.toChars(codePoint);
218            badCharacters.append(String.valueOf(badChar));        
219          }
220          idx = idx  + Character.charCount(codePoint);
221        }
222        
223        if( Util.textHasContent(badCharacters.toString()) ) {
224          result = badCharacters.toString();
225          fLogger.severe("Bad Characters found in request, disallowed by PermittedCharacters implementation: " + result);
226        }
227        return result;
228      }
229      
230      private PermittedCharacters getPermittedChars(){
231        PermittedCharacters result = null;
232        if( TESTAll.IS_TESTING ) {
233          result =  new PermittedCharactersImpl(); //default
234        }
235        else {
236          result = BuildImpl.forPermittedCharacters(); //dynamic
237        }
238        return result;
239      }
240      
241      /**
242       For evolution of this class, see Sun guidelines : 
243       http://java.sun.com/j2se/1.5.0/docs/guide/serialization/spec/version.html#6678 
244      */
245      private static final long serialVersionUID = 7526472295633676147L;
246    
247      /**
248       Always treat de-serialization as a full-blown constructor, by
249       validating the final state of the de-serialized object.
250      */
251      private void readObject(ObjectInputStream aInputStream) throws ClassNotFoundException, IOException {
252         aInputStream.defaultReadObject();
253         //partial validation only, without looking for 'bad' characters (BuildImpl not available):
254         if (fText == null){
255           throw new NullPointerException("Free form text cannot be null.");
256         }
257      }
258      
259      /**
260       This is the default implementation of writeObject.
261       Customise if necessary.
262      */
263      private void writeObject(ObjectOutputStream aOutputStream) throws IOException {
264        aOutputStream.defaultWriteObject();
265      }
266      
267      /** List of characters that this class will always escape.  */
268      private static List<Character> ESCAPED = Arrays.asList(
269          '<', 
270          '>' ,
271          '&' ,
272          '"' ,
273          '\t' ,
274          '!' ,
275          '#' ,
276          '$' ,
277          '%' ,
278          '\'' ,
279          '(' ,
280          ')' ,
281          '*' ,
282          '+' ,
283          ',' ,
284          '-' ,
285          '.' ,
286          '/' ,
287          ':' ,
288          ';' ,
289          '=' ,
290          '?' ,
291          '@' ,
292          '[' ,
293          '\\' ,
294          ']' ,
295          '^' ,
296          '_' ,
297          '`' ,
298          '{' ,
299          '|' ,
300          '}' ,
301          '~'
302       );
303    
304      /** As above, but translated into a form that uses code points. */
305      private static List<Integer> ESCAPED_CODE_POINTS = new ArrayList<Integer>();
306      static {
307        for (Character character : ESCAPED){
308          ESCAPED_CODE_POINTS.add(Character.toString(character).codePointAt(0));
309        }
310      }
311    }