001    package hirondelle.web4j.security;
002    
003    import hirondelle.web4j.BuildImpl;
004    import hirondelle.web4j.model.ModelUtil;
005    import hirondelle.web4j.util.Consts;
006    import hirondelle.web4j.util.EscapeChars;
007    import hirondelle.web4j.util.Util;
008    
009    import java.io.IOException;
010    import java.io.ObjectInputStream;
011    import java.io.ObjectOutputStream;
012    import java.io.Serializable;
013    import java.util.ArrayList;
014    import java.util.Arrays;
015    import java.util.List;
016    import java.util.logging.Logger;
017    
018    /**
019     Models free-form text entered by the user, and 
020     protects your application from 
021     <a href='http://www.owasp.org/index.php/Cross_Site_Scripting'>Cross Site Scripting</a> (XSS). 
022     
023     <P>Free-form text refers to text entered by the end user. 
024     It differs from other data in that its content is not tightly 
025     constrained. Examples of free-form text might include a user name, a description 
026     of something, a comment, and so on. If you model free-form text as a simple 
027     <tt>String</tt>, then when presenting that text in a web page, you must take 
028     special precautions against Cross Site Scripting attacks, by escaping 
029     special characters. When modeling such data as <tt>SafeText</tt>, 
030     however, such special steps are not needed, since the escaping is built
031     directly into its {@link #toString} method. 
032     
033     <P>It is worth noting that there are two defects with JSTL' s handling of this problem :
034     <ul>
035     <li>the {@code <c:out>} tag <em>escapes only 5 of the 12 special characters</em> identified
036     by the Open Web App Security Project as being a concern.
037     <li>used in a JSP, the Expression Language allows pleasingly concise presentation, but 
038     <em>does not escape special characters in any way</em>. Even when one is aware of this, 
039     it is easy to forget to take precautions against Cross Site Scripting attacks.
040     </ul>
041     
042     <P>Using <tt>SafeText</tt> will protect you from both of these defects.
043     Since the correct escaping is built into {@link #toString}, you may freely use JSP 
044     Expression Language, without needing to do any escaping in the view. Note that if you use 
045     {@code <c:out>} with <tt>SafeText</tt> (not recommeded), then you must 
046     use <tt>escapeXml='false'</tt> to avoid double-escaping of special characters.
047     
048     <P>There are various ways of presenting text :
049     <ul>
050     <li>as HTML (most common) - use {@link #toString()} to escape a large number of 
051      special characters. 
052     <li>as XML - use {@link #getXmlSafe()} to escape 5 special characters.
053     <li>as JavaScript Object Notation (JSON) - use {@link #getJsonSafe()} to escape 
054      a number of special characters 
055      <li>as plain text - use {@link #getRawString()} to do no escaping at all.
056     </ul> 
057     
058     <h4>Checking For Vulnerabilities Upon Startup</h4>
059     WEB4J will perform checks for Cross-Site Scripting vulnerabilities 
060     upon startup, by scanning your application's classes for <tt>public</tt> Model Objects 
061     having <tt>public getXXX</tt> methods that return a <tt>String</tt>. It will log such  
062     occurrences to encourage you to investigate them further.
063     
064     <P><em>Design Notes :</em><br>
065     This class is <tt>final</tt>, immutable, {@link Serializable}, 
066     and {@link Comparable}, in imitation of the other building block classes 
067     such as {@link String}, {@link Integer}, and so on.
068     
069     <P>The reason why protection against Cross-Site Scripting is not implemented as a 
070     Servlet Filter is because a filter would have no means of distinguishing between safe and 
071     unsafe markup. 
072     
073     <P>One might object to escaping special characters in the Model, instead of in the View. 
074     However, from a practical point of view, it seems more likely that the programmer will  
075     remember to use <tt>SafeText</tt> once in the Model, than remember to do the 
076     escaping repeatedly in the View. 
077    */
078    public final class SafeText implements  Serializable, Comparable<SafeText> {
079    
080      /**
081       Returns <tt>true</tt> only if the given character is always escaped by
082       {@link #toString()}. For the list of characters, see {@link EscapeChars#forHTML(String)}. 
083       
084       <P>Recommended that your implementation of {@link PermittedCharacters}
085       use this method. This will allow you to accept many special characters in your 
086       list of permissible characters. 
087      */
088      public static boolean isEscaped(int aCodePoint){
089        return ESCAPED_CODE_POINTS.contains(aCodePoint);
090      }
091      
092      /**
093       Constructor.
094       
095       @param aText free-form text input by the end user, which may contain 
096       Cross Site Scripting attacks.  Non-null. The text is trimmed by this 
097       constructor.
098      */
099      public SafeText(String aText) {
100        fText = Util.trimPossiblyNull(aText);  
101        validateState();
102      }
103      
104      /**
105       Factory method.
106       
107       Simply a slightly more compact way of building an object, as opposed to 'new'.
108      */
109      public static SafeText from(String aText){
110        return new SafeText(aText);
111      }
112      
113      /** 
114       Return the text in a form safe for an HTML document.
115       
116       Passes the raw text through {@link EscapeChars#forHTML(String)}.  
117      */
118      @Override public String toString(){
119        if( ! Util.textHasContent(fEscapedForHTML) ){
120          fEscapedForHTML = EscapeChars.forHTML(fText);
121        }
122        return fEscapedForHTML;
123      }
124      
125      /** Return the (trimmed) text passed to the constructor.  */
126      public String getRawString(){
127        return fText;
128      }
129      
130      /** 
131       Return the text in a form safe for an XML element.
132      
133       <P>Arbitrary text can be rendered safely in an XML document in two ways :
134       <ul>
135       <li>using a <tt>CDATA</tt> block
136       <li>escaping special characters {@code &, <, >, ", '}.
137       </ul>
138       
139       <P>This method will escape the above five special characters, and replace them with 
140       character entities, using {@link EscapeChars#forXML(String)}
141      */
142      public String getXmlSafe(){
143        return EscapeChars.forXML(fText);
144      }
145      
146      /**
147       Return the text in a form safe for <a href='http://www.json.org/'>JSON</a> (JavaScript Object Notation) data.
148       
149       <P>This method is intended for the <i>data</i> elements of JSON. 
150       It is intended for <i>values</i> of things, not for their <i>names</i>.
151       Typically, only the values will come from end user input, while the names will 
152       be hard-coded.
153      */
154      public String getJsonSafe(){
155        return EscapeChars.forJSON(fText);
156      }
157      
158      @Override public boolean equals(Object aThat){
159        Boolean result = ModelUtil.quickEquals(this, aThat);
160        if ( result == null ){
161          SafeText that = (SafeText)aThat;
162          result = ModelUtil.equalsFor(this.getSignificantFields(), that.getSignificantFields());
163        }
164        return result;
165      }
166      
167      @Override public int hashCode(){
168        if ( fHashCode == 0){
169          fHashCode = ModelUtil.hashCodeFor(getSignificantFields());
170        }
171        return fHashCode;
172      }
173      
174      public int compareTo(SafeText aThat){
175        final int EQUAL = 0;
176        if ( this == aThat ) return EQUAL;
177        
178        int comparison = this.fText.compareTo(aThat.fText);
179        if ( comparison != EQUAL ) return comparison;
180        
181        return EQUAL;
182      }
183      
184      // PRIVATE //
185      
186      /** @serial   */
187      private final String fText;
188      /** The return value of toString, cached like fHashCode.  */
189      private String fEscapedForHTML;
190      private int fHashCode;
191      private static final Logger fLogger = Util.getLogger(SafeText.class);
192      
193      private Object[] getSignificantFields(){
194        return new Object[] {fText};
195      }
196      
197      /** During deserialization, this method cannot be called, since the implementation of PermittedChars is null. */
198      private void validateState() {
199        if (fText == null){
200          throw new NullPointerException("Free form text cannot be null.");
201        }
202        String badCharacters = findBadCharacters(fText);
203        if( Util.textHasContent(badCharacters) ) {
204          throw new IllegalArgumentException("Unpermitted character(s) in text: " + Util.quote(badCharacters) );
205        }
206      }
207      
208      private String findBadCharacters(String aArbitraryText){
209        String result = Consts.EMPTY_STRING; //default
210        StringBuilder badCharacters = new StringBuilder();
211        PermittedCharacters whitelist = getPermittedChars();
212        
213        int idx = 0;
214        int length = aArbitraryText.length();
215        while ( idx < length ) {
216          int codePoint = aArbitraryText.codePointAt(idx);
217          if( ! whitelist.isPermitted(codePoint) ) {
218            fLogger.severe("Bad Code Point : " + codePoint);
219            char[] badChar = Character.toChars(codePoint);
220            badCharacters.append(String.valueOf(badChar));        
221          }
222          idx = idx  + Character.charCount(codePoint);
223        }
224        
225        if( Util.textHasContent(badCharacters.toString()) ) {
226          result = badCharacters.toString();
227          fLogger.severe("Bad Characters found in request, disallowed by PermittedCharacters implementation: " + result);
228        }
229        return result;
230      }
231      
232      private PermittedCharacters getPermittedChars(){
233        return BuildImpl.forPermittedCharacters();
234      }
235      
236      /**
237       For evolution of this class, see Sun guidelines : 
238       http://java.sun.com/j2se/1.5.0/docs/guide/serialization/spec/version.html#6678 
239      */
240      private static final long serialVersionUID = 7526472295633676147L;
241    
242      /**
243       Always treat de-serialization as a full-blown constructor, by
244       validating the final state of the de-serialized object.
245      */
246      private void readObject(ObjectInputStream aInputStream) throws ClassNotFoundException, IOException {
247         aInputStream.defaultReadObject();
248         //partial validation only, without looking for 'bad' characters (BuildImpl not available):
249         if (fText == null){
250           throw new NullPointerException("Free form text cannot be null.");
251         }
252      }
253      
254      /**
255       This is the default implementation of writeObject.
256       Customise if necessary.
257      */
258      private void writeObject(ObjectOutputStream aOutputStream) throws IOException {
259        aOutputStream.defaultWriteObject();
260      }
261      
262      /** List of characters that this class will always escape.  */
263      private static List<Character> ESCAPED = Arrays.asList(
264          '<', 
265          '>' ,
266          '&' ,
267          '"' ,
268          '\t' ,
269          '!' ,
270          '#' ,
271          '$' ,
272          '%' ,
273          '\'' ,
274          '(' ,
275          ')' ,
276          '*' ,
277          '+' ,
278          ',' ,
279          '-' ,
280          '.' ,
281          '/' ,
282          ':' ,
283          ';' ,
284          '=' ,
285          '?' ,
286          '@' ,
287          '[' ,
288          '\\' ,
289          ']' ,
290          '^' ,
291          '_' ,
292          '`' ,
293          '{' ,
294          '|' ,
295          '}' ,
296          '~'
297       );
298    
299      /** As above, but translated into a form that uses code points. */
300      private static List<Integer> ESCAPED_CODE_POINTS = new ArrayList<Integer>();
301      static {
302        for (Character character : ESCAPED){
303          ESCAPED_CODE_POINTS.add(Character.toString(character).codePointAt(0));
304        }
305      }
306    }