001 package hirondelle.web4j.security; 002 003 import hirondelle.web4j.BuildImpl; 004 import hirondelle.web4j.model.ModelUtil; 005 import hirondelle.web4j.util.Consts; 006 import hirondelle.web4j.util.EscapeChars; 007 import hirondelle.web4j.util.Util; 008 009 import java.io.IOException; 010 import java.io.ObjectInputStream; 011 import java.io.ObjectOutputStream; 012 import java.io.Serializable; 013 import java.util.ArrayList; 014 import java.util.Arrays; 015 import java.util.List; 016 import java.util.logging.Logger; 017 018 /** 019 Models free-form text entered by the user, and 020 protects your application from 021 <a href='http://www.owasp.org/index.php/Cross_Site_Scripting'>Cross Site Scripting</a> (XSS). 022 023 <P>Free-form text refers to text entered by the end user. 024 It differs from other data in that its content is not tightly 025 constrained. Examples of free-form text might include a user name, a description 026 of something, a comment, and so on. If you model free-form text as a simple 027 <tt>String</tt>, then when presenting that text in a web page, you must take 028 special precautions against Cross Site Scripting attacks, by escaping 029 special characters. When modeling such data as <tt>SafeText</tt>, 030 however, such special steps are not needed, since the escaping is built 031 directly into its {@link #toString} method. 032 033 <P>It is worth noting that there are two defects with JSTL' s handling of this problem : 034 <ul> 035 <li>the {@code <c:out>} tag <em>escapes only 5 of the 12 special characters</em> identified 036 by the Open Web App Security Project as being a concern. 037 <li>used in a JSP, the Expression Language allows pleasingly concise presentation, but 038 <em>does not escape special characters in any way</em>. Even when one is aware of this, 039 it is easy to forget to take precautions against Cross Site Scripting attacks. 040 </ul> 041 042 <P>Using <tt>SafeText</tt> will protect you from both of these defects. 043 Since the correct escaping is built into {@link #toString}, you may freely use JSP 044 Expression Language, without needing to do any escaping in the view. Note that if you use 045 {@code <c:out>} with <tt>SafeText</tt> (not recommeded), then you must 046 use <tt>escapeXml='false'</tt> to avoid double-escaping of special characters. 047 048 <P>There are various ways of presenting text : 049 <ul> 050 <li>as HTML (most common) - use {@link #toString()} to escape a large number of 051 special characters. 052 <li>as XML - use {@link #getXmlSafe()} to escape 5 special characters. 053 <li>as JavaScript Object Notation (JSON) - use {@link #getJsonSafe()} to escape 054 a number of special characters 055 <li>as plain text - use {@link #getRawString()} to do no escaping at all. 056 </ul> 057 058 <h4>Checking For Vulnerabilities Upon Startup</h4> 059 WEB4J will perform checks for Cross-Site Scripting vulnerabilities 060 upon startup, by scanning your application's classes for <tt>public</tt> Model Objects 061 having <tt>public getXXX</tt> methods that return a <tt>String</tt>. It will log such 062 occurrences to encourage you to investigate them further. 063 064 <P><em>Design Notes :</em><br> 065 This class is <tt>final</tt>, immutable, {@link Serializable}, 066 and {@link Comparable}, in imitation of the other building block classes 067 such as {@link String}, {@link Integer}, and so on. 068 069 <P>The reason why protection against Cross-Site Scripting is not implemented as a 070 Servlet Filter is because a filter would have no means of distinguishing between safe and 071 unsafe markup. 072 073 <P>One might object to escaping special characters in the Model, instead of in the View. 074 However, from a practical point of view, it seems more likely that the programmer will 075 remember to use <tt>SafeText</tt> once in the Model, than remember to do the 076 escaping repeatedly in the View. 077 */ 078 public final class SafeText implements Serializable, Comparable<SafeText> { 079 080 /** 081 Returns <tt>true</tt> only if the given character is always escaped by 082 {@link #toString()}. For the list of characters, see {@link EscapeChars#forHTML(String)}. 083 084 <P>Recommended that your implementation of {@link PermittedCharacters} 085 use this method. This will allow you to accept many special characters in your 086 list of permissible characters. 087 */ 088 public static boolean isEscaped(int aCodePoint){ 089 return ESCAPED_CODE_POINTS.contains(aCodePoint); 090 } 091 092 /** 093 Constructor. 094 095 @param aText free-form text input by the end user, which may contain 096 Cross Site Scripting attacks. Non-null. The text is trimmed by this 097 constructor. 098 */ 099 public SafeText(String aText) { 100 fText = Util.trimPossiblyNull(aText); 101 validateState(); 102 } 103 104 /** 105 Factory method. 106 107 Simply a slightly more compact way of building an object, as opposed to 'new'. 108 */ 109 public static SafeText from(String aText){ 110 return new SafeText(aText); 111 } 112 113 /** 114 Return the text in a form safe for an HTML document. 115 116 Passes the raw text through {@link EscapeChars#forHTML(String)}. 117 */ 118 @Override public String toString(){ 119 if( ! Util.textHasContent(fEscapedForHTML) ){ 120 fEscapedForHTML = EscapeChars.forHTML(fText); 121 } 122 return fEscapedForHTML; 123 } 124 125 /** Return the (trimmed) text passed to the constructor. */ 126 public String getRawString(){ 127 return fText; 128 } 129 130 /** 131 Return the text in a form safe for an XML element. 132 133 <P>Arbitrary text can be rendered safely in an XML document in two ways : 134 <ul> 135 <li>using a <tt>CDATA</tt> block 136 <li>escaping special characters {@code &, <, >, ", '}. 137 </ul> 138 139 <P>This method will escape the above five special characters, and replace them with 140 character entities, using {@link EscapeChars#forXML(String)} 141 */ 142 public String getXmlSafe(){ 143 return EscapeChars.forXML(fText); 144 } 145 146 /** 147 Return the text in a form safe for <a href='http://www.json.org/'>JSON</a> (JavaScript Object Notation) data. 148 149 <P>This method is intended for the <i>data</i> elements of JSON. 150 It is intended for <i>values</i> of things, not for their <i>names</i>. 151 Typically, only the values will come from end user input, while the names will 152 be hard-coded. 153 */ 154 public String getJsonSafe(){ 155 return EscapeChars.forJSON(fText); 156 } 157 158 @Override public boolean equals(Object aThat){ 159 Boolean result = ModelUtil.quickEquals(this, aThat); 160 if ( result == null ){ 161 SafeText that = (SafeText)aThat; 162 result = ModelUtil.equalsFor(this.getSignificantFields(), that.getSignificantFields()); 163 } 164 return result; 165 } 166 167 @Override public int hashCode(){ 168 if ( fHashCode == 0){ 169 fHashCode = ModelUtil.hashCodeFor(getSignificantFields()); 170 } 171 return fHashCode; 172 } 173 174 public int compareTo(SafeText aThat){ 175 final int EQUAL = 0; 176 if ( this == aThat ) return EQUAL; 177 178 int comparison = this.fText.compareTo(aThat.fText); 179 if ( comparison != EQUAL ) return comparison; 180 181 return EQUAL; 182 } 183 184 // PRIVATE // 185 186 /** @serial */ 187 private final String fText; 188 /** The return value of toString, cached like fHashCode. */ 189 private String fEscapedForHTML; 190 private int fHashCode; 191 private static final Logger fLogger = Util.getLogger(SafeText.class); 192 193 private Object[] getSignificantFields(){ 194 return new Object[] {fText}; 195 } 196 197 /** During deserialization, this method cannot be called, since the implementation of PermittedChars is null. */ 198 private void validateState() { 199 if (fText == null){ 200 throw new NullPointerException("Free form text cannot be null."); 201 } 202 String badCharacters = findBadCharacters(fText); 203 if( Util.textHasContent(badCharacters) ) { 204 throw new IllegalArgumentException("Unpermitted character(s) in text: " + Util.quote(badCharacters) ); 205 } 206 } 207 208 private String findBadCharacters(String aArbitraryText){ 209 String result = Consts.EMPTY_STRING; //default 210 StringBuilder badCharacters = new StringBuilder(); 211 PermittedCharacters whitelist = getPermittedChars(); 212 213 int idx = 0; 214 int length = aArbitraryText.length(); 215 while ( idx < length ) { 216 int codePoint = aArbitraryText.codePointAt(idx); 217 if( ! whitelist.isPermitted(codePoint) ) { 218 fLogger.severe("Bad Code Point : " + codePoint); 219 char[] badChar = Character.toChars(codePoint); 220 badCharacters.append(String.valueOf(badChar)); 221 } 222 idx = idx + Character.charCount(codePoint); 223 } 224 225 if( Util.textHasContent(badCharacters.toString()) ) { 226 result = badCharacters.toString(); 227 fLogger.severe("Bad Characters found in request, disallowed by PermittedCharacters implementation: " + result); 228 } 229 return result; 230 } 231 232 private PermittedCharacters getPermittedChars(){ 233 return BuildImpl.forPermittedCharacters(); 234 } 235 236 /** 237 For evolution of this class, see Sun guidelines : 238 http://java.sun.com/j2se/1.5.0/docs/guide/serialization/spec/version.html#6678 239 */ 240 private static final long serialVersionUID = 7526472295633676147L; 241 242 /** 243 Always treat de-serialization as a full-blown constructor, by 244 validating the final state of the de-serialized object. 245 */ 246 private void readObject(ObjectInputStream aInputStream) throws ClassNotFoundException, IOException { 247 aInputStream.defaultReadObject(); 248 //partial validation only, without looking for 'bad' characters (BuildImpl not available): 249 if (fText == null){ 250 throw new NullPointerException("Free form text cannot be null."); 251 } 252 } 253 254 /** 255 This is the default implementation of writeObject. 256 Customise if necessary. 257 */ 258 private void writeObject(ObjectOutputStream aOutputStream) throws IOException { 259 aOutputStream.defaultWriteObject(); 260 } 261 262 /** List of characters that this class will always escape. */ 263 private static List<Character> ESCAPED = Arrays.asList( 264 '<', 265 '>' , 266 '&' , 267 '"' , 268 '\t' , 269 '!' , 270 '#' , 271 '$' , 272 '%' , 273 '\'' , 274 '(' , 275 ')' , 276 '*' , 277 '+' , 278 ',' , 279 '-' , 280 '.' , 281 '/' , 282 ':' , 283 ';' , 284 '=' , 285 '?' , 286 '@' , 287 '[' , 288 '\\' , 289 ']' , 290 '^' , 291 '_' , 292 '`' , 293 '{' , 294 '|' , 295 '}' , 296 '~' 297 ); 298 299 /** As above, but translated into a form that uses code points. */ 300 private static List<Integer> ESCAPED_CODE_POINTS = new ArrayList<Integer>(); 301 static { 302 for (Character character : ESCAPED){ 303 ESCAPED_CODE_POINTS.add(Character.toString(character).codePointAt(0)); 304 } 305 } 306 }