001 package hirondelle.web4j.security;
002
003 import java.io.IOException;
004 import java.io.ObjectInputStream;
005 import java.io.ObjectOutputStream;
006 import java.io.Serializable;
007 import java.util.*;
008 import hirondelle.web4j.util.Util;
009 import hirondelle.web4j.model.ModelUtil;
010 import hirondelle.web4j.util.EscapeChars;
011 import hirondelle.web4j.util.Consts;
012 import hirondelle.web4j.BuildImpl;
013 import hirondelle.web4j.TESTAll;
014 import java.util.logging.Logger;
015
016 /**
017 Models free-form text entered by the user, and
018 protects your application from
019 <a href='http://www.owasp.org/index.php/Cross_Site_Scripting'>Cross Site Scripting</a> (XSS).
020
021 <P>Free-form text refers to text entered by the end user.
022 It differs from other data in that its content is not tightly
023 constrained. Examples of free-form text might include a user name, a description
024 of something, a comment, and so on. If you model free-form text as a simple
025 <tt>String</tt>, then when presenting that text in a web page, you must take
026 special precautions against Cross Site Scripting attacks, by escaping
027 special characters. When modeling such data as <tt>SafeText</tt>,
028 however, such special steps are not needed, since the escaping is built
029 directly into its {@link #toString} method.
030
031 <P>It is worth noting that there are two defects with JSTL's handling of this problem:
032 <ul>
033 <li>the {@code <c:out>} tag <em>escapes only 5 of the 12 special characters</em> identified
034 by the Open Web App Security Project as being a concern.
035 <li>used in a JSP, the Expression Language allows pleasingly concise presentation, but
036 <em>does not escape special characters in any way</em>. Even when one is aware of this,
037 it is easy to forget to take precautions against Cross Site Scripting attacks.
038 </ul>
039
040 <P>Using <tt>SafeText</tt> will protect you from both of these defects.
041 Since the correct escaping is built into {@link #toString}, you may freely use JSP
042 Expression Language, without needing to do any escaping in the view. Note that if you use
043 {@code <c:out>} with <tt>SafeText</tt> (not recommended), then you must
044 use <tt>escapeXml='false'</tt> to avoid double-escaping of special characters.
045
046 <P>There are various ways of presenting text :
047 <ul>
048 <li>as HTML (most common) - use {@link #toString()} to escape a large number of
049 special characters.
050 <li>as XML - use {@link #getXmlSafe()} to escape 5 special characters.
051 <li>as JavaScript Object Notation (JSON) - use {@link #getJsonSafe()} to escape
052 a number of special characters
053 <li>as plain text - use {@link #getRawString()} to do no escaping at all.
054 </ul>
055
056 <h4>Checking For Vulnerabilities Upon Startup</h4>
057 WEB4J will perform checks for Cross-Site Scripting vulnerabilities
058 upon startup, by scanning your application's classes for <tt>public</tt> Model Objects
059 having <tt>public getXXX</tt> methods that return a <tt>String</tt>. It will log such
060 occurrences to encourage you to investigate them further.
061
062 <P><em>Design Notes :</em><br>
063 This class is <tt>final</tt>, immutable, {@link Serializable},
064 and {@link Comparable}, in imitation of the other building block classes
065 such as {@link String}, {@link Integer}, and so on.
066
067 <P>The reason why protection against Cross-Site Scripting is not implemented as a
068 Servlet Filter is because a filter would have no means of distinguishing between safe and
069 unsafe markup.
070
071 <P>One might object to escaping special characters in the Model, instead of in the View.
072 However, from a practical point of view, it seems more likely that the programmer will
073 remember to use <tt>SafeText</tt> once in the Model, than remember to do the
074 escaping repeatedly in the View.
075 */
076 public final class SafeText implements Serializable, Comparable<SafeText> {
077
078 /**
079 Returns <tt>true</tt> only if the given character is always escaped by
080 {@link #toString()}. For the list of characters, see {@link EscapeChars#forHTML(String)}.
081
082 <P>Recommended that your implementation of {@link PermittedCharacters}
083 use this method. This will allow you to accept many special characters in your
084 list of permissible characters.
085 */
086 public static boolean isEscaped(int aCodePoint){
087 return ESCAPED_CODE_POINTS.contains(aCodePoint);
088 }
089
090 /**
091 Constructor.
092
093 @param aText free-form text input by the end user, which may contain
094 Cross Site Scripting attacks. Non-null. The text is trimmed by this
095 constructor.
096 */
097 public SafeText(String aText) {
098 fText = Util.trimPossiblyNull(aText);
099 validateState();
100 }
101
102 /**
103 Factory method.
104
105 Simply a slightly more compact way of building an object, as opposed to 'new'.
106 */
107 public static SafeText from(String aText){
108 return new SafeText(aText);
109 }
110
111 /**
112 Return the text in a form safe for an HTML document.
113
114 Passes the raw text through {@link EscapeChars#forHTML(String)}.
115 */
116 @Override public String toString(){
117 if( ! Util.textHasContent(fEscapedForHTML) ){
118 fEscapedForHTML = EscapeChars.forHTML(fText);
119 }
120 return fEscapedForHTML;
121 }
122
123 /** Return the (trimmed) text passed to the constructor. */
124 public String getRawString(){
125 return fText;
126 }
127
128 /**
129 Return the text in a form safe for an XML element.
130
131 <P>Arbitrary text can be rendered safely in an XML document in two ways :
132 <ul>
133 <li>using a <tt>CDATA</tt> block
134 <li>escaping special characters {@code &, <, >, ", '}.
135 </ul>
136
137 <P>This method will escape the above five special characters, and replace them with
138 character entities, using {@link EscapeChars#forXML(String)}
139 */
140 public String getXmlSafe(){
141 return EscapeChars.forXML(fText);
142 }
143
144 /**
145 Return the text in a form safe for <a href='http://www.json.org/'>JSON</a> (JavaScript Object Notation) data.
146
147 <P>This method is intended for the <i>data</i> elements of JSON.
148 It is intended for <i>values</i> of things, not for their <i>names</i>.
149 Typically, only the values will come from end user input, while the names will
150 be hard-coded.
151 */
152 public String getJsonSafe(){
153 return EscapeChars.forJSON(fText);
154 }
155
156 @Override public boolean equals(Object aThat){
157 Boolean result = ModelUtil.quickEquals(this, aThat);
158 if ( result == null ){
159 SafeText that = (SafeText)aThat;
160 result = ModelUtil.equalsFor(this.getSignificantFields(), that.getSignificantFields());
161 }
162 return result;
163 }
164
165 @Override public int hashCode(){
166 if ( fHashCode == 0){
167 fHashCode = ModelUtil.hashCodeFor(getSignificantFields());
168 }
169 return fHashCode;
170 }
171
172 public int compareTo(SafeText aThat){
173 final int EQUAL = 0;
174 if ( this == aThat ) return EQUAL;
175
176 int comparison = this.fText.compareTo(aThat.fText);
177 if ( comparison != EQUAL ) return comparison;
178
179 return EQUAL;
180 }
181
182 // PRIVATE //
183
184 /** @serial */
185 private final String fText;
186 /** The return value of toString, cached like fHashCode. */
187 private String fEscapedForHTML;
188 private int fHashCode;
189 private static final Logger fLogger = Util.getLogger(SafeText.class);
190
191 private Object[] getSignificantFields(){
192 return new Object[] {fText};
193 }
194
195 /** During deserialization, this method cannot be called, since the implementation of PermittedChars is null. */
196 private void validateState() {
197 if (fText == null){
198 throw new NullPointerException("Free form text cannot be null.");
199 }
200 String badCharacters = findBadCharacters(fText);
201 if( Util.textHasContent(badCharacters) ) {
202 throw new IllegalArgumentException("Unpermitted character(s) in text: " + Util.quote(badCharacters) );
203 }
204 }
205
206 private String findBadCharacters(String aArbitraryText){
207 String result = Consts.EMPTY_STRING; //default
208 StringBuilder badCharacters = new StringBuilder();
209 PermittedCharacters whitelist = getPermittedChars();
210
211 int idx = 0;
212 int length = aArbitraryText.length();
213 while ( idx < length ) {
214 int codePoint = aArbitraryText.codePointAt(idx);
215 if( ! whitelist.isPermitted(codePoint) ) {
216 fLogger.severe("Bad Code Point : " + codePoint);
217 char[] badChar = Character.toChars(codePoint);
218 badCharacters.append(String.valueOf(badChar));
219 }
220 idx = idx + Character.charCount(codePoint);
221 }
222
223 if( Util.textHasContent(badCharacters.toString()) ) {
224 result = badCharacters.toString();
225 fLogger.severe("Bad Characters found in request, disallowed by PermittedCharacters implementation: " + result);
226 }
227 return result;
228 }
229
230 private PermittedCharacters getPermittedChars(){
231 return BuildImpl.forPermittedCharacters();
232 }
233
234 /**
235 For evolution of this class, see Sun guidelines :
236 http://java.sun.com/j2se/1.5.0/docs/guide/serialization/spec/version.html#6678
237 */
238 private static final long serialVersionUID = 7526472295633676147L;
239
240 /**
241 Always treat de-serialization as a full-blown constructor, by
242 validating the final state of the de-serialized object.
243 */
244 private void readObject(ObjectInputStream aInputStream) throws ClassNotFoundException, IOException {
245 aInputStream.defaultReadObject();
246 //partial validation only, without looking for 'bad' characters (BuildImpl not available):
247 if (fText == null){
248 throw new NullPointerException("Free form text cannot be null.");
249 }
250 }
251
252 /**
253 This is the default implementation of writeObject.
254 Customise if necessary.
255 */
256 private void writeObject(ObjectOutputStream aOutputStream) throws IOException {
257 aOutputStream.defaultWriteObject();
258 }
259
260 /** List of characters that this class will always escape. */
261 private static List<Character> ESCAPED = Arrays.asList(
262 '<',
263 '>' ,
264 '&' ,
265 '"' ,
266 '\t' ,
267 '!' ,
268 '#' ,
269 '$' ,
270 '%' ,
271 '\'' ,
272 '(' ,
273 ')' ,
274 '*' ,
275 '+' ,
276 ',' ,
277 '-' ,
278 '.' ,
279 '/' ,
280 ':' ,
281 ';' ,
282 '=' ,
283 '?' ,
284 '@' ,
285 '[' ,
286 '\\' ,
287 ']' ,
288 '^' ,
289 '_' ,
290 '`' ,
291 '{' ,
292 '|' ,
293 '}' ,
294 '~'
295 );
296
297 /** As above, but translated into a form that uses code points. */
298 private static List<Integer> ESCAPED_CODE_POINTS = new ArrayList<Integer>();
299 static {
300 for (Character character : ESCAPED){
301 ESCAPED_CODE_POINTS.add(Character.toString(character).codePointAt(0));
302 }
303 }
304 }