package hirondelle.web4j.ui.translate;

import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Locale;
import java.util.regex.*;

import hirondelle.web4j.BuildImpl;
import hirondelle.web4j.request.LocaleSource;
import hirondelle.web4j.ui.tag.TagHelper;
import hirondelle.web4j.util.EscapeChars;
import hirondelle.web4j.util.Util;
import hirondelle.web4j.util.Regex;

/**
 Custom tag for translating regular text flow in large sections of a web page.

 <P><span class="highlight">This tag treats every piece of free flow text delimited by a
 tag as a unit of translatable base text</span>, and passes it to {@link Translator}.
 That is, all tags are treated as <em>delimiters</em> of units of translatable text.

 <P>This tag is suitable for translating most, but not all, of the
 regular text flow in a web page. <span class="highlight">It is suitable for translating
 markup that contains short, isolated snippets of text, that have no "structure", and
 no dynamic data</span>, such as the labels in a form, the column headers in a listing,
 and so on. (For many intranet applications, this
 makes up most of the free flow text appearing in the application.) Instead of using many
 separate {@code <w:txt>} {@link Text} tags to translate each item one by one,
 a single {@code <w:txtFlow>} tag can often be used to do the same thing in a single step.

 <P><span class="highlight">Using this class has two strong advantages</span> :
 <ul>
 <li>the effort needed to internationalize a page is greatly reduced
 <li>the markup will be significantly easier to read and maintain, since most of the free flow text
 remains unchanged from the single-language case
 </ul>

 <P>
 This tag is <em>not suitable</em> when the base text to be translated :
 <ul>
 <li>contains markup
 <li>has dynamic data of any sort
 <li>contains a <tt>TEXTAREA</tt> with a <em>non-empty</em> body. Such text will be seen as a translatable
 unit, which is usually undesired, since such text is usually not fixed, but dynamic (that is, from the database).
 (To avoid this, simply nest this tag <em>inside</em> the {@code <w:populate>} tag surrounding the
 form that contains the <tt>TEXTAREA</tt>. This ensures that the population is not affected by the action of this
 tag.)
 </ul>

 <P>For example, given this text containing markup :
 <PRE>{@code The <EM>raison-d'etre</EM> for this...}</PRE>
 then this tag will split the text into three separate pieces, delimited by the <tt>EM</tt> tags.
 Then, each piece will be translated. For such items, this is almost always undesirable. Instead,
 one must use a {@code <w:txt>} {@link Text} tag, which can treat such items as
 a single unit of translatable text, without chopping it up into three pieces.

 <P>Within a single snippet, each line of text is translated separately, with the white space
 surrounding it left untouched.

 <P><b>Example</b><br>
 Here, all of the <tt>LABEL</tt> tags in this form will have their content translated by the
 {@code <w:txtFlow>} tag :
 <PRE>{@code
<w:populate style='edit' using="myUser">
<w:txtFlow>
<form action='blah.do' method='post' class="user-input">
<table align="center">
 <tr>
  <td>
   <label class="mandatory">Email</label>
  </td>
  <td>
   <input type="text" name="Email Address" size='30'>
  </td>
 </tr>

 <tr>
  <td>
   <label>Age</label>
  </td>
  <td>
   <input type="text" name="Age" size="30">
  </td>
 </tr>

 <tr>
  <td>
   <label>Desired Salary</label>
  </td>
  <td>
   <input type="text" name="Desired Salary" size="30">
  </td>
 </tr>

 <tr>
  <td>
   <label> Birth Date </label>
  </td>
  <td>
   <input type="text" name="Birth Date" size="30">
  </td>
 </tr>

 <tr>
  <td>
   <input type='submit' value='UPDATE'>
  </td>
 </tr>
</table>
</form>
</w:txtFlow>
</w:populate>
 }</PRE>
*/
public final class TextFlow extends TagHelper {

  /**
   By default, this tag will escape any special characters appearing in the
   text flow, using {@link EscapeChars#forHTML(String)}. To change that default
   behaviour, set this value to <tt>false</tt>.

   <P><span class="highlight">Exercise care that text is not doubly escaped.</span>
   For instance, if the text already contains
   character entities, and <tt>setEscapeChars</tt> is true, then the text {@code &amp;}
   will be emitted by this tag as {@code &amp;amp;}, for example.

   @param aValue <tt>true</tt> to escape the translated text, <tt>false</tt> to emit it as is.
  */
  public void setEscapeChars(boolean aValue){
    fEscapeChars = aValue;
  }

  /**
   Translate each piece of free flow text appearing in <tt>aOriginalBody</tt>.

   <P>Each piece of text is delimited by one or more tags, and is translated using the configured
   {@link Translator}. Leading or trailing white space is preserved, including any white space
   that follows the last tag in the body.

   <P>Characters between a {@code <} and the next {@code >} are copied to the output unchanged.
   A {@code >} that appears outside of any tag is treated as ordinary text, and becomes part of
   the surrounding snippet.

   @param aOriginalBody the body of this tag, as it appears in the page.
   @return the body, with each snippet of free flow text replaced by its translation.
  */
  @Override protected String getEmittedText(String aOriginalBody) {
    final StringBuilder result = new StringBuilder();
    final StringBuilder snippet = new StringBuilder();
    boolean isInsideTag = false;

    final CharacterIterator iterator = new StringCharacterIterator(aOriginalBody);
    //Terminate on the index, not on CharacterIterator.DONE : DONE is the legal char U+FFFF,
    //and comparing against it would silently truncate a body which happens to contain it.
    for (
      char character = iterator.first();
      iterator.getIndex() < iterator.getEndIndex();
      character = iterator.next()
    ) {
      if (character == '<') {
        doStartTag(result, snippet, character);
        isInsideTag = true;
      }
      else if (character == '>' && isInsideTag) {
        doEndTag(result, character);
        isInsideTag = false;
      }
      else {
        //includes a stray '>' outside of any tag : emitting it directly would place it
        //ahead of the pending snippet, changing the order of the output
        doRegularCharacter(result, snippet, isInsideTag, character);
      }
    }
    //always flush what remains : text is translated, while a tail made up only of
    //white space (typically the new line after the last tag) is emitted unchanged
    appendTranslation(snippet, result);
    return result.toString();
  }

  // PRIVATE //

  /**
   Matches the trimmed content of a line of text : either a run which both starts and ends
   with a non-white space character, or a single non-white space character.
   Since <tt>.</tt> does not match line terminators here, a snippet spanning several lines
   produces one match per line.
  */
  static final Pattern TRIMMED_TEXT = Pattern.compile("((?:\\S(?:.)*\\S)|(?:\\S))");

  private boolean fEscapeChars = true;
  private final LocaleSource fLocaleSource = BuildImpl.forLocaleSource();
  private final Translator fTranslator = BuildImpl.forTranslator();

  /**
   Called for {@code <}. Flush the text accumulated since the previous tag, and then
   emit the character which opens the new tag.
  */
  private void doStartTag(StringBuilder aResult, StringBuilder aSnippet, char aCharacter) {
    //a snippet often contains just spaces and/or new lines; appendTranslation emits those as is
    appendTranslation(aSnippet, aResult);
    aSnippet.setLength(0);
    aResult.append(aCharacter);
  }

  /** Called for the {@code >} which closes a tag. The character is emitted unchanged. */
  private void doEndTag(StringBuilder aResult, char aCharacter) {
    aResult.append(aCharacter);
  }

  /**
   Route a character according to where it appears : the content of a tag is emitted
   unchanged, while free flow text is accumulated in the snippet, for later translation.
  */
  private void doRegularCharacter(StringBuilder aResult, StringBuilder aSnippet, boolean aIsInsideTag, char aCharacter) {
    if( aIsInsideTag ){
      aResult.append(aCharacter);
    }
    else {
      aSnippet.append(aCharacter);
    }
  }

  /**
   Append the translation of <tt>aSnippet</tt> to <tt>aResult</tt>.

   <P>The snippet may contain leading or trailing white space, or control chars (new lines),
   which must be preserved. Only the text matched by {@link #TRIMMED_TEXT} is replaced; all
   other characters are copied over as is. A snippet for which {@link Util#textHasContent(String)}
   is <tt>false</tt> is appended unchanged, with neither translation nor escaping.

   <P>This method does not clear <tt>aSnippet</tt>.
  */
  private void appendTranslation(StringBuilder aSnippet, StringBuilder aResult){
    final String snippet = aSnippet.toString();
    if( ! Util.textHasContent(snippet) ) {
      aResult.append(snippet);
      return;
    }

    //Matcher.appendReplacement and appendTail need a StringBuffer on older JDKs
    final StringBuffer translatedSnippet = new StringBuffer();
    final Matcher matcher = TRIMMED_TEXT.matcher(snippet);
    while ( matcher.find() ) {
      matcher.appendReplacement(translatedSnippet, getReplacement(matcher));
    }
    matcher.appendTail(translatedSnippet);

    if( fEscapeChars ) {
      aResult.append(EscapeChars.forHTML(translatedSnippet.toString()));
    }
    else {
      aResult.append(translatedSnippet);
    }
  }

  /**
   Return the replacement text for the current match of {@link #TRIMMED_TEXT}.

   <P>The matched base text is translated for the {@link Locale} of the current request.
   The result is passed through {@link EscapeChars#forReplacementString(String)} before being
   handed to the {@link Matcher} - presumably so that any <tt>$</tt> or <tt>\</tt> in the
   translation is taken literally, and not as a group reference.
  */
  private String getReplacement(Matcher aMatcher){
    //Regex.FIRST_GROUP is presumably 1, the only capturing group in TRIMMED_TEXT
    final String baseText = aMatcher.group(Regex.FIRST_GROUP);
    String result = baseText;
    if (Util.textHasContent(baseText)){
      Locale locale = fLocaleSource.get(getRequest());
      result = fTranslator.get(baseText, locale);
    }
    return EscapeChars.forReplacementString(result);
  }
}