001 package hirondelle.web4j.ui.translate;
002
003 import java.text.CharacterIterator;
004 import java.text.StringCharacterIterator;
005 import java.util.Locale;
006 import java.util.regex.*;
007
008 import hirondelle.web4j.BuildImpl;
009 import hirondelle.web4j.request.LocaleSource;
010 import hirondelle.web4j.ui.tag.TagHelper;
011 import hirondelle.web4j.util.EscapeChars;
012 import hirondelle.web4j.util.Util;
013 import hirondelle.web4j.util.Regex;
014
015 /**
016 Custom tag for translating regular text flow in large sections of a web page.
017
018 <P><span class="highlight">This tag treats every piece of free flow text delimited by a
019 tag as a unit of translatable base text</span>, and passes it to {@link Translator}.
020 That is, all tags are treated as <em>delimiters</em> of units of translatable text.
021
022 <P>This tag is suitable for translating most, but not all, of the
023 regular text flow in a web page. <span class="highlight">It is suitable for translating
024 markup that contains short, isolated snippets of text, that have no "structure", and
025 no dynamic data</span>, such as the labels in a form, the column headers in a listing,
026 and so on. (For many intranet applications, this
027 makes up most of the free flow text appearing in the application.) Instead of using many
028 separate <tt><w:txt></tt> {@link Text} tags to translate each item one by one,
029 a single <tt><w:txtFlow></tt> tag can often be used to do the same thing in a single step.
030
031 <P><span class="highlight">Using this class has two strong advantages</span> :
032 <ul>
033 <li>the effort needed to internationalize a page is greatly reduced
034 <li>the markup will be significantly easier to read and maintain, since most of the free flow text
035 remains unchanged from the single-language case
036 </ul>
037
038 <P>
039 This tag is <em>not suitable</em> when the base text to be translated :
040 <ul>
041 <li>contains markup
042 <li>has dynamic data of any sort
043 <li>contains a <tt>TEXTAREA</tt> with a <em>non-empty</em> body. Such text will be seen as a translatable
044 unit, which is usually undesired, since such text is usually not fixed, but dynamic (that is, from the database).
045 (To avoid this, simply nest this tag <em>inside</em> the <tt><w:populate></tt> tag surrounding the
046 form that contains the <tt>TEXTAREA</tt>. This ensures that the population is not affected by the action of this
047 tag.)
048 </ul>
049
050 <P>For example, given this text containing markup :
051 <PRE>The <EM>raison-d'etre</EM> for this...</PRE>
052 then this tag will split the text into three separate pieces, delimited by the <tt>EM</tt> tags.
053 Then, each piece will be translated. For such items, this is almost always undesirable. Instead,
054 one must use a <tt><w:txt></tt> {@link Text} tag, which can treat such items as
055 a single unit of translatable text, without chopping it up into three pieces.
056
057 <P><b>Example</b><br>
058 Here, all of the <tt>LABEL</tt> tags in this form will have their content translated by the
059 <tt><w:txtFlow></tt> tag :
060 <PRE>
061 <w:populate style='edit' using="myUser">
062 <w:txtFlow>
063 <form action='blah.do' method='post' class="user-input">
064 <table align="center">
065 <tr>
066 <td>
067 <label class="mandatory">Email</label>
068 </td>
069 <td>
070 <input type="text" name="Email Address" size='30'>
071 </td>
072 </tr>
073
074 <tr>
075 <td>
076 <label>Age</label>
077 </td>
078 <td>
079 <input type="text" name="Age" size="30">
080 </td>
081 </tr>
082
083 <tr>
084 <td>
085 <label>Desired Salary</label>
086 </td>
087 <td>
088 <input type="text" name="Desired Salary" size="30">
089 </td>
090 </tr>
091
092 <tr>
093 <td>
094 <label> Birth Date </label>
095 </td>
096 <td>
097 <input type="text" name="Birth Date" size="30">
098 </td>
099 </tr>
100
101 <tr>
102 <td>
103 <input type='submit' value='UPDATE'>
104 </td>
105 </tr>
106 </table>
107 </form>
108 </w:txtFlow>
109 </w:populate>
110 </PRE>
111 */
112 public final class TextFlow extends TagHelper {
113
114 /**
115 By default, this tag will escape any special characters appearing in the
116 text flow, using {@link EscapeChars#forHTML(String)}. To change that default
117 behaviour, set this value to <tt>false</tt>.
118
119 <P><span class="highlight">Exercise care that text is not doubly escaped.</span>
120 For instance, if the text already contains
121 character entities, and <tt>setEscapeChars</tt> is true, then the text <tt>&amp;</tt>
122 will be emitted by this tag as <tt>&amp;amp;</tt>, for example.
123 */
124 public void setEscapeChars(boolean aValue){
125 fEscapeChars = aValue;
126 }
127
128 /**
129 Translate each piece of free flow text appearing in <tt>aOriginalBody</tt>.
130
131 <P>Each piece of text is delimited by one or more tags, and is translated using the configured
132 {@link Translator}. Leading or trailing white space is preserved.
133 */
134 @Override protected String getEmittedText(String aOriginalBody) {
135 final StringBuffer result = new StringBuffer();
136 final StringBuffer snippet = new StringBuffer();
137 boolean isInsideTag = false;
138
139 final StringCharacterIterator iterator = new StringCharacterIterator(aOriginalBody);
140 char character = iterator.current();
141 while (character != CharacterIterator.DONE ){
142 if (character == '<') {
143 doStartTag(result, snippet, character);
144 isInsideTag = true;
145 }
146 else if (character == '>') {
147 doEndTag(result, character);
148 isInsideTag = false;
149 }
150 else {
151 doRegularCharacter(result, snippet, isInsideTag, character);
152 }
153 character = iterator.next();
154 }
155 if( Util.textHasContent(snippet.toString()) ) {
156 appendTranslation(snippet, result);
157 }
158 return result.toString();
159 }
160
161 // PRIVATE //
162 static Pattern TRIMMED_TEXT = Pattern.compile("((?:\\S(?:.)*\\S)|(?:\\S))");
163
164 private boolean fEscapeChars = true;
165 private LocaleSource fLocaleSource = BuildImpl.forLocaleSource();
166 private Translator fTranslator = BuildImpl.forTranslator();
167
168 private void doStartTag(StringBuffer aResult, StringBuffer aSnippet, char aCharacter) {
169 if (Util.textHasContent(aSnippet.toString()) ){
170 appendTranslation(aSnippet, aResult);
171 }
172 else {
173 //often contains just spaces and/or new lines, which are just appended
174 aResult.append(aSnippet.toString());
175 }
176 aSnippet.setLength(0);
177 aResult.append(aCharacter);
178 }
179
180 private void doEndTag(StringBuffer aResult, char aCharacter) {
181 aResult.append(aCharacter);
182 }
183
184 private void doRegularCharacter(StringBuffer aResult, StringBuffer aSnippet, boolean aIsInsideTag, char aCharacter) {
185 if( aIsInsideTag ){
186 aResult.append(aCharacter);
187 }
188 else {
189 aSnippet.append(aCharacter);
190 }
191 //fLogger.fine("Snippet : " + aSnippet);
192 }
193
194 /**
195 The snippet may contain leading or trailing white space, or control chars (new lines),
196 which must be preserved.
197 */
198 private void appendTranslation(StringBuffer aSnippet, StringBuffer aResult){
199 if( Util.textHasContent(aSnippet.toString()) ) {
200 StringBuffer translatedSnippet = new StringBuffer();
201
202 Matcher matcher = TRIMMED_TEXT.matcher(aSnippet.toString());
203 while ( matcher.find() ) {
204 matcher.appendReplacement(translatedSnippet, getReplacement(matcher));
205 }
206 matcher.appendTail(translatedSnippet);
207
208 if( fEscapeChars ) {
209 aResult.append(EscapeChars.forHTML(translatedSnippet.toString()));
210 }
211 else {
212 aResult.append(translatedSnippet);
213 }
214 }
215 else {
216 aResult.append(aSnippet.toString());
217 }
218 }
219
220 private String getReplacement(Matcher aMatcher){
221 String result = null;
222 String baseText = aMatcher.group(Regex.FIRST_GROUP);
223 if (Util.textHasContent(baseText)){
224 Locale locale = fLocaleSource.get(getRequest());
225 result = fTranslator.get(baseText, locale);
226 }
227 else {
228 result = baseText;
229 }
230 return EscapeChars.forReplacementString(result);
231 }
232 }