001    package hirondelle.web4j.ui.translate;
002    
003    import java.text.CharacterIterator;
004    import java.text.StringCharacterIterator;
005    import java.util.Locale;
006    import java.util.regex.*;
007    
008    import hirondelle.web4j.BuildImpl;
009    import hirondelle.web4j.request.LocaleSource;
010    import hirondelle.web4j.ui.tag.TagHelper;
011    import hirondelle.web4j.util.EscapeChars;
012    import hirondelle.web4j.util.Util;
013    import hirondelle.web4j.util.Regex;
014    
015    /**
016     Custom tag for translating regular text flow in large sections of a web page. 
017     
018     <P><span class="highlight">This tag treats every piece of free flow text delimited by a  
019     tag as a unit of translatable base text</span>, and passes it to {@link Translator}. 
020     That is, all tags are treated as <em>delimiters</em> of units of translatable text.
021      
022     <P>This tag is suitable for translating most, but not all, of the 
023     regular text flow in a web page. <span class="highlight">It is suitable for translating 
024     markup that contains short, isolated snippets of text, that have no "structure", and 
025     no dynamic data</span>, such as the labels in a form, the column headers in a listing, 
026     and so on. (For many intranet applications, this 
027     makes up most of the free flow text appearing in the application.) Instead of using many 
028     separate <tt>&lt;w:txt&gt;</tt> {@link Text} tags to translate each item one by one,
029     a single <tt>&lt;w:txtFlow&gt;</tt> tag can often be used to do the same thing in a single step.
030     
031     <P><span class="highlight">Using this class has two strong advantages</span> : 
032    <ul>
033     <li>the effort needed to internationalize a page is greatly reduced
034     <li>the markup will be significantly easier to read and maintain, since most of the free flow text 
035     remains unchanged from the single-language case
036    </ul>
037     
038     <P>
039     This tag is <em>not suitable</em> when the base text to be translated : 
040    <ul>
041     <li>contains markup
042     <li>has dynamic data of any sort
043     <li>contains a <tt>TEXTAREA</tt> with a <em>non-empty</em> body. Such text will be seen as a translatable 
044     unit, which is usually undesired, since such text is usually not fixed, but dynamic (that is, from the database). 
045     (To avoid this, simply nest this tag <em>inside</em> the <tt>&lt;w:populate&gt;</tt> tag surrounding the 
046     form that contains the <tt>TEXTAREA</tt>. This ensures that the population is not affected by the action of this 
047     tag.)
048    </ul>
049      
050     <P>For example, given this text containing markup :
051     <PRE>The &lt;EM&gt;raison-d'etre&lt;/EM&gt; for this...</PRE>
052     then this tag will split the text into three separate pieces, delimited by the <tt>EM</tt> tags.
053     Then, each piece will be translated. For such items, this is almost always undesirable. Instead, 
054     one must use a <tt>&lt;w:txt&gt;</tt> {@link Text} tag, which can treat such items as 
055     a single unit of translatable text, without chopping it up into three pieces. 
056     
057     <P><b>Example</b><br>
058     Here, all of the <tt>LABEL</tt> tags in this form will have their content translated by the 
059     <tt>&lt;w:txtFlow&gt;</tt> tag :
060     <PRE>
061    &lt;w:populate style='edit' using="myUser"&gt;
062    &lt;w:txtFlow&gt;
063    &lt;form action='blah.do' method='post' class="user-input"&gt;
064    &lt;table align="center"&gt;
065    &lt;tr&gt;
066     &lt;td&gt;
067      &lt;label class="mandatory"&gt;Email&lt;/label&gt;
068     &lt;/td&gt;
069     &lt;td&gt;
070      &lt;input type="text" name="Email Address" size='30'&gt;
071     &lt;/td&gt; 
072    &lt;/tr&gt;
073    
074    &lt;tr&gt;
075     &lt;td&gt;
076      &lt;label&gt;Age&lt;/label&gt;
077     &lt;/td&gt;
078     &lt;td&gt;
079      &lt;input type="text" name="Age" size="30"&gt;
080     &lt;/td&gt; 
081    &lt;/tr&gt;
082    
083    &lt;tr&gt;
084     &lt;td&gt;
085      &lt;label&gt;Desired Salary&lt;/label&gt;
086     &lt;/td&gt;
087     &lt;td&gt;
088      &lt;input type="text" name="Desired Salary" size="30"&gt;
089     &lt;/td&gt; 
090    &lt;/tr&gt;
091    
092    &lt;tr&gt;
093     &lt;td&gt;
094      &lt;label&gt; Birth Date &lt;/label&gt;
095     &lt;/td&gt;
096     &lt;td&gt;
097      &lt;input type="text" name="Birth Date" size="30"&gt;
098     &lt;/td&gt; 
099    &lt;/tr&gt;
100    
101    &lt;tr&gt;
102     &lt;td&gt;
103      &lt;input type='submit' value='UPDATE'&gt; 
104     &lt;/td&gt;
105    &lt;/tr&gt;
106    &lt;/table&gt;
107    &lt;/form&gt;
108    &lt;/w:txtFlow&gt;
109    &lt;/w:populate&gt;
110    </PRE>
111    */
112    public final class TextFlow extends TagHelper {
113    
114      /**
115       By default, this tag will escape any special characters appearing in the 
116       text flow, using {@link EscapeChars#forHTML(String)}. To change that default  
117       behaviour, set this value to <tt>false</tt>.
118       
119       <P><span class="highlight">Exercise care that text is not doubly escaped.</span> 
120       For instance, if the text already contains 
121       character entities, and <tt>setEscapeChars</tt> is true, then the text <tt>&amp;amp;</tt>
122       will be emitted by this tag as <tt>&amp;amp;amp;</tt>, for example.
123      */
124      public void setEscapeChars(boolean aValue){
125        fEscapeChars = aValue;
126      }
127      
128      /**
129       Translate each piece of free flow text appearing in <tt>aOriginalBody</tt>.
130       
131       <P>Each piece of text is delimited by one or more tags, and is translated using the configured 
132       {@link Translator}. Leading or trailing white space is preserved.
133      */
134      @Override protected String getEmittedText(String aOriginalBody) {
135        final StringBuffer result = new StringBuffer();
136        final StringBuffer snippet = new StringBuffer();
137        boolean isInsideTag = false;
138        
139        final StringCharacterIterator iterator = new StringCharacterIterator(aOriginalBody);
140        char character =  iterator.current();
141        while (character != CharacterIterator.DONE ){
142          if (character == '<') {
143            doStartTag(result, snippet, character);
144            isInsideTag = true;
145          }
146          else if (character == '>') {
147            doEndTag(result, character);
148            isInsideTag = false;
149          }
150          else {
151            doRegularCharacter(result, snippet, isInsideTag, character);
152          }
153          character = iterator.next();
154        }
155        if( Util.textHasContent(snippet.toString()) ) {
156          appendTranslation(snippet, result);
157        }
158        return result.toString();
159      }
160      
161      // PRIVATE //
162      static Pattern TRIMMED_TEXT = Pattern.compile("((?:\\S(?:.)*\\S)|(?:\\S))");
163      
164      private boolean fEscapeChars = true;
165      private LocaleSource fLocaleSource = BuildImpl.forLocaleSource();
166      private Translator fTranslator = BuildImpl.forTranslator();
167      
168      private void doStartTag(StringBuffer aResult, StringBuffer aSnippet, char aCharacter) {
169        if (Util.textHasContent(aSnippet.toString()) ){ 
170          appendTranslation(aSnippet, aResult);
171        }
172        else {
173          //often contains just spaces and/or new lines, which are just appended
174          aResult.append(aSnippet.toString());
175        }
176        aSnippet.setLength(0);
177        aResult.append(aCharacter);
178      }
179      
180      private void doEndTag(StringBuffer aResult, char aCharacter) {
181        aResult.append(aCharacter);
182      }
183      
184      private void doRegularCharacter(StringBuffer aResult, StringBuffer aSnippet, boolean aIsInsideTag, char aCharacter) {
185        if( aIsInsideTag ){
186          aResult.append(aCharacter);
187        }
188        else {
189          aSnippet.append(aCharacter);
190        }
191        //fLogger.fine("Snippet : " + aSnippet);
192      }
193    
194      /**
195       The snippet may contain leading or trailing white space, or control chars (new lines), 
196       which must be preserved. 
197      */
198      private void appendTranslation(StringBuffer aSnippet, StringBuffer aResult){
199        if( Util.textHasContent(aSnippet.toString()) ) {
200          StringBuffer translatedSnippet = new StringBuffer();
201          
202          Matcher matcher = TRIMMED_TEXT.matcher(aSnippet.toString());
203          while ( matcher.find() ) {
204            matcher.appendReplacement(translatedSnippet, getReplacement(matcher));
205          }
206          matcher.appendTail(translatedSnippet);
207          
208          if( fEscapeChars ) {
209            aResult.append(EscapeChars.forHTML(translatedSnippet.toString()));
210          }
211          else {
212            aResult.append(translatedSnippet);
213          }
214        }
215        else {
216          aResult.append(aSnippet.toString());
217        }
218      }
219      
220      private String getReplacement(Matcher aMatcher){
221        String result = null;
222        String baseText = aMatcher.group(Regex.FIRST_GROUP);
223        if (Util.textHasContent(baseText)){
224          Locale locale = fLocaleSource.get(getRequest());
225          result = fTranslator.get(baseText, locale);
226        }
227        else {
228          result = baseText;
229        }
230        return EscapeChars.forReplacementString(result);
231      }
232    }