001 package hirondelle.web4j.ui.translate;
002
003 import java.util.Locale;
004 import java.util.logging.Logger;
005 import java.util.regex.Pattern;
006 import java.util.regex.Matcher;
007 import javax.servlet.jsp.JspException;
008
009 import hirondelle.web4j.BuildImpl;
010 import hirondelle.web4j.ui.tag.TagHelper;
011 import hirondelle.web4j.util.EscapeChars;
012 import hirondelle.web4j.util.Util;
013 import hirondelle.web4j.util.Consts;
014
015 /**
016 Custom tag for translating base language text (or a "coder key") into a localized form, and applying
017 "wiki-style" formatting.
018
019 <P>This tag uses {@link hirondelle.web4j.ui.translate.Translator} and
020 {@link hirondelle.web4j.request.LocaleSource} to localize text.
021
022 <P>There are several use cases for this tag. In general, the attributes control:
023 <ul>
024 <li><a href="#BaseText">specifying base text</a> to be translated
025 <li><a href="#WikiStyleFormatting">formatting</a> of the translated result
026 <li><a href="#TurningOffTranslation">turning off translation</a> altogether
027 </ul>
028
029 <P><b><a name="BaseText">Specifying Base Text</a></b>
030 <P>The base text may be specified simply as the tag body, or as the <tt>value</tt> attribute.
031 <P>Example 1 : <br>
032 <PRE>
033 &lt;w:txt&gt;
034 Of prayers, I am the prayer of silence.
035 Of things that move not, I am the Himalayas.
036 &lt;/w:txt&gt;
037 </PRE>
038
039 Example 2 uses a "coder key" : <br>
040 <PRE>
041 &lt;w:txt value="quotation.from.bhagavad.gita" /&gt;
042 </PRE>
043
044 <P>This second form is intended especially for translating items appearing
045 inside a tag.
046
047 <P>These two use cases are mutually exclusive : either a body must be specified, or
048 the <tt>value</tt> attribute must be specified, <em>but not both</em>.
049
050 <P>Here is another example, combining the two styles :
051 <PRE>
052 &lt;span title='&lt;w:txt value="quotation.from.bhagavad.gita" /&gt;'&gt;
053 &lt;w:txt&gt;
054 Of prayers, I am the prayer of silence.
055 Of things that move not, I am the Himalayas.
056 &lt;/w:txt&gt;
057 &lt;/span&gt;
058 </PRE>
059
060 <P>In either case, the item to be translated may be either some text in the
061 application's base language, or it may be a 'coder key' (see {@link Translator}).
062
063 <P><b><a name="WikiStyleFormatting">Formatting of the Result</a></b>
064 <P>By default, this tag will escape all special characters using {@link EscapeChars#forHTML(String)}.
065 Occasionally, it is desirable to allow some limited formatting of text input by the user. Even simple
066 effects such as bold and italic can measurably increase legibility and clarity, and allowing links
067 is also very useful. Most wikis allow such simple formatting. This tag uses the following special
068 characters to denote various effects :
069 <ul>
070 <li> *bold* for <b>bold</b> (needs initial space before first '*')
071 <li> _italic_ for <em>italic</em> (needs initial space before first '_')
072 <li>^preserve formatting^ for preserving whitespace (&lt;PRE&gt;)
073 <li>~~~~ on an otherwise blank line for a horizontal rule
074 <li>[link:http:\\www.javapractices.com\Topic1.cjp enumerations] produces this link: <a href="http:\\www.javapractices.com\Topic1.cjp">enumerations</a>
075 <li>one or more empty lines for a paragraph (&lt;P&gt;)
076 <li>a bar | at the end of a line for a line break (&lt;BR&gt;)
077 <li>a line starting with ' ~ ' for a bullet entry in a list
078 </ul>
079
080 <P>Example 3 has wiki style formatting:
081 <PRE>
082 &lt;w:txt wikiMarkup="true"&gt;
083 Default Locale |
084 _Not all Locales are treated equally_ : there *must* be ...
085 &lt;/w:txt&gt;
086 </PRE>
087
088 Is rendered as :
089 <P>Default Locale<br>
090 <em>Not all Locales are treated equally</em> : there <b>must</b> be ...
091
092 <P>To allow the above rules to be interpreted as HTML by this tag, set the <tt>wikiMarkup</tt> attribute (see {@link #setWikiMarkup(boolean)}) to <tt>true</tt>.
093
094 <P><b><a name="TurningOffTranslation">Turning Off Translation</a></b>
095 <P>There are two cases in which it is useful to turn off translation altogether:
096 <ul>
097 <li>using only the formatting services of this tag. For example, a message board application may want to
098 provide basic wiki-style formatting, without any translation.
099 <li>as a workaround for database issues regarding large text. Sometimes databases treat large
100 text differently from small text. For example in MySQL, it is not possible for a <tt>TEXT</tt> field to be
101 assigned a <tt>UNIQUE</tt> constraint. This split between large text and small text can be a problem, since
102 it may mean that blocks of text are treated differently simply according to their length, and workarounds for
103 large blocks of text are needed.
104 </ul>
105
106 <P>Example 4 has wiki style formatting for untranslated user input:
107 <PRE>
108 &lt;w:txt wikiMarkup="true" translate="false"&gt;
109 ...render some user input with wiki style formatting...
110 &lt;/w:txt&gt;
111 </PRE>
112
113 <P>Example 5 has wiki style formatting for <em>large</em> untranslated text, hard-coded in the JSP.
114 An example of such text may be an extended section of "help" information :
115 <PRE>
116 &lt;w:txt locale="en"&gt;
117 ..<em>large</em> amount of hard-coded text in English...
118 &lt;/w:txt&gt;
119
120 &lt;w:txt locale="fr"&gt;
121 ..<em>large</em> amount of hard-coded text in French...
122 &lt;/w:txt&gt;
123 </PRE>
124
125 <em>The above style is outside the usual translation mechanism.</em> It does not use the configured
126 {@link Translator}. It does not translate its content at all. Rather, it will echo the tag content only
127 when the specified <tt>locale</tt> matches that returned by the {@link hirondelle.web4j.request.LocaleSource}.
128 <span class="highlight">It is recommended that this style be used only when the above-mentioned problem regarding
129 database text size exists.</span> This style implicitly has <tt>translate="false"</tt>.
130 */
131 public final class Text extends TagHelper {
132
133 /**
134 Set the item to be translated (optional).
135
136 <P><tt>aTextAsAttr</tt> takes two forms : user-visible text in the base language,
137 or a coder key (known to the programmer, but not seen by the end user). See {@link Translator}
138 for more information.
139
140 <P>If this attribute is set, then the tag must <em>not</em> have a body.
141
142 @param aTextAsAttr must have content; this value is always trimmed by this method.
143 */
144 public void setValue(String aTextAsAttr){
145 checkForContent("Value", aTextAsAttr);
146 fTextAsAttr = aTextAsAttr.trim();
147 }
148
149 /**
150 Toggle translation on and off (optional, default <tt>true</tt>).
151
152 <P>By default, text will be translated. An example of setting this item to <tt>false</tt> is a
153 discussion board, where users input in a single language, and
154 <a href="#WikiStyleFormatting">wiki style formatting</a> is desired.
155
156 <P>An example of rendering such text is :
157 <PRE>
158 <w:txt translate="false" wikiMarkup="true">
159 This is *bold*, and so on...
160 </w:txt>
161 </PRE>
162 */
163 public void setTranslate(boolean aValue) {
164 fIsTranslating = aValue;
165 }
166
167 /**
168 Specify an explicit {@link Locale} (optional).
169
170 <P><span class='highlight'>When this attribute is specified, this tag will never translate its content.</span>
171 Instead, this tag will simply <em>emit or suppress</em> its content, according to whether <tt>aLocale</tt> matches
172 that returned by {@link hirondelle.web4j.request.LocaleSource}.
173 */
174 public void setLocale(String aLocale){
175 fSpecificLocale = Util.buildLocale(aLocale);
176 fIsTranslating = false;
177 }
178
179 /**
180 Allow <a href="#WikiStyleFormatting">wiki style formatting</a> to be used (optional, default <tt>false</tt>).
181 */
182 public void setWikiMarkup(boolean aValue){
183 fHasWikiMarkup = aValue;
184 }
185
186 /** Validate attributes against each other. */
187 protected void crossCheckAttributes() {
188 if( fSpecificLocale != null && fIsTranslating ){
189 String message = "Cannot translate and specify a Locale at the same time. Page : " + getPageName();
190 fLogger.severe(message);
191 throw new IllegalArgumentException(message);
192 }
193 }
194
195 /** See class comment. */
196 @Override protected String getEmittedText(String aOriginalBody) throws JspException {
197 if ( Util.textHasContent(fTextAsAttr) && Util.textHasContent(aOriginalBody) ){
198 throw new JspException(
199 "Please specify text (or key) to be translated as either value attribute or tag body, but not both." +
200 " Value attribute: " +Util.quote(fTextAsAttr) + ". Tag body: " + Util.quote(aOriginalBody) +
201 " Page Name :" + getPageName()
202 );
203 }
204
205 String baseText = Util.textHasContent(fTextAsAttr) ? fTextAsAttr : aOriginalBody;
206 String result = Consts.EMPTY_STRING;
207 if(Util.textHasContent(baseText)){
208 Locale locale = BuildImpl.forLocaleSource().get(getRequest());
209 if(fIsTranslating){
210 Translator translator = BuildImpl.forTranslator();
211 result = translator.get(baseText, locale);
212 fLogger.finest("Translating base text : " + Util.quote(baseText) + " using Locale " + locale + " into " + Util.quote(result));
213 }
214 else {
215 fLogger.finest("LocaleSource: " + locale + ", locale attribute: " + fSpecificLocale);
216 if (fSpecificLocale == null || fSpecificLocale.equals(locale)){
217 fLogger.finest("Echoing tag content (possibly adding formatting).");
218 result = baseText;
219 }
220 else {
221 fLogger.finest("Suppressing tag content.");
222 result = Consts.EMPTY_STRING;
223 }
224 }
225 result = processMarkup(result);
226 }
227 return result;
228 }
229
230 /**
231 Translate the text into a result. Escapes characters, then optionally changes wiki markup into hypertext.
232 Made package-private for testing purposes.
233 */
234 String processMarkup(String aText) {
235 String result = aText;
236 if(Util.textHasContent(result)){
237 result = EscapeChars.forHTML(result);
238 if( fHasWikiMarkup ){
239 result = changePseudoMarkupToHTML(result);
240 }
241 }
242 return result;
243 }
244
245 // PRIVATE
246 private String fTextAsAttr;
247 private boolean fIsTranslating = true;
248 private Locale fSpecificLocale;
249 private boolean fHasWikiMarkup = false;
250 private static final Logger fLogger = Util.getLogger(Text.class);
251
252 /*
253 The regexes don't refer to the original '*' and so on. Rather, they refer to the
254 escaped versions thereof - their 'fingerprints', so to speak.
255
256 Implementation Note:
257 Seeing undesired wiki-formatting of _blah_ text in links. To fix, introduced second, small variation
258 for *blah* and _blah_, which matches to the beginning of the input.
259
260 This is a rather hacky, and a more robust implmentation would use javacc.
261 */
262
263 private static final Pattern PSEUDO_LINK = Pattern.compile("[link:((\\S)*) ((.)+?)\\]");
264 private static final Pattern PSEUDO_BOLD = Pattern.compile("(?: *)((?:.)+?)(?:*)");
265 private static final Pattern PSEUDO_BOLD_START_OF_INPUT = Pattern.compile("(?:^*)((?:.)+?)(?:*)");
266 private static final Pattern PSEUDO_ITALIC = Pattern.compile("(?: _)((?:.)+?)(?:_)");
267 private static final Pattern PSEUDO_ITALIC_START_OF_INPUT = Pattern.compile("(?:^_)((?:.)+?)(?:_)");
268 private static final Pattern PSEUDO_CODE = Pattern.compile("(?:\\^)((?:.)+?)(?:\\^)", Pattern.MULTILINE | Pattern.DOTALL);
269 private static final Pattern PSEUDO_HR = Pattern.compile("^(?:\\s)*~~~~(?:\\s)*$", Pattern.MULTILINE);
270 private static final Pattern PSEUDO_PARAGRAPH = Pattern.compile("(^(?:\\s)*$)", Pattern.MULTILINE);
271 private static final Pattern PSEUDO_LINE_BREAK = Pattern.compile("\\|(?: )*$", Pattern.MULTILINE);
272 private static final Pattern PSEUDO_LIST = Pattern.compile("^(?: )*(\\~)(?: )", Pattern.MULTILINE);
273
274 private String changePseudoMarkupToHTML(String aText){
275 String result = null;
276 result = addLink(aText);
277 result = addBold(result);
278 result = addBold2(result);
279 result = addItalic(result);
280 result = addItalic2(result);
281 result = addLineBreak(result);
282 result = addParagraph(result);
283 result = addList(result);
284 result = addCode(result);
285 result = addHorizontalRule(result);
286 return result;
287 }
288
289 private String addBold(String aText){
290 Matcher matcher = PSEUDO_BOLD.matcher(aText);
291 return matcher.replaceAll(" <b>$1</b>"); //extra space
292 }
293
294 private String addBold2(String aText){
295 Matcher matcher = PSEUDO_BOLD_START_OF_INPUT.matcher(aText);
296 return matcher.replaceAll("<b>$1</b>"); //extra space
297 }
298
299 private String addItalic(String aText){
300 Matcher matcher = PSEUDO_ITALIC.matcher(aText);
301 return matcher.replaceAll(" <em>$1</em>"); //extra space
302 }
303
304 private String addItalic2(String aText){
305 Matcher matcher = PSEUDO_ITALIC_START_OF_INPUT.matcher(aText);
306 return matcher.replaceAll("<em>$1</em>"); //extra space
307 }
308
309 private String addCode(String aText){
310 Matcher matcher = PSEUDO_CODE.matcher(aText);
311 return matcher.replaceAll("<PRE>$1</PRE>");
312 }
313
314 private String addHorizontalRule(String aText){
315 Matcher matcher = PSEUDO_HR.matcher(aText);
316 return matcher.replaceAll("<hr>");
317 }
318
319 private String addLink(String aText){
320 Matcher matcher = PSEUDO_LINK.matcher(aText);
321 return matcher.replaceAll("<a href='$1'>$3</a>");
322 }
323
324 private String addParagraph(String aText){
325 Matcher matcher = PSEUDO_PARAGRAPH.matcher(aText);
326 return removeInitialAndFinalParagraphs(matcher.replaceAll("<P>"));
327 }
328
329 /** Bit hacky - cannot find correct regex to take care of this cleanly. */
330 private String removeInitialAndFinalParagraphs(String aText){
331 String result = aText.trim();
332 if (aText.startsWith("<P>")){
333 result = result.substring(3);
334 }
335 if (aText.endsWith("<P>")){
336 result = result.substring(0, result.length()-3);
337 }
338 return result;
339 }
340
341 private String addLineBreak(String aText){
342 Matcher matcher = PSEUDO_LINE_BREAK.matcher(aText);
343 return matcher.replaceAll("<BR>");
344 }
345
346 private String addList(String aText){
347 Matcher matcher = PSEUDO_LIST.matcher(aText);
348 return matcher.replaceAll("<br> • "); //another version of bull has a non-standard number
349 }
350 }