Cutelee  6.1.0
lexer.cpp
1 /*
2  This file is part of the Cutelee template system.
3 
4  Copyright (c) 2009,2010,2011 Stephen Kelly <steveire@gmail.com>
5 
6  This library is free software; you can redistribute it and/or
7  modify it under the terms of the GNU Lesser General Public
8  License as published by the Free Software Foundation; either version
9  2.1 of the Licence, or (at your option) any later version.
10 
11  This library is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  Lesser General Public License for more details.
15 
16  You should have received a copy of the GNU Lesser General Public
17  License along with this library. If not, see <http://www.gnu.org/licenses/>.
18 
19 */
20 
21 #include "lexer_p.h"
22 
23 using namespace Cutelee;
24 
25 typedef State<TextProcessingMachine::Type> TextProcessingState;
26 typedef TextProcessingMachine::Transition TextProcessingTransition;
27 
28 typedef LexerObject<TextProcessingState, NullTest, MarksClearer> ChurningState;
29 typedef LexerObject<TextProcessingState, NullTest, TokenFinalizer>
30  FinalizeTokenState;
31 typedef LexerObject<TextProcessingTransition, NullTest, TokenFinalizer>
32  EofHandler;
33 typedef LexerObject<TextProcessingTransition, NullTest,
34  TokenFinalizerWithTrimming>
35  EofHandlerWithTrimming;
36 
37 typedef CharacterTransition<'{'> MaybeTemplateSyntaxHandler;
38 
39 typedef CharacterTransition<'%', MarkStartSyntax> TagStartHandler;
40 typedef CharacterTransition<'#', MarkStartSyntax> CommentStartHandler;
41 typedef CharacterTransition<'%'> TagEndHandler;
42 typedef CharacterTransition<'#'> CommentEndHandler;
43 typedef CharacterTransition<'{', MarkStartSyntax> BeginValueHandler;
44 typedef CharacterTransition<'}'> MaybeEndValueHandler;
45 typedef CharacterTransition<'\n', MarkNewline> NewlineHandler;
46 typedef CharacterTransition<'}', MarkEndSyntax> EndTemplateSyntaxHandler;
47 typedef NegateCharacterTransition<'}'> NotEndTemplateSyntaxHandler;
48 
49 typedef LexerObject<
50  TextProcessingTransition,
51  Negate<OrTest<CharacterTest<'{'>,
52  OrTest<CharacterTest<'#'>, CharacterTest<'%'>>>>>
53  NotBeginTemplateSyntaxHandler;
54 
55 typedef LexerObject<
56  TextProcessingTransition,
57  Negate<OrTest<CharacterTest<'{'>,
58  OrTest<CharacterTest<'#'>,
59  OrTest<CharacterTest<'%'>, CharacterTest<'\n'>>>>>>
60  NotBeginTemplateSyntaxOrNewlineHandler;
61 
62 typedef LexerObject<
63  TextProcessingTransition,
64  Negate<OrTest<CharacterTest<'#'>,
65  OrTest<CharacterTest<'%'>, CharacterTest<'\n'>>>>>
66  NotTagCommentOrNewlineHandler;
67 
68 typedef LexerObject<TextProcessingTransition,
69  Negate<OrTest<IsSpace, CharacterTest<'{'>>>>
70  NonWhitespaceLineTextHandler;
71 
72 typedef LexerObject<TextProcessingTransition,
73  AndTest<Negate<CharacterTest<'\n'>>, IsSpace>>
74  WhitespaceNonNewlineHandler;
75 
76 typedef LexerObject<TextProcessingTransition,
77  Negate<OrTest<CharacterTest<'{'>, IsSpace>>, TokenFinalizer>
78  FinalizingLineTextHandler;
79 
80 typedef CharacterTransition<'\n', TokenFinalizerWithTrimmingAndNewline>
81  SyntaxBoundaryNewlineHandler;
82 typedef CharacterTransition<'{', FinalizeAndMarkStartSyntax>
83  SyntaxBoundaryHandler;
84 
85 template <typename Transition>
86 void addTransition(TextProcessingState *source, Lexer *lexer,
87  TextProcessingState *target)
88 {
89  auto tr = new Transition(lexer, source);
90  tr->setTargetState(target);
91 }
92 
93 TextProcessingMachine *createMachine(Lexer *lexer, Lexer::TrimType type)
94 {
95  auto machine = new TextProcessingMachine;
96 
97  auto notFinished = new TextProcessingState(machine);
98  auto finished = new TextProcessingState(machine);
99  machine->setInitialState(notFinished);
100 
101  auto processingText = new ChurningState(lexer, notFinished);
102  auto processingPostNewline = new TextProcessingState(notFinished);
103  auto processingBeginTemplateSyntax = new TextProcessingState(notFinished);
104  auto processingTag = new TextProcessingState(notFinished);
105  auto processingComment = new TextProcessingState(notFinished);
106  auto processingValue = new TextProcessingState(notFinished);
107  auto maybeProcessingValue = new TextProcessingState(notFinished);
108  auto processingEndTag = new TextProcessingState(notFinished);
109  auto processingEndComment = new TextProcessingState(notFinished);
110  auto processingEndValue = new TextProcessingState(notFinished);
111  TextProcessingState *processingPostTemplateSyntax;
112 
113  if (type == Lexer::SmartTrim)
114  processingPostTemplateSyntax = new TextProcessingState(notFinished);
115  else
116  processingPostTemplateSyntax = new FinalizeTokenState(lexer, notFinished);
117  auto processingPostTemplateSyntaxWhitespace
118  = new TextProcessingState(notFinished);
119 
120  if (type == Lexer::SmartTrim)
121  notFinished->setInitialState(processingPostNewline);
122  else
123  notFinished->setInitialState(processingText);
124 
125  if (type == Lexer::SmartTrim) {
126  addTransition<NewlineHandler>(processingText, lexer, processingPostNewline);
127 
128  addTransition<NewlineHandler>(processingPostNewline, lexer,
129  processingPostNewline);
130  addTransition<MaybeTemplateSyntaxHandler>(processingPostNewline, lexer,
131  processingBeginTemplateSyntax);
132  addTransition<NonWhitespaceLineTextHandler>(processingPostNewline, lexer,
133  processingText);
134  }
135  addTransition<MaybeTemplateSyntaxHandler>(processingText, lexer,
136  processingBeginTemplateSyntax);
137 
138  addTransition<TagStartHandler>(processingBeginTemplateSyntax, lexer,
139  processingTag);
140  addTransition<CommentStartHandler>(processingBeginTemplateSyntax, lexer,
141  processingComment);
142  addTransition<BeginValueHandler>(processingBeginTemplateSyntax, lexer,
143  maybeProcessingValue);
144 
145  if (type == Lexer::SmartTrim) {
146  addTransition<NotBeginTemplateSyntaxOrNewlineHandler>(
147  processingBeginTemplateSyntax, lexer, processingText);
148  addTransition<NewlineHandler>(processingBeginTemplateSyntax, lexer,
149  processingPostNewline);
150  } else {
151  addTransition<NotBeginTemplateSyntaxHandler>(processingBeginTemplateSyntax,
152  lexer, processingText);
153  }
154 
155  addTransition<NewlineHandler>(processingTag, lexer,
156  type == Lexer::SmartTrim ? processingPostNewline
157  : processingText);
158  addTransition<TagEndHandler>(processingTag, lexer, processingEndTag);
159 
160  addTransition<NewlineHandler>(processingComment, lexer,
161  type == Lexer::SmartTrim ? processingPostNewline
162  : processingText);
163  addTransition<CommentEndHandler>(processingComment, lexer,
164  processingEndComment);
165 
166  addTransition<TagStartHandler>(maybeProcessingValue, lexer, processingTag);
167  addTransition<CommentStartHandler>(maybeProcessingValue, lexer,
168  processingComment);
169  addTransition<NotTagCommentOrNewlineHandler>(maybeProcessingValue, lexer,
170  processingValue);
171  addTransition<NewlineHandler>(maybeProcessingValue, lexer,
172  type == Lexer::SmartTrim ? processingPostNewline
173  : processingText);
174 
175  addTransition<NewlineHandler>(processingValue, lexer,
176  type == Lexer::SmartTrim ? processingPostNewline
177  : processingText);
178  addTransition<MaybeEndValueHandler>(processingValue, lexer,
179  processingEndValue);
180 
181  addTransition<NewlineHandler>(processingEndTag, lexer, processingPostNewline);
182  addTransition<NotEndTemplateSyntaxHandler>(processingEndTag, lexer,
183  processingTag);
184  addTransition<EndTemplateSyntaxHandler>(processingEndTag, lexer,
185  processingPostTemplateSyntax);
186 
187  addTransition<NewlineHandler>(processingEndComment, lexer,
188  processingPostNewline);
189  addTransition<NotEndTemplateSyntaxHandler>(processingEndComment, lexer,
190  processingComment);
191  addTransition<EndTemplateSyntaxHandler>(processingEndComment, lexer,
192  processingPostTemplateSyntax);
193 
194  addTransition<NewlineHandler>(processingEndValue, lexer,
195  processingPostNewline);
196  addTransition<NotEndTemplateSyntaxHandler>(processingEndValue, lexer,
197  processingValue);
198  addTransition<EndTemplateSyntaxHandler>(processingEndValue, lexer,
199  processingPostTemplateSyntax);
200 
201  if (type != Lexer::SmartTrim) {
202  processingPostTemplateSyntax->setUnconditionalTransition(processingText);
203  } else {
204  addTransition<SyntaxBoundaryNewlineHandler>(processingPostTemplateSyntax,
205  lexer, processingPostNewline);
206  addTransition<WhitespaceNonNewlineHandler>(
207  processingPostTemplateSyntax, lexer,
208  processingPostTemplateSyntaxWhitespace);
209  addTransition<FinalizingLineTextHandler>(processingPostTemplateSyntax,
210  lexer, processingText);
211  addTransition<SyntaxBoundaryHandler>(processingPostTemplateSyntax, lexer,
212  processingBeginTemplateSyntax);
213 
214  // NOTE: We only have to transition to this if there was whitespace
215  // before the opening tag. Maybe store that in an external state property?
216  // Actually, this may be a bug if we try to finalize with trimming and
217  // there is no leading whitespace.
218  addTransition<SyntaxBoundaryNewlineHandler>(
219  processingPostTemplateSyntaxWhitespace, lexer, processingPostNewline);
220  addTransition<FinalizingLineTextHandler>(
221  processingPostTemplateSyntaxWhitespace, lexer, processingText);
222  addTransition<SyntaxBoundaryHandler>(processingPostTemplateSyntaxWhitespace,
223  lexer, processingBeginTemplateSyntax);
224  }
225 
226  {
227  auto handler = new EofHandler(lexer, notFinished);
228  handler->setTargetState(finished);
229  notFinished->setEndTransition(handler);
230  }
231 
232  if (type == Lexer::SmartTrim) {
233  {
234  auto handler = new EofHandlerWithTrimming(
235  lexer, processingPostTemplateSyntaxWhitespace);
236  handler->setTargetState(finished);
237  processingPostTemplateSyntaxWhitespace->setEndTransition(handler);
238  }
239  {
240  auto handler
241  = new EofHandlerWithTrimming(lexer, processingPostTemplateSyntax);
242  handler->setTargetState(finished);
243  processingPostTemplateSyntax->setEndTransition(handler);
244  }
245  }
246  return machine;
247 }
248 
249 Lexer::Lexer(const QString &templateString) : m_templateString(templateString)
250 {
251 }
252 
253 Lexer::~Lexer() {}
254 
255 void Lexer::clearMarkers()
256 {
257  m_startSyntaxPosition = -1;
258  m_endSyntaxPosition = -1;
259  m_newlinePosition = -1;
260 }
261 
262 void Lexer::reset()
263 {
264  m_tokenList.clear();
265  m_lineCount = 0;
266  m_upto = 0;
267  m_processedUpto = 0;
268  clearMarkers();
269 }
270 
271 QList<Token> Lexer::tokenize(TrimType type)
272 {
273  auto machine = createMachine(this, type);
274 
275  machine->start();
276 
277  auto it = m_templateString.constBegin();
278  const auto end = m_templateString.constEnd();
279 
280  reset();
281  for (; it != end; ++it, ++m_upto)
282  machine->processCharacter(it);
283 
284  machine->finished();
285 
286  machine->stop();
287 
288  delete machine;
289 
290  return m_tokenList;
291 }
292 
293 void Lexer::markStartSyntax() { m_startSyntaxPosition = m_upto; }
294 
295 void Lexer::markEndSyntax() { m_endSyntaxPosition = m_upto + 1; }
296 
297 void Lexer::markNewline()
298 {
299  m_newlinePosition = m_upto;
300  ++m_lineCount;
301 }
302 
303 void Lexer::finalizeToken()
304 {
305  auto nextPosition = m_upto;
306  const auto validSyntax = m_endSyntaxPosition > m_startSyntaxPosition
307  && (m_startSyntaxPosition >= m_processedUpto);
308 
309  if (validSyntax) {
310  Q_ASSERT(m_startSyntaxPosition >= 0);
311  nextPosition = m_startSyntaxPosition - 1;
312  }
313  finalizeToken(nextPosition, validSyntax);
314 }
315 
316 void Lexer::finalizeTokenWithTrimmedWhitespace()
317 {
318  auto nextPosition = m_upto;
319  // We know this to be true because the state machine has already guaranteed
320  // it. This method is only called from transition and state actions which
321  // occur after valid syntax.
322  // TODO Investigate performance and other implications of changing the state
323  // machine to assure similar in finalizeToken()
324  Q_ASSERT(m_endSyntaxPosition > m_startSyntaxPosition);
325 
326  Q_ASSERT(m_startSyntaxPosition >= 0);
327  if (m_newlinePosition >= 0 && m_newlinePosition >= m_processedUpto)
328  nextPosition = qMin(m_startSyntaxPosition - 1, m_newlinePosition);
329  else
330  nextPosition = m_startSyntaxPosition - 1;
331  finalizeToken(nextPosition, true);
332 }
333 
334 void Lexer::finalizeToken(int nextPosition, bool processSyntax)
335 {
336  {
337  Token token;
338  token.content
339  = m_templateString.mid(m_processedUpto, nextPosition - m_processedUpto);
340  token.tokenType = TextToken;
341  token.linenumber = m_lineCount;
342  m_tokenList.append(token);
343  }
344 
345  m_processedUpto = nextPosition;
346 
347  if (!processSyntax)
348  return;
349 
350  m_processedUpto = m_endSyntaxPosition;
351 
352  const auto differentiator
353  = *(m_templateString.constData() + m_startSyntaxPosition);
354  if (differentiator == QLatin1Char('#'))
355  return;
356 
357  Token syntaxToken;
358  syntaxToken.content
359  = m_templateString
360  .mid(m_startSyntaxPosition + 1,
361  m_endSyntaxPosition - m_startSyntaxPosition - 3)
362  .trimmed();
363  syntaxToken.linenumber = m_lineCount;
364 
365  if (differentiator == QLatin1Char('{')) {
366  syntaxToken.tokenType = VariableToken;
367  } else {
368  Q_ASSERT(differentiator == QLatin1Char('%'));
369  syntaxToken.tokenType = BlockToken;
370  }
371  m_tokenList.append(syntaxToken);
372 }
The Cutelee namespace holds all public Cutelee API.
Definition: Mainpage.dox:8
@ BlockToken
The Token is a block, ie, part of a tag.
Definition: token.h:37
@ TextToken
The Token is a text fragment.
Definition: token.h:35
@ VariableToken
The Token is a variable node.
Definition: token.h:36
int linenumber
The line number this Token starts at.
Definition: token.h:50
QString content
The content of this Token.
Definition: token.h:51
int tokenType
The Type of this Token.
Definition: token.h:49