Source code for stilus.lexer

import re
from collections import deque

from .nodes.boolean import Boolean
from .nodes.color import RGBA
from .nodes.null import null
from .nodes.string import String
from .nodes.unit import Unit
from .nodes.comment import Comment
from .nodes.ident import Ident
from .nodes.literal import Literal


class Token:
    """A lexical token: a type tag, an optional value and a source position.

    Attributes:
        type: the token kind (e.g. ``"ident"``, ``"unit"``, ``"eos"``).
        value: the payload attached to the token, or ``None``.
        space: whitespace recorded by the scanner that built the token, if any.
        anonymous: flag initialised to ``False``; scanners may set it.
        lineno: line on which the token starts, or ``None`` if not located.
        column: column at which the token starts, or ``None`` if not located.
    """

    def __init__(self, type, value=None, space=None, lineno=None, column=None):
        """Create a token of ``type`` with an optional value and location."""
        self.type = type
        self.value = value
        self.space = space
        self.anonymous = False
        self.lineno = lineno
        self.column = column

    def __str__(self):
        """Return the value if available, the token's type otherwise."""
        # Compare against None rather than relying on truthiness: a value
        # such as 0 or "" is still a value and must not be replaced by the
        # token type.
        if self.value is not None:
            return f"{self.value}"
        return f"{self.type}"

    def __repr__(self):
        """Return the same text as ``str(self)``."""
        return str(self)

    def __key(self):
        # Tuple of every attribute that takes part in equality and hashing.
        return (
            self.type,
            self.value,
            self.space,
            self.anonymous,
            self.lineno,
            self.column,
        )

    def __hash__(self):
        """Hash over the same attributes that ``__eq__`` compares."""
        return hash(self.__key())

    def __eq__(self, other):
        """Tokens are equal when all their attributes are equal."""
        if not isinstance(other, Token):
            # Let Python try the reflected comparison; the final result for
            # unrelated types is still False.
            return NotImplemented
        return self.__key() == other.__key()
class Lexer:
    """Turn Stylus source text into a stream of ``Token`` objects.

    The remaining input is kept in ``self.s`` and consumed from the front by
    the individual scanner methods.  ``advance()`` tries the scanners in a
    fixed priority order; ``next()`` / ``peek()`` / ``lookahead()`` add a
    small buffer (``self.stash``) on top of it.  The lexer is also an
    iterator that yields tokens up to and including the ``eos`` token.

    Attributes:
        stash: deque of tokens that were scanned but not yet consumed.
        indent_stack: deque of active indentation widths, innermost first.
        indent_re: the newline/indentation pattern once it is established.
        lineno, column: current position, both starting at 1.
        options: the options dict passed to the constructor.
        prev: the token most recently returned by ``next()``.
        is_url: True while scanning the arguments of ``url(``.
        at_eos: True once ``next()`` has returned the ``eos`` token.
        s: the (cleaned) input that is still to be scanned.
    """

    # Word-style operators and the symbolic token type they are reported as.
    alias = {
        "and": "&&",
        "or": "||",
        "is": "==",
        "isnt": "!=",
        "is not": "!=",
        ":=": "?=",
    }

    # Scanner method names in the order advance() tries them.  The order is
    # significant: earlier scanners win when several could match.
    _scanners = (
        "eos",
        "null",
        "sep",
        "keyword",
        "urlchars",
        "comment",
        "newline",
        "escaped",
        "important",
        "literal",
        "anonymous_function",
        "atrule",
        "function",
        "brace",
        "paren",
        "color",
        "string",
        "unit",
        "namedop",
        "boolean",
        "unicode",
        "ident",
        "op",
        "eol",
        "space",
        "selector",
    )

    def __init__(self, s: str, options: dict):
        """Create a lexer for the source text ``s`` with the given options."""
        self.stash = deque([])
        self.indent_stack = deque([])
        self.indent_re = None
        self.lineno = 1
        self.column = 1
        self.options = options
        self.prev = None
        self.is_url = False
        self.at_eos = False
        self.s = self.clean(s)

    def __eq__(self, other):
        """Lexers are equal when their remaining input and options match."""
        if not isinstance(other, Lexer):
            return NotImplemented
        return self.s == other.s and self.options == other.options

    def clean(self, s: str) -> str:
        """Normalise raw source text before scanning.

        Strips a leading BOM, collapses trailing whitespace to one newline,
        converts ``\\r\\n`` / ``\\r`` to ``\\n``, and replaces line
        continuations (a backslash before a newline, or a newline next to
        ``,``, ``(``, ``:`` or ``)``) with a single ``\\r`` marker, unless
        the position lies inside a comment.
        """
        # handle UTF-8 BOM (also when the BOM is the whole input)
        if s.startswith("\ufeff"):
            s = s[1:]
        s = re.sub(r"\s+$", "\n", s)
        s = re.sub(r"\r\n?", "\n", s)
        s = re.sub(r"\\ *\n", "\r", s)

        def _comment(match):
            # TODO: this needs a rewrite
            text = match.string
            offset = match.start(0)
            # Inside a block comment when the last "/*" is after the last "*/".
            in_comment = text.rfind("/*", 0, offset) > text.rfind(
                "*/", 0, offset
            )
            comment_index = text.rfind("//", 0, offset)
            line_start = text.rfind("\n", 0, offset)
            if comment_index != -1 and comment_index > line_start:
                # There is a "//" on this line: walk the line to find out
                # whether it sits inside a quoted string.
                single = False
                double = False
                # Start at 0 when there is no previous newline; index -1
                # would wrap around to the last character of the input.
                i = max(line_start, 0)
                while i != offset:
                    char = text[i]
                    if char == "'":
                        single = not single
                    if char == '"':
                        double = not double
                    if char == "/" and text[i + 1] == "/":
                        in_comment = not single and not double
                        break
                    i += 1
            # Inside a comment the text is kept; otherwise the line break is
            # folded into a "\r" continuation marker.
            return match.group(0) if in_comment else match.group(1) + "\r"

        s = re.sub(
            r"([,(:](?!\/\/[^ ])) *(?:\/\/[^\n]*|\/\*.*?\*\/)?\n\s*",
            _comment,
            s,
        )
        s = re.sub(r"\s*\n[ \t]*([,)])", _comment, s)
        return s

    def _skip_number(self, len):
        """Drop ``len`` characters from the input and advance the column."""
        self.s = self.s[len:]
        self.column += len

    def _skip_string(self, str):
        """Drop the matched text ``str`` and update line and column."""
        self.s = self.s[len(str):]
        self.move(str)

    def move(self, str):
        """Advance ``lineno`` / ``column`` as if ``str`` had been consumed."""
        self.lineno += str.count("\n")
        idx = str.rfind("\n")
        if idx == -1:
            self.column += len(str)
        else:
            # Column restarts after the last newline in the consumed text.
            self.column = len(str) - idx

    def is_part_of_selector(self):
        """Tell whether the previous token makes the next word a selector part.

        True after ``.`` or ``[`` and after a colour token whose raw text is
        two characters long (e.g. the ``#f`` of ``#for``).
        """
        tok = self.stash[-1] if self.stash else self.prev
        if not tok:
            return False
        if tok.type == "color":
            return len(tok.value.raw) == 2
        return tok.type in (".", "[")

    def next(self) -> "Token":
        """Return the next token, taking it from the stash when possible."""
        tok = self.stash.popleft() if len(self.stash) > 0 else self.advance()
        self.prev = tok
        self.at_eos = tok.type == "eos"
        return tok

    def peek(self):
        """Return the next token without consuming it."""
        return self.lookahead(1)

    def lookahead(self, n):
        """Return the ``n``-th upcoming token (1-based) without consuming it."""
        for _ in range(n - len(self.stash)):
            self.stash.append(self.advance())
        return self.stash[n - 1]

    def __next__(self) -> "Token":
        """Iterator protocol: stop after the ``eos`` token was returned."""
        if self.at_eos:
            raise StopIteration
        return self.next()

    def __iter__(self):
        """The lexer is its own iterator."""
        return self

    def advance(self):
        """Scan and return the next token, or ``None`` if nothing matches.

        The scanners listed in ``_scanners`` are tried in order and the first
        truthy result wins.  The token is stamped with the position at which
        scanning started, except for a token obtained through ``eol``: that
        token belongs to the following line and keeps the position assigned
        by the nested ``advance()`` call.
        """
        lineno = self.lineno
        column = self.column
        for name in self._scanners:
            tok = getattr(self, name)()
            if not tok:
                continue
            if name == "eol":
                return tok
            return self._tok_with_location(tok, lineno, column)
        return None

    def eos(self):
        """
        eos | trailing outdents
        """
        if self.s != "":
            return False
        if self.indent_stack:
            # Close one open indentation level per call before reporting eos.
            self.indent_stack.popleft()
            return Token("outdent")
        return Token("eos")

    def null(self):
        """
        null
        """
        match = re.match(r"^(null)\b[ \t]*", self.s)
        if not match:
            return None
        self._skip_string(match.group())
        if self.is_part_of_selector():
            return Token("ident", Ident(match.group(0)))
        return Token("null", value=null)

    def newline(self):
        r"""
        '\n' ' '+

        Returns an ``indent``, ``outdent`` or ``newline`` token depending on
        how the indentation after the newline compares with the current
        level; returns False when the input does not start with a newline.

        Raises:
            SyntaxError: when tabs and spaces are mixed in the indentation.
        """
        if self.indent_re:
            match = re.match(self.indent_re, self.s)
        else:
            # figure out if we are using tabs or spaces; try tabs first
            possible_re = r"^\n([\t]*)[ \t]*"
            match = re.match(possible_re, self.s)
            if match and not match.group(1):
                # try spaces next
                possible_re = r"^\n([ \t]*)"
                match = re.match(possible_re, self.s)
            # established
            if match and match.group(1):
                self.indent_re = possible_re

        if not match:
            return False

        indents = len(match.group(1))
        self._skip_string(match.group(0))
        if self.s[:1] in (" ", "\t"):
            raise SyntaxError(
                "Invalid indentation. You can use tabs or "
                "spaces to indent, but not both."
            )

        # blank line
        if self.s[:1] == "\n":
            return self.advance()

        # outdent
        if self.indent_stack and indents < self.indent_stack[0]:
            while self.indent_stack and self.indent_stack[0] > indents:
                self.stash.append(Token("outdent"))
                self.indent_stack.popleft()
            # One outdent is returned now, the others stay stashed.
            return self.stash.pop()

        # indent
        if indents and (
            len(self.indent_stack) == 0 or indents != self.indent_stack[0]
        ):
            self.indent_stack.appendleft(indents)
            return Token("indent")

        # newline
        return Token("newline")

    def selector(self):
        r"""
        ^|[^\n,;]+
        """
        match = re.match(r"^\^|.*?(?=\/\/(?![^\[]*\])|[,\n{])", self.s)
        # An empty match would not consume anything, so it is ignored.
        if match and match.group(0):
            selector = match.group(0)
            self._skip_string(selector)
            return Token("selector", selector)
        return None

    def space(self):
        r"""
        ' '+ | '\t'+
        """
        match = re.match(r"^([ \t]+)", self.s)
        if match:
            self._skip_string(match.group(0))
            return Token("space")
        return None

    def eol(self):
        r"""
        '\r'

        Consumes a line-continuation marker and returns the token that
        follows it.
        """
        if self.s and self.s[0] == "\r":
            self.lineno += 1
            self._skip_number(1)
            # The marker stands for a line break, so the column restarts.
            self.column = 1
            return self.advance()
        return None

    def op(self):
        """
        ',' | '+' | '+=' | '-' | '-=' | '*' | '*=' | '/' | '/=' | '%'
        | '%=' | '**' | '!' | '&' | '&&' | '||' | '>' | '>=' | '<' | '<='
        | '=' | '==' | '!=' | '!' | '~' | '?=' | ':=' | '?' | ':' | '['
        | ']' | '.' | '..' | '...'
        """
        match = re.match(
            r"^([.]{1,3}|&&|\|\||[!<>=?:]=|\*\*|[-+*\/%]=?|"
            r"[,=?:!~<>&\[\]])([ \t]*)",
            self.s,
        )
        if not match:
            return None
        self._skip_string(match.group(0))
        op = match.group(1)
        op = self.alias.get(op, op)
        tok = Token(op, op)
        tok.space = match.group(2)
        self.is_url = False
        return tok

    def sep(self):
        r"""
        ';' [ \t]*
        """
        match = re.match(r"^;[ \t]*", self.s)
        if match:
            self._skip_string(match.group(0))
            return Token(";")
        return None

    def keyword(self):
        """
        'if' | 'else' | 'unless' | 'return' | 'for' | 'in'
        """
        match = re.match(r"^(return|if|else|unless|for|in)\b[ \t]*", self.s)
        if not match:
            return None
        keyword = match.group(1)
        self._skip_string(match.group(0))
        if self.is_part_of_selector():
            return Token("ident", Ident(match.group(0)))
        return Token(keyword, keyword)

    def urlchars(self):
        """
        url char
        """
        if not self.is_url:
            return None
        match = re.match(r"^[\/:@.;?&=*!,<>#%0-9]+", self.s)
        if match:
            self._skip_string(match.group(0))
            return Token("literal", Literal(match.group(0)))
        return None

    def comment(self):
        """
        '//' *

        A single-line comment is skipped and the following token returned.
        A block comment becomes a ``comment`` token; it is marked as not
        suppressed when it starts with ``/*!`` and as inline when the
        previous token was ``;``.
        """
        # single line
        if self.s.startswith("//"):
            end = self.s.find("\n")
            if end == -1:
                end = len(self.s)
            self._skip_number(end)
            return self.advance()

        # multi-line
        if self.s.startswith("/*"):
            end = self.s.find("*/")
            if end == -1:
                # Unterminated comment: it runs to the end of the input.
                end = len(self.s)
            text = self.s[: end + 2]
            self.lineno += text.count("\n") + text.count("\r")
            self._skip_number(len(text))

            # output
            suppress = True
            if text.startswith("/*!"):
                # Only the leading marker is rewritten, not later "*!" text.
                text = text.replace("*!", "*", 1)
                suppress = False
            inline = bool(self.prev and self.prev.type == ";")
            return Token("comment", Comment(text, suppress, inline))
        return None

    def escaped(self):
        r"""
        '\\' . ' '*
        """
        match = re.match(r"^\\(.)[ \t]*", self.s)
        if not match:
            return None
        escape = match.group(1)
        self._skip_string(match.group(0))
        # An escaped dot keeps its backslash.
        if match.group(0).startswith("\\."):
            escape = "\\."
        return Token("ident", Literal(escape))

    def important(self):
        """
        '!important' ' '*
        """
        match = re.match(r"^!important[ \t]*", self.s)
        if match:
            self._skip_string(match.group(0))
            return Token("ident", Literal("!important"))
        return None

    def literal(self):
        """
        '@css' ' '* '{' .* '}' ' '*

        Consumes the raw block up to the matching closing brace (or to the
        end of the input when the block is not closed).
        """
        match = re.match(r"^@css[ \t]*{", self.s)
        if not match:
            return None
        self._skip_string(match.group(0))
        braces = 1
        consumed = 0
        for char in self.s:
            consumed += 1
            if char == "{":
                braces += 1
            elif char == "}":
                braces -= 1
            elif char in ("\n", "\r"):
                self.lineno += 1
            if not braces:
                break
        css = self.s[:consumed]
        self.s = self.s[consumed:]
        # Drop the closing brace together with the whitespace before it.
        css = re.sub(r"\s*}$", "", css)
        return Token("literal", Literal(css, css=True))

    def anonymous_function(self):
        """
        '@('
        """
        if self.s.startswith("@("):
            self._skip_number(2)
            tok = Token("function", Ident("anonymous"))
            tok.anonymous = True
            return tok
        return None

    def atrule(self):
        r"""
        '@' (-(\w+)-)?[a-zA-Z0-9-_]+
        """
        match = re.match(r"^@(?:-(\w+)-)?([a-zA-Z0-9-_]+)[ \t]*", self.s)
        if not match:
            return None
        self._skip_string(match.group(0))
        vendor = match.group(1)
        rule = match.group(2)
        if rule in (
            "require",
            "import",
            "charset",
            "namespace",
            "media",
            "scope",
            "supports",
        ):
            return Token(rule)
        if rule == "document":
            return Token("-moz-document")
        if rule == "block":
            return Token("atblock")
        if rule in ("extend", "extends"):
            return Token("extend")
        if rule == "keyframes":
            # The vendor prefix (or None) travels as the token value.
            return Token(rule, vendor)
        return Token("atrule", f"-{vendor}-{rule}" if vendor else rule)

    def function(self):
        r"""
        -*[_a-zA-Z$] [-\w\d$]* '('
        """
        match = re.match(r"^(-*[_a-zA-Z$][-\w\d$]*)\(([ \t]*)", self.s)
        if not match:
            return None
        name = match.group(1)
        self._skip_string(match.group(0))
        # Inside url( ... ) the urlchars scanner takes over.
        self.is_url = "url" == name
        tok = Token("function", Ident(name))
        tok.space = match.group(2)
        return tok

    def brace(self):
        """
        '{' | '}'
        """
        match = re.match(r"^([{}])", self.s)
        if match:
            self._skip_number(1)
            brace = match.group(1)
            return Token(brace, brace)
        return None

    def paren(self):
        """
        '(' | ')' ' '*
        """
        match = re.match(r"^([()])([ \t]*)", self.s)
        if not match:
            return None
        paren = match.group(1)
        self._skip_string(match.group(0))
        if paren == ")":
            self.is_url = False
        tok = Token(paren, paren)
        tok.space = match.group(2)
        return tok

    def _color_token(self, match, r, g, b, a):
        """Consume ``match`` and build the colour token for the channels."""
        self._skip_string(match.group(0))
        color = RGBA(r, g, b, a)
        # The raw text includes any trailing blanks matched by the scanner.
        color.raw = match.group(0)
        return Token("color", color)

    def rrggbbaa(self):
        """
        #rrggbbaa
        """
        match = re.match(r"^#([a-fA-F0-9]{8})[ \t]*", self.s)
        if not match:
            return None
        digits = match.group(1)
        r = int(digits[0:2], 16)
        g = int(digits[2:4], 16)
        b = int(digits[4:6], 16)
        a = int(digits[6:8], 16)
        return self._color_token(match, r, g, b, a / 255)

    def rrggbb(self):
        """
        #rrggbb
        """
        match = re.match(r"^#([a-fA-F0-9]{6})[ \t]*", self.s)
        if not match:
            return None
        digits = match.group(1)
        r = int(digits[0:2], 16)
        g = int(digits[2:4], 16)
        b = int(digits[4:6], 16)
        return self._color_token(match, r, g, b, 1)

    def rgba(self):
        """
        #rgba
        """
        match = re.match(r"^#([a-fA-F0-9]{4})[ \t]*", self.s)
        if not match:
            return None
        # Every single digit is doubled: "f" means "ff".
        r, g, b, a = (int(digit * 2, 16) for digit in match.group(1))
        return self._color_token(match, r, g, b, a / 255)

    def rgb(self):
        """
        #rgb
        """
        match = re.match(r"^#([a-fA-F0-9]{3})[ \t]*", self.s)
        if not match:
            return None
        r, g, b = (int(digit * 2, 16) for digit in match.group(1))
        return self._color_token(match, r, g, b, 1)

    def nn(self):
        """
        #nn
        """
        match = re.match(r"^#([a-fA-F0-9]{2})[ \t]*", self.s)
        if not match:
            return None
        n = int(match.group(1), 16)
        return self._color_token(match, n, n, n, 1)

    def n(self):
        """
        #n
        """
        match = re.match(r"^#([a-fA-F0-9]{1})[ \t]*", self.s)
        if not match:
            return None
        n = int(match.group(1) * 2, 16)
        return self._color_token(match, n, n, n, 1)

    def color(self):
        """Try the hex colour scanners from the longest form to the shortest."""
        return (
            self.rrggbbaa()
            or self.rrggbb()
            or self.rgba()
            or self.rgb()
            or self.nn()
            or self.n()
        )

    def string(self):
        """
        '"' [^"]+ '"' | "'"" [^']+ "'"
        """
        match = re.match(r'^("[^"]*"|\'[^\']*\')[ \t]*', self.s)
        if not match:
            return None
        self._skip_string(match.group(0))
        quoted = match.group(1)
        return Token("string", String(quoted[1:-1], quoted[0]))

    def unit(self):
        """
        '-'? (digit+ | digit* '.' digit+) unit
        """
        match = re.match(
            r"^(-)?(\d+\.\d+|\d+|\.\d+)(%|[a-zA-Z]+)?[ \t]*", self.s
        )
        if not match:
            return None
        self._skip_string(match.group(0))
        n = float(match.group(2))
        if match.group(1) == "-":
            n = -n
        unit = Unit(n, match.group(3))
        unit.raw = match.group(0)
        return Token("unit", unit)

    def namedop(self):
        """
        'not' | 'and' | 'or' | 'is' | 'is not' | 'isnt' | 'is a'
        | 'is defined'
        """
        match = re.match(
            r"^(not|and|or|is a|is defined|isnt|is not|is)"
            r"(?!-)\b([ \t]*)",
            self.s,
        )
        if not match:
            return None
        self._skip_string(match.group(0))
        if self.is_part_of_selector():
            tok = Token("ident", Ident(match.group(0)))
        else:
            op = match.group(1)
            op = self.alias.get(op, op)
            tok = Token(op, op)
        tok.space = match.group(2)
        return tok

    def boolean(self):
        """
        true | false
        """
        match = re.match(r"^(true|false)\b([ \t]*)", self.s)
        if not match:
            return None
        self._skip_string(match.group(0))
        tok = Token("boolean", Boolean("true" == match.group(1)))
        tok.space = match.group(2)
        return tok

    def unicode(self):
        """
        'U+' [0-9A-Fa-f?]{1,6}(?:-[0-9A-Fa-f]{1,6})?
        """
        match = re.match(
            r"^u\+[0-9a-f?]{1,6}(?:-[0-9a-f]{1,6})?",
            self.s,
            flags=re.IGNORECASE,
        )
        if match:
            self._skip_string(match.group(0))
            return Token("literal", Literal(match.group(0)))
        return None

    def ident(self):
        r"""
        -*[_a-zA-Z$] [-\w\d$]*
        """
        match = re.match(r"^-*[_a-zA-Z$][-\w\d$]*", self.s)
        if match:
            self._skip_string(match.group(0))
            return Token("ident", Ident(match.group(0)))
        return None

    def _tok_with_location(self, tok, lineno, column):
        """Stamp ``tok`` with the given position and return it."""
        tok.lineno = lineno
        tok.column = column
        return tok