Newer
Older
Import / svn / viewvc-1.1-dev / lib / vclib / ccvs / rcsparse / common.py
# -*-python-*-
#
# Copyright (C) 1999-2007 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewVC
# distribution or at http://viewvc.org/license-1.html.
#
# For more information, visit http://viewvc.org/
#
# -----------------------------------------------------------------------

"""common.py: common classes and functions for the RCS parsing tools."""

import calendar
import string

class Sink:
  def set_head_revision(self, revision):
    pass

  def set_principal_branch(self, branch_name):
    pass

  def set_access(self, accessors):
    pass

  def define_tag(self, name, revision):
    pass

  def set_locker(self, revision, locker):
    pass

  def set_locking(self, mode):
    """Used to signal locking mode.

    Called with mode argument 'strict' if strict locking
    Not called when no locking used."""

    pass

  def set_comment(self, comment):
    pass

  def set_expansion(self, mode):
    pass

  def admin_completed(self):
    pass

  def define_revision(self, revision, timestamp, author, state,
                      branches, next):
    pass

  def tree_completed(self):
    pass

  def set_description(self, description):
    pass

  def set_revision_info(self, revision, log, text):
    pass

  def parse_completed(self):
    pass


# --------------------------------------------------------------------------
#
# EXCEPTIONS USED BY RCSPARSE
#

class RCSParseError(Exception):
  pass


class RCSIllegalCharacter(RCSParseError):
  pass


class RCSExpected(RCSParseError):
  def __init__(self, got, wanted):
    RCSParseError.__init__(
        self,
        'Unexpected parsing error in RCS file.\n'
        'Expected token: %s, but saw: %s'
        % (wanted, got)
        )


class RCSStopParser(Exception):
  pass


# --------------------------------------------------------------------------
#
# STANDARD TOKEN STREAM-BASED PARSER
#

class _Parser:
  stream_class = None   # subclasses need to define this

  def _read_until_semicolon(self):
    """Read all tokens up to and including the next semicolon token.

    Return the tokens (not including the semicolon) as a list."""

    tokens = []

    while 1:
      token = self.ts.get()
      if token == ';':
        break
      tokens.append(token)

    return tokens

  def _parse_admin_head(self, token):
    rev = self.ts.get()
    if rev == ';':
      # The head revision is not specified.  Just drop the semicolon
      # on the floor.
      pass
    else:
      self.sink.set_head_revision(rev)
      self.ts.match(';')

  def _parse_admin_branch(self, token):
    branch = self.ts.get()
    if branch != ';':
      self.sink.set_principal_branch(branch)
      self.ts.match(';')

  def _parse_admin_access(self, token):
    accessors = self._read_until_semicolon()
    if accessors:
      self.sink.set_access(accessors)

  def _parse_admin_symbols(self, token):
    while 1:
      tag_name = self.ts.get()
      if tag_name == ';':
        break
      self.ts.match(':')
      tag_rev = self.ts.get()
      self.sink.define_tag(tag_name, tag_rev)

  def _parse_admin_locks(self, token):
    while 1:
      locker = self.ts.get()
      if locker == ';':
        break
      self.ts.match(':')
      rev = self.ts.get()
      self.sink.set_locker(rev, locker)

  def _parse_admin_strict(self, token):
    self.sink.set_locking("strict")
    self.ts.match(';')

  def _parse_admin_comment(self, token):
    self.sink.set_comment(self.ts.get())
    self.ts.match(';')

  def _parse_admin_expand(self, token):
    expand_mode = self.ts.get()
    self.sink.set_expansion(expand_mode)
    self.ts.match(';')

  admin_token_map = {
      'head' : _parse_admin_head,
      'branch' : _parse_admin_branch,
      'access' : _parse_admin_access,
      'symbols' : _parse_admin_symbols,
      'locks' : _parse_admin_locks,
      'strict' : _parse_admin_strict,
      'comment' : _parse_admin_comment,
      'expand' : _parse_admin_expand,
      'desc' : None,
      }

  def parse_rcs_admin(self):
    while 1:
      # Read initial token at beginning of line
      token = self.ts.get()

      try:
        f = self.admin_token_map[token]
      except KeyError:
        # We're done once we reach the description of the RCS tree
        if token[0] in string.digits:
          self.ts.unget(token)
          return
        else:
          # Chew up "newphrase"
          # warn("Unexpected RCS token: $token\n")
          pass
      else:
        if f is None:
          self.ts.unget(token)
          return
        else:
          f(self, token)

  def _parse_rcs_tree_entry(self, revision):
    # Parse date
    self.ts.match('date')
    date = self.ts.get()
    self.ts.match(';')

    # Convert date into timestamp
    date_fields = string.split(date, '.')
    # According to rcsfile(5): the year "contains just the last two
    # digits of the year for years from 1900 through 1999, and all the
    # digits of years thereafter".
    if len(date_fields[0]) == 2:
      date_fields[0] = '19' + date_fields[0]
    date_fields = map(string.atoi, date_fields)
    EPOCH = 1970
    if date_fields[0] < EPOCH:
      raise ValueError, 'invalid year'
    timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,))

    # Parse author
    ### NOTE: authors containing whitespace are violations of the
    ### RCS specification.  We are making an allowance here because
    ### CVSNT is known to produce these sorts of authors.
    self.ts.match('author')
    author = ' '.join(self._read_until_semicolon())

    # Parse state
    self.ts.match('state')
    state = ''
    while 1:
      token = self.ts.get()
      if token == ';':
        break
      state = state + token + ' '
    state = state[:-1]   # toss the trailing space

    # Parse branches
    self.ts.match('branches')
    branches = self._read_until_semicolon()

    # Parse revision of next delta in chain
    self.ts.match('next')
    next = self.ts.get()
    if next == ';':
      next = None
    else:
      self.ts.match(';')

    # there are some files with extra tags in them. for example:
    #    owner	640;
    #    group	15;
    #    permissions	644;
    #    hardlinks	@configure.in@;
    # this is "newphrase" in RCSFILE(5). we just want to skip over these.
    while 1:
      token = self.ts.get()
      if token == 'desc' or token[0] in string.digits:
        self.ts.unget(token)
        break
      # consume everything up to the semicolon
      self._read_until_semicolon()

    self.sink.define_revision(revision, timestamp, author, state, branches,
                              next)

  def parse_rcs_tree(self):
    while 1:
      revision = self.ts.get()

      # End of RCS tree description ?
      if revision == 'desc':
        self.ts.unget(revision)
        return

      self._parse_rcs_tree_entry(revision)

  def parse_rcs_description(self):
    self.ts.match('desc')
    self.sink.set_description(self.ts.get())

  def parse_rcs_deltatext(self):
    while 1:
      revision = self.ts.get()
      if revision is None:
        # EOF
        break
      text, sym2, log, sym1 = self.ts.mget(4)
      if sym1 != 'log':
        print `text[:100], sym2[:100], log[:100], sym1[:100]`
        raise RCSExpected(sym1, 'log')
      if sym2 != 'text':
        raise RCSExpected(sym2, 'text')
      ### need to add code to chew up "newphrase"
      self.sink.set_revision_info(revision, log, text)

  def parse(self, file, sink):
    self.ts = self.stream_class(file)
    self.sink = sink

    self.parse_rcs_admin()

    # let sink know when the admin section has been completed
    self.sink.admin_completed()

    self.parse_rcs_tree()

    # many sinks want to know when the tree has been completed so they can
    # do some work to prep for the arrival of the deltatext
    self.sink.tree_completed()

    self.parse_rcs_description()
    self.parse_rcs_deltatext()

    # easiest for us to tell the sink it is done, rather than worry about
    # higher level software doing it.
    self.sink.parse_completed()

    self.ts = self.sink = None

# --------------------------------------------------------------------------