// Logo Search packages:
// Sourcecode: jade version File versions  Download package
//
// ExtendEntityManager.cxx

// Copyright (c) 1994, 1995, 1996 James Clark
// See the file COPYING for copying permission.

#ifdef __GNUG__
#pragma implementation
#endif

#include "splib.h"
#include "ExtendEntityManager.h"
#include "Message.h"
#include "MessageArg.h"
#include "OffsetOrderedList.h"
#include "rtti.h"
#include "StorageManager.h"
#include "Vector.h"
#include "NCVector.h"
#include "Owner.h"
#include "constant.h"
#include "EntityManagerMessages.h"
#include "StorageObjectPosition.h"
#include "Owner.h"
#include "CodingSystem.h"
#include "CodingSystemKit.h"
#include "InputSource.h"
#include "Mutex.h"
#include "macros.h"
#include "EntityCatalog.h"
#include "CharMap.h"

#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdio.h>

#ifdef DECLARE_MEMMOVE
extern "C" {
  void *memmove(void *, const void *, size_t);
}
#endif

#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif

// Byte compared against the last byte of a read in ExternalInputSource::fill();
// when the storage object spec has zapEof set, a trailing EOFCHAR is not
// passed to the decoder.
const char EOFCHAR = '\032';  // Control-Z

// Forward declaration; the class is defined further down in this file.
class ExternalInputSource;

// Concrete ExtendEntityManager, created by ExtendEntityManager::make().
// It holds a default storage manager plus any registered ones, a coding
// system kit and an optional catalog manager, and uses FSIParser to turn
// system identifier strings into ParsedSystemId objects.
class EntityManagerImpl : public ExtendEntityManager {
public:
  EntityManagerImpl(StorageManager *defaultStorageManager,
                const InputCodingSystem *defaultCodingSystem,
                const ConstPtr<InputCodingSystemKit> &,
                Boolean internalCharsetIsDocCharset);
  void setCatalogManager(CatalogManager *catalogManager);
  void registerStorageManager(StorageManager *);
  // Parse sysid, apply catalog mapping and return a new input source,
  // or 0 if parsing or mapping fails.
  InputSource *open(const StringC &sysid,
                const CharsetInfo &,
                InputSourceOrigin *,
                unsigned flags,
                Messenger &);
  // The system character set of the coding system kit.
  const CharsetInfo &charset() const;
  Boolean internalCharsetIsDocCharset() const;
  ConstPtr<EntityCatalog> makeCatalog(StringC &systemId,
                              const CharsetInfo &charset,
                              Messenger &mgr);
  Boolean expandSystemId(const StringC &,
                   const Location &,
                   Boolean isNdata,
                   const CharsetInfo &,
                   const StringC *,
                   Messenger &,
                   StringC &);
  Boolean mergeSystemIds(const Vector<StringC> &,
                   Boolean mapCatalogDocument,
                   const CharsetInfo &,
                   Messenger &mgr,
                   StringC &) const;
  // Find a storage manager whose type name matches (case-insensitively).
  StorageManager *lookupStorageType(const StringC &, const CharsetInfo &) const;
  // Find a storage manager by comparing the type() pointer itself.
  StorageManager *lookupStorageType(const char *) const;
  StorageManager *guessStorageType(const StringC &, const CharsetInfo &) const;
  const InputCodingSystem *lookupCodingSystem(const StringC &,
                                    const CharsetInfo &,
                                    Boolean isBctf,
                                    const char *&) const;
  Boolean resolveSystemId(const StringC &str,
                    const CharsetInfo &idCharset,
                    Messenger &mgr,
                    const Location &defLocation,
                    Boolean isNdata,
                    ParsedSystemId &parsedSysid) const;
  Boolean parseSystemId(const StringC &str,
                  const CharsetInfo &idCharset,
                  Boolean isNdata,
                  const StorageObjectLocation *def,
                  Messenger &mgr,
                  ParsedSystemId &parsedSysid) const;
  // Select the character set used internally: the document character set
  // when internalCharsetIsDocCharset_ is set, otherwise charset().
  const CharsetInfo &internalCharset(const CharsetInfo &docCharset) const {
    if (internalCharsetIsDocCharset_)
      return docCharset;
    else
      return charset();
  }
private:
  EntityManagerImpl(const EntityManagerImpl &); // undefined
  void operator=(const EntityManagerImpl &); // undefined
  // Convert a Location into a storage object location, walking up through
  // parent origins until one with external info is found.
  static Boolean defLocation(const Location &, StorageObjectLocation &);
  // Case-insensitive comparison of type against the C string s.
  static Boolean matchKey(const StringC &type, const char *s,
                    const CharsetInfo &internalCharset);
  // Storage managers added through registerStorageManager().
  NCVector<Owner<StorageManager> > storageManagers_;
  Owner<StorageManager> defaultStorageManager_;
  const InputCodingSystem *defaultCodingSystem_;
  Owner<CatalogManager> catalogManager_;
  Boolean internalCharsetIsDocCharset_;
  ConstPtr<InputCodingSystemKit> codingSystemKit_;
  friend class FSIParser;
};

// ExternalInfo implementation that records, per storage object of a parsed
// system identifier, its position data (id, decoder, end offset, RS flags)
// together with the offsets of record starts noted while reading.
class ExternalInfoImpl : public ExternalInfo {
  RTTI_CLASS
public:
  // Takes over the contents of parsedSysid by swapping them out of it.
  ExternalInfoImpl(ParsedSystemId &parsedSysid);
  const StorageObjectSpec &spec(size_t i) const;
  size_t nSpecs() const;
  const ParsedSystemId &parsedSystemId() const;
  // Record a record start at the given offset.
  void noteRS(Offset);
  // Record the end offset of the current storage object and move to the next.
  void noteStorageObjectEnd(Offset);
  // Flag the current storage object as having inserted RSs.
  void noteInsertedRSs();
  void setDecoder(size_t i, Decoder *);
  // Swaps the given string into the stored id of storage object i.
  void setId(size_t i, StringC &);
  void getId(size_t i, StringC &) const;
  Boolean convertOffset(Offset, StorageObjectLocation &) const;
private:
  ParsedSystemId parsedSysid_;
  // One entry per storage object spec.
  NCVector<StorageObjectPosition> position_;
  // Index into position_ of the storage object currently being read.
  size_t currentIndex_;
  // list of inserted RSs
  OffsetOrderedList rsList_;
  // When set, noteRS() does not append to rsList_.
  Boolean notrack_;
  Mutex mutex_;
};

// InputSource that reads the storage objects named by a ParsedSystemId one
// after another, decodes their bytes into Chars and normalises record
// boundaries according to each storage object's record type.
class ExternalInputSource : public InputSource {
public:
  ExternalInputSource(ParsedSystemId &parsedSysid,
                  const CharsetInfo &internalCharset,
                  const CharsetInfo &docCharset,
                  Boolean internalCharsetIsDocCharset,
                  Char replacementChar,
                  InputSourceOrigin *origin,
                  unsigned flags);
  void pushCharRef(Char, const NamedCharRef &);
  ~ExternalInputSource();
private:
  // Make more characters available; returns eE when every storage object
  // has been consumed.
  Xchar fill(Messenger &);
  Boolean rewind(Messenger &);
  void willNotRewind();
  void setDocCharset(const CharsetInfo &, const CharsetInfo &);
  void willNotSetDocCharset();

  // Reset the buffer and reading state to its initial values.
  void init();
  // Note a record start at the current position / at p.
  void noteRS();
  void noteRSAt(const Char *);
  void reallocateBuffer(size_t size);
  // Insert a character at the current position.
  void insertChar(Char);
  void buildMap(const CharsetInfo &internalCharset,
            const CharsetInfo &docCharset);
  void buildMap1(const CharsetInfo &, const CharsetInfo &);
  // Linear searches in [start, end); return 0 when nothing is found.
  static const Char *findNextCr(const Char *start, const Char *end);
  static const Char *findNextLf(const Char *start, const Char *end);
  static const Char *findNextCrOrLf(const Char *start, const Char *end);

  ExternalInfoImpl *info_;
  // Character buffer allocated with new[]; bufSize_ is its size in Chars.
  Char *buf_;
  // End of the decoded characters in buf_.
  const Char *bufLim_;
  // Offset corresponding to bufLim_.
  Offset bufLimOffset_;
  size_t bufSize_;
  // Number of bytes requested per read; set from getBlockSize().
  size_t readSize_;
  // One (possibly null) storage object per storage object spec.
  NCVector<Owner<StorageObject> > sov_;
  // Storage object currently being read, or 0.
  StorageObject *so_;
  // Index of the next storage object spec to open.
  size_t soIndex_;
  // An RS must be inserted before the next character.
  Boolean insertRS_;
  Decoder *decoder_;
  // Bytes read but not yet consumed by the decoder.
  const char *leftOver_;
  size_t nLeftOver_;
  Boolean mayRewind_;
  Boolean maySetDocCharset_;
  Boolean mayNotExist_;
  enum RecordType {
    unknown,
    crUnknown,
    crlf,
    lf,
    cr,
    asis
    };
  RecordType recordType_;
  // Drop a trailing EOFCHAR from each read.
  Boolean zapEof_;
  Boolean internalCharsetIsDocCharset_;
  Char replacementChar_;
  // Character translation map used by MappingDecoder; null when not needed.
  Ptr<CharMapResource<Unsigned32> > map_;
};

// Parser that turns a system identifier string into a ParsedSystemId.
// It is constructed by EntityManagerImpl::parseSystemId() and is a friend of
// EntityManagerImpl. The member function definitions are not in this part
// of the file.
class FSIParser {
public:
  FSIParser(const StringC &, const CharsetInfo &idCharset,
          Boolean isNdata,
          const StorageObjectLocation *defLoc,
          const EntityManagerImpl *em,
          Messenger &mgr);
  // Parse the string given to the constructor into parsedSysid.
  Boolean parse(ParsedSystemId &parsedSysid);
  static const char *recordsName(StorageObjectSpec::Records records);
  // Entry pairing a records keyword with its enumeration value.
  struct RecordType {
    const char *name;
    StorageObjectSpec::Records value;
  };
private:
  Boolean handleInformal(size_t startIndex, ParsedSystemId &parsedSysid);
  Boolean convertId(StringC &, Xchar smcrd, const StorageManager *);
  Xchar get();
  void unget();
  StorageManager *lookupStorageType(const StringC &key, Boolean &neutral);
  Boolean matchKey(const StringC &, const char *);
  Boolean matchChar(Xchar, char);
  Boolean isS(Xchar);
  Boolean convertDigit(Xchar c, int &weight);
  void uncharref(StringC &);
  Boolean setAttributes(StorageObjectSpec &sos, Boolean neutral,
                  Xchar &smcrd, Boolean &fold);
  Boolean setCatalogAttributes(ParsedSystemId &parsedSysid);
  void setDefaults(StorageObjectSpec &sos);
  Boolean parseAttribute(StringC &token, Boolean &gotValue, StringC &value);
  Boolean lookupRecords(const StringC &token, StorageObjectSpec::Records &);
  void convertMinimumLiteral(const StringC &from, StringC &to);

  // The string being parsed and the current read position in it.
  const StringC &str_;
  size_t strIndex_;
  Messenger &mgr_;
  const EntityManagerImpl *em_;
  const StorageObjectSpec *defSpec_;
  const StringC *defId_;
  const CharsetInfo &idCharset_;
  Boolean isNdata_;
  static RecordType recordTypeTable[];
};

// Record start and record end characters. ExternalInputSource::fill()
// inserts RS where a record begins and stores RE over a line feed that
// ends a record.
const Char RS = '\n';
const Char RE = '\r';

// Out-of-line destructor for the catalog manager interface; nothing to do.
ExtendEntityManager::CatalogManager::~CatalogManager() { }

// Factory: build the concrete entity manager from a default storage
// manager, a default coding system and a coding system kit. The caller
// receives a pointer to a newly allocated EntityManagerImpl.
ExtendEntityManager *ExtendEntityManager::make(StorageManager *sm,
                                     const InputCodingSystem *cs,
                                     const ConstPtr<InputCodingSystemKit> &csKit,
                                     Boolean internalCharsetIsDocCharset)
{
  ExtendEntityManager *manager
    = new EntityManagerImpl(sm, cs, csKit, internalCharsetIsDocCharset);
  return manager;
}

// Convert an offset within the entity described by info into a storage
// object location. Returns false when info is null or is not an
// ExternalInfoImpl; otherwise returns the result of convertOffset().
Boolean ExtendEntityManager::externalize(const ExternalInfo *info,
                               Offset off,
                               StorageObjectLocation &loc)
{
  if (info) {
    const ExternalInfoImpl *impl = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info);
    if (impl)
      return impl->convertOffset(off, loc);
  }
  return false;
}

// Return the parsed system identifier held by info, or 0 when info is null
// or is not an ExternalInfoImpl.
const ParsedSystemId *
ExtendEntityManager::externalInfoParsedSystemId(const ExternalInfo *info)
{
  if (info) {
    const ExternalInfoImpl *impl = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info);
    if (impl)
      return &impl->parsedSystemId();
  }
  return 0;
}

// Store the default storage manager, default coding system, coding system
// kit and the internal-charset flag. Initialisers are listed in member
// declaration order, which is the order in which they run.
EntityManagerImpl::EntityManagerImpl(StorageManager *defaultStorageManager,
                             const InputCodingSystem *defaultCodingSystem,
                             const ConstPtr<InputCodingSystemKit> &codingSystemKit,
                             Boolean internalCharsetIsDocCharset)
: defaultStorageManager_(defaultStorageManager),
  defaultCodingSystem_(defaultCodingSystem),
  internalCharsetIsDocCharset_(internalCharsetIsDocCharset),
  codingSystemKit_(codingSystemKit)
{
}

// Accessor for the flag passed to the constructor.
Boolean EntityManagerImpl::internalCharsetIsDocCharset() const
{
  const Boolean flag = internalCharsetIsDocCharset_;
  return flag;
}

// The system character set, as reported by the coding system kit.
const CharsetInfo &EntityManagerImpl::charset() const
{
  const CharsetInfo &systemCharset = codingSystemKit_->systemCharset();
  return systemCharset;
}

// Open the entity named by sysid. The system identifier is parsed and then
// passed through the catalog manager; if either step fails 0 is returned.
// On success a newly allocated ExternalInputSource is returned.
InputSource *EntityManagerImpl::open(const StringC &sysid,
                             const CharsetInfo &docCharset,
                             InputSourceOrigin *origin,
                             unsigned flags,
                             Messenger &mgr)
{
  const Boolean ndata = (flags & ExtendEntityManager::isNdata) != 0;
  ParsedSystemId parsedSysid;
  if (!parseSystemId(sysid, docCharset, ndata, 0, mgr, parsedSysid))
    return 0;
  if (!catalogManager_->mapCatalog(parsedSysid, this, mgr))
    return 0;
  return new ExternalInputSource(parsedSysid,
                         charset(),
                         docCharset,
                         internalCharsetIsDocCharset_,
                         codingSystemKit_->replacementChar(),
                         origin, flags);
}


// Delegate catalog creation to the catalog manager, passing this entity
// manager along.
ConstPtr<EntityCatalog>
EntityManagerImpl::makeCatalog(StringC &systemId,
                         const CharsetInfo &docCharset,
                         Messenger &mgr)
{
  ConstPtr<EntityCatalog> catalog
    = catalogManager_->makeCatalog(systemId, docCharset, this, mgr);
  return catalog;
}

// Parse every system identifier in sysids into one ParsedSystemId and
// unparse the combination into result. When mapCatalogDocument is set a
// catalogDocument map entry is added first. Returns 0 as soon as any
// identifier fails to parse, 1 otherwise.
Boolean
EntityManagerImpl::mergeSystemIds(const Vector<StringC> &sysids,
                          Boolean mapCatalogDocument,
                          const CharsetInfo &docCharset,
                          Messenger &mgr,
                          StringC &result) const
{
  ParsedSystemId merged;
  if (mapCatalogDocument) {
    merged.maps.resize(merged.maps.size() + 1);
    merged.maps.back().type = ParsedSystemId::Map::catalogDocument;
  }
  size_t idx = 0;
  while (idx < sysids.size()) {
    if (!parseSystemId(sysids[idx], docCharset, 0, 0, mgr, merged))
      return 0;
    idx++;
  }
  merged.unparse(internalCharset(docCharset), 0, result);
  return 1;
}

// Parse str relative to the storage object location derived from defLoc
// (when one can be derived), optionally prepend a catalogPublic map entry
// carrying *mapCatalogPublic, and unparse the outcome into result.
// Returns 0 if parsing fails, 1 otherwise.
Boolean
EntityManagerImpl::expandSystemId(const StringC &str,
                          const Location &defLoc,
                          Boolean isNdata,
                          const CharsetInfo &docCharset,
                          const StringC *mapCatalogPublic,
                          Messenger &mgr,
                          StringC &result)
{
  ParsedSystemId parsedSysid;
  StorageObjectLocation defSoLoc;
  // Only hand a default location to the parser when one was found.
  const StorageObjectLocation *defSoLocP
    = defLocation(defLoc, defSoLoc) ? &defSoLoc : 0;
  if (!parseSystemId(str, docCharset, isNdata, defSoLocP, mgr, parsedSysid))
    return 0;
  if (mapCatalogPublic) {
    ParsedSystemId::Map publicMap;
    publicMap.type = ParsedSystemId::Map::catalogPublic;
    publicMap.publicId = *mapCatalogPublic;
    parsedSysid.maps.insert(parsedSysid.maps.begin(), 1, publicMap);
  }
  parsedSysid.unparse(internalCharset(docCharset), isNdata, result);
  return 1;
}

// Run an FSIParser over str, using the internal character set selected for
// docCharset, and store the outcome in parsedSysid.
Boolean EntityManagerImpl::parseSystemId(const StringC &str,
                               const CharsetInfo &docCharset,
                               Boolean isNdata,
                               const StorageObjectLocation *defLoc,
                               Messenger &mgr,
                               ParsedSystemId &parsedSysid) const
{
  const CharsetInfo &idCharset = internalCharset(docCharset);
  FSIParser parser(str, idCharset, isNdata, defLoc, this, mgr);
  return parser.parse(parsedSysid);
}

// Ask each registered storage manager, and then the default one, whether
// type looks like one of its identifiers. Returns the first that agrees,
// or 0 if none does.
StorageManager *
EntityManagerImpl::guessStorageType(const StringC &type,
                            const CharsetInfo &internalCharset) const
{
  size_t idx = 0;
  while (idx < storageManagers_.size()) {
    if (storageManagers_[idx]->guessIsId(type, internalCharset))
      return storageManagers_[idx].pointer();
    idx++;
  }
  if (!defaultStorageManager_->guessIsId(type, internalCharset))
    return 0;
  return defaultStorageManager_.pointer();
}

// Find the storage manager whose type name equals type, ignoring case.
// The default storage manager is checked before the registered ones.
// An empty type never matches. Returns 0 when nothing matches.
StorageManager *
EntityManagerImpl::lookupStorageType(const StringC &type,
                             const CharsetInfo &internalCharset) const
{
  if (type.size() == 0)
    return 0;
  if (matchKey(type, defaultStorageManager_->type(), internalCharset))
    return defaultStorageManager_.pointer();
  size_t idx = 0;
  while (idx < storageManagers_.size()) {
    if (matchKey(type, storageManagers_[idx]->type(), internalCharset))
      return storageManagers_[idx].pointer();
    idx++;
  }
  return 0;
}

// Find the storage manager whose type() pointer is the very pointer passed
// in. This compares addresses, not string contents. Returns 0 when there
// is no such manager.
StorageManager *
EntityManagerImpl::lookupStorageType(const char *type) const
{
  if (defaultStorageManager_->type() == type)
    return defaultStorageManager_.pointer();
  size_t idx = 0;
  while (idx < storageManagers_.size()) {
    if (storageManagers_[idx]->type() == type)
      return storageManagers_[idx].pointer();
    idx++;
  }
  return 0;
}

// Forward the coding system lookup to the coding system kit.
const InputCodingSystem *
EntityManagerImpl::lookupCodingSystem(const StringC &type,
                              const CharsetInfo &internalCharset,
                              Boolean isBctf,
                              const char *&name) const
{
  const InputCodingSystem *codingSystem
    = codingSystemKit_->makeInputCodingSystem(type, internalCharset, isBctf, name);
  return codingSystem;
}

// Case-insensitive comparison of type against the C string s.
// Each character of s is tried in upper and lower case, translated with
// internalCharset.execToDesc(), and compared with the character of type at
// the same position. Returns true only if the lengths are equal and every
// position matches.
Boolean
EntityManagerImpl::matchKey(const StringC &type,
                      const char *s,
                      const CharsetInfo &internalCharset)
{
  if (strlen(s) != type.size())
    return false;
  for (size_t i = 0; i < type.size(); i++) {
    // toupper/tolower require a value representable as unsigned char (or
    // EOF); passing a negative plain char is undefined behaviour.
    const unsigned char c = (unsigned char)s[i];
    if (internalCharset.execToDesc(toupper(c)) != type[i]
        && internalCharset.execToDesc(tolower(c)) != type[i])
      return false;
  }
  return true;
}

// Append sm to the list of registered storage managers; the list element
// is an Owner, so the pointer is stored in an owning slot.
void EntityManagerImpl::registerStorageManager(StorageManager *sm)
{
  const size_t slot = storageManagers_.size();
  storageManagers_.resize(slot + 1);
  storageManagers_[slot] = sm;
}

// Install the catalog manager used by open() and makeCatalog(). The member
// is an Owner, so the pointer is stored in an owning slot.
void EntityManagerImpl::setCatalogManager(CatalogManager *catalogManager)
{
  catalogManager_ = catalogManager;
}

// Translate defLocation into a storage object location.
// Starting from the location's origin, walk outwards: an origin that is not
// an input source origin is replaced by its parent location; an input source
// origin without external info is replaced by whatever its own
// defLocation() yields. The first origin with external info is used to
// externalize the offset. Returns 0 if the chain runs out first.
Boolean
EntityManagerImpl::defLocation(const Location &defLocation,
                         StorageObjectLocation &soLoc)
{
  const Origin *origin = defLocation.origin().pointer();
  Index index = defLocation.index();
  while (origin) {
    const InputSourceOrigin *inputSourceOrigin = origin->asInputSourceOrigin();
    if (!inputSourceOrigin) {
      // Not an input source: continue from the parent location.
      const Location &parentLoc = origin->parent();
      origin = parentLoc.origin().pointer();
      index = parentLoc.index();
      continue;
    }
    Offset off = inputSourceOrigin->startOffset(index);
    const ExternalInfo *info = inputSourceOrigin->externalInfo();
    if (info)
      return ExtendEntityManager::externalize(info, off, soLoc);
    if (!inputSourceOrigin->defLocation(off, origin, index))
      return 0;
  }
  return 0;
}

// StorageObject wrapper that can hand out data one byte at a time.
//
// While *unbuffer is true, read() fills a private buffer from the wrapped
// storage object and returns a single byte per call, so that the caller
// never receives bytes beyond the one it is about to consume. Once
// *unbuffer becomes false and the private buffer has been drained, reads
// are passed straight through to the wrapped object.
//
// The wrapped object is held in an Owner; the flag is only observed
// through the pointer and is not owned.
class UnbufferingStorageObject : public StorageObject {
public:
  // Initialisers follow member declaration order. bufSize_ is given a
  // defined value even though it is only meaningful once buf_ is allocated.
  UnbufferingStorageObject(StorageObject *sub,
                     const Boolean *unbuffer)
    : sub_(sub), bufSize_(0), bufAvail_(0), bufNext_(0), buf_(0),
      unbuffer_(unbuffer) { }
  ~UnbufferingStorageObject() { delete [] buf_; }
  // Read into buf. Returns 0 on failure of the wrapped object, 1 otherwise;
  // nread receives the number of bytes stored (at most 1 while unbuffering).
  Boolean read(char *buf, size_t bufSize, Messenger &mgr,
               size_t &nread) {
    if (bufNext_ >= bufAvail_) {
      // Private buffer is empty.
      bufAvail_ = bufNext_ = 0;
      if (!*unbuffer_)
        return sub_->read(buf, bufSize, mgr, nread);
      if (buf_ == 0)
        buf_ = new char[bufSize_ = bufSize];
      if (!sub_->read(buf_, bufSize_, mgr, bufAvail_)) {
        // Do not trust whatever the failed read left in bufAvail_.
        bufAvail_ = 0;
        return 0;
      }
      if (bufAvail_ == 0) {
        // Successful read of zero bytes: report that as-is rather than
        // returning an uninitialised byte from buf_.
        nread = 0;
        return 1;
      }
    }
    *buf = buf_[bufNext_++];
    nread = 1;
    return 1;
  }
  // Discard buffered bytes and rewind the wrapped object.
  Boolean rewind(Messenger &mgr) {
    bufAvail_ = bufNext_ = 0;
    return sub_->rewind(mgr);
  }
  void willNotRewind() { sub_->willNotRewind(); }
  size_t getBlockSize() const { return sub_->getBlockSize(); }
private:
  Owner<StorageObject> sub_;
  // Size of buf_ in bytes (valid once buf_ is non-null).
  size_t bufSize_;
  // Number of valid bytes in buf_.
  size_t bufAvail_;
  // Index of the next byte of buf_ to hand out.
  size_t bufNext_;
  char *buf_;
  const Boolean *unbuffer_;
};

// Decoder that runs another decoder and then translates every decoded
// character through a character map.
class MappingDecoder : public Decoder {
public:
  MappingDecoder(Decoder *,
             const ConstPtr<CharMapResource<Unsigned32> > &);
  // Forwards to the wrapped decoder.
  Boolean convertOffset(unsigned long &offset) const;
  // Decode with the wrapped decoder, then map each resulting character.
  size_t decode(Char *, const char *, size_t, const char **);
private:
  // The wrapped decoder, held in an Owner.
  Owner<Decoder> sub_;
  // Per-character mapping: either an absolute replacement (top bit set) or
  // an amount to add.
  ConstPtr<CharMapResource<Unsigned32> > map_;
};

// Wrap sub, reusing its minimum bytes-per-character figure for the base
// class, and remember the map applied after decoding.
MappingDecoder::MappingDecoder(Decoder *sub,
                         const ConstPtr<CharMapResource<Unsigned32> > &map)
: Decoder(sub->minBytesPerChar()),
  sub_(sub),
  map_(map)
{
}

// Decode with the wrapped decoder and then translate each produced
// character in place. A map entry with bit 31 set holds the replacement
// character in its remaining bits; any other entry is added to the
// character. Returns the number of characters produced.
size_t MappingDecoder::decode(Char *to, const char *s,
                        size_t slen, const char **rest)
{
  const size_t nDecoded = sub_->decode(to, s, slen, rest);
  const unsigned absoluteFlag = unsigned(1) << 31;
  const CharMap<Unsigned32> &map = *map_;
  for (Char *p = to, *pEnd = to + nDecoded; p != pEnd; ++p) {
    Unsigned32 d = map[*p];
    if (d & absoluteFlag)
      *p = (d & ~absoluteFlag);
    else
      *p += d;
  }
  return nDecoded;
}

// Offset conversion is delegated to the wrapped decoder.
Boolean MappingDecoder::convertOffset(unsigned long &offset) const
{
  const Boolean converted = sub_->convertOffset(offset);
  return converted;
}
  
// Set up an input source over the storage objects of parsedSysid.
// A character map is built only if some storage object's coding system type
// is neither "special" nor the type that matches the internal character set
// (bctf when the internal charset is the document charset, encoding
// otherwise). The contents of parsedSysid are handed over to a new
// ExternalInfoImpl, which is attached to origin.
// Initialisers are listed in member declaration order.
ExternalInputSource::ExternalInputSource(ParsedSystemId &parsedSysid,
                               const CharsetInfo &systemCharset,
                               const CharsetInfo &docCharset,
                               Boolean internalCharsetIsDocCharset,
                               Char replacementChar,
                               InputSourceOrigin *origin,
                               unsigned flags)
: InputSource(origin, 0, 0),
  sov_(parsedSysid.size()),
  mayRewind_((flags & EntityManager::mayRewind) != 0),
  // hack
  maySetDocCharset_((flags & EntityManager::maySetDocCharset) != 0),
  mayNotExist_((flags & ExtendEntityManager::mayNotExist) != 0),
  internalCharsetIsDocCharset_(internalCharsetIsDocCharset),
  replacementChar_(replacementChar)
{
  // Coding system type that needs no translation.
  const int nativeType = (internalCharsetIsDocCharset
                          ? StorageObjectSpec::bctf
                          : StorageObjectSpec::encoding);
  size_t i = 0;
  while (i < parsedSysid.size()) {
    const int type = parsedSysid[i].codingSystemType;
    if (type != nativeType && type != StorageObjectSpec::special) {
      map_ = new CharMapResource<Unsigned32>;
      buildMap(systemCharset, docCharset);
      break;
    }
    i++;
  }
  for (size_t j = 0; j < sov_.size(); j++)
    sov_[j] = 0;
  init();
  info_ = new ExternalInfoImpl(parsedSysid);
  origin->setExternalInfo(info_);
}

void ExternalInputSource::setDocCharset(const CharsetInfo &docCharset,
                              const CharsetInfo &systemCharset)
{
  if (!map_.isNull())
    buildMap(systemCharset, docCharset);
  willNotSetDocCharset();
}

// Clear the flag that UnbufferingStorageObject watches (a pointer to it is
// passed in fill()).
void ExternalInputSource::willNotSetDocCharset()
{
  maySetDocCharset_ = false;
}

// Reset every map entry to an absolute mapping onto the invalid character,
// then fill in the real mappings. The direction depends on
// internalCharsetIsDocCharset_: system -> document when set,
// document -> system otherwise.
void ExternalInputSource::buildMap(const CharsetInfo &systemCharset,
                           const CharsetInfo &docCharset)
{
  // FIXME How should invalidChar be chosen when internalCharsetIsDocCharset_?
  Char invalidChar
    = internalCharsetIsDocCharset_ ? 0 : replacementChar_;
  map_->setAll((Unsigned32(1) << 31) | invalidChar);
  if (!internalCharsetIsDocCharset_) {
    buildMap1(docCharset, systemCharset);
    return;
  }
  buildMap1(systemCharset, docCharset);
}

// Fill map_ with the translation from fromCharset to toCharset.
// For every range of fromCharset (clipped to charMax) the corresponding
// universal characters are looked up in toCharset; each sub-range that has
// a mapping is stored in the map as the difference between the target and
// source character numbers.
void ExternalInputSource::buildMap1(const CharsetInfo &fromCharset,
                            const CharsetInfo &toCharset)
{
  UnivCharsetDescIter iter(fromCharset.desc());
  for (;;) {
    WideChar descMin, descMax;
    UnivChar univMin;
    if (!iter.next(descMin, descMax, univMin))
      break;
    // Stop at the first range that starts beyond charMax.
    if (descMin > charMax)
      break;
    if (descMax > charMax)
      descMax = charMax;
    // Number of characters still to process in this range.
    WideChar totalCount = 1 + (descMax - descMin);
    do {
      WideChar count;
      WideChar toMin;
      ISet<WideChar> set;
      int nMap = toCharset.univToDesc(univMin, toMin, set, count);
      // Never process more than what is left of the current range.
      if (count > totalCount)
      count = totalCount;
      if (nMap && toMin <= charMax) {
      // Clip the target range so that it does not go past charMax.
      Char toMax;
      if (count - 1 > charMax - toMin)
        toMax = charMax;
      else
        toMax = toMin + (count - 1);
      // The stored value is an offset (top bit clear in the usual case is
      // presumably relied upon by MappingDecoder::decode -- TODO confirm).
      map_->setRange(descMin, descMin + (toMax - toMin), Char(toMin - descMin));
      }
      descMin += count;
      univMin += count;
      totalCount -= count;
    } while (totalCount > 0);
  }
}

// Put the reader back into its initial state: no buffer, no current
// storage object, no left-over bytes, and an RS pending before the first
// character.
void ExternalInputSource::init()
{
  // Buffer bookkeeping.
  buf_ = 0;
  bufLim_ = 0;
  bufSize_ = 0;
  bufLimOffset_ = 0;
  // Storage object bookkeeping.
  so_ = 0;
  soIndex_ = 0;
  insertRS_ = true;
  // Undecoded bytes.
  leftOver_ = 0;
  nLeftOver_ = 0;
}

// Release the character buffer. Deleting a null pointer is a no-op, so no
// explicit check is required.
ExternalInputSource::~ExternalInputSource()
{
  delete [] buf_;
}

// Restart reading from the first storage object.
// The buffer is released, a fresh ExternalInfoImpl is created from a copy of
// the current parsed system id, every storage object opened so far is
// rewound and its id carried over to the new info object, which is then
// attached to the input source origin.
// Returns 0 if any storage object fails to rewind, 1 otherwise.
Boolean ExternalInputSource::rewind(Messenger &mgr)
{
  reset(0, 0);
  // Release the buffer and clear the buffer fields immediately: the early
  // return below must not leave buf_ dangling, otherwise the destructor
  // would delete it a second time.
  delete [] buf_;
  buf_ = 0;
  bufSize_ = 0;
  bufLim_ = 0;
  // reset makes a new EntityOrigin
  ParsedSystemId parsedSysid(info_->parsedSystemId());
  ExternalInfoImpl *oldInfo = info_;
  info_ = new ExternalInfoImpl(parsedSysid);
  so_ = 0;
  for (size_t i = 0; i < soIndex_; i++) {
    if (sov_[i] && !sov_[i]->rewind(mgr))
      return 0;
    StringC tem;
    oldInfo->getId(i, tem);
    info_->setId(i, tem);
  }
  inputSourceOrigin()->setExternalInfo(info_);
  init();
  return 1;
}

// Tell every storage object that exists that it will not be rewound, and
// remember that for storage objects opened later.
void ExternalInputSource::willNotRewind()
{
  const size_t count = sov_.size();
  for (size_t idx = 0; idx < count; idx++) {
    if (!sov_[idx])
      continue;
    sov_[idx]->willNotRewind();
  }
  mayRewind_ = 0;
}


// Round up N so that it is a multiple of TO.
// TO must be a power of 2.

inline
size_t roundUp(size_t n, size_t to)
{
  // With `to` a power of two, `to - 1` is a mask of the low bits; adding it
  // and then clearing those bits yields the next multiple of `to`.
  const size_t lowBits = to - 1;
  const size_t bumped = n + lowBits;
  return bumped & ~lowBits;
}

inline
void ExternalInputSource::noteRSAt(const Char *p)
{
  info_->noteRS(bufLimOffset_ - (bufLim_ - p));
}

inline
void ExternalInputSource::noteRS()
{
  noteRSAt(cur());
}

// Make at least one more character available and return it.
//
// Phase 1 (the while loop): while no decoded characters lie beyond end(),
// open the next storage object if necessary, size and arrange the buffer,
// read a block of bytes and decode it.
// Phase 2 (the switch): extend end() up to the next record boundary
// according to recordType_, translating line ends into RS/RE.
//
// Returns eE once all storage objects have been consumed, otherwise the
// result of nextChar().
Xchar ExternalInputSource::fill(Messenger &mgr)
{
  ASSERT(cur() == end());
  while (end() >= bufLim_) {
    // need more data
    while (so_ == 0) {
      // No open storage object: move on to the next spec, if any.
      if (soIndex_ >= sov_.size())
      return eE;
      if (soIndex_ > 0)
      info_->noteStorageObjectEnd(bufLimOffset_ - (bufLim_ - end()));
      const StorageObjectSpec &spec = info_->spec(soIndex_);
      if (!sov_[soIndex_]) {
      // Not created yet (it may already exist after a rewind).
      StringC id;
      if (mayNotExist_) {
        // A missing object is acceptable, so messages are discarded.
        NullMessenger nullMgr;
        sov_[soIndex_]
          = spec.storageManager->makeStorageObject(spec.specId, spec.baseId,
                                         spec.search,
                                         mayRewind_, nullMgr, id);
      }
      else
        sov_[soIndex_]
          = spec.storageManager->makeStorageObject(spec.specId, spec.baseId,
                                         spec.search,
                                         mayRewind_, mgr, id);
      info_->setId(soIndex_, id);
      }
      so_ = sov_[soIndex_].pointer();
      if (so_) {
      decoder_ = spec.codingSystem->makeDecoder();
      // Same condition as in the constructor: this coding system type
      // needs translation through map_.
      if (spec.codingSystemType != StorageObjectSpec::special
          && spec.codingSystemType != (internalCharsetIsDocCharset_ 
                               ? StorageObjectSpec::bctf
                               : StorageObjectSpec::encoding)) {
        decoder_ = new MappingDecoder(decoder_, map_);
        if (maySetDocCharset_) {
          // While the document charset may still change, read byte by
          // byte; the wrapper watches maySetDocCharset_ through a pointer.
          sov_[soIndex_] = new UnbufferingStorageObject(sov_[soIndex_].extract(), &maySetDocCharset_);
          so_ = sov_[soIndex_].pointer();
        }
      }
      info_->setDecoder(soIndex_, decoder_);
      zapEof_ = spec.zapEof;
      switch (spec.records) {
      case StorageObjectSpec::asis:
        recordType_ = asis;
        insertRS_ = false;
        break;
      case StorageObjectSpec::cr:
        recordType_ = cr;
        break;
      case StorageObjectSpec::lf:
        recordType_ = lf;
        break;
      case StorageObjectSpec::crlf:
        recordType_ = crlf;
        break;
      case StorageObjectSpec::find:
        // Record type is detected from the data in phase 2.
        recordType_ = unknown;
        break;
      default:
        CANNOT_HAPPEN();
      }
      soIndex_++;
      readSize_ = so_->getBlockSize();
      nLeftOver_ = 0;
      break;
      }
      else
      setAccessError();
      // Reached only when the storage object could not be created
      // (the successful branch leaves the loop with break).
      soIndex_++;
    }

    // Characters between start() and end() must survive the refill.
    size_t keepSize = end() - start();
    const size_t align = sizeof(int)/sizeof(Char);
    size_t readSizeChars = (readSize_ + (sizeof(Char) - 1))/sizeof(Char);
    readSizeChars = roundUp(readSizeChars, align);
    size_t neededSize;        // in Chars
    size_t startOffset;
    // compute neededSize and readSize
    unsigned minBytesPerChar = decoder_->minBytesPerChar();
    if (nLeftOver_ == 0 && minBytesPerChar >= sizeof(Char)) {
      // In this case we want to do decoding in place.
      // FIXME It might be a win on some systems (Irix?) to arrange that the
      // read buffer is on a page boundary.

      if (keepSize >= size_t(-1)/sizeof(Char) - (align - 1) - insertRS_)
      abort();                // FIXME throw an exception
      
      // Now size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize > 0
      if (readSizeChars
        > size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize)
      abort();
      neededSize = roundUp(readSizeChars + keepSize + insertRS_, align);
      // Kept characters (plus room for an RS) sit directly in front of the
      // read area at the end of the buffer.
      startOffset = ((neededSize > bufSize_ ? neededSize : bufSize_)
                 - readSizeChars - insertRS_ - keepSize);
    }
    else {
      // Needs to be room for everything before decoding.
      neededSize = (keepSize + insertRS_ + readSizeChars
                + (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char));
      // Also must be room for everything after decoding.
      size_t neededSize2
      = (keepSize + insertRS_
         // all the converted characters
         + (nLeftOver_ + readSize_)/minBytesPerChar
         // enough Chars to contain left over bytes
         + ((readSize_ % minBytesPerChar + sizeof(Char) - 1)
            / sizeof(Char)));
      if (neededSize2 > neededSize)
      neededSize = neededSize2;
      neededSize = roundUp(neededSize, align);
      if (neededSize > size_t(-1)/sizeof(Char))
      abort();
      startOffset = 0;
    }
    if (bufSize_ < neededSize)
      reallocateBuffer(neededSize);
    // Move the kept characters to their new place.
    Char *newStart = buf_ + startOffset;
    if (newStart != start() && keepSize > 0)
      memmove(newStart, start(), keepSize*sizeof(Char));
    // Left-over bytes go immediately before the area the read fills.
    char *bytesStart = (char *)(buf_ + bufSize_ - readSizeChars) - nLeftOver_;
    if (nLeftOver_ > 0 && leftOver_ != bytesStart)
      memmove(bytesStart, leftOver_, nLeftOver_);
    moveStart(newStart);
    bufLim_ = end();

    size_t nread;
    if (so_->read((char *)(buf_ + bufSize_ - readSizeChars), readSize_,
              mgr, nread)) {
      if (nread > 0) {
      const char *bytesEnd = bytesStart + nLeftOver_ + nread;
      // Decode to just after end(), leaving one slot free when an RS is
      // pending; a trailing EOFCHAR is excluded when zapEof_ is set.
      size_t nChars = decoder_->decode((Char *)end() + insertRS_,
                               bytesStart,
                               nLeftOver_ + nread
                               - (zapEof_ && bytesEnd[-1] == EOFCHAR),
                               &leftOver_);
      nLeftOver_ = bytesEnd - leftOver_;
      if (nChars > 0) {
        if (insertRS_) {
          noteRS();
          *(Char *)end() = RS;
          advanceEnd(end() + 1);
          insertRS_ = false;
          bufLim_ += 1;
          bufLimOffset_ += 1;
        }
        bufLim_ += nChars;
        bufLimOffset_ += nChars;
        break;
      }
      }
    }
    else
      // Read failed: this storage object is finished.
      so_ = 0;
  }
  ASSERT(end() < bufLim_);
  if (insertRS_) {
    // Decoded characters were already available; make room for the RS.
    noteRS();
    insertChar(RS);
    insertRS_ = false;
    bufLimOffset_ += 1;
  }
  switch (recordType_) {
  case unknown:
    {
      // Record type not determined yet: decide from the first line end.
      const Char *e = findNextCrOrLf(end(), bufLim_);
      if (e) {
      if (*e == '\n') {
        recordType_ = lf;
        info_->noteInsertedRSs();
        *(Char *)e = RE;
        advanceEnd(e + 1);
        insertRS_ = true;
      }
      else {
        if (e + 1 < bufLim_) {
          if (e[1] == '\n') {
            recordType_ = crlf;
            advanceEnd(e + 1);
            if (e + 2 == bufLim_) {
            // The LF is the last decoded character: drop it and insert
            // an RS later only if more data follows.
            bufLim_--;
            bufLimOffset_--;
            insertRS_ = true;
            }
          }
          else {
            advanceEnd(e + 1);
            recordType_ = cr;
            info_->noteInsertedRSs();
            insertRS_ = true;
          }
        }
        else {
          // CR is the last decoded character; what follows is not known yet.
          recordType_ = crUnknown;
          advanceEnd(e + 1);
        }
      }
      }
      else
      advanceEnd(bufLim_);
    }
    break;
  case crUnknown:
    {
      // The previous call ended on a CR; the next character decides
      // between crlf and cr.
      if (*cur() == '\n') {
      noteRS();
      advanceEnd(cur() + 1);
      recordType_ = crlf;
      }
      else {
      advanceEnd(cur() + 1);
      insertRS_ = true;
      recordType_ = cr;
      info_->noteInsertedRSs();
      }
    }
    break;
  case lf:
    {
      // LF ends a record: it becomes RE and an RS is inserted next time.
      Char *e = (Char *)findNextLf(end(), bufLim_);
      if (e) {
      advanceEnd(e + 1);
      *e = RE;
      insertRS_ = true;
      }
      else
      advanceEnd(bufLim_);
    }
    break;
  case cr:
    {
      // CR already equals RE; only the RS has to be inserted.
      const Char *e = findNextCr(end(), bufLim_);
      if (e) {
      advanceEnd(e + 1);
      insertRS_ = true;
      }
      else
      advanceEnd(bufLim_);
    }
    break;
  case crlf:
    {
      // CR and LF are left in place as RE and RS; each LF is noted as a
      // record start.
      const Char *e = end();
      for (;;) {
      e = findNextLf(e, bufLim_);
      if (!e) {
        advanceEnd(bufLim_);
        break;
      }
      // Need to delete final RS if not followed by anything.
      if (e + 1 == bufLim_) {
        bufLim_--;
        bufLimOffset_--;
        advanceEnd(e);
        insertRS_ = true;
        // Nothing left before the dropped LF: fetch more data.
        if (cur() == end())
          return fill(mgr);
        break;
      }
      noteRSAt(e);
      e++;
      }
    }
    break;
  case asis:
    advanceEnd(bufLim_);
    break;
  default:
    CANNOT_HAPPEN();
  }
  ASSERT(cur() < end());
  return nextChar();
}

// Return a pointer to the first '\r' in [start, end), or 0 if there is none.
const Char *ExternalInputSource::findNextCr(const Char *start,
                                  const Char *end)
{
  const Char *p = start;
  while (p < end) {
    if (*p == '\r')
      return p;
    ++p;
  }
  return 0;
}

// Return a pointer to the first '\n' in [start, end), or 0 if there is none.
const Char *ExternalInputSource::findNextLf(const Char *start,
                                  const Char *end)
{
  const Char *p = start;
  while (p < end) {
    if (*p == '\n')
      return p;
    ++p;
  }
  return 0;
}

// Return a pointer to the first '\n' or '\r' in [start, end), or 0 if there
// is none.
const Char *ExternalInputSource::findNextCrOrLf(const Char *start,
                                    const Char *end)
{
  const Char *p = start;
  while (p < end) {
    if (*p == '\n' || *p == '\r')
      return p;
    ++p;
  }
  return 0;
}

// Record a named character reference at the current position and insert
// the character it stands for. Must be called with cur() == start().
void ExternalInputSource::pushCharRef(Char ch, const NamedCharRef &ref)
{
  ASSERT(cur() == start());
  noteCharRef(startIndex() + (cur() - start()), ref);
  insertChar(ch);
}

// Insert ch at the current position so that it is the next character read.
// If there is free space in front of start(), the characters between
// start() and cur() are shifted one place left; otherwise the characters
// from cur() up to bufLim_ are shifted one place right, growing the buffer
// or moving the left-over bytes out of the way first when necessary.
void ExternalInputSource::insertChar(Char ch)
{
  if (start() > buf_) {
    // Room before start(): shift [start, cur) left by one.
    if (cur() > start())
      memmove((Char *)start() - 1, start(), (cur() - start())*sizeof(Char));
    moveLeft();
    *(Char *)cur() = ch;
  }
  else {
    // must have start == buf
    // Is the buffer full, counting the Chars occupied by left-over bytes
    // at its end?
    if (buf_ + (bufSize_ - (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char))
      == bufLim_) {
      if (bufSize_ == size_t(-1))
      abort();          // FIXME throw an exception
      reallocateBuffer(bufSize_ + 1);
    }
    else if (nLeftOver_ > 0 && ((char *)(bufLim_ + 1) > leftOver_)) {
      // The shifted characters would overwrite the left-over bytes: move
      // those to the very end of the buffer.
      char *s = (char *)(buf_ + bufSize_) - nLeftOver_;
      memmove(s, leftOver_, nLeftOver_);
      leftOver_ = s;
    }
    // Shift [cur, bufLim_) right by one to open a slot at cur().
    if (cur() < bufLim_)
      memmove((Char *)cur() + 1, cur(), (bufLim_ - cur())*sizeof(Char));
    *(Char *)cur() = ch;
    advanceEnd(end() + 1);
    bufLim_ += 1;
  }
}

void ExternalInputSource::reallocateBuffer(size_t newSize)
{
  Char *newBuf = new Char[newSize];
  
  memcpy(newBuf, buf_, bufSize_*sizeof(Char));
  bufSize_ = newSize;
  changeBuffer(newBuf, buf_);
  bufLim_ = newBuf + (bufLim_ - buf_);
  if (nLeftOver_ > 0) {
    char *s = (char *)(newBuf + bufSize_) - nLeftOver_;
    memmove(s,
          (char *)newBuf + (leftOver_ - (char *)buf_),
          nLeftOver_);
    leftOver_ = s;
  }
  delete [] buf_;
  buf_ = newBuf;
}

RTTI_DEF1(ExternalInfoImpl, ExternalInfo)

// Build the external info for a parsed system identifier.  The contents of
// parsedSysid are swapped into this object (the caller's object is left
// holding whatever parsedSysid_ held before), and one position record is
// created per storage object.
ExternalInfoImpl::ExternalInfoImpl(ParsedSystemId &parsedSysid)
: currentIndex_(0), position_(parsedSysid.size())
{
  parsedSysid.swap(parsedSysid_);
  // Give notrack_ a defined value even when there are no storage objects;
  // noteRS() reads it unconditionally.
  notrack_ = 0;
  if (parsedSysid_.size() > 0)
    notrack_ = parsedSysid_[0].notrack;
}

// Set the identifier recorded for storage object i.  The strings are
// swapped, so on return id holds the previously recorded identifier.
void ExternalInfoImpl::setId(size_t i, StringC &id)
{
  Mutex::Lock guard(&mutex_);
  position_[i].id.swap(id);
}

void ExternalInfoImpl::getId(size_t i, StringC &id) const
{
  Mutex::Lock lock(&((ExternalInfoImpl *)this)->mutex_);
  id = position_[i].id;
}

// Record the decoder used for storage object i (under the mutex).
void ExternalInfoImpl::setDecoder(size_t i, Decoder *decoder)
{
  Mutex::Lock guard(&mutex_);
  position_[i].decoder = decoder;
}

// Flag the storage object currently being read as having had record
// starts inserted.
void ExternalInfoImpl::noteInsertedRSs()
{
  const size_t current = currentIndex_;
  position_[current].insertedRSs = 1;
}

// Note a record start at the given offset.  Unless tracking is disabled the
// offset is appended to the RS list; an RS that sits exactly at the start
// of the current storage object also sets that object's startsWithRS flag.
void ExternalInfoImpl::noteRS(Offset offset)
{
  // We do the locking in OffsetOrderedList.
  if (!notrack_)
    rsList_.append(offset);

  // The current storage object starts where the previous one ended
  // (or at 0 for the first one).
  Offset objectStart = 0;
  if (currentIndex_ != 0)
    objectStart = position_[currentIndex_ - 1].endOffset;
  if (offset == objectStart)
    position_[currentIndex_].startsWithRS = 1;
}

// Note that the current storage object ends at offset and move on to the
// next one.  Nothing is recorded for the last storage object.
void ExternalInfoImpl::noteStorageObjectEnd(Offset offset)
{
  Mutex::Lock guard(&mutex_);
  ASSERT(currentIndex_ < position_.size());
  // The last endOffset_ must be -1.
  if (currentIndex_ >= position_.size() - 1)
    return;
  position_[currentIndex_].endOffset = offset;
  currentIndex_ += 1;
  // The next object's first line begins after all RSs seen so far, and it
  // brings its own tracking setting.
  position_[currentIndex_].line1RS = rsList_.size();
  notrack_ = parsedSysid_[currentIndex_].notrack;
}

// Translate an offset within the whole external entity into a location
// inside the storage object that contains it: which storage object, the
// offset within it, and (when available) line number, column number and
// byte index.  Fields that cannot be determined are set to
// (unsigned long)-1.
// Returns false when off is Offset(-1), when there are no storage objects,
// or when neither the containing storage object nor any earlier one has a
// non-empty id; returns true otherwise.
Boolean ExternalInfoImpl::convertOffset(Offset off,
                              StorageObjectLocation &ret) const
{
  // Lock the mutex from a const member by casting constness away.
  Mutex::Lock lock(&((ExternalInfoImpl *)this)->mutex_);
  if (off == Offset(-1) || position_.size() == 0)
    return false;
  // the last endOffset_ is Offset(-1), so this will
  // terminate
  int i;
  // Find the first storage object whose end lies beyond off.
  for (i = 0; off >= position_[i].endOffset; i++)
    ;
  // Step back to the nearest storage object with a non-empty id.
  for (; position_[i].id.size() == 0; i--)
    if (i == 0)
      return false;
  ret.storageObjectSpec = &parsedSysid_[i];
  ret.actualStorageId = position_[i].id;
  // A storage object starts where its predecessor ended.
  Offset startOffset = i == 0 ? 0 : position_[i - 1].endOffset;
  ret.storageObjectOffset = off - startOffset;
  ret.byteIndex = ret.storageObjectOffset;
  if (parsedSysid_[i].notrack
      || parsedSysid_[i].records == StorageObjectSpec::asis) {
    // No record tracking for this object: line and column are unknown.
    ret.lineNumber = (unsigned long)-1;
    if (parsedSysid_[i].records != StorageObjectSpec::asis) {
      // Records were processed but not tracked, so inserted RSs cannot be
      // subtracted individually.
      if (position_[i].insertedRSs)
      ret.byteIndex = (unsigned long)-1;
      else if (ret.byteIndex > 0 && position_[i].startsWithRS)
      ret.byteIndex--;  // first RS is inserted
    }
    ret.columnNumber = (unsigned long)-1;
    // Note: this path returns without consulting the decoder below.
    return true;
  }
  else {
    size_t line1RS = position_[i].line1RS;
    // line1RS is now the number of RSs that are before or on the current line.
    size_t j;
    Offset colStart;
    if (rsList_.findPreceding(off, j, colStart)) {
      // Discount the RSs that were inserted into this storage object.
      if (position_[i].insertedRSs)
      ret.byteIndex -= j + 1 - line1RS;
      else if (ret.byteIndex > 0 && position_[i].startsWithRS)
      ret.byteIndex--;  // first RS is inserted
      j++;
      colStart++;
    }
    else {
      // No RS at or before off.
      j = 0;
      colStart = 0;
    }
    // j is now the number of RSs that are before or on the current line
    // colStart is the offset of the first column
    ret.lineNumber = j - line1RS + 1 - position_[i].startsWithRS;
    // the offset of the first column
    if (colStart < startOffset)
      colStart = startOffset;
    // the RS that starts a line will be in column 0;
    // the first real character of a line will be column 1
    ret.columnNumber = 1 + off - colStart;
  }
  // Hand the index to the storage object's decoder for conversion; without
  // a decoder, or if the conversion fails, the byte index is unknown.
  if (!position_[i].decoder
      || !position_[i].decoder->convertOffset(ret.byteIndex))
    ret.byteIndex = (unsigned long)-1;
  return true;
}

// Return the storage object specification at index i.
const StorageObjectSpec &ExternalInfoImpl::spec(size_t i) const
{
  const ParsedSystemId &specs = parsedSysid_;
  return specs[i];
}

// Return the number of storage object specifications.
size_t ExternalInfoImpl::nSpecs() const
{
  const size_t count = parsedSysid_.size();
  return count;
}

// Give read-only access to the parsed system identifier held by this object.
const ParsedSystemId &ExternalInfoImpl::parsedSystemId() const
{
  return this->parsedSysid_;
}

// Default specification: no storage manager or coding system yet, tracking
// enabled (notrack = 0), record type "find", and both zapEof and search on.
// NOTE(review): codingSystemType is assigned elsewhere in this file but is
// not initialized here -- confirm that every user sets it before reading it.
StorageObjectSpec::StorageObjectSpec()
: storageManager(0), codingSystem(0), codingSystemName(0), notrack(0),
  records(find), zapEof(1), search(1)
{
}

// A fresh position record has no known end (endOffset is Offset(-1)), no
// RSs counted before its first line, and neither RS flag set.
StorageObjectPosition::StorageObjectPosition()
: endOffset(Offset(-1)), line1RS(0), startsWithRS(0), insertedRSs(0)
{
}

// Set up a parser over the system identifier str, starting at index 0.
//   idCharset - character set the identifier is expressed in
//   isNdata   - flag consulted when choosing defaults (see setDefaults)
//   defLoc    - optional location supplying the default storage object spec
//               and default storage id; when null both defaults are null
//   em        - entity manager used to look up storage managers and coding
//               systems
//   mgr       - receiver of diagnostic messages
FSIParser::FSIParser(const StringC &str,
                 const CharsetInfo &idCharset,
                 Boolean isNdata,
                 const StorageObjectLocation *defLoc,
                 const EntityManagerImpl *em,
                 Messenger &mgr)
: str_(str),
  strIndex_(0),
  idCharset_(idCharset),
  isNdata_(isNdata),
  defSpec_(defLoc ? defLoc->storageObjectSpec : 0),
  defId_(defLoc ? &defLoc->actualStorageId : 0),
  em_(em),
  mgr_(mgr)
{
}

// Return the next character of the string being parsed and advance past
// it, or -1 once the end of the string has been reached.
Xchar FSIParser::get()
{
  if (strIndex_ >= str_.size())
    return -1;
  const Xchar next = str_[strIndex_];
  strIndex_ += 1;
  return next;
}

// Step the read position back by one character; does nothing when already
// at the beginning of the string.
void FSIParser::unget()
{
  if (strIndex_ == 0)
    return;
  --strIndex_;
}

// Case-insensitively compare str (expressed in idCharset_) with the
// execution-character-set keyword s.  Each character of str must equal the
// idCharset_ encoding of either the upper-case or the lower-case form of
// the corresponding character of s.  Returns true on a full match.
Boolean FSIParser::matchKey(const StringC &str, const char *s)
{
  const size_t len = str.size();
  if (strlen(s) != len)
    return false;
  for (size_t i = 0; i < len; i++) {
    // toupper/tolower require a value representable as unsigned char;
    // passing a negative plain char would be undefined behaviour.
    const unsigned char key = (unsigned char)s[i];
    if (idCharset_.execToDesc(char(toupper(key))) != str[i]
        && idCharset_.execToDesc(char(tolower(key))) != str[i])
      return false;
  }
  return true;
}

// True when ch equals the idCharset_ encoding of the execution character
// execC.
Boolean FSIParser::matchChar(Xchar ch, char execC)
{
  const Char encoded = idCharset_.execToDesc(execC);
  return ch == encoded;
}

// True when c is a separator character: space, carriage return, line feed
// or horizontal tab (each compared in idCharset_).
Boolean FSIParser::isS(Xchar c)
{
  // The fourth test used to repeat the space test, so a tab was never
  // recognised as a separator; parseAttribute already treats '\t' as
  // white space inside quoted values.
  return (matchChar(c, ' ')
          || matchChar(c, '\r')
          || matchChar(c, '\n')
          || matchChar(c, '\t'));
}

// If c is a decimal digit (compared in idCharset_), store its numeric value
// in weight and return 1; otherwise return 0 and leave weight untouched.
Boolean FSIParser::convertDigit(Xchar c, int &weight)
{
  static const char decimal[] = "0123456789";
  for (const char *d = decimal; *d != '\0'; ++d) {
    if (matchChar(c, *d)) {
      weight = int(d - decimal);
      return 1;
    }
  }
  return 0;
}

// Parse the string from the current read position as a formal system
// identifier, appending one StorageObjectSpec to parsedSysid for every
// storage object found.  When the text does not start with '<', ends
// inside the first tag name, or names an unknown storage type, the text
// from the starting position on is handled as an informal identifier
// instead.  Returns 1 on success and 0 on failure.
Boolean FSIParser::parse(ParsedSystemId &parsedSysid)
{
  size_t startIndex = strIndex_;
  if (!matchChar(get(), '<'))
    return handleInformal(startIndex, parsedSysid);
  // Collect the tag name up to white space or '>'.
  StringC key;
  for (;;) {
    Xchar c = get();
    if (c == -1)
      return handleInformal(startIndex, parsedSysid);
    if (isS(c) || matchChar(c, '>'))
      break;
    key += Char(c);
  }
  // Put the terminator back so the attribute parser sees it.
  unget();
  if (matchKey(key, "CATALOG")) {
    // A <CATALOG ...> tag only adds a map entry; what follows it is parsed
    // as a system identifier in its own right.
    if (!setCatalogAttributes(parsedSysid))
      return 0;
    return parse(parsedSysid);
  }
  Boolean neutral;
  StorageManager *sm = lookupStorageType(key, neutral);
  if (!sm)
    return handleInformal(startIndex, parsedSysid);
  // One iteration per storage object; sm is the storage manager named by
  // the tag that introduces the storage object.
  for (;;) {
    parsedSysid.resize(parsedSysid.size() + 1);
    StorageObjectSpec &sos = parsedSysid.back();
    sos.storageManager = sm;
    Xchar smcrd;
    Boolean fold;
    if (!setAttributes(sos, neutral, smcrd, fold))
      return 0;
    // From here on sm is non-null only if another storage object tag
    // was found.
    sm = 0;
    StringC id;
    Boolean hadData = 0;
    // Accumulate the storage object identifier up to the next tag that
    // names a known storage type, or the end of the string.
    for (;;) {
      Xchar c = get();
      if (c == -1)
      break;
      if (matchChar(c, '<')) {
      hadData = 1;
      Char stago = c;
      key.resize(0);
      for (;;) {
        c = get();
        if (c == -1) {
          // Ran out of input inside the name: it was just data.
          id += stago;
          id += key;
          break;
        }
        if (isS(c) || matchChar(c, '>')) {
          unget();
          sm = lookupStorageType(key, neutral);
          if (!sm) {
            // Not a storage type: keep "<name" as part of the identifier.
            id += stago;
            id += key;
          }
          break;
        }
        key += c;
      }
      if (sm)
        break;
      }
      else if (!((!hadData && matchChar(c, '\r')) // ignored RE
             || matchChar(c, '\n') )) {     // ignored RS
      hadData = 1;
      id += c;
      }
    }
    // Drop a single trailing carriage return.
    if (id.size() > 0 && matchChar(id[id.size() - 1], '\r'))
      id.resize(id.size() - 1);
    uncharref(id);
    id.swap(sos.specId);
    if (!convertId(sos.specId, smcrd, sos.storageManager))
      return 0;
    if (neutral) {
      if (!sos.storageManager->transformNeutral(sos.specId, fold, mgr_))
      return 0;
    }
    // When resolveRelative reports success the base id is cleared.
    if (sos.storageManager->resolveRelative(sos.baseId, sos.specId,
                                  sos.search))
      sos.baseId.resize(0);
    if (!sm)
      break;
  }
  return 1;
}

// Treat the text from index to the end of the string as an informal system
// identifier and append a single StorageObjectSpec for it to parsedSysid.
// The storage manager is guessed from the identifier; failing that the
// default spec's manager is used if it is inheritable, otherwise the entity
// manager's default.  Returns 1 on success, 0 if the id cannot be converted.
Boolean FSIParser::handleInformal(size_t index, ParsedSystemId &parsedSysid)
{
  parsedSysid.resize(parsedSysid.size() + 1);
  StorageObjectSpec &spec = parsedSysid.back();
  spec.specId.assign(str_.data() + index, str_.size() - index);

  StorageManager *manager = em_->guessStorageType(spec.specId, idCharset_);
  if (!manager) {
    if (defSpec_ && defSpec_->storageManager->inheritable())
      manager = defSpec_->storageManager;
    else
      manager = em_->defaultStorageManager_.pointer();
  }
  spec.storageManager = manager;

  setDefaults(spec);
  if (!convertId(spec.specId, -1, spec.storageManager))
    return 0;
  // When resolveRelative reports success the base id is cleared.
  const Boolean resolved
    = spec.storageManager->resolveRelative(spec.baseId, spec.specId,
                                           spec.search);
  if (resolved)
    spec.baseId.resize(0);
  return 1;
}

// Map a tag name to a storage manager.  "NEUTRAL" sets neutral to 1 and
// yields the default spec's storage manager when that is inheritable, else
// the entity manager's default.  Any other name is looked up in the entity
// manager; on success neutral is set to 0, on failure a null pointer is
// returned and neutral is left unchanged.
StorageManager *FSIParser::lookupStorageType(const StringC &key,
                                             Boolean &neutral)
{
  if (!matchKey(key, "NEUTRAL")) {
    StorageManager *found = em_->lookupStorageType(key, idCharset_);
    if (found)
      neutral = 0;
    return found;
  }
  neutral = 1;
  if (defSpec_ && defSpec_->storageManager->inheritable())
    return defSpec_->storageManager;
  return em_->defaultStorageManager_.pointer();
}

// Parse the attributes of a CATALOG tag.  A new map entry of type
// catalogDocument is appended to parsedSysid.maps; a PUBLIC attribute with
// a value turns it into a catalogPublic entry carrying that public
// identifier.  Other attributes are reported as unsupported.  Returns 0
// only when the attribute syntax is invalid.
Boolean FSIParser::setCatalogAttributes(ParsedSystemId &parsedSysid)
{
  Boolean sawPublic = 0;
  parsedSysid.maps.resize(parsedSysid.maps.size() + 1);
  parsedSysid.maps.back().type = ParsedSystemId::Map::catalogDocument;
  for (;;) {
    StringC name, val;
    Boolean hasVal;
    if (!parseAttribute(name, hasVal, val)) {
      mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_));
      return 0;
    }
    // An empty name means the closing '>' was reached.
    if (name.size() == 0)
      break;
    if (!matchKey(name, "PUBLIC")) {
      mgr_.message(hasVal
                   ? EntityManagerMessages::fsiUnsupportedAttribute
                   : EntityManagerMessages::fsiUnsupportedAttributeToken,
                   StringMessageArg(name));
      continue;
    }
    if (sawPublic) {
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                   StringMessageArg(idCharset_.execToDesc("PUBLIC")));
    }
    else if (hasVal) {
      convertMinimumLiteral(val, parsedSysid.maps.back().publicId);
      parsedSysid.maps.back().type = ParsedSystemId::Map::catalogPublic;
    }
    else {
      mgr_.message(EntityManagerMessages::fsiMissingValue,
                   StringMessageArg(name));
    }
    sawPublic = 1;
  }
  return 1;
}

void FSIParser::convertMinimumLiteral(const StringC &from, StringC &to)
{
  // Do just enough to ensure it can be reparsed.
  to.resize(0);
  for (size_t i = 0; i < from.size(); i++) {
    Char c = from[i];
    if (matchChar(c, '"') || matchChar(c, '#'))
      mgr_.message(EntityManagerMessages::fsiLookupChar, NumberMessageArg(c));
    else if (matchChar(c, ' ')) {
      if (to.size() && to[to.size() - 1] != c)
      to += c;
    }
    else
      to += c;
  }
  if (to.size() && matchChar(to[to.size() - 1], ' '))
    to.resize(to.size() - 1);
}

// FIXME This should be table driven.

// Parse the attributes of a storage object tag and apply them to sos,
// which must already have its storageManager set.
//   sos     - specification to fill in; starts from setDefaults(sos)
//   neutral - whether the tag was a NEUTRAL one (FOLD/NOFOLD only apply then)
//   smcrd   - receives the SMCRD character, or -1 when none is in effect
//   fold    - receives the FOLD setting (defaults to 1)
// Semantic problems (duplicates, missing or bad values, inapplicable or
// unsupported attributes) are reported through mgr_ and parsing continues;
// only an attribute syntax error makes the function return 0.
Boolean FSIParser::setAttributes(StorageObjectSpec &sos,
                         Boolean neutral,
                         Xchar &smcrd,
                         Boolean &fold)
{
  // One "had" flag per attribute group, used to detect duplicates.  A
  // group is marked as seen even when the occurrence was rejected.
  Boolean hadBctf = 0;
  Boolean hadEncoding = 0;
  Boolean hadTracking = 0;
  Boolean hadSmcrd = 0;
  smcrd = -1;
  fold = 1;
  Boolean hadRecords = 0;
  Boolean hadBase = 0;
  Boolean hadZapeof = 0;
  Boolean hadSearch = 0;
  Boolean hadFold = 0;
  StorageObjectSpec::Records records;
  setDefaults(sos);
  for (;;) {
    StringC token, value;
    Boolean gotValue;
    if (!parseAttribute(token, gotValue, value)) {
      mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_));
      return 0;
    }
    // An empty token means the closing '>' was reached.
    if (token.size() == 0)
      break;
    // BCTF=name : coding system looked up as a BCTF; mutually exclusive
    // with ENCODING.
    if (matchKey(token, "BCTF")) {
      if (sos.storageManager->requiredCodingSystem())
      mgr_.message(EntityManagerMessages::fsiBctfEncodingNotApplicable);
      else if (hadBctf)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(token));
      else if (hadEncoding)
      mgr_.message(EntityManagerMessages::fsiBctfAndEncoding);
      else if (gotValue) {
      const char *codingSystemName;
      const InputCodingSystem *codingSystem
        = em_->lookupCodingSystem(value, idCharset_, 1, codingSystemName);
      if (codingSystem) {
        sos.codingSystem = codingSystem;
        sos.codingSystemName = codingSystemName;
        sos.codingSystemType = StorageObjectSpec::bctf;
      }
      else if (matchKey(value, "SAME")) {
        // SAME: inherit from the default spec, or fall back to the entity
        // manager's default coding system.  Nothing is changed for NDATA.
        if (!isNdata_) {
          if (defSpec_) {
            sos.codingSystem = defSpec_->codingSystem;
            sos.codingSystemName = defSpec_->codingSystemName;
            sos.codingSystemType = defSpec_->codingSystemType;
          }
          else {
            sos.codingSystem = em_->defaultCodingSystem_;
            sos.codingSystemName = 0;
            sos.codingSystemType = (em_->internalCharsetIsDocCharset_
                              ? StorageObjectSpec::bctf
                              : StorageObjectSpec::encoding);
          }
        }
      }
      else
        mgr_.message(EntityManagerMessages::fsiUnknownBctf,
                   StringMessageArg(value));
      }
      else
      mgr_.message(EntityManagerMessages::fsiMissingValue,
                 StringMessageArg(token));
      hadBctf = 1;
    }
    // ENCODING=name : same as BCTF but looked up as an encoding.
    else if (matchKey(token, "ENCODING")) {
      if (sos.storageManager->requiredCodingSystem())
      mgr_.message(EntityManagerMessages::fsiBctfEncodingNotApplicable);
      else if (hadEncoding)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(token));
      else if (hadBctf)
      mgr_.message(EntityManagerMessages::fsiBctfAndEncoding);
      else if (gotValue) {
      const char *codingSystemName;
      const InputCodingSystem *codingSystem
        = em_->lookupCodingSystem(value, idCharset_, 0, codingSystemName);
      if (codingSystem) {
        sos.codingSystem = codingSystem;
        sos.codingSystemName = codingSystemName;
        sos.codingSystemType = StorageObjectSpec::encoding;
      }
      else if (matchKey(value, "SAME")) {
        if (!isNdata_) {
          if (defSpec_) {
            sos.codingSystem = defSpec_->codingSystem;
            sos.codingSystemName = defSpec_->codingSystemName;
            sos.codingSystemType = defSpec_->codingSystemType;
          }
          else {
            sos.codingSystem = em_->defaultCodingSystem_;
            sos.codingSystemName = 0;
            sos.codingSystemType = (em_->internalCharsetIsDocCharset_
                              ? StorageObjectSpec::bctf
                              : StorageObjectSpec::encoding);
          }
        }
      }
      else
        mgr_.message(EntityManagerMessages::fsiUnknownEncoding,
                   StringMessageArg(value));
      }
      else
      mgr_.message(EntityManagerMessages::fsiMissingValue,
                 StringMessageArg(token));
      hadEncoding = 1;
    }
    // TRACKING=TRACK|NOTRACK
    else if (matchKey(token, "TRACKING")) {
      if (hadTracking)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(token));
      else if (gotValue) {
      if (matchKey(value, "NOTRACK"))
        sos.notrack = 1;
      else if (!matchKey(value, "TRACK"))
        mgr_.message(EntityManagerMessages::fsiBadTracking,
                   StringMessageArg(value));
      }
      else
      mgr_.message(EntityManagerMessages::fsiMissingValue,
                 StringMessageArg(token));
      hadTracking = 1;
    }
    // ZAPEOF, alone or as ZAPEOF=ZAPEOF|NOZAPEOF; not applicable when the
    // storage manager requires a particular coding system.
    else if (matchKey(token, "ZAPEOF")) {
      if (sos.storageManager->requiredCodingSystem())
      mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable);
      else if (hadZapeof)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(token));
      else if (gotValue) {
      if (matchKey(value, "ZAPEOF"))
        sos.zapEof = 1;
      else if (matchKey(value, "NOZAPEOF"))
        sos.zapEof = 0;
      else
        mgr_.message(EntityManagerMessages::fsiBadZapeof,
                   StringMessageArg(value));
      }
      else
      sos.zapEof = 1;
      hadZapeof = 1;
    }
    // NOZAPEOF as a bare token (a value is an error).
    else if (matchKey(token, "NOZAPEOF")) {
      if (sos.storageManager->requiredCodingSystem())
      mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable);
      else if (hadZapeof)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(idCharset_.execToDesc("ZAPEOF")));
      else if (gotValue)
      mgr_.message(EntityManagerMessages::fsiValueAsName,
                 StringMessageArg(token));
      else
      sos.zapEof = 0;
      hadZapeof = 1;
    }
    // SEARCH, alone or as SEARCH=SEARCH|NOSEARCH
    else if (matchKey(token, "SEARCH")) {
      if (hadSearch)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(token));
      else if (gotValue) {
      if (matchKey(value, "SEARCH"))
        sos.search = 1;
      else if (matchKey(value, "NOSEARCH"))
        sos.search = 0;
      else
        mgr_.message(EntityManagerMessages::fsiBadSearch,
                   StringMessageArg(value));
      }
      else
      sos.search = 1;
      hadSearch = 1;
    }
    // NOSEARCH as a bare token
    else if (matchKey(token, "NOSEARCH")) {
      if (hadSearch)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(idCharset_.execToDesc("SEARCH")));
      else if (gotValue)
      mgr_.message(EntityManagerMessages::fsiValueAsName,
                 StringMessageArg(token));
      else
      sos.search = 0;
      hadSearch = 1;
    }
    // FOLD, alone or as FOLD=FOLD|NOFOLD; only allowed on NEUTRAL tags.
    else if (matchKey(token, "FOLD")) {
      if (!neutral)
      mgr_.message(EntityManagerMessages::fsiFoldNotNeutral);
      else if (hadFold)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(token));
      else if (gotValue) {
      if (matchKey(value, "FOLD"))
        fold = 1;
      else if (matchKey(value, "NOFOLD"))
        fold = 0;
      else
        mgr_.message(EntityManagerMessages::fsiBadFold,
                   StringMessageArg(value));
      }
      else
      fold = 1;
      hadFold = 1;
    }
    // NOFOLD as a bare token
    else if (matchKey(token, "NOFOLD")) {
      if (!neutral)
      mgr_.message(EntityManagerMessages::fsiFoldNotNeutral);
      else if (hadFold)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(idCharset_.execToDesc("FOLD")));
      else if (gotValue)
      mgr_.message(EntityManagerMessages::fsiValueAsName,
                 StringMessageArg(token));
      else
      fold = 0;
      hadFold = 1;
    }
    // SMCRD=c : a single character, or an empty value for "none" (-1).
    else if (matchKey(token, "SMCRD")) {
      if (hadSmcrd)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(token));
      else if (gotValue) {
      if (value.size() == 0)
        smcrd = -1;
      else if (value.size() == 1)
        smcrd = value[0];
      else
        mgr_.message(EntityManagerMessages::fsiBadSmcrd,
                   StringMessageArg(value));
      }
      else
      mgr_.message(EntityManagerMessages::fsiMissingValue,
                 StringMessageArg(token));
      hadSmcrd = 1;
    }
    // RECORDS=type : not applicable when the storage manager requires CR.
    else if (matchKey(token, "RECORDS")) {
      if (sos.storageManager->requiresCr())
      mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable);
      else if (hadRecords)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(token));
      else if (gotValue) {
      if (!lookupRecords(value, sos.records))
        mgr_.message(EntityManagerMessages::fsiUnsupportedRecords,
                   StringMessageArg(value));
      }
      else
      mgr_.message(EntityManagerMessages::fsiMissingValue,
                 StringMessageArg(token));
      hadRecords = 1;
    }
    // SOIBASE=id : base identifier, converted after the loop.
    else if (matchKey(token, "SOIBASE")) {
      if (hadBase)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(token));
      else if (gotValue)
      value.swap(sos.baseId);
      else {
      mgr_.message(EntityManagerMessages::fsiMissingValue,
                 StringMessageArg(token));
      sos.baseId.resize(0);
      }
      hadBase = 1;
    }
    // A record type name used as a bare token, e.g. ASIS or CRLF.
    else if (lookupRecords(token, records)) {
      if (sos.storageManager->requiresCr())
      mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable);
      else if (hadRecords)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(idCharset_.execToDesc("RECORDS")));
      else if (!gotValue)
      sos.records = records;
      else
      mgr_.message(EntityManagerMessages::fsiValueAsName,
                 StringMessageArg(token));
      hadRecords = 1;
    }
    // NOTRACK / TRACK as bare tokens
    else if (matchKey(token, "NOTRACK")) {
      if (hadTracking)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(idCharset_.execToDesc("TRACKING")));
      else if (!gotValue)
      sos.notrack = 1;
      else
      mgr_.message(EntityManagerMessages::fsiValueAsName,
                 StringMessageArg(token));
      hadTracking = 1;
    }
    else if (matchKey(token, "TRACK")) {
      if (hadTracking)
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                 StringMessageArg(idCharset_.execToDesc("TRACKING")));
      else if (gotValue)
      mgr_.message(EntityManagerMessages::fsiValueAsName,
                 StringMessageArg(token));
      hadTracking = 1;
    }
    else
      mgr_.message(gotValue
               ? EntityManagerMessages::fsiUnsupportedAttribute
               : EntityManagerMessages::fsiUnsupportedAttributeToken,
               StringMessageArg(token));
  }
  // The base id is converted only now because SMCRD and FOLD may appear
  // after SOIBASE in the tag.  The result of convertId is not checked here.
  if (hadBase && sos.baseId.size() > 0) {
    convertId(sos.baseId, smcrd, sos.storageManager);
    if (neutral) {
      if (!sos.storageManager->transformNeutral(sos.baseId, fold, mgr_))
      sos.baseId.resize(0);
    }
  }
  // Explicit ASIS records switch zapEof off unless ZAPEOF/NOZAPEOF was given.
  if (!hadZapeof && hadRecords && sos.records == StorageObjectSpec::asis)
    sos.zapEof = 0;
  return 1;
}

// Keyword names for the record types, paired with the corresponding
// StorageObjectSpec values.  Searched linearly by recordsName() (value to
// name) and lookupRecords() (name to value).
FSIParser::RecordType FSIParser::recordTypeTable[] = {
  { "FIND", StorageObjectSpec::find },
  { "ASIS", StorageObjectSpec::asis },
  { "CR", StorageObjectSpec::cr },
  { "LF", StorageObjectSpec::lf },
  { "CRLF", StorageObjectSpec::crlf }
};

// Return the keyword name for a record type, or a null pointer when the
// value does not appear in recordTypeTable.
const char *FSIParser::recordsName(StorageObjectSpec::Records records)
{
  const size_t count = SIZEOF(recordTypeTable);
  for (size_t i = 0; i != count; ++i) {
    const RecordType &entry = recordTypeTable[i];
    if (entry.value == records)
      return entry.name;
  }
  return 0;
}

// Look token up (case-insensitively, via matchKey) among the record type
// names.  On a match store the record type in result and return 1;
// otherwise return 0 and leave result untouched.
Boolean FSIParser::lookupRecords(const StringC &token,
                                 StorageObjectSpec::Records &result)
{
  const size_t count = SIZEOF(recordTypeTable);
  for (size_t i = 0; i != count; ++i) {
    const RecordType &entry = recordTypeTable[i];
    if (!matchKey(token, entry.name))
      continue;
    result = entry.value;
    return 1;
  }
  return 0;
}

// Fill in the default attribute values for sos, whose storageManager must
// already be set.  Defaults come from the storage manager's requirements,
// from the NDATA flag and from the default storage object spec, if any.
void FSIParser::setDefaults(StorageObjectSpec &sos)
{
  StorageManager *manager = sos.storageManager;

  // Record handling.
  if (manager->requiresCr())
    sos.records = StorageObjectSpec::cr;
  else {
    const Boolean baseIsAsis
      = (defSpec_ != 0 && defSpec_->records == StorageObjectSpec::asis);
    if (isNdata_ || baseIsAsis)
      sos.records = StorageObjectSpec::asis;
  }

  // EOF zapping is switched off for NDATA or when the base has it off.
  const Boolean baseKeepsEof = (defSpec_ != 0 && !defSpec_->zapEof);
  if (isNdata_ || baseKeepsEof)
    sos.zapEof = 0;

  // A base id is only inherited from a spec with the same storage manager.
  if (defSpec_ != 0 && defSpec_->storageManager == manager) {
    if (defId_)
      sos.baseId = *defId_;
    else {
      sos.baseId = defSpec_->specId;
      manager->resolveRelative(defSpec_->baseId, sos.baseId, 0);
    }
  }

  // Coding system: a storage manager requirement wins over everything.
  sos.codingSystem = manager->requiredCodingSystem();
  if (sos.codingSystem) {
    sos.zapEof = 0;           // hack
    sos.codingSystemType = StorageObjectSpec::special;
    return;
  }
  if (isNdata_) {
    sos.codingSystem = em_->codingSystemKit_->identityInputCodingSystem();
    sos.codingSystemType = StorageObjectSpec::special;
  }
  else if (defSpec_) {
    sos.codingSystem = defSpec_->codingSystem;
    sos.codingSystemName = defSpec_->codingSystemName;
    sos.codingSystemType = defSpec_->codingSystemType;
  }
  else {
    sos.codingSystem = em_->defaultCodingSystem_;
    if (em_->internalCharsetIsDocCharset_)
      sos.codingSystemType = StorageObjectSpec::bctf;
    else
      sos.codingSystemType = StorageObjectSpec::encoding;
  }
}

// Read the next attribute of a tag from the current read position.
//   token    - receives the attribute name; left empty when the closing
//              '>' was read instead of an attribute
//   gotValue - set to 1 when a "= value" part follows the name, 0 when it
//              does not; NOT assigned when the closing '>' was read
//   value    - receives the value when gotValue is 1
// Returns 1 on success and 0 on a syntax error, including running out of
// input before the tag is closed.
Boolean FSIParser::parseAttribute(StringC &token, Boolean &gotValue,
                          StringC &value)
{
  // Skip leading white space.
  Xchar c = get();
  while (isS(c))
    c = get();
  if (c == -1) {
    return 0;
  }
  token.resize(0);
  if (matchChar(c, '>'))
    return 1;
  // A name cannot start with a quote or '='.
  if (matchChar(c, '"') || matchChar(c, '\'') || matchChar(c, '='))
    return 0;
  // Collect the name up to white space, '>' or '='.
  for (;;) {
    token += c;
    c = get();
    if (c == -1)
      return 0;
    if (isS(c))
      break;
    if (matchChar(c, '>') || matchChar(c, '='))
      break;
  }
  while (isS(c))
    c = get();
  if (c == -1)
    return 0;
  if (!matchChar(c, '=')) {
    // No value: push the character back for the next call.
    unget();
    gotValue = 0;
    return 1;
  }
  gotValue = 1;
  value.resize(0);

  c = get();
  while (isS(c))
    c = get();
  if (matchChar(c, '>') || matchChar(c, '='))
    return 0;
  if (matchChar(c, '"') || matchChar(c, '\'')) {
    // Quoted value: read up to the matching quote.  Line feeds are dropped,
    // carriage returns and tabs become spaces, and numeric character
    // references are expanded afterwards.
    Char lit = c;
    for (;;) {
      Xchar c = get();
      if (c == lit)
      break;
      if (c == -1)
      return 0;
      if (matchChar(c, '\n'))
      ;
      else if (matchChar(c, '\r') || matchChar(c, '\t'))
      value += idCharset_.execToDesc(' ');
      else
      value += c;
    }
    uncharref(value);
  }
  else {
    // Unquoted value: read up to white space, '>' or '='.  The terminating
    // '>' or '=' is pushed back; terminating white space is consumed.
    for (;;) {
      value += c;
      c = get();
      if (c == -1)
      return 0;
      if (isS(c))
      break;
      if (matchChar(c, '>') || matchChar(c, '=')) {
      unget();
      break;
      }
    }
  }
  return 1;
}

// Expand numeric character references of the form "&#digits" (with an
// optional terminating ';') in str, in place.  Everything else is copied
// through unchanged.
void FSIParser::uncharref(StringC &str)
{
  size_t dst = 0;             // next position to write
  for (size_t src = 0; src < str.size();) {
    int digit;
    const Boolean isRef = (matchChar(str[src], '&')
                           && src + 2 < str.size()
                           && matchChar(str[src + 1], '#')
                           && convertDigit(str[src + 2], digit));
    if (!isRef) {
      str[dst++] = str[src++];
      continue;
    }
    // digit already holds the value of the first digit.
    unsigned long code = digit;
    for (src += 3; src < str.size() && convertDigit(str[src], digit); src++)
      code = code*10 + digit;
    str[dst++] = code;
    if (src < str.size() && matchChar(str[src], ';'))
      src++;
  }
  str.resize(dst);
}

// Convert id in place from idCharset_ to the storage manager's identifier
// character set.  A sequence "smcrd digits" (with an optional ';') is
// replaced by the character with that number, untranslated.  When the
// storage manager has no id character set the remaining characters are
// copied as they are.  Otherwise each character goes through its universal
// code: RS is dropped, RE becomes the storage manager's RE string if it has
// one, anything else is mapped into the storage manager's set.
// Returns 0, leaving id untouched, when a character cannot be converted;
// returns 1 on success.
Boolean FSIParser::convertId(StringC &id, Xchar smcrd,
                             const StorageManager *sm)
{
  const CharsetInfo *smCharset = sm->idCharset();
  StringC converted;
  for (size_t pos = 0; pos < id.size();) {
    int digit;
    if (Xchar(id[pos]) == smcrd
        && pos + 1 < id.size()
        && convertDigit(id[pos + 1], digit)) {
      // Numeric reference introduced by the SMCRD character.
      Char code = digit;
      for (pos += 2; pos < id.size() && convertDigit(id[pos], digit); pos++)
        code = code*10 + digit;
      converted += code;
      if (pos < id.size() && matchChar(id[pos], ';'))
        pos++;
      continue;
    }
    const Char c = id[pos++];
    if (!smCharset) {
      converted += c;
      continue;
    }
    UnivChar univ;
    if (!idCharset_.descToUniv(c, univ))
      return 0;
    if (univ == UnivCharsetDesc::rs)
      continue;
    if (univ == UnivCharsetDesc::re && sm->reString()) {
      converted += *sm->reString();
      continue;
    }
    WideChar wide;
    ISet<WideChar> wideSet;
    if (smCharset->univToDesc(univ, wide, wideSet) != 1
        || wide > charMax)
      return 0;               // FIXME give error
    converted += Char(wide);
  }
  converted.swap(id);
  return 1;
}

// Default constructor: nothing to set up beyond what the members'
// own default constructors do.
ParsedSystemId:: ParsedSystemId()
{
}

// File-local helper used by ParsedSystemId::unparse: appends an escaped
// form of the storage object identifier soi to result, setting needSmcrd
// when a '^' escape had to be emitted.
static
void unparseSoi(const StringC &soi,
            const CharsetInfo *idCharset,
            const CharsetInfo &resultCharset,
            StringC &result,
            Boolean &needSmcrd);

void ParsedSystemId::unparse(const CharsetInfo &resultCharset,
                       Boolean isNdata,
                       StringC &result) const
{
  size_t len = size();
  result.resize(0);
  size_t i;
  for (i = 0; i < maps.size(); i++) {
    if (maps[i].type == Map::catalogDocument)
      result += resultCharset.execToDesc("<CATALOG>");
    else if (maps[i].type == Map::catalogPublic) {
      result += resultCharset.execToDesc("<CATALOG PUBLIC=\"");
      result += maps[i].publicId;
      result += resultCharset.execToDesc("\">");
    }
  }
  for (i = 0; i < len; i++) {
    const StorageObjectSpec &sos = (*this)[i];
    result += resultCharset.execToDesc('<');
    result += resultCharset.execToDesc(sos.storageManager->type());
    if (sos.notrack)
      result += resultCharset.execToDesc(" NOTRACK");
    if (!sos.search)
      result += resultCharset.execToDesc(" NOSEARCH");
    if (!sos.storageManager->requiresCr()
        && sos.records != (isNdata ? StorageObjectSpec::asis : StorageObjectSpec::find)) {
      result += resultCharset.execToDesc(' ');
      result += resultCharset.execToDesc(FSIParser::recordsName(sos.records));
    }
    if (sos.codingSystemName && sos.codingSystemType != StorageObjectSpec::special) {
      if (!sos.zapEof)
      result += resultCharset.execToDesc(" NOZAPEOF");
      result += resultCharset.execToDesc(sos.codingSystemType == StorageObjectSpec::bctf
                               ? " BCTF="
                               : " ENCODING=");
      result += resultCharset.execToDesc(sos.codingSystemName);
    }
    Boolean needSmcrd = 0;
    if (sos.baseId.size() != 0) {
      result += resultCharset.execToDesc(" SOIBASE='");
      unparseSoi(sos.baseId,
             sos.storageManager->idCharset(),
             resultCharset,
             result,
             needSmcrd);
      result += resultCharset.execToDesc('\'');
    }
    StringC tem;
    unparseSoi(sos.specId,
             sos.storageManager->idCharset(),
             resultCharset,
             tem,
             needSmcrd);
    if (needSmcrd)
      result += resultCharset.execToDesc(" SMCRD='^'");
    result += resultCharset.execToDesc('>');
    result += tem;
  }
}

void unparseSoi(const StringC &soi,
            const CharsetInfo *idCharset,
            const CharsetInfo &resultCharset,
            StringC &result,
            Boolean &needSmcrd)
{
  if (!idCharset) {
    for (size_t i = 0; i < soi.size(); i++) {
      char buf[32];
      sprintf(buf, "&#%lu;", (unsigned long)soi[i]);
      result += resultCharset.execToDesc(buf);
    }
    return;
  }
  for (size_t i = 0; i < soi.size(); i++) {
    UnivChar univ;
    WideChar to;
    ISet<WideChar> toSet;
    if (!idCharset->descToUniv(soi[i], univ)
      || univ >= 127
      || univ < 32
      || univ == 36           // $
      || univ == 96           // `
#ifndef MSDOS_FILENAMES
      || univ == 92           // backslash
#endif
      || univ == 94           // ^
      || resultCharset.univToDesc(univ, to, toSet) != 1) {
      needSmcrd = 1;
      char buf[32];
      sprintf(buf, "^%lu;", (unsigned long)soi[i]);
      result += resultCharset.execToDesc(buf);
    }
    else {
      switch (univ) {
      case 34:          // double quote
      case 35:          // #
      case 39:          // apostrophe
      case 60:          // <
      {
        char buf[32];
        sprintf(buf, "&#%lu;", (unsigned long)to);
        result += resultCharset.execToDesc(buf);
      }
      break;
      default:
      result += Char(to);
      break;
      }
    }
  }
}

#ifdef SP_NAMESPACE
}
#endif

Generated by  Doxygen 1.6.0   Back to index