@TechReport{ertl89,
  author = 	 "M. Anton Ertl",
  title = 	 "{GRAY -- ein Generator f\"ur rekursiv absteigende Ybersetzer}",
  institution =  "Institut f{\"u}r Computersprachen, Technische
		 Universit{\"a}t Wien",
  year = 	 "1989",
  type =	 "Praktikum",
  url =		 "http://www.complang.tuwien.ac.at/papers/ertl89.ps.gz",
  note =	 "In German"
}

@InProceedings{ertl&krall91,
  author = 	"M. Anton Ertl and Andreas Krall",
  title = 	"Optimal Instruction Scheduling Using Constraint Logic
		 Programming",
  crossref =	"plilp91",
  pages = 	"75--86",
  url =		"http://www.complang.tuwien.ac.at/papers/ertl%26krall91.ps.gz",
  abstract = 	"Instruction scheduling is essential for the efficient
		 operation of today's and tomorrow's processors. It
		 can be stated easily and declaratively as a logic
		 program. Consistency techniques embedded in logic
		 programming makes the efficient solution of this
		 problem possible. This paper describes an instruction
		 scheduling program for the Motorola 88100 RISC
		 processor, which minimizes the number of pipeline
		 stalls. The scheduler is written in the constraint
		 logic programming language ARISTO and uses a
		 declarative model of the processor to generate an
		 optimal schedule.  The model uses lists of domain
		 variables to represent the pipeline stages and
		 describes the dependencies between instructions by
		 constraints in order to ensure correct scheduling.
		 Although optimal instruction scheduling is
		 NP-complete, the scheduler can be applied to real
		 programs because of the speed gained through
		 consistency techniques."
}

@Proceedings{plilp91,
  key =		"PLILP'91",
  title = 	"Programming Language Implementation and Logic
		 Programming (PLILP)",
  booktitle = 	"Programming Language Implementation and Logic
		 Programming (PLILP)",
  year = 	"1991",
  OPTeditor = 	"Jan Ma{\l}uszy\'nski and Martin Wirsing",
  publisher = 	"Springer LNCS~528",
  address = 	"Passau"
}

@unpublished{gl91a,
author=    {Robert Gl\"{u}ck},
title=     {The Generation of Program Specializers},
note=	   {New York University Computation and Program Analysis Day},
month=     {June},
year=      {1991}
}

@unpublished{gl91b,
author=    {Robert Gl\"{u}ck},
title=     {The Generation of Programs by self-applicable Metaprograms},
note=      {Akademie der Wissenschaften, Moskau},
month=     {September},
year=      {1991}
}

@inproceedings{gl91c,
author=    {Robert Gl\"{u}ck},
title=	   {Metasystem Transition in the Machine and its Application to Knowledge Systems},
booktitle= {Workbook of the 1st Principia Cybernetica Workshop}, 
editor=	   {Francis Heylighen},
address=   {Free University of Brussels}, 
month=     {Juli},
year=      {1991}
} 

@inproceedings{gl91d,
author=    {Robert Gl\"{u}ck},
title=     {Towards Multiple Self-Application},
booktitle= {Symposium on Partial Evaluation and Semantics-Based Program Manipulation, PEPM`91},
publisher= {SIGPLAN Notices and IFIP, ACM Press},
volume=    {26},
number=    {9},
address=   {New Haven, USA},
year=      {1991},
month=     {September}
} 


@inproceedings{kral&wass91,
author=    {Andreas Krall and D. Wassileff},
title=     {CCOM - A Compiler for Commercial C},
booktitle= {UNIX Forum VI}, 
publisher= {Schriftenreihe der OCG},
editor=    {e.~K\"{u}hn},
year=      {1991}
}

@inproceedings{kpe91a,
author=    {E. K\"{u}hn and F. Puntigam and A.K. Elmagarmid},
title=     {Multidatabase Transaction and Query Processing in Logic},
booktitle= {Database Transaction Models for Advanced Applications},
editor=    {A.~K.~Elmagarmid}, 
publisher= {Morgan Kaufmann Publishers}, 
year=      {1991},
url=       "http://www.complang.tuwien.ac.at/franz/papers/chapter9.ps.Z"
}

@inproceedings{kpe91b,
author=    {E. K\"{u}hn and F. Puntigam and A.K. Elmagarmid},
title=     {Transaction Specification in Multidatabase Systems Based on 
	    Parallel Logic Programming},
booktitle= {First International Workshop on Interoperability in Multidatabase Systems},
publisher= {IEEE}, 
address=   {Kyoto, Japan}, 
month=     {April},
year=      {1991},
url=       "http://www.complang.tuwien.ac.at/franz/papers/ride91.ps.gz"
}


@unpublished{razek,
author=    {G. Razek},
title=     {Object Oriented Software Development},
note=      {Medical Informatics Europe MIE 91, Vienna}, 
month=     {August},
year=      {1991}
}

@InProceedings{ertl&krall92,
  author = 	"M. Anton Ertl and Andreas Krall",
  title = 	"Instruction Scheduling for Complex Pipelines",
  crossref =	"cc92",
  pages = 	"207--218",
  url =		"http://www.complang.tuwien.ac.at/papers/ertl%26krall92.ps.gz",
  abstract =	"We designed heuristics for applying the list
		 scheduling algorithm to processors with complex
		 pipelines. On these processors the pipeline can stall
		 due to resource contention (structural hazards) in
		 addition to the usual data hazards. Conventional
		 heuristics consider only data hazards. Our heuristics
		 reduce structural hazards, too. Code with much
		 instruction-level parallelism is optimized to avoid
		 structural hazards, sequential code is scheduled for
		 reducing data hazards. Embedded in a postpass
		 strategy our scheduler removes 60\%--100\% of the
		 removable stalls from conventionally scheduled code."
}
@Proceedings{cc92,
  booktitle = 	"Compiler Construction (CC'92)",
  title = 	"Compiler Construction (CC'92)",
  key =		"CC'92",
  year = 	"1992",
  publisher = 	"Springer LNCS~641",
  address = 	"Paderborn",
}

@InProceedings{ertl92,
  author = 	"M. Anton Ertl",
  title = 	"A New Approach to {Forth} Native Code Generation",
  crossref =	"euroforth92",
  pages = 	"73--78",
  url =		"http://www.complang.tuwien.ac.at/papers/ertl92.ps.gz",
  abstract =    "RAFTS is a framework for applying state of the art
		 compiler technology to the compilation of Forth.  The
		 heart of RAFTS is a simple method for transforming
		 Forth programs into data flow graphs and static
		 single assignment form. Standard code generation and
		 optimization techniques can be applied to programs in
		 these forms. Specifically, RAFTS uses interprocedural
		 register allocation to eliminate nearly all stack
		 accesses. It also removes nearly all stack pointer
		 updates. Inlining and tail call optimization reduce
		 the call overhead. RAFTS compiles all of Forth,
		 including difficult cases like unknown stack heights,
		 {\tt PICK}, {\tt ROLL} and {\tt EXECUTE}. And last,
		 but not least, RAFTS is designed for interactive
		 Forth systems; it is not restricted to batch
		 compilers"
}

@Proceedings{euroforth92,
  key = 	"EuroForth~'92",
  title = 	"EuroForth~'92",
  booktitle = 	"EuroForth~'92",
  year = 	"1992",
  organization = 	"MicroProcessor Engineering",
  address = 	"Southampton, England"
}


@InProceedings{krall&berger92,
  author =      "Andreas Krall and Thomas Berger",
  title =       "Fast {P}rolog with a {VAM1p} based {P}rolog Compiler",
  crossref =    "plilp92",
  pages =       "245--259",
  abstract =    "The VAM (Vienna Abstract Machine) is a Prolog machine
		 developed at the TU Wien. In contrast to the standard
		 implementation technique (Warren Abstract Machine) an
		 inference in the VAM is performed by unifying the head
		 and goal arguments in a single undivided step. The
		 interpreter based VAM2p implements unification by combining
		 head and goal instructions during run time whereas the
		 compiler based VAM1p combines the instructions during
		 compile time. This enables compilers based on the VAM1p
		 to make extensive optimizations like variable elimination,
		 instruction elimination, extended clause indexing and
		 fast last-call optimizations. This results in fast execution
		 and small stack sizes. Our prototype compiler is implemented
		 in Prolog and emits intermediate VAM1p code. This intermediate
		 code is translated to native code instructions for the
		 MIPS R3000 processor. The native code instructions are
		 reordered using a Prolog based instruction scheduler and
		 assembled using the MIPS assembler. The resulting programs
		 achieve 3.1 MLips on a DecStation 5000/200."
}

@Proceedings{plilp92,
  booktitle =   "Programming Language Implementation and Logic
		 Programming (PLILP'92)",
  year =        "1992",
  publisher =   "Springer LNCS~631",
  address =     "Leuven",
}

@article{Ra92a,
  author =      "G.~Razek",
  title  =      "Combining Objects and Relations",
  journal =     "ACM Sigplan Notices",
  year =        "1992",
  volume =      "27",
  number =      "12",
  month =       "December",
  pages =       "66--70",
  abstract =    "Whereas the object oriented paradigm is best suited for
         the modelling of self contained objects it is not suited for the
         modelling of relations between objects. For the logic oriented
         paradigm on the other hand the converse is true. This pater
         shows that objects and their relations can be modelled in an
         integrated way by combining both paradigms."
}


@article{Ra92b,
	author =	{G.~Razek and others},
	title =		{{C}omputerunterst\"{u}tzte {P}atientenverwaltung: {I}ndividuelle
			    {EDV-}unterst\"{u}tzte {D}okumentation in der {G}yn\"{a}kologie},
	journal =	{Gyn\"{a}kologisch-geburtshilfliche Rundschau},
	year = 		"1992",
	volume =	"32",
	pages =		"151--154"
}

@article{Ra92c,
	author =	{G.~Razek and others},
	title =		{{C}omputerunterst\"{u}tzte {P}atientenverwaltung: {V}on
			    abteilungsspezifischen {I}nsell\"{o}sungen zur wienweit vernetzten
			    {G}esamtl\"{o}sung},
	journal =	"Wiener klinische Wochenschrift",
	year =		"1992",
	volume =	"104",
	number =	"23",
	pages =		"725--727"
}

@inproceedings{Ne92,
author={U.~Neumerkel},
title={Pruning Infinite Failure Branches in Programs with Occur-Check},
booktitle={Proceedings of Logic Programming and Automated Reasoning},
publisher={Springer LNCS 624},
address = {St.Petersburg, Russia},
month = {July},
year = {1992}
}


@inproceedings{KuPu92a,
author={Eva K\"{u}hn and Franz Puntigam},
title={Embedding {MSQL} Queries into a Logic Based Transaction Processing Framework},
booktitle= {Proc.~of the 2nd International Workshop on Research Issues on Data
		Engineering: Transaction and Query Processing (RIDE-TQP)},
publisher={IEEE}, 
address={Phoenix, Arizona}, 
month={February},
year={1992},
url=       "http://www.complang.tuwien.ac.at/franz/papers/ride92.ps.Z"
}

@inproceedings{KuPu92b,	
author={Eva K\"{u}hn and Franz Puntigam},
title={An Execution Model for Distributed Database Transactions and 
		Its Implementation in {VPL}},
booktitle={Proc.~of the International Conference on Extending Database Technology,
		EDBT-92}, 
publisher={Springer LNCS 580},
address={Vienna},
month={March},
year={1992},
url=       "http://www.complang.tuwien.ac.at/franz/papers/edbt92.ps.Z"
}

@inproceedings{KuPu92c,	
author={Eva K\"{u}hn and Franz Puntigam},
title={Reliable Communication in {VPL}},
booktitle={Proceedings of the Parallel Architectures and Languages 
		Europe (PARLE-92)},
publisher={Springer LNCS 605}, 
address={Paris}, 
month={June},
year={1992},
url=       "http://www.complang.tuwien.ac.at/franz/papers/parle92.ps.gz"
}

@inproceedings{Gl92a,
author={Robert Gl\"{u}ck}, 
title={Projections for Knowledge Based Systems},
booktitle={Cybernetics and Systems 92},
editor={Robert Trappl},
publisher={World Scientific Publishing},
address={Vienna},
year={1992}
}

@inproceedings{Gl92b,
author={Robert Gl\"{u}ck},
title={The Requirement of Identical Variety},
booktitle={Proceedings of the 13th International Congress on Cybernetics},
address={Namur, Belgium},
year={1992}
} 

@unpublished{Gl92c,
author={Robert Gl\"{u}ck},
title={Yet Another Future Problem of Specialization},
note={University of Copenhagen},
month={April},
year={1992}
}





@article{BuEK93,
author	=	"O. Bukhres and A. Elmagarmid and {e}. K{\"{u}}hn",
title	=	"Implementations of the Flex Transaction Model",
journal	=	"Data Engineering Bulletin",
year	=	"1993",
volume	=	"",
number	=	"",
pages	=	"",
month	=	"June",
note	=	"Special Issue on Workflow Applications",
annote	=	""
}


@InProceedings{BuKP92,
author	=	"O. Bukhres and {e}. K{\"{u}}hn and F. Puntigam",
title	=	"A Language Multidatabase System Communication Protocol",
booktitle=	"Proceedings of the 9th International Conference on Data Engineering",
year	=	1993,
editor	=	"",
pages	=	"",
organization=	"",
publisher=	"IEEE Computer Society",
address	=	"",
month	=	"April",
note	=	"",
annote	=	"",
url=       "http://www.complang.tuwien.ac.at/franz/papers/de93.ps.Z"
}


@InProceedings{ertl93,
  author = 	 "M. Anton Ertl",
  title = 	 "A Portable {Forth} Engine",
  booktitle =	 "EuroFORTH '93 conference proceedings",
  year =	 "1993",
  address =	 "Mari\'ansk\'e L\'azn\`e (Marienbad)",
  url =		 "http://www.complang.tuwien.ac.at/papers/ertl93.ps.gz",
  abstract = 	 "The Forth engine discussed in this paper is written
		  in GNU C, which provides several extensions that are
		  important for Forth implementation. The indirect
		  threaded Forth engine is completely
		  machine-independent, direct threading requires a few
		  machine-specific lines for each machine. Using
		  a portable language like GNU C encourages producing
		  an engine with many primitives. In order to make the
		  development of primitives easier and less
		  error-prone, an automatic tool generates most of the
		  code for a Forth primitive from the stack effect
		  notation, even if the TOS is kept in a register. The
		  engine is combined with the parts of the system
		  written in Forth by loading a machine-independent
		  image file that contains the executable Forth code
		  in relocatable form."
}

@InProceedings{ertl&krall93,
  author = 	 "M. Anton Ertl and Andreas Krall",
  title = 	 "High Level Constraints over Finite Domains",
  crossref =	 "constraints93",
  pages =	 "65--76",
  url =		 "http://www.complang.tuwien.ac.at/papers/ertl%26krall93.ps.gz",
  abstract = 	 "Constraint logic programming languages that employ
		  consistency techniques have been used to solve many
		  combinatorial search problems. In solving such
		  problems, the built-in constraints often do not
		  suffice. Unfortunately, new constraints defined with
		  lookahead and forward declarations are often
		  inefficient. In this paper, we present an efficient
		  high-level constraint mechanism. High-level
		  constraints are ordinary predicates with an
		  additional constraint declaration. They offer
		  fine-grained control over the tradeoff between
		  pruning power and execution time and achieve huge
		  speedups over lookahead declarations."
}

@Proceedings{constraints93,
  title = 	 "Constraint Processing, International Workshop at CSAM '93",
  booktitle = 	 "Constraint Processing, International Workshop at CSAM '93",
  year = 	 "1993",
  key =		 "Constraint Processing",
  editor =	 "Manfred Meyer",
  address =	 "St. Petersburg",
  url =		 "ftp://ftp.cs.unh.edu/pub/csp/archive/papers/nato/DFKI-RR-93-39.ps.gz"
}

@unpublished{Gl93a,
author={Robert Gl\"{u}ck},
title={Generating Optimizing Specializers},
note={ESPRIT Semantique Workshop, Imperial College of Science and Technology,
      University of London},
month={July},
year={1993}
}

@unpublished{Gl93b,
author={Robert Gl\"{u}ck},
title={Information Propagation in Driving},
note={TOPPS Seminar, DIKU, Dept. of Computer Science, University of Copenhagen},
month={April},
year={1993}
}

@unpublished{Gl93c,
author={Robert Gl\"{u}ck},
title={Metasystem Transition Schemes},
note={Keldysh Institute of Applied Mathematics, Russian Academy of Sciences,Moscow},
month={February},
year={1993}
}
 
@inproceedings{Gl93,
author={Robert Gl\"{u}ck}, 
title={Occam's Razor in Metacomputation: the Notion of a Perfect Process Tree},
booktitle={3rd International Workshop on Static Analysis},
series ={LNCS 724},
publisher={Springer Verlag},
address={Padova (Italy)},
year={1993}
}


@INPROCEEDINGS{Kral93,
	AUTHOR      = {Andreas Krall},
	TITLE       = {Clause Indexing in {VAM} and {WAM} based Compilers},
	BOOKTITLE   = {Second International Workshop on Functional/Logic Programming},
	ORGANIZATION= {Ludwig-Maximilians-Universit\"at M\"unchen},
	SERIES      = {Bericht 9311},
	YEAR        = 1993}


@InProceedings{Kueh93b,
author	=	"{e}. K{\"{u}}hn",
title	=	"{F}ehlertolerantes {K}oordinieren verteilter autonomer {D}ienste",
booktitle=	"Proceedings of the Euro-ARCH'93 (European Informatics Congress Computing Systems Architecture)",
year	=	"1993",
editor	=	"",
pages	=	"",
organization=	"",
publisher=	"Springer-Verlag",
address	=	"Munich",
month	=	"October 18--19",
note	=	"",
annote	=	""
}

@InProceedings{Kueh93a,
author	=	"{e}. K{\"{u}}hn",
title	=	"Multidatabase Language Requirements",
booktitle=	"Proceedings of the 3rd International Workshop on
		 Research Interests in data Engineering,
		 Interoperability in Multidatabase Systems, RIDE-IMS-93",
year	=	"1993",
editor	=	"",
pages	=	"",
organization=	"",
publisher=	"IEEE Computer Society",
address	=	"",
month	=	"",
note	=	"",
annote	=	""
}

@unpublished{KuVa,
author = "{e}. K{\"{u}}hn",
title  = {{VPL}, the language and its implementation},
note   = {KFKI, Budapest},
month  = {November},
year   = {1993}
}


@article{KuPP93,
author	=	"{e}. K{\"{u}}hn and H. Pohlai and F. Puntigam",
title	=	"Concurrency and Backtracking in {VPL}",
journal	=	"Computer Languages",
year	=	"1993",
volume	=	"19",
number	=	"3",
pages	=	"",
month	=	"July",
note	=	"",
annote	=	"",
url=       "http://www.complang.tuwien.ac.at/franz/papers/complang93.ps.Z"
}

@unpublished{hung93:Neu,
title = {The binary {WAM}, a simplified Prolog engine},
author={U.~Neumerkel},
note = {KFKI, Budapest},
month = {November},
year =  1993
}

@inproceedings{TR:Neu:93xx1,
author={U.~Neumerkel},
title={Une transformation de programme bas\'{e}e sur la notion d'\'{e}quations entre termes},
booktitle={Secondes journ\'{e}es francophones sur la programmation en
logique (JFPL'93)},
address = {N\^{\i}mes-Avingnon, France},
year = "1993"
}


@inproceedings{LOPSTR93:Neumerkel,
title= "A Transformation Based on the Equality between Terms",
booktitle= "Logic Program Synthesis and Transformation, LOPSTR 1993",
author={U.~Neumerkel},
place = "Louvain-la-Neuve",
series = "Workshops in Computing",
publisher = "Springer-Verlag",
year = "1994"
}

@InProceedings{ambrosch+94,
  author = 	 "Wolfgang Ambrosch and M. Anton Ertl and Felix Beer
		  and Andreas Krall",
  title = 	 "Dependence-Conscious Global Register Allocation",
  booktitle = 	 "Programming Languages and System Architectures",
  year = 	 "1994",
  editor =	 "J{\"u}rg Gutknecht",
  publisher =	 "Springer LNCS~782",
  address =	 "{Z\"urich}",
  pages =	 "125--136",
  url =		 "http://www.complang.tuwien.ac.at/papers/ambrosch+94.ps.gz",
  abstract =	 "Register allocation and instruction scheduling are
		  antagonistic optimizations: Whichever is applied
		  first, it will impede the other. To solve this problem, we
		  propose dependence-conscious colouring, a register
		  allocation method that takes the dependence graph
		  used by the instruction scheduler into
		  consideration.  Dependence-conscious colouring
		  consists of two parts: First, the interference graph
		  is built by analysing the dependence graphs,
		  resulting in fewer interference edges and less
		  spilling than the conventional preordering approach.
		  Secondly, during colouring the register selection
		  keeps dependence paths short, ensuring good
		  scheduling. Dependence-conscious
		  colouring reduces the number of interference edges
		  by 7\%--24\% and antidependences by 46\%--100\%."
}

@InProceedings{BGZ94,
   Author="Baier, Romana and Gl{\"u}ck, Robert and Z{\"o}chling, Robert",
   Title="Partial evaluation of numerical programs in {Fortran}",
   BookTitle="ACM SIGPLAN Workshop on Partial Evaluation and Semantics-Based Program Manipulation",
   Address="Orlando, Florida",
   Pages="119-132",
   Year="1994"
}

@InBook{BuEK94a,
author	=	"O. Bukhres and A. Elmagarmid and {e}. K{\"{u}}hn",
title	=	"Object-Oriented Multidatabase Systems",
chapter	=	"Advanced Languages for Multidatabase Systems",
publisher=	"Prentice-Hall",
year	=	"1994",	
editor	=	"O. Bukhres and A. Elmagarmid"
}

@inproceedings{b94d, 
author = {Christine Eisenbeis and S. Lelait and A. Sawaya and Jian Wang},
title = {Parallelisation de boucles: constraintes de ressources et registres},
booktitle = {proceedings of Actes des 6es Rencontres Francophones du Parallelisme, RenPar'6},
year = 1994,
month = {June},
organization = {ENS Lyon, France},

}

@InProceedings{ertl94l,
  author = 	 "M. Anton Ertl",
  title = 	 "Automatic Scoping of Local Variables",
  booktitle = 	 "EuroForth~'94 Conference Proceedings",
  year = 	 "1994",
  address =	 "Winchester, UK",
  pages =	 "31--37",
  url =		 "http://www.complang.tuwien.ac.at/papers/ertl94l.ps.gz",
  abstract = 	 "In the process of lifting the restrictions on using
		  locals in Forth, an interesting problem poses
		  itself: What does it mean if a local is defined in a
		  control structure? Where is the local visible? Since
		  the user can create every possible control structure
		  in ANS Forth, the answer is not as simple as it may
		  seem. Ideally, the local is visible at a place if
		  the control flow {\em must} pass through the
		  definition of the local to reach this place. This
		  paper discusses locals in general, the visibility
		  problem, its solution, the consequences and the
		  implementation as well as related programming style
		  questions."
}

@InProceedings{ertl94sc,
  author = 	 "M. Anton Ertl",
  title = 	 "Stack Caching for Interpreters",
  booktitle = 	 "EuroForth~'94 Conference Proceedings",
  year = 	 "1994",
  address =	 "Winchester, UK",
  pages =	 "3--12",
  url =		 "http://www.complang.tuwien.ac.at/papers/ertl94sc.ps.gz",
  abstract = 	 "An interpreter for a virtual stack machine can spend
		  a significant part of its execution time fetching
		  values from and storing values to the stack. This
		  paper explores two methods to reduce this overhead
		  by caching top-of-stack values in registers. The
		  dynamic method is based on having one version of the
		  whole interpreter for every possible state of the
		  cache; the execution of a primitive usually changes
		  the state of the cache and the next primitive is
		  executed in the version corresponding to the new
		  state. In the static method a state machine that
		  keeps track of the cache state is added to the
		  compiler. Common primitives exist in versions for
		  several states, but it is not necessary to have a
		  version of every primitive for every cache state.
		  The compiler generates glue code, if necessary, and
		  compiles the version of the primitive appropriate
		  for the cache state. Stack manipulation primitives
		  are usually optimized away."
}

@InProceedings{ertl&krall94,
  author = 	"M. Anton Ertl and Andreas Krall",
  title = 	"Delayed Exceptions --- Speculative Execution of
		  Trapping Instructions",
  booktitle = 	 "Compiler Construction (CC '94)",
  year = 	 "1994",
  publisher =	 "Springer LNCS~786",
  address =	 "Edinburgh",
  month =	 "April",
  pages =	 "158--171",
  url =		 "http://www.complang.tuwien.ac.at/papers/ertl-krall94cc.ps.gz",
  abstract =	 "Superscalar processors, which execute basic blocks
		  sequentially, cannot use much instruction level
		  parallelism. Speculative execution has been proposed
		  to execute basic blocks in parallel. A pure software
		  approach suffers from low performance, because
		  exception-generating instructions cannot be executed
		  speculatively. We propose delayed exceptions, a
		  combination of hardware and compiler extensions that
		  can provide high performance and correct exception 
		  handling in compiler-based speculative execution.
		  Delayed exceptions exploit the fact that exceptions
		  are rare. The compiler assumes the typical case (no
		  exceptions), schedules the code accordingly, and
		  inserts run-time checks and fix-up code that ensure
		  correct execution when exceptions do happen."
}


@InProceedings{FKPS94a,
author	=	"A. Forst and {e}. K{\"{u}}hn and H. Pohlai and 
		 K. Schwarz",
title	=	"Logic Based and Imperative Coordination Languages",
booktitle=	"Seventh International Conference on Parallel and
		 Distributed Comuting Systems, PDCS'94",
year	=	"1994",
organization=	"ISCA, in cooperation with ACM, IEEE",
address	=	"Las Vegas, Nevada",
url = {http://www.complang.tuwien.ac.at/papers/forst-etal1994.ps.gz},
month	=	"October"
}


@unpublished{Gv94a,
title = {Generating Transformers for Deforestation and Driving},
author={R.~Gl{\"u}ck},
note = {Atlantique Workshop on Semantics-Based Program Manipulation, Portland, Oregon, USA},
month = {January},
year = 1994
}

@unpublished{Gv94b,
title = {Fortran Program Specialization},
author={R.~Gl{\"u}ck},
note = {IFIP WG 2.4 Meeting, Szeged, Hungary},
month = {May},
year = 1994
}

@unpublished{Gv94c,
title = {Deforestation and Supercompilation},
author={R.~Gl{\"u}ck},
note = {ESPRIT Semantique Workshop, Aarhus, Denmark},
month = {July},
year = 1994
}

@Article{G94:JFP,
   Author="Gl{\"u}ck, Robert",
   Title="On the generation of specializers",
   Journal="Journal of Functional Programming",
   Volume="4",
   Number="4",
   Pages="499-514",
   Year="1994"
}

@InProceedings{GJ94:IEEE,
   Author="Gl{\"u}ck, Robert and J{\o}rgensen, Jesper",
   Title="Generating optimizing specializers",
   BookTitle="1994 International Conference on Computer Languages",
   Address="Toulouse, France",
   Publisher="IEEE Computer Society Press",
   Pages="183-194",
   Year="1994"
}

@InProceedings{GJ94:SAS,
   Author="Gl{\"u}ck, Robert and J{\o}rgensen, Jesper",
   Title="Generating transformers for deforestation and supercompilation",
   BookTitle="Static Analysis. Proceedings",
   Editor="Le Charlier, B.",
   Series="Lecture Notes in Computer Science",
   Address="Namur, Belgium",
   Publisher="Springer-Verlag",
   Volume="864",
   Pages="432-448",
   Year="1994"
}

@InCollection{GK94:LMMC,
   Author="Gl{\"u}ck, Robert and Klimov, Andrei V.",
   Title="Metacomputation as a tool for formal linguistic modeling",
   BookTitle="Cybernetics and Systems '94",
   Editor="Trappl, R.",
   Publisher="World Scientific",
   Address="Singapore",
   Volume="2",
   Pages="1563-1570",
   Year="1994"
}

@InProceedings{GS94,
   Author="Gl{\"u}ck, Robert and S{\o}rensen, Morten Heine",
   Title="Partial deduction and driving are equivalent",
   BookTitle="Programming Language Implementation and Logic Programming. Proceedings",
   Editor="Hermenegildo, M. and Penjam, J.",
   Series="Lecture Notes in Computer Science",
   Address="Madrid, Spain",
   Publisher="Springer-Verlag",
   Volume="844",
   Pages="165-181",
   Year="1994"
}


@InProceedings{KKZG94,
   Author="Kleinrubatscher, Paul and Kriegshaber, Albert and Z{\"o}chling, Robert and Gl{\"u}ck, Robert",
   Title="Fortran program specialization",
   BookTitle="Semantikgest{\"u}tzte Analyse, Entwicklung und Generierung von Programmen. GI Workshop",
   Editor="Snelting, Gregor and Meyer, Uwe",
   Address="Schloss Rauischholzhausen, Germany",
   Publisher="Justus-Liebig-Universit{\"a}t Giessen",
   Pages="45-54",
   Year="1994"
}


@INPROCEEDINGS{Kral94a,
        AUTHOR      = {Andreas Krall},
        TITLE       = {Improving Semi-static Branch Prediction by Code Replication},
        BOOKTITLE   = {Conference on Programming Language Design and Implementation},
        ORGANIZATION= {ACM},
        SERIES      = {SIGPLAN},
        VOLUME      = {29(7)},
        PAGES       = {97--106},
        ADDRESS     = {Orlando},
url = "http://www.complang.tuwien.ac.at/papers/krall94-pldi/",
        YEAR        = 1994}

@INPROCEEDINGS{Kral94b,
        AUTHOR      = {Andreas Krall},
        TITLE       = {Implementation Techniques for {Prolog}},
        BOOKTITLE   = {10. Workshop Logische Programmierung},
        EDITOR      = {Norbert E. Fuchs},
        ORGANIZATION= {Universit\"at Z\"urich},
        SERIES      = {Bericht},
        PAGES       = {1--15},
        ADDRESS     = {Z\"urich},
        YEAR        = 1994}

@INPROCEEDINGS{KrBe94a,
        AUTHOR      = {Andreas Krall and Thomas Berger},
        TITLE       = {Incremental Flow Analysis},
        BOOKTITLE   = {3rd Workshop on Functional Logic Programming},
        EDITOR      = {Hendrik Lock},
        ORGANIZATION= {IBM Scientific Center Heidelberg},
        PAGES       = {15--19},
        ADDRESS     = {Schwarzenberg},
        YEAR        = 1994}

@INPROCEEDINGS{KrBe94b,
        AUTHOR      = {Andreas Krall and Thomas Berger},
        TITLE       = {A Progress Report on Incremental Global Compilation of {Prolog}},
        BOOKTITLE   = {International Logic Programming Symposium, Workshop on Implementation
                       Techniques for  Logic  Programming  Languages},
        EDITOR      = {Koen De Bosschere and Bart Demoen and Paul Tarau},
        ORGANIZATION= {ALP},
        PAGES       = {59--67},
        ADDRESS     = {Ithaca},
        YEAR        = 1994}


@InProceedings{Kueh94a,
author	=	"{e}. K{\"{u}}hn",
title	=	"Fault-Tolerance for Communicating Multidatabase Transactions",
booktitle=	"Proceedings of the 27th Hawaii International Conference on System Sciences (HICSS)",
year	=	"1994",
publisher=	"ACM, IEEE",
address	=	"Wailea, Maui, Hawaii",
month	=	"January"
}

@InProceedings{KuLP94,
author	=	"{e}. K{\"{u}}hn and W. Liu and H. Pohlai",
title	=	"Scheduling Transactions on Distributed Systems with the {VPL} 
		 Engine",
booktitle=	"In: Proceedings of the Second Biennial European Joint Conference 
		 on Engineering Systems Design, ESDA'94",
year	=	"1994",
publisher=	"The American Society of Mechanical Engineers (ASME)",
address	=	"London, England",
month	=	"July"
}

@article{KuPP94,
author	=	"{e}. K{\"{u}}hn and H. Pohlai and F. Puntigam",
title	=	"Communication and Transactions in {VPL}",
journal	=	"Computers and Artificial Intelligence",
year	=	"1994",
volume	=	"13",
number	=	"4",
url=       "http://www.complang.tuwien.ac.at/franz/papers/cai94.ps.Z"
}


@InProceedings{KuPS94a,
author	=	"{e}. K{\"{u}}hn and H. Pohlai and K. Schwarz",
title	=	"Efficient Coordination Support with Transputer Systems",
booktitle=	"2nd Austrian-Hungarian Workshop on Transputer Applications",
year	=	"1994",
organization=	"KFKI-Reports Series",
address	=	"Budapest, Hungary",
month	=	"September"
}

@InProceedings{KuST94,
author	=	"{e}. K{\"{u}}hn and K. Schwarz and T. Tschernko",
title	=	"A Language Based Multidatabase System",
booktitle=	"Proceedings of the ACM-SIGMOD 1994 Conference",
year	=	"1994",
address	=	"Minnesota",
month	=	"May 24--27",
note	=	"prototype system description"
}

@unpublished{Nv94,
title ="A Program Transformation Based on Equality",
author="Ulrich Neumerkel",
note= "IBM T.J.Watson Research Centre",
month= "September",
year=1994
}


@inproceedings{Punt94a,
author = "Franz Puntigam",
title = "Transactions on Shared Data: A Coordination Model",
booktitle = "Hawaii International Conference on System Sciences",
month = "January",
year = 1994,
url=       "http://www.complang.tuwien.ac.at/franz/papers/hicss94.ps.gz"
}

@inproceedings{Punt94b,
author = "Franz Puntigam",
title = "Type Specialization and Coordination",
booktitle = "Workshop on Coordination Models and Languages for Parallelism and Distribution",
month = "July",
year = 1994,
url=       "http://www.complang.tuwien.ac.at/franz/papers/ecoop-ws94.ps.gz"
}

@inproceedings{Punt94c,
author = "Franz Puntigam",
title = "Type Specialization for Object-Oriented Coordination",
booktitle = "Joint Conference on Information Sciences",
month = "November",
year = 1994,
url=       "http://www.complang.tuwien.ac.at/franz/papers/jcis94.ps.gz"
}

@InProceedings{SGJ94,
   Author="S{\o}rensen, Morten Heine and Gl{\"u}ck, Robert and Jones, Neil D.",
   Title="Towards unifying partial evaluation, deforestation, supercompilation, and GPC",
   BookTitle="Programming Languages and Systems - ESOP '94. Proceedings",
   Editor="Sannella, Donald",
   Series="Lecture Notes in Computer Science",
   Address="Edinburgh, Scotland",
   Publisher="Springer-Verlag",
   Volume="788",
   Pages="485-500",
   Year="1994"
}

@inproceedings{b94c, 
         author = {Bogong Su and Stanley Habib and Youfeng Wu and Jian
                  Wang and Wei Zhao},
         title = {A study of pointer aliasing for software pipelining
                  using run-time disambiguation},
         booktitle = {proceedings of the 27th International Symposium and Workshop on
         Microprogramming and Microarchitecture (MICRO-27)},
         year = 1994,
         month = {December},
         organization = {ACM and IEEE},

}

@InProceedings{TN94,
  author         = "P. Tarau and U. Neumerkel",
  title          = "A Novel Term Compression Scheme and Data Representation in the {BinWAM}",
  booktitle      = "Programming Language Implementation and Logic
                    Programming, International Symposium, PLILP'94",
  year           = 1994,
  series         = "Lecture Notes in Computer Science",
  publisher      = "Springer-Verlag"
}


@article{a94b, 
         author = {Jian Wang and Christine Eisenbeis and Martin Jourdan and Bogong Su},
         title = {{Decomposed Software Pipelining}: A New Perspective and
         A New Approach},
         journal = {International Journal of Parallel
         Programming},
         year = 1994,
         volume = 22,
         number = 3,
         pages = {357--379},
}

@article{a94a, 
         author = {Jian Wang and Christine Eisenbeis and Bogong Su},
         title = {Using Timed Petri Net to Model Instruction-Level Loop Scheduling
         with Resource Constraints},
         journal = {Journal of Computer Science
         and Technology},
         year = 1994,
         volume = 9,
         number = 2,
         pages = {128--143},
}

@inproceedings{b94b, 
         author = {Jian Wang and Andreas Krall and M. Anton Ertl and Christine Eisenbeis},
         title = {Software pipelining with register allocation and spilling},
         booktitle = {proceedings of the 27th International Symposium and Workshop on
         Microprogramming and Microarchitecture (MICRO-27)},
         year = 1994,
         month = {December},
         organization = {ACM and IEEE},

}

@inproceedings{b94a, 
         author = {Jian Wang and Andreas Krall and M. Anton Ertl and Christine Eisenbeis},
         title = {{Trace Software Pipelining}: A Novel Technique for
         Parallelization of Loops with Branches},
         booktitle = {proceedings of  International Conference 
         on Parallel Architectures and Compilation Techniques (PACT'94)},
         year = 1994,
         month = {August},
         organization = {IFIP},
         editor = {Michel Cosnard and G.R. Gao and G.M. Silberman},
         publisher = {Elsevier Science B. V. (North-Holland)}, 
}

@article{KELB1995,
author  =       "{e}. K{\"u}hn and A. K.Elmagarmid and Y. Leu and N. Boudriga",
title   =       "A Parallel Logic Language for Transaction Specification in 
                 Multidatabase Systems",
journal =       "Journal of Systems Integration",
year    =       "1995",
volume  =       "5",
pages   =       "219--252",
publisher =     "Kluwer Academic Publishers",
}

@article{BuKu1995,
author  =       "O. Bukhres and {e}. K{\"{u}}hn",
title   =       "Highly Available and Reliable Communication Protocols for 
                 Heterogeneous Systems",
journal =       "Information Sciences",
year    =       "1995",
volume  =       "3",
number  =       "1",
pages   =       "1--40",
month   =       "January",
}

@InProceedings{ertl95pldi,
  author =       "M. Anton Ertl",
  title =        "Stack Caching for Interpreters",
  crossref =     "sigplan95",
  pages =	 "315--327",
  url =		 "http://www.complang.tuwien.ac.at/papers/ertl95pldi.ps.gz",
  abstract =     "An interpreter can spend a significant part of its
                  execution time on arguments of virtual machine
                  instructions. This paper explores two methods to
                  reduce this overhead for virtual stack machines by
                  caching top-of-stack values in (real machine)
                  registers. The {\em dynamic method} is based on
                  having, for every possible state of the cache, one
                  specialized version of the whole interpreter; the
                  execution of an instruction usually changes the
                  state of the cache and the next instruction is
                  executed in the version corresponding to the new
                  state. In the {\em static method} a state machine
                  that keeps track of the cache state is added to the
                  compiler. Common instructions exist in specialized
                  versions for several states, but it is not necessary
                  to have a version of every instruction for every
                  cache state. Stack manipulation instructions are
                  optimized away."
}

@Proceedings{sigplan95,
  booktitle =    "SIGPLAN Conference on Programming Language
                  Design and Implementation (PLDI'95)",
  title =        "SIGPLAN Conference on Programming Language
                  Design and Implementation (PLDI'95)",
  year =         "1995",
  key =          "PLDI '95"
}

@INPROCEEDINGS{ErKr95,
        AUTHOR      = {M. Anton Ertl and Andreas Krall},
        TITLE       = {High Level Constraints over Finite Domains},
        BOOKTITLE   = {Constraint Processing},
        EDITOR      = {Manfred Meyer},
        SERIES      = {LNCS 923},
        PUBLISHER   = {Springer},
        PAGES       = {51--66},
        ADDRESS     = {Heidelberg},
  url =		 "http://www.complang.tuwien.ac.at/papers/ertl%26krall93.ps.gz",
        YEAR        = 1995}

@InProceedings{ertl&maierhofer95,
  author =       {M. Anton Ertl and Martin Maierhofer},
  title =        {Translating {Forth} to Efficient {C}},
  crossref =     {euroforth95},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl%26maierhofer95.ps.gz},
  url2 =         {http://www.complang.tuwien.ac.at/papers/ertl%26maierhofer95.pdf},
  abstract =     {An automatic translator can translate Forth into C
                  code which the current generation of optimizing C
                  compilers compiles to efficient machine code.  I.e.,
                  the resulting code keeps stack items in registers
                  and rarely updates the stack pointer. This paper
                  presents a simple translation method that produces
                  efficient C code, describes an implementation of the
                  method and presents results achieved with this
                  implementation: The translated code is 4.5--7.5
                  times faster than Gforth (the fastest measured
                  interpretive system), 1.3--3 times faster than
                  BigForth 386 (a native code compiler), and smaller
                  than Gforth's threaded code.}
}

@Proceedings{euroforth95,
  title =        "EuroForth~'95 Conference Proceedings",
  booktitle =    "EuroForth~'95 Conference Proceedings",
  year =         "1995",
  key =          "EuroForth '95",
  address =      "Schloss Dagstuhl, Germany",
}

@article{FoKB1995,
   author =    {A. Forst and {e.} K{\"{u}}hn and O. Bukhres},
   title =     {General Purpose Work Flow Languages},
   journal =   {Distributed and Parallel Databases},
   volume =    3,
   pages =     {187--218},
   year =      1995,
   number =    2,
   month =     {April},
   editor =    {A. K. Elmagarmid},
   note =      {ISSN: 0926-8782},
   publisher = {Kluwer Academic Publishers},
   url = {http://www.complang.tuwien.ac.at/papers/forst-etal1995.ps.gz}
}

@inproceedings{gl951,
  author = {R. Gl{\"u}ck and J. J{\o}rgensen},
  title = {Efficient multi-level generating extensions for program specialization},	 
  booktitle = {Programming Languages: Implementations, Logics and Programs}, 	 
  year = 1995,	 
  editor = {S. D. Swierstra and M. Hermenegildo},
  series = {Lecture Notes in Computer Science 982},	
  publisher = {Springer-Verlag},	
  address = {Utrecht, The Netherlands},	
  pages = {259--278}
}	

@inproceedings{gl952,
  author = {R. Gl{\"u}ck and A. V. Klimov},
  title = {Reduction of language hierarchies},
  booktitle = {Proceedings of the 14th International Congress on Cybernetics},
  year = 1995,
  publisher = {International Association for Cybernetics},
  address = {Namur, Belgium},
}

@inproceedings{gl953,
  author = {R. Gl{\"u}ck and R. Nakashige and R. Z{\"o}chling},
  title = {Binding-time analysis applied to mathematical algorithms},
  booktitle = {System Modelling and Optimization},
  year = 1995,
  editor = {J. Dolezal and J. Fidler},
  publisher = {Chapman and Hall},
  address = {Prague, Czech Republic},
  pages = {137--146},
}

@inproceedings{gl954,
  author = {M. H. S{\o}rensen and R. Gl{\"u}ck},
  title = {An algorithm of generalization in positive supercompilation},
  booktitle = {Logic Programming: Proceedings of the 1995 International Symposium},
  year = 1995,
  editor = {J. W. Lloyd},
  publisher = {MIT Press},
  address = {Portland, Oregon},
  pages = {465--479},
}

@inproceedings{gl955,
  author = {P. Thiemann and R. Gl{\"u}ck},
  title = {The generation of a higher-order online partial evaluator},
  booktitle = {Fuji International Workshop on Functional and Logic Programming},
  year = 1995,
  editor = {M. Takeichi and T. Ida},
  publisher = {World Scientific},
  address = {Susono, Japan},
}

@INPROCEEDINGS{KrBe95a,
        AUTHOR      = {Andreas Krall and Thomas Berger},
        TITLE       = {Incremental Global Compilation of {Prolog} with the {Vienna Abstract Machine}},
        BOOKTITLE   = {Twelfth International Conference on Logic Programming},
        EDITOR      = {Leon Sterling},
        PUBLISHER   = {MIT Press},
        PAGES       = {333--347},
        ADDRESS     = {Tokyo},
        YEAR        = 1995}

@INPROCEEDINGS{KrBe95b,
        AUTHOR      = {Andreas Krall and Thomas Berger},
        TITLE       = {The {VAM$_{\rm AI}$} - an Abstract machine for Incremental Global Dataflow Analysis of {Prolog}},
        BOOKTITLE   = {ICLP'95 Post-Conference Workshop on Abstract Interpretation of Logic Languages},
        EDITOR      = {Maria Garcia de la Banda and Gerda Janssens and Peter Stuckey},
        PUBLISHER   = {Science University of Tokyo},
        PAGES       = {80--91},
        ADDRESS     = {Tokyo},
        YEAR        = 1995}

@PROCEEDINGS{KrGe95,
        TITLE       = {{11. Workshop Logische Programmierung}},
        EDITOR      = {Andreas Krall and Ulrich Geske},
        SERIES      = {GMD-Studien Nr. 270},
        ADDRESS     = {Wien},
        YEAR        = 1995}

@UNPUBLISHED{Kral95,
        AUTHOR      = {Andreas Krall},
        TITLE       = {Fast Abstract Interpretation of Prolog with an Abstract
                       Machine},
        NOTE        = {4th Workshop on Functional Logic Programming, Schwarzenberg},
        YEAR        = 1995}

@UNPUBLISHED{Neu95-2,
        AUTHOR      = {Ulrich Neumerkel},
        TITLE       = {{Tutorial: Teaching beginners Prolog, How to teach Prolog}},
        NOTE        = {International Conference on the Pratical
                       Application of Prolog, Paris},
        YEAR        = 1995}

@INPROCEEDINGS{Neu95-3,
        AUTHOR      = {Ulrich Neumerkel},
        TITLE       = {Interprozedurale {Registerallokation} durch
                       {Quelltexttransformationen}},
        BOOKTITLE   = {{11. Workshop Logische Programmierung}},
        EDITOR      = {Andreas Krall and Ulrich Geske},
        SERIES      = {GMD-Studien Nr. 270},
        ADDRESS     = {Wien},
        YEAR        = 1995}

@UNPUBLISHED{Neu95-4,
        AUTHOR      = {Ulrich Neumerkel},
        TITLE       = {Transforming binary logic programs using Continuation
                       Prolog},
        NOTE        = {INTAS Workshop Efficient Symbolic Computing, Namur},
        YEAR        = 1995}

@INPROCEEDINGS{Neu95-5,
        AUTHOR      = {Ulrich Neumerkel},
        BOOKTITLE   = {ILPS 95 Workshop on Sequential Implementation Technologies for
                       Logic Programming Languages},
        EDITOR      = {Zoltan Somogy and Lee Naish and Fergus Henderson and
                       Thomas Conway},
        TITLE       = {Continuation Prolog: A new intermediary
                       language for WAM and BinWAM code generation},
        ADDRESS     = {Portland},
        YEAR        = 1995}


@InProceedings{PuntOOPMOC95,
  author =       "Franz Puntigam",
  title =        "Flexible Types for a Concurrent Model",
  booktitle =    "Proceedings of the Workshop on Object-Oriented
                  Programming and Models of Concurrency",
  year =         1995,
  address =      "Torino",
  month =        "June",
  url =		 "http://www.complang.tuwien.ac.at/franz/papers/oopmoc95.ps.gz"
}

@InProceedings{PuntWLP95,
  author =       "Franz Puntigam",
  title =        {Typen f{\"u}r logikbasierte aktive {Objekte}},
  booktitle =    "Workshop Logische Programmierung",
  year =         1995,
  month =        "Sept",
  address =      "Vienna",
  series =       "{GMD}-Studien",
  number =       270,
  url =		 "http://www.complang.tuwien.ac.at/franz/papers/wlp95.ps.gz"
}

@InProceedings{PuntFORTE95,
  author =       "Franz Puntigam",
  title =        "Type Specifications with Processes",
  booktitle =    "Proceedings {FORTE}'95",
  year =         1995,
  address =      "Montreal",
  publisher =    "IFIP",
  month =        "October",
  url =		 "http://www.complang.tuwien.ac.at/franz/papers/forte95.ps.gz"
}

@Article{WaKr95a,
  author = 	 {Jian Wang and Andreas Krall and M. Anton Ertl},
  title = 	 {Trace Software Pipelining},
  journal = 	 {Journal of Computer Science and Technology},
  year = 	 {1995},
  volume =	 {10},
  number =	 {6},
  pages =	 {481--490}
}

@INPROCEEDINGS{WaKr95b,
        AUTHOR      = {Jian Wang and Andreas Krall and M. Anton Ertl},
        TITLE       = {Decomposed Software Pipelining with reduced Register Requirement},
        BOOKTITLE   = {International Conference on Parallel Architectures and
                       Compilation Techniques},
        EDITOR      = {Jean-Luc Gaudiot},
        ORGANIZATION= {IFIP,ACM,IEEE},
        PUBLISHER   = {North-Holland},
        PAGES       = {277--280},
        ADDRESS     = {Limassol},
        YEAR        = 1995}

@INPROCEEDINGS{WaKr95c,
        AUTHOR      = {Jian Wang and Andreas Krall and M. Anton Ertl},
        TITLE       = {Register Requirement for Exploiting Loops' Maximum
                       Instruction-Level Parallelism},
        BOOKTITLE   = {The Fourth International Conference for Young Computer Scientists},
        EDITOR      = {Shou Bai and Jianping Fan and Xiaozhong Li},
        PUBLISHER   = {Peking University Press},
        PAGES       = {70--75},
        ADDRESS     = {Beijing},
        YEAR        = 1995}

@INPROCEEDINGS{Yann95,
        AUTHOR          = {Marinos Yannikos},
        TITLE           = {Partielle Maschinencodegenerierung f{\"u}r bin{\"a}res {Prolog}},
        BOOKTITLE       = {{11. Workshop Logische Programmierung}},
        EDITOR      = {Andreas Krall and Ulrich Geske},
        SERIES      = {GMD-Studien Nr. 270},
        ADDRESS     = {Wien},
        YEAR        = 1995}

@InProceedings{ertl&krall96,
  author = 	 "M. Anton Ertl and Andreas Krall",
  title = 	 "Removing Anti Dependences by Repairing",
  booktitle =    "Compiler Construction (CC'96)",
  crossref =	 "cc96proceedings",
  pages =	 "33--43",
  url = 	 "http://www.complang.tuwien.ac.at/papers/ertl-krall96cc.ps.gz",
  abstract = 	 "Anti dependences (write-after-read dependences)
                  constrain the reordering of instructions and limit
                  the effectiveness of instruction scheduling and
                  software pipelining techniques for superscalar and
                  VLIW processors. Repairing solves this problem: If
                  the definition of a variable is moved before a
                  previous use of that variable, compiler-generated
                  repair code reconstructs the value that the
                  definition destroyed.  Repairing features several
                  potential advantages over register renaming, another
                  technique for removing anti dependences: less
                  register pressure, less loop unrolling and fewer
                  move instructions."
}

@Proceedings{cc96proceedings,
  title = 	 "Compiler Construction (CC'96)",
  year = 	 "1996",
  key =		 "CC'96",
  editor =	 "Tibor Gyim\'{o}thy",
  volume =	 "1060",
  series =	 "LNCS",
  publisher =	 "Springer",
  address =	 "Link{\"o}ping"
}

@PhdThesis{ertl96diss,
  author = 	 {M. Anton Ertl},
  title = 	 {Implementation of Stack-Based Languages on Register
                  Machines},
  school = 	 {{Technische  Universit\"{a}t Wien}},
  year = 	 {1996},
  address =	 {Austria},
  url = 	 {http://www.complang.tuwien.ac.at/papers/ertl96diss.ps.gz},
  abstract = 	 {Languages with programmer-visible stacks
                  (stack-based languages) are used widely, as
                  intermediate languages (e.g., JavaVM, FCode), and as
                  languages for human programmers (e.g., Forth,
                  PostScript).  However, the prevalent computer
                  architecture is the register machine. This poses the
                  problem of efficiently implementing stack-based
                  languages on register machines. A straight-forward
                  implementation of the stack consists of a memory
                  area that contains the stack items, and a pointer to
                  the top-of-stack item.\par The basic optimizations
                  explored in this thesis are: Caching the
                  frequently-accessed top-of-stack items in registers
                  reduces stack access overhead, and combining
                  stack-pointer updates eliminates most of them.\par
                  This thesis examines these optimizations in the
                  context of three basic implementation techniques:
                  \begin{itemize} \item For (virtual machine)
                  \textbf{interpreters}, I regard the stack cache in
                  the registers as finite state machine, where the
                  execution of a virtual machine instruction performs
                  a state transition; there are specialized
                  implementations of the virtual machine instructions
                  for each state. \item My \textbf{native-code
                  compilation} technique transforms the programs into
                  standard compiler data structures; then
                  state-of-the-art compiler technology can be applied
                  for optimization and code generation. In particular,
                  stack items are represented by pseudo-registers,
                  which register allocation will (usually) put into
                  machine registers; stack pointer updates are
                  executed symbolically, i.e., at compile time. \item
                  For \textbf{translation to C}, I emphasize
                  simplicity; the optimizer of the C compiler takes
                  care of the complex problems. The translator just
                  has to represent stack items as local C variables,
                  and the C compiler will keep them in registers. As
                  with native-code compilers, (usually) no stack
                  pointer updates at run-time are
                  necessary. \end{itemize} For interpreters, the
                  optimizations eliminate about two real machine
                  instructions per virtual machine instruction,
                  resulting in speedups of 17\%--31\% (for Forth on a
                  DecStation~3100). The techniques presented here for
                  native-code compilation and translation to C achieve
                  a speedup factor of 1.3--3 over traditional native
                  code compilers and more than 3 over a
                  straight-forward translator to C (for Forth on a
                  486DX2/66).}
}

@InProceedings{ertl&pirker96,
  author = 	 {M. Anton Ertl and Christian Pirker},
  title = 	 {{RAFTS} for Basic Blocks: A progress report on {Forth}
                  Native Code Compilation},
  booktitle = 	 {EuroForth '96 Conference Proceedings},
  year =	 {1996},
  address =	 {St. Petersburg, Russia},
  url =		 {http://www.complang.tuwien.ac.at/papers/ertl%26pirker96.ps.gz},
  abstract = 	 {RAFTS is a framework for applying state of the art
                  compiler technology to the compilation of
                  stack-based languages, in particular Forth. This
                  paper describes our experiences and findings in
                  implementing the basic block (straight-line code)
                  part of RAFTS; it also presents empirical results:
                  the basic block part is the simplest part of RAFTS,
                  but without the other parts it is hardly better than
                  existing techniques, because in Forth basic blocks
                  are very short (there are few basic blocks with more
                  than two useful instructions).}
}

@InProceedings{PuntPOOMA96,
  author = 	 "Franz Puntigam",
  title = 	 "Practical Expressive Types for Active Objects",
  booktitle = 	 "Proceedings of the Conference on Parallel Object-Oriented
                  Methods and Applications (POOMA'96)",
  year =	 1996,
  address =	 "Santa Fe, New Mexico",
  month =	 feb,
  url =		 "http://www.complang.tuwien.ac.at/franz/papers/pooma96.ps.gz"
}

@InProceedings{PuntFMOODS96,
  author = 	 "Franz Puntigam",
  title = 	 "Types for Active Objects based on Trace Semantics",
  booktitle = 	 "Proceedings of the Workshop on Formal Methods
		  for Open Object-based Distributed Systems (FMOODS'96)",
  editor =	 "Elie Najm et al.",
  year =	 1996,
  publisher =	 "Chapman {\&} Hall",
  organization = "{IFIP} {WG} 6.1",
  address =	 "Paris, France",
  month =	 mar,
  url =		 "http://www.complang.tuwien.ac.at/franz/papers/fmoods96.ps.gz"
}

@InProceedings{PuntEuroPar96,
  author = 	 "Franz Puntigam",
  title = 	 "Synchronization Expressed in Types of Communication Channels",
  booktitle = 	 "Proceedings of Euro-Par'96",
  editor =	 "Luc Boug{\'e} et al.",
  number =	 1023,
  series =	 "Lecture Notes in Computer Science",
  year =	 1996,
  publisher =	 "Springer-Verlag",
  address =	 "Lyon, France",
  month =	 aug,
  url =		 "http://www.complang.tuwien.ac.at/franz/papers/europar96.ps.gz"
}

@Unpublished{PuntGenf96,
  author = 	 "Franz Puntigam",
  title = 	 "Process Types",
  note = 	 "{V}ortrag an der {U}niversit{\"a}t {G}enf, {S}chweiz",
  year = 	 "1996",
  month = 	 jun
}

@InProceedings{Kueh1996,
  author =       {{e.} K{\"{u}}hn},
  title =        {A Distributed and Recoverable Linda Implementation with {Prolog\&Co}},
  booktitle =    {Austrian-Hungarian Workshop on Distributed and Parallel
                  Systems (DAPSYS'96)},
  year =         1996,
  address =      {Miskolc, Hungary},
  month =        {October 2-4}
}

@InProceedings{FoKu1996,
  author =       {A. Forst and {e.} K{\"{u}}hn},
  title =        {Implementing Cooperative Software with High-Level 
                  Communication Packages},
  booktitle =    {Eight IEEE Symposium on Parallel and Distributed Processing},
  year =         1996,
  address =      {New Orleans, Louisiana},
  url = "http://www.complang.tuwien.ac.at/papers/forst-kuehn1996.ps.gz",
  month =        {October}
}

@ARTICLE{Krall96,
	AUTHOR      = {Andreas Krall},
	TITLE       = {The {Vienna Abstract Machine}},
	JOURNAL     = {Journal of Logic Programming},
	PUBLISHER   = {Elsevier},
	VOLUME      = {29(1-3)},
	PAGES       = {85--106},
	YEAR        = 1996}

@UNPUBLISHED{Krall96talka,
	AUTHOR      = {Andreas Krall},
	TITLE       = {Improving Branch Alignment and Branch Prediction by Code Replication},
	NOTE        = {Universit\'e de Gen\`eve},
	MONTH       = {February},
	YEAR        = 1996}

@UNPUBLISHED{Krall96talkb,
	AUTHOR      = {Andreas Krall},
	TITLE       = {Improving Branch Alignment and Branch Prediction by Code Replication},
	NOTE        = {INRIA Rocquencourt},
	MONTH       = {June},
	YEAR        = 1996}

@INPROCEEDINGS{Neu96-1,
        AUTHOR      = {Ulrich Neumerkel},
        BOOKTITLE   = {Joint International Conference and Symposium on Logic Programming JICSLP'96},
        EDITOR      = {Michael Maher},
        TITLE       = {Interprocedural register allocation for the WAM based on source-to-source transformations},
        ADDRESS     = {Bonn},
        PUBLISHER   = {MIT Press},
        YEAR        = 1996}

@INPROCEEDINGS{Neu96-2,
        AUTHOR      = {Ulrich Neumerkel},
        BOOKTITLE   = {Proceedings of the Poster Sesion at JICSLP'96},
        EDITOR      = {Norbert Fuchs and Ulrich Geske},
        TITLE       = {GUPU: A Prolog course environment and its programming methodology},
        ADDRESS     = {Bonn},
        SERIES      = {GMD-Studien},
	NUMBER      = 296,	
        YEAR        = 1996}

@UNPUBLISHED{Neu96-3,
        AUTHOR      = {Ulrich Neumerkel},
        TITLE       = {Guiding Heuristics for transformation based
interprocedural register allocation in Continuation Prolog},
        NOTE        = {INTAS Workshop Efficient Symbolic Computing, St.~Petersburg},
        YEAR        = 1996}

@MastersThesis{maierhofer97,
  author =       {Martin Maierhofer},
  title =        {Erzeugung optimierten Codes f\"{u}r Stackmaschinen},
  school =       {{Technische  Universit\"{a}t Wien}},
  type =         {Diplomarbeit},
  year =         {1997},
  address =      {Austria},
  url =          {http://www.complang.tuwien.ac.at/Diplomarbeiten/maierhofer97.ps.gz},
  abstract =     {The success of the Java programming language and the 
                  underlying stack architecture (the JavaVM) recently 
                  have caused renewed interest in stack architectures. 
                  New and special techniques are required to provide
                  support for the efficient execution of Algol-like 
                  high level languages on stack machines. An optimizing 
                  compiler should be able to eliminate dispensable 
                  accesses to main memory when loading and storing local 
                  variables by temporarily keeping a copy of their value
                  on the stack. This technique, however, is only 
                  meaningful for machines that can handle 
                  stackmanipulations faster than accesses to main 
                  memory. \par This thesis presents two solutions for 
                  the problem and compares the results that can be 
                  achieved for two stack architectures (one of them is 
                  the JavaVM). Both techniques work on a ``local'' level, 
                  meaning that each basic block is considered separately. 
                  Phil Koopman's ``stack scheduling'' performs well in
                  cooperation with a simple instruction scheduling 
                  strategy (a depth first postorder traversal of the 
                  dependency graph is used). The second technique 
                  (``{\scshape Dag} scheduling'') reorders the 
                  instructions (i.e. it schedules the instructions) in 
                  order to minimize accesses to local variables and 
                  leads to optimal code. \par The efficiency of Koopman's 
                  algorithm can be assessed with respect to the results 
                  achieved by {\scshape Dag} scheduling: stack scheduling 
                  leads to results that come quite near the optimum. 
                  Accesses to variables can be reduced from around 40\% 
                  (in code produced by the Java compiler \texttt{javac}) 
                  to some 30\% (after optimization) of the total 
                  instructions in JavaVM code. Instructions for 
                  stackmanipulation, however, increase from about 5\% to 
                  up to 15\% of the total.},
  note =         {In German}
}

@MastersThesis{ertl90,
  author =      {M. Anton Ertl},
  title =       {{Coroutining und Constraints in der Logik-Programmierung}},
  school =      {{Technische  Universit\"{a}t Wien}},
  type =        {Diplomarbeit},
  year =        {1990},
  address =     {Austria},
  url =		{http://www.complang.tuwien.ac.at/Diplomarbeiten/ertl90.ps.gz},
  note =	{In German},
  abstract =    {Logic Programming is distinguished by the declarative
                  programming style it encourages. Solutions to search
                  problems are specified easily. Some extensions to
                  Prolog are necessary to apply these advantages to
                  real-world problems.\par {\bf Coroutining} provides
                  for data driven goal execution. Using Coroutining it
                  is easier to separate logic and control and to use a
                  declarative programming style.\par {\bf Constraint
                  Logic Programming} liberates unification from its
                  restriction to syntactic equality and extends it to
                  relations on arbitrary computation domains. It also
                  enables the integration of {\bf consistency
                  techniques} in logic programming. Combinatorial
                  problems are solved efficiently using consistency
                  techniques, since they prune the search tree early
                  and provide information for the application of
                  heuristics.\par Coroutining and consistency
                  techniques have been embedded in a Prolog compiler
                  based on the Warren Abstract Machine. The
                  application of these extensions to other Prolog
                  implementations is described.\par The application of
                  the resulting extended logic programming languages
                  is demonstrated using examples from artificial
                  intelligence, operations research, and systems
                  programming.}
}


@MastersThesis{ambrosch93,
  author =      {Wolfgang Ambrosch},
  title =       {{Vergleich von Registerallokationsalgorithmen}},
  school =      {{Technische  Universit\"{a}t Wien}},
  type =        {Diplomarbeit},
  year =        {1993},
  address =     {Austria},
  url =		{http://www.complang.tuwien.ac.at/Diplomarbeiten/ambrosch93.ps.gz},
  note =	{In German},
  abstract =    {Register allocation is one of the most important
                  optimizations in modern compilers.  Most of the
                  algorithms used today are based on the work of
                  Gregory Chaitin who was the first to implement a
                  graph coloring register allocator in 1981. This
                  thesis concentrates on register allocation via graph
                  coloring.\par After explaining the fundamental ideas
                  we present a basic framework for register
                  allocation. We analyze existing algorithms and show
                  how their parts can be integrated in and combined
                  with the help of our framework. Important but often
                  neglected details as the retargeting of register
                  allocators, the handling of different classes of
                  registers, or the effects of different subprogram
                  linkage conventions are covered in a separate
                  chapter.\par Experimental results on the
                  effectiveness of the described methods for a large
                  suite of test programs are given. Compile-time
                  characteristics, register usage and the behavior in
                  the case of spilling were some of the issues we in
                  vestigated. An exact description of the papers our
                  work was based on completes the thesis.}
}

@MastersThesis{pirker95,
  author =      {Christian Pirker},
  title =       {{{\"U}bersetzung von Forth in Maschinensprache}},
  school =      {{Technische  Universit\"{a}t Wien}},
  type =        {Diplomarbeit},
  year =        {1995},
  address =     {Austria},
  url =		{http://www.complang.tuwien.ac.at/Diplomarbeiten/pirker95.ps.gz},
  note =	{In German},
  abstract =    {Forth is an extensible and interactive language. It
                provides two programmer-visible stacks (data and
                returnstack). The supplied instructions (words)
                manipulate data on the stacks. The efficiency of the
                stack access and control flow determine mainly the
                performance of Forth implementations.\par This thesis
                builds a compiler that generates native code for
                {\em MIPS RISC processors}. The compiler translates
                Forth programs into native code using state of the
                art compiler technology. The code is directly
                executable on the processor.\par The compiler
                generates a {\bf data flow graph} for each basic
                block of the program. Then simple {\bf instruction
                selection}, {\bf instruction scheduling} and {\bf
                register allocation} algorithms produce the native
                code. The algorithms try to reduce the stack
                operations and eliminate unneccesary stack pointer
                updates.\par The native code compiler is written in
                Forth and can compile itself. The compiler is
                integrated into the interpreter. Therefore it also
                handles interpreter words.\par Forth programs
                compiled by this compiler run about 13 to 196 \%
                faster compared to interpreted programs. Currently
                compiling takes 220 \% longer than compiling into
                interpreting code.}
}

@InProceedings{ertl97,
  author = 	 {M. Anton Ertl},
  title = 	 {{GRAY -- ein Generator f{\"u}r rekursiv absteigende Ybersetzer}},
  crossref =	 {forth-tagung97},
  url =		 {http://www.complang.tuwien.ac.at/papers/ertl97.ps.gz},
  note =	 {In German},
  abstract = 	 {Der Parser-Generator Gray \"ubersetzt Grammatiken in
                  ausf\"uhrbaren Forth-Code f\"ur einen Parser. Als
                  Besonderheit ist dabei die problemlose Kombination
                  semantischen Aktionen und Erweiterungen der BNF zu
                  nennen, die sich aus der Verwendung des Stacks zur
                  Kommunikation zwischen den semantischen Aktionen
                  ergibt. Dieser Artikel beschreibt den Entwurf und
                  die Verwendung von Gray.}
}

@InProceedings{maierhofer&ertl97,
  author = 	 {Martin Maierhofer and M. Anton Ertl},
  title = 	 {Optimizing Stack Code},
  crossref =	 {forth-tagung97},
  url = 	 {http://www.complang.tuwien.ac.at/papers/maierhofer%26ertl97.ps.gz},
  abstract = 	 {With interest in stack machines recently growing
                  (e.g. the JavaVM architecture used by the Java
                  programming language), support for the efficient
                  execution of Algol-like high level languages on this
                  class of hardware becomes an issue. Local variables
                  accesses in the source language should be translated
                  into stack accesses of the target machine (similar
                  to register allocation on register machines).\par In
                  this paper we explore such stack allocation
                  techniques for basic blocks. We evaluate Phil
                  Koopman's ``stack scheduling'' by adding an
                  instruction scheduler and comparing the result with
                  an optimal stack allocation and instruction
                  scheduling strategy. Stack scheduling in cooperation
                  with a depth first postorder instruction scheduling
                  produces results close to the optimum. The
                  optimizations discussed in this paper are only
                  useful for stack hardware that allows faster access
                  to the stack than to main memory.}
}

@Proceedings{forth-tagung97,
  title = 	 {Forth-Tagung},
  booktitle = 	 {Forth-Tagung},
  key =		 {Forth-Tagung 1997},
  year = 	 {1997},
  address =	 {Ludwigshafen}
}

@InProceedings{ertl&pirker97,
  author = 	 {M. Anton Ertl and Christian Pirker},
  title = 	 {The Structure of a {Forth} Native Code Compiler},
  booktitle = 	 {EuroForth '97 Conference Proceedings},
  year =	 {1997},
  address =	 {Oxford},
  pages =	 {107--116},
  url =		 {http://www.complang.tuwien.ac.at/papers/ertl%26pirker97.ps.gz},
  abstract = 	 {Writing a sophisticated Forth native code compiler
                  poses some tasks that are not discussed in compiler
                  text books. Some of these tasks arise from specific
                  language features, others from the requirement for
                  very fast compilation to maintain interactivity. In
                  this paper we describe some of the more interesting
                  data structures and algorithms used in the RAFTS
                  prototype.}
}

@TechReport{ertl&pirker98,
  author = 	 {M. Anton Ertl and Christian Pirker},
  title = 	 {Compilation of Stack-Based Languages ({Abschlu{\ss}bericht})},
  institution =  {Institut f{\"u}r Computersprachen, Technische
		 Universit{\"a}t Wien},
  year = 	 {1998},
  type =	 {Final report to {FWF} for research project {P11231}},
  url =		 {http://www.complang.tuwien.ac.at/papers/ertl%26pirker98.ps.gz},
  abstract = 	 {RAFTS is a framework for applying state of the art
                  compiler technology to the compilation of
                  stack-based languages like Forth and Postscript. The
                  special needs of stack-based languages are an
                  efficient stack representation, fast procedure
                  calls, and fast compilation. RAFTS addresses the
                  stack representation problem by allocating stack
                  items to registers such that most stack accesses in
                  the source program are register accesses in the
                  machine language program, and by eliminating most
                  stack pointer updates. To achieve fast calls, RAFTS
                  performs these optimizations interprocedurally and
                  also performs procedure inlining and tail call
                  optimization. Fast compilation is achieved by
                  selecting fast algorithms and implementing them
                  efficiently. Until now we have implemented the basic
                  block part of RAFTS and a part of the work necessary
                  for inlining and interprocedural optimizations.}
}

@Unpublished{ertl97ibm,
  author = 	 {M. Anton Ertl},
  title = 	 {Fast high-quality {JavaVM} compilation},
  note = 	 {Seminar given at IBM Watson Research Center,
                  Yorktown Heights},
  year =	 {1997},
  month =	 aug,
  abstract =	 {I will discuss how to apply the technology we
                  developed for our Forth native code compilation
                  project RAFTS to JavaVM just-in-time (JIT)
                  compilation, and describe parts of the CACAO JavaVM
                  just-in-time compiler. Overall, this gives an
                  outline how CACAO will probably look in the
                  future.\par For generating good code for register
                  machines from stack-based languages, we have to avoid
                  the overheads of the straightforward memory-based
                  stack representation; instead, we keep the stack
                  items in registers, and eliminate as many
                  stack-pointer updates as possible by combining
                  them.\par The basis of our native code generation
                  technique is a method for converting the stack-based
                  code for a basic block into a data dependence graph;
                  this allows us to apply standard code generation
                  techniques: we select code with tree parsing,
                  schedule the instructions with list scheduling, and
                  use a simple basic block register allocator; these
                  phases are merged into as few passes as possible to
                  achieve very fast compilation. The global
                  (intraprocedural) register allocation for the JavaVM
                  is performed by another simple register allocator
                  that assigns a register to each stack slot that is
                  alive at a basic block boundary, and one register to
                  each local variable slot.}
}

@InProceedings{kuehn97a,
author	=	"Thomas Gschwind and {eva} K{\"{u}}hn",
title	=	"Dynamic Replica Configuration Architectures in Distributed Intranets",
booktitle=	"Proceedings of the First European International Conference on 
                 Parallel and Distributed Systems (Euro-PDS'97)",
year	=	"1997",
address	=	"Barcelona",
month	=	"",
note	=	""
}

@InProceedings{kuehn97b,
author	=	"{eva} K{\"{u}}hn and Christian K{\"{u}}hn and Marcus Herzog",
title	=	"The Implementation of a Distributed Hypermedia Archive for Architectural
                 Design Precedents",
booktitle=	"Proceedings of the 15th ECAADE Conference",
year	=	"1997",
address	=	"Vienna",
month	=	"September",
note	=	""
}

@InProceedings{kuehn97c,
author	=	"{eva} K{\"{u}}hn and Georg Nozicka",
title	=	"Post-Client/Server Coordination Tools",
booktitle=	"Coordination Technology for Collaborative Applications",
year	=	"1997",
publisher=	"Springer Series Lecture Notes in Computer Science",
address	=	"",
month	=	"",
editor  =       "Wolfram Cohen and Gustaf Neumann",
note	=	""
}

@InProceedings{ertl&krall93deutsch,
  author = 	 "M. Anton Ertl and Andreas Krall",
  title = 	 "{Benutzerdefinierte Constraints}",
  booktitle =	 "9.~Workshop Logische Programmierung",
  pages =	 "18--22",
  year =	 "1993",
  address =	 "Hagen",
  series =	 "FernUniversit{\"a}t Hagen 146-10/1993",
  url=		 "http://www.complang.tuwien.ac.at/papers/ertl%26krall93deutsch.ps.gz",
  note =	 "In German",
  abstract =	 "{Um kombinatorische Suchprobleme zu l"osen, werden
		  Konsistenztechniken in Logik-Programmiersprachen
		  verwendet. Bei vielen dieser Probleme reichen
		  allerdings die vordefinierten Constraints nicht aus.
		  Leider sind Constraints, die mit lookahead- oder
		  forward-Deklarationen definiert werden, oft sehr
		  langsam. In dieser Arbeit stellen wir einen neuen
		  Ansatz f"ur benutzerdefinierte Constraints vor.
		  Benutzerdefinierte Constraints sind normale
		  Prolog-Pr"adikate mit einer zus"atzlichen
		  Constraint-Deklaration. Sie erm"oglichen eine genaue
		  Kontrolle "uber Laufzeit und Suchraumbeschr"ankung.
		  Damit lassen sich gewaltige Laufzeitverbesserungen
		  gegen"uber lookahead-De\-kla\-ra\-tionen erzielen.}"
}

@Article{ertl97objects,
  author = 	 {M. Anton Ertl},
  title = 	 {Yet Another {Forth} Objects Package},
  journal = 	 {Forth Dimensions},
  year = 	 {1997},
  volume =	 {19},
  number =	 {2},
  pages =	 {37--43},
  url=		 {http://www.complang.tuwien.ac.at/forth/objects/objects.html}
}

@Article{ertl97structs,
  author = 	 {M. Anton Ertl},
  title = 	 {Yet Another {Forth} Structures Package},
  journal = 	 {Forth Dimensions},
  year = 	 {1997},
  volume =	 {19},
  number =	 {3},
  pages =	 {13--16},
  url =		 {http://www.complang.tuwien.ac.at/forth/objects/structs.html}
}

@InProceedings{maierhofer&ertl98,
  author = 	 "Martin Maierhofer and M. Anton Ertl",
  title = 	 "Local Stack Allocation",
  crossref =	 "cc98",
  pages =	 "189--203",
  url =		 "http://www.complang.tuwien.ac.at/papers/maierhofer%26ertl98.ps.gz",
  abstract =	 "Considering the renewed interest in stack machines
		  (in particular, the Java Virtual Machine), efficient
		  execution of Algol-family languages on this class of
		  hardware becomes increasingly important. Local
		  variable accesses in the source language should be
		  translated into stack accesses on the target machine
		  (in analogy to register allocation on register
		  machines).\par In this paper we explore such stack
		  allocation techniques for basic blocks. We present
		  some improvements to Phil Koopman's \emph{stack
		  scheduling}, add an instruction scheduler and
		  compare the result with an optimal stack allocation
		  and instruction scheduling strategy. Stack
		  scheduling in cooperation with depth first postorder
		  instruction scheduling produces results close to the
		  optimum. The optimizations discussed in this paper
		  are profitable only for stack hardware where stack
		  manipulation operations are faster than local
		  variable accesses."
}

@Proceedings{cc98,
  title = 	 "Compiler Construction (CC'98)",
  booktitle = 	 "Compiler Construction (CC'98)",
  year = 	 "1998",
  key =		 "CC'98",
  editor =	 "Kai Koskimies",
  OPTvolume =	 "1383",
  OPTseries =	 "LNCS",
  publisher =	 "Springer LNCS~1383",
  address =	 "Lisbon"
}

@InProceedings{krall+98,
  author = 	 "Andreas Krall and Anton Ertl and Michael Gschwind",
  title = 	 "{JavaVM} Implementation: Compilers versus Hardware",
  crossref =	 "acac98",
  pages =	 "101--110",
  url =		 "http://www.complang.tuwien.ac.at/papers/krall+98.ps.gz",
  abstract =	 "The Java Virtual Machine (JavaVM) has contributed
		  greatly to Java's success because it provides a
		  common intermediate format which can be shared
		  across the Internet. Unfortunately, the JavaVM has
		  been optimized for an interpreted model, resulting
		  in inferior performance because its stack-based
		  execution model cannot exploit instruction-level
		  parallelism. The inherent serialization of the stack
		  execution model can be addressed either by using
		  compilation techniques or by hardware. In this
		  article, we review the different JavaVM
		  implementation methods based on our experiences with
		  the implementation of the CACAO just-in-time
		  compiler.  For comparison, we have also investigated
		  different hardware architectures for the direct
		  implementation of the JavaVM."
}

@Proceedings{acac98,
  title = 	 "Computer Architecture 98 (ACAC '98)",
  booktitle = 	 "Computer Architecture 98 (ACAC '98)",
  year = 	 "1998",
  key =		 "ACAC '98",
  editor =	 "John Morris",
  volume =	 "20",
  number =	 "4",
  series =	 "Australian Computer Science Communications",
  publisher =	 "Springer",
  address =	 "Perth"
}

@InProceedings{PuntJMLC97,
  author = 	 "Franz Puntigam",
  title = 	 "Types that Reflect Changes of Object Usability",
  booktitle = 	 "Proceedings of the Joint Modular Languages Conference (JMLC'97)",
  year = 	 1997,
  month =	 mar,
  number =	 1204,
  series =	 "Lecture Notes in Computer Science",
  publisher =	 "Springer-Verlag",
  address =	 "{L}inz, {A}ustria",
  url =		 "http://www.complang.tuwien.ac.at/franz/papers/jmlc97.ps.gz"
}

@InProceedings{PePuECOOP-WS97,
  author = 	 "Christof Peter and Franz Puntigam",
  title = 	 "Static Type Checking and Deadlock Prevention in Systems Based on Asynchronous Message Passing",
  booktitle = 	 "Proceedings of the ECOOP Workshop on Models, Formalisms and Methods for Distributed Object-Oriented Computing",
  year =	 1997,
  address =	 "Jyv{\"a}skyl{\"a}, Finland",
  month =	 jun,
  url =		 "http://www.complang.tuwien.ac.at/franz/papers/ecoop-ws97.ps.gz"
}

@InProceedings{PuntECOOP97,
  author = 	 "Franz Puntigam",
  title = 	 "Coordination Requirements Expressed in Types for Active Objects",
  booktitle = 	 "Proceedings {ECOOP} '97",
  editor =	 "Mehmet Aksit and Satoshi Matsuoka",
  series =	 "Lecture Notes in Computer Science",
  year =	 1997,
  number =	 1241,
  publisher =	 "Springer-Verlag",
  address =	 "{Jyv\"askyl\"a}, Finland",
  month =	 jun,
  url =		 "http://www.complang.tuwien.ac.at/franz/papers/ecoop97.ps.gz"
}



@INPROCEEDINGS{KrVi97,
	AUTHOR      = {Andreas Krall and Jan Vitek},
	TITLE       = {On Extending {Java}},
	BOOKTITLE   = {Joint Modular Languages Conference ({JMLC'97})},
	EDITOR      = {Hanspeter M\"ossenb\"ock},
	SERIES      = {LNCS 1204},
	PUBLISHER   = {Springer},
	PAGES       = {321--335},
	ADDRESS     = {Linz},
url = {http://www.complang.tuwien.ac.at/papers/jmlc_97_krall_vitek.ps.gz},
	YEAR        = 1997}

@INPROCEEDINGS{KrViHo97,
	AUTHOR      = {Andreas Krall and Jan Vitek and Nigel Horspool},
	TITLE       = {Near Optimal Hierarchical Encoding of Types},
	BOOKTITLE   = {11th European Conference on Object Oriented Programming ({ECOOP'97})},
	EDITOR      = {Mehmet Aksit and Satoshi Matsuoka},
	SERIES      = {LNCS 1241},
	PUBLISHER   = {Springer},
	PAGES       = {128--145},
	ADDRESS     = {Finland},
	url = {http://www.complang.tuwien.ac.at/papers/ecoop_97_krall_vitek_horspool.ps.gz},
	YEAR        = 1997}

@INPROCEEDINGS{KrGr97,
	AUTHOR      = {Andreas Krall and Reinhard Grafl},
	TITLE       = {{CACAO} -- A 64 bit {JavaVM} Just-in-Time Compiler},
	BOOKTITLE   = {PPoPP'97 Workshop on Java for Science and Engineering Computation},
	EDITOR      = {Geoffrey C.~Fox and Wei Li},
	ORGANIZATION= {ACM},
	MONTH       = {June},
	ADDRESS     = {Las Vegas},
	YEAR        = 1997}

@INPROCEEDINGS{ViHoKr97,
	AUTHOR      = {Jan Vitek and Nigel Horspool and Andreas Krall},
	TITLE       = {Efficient Type Inclusion Tests},
	BOOKTITLE   = {Conference on Object Oriented Programming Systems,
	               Languages \& Applications ({OOPSLA'97})},
	EDITOR      = {Toby Bloom},
	ORGANIZATION= {ACM},
	MONTH       = {October},
	PAGES       = {142--157},
	ADDRESS     = {Atlanta},
url = "http://www.complang.tuwien.ac.at/papers/oopsla_97_vitek-horspool-krall.ps.gz",
	YEAR        = 1997}

@ARTICLE{KrGr97b,
	AUTHOR      = {Andreas Krall and Reinhard Grafl},
	TITLE       = {{CACAO} -- A 64 bit {JavaVM} Just-in-Time Compiler},
	JOURNAL     = {Concurrency: Practice and Experience},
	PUBLISHER   = {John Wiley \& Sons},
	VOLUME      = {9},
	NUMBER      = {11},
	PAGES       = {1017--1030},
	YEAR        = 1997}


@INPROCEEDINGS{Krall97talka,
	AUTHOR      = {Andreas Krall},
	TITLE       = {The {VAM$_{\rm 2\&1P}$} - combining the advantages of {VAM$_{\rm 2P}$}
	               and {VAM$_{\rm 1P}$}},
	BOOKTITLE   = {5th Workshop on Functional Logic Programming},
	ADDRESS     = {Schwarzenberg},
	YEAR        = 1997}

@UNPUBLISHED{Krall97talkb,
	AUTHOR      = {Andreas Krall},
	TITLE       = {{Effiziente Implementierung der Java Virtuellen Maschine}},
	NOTE        = {Universit\"at Salzburg},
	MONTH       = {October},
	YEAR        = 1997}

@UNPUBLISHED{Krall97talkc,
	AUTHOR      = {Andreas Krall},
	TITLE       = {{Effiziente Implementierung der Java Virtuellen Maschine}},
	NOTE        = {Universit\"at Passau},
	MONTH       = {November},
	YEAR        = 1997}

@UNPUBLISHED{Krall97talkd,
	AUTHOR      = {Andreas Krall},
	TITLE       = {Efficient Implementation of the {JavaVM}},
	NOTE        = {Universit\'e de Gen\`eve},
	MONTH       = {December},
	YEAR        = 1997}

@UNPUBLISHED{Krall97talke,
	AUTHOR      = {Andreas Krall},
	TITLE       = {{Effiziente Implementierung der Java Virtuellen Maschine}},
	NOTE        = {Universit\"at Klagenfurt},
	MONTH       = {December},
	YEAR        = 1997}

@UNPUBLISHED{Krall98talka,
	AUTHOR      = {Andreas Krall},
	TITLE       = {Efficient Implementation of the {JavaVM}},
	NOTE        = {Thomas Watson Research Center, Yorktown Heights},
	MONTH       = {March},
	YEAR        = 1998}

@UNPUBLISHED{Krall98talkb,
	AUTHOR      = {Andreas Krall},
	TITLE       = {{Effiziente Implementierung der Java Virtuellen Maschine}},
	NOTE        = {Universit\"at Linz},
	MONTH       = {April},
	YEAR        = 1998}

@UNPUBLISHED{Krall98talkc,
	AUTHOR      = {Andreas Krall},
	TITLE       = {Efficient Implementation of the {JavaVM}},
	NOTE        = {Universit\"at Wien},
	MONTH       = {May},
	YEAR        = 1998}

@UNPUBLISHED{Krall98talkd,
	AUTHOR      = {Andreas Krall},
	TITLE       = {{Effiziente Implementierung der Java Virtuellen Maschine}},
	NOTE        = {Universit\"at Chemnitz},
	MONTH       = {Juli},
	YEAR        = 1998}


@INPROCEEDINGS{KrPr98a,
	AUTHOR      = {Andreas Krall and Mark Probst},
	TITLE       = {Monitors and Exceptions: How to implement {Java} efficiently},
	BOOKTITLE   = {ACM 1998 Workshop on Java for High-Performance Computing},
	EDITOR      = {Siamak Hassanzadeh and Klaus Schauser},
	ORGANIZATION= {ACM},
	MONTH       = {March},
	PAGES       = {15--24},
	ADDRESS     = {Palo Alto},
	YEAR        = 1998}

@ARTICLE{KrPr98b,
	AUTHOR      = {Andreas Krall and Mark Probst},
	TITLE       = {Monitors and Exceptions: How to implement {Java} efficiently},
	JOURNAL     = {Concurrency: Practice and Experience},
	PUBLISHER   = {John Wiley \& Sons},
	VOLUME      = {10},
	NUMBER      = {11--13},
	PAGES       = {837--850},
	YEAR        = 1998}

@INPROCEEDINGS{Kr98a,
	AUTHOR      = {Andreas Krall},
	TITLE       = {{CACAO - Eine effiziente JavaVM Implementierung}},
	BOOKTITLE   = {JAVA und Eingebettete Systeme '98},
	EDITOR      = {W. Rosenstiel},
	ORGANIZATION= {GI/ITG},
	PAGES       = {to appear},
	ADDRESS     = {Karlsruhe},
	MONTH       = {September},
	YEAR        = 1998}

@INPROCEEDINGS{Kr98b,
	AUTHOR      = {Andreas Krall},
	TITLE       = {Efficient {JavaVM} Just-in-Time Compilation},
	BOOKTITLE   = {International Conference on Parallel Architectures and
	               Compilation Techniques},
	EDITOR      = {Jean-Luc Gaudiot},
	ORGANIZATION= {IFIP,ACM,IEEE},
	PUBLISHER   = {North-Holland},
	PAGES       = {205--212},
	ADDRESS     = {Paris},
	MONTH       = {October},
	YEAR        = 1998}

@INPROCEEDINGS{KrTom99,
	AUTHOR      = {Andreas Krall and Philipp Tomsich},
	TITLE       = {Garbage Collection for Large Memory {Java} Applications},
	BOOKTITLE   = {HPCN'99, Java in High Performance Computing},
	EDITOR      = {Geoffrey Fox and Vladimir Getov},
	SERIES      = {LNCS 1593},
	PUBLISHER   = {Springer},
	PAGES       = {895--905},
	ADDRESS     = {Amsterdam},
	MONTH       = {April},
	YEAR        = 1999}

@INPROCEEDINGS{EckKr99,
	AUTHOR      = {Erik Eckstein and Andreas Krall},
	TITLE       = {Minimizing Cost of Local Variables Access for {DSP}-Processors},
	BOOKTITLE   = {LCTES'99 Workshop on Languages, Compilers and Tools for Embedded Systems},
	EDITOR      = {Y. Annie Liu and Reinhard Wilhelm},
	ORGANIZATION= {ACM},
	SERIES      = {SIGPLAN},
	VOLUME      = {34(7)},
	PAGES       = {20--27},
	MONTH       = {July},
	ADDRESS     = {Atlanta},
	YEAR        = 1999}

@UNPUBLISHED{KrallSyslvain99talk,
	AUTHOR      = {Andreas Krall and Sylvain Lelait},
	TITLE       = {{Compilation Techniques for Multimedia Processors}},
	NOTE        = {Dagstuhl Seminar on Instruction Level Parallelism and
	               Parallelizing Compilation},
	MONTH       = {April},
	YEAR        = 1999}

@UNPUBLISHED{Krall99talk,
	AUTHOR      = {Andreas Krall},
	TITLE       = {{\"Ubersetzungstechniken f\"ur Multimedia-Prozessoren}},
	NOTE        = {Universit\"at Jena},
	MONTH       = {Juni},
	YEAR        = 1999}

@UNPUBLISHED{EcksteinKrall99talk,
	AUTHOR      = {Erik Eckstein and Andreas Krall},
	TITLE       = {{Optimal and Heuristic Solutions for Minimizing Costs of
                    Local Variables Access for DSPs}},
	NOTE        = {SCOPES'99},
	MONTH       = {September},
	YEAR        = 1999}

@ARTICLE{KrallLelait,
	AUTHOR      = {Andreas Krall and Sylvain Lelait},
	TITLE       = {Compilation Techniques for Multimedia Processors}},
	JOURNAL     = {International Journal of Parallel Programming},
	PUBLISHER   = {Kluwer},
	VOLUME      = {28(4)},
	PAGES       = {347--361},
	YEAR        = 2000}



@TechReport{DELM97,
  author = 	 {Dominique de Werra and Christine Eisenbeis and Sylvain Lelait and Bruno Marmol},
  title = 	 {On a graph-theoretical model for cyclic register allocation},
  institution =  {École Polytechnique Fédérale de Lausanne},
  year = 	 {1997},
  key =		 {DELM97},
  number =	 {ORWP 97/04},
  address = 	 {Lausanne, Switzerland},
  month =	 {May},
}

@TechReport{ELM98,
  author = 	 {Christine Eisenbeis and Sylvain Lelait and Bruno Marmol},
  title = 	 {Circular-arc graph coloring and unrolling},
  institution =  {INRIA},
  year = 	 {1998},
  key =		 {ELM98},
  type =	 {{Research Report}},
  number = 	 {3336},
  address = 	 {Rocquencourt},
  month =	 {January},
}

@TechReport{LGE98a,
  author = 	 {Sylvain Lelait and Guang R. Gao and Christine Eisenbeis},
  title = 	 {A new fast algorithm for optimal register allocation in modulo scheduled loops},
  institution =  {INRIA},
  year = 	 {1998},
  key =		 {LGE98},
  type =	 {{Research Report}},
  number = 	 {3337},
  address = 	 {Rocquencourt},
  month =	 {January},
}

@Article{LM97,
  author = 	 {Sylvain Lelait and Bruno Marmol},
  title = 	 {Allocation cyclique de registres et d\'eroulage de boucles},
  journal = 	 {Technique et Science Informatiques, Herm\`es},
  year = 	 {1997},
  key =		 {LM97},
  volume =	 {16},
  number =	 {5},
  month =	 {Mai},
  pages =	 {583-608},
}

@InProceedings{ELM97,
  author = 	 {Christine Eisenbeis and Sylvain Lelait and Bruno Marmol},
  title = 	 {Circular-arc graph coloring and unrolling},
  booktitle = {Proceedings of the $5^{th}$ Twente Workshop on Graphs
and Combinatorial Optimization},
  key =		 {ELM97},
  editor =	 {U. Faigle and C. Hoede},
  year =	 {1997},
  organization = {Universiteit Twente},
  address =	 {Twente, Netherlands},
  month =	 {May},
  pages =	 {71-74},
}

@InProceedings{ELM98a,
  key = 	 "ELM98a",
  author = 	 "Christine Eisenbeis and Sylvain Lelait and Bruno Marmol",
  title = 	 "Stockage en registres des variables indicées",
  booktitle = 	 "Actes des $4^{e}$ Journées Adéquation Algorithme
		  Architecture en traitement du signal et images",
  year = 	 "1998",
  pages = 	 "121-126",
  organization = "CEA/LETI",
  address = 	 "Saclay, France",
  month = 	 jan,
}

@InProceedings{LGE98,
  key = 	 "LGE98",
  author = 	 "Sylvain Lelait and Guang R. Gao and Christine Eisenbeis",
  title = 	 "{A New Fast Algorithm for Optimal Register Allocation
in Modulo Scheduled Loops}",
 booktitle =  "Proceedings of the $7^{th}$ International Conference on
  Compiler Construction, CC'98, held as part of ETAPS'98",
  year = 	 "1998",
  editor =	 "Kai Koskimies",
  volume =	 "1383",
  series =	 "Lecture Notes in Computer Science",
  pages =	 "204--218",
  publisher = "Springer",
  address =	 "Lisbon, Portugal",
  month = 	 "March 28 - April 4",
}

@InProceedings{EL98,
  key =		 {EL98},
  author =	 {Christine Eisenbeis and Sylvain Lelait},
  title =	 "{LoRA: a Package for Loop Optimal Register Allocation}",
  booktitle = {3rd International Workshop on Code Generation for Embedded
Processors, Witten, Germany},
  year =	 {1998},
  month =	 {March},
  OPTnote = 	 {},
  OPTannote = 	 {}
}

@InProceedings{BCELRSW98,
  author = 	 {F. Bodin and Z. Chamski and Ch. Eisenbeis and S. Lelait and E. Rohou and A. Sawaya and A. Seznec and J. Wang},
  title = 	 {{Towards a Retargetable Framework for Software Pipelining}},
  booktitle = 	 {Proceedings of 7th Intl. Workshop on Compilers for Parallel Computers, CPC'98},
  OPTcrossref =  {},
  key =		 {BCELRSW98},
  editor =	 {},
  OPTvolume = 	 {},
  OPTnumber = 	 {},
  OPTseries = 	 {},
  year =	 {1998},
  OPTorganization = {},
  OPTpublisher = {},
  address =	 {Linköping, Sweden},
  month =	 {June},
  pages =	 {91--99},
  OPTnote = 	 {},
  OPTannote = 	 {}
}



@UNPUBLISHED{Lelait97talk,
        AUTHOR      = {Sylvain Lelait},
        TITLE       = {Cyclic register allocation in loops by graph coloring and decomposition},
        ADDRESS     = {INRIA Rocquencourt},
        MONTH       = {April},
        YEAR        = {1997}
}

@InProceedings{ELM97talk,
  author = 	 {Christine Eisenbeis and Sylvain Lelait and Bruno Marmol},
  title = 	 {Circular-arc graph coloring and unrolling},
  booktitle = {Proceedings of the $5^{th}$ Twente Workshop on Graphs
and Combinatorial Optimization},
  key =		 {ELM97},
  editor =	 {U. Faigle and C. Hoede},
  year =	 {1997},
  organization = {Universiteit Twente},
  address =	 {Twente, Netherlands},
  month =	 {May},
  pages =	 {71-74},
}


@InProceedings{ELM98atalk,
  key = 	 "ELM98atalk",
  author = 	 "Christine Eisenbeis and Sylvain Lelait and Bruno Marmol",
  title = 	 "Stockage en registres des variables indicées",
  booktitle = 	 "Actes des $4^{e}$ Journées Adéquation Algorithme
		  Architecture en traitement du signal et images",
  year = 	 "1998",
  pages = 	 "121-126",
  organization = "CEA/LETI",
  address = 	 "Saclay, France",
  month = 	 jan,
}

@InProceedings{LGE98talk,
  key = 	 "LGE98talk",
  author = 	 "Sylvain Lelait and Guang R. Gao and Christine Eisenbeis",
  title = 	 "{A New Fast Algorithm for Optimal Register Allocation
in Modulo Scheduled Loops}",
 booktitle =  "Proceedings of the $7^{th}$ International Conference on
  Compiler Construction, CC'98, held as part of ETAPS'98",
  year = 	 "1998",
  editor =	 "Kai Koskimies",
  volume =	 "1383",
  series =	 "Lecture Notes in Computer Science",
  pages =	 "204--218",
  publisher = "Springer",
  address =	 "Lisbon, Portugal",
  month = 	 "March 28 - April 4",
}

@Article{DELM99, 
author = {Dominique de Werra and Christine Eisenbeis and Sylvain Lelait and Bruno Marmol}, 
title = {On a graph-theoretical model for cyclic register allocation}, 
journal = {Discrete Applied Mathematics}, 
year = {1999}, 
key = {DELM99}, 
volume = {93}, 
number = {2-3}, 
month = {July}, 
pages = {191-203} 
} 

@InProceedings{GL99,
  author =       {Daniela Genius and Sylvain Lelait},
  title =        {{Compiler-Directed Reordering of Data by Cyclic Graph Coloring}},
  booktitle =    {Proceedings of the 5th International Euro-Par Conference},
  key =          {GL99},
  editor =       {P. Amestoy and P. Berger and M. Dayd\'e and I. Duff and V. Frayss\'e and L. Giraud and D. Ruiz
},
  number =       {1685},
  series =       {Lecture Notes in Computer Science},
  year =         {1999},
  publisher = {Springer-Verlag},
  address =      {Toulouse, France},
  month =        {August/September},
  pages =        {1260--1264},
}


@Unpublished{DELS99,
  author = {D. de Werra, Ch. Eisenbeis, S. Lelait and E. Stöhr },
  title =  {Circular-arc Graph Coloring: on Chords and Circuits in the Meeting Graph },
  note =   {European Chapter on Combinatorial Optimization XII, Bendor},
  key =    {DELS99},
  month =  {May},
  year =   {1999},
  annote = {Conference without Proceedings}
}



@Unpublished{KL99,
  author = {Andreas Krall and Sylvain Lelait},
  title =  {Compilation Techniques for Multimedia Processors: An Example with the UltraSPARC and the Visual Instruction Set.},
  note =   {S\'eminaire A3, INRIA-Rocquencourt},
  key =    {KL99},
  month =  {June 14},
  year =   {1999},
  annote = {talk}
}

@Unpublished{Lelait99,
  author = {Sylvain Lelait},
  title =  {Instruction-Level Parallelism Exploitation in Modern Microprocessors},
  note =   {Talk by Siemens ZT SE 4, Munich},
  key =    {Lelait99},
  month =  {October 10},
  year =   {1999},
  annote = {talk}
}

@TechReport{GL99tr, 
author = {Daniela Genius and Sylvain Lelait}, 
title = {{Improving Data Layout through Coloring-directed Array merging}}, 
institution = {Fakultät für Informatik}, 
year = {1999}, 
key = {GL99}, 
type = {{Research Report}}, 
number = {iratr-1999-3}, 
address = {Universität Karlsruhe}, 
month = {January}, 
} 

@TechReport{EL99, 
author = {Christine Eisenbeis and Sylvain Lelait}, 
title = {LoRA: a Package for Loop Optimal Register Allocation}, 
institution = {INRIA}, 
year = {1999}, 
key = {EL99}, 
type = {{Research Report}}, 
number = {3709}, 
address = {Rocquencourt}, 
month = {June}, 
} 


@Unpublished{Gregg98ucd_talk,
  author =       {David Gregg},
  title =        {Compiling for EPIC: Finding the Parallelism in
                  Ordinary Sequential Code},
  note =         {Presented at University College Dublin},
  month =        {August},
  year =         1998}

@Unpublished{Gregg99dag_talk,
  author =       {David Gregg},
  title =        {Software Pipelining with Iteration Preselection},
  note =         {Dagstuhl Seminar on Instruction Level Parallelism and
                  Parallelizing Compilation},
  month =        {April},
  year =         1999}

@Unpublished{Gregg99inria_talk,
  author =       {David Gregg},
  title =        {Global Software Pipelining with Iteration Preselection},
  note =         {Presented at INRIA Rocquencourt Research Centre},
  month =        {October},
  year =         1999}



@unpublished{neu97-tuticlp,
        AUTHOR      = {Ulrich Neumerkel},
	title       = {Teaching Prolog and CLP},
        note        = {Tutorial given at ICLP'97},
        month       = jul,
        abstract = { 
   		 We discuss in this tutorial the problems that arise in introductory
   		 courses and propose several improvements. A major improvement concerns
   		 the way how Prolog programs can be read and understood. The
   		 traditional approach in teaching Prolog focuses first on two separate
   		 issues: the meaning of logic formulae and Prolog's execution
   		 mechanism. While both are intimately related to each other, this
   		 relation is rarely exploited. On the one hand reading logic formulae
   		 is not explained in depth but on the other hand an overly detailed
   		 account of Prolog's execution mechanism is given which is a burden to
   		 comprehending Prolog.  A practical way of reading Prolog programs is
   		 presented that focuses on the meaning of the programs and avoids
   		 execution traces and proof trees altogether. This reading scheme is
   		 then extended to cope with the more procedural aspects of Prolog like
   		 termination and resource consumption. Equally well the reading scheme
   		 is applied to constraints.  The reading scheme allows a programmer to
   		 reason in an efficient manner while still avoiding reference to
   		 superfluous details of the computation.}
}

@InProceedings{neu97-wlpe,
  author = 	{Ulrich Neumerkel and Christoph Rettig and Christian Schallhart},
  title =       {Visualizing Solutions with Viewers},
  booktitle =  {Proceedings of the 8th Workshop on Logic Programming Environments},
  year = 	 "1997",
  month = jul
}

@InProceedings{ertl98,
  author = 	 {M. Anton Ertl},
  title = 	 {\texttt{State}-smartness --- Why it is Evil and How
                  to Exorcise it},
  booktitle = 	 {EuroForth'98 Conference Proceedings},
  year =	 {1998},
  address =	 {Schlo\ss{} Dagstuhl},
  url = 	 {http://www.complang.tuwien.ac.at/papers/ertl98.ps.gz},
  abstract = 	 {\texttt{State}-smart words provide a number of unpleasant
                  surprises to their users. They are applied in two
                  contexts, and they fail in both: 1) for providing an
                  arbitrary combination of interpretation and
                  compilation semantics; 2) for optimizing with a
                  special implementation of the (default) compilation
                  semantics. This paper discusses these issues and
                  shows programmers and system implementors how to
                  avoid \texttt{state}-smart words. It also reports our
                  experiences in converting the \texttt{state}-smart
                  words in Gforth into a clean solution: little work
                  and few problems.}
}

@InProceedings{ertl99,
  author =	 {M. Anton Ertl},
  title =	 {Optimal Code Selection in {DAG}s},
  booktitle =	 {Principles of Programming Languages (POPL '99)},
  year =	 {1999},
  OPTpages =	 {242--249},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl99.ps.gz},
  abstract =	 {We extend the tree parsing approach to code
                  selection to DAGs. In general, our extension does
                  not produce the optimal code selection for all DAGs
                  (this problem would be NP-complete), but for certain
                  code selection grammars, it does. We present a
                  method for checking whether a code selection grammar
                  belongs to this set of DAG-optimal grammars, and use
                  this method to check code selection grammars adapted
                  from lcc: the grammars for the MIPS and SPARC
                  architectures are DAG-optimal, and the code
                  selection grammar for the 386 architecture is almost
                  DAG-optimal.}
}

@Unpublished{ertl+99,
  author =	 {M. Anton Ertl and David Gregg and Andreas Krall},
  title =	 {Optimal Global Instruction Scheduling with Unlimited
                  Resources},
  note =	 {http://www.complang.tuwien.ac.at/papers/ertl+99.ps.gz},
  url =		 {http://www.complang.tuwien.ac.at/papers/ertl+99.ps.gz},
  year =	 {1999},
  abstract =	 {We present a method for optimal whole-procedure
                  instruction scheduling for machines with unlimited
                  resources: The program and its dependences are
                  transformed into a linear programming problem, which
                  can then be solved using an off-the-shelf linear
                  problem solver. This scheduler is an intermediate
                  step towards a more realistic global instruction
                  scheduler, but it has also an immediate use: We use
                  it to evaluate the significance of the restrictions
                  imposed by static scheduling and for determining an
                  upper bound for the performance of more realistic
                  global instruction schedulers. We have applied the
                  scheduler to several benchmarks and compared it to a
                  dynamic scheduler with unlimited resources. For some
                  benchmarks, they perform equally well; for others,
                  dynamic scheduling performs much better; on closer
                  inspection it appears that the causes for this
                  performance difference can be reduced by performing
                  well-known transformations before scheduling (in
                  particular, loop transformations).}
}


@MastersThesis{czezatke98,
  author =      {Christian Czezatke},
  title =       {dtfs---A Log-Structured Filesystem for Linux},
  school =      {{Technische  Universit\"{a}t Wien}},
  type =        {Diplomarbeit},
  year =        {1998},
  address =     {Austria},
  url =		{http://www.complang.tuwien.ac.at/Diplomarbeiten/czezatke98.ps.gz},
  abstract =	{This thesis discusses the design and implementation of
                  dtfs, a log-structured filesystem for Linux. dtfs
                  features a generic core providing logging facilities that
                  are filesystem-independent and a ``filesystem
                  personality'' that borrows heavily from the Linux ext2
                  filesystem. Furthermore, the dtfs design supports the
                  placement of multiple filesystems (even of different
                  filesystem personalities) on top of one dtfs filesystem
                  device and the creation of snapshots and different
                  versions for these filesystems.\par I have also made a
                  first implementation of dtfs using the Linux 2.0.33
                  kernel and investigated the performance effects caused by
                  a log-structured filesystem. The results show that this
                  implementation of dtfs is already approximately on par
                  with the 2.0.33 ext2 filesystem performance-wise. This
                  also illustrates that traditional approaches have been
                  closing the performance gap during the last years,
                  especially when dealing with write and metadata update
                  operations. However, other qualtitative improvements
                  offered by the dtfs design, such as fast crash recovery
                  or the ability to create consistent backups without
                  restricting user access to the filesystem, cannot be
                  added to traditional approaches easily.}
}
 
@InProceedings{PuntEuroPar98,
  author = 	 "Franz Puntigam",
  title = 	 "Dynamic Type Information in Process Types",
  booktitle = 	 "Proceedings EuroPar '98",
  editor =	 "David Pritchard and Jeff Reeve",
  number =	 1470,
  series =	 lncs,
  year =	 1998,
  address =	 "Southampton, England",
  month =	 sep,
  publisher =	 springer,
url=       "http://www.complang.tuwien.ac.at/franz/papers/europar98.ps.gz"
}

@InProceedings{PeterPuntMiddleware98,
  author = 	 "Christof Peter and Franz Puntigam",
  title = 	 "Coordination of {CORBA} Objects with Process Types",
  booktitle = 	 "Middleware '98, Work-in-progress papers",
  year =	 1998,
  address =	 "The Lake District, England",
  month =	 sep,
url=       "http://www.complang.tuwien.ac.at/franz/papers/hicss94.ps.gz"
}

@InBook{PuntOOPMOC98,
  author = 	 "Franz Puntigam",
  title = 	 "Object-Oriented Programming and Models of Concurrency",
  chapter = 	 "Flexible Types for a Concurrent Model",
  publisher = 	 "Springer-Verlag",
  year = 	 1998,
  editor =	 "Gul Agha and Fiorella De Cindio",
  series =	 "Advances in Petri Nets",
url=       "http://www.complang.tuwien.ac.at/franz/papers/middleware98.ps.gz"
}

@Article{kuehn98vsm,
  author =       {eva K\"{u}hn},
  title =        {How to Approach the Virtual Shared Memory Paradigm},
  journal =      {Journal of Parallel and Distributed Computing Practices,
                  Special Issue on Virtual Shared Memory for Distributed Architectures,
                  NOVA Science Books, Commack, New York},
  year =         {1998},
  volume =       {1},
  number =       {3},
  pages =        {},
  url =          {}
}

@InProceedings{wernhartkuehn98,
  author =       {Heidemarie Wernhart and eva K\"{u}hn},
  title =        "The Replicator Coordination Design Pattern",
  booktitle =    "DAPSYS'98 Distributed and Parallel Systems - (Education, Programming Tools)",
  pages =        "",
  year =         1998,
  date =         "September 28-30",
  address =      "Budapest, Hungary",
}

@InProceedings{ertl99ef,
  author = 	 {M. Anton Ertl},
  title = 	 {Is {Forth} Code Compact? {A} Case Study},
  booktitle = 	 {EuroForth'99 Conference Proceedings},
  year =	 {1999},
  address =	 {St. Petersburg, Russia},
  url = 	 {http://www.complang.tuwien.ac.at/papers/ertl99ef.ps.gz},
  abstract = 	 {Forth advocates often claim that Forth code is
                  smaller, faster, and requires less development time
                  than equivalent programs in other languages.  This
                  paper investigates this claim by comparing a number
                  of parser generators written in various languages
                  with respect to source code size.  The smallest
                  parser generator (14 lines) in this comparison is
                  written in Forth, and the other Forth program is
                  smaller than the others in its class by a factor of
                  8 or more; however, the Forth programs do not have
                  all the features of their counterparts.  I took a
                  closer look at Gray (in Forth) and Coco/R (in
                  Modula-2) and found that several Forth features
                  missing from Modula-2 give Gray more than a factor
                  of three advantage over Coco/R (even if the other
                  size differences were solely due to differences in
                  functionality): run-time code generation; access to
                  the parser and a simple, flexible syntax; and
                  Forth's dictionary.}
}

@InProceedings{PuntigamEuroPar99,
  author = 	 {Franz Puntigam},
  title = 	 {Non-Regular Process Types},
  booktitle = 	 {Proceedings of the 5th European Conference on Parallel Processing (Euro-Par'99)},
  year =	 1999,
  editor =	 {P. Amestoy and others},
  number =	 1685,
  series =	 {LNCS},
  address =	 {Toulouse, France},
  month =	 sep,
  publisher =	 {Springer-Verlag},
url=       "http://www.complang.tuwien.ac.at/franz/papers/europar99.ps.gz"
}

@InProceedings{PePuSOAP99,
  author = 	 {Christof Peter and Franz Puntigam},
  title = 	 {A Concurrent Object Calculus with Types that Express Sequences},
  booktitle = 	 {Proceedings of the Workshop on Semantics of Objects as Processes (SOAP'99)},
  year =	 1999,
  address =	 {Lisbon, Portugal},
  month =	 jun,
url=       "http://www.complang.tuwien.ac.at/franz/papers/soap99.ps.gz"
}

@InProceedings{PuPeSAC99,
  author = 	 {Franz Puntigam and Christof Peter},
  title = 	 {Changeable Interfaces and Promised Messages for Concurrent Components},
  booktitle = 	 {Proceedings of the ACM Symposium on Applied Computing (SAC'99)},
  year =	 1999,
  address =	 {San Antonio, Texas, {USA}},
  month =	 feb,
url=       "http://www.complang.tuwien.ac.at/franz/papers/sac99.ps.gz"
}

@Unpublished{ertl99boulder,
  author = 	 {M. Anton Ertl},
  title = 	 {A fast compiler for a stack-based language},
  note = 	 {Talk given at the University of Colorado in Boulder},
  year =	 {1999},
  month =	 apr,
  abstract =	 {I will present RAFTS, a Forth compiler, focussing on
                  aspects that are also useful in compilers for other
                  langauges.\parSpecific needs addressed by RAFTS are:
                  The source language relies on programmer-visible
                  stacks (like JavaVM and Postscript), it requires
                  very fast compilation (like e.g., Lisp, Prolog,
                  Smalltalk, and JavaVM) to remain interactive, and
                  the source code usually has a very high procedure
                  call frequency (like, e.g., Lisp, Prolog, ML).\par
                  My presentation will give a tour of the compiler,
                  pointing out some of the techniques that address
                  these needs, e.g.: How convert the stack-based code
                  into data flow graphs, enabling the application of
                  standard code generation techniques; how backwards
                  scheduling simplifies the data structure and saves
                  two passes; how we minimized the number of passes
                  without sacrificing code quality; how a
                  threaded-code intermediate representation speeds up
                  code replication optimizations.}
}


@UNPUBLISHED{Neu1996-intas,
 author    = {Ulrich Neumerkel},
 title     = {Guiding heuristics for transformation based interprocedural register allocation in Continuation Prolog},
 note      = {INTAS Workshop on Efficient Symbolic Computing},
 address   = {St. Petersburg},
 year      = 1996}

@UNPUBLISHED{Neu1998-1,
 author    = {Ulrich Neumerkel},
 title     = {Localizing and explaining reasons for nonterminating logic programs with failure slices},
 note      = {INTAS Workshop on Efficient Symbolic Computing},
 address   = {Kiew},
 year      = 1998}

@InProceedings{Neu1998sli,
 author    = "Ulrich Neumerkel",
 title     = {Slicing nichtterminierender Programme},
 booktitle = "13. Workshop Logische Programmierung",
 address   = "Wien",
 year      = 1998}

@article{MesNeu1999,
 author    = {Fred Mesnard and Ulrich Neumerkel},
 title     = {CHR for prototyping abstract interpretatons},
 journal   = {Applied Artifical Intelligence},
 publisher = {Tailor \& Francis},
 note      = {System description, accepted},
 year      = 1999}

@InProceedings{ReiNeuRi1999,
 author    = "Philipp Reisner and Ulrich Neumerkel and Philipp Richter",
 title     = {MERGEMEM},
 booktitle = "6th International Linux-Congress, GUUG",
 address   = "Augsburg",
 year      = 1999}

@InProceedings{NeuMes1999,
 author    = "Ulrich Neumerkel and Fred Mesnard",
 title     = "Localizing and explaining reasons for nonterminating logic programs with failure slices",
 booktitle = "Principles and Practice of Declarative Programming",
 editor    = "Gopalan Nadathur",
 number    = 1702,
 series    = {LNCS},
 publisher = "Springer-Verlag",
 address   = "Paris",
 month     = sep,
 year      = 1999}

@InProceedings{MesNeu2001-4,
 author    = "Fred Mesnard and Ulrich Neumerkel",
 title     = "Applying static analysis techniques for inferring termination conditions of logic programs",
 booktitle = "8th Static Analysis Symposium (SAS'01)",
 editor    = "Patrick Cousot",
 series    = {LNCS},
 number    = 2126,
 publisher = "Springer-Verlag",
 address   = "Paris",
 month     = jul,
 year      = 2001}


@inproceedings{MesNeuPay2001-3,
author= {Fred Mesnard and Ulrich Neumerkel and Etienne Payet},
title= {{cTI}~: un outil pour l'inf{\'e}rence de conditions optimales de terminaison pour {Prolog}},
booktitle= {Dixi\`{e}mes Journ\'{e}es Francophones de Programmation Logique et Programmation par Contraintes (JFPLC'01)},
address = "Paris",
publisher = "Herm\`{e}s Science Publications",
editor = "Philippe Codognet",
month = apr,
year = 2001}

@UNPUBLISHED{Neu2001-1,
 author    = {Ulrich Neumerkel},
 title     = {Localisation des erreurs par "slicing" dans les programmes logiques avec contraintes},
 note      = {System Demonstration, {Dixi\`{e}mes Journ\'{e}es Francophones de Programmation Logique et Programmation par Contraintes (JFPLC'01)}},
 address   = "Paris",
 abstract  = {
  Nous présentons des outils pour expliquer les phénomènes inattendus
  des programmes logiques avec contraintes (PLC). Lesdits outils
  s'appuient sur des lectures sélectives adaptées aux besoins
  éducatifs. Les techniques développées localisent des erreurs en
  déterminant un fragment exécutable du programme, appelé {\em slice} ou
  tranche. Afin d'enlever l'erreur dans le programme original, il est
  indispensable de modifier le fragment proposé par le système. Par
  contre, si on ne modifie que des parties hors du fragment, l'erreur
  persistera. Nos outils ont été intégrés dans un environnement de
  programmation spécialisé pour l'enseignement de la PLC, facilitant les
  lectures sélectives pour l'échec inattendu, les solutions inattendues
  et la non terminaison.
  },
 url       = {http://www.complang.tuwien.ac.at/ulrich/gupu/material/2001-JFPLC/},
 month     = apr,
 year      = 2001}

@UNPUBLISHED{Neu2001-2,
 author    = {Ulrich Neumerkel},
 title     = {GUPU --- un environnement pour l'enseignement de Prolog et de la PLC},
 note      = {Poster, {Dixi\`{e}mes Journ\'{e}es Francophones de Programmation Logique et Programmation par Contraintes (JFPLC'01)}},
 address   = "Paris",
 url       = {http://www.complang.tuwien.ac.at/ulrich/gupu/material/2001-JFPLC/},
 month     = apr,
 abstract  = {
  GUPU est un environnement de programmation spécialisé pour
  l'enseignement de Prolog et de la programmation logique avec
  contraintes.  Notre approche repose sur une technique de lecture
  sélective textuelle à plusieurs niveaux.  Par rapport aux approches
  courantes, les aspects déclaratifs, comme également quelques
  propriétés opérationnelles essentielles ---~en particulier, la terminaison
  ainsi que la non-terminaison~--- sont expliqués tout en évitant les
  notions d'arbres de preuve et les traces.  L'environnement GUPU a les
  caractéristiques suivantes :
  
  + langage Prolog pur \& extensions de la PLC, CLP(FD) de SICStus.
  + intégration de toutes activités de programmation dans une seulle
  commande (sauvegarde, validation syntaxique, compilation, chargement,
  tests d'assertions, pré-notation) permettant un retour immédiat par
  rapport à l'état du système.
  + pratique d'écrire les tests (assertions) puis coder ---
  récemment vulgarisée par le mouvement extreme programming ---
  mettant l'accent sur la spécification du problème.
  + pré-notation automatisée et instantanée.
  + validation de la syntaxe (indentation, orthographe etc.) pour
  renforcer des standards de codage et remédier aux difficultés de la
  syntaxe édimbourgeoise fragile.
  + inférence de conditions de terminaison (cTI).
  + vérification de la non-terminaison (NTI).
  + explication des phénomènes inattendus pour supporter des
  lectures sélectives.
  + affichage des graphismes sans effet de bords et animation du
  processus d'instanciation.
  + messagerie intégrée dans le texte du programme.
  + dialogues trilingues (allemand/français/anglais).
             },
 year      = 2001}


@InProceedings{KraNeuMes2000-1,
 author    = "Stefan Kral and Ulrich Neumerkel and Fred Mesnard",
 title     = {Slicing zur Fehlersuche in (Constraint-) Logikprogrammen},
 note      = {Abstract zur System-Demonstration},
 booktitle = "14. Workshop Logische Programmierung",
 address   = {W\"urzburg},
 series    = "{GMD}-Report",
 number    = 90,
 pages     = {241--243},
 month     = jan,
 year      = 2000}

@UNPUBLISHED{Neu2000-2,
 author    = {Ulrich Neumerkel},
 title     = {Localisation des erreurs par "slicing" dans les programmes logiques avec contraintes},
 note      = {S\'eminaire de l'{IREMIA}},
 address   = {Universit\'e de la R\'eunion},
 url       = {http://www.univ-reunion.fr/~liremia/seminaire/encours.html},
 month     = feb,
 year      = 2000}


@InProceedings{HoaMesNeu2000-3,
 author    = "S\'ebastien Hoarau and Fred Mesnard and Ulrich Neumerkel",
 title     = {Implementing {cTI}: a constraint-based left-termination inference tool for {LP}},
 booktitle = {Workshop on Parallelism and Implementation Technology for (Constraint) Logic Programming},
 address   = {London},
 month     = jul,
 year      = 2000}

@UNPUBLISHED{Neu2000-4,
 author    = {Ulrich Neumerkel},
 title     = {{GUPU} - {Eine Prolog-Lernumgebung.  Deklaratives Programmieren mit Prolog und Constraints}},
 note      = {Vortragsreihe des Instituts Wirtschaftsinformatik der Produktionsunternehmen},
 address   = {Universit\"at Essen},
 url       = {http://wip.wi-inf.uni-essen.de/sys/layout/frames/speech_f.html},
 month     = jul,
 year      = 2000}

@InProceedings{BurHoaMesNeu2000-5,
 author    = "Serge Burckel and S\'ebastien Hoarau and Fred Mesnard and Ulrich Neumerkel",
 title     = {{cTI}: Bottom-Up Termination Inference for Logic Programs},
 booktitle = {15th Workshop on Logic Programming and Constraint Systems},
 address   = {Berlin},
 month     = aug,
 year      = 2000}

@inproceedings{MesPayNeu2002-1,
 author    = {Fred Mesnard and Etienne Payet and Ulrich Neumerkel},
 title     = {Non-termination inference for optimal termination conditions of logic programs},
 booktitle = {Onzi\`{e}mes Journ\'{e}es Francophones de Programmation Logique et Programmation par Contraintes (JFPLC'02)},
 address   = "Paris",
 publisher = "Herm\`{e}s Science Publications",
 year      = 2002}

@InProceedings{NeuKra2002-2,
 author    = {Ulrich Neumerkel and Stefan Kral},
 title     = {Declarative program development in Prolog with GUPU},
 booktitle = {Proceedings of the 12th Workshop on Logic Programming Environments},
 year      = 2002,
 month     = jul,
 address   = "Copenhagen"}

@InProceedings{MesPayNeu2002-3,
 author    = "Fred Mesnard and {\'E}tienne Payet Ulrich Neumerkel",
 title     = "Dectecting Optimal Termination Conditions of Logic Programs",
 booktitle = "9th Static Analysis Symposium (SAS'02)",
 series    = {LNCS},
 number    = 2477,
 publisher = "Springer-Verlag",
 address   = "Madrid",
 year      = 2002}

@PhdThesis{Peter00,
  author = 	 "Christof Peter",
  title = 	 "Expressiveness and Applicability of Process Types",
  school = 	 "{Technische Universit\"at Wien}",
  year = 	 2000,
  url = 	 {http://www.complang.tuwien.ac.at/Dissertationen/peter00.ps.gz},
  address =	 "Vienna, Austria",
  month =	 feb
}

@MastersThesis{Bermann99,
  author = 	 "Inge Bermann",
  title = 	 "{Vergleich von Vererbungsmechanismen anhand von Design Patterns in C++, Eiffel und Smalltalk}",
  school = 	 "{Technische Universit\"at Wien}",
  year = 	 1999,
  type =	 {Diplomarbeit},
  url = 	 {http://www.complang.tuwien.ac.at/Diplomarbeiten/bermann99.ps.gz},
  address =	 "Vienna, Austria",
  month =	 dec,
  note =	 "in German"
}

@MastersThesis{Kurtev00,
  author = 	 "Stoyan Kurtev",
  title = 	 "Subtyping and Inheritance in Object-Oriented Programming",
  school = 	 "{Technische Universit\"at Wien}",
  year = 	 2000,
  type =	 {Diplomarbeit},
  url = 	 {http://www.complang.tuwien.ac.at/Diplomarbeiten/kurtev00.ps.gz},
  address =	 "Vienna, Austria",
  month =	 feb
}

@MastersThesis{Peck00,
  author = 	 "Patrick Peck",
  title = 	 "{Rigorose objektorientierte Analyse mit LOTOS: Spezifikation eines nebenl\"aufigen, objektorientierten Datenflu{\ss}systems f\"ur die digitale Signalverarbeitung}",
  school = 	 "{Technische Universit\"at Wien}",
  year = 	 2000,
  type =	 {Diplomarbeit},
  url = 	 {http://www.complang.tuwien.ac.at/Diplomarbeiten/peck00.ps.gz},
  address =	 "Vienna, Austria",
  month =	 feb,
  note =	 "in German"
}

@MastersThesis{reisner00,
  author =      {Philipp Reisner},
  title =       {{DRBD -- Festplattenspiegelung \"ubers Netzwerk f\"ur die Realisierung hochverf\"ugbarer Server unter Linux}},
  school =      {{Technische  Universit\"{a}t Wien}},
  type =        {Diplomarbeit},
  year =        {2000},
  address =     {Austria},
  url =         {http://www.complang.tuwien.ac.at/Diplomarbeiten/reisner00.ps.gz},
  sourceurl =	{http://www.complang.tuwien.ac.at/reisner/drbd/},
  abstract =	{This work shows that it is possible to implement
                  high-availability clusters without expensive shared
                  devices. A description of the design and the
                  implementation of a device driver for Linux is
                  provided, which allows harddisk mirroring via the
                  network. In order to be able to offer good
                  performance and support for journaling filesystems
                  an algorithm was developed which gives the disk
                  scheduler maximum freedom during the write process
                  to reorder blocks without compromising the order
                  imposed by the filesystem.\par The device reaches
                  between 50~\% and 98~\% of the maximum theoretical
                  performance. Apart from that, this work gives an
                  overview of how DRBD is integrated into the other
                  clustering components under Linux.},
  kurzfassung =	{Diese Arbeit zeigt, daß Hochverfügbarkeits-Cluster
                  auch ohne teure Shared Devices implementiert werden
                  können. Es werden das Design und die Implementierung
                  eines Gerätetreibers (=DRBD) für Linux gezeigt, der
                  das Spiegeln von Festplatten über das Netzwerk
                  erlaubt. Um sowohl gute Leistung als auch
                  Unterstützung für Journaling-Filesysteme bieten zu
                  können, wurde ein Algorithmus entwickelt, der dem
                  Disk-Scheduler beim Schreiben die größtmögliche
                  Freiheit einräumt, Blöcke umzuordnen, dabei aber die
                  Reihenfolge, die das Filesystem vorgibt, nicht
                  verletzt.\par Das Gerät erreicht zwischen 50~\% und
                  98~\% der theoretisch möglichen Leistung. Weiters
                  gebe ich einen Überblick darüber, wie sich DRBD in
                  die anderen Clustering-Komponenten unter Linux
                  eingliedert.},
  note =	{in German}
}

@MastersThesis{syrowatka00,
  author =      {Peter Syrowatka},
  title =       {{Verallgemeinerung von mmap() am Beispiel des Memory
                  Mappings von Pipes in Linux}},
  school =      {{Technische  Universit\"{a}t Wien}},
  type =        {Diplomarbeit},
  year =        {2000},
  address =     {Austria},
  url =         {http://www.complang.tuwien.ac.at/Diplomarbeiten/syrowatka00.ps.gz},
  sourceurl =	{http://www.complang.tuwien.ac.at/syro/thesis.html},
  abstract =	{Memory mapping (API function \texttt{mmap()}) is a
                  alternative way of accessing files. Compared to the
                  IO functions \texttt{read()} and \texttt{write()} it
                  avoids unnecessary copy operations. The method is
                  not widely used, though, because its standard
                  implementations cannot be applied to stream objects
                  like pipes, sockets or (pseudo-)terminal
                  devices. This consistent applicability of the
                  standard IO functions to all kinds of objects makes
                  them more attractive.\par  To eliminate this
                  disadvantage I present the model of an extended
                  \texttt{mmap()} call, that allows for mapping
                  arbitrary stream objects into a program's address
                  space. A program using this extended call can be
                  written adhering to only one paradigm of interacting
                  with external objects, the one of \texttt{mmap()},
                  and still avoid unnecessary copying.\par  The
                  properties of the memory object that is created by
                  mapping a stream object, are modeled after the
                  behavior of a mapped file, though they still keep
                  some properties of the original object, e.g.\  the
                  size of a pipe or a socket is by definition not
                  known. Although \texttt{mmap()} cannot change this,
                  the large virtual address spaces of current
                  architectures make the model very useful.\par  Using
                  the example of memory mapped pipes I specify the
                  model of extended \texttt{mmap()} in detail,
                  benchmark it and demonstrate a sample implementation
                  for Linux.\par  Extended memory mapping, especially
                  the shift from page granularity to byte granularity,
                  requires some refinements in virtual memory
                  management.\par  I have adapted some simple commands
                  (\texttt{cat}, \texttt{wc} and \texttt{grep}) and
                  compared their performance to the unmodified
                  versions implemented on basis of standard IO. As
                  expected, the versions using \texttt{mmap()} of
                  ordinary files performed better. The difference
                  being mostly in system time used. For the standard
                  IO implementations the measurements required more
                  than twice as much time for \texttt{mmap()}.\par  A
                  second experiment measured the run-times of programs
                  communicating via a pipe. Access methods were either
                  \texttt{read()/write()} or \texttt{mmap()}. Here we
                  see that the result was not as expected. The slight
                  increase of by factor of 1.1 for \texttt{mmap()} can
                  be attributed to the sample implementation not being
                  optimized.},
  kurzfassung = {Memory Mapping (API-Funktion {\tt mmap()}) bietet
                  eine M"oglichkeit auf Dateien zuzugreifen. Dabei
                  entfallen Kopieroperationen, die bei Verwendung der
                  normalen I/O-Funktionen {\tt read()} {\tt write()}
                  intern durchgef"uhrt werden. Diese Methode wird von
                  den Anwendungsprogrammierern aber nicht sehr oft
                  verwendet, da sie nicht auf Stream-Objekte wie
                  Pipes, Sockets oder Terminals angewendet werden
                  kann. Die Standard-I/O Methoden stellen hier einen
                  Weg zur konsistenten Handhabung der
                  unterschiedlichen Objekte zur Verf"ugung. \par Um
                  diesen Nachteil zu eliminieren, stelle ich ein
                  Modell f"ur einen erweiterten {\tt mmap()}-\-Aufruf
                  vor. Mit dieser Erweiterung ist es m"oglich, auch
                  Stream-Objekte in den Adressraum
                  einzublenden. Dadurch kann auch bei Verwendung der
                  alternativen, mmap()-bedingten Programmiermethode
                  eine konsistente Struktur der Programme realisiert
                  und durch die Vermeidung der Kopieroperationen eine
                  Effizienzsteigerung erreicht werden.\par Die
                  Eigenschaften des Speicherobjektes, welches durch
                  das Einblenden eines Stream-Objektes entsteht,
                  orientieren sich am Verhalten einer eingeblendeten
                  Datei, beh"alt aber einige Eigenheiten des
                  Ursprungsobjektes. So ist zum Beispiel die
                  Gr"o{\ss}e einer Pipe oder eines Sockets per
                  Definition unbestimmt. Diese Eigenschaft bleibt auf
                  jeden Fall auch beim Einblenden erhalten. Abhilfe
                  bietet die Nutzung der gro{\ss}en virtuellen
                  Adressr"aume, die auf aktuellen Architekturen zur
                  Verf"ugung stehen.\par Am Beispiel des Memory
                  Mappings f"ur Pipes spezifiziere ich das Modell des
                  erweiterten {\tt mmap()} im Detail und unterziehe es
                  einer Pr"ufung und demonstriere es anhand einer
                  beispielhaften Implementierung f"ur Linux.\par Das
                  erweiterte Memory Mapping stellt einige
                  Anforderungen an das Virtual Memory Management, vor
                  allem den "Ubergang der Verwaltung von {\em
                  Page-Granularit"at} auf {\em
                  Byte-Granularit"at}.\par   Als Demonstrations- und
                  Benchmark-Programme habe ich einfache Anwendungen
                  ({\tt cat, wc} und {\tt grep}) entsprechend
                  angepasst und zwei Messreihen durchgef"uhrt. Es
                  erfolgte ein Vergleich zwischen den Applikationen
                  auf Standard-I/O- und {\tt mmap()}-Basis. Dabei
                  schneiden die Programme mit Memory Mapping
                  erwartungsgem"a{\ss} besser ab (Basissysteme:
                  FreeBSD, SCO, Sun Solaris und Linux). Der Gewinn
                  betrifft vor allem die Systemzeit. Die erhaltenen
                  Messwerte f"ur die Standard-I/O-Methode sind
                  mindestens doppelt so hoch wie bei Verwendung von
                  {\tt mmap()}.\par  In der zweiten Messreihe wurden
                  die Laufzeiten von Programmen, die "uber eine Pipe
                  untereinander Daten austauschen, ermittelt. Der
                  Zugriff erfolgt mit {\tt read()/write()} oder mit
                  {\tt mmap()}. Das Ergebnis dieser Messungen
                  entspricht leider nicht ganz den Erwartungen.
                  Aufgrund der nicht optimierten, beispielhaften
                  Implementierung kommt es zu einer leichten Erh"ohung
                  der Laufzeit (Gesamt-Zeit steigt ca. um Faktor 1,1
                  im {\tt mmap()}-Fall).},
  note =	{in German}
}

@Unpublished{ertl00dagstuhl,
  author = 	 {M. Anton Ertl},
  title = 	 {Optimization During Tree-Parsing Code Selection},
  note = 	 {Talk given at the Dagstuhl Seminar on Code Optimization},
  year =	 {2000},
  month =	 sep,
  url =		 {http://www.complang.tuwien.ac.at/papers/ertl00dagstuhl.ps.gz},
  abstract =	 {Tree parsing is well-known as a method for code
                  selection.  This talk presents a technique for also
                  using it for some optimizations.  The basic
                  principle is to introduce additional nonterminals
                  that correspond to additional data representations.
                  For example, a nonterminal representing complemented
                  values can be used to distribute complement
                  operations to the optimal position (for a machine)
                  using DeMorgan's laws.  Other examples are other
                  unary operators, constant folding across intervening
                  non-constants, and optimizing the conversion between
                  different representations, such as various flag
                  representations, tagged and untagged
                  representations, or various data sizes.  The
                  advantages of this technique are that it optimizes
                  for the machine, not some intermediate code metric
                  and that it has no compile-time overhead when used
                  with tree parsing automata.  The limitations are
                  that there are only a finite number of nonterminals
                  and thus data representations, that optimality is
                  limited to trees, that otherwise it is limited to
                  single-entry regions, and that it results in large
                  grammars.  This talk also mentions further work in
                  factoring the grammar and in dealing with DAGs.}
}

@InProceedings{ertl00,
  author =	 {M. Anton Ertl},
  title =	 {\texttt{CONST-DOES>}},
  booktitle =	 {EuroForth 2000 Conference Proceedings},
  year =	 2000,
  address =	 {Prestbury, UK},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl00.ps.gz},
  abstract =	 {A frequent use of the \code{create}...\code{does>}
                  construct is to provide some constants at definition
                  time that are then used at execution time (a
                  \code{constant}-style use). However,
                  \code{create}...\code{does>} also supports a
                  \code{value}-style use, where the data is not
                  constant and can change at any time. This additional
                  functionality inhibits optimization. This paper
                  proposes \code{const-does>}, which can only be used
                  to define \code{constant}-style words, and thus
                  makes optimization possible.}
}

@InProceedings{czezatke&ertl00,
  author =	 {Christian Czezatke and M. Anton Ertl},
  title =	 {{LinLogFS} --- A Log-Structured Filesystem For {Linux}},
  booktitle =	 {Freenix Track of Usenix Annual Technical Conference},
  pages =	 {77--88},
  year =	 {2000},
  url =		 {http://www.complang.tuwien.ac.at/papers/czezatke%26ertl00/},
  ps-url =	 {http://www.complang.tuwien.ac.at/papers/czezatke%26ertl00.ps.gz},
  abstract =	 {LinLogFS is a log-structured filesystem for
                  Linux. It currently offers features like fast crash
                  recovery and in-order write semantics. We
                  implemented LinLogFS by putting a logging layer
                  between an adapted version of the ext2 file system
                  and the block device.}
}

% Articles

@Article{FS:00,
  author =       {Thomas Fahringer and Bernhard Scholz},
  title =        {{A Unified Symbolic Evaluation Framework for Parallelizing Compilers}},
  journal =      {IEEE Transactions on Parallel and Distributed Systems},
  volume =       "11",
  number =       "11",
  month =        "November",
  year =         "2000"
}

@Article{BFS:00,
  author =    {Johann Blieberger and Thomas Fahringer and Bernhard Scholz},
  title =     {{Symbolic Cache Analysis for Real-Time Systems}},
  journal =   {Real-Time Systems Journal},
  publisher = {Kluwer Academic Publishers},
  address =   {Boston, USA},
  volume =    {18},
  number =    {2/3},
  pages =     {181-215},
  month =     {May},
  year =      {2000}
}

@Article{MS:00a,
  AUTHOR      = { E.~Mehofer and B.~Scholz},
  TITLE       = { {Probabilistic data flow system with two-edge profiling.
                   {\em Workshop on Dynamic and Adaptive Compilation and
                   Optimization (Dynamo'00)} } },
  JOURNAL     = { ACM SIGPLAN Notices },
  VOLUME      = { 35 },
  NUMBER      = { 7 },
  PAGES       = { 65 - 72 },
  YEAR        = { 2000 },
  MONTH       = { July },
}

% Proceedings

@InProceedings{SBF:00,
  author =       {B.~Scholz and J.~Blieberger and T.~Fahringer},
  title =        {{Symbolic Pointer Analysis for Detecting Memory Leaks}},
  pages =        {104--113},
  booktitle =    {Proc. of the Workshop on Partial Evaluation and
                 Semantics-Based Program Manipulation},
  month =        {January},
  publisher =    {ACM Press},
  address =      {Boston, MA, USA},
  year =         {2000},
}

@InProceedings{FSS:00,
  author =    {T.~Fahringer and B.~Scholz and X.~Sun}, 
  title =     {{Execution-Driven Performance Analysis for Distributed and Parallel Systems.}} ,
  booktitle = {2nd International ACM Sigmetrics Workshop on Software and Performance (WOSP 2000)},
  address =   {Ottawa, Canada},
  month =     {September},
  year =      {2000}
} 

@InProceedings{MS:00b,
  AUTHOR      = { E.~Mehofer and B.~Scholz},
  TITLE       = { {Probabilistic procedure cloning for high-performance
                   systems} },
  BOOKTITLE   = { 12th Symposium on Computer Architecture and High Performance
                  Computing (SBAC-PAD'2000)},
  YEAR        = { 2000 },
  ADDRESS     = { Sao Pedro, Brazil },
  MONTH       = { October },
}

@InProceedings{BBS:00,
  author =    {J.~Blieberger and B.~Burgstaller and B.~Scholz},
  title =     {{Symbolic Data Flow Analysis for Detecting Deadlocks in Ada Tasking Programs}},
  booktitle = {Proc. of the Ada-Europe International Conference on
                  Reliable Software Technologies},
  year =      {2000},
  address =   {Potsdam, Germany},
  month =     {June}
}

@Unpublished{BS:00,
  author =       {B.~Scholz},
  title =        {{Symbolic Analysis for the VFC Compiler}},
  note =         {Talk given at the Dagstuhl Seminar on Code Optimization},
  year =         {2000},
  month =        sep,
  abstract =     {
        The quality of many optimizations and analyses for
        parallelizing compilers significantly depends on the ability
        to evaluate symbolic expressions and on the amount of
        information available about program variables at arbitrary
        program points. We describe an effective and unified symbolic
        evaluation framework that statically determines the values of
        variables and symbolic expressions, assumptions about and
        constraints between variable values and the condition under
        which control flow reaches a program statement. The framework computes
        program contexts at arbitrary program points, which are a
        novel representation for comprehensive and compact control and
        data flow analysis information. All of our techniques target
        both linear and non-linear expressions and constraints. The efficiency
        of symbolic analysis is highly improved by aggressive
        simplification techniques. To illustrate the effectiveness of our
        approach we present an example for communication vectorization.  }
}


@InProceedings{eK:00,
  author =       {e.~Kuehn and A.~Lederer},
  title =        {{Automatischer Datenabgleich für heterogene, relationale Datenbanken}},
  pages =        {},
  booktitle =    {Konferenzproceedings der AOUG (Austrian Oracle User Group) Jahrestagung 2000},
  month =        {October},
  publisher =    {AOUG},
  address =      {Wien},
  year =         {2000},
}

@InCollection{PuntCOOP-PetriNets00,
  author = 	 {Franz Puntigam},
  title = 	 {Flexible Types for a Concurrent Model},
  booktitle = 	 {Concurrent Object-Oriented Programming and Petri Nets},
  publisher =	 {Springer-Verlag},
  year =	 2000,
  editor =	 {G. Agha and F. {De~Cindio} and G. Rozenberg},
  number =	 2001,
  series =	 {LNCS}
}

@Book{PuntHab00,
  author =	 {Franz Puntigam},
  title = 	 {Concurrent Object-Oriented Programming with Process Types},
  publisher = 	 {{Der Andere Verlag}},
  year = 	 2000,
  address =	 {Osnabr{\"u}ck, Germany}
}


@INPROCEEDINGS{KrTom01,
	AUTHOR      = {Andreas Krall and Philipp Tomsich},
	TITLE       = {{Java} for Large-Scale Scientific Computations?},
	BOOKTITLE   = {LSSC'01, Large-Scale Scientific Computing},
	EDITOR      = {Svetozar Margenov and Jerzy Wasniewski and Plamen Yalamov},
	SERIES      = {LNCS 2179},
	PUBLISHER   = {Springer},
	PAGES       = {228--235},
	ADDRESS     = {Sozopol},
	MONTH       = {June},
	YEAR        = 2001}

@InProceedings{gregg+01,
  author =	 {David Gregg and M.~Anton Ertl and Andreas Krall},
  title =	 {A Fast {Java} Interpreter},
  booktitle =	 {JOSES Workshop at ETAPS'01},
  year =	 {2001},
  editor =	 {Uwe Assmann},
  url =		 {http://www.complang.tuwien.ac.at/papers/gregg+01.ps.gz},
  abstract =	 {The Java virtual machine (JVM) is usually
                  implemented with an interpreter or just-in-time
                  (JIT) compiler. JITs provide the best performance,
                  but must be substantially rewritten for each
                  architecture they are ported to. Interpreters are
                  easier to develop and maintain, need less memory and
                  can be ported to new architectures with almost no
                  changes. The weakness of interpreters is that they
                  are much slower than JITs. This paper describes work
                  in progress on faster Java interpreters. Our goal is
                  to bring interpreter performance to a new higher
                  level by developing new optimisations for modern
                  computer architectures, and by adapting recent
                  research on compilers to interpreters.}
}

@InProceedings{ertl&gregg01,
  author =	 {M.~Anton Ertl and David Gregg},
  title =	 {The Behaviour of Efficient Virtual Machine
                  Interpreters on Modern Architectures},
  booktitle =	 {Euro-Par 2001},
  pages =	 {403--412},
  year =	 {2001},
  publisher =	 {Springer LNCS~2150},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl%26gregg01.ps.gz},
  abstract =	 {Romer et al (ASPLOS 96) examined several
                  interpreters and concluded that they behave much
                  like general purpose integer programs such as
                  gcc. We show that there is an important class of
                  interpreters which behave very
                  differently. Efficient virtual machine interpreters
                  perform a large number of indirect branches
                  (3.2\%--13\% of all executed instructions in our
                  benchmarks, taking up to 61\%-79\% of the cycles on
                  a machine with no branch prediction). We evaluate
                  how various branch prediction schemes and methods to
                  reduce the mispredict penalty affect the performance
                  of several virtual machine interpreters. Our results
                  show that for current branch predictors, threaded
                  code interpreters cause fewer mispredictions, and
                  are almost twice as fast as switch based
                  interpreters on modern superscalar architectures.}
}

@InProceedings{gregg+01hpcn,
  author =	 {David Gregg and M.~Anton Ertl and Andreas Krall},
  title =	 {Implementing an Efficient {Java} Interpreter},
  booktitle =	 {High-Performance Computing and Networking (HPCN
                  Europe 2001)},
  pages =	 {613--620},
  year =	 {2001},
  publisher =	 {Springer LNCS~2110},
  url =          {http://www.complang.tuwien.ac.at/papers/gregg+01hpcn.ps.gz},
  pdf-url =      {http://link.springer-ny.com/link/service/series/0558/papers/2110/21100613.pdf},
  abstract =	 {The Java virtual machine (JVM) is usually
                  implemented with an interpreter or just-in-time
                  (JIT) compiler. JIT compilers provide the best
                  performance, but must be substantially rewritten for
                  each architecture they are ported to. Interpreters
                  are easier to develop and maintain, and can be
                  ported to new architectures with almost no
                  changes. The weakness of interpreters is that they
                  are much slower than JIT compilers. This paper
                  describes work in progress on a highly efficient
                  Java interpreter. We describe the main features that
                  make our interpreter efficient. Our initial
                  experimental results show that an interpreter-based
                  JVM may be only 1.9 times slower than a
                  compiler-based JVM for some important
                  applications. }
}

@InProceedings{ertl01,
  author =	 {M. Anton Ertl},
  title =	 {Threaded Code Variations and Optimizations},
  booktitle =	 {EuroForth 2001 Conference Proceedings},
  pages =	 {49--55},
  year =	 2001,
  url =          {http://www.complang.tuwien.ac.at/papers/ertl01.ps.gz},
  abstract =	 {Forth has been traditionally implemented as indirect
                  threaded code, where the code for non-primitives is
                  the code-field address of the word. To get the
                  maximum benefit from combining sequences of
                  primitives into superinstructions, the code produced
                  for a non-primitive should be a primitive followed
                  by a parameter (e.g., \code{lit} \emph{addr} for
                  variables). This paper takes a look at the steps
                  from a traditional threaded-code implementation to
                  superinstructions, and at the size and speed effects
                  of the various steps. The use of superinstructions
                  gives speedups of up to a factor of 2 on large
                  benchmarks on processors with branch target buffers,
                  but requires more space for the primitives and the
                  optimization tables, and also a little more space
                  for the threaded code.}
}

@InProceedings{gregg+01euroforth,
  author =	 {David Gregg and M. Anton Ertl and John Waldron},
  title =	 {The Common Case in {Forth} Programs},
  booktitle =	 {EuroForth 2001 Conference Proceedings},
  pages =	 {63--70},
  year =	 {2001},
  url1 =	 {http://www.cs.tcd.ie/David.Gregg/euroforth01.ps.gz},
  url =          {http://www.complang.tuwien.ac.at/papers/gregg+01euroforth.ps.gz},
  abstract =	 {Identifying common features in Forth programs is
                  important for those designing Forth machines and
                  optimisers. In this paper we measure the behaviour
                  of six large Forth programs and four small ones. We
                  look at the ratio of user to system code, basic
                  block lengths, common instructions, and common
                  sequences of instructions. Our most important
                  finding is that for most large programs, many
                  (38.4\%--47.6\% statically and 21.8\%--40.9\%
                  dynamically) basic blocks consist of only a single
                  instruction, which hinders optimisation. We also
                  show static measures of frequent instructions and
                  sequences of instructions are more consistent across
                  programs, and may be a better predictor of the
                  behaviour of other programs than dynamic measures.}
}

@Unpublished{ertl01vhs,
  author =	 {M. Anton Ertl},
  title =	 {{Wie werden Computer schneller?}},
  note =	 {Vortrag im Rahmen der VHS-Veranstaltungsreihe ``University meets Public''},
  month =	 feb,
  year =	 {2001},
  url =		 {http://www.complang.tuwien.ac.at/papers/ertl01vhs.ps.gz},
  abstract =	 {Computerhardware wird regelmaessig schneller und
                  leistungsfaehiger. Dieser Vortrag beschreibt diese
                  Entwicklungen, insbesondere in Bezug auf den
                  internen Aufbau der Prozessoren, und ihre
                  Auswirkungen auf die Software.}
}

@Article{PuntCaC01,
  author = 	 {Franz Puntigam},
  title = 	 {Strong types for coordinating active objects},
  journal = 	 {Concurrency and Computation: Practice and Experience},
  year = 	 2001,
  volume =	 13,
  pages =	 {293--326}
}

@Article{PuntFI01,
  author = 	 {Franz Puntigam and Christof Peter},
  title = 	 {Types for Active Objects with Static Deadlock Prevention},
  journal = 	 {Fundamenta Informaticae},
  year = 	 2001,
  volume =	 49,
  pages =	 {1--27}
}


@inproceedings{MS01a,
AUTHOR      = { Mehofer, E. and Scholz, B. },
TITLE       = { {Probabilistic communication optimizations and parallelization
                 for distributed-memory systems} },
BOOKTITLE   = { 9th Euromicro Workshop on Parallel and Distributed Processing (PDP)},
YEAR        = { 2001},
ADDRESS     = { Mantova, Italy },
MONTH       = { February },
}


@inproceedings{MS01b,
AUTHOR      = { Mehofer, E. and Scholz, B. },
TITLE       = {{A  Novel Probabilistic Data-Flow Framework}},
BOOKTITLE   = { International Conference on Compiler Construction (CC)},
YEAR        = { 2001},
ADDRESS     = { Genova, Italy },
MONTH       = { April },
}

@article{FBHLMMS:01,
AUTHOR      = { T.~Fahringer and P.~Blaha and A.~H\"ossinger and J.~Luitz and E.~Mehofer and H.~Moritsch and B.~Scholz },
TITLE       = {{Development and Performance Analysis of Real-World Applications for Distributed and Parallel Architectures}},
  journal =      {Concurrency, Practice and Experience},
  publisher =    { John Wiley \& Sons },
  year =         "2001 "
}

@Article{ertl+02,
  author =	 {M. Anton Ertl and David Gregg and Andreas Krall and
                  Bernd Paysan},
  title =	 {\textsf{vmgen} --- A Generator of Efficient Virtual
                  Machine Interpreters},
  journal =	 {Software---Practice and Experience},
  year =	 {2002},
  volume =	 {32},
  number =	 {3},
  pages =	 {265--294},
  OPTmonth =	 {},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl+02.ps.gz},
  abstract-url = {http://www3.interscience.wiley.com/cgi-bin/abstract/90010508/START},
  keywords =	 {interpreter; virtual machine; generator; stack
                  architecture; superinstruction; byte code},
  abstract =	 {In a virtual machine interpreter, the code for each
                  virtual machine instruction has similarities to code
                  for other instructions. We present an interpreter
                  generator that takes simple virtual machine
                  instruction descriptions as input and generates C
                  code for processing the instructions in several
                  ways: execution, virtual machine code generation,
                  disassembly, tracing, and profiling. The generator
                  is designed to support efficient interpreters: it
                  supports threaded code, caching the top-of-stack
                  item in a register, combining simple instructions
                  into superinstructions, and other optimizations. We
                  have used the generator to create interpreters for
                  Forth and Java. The resulting interpreters are
                  faster than other interpreters for the same
                  languages and they are typically 2-10 times slower
                  than code produced by native-code compilers. We also
                  present results for the effects of the individual
                  optimizations supported by the generator.}
}

@InProceedings{ertl&gregg02,
  author =	 {M. Anton Ertl and David Gregg},
  title =	 {Building an interpreter with \textsf{vmgen}},
  booktitle =	 {Compiler Construction (CC'02)},
  pages =	 {5--8},
  year =	 {2002},
  publisher =	 {Springer LNCS~2304},
  note =	 {Tool Demonstration},
  url =		 {http://www.complang.tuwien.ac.at/papers/ertl%26gregg02.ps.gz},
  abstract =	 {\textsf{Vmgen} automates many of the tasks of
                  writing the virtual machine part of an interpreter,
                  resulting in less coding, debugging and maintenance
                  effort. This paper gives some quantitative data
                  about the source code and generated code for a
                  \textsf{vmgen}-based interpreter, and gives some
                  examples demonstrating the simplicity of using
                  \textsf{vmgen}.}
}

@InProceedings{ertl02,
  author =	 {M. Anton Ertl},
  title =	 {Threaded Code Variations and Optimizations (Extended
                  Version)},
  booktitle =	 {Forth-Tagung 2002},
  year =	 {2002},
  address =	 {Garmisch-Partenkirchen},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl02.ps.gz},
  abstract =	 {Forth has been traditionally implemented as indirect
                  threaded code, where the code for non-primitives is
                  the code-field address of the word. To get the
                  maximum benefit from combining sequences of
                  primitives into superinstructions, the code produced
                  for a non-primitive should be a primitive followed
                  by a parameter (e.g., \code{lit} \emph{addr} for
                  variables). This paper takes a look at the steps
                  from a traditional threaded-code implementation to
                  superinstructions, and at the size and speed effects
                  of the various steps.\comment{It also compares these
                  variants of Gforth to various other Forth
                  implementations on contemporary machines.} The use
                  of superinstructions gives speedups of up to a
                  factor of 2 on large benchmarks on processors with
                  branch target buffers, but requires more space for
                  the primitives and the optimization tables, and also
                  a little more space for the threaded code.}
}

@InProceedings{ertl02ef,
  author = 	 {M. Anton Ertl},
  title = 	 {The Evolution of Vmgen},
  crossref =	 {euroforth02},
  pages =	 {33--37},
  url =		 {http://www.complang.tuwien.ac.at/anton/euroforth2002/papers/ertl.ps.gz},
  note =	 {Slides}
}

@InProceedings{ertl02efb,
  author = 	 {M. Anton Ertl},
  title = 	 {Superinstructions in {Gforth}},
  crossref =	 {euroforth02},
  note =	 {Demonstration only, no paper}
}

@Proceedings{euroforth02,
  title = 	 {18th EuroForth Conference},
  booktitle = 	 {18th EuroForth Conference},
  year = 	 {2002},
  key =		 {EuroForth'02},
  editor =	 {M. Anton Ertl}
}

@InProceedings{Probst02,
  author =	{Mark Probst},
  title =	{Dynamic Binary Translation},
  booktitle =	{UKUUG Linux Developer's Conference 2002},
  year =	{2002}
}

@INPROCEEDINGS{LaKrPu02a ,
	AUTHOR      = {Martin Lackner and Andreas Krall and Franz Puntigam},
	TITLE       = {Supporting Design by Contract in Java},
	BOOKTITLE   = {TOOLS USA 2002},
	EDITOR      = {Christine Mingins},
	PAGES       = {57--76},
	ADDRESS     = {Santa Barbara},
	MONTH       = {July},
	YEAR        = {2002},
	URL         = {http://www.jot.fm/issues/issue_2002_08/article4},
	ABSTRACT    = {
                  Design by Contract is a valuable design method for trusted
                  software components. Eiffel shows how to provide appropriate
                  language support for it. However, no such concepts currently
                  exist in Java. Full integration of them into Java may help
                  to improve and guarantee the quality of Java classes. We
                  briefly compare several approaches to extend Java in this
                  way and present our model and a compiler that translates
                  extended Java code into JVM byte code. Our Java extension
                  integrates preconditions, postconditions, and invariants as
                  in Eiffel while respecting the characteristics of Java. The
                  evaluation shows that Design by Contract can be added
                  effciently to Java while keeping compatibility.}
}


@ARTICLE{LaKrPu02b ,
	AUTHOR      = {Martin Lackner and Andreas Krall and Franz Puntigam},
	TITLE       = {Supporting Design by Contract in Java},
	JOURNAL     = {Journal of Object Technology},
	VOLUME      = {1},
	NUMBER      = {3},
	PAGES       = {57--76},
	YEAR        = {2002},
	URL         = {http://www.jot.fm/issues/issue_2002_08/article4},
	ABSTRACT    = {
                  Design by Contract is a valuable design method for trusted
                  software components. Eiffel shows how to provide appropriate
                  language support for it. However, no such concepts currently
                  exist in Java. Full integration of them into Java may help
                  to improve and guarantee the quality of Java classes. We
                  briefly compare several approaches to extend Java in this
                  way and present our model and a compiler that translates
                  extended Java code into JVM byte code. Our Java extension
                  integrates preconditions, postconditions, and invariants as
                  in Eiffel while respecting the characteristics of Java. The
                  evaluation shows that Design by Contract can be added
                  effciently to Java while keeping compatibility.}
}


@INCOLLECTION{KrHo02,
	AUTHOR      = {Andreas Krall and Nigel Horspool},
	EDITOR      = {Y.N. Skrikant and Priti Shankar},
	BOOKTITLE   = {The Compiler Design Handbook:
	               Optimizations and Machine Code Generation},
	TITLE       = {Optimizations for Object-Oriented Languages},
	PUBLISHER   = {CRC Press},
	PAGES       = {219--246},
	MONTH       = {September},
	YEAR        = {2002}
}


@INPROCEEDINGS{PrKrSch02,
	AUTHOR      = {Mark Probst and Andreas Krall and Bernhard Scholz},
	TITLE       = {Register Liveness Analysis for Optimizing Binary Translation},
	BOOKTITLE   = {Working Conference on Reverse Engineering},
	EDITOR      = {Elizabeth Burd and Arie van Deursen},
	PUBLISHER   = {IEEE},
	ADDRESS     = {Richmond},
	MONTH       = {October},
	YEAR        = 2002}

@inproceedings{SM02,
AUTHOR      = { Bernhard Scholz and Eduard Mehofer },
TITLE       = { {Dataflow frequency analysis based on whole program paths} },
BOOKTITLE   = { Proceedings of the IEEE International Conference on Parallel
                Architectures and Compilation Techniques (PACT-2002) },
YEAR        = { 2002 },
ADDRESS     = { Charlottesville, VA },
MONTH       = { September },
}

@inproceedings{SE02,
AUTHOR      = { Bernhard Scholz and Erik Eckstein},  
TITLE       = { {Register Allocation for Irregular Architectures}}, 
BOOKTITLE   = { Proceedings of the Joint-Conference on Languages, 
                Compilers, and Tools for Embedded Systems and Software 
                and Compilers for Embedded Systems (LCTES/SCOPES)},
YEAR        = { 2002 },
ADDRESS     = { Berlin, Germany },
MONTH       = { June }
}

@Unpublished{S02a,
AUTHOR      =  { Bernhard Scholz},
TITLE       =  { { Probabilistic Data Flow Analysis and its Applications }},
NOTE        =  { Talk given at the French National Institute for Research 
                 in Computer Science and Control (INRIA) },
YEAR        =  { 2002 },
ADDRESS     =  { Paris, France },
MONTH       =  { February },
URL         =  { http://www-rocq.inria.fr/a3/seminars/scholz.html },
ABSTRACT    = "Classical data flow analysis determines whether a data
               flow fact may hold or does not hold at some program point. 
               Probabilistic data flow analysis (PDFA) systems compute a 
               range, i.e. a probability, with which a data flow fact will 
               hold at some program point.  In this talk a novel, practicable 
               framework for probabilistic data flow problems is presented and
               some applications for PDFA are given.  Effectiveness and 
               efficiency of our approach are shown by performance numbers of 
               the SPECint95 benchmark suite."
}

@Unpublished{S02b,
AUTHOR      =  { Bernhard Scholz },
TITLE       =  { { Speculative Partial Redundancy Elimination }},
NOTE        =  { Workshop on Compiler-Driven Performance (co-loacted with CASCON'02) },
YEAR        =  { 2002 },
MONTH       =  { October },
ADDRESS     =  { Toronto, Canada}
} 

@Article{PuntCompLang02,
  author = 	 {Franz Puntigam},
  title = 	 {State inference for dynamically changing interfaces},
  journal = 	 {Computer Languages},
  year = 	 2002,
  volume =	 27,
  pages =	 {163--202}
}

@Article{ertl03vd,
  author = 	 {M. Anton Ertl},
  title = 	 {{Threaded Code -- Varianten und Optimierungen (Kurzfassung)}},
  journal = 	 {Vierte Dimension -- Das FORTH-Magazin},
  year = 	 {2003},
  volume =	 {19},
  number =	 {1},
  pages =	 {12--15},
  annote =	 {A shortened, German version of \cite{ertl02}}
}

@Unpublished{ertl03dagstuhl,
  author =	 {M. Anton Ertl},
  title =	 {Optimizing Interpreters},
  note =	 {Talk given at the Dagstuhl Seminar 3071: Emerging
                  Technologies: Can Optimization Technology meet their
                  Demands?},
  year =	 {2003},
  month =	 feb
}

@InProceedings{ertl&gregg03,
  author =       "M. Anton Ertl and David Gregg",
  title =        "Optimizing Indirect Branch Prediction Accuracy in Virtual Machine Interpreters",
  crossref =     "sigplan03",
  OPTpages =	 "",
  url =		 "http://www.complang.tuwien.ac.at/papers/ertl%26gregg03.ps.gz",
  abstract =     "Interpreters designed for efficiency execute a huge
                  number of indirect branches and can spend more than
                  half of the execution time in indirect branch
                  mispredictions.  Branch target buffers are the best
                  widely available\mn{on all recent general-purpose
                  machines?} form of indirect branch prediction;
                  however, their prediction accuracy for existing
                  interpretes is only 2\%--50\%.  In this paper we
                  investigate two methods for improving the prediction
                  accuracy of BTBs for interpreters: replicating
                  virtual machine (VM) instructions and combining
                  sequences of VM instructions into superinstructions.
                  We investigate static (interpreter build-time) and
                  dynamic (interpreter run-time) variants of these
                  techniques and compare them and several combinations
                  of these techniques.  These techniques can eliminate
                  nearly all of the dispatch branch mispredictions,
                  and have other benefits, resulting in speedups by a
                  factor of up to 3.17 over efficient threaded-code
                  interpreters, and speedups by a factor of up to 1.3
                  over techniques relying on superinstructions alone."
}

@Proceedings{sigplan03,
  booktitle =    "SIGPLAN Conference on Programming Language
                  Design and Implementation (PLDI'03)",
  title =        "SIGPLAN Conference on Programming Language
                  Design and Implementation (PLDI'03)",
  year =         "2003",
  key =          "PLDI '03"
}

@Proceedings{ivme03,
  title = 	 {Interpreters, Virtual Machines and Emulators
                  (IVME~'03)},
  booktitle = 	 {Interpreters, Virtual Machines and Emulators
                  (IVME~'03)},
  year = 	 {2003},
  key =		 {IVME~'03},
  editor =	 {M. Anton Ertl},
  url = 	 {http://www.complang.tuwien.ac.at/anton/ivme03/proceedings/ivme.ps.gz},
  url2 = 	 {http://portal.acm.org/toc.cfm?id=858570&type=proceeding}
}

@InProceedings{ertl&gregg03euroforth,
  author =	 {M. Anton Ertl and David Gregg},
  title =	 {Implementation Issues for Superinstructions in
                  {Gforth}},
  booktitle =	 {EuroForth 2003 Conference Proceedings},
  OPTpages =	 {},
  year =	 {2003},
  URL =          {http://www.complang.tuwien.ac.at/papers/ertl%26gregg03euroforth.ps.gz},
  abstract =	 {Combining Forth primitives into superinstructions
                  provides nice speedups. Several approaches to
                  superinstructions were explored in the Gforth
                  project. This paper discusses the effects of these
                  approaches on performance, compilation time,
                  implementation effort, and on programming tools such
                  as the decompiler and backtracing.}
}

@Article{ertl&gregg03jilp,
  author =	 {M. Anton Ertl and David Gregg},
  title =	 {The Structure and Performance of \emph{Efficient}
                  Interpreters},
  journal =	 {The Journal of Instruction-Level Parallelism},
  year =	 {2003},
  volume =	 {5},
  month =	 nov,
  url =         {http://www.complang.tuwien.ac.at/papers/ertl%26gregg03jilp.ps.gz},
  url2 =	 {http://www.jilp.org/vol5/v5paper12.pdf},
  note =	 {http://www.jilp.org/vol5/},
  abstract =	 {Interpreters designed for high general-purpose
                  performance typically perform a large number of
                  indirect branches (3.2\%--13\% of all executed
                  instructions in our benchmarks). These branches
                  consume more than half of the run-time in a number
                  of configurations we simulated. We evaluate how
                  accurate various existing and proposed branch
                  prediction schemes are on a number of interpreters,
                  how the mispredictions affect the performance of the
                  interpreters and how two different interpreter
                  implementation techniques perform with various
                  branch predictors. We also suggest various ways in
                  which hardware designers, C compiler writers, and
                  interpreter writers can improve the performance of
                  interpreters.}
}

@InProceedings{ertl&gregg04ivme,
  author =	 {M. Anton Ertl and David Gregg},
  title =	 {Combining Stack Caching with Dynamic
                  Superinstructions},
  crossref =	 {ivme04},
  pages =	 {7--14},
  URL =          {http://www.complang.tuwien.ac.at/papers/ertl%26gregg04ivme.ps.gz},
  abstract =	 {Dynamic superinstructions eliminate most of the
                  interpreter dispatch overhead. This results in a
                  higher proportion of interpreter time spent in stack
                  accesses (on a stack-based virtual machine). Stack
                  caching reduces the stack access overhead. Each of
                  these optimizations provides more speedup, if the
                  other one is applied, too. Combining these
                  optimizations also opens additional opportunities:
                  we can insert stack state transitions without
                  dispatch cost; this reduces the number of necessary
                  VM instruction instances significantly. A
                  shortest-path search can find the optimal sequence
                  of state transitions and VM instructions. In this
                  paper we describe an implementation of static stack
                  caching employing these ideas. We also represent
                  empirical results for our implementation, resulting
                  in a speedup of up to 58\% over a version that keeps
                  one value in registers all the time.}
}

@Proceedings{ivme04,
  booktitle = 	 {Interpreters, Virtual Machines and Emulators (IVME '04)},
  title = 	 {Interpreters, Virtual Machines and Emulators (IVME '04)},
  year = 	 {2004}
}

@InProceedings{ertl&gregg04pact,
  author =	 {M. Anton Ertl and David Gregg},
  title =	 {Retargeting {JIT} compilers by using {C}-compiler
                  generated executable code},
  crossref =	 {pact04},
  pages =	 {41--50},
  URL =          {http://www.complang.tuwien.ac.at/papers/ertl%26gregg04pact.ps.gz},
  abstract =	 {JIT compilers produce fast code, whereas
                  interpreters are easy to port between
                  architectures. We propose to combine the advantages
                  of these language implementation techniques as
                  follows: we generate native code by concatenating
                  and patching machine code fragments taken from
                  interpreter-derived code (generated by a C
                  compiler); we completely eliminate the interpreter
                  dispatch overhead and accesses to the interpreted
                  code by patching jump target addresses and other
                  constants into the fragments. In this paper we
                  present the basic idea, discuss some issues in more
                  detail, and present results from a proof-of-concept
                  implementation, providing speedups of up to 1.87
                  over the fastest previous interpreter-based
                  technique, and performance comparable to simple
                  native-code compilers. The effort required for
                  retargeting our implementation from the 386 to the
                  PPC architecture was less than a person-day.}
}

@Proceedings{pact04,
  title = 	 {Parallel Architecture and Compilation Techniques (PACT' 04)},
  booktitle = 	 {Parallel Architecture and Compilation Techniques (PACT' 04)},
  year = 	 {2004},
  key =		 {PACT '04},
  OPTpublisher = {IEEE}
}

@InProceedings{ertl04euroforth,
  author =	 {M. Anton Ertl},
  title =	 {Forth Family Tree and Timeline},
  booktitle =	 {EuroForth 2004 Conference Proceedings},
  pages =	 {1--4},
  year =	 {2004},
  OPTnote =	 {not refereed},
  URL =          {http://www.complang.tuwien.ac.at/forth/family-tree/}
}

@InProceedings{gregg&ertl04euroforth,
  author =	 {David Gregg and M. Anton Ertl},
  title =	 {Inlining in {Gforth}: Early Experiences},
  booktitle =	 {EuroForth 2004 Conference Proceedings},
  pages =        {33--40},
  year =	 {2004},
  URL =          {http://www.complang.tuwien.ac.at/papers/gregg%26ertl04euroforth.ps.gz},
  OPTnote =	 {not refereed},
  abstract =	 {Many optimizations are easier or more effective for
                  straight-line code (basic blocks). Straight-line
                  code in Forth is limited mainly by calls and
                  returns. Inlining eliminates calls and returns,
                  which in turn makes the basic blocks longer, and
                  increases the effectiveness of other
                  optimizations. In this paper we present a first
                  prototype implementation of ininlining for Gforth.}
}

@InProceedings{Punt03a,
  author    = {Franz Puntigam},
  title     = {State Information in Statically Checked Interfaces},
  booktitle = {Eighth International Workshop on Component-Oriented Programming},
  year      = 2003,
  address   = {Darmstadt, Germany},
  month     = jul,
  url       = "http://www.complang.tuwien.ac.at/franz/papers/Punt03a.ps.gz"
}

@InProceedings{Punt03b,
  author    = {Franz Puntigam},
  title     = {Synchronization with Type Variables},
  booktitle = {Workshop on Object-oriented Language Engineering for the Post-Java Era},
  year      = 2003,
  address   = {Darmstadt, Germany},
  month     = jul,
  url       = "http://www.complang.tuwien.ac.at/franz/papers/Punt03b.ps.gz"
}

@Unpublished{Punt03c,
  author =	 {Franz Puntigam},
  title =	 {State Information in Types or No Memory Consistency Model is the Better Consistency Model},
  note =	 {Talk given at the Dagstuhl Seminar 03431: Hardware and Software Consistency Models: Programmability and Performance},
  year =	 {2003},
  month =	 oct
}

@InProceedings{MS-WOMPAT03,
  author     = "Daniel Quinlan and Markus Schordan and Qing Yi and Bronis de Supinski",
  title      = "A {C++} Infrastructure for Automatic Introduction and Translation of {OpenMP} Directives",
  booktitle  = "WOMPAT'03: Workshop on OpenMP Applications and Tools",
  series    = {Lecture Notes in Computer Science},
  volume     = {2716},
  pages      = "13--25",
  month      = jun,
  publisher  = "Springer Verlag",
  year       = "2003",
  isbn       = {3-540-40435-X},
  comment    = {http://www.informatik.uni-trier.de/~ley/db/conf/wompat/wompat2003.html},
  URL        = "http://springerlink.com/openurl.asp?genre=article&issn=0302-9743&volume=2716&spage=13",
}

@InProceedings{MS-JMLC03,
  author =       "Markus Schordan and Daniel Quinlan",
  title =        "A Source-To-Source Architecture for User-Defined Optimizations",
  booktitle =      "JMLC'03: Joint Modular Languages Conference",
  series    = {Lecture Notes in Computer Science},
  volume =       {2789},
  month =        aug,
  publisher =    "Springer Verlag",
  year =         "2003",
  pages =        "214--223",
  URL   =        "http://springerlink.com/openurl.asp?genre=article&issn=0302-9743&volume=2789&spage=214"
}

@InProceedings{MS-Europar03,
  author = "Michael Gerndt and Chau-Wen Tseng and Michael O'Boyle and Markus Schordan (topic chairs)",
  title = "Topic 4 Compilers for High Performance",
  booktitle = "Euro-Par 2003 Parallel Processing: 9th International Euro-Par Conference",
  series = "Lecture Notes in Computer Science",
  volume = "2790",
  publisher = "Springer Verlag",
  mon =   aug,
  year = "2003",
  pages = "241",
  comment = "http://www.springerlink.com/index/7UTCVFAG5XMWNGLD",
}

@InProceedings{MS-LCPC03,
  author =       "Dan Quinlan and Markus Schordan and Qing Yi and Bronis de Supinski",
  title =        "Semantic-Driven Parallelization of Loops Operating on User-Defined Containers",
  booktitle =    "LCPC'03: 16th Annual Workshop on Languages and Compilers for Parallel Computing",
  series    =    "Lecture Notes in Computer Science",
  volume =       "2958",
  pages =        "524--538",
  publisher =    "Springer Verlag",
  mon =          may,
  year =         "2004",
  ULR =          "http://www.springerlink.com/openurl.asp?genre=volume&id=doi:10.1007/b95707",
}

@article{MS-QSMK04CPA,
  author =       "Daniel Quinlan and Markus Schordan and Brian Miller and Markus Kowarschik",
  title =        "Parallel Object-Oriented Framework Optimization",
  journal =    "Concurrency and Computation: Practice and Experience",
  pages =        "293--302",
  publisher =    "Wiley InterScience",
  volume =       "16, Issue 2-3",
  mon =          feb,
  year =         "2004",
  issn =         "1532-0634",
  URL =           "http://www3.interscience.wiley.com/search/allsearch?mode=viewselected&product=journal&ID=106599657&view_selected.x=46&view_selected.y=8&view_selected=view_selected"
}

@inproceedings{KR-sblp03,
   author = {Knoop, J. and R\"uthing, O.},
   title = {Constant Propagation on Predicated Code},
   booktitle = {Proceedings of the 7th Brazilian Symposium on Programming 
   Languages $($SBLP 2003$)$ $($Ouro Preto, MG, Brazil, 
   May 28 - 30, 2003$)$},  
   year = {2003},
   pages = {135 - 148}
 }



@article{KR-jucs03,
   author = {Knoop, J. and R\"uthing, O.},
   title = {Constant Propagation on Predicated Code},
   journal = "Journal of Universal Computer Science",
   volume = {9},
   number = {8},
   year = {2003},
   pages = {829 - 850},
   note = {(Special issue devoted to SBLP'03)}
}


@book{KZ-entcs03,
   editor = {Knoop, J. and Zimmermann, W.},
   title = {Proceedings of the 2nd International 
            Workshop on Compiler Optimization Meets Compiler 
            Verification (COCV 2003)},
   series = "Electronic Notes in Theoretical 
              Computer Science",
   volume = {82(2)},
   year = {2003},
   address = {Warsaw, Poland},
   date = {April},
}


@book{KZ-jucs03,
   editor = {Knoop, J. and Zimmermann, W.},
   title = {Special Issue on Compiler 
            Optimization meets Compiler Verification 
            (COCV 2002)},
   series = "Journal of Universal Computer Science",
   volume = {9(3)},
   year = {2003}
}


@Unpublished{Kn-hagen03,
  author =       {Knoop, J.},
  title =        {Code Size vs.~Run-Time Optimization: Towards Getting Both},
  note =         {Talk (Colloquim) given at the FernUniversit\"at in Hagen, Germany},
  year =         {2003},
  month =        May
}


@Unpublished{Kn-santacruz03,
  author =       {Knoop, J.},
  title =        {Simple Constants and Beyond: The Beauty of Value 
                  Graphs for Constant Propagation},
  note =         {Talk given at the 41st Meeting of the IFIP Working 
                  Group 2.4 Software Implementation Technology,
                  Santa Cruz, California, USA},
  year =         {2003},
  month =        August
}


@Unpublished{Kn-dagstuhl03,
  author =       {Knoop, J.},
  title =        {Code Size vs. Execution-Time Optimization --- Towards 
                  Getting Both},
  note =         {Talk given at the Dagstuhl-Seminar 03071 on "Emerging 
                  Technologies: Can Optimization Technology Meet Their 
                  Demands?", Internationales Begegnungs- und 
                  Forschungszentrum f\"ur Informatik, Schlo{\ss} 
                  Dagstuhl, Germany},
  year =         {2003},
  month =        February
}


@Unpublished{Kn-badhonnef03,
  author =       {Knoop, J.},
  title =        {Constant Propagation on the Value Graph: Simple
                  Constants and Beyond},
  note =         {Talk given at the 20th Workshop der GI-Fachgruppe 
                  2.1.4 ''Programmiersprachen und Rechenkonzept''
  Physikzentrum Bad Honnef, Germany},
  year =         {2003},
  month =        May
}

@article{evakuehn2003a,
  author =       "Eva K{\"u}hn",
  title =        "The Zero-Delay Data Warehouse: Mobilizing Heterogeneous
Databases",
  journal =    "Proceedings of the Very Large Databases Conference (VLDB)",
  publisher =    "ACM, IEEE",
  mon =          sep,
  year =         "2003",
}

@Proceedings{Krall:2003:SCE,
  editor =       "Andreas Krall",
  booktitle =    "Software and Compilers for Embedded Systems: 7th
                 International Workshop, SCOPES 2003, Vienna, Austria,
                 September 24--26, 2003: Proceedings",
  title =        "Software and Compilers for Embedded Systems: 7th
                 International Workshop, {SCOPES 2003}, Vienna, Austria,
                 September 24--26, 2003: Proceedings",
  volume =       "2826",
  publisher =    "Springer-Verlag",
  address =      "New York, NY, USA",
  pages =        "xi + 402",
  year =         "2003",
  CODEN =        "LNCSD9",
  ISBN =         "3-540-20145-9",
  ISSN =         "0302-9743",
  LCCN =         "QA76.6",
  bibdate =      "Thu Nov 11 19:26:30 MST 2004",
  series =       "Lecture Notes in Computer Science",
  URL =          "http://link.springer-ny.com/link/service/series/0558/tocs/t2826.htm",
  acknowledgement = ack-nhfb,
  doi =          "10.1007/b13482",
}

@Article{sipkova2003a:2003:EDM,
  author =       "Siegfried Benkner and Viera Sipkova",
  title =        "Exploiting Distributed-Memory and Shared-Memory
                 Parallelism on Clusters of {SMPs} with Data Parallel
                 Programs",
  journal =      "International Journal of Parallel Programming",
  volume =       "31",
  number =       "1",
  pages =        "3--19",
  month =        feb,
  year =         "2003",
  CODEN =        "IJPPE5",
  ISSN =         "0885-7458",
  bibdate =      "Sat Jan 24 14:51:20 MST 2004",
  URL =          "http://ipsapp007.kluweronline.com/content/getfile/4773/31/2/abstract.htm;
                 http://ipsapp007.kluweronline.com/content/getfile/4773/31/2/fulltext.pdf",
  acknowledgement = ack-nhfb,
}

@Article{Scholz-Mehofer-Horspool/03,
  author =       "Bernhard Scholz and Eduard Mehofer and Nigel
                 Horspool",
  title =        "Predicated partial redundancy elimination using a cost
                 analysis",
  journal =      "Parallel Processing Letters",
  pages =        "525--536",
  year =         "2003",
  number =       "4",
  volume =       "13",
  keywords =     "partial redundancy elimination (pre), probabilistic
                 data-flow analysis (pdfa), predication",
  abstract =     "Partial redundancy elimination (PRE) is a key
                 technology for modern compilers. However traditional
                 approaches are conservative and fail to exploit many
                 opportunities for optimization. New PRE approaches
                 which greatly increase the number of eliminated
                 redundancies have been developed. However, they either
                 cause the code size to explode or they cannot handle
                 statements with side-effects. In this paper we describe
                 a predicated partial redundancy elimination (PPRE)
                 approach which can potentially remove all partial
                 redundancies. To avoid performance overheads caused by
                 predication, PPRE is applied selectively based on a
                 cost model. The cost analysis presented in the paper
                 utilizes probabilistic data-flow information to decide
                 whether PPRE is profitable for each instance of a
                 partially redundant computation. Refinements of the
                 basic PPRE transformation are described in detail. In
                 contrast to some other approaches our transformation is
                 strictly semantics preserving. (Copyright 2004 World
                 Scientific Publishing)",
  editor =       "M. Cosnard",
  publisher =    "World Scientific Publishing Co.",
  address =      "New Jersey-London-Singapore-Hong Kong",
  URL =          "http://dx.doi.org/10.1142/S0129626403001483",
  acknowledgement = "LEABib --- Lehrstuhl f{\"u}r Effiziente Algorithmen
                 der Technischen Universit{\"a}t M{\"u}nchen.
                 http://wwwmayr.informatik.tu-muenchen.de/leabib",
  cdate =        "1970-01-01",
  mdate =        "1970-01-01",
}

@Book{Scholz:2003:ASA,
  author =       "Thomas Fahringer and Bernhard Scholz",
  booktitle =    "Advanced symbolic analysis for compilers: new
                 techniques and algorithms for symbolic program analysis
                 and optimization",
  title =        "Advanced symbolic analysis for compilers: new
                 techniques and algorithms for symbolic program analysis
                 and optimization",
  volume =       "2628",
  publisher =    "Springer-Verlag",
  address =      "New York, NY, USA",
  pages =        "xii + 129",
  year =         "2003",
  CODEN =        "LNCSD9",
  ISBN =         "3-540-01185-4 (softcover)",
  ISSN =         "0302-9743",
  LCCN =         "QA76.76.C65 .F34 2003",
  bibdate =      "Thu Aug 21 09:09:03 MDT 2003",
  note =         "Also available via the World Wide Web.",
  series =       "Lecture Notes in Computer Science",
  URL =          "http://link.springer-ny.com/link/service/series/0558/tocs/t2628.htm",
  acknowledgement = ack-nhfb,
  doi =          "????",
  keywords =     "compilers (computer programs); computer algorithms",
}


@article{krall2003b,
  author =       "Andreas Krall and Christian Panis and G{\"u}nther Laure and Wolfgang Lazian and Herbert Gr{\"u}nbacher and Jari Nurmi",
  title =        "Design Space Exploration for Configurable {D}{S}{P} Core",
  journal =      "International Signal Processing Conference",
  year =         "2003",
  comment =      "Dallas, talk: C. Panis",
}

@article{scholz2003c,
author="Johann Blieberger and Bernd Burgstaller and Bernhard Scholz",
title="Busy Wait Analysis",
journal="Reliable Software Technologies - Ada-Europe",
comment ="Toulouse, France; 16.06.2003 - 20.06.2003",
series="Lecture Notes in Computer Science - Ada-Europe",
publisher="Springer-Verlag",
volume="2655",
year="2003",
isbn="3-540-40376-0",
pages="142--152",
}

@article{scholz2003d,
author="Ulrich Hirnschrott and Andreas Krall and Bernhard Scholz",
title="Graph -coloring vs.Optimal Register Allocation for Optimizing Compilers",
journal="Proceedings of the Joint Modular Language Conference",
comment="Klagenfurt; 31.08.2003",
year="2003",
isbn="3-540-40796-0",
pages="202--213",
comment="talk:Hirnschrott"
}

@article{krall2003e,
author="Ivan Pryanishnikov and Andreas Krall and Nigel Horspool",
title="Pointer Alignment Analysis for Processors with {S}{I}{M}{D} Instructions",
journal="5th Workshop on Media and Streaming Processors at Micro'03",
comment="San Diego; 31.12.2003",
year="2003",
pages="50--57",
comment="talk:Pryanishnikov",
}

@article{sipkova2003b,
author="Vera Sipkova",
title="Efficient Variable Allocation to Dual Memory Banks of {D}{S}{P}s",
journal="7th International Workshop on Software and Compilers (SCOPES'03)",
comment="Wien; 24.09.2003 - 26.09.2003",
publisher="Springer Verlag",
year="2003",
isbn="3-540-20145-9",
pages="359--372",
comment="talk:Sipkova",
}

@article{hirnschrott2003a,
author="Ulrich Hirnschrott",
title="{V}{L}{I}{W} Operation Refinement for Reducing Energy Consumption",
journal="International Symposium on System-on Chip",
publisher="IEEE",
comment="Tampere, Finland",
year="2003",
isbn="0-7803-8160-2",
pages="131--134",
comment="talk:Hirnschrott",
}

@INPROCEEDINGS{Panis+04vlsi,
        AUTHOR      = {Christian Panis and Ulrich Hirnschrott and Andreas Krall and
                       Gunther Laure and Wolfgang Lazian and Jari Nurmi},
        TITLE       = {{FSEL} - Selective Predicated Execution for a Configurable {DSP} Core},
        BOOKTITLE   = {Annual Symposium on VLSI},
        EDITOR      = {Asim Smailagic},
        PAGES       = {317--320},
        PUBLISHER   = {IEEE},
        ADDRESS     = {Lafayette, Louisiana},
        MONTH       = {February},
        YEAR        = 2004,
        ISBN        = {0-7695-2097-9}
}

@INPROCEEDINGS{Panis+04sac,
        AUTHOR      = {Christian Panis and Ulrich Hirnschrott and
                       Gunther Laure and Wolfgang Lazian and Jari Nurmi},
        TITLE       = {{DSPxPlore} -- Design Space Exploration Methodology for an
                       Embedded {DSP} Core},
        BOOKTITLE   = {Symposium on Applied Computing},
        EDITOR      = {Alessio Bechini},
        PAGES       = {876--883},
        PUBLISHER   = {ACM},
        ADDRESS     = {Nicosia},
        MONTH       = {March},
        YEAR        = 2004}

@INPROCEEDINGS{Panis+04soc,
        AUTHOR      = {Christian Panis and Ulrich Hirnschrott and
                       Andreas Krall and Stefan Farfeleder and
                       Gunther Laure and Wolfgang Lazian and Jari Nurmi},
        TITLE       = {A Scalable {DSP} Core for {SoC} Applications},
        BOOKTITLE   = {International Symposium on System-on Chip (SOC 2004)},
        EDITOR      = {Jari Nurmi and Jarmo Takkala and Timo D. H\"am\"al\"ainen},
        ISBN        = {0-7803-8558-6},
        PAGES       = {85--88},
        PUBLISHER   = {IEEE},
        ADDRESS     = {Tampere},
        MONTH       = {November},
        YEAR        = 2004,
        comment     = "talk:Laure"
        }

@article{Nerina04,
 author = {Xavier Vera and Nerina Bermudo and Josep Llosa and Antonio Gonzalez},
 title = {A fast and accurate framework to analyze and optimize cache memory behavior},
 journal = {ACM Trans. Program. Lang. Syst.},
 volume = {26},
 number = {2},
 year = {2004},
 issn = {0164-0925},
 pages = {263--300},
 doi = {http://doi.acm.org/10.1145/973097.973099},
 publisher = {ACM Press},
 }

@Article{Krall+04micro,
  author =       {Andreas Krall and Ulrich Hirnschrott and Christian Panis and Ivan Pryanishnikov},
  title =        {x{DSP}core: {A} {C}ompiler-{B}ased {C}onfigureable {D}igital {S}ignal {P}rocessor},
  journal =      {IEEE Micro},
  year =         {2004},
  OPTkey =       {},
  volume =       {24},
  number =       {4},
  pages =        {67-78},
  month =        {July/August},
  OPTnote =      {},
  OPTannote =    {},
}
@article{knoop04a,
 author = {Jens Knoop and Oliver R\&\#252;thing and Bernhard Steffen},
 title = {Lazy code motion},
 journal = {SIGPLAN Not.},
 volume = {39},
 number = {4},
 year = {2004},
 issn = {0362-1340},
 pages = {460--472},
 doi = {http://doi.acm.org/10.1145/989393.989439},
 publisher = {ACM Press},
 }
@inproceedings{scholz04,
 author = {Bernhard Scholz and Nigel Horspool and Jens Knoop},
 title = {Optimizing for space and time usage with speculative partial redundanc
y elimination},
 booktitle = {LCTES '04: Proceedings of the 2004 ACM SIGPLAN/SIGBED conference o
n Languages, compilers, and tools},
 year = {2004},
 isbn = {1-58113-806-7},
 pages = {221--230},
 location = {Washington, DC, USA},
 doi = {http://doi.acm.org/10.1145/997163.997195},
 publisher = {ACM Press},
 }

@book{knoop04b,
author = {Jens Knoop and G. Necula and Wolf Zimmermann},
title =  {Perliminary Proceedings of the 3rd International Workshop on "Compiler Optimization meets Compiler Verification},
publisher = {University Barcelona},
year = 2004,
address = {Barcelona, Spain}
}

@MastersThesis{eller05,
  author =	 {Helmut Eller},
  title =	 {Optimizing Interpreters with Superinstructions},
  school =	 {TU Wien},
  year =	 {2005},
  type =	 {Diplomarbeit},
  url =          {http://www.complang.tuwien.ac.at/Diplomarbeiten/eller05.ps.gz},
  abstract =	 {Superinstructions can be used to make virtual
                  machine (VM) interpreters faster. A superinstruction
                  is a combination of simpler VM instructions which
                  can be executed faster than the corresponding
                  sequence of simpler VM instructions, because the
                  interpretative overhead, like instruction dispatch
                  and argument fetching, is reduced. This work
                  discusses the following three topics related to
                  superinstructions. First, I present some heuristics
                  to choose superinstructions. I evaluated the
                  heuristics for Forth and Java programs. If the
                  number of allowed superinstructions was very large,
                  $> 1000$, then the heuristic which chooses all
                  possible subsequences up to length 4 achieved the
                  best results. If the number of allowed
                  superinstructions was more limited, then a heuristic
                  which favors short sequences and sequences which
                  occur in many different programs and many different
                  basic blocks performed better than the
                  others. Second, I compare a simple greedy algorithm
                  and an optimal algorithm to cover a program with
                  superinstructions. I found that the greedy algorithm
                  achieves almost optimal results. Finally, I compare
                  superinstructions with non-sequential patterns. In
                  my experiments, superinstructions performed slightly
                  better than non-sequential patterns.}
}

@MastersThesis{strauss-haslinglehner05,
  author =	 {Stefan Strauß-Haslinglehner},
  title =	 {Schnelles Starten grosser Programme},
  school =	 {TU Wien},
  year =	 {2005},
  type =	 {Diplomarbeit},
  url =          {http://www.complang.tuwien.ac.at/Diplomarbeiten/strauss-haslinglehner05.ps.gz},
  abstract =	 {This thesis shows a way to reduce application
                  startup time. This is done by preloading the data
                  needed during application startup in an order
                  optimized for block device I/O performance. The
                  thesis describes the design and implementation of a
                  Linux kernel module, consisting of a recording unit,
                  a database and a preloading unit. The recording unit
                  is responsible for recording access patterns on
                  block-devices at application startup. The recorded
                  access patterns are optimized and stored
                  persistently in the database. The preloading unit is
                  responsible for preloading the corresponding access
                  patterns on future application startups.\par The
                  effect of preloading was analyzed using several
                  applications on two test systems. Measurements
                  showed a speedup by a factor of 1.5 to 1.8.},
  kurzfassung =	 {Diese Arbeit zeigt eine Möglichkeit zur Reduktion
                  der Zeit für den Kaltstart von Anwendungen durch
                  Preloading der für den Start benötigten
                  Blockgerätedaten. Dabei wird die Zugriffsfolge des
                  Demand Pagings so umgeordnet, dass die Blockgeräte
                  I/O Performance beim Preloading optimiert wird.\par
                  Gezeigt wird das Design und die Implementierung
                  eines Linux Kernelmoduls, bestehend aus einer
                  Recording Einheit, einer Datenbank und einer
                  Preloading Einheit. Das Recording zeichnet die
                  Zugriffsfolge eines Anwendungsstarts auf die
                  Blockgeräte des Systems auf. Die so gewonnenen Daten
                  werden persistent in einer Datenbank gespeichert. Zu
                  Beginn zukünftiger Starts der Anwendung führt das
                  Preloading die optimierte Zugriffsfolgen
                  konzentriert aus.\par Die Auswirkung des Preloadings
                  wurde auf zwei unterschiedlichen Testplattformen mit
                  verschiedenen Anwendungen untersucht. Messungen
                  zeigten dabei eine Reduktion der Startzeit um den
                  Faktor 1.5 bis 1.8.}
}

@InProceedings{ertl&gregg05,
  author =       {M. Anton Ertl and David Gregg},
  title =        {Stack Caching in {Forth}},
  crossref =     {euroforth05},
  pages =        {6--15},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl%26gregg05.ps.gz},
  pdfurl =       {http://www.complang.tuwien.ac.at/anton/euroforth2005/papers/ertl%26gregg05.pdf},
  OPTnote =	 {not refereed},
  abstract =     {Stack caching speeds Forth up by keeping stack items
                  in registers, reducing the number of memory accesses
                  for stack items. This paper describes our work on
                  extending Gforth's stack caching implementation to
                  support more than one register in the canonical
                  state, and presents timing results for the resulting
                  Forth system. For single-representation stack
                  caches, keeping just one stack item in registers is
                  usually best, and provides speedups up to a factor
                  of 2.84 over the straight-forward stack
                  representation. For stack caches with multiple stack
                  representations, using the one-register
                  representation as canonical representation is
                  usually optimal, resulting in an overall speedup of
                  up to a factor of 3.80 (and up to a factor of 1.53
                  over single-representation stack caching).}
}

@InProceedings{ertl&paysan05,
  author =       {M. Anton Ertl and Bernd Paysan},
  title =        {Xchars or {Unicode} in {Forth}},
  crossref =     {euroforth05},
  pages =        {16--20},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl%26paysan05.ps.gz},
  pdfurl =       {http://www.complang.tuwien.ac.at/anton/euroforth2005/papers/ertl%26paysan05.pdf},
  OPTnote =	 {not refereed},
  abstract =     {When dealing with different scripts at the same time
                  (e.g., Latin, Greek, Cyrillic), or with Chinese
                  ideograms, 8-bit fixed-width characters are too
                  narrow. However, many Forth programs have an
                  environmental dependency on $\code{1 chars}=1$, so
                  just making Forth characters wider would cause quite
                  a lot of portability problems. We propose to add
                  xchars for dealing with potentially wider,
                  variable-width characters. This extension is
                  relatively painless, requiring changes in only those
                  program parts that work with individual characters,
                  if they should work with the extended characters;
                  uses of string words need no changes to work with
                  extended characters. The xchar words can also be
                  implemented on 8-bit-only Forth systems, so programs
                  written to use xchars can also work on such
                  systems.}
}

@Proceedings{euroforth05,
  title = 	 {21st EuroForth Conference},
  booktitle = 	 {21st EuroForth Conference},
  year = 	 {2005},
  key =		 {EuroForth'05},
  editor =	 {M. Anton Ertl},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth2005/papers/proceedings.pdf}
}

@InCollection{gregg&ertl04dspg,
  author =       {David Gregg and M. Anton Ertl},
  title =        {A Language and Tool for Generating Efficient Virtual
                  Machine Interpreters},
  booktitle =    {Domain-Specific Program Generation},
  pages =        {196--215},
  publisher =    {Springer},
  year =         {2004},
  series =       {LNCS 3016}
}

@InProceedings{shi+05,
  author =       {Yunhe Shi and David Gregg and Andrew Beatty and
                  M. Anton Ertl},
  title =        {Virtual Machine Showdown: Stack Versus Registers},
  booktitle =    {Virtual Execution Environments (VEE '05)},
  pages =        {153--163},
  year =         {2005},
  url =          {http://www.cs.tcd.ie/David.Gregg/papers/vee05-ShiGreggBeattyErtl.pdf}
}

@Article{ertl05ivme-editorial,
  author =       {M. Anton Ertl},
  title =        {Advances in Interpreters, Virtual Machines, and
                  Emulators},
  journal =      {Science of Computer Programming},
  year =         {2005},
  volume =       {57},
  number =       {3},
  pages =        {251--252},
  month =        sep,
  note =         {Editorial for special issue with journal versions of
                  the IVME'03 papers.}
}

@Article{ertl05ivme-issue,
  author =       {M. Anton {Ertl, editor}},
  title =        {Advances in Interpreters, Virtual Machines, and
                  Emulators},
  journal =      {Science of Computer Programming},
  year =         {2005},
  volume =       {57},
  number =       {3},
  month =        sep,
  note =         {Special issue with journal versions of
                  IVME'03 papers.}
}

@Article{ertl06vd,
  author =       {M. Anton Ertl},
  title =        {Bericht von der EuroForth 2005},
  journal =      {Vierte Dimension},
  year =         {2006},
  volume =       {22},
  number =       {1},
  pages =        {27},
  month =        jan,
  OPTnote =      {not refereed}
}

@InProceedings{casey+03,
  author =       {Kevin Casey and David Gregg and M. Anton Ertl and Andrew Nisbeth},
  title =        {Towards Superinstructions for Java Interpreters},
  booktitle =    {Software and Compilers for Embedded Systems (SCOPES 2003)},
  pages =        {329--343},
  year =         {2003},
  volume =       {2826},
  series =       {LNCS},
  publisher =    {Springer},
  url =          {},
  OPTannote =    {}
}

@Article{ertl06vd2,
  author =       {M. Anton Ertl},
  title =        {Ank\"undigung EuroForth 2006},
  journal =      {Vierte Dimension},
  year =         {2006},
  volume =       {22},
  number =       {2},
  pages =        {8},
  month =        apr,
  OPTnote =      {not refereed}
}

@InProceedings{burgstaller+06,
  author =       {Bernd Burgstaller and Bernhard Scholz and Anton
                  Ertl},
  title =        {An Embedded Systems Programming Environment for {C}},
  booktitle =    {Euro-Par 2006},
  pages =        {1204--1216},
  year =         {2006},
  volume =       {4128},
  series =       {LNCS},
  publisher =    {Springer},
  ISBN =         {3-540-37783-2},
  ISBN13 =       {978-3-540-37783-2},
  abstract =     {Resource constraints are a major concern with the
                  design, development, and deployment of embedded
                  systems. Embedded systems are highly
                  hardware-dependent and have little computational
                  power. Mobile embedded systems are further
                  constrained by their limited battery capacity. Many
                  of these systems are still programmed in assembly
                  language because there is a lack of efficient
                  programming environments.\par To overcome or at
                  least alleviate the restrictions, we propose a
                  light-weight and versatile programming environment
                  for the C programming language that offers
                  mixed-mode execution, i.e., code is either executed
                  on the CPU or on a virtual machine (VM). This
                  mixed-mode execution environment combines the
                  advantages of highly compressed bytecode with the
                  speed of machine code.\par We have implemented the
                  programming environment and conducted experiments
                  for selected programs of the MiBench suite and the
                  Spec~2000. The VM has a footprint of 12~KB on the
                  Intel~IA32. Initial results show that the
                  performance of the virtual machine is typically only
                  2 to 36 times slower than the binary execution, with
                  compressed code occupying only 36\%--57\% of the
                  machine code size. Combining sequences of VM
                  instructions into new VM instructions
                  (superinstructions) increases the execution speed
                  and reduces the VM code size. Preliminary
                  experiments indicate a speedup by a factor of 3.}
}

@MastersThesis{judt06,
  author =       {Christian Judt},
  title =        {{Scannergenerator mit benannten regul\"aren
                  Teilausdr\"ucken}},
  school =       {TU Wien},
  year =         {2006},
  type =	 {Diplomarbeit},
  url =          {http://www.complang.tuwien.ac.at/Diplomarbeiten/judt06/Magisterarbeit_Christian_Judt.pdf},
  abstract =     {Common scanner generators like Lex and Flex share a
                  disadvantage. The scanners only determine the
                  matched string and which rule matched it.\par They
                  don't supply information about the string's internal
                  structure (for example the digits before and after a
                  floating point number's dot and the corresponding
                  exponent). Afterwards, in many cases the string has
                  to be refined by hand written, scanner like
                  code.\par This master thesis describes possibilities
                  to implement a scanner generator without this
                  disadvantage, arising common problems and
                  corresponding solutions.\par In addition it
                  describes an extension of the scanner generator Flex
                  which implements such a system.},
  note =         {In German}
}

@Unpublished{ertl06forth-tagung1,
  author =       {M. Anton Ertl},
  title =        {Forth 200x},
  note =         {Talk given at Forth-Tagung 2006 in Witten},
  month =        may,
  year =         {2006},
  url =          {http://www.forth200x.org/}
}

@Unpublished{ertl06forth-tagung2,
  author =       {M. Anton Ertl},
  title =        {{Der Forth-Stammbaum}},
  note =         {Talk given at Forth-Tagung 2006 in Witten},
  month =        may,
  year =         {2006},
  url =          {http://www.complang.tuwien.ac.at/forth/family-tree/}
}

@Article{ertl+06dotnet,
  author =       {M. Anton Ertl and Christian Thalinger and Andreas
                  Krall},
  title =        {Superinstructions and Replication in the {Cacao}
                  {JVM} interpreter},
  journal =      {Journal of .NET Technologies},
  year =         {2006},
  volume =       {4},
  pages =        {25--32},
  note =         {Journal papers from \emph{{.NET} Technologies 2006}
                  conference},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl+06dotnet.ps.gz},
  issueurl =     {http://dotnet.zcu.cz/NET_2006/Papers_2006/2006_Vol_4.pdf},
  ISBN =         {80-86943-13-5},
  ISSN =         {1801-2108},
  abstract =     {Dynamic superinstructions and replication can
                  provide large speedups over plain interpretation. In
                  a JVM implementation we have to overcome two
                  problems to realize the full potential of these
                  optimizations: the conflict between
                  superinstructions and the quickening optimization;
                  and the non-relocatability of JVM instructions that
                  can throw exceptions. In this paper, we present
                  solutions for these problems. We also present
                  empirical results: We see speedups of up to a factor
                  of 4 on SpecJVM98 benchmarks from superinstructions
                  with all these problems solved. The contribution of
                  making potentially throwing JVM instructions
                  relocatable is up to a factor of 2. A simple way of
                  dealing with quickening instructions is good enough,
                  if superinstructions are generated in JIT
                  style. Replication has little effect on
                  performance. }
}

@Unpublished{ertl+06linkoeping1,
  author =       {M. Anton Ertl},
  title =        {Fast and Flexible Instruction Selection with
                  On-Demand Tree-Parsing Automata},
  note =         {Talk given at \emph{Mini-Workshop on Code
                  Generation} in Link\"oping, June 7--8},
  year =         {2006},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl+06pldi.ps.gz}
}

@Unpublished{ertl+06linkoeping2,
  author =       {M. Anton Ertl},
  title =        {Superinstructions and Replication in the {Cacao}
                  {JVM} interpreter},
  note =         {Talk given at \emph{Mini-Workshop on Code
                  Generation} in Link\"oping, June 7--8},
  year =         {2006},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl+06dotnet.ps.gz}
}

@InProceedings{ertl+06pldi,
  author =       {M. Anton Ertl and Kevin Casey and David Gregg},
  title =        {Fast and Flexible Instruction Selection with
                  On-Demand Tree-Parsing Automata},
  booktitle =    {SIGPLAN Conference on Programming Language
                  Design and Implementation (PLDI '06)},
  ISBN =         {1-59593-320-4},
  pages =        {52--60},
  year =         {2006},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl+06pldi.ps.gz},
  abstract =     {Tree parsing as supported by code generator
                  generators like BEG, burg, iburg, lburg and ml-burg
                  is a popular instruction selection method. There are
                  two existing approaches for implementing tree
                  parsing: dynamic programming, and tree-parsing
                  automata; each approach has its advantages and
                  disadvantages. We propose a new implementation
                  approach that combines the advantages of both
                  existing approaches: we start out with dynamic
                  programming at compile time, but at every step we
                  generate a state for a tree-parsing automaton, which
                  is used the next time a tree matching the state is
                  found, turning the instruction selector into a fast
                  tree-parsing automaton. We have implemented this
                  approach in the Gforth code generator. The
                  implementation required little effort and reduced
                  the startup time of Gforth by up to a factor of
                  2.5.}
}

@InProceedings{ertl06,
  author =       {M. Anton Ertl},
  title =        {A Portable {C} Function Call Interface},
  crossref =     {euroforth06},
  pages =        {47--51},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl06.ps.gz},
  pdfurl =       {http://www.complang.tuwien.ac.at/anton/euroforth2006/papers/ertl.pdf},
  OPTnote =      {not refereed},
  abstract =     {Many Forth systems provide means to call C
                  functions, but these interfaces are not designed to
                  be portable between platforms: A call to a C library
                  function that works on one platform may fail on the
                  next platform, because the parameter and return
                  value types of the C function may be different. In
                  this paper, we present an interface that avoids this
                  problem: In particular, the actual calls can be made
                  platform-independent; a part of the declarations is
                  platform-dependent, but can be generated
                  automatically from C .h-files.}
}

@Proceedings{euroforth06,
  title = 	 {22nd EuroForth Conference},
  booktitle = 	 {22nd EuroForth Conference},
  year = 	 {2006},
  key =		 {EuroForth'06},
  editor =	 {M. Anton Ertl},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth2006/papers/proceedings.pdf}
}

@Article{ertl06vd4,
  author =       {M. Anton Ertl},
  title =        {Einladung zur Forth-Tagung 2007},
  journal =      {Vierte Dimension},
  year =         {2006},
  volume =       {22},
  number =       {4},
  pages =        {36},
  month =        dec,
  OPTnote =      {not refereed}
}

@InProceedings{MS-ISoLA04,
  author =       "Daniel Quinlan and Markus Schordan and Qing Yi and Andreas Saebjornsen",
  title =        "Classification and Utilization of Abstractions for Optimization",
  booktitle =    "Preliminary Proceedings of the 1st International Symposium on Leveraging Applications of Formal Methods (ISoLA'04)",
  pages =        "2--9",
  publisher =    "TR-2004-6, Department of Computer Science, University Cyprus",
  mon =          oct,
  year =         "2004",
}

@InProceedings{MS-HPCA04,
  author =       "Daniel Quinlan and Qing Yi and Gary Kumfert and Thomas Epperly and Tamara Dahlgren and Markus Schordan and Brian White",
  title =        "Toward the Automated Generation of Components from Existing Source Code",
  booktitle =    "Proceedings of the 2nd Workshop on Productivity and Performance in High-End Computing",
  pages =        "12--19",
  publisher =    "",
  mon =          feb,
  year =         "2005",
}

@InProceedings{MS-SCAM05,
  author =       "Markus Schordan and Daniel Quinlan",
  title =        "Specifying Transformation Sequences as Computation on Program Fragments with an Abstract Attribute Grammar",
  booktitle =    "Proceedings of the Fifth IEEE International Workshop on Source Code Analysis and Manipulation (SCAM'05)",
  publisher =    "IEEE Computer Society Press",
  year =         "2005",
  mon =          sep,
  pages =        "97--106",
  isbn =         "0-7695-2292-0",
}

@InProceedings{MS-POHLL06,
  author =       "Daniel Quinlan and Markus Schordan and Richard Vuduc and Qing Yi",
  title =        "Annotating User-Defined Abstractions for Optimization",
  booktitle =    "Proceedings of the 20th IEEE International Parallel \& Distributed Processing Symposium (IPDPS 2006), Workshop on Performance Optimization for High-Level Languages and Libraries ({POHLL}'06)",
  publisher =    "IEEE Computer Society Press",
  year =         "2006",
  mon =          April,
  pages =        "",
}

@InProceedings{MS-SBLP06,
  author =       "Markus Schordan",
  title =        "The Language of the Visitor Design Pattern",
  booktitle =    "Proceedings of the 10th Brazilian Symposium on Programming Languages",
  publisher =    "ANAIS",
  year =         "2006",
  mon =          May,
  pages =        "235-248",
  isbn =         "85-7669-071-3",
}

@InProceedings{MS-ISoLA04_revised06,
  author =       "Daniel Quinlan and Markus Schordan and Qing Yi and Andreas Saebjornsen",
  title =        "Classification and Utilization of Abstractions for Optimization",
  booktitle =    "1st International Symposium on Leveraging Applications of Formal Methods (ISoLA'04), revised selected papers",
  series    =    {Lecture Notes in Computer Science},
  volume =       "4313",
  pages =        "57--73",
  publisher =    "Springer",
  mon =          nov,
  year =         "2006",
}

@String{j-jucs = "Journal of Universal Computer Science"}

@Article{MS-JUCS06,
  author =     "Markus Schordan",
  title =      "The Language of the Visitor Design Pattern",
  abstract =   "Design patterns have been developed to cope with the vast space of possible different designs within object-oriented systems. One of those classic patterns is the Visitor Pattern, used for representing an operation to be performed on the elements of an object structure. In general, the order in which the objects are visited is crucial. We present a mapping from the Visitor Pattern to a context free grammar that defines the set of all such visit sequences, a given Visitor can perform. The language defined by this grammar is the language of the Visitor Design Pattern and the mapping encodes knowledge about the class hierarchy and the implementation of the accept methods of a Visitor Design Pattern. It is general enough to model complications that occur when traversing arbitrary object structures, and also properly represents cases such as lack of a common base class, multiple inheritance, and inheritance from concrete classes. Due to its particular design, the grammar can also be used as precise documentation for a Visitor Design Pattern.",
  journal =    j-jucs,
  year =       "2006",
  volume =     "12",
  number =     "7",
  pages =      "849--867",
  month =      "",
  note =       "\url|http://www.jucs.org/jucs_12_7/the_language_of_the|"}
}

@Manual{gforth03,
  title =        {Gforth (Manual)},
  author =       {Neal Crook and Anton Ertl and David Kuehling and Bernd Paysan and Jens Wilke},
  organization = {Free Software Foundation},
  edition =      {0.6.2},
  year =         {2003}
}

@InProceedings{ertl07euroforth,
  author =       {M. Anton Ertl},
  title =        {Gforth's libcc {C} Function Call Interface},
  crossref =     {euroforth07},
  pages =        {7--11},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl07euroforth.ps.gz},
  pdfurl =       {http://www.complang.tuwien.ac.at/anton/euroforth2007/papers/ertl.pdf},
  OPTnote =      {not refereed},
  abstract =     {A major problem in our earlier proposal for a C
                  interface was that a part of the interface was not
                  portable between platforms. The libcc interface
                  solves this problem by using a C compiler and its
                  \code{.h}-files. The \code{.h}-files contain
                  knowledge about the specific platform, and the C
                  compiler automatically inserts the necessary
                  conversions between Forth and C types. In this paper
                  we describe the libcc implementation and
                  interface. We also discuss how a Forth-C interface
                  might be standardized.}
}

@Proceedings{euroforth07,
  title = 	 {23rd EuroForth Conference},
  booktitle = 	 {23rd EuroForth Conference},
  year = 	 {2007},
  key =		 {EuroForth'07},
  editor =	 {M. Anton Ertl},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth2007/papers/proceedings.pdf}
}

@InProceedings{ertl07kps,
  author =       {M. Anton Ertl},
  title =        {Domination-Based Scoping and Static Single
                  Assignment Languages},
  crossref =     {kps07},
  pages =        {36--41},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl07kps.ps.gz},
  url2 =         {http://www.isp.uni-luebeck.de/kps07/files/papers/ertl.pdf},
  OPTnote =      {not refereed},
  abstract =     {For local variables, domination-based scoping is an
                  alternative to block scoping: the scope of a
                  variable extends from its initializing definition to
                  every place that can be reached only via its
                  definition, ensuring that the variable is
                  initialized everywhere in its scope. The difference
                  between domination-based scoping and block scoping
                  depends on the control structures in the language;
                  domination-based scoping is preferable if the
                  language allows branching into the middle of
                  blocks. Another programming language feature
                  explored in this paper is how to enable static
                  single assignment (SSA)
                  programming. $\phi$-functions as in SSA form are
                  confusing to programmers, so we propose an
                  alternative based on passing tuples of values across
                  control flow edges.}
}

@Proceedings{kps07,
  title =        {{14. Kolloquium Programmiersprachen und Grundlagen der Programmierung (KPS '07)}},
  year =         {2007},
  booktitle =    {{14. Kolloquium Programmiersprachen und Grundlagen der Programmierung (KPS '07)}},
  editor =       {W. Dosch and C. Grelck and A. Stümpel},
  proceedingsurl = {http://www.isp.uni-luebeck.de/kps07/Tagungsband/index.htm}
}

@Article{casey+07toplas,
  author =       {Kevin Casey and M. Anton Ertl and David Gregg},
  title =        {Optimizing indirect branch prediction accuracy in
                  virtual machine interpreters},
  journal =      toplas,
  year =         {2007},
  volume =       {29},
  number =       {6},
  pages =        {37:1--37:36},
  month =        oct,
  url =          {http://doi.acm.org/10.1145/1286821.1286828},
  abstract =     {Interpreters designed for efficiency execute a huge
                  number of indirect branches and can spend more than
                  half of the execution time in indirect branch
                  mispredictions. Branch target buffers (BTBs) are the
                  most widely available form of indirect branch
                  prediction; however, their prediction accuracy for
                  existing interpreters is only 2\%--50\%. In this
                  article we investigate two methods for improving the
                  prediction accuracy of BTBs for interpreters:
                  replicating virtual machine (VM) instructions and
                  combining sequences of VM instructions into
                  superinstructions. We investigate static
                  (interpreter build-time) and dynamic (interpreter
                  runtime) variants of these techniques and compare
                  them and several combinations of these
                  techniques. To show their generality, we have
                  implemented these optimizations in VMs for both Java
                  and Forth. These techniques can eliminate nearly all
                  of the dispatch branch mispredictions, and have
                  other benefits, resulting in speedups by a factor of
                  up to 4.55 over efficient threaded-code
                  interpreters, and speedups by a factor of up to 1.34
                  over techniques relying on dynamic superinstructions
                  alone.}
}

@Article{ertl07vd1,
  author =       {M. Anton Ertl},
  title =        {Forth 200x -- Treffen auf der EuroForth 2006},
  journal =      {Vierte Dimension},
  year =         {2007},
  volume =       {23},
  number =       {1},
  pages =        {34},
  url =          {http://www.forth-ev.de/filemgmt/visit.php?lid=133},
  OPTnote =      {not refereed}
}

@Article{ertl07vd2a,
  author =       {M. Anton Ertl},
  title =        {{Factor, Postscript, und Forth: Ein kleiner Vergleich}},
  journal =      {Vierte Dimension},
  year =         {2007},
  volume =       {23},
  number =       {2},
  pages =        {10--12},
  OPTnote =      {not refereed}
}

@Article{ertl07vd2b,
  author =       {M. Anton Ertl},
  title =        {{Der Forth-Stammbaum}},
  journal =      {Vierte Dimension},
  year =         {2007},
  volume =       {23},
  number =       {2},
  pages =        {15--18},
  OPTnote =      {not refereed}
}

@Article{shi+08,
  author =       {Yunhe Shi and Kevin Casey and M. Anton Ertl and
                  David Gregg},
  title =        {Virtual machine showdown: Stack versus registers},
  journal =      {ACM Transactions on Architecture and Code
                  Optimization (TACO)},
  year =         {2008},
  volume =       {4},
  number =       {4},
  pages =        {21:1--21:36},
  month =        jan,
  url =          {http://doi.acm.org/10.1145/1328195.1328197},
  abstract =     {Virtual machines (VMs) enable the distribution of
                  programs in an architecture-neutral format, which
                  can easily be interpreted or compiled. A
                  long-running question in the design of VMs is
                  whether a stack architecture or register
                  architecture can be implemented more efficiently
                  with an interpreter. We extend existing work on
                  comparing virtual stack and virtual register
                  architectures in three ways. First, our translation
                  from stack to register code and optimization are
                  much more sophisticated. The result is that we
                  eliminate an average of more than 46\% of
                  executed VM instructions, with the bytecode size of
                  the register machine being only 26\% larger
                  than that of the corresponding stack one. Second, we
                  present a fully functional virtual-register
                  implementation of the Java virtual machine (JVM),
                  which supports Intel, AMD64, PowerPC and Alpha
                  processors. This register VM supports
                  inline-threaded, direct-threaded, token-threaded,
                  and switch dispatch. Third, we present experimental
                  results on a range of additional optimizations such
                  as register allocation and elimination of redundant
                  heap loads. On the AMD64 architecture the register
                  machine using switch dispatch achieves an average
                  speedup of 1.48 over the corresponding stack
                  machine. Even using the more efficient
                  inline-threaded dispatch, the register VM achieves a
                  speedup of 1.15 over the equivalent stack-based VM.}
}

@MastersThesis{levrinc08,
  author =       {Rastislav Levrinc},
  title =        {LLFS: A Copy-On-Write File System For Linux},
  school =       {TU Wien},
  year =         {2008},
  type =	 {Diplomarbeit},
  url =          {http://www.complang.tuwien.ac.at/Diplomarbeiten/levrinc08.pdf},
  abstract =     {This thesis discusses the design and implementation
                  of LLFS, a Linux file system. LLFS combines
                  clustering with copy-on-write. With copy-on-write no
                  allocated blocks are overwritten between commits,
                  and thanks to the clustering the speed of LLFS
                  remains comparable with clustered file systems such
                  as Ext2. Copy-on-write opens new possibilities for
                  features like snapshots, writable snapshots (clones)
                  and fast crash recovery to the consistent state of
                  the file system, while the clustering helps to keep
                  fragmentation low and speed high.\par Clustered
                  reads and writes are achieved with Ext2-like groups
                  and free-blocks bitmaps for allocating and freeing
                  of blocks. Journaling file systems like Ext3 need to
                  keep a journal and write blocks twice; By using
                  copy-on-write, LLFS avoids these overheads. By using
                  free-blocks bitmaps, it does not need a cleaner like
                  log-structured le systems. Yet LLFS offers the
                  combined functionality of journaling and
                  log-structured le systems.\par I have implemented
                  LLFS starting from the Ext2 file system and tested
                  the performance. The benchmarks have shown that LLFS
                  achieves similar performance and in some cases
                  better than Linux journaling file systems.}
}

@InProceedings{ertl08euroforth,
  author =       {M. Anton Ertl},
  title =        {Cleaning up After Yourself},
  crossref =     {euroforth08},
  pages =        {35--38},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef08/papers/ertl.pdf},
  psurl =        {http://www.complang.tuwien.ac.at/papers/ertl08euroforth.ps.gz},
  OPTnote =      {not refereed},
  abstract =     {Performing cleanup actions such as restoring a
                  variable or closing a file used to be impossible to
                  guarantee in Forth before Forth-94 gave us
                  \code{catch}. Even with \code{catch}, the cleanup
                  code can be skipped due to user interrupts if you
                  are unlucky. We introduce a construct that
                  guarantees that the cleanup code is always
                  completed. We also discuss a cheaper implementation
                  approach for cleanup code than using a full
                  exception frame.}
}

@Proceedings{euroforth08,
  title = 	 {24th EuroForth Conference},
  booktitle = 	 {24th EuroForth Conference},
  year = 	 {2008},
  key =		 {EuroForth'08},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef08/papers/proceedings.pdf}
}

@Unpublished{ertl08ft,
  author =       {M. Anton Ertl},
  title =        {Die Multicore-Herausforderung},
  note =         {Talk given at Forth-Tagung 2008},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl08ft.pdf},
  year =         {2008}
}


@Unpublished{ertl08dagstuhl,
  author =       {M. Anton Ertl},
  title =        {Using C for the Back End},
  note =         {Talk given at Dagstuhl Seminar on Emerging Uses and
                  Paradigms for Dynamic Binary Translation (08441)},
  year =         {2008},
  slides-url =   {http://www.dagstuhl.de/Materials/Files/08/08441/08441.ErtlAnton.Slides.pdf},
  abstract =     {If we want to implement a translator easily and
                  portably, but with with good code quality, then
                  translating through C is a good option. While C is a
                  static language, we can also use this technique for
                  a dynamic translator with the help of a dynamic
                  linker. The disadvantage of this approach is the
                  large startup time; this disadvantage can be
                  partially overcome by caching, batching, and seeding
                  the cache. Some challenges for this technique in
                  binary translation are modeling the (arbitrary)
                  control flow in C and the compilation granularity of
                  C. One example of using dynamic translation through
                  C, although not in a binary translation context is
                  the implementation of a foreign function
                  interface.}
}

@Unpublished{ertl09ft1,
  author =       {M. Anton Ertl},
  title =        {Neuigkeiten in Gforth 0.7.0},
  note =         {Talk given at Forth-Tagung 2009},
  year =         {2009},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl09ft1.pdf}
}

@Unpublished{ertl09ft2,
  author =       {Gerald Wodni and M. Anton Ertl},
  title =        {Neuigkeiten seit Gforth 0.7.0},
  note =         {Talk given at Forth-Tagung 2009},
  year =         {2009},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl09ft2.pdf}
}

@Article{ertl09vd3a,
  author =       {M. Anton Ertl},
  title =        {{Gforth~0.7.0}},
  journal =      {Vierte Dimension},
  year =         {2009},
  volume =       {25},
  number =       {3},
  pages =        {13--14},
  OPTnote =      {not refereed}
}

@Article{ertl09vd3b,
  author =       {M. Anton Ertl},
  title =        {{Forth200x --- Berichte von den Standardisierungstreffen}},
  journal =      {Vierte Dimension},
  year =         {2009},
  volume =       {25},
  number =       {3},
  pages =        {22},
  OPTnote =      {not refereed}
}

@Unpublished{ertl09autrans,
  author =       {M. Anton Ertl},
  title =        {Domination-Based Scoping and Static Single
                  Assignment Languages},
  note =         {Talk given at Static Single-Assignment Form Seminar
                  in Autrans, France},
  month =        {April},
  year =         {2009},
  slidesurl =    {http://www.prog.uni-saarland.de/ssasem/talks/Anton.Ertl.pdf},
  abstract =     {SSA form makes data flow easier to understand. This
                  is not just an advantage for the compiler, but can
                  also be an advantage for the programmer. One style
                  of using local variables in Forth is to program in
                  single-assignment style. Based on these experiences,
                  we also designed an experimental programming
                  language with a more conventional syntax that
                  enforces static single assignments, and collected
                  some experience with it. One consequence of this
                  approach is a new scoping regime based on
                  domination; in many cases domination-based scoping
                  is mostly equvalent to classical block-based
                  scoping, but there are also cases where there is a
                  difference.}
}

@InProceedings{ertl09kps,
  author =       {M. Anton Ertl},
  title =        {Utilizing Multiple Hardware Threads with Pipeline
                  Parallelism},
  crossref =     {kps09},
  pages =        {69--75},
  url =          {http://www.complang.tuwien.ac.at/kps09/pdfs/ertl.pdf},
  slidesurl =    {http://www.complang.tuwien.ac.at/kps09/pdfs/vortraege/kps09_ertl.pdf},
  abstract =     {In recent years general-purpose CPUs have aquired
                  multiple hardware threads by providing multiple
                  cores per CPU (multi-core) and multiple hardware
                  threads per core (simultaneous
                  multi-threading). Making good use of such resources
                  has been a challenge for several decades that has
                  been successfully attacked for scientific
                  applications, but not to a significant extent for
                  general-purpose applications. The main current
                  paradigm for this programming problem is to have
                  explicit threads that share memory and are
                  synchronized by a variety of synchronzation
                  constructs. Unfortunately, it seems to be too hard
                  to program profitably in this paradigm for
                  general-purpose applications. Pipeline parallelism
                  is a programming paradigm that has proved so easy to
                  understand that shell programmers use it even on
                  machines that have only one hardware thread. In this
                  work we present the case for better support for
                  pipeline parallelism in programming languages, and
                  present ideas on how to improve the scalability of
                  the implementation of pipeline parallelism.}
}

@Proceedings{kps09,
  title =        {{15. Kolloquium Programmiersprachen und Grundlagen der Programmierung (KPS '09)}},
  year =         {2009},
  booktitle =    {{15. Kolloquium Programmiersprachen und Grundlagen der Programmierung (KPS '09)}},
  editor =       {Jens Knoop and Adrian Prantl},
  proceedingsurl = {http://www.isp.uni-luebeck.de/kps07/Tagungsband/index.htm}
}

@InProceedings{ertl09euroforth,
  author =       {M. Anton Ertl},
  title =        {A Look at {G}forth Performance},
  crossref =     {euroforth09},
  pages =        {23--31},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef09/papers/ertl.pdf},
  slidesurl =    {http://www.complang.tuwien.ac.at/anton/euroforth/ef09/papers/ertl-slides.pdf},
  OPTnote =      {not refereed},
  abstract =     {Gforth used to be an traditional threaded-code
                  system. In the last decade we integrated a number of
                  performance features into Gforth. Several of them
                  were evaluated individually, but an evaluation with
                  a more global perspective has been missing until
                  now. This paper fills this void: We have measured
                  the performance of Gforth releases from 0.5.0 to
                  0.7.0, on a wide variety of machines, and employing
                  a wide variety of GCC versions for compiling
                  Gforth. We present that data and give explanations
                  for the performance differences.}
}

@Proceedings{euroforth09,
  title = 	 {25th EuroForth Conference},
  booktitle = 	 {25th EuroForth Conference},
  year = 	 {2009},
  key =		 {EuroForth'09},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef09/papers/proceedings.pdf}
}

@MastersThesis{baumann10,
  author =       {Christian Baumann},
  title =        {Efficiently Implementing Postscript in C\#},
  school =       {TU Wien},
  year =         {2010},
  type =         {Diplomarbeit},
  url =          {http://www.complang.tuwien.ac.at/Diplomarbeiten/baumann10.pdf},
  abstract =     {PostScript is a very powerful language for
                  describing graphics for displaying and printing.
                  What most people don't know, is that PostScript is
                  also a mighty stack-based programming language. The
                  .NET Framework is a huge collection of class
                  libraries, programming languages and standards built
                  by Microsoft. The aim of this work is it to develop
                  a PostScript interpreter with the .NET Framework
                  whose main focus lies on the execution speed of
                  PostScript programs. In a first step we will find
                  out the bottlenecks of the execution of PostScript
                  programs. For this purpose we will analyse some
                  "real-world" programs. Another important point for
                  the execution are so-called procedures. These are
                  represented by arrays in PostScript. They can be
                  changed even when they are being executed by the
                  interpreter and allow infinite tail-call
                  recursion. Name resolution in PostScript is also of
                  importance, because of its impact on the overall
                  execution time. We will see that is is possible to
                  write an interpreter for PostScript in a high-level
                  programming language like C# which can keep up with
                  current (commercial) interpreters.}
}

@MastersThesis{khyo11,
  author =       {G\"unther Anton Khyo},
  title =        {Language Support for Linux Device Driver
                  Programming},
  school =       {TU Wien},
  year =         {2011},
  type =         {Diplomarbeit},
  url =          {http://www.complang.tuwien.ac.at/Diplomarbeiten/khyo11.pdf},
  poster =       {http://www.complang.tuwien.ac.at/Diplomarbeiten/khyo11-poster.pdf},
  abstract =     {The success of any commodity operating system is
                  determined by the quality of device driver
                  support. Over the last decades, the computer
                  hardware industry has been advancing at a rapid
                  pace, putting high pressure on device driver
                  developers. About 52\% of the Linux kernel code is
                  comprised by device drivers, accounting for close to
                  7.4 million lines of code. While managing such a
                  huge code base is a challenge on its own, Linux
                  device driver developers have to overcome additional
                  obstacles. The complex, multithreaded programming
                  model of the kernel creates a high potential for
                  bugs and many of them result in kernel
                  crashes. Device drivers constitute the largest and
                  most unreliable component of the kernel.\par This
                  thesis analyses the root causes of the device driver
                  reliability problem, and demonstrates how the
                  current driver programming model can be improved to
                  assist programmers in creating better driver
                  code. To examine and test feasible improvements, a
                  prototype language (called CiD) based on a subset of
                  C was designed with the special requirements on
                  Linux device driver development in mind. CiD
                  features syntactical additions for three essential
                  device driver code aspects: concurrency,
                  synchronization and hardware communication. The
                  compiler is programmed with basic rules of the
                  concurrency model of the kernel and is able to
                  detect simple but common mistakes that lead to
                  deadlocks. Additional consistency checks support the
                  programmer in generating correct hardware I/O
                  code.\par Two device drivers have been converted
                  into CiD code to test the language extensions and
                  the implementation of the compiler. The results for
                  concurrency and synchronization are satisfying: race
                  conditions in the data-flow are reported with a
                  false positive rate of 6\% to 21\%.  The compiler also
                  generates correct concurrency and synchronization
                  code, thus mitigating the potential for
                  deadlocks. The results show that hardware I/O
                  generator leaves much room for improvement, since it
                  generates 1.5 times more I/O operations than the
                  original driver. Related approaches show that
                  further optimizations can reduce the gap.\par In
                  conclusion, we find that device driver programming
                  can be made more robust with only minor alterations
                  to the existing programming model and little
                  compiler complexity.}
}

@MastersThesis{blechmann11,
  author =       {Tim Blechmann},
  title =        {Supernova --- A Multiprocessor Aware Real-Time Audio
                  Synthesis Engine For SuperCollider},
  school =       {TU Wien},
  year =         {2011},
  type =         {Diplomarbeit},
  url =          {http://www.complang.tuwien.ac.at/Diplomarbeiten/blechmann11.pdf},
  abstract =     {These days, most computer systems are built with
                  multi-core processors. However, most computer music
                  systems use a single-threaded synthesis
                  engine. While the single-core performance for
                  current CPUs is sufficient for many computer music
                  applications, some demanding applications benefit
                  from using multiple processors simultaneously.\par
                  Real-time audio synthesis imposes specific
                  constraints regarding real-time safety, since the
                  synthesis engine needs to be tuned for worst-case
                  latencies below one millisecond. As a result, no
                  blocking synchronization can be used and the signal
                  processing graph needs to be split into reasonably
                  sized parts, that provide enough parallelism without
                  introducing a significant scheduling overhead.\par
                  During the work on this master thesis, I developed
                  Supernova as a multiprocessor aware synthesis engine
                  for SuperCollider. SuperCollider is a computer music
                  system based on a dynamic scripting language with a
                  real-time garbage collector, that is used to control
                  a separate audio synthesis server. Supernova
                  replaces the original audio synthesis engine.  It is
                  not possible to automatically parallelize the
                  synthesis graph of SuperCollider without
                  fundamentally changing the semantics of the
                  SuperCollider class library. Therefore a the
                  programming model for the synthesis graph was
                  extended, exposing parallelism explicitly to the
                  user. To achieve this, I propose two simple
                  concepts, `parallel groups' and `satellite
                  nodes'.\par To my knowledge, Supernova is the first
                  parallel audio synthesis engine that is designed for
                  real-time operations under low-latency constraints
                  without adding any additional latency to the audio
                  signal.}
}

@InProceedings{ertl&kuehling10,
  author =       {M. Anton Ertl and David K\"uhling},
  title =        {\texttt{ABI-CODE}: Increasing the portability of assembly language words},
  crossref =     {euroforth10},
  pages =        {5--14},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef10/papers/ertl.pdf},
  OPTnote =      {refereed},
  abstract =     {Code words are not portable between Forth systems,
                  even on the same architecture; worse, in the case of
                  Gforth, they are not even portable between different
                  engines nor between different installations.  We
                  propose a new mechanism for interfacing to assembly
                  code: \texttt{abi-code} words are written to comply with the
                  calling conventions (ABI) of the target platform,
                  which does not change between Forth systems. In the
                  trade-off between performance and portability,
                  \texttt{abi-code} provides a new option between code words
                  and colon definitions. Compared to \texttt{code} words, the
                  \texttt{abi-code} mechanism incurs an overhead of 16
                  instructions on AMD64. Compared to colon
                  definitions, we achieved a speedup by a factor of
                  1.27 on an application by rewriting one short colon
                  definition as an abi-code word.}
}

@InProceedings{wodni10,
  author =       {Gerald Wodni and M. Anton Ertl},
  title =        {The Forth Net},
  crossref =     {euroforth10},
  pages =        {68--69},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef10/papers/wodni.pdf},
  OPTnote =      {not refereed},
  abstract =     {CPAN and PECL are impressive ways of sharing custom
                  libraries.  Projects are discussed, hosted and
                  downloaded.  Their dependencies are clear (no need
                  to search across the web) and also downloaded at
                  once.  There is no such web portal for Forth ---
                  until now.}
}

@Proceedings{euroforth10,
  title = 	 {26th EuroForth Conference},
  booktitle = 	 {26th EuroForth Conference},
  year = 	 {2010},
  key =		 {EuroForth'10},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef10/papers/proceedings.pdf}
}

@MastersThesis{sabin11,
  author =       {Patrick Sabin},
  title =        {Implementing a Reversible Debugger for Python},
  school =       {TU Wien},
  year =         {2011},
  type =         {Diplomarbeit},
  url =          {http://www.complang.tuwien.ac.at/Diplomarbeiten/sabin11.pdf},
  codeurl =      {http://code.google.com/p/epdb/},
  abstract =     {The programmer usually initiates a debugging process
                  because of a failure and his goal is to find the
                  defect. The defect is always executed before the
                  failure occurs, so it is natural to start at the
                  failure and move backwards in a program to find the
                  defect. However this procedure is usually not
                  supported by actual debuggers. \par There are two
                  different methods of implementing a reversible
                  debugger, i.e., a debugger which can run the program
                  forwards and backwards. The first one is the
                  logging-based approach, which records the state of
                  the program after every instruction and allows
                  inspection after the program has finished
                  running. The second one is the replay-based
                  approach, where the debugger runs the debuggee
                  interactively.  For this purpose it makes periodic
                  snapshots. The debugger runs the debuggee backwards
                  by restoring a previous snapshot and then running
                  the program forward until it reaches the desired
                  position. \par In this thesis, I show that it is
                  possible to implement a reversible debugger by
                  continuous snapshotting of the program state. There
                  are indeed some challenges with using such a
                  feature. For example, there are non-deterministic
                  instructions, which execute differently each
                  instance the interpreter executes them, e.g., a
                  function, which returns the system time. Another
                  example of this is when instructions change some
                  external state like a file on the hard drive, which
                  the debugger does not save when it makes a
                  snapshot. Another problem is that some instructions
                  do something different each time the debugger
                  executes them. \par Therefore I present some methods
                  of treating these problems. Accompanying this
                  thesis, I have developed a proof-of-concept
                  implementation of a reversible debugger called epdb
                  for the Python programming language, which solves
                  most of the problems of reversible debugging. \par
                  In order to support reversible debugging of programs
                  which have non-deterministic instructions in it, I
                  introduce the new concept of timelines. With
                  timelines, the user can decide which execution path
                  he wants to take. I also introduce stateful resource
                  management to support the management of the external
                  state. This allows the user to investigate the
                  environment corresponding to the actual position
                  inside the program, when he executes the program
                  backwards.},
}

@InProceedings{wodni&ertl11,
  author =       {Gerald Wodni and M. Anton Ertl},
  title =        {{SWIG} \& {The Forth Net}: Hands-On},
  crossref =     {euroforth11},
  pages =        {32--35},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef11/papers/wodni.pdf},
  OPTnote =      {not refereed},
  abstract =     {We have shown the basic functionality of SWIG and
                  The Forth Net in the past.  Now we want to provide
                  two basic examples which explain how to use them and
                  to show how easy it is to create C-interfaces or
                  Forth-libraries and share them.  }
}

@InProceedings{ertl11euroforth,
  author =       {M. Anton Ertl},
  title =        {Ways to Reduce the Stack Depth},
  crossref =     {euroforth11},
  pages =        {36--41},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl11euroforth.ps.gz},
  url2 =         {http://www.complang.tuwien.ac.at/anton/euroforth/ef11/papers/ertl.pdf},
  OPTnote =      {not refereed},
  abstract =     {Having to deal with many different data can lead to
                  problems in Forth: The data stack is the preferred
                  place to store data; on the other hand, dealing with
                  too many data stack items is cumbersome and usually
                  bad style. This paper presents and discusses ways to
                  unburden the data stack; some of them are used
                  widely, others are almost unknown or new.}
}

@Proceedings{euroforth11,
  title = 	 {27th EuroForth Conference},
  booktitle = 	 {27th EuroForth Conference},
  year = 	 {2011},
  key =		 {EuroForth'11},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef11/papers/proceedings.pdf}
}

@Unpublished{wodni&ertl11ft,
  author =       {Gerald Wodni and M. Anton Ertl},
  title =        {SWIG-Erweiterung für Forth},
  note =         {Vortrag bei der Forth-Tagung 2011},
  url =          {http://forth-ev.de/filemgmt/visit.php?lid=354},
  year =         {2011}
}

@Unpublished{ertl11ft-stack,
  author =       {M. Anton Ertl},
  title =        {Techniken für weniger Stack-Tiefe},
  note =         {Vortrag bei der Forth-Tagung 2011},
  url =          {http://forth-ev.de/filemgmt/visit.php?lid=353},
  year =         {2011}
}

@Unpublished{ertl11ft-multi,
  author =       {M. Anton Ertl},
  title =        {Multi-Threading und Multi-Tasking in Gforth},
  note =         {Vortrag bei der Forth-Tagung 2011},
  url =          {http://forth-ev.de/filemgmt/visit.php?lid=351},
  year =         {2011}
}

@Unpublished{ertl11ft-string,
  author =       {M. Anton Ertl},
  title =        {Ausgabe in Strings},
  note =         {Vortrag bei der Forth-Tagung 2011},
  url =          {http://forth-ev.de/filemgmt/visit.php?lid=351},
  year =         {2011}
}


@MastersThesis{schroeder12,
  author =       {Michael Schroeder},
  title =        {Optimizing {Lua} Using Run-Time Type Specialization},
  school =       {TU Wien},
  year =         {2012},
  type =         {Bachelor's Thesis},
  url =          {http://www.complang.tuwien.ac.at/anton/praktika-fertig/schroeder/thesis.pdf},
  abstract =     {Abstract: Like other dynamically typed languages,
                  Lua spends a significant amount of execution time on
                  type checks. Yet most programs, even if they are
                  written in a dynamic language, are actually
                  overwhelmingly monomorphically typed. To remove this
                  unnecessary type- checking overhead, we implement a
                  portable optimization scheme that rewrites virtual
                  machine instructions at run-time based on the types
                  of their operands. While not consistent across all
                  platforms, we achieve average speed-ups of 1.2x on
                  Intel, with a threaded variant of our VM showing
                  improvements in the 1.5x to 2.4x range.}
}

@Unpublished{ertl12ft,
  author =       {M. Anton Ertl},
  title =        {objects2.fs: Ein modernisiertes objektorientiertes Paket},
  note =         {Vortrag bei der Forth-Tagung 2012},
  url =          {http://forth-ev.de/filemgmt/visit.php?lid=415},
  year =         {2012}
}


@InProceedings{ertl12,
  author =       {M. Anton Ertl},
  title =        {Methods in objects2: Duck Typing and Performance},
  crossref =     {euroforth12},
  pages =        {96--103},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef12/papers/ertl.pdf},
  OPTnote =      {not refereed},
  abstract =     {The major new feature of objects2 is defining
                  methods for any class (like in Smalltalk): this
                  means that we can have two classes that are
                  unrelated by inheritance, yet react to the same
                  messages and can be used in the same contexts; this
                  is also known as duck typing.  This paper discusses
                  the implementation of method dispatch for these
                  general selectors as well as the more restricted
                  class selectors of the original \code{objects.fs},
                  and compares the memory and execution time costs of
                  these method selector implementations: Unhashed
                  general selectors are as fast as class selectors
                  (down to two instructions), but can consume a lot of
                  memory (megabytes of dispatch tables for large class
                  hierarchies); hashed general selectors are
                  significantly slower ($\ge 43$ cycles), but consume
                  less memory.  Programmers don't need to choose a
                  selector implementation up front; instead, it is
                  easy to switch between them later, on a per-selector
                  basis.}
}

@Proceedings{euroforth12,
  title = 	 {28th EuroForth Conference},
  booktitle = 	 {28th EuroForth Conference},
  year = 	 {2012},
  key =		 {EuroForth'12},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef12/papers/proceedings.pdf}
}


@Unpublished{ertl13ft,
  author =       {M. Anton Ertl},
  title =        {{Forth als Basis f\"ur einen portablen Assembler}},
  note =         {Vortrag bei der Forth-Tagung 2013},
  url =          {https://www.complang.tuwien.ac.at/anton/slides/forth-tagung13.pdf},
  year =         {2013}
}

@InProceedings{ertl13-paf,
  author =       {M. Anton Ertl},
  title =        {{PAF}: A Portable Assembly Language},
  crossref =     {euroforth13},
  pages =        {30--38},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef13/papers/ertl-paf.pdf},
  OPTnote =      {not refereed},
  abstract =     {A portable assembly language provides access to
                  machine-level features like memory addresses,
                  machine words, code addresses, and modulo
                  arithmetics, like assembly language, but abstracts
                  away differences between architectures like the
                  assembly language syntax, instruction encoding,
                  register set size, and addressing modes.  Forth
                  already satisfies a number of the characteristics of
                  a portable assembly language, and is therefore a
                  good basis.  This paper presents PAF, a portable
                  assembly language based on Forth, and specifically
                  discusses language features that other portable
                  assembly languages do not have, and their benefits;
                  it also discusses the differences from Forth.  The
                  main innovations of PAF are: tags indicate the
                  control flow for indirect branches and calls; and
                  PAF has two kinds of calls and definitions: the ABI
                  ones follow the platform's calling convention and
                  are useful for interfacing to the outside world,
                  while the PAF ones allow tail-call elimination and
                  are useful for implementing general control
                  structures.}
}

@InProceedings{ertl13-strings,
  author =       {M. Anton Ertl},
  title =        {Standardize Strings Now!},
  crossref =     {euroforth13},
  pages =        {39--43},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef13/papers/ertl-strings.pdf},
  OPTnote =      {not refereed},
  abstract =     {This paper looks at the issues in string words: what
                  operations may be required, various design options,
                  and why this has lead to the current state of
                  standardization of string operations that is
                  insufficient in the eyes of many.}
}

@InProceedings{ertl13-regions,
  author =       {M. Anton Ertl},
  title =        {Region-Based Memory Allocation},
  crossref =     {euroforth13},
  pages =        {65--67},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef13/papers/ertl-regions.pdf},
  OPTnote =      {Presentation slides}
}

@Proceedings{euroforth13,
  title = 	 {29th EuroForth Conference},
  booktitle = 	 {29th EuroForth Conference},
  year = 	 {2013},
  key =		 {EuroForth'13},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef13/papers/proceedings.pdf}
}

@InProceedings{ertl13-kps,
  author =       {M. Anton Ertl},
  title =        {{PAF}: A Portable Assembly Language Based on {F}orth},
  booktitle =    {17. Kolloquium Programmiersprachen und Grundlagen der Programmierung (KPS'13)},
  year =         {2013},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl-kps13.ps.gz},
  abstract =     {A portable assembly language provides access to
                  machine-level features like memory addresses,
                  machine words, code addresses, and modulo
                  arithmetics, like assembly language, but abstracts
                  away differences between architectures like the
                  assembly language syntax, instruction encoding,
                  register set size, and addressing modes. Forth
                  already satisfies a number of the characteristics of
                  a portable assembly language, and is therefore a
                  good basis. This paper presents PAF, a portable
                  assembly language based on Forth, and specifically
                  discusses language features that other portable
                  assembly languages do not have, and their benefits;
                  it also discusses the differences from Forth. The
                  main innovations of PAF are: tags indicate the
                  control flow for indirect branches and calls; and
                  PAF has two kinds of calls and definitions: the ABI
                  ones follow the platform's calling convention and
                  are useful for interfacing to the outside world,
                  while the PAF ones allow tail-call elimination and
                  are useful for implementing general control
                  structures.}
}

@Unpublished{ertl14ft,
  author =       {M. Anton Ertl},
  title =        {{Forth in "Grundlagen der Programmkonstruktion"}},
  note =         {Vortrag bei der Forth-Tagung 2014},
  url =          {http://www.forth-ev.de/wiki/lib/exe/fetch.php/events:slides-anton.pdf},
  year =         {2014}
}

@InProceedings{ertl14,
  author =       {M. Anton Ertl},
  title =        {Region-Based Memory Allocation in {Forth}},
  crossref =     {euroforth14},
  pages =        {45--49},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef14/papers/ertl.pdf},
  OPTnote =      {not refereed},
  abstract =     {Memory management has a pervasive effect on the way
                  we program.  In region-based memory allocation,
                  objects with roughly the same life expectancy are
                  allocated in one region, and in the end the whole
                  region is freed at once.  This avoids the need to
                  keep track of the individual objects for
                  \texttt{free}.  Regions are simple to implement and
                  compatible with real-time requirements and
                  multi-threading, and seem to be ideal for Forth,
                  except for one thing: The region id has to be passed
                  to the allocation word, increasing the stack load.
                  We propose using context wrappers to avoid that
                  problem.  This even allows to use existing
                  \texttt{allocate}-based libraries with regions, but we
                  then have to decide what \texttt{free} and
                  \texttt{resize} inside these libraries do.}
}

@InProceedings{ertl14-c,
  author =       {M. Anton Ertl},
  title =        {How to get rid of {C}},
  crossref =     {euroforth14},
  pages =        {63--65},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef14/papers/ertl-c.pdf},
  video =        {http://www.forth-ev.de/wiki/doku.php/events:euroforth-2014:ridofc},
  OPTnote =      {Presentation slides}
}

@Proceedings{euroforth14,
  title = 	 {30th EuroForth Conference},
  booktitle = 	 {30th EuroForth Conference},
  year = 	 {2014},
  key =		 {EuroForth'14},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef14/papers/proceedings.pdf}
}

@Article{ertl15vd,
  author =       {M. Anton Ertl},
  title =        {Forth 200x­Treffen auf der EuroForth 2015},
  journal =      {Vierte Dimension},
  year =         {2015},
  volume =       {31},
  number =       {3+4},
  pages =        {21}
}

@InProceedings{ertl15,
  author =       {M. Anton Ertl and Bernd Paysan},
  title =        {From \texttt{exit} to \texttt{set-does>} --- A Story of {Gforth} Re-Implementation},
  crossref =     {euroforth15},
  pages =        {41--47},
  url =          {http://www.euroforth.org/ef15/papers/ertl.pdf},
  url-slides =   {http://www.euroforth.org/ef15/papers/ertl-slides.pdf},
  OPTnote =      {not refereed},
  abstract =     {We changed \code{exit} from an immediate to a
                  non-immediate word; this requires changes in the
                  de-allocation of locals, which leads to changes in
                  the implementation of colon definitions, and to
                  generalizing \code{does>} into \code{set-does>}
                  which allows the defined word to call arbitrary
                  execution tokens.  The new implementation of locals
                  cleanup can usually be optimized to similar
                  performance as the old implementation.  The new
                  implementation of \code{does>} has performance
                  similar to the old implementation, while using
                  \code{set-does>} results in speedups in certain
                  cases.}
}

@InProceedings{ertl-recognizers15,
  author =       {M. Anton Ertl},
  title =        {Recognizers --- Why and How},
  crossref =     {euroforth15},
  pages =        {77--78},
  url-slides =   {http://www.euroforth.org/ef15/papers/ertl-recognizers-slides.pdf},
  OPTnote =      {presentation slides}
}

@Proceedings{euroforth15,
  title = 	 {31st EuroForth Conference},
  booktitle = 	 {31st EuroForth Conference},
  year = 	 {2015},
  key =		 {EuroForth'15},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef15/papers/},
  url-pdf =      {http://www.complang.tuwien.ac.at/anton/euroforth/ef15/papers/proceedings.pdf}
}

@InProceedings{ertl15kps,
  author =       {M. Anton Ertl},
  title =        {What every compiler writer should know about programmers},
  crossref =	 {kps15},
  pages =	 {112--133},
  url =          {http://www.complang.tuwien.ac.at/kps2015/proceedings/KPS_2015_submission_29.pdf},
  abstract =     {In recent years C compiler writers have taken the
                  attitude that tested and working production (i.e.,
                  \emph{conforming} according to the C standard) C
                  programs are buggy if they contain undefined
                  behavior, and they feel free to compile these
                  programs (except benchmarks) in a way that they no
                  longer work.  The justification for this attitude is
                  that it allows C compilers to optimize better.  But
                  while these ``optimizations'' provide a factor 1.017
                  speedup with Clang-3.1 on SPECint 2006, for
                  non-benchmarks it also has a cost: if we go by the
                  wishes of the compiler maintainers, we have to
                  ``fix'' our working, conforming C programs; this
                  requires a substantial effort, and can result in
                  bigger and slower code.  The effort could otherwise
                  be invested in source-level optimizations by
                  programmers, which can have a much bigger effect
                  (e.g., a factor $>2.7$ for Jon Bentley's traveling
                  salesman program).  Therefore, optimizations that
                  assume that undefined behavior does not exist are a
                  bad idea not just for security, but also for the
                  performance of non-benchmark programs.}
}

@Proceedings{kps15,
  title =        {18. Kolloquium Programmiersprachen und Grundlagen der Programmierung (KPS'15)},
  booktitle =    {18. Kolloquium Programmiersprachen und Grundlagen der Programmierung (KPS'15)},
  year =         {2015},
  key =          {KPS '15},
  editor =       {Jens Knoop and M. Anton Ertl},
  url = 	 {http://www.complang.tuwien.ac.at/kps2015/proceedings/},
  url-pdf = 	 {http://www.complang.tuwien.ac.at/kps2015/proceedings/kps15.pdf}
}

@TechReport{hajdarevic16,
  author =       {Esad Hajdarevic},
  title =        {Lot: Prototype of a Constraints-Based Spreadsheet},
  institution =  {TU Wien},
  year =         {2016},
  type =         {Bachelor's thesis},
  url =          {http://www.complang.tuwien.ac.at/Bachelorarbeiten/hajdarevic16.pdf},
  abstract =     {Spreadsheet formulas can be seen as a collection of
                  unidirectional constraints. Users often want to
                  apply the formulas in the other direction (for
                  example, to find values that add up to a certain sum
                  vs. calculating the sum), which can lead to a time
                  consuming trial-and-error approach of finding the
                  right values or error-prone rewriting of existing
                  formulas.\par As part of this thesis, I developed a
                  web-based spreadsheet that supports working with
                  constraints. Number of examples are presented to
                  illustrate that addition of constraints make
                  spreadsheets applicable to a whole new set of
                  problems for which they would otherwise be
                  unsuitable.}
}

@InProceedings{ertl-sections16,
  author =       {M. Anton Ertl},
  title =        {Sections},
  crossref =     {euroforth16},
  pages =        {55--57},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl-sections16.pdf},
  video =	 {https://wiki.forth-ev.de/lib/exe/fetch.php/events:sections.mp4},
  OPTnote =      {not refereed},
  abstract =     {A section is a contiguous region of memory, to which
                  data or code can be appended (like the Forth
                  dictionary). Assembly languages and linkers have
                  supported multiple sections for a long time. This
                  paper describes the benefits of supporting multiple
                  sections in Forth, interfaces and implementation
                  techniques.}
}

@InProceedings{ertl-recognizers16,
  author =       {M. Anton Ertl},
  title =        {Recognizers: Arguments and Design Decisions},
  crossref =     {euroforth16},
  pages =        {58--63},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl-recognizers16.pdf},
  video =	 {https://wiki.forth-ev.de/lib/exe/fetch.php/events:recognizers.mp4},
  OPTnote =      {not refereed},
  abstract =     {The Forth text interpreter processes words and
                  numbers.  Currently the set of words can be extended
                  by programmers, but not the recognized numbers.
                  User-defined recognizers allow to extend the
                  number-recognizer part, too.  This paper shows the
                  benefits of recognizers and discusses
                  counterarguments.  It also discusses several design
                  decisions: Whether to define temporary words, or a
                  set of interpretation, compilation, and postponing
                  actions; and whether to hook the recognizers inside
                  \code{find} or in the text interpreter.}
}

@InProceedings{ertl-secure16,
  author =       {M. Anton Ertl},
  title =        {Security},
  crossref =     {euroforth16},
  pages =        {82--83},
  url =          {http://www.euroforth.org/ef16/papers/ertl-secure.pdf},
  video =	 {https://wiki.forth-ev.de/lib/exe/fetch.php/events:security.mp4},
  OPTnote =      {presentation slides}
}

@Proceedings{euroforth16,
  title = 	 {32nd EuroForth Conference},
  booktitle = 	 {32nd EuroForth Conference},
  year = 	 {2016},
  key =		 {EuroForth'16},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef16/papers/proceedings.pdf}
}

@Unpublished{ertl16ft-secure,
  author =       {M. Anton Ertl},
  title =        {{Sicheres Forth}},
  note =         {Vortrag bei der Forth-Tagung 2016},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl16ft-secure.pdf},
  year =         {2016},
  abstract =     {Buffer overflows sind ein beliebtes Einfallstor fuer
                  Angriffe auf Software. Forth vermeidet zwar einige
                  Probleme von C, aber es ist auch nicht dagegen
                  gefeit. Das Problem komplett zu eliminieren wuerde
                  Forth zu weit einschraenken, aber man kann die
                  Programme ein zwei Teile teilen: einen, der die
                  unsicheren Features verwenden darf und genau
                  inspiziert werden muss, und einen, der nur sichere
                  Features verwenden darf und bei dem man in der
                  Inspektion nicht auf Buffer overflows schauen muss.}
}

@Unpublished{ertl16ft-vectors,
  author =       {M. Anton Ertl},
  title =        {{Gedanken zu SIMD und Vektorisierung}},
  note =         {Vortrag bei der Forth-Tagung 2016},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl16ft-vectors.pdf},
  year =         {2016},
  abstract =     {Seit zwei Jahrzehnten stellen populäre Prozessoren
                  SIMD-Erweiterungen wie SSE zur Verfügung. Einen
                  portabler Assembler auf Basis von Forth sollte es
                  auch ermöglichen, diese Befehle zu
                  nutzen. Klassischerweise geht das über Intrinsics
                  (sehr architekturspezifisch) oder über automatische
                  Vektorisierung (sehr kompliziert). In diesem Vortrag
                  stelle ich einige Überlegungen zu dem Thema vor. Ein
                  zentrales Element dabei sind Überlegungen zur
                  Speicherverwaltung, um Komplikationen oder
                  Geschwindigkeitsnachteile durch Abhängigkeiten zu
                  vermeiden.}
}

@Unpublished{ertl17ft,
  author =       {M. Anton Ertl},
  title =        {{Statische Typüberprüfung}},
  note =         {Vortrag bei der Forth-Tagung 2017},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl17ft.pdf},
  video =        {https://wiki.forth-ev.de/doku.php/events:tagung-2017:statische-typpruefung},
  year =         {2017}
}

@Unpublished{ertl15ft,
  author =       {M. Anton Ertl},
  title =        {{Forth-2012: Der neue Standard}},
  note =         {Vortrag bei der Forth-Tagung 2015},
  url=           {http://www.complang.tuwien.ac.at/papers/ertl15ft.pdf},
  video =        {https://wiki.forth-ev.de/doku.php/events:tagung-2015:forth-2012},
  year =         {2015}
}

@Article{ertl16vd,
  author =       {M. Anton Ertl},
  title =        {Forth-2012: Der neue Standard},
  journal =      {Vierte Dimension},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl16vd.pdf},
  year =         {2016},
  volume =       {32},
  number =       {3},
  pages =        {13--18}
}

@InProceedings{ertl17,
  author =       {M. Anton Ertl},
  title =        {SIMD and Vectors},
  crossref =     {euroforth17},
  pages =        {25--36},
  url =          {http://www.euroforth.org/ef17/papers/ertl.pdf},
  video =        {https://wiki.forth-ev.de/lib/exe/fetch.php/events:ef2017:simd-vectors.mp4},
  OPTnote =      {refereed},
  abstract =     {Many programs have parts with significant data
                  parallelism, and many CPUs provide SIMD instructions
                  for processing data-parallel parts faster.  The weak
                  link in this chain is the programming language.  We
                  propose a vector wordset so that Forth programmers
                  can make use of SIMD instructions to speed up the
                  data-parallel parts of their applications.  The
                  vector wordset uses a separate vector stack
                  containing opaque vectors with run-time determined
                  length.  Preliminary results using one benchmark
                  show a factor~8 speedup of a simple vector
                  implementation over scalar Gforth code, a smaller
                  (factor 1.8) speedup over scalar VFX code; another
                  factor of 3 is possible on this benchmark with a
                  more sophisticated implementation.  However, vectors
                  have an overhead; this overhead is amortized in this
                  benchmark at vector lengths between 3 and 250
                  (depending on which variants we compare).}
}

@Proceedings{euroforth17,
  title = 	 {33rd EuroForth Conference},
  booktitle = 	 {33rd EuroForth Conference},
  year = 	 {2017},
  key =		 {EuroForth'17},
  url =          {http://www.complang.tuwien.ac.at/anton/euroforth/ef17/papers/proceedings.pdf}
}

@InProceedings{ertl17kps,
  author =       {M. Anton Ertl},
  title =        {The Intended Meaning of \emph{Undefined Behaviour}
                  in {C} Programs},
  crossref =     {kps17},
  pages =        {20--28},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl17kps.pdf},
  url2 =         {http://www.kps2017.uni-jena.de/proceedings/kps2017_submission_5.pdf},
  abstract =     {All significant C programs contain undefined
                  behaviour. There are conflicting positions about how
                  to deal with that: One position is that all these
                  programs are broken and may be compiled to arbitrary
                  code.  Another position is that tested and working
                  programs should continue to work as intended by the
                  programmer with future versions of the same C
                  compiler. In that context the advocates of the first
                  position often claim that they do not know the
                  intended meaning of a program with undefined
                  behaviour. This paper explores this topic in greater
                  depth. The goal is to preserve the behaviour of
                  existing, tested programs. It is achieved by letting
                  the compiler define a consistent mapping of C
                  operations to machine code; and the compiler then
                  has to stick to this behaviour during optimizations
                  and in future releases.}
}

@Proceedings{kps17,
  title =        {19. Kolloquium Programmiersprachen und Grundlagen der Programmierung (KPS'17)},
  booktitle =    {19. Kolloquium Programmiersprachen und Grundlagen der Programmierung (KPS'17)},
  year =         {2017},
  key =          {KPS '17},
  editor =       {Wolfram Amme and Thomas Heinze},
  url = 	 {http://www.kps2017.uni-jena.de/kps2017_proceedings.html},
  url-pdf = 	 {http://www.kps2017.uni-jena.de/proceedings/kps2017.pdf}
}

@InProceedings{thier+18,
  author =       {Patrick Thier and M. Anton Ertl and Andreas Krall},
  title =        {Fast and Flexible Instruction Selection with Constraints},
  booktitle =    {Compiler Construction (CC'18)},
  year =         {2018},
  pages =        {93--103},
  url =          {http://www.complang.tuwien.ac.at/papers/thier+18.pdf},
  doi =		 {10.1145/3178372.3179501},
  abstract =     {Tree-parsing instruction selection as used in, e.g.,
                  lcc, uses dynamic costs to gain flexibility and
                  handle situations (such as read-modify-write
                  instructions) that do not fit into the basic
                  tree-parsing model.  The disadvantage of dynamic
                  costs is that we can no longer turn the tree grammar
                  into a tree automaton (as is done by burg) for fast
                  instruction selection for JIT compilers.  In this
                  paper we introduce constraints that say whether a
                  tree-grammar rule is applicable or not.  While
                  theoretically less powerful than dynamic costs,
                  constraints cover the practical uses of dynamic
                  costs; more importantly, they allow turning the tree
                  grammar with constraints into a tree automaton (with
                  instruction-selection-time checks), resulting in
                  faster instruction selection than with pure
                  instruction-selection-time dynamic programming.  We
                  integrate constraints in an instruction selector
                  that matches DAGs with tree rules.  We evaluate this
                  concept in lcc and the CACAO JavaVM JIT compiler,
                  and see instruction selector speedups by a factor
                  1.33--1.89.}
}

@Unpublished{ertl18ft,
  author =       {M. Anton Ertl},
  title =        {Verallgemeinerung von locals},
  note =         {Vortrag bei der Forth-Tagung 2018},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl18ft.pdf},
  video =        {https://wiki.forth-ev.de/doku.php/events:tagung-2018:locals},
  year =         {2018}
}

@InProceedings{ertl18manlang,
  author =       {M. Anton Ertl},
  title =        {Software Vector Chaining},
  booktitle =    {15th International Conference on Managed Languages &
                  Runtimes (Manlang'18)},
  year =         {2018},
  pages =        {Article-18},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl18manlang.pdf},
  doi =          {10.1145/3237009.3237021},
  abstract =     {Providing vectors of run-time determined length as
                  opaque value types is a good interface between the
                  machine-level SIMD instructions and portable
                  application-oriented programming languages.
                  Implementing vector operations requires a loop that
                  breaks the vector into SIMD-register-sized chunks.
                  A compiler can fuse the loops of several vector
                  operations together.  However, during normal
                  compilation this is only easy if no other control
                  structures are involved.  This paper explores an
                  alternative: collect a trace of vector operations at
                  run-time (following the program control flow during
                  this collecting step), and then perform the combined
                  vector loop.  This arrangement has a certain
                  run-time overhead, but its implementation is simpler
                  and can happen independently, in a library.
                  Preliminary performance results indicate that the
                  overhead makes this approach beneficial only for
                  long vectors ($>1$KB).  For shorter vectors, unfused
                  loops should be used in a library setting.
                  Fortunately, this choice can be made at run time,
                  individually for each vector operation.}
}

@InProceedings{ertl&paysan18,
  author =       {M. Anton Ertl and Bernd Paysan},
  title =        {Closures --- the {Forth} way},
  crossref =     {euroforth18},
  pages =        {17--30},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl%26paysan.pdf},
  url2 =         {http://www.euroforth.org/ef18/papers/ertl.pdf},
  slides-url =   {http://www.euroforth.org/ef18/papers/ertl-slides.pdf},
  video =	 {https://wiki.forth-ev.de/doku.php/events:ef2018:closures},
  OPTnote =      {refereed},
  abstract =     {In Forth 200x, a quotation cannot access a local
                  defined outside it, and therefore cannot be
                  parameterized in the definition that produces its
                  execution token.  We present Forth closures; they
                  lift this restriction with minimal implementation
                  complexity.  They are based on passing parameters on
                  the stack when producing the execution token.  The
                  programmer has to explicitly manage the memory of
                  the closure.  We show a number of usage examples.
                  We also present the current implementation, which
                  takes 109~source lines of code (including some extra
                  features).  The programmer can mechanically convert
                  lexical scoping (accessing a local defined outside)
                  into code using our closures, by applying assignment
                  conversion and flat-closure conversion.  The result
                  can do everything one expects from closures,
                  including passing Knuth's man-or-boy test and living
                  beyond the end of their enclosing definitions.}
}

@InProceedings{ertl18chaining,
  author =       {M. Anton Ertl},
  title =        {Software Vector Chaining},
  crossref =     {euroforth18},
  pages =        {54-55},
  url =          {http://www.euroforth.org/ef18/papers/ertl-chaining.pdf},
  url-slides =   {http://www.euroforth.org/ef18/papers/ertl-chaining-slides.pdf},
  video =        {https://wiki.forth-ev.de/doku.php/events:ef2018:vectors},
  OPTnote =      {presentation slides, paper published at Manlang'18},
  abstract =     {Providing vectors of run-time determined length as
                  opaque value types is a good interface between the
                  machine-level SIMD instructions and portable
                  application-oriented programming languages.
                  Implementing vector operations requires a loop that
                  breaks the vector into SIMD-register-sized chunks.
                  A compiler can fuse the loops of several vector
                  operations together.  However, during normal
                  compilation this is only easy if no other control
                  structures are involved.  This paper explores an
                  alternative: collect a trace of vector operations at
                  run-time (following the program control flow during
                  this collecting step), and then perform the combined
                  vector loop.  This arrangement has a certain
                  run-time overhead, but its implementation is simpler
                  and can happen independently, in a library.
                  Preliminary performance results indicate that the
                  overhead makes this approach beneficial only for
                  long vectors ($>1$KB).  For shorter vectors, unfused
                  loops should be used in a library setting.
                  Fortunately, this choice can be made at run time,
                  individually for each vector operation.}
}

@Proceedings{euroforth18,
  title = 	 {34th EuroForth Conference},
  booktitle = 	 {34th EuroForth Conference},
  year = 	 {2018},
  key =		 {EuroForth'18},
  url =          {http://www.euroforth.org/ef18/papers/proceedings.pdf}
}

@Article{ertl18vd,
  author =       {M. Anton Ertl},
  title =        {Forth-200X-Treffen auf der EuroForth 2017},
  journal =      {Vierte Dimension},
  year =         {2018},
  volume =       {34},
  number =       {3},
  pages =        {5--6}
}

@Unpublished{ertl19ft,
  author =       {M. Anton Ertl and Bernd Paysan},
  title =        {Der neue {Gforth}-Header},
  note =         {Vortrag bei der Forth-Tagung 2019},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl19ft.pdf},
  video =        {https://forth-ev.de/wiki/events:ft2019:anton-header},
  year =         {2019}
}

@Unpublished{ertl19ft2,
  author =       {M. Anton Ertl},
  title =        {{Forth-Quellcode im Flash}},
  note =         {Vortrag bei der Forth-Tagung 2019},
  url =          {https://forth-ev.de/wiki/events:ft2019:anton-flash},
  year =         {2019},
  usenetmsgid =  {<2019Apr19.185811@mips.complang.tuwien.ac.at>}
}

@InProceedings{paysan&ertl19,
  author =       {Bernd Paysan and M. Anton Ertl},
  title =        {The new {Gforth} Header},
  crossref =     {euroforth19},
  pages =        {5--20},
  url =          {http://www.euroforth.org/ef19/papers/paysan.pdf},
  url-slides =   {http://www.euroforth.org/ef19/papers/paysan-slides.pdf},
  video =        {https://wiki.forth-ev.de/doku.php/events:ef2019:header},
  OPTnote =      {refereed},
  abstract =     {The new Gforth header is designed to directly
                  implement the requirements of Forth-94 and
                  Forth-2012.  Every header is an object with a fixed
                  set of fields (code, parameter, count, name, link)
                  and methods (\texttt{execute}, \texttt{compile,},
                  \texttt{(to)}, \texttt{defer@}, \texttt{does},
                  \texttt{name>interpret}, \texttt{name>compile},
                  \texttt{name>string}, \texttt{name>link}).  The
                  implementation of each method can be changed
                  per-word (prototype-based object-oriented
                  programming).  We demonstrate how to use these
                  features to implement optimization of constants,
                  \texttt{fvalue}, \texttt{defer}, \texttt{immediate},
                  \texttt{to} and other dual-semantics words, and
                  \texttt{synonym}.}
}

@Proceedings{euroforth19,
  title = 	 {35th EuroForth Conference},
  booktitle = 	 {35th EuroForth Conference},
  year = 	 {2019},
  key =		 {EuroForth'19},
  url =          {http://www.euroforth.org/ef19/papers/proceedings.pdf}
}

@Misc{ef2019:rs-int,
  author =       {M. Anton Ertl},
  title =        {Interactive multiline \code{>R R>} in {Gforth}},
  howpublished = {https://wiki.forth-ev.de/doku.php/events:ef2019:rs-int},
  year =         {2019},
  note =         {Video of otherwise unpublished talk at EuroForth 2019}
}

@InProceedings{ertl19kps,
  author =       {M. Anton Ertl},
  title =        {Integer Division by Multiplying with the
                  Double-Width Reciprocal},
  crossref =     {kps19},
  pages =        {75--84},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl19kps.pdf},
  url-slides =   {http://www.complang.tuwien.ac.at/papers/ertl19kps-slides.pdf},
  abstract =     {Earlier work on integer division by multiplying with
                  the reciprocal has focused on multiplying with a
                  single-width reciprocal, combined with a correction
                  and followed by a shift.  The present work explores
                  using a double-width reciprocal to allow getting rid
                  of the correction and shift.}
}

@Proceedings{kps19,
  title =        {20. Kolloquium Programmiersprachen und Grundlagen
                  der Programmierung (KPS)},
  booktitle =    {20. Kolloquium Programmiersprachen und Grundlagen
                  der Programmierung (KPS)},
  year =         {2019},
  key =          {kps19},
  editor =       {Martin Pl\"umicke and Fayez Abu Alia},
  url =          {https://www.hb.dhbw-stuttgart.de/kps2019/kps2019_Tagungsband.pdf}
}

@Unpublished{ertl01state,
  author =       {M. Anton Ertl},
  title =        {STATE-smartness: Applications, Pitfalls, Alternatives},
  note =         {This paper is an updated and significantly revised
                  version of my EuroForth '98 paper \cite{ertl98}; I
                  submitted this draft version to JFAR in 2001, but it
                  was not processed, so I finally put it online.},
  year =         {2001},
  url =          {http://www.complang.tuwien.ac.at/papers/ertl01state.pdf},
  abstract =     {STATE-smart words provide a number of unpleasant
                  surprises to their users. They are applied in two
                  contexts, and they fail in both: 1) for implementing
                  words like \texttt{s"} that provide an arbitrary
                  combination of interpretation and compilation
                  semantics (combined words); 2) for optimizing using
                  a special implementation of the (default)
                  compilation semantics. This paper discusses these
                  issues and shows programmers and system implementors
                  how to avoid STATE-smart words. It also reports our
                  experiences in converting the STATE-smart words in
                  Gforth into a clean solution: little work and few
                  problems.}
}

