
Wrote 'future work' chapter and cleaned up some diagram code.

Taddeus Kroes, 13 years ago
parent commit 4b7965921c
3 changed files with 158 additions and 44 deletions
  1. docs/data/diagrams.tex (+16 -7)
  2. docs/report.bib (+7 -0)
  3. docs/report.tex (+135 -37)

+ 16 - 7
docs/data/diagrams.tex

@@ -48,7 +48,7 @@
     \end{tikzpicture}
 }
 
-\newcommand{\basicdiagram}[1]{
+\def\basicdiagram{
     \begin{figure}[h]
         \center
         \architecture{
@@ -57,7 +57,11 @@
             \node[block, below of=arch] {Application}
                 edge[linefrom] node[right] {gestures} (arch);
         }
-        \caption{#1}
+        \caption{A diagram showing the position of the architecture relative
+        to the device driver and a multi-touch application. The input of the
+        architecture is given by a touch device driver. This input is
+        translated to complex interaction gestures, which are passed to the
+        application that is using the architecture.}
         \label{fig:basicdiagram}
     \end{figure}
 }
@@ -111,7 +115,7 @@
     \end{figure}
 }
 
-\newcommand{\areadiagram}[1]{
+\def\areadiagram{
     \begin{figure}[h]
         \center
         \architecture{
@@ -127,12 +131,16 @@
 
             \group{eventdriver}{eventdriver}{analysis}{area}{Architecture}
         }
-        \caption{#1}
+        \caption{Extension of the diagram from figure \ref{fig:driverdiagram},
+        showing the position of areas in the architecture. An area delegates
+        events to a gesture detection component that triggers gestures. The
+        area then calls the handler that is bound to the gesture type by the
+        application.}
         \label{fig:areadiagram}
     \end{figure}
 }
 
-\newcommand{\trackerdiagram}[1]{
+\def\trackerdiagram{
     \begin{figure}[h]
         \center
         \architecture{
@@ -148,7 +156,8 @@
 
             \group{eventdriver}{eventdriver}{tracker}{area}{Architecture}
         }
-        \caption{#1}
+        \caption{Extension of the diagram from figure \ref{fig:areadiagram},
+        showing the position of gesture trackers in the architecture.}
         \label{fig:trackerdiagram}
     \end{figure}
 }
@@ -330,6 +339,6 @@
         \end{tikzpicture}
         \caption{Daemon setup of an architecture implementation, serving
         gestures to multiple applications at the same time.}
-        \label{fig:ex3}
+        \label{fig:daemon}
     \end{figure}
 }

+ 7 - 0
docs/report.bib

@@ -171,3 +171,10 @@
 	year = "2001"
 }
 
+@misc{ZeroMQ,
+	author = "{iMatix Corporation}",
+	howpublished = "\url{http://www.zeromq.org/}",
+	title = "{Zero MQ - The intelligent transport layer}",
+	year = "2007"
+}
+

+ 135 - 37
docs/report.tex

@@ -2,7 +2,7 @@
 
 \usepackage[english]{babel}
 \usepackage[utf8]{inputenc}
-\usepackage{hyperref,graphicx,tikz,subfigure,float,lipsum}
+\usepackage{hyperref,graphicx,tikz,subfigure,float}
 
 % Link colors
 \hypersetup{colorlinks=true,linkcolor=black,urlcolor=blue,citecolor=DarkGreen}
@@ -18,7 +18,19 @@
 % Title page
 \maketitle
 \begin{abstract}
-    % TODO
+    Device drivers provide a primitive set of messages. Applications that use
+    complex gesture-based interaction need to translate these messages to
+    complex gestures, and map these gestures to elements in an application.
+    This paper presents a generic architecture for the detection of complex
+    gestures in an application. The architecture translates driver-specific
+    messages to a common set of ``events''. The events are then delegated to
+    a tree of ``areas'', which are used to group events and assign them to an
+    element in the application. Gesture detection is performed on a group of
+    events assigned to an area, using detection units called ``gesture
+    trackers''. An implementation of the architecture should run as a daemon
+    process, serving gestures to multiple applications at the same time. A
+    reference implementation and two test case applications have been created
+    to test the effectiveness of the architecture design.
 \end{abstract}
 
 % Set paragraph indentation
@@ -158,14 +170,13 @@ goal is to test the effectiveness of the design and detect its shortcomings.
 
     \section{Introduction}
 
-    % TODO: rewrite intro?
     This chapter describes the realization of a design for the generic
-    multi-touch gesture detection architecture. The chapter represents the
-    architecture as a diagram of relations between different components.
-    Sections \ref{sec:driver-support} to \ref{sec:daemon} define requirements
-    for the architecture, and extend the diagram with components that meet
-    these requirements. Section \ref{sec:example} describes an example usage of
-    the architecture in an application.
+    multi-touch gesture detection architecture. The architecture is represented
+    as a diagram of relations between different components. Sections
+    \ref{sec:driver-support} to \ref{sec:daemon} define requirements for the
+    architecture, and extend the diagram with components that meet these
+    requirements. Section \ref{sec:example} describes an example usage of the
+    architecture in an application.
 
     The input of the architecture comes from a multi-touch device driver.
     The task of the architecture is to translate this input to multi-touch
@@ -173,11 +184,7 @@ goal is to test the effectiveness of the design and detect its shortcomings.
     \ref{fig:basicdiagram}. In the course of this chapter, the diagram is
     extended with the different components of the architecture.
 
-    \basicdiagram{A diagram showing the position of the architecture
-    relative to the device driver and a multi-touch application. The input
-    of the architecture is given by a touch device driver. This output is
-    translated to complex interaction gestures and passed to the
-    application that is using the architecture.}
+    \basicdiagram
 
     \section{Supporting multiple drivers}
     \label{sec:driver-support}
@@ -229,7 +236,7 @@ goal is to test the effectiveness of the design and detect its shortcomings.
     \multipledriversdiagram
 
     \section{Restricting events to a screen area}
-    \label{sec:restricting-gestures}
+    \label{sec:areas}
 
    % TODO: in introduction: gestures are composed of multiple primitives
     Touch input devices are unaware of the graphical input widgets rendered on
@@ -294,10 +301,7 @@ goal is to test the effectiveness of the design and detect its shortcomings.
     type are bound to an area. Figure \ref{fig:areadiagram} shows the position
     of areas in the architecture.
 
-    \areadiagram{Extension of the diagram from figure \ref{fig:driverdiagram},
-    showing the position of areas in the architecture. An area delegate events
-    to a gesture detection component that trigger gestures. The area then calls
-    the handler that is bound to the gesture type by the application.}
+    \areadiagram
 
     An area can be seen as an independent subset of a touch surface. Therefore,
     the parameters (coordinates) of events and gestures within an area should
@@ -394,9 +398,7 @@ goal is to test the effectiveness of the design and detect its shortcomings.
     type by the application. Figure \ref{fig:trackerdiagram} shows the position
     of gesture trackers in the architecture.
 
-    \trackerdiagram{Extension of the diagram from figure
-    \ref{fig:areadiagram}, showing the position of gesture trackers in the
-    architecture.}
+    \trackerdiagram
 
     The use of gesture trackers as small detection units provides extendability
     of the architecture. A developer can write a custom gesture tracker and
@@ -518,10 +520,6 @@ events. See appendix \ref{app:tuio} for details regarding the TUIO protocol.
 \section{Reference implementation}
 \label{sec:implementation}
 
-% TODO
-% a few simple areas and trackers
-% no network protocol
-
 The reference implementation is written in Python and available at
 \cite{gitrepos}. The following component implementations are included:
 
@@ -612,27 +610,125 @@ synchronized with the root area of the architecture.
 % TODO
\emph{TODO: expand and add screenshots (this program is not finished yet)}
 
-\chapter{Conclusions}
+\section{Discussion}
 
 % TODO
+\emph{TODO: point out shortcomings that emerge from the tests}
+
+% Different devices/drivers emit different kinds of primitive events. A
+% translation of these device-specific events to a common event format is
+% needed to perform gesture detection in a generic way.
+
+% By passing the input of multiple drivers through the same event driver,
+% multiple devices are supported at the same time.
+
+% The event driver delivers low-level events. Not every event belongs to
+% every gesture, so the events that belong to a particular gesture must be
+% filtered out. Areas provide this possibility on devices for which the
+% filtering is location-based.
+
+% Splitting gesture detection into gesture trackers is a way to be flexible
+% in the supported types of detection logic, and to keep complexity
+% manageable.
 
 \chapter{Suggestions for future work}
 
-\section{A generic way for grouping events}
+\section{A generic method for grouping events}
 \label{sec:eventfilter}
-% TODO
-% - "event filter" instead of "area"
+
+As mentioned in section \ref{sec:areas}, the concept of an \emph{area} is
+based on the assumption that the set of originating events that form a
+particular gesture can be determined based exclusively on the location of the
+events. Since this thesis focuses on multi-touch surface based devices, and
+every object on a multi-touch surface has a position, this assumption is
+valid. However, the design of the architecture is meant to be more generic;
+to provide a structured way of managing gesture detection.
+
+An in-air gesture detection device, such as the Microsoft Kinect \cite{kinect},
+provides 3D positions. Some multi-touch tables work with a camera that can also
+determine the shape and rotational orientation of objects touching the surface.
+For these devices, events delegated by the event driver have more parameters
+than a 2D position alone. The term ``area'' is not suitable to describe a
+group of events characterized by these parameters.
+
+A more generic term for a component that groups similar events is the
+\emph{event filter}. The concept of an event filter is based on the same
+principle as areas, which is the assumption that gestures are formed from a
+subset of all events. However, an event filter takes all parameters of an event
+into account. An application on a camera-based multi-touch table could be to
+group all triangular objects into one filter, and all rectangular objects
+into another; or to separate small finger tips from large ones, to be able to
+recognize whether a child or an adult touches the table.
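The event filter concept described above can be sketched in Python (the language of the reference implementation). The `Event` fields, the size threshold, and the class names are illustrative assumptions, not part of the reference implementation:

```python
from dataclasses import dataclass
from typing import Callable, List

@dataclass
class Event:
    """A single input event; the fields shown here are illustrative."""
    x: float
    y: float
    shape: str   # e.g. "triangle", "rectangle", "ellipse"
    size: float  # contact size, e.g. fingertip diameter

class EventFilter:
    """Groups events whose parameters satisfy a predicate."""

    def __init__(self, predicate: Callable[[Event], bool]):
        self.predicate = predicate
        self.events: List[Event] = []

    def delegate(self, event: Event) -> bool:
        """Accept the event if it matches; return whether it was accepted."""
        if self.predicate(event):
            self.events.append(event)
            return True
        return False

# Separate small finger tips (a child) from large ones (an adult); the
# threshold value is an arbitrary example.
child_filter = EventFilter(lambda e: e.size < 1.0)
adult_filter = EventFilter(lambda e: e.size >= 1.0)

for ev in (Event(0.0, 0.0, "ellipse", 0.6), Event(5.0, 5.0, "ellipse", 1.4)):
    child_filter.delegate(ev) or adult_filter.delegate(ev)
```

Unlike an area, the predicate may inspect any event parameter, not only the 2D position.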
 
 \section{Using a state machine for gesture detection}
-% TODO
-% - use a more formal definition of gestures instead of explicit detection
-%   logic, e.g. a state machine
+
+All gesture trackers in the reference implementation are based on the explicit
+analysis of events. Gesture detection is a widely researched subject, and the
+separation of detection logic into different trackers allows for multiple types
+of gesture detection in the same architecture. An interesting question is
+whether multi-touch gestures can be described in a formal way so that explicit
+detection code can be avoided.
+
+\cite{GART} and \cite{conf/gw/RigollKE97} propose the use of machine learning
+to recognize gestures. To use machine learning, a set of input events forming
+a particular gesture must be represented as a feature vector. A learning set
+containing feature vectors that represent some gesture ``teaches'' the
+machine what the features of the gesture look like.
+
+An advantage of using explicit gesture detection code is the fact that it
+provides a flexible way to specify the characteristics of a gesture, whereas
+the performance of feature vector-based machine learning is dependent on the
+quality of the learning set.
+
+A better method to describe a gesture might be to specify its features as a
+``signature''. The parameters of such a signature must be based on input
+events. When a set of input events matches the signature of some gesture, the
+gesture is triggered. A gesture signature should be a complete description of
+all requirements the set of events must meet to form the gesture.
+
+One way to describe signatures on a multi-touch surface is to use a state
+machine of its touch objects. The states of a simple touch point could be
+$\{down, move, up, hold\}$ to indicate respectively that a point is put down,
+is being moved, is released, and is held on a position for some time. In this
+case, a ``drag'' gesture can be described by the sequence $down - move - up$
+and a ``select'' gesture by the sequence $down - hold$. If the set of states
+is not sufficient to describe a desired gesture, a developer can add
+additional states. For example, to be able to make a distinction between an
+element being ``dragged'' or ``thrown'' in some direction on the screen, two
+additional states can be added: $\{start, stop\}$ to indicate that a point
+starts and stops moving. The resulting state transitions are the sequences
+$down - start - move - stop - up$ and $down - start - move - up$ (the latter
+does not include a $stop$ to indicate that the element must keep moving after
+the gesture has been performed).
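The sequence matching described above can be sketched as follows. The state and gesture names follow the text; the exact-sequence matching strategy is an assumption for illustration, not a design from the thesis:

```python
# Gesture signatures as sequences of touch point states. "throw" uses the
# additional start/stop states and deliberately omits the final "stop".
SIGNATURES = {
    "drag": ("down", "move", "up"),
    "select": ("down", "hold"),
    "throw": ("down", "start", "move", "up"),
}

def match_gesture(states):
    """Return the name of the signature matched by a state sequence, if any."""
    for name, signature in SIGNATURES.items():
        if tuple(states) == signature:
            return name
    return None
```

A real implementation would match signatures incrementally as states arrive, rather than on a completed sequence.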
+
+An additional way to describe even more complex gestures is to use other
+gestures in a signature. An example is to combine $select - drag$ to specify
+that an element must be selected before it can be dragged.
+
+The application of a state machine to describe multi-touch gestures is a
+subject well worth exploring in the future.
 
 \section{Daemon implementation}
-% TODO
-% - network protocol (ZeroMQ) for multiple languages and simultaneous
-%   processes
-% - next step: create a library containing multiple drivers and complex
-%   gestures
+
+Section \ref{sec:daemon} proposes the usage of a network protocol to
+communicate between an architecture implementation and (multiple) gesture-based
+applications, as illustrated in figure \ref{fig:daemon}. The reference
+implementation does not support network communication. If the architecture
+design is to become successful in the future, the implementation of network
+communication is a must. ZeroMQ (or $\emptyset$MQ) \cite{ZeroMQ} is a
+high-performance software library with support for a wide range of programming
+languages. A future implementation could use this library as the basis for
+its communication layer.
+
+If an implementation of the architecture is released, it would be a good idea
+to do so within a community of application developers. A community can
+contribute to a central database of gesture trackers, making the interaction
+from their applications available for use by other applications.
+
+Ideally, a user can install a daemon process containing the architecture so
+that it is usable for any gesture-based application on the device. Applications
+that use the architecture can specify it as a software dependency, or include
+it in a software distribution.
 
 \bibliographystyle{plain}
 \bibliography{report}{}
@@ -706,4 +802,6 @@ normalized using division by the number of touch points. A pinch event contains
 a scale factor, and therefore uses a division of the current by the previous
 average distance to the centroid.
 
+\emph{TODO}
+
 \end{document}