\documentclass[10pt]{article}
\usepackage{amsfonts}
\usepackage{fullpage}
%\usepackage{epsfig}
\newcommand{\SIZE}{{\rm SIZE}}
\newcommand{\CSIZE}{{\rm C-SIZE}}
\newcommand{\FSIZE}{{\rm F-SIZE}}
\newcommand{\BPSIZE}{{\rm BP-SIZE}}
\newcommand{\DEPTH}{{\rm DEPTH}}
\begin{document}
\input{preamble.tex}
\lecture{3}{Feb 11, 2009}{Madhu Sudan}{Debmalya Panigrahi}
%%%% body goes in here %%%%
In today's lecture, we will focus on {\em non-uniform models of
computation}. In non-uniform computation, we have a
{\em different gadget/program/machine for each input size}
for a given problem. Specifically, in this lecture, we will
focus on {\em circuit families} (which are equivalent to
deterministic Turing Machines (DTMs) with
{\em advice} strings), {\em branching programs} and
{\em formulas}, and compare them.
Finally, we will study {\em Neciporuk's lower bound} on the
succinctness of branching programs.
\section{Circuits and Circuit Families}
A {\em circuit} is a {\em directed acyclic graph} (DAG), where the
nodes are of three kinds: {\em input nodes}, {\em logic gates} and
an {\em output node}. The goal of a circuit with $n$ input nodes
is to compute a
boolean function $f: \{0, 1\}^n \rightarrow \{0, 1\}$\footnote{
In general, a circuit may have $m\geq 1$ output nodes
and compute a function $f: \{0, 1\}^n \rightarrow \{0, 1\}^m$.} in
the following way: if a boolean vector
$\bar{x} = (x_1, x_2, \ldots, x_n)$
is fed to the $n$ input nodes, then the logic gates compute the
function $f(\bar{x})$ and output it at the output node.
A {\em basis} is a set of logic gates that can be used to compute {\em any}
boolean function.
A circuit $C$ has two main complexity measures:
\begin{itemize}
\item The {\em size} of circuit $C$, denoted by $\SIZE(C)$ is the
number of edges in $C$.\footnote{Often, the size of a circuit is
defined as the number of logic gates (i.e. internal nodes) in the circuit.
The two definitions are identical up to polynomial factors.} For a function
$f$, $\CSIZE(f) = \min_{C~{\rm computes}~f} \SIZE(C)$. $\CSIZE(f)$
depends on the basis of gates used in the circuit, but only
up to a constant factor.
\item The {\em depth} of circuit $C$, denoted by $\DEPTH(C)$, is the
length of the longest path in $C$.
\end{itemize}
The following is a set of simple facts about
circuits:
\begin{itemize}
\item Given a circuit $C$ of size $S$, evaluating $C(\bar{x})$ for
an input vector $\bar{x}$ takes $O(S^2)$ time.
\item To simulate a ${\rm DTIME}(t(n))$ TM on an input of length $n$, we need
a circuit of size $O(t(n)^2)$.
\item For any function $f: \{0,1\}^n\rightarrow \{0, 1\}$,
$\CSIZE(f) = O(2^n)$ (compare this to the fact that some functions
might take much longer to compute).
\item Circuits of size at most $S$ ($S\geq n$, where $n$ is the input size)
can compute roughly $S^{S\log S}$ different functions. (This can be used
to show that there are functions (in fact, even a random function) which
have circuit complexity of $\Omega(2^n/n)$. In fact, the worst-case
complexity of an $n$-input function is also $(1-o(1))2^n/n$.)
\end{itemize}
A {\em circuit family} $\{C_n\}$ is an infinite sequence of circuits,
one for each input size. It is said to compute function
$f:\{0,1\}^* \rightarrow \{0,1\}$ if $C_n$ computes the function
$f|_n: \{0,1\}^n\rightarrow \{0,1\}$, which is identical to
the function $f$, except that the input is restricted to be of
length $n$.
We now compare functions that can be computed by poly-sized circuit
families and those that have poly-time DTMs
(i.e. the complexity class {\bf P}). Clearly, a poly-time
DTM can be simulated by a poly-sized circuit
(follows from the second fact we mentioned above).
However, the converse is not true. Consider, for instance,
the language
\begin{center}
$L = \{1^n: 1^n$ is the unary representation of a
TM $M$ and a string $x$ such that $M$ halts on $x\}$.
\end{center}
Clearly, there is a poly-sized circuit family for $L$;
the answer for a specific $n$ can be hard-coded in the circuit.
However, this is the halting problem which is known to be
undecidable; therefore, in particular, $L\notin$ {\bf P}.
This leads us to the definition of advice strings. A deterministic
TM $M$ with {\em advice sequence} $\bar{a} = (a_1, a_2, \ldots)$, where
$a_i \in \{0,1\}^*$, accepts the following language:
\begin{equation}
L(M, \bar{a}) = \{x: M(x, a_{|x|})~{\rm accepts}\}.
\end{equation}
If $\ell(n) = |a_n|$ and $M$ runs in time $t(n)$, then
\begin{equation}
L(M, \bar{a}) \in {\rm DTIME}(t(n))/\ell(n).
\end{equation}
In particular,
\begin{equation}
{\bf P/poly} = \bigcup_{{\rm polynomials}~p, q} {\rm DTIME}(p(n))/q(n)
\end{equation}
is equivalent to poly-sized circuits in computation power.
If a language is in {\bf P/poly}, then the advice string for input
length $n$ can be hard-coded into the poly-sized circuit simulating
the poly-time DTM for the language. On the other
hand, if a language has a poly-sized circuit family, then the
corresponding circuits can be used as advice strings to the DTM,
which simply evaluates the input for the circuit.
If one can show that {\bf NP}$\not\subseteq$ {\bf P/poly}, then it would
follow that {\bf P}$\not=${\bf NP}\footnote{Minor typo in previous version corrected by Madhu.}. However, for problems in {\bf NP},
the best lower bound known on the length of the advice string required
is $4.5 n$.
\section{Branching Programs}
A branching program (BP) is a DAG computing some
function $f:\{0,1\}^n\rightarrow \{0,1\}$. The nodes of the DAG
are of two types: {\em branching nodes}, each of which represents an input
variable and has two outgoing edges labeled 0 and 1 respectively,
and two {\em output nodes} labeled 0 and 1 and having no outgoing edges.
To compute the function for a given input, we start at a designated
input node and take the outgoing edge at each branching node corresponding
to the value of the variable in the input. Finally, we reach the output
node corresponding to the value of the function for the given input.
A BP is said to be {\em layered} if the nodes can be partitioned
into layers $L_1, L_2, \ldots, L_k$ such that all edges are from
nodes in layer $L_i$ to those in layer $L_{i+1}$, for any $i$.
The primary complexity measures of a layered BP $B$ are the following:
\begin{itemize}
\item ${\rm WIDTH}(B) = \max_i |L_i|$, and
\item $\SIZE(B) = \sum_i |L_i|$.
\end{itemize}
We mention the following facts about BPs:
\begin{itemize}
\item A language having a log-space DTM (i.e.
is in the complexity class {\bf L}) has a poly-sized BP.
\item A circuit can simulate a BP with roughly the same size, but not
vice-versa.
\end{itemize}
\section{Formulas}
A {\em formula} is a circuit where each logic gate has maximum out-degree of 1.
The following facts about formulas are worth mentioning:
\begin{itemize}
\item A change of basis of the logic gates affects the depth of a formula computing
a function by only a constant factor, but its size can change by a polynomial factor.
\item If for function $f$, $\FSIZE(f)$ and ${\rm F\mbox{-}DEPTH}(f)$ respectively represent
the minimum size and depth of a formula computing $f$, then
\begin{equation}
{\rm F\mbox{-}DEPTH}(f) = O(\log \FSIZE(f)).
\end{equation}
\item For any formula, there is a BP computing the same function whose size is at
most a polynomial factor greater than that of the formula.
\end{itemize}
\section{Neciporuk's Lower Bound}
We construct an explicit family of boolean functions that require BPs
of almost quadratic size. Consider any function $f$ with a BP of size
$\BPSIZE(f)$. Let $S$ be a subset
of input variables of $f$ and $\BPSIZE_S(f)$ denote the number of nodes
in the BP of $f$ that read the variables in $S$. Now, if
$S_1, S_2, \ldots, S_k$ is a partition of the input variables of $f$,
then
\begin{equation}
\BPSIZE(f) = \sum_{i=1}^k \BPSIZE_{S_i}(f).
\end{equation}
Now, consider the function $f$ where all the variables other than
those in $S_i$ have been fixed by an assignment $\rho$.
Let this function be denoted by $f|_{\rho}$. Then
\begin{equation}
\BPSIZE_{S_i}(f) \geq \BPSIZE(f|_{\rho}).
\end{equation}
We define a function $f$ (called {\em distinctness})
on an input of length
$kl$, where the input is grouped into $k$ groups of length $l$
each. Let us denote the input by
$\bar{x} = (\bar{x}_1, \bar{x}_2,\ldots, \bar{x}_k)$,
where $\bar{x}_i = (x_{i1}, x_{i2}, \ldots, x_{il})$.
Then, $f$ is defined as follows:
\begin{equation}
f(\bar{x}) = \left\{\begin{array}{ll}
1 & ,~{\rm if}~ \bar{x}_i \not= \bar{x}_j,~\forall i\not= j\\
0 & ,~{\rm otherwise.}
\end{array}\right.
\end{equation}
Now, suppose we fix all inputs except $\bar{x}_i$, i.e.
$\bar{x}_j = \bar{a}_j$ for $j\not= i$. This can be
done in
\begin{equation}
{{2^l}\choose{k-1}} \geq \left(\frac{2^l}{k-1}\right)^{k-1}
\end{equation}
ways. Thus, the number of different functions on $\bar{x}_i$
is at least $\left(\frac{2^l}{k-1}\right)^{k-1}$. But, we know that a BP
of size $s$ can represent at most $2^{O(s)}$ functions. Thus,
\begin{equation}
\BPSIZE_{S_i}(f) = \Omega(lk-k\log k).
\end{equation}
Therefore,
\begin{equation}
\BPSIZE(f) = \Omega(k(lk-k\log k)) = \Omega\left(\frac{n^2}{\log^2 n}\right),
\end{equation}
by setting $k=n/\log n$ and $l=\log n$.
\end{document}