\documentclass[10pt]{article}
%%%% PREAMBLE BEGINS HERE:
\usepackage{fullpage}
\usepackage{amsmath,amssymb,algorithm}
\usepackage{tikz}
\usetikzlibrary{shapes.gates.logic.US,automata,positioning}
\usepackage[noend]{algorithmic}
\usepackage{aliascnt}
\usepackage{hyperref}
\usepackage[hyperref]{cleveref}
% \handout{#1}{#2}{#3}{#4}{#5}: typeset the framed header box at the top
% of a handout.  #1 = lecture number (also prefixes every page number as
% ``#1-<page>''), #2 = date (right of the course name), #3/#4 = left/right
% fields of the italic line, #5 = large centered title.
\newcommand{\handout}[5]{
% Page numbers become ``<lecture>-<page>''.
\renewcommand{\thepage}{#1-\arabic{page}}
\noindent
\begin{center}
\framebox{
\vbox{
% Course name flush left, date flush right.
\hbox to 5.78in { {\bf 6.841 Advanced Complexity Theory}
\hfill #2 }
\vspace{4mm}
% Centered title line.
\hbox to 5.78in { {\Large \hfill #5 \hfill} }
\vspace{2mm}
% Italic line: #3 flush left, #4 flush right.
\hbox to 5.78in { {\it #3 \hfill #4} }
}
}
\end{center}
\vspace*{4mm}
}
% \lecture{num}{date}{lecturer}{scribe}: wrapper around \handout that
% supplies the ``Lecturer:''/``Scribe:'' labels and the ``Lecture num'' title.
\newcommand{\lecture}[4]{\handout{#1}{#2}{Lecturer:
#3}{Scribe: #4}{Lecture #1}}
% End-of-proof marker: a right-flushed 7pt filled square.
\newcommand{\qed}{\hfill \rule{7pt}{7pt}}
% Unnumbered proof-style environments.  Each opens with a bold run-in
% heading; all but `remark' close with the \qed square.
\newenvironment{proof}{\noindent{\bf Proof}\hspace*{1em}}{\qed\bigskip}
\newenvironment{proof-sketch}{\noindent{\bf Sketch of Proof}\hspace*{1em}}{\qed\bigskip}
\newenvironment{proof-idea}{\noindent{\bf Proof Idea}\hspace*{1em}}{\qed\bigskip}
% #1 = number/name of the lemma being proved.
\newenvironment{proof-of-lemma}[1]{\noindent{\bf Proof of Lemma #1}\hspace*{1em}}{\qed\bigskip}
\newenvironment{proof-attempt}{\noindent{\bf Proof Attempt}\hspace*{1em}}{\qed\bigskip}
% #1 = name of the statement being proved.
\newenvironment{proofof}[1]{\noindent{\bf Proof}
of #1:\hspace*{1em}}{\qed\bigskip}
\newenvironment{remark}{\noindent{\bf Remark}\hspace*{1em}}{\bigskip}
%% MACROS BEGIN HERE:
% Blackboard-bold shorthands for the natural numbers and the integers.
\newcommand{\N}{\ensuremath{\mathbb{N}}}
\newcommand{\Z}{\ensuremath{\mathbb{Z}}}
% Relabel the algorithmic package's REQUIRE/ENSURE lines as Input/Output.
\renewcommand{\algorithmicrequire}{\textbf{Input:}}
\renewcommand{\algorithmicensure}{\textbf{Output:}}
% Master theorem environment, numbered per section, unless the class or a
% package already defined one.
\ifx\theorem\undefined
\newtheorem{theorem}{Theorem}[section]
\fi
% \newtheoremwithcnt: declare a theorem-like environment that shares its
% counter with an existing one (via aliascnt, so cleveref can tell the
% two environments apart), and register its cleveref name.
\providecommand\newtheoremwithcnt[3]{%
% #1 = existing counter to share, #2 = new environment name, #3 = displayed name.
\newaliascnt{#2}{#1}%
\newtheorem{#2}[#2]{#3}%
\aliascntresetthe{#2}%
\crefformat{#2}{#3~##2##1##3}%
%TODO: optional argument to have a different ref name, which by
%defaults to #3.
}
\ifx\lemma\undefined
\newtheoremwithcnt{theorem}{lemma}{Lemma}
\fi
% Lower-case cref name for references to algorithm floats.
\crefformat{algorithm}{algorithm~#2#1#3}
%% MACROS END HERE.
%%%% PREAMBLE ENDS HERE.
\begin{document}
\lecture{10}{March 9, 2009}{Madhu Sudan}{Asilata Bapat}
%%%% body goes in here %%%%
\noindent Meeting to talk about final projects on Wednesday, 11 March 2009, from 5pm to 7pm. Location: TBA. Includes food.
\section{Overview of today's lecture}
\begin{itemize}
\item Randomized computation.
\item Complexity classes: RP, coRP, BPP, ZPP.
\item Basic properties of these complexity classes.
\end{itemize}
So far, we know that P is a computationally feasible class. We could try and expand this notion, and then study where the expanded notions lie in relation with P, NP, etc.
\section{Examples of problems which have randomized algorithms}
\begin{enumerate}
\item \textbf{Problem:} Find an $n$-bit prime.\\
\textbf{Input:} $N\in\N$, $N>3$ such that $2^{n-1}< N\leq 2^n$.\\
\textbf{Output:} A prime $p$, such that $N\leq p< 2N$.
A polynomial-time algorithm for this problem is as follows. This algorithm is randomized. No deterministic algorithm is known.
\begin{algorithmic}[1]
\label{primealgorithm}
\LOOP [$n$ times] \label{loop}
\STATE Pick $k$ randomly and uniformly between $N$ (inclusive) and $2N$ (exclusive).
\IF {$k$ is prime}
\RETURN $k$
\ELSE
\STATE continue loop.
\ENDIF
\ENDLOOP
\RETURN a random value between $N$ (inclusive) and $2N$ (exclusive).
\end{algorithmic}
A sketch of the proof of correctness of this algorithm is as follows.
\begin{proof-sketch}
First we observe that we can always find such a prime. This is the following lemma, which we state without proof.
\begin{lemma}[Bertrand's Postulate]
\label{bertrand}
If $n$ is a natural number greater than 3, then there exists a prime number $p$ such that $n\leq p< 2n$.
\end{lemma}
Apart from \Cref{bertrand} the algorithm depends on the Prime Number Theorem, which we state without proof.
\begin{theorem}[Prime Number Theorem]
For any real number $x$, let $\pi(x)$ be the number of primes less than or equal to $x$. Then,
\[
\lim_{x\to\infty}\frac{\pi(x)}{x/\ln x} = 1.
\]
\end{theorem}
In this context, the Prime Number Theorem implies that the number of primes between $N$ and $2N$ is about $\frac{2N}{n+1} - \frac{N}{n}$, which is approximately $\frac{N}{n}$. So the probability of $k$ being prime is approximately $\frac{1}{n}$. Since the algorithm is repeated $n$ times, the probability of it not returning a prime is approximately
\[
\left(\frac{n-1}{n}\right)^n = \left(1 -\frac{1}{n}\right)^n \leq \frac{1}{e}.
\]
We will see later that this error probability is small enough for our purposes.
\end{proof-sketch}
\item
\label{sqrt}
\textbf{Problem:} Square-root modulo primes.\\
\textbf{Input:} An $n$-bit long prime $p$, an integer $a$ such that $0\leq a< p$.\\
\textbf{Output:} An integer $\alpha$ such that $\alpha^2 = a\,(\text{mod }p)$.
Berlekamp, and later Adleman, Manders and Miller, gave randomized polynomial-time algorithms to solve this problem. A deterministic polynomial-time algorithm is not known.
A randomized polynomial-time algorithm to solve this problem is as follows.
First, $\beta$ is chosen randomly and uniformly from $[p-1]$. If we can solve the equation $\gamma^2 = \beta^2 a\,(\text{mod }p)$ for $\gamma$, then $\alpha = \gamma/\beta$. For this, $\theta$ is picked randomly and uniformly from $[p-1]$, and the following equation can be solved, $(x-\theta)^2 = \beta^2 a\,(\text{mod }p)$. To do this, we use (without proof) the fact that $\text{gcd}(x^2 -2x\theta + \theta^2 -\beta^2 a, x^{\frac{p-1}{2}}-1)$ is linear in $x$ with probability 1/2.
If we find this gcd $q$ and if it is linear in $x$, then it will be either $x - \theta - \beta\alpha$ or $x - \theta + \beta\alpha$, so we can just return $(x - \theta - q)/\beta$.
\item \textbf{Problem:} Given $k$ $n\times n$ square matrices of integers $M_1,M_2,\ldots, M_k$, do there exist integers $r_1,r_2,\ldots,r_k$ such that $\text{det}(\sum_{i=1}^kr_iM_i)\neq 0$?
A randomized algorithm for this problem is as follows. Pick $r_1,r_2,\ldots,r_k$ randomly and uniformly from $\{1,2,\ldots,3n\}$ and check if $\text{det}(\sum_{i=1}^kr_iM_i)\neq 0$. If so, output `yes'; otherwise output `no'.
The proof of the correctness of this algorithm is discussed in \Cref{someproofs}.
\item
\label{circuits}
\textbf{Problem:} Equivalence of circuits.\\
\textbf{Input:} Circuits $C_1,C_2$ over integer inputs $x_1,x_2,\ldots,x_n$ with addition and multiplication gates and the constants $\{-1,0,1\}$.\\
\textbf{Output:} Is $C_1$ equivalent to $C_2$? (Is the function computed by $C_1$ the same as the function computed by $C_2$?)
A randomized algorithm for this problem is as follows. (Here, we assume that the size of the circuit is a polynomial in the number of inputs, to make estimations about the input size of our problem simpler.)
\begin{algorithmic}[1]
\STATE Pick a prime of size about $2^{O(n)}$ and call it $p$.
\STATE Pick $x_1,x_2,\ldots,x_n$ randomly and uniformly in $\Z_p$.
\STATE \COMMENT{In the following if-statement, the output of each gate is computed in $\Z_p$.}
\IF {$C_1(x_1,x_2,\ldots,x_n) = C_2(x_1,x_2,\ldots,x_n)$}
\RETURN `yes'
\ELSE
\RETURN `no'
\ENDIF
\end{algorithmic}
Observe that we can have a polynomial-sized circuit that computes $2^{2^n}$, as follows. (Here, `A' denotes addition and `M' denotes multiplication).
\begin{center}
\begin{tikzpicture}[shorten >=1pt,node distance=2cm,on grid,auto]
\tikzstyle{stage1}=[shape=rectangle,rounded corners, draw=red!50, fill=red!20]
\tikzstyle{stage2}=[or gate US, draw=blue!50, fill=blue!20]
\tikzstyle{stage3}=[and gate US, draw=blue!50, fill=blue!20]
\node (constant) [stage1] {1};
\node (addtotwo) [right of=constant,stage2] {A};
\node (multiply) [right of=addtotwo,stage3] {M};
\node (multiply2) [right of=multiply,stage3] {M};
\node (ellipsis) [right of=multiply2,xshift=1cm] {(total $n$ `M' gates)};
\node (final) [stage1,right of=ellipsis,xshift=1cm] {$2^{2^n}$};
\path[->, thick,every node/.style={font=\scriptsize}]
(constant) edge [bend right] node [swap] {1} (addtotwo)
(constant) edge [bend left] node {1} (addtotwo)
(addtotwo) edge [bend left] node {2} (multiply)
(addtotwo) edge [bend right] node [swap] {2} (multiply)
(multiply2) edge [bend right] node [swap] {$2^{2^2}$} (ellipsis)
(multiply2) edge [bend left] node {$2^{2^2}$} (ellipsis)
(multiply) edge [bend left] node {$2^2$} (multiply2)
(multiply) edge [bend right] node [swap] {$2^2$} (multiply2)
(ellipsis) edge node {} (final);
\end{tikzpicture}
\end{center}
The number $2^{2^n}$ is too big for polynomial-time simulations, and it is clear that we can actually get a number of this size from a circuit of polynomial size. So we reduce modulo $p$, so as to restrict all the possible numbers in our computations to have at most $O(n)$ bits. This ensures that the algorithm does not exceed polynomial time.
\end{enumerate}
\section{Some proofs}
\label{someproofs}
To prove the correctness of the randomized algorithms for the matrix determinant problem and for checking the equivalence of two circuits, we will use the following lemma. Recall that we have already used it once in a previous lecture.
\begin{lemma}[Schwartz--Zippel Lemma]
\label{szlemma}
Let $p(x_1,\ldots,x_n)$ be a not identically zero polynomial of total degree $d$ over any (possibly infinite) field $\mathbb{F}$. If $\alpha_1,\ldots,\alpha_n$ are chosen uniformly at random from any finite set $S\subset \mathbb{F}$, then
\[
\mathrm{Pr}\left[p(\alpha_1,\ldots,\alpha_n) = 0\right] \leq \frac{d}{|S|}.
\]
\end{lemma}
To prove the correctness of the randomized algorithm for the matrix determinant problem (the third problem above), we have to calculate the probability that the algorithm makes an error. Note that $p(x_1,\ldots,x_k) = \text{det}(\sum_{i=1}^kx_iM_i)$ is a polynomial of degree $d=n$ in the variables $x_i$. Suppose that the polynomial is not identically zero, otherwise the algorithm can never err.
If with the randomly chosen $r_i$, $\text{det}(\sum_{i=1}^kr_iM_i)$ turns out to be non-zero, then there certainly exists an assignment to the $x_i$s such that $p(x_1,\ldots,x_k)$ is non-zero. On the other hand, if with the randomly chosen $r_i$ the quantity $p(r_1,\ldots,r_k)$ is zero, then there is some probability of error. This can be calculated by using \Cref{szlemma}. We have chosen the set $S$ to be $\{1,2,\ldots,3n\}$, and the $r_i$ have been chosen randomly and uniformly from $S$. Therefore,
\[
\text{Pr}\left[p(r_1,\ldots,r_k) = 0\right] \leq \frac{d}{|S|} = \frac{n}{3n} = \frac{1}{3}.
\]
We will only sketch the proof of \Cref{circuits} (Circuit equivalence). For this we need to estimate the error probability. If for some choice of $x_1,x_2,\ldots,x_n$, we get that $C_1(x_1,\ldots,x_n)\neq C_2(x_1,\ldots,x_n)$ modulo $p$, then the circuits are certainly not equivalent. On the other hand, if $C_1(x_1,\ldots,x_n)= C_2(x_1,\ldots,x_n)$ modulo $p$, then there is some probability of error. We can also represent $C_1(x_1,\ldots,x_n)$ and $C_2(x_1,\ldots,x_n)$ as polynomials in $x_1,\ldots,x_n$. The following facts lead to the proof that the probability of error is sufficiently small.
\begin{itemize}
\item The degrees of the polynomials corresponding to the circuits may be quite large, but \Cref{szlemma} still works because $|S| = |\Z_p| \geq 2^{\Omega(n)}$.
\item The numbers appearing during the computation are no more than $O(n)$ bits long after reduction modulo $p$. If the original probability of error is $\epsilon$, then we can repeat this algorithm $\text{poly}(n)$ times to decrease this to $\epsilon^{\text{poly}(n)}$, by using the following well-known result.
\begin{theorem}[Chinese Remainder Theorem]
Let $M,N$ be integers and let $p_1,p_2,\ldots,p_k$ be distinct primes such that $M\equiv N\,(\text{mod } p_i)$ for each $i$. Then $M\equiv N\,(\text{mod }p_1p_2\cdots p_k)$.
\end{theorem}
\end{itemize}
\section{Complexity classes}
\subsection{Types of randomized algorithms}
To start off the discussion of complexity classes, we first consider the kinds of errors that may occur in a randomized algorithm. For the purposes of the discussion below, we fix $\epsilon = \frac{1}{3}$. We will see later that this particular choice of $\epsilon$ is not special. Let $L$ be a language, and suppose that our algorithm is deciding membership of $x$ in $L$. Then we can have the following types of errors.
\begin{enumerate}
\item Two-sided error: \\
$x\in L\Rightarrow $ probability of error is at most $\epsilon$, and\\
$x\notin L\Rightarrow $ probability of error is at most $\epsilon$.
The class of polynomial-time algorithms that behave in this manner is called BPP, which stands for Bounded-error Probabilistic Polynomial-time.
\item One-sided error. There are two types of one-sided error:
\begin{enumerate}
\item $x\notin L\Rightarrow$ no errors, and \\
$x\in L\Rightarrow$ probability of error is at most $\epsilon$.
The class of polynomial-time algorithms that behave in this manner is called RP, which stands for Randomized Polynomial-time.
\item $x\in L\Rightarrow$ no errors, and \\
$x\notin L\Rightarrow$ probability of error is at most $\epsilon$.
The class of polynomial-time algorithms that behave in this manner is called coRP, which stands for co-Randomized Polynomial-time.
\end{enumerate}
\item Zero-sided error:\\
$x\in L\Rightarrow$ no error,\\
$x\notin L\Rightarrow$ no error, but\\
may not halt on some inputs.
Alternatively, we can say that the running time of the algorithm is a random variable with polynomial expectation. Or, we can say that the algorithm is permitted to return one of three values, 1 if it accepts, 0 if it rejects, and ? if it does not know (within some fixed time).
The class of polynomial-time algorithms that behave in this manner is called ZPP, which stands for Zero-error Probabilistic Polynomial-time.
\end{enumerate}
\subsection{Models of randomized computation}
A natural model for randomized computation is a Turing Machine $M$ which has a special `coin-tossing' state, the `\$' state.
However, the preferred model for randomized computation is that of \emph{Two-input Turing Machines}. In this case, $x$ is the real input and $y$ is an auxiliary input. The input $x$ represents the actual input data, and the input $y$ captures the randomness used for a particular instance of a randomized computation. A Two-input Turing Machine $M$ takes in $(x,y)$ and runs deterministically on $(x,y)$. (For the cases of RP, coRP, ZPP and BPP, $M$ must run in time polynomial in the size of the input $x$, and therefore the length of $y$ must be polynomial in the size of $x$).
\subsection{New definitions for complexity classes}
Using the language of two-input Turing Machines, we can redefine some of the complexity classes that we already know. Again, $\epsilon = \frac{1}{3}$. For each of these complexity classes, the language $L$ is in the class if there is a two-input Turing Machine $M$ with the second input always of length polynomial in the size of the first, such that certain results (defined in the following list) are true. In the following experiments, $y$ is always chosen uniformly from all the available possibilities.
\begin{enumerate}
\item BPP
\begin{enumerate}
\item If $x\in L$ then $\text{Pr}_y\left[M(x,y)\text{ accepts}\right]\geq 1 -\epsilon$.
\item If $x\notin L$ then $\text{Pr}_y\left[M(x,y)\text{ accepts}\right]\leq\epsilon$.
\end{enumerate}
\item NP
\begin{enumerate}
\item If $x\in L$ then $\text{Pr}_y\left[M(x,y)\text{ accepts}\right]> 0$.
\item If $x\notin L$ then $\text{Pr}_y\left[M(x,y)\text{ accepts}\right]=0$.
\end{enumerate}
\item RP
\begin{enumerate}
\item If $x\in L$ then $\text{Pr}_y\left[M(x,y)\text{ accepts}\right]\geq 1-\epsilon$.
\item If $x\notin L$ then $\text{Pr}_y\left[M(x,y)\text{ accepts}\right]=0$.
\end{enumerate}
\item coRP
\begin{enumerate}
\item If $x\in L$ then $\text{Pr}_y\left[M(x,y)\text{ accepts}\right]=1$.
\item If $x\notin L$ then $\text{Pr}_y\left[M(x,y)\text{ accepts}\right]\leq \epsilon$.
\end{enumerate}
\item ZPP\\
ZPP cannot be naturally defined with this notation, so we can give the following definition.
\[
\text{ZPP} = \text{RP}\cap\text{coRP}.
\]
\end{enumerate}
\section{Choice of error parameter}
What is the ideal choice for the maximum permissible error? Is it on the order of $1/3$, $1/n^3$, $1/2^n$, or on the order of $1 - 1/n^5$, $1 - 1/2^n$? Let us only look at the class RP for now. This can be formalized by looking at the following result.
\begin{lemma}[Amplification Lemma]
Suppose an algorithm $M$ errs with probability $e(n)$, so that if $x\in L$ then $\text{Pr}_y\left[M(x,y)\text{ accepts}\right]\geq 1-e(n)$ and if $x\notin L$ then $\text{Pr}_y\left[M(x,y)\text{ accepts}\right]=0$ (when $y$ is chosen uniformly). Let $M'$ be the algorithm that runs $M$ $k(n)$ times with independent randomness, for some polynomial $k$, and accepts if and only if at least one run of $M$ accepts. Then $M'$ errs with the following probabilities.
\begin{align*}
x\in L &\Rightarrow \text{Pr}_y\left[M'(x,y)\text{ accepts}\right]\geq 1-(e(n))^{k(n)},\\
x\notin L &\Rightarrow \text{Pr}_y\left[M'(x,y)\text{ accepts}\right]=0.
\end{align*}
\end{lemma}
RP with an error probability of $e(n)$ may be written as $\text{RP}_{e(n)}$.
If we start with a constant error probability, we can make it as small as $1/2^{n^c}$ for any $c$ in polynomially many iterations of the algorithm. This probability is small enough.
If we start with an error probability $e(n) = 1 - 1/n^5$, then
\[
(e(n))^{k(n)} = \left(1 - \frac{1}{n^5}\right)^{n^5 l(n)} \leq \left(\frac{1}{e}\right)^{l(n)},
\]
which is small enough if $k(n)$ is a sufficiently large degree polynomial.
But if we start with an error probability $e(n) = 1 - 1/2^n$, then we cannot make the error probability small enough after polynomially many iterations of the algorithm. In this case, $\text{RP}_{e(n)} = \text{NP}$.
But $\text{RP}_{1 - 1/\text{poly}(n)} = \text{RP}_{1/2^{\text{poly}(n)}}$. So the class RP is robust with respect to large changes in the maximum permissible error probability.
\end{document}