\documentclass[11pt]{article}
\usepackage{amsfonts}
\usepackage{amsthm}
\usepackage{amsmath}
\usepackage{multicol}
\usepackage{latexsym}
\usepackage[pdftex]{graphicx}
\usepackage[table]{xcolor}
\usepackage{url}
\usepackage{mathdots}
\usepackage{mathrsfs}
\usepackage{hyperref}
\hypersetup{colorlinks=true}
\setlength{\oddsidemargin}{.25in}
\setlength{\evensidemargin}{.25in}
\setlength{\textwidth}{6in}
\setlength{\topmargin}{-0.4in}
\setlength{\textheight}{8.5in}

\input{preamble.tex}

\begin{document}
\lecture{5: Linear Algebra}{Week 5}

This is the fifth week of the Mathematics Subject Test GRE prep course; here, we review the field of \textbf{linear algebra!}

\section{Definitions and Concepts}

Unlike the calculus we've been studying earlier, linear algebra is a field much more focused on its \textbf{definitions} than its applications.  Accordingly, most of what you'll be asked to recall on a test are concepts and words, rather than any specific processes!  We review as many of these concepts as we can here, though I recommend skimming through a linear algebra textbook for a more in-depth review.  
\subsection{Vector Spaces: Some Important Types}
\begin{itemize}
\item \textbf{Vector space}:  A \textbf{vector space} over a field $F$ is a set $V$ and a pair of operations $+ : V \times V \to V$ and $\cdot : F \times V \to V$, that are in a certain sense ``well-behaved:'' i.e. the addition operation is associative and commutative, there are additive identities and inverses, the addition and multiplication operations distribute over each other, the scalar multiplication is compatible with multiplication in $F$, and 1 is the multiplicative identity.\footnote{See \href{http://en.wikipedia.org/wiki/Vector_space}{Wikipedia} if you want a precise description of these properties.} 

$F$ will usually be $\mathbb{R}$ on the GRE.
\begin{itemize}
\item Examples: $\mathbb{R}^n, \mathbb{C}^n, \mathbb{Q}^n,$ the collection of all polynomials with coefficients from some field, the collection of all $n \times n$ matrices with entries from some field.
\end{itemize}
\item \textbf{Subspace}: A subset $S$ of a vector space $V$ over a field $F$ is called a \textbf{subspace} if it satisfies the following two properties: (1) for any $\textbf{x}, \textbf{y}\in S$ and $a,b\in F$, we have that $a\textbf{x} + b \textbf{y}$ is also an element of $S$, and (2) $S$ is nonempty.
\item \textbf{Span}: For a set $S$ of vectors inside of some vector space $V$, the \textbf{span} of $S$ is the subspace formed by taking all of the possible linear combinations of elements of $S$.
\item \textbf{Row space}:  For a $k \times n$ matrix $M$, the \textbf{row space} of $M$ is the subspace of $F^n$ spanned by the $k$ rows of $M$.
\item \textbf{Null space}: For a $k \times n$ matrix $M$, the \textbf{null space} of $M$ is the following subspace:
\begin{align*}
\{ \textbf{x} \in F^n: M \cdot \textbf{x} = \textbf{0} \}.
\end{align*}
\begin{itemize}
\item Useful Theorem: The orthogonal complement of the row space of a matrix $M$ is the null space of $M$.  Conversely, the orthogonal complement of the null space of a matrix $M$ is the row space of $M$.
\end{itemize}
\item \textbf{Eigenspace}: For any eigenvalue $\lambda$ of a matrix $A$, we can define the \textbf{eigenspace} $E_\lambda$ associated to $\lambda$ as the space
\begin{align*}
E_\lambda := \{ \mathbf{v} \in V : A\mathbf{v} = \lambda\mathbf{v} \}.
\end{align*}
\end{itemize}


\subsection{Matrices: Some Important Types}
\begin{itemize}
\item \textbf{Elementary Matrices}:  There are three kinds of elementary matrices, which we draw below:
\begin{align*}\left(\begin{array}{ccccccc}
1 & 0 & 0 & 0 & 0 & \ldots  & 0\\
0 & 1 & 0 & 0 & 0 &  \ldots  & 0\\
0 & 0 & 1 & 0 & 0 &  \ldots  & 0\\
0 & 0 & 0 & \lambda & 0 &  \ldots  & 0\\
0 & 0 & 0 & 0 & 1 &  \ldots  & 0\\
\vdots & \vdots & \vdots &\vdots & \vdots & \ddots  & \vdots\\
0 & 0 & 0 & 0 & 0 &  \ldots  & 1\\
\end{array}\right), 
\left(\begin{array}{ccccccc}
1 & \ldots & 0 & \ldots & 0 &  \ldots  & 0\\
0 & \ldots & 0 & \ldots & 0 &  \ldots  & 0\\
0 & \ldots & 0 & \ldots & 1 &  \ldots  & 0\\
0 & \ldots & \vdots & \ldots & \vdots &  \ldots  & 0\\
0 & \ldots & 1 & \ldots & 0 &  \ldots  & 0\\
\vdots & \vdots & \vdots &\vdots & \vdots & \ddots  & \vdots\\
0 & \ldots & 0 & 0 & 0 &  \ldots  & 1\\
\end{array}\right),
\left(\begin{array}{ccccccc}
1 & 0 & 0 & 0 & 0 & \ldots  & 0\\
0 & 1 & 0 & 0 & 0 &  \ldots  & 0\\
0 & 0 & 1 & 0 & 0 &  \ldots  & 0\\
0 & 0 & 0 & 1 & 0 &  \ldots  & 0\\
0 & 0 & \lambda & 0 & 1 &  \ldots  & 0\\
\vdots & \vdots & \vdots &\vdots & \vdots & \ddots  & \vdots\\
0 & 0 & 0 & 0 & 0 &  \ldots  & 1\\
\end{array}\right)
\end{align*}
The first matrix above multiplies a given row by $\lambda$, the second matrix switches two given rows, and the third matrix adds $\lambda$ times one row to another row.
\item \textbf{Reflection Matrices}:  For a subspace $U$ of $\mathbb{R}^n$, we can find a matrix corresponding to the map $\textrm{Refl}_U(\mathbf{x})$ by simply looking for eigenvectors.  Specifically, if $\{\mathbf{u}_1, \ldots \mathbf{u}_k\}$ form a basis for $U$ and $\{\mathbf{w}_1, \ldots \mathbf{w}_{n-k}\}$ form a basis for $U^\perp$, note that the $\mathbf{u}_i$'s are all eigenvectors with eigenvalue $1$, and the $\mathbf{w}_i$'s are all eigenvectors with eigenvalue $-1$ (because reflecting through $U$ fixes the elements in $U$ and flips the elements in $U^\perp$.)  As a result, because we have $n$ linearly independent eigenvectors, we can use our diagonalization construction (discussed later in these notes) $EDE^{-1}$ to make a reflection matrix $R$.
\item \textbf{Adjacency Matrices}: For a graph\footnote{A \textbf{directed graph} $G = (V,E)$ consists of a set $V$, which we call the set of \textit{vertices} for $G$ and a set $E \subset V^2$, made of ordered pairs of vertices, which we call the set of \textit{edges} for $G$.} $G$ on the vertex set $V = \{1, 2, \ldots n\}$, we can define the \textbf{adjacency matrix} for $G$ as the following $n \times n$ matrix:
\begin{align*}
A_G := \left\{a_{ij} \left|  \begin{array}{rl}a_{ij} = 1 & \textrm{if the edge }(i,j) \textrm{ is in }E; \\a_{ij} = 0 & \textrm{otherwise}.\\ \end{array} \right. \right\}
\end{align*}
It bears noting that we can reverse this process: given a $n \times n$ matrix $A_G$, we can create a graph $G$ by setting $V = \{1, \ldots n\}$ and $E = \{(i,j): a_{ij} \neq 0\}$.
\begin{itemize}
\item Useful Theorem: In a graph $G$ with adjacency matrix $A_G$, the number of paths from $i$ to $j$ of length $m$ is the $(i,j)$-th entry in $(A_G)^m$.
\end{itemize}

\item \textbf{Permutation}: A \textbf{permutation} $\sigma$ of the list $(1, \ldots n)$ is simply some way to reorder this list into some other $(\sigma(1), \ldots \sigma(n))$.  (If you prefer to think about functions, $\sigma$ is simply a bijection from $\{1, \ldots n\}$ to $\{1, \ldots n\}$.)

Given any permutation $\sigma$ of $(1, \ldots n)$, the \textbf{permutation matrix} $P_\sigma$ is simply the $n \times n$ matrix whose $i$-th column is given by $e_{\sigma(i)}$.  In other words,
\begin{align*}
P_\sigma = \begin{bmatrix} \vdots & \vdots & &\vdots \\ \vec{e_{\sigma(1)}} & \vec{e_{\sigma(2)}} & \ldots & \vec{e_{\sigma(n)}} \\  \vdots & \vdots & &\vdots \\ \end{bmatrix}
\end{align*}
\end{itemize}


\subsection{Various Vector/Vector Space Properties}
\begin{itemize}
\item \textbf{Dimension}: The \textbf{dimension} of a space $V$ is the number of elements in a basis for $V$. 
\item \textbf{Rank}:  The \textbf{rank} of a matrix is the dimension of its row space.
\item \textbf{The rank-nullity theorem.}  The rank-nullity theorem is the following result:
\begin{thm}
Let $U, V$ be a pair of finite-dimensional vector spaces, and let $T: U \to V$ be a linear map.  Then the following equation holds:
\begin{align*}
\textrm{dimension}(\textrm{null}(T)) + \textrm{dimension}(\textrm{range}(T))  = \textrm{dimension}(U).
\end{align*}
\end{thm}
\item \textbf{Orthogonality}:  Two vectors $\mathbf{u},\mathbf{v}$ are called \textbf{orthogonal} iff their inner product is 0; i.e. if $\langle \textbf{u}, \textbf{v}\rangle = 0$.
\begin{itemize}
\item Useful Theorem: If we have a basis $B$ for some space $V$, the \textbf{Gram-Schmidt process} will transform $B$ into an orthogonal basis $U$ for $V$ -- i.e. a basis for $V$ that's made of vectors that are all orthogonal to each other.  See \href{http://math.ucsb.edu/~padraic/ucsb_2013_14/math108b_w2014/math108b_w2014_lecture3.pdf}{my notes} for an in-depth description of this process.
\end{itemize}
\item \textbf{Linear independence/dependence}:  A collection $v_1 \ldots v_k$ of vectors is called \textbf{linearly dependent} iff there are $k$ constants $a_1\ldots a_k$, not all identically 0, such that $ \sum_{i=1}^k a_iv_i = 0.$
They are called \textbf{linearly independent} if no such collection exists.
\begin{itemize}
\item Useful Theorem: A collection of vectors $\{\mathbf{v}_1, \ldots \mathbf{v}_n\}$ is linearly dependent iff the matrix formed by taking the $\mathbf{v}_i$'s as its rows has a zero row in its reduced row-echelon form.
\item For $n$ vectors in $F^n$ (so that the matrix below is square), this is equivalent to a determinant condition: the collection $\{\mathbf{v}_1, \ldots \mathbf{v}_n\}$ is linearly dependent iff the determinant of the matrix formed by taking the $\mathbf{v}_i$'s as its rows is zero.
\end{itemize}
\item \textbf{Basis}: A \textbf{basis} for a space $V$ is a collection of vectors $B$, contained within $V$, that is linearly independent and spans the entire space $V$.  A basis is called \textbf{orthogonal} iff all of its elements are orthogonal to each other; it is called \textbf{orthonormal} iff all of its elements are orthogonal to each other and furthermore all have length 1.
\item \textbf{Eigenvector/eigenvalue}: For a matrix $A$, nonzero vector $\mathbf{x}$, and scalar $\lambda$, we say that $\lambda$ is an \textbf{eigenvalue} for $A$ and $\mathbf{x}$ is an \textbf{eigenvector} for $A$ if and only if $A\mathbf{x} = \lambda\mathbf{x}.$
\begin{itemize}
\item \textbf{Algebraic multiplicity}: The \textbf{algebraic multiplicity}  of an eigenvalue $\mu$ is the number of times it shows up as a root of $A$'s characteristic polynomial.  I.e. if $p_A(\lambda) = (\lambda-\pi)^2$, $\pi$ would have algebraic multiplicity 2.
\item \textbf{Geometric multiplicity}: The \textbf{geometric multiplicity} of an eigenvalue $\mu$ is the dimension of the eigenspace associated to $\mu$.
\item Useful Theorem: The algebraic multiplicity of an eigenvalue is always greater than or equal to the geometric multiplicity of that eigenvalue.
\item Useful Theorem: A matrix is diagonalizable iff every eigenvalue has its algebraic multiplicity equal to its geometric multiplicity.  (If you want it to be diagonalizable via real-valued matrices, you should also insist that the matrix and all of its eigenvalues are real.)
\item \textbf{Dominant eigenvalue}: The \textbf{dominant eigenvalue} of a matrix is its eigenvalue of largest absolute value.
\end{itemize}
\end{itemize}

\subsection{Various Matrix Properties}
\begin{itemize}
\item \textbf{Symmetric}: A matrix is called \textbf{symmetric} iff $A^T =A$.
\begin{itemize}
\item Useful Theorem: $(AB)^T = B^TA^T$.
\end{itemize}
\item \textbf{Singular/Nonsingular}:  A $n \times n$ matrix is called \textbf{singular} iff it has rank $< n$, and is called \textbf{nonsingular} iff it has rank $n$.
\begin{itemize}
\item Useful Theorem: A matrix is nonsingular if and only if it has an inverse.
\item Useful Theorem: A matrix is nonsingular if and only if its determinant is nonzero.
\end{itemize}
\item \textbf{Orthogonal}:  A $n \times n$ matrix $U$ is called \textbf{orthogonal} iff all of its columns are of length 1 and orthogonal to each other.  Equivalently, $U$ is orthogonal iff $U^T = U^{-1}$; i.e. $U^TU = UU^T = I$.
\begin{itemize}
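\item Useful Theorem: Any $n \times n$ orthogonal matrix can be written as the product of no more than $n$ reflections.  (Specifically, no more than $n$ reflections through spaces of dimension $n-1$, which we call hyperplanes.  For example, a rotation of the plane $\mathbb{R}^2$ is a product of two reflections, but is not itself a single reflection.)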
\end{itemize}
\item \textbf{Regular}:  A matrix $A$ is called regular if $a_{ij} > 0$, for every entry $a_{ij}$ in $A$.  We will often write $A > 0$ to denote this.
\item \textbf{Nonnegative}:  A matrix is called \textbf{nonnegative} if and only if all of its entries are $\geq 0$.
\begin{itemize}
\item Useful Theorem: Suppose that $A$ is a nonnegative matrix and $\lambda$ is the maximum of the absolute values of $A$'s eigenvalues.  Then $\lambda$ is itself an eigenvalue, and there is a vector of nonnegative numbers that is an eigenvector for $\lambda$.
\item Perron-Frobenius: If $A$ is a nonnegative matrix such that $A^m > 0$ for some value of $m$, then the nonnegative eigenvector above is unique, up to scalar multiplication.
\item If $\lambda$ is an eigenvalue of a nonnegative matrix $A$ that corresponds to a nonnegative eigenvector, then $\lambda$ is at least the minimum of the row sums, and at most the maximum of the row sums; similarly, $\lambda$ is at least the minimum of the column sums, and at most the maximum of the column sums.
\end{itemize}
\item \textbf{Similarity}.  Two matrices $A$, $B$ are called \textbf{similar} if there is some matrix $U$ such that $A = UBU^{-1}$.  If we want to specify what $U$ is, we can specifically state that $A$ and $B$ are similar \textbf{via} $U$.
\item \textbf{Diagonalizable}:  A \textbf{diagonalization} of a matrix $A$ is an invertible matrix $E$ and a diagonal matrix $D$ such that $A = EDE^{-1}$.  A matrix is called \textbf{diagonalizable} if it has a diagonalization.
\begin{itemize}
\item Useful Theorem: A matrix $A$ is diagonalizable if and only if it has $n$ linearly independent eigenvectors $\mathbf{e}_1, \ldots \mathbf{e}_n$.  In this case, if $\lambda_1, \ldots \lambda_n$ are the corresponding eigenvalues to the $\mathbf{e}_i$'s, we can actually give the explicit diagonalization of $A$ as 
\begin{align*}
\left( \begin{array}{cccc}
| & | &  & | \\
\mathbf{e}_1 & \mathbf{e}_2 & \ldots & \mathbf{e}_n \\
| & | &  & | \\
\end{array}\right) \cdot \left( \begin{array}{ccccc}
\lambda_1 & 0 & 0 & \ldots & 0 \\
0 & \lambda_2 & 0 & \ldots & 0 \\
0 & 0 & \lambda_3 & \ldots & 0 \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
0 & 0 & 0 & \ldots & \lambda_n \\
\end{array} \right) \cdot \left( \begin{array}{cccc}
| & | &  & | \\
\mathbf{e}_1 & \mathbf{e}_2 & \ldots & \mathbf{e}_n \\
| & | &  & | \\
\end{array}\right)^{-1} 
\end{align*}
\item Suppose that $A$ is diagonalized as $EDE^{-1}.$  Then we can write the $n$-th power of $A$ as $ED^nE^{-1}$.  As well, if all of the entries along the diagonal of $D$ have $k$-th roots, we can give a $k$-th root of $A$ as the product $ED^{1/k}E^{-1}$.  
\end{itemize}
\item \textbf{Positive-definite/positive-semidefinite}:  A matrix $A$ is called \textbf{positive-definite} iff for any nonzero vector $\mathbf{x}$, we have $\mathbf{x}^T \cdot A \cdot \mathbf{x} > 0$.  Similarly, it is called  \textbf{positive-semidefinite} iff for any nonzero vector $\mathbf{x}$, we have  $\mathbf{x}^T \cdot A \cdot \mathbf{x} \geq 0$. 
\begin{itemize}
\item Useful Theorem: A symmetric matrix is positive-definite iff all of its eigenvalues are positive; similarly, a symmetric matrix is positive-semidefinite iff all of its eigenvalues are nonnegative.
\end{itemize}
\item \textbf{Probability}:  A $n \times n$ matrix $P$ is called a \textbf{probability matrix} if and only if the following two properties are satisfied:
\begin{itemize}
\item $P \geq 0$; in other words, $p_{ij} \geq 0$ for every entry $p_{ij}$ of $P$.
\item The column sums of $P$ are all 1; in other words, $\sum_{i=1}^n p_{ij} = 1$, for every $j$.
\end{itemize}
\begin{itemize}
\item Useful Theorem: Every probability matrix has a stable vector.
\item Useful Theorem: If $P$ is a probability matrix such that there is a value of $m$ where $P^m > 0$, then there is only one stable vector $\mathbf{v}$ for $P$.  Furthermore, for very large values of $m$, $P^m$'s columns all converge to $\mathbf{v}$.  This theorem also holds in the case where the graph represented by $P$ is \textbf{strongly connected}\footnote{A graph is \textbf{strongly connected} iff it's possible to get from any node to any other node via edges in the graph.}, even if $P^m$ is never $>0$.
\item Useful Theorem 3: If we have a probability matrix $P$ representing some finite system with $n$ states $\{1,\ldots n\}$, then the probability of starting in state $j$ and ending in state $i$ in precisely $m$ steps is the $(i,j)$-th entry in $P^m$.
\end{itemize}
\item \textbf{Polar decomposition}:  For a nonsingular  $n \times n$ matrix $A$, a \textbf{polar decomposition} of $A$ is a pair of matrices $Q$, $S$ such that $Q$ is an orthogonal matrix, $S$ is a positive-definite symmetric matrix, and $A = QS$.
\item \textbf{Singular Value Decomposition}:  For a $m \times n$ matrix $A$, a \textbf{singular value decomposition} of $A$ is a $n \times n$ orthogonal matrix $V$, a $m \times n$ matrix $D$ such that $d_{ij} \neq 0$ only when $i=j$, and a $m \times m$ orthogonal matrix $U$ such that $A = UDV^T.$
\begin{itemize}
\item Useful Theorem: If $A$ has a singular value decomposition given by $UDV^T$, then $A$'s Moore-Penrose pseudoinverse $A^+$ is given by the product $VD^+U^T$, where $D^+$ is the $n \times m$ matrix formed by taking $D$'s transpose and replacing all of its nonzero entries with their reciprocals.
\item Useful Theorem: If $A$ is a $n \times n$ matrix with SVD $UDV^T$, then the minimum value of $||A\mathbf{x}||/||\mathbf{x}||$ can be found by plugging in $\mathbf{v}_i$, where $\mathbf{v}_i$ is the column of $V$ corresponding to the smallest value $d_{ii}$ on $D$'s diagonal.
\end{itemize}
\item \textbf{Moore-Penrose pseudoinverse}: For a matrix $A$, we say that $A^+$ is the \textbf{pseudoinverse} of $A$ iff the following four properties hold: (1) $AA^+A = A$, (2) $A^+AA^+ = A^+$, (3) $AA^+$ is symmetric, and (4) $A^+A$ is also symmetric.
\begin{itemize}
\item Useful Theorem: The least-squares best-fit solutions to the system $A\mathbf{x} = \mathbf{b}$ are given by vectors of the form
\begin{align*}
A^+\cdot \mathbf{b} + (I - A^+A)\mathbf{w},
\end{align*}
where we let $\mathbf{w}$ be any vector.  Furthermore, if there is a solution to $A\mathbf{x} = \mathbf{b}$, then $A^+\cdot \mathbf{b}$ is a solution of minimum length.
\end{itemize}
\item \textbf{The spectral theorem}.  Suppose that $A$ is a $n \times n$ real symmetric matrix (i.e.\ don't make any assumptions about what $U$ is like we did above.)  Then in $A$'s Schur decomposition $URU^{-1}$, $R$ is a diagonal real-valued matrix!  Furthermore, we can ensure in our construction of $U$ that it is a real-valued orthogonal matrix.
\item \textbf{QR-decomposition}.  A \textbf{QR-decomposition} of an $n \times n$ matrix $A$ is an orthogonal matrix $Q$ and an upper-triangular\footnote{A matrix is called \textbf{upper-triangular} if all of its entries below the main diagonal are 0.  For example, $\begin{bmatrix} 1 & 2 & 3\\ 0 & 3 & 2\\ 0 & 0 &1 \end{bmatrix} $ is upper-triangular.} matrix $R$, such that 
\begin{align*}
A = QR.
\end{align*}

Every invertible matrix has a QR-decomposition, where $R$ is invertible.


\item \textbf{Jordan block}.
A block $B_i$ of some block-diagonal matrix is called a \textbf{Jordan block} if it is in the form
\begin{align*}
\begin{bmatrix}
\lambda & 1 & 0 & 0 &\ldots& 0\\
0 & \lambda & 1 & 0 & \ldots & 0 \\
0 & 0 & \lambda & 1 & \ldots & 0\\
\vdots & \vdots & \vdots & \ddots & \ddots & \vdots \\
0 & 0 & 0 & \ldots  & \lambda & 1\\
0 & 0 & 0 & \ldots & 0 & \lambda\\
\end{bmatrix}_.
\end{align*}
 In other words, there is some value $\lambda$ such that $B_i$ is a matrix with $\lambda$ on its main diagonal, 1's in the cells directly above this diagonal, and 0's elsewhere. 

\item \textbf{Jordan canonical/normal form}.  Suppose that $A$ is similar to an $n \times n$ block-diagonal matrix $B$ in which all of its blocks are Jordan blocks; in other words, that $A = UBU^{-1}$, for some invertible $U$.  We say that any such matrix $A$ has been written in \textbf{Jordan canonical form.}  

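Any $n \times n$ matrix $A$ can be written in Jordan canonical form, provided that we allow $U$ and $B$ to have complex entries (this may be necessary even if $A$ itself is real-valued.)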

\end{itemize}


\subsection{Operations on Vectors and Vector Spaces}
\begin{itemize}
\item \textbf{Dot product}:  For two vectors $\mathbf{x}, \mathbf{y} \in \mathbb{R}^n,$ we define the \textbf{dot product} $\mathbf{x}\cdot \mathbf{y}$ as the sum $\sum_{i=1}^n x_iy_i$.
\item \textbf{Inner product}:  For two vectors $\mathbf{x}, \mathbf{y} \in \mathbb{R}^n,$ we define the inner product  $\langle \mathbf{x}, \mathbf{y}\rangle$ of  $\mathbf{x}$ and $\mathbf{y}$ as their dot product,  $\mathbf{x}\cdot \mathbf{y}$.  
\begin{itemize}
\item Useful Observation:  Often, it's quite handy to work with the transpose of certain vectors.  So, remember: when you're taking the inner product or dot product of two vectors, taking the transpose of either vector doesn't change the results!  I.e. $\langle \mathbf{x}, \mathbf{y}\rangle = \langle \mathbf{x}^T, \mathbf{y}\rangle = \langle \mathbf{x}, \mathbf{y}^T\rangle= \langle \mathbf{x}^T, \mathbf{y}^T\rangle$.  We use this a \emph{lot} in proofs and applications where there are symmetric or orthogonal matrices running about.
\end{itemize}
\item \textbf{Magnitude}: The magnitude of a vector $\mathbf{x}$ is the square root of its inner product with itself: $||\mathbf{x}|| = \sqrt{\langle \mathbf{x}, \mathbf{x}\rangle}$.  This denotes the distance of this vector from the origin.
\item \textbf{Distance}: The distance of two vectors $\mathbf{x}, \mathbf{y}$ from each other is the square root of the inner product of the difference of these two vectors with itself: $||\mathbf{x} - \mathbf{y}|| = \sqrt{\langle \mathbf{x} - \mathbf{y}, \mathbf{x}-\mathbf{y}\rangle}$. 
\item \textbf{Projection, onto a vector}: For two vectors $\mathbf{u}, \mathbf{v}$, we define the projection of $\mathbf{v}$ onto $\mathbf{u}$ as the following vector:
\begin{align*}
\mathrm{proj}_{\mathbf{u}}(\mathbf{v}):= \frac{\langle \mathbf{v}, \mathbf{u} \rangle}{\langle \mathbf{u}, \mathbf{u} \rangle}\cdot \mathbf{u}.
\end{align*}
\item \textbf{Projection, onto a space}: Suppose that $U$ is a subspace with orthogonal basis $\{\mathbf{b}_1, \ldots \mathbf{b}_n\}$, and $\mathbf{x}$ is some vector.  Then, we can define the \textbf{orthogonal projection} of $\textbf{x}$ onto $U$ as the following vector in $U$:
\begin{align*}
\mathrm{proj}_U(\textbf{x}) = \sum^n_{i=1} \mathrm{proj}_{\mathbf{b}_i}(\mathbf{x}). 
\end{align*}
\begin{itemize}
\item Useful Theorem: This vector is the closest vector in $U$ to $\textbf{x}$.
\end{itemize}
\item \textbf{Orthogonal complement}:  For a subspace $S$ of a vector space $V$, we define the \textbf{orthogonal complement} $S^\perp$ as the following set:
\begin{align*}
S^\perp = \left\{ v \in V: \langle v, s \rangle = 0, \forall s \in S\right\}.
\end{align*}
\item \textbf{Isometry}: An \textbf{isometry} is a map $f: \mathbb{R}^n \to \mathbb{R}^n$ that preserves distances: i.e. for any $\mathbf{x}, \mathbf{y} \in \mathbb{R}^n,$ we have
\begin{align*}
||\mathbf{x} - \mathbf{y}|| =||f(\mathbf{x}) - f(\mathbf{y})||
\end{align*}
\item \textbf{Reflection}:  For a subspace $U$ of $\mathbb{R}^n$, we define the \textbf{reflection map through U} as the function
\begin{align*}
\textrm{Refl}_U(\mathbf{x}) = \mathbf{x} -2\cdot \textrm{proj}_{U^\perp}(\mathbf{x})
\end{align*}
\end{itemize}

\subsection{Operations on Matrices}
\begin{itemize}
\item \textbf{Transpose}: For a $m \times n$ matrix $A$, the transpose $A^T$ is the $n \times m$ matrix defined by setting its $(i,j)$-th cell as $a_{ji}$, for every cell $(i,j)$.
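\item \textbf{Determinant}: For a $n \times n$ matrix $A$, we define
\begin{align*}
\det(A) = \sum^{n}_{i=1} (-1)^{i-1} a_{1i} \cdot \det(A_{1i}),
\end{align*}
where $A_{1i}$ denotes the $(n-1) \times (n-1)$ matrix formed by deleting the first row and the $i$-th column of $A$, and the determinant of a $1 \times 1$ matrix $(a)$ is simply $a$.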
\begin{itemize}
\item Properties of the Determinant:
\begin{itemize}
\item Multiplying one of the rows of a matrix by some constant $\lambda$ multiplies that matrix's determinant by $\lambda$; switching two rows in a matrix multiplies that matrix's determinant by $-1$; adding a multiple of one row to another in a matrix does not change its determinant.
\item $\det(A^T) = \det(A).$
\item $\det(AB) = \det(A)\det(B)$.
\item The determinant of the matrix $A$ is the volume of the parallelepiped spanned by the columns of $A$ (up to a factor of $\pm 1$, which is how we determine if the map is orientation-preserving or -reversing.)
\end{itemize}
\item Useful Theorem: The determinant of a matrix $A$  is nonzero if and only if $A$ is invertible.
\end{itemize}
\item \textbf{Trace}:  The trace of a $n \times n$ matrix $A$ is the sum of the entries on $A$'s diagonal.
\begin{itemize}
\item Useful Theorem: The trace of a matrix is equal to the sum of its eigenvalues, counted with algebraic multiplicity.
\end{itemize}
\item \textbf{Characteristic polynomial}: The characteristic polynomial of a matrix $A$ is the polynomial  $p_A(\lambda) = \det(\lambda I - A)$, where $\lambda$ is the variable.
\begin{itemize}
\item $x$ is a root of $p_A(\lambda)$ iff $x$ is an eigenvalue for $A$.
\end{itemize}
\end{itemize}

\section{Example problems}

We work some sample problems here, to illustrate some of the ideas. 
\begin{quest}
Suppose that $A$ is an $n \times n$ matrix such that $A^3$ is the all-zeroes matrix, i.e.\ the $n \times n$ matrix in which every entry is 0.
\begin{enumerate}
\item Think of $A$ as a linear map from $\mathbb{R}^n \to \mathbb{R}^n$.  Can the range of $A$ be equal to $\mathbb{R}^n$?
\item Can you find an example of such a matrix $A$, such that $A$ and $A^2$ are not themselves all-zeroes matrices?
\end{enumerate}
\end{quest}
\begin{proof}
For an example of such a matrix, consider 
\begin{align*}
A = \begin{bmatrix} 0 & 1 & 0 \\ 0 & 0 & 1 \\ 0 & 0 & 0 \end{bmatrix}.
\end{align*}
We can easily check that
\begin{align*}
A^2 = \begin{bmatrix} 0 & 1 & 0 \\ 0 & 0 & 1 \\ 0 & 0 & 0 \end{bmatrix} \cdot\begin{bmatrix} 0 & 1 & 0 \\ 0 & 0 & 1 \\ 0 & 0 & 0 \end{bmatrix} &= \begin{bmatrix} 0 & 0 & 1 \\ 0 & 0 & 0 \\ 0 & 0 & 0 \end{bmatrix},\\
A^3 = A^2 \cdot A = \begin{bmatrix} 0 & 0 & 1 \\ 0 & 0 & 0 \\ 0 & 0 & 0 \end{bmatrix} \cdot \begin{bmatrix} 0 & 1 & 0 \\ 0 & 0 & 1 \\ 0 & 0 & 0 \end{bmatrix} &= \begin{bmatrix} 0 & 0 & 0 \\ 0 & 0 & 0 \\ 0 & 0 & 0 \end{bmatrix}.
\end{align*}
If you want a $n \times n$ example of such a matrix (for $n \geq 3$), simply add additional rows/columns of zeroes to the right and bottom of $A$.

In general, we claim that the range of any such matrix $A$ cannot be $\mathbb{R}^n$.  To see why, simply notice that if $A$ is a matrix with range equal to its domain, then $A$ must be invertible; consequently, for any natural number $k$, $A^k$ must also be invertible, with inverse given by $(A^{-1})^k$.  Therefore $A^k$ would have range $\mathbb{R}^n$ (as it is invertible, and thus has dim(nullspace) = 0;) and therefore in particular we could not have $A^k = $ the all-zeroes matrix for any $k$, as this has dim(nullspace) = $n$. 
\end{proof}


\begin{quest}
Take any $n \times n$ matrix $M$.  
\begin{enumerate}
\item Take any $k>0 \in \mathbb{N}$, and think of $M, M^k$ as a pair of linear maps from $\mathbb{R}^n \to \mathbb{R}^n$.  Prove that
\begin{align*}
\textrm{nullspc}(M^k) \supseteq \textrm{nullspc}(M).
\end{align*}
\item Suppose that $\det(M) = 0$.  Prove that $\det(M^k) = 0$ as well.
\end{enumerate}
\end{quest}

\begin{proof}
The first claim here is not hard to establish.  Take any vector $\vec{v} \in \textrm{nullspc}(M)$. By definition, we know that $M\vec{v} = \vec{0}$; therefore, we can conclude that $M^k\vec{v} = M^{k-1} \cdot M \vec{v} =  M^{k-1} \vec{0} = \vec{0}$ as well, and thus that $\vec{v} \in \textrm{nullspc}(M^k)$.  

As a side note, our earlier problem proves that inequality is possible (as the nullspaces of $A, A^3$ were distinct;) it is also not hard to see that equality is possible (let $M$ be the all-zeroes matrix!) and thus that this is the strongest statement we can make.

For the second part of our claim: we could simply use the multiplicative property of the determinant (which tells us that $\det(M^k) = \det(M) \cdot \ldots \cdot \det(M) = 0 \cdot \ldots \cdot 0 = 0$), or we could use the first part of this question to note that because
\begin{itemize}
\item $\det(M) = 0$ if and only if dim(nullspc($M$)) $\neq$ 0, and
\item  $\textrm{nullspc}(M^k) \supseteq \textrm{nullspc}(M)$, then dim(nullspc($M$)) $\leq$ dim(nullspc($M^k$)), 
\item then we can conclude that if $\det(M) = 0$ then dim(nullspc($M^k$)) $\neq 0$, and thus that
\item $\det(M^k) = 0$.
\end{itemize}  

\end{proof}


\begin{quest}
Create a $3 \times 3$ matrix $A$ with the following properties:
\begin{itemize}
\item No entry of $A$ is 0.
\item $A$ has 1, 2, and 3 as eigenvalues.
\end{itemize}
\end{quest}
\begin{proof}
If we ignore our ``no zero entries'' condition, this is not too hard; the matrix
\begin{align*}
\begin{bmatrix} 1 & 0 & 0 \\ 0 & 2& 0 \\ 0 & 0 & 3 \\ \end{bmatrix}
\end{align*}
satisfies our eigenvalue properties, as $(1,0,0), (0,1,0), (0,0,1)$ are eigenvectors for these three eigenvalues $1,2,3$.

Now, we can use the fact that eigenvalues are invariant under similarity; that is, if $A$ is a matrix and $B$ is an invertible matrix, then $A$ and $BAB^{-1}$ have the same eigenvalues!  (This is because if $\vec{v}$ is an eigenvector for $A$, then $B\vec{v}$ is an eigenvector for $BAB^{-1}$, with the same eigenvalue.)

So we can try simply multiplying $A$ on the left and right by appropriate $B, B^{-1}$'s, and hope we get something without zeroes!  In particular, let's use some matrices whose inverses we know: elementary matrices!  Recall that 
\begin{align*}
B &= \begin{bmatrix} 1 & 1 & 0 \\ 0 & 1 & 0 \\ 0& 0 & 1 \end{bmatrix}
\Rightarrow B^{-1} &= \begin{bmatrix} 1 & -1 & 0 \\ 0 & 1 & 0 \\ 0& 0 & 1 \end{bmatrix},
\end{align*}
because the first map (when processed as $B \cdot (\textrm{matrix})$) corresponds to the Gaussian elimination move of ``add one copy of row two to row one,'' and the second is just ``add $-1$ copies of row two to row one.''  

Therefore
\begin{align*}
\begin{bmatrix} 1 & 1 & 0 \\ 0 & 1 & 0 \\ 0& 0 & 1 \end{bmatrix}\cdot \begin{bmatrix} 1 & 0 & 0 \\ 0 & 2& 0 \\ 0 & 0 & 3 \\ \end{bmatrix} \cdot \begin{bmatrix} 1 & -1 & 0 \\ 0 & 1 & 0 \\ 0& 0 & 1 \end{bmatrix} = \begin{bmatrix} 1 & 1 & 0 \\ 0 & 2& 0 \\ 0 & 0 & 3 \\ \end{bmatrix};
\end{align*}
by using more of these elementary matrices, we can actually get
\begin{align*}
\begin{bmatrix} 1 & 0 & 1 \\ 0 & 1 & 0 \\ 0& 0 & 1 \end{bmatrix}\cdot \begin{bmatrix} 1 & 1 & 0 \\ 0 & 2& 0 \\ 0 & 0 & 3 \\ \end{bmatrix} \cdot \begin{bmatrix} 1 & 0 & -1 \\ 0 & 1 & 0 \\ 0& 0 & 1 \end{bmatrix} = \begin{bmatrix} 1 & 1 & 2 \\ 0 & 2& 0 \\ 0 & 0 & 3 \\ \end{bmatrix};\\
\begin{bmatrix} 1 & 0 & 0 \\ 2 & 1 & 0 \\ 0& 0 & 1 \end{bmatrix}\cdot \begin{bmatrix} 1 & 1 & 2 \\ 0 & 2& 0 \\ 0 & 0 & 3 \\ \end{bmatrix} \cdot \begin{bmatrix} 1 & 0 & 0 \\ -2 & 1 & 0 \\ 0& 0 & 1 \end{bmatrix} = \begin{bmatrix} -1 & 1 & 2 \\ -6 & 4& 4 \\ 0 & 0 & 3 \\ \end{bmatrix};\\
\begin{bmatrix} 1 & 0 & 0 \\ 0 & 1 & 0 \\ -1& 0 & 1 \end{bmatrix}\cdot\begin{bmatrix} -1 & 1 & 2 \\ -6 & 4& 4 \\ 0 & 0 & 3 \\ \end{bmatrix} \cdot \begin{bmatrix} 1 & 0 & 0 \\ 0 & 1 & 0 \\ 1& 0 & 1 \end{bmatrix} = \begin{bmatrix} 1 & 1 & 2 \\ -2 & 4& 4 \\ 2 &-1 & 1 \\ \end{bmatrix}.\\
\end{align*}
This is a matrix that has no zero entries, and by construction is similar to $\begin{bmatrix} 1 & 0 & 0 \\ 0 & 2& 0 \\ 0 & 0 & 3 \\ \end{bmatrix}$; so we've answered our problem!
\end{proof}

\begin{quest}
Suppose that $A$ is a $n \times n$ matrix with the following two properties:
\begin{itemize}
\item $A^n$ is the all-zeroes matrix.
\item There is exactly one nonzero vector $\vec{v}$, up to scalar multiples, that is an eigenvector of $A$.  (In other words, the only eigenvectors for $A$ are vectors of the form $c\cdot \vec{v}$.)
\end{itemize}
Find the Jordan normal form of $A$.
\end{quest}

\begin{proof}
Take any eigenvector $\vec{v}$ for $A$; then $A\vec{v} = \lambda\vec{v}$.  Consequently, $A^n\vec{v} = A^{n-1} \lambda \vec{v} = A^{n-2} \lambda^2 \vec{v} = \ldots = \lambda^n\vec{v}$.  Because $A^n $ is the all-zeroes matrix, we can also observe that $A^n\vec{v} = \vec{0}$, for any vector $\vec{v}$; consequently, we have proven that the only possible eigenvalue for $A$ is 0.

Our second bullet point is the claim that the dimension of the eigenspace for this only eigenvalue is 1.  Consequently, if we look at our matrix's Jordan normal form, we know that 
\begin{itemize}
\item The diagonals are all zeroes, as 0 is the only eigenvalue, and eigenvalues go on the diagonal of a Jordan normal form.
\item There is only one block, as there is only one dimension of eigenvectors.
\end{itemize}
Therefore, we have that the Jordan normal form here is just zeroes on the diagonal, ones directly above the diagonal, and zeroes elsewhere:\ i.e.
\begin{align*}
\begin{bmatrix}
0 & 1 & 0 & 0 & \ldots & 0 \\
0 & 0 & 1 & 0 & \ldots & 0 \\
0 & 0 & 0 & 1 & \ldots & 0 \\
\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\
0 & 0& 0 & 0 & \ldots & 1 \\
0 & 0 & 0 & 0 & \ldots & 0\\
\end{bmatrix}
\end{align*}

\end{proof}

\begin{quest}
Suppose that $A$ is a real-valued symmetric $n \times n$ matrix with the following two properties:
\begin{itemize}
\item All of $A$'s entries are either 0 or 1.
\item The all-1's vector is an eigenvector of $A$, with eigenvalue $10$.
\end{itemize}
\begin{enumerate}
\item How many 1's are there in each row of $A$?  
\item Suppose that $\lambda \neq 10$ is another eigenvalue of $A$.  Prove that $\lambda \leq 10$. 
\end{enumerate}
\end{quest}

\begin{proof}
Let $A = \begin{bmatrix} a_{11} & \ldots & a_{1n} \\ \vdots & \ddots & \vdots \\ a_{n1} & \ldots & a_{nn} \end{bmatrix}.$  Notice that by simply multiplying it out, $A \cdot (1,1,\ldots 1)$ is the vector
\begin{align*}
\left( \sum_{i=1}^n a_{1i}, \sum_{i=1}^n a_{2i}, \ldots \sum_{i=1}^n a_{ni} \right).
\end{align*}
If this is equal to $(10, 10 ,\ldots 10)$, then there are ten 1's in each row of $A$, as claimed.

Furthermore, suppose that we have any eigenvalue $\lambda$ other than 10 for this matrix $A$.  Let $\vec{v}$ be the eigenvector for this eigenvalue, and $v_k$ be the largest component of this eigenvector.  Then, again by definition, we have
\begin{align*}
\begin{bmatrix} a_{11} & \ldots & a_{1n} \\ \vdots & \ddots & \vdots \\ a_{n1} & \ldots & a_{nn} \end{bmatrix}\cdot\begin{bmatrix} v_{1} \\ \vdots \\ v_n \end{bmatrix} = \begin{bmatrix} \sum_{i=1}^n a_{1i}v_i\\ \vdots \\\sum_{i=1}^n a_{ni}v_i \end{bmatrix} = \lambda\begin{bmatrix} v_{1} \\ \vdots \\ v_n \end{bmatrix} .
\end{align*}

In particular, if we look at the $v_k$ co\"ordinate, we have
\begin{align*}
\sum_{i=1}^n a_{ki}v_i = \lambda v_k;
\end{align*}
but if we use the fact that $v_k$ is the ``biggest'' (i.e. $v_k \geq v_j, \forall j$), we can see that
\begin{align*}
\sum_{i=1}^n a_{ki}v_i \leq \sum_{i=1}^n a_{ki}v_k \leq 10v_k,
\end{align*}
because there are at most ten one-entries in the $k$-th row (and the rest are zeroes.)

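But this means that $\lambda v_k \leq 10v_k$.  We may assume that $v_k > 0$ (the eigenvector $\vec{v}$ is nonzero, so if its largest component is not positive we can replace $\vec{v}$ by $-\vec{v}$, which is still an eigenvector for $\lambda$, before choosing $v_k$); dividing through by $v_k$ then gives $\lambda \leq 10$, as claimed.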
\end{proof}


\end{document}