\documentclass[12pt,a4paper]{article}
\usepackage{amssymb,amsmath}
\newtheorem{Lemma}{Lemma}[section]
\newtheorem{Theorem}[Lemma]{Theorem}
\newtheorem{Definition}[Lemma]{Definition}
\newtheorem{Axiom}[Lemma]{Axiom}
\newtheorem{Convention}[Lemma]{Convention}
\newtheorem{Example}[Lemma]{Example}
\newtheorem{Corollary}[Lemma]{Corollary}
\newtheorem{Remark}[Lemma]{Remark}
\begin{document}
\title{Results in Linear Mathematics (P1)}
\author{T.W.K\"{o}rner}
\maketitle
\begin{footnotesize}
\noindent
{\bf Small print} The syllabus for the course is defined by
the Faculty Board Schedules (which are minimal for lecturing
and maximal for examining). I have starred certain results
which seem to me to go beyond a strict interpretation of
the syllabus. However whilst it would not, in my opinion, be
fair to set such results as bookwork they could well appear
as problems. I should {\bf very much} appreciate being told
of any corrections or possible improvements. This document
is written in \LaTeX{} and stored in the file labeled
\verb+~twk/1B/V1.tex+ on emu in (I hope) read permitted form.
My e-mail address is \verb+twk+.
\end{footnotesize}
\section{Vector Spaces}
\begin{Convention} We shall write ${\mathbb F}$ to mean ${\mathbb R}$
or ${\mathbb C}$.
\end{Convention}
\begin{Definition}
\label{Ax}
We call $(V,+,.,{\mathbb F})$ a vector space over ${\mathbb F}$
if, whenever $u,v,w\in V$ and $\lambda,\mu\in{\mathbb F}$, then
$u+v\in V$, $\lambda u\in V$ and
(i) $(V,+)$ is an Abelian group (so in particular
$u+(v+w)=(u+v)+w$, $u+v=v+u$).
(ii) $\lambda(\mu u)=(\lambda\mu)u$.
(iii) $(\lambda+\mu)u=\lambda u+\mu u$.
(iv) $\lambda(u+v)=\lambda u+\lambda v$.
(v) $1u=u$.
\end{Definition}
\begin{Lemma} (i) The zero $\underline{0}$ of $(V,+)$ satisfies
$0u=\underline{0}$ for all $u\in V$.
(ii) The additive inverse $-u$ of $u\in V$ satisfies
$-u=(-1)u$.
\end{Lemma}
We call $\underline{0}$ the zero vector and write it as $0$.
(Our general policy of dropping `boldface' ${\bf u}$ and
`underline' $\underline{u}$ in favour of the simple $u$ will
not usually lead to ambiguity but if it does we simply revert
to the less simple convention.)
\begin{Theorem}
\label{Funspace1}
If $X$ is any set then the set ${\mathbb F}^{X}$
of functions $f:X\rightarrow{\mathbb F}$ is a vector space if we
define `vector addition' and `multiplication by a scalar' by
\[(f+g)(x)=f(x)+g(x),\quad\text{and}\quad(\lambda f)(x)=\lambda f(x)\]
for all $x\in X$ where $f,g\in{\mathbb F}^{X}$ and $\lambda\in{\mathbb F}$.
\end{Theorem}
\begin{Definition} If $V$ is a vector space
we say that $U\subseteq V$ is a subspace of $V$ if $0\in U$
and
\[(\lambda,\mu\in{\mathbb F},\ u,v\in U)\Rightarrow
\lambda u+\mu v\in U.\]
\end{Definition}
\begin{Lemma}
\label{Subspace}
If $U$ is a subspace of a vector space $V$
then $U$ is itself a vector space.
\end{Lemma}
It is usually easier to use Theorem \ref{Funspace1} (or its
generalisation Theorem \ref{Funspace2} below) together with
Lemma \ref{Subspace} to prove that something is a vector space
than to verify the axioms in Definition \ref{Ax}.
\begin{Example}
\label{Example1}
The space $C([0,1])$ of continuous functions
$f:[0,1]\rightarrow {\mathbb F}$, the space ${\cal P}$ of real
polynomials $P:{\mathbb R}\rightarrow {\mathbb R}$, the classical
spaces ${\mathbb F}^{n}$
and the set $J$ of $n\times n$ real matrices all of whose rows
and columns add up to the same number can all be made into
vector spaces in a natural way.
\end{Example}
\begin{Definition} (i) Vectors $e_{1},e_{2},\ldots,e_{n}$
span a vector space $E$ if given any $e\in E$ we can
find $\lambda_{1},\lambda_{2},\ldots,\lambda_{n}\in {\mathbb F}$
such that
\[e=\lambda_{1}e_{1}+\lambda_{2}e_{2}+\ldots+\lambda_{n}e_{n}.\]
(ii) Vectors $e_{1},e_{2},\ldots,e_{n}$ in a vector space $E$
are linearly independent if the only solution of
\[0=\lambda_{1}e_{1}+\lambda_{2}e_{2}+\ldots+\lambda_{n}e_{n}\]
(with $\lambda_{1},\lambda_{2},\ldots,\lambda_{n}\in {\mathbb F}$)
is $\lambda_{1}=\lambda_{2}=\ldots=\lambda_{n}=0$.
(iii) Vectors $e_{1},e_{2},\ldots,e_{n}$ form a (finite) basis of a
vector space $E$ if they span $E$ and are linearly independent.
\end{Definition}
We shall not be interested in `infinite bases' (few people are, in an
algebraic context)
so we shall write `basis' rather than `(finite) basis' from now on.
\begin{Lemma} Vectors $e_{1},e_{2},\ldots,e_{n}$ form a basis of a
vector space $E$ if and only if the equation
\[e=\lambda_{1}e_{1}+\lambda_{2}e_{2}+\ldots+\lambda_{n}e_{n}\]
(with $\lambda_{1},\lambda_{2},\ldots,\lambda_{n}\in {\mathbb F}$)
has one and only one solution for each $e\in E$.
\end{Lemma}
\begin{Definition} A vector space $E$ is said to be finite dimensional
if it has a finite spanning set.
\end{Definition}
\begin{Lemma} (i) If vectors $e_{1},e_{2},\ldots,e_{n}$ span
a vector space $E$ then some sub-collection forms a basis.
(ii) Every finite dimensional vector space has a basis.
\end{Lemma}
The key result in the study of finite dimensional vector spaces is
the Steinitz Replacement Lemma.
\begin{Theorem} Let $E$ be a vector space. If
(A) $e_{1},e_{2},\ldots,e_{n}$ span $E$, and
(B) $f_{1},f_{2},\ldots,f_{m}$ are linearly independent in $E$,
\noindent
then $n\geq m$ and (possibly after reordering the $e_{j}$)
$f_{1},f_{2},\ldots,f_{m},e_{m+1},e_{m+2},\ldots,e_{n}$ span $E$.
\end{Theorem}
\begin{Corollary} Every finite dimensional space $E$ has an associated
dimension $N$ such that
(i) Every basis of $E$ has $N$ elements.
(ii) Every linearly independent collection of vectors in $E$ has
at most $N$ elements.
(iii) Every spanning collection of vectors in $E$ has
at least $N$ elements.
\end{Corollary}
\begin{Corollary}(i) Any subspace of a finite dimensional space
is finite dimensional (and the dimension of the subspace is no
greater than the dimension of the space).
(ii) Any linearly independent collection of vectors in a
finite dimensional space can be extended to a basis.
\end{Corollary}
\begin{Example}
In Example \ref{Example1}
the space $C([0,1])$ of continuous functions
is infinite dimensional and
the space $J$ of $n\times n$ magic squares is finite dimensional.
\end{Example}
\begin{Definition} If $V$ and $W$ are subspaces of a vector space
$U$ we write
\[V+W=\{v+w:\ v\in V,\ w\in W\}.\]
\end{Definition}
\begin{Lemma} If $V$ and $W$ are subspaces of a vector space
$U$ then $V+W$ and $V\cap W$ are subspaces of $U$. Further,
if $V+W$ is finite dimensional,
\[{\rm dim}(V+W)+{\rm dim}(V\cap W)={\rm dim}(V)+{\rm dim}(W).\]
\end{Lemma}
\begin{Definition} Subspaces $E_{1},E_{2},\ldots,E_{m}$ of a
vector space $E$ are said to have $E$ as direct sum
if and only if the equation
\[e=e_{1}+e_{2}+\ldots+e_{m}\]
(with $e_{j}\in E_{j}$ for $1\leq j\leq m$)
has one and only one solution for each $e\in E$. We then write
\[E=E_{1}\oplus E_{2}\oplus\ldots\oplus E_{m}.\]
\end{Definition}
\begin{Lemma} Subspaces $E_{1},E_{2},\ldots,E_{m}$ of a finite dimensional
vector space $E$ have $E$ as direct sum if and only if
the combination of bases of $E_{1},E_{2},\ldots,E_{m}$ gives
a basis of $E$.
\end{Lemma}
\begin{Lemma}
\label{2sum}
If $V$ and $W$ are subspaces of a vector space $U$ then
$V\oplus W=U$ if and only if $V+W=U$ and $V\cap W=\{0\}$.
\end{Lemma}
The reader is warned that Lemma \ref{2sum} does not generalise in the
obvious way to direct sums of more than two subspaces.
\begin{Example}\label{Notunique}
In ${\mathbb R}^{2}$ if we set
\begin{eqnarray*}
U&=&\{(x,0):\ x\in{\mathbb R}\}\\
V&=&\{(0,y):\ y\in{\mathbb R}\}\\
W&=&\{(t,t):\ t\in{\mathbb R}\}
\end{eqnarray*}
then $U\cap V=V\cap W=W\cap U=\{0\}$ and $U+V+W={\mathbb R}^{2}$ but
${\mathbb R}^{2}$ is not the direct sum of $U$, $V$ and $W$.
\end{Example}
\begin{Definition} If $V$ and $W$ are subspaces of a vector space
$U$ and $V\oplus W=U$ then $W$ is called a complementary subspace of
$V$ in $U$.
\end{Definition}
The reader is warned that this definition is a strong competitor
for the title of `Definition most frequently mangled by undergraduates'.
She is also warned that except in the trivial cases $V=U$ and
$V=\{0\}$ the complementary subspace of $V$ in $U$ IS NOT UNIQUE! In
Example \ref{Notunique} both $U$ and $V$ are complementary subspaces of
$W$ in ${\mathbb R}^{2}$.
\begin{Example} Consider the vector space $C({\mathbb R})$ of
continuous functions $f:{\mathbb R}\rightarrow{\mathbb R}$. Let
\begin{eqnarray*}
E&=&\{f\in C({\mathbb R}):\ f(x)=f(-x)\ \mbox{for all $x\in{\mathbb R}$}\}\\
F&=&\{f\in C({\mathbb R}):\ f(x)=-f(-x)\ \mbox{for all $x\in{\mathbb R}$}\}\\
G&=&\{f\in C({\mathbb R}):\ f(x)=0\ \mbox{for all $x\leq 0$}\}.
\end{eqnarray*}
Then both $F$ and $G$ are complements of
$E$ in $C({\mathbb R})$.
\end{Example}
The course now contains some remarks on quotient
groups. These are starred,
but only to prevent the examiners going
overboard, the actual ideas are very easy.
The development is parallel to, but easier than the
development of quotient groups in course C1.
Suppose that $U$ is a subspace of a vector space $V$.
We observe that $U$ is a subgroup of $(V,+)$, the
vector space $V$ considered
as an Abelian group under addition. We take over from
group theory the idea of a coset
\[v+U=\{v+u:u\in U\}\]
and observe that the first part of the proof of
Lagrange's theorem shows that the cosets form a
disjoint cover of $V$.
\begin{Lemma} Let $U$ be a subspace of a vector space $V$. Then
(i) $\bigcup_{v\in V}(v+U)=V$.
(ii) If $v,w\in V$ then either $(v+U)\cap(w+U)=\emptyset$
or $v+U=w+U$.
\end{Lemma}
The remarkable thing is that we can define addition
and scalar multiplication of these cosets in a natural way.
(Of course, we can deal with addition by noting that
any subgroup of an Abelian group is normal and
quoting course C1 but it is just as easy to do things
directly.)
\begin{Lemma}
If $U$ is a subspace of a vector space $V$ over ${\mathbb F}$
and
\[v_{1}+U=v_{2}+U,\ w_{1}+U=w_{2}+U, \lambda\in{\mathbb F}\]
then
\[(v_{1}+w_{1})+U=(v_{2}+w_{2})+U,
\ \lambda v_{1}+U=\lambda v_{2}+U.\]
\end{Lemma}
\begin{Definition}\label{quotient operations}
If $U$ is a subspace of a vector space $V$ over ${\mathbb F}$
we write $V/U$ for the set of cosets of $U$ and define
addition and scalar multiplication on $V/U$ by
\[(v+U)+(w+U)=(v+w)+U,\ \lambda(v+U)=\lambda v+U.\]
\end{Definition}
Note that $0(v+U)=0v+U=U$.
\begin{Lemma}
If $U$ is a subspace of a vector space $V$ over ${\mathbb F}$
then
$V/U$ with addition and scalar multiplication as in the
previous definition is
a vector space over ${\mathbb F}$.
\end{Lemma}
We call $V/U$ a quotient space (or a quotient vector space).
The reader will naturally expect us to produce an isomorphism
theorem. We shall do so in Theorem~\ref{isomorphism}
but first we need to discuss linear maps.
\section{Linear Maps}
\begin{Definition} If $U$ and $V$ are vector spaces over ${\mathbb F}$
then the map $\alpha:U\rightarrow V$ is said to be linear if
\[\alpha(\lambda_{1}u_{1}+\lambda_{2}u_{2})=
\lambda_{1}\alpha(u_{1})+\lambda_{2}\alpha(u_{2})\]
for all $u_{j}\in U$, $\lambda_{j}\in {\mathbb F}$.
\end{Definition}
If abstract algebraists were the only people to use vector spaces
then `linear maps' would be called `vector space homomorphisms'.
The following definitions may have been mentioned in previous
courses.
\begin{Definition} Let $f:X\rightarrow Y$.
(i) We say that $f$ is injective if $f(x_{1})=f(x_{2})$ implies
$x_{1}=x_{2}$.
(ii) We say that $f$ is surjective if, given $y\in Y$, we can find
an $x\in X$ such that $f(x)=y$.
(iii) We say that $f$ is bijective if it is both injective and surjective.
\end{Definition}
If $f:X\rightarrow Y$ is bijective then there is a unique function
$f^{-1}:Y\rightarrow X$ (the inverse of $f$) such that
$f^{-1}(f(x))=x$ for all $x\in X$ and
$f(f^{-1}(y))=y$ for all $y\in Y$.
\begin{Definition} If $U$ and $V$ are vector spaces over ${\mathbb F}$
the linear map $\alpha:U\rightarrow V$ is said to be an isomorphism
if it is a bijection.
\end{Definition}
\begin{Lemma} If $\alpha:U\rightarrow V$ is an isomorphism then
$\alpha^{-1}:V\rightarrow U$ is also linear (and so also an
isomorphism).
\end{Lemma}
\begin{Lemma}\label{Prelrank}
If $U$ and $V$ are vector spaces over ${\mathbb F}$
and $\alpha:U\rightarrow V$ is linear then
(i) $\alpha(U)=\{\alpha(u):\ u\in U\}$ is a subspace of $V$.
(ii) $\alpha^{-1}(0)=\{u\in U: \alpha(u)=0\}$ is a subspace of $U$.
(iii) $\alpha$ is injective if and only if $\alpha^{-1}(0)=\{0\}$.
\end{Lemma}
We call $\alpha^{-1}(0)$ the null space of $\alpha$ and
$\alpha(U)$ the range space of $\alpha$. (Note that, unless
$\alpha$ is bijective, there is no inverse function $\alpha^{-1}$.)
\begin{Theorem}\label{Class}
(The Classification Theorem For Finite Dimensional
Vector Spaces) Every vector space over ${\mathbb F}$ of dimension $N$
is isomorphic to ${\mathbb F}^{N}$.
\end{Theorem}
We now give the promised generalisation of Theorem \ref{Funspace1}.
\begin{Theorem}
\label{Funspace2}
If $X$ is any set and $U$ any vector space over ${\mathbb F}$
then the set $U^{X}$
of functions $f:X\rightarrow U$ is a vector space over ${\mathbb F}$
if we
define `vector addition' and `multiplication by a scalar' by
\[(f+g)(x)=f(x)+g(x),\quad\text{and}\quad(\lambda f)(x)=\lambda f(x)\]
for all $x\in X$ where $f,g\in U^{X}$ and $\lambda\in{\mathbb F}$.
\end{Theorem}
\begin{Theorem}\label{L(U,V)}
If $U$ and $V$ are vector spaces over ${\mathbb F}$
then the set $L(U,V)$ of linear maps forms a vector space
if we
define `vector addition' and `multiplication by a scalar' by
\[(\alpha+\beta)(u)=\alpha(u)+\beta(u),
\quad\text{and}\quad(\lambda\alpha)(u)=\lambda\alpha(u)\]
for all $u\in U$ where $\alpha,\beta\in L(U,V)$ and $\lambda\in{\mathbb F}$.
\end{Theorem}
The operation of composition interacts with the operation
of addition just defined in a suggestive way.
\begin{Theorem}\label{Mult}
If $U$, $V$, $W$ and $X$ are vector spaces over ${\mathbb F}$
and $\alpha,\beta\in L(U,V)$, $\gamma,\delta\in L(V,W)$,
$\epsilon\in L(W,X)$ then
\[\epsilon(\gamma\alpha)=(\epsilon\gamma)\alpha,\ \
\gamma(\alpha+\beta)=\gamma\alpha+\gamma\beta,\ \
(\gamma+\delta)\alpha=\gamma\alpha+\delta\alpha.\]
\end{Theorem}
Theorems \ref{Funspace2} and \ref{Mult} apply in particular to
$L(U,U)$.
\begin{Theorem}\label{Ring} If $U$ is a vector space over ${\mathbb F}$
then $L(U,U)$ is a vector space over ${\mathbb F}$ which obeys
the additional laws
\[\alpha(\beta\gamma)=(\alpha\beta)\gamma,\ \
\alpha(\beta+\gamma)=\alpha\beta+\alpha\gamma,\ \
(\alpha+\beta)\gamma=\alpha\gamma+\beta\gamma,\ \
\iota\alpha=\alpha\iota=\alpha, \]
where $\iota$ is the identity map.
\end{Theorem}
(A vector space which obeys these laws is called an `algebra'
but the definition is not part of this course.)
\begin{Theorem}\label{Group} If $U$ is a vector space over ${\mathbb F}$
then the set of bijective (`invertible') maps in $L(U,U)$
form a group $GL(U)$ under composition with unit the
identity \mbox{map $\iota$}.
\end{Theorem}
$GL(U)$ is called `the general linear group' and its elements
are called automorphisms.
Theorem \ref{Ring} forms a link with an older but not
unsuccessful tradition of formal symbolic manipulation.
\begin{Example} (i) Consider the vector space
$C^{\infty}_{\mathbb C}({\mathbb R})$
of infinitely
differentiable functions $f:{\mathbb R}\rightarrow{\mathbb C}$. The map
$D:C^{\infty}_{\mathbb C}({\mathbb R})\rightarrow C^{\infty}_{\mathbb C}({\mathbb R})$
given by $Df=f'$ is a well defined linear map.
If $a_{0},a_{1},\ldots,a_{n}\in{\mathbb R}$ then,
taking $D^{0}=I$ the identity function
\[\left(\sum_{r=0}^{n}a_{r}D^{r}\right)f=\sum_{r=0}^{n}a_{r}f^{(r)}.\]
(ii)$^{*}$ If $a\in{\mathbb C}$ then $D-aI$ is surjective with a
one dimensional nul space. The map
$T_{a}:C^{\infty}_{\mathbb C}({\mathbb R})\rightarrow C^{\infty}_{\mathbb C}({\mathbb R})$
defined by
\[(T_{a}f)(t)=e^{at}\int_{0}^{t}f(x)e^{-ax}dx\]
is linear and injective but not surjective. We have
\[(D-aI)T_{a}=I.\]
The general solution $f$ of
\[(D-aI)f=g\]
(with $f,g\in C^{\infty}_{\mathbb C}({\mathbb R})$) is $f=T_{a}g+h$ with
$h\in (D-aI)^{-1}(0)$.
(iii)$^{*}$ If $a_{j}\in{\mathbb C}$ for $1\leq j\leq n$
then $(D-a_{1}I)(D-a_{2}I)\ldots(D-a_{n}I)$ is surjective with an
$n$ dimensional nul space $H$. The equation
\[(D-a_{1}I)(D-a_{2}I)\ldots(D-a_{n}I)f=g\]
(with $f,g\in C^{\infty}_{\mathbb C}({\mathbb R})$) always has a solution
$f_{0}$ (the `particular integral') and its general solution
is $f=f_{0}+h$ where $h$ (the `complementary function') lies
in the $n$ dimensional nul space $H$.
\end{Example}
Although general theorems about linear maps are often best
viewed geometrically or abstractly, particular computations
require matrices.
\begin{Theorem}\label{Matrep}
Suppose that $U$ is a finite dimensional vector space
over ${\mathbb F}$
with basis $u_{1},u_{2},\ldots,u_{m}$ and $V$ is a finite
dimensional vector space over ${\mathbb F}$ with
basis $v_{1},v_{2},\ldots,v_{n}$.
If $\alpha:U\rightarrow V$ is linear then $\alpha$ has an
associated $n\times m$ matrix $A=(a_{ij})$ with entries in ${\mathbb F}$
given by
\[\alpha(u_{j})=\sum_{i=1}^{n}a_{ij}v_{i}.\ \ \ (*)\]
Automatically
\[\alpha(\sum_{j=1}^{m}x_{j}u_{j})=
\sum_{i=1}^{n}\left(\sum_{j=1}^{m}a_{ij}x_{j}\right)v_{i}.\ \ \ (**)\]
Conversely if $A$ is an
$n\times m$ matrix with entries in ${\mathbb F}$ then the formula
$(**)$ defines a linear map $\alpha:U\rightarrow V$ which
has $A$ as associated matrix.
\end{Theorem}
We say that $A$ is the matrix of $\alpha$ with respect to the
bases $u_{1},u_{2},\ldots,u_{m}$ of $U$ and
$v_{1},v_{2},\ldots,v_{n}$ of $V$. Equation $(*)$ is conventional
(in the same way that making clocks go clockwise is
conventional) but it represents a universal convention
which you should follow.
\begin{Theorem} Suppose that $U$ is a finite dimensional vector space
over ${\mathbb F}$
with basis $u_{1},u_{2},\ldots,u_{m}$, $V$ is a finite
dimensional vector space over ${\mathbb F}$ with
basis $v_{1},v_{2},\ldots,v_{n}$ and $W$ is a finite
dimensional vector space over ${\mathbb F}$ with
basis $w_{1},w_{2},\ldots,w_{p}$. If
$\alpha,\beta\in L(U,V)$ have matrices $A$ and $B$ with respect to the
bases $u_{1},u_{2},\ldots,u_{m}$ of $U$,
$v_{1},v_{2},\ldots,v_{n}$ of $V$ and $\gamma\in L(V,W)$
has matrix $C$ with respect to the bases
$v_{1},v_{2},\ldots,v_{n}$ of $V$ and $w_{1},w_{2},\ldots,w_{p}$ of $W$
and $\lambda\in{\mathbb F}$
then $\alpha+\beta$ has matrix $A+B$
and $\lambda\alpha$ has matrix $\lambda A$ with respect to the
bases $u_{1},u_{2},\ldots,u_{m}$ of $U$ and
$v_{1},v_{2},\ldots,v_{n}$ of $V$ and $\gamma\beta$ has matrix
$CB$ with respect to the
bases $u_{1},u_{2},\ldots,u_{m}$ of $U$
and $w_{1},w_{2},\ldots,w_{p}$ of $W$.
\end{Theorem}
Here $A+B$ is the matrix $E$ given by
\[e_{ij}=a_{ij}+b_{ij}\ \ \mbox{for $1\leq i\leq n$, $1\leq j\leq m$},\]
$\lambda A$ is the matrix $F$ given by
\[f_{ij}=\lambda a_{ij}\ \ \mbox{for $1\leq i\leq n$, $1\leq j\leq m$},\]
and $CB$ is the matrix $G$ given by
\[g_{rj}=\sum_{i=1}^{n}c_{ri}b_{ij}
\ \ \mbox{for $1\leq r\leq p$, $1\leq j\leq m$}.\]
Notice that if, in Theorem \ref{Matrep}, we write
\[\alpha\left(\sum_{j=1}^{m}x_{j}u_{j}\right)=
\sum_{i=1}^{n}y_{i}v_{i}\]
and write ${\bf x}$ for the column vector (i.e. $m\times 1$
matrix) $(x_{j})$, ${\bf y}$ for the column vector $(y_{i})$
then equation $(**)$ becomes
\[{\bf y}=A{\bf x}.\]
The exact correspondence between linear maps of finite dimensional
vector spaces and matrices means that we can rewrite theorems
about maps as theorems about matrices. For example, restricting
Theorem \ref{Ring} to finite dimensional spaces, we obtain
the following matricial translation.
\begin{Theorem}\label{Ringmatrix} If we use the standard matrix
addition, multiplication and multiplication by a scalar
the set $M_{n}({\mathbb F})$ of $n\times n$ matrices with entries
in ${\mathbb F}$ is a vector space satisfying the further rules
\[A(BC)=(AB)C,\ \
A(B+C)=AB+AC,\ \
(A+B)C=AC+BC,\ \ IA=AI=A, \]
where $I$ is the identity matrix with $(i,j)$th entry $\delta_{ij}$.
\end{Theorem}
Conversely we can use results about matrices to obtain
results on linear maps between finite dimensional spaces.
\begin{Theorem} (i) The vector space $M_{nm}({\mathbb F})$
of $n\times m$ matrices over ${\mathbb F}$ has dimension $nm$.
(ii) If $U$ and $V$ are vector spaces of dimension $n$ and $m$
then $L(U,V)$ has dimension $nm$.
\end{Theorem}
Since Theorem \ref{Class} tells us that all finite dimensional
vector spaces are isomorphic to ${\mathbb F}^{n}$ for some $n$
and since linear maps between such spaces can always be represented by
matrices, it is clearly possible
to do all questions involving finite dimensional vector spaces
by taking bases and using co-ordinates. There are however
several reasons for using co-ordinate free methods when possible.
\begin{itemize}
\item As Maxwell pointed out, co-ordinate free methods often give
a better formulation of the underlying physical or geometric
problem.
\item For analysts and most physicists the study of finite dimensional
is merely a prelude to the study of infinite dimensional spaces
where co-ordinate methods are often not available.
\item One of the reasons for doing the course P1 is to learn
more abstract modes of thought. Sticking to concrete co-ordinate
systems is hardly the way to go about it.
\end{itemize}
Returning to the ideas associated with Lemma \ref{Prelrank}
we make the following definitions.
\begin{Definition}
If $U$ and $V$ are finite dimensional vector spaces over ${\mathbb F}$
and $\alpha:U\rightarrow V$ is linear then
(i) The rank $r(\alpha)$ of $\alpha$ is the dimension of
the range space $\alpha(U)$.
(ii) The nullity $n(\alpha)$ of $\alpha$ is the dimension of
the nul space $\alpha^{-1}(0)$.
\end{Definition}
\begin{Theorem}\label{Rank-Nullity}
If $U$ and $V$ are finite dimensional vector spaces over ${\mathbb F}$
and $\alpha:U\rightarrow V$ is linear then
\[r(\alpha)+n(\alpha)=\dim U\ \ \ (*).\]
Further we can find bases for $U$ and $V$ such that the matrix
$A$ associated with $\alpha$ is given by
$a_{ij}=1$ if $1\leq i=j\leq r(\alpha)$, $a_{ij}=0$ otherwise.
\end{Theorem}
Formula $(*)$ is the famous `rank-nullity formula'.
\begin{Theorem}
If $U$ is a vector space over ${\mathbb F}$
and $\alpha:U\rightarrow V$ is linear then
\[U\supseteq\alpha(U)\supseteq\alpha^{2}(U)\supseteq\ldots
\supseteq\alpha^{k}(U)\supseteq\alpha^{k+1}(U)\supseteq\ldots\ .\]
If $\alpha^{l}(U)=\alpha^{l+1}(U)$ then $\alpha^{k}(U)=\alpha^{l}(U)$
for all $k\geq l$.
If $U$ is finite dimensional then
\[\dim(U)\geq r(\alpha)\geq r(\alpha^{2})\geq\ldots\ ,\]
and there exists an $l$ with $l\leq \dim(U)$ such that
$r(\alpha^{k})>r(\alpha^{k+1})$ for $k<l$ and
$r(\alpha^{k})=r(\alpha^{l})$ for $k\geq l$.
\end{Theorem}
Our method of proof will be induction on the dimension of $V$
starting from the observation that an endomorphism of a complex
vector space always has an eigenvector. This method of proof will
recur in the course P4. We note a profound difference between
the real and complex cases.
\begin{Example}\label{Nottriang} If we work over ${\mathbb R}$
then the matrix
\[A=\left(\begin{array}{cc}1&1\\-1&1\end{array}\right)\]
has no eigenvectors and so, in particular, we cannot
find an invertible $P$ such that $P^{-1}AP$ is triangular.
\end{Example}
We now turn to the study of the characteristic polynomial
for its own sake. By direct calculation, or by using
Lemma \ref{Samechar} we know that $P_{Q^{-1}AQ}=P_{A}$
whenever $Q$ is invertible. Thus if we write
\[P_{A}(t)=\sum_{r=0}^{n}c_{r}(A)t^{r}\]
we know that the coefficients $c_{r}(A)$ are `matrix conjugacy
class invariants'
in the sense that $c_{r}(Q^{-1}AQ)=c_{r}(A)$. We can readily
identify three of these invariants: $c_{n}(A)=1$ (which is not
very interesting), $c_{0}(A)=(-1)^{n}\det A$ (which we already
knew to be invariant) and $c_{n-1}(A)=-\sum_{i=1}^{n}a_{ii}$.
\begin{Definition} If $A$ is an $n\times n$ matrix over ${\mathbb F}$
then we define the trace $\mbox{\rm tr}A$ of $A$ by
\[\mbox{\rm tr}A=\sum_{i=1}^{n}a_{ii}.\]
The trace of an endomorphism on a finite dimensional space is
defined to be the trace of any associated matrix.
\end{Definition}
In the sense made precise by the next lemma, trace is the only
linear matrix conjugacy class invariant.
\begin{Lemma}$^{*}$ Consider the collection $M_{n}({\mathbb F})$
of $n\times n$ matrices over ${\mathbb F}$.
(i) $\mbox{\rm tr}:M_{n}({\mathbb F})\rightarrow{\mathbb F}$ is a linear map.
(ii) If $T:M_{n}({\mathbb F})\rightarrow{\mathbb F}$ is a linear map
such that $T(P^{-1}AP)=T(A)$ for all $A\in M_{n}({\mathbb F})$
and all invertible $P\in M_{n}({\mathbb F})$ then
$T=(T(I)n^{-1})\mbox{\rm tr}$.
\end{Lemma}
If $f:{\mathbb R}^{n}\rightarrow{\mathbb R}$ is smooth the Laplacian of
$f$ is the trace of its Hessian.
It should be noted that the characteristic polynomial,
informative as it is, does not tell us everything about
the associated matrix or endomorphism.
\begin{Example} The matrices
\[\left(\begin{array}{cc}0&0\\0&0\end{array}\right), \ \
\left(\begin{array}{cc}0&1\\0&0\end{array}\right)\]
have the same characteristic polynomial, yet have different
ranks. Further one is diagonalisable and the other is not.
\end{Example}
If we write out the characteristic polynomial of an
endomorphism of a finite dimensional vector space $V$ as
\[P_{\alpha}(t)=\sum_{r=0}^{n}c_{r}(\alpha)t^{r}\]
we see that we may define $P_{\alpha}(\beta)$ for any
endomorphism $\beta$ of $V$ by
\[P_{\alpha}(\beta)=\sum_{r=0}^{n}c_{r}(\alpha)\beta^{r}.\]
Observe that $P_{\alpha}(\beta)$ is itself an endomorphism.
If $P_{\alpha}(\beta)$ is the zero endomorphism we say
that $\beta$ satisfies the characteristic equation of $\alpha$.
In exactly the same way if $A$ and $B$ are $n\times n$ matrices
over ${\mathbb F}$ we can define an $n\times n$ matrix $P_{A}(B)$.
If $P_{A}(B)$ is the zero matrix we say that $B$ satisfies the
characteristic equation of $A$.
\begin{Example}\label{Dontsubst} Consider the two $2\times 2$ matrices
\[A=I=\left(\begin{array}{cc}1&0\\0&1\end{array}\right),\ \
B=\left(\begin{array}{cc}1&0\\0&0\end{array}\right).\]
The characteristic polynomial of $A$ is $P_{A}(t)=t^{2}-2t+1=(t-1)^{2}$
so $P_{A}(B)$ is the non-zero $2\times 2$ matrix
\[\left(\begin{array}{cc}0&0\\0&1\end{array}\right)\]
so that $B$ does not satisfy the characteristic equation of $A$
although $\det(BI-A)$ is the scalar $0$.
\end{Example}
\begin{Lemma}
(The Cayley-Hamilton Theorem for Triangular Matrices)
If $A$ is an $n\times n$ triangular matrix
over ${\mathbb F}$ with diagonal
entries $a_{ii}$ then
\[\det(tI-A)=(t-a_{11})(t-a_{22})\ldots(t-a_{nn})\]
and
\[(A-a_{11}I)(A-a_{22}I)\ldots(A-a_{nn}I)=0.\]
\end{Lemma}
At first sight the result just proved seems very special
but in Theorem \ref{Triang} we showed that any endomorphism
of a finite dimensional vector space over ${\mathbb C}$ can
be represented (with respect to an appropriate basis)
by a triangular matrix. We can thus extend the Cayley-Hamilton
Theorem to a wide range of cases.
\begin{Corollary}
(i) If $V$ is a finite dimensional vector space over ${\mathbb C}$
then any linear map \mbox{$\alpha:V\rightarrow V$}
satisfies its own characteristic equation.
(ii) Any $n\times n$ matrix over
${\mathbb C}$ satisfies its own characteristic equation.
(iii) Any $n\times n$ matrix over
${\mathbb R}$ satisfies its own characteristic equation.
(iv) If $V$ is a finite dimensional vector space over ${\mathbb R}$
then any linear map \mbox{$\alpha:V\rightarrow V$}
satisfies its own characteristic equation.
\end{Corollary}
We bring together the results of the corollary as a theorem.
\begin{Theorem} (The Cayley-Hamilton Theorem)
(i) Any $n\times n$ matrix over
${\mathbb F}$ satisfies its own characteristic equation.
(ii) If $V$ is a finite dimensional vector space over ${\mathbb F}$
then any linear map $\alpha:V\rightarrow V$
satisfies its own characteristic equation.
\end{Theorem}
There are many different proofs of the Cayley-Hamilton Theorem
but (so far as I know) no totally trivial ones. If you invent
a new proof, first check it against Example \ref{Dontsubst}
and then get your supervisor to check it.
\section{Jordan Forms} It is unsatisfactory to leave non-diagonalisable
endomorphisms (and matrices) over ${\mathbb F}$ unexamined.
In this section we show that they can be fully classified using
the Jordan normal form. Although some of the material
is more or less explicitly starred some is not
and the development
is pretty and instructive.
Our first steps are unstarred.
Suppose $V$ is a finite dimensional vector space over
${\mathbb C}$ and $\alpha$ is an endomorphism of $V$.
The Cayley-Hamilton theorem tells us that there is a monic polynomial
$P_{\alpha}$ with $P_{\alpha}(\alpha)=0$. It follows that there
must be a monic polynomial of least degree which annihilates $\alpha$.
\begin{Definition} If $V$ is a finite dimensional vector space over
${\mathbb C}$ and $\alpha$ is an endomorphism of $V$
the monic polynomial $Q$ of least degree such that
$Q(\alpha)=0$ is called the minimal polynomial of $\alpha$.
\end{Definition}
\begin{Lemma} Suppose that
$V$ is a finite dimensional vector space over
${\mathbb C}$ and $\alpha$ is an endomorphism of $V$.
The minimal polynomial divides (i.e. is a factor of)
every polynomial $P$ with $P(\alpha)=0$.
In particular the minimal polynomial divides the characteristic
polynomial.
\end{Lemma}
\begin{Example} Suppose that
$V$ is a finite dimensional vector space over
${\mathbb C}$ and $\alpha$ is an endomorphism of $V$
which may be diagonalised with associated diagonal matrix $D$.
If the distinct diagonal entries of $D$ are $d_{1}$, $d_{2}$,
\dots, $d_{k}$ then the minimal polynomial $Q_{\alpha}$
of $\alpha$ is given by
\[Q_{\alpha}(t)=\prod_{j=1}^{k}(t-d_{j}).\]
In particular the characteristic and minimal polynomials
coincide if and only if $k=n$.
\end{Example}
The next results may or may not be starred but are
sufficiently important that everyone should know them.
\begin{Lemma} Let $P(t)=\sum_{j=0}^{n}a_{j}t^{j}$
be a polynomial with coefficients in ${\mathbb F}$.
If
$P(t)=0$ for $n+1$ distinct values of $t\in{\mathbb F}$
then $a_{0}=a_{1}=\dots=a_{n}=0$. In particular
$P(\alpha)=0$ and $P(A)=0$ whenever $\alpha$ is an appropriate
endomorphism or $A$ an appropriate matrix.
\end{Lemma}
\begin{Theorem}[Bezout's Theorem for polynomials] If
$R_{1}$, $R_{2}$, \dots $R_{k}$ are polynomials
with highest common factor $1$ (i.e. if no non-constant
polynomial divides all of $R_{1}$, $R_{2}$, \dots $R_{k}$)
then we can find polynomials $P_{1}$, $P_{2}$, \dots, $P_{k}$
such that
\[\sum_{j=1}^{k}P_{j}R_{j}=1.\]
\end{Theorem}
We now apply the last two results to the minimal polynomial.
\begin{Theorem} Suppose that
$V$ is a finite dimensional vector space over
${\mathbb C}$ and $\alpha$ is an endomorphism of $V$
with minimal polynomial $Q$. If we write
$Q(t)=\prod_{j=1}^{k}Q_{j}$ where $Q_{j}(t)=(t-\lambda_{j})^{n_{j}}$
with $n_{j}\geq 1$ and the $\lambda_{j}$ distinct
and set
\[R_{j}(t)=\prod_{i\neq j}Q_{i}\]
we can find polynomials $P_{1}$, $P_{2}$, \dots, $P_{k}$
such that
\[\sum_{j=1}^{k}P_{j}(\alpha)R_{j}(\alpha)=\iota.\]
Further, if we write $V_{j}=R_{j}(\alpha)V$ the following facts are
true.
(i) $V_{j}=Q_{j}(\alpha)^{-1}(0)$.
(ii) $\alpha(V_{j})\subseteq V_{j}.$
(iii) $V=V_{1}\oplus V_{2}\oplus\dots\oplus V_{k}$.
\end{Theorem}
\begin{Lemma} With the notation of the preceding theorem
suppose ${\cal E}_{j}$ is a basis for $V_{j}$.
If $\alpha|_{V_{j}}$ considered as an endomorphism of
$V_{j}$ (see (ii) in the preceding theorem)
has matrix $A_{j}$ with respect to ${\cal E}_{j}$, then
the matrix of $\alpha$ with respect to the basis
${\cal E}=\bigcup_{j=1}^{k}{\cal E}_{j}$
(see (iii) in the preceding theorem)
consists of the square matrices $A_{j}$ along the
diagonal with all other entries $0$.
\end{Lemma}
Thus we have reduced the problem
\noindent\emph{Problem$^{*}$} Find a basis for which an endomorphism
$\alpha$ has a nice matrix.
\noindent
to a rather simpler problem
\noindent\emph{Problem$^{**}$} Suppose that the endomorphism $\alpha$
of $V$ has the property that $(\alpha-\lambda\iota)^{m}=0$.
Find a basis for which an endomorphism
$\alpha$ has a nice matrix.
If we observe that whenever $\alpha$ has matrix $A$ with respect
to a certain basis then $\alpha-\lambda\iota$ has matrix
$A-\lambda I$ we can make the further reduction to
\noindent\emph{Problem$^{***}$} Suppose that the endomorphism $\alpha$
on $V$ has the property that $\alpha^{m}=0$.
Find a basis for which an endomorphism
$\alpha$ has a nice matrix.
This suggests the following definition.
\begin{Definition} An endomorphism $\alpha$ of
a vector space is called nilpotent if we can find
an $m\geq 0$ with $\alpha^{m}=0$.
\end{Definition}
To solve the problems
just stated we need the notion of a Jordan matrix.
We write
$J(\lambda,n)$ for the $n\times n$ matrix
with $\lambda$'s down the diagonal, $1$'s immediately
below and zero every where else so that
\[J(\lambda,n)=\begin{pmatrix}
\lambda&0&0&\cdots&0&0&0&0\\
1&\lambda&0&\cdots&0&0&0&0\\
0&1&\lambda&\cdots&0&0&0&0\\
\vdots&\vdots&\vdots&&\vdots&\vdots&\vdots&\vdots\\
0&0&0&\cdots&0&1&\lambda&0\\
0&0&0&\cdots&0&0&1&\lambda\end{pmatrix}\ .\]
We call $J(\lambda,n)$ a Jordan matrix.
We can now solve Problem$^{***}$.
\begin{Theorem}\label{Jordan} Suppose that
$V$ is a finite dimensional vector space over
${\mathbb C}$ and $\alpha$ is a nilpotent endomorphism of $V$.
Then there is a basis for $V$ such that $\alpha$ has matrix
$A$ (relative to this basis) which consists of zeros
except for Jordan matrices $J(0,n_{i})$ $[1\leq i\leq s]$
down the diagonal.
\end{Theorem}
The only proofs that I know of Theorem~\ref{Jordan} are
hard and in my view the only reason it is included is to show
that your lecturers are cleverer than you are. The proof
is starred but not the statement of the theorem.
You should, however, convince yourselves that
the result is plausible.
It is now easy to solve Problem$^{**}$.
\begin{Lemma} Suppose that
$V$ is a finite dimensional vector space over
${\mathbb C}$ and $\alpha$ is an
endomorphism
of $V$ with the property that $(\alpha-\lambda\iota)^{m}=0$.
Then there is a basis for $V$ such that $\alpha$ has matrix
$A$ (relative to this basis) which consists of zeros
except for Jordan matrices $J(\lambda,n_{i})$ $[1\leq i\leq s]$
down the diagonal.
\end{Lemma}
We can now solve our original problem, Problem$^{*}$.
\begin{Theorem}[Jordan Normal Form]\label{normal}
Suppose that
$V$ is a finite dimensional vector space over
${\mathbb C}$ and $\alpha$ is an
endomorphism
of $V$.
Then there is a basis for $V$ such that $\alpha$ has matrix
$A$ (relative to this basis) which consists of zeros
except for Jordan matrices $J(\lambda_{i},m_{ij})$
$[1\leq j\leq m_{i},\ 1\leq i\leq s]$ down the diagonal.
We may demand that the $\lambda_{i}$ be distinct and
that $m_{i1}\geq m_{i2}\geq\dots$.
\end{Theorem}
A little thought shows that the Jordan form is unique
up to shuffling the diagonal blocks.
The following observation is trivial but worth making.
\begin{Lemma} With the notation of Theorem~\ref{normal}
$\alpha$ has characteristic polynomial
\[P_{\alpha}(t)=\prod_{i=1}^{s}\prod_{j=1}^{m_{i}}(t-\lambda_{i})^{m_{ij}}\]
and minimal polynomial
\[Q_{\alpha}(t)=\prod_{i=1}^{s}(t-\lambda_{i})^{m_{i1}}.\]
The algebraic multiplicity of
$\lambda_{i}$ is $\sum_{j=1}^{m_{i}}m_{ij}$
and the geometric multiplicity is $m_{i}$.
\end{Lemma}
In Course~C1 you saw how the Jordan normal form was used
to classify the solution of two simultaneous first order
differential equations in two unknowns. The extension to
$n$ simultaneous first order
differential equations in $n$ unknowns is obvious.
\section{Dual Spaces}
We saw in Theorem \ref{L(U,V)} that the linear maps
from one vector space $U$ to another vector space $V$
form a vector space $L(U,V)$. In the two previous sections
we investigated the
special case when $U=V$. Now we look at the case when $V$ is one
dimensional.
\begin{Definition} If $U$ is a vector space over ${\mathbb F}$
the vector space of linear maps $u':U\rightarrow{\mathbb F}$ is called
the dual space of $U$ and denoted by $U'$.
\end{Definition}
Thus for example the trace map is in the dual space of $M_{n}({\mathbb F})$.
\begin{Example} Let $C^{\infty}({\mathbb R})$ be the space of infinitely
differentiable functions $f:{\mathbb R}\rightarrow{\mathbb R}$. If
we fix $x\in{\mathbb R}$ and set
\[J(f)=\int_{-1}^{1}f(t)dt,\ \ \delta_{x}(f)=f(x),\ \ \delta_{x}'(f)=f'(x)\]
then $J$, $\delta_{x}$ and $\delta_{x}'$ are all in the dual space
of $C^{\infty}({\mathbb R})$.
\end{Example}
As the example indicates analysts are very interested in
objects which belong to dual spaces whilst algebraists are interested
in dual objects in general. However, at this level, we can only
say interesting things about the duals of finite dimensional spaces.
\begin{Lemma}\label{Dimdual} Suppose that $V$ is a vector space
over ${\mathbb F}$
with basis $e_{1},e_{2},\ldots,e_{n}$. Then the dual space $V'$
has the same dimension as $V$ and may be given a basis
(the so called dual basis) $E_{1},E_{2},\ldots,E_{n}$ with
$E_{i}(e_{j})=\delta_{ij}$.
\end{Lemma}
\begin{Lemma} Let $V$ be a finite dimensional vector space. If we write
\[\Phi(v)(v')=v'(v)\]
for all $v\in V$, $v'\in V'$ then $\Phi:V \rightarrow V''$
is an isomorphism.
\end{Lemma}
We already knew from Lemma \ref{Dimdual} that
$\dim V=\dim V'=\dim V''$ but $\Phi$ gives us a `natural isomorphism'
defined without reference to some particularly chosen basis.
The `standard convention' is to write $v=\Phi(v)$, i.e. to
identify $V$ and $V''$ via $\Phi$.
\begin{Definition} If $U$ is a non-empty subset of a vector space
$V$ then we define the annihilator $U^{\circ}$ of $U$ to be
the subset of $V'$ given by
\[U^{\circ}=\{v'\in V':\ v'(u)=0\ \mbox{for all}\ u\in U\}.\]
\end{Definition}
\begin{Lemma}
If $U$ is a non-empty subset of a vector space $V$ then $U^{\circ}$ is
a subspace of $V'$. If, further, $V$ is finite dimensional and
$U$ is a subspace of $V$ then
(i) $\dim U+\dim U^{\circ}=\dim V$.
(ii) $\Phi(U)=U^{\circ\circ}$ so, using the standard convention,
$U^{\circ\circ}=U$.
\end{Lemma}
We conclude the section and the course by looking at the notion
of the adjoint of a linear map.
\begin{Lemma} If $U$ and $V$ are vector spaces over ${\mathbb F}$
and $\alpha\in L(U,V)$ then the equation
\[\alpha'(v')(u)=v'(\alpha u)\]
for all $v'\in V'$, $u\in U$ defines an $\alpha'\in L(V',U')$.
(We call $\alpha'$ the {\em dual}, or the {\em adjoint}, of $\alpha$).
\end{Lemma}
\begin{Lemma} If $U$ and $V$ are finite dimensional vector
spaces and $\alpha$ has matrix $A$ with respect to given bases
of $U$ and $V$ then $\alpha'$ has matrix $A^{T}$ with respect
to the dual bases.
\end{Lemma}
\begin{Lemma} If $U$, $V$ and $W$ are vector spaces over ${\mathbb F}$
and $\alpha\in L(U,V)$, $\beta\in L(V,W)$ then
$(\beta\alpha)'=\alpha'\beta'$.
\end{Lemma}
If we choose appropriate bases we recover the matricial formula
$(BA)^{T}=A^{T}B^{T}$ for matrices of appropriate sizes.
\begin{Theorem}\label{Nodettrans} Let $U$ and $V$ be finite dimensional vector
spaces. If we adopt the standard convention of identifying
$U''$ with $U$ and $V''$ with $V$ then, if $\alpha\in L(U,V)$,
(i) $\alpha''=\alpha$.
(ii) $(\alpha U)^{\circ}=(\alpha')^{-1}(0)$.
(iii) $r(\alpha)=r(\alpha')$.
(iv) The row rank and the column rank of any matrix are equal.
\end{Theorem}
We have thus fulfilled our promise to prove the equivalence of
row and column rank in a natural context.
\begin{Lemma} If $\alpha$ is an endomorphism of a finite
dimensional vector space $V$ then $\alpha'$ is an
endomorphism of $V'$ and $\det\alpha'=\det\alpha$.
\end{Lemma}
\end{document}